refactor: migrate main.py to modular routers and add project roadmap
- Migrated monolithic main.py to feature-scoped routers in app/routers/ - Added GEMINI.md for project context and AI instructional guidelines - Updated README.md with a comprehensive modernization plan (SQL migration, robust scraping DSL, frontend modernization) - Improved authentication with cookie support and modular JS - Updated test suite and documentation
This commit is contained in:
@@ -0,0 +1,41 @@
|
||||
# Anime Sites Downloaders
|
||||
|
||||
## OVERVIEW
|
||||
Handlers for French anime streaming catalogs that provide metadata and episode listings, delegating actual video extraction to video player handlers.
|
||||
|
||||
## WHERE TO LOOK
|
||||
|
||||
| File | Purpose |
|
||||
|------|---------|
|
||||
| `base.py` | Abstract `BaseAnimeSite` class defining the interface all anime sites implement |
|
||||
| `animesama.py` | Primary provider with dynamic domain switching, multiple video player extraction |
|
||||
| `nekosama.py` | Neko-Sama / Gupy integration (metadata-only, no direct downloads) |
|
||||
| `animeultime.py` | Anime-Ultime catalog handler |
|
||||
| `vostfree.py` | Vostfree catalog handler |
|
||||
| `frenchmanga.py` | French-Manga catalog handler |
|
||||
|
||||
## CONVENTIONS
|
||||
|
||||
### Interface Contract
|
||||
Each site must implement five async methods from `BaseAnimeSite`:
|
||||
- `can_handle(url: str) -> bool` — URL pattern matching
|
||||
- `search_anime(query, lang) -> list[dict]` — Returns `{title, url, cover_image}`
|
||||
- `get_episodes(anime_url, lang) -> list[dict]` — Returns `{episode_number, url, title, host}`
|
||||
- `get_anime_metadata(anime_url) -> dict` — Returns `{synopsis, genres, rating, release_year, studio, poster_image, total_episodes, status}`
|
||||
- `get_download_link(url) -> tuple[str, str]` — Returns `(video_player_url, filename)`
|
||||
|
||||
### Key Patterns
|
||||
- **Pipe-separated URLs**: `video_url|anime_page_url|episode_title` — preserves context across extraction
|
||||
- **Language parameter**: `lang="vostfr"` or `"vf"` — controls which episodes to return
|
||||
- **Video player delegation**: Anime sites return player URLs (vidmoly, sendvid, sibnet, lpayer, smoothpre), not direct downloads
|
||||
- **Filename generation**: `{anime_name} - S{season} - {episode}.mp4` format
|
||||
- **HTTP headers**: Browser UA and referer required to avoid blocking
|
||||
|
||||
### Domain Detection
|
||||
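- `AnimeSamaDownloader` determines the current domain dynamically by probing known domains in order (`anime-sama.to`, `anime-sama.tv`, `anime-sama.si`, `anime-sama.org`) and using the first one that serves a valid catalogue page; it falls back to `anime-sama.to` if none respond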
|
||||
- Uses a fallback chain for video extraction: cached player → detected player → remaining players in priority order
|
||||
|
||||
### Error Handling
|
||||
- Raise `Exception` with descriptive message on failure
|
||||
- Log at appropriate level (`debug` for expected failures, `error` for unexpected)
|
||||
- Validate extracted URLs with `_test_video_url()` before returning
|
||||
@@ -33,7 +33,7 @@ class AnimeSamaDownloader(BaseAnimeSite):
|
||||
"""Downloader for anime-sama.org / anime-sama.store"""
|
||||
|
||||
# Static list of known domains (will be updated dynamically)
|
||||
BASE_DOMAINS = ["anime-sama.tv", "anime-sama.si", "www.anime-sama.si", "anime-sama.org", "anime-sama.store", "anime-sama.eu"]
|
||||
BASE_DOMAINS = ["anime-sama.to", "www.anime-sama.to", "anime-sama.tv", "www.anime-sama.tv", "anime-sama.si", "www.anime-sama.si", "anime-sama.org", "anime-sama.store", "anime-sama.eu"]
|
||||
|
||||
def __init__(self):
|
||||
"""Initialize AnimeSamaDownloader with working player cache"""
|
||||
@@ -43,46 +43,34 @@ class AnimeSamaDownloader(BaseAnimeSite):
|
||||
@classmethod
|
||||
async def get_current_domain(cls) -> str:
|
||||
"""
|
||||
Fetch the current active domain from anime-sama.pw
|
||||
Returns the current domain (e.g., 'anime-sama.si')
|
||||
Fetch the current active domain by testing known domains
|
||||
Returns the current working domain (e.g., 'anime-sama.to')
|
||||
"""
|
||||
try:
|
||||
import httpx
|
||||
async with httpx.AsyncClient(timeout=10.0, follow_redirects=True) as client:
|
||||
response = await client.get("https://anime-sama.pw")
|
||||
# Test known domains in order of recency
|
||||
for test_domain in ["anime-sama.to", "anime-sama.tv", "anime-sama.si", "anime-sama.org"]:
|
||||
try:
|
||||
test_url = f"https://{test_domain}/catalogue"
|
||||
response = await client.get(test_url)
|
||||
|
||||
# Look for the main link in the HTML
|
||||
from bs4 import BeautifulSoup
|
||||
soup = BeautifulSoup(response.text, 'lxml')
|
||||
# Check if we got a valid page (not 404 and has content)
|
||||
if response.status_code == 200 and len(response.text) > 1000:
|
||||
# Check if it's the real anime-sama site (has catalog cards)
|
||||
if 'catalogue' in response.text or 'catalog-card' in response.text:
|
||||
logger.info(f"Working domain found: {test_domain}")
|
||||
return test_domain
|
||||
except Exception as e:
|
||||
logger.debug(f"Domain {test_domain} failed: {e}")
|
||||
continue
|
||||
|
||||
# Look for the primary button/link
|
||||
primary_link = soup.find('a', class_='btn-primary')
|
||||
if primary_link and primary_link.get('href'):
|
||||
href = primary_link['href']
|
||||
# Extract domain from URL
|
||||
from urllib.parse import urlparse
|
||||
parsed = urlparse(href)
|
||||
domain = parsed.netloc # e.g., 'anime-sama.si'
|
||||
logger.info(f"Current domain from anime-sama.pw: {domain}")
|
||||
return domain
|
||||
|
||||
# Fallback: look for any anime-sama.* link
|
||||
for link in soup.find_all('a', href=True):
|
||||
href = link['href']
|
||||
if 'anime-sama.' in href and href.startswith('https://'):
|
||||
from urllib.parse import urlparse
|
||||
parsed = urlparse(href)
|
||||
domain = parsed.netloc
|
||||
if domain not in ['anime-sama.pw', 'www.anime-sama.pw']:
|
||||
logger.info(f"Found domain via fallback: {domain}")
|
||||
return domain
|
||||
|
||||
logger.warning("Could not determine current domain, using default")
|
||||
return "anime-sama.si"
|
||||
logger.warning("Could not determine working domain, using default")
|
||||
return "anime-sama.to"
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error fetching current domain: {e}")
|
||||
return "anime-sama.si"
|
||||
return "anime-sama.to"
|
||||
|
||||
@classmethod
|
||||
async def update_domains(cls) -> None:
|
||||
@@ -164,6 +152,14 @@ class AnimeSamaDownloader(BaseAnimeSite):
|
||||
anime_page_url=url,
|
||||
episode_title=None
|
||||
)
|
||||
# Handle Smoothpre URLs
|
||||
elif 'smoothpre' in url.lower():
|
||||
logger.info(f"Using fallback for Smoothpre: {url[:80]}...")
|
||||
return await self.get_download_link_with_fallback(
|
||||
url,
|
||||
anime_page_url=None,
|
||||
episode_title=None
|
||||
)
|
||||
# If it's an anime-sama page, try to find the video
|
||||
if 'anime-sama' in url.lower():
|
||||
if 'dingtez' in url or 'dingz' in url:
|
||||
@@ -190,7 +186,7 @@ class AnimeSamaDownloader(BaseAnimeSite):
|
||||
|
||||
for iframe in iframes:
|
||||
src = iframe.get('src', '')
|
||||
if src and any(provider in src for provider in ['vidmoly', 'player', 'stream', 'play', 'embed']):
|
||||
if src and any(provider in src for provider in ['vidmoly', 'player', 'stream', 'play', 'embed', 'smoothpre']):
|
||||
if not src.startswith('http'):
|
||||
src = urljoin(final_url, src)
|
||||
logger.debug(f"Found iframe: {src}")
|
||||
@@ -201,6 +197,11 @@ class AnimeSamaDownloader(BaseAnimeSite):
|
||||
logger.debug(f"Extracting from vidmoly iframe: {src}")
|
||||
video_url, filename = await self._extract_from_vidmoly(src, anime_page_url=url, episode_title="Episode")
|
||||
return video_url, filename
|
||||
# For smoothpre, use the smoothpre extractor
|
||||
elif 'smoothpre' in src.lower():
|
||||
logger.debug(f"Extracting from smoothpre iframe: {src}")
|
||||
video_url, filename = await self._extract_from_smoothpre(src, anime_page_url=url, episode_title="Episode")
|
||||
return video_url, filename
|
||||
else:
|
||||
video_url = await self._extract_from_player(src)
|
||||
if video_url:
|
||||
@@ -563,6 +564,49 @@ class AnimeSamaDownloader(BaseAnimeSite):
|
||||
# If yt-dlp fails, return m3u8 URL anyway (let download manager handle it)
|
||||
return m3u8_url, filename
|
||||
|
||||
async def _extract_from_smoothpre(self, url: str, anime_page_url: str | None = None, episode_title: str | None = None) -> tuple[str, str]:
    """Extract a video URL from a smoothpre player via SmoothpreDownloader.

    Args:
        url: Smoothpre player URL to resolve.
        anime_page_url: Anime page the episode belongs to; when given it is
            used to generate the output filename.
        episode_title: Episode label inserted into the generated filename.

    Returns:
        A ``(video_url, filename)`` tuple. ``filename`` is the name generated
        from ``anime_page_url`` when one could be built, otherwise the name
        returned by ``SmoothpreDownloader``.

    Raises:
        Exception: If any step fails. The original error is chained as the
            cause so the full traceback is preserved.
    """
    try:
        logger.debug(f"Extracting from smoothpre: {url}")
        logger.debug("Delegating to SmoothpreDownloader...")

        # Imported inside the method, as in the original code.
        from ..video_players.smoothpre import SmoothpreDownloader

        # Generate the target filename first so it can be passed to the downloader.
        if episode_title and anime_page_url:
            anime_name = self._generate_anime_name(anime_page_url)
            season_num = self._extract_season_number(anime_page_url)
            if season_num:
                target_filename = f"{anime_name} - S{season_num} - {episode_title}.mp4"
            else:
                target_filename = f"{anime_name} - {episode_title}.mp4"
            logger.debug(f"Generated filename: {target_filename} (episode: {episode_title})")
        elif anime_page_url:
            target_filename = self._generate_filename_from_anime_url(anime_page_url)
            logger.debug(f"Generated filename: {target_filename} (no episode title)")
        else:
            target_filename = None
            logger.debug("No target_filename generated")

        # Delegate the actual extraction to the dedicated player handler.
        smoothpre_downloader = SmoothpreDownloader()
        video_url, temp_filename = await smoothpre_downloader.get_download_link(url, target_filename=target_filename)

        # Prefer our generated name; fall back to the downloader's name.
        filename = target_filename if target_filename else temp_filename

        logger.debug(f"Got video: {filename}")
        logger.debug(f"Video URL: {video_url[:100] if video_url else 'None'}...")

        # Return the direct video URL; the download manager performs the download.
        return video_url, filename

    except Exception as e:
        logger.debug(f"Smoothpre extraction error: {e}")
        # Chain the cause so callers and logs keep the underlying traceback.
        raise Exception(f"Error extracting from smoothpre: {str(e)}") from e
|
||||
|
||||
async def _extract_from_player(self, player_url: str) -> str | None:
|
||||
"""Try to extract direct video URL from player iframe"""
|
||||
try:
|
||||
@@ -808,9 +852,9 @@ class AnimeSamaDownloader(BaseAnimeSite):
|
||||
start = time.time()
|
||||
logger.debug(f"Searching for '{query}' ({lang})...")
|
||||
|
||||
# Use anime-sama.tv directly (anime-sama.si has redirect issues)
|
||||
current_domain = "anime-sama.tv"
|
||||
|
||||
# Get the current working domain
|
||||
current_domain = await self.get_current_domain()
|
||||
logger.info(f"Using domain: {current_domain}")
|
||||
|
||||
# Use the official search API endpoint
|
||||
search_api_url = f"https://{current_domain}/template-php/defaut/fetch.php"
|
||||
@@ -1016,7 +1060,7 @@ class AnimeSamaDownloader(BaseAnimeSite):
|
||||
Exception: If all players fail
|
||||
"""
|
||||
# Define player priority list
|
||||
player_priority = ['vidmoly', 'sendvid', 'sibnet', 'lpayer']
|
||||
player_priority = ['vidmoly', 'sendvid', 'sibnet', 'lpayer', 'smoothpre']
|
||||
|
||||
# Extract video URLs from pipe format if needed
|
||||
# Format: url1|url2|url3|anime_page_url|episode_title
|
||||
@@ -1038,7 +1082,48 @@ class AnimeSamaDownloader(BaseAnimeSite):
|
||||
anime_page_url = parts[1]
|
||||
else:
|
||||
video_urls = [url]
|
||||
|
||||
|
||||
# Filter out empty or invalid URLs
|
||||
valid_video_urls = []
|
||||
for vu in video_urls:
|
||||
vu = vu.strip()
|
||||
# Skip empty URLs
|
||||
if not vu:
|
||||
logger.warning(f"Skipping empty URL")
|
||||
continue
|
||||
|
||||
# Skip URLs with incomplete query parameters (e.g., "videoid=" without value)
|
||||
if '=&' in vu or vu.endswith('='):
|
||||
logger.warning(f"Skipping incomplete URL (missing parameter value): {vu[:80]}...")
|
||||
continue
|
||||
|
||||
# Skip URLs that are just a base domain without ID (e.g., "https://sendvid.com/embed/")
|
||||
if vu.endswith('/') and len(vu) > 10:
|
||||
# Check if it's a base player URL without video ID
|
||||
base_urls = [
|
||||
'https://sendvid.com/embed/',
|
||||
'https://sendvid.com/embed',
|
||||
'https://vidmoly.to/embed/',
|
||||
'https://vidmoly.to/embed',
|
||||
'https://vidmoly.biz/embed/',
|
||||
'https://vidmoly.biz/embed',
|
||||
]
|
||||
if any(vu.startswith(base) for base in base_urls):
|
||||
logger.warning(f"Skipping incomplete URL (no video ID): {vu[:60]}...")
|
||||
continue
|
||||
|
||||
# Skip URLs with incomplete HTML filenames (e.g., "embed-.html")
|
||||
if 'embed-.html' in vu or 'embed_' in vu:
|
||||
logger.warning(f"Skipping malformed URL (incomplete HTML): {vu[:80]}...")
|
||||
continue
|
||||
|
||||
valid_video_urls.append(vu)
|
||||
|
||||
video_urls = valid_video_urls
|
||||
|
||||
if not video_urls:
|
||||
raise Exception("No valid video URLs found after filtering")
|
||||
|
||||
# Try each video URL in order (each may have different player)
|
||||
last_error = None
|
||||
for video_url in video_urls:
|
||||
@@ -1104,7 +1189,11 @@ class AnimeSamaDownloader(BaseAnimeSite):
|
||||
)
|
||||
elif player_name == 'lpayer':
|
||||
video_url_result, filename = await self._extract_from_lpayer_api(video_url, anime_page_url, episode_title, target_filename)
|
||||
|
||||
elif player_name == 'smoothpre':
|
||||
video_url_result, filename = await self._extract_from_smoothpre(
|
||||
video_url, anime_page_url, episode_title
|
||||
)
|
||||
|
||||
# Validate the extracted URL
|
||||
logger.info(f"Validating extracted URL from {player_name}...")
|
||||
is_valid = await self._test_video_url(video_url_result)
|
||||
@@ -1580,7 +1669,7 @@ class AnimeSamaDownloader(BaseAnimeSite):
|
||||
Exception: If all players fail
|
||||
"""
|
||||
# Define player priority list
|
||||
player_priority = ['vidmoly', 'sendvid', 'sibnet', 'lpayer']
|
||||
player_priority = ['vidmoly', 'sendvid', 'sibnet', 'lpayer', 'smoothpre']
|
||||
|
||||
# Extract video URLs from pipe format if needed
|
||||
# Format: url1|url2|url3|anime_page_url|episode_title
|
||||
@@ -1602,12 +1691,53 @@ class AnimeSamaDownloader(BaseAnimeSite):
|
||||
anime_page_url = parts[1]
|
||||
else:
|
||||
video_urls = [url]
|
||||
|
||||
|
||||
# Filter out empty or invalid URLs
|
||||
valid_video_urls = []
|
||||
for vu in video_urls:
|
||||
vu = vu.strip()
|
||||
# Skip empty URLs
|
||||
if not vu:
|
||||
logger.warning(f"Skipping empty URL")
|
||||
continue
|
||||
|
||||
# Skip URLs with incomplete query parameters (e.g., "videoid=" without value)
|
||||
if '=&' in vu or vu.endswith('='):
|
||||
logger.warning(f"Skipping incomplete URL (missing parameter value): {vu[:80]}...")
|
||||
continue
|
||||
|
||||
# Skip URLs that are just a base domain without ID (e.g., "https://sendvid.com/embed/")
|
||||
if vu.endswith('/') and len(vu) > 10:
|
||||
# Check if it's a base player URL without video ID
|
||||
base_urls = [
|
||||
'https://sendvid.com/embed/',
|
||||
'https://sendvid.com/embed',
|
||||
'https://vidmoly.to/embed/',
|
||||
'https://vidmoly.to/embed',
|
||||
'https://vidmoly.biz/embed/',
|
||||
'https://vidmoly.biz/embed',
|
||||
]
|
||||
if any(vu.startswith(base) for base in base_urls):
|
||||
logger.warning(f"Skipping incomplete URL (no video ID): {vu[:60]}...")
|
||||
continue
|
||||
|
||||
# Skip URLs with incomplete HTML filenames (e.g., "embed-.html")
|
||||
if 'embed-.html' in vu or 'embed_' in vu:
|
||||
logger.warning(f"Skipping malformed URL (incomplete HTML): {vu[:80]}...")
|
||||
continue
|
||||
|
||||
valid_video_urls.append(vu)
|
||||
|
||||
video_urls = valid_video_urls
|
||||
|
||||
if not video_urls:
|
||||
raise Exception("No valid video URLs found after filtering")
|
||||
|
||||
# Try each video URL in order (each may have different player)
|
||||
last_error = None
|
||||
for video_url in video_urls:
|
||||
logger.info(f"Trying video URL: {video_url[:50]}...")
|
||||
|
||||
|
||||
# Detect player type from URL
|
||||
detected_player = None
|
||||
url_lower = video_url.lower()
|
||||
@@ -1619,21 +1749,13 @@ class AnimeSamaDownloader(BaseAnimeSite):
|
||||
detected_player = 'sibnet'
|
||||
elif 'lpayer' in url_lower:
|
||||
detected_player = 'lpayer'
|
||||
elif 'dingtez' in url_lower:
|
||||
detected_player = 'dingtez'
|
||||
|
||||
url_lower = video_url.lower()
|
||||
if 'vidmoly' in url_lower:
|
||||
detected_player = 'vidmoly'
|
||||
elif 'sendvid' in url_lower:
|
||||
detected_player = 'sendvid'
|
||||
elif 'sibnet' in url_lower:
|
||||
detected_player = 'sibnet'
|
||||
elif 'lpayer' in url_lower or 'embed' in url_lower:
|
||||
detected_player = 'lpayer'
|
||||
elif 'smoothpre' in url_lower:
|
||||
detected_player = 'smoothpre'
|
||||
elif 'myvi' in url_lower or 'myvi.tv' in url_lower:
|
||||
detected_player = 'vidmoly' # MyVi is similar to VidMoly, try VidMoly downloader first
|
||||
elif 'dingtez' in url_lower:
|
||||
detected_player = 'lpayer' # Unknown player, try lpayer as fallback
|
||||
|
||||
|
||||
logger.debug(f"Detected player from URL: {detected_player}")
|
||||
|
||||
# Determine which player to try first
|
||||
@@ -1644,22 +1766,32 @@ class AnimeSamaDownloader(BaseAnimeSite):
|
||||
|
||||
# Build player order: cached player first, then detected, then rest in priority order
|
||||
player_order = []
|
||||
if cached_player and cached_player in player_priority:
|
||||
player_order.append(cached_player)
|
||||
if detected_player and detected_player not in player_order and detected_player in player_priority:
|
||||
player_order.append(detected_player)
|
||||
for p in player_priority:
|
||||
if p not in player_order:
|
||||
player_order.append(p)
|
||||
|
||||
|
||||
# Only try detected player if single video URL
|
||||
if len(video_urls) == 1:
|
||||
# When we have multiple video URLs, only try the detected player for each URL
|
||||
# If the detected player fails, we'll move to the next URL instead of trying other players
|
||||
if len(video_urls) > 1:
|
||||
# Multiple URLs: only try the detected player (or first in priority if none detected)
|
||||
if detected_player and detected_player in player_priority:
|
||||
player_order = [detected_player]
|
||||
logger.info(f"Multiple URLs detected, trying only detected player: {detected_player}")
|
||||
else:
|
||||
player_order = [player_priority[0]]
|
||||
|
||||
# No player detected, try cached if available, otherwise first in priority
|
||||
if cached_player and cached_player in player_priority:
|
||||
player_order = [cached_player]
|
||||
logger.info(f"Multiple URLs with no detected player, trying cached: {cached_player}")
|
||||
else:
|
||||
player_order = [player_priority[0]]
|
||||
logger.info(f"Multiple URLs with no detected/cached player, trying: {player_order[0]}")
|
||||
else:
|
||||
# Single URL: try cached player first, then detected, then all others in priority
|
||||
if cached_player and cached_player in player_priority:
|
||||
player_order.append(cached_player)
|
||||
if detected_player and detected_player not in player_order and detected_player in player_priority:
|
||||
player_order.append(detected_player)
|
||||
for p in player_priority:
|
||||
if p not in player_order:
|
||||
player_order.append(p)
|
||||
|
||||
logger.info(f"Player order: {player_order}")
|
||||
|
||||
# Try each player for this video URL
|
||||
@@ -1681,7 +1813,11 @@ class AnimeSamaDownloader(BaseAnimeSite):
|
||||
)
|
||||
elif player_name == 'lpayer':
|
||||
video_url_result, filename = await self._extract_from_lpayer_api(video_url, anime_page_url, episode_title, target_filename)
|
||||
|
||||
elif player_name == 'smoothpre':
|
||||
video_url_result, filename = await self._extract_from_smoothpre(
|
||||
video_url, anime_page_url, episode_title
|
||||
)
|
||||
|
||||
# Validate the extracted URL
|
||||
logger.info(f"Validating extracted URL from {player_name}...")
|
||||
is_valid = await self._test_video_url(video_url_result)
|
||||
|
||||
Reference in New Issue
Block a user