refactor: migrate main.py to modular routers and add project roadmap

- Migrated monolithic main.py to feature-scoped routers in app/routers/
- Added GEMINI.md for project context and AI instructional guidelines
- Updated README.md with a comprehensive modernization plan (SQL migration, robust scraping DSL, frontend modernization)
- Improved authentication with cookie support and modular JS
- Updated test suite and documentation
2026-03-24 10:12:04 +00:00
parent 1b5d7f9238
commit d4d8d8a3b6
42 changed files with 4518 additions and 2426 deletions
@@ -0,0 +1,41 @@
+# Anime Sites Downloaders
+
+## OVERVIEW
+Handlers for French anime streaming catalogs that provide metadata and episode listings, delegating actual video extraction to video player handlers.
+
+## WHERE TO LOOK
+
+| File | Purpose |
+|------|---------|
+| `base.py` | Abstract `BaseAnimeSite` class defining the interface all anime sites implement |
+| `animesama.py` | Primary provider with dynamic domain switching, multiple video player extraction |
+| `nekosama.py` | Neko-Sama / Gupy integration (metadata-only, no direct downloads) |
+| `animeultime.py` | Anime-Ultime catalog handler |
+| `vostfree.py` | Vostfree catalog handler |
+| `frenchmanga.py` | French-Manga catalog handler |
+
+## CONVENTIONS
+
+### Interface Contract
+Each site must implement five async methods from `BaseAnimeSite`:
+- `can_handle(url: str) -> bool` — URL pattern matching
+- `search_anime(query, lang) -> list[dict]` — Returns `{title, url, cover_image}`
+- `get_episodes(anime_url, lang) -> list[dict]` — Returns `{episode_number, url, title, host}`
+- `get_anime_metadata(anime_url) -> dict` — Returns `{synopsis, genres, rating, release_year, studio, poster_image, total_episodes, status}`
+- `get_download_link(url) -> tuple[str, str]` — Returns `(video_player_url, filename)`
+
+### Key Patterns
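+- **Pipe-separated URLs**: `video_url|anime_page_url|episode_title` (several candidate video URLs may precede the page URL: `url1|url2|url3|anime_page_url|episode_title`) — preserves context across extraction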
+- **Language parameter**: `lang="vostfr"` or `"vf"` — controls which episodes to return
+- **Video player delegation**: Anime sites return player URLs (vidmoly, sendvid, sibnet, lpayer, smoothpre), not direct downloads
+- **Filename generation**: `{anime_name} - S{season} - {episode}.mp4` format
+- **HTTP headers**: Browser UA and referer required to avoid blocking
+
+### Domain Detection
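+- `AnimeSamaDownloader` determines the current domain dynamically by probing known domains in order (`anime-sama.to`, `anime-sama.tv`, `anime-sama.si`, `anime-sama.org`), defaulting to `anime-sama.to` when none responds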
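+- Uses fallback chain for video extraction: for a single URL, cached player → detected player → remaining players in priority order; with several candidate URLs, only one player is tried per URL (detected, else cached, else first in the priority list) before moving to the next URL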
+
+### Error Handling
+- Raise `Exception` with descriptive message on failure
+- Log at appropriate level (`debug` for expected failures, `error` for unexpected)
+- Validate extracted URLs with `_test_video_url()` before returning
@@ -33,7 +33,7 @@ class AnimeSamaDownloader(BaseAnimeSite):
    """Downloader for anime-sama.org / anime-sama.store"""

    # Static list of known domains (will be updated dynamically)
-    BASE_DOMAINS = ["anime-sama.tv", "anime-sama.si", "www.anime-sama.si", "anime-sama.org", "anime-sama.store", "anime-sama.eu"]
+    BASE_DOMAINS = ["anime-sama.to", "www.anime-sama.to", "anime-sama.tv", "www.anime-sama.tv", "anime-sama.si", "www.anime-sama.si", "anime-sama.org", "anime-sama.store", "anime-sama.eu"]

    def __init__(self):
        """Initialize AnimeSamaDownloader with working player cache"""
@@ -43,46 +43,34 @@ class AnimeSamaDownloader(BaseAnimeSite):
    @classmethod
    async def get_current_domain(cls) -> str:
        """
-        Fetch the current active domain from anime-sama.pw
-        Returns the current domain (e.g., 'anime-sama.si')
+        Fetch the current active domain by testing known domains
+        Returns the current working domain (e.g., 'anime-sama.to')
        """
        try:
            import httpx
            async with httpx.AsyncClient(timeout=10.0, follow_redirects=True) as client:
-                response = await client.get("https://anime-sama.pw")
+                # Test known domains in order of recency
+                for test_domain in ["anime-sama.to", "anime-sama.tv", "anime-sama.si", "anime-sama.org"]:
+                    try:
+                        test_url = f"https://{test_domain}/catalogue"
+                        response = await client.get(test_url)

-                # Look for the main link in the HTML
-                from bs4 import BeautifulSoup
-                soup = BeautifulSoup(response.text, 'lxml')
+                        # Check if we got a valid page (not 404 and has content)
+                        if response.status_code == 200 and len(response.text) > 1000:
+                            # Check if it's the real anime-sama site (has catalog cards)
+                            if 'catalogue' in response.text or 'catalog-card' in response.text:
+                                logger.info(f"Working domain found: {test_domain}")
+                                return test_domain
+                    except Exception as e:
+                        logger.debug(f"Domain {test_domain} failed: {e}")
+                        continue

-                # Look for the primary button/link
-                primary_link = soup.find('a', class_='btn-primary')
-                if primary_link and primary_link.get('href'):
-                    href = primary_link['href']
-                    # Extract domain from URL
-                    from urllib.parse import urlparse
-                    parsed = urlparse(href)
-                    domain = parsed.netloc  # e.g., 'anime-sama.si'
-                    logger.info(f"Current domain from anime-sama.pw: {domain}")
-                    return domain
-
-                # Fallback: look for any anime-sama.* link
-                for link in soup.find_all('a', href=True):
-                    href = link['href']
-                    if 'anime-sama.' in href and href.startswith('https://'):
-                        from urllib.parse import urlparse
-                        parsed = urlparse(href)
-                        domain = parsed.netloc
-                        if domain not in ['anime-sama.pw', 'www.anime-sama.pw']:
-                            logger.info(f"Found domain via fallback: {domain}")
-                            return domain
-
-                logger.warning("Could not determine current domain, using default")
-                return "anime-sama.si"
+                logger.warning("Could not determine working domain, using default")
+                return "anime-sama.to"

        except Exception as e:
            logger.error(f"Error fetching current domain: {e}")
-            return "anime-sama.si"
+            return "anime-sama.to"

    @classmethod
    async def update_domains(cls) -> None:
@@ -164,6 +152,14 @@ class AnimeSamaDownloader(BaseAnimeSite):
                    anime_page_url=url,
                    episode_title=None
                )
+            # Handle Smoothpre URLs
+            elif 'smoothpre' in url.lower():
+                logger.info(f"Using fallback for Smoothpre: {url[:80]}...")
+                return await self.get_download_link_with_fallback(
+                    url,
+                    anime_page_url=None,
+                    episode_title=None
+                )
            # If it's an anime-sama page, try to find the video
            if 'anime-sama' in url.lower():
                if 'dingtez' in url or 'dingz' in url:
@@ -190,7 +186,7 @@ class AnimeSamaDownloader(BaseAnimeSite):

                for iframe in iframes:
                    src = iframe.get('src', '')
-                    if src and any(provider in src for provider in ['vidmoly', 'player', 'stream', 'play', 'embed']):
+                    if src and any(provider in src for provider in ['vidmoly', 'player', 'stream', 'play', 'embed', 'smoothpre']):
                        if not src.startswith('http'):
                            src = urljoin(final_url, src)
                        logger.debug(f"Found iframe: {src}")
@@ -201,6 +197,11 @@ class AnimeSamaDownloader(BaseAnimeSite):
                                logger.debug(f"Extracting from vidmoly iframe: {src}")
                                video_url, filename = await self._extract_from_vidmoly(src, anime_page_url=url, episode_title="Episode")
                                return video_url, filename
+                            # For smoothpre, use the smoothpre extractor
+                            elif 'smoothpre' in src.lower():
+                                logger.debug(f"Extracting from smoothpre iframe: {src}")
+                                video_url, filename = await self._extract_from_smoothpre(src, anime_page_url=url, episode_title="Episode")
+                                return video_url, filename
                            else:
                                video_url = await self._extract_from_player(src)
                                if video_url:
@@ -563,6 +564,49 @@ class AnimeSamaDownloader(BaseAnimeSite):
        # If yt-dlp fails, return m3u8 URL anyway (let download manager handle it)
        return m3u8_url, filename

+    async def _extract_from_smoothpre(self, url: str, anime_page_url: str = None, episode_title: str = None) -> tuple[str, str]:
+        """Extract video URL from smoothpre player - delegate to SmoothpreDownloader"""
+        try:
+            logger.debug(f"Extracting from smoothpre: {url}")
+            logger.debug(f"Delegating to SmoothpreDownloader...")
+
+            # Import SmoothpreDownloader
+            from ..video_players.smoothpre import SmoothpreDownloader
+
+            # Generate the target filename first
+            if episode_title and anime_page_url:
+                anime_name = self._generate_anime_name(anime_page_url)
+                season_num = self._extract_season_number(anime_page_url)
+                if season_num:
+                    target_filename = f"{anime_name} - S{season_num} - {episode_title}.mp4"
+                else:
+                    target_filename = f"{anime_name} - {episode_title}.mp4"
+                logger.debug(f"Generated filename: {target_filename} (episode: {episode_title})")
+            elif anime_page_url:
+                target_filename = self._generate_filename_from_anime_url(anime_page_url)
+                logger.debug(f"Generated filename: {target_filename} (no episode title)")
+            else:
+                target_filename = None
+                logger.debug(f"No target_filename generated")
+
+            # Use SmoothpreDownloader to extract the video URL
+            smoothpre_downloader = SmoothpreDownloader()
+            video_url, temp_filename = await smoothpre_downloader.get_download_link(url, target_filename=target_filename)
+
+            # Use the target filename if available
+            filename = target_filename if target_filename else temp_filename
+
+            logger.debug(f"Got video: {filename}")
+            logger.debug(f"Video URL: {video_url[:100] if video_url else 'None'}...")
+
+            # Return the direct video URL
+            # The download_manager will handle the actual download
+            return video_url, filename
+
+        except Exception as e:
+            logger.debug(f"Smoothpre extraction error: {e}")
+            raise Exception(f"Error extracting from smoothpre: {str(e)}")
+
    async def _extract_from_player(self, player_url: str) -> str | None:
        """Try to extract direct video URL from player iframe"""
        try:
@@ -808,9 +852,9 @@ class AnimeSamaDownloader(BaseAnimeSite):
            start = time.time()
            logger.debug(f"Searching for '{query}' ({lang})...")

-            # Use anime-sama.tv directly (anime-sama.si has redirect issues)
-            current_domain = "anime-sama.tv"
-
+            # Get the current working domain
+            current_domain = await self.get_current_domain()
+            logger.info(f"Using domain: {current_domain}")

            # Use the official search API endpoint
            search_api_url = f"https://{current_domain}/template-php/defaut/fetch.php"
@@ -1016,7 +1060,7 @@ class AnimeSamaDownloader(BaseAnimeSite):
            Exception: If all players fail
        """
        # Define player priority list
-        player_priority = ['vidmoly', 'sendvid', 'sibnet', 'lpayer']
+        player_priority = ['vidmoly', 'sendvid', 'sibnet', 'lpayer', 'smoothpre']
        
        # Extract video URLs from pipe format if needed
        # Format: url1|url2|url3|anime_page_url|episode_title
@@ -1038,7 +1082,48 @@ class AnimeSamaDownloader(BaseAnimeSite):
                    anime_page_url = parts[1]
        else:
            video_urls = [url]
-        
+
+        # Filter out empty or invalid URLs
+        valid_video_urls = []
+        for vu in video_urls:
+            vu = vu.strip()
+            # Skip empty URLs
+            if not vu:
+                logger.warning(f"Skipping empty URL")
+                continue
+
+            # Skip URLs with incomplete query parameters (e.g., "videoid=" without value)
+            if '=&' in vu or vu.endswith('='):
+                logger.warning(f"Skipping incomplete URL (missing parameter value): {vu[:80]}...")
+                continue
+
+            # Skip URLs that are just a base domain without ID (e.g., "https://sendvid.com/embed/")
+            if vu.endswith('/') and len(vu) > 10:
+                # Check if it's a base player URL without video ID
+                base_urls = [
+                    'https://sendvid.com/embed/',
+                    'https://sendvid.com/embed',
+                    'https://vidmoly.to/embed/',
+                    'https://vidmoly.to/embed',
+                    'https://vidmoly.biz/embed/',
+                    'https://vidmoly.biz/embed',
+                ]
+                if any(vu.startswith(base) for base in base_urls):
+                    logger.warning(f"Skipping incomplete URL (no video ID): {vu[:60]}...")
+                    continue
+
+            # Skip URLs with incomplete HTML filenames (e.g., "embed-.html")
+            if 'embed-.html' in vu or 'embed_' in vu:
+                logger.warning(f"Skipping malformed URL (incomplete HTML): {vu[:80]}...")
+                continue
+
+            valid_video_urls.append(vu)
+
+        video_urls = valid_video_urls
+
+        if not video_urls:
+            raise Exception("No valid video URLs found after filtering")
+
        # Try each video URL in order (each may have different player)
        last_error = None
        for video_url in video_urls:
@@ -1104,7 +1189,11 @@ class AnimeSamaDownloader(BaseAnimeSite):
                        )
                    elif player_name == 'lpayer':
                        video_url_result, filename = await self._extract_from_lpayer_api(video_url, anime_page_url, episode_title, target_filename)
-                    
+                    elif player_name == 'smoothpre':
+                        video_url_result, filename = await self._extract_from_smoothpre(
+                            video_url, anime_page_url, episode_title
+                        )
+
                    # Validate the extracted URL
                    logger.info(f"Validating extracted URL from {player_name}...")
                    is_valid = await self._test_video_url(video_url_result)
@@ -1580,7 +1669,7 @@ class AnimeSamaDownloader(BaseAnimeSite):
            Exception: If all players fail
        """
        # Define player priority list
-        player_priority = ['vidmoly', 'sendvid', 'sibnet', 'lpayer']
+        player_priority = ['vidmoly', 'sendvid', 'sibnet', 'lpayer', 'smoothpre']
        
        # Extract video URLs from pipe format if needed
        # Format: url1|url2|url3|anime_page_url|episode_title
@@ -1602,12 +1691,53 @@ class AnimeSamaDownloader(BaseAnimeSite):
                    anime_page_url = parts[1]
        else:
            video_urls = [url]
-        
+
+        # Filter out empty or invalid URLs
+        valid_video_urls = []
+        for vu in video_urls:
+            vu = vu.strip()
+            # Skip empty URLs
+            if not vu:
+                logger.warning(f"Skipping empty URL")
+                continue
+
+            # Skip URLs with incomplete query parameters (e.g., "videoid=" without value)
+            if '=&' in vu or vu.endswith('='):
+                logger.warning(f"Skipping incomplete URL (missing parameter value): {vu[:80]}...")
+                continue
+
+            # Skip URLs that are just a base domain without ID (e.g., "https://sendvid.com/embed/")
+            if vu.endswith('/') and len(vu) > 10:
+                # Check if it's a base player URL without video ID
+                base_urls = [
+                    'https://sendvid.com/embed/',
+                    'https://sendvid.com/embed',
+                    'https://vidmoly.to/embed/',
+                    'https://vidmoly.to/embed',
+                    'https://vidmoly.biz/embed/',
+                    'https://vidmoly.biz/embed',
+                ]
+                if any(vu.startswith(base) for base in base_urls):
+                    logger.warning(f"Skipping incomplete URL (no video ID): {vu[:60]}...")
+                    continue
+
+            # Skip URLs with incomplete HTML filenames (e.g., "embed-.html")
+            if 'embed-.html' in vu or 'embed_' in vu:
+                logger.warning(f"Skipping malformed URL (incomplete HTML): {vu[:80]}...")
+                continue
+
+            valid_video_urls.append(vu)
+
+        video_urls = valid_video_urls
+
+        if not video_urls:
+            raise Exception("No valid video URLs found after filtering")
+
        # Try each video URL in order (each may have different player)
        last_error = None
        for video_url in video_urls:
            logger.info(f"Trying video URL: {video_url[:50]}...")
-            
+
            # Detect player type from URL
            detected_player = None
            url_lower = video_url.lower()
@@ -1619,21 +1749,13 @@ class AnimeSamaDownloader(BaseAnimeSite):
                detected_player = 'sibnet'
            elif 'lpayer' in url_lower:
                detected_player = 'lpayer'
-            elif 'dingtez' in url_lower:
-                detected_player = 'dingtez'
-
-            url_lower = video_url.lower()
-            if 'vidmoly' in url_lower:
-                detected_player = 'vidmoly'
-            elif 'sendvid' in url_lower:
-                detected_player = 'sendvid'
-            elif 'sibnet' in url_lower:
-                detected_player = 'sibnet'
-            elif 'lpayer' in url_lower or 'embed' in url_lower:
-                detected_player = 'lpayer'
+            elif 'smoothpre' in url_lower:
+                detected_player = 'smoothpre'
+            elif 'myvi' in url_lower or 'myvi.tv' in url_lower:
+                detected_player = 'vidmoly'  # MyVi is similar to VidMoly, try VidMoly downloader first
            elif 'dingtez' in url_lower:
                detected_player = 'lpayer'  # Unknown player, try lpayer as fallback
-            
+
            logger.debug(f"Detected player from URL: {detected_player}")
            
            # Determine which player to try first
@@ -1644,22 +1766,32 @@ class AnimeSamaDownloader(BaseAnimeSite):
            
            # Build player order: cached player first, then detected, then rest in priority order
            player_order = []
-            if cached_player and cached_player in player_priority:
-                player_order.append(cached_player)
-            if detected_player and detected_player not in player_order and detected_player in player_priority:
-                player_order.append(detected_player)
-            for p in player_priority:
-                if p not in player_order:
-                    player_order.append(p)
-            

-            # Only try detected player if single video URL
-            if len(video_urls) == 1:
+            # When we have multiple video URLs, only try the detected player for each URL
+            # If the detected player fails, we'll move to the next URL instead of trying other players
+            if len(video_urls) > 1:
+                # Multiple URLs: only try the detected player (or first in priority if none detected)
                if detected_player and detected_player in player_priority:
                    player_order = [detected_player]
+                    logger.info(f"Multiple URLs detected, trying only detected player: {detected_player}")
                else:
-                    player_order = [player_priority[0]]
-            
+                    # No player detected, try cached if available, otherwise first in priority
+                    if cached_player and cached_player in player_priority:
+                        player_order = [cached_player]
+                        logger.info(f"Multiple URLs with no detected player, trying cached: {cached_player}")
+                    else:
+                        player_order = [player_priority[0]]
+                        logger.info(f"Multiple URLs with no detected/cached player, trying: {player_order[0]}")
+            else:
+                # Single URL: try cached player first, then detected, then all others in priority
+                if cached_player and cached_player in player_priority:
+                    player_order.append(cached_player)
+                if detected_player and detected_player not in player_order and detected_player in player_priority:
+                    player_order.append(detected_player)
+                for p in player_priority:
+                    if p not in player_order:
+                        player_order.append(p)
+
            logger.info(f"Player order: {player_order}")
            
            # Try each player for this video URL
@@ -1681,7 +1813,11 @@ class AnimeSamaDownloader(BaseAnimeSite):
                        )
                    elif player_name == 'lpayer':
                        video_url_result, filename = await self._extract_from_lpayer_api(video_url, anime_page_url, episode_title, target_filename)
-                    
+                    elif player_name == 'smoothpre':
+                        video_url_result, filename = await self._extract_from_smoothpre(
+                            video_url, anime_page_url, episode_title
+                        )
+
                    # Validate the extracted URL
                    logger.info(f"Validating extracted URL from {player_name}...")
                    is_valid = await self._test_video_url(video_url_result)