docs: Update CLAUDE.md with three-tier architecture and new providers

- Added new video players: Vidzy, LuLuvid, Uqload
- Added new anime site: French-Manga
- Added new series sites category with FS7
- Updated documentation to reflect three-tier architecture (anime sites → series sites → video players)
- Added BaseSeriesSite interface documentation
- Added "Adding New Series Site" section
- Updated test organization with test_french_manga.py

Generated with [Claude Code](https://claude.ai/code) via [Happy](https://happy.engineering)

Co-Authored-By: Claude <[email protected]>
Co-Authored-By: Happy <[email protected]>
2026-01-25 10:34:39 +00:00
parent 3afad41d46
commit 4d280b5239
16 changed files with 1507 additions and 53 deletions
@@ -0,0 +1,23 @@
+"""Series streaming sites (catalogs) downloaders"""
+from .base import BaseSeriesSite
+# Import all series site downloaders
+from .fs7 import FS7Downloader
+
+__all__ = [
+    "BaseSeriesSite",
+    "FS7Downloader",
+]
+
+
+def get_series_site(url: str) -> "BaseSeriesSite | None":
+    """
+    Factory function to get the appropriate series site for a URL.
+
+    Args:
+        url: URL of a series or episode page
+
+    Returns:
+        The first registered site whose ``can_handle(url)`` returns true,
+        or ``None`` when no registered site recognises the URL.
+    """
+    # NOTE: fresh site instances are built on every call. The caller is
+    # responsible for awaiting ``close()`` on the site that is returned.
+    sites = [
+        FS7Downloader(),
+    ]
+
+    for site in sites:
+        if site.can_handle(url):
+            return site
+
+    # No registered site matched; callers must handle the None result.
+    return None
@@ -0,0 +1,131 @@
+"""Base class for series streaming sites (catalogs)"""
+import logging
+from abc import ABC, abstractmethod
+from typing import Any, Dict, List, Optional, Tuple
+
+import httpx
+from bs4 import BeautifulSoup
+
+logger = logging.getLogger(__name__)
+
+
+class BaseSeriesSite(ABC):
+    """
+    Abstract base class for series streaming sites.
+
+    Series sites provide catalogs, metadata, and episode listings.
+    They typically link to video players for actual file hosting.
+
+    Examples: FS7 (French Stream), etc.
+
+    KEY FEATURE: Provides rich metadata and episode management for TV series
+
+    Subclasses must implement every ``@abstractmethod`` below. Deriving from
+    ``ABC`` makes Python enforce this when a subclass is instantiated.
+    Each instance owns an ``httpx.AsyncClient``; release it with ``close()``.
+    """
+
+    def __init__(self):
+        # One async HTTP client per site instance: 10 second timeout,
+        # redirects are followed automatically.
+        self.client = httpx.AsyncClient(timeout=10.0, follow_redirects=True)
+
+    @abstractmethod
+    def can_handle(self, url: str) -> bool:
+        """
+        Check if this series site can handle the given URL.
+
+        Args:
+            url: URL to test
+
+        Returns:
+            True when the URL belongs to this site
+        """
+
+    @abstractmethod
+    async def search_anime(
+        self,
+        query: str,
+        lang: str = "vf"
+    ) -> List[Dict[str, str]]:
+        """
+        Search for series on this site.
+
+        Args:
+            query: Search query (series title)
+            lang: Language preference (vf, vostfr)
+
+        Returns:
+            List of series with keys:
+                - title: Series title
+                - url: Series page URL
+                - cover_image: Optional cover image URL
+                - lang: Available languages
+        """
+
+    @abstractmethod
+    async def get_episodes(
+        self,
+        anime_url: str,
+        lang: str = "vf"
+    ) -> List[Dict[str, str]]:
+        """
+        Get list of episodes for a series.
+
+        Args:
+            anime_url: URL of the series page
+            lang: Language preference
+
+        Returns:
+            List of episodes with keys:
+                - episode_number: Episode number
+                - url: Episode page URL
+                - title: Optional episode title
+                - host: Video player hosting the file
+        """
+
+    @abstractmethod
+    async def get_anime_metadata(self, anime_url: str) -> Dict[str, Any]:
+        """
+        Get detailed metadata for a series.
+
+        Args:
+            anime_url: URL of the series page
+
+        Returns:
+            Dict with metadata:
+                - title: Series title
+                - synopsis: Plot summary
+                - genres: List of genres
+                - rating: Rating (e.g., "8.5/10")
+                - release_year: Release year
+                - studio: Production studio
+                - poster_image: Poster URL
+                - total_episodes: Total episode count
+                - status: Airing status (ongoing, completed)
+                - languages: Available languages
+        """
+
+    @abstractmethod
+    async def get_download_link(self, url: str) -> Tuple[str, str]:
+        """
+        Get download link for a specific episode.
+
+        For series sites, this extracts the video player URL from an episode page.
+        Note: Returns video player URL, NOT direct download link!
+
+        Args:
+            url: URL identifying the episode
+
+        Returns:
+            Tuple of (video_player_url, episode_title)
+        """
+
+    # Common methods for all series sites
+    async def close(self):
+        """Close the HTTP client owned by this instance."""
+        await self.client.aclose()
+
+    async def _fetch_page(self, url: str) -> str:
+        """
+        Fetch HTML page content.
+
+        Args:
+            url: Page URL to request with GET
+
+        Returns:
+            Response body decoded as text
+
+        Raises:
+            httpx.HTTPStatusError: When the response has a 4xx/5xx status
+        """
+        response = await self.client.get(url)
+        response.raise_for_status()
+        return response.text
+
+    def _parse_html(self, html: str) -> BeautifulSoup:
+        """Parse HTML with BeautifulSoup using the 'lxml' parser."""
+        return BeautifulSoup(html, 'lxml')
+
+    def _extract_season_number(self, title: str) -> Optional[int]:
+        """
+        Extract season number from title (e.g., 'Saison 2' -> 2).
+
+        The match is case-insensitive and allows optional whitespace between
+        the word "saison" and the digits.
+
+        Returns:
+            The season number, or None when the title has no "saison N" part
+        """
+        # Imported locally because this module does not import re at file level.
+        import re
+        match = re.search(r'saison\s*(\d+)', title.lower())
+        return int(match.group(1)) if match else None
@@ -0,0 +1,262 @@
+"""FS7 (French Stream) series site downloader"""
+import logging
+import re
+from typing import List, Dict, Any, Optional
+from urllib.parse import urljoin, urlparse
+from bs4 import BeautifulSoup
+from app.utils import sanitize_filename
+from .base import BaseSeriesSite
+
+logger = logging.getLogger(__name__)
+
+
+class FS7Downloader(BaseSeriesSite):
+    """
+    Downloader for FS7 (French Stream) series site.
+
+    FS7 is a French streaming site for TV series and films.
+
+    Search results, episode lists and metadata are scraped from the site's
+    HTML. Resolving an actual download link is delegated to the video player
+    downloader that matches the player URL.
+    """
+
+    # Lower-cased words that label category/navigation links. An anchor whose
+    # text consists only of these words is not treated as a series title.
+    _CATEGORY_WORDS = frozenset({
+        'séries', 'series', 'vf', 'vostfr', 'vo',
+        'netflix', 'disney', 'amazon', 'apple',
+    })
+
+    # Attributes of an episode <div> that hold player URLs, in order of
+    # preference.
+    _PLAYER_ATTRS = ('data-vidzy', 'data-uqload', 'data-voe', 'data-netu')
+
+    def __init__(self):
+        super().__init__()
+        self.base_url = "https://fs7.lol"
+        self.search_url = f"{self.base_url}/"
+        # Update client headers to mimic browser
+        self.client.headers.update({
+            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
+            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
+            'Accept-Language': 'fr-FR,fr;q=0.9,en-US;q=0.8,en;q=0.7',
+            'Accept-Encoding': 'gzip, deflate',
+            'Connection': 'keep-alive',
+            'Upgrade-Insecure-Requests': '1'
+        })
+
+    def can_handle(self, url: str) -> bool:
+        """
+        Check if this downloader can handle the given URL.
+
+        Returns:
+            True when the lower-cased URL contains "fs7.lol" or "french-stream"
+        """
+        lowered = url.lower()
+        return "fs7.lol" in lowered or "french-stream" in lowered
+
+    @classmethod
+    def _is_category_label(cls, text: str) -> bool:
+        """Return True when text is made up only of category words."""
+        words = re.findall(r'\w+', text.lower())
+        # Whole-word comparison: a substring test would reject real titles
+        # that merely contain "vo", "vf", etc. (e.g. "Revolution").
+        return bool(words) and all(word in cls._CATEGORY_WORDS for word in words)
+
+    @staticmethod
+    def _episode_sort_key(episode: Dict[str, str]) -> int:
+        """Return the numeric episode number, or 0 when it is not numeric."""
+        number = episode['episode']
+        # isdecimal() only accepts characters int() can parse. isdigit() also
+        # accepts e.g. superscript digits, on which int() raises ValueError.
+        return int(number) if number.isdecimal() else 0
+
+    @staticmethod
+    def _empty_metadata() -> Dict[str, Any]:
+        """Return a metadata dict with every field at its fallback value."""
+        return {
+            'title': "Unknown",
+            'synopsis': "",
+            'poster_image': '',
+            'release_year': None,
+            'genres': [],
+            'rating': None,
+            'studio': None,
+            'total_episodes': None,
+            'status': None
+        }
+
+    async def search_anime(
+        self,
+        query: str,
+        lang: str = "vf"
+    ) -> List[Dict[str, str]]:
+        """
+        Search for series on FS7.
+
+        Args:
+            query: Search query
+            lang: Language preference (vf, vostfr); currently not used to
+                filter the results
+
+        Returns:
+            List of series with title, url, cover_image. At most the first
+            20 matching links are examined. An empty list is returned when
+            the request or the parsing fails (the error is logged).
+        """
+        try:
+            logger.info(f"Searching FS7 for: {query}")
+
+            # FS7 uses GET request with query parameters for search
+            response = await self.client.get(
+                self.search_url,
+                params={
+                    "do": "search",
+                    "subaction": "search",
+                    "story": query
+                }
+            )
+            response.raise_for_status()
+
+            soup = self._parse_html(response.text)
+            results = []
+            seen_urls = set()
+
+            # Look for series items (FS7 has both films and series in search results)
+            # We filter for /s-tv/ URLs ending with .html (actual series/season pages)
+            items = soup.find_all('a', href=re.compile(r'/s-tv/\d+-.+\.html'))
+
+            for item in items[:20]:  # Limit to 20 results
+                url = item.get('href', '')
+                if not url.startswith('http'):
+                    url = urljoin(self.base_url, url)
+
+                # Prefer the image alt text as title
+                title_elem = item.find('img', alt=True)
+                title = title_elem.get('alt', '').strip() if title_elem else ''
+
+                if not title:
+                    # Fall back to the link text, unless it is just a
+                    # category name
+                    text = item.get_text(strip=True)
+                    if self._is_category_label(text):
+                        continue
+                    title = text
+
+                # Extract cover image
+                img = item.find('img')
+                cover_image = img.get('src', '') if img else ''
+
+                # Only keep entries with a meaningful title, and avoid duplicates
+                if title and len(title) > 5 and url not in seen_urls:
+                    seen_urls.add(url)
+                    results.append({
+                        'title': title,
+                        'url': url,
+                        'cover_image': cover_image
+                    })
+
+            logger.info(f"Found {len(results)} series on FS7")
+            return results
+
+        except Exception as e:
+            # Best-effort search: log the failure and report no results
+            logger.error(f"Error searching FS7: {e}")
+            return []
+
+    async def get_episodes(
+        self,
+        anime_url: str,
+        lang: str = "vf"
+    ) -> List[Dict[str, str]]:
+        """
+        Get episode list for a series.
+
+        Args:
+            anime_url: URL of the series page
+            lang: Language preference; currently not used
+
+        Returns:
+            List of dicts with keys 'episode' (episode number as a string)
+            and 'url' (video player URL), sorted by episode number.
+            An empty list is returned when fetching or parsing fails (the
+            error is logged).
+        """
+        try:
+            logger.info(f"Fetching episodes from: {anime_url}")
+
+            html = await self._fetch_page(anime_url)
+            soup = self._parse_html(html)
+            episodes = []
+
+            # FS7 stores episode data in JavaScript div elements
+            # Format: <div data-ep="1" data-vidzy="..." data-uqload="..." data-netu="..." data-voe="..."></div>
+            episode_divs = soup.find_all('div', attrs={'data-ep': True})
+
+            for div in episode_divs:
+                ep_num = div.get('data-ep', '').strip()
+
+                # Try different video players in order of preference
+                video_url = None
+                for player in self._PLAYER_ATTRS:
+                    player_url = div.get(player, '').strip()
+                    if player_url:
+                        video_url = player_url
+                        logger.debug(f"Found episode {ep_num} on {player}")
+                        break
+
+                if video_url and ep_num:
+                    episodes.append({
+                        'episode': ep_num,
+                        'url': video_url
+                    })
+
+            # Sort by episode number (non-numeric numbers sort first)
+            episodes.sort(key=self._episode_sort_key)
+
+            logger.info(f"Found {len(episodes)} episodes")
+            return episodes
+
+        except Exception as e:
+            # Best-effort listing: log the failure and report no episodes
+            logger.error(f"Error getting episodes from FS7: {e}")
+            return []
+
+    async def get_anime_metadata(
+        self,
+        anime_url: str
+    ) -> Dict[str, Any]:
+        """
+        Get metadata for a series.
+
+        Args:
+            anime_url: URL of the series page
+
+        Returns:
+            Dictionary with keys title, synopsis, poster_image, release_year,
+            genres, rating, studio, total_episodes and status. Only the first
+            four are extracted from the page; the others keep their fallback
+            values. When fetching or parsing fails, every field has its
+            fallback value (the error is logged).
+        """
+        try:
+            logger.info(f"Fetching metadata from: {anime_url}")
+
+            html = await self._fetch_page(anime_url)
+            soup = self._parse_html(html)
+
+            # Extract title
+            heading = soup.find('h1')
+            title = heading.get_text(strip=True) if heading else "Unknown"
+
+            # Extract description/synopsis
+            description_elem = soup.find('div', class_='full-text')
+            description = description_elem.get_text(strip=True) if description_elem else ""
+
+            # Extract cover image
+            img = soup.find('img', class_='poster')
+            poster_image = img.get('src', '') if img else ''
+
+            # Try to get poster from meta tag if not found
+            if not poster_image:
+                meta_img = soup.find('meta', property='og:image')
+                poster_image = meta_img.get('content', '') if meta_img else ''
+
+            # Extract year: first 19xx/20xx number found in the description
+            year_match = re.search(r'\b(19|20)\d{2}\b', description)
+            release_year = int(year_match.group()) if year_match else None
+
+            metadata = self._empty_metadata()
+            metadata.update({
+                'title': title,
+                'synopsis': description,
+                'poster_image': poster_image,
+                'release_year': release_year
+            })
+            return metadata
+
+        except Exception as e:
+            # Best-effort metadata: log the failure and return the fallbacks
+            logger.error(f"Error getting metadata from FS7: {e}")
+            return self._empty_metadata()
+
+    async def get_download_link(
+        self,
+        url: str,
+        target_filename: Optional[str] = None
+    ) -> tuple[str, str]:
+        """
+        Extract download link from video player URL.
+
+        Args:
+            url: Video player URL
+            target_filename: Optional filename override
+
+        Returns:
+            Tuple of (download_url, filename), as returned by the video
+            player downloader the call is delegated to
+
+        Raises:
+            ValueError: When no video player downloader matches the URL
+        """
+        # FS7 uses embedded video players
+        # Delegate to the appropriate video player downloader
+        # (imported here rather than at module level, presumably to avoid a
+        # circular import - TODO confirm)
+        from app.downloaders.video_players import get_video_player
+
+        player = get_video_player(url)
+        if not player:
+            raise ValueError(f"No video player found for URL: {url}")
+        return await player.get_download_link(url, target_filename)