docs: Update CLAUDE.md with three-tier architecture and new providers

- Added new video players: Vidzy, LuLuvid, Uqload - Added new anime site: French-Manga - Added new series sites category with FS7 - Updated documentation to reflect three-tier architecture (anime sites → series sites → video players) - Added BaseSeriesSite interface documentation - Added "Adding New Series Site" section - Updated test organization with test_french_manga.py Generated with [Claude Code](https://claude.ai/code) via [Happy](https://happy.engineering) Co-Authored-By: Claude <noreply@anthropic.com> Co-Authored-By: Happy <yesreply@happy.engineering>
2026-01-25 10:34:39 +00:00
parent 3afad41d46
commit 4d280b5239
16 changed files with 1507 additions and 53 deletions
@@ -0,0 +1,299 @@
+"""French-Manga.net anime streaming site downloader"""
+import logging
+import re
+from typing import List, Dict, Any
+from urllib.parse import urljoin, urlsplit
+
+from bs4 import BeautifulSoup
+
+from app.utils import sanitize_filename
+from .base import BaseAnimeSite
+
+logger = logging.getLogger(__name__)
+
+
+class FrenchMangaDownloader(BaseAnimeSite):
+    """Downloader for french-manga.net anime streaming site"""
+
+    # Known domains for French-Manga: the bare domain, its "www" host and two
+    # numbered mirror hosts. can_handle() checks candidate URLs against this
+    # list; __init__ points base_url at the w16 entry.
+    BASE_DOMAINS = [
+        "french-manga.net",
+        "w16.french-manga.net",
+        "w15.french-manga.net",
+        "www.french-manga.net"
+    ]
+
+    def __init__(self) -> None:
+        """Run the base-class initialiser, then set the default site origin."""
+        super().__init__()
+        # Origin the search request is sent to; it is also the prefix used
+        # when links scraped from the site have to be made absolute.
+        self.base_url = "https://w16.french-manga.net"
+
+    def can_handle(self, url: str) -> bool:
+        """Check if this downloader can handle the given URL"""
+        return any(domain in url.lower() for domain in self.BASE_DOMAINS)
+
+    async def search_anime(
+        self,
+        query: str,
+        lang: str = "vostfr"
+    ) -> List[Dict[str, str]]:
+        """
+        Search for anime on French-Manga.
+
+        Args:
+            query: Search query (anime title)
+            lang: Language preference (vostfr, vf)
+
+        Returns:
+            List of anime with title, url, cover_image
+        """
+        try:
+            # French-Manga uses a search endpoint
+            search_url = f"{self.base_url}/index.php?do=search"
+            params = {
+                'do': 'search',
+                'subaction': 'search',
+                'story': query,
+                'x': '0',
+                'y': '0'
+            }
+
+            response = await self.client.post(search_url, data=params)
+            response.raise_for_status()
+            html = response.text
+
+            soup = BeautifulSoup(html, 'lxml')
+            results = []
+
+            # Look for search results in article or story classes
+            for item in soup.find_all('article', class_=lambda x: x and 'story' in x.lower()):
+                title_elem = item.find(['h2', 'h3', 'h4'])
+                link_elem = item.find('a', href=True)
+                img_elem = item.find('img')
+
+                if title_elem and link_elem:
+                    title = title_elem.get_text(strip=True)
+                    url = link_elem['href']
+
+                    # Ensure absolute URL
+                    if url.startswith('/'):
+                        url = self.base_url + url
+
+                    cover_image = ""
+                    if img_elem and img_elem.get('src'):
+                        cover_image = img_elem['src']
+                        if cover_image.startswith('/'):
+                            cover_image = self.base_url + cover_image
+
+                    results.append({
+                        'title': title,
+                        'url': url,
+                        'cover_image': cover_image,
+                        'lang': lang
+                    })
+
+            logger.info(f"Found {len(results)} anime results for query: {query}")
+            return results
+
+        except Exception as e:
+            logger.error(f"Error searching anime: {e}")
+            return []
+
+    async def get_episodes(
+        self,
+        anime_url: str,
+        lang: str = "vostfr"
+    ) -> List[Dict[str, str]]:
+        """
+        Get episode list for an anime.
+
+        Args:
+            anime_url: URL of the anime page
+            lang: Language preference
+
+        Returns:
+            List of episodes with episode_number, url, title
+        """
+        try:
+            response = await self.client.get(anime_url)
+            response.raise_for_status()
+            html = response.text
+
+            soup = BeautifulSoup(html, 'lxml')
+            episodes = []
+
+            # Look for episode links (typically in a list or table)
+            # French-Manga usually has episode links in <a> tags with episode numbers
+            for link in soup.find_all('a', href=True):
+                href = link['href']
+                text = link.get_text(strip=True)
+
+                # Pattern: Episode links usually contain "episode" or numbers
+                if re.search(r'episode?\s*\d+', text.lower()):
+                    episode_num = re.search(r'(\d+)', text)
+                    if episode_num:
+                        episode_number = int(episode_num.group(1))
+
+                        # Ensure absolute URL
+                        if href.startswith('/'):
+                            href = self.base_url + href
+
+                        episodes.append({
+                            'episode_number': episode_number,
+                            'url': href,
+                            'title': text,
+                            'host': 'french-manga'
+                        })
+
+            # Sort by episode number
+            episodes.sort(key=lambda x: x['episode_number'])
+
+            logger.info(f"Found {len(episodes)} episodes for {anime_url}")
+            return episodes
+
+        except Exception as e:
+            logger.error(f"Error getting episodes: {e}")
+            return []
+
+    async def get_anime_metadata(self, anime_url: str) -> Dict[str, Any]:
+        """
+        Get detailed metadata for an anime.
+
+        Args:
+            anime_url: URL of the anime page
+
+        Returns:
+            Dict with metadata (synopsis, genres, rating, etc.)
+        """
+        try:
+            response = await self.client.get(anime_url)
+            response.raise_for_status()
+            html = response.text
+
+            soup = BeautifulSoup(html, 'lxml')
+
+            # Extract title
+            title = ""
+            title_elem = soup.find('h1') or soup.find('h2', class_='title')
+            if title_elem:
+                title = title_elem.get_text(strip=True)
+
+            # Extract synopsis
+            synopsis = ""
+            synopsis_elem = soup.find('div', class_=lambda x: x and 'story' in x.lower())
+            if synopsis_elem:
+                synopsis = synopsis_elem.get_text(strip=True)
+
+            # Extract cover image
+            poster_image = ""
+            img_elem = soup.find('img', class_=lambda x: x and 'poster' in x.lower())
+            if img_elem and img_elem.get('src'):
+                poster_image = img_elem['src']
+                if poster_image.startswith('/'):
+                    poster_image = self.base_url + poster_image
+
+            # Extract genres
+            genres = []
+            genre_links = soup.find_all('a', href=re.compile(r'/xfsearch/.*genre/'))
+            for link in genre_links[:10]:  # Limit to 10 genres
+                genre = link.get_text(strip=True)
+                if genre:
+                    genres.append(genre)
+
+            # Extract rating (if available)
+            rating = ""
+            rating_elem = soup.find(['span', 'div'], class_=lambda x: x and 'rating' in x.lower())
+            if rating_elem:
+                rating = rating_elem.get_text(strip=True)
+
+            return {
+                'title': title,
+                'synopsis': synopsis,
+                'genres': genres,
+                'rating': rating,
+                'release_year': '',
+                'studio': '',
+                'poster_image': poster_image,
+                'total_episodes': len(await self.get_episodes(anime_url)),
+                'status': '',
+                'languages': ['vf', 'vostfr']
+            }
+
+        except Exception as e:
+            logger.error(f"Error getting anime metadata: {e}")
+            return {
+                'title': '',
+                'synopsis': '',
+                'genres': [],
+                'rating': '',
+                'release_year': '',
+                'studio': '',
+                'poster_image': '',
+                'total_episodes': 0,
+                'status': '',
+                'languages': ['vf', 'vostfr']
+            }
+
+    async def get_download_link(self, url: str) -> tuple[str, str]:
+        """
+        Get download link from episode page.
+
+        For French-Manga, this returns the video player URL.
+        The actual video extraction will be handled by the video player downloaders.
+
+        Args:
+            url: Episode page URL
+
+        Returns:
+            Tuple of (video_player_url, episode_title)
+        """
+        try:
+            response = await self.client.get(url)
+            response.raise_for_status()
+            html = response.text
+
+            soup = BeautifulSoup(html, 'lxml')
+
+            # Look for iframe or video player
+            iframe = soup.find('iframe', src=True)
+            if iframe:
+                video_url = iframe['src']
+            else:
+                # Look for video tag directly
+                video = soup.find('video', src=True)
+                if video:
+                    video_url = video['src']
+                else:
+                    # Try to find in script tags
+                    scripts = soup.find_all('script')
+                    for script in scripts:
+                        if script.string:
+                            # Look for iframe or video URLs in JavaScript
+                            patterns = [
+                                r'iframe.*?src=["\']([^"\']+)["\']',
+                                r'video.*?src=["\']([^"\']+)["\']',
+                            ]
+                            for pattern in patterns:
+                                match = re.search(pattern, script.string, re.IGNORECASE)
+                                if match:
+                                    video_url = match.group(1)
+                                    break
+                            if 'video_url' in locals():
+                                break
+
+                    if 'video_url' not in locals():
+                        raise ValueError("Could not find video player URL")
+
+            # Ensure absolute URL
+            if video_url.startswith('//'):
+                video_url = 'https:' + video_url
+            elif video_url.startswith('/'):
+                video_url = self.base_url + video_url
+
+            # Extract episode title
+            title_elem = soup.find('h1') or soup.find('h2')
+            episode_title = title_elem.get_text(strip=True) if title_elem else "Episode"
+            episode_title = sanitize_filename(episode_title)
+
+            logger.info(f"Extracted video player URL: {video_url[:60]}...")
+            return video_url, episode_title
+
+        except Exception as e:
+            logger.error(f"Error getting download link: {e}")
+            raise ValueError(f"Failed to extract download link: {str(e)}")