"""French-Manga.net anime streaming site downloader""" from .base import BaseAnimeSite from bs4 import BeautifulSoup import re from typing import List, Dict, Any from app.utils import sanitize_filename import logging logger = logging.getLogger(__name__) class FrenchMangaDownloader(BaseAnimeSite): """Downloader for french-manga.net anime streaming site""" # Known domains for French-Manga BASE_DOMAINS = [ "french-manga.net", "w16.french-manga.net", "w15.french-manga.net", "www.french-manga.net", ] def __init__(self): super().__init__() self.id = "french-manga" self.base_url = "https://w16.french-manga.net" def can_handle(self, url: str) -> bool: """Check if this downloader can handle the given URL""" return any(domain in url.lower() for domain in self.BASE_DOMAINS) async def search_anime( self, query: str, lang: str = "vostfr" ) -> List[Dict[str, str]]: """ Search for anime on French-Manga. Args: query: Search query (anime title) lang: Language preference (vostfr, vf) Returns: List of anime with title, url, cover_image """ try: # French-Manga uses a search endpoint search_url = f"{self.base_url}/index.php?do=search" params = { "do": "search", "subaction": "search", "story": query, "x": "0", "y": "0", } response = await self.client.post(search_url, data=params) response.raise_for_status() html = response.text soup = BeautifulSoup(html, "lxml") results = [] # Look for search results in article or story classes for item in soup.find_all( "article", class_=lambda x: x and "story" in x.lower() ): title_elem = item.find(["h2", "h3", "h4"]) link_elem = item.find("a", href=True) img_elem = item.find("img") if title_elem and link_elem: title = title_elem.get_text(strip=True) url = link_elem["href"] # Ensure absolute URL if url.startswith("/"): url = self.base_url + url cover_image = "" if img_elem and img_elem.get("src"): cover_image = img_elem["src"] if cover_image.startswith("/"): cover_image = self.base_url + cover_image results.append( { "title": title, "url": url, "cover_image": cover_image, "lang": lang, } ) logger.info(f"Found {len(results)} anime results for query: {query}") return results except Exception as e: logger.error(f"Error searching anime: {e}") return [] async def get_episodes( self, anime_url: str, lang: str = "vostfr" ) -> List[Dict[str, str]]: """ Get episode list for an anime. Args: anime_url: URL of the anime page lang: Language preference Returns: List of episodes with episode_number, url, title """ try: response = await self.client.get(anime_url) response.raise_for_status() html = response.text soup = BeautifulSoup(html, "lxml") episodes = [] # Look for episode links (typically in a list or table) # French-Manga usually has episode links in tags with episode numbers for link in soup.find_all("a", href=True): href = link["href"] text = link.get_text(strip=True) # Pattern: Episode links usually contain "episode" or numbers if re.search(r"episode?\s*\d+", text.lower()): episode_num = re.search(r"(\d+)", text) if episode_num: episode_number = int(episode_num.group(1)) # Ensure absolute URL if href.startswith("/"): href = self.base_url + href episodes.append( { "episode_number": episode_number, "url": href, "title": text, "host": "french-manga", } ) # Sort by episode number episodes.sort(key=lambda x: x["episode_number"]) logger.info(f"Found {len(episodes)} episodes for {anime_url}") return episodes except Exception as e: logger.error(f"Error getting episodes: {e}") return [] async def get_anime_metadata(self, anime_url: str) -> Dict[str, Any]: """ Get detailed metadata for an anime. Args: anime_url: URL of the anime page Returns: Dict with metadata (synopsis, genres, rating, etc.) """ try: response = await self.client.get(anime_url) response.raise_for_status() html = response.text soup = BeautifulSoup(html, "lxml") # Extract title title = "" title_elem = soup.find("h1") or soup.find("h2", class_="title") if title_elem: title = title_elem.get_text(strip=True) # Extract synopsis synopsis = "" synopsis_elem = soup.find( "div", class_=lambda x: x and "story" in x.lower() ) if synopsis_elem: synopsis = synopsis_elem.get_text(strip=True) # Extract cover image poster_image = "" img_elem = soup.find("img", class_=lambda x: x and "poster" in x.lower()) if img_elem and img_elem.get("src"): poster_image = img_elem["src"] if poster_image.startswith("/"): poster_image = self.base_url + poster_image # Extract genres genres = [] genre_links = soup.find_all("a", href=re.compile(r"/xfsearch/.*genre/")) for link in genre_links[:10]: # Limit to 10 genres genre = link.get_text(strip=True) if genre: genres.append(genre) # Extract rating (if available) rating = "" rating_elem = soup.find( ["span", "div"], class_=lambda x: x and "rating" in x.lower() ) if rating_elem: rating = rating_elem.get_text(strip=True) return { "title": title, "synopsis": synopsis, "genres": genres, "rating": rating, "release_year": "", "studio": "", "poster_image": poster_image, "total_episodes": len(await self.get_episodes(anime_url)), "status": "", "languages": ["vf", "vostfr"], } except Exception as e: logger.error(f"Error getting anime metadata: {e}") return { "title": "", "synopsis": "", "genres": [], "rating": "", "release_year": "", "studio": "", "poster_image": "", "total_episodes": 0, "status": "", "languages": ["vf", "vostfr"], } async def get_download_link(self, url: str) -> tuple[str, str]: """ Get download link from episode page. For French-Manga, this returns the video player URL. The actual video extraction will be handled by the video player downloaders. Args: url: Episode page URL Returns: Tuple of (video_player_url, episode_title) """ try: response = await self.client.get(url) response.raise_for_status() html = response.text soup = BeautifulSoup(html, "lxml") # Look for iframe or video player iframe = soup.find("iframe", src=True) if iframe: video_url = iframe["src"] else: # Look for video tag directly video = soup.find("video", src=True) if video: video_url = video["src"] else: # Try to find in script tags scripts = soup.find_all("script") for script in scripts: if script.string: # Look for iframe or video URLs in JavaScript patterns = [ r'iframe.*?src=["\']([^"\']+)["\']', r'video.*?src=["\']([^"\']+)["\']', ] for pattern in patterns: match = re.search(pattern, script.string, re.IGNORECASE) if match: video_url = match.group(1) break if "video_url" in locals(): break if "video_url" not in locals(): raise ValueError("Could not find video player URL") # Ensure absolute URL if video_url.startswith("//"): video_url = "https:" + video_url elif video_url.startswith("/"): video_url = self.base_url + video_url # Extract episode title title_elem = soup.find("h1") or soup.find("h2") episode_title = title_elem.get_text(strip=True) if title_elem else "Episode" episode_title = sanitize_filename(episode_title) logger.info(f"Extracted video player URL: {video_url[:60]}...") return video_url, episode_title except Exception as e: logger.error(f"Error getting download link: {e}") raise ValueError(f"Failed to extract download link: {str(e)}")