"""French-Manga.net anime streaming site downloader""" from .base import BaseAnimeSite from bs4 import BeautifulSoup import re from typing import List, Dict, Any from app.utils import sanitize_filename import logging logger = logging.getLogger(__name__) class FrenchMangaDownloader(BaseAnimeSite): """Downloader for french-manga.net anime streaming site""" # Known domains for French-Manga BASE_DOMAINS = [ "french-manga.net", "w16.french-manga.net", "w15.french-manga.net", "www.french-manga.net" ] def __init__(self): super().__init__() self.base_url = "https://w16.french-manga.net" def can_handle(self, url: str) -> bool: """Check if this downloader can handle the given URL""" return any(domain in url.lower() for domain in self.BASE_DOMAINS) async def search_anime( self, query: str, lang: str = "vostfr" ) -> List[Dict[str, str]]: """ Search for anime on French-Manga. Args: query: Search query (anime title) lang: Language preference (vostfr, vf) Returns: List of anime with title, url, cover_image """ try: # French-Manga uses a search endpoint search_url = f"{self.base_url}/index.php?do=search" params = { 'do': 'search', 'subaction': 'search', 'story': query, 'x': '0', 'y': '0' } response = await self.client.post(search_url, data=params) response.raise_for_status() html = response.text soup = BeautifulSoup(html, 'lxml') results = [] # Look for search results in article or story classes for item in soup.find_all('article', class_=lambda x: x and 'story' in x.lower()): title_elem = item.find(['h2', 'h3', 'h4']) link_elem = item.find('a', href=True) img_elem = item.find('img') if title_elem and link_elem: title = title_elem.get_text(strip=True) url = link_elem['href'] # Ensure absolute URL if url.startswith('/'): url = self.base_url + url cover_image = "" if img_elem and img_elem.get('src'): cover_image = img_elem['src'] if cover_image.startswith('/'): cover_image = self.base_url + cover_image results.append({ 'title': title, 'url': url, 'cover_image': cover_image, 'lang': lang }) logger.info(f"Found {len(results)} anime results for query: {query}") return results except Exception as e: logger.error(f"Error searching anime: {e}") return [] async def get_episodes( self, anime_url: str, lang: str = "vostfr" ) -> List[Dict[str, str]]: """ Get episode list for an anime. Args: anime_url: URL of the anime page lang: Language preference Returns: List of episodes with episode_number, url, title """ try: response = await self.client.get(anime_url) response.raise_for_status() html = response.text soup = BeautifulSoup(html, 'lxml') episodes = [] # Look for episode links (typically in a list or table) # French-Manga usually has episode links in tags with episode numbers for link in soup.find_all('a', href=True): href = link['href'] text = link.get_text(strip=True) # Pattern: Episode links usually contain "episode" or numbers if re.search(r'episode?\s*\d+', text.lower()): episode_num = re.search(r'(\d+)', text) if episode_num: episode_number = int(episode_num.group(1)) # Ensure absolute URL if href.startswith('/'): href = self.base_url + href episodes.append({ 'episode_number': episode_number, 'url': href, 'title': text, 'host': 'french-manga' }) # Sort by episode number episodes.sort(key=lambda x: x['episode_number']) logger.info(f"Found {len(episodes)} episodes for {anime_url}") return episodes except Exception as e: logger.error(f"Error getting episodes: {e}") return [] async def get_anime_metadata(self, anime_url: str) -> Dict[str, Any]: """ Get detailed metadata for an anime. Args: anime_url: URL of the anime page Returns: Dict with metadata (synopsis, genres, rating, etc.) """ try: response = await self.client.get(anime_url) response.raise_for_status() html = response.text soup = BeautifulSoup(html, 'lxml') # Extract title title = "" title_elem = soup.find('h1') or soup.find('h2', class_='title') if title_elem: title = title_elem.get_text(strip=True) # Extract synopsis synopsis = "" synopsis_elem = soup.find('div', class_=lambda x: x and 'story' in x.lower()) if synopsis_elem: synopsis = synopsis_elem.get_text(strip=True) # Extract cover image poster_image = "" img_elem = soup.find('img', class_=lambda x: x and 'poster' in x.lower()) if img_elem and img_elem.get('src'): poster_image = img_elem['src'] if poster_image.startswith('/'): poster_image = self.base_url + poster_image # Extract genres genres = [] genre_links = soup.find_all('a', href=re.compile(r'/xfsearch/.*genre/')) for link in genre_links[:10]: # Limit to 10 genres genre = link.get_text(strip=True) if genre: genres.append(genre) # Extract rating (if available) rating = "" rating_elem = soup.find(['span', 'div'], class_=lambda x: x and 'rating' in x.lower()) if rating_elem: rating = rating_elem.get_text(strip=True) return { 'title': title, 'synopsis': synopsis, 'genres': genres, 'rating': rating, 'release_year': '', 'studio': '', 'poster_image': poster_image, 'total_episodes': len(await self.get_episodes(anime_url)), 'status': '', 'languages': ['vf', 'vostfr'] } except Exception as e: logger.error(f"Error getting anime metadata: {e}") return { 'title': '', 'synopsis': '', 'genres': [], 'rating': '', 'release_year': '', 'studio': '', 'poster_image': '', 'total_episodes': 0, 'status': '', 'languages': ['vf', 'vostfr'] } async def get_download_link(self, url: str) -> tuple[str, str]: """ Get download link from episode page. For French-Manga, this returns the video player URL. The actual video extraction will be handled by the video player downloaders. Args: url: Episode page URL Returns: Tuple of (video_player_url, episode_title) """ try: response = await self.client.get(url) response.raise_for_status() html = response.text soup = BeautifulSoup(html, 'lxml') # Look for iframe or video player iframe = soup.find('iframe', src=True) if iframe: video_url = iframe['src'] else: # Look for video tag directly video = soup.find('video', src=True) if video: video_url = video['src'] else: # Try to find in script tags scripts = soup.find_all('script') for script in scripts: if script.string: # Look for iframe or video URLs in JavaScript patterns = [ r'iframe.*?src=["\']([^"\']+)["\']', r'video.*?src=["\']([^"\']+)["\']', ] for pattern in patterns: match = re.search(pattern, script.string, re.IGNORECASE) if match: video_url = match.group(1) break if 'video_url' in locals(): break if 'video_url' not in locals(): raise ValueError("Could not find video player URL") # Ensure absolute URL if video_url.startswith('//'): video_url = 'https:' + video_url elif video_url.startswith('/'): video_url = self.base_url + video_url # Extract episode title title_elem = soup.find('h1') or soup.find('h2') episode_title = title_elem.get_text(strip=True) if title_elem else "Episode" episode_title = sanitize_filename(episode_title) logger.info(f"Extracted video player URL: {video_url[:60]}...") return video_url, episode_title except Exception as e: logger.error(f"Error getting download link: {e}") raise ValueError(f"Failed to extract download link: {str(e)}")