import datetime
import re
import time
from typing import Optional
from urllib.parse import urljoin

import httpx
from bs4 import BeautifulSoup

from .base import BaseDownloader

# Upper bound on the number of unique search results returned by search_anime.
_MAX_SEARCH_RESULTS = 10

# Substrings that mark a matched script value as a video URL.
_VIDEO_EXTENSIONS = ('mp4', 'm3u8', 'mkv')

# Link-text keywords that identify a download anchor.
_DOWNLOAD_KEYWORDS = ('télécharger', 'download', 'ddl', 'mega', 'google', 'drive')

# File hosts accepted as download targets.
_FILE_HOSTS = ('mega.nz', 'drive.google.com', 'uptobox.com', '1fichier.com')

# Substrings that identify an iframe as an embedded player.
_PLAYER_HINTS = ('video', 'player', 'stream', 'play')

# Patterns tried, in order, against inline <script> contents. Each pattern has
# exactly one capture group, so findall() yields plain strings.
_SCRIPT_URL_PATTERNS = tuple(
    re.compile(pattern)
    for pattern in (
        r'(https?://[^"\'>\s]+\.(?:mp4|m3u8|mkv)(?:\?[^"\'>\s]*)?)',
        r'"url":"([^"]+)"',
        r'"video":"([^"]+)"',
        r'"file":"([^"]+)"',
        r'file:\s*"([^"]+)"',
    )
)

# JavaScript variable/property named "ddl" holding a quoted link.
_DDL_VAR_PATTERN = re.compile(r'ddl["\']?\s*:\s*["\']([^"\']+)["\']')


def _without_query(url: str) -> str:
    """Return *url* with any ``#fragment`` and ``?query`` part removed."""
    return url.split('#', 1)[0].split('?', 1)[0]


class AnimeUltimeDownloader(BaseDownloader):
    """Downloader for anime-ultime.net"""

    BASE_DOMAINS = ["anime-ultime.com", "anime-ultime.net", "www.anime-ultime.net"]

    def can_handle(self, url: str) -> bool:
        """Return True when *url* contains one of ``BASE_DOMAINS`` (case-insensitive)."""
        lowered = url.lower()
        return any(domain in lowered for domain in self.BASE_DOMAINS)

    async def get_download_link(self, url: str) -> tuple[str, str]:
        """
        Extract a download link from an anime-ultime URL.

        The page is fetched (following redirects) and several strategies are
        tried in order: the og:video meta tag, anchors pointing at known file
        hosts, player iframes, <video>/<source> tags, inline scripts, and
        finally anchors whose class looks download-related.

        Args:
            url: Page URL to inspect.

        Returns:
            ``(link, filename)`` where ``filename`` is derived from the final
            (post-redirect) URL. Relative links are resolved against that URL.

        Raises:
            Exception: if the request/parsing fails or no link is found. The
                original error is chained as ``__cause__``.
        """
        try:
            # Follow redirects so the filename is built from the final URL
            response = await self.client.get(url, follow_redirects=True)
            final_url = str(response.url)
            soup = BeautifulSoup(response.text, 'lxml')

            # Method 0: og:video meta tag (most reliable for anime-ultime)
            og_link = self._og_video_link(soup)
            if og_link:
                og_link = self._absolutize(og_link, final_url)
                print(f"[ANIME-ULTIME] Found og:video link: {og_link}")
                return og_link, self._generate_filename(final_url)

            # Methods 1-5, in priority order
            strategies = (
                self._file_host_link,
                self._iframe_link,
                self._video_tag_link,
                self._script_link,
                self._classed_anchor_link,
            )
            for strategy in strategies:
                candidate = strategy(soup)
                if candidate:
                    link = self._absolutize(candidate, final_url)
                    return link, self._generate_filename(final_url)

            # If nothing found, raise error (wrapped by the handler below)
            raise Exception("Could not find download link on page")

        except Exception as e:
            # Chain the cause so the original traceback is not lost
            raise Exception(f"Error extracting Anime-Ultime link: {str(e)}") from e

    @staticmethod
    def _absolutize(link: str, base_url: str) -> str:
        """Resolve *link* against *base_url* unless it is already an http(s) URL."""
        if link.lower().startswith(('http://', 'https://')):
            return link
        return urljoin(base_url, link)

    @staticmethod
    def _og_video_link(soup: BeautifulSoup) -> Optional[str]:
        """Return the og:video content when its path ends in ``.mp4``."""
        og_video = soup.find('meta', property='og:video')
        if og_video and og_video.get('content'):
            video_url = og_video['content']
            # Ignore query/fragment and case so "x.MP4?token=..." is accepted
            if _without_query(video_url).lower().endswith('.mp4'):
                return video_url
        return None

    @staticmethod
    def _file_host_link(soup: BeautifulSoup) -> Optional[str]:
        """Return the first download-labelled anchor that targets a known file host."""
        for anchor in soup.find_all('a', href=True):
            href = anchor['href']
            label = anchor.get_text().lower()
            if not any(keyword in label for keyword in _DOWNLOAD_KEYWORDS):
                continue
            if any(host in href.lower() for host in _FILE_HOSTS):
                return href
        return None

    @staticmethod
    def _iframe_link(soup: BeautifulSoup) -> Optional[str]:
        """Return the first absolute iframe ``src`` that looks like a video player."""
        for iframe in soup.find_all('iframe'):
            src = iframe.get('src', '')
            if not src or not src.startswith('http'):
                continue
            if any(hint in src for hint in _PLAYER_HINTS):
                return src
        return None

    @staticmethod
    def _video_tag_link(soup: BeautifulSoup) -> Optional[str]:
        """Return the first ``src`` found on a <video> tag or its <source> children."""
        for video in soup.find_all('video'):
            src = video.get('src', '')
            if src:
                return src
            for source in video.find_all('source'):
                src = source.get('src', '')
                if src:
                    return src
        return None

    @staticmethod
    def _script_link(soup: BeautifulSoup) -> Optional[str]:
        """Return the first video-like URL found in inline <script> contents."""
        for script in soup.find_all('script'):
            code = script.string
            if not code:
                continue

            for pattern in _SCRIPT_URL_PATTERNS:
                for raw in pattern.findall(code):
                    # Clean up JSON-escaped slashes and stray backslashes
                    candidate = raw.replace('\\/', '/').replace('\\', '')
                    if any(ext in candidate for ext in _VIDEO_EXTENSIONS):
                        return candidate

            # Links are sometimes stored in a JavaScript "ddl" variable
            ddl_match = _DDL_VAR_PATTERN.search(code)
            if ddl_match and ddl_match.group(1).startswith('http'):
                return ddl_match.group(1)
        return None

    @staticmethod
    def _classed_anchor_link(soup: BeautifulSoup) -> Optional[str]:
        """Return the first absolute href on an anchor with a download-like class."""
        anchors = soup.find_all('a', class_=re.compile(r'download|ddl|episode', re.I))
        for anchor in anchors:
            href = anchor.get('href', '')
            if href and href.startswith('http'):
                return href
        return None

    def _generate_filename(self, url: str) -> str:
        """
        Generate a filename from an anime-ultime URL.

        Recognised URL shapes:
            - info-0-1/30200
            - info-0-1/30200/Naruto-OAV-01-vostfr
            - file-0-1/2991-Naruto-OAV

        The name part is title-cased; the ``.mp4`` extension stays lowercase.
        Query strings, fragments and trailing slashes are ignored.
        """
        anime_name = "Anime"
        episode = "01"

        # Drop "?query", "#fragment" and trailing "/" so they neither leak into
        # the filename nor defeat the "$"-anchored patterns below.
        path = _without_query(url).rstrip('/')

        if 'info-0-1/' in path:
            # Format: info-0-1/EPISODE_ID or info-0-1/EPISODE_ID/NAME-EP-vostfr
            ep_match = re.search(r'info-0-1/(\d+)', path)
            if ep_match:
                # Default to the numeric ID; refined below when a slug is present
                anime_name = f"Episode {ep_match.group(1)}"
                name_match = re.search(r'info-0-1/\d+/([^/]+)', path)
                if name_match:
                    raw_name = name_match.group(1)
                    ep_num_match = re.search(r'-(\d+)-vostfr$', raw_name, re.I)
                    if ep_num_match:
                        episode = ep_num_match.group(1).zfill(2)
                        # Remove episode number and suffix from name
                        stripped = re.sub(r'-\d+-vostfr$', '', raw_name, flags=re.I)
                        anime_name = stripped.replace('-', ' ')
        elif 'file-0-1/' in path:
            # Format: file-0-1/ID-NAME
            file_match = re.search(r'file-0-1/\d+-(.+)$', path)
            if file_match:
                anime_name = file_match.group(1).replace('-', ' ')

        # Sanitize: a slash would be interpreted as a directory separator
        anime_name = anime_name.replace('/', ' ').strip()

        # Title-case only the stem; str.title() would turn ".mp4" into ".Mp4"
        stem = f"{anime_name} - Episode {episode}".title()
        return f"{stem}.mp4"

    async def get_anime_metadata(self, anime_url: str) -> dict:
        """
        Extract metadata from an anime page.

        Args:
            anime_url: URL of the anime page.

        Returns:
            A dict with the keys ``synopsis``, ``genres``, ``rating``,
            ``release_year``, ``studio``, ``poster_image``, ``banner_image``,
            ``total_episodes``, ``status`` and ``alternative_titles``. Only
            synopsis, genres, rating, release_year, poster_image and
            total_episodes are populated here; the rest keep their defaults.
            Returns ``{}`` if any error occurs (the error is printed).
        """
        try:
            print(f"[ANIME-ULTIME] Extracting metadata from: {anime_url}")

            response = await self.client.get(anime_url)
            soup = BeautifulSoup(response.text, 'lxml')

            metadata = {
                'synopsis': None,
                'genres': [],
                'rating': None,
                'release_year': None,
                'studio': None,
                'poster_image': None,
                'banner_image': None,
                'total_episodes': None,
                'status': None,
                'alternative_titles': [],
            }

            # Synopsis: first candidate element with more than 50 characters
            synopsis_selectors = (
                'div.synopsis',
                'div.description',
                'div[class*="synopsis"]',
                'p.synopsis',
                '.info',
                'div.texte',
            )
            for selector in synopsis_selectors:
                synopsis_elem = soup.select_one(selector)
                if synopsis_elem:
                    synopsis = synopsis_elem.get_text(strip=True)
                    if len(synopsis) > 50:
                        metadata['synopsis'] = synopsis
                        break

            # Genres from meta tags (comma separated); skip empty entries
            genre_meta = (
                soup.find('meta', property='genre')
                or soup.find('meta', attrs={'name': 'genre'})
            )
            if genre_meta:
                genres_text = genre_meta.get('content', '')
                if genres_text:
                    metadata['genres'] = [
                        part.strip() for part in genres_text.split(',') if part.strip()
                    ]

            # Genres from the first five genre-like links
            genre_links = soup.find_all('a', href=re.compile(r'genre|tag|type|cat', re.I))
            for link in genre_links[:5]:
                genre = link.get_text(strip=True)
                if genre and genre not in metadata['genres']:
                    metadata['genres'].append(genre)

            # Rating: accept "x/10" as-is, or "x/5" rescaled to a /10 scale
            rating_selectors = ('span.rating', 'div.rating', 'span.score', 'div.note', '.rating')
            for selector in rating_selectors:
                rating_elem = soup.select_one(selector)
                if not rating_elem:
                    continue
                rating_text = rating_elem.get_text(strip=True)
                out_of_ten = re.search(r'(\d+\.?\d*)\s*/\s*10', rating_text)
                if out_of_ten:
                    metadata['rating'] = f"{out_of_ten.group(1)}/10"
                    break
                out_of_five = re.search(r'(\d+\.?\d*)\s*/\s*5', rating_text)
                if out_of_five:
                    rating_val = float(out_of_five.group(1)) * 2
                    metadata['rating'] = f"{rating_val:.1f}/10"
                    break

            # Release year: first year-like number in the page text that falls
            # in [1950, current year + 2]; out-of-range candidates are skipped
            # instead of aborting the search.
            page_text = soup.get_text()
            max_year = datetime.datetime.now().year + 2
            for year_match in re.finditer(r'\b(19\d{2}|20\d{2})\b', page_text):
                year = int(year_match.group(1))
                if 1950 <= year <= max_year:
                    metadata['release_year'] = year
                    break

            # Poster image from og:image
            og_image = soup.find('meta', property='og:image')
            if og_image:
                metadata['poster_image'] = og_image.get('content')

            # Total episodes (get_episodes performs its own request)
            episodes_count = len(await self.get_episodes(anime_url))
            if episodes_count > 0:
                metadata['total_episodes'] = episodes_count

            print(f"[ANIME-ULTIME] Extracted metadata: {metadata}")
            return metadata

        except Exception as e:
            # Best-effort: metadata is optional, so report and return empty
            print(f"[ANIME-ULTIME] Error extracting metadata: {e}")
            return {}

    async def search_anime(self, query: str, lang: str = "vostfr", include_metadata: bool = False) -> list[dict]:
        """
        Search for anime on anime-ultime.

        Args:
            query: Search query string
            lang: Language preference (vostfr, vf). Only used in the log
                message here; it does not filter results.
            include_metadata: Whether to fetch full metadata for each result
                (slower: one extra lookup per result, done sequentially)

        Returns:
            Up to 10 unique results, each a dict with ``title``, ``url``,
            ``type`` (always ``'search_result'``) and ``metadata``. Returns
            ``[]`` if any error occurs (the error is printed).
        """
        try:
            start = time.perf_counter()
            print(f"[ANIME-ULTIME] Searching for '{query}' ({lang})...")

            # Anime-Ultime uses POST for search
            search_url = "https://www.anime-ultime.net/search-0-1"
            response = await self.client.post(search_url, data={'search': query})
            soup = BeautifulSoup(response.text, 'lxml')

            elapsed = time.perf_counter() - start
            print(f"[ANIME-ULTIME] Got response {response.status_code} in {elapsed:.2f}s")

            results = []
            seen_urls = set()

            # Search results use the file-0-1/ pattern, not info-
            for anchor in soup.find_all('a', href=re.compile(r'file-0-1/')):
                # Cap on unique results, so duplicate anchors don't eat the quota
                if len(results) >= _MAX_SEARCH_RESULTS:
                    break

                href = anchor.get('href', '')
                if not href or href in seen_urls:
                    continue
                seen_urls.add(href)

                title = self._search_result_title(anchor, href)

                # Make URL absolute
                if not href.startswith('http'):
                    href = urljoin("https://www.anime-ultime.net/", href)

                result_item = {
                    'title': title,
                    'url': href,
                    'type': 'search_result',
                    'metadata': None,
                }

                if include_metadata:
                    result_item['metadata'] = await self.get_anime_metadata(href)

                results.append(result_item)

            print(f"[ANIME-ULTIME] Found {len(results)} results")
            return results

        except Exception as e:
            # Best-effort: a failed search yields no results rather than raising
            print(f"[ANIME-ULTIME] Error: {e}")
            return []

    @staticmethod
    def _search_result_title(anchor, href: str) -> str:
        """Pick the best available title for a search-result anchor.

        Uses the anchor text unless it is short or a generic download label,
        in which case the URL slug and then the enclosing row text are tried.
        """
        raw_title = anchor.get_text().strip()
        title = raw_title

        if len(raw_title) < 5 or raw_title.lower() in ('télécharger', 'download', 'ddl'):
            # Try to extract from URL (file-0-1/ID-Title format)
            url_match = re.search(r'file-0-1/\d+-(.+)$', href)
            if url_match:
                title = url_match.group(1).replace('-', ' ').title()

        if len(title) < 5:
            # Fall back to the text of the enclosing row/cell/div
            row = anchor.find_parent(['tr', 'td', 'div'])
            if row:
                row_text = row.get_text().strip()
                if raw_title in row_text:
                    row_text = row_text.replace(raw_title, '').strip()
                if 5 < len(row_text) < 100:
                    title = row_text

        return title

    async def get_episodes(self, anime_url: str, lang: str = "vostfr") -> list[dict]:
        """
        Get the list of episodes for an anime.

        Args:
            anime_url: URL of the anime page.
            lang: Accepted for interface compatibility; not used here.

        Returns:
            A list of dicts with ``episode`` (zero-padded number string),
            ``url`` and ``title`` (link text), de-duplicated by episode number
            (first occurrence wins) and sorted numerically. Returns ``[]`` if
            any error occurs (the error is printed).
        """
        try:
            response = await self.client.get(anime_url)
            soup = BeautifulSoup(response.text, 'lxml')

            # Keyed by episode number; dicts keep insertion order, so the first
            # link seen for an episode wins.
            by_episode = {}

            # Episode links look like info-0-1/ID or info-0-1/ID/Name-XX-vostfr
            for link in soup.find_all('a', href=re.compile(r'info-0-1/\d+')):
                href = link.get('href', '')
                text = link.get_text().strip()

                # Episode number: URL suffix, then "Episode N" in the URL,
                # then the first number in the link text.
                number_match = (
                    re.search(r'-(\d+)-vostfr$', href, re.I)
                    or re.search(r'Episode[-\s]?(\d+)', href, re.I)
                    or re.search(r'(\d+)', text)
                )
                if not number_match:
                    continue

                episode_num = number_match.group(1).zfill(2)  # Pad with zero

                # Canonical episode URL is built from the numeric ID alone.
                # The find_all filter above guarantees the ID is present; the
                # urljoin fallback is purely defensive.
                id_match = re.search(r'info-0-1/(\d+)', href)
                if id_match:
                    episode_url = f"https://www.anime-ultime.net/info-0-1/{id_match.group(1)}"
                else:
                    episode_url = href if href.startswith('http') else urljoin(anime_url, href)

                by_episode.setdefault(episode_num, {
                    'episode': episode_num,
                    'url': episode_url,
                    'title': text,
                })

            return sorted(by_episode.values(), key=lambda ep: int(ep['episode']))

        except Exception as e:
            # Best-effort: a failed lookup yields no episodes rather than raising
            print(f"Error getting episodes: {e}")
            return []