import datetime
import re
import time
from typing import Optional
from urllib.parse import urljoin

import httpx
from bs4 import BeautifulSoup

from .base import BaseAnimeSite


class AnimeUltimeDownloader(BaseAnimeSite):
    """Scraper for anime-ultime pages.

    Resolves a playable/downloadable link for an episode page, searches the
    site, lists the episodes of a series and extracts basic metadata.

    NOTE(review): every network call goes through ``self.client``, which is
    not defined in this file; it is presumably an ``httpx.AsyncClient``
    created by ``BaseAnimeSite`` -- confirm against the base class.
    """

    BASE_DOMAINS = ["anime-ultime.com", "anime-ultime.net", "www.anime-ultime.net"]

    # Site root used to build the search URL and absolute result/episode URLs.
    BASE_URL = "https://www.anime-ultime.net/"

    # Upper bound on the number of (unique) results returned by search_anime.
    MAX_SEARCH_RESULTS = 10

    # Link texts that suggest an anchor is a download button.
    _DOWNLOAD_KEYWORDS = ("télécharger", "download", "ddl", "mega", "google", "drive")
    # File hosts accepted as download targets.
    _FILE_HOSTS = ("mega.nz", "drive.google.com", "uptobox.com", "1fichier.com")
    # Substrings that suggest an iframe embeds a video player.
    _PLAYER_HINTS = ("video", "player", "stream", "play")
    # Substrings a script-embedded URL must contain to be accepted.
    _MEDIA_EXTENSIONS = ("mp4", "m3u8", "mkv")
    # Link texts that carry no title information in search results.
    _GENERIC_TITLES = ("télécharger", "download", "ddl")

    # Compiled once instead of on every <script> tag of every page.
    _SCRIPT_URL_PATTERNS = tuple(
        re.compile(pattern)
        for pattern in (
            r'(https?://[^"\'>\s]+\.(?:mp4|m3u8|mkv)(?:\?[^"\'>\s]*)?)',
            r'"url":"([^"]+)"',
            r'"video":"([^"]+)"',
            r'"file":"([^"]+)"',
            r'file:\s*"([^"]+)"',
        )
    )
    _DDL_PATTERN = re.compile(r'ddl["\']?\s*:\s*["\']([^"\']+)["\']')
    _DOWNLOAD_CLASS_PATTERN = re.compile(r"download|ddl|episode", re.I)
    _EPISODE_LINK_PATTERN = re.compile(r"info-0-1/\d+")
    _EPISODE_ID_PATTERN = re.compile(r"info-0-1/(\d+)")
    _SEARCH_RESULT_PATTERN = re.compile(r"file-0-1/")
    _YEAR_PATTERN = re.compile(r"\b(19\d{2}|20\d{2})\b")

    def __init__(self):
        super().__init__()
        self.id = "anime-ultime"

    def can_handle(self, url: str) -> bool:
        """Return True when *url* contains one of the known site domains."""
        lowered = url.lower()
        return any(domain in lowered for domain in self.BASE_DOMAINS)

    # ------------------------------------------------------------------
    # Download link extraction
    # ------------------------------------------------------------------

    async def get_download_link(self, url: str) -> tuple[str, str]:
        """Extract a download link from an anime-ultime page.

        The page is fetched (following redirects) and a series of strategies
        is tried in order; the first one that yields a link wins:
        og:video meta tag, anchors pointing at known file hosts, player
        iframes, <video>/<source> tags, URLs embedded in scripts, and
        finally anchors whose class looks download-related.

        Args:
            url: Page URL to inspect.

        Returns:
            A ``(link, filename)`` tuple; the filename is derived from the
            final (post-redirect) page URL.

        Raises:
            Exception: If the page cannot be fetched/parsed or no link is
                found. The original error is attached as ``__cause__``.
        """
        try:
            response = await self.client.get(url, follow_redirects=True)
            final_url = str(response.url)
            soup = BeautifulSoup(response.text, "lxml")

            strategies = (
                self._link_from_og_video,
                self._link_from_file_hosts,
                self._link_from_iframes,
                self._link_from_video_tags,
                self._link_from_scripts,
                self._link_from_classed_anchors,
            )
            for strategy in strategies:
                link = strategy(soup, final_url)
                if link:
                    return link, self._generate_filename(final_url)

            raise Exception("Could not find download link on page")
        except Exception as e:
            # Chain the cause so the original traceback is not lost.
            raise Exception(f"Error extracting Anime-Ultime link: {str(e)}") from e

    @staticmethod
    def _absolutize(url: str, base: str) -> str:
        """Return *url* unchanged if it is http(s), else resolve it against *base*."""
        if url.startswith(("http://", "https://")):
            return url
        return urljoin(base, url)

    def _link_from_og_video(self, soup: BeautifulSoup, page_url: str) -> Optional[str]:
        """Return the og:video meta content when it ends with ``.mp4``."""
        og_video = soup.find("meta", property="og:video")
        if og_video and og_video.get("content"):
            video_url = og_video["content"]
            if video_url.endswith(".mp4"):
                print(f"[ANIME-ULTIME] Found og:video link: {video_url}")
                return video_url
        return None

    def _link_from_file_hosts(self, soup: BeautifulSoup, page_url: str) -> Optional[str]:
        """Return the first download-looking anchor that targets a known file host."""
        for anchor in soup.find_all("a", href=True):
            href = anchor["href"]
            text = anchor.get_text().lower()
            if not any(keyword in text for keyword in self._DOWNLOAD_KEYWORDS):
                continue
            lowered_href = href.lower()
            if any(host in lowered_href for host in self._FILE_HOSTS):
                return href
        return None

    def _link_from_iframes(self, soup: BeautifulSoup, page_url: str) -> Optional[str]:
        """Return the first absolute iframe ``src`` that looks like a video player."""
        for iframe in soup.find_all("iframe"):
            src = iframe.get("src", "")
            if not src or not src.startswith("http"):
                continue
            if any(hint in src for hint in self._PLAYER_HINTS):
                return src
        return None

    def _link_from_video_tags(self, soup: BeautifulSoup, page_url: str) -> Optional[str]:
        """Return the first ``src`` of a <video> tag or of one of its <source> children.

        Relative sources are resolved against the page URL so the caller
        always gets a usable link.
        """
        for video in soup.find_all("video"):
            src = video.get("src", "")
            if src:
                return self._absolutize(src, page_url)
            for source in video.find_all("source"):
                src = source.get("src", "")
                if src:
                    return self._absolutize(src, page_url)
        return None

    def _link_from_scripts(self, soup: BeautifulSoup, page_url: str) -> Optional[str]:
        """Return the first media URL found inside inline scripts.

        For each script, the generic URL patterns are tried first, then the
        ``ddl: "..."`` JavaScript-variable pattern.
        """
        for script in soup.find_all("script"):
            code = script.string
            if not code:
                continue

            for pattern in self._SCRIPT_URL_PATTERNS:
                for candidate in pattern.findall(code):
                    # Undo JSON escaping ("\/" and stray backslashes).
                    candidate = candidate.replace("\\/", "/").replace("\\", "")
                    if any(ext in candidate for ext in self._MEDIA_EXTENSIONS):
                        return self._absolutize(candidate, page_url)

            ddl_match = self._DDL_PATTERN.search(code)
            if ddl_match:
                ddl_url = ddl_match.group(1)
                if ddl_url.startswith("http"):
                    return ddl_url
        return None

    def _link_from_classed_anchors(
        self, soup: BeautifulSoup, page_url: str
    ) -> Optional[str]:
        """Return the first absolute href of an anchor with a download-like class."""
        for anchor in soup.find_all("a", class_=self._DOWNLOAD_CLASS_PATTERN):
            href = anchor.get("href", "")
            if href and href.startswith("http"):
                return href
        return None

    def _generate_filename(self, url: str) -> str:
        """Build a ``"<Name> - Episode <NN>.mp4"`` filename from a page URL.

        Recognised URL shapes:
            - ``info-0-1/30200``                       -> "Episode 30200"
            - ``info-0-1/30200/Naruto-OAV-01-vostfr``  -> "Naruto Oav", ep. 01
            - ``file-0-1/2991-Naruto-OAV``             -> "Naruto Oav"

        Anything else falls back to the name "Anime"; the episode defaults
        to "01" unless the ``-NN-vostfr`` suffix is present.
        """
        anime_name = "Anime"
        episode = "01"

        if "info-0-1/" in url:
            id_match = re.search(r"info-0-1/(\d+)", url)
            if id_match:
                # Default to the numeric ID; overridden when the slug is usable.
                anime_name = f"Episode {id_match.group(1)}"
                slug_match = re.search(r"info-0-1/\d+/([^/]+)", url)
                if slug_match:
                    slug = slug_match.group(1)
                    number_match = re.search(r"-(\d+)-vostfr$", slug, re.I)
                    if number_match:
                        episode = number_match.group(1).zfill(2)
                        anime_name = re.sub(
                            r"-\d+-vostfr$", "", slug, flags=re.I
                        ).replace("-", " ")
        elif "file-0-1/" in url:
            file_match = re.search(r"file-0-1/\d+-(.+)$", url)
            if file_match:
                anime_name = file_match.group(1).replace("-", " ")

        # Path separators must not end up in a filename.
        anime_name = anime_name.replace("/", " ").strip()

        # Title-case only the name: calling .title() on the whole filename
        # would turn the extension into ".Mp4".
        return f"{anime_name.title()} - Episode {episode}.mp4"

    # ------------------------------------------------------------------
    # Metadata
    # ------------------------------------------------------------------

    async def get_anime_metadata(self, anime_url: str) -> dict:
        """Extract metadata from an anime page.

        Only ``synopsis``, ``genres``, ``rating``, ``release_year``,
        ``poster_image`` and ``total_episodes`` are filled in by this
        method; ``studio``, ``banner_image``, ``status`` and
        ``alternative_titles`` keep their empty defaults.

        Args:
            anime_url: URL of the anime page.

        Returns:
            The metadata dict, or ``{}`` if anything goes wrong (the error
            is printed, not raised).
        """
        try:
            print(f"[ANIME-ULTIME] Extracting metadata from: {anime_url}")

            response = await self.client.get(anime_url)
            soup = BeautifulSoup(response.text, "lxml")

            metadata = {
                "synopsis": None,
                "genres": [],
                "rating": None,
                "release_year": None,
                "studio": None,
                "poster_image": None,
                "banner_image": None,
                "total_episodes": None,
                "status": None,
                "alternative_titles": [],
            }

            # Synopsis: first candidate element with more than 50 characters.
            synopsis_selectors = (
                "div.synopsis",
                "div.description",
                'div[class*="synopsis"]',
                "p.synopsis",
                ".info",
                "div.texte",
            )
            for selector in synopsis_selectors:
                synopsis_elem = soup.select_one(selector)
                if synopsis_elem:
                    synopsis = synopsis_elem.get_text(strip=True)
                    if len(synopsis) > 50:
                        metadata["synopsis"] = synopsis
                        break

            page_text = soup.get_text()

            # Genres from a <meta> tag (property or name), comma separated.
            genre_meta = soup.find("meta", property="genre") or soup.find(
                "meta", attrs={"name": "genre"}
            )
            if genre_meta:
                genres_text = genre_meta.get("content", "")
                if genres_text:
                    metadata["genres"] = [
                        genre.strip()
                        for genre in genres_text.split(",")
                        if genre.strip()
                    ]

            # Genres from the first five genre/tag-looking links.
            genre_links = soup.find_all(
                "a", href=re.compile(r"genre|tag|type|cat", re.I)
            )
            for link in genre_links[:5]:
                genre = link.get_text(strip=True)
                if genre and genre not in metadata["genres"]:
                    metadata["genres"].append(genre)

            # Rating, normalised to an "X/10" string.
            rating_selectors = (
                "span.rating",
                "div.rating",
                "span.score",
                "div.note",
                ".rating",
            )
            for selector in rating_selectors:
                rating_elem = soup.select_one(selector)
                if not rating_elem:
                    continue
                rating_text = rating_elem.get_text(strip=True)
                out_of_ten = re.search(r"(\d+\.?\d*)\s*/\s*10", rating_text)
                if out_of_ten:
                    metadata["rating"] = f"{out_of_ten.group(1)}/10"
                    break
                out_of_five = re.search(r"(\d+\.?\d*)\s*/\s*5", rating_text)
                if out_of_five:
                    rating_val = float(out_of_five.group(1)) * 2
                    metadata["rating"] = f"{rating_val:.1f}/10"
                    break

            # Release year: first year-like token within a plausible range
            # (allowing up to two years in the future for announced titles).
            latest_plausible_year = datetime.datetime.now().year + 2
            for year_match in self._YEAR_PATTERN.finditer(page_text):
                year = int(year_match.group(1))
                if 1950 <= year <= latest_plausible_year:
                    metadata["release_year"] = year
                    break

            og_image = soup.find("meta", property="og:image")
            if og_image:
                metadata["poster_image"] = og_image.get("content")

            # Count episodes from the page already downloaded instead of
            # fetching the same URL a second time.
            episodes_count = len(self._parse_episodes(soup))
            if episodes_count > 0:
                metadata["total_episodes"] = episodes_count

            print(f"[ANIME-ULTIME] Extracted metadata: {metadata}")
            return metadata

        except Exception as e:
            print(f"[ANIME-ULTIME] Error extracting metadata: {e}")
            return {}

    # ------------------------------------------------------------------
    # Search
    # ------------------------------------------------------------------

    async def search_anime(
        self, query: str, lang: str = "vostfr", include_metadata: bool = False
    ) -> list[dict]:
        """Search for anime on anime-ultime.

        Args:
            query: Search query string.
            lang: Language preference (vostfr, vf). Only used in the log
                message; it is not sent to the site.
            include_metadata: Whether to fetch full metadata for each
                result (slower: one extra request per result).

        Returns:
            Up to ``MAX_SEARCH_RESULTS`` unique results, each a dict with
            ``title``, ``url`` (absolute), ``type`` and ``metadata`` keys.
            Returns ``[]`` on any error (the error is printed, not raised).
        """
        try:
            start = time.perf_counter()
            print(f"[ANIME-ULTIME] Searching for '{query}' ({lang})...")

            # The site's search endpoint expects a POST.
            search_url = urljoin(self.BASE_URL, "search-0-1")
            response = await self.client.post(search_url, data={"search": query})
            soup = BeautifulSoup(response.text, "lxml")

            elapsed = time.perf_counter() - start
            print(
                f"[ANIME-ULTIME] Got response {response.status_code} in {elapsed:.2f}s"
            )

            results = []
            seen_urls = set()

            # Search results link to file-0-1/ pages, not info- pages.
            for anchor in soup.find_all("a", href=self._SEARCH_RESULT_PATTERN):
                # The cap applies to unique results, so duplicates in the
                # markup do not shrink the list.
                if len(results) >= self.MAX_SEARCH_RESULTS:
                    break

                href = anchor.get("href", "")
                if not href:
                    continue

                # De-duplicate on the absolute form so "file-0-1/1-X" and
                # "https://.../file-0-1/1-X" count as the same result.
                absolute_url = self._absolutize(href, self.BASE_URL)
                if absolute_url in seen_urls:
                    continue
                seen_urls.add(absolute_url)

                result_item = {
                    "title": self._search_result_title(anchor, href),
                    "url": absolute_url,
                    "type": "search_result",
                    "metadata": None,
                }

                if include_metadata:
                    result_item["metadata"] = await self.get_anime_metadata(
                        absolute_url
                    )

                results.append(result_item)

            print(f"[ANIME-ULTIME] Found {len(results)} results")
            return results

        except Exception as e:
            print(f"[ANIME-ULTIME] Error: {e}")
            return []

    def _search_result_title(self, anchor, href: str) -> str:
        """Pick the best available title for a search-result anchor.

        Uses the link text unless it is too short or generic ("Télécharger"
        etc.), in which case the title is derived from the URL slug, and as
        a last resort from the text of the enclosing row/cell/div.
        """
        raw_title = anchor.get_text().strip()
        title = raw_title

        if len(raw_title) < 5 or raw_title.lower() in self._GENERIC_TITLES:
            slug_match = re.search(r"file-0-1/\d+-(.+)$", href)
            if slug_match:
                title = slug_match.group(1).replace("-", " ").title()

        if len(title) < 5:
            row = anchor.find_parent(["tr", "td", "div"])
            if row is not None:
                row_text = row.get_text().strip()
                if raw_title in row_text:
                    row_text = row_text.replace(raw_title, "").strip()
                if 5 < len(row_text) < 100:
                    title = row_text

        return title

    # ------------------------------------------------------------------
    # Episodes
    # ------------------------------------------------------------------

    async def get_episodes(self, anime_url: str, lang: str = "vostfr") -> list[dict]:
        """Get the list of episodes for an anime.

        Args:
            anime_url: URL of the anime page.
            lang: Accepted for interface compatibility; not used here.

        Returns:
            A list of ``{"episode", "url", "title"}`` dicts, one per episode
            number, sorted by episode number. Returns ``[]`` on any error
            (the error is printed, not raised).
        """
        try:
            response = await self.client.get(anime_url)
            soup = BeautifulSoup(response.text, "lxml")
            return self._parse_episodes(soup)
        except Exception as e:
            print(f"Error getting episodes: {e}")
            return []

    def _parse_episodes(self, soup: BeautifulSoup) -> list[dict]:
        """Extract the de-duplicated, sorted episode list from a parsed page.

        Episode links are anchors whose href contains ``info-0-1/<id>``,
        e.g. ``info-0-1/30200/Naruto-OAV-01-vostfr``. The episode number is
        taken from the ``-NN-vostfr`` suffix, then from an ``Episode NN``
        fragment in the href, then from the first number in the link text.
        """
        episodes = []
        for link in soup.find_all("a", href=self._EPISODE_LINK_PATTERN):
            href = link.get("href", "")
            text = link.get_text().strip()

            number_match = (
                re.search(r"-(\d+)-vostfr$", href, re.I)
                or re.search(r"Episode[-\s]?(\d+)", href, re.I)
                or re.search(r"(\d+)", text)
            )
            if not number_match:
                continue

            # find_all already guarantees the href contains "info-0-1/<id>",
            # so this is purely defensive.
            id_match = self._EPISODE_ID_PATTERN.search(href)
            if id_match is None:
                continue

            episodes.append(
                {
                    "episode": number_match.group(1).zfill(2),
                    # Canonical episode URL: <site root>/info-0-1/<id>
                    "url": f"{self.BASE_URL}info-0-1/{id_match.group(1)}",
                    "title": text,
                }
            )

        # Keep the first link seen for each episode number, then sort.
        unique_episodes = {}
        for episode in episodes:
            unique_episodes.setdefault(episode["episode"], episode)
        return sorted(unique_episodes.values(), key=lambda ep: int(ep["episode"]))