Files
ohm_streaming/app/downloaders/anime_sites/animeultime.py
T
root 3afad41d46 refactor: Restructure downloaders with clear separation
This commit implements a complete reorganization of the downloader system
with a clear distinction between anime streaming sites and video hosting services.

## Structure Changes

**New Organization:**
- `app/downloaders/anime_sites/` - Anime streaming sites (catalogs + metadata)
- `app/downloaders/video_players/` - Video hosting services (file downloads)

**Base Classes:**
- `BaseAnimeSite` - For anime providers (search, episodes, metadata)
- `BaseVideoPlayer` - For video players (download link extraction)

**Migrated Downloaders:**
Anime Sites (4):
- AnimeSama, NekoSama, AnimeUltime, Vostfree

Video Players (8):
- Doodstream, Sibnet, VidMoly, SendVid, Lpayer, 1fichier, Uptobox, Rapidfile

## Key Improvements

1. **Clear Separation**: Distinct base classes for different use cases
2. **Preserved Functionality**: All existing features maintained
   - VidMoly: M3U8 support, Playwright, multi-domains, target_filename param
   - SendVid: target_filename parameter support
   - All others: No behavioral changes

3. **Better Organization**:
   - Anime sites: search_anime(), get_episodes(), get_anime_metadata()
   - Video players: get_download_link(url, target_filename=None)

4. **Fixed Imports**: Updated cross-imports in AnimeSama
   - from ..video_players.vidmoly import
   - from ..video_players.sendvid import
   - from ..video_players.sibnet import
   - from ..video_players.lpayer import

5. **Updated Tests**: All test imports use new structure
6. **Updated Providers**: Added 4 missing file hosts to providers.py

## Backward Compatibility

- Main API unchanged: `get_downloader()` works identically
- All 23 tests passing
- Frontend fully functional
- No breaking changes for users

## Documentation

- RESTRUCTURATION_SUMMARY.md - Technical details
- FIX_IMPORT_ERROR.md - Import error resolution
- IMPORT_VERIFICATION_REPORT.md - Complete import verification
- FRONTEND_VERIFICATION_FINAL.md - Frontend validation

Generated with [Claude Code](https://claude.ai/code)
via [Happy](https://happy.engineering)

Co-Authored-By: Claude <noreply@anthropic.com>
Co-Authored-By: Happy <yesreply@happy.engineering>
2026-01-24 22:13:20 +00:00

436 lines
18 KiB
Python

from .base import BaseAnimeSite
from bs4 import BeautifulSoup
import re
import httpx
from urllib.parse import urljoin
class AnimeUltimeDownloader(BaseAnimeSite):
    """Scraper for the Anime-Ultime catalogue (anime-ultime.net).

    Offers search, episode listing, metadata extraction and download-link
    discovery by parsing the site's HTML with BeautifulSoup.

    All HTTP traffic goes through ``self.client``, which is not defined in
    this class (presumably an ``httpx.AsyncClient`` provided by
    ``BaseAnimeSite`` -- confirm against the base class).
    """

    BASE_DOMAINS = ["anime-ultime.com", "anime-ultime.net", "www.anime-ultime.net"]

    # Canonical origin used to build absolute search/episode URLs.
    BASE_URL = "https://www.anime-ultime.net"

    # Maximum number of *unique* entries returned by search_anime().
    MAX_SEARCH_RESULTS = 10

    # Heuristics used by get_download_link(); hoisted so they are built once.
    _DOWNLOAD_KEYWORDS = ('télécharger', 'download', 'ddl', 'mega', 'google', 'drive')
    _FILE_HOSTS = ('mega.nz', 'drive.google.com', 'uptobox.com', '1fichier.com')
    _PLAYER_HINTS = ('video', 'player', 'stream', 'play')
    _VIDEO_EXTENSIONS = ('mp4', 'm3u8', 'mkv')
    _SCRIPT_URL_PATTERNS = tuple(
        re.compile(pattern)
        for pattern in (
            r'(https?://[^"\'>\s]+\.(?:mp4|m3u8|mkv)(?:\?[^"\'>\s]*)?)',
            r'"url":"([^"]+)"',
            r'"video":"([^"]+)"',
            r'"file":"([^"]+)"',
            r'file:\s*"([^"]+)"',
        )
    )
    _SCRIPT_DDL_PATTERN = re.compile(r'ddl["\']?\s*:\s*["\']([^"\']+)["\']')
    _DOWNLOAD_CLASS_PATTERN = re.compile(r'download|ddl|episode', re.I)

    # Link texts that carry no title information in search results.
    _PLACEHOLDER_TITLES = ('télécharger', 'download', 'ddl')

    # URL shapes used by the site.
    _SEARCH_RESULT_HREF = re.compile(r'file-0-1/')
    _EPISODE_HREF = re.compile(r'info-0-1/(\d+)')

    # Metadata heuristics.
    _SYNOPSIS_SELECTORS = (
        'div.synopsis',
        'div.description',
        'div[class*="synopsis"]',
        'p.synopsis',
        '.info',
        'div.texte',
    )
    _RATING_SELECTORS = ('span.rating', 'div.rating', 'span.score', 'div.note', '.rating')
    # Accept "8.5/10" as well as the French "8,5/10"; the lookahead keeps
    # "8/100" or "4/50" from being read as a /10 or /5 score.
    _RATING_OUT_OF_10 = re.compile(r'(\d+[.,]?\d*)\s*/\s*10(?!\d)')
    _RATING_OUT_OF_5 = re.compile(r'(\d+[.,]?\d*)\s*/\s*5(?!\d)')
    _YEAR_PATTERN = re.compile(r'\b(19\d{2}|20\d{2})\b')
    _GENRE_HREF = re.compile(r'genre|tag|type|cat', re.I)

    def can_handle(self, url: str) -> bool:
        """Return True when *url* contains one of ``BASE_DOMAINS``.

        The check is a case-insensitive substring match on the whole URL.
        """
        lowered = url.lower()
        return any(domain in lowered for domain in self.BASE_DOMAINS)

    async def get_download_link(self, url: str) -> tuple[str, str]:
        """Extract a downloadable/playable link from an Anime-Ultime page.

        The page is fetched (following redirects) and several strategies are
        tried in order: ``og:video`` meta tag, links to known file hosts,
        player iframes, ``<video>``/``<source>`` tags, URLs embedded in
        scripts, and finally anchors with download-like CSS classes.

        Args:
            url: Page URL to inspect.

        Returns:
            ``(link, filename)`` where *filename* is derived from the final
            (post-redirect) page URL.

        Raises:
            Exception: If the request fails or no link can be found.  The
                original error is chained as ``__cause__``.
        """
        try:
            response = await self.client.get(url, follow_redirects=True)
            final_url = str(response.url)
            soup = BeautifulSoup(response.text, 'lxml')

            link = self._find_video_url(soup)
            if not link:
                raise Exception("Could not find download link on page")

            # Relative or protocol-relative links are useless to a
            # downloader, so resolve them against the page they came from.
            return self._absolutize(final_url, link), self._generate_filename(final_url)
        except Exception as e:
            raise Exception(f"Error extracting Anime-Ultime link: {str(e)}") from e

    @staticmethod
    def _absolutize(base_url: str, candidate: str) -> str:
        """Resolve *candidate* against *base_url* unless it is already http(s)."""
        if candidate.startswith(('http://', 'https://')):
            return candidate
        return urljoin(base_url, candidate)

    def _find_video_url(self, soup):
        """Run every extraction strategy in priority order; return the first hit or None."""
        strategies = (
            self._link_from_og_video,
            self._link_from_file_hosts,
            self._link_from_iframes,
            self._link_from_video_tags,
            self._link_from_scripts,
            self._link_from_download_classes,
        )
        for strategy in strategies:
            found = strategy(soup)
            if found:
                return found
        return None

    def _link_from_og_video(self, soup):
        """Return the ``og:video`` meta content when it ends in ``.mp4``, else None."""
        tag = soup.find('meta', property='og:video')
        content = tag.get('content') if tag else None
        if content and content.endswith('.mp4'):
            print(f"[ANIME-ULTIME] Found og:video link: {content}")
            return content
        return None

    def _link_from_file_hosts(self, soup):
        """Return the first download-labelled anchor pointing at a known file host."""
        for anchor in soup.find_all('a', href=True):
            href = anchor['href']
            label = anchor.get_text().lower()
            if not any(keyword in label for keyword in self._DOWNLOAD_KEYWORDS):
                continue
            lowered_href = href.lower()
            if any(host in lowered_href for host in self._FILE_HOSTS):
                return href
        return None

    def _link_from_iframes(self, soup):
        """Return the first absolute iframe ``src`` that looks like a video player."""
        for iframe in soup.find_all('iframe'):
            src = iframe.get('src', '')
            if (
                src
                and src.startswith('http')
                and any(hint in src for hint in self._PLAYER_HINTS)
            ):
                return src
        return None

    def _link_from_video_tags(self, soup):
        """Return the first ``src`` found on a ``<video>`` tag or its ``<source>`` children."""
        for video in soup.find_all('video'):
            src = video.get('src', '')
            if src:
                return src
            for source in video.find_all('source'):
                src = source.get('src', '')
                if src:
                    return src
        return None

    def _link_from_scripts(self, soup):
        """Return a media URL embedded in inline JavaScript, or None.

        For each script, generic URL patterns are tried first, then the
        ``ddl: "..."`` variable form.
        """
        for script in soup.find_all('script'):
            code = script.string
            if not code:
                continue
            for pattern in self._SCRIPT_URL_PATTERNS:
                for candidate in pattern.findall(code):
                    # Undo JSON escaping such as "https:\/\/host\/file.mp4".
                    candidate = candidate.replace('\\/', '/').replace('\\', '')
                    if any(ext in candidate for ext in self._VIDEO_EXTENSIONS):
                        return candidate
            ddl_match = self._SCRIPT_DDL_PATTERN.search(code)
            if ddl_match and ddl_match.group(1).startswith('http'):
                return ddl_match.group(1)
        return None

    def _link_from_download_classes(self, soup):
        """Return the first absolute ``href`` on an anchor whose class mentions download/ddl/episode."""
        for anchor in soup.find_all('a', class_=self._DOWNLOAD_CLASS_PATTERN):
            href = anchor.get('href', '')
            if href and href.startswith('http'):
                return href
        return None

    def _generate_filename(self, url: str) -> str:
        """Build a ``"<Name> - Episode <NN>.mp4"`` filename from a page URL.

        Recognised URL shapes:
            - ``info-0-1/30200``
            - ``info-0-1/30200/Naruto-OAV-01-vostfr``
            - ``file-0-1/2991-Naruto-OAV``

        The name defaults to ``"Anime"`` and the episode to ``"01"`` when
        they cannot be derived.  Only the name is title-cased, so the
        extension stays a lowercase ``.mp4``.
        """
        anime_name = "Anime"
        episode = "01"

        if 'info-0-1/' in url:
            id_match = re.search(r'info-0-1/(\d+)', url)
            if id_match:
                # Fall back to the numeric ID unless a full slug is present.
                anime_name = f"Episode {id_match.group(1)}"
                slug_match = re.search(r'info-0-1/\d+/([^/]+)', url)
                if slug_match:
                    slug = slug_match.group(1)
                    number_match = re.search(r'-(\d+)-vostfr$', slug, re.I)
                    if number_match:
                        episode = number_match.group(1).zfill(2)
                        anime_name = re.sub(
                            r'-\d+-vostfr$', '', slug, flags=re.I
                        ).replace('-', ' ')
        elif 'file-0-1/' in url:
            file_match = re.search(r'file-0-1/\d+-(.+)$', url)
            if file_match:
                anime_name = file_match.group(1).replace('-', ' ')

        # Path separators must not end up in a file name.
        anime_name = anime_name.replace('/', ' ').strip()
        return f"{anime_name.title()} - Episode {episode}.mp4"

    async def get_anime_metadata(self, anime_url: str) -> dict:
        """Scrape descriptive metadata from an anime page.

        Args:
            anime_url: URL of the anime page.

        Returns:
            A dict with the keys ``synopsis``, ``genres``, ``rating``,
            ``release_year``, ``studio``, ``poster_image``, ``banner_image``,
            ``total_episodes``, ``status`` and ``alternative_titles``.
            ``studio``, ``banner_image``, ``status`` and
            ``alternative_titles`` are never populated by this scraper.
            An empty dict is returned if anything goes wrong.
        """
        try:
            print(f"[ANIME-ULTIME] Extracting metadata from: {anime_url}")
            response = await self.client.get(anime_url)
            soup = BeautifulSoup(response.text, 'lxml')

            og_image = soup.find('meta', property='og:image')
            episodes_count = len(await self.get_episodes(anime_url))

            metadata = {
                'synopsis': self._extract_synopsis(soup),
                'genres': self._extract_genres(soup),
                'rating': self._extract_rating(soup),
                'release_year': self._extract_release_year(soup.get_text()),
                'studio': None,
                'poster_image': og_image.get('content') if og_image else None,
                'banner_image': None,
                'total_episodes': episodes_count if episodes_count > 0 else None,
                'status': None,
                'alternative_titles': [],
            }
            print(f"[ANIME-ULTIME] Extracted metadata: {metadata}")
            return metadata
        except Exception as e:
            print(f"[ANIME-ULTIME] Error extracting metadata: {e}")
            return {}

    def _extract_synopsis(self, soup):
        """Return the first candidate element's text longer than 50 characters, or None."""
        for selector in self._SYNOPSIS_SELECTORS:
            element = soup.select_one(selector)
            if element:
                text = element.get_text(strip=True)
                # Short snippets are treated as labels, not a synopsis.
                if len(text) > 50:
                    return text
        return None

    def _extract_genres(self, soup) -> list:
        """Collect genres from a ``genre`` meta tag plus up to five genre-like links."""
        genres = []
        genre_meta = soup.find('meta', property='genre') or soup.find(
            'meta', attrs={'name': 'genre'}
        )
        if genre_meta:
            content = genre_meta.get('content', '')
            # Skip empty pieces produced by stray or trailing commas.
            genres = [part.strip() for part in content.split(',') if part.strip()]

        for link in soup.find_all('a', href=self._GENRE_HREF)[:5]:
            genre = link.get_text(strip=True)
            if genre and genre not in genres:
                genres.append(genre)
        return genres

    def _extract_rating(self, soup):
        """Return the rating normalised to ``"<value>/10"``, or None.

        Scores expressed out of 5 are doubled.  A decimal comma is
        converted to a decimal point.
        """
        for selector in self._RATING_SELECTORS:
            element = soup.select_one(selector)
            if not element:
                continue
            text = element.get_text(strip=True)

            out_of_10 = self._RATING_OUT_OF_10.search(text)
            if out_of_10:
                return f"{out_of_10.group(1).replace(',', '.')}/10"

            out_of_5 = self._RATING_OUT_OF_5.search(text)
            if out_of_5:
                doubled = float(out_of_5.group(1).replace(',', '.')) * 2
                return f"{doubled:.1f}/10"
        return None

    def _extract_release_year(self, page_text: str):
        """Return the first plausible year (1950 .. current year + 2) in *page_text*, or None."""
        import datetime

        latest_year = datetime.datetime.now().year + 2
        for year_match in self._YEAR_PATTERN.finditer(page_text):
            year = int(year_match.group(1))
            if 1950 <= year <= latest_year:
                return year
        return None

    async def search_anime(self, query: str, lang: str = "vostfr", include_metadata: bool = False) -> list[dict]:
        """Search Anime-Ultime for *query*.

        Args:
            query: Search query string.
            lang: Language preference (vostfr, vf).  Only used in the log
                line; the request itself does not depend on it.
            include_metadata: Whether to fetch full metadata for each result
                (slower: one extra metadata lookup per result).

        Returns:
            Up to ``MAX_SEARCH_RESULTS`` unique results, each a dict with
            ``title``, ``url`` (absolute), ``type`` (``'search_result'``)
            and ``metadata``.  An empty list is returned on any error.
        """
        try:
            import time

            start = time.time()
            print(f"[ANIME-ULTIME] Searching for '{query}' ({lang})...")

            # Anime-Ultime uses POST for search.
            search_url = f"{self.BASE_URL}/search-0-1"
            response = await self.client.post(search_url, data={'search': query})
            soup = BeautifulSoup(response.text, 'lxml')
            elapsed = time.time() - start
            print(f"[ANIME-ULTIME] Got response {response.status_code} in {elapsed:.2f}s")

            results = []
            seen_urls = set()
            # Search results use the file-0-1/ pattern, not info-.
            for anchor in soup.find_all('a', href=self._SEARCH_RESULT_HREF):
                # Count unique results, so duplicate links to the same
                # anime do not eat into the limit.
                if len(results) >= self.MAX_SEARCH_RESULTS:
                    break

                href = anchor.get('href', '')
                if not href or href in seen_urls:
                    continue
                seen_urls.add(href)

                title = self._title_for_result(anchor, href)
                if not href.startswith('http'):
                    href = urljoin(f"{self.BASE_URL}/", href)

                result_item = {
                    'title': title,
                    'url': href,
                    'type': 'search_result',
                    'metadata': None,
                }
                if include_metadata:
                    result_item['metadata'] = await self.get_anime_metadata(href)
                results.append(result_item)

            print(f"[ANIME-ULTIME] Found {len(results)} results")
            return results
        except Exception as e:
            print(f"[ANIME-ULTIME] Error: {e}")
            return []

    def _title_for_result(self, anchor, href: str) -> str:
        """Pick the best available title for a search-result anchor.

        Preference order: the link text, then the slug in a
        ``file-0-1/<id>-<slug>`` URL when the link text is short or a
        generic label, then text of the enclosing row/cell/div.
        """
        raw_title = anchor.get_text().strip()
        title = raw_title

        if len(raw_title) < 5 or raw_title.lower() in self._PLACEHOLDER_TITLES:
            slug_match = re.search(r'file-0-1/\d+-(.+)$', href)
            if slug_match:
                title = slug_match.group(1).replace('-', ' ').title()

        if len(title) < 5:
            container = anchor.find_parent(['tr', 'td', 'div'])
            if container:
                container_text = container.get_text().strip()
                # Drop the link's own text so only the surrounding label remains.
                if raw_title in container_text:
                    container_text = container_text.replace(raw_title, '').strip()
                if 5 < len(container_text) < 100:
                    title = container_text
        return title

    async def get_episodes(self, anime_url: str, lang: str = "vostfr") -> list[dict]:
        """List the episodes linked from an anime page.

        Args:
            anime_url: URL of the anime page.
            lang: Accepted for interface compatibility; not used here.

        Returns:
            Dicts with ``episode`` (zero-padded number string), ``url``
            (``<BASE_URL>/info-0-1/<id>``) and ``title`` (link text), with
            one entry per episode number (first occurrence wins), sorted
            numerically.  An empty list is returned on any error.
        """
        try:
            response = await self.client.get(anime_url)
            soup = BeautifulSoup(response.text, 'lxml')

            # Keyed by episode number; dict order keeps the first occurrence.
            episodes_by_number = {}
            for link in soup.find_all('a', href=self._EPISODE_HREF):
                href = link.get('href', '')
                text = link.get_text().strip()

                # Episode number: URL suffix ("...-01-vostfr"), then an
                # "Episode NN" fragment in the URL, then any number in the text.
                number_match = (
                    re.search(r'-(\d+)-vostfr$', href, re.I)
                    or re.search(r'Episode[-\s]?(\d+)', href, re.I)
                    or re.search(r'(\d+)', text)
                )
                if not number_match:
                    continue
                episode_num = number_match.group(1).zfill(2)

                # Rebuild the URL from the ID so trailing slug segments
                # ("info-0-1/30200/Name-01-vostfr") are dropped.
                id_match = self._EPISODE_HREF.search(href)
                if id_match:
                    episode_url = f"{self.BASE_URL}/info-0-1/{id_match.group(1)}"
                elif href.startswith('http'):
                    episode_url = href
                else:
                    episode_url = urljoin(anime_url, href)

                episodes_by_number.setdefault(
                    episode_num,
                    {'episode': episode_num, 'url': episode_url, 'title': text},
                )

            return sorted(episodes_by_number.values(), key=lambda ep: int(ep['episode']))
        except Exception as e:
            print(f"Error getting episodes: {e}")
            return []