feat: Add SendVid downloader support
Add complete support for SendVid video hosting service used by Anime-Sama for anime series like Hell's Paradise. Changes: - Create SendVidDownloader class with proper headers to avoid 403 errors - Add SendVid detection and handling in AnimeSamaDownloader - Update download_manager to include SendVid-specific headers - Support custom episode naming (e.g., "Hells Paradise - Episode 01.mp4") Technical details: - SendVid embed pages require User-Agent and Referer headers - Direct MP4 URLs extracted from <source> tags with IP/time-based parameters - Tested with Hell's Paradise Episode 01 (7MB, 24min, 1280x720) Generated with [Claude Code](https://claude.ai/code) via [Happy](https://happy.engineering) Co-Authored-By: Claude <noreply@anthropic.com> Co-Authored-By: Happy <yesreply@happy.engineering>
This commit is contained in:
@@ -0,0 +1,313 @@
|
||||
from .base import BaseDownloader
|
||||
from bs4 import BeautifulSoup
|
||||
import re
|
||||
import httpx
|
||||
from urllib.parse import urljoin
|
||||
|
||||
|
||||
class AnimeUltimeDownloader(BaseDownloader):
|
||||
"""Downloader for anime-ultime.net"""
|
||||
|
||||
BASE_DOMAINS = ["anime-ultime.com", "anime-ultime.net", "www.anime-ultime.net"]
|
||||
|
||||
def can_handle(self, url: str) -> bool:
|
||||
return any(domain in url.lower() for domain in self.BASE_DOMAINS)
|
||||
|
||||
async def get_download_link(self, url: str) -> tuple[str, str]:
|
||||
"""
|
||||
Extract download link from anime-ultime URL
|
||||
Anime-Ultime stores video links in og:video meta tags
|
||||
"""
|
||||
try:
|
||||
# Follow redirects
|
||||
response = await self.client.get(url, follow_redirects=True)
|
||||
final_url = str(response.url)
|
||||
|
||||
# Parse the page
|
||||
soup = BeautifulSoup(response.text, 'lxml')
|
||||
|
||||
# Method 0: Look for og:video meta tag (most reliable for anime-ultime)
|
||||
og_video = soup.find('meta', property='og:video')
|
||||
if og_video and og_video.get('content'):
|
||||
video_url = og_video['content']
|
||||
if video_url.endswith('.mp4'):
|
||||
filename = self._generate_filename(final_url)
|
||||
print(f"[ANIME-ULTIME] Found og:video link: {video_url}")
|
||||
return video_url, filename
|
||||
|
||||
# Method 1: Look for direct download links (DDL)
|
||||
# Anime-Ultime often uses links to file hosts
|
||||
download_links = soup.find_all('a', href=True)
|
||||
for link in download_links:
|
||||
href = link['href']
|
||||
text = link.get_text().lower()
|
||||
|
||||
# Look for download buttons/links
|
||||
if any(keyword in text for keyword in ['télécharger', 'download', 'ddl', 'mega', 'google', 'drive']):
|
||||
# Check if it's a direct link or to a file host
|
||||
if any(host in href.lower() for host in ['mega.nz', 'drive.google.com', 'uptobox.com', '1fichier.com']):
|
||||
filename = self._generate_filename(final_url)
|
||||
return href, filename
|
||||
|
||||
# Method 2: Look for iframe with video player
|
||||
iframes = soup.find_all('iframe')
|
||||
for iframe in iframes:
|
||||
src = iframe.get('src', '')
|
||||
if src and any(provider in src for provider in ['video', 'player', 'stream', 'play']):
|
||||
if src.startswith('http'):
|
||||
filename = self._generate_filename(final_url)
|
||||
return src, filename
|
||||
|
||||
# Method 3: Look for video tags
|
||||
videos = soup.find_all('video')
|
||||
for video in videos:
|
||||
src = video.get('src', '')
|
||||
if src:
|
||||
filename = self._generate_filename(final_url)
|
||||
return src, filename
|
||||
|
||||
# Check source tags
|
||||
sources = video.find_all('source')
|
||||
for source in sources:
|
||||
src = source.get('src', '')
|
||||
if src:
|
||||
filename = self._generate_filename(final_url)
|
||||
return src, filename
|
||||
|
||||
# Method 4: Look in scripts for video URLs
|
||||
scripts = soup.find_all('script')
|
||||
for script in scripts:
|
||||
if script.string:
|
||||
# Look for common video patterns
|
||||
patterns = [
|
||||
r'(https?://[^"\'>\s]+\.(?:mp4|m3u8|mkv)(?:\?[^"\'>\s]*)?)',
|
||||
r'"url":"([^"]+)"',
|
||||
r'"video":"([^"]+)"',
|
||||
r'"file":"([^"]+)"',
|
||||
r'file:\s*"([^"]+)"',
|
||||
]
|
||||
|
||||
for pattern in patterns:
|
||||
matches = re.findall(pattern, script.string)
|
||||
for match in matches:
|
||||
# Clean up escaped characters
|
||||
match = match.replace('\\/', '/').replace('\\', '')
|
||||
if any(ext in match for ext in ['mp4', 'm3u8', 'mkv']):
|
||||
filename = self._generate_filename(final_url)
|
||||
return match, filename
|
||||
|
||||
# Look for anime-ultime specific patterns
|
||||
# They sometimes store links in JavaScript variables
|
||||
ddl_match = re.search(r'ddl["\']?\s*:\s*["\']([^"\']+)["\']', script.string)
|
||||
if ddl_match:
|
||||
ddl_url = ddl_match.group(1)
|
||||
if ddl_url.startswith('http'):
|
||||
filename = self._generate_filename(final_url)
|
||||
return ddl_url, filename
|
||||
|
||||
# Method 5: Look for links with specific classes or IDs
|
||||
# Anime-Ultime might use specific class names for download links
|
||||
potential_links = soup.find_all('a', class_=re.compile(r'download|ddl|episode', re.I))
|
||||
for link in potential_links:
|
||||
href = link.get('href', '')
|
||||
if href and href.startswith('http'):
|
||||
filename = self._generate_filename(final_url)
|
||||
return href, filename
|
||||
|
||||
# If nothing found, raise error
|
||||
raise Exception("Could not find download link on page")
|
||||
|
||||
except Exception as e:
|
||||
raise Exception(f"Error extracting Anime-Ultime link: {str(e)}")
|
||||
|
||||
def _generate_filename(self, url: str) -> str:
|
||||
"""Generate filename from URL"""
|
||||
# Extract anime name and episode from URL
|
||||
# URL formats:
|
||||
# - info-0-1/30200
|
||||
# - info-0-1/30200/Naruto-OAV-01-vostfr
|
||||
# - file-0-1/2991-Naruto-OAV
|
||||
|
||||
anime_name = "Anime"
|
||||
episode = "01"
|
||||
|
||||
# Format: info-0-1/EPISODE_ID or info-0-1/EPISODE_ID/NAME-EP-vostfr
|
||||
if 'info-0-1/' in url:
|
||||
# Extract episode ID
|
||||
ep_match = re.search(r'info-0-1/(\d+)', url)
|
||||
if ep_match:
|
||||
ep_id = ep_match.group(1)
|
||||
|
||||
# Try to get anime name from URL path
|
||||
name_match = re.search(r'info-0-1/\d+/([^/]+)', url)
|
||||
if name_match:
|
||||
raw_name = name_match.group(1)
|
||||
# Extract episode number
|
||||
ep_num_match = re.search(r'-(\d+)-vostfr$', raw_name, re.I)
|
||||
if ep_num_match:
|
||||
episode = ep_num_match.group(1).zfill(2)
|
||||
# Remove episode number and suffix from name
|
||||
anime_name = re.sub(r'-\d+-vostfr$', '', raw_name, flags=re.I).replace('-', ' ')
|
||||
else:
|
||||
# Just use the ID
|
||||
anime_name = f"Episode {ep_id}"
|
||||
else:
|
||||
anime_name = f"Episode {ep_id}"
|
||||
|
||||
elif 'file-0-1/' in url:
|
||||
# Extract from file-0-1/ID-NAME format
|
||||
file_match = re.search(r'file-0-1/\d+-(.+)$', url)
|
||||
if file_match:
|
||||
anime_name = file_match.group(1).replace('-', ' ')
|
||||
|
||||
# Sanitize filename
|
||||
anime_name = anime_name.replace('/', ' ').strip()
|
||||
filename = f"{anime_name} - Episode {episode}.mp4"
|
||||
return filename.title()
|
||||
|
||||
async def search_anime(self, query: str, lang: str = "vostfr") -> list[dict]:
|
||||
"""
|
||||
Search for anime on anime-ultime
|
||||
Returns list of anime with title, url, and cover image
|
||||
"""
|
||||
try:
|
||||
import time
|
||||
start = time.time()
|
||||
print(f"[ANIME-ULTIME] Searching for '{query}' ({lang})...")
|
||||
|
||||
# Anime-Ultime uses POST for search
|
||||
search_url = "https://www.anime-ultime.net/search-0-1"
|
||||
|
||||
response = await self.client.post(search_url, data={'search': query})
|
||||
soup = BeautifulSoup(response.text, 'lxml')
|
||||
|
||||
elapsed = time.time() - start
|
||||
print(f"[ANIME-ULTIME] Got response {response.status_code} in {elapsed:.2f}s")
|
||||
|
||||
results = []
|
||||
|
||||
# Look for search result links - better parsing
|
||||
# Search results use file-0-1/ pattern, not info-
|
||||
search_results = soup.find_all('a', href=re.compile(r'file-0-1/'))
|
||||
|
||||
seen_urls = set()
|
||||
for result in search_results[:10]: # Limit to 10 results
|
||||
href = result.get('href', '')
|
||||
raw_title = result.get_text().strip()
|
||||
|
||||
# Skip if no href
|
||||
if not href:
|
||||
continue
|
||||
|
||||
# Skip duplicates
|
||||
if href in seen_urls:
|
||||
continue
|
||||
seen_urls.add(href)
|
||||
|
||||
# Extract better title from URL or parent elements
|
||||
better_title = raw_title
|
||||
|
||||
# If raw_title is just "Télécharger" or similar, try to find better title
|
||||
if len(raw_title) < 5 or raw_title.lower() in ['télécharger', 'download', 'ddl']:
|
||||
# Try to extract from URL (file-0-1/ID-Title format)
|
||||
url_match = re.search(r'file-0-1/\d+-(.+)$', href)
|
||||
if url_match:
|
||||
better_title = url_match.group(1).replace('-', ' ').title()
|
||||
|
||||
# If still no good title, look at parent/row elements
|
||||
if len(better_title) < 5:
|
||||
# Check parent row (table structure)
|
||||
row = result.find_parent(['tr', 'td', 'div'])
|
||||
if row:
|
||||
# Look for text in the row that's not the link text
|
||||
row_text = row.get_text().strip()
|
||||
# Remove the link text from row text
|
||||
if raw_title in row_text:
|
||||
row_text = row_text.replace(raw_title, '').strip()
|
||||
if len(row_text) > 5 and len(row_text) < 100:
|
||||
better_title = row_text
|
||||
|
||||
# Make URL absolute
|
||||
if not href.startswith('http'):
|
||||
href = urljoin("https://www.anime-ultime.net/", href)
|
||||
|
||||
results.append({
|
||||
'title': better_title,
|
||||
'url': href,
|
||||
'type': 'search_result'
|
||||
})
|
||||
|
||||
print(f"[ANIME-ULTIME] Found {len(results)} results")
|
||||
return results
|
||||
|
||||
except Exception as e:
|
||||
print(f"[ANIME-ULTIME] Error: {e}")
|
||||
return []
|
||||
|
||||
async def get_episodes(self, anime_url: str, lang: str = "vostfr") -> list[dict]:
|
||||
"""
|
||||
Get list of episodes for an anime
|
||||
Returns list of episode numbers and their URLs
|
||||
"""
|
||||
try:
|
||||
response = await self.client.get(anime_url)
|
||||
soup = BeautifulSoup(response.text, 'lxml')
|
||||
|
||||
episodes = []
|
||||
|
||||
# Look for episode links - anime-ultime uses info-XXXXX-Name-XX-vostfr format
|
||||
# The URL pattern is info-0-1/ID-Anime-Name-XX-vostfr where XX is episode number
|
||||
episode_links = soup.find_all('a', href=re.compile(r'info-0-1/\d+'))
|
||||
|
||||
for link in episode_links:
|
||||
href = link.get('href', '')
|
||||
text = link.get_text().strip()
|
||||
|
||||
# Extract episode number from URL pattern
|
||||
# Matches: info-0-1/30200/Naruto-OAV-01-vostfr
|
||||
match = re.search(r'-(\d+)-vostfr$', href, re.I)
|
||||
if not match:
|
||||
# Try other patterns
|
||||
match = re.search(r'Episode[-\s]?(\d+)', href, re.I)
|
||||
if not match:
|
||||
# Try to extract from text
|
||||
match = re.search(r'(\d+)', text)
|
||||
|
||||
if match:
|
||||
episode_num = match.group(1).zfill(2) # Pad with zero
|
||||
|
||||
# Extract the episode ID from href and build correct URL
|
||||
# href might be "info-0-1/30200" or "info-0-1/30200/..."
|
||||
# We need: https://www.anime-ultime.net/info-0-1/30200
|
||||
ep_id_match = re.search(r'info-0-1/(\d+)', href)
|
||||
if ep_id_match:
|
||||
ep_id = ep_id_match.group(1)
|
||||
# Build the correct episode URL
|
||||
episode_url = f"https://www.anime-ultime.net/info-0-1/{ep_id}"
|
||||
else:
|
||||
# Fallback to making URL absolute
|
||||
if not href.startswith('http'):
|
||||
href = urljoin(anime_url, href)
|
||||
episode_url = href
|
||||
|
||||
episodes.append({
|
||||
'episode': episode_num,
|
||||
'url': episode_url,
|
||||
'title': text
|
||||
})
|
||||
|
||||
# Remove duplicates and sort
|
||||
seen = set()
|
||||
unique_episodes = []
|
||||
for ep in episodes:
|
||||
if ep['episode'] not in seen:
|
||||
seen.add(ep['episode'])
|
||||
unique_episodes.append(ep)
|
||||
|
||||
unique_episodes.sort(key=lambda x: int(x['episode']))
|
||||
|
||||
return unique_episodes
|
||||
|
||||
except Exception as e:
|
||||
print(f"Error getting episodes: {e}")
|
||||
return []
|
||||
Reference in New Issue
Block a user