feat: Add SendVid downloader support

Add complete support for the SendVid video hosting service, which Anime-Sama
uses for anime series such as Hell's Paradise.

Changes:
- Create SendVidDownloader class with proper headers to avoid 403 errors
- Add SendVid detection and handling in AnimeSamaDownloader
- Update download_manager to include SendVid-specific headers
- Support custom episode naming (e.g., "Hells Paradise - Episode 01.mp4")

Technical details:
- SendVid embed pages require User-Agent and Referer headers
- Direct MP4 URLs are extracted from <source> tags and carry IP/time-based parameters
- Tested with Hell's Paradise Episode 01 (7MB, 24min, 1280x720)

Generated with [Claude Code](https://claude.ai/code)
via [Happy](https://happy.engineering)

Co-Authored-By: Claude <noreply@anthropic.com>
Co-Authored-By: Happy <yesreply@happy.engineering>
This commit is contained in:
root
2026-01-23 08:17:10 +00:00
commit cb3ea8d926
25 changed files with 4657 additions and 0 deletions
+475
View File
@@ -0,0 +1,475 @@
from .base import BaseDownloader
from bs4 import BeautifulSoup
import re
import httpx
from urllib.parse import urljoin, unquote
class AnimeSamaDownloader(BaseDownloader):
    """Downloader for anime-sama.org / anime-sama.store and the other domains in BASE_DOMAINS."""
    # Domains recognised as Anime-Sama. This is a mutable class-level list shared
    # by all instances: update_domains() inserts newly discovered domains at the
    # front of it at runtime.
    BASE_DOMAINS = ["anime-sama.si", "www.anime-sama.si", "anime-sama.org", "anime-sama.store", "anime-sama.eu"]
@classmethod
async def get_current_domain(cls) -> str:
    """Resolve the currently active Anime-Sama domain.

    Fetches https://anime-sama.pw and reads the target of its primary
    button (``<a class="btn-primary">``). If that link is missing or has no
    host part, falls back to the first ``https://`` link whose host contains
    ``anime-sama.`` and is not anime-sama.pw itself.

    Returns:
        The host of the discovered URL (e.g. ``'anime-sama.si'``), or the
        default ``'anime-sama.si'`` when nothing usable is found or any
        error occurs. Never an empty string.
    """
    # urlparse is not part of the module-level imports, so bring it in here.
    from urllib.parse import urlparse

    default_domain = "anime-sama.si"
    portal_hosts = ('anime-sama.pw', 'www.anime-sama.pw')
    try:
        async with httpx.AsyncClient(timeout=10.0, follow_redirects=True) as client:
            response = await client.get("https://anime-sama.pw")
            soup = BeautifulSoup(response.text, 'lxml')

            # Preferred source: the primary button/link of the page.
            primary_link = soup.find('a', class_='btn-primary')
            if primary_link and primary_link.get('href'):
                domain = urlparse(primary_link['href']).netloc
                # A relative href has an empty netloc. Returning '' would
                # poison BASE_DOMAINS (every URL contains the empty string),
                # so only accept a real host and otherwise fall through.
                if domain:
                    print(f"[ANIME-SAMA] Current domain from anime-sama.pw: {domain}")
                    return domain

            # Fallback: any absolute https link pointing at an anime-sama.* host.
            for link in soup.find_all('a', href=True):
                href = link['href']
                if not href.startswith('https://'):
                    continue
                domain = urlparse(href).netloc
                # Test the host, not the whole href, so that a link such as
                # https://other.site/?to=anime-sama.si is not mistaken for one.
                if 'anime-sama.' in domain and domain not in portal_hosts:
                    print(f"[ANIME-SAMA] Found domain via fallback: {domain}")
                    return domain

            print("[ANIME-SAMA] Could not determine current domain, using default")
            return default_domain
    except Exception as e:
        # Best-effort lookup: any failure degrades to the default domain.
        print(f"[ANIME-SAMA] Error fetching current domain: {e}")
        return default_domain
@classmethod
async def update_domains(cls) -> None:
    """Register the currently active domain in ``BASE_DOMAINS``.

    Asks ``get_current_domain()`` for the active host and puts it, followed
    by its ``www.`` variant, at the front of ``BASE_DOMAINS`` when not
    already present. The list is mutated in place. Meant to be called
    periodically to keep up with domain changes.

    An empty result is ignored: an empty string in ``BASE_DOMAINS`` would
    make the substring test in ``can_handle`` match every URL. Errors are
    logged and swallowed; the method never raises.
    """
    try:
        resolved = await cls.get_current_domain()
        # can_handle() compares against the lowercased URL, so store the
        # host lowercased as well.
        current_domain = (resolved or "").strip().lower()
        if not current_domain:
            print("[ANIME-SAMA] No current domain resolved, keeping known domains")
            return

        candidates = [current_domain]
        if not current_domain.startswith('www.'):
            candidates.append(f'www.{current_domain}')

        new_domains = [domain for domain in candidates if domain not in cls.BASE_DOMAINS]
        # Slice-insert at the front keeps the bare domain ahead of its www
        # variant, matching the ordering of the static entries.
        cls.BASE_DOMAINS[:0] = new_domains
        for domain in new_domains:
            print(f"[ANIME-SAMA] Added new domain: {domain}")
    except Exception as e:
        print(f"[ANIME-SAMA] Error updating domains: {e}")
def can_handle(self, url: str) -> bool:
    """Tell whether *url* mentions one of the known Anime-Sama domains.

    The check is a case-insensitive substring test of every ``BASE_DOMAINS``
    entry against the whole URL string, so a combined
    ``video_url|anime_page_url|episode_title`` value also matches through
    its anime page part.
    """
    for known_domain in self.BASE_DOMAINS:
        if known_domain in url.lower():
            return True
    return False
async def get_download_link(self, url: str) -> tuple[str, str]:
    """Resolve *url* to a ``(video_url, filename)`` pair.

    Three input shapes are handled, in this order:

    1. ``video_url|anime_page_url|episode_title`` (the combined format built
       by ``get_episodes``; the title is optional). vidmoly and sendvid
       video URLs are delegated to their extractors; any other host is
       returned untouched with a filename derived from the anime page URL.
    2. A bare vidmoly or sendvid URL, delegated to the matching extractor.
    3. An anime-sama page URL. The page is fetched and searched first for
       player iframes, then for ``<video>`` / ``<source>`` tags.

    Raises:
        Exception: for every failure, with the message prefixed by
            ``"Error extracting AnimeSama link: "`` and the original error
            attached as ``__cause__``.
    """
    try:
        print(f"[ANIME-SAMA] Extracting link from: {url}")

        # Shape 1: combined "video|anime page|title" value.
        if '|' in url:
            parts = url.split('|')
            video_url = parts[0]
            anime_page_url = parts[1]  # always present because '|' is in url
            episode_title = parts[2] if len(parts) > 2 else None
            print(f"[ANIME-SAMA] Split URL - video: {video_url[:60]}..., anime: {anime_page_url}, episode: {episode_title}")

            if 'vidmoly' in video_url:
                return await self._extract_from_vidmoly(video_url, anime_page_url, episode_title)
            if 'sendvid.com' in video_url:
                return await self._extract_from_sendvid(video_url, anime_page_url, episode_title)

            # Unknown host: hand the URL back as-is, only the name is computed.
            if episode_title:
                filename = f"{self._generate_anime_name(anime_page_url)} - {episode_title}.mp4"
            else:
                filename = self._generate_filename_from_anime_url(anime_page_url)
            return video_url, filename

        # Shape 2: bare third-party host URL.
        if 'vidmoly' in url:
            return await self._extract_from_vidmoly(url)
        if 'sendvid.com' in url:
            return await self._extract_from_sendvid(url)

        # Shape 3: anime-sama page, look for an embedded player or video tag.
        if 'anime-sama' in url.lower():
            response = await self.client.get(url, follow_redirects=True)
            final_url = str(response.url)
            soup = BeautifulSoup(response.text, 'lxml')

            player_hints = ('vidmoly', 'player', 'stream', 'play', 'embed')
            for iframe in soup.find_all('iframe'):
                src = iframe.get('src', '')
                if not src or not any(hint in src for hint in player_hints):
                    continue
                # Resolve relative and protocol-relative ("//host/...") sources
                # against the page URL instead of silently skipping them.
                player_url = src if src.startswith('http') else urljoin(final_url, src)
                if not player_url.startswith('http'):
                    continue
                print(f"[ANIME-SAMA] Found iframe: {player_url}")
                video_url = await self._extract_from_player(player_url)
                if video_url:
                    return video_url, self._generate_filename(final_url)

            for video in soup.find_all('video'):
                # The tag's own src wins over its nested <source> children.
                candidates = [video.get('src', '')]
                candidates.extend(source.get('src', '') for source in video.find_all('source'))
                for src in candidates:
                    if not src:
                        continue
                    if not src.startswith('http'):
                        src = urljoin(final_url, src)
                    return src, self._generate_filename(final_url)

        raise Exception("Could not find video link on page")
    except Exception as e:
        # Keep the historical message but chain the cause so the original
        # traceback is not lost.
        raise Exception(f"Error extracting AnimeSama link: {str(e)}") from e
async def _extract_from_vidmoly(self, url: str, anime_page_url: str | None = None, episode_title: str | None = None) -> tuple[str, str]:
    """Resolve a vidmoly player URL by delegating to ``VidMolyDownloader``.

    Args:
        url: The vidmoly player URL.
        anime_page_url: Optional anime-sama catalogue URL used to derive the
            anime name for the filename.
        episode_title: Optional episode label (e.g. ``"Episode 01"``).

    Returns:
        ``(url, filename)``: the ORIGINAL vidmoly URL together with the final
        filename. If the delegate reported a different filename and that file
        exists under ``downloads/``, it is moved to ``downloads/<filename>``
        first.

    Raises:
        Exception: wrapping any failure, prefixed with
            ``"Error extracting from vidmoly: "`` and chained to the cause.
    """
    try:
        print(f"[ANIME-SAMA] Extracting from vidmoly: {url}")
        print("[ANIME-SAMA] Delegating to VidMolyDownloader...")

        import os
        # Imported lazily, as in the rest of this class's delegations.
        from .vidmoly import VidMolyDownloader

        # Decide on the final filename before delegating.
        if episode_title and anime_page_url:
            anime_name = self._generate_anime_name(anime_page_url)
            target_filename = f"{anime_name} - {episode_title}.mp4"
            print(f"[ANIME-SAMA] Generated filename: {target_filename} (episode: {episode_title})")
        elif anime_page_url:
            target_filename = self._generate_filename_from_anime_url(anime_page_url)
            print(f"[ANIME-SAMA] Generated filename: {target_filename} (no episode title)")
        else:
            target_filename = None
            print("[ANIME-SAMA] No target_filename generated")

        vidmoly_downloader = VidMolyDownloader()
        # NOTE(review): the delegate appears to download the file itself
        # (the code below moves its output); confirm against VidMolyDownloader.
        if target_filename:
            _video_url, temp_filename = await vidmoly_downloader.get_download_link(url, target_filename=target_filename)
        else:
            _video_url, temp_filename = await vidmoly_downloader.get_download_link(url)

        filename = target_filename or temp_filename
        print(f"[ANIME-SAMA] Got video: {filename}")

        # Move the delegate's output to the target name when they differ.
        if temp_filename and temp_filename != filename:
            # temp_filename might be a full path or just the name.
            temp_path = temp_filename if os.path.isabs(temp_filename) else os.path.join('downloads', temp_filename)
            if os.path.exists(temp_path):
                final_path = os.path.join('downloads', filename)
                # os.replace overwrites an existing destination in one step.
                # The former remove-then-rename sequence could delete the
                # video when both paths referred to the same file.
                os.replace(temp_path, final_path)
                print(f"[ANIME-SAMA] Renamed {temp_filename} -> {filename}")
            else:
                print(f"[ANIME-SAMA] Warning: temp file not found: {temp_path}")

        # Return the original VidMoly URL. The file already exists, so the
        # download manager is expected to skip it (per the original note).
        return url, filename
    except Exception as e:
        print(f"[ANIME-SAMA] Vidmoly extraction error: {e}")
        raise Exception(f"Error extracting from vidmoly: {str(e)}") from e
async def _extract_from_sendvid(self, url: str, anime_page_url: str | None = None, episode_title: str | None = None) -> tuple[str, str]:
    """Resolve a sendvid player URL by delegating to ``SendVidDownloader``.

    Args:
        url: The sendvid player/embed URL.
        anime_page_url: Optional anime-sama catalogue URL used to derive the
            anime name for the filename.
        episode_title: Optional episode label (e.g. ``"Episode 01"``).

    Returns:
        ``(video_url, filename)`` where ``video_url`` is whatever the delegate
        returned and ``filename`` is the name generated here, or the
        delegate's own filename when none could be generated. Nothing is
        downloaded by this method.

    Raises:
        Exception: wrapping any failure, prefixed with
            ``"Error extracting from sendvid: "`` and chained to the cause.
    """
    try:
        print(f"[ANIME-SAMA] Extracting from sendvid: {url}")
        print("[ANIME-SAMA] Delegating to SendVidDownloader...")

        # Imported lazily, as in the rest of this class's delegations.
        from .sendvid import SendVidDownloader

        # Decide on the final filename before delegating.
        if episode_title and anime_page_url:
            anime_name = self._generate_anime_name(anime_page_url)
            target_filename = f"{anime_name} - {episode_title}.mp4"
            print(f"[ANIME-SAMA] Generated filename: {target_filename} (episode: {episode_title})")
        elif anime_page_url:
            target_filename = self._generate_filename_from_anime_url(anime_page_url)
            print(f"[ANIME-SAMA] Generated filename: {target_filename} (no episode title)")
        else:
            target_filename = None
            print("[ANIME-SAMA] No target_filename generated")

        sendvid_downloader = SendVidDownloader()
        if target_filename:
            video_url, filename = await sendvid_downloader.get_download_link(url, target_filename=target_filename)
        else:
            video_url, filename = await sendvid_downloader.get_download_link(url)

        # Our generated name takes precedence over the delegate's.
        filename = target_filename or filename
        print(f"[ANIME-SAMA] Got video: {filename}")

        # The direct video URL is returned; the actual download happens
        # elsewhere (the download manager, per the original note).
        return video_url, filename
    except Exception as e:
        print(f"[ANIME-SAMA] SendVid extraction error: {e}")
        raise Exception(f"Error extracting from sendvid: {str(e)}") from e
def _generate_filename_from_anime_url(self, anime_url: str) -> str:
"""Generate filename from anime-sama anime page URL"""
try:
# Extract anime name from URL like: https://anime-sama.si/catalogue/naruto/saison1/vostfr/
# Format: /catalogue/{anime}/saison{N}/{lang}/
parts = anime_url.split('/')
for i, part in enumerate(parts):
if part == 'catalogue' and i + 1 < len(parts):
anime_name = parts[i + 1].replace('-', ' ').title()
# Try to find episode number
episode = "01"
for j, part2 in enumerate(parts):
if 'saison' in part2 and j + 2 < len(parts):
# Look for episode in the remaining path
pass
return f"{anime_name} - Episode {episode}.mp4"
# Fallback
return "Anime - Episode 01.Mp4"
except:
return "Anime - Episode 01.Mp4"
def _generate_anime_name(self, anime_url: str) -> str:
"""Extract just the anime name from anime-sama URL"""
try:
# Extract anime name from URL like: https://anime-sama.si/catalogue/naruto/saison1/vostfr/
parts = anime_url.split('/')
for i, part in enumerate(parts):
if part == 'catalogue' and i + 1 < len(parts):
return parts[i + 1].replace('-', ' ').title()
# Fallback
return "Anime"
except:
return "Anime"
async def _extract_from_player(self, player_url: str) -> str | None:
    """Try to find a direct video URL inside a player page.

    The page at *player_url* is fetched and searched, in order, for:

    1. a ``<video>`` tag with a ``src`` or ``data-src`` attribute,
    2. a ``<source>`` tag whose ``src`` contains ``mp4``, ``m3u8`` or ``mkv``,
    3. an absolute ``.mp4`` / ``.m3u8`` URL inside an inline script.

    Relative tag sources are resolved against *player_url*.

    Returns:
        The first URL found, or ``None`` when nothing matches or the fetch
        or parse fails. Failures are logged; this method does not raise.
    """
    def absolute(src: str) -> str:
        # Absolute http(s) URLs are returned untouched.
        return src if src.startswith('http') else urljoin(player_url, src)

    try:
        response = await self.client.get(player_url)
        soup = BeautifulSoup(response.text, 'lxml')

        for video in soup.find_all('video'):
            src = video.get('src') or video.get('data-src')
            if src:
                return absolute(src)

        for source in soup.find_all('source'):
            src = source.get('src')
            if src and any(ext in src for ext in ('mp4', 'm3u8', 'mkv')):
                return absolute(src)

        for script in soup.find_all('script'):
            if script.string:
                match = re.search(r'(https?://[^"\'>\s]+\.(?:mp4|m3u8)(?:\?[^"\'>\s]*)?)', script.string)
                if match:
                    return match.group(1)
    except Exception as e:
        # Best-effort lookup: report the failure rather than hiding it.
        # Catching Exception instead of a bare except also lets task
        # cancellation propagate.
        print(f"[ANIME-SAMA] Player extraction failed for {player_url}: {e}")
    return None
def _generate_filename(self, url: str) -> str:
"""Generate filename from URL"""
# Extract anime name and episode info from URL
# URL format: .../catalogue/{anime}/saison{N}/{vostfr|vf}/episode-{N}
parts = url.split('/')
anime_name = "anime"
episode = "1"
for i, part in enumerate(parts):
if part == 'catalogue' and i + 1 < len(parts):
anime_name = parts[i + 1].replace('-', ' ')
elif 'episode-' in part:
episode = part.replace('episode-', '')
elif part in ['vostfr', 'vf']:
lang = part.upper()
filename = f"{anime_name} - Episode {episode}.mp4"
return filename.title()
async def search_anime(self, query: str, lang: str = "vostfr") -> list[dict]:
    """Look up an anime by guessing its catalogue URL.

    The query is lowercased, spaces become hyphens and apostrophes/colons are
    dropped; the result is probed at
    ``https://<current domain>/catalogue/<slug>/saison1/<lang>/``. The page
    counts as a hit when it answers 200 and mentions ``selectEpisodes`` or
    ``episodes.js``.

    Returns:
        A one-element list ``[{'title', 'url', 'type': 'direct'}]`` on a
        hit, otherwise an empty list (also on any error, which is logged).
    """
    import time

    try:
        # Refresh the known domains first so the current one is registered.
        await self.update_domains()

        started_at = time.time()
        print(f"[ANIME-SAMA] Searching for '{query}' ({lang})...")

        # NOTE(review): update_domains() above already resolves the current
        # domain, so this is a second lookup of the same information.
        domain = await self.get_current_domain()

        # Build the URL slug from the query.
        slug = query.lower()
        for old, new in ((' ', '-'), ("'", ''), (':', '')):
            slug = slug.replace(old, new)
        target = f"https://{domain}/catalogue/{slug}/saison1/{lang}/"

        response = await self.client.get(target, follow_redirects=True)
        elapsed = time.time() - started_at
        print(f"[ANIME-SAMA] Got response {response.status_code} in {elapsed:.2f}s")

        is_ok = response.status_code == 200
        # A valid anime page references its episode selector / episodes.js.
        if is_ok and ('selectEpisodes' in response.text or 'episodes.js' in response.text):
            found_url = str(response.url)
            print(f"[ANIME-SAMA] Found anime at {found_url}")
            return [{'title': query, 'url': found_url, 'type': 'direct'}]

        print(f"[ANIME-SAMA] No anime found (status: {response.status_code})")
        return []
    except Exception as e:
        print(f"[ANIME-SAMA] Error: {str(e)}")
        return []
async def get_episodes(self, anime_url: str, lang: str = "vostfr") -> list[dict]:
    """List the episodes available on an anime page.

    Primary source is the page's ``episodes.js`` file, which holds arrays
    like ``var eps1 = ['url1', 'url2', ...]``. Only the first array is used
    and episodes are numbered by position. Each entry's ``url`` is the
    combined value ``video_url|anime_page_url|episode_title`` that
    ``get_download_link`` understands.

    If ``episodes.js`` is absent, cannot be fetched or yields no URL, the
    page HTML is scanned for ``episode-<N>`` links instead.

    Args:
        anime_url: URL of the anime season page.
        lang: Currently unused; kept for interface compatibility.

    Returns:
        A list of ``{'episode', 'url', 'title'}`` dicts ordered by episode
        number, or an empty list when nothing is found or an error occurs
        (errors are logged, not raised).
    """
    try:
        response = await self.client.get(anime_url)
        page_html = response.text

        # Preferred path: the versioned episodes.js referenced by the page.
        version_match = re.search(r'episodes\.js\?filever=(\d+)', page_html)
        if version_match:
            file_ver = version_match.group(1)
            episodes_js_url = f"{anime_url.rstrip('/')}/episodes.js?filever={file_ver}"
            print(f"[ANIME-SAMA] Found episodes.js at {episodes_js_url}")
            try:
                js_response = await self.client.get(episodes_js_url)
                arrays = re.findall(r'var\s+eps\d+\s*=\s*(\[[^\]]+\])', js_response.text)
                if arrays:
                    # Accept single- and double-quoted URL literals.
                    episode_urls = re.findall(r"""['"](https?://[^'"]+)['"]""", arrays[0])
                    episodes = []
                    for idx, video_url in enumerate(episode_urls, start=1):
                        episode_num = str(idx).zfill(2)
                        episode_title = f'Episode {episode_num}'
                        episodes.append({
                            'episode': episode_num,
                            # Format: video_url|anime_page_url|episode_title
                            'url': f"{video_url}|{anime_url}|{episode_title}",
                            'title': episode_title,
                        })
                    # Only trust episodes.js when it produced something;
                    # otherwise give the HTML fallback a chance.
                    if episodes:
                        print(f"[ANIME-SAMA] Found {len(episodes)} episodes")
                        return episodes
            except Exception as e:
                print(f"[ANIME-SAMA] Error fetching episodes.js: {e}")

        # Fallback (old method): episode links in the HTML. The page is
        # parsed only here, since the episodes.js path never needs the tree.
        soup = BeautifulSoup(page_html, 'lxml')
        by_number: dict[str, dict] = {}
        for link in soup.find_all('a', href=True):
            href = link['href']
            match = re.search(r'episode-(\d+)', href)
            if not match:
                continue
            episode_num = match.group(1)
            # Keep the first link seen for each episode number.
            if episode_num not in by_number:
                by_number[episode_num] = {
                    'episode': episode_num,
                    'url': urljoin(anime_url, href),
                    # Same shape as the episodes.js entries.
                    'title': f'Episode {episode_num.zfill(2)}',
                }
        return sorted(by_number.values(), key=lambda ep: int(ep['episode']))
    except Exception as e:
        print(f"[ANIME-SAMA] Error getting episodes: {e}")
        return []