feat: Add SendVid downloader support
Add complete support for the SendVid video hosting service used by Anime-Sama for anime series like Hell's Paradise.

Changes:
- Create SendVidDownloader class with proper headers to avoid 403 errors
- Add SendVid detection and handling in AnimeSamaDownloader
- Update download_manager to include SendVid-specific headers
- Support custom episode naming (e.g., "Hells Paradise - Episode 01.mp4")

Technical details:
- SendVid embed pages require User-Agent and Referer headers
- Direct MP4 URLs are extracted from <source> tags with IP/time-based parameters
- Tested with Hell's Paradise Episode 01 (7MB, 24min, 1280x720)

Generated with [Claude Code](https://claude.ai/code) via [Happy](https://happy.engineering)

Co-Authored-By: Claude <noreply@anthropic.com>
Co-Authored-By: Happy <yesreply@happy.engineering>
This commit is contained in:
@@ -0,0 +1,48 @@
|
||||
from .base import BaseDownloader
|
||||
from .unfichier import UnFichierDownloader
|
||||
from .doodstream import DoodStreamDownloader
|
||||
from .rapidfile import RapidFileDownloader
|
||||
from .uptobox import UptoboxDownloader
|
||||
from .animesama import AnimeSamaDownloader
|
||||
from .animeultime import AnimeUltimeDownloader
|
||||
from .nekosama import NekoSamaDownloader
|
||||
from .vostfree import VostfreeDownloader
|
||||
from .vidmoly import VidMolyDownloader
|
||||
from .sendvid import SendVidDownloader
|
||||
|
||||
|
||||
def get_downloader(url: str) -> BaseDownloader:
    """Return the first downloader whose ``can_handle`` accepts ``url``.

    Candidates are tried in the order listed below (anime sites first, then
    file hosts). Each candidate is instantiated only when its turn comes, so
    classes listed after the match are never constructed and their HTTP
    clients are never created.

    Args:
        url: The URL to find a downloader for.

    Returns:
        The matching downloader instance, or a ``GenericDownloader`` when no
        dedicated downloader accepts the URL.
    """
    downloader_classes = (
        # Anime sites
        AnimeSamaDownloader,
        AnimeUltimeDownloader,
        NekoSamaDownloader,
        VostfreeDownloader,
        # File hosts
        UnFichierDownloader,
        UptoboxDownloader,
        DoodStreamDownloader,
        RapidFileDownloader,
        VidMolyDownloader,
        SendVidDownloader,
    )

    for downloader_cls in downloader_classes:
        downloader = downloader_cls()
        if downloader.can_handle(url):
            return downloader

    # Return generic downloader if no match
    return GenericDownloader()
|
||||
|
||||
|
||||
class GenericDownloader(BaseDownloader):
    """Fallback downloader for hosts that have no dedicated implementation."""

    def can_handle(self, url: str) -> bool:
        """Accept every URL; this downloader is used as the last resort."""
        return True

    async def get_download_link(self, url: str) -> tuple[str, str]:
        """Return ``url`` unchanged together with a filename taken from its path.

        The filename is the last path segment, percent-decoded. The query
        string and fragment are ignored so that ``video.mp4?token=x`` yields
        ``video.mp4``. ``"download"`` is used when no usable segment exists.

        Args:
            url: The URL to download as-is.

        Returns:
            ``(url, filename)``.
        """
        from urllib.parse import unquote, urlparse

        last_segment = urlparse(url).path.rsplit('/', 1)[-1]
        # Decoding may reveal path separators (%2F); neutralise them so the
        # name can never point outside the download directory.
        filename = unquote(last_segment).replace('/', '_').replace('\\', '_')
        if filename in ('', '.', '..'):
            filename = "download"
        return url, filename
|
||||
@@ -0,0 +1,475 @@
|
||||
from .base import BaseDownloader
|
||||
from bs4 import BeautifulSoup
|
||||
import re
|
||||
import httpx
|
||||
from urllib.parse import urljoin, unquote
|
||||
|
||||
|
||||
class AnimeSamaDownloader(BaseDownloader):
|
||||
"""Downloader for anime-sama.org / anime-sama.store"""
|
||||
|
||||
# Static list of known domains (will be updated dynamically)
|
||||
BASE_DOMAINS = ["anime-sama.si", "www.anime-sama.si", "anime-sama.org", "anime-sama.store", "anime-sama.eu"]
|
||||
|
||||
@classmethod
async def get_current_domain(cls) -> str:
    """
    Fetch the current active domain from anime-sama.pw

    The page is scanned first for an ``<a class="btn-primary">`` link, then
    for any ``https://`` link containing ``anime-sama.`` that is not the
    ``.pw`` site itself. A link is only accepted when it carries a host name,
    so a relative ``href`` can never produce an empty domain.

    Returns:
        The current domain (e.g., 'anime-sama.si'). ``'anime-sama.si'`` is
        returned when nothing could be determined or the request failed.
    """
    default_domain = "anime-sama.si"
    try:
        import httpx
        from bs4 import BeautifulSoup
        from urllib.parse import urlparse

        async with httpx.AsyncClient(timeout=10.0, follow_redirects=True) as client:
            response = await client.get("https://anime-sama.pw")

        # Look for the main link in the HTML
        soup = BeautifulSoup(response.text, 'lxml')

        # Look for the primary button/link
        primary_link = soup.find('a', class_='btn-primary')
        if primary_link and primary_link.get('href'):
            domain = urlparse(primary_link['href']).netloc  # e.g., 'anime-sama.si'
            # A relative href has no netloc; fall through to the scan below.
            if domain:
                print(f"[ANIME-SAMA] Current domain from anime-sama.pw: {domain}")
                return domain

        # Fallback: look for any anime-sama.* link
        for link in soup.find_all('a', href=True):
            href = link['href']
            if 'anime-sama.' in href and href.startswith('https://'):
                domain = urlparse(href).netloc
                if domain and domain not in ('anime-sama.pw', 'www.anime-sama.pw'):
                    print(f"[ANIME-SAMA] Found domain via fallback: {domain}")
                    return domain

        print("[ANIME-SAMA] Could not determine current domain, using default")
        return default_domain

    except Exception as e:
        # Best effort: any failure (network, parsing) falls back to the default.
        print(f"[ANIME-SAMA] Error fetching current domain: {e}")
        return default_domain
|
||||
|
||||
@classmethod
async def update_domains(cls) -> None:
    """
    Update the BASE_DOMAINS list with the current active domain

    The domain returned by ``get_current_domain`` and its ``www.`` variant
    are inserted at the front of ``BASE_DOMAINS`` when not already present.
    The domain is lower-cased first because ``can_handle`` compares against
    a lower-cased URL. An empty domain is ignored: an empty string is a
    substring of every URL and would make ``can_handle`` accept everything.

    Errors are logged and swallowed; ``BASE_DOMAINS`` is then left as it was.
    """
    try:
        current_domain = (await cls.get_current_domain() or "").strip().lower()
        if not current_domain:
            print("[ANIME-SAMA] No current domain resolved, keeping known domains")
            return

        # Add the current domain and its www variant if not already present
        domains_to_add = [current_domain]
        if not current_domain.startswith('www.'):
            domains_to_add.append(f'www.{current_domain}')

        for domain in domains_to_add:
            if domain not in cls.BASE_DOMAINS:
                # Insert at the beginning for priority
                cls.BASE_DOMAINS.insert(0, domain)
                print(f"[ANIME-SAMA] Added new domain: {domain}")

    except Exception as e:
        print(f"[ANIME-SAMA] Error updating domains: {e}")
|
||||
|
||||
def can_handle(self, url: str) -> bool:
    """Tell whether ``url`` mentions one of the known anime-sama domains.

    The comparison is a case-insensitive substring test against every entry
    of ``BASE_DOMAINS``.
    """
    for known_domain in self.BASE_DOMAINS:
        if known_domain in url.lower():
            return True
    return False
|
||||
|
||||
async def get_download_link(self, url: str) -> tuple[str, str]:
    """
    Extract download link from anime-sama URL

    Anime-Sama uses third-party video hosts (vidmoly, sendvid, etc.), so the
    work is delegated to the matching host extractor where one exists.

    ``url`` may be a plain URL or the combined form produced by
    ``get_episodes``: ``video_url|anime_page_url|episode_title`` (the last
    two parts are optional and are only used to build the filename).

    Returns:
        ``(download_url, filename)``.

    Raises:
        Exception: when no video could be located or an extractor failed;
            the original error is attached as the cause.
    """
    try:
        print(f"[ANIME-SAMA] Extracting link from: {url}")

        # Check if URL contains the anime page context (format: video_url|anime_page_url|episode_title?)
        if '|' in url:
            parts = url.split('|')
            video_url = parts[0]
            anime_page_url = parts[1] if len(parts) > 1 else None
            episode_title = parts[2] if len(parts) > 2 else None

            print(f"[ANIME-SAMA] Split URL - video: {video_url[:60]}..., anime: {anime_page_url}, episode: {episode_title}")

            # Extract video from the host URL with anime context for filename
            if 'vidmoly' in video_url:
                return await self._extract_from_vidmoly(video_url, anime_page_url, episode_title)
            if 'sendvid.com' in video_url:
                return await self._extract_from_sendvid(video_url, anime_page_url, episode_title)

            # Other hosts: hand the video URL back untouched, only name the file
            if episode_title:
                filename = f"{self._generate_anime_name(anime_page_url)} - {episode_title}.mp4"
            else:
                filename = self._generate_filename_from_anime_url(anime_page_url)
            return video_url, filename

        # Check if this is a third-party host URL
        if 'vidmoly' in url:
            return await self._extract_from_vidmoly(url)
        if 'sendvid.com' in url:
            return await self._extract_from_sendvid(url)

        # If it's an anime-sama page, try to find the video
        if 'anime-sama' in url.lower():
            response = await self.client.get(url, follow_redirects=True)
            final_url = str(response.url)
            soup = BeautifulSoup(response.text, 'lxml')

            # Look for iframe with video player
            player_hints = ('vidmoly', 'player', 'stream', 'play', 'embed')
            for iframe in soup.find_all('iframe'):
                src = iframe.get('src', '')
                if not src or not any(hint in src for hint in player_hints):
                    continue
                # Resolve protocol-relative ("//host/...") and relative sources
                player_url = urljoin(final_url, src)
                if not player_url.startswith('http'):
                    continue
                print(f"[ANIME-SAMA] Found iframe: {player_url}")
                # Try to extract video from the player
                video_url = await self._extract_from_player(player_url)
                if video_url:
                    return video_url, self._generate_filename(final_url)

            # Look for video tags
            for video in soup.find_all('video'):
                src = video.get('src', '')
                if src:
                    return urljoin(final_url, src), self._generate_filename(final_url)

                for source in video.find_all('source'):
                    src = source.get('src', '')
                    if src:
                        return urljoin(final_url, src), self._generate_filename(final_url)

        raise Exception("Could not find video link on page")

    except Exception as e:
        raise Exception(f"Error extracting AnimeSama link: {str(e)}") from e
|
||||
|
||||
async def _extract_from_vidmoly(self, url: str, anime_page_url: str = None, episode_title: str = None) -> tuple[str, str]:
    """Extract video URL from vidmoly player - delegate to VidMolyDownloader

    The filename returned by VidMolyDownloader is treated as a file under
    ``downloads/``; when it differs from the target filename and exists on
    disk, it is moved to the target name.

    Args:
        url: The vidmoly URL.
        anime_page_url: Optional anime page URL used to name the file.
        episode_title: Optional episode title used to name the file.

    Returns:
        ``(url, filename)`` - the original vidmoly URL and the final filename.

    Raises:
        Exception: wrapping any failure, with the original error as cause.
    """
    try:
        print(f"[ANIME-SAMA] Extracting from vidmoly: {url}")
        print(f"[ANIME-SAMA] Delegating to VidMolyDownloader...")

        import os
        from .vidmoly import VidMolyDownloader

        # Generate the target filename first
        target_filename = self._resolve_target_filename(anime_page_url, episode_title)

        # Use VidMolyDownloader to extract and download
        vidmoly_downloader = VidMolyDownloader()
        try:
            # Pass the target filename to VidMolyDownloader if available
            if target_filename:
                _, temp_filename = await vidmoly_downloader.get_download_link(url, target_filename=target_filename)
            else:
                _, temp_filename = await vidmoly_downloader.get_download_link(url)
        finally:
            # The delegate is created for this call only; release its client.
            await self._close_delegate(vidmoly_downloader)

        # Use the target filename
        filename = target_filename if target_filename else temp_filename

        print(f"[ANIME-SAMA] Got video: {filename}")

        # Rename the file if needed
        if temp_filename != filename:
            # temp_filename might be a full path or just the name
            temp_path = temp_filename if os.path.isabs(temp_filename) else os.path.join('downloads', temp_filename)

            if os.path.exists(temp_path):
                final_path = os.path.join('downloads', filename)
                # os.replace overwrites an existing destination in one step
                os.replace(temp_path, final_path)
                print(f"[ANIME-SAMA] Renamed {temp_filename} -> {filename}")
            else:
                print(f"[ANIME-SAMA] Warning: temp file not found: {temp_path}")

        # Return the original VidMoly URL - the file exists so download_manager will skip it
        return url, filename

    except Exception as e:
        print(f"[ANIME-SAMA] Vidmoly extraction error: {e}")
        raise Exception(f"Error extracting from vidmoly: {str(e)}") from e

async def _extract_from_sendvid(self, url: str, anime_page_url: str = None, episode_title: str = None) -> tuple[str, str]:
    """Extract video URL from sendvid player - delegate to SendVidDownloader

    Args:
        url: The sendvid URL.
        anime_page_url: Optional anime page URL used to name the file.
        episode_title: Optional episode title used to name the file.

    Returns:
        ``(video_url, filename)`` - the URL returned by SendVidDownloader and
        the target filename (or the delegate's filename when no target
        could be built).

    Raises:
        Exception: wrapping any failure, with the original error as cause.
    """
    try:
        print(f"[ANIME-SAMA] Extracting from sendvid: {url}")
        print(f"[ANIME-SAMA] Delegating to SendVidDownloader...")

        from .sendvid import SendVidDownloader

        # Generate the target filename first
        target_filename = self._resolve_target_filename(anime_page_url, episode_title)

        # Use SendVidDownloader to extract the video URL
        sendvid_downloader = SendVidDownloader()
        try:
            # Pass the target filename to SendVidDownloader if available
            if target_filename:
                video_url, filename = await sendvid_downloader.get_download_link(url, target_filename=target_filename)
            else:
                video_url, filename = await sendvid_downloader.get_download_link(url)
        finally:
            # The delegate is created for this call only; release its client.
            await self._close_delegate(sendvid_downloader)

        # Use the target filename
        filename = target_filename if target_filename else filename

        print(f"[ANIME-SAMA] Got video: {filename}")

        # Return the direct video URL (SendVid provides direct MP4 links)
        # The download_manager will handle the actual download
        return video_url, filename

    except Exception as e:
        print(f"[ANIME-SAMA] SendVid extraction error: {e}")
        raise Exception(f"Error extracting from sendvid: {str(e)}") from e

def _resolve_target_filename(self, anime_page_url: str = None, episode_title: str = None) -> str | None:
    """Build the output filename from the anime context, or None without an anime page URL."""
    if episode_title and anime_page_url:
        anime_name = self._generate_anime_name(anime_page_url)
        target_filename = f"{anime_name} - {episode_title}.mp4"
        print(f"[ANIME-SAMA] Generated filename: {target_filename} (episode: {episode_title})")
        return target_filename

    if anime_page_url:
        target_filename = self._generate_filename_from_anime_url(anime_page_url)
        print(f"[ANIME-SAMA] Generated filename: {target_filename} (no episode title)")
        return target_filename

    print(f"[ANIME-SAMA] No target_filename generated")
    return None

async def _close_delegate(self, downloader) -> None:
    """Close a delegate downloader, logging instead of raising if that fails."""
    try:
        await downloader.close()
    except Exception as close_error:
        # Cleanup must never hide the result (or the error) of the extraction.
        print(f"[ANIME-SAMA] Warning: could not close {type(downloader).__name__}: {close_error}")
|
||||
|
||||
def _generate_filename_from_anime_url(self, anime_url: str) -> str:
|
||||
"""Generate filename from anime-sama anime page URL"""
|
||||
try:
|
||||
# Extract anime name from URL like: https://anime-sama.si/catalogue/naruto/saison1/vostfr/
|
||||
# Format: /catalogue/{anime}/saison{N}/{lang}/
|
||||
parts = anime_url.split('/')
|
||||
for i, part in enumerate(parts):
|
||||
if part == 'catalogue' and i + 1 < len(parts):
|
||||
anime_name = parts[i + 1].replace('-', ' ').title()
|
||||
# Try to find episode number
|
||||
episode = "01"
|
||||
for j, part2 in enumerate(parts):
|
||||
if 'saison' in part2 and j + 2 < len(parts):
|
||||
# Look for episode in the remaining path
|
||||
pass
|
||||
return f"{anime_name} - Episode {episode}.mp4"
|
||||
# Fallback
|
||||
return "Anime - Episode 01.Mp4"
|
||||
except:
|
||||
return "Anime - Episode 01.Mp4"
|
||||
|
||||
def _generate_anime_name(self, anime_url: str) -> str:
|
||||
"""Extract just the anime name from anime-sama URL"""
|
||||
try:
|
||||
# Extract anime name from URL like: https://anime-sama.si/catalogue/naruto/saison1/vostfr/
|
||||
parts = anime_url.split('/')
|
||||
for i, part in enumerate(parts):
|
||||
if part == 'catalogue' and i + 1 < len(parts):
|
||||
return parts[i + 1].replace('-', ' ').title()
|
||||
# Fallback
|
||||
return "Anime"
|
||||
except:
|
||||
return "Anime"
|
||||
|
||||
async def _extract_from_player(self, player_url: str) -> str | None:
    """Try to extract direct video URL from player iframe

    The player page is searched, in order, for a ``<video>`` ``src`` /
    ``data-src``, a ``<source>`` ``src`` mentioning mp4/m3u8/mkv, and an
    absolute ``.mp4`` / ``.m3u8`` URL inside an inline script. Relative
    ``src`` values are resolved against the fetched page URL.

    Returns:
        The video URL, or ``None`` when nothing was found or the page could
        not be fetched or parsed (the failure is logged, not raised).
    """
    try:
        response = await self.client.get(player_url)
        base_url = str(response.url)
        soup = BeautifulSoup(response.text, 'lxml')

        # Check for video tags
        for video in soup.find_all('video'):
            src = video.get('src') or video.get('data-src')
            if src:
                return urljoin(base_url, src)

        # Check for source tags
        for source in soup.find_all('source'):
            src = source.get('src')
            if src and any(ext in src for ext in ('mp4', 'm3u8', 'mkv')):
                return urljoin(base_url, src)

        # Check scripts in player page
        for script in soup.find_all('script'):
            if script.string:
                match = re.search(r'(https?://[^"\'>\s]+\.(?:mp4|m3u8)(?:\?[^"\'>\s]*)?)', script.string)
                if match:
                    return match.group(1)

    except Exception as e:
        # Best effort: the caller treats None as "try the next candidate".
        print(f"[ANIME-SAMA] Player extraction failed for {player_url}: {e}")

    return None
|
||||
|
||||
def _generate_filename(self, url: str) -> str:
|
||||
"""Generate filename from URL"""
|
||||
# Extract anime name and episode info from URL
|
||||
# URL format: .../catalogue/{anime}/saison{N}/{vostfr|vf}/episode-{N}
|
||||
parts = url.split('/')
|
||||
|
||||
anime_name = "anime"
|
||||
episode = "1"
|
||||
|
||||
for i, part in enumerate(parts):
|
||||
if part == 'catalogue' and i + 1 < len(parts):
|
||||
anime_name = parts[i + 1].replace('-', ' ')
|
||||
elif 'episode-' in part:
|
||||
episode = part.replace('episode-', '')
|
||||
elif part in ['vostfr', 'vf']:
|
||||
lang = part.upper()
|
||||
|
||||
filename = f"{anime_name} - Episode {episode}.mp4"
|
||||
return filename.title()
|
||||
|
||||
async def search_anime(self, query: str, lang: str = "vostfr") -> list[dict]:
    """
    Search for anime on anime-sama

    There is no search request: the query is turned into a catalogue slug
    (lower-case, spaces to hyphens, apostrophes and colons removed) and the
    ``saison1`` page for ``lang`` is probed directly. The page counts as a
    hit when it answers 200 and mentions ``selectEpisodes`` or
    ``episodes.js``.

    Returns:
        A one-element list ``[{'title', 'url', 'type': 'direct'}]`` on a hit,
        otherwise an empty list (also for a blank query or on any error).
    """
    try:
        from urllib.parse import quote

        cleaned_query = (query or "").strip()
        if not cleaned_query:
            # A blank query would probe ".../catalogue//saison1/..."
            return []

        # Update domains before searching to ensure we have the current domain
        await self.update_domains()

        import time
        start = time.time()
        print(f"[ANIME-SAMA] Searching for '{query}' ({lang})...")

        # Use the current domain from anime-sama.pw
        # NOTE(review): update_domains() above already calls
        # get_current_domain(), so this is a second request for the same value.
        current_domain = await self.get_current_domain()

        # Convert query to URL format (lowercase, replace spaces with hyphens)
        query_formatted = cleaned_query.lower().replace(' ', '-').replace("'", '').replace(':', '')
        # Percent-encode so characters such as '/', '?' or '#' stay inside the slug
        search_url = f"https://{current_domain}/catalogue/{quote(query_formatted, safe='')}/saison1/{lang}/"

        response = await self.client.get(search_url, follow_redirects=True)

        elapsed = time.time() - start
        print(f"[ANIME-SAMA] Got response {response.status_code} in {elapsed:.2f}s")

        if response.status_code == 200:
            # Check if it's a valid anime page by looking for episode selector
            if 'selectEpisodes' in response.text or 'episodes.js' in response.text:
                print(f"[ANIME-SAMA] Found anime at {str(response.url)}")
                return [{
                    'title': query,
                    'url': str(response.url),
                    'type': 'direct'
                }]

        print(f"[ANIME-SAMA] No anime found (status: {response.status_code})")
        return []

    except Exception as e:
        print(f"[ANIME-SAMA] Error: {str(e)}")
        return []
|
||||
|
||||
async def get_episodes(self, anime_url: str, lang: str = "vostfr") -> list[dict]:
    """
    Get list of episodes for an anime

    Anime-Sama uses a JavaScript file (episodes.js) to store episode URLs.
    That file holds arrays such as ``var eps1 = ['url1', 'url2', ...]``; the
    first array that contains URLs is used and its entries are numbered
    from 01 in order. When no such array is available, the page's
    ``episode-N`` links are used instead.

    Returns:
        A list of dicts with keys ``episode``, ``url`` and ``title``. For
        episodes.js entries ``url`` is the combined form
        ``video_url|anime_page_url|episode_title``. Fallback entries are
        de-duplicated and sorted by episode number. An empty list is
        returned on error.
    """
    try:
        response = await self.client.get(anime_url)
        soup = BeautifulSoup(response.text, 'lxml')

        episodes = []

        # Try to find the episodes.js file in the HTML
        episodes_js_match = re.search(r'episodes\.js\?filever=(\d+)', response.text)
        if episodes_js_match:
            file_ver = episodes_js_match.group(1)
            # Build the URL to episodes.js
            episodes_js_url = f"{anime_url.rstrip('/')}/episodes.js?filever={file_ver}"

            print(f"[ANIME-SAMA] Found episodes.js at {episodes_js_url}")

            try:
                # Fetch the episodes.js file
                js_response = await self.client.get(episodes_js_url)
                js_content = js_response.text

                # Parse the JavaScript file to extract episode URLs
                # The file contains arrays like: var eps1 = ['url1', 'url2', ...]
                eps_matches = re.findall(r'var\s+eps\d+\s*=\s*(\[[^\]]+\])', js_content)

                # Use the first array that actually holds URLs (either quote style)
                episode_urls = []
                for urls_text in eps_matches:
                    episode_urls = re.findall(r"""['"](https?://[^'"]+)['"]""", urls_text)
                    if episode_urls:
                        break

                if episode_urls:
                    for idx, url in enumerate(episode_urls, start=1):
                        episode_num = str(idx).zfill(2)
                        episode_title = f'Episode {episode_num}'
                        # Store both the video URL, the anime page URL, and the episode title
                        # Format: video_url|anime_page_url|episode_title
                        combined_url = f"{url}|{anime_url}|{episode_title}"
                        episodes.append({
                            'episode': episode_num,
                            'url': combined_url,
                            'title': episode_title
                        })

                    print(f"[ANIME-SAMA] Found {len(episodes)} episodes")
                    return episodes

            except Exception as e:
                # Fall through to the HTML fallback below
                print(f"[ANIME-SAMA] Error fetching episodes.js: {e}")

        # Fallback: Try to find episode links in the HTML (old method)
        for link in soup.find_all('a', href=True):
            href = link['href']
            match = re.search(r'episode-(\d+)', href)
            if match:
                episode_num = match.group(1)
                episodes.append({
                    'episode': episode_num,
                    'url': urljoin(anime_url, href),
                    # Same keys as the episodes.js entries above
                    'title': f'Episode {episode_num}'
                })

        # Remove duplicates (first occurrence wins) and sort
        unique_by_number = {}
        for ep in episodes:
            unique_by_number.setdefault(ep['episode'], ep)

        return sorted(unique_by_number.values(), key=lambda ep: int(ep['episode']))

    except Exception as e:
        print(f"[ANIME-SAMA] Error getting episodes: {e}")
        return []
|
||||
@@ -0,0 +1,313 @@
|
||||
from .base import BaseDownloader
|
||||
from bs4 import BeautifulSoup
|
||||
import re
|
||||
import httpx
|
||||
from urllib.parse import urljoin
|
||||
|
||||
|
||||
class AnimeUltimeDownloader(BaseDownloader):
|
||||
"""Downloader for anime-ultime.net"""
|
||||
|
||||
BASE_DOMAINS = ["anime-ultime.com", "anime-ultime.net", "www.anime-ultime.net"]
|
||||
|
||||
def can_handle(self, url: str) -> bool:
    """Tell whether ``url`` mentions one of the known anime-ultime domains.

    The comparison is a case-insensitive substring test against every entry
    of ``BASE_DOMAINS``.
    """
    for known_domain in self.BASE_DOMAINS:
        if known_domain in url.lower():
            return True
    return False
|
||||
|
||||
async def get_download_link(self, url: str) -> tuple[str, str]:
    """
    Extract download link from anime-ultime URL

    The page is fetched (following redirects) and searched with these
    methods, in order; the first hit wins:

    0. ``og:video`` meta tag whose content ends with ``.mp4``
    1. links whose text looks like a download button and whose target is a
       known file host
    2. absolute ``<iframe>`` sources that look like a video player
    3. ``<video>`` / ``<source>`` ``src`` attributes (relative values are
       resolved against the final page URL)
    4. video URLs and ``ddl`` values found in inline scripts
    5. absolute links whose class mentions download / ddl / episode

    Returns:
        ``(download_url, filename)``; the filename is derived from the final
        page URL.

    Raises:
        Exception: when nothing was found or the page could not be
            processed; the original error is attached as the cause.
    """
    # Built once per call rather than once per <script> tag
    script_patterns = [
        re.compile(r'(https?://[^"\'>\s]+\.(?:mp4|m3u8|mkv)(?:\?[^"\'>\s]*)?)'),
        re.compile(r'"url":"([^"]+)"'),
        re.compile(r'"video":"([^"]+)"'),
        re.compile(r'"file":"([^"]+)"'),
        re.compile(r'file:\s*"([^"]+)"'),
    ]
    ddl_pattern = re.compile(r'ddl["\']?\s*:\s*["\']([^"\']+)["\']')
    video_extensions = ('mp4', 'm3u8', 'mkv')

    try:
        # Follow redirects
        response = await self.client.get(url, follow_redirects=True)
        final_url = str(response.url)

        # Parse the page
        soup = BeautifulSoup(response.text, 'lxml')

        # Method 0: Look for og:video meta tag (most reliable for anime-ultime)
        og_video = soup.find('meta', property='og:video')
        if og_video and og_video.get('content'):
            video_url = og_video['content']
            if video_url.endswith('.mp4'):
                filename = self._generate_filename(final_url)
                print(f"[ANIME-ULTIME] Found og:video link: {video_url}")
                return video_url, filename

        # Method 1: Look for direct download links (DDL)
        # Anime-Ultime often uses links to file hosts
        download_keywords = ('télécharger', 'download', 'ddl', 'mega', 'google', 'drive')
        file_hosts = ('mega.nz', 'drive.google.com', 'uptobox.com', '1fichier.com')
        for link in soup.find_all('a', href=True):
            href = link['href']
            text = link.get_text().lower()

            # Look for download buttons/links
            if any(keyword in text for keyword in download_keywords):
                # Check if it's a direct link or to a file host
                if any(host in href.lower() for host in file_hosts):
                    return href, self._generate_filename(final_url)

        # Method 2: Look for iframe with video player
        for iframe in soup.find_all('iframe'):
            src = iframe.get('src', '')
            if src and any(provider in src for provider in ('video', 'player', 'stream', 'play')):
                if src.startswith('http'):
                    return src, self._generate_filename(final_url)

        # Method 3: Look for video tags
        for video in soup.find_all('video'):
            src = video.get('src', '')
            if src:
                # urljoin leaves absolute URLs untouched and resolves relative ones
                return urljoin(final_url, src), self._generate_filename(final_url)

            # Check source tags
            for source in video.find_all('source'):
                src = source.get('src', '')
                if src:
                    return urljoin(final_url, src), self._generate_filename(final_url)

        # Method 4: Look in scripts for video URLs
        for script in soup.find_all('script'):
            if not script.string:
                continue

            # Look for common video patterns
            for pattern in script_patterns:
                for candidate in pattern.findall(script.string):
                    # Clean up escaped characters
                    candidate = candidate.replace('\\/', '/').replace('\\', '')
                    if any(ext in candidate for ext in video_extensions):
                        return candidate, self._generate_filename(final_url)

            # Look for anime-ultime specific patterns
            # They sometimes store links in JavaScript variables
            ddl_match = ddl_pattern.search(script.string)
            if ddl_match:
                ddl_url = ddl_match.group(1)
                if ddl_url.startswith('http'):
                    return ddl_url, self._generate_filename(final_url)

        # Method 5: Look for links with specific classes or IDs
        # Anime-Ultime might use specific class names for download links
        for link in soup.find_all('a', class_=re.compile(r'download|ddl|episode', re.I)):
            href = link.get('href', '')
            if href and href.startswith('http'):
                return href, self._generate_filename(final_url)

        # If nothing found, raise error
        raise Exception("Could not find download link on page")

    except Exception as e:
        raise Exception(f"Error extracting Anime-Ultime link: {str(e)}") from e
|
||||
|
||||
def _generate_filename(self, url: str) -> str:
|
||||
"""Generate filename from URL"""
|
||||
# Extract anime name and episode from URL
|
||||
# URL formats:
|
||||
# - info-0-1/30200
|
||||
# - info-0-1/30200/Naruto-OAV-01-vostfr
|
||||
# - file-0-1/2991-Naruto-OAV
|
||||
|
||||
anime_name = "Anime"
|
||||
episode = "01"
|
||||
|
||||
# Format: info-0-1/EPISODE_ID or info-0-1/EPISODE_ID/NAME-EP-vostfr
|
||||
if 'info-0-1/' in url:
|
||||
# Extract episode ID
|
||||
ep_match = re.search(r'info-0-1/(\d+)', url)
|
||||
if ep_match:
|
||||
ep_id = ep_match.group(1)
|
||||
|
||||
# Try to get anime name from URL path
|
||||
name_match = re.search(r'info-0-1/\d+/([^/]+)', url)
|
||||
if name_match:
|
||||
raw_name = name_match.group(1)
|
||||
# Extract episode number
|
||||
ep_num_match = re.search(r'-(\d+)-vostfr$', raw_name, re.I)
|
||||
if ep_num_match:
|
||||
episode = ep_num_match.group(1).zfill(2)
|
||||
# Remove episode number and suffix from name
|
||||
anime_name = re.sub(r'-\d+-vostfr$', '', raw_name, flags=re.I).replace('-', ' ')
|
||||
else:
|
||||
# Just use the ID
|
||||
anime_name = f"Episode {ep_id}"
|
||||
else:
|
||||
anime_name = f"Episode {ep_id}"
|
||||
|
||||
elif 'file-0-1/' in url:
|
||||
# Extract from file-0-1/ID-NAME format
|
||||
file_match = re.search(r'file-0-1/\d+-(.+)$', url)
|
||||
if file_match:
|
||||
anime_name = file_match.group(1).replace('-', ' ')
|
||||
|
||||
# Sanitize filename
|
||||
anime_name = anime_name.replace('/', ' ').strip()
|
||||
filename = f"{anime_name} - Episode {episode}.mp4"
|
||||
return filename.title()
|
||||
|
||||
async def search_anime(self, query: str, lang: str = "vostfr") -> list[dict]:
    """
    Search for anime on anime-ultime

    The query is POSTed to the site's search endpoint and the ``file-0-1/``
    links of the answer are collected. At most 10 distinct links are
    returned; links without an href and repeated links do not count towards
    that limit.

    Returns:
        A list of ``{'title', 'url', 'type': 'search_result'}`` dicts with
        absolute URLs, or an empty list on error.
    """
    max_results = 10
    placeholder_titles = ('télécharger', 'download', 'ddl')

    try:
        import time
        start = time.time()
        print(f"[ANIME-ULTIME] Searching for '{query}' ({lang})...")

        # Anime-Ultime uses POST for search
        search_url = "https://www.anime-ultime.net/search-0-1"

        response = await self.client.post(search_url, data={'search': query})
        soup = BeautifulSoup(response.text, 'lxml')

        elapsed = time.time() - start
        print(f"[ANIME-ULTIME] Got response {response.status_code} in {elapsed:.2f}s")

        results = []
        seen_urls = set()

        # Look for search result links - better parsing
        # Search results use file-0-1/ pattern, not info-
        for result in soup.find_all('a', href=re.compile(r'file-0-1/')):
            if len(results) >= max_results:
                break

            href = result.get('href', '')
            raw_title = result.get_text().strip()

            # Skip if no href, skip duplicates
            if not href or href in seen_urls:
                continue
            seen_urls.add(href)

            # Extract better title from URL or parent elements
            better_title = raw_title

            # If raw_title is just "Télécharger" or similar, try to find better title
            if len(raw_title) < 5 or raw_title.lower() in placeholder_titles:
                # Try to extract from URL (file-0-1/ID-Title format)
                url_match = re.search(r'file-0-1/\d+-(.+)$', href)
                if url_match:
                    better_title = url_match.group(1).replace('-', ' ').title()

            # If still no good title, look at parent/row elements
            if len(better_title) < 5:
                # Check parent row (table structure)
                row = result.find_parent(['tr', 'td', 'div'])
                if row:
                    # Look for text in the row that's not the link text
                    row_text = row.get_text().strip()
                    if raw_title in row_text:
                        row_text = row_text.replace(raw_title, '').strip()
                    if 5 < len(row_text) < 100:
                        better_title = row_text

            # Make URL absolute
            if not href.startswith('http'):
                href = urljoin("https://www.anime-ultime.net/", href)

            results.append({
                'title': better_title,
                'url': href,
                'type': 'search_result'
            })

        print(f"[ANIME-ULTIME] Found {len(results)} results")
        return results

    except Exception as e:
        print(f"[ANIME-ULTIME] Error: {e}")
        return []
|
||||
|
||||
async def get_episodes(self, anime_url: str, lang: str = "vostfr") -> list[dict]:
    """
    List the episodes linked from an anime-ultime page.

    Every link whose href contains ``info-0-1/<digits>`` is inspected. The
    episode number comes from a ``-NN-vostfr`` href suffix, else from an
    ``Episode NN`` fragment of the href, else from the first number in the
    link text; links yielding no number are dropped. The number is
    zero-padded to two digits.

    Returns:
        Dicts with keys ``episode``, ``url`` and ``title`` (the link text),
        one per episode number (first link wins), sorted numerically. An
        empty list is returned on error.
    """
    try:
        page = await self.client.get(anime_url)
        document = BeautifulSoup(page.text, 'lxml')

        collected = []

        # hrefs look like "info-0-1/30200" or "info-0-1/30200/Naruto-OAV-01-vostfr"
        for anchor in document.find_all('a', href=re.compile(r'info-0-1/\d+')):
            href = anchor.get('href', '')
            text = anchor.get_text().strip()

            # The first pattern that matches decides the episode number
            number_match = (
                re.search(r'-(\d+)-vostfr$', href, re.I)
                or re.search(r'Episode[-\s]?(\d+)', href, re.I)
                or re.search(r'(\d+)', text)
            )
            if not number_match:
                continue

            episode_num = number_match.group(1).zfill(2)  # Pad with zero

            # Rebuild the canonical URL https://www.anime-ultime.net/info-0-1/<id>
            id_match = re.search(r'info-0-1/(\d+)', href)
            if id_match:
                episode_url = f"https://www.anime-ultime.net/info-0-1/{id_match.group(1)}"
            else:
                # No ID found: use the link itself, made absolute
                if not href.startswith('http'):
                    href = urljoin(anime_url, href)
                episode_url = href

            collected.append({
                'episode': episode_num,
                'url': episode_url,
                'title': text
            })

        # Keep the first entry per episode number, then order numerically
        first_by_number = {}
        for entry in collected:
            if entry['episode'] not in first_by_number:
                first_by_number[entry['episode']] = entry

        return sorted(first_by_number.values(), key=lambda entry: int(entry['episode']))

    except Exception as e:
        print(f"Error getting episodes: {e}")
        return []
|
||||
@@ -0,0 +1,54 @@
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import Optional, Tuple
|
||||
import httpx
|
||||
import re
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
|
||||
class BaseDownloader(ABC):
    """Base class for all host downloaders.

    Subclasses must implement ``can_handle`` and ``get_download_link``.
    ``search_anime`` and ``get_episodes`` are optional hooks that return an
    empty list unless a subclass overrides them.
    """

    def __init__(self):
        # One shared async client per downloader: 10 s timeout, redirects followed.
        self.client = httpx.AsyncClient(timeout=10.0, follow_redirects=True)

    @abstractmethod
    async def get_download_link(self, url: str) -> Tuple[str, str]:
        """
        Extract direct download link and filename from host URL
        Returns: (download_url, filename)
        """
        pass

    @abstractmethod
    def can_handle(self, url: str) -> bool:
        """Check if this downloader can handle the given URL"""
        pass

    async def close(self):
        """Close the underlying HTTP client."""
        await self.client.aclose()

    async def _fetch_page(self, url: str) -> str:
        """GET ``url`` and return the response body as text.

        ``raise_for_status()`` is called on the response, so an HTTP error
        status propagates as an exception.
        """
        response = await self.client.get(url)
        response.raise_for_status()
        return response.text

    def _extract_filename_from_headers(self, headers: dict) -> Optional[str]:
        """Return the filename announced in a Content-Disposition header.

        Args:
            headers: Mapping of response headers; looked up with the
                lower-case key ``"content-disposition"``.

        Returns:
            The bare filename (no surrounding quotes, no trailing header
            parameters), or ``None`` when the header is missing, has no
            ``filename=`` parameter, or the parameter is empty.
        """
        content_disposition = headers.get("content-disposition", "")
        if "filename=" not in content_disposition:
            return None

        value = content_disposition.split("filename=")[-1].strip()
        if value[:1] in ('"', "'"):
            # Quoted value: take everything up to the matching closing quote so
            # that a ';' inside the name survives and trailing params do not.
            closing = value.find(value[0], 1)
            value = value[1:closing] if closing != -1 else value[1:]
        else:
            # Unquoted value ends at the next parameter separator.
            value = value.split(";", 1)[0].strip()
        return value or None

    async def search_anime(self, query: str, lang: str = "vostfr") -> list[dict]:
        """
        Search for anime on this provider
        Returns list of anime with title, url, and optional cover image
        """
        return []

    async def get_episodes(self, anime_url: str, lang: str = "vostfr") -> list[dict]:
        """
        Get list of episodes for an anime
        Returns list of episode numbers and their URLs
        """
        return []
|
||||
@@ -0,0 +1,79 @@
|
||||
from .base import BaseDownloader
|
||||
from bs4 import BeautifulSoup
|
||||
import re
|
||||
import httpx
|
||||
|
||||
|
||||
class DoodStreamDownloader(BaseDownloader):
    """Downloader for doodstream.com"""

    # Substrings that identify a Doodstream URL (matched case-insensitively).
    BASE_DOMAINS = [
        "doodstream.com", "dood.stream", "dood.to", "dood.lol",
        "dood.cx", "dood.so", "dood.watch", "dood.sh",
    ]

    def can_handle(self, url: str) -> bool:
        """Return True when ``url`` contains one of the known Doodstream domains."""
        lowered = url.lower()
        return any(domain in lowered for domain in self.BASE_DOMAINS)

    async def get_download_link(self, url: str) -> tuple[str, str]:
        """Resolve a Doodstream page URL to ``(download_url, filename)``.

        Strategies are tried in order until one yields a URL:
          1. a media/download URL embedded in an inline ``<script>``;
          2. the ``src`` of a ``<video>`` or ``<source>`` tag;
          3. a URL rebuilt from the video id found in ``url`` (last resort).

        Raises:
            Exception: if the page cannot be fetched or no URL can be derived.
        """
        try:
            # Get the page
            response = await self.client.get(url)
            response.raise_for_status()
            soup = BeautifulSoup(response.text, 'lxml')

            download_url = None
            filename = "doodstream_video.mp4"

            # Method 1: look for a direct media URL or a /download/ URL in scripts
            for script in soup.find_all('script'):
                if not script.string:
                    continue
                match = (
                    re.search(r'https?://[^\"\']+\.(?:mp4|mkv|avi)', script.string)
                    or re.search(r'(https?://[^\s\"\'<>]+/download/[^\s\"\'<>]+)', script.string)
                )
                if match:
                    download_url = match.group(0)
                    break

            # Method 2: look for a source in video tags.
            # This must run before the constructed fallback below: the fallback
            # succeeds for every /e/ or /d/ URL and would otherwise make this
            # lookup unreachable.
            if not download_url:
                video = soup.find('video')
                if video and video.get('src'):
                    download_url = video['src']
                else:
                    for source in soup.find_all('source'):
                        if source.get('src'):
                            download_url = source['src']
                            filename = source['src'].split('/')[-1] or filename
                            break

            # Method 3: last resort, rebuild a URL from the video id
            # Format: https://doodstream.com/e/VIDEO_ID or /d/VIDEO_ID
            if not download_url:
                video_id_match = re.search(r'/[ed]/([a-zA-Z0-9]+)', url)
                if video_id_match:
                    video_id = video_id_match.group(1)
                    download_url = f"https://dood.stream/e/{video_id}"

            if download_url:
                # Try to get real filename from HEAD request (best effort).
                try:
                    head_resp = await self.client.head(download_url, timeout=5.0)
                    fname = self._extract_filename_from_headers(head_resp.headers)
                    if fname:
                        filename = fname
                except Exception:
                    # Keep the default filename; `Exception` (not a bare except)
                    # so task cancellation still propagates.
                    pass

                return download_url, filename

            raise Exception("Could not extract download link from Doodstream page")

        except Exception as e:
            raise Exception(f"Error extracting Doodstream link: {str(e)}") from e
|
||||
@@ -0,0 +1,144 @@
|
||||
from .base import BaseDownloader
|
||||
from bs4 import BeautifulSoup
|
||||
import re
|
||||
from urllib.parse import urljoin
|
||||
|
||||
|
||||
class NekoSamaDownloader(BaseDownloader):
    """Downloader for neko-sama.fr"""

    BASE_DOMAINS = ["neko-sama.fr", "nekosama.fr", "www.neko-sama.fr"]

    def can_handle(self, url: str) -> bool:
        """Return True when ``url`` contains one of ``BASE_DOMAINS``."""
        return any(domain in url.lower() for domain in self.BASE_DOMAINS)

    async def get_download_link(self, url: str) -> tuple[str, str]:
        """Extract download link from neko-sama URL

        Tries, in order: player-like iframes, ``<video>``/``<source>`` tags,
        then media URLs embedded in inline scripts.

        Returns:
            ``(video_or_player_url, filename)``.

        Raises:
            Exception: if the page cannot be fetched or nothing is found.
        """
        try:
            response = await self.client.get(url, follow_redirects=True)
            soup = BeautifulSoup(response.text, 'lxml')
            page_url = str(response.url)

            # Method 1: Look for iframes with video
            for iframe in soup.find_all('iframe'):
                src = iframe.get('src', '')
                if src and any(p in src for p in ['video', 'player', 'stream']):
                    if not src.startswith('http'):
                        src = urljoin(page_url, src)
                    return src, self._generate_filename(page_url)

            # Method 2: Look for video tags
            for video in soup.find_all('video'):
                src = video.get('src') or video.get('data-src')
                if src:
                    # Resolve relative sources the same way Method 1 does;
                    # absolute URLs pass through urljoin unchanged.
                    return urljoin(page_url, src), self._generate_filename(page_url)

                for source in video.find_all('source'):
                    src = source.get('src', '')
                    if src:
                        return urljoin(page_url, src), self._generate_filename(page_url)

            # Method 3: Look in scripts
            patterns = [
                r'(https?://[^"\'>\s]+\.(?:mp4|m3u8)(?:\?[^"\'>\s]*)?)',
                r'"url":"([^"]+)"',
                r'"video":"([^"]+)"',
            ]
            for script in soup.find_all('script'):
                if not script.string:
                    continue
                for pattern in patterns:
                    for match in re.findall(pattern, script.string):
                        # JSON-escaped slashes ("\/") -> "/"
                        match = match.replace('\\/', '/')
                        if any(ext in match for ext in ['mp4', 'm3u8']):
                            return match, self._generate_filename(page_url)

            raise Exception("Could not find video link")

        except Exception as e:
            raise Exception(f"Error extracting NekoSama link: {str(e)}") from e

    def _generate_filename(self, url: str) -> str:
        """Build ``"Anime - Episode N.mp4"`` from the episode number in ``url``.

        The last path segment containing ``episode<digits>`` wins; the episode
        defaults to ``"1"`` when none is found. Only the stem is title-cased so
        the extension stays lower-case (``.mp4``, not ``.Mp4``).
        """
        anime_name = "anime"
        episode = "1"

        for part in url.split('/'):
            if 'episode' in part.lower():
                match = re.search(r'episode[-\s]*(\d+)', part, re.I)
                if match:
                    episode = match.group(1)

        stem = f"{anime_name} - Episode {episode}".title()
        return f"{stem}.mp4"

    async def get_episodes(self, anime_url: str, lang: str = "vostfr") -> list[dict]:
        """List episodes linked from ``anime_url``.

        Returns:
            Dicts with ``'episode'`` (number as string) and ``'url'``,
            de-duplicated by episode number and sorted numerically. An empty
            list is returned (and the error printed) on any failure.
        """
        try:
            response = await self.client.get(anime_url)
            soup = BeautifulSoup(response.text, 'lxml')

            episodes = []
            for link in soup.find_all('a', href=re.compile(r'episode')):
                href = link.get('href', '')
                match = re.search(r'episode[-\s]*(\d+)', href, re.I)
                if match:
                    episode_num = match.group(1)
                    if not href.startswith('http'):
                        href = urljoin(anime_url, href)

                    episodes.append({'episode': episode_num, 'url': href})

            # Deduplicate (first occurrence wins) and sort
            seen = set()
            unique_episodes = []
            for ep in episodes:
                if ep['episode'] not in seen:
                    seen.add(ep['episode'])
                    unique_episodes.append(ep)

            unique_episodes.sort(key=lambda x: int(x['episode']))
            return unique_episodes

        except Exception as e:
            # Still best-effort, but no longer silent.
            print(f"[NEKO-SAMA] Error getting episodes: {str(e)}")
            return []

    async def search_anime(self, query: str, lang: str = "vostfr") -> list[dict]:
        """
        Search for anime on neko-sama

        The query is turned into a slug and the anime page is requested
        directly; a 200 response is reported as a single ``'direct'`` hit.
        Returns an empty list when nothing is found or on error.
        """
        try:
            import time
            start = time.time()
            print(f"[NEKO-SAMA] Searching for '{query}' ({lang})...")

            # Neko-Sama URL pattern: https://neko-sama.fr/anime/{anime-name}
            search_url = f"https://neko-sama.fr/anime/{query.lower().replace(' ', '-')}"

            response = await self.client.get(search_url)

            elapsed = time.time() - start
            print(f"[NEKO-SAMA] Got response {response.status_code} in {elapsed:.2f}s")

            if response.status_code == 200:
                print(f"[NEKO-SAMA] Found anime at {str(response.url)}")
                return [{
                    'title': query,
                    'url': str(response.url),
                    'type': 'direct'
                }]

            print("[NEKO-SAMA] No anime found")
            return []

        except Exception as e:
            print(f"[NEKO-SAMA] Error: {str(e)}")
            return []
|
||||
@@ -0,0 +1,75 @@
|
||||
from .base import BaseDownloader
|
||||
from bs4 import BeautifulSoup
|
||||
import re
|
||||
import httpx
|
||||
|
||||
|
||||
class RapidFileDownloader(BaseDownloader):
    """Downloader for rapidfile.net and similar hosts"""

    def can_handle(self, url: str) -> bool:
        """Return True when ``url`` contains a known Rapidfile domain fragment."""
        lowered = url.lower()
        return any(domain in lowered for domain in ["rapidfile.net", "rapidfile.com", "rapid-file"])

    async def get_download_link(self, url: str) -> tuple[str, str]:
        """Resolve a Rapidfile page URL to ``(download_url, filename)``.

        Tries a download button, a download/file form action, any absolute
        link mentioning download/get_file/file.php, then URLs in inline
        scripts. When none match, the original ``url`` is returned unchanged.

        Raises:
            Exception: if the initial page cannot be fetched or parsed.
        """
        # Local import: this module does not import urljoin at file level.
        from urllib.parse import urljoin

        try:
            # Get the initial page
            response = await self.client.get(url)
            response.raise_for_status()
            soup = BeautifulSoup(response.text, 'lxml')

            download_url = None
            filename = "rapidfile_download"

            # Method 1: Look for download button/link
            download_btn = soup.find('a', {'id': 'downloadbtn'}) or soup.find('a', class_='download-btn')
            if download_btn and download_btn.get('href'):
                # urljoin leaves absolute hrefs untouched and resolves relative ones.
                download_url = urljoin(url, download_btn['href'])

            # Method 2: Look for form with POST action
            if not download_url:
                for form in soup.find_all('form'):
                    action = form.get('action', '')
                    if action and ('download' in action.lower() or 'file' in action.lower()):
                        # Plain `url + action` produced malformed URLs for
                        # relative actions; resolve against the page URL instead.
                        download_url = urljoin(url, action)
                        break

            # Method 3: Look for any link with download/file in URL
            if not download_url:
                for link in soup.find_all('a', href=True):
                    href = link['href']
                    if any(keyword in href.lower() for keyword in ['download', 'get_file', 'file.php']):
                        if href.startswith('http'):
                            download_url = href
                            break

            # Method 4: Check for direct file links in scripts
            if not download_url:
                for script in soup.find_all('script'):
                    if script.string:
                        match = re.search(r'(https?://[^\s\"\'<>]+/(?:download|file)[^\s\"\'<>]+)', script.string)
                        if match:
                            download_url = match.group(0)
                            break

            if download_url:
                # Get filename from headers, falling back to the URL's last segment
                url_filename = download_url.split('/')[-1] or "rapidfile_download"
                try:
                    head_resp = await self.client.head(download_url, timeout=5.0)
                    filename = self._extract_filename_from_headers(head_resp.headers) or url_filename
                except Exception:
                    # Best effort; `Exception` rather than a bare except so
                    # task cancellation is not swallowed.
                    filename = url_filename

                return download_url, filename

            # If all else fails, return the original URL
            filename = url.split('/')[-1] or "rapidfile_download"
            return url, filename

        except Exception as e:
            raise Exception(f"Error extracting Rapidfile link: {str(e)}") from e
|
||||
@@ -0,0 +1,83 @@
|
||||
from typing import Optional
|
||||
from bs4 import BeautifulSoup
|
||||
from .base import BaseDownloader
|
||||
import re
|
||||
|
||||
|
||||
class SendVidDownloader(BaseDownloader):
    """Downloader for SendVid videos"""

    def can_handle(self, url: str) -> bool:
        """Tell whether ``url`` mentions sendvid.com (case-insensitive)."""
        lowered = url.lower()
        return "sendvid.com" in lowered

    async def _fetch_page(self, url: str) -> str:
        """Fetch page with proper headers to avoid 403 errors"""
        browser_headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36',
            'Referer': 'https://sendvid.com/',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
            'Accept-Language': 'en-US,en;q=0.5',
        }
        page = await self.client.get(url, headers=browser_headers)
        page.raise_for_status()
        return page.text

    async def get_download_link(self, url: str, target_filename: str = None) -> tuple[str, str]:
        """
        Extract direct download link from SendVid embed page

        The ``<source id="video_source">`` tag is consulted first, then the
        ``og:video`` meta property. ``target_filename`` is used as the
        filename when given; otherwise one is derived from the URLs.
        Raises ``Exception`` when neither location yields a URL.
        """
        print(f"[SENDVID] Fetching page: {url}")

        soup = BeautifulSoup(await self._fetch_page(url), 'lxml')

        video_url = None

        # Preferred location: the <source> tag
        source_tag = soup.find('source', {'id': 'video_source'})
        if source_tag and source_tag.get('src'):
            video_url = source_tag['src']
            print("[SENDVID] Found video URL in <source> tag")
        else:
            # Fallback: the og:video meta property
            og_video = soup.find('meta', {'property': 'og:video'})
            if og_video and og_video.get('content'):
                video_url = og_video['content']
                print("[SENDVID] Found video URL in og:video meta")

        if video_url is None:
            raise Exception("Could not extract video URL from SendVid page")

        # A caller-supplied name wins; otherwise derive one from the URLs
        filename = target_filename or self._extract_filename_from_url(url, video_url)

        print(f"[SENDVID] Download URL: {video_url}")
        print(f"[SENDVID] Filename: {filename}")
        return video_url, filename

    def _extract_filename_from_url(self, page_url: str, video_url: str) -> str:
        """Generate a filename from SendVid URLs.

        Uses the ``/embed/<id>`` part of the page URL when present, else the
        ``*.mp4`` segment of the video URL, else a fixed default.
        """
        embed_id = re.search(r'/embed/([a-z0-9]+)', page_url)
        if embed_id:
            return f"sendvid_{embed_id.group(1)}.mp4"

        mp4_segment = re.search(r'/([^/]+\.mp4)', video_url)
        return mp4_segment.group(1) if mp4_segment else "sendvid_video.mp4"
|
||||
@@ -0,0 +1,51 @@
|
||||
from .base import BaseDownloader
|
||||
from bs4 import BeautifulSoup
|
||||
import re
|
||||
import httpx
|
||||
|
||||
|
||||
class UnFichierDownloader(BaseDownloader):
    """Downloader for 1fichier.com"""

    def can_handle(self, url: str) -> bool:
        """Return True when ``url`` contains a 1fichier domain."""
        lowered = url.lower()
        return any(domain in lowered for domain in ["1fichier.com", "1fichier.fr"])

    async def get_download_link(self, url: str) -> tuple[str, str]:
        """Resolve a 1fichier page URL to ``(download_url, filename)``.

        Uses the ``btn btn-download`` anchor when present. Otherwise each
        external absolute link is probed with a HEAD request, and the first
        one whose response has a Content-Length or an attachment disposition
        is returned.

        Raises:
            Exception: if the page cannot be fetched or no link qualifies.
        """
        try:
            # Initial page
            response = await self.client.get(url)
            response.raise_for_status()

            soup = BeautifulSoup(response.text, 'lxml')

            # Check for direct download link
            download_link = soup.find('a', class_='btn btn-download')
            if download_link and download_link.get('href'):
                download_url = download_link['href']
                # HEAD the target to read the filename from its headers
                head_resp = await self.client.head(download_url)
                filename = self._extract_filename_from_headers(head_resp.headers)
                if not filename:
                    filename = download_url.split('/')[-1] or "downloaded_file"
                return download_url, filename

            # Alternative: look for any download link in the page
            for link in soup.find_all('a', href=True):
                href = link['href']
                if not (href.startswith('http') and '1fichier' not in href):
                    continue
                # Try to head the URL to see if it's a file
                try:
                    head_resp = await self.client.head(href, timeout=5.0)
                except Exception:
                    # Unreachable candidate: try the next link. `Exception`
                    # (not a bare except) lets task cancellation propagate.
                    continue
                if 'content-length' in head_resp.headers or 'attachment' in head_resp.headers.get('content-disposition', ''):
                    filename = self._extract_filename_from_headers(head_resp.headers)
                    if not filename:
                        filename = href.split('/')[-1] or "downloaded_file"
                    return href, filename

            raise Exception("Could not find download link on page")

        except Exception as e:
            raise Exception(f"Error extracting 1fichier link: {str(e)}") from e
|
||||
@@ -0,0 +1,59 @@
|
||||
from .base import BaseDownloader
|
||||
from bs4 import BeautifulSoup
|
||||
import re
|
||||
|
||||
|
||||
class UptoboxDownloader(BaseDownloader):
    """Downloader for uptobox.com"""

    BASE_DOMAINS = ["uptobox.com", "uptobox.fr"]

    def can_handle(self, url: str) -> bool:
        """Return True when ``url`` contains one of ``BASE_DOMAINS``."""
        return any(domain in url.lower() for domain in self.BASE_DOMAINS)

    async def get_download_link(self, url: str) -> tuple[str, str]:
        """Extract direct download link from uptobox

        Tries the direct-download button, then any absolute link whose text
        mentions download/télécharger/ddl, and finally falls back to the
        original ``url``.

        Returns:
            ``(download_url, filename)``; the filename is derived from ``url``
            and defaults to ``"uptobox_file"``.

        Raises:
            Exception: if the page cannot be fetched or parsed.
        """
        try:
            response = await self.client.get(url, follow_redirects=True)
            soup = BeautifulSoup(response.text, 'lxml')

            # Every strategy below uses the same filename.
            filename = self._extract_filename_from_url(url) or "uptobox_file"

            # Method 1: Look for direct download button/link
            download_btn = soup.find('a', {'id': 'directDownload'}) or soup.find('a', class_='download-btn')
            if download_btn and download_btn.get('href'):
                return download_btn['href'], filename

            # Method 2: Look for any download link in page
            for link in soup.find_all('a', href=True):
                href = link['href']
                text = link.get_text().lower()
                if any(keyword in text for keyword in ['download', 'télécharger', 'ddl']):
                    if href.startswith('http'):
                        return href, filename

            # Method 3: Return the original URL (uptobox handles downloads directly)
            return url, filename

        except Exception as e:
            raise Exception(f"Error extracting Uptobox link: {str(e)}") from e

    def _extract_filename_from_url(self, url: str) -> str | None:
        """Try to extract filename from URL

        A ``filename=`` query parameter wins (percent-decoded). Otherwise the
        last *path* segment is used when it contains a dot. The query string,
        fragment and host name are never treated as part of the filename.
        Returns ``None`` when nothing suitable is found.
        """
        from urllib.parse import unquote, urlsplit

        # Look for filename parameter in URL
        match = re.search(r'[&?]filename=([^&#]+)', url)
        if match:
            return unquote(match.group(1))

        # Extract from path only, so "?token=..." or "#frag" are not included
        last_part = urlsplit(url).path.rsplit('/', 1)[-1]
        if '.' in last_part:
            return last_part

        return None
|
||||
@@ -0,0 +1,439 @@
|
||||
from .base import BaseDownloader
|
||||
from bs4 import BeautifulSoup
|
||||
import re
|
||||
import httpx
|
||||
import subprocess
|
||||
import os
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
import asyncio
|
||||
from typing import Optional
|
||||
|
||||
|
||||
class VidMolyDownloader(BaseDownloader):
|
||||
"""Downloader for vidmoly.to using Playwright network interception"""
|
||||
|
||||
def can_handle(self, url: str) -> bool:
    """Tell whether ``url`` mentions a known VidMoly domain (case-insensitive)."""
    lowered = url.lower()
    for domain in ("vidmoly.to", "vidmoly.org", "vidmoly.biz"):
        if domain in lowered:
            return True
    return False
|
||||
|
||||
async def get_download_link(self, url: str, target_filename: Optional[str] = None) -> tuple[str, str]:
    """Resolve a VidMoly URL to a downloadable video.

    Each candidate domain's embed page is tried with Playwright first and a
    plain HTTP scrape second. An M3U8 source is downloaded and converted via
    ``_download_m3u8_as_mp4``; anything else is returned as a direct link.

    Args:
        url: VidMoly page or embed URL.
        target_filename: Optional output name; ``vidmoly_<id>`` is used when
            omitted. ``.mp4`` is appended if the name does not already end
            with it.

    Returns:
        ``(local_mp4_path, filename)`` for M3U8 sources, otherwise
        ``(direct_video_url, filename)``.

    Raises:
        Exception: if the id cannot be extracted or no source is found.
    """
    try:
        # Extract VidMoly ID from URL
        vidmoly_id = self._extract_vidmoly_id(url)
        if not vidmoly_id:
            raise Exception("Could not extract VidMoly ID from URL")

        # Construct embed URL - try vidmoly.biz first (it works better than .to/.org)
        # If original URL uses .biz, keep it. Otherwise try .biz first
        lowered = url.lower()
        if "vidmoly.biz" in lowered:
            domains_to_try = ["vidmoly.biz"]
        elif "vidmoly.to" in lowered or "vidmoly.org" in lowered:
            # For .to/.org, try .biz first (it has actual content), then original.
            # split(...)[-1] also copes with scheme-less URLs, where indexing
            # [1] raised IndexError.
            original_domain = url.split("//", 1)[-1].split("/", 1)[0]
            domains_to_try = ["vidmoly.biz", original_domain]
        else:
            domains_to_try = ["vidmoly.biz", "vidmoly.to"]

        video_source = None
        last_error = None
        working_domain = None

        for domain in domains_to_try:
            embed_url = f"https://{domain}/embed-{vidmoly_id}.html"

            print(f"[VIDMOLY] Trying: {embed_url}")

            # Use Playwright with network interception
            video_source = await self._extract_with_playwright_network(embed_url)

            if not video_source:
                # Fallback to HTTP method
                print("[VIDMOLY] Playwright failed, trying HTTP fallback...")
                video_source = await self._extract_with_http(embed_url)

            if video_source:
                print(f"[VIDMOLY] ✅ Found video on {domain}")
                working_domain = domain
                break
            else:
                print(f"[VIDMOLY] ❌ No video on {domain}")
                last_error = f"No video found on {domain}"

        if not video_source:
            raise Exception(f"Could not find video source - tried: {', '.join(domains_to_try)}. Last error: {last_error}")

        # Use target_filename if provided, otherwise generate default
        filename = target_filename if target_filename else f"vidmoly_{vidmoly_id}"

        # Both branches produce an MP4, so the extension depends on the
        # *filename*, not on how the source URL ends. The M3U8 branch uses the
        # name as ffmpeg's output path, so it needs an extension too.
        if not filename.lower().endswith('.mp4'):
            filename += '.mp4'

        # Check if it's an M3U8 playlist
        if '.m3u8' in video_source:
            print(f"[VIDMOLY] Found M3U8 source: {video_source[:100]}...")

            # Download and convert M3U8 to MP4 directly
            headers = {
                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
                'Referer': f'https://{working_domain}/',
            }

            mp4_path = await self._download_m3u8_as_mp4(video_source, filename, headers)

            return mp4_path, filename

        # It's a direct MP4 link
        print("[VIDMOLY] Found MP4 source")
        return video_source, filename

    except Exception as e:
        raise Exception(f"Error extracting VidMoly link: {str(e)}") from e
|
||||
|
||||
async def _extract_with_playwright_network(self, url: str) -> Optional[str]:
    """Extract video source using Playwright with network interception (like DownloadHelper)

    Loads ``url`` in headless Chromium and collects candidate video URLs from
    three places, in this order: intercepted network requests, a JavaScript
    probe of the page's players, and a regex scan of the rendered HTML.

    Returns:
        The first candidate collected, or ``None`` when nothing was found,
        Playwright is not installed, or any error occurred (errors are
        printed, not raised).
    """
    try:
        from playwright.async_api import async_playwright

        print("[VIDMOLY] Launching browser with network interception...")

        video_urls = []

        async with async_playwright() as p:
            # Launch browser in headless mode
            browser = await p.chromium.launch(
                headless=True,
                args=['--no-sandbox', '--disable-setuid-sandbox', '--disable-dev-shm-usage']
            )

            # try/finally so the browser is closed even if a step below raises
            try:
                context = await browser.new_context(
                    user_agent='Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36',
                    viewport={'width': 1920, 'height': 1080}
                )

                page = await context.new_page()

                # Set up request interception BEFORE navigation
                async def handle_request(route):
                    # Capture all requests
                    req_url = route.request.url
                    print(f"[VIDMOLY] Request: {req_url[:80]}...")

                    # Look for video files (m3u8, mp4, etc.)
                    if any(ext in req_url.lower() for ext in ['.m3u8', '.mp4', '.mkv']):
                        # Only capture non-vidmoly URLs (the actual video files)
                        if 'vidmoly' not in req_url.lower():
                            print(f"[VIDMOLY] 🎥 Captured video URL: {req_url[:100]}...")
                            video_urls.append(req_url)

                    # Continue with the request
                    await route.continue_()

                # Enable request interception
                await page.route('**', handle_request)

                print("[VIDMOLY] Navigating to page...")

                # Navigate to URL and wait for load; a navigation failure is
                # only a warning because requests may already have been captured
                try:
                    await page.goto(url, wait_until='domcontentloaded', timeout=30000)
                except Exception as e:
                    print(f"[VIDMOLY] Navigation warning: {e}")

                # Wait for page to fully load and JavaScript to execute
                print("[VIDMOLY] Waiting for video player to load...")
                await asyncio.sleep(5)

                # Try to find and click play button if exists
                play_selectors = [
                    'button.jw-icon-play',
                    '.jw-play-btn',
                    'button[aria-label="Play"]',
                    '.play-button',
                    'video',
                ]

                for selector in play_selectors:
                    try:
                        element = await page.query_selector(selector)
                        if element:
                            print(f"[VIDMOLY] Found element: {selector}")
                            # For video tags, we can just wait
                            # For buttons, click them
                            if 'button' in selector or '.jw-' in selector:
                                await element.click()
                                await asyncio.sleep(3)
                            break
                    except Exception:
                        # Selector lookup/click failed: try the next one.
                        # Not a bare except, so cancellation still propagates.
                        continue

                # Wait a bit more for network requests to complete
                await asyncio.sleep(3)

                # Also try JavaScript extraction as backup
                try:
                    js_result = await page.evaluate("""
                        () => {
                            // Check all video elements
                            const videos = document.querySelectorAll('video');
                            for (let v of videos) {
                                if (v.src) {
                                    console.log('Found video src:', v.src);
                                    return v.src;
                                }
                                const sources = v.querySelectorAll('source');
                                for (let s of sources) {
                                    if (s.src && (s.src.includes('.m3u8') || s.src.includes('.mp4'))) {
                                        console.log('Found source src:', s.src);
                                        return s.src;
                                    }
                                }
                            }

                            // Check for jwplayer
                            if (window.jwplayer) {
                                try {
                                    const player = jwplayer();
                                    const playlist = player.getPlaylist();
                                    if (playlist && playlist[0] && playlist[0].sources) {
                                        const src = playlist[0].sources[0].file;
                                        console.log('Found jwplayer source:', src);
                                        return src;
                                    }
                                } catch(e) {
                                    console.log('jwplayer error:', e);
                                }
                            }

                            // Check for other player configurations
                            if (window.player && window.player.config) {
                                if (window.player.config.sources && window.player.config.sources[0]) {
                                    return window.player.config.sources[0].file;
                                }
                            }

                            // Look in window object for video URLs
                            for (let key in window) {
                                if (typeof window[key] === 'string') {
                                    const str = window[key];
                                    if ((str.includes('.m3u8') || str.includes('.mp4')) && str.startsWith('http')) {
                                        return str;
                                    }
                                }
                            }

                            return null;
                        }
                    """)

                    if js_result and ('.m3u8' in js_result or '.mp4' in js_result):
                        print("[VIDMOLY] Found video URL via JavaScript")
                        video_urls.append(js_result)
                except Exception as e:
                    print(f"[VIDMOLY] JS extraction error: {e}")

                # Final check: parse page HTML for video URLs
                try:
                    content = await page.content()
                    patterns = [
                        r'"file"\s*:\s*"([^"]+\.m3u8[^"]*)"',
                        r'"file"\s*:\s*"([^"]+\.mp4[^"]*)"',
                        r"'file'\s*:\s*'([^']+\.m3u8[^']*)'",
                        r"'file'\s*:\s*'([^']+\.mp4[^']*)'",
                        r'(https?://[^\s"\'<>]+\.m3u8[^\s"\'<>]*)',
                        r'(https?://[^\s"\'<>]+\.mp4[^\s"\'<>]*)',
                    ]

                    for pattern in patterns:
                        for match in re.findall(pattern, content):
                            # Clean up the URL: dropping every backslash also
                            # turns JSON-escaped "\/" into "/"
                            match = match.replace('\\', '')
                            if 'http' in match and 'vidmoly' not in match:
                                print(f"[VIDMOLY] Found in HTML: {match[:100]}...")
                                video_urls.append(match)
                except Exception as e:
                    print(f"[VIDMOLY] HTML parsing error: {e}")
            finally:
                await browser.close()

        # Return the first video URL found (deduplicated, order preserved)
        unique_urls = list(dict.fromkeys(video_urls))
        if unique_urls:
            print(f"[VIDMOLY] ✅ Found {len(unique_urls)} video URL(s)")
            return unique_urls[0]

        print("[VIDMOLY] ❌ No video URLs found")
        return None

    except ImportError:
        print("[VIDMOLY] Playwright not installed")
        return None
    except Exception as e:
        print(f"[VIDMOLY] Playwright error: {e}")
        import traceback
        traceback.print_exc()
        return None
|
||||
|
||||
async def _extract_with_http(self, url: str) -> Optional[str]:
    """Fallback: Extract video source using pure HTTP requests

    Fetches ``url`` with browser-like headers, follows one
    ``window.location.replace('...')`` JavaScript redirect if present, then
    scans the body for player ``file`` entries or bare ``.m3u8``/``.mp4`` URLs.

    Returns:
        The first match that contains ``http`` and does not mention
        ``vidmoly``, or ``None`` when nothing matches or any error occurs
        (the error is printed).
    """
    # Local import: this module does not import urljoin at file level.
    from urllib.parse import urljoin

    try:
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36',
            'Referer': 'https://vidmoly.to/',
            'Accept': '*/*',
            'Accept-Language': 'en-US,en;q=0.9',
        }

        response = await self.client.get(url, headers=headers)

        # Follow JS redirect if present
        if 'window.location.replace' in response.text:
            redirect_match = re.search(r"window\.location\.replace\('([^']+)'", response.text)
            if redirect_match:
                # The target may be relative; resolve it against the page we
                # actually landed on. Absolute targets are left unchanged.
                redirect_url = urljoin(str(response.url), redirect_match.group(1))
                response = await self.client.get(redirect_url, headers=headers, follow_redirects=True)

        # Try to find video source
        patterns = [
            r'file:"([^"]+)"',
            r'"file"\s*:\s*"([^"]+)"',
            r"'file'\s*:\s*'([^']+)'",
            r'(https?://[^\s"\'<>]+\.m3u8[^\s"\'<>]*)',
            r'(https?://[^\s"\'<>]+\.mp4[^\s"\'<>]*)',
        ]

        for pattern in patterns:
            for match in re.findall(pattern, response.text):
                # Dropping every backslash also turns JSON-escaped "\/" into "/"
                match = match.replace('\\', '')
                if 'http' in match and 'vidmoly' not in match:
                    return match

        return None

    except Exception as e:
        print(f"[VIDMOLY] HTTP extraction error: {e}")
        return None
|
||||
|
||||
async def _get_m3u8_qualities(self, master_m3u8_url: str, headers: dict) -> list[dict]:
    """Fetch master M3U8 and extract available qualities

    Each ``#EXT-X-STREAM-INF`` tag that carries a ``RESOLUTION=WxH`` attribute
    is paired with the URI line that follows it.

    Args:
        master_m3u8_url: URL of the master playlist; also the base for
            resolving relative variant URIs.
        headers: Request headers sent when fetching the playlist.

    Returns:
        Dicts with ``'label'`` (the resolution height as a string) and
        ``'url'`` (absolute variant URL), sorted by height descending. An
        empty list is returned (and the error printed) on any failure.
    """
    # Local import: this module does not import urljoin at file level.
    from urllib.parse import urljoin

    try:
        response = await self.client.get(master_m3u8_url, headers=headers)
        response.raise_for_status()

        lines = [line.strip() for line in response.text.split('\n') if line.strip()]

        qualities = []
        pending_label = None  # height from the most recent STREAM-INF tag, if any

        for line in lines:
            if line.startswith('#EXT-X-STREAM-INF'):
                resolution_match = re.search(r'RESOLUTION=\d+x(\d+)', line)
                # Reset on every tag so a label never leaks to a later variant
                pending_label = resolution_match.group(1) if resolution_match else None
            elif line.startswith('#'):
                # Other tags/comments do not consume the pending variant
                continue
            elif pending_label is not None:
                # The first URI line after STREAM-INF is the variant playlist.
                # It may carry a query string, so no ".m3u8" suffix is required.
                qualities.append({
                    'label': pending_label,
                    'url': urljoin(master_m3u8_url, line),
                })
                pending_label = None

        qualities.sort(key=lambda x: int(x['label']), reverse=True)
        return qualities
    except Exception as e:
        print(f"Error fetching M3U8 qualities: {e}")
        return []
|
||||
|
||||
async def _download_m3u8_as_mp4(self, m3u8_url: str, filename: str, headers: dict, download_dir: str = "downloads") -> str:
    """Download an M3U8 stream and remux it into an MP4 file with ffmpeg.

    Args:
        m3u8_url: URL of the playlist handed to ffmpeg as its input.
        filename: Name of the output file inside ``download_dir``.
        headers: HTTP headers ffmpeg should send with its requests.
        download_dir: Output directory; created when it does not exist.

    Returns:
        Path of the MP4 file that was written.

    Raises:
        Exception: when ffmpeg is not installed, when it produces no usable
            output file (larger than 1000 bytes), or when it hits the
            10 minute timeout without leaving such a file behind.
    """
    # Function-scope import: only this method needs it.
    import asyncio

    os.makedirs(download_dir, exist_ok=True)
    output_path = os.path.join(download_dir, filename)

    # ffmpeg takes all custom headers as ONE "-headers" value with CRLF
    # terminated lines; repeating the option per header leaves only one of
    # them in effect.
    header_blob = ''.join(f'{key}: {value}\r\n' for key, value in headers.items())

    cmd = ['ffmpeg']
    if header_blob:
        cmd += ['-headers', header_blob]
    cmd += [
        '-i', m3u8_url,
        '-c', 'copy',
        '-bsf:a', 'aac_adtstoasc',
        '-y',
        output_path,
    ]

    def _usable_output_size():
        """Return the output size in bytes when it exceeds 1 KB, else None."""
        if os.path.exists(output_path):
            file_size = os.path.getsize(output_path)
            if file_size > 1000:  # At least 1KB
                return file_size
        return None

    try:
        print("[VIDMOLY] Downloading M3U8 with ffmpeg...")
        print(f"[VIDMOLY] URL: {m3u8_url[:80]}...")
        print(f"[VIDMOLY] Output: {output_path}")

        # ffmpeg output goes to a log file instead of a pipe to avoid
        # buffering issues.  The blocking call runs in a worker thread so
        # the event loop stays responsive for the whole download.
        log_path = output_path + '.log'
        with open(log_path, 'w') as log_file:
            await asyncio.to_thread(
                subprocess.run,
                cmd,
                stdout=log_file,
                stderr=log_file,
                timeout=600,  # 10 minutes for very long videos
            )

        # The exit code is deliberately ignored: a file that was written is
        # accepted even if ffmpeg reported problems.
        file_size = _usable_output_size()
        if file_size is not None:
            print(f"[VIDMOLY] ✅ Download complete: {file_size / (1024*1024):.2f} MB")
            return output_path

        raise Exception("FFmpeg failed - no output file created")

    except subprocess.TimeoutExpired as e:
        # Keep whatever was written before the timeout, if anything.
        file_size = _usable_output_size()
        if file_size is not None:
            print(f"[VIDMOLY] ⚠️ Timeout but file created: {file_size / (1024*1024):.2f} MB")
            return output_path
        raise Exception("FFmpeg timeout (10 minutes) - video too large") from e

    except FileNotFoundError as e:
        raise Exception("ffmpeg not found - please install ffmpeg: apt install ffmpeg") from e
    except Exception as e:
        raise Exception(f"Error downloading M3U8: {str(e)}") from e
|
||||
|
||||
def _extract_vidmoly_id(self, url: str) -> Optional[str]:
    """Return the VidMoly video ID found in *url*, or None when there is none.

    The URL is checked, in this order, for an ``embed-<id>`` segment, a
    ``v=<id>`` query parameter, and an ``<id>`` directly after a
    vidmoly.to / .org / .biz host.  Matching is case-insensitive.
    """
    id_patterns = (
        r'embed-([a-z0-9]+)',
        r'[?&]v=([a-z0-9]+)',
        r'vidmoly\.(?:to|org|biz)/([a-z0-9]+)',
    )

    for id_pattern in id_patterns:
        found = re.search(id_pattern, url, re.IGNORECASE)
        if found is not None:
            return found.group(1)

    return None
|
||||
@@ -0,0 +1,195 @@
|
||||
from .base import BaseDownloader
|
||||
from bs4 import BeautifulSoup
|
||||
import re
|
||||
import httpx
|
||||
import subprocess
|
||||
import os
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
class VidMolyDownloader(BaseDownloader):
    """Downloader for vidmoly.to - Video streaming host with M3U8 to MP4 conversion"""

    def can_handle(self, url: str) -> bool:
        """Return True when *url* contains a supported VidMoly domain."""
        return any(domain in url.lower() for domain in ["vidmoly.to", "vidmoly.org"])

    async def get_download_link(self, url: str) -> tuple[str, str]:
        """Resolve a VidMoly URL to a downloadable video.

        Args:
            url: Any VidMoly URL from which a video ID can be extracted.

        Returns:
            ``(location, filename)``.  For HLS sources ``location`` is the
            path of a local MP4 produced by ffmpeg; otherwise it is the
            direct video URL found in the embed page.

        Raises:
            Exception: on any failure, with the message prefixed by
                "Error extracting VidMoly link".
        """
        # Function-scope import: only this method needs it.
        from urllib.parse import urljoin

        try:
            vidmoly_id = self._extract_vidmoly_id(url)
            if not vidmoly_id:
                raise Exception("Could not extract VidMoly ID from URL")

            embed_url = f"https://vidmoly.to/embed-{vidmoly_id}.html"

            headers = {
                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36',
                'Referer': 'https://vidmoly.to/',
                'Accept': '*/*',
                'Accept-Language': 'en-US,en;q=0.9',
            }

            response = await self.client.get(embed_url, headers=headers)
            response.raise_for_status()

            # The embed page may answer with a JavaScript redirect that
            # carries a token instead of the player markup.
            if 'window.location.replace' in response.text:
                redirect_match = re.search(r"window\.location\.replace\('([^']+)'", response.text)
                if redirect_match:
                    # The target may be relative; resolve it against the
                    # embed page so the client always gets an absolute URL.
                    redirect_url = urljoin(embed_url, redirect_match.group(1))
                    print("[VIDMOLY] Following redirect with token...")
                    response = await self.client.get(redirect_url, headers=headers, follow_redirects=True)
                    response.raise_for_status()

            # The player config exposes the source as  file:"URL".
            sources_match = re.findall(r'file:"([^"]+)"', response.text)
            if not sources_match:
                raise Exception("Could not find video source in page")

            video_source = sources_match[0]

            if '.m3u8' in video_source:
                qualities = await self._get_m3u8_qualities(video_source, headers)

                if qualities:
                    # The list is sorted best-first.
                    stream_url = qualities[0]['url']
                    quality_label = qualities[0]['label']
                else:
                    # No variants known: treat the playlist itself as the
                    # stream and label it 720.
                    stream_url = video_source
                    quality_label = "720"

                mp4_path = await self._convert_m3u8_to_mp4(
                    stream_url,
                    vidmoly_id,
                    quality_label,
                    headers,
                )
                return mp4_path, f"vidmoly_{vidmoly_id}_{quality_label}p.mp4"

            # Direct MP4 link.  The filename already ends in ".mp4"; adding
            # the suffix again for URLs with a query string produced
            # "....mp4.mp4".
            return video_source, f"vidmoly_{vidmoly_id}.mp4"

        except Exception as e:
            raise Exception(f"Error extracting VidMoly link: {str(e)}") from e

    async def _get_m3u8_qualities(self, master_m3u8_url: str, headers: dict) -> list[dict]:
        """Fetch a master M3U8 playlist and list the variant streams it declares.

        Returns:
            One ``{'label': <height>, 'url': <absolute variant URL>}`` dict
            per ``#EXT-X-STREAM-INF`` entry that advertises a RESOLUTION,
            sorted by height with the highest first.  An empty list when the
            request or the parsing fails (the error is printed, not raised).
        """
        # Function-scope import: only this method needs it.
        from urllib.parse import urljoin

        try:
            response = await self.client.get(master_m3u8_url, headers=headers)
            response.raise_for_status()

            qualities = []
            # Height announced by the most recent #EXT-X-STREAM-INF tag.
            pending_label = None

            for raw_line in response.text.splitlines():
                line = raw_line.strip()
                if not line:
                    continue

                if line.startswith('#EXT-X-STREAM-INF'):
                    resolution_match = re.search(r'RESOLUTION=\d+x(\d+)', line)
                    # Reset on every tag so a stale label is never reused.
                    pending_label = resolution_match.group(1) if resolution_match else None
                elif line.startswith('#'):
                    continue
                elif pending_label is not None:
                    # The URI may be relative, absolute-path, or carry a
                    # query string; urljoin handles all of them.
                    qualities.append({
                        'label': pending_label,
                        'url': urljoin(master_m3u8_url, line),
                    })
                    pending_label = None

            qualities.sort(key=lambda quality: int(quality['label']), reverse=True)
            return qualities
        except Exception as e:
            # Best effort: the caller falls back to the playlist itself.
            print(f"Error fetching M3U8 qualities: {e}")
            return []

    async def _convert_m3u8_to_mp4(self, m3u8_url: str, vidmoly_id: str, quality: str, headers: dict) -> str:
        """Remux an M3U8 stream into an MP4 in the system temp directory.

        Args:
            m3u8_url: Playlist URL handed to ffmpeg as its input.
            vidmoly_id: Video ID, used in the output filename.
            quality: Height label, used in the output filename.
            headers: HTTP headers ffmpeg should send with its requests.

        Returns:
            Path of the MP4 file that was written.

        Raises:
            Exception: when ffmpeg fails, times out after 5 minutes, or
                leaves no output file.
        """
        # Function-scope import: only this method needs it.
        import asyncio

        output_path = os.path.join(tempfile.gettempdir(), f"vidmoly_{vidmoly_id}_{quality}p.mp4")

        # ffmpeg expects every custom header in one CRLF-terminated string.
        header_blob = ''.join(f'{k}: {v}\r\n' for k, v in headers.items())

        cmd = ['ffmpeg']
        if header_blob:
            cmd += ['-headers', header_blob]
        cmd += [
            '-i', m3u8_url,
            '-c', 'copy',
            '-bsf:a', 'aac_adtstoasc',
            '-y',  # Overwrite output file if exists
            output_path,
        ]

        try:
            # Argument list, no shell: the URL comes from a remote page and
            # routinely contains '&' and '?', which a shell would interpret.
            # The blocking call runs in a worker thread so the event loop
            # is not stalled for the duration of the conversion.
            result = await asyncio.to_thread(
                subprocess.run,
                cmd,
                capture_output=True,
                text=True,
                timeout=300,  # 5 minutes timeout
            )

            if result.returncode != 0:
                raise Exception(f"FFmpeg conversion failed: {result.stderr}")

            if not os.path.exists(output_path):
                raise Exception("FFmpeg output file not created")

            return output_path

        except subprocess.TimeoutExpired as e:
            raise Exception("FFmpeg conversion timeout (5 minutes)") from e
        except Exception as e:
            raise Exception(f"Error converting M3U8 to MP4: {str(e)}") from e

    def _extract_vidmoly_id(self, url: str) -> 'str | None':
        """Return the VidMoly video ID found in *url*, or None.

        Recognised forms, tried in this order (case-insensitive):
        ``vidmoly.to/embed-ID.html``, ``vidmoly.to/?v=ID``, ``vidmoly.to/ID``.
        """
        id_patterns = (
            r'embed-([a-z0-9]+)',
            r'[?&]v=([a-z0-9]+)',
            r'vidmoly\.(?:to|org)/([a-z0-9]+)',
        )

        for id_pattern in id_patterns:
            found = re.search(id_pattern, url, re.IGNORECASE)
            if found:
                return found.group(1)

        return None
|
||||
@@ -0,0 +1,144 @@
|
||||
from .base import BaseDownloader
|
||||
from bs4 import BeautifulSoup
|
||||
import re
|
||||
from urllib.parse import urljoin
|
||||
|
||||
|
||||
class VostfreeDownloader(BaseDownloader):
    """Downloader for vostfree.tv"""

    BASE_DOMAINS = ["vostfree.tv", "www.vostfree.tv"]

    # Regexes tried, in order, against the text of inline <script> tags.
    _SCRIPT_URL_PATTERNS = (
        r'(https?://[^"\'>\s]+\.(?:mp4|m3u8)(?:\?[^"\'>\s]*)?)',
        r'"url":"([^"]+)"',
        r'"file":"([^"]+)"',
        r'"video":"([^"]+)"',
    )

    def can_handle(self, url: str) -> bool:
        """Return True when *url* contains one of the vostfree domains."""
        return any(domain in url.lower() for domain in self.BASE_DOMAINS)

    @staticmethod
    def _absolutize(page_url: str, src: str) -> str:
        """Return *src* unchanged when it starts with "http", else resolve it against *page_url*."""
        return src if src.startswith('http') else urljoin(page_url, src)

    async def get_download_link(self, url: str) -> tuple[str, str]:
        """Extract download link from vostfree URL

        The page is searched, in order, for a player iframe, a <video> tag
        (or one of its <source> children), and a media URL inside an inline
        script.  The first hit wins.

        Returns:
            ``(link, filename)`` where ``link`` is an absolute URL.

        Raises:
            Exception: when the page cannot be fetched or no link is found,
                with the message prefixed by "Error extracting Vostfree link".
        """
        try:
            response = await self.client.get(url, follow_redirects=True)
            soup = BeautifulSoup(response.text, 'lxml')

            # Use the final URL (after redirects) for naming and for
            # resolving relative links.
            page_url = str(response.url)
            filename = self._generate_filename(page_url)

            # Method 1: Look for iframe players
            for iframe in soup.find_all('iframe'):
                src = iframe.get('src', '')
                if src and any(p in src for p in ['player', 'video', 'stream']):
                    return self._absolutize(page_url, src), filename

            # Method 2: Look for video tags
            for video in soup.find_all('video'):
                src = video.get('src')
                if src:
                    return self._absolutize(page_url, src), filename

                for source in video.find_all('source'):
                    src = source.get('src', '')
                    if src and any(ext in src for ext in ['mp4', 'm3u8']):
                        return self._absolutize(page_url, src), filename

            # Method 3: Look in scripts
            for script in soup.find_all('script'):
                if not script.string:
                    continue
                for pattern in self._SCRIPT_URL_PATTERNS:
                    for match in re.findall(pattern, script.string):
                        # JSON-embedded URLs escape their slashes.
                        match = match.replace('\\/', '/')
                        if any(ext in match for ext in ['mp4', 'm3u8']):
                            return self._absolutize(page_url, match), filename

            raise Exception("Could not find video link")

        except Exception as e:
            raise Exception(f"Error extracting Vostfree link: {str(e)}") from e

    def _generate_filename(self, url: str) -> str:
        """Build an "<Anime> - Episode <N>.mp4" filename from *url*.

        The episode number is taken from the last path segment that matches
        ``episode<N>`` (case-insensitive) and defaults to "1".  The anime
        name is currently the fixed placeholder "anime".
        """
        anime_name = "anime"
        episode = "1"

        for part in url.split('/'):
            match = re.search(r'episode[-\s]*(\d+)', part, re.I)
            if match:
                episode = match.group(1)

        # Title-case the stem only: applying .title() to the whole name
        # turned the extension into ".Mp4".
        stem = f"{anime_name} - Episode {episode}".title()
        return f"{stem}.mp4"

    async def get_episodes(self, anime_url: str, lang: str = "vostfr") -> list[dict]:
        """List the episodes linked from an anime page.

        Args:
            anime_url: URL of the anime page.
            lang: Accepted for interface compatibility; not used here.

        Returns:
            ``{'episode': <number as str>, 'url': <absolute URL>}`` dicts,
            one per distinct episode number, sorted numerically.  An empty
            list when the page cannot be fetched or parsed.
        """
        try:
            response = await self.client.get(anime_url)
            soup = BeautifulSoup(response.text, 'lxml')

            seen = set()
            unique_episodes = []

            for link in soup.find_all('a', href=re.compile(r'episode', re.I)):
                href = link.get('href', '')
                match = re.search(r'episode[-\s]*(\d+)', href, re.I)
                if not match:
                    continue

                episode_num = match.group(1)
                # Keep only the first link seen for each episode number.
                if episode_num in seen:
                    continue
                seen.add(episode_num)

                if not href.startswith('http'):
                    href = urljoin(anime_url, href)

                unique_episodes.append({'episode': episode_num, 'url': href})

            unique_episodes.sort(key=lambda x: int(x['episode']))
            return unique_episodes

        except Exception as e:
            # Still best effort, but no longer silent.
            print(f"[VOSTFREE] Error fetching episodes: {str(e)}")
            return []

    async def search_anime(self, query: str, lang: str = "vostfr") -> list[dict]:
        """
        Search for anime on vostfree

        The query is turned into a slug (lower-cased, spaces replaced by
        dashes) and the matching ``/anime/<slug>`` page is requested; an
        HTTP 200 answer counts as a hit.

        Returns:
            A one-element list ``[{'title', 'url', 'type': 'direct'}]`` on a
            hit, otherwise an empty list (also on any error).
        """
        try:
            import time
            start = time.time()
            print(f"[VOSTFREE] Searching for '{query}' ({lang})...")

            # Vostfree URL pattern
            search_url = f"https://vostfree.tv/anime/{query.lower().replace(' ', '-')}"

            response = await self.client.get(search_url)

            elapsed = time.time() - start
            print(f"[VOSTFREE] Got response {response.status_code} in {elapsed:.2f}s")

            if response.status_code == 200:
                print(f"[VOSTFREE] Found anime at {str(response.url)}")
                return [{
                    'title': query,
                    'url': str(response.url),
                    'type': 'direct'
                }]

            print("[VOSTFREE] No anime found")
            return []

        except Exception as e:
            print(f"[VOSTFREE] Error: {str(e)}")
            return []
|
||||
Reference in New Issue
Block a user