refactor: Restructure downloaders with clear separation

This commit implements a complete reorganization of the downloader system
with a clear distinction between anime streaming sites and video hosting services.

## Structure Changes

**New Organization:**
- `app/downloaders/anime_sites/` - Anime streaming sites (catalogs + metadata)
- `app/downloaders/video_players/` - Video hosting services (file downloads)

**Base Classes:**
- `BaseAnimeSite` - For anime providers (search, episodes, metadata)
- `BaseVideoPlayer` - For video players (download link extraction)

**Migrated Downloaders:**
Anime Sites (4):
- AnimeSama, NekoSama, AnimeUltime, Vostfree

Video Players (8):
- Doodstream, Sibnet, VidMoly, SendVid, Lpayer, 1fichier, Uptobox, Rapidfile

## Key Improvements

1. **Clear Separation**: Distinct base classes for different use cases
2. **Preserved Functionality**: All existing features maintained
   - VidMoly: M3U8 support, Playwright, multi-domains, target_filename param
   - SendVid: target_filename parameter support
   - All others: No behavioral changes

3. **Better Organization**:
   - Anime sites: search_anime(), get_episodes(), get_anime_metadata()
   - Video players: get_download_link(url, target_filename=None)

4. **Fixed Imports**: Updated cross-imports in AnimeSama
   - from ..video_players.vidmoly import
   - from ..video_players.sendvid import
   - from ..video_players.sibnet import
   - from ..video_players.lpayer import

5. **Updated Tests**: All test imports use new structure
6. **Updated Providers**: Added 4 missing file hosts to providers.py

## Backward Compatibility

- Main API unchanged: `get_downloader()` works identically
- All 23 tests passing
- Frontend fully functional
- No breaking changes for users

## Documentation

- RESTRUCTURATION_SUMMARY.md - Technical details
- FIX_IMPORT_ERROR.md - Import error resolution
- IMPORT_VERIFICATION_REPORT.md - Complete import verification
- FRONTEND_VERIFICATION_FINAL.md - Frontend validation

Generated with [Claude Code](https://claude.ai/code)
via [Happy](https://happy.engineering)

Co-Authored-By: Claude <noreply@anthropic.com>
Co-Authored-By: Happy <yesreply@happy.engineering>
This commit is contained in:
root
2026-01-24 22:13:20 +00:00
parent 1fe7392063
commit 3afad41d46
25 changed files with 1001 additions and 83 deletions
+447
View File
@@ -0,0 +1,447 @@
from .base import BaseVideoPlayer
from bs4 import BeautifulSoup
import re
import httpx
import subprocess
import os
import tempfile
from pathlib import Path
import asyncio
from typing import Optional
class VidMolyDownloader(BaseVideoPlayer):
"""Downloader for vidmoly.to using Playwright network interception"""
def can_handle(self, url: str) -> bool:
    """Return True when *url* mentions one of the supported VidMoly domains.

    The check is a case-insensitive substring match against the whole URL.
    """
    lowered = url.lower()
    for known_domain in ("vidmoly.to", "vidmoly.org", "vidmoly.biz"):
        if known_domain in lowered:
            return True
    return False
async def get_download_link(self, url: str, target_filename: str = None) -> tuple[str, str]:
try:
# Extract VidMoly ID from URL
vidmoly_id = self._extract_vidmoly_id(url)
if not vidmoly_id:
raise Exception("Could not extract VidMoly ID from URL")
# Construct embed URL - try vidmoly.biz first (it works better than .to/.org)
# If original URL uses .biz, keep it. Otherwise try .biz first
domains_to_try = []
if "vidmoly.biz" in url.lower():
domains_to_try = ["vidmoly.biz"]
elif "vidmoly.to" in url.lower() or "vidmoly.org" in url.lower():
# For .to/.org, try .biz first (it has actual content), then original
domains_to_try = ["vidmoly.biz", url.split("//")[1].split("/")[0]]
else:
domains_to_try = ["vidmoly.biz", "vidmoly.to"]
video_source = None
last_error = None
working_domain = None
for domain in domains_to_try:
embed_url = f"https://{domain}/embed-{vidmoly_id}.html"
print(f"[VIDMOLY] Trying: {embed_url}")
print(f"[VIDMOLY] VidMoly ID: {vidmoly_id}")
# Use Playwright with network interception
video_source = await self._extract_with_playwright_network(embed_url)
if not video_source:
# Fallback to HTTP method
print("[VIDMOLY] Playwright failed, trying HTTP fallback...")
video_source = await self._extract_with_http(embed_url)
if video_source:
print(f"[VIDMOLY] ✅ Found video on {domain}")
working_domain = domain
break
else:
print(f"[VIDMOLY] ❌ No video on {domain}")
last_error = f"No video found on {domain}"
if not video_source:
raise Exception(f"Could not find video source - tried: {', '.join(domains_to_try)}. Last error: {last_error}")
# Validate that video_source is not an embed URL
if 'vidmoly' in video_source.lower() and ('embed-' in video_source or '.html' in video_source):
raise Exception(f"Extracted URL is still a VidMoly embed page, not a video: {video_source[:100]}")
# Use target_filename if provided, otherwise generate default
filename = target_filename if target_filename else f"vidmoly_{vidmoly_id}"
# Check if it's an M3U8 playlist
if '.m3u8' in video_source:
print(f"[VIDMOLY] Found M3U8 source: {video_source[:100]}...")
# Download and convert M3U8 to MP4 directly
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
'Referer': f'https://{working_domain}/',
}
mp4_path = await self._download_m3u8_as_mp4(video_source, filename, headers)
return mp4_path, filename
# It's a direct MP4 link
if not video_source.endswith('.mp4'):
filename += '.mp4'
print(f"[VIDMOLY] Found MP4 source")
return video_source, filename
except Exception as e:
raise Exception(f"Error extracting VidMoly link: {str(e)}")
async def _extract_with_playwright_network(self, url: str) -> Optional[str]:
"""Extract video source using Playwright with network interception (like DownloadHelper)"""
try:
from playwright.async_api import async_playwright
print("[VIDMOLY] Launching browser with network interception...")
video_urls = []
async with async_playwright() as p:
# Launch browser in headless mode
browser = await p.chromium.launch(
headless=True,
args=['--no-sandbox', '--disable-setuid-sandbox', '--disable-dev-shm-usage']
)
context = await browser.new_context(
user_agent='Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36',
viewport={'width': 1920, 'height': 1080}
)
page = await context.new_page()
# Set up request interception BEFORE navigation
async def handle_request(route):
# Capture all requests
req_url = route.request.url
print(f"[VIDMOLY] Request: {req_url[:80]}...")
# Look for video files (m3u8, mp4, etc.)
if any(ext in req_url.lower() for ext in ['.m3u8', '.mp4', '.mkv']):
# Only capture non-vidmoly URLs (the actual video files)
if 'vidmoly' not in req_url.lower():
print(f"[VIDMOLY] 🎥 Captured video URL: {req_url[:100]}...")
video_urls.append(req_url)
# Continue with the request
await route.continue_()
# Enable request interception
await page.route('**', handle_request)
# Log page URL for debugging
print(f"[VIDMOLY] Page URL: {url}")
# Also set up response interception to catch redirects
page.on("response", lambda response: None)
print("[VIDMOLY] Navigating to page...")
# Navigate to URL and wait for load
try:
await page.goto(url, wait_until='domcontentloaded', timeout=30000)
except Exception as e:
print(f"[VIDMOLY] Navigation warning: {e}")
# Wait for page to fully load and JavaScript to execute
print("[VIDMOLY] Waiting for video player to load...")
await asyncio.sleep(5)
# Try to find and click play button if exists
try:
# Look for common play button selectors
play_selectors = [
'button.jw-icon-play',
'.jw-play-btn',
'button[aria-label="Play"]',
'.play-button',
'video',
]
for selector in play_selectors:
try:
element = await page.query_selector(selector)
if element:
print(f"[VIDMOLY] Found element: {selector}")
# For video tags, we can just wait
# For buttons, click them
if 'button' in selector or '.jw-' in selector:
await element.click()
await asyncio.sleep(3)
break
except:
continue
except Exception as e:
print(f"[VIDMOLY] Play button interaction: {e}")
# Wait a bit more for network requests to complete
await asyncio.sleep(3)
# Also try JavaScript extraction as backup
try:
js_result = await page.evaluate("""
() => {
// Check all video elements
const videos = document.querySelectorAll('video');
for (let v of videos) {
if (v.src) {
console.log('Found video src:', v.src);
return v.src;
}
const sources = v.querySelectorAll('source');
for (let s of sources) {
if (s.src && (s.src.includes('.m3u8') || s.src.includes('.mp4'))) {
console.log('Found source src:', s.src);
return s.src;
}
}
}
// Check for jwplayer
if (window.jwplayer) {
try {
const player = jwplayer();
const playlist = player.getPlaylist();
if (playlist && playlist[0] && playlist[0].sources) {
const src = playlist[0].sources[0].file;
console.log('Found jwplayer source:', src);
return src;
}
} catch(e) {
console.log('jwplayer error:', e);
}
}
// Check for other player configurations
if (window.player && window.player.config) {
if (window.player.config.sources && window.player.config.sources[0]) {
return window.player.config.sources[0].file;
}
}
// Look in window object for video URLs
for (let key in window) {
if (typeof window[key] === 'string') {
const str = window[key];
if ((str.includes('.m3u8') || str.includes('.mp4')) && str.startsWith('http')) {
return str;
}
}
}
return null;
}
""")
if js_result and ('.m3u8' in js_result or '.mp4' in js_result):
print(f"[VIDMOLY] Found video URL via JavaScript")
video_urls.append(js_result)
except Exception as e:
print(f"[VIDMOLY] JS extraction error: {e}")
# Final check: parse page HTML for video URLs
try:
content = await page.content()
patterns = [
r'"file"\s*:\s*"([^"]+\.m3u8[^"]*)"',
r'"file"\s*:\s*"([^"]+\.mp4[^"]*)"',
r"'file'\s*:\s*'([^']+\.m3u8[^']*)'",
r"'file'\s*:\s*'([^']+\.mp4[^']*)'",
r'(https?://[^\s"\'<>]+\.m3u8[^\s"\'<>]*)',
r'(https?://[^\s"\'<>]+\.mp4[^\s"\'<>]*)',
]
for pattern in patterns:
matches = re.findall(pattern, content)
for match in matches:
# Clean up the URL
match = match.replace('\\', '').replace('\/', '/')
if 'http' in match and 'vidmoly' not in match:
print(f"[VIDMOLY] Found in HTML: {match[:100]}...")
video_urls.append(match)
except Exception as e:
print(f"[VIDMOLY] HTML parsing error: {e}")
await browser.close()
# Return the first valid video URL found
if video_urls:
# Deduplicate while preserving order
seen = set()
unique_urls = []
for url in video_urls:
if url not in seen:
seen.add(url)
unique_urls.append(url)
if unique_urls:
print(f"[VIDMOLY] ✅ Found {len(unique_urls)} video URL(s)")
return unique_urls[0]
print("[VIDMOLY] ❌ No video URLs found")
return None
except ImportError:
print("[VIDMOLY] Playwright not installed")
return None
except Exception as e:
print(f"[VIDMOLY] Playwright error: {e}")
import traceback
traceback.print_exc()
return None
async def _extract_with_http(self, url: str) -> Optional[str]:
"""Fallback: Extract video source using pure HTTP requests"""
try:
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36',
'Referer': 'https://vidmoly.to/',
'Accept': '*/*',
'Accept-Language': 'en-US,en;q=0.9',
}
response = await self.client.get(url, headers=headers)
# Follow JS redirect if present
if 'window.location.replace' in response.text:
redirect_match = re.search(r"window\.location\.replace\('([^']+)'", response.text)
if redirect_match:
redirect_url = redirect_match.group(1)
response = await self.client.get(redirect_url, headers=headers, follow_redirects=True)
# Try to find video source
patterns = [
r'file:"([^"]+)"',
r'"file"\s*:\s*"([^"]+)"',
r"'file'\s*:\s*'([^']+)'",
r'(https?://[^\s"\'<>]+\.m3u8[^\s"\'<>]*)',
r'(https?://[^\s"\'<>]+\.mp4[^\s"\'<>]*)',
]
for pattern in patterns:
matches = re.findall(pattern, response.text)
if matches:
for match in matches:
match = match.replace('\\', '').replace('\/', '/')
if 'http' in match and 'vidmoly' not in match:
return match
return None
except Exception as e:
print(f"[VIDMOLY] HTTP extraction error: {e}")
return None
async def _get_m3u8_qualities(self, master_m3u8_url: str, headers: dict) -> list[dict]:
"""Fetch master M3U8 and extract available qualities"""
try:
response = await self.client.get(master_m3u8_url, headers=headers)
response.raise_for_status()
content = response.text
lines = [line.strip() for line in content.split('\n') if line.strip()]
qualities = []
current_quality = {}
for line in lines:
if line.startswith('#EXT-X-STREAM-INF'):
resolution_match = re.search(r'RESOLUTION=\d+x(\d+)', line)
if resolution_match:
current_quality['label'] = resolution_match.group(1)
elif line.endswith('.m3u8') and current_quality:
current_quality['url'] = line if line.startswith('http') else master_m3u8_url.rsplit('/', 1)[0] + '/' + line
qualities.append(current_quality)
current_quality = {}
qualities.sort(key=lambda x: int(x['label']), reverse=True)
return qualities
except Exception as e:
print(f"Error fetching M3U8 qualities: {e}")
return []
async def _download_m3u8_as_mp4(self, m3u8_url: str, filename: str, headers: dict, download_dir: str = "downloads") -> str:
"""Download M3U8 stream and convert to MP4 using ffmpeg"""
# Create downloads directory if it doesn't exist
os.makedirs(download_dir, exist_ok=True)
output_path = os.path.join(download_dir, filename)
# Build headers for ffmpeg - using multiple -headers options
header_args = []
for key, value in headers.items():
header_args.extend(['-headers', f'{key}: {value}'])
cmd = [
'ffmpeg',
*header_args,
'-i', m3u8_url,
'-c', 'copy',
'-bsf:a', 'aac_adtstoasc',
'-y',
output_path
]
try:
print(f"[VIDMOLY] Downloading M3U8 with ffmpeg...")
print(f"[VIDMOLY] URL: {m3u8_url[:80]}...")
print(f"[VIDMOLY] Output: {output_path}")
# Run ffmpeg without capturing output to avoid buffering issues
# Use a log file instead
log_path = output_path + '.log'
with open(log_path, 'w') as log_file:
result = subprocess.run(
cmd,
stdout=log_file,
stderr=log_file,
timeout=600 # 10 minutes for very long videos
)
# Check if file was created even if ffmpeg had issues
if os.path.exists(output_path):
file_size = os.path.getsize(output_path)
if file_size > 1000: # At least 1KB
print(f"[VIDMOLY] ✅ Download complete: {file_size / (1024*1024):.2f} MB")
return output_path
# If we get here, something went wrong
raise Exception(f"FFmpeg failed - no output file created")
except subprocess.TimeoutExpired:
# Check if file was created despite timeout
if os.path.exists(output_path):
file_size = os.path.getsize(output_path)
if file_size > 1000: # At least 1KB
print(f"[VIDMOLY] ⚠️ Timeout but file created: {file_size / (1024*1024):.2f} MB")
return output_path
raise Exception("FFmpeg timeout (10 minutes) - video too large")
except FileNotFoundError:
raise Exception("ffmpeg not found - please install ffmpeg: apt install ffmpeg")
except Exception as e:
raise Exception(f"Error downloading M3U8: {str(e)}")
def _extract_vidmoly_id(self, url: str) -> Optional[str]:
"""Extract VidMoly video ID from URL"""
embed_match = re.search(r'embed-([a-z0-9]+)', url, re.IGNORECASE)
if embed_match:
return embed_match.group(1)
param_match = re.search(r'[?&]v=([a-z0-9]+)', url, re.IGNORECASE)
if param_match:
return param_match.group(1)
path_match = re.search(r'vidmoly\.(?:to|org|biz)/([a-z0-9]+)', url, re.IGNORECASE)
if path_match:
return path_match.group(1)
return None