feat: Add TV series support with Vidzy HLS downloads and duplicate prevention

Major improvements:
- TV series support via the FS7 provider with a dedicated search endpoint
- Vidzy downloader now uses Playwright to get past JS obfuscation and ffmpeg for HLS streams
- Episode files are properly named (Series Title - Episode X) instead of master.m3u8.mp4
- Duplicate download prevention: checks existing tasks before creating new ones
- Removed the host preference system in favor of intelligent URL-based detection

Technical changes:
- Vidzy: Added Playwright extraction and M3U8→MP4 conversion with ffmpeg
- FS7: Episodes now use pipe format (video_url|series_url|episode_title)
- DownloadManager: Extracts target_filename from the pipe URL and prevents duplicates
- UI: New Series tab with search, recommendations, and releases sections
- Anime-Sama: Removed hardcoded host preferences; uses the site's URL order

Generated with [Claude Code](https://claude.com/claude-code) via [Happy](https://happy.engineering)

Co-Authored-By: Claude <noreply@anthropic.com>
Co-Authored-By: Happy <yesreply@happy.engineering>
This commit is contained in:
@@ -1,5 +1,9 @@
|
||||
"""Vidzy video hosting service downloader"""
|
||||
import logging
|
||||
import asyncio
|
||||
import re
|
||||
import subprocess
|
||||
import os
|
||||
from typing import Optional
|
||||
from .base import BaseVideoPlayer
|
||||
from bs4 import BeautifulSoup
|
||||
@@ -13,6 +17,7 @@ class VidzyDownloader(BaseVideoPlayer):
|
||||
Downloader for Vidzy video hosting service.
|
||||
|
||||
Vidzy is a video hosting platform used by various anime streaming sites.
|
||||
Uses heavy JavaScript obfuscation, so Playwright is required.
|
||||
"""
|
||||
|
||||
def can_handle(self, url: str) -> bool:
|
||||
@@ -35,9 +40,206 @@ class VidzyDownloader(BaseVideoPlayer):
|
||||
Tuple of (download_url, filename)
|
||||
"""
|
||||
try:
|
||||
# Extract actual Vidzy URL from pipe-separated format if present
|
||||
# Format: video_url|anime_url|episode_title
|
||||
if '|' in url:
|
||||
url = url.split('|')[0].strip()
|
||||
logger.debug(f"Extracted Vidzy URL from pipe format: {url}")
|
||||
|
||||
logger.info(f"Fetching Vidzy URL: {url}")
|
||||
|
||||
# Fetch the page
|
||||
# Try using Playwright first (Vidzy uses heavy JS obfuscation)
|
||||
video_url = await self._extract_with_playwright(url)
|
||||
|
||||
if not video_url:
|
||||
# Fallback to static HTML parsing
|
||||
logger.warning("Playwright extraction failed, trying static parsing...")
|
||||
video_url = await self._extract_static(url)
|
||||
|
||||
if not video_url:
|
||||
raise ValueError(f"Could not extract video URL from Vidzy")
|
||||
|
||||
logger.info(f"Successfully extracted Vidzy URL: {video_url[:100]}...")
|
||||
|
||||
# Generate filename
|
||||
if target_filename:
|
||||
filename = sanitize_filename(target_filename)
|
||||
else:
|
||||
# Try to extract filename from URL
|
||||
filename = video_url.split('/')[-1].split('?')[0]
|
||||
if not filename or len(filename) < 5:
|
||||
filename = "vidzy_video.mp4"
|
||||
filename = sanitize_filename(filename)
|
||||
|
||||
# Ensure .mp4 extension
|
||||
if not filename.endswith('.mp4'):
|
||||
filename += '.mp4'
|
||||
|
||||
# Check if it's an M3U8 playlist (HLS stream)
|
||||
if '.m3u8' in video_url:
|
||||
logger.info(f"Detected M3U8 stream, will download with ffmpeg")
|
||||
|
||||
# Download and convert M3U8 to MP4 directly
|
||||
headers = {
|
||||
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
|
||||
'Referer': 'https://vidzy.org/',
|
||||
}
|
||||
|
||||
mp4_path = await self._download_m3u8_as_mp4(video_url, filename, headers)
|
||||
logger.info(f"Successfully extracted Vidzy download link: {filename}")
|
||||
return mp4_path, filename
|
||||
|
||||
# It's a direct MP4 link
|
||||
logger.info(f"Successfully extracted Vidzy download link: {filename}")
|
||||
return video_url, filename
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error extracting Vidzy download link: {e}")
|
||||
raise ValueError(f"Failed to extract download link from Vidzy: {str(e)}")
|
||||
|
||||
async def _extract_with_playwright(self, url: str) -> Optional[str]:
    """Extract a video URL by loading the page in headless Chromium.

    Two sources are combined:
      1. Network interception: every request whose URL contains
         ``.m3u8``, ``.mp4`` or ``master`` (and is either not on a
         ``vidzy`` host or contains ``master``) is recorded.
      2. In-page JavaScript: the VideoJS player, ``<video>``/``<source>``
         elements and inline ``sources: [{src: ...}]`` script config are
         inspected for a source URL.

    Args:
        url: Address of the player page to open.

    Returns:
        The selected video URL, or ``None`` when Playwright is not
        installed, an error occurs, or nothing was captured. Selection
        order is: a ``master.m3u8`` playlist, then any other ``.m3u8``
        URL, then the first captured URL.
    """
    try:
        from playwright.async_api import async_playwright
    except ImportError:
        logger.warning("Playwright not installed, falling back to static parsing")
        return None

    # Raw string: the embedded JS regex uses backslashes (\s, \[, \/ ...)
    # that would otherwise be invalid Python escape sequences.
    extract_source_js = r"""
        () => {
            // Check if videojs is available
            if (typeof videojs !== 'undefined' && videojs.players) {
                // Get all players
                const players = Object.values(videojs.players);
                if (players.length > 0) {
                    const player = players[0];

                    // Try to get source from player
                    if (player.currentSrc()) {
                        return player.currentSrc();
                    }

                    // Try to get sources array
                    if (player.currentSources() && player.currentSources().length > 0) {
                        return player.currentSources()[0].src;
                    }
                }
            }

            // Check all video elements
            const videos = document.querySelectorAll('video');
            for (let v of videos) {
                if (v.src) {
                    return v.src;
                }
                const sources = v.querySelectorAll('source');
                for (let s of sources) {
                    if (s.src) {
                        return s.src;
                    }
                }
            }

            // Look for sources in scripts (VideoJS config)
            const scripts = document.querySelectorAll('script');
            for (let script of scripts) {
                const text = script.textContent;
                // Look for sources array with .m3u8 URLs
                const sourcesMatch = text.match(/sources\s*:\s*\[\s*\{\s*src\s*:\s*['"](https?:\/\/[^'"]+\.m3u8[^'"]*)['"]/i);
                if (sourcesMatch) {
                    return sourcesMatch[1];
                }
            }

            return null;
        }
    """

    captured_urls = []

    async def handle_request(route):
        """Record requests that look like video files, then let them through."""
        req_url = route.request.url
        lowered = req_url.lower()

        # Look for video files (HLS streams and MP4s)
        if any(marker in lowered for marker in ('.m3u8', '.mp4', 'master')):
            if 'vidzy' not in lowered or 'master' in lowered:
                logger.info(f"🎥 Captured video URL: {req_url[:100]}...")
                captured_urls.append(req_url)

        await route.continue_()

    try:
        logger.info("Launching Playwright for Vidzy...")

        async with async_playwright() as p:
            browser = await p.chromium.launch(
                headless=True,
                args=['--no-sandbox', '--disable-setuid-sandbox', '--disable-dev-shm-usage']
            )
            try:
                context = await browser.new_context(
                    user_agent='Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36'
                )
                page = await context.new_page()

                # Set up request interception
                await page.route('**', handle_request)

                logger.info("Navigating to Vidzy page...")
                try:
                    await page.goto(url, wait_until='domcontentloaded', timeout=30000)
                except Exception as e:
                    # Best effort: requests captured before the failure are still usable.
                    logger.warning(f"Navigation warning: {e}")

                # Wait for page to load and initialize player
                logger.info("Waiting for video player to load...")
                await asyncio.sleep(5)

                # Try JavaScript extraction from VideoJS player
                try:
                    js_result = await page.evaluate(extract_source_js)
                    if js_result and ('.m3u8' in js_result or '.mp4' in js_result):
                        logger.info("Found video URL via JavaScript evaluation")
                        captured_urls.append(js_result)
                except Exception as e:
                    logger.warning(f"JS extraction error: {e}")

                # Wait more for network requests
                await asyncio.sleep(3)
            finally:
                # Always release the browser, even if a step above raised.
                await browser.close()
    except Exception as e:
        logger.warning(f"Playwright error: {e}")
        return None

    # De-duplicate while keeping capture order.
    unique_urls = list(dict.fromkeys(captured_urls))
    if not unique_urls:
        logger.warning("❌ No video URLs found via Playwright")
        return None

    logger.info(f"✅ Found {len(unique_urls)} video URL(s)")

    # Prefer the master playlist, then any other HLS playlist.
    for wanted in ('master.m3u8', '.m3u8'):
        for candidate in unique_urls:
            if wanted in candidate:
                logger.info(f"Using HLS playlist: {candidate[:100]}...")
                return candidate

    # Fall back to first URL
    return unique_urls[0]
|
||||
|
||||
async def _extract_static(self, url: str) -> Optional[str]:
|
||||
"""Static HTML parsing fallback"""
|
||||
try:
|
||||
response = await self.client.get(url)
|
||||
response.raise_for_status()
|
||||
html = response.text
|
||||
@@ -47,65 +249,96 @@ class VidzyDownloader(BaseVideoPlayer):
|
||||
# Method 1: Look for video source in <video> tag
|
||||
video_tag = soup.find('video')
|
||||
if video_tag and video_tag.get('src'):
|
||||
download_url = video_tag['src']
|
||||
logger.info(f"Found video source from <video> tag")
|
||||
else:
|
||||
# Method 2: Look for source in <source> tag
|
||||
source_tag = soup.find('source')
|
||||
if source_tag and source_tag.get('src'):
|
||||
download_url = source_tag['src']
|
||||
logger.info(f"Found video source from <source> tag")
|
||||
else:
|
||||
# Method 3: Look for video URL in JavaScript
|
||||
# Vidzy often stores the video URL in a JavaScript variable
|
||||
scripts = soup.find_all('script')
|
||||
for script in scripts:
|
||||
if script.string:
|
||||
# Look for patterns like 'file:"URL"' or 'file: "URL"'
|
||||
import re
|
||||
patterns = [
|
||||
r'file\s*:\s*["\']([^"\']+\.mp4[^"\']*)["\']',
|
||||
r'source\s*:\s*["\']([^"\']+\.mp4[^"\']*)["\']',
|
||||
r'videoUrl\s*:\s*["\']([^"\']+)["\']',
|
||||
r'"url"\s*:\s*["\']([^"\']+\.mp4[^"\']*)["\']',
|
||||
]
|
||||
for pattern in patterns:
|
||||
match = re.search(pattern, script.string)
|
||||
if match:
|
||||
download_url = match.group(1)
|
||||
logger.info(f"Found video source from JavaScript")
|
||||
break
|
||||
if 'download_url' in locals():
|
||||
break
|
||||
return video_tag['src']
|
||||
|
||||
if 'download_url' not in locals():
|
||||
raise ValueError("Could not find video URL in page")
|
||||
# Method 2: Look for source in <source> tag
|
||||
source_tag = soup.find('source')
|
||||
if source_tag and source_tag.get('src'):
|
||||
logger.info(f"Found video source from <source> tag")
|
||||
return source_tag['src']
|
||||
|
||||
# Ensure URL is absolute
|
||||
if not download_url.startswith('http'):
|
||||
if download_url.startswith('//'):
|
||||
download_url = 'https:' + download_url
|
||||
else:
|
||||
from urllib.parse import urljoin
|
||||
download_url = urljoin(url, download_url)
|
||||
# Method 3: Search entire HTML for .m3u8 URLs (Vidzy uses HLS)
|
||||
html_patterns = [
|
||||
r'(https?://[^\s<>"\'`]+\.m3u8[^\s<>"\'`]*)',
|
||||
r'(https?://[^\s<>"\'`]+/master[^\s<>"\'`]*)',
|
||||
]
|
||||
|
||||
# Generate filename
|
||||
if target_filename:
|
||||
filename = sanitize_filename(target_filename)
|
||||
else:
|
||||
# Try to extract filename from URL
|
||||
filename = download_url.split('/')[-1].split('?')[0]
|
||||
if not filename or len(filename) < 5:
|
||||
filename = "vidzy_video.mp4"
|
||||
filename = sanitize_filename(filename)
|
||||
for pattern in html_patterns:
|
||||
matches = re.findall(pattern, html)
|
||||
if matches:
|
||||
# Filter out obvious false positives
|
||||
for match in matches:
|
||||
# Accept URLs with 'master' or from video hosts
|
||||
if 'master' in match.lower() or any(host in match for host in ['hls', 'video', 'stream']):
|
||||
logger.info(f"Found video URL in HTML: {match[:100]}...")
|
||||
return match
|
||||
|
||||
# Ensure .mp4 extension
|
||||
if not filename.endswith('.mp4'):
|
||||
filename += '.mp4'
|
||||
|
||||
logger.info(f"Successfully extracted Vidzy download link: {filename}")
|
||||
return download_url, filename
|
||||
logger.warning("Static parsing failed to find video URL")
|
||||
return None
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error extracting Vidzy download link: {e}")
|
||||
raise ValueError(f"Failed to extract download link from Vidzy: {str(e)}")
|
||||
logger.warning(f"Static parsing error: {e}")
|
||||
return None
|
||||
|
||||
async def _download_m3u8_as_mp4(self, m3u8_url: str, filename: str, headers: dict, download_dir: str = "downloads") -> str:
    """Download an M3U8 (HLS) stream and save it as an MP4 using ffmpeg.

    ffmpeg is run with stream copy (``-c copy``), so the media is remuxed
    rather than re-encoded. Its output is written to ``<output>.log`` next
    to the target file.

    Args:
        m3u8_url: URL of the HLS playlist passed to ffmpeg as input.
        filename: Name of the output file inside ``download_dir``.
        headers: HTTP headers to send with ffmpeg's requests.
        download_dir: Directory for the output file; created if missing.

    Returns:
        Path of the written MP4 file. A file larger than 1000 bytes is
        accepted even when ffmpeg exits non-zero or hits the timeout.

    Raises:
        Exception: If ffmpeg is not installed, times out without producing
            a usable file, or produces no usable output.
    """
    # Create downloads directory if it doesn't exist
    os.makedirs(download_dir, exist_ok=True)

    output_path = os.path.join(download_dir, filename)
    log_path = output_path + '.log'

    cmd = ['ffmpeg']
    if headers:
        # ffmpeg keeps only the LAST -headers option, so all headers must be
        # sent as one value with each header terminated by CRLF.
        header_blob = ''.join(f'{key}: {value}\r\n' for key, value in headers.items())
        cmd.extend(['-headers', header_blob])
    cmd.extend([
        '-i', m3u8_url,
        '-c', 'copy',
        '-bsf:a', 'aac_adtstoasc',
        '-y',
        output_path,
    ])

    def run_ffmpeg():
        """Run ffmpeg to completion, sending its output to the log file."""
        # Output goes to a log file instead of a pipe to avoid buffering issues.
        with open(log_path, 'w') as log_file:
            return subprocess.run(
                cmd,
                stdin=subprocess.DEVNULL,  # ffmpeg must not wait on or consume our stdin
                stdout=log_file,
                stderr=log_file,
                timeout=600  # 10 minutes for very long videos
            )

    try:
        logger.info("Downloading M3U8 with ffmpeg...")
        logger.info(f"URL: {m3u8_url[:80]}...")
        logger.info(f"Output: {output_path}")

        # subprocess.run blocks; run it in a worker thread so the event loop
        # stays responsive for the duration of the download.
        loop = asyncio.get_running_loop()
        result = await loop.run_in_executor(None, run_ffmpeg)

        if result.returncode != 0:
            logger.warning(f"ffmpeg exited with code {result.returncode}, see {log_path}")

        # Check if file was created even if ffmpeg had issues
        if os.path.exists(output_path):
            file_size = os.path.getsize(output_path)
            if file_size > 1000:  # At least 1KB
                logger.info(f"✅ Download complete: {file_size / (1024*1024):.2f} MB")
                return output_path

        # If we get here, something went wrong
        raise Exception("FFmpeg failed - no output file created")

    except subprocess.TimeoutExpired as e:
        # Check if file was created despite timeout
        if os.path.exists(output_path):
            file_size = os.path.getsize(output_path)
            if file_size > 1000:  # At least 1KB
                logger.warning(f"⚠️ Timeout but file created: {file_size / (1024*1024):.2f} MB")
                return output_path
        raise Exception("FFmpeg timeout (10 minutes) - video too large") from e

    except FileNotFoundError as e:
        raise Exception("ffmpeg not found - please install ffmpeg: apt install ffmpeg") from e
    except Exception as e:
        raise Exception(f"Error downloading M3U8: {str(e)}") from e
|
||||
|
||||
Reference in New Issue
Block a user