Files
ohm_streaming/app/downloaders/video_players/vidzy.py
root c1c31d7685 feat: Add series TV support with Vidzy HLS downloads and duplicate prevention
Major improvements:
- Series TV support via FS7 provider with dedicated search endpoint
- Vidzy downloader now uses Playwright for JS obfuscation and ffmpeg for HLS streams
- Episode filenames properly named (Series Title - Episode X) instead of master.m3u8.mp4
- Duplicate download prevention: checks existing tasks before creating new ones
- Removed host preference system in favor of intelligent URL-based detection

Technical changes:
- Vidzy: Added Playwright extraction and M3U8→MP4 conversion with ffmpeg
- FS7: Episodes now use pipe format (video_url|series_url|episode_title)
- DownloadManager: Extract target_filename from pipe URL and prevent duplicates
- UI: New Series tab with search, recommendations, and releases sections
- Anime-Sama: Removed hardcoded host preferences, uses site's URL order

Generated with [Claude Code](https://claude.com/claude-code)
via [Happy](https://happy.engineering)

Co-Authored-By: Claude <noreply@anthropic.com>
Co-Authored-By: Happy <yesreply@happy.engineering>
2026-01-25 20:42:29 +00:00

345 lines
14 KiB
Python

"""Vidzy video hosting service downloader"""
import logging
import asyncio
import re
import subprocess
import os
from typing import Optional
from .base import BaseVideoPlayer
from bs4 import BeautifulSoup
from app.utils import sanitize_filename
logger = logging.getLogger(__name__)
class VidzyDownloader(BaseVideoPlayer):
"""
Downloader for Vidzy video hosting service.
Vidzy is a video hosting platform used by various anime streaming sites.
Uses heavy JavaScript obfuscation, so Playwright is required.
"""
def can_handle(self, url: str) -> bool:
"""Check if this downloader can handle the given URL"""
return "vidzy" in url.lower()
async def get_download_link(
self,
url: str,
target_filename: Optional[str] = None
) -> tuple[str, str]:
"""
Extract direct download link and filename from Vidzy URL.
Args:
url: The Vidzy video player URL
target_filename: Optional filename override
Returns:
Tuple of (download_url, filename)
"""
try:
# Extract actual Vidzy URL from pipe-separated format if present
# Format: video_url|anime_url|episode_title
if '|' in url:
url = url.split('|')[0].strip()
logger.debug(f"Extracted Vidzy URL from pipe format: {url}")
logger.info(f"Fetching Vidzy URL: {url}")
# Try using Playwright first (Vidzy uses heavy JS obfuscation)
video_url = await self._extract_with_playwright(url)
if not video_url:
# Fallback to static HTML parsing
logger.warning("Playwright extraction failed, trying static parsing...")
video_url = await self._extract_static(url)
if not video_url:
raise ValueError(f"Could not extract video URL from Vidzy")
logger.info(f"Successfully extracted Vidzy URL: {video_url[:100]}...")
# Generate filename
if target_filename:
filename = sanitize_filename(target_filename)
else:
# Try to extract filename from URL
filename = video_url.split('/')[-1].split('?')[0]
if not filename or len(filename) < 5:
filename = "vidzy_video.mp4"
filename = sanitize_filename(filename)
# Ensure .mp4 extension
if not filename.endswith('.mp4'):
filename += '.mp4'
# Check if it's an M3U8 playlist (HLS stream)
if '.m3u8' in video_url:
logger.info(f"Detected M3U8 stream, will download with ffmpeg")
# Download and convert M3U8 to MP4 directly
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
'Referer': 'https://vidzy.org/',
}
mp4_path = await self._download_m3u8_as_mp4(video_url, filename, headers)
logger.info(f"Successfully extracted Vidzy download link: {filename}")
return mp4_path, filename
# It's a direct MP4 link
logger.info(f"Successfully extracted Vidzy download link: {filename}")
return video_url, filename
except Exception as e:
logger.error(f"Error extracting Vidzy download link: {e}")
raise ValueError(f"Failed to extract download link from Vidzy: {str(e)}")
async def _extract_with_playwright(self, url: str) -> Optional[str]:
"""Extract video URL using Playwright with network interception"""
try:
from playwright.async_api import async_playwright
logger.info("Launching Playwright for Vidzy...")
video_urls = []
async with async_playwright() as p:
browser = await p.chromium.launch(
headless=True,
args=['--no-sandbox', '--disable-setuid-sandbox', '--disable-dev-shm-usage']
)
context = await browser.new_context(
user_agent='Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36'
)
page = await context.new_page()
# Set up request interception
async def handle_request(route):
req_url = route.request.url
# Look for video files (HLS streams and MP4s)
if any(ext in req_url.lower() for ext in ['.m3u8', '.mp4', 'master']):
if 'vidzy' not in req_url.lower() or 'master' in req_url.lower():
logger.info(f"🎥 Captured video URL: {req_url[:100]}...")
video_urls.append(req_url)
await route.continue_()
await page.route('**', handle_request)
logger.info("Navigating to Vidzy page...")
try:
await page.goto(url, wait_until='domcontentloaded', timeout=30000)
except Exception as e:
logger.warning(f"Navigation warning: {e}")
# Wait for page to load and initialize player
logger.info("Waiting for video player to load...")
await asyncio.sleep(5)
# Try JavaScript extraction from VideoJS player
try:
js_result = await page.evaluate("""
() => {
// Check if videojs is available
if (typeof videojs !== 'undefined' && videojs.players) {
// Get all players
const players = Object.values(videojs.players);
if (players.length > 0) {
const player = players[0];
// Try to get source from player
if (player.currentSrc()) {
return player.currentSrc();
}
// Try to get sources array
if (player.currentSources() && player.currentSources().length > 0) {
return player.currentSources()[0].src;
}
}
}
// Check all video elements
const videos = document.querySelectorAll('video');
for (let v of videos) {
if (v.src) {
return v.src;
}
const sources = v.querySelectorAll('source');
for (let s of sources) {
if (s.src) {
return s.src;
}
}
}
// Look for sources in scripts (VideoJS config)
const scripts = document.querySelectorAll('script');
for (let script of scripts) {
const text = script.textContent;
// Look for sources array with .m3u8 URLs
const sourcesMatch = text.match(/sources\s*:\s*\[\s*\{\s*src\s*:\s*['"](https?:\/\/[^'"]+\.m3u8[^'"]*)['"]/i);
if (sourcesMatch) {
return sourcesMatch[1];
}
}
return null;
}
""")
if js_result and ('.m3u8' in js_result or '.mp4' in js_result):
logger.info(f"Found video URL via JavaScript evaluation")
video_urls.append(js_result)
except Exception as e:
logger.warning(f"JS extraction error: {e}")
# Wait more for network requests
await asyncio.sleep(3)
await browser.close()
# Return best video URL (prefer master.m3u8 for HLS)
if video_urls:
seen = set()
unique_urls = []
for url in video_urls:
if url not in seen:
seen.add(url)
unique_urls.append(url)
if unique_urls:
logger.info(f"✅ Found {len(unique_urls)} video URL(s)")
# Prefer master.m3u8 (HLS playlist)
for url in unique_urls:
if 'master.m3u8' in url or '.m3u8' in url:
logger.info(f"Using HLS playlist: {url[:100]}...")
return url
# Fall back to first URL
return unique_urls[0]
logger.warning("❌ No video URLs found via Playwright")
return None
except ImportError:
logger.warning("Playwright not installed, falling back to static parsing")
return None
except Exception as e:
logger.warning(f"Playwright error: {e}")
return None
async def _extract_static(self, url: str) -> Optional[str]:
"""Static HTML parsing fallback"""
try:
response = await self.client.get(url)
response.raise_for_status()
html = response.text
soup = BeautifulSoup(html, 'lxml')
# Method 1: Look for video source in <video> tag
video_tag = soup.find('video')
if video_tag and video_tag.get('src'):
logger.info(f"Found video source from <video> tag")
return video_tag['src']
# Method 2: Look for source in <source> tag
source_tag = soup.find('source')
if source_tag and source_tag.get('src'):
logger.info(f"Found video source from <source> tag")
return source_tag['src']
# Method 3: Search entire HTML for .m3u8 URLs (Vidzy uses HLS)
html_patterns = [
r'(https?://[^\s<>"\'`]+\.m3u8[^\s<>"\'`]*)',
r'(https?://[^\s<>"\'`]+/master[^\s<>"\'`]*)',
]
for pattern in html_patterns:
matches = re.findall(pattern, html)
if matches:
# Filter out obvious false positives
for match in matches:
# Accept URLs with 'master' or from video hosts
if 'master' in match.lower() or any(host in match for host in ['hls', 'video', 'stream']):
logger.info(f"Found video URL in HTML: {match[:100]}...")
return match
logger.warning("Static parsing failed to find video URL")
return None
except Exception as e:
logger.warning(f"Static parsing error: {e}")
return None
async def _download_m3u8_as_mp4(self, m3u8_url: str, filename: str, headers: dict, download_dir: str = "downloads") -> str:
"""Download M3U8 stream and convert to MP4 using ffmpeg"""
# Create downloads directory if it doesn't exist
os.makedirs(download_dir, exist_ok=True)
output_path = os.path.join(download_dir, filename)
# Build headers for ffmpeg - using multiple -headers options
header_args = []
for key, value in headers.items():
header_args.extend(['-headers', f'{key}: {value}'])
cmd = [
'ffmpeg',
*header_args,
'-i', m3u8_url,
'-c', 'copy',
'-bsf:a', 'aac_adtstoasc',
'-y',
output_path
]
try:
logger.info(f"Downloading M3U8 with ffmpeg...")
logger.info(f"URL: {m3u8_url[:80]}...")
logger.info(f"Output: {output_path}")
# Run ffmpeg without capturing output to avoid buffering issues
# Use a log file instead
log_path = output_path + '.log'
with open(log_path, 'w') as log_file:
result = subprocess.run(
cmd,
stdout=log_file,
stderr=log_file,
timeout=600 # 10 minutes for very long videos
)
# Check if file was created even if ffmpeg had issues
if os.path.exists(output_path):
file_size = os.path.getsize(output_path)
if file_size > 1000: # At least 1KB
logger.info(f"✅ Download complete: {file_size / (1024*1024):.2f} MB")
return output_path
# If we get here, something went wrong
raise Exception(f"FFmpeg failed - no output file created")
except subprocess.TimeoutExpired:
# Check if file was created despite timeout
if os.path.exists(output_path):
file_size = os.path.getsize(output_path)
if file_size > 1000: # At least 1KB
logger.warning(f"⚠️ Timeout but file created: {file_size / (1024*1024):.2f} MB")
return output_path
raise Exception("FFmpeg timeout (10 minutes) - video too large")
except FileNotFoundError:
raise Exception("ffmpeg not found - please install ffmpeg: apt install ffmpeg")
except Exception as e:
raise Exception(f"Error downloading M3U8: {str(e)}")