"""Vidzy video hosting service downloader""" import logging import asyncio import re import subprocess import os from typing import Optional from .base import BaseVideoPlayer from bs4 import BeautifulSoup from app.utils import sanitize_filename logger = logging.getLogger(__name__) class VidzyDownloader(BaseVideoPlayer): """ Downloader for Vidzy video hosting service. Vidzy is a video hosting platform used by various anime streaming sites. Uses heavy JavaScript obfuscation, so Playwright is required. """ def can_handle(self, url: str) -> bool: """Check if this downloader can handle the given URL""" return "vidzy" in url.lower() async def get_download_link( self, url: str, target_filename: Optional[str] = None ) -> tuple[str, str]: """ Extract direct download link and filename from Vidzy URL. Args: url: The Vidzy video player URL target_filename: Optional filename override Returns: Tuple of (download_url, filename) """ try: # Extract actual Vidzy URL from pipe-separated format if present # Format: video_url|anime_url|episode_title if '|' in url: url = url.split('|')[0].strip() logger.debug(f"Extracted Vidzy URL from pipe format: {url}") logger.info(f"Fetching Vidzy URL: {url}") # Try using Playwright first (Vidzy uses heavy JS obfuscation) video_url = await self._extract_with_playwright(url) if not video_url: # Fallback to static HTML parsing logger.warning("Playwright extraction failed, trying static parsing...") video_url = await self._extract_static(url) if not video_url: raise ValueError(f"Could not extract video URL from Vidzy") logger.info(f"Successfully extracted Vidzy URL: {video_url[:100]}...") # Generate filename if target_filename: filename = sanitize_filename(target_filename) else: # Try to extract filename from URL filename = video_url.split('/')[-1].split('?')[0] if not filename or len(filename) < 5: filename = "vidzy_video.mp4" filename = sanitize_filename(filename) # Ensure .mp4 extension if not filename.endswith('.mp4'): filename += '.mp4' # Check if it's an M3U8 playlist (HLS stream) if '.m3u8' in video_url: logger.info(f"Detected M3U8 stream, will download with ffmpeg") # Download and convert M3U8 to MP4 directly headers = { 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36', 'Referer': 'https://vidzy.org/', } mp4_path = await self._download_m3u8_as_mp4(video_url, filename, headers) logger.info(f"Successfully extracted Vidzy download link: {filename}") return mp4_path, filename # It's a direct MP4 link logger.info(f"Successfully extracted Vidzy download link: {filename}") return video_url, filename except Exception as e: logger.error(f"Error extracting Vidzy download link: {e}") raise ValueError(f"Failed to extract download link from Vidzy: {str(e)}") async def _extract_with_playwright(self, url: str) -> Optional[str]: """Extract video URL using Playwright with network interception""" try: from playwright.async_api import async_playwright logger.info("Launching Playwright for Vidzy...") video_urls = [] async with async_playwright() as p: browser = await p.chromium.launch( headless=True, args=['--no-sandbox', '--disable-setuid-sandbox', '--disable-dev-shm-usage'] ) context = await browser.new_context( user_agent='Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36' ) page = await context.new_page() # Set up request interception async def handle_request(route): req_url = route.request.url # Look for video files (HLS streams and MP4s) if any(ext in req_url.lower() for ext in ['.m3u8', '.mp4', 'master']): if 'vidzy' not in req_url.lower() or 'master' in req_url.lower(): logger.info(f"🎥 Captured video URL: {req_url[:100]}...") video_urls.append(req_url) await route.continue_() await page.route('**', handle_request) logger.info("Navigating to Vidzy page...") try: await page.goto(url, wait_until='domcontentloaded', timeout=30000) except Exception as e: logger.warning(f"Navigation warning: {e}") # Wait for page to load and initialize player logger.info("Waiting for video player to load...") await asyncio.sleep(5) # Try JavaScript extraction from VideoJS player try: js_result = await page.evaluate(""" () => { // Check if videojs is available if (typeof videojs !== 'undefined' && videojs.players) { // Get all players const players = Object.values(videojs.players); if (players.length > 0) { const player = players[0]; // Try to get source from player if (player.currentSrc()) { return player.currentSrc(); } // Try to get sources array if (player.currentSources() && player.currentSources().length > 0) { return player.currentSources()[0].src; } } } // Check all video elements const videos = document.querySelectorAll('video'); for (let v of videos) { if (v.src) { return v.src; } const sources = v.querySelectorAll('source'); for (let s of sources) { if (s.src) { return s.src; } } } // Look for sources in scripts (VideoJS config) const scripts = document.querySelectorAll('script'); for (let script of scripts) { const text = script.textContent; // Look for sources array with .m3u8 URLs const sourcesMatch = text.match(/sources\s*:\s*\[\s*\{\s*src\s*:\s*['"](https?:\/\/[^'"]+\.m3u8[^'"]*)['"]/i); if (sourcesMatch) { return sourcesMatch[1]; } } return null; } """) if js_result and ('.m3u8' in js_result or '.mp4' in js_result): logger.info(f"Found video URL via JavaScript evaluation") video_urls.append(js_result) except Exception as e: logger.warning(f"JS extraction error: {e}") # Wait more for network requests await asyncio.sleep(3) await browser.close() # Return best video URL (prefer master.m3u8 for HLS) if video_urls: seen = set() unique_urls = [] for url in video_urls: if url not in seen: seen.add(url) unique_urls.append(url) if unique_urls: logger.info(f"✅ Found {len(unique_urls)} video URL(s)") # Prefer master.m3u8 (HLS playlist) for url in unique_urls: if 'master.m3u8' in url or '.m3u8' in url: logger.info(f"Using HLS playlist: {url[:100]}...") return url # Fall back to first URL return unique_urls[0] logger.warning("❌ No video URLs found via Playwright") return None except ImportError: logger.warning("Playwright not installed, falling back to static parsing") return None except Exception as e: logger.warning(f"Playwright error: {e}") return None async def _extract_static(self, url: str) -> Optional[str]: """Static HTML parsing fallback""" try: response = await self.client.get(url) response.raise_for_status() html = response.text soup = BeautifulSoup(html, 'lxml') # Method 1: Look for video source in