"""Download-link extraction for smoothpre.com (JWPlayer-based) video pages."""

import asyncio
import re
from typing import Optional

from bs4 import BeautifulSoup

from .base import BaseVideoPlayer


class SmoothpreDownloader(BaseVideoPlayer):
    """Downloader for smoothpre.com video player (JWPlayer-based)"""

    # User agent presented by the headless browser context.
    _USER_AGENT = (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
        '(KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36'
    )

    # Regexes applied to inline <script> text (HTTP fallback). Compiled once
    # at class creation instead of on every call / every script tag.
    _SCRIPT_URL_PATTERNS = tuple(
        re.compile(pattern, re.IGNORECASE)
        for pattern in (
            r'"file"\s*:\s*"([^"]+\.m3u8[^"]*)"',
            r'"file"\s*:\s*"([^"]+\.mp4[^"]*)"',
            r'"source"\s*:\s*"([^"]+\.m3u8[^"]*)"',
            r'"source"\s*:\s*"([^"]+\.mp4[^"]*)"',
            r'(https?://[^\s"\'<>]+\.m3u8[^\s"\'<>]*)',
            r'(https?://[^\s"\'<>]+\.mp4[^\s"\'<>]*)',
        )
    )

    # Regexes applied to the rendered page HTML (Playwright path): the script
    # patterns above plus two looser `url: '...'` / `url = "..."` forms.
    _HTML_URL_PATTERNS = _SCRIPT_URL_PATTERNS + tuple(
        re.compile(pattern, re.IGNORECASE)
        for pattern in (
            r"url\s*[:=]\s*['\"]([^'\"]+\.m3u8[^'\"]*)['\"]",
            r"url\s*[:=]\s*['\"]([^'\"]+\.mp4[^'\"]*)['\"]",
        )
    )

    # Selectors probed, in order, to start playback. Only selectors containing
    # 'button' or 'jw' are clicked; 'video' is merely detected.
    _PLAY_SELECTORS = (
        'button[aria-label="Play"]',
        '.play-button',
        'button[class*="play"]',
        '.jw-icon-display',
        'video',
    )

    # Evaluated inside the page. Looks, in order, at the JWPlayer playlist,
    # <video>/<source> elements, and a few well-known window properties.
    # Returns the first .m3u8/.mp4 URL found, or null.
    _JWPLAYER_EXTRACT_JS = r"""
        () => {
            // Check for JWPlayer setup (primary method for Smoothpre)
            if (window.jwplayer) {
                try {
                    const playlist = window.jwplayer().getPlaylist();
                    if (playlist && playlist[0] && playlist[0].sources) {
                        for (let source of playlist[0].sources) {
                            if (source.file && (source.file.includes('.m3u8') || source.file.includes('.mp4'))) {
                                return source.file;
                            }
                        }
                    }
                } catch(e) {}
            }

            // Check all video elements
            const videos = document.querySelectorAll('video');
            for (let v of videos) {
                if (v.src && (v.src.includes('.m3u8') || v.src.includes('.mp4'))) {
                    return v.src;
                }
                const sources = v.querySelectorAll('source');
                for (let s of sources) {
                    if (s.src && (s.src.includes('.m3u8') || s.src.includes('.mp4'))) {
                        return s.src;
                    }
                }
            }

            // Check window object for video URLs
            const searchKeys = ['player', 'video', 'source', 'file', 'url', 'jw'];
            for (let key of searchKeys) {
                if (window[key] && typeof window[key] === 'object') {
                    try {
                        const json = JSON.stringify(window[key]);
                        const match = json.match(/(https?:\/\/[^\s"\'<>]+\.(m3u8|mp4))/);
                        if (match) return match[1];
                    } catch(e) {}
                }
            }

            return null;
        }
    """

    def can_handle(self, url: str) -> bool:
        """Return True when *url* contains 'smoothpre.com' (case-insensitive)."""
        return 'smoothpre.com' in url.lower()

    async def get_download_link(self, url: str, target_filename: Optional[str] = None) -> tuple[str, str]:
        """
        Extract download link from Smoothpre video page

        Smoothpre uses JWPlayer with dynamic JavaScript, so extraction is
        attempted with Playwright first; that path itself falls back to a
        plain HTTP fetch when Playwright is missing or fails.

        Args:
            url: The Smoothpre video page URL
            target_filename: Optional filename override; passed through
                ``sanitize_filename``. Defaults to ``smoothpre_video.mp4``.

        Returns:
            Tuple of (direct_video_url, filename)

        Raises:
            Exception: If no video URL is found or any step fails. The
                original error is attached as ``__cause__``.
        """
        try:
            print(f"[SMOOTHPRE] Extracting link from: {url}")

            # Try using Playwright to extract video URL
            video_url = await self._extract_with_playwright(url)
            if not video_url:
                raise Exception("Could not find video URL in Smoothpre page")

            print(f"[SMOOTHPRE] Found video URL: {video_url[:80]}...")

            # Imported lazily, as in the original code, so the module can be
            # loaded without importing app.utils up front.
            from app.utils import sanitize_filename

            if target_filename:
                filename = sanitize_filename(target_filename)
            else:
                filename = "smoothpre_video.mp4"

            return video_url, filename
        except Exception as e:
            # Chain the cause so the original traceback is not lost.
            raise Exception(f"Error extracting Smoothpre link: {str(e)}") from e

    async def _extract_with_playwright(self, url: str) -> str | None:
        """Extract video URL using Playwright with network interception.

        Candidates are collected from three places: intercepted network
        responses, a JavaScript probe of the page, and regexes over the
        rendered HTML. The best candidate is then chosen by
        ``_select_best_url``.

        Returns:
            The selected video URL, or None if nothing was found. When
            Playwright is not installed or raises, the result of
            ``_extract_with_http`` is returned instead.
        """
        try:
            from playwright.async_api import async_playwright

            print("[SMOOTHPRE] Launching browser with network interception...")

            video_urls: list[str] = []

            async def handle_response(response):
                """Record responses that look like video by URL or content-type."""
                try:
                    resp_url = response.url
                    lowered_url = resp_url.lower()
                    content_type = response.headers.get('content-type', '')

                    # Look for video files in responses
                    if any(ext in lowered_url for ext in ('.m3u8', '.mp4', '.mkv', '.ts')):
                        if 'smoothpre' not in lowered_url and 'google' not in lowered_url:
                            print(f"[SMOOTHPRE] 🎥 Captured video URL: {resp_url[:100]}...")
                            video_urls.append(resp_url)
                    # Also check by content-type
                    elif any(ct in content_type.lower() for ct in ('video/', 'application/x-mpegurl')):
                        if 'smoothpre' not in lowered_url:
                            print(f"[SMOOTHPRE] 🎥 Captured video response: {resp_url[:100]}...")
                            video_urls.append(resp_url)
                except Exception:
                    # Deliberate best-effort: a failing interception must
                    # never break page loading.
                    pass

            async with async_playwright() as p:
                browser = await p.chromium.launch(
                    headless=True,
                    args=['--no-sandbox', '--disable-setuid-sandbox', '--disable-dev-shm-usage'],
                )
                try:
                    context = await browser.new_context(user_agent=self._USER_AGENT)
                    page = await context.new_page()

                    # Set up response interception
                    page.on('response', handle_response)

                    print("[SMOOTHPRE] Navigating to page...")
                    try:
                        await page.goto(url, wait_until='networkidle', timeout=30000)
                    except Exception as e:
                        # A navigation timeout is not fatal: the page may
                        # already have issued the requests we care about.
                        print(f"[SMOOTHPRE] Navigation warning: {e}")

                    # Wait for page to load
                    print("[SMOOTHPRE] Waiting for video player to load...")
                    await asyncio.sleep(3)

                    await self._try_start_playback(page)

                    # Wait more for network requests
                    await asyncio.sleep(4)

                    js_url = await self._extract_from_js(page)
                    if js_url:
                        video_urls.append(js_url)

                    video_urls.extend(await self._extract_from_html(page))
                finally:
                    # Close the browser even when a step above raises or the
                    # task is cancelled.
                    await browser.close()

            return self._select_best_url(video_urls)

        except ImportError:
            print("[SMOOTHPRE] ⚠️ Playwright not installed - falling back to HTTP extraction")
            return await self._extract_with_http(url)
        except Exception as e:
            print(f"[SMOOTHPRE] Playwright error: {e}")
            import traceback
            traceback.print_exc()
            # Fallback to HTTP extraction
            return await self._extract_with_http(url)

    async def _try_start_playback(self, page) -> None:
        """Probe the known play selectors and click the first clickable hit."""
        for selector in self._PLAY_SELECTORS:
            try:
                element = await page.query_selector(selector)
                if not element:
                    continue
                print(f"[SMOOTHPRE] Found element: {selector}")
                if 'button' in selector or 'jw' in selector:
                    await element.click()
                    await asyncio.sleep(2)
                break
            except Exception as e:
                # `except Exception` (not a bare except) so task cancellation
                # and KeyboardInterrupt still propagate.
                print(f"[SMOOTHPRE] Play button interaction: {e}")
                continue

    async def _extract_from_js(self, page) -> str | None:
        """Run the in-page JavaScript probe and return its URL, if any."""
        try:
            js_result = await page.evaluate(self._JWPLAYER_EXTRACT_JS)
            # The probe may hand back a non-string (e.g. an array `file`);
            # only a string is usable as a URL.
            if isinstance(js_result, str) and ('.m3u8' in js_result or '.mp4' in js_result):
                print(f"[SMOOTHPRE] ✅ Found video URL via JavaScript: {js_result[:100]}...")
                return js_result
        except Exception as e:
            print(f"[SMOOTHPRE] JS extraction error: {e}")
        return None

    async def _extract_from_html(self, page) -> list[str]:
        """Scan the rendered page HTML for video URLs using the HTML patterns."""
        found: list[str] = []
        try:
            content = await page.content()
            for pattern in self._HTML_URL_PATTERNS:
                for match in pattern.findall(content):
                    # Clean up the URL (undo JSON-style escaping)
                    match = match.replace('\\/', '/').replace('\\', '')
                    # Compare on a lowercased copy so the host filter is
                    # case-insensitive like the other extraction paths.
                    lowered = match.lower()
                    if 'http' in lowered and 'smoothpre' not in lowered and 'google' not in lowered:
                        print(f"[SMOOTHPRE] Found in HTML: {match[:100]}...")
                        found.append(match)
        except Exception as e:
            print(f"[SMOOTHPRE] HTML parsing error: {e}")
        return found

    @staticmethod
    def _select_best_url(video_urls: list[str]) -> str | None:
        """De-duplicate candidates (keeping order) and prefer '.m3u8' URLs."""
        # dict.fromkeys keeps first-seen order while dropping duplicates.
        unique_urls = list(dict.fromkeys(video_urls))
        if not unique_urls:
            print("[SMOOTHPRE] ❌ No video URLs found")
            return None

        # Stable sort: .m3u8 (source quality) first, otherwise capture order.
        unique_urls.sort(key=lambda candidate: 0 if '.m3u8' in candidate else 1)
        print(f"[SMOOTHPRE] ✅ Found {len(unique_urls)} video URL(s)")
        print(f"[SMOOTHPRE] Selected: {unique_urls[0][:100]}...")
        return unique_urls[0]

    async def _extract_with_http(self, url: str) -> str | None:
        """Extract video URL using simple HTTP requests (fallback when Playwright fails)

        Fetches the page with ``self.client`` and inspects, in order,
        <video>/<source> tags and inline <script> text.

        Returns:
            The first matching URL, or None when nothing matches or the
            request/parsing raises.
        """
        try:
            print(f"[SMOOTHPRE] Trying HTTP extraction from: {url}")

            response = await self.client.get(url, follow_redirects=True)
            soup = BeautifulSoup(response.text, 'lxml')
            video_exts = ('.m3u8', '.mp4')

            # Method 1: Look for video/source tags
            for video in soup.find_all('video'):
                src = video.get('src') or video.get('data-src')
                if src and any(ext in src for ext in video_exts):
                    print(f"[SMOOTHPRE] ✅ Found video in video tag: {src[:100]}...")
                    return src

                for source in video.find_all('source'):
                    src = source.get('src')
                    if src and any(ext in src for ext in video_exts):
                        print(f"[SMOOTHPRE] ✅ Found video in source tag: {src[:100]}...")
                        return src

            # Method 2: Look in script tags for JWPlayer configuration
            for script in soup.find_all('script'):
                if not script.string:
                    continue
                for pattern in self._SCRIPT_URL_PATTERNS:
                    for match in pattern.findall(script.string):
                        match = match.replace('\\/', '/')
                        lowered = match.lower()
                        if 'http' in lowered and 'smoothpre' not in lowered:
                            print(f"[SMOOTHPRE] ✅ Found video in script: {match[:100]}...")
                            return match

            print("[SMOOTHPRE] ❌ HTTP extraction failed - no video URLs found")
            return None

        except Exception as e:
            print(f"[SMOOTHPRE] HTTP extraction error: {e}")
            return None