from .base import BaseVideoPlayer from bs4 import BeautifulSoup import re import asyncio from typing import Optional import httpx class LpayerDownloader(BaseVideoPlayer): """Downloader for lpayer.embed4me.com video player""" def can_handle(self, url: str) -> bool: return 'lpayer.embed4me.com' in url.lower() async def get_download_link(self, url: str, target_filename: Optional[str] = None) -> tuple[str, str]: """ Extract download link from Lpayer video page. Uses Playwright for JavaScript rendering, falls back to HTML parsing. """ try: print(f"[LPAYER] Extracting link from: {url}") # Try Playwright first (handles JavaScript-rendered pages) video_url = await self._extract_with_playwright(url) if not video_url: # Fallback to HTML parsing print("[LPAYER] Playwright failed, trying HTML parsing fallback...") video_url = await self._extract_with_http(url) if not video_url: raise Exception("Could not find video URL in Lpayer page") print(f"[LPAYER] Found video URL: {video_url[:80]}...") # Use target_filename if provided, otherwise generate default if target_filename: filename = target_filename else: filename = "lpayer_video.mp4" # Ensure .mp4 extension if direct MP4 if video_url.endswith('.mp4') and not filename.endswith('.mp4'): filename += '.mp4' return video_url, filename except Exception as e: raise Exception(f"Error extracting Lpayer link: {str(e)}") async def _extract_with_playwright(self, url: str) -> Optional[str]: """Extract video URL using Playwright to render JavaScript""" browser = None try: from playwright.async_api import async_playwright print("[LPAYER] Launching Playwright browser...") video_urls = [] async with async_playwright() as p: browser = await p.chromium.launch( headless=True, args=[ '--no-sandbox', '--disable-setuid-sandbox', '--disable-dev-shm-usage', '--disable-blink-features=AutomationControlled', '--disable-features=IsolateOrigins,site-per-process', ] ) context = await browser.new_context( user_agent='Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36', viewport={'width': 1920, 'height': 1080} ) page = await context.new_page() # Set up request interception to capture video requests async def handle_request(route): req_url = route.request.url if any(ext in req_url.lower() for ext in ['.m3u8', '.mp4', '.mkv']): if 'lpayer' not in req_url.lower(): print(f"[LPAYER] 🎥 Captured video URL: {req_url[:100]}...") video_urls.append(req_url) await route.continue_() await page.route('**', handle_request) # Navigate to URL with timeout print("[LPAYER] Navigating to page...") try: await page.goto(url, wait_until='domcontentloaded', timeout=30000) except Exception as e: print(f"[LPAYER] Navigation warning: {e}") # Wait for JavaScript to execute print("[LPAYER] Waiting for video player to load...") await asyncio.sleep(5) # Try to interact with player to trigger video load try: await page.mouse.click(640, 360) await asyncio.sleep(3) except: pass # Try JavaScript extraction to find video URLs in DOM try: js_result = await page.evaluate(""" () => { // Check all video elements const videos = document.querySelectorAll('video'); for (let v of videos) { if (v.src && (v.src.includes('.m3u8') || v.src.includes('.mp4'))) { console.log('Found video src:', v.src); return v.src; } const sources = v.querySelectorAll('source'); for (let s of sources) { if (s.src && (s.src.includes('.m3u8') || s.src.includes('.mp4'))) { console.log('Found source src:', s.src); return s.src; } } } // Check for jwplayer if (window.jwplayer) { try { const player = jwplayer(); const playlist = player.getPlaylist(); if (playlist && playlist[0] && playlist[0].sources) { const src = playlist[0].sources[0].file; console.log('Found jwplayer source:', src); return src; } } catch(e) { console.log('jwplayer error:', e); } } // Check for VidStack player const player = document.querySelector('media-player'); if (player && player.provider) { const provider = player.provider; // Try to get source from provider if (provider.src) return provider.src; if (provider.currentSrc) return provider.currentSrc; if (provider.url) return provider.url; if (provider.videoUrl) return provider.videoUrl; // Check internal properties for (let key in provider) { try { const val = provider[key]; if (typeof val === 'string' && (val.includes('.m3u8') || val.includes('.mp4')) && val.startsWith('http')) { return val; } } catch(e) {} } } // Look for video URLs in window object for (let key in window) { if (typeof window[key] === 'string') { const str = window[key]; if ((str.includes('.m3u8') || str.includes('.mp4')) && str.startsWith('http')) { console.log('Found in window:', str); return str; } } } return null; } """) if js_result and ('.m3u8' in js_result or '.mp4' in js_result): print(f"[LPAYER] Found video URL via JavaScript") video_urls.append(js_result) except Exception as e: print(f"[LPAYER] JS extraction error: {e}") # Final check: parse rendered page HTML try: content = await page.content() patterns = [ r'"file"\s*:\s*"([^"]+\.m3u8[^"]*)"', r'"file"\s*:\s*"([^"]+\.mp4[^"]*)"', r"'file'\s*:\s*'([^']+\.m3u8[^']*)'", r"'file'\s*:\s*'([^']+\.mp4[^']*)'", r'(https?://[^\s"\'<>]+\.m3u8[^\s"\'<>]*)', r'(https?://[^\s"\'<>]+\.mp4[^\s"\'<>]*)', ] for pattern in patterns: matches = re.findall(pattern, content) for match in matches: match = match.replace('\\', '').replace('\\/', '/') if 'http' in match and 'lpayer' not in match.lower(): print(f"[LPAYER] Found in HTML: {match[:100]}...") video_urls.append(match) except Exception as e: print(f"[LPAYER] HTML parsing error: {e}") await browser.close() browser = None # Return first valid video URL if video_urls: seen = set() unique_urls = [] for url in video_urls: if url not in seen: seen.add(url) unique_urls.append(url) if unique_urls: print(f"[LPAYER] ✅ Found {len(unique_urls)} video URL(s)") return unique_urls[0] print("[LPAYER] ❌ No video URLs found") return None except ImportError: print("[LPAYER] Playwright not installed") return None except Exception as e: print(f"[LPAYER] Playwright error: {e}") import traceback traceback.print_exc() return None finally: # Ensure browser is always closed if browser: try: await browser.close() except: pass """Extract video URL using Playwright to render JavaScript""" try: from playwright.async_api import async_playwright print("[LPAYER] Launching Playwright browser...") video_urls = [] async with async_playwright() as p: browser = await p.chromium.launch( headless=True, args=['--no-sandbox', '--disable-setuid-sandbox', '--disable-dev-shm-usage'] ) context = await browser.new_context( user_agent='Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36', viewport={'width': 1920, 'height': 1080} ) page = await context.new_page() # Set up request interception to capture video requests async def handle_request(route): req_url = route.request.url if any(ext in req_url.lower() for ext in ['.m3u8', '.mp4', '.mkv']): if 'lpayer' not in req_url.lower(): print(f"[LPAYER] 🎥 Captured video URL: {req_url[:100]}...") video_urls.append(req_url) await route.continue_() await page.route('**', handle_request) # Navigate to URL with timeout print("[LPAYER] Navigating to page...") try: await page.goto(url, wait_until='domcontentloaded', timeout=30000) except Exception as e: print(f"[LPAYER] Navigation warning: {e}") # Wait for JavaScript to execute and video to load print("[LPAYER] Waiting for video player to load...") await asyncio.sleep(5) # Try JavaScript extraction to find video URLs in DOM try: js_result = await page.evaluate(""" () => { // Check all video elements const videos = document.querySelectorAll('video'); for (let v of videos) { if (v.src && (v.src.includes('.m3u8') || v.src.includes('.mp4'))) { console.log('Found video src:', v.src); return v.src; } const sources = v.querySelectorAll('source'); for (let s of sources) { if (s.src && (s.src.includes('.m3u8') || s.src.includes('.mp4'))) { console.log('Found source src:', s.src); return s.src; } } } // Check for jwplayer if (window.jwplayer) { try { const player = jwplayer(); const playlist = player.getPlaylist(); if (playlist && playlist[0] && playlist[0].sources) { const src = playlist[0].sources[0].file; console.log('Found jwplayer source:', src); return src; } } catch(e) { console.log('jwplayer error:', e); } } // Look for video URLs in window object for (let key in window) { if (typeof window[key] === 'string') { const str = window[key]; if ((str.includes('.m3u8') || str.includes('.mp4')) && str.startsWith('http')) { console.log('Found in window:', str); return str; } } } return null; } """) if js_result and ('.m3u8' in js_result or '.mp4' in js_result): print(f"[LPAYER] Found video URL via JavaScript") video_urls.append(js_result) except Exception as e: print(f"[LPAYER] JS extraction error: {e}") # Final check: parse rendered page HTML try: content = await page.content() patterns = [ r'"file"\s*:\s*"([^"]+\.m3u8[^"]*)"', r'"file"\s*:\s*"([^"]+\.mp4[^"]*)"', r"'file'\s*:\s*'([^']+\.m3u8[^']*)'", r"'file'\s*:\s*'([^']+\.mp4[^']*)'", r'(https?://[^\s"\'<>]+\.m3u8[^\s"\'<>]*)', r'(https?://[^\s"\'<>]+\.mp4[^\s"\'<>]*)', ] for pattern in patterns: matches = re.findall(pattern, content) for match in matches: match = match.replace('\\', '').replace('\\/', '/') if 'http' in match and 'lpayer' not in match.lower(): print(f"[LPAYER] Found in HTML: {match[:100]}...") video_urls.append(match) except Exception as e: print(f"[LPAYER] HTML parsing error: {e}") await browser.close() # Return first valid video URL if video_urls: seen = set() unique_urls = [] for url in video_urls: if url not in seen: seen.add(url) unique_urls.append(url) if unique_urls: print(f"[LPAYER] ✅ Found {len(unique_urls)} video URL(s)") return unique_urls[0] print("[LPAYER] ❌ No video URLs found") return None except ImportError: print("[LPAYER] Playwright not installed") return None except Exception as e: print(f"[LPAYER] Playwright error: {e}") import traceback traceback.print_exc() return None async def _extract_with_http(self, url: str) -> Optional[str]: """Fallback: Extract video source using pure HTTP requests""" try: response = await self.client.get(url) response.raise_for_status() html_content = response.text return self._extract_video_from_html(html_content) except Exception as e: print(f"[LPAYER] HTTP extraction error: {e}") return None def _extract_video_from_html(self, html_content: str) -> Optional[str]: """ Extract video URL from HTML using BeautifulSoup parsing Looks for video URLs in this priority: 1.