# ohm_streaming/app/downloaders/video_players/lpayer.py

from .base import BaseVideoPlayer
from bs4 import BeautifulSoup
import re
import asyncio
from typing import Optional
import httpx


class LpayerDownloader(BaseVideoPlayer):
    """Downloader for lpayer.embed4me.com video player"""

    def can_handle(self, url: str) -> bool:
        return 'lpayer.embed4me.com' in url.lower()

    async def get_download_link(self, url: str, target_filename: Optional[str] = None) -> tuple[str, str]:
        """
        Extract download link from Lpayer video page.
        Uses Playwright for JavaScript rendering, falls back to HTML parsing.
        """
        try:
            print(f"[LPAYER] Extracting link from: {url}")

            # Try Playwright first (handles JavaScript-rendered pages)
            video_url = await self._extract_with_playwright(url)

            if not video_url:
                # Fallback to HTML parsing
                print("[LPAYER] Playwright failed, trying HTML parsing fallback...")
                video_url = await self._extract_with_http(url)

            if not video_url:
                raise Exception("Could not find video URL in Lpayer page")

            print(f"[LPAYER] Found video URL: {video_url[:80]}...")

            # Use target_filename if provided, otherwise generate default
            if target_filename:
                filename = target_filename
            else:
                filename = "lpayer_video.mp4"

            # Ensure .mp4 extension if direct MP4
            if video_url.endswith('.mp4') and not filename.endswith('.mp4'):
                filename += '.mp4'

            return video_url, filename

        except Exception as e:
            raise Exception(f"Error extracting Lpayer link: {str(e)}")

    async def _extract_with_playwright(self, url: str) -> Optional[str]:
        """Extract video URL using Playwright to render JavaScript"""
        browser = None
        try:
            from playwright.async_api import async_playwright

            print("[LPAYER] Launching Playwright browser...")
            video_urls = []

            async with async_playwright() as p:
                browser = await p.chromium.launch(
                    headless=True,
                    args=[
                        '--no-sandbox',
                        '--disable-setuid-sandbox',
                        '--disable-dev-shm-usage',
                        '--disable-blink-features=AutomationControlled',
                        '--disable-features=IsolateOrigins,site-per-process',
                    ]
                )

                context = await browser.new_context(
                    user_agent='Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
                    viewport={'width': 1920, 'height': 1080}
                )

                page = await context.new_page()

                # Set up request interception to capture video requests
                async def handle_request(route):
                    req_url = route.request.url
                    if any(ext in req_url.lower() for ext in ['.m3u8', '.mp4', '.mkv']):
                        if 'lpayer' not in req_url.lower():
                            print(f"[LPAYER] 🎥 Captured video URL: {req_url[:100]}...")
                            video_urls.append(req_url)
                    await route.continue_()

                await page.route('**', handle_request)

                # Navigate to URL with timeout
                print("[LPAYER] Navigating to page...")
                try:
                    await page.goto(url, wait_until='domcontentloaded', timeout=30000)
                except Exception as e:
                    print(f"[LPAYER] Navigation warning: {e}")

                # Wait for JavaScript to execute
                print("[LPAYER] Waiting for video player to load...")
                await asyncio.sleep(5)

                # Try to interact with player to trigger video load
                try:
                    await page.mouse.click(640, 360)
                    await asyncio.sleep(3)
                except:
                    pass

                # Try JavaScript extraction to find video URLs in DOM
                try:
                    js_result = await page.evaluate("""
                        () => {
                            // Check all video elements
                            const videos = document.querySelectorAll('video');
                            for (let v of videos) {
                                if (v.src && (v.src.includes('.m3u8') || v.src.includes('.mp4'))) {
                                    console.log('Found video src:', v.src);
                                    return v.src;
                                }
                                const sources = v.querySelectorAll('source');
                                for (let s of sources) {
                                    if (s.src && (s.src.includes('.m3u8') || s.src.includes('.mp4'))) {
                                        console.log('Found source src:', s.src);
                                        return s.src;
                                    }
                                }
                            }

                            // Check for jwplayer
                            if (window.jwplayer) {
                                try {
                                    const player = jwplayer();
                                    const playlist = player.getPlaylist();
                                    if (playlist && playlist[0] && playlist[0].sources) {
                                        const src = playlist[0].sources[0].file;
                                        console.log('Found jwplayer source:', src);
                                        return src;
                                    }
                                } catch(e) {
                                    console.log('jwplayer error:', e);
                                }
                            }

                            // Check for VidStack player
                            const player = document.querySelector('media-player');
                            if (player && player.provider) {
                                const provider = player.provider;
                                // Try to get source from provider
                                if (provider.src) return provider.src;
                                if (provider.currentSrc) return provider.currentSrc;
                                if (provider.url) return provider.url;
                                if (provider.videoUrl) return provider.videoUrl;
                                // Check internal properties
                                for (let key in provider) {
                                    try {
                                        const val = provider[key];
                                        if (typeof val === 'string' && (val.includes('.m3u8') || val.includes('.mp4')) && val.startsWith('http')) {
                                            return val;
                                        }
                                    } catch(e) {}
                                }
                            }

                            // Look for video URLs in window object
                            for (let key in window) {
                                if (typeof window[key] === 'string') {
                                    const str = window[key];
                                    if ((str.includes('.m3u8') || str.includes('.mp4')) && str.startsWith('http')) {
                                        console.log('Found in window:', str);
                                        return str;
                                    }
                                }
                            }

                            return null;
                        }
                    """)

                    if js_result and ('.m3u8' in js_result or '.mp4' in js_result):
                        print(f"[LPAYER] Found video URL via JavaScript")
                        video_urls.append(js_result)
                except Exception as e:
                    print(f"[LPAYER] JS extraction error: {e}")

                # Final check: parse rendered page HTML
                try:
                    content = await page.content()
                    patterns = [
                        r'"file"\s*:\s*"([^"]+\.m3u8[^"]*)"',
                        r'"file"\s*:\s*"([^"]+\.mp4[^"]*)"',
                        r"'file'\s*:\s*'([^']+\.m3u8[^']*)'",
                        r"'file'\s*:\s*'([^']+\.mp4[^']*)'",
                        r'(https?://[^\s"\'<>]+\.m3u8[^\s"\'<>]*)',
                        r'(https?://[^\s"\'<>]+\.mp4[^\s"\'<>]*)',
                    ]

                    for pattern in patterns:
                        matches = re.findall(pattern, content)
                        for match in matches:
                            match = match.replace('\\', '').replace('\\/', '/')
                            if 'http' in match and 'lpayer' not in match.lower():
                                print(f"[LPAYER] Found in HTML: {match[:100]}...")
                                video_urls.append(match)
                except Exception as e:
                    print(f"[LPAYER] HTML parsing error: {e}")

                await browser.close()
                browser = None

            # Return first valid video URL
            if video_urls:
                seen = set()
                unique_urls = []
                for url in video_urls:
                    if url not in seen:
                        seen.add(url)
                        unique_urls.append(url)

                if unique_urls:
                    print(f"[LPAYER] ✅ Found {len(unique_urls)} video URL(s)")
                    return unique_urls[0]

            print("[LPAYER] ❌ No video URLs found")
            return None

        except ImportError:
            print("[LPAYER] Playwright not installed")
            return None
        except Exception as e:
            print(f"[LPAYER] Playwright error: {e}")
            import traceback
            traceback.print_exc()
            return None
        finally:
            # Ensure browser is always closed
            if browser:
                try:
                    await browser.close()
                except:
                    pass
        """Extract video URL using Playwright to render JavaScript"""
        try:
            from playwright.async_api import async_playwright

            print("[LPAYER] Launching Playwright browser...")
            video_urls = []

            async with async_playwright() as p:
                browser = await p.chromium.launch(
                    headless=True,
                    args=['--no-sandbox', '--disable-setuid-sandbox', '--disable-dev-shm-usage']
                )

                context = await browser.new_context(
                    user_agent='Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36',
                    viewport={'width': 1920, 'height': 1080}
                )

                page = await context.new_page()

                # Set up request interception to capture video requests
                async def handle_request(route):
                    req_url = route.request.url
                    if any(ext in req_url.lower() for ext in ['.m3u8', '.mp4', '.mkv']):
                        if 'lpayer' not in req_url.lower():
                            print(f"[LPAYER] 🎥 Captured video URL: {req_url[:100]}...")
                            video_urls.append(req_url)
                    await route.continue_()

                await page.route('**', handle_request)

                # Navigate to URL with timeout
                print("[LPAYER] Navigating to page...")
                try:
                    await page.goto(url, wait_until='domcontentloaded', timeout=30000)
                except Exception as e:
                    print(f"[LPAYER] Navigation warning: {e}")

                # Wait for JavaScript to execute and video to load
                print("[LPAYER] Waiting for video player to load...")
                await asyncio.sleep(5)

                # Try JavaScript extraction to find video URLs in DOM
                try:
                    js_result = await page.evaluate("""
                        () => {
                            // Check all video elements
                            const videos = document.querySelectorAll('video');
                            for (let v of videos) {
                                if (v.src && (v.src.includes('.m3u8') || v.src.includes('.mp4'))) {
                                    console.log('Found video src:', v.src);
                                    return v.src;
                                }
                                const sources = v.querySelectorAll('source');
                                for (let s of sources) {
                                    if (s.src && (s.src.includes('.m3u8') || s.src.includes('.mp4'))) {
                                        console.log('Found source src:', s.src);
                                        return s.src;
                                    }
                                }
                            }

                            // Check for jwplayer
                            if (window.jwplayer) {
                                try {
                                    const player = jwplayer();
                                    const playlist = player.getPlaylist();
                                    if (playlist && playlist[0] && playlist[0].sources) {
                                        const src = playlist[0].sources[0].file;
                                        console.log('Found jwplayer source:', src);
                                        return src;
                                    }
                                } catch(e) {
                                    console.log('jwplayer error:', e);
                                }
                            }

                            // Look for video URLs in window object
                            for (let key in window) {
                                if (typeof window[key] === 'string') {
                                    const str = window[key];
                                    if ((str.includes('.m3u8') || str.includes('.mp4')) && str.startsWith('http')) {
                                        console.log('Found in window:', str);
                                        return str;
                                    }
                                }
                            }

                            return null;
                        }
                    """)

                    if js_result and ('.m3u8' in js_result or '.mp4' in js_result):
                        print(f"[LPAYER] Found video URL via JavaScript")
                        video_urls.append(js_result)
                except Exception as e:
                    print(f"[LPAYER] JS extraction error: {e}")

                # Final check: parse rendered page HTML
                try:
                    content = await page.content()
                    patterns = [
                        r'"file"\s*:\s*"([^"]+\.m3u8[^"]*)"',
                        r'"file"\s*:\s*"([^"]+\.mp4[^"]*)"',
                        r"'file'\s*:\s*'([^']+\.m3u8[^']*)'",
                        r"'file'\s*:\s*'([^']+\.mp4[^']*)'",
                        r'(https?://[^\s"\'<>]+\.m3u8[^\s"\'<>]*)',
                        r'(https?://[^\s"\'<>]+\.mp4[^\s"\'<>]*)',
                    ]

                    for pattern in patterns:
                        matches = re.findall(pattern, content)
                        for match in matches:
                            match = match.replace('\\', '').replace('\\/', '/')
                            if 'http' in match and 'lpayer' not in match.lower():
                                print(f"[LPAYER] Found in HTML: {match[:100]}...")
                                video_urls.append(match)
                except Exception as e:
                    print(f"[LPAYER] HTML parsing error: {e}")

                await browser.close()

            # Return first valid video URL
            if video_urls:
                seen = set()
                unique_urls = []
                for url in video_urls:
                    if url not in seen:
                        seen.add(url)
                        unique_urls.append(url)

                if unique_urls:
                    print(f"[LPAYER] ✅ Found {len(unique_urls)} video URL(s)")
                    return unique_urls[0]

            print("[LPAYER] ❌ No video URLs found")
            return None

        except ImportError:
            print("[LPAYER] Playwright not installed")
            return None
        except Exception as e:
            print(f"[LPAYER] Playwright error: {e}")
            import traceback
            traceback.print_exc()
            return None

    async def _extract_with_http(self, url: str) -> Optional[str]:
        """Fallback: Extract video source using pure HTTP requests"""
        try:
            response = await self.client.get(url)
            response.raise_for_status()
            html_content = response.text
            return self._extract_video_from_html(html_content)
        except Exception as e:
            print(f"[LPAYER] HTTP extraction error: {e}")
            return None

    def _extract_video_from_html(self, html_content: str) -> Optional[str]:
        """
        Extract video URL from HTML using BeautifulSoup parsing

        Looks for video URLs in this priority:
        1. <video src="URL"> tags
        2. <source src="URL"> tags
        3. Direct URLs in page content with video extensions (.mp4, .m3u8)

        Returns first valid URL found, or None if not found
        """
        try:
            soup = BeautifulSoup(html_content, 'lxml')

            # Priority 1: Look for <video src="..."> tags
            video_tags = soup.find_all('video')
            for video in video_tags:
                src = video.get('src')
                if src and self._is_valid_video_url(src):
                    print(f"[LPAYER] Found video in <video> tag: {src[:80]}...")
                    return src

            # Priority 2: Look for <source src="..."> tags
            source_tags = soup.find_all('source')
            for source in source_tags:
                src = source.get('src')
                if src and self._is_valid_video_url(src):
                    print(f"[LPAYER] Found video in <source> tag: {src[:80]}...")
                    return src

            # Priority 3: Look for direct URLs in page content
            patterns = [
                r'"file"\s*:\s*"([^"]+\.m3u8[^"]*)"',
                r'"file"\s*:\s*"([^"]+\.mp4[^"]*)"',
                r'(https?://[^\s"\'<>]+\.m3u8[^\s"\'<>]*)',
                r'(https?://[^\s"\'<>]+\.mp4[^\s"\'<>]*)',
            ]

            for pattern in patterns:
                matches = re.findall(pattern, html_content)
                for match in matches:
                    match = match.replace('\\', '').replace(r'\/', '/')
                    if self._is_valid_video_url(match):
                        print(f"[LPAYER] Found video in content: {match[:80]}...")
                        return match

            print("[LPAYER] No video URL found in HTML")
            return None

        except Exception as e:
            print(f"[LPAYER] HTML parsing error: {e}")
            return None

    def _is_valid_video_url(self, url: str) -> bool:
        """
        Check if URL is a valid video URL

        Valid if:
        - Starts with http:// or https://
        - Contains .mp4 or .m3u8 extension
        """
        if not url:
            return False

        # Must be http(s) URL
        if not url.startswith('http'):
            return False

        # Must contain video extension
        url_lower = url.lower()
        if '.mp4' not in url_lower and '.m3u8' not in url_lower:
            return False

        return True