# ohm_streaming/app/downloaders/video_players/smoothpre.py

from .base import BaseVideoPlayer
from bs4 import BeautifulSoup
import re
import asyncio
from typing import Optional


class SmoothpreDownloader(BaseVideoPlayer):
    """Downloader for smoothpre.com video player (JWPlayer-based)"""

    def can_handle(self, url: str) -> bool:
        return 'smoothpre.com' in url.lower()

    async def get_download_link(self, url: str, target_filename: Optional[str] = None) -> tuple[str, str]:
        """
        Extract download link from Smoothpre video page
        Smoothpre uses JWPlayer with dynamic JavaScript - requires Playwright

        Args:
            url: The Smoothpre video page URL
            target_filename: Optional filename override

        Returns:
            Tuple of (direct_video_url, filename)
        """
        try:
            print(f"[SMOOTHPRE] Extracting link from: {url}")

            # Try using Playwright to extract video URL
            video_url = await self._extract_with_playwright(url)

            if not video_url:
                raise Exception("Could not find video URL in Smoothpre page")

            print(f"[SMOOTHPRE] Found video URL: {video_url[:80]}...")

            # Generate filename
            from app.utils import sanitize_filename
            if target_filename:
                filename = sanitize_filename(target_filename)
            else:
                filename = "smoothpre_video.mp4"

            return video_url, filename

        except Exception as e:
            raise Exception(f"Error extracting Smoothpre link: {str(e)}")

    async def _extract_with_playwright(self, url: str) -> str | None:
        """Extract the video URL by driving headless Chromium through Playwright.

        Candidates are collected from three sources, in this order:
        intercepted network responses, an in-page JavaScript probe (JWPlayer
        playlist, ``<video>`` elements, a few ``window`` globals) and a regex
        scan of the rendered HTML. Duplicates are dropped and the first
        candidate containing ``.m3u8`` is preferred; otherwise the first
        candidate overall is returned.

        If Playwright is not installed, or anything in the browser session
        raises, the work is delegated to ``_extract_with_http``.

        Args:
            url: The Smoothpre video page URL.

        Returns:
            The selected video URL, or ``None`` when no candidate was found.
        """
        try:
            from playwright.async_api import async_playwright

            print("[SMOOTHPRE] Launching browser with network interception...")

            video_urls: list[str] = []

            async def handle_response(response):
                """Record responses that look like media served from another host."""
                try:
                    resp_url = response.url
                    lowered_url = resp_url.lower()
                    content_type = response.headers.get('content-type', '').lower()

                    # Look for video files in responses
                    if any(ext in lowered_url for ext in ('.m3u8', '.mp4', '.mkv', '.ts')):
                        if 'smoothpre' not in lowered_url and 'google' not in lowered_url:
                            print(f"[SMOOTHPRE] 🎥 Captured video URL: {resp_url[:100]}...")
                            video_urls.append(resp_url)
                    # Also check by content-type
                    elif any(ct in content_type for ct in ('video/', 'application/x-mpegurl')):
                        if 'smoothpre' not in lowered_url:
                            print(f"[SMOOTHPRE] 🎥 Captured video response: {resp_url[:100]}...")
                            video_urls.append(resp_url)
                except Exception as e:
                    # Best effort: one bad response must not abort the session,
                    # but leave a trace instead of hiding the failure.
                    print(f"[SMOOTHPRE] Response interception warning: {e}")

            async with async_playwright() as p:
                browser = await p.chromium.launch(
                    headless=True,
                    args=['--no-sandbox', '--disable-setuid-sandbox', '--disable-dev-shm-usage']
                )
                try:
                    context = await browser.new_context(
                        user_agent='Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36'
                    )
                    page = await context.new_page()
                    page.on('response', handle_response)

                    print("[SMOOTHPRE] Navigating to page...")
                    try:
                        await page.goto(url, wait_until='networkidle', timeout=30000)
                    except Exception as e:
                        # A navigation timeout is tolerated: responses captured
                        # so far and the page state may still be usable.
                        print(f"[SMOOTHPRE] Navigation warning: {e}")

                    # Wait for page to load
                    print("[SMOOTHPRE] Waiting for video player to load...")
                    await asyncio.sleep(3)

                    await self._click_play_control(page)

                    # Wait more for network requests
                    await asyncio.sleep(4)

                    js_url = await self._probe_player_javascript(page)
                    if js_url:
                        video_urls.append(js_url)

                    try:
                        content = await page.content()
                        video_urls.extend(self._scan_html_for_video_urls(content))
                    except Exception as e:
                        print(f"[SMOOTHPRE] HTML parsing error: {e}")
                finally:
                    # Always release the browser, even if a step above raised.
                    # A failed close is only logged so that URLs already
                    # captured are not thrown away.
                    try:
                        await browser.close()
                    except Exception as close_error:
                        print(f"[SMOOTHPRE] Browser close warning: {close_error}")

                # De-duplicate while keeping first-seen order.
                unique_urls = list(dict.fromkeys(video_urls))
                if unique_urls:
                    # Prefer .m3u8 (source quality); otherwise first seen.
                    selected = next(
                        (candidate for candidate in unique_urls if '.m3u8' in candidate),
                        unique_urls[0],
                    )
                    print(f"[SMOOTHPRE] ✅ Found {len(unique_urls)} video URL(s)")
                    print(f"[SMOOTHPRE] Selected: {selected[:100]}...")
                    return selected

                print("[SMOOTHPRE] ❌ No video URLs found")
                return None

        except ImportError:
            print("[SMOOTHPRE] ⚠️ Playwright not installed - falling back to HTTP extraction")
            return await self._extract_with_http(url)
        except Exception as e:
            print(f"[SMOOTHPRE] Playwright error: {e}")
            import traceback
            traceback.print_exc()
            # Fallback to HTTP extraction
            return await self._extract_with_http(url)

    async def _click_play_control(self, page) -> None:
        """Click the first play control found so the player requests its stream.

        Selectors are tried in order and the search stops at the first one
        that matches. A bare ``video`` element is only reported, not clicked.
        A selector whose lookup or click raises is logged and skipped.
        """
        play_selectors = (
            'button[aria-label="Play"]',
            '.play-button',
            'button[class*="play"]',
            '.jw-icon-display',
            'video',
        )
        for selector in play_selectors:
            try:
                element = await page.query_selector(selector)
                if not element:
                    continue
                print(f"[SMOOTHPRE] Found element: {selector}")
                if 'button' in selector or 'jw' in selector:
                    await element.click()
                    await asyncio.sleep(2)
                break
            except Exception as e:
                print(f"[SMOOTHPRE] Play button interaction: {e}")
                continue

    async def _probe_player_javascript(self, page) -> str | None:
        """Ask the page itself for a stream URL; return it or ``None``.

        The injected script checks, in order: the JWPlayer playlist sources,
        ``<video>``/``<source>`` elements, and a handful of ``window`` globals
        serialised to JSON. Any evaluation error is logged and yields ``None``.
        """
        js_code = r"""
            () => {
                // Check for JWPlayer setup (primary method for Smoothpre)
                if (window.jwplayer) {
                    try {
                        const playlist = window.jwplayer().getPlaylist();
                        if (playlist && playlist[0] && playlist[0].sources) {
                            for (let source of playlist[0].sources) {
                                if (source.file && (source.file.includes('.m3u8') || source.file.includes('.mp4'))) {
                                    return source.file;
                                }
                            }
                        }
                    } catch(e) {}
                }

                // Check all video elements
                const videos = document.querySelectorAll('video');
                for (let v of videos) {
                    if (v.src && (v.src.includes('.m3u8') || v.src.includes('.mp4'))) {
                        return v.src;
                    }
                    const sources = v.querySelectorAll('source');
                    for (let s of sources) {
                        if (s.src && (s.src.includes('.m3u8') || s.src.includes('.mp4'))) {
                            return s.src;
                        }
                    }
                }

                // Check window object for video URLs
                const searchKeys = ['player', 'video', 'source', 'file', 'url', 'jw'];
                for (let key of searchKeys) {
                    if (window[key] && typeof window[key] === 'object') {
                        try {
                            const json = JSON.stringify(window[key]);
                            const match = json.match(/(https?:\/\/[^\s"\'<>]+\.(m3u8|mp4))/);
                            if (match) return match[1];
                        } catch(e) {}
                    }
                }

                return null;
            }
        """
        try:
            js_result = await page.evaluate(js_code)
            if js_result and ('.m3u8' in js_result or '.mp4' in js_result):
                print(f"[SMOOTHPRE] ✅ Found video URL via JavaScript: {js_result[:100]}...")
                return js_result
        except Exception as e:
            print(f"[SMOOTHPRE] JS extraction error: {e}")
        return None

    @staticmethod
    def _scan_html_for_video_urls(content: str) -> list[str]:
        """Return off-site ``.m3u8``/``.mp4`` URLs found in rendered HTML.

        Matches are un-escaped (``\\/`` becomes ``/``, remaining backslashes
        are removed). The ``http``/``smoothpre``/``google`` filters are
        applied case-insensitively because the patterns themselves are
        matched with ``re.IGNORECASE``.
        """
        patterns = (
            r'"file"\s*:\s*"([^"]+\.m3u8[^"]*)"',
            r'"file"\s*:\s*"([^"]+\.mp4[^"]*)"',
            r'"source"\s*:\s*"([^"]+\.m3u8[^"]*)"',
            r'"source"\s*:\s*"([^"]+\.mp4[^"]*)"',
            r'(https?://[^\s"\'<>]+\.m3u8[^\s"\'<>]*)',
            r'(https?://[^\s"\'<>]+\.mp4[^\s"\'<>]*)',
            r"url\s*[:=]\s*['\"]([^'\"]+\.m3u8[^'\"]*)['\"]",
            r"url\s*[:=]\s*['\"]([^'\"]+\.mp4[^'\"]*)['\"]",
        )
        found: list[str] = []
        for pattern in patterns:
            for raw_match in re.findall(pattern, content, re.IGNORECASE):
                # Clean up the URL
                cleaned = raw_match.replace('\\/', '/').replace('\\', '')
                lowered = cleaned.lower()
                if 'http' in lowered and 'smoothpre' not in lowered and 'google' not in lowered:
                    print(f"[SMOOTHPRE] Found in HTML: {cleaned[:100]}...")
                    found.append(cleaned)
        return found

    async def _extract_with_http(self, url: str) -> str | None:
        """Extract the video URL with a plain HTTP fetch (no JavaScript execution).

        Used as the fallback when the Playwright path is unavailable or fails.
        The page is fetched once through ``self.client`` and searched in two
        passes:

        1. ``<video>`` tags (``src`` or ``data-src``) and their nested
           ``<source>`` tags whose value contains ``.m3u8`` or ``.mp4``.
           Relative and protocol-relative values are resolved to absolute
           URLs.
        2. Inline ``<script>`` bodies, using JWPlayer-style ``"file"`` /
           ``"source"`` patterns and bare URL patterns; matches that mention
           ``smoothpre`` are skipped.

        Args:
            url: The Smoothpre video page URL.

        Returns:
            The first matching video URL, or ``None`` if nothing matched or
            an exception occurred (it is logged rather than raised).
        """
        from urllib.parse import urljoin

        media_exts = ('.m3u8', '.mp4')
        # Built once instead of once per <script> tag.
        script_patterns = (
            r'"file"\s*:\s*"([^"]+\.m3u8[^"]*)"',
            r'"file"\s*:\s*"([^"]+\.mp4[^"]*)"',
            r'"source"\s*:\s*"([^"]+\.m3u8[^"]*)"',
            r'"source"\s*:\s*"([^"]+\.mp4[^"]*)"',
            r'(https?://[^\s"\'<>]+\.m3u8[^\s"\'<>]*)',
            r'(https?://[^\s"\'<>]+\.mp4[^\s"\'<>]*)',
        )

        try:
            print(f"[SMOOTHPRE] Trying HTTP extraction from: {url}")

            response = await self.client.get(url, follow_redirects=True)
            soup = BeautifulSoup(response.text, 'lxml')

            # NOTE(review): assumes the client response exposes the final URL
            # as `.url` (httpx-style); otherwise the requested URL is the base.
            base_url = str(getattr(response, 'url', None) or url)

            # Method 1: Look for video/source tags
            for video in soup.find_all('video'):
                src = video.get('src') or video.get('data-src')
                if src and any(ext in src for ext in media_exts):
                    # "//cdn/x.mp4" or "/x.mp4" is not a usable download link.
                    src = urljoin(base_url, src)
                    print(f"[SMOOTHPRE] ✅ Found video in video tag: {src[:100]}...")
                    return src

                for source in video.find_all('source'):
                    src = source.get('src')
                    if src and any(ext in src for ext in media_exts):
                        src = urljoin(base_url, src)
                        print(f"[SMOOTHPRE] ✅ Found video in source tag: {src[:100]}...")
                        return src

            # Method 2: Look in script tags for JWPlayer configuration
            for script in soup.find_all('script'):
                script_text = script.string
                if not script_text:
                    continue
                for pattern in script_patterns:
                    for match in re.findall(pattern, script_text, re.IGNORECASE):
                        # Undo JSON escaping of forward slashes.
                        match = match.replace('\\/', '/')
                        if 'http' in match and 'smoothpre' not in match.lower():
                            print(f"[SMOOTHPRE] ✅ Found video in script: {match[:100]}...")
                            return match

            print("[SMOOTHPRE] ❌ HTTP extraction failed - no video URLs found")
            return None

        except Exception as e:
            print(f"[SMOOTHPRE] HTTP extraction error: {e}")
            return None