feat: add multiple video player support for Frieren S2 downloads

- Add Lpayer API decryption using AES (key: kiemtienmua911ca)
- Add yt-dlp extraction for bypassing player blocking
- Add HTTP 206 support for video validation (Range header)
- Add VidMoly .biz domain support (alternative to .to)
- Add SendVid extraction (working - downloaded S1 and S2 E1)
- Add player fallback system with caching per anime URL
- Add video URL validation before returning to downloader
- Update HTTP clients with realistic browser headers
- Add pycryptodome to requirements.txt
- Add test file for fallback system

Download status: SendVid (primary, working), Lpayer (403 issue), VidMoly (testing)
2026-02-25 16:29:53 +00:00
parent 8b7a419b4c
commit 3cf2f8eca5
9 changed files with 1370 additions and 184 deletions
@@ -2,6 +2,8 @@ from .base import BaseVideoPlayer
 from bs4 import BeautifulSoup
 import re
 import asyncio
+from typing import Optional
+import httpx


 class LpayerDownloader(BaseVideoPlayer):
@@ -10,124 +12,160 @@ class LpayerDownloader(BaseVideoPlayer):
    def can_handle(self, url: str) -> bool:
        return 'lpayer.embed4me.com' in url.lower()

-    async def get_download_link(self, url: str) -> tuple[str, str]:
+    async def get_download_link(self, url: str, target_filename: Optional[str] = None) -> tuple[str, str]:
        """
-        Extract download link from Lpayer video page
-        Lpayer uses a React app with dynamic JavaScript - requires Playwright
+        Extract download link from Lpayer video page.
+        Uses Playwright for JavaScript rendering, falls back to HTML parsing.
        """
        try:
            print(f"[LPAYER] Extracting link from: {url}")

-            # Try using Playwright to extract video URL
+            # Try Playwright first (handles JavaScript-rendered pages)
            video_url = await self._extract_with_playwright(url)

+            if not video_url:
+                # Fallback to HTML parsing
+                print("[LPAYER] Playwright failed, trying HTML parsing fallback...")
+                video_url = await self._extract_with_http(url)
+
            if not video_url:
                raise Exception("Could not find video URL in Lpayer page")

            print(f"[LPAYER] Found video URL: {video_url[:80]}...")

-            # Generate filename
-            filename = "lpayer_video.mp4"
+            # Use target_filename if provided, otherwise generate default
+            if target_filename:
+                filename = target_filename
+            else:
+                filename = "lpayer_video.mp4"
+
+            # Ensure .mp4 extension if direct MP4
+            if video_url.endswith('.mp4') and not filename.endswith('.mp4'):
+                filename += '.mp4'

            return video_url, filename

        except Exception as e:
            raise Exception(f"Error extracting Lpayer link: {str(e)}")

-    async def _extract_with_playwright(self, url: str) -> str | None:
-        """Extract video URL using Playwright with network interception"""
+    async def _extract_with_playwright(self, url: str) -> Optional[str]:
+        """Extract video URL using Playwright to render JavaScript"""
+        browser = None
        try:
            from playwright.async_api import async_playwright

-            print("[LPAYER] Launching browser with network interception...")
-
+            print("[LPAYER] Launching Playwright browser...")
            video_urls = []

            async with async_playwright() as p:
                browser = await p.chromium.launch(
                    headless=True,
-                    args=['--no-sandbox', '--disable-setuid-sandbox', '--disable-dev-shm-usage']
+                    args=[
+                        '--no-sandbox',
+                        '--disable-setuid-sandbox',
+                        '--disable-dev-shm-usage',
+                        '--disable-blink-features=AutomationControlled',
+                        '--disable-features=IsolateOrigins,site-per-process',
+                    ]
                )

                context = await browser.new_context(
-                    user_agent='Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36'
+                    user_agent='Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
+                    viewport={'width': 1920, 'height': 1080}
                )

                page = await context.new_page()

-                # Set up request interception
+                # Set up request interception to capture video requests
                async def handle_request(route):
                    req_url = route.request.url
-
-                    # Look for video files
                    if any(ext in req_url.lower() for ext in ['.m3u8', '.mp4', '.mkv']):
                        if 'lpayer' not in req_url.lower():
                            print(f"[LPAYER] 🎥 Captured video URL: {req_url[:100]}...")
                            video_urls.append(req_url)
-
                    await route.continue_()

                await page.route('**', handle_request)

+                # Navigate to URL with timeout
                print("[LPAYER] Navigating to page...")
-
                try:
                    await page.goto(url, wait_until='domcontentloaded', timeout=30000)
                except Exception as e:
                    print(f"[LPAYER] Navigation warning: {e}")

-                # Wait for page to load
+                # Wait for JavaScript to execute
                print("[LPAYER] Waiting for video player to load...")
                await asyncio.sleep(5)

-                # Try to find and click play button
+                # Try to interact with player to trigger video load
                try:
-                    play_selectors = [
-                        'button[aria-label="Play"]',
-                        '.play-button',
-                        'video',
-                    ]
+                    await page.mouse.click(640, 360)
+                    await asyncio.sleep(3)
+                except:
+                    pass

-                    for selector in play_selectors:
-                        try:
-                            element = await page.query_selector(selector)
-                            if element:
-                                print(f"[LPAYER] Found element: {selector}")
-                                if 'button' in selector:
-                                    await element.click()
-                                    await asyncio.sleep(3)
-                                break
-                        except:
-                            continue
-                except Exception as e:
-                    print(f"[LPAYER] Play button interaction: {e}")
-
-                # Wait more for network requests
-                await asyncio.sleep(3)
-
-                # Try JavaScript extraction
+                # Try JavaScript extraction to find video URLs in DOM
                try:
                    js_result = await page.evaluate("""
                        () => {
                            // Check all video elements
                            const videos = document.querySelectorAll('video');
                            for (let v of videos) {
-                                if (v.src) {
+                                if (v.src && (v.src.includes('.m3u8') || v.src.includes('.mp4'))) {
+                                    console.log('Found video src:', v.src);
                                    return v.src;
                                }
                                const sources = v.querySelectorAll('source');
                                for (let s of sources) {
                                    if (s.src && (s.src.includes('.m3u8') || s.src.includes('.mp4'))) {
+                                        console.log('Found source src:', s.src);
                                        return s.src;
                                    }
                                }
                            }

-                            // Check window object for video URLs
+                            // Check for jwplayer
+                            if (window.jwplayer) {
+                                try {
+                                    const player = jwplayer();
+                                    const playlist = player.getPlaylist();
+                                    if (playlist && playlist[0] && playlist[0].sources) {
+                                        const src = playlist[0].sources[0].file;
+                                        console.log('Found jwplayer source:', src);
+                                        return src;
+                                    }
+                                } catch(e) {
+                                    console.log('jwplayer error:', e);
+                                }
+                            }
+
+                            // Check for VidStack player
+                            const player = document.querySelector('media-player');
+                            if (player && player.provider) {
+                                const provider = player.provider;
+                                // Try to get source from provider
+                                if (provider.src) return provider.src;
+                                if (provider.currentSrc) return provider.currentSrc;
+                                if (provider.url) return provider.url;
+                                if (provider.videoUrl) return provider.videoUrl;
+                                // Check internal properties
+                                for (let key in provider) {
+                                    try {
+                                        const val = provider[key];
+                                        if (typeof val === 'string' && (val.includes('.m3u8') || val.includes('.mp4')) && val.startsWith('http')) {
+                                            return val;
+                                        }
+                                    } catch(e) {}
+                                }
+                            }
+
+                            // Look for video URLs in window object
                            for (let key in window) {
                                if (typeof window[key] === 'string') {
                                    const str = window[key];
                                    if ((str.includes('.m3u8') || str.includes('.mp4')) && str.startsWith('http')) {
+                                        console.log('Found in window:', str);
                                        return str;
                                    }
                                }
@@ -143,12 +181,14 @@ class LpayerDownloader(BaseVideoPlayer):
                except Exception as e:
                    print(f"[LPAYER] JS extraction error: {e}")

-                # Parse page HTML for video URLs
+                # Final check: parse rendered page HTML
                try:
                    content = await page.content()
                    patterns = [
                        r'"file"\s*:\s*"([^"]+\.m3u8[^"]*)"',
                        r'"file"\s*:\s*"([^"]+\.mp4[^"]*)"',
+                        r"'file'\s*:\s*'([^']+\.m3u8[^']*)'",
+                        r"'file'\s*:\s*'([^']+\.mp4[^']*)'",
                        r'(https?://[^\s"\'<>]+\.m3u8[^\s"\'<>]*)',
                        r'(https?://[^\s"\'<>]+\.mp4[^\s"\'<>]*)',
                    ]
@@ -156,30 +196,31 @@ class LpayerDownloader(BaseVideoPlayer):
                    for pattern in patterns:
                        matches = re.findall(pattern, content)
                        for match in matches:
-                            match = match.replace('\\', '').replace('\/', '/')
-                            if 'http' in match and 'lpayer' not in match:
+                            match = match.replace('\\', '').replace('\\/', '/')
+                            if 'http' in match and 'lpayer' not in match.lower():
                                print(f"[LPAYER] Found in HTML: {match[:100]}...")
                                video_urls.append(match)
                except Exception as e:
                    print(f"[LPAYER] HTML parsing error: {e}")

                await browser.close()
+                browser = None

-                # Return first valid video URL
-                if video_urls:
-                    seen = set()
-                    unique_urls = []
-                    for url in video_urls:
-                        if url not in seen:
-                            seen.add(url)
-                            unique_urls.append(url)
+            # Return first valid video URL
+            if video_urls:
+                seen = set()
+                unique_urls = []
+                for url in video_urls:
+                    if url not in seen:
+                        seen.add(url)
+                        unique_urls.append(url)

-                    if unique_urls:
-                        print(f"[LPAYER] ✅ Found {len(unique_urls)} video URL(s)")
-                        return unique_urls[0]
+                if unique_urls:
+                    print(f"[LPAYER] ✅ Found {len(unique_urls)} video URL(s)")
+                    return unique_urls[0]

-                print("[LPAYER] ❌ No video URLs found")
-                return None
+            print("[LPAYER] ❌ No video URLs found")
+            return None

        except ImportError:
            print("[LPAYER] Playwright not installed")
@@ -189,3 +230,242 @@ class LpayerDownloader(BaseVideoPlayer):
            import traceback
            traceback.print_exc()
            return None
+        finally:
+            # Ensure browser is always closed
+            if browser:
+                try:
+                    await browser.close()
+                except:
+                    pass
+        """Extract video URL using Playwright to render JavaScript"""
+        try:
+            from playwright.async_api import async_playwright
+
+            print("[LPAYER] Launching Playwright browser...")
+            video_urls = []
+
+            async with async_playwright() as p:
+                browser = await p.chromium.launch(
+                    headless=True,
+                    args=['--no-sandbox', '--disable-setuid-sandbox', '--disable-dev-shm-usage']
+                )
+
+                context = await browser.new_context(
+                    user_agent='Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36',
+                    viewport={'width': 1920, 'height': 1080}
+                )
+
+                page = await context.new_page()
+
+                # Set up request interception to capture video requests
+                async def handle_request(route):
+                    req_url = route.request.url
+                    if any(ext in req_url.lower() for ext in ['.m3u8', '.mp4', '.mkv']):
+                        if 'lpayer' not in req_url.lower():
+                            print(f"[LPAYER] 🎥 Captured video URL: {req_url[:100]}...")
+                            video_urls.append(req_url)
+                    await route.continue_()
+
+                await page.route('**', handle_request)
+
+                # Navigate to URL with timeout
+                print("[LPAYER] Navigating to page...")
+                try:
+                    await page.goto(url, wait_until='domcontentloaded', timeout=30000)
+                except Exception as e:
+                    print(f"[LPAYER] Navigation warning: {e}")
+
+                # Wait for JavaScript to execute and video to load
+                print("[LPAYER] Waiting for video player to load...")
+                await asyncio.sleep(5)
+
+                # Try JavaScript extraction to find video URLs in DOM
+                try:
+                    js_result = await page.evaluate("""
+                        () => {
+                            // Check all video elements
+                            const videos = document.querySelectorAll('video');
+                            for (let v of videos) {
+                                if (v.src && (v.src.includes('.m3u8') || v.src.includes('.mp4'))) {
+                                    console.log('Found video src:', v.src);
+                                    return v.src;
+                                }
+                                const sources = v.querySelectorAll('source');
+                                for (let s of sources) {
+                                    if (s.src && (s.src.includes('.m3u8') || s.src.includes('.mp4'))) {
+                                        console.log('Found source src:', s.src);
+                                        return s.src;
+                                    }
+                                }
+                            }
+
+                            // Check for jwplayer
+                            if (window.jwplayer) {
+                                try {
+                                    const player = jwplayer();
+                                    const playlist = player.getPlaylist();
+                                    if (playlist && playlist[0] && playlist[0].sources) {
+                                        const src = playlist[0].sources[0].file;
+                                        console.log('Found jwplayer source:', src);
+                                        return src;
+                                    }
+                                } catch(e) {
+                                    console.log('jwplayer error:', e);
+                                }
+                            }
+
+                            // Look for video URLs in window object
+                            for (let key in window) {
+                                if (typeof window[key] === 'string') {
+                                    const str = window[key];
+                                    if ((str.includes('.m3u8') || str.includes('.mp4')) && str.startsWith('http')) {
+                                        console.log('Found in window:', str);
+                                        return str;
+                                    }
+                                }
+                            }
+
+                            return null;
+                        }
+                    """)
+
+                    if js_result and ('.m3u8' in js_result or '.mp4' in js_result):
+                        print(f"[LPAYER] Found video URL via JavaScript")
+                        video_urls.append(js_result)
+                except Exception as e:
+                    print(f"[LPAYER] JS extraction error: {e}")
+
+                # Final check: parse rendered page HTML
+                try:
+                    content = await page.content()
+                    patterns = [
+                        r'"file"\s*:\s*"([^"]+\.m3u8[^"]*)"',
+                        r'"file"\s*:\s*"([^"]+\.mp4[^"]*)"',
+                        r"'file'\s*:\s*'([^']+\.m3u8[^']*)'",
+                        r"'file'\s*:\s*'([^']+\.mp4[^']*)'",
+                        r'(https?://[^\s"\'<>]+\.m3u8[^\s"\'<>]*)',
+                        r'(https?://[^\s"\'<>]+\.mp4[^\s"\'<>]*)',
+                    ]
+
+                    for pattern in patterns:
+                        matches = re.findall(pattern, content)
+                        for match in matches:
+                            match = match.replace('\\', '').replace('\\/', '/')
+                            if 'http' in match and 'lpayer' not in match.lower():
+                                print(f"[LPAYER] Found in HTML: {match[:100]}...")
+                                video_urls.append(match)
+                except Exception as e:
+                    print(f"[LPAYER] HTML parsing error: {e}")
+
+                await browser.close()
+
+            # Return first valid video URL
+            if video_urls:
+                seen = set()
+                unique_urls = []
+                for url in video_urls:
+                    if url not in seen:
+                        seen.add(url)
+                        unique_urls.append(url)
+
+                if unique_urls:
+                    print(f"[LPAYER] ✅ Found {len(unique_urls)} video URL(s)")
+                    return unique_urls[0]
+
+            print("[LPAYER] ❌ No video URLs found")
+            return None
+
+        except ImportError:
+            print("[LPAYER] Playwright not installed")
+            return None
+        except Exception as e:
+            print(f"[LPAYER] Playwright error: {e}")
+            import traceback
+            traceback.print_exc()
+            return None
+
+    async def _extract_with_http(self, url: str) -> Optional[str]:
+        """Fallback: Extract video source using pure HTTP requests"""
+        try:
+            response = await self.client.get(url)
+            response.raise_for_status()
+            html_content = response.text
+            return self._extract_video_from_html(html_content)
+        except Exception as e:
+            print(f"[LPAYER] HTTP extraction error: {e}")
+            return None
+
+    def _extract_video_from_html(self, html_content: str) -> Optional[str]:
+        """
+        Extract video URL from HTML using BeautifulSoup parsing
+        
+        Looks for video URLs in this priority:
+        1. <video src="URL"> tags
+        2. <source src="URL"> tags
+        3. Direct URLs in page content with video extensions (.mp4, .m3u8)
+        
+        Returns first valid URL found, or None if not found
+        """
+        try:
+            soup = BeautifulSoup(html_content, 'lxml')
+
+            # Priority 1: Look for <video src="..."> tags
+            video_tags = soup.find_all('video')
+            for video in video_tags:
+                src = video.get('src')
+                if src and self._is_valid_video_url(src):
+                    print(f"[LPAYER] Found video in <video> tag: {src[:80]}...")
+                    return src
+
+            # Priority 2: Look for <source src="..."> tags
+            source_tags = soup.find_all('source')
+            for source in source_tags:
+                src = source.get('src')
+                if src and self._is_valid_video_url(src):
+                    print(f"[LPAYER] Found video in <source> tag: {src[:80]}...")
+                    return src
+
+            # Priority 3: Look for direct URLs in page content
+            patterns = [
+                r'"file"\s*:\s*"([^"]+\.m3u8[^"]*)"',
+                r'"file"\s*:\s*"([^"]+\.mp4[^"]*)"',
+                r'(https?://[^\s"\'<>]+\.m3u8[^\s"\'<>]*)',
+                r'(https?://[^\s"\'<>]+\.mp4[^\s"\'<>]*)',
+            ]
+
+            for pattern in patterns:
+                matches = re.findall(pattern, html_content)
+                for match in matches:
+                    match = match.replace('\\', '').replace(r'\/', '/')
+                    if self._is_valid_video_url(match):
+                        print(f"[LPAYER] Found video in content: {match[:80]}...")
+                        return match
+
+            print("[LPAYER] No video URL found in HTML")
+            return None
+
+        except Exception as e:
+            print(f"[LPAYER] HTML parsing error: {e}")
+            return None
+
+    def _is_valid_video_url(self, url: str) -> bool:
+        """
+        Check if URL is a valid video URL
+        
+        Valid if:
+        - Starts with http:// or https://
+        - Contains .mp4 or .m3u8 extension
+        """
+        if not url:
+            return False
+
+        # Must be http(s) URL
+        if not url.startswith('http'):
+            return False
+
+        # Must contain video extension
+        url_lower = url.lower()
+        if '.mp4' not in url_lower and '.m3u8' not in url_lower:
+            return False
+
+        return True