feat: add multiple video player support for Frieren S2 downloads
- Add Lpayer API decryption using AES (key: kiemtienmua911ca)
- Add yt-dlp extraction for bypassing player blocking
- Add HTTP 206 support for video validation (Range header)
- Add VidMoly .biz domain support (alternative to .to)
- Add SendVid extraction (working - downloaded S1 and S2 E1)
- Add player fallback system with caching per anime URL
- Add video URL validation before returning to downloader
- Update HTTP clients with realistic browser headers
- Add pycryptodome to requirements.txt
- Add test file for fallback system

Downloads working: SendVid (primary), Lpayer (403 issue), VidMoly (testing)
This commit is contained in:
@@ -2,6 +2,8 @@ from .base import BaseVideoPlayer
|
||||
from bs4 import BeautifulSoup
|
||||
import re
|
||||
import asyncio
|
||||
from typing import Optional
|
||||
import httpx
|
||||
|
||||
|
||||
class LpayerDownloader(BaseVideoPlayer):
|
||||
@@ -10,124 +12,160 @@ class LpayerDownloader(BaseVideoPlayer):
|
||||
def can_handle(self, url: str) -> bool:
    """
    Report whether *url* points at the Lpayer embed host.

    The check is a case-insensitive substring match on
    ``lpayer.embed4me.com``. Anything that is not a string (for example
    ``None``) is rejected instead of raising ``AttributeError``.
    """
    return isinstance(url, str) and 'lpayer.embed4me.com' in url.lower()
|
||||
|
||||
async def get_download_link(self, url: str, target_filename: Optional[str] = None) -> tuple[str, str]:
    """
    Extract download link from Lpayer video page.

    Uses Playwright for JavaScript rendering, falls back to HTML parsing.

    Args:
        url: Address of the Lpayer player page.
        target_filename: Filename to return with the link. When omitted,
            ``"lpayer_video.mp4"`` is used.

    Returns:
        A ``(video_url, filename)`` tuple.

    Raises:
        Exception: If neither extraction strategy yields a URL, or if any
            step fails. The original error is attached as ``__cause__``.
    """
    # Local import keeps this method self-contained.
    from urllib.parse import urlsplit

    try:
        print(f"[LPAYER] Extracting link from: {url}")

        # Try Playwright first (handles JavaScript-rendered pages)
        video_url = await self._extract_with_playwright(url)

        if not video_url:
            # Fallback to HTML parsing
            print("[LPAYER] Playwright failed, trying HTML parsing fallback...")
            video_url = await self._extract_with_http(url)

        if not video_url:
            raise Exception("Could not find video URL in Lpayer page")

        print(f"[LPAYER] Found video URL: {video_url[:80]}...")

        # Use target_filename if provided, otherwise generate default
        filename = target_filename if target_filename else "lpayer_video.mp4"

        # Ensure .mp4 extension if direct MP4. Only the URL path is checked,
        # so a query string or fragment ("...ep1.mp4?token=...") does not
        # hide the extension; the comparison is case-insensitive.
        is_direct_mp4 = urlsplit(video_url).path.lower().endswith('.mp4')
        if is_direct_mp4 and not filename.lower().endswith('.mp4'):
            filename += '.mp4'

        return video_url, filename

    except Exception as e:
        # Chain the original error so its traceback is not lost.
        raise Exception(f"Error extracting Lpayer link: {str(e)}") from e
|
||||
|
||||
async def _extract_with_playwright(self, url: str) -> str | None:
|
||||
"""Extract video URL using Playwright with network interception"""
|
||||
async def _extract_with_playwright(self, url: str) -> Optional[str]:
|
||||
"""Extract video URL using Playwright to render JavaScript"""
|
||||
browser = None
|
||||
try:
|
||||
from playwright.async_api import async_playwright
|
||||
|
||||
print("[LPAYER] Launching browser with network interception...")
|
||||
|
||||
print("[LPAYER] Launching Playwright browser...")
|
||||
video_urls = []
|
||||
|
||||
async with async_playwright() as p:
|
||||
browser = await p.chromium.launch(
|
||||
headless=True,
|
||||
args=['--no-sandbox', '--disable-setuid-sandbox', '--disable-dev-shm-usage']
|
||||
args=[
|
||||
'--no-sandbox',
|
||||
'--disable-setuid-sandbox',
|
||||
'--disable-dev-shm-usage',
|
||||
'--disable-blink-features=AutomationControlled',
|
||||
'--disable-features=IsolateOrigins,site-per-process',
|
||||
]
|
||||
)
|
||||
|
||||
context = await browser.new_context(
|
||||
user_agent='Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36'
|
||||
user_agent='Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
|
||||
viewport={'width': 1920, 'height': 1080}
|
||||
)
|
||||
|
||||
page = await context.new_page()
|
||||
|
||||
# Set up request interception
|
||||
# Set up request interception to capture video requests
|
||||
async def handle_request(route):
|
||||
req_url = route.request.url
|
||||
|
||||
# Look for video files
|
||||
if any(ext in req_url.lower() for ext in ['.m3u8', '.mp4', '.mkv']):
|
||||
if 'lpayer' not in req_url.lower():
|
||||
print(f"[LPAYER] 🎥 Captured video URL: {req_url[:100]}...")
|
||||
video_urls.append(req_url)
|
||||
|
||||
await route.continue_()
|
||||
|
||||
await page.route('**', handle_request)
|
||||
|
||||
# Navigate to URL with timeout
|
||||
print("[LPAYER] Navigating to page...")
|
||||
|
||||
try:
|
||||
await page.goto(url, wait_until='domcontentloaded', timeout=30000)
|
||||
except Exception as e:
|
||||
print(f"[LPAYER] Navigation warning: {e}")
|
||||
|
||||
# Wait for page to load
|
||||
# Wait for JavaScript to execute
|
||||
print("[LPAYER] Waiting for video player to load...")
|
||||
await asyncio.sleep(5)
|
||||
|
||||
# Try to find and click play button
|
||||
# Try to interact with player to trigger video load
|
||||
try:
|
||||
play_selectors = [
|
||||
'button[aria-label="Play"]',
|
||||
'.play-button',
|
||||
'video',
|
||||
]
|
||||
await page.mouse.click(640, 360)
|
||||
await asyncio.sleep(3)
|
||||
except:
|
||||
pass
|
||||
|
||||
for selector in play_selectors:
|
||||
try:
|
||||
element = await page.query_selector(selector)
|
||||
if element:
|
||||
print(f"[LPAYER] Found element: {selector}")
|
||||
if 'button' in selector:
|
||||
await element.click()
|
||||
await asyncio.sleep(3)
|
||||
break
|
||||
except:
|
||||
continue
|
||||
except Exception as e:
|
||||
print(f"[LPAYER] Play button interaction: {e}")
|
||||
|
||||
# Wait more for network requests
|
||||
await asyncio.sleep(3)
|
||||
|
||||
# Try JavaScript extraction
|
||||
# Try JavaScript extraction to find video URLs in DOM
|
||||
try:
|
||||
js_result = await page.evaluate("""
|
||||
() => {
|
||||
// Check all video elements
|
||||
const videos = document.querySelectorAll('video');
|
||||
for (let v of videos) {
|
||||
if (v.src) {
|
||||
if (v.src && (v.src.includes('.m3u8') || v.src.includes('.mp4'))) {
|
||||
console.log('Found video src:', v.src);
|
||||
return v.src;
|
||||
}
|
||||
const sources = v.querySelectorAll('source');
|
||||
for (let s of sources) {
|
||||
if (s.src && (s.src.includes('.m3u8') || s.src.includes('.mp4'))) {
|
||||
console.log('Found source src:', s.src);
|
||||
return s.src;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Check window object for video URLs
|
||||
// Check for jwplayer
|
||||
if (window.jwplayer) {
|
||||
try {
|
||||
const player = jwplayer();
|
||||
const playlist = player.getPlaylist();
|
||||
if (playlist && playlist[0] && playlist[0].sources) {
|
||||
const src = playlist[0].sources[0].file;
|
||||
console.log('Found jwplayer source:', src);
|
||||
return src;
|
||||
}
|
||||
} catch(e) {
|
||||
console.log('jwplayer error:', e);
|
||||
}
|
||||
}
|
||||
|
||||
// Check for VidStack player
|
||||
const player = document.querySelector('media-player');
|
||||
if (player && player.provider) {
|
||||
const provider = player.provider;
|
||||
// Try to get source from provider
|
||||
if (provider.src) return provider.src;
|
||||
if (provider.currentSrc) return provider.currentSrc;
|
||||
if (provider.url) return provider.url;
|
||||
if (provider.videoUrl) return provider.videoUrl;
|
||||
// Check internal properties
|
||||
for (let key in provider) {
|
||||
try {
|
||||
const val = provider[key];
|
||||
if (typeof val === 'string' && (val.includes('.m3u8') || val.includes('.mp4')) && val.startsWith('http')) {
|
||||
return val;
|
||||
}
|
||||
} catch(e) {}
|
||||
}
|
||||
}
|
||||
|
||||
// Look for video URLs in window object
|
||||
for (let key in window) {
|
||||
if (typeof window[key] === 'string') {
|
||||
const str = window[key];
|
||||
if ((str.includes('.m3u8') || str.includes('.mp4')) && str.startsWith('http')) {
|
||||
console.log('Found in window:', str);
|
||||
return str;
|
||||
}
|
||||
}
|
||||
@@ -143,12 +181,14 @@ class LpayerDownloader(BaseVideoPlayer):
|
||||
except Exception as e:
|
||||
print(f"[LPAYER] JS extraction error: {e}")
|
||||
|
||||
# Parse page HTML for video URLs
|
||||
# Final check: parse rendered page HTML
|
||||
try:
|
||||
content = await page.content()
|
||||
patterns = [
|
||||
r'"file"\s*:\s*"([^"]+\.m3u8[^"]*)"',
|
||||
r'"file"\s*:\s*"([^"]+\.mp4[^"]*)"',
|
||||
r"'file'\s*:\s*'([^']+\.m3u8[^']*)'",
|
||||
r"'file'\s*:\s*'([^']+\.mp4[^']*)'",
|
||||
r'(https?://[^\s"\'<>]+\.m3u8[^\s"\'<>]*)',
|
||||
r'(https?://[^\s"\'<>]+\.mp4[^\s"\'<>]*)',
|
||||
]
|
||||
@@ -156,30 +196,31 @@ class LpayerDownloader(BaseVideoPlayer):
|
||||
for pattern in patterns:
|
||||
matches = re.findall(pattern, content)
|
||||
for match in matches:
|
||||
match = match.replace('\\', '').replace('\/', '/')
|
||||
if 'http' in match and 'lpayer' not in match:
|
||||
match = match.replace('\\', '').replace('\\/', '/')
|
||||
if 'http' in match and 'lpayer' not in match.lower():
|
||||
print(f"[LPAYER] Found in HTML: {match[:100]}...")
|
||||
video_urls.append(match)
|
||||
except Exception as e:
|
||||
print(f"[LPAYER] HTML parsing error: {e}")
|
||||
|
||||
await browser.close()
|
||||
browser = None
|
||||
|
||||
# Return first valid video URL
|
||||
if video_urls:
|
||||
seen = set()
|
||||
unique_urls = []
|
||||
for url in video_urls:
|
||||
if url not in seen:
|
||||
seen.add(url)
|
||||
unique_urls.append(url)
|
||||
# Return first valid video URL
|
||||
if video_urls:
|
||||
seen = set()
|
||||
unique_urls = []
|
||||
for url in video_urls:
|
||||
if url not in seen:
|
||||
seen.add(url)
|
||||
unique_urls.append(url)
|
||||
|
||||
if unique_urls:
|
||||
print(f"[LPAYER] ✅ Found {len(unique_urls)} video URL(s)")
|
||||
return unique_urls[0]
|
||||
if unique_urls:
|
||||
print(f"[LPAYER] ✅ Found {len(unique_urls)} video URL(s)")
|
||||
return unique_urls[0]
|
||||
|
||||
print("[LPAYER] ❌ No video URLs found")
|
||||
return None
|
||||
print("[LPAYER] ❌ No video URLs found")
|
||||
return None
|
||||
|
||||
except ImportError:
|
||||
print("[LPAYER] Playwright not installed")
|
||||
@@ -189,3 +230,242 @@ class LpayerDownloader(BaseVideoPlayer):
|
||||
import traceback
|
||||
traceback.print_exc()
|
||||
return None
|
||||
finally:
|
||||
# Ensure browser is always closed
|
||||
if browser:
|
||||
try:
|
||||
await browser.close()
|
||||
except:
|
||||
pass
|
||||
"""Extract video URL using Playwright to render JavaScript"""
|
||||
try:
|
||||
from playwright.async_api import async_playwright
|
||||
|
||||
print("[LPAYER] Launching Playwright browser...")
|
||||
video_urls = []
|
||||
|
||||
async with async_playwright() as p:
|
||||
browser = await p.chromium.launch(
|
||||
headless=True,
|
||||
args=['--no-sandbox', '--disable-setuid-sandbox', '--disable-dev-shm-usage']
|
||||
)
|
||||
|
||||
context = await browser.new_context(
|
||||
user_agent='Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36',
|
||||
viewport={'width': 1920, 'height': 1080}
|
||||
)
|
||||
|
||||
page = await context.new_page()
|
||||
|
||||
# Set up request interception to capture video requests
|
||||
async def handle_request(route):
|
||||
req_url = route.request.url
|
||||
if any(ext in req_url.lower() for ext in ['.m3u8', '.mp4', '.mkv']):
|
||||
if 'lpayer' not in req_url.lower():
|
||||
print(f"[LPAYER] 🎥 Captured video URL: {req_url[:100]}...")
|
||||
video_urls.append(req_url)
|
||||
await route.continue_()
|
||||
|
||||
await page.route('**', handle_request)
|
||||
|
||||
# Navigate to URL with timeout
|
||||
print("[LPAYER] Navigating to page...")
|
||||
try:
|
||||
await page.goto(url, wait_until='domcontentloaded', timeout=30000)
|
||||
except Exception as e:
|
||||
print(f"[LPAYER] Navigation warning: {e}")
|
||||
|
||||
# Wait for JavaScript to execute and video to load
|
||||
print("[LPAYER] Waiting for video player to load...")
|
||||
await asyncio.sleep(5)
|
||||
|
||||
# Try JavaScript extraction to find video URLs in DOM
|
||||
try:
|
||||
js_result = await page.evaluate("""
|
||||
() => {
|
||||
// Check all video elements
|
||||
const videos = document.querySelectorAll('video');
|
||||
for (let v of videos) {
|
||||
if (v.src && (v.src.includes('.m3u8') || v.src.includes('.mp4'))) {
|
||||
console.log('Found video src:', v.src);
|
||||
return v.src;
|
||||
}
|
||||
const sources = v.querySelectorAll('source');
|
||||
for (let s of sources) {
|
||||
if (s.src && (s.src.includes('.m3u8') || s.src.includes('.mp4'))) {
|
||||
console.log('Found source src:', s.src);
|
||||
return s.src;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Check for jwplayer
|
||||
if (window.jwplayer) {
|
||||
try {
|
||||
const player = jwplayer();
|
||||
const playlist = player.getPlaylist();
|
||||
if (playlist && playlist[0] && playlist[0].sources) {
|
||||
const src = playlist[0].sources[0].file;
|
||||
console.log('Found jwplayer source:', src);
|
||||
return src;
|
||||
}
|
||||
} catch(e) {
|
||||
console.log('jwplayer error:', e);
|
||||
}
|
||||
}
|
||||
|
||||
// Look for video URLs in window object
|
||||
for (let key in window) {
|
||||
if (typeof window[key] === 'string') {
|
||||
const str = window[key];
|
||||
if ((str.includes('.m3u8') || str.includes('.mp4')) && str.startsWith('http')) {
|
||||
console.log('Found in window:', str);
|
||||
return str;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
""")
|
||||
|
||||
if js_result and ('.m3u8' in js_result or '.mp4' in js_result):
|
||||
print(f"[LPAYER] Found video URL via JavaScript")
|
||||
video_urls.append(js_result)
|
||||
except Exception as e:
|
||||
print(f"[LPAYER] JS extraction error: {e}")
|
||||
|
||||
# Final check: parse rendered page HTML
|
||||
try:
|
||||
content = await page.content()
|
||||
patterns = [
|
||||
r'"file"\s*:\s*"([^"]+\.m3u8[^"]*)"',
|
||||
r'"file"\s*:\s*"([^"]+\.mp4[^"]*)"',
|
||||
r"'file'\s*:\s*'([^']+\.m3u8[^']*)'",
|
||||
r"'file'\s*:\s*'([^']+\.mp4[^']*)'",
|
||||
r'(https?://[^\s"\'<>]+\.m3u8[^\s"\'<>]*)',
|
||||
r'(https?://[^\s"\'<>]+\.mp4[^\s"\'<>]*)',
|
||||
]
|
||||
|
||||
for pattern in patterns:
|
||||
matches = re.findall(pattern, content)
|
||||
for match in matches:
|
||||
match = match.replace('\\', '').replace('\\/', '/')
|
||||
if 'http' in match and 'lpayer' not in match.lower():
|
||||
print(f"[LPAYER] Found in HTML: {match[:100]}...")
|
||||
video_urls.append(match)
|
||||
except Exception as e:
|
||||
print(f"[LPAYER] HTML parsing error: {e}")
|
||||
|
||||
await browser.close()
|
||||
|
||||
# Return first valid video URL
|
||||
if video_urls:
|
||||
seen = set()
|
||||
unique_urls = []
|
||||
for url in video_urls:
|
||||
if url not in seen:
|
||||
seen.add(url)
|
||||
unique_urls.append(url)
|
||||
|
||||
if unique_urls:
|
||||
print(f"[LPAYER] ✅ Found {len(unique_urls)} video URL(s)")
|
||||
return unique_urls[0]
|
||||
|
||||
print("[LPAYER] ❌ No video URLs found")
|
||||
return None
|
||||
|
||||
except ImportError:
|
||||
print("[LPAYER] Playwright not installed")
|
||||
return None
|
||||
except Exception as e:
|
||||
print(f"[LPAYER] Playwright error: {e}")
|
||||
import traceback
|
||||
traceback.print_exc()
|
||||
return None
|
||||
|
||||
async def _extract_with_http(self, url: str) -> Optional[str]:
    """
    Fallback extractor: fetch the page over HTTP and scan its HTML.

    Issues a GET through ``self.client``, rejects error status codes and
    passes the response text to ``_extract_video_from_html``. Any failure
    is logged and reported as ``None`` so the caller can treat this path
    as best-effort.
    """
    try:
        reply = await self.client.get(url)
        reply.raise_for_status()
        found = self._extract_video_from_html(reply.text)
    except Exception as e:
        print(f"[LPAYER] HTTP extraction error: {e}")
        found = None
    return found
|
||||
|
||||
def _extract_video_from_html(self, html_content: str) -> Optional[str]:
    """
    Extract video URL from HTML using BeautifulSoup parsing

    Looks for video URLs in this priority:
    1. <video src="URL"> tags
    2. <source src="URL"> tags
    3. Direct URLs in page content with video extensions (.mp4, .m3u8)

    A failure to build the parse tree (for example, the ``lxml`` backend
    being unavailable) no longer aborts the search: ``html.parser`` is
    tried next, and the regex scan of step 3 always runs.

    Returns first valid URL found, or None if not found
    """
    try:
        # Build the tree with the first parser that works. Isolating this
        # step keeps the regex scan reachable when no parser is usable.
        soup = None
        for parser in ('lxml', 'html.parser'):
            try:
                soup = BeautifulSoup(html_content, parser)
                break
            except Exception as e:
                print(f"[LPAYER] HTML parser '{parser}' unavailable: {e}")

        if soup is not None:
            # Priority 1 then 2: every <video> is checked before any <source>.
            for tag_name in ('video', 'source'):
                for tag in soup.find_all(tag_name):
                    src = tag.get('src')
                    if src and self._is_valid_video_url(src):
                        print(f"[LPAYER] Found video in <{tag_name}> tag: {src[:80]}...")
                        return src

        # Priority 3: Look for direct URLs in page content
        patterns = [
            r'"file"\s*:\s*"([^"]+\.m3u8[^"]*)"',
            r'"file"\s*:\s*"([^"]+\.mp4[^"]*)"',
            r'(https?://[^\s"\'<>]+\.m3u8[^\s"\'<>]*)',
            r'(https?://[^\s"\'<>]+\.mp4[^\s"\'<>]*)',
        ]

        for pattern in patterns:
            for raw in re.findall(pattern, html_content):
                # Undo JavaScript/JSON string escaping. Order matters:
                # "\/" and "\uXXXX" must be decoded before stray
                # backslashes are dropped, otherwise "\u0026" would be
                # left behind as the literal text "u0026".
                candidate = raw.replace('\\/', '/')
                candidate = re.sub(
                    r'\\u([0-9a-fA-F]{4})',
                    lambda m: chr(int(m.group(1), 16)),
                    candidate,
                )
                candidate = candidate.replace('\\', '')
                if self._is_valid_video_url(candidate):
                    print(f"[LPAYER] Found video in content: {candidate[:80]}...")
                    return candidate

        print("[LPAYER] No video URL found in HTML")
        return None

    except Exception as e:
        print(f"[LPAYER] HTML parsing error: {e}")
        return None
|
||||
|
||||
def _is_valid_video_url(self, url: str) -> bool:
    """
    Check if URL is a valid video URL

    Valid if:
    - Starts with http:// or https:// (scheme compared case-insensitively)
    - Contains .mp4 or .m3u8 extension

    Empty values and non-string values are reported as invalid.
    """
    if not url or not isinstance(url, str):
        return False

    url_lower = url.lower()

    # Require a real scheme prefix: a bare "http" prefix would also accept
    # strings such as "httpfoo.mp4".
    if not url_lower.startswith(('http://', 'https://')):
        return False

    # Must contain video extension
    return '.mp4' in url_lower or '.m3u8' in url_lower
|
||||
|
||||
Reference in New Issue
Block a user