feat: add multiple video player support for Frieren S2 downloads

- Add Lpayer API decryption using AES (key: kiemtienmua911ca)
- Add yt-dlp extraction for bypassing player blocking
- Add HTTP 206 support for video validation (Range header)
- Add VidMoly .biz domain support (alternative to .to)
- Add SendVid extraction (working - downloaded S1 and S2 E1)
- Add player fallback system with caching per anime URL
- Add video URL validation before returning to downloader
- Update HTTP clients with realistic browser headers
- Add pycryptodome to requirements.txt
- Add test file for fallback system

Downloads working: SendVid (primary), Lpayer (403 issue), VidMoly (testing)
This commit is contained in:
root
2026-02-25 16:29:53 +00:00
parent 8b7a419b4c
commit 3cf2f8eca5
9 changed files with 1370 additions and 184 deletions
+4 -1
View File
@@ -63,7 +63,10 @@ class GenericDownloader(BaseDownloader):
def can_handle(self, url: str) -> bool: def can_handle(self, url: str) -> bool:
return True return True
async def get_download_link(self, url: str, target_filename: str = None) -> tuple[str, str]:
    """Return *url* unchanged together with the filename to save it under.

    Args:
        url: Direct download URL; it is passed through untouched.
        target_filename: Optional caller-chosen filename. When omitted (or
            empty) the last ``/``-separated segment of *url* is used, and
            ``"download"`` when that segment is empty (e.g. trailing slash).

    Returns:
        A ``(url, filename)`` tuple.
    """
    # Nothing to extract for a generic link; only the filename is derived.
    # (The previous copy of these two statements that followed the return
    # was unreachable and has been dropped.)
    filename = target_filename or url.split('/')[-1] or "download"
    return url, filename
+599 -24
View File
@@ -1,12 +1,33 @@
from .base import BaseAnimeSite from .base import BaseAnimeSite
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
import re import re
import subprocess
import json
import httpx import httpx
import logging import logging
from typing import Optional
from urllib.parse import urljoin, unquote from urllib.parse import urljoin, unquote
import binascii
from Crypto.Cipher import AES
from Crypto.Util.Padding import unpad
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
# Lpayer encryption key (from Anime-Sama-Downloader project)
LPAYER_KEY = b"kiemtienmua911ca"
LPAYER_IV = b"1234567890oiuytr"
def _decrypt_lpayer(hex_str: str) -> Optional[str]:
    """Decrypt a hex-encoded Lpayer API payload.

    The text is hex-decoded, decrypted with AES in CBC mode using the
    module-level ``LPAYER_KEY`` / ``LPAYER_IV``, stripped of its block
    padding and decoded as UTF-8.

    Args:
        hex_str: Hexadecimal text as returned by the Lpayer API.

    Returns:
        The decrypted text, or ``None`` when the payload cannot be decoded
        (not hex, wrong length, bad padding, not UTF-8, or not a string).
    """
    try:
        data = binascii.unhexlify(hex_str)
        cipher = AES.new(LPAYER_KEY, AES.MODE_CBC, LPAYER_IV)
        decrypted = unpad(cipher.decrypt(data), AES.block_size)
        return decrypted.decode('utf-8')
    except (ValueError, TypeError):
        # binascii.Error and UnicodeDecodeError are ValueError subclasses;
        # bad ciphertext length / padding is also reported as ValueError.
        # Anything else is a programming error and should surface.
        return None
class AnimeSamaDownloader(BaseAnimeSite): class AnimeSamaDownloader(BaseAnimeSite):
"""Downloader for anime-sama.org / anime-sama.store""" """Downloader for anime-sama.org / anime-sama.store"""
@@ -14,6 +35,11 @@ class AnimeSamaDownloader(BaseAnimeSite):
# Static list of known domains (will be updated dynamically) # Static list of known domains (will be updated dynamically)
BASE_DOMAINS = ["anime-sama.tv", "anime-sama.si", "www.anime-sama.si", "anime-sama.org", "anime-sama.store", "anime-sama.eu"] BASE_DOMAINS = ["anime-sama.tv", "anime-sama.si", "www.anime-sama.si", "anime-sama.org", "anime-sama.store", "anime-sama.eu"]
def __init__(self):
    """Set up the downloader together with its cache of working players."""
    # The parent constructor runs first so the HTTP client is initialised.
    super().__init__()
    # Cache: anime page URL -> name of the player that worked for it.
    self._working_players = dict()
@classmethod @classmethod
async def get_current_domain(cls) -> str: async def get_current_domain(cls) -> str:
""" """
@@ -84,7 +110,7 @@ class AnimeSamaDownloader(BaseAnimeSite):
def can_handle(self, url: str) -> bool: def can_handle(self, url: str) -> bool:
return any(domain in url.lower() for domain in self.BASE_DOMAINS) return any(domain in url.lower() for domain in self.BASE_DOMAINS)
async def get_download_link(self, url: str) -> tuple[str, str]: async def get_download_link(self, url: str, target_filename: Optional[str] = None) -> tuple[str, str]:
""" """
Extract download link from anime-sama URL Extract download link from anime-sama URL
Anime-Sama uses third-party video hosts (vidmoly, etc.) Anime-Sama uses third-party video hosts (vidmoly, etc.)
@@ -93,6 +119,18 @@ class AnimeSamaDownloader(BaseAnimeSite):
try: try:
logger.debug(f"Extracting link from: {url}") logger.debug(f"Extracting link from: {url}")
# Check if URL is a direct video URL (.mp4, .m3u8, .mkv)
# If so, return it directly without extraction
if url.endswith('.mp4') or url.endswith('.m3u8') or url.endswith('.mkv'):
# Extract filename from URL
from urllib.parse import urlparse, unquote
parsed = urlparse(url)
path = unquote(parsed.path)
filename = path.split('/')[-1] if path.split('/')[-1] else "direct_video.mp4"
logger.info(f"Direct video URL detected: {url[:60]}... -> {filename}")
return url, filename
# Check if URL contains the anime page context (format: video_url|anime_page_url|episode_title?) # Check if URL contains the anime page context (format: video_url|anime_page_url|episode_title?)
if '|' in url: if '|' in url:
parts = url.split('|') parts = url.split('|')
@@ -102,29 +140,43 @@ class AnimeSamaDownloader(BaseAnimeSite):
logger.debug(f"Split URL - video: {video_url[:60]}..., anime: {anime_page_url}, episode: {episode_title}") logger.debug(f"Split URL - video: {video_url[:60]}..., anime: {anime_page_url}, episode: {episode_title}")
# Extract video from the host URL with anime context for filename # Use fallback method for pipe-separated URLs (tries multiple players)
if 'vidmoly.to' in video_url or 'vidmoly' in video_url: return await self.get_download_link_with_fallback(
return await self._extract_from_vidmoly(video_url, anime_page_url, episode_title) video_url,
elif 'sendvid.com' in video_url: anime_page_url=anime_page_url,
return await self._extract_from_sendvid(video_url, anime_page_url, episode_title) episode_title=episode_title
elif 'sibnet.ru' in video_url: )
return await self._extract_from_sibnet(video_url, anime_page_url, episode_title)
elif 'lpayer.embed4me.com' in video_url or 'lpayer' in video_url:
return await self._extract_from_lpayer(video_url, anime_page_url, episode_title)
else:
# Try to extract from other hosts
if episode_title:
filename = f"{self._generate_anime_name(anime_page_url)} - {episode_title}.mp4"
else:
filename = self._generate_filename_from_anime_url(anime_page_url)
return video_url, filename
# Check if this is a third-party host URL # Check if this is a third-party host URL
if 'vidmoly.to' in url or 'vidmoly' in url: if 'vidmoly.to' in url or 'vidmoly.biz' in url or 'vidmoly' in url:
return await self._extract_from_vidmoly(url) return await self._extract_from_vidmoly(url)
# Handle direct Lpayer URLs (not embedded in anime-sama pages)
elif 'lpayer.' in url and url.startswith('https://lpayer.embed4me.com/'):
# Direct video URL - return with fixed filename
logger.info(f"Using direct Lpayer URL: {url[:80]}...")
return url, "lpayer_video.mp4"
# Handle Lpayer embedded pages (non-direct URLs)
elif 'lpayer.' in url:
# Embedded page - use fallback
logger.info(f"Using fallback for Lpayer embedded page: {url[:80]}...")
return await self.get_download_link_with_fallback(
url,
anime_page_url=url,
episode_title=None
)
# If it's an anime-sama page, try to find the video # If it's an anime-sama page, try to find the video
if 'anime-sama' in url.lower(): if 'anime-sama' in url.lower():
if 'dingtez' in url or 'dingz' in url:
return await self._extract_from_dingetz(url)
elif 'wupstream' in url or 'wup' in url:
return await self._extract_from_wupstream(url)
elif 'doodstream' in url or 'dood' in url:
return await self._extract_from_doodstream(url)
elif 'streamtape' in url:
return await self._extract_from_streamtape(url)
elif 'voe' in url:
return await self._extract_from_voe(url)
logger.debug(f"Processing anime-sama page: {url}") logger.debug(f"Processing anime-sama page: {url}")
response = await self.client.get(url, follow_redirects=True) response = await self.client.get(url, follow_redirects=True)
final_url = str(response.url) final_url = str(response.url)
@@ -437,6 +489,77 @@ class AnimeSamaDownloader(BaseAnimeSite):
# Re-raise with clearer message # Re-raise with clearer message
raise Exception(f"Lpayer player not supported - this video host requires manual download. Try another host (VidMoly, SendVid, Sibnet). Error: {str(e)}") raise Exception(f"Lpayer player not supported - this video host requires manual download. Try another host (VidMoly, SendVid, Sibnet). Error: {str(e)}")
async def _extract_from_lpayer_api(self, url: str, anime_page_url: str = None, episode_title: str = None) -> tuple[str, str]:
    """Resolve an Lpayer embed URL to a video URL through the Lpayer API.

    The video id is read from the URL fragment (``#<id>``) or from an
    ``id=`` query parameter. The API answer is decrypted with
    ``_decrypt_lpayer`` and its ``source`` entry (treated as an m3u8
    playlist) is handed to yt-dlp to obtain a concrete stream URL.

    Args:
        url: Lpayer player URL containing the video id.
        anime_page_url: Accepted for parity with the other extractors;
            not used here.
        episode_title: Accepted for parity with the other extractors;
            not used here.

    Returns:
        ``(video_url, filename)`` where filename is ``lpayer_<id>.mp4``.
        When yt-dlp cannot resolve the playlist (missing binary, timeout,
        non-zero exit, unparsable output, no usable format) the playlist
        URL itself is returned so the download manager can handle it.

    Raises:
        Exception: If no video id is found, the API does not answer 200,
            the answer cannot be decrypted, or it contains no ``source``.
    """
    import asyncio

    # Extract video ID from URL (fragment first, then ?id= / &id=)
    match = re.search(r'#([a-zA-Z0-9]+)', url) or re.search(r'[?&]id=([a-zA-Z0-9]+)', url)
    if not match:
        raise Exception("Could not extract Lplayer video ID")

    video_id = match.group(1)
    filename = f"lpayer_{video_id}.mp4"

    # Imported lazily so URL validation above does not depend on it.
    import requests

    api_url = f"https://lpayer.embed4me.com/api/v1/video?id={video_id}&w=1920&h=1080&r=https://lpayer.embed4me.com/"
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36",
        "Referer": "https://lpayer.embed4me.com/"
    }

    # requests is blocking: run it in a worker thread so the event loop
    # keeps serving other coroutines while the API call is in flight.
    response = await asyncio.to_thread(requests.get, api_url, headers=headers, timeout=30)
    if response.status_code != 200:
        raise Exception(f"Lplayer API returned {response.status_code}")

    # The API may wrap the hex payload in double quotes.
    hex_data = response.text.strip()
    if hex_data.startswith('"') and hex_data.endswith('"'):
        hex_data = hex_data[1:-1]

    decrypted = _decrypt_lpayer(hex_data)
    if not decrypted:
        raise Exception("Failed to decrypt Lplayer response")

    data = json.loads(decrypted)
    m3u8_url = data.get('source')
    if not m3u8_url:
        raise Exception("No source found in Lplayer response")

    # Use yt-dlp to get direct video URL from m3u8
    cmd = [
        'yt-dlp',
        '--referer', 'https://lpayer.embed4me.com/',
        '--skip-download',
        '--dump-json',
        '--no-warnings',
        '--',  # the URL comes from a remote API; never let it parse as an option
        m3u8_url
    ]

    yt_data = {}
    try:
        # subprocess.run blocks as well, hence the worker thread.
        result = await asyncio.to_thread(
            subprocess.run, cmd, capture_output=True, text=True, timeout=30
        )
        if result.returncode == 0 and result.stdout:
            yt_data = json.loads(result.stdout)
    except (OSError, subprocess.TimeoutExpired, json.JSONDecodeError):
        # yt-dlp missing, too slow, or printing garbage: fall back below.
        yt_data = {}

    if 'formats' in yt_data:
        formats = yt_data['formats'] or []
        mp4_formats = [f for f in formats if f.get('ext') == 'mp4']
        candidates = mp4_formats or formats
        # yt-dlp orders formats from worst to best, so the best is last.
        video_url = candidates[-1].get('url') if candidates else None
    else:
        video_url = yt_data.get('url')

    if video_url:
        return video_url, filename

    # If yt-dlp fails, return m3u8 URL anyway (let download manager handle it)
    return m3u8_url, filename
async def _extract_from_player(self, player_url: str) -> str | None: async def _extract_from_player(self, player_url: str) -> str | None:
"""Try to extract direct video URL from player iframe""" """Try to extract direct video URL from player iframe"""
try: try:
@@ -744,6 +867,259 @@ class AnimeSamaDownloader(BaseAnimeSite):
traceback.print_exc() traceback.print_exc()
return [] return []
async def _test_video_url(self, url: str) -> bool:
    """
    Validate a video URL by reading at most its first ~10KB.

    The ranged GET is streamed and abandoned once enough bytes arrived, so
    a host that ignores the Range header and answers 200 with the complete
    file no longer forces the whole video to be downloaded.

    Returns True if the host answers HTTP 200 or 206 and sends at least one
    byte; False for any other status, an empty body, a timeout, a
    connection error or any other failure. Uses a 10 second timeout.
    """
    # NOTE(review): this method appears to be defined a second time further
    # down in this file; the later definition would take precedence.
    # Confirm and remove one of the two copies.
    probe_limit = 10240
    try:
        logger.debug(f"Testing video URL: {url[:60]}...")

        content_length = 0
        async with self.client.stream(
            "GET",
            url,
            timeout=10.0,
            headers={"Range": "bytes=0-10240"}
        ) as response:
            if response.status_code not in (200, 206):
                logger.warning(f"Video URL validation FAILED: HTTP {response.status_code} for {url[:60]}...")
                return False

            async for chunk in response.aiter_bytes():
                content_length += len(chunk)
                if content_length >= probe_limit:
                    # Enough evidence; leaving the block closes the stream.
                    break

        if content_length > 0:
            logger.info(f"Video URL validation SUCCESS: {url[:60]}... ({content_length} bytes)")
            return True

        logger.warning(f"Video URL validation FAILED: Empty response for {url[:60]}...")
        return False

    except httpx.TimeoutException:
        logger.warning(f"Video URL validation FAILED: Timeout for {url[:60]}...")
        return False
    except httpx.ConnectError as e:
        logger.warning(f"Video URL validation FAILED: Connection error for {url[:60]}...: {e}")
        return False
    except Exception as e:
        # Validation is best-effort: any other failure means "not usable".
        logger.warning(f"Video URL validation FAILED: Error for {url[:60]}...: {e}")
        return False
async def _extract_with_ytdlp(self, url: str, provider: str = None) -> tuple[str, str]:
    """
    Extract a video URL by running yt-dlp with a provider-specific referer.

    Args:
        url: Player or video page URL handed to yt-dlp.
        provider: Optional provider name used to pick the referer and to
            build the filename. When omitted, the referer is chosen from
            the first known provider name found inside *url*.

    Returns:
        ``(video_url, filename)`` where filename is
        ``<provider>_video.mp4`` or ``video.mp4`` without a provider.

    Raises:
        Exception: If yt-dlp is not installed, times out, exits with an
            error, prints invalid JSON, or reports no usable URL.
    """
    import asyncio

    # Define referers for each provider
    referers = {
        'sendvid': 'https://sendvid.com/',
        'vidmoly': 'https://vidmoly.biz/',
        'sibnet': 'https://video.sibnet.ru/',
        'lpayer': 'https://lpayer.embed4me.com/',
        'dingtez': 'https://anime-sama.tv/',
        'streamtape': 'https://streamtape.com/',
        'voe': 'https://voe.sx/',
        'doodstream': 'https://doodstream.com/',
    }

    # Determine referer
    referer = 'https://anime-sama.tv/'
    if provider:
        referer = referers.get(provider.lower(), referer)
    else:
        url_lower = url.lower()
        referer = next(
            (ref for prov, ref in referers.items() if prov in url_lower),
            referer,
        )

    cmd = [
        'yt-dlp',
        '--referer', referer,
        '--skip-download',
        '--dump-json',
        '--no-warnings',
        '--',  # URLs are scraped; never let one be parsed as an option
        url
    ]

    try:
        # subprocess.run blocks; a worker thread keeps the event loop free.
        result = await asyncio.to_thread(
            subprocess.run,
            cmd,
            capture_output=True,
            text=True,
            timeout=30
        )
    except FileNotFoundError as exc:
        raise Exception("yt-dlp executable not found") from exc
    except subprocess.TimeoutExpired as exc:
        raise Exception("yt-dlp extraction timeout") from exc

    if result.returncode == 0 and result.stdout:
        try:
            data = json.loads(result.stdout)
        except json.JSONDecodeError as exc:
            raise Exception("yt-dlp returned invalid JSON") from exc

        if 'formats' in data:
            formats = data['formats'] or []
            mp4_formats = [f for f in formats if f.get('ext') == 'mp4']
            candidates = mp4_formats or formats
            # yt-dlp orders formats from worst to best, so the best is last.
            video_url = candidates[-1].get('url') if candidates else None
        else:
            video_url = data.get('url')

        if video_url:
            return video_url, f"{provider}_video.mp4" if provider else "video.mp4"

    raise Exception(f"yt-dlp failed: {result.stderr}")
async def get_download_link_with_fallback(
    self,
    url: str,
    target_filename: Optional[str] = None,
    anime_page_url: Optional[str] = None,
    episode_title: Optional[str] = None
) -> tuple[str, str]:
    """
    Extract a download link, falling back across candidate URLs and players.

    URL format: url1|url2|url3|anime_page_url|episode_title

    With several candidate URLs, each URL is tried with every player in
    this order: cached player for the anime -> player detected from the
    URL -> vidmoly -> sendvid -> sibnet -> lpayer. With a single candidate
    URL only the detected player is tried (vidmoly when none is detected).

    Every extracted link is validated with _test_video_url() before it is
    returned, and the player that produced it is cached per anime URL.

    Args:
        url: Video player URL or pipe-separated URLs
        target_filename: Optional target filename for the download
        anime_page_url: URL of the anime page (for caching key)
        episode_title: Episode title (for filename generation)

    Returns:
        Tuple of (video_url, filename)

    Raises:
        Exception: If all players fail
    """
    # NOTE(review): this method appears to be defined a second time further
    # down in this file; the later definition would take precedence.
    # Confirm and remove one of the two copies.

    # Define player priority list
    player_priority = ['vidmoly', 'sendvid', 'sibnet', 'lpayer']

    # Extractor coroutine for each supported player
    extractors = {
        'vidmoly': self._extract_from_vidmoly,
        'sendvid': self._extract_from_sendvid,
        'sibnet': self._extract_from_sibnet,
        'lpayer': self._extract_from_lpayer_api,
    }

    # (substring, player) pairs checked in order against the lowercased URL.
    # 'embed' and 'dingtez' have no extractor of their own: try lpayer.
    url_markers = (
        ('vidmoly', 'vidmoly'),
        ('sendvid', 'sendvid'),
        ('sibnet', 'sibnet'),
        ('lpayer', 'lpayer'),
        ('embed', 'lpayer'),
        ('dingtez', 'lpayer'),
    )

    # Extract video URLs from pipe format if needed
    # Format: url1|url2|url3|anime_page_url|episode_title
    if '|' in url:
        parts = url.split('|')
        if len(parts) >= 3:
            # Last 2 parts are anime_page_url and episode_title,
            # everything before is video URLs
            video_urls = parts[:-2]
            anime_page_url = parts[-2] or anime_page_url
            episode_title = parts[-1] or episode_title
        else:
            # Exactly two parts: video URL plus (possibly) the anime page
            video_urls = [parts[0]]
            if 'anime-sama' in parts[1]:
                anime_page_url = parts[1]
    else:
        video_urls = [url]

    # Try each video URL in order (each may have different player)
    last_error = None

    for video_url in video_urls:
        logger.info(f"Trying video URL: {video_url[:50]}...")

        # Detect player type from URL
        url_lower = video_url.lower()
        detected_player = next(
            (player for marker, player in url_markers if marker in url_lower),
            None,
        )
        logger.debug(f"Detected player from URL: {detected_player}")

        if len(video_urls) == 1:
            # Single URL - only try the detected player (or first in priority)
            player_order = [detected_player or player_priority[0]]
        else:
            # Cached player first, then detected, then rest in priority order
            cached_player = self._working_players.get(anime_page_url) if anime_page_url else None
            if cached_player:
                logger.info(f"Using cached player '{cached_player}' for anime: {anime_page_url[:50]}...")

            player_order = []
            for candidate in (cached_player, detected_player, *player_priority):
                if candidate in player_priority and candidate not in player_order:
                    player_order.append(candidate)

        logger.info(f"Player order: {player_order}")

        # Try each player for this video URL
        for player_name in player_order:
            try:
                logger.info(f"Trying player: {player_name} for {video_url[:50]}...")

                video_url_result, filename = await extractors[player_name](
                    video_url, anime_page_url, episode_title
                )

                # Validate the extracted URL
                logger.info(f"Validating extracted URL from {player_name}...")
                is_valid = await self._test_video_url(video_url_result)

                if not is_valid:
                    logger.warning(f"FAILED: {player_name} returned invalid video URL (validation failed)")
                    last_error = f"{player_name} returned invalid URL"
                    continue

                logger.info(f"SUCCESS: {player_name} returned valid video URL")

                # Cache this working player for future requests
                if anime_page_url:
                    self._working_players[anime_page_url] = player_name
                    logger.debug(f"Cached working player '{player_name}' for anime URL")

                # Use target_filename if provided
                if target_filename:
                    filename = target_filename

                return video_url_result, filename

            except Exception as e:
                # One broken player must not stop the remaining fallbacks
                logger.warning(f"FAILED: {player_name} extraction failed: {str(e)}")
                last_error = str(e)
                continue

    # All players failed
    error_msg = f"All players failed. Last error: {last_error}"
    logger.error(error_msg)
    raise Exception(error_msg)
async def get_episodes(self, anime_url: str, lang: str = "vostfr") -> list[dict]: async def get_episodes(self, anime_url: str, lang: str = "vostfr") -> list[dict]:
""" """
Get list of episodes for an anime Get list of episodes for an anime
@@ -842,15 +1218,15 @@ class AnimeSamaDownloader(BaseAnimeSite):
all_episodes_by_number[episode_num].extend(episode_urls) all_episodes_by_number[episode_num].extend(episode_urls)
# For each episode, use the first available URL # For each episode, use ALL available URLs (for fallback)
# (they are usually already in order of preference on the site)
for episode_num in sorted(all_episodes_by_number.keys()): for episode_num in sorted(all_episodes_by_number.keys()):
available_urls = all_episodes_by_number[episode_num] available_urls = all_episodes_by_number[episode_num]
# Use the first available URL (the site usually lists them in preference order) # Use ALL available URLs (pipe-separated) for fallback
episode_url = available_urls[0] # Format: url1|url2|url3|anime_page_url|episode_title
episode_urls_separator = "|".join(available_urls)
episode_title = f'Episode {episode_num}' episode_title = f'Episode {episode_num}'
combined_url = f"{episode_url}|{anime_url}|{episode_title}" combined_url = f"{episode_urls_separator}|{anime_url}|{episode_title}"
episodes.append({ episodes.append({
'episode': episode_num, 'episode': episode_num,
@@ -1109,3 +1485,202 @@ class AnimeSamaDownloader(BaseAnimeSite):
traceback.print_exc() traceback.print_exc()
return [] return []
async def _test_video_url(self, url: str) -> bool:
    """
    Validate a video URL by reading at most its first ~10KB.

    The ranged GET is streamed and abandoned once enough bytes arrived, so
    a host that ignores the Range header and answers 200 with the complete
    file no longer forces the whole video to be downloaded.

    Returns True if the host answers HTTP 200 or 206 and sends at least one
    byte; False for any other status, an empty body, a timeout, a
    connection error or any other failure. Uses a 10 second timeout.
    """
    # NOTE(review): an earlier definition of this method appears higher up
    # in this file; this later one would be the effective definition.
    # Confirm and remove one of the two copies.
    probe_limit = 10240
    try:
        logger.debug(f"Testing video URL: {url[:60]}...")

        content_length = 0
        async with self.client.stream(
            "GET",
            url,
            timeout=10.0,
            headers={"Range": "bytes=0-10240"}
        ) as response:
            if response.status_code not in (200, 206):
                logger.warning(f"Video URL validation FAILED: HTTP {response.status_code} for {url[:60]}...")
                return False

            async for chunk in response.aiter_bytes():
                content_length += len(chunk)
                if content_length >= probe_limit:
                    # Enough evidence; leaving the block closes the stream.
                    break

        if content_length > 0:
            logger.info(f"Video URL validation SUCCESS: {url[:60]}... ({content_length} bytes)")
            return True

        logger.warning(f"Video URL validation FAILED: Empty response for {url[:60]}...")
        return False

    except httpx.TimeoutException:
        logger.warning(f"Video URL validation FAILED: Timeout for {url[:60]}...")
        return False
    except httpx.ConnectError as e:
        logger.warning(f"Video URL validation FAILED: Connection error for {url[:60]}...: {e}")
        return False
    except Exception as e:
        # Validation is best-effort: any other failure means "not usable".
        logger.warning(f"Video URL validation FAILED: Error for {url[:60]}...: {e}")
        return False
async def get_download_link_with_fallback(
    self,
    url: str,
    target_filename: Optional[str] = None,
    anime_page_url: Optional[str] = None,
    episode_title: Optional[str] = None
) -> tuple[str, str]:
    """
    Extract a download link, falling back across candidate URLs and players.

    URL format: url1|url2|url3|anime_page_url|episode_title

    With several candidate URLs, each URL is tried with every player in
    this order: cached player for the anime -> player detected from the
    URL -> vidmoly -> sendvid -> sibnet -> lpayer. With a single candidate
    URL only the detected player is tried (vidmoly when none is detected).

    Every extracted link is validated with _test_video_url() before it is
    returned, and the player that produced it is cached per anime URL.

    Args:
        url: Video player URL or pipe-separated URLs
        target_filename: Optional target filename for the download
        anime_page_url: URL of the anime page (for caching key)
        episode_title: Episode title (for filename generation)

    Returns:
        Tuple of (video_url, filename)

    Raises:
        Exception: If all players fail
    """
    # NOTE(review): an earlier definition of this method appears higher up
    # in this file; this later one would be the effective definition.
    # Confirm and remove one of the two copies.

    # Define player priority list
    player_priority = ['vidmoly', 'sendvid', 'sibnet', 'lpayer']

    # Extractor coroutine for each supported player
    extractors = {
        'vidmoly': self._extract_from_vidmoly,
        'sendvid': self._extract_from_sendvid,
        'sibnet': self._extract_from_sibnet,
        'lpayer': self._extract_from_lpayer_api,
    }

    # (substring, player) pairs checked in order against the lowercased URL.
    # 'embed' and 'dingtez' have no extractor of their own: try lpayer.
    # A first detection chain that was immediately overwritten by an
    # identical second one has been removed; results are unchanged.
    url_markers = (
        ('vidmoly', 'vidmoly'),
        ('sendvid', 'sendvid'),
        ('sibnet', 'sibnet'),
        ('lpayer', 'lpayer'),
        ('embed', 'lpayer'),
        ('dingtez', 'lpayer'),
    )

    # Extract video URLs from pipe format if needed
    # Format: url1|url2|url3|anime_page_url|episode_title
    if '|' in url:
        parts = url.split('|')
        if len(parts) >= 3:
            # Last 2 parts are anime_page_url and episode_title,
            # everything before is video URLs
            video_urls = parts[:-2]
            anime_page_url = parts[-2] or anime_page_url
            episode_title = parts[-1] or episode_title
        else:
            # Exactly two parts: video URL plus (possibly) the anime page
            video_urls = [parts[0]]
            if 'anime-sama' in parts[1]:
                anime_page_url = parts[1]
    else:
        video_urls = [url]

    # Try each video URL in order (each may have different player)
    last_error = None

    for video_url in video_urls:
        logger.info(f"Trying video URL: {video_url[:50]}...")

        # Detect player type from URL
        url_lower = video_url.lower()
        detected_player = next(
            (player for marker, player in url_markers if marker in url_lower),
            None,
        )
        logger.debug(f"Detected player from URL: {detected_player}")

        if len(video_urls) == 1:
            # Single URL - only try the detected player (or first in priority)
            player_order = [detected_player or player_priority[0]]
        else:
            # Cached player first, then detected, then rest in priority order
            cached_player = self._working_players.get(anime_page_url) if anime_page_url else None
            if cached_player:
                logger.info(f"Using cached player '{cached_player}' for anime: {anime_page_url[:50]}...")

            player_order = []
            for candidate in (cached_player, detected_player, *player_priority):
                if candidate in player_priority and candidate not in player_order:
                    player_order.append(candidate)

        logger.info(f"Player order: {player_order}")

        # Try each player for this video URL
        for player_name in player_order:
            try:
                logger.info(f"Trying player: {player_name} for {video_url[:50]}...")

                video_url_result, filename = await extractors[player_name](
                    video_url, anime_page_url, episode_title
                )

                # Validate the extracted URL
                logger.info(f"Validating extracted URL from {player_name}...")
                is_valid = await self._test_video_url(video_url_result)

                if not is_valid:
                    logger.warning(f"FAILED: {player_name} returned invalid video URL (validation failed)")
                    last_error = f"{player_name} returned invalid URL"
                    continue

                logger.info(f"SUCCESS: {player_name} returned valid video URL")

                # Cache this working player for future requests
                if anime_page_url:
                    self._working_players[anime_page_url] = player_name
                    logger.debug(f"Cached working player '{player_name}' for anime URL")

                # Use target_filename if provided
                if target_filename:
                    filename = target_filename

                return video_url_result, filename

            except Exception as e:
                # One broken player must not stop the remaining fallbacks
                logger.warning(f"FAILED: {player_name} extraction failed: {str(e)}")
                last_error = str(e)
                continue

    # All players failed
    error_msg = f"All players failed. Last error: {last_error}"
    logger.error(error_msg)
    raise Exception(error_msg)
+11 -2
View File
@@ -21,8 +21,17 @@ class BaseAnimeSite:
""" """
def __init__(self):
    """Create the shared async HTTP client with browser-like default headers."""
    # Realistic browser headers to avoid blocking by video hosts
    browser_headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8",
        "Accept-Language": "en-US,en;q=0.9,fr;q=0.8",
        "Referer": "https://anime-sama.tv/",
    }

    # One client per site instance, sending the headers above by default
    self.client = httpx.AsyncClient(
        headers=browser_headers,
        follow_redirects=True,
        timeout=10.0,
    )
@abstractmethod @abstractmethod
def can_handle(self, url: str) -> bool: def can_handle(self, url: str) -> bool:
+130 -94
View File
@@ -1,20 +1,55 @@
from .base import BaseAnimeSite from .base import BaseAnimeSite
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
import re import re
from typing import Optional
from urllib.parse import urljoin from urllib.parse import urljoin
class NekoSamaDownloader(BaseAnimeSite): class NekoSamaDownloader(BaseAnimeSite):
"""Downloader for neko-sama.fr""" """Downloader for neko-sama.org (anime streaming via Gupy)
NOTE: neko-sama.org now redirects to Gupy, which is a legal streaming search engine.
It does NOT host video content - it provides metadata about where to watch legally.
This provider can search and get metadata but cannot provide direct download links.
"""
BASE_DOMAINS = ["neko-sama.fr", "nekosama.fr", "www.neko-sama.fr"] BASE_DOMAINS = ["neko-sama.org", "www.neko-sama.org", "neko-sama.fr", "nekosama.fr", "www.gupy.fr", "gupy.fr"]
def can_handle(self, url: str) -> bool: def can_handle(self, url: str) -> bool:
return any(domain in url.lower() for domain in self.BASE_DOMAINS) return any(domain in url.lower() for domain in self.BASE_DOMAINS)
async def get_download_link(self, url: str) -> tuple[str, str]: async def get_download_link(self, url: str, target_filename: Optional[str] = None) -> tuple[str, str]:
"""Extract download link from neko-sama URL""" """
Extract download link from neko-sama URL.
NOTE: neko-sama.org/Gupy is a legal streaming search engine, NOT a video host.
This returns streaming platform information instead of direct video links.
"""
try: try:
# Check if this is a Gupy URL
if 'gupy.fr' in url or 'neko-sama.org' in url:
response = await self.client.get(url, follow_redirects=True)
soup = BeautifulSoup(response.text, 'lxml')
# Look for streaming platform links
streaming_links = []
for link in soup.find_all('a', href=True):
href = link.get('href', '')
if '/out/' in href:
text = link.get_text(strip=True)
if text and 'Regarder' in text:
streaming_links.append(f"{text}: {href}")
if streaming_links:
title_elem = soup.find('h1') or soup.find('title')
title = title_elem.get_text(strip=True).split('|')[0].strip() if title_elem else "Unknown"
info = "Available streaming platforms:\n" + "\n".join(streaming_links[:5])
filename = target_filename or f"{title}_streaming_info.txt"
return info, filename
raise Exception("No streaming links found - Gupy is a legal streaming search, not a video host")
# Legacy: try original method for other URLs
response = await self.client.get(url, follow_redirects=True) response = await self.client.get(url, follow_redirects=True)
soup = BeautifulSoup(response.text, 'lxml') soup = BeautifulSoup(response.text, 'lxml')
@@ -60,7 +95,7 @@ class NekoSamaDownloader(BaseAnimeSite):
filename = self._generate_filename(str(response.url)) filename = self._generate_filename(str(response.url))
return match, filename return match, filename
raise Exception("Could not find video link") raise Exception("Could not find video link - Neko-Sama/Gupy does not host video content")
except Exception as e: except Exception as e:
raise Exception(f"Error extracting NekoSama link: {str(e)}") raise Exception(f"Error extracting NekoSama link: {str(e)}")
@@ -80,11 +115,13 @@ class NekoSamaDownloader(BaseAnimeSite):
return filename.title() return filename.title()
async def get_episodes(self, anime_url: str, lang: str = "vostfr") -> list[dict]: async def get_episodes(self, anime_url: str, lang: str = "vostfr") -> list[dict]:
"""Get list of episodes for an anime."""
try: try:
response = await self.client.get(anime_url) response = await self.client.get(anime_url)
soup = BeautifulSoup(response.text, 'lxml') soup = BeautifulSoup(response.text, 'lxml')
episodes = [] episodes = []
# Try to find episode links
episode_links = soup.find_all('a', href=re.compile(r'episode')) episode_links = soup.find_all('a', href=re.compile(r'episode'))
for link in episode_links: for link in episode_links:
@@ -112,10 +149,7 @@ class NekoSamaDownloader(BaseAnimeSite):
return [] return []
async def get_anime_metadata(self, anime_url: str) -> dict: async def get_anime_metadata(self, anime_url: str) -> dict:
""" """Extract rich metadata from anime page."""
Extract rich metadata from anime page
Returns synopsis, genres, rating, release year, studio, etc.
"""
try: try:
print(f"[NEKO-SAMA] Extracting metadata from: {anime_url}") print(f"[NEKO-SAMA] Extracting metadata from: {anime_url}")
response = await self.client.get(anime_url) response = await self.client.get(anime_url)
@@ -134,68 +168,55 @@ class NekoSamaDownloader(BaseAnimeSite):
'alternative_titles': [] 'alternative_titles': []
} }
# Extract synopsis # Extract title and year from h1
synopsis_selectors = [ title_elem = soup.find('h1')
'div.synopsis', if title_elem:
'div.description', title_text = title_elem.get_text(strip=True)
'div[class*="synopsis"]', # Extract year from title like "Naruto (2002)"
'div[class*="desc"]', year_match = re.search(r'\((\d{4})\)', title_text)
'p.synopsis', if year_match:
'.anime-synopsis', metadata['release_year'] = int(year_match.group(1))
'.summary'
] # Extract synopsis - Gupy shows it as paragraphs
synopsis_elem = soup.find('p')
if synopsis_elem:
text = synopsis_elem.get_text(strip=True)
if len(text) > 50:
metadata['synopsis'] = text
for selector in synopsis_selectors: # Extract genres from meta tags or links
synopsis_elem = soup.select_one(selector) genre_links = soup.find_all('a', href=re.compile(r'serie-|genre|tag'))
if synopsis_elem:
synopsis = synopsis_elem.get_text(strip=True)
if len(synopsis) > 50:
metadata['synopsis'] = synopsis
break
# Extract genres
genre_links = soup.find_all('a', href=re.compile(r'genre|tag|type', re.I))
if genre_links: if genre_links:
metadata['genres'] = [link.get_text(strip=True) for link in genre_links[:5]] genres = []
for link in genre_links[:5]:
text = link.get_text(strip=True)
if text and '/' not in text and len(text) < 30:
genres.append(text)
metadata['genres'] = genres
# Extract rating # Extract rating from percentage
rating_selectors = [ rating_elem = soup.find(string=re.compile(r'\d+(\.\d+)?%'))
'span.rating', if rating_elem:
'div.rating', match = re.search(r'(\d+(\.\d+)?)%', rating_elem)
'span.score', if match:
'div[class*="rating"]', rating = float(match.group(1)) / 10
'div[class*="score"]' metadata['rating'] = f"{rating:.1f}/10"
]
for selector in rating_selectors:
rating_elem = soup.select_one(selector)
if rating_elem:
rating_text = rating_elem.get_text(strip=True)
rating_match = re.search(r'(\d+\.?\d*)\s*/\s*10', rating_text)
if rating_match:
metadata['rating'] = f"{rating_match.group(1)}/10"
break
# Extract release year
page_text = soup.get_text()
year_matches = re.findall(r'\b(19\d{2}|20\d{2})\b', page_text)
if year_matches:
import datetime
current_year = datetime.datetime.now().year + 2
valid_years = [int(y) for y in year_matches if 1950 <= int(y) <= current_year]
if valid_years:
from collections import Counter
metadata['release_year'] = Counter(valid_years).most_common(1)[0][0]
# Extract poster image # Extract poster image
poster_elem = soup.select_one('img.poster, img.cover, .anime-poster img') poster_elem = soup.find('img', src=re.compile(r'poster|poster'))
if poster_elem: if poster_elem:
metadata['poster_image'] = poster_elem.get('src') or poster_elem.get('data-src') metadata['poster_image'] = poster_elem.get('src')
# Extract total episodes # Extract episode count from page text
episodes_count = len(await self.get_episodes(anime_url)) page_text = soup.get_text()
if episodes_count > 0: ep_match = re.search(r'(\d+)\s*episodes?', page_text, re.I)
metadata['total_episodes'] = episodes_count if ep_match:
metadata['total_episodes'] = int(ep_match.group(1))
# Extract studio/director
director_elem = soup.find('a', href=re.compile(r'person|réalisé'))
if director_elem:
metadata['studio'] = director_elem.get_text(strip=True)
print(f"[NEKO-SAMA] Extracted metadata: {metadata}") print(f"[NEKO-SAMA] Extracted metadata: {metadata}")
return metadata return metadata
@@ -205,44 +226,59 @@ class NekoSamaDownloader(BaseAnimeSite):
return {} return {}
async def search_anime(self, query: str, lang: str = "vostfr", include_metadata: bool = False) -> list[dict]: async def search_anime(self, query: str, lang: str = "vostfr", include_metadata: bool = False) -> list[dict]:
""" """Search for anime on neko-sama (uses Gupy backend)."""
Search for anime on neko-sama
Args:
query: Search query string
lang: Language preference (vostfr, vf)
include_metadata: Whether to fetch full metadata for each result (slower)
"""
try: try:
import time import time
from html import unescape
start = time.time() start = time.time()
print(f"[NEKO-SAMA] Searching for '{query}' ({lang})...") print(f"[NEKO-SAMA] Searching for '{query}' ({lang})...")
# Neko-Sama URL pattern: https://neko-sama.fr/anime/{anime-name} # Neko-Sama now uses Gupy - try the direct URL pattern
search_url = f"https://neko-sama.fr/anime/{query.lower().replace(' ', '-')}" search_slug = query.lower().replace(' ', '-')
search_urls = [
f"https://www.gupy.fr/series/{search_slug}/",
f"https://neko-sama.org/series/{search_slug}/",
]
response = await self.client.get(search_url) results = []
for search_url in search_urls:
response = await self.client.get(search_url, follow_redirects=True)
print(f"[NEKO-SAMA] Tried {search_url} -> {response.status_code}")
if response.status_code == 200:
final_url = str(response.url)
print(f"[NEKO-SAMA] Found anime at {final_url}")
# Extract title from page
soup = BeautifulSoup(response.text, 'lxml')
title_elem = soup.find('h1') or soup.find('title')
title = unescape(title_elem.get_text(strip=True)) if title_elem else query
# Clean up title
title = title.split('|')[0].split('-')[0].strip()
result = {
'title': title,
'url': final_url,
'cover_image': None,
'type': 'direct',
'metadata': None
}
# Try to get poster
poster = soup.find('img', src=re.compile(r'poster'))
if poster:
result['cover_image'] = poster.get('src')
if include_metadata:
metadata = await self.get_anime_metadata(final_url)
result['metadata'] = metadata
results.append(result)
break
elapsed = time.time() - start elapsed = time.time() - start
print(f"[NEKO-SAMA] Got response {response.status_code} in {elapsed:.2f}s") print(f"[NEKO-SAMA] Search completed in {elapsed:.2f}s, found {len(results)} results")
return results
if response.status_code == 200:
print(f"[NEKO-SAMA] Found anime at {str(response.url)}")
result = {
'title': query,
'url': str(response.url),
'type': 'direct',
'metadata': None
}
if include_metadata:
metadata = await self.get_anime_metadata(str(response.url))
result['metadata'] = metadata
return [result]
print(f"[NEKO-SAMA] No anime found")
return []
except Exception as e: except Exception as e:
print(f"[NEKO-SAMA] Error: {str(e)}") print(f"[NEKO-SAMA] Error: {str(e)}")
+9 -2
View File
@@ -23,8 +23,15 @@ class BaseVideoPlayer:
""" """
def __init__(self):
    """Create the async HTTP client shared by this player's requests.

    The client carries browser-like default headers so that video hosts
    are less likely to block the requests, follows redirects, and uses a
    10 second timeout.
    """
    # Default headers sent with every request made through self.client.
    browser_headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8",
        "Accept-Language": "en-US,en;q=0.9,fr;q=0.8",
        "Referer": "https://anime-sama.tv/",
    }
    self.client = httpx.AsyncClient(
        timeout=10.0,
        follow_redirects=True,
        headers=browser_headers,
    )
@abstractmethod @abstractmethod
def can_handle(self, url: str) -> bool: def can_handle(self, url: str) -> bool:
+340 -60
View File
@@ -2,6 +2,8 @@ from .base import BaseVideoPlayer
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
import re import re
import asyncio import asyncio
from typing import Optional
import httpx
class LpayerDownloader(BaseVideoPlayer): class LpayerDownloader(BaseVideoPlayer):
@@ -10,124 +12,160 @@ class LpayerDownloader(BaseVideoPlayer):
def can_handle(self, url: str) -> bool:
    """Return True when *url* refers to the Lpayer embed host.

    The check is a case-insensitive substring match on
    ``lpayer.embed4me.com``. A missing or empty URL is reported as
    "not handled" rather than raising ``AttributeError``.
    """
    if not url:
        return False
    return 'lpayer.embed4me.com' in url.lower()
async def get_download_link(self, url: str, target_filename: Optional[str] = None) -> tuple[str, str]:
    """Resolve an Lpayer embed page to a video URL and an output filename.

    The Playwright-based extractor is tried first; when it returns nothing,
    the plain-HTTP HTML parser is used as a fallback.

    Args:
        url: URL of the Lpayer embed page.
        target_filename: Preferred output filename. When omitted (or empty),
            ``"lpayer_video.mp4"`` is used.

    Returns:
        A ``(video_url, filename)`` tuple. ``.mp4`` is appended to the
        filename when the video URL's path ends in ``.mp4`` and the
        filename does not already carry that extension.

    Raises:
        Exception: If no video URL could be extracted or an extractor
            failed. The original error is chained as ``__cause__``.
    """
    from urllib.parse import urlsplit

    try:
        print(f"[LPAYER] Extracting link from: {url}")

        # Try Playwright first, then fall back to parsing the raw HTML.
        video_url = await self._extract_with_playwright(url)
        if not video_url:
            print("[LPAYER] Playwright failed, trying HTML parsing fallback...")
            video_url = await self._extract_with_http(url)

        if not video_url:
            raise Exception("Could not find video URL in Lpayer page")

        print(f"[LPAYER] Found video URL: {video_url[:80]}...")

        filename = target_filename or "lpayer_video.mp4"

        # Inspect only the URL path so that a query string or fragment
        # (e.g. "video.mp4?token=...") does not hide the .mp4 extension.
        is_direct_mp4 = urlsplit(video_url).path.lower().endswith('.mp4')
        if is_direct_mp4 and not filename.lower().endswith('.mp4'):
            filename += '.mp4'

        return video_url, filename
    except Exception as e:
        # Keep the wrapped message callers already see, but preserve the
        # original traceback through exception chaining.
        raise Exception(f"Error extracting Lpayer link: {str(e)}") from e
async def _extract_with_playwright(self, url: str) -> str | None: async def _extract_with_playwright(self, url: str) -> Optional[str]:
"""Extract video URL using Playwright with network interception""" """Extract video URL using Playwright to render JavaScript"""
browser = None
try: try:
from playwright.async_api import async_playwright from playwright.async_api import async_playwright
print("[LPAYER] Launching browser with network interception...") print("[LPAYER] Launching Playwright browser...")
video_urls = [] video_urls = []
async with async_playwright() as p: async with async_playwright() as p:
browser = await p.chromium.launch( browser = await p.chromium.launch(
headless=True, headless=True,
args=['--no-sandbox', '--disable-setuid-sandbox', '--disable-dev-shm-usage'] args=[
'--no-sandbox',
'--disable-setuid-sandbox',
'--disable-dev-shm-usage',
'--disable-blink-features=AutomationControlled',
'--disable-features=IsolateOrigins,site-per-process',
]
) )
context = await browser.new_context( context = await browser.new_context(
user_agent='Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36' user_agent='Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
viewport={'width': 1920, 'height': 1080}
) )
page = await context.new_page() page = await context.new_page()
# Set up request interception # Set up request interception to capture video requests
async def handle_request(route): async def handle_request(route):
req_url = route.request.url req_url = route.request.url
# Look for video files
if any(ext in req_url.lower() for ext in ['.m3u8', '.mp4', '.mkv']): if any(ext in req_url.lower() for ext in ['.m3u8', '.mp4', '.mkv']):
if 'lpayer' not in req_url.lower(): if 'lpayer' not in req_url.lower():
print(f"[LPAYER] 🎥 Captured video URL: {req_url[:100]}...") print(f"[LPAYER] 🎥 Captured video URL: {req_url[:100]}...")
video_urls.append(req_url) video_urls.append(req_url)
await route.continue_() await route.continue_()
await page.route('**', handle_request) await page.route('**', handle_request)
# Navigate to URL with timeout
print("[LPAYER] Navigating to page...") print("[LPAYER] Navigating to page...")
try: try:
await page.goto(url, wait_until='domcontentloaded', timeout=30000) await page.goto(url, wait_until='domcontentloaded', timeout=30000)
except Exception as e: except Exception as e:
print(f"[LPAYER] Navigation warning: {e}") print(f"[LPAYER] Navigation warning: {e}")
# Wait for page to load # Wait for JavaScript to execute
print("[LPAYER] Waiting for video player to load...") print("[LPAYER] Waiting for video player to load...")
await asyncio.sleep(5) await asyncio.sleep(5)
# Try to find and click play button # Try to interact with player to trigger video load
try: try:
play_selectors = [ await page.mouse.click(640, 360)
'button[aria-label="Play"]', await asyncio.sleep(3)
'.play-button', except:
'video', pass
]
for selector in play_selectors: # Try JavaScript extraction to find video URLs in DOM
try:
element = await page.query_selector(selector)
if element:
print(f"[LPAYER] Found element: {selector}")
if 'button' in selector:
await element.click()
await asyncio.sleep(3)
break
except:
continue
except Exception as e:
print(f"[LPAYER] Play button interaction: {e}")
# Wait more for network requests
await asyncio.sleep(3)
# Try JavaScript extraction
try: try:
js_result = await page.evaluate(""" js_result = await page.evaluate("""
() => { () => {
// Check all video elements // Check all video elements
const videos = document.querySelectorAll('video'); const videos = document.querySelectorAll('video');
for (let v of videos) { for (let v of videos) {
if (v.src) { if (v.src && (v.src.includes('.m3u8') || v.src.includes('.mp4'))) {
console.log('Found video src:', v.src);
return v.src; return v.src;
} }
const sources = v.querySelectorAll('source'); const sources = v.querySelectorAll('source');
for (let s of sources) { for (let s of sources) {
if (s.src && (s.src.includes('.m3u8') || s.src.includes('.mp4'))) { if (s.src && (s.src.includes('.m3u8') || s.src.includes('.mp4'))) {
console.log('Found source src:', s.src);
return s.src; return s.src;
} }
} }
} }
// Check window object for video URLs // Check for jwplayer
if (window.jwplayer) {
try {
const player = jwplayer();
const playlist = player.getPlaylist();
if (playlist && playlist[0] && playlist[0].sources) {
const src = playlist[0].sources[0].file;
console.log('Found jwplayer source:', src);
return src;
}
} catch(e) {
console.log('jwplayer error:', e);
}
}
// Check for VidStack player
const player = document.querySelector('media-player');
if (player && player.provider) {
const provider = player.provider;
// Try to get source from provider
if (provider.src) return provider.src;
if (provider.currentSrc) return provider.currentSrc;
if (provider.url) return provider.url;
if (provider.videoUrl) return provider.videoUrl;
// Check internal properties
for (let key in provider) {
try {
const val = provider[key];
if (typeof val === 'string' && (val.includes('.m3u8') || val.includes('.mp4')) && val.startsWith('http')) {
return val;
}
} catch(e) {}
}
}
// Look for video URLs in window object
for (let key in window) { for (let key in window) {
if (typeof window[key] === 'string') { if (typeof window[key] === 'string') {
const str = window[key]; const str = window[key];
if ((str.includes('.m3u8') || str.includes('.mp4')) && str.startsWith('http')) { if ((str.includes('.m3u8') || str.includes('.mp4')) && str.startsWith('http')) {
console.log('Found in window:', str);
return str; return str;
} }
} }
@@ -143,12 +181,14 @@ class LpayerDownloader(BaseVideoPlayer):
except Exception as e: except Exception as e:
print(f"[LPAYER] JS extraction error: {e}") print(f"[LPAYER] JS extraction error: {e}")
# Parse page HTML for video URLs # Final check: parse rendered page HTML
try: try:
content = await page.content() content = await page.content()
patterns = [ patterns = [
r'"file"\s*:\s*"([^"]+\.m3u8[^"]*)"', r'"file"\s*:\s*"([^"]+\.m3u8[^"]*)"',
r'"file"\s*:\s*"([^"]+\.mp4[^"]*)"', r'"file"\s*:\s*"([^"]+\.mp4[^"]*)"',
r"'file'\s*:\s*'([^']+\.m3u8[^']*)'",
r"'file'\s*:\s*'([^']+\.mp4[^']*)'",
r'(https?://[^\s"\'<>]+\.m3u8[^\s"\'<>]*)', r'(https?://[^\s"\'<>]+\.m3u8[^\s"\'<>]*)',
r'(https?://[^\s"\'<>]+\.mp4[^\s"\'<>]*)', r'(https?://[^\s"\'<>]+\.mp4[^\s"\'<>]*)',
] ]
@@ -156,30 +196,31 @@ class LpayerDownloader(BaseVideoPlayer):
for pattern in patterns: for pattern in patterns:
matches = re.findall(pattern, content) matches = re.findall(pattern, content)
for match in matches: for match in matches:
match = match.replace('\\', '').replace('\/', '/') match = match.replace('\\', '').replace('\\/', '/')
if 'http' in match and 'lpayer' not in match: if 'http' in match and 'lpayer' not in match.lower():
print(f"[LPAYER] Found in HTML: {match[:100]}...") print(f"[LPAYER] Found in HTML: {match[:100]}...")
video_urls.append(match) video_urls.append(match)
except Exception as e: except Exception as e:
print(f"[LPAYER] HTML parsing error: {e}") print(f"[LPAYER] HTML parsing error: {e}")
await browser.close() await browser.close()
browser = None
# Return first valid video URL # Return first valid video URL
if video_urls: if video_urls:
seen = set() seen = set()
unique_urls = [] unique_urls = []
for url in video_urls: for url in video_urls:
if url not in seen: if url not in seen:
seen.add(url) seen.add(url)
unique_urls.append(url) unique_urls.append(url)
if unique_urls: if unique_urls:
print(f"[LPAYER] ✅ Found {len(unique_urls)} video URL(s)") print(f"[LPAYER] ✅ Found {len(unique_urls)} video URL(s)")
return unique_urls[0] return unique_urls[0]
print("[LPAYER] ❌ No video URLs found") print("[LPAYER] ❌ No video URLs found")
return None return None
except ImportError: except ImportError:
print("[LPAYER] Playwright not installed") print("[LPAYER] Playwright not installed")
@@ -189,3 +230,242 @@ class LpayerDownloader(BaseVideoPlayer):
import traceback import traceback
traceback.print_exc() traceback.print_exc()
return None return None
finally:
# Ensure browser is always closed
if browser:
try:
await browser.close()
except:
pass
"""Extract video URL using Playwright to render JavaScript"""
try:
from playwright.async_api import async_playwright
print("[LPAYER] Launching Playwright browser...")
video_urls = []
async with async_playwright() as p:
browser = await p.chromium.launch(
headless=True,
args=['--no-sandbox', '--disable-setuid-sandbox', '--disable-dev-shm-usage']
)
context = await browser.new_context(
user_agent='Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36',
viewport={'width': 1920, 'height': 1080}
)
page = await context.new_page()
# Set up request interception to capture video requests
async def handle_request(route):
req_url = route.request.url
if any(ext in req_url.lower() for ext in ['.m3u8', '.mp4', '.mkv']):
if 'lpayer' not in req_url.lower():
print(f"[LPAYER] 🎥 Captured video URL: {req_url[:100]}...")
video_urls.append(req_url)
await route.continue_()
await page.route('**', handle_request)
# Navigate to URL with timeout
print("[LPAYER] Navigating to page...")
try:
await page.goto(url, wait_until='domcontentloaded', timeout=30000)
except Exception as e:
print(f"[LPAYER] Navigation warning: {e}")
# Wait for JavaScript to execute and video to load
print("[LPAYER] Waiting for video player to load...")
await asyncio.sleep(5)
# Try JavaScript extraction to find video URLs in DOM
try:
js_result = await page.evaluate("""
() => {
// Check all video elements
const videos = document.querySelectorAll('video');
for (let v of videos) {
if (v.src && (v.src.includes('.m3u8') || v.src.includes('.mp4'))) {
console.log('Found video src:', v.src);
return v.src;
}
const sources = v.querySelectorAll('source');
for (let s of sources) {
if (s.src && (s.src.includes('.m3u8') || s.src.includes('.mp4'))) {
console.log('Found source src:', s.src);
return s.src;
}
}
}
// Check for jwplayer
if (window.jwplayer) {
try {
const player = jwplayer();
const playlist = player.getPlaylist();
if (playlist && playlist[0] && playlist[0].sources) {
const src = playlist[0].sources[0].file;
console.log('Found jwplayer source:', src);
return src;
}
} catch(e) {
console.log('jwplayer error:', e);
}
}
// Look for video URLs in window object
for (let key in window) {
if (typeof window[key] === 'string') {
const str = window[key];
if ((str.includes('.m3u8') || str.includes('.mp4')) && str.startsWith('http')) {
console.log('Found in window:', str);
return str;
}
}
}
return null;
}
""")
if js_result and ('.m3u8' in js_result or '.mp4' in js_result):
print(f"[LPAYER] Found video URL via JavaScript")
video_urls.append(js_result)
except Exception as e:
print(f"[LPAYER] JS extraction error: {e}")
# Final check: parse rendered page HTML
try:
content = await page.content()
patterns = [
r'"file"\s*:\s*"([^"]+\.m3u8[^"]*)"',
r'"file"\s*:\s*"([^"]+\.mp4[^"]*)"',
r"'file'\s*:\s*'([^']+\.m3u8[^']*)'",
r"'file'\s*:\s*'([^']+\.mp4[^']*)'",
r'(https?://[^\s"\'<>]+\.m3u8[^\s"\'<>]*)',
r'(https?://[^\s"\'<>]+\.mp4[^\s"\'<>]*)',
]
for pattern in patterns:
matches = re.findall(pattern, content)
for match in matches:
match = match.replace('\\', '').replace('\\/', '/')
if 'http' in match and 'lpayer' not in match.lower():
print(f"[LPAYER] Found in HTML: {match[:100]}...")
video_urls.append(match)
except Exception as e:
print(f"[LPAYER] HTML parsing error: {e}")
await browser.close()
# Return first valid video URL
if video_urls:
seen = set()
unique_urls = []
for url in video_urls:
if url not in seen:
seen.add(url)
unique_urls.append(url)
if unique_urls:
print(f"[LPAYER] ✅ Found {len(unique_urls)} video URL(s)")
return unique_urls[0]
print("[LPAYER] ❌ No video URLs found")
return None
except ImportError:
print("[LPAYER] Playwright not installed")
return None
except Exception as e:
print(f"[LPAYER] Playwright error: {e}")
import traceback
traceback.print_exc()
return None
async def _extract_with_http(self, url: str) -> Optional[str]:
    """Fallback extractor: fetch the page over HTTP and parse its HTML.

    Returns the video URL found by ``_extract_video_from_html``, or None
    when the request fails, the server answers with an error status, or
    nothing is found. Errors are printed, never raised.
    """
    try:
        page = await self.client.get(url)
        # A 4xx/5xx answer is handled like any other failure below.
        page.raise_for_status()
        return self._extract_video_from_html(page.text)
    except Exception as err:
        print(f"[LPAYER] HTTP extraction error: {err}")
        return None
def _extract_video_from_html(self, html_content: str) -> Optional[str]:
    """Extract the first usable video URL from an HTML document.

    Candidates are examined in this priority order:
      1. ``src`` attributes of ``<video>`` tags
      2. ``src`` attributes of ``<source>`` tags
      3. URLs in the raw page text: ``"file": "..."`` entries first, then
         any bare http(s) URL containing ``.m3u8`` or ``.mp4``

    A candidate is accepted when ``_is_valid_video_url`` approves it.

    Returns:
        The first accepted URL, or None when nothing is found or parsing
        fails (the error is printed, not raised).
    """
    try:
        soup = BeautifulSoup(html_content, 'lxml')

        # Priorities 1 and 2: every <video> tag is checked before any
        # <source> tag.
        for tag_name in ('video', 'source'):
            for tag in soup.find_all(tag_name):
                src = tag.get('src')
                if src and self._is_valid_video_url(src):
                    print(f"[LPAYER] Found video in <{tag_name}> tag: {src[:80]}...")
                    return src

        # Priority 3: URLs embedded in the page text (player config, scripts).
        patterns = [
            r'"file"\s*:\s*"([^"]+\.m3u8[^"]*)"',
            r'"file"\s*:\s*"([^"]+\.mp4[^"]*)"',
            r'(https?://[^\s"\'<>]+\.m3u8[^\s"\'<>]*)',
            r'(https?://[^\s"\'<>]+\.mp4[^\s"\'<>]*)',
        ]

        for pattern in patterns:
            for candidate in re.findall(pattern, html_content):
                # Dropping every backslash also turns JSON-escaped "\/"
                # into "/", so no separate "\/" replacement is needed.
                candidate = candidate.replace('\\', '')
                if self._is_valid_video_url(candidate):
                    print(f"[LPAYER] Found video in content: {candidate[:80]}...")
                    return candidate

        print("[LPAYER] No video URL found in HTML")
        return None

    except Exception as e:
        print(f"[LPAYER] HTML parsing error: {e}")
        return None
def _is_valid_video_url(self, url: str) -> bool:
    """Return True when *url* looks like a direct video URL.

    A URL is accepted when it:
      - starts with ``http://`` or ``https://`` (case-insensitive), and
      - contains ``.mp4`` or ``.m3u8`` (case-insensitive).

    Empty or missing values are rejected.
    """
    if not url:
        return False

    lowered = url.lower()

    # Require a real http(s) scheme; a bare "http" prefix such as
    # "httpfoo.mp4" is not a URL.
    if not lowered.startswith(('http://', 'https://')):
        return False

    return '.mp4' in lowered or '.m3u8' in lowered
+1 -1
View File
@@ -303,7 +303,7 @@ class VidMolyDownloader(BaseVideoPlayer):
try: try:
headers = { headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36', 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36',
'Referer': 'https://vidmoly.to/', 'Referer': 'https://vidmoly.biz/',
'Accept': '*/*', 'Accept': '*/*',
'Accept-Language': 'en-US,en;q=0.9', 'Accept-Language': 'en-US,en;q=0.9',
} }
+3
View File
@@ -26,3 +26,6 @@ bcrypt<4.0
# Scheduler for auto-download # Scheduler for auto-download
apscheduler==3.11.0 apscheduler==3.11.0
# Cryptography for AES decryption
pycryptodome==3.20.0
+273
View File
@@ -0,0 +1,273 @@
"""
Unit tests for Anime-Sama fallback mechanism
Tests player priority, caching, and URL validation
"""
import pytest
from unittest.mock import Mock, AsyncMock, patch, MagicMock
from httpx import TimeoutException, ConnectError
from app.downloaders.anime_sites.animesama import AnimeSamaDownloader
class TestAnimeSamaFallback:
"""Tests for Anime-Sama fallback mechanism"""
@pytest.fixture
def downloader(self):
"""Create AnimeSamaDownloader instance"""
return AnimeSamaDownloader()
@pytest.mark.asyncio
async def test_fallback_tries_players_in_priority_order(self, downloader):
"""
Test that fallback tries players in priority order:
VidMoly -> SendVid -> Sibnet -> Lpayer
"""
# Mock each player extraction method
with patch.object(downloader, '_extract_from_vidmoly') as mock_vidmoly, \
patch.object(downloader, '_extract_from_sendvid') as mock_sendvid, \
patch.object(downloader, '_extract_from_sibnet') as mock_sibnet, \
patch.object(downloader, '_extract_from_lpayer') as mock_lpayer, \
patch.object(downloader, '_test_video_url', new_callable=AsyncMock) as mock_test_url:
# Make vidmoly and sendvid fail, sibnet succeed
mock_vidmoly.side_effect = Exception("VidMoly failed")
mock_sendvid.side_effect = Exception("SendVid failed")
mock_sibnet.return_value = ("http://sibnet.com/video.mp4", "video.mp4")
mock_lpayer.return_value = ("http://lpayer.com/video.mp4", "video.mp4")
# Make validation pass for sibnet
mock_test_url.return_value = True
result = await downloader.get_download_link_with_fallback(
"http://vidmoly.to/test",
anime_page_url="https://anime-sama.si/catalogue/test/vostfr/"
)
# Verify player order was correct
assert mock_vidmoly.called, "VidMoly should be tried first"
assert mock_sendvid.called, "SendVid should be tried second"
assert mock_sibnet.called, "Sibnet should be tried third"
assert not mock_lpayer.called, "Lpayer should not be called since Sibnet succeeded"
assert result == ("http://sibnet.com/video.mp4", "video.mp4")
@pytest.mark.asyncio
async def test_caching_mechanism_stores_working_player(self, downloader):
"""
Test that caching mechanism stores working player for same anime URL.
After first successful player, subsequent requests should use cached player first.
"""
# Setup: First request - vidmoly fails, sendvid succeeds
with patch.object(downloader, '_extract_from_vidmoly') as mock_vidmoly, \
patch.object(downloader, '_extract_from_sendvid') as mock_sendvid, \
patch.object(downloader, '_extract_from_sibnet') as mock_sibnet, \
patch.object(downloader, '_extract_from_lpayer') as mock_lpayer, \
patch.object(downloader, '_test_video_url', new_callable=AsyncMock) as mock_test_url:
# First request: vidmoly fails, sendvid succeeds
mock_vidmoly.side_effect = Exception("VidMoly failed")
mock_sendvid.return_value = ("http://sendvid.com/video.mp4", "video.mp4")
mock_test_url.return_value = True
anime_url = "https://anime-sama.si/catalogue/test/vostfr/"
result1 = await downloader.get_download_link_with_fallback(
"http://vidmoly.to/test",
anime_page_url=anime_url
)
# Verify caching worked
assert anime_url in downloader._working_players
assert downloader._working_players[anime_url] == "sendvid"
# Reset mocks for second request
mock_vidmoly.reset_mock()
mock_sendvid.reset_mock()
mock_sibnet.reset_mock()
mock_lpayer.reset_mock()
# Second request: Should try sendvid first (cached)
mock_sendvid.return_value = ("http://sendvid.com/video2.mp4", "video2.mp4")
mock_test_url.return_value = True
result2 = await downloader.get_download_link_with_fallback(
"http://vidmoly.to/test",
anime_page_url=anime_url
)
# Verify sendvid was tried first (due to cache)
assert mock_sendvid.call_count == 1, "Cached player (sendvid) should be tried first"
@pytest.mark.asyncio
async def test_all_players_failing_raises_exception(self, downloader):
"""
Test that when all players fail, an exception is raised with proper error message.
"""
with patch.object(downloader, '_extract_from_vidmoly') as mock_vidmoly, \
patch.object(downloader, '_extract_from_sendvid') as mock_sendvid, \
patch.object(downloader, '_extract_from_sibnet') as mock_sibnet, \
patch.object(downloader, '_extract_from_lpayer') as mock_lpayer:
# All players fail
mock_vidmoly.side_effect = Exception("VidMoly error")
mock_sendvid.side_effect = Exception("SendVid error")
mock_sibnet.side_effect = Exception("Sibnet error")
mock_lpayer.side_effect = Exception("Lpayer error")
anime_url = "https://anime-sama.si/catalogue/test/vostfr/"
with pytest.raises(Exception) as exc_info:
await downloader.get_download_link_with_fallback(
"http://vidmoly.to/test",
anime_page_url=anime_url
)
# Verify error message mentions all players failed
assert "All players failed" in str(exc_info.value)
# Verify all players were tried
assert mock_vidmoly.called
assert mock_sendvid.called
assert mock_sibnet.called
assert mock_lpayer.called
@pytest.mark.asyncio
async def test_test_video_url_returns_true_for_valid_url(self, downloader):
"""
Test that _test_video_url returns True for valid video URL (HTTP 200 with content).
"""
# Mock the client to return valid response
mock_response = Mock()
mock_response.status_code = 200
mock_response.content = b"video content data"
with patch.object(downloader.client, 'get', new_callable=AsyncMock) as mock_get:
mock_get.return_value = mock_response
result = await downloader._test_video_url("http://example.com/video.mp4")
assert result is True
mock_get.assert_called_once()
# Verify Range header was included
call_args = mock_get.call_args
assert "Range" in call_args.kwargs.get("headers", {})
@pytest.mark.asyncio
async def test_test_video_url_returns_false_for_invalid_url(self, downloader):
"""
Test that _test_video_url returns False for invalid/non-working URL.
"""
# Test case 1: HTTP error status
mock_response = Mock()
mock_response.status_code = 404
with patch.object(downloader.client, 'get', new_callable=AsyncMock) as mock_get:
mock_get.return_value = mock_response
result = await downloader._test_video_url("http://example.com/notfound.mp4")
assert result is False
@pytest.mark.asyncio
async def test_test_video_url_returns_false_for_empty_response(self, downloader):
"""
Test that _test_video_url returns False for empty response content.
"""
mock_response = Mock()
mock_response.status_code = 200
mock_response.content = b"" # Empty content
with patch.object(downloader.client, 'get', new_callable=AsyncMock) as mock_get:
mock_get.return_value = mock_response
result = await downloader._test_video_url("http://example.com/empty.mp4")
assert result is False
@pytest.mark.asyncio
async def test_test_video_url_returns_false_for_timeout(self, downloader):
"""
Test that _test_video_url returns False for timeout.
"""
with patch.object(downloader.client, 'get', new_callable=AsyncMock) as mock_get:
mock_get.side_effect = TimeoutException("Request timeout")
result = await downloader._test_video_url("http://example.com/slow.mp4")
assert result is False
@pytest.mark.asyncio
async def test_test_video_url_returns_false_for_connection_error(self, downloader):
"""
Test that _test_video_url returns False for connection error.
"""
with patch.object(downloader.client, 'get', new_callable=AsyncMock) as mock_get:
mock_get.side_effect = ConnectError("Connection failed")
result = await downloader._test_video_url("http://example.com/badhost.mp4")
assert result is False
@pytest.mark.asyncio
async def test_fallback_skips_invalid_player_url(self, downloader):
    """
    A player whose extracted URL fails validation must be skipped, and
    the fallback chain must go on to try the next player.
    """
    # Every extractor yields a (url, filename) pair; only validation
    # decides which one is acceptable.
    vidmoly_link = ("http://vidmoly.com/video.mp4", "video.mp4")
    sendvid_link = ("http://sendvid.com/video.mp4", "video.mp4")
    sibnet_link = ("http://sibnet.com/video.mp4", "video.mp4")
    # Validation verdicts in call order: the first check fails, the second
    # passes. Only two verdicts are queued, so a third validation is not
    # expected (sibnet should not be reached once sendvid succeeds).
    validation_verdicts = [False, True]
    with patch.object(downloader, '_extract_from_vidmoly', return_value=vidmoly_link), \
            patch.object(downloader, '_extract_from_sendvid', return_value=sendvid_link) as sendvid_extractor, \
            patch.object(downloader, '_extract_from_sibnet', return_value=sibnet_link), \
            patch.object(downloader, '_test_video_url', new_callable=AsyncMock,
                         side_effect=validation_verdicts) as url_validator:
        # NOTE(review): the returned link is not inspected here; only the
        # interaction with the mocks is checked below.
        await downloader.get_download_link_with_fallback(
            "http://vidmoly.to/test",
            anime_page_url="https://anime-sama.si/catalogue/test/vostfr/"
        )
        # Validation ran at least once.
        assert url_validator.call_count >= 1
        # SendVid extraction was attempted during the fallback.
        assert sendvid_extractor.called
@pytest.mark.asyncio
async def test_cache_not_used_without_anime_page_url(self, downloader):
    """
    When no anime_page_url is passed, nothing may be stored in or read
    from the working-player cache.
    """
    player_url = "http://vidmoly.to/test"
    vidmoly_link = ("http://vidmoly.com/video.mp4", "video.mp4")
    with patch.object(downloader, '_extract_from_vidmoly', return_value=vidmoly_link) as vidmoly_extractor, \
            patch.object(downloader, '_extract_from_sendvid'), \
            patch.object(downloader, '_test_video_url', new_callable=AsyncMock, return_value=True):
        # First lookup without anime_page_url: vidmoly extraction succeeds
        # and validation passes.
        await downloader.get_download_link_with_fallback(player_url)
        # No anime_page_url was supplied, so the cache must remain empty.
        assert len(downloader._working_players) == 0
        # Second lookup, again without anime_page_url: forget the earlier
        # call so the count below reflects this request only.
        vidmoly_extractor.reset_mock()
        await downloader.get_download_link_with_fallback(player_url)
        # The extractor ran again, i.e. no cached player was used.
        assert vidmoly_extractor.call_count == 1