feat: add multiple video player support for Frieren S2 downloads

- Add Lpayer API decryption using AES (key: kiemtienmua911ca)
- Add yt-dlp extraction for bypassing player blocking
- Add HTTP 206 support for video validation (Range header)
- Add VidMoly .biz domain support (alternative to .to)
- Add SendVid extraction (working - downloaded S1 and S2 E1)
- Add player fallback system with caching per anime URL
- Add video URL validation before returning to downloader
- Update HTTP clients with realistic browser headers
- Add pycryptodome to requirements.txt
- Add test file for fallback system

Download status: SendVid works (primary), Lpayer fails with HTTP 403, VidMoly still being tested
This commit is contained in:
root
2026-02-25 16:29:53 +00:00
parent 8b7a419b4c
commit 3cf2f8eca5
9 changed files with 1370 additions and 184 deletions
+4 -1
View File
@@ -63,7 +63,10 @@ class GenericDownloader(BaseDownloader):
def can_handle(self, url: str) -> bool:
return True
async def get_download_link(self, url: str) -> tuple[str, str]:
async def get_download_link(self, url: str, target_filename: str = None) -> tuple[str, str]:
# Just return the URL as-is
filename = target_filename or url.split('/')[-1] or "download"
return url, filename
# Just return the URL as-is
filename = url.split('/')[-1] or "download"
return url, filename
+599 -24
View File
@@ -1,12 +1,33 @@
from .base import BaseAnimeSite
from bs4 import BeautifulSoup
import re
import subprocess
import json
import httpx
import logging
from typing import Optional
from urllib.parse import urljoin, unquote
import binascii
from Crypto.Cipher import AES
from Crypto.Util.Padding import unpad
logger = logging.getLogger(__name__)
# Lpayer encryption key (from Anime-Sama-Downloader project)
LPAYER_KEY = b"kiemtienmua911ca"
LPAYER_IV = b"1234567890oiuytr"
def _decrypt_lpayer(hex_str: str) -> Optional[str]:
"""Decrypt Lpayer video URL using AES"""
try:
data = binascii.unhexlify(hex_str)
cipher = AES.new(LPAYER_KEY, AES.MODE_CBC, LPAYER_IV)
decrypted = unpad(cipher.decrypt(data), AES.block_size)
return decrypted.decode('utf-8')
except Exception:
return None
class AnimeSamaDownloader(BaseAnimeSite):
"""Downloader for anime-sama.org / anime-sama.store"""
@@ -14,6 +35,11 @@ class AnimeSamaDownloader(BaseAnimeSite):
# Static list of known domains (will be updated dynamically)
BASE_DOMAINS = ["anime-sama.tv", "anime-sama.si", "www.anime-sama.si", "anime-sama.org", "anime-sama.store", "anime-sama.eu"]
def __init__(self):
    """Run the base-class setup, then start with an empty working-player cache."""
    # The parent constructor is responsible for creating the HTTP client.
    super().__init__()
    # Maps an anime page URL to the name of the player that worked for it.
    self._working_players = dict()
@classmethod
async def get_current_domain(cls) -> str:
"""
@@ -84,7 +110,7 @@ class AnimeSamaDownloader(BaseAnimeSite):
def can_handle(self, url: str) -> bool:
return any(domain in url.lower() for domain in self.BASE_DOMAINS)
async def get_download_link(self, url: str) -> tuple[str, str]:
async def get_download_link(self, url: str, target_filename: Optional[str] = None) -> tuple[str, str]:
"""
Extract download link from anime-sama URL
Anime-Sama uses third-party video hosts (vidmoly, etc.)
@@ -93,6 +119,18 @@ class AnimeSamaDownloader(BaseAnimeSite):
try:
logger.debug(f"Extracting link from: {url}")
# Check if URL is a direct video URL (.mp4, .m3u8, .mkv)
# If so, return it directly without extraction
if url.endswith('.mp4') or url.endswith('.m3u8') or url.endswith('.mkv'):
# Extract filename from URL
from urllib.parse import urlparse, unquote
parsed = urlparse(url)
path = unquote(parsed.path)
filename = path.split('/')[-1] if path.split('/')[-1] else "direct_video.mp4"
logger.info(f"Direct video URL detected: {url[:60]}... -> {filename}")
return url, filename
# Check if URL contains the anime page context (format: video_url|anime_page_url|episode_title?)
if '|' in url:
parts = url.split('|')
@@ -102,29 +140,43 @@ class AnimeSamaDownloader(BaseAnimeSite):
logger.debug(f"Split URL - video: {video_url[:60]}..., anime: {anime_page_url}, episode: {episode_title}")
# Extract video from the host URL with anime context for filename
if 'vidmoly.to' in video_url or 'vidmoly' in video_url:
return await self._extract_from_vidmoly(video_url, anime_page_url, episode_title)
elif 'sendvid.com' in video_url:
return await self._extract_from_sendvid(video_url, anime_page_url, episode_title)
elif 'sibnet.ru' in video_url:
return await self._extract_from_sibnet(video_url, anime_page_url, episode_title)
elif 'lpayer.embed4me.com' in video_url or 'lpayer' in video_url:
return await self._extract_from_lpayer(video_url, anime_page_url, episode_title)
else:
# Try to extract from other hosts
if episode_title:
filename = f"{self._generate_anime_name(anime_page_url)} - {episode_title}.mp4"
else:
filename = self._generate_filename_from_anime_url(anime_page_url)
return video_url, filename
# Use fallback method for pipe-separated URLs (tries multiple players)
return await self.get_download_link_with_fallback(
video_url,
anime_page_url=anime_page_url,
episode_title=episode_title
)
# Check if this is a third-party host URL
if 'vidmoly.to' in url or 'vidmoly' in url:
if 'vidmoly.to' in url or 'vidmoly.biz' in url or 'vidmoly' in url:
return await self._extract_from_vidmoly(url)
# Handle direct Lpayer URLs (not embedded in anime-sama pages)
elif 'lpayer.' in url and url.startswith('https://lpayer.embed4me.com/'):
# Direct video URL - return with fixed filename
logger.info(f"Using direct Lpayer URL: {url[:80]}...")
return url, "lpayer_video.mp4"
# Handle Lpayer embedded pages (non-direct URLs)
elif 'lpayer.' in url:
# Embedded page - use fallback
logger.info(f"Using fallback for Lpayer embedded page: {url[:80]}...")
return await self.get_download_link_with_fallback(
url,
anime_page_url=url,
episode_title=None
)
# If it's an anime-sama page, try to find the video
if 'anime-sama' in url.lower():
if 'dingtez' in url or 'dingz' in url:
return await self._extract_from_dingetz(url)
elif 'wupstream' in url or 'wup' in url:
return await self._extract_from_wupstream(url)
elif 'doodstream' in url or 'dood' in url:
return await self._extract_from_doodstream(url)
elif 'streamtape' in url:
return await self._extract_from_streamtape(url)
elif 'voe' in url:
return await self._extract_from_voe(url)
logger.debug(f"Processing anime-sama page: {url}")
response = await self.client.get(url, follow_redirects=True)
final_url = str(response.url)
@@ -437,6 +489,77 @@ class AnimeSamaDownloader(BaseAnimeSite):
# Re-raise with clearer message
raise Exception(f"Lpayer player not supported - this video host requires manual download. Try another host (VidMoly, SendVid, Sibnet). Error: {str(e)}")
async def _extract_from_lpayer_api(self, url: str, anime_page_url: str = None, episode_title: str = None) -> tuple[str, str]:
    """Resolve an Lpayer player URL to a downloadable video URL.

    Steps:
      1. Pull the video ID out of the URL (``#<id>`` fragment, else ``id=`` query).
      2. Call the Lpayer ``/api/v1/video`` endpoint and decrypt its hex payload.
      3. Read the ``source`` (m3u8) URL from the decrypted JSON.
      4. Ask yt-dlp for the available formats and prefer the best mp4 one.

    The blocking HTTP call and the yt-dlp subprocess run in worker threads so
    the event loop is not stalled while they wait.

    Args:
        url: Lpayer player URL containing the video ID.
        anime_page_url: Accepted for signature parity with the other
            extractors; not used by this method.
        episode_title: Accepted for signature parity; not used by this method.

    Returns:
        ``(video_url, filename)`` where ``filename`` is ``lpayer_<id>.mp4``.
        If yt-dlp cannot be run or yields nothing usable, ``video_url`` is the
        m3u8 URL itself.

    Raises:
        Exception: If no video ID is found, the API answers with a non-200
            status, the payload cannot be decrypted, or it has no ``source``.
    """
    import asyncio
    import requests

    # Extract video ID from URL: fragment form first, then the id= query form.
    match = re.search(r'#([a-zA-Z0-9]+)', url) or re.search(r'[?&]id=([a-zA-Z0-9]+)', url)
    if not match:
        raise Exception("Could not extract Lplayer video ID")

    video_id = match.group(1)
    filename = f"lpayer_{video_id}.mp4"
    api_url = f"https://lpayer.embed4me.com/api/v1/video?id={video_id}&w=1920&h=1080&r=https://lpayer.embed4me.com/"

    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36",
        "Referer": "https://lpayer.embed4me.com/"
    }

    # requests is synchronous; run it off the event loop thread.
    response = await asyncio.to_thread(requests.get, api_url, headers=headers, timeout=30)
    if response.status_code != 200:
        raise Exception(f"Lplayer API returned {response.status_code}")

    # The API may wrap the hex payload in JSON-style double quotes.
    hex_data = response.text.strip()
    if hex_data.startswith('"') and hex_data.endswith('"'):
        hex_data = hex_data[1:-1]

    decrypted = _decrypt_lpayer(hex_data)
    if not decrypted:
        raise Exception("Failed to decrypt Lplayer response")

    data = json.loads(decrypted)
    m3u8_url = data.get('source')
    if not m3u8_url:
        raise Exception("No source found in Lplayer response")

    # Use yt-dlp to get a direct video URL from the m3u8 playlist.
    cmd = [
        'yt-dlp',
        '--referer', 'https://lpayer.embed4me.com/',
        '--skip-download',
        '--dump-json',
        '--no-warnings',
        m3u8_url
    ]

    try:
        result = await asyncio.to_thread(
            subprocess.run, cmd, capture_output=True, text=True, timeout=30
        )
        if result.returncode == 0 and result.stdout:
            yt_data = json.loads(result.stdout)
            formats = yt_data.get('formats') or []
            mp4_formats = [f for f in formats if f.get('ext') == 'mp4']
            # yt-dlp lists formats from worst to best, so the best is last.
            if mp4_formats:
                video_url = mp4_formats[-1].get('url')
            elif formats:
                video_url = formats[-1].get('url')
            else:
                video_url = yt_data.get('url')

            if video_url:
                return video_url, filename
    except (subprocess.SubprocessError, OSError, json.JSONDecodeError) as exc:
        # Timeout, missing yt-dlp binary or unparsable output: fall through
        # to the m3u8 URL instead of failing the whole extraction.
        logger.warning(f"yt-dlp could not resolve Lpayer m3u8, using playlist URL: {exc}")

    # If yt-dlp fails, return m3u8 URL anyway (let download manager handle it)
    return m3u8_url, filename
async def _extract_from_player(self, player_url: str) -> str | None:
"""Try to extract direct video URL from player iframe"""
try:
@@ -744,6 +867,259 @@ class AnimeSamaDownloader(BaseAnimeSite):
traceback.print_exc()
return []
async def _test_video_url(self, url: str) -> bool:
    """
    Validate a video URL by fetching at most roughly its first 10KB.

    The request asks for bytes 0-10240 with a Range header and the body is
    streamed, so a host that ignores Range and answers 200 with the whole
    file does not cause the full video to be downloaded into memory.

    NOTE(review): this diff adds a second `_test_video_url` further down in
    the same class; if both land, the later definition replaces this one.

    Args:
        url: Candidate direct video URL.

    Returns:
        True if the server answers HTTP 200 or 206 with a non-empty body,
        False otherwise (including timeouts, connection errors and any other
        exception). Uses a 10 second timeout.
    """
    probe_limit = 10240  # stop reading once this many bytes have arrived
    try:
        logger.debug(f"Testing video URL: {url[:60]}...")

        async with self.client.stream(
            "GET",
            url,
            timeout=10.0,
            headers={"Range": "bytes=0-10240"}
        ) as response:
            if response.status_code not in (200, 206):
                logger.warning(f"Video URL validation FAILED: HTTP {response.status_code} for {url[:60]}...")
                return False

            content_length = 0
            async for chunk in response.aiter_bytes():
                content_length += len(chunk)
                if content_length >= probe_limit:
                    # Enough data to prove the URL serves content; leaving the
                    # context manager closes the response without reading more.
                    break

        if content_length > 0:
            logger.info(f"Video URL validation SUCCESS: {url[:60]}... ({content_length} bytes)")
            return True

        logger.warning(f"Video URL validation FAILED: Empty response for {url[:60]}...")
        return False

    except httpx.TimeoutException:
        logger.warning(f"Video URL validation FAILED: Timeout for {url[:60]}...")
        return False
    except httpx.ConnectError as e:
        logger.warning(f"Video URL validation FAILED: Connection error for {url[:60]}...: {e}")
        return False
    except Exception as e:
        # Validation is best-effort: any other failure means "not usable".
        logger.warning(f"Video URL validation FAILED: Error for {url[:60]}...: {e}")
        return False
async def _extract_with_ytdlp(self, url: str, provider: str = None) -> tuple[str, str]:
    """
    Extract a direct video URL by running yt-dlp with a provider-specific referer.

    yt-dlp is invoked with ``--skip-download --dump-json`` and its JSON output
    is inspected for a usable format URL. The subprocess runs in a worker
    thread so the event loop is not blocked while yt-dlp works.

    Args:
        url: Player or page URL to hand to yt-dlp.
        provider: Optional provider name (e.g. ``'sendvid'``) selecting the
            referer. When omitted, the referer is picked from the first known
            provider name found in ``url``, defaulting to the anime-sama one.

    Returns:
        ``(video_url, filename)``; the filename is ``<provider>_video.mp4``
        when a provider was given, otherwise ``video.mp4``.

    Raises:
        Exception: On yt-dlp timeout, invalid JSON output, a non-zero exit
            status, or output that contains no URL.
    """
    import asyncio

    # Referer sent for each known provider.
    referers = {
        'sendvid': 'https://sendvid.com/',
        'vidmoly': 'https://vidmoly.biz/',
        'sibnet': 'https://video.sibnet.ru/',
        'lpayer': 'https://lpayer.embed4me.com/',
        'dingtez': 'https://anime-sama.tv/',
        'streamtape': 'https://streamtape.com/',
        'voe': 'https://voe.sx/',
        'doodstream': 'https://doodstream.com/',
    }

    # Determine referer: explicit provider wins, otherwise sniff the URL.
    referer = 'https://anime-sama.tv/'
    if provider:
        referer = referers.get(provider.lower(), referer)
    else:
        url_lower = url.lower()
        referer = next(
            (ref for prov, ref in referers.items() if prov in url_lower),
            referer,
        )

    cmd = [
        'yt-dlp',
        '--referer', referer,
        '--skip-download',
        '--dump-json',
        '--no-warnings',
        url
    ]

    try:
        result = await asyncio.to_thread(
            subprocess.run,
            cmd,
            capture_output=True,
            text=True,
            timeout=30
        )

        if result.returncode == 0 and result.stdout:
            data = json.loads(result.stdout)
            formats = data.get('formats') or []
            mp4_formats = [f for f in formats if f.get('ext') == 'mp4']
            # yt-dlp lists formats from worst to best, so take the last one.
            if mp4_formats:
                video_url = mp4_formats[-1].get('url')
            elif formats:
                video_url = formats[-1].get('url')
            else:
                video_url = data.get('url')

            if video_url:
                return video_url, f"{provider}_video.mp4" if provider else "video.mp4"

        raise Exception(f"yt-dlp failed: {result.stderr}")

    except subprocess.TimeoutExpired as exc:
        raise Exception("yt-dlp extraction timeout") from exc
    except json.JSONDecodeError as exc:
        raise Exception("yt-dlp returned invalid JSON") from exc
async def get_download_link_with_fallback(
    self,
    url: str,
    target_filename: Optional[str] = None,
    anime_page_url: Optional[str] = None,
    episode_title: Optional[str] = None
) -> tuple[str, str]:
    """
    Resolve a playable video URL, trying several players/URLs until one validates.

    Accepted URL format: url1|url2|url3|anime_page_url|episode_title
    (a plain single URL is accepted too).

    For each candidate URL the players are tried in this order: the player
    cached for the anime page, the player detected from the URL, then the
    remaining ones from vidmoly -> sendvid -> sibnet -> lpayer. When only one
    candidate URL exists, just the detected player (or vidmoly if none was
    detected) is tried. Every extracted URL is checked with
    _test_video_url() and the first valid one is returned; the player that
    produced it is cached under the anime page URL.

    Args:
        url: Video player URL or pipe-separated URLs
        target_filename: Optional filename that overrides the extractor's one
        anime_page_url: URL of the anime page (cache key)
        episode_title: Episode title (forwarded to the extractors)

    Returns:
        Tuple of (video_url, filename)

    Raises:
        Exception: If no player yields a valid URL
    """
    player_priority = ['vidmoly', 'sendvid', 'sibnet', 'lpayer']

    # (substring looked for in the URL, player it maps to); first hit wins.
    host_markers = (
        ('vidmoly', 'vidmoly'),
        ('sendvid', 'sendvid'),
        ('sibnet', 'sibnet'),
        ('lpayer', 'lpayer'),
        ('embed', 'lpayer'),
        ('dingtez', 'lpayer'),  # Unknown player, try lpayer as fallback
    )

    # One coroutine factory per player; the Lpayer one only receives the URL.
    extractors = {
        'vidmoly': lambda u: self._extract_from_vidmoly(u, anime_page_url, episode_title),
        'sendvid': lambda u: self._extract_from_sendvid(u, anime_page_url, episode_title),
        'sibnet': lambda u: self._extract_from_sibnet(u, anime_page_url, episode_title),
        'lpayer': lambda u: self._extract_from_lpayer_api(u),
    }

    # Split the pipe format into candidate URLs plus trailing context fields.
    if '|' not in url:
        video_urls = [url]
    else:
        parts = url.split('|')
        if len(parts) < 3:
            video_urls = [parts[0]]
            if len(parts) > 1 and 'anime-sama' in parts[1]:
                anime_page_url = parts[1]
        else:
            # Last two fields are anime page URL and episode title.
            video_urls = parts[:-2]
            anime_page_url = parts[-2] or anime_page_url
            episode_title = parts[-1] or episode_title

    single_candidate = len(video_urls) == 1
    last_error = None

    for video_url in video_urls:
        logger.info(f"Trying video URL: {video_url[:50]}...")

        url_lower = video_url.lower()
        detected_player = next(
            (player for marker, player in host_markers if marker in url_lower),
            None,
        )
        logger.debug(f"Detected player from URL: {detected_player}")

        cached_player = None
        if anime_page_url and anime_page_url in self._working_players:
            cached_player = self._working_players[anime_page_url]
            logger.info(f"Using cached player '{cached_player}' for anime: {anime_page_url[:50]}...")

        if single_candidate:
            # A lone URL is only worth trying with its own player.
            if detected_player in player_priority:
                player_order = [detected_player]
            else:
                player_order = [player_priority[0]]
        else:
            player_order = []
            for candidate in (cached_player, detected_player, *player_priority):
                if candidate in player_priority and candidate not in player_order:
                    player_order.append(candidate)

        for player_name in player_order:
            try:
                logger.info(f"Trying player: {player_name} for {video_url[:50]}...")
                video_url_result, filename = await extractors[player_name](video_url)

                logger.info(f"Validating extracted URL from {player_name}...")
                if not await self._test_video_url(video_url_result):
                    logger.warning(f"FAILED: {player_name} returned invalid video URL (validation failed)")
                    last_error = f"{player_name} returned invalid URL"
                    continue

                logger.info(f"SUCCESS: {player_name} returned valid video URL")

                # Remember which player worked for this anime.
                if anime_page_url:
                    self._working_players[anime_page_url] = player_name
                    logger.debug(f"Cached working player '{player_name}' for anime URL")

                return video_url_result, (target_filename or filename)

            except Exception as e:
                logger.warning(f"FAILED: {player_name} extraction failed: {str(e)}")
                last_error = str(e)
                continue

    # Nothing validated for any URL/player combination.
    error_msg = f"All players failed. Last error: {last_error}"
    logger.error(error_msg)
    raise Exception(error_msg)
async def get_episodes(self, anime_url: str, lang: str = "vostfr") -> list[dict]:
"""
Get list of episodes for an anime
@@ -842,15 +1218,15 @@ class AnimeSamaDownloader(BaseAnimeSite):
all_episodes_by_number[episode_num].extend(episode_urls)
# For each episode, use the first available URL
# (they are usually already in order of preference on the site)
# For each episode, use ALL available URLs (for fallback)
for episode_num in sorted(all_episodes_by_number.keys()):
available_urls = all_episodes_by_number[episode_num]
# Use the first available URL (the site usually lists them in preference order)
episode_url = available_urls[0]
# Use ALL available URLs (pipe-separated) for fallback
# Format: url1|url2|url3|anime_page_url|episode_title
episode_urls_separator = "|".join(available_urls)
episode_title = f'Episode {episode_num}'
combined_url = f"{episode_url}|{anime_url}|{episode_title}"
combined_url = f"{episode_urls_separator}|{anime_url}|{episode_title}"
episodes.append({
'episode': episode_num,
@@ -1109,3 +1485,202 @@ class AnimeSamaDownloader(BaseAnimeSite):
traceback.print_exc()
return []
async def _test_video_url(self, url: str) -> bool:
    """
    Validate a video URL by fetching at most roughly its first 10KB.

    The request asks for bytes 0-10240 with a Range header and the body is
    streamed, so a host that ignores Range and answers 200 with the whole
    file does not cause the full video to be downloaded into memory.

    NOTE(review): this diff also adds an identically named method earlier in
    the same class; being defined later, this is the one that takes effect.

    Args:
        url: Candidate direct video URL.

    Returns:
        True if the server answers HTTP 200 or 206 with a non-empty body,
        False otherwise (including timeouts, connection errors and any other
        exception). Uses a 10 second timeout.
    """
    probe_limit = 10240  # stop reading once this many bytes have arrived
    try:
        logger.debug(f"Testing video URL: {url[:60]}...")

        async with self.client.stream(
            "GET",
            url,
            timeout=10.0,
            headers={"Range": "bytes=0-10240"}
        ) as response:
            if response.status_code not in (200, 206):
                logger.warning(f"Video URL validation FAILED: HTTP {response.status_code} for {url[:60]}...")
                return False

            content_length = 0
            async for chunk in response.aiter_bytes():
                content_length += len(chunk)
                if content_length >= probe_limit:
                    # Enough data to prove the URL serves content; leaving the
                    # context manager closes the response without reading more.
                    break

        if content_length > 0:
            logger.info(f"Video URL validation SUCCESS: {url[:60]}... ({content_length} bytes)")
            return True

        logger.warning(f"Video URL validation FAILED: Empty response for {url[:60]}...")
        return False

    except httpx.TimeoutException:
        logger.warning(f"Video URL validation FAILED: Timeout for {url[:60]}...")
        return False
    except httpx.ConnectError as e:
        logger.warning(f"Video URL validation FAILED: Connection error for {url[:60]}...: {e}")
        return False
    except Exception as e:
        # Validation is best-effort: any other failure means "not usable".
        logger.warning(f"Video URL validation FAILED: Error for {url[:60]}...: {e}")
        return False
async def get_download_link_with_fallback(
    self,
    url: str,
    target_filename: Optional[str] = None,
    anime_page_url: Optional[str] = None,
    episode_title: Optional[str] = None
) -> tuple[str, str]:
    """
    Extract download link with fallback to multiple players and URLs.

    URL format: url1|url2|url3|anime_page_url|episode_title

    Player order per video URL: cached player for the anime page -> player
    detected from the URL -> remaining players in the order
    vidmoly -> sendvid -> sibnet -> lpayer. When there is only one video URL,
    only the detected player is tried (vidmoly if none was detected).

    Uses caching to remember working players per anime URL.
    Validates each URL with _test_video_url() before returning.

    Args:
        url: Video player URL or pipe-separated URLs
        target_filename: Optional target filename for the download
        anime_page_url: URL of the anime page (for caching key)
        episode_title: Episode title (for filename generation)

    Returns:
        Tuple of (video_url, filename)

    Raises:
        Exception: If all players fail
    """
    # Define player priority list
    player_priority = ['vidmoly', 'sendvid', 'sibnet', 'lpayer']

    # Extract video URLs from pipe format if needed
    # Format: url1|url2|url3|anime_page_url|episode_title
    video_urls = []
    if '|' in url:
        parts = url.split('|')
        # Last 2 parts are anime_page_url and episode_title (if present)
        # Everything before is video URLs
        if len(parts) >= 3:
            video_urls = parts[:-2]  # All but last 2 are video URLs
            if parts[-2]:
                anime_page_url = parts[-2]
            if parts[-1]:
                episode_title = parts[-1]
        else:
            video_urls = [parts[0]]
            if len(parts) > 1 and 'anime-sama' in parts[1]:
                anime_page_url = parts[1]
    else:
        video_urls = [url]

    # Try each video URL in order (each may have different player)
    last_error = None

    for video_url in video_urls:
        logger.info(f"Trying video URL: {video_url[:50]}...")

        # Detect player type from URL. A single chain is enough: an earlier
        # duplicate chain was always overwritten by this one and was removed.
        detected_player = None
        url_lower = video_url.lower()
        if 'vidmoly' in url_lower:
            detected_player = 'vidmoly'
        elif 'sendvid' in url_lower:
            detected_player = 'sendvid'
        elif 'sibnet' in url_lower:
            detected_player = 'sibnet'
        elif 'lpayer' in url_lower or 'embed' in url_lower:
            detected_player = 'lpayer'
        elif 'dingtez' in url_lower:
            detected_player = 'lpayer'  # Unknown player, try lpayer as fallback

        logger.debug(f"Detected player from URL: {detected_player}")

        # Determine which player to try first
        cached_player = None
        if anime_page_url and anime_page_url in self._working_players:
            cached_player = self._working_players[anime_page_url]
            logger.info(f"Using cached player '{cached_player}' for anime: {anime_page_url[:50]}...")

        # Build player order: cached player first, then detected, then rest in priority order
        player_order = []
        if cached_player and cached_player in player_priority:
            player_order.append(cached_player)
        if detected_player and detected_player not in player_order and detected_player in player_priority:
            player_order.append(detected_player)
        for p in player_priority:
            if p not in player_order:
                player_order.append(p)

        # Only try detected player if single video URL
        if len(video_urls) == 1:
            if detected_player and detected_player in player_priority:
                player_order = [detected_player]
            else:
                player_order = [player_priority[0]]

        logger.info(f"Player order: {player_order}")

        # Try each player for this video URL
        for player_name in player_order:
            try:
                logger.info(f"Trying player: {player_name} for {video_url[:50]}...")

                if player_name == 'vidmoly':
                    video_url_result, filename = await self._extract_from_vidmoly(
                        video_url, anime_page_url, episode_title
                    )
                elif player_name == 'sendvid':
                    video_url_result, filename = await self._extract_from_sendvid(
                        video_url, anime_page_url, episode_title
                    )
                elif player_name == 'sibnet':
                    video_url_result, filename = await self._extract_from_sibnet(
                        video_url, anime_page_url, episode_title
                    )
                elif player_name == 'lpayer':
                    video_url_result, filename = await self._extract_from_lpayer_api(video_url)

                # Validate the extracted URL
                logger.info(f"Validating extracted URL from {player_name}...")
                is_valid = await self._test_video_url(video_url_result)

                if is_valid:
                    logger.info(f"SUCCESS: {player_name} returned valid video URL")

                    # Cache this working player for future requests
                    if anime_page_url:
                        self._working_players[anime_page_url] = player_name
                        logger.debug(f"Cached working player '{player_name}' for anime URL")

                    # Use target_filename if provided
                    if target_filename:
                        filename = target_filename

                    return video_url_result, filename
                else:
                    logger.warning(f"FAILED: {player_name} returned invalid video URL (validation failed)")
                    last_error = f"{player_name} returned invalid URL"
                    continue

            except Exception as e:
                # Any extractor/validation failure just moves on to the next player.
                logger.warning(f"FAILED: {player_name} extraction failed: {str(e)}")
                last_error = str(e)
                continue

    # All players failed
    error_msg = f"All players failed. Last error: {last_error}"
    logger.error(error_msg)
    raise Exception(error_msg)
+11 -2
View File
@@ -21,8 +21,17 @@ class BaseAnimeSite:
"""
def __init__(self):
# Initialize HTTP client directly
self.client = httpx.AsyncClient(timeout=10.0, follow_redirects=True)
# Realistic browser headers to avoid blocking by video hosts
headers = {
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8",
"Accept-Language": "en-US,en;q=0.9,fr;q=0.8",
"Referer": "https://anime-sama.tv/",
}
# Initialize HTTP client with browser headers
self.client = httpx.AsyncClient(timeout=10.0, follow_redirects=True, headers=headers)
@abstractmethod
def can_handle(self, url: str) -> bool:
+118 -82
View File
@@ -1,20 +1,55 @@
from .base import BaseAnimeSite
from bs4 import BeautifulSoup
import re
from typing import Optional
from urllib.parse import urljoin
class NekoSamaDownloader(BaseAnimeSite):
"""Downloader for neko-sama.fr"""
"""Downloader for neko-sama.org (anime streaming via Gupy)
BASE_DOMAINS = ["neko-sama.fr", "nekosama.fr", "www.neko-sama.fr"]
NOTE: neko-sama.org now redirects to Gupy, which is a legal streaming search engine.
It does NOT host video content - it provides metadata about where to watch legally.
This provider can search and get metadata but cannot provide direct download links.
"""
BASE_DOMAINS = ["neko-sama.org", "www.neko-sama.org", "neko-sama.fr", "nekosama.fr", "www.gupy.fr", "gupy.fr"]
def can_handle(self, url: str) -> bool:
return any(domain in url.lower() for domain in self.BASE_DOMAINS)
async def get_download_link(self, url: str) -> tuple[str, str]:
"""Extract download link from neko-sama URL"""
async def get_download_link(self, url: str, target_filename: Optional[str] = None) -> tuple[str, str]:
"""
Extract download link from neko-sama URL.
NOTE: neko-sama.org/Gupy is a legal streaming search engine, NOT a video host.
This returns streaming platform information instead of direct video links.
"""
try:
# Check if this is a Gupy URL
if 'gupy.fr' in url or 'neko-sama.org' in url:
response = await self.client.get(url, follow_redirects=True)
soup = BeautifulSoup(response.text, 'lxml')
# Look for streaming platform links
streaming_links = []
for link in soup.find_all('a', href=True):
href = link.get('href', '')
if '/out/' in href:
text = link.get_text(strip=True)
if text and 'Regarder' in text:
streaming_links.append(f"{text}: {href}")
if streaming_links:
title_elem = soup.find('h1') or soup.find('title')
title = title_elem.get_text(strip=True).split('|')[0].strip() if title_elem else "Unknown"
info = "Available streaming platforms:\n" + "\n".join(streaming_links[:5])
filename = target_filename or f"{title}_streaming_info.txt"
return info, filename
raise Exception("No streaming links found - Gupy is a legal streaming search, not a video host")
# Legacy: try original method for other URLs
response = await self.client.get(url, follow_redirects=True)
soup = BeautifulSoup(response.text, 'lxml')
@@ -60,7 +95,7 @@ class NekoSamaDownloader(BaseAnimeSite):
filename = self._generate_filename(str(response.url))
return match, filename
raise Exception("Could not find video link")
raise Exception("Could not find video link - Neko-Sama/Gupy does not host video content")
except Exception as e:
raise Exception(f"Error extracting NekoSama link: {str(e)}")
@@ -80,11 +115,13 @@ class NekoSamaDownloader(BaseAnimeSite):
return filename.title()
async def get_episodes(self, anime_url: str, lang: str = "vostfr") -> list[dict]:
"""Get list of episodes for an anime."""
try:
response = await self.client.get(anime_url)
soup = BeautifulSoup(response.text, 'lxml')
episodes = []
# Try to find episode links
episode_links = soup.find_all('a', href=re.compile(r'episode'))
for link in episode_links:
@@ -112,10 +149,7 @@ class NekoSamaDownloader(BaseAnimeSite):
return []
async def get_anime_metadata(self, anime_url: str) -> dict:
"""
Extract rich metadata from anime page
Returns synopsis, genres, rating, release year, studio, etc.
"""
"""Extract rich metadata from anime page."""
try:
print(f"[NEKO-SAMA] Extracting metadata from: {anime_url}")
response = await self.client.get(anime_url)
@@ -134,68 +168,55 @@ class NekoSamaDownloader(BaseAnimeSite):
'alternative_titles': []
}
# Extract synopsis
synopsis_selectors = [
'div.synopsis',
'div.description',
'div[class*="synopsis"]',
'div[class*="desc"]',
'p.synopsis',
'.anime-synopsis',
'.summary'
]
# Extract title and year from h1
title_elem = soup.find('h1')
if title_elem:
title_text = title_elem.get_text(strip=True)
# Extract year from title like "Naruto (2002)"
year_match = re.search(r'\((\d{4})\)', title_text)
if year_match:
metadata['release_year'] = int(year_match.group(1))
for selector in synopsis_selectors:
synopsis_elem = soup.select_one(selector)
# Extract synopsis - Gupy shows it as paragraphs
synopsis_elem = soup.find('p')
if synopsis_elem:
synopsis = synopsis_elem.get_text(strip=True)
if len(synopsis) > 50:
metadata['synopsis'] = synopsis
break
text = synopsis_elem.get_text(strip=True)
if len(text) > 50:
metadata['synopsis'] = text
# Extract genres
genre_links = soup.find_all('a', href=re.compile(r'genre|tag|type', re.I))
# Extract genres from meta tags or links
genre_links = soup.find_all('a', href=re.compile(r'serie-|genre|tag'))
if genre_links:
metadata['genres'] = [link.get_text(strip=True) for link in genre_links[:5]]
genres = []
for link in genre_links[:5]:
text = link.get_text(strip=True)
if text and '/' not in text and len(text) < 30:
genres.append(text)
metadata['genres'] = genres
# Extract rating
rating_selectors = [
'span.rating',
'div.rating',
'span.score',
'div[class*="rating"]',
'div[class*="score"]'
]
for selector in rating_selectors:
rating_elem = soup.select_one(selector)
# Extract rating from percentage
rating_elem = soup.find(string=re.compile(r'\d+(\.\d+)?%'))
if rating_elem:
rating_text = rating_elem.get_text(strip=True)
rating_match = re.search(r'(\d+\.?\d*)\s*/\s*10', rating_text)
if rating_match:
metadata['rating'] = f"{rating_match.group(1)}/10"
break
# Extract release year
page_text = soup.get_text()
year_matches = re.findall(r'\b(19\d{2}|20\d{2})\b', page_text)
if year_matches:
import datetime
current_year = datetime.datetime.now().year + 2
valid_years = [int(y) for y in year_matches if 1950 <= int(y) <= current_year]
if valid_years:
from collections import Counter
metadata['release_year'] = Counter(valid_years).most_common(1)[0][0]
match = re.search(r'(\d+(\.\d+)?)%', rating_elem)
if match:
rating = float(match.group(1)) / 10
metadata['rating'] = f"{rating:.1f}/10"
# Extract poster image
poster_elem = soup.select_one('img.poster, img.cover, .anime-poster img')
poster_elem = soup.find('img', src=re.compile(r'poster|poster'))
if poster_elem:
metadata['poster_image'] = poster_elem.get('src') or poster_elem.get('data-src')
metadata['poster_image'] = poster_elem.get('src')
# Extract total episodes
episodes_count = len(await self.get_episodes(anime_url))
if episodes_count > 0:
metadata['total_episodes'] = episodes_count
# Extract episode count from page text
page_text = soup.get_text()
ep_match = re.search(r'(\d+)\s*episodes?', page_text, re.I)
if ep_match:
metadata['total_episodes'] = int(ep_match.group(1))
# Extract studio/director
director_elem = soup.find('a', href=re.compile(r'person|réalisé'))
if director_elem:
metadata['studio'] = director_elem.get_text(strip=True)
print(f"[NEKO-SAMA] Extracted metadata: {metadata}")
return metadata
@@ -205,44 +226,59 @@ class NekoSamaDownloader(BaseAnimeSite):
return {}
async def search_anime(self, query: str, lang: str = "vostfr", include_metadata: bool = False) -> list[dict]:
"""
Search for anime on neko-sama
Args:
query: Search query string
lang: Language preference (vostfr, vf)
include_metadata: Whether to fetch full metadata for each result (slower)
"""
"""Search for anime on neko-sama (uses Gupy backend)."""
try:
import time
from html import unescape
start = time.time()
print(f"[NEKO-SAMA] Searching for '{query}' ({lang})...")
# Neko-Sama URL pattern: https://neko-sama.fr/anime/{anime-name}
search_url = f"https://neko-sama.fr/anime/{query.lower().replace(' ', '-')}"
# Neko-Sama now uses Gupy - try the direct URL pattern
search_slug = query.lower().replace(' ', '-')
search_urls = [
f"https://www.gupy.fr/series/{search_slug}/",
f"https://neko-sama.org/series/{search_slug}/",
]
response = await self.client.get(search_url)
elapsed = time.time() - start
print(f"[NEKO-SAMA] Got response {response.status_code} in {elapsed:.2f}s")
results = []
for search_url in search_urls:
response = await self.client.get(search_url, follow_redirects=True)
print(f"[NEKO-SAMA] Tried {search_url} -> {response.status_code}")
if response.status_code == 200:
print(f"[NEKO-SAMA] Found anime at {str(response.url)}")
final_url = str(response.url)
print(f"[NEKO-SAMA] Found anime at {final_url}")
# Extract title from page
soup = BeautifulSoup(response.text, 'lxml')
title_elem = soup.find('h1') or soup.find('title')
title = unescape(title_elem.get_text(strip=True)) if title_elem else query
# Clean up title
title = title.split('|')[0].split('-')[0].strip()
result = {
'title': query,
'url': str(response.url),
'title': title,
'url': final_url,
'cover_image': None,
'type': 'direct',
'metadata': None
}
# Try to get poster
poster = soup.find('img', src=re.compile(r'poster'))
if poster:
result['cover_image'] = poster.get('src')
if include_metadata:
metadata = await self.get_anime_metadata(str(response.url))
metadata = await self.get_anime_metadata(final_url)
result['metadata'] = metadata
return [result]
results.append(result)
break
print(f"[NEKO-SAMA] No anime found")
return []
elapsed = time.time() - start
print(f"[NEKO-SAMA] Search completed in {elapsed:.2f}s, found {len(results)} results")
return results
except Exception as e:
print(f"[NEKO-SAMA] Error: {str(e)}")
+9 -2
View File
@@ -23,8 +23,15 @@ class BaseVideoPlayer:
"""
def __init__(self):
    """Create the async HTTP client shared by this player's requests.

    Exactly one ``httpx.AsyncClient`` is built. Creating a bare client first
    and then rebinding ``self.client`` to a second one would leave the first
    client unclosed, so the headers are prepared before the only
    construction.
    """
    # Realistic browser headers to avoid blocking by video hosts
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8",
        "Accept-Language": "en-US,en;q=0.9,fr;q=0.8",
        "Referer": "https://anime-sama.tv/",
    }
    # Initialize HTTP client with browser headers (10 s timeout, redirects followed)
    self.client = httpx.AsyncClient(timeout=10.0, follow_redirects=True, headers=headers)
@abstractmethod
def can_handle(self, url: str) -> bool:
+324 -44
View File
@@ -2,6 +2,8 @@ from .base import BaseVideoPlayer
from bs4 import BeautifulSoup
import re
import asyncio
from typing import Optional
import httpx
class LpayerDownloader(BaseVideoPlayer):
@@ -10,124 +12,160 @@ class LpayerDownloader(BaseVideoPlayer):
def can_handle(self, url: str) -> bool:
return 'lpayer.embed4me.com' in url.lower()
async def get_download_link(self, url: str, target_filename: Optional[str] = None) -> tuple[str, str]:
    """
    Extract download link from Lpayer video page.

    Uses Playwright for JavaScript rendering, falls back to HTML parsing.

    Args:
        url: Lpayer embed page URL.
        target_filename: Filename to return with the link. When omitted (or
            empty) the default ``lpayer_video.mp4`` is used.

    Returns:
        ``(video_url, filename)``. ``.mp4`` is appended to the filename when
        the video URL's path ends in ``.mp4`` and the filename does not.

    Raises:
        Exception: if neither extraction strategy yields a URL, or if any
            step fails; the original error is kept as ``__cause__``.
    """
    # Local import keeps this method self-contained (stdlib only).
    from urllib.parse import urlsplit

    try:
        print(f"[LPAYER] Extracting link from: {url}")

        # Try Playwright first (handles JavaScript-rendered pages)
        video_url = await self._extract_with_playwright(url)

        if not video_url:
            # Fallback to HTML parsing
            print("[LPAYER] Playwright failed, trying HTML parsing fallback...")
            video_url = await self._extract_with_http(url)

        if not video_url:
            raise Exception("Could not find video URL in Lpayer page")

        print(f"[LPAYER] Found video URL: {video_url[:80]}...")

        # Use target_filename if provided, otherwise generate default
        filename = target_filename or "lpayer_video.mp4"

        # Ensure .mp4 extension if direct MP4. Only the URL *path* is
        # inspected so that "video.mp4?token=..." is still recognised.
        is_direct_mp4 = urlsplit(video_url).path.lower().endswith('.mp4')
        if is_direct_mp4 and not filename.lower().endswith('.mp4'):
            filename += '.mp4'

        return video_url, filename
    except Exception as e:
        # Chain the cause so the original traceback is not lost.
        raise Exception(f"Error extracting Lpayer link: {str(e)}") from e
async def _extract_with_playwright(self, url: str) -> str | None:
"""Extract video URL using Playwright with network interception"""
async def _extract_with_playwright(self, url: str) -> Optional[str]:
"""Extract video URL using Playwright to render JavaScript"""
browser = None
try:
from playwright.async_api import async_playwright
print("[LPAYER] Launching browser with network interception...")
print("[LPAYER] Launching Playwright browser...")
video_urls = []
async with async_playwright() as p:
browser = await p.chromium.launch(
headless=True,
args=['--no-sandbox', '--disable-setuid-sandbox', '--disable-dev-shm-usage']
args=[
'--no-sandbox',
'--disable-setuid-sandbox',
'--disable-dev-shm-usage',
'--disable-blink-features=AutomationControlled',
'--disable-features=IsolateOrigins,site-per-process',
]
)
context = await browser.new_context(
user_agent='Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36'
user_agent='Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
viewport={'width': 1920, 'height': 1080}
)
page = await context.new_page()
# Set up request interception
# Set up request interception to capture video requests
async def handle_request(route):
req_url = route.request.url
# Look for video files
if any(ext in req_url.lower() for ext in ['.m3u8', '.mp4', '.mkv']):
if 'lpayer' not in req_url.lower():
print(f"[LPAYER] 🎥 Captured video URL: {req_url[:100]}...")
video_urls.append(req_url)
await route.continue_()
await page.route('**', handle_request)
# Navigate to URL with timeout
print("[LPAYER] Navigating to page...")
try:
await page.goto(url, wait_until='domcontentloaded', timeout=30000)
except Exception as e:
print(f"[LPAYER] Navigation warning: {e}")
# Wait for page to load
# Wait for JavaScript to execute
print("[LPAYER] Waiting for video player to load...")
await asyncio.sleep(5)
# Try to find and click play button
# Try to interact with player to trigger video load
try:
play_selectors = [
'button[aria-label="Play"]',
'.play-button',
'video',
]
for selector in play_selectors:
try:
element = await page.query_selector(selector)
if element:
print(f"[LPAYER] Found element: {selector}")
if 'button' in selector:
await element.click()
await page.mouse.click(640, 360)
await asyncio.sleep(3)
break
except:
continue
except Exception as e:
print(f"[LPAYER] Play button interaction: {e}")
pass
# Wait more for network requests
await asyncio.sleep(3)
# Try JavaScript extraction
# Try JavaScript extraction to find video URLs in DOM
try:
js_result = await page.evaluate("""
() => {
// Check all video elements
const videos = document.querySelectorAll('video');
for (let v of videos) {
if (v.src) {
if (v.src && (v.src.includes('.m3u8') || v.src.includes('.mp4'))) {
console.log('Found video src:', v.src);
return v.src;
}
const sources = v.querySelectorAll('source');
for (let s of sources) {
if (s.src && (s.src.includes('.m3u8') || s.src.includes('.mp4'))) {
console.log('Found source src:', s.src);
return s.src;
}
}
}
// Check window object for video URLs
// Check for jwplayer
if (window.jwplayer) {
try {
const player = jwplayer();
const playlist = player.getPlaylist();
if (playlist && playlist[0] && playlist[0].sources) {
const src = playlist[0].sources[0].file;
console.log('Found jwplayer source:', src);
return src;
}
} catch(e) {
console.log('jwplayer error:', e);
}
}
// Check for VidStack player
const player = document.querySelector('media-player');
if (player && player.provider) {
const provider = player.provider;
// Try to get source from provider
if (provider.src) return provider.src;
if (provider.currentSrc) return provider.currentSrc;
if (provider.url) return provider.url;
if (provider.videoUrl) return provider.videoUrl;
// Check internal properties
for (let key in provider) {
try {
const val = provider[key];
if (typeof val === 'string' && (val.includes('.m3u8') || val.includes('.mp4')) && val.startsWith('http')) {
return val;
}
} catch(e) {}
}
}
// Look for video URLs in window object
for (let key in window) {
if (typeof window[key] === 'string') {
const str = window[key];
if ((str.includes('.m3u8') || str.includes('.mp4')) && str.startsWith('http')) {
console.log('Found in window:', str);
return str;
}
}
@@ -143,12 +181,14 @@ class LpayerDownloader(BaseVideoPlayer):
except Exception as e:
print(f"[LPAYER] JS extraction error: {e}")
# Parse page HTML for video URLs
# Final check: parse rendered page HTML
try:
content = await page.content()
patterns = [
r'"file"\s*:\s*"([^"]+\.m3u8[^"]*)"',
r'"file"\s*:\s*"([^"]+\.mp4[^"]*)"',
r"'file'\s*:\s*'([^']+\.m3u8[^']*)'",
r"'file'\s*:\s*'([^']+\.mp4[^']*)'",
r'(https?://[^\s"\'<>]+\.m3u8[^\s"\'<>]*)',
r'(https?://[^\s"\'<>]+\.mp4[^\s"\'<>]*)',
]
@@ -156,8 +196,162 @@ class LpayerDownloader(BaseVideoPlayer):
for pattern in patterns:
matches = re.findall(pattern, content)
for match in matches:
match = match.replace('\\', '').replace('\/', '/')
if 'http' in match and 'lpayer' not in match:
match = match.replace('\\', '').replace('\\/', '/')
if 'http' in match and 'lpayer' not in match.lower():
print(f"[LPAYER] Found in HTML: {match[:100]}...")
video_urls.append(match)
except Exception as e:
print(f"[LPAYER] HTML parsing error: {e}")
await browser.close()
browser = None
# Return first valid video URL
if video_urls:
seen = set()
unique_urls = []
for url in video_urls:
if url not in seen:
seen.add(url)
unique_urls.append(url)
if unique_urls:
print(f"[LPAYER] ✅ Found {len(unique_urls)} video URL(s)")
return unique_urls[0]
print("[LPAYER] ❌ No video URLs found")
return None
except ImportError:
print("[LPAYER] Playwright not installed")
return None
except Exception as e:
print(f"[LPAYER] Playwright error: {e}")
import traceback
traceback.print_exc()
return None
finally:
# Ensure browser is always closed
if browser:
try:
await browser.close()
except:
pass
"""Extract video URL using Playwright to render JavaScript"""
try:
from playwright.async_api import async_playwright
print("[LPAYER] Launching Playwright browser...")
video_urls = []
async with async_playwright() as p:
browser = await p.chromium.launch(
headless=True,
args=['--no-sandbox', '--disable-setuid-sandbox', '--disable-dev-shm-usage']
)
context = await browser.new_context(
user_agent='Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36',
viewport={'width': 1920, 'height': 1080}
)
page = await context.new_page()
# Set up request interception to capture video requests
async def handle_request(route):
req_url = route.request.url
if any(ext in req_url.lower() for ext in ['.m3u8', '.mp4', '.mkv']):
if 'lpayer' not in req_url.lower():
print(f"[LPAYER] 🎥 Captured video URL: {req_url[:100]}...")
video_urls.append(req_url)
await route.continue_()
await page.route('**', handle_request)
# Navigate to URL with timeout
print("[LPAYER] Navigating to page...")
try:
await page.goto(url, wait_until='domcontentloaded', timeout=30000)
except Exception as e:
print(f"[LPAYER] Navigation warning: {e}")
# Wait for JavaScript to execute and video to load
print("[LPAYER] Waiting for video player to load...")
await asyncio.sleep(5)
# Try JavaScript extraction to find video URLs in DOM
try:
js_result = await page.evaluate("""
() => {
// Check all video elements
const videos = document.querySelectorAll('video');
for (let v of videos) {
if (v.src && (v.src.includes('.m3u8') || v.src.includes('.mp4'))) {
console.log('Found video src:', v.src);
return v.src;
}
const sources = v.querySelectorAll('source');
for (let s of sources) {
if (s.src && (s.src.includes('.m3u8') || s.src.includes('.mp4'))) {
console.log('Found source src:', s.src);
return s.src;
}
}
}
// Check for jwplayer
if (window.jwplayer) {
try {
const player = jwplayer();
const playlist = player.getPlaylist();
if (playlist && playlist[0] && playlist[0].sources) {
const src = playlist[0].sources[0].file;
console.log('Found jwplayer source:', src);
return src;
}
} catch(e) {
console.log('jwplayer error:', e);
}
}
// Look for video URLs in window object
for (let key in window) {
if (typeof window[key] === 'string') {
const str = window[key];
if ((str.includes('.m3u8') || str.includes('.mp4')) && str.startsWith('http')) {
console.log('Found in window:', str);
return str;
}
}
}
return null;
}
""")
if js_result and ('.m3u8' in js_result or '.mp4' in js_result):
print(f"[LPAYER] Found video URL via JavaScript")
video_urls.append(js_result)
except Exception as e:
print(f"[LPAYER] JS extraction error: {e}")
# Final check: parse rendered page HTML
try:
content = await page.content()
patterns = [
r'"file"\s*:\s*"([^"]+\.m3u8[^"]*)"',
r'"file"\s*:\s*"([^"]+\.mp4[^"]*)"',
r"'file'\s*:\s*'([^']+\.m3u8[^']*)'",
r"'file'\s*:\s*'([^']+\.mp4[^']*)'",
r'(https?://[^\s"\'<>]+\.m3u8[^\s"\'<>]*)',
r'(https?://[^\s"\'<>]+\.mp4[^\s"\'<>]*)',
]
for pattern in patterns:
matches = re.findall(pattern, content)
for match in matches:
match = match.replace('\\', '').replace('\\/', '/')
if 'http' in match and 'lpayer' not in match.lower():
print(f"[LPAYER] Found in HTML: {match[:100]}...")
video_urls.append(match)
except Exception as e:
@@ -189,3 +383,89 @@ class LpayerDownloader(BaseVideoPlayer):
import traceback
traceback.print_exc()
return None
async def _extract_with_http(self, url: str) -> Optional[str]:
    """Fallback extractor: fetch the page without a browser and scan its HTML.

    Args:
        url: Lpayer page URL to download with the shared HTTP client.

    Returns:
        Whatever ``_extract_video_from_html`` finds in the response body, or
        ``None`` when the request fails, the status is an error, or anything
        else goes wrong (the error is printed, never raised).
    """
    try:
        page = await self.client.get(url)
        # Treat 4xx/5xx as failure instead of parsing an error page.
        page.raise_for_status()
        return self._extract_video_from_html(page.text)
    except Exception as e:
        print(f"[LPAYER] HTTP extraction error: {e}")
        return None
def _extract_video_from_html(self, html_content: str) -> Optional[str]:
    """
    Extract video URL from HTML using BeautifulSoup parsing

    Looks for video URLs in this priority:
    1. <video src="URL"> tags
    2. <source src="URL"> tags
    3. Direct URLs in page content with video extensions (.mp4, .m3u8)

    Candidates from step 3 typically sit inside inline JSON/JavaScript, so
    their escapes are decoded before validation: ``\\/`` becomes ``/`` and
    ``\\uXXXX`` becomes the character it encodes (e.g. ``\\u0026`` -> ``&``).
    Any backslash still left afterwards is dropped.

    Returns first valid URL found, or None if not found
    """
    try:
        soup = BeautifulSoup(html_content, 'lxml')

        # Priority 1 and 2: <video src="..."> first, then <source src="...">
        for tag_name in ('video', 'source'):
            for tag in soup.find_all(tag_name):
                src = tag.get('src')
                if src and self._is_valid_video_url(src):
                    print(f"[LPAYER] Found video in <{tag_name}> tag: {src[:80]}...")
                    return src

        # Priority 3: Look for direct URLs in page content
        patterns = [
            r'"file"\s*:\s*"([^"]+\.m3u8[^"]*)"',
            r'"file"\s*:\s*"([^"]+\.mp4[^"]*)"',
            # Single-quoted variants, same as the rendered-page HTML pass
            r"'file'\s*:\s*'([^']+\.m3u8[^']*)'",
            r"'file'\s*:\s*'([^']+\.mp4[^']*)'",
            r'(https?://[^\s"\'<>]+\.m3u8[^\s"\'<>]*)',
            r'(https?://[^\s"\'<>]+\.mp4[^\s"\'<>]*)',
        ]

        for pattern in patterns:
            for candidate in re.findall(pattern, html_content):
                # Order matters: decode the known escapes first, and only
                # then strip leftover backslashes. Stripping first would turn
                # "\u0026" into the literal text "u0026" and corrupt the
                # query string.
                cleaned = candidate.replace('\\/', '/')
                cleaned = re.sub(
                    r'\\u([0-9a-fA-F]{4})',
                    lambda esc: chr(int(esc.group(1), 16)),
                    cleaned,
                )
                cleaned = cleaned.replace('\\', '')
                if self._is_valid_video_url(cleaned):
                    print(f"[LPAYER] Found video in content: {cleaned[:80]}...")
                    return cleaned

        print("[LPAYER] No video URL found in HTML")
        return None
    except Exception as e:
        # Best-effort parser: report and signal "not found" rather than raise.
        print(f"[LPAYER] HTML parsing error: {e}")
        return None
def _is_valid_video_url(self, url: str) -> bool:
"""
Check if URL is a valid video URL
Valid if:
- Starts with http:// or https://
- Contains .mp4 or .m3u8 extension
"""
if not url:
return False
# Must be http(s) URL
if not url.startswith('http'):
return False
# Must contain video extension
url_lower = url.lower()
if '.mp4' not in url_lower and '.m3u8' not in url_lower:
return False
return True
+1 -1
View File
@@ -303,7 +303,7 @@ class VidMolyDownloader(BaseVideoPlayer):
try:
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36',
'Referer': 'https://vidmoly.to/',
'Referer': 'https://vidmoly.biz/',
'Accept': '*/*',
'Accept-Language': 'en-US,en;q=0.9',
}
+3
View File
@@ -26,3 +26,6 @@ bcrypt<4.0
# Scheduler for auto-download
apscheduler==3.11.0
# Cryptography for AES decryption
pycryptodome==3.20.0
+273
View File
@@ -0,0 +1,273 @@
"""
Unit tests for Anime-Sama fallback mechanism
Tests player priority, caching, and URL validation
"""
import pytest
from unittest.mock import Mock, AsyncMock, patch, MagicMock
from httpx import TimeoutException, ConnectError
from app.downloaders.anime_sites.animesama import AnimeSamaDownloader
class TestAnimeSamaFallback:
    """Tests for Anime-Sama fallback mechanism"""

    @pytest.fixture
    def downloader(self):
        """Create AnimeSamaDownloader instance"""
        return AnimeSamaDownloader()

    @pytest.mark.asyncio
    async def test_fallback_tries_players_in_priority_order(self, downloader):
        """
        Test that fallback tries players in priority order:
        VidMoly -> SendVid -> Sibnet -> Lpayer
        """
        # Mock each player extraction method
        with patch.object(downloader, '_extract_from_vidmoly') as mock_vidmoly, \
             patch.object(downloader, '_extract_from_sendvid') as mock_sendvid, \
             patch.object(downloader, '_extract_from_sibnet') as mock_sibnet, \
             patch.object(downloader, '_extract_from_lpayer') as mock_lpayer, \
             patch.object(downloader, '_test_video_url', new_callable=AsyncMock) as mock_test_url:

            # Make vidmoly and sendvid fail, sibnet succeed
            mock_vidmoly.side_effect = Exception("VidMoly failed")
            mock_sendvid.side_effect = Exception("SendVid failed")
            mock_sibnet.return_value = ("http://sibnet.com/video.mp4", "video.mp4")
            mock_lpayer.return_value = ("http://lpayer.com/video.mp4", "video.mp4")

            # Make validation pass for sibnet
            mock_test_url.return_value = True

            result = await downloader.get_download_link_with_fallback(
                "http://vidmoly.to/test",
                anime_page_url="https://anime-sama.si/catalogue/test/vostfr/"
            )

            # Verify player order was correct
            assert mock_vidmoly.called, "VidMoly should be tried first"
            assert mock_sendvid.called, "SendVid should be tried second"
            assert mock_sibnet.called, "Sibnet should be tried third"
            assert not mock_lpayer.called, "Lpayer should not be called since Sibnet succeeded"
            assert result == ("http://sibnet.com/video.mp4", "video.mp4")

    @pytest.mark.asyncio
    async def test_caching_mechanism_stores_working_player(self, downloader):
        """
        Test that caching mechanism stores working player for same anime URL.
        After first successful player, subsequent requests should use cached player first.
        """
        with patch.object(downloader, '_extract_from_vidmoly') as mock_vidmoly, \
             patch.object(downloader, '_extract_from_sendvid') as mock_sendvid, \
             patch.object(downloader, '_extract_from_sibnet') as mock_sibnet, \
             patch.object(downloader, '_extract_from_lpayer') as mock_lpayer, \
             patch.object(downloader, '_test_video_url', new_callable=AsyncMock) as mock_test_url:

            # First request: vidmoly fails, sendvid succeeds
            mock_vidmoly.side_effect = Exception("VidMoly failed")
            mock_sendvid.return_value = ("http://sendvid.com/video.mp4", "video.mp4")
            mock_test_url.return_value = True

            anime_url = "https://anime-sama.si/catalogue/test/vostfr/"
            result1 = await downloader.get_download_link_with_fallback(
                "http://vidmoly.to/test",
                anime_page_url=anime_url
            )
            assert result1 == ("http://sendvid.com/video.mp4", "video.mp4")

            # Verify caching worked
            assert anime_url in downloader._working_players
            assert downloader._working_players[anime_url] == "sendvid"

            # Reset call records for the second request. reset_mock() keeps
            # side_effect, so VidMoly would still raise if it were tried.
            mock_vidmoly.reset_mock()
            mock_sendvid.reset_mock()
            mock_sibnet.reset_mock()
            mock_lpayer.reset_mock()

            # Second request: Should try sendvid first (cached)
            mock_sendvid.return_value = ("http://sendvid.com/video2.mp4", "video2.mp4")
            mock_test_url.return_value = True

            result2 = await downloader.get_download_link_with_fallback(
                "http://vidmoly.to/test",
                anime_page_url=anime_url
            )

            # Verify sendvid was tried first (due to cache). Checking only
            # sendvid's call count is not enough: without the cache the
            # sequence "vidmoly fails -> sendvid succeeds" gives the same
            # count, so also require that VidMoly was never attempted.
            assert mock_sendvid.call_count == 1, "Cached player (sendvid) should be tried first"
            mock_vidmoly.assert_not_called()
            assert result2 == ("http://sendvid.com/video2.mp4", "video2.mp4")

    @pytest.mark.asyncio
    async def test_all_players_failing_raises_exception(self, downloader):
        """
        Test that when all players fail, an exception is raised with proper error message.
        """
        with patch.object(downloader, '_extract_from_vidmoly') as mock_vidmoly, \
             patch.object(downloader, '_extract_from_sendvid') as mock_sendvid, \
             patch.object(downloader, '_extract_from_sibnet') as mock_sibnet, \
             patch.object(downloader, '_extract_from_lpayer') as mock_lpayer:

            # All players fail
            mock_vidmoly.side_effect = Exception("VidMoly error")
            mock_sendvid.side_effect = Exception("SendVid error")
            mock_sibnet.side_effect = Exception("Sibnet error")
            mock_lpayer.side_effect = Exception("Lpayer error")

            anime_url = "https://anime-sama.si/catalogue/test/vostfr/"

            with pytest.raises(Exception) as exc_info:
                await downloader.get_download_link_with_fallback(
                    "http://vidmoly.to/test",
                    anime_page_url=anime_url
                )

            # Verify error message mentions all players failed
            assert "All players failed" in str(exc_info.value)

            # Verify all players were tried
            assert mock_vidmoly.called
            assert mock_sendvid.called
            assert mock_sibnet.called
            assert mock_lpayer.called

    @pytest.mark.asyncio
    async def test_test_video_url_returns_true_for_valid_url(self, downloader):
        """
        Test that _test_video_url returns True for valid video URL (HTTP 200 with content).
        """
        # Mock the client to return valid response
        mock_response = Mock()
        mock_response.status_code = 200
        mock_response.content = b"video content data"

        with patch.object(downloader.client, 'get', new_callable=AsyncMock) as mock_get:
            mock_get.return_value = mock_response

            result = await downloader._test_video_url("http://example.com/video.mp4")

            assert result is True
            mock_get.assert_called_once()

            # Verify Range header was included
            call_args = mock_get.call_args
            assert "Range" in call_args.kwargs.get("headers", {})

    @pytest.mark.asyncio
    async def test_test_video_url_returns_false_for_invalid_url(self, downloader):
        """
        Test that _test_video_url returns False for invalid/non-working URL.
        """
        # HTTP error status
        mock_response = Mock()
        mock_response.status_code = 404

        with patch.object(downloader.client, 'get', new_callable=AsyncMock) as mock_get:
            mock_get.return_value = mock_response

            result = await downloader._test_video_url("http://example.com/notfound.mp4")

            assert result is False

    @pytest.mark.asyncio
    async def test_test_video_url_returns_false_for_empty_response(self, downloader):
        """
        Test that _test_video_url returns False for empty response content.
        """
        mock_response = Mock()
        mock_response.status_code = 200
        mock_response.content = b""  # Empty content

        with patch.object(downloader.client, 'get', new_callable=AsyncMock) as mock_get:
            mock_get.return_value = mock_response

            result = await downloader._test_video_url("http://example.com/empty.mp4")

            assert result is False

    @pytest.mark.asyncio
    async def test_test_video_url_returns_false_for_timeout(self, downloader):
        """
        Test that _test_video_url returns False for timeout.
        """
        with patch.object(downloader.client, 'get', new_callable=AsyncMock) as mock_get:
            mock_get.side_effect = TimeoutException("Request timeout")

            result = await downloader._test_video_url("http://example.com/slow.mp4")

            assert result is False

    @pytest.mark.asyncio
    async def test_test_video_url_returns_false_for_connection_error(self, downloader):
        """
        Test that _test_video_url returns False for connection error.
        """
        with patch.object(downloader.client, 'get', new_callable=AsyncMock) as mock_get:
            mock_get.side_effect = ConnectError("Connection failed")

            result = await downloader._test_video_url("http://example.com/badhost.mp4")

            assert result is False

    @pytest.mark.asyncio
    async def test_fallback_skips_invalid_player_url(self, downloader):
        """
        Test that fallback skips players that return invalid URLs (validation fails).
        """
        with patch.object(downloader, '_extract_from_vidmoly') as mock_vidmoly, \
             patch.object(downloader, '_extract_from_sendvid') as mock_sendvid, \
             patch.object(downloader, '_extract_from_sibnet') as mock_sibnet, \
             patch.object(downloader, '_test_video_url', new_callable=AsyncMock) as mock_test_url:

            # Vidmoly returns URL but validation fails
            mock_vidmoly.return_value = ("http://vidmoly.com/video.mp4", "video.mp4")
            # SendVid returns URL and validation passes
            mock_sendvid.return_value = ("http://sendvid.com/video.mp4", "video.mp4")
            mock_sibnet.return_value = ("http://sibnet.com/video.mp4", "video.mp4")

            # First call (vidmoly): validation fails
            # Second call (sendvid): validation passes
            # Third call (sibnet): not called because sendvid succeeded
            mock_test_url.side_effect = [False, True]

            result = await downloader.get_download_link_with_fallback(
                "http://vidmoly.to/test",
                anime_page_url="https://anime-sama.si/catalogue/test/vostfr/"
            )

            # One validation per extracted URL: vidmoly (rejected), sendvid (accepted)
            assert mock_test_url.call_count == 2
            # Verify sendvid was also tried after vidmoly failed validation
            assert mock_sendvid.called
            # The rejected VidMoly URL must not be returned, and the chain
            # must stop at the first validated player.
            assert result == ("http://sendvid.com/video.mp4", "video.mp4")
            mock_sibnet.assert_not_called()

    @pytest.mark.asyncio
    async def test_cache_not_used_without_anime_page_url(self, downloader):
        """
        Test that caching is not used when anime_page_url is not provided.
        """
        with patch.object(downloader, '_extract_from_vidmoly') as mock_vidmoly, \
             patch.object(downloader, '_extract_from_sendvid') as mock_sendvid, \
             patch.object(downloader, '_test_video_url', new_callable=AsyncMock) as mock_test_url:

            # First request: no anime_page_url, vidmoly succeeds
            mock_vidmoly.return_value = ("http://vidmoly.com/video.mp4", "video.mp4")
            mock_test_url.return_value = True

            await downloader.get_download_link_with_fallback(
                "http://vidmoly.to/test"
            )

            # Cache should be empty (no anime_page_url provided)
            assert len(downloader._working_players) == 0

            # Second request: still no anime_page_url, should not use cache
            mock_vidmoly.reset_mock()

            await downloader.get_download_link_with_fallback(
                "http://vidmoly.to/test"
            )

            # Vidmoly should still be called (no cache used)
            assert mock_vidmoly.call_count == 1
            # SendVid is patched only so no real extractor can run; VidMoly
            # succeeded both times, so it must never have been reached.
            mock_sendvid.assert_not_called()