fix: detect episode format by URL count variance, add anime-sama.tv to domains

This commit is contained in:
root
2026-02-24 21:53:03 +00:00
parent 414a89b7a5
commit 2e0af00278
+25 -6
View File
@@ -12,7 +12,7 @@ class AnimeSamaDownloader(BaseAnimeSite):
"""Downloader for anime-sama.org / anime-sama.store"""
# Static list of known domains (will be updated dynamically)
BASE_DOMAINS = ["anime-sama.si", "www.anime-sama.si", "anime-sama.org", "anime-sama.store", "anime-sama.eu"]
BASE_DOMAINS = ["anime-sama.tv", "anime-sama.si", "www.anime-sama.si", "anime-sama.org", "anime-sama.store", "anime-sama.eu"]
@classmethod
async def get_current_domain(cls) -> str:
@@ -778,14 +778,33 @@ class AnimeSamaDownloader(BaseAnimeSite):
if eps_matches:
# Determine the format by looking at the data
# If eps1 has many URLs (> 10), it's Format A (each array is a source with all episodes)
# If eps1 has few URLs (< 10), it's Format B (each array is an episode with multiple sources)
# Format A: each epsX array is one source with all episodes (few arrays, many URLs each)
# Format B: each epsX array is one episode with multiple sources (more than 5 arrays, or 2-5 arrays with similar URL counts)
# Parse eps1 to check
eps1_urls = re.findall(r"'(https?://[^']+)'", eps_matches[0][1])
is_format_a = len(eps1_urls) > 10 # More than 10 URLs in eps1 = Format A
num_episode_arrays = len(eps_matches)
logger.debug(f"Detected format {'A (source-based)' if is_format_a else 'B (episode-based)'} - eps1 has {len(eps1_urls)} URLs")
is_format_a = True # Default
if num_episode_arrays > 5:
# Many arrays = Format B (each array = one episode)
is_format_a = False
elif num_episode_arrays >= 2:
# Check URL counts - if similar, it's Format B
url_counts = []
for eps_num, urls_text in eps_matches:
urls = re.findall(r"'(https?://[^']+)'", urls_text)
url_counts.append(len(urls))
if url_counts and max(url_counts) > 0:
avg_count = sum(url_counts) / len(url_counts)
variance = max(url_counts) / avg_count if avg_count > 0 else 1
# Similar counts (largest count is < 1.5x the average count) = Format B
if variance < 1.5:
is_format_a = False
logger.debug(f"Detected format {'A (source-based)' if is_format_a else 'B (episode-based)'} - {num_episode_arrays} arrays")
# No more host preference! Just collect all available URLs for each episode
# The download system will automatically detect and use the appropriate downloader