diff --git a/app/downloaders/anime_sites/animesama.py b/app/downloaders/anime_sites/animesama.py index 9a18862..523b939 100644 --- a/app/downloaders/anime_sites/animesama.py +++ b/app/downloaders/anime_sites/animesama.py @@ -778,33 +778,42 @@ class AnimeSamaDownloader(BaseAnimeSite): if eps_matches: # Determine the format by looking at the data - # Format A: each epsX array is one source with all episodes (few arrays, many URLs each) - # Format B: each epsX array is one episode with multiple sources (many arrays or similar counts) + # Format A: each epsX array is one SOURCE with all episodes (different domains per array) + # Format B: each epsX array is one EPISODE with multiple sources (same domains across arrays) eps1_urls = re.findall(r"'(https?://[^']+)'", eps_matches[0][1]) num_episode_arrays = len(eps_matches) is_format_a = True # Default - if num_episode_arrays > 5: - # Many arrays = Format B (each array = one episode) - is_format_a = False - elif num_episode_arrays >= 2: - # Check URL counts - if similar, it's Format B - url_counts = [] + if num_episode_arrays >= 2: + # Extract domains from first URLs of each array + def get_domain(url): + return url.split('/')[2] if '/' in url else url + + domains_per_array = [] for eps_num, urls_text in eps_matches: urls = re.findall(r"'(https?://[^']+)'", urls_text) - url_counts.append(len(urls)) + if urls: + domains = set(get_domain(u) for u in urls[:3]) # Sample first 3 + domains_per_array.append(domains) - if url_counts and max(url_counts) > 0: - avg_count = sum(url_counts) / len(url_counts) - variance = max(url_counts) / avg_count if avg_count > 0 else 1 - - # Similar counts (< 1.5x variance) = Format B - if variance < 1.5: - is_format_a = False + # Check if domains are different across arrays + # If each array has completely different domains → Format A (each = source) + # If arrays share domains → Format B (each = episode with multiple sources) + all_domains = set() + for domains in domains_per_array: + all_domains.update(domains) + + # If total unique domains ≈ sum of domains per array → Format A + # If total unique domains << sum of domains per array → Format B (shared) + total_domain_count = sum(len(d) for d in domains_per_array) + if len(all_domains) < total_domain_count * 0.7: + # Domains are shared across arrays → Format B + is_format_a = False - logger.debug(f"Detected format {'A (source-based)' if is_format_a else 'B (episode-based)'} - {num_episode_arrays} arrays") + + # No more host preference! # No more host preference! Just collect all available URLs for each episode # The download system will automatically detect and use the appropriate downloader