"""Manages scraper providers and their health status"""

import os
import logging
import asyncio
from typing import Dict, List, Optional
from pathlib import Path
from datetime import datetime

from app.downloaders.generic_scraper import GenericScraper
from app.downloaders.anime_sites import (
    AnimeSamaDownloader,
    NekoSamaDownloader,
    AnimeUltimeDownloader,
    VostfreeDownloader,
    FrenchMangaDownloader,
)
from app.downloaders.series_sites import (
    FS7Downloader,
    ZoneTelechargementDownloader,
)
from app.providers import ANIME_PROVIDERS, SERIES_PROVIDERS

logger = logging.getLogger(__name__)


class ProvidersManager:
    """Registry and health manager for scraping providers.

    Providers come from two sources: YAML configs wrapped in a
    ``GenericScraper`` and hardcoded downloader classes. Each registered
    provider has an entry in ``health_status`` holding ``status``
    ("unknown", "up" or "down"), ``last_check`` (ISO timestamp or None)
    and ``error`` (message or None).
    """

    def __init__(self, config_dir: str = "app/downloaders/providers_config"):
        """Build the registry by loading YAML and then hardcoded providers.

        Args:
            config_dir: Directory scanned for ``*.yaml`` provider configs.
        """
        self.config_dir = Path(config_dir)
        self.providers: Dict[str, object] = {}
        self.provider_info: Dict[str, Dict] = {}
        self.health_status: Dict[str, Dict] = {}
        self._load_yaml_providers()
        # Loaded second, so a hardcoded provider replaces a YAML provider
        # registered under the same id.
        self._load_hardcoded_providers()

    @staticmethod
    def _unknown_status() -> Dict:
        """Return a fresh status entry for a provider never checked yet."""
        return {
            "status": "unknown",
            "last_check": None,
            "error": None,
        }

    def _load_yaml_providers(self):
        """Load all providers from YAML configs.

        A config that fails to load is logged and skipped so that one broken
        file does not prevent the other providers from registering.
        """
        if not self.config_dir.exists():
            logger.warning(f"Providers config directory not found: {self.config_dir}")
            return

        for config_file in self.config_dir.glob("*.yaml"):
            try:
                scraper = GenericScraper(str(config_file))
                self.providers[scraper.id] = scraper
                self.health_status[scraper.id] = self._unknown_status()
                logger.info(f"Loaded YAML provider: {scraper.name} ({scraper.id})")
            except Exception as e:
                logger.error(f"Failed to load provider from {config_file}: {e}")

    def _load_hardcoded_providers(self):
        """Load hardcoded Python providers.

        A provider is only instantiated when its id is present in the
        matching provider dictionary (``ANIME_PROVIDERS`` or
        ``SERIES_PROVIDERS``); that dictionary entry is stored in
        ``provider_info``.
        """
        provider_classes = [
            ("anime-sama", AnimeSamaDownloader, ANIME_PROVIDERS),
            ("neko-sama", NekoSamaDownloader, ANIME_PROVIDERS),
            ("anime-ultime", AnimeUltimeDownloader, ANIME_PROVIDERS),
            ("vostfree", VostfreeDownloader, ANIME_PROVIDERS),
            ("french-manga", FrenchMangaDownloader, ANIME_PROVIDERS),
            ("fs7", FS7Downloader, SERIES_PROVIDERS),
            ("zonetelechargement", ZoneTelechargementDownloader, SERIES_PROVIDERS),
        ]

        for provider_id, provider_class, provider_dict in provider_classes:
            if provider_id not in provider_dict:
                continue
            try:
                self.providers[provider_id] = provider_class()
                self.provider_info[provider_id] = provider_dict[provider_id]
                self.health_status[provider_id] = self._unknown_status()
                logger.info(f"Loaded hardcoded provider: {provider_id}")
            except Exception as e:
                logger.error(f"Failed to load provider {provider_id}: {e}")

    async def check_all_health(self):
        """Check health of all registered providers concurrently."""
        logger.info("Checking health of all providers...")
        # Snapshot the registry so the set of checks is fixed before awaiting.
        checks = [
            self._check_single_health(provider_id, scraper)
            for provider_id, scraper in list(self.providers.items())
        ]
        await asyncio.gather(*checks)
        logger.info("Provider health check complete")

    async def _check_single_health(self, provider_id: str, scraper):
        """Check health of a single provider and update its status entry.

        Never raises for probe failures: an exception raised by the probe is
        recorded as status "down" together with its message.

        Args:
            provider_id: Key of the provider in ``health_status``.
            scraper: Provider instance to probe.
        """
        try:
            # Call the raising probe directly so the real failure reason is
            # stored instead of the generic "no results" message.
            is_healthy = await self._probe_scraper(scraper)
        except Exception as e:
            self.health_status[provider_id] = {
                "status": "down",
                "last_check": datetime.now().isoformat(),
                # Some exceptions stringify to "", fall back to the type name.
                "error": str(e) or type(e).__name__,
            }
            logger.error(f"Health check failed for {provider_id}: {e}")
            return

        self.health_status[provider_id] = {
            "status": "up" if is_healthy else "down",
            "last_check": datetime.now().isoformat(),
            "error": None if is_healthy else "No search results returned",
        }

    async def _do_health_check(self, scraper) -> bool:
        """Perform health check on a scraper without ever raising.

        Args:
            scraper: Provider instance to probe.

        Returns:
            The probe result, or False when the probe raised (the exception
            is logged).
        """
        try:
            return await self._probe_scraper(scraper)
        except Exception as e:
            logger.error(
                f"Health check exception for {getattr(scraper, 'provider_id', scraper)}: {e}"
            )
            return False

    async def _probe_scraper(self, scraper) -> bool:
        """Probe a scraper with the first applicable strategy; may raise.

        Strategies, in order: the scraper's own ``check_health``; an HTTP GET
        through ``scraper.client`` when a URL can be determined; a
        ``search_anime`` query; a ``search`` query.

        Args:
            scraper: Provider instance to probe.

        Returns:
            Whether the scraper looks healthy; False when no strategy applies.
        """
        if hasattr(scraper, "check_health"):
            return await scraper.check_health()

        if hasattr(scraper, "client"):
            probe_url = await self._resolve_probe_url(scraper)
            if probe_url:
                response = await scraper.client.get(probe_url, timeout=15.0)
                return 200 <= response.status_code < 400

        if hasattr(scraper, "search_anime"):
            results = await scraper.search_anime("One Piece", lang="vostfr")
            return bool(results)

        if hasattr(scraper, "search"):
            results = await scraper.search("One Piece")
            return bool(results)

        return False

    async def _resolve_probe_url(self, scraper) -> Optional[str]:
        """Return the URL to use for a connectivity test, or None.

        Prefers ``base_url``/``active_url`` (refreshed through
        ``_ensure_base_url`` when the scraper defines it) and otherwise falls
        back to the first entry of ``BASE_DOMAINS``.
        """
        base_url = getattr(scraper, "base_url", None) or getattr(
            scraper, "active_url", None
        )
        if base_url:
            if hasattr(scraper, "_ensure_base_url"):
                await scraper._ensure_base_url()
                # Keep the previous URL if base_url is still unset afterwards,
                # rather than issuing a request to None.
                base_url = getattr(scraper, "base_url", None) or base_url
            return base_url

        base_domains = getattr(scraper, "BASE_DOMAINS", None)
        if base_domains:
            # Test first domain from BASE_DOMAINS
            return f"https://{base_domains[0]}"
        return None

    def get_provider(self, provider_id: str):
        """Return the provider registered under ``provider_id``, or None."""
        return self.providers.get(provider_id)

    def get_active_providers(self) -> List:
        """Return only providers that are UP or UNKNOWN."""
        return [
            self.providers[pid]
            for pid, status in self.health_status.items()
            # Skip status entries that have no registered provider instead of
            # raising KeyError.
            if status["status"] != "down" and pid in self.providers
        ]

    def get_all_status(self) -> Dict[str, Dict]:
        """Return the live health-status mapping (not a copy)."""
        return self.health_status


# Global instance
providers_manager = ProvidersManager()