520be53901
- Add proper Alembic initial migration (0001_initial_schema.py) - Migrate refresh tokens from JSON file to SQLite (RefreshTokenTable) - Remove Neko-Sama provider entirely (redirects to Gupy, not a host) - Fix provider health check always showing UNKNOWN - Run check_all_health() on startup - Fix POST /providers/health/check background task bug - Add HTMX refresh after manual health check trigger - Fix anime search relevance scoring with MIN_RELEVANCE_THRESHOLD=0.5 - Replace bare 'except:' with 'except Exception:' across codebase - Add Playwright E2E test suite (12 tests, auth setup, helpers) - Fix toast container blocking clicks via pointer-events: none - Remove obsolete Jest/Vite test files and config - Clean up obsolete test_watchlist scripts - Update sonarr model comment for active providers
172 lines
6.8 KiB
Python
172 lines
6.8 KiB
Python
"""Manages scraper providers and their health status"""
|
|
|
|
import os
|
|
import logging
|
|
import asyncio
|
|
from typing import Dict, List, Optional
|
|
from pathlib import Path
|
|
from datetime import datetime
|
|
|
|
from app.downloaders.generic_scraper import GenericScraper
|
|
from app.downloaders.anime_sites import (
|
|
AnimeSamaDownloader,
|
|
AnimeUltimeDownloader,
|
|
VostfreeDownloader,
|
|
FrenchMangaDownloader,
|
|
)
|
|
from app.downloaders.series_sites import (
|
|
FS7Downloader,
|
|
ZoneTelechargementDownloader,
|
|
)
|
|
from app.providers import ANIME_PROVIDERS, SERIES_PROVIDERS
|
|
|
|
# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)
|
|
|
|
|
|
class ProvidersManager:
    """Registry and health manager for scraping providers.

    Providers come from two sources, loaded in this order by ``__init__``:

    1. one ``GenericScraper`` per ``*.yaml`` file found in ``config_dir``;
    2. a fixed list of downloader classes, each instantiated only when its
       id is present in ``ANIME_PROVIDERS`` / ``SERIES_PROVIDERS``.

    Because the hardcoded providers are registered second, one of them
    replaces a YAML provider registered under the same id.

    Every registered provider has an entry in ``health_status`` with the keys
    ``"status"`` (``"unknown"``, ``"up"`` or ``"down"``), ``"last_check"``
    (ISO-8601 string or ``None``) and ``"error"`` (string or ``None``).
    """

    # Query sent by the search-based probes, and the lowercase tokens of which
    # at least one must appear in a result title for the probe to pass.
    _PROBE_QUERY = "One Piece"
    _PROBE_TOKENS = ("one", "piece")
    # Timeout, in seconds, passed to ``scraper.client.get`` by the HTTP probe.
    _HTTP_TIMEOUT = 15.0

    def __init__(self, config_dir: str = "app/downloaders/providers_config"):
        """Create the registry and load every available provider.

        Args:
            config_dir: Directory scanned for ``*.yaml`` provider configs.
        """
        self.config_dir = Path(config_dir)
        # provider id -> scraper/downloader instance
        self.providers: Dict[str, object] = {}
        # provider id -> metadata taken from ANIME_PROVIDERS / SERIES_PROVIDERS
        # (only filled for hardcoded providers)
        self.provider_info: Dict[str, Dict] = {}
        # provider id -> {"status", "last_check", "error"}
        self.health_status: Dict[str, Dict] = {}
        self._load_yaml_providers()
        self._load_hardcoded_providers()

    @staticmethod
    def _initial_status() -> Dict:
        """Return a fresh status record for a provider not yet checked."""
        return {"status": "unknown", "last_check": None, "error": None}

    def _load_yaml_providers(self):
        """Load all providers from YAML configs.

        A missing config directory is logged as a warning and skipped. A
        config that fails to load is logged as an error and does not prevent
        the remaining configs from loading.
        """
        if not self.config_dir.exists():
            logger.warning(f"Providers config directory not found: {self.config_dir}")
            return

        # glob() yields files in filesystem-dependent order; sort so that the
        # registration order (and the winner of a duplicate id) is stable.
        for config_file in sorted(self.config_dir.glob("*.yaml")):
            try:
                scraper = GenericScraper(str(config_file))
                self.providers[scraper.id] = scraper
                self.health_status[scraper.id] = self._initial_status()
                logger.info(f"Loaded YAML provider: {scraper.name} ({scraper.id})")
            except Exception as e:
                logger.error(f"Failed to load provider from {config_file}: {e}")

    def _load_hardcoded_providers(self):
        """Load hardcoded Python providers.

        Each downloader class is instantiated only if its id is a key of the
        associated provider dict; a constructor failure is logged and the
        provider is left unregistered.
        """
        provider_classes = [
            ("anime-sama", AnimeSamaDownloader, ANIME_PROVIDERS),
            ("anime-ultime", AnimeUltimeDownloader, ANIME_PROVIDERS),
            ("vostfree", VostfreeDownloader, ANIME_PROVIDERS),
            ("french-manga", FrenchMangaDownloader, ANIME_PROVIDERS),
            ("fs7", FS7Downloader, SERIES_PROVIDERS),
            ("zonetelechargement", ZoneTelechargementDownloader, SERIES_PROVIDERS),
        ]

        for provider_id, provider_class, provider_dict in provider_classes:
            if provider_id not in provider_dict:
                continue
            try:
                self.providers[provider_id] = provider_class()
                self.provider_info[provider_id] = provider_dict[provider_id]
                self.health_status[provider_id] = self._initial_status()
                logger.info(f"Loaded hardcoded provider: {provider_id}")
            except Exception as e:
                logger.error(f"Failed to load provider {provider_id}: {e}")

    async def check_all_health(self):
        """Check health of all registered providers concurrently.

        Results are written to ``health_status``; nothing is returned.
        """
        logger.info("Checking health of all providers...")
        await asyncio.gather(
            *(
                self._check_single_health(provider_id, scraper)
                for provider_id, scraper in self.providers.items()
            )
        )
        logger.info("Provider health check complete")

    async def _check_single_health(self, provider_id: str, scraper):
        """Check health of a single provider and update its status record.

        The probe's failure reason (or the exception it raised) is stored in
        the ``"error"`` field so the real cause is visible to callers of
        ``get_all_status``. Exceptions derived from ``Exception`` are caught
        and recorded, not propagated.

        Args:
            provider_id: Key under which the result is stored.
            scraper: Provider instance to probe.
        """
        try:
            failure = await self._probe_health(scraper)
        except Exception as e:
            self.health_status[provider_id] = {
                "status": "down",
                "last_check": datetime.now().isoformat(),
                # Some exceptions (e.g. timeouts) stringify to "", so fall
                # back to the exception class name.
                "error": str(e) or e.__class__.__name__,
            }
            logger.error(f"Health check failed for {provider_id}: {e}")
            return

        self.health_status[provider_id] = {
            "status": "up" if failure is None else "down",
            "last_check": datetime.now().isoformat(),
            "error": failure,
        }

    async def _do_health_check(self, scraper) -> bool:
        """Perform health check on a scraper.

        Returns:
            ``True`` when the probe passes, ``False`` when it fails or raises
            an ``Exception`` (which is logged, not propagated).
        """
        try:
            return (await self._probe_health(scraper)) is None
        except Exception as e:
            logger.error(
                f"Health check exception for {getattr(scraper, 'provider_id', scraper)}: {e}"
            )
            return False

    async def _probe_health(self, scraper) -> Optional[str]:
        """Probe a scraper and describe why it is unhealthy, if it is.

        The first applicable strategy is used:

        1. the scraper's own ``check_health()`` coroutine;
        2. an HTTP GET through ``scraper.client`` when a URL can be resolved;
        3. ``search_anime("One Piece", lang="vostfr")``;
        4. ``search("One Piece")``.

        A scraper that has a ``client`` but no resolvable URL falls through
        to the search strategies instead of being reported down unprobed.

        Returns:
            ``None`` when healthy, otherwise a short failure reason.

        Raises:
            Whatever the scraper's own methods raise.
        """
        if hasattr(scraper, "check_health"):
            if await scraper.check_health():
                return None
            return "check_health() reported the provider as unhealthy"

        if hasattr(scraper, "client"):
            url = await self._resolve_probe_url(scraper)
            if url:
                response = await scraper.client.get(url, timeout=self._HTTP_TIMEOUT)
                if 200 <= response.status_code < 400:
                    return None
                return f"HTTP {response.status_code} from {url}"

        if hasattr(scraper, "search_anime"):
            results = await scraper.search_anime(self._PROBE_QUERY, lang="vostfr")
            return self._search_failure(results)

        if hasattr(scraper, "search"):
            results = await scraper.search(self._PROBE_QUERY)
            return self._search_failure(results)

        return "Provider exposes no usable health probe"

    async def _resolve_probe_url(self, scraper) -> Optional[str]:
        """Return the URL to GET for the connectivity probe, or ``None``.

        Prefers ``base_url`` / ``active_url``; when one is set and the
        scraper has ``_ensure_base_url``, that coroutine is awaited and
        ``base_url`` is re-read. Otherwise the first entry of a non-empty
        ``BASE_DOMAINS`` is used with an ``https://`` scheme.
        """
        base_url = getattr(scraper, "base_url", None) or getattr(
            scraper, "active_url", None
        )
        if base_url:
            if hasattr(scraper, "_ensure_base_url"):
                await scraper._ensure_base_url()
                # Keep the previous URL if base_url is still unset/empty.
                base_url = getattr(scraper, "base_url", None) or base_url
            return base_url

        domains = getattr(scraper, "BASE_DOMAINS", None)
        if domains:
            return f"https://{domains[0]}"
        return None

    @classmethod
    def _search_failure(cls, results) -> Optional[str]:
        """Validate probe search results.

        Args:
            results: Iterable of mappings read via ``.get("title")``.

        Returns:
            ``None`` if any title contains one of the probe tokens
            (case-insensitive), otherwise a failure reason.
        """
        if not results:
            return "No search results returned"
        for result in results:
            title = (result.get("title") or "").lower()
            if any(token in title for token in cls._PROBE_TOKENS):
                return None
        return "Search results did not match the probe query"

    def get_provider(self, provider_id: str):
        """Return the provider registered under ``provider_id``, or ``None``."""
        return self.providers.get(provider_id)

    def get_active_providers(self) -> List:
        """Return only providers that are UP or UNKNOWN.

        A provider with no status record is treated as UNKNOWN.
        """
        return [
            provider
            for provider_id, provider in self.providers.items()
            if self.health_status.get(provider_id, {}).get("status") != "down"
        ]

    def get_all_status(self) -> Dict[str, Dict]:
        """Return the live ``health_status`` mapping (not a copy)."""
        return self.health_status
|
|
|
|
|
|
# Global instance
|
|
# Constructed at import time: ProvidersManager.__init__ runs both provider
# loaders, so importing this module scans the config directory and
# instantiates the hardcoded downloaders.
providers_manager = ProvidersManager()
|