Files
ohm_streaming/app/providers_manager.py
Kimi Agent 520be53901
CI / Test (Python 3.11) (push) Has been cancelled
CI / Test (Python 3.12) (push) Has been cancelled
CI / Lint (push) Has been cancelled
CI / Type Check (push) Has been cancelled
CI / Summary (push) Has been cancelled
fix: migrations, auth, providers health check, E2E tests, remove neko-sama
- Add proper Alembic initial migration (0001_initial_schema.py)
- Migrate refresh tokens from JSON file to SQLite (RefreshTokenTable)
- Remove Neko-Sama provider entirely (redirects to Gupy, not a host)
- Fix provider health check always showing UNKNOWN
  - Run check_all_health() on startup
  - Fix POST /providers/health/check background task bug
  - Add HTMX refresh after manual health check trigger
- Fix anime search relevance scoring with MIN_RELEVANCE_THRESHOLD=0.5
- Replace bare 'except:' with 'except Exception:' across codebase
- Add Playwright E2E test suite (12 tests, auth setup, helpers)
- Fix toast container blocking clicks via pointer-events: none
- Remove obsolete Jest/Vite test files and config
- Clean up obsolete test_watchlist scripts
- Update sonarr model comment for active providers
2026-05-12 11:45:56 +00:00

172 lines
6.8 KiB
Python

"""Manages scraper providers and their health status"""
import os
import logging
import asyncio
from typing import Dict, List, Optional
from pathlib import Path
from datetime import datetime
from app.downloaders.generic_scraper import GenericScraper
from app.downloaders.anime_sites import (
AnimeSamaDownloader,
AnimeUltimeDownloader,
VostfreeDownloader,
FrenchMangaDownloader,
)
from app.downloaders.series_sites import (
FS7Downloader,
ZoneTelechargementDownloader,
)
from app.providers import ANIME_PROVIDERS, SERIES_PROVIDERS
# Module-level logger, named after this module so records can be filtered per module.
logger = logging.getLogger(__name__)
class ProvidersManager:
    """Registry and health manager for scraping providers.

    Providers are registered from two sources: YAML configs wrapped by
    ``GenericScraper`` and a fixed list of hardcoded downloader classes.
    Every registered provider has an entry in ``health_status`` shaped as
    ``{"status": "unknown" | "up" | "down", "last_check": str | None,
    "error": str | None}``.
    """

    def __init__(self, config_dir: str = "app/downloaders/providers_config"):
        """Build the registry by loading YAML providers, then hardcoded ones.

        Args:
            config_dir: Directory scanned for ``*.yaml`` provider configs.
        """
        self.config_dir = Path(config_dir)
        # provider id -> scraper/downloader instance
        self.providers: Dict[str, object] = {}
        # provider id -> metadata entry taken from ANIME_PROVIDERS / SERIES_PROVIDERS
        self.provider_info: Dict[str, Dict] = {}
        # provider id -> {"status", "last_check", "error"}
        self.health_status: Dict[str, Dict] = {}
        # Hardcoded providers are loaded last, so they win on an id collision.
        self._load_yaml_providers()
        self._load_hardcoded_providers()

    @staticmethod
    def _unknown_status() -> Dict:
        """Return a fresh health entry for a provider that was never checked."""
        return {
            "status": "unknown",
            "last_check": None,
            "error": None,
        }

    def _load_yaml_providers(self):
        """Load all providers from YAML configs.

        A config that fails to load is logged and skipped so that one broken
        file does not prevent the remaining providers from registering.
        """
        if not self.config_dir.exists():
            logger.warning(f"Providers config directory not found: {self.config_dir}")
            return

        for config_file in self.config_dir.glob("*.yaml"):
            try:
                scraper = GenericScraper(str(config_file))
                self.providers[scraper.id] = scraper
                self.health_status[scraper.id] = self._unknown_status()
                logger.info(f"Loaded YAML provider: {scraper.name} ({scraper.id})")
            except Exception as e:
                logger.error(f"Failed to load provider from {config_file}: {e}")

    def _load_hardcoded_providers(self):
        """Load hardcoded Python providers.

        A provider is only instantiated when its id is present in the
        matching provider dictionary (ANIME_PROVIDERS or SERIES_PROVIDERS).
        """
        provider_classes = [
            ("anime-sama", AnimeSamaDownloader, ANIME_PROVIDERS),
            ("anime-ultime", AnimeUltimeDownloader, ANIME_PROVIDERS),
            ("vostfree", VostfreeDownloader, ANIME_PROVIDERS),
            ("french-manga", FrenchMangaDownloader, ANIME_PROVIDERS),
            ("fs7", FS7Downloader, SERIES_PROVIDERS),
            ("zonetelechargement", ZoneTelechargementDownloader, SERIES_PROVIDERS),
        ]
        for provider_id, provider_class, provider_dict in provider_classes:
            if provider_id not in provider_dict:
                continue
            try:
                self.providers[provider_id] = provider_class()
                self.provider_info[provider_id] = provider_dict[provider_id]
                self.health_status[provider_id] = self._unknown_status()
                logger.info(f"Loaded hardcoded provider: {provider_id}")
            except Exception as e:
                logger.error(f"Failed to load provider {provider_id}: {e}")

    async def check_all_health(self):
        """Check health of all registered providers concurrently."""
        logger.info("Checking health of all providers...")
        # Snapshot the registry so the set of checks is fixed before awaiting.
        tasks = [
            self._check_single_health(provider_id, scraper)
            for provider_id, scraper in list(self.providers.items())
        ]
        await asyncio.gather(*tasks)
        logger.info("Provider health check complete")

    async def _check_single_health(self, provider_id: str, scraper):
        """Check health of a single provider and update ``health_status``.

        The probe is called through the raising helper so that the actual
        failure reason (timeout, connection error, ...) is stored in the
        ``error`` field instead of a generic message.

        Args:
            provider_id: Key under which the result is stored.
            scraper: Provider instance to probe.
        """
        try:
            is_healthy = bool(await self._probe_health(scraper))
            error = None if is_healthy else "No search results returned"
        except Exception as e:
            is_healthy = False
            # Some exceptions (e.g. a bare TimeoutError) stringify to "";
            # fall back to the class name so the field is never blank.
            error = str(e) or type(e).__name__
            logger.error(f"Health check failed for {provider_id}: {e}")

        self.health_status[provider_id] = {
            "status": "up" if is_healthy else "down",
            "last_check": datetime.now().isoformat(),
            "error": error,
        }

    async def _do_health_check(self, scraper) -> bool:
        """Perform health check on a scraper, never raising.

        Args:
            scraper: Provider instance to probe.

        Returns:
            True when the provider looks healthy; False when it does not or
            when the probe raised (the exception is logged).
        """
        try:
            return bool(await self._probe_health(scraper))
        except Exception as e:
            logger.error(
                f"Health check exception for {getattr(scraper, 'provider_id', scraper)}: {e}"
            )
            return False

    async def _probe_health(self, scraper) -> bool:
        """Run the most specific probe the scraper supports; may raise.

        Probe order: the scraper's own ``check_health``; an HTTP GET through
        ``scraper.client`` against its base URL or first ``BASE_DOMAINS``
        entry; a ``search_anime`` query; a ``search`` query.
        """
        if hasattr(scraper, "check_health"):
            return await scraper.check_health()

        if hasattr(scraper, "client"):
            probe_url = await self._resolve_probe_url(scraper)
            if probe_url:
                response = await scraper.client.get(probe_url, timeout=15.0)
                return 200 <= response.status_code < 400
            # No URL to hit: fall through to the search-based probes rather
            # than declaring the provider down without testing anything.

        if hasattr(scraper, "search_anime"):
            results = await scraper.search_anime("One Piece", lang="vostfr")
            return self._results_match_query(results)

        if hasattr(scraper, "search"):
            results = await scraper.search("One Piece")
            return self._results_match_query(results)

        return False

    @staticmethod
    async def _resolve_probe_url(scraper) -> Optional[str]:
        """Return the URL to use for a connectivity probe, or None."""
        url = getattr(scraper, "base_url", None) or getattr(
            scraper, "active_url", None
        )
        if url:
            if hasattr(scraper, "_ensure_base_url"):
                await scraper._ensure_base_url()
                # Prefer the refreshed base_url, but keep the previous value
                # when the attribute is still empty after the refresh.
                url = getattr(scraper, "base_url", None) or url
            return url

        domains = getattr(scraper, "BASE_DOMAINS", None)
        if domains:
            # Test first domain from BASE_DOMAINS
            return f"https://{domains[0]}"
        return None

    @staticmethod
    def _results_match_query(results) -> bool:
        """Return True when any result title relates to the "One Piece" query."""
        if not results:
            return False
        titles = ((r.get("title") or "").lower() for r in results)
        return any("one" in title or "piece" in title for title in titles)

    def get_provider(self, provider_id: str):
        """Return the provider registered under *provider_id*, or None."""
        return self.providers.get(provider_id)

    def get_active_providers(self) -> List:
        """Return only providers that are UP or UNKNOWN (i.e. not "down")."""
        return [
            self.providers[pid]
            for pid, status in self.health_status.items()
            # Skip status entries that have no registered provider.
            if status["status"] != "down" and pid in self.providers
        ]

    def get_all_status(self) -> Dict[str, Dict]:
        """Return the live provider id -> health entry mapping (not a copy)."""
        return self.health_status
# Global instance shared by importers of this module. It is constructed at
# import time, so importing this module runs ProvidersManager.__init__, which
# loads the YAML and hardcoded providers.
providers_manager = ProvidersManager()