feat: robust scraping DSL and health monitoring (Phase 2)
CI / Test (Python 3.11) (push) Has been cancelled
CI / Test (Python 3.12) (push) Has been cancelled
CI / Lint (push) Has been cancelled
CI / Type Check (push) Has been cancelled
CI / Summary (push) Has been cancelled

- Implemented YAML-driven GenericScraper for resilient scraping
- Added ProvidersManager to manage scraper health and active providers
- Modernized unified search with systematic Kitsu metadata enrichment
- Integrated automated health checks in the scheduler
- Added comprehensive tests for scraping DSL and provider health
This commit is contained in:
root
2026-03-24 10:57:19 +00:00
parent 29c7040b20
commit 2b4cc617cb
8 changed files with 535 additions and 268 deletions
+29 -1
View File
@@ -9,6 +9,7 @@ from apscheduler.triggers.interval import IntervalTrigger
from app.watchlist import watchlist_manager, WatchlistManager
from app.episode_checker import EpisodeChecker, episode_checker
from app.providers_manager import providers_manager
logger = logging.getLogger(__name__)
@@ -23,6 +24,7 @@ class AutoDownloadScheduler:
):
self.wlm = wlm or watchlist_manager
self.checker = checker or episode_checker
self.providers_mgr = providers_manager
self.scheduler: Optional[AsyncIOScheduler] = None
self._running = False
@@ -46,6 +48,14 @@ class AutoDownloadScheduler:
except Exception as e:
logger.error(f"Error in scheduled check job: {e}", exc_info=True)
async def _health_check_job(self):
    """Run one scheduled provider health check.

    Awaits ``self.providers_mgr.check_all_health()``. Any exception is
    logged and suppressed, so an error does not propagate out of the job.
    """
    try:
        logger.info("Running scheduled provider health check...")
        await self.providers_mgr.check_all_health()
    except Exception as e:
        # Log the traceback too, matching the scheduled check job's error
        # log; the message alone rarely identifies which provider failed.
        logger.error(f"Error in health check job: {e}", exc_info=True)
def start(self):
"""Start the scheduler"""
if self._running:
@@ -59,7 +69,7 @@ class AutoDownloadScheduler:
settings = self.wlm.get_settings()
interval_hours = settings.check_interval_hours
# Add the job
# Add the job for episode checking
self.scheduler.add_job(
self._check_job,
trigger=IntervalTrigger(hours=interval_hours),
@@ -68,6 +78,15 @@ class AutoDownloadScheduler:
replace_existing=True
)
# Add the job for provider health check (every 6 hours)
self.scheduler.add_job(
self._health_check_job,
trigger=IntervalTrigger(hours=6),
id='provider_health',
name='Check provider health',
replace_existing=True
)
# Start the scheduler
self.scheduler.start()
self._running = True
@@ -149,6 +168,15 @@ class AutoDownloadScheduler:
logger.error(f"Error in manual check: {e}", exc_info=True)
raise
async def trigger_health_check_now(self):
    """Manually trigger a provider health check now.

    Awaits ``self.providers_mgr.check_all_health()`` directly instead of
    going through ``_health_check_job``: that job catches and logs every
    exception, so routing through it would hide failures from the caller
    and leave the ``except`` branch below unreachable.

    Raises:
        Exception: Whatever the health check raises, re-raised after it
            has been logged.
    """
    logger.info("Manually triggering provider health check...")
    try:
        await self.providers_mgr.check_all_health()
    except Exception as e:
        # Same logging style as the manual episode check: keep the traceback.
        logger.error(f"Error in manual health check: {e}", exc_info=True)
        raise
# Global scheduler instance: a module-level AutoDownloadScheduler constructed
# with no arguments.
auto_download_scheduler = AutoDownloadScheduler()