Files
ohm_streaming/app/routers/router_anime.py
T
root 2b4cc617cb
CI / Test (Python 3.11) (push) Has been cancelled
CI / Test (Python 3.12) (push) Has been cancelled
CI / Lint (push) Has been cancelled
CI / Type Check (push) Has been cancelled
CI / Summary (push) Has been cancelled
feat: robust scraping DSL and health monitoring (Phase 2)
- Implemented YAML-driven GenericScraper for resilient scraping
- Added ProvidersManager to manage scraper health and active providers
- Modernized unified search with systematic Kitsu metadata enrichment
- Integrated automated health checks in the scheduler
- Added comprehensive tests for scraping DSL and provider health
2026-03-24 10:57:19 +00:00

377 lines
13 KiB
Python

"""
Anime and series search routes for Ohm Stream Downloader API.
Endpoints:
- GET /api/anime/search - Search across all anime providers (Modernized with Kitsu)
- GET /api/series/search - Search across all TV series providers
- GET /api/anime/metadata - Get detailed metadata for a specific anime
- GET /api/anime/episodes - Get list of episodes for an anime
- GET /api/anime/providers - Get list of anime providers
- GET /api/anime-sama/search - Search anime-sama only (legacy)
- GET /api/providers/health - Get provider health status
- POST /api/providers/health/check - Trigger health check
- POST /api/anime/download - Download an anime episode
- POST /api/anime/download-season - Download all episodes of a season
- GET /api/anime/seasons - Get list of seasons for an anime
- GET /api/anime/mal/search - Search for anime on MyAnimeList
- GET /api/anime/mal/{mal_id} - Get full details by MyAnimeList ID
- POST /api/translate - Translate text from English to French
"""
import json
import re
import time
import logging
import asyncio
from fastapi import APIRouter, BackgroundTasks, Depends, HTTPException, Query, Request
from app.download_manager import DownloadManager
from app.downloaders import (
AnimeSamaDownloader,
AnimeUltimeDownloader,
NekoSamaDownloader,
VostfreeDownloader,
get_downloader,
)
from app.models import DownloadRequest
from app.providers import get_anime_providers, get_series_providers
from app.providers_manager import providers_manager
from app.metadata_enrichment import get_metadata_enricher
logger = logging.getLogger(__name__)
router = APIRouter(prefix="/api", tags=["anime"])
@router.get("/providers/health")
async def get_providers_health():
    """Return the status report produced by ``providers_manager.get_all_status()``."""
    status_report = providers_manager.get_all_status()
    return status_report
@router.post("/providers/health/check")
async def trigger_providers_health_check(background_tasks: BackgroundTasks):
    """
    Queue a provider health check and return without waiting for it.

    The scheduler's ``trigger_health_check_now`` is registered as a FastAPI
    background task; the response only confirms that it was queued.
    """
    # Imported at call time rather than at module import time.
    from app.auto_download_scheduler import auto_download_scheduler as scheduler

    health_check = scheduler.trigger_health_check_now
    background_tasks.add_task(health_check)
    return {"status": "Health check triggered in background"}
def get_download_manager() -> DownloadManager:
    """Dependency provider: return the ``download_manager`` object owned by ``main``."""
    # Resolved lazily on each call; presumably avoids a circular import
    # between this router and the application entry point - TODO confirm.
    from main import download_manager as shared_manager

    return shared_manager
# ==================== ANIME SEARCH ====================
@router.get("/anime/search")
async def search_anime_unified(
    q: str,
    lang: str = "vostfr",
    include_metadata: bool = False,
):
    """
    Search across all anime providers using MetadataEnricher and health checks.
    Results are grouped by provider for legacy UI compatibility.

    Args:
        q: Search text. Passed to every provider and also used for the
            case-insensitive title match that drives the relevance boost.
        lang: Language code. Forwarded only to the legacy downloaders; the
            generic providers are called with ``q`` alone.
        include_metadata: Echoed back in the response. It does not switch
            enrichment on or off in this function: legacy downloaders are
            always called with ``include_metadata=False`` and enrichment is
            always attempted for the first results of each provider.

    Returns:
        A dict with the keys ``query``, ``lang``, ``include_metadata`` and
        ``results``, where ``results`` maps a provider id to its list of
        result dicts.

    Notes:
        - A provider whose search raised is logged and left out.
        - Results are de-duplicated by URL across providers (first provider
          in task order wins); results without a URL are dropped.
        - Only the first five unique results per provider (in the order the
          provider returned them, before relevance sorting) are enriched.
    """
    print(f"\n[SEARCH] Starting modern unified search for '{q}' in {lang}")
    start_time = time.time()
    results = {}
    # 1. Prepare search tasks (Generic + Legacy)
    # search_tasks[i] and task_metadata[i] describe the same provider; the
    # index is what links a gathered result back to its provider id.
    search_tasks = []
    task_metadata = []
    # Generic YAML providers
    active_generic = providers_manager.get_active_providers()
    for provider in active_generic:
        print(f"[SEARCH] Queueing generic provider: {provider.name}")
        search_tasks.append(provider.search(q))
        task_metadata.append({"id": provider.id, "type": "generic"})
    # Legacy providers (until migrated to YAML)
    legacy_downloaders = {
        "anime-ultime": AnimeUltimeDownloader(),
        "neko-sama": NekoSamaDownloader(),
        "vostfree": VostfreeDownloader(),
    }
    for pid, dl in legacy_downloaders.items():
        print(f"[SEARCH] Queueing legacy provider: {pid}")
        search_tasks.append(dl.search_anime(q, lang, include_metadata=False))
        task_metadata.append({"id": pid, "type": "legacy"})
    # 2. Run searches in parallel
    # return_exceptions=True: a failing provider yields its exception as a
    # result instead of aborting the whole search.
    print(f"[SEARCH] Waiting for {len(search_tasks)} provider results...")
    all_raw_results = await asyncio.gather(*search_tasks, return_exceptions=True)
    # 3. Organize results by provider
    seen_urls = set()  # URLs already accepted, shared across all providers
    enricher = await get_metadata_enricher()
    enrichment_tasks = []
    # Map task indices to result slots for re-injection after enrichment
    enrichment_mapping = []  # List of (provider_id, index_in_provider_results)
    for i, raw_result in enumerate(all_raw_results):
        provider_info = task_metadata[i]
        pid = provider_info["id"]
        if isinstance(raw_result, Exception):
            logger.error(f"Search failed for {pid}: {raw_result}")
            continue
        if not raw_result:
            continue
        if pid not in results:
            results[pid] = []
        for item in raw_result:
            # Normalize to dict
            item_dict = item.model_dump() if hasattr(item, "model_dump") else item
            url = item_dict.get("url")
            if url and url not in seen_urls:
                seen_urls.add(url)
                # Check relevance simple boost
                # (temporary key; it is removed again after sorting in step 5)
                if q.lower() in (item_dict.get("title") or "").lower():
                    item_dict["_relevance_boost"] = 1.0
                else:
                    item_dict["_relevance_boost"] = 0.5
                results[pid].append(item_dict)
                # Prepare enrichment task for top 5 results per provider
                # ("top" = first five accepted, since sorting happens later)
                if len(results[pid]) <= 5:
                    enrichment_tasks.append(
                        enricher.enrich_metadata(
                            item_dict.get("metadata", {}),
                            item_dict.get("title", ""),
                            url
                        )
                    )
                    # Record the slot now; positions stay valid because the
                    # per-provider lists are only sorted after re-injection.
                    enrichment_mapping.append((pid, len(results[pid]) - 1))
                else:
                    # Non-enriched results still get a "metadata" key.
                    if "metadata" not in item_dict:
                        item_dict["metadata"] = {}
    # 4. Perform parallel enrichment
    if enrichment_tasks:
        print(f"[SEARCH] Enriching {len(enrichment_tasks)} top results via Kitsu...")
        enriched_metas = await asyncio.gather(*enrichment_tasks, return_exceptions=True)
        # Re-inject enriched metadata
        for idx, (pid, pos) in enumerate(enrichment_mapping):
            if idx < len(enriched_metas):
                meta = enriched_metas[idx]
                # Failed or empty enrichments leave the result untouched.
                if not isinstance(meta, Exception) and meta:
                    results[pid][pos]["metadata"] = meta.model_dump()
    # 5. Sort results by relevance per provider
    # list.sort is stable, so results with the same boost keep provider order.
    for pid in results:
        results[pid].sort(key=lambda x: -x.get("_relevance_boost", 0))
        for item in results[pid]:
            item.pop("_relevance_boost", None)
    elapsed = time.time() - start_time
    total_found = sum(len(r) for r in results.values())
    print(f"[SEARCH] Finished in {elapsed:.2f}s. Found {total_found} unique results across {len(results)} providers.")
    return {
        "query": q,
        "lang": lang,
        "include_metadata": include_metadata,
        "results": results,
    }
@router.get("/series/search")
async def search_series_unified(
    q: str,
    lang: str = "vf",
):
    """
    Run the query against every registered TV-series provider concurrently.

    Only providers that appear both in ``get_series_providers()`` and in the
    local downloader table (currently just ``fs7``) are searched. Providers
    that raise or return nothing are omitted from ``results``.
    """
    import asyncio
    from app.downloaders.series_sites import FS7Downloader

    print(f"\n[SERIES SEARCH] Starting search for '{q}' in {lang}")
    started_at = time.time()

    available_downloaders = {"fs7": FS7Downloader()}

    # Parallel lists: queued_ids[i] names the provider behind pending[i].
    queued_ids = []
    pending = []
    for provider_id, _provider in get_series_providers().items():
        if provider_id not in available_downloaders:
            continue
        print(f"[SERIES SEARCH] Queueing search on {provider_id}...")
        pending.append(available_downloaders[provider_id].search_anime(q, lang))
        queued_ids.append(provider_id)

    print(f"[SERIES SEARCH] Waiting for {len(pending)} searches...")
    outcomes = await asyncio.gather(*pending, return_exceptions=True)

    results = {}
    for provider_id, outcome in zip(queued_ids, outcomes):
        if isinstance(outcome, Exception):
            print(f"[SERIES SEARCH] {provider_id} error: {str(outcome)}")
            continue
        if outcome:
            print(f"[SERIES SEARCH] {provider_id} found {len(outcome)} results")
            results[provider_id] = outcome

    elapsed = time.time() - started_at
    print(f"[SERIES SEARCH] Completed in {elapsed:.2f}s\n")
    return {"query": q, "lang": lang, "results": results}
@router.get("/anime/metadata")
async def get_anime_metadata(url: str):
    """
    Get detailed metadata for a specific anime.

    Args:
        url: Anime page URL; used to select a downloader via ``get_downloader``.

    Returns:
        ``{"url": url, "metadata": <result of downloader.get_anime_metadata>}``.

    Raises:
        HTTPException: 400 when the selected downloader has no
            ``get_anime_metadata`` method; 500 for any other failure while
            resolving the downloader or extracting the metadata.
    """
    try:
        downloader = get_downloader(url)
        if not hasattr(downloader, "get_anime_metadata"):
            raise HTTPException(
                status_code=400,
                detail=f"Downloader for {url} does not support metadata extraction",
            )
        metadata = await downloader.get_anime_metadata(url)
        return {"url": url, "metadata": metadata}
    except HTTPException:
        # Let deliberate HTTP errors (the 400 above) through unchanged;
        # without this clause the generic handler below turns them into 500s.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e
@router.get("/anime/episodes")
async def get_anime_episodes(
    url: str,
    lang: str = "vostfr",
):
    """Resolve the downloader for ``url`` and return its episode list for ``lang``."""
    episode_list = await get_downloader(url).get_episodes(url, lang)
    return {"url": url, "lang": lang, "episodes": episode_list}
@router.get("/anime/providers")
async def get_anime_providers_list():
    """Return the result of ``get_anime_providers()`` under the ``providers`` key."""
    provider_info = get_anime_providers()
    return {"providers": provider_info}
@router.get("/anime-sama/search")
async def search_anime_sama(
    q: str,
    lang: str = "vostfr",
):
    """Legacy endpoint: search anime-sama only, via ``AnimeSamaDownloader``."""
    matches = await AnimeSamaDownloader().search_anime(q, lang)
    return {"query": q, "lang": lang, "results": matches}
@router.post("/anime/download")
async def download_anime_episode(
    url: str,
    background_tasks: BackgroundTasks,
    episode: str | None = None,
    download_manager: DownloadManager = Depends(get_download_manager),
):
    """
    Create a download task for one episode and start it in the background.

    When ``episode`` is given and ``url`` contains neither ``episode-`` nor
    ``|``, ``/episode-<episode>`` is appended to the URL (after trimming any
    trailing slash) before the task is created.
    """
    url_already_targets_episode = "episode-" in url or "|" in url
    if episode and not url_already_targets_episode:
        base_url = url.rstrip("/")
        url = f"{base_url}/episode-{episode}"

    task = download_manager.create_task(DownloadRequest(url=url))
    background_tasks.add_task(download_manager.start_download, task.id)
    return {"task_id": task.id, "task": task}
@router.post("/anime/download-season")
async def download_anime_season(
    url: str,
    background_tasks: BackgroundTasks,
    lang: str = "vostfr",
    download_manager: DownloadManager = Depends(get_download_manager),
):
    """
    Queue one background download per episode of the season at ``url``.

    Raises:
        HTTPException: 404 when the downloader returns no episodes.
    """
    season_episodes = await get_downloader(url).get_episodes(url, lang)
    if not season_episodes:
        raise HTTPException(status_code=404, detail="No episodes found")

    queued_ids = []
    for entry in season_episodes:
        # Each episode entry is indexed by its "url" key to build the request.
        new_task = download_manager.create_task(DownloadRequest(url=entry["url"]))
        queued_ids.append(new_task.id)
        background_tasks.add_task(download_manager.start_download, new_task.id)

    return {
        "message": f"Started downloading {len(queued_ids)} episodes",
        "task_ids": queued_ids,
        "total_episodes": len(season_episodes),
    }
@router.get("/anime/seasons")
async def get_anime_seasons(url: str):
    """
    Return the seasons reported by the downloader that handles ``url``.

    Downloaders without a ``get_seasons`` method yield an empty list plus an
    explanatory ``message``; a falsy season result is normalized to ``[]``.
    """
    downloader = get_downloader(url)
    if not hasattr(downloader, "get_seasons"):
        return {"seasons": [], "message": "Season info not available for this provider"}

    found = await downloader.get_seasons(url)
    return {"seasons": found if found else []}
@router.get("/anime/mal/search")
async def search_anime_mal_details(
    q: str = Query(..., description="Anime search query"),
    limit: int = Query(5, description="Number of results"),
):
    """
    Search via ``AnimeReleasesFetcher`` and return full details for the first hit.

    The remaining hits are returned as ``alternatives``. The fetcher is always
    closed, whether the lookup succeeds, finds nothing, or raises.
    """
    from app.recommendations import AnimeReleasesFetcher

    fetcher = AnimeReleasesFetcher()
    try:
        matches = await fetcher.search_anime(q, limit=limit)
        if not matches:
            return {"anime": None, "message": "No anime found"}

        top_hit = matches[0]
        details = await fetcher.get_anime_details(top_hit["mal_id"])
        response = {
            "anime": details,
            "alternatives": matches[1:],
            "total_results": len(matches),
        }
        return response
    finally:
        await fetcher.close()
@router.post("/translate")
async def translate_text(request: Request):
    """
    Translate text from English to French using Google Translate.

    Expects a JSON object body with a non-empty string ``text`` field. Only
    the first 5000 characters of the text are sent to the translation endpoint.

    Returns:
        ``{"translatedText": <str>, "status": "success"}``.

    Raises:
        HTTPException: 400 when ``text`` is missing, empty or not a string;
            500 with detail ``"Translation failed"`` when the upstream reply
            is not a usable 200 response; 500 with detail
            ``"Translation error: ..."`` for any unexpected failure
            (including a request body that is not valid JSON).
    """
    import httpx

    try:
        body = await request.json()
        # A JSON array/scalar body has no "text" field; treat it as missing
        # instead of crashing on ``.get``.
        text = body.get("text", "") if isinstance(body, dict) else ""
        if not text or not isinstance(text, str):
            raise HTTPException(status_code=400, detail="Text is required")
        async with httpx.AsyncClient(timeout=30.0) as client:
            url = "https://translate.googleapis.com/translate_a/single"
            params = {"client": "gtx", "sl": "en", "tl": "fr", "dt": "t", "q": text[:5000]}
            response = await client.get(url, params=params)
            if response.status_code == 200:
                data = response.json()
                if data and data[0]:
                    # data[0] is iterated as a list of segments whose first
                    # element is the translated fragment; empty ones are skipped.
                    translated = "".join(item[0] for item in data[0] if item[0])
                    return {"translatedText": translated, "status": "success"}
        raise HTTPException(status_code=500, detail="Translation failed")
    except HTTPException:
        # Preserve the status code and detail of errors raised on purpose
        # above; the generic handler below would otherwise rewrap the 400
        # as a 500 "Translation error: ...".
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Translation error: {str(e)}") from e