diff --git a/app/metadata_enrichment.py b/app/metadata_enrichment.py index 4401c60..d3fdefb 100644 --- a/app/metadata_enrichment.py +++ b/app/metadata_enrichment.py @@ -7,6 +7,7 @@ This module provides intelligent metadata enrichment by: 3. Normalizing data formats across providers 4. Caching enriched metadata to reduce API calls """ + import asyncio import logging from typing import Dict, Optional, List, Set @@ -15,6 +16,7 @@ from pathlib import Path import json import hashlib +import httpx from app.kitsu_api import KitsuAPI from app.models import AnimeMetadata @@ -30,9 +32,15 @@ class MetadataEnricher: # Fields that Kitsu can provide as fallback # Note: studio is not included as Kitsu API requires separate calls KITSU_FIELDS = { - 'synopsis', 'genres', 'rating', 'release_year', - 'poster_image', 'banner_image', 'total_episodes', 'status', - 'alternative_titles' + "synopsis", + "genres", + "rating", + "release_year", + "poster_image", + "banner_image", + "total_episodes", + "status", + "alternative_titles", } # Cache duration in hours @@ -52,14 +60,15 @@ class MetadataEnricher: """Load metadata cache from disk.""" try: if self.cache_file.exists(): - with open(self.cache_file, 'r', encoding='utf-8') as f: + with open(self.cache_file, "r", encoding="utf-8") as f: data = json.load(f) # Filter out expired entries now = datetime.now() self._cache = { - k: v for k, v in data.items() - if datetime.fromisoformat(v.get('cached_at', '')) > - now - timedelta(hours=self.CACHE_DURATION_HOURS) + k: v + for k, v in data.items() + if datetime.fromisoformat(v.get("cached_at", "")) + > now - timedelta(hours=self.CACHE_DURATION_HOURS) } logger.info(f"Loaded {len(self._cache)} cached metadata entries") except Exception as e: @@ -73,7 +82,7 @@ class MetadataEnricher: try: self.cache_dir.mkdir(parents=True, exist_ok=True) - with open(self.cache_file, 'w', encoding='utf-8') as f: + with open(self.cache_file, "w", encoding="utf-8") as f: json.dump(self._cache, f, ensure_ascii=False, indent=2) self._cache_dirty = False logger.debug("Saved metadata cache") @@ -90,10 +99,10 @@ class MetadataEnricher: """Get cached metadata if available and not expired.""" if cache_key in self._cache: entry = self._cache[cache_key] - cached_at = datetime.fromisoformat(entry.get('cached_at', '')) + cached_at = datetime.fromisoformat(entry.get("cached_at", "")) if cached_at > datetime.now() - timedelta(hours=self.CACHE_DURATION_HOURS): logger.debug(f"Cache hit for key: {cache_key}") - return entry.get('metadata') + return entry.get("metadata") else: # Remove expired entry del self._cache[cache_key] @@ -103,8 +112,8 @@ class MetadataEnricher: def _set_cached_metadata(self, cache_key: str, metadata: Dict): """Cache enriched metadata.""" self._cache[cache_key] = { - 'metadata': metadata, - 'cached_at': datetime.now().isoformat() + "metadata": metadata, + "cached_at": datetime.now().isoformat(), } self._cache_dirty = True @@ -113,7 +122,7 @@ class MetadataEnricher: provider_metadata: Dict, title: str, url: Optional[str] = None, - use_kitsu_fallback: bool = True + use_kitsu_fallback: bool = True, ) -> AnimeMetadata: """ Enrich provider metadata with Kitsu API fallback. @@ -140,7 +149,9 @@ class MetadataEnricher: missing_fields = self._get_missing_fields(enriched) if missing_fields and use_kitsu_fallback: - logger.info(f"Missing fields for '{title}': {missing_fields} - fetching from Kitsu") + logger.info( + f"Missing fields for '{title}': {missing_fields} - fetching from Kitsu" + ) try: # Fetch from Kitsu kitsu_metadata = await self._fetch_from_kitsu(title) @@ -148,19 +159,27 @@ class MetadataEnricher: if kitsu_metadata: # Merge Kitsu data enriched = self._merge_metadata(enriched, kitsu_metadata) - enriched['_kitsu_enriched'] = True - enriched['_enriched_fields'] = list(missing_fields) + enriched["_kitsu_enriched"] = True + enriched["_enriched_fields"] = list(missing_fields) except Exception as e: logger.warning(f"Failed to fetch Kitsu metadata for '{title}': {e}") + # Translate synopsis to French + synopsis = enriched.get("synopsis") + if synopsis and len(synopsis) > 20: + enriched["synopsis"] = await self._translate_to_french(synopsis) + # Calculate quality score - enriched['_quality_score'] = self._calculate_quality_score(enriched) + enriched["_quality_score"] = self._calculate_quality_score(enriched) # Convert to AnimeMetadata - result = AnimeMetadata(**{ - k: v for k, v in enriched.items() - if not k.startswith('_') # Exclude internal fields - }) + result = AnimeMetadata( + **{ + k: v + for k, v in enriched.items() + if not k.startswith("_") # Exclude internal fields + } + ) # Cache the result self._set_cached_metadata(cache_key, result.model_dump()) @@ -176,7 +195,7 @@ class MetadataEnricher: missing = set() for field in self.KITSU_FIELDS: value = metadata.get(field) - if value is None or value == [] or value == '': + if value is None or value == [] or value == "": missing.add(field) return missing @@ -202,68 +221,79 @@ class MetadataEnricher: metadata = {} # Synopsis - if kitsu_data.get('synopsis'): - metadata['synopsis'] = kitsu_data['synopsis'] + if kitsu_data.get("synopsis"): + metadata["synopsis"] = kitsu_data["synopsis"] # Genres - if kitsu_data.get('genres'): - metadata['genres'] = kitsu_data['genres'] + if kitsu_data.get("genres"): + metadata["genres"] = kitsu_data["genres"] # Rating (Kitsu returns score out of 10, convert to string) - if kitsu_data.get('score'): - score = kitsu_data['score'] + if kitsu_data.get("score"): + score = kitsu_data["score"] if score > 0: - metadata['rating'] = f"{score:.1f}/10" + metadata["rating"] = f"{score:.1f}/10" # Release year - if kitsu_data.get('year'): - metadata['release_year'] = kitsu_data['year'] + if kitsu_data.get("year"): + metadata["release_year"] = kitsu_data["year"] # Poster image - if kitsu_data.get('images', {}).get('jpg', {}).get('large_image_url'): - metadata['poster_image'] = kitsu_data['images']['jpg']['large_image_url'] - elif kitsu_data.get('images', {}).get('jpg', {}).get('image_url'): - metadata['poster_image'] = kitsu_data['images']['jpg']['image_url'] + if kitsu_data.get("images", {}).get("jpg", {}).get("large_image_url"): + metadata["poster_image"] = kitsu_data["images"]["jpg"]["large_image_url"] + elif kitsu_data.get("images", {}).get("jpg", {}).get("image_url"): + metadata["poster_image"] = kitsu_data["images"]["jpg"]["image_url"] # Banner image (Kitsu calls it coverImage) # Note: Kitsu API structure doesn't clearly separate poster vs banner, # but we can use different sizes if available - if kitsu_data.get('images', {}).get('webp', {}).get('large_image_url'): - metadata['banner_image'] = kitsu_data['images']['webp']['large_image_url'] + if kitsu_data.get("images", {}).get("webp", {}).get("large_image_url"): + metadata["banner_image"] = kitsu_data["images"]["webp"]["large_image_url"] # Total episodes - if kitsu_data.get('episodes'): - metadata['total_episodes'] = kitsu_data['episodes'] + if kitsu_data.get("episodes"): + metadata["total_episodes"] = kitsu_data["episodes"] # Status - if kitsu_data.get('status'): + if kitsu_data.get("status"): # Translate Kitsu status to our format status_map = { - 'Airing': 'Ongoing', - 'Finished Airing': 'Completed', - 'To Be Aired': 'Upcoming' + "Airing": "Ongoing", + "Finished Airing": "Completed", + "To Be Aired": "Upcoming", } - metadata['status'] = status_map.get( - kitsu_data['status'], - kitsu_data['status'] + metadata["status"] = status_map.get( + kitsu_data["status"], kitsu_data["status"] ) # Alternative titles alt_titles = [] - if kitsu_data.get('title_japanese'): - alt_titles.append(kitsu_data['title_japanese']) - if kitsu_data.get('title_english'): - alt_titles.append(kitsu_data['title_english']) + if kitsu_data.get("title_japanese"): + alt_titles.append(kitsu_data["title_japanese"]) + if kitsu_data.get("title_english"): + alt_titles.append(kitsu_data["title_english"]) if alt_titles: - metadata['alternative_titles'] = alt_titles + metadata["alternative_titles"] = alt_titles return metadata - def _merge_metadata( - self, - provider_metadata: Dict, - kitsu_metadata: Dict - ) -> Dict: + async def _translate_to_french(self, text: str) -> str: + """Translate text to French using MyMemory API (free, no key needed).""" + try: + async with httpx.AsyncClient(timeout=15.0) as client: + response = await client.get( + "https://api.mymemory.translated.net/get", + params={"q": text[:490], "langpair": "en|fr"}, + ) + data = response.json() + translated = data.get("responseData", {}).get("translatedText", "") + if translated and translated.lower() != text[: len(translated)].lower(): + return translated + except Exception as e: + logger.debug(f"Translation failed, using original: {e}") + return text + + def _merge_metadata(self, provider_metadata: Dict, kitsu_metadata: Dict) -> Dict: """ Merge provider and Kitsu metadata, preferring provider data. @@ -285,16 +315,16 @@ class MetadataEnricher: Based on completeness of critical fields. """ weights = { - 'synopsis': 0.2, - 'genres': 0.15, - 'rating': 0.1, - 'release_year': 0.1, - 'studio': 0.1, - 'poster_image': 0.15, - 'banner_image': 0.05, - 'total_episodes': 0.05, - 'status': 0.05, - 'alternative_titles': 0.05 + "synopsis": 0.2, + "genres": 0.15, + "rating": 0.1, + "release_year": 0.1, + "studio": 0.1, + "poster_image": 0.15, + "banner_image": 0.05, + "total_episodes": 0.05, + "status": 0.05, + "alternative_titles": 0.05, } total_weight = sum(weights.values()) @@ -318,9 +348,7 @@ class MetadataEnricher: return round(score / total_weight, 2) if total_weight > 0 else 0.0 async def enrich_search_results( - self, - results: List[Dict], - use_kitsu_fallback: bool = True + self, results: List[Dict], use_kitsu_fallback: bool = True ) -> List[Dict]: """ Enrich metadata for a list of search results. @@ -338,22 +366,21 @@ class MetadataEnricher: enrichment_tasks = [] for result in results: # Skip if no metadata - will add later in order - if 'metadata' not in result: + if "metadata" not in result: continue task = self.enrich_metadata( - provider_metadata=result['metadata'], - title=result.get('title', ''), - url=result.get('url'), - use_kitsu_fallback=use_kitsu_fallback + provider_metadata=result["metadata"], + title=result.get("title", ""), + url=result.get("url"), + use_kitsu_fallback=use_kitsu_fallback, ) enrichment_tasks.append(task) # Wait for all enrichment tasks if enrichment_tasks: enriched_metadata_list = await asyncio.gather( - *enrichment_tasks, - return_exceptions=True + *enrichment_tasks, return_exceptions=True ) # Update results with enriched metadata @@ -361,7 +388,7 @@ class MetadataEnricher: temp_results = {} metadata_idx = 0 for i, result in enumerate(results): - if 'metadata' in result: + if "metadata" in result: enriched_meta = enriched_metadata_list[metadata_idx] if isinstance(enriched_meta, Exception): @@ -372,7 +399,7 @@ class MetadataEnricher: result_copy = result.copy() else: result_copy = result.copy() - result_copy['metadata'] = enriched_meta.model_dump() + result_copy["metadata"] = enriched_meta.model_dump() temp_results[i] = result_copy metadata_idx += 1 diff --git a/templates/components/anime_search_results.html b/templates/components/anime_search_results.html index 6343880..b0efa1a 100644 --- a/templates/components/anime_search_results.html +++ b/templates/components/anime_search_results.html @@ -49,7 +49,7 @@ {% if group.synopsis %} -

{{ group.synopsis[:200] }}{% if group.synopsis | length > 200 %}...{% endif %}

+

{{ group.synopsis }}

{% endif %} {% if group.genres %} @@ -125,7 +125,7 @@ .sr-top { display: flex; align-items: baseline; gap: 12px; } .sr-title { font-size: 1.1rem; font-weight: 700; margin: 0; white-space: nowrap; overflow: hidden; text-overflow: ellipsis; } .sr-rating { flex-shrink: 0; font-size: 0.8rem; font-weight: 700; color: #ffcc00; } -.sr-synopsis { font-size: 0.85rem; color: var(--text-dim); margin: 0; display: -webkit-box; -webkit-line-clamp: 2; -webkit-box-orient: vertical; overflow: hidden; } +.sr-synopsis { font-size: 0.85rem; color: var(--text-dim); margin: 0; display: -webkit-box; -webkit-line-clamp: 4; -webkit-box-orient: vertical; overflow: hidden; } .sr-tags { display: flex; flex-wrap: wrap; gap: 4px; margin: 0; } .sr-tag { font-size: 0.65rem; font-weight: 600; padding: 2px 8px; border-radius: 4px; background: rgba(255,255,255,0.06); color: var(--text-dim); } .sr-providers { display: flex; flex-wrap: wrap; gap: 6px; } diff --git a/templates/components/series_search_results.html b/templates/components/series_search_results.html index 62ae650..70e2dbd 100644 --- a/templates/components/series_search_results.html +++ b/templates/components/series_search_results.html @@ -36,7 +36,7 @@

{{ group.title }}

{% if group.synopsis %} -

{{ group.synopsis[:200] }}{% if group.synopsis | length > 200 %}...{% endif %}

+

{{ group.synopsis }}

{% endif %}
@@ -102,7 +102,7 @@ .sr-poster-img { width: 100%; height: 100%; object-fit: cover; display: block; } .sr-body { flex: 1; min-width: 0; display: flex; flex-direction: column; gap: 8px; } .sr-title { font-size: 1.1rem; font-weight: 700; margin: 0; white-space: nowrap; overflow: hidden; text-overflow: ellipsis; } -.sr-synopsis { font-size: 0.85rem; color: var(--text-dim); margin: 0; display: -webkit-box; -webkit-line-clamp: 2; -webkit-box-orient: vertical; overflow: hidden; } +.sr-synopsis { font-size: 0.85rem; color: var(--text-dim); margin: 0; display: -webkit-box; -webkit-line-clamp: 4; -webkit-box-orient: vertical; overflow: hidden; } .sr-providers { display: flex; flex-wrap: wrap; gap: 6px; } .sr-provider-badge { font-size: 0.7rem; font-weight: 700; text-transform: uppercase; padding: 4px 12px; border-radius: 20px; border: 1px solid var(--sr-accent); color: var(--sr-accent); background: transparent; cursor: pointer; transition: var(--transition); letter-spacing: 0.5px; text-decoration: none; } .sr-provider-badge:hover { background: var(--sr-accent); color: var(--bg-dark); }