feat: translate synopses to French and show full text
CI / Test (Python 3.11) (push) Has been cancelled
CI / Test (Python 3.12) (push) Has been cancelled
CI / Lint (push) Has been cancelled
CI / Type Check (push) Has been cancelled
CI / Summary (push) Has been cancelled

- Add MyMemory translation API to MetadataEnricher (free, no key)
- Translate English synopses to French after Kitsu enrichment
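- Remove template-side synopsis truncation (was 200 chars); templates now render the whole stored synopsis, but a translated synopsis covers only the first 490 characters sent to MyMemory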
- Increase CSS line-clamp from 2 to 4 lines
This commit is contained in:
root
2026-03-28 00:37:55 +00:00
parent 0e27d73d07
commit d8bc00808d
3 changed files with 109 additions and 82 deletions
+105 -78
View File
@@ -7,6 +7,7 @@ This module provides intelligent metadata enrichment by:
3. Normalizing data formats across providers 3. Normalizing data formats across providers
4. Caching enriched metadata to reduce API calls 4. Caching enriched metadata to reduce API calls
""" """
import asyncio import asyncio
import logging import logging
from typing import Dict, Optional, List, Set from typing import Dict, Optional, List, Set
@@ -15,6 +16,7 @@ from pathlib import Path
import json import json
import hashlib import hashlib
import httpx
from app.kitsu_api import KitsuAPI from app.kitsu_api import KitsuAPI
from app.models import AnimeMetadata from app.models import AnimeMetadata
@@ -30,9 +32,15 @@ class MetadataEnricher:
# Fields that Kitsu can provide as fallback # Fields that Kitsu can provide as fallback
# Note: studio is not included as Kitsu API requires separate calls # Note: studio is not included as Kitsu API requires separate calls
KITSU_FIELDS = { KITSU_FIELDS = {
'synopsis', 'genres', 'rating', 'release_year', "synopsis",
'poster_image', 'banner_image', 'total_episodes', 'status', "genres",
'alternative_titles' "rating",
"release_year",
"poster_image",
"banner_image",
"total_episodes",
"status",
"alternative_titles",
} }
# Cache duration in hours # Cache duration in hours
@@ -52,14 +60,15 @@ class MetadataEnricher:
"""Load metadata cache from disk.""" """Load metadata cache from disk."""
try: try:
if self.cache_file.exists(): if self.cache_file.exists():
with open(self.cache_file, 'r', encoding='utf-8') as f: with open(self.cache_file, "r", encoding="utf-8") as f:
data = json.load(f) data = json.load(f)
# Filter out expired entries # Filter out expired entries
now = datetime.now() now = datetime.now()
self._cache = { self._cache = {
k: v for k, v in data.items() k: v
if datetime.fromisoformat(v.get('cached_at', '')) > for k, v in data.items()
now - timedelta(hours=self.CACHE_DURATION_HOURS) if datetime.fromisoformat(v.get("cached_at", ""))
> now - timedelta(hours=self.CACHE_DURATION_HOURS)
} }
logger.info(f"Loaded {len(self._cache)} cached metadata entries") logger.info(f"Loaded {len(self._cache)} cached metadata entries")
except Exception as e: except Exception as e:
@@ -73,7 +82,7 @@ class MetadataEnricher:
try: try:
self.cache_dir.mkdir(parents=True, exist_ok=True) self.cache_dir.mkdir(parents=True, exist_ok=True)
with open(self.cache_file, 'w', encoding='utf-8') as f: with open(self.cache_file, "w", encoding="utf-8") as f:
json.dump(self._cache, f, ensure_ascii=False, indent=2) json.dump(self._cache, f, ensure_ascii=False, indent=2)
self._cache_dirty = False self._cache_dirty = False
logger.debug("Saved metadata cache") logger.debug("Saved metadata cache")
@@ -90,10 +99,10 @@ class MetadataEnricher:
"""Get cached metadata if available and not expired.""" """Get cached metadata if available and not expired."""
if cache_key in self._cache: if cache_key in self._cache:
entry = self._cache[cache_key] entry = self._cache[cache_key]
cached_at = datetime.fromisoformat(entry.get('cached_at', '')) cached_at = datetime.fromisoformat(entry.get("cached_at", ""))
if cached_at > datetime.now() - timedelta(hours=self.CACHE_DURATION_HOURS): if cached_at > datetime.now() - timedelta(hours=self.CACHE_DURATION_HOURS):
logger.debug(f"Cache hit for key: {cache_key}") logger.debug(f"Cache hit for key: {cache_key}")
return entry.get('metadata') return entry.get("metadata")
else: else:
# Remove expired entry # Remove expired entry
del self._cache[cache_key] del self._cache[cache_key]
@@ -103,8 +112,8 @@ class MetadataEnricher:
def _set_cached_metadata(self, cache_key: str, metadata: Dict): def _set_cached_metadata(self, cache_key: str, metadata: Dict):
"""Cache enriched metadata.""" """Cache enriched metadata."""
self._cache[cache_key] = { self._cache[cache_key] = {
'metadata': metadata, "metadata": metadata,
'cached_at': datetime.now().isoformat() "cached_at": datetime.now().isoformat(),
} }
self._cache_dirty = True self._cache_dirty = True
@@ -113,7 +122,7 @@ class MetadataEnricher:
provider_metadata: Dict, provider_metadata: Dict,
title: str, title: str,
url: Optional[str] = None, url: Optional[str] = None,
use_kitsu_fallback: bool = True use_kitsu_fallback: bool = True,
) -> AnimeMetadata: ) -> AnimeMetadata:
""" """
Enrich provider metadata with Kitsu API fallback. Enrich provider metadata with Kitsu API fallback.
@@ -140,7 +149,9 @@ class MetadataEnricher:
missing_fields = self._get_missing_fields(enriched) missing_fields = self._get_missing_fields(enriched)
if missing_fields and use_kitsu_fallback: if missing_fields and use_kitsu_fallback:
logger.info(f"Missing fields for '{title}': {missing_fields} - fetching from Kitsu") logger.info(
f"Missing fields for '{title}': {missing_fields} - fetching from Kitsu"
)
try: try:
# Fetch from Kitsu # Fetch from Kitsu
kitsu_metadata = await self._fetch_from_kitsu(title) kitsu_metadata = await self._fetch_from_kitsu(title)
@@ -148,19 +159,27 @@ class MetadataEnricher:
if kitsu_metadata: if kitsu_metadata:
# Merge Kitsu data # Merge Kitsu data
enriched = self._merge_metadata(enriched, kitsu_metadata) enriched = self._merge_metadata(enriched, kitsu_metadata)
enriched['_kitsu_enriched'] = True enriched["_kitsu_enriched"] = True
enriched['_enriched_fields'] = list(missing_fields) enriched["_enriched_fields"] = list(missing_fields)
except Exception as e: except Exception as e:
logger.warning(f"Failed to fetch Kitsu metadata for '{title}': {e}") logger.warning(f"Failed to fetch Kitsu metadata for '{title}': {e}")
# Translate synopsis to French
synopsis = enriched.get("synopsis")
if synopsis and len(synopsis) > 20:
enriched["synopsis"] = await self._translate_to_french(synopsis)
# Calculate quality score # Calculate quality score
enriched['_quality_score'] = self._calculate_quality_score(enriched) enriched["_quality_score"] = self._calculate_quality_score(enriched)
# Convert to AnimeMetadata # Convert to AnimeMetadata
result = AnimeMetadata(**{ result = AnimeMetadata(
k: v for k, v in enriched.items() **{
if not k.startswith('_') # Exclude internal fields k: v
}) for k, v in enriched.items()
if not k.startswith("_") # Exclude internal fields
}
)
# Cache the result # Cache the result
self._set_cached_metadata(cache_key, result.model_dump()) self._set_cached_metadata(cache_key, result.model_dump())
@@ -176,7 +195,7 @@ class MetadataEnricher:
missing = set() missing = set()
for field in self.KITSU_FIELDS: for field in self.KITSU_FIELDS:
value = metadata.get(field) value = metadata.get(field)
if value is None or value == [] or value == '': if value is None or value == [] or value == "":
missing.add(field) missing.add(field)
return missing return missing
@@ -202,68 +221,79 @@ class MetadataEnricher:
metadata = {} metadata = {}
# Synopsis # Synopsis
if kitsu_data.get('synopsis'): if kitsu_data.get("synopsis"):
metadata['synopsis'] = kitsu_data['synopsis'] metadata["synopsis"] = kitsu_data["synopsis"]
# Genres # Genres
if kitsu_data.get('genres'): if kitsu_data.get("genres"):
metadata['genres'] = kitsu_data['genres'] metadata["genres"] = kitsu_data["genres"]
# Rating (Kitsu returns score out of 10, convert to string) # Rating (Kitsu returns score out of 10, convert to string)
if kitsu_data.get('score'): if kitsu_data.get("score"):
score = kitsu_data['score'] score = kitsu_data["score"]
if score > 0: if score > 0:
metadata['rating'] = f"{score:.1f}/10" metadata["rating"] = f"{score:.1f}/10"
# Release year # Release year
if kitsu_data.get('year'): if kitsu_data.get("year"):
metadata['release_year'] = kitsu_data['year'] metadata["release_year"] = kitsu_data["year"]
# Poster image # Poster image
if kitsu_data.get('images', {}).get('jpg', {}).get('large_image_url'): if kitsu_data.get("images", {}).get("jpg", {}).get("large_image_url"):
metadata['poster_image'] = kitsu_data['images']['jpg']['large_image_url'] metadata["poster_image"] = kitsu_data["images"]["jpg"]["large_image_url"]
elif kitsu_data.get('images', {}).get('jpg', {}).get('image_url'): elif kitsu_data.get("images", {}).get("jpg", {}).get("image_url"):
metadata['poster_image'] = kitsu_data['images']['jpg']['image_url'] metadata["poster_image"] = kitsu_data["images"]["jpg"]["image_url"]
# Banner image (Kitsu calls it coverImage) # Banner image (Kitsu calls it coverImage)
# Note: Kitsu API structure doesn't clearly separate poster vs banner, # Note: Kitsu API structure doesn't clearly separate poster vs banner,
# but we can use different sizes if available # but we can use different sizes if available
if kitsu_data.get('images', {}).get('webp', {}).get('large_image_url'): if kitsu_data.get("images", {}).get("webp", {}).get("large_image_url"):
metadata['banner_image'] = kitsu_data['images']['webp']['large_image_url'] metadata["banner_image"] = kitsu_data["images"]["webp"]["large_image_url"]
# Total episodes # Total episodes
if kitsu_data.get('episodes'): if kitsu_data.get("episodes"):
metadata['total_episodes'] = kitsu_data['episodes'] metadata["total_episodes"] = kitsu_data["episodes"]
# Status # Status
if kitsu_data.get('status'): if kitsu_data.get("status"):
# Translate Kitsu status to our format # Translate Kitsu status to our format
status_map = { status_map = {
'Airing': 'Ongoing', "Airing": "Ongoing",
'Finished Airing': 'Completed', "Finished Airing": "Completed",
'To Be Aired': 'Upcoming' "To Be Aired": "Upcoming",
} }
metadata['status'] = status_map.get( metadata["status"] = status_map.get(
kitsu_data['status'], kitsu_data["status"], kitsu_data["status"]
kitsu_data['status']
) )
# Alternative titles # Alternative titles
alt_titles = [] alt_titles = []
if kitsu_data.get('title_japanese'): if kitsu_data.get("title_japanese"):
alt_titles.append(kitsu_data['title_japanese']) alt_titles.append(kitsu_data["title_japanese"])
if kitsu_data.get('title_english'): if kitsu_data.get("title_english"):
alt_titles.append(kitsu_data['title_english']) alt_titles.append(kitsu_data["title_english"])
if alt_titles: if alt_titles:
metadata['alternative_titles'] = alt_titles metadata["alternative_titles"] = alt_titles
return metadata return metadata
def _merge_metadata( async def _translate_to_french(self, text: str) -> str:
self, """Translate text to French using MyMemory API (free, no key needed)."""
provider_metadata: Dict, try:
kitsu_metadata: Dict async with httpx.AsyncClient(timeout=15.0) as client:
) -> Dict: response = await client.get(
"https://api.mymemory.translated.net/get",
params={"q": text[:490], "langpair": "en|fr"},
)
data = response.json()
translated = data.get("responseData", {}).get("translatedText", "")
if translated and translated.lower() != text[: len(translated)].lower():
return translated
except Exception as e:
logger.debug(f"Translation failed, using original: {e}")
return text
def _merge_metadata(self, provider_metadata: Dict, kitsu_metadata: Dict) -> Dict:
""" """
Merge provider and Kitsu metadata, preferring provider data. Merge provider and Kitsu metadata, preferring provider data.
@@ -285,16 +315,16 @@ class MetadataEnricher:
Based on completeness of critical fields. Based on completeness of critical fields.
""" """
weights = { weights = {
'synopsis': 0.2, "synopsis": 0.2,
'genres': 0.15, "genres": 0.15,
'rating': 0.1, "rating": 0.1,
'release_year': 0.1, "release_year": 0.1,
'studio': 0.1, "studio": 0.1,
'poster_image': 0.15, "poster_image": 0.15,
'banner_image': 0.05, "banner_image": 0.05,
'total_episodes': 0.05, "total_episodes": 0.05,
'status': 0.05, "status": 0.05,
'alternative_titles': 0.05 "alternative_titles": 0.05,
} }
total_weight = sum(weights.values()) total_weight = sum(weights.values())
@@ -318,9 +348,7 @@ class MetadataEnricher:
return round(score / total_weight, 2) if total_weight > 0 else 0.0 return round(score / total_weight, 2) if total_weight > 0 else 0.0
async def enrich_search_results( async def enrich_search_results(
self, self, results: List[Dict], use_kitsu_fallback: bool = True
results: List[Dict],
use_kitsu_fallback: bool = True
) -> List[Dict]: ) -> List[Dict]:
""" """
Enrich metadata for a list of search results. Enrich metadata for a list of search results.
@@ -338,22 +366,21 @@ class MetadataEnricher:
enrichment_tasks = [] enrichment_tasks = []
for result in results: for result in results:
# Skip if no metadata - will add later in order # Skip if no metadata - will add later in order
if 'metadata' not in result: if "metadata" not in result:
continue continue
task = self.enrich_metadata( task = self.enrich_metadata(
provider_metadata=result['metadata'], provider_metadata=result["metadata"],
title=result.get('title', ''), title=result.get("title", ""),
url=result.get('url'), url=result.get("url"),
use_kitsu_fallback=use_kitsu_fallback use_kitsu_fallback=use_kitsu_fallback,
) )
enrichment_tasks.append(task) enrichment_tasks.append(task)
# Wait for all enrichment tasks # Wait for all enrichment tasks
if enrichment_tasks: if enrichment_tasks:
enriched_metadata_list = await asyncio.gather( enriched_metadata_list = await asyncio.gather(
*enrichment_tasks, *enrichment_tasks, return_exceptions=True
return_exceptions=True
) )
# Update results with enriched metadata # Update results with enriched metadata
@@ -361,7 +388,7 @@ class MetadataEnricher:
temp_results = {} temp_results = {}
metadata_idx = 0 metadata_idx = 0
for i, result in enumerate(results): for i, result in enumerate(results):
if 'metadata' in result: if "metadata" in result:
enriched_meta = enriched_metadata_list[metadata_idx] enriched_meta = enriched_metadata_list[metadata_idx]
if isinstance(enriched_meta, Exception): if isinstance(enriched_meta, Exception):
@@ -372,7 +399,7 @@ class MetadataEnricher:
result_copy = result.copy() result_copy = result.copy()
else: else:
result_copy = result.copy() result_copy = result.copy()
result_copy['metadata'] = enriched_meta.model_dump() result_copy["metadata"] = enriched_meta.model_dump()
temp_results[i] = result_copy temp_results[i] = result_copy
metadata_idx += 1 metadata_idx += 1
@@ -49,7 +49,7 @@
</div> </div>
{% if group.synopsis %} {% if group.synopsis %}
<p class="sr-synopsis">{{ group.synopsis[:200] }}{% if group.synopsis | length > 200 %}...{% endif %}</p> <p class="sr-synopsis">{{ group.synopsis }}</p>
{% endif %} {% endif %}
{% if group.genres %} {% if group.genres %}
@@ -125,7 +125,7 @@
.sr-top { display: flex; align-items: baseline; gap: 12px; } .sr-top { display: flex; align-items: baseline; gap: 12px; }
.sr-title { font-size: 1.1rem; font-weight: 700; margin: 0; white-space: nowrap; overflow: hidden; text-overflow: ellipsis; } .sr-title { font-size: 1.1rem; font-weight: 700; margin: 0; white-space: nowrap; overflow: hidden; text-overflow: ellipsis; }
.sr-rating { flex-shrink: 0; font-size: 0.8rem; font-weight: 700; color: #ffcc00; } .sr-rating { flex-shrink: 0; font-size: 0.8rem; font-weight: 700; color: #ffcc00; }
.sr-synopsis { font-size: 0.85rem; color: var(--text-dim); margin: 0; display: -webkit-box; -webkit-line-clamp: 2; -webkit-box-orient: vertical; overflow: hidden; } .sr-synopsis { font-size: 0.85rem; color: var(--text-dim); margin: 0; display: -webkit-box; -webkit-line-clamp: 4; -webkit-box-orient: vertical; overflow: hidden; }
.sr-tags { display: flex; flex-wrap: wrap; gap: 4px; margin: 0; } .sr-tags { display: flex; flex-wrap: wrap; gap: 4px; margin: 0; }
.sr-tag { font-size: 0.65rem; font-weight: 600; padding: 2px 8px; border-radius: 4px; background: rgba(255,255,255,0.06); color: var(--text-dim); } .sr-tag { font-size: 0.65rem; font-weight: 600; padding: 2px 8px; border-radius: 4px; background: rgba(255,255,255,0.06); color: var(--text-dim); }
.sr-providers { display: flex; flex-wrap: wrap; gap: 6px; } .sr-providers { display: flex; flex-wrap: wrap; gap: 6px; }
@@ -36,7 +36,7 @@
<h3 class="sr-title">{{ group.title }}</h3> <h3 class="sr-title">{{ group.title }}</h3>
{% if group.synopsis %} {% if group.synopsis %}
<p class="sr-synopsis">{{ group.synopsis[:200] }}{% if group.synopsis | length > 200 %}...{% endif %}</p> <p class="sr-synopsis">{{ group.synopsis }}</p>
{% endif %} {% endif %}
<div class="sr-providers"> <div class="sr-providers">
@@ -102,7 +102,7 @@
.sr-poster-img { width: 100%; height: 100%; object-fit: cover; display: block; } .sr-poster-img { width: 100%; height: 100%; object-fit: cover; display: block; }
.sr-body { flex: 1; min-width: 0; display: flex; flex-direction: column; gap: 8px; } .sr-body { flex: 1; min-width: 0; display: flex; flex-direction: column; gap: 8px; }
.sr-title { font-size: 1.1rem; font-weight: 700; margin: 0; white-space: nowrap; overflow: hidden; text-overflow: ellipsis; } .sr-title { font-size: 1.1rem; font-weight: 700; margin: 0; white-space: nowrap; overflow: hidden; text-overflow: ellipsis; }
.sr-synopsis { font-size: 0.85rem; color: var(--text-dim); margin: 0; display: -webkit-box; -webkit-line-clamp: 2; -webkit-box-orient: vertical; overflow: hidden; } .sr-synopsis { font-size: 0.85rem; color: var(--text-dim); margin: 0; display: -webkit-box; -webkit-line-clamp: 4; -webkit-box-orient: vertical; overflow: hidden; }
.sr-providers { display: flex; flex-wrap: wrap; gap: 6px; } .sr-providers { display: flex; flex-wrap: wrap; gap: 6px; }
.sr-provider-badge { font-size: 0.7rem; font-weight: 700; text-transform: uppercase; padding: 4px 12px; border-radius: 20px; border: 1px solid var(--sr-accent); color: var(--sr-accent); background: transparent; cursor: pointer; transition: var(--transition); letter-spacing: 0.5px; text-decoration: none; } .sr-provider-badge { font-size: 0.7rem; font-weight: 700; text-transform: uppercase; padding: 4px 12px; border-radius: 20px; border: 1px solid var(--sr-accent); color: var(--sr-accent); background: transparent; cursor: pointer; transition: var(--transition); letter-spacing: 0.5px; text-decoration: none; }
.sr-provider-badge:hover { background: var(--sr-accent); color: var(--bg-dark); } .sr-provider-badge:hover { background: var(--sr-accent); color: var(--bg-dark); }