feat: translate synopses to French and show full text
CI / Test (Python 3.11) (push) Has been cancelled
CI / Test (Python 3.12) (push) Has been cancelled
CI / Lint (push) Has been cancelled
CI / Type Check (push) Has been cancelled
CI / Summary (push) Has been cancelled

- Add MyMemory translation API to MetadataEnricher (free, no key)
- Translate English synopses to French after Kitsu enrichment
- Remove synopsis truncation (was 200 chars, now shows full text)
- Increase CSS line-clamp from 2 to 4 lines
This commit is contained in:
root
2026-03-28 00:37:55 +00:00
parent 0e27d73d07
commit d8bc00808d
3 changed files with 109 additions and 82 deletions
+105 -78
View File
@@ -7,6 +7,7 @@ This module provides intelligent metadata enrichment by:
3. Normalizing data formats across providers
4. Caching enriched metadata to reduce API calls
"""
import asyncio
import logging
from typing import Dict, Optional, List, Set
@@ -15,6 +16,7 @@ from pathlib import Path
import json
import hashlib
import httpx
from app.kitsu_api import KitsuAPI
from app.models import AnimeMetadata
@@ -30,9 +32,15 @@ class MetadataEnricher:
# Fields that Kitsu can provide as fallback
# Note: studio is not included as Kitsu API requires separate calls
KITSU_FIELDS = {
'synopsis', 'genres', 'rating', 'release_year',
'poster_image', 'banner_image', 'total_episodes', 'status',
'alternative_titles'
"synopsis",
"genres",
"rating",
"release_year",
"poster_image",
"banner_image",
"total_episodes",
"status",
"alternative_titles",
}
# Cache duration in hours
@@ -52,14 +60,15 @@ class MetadataEnricher:
"""Load metadata cache from disk."""
try:
if self.cache_file.exists():
with open(self.cache_file, 'r', encoding='utf-8') as f:
with open(self.cache_file, "r", encoding="utf-8") as f:
data = json.load(f)
# Filter out expired entries
now = datetime.now()
self._cache = {
k: v for k, v in data.items()
if datetime.fromisoformat(v.get('cached_at', '')) >
now - timedelta(hours=self.CACHE_DURATION_HOURS)
k: v
for k, v in data.items()
if datetime.fromisoformat(v.get("cached_at", ""))
> now - timedelta(hours=self.CACHE_DURATION_HOURS)
}
logger.info(f"Loaded {len(self._cache)} cached metadata entries")
except Exception as e:
@@ -73,7 +82,7 @@ class MetadataEnricher:
try:
self.cache_dir.mkdir(parents=True, exist_ok=True)
with open(self.cache_file, 'w', encoding='utf-8') as f:
with open(self.cache_file, "w", encoding="utf-8") as f:
json.dump(self._cache, f, ensure_ascii=False, indent=2)
self._cache_dirty = False
logger.debug("Saved metadata cache")
@@ -90,10 +99,10 @@ class MetadataEnricher:
"""Get cached metadata if available and not expired."""
if cache_key in self._cache:
entry = self._cache[cache_key]
cached_at = datetime.fromisoformat(entry.get('cached_at', ''))
cached_at = datetime.fromisoformat(entry.get("cached_at", ""))
if cached_at > datetime.now() - timedelta(hours=self.CACHE_DURATION_HOURS):
logger.debug(f"Cache hit for key: {cache_key}")
return entry.get('metadata')
return entry.get("metadata")
else:
# Remove expired entry
del self._cache[cache_key]
@@ -103,8 +112,8 @@ class MetadataEnricher:
def _set_cached_metadata(self, cache_key: str, metadata: Dict):
"""Cache enriched metadata."""
self._cache[cache_key] = {
'metadata': metadata,
'cached_at': datetime.now().isoformat()
"metadata": metadata,
"cached_at": datetime.now().isoformat(),
}
self._cache_dirty = True
@@ -113,7 +122,7 @@ class MetadataEnricher:
provider_metadata: Dict,
title: str,
url: Optional[str] = None,
use_kitsu_fallback: bool = True
use_kitsu_fallback: bool = True,
) -> AnimeMetadata:
"""
Enrich provider metadata with Kitsu API fallback.
@@ -140,7 +149,9 @@ class MetadataEnricher:
missing_fields = self._get_missing_fields(enriched)
if missing_fields and use_kitsu_fallback:
logger.info(f"Missing fields for '{title}': {missing_fields} - fetching from Kitsu")
logger.info(
f"Missing fields for '{title}': {missing_fields} - fetching from Kitsu"
)
try:
# Fetch from Kitsu
kitsu_metadata = await self._fetch_from_kitsu(title)
@@ -148,19 +159,27 @@ class MetadataEnricher:
if kitsu_metadata:
# Merge Kitsu data
enriched = self._merge_metadata(enriched, kitsu_metadata)
enriched['_kitsu_enriched'] = True
enriched['_enriched_fields'] = list(missing_fields)
enriched["_kitsu_enriched"] = True
enriched["_enriched_fields"] = list(missing_fields)
except Exception as e:
logger.warning(f"Failed to fetch Kitsu metadata for '{title}': {e}")
# Translate synopsis to French
synopsis = enriched.get("synopsis")
if synopsis and len(synopsis) > 20:
enriched["synopsis"] = await self._translate_to_french(synopsis)
# Calculate quality score
enriched['_quality_score'] = self._calculate_quality_score(enriched)
enriched["_quality_score"] = self._calculate_quality_score(enriched)
# Convert to AnimeMetadata
result = AnimeMetadata(**{
k: v for k, v in enriched.items()
if not k.startswith('_') # Exclude internal fields
})
result = AnimeMetadata(
**{
k: v
for k, v in enriched.items()
if not k.startswith("_") # Exclude internal fields
}
)
# Cache the result
self._set_cached_metadata(cache_key, result.model_dump())
@@ -176,7 +195,7 @@ class MetadataEnricher:
missing = set()
for field in self.KITSU_FIELDS:
value = metadata.get(field)
if value is None or value == [] or value == '':
if value is None or value == [] or value == "":
missing.add(field)
return missing
@@ -202,68 +221,79 @@ class MetadataEnricher:
metadata = {}
# Synopsis
if kitsu_data.get('synopsis'):
metadata['synopsis'] = kitsu_data['synopsis']
if kitsu_data.get("synopsis"):
metadata["synopsis"] = kitsu_data["synopsis"]
# Genres
if kitsu_data.get('genres'):
metadata['genres'] = kitsu_data['genres']
if kitsu_data.get("genres"):
metadata["genres"] = kitsu_data["genres"]
# Rating (Kitsu returns score out of 10, convert to string)
if kitsu_data.get('score'):
score = kitsu_data['score']
if kitsu_data.get("score"):
score = kitsu_data["score"]
if score > 0:
metadata['rating'] = f"{score:.1f}/10"
metadata["rating"] = f"{score:.1f}/10"
# Release year
if kitsu_data.get('year'):
metadata['release_year'] = kitsu_data['year']
if kitsu_data.get("year"):
metadata["release_year"] = kitsu_data["year"]
# Poster image
if kitsu_data.get('images', {}).get('jpg', {}).get('large_image_url'):
metadata['poster_image'] = kitsu_data['images']['jpg']['large_image_url']
elif kitsu_data.get('images', {}).get('jpg', {}).get('image_url'):
metadata['poster_image'] = kitsu_data['images']['jpg']['image_url']
if kitsu_data.get("images", {}).get("jpg", {}).get("large_image_url"):
metadata["poster_image"] = kitsu_data["images"]["jpg"]["large_image_url"]
elif kitsu_data.get("images", {}).get("jpg", {}).get("image_url"):
metadata["poster_image"] = kitsu_data["images"]["jpg"]["image_url"]
# Banner image (Kitsu calls it coverImage)
# Note: Kitsu API structure doesn't clearly separate poster vs banner,
# but we can use different sizes if available
if kitsu_data.get('images', {}).get('webp', {}).get('large_image_url'):
metadata['banner_image'] = kitsu_data['images']['webp']['large_image_url']
if kitsu_data.get("images", {}).get("webp", {}).get("large_image_url"):
metadata["banner_image"] = kitsu_data["images"]["webp"]["large_image_url"]
# Total episodes
if kitsu_data.get('episodes'):
metadata['total_episodes'] = kitsu_data['episodes']
if kitsu_data.get("episodes"):
metadata["total_episodes"] = kitsu_data["episodes"]
# Status
if kitsu_data.get('status'):
if kitsu_data.get("status"):
# Translate Kitsu status to our format
status_map = {
'Airing': 'Ongoing',
'Finished Airing': 'Completed',
'To Be Aired': 'Upcoming'
"Airing": "Ongoing",
"Finished Airing": "Completed",
"To Be Aired": "Upcoming",
}
metadata['status'] = status_map.get(
kitsu_data['status'],
kitsu_data['status']
metadata["status"] = status_map.get(
kitsu_data["status"], kitsu_data["status"]
)
# Alternative titles
alt_titles = []
if kitsu_data.get('title_japanese'):
alt_titles.append(kitsu_data['title_japanese'])
if kitsu_data.get('title_english'):
alt_titles.append(kitsu_data['title_english'])
if kitsu_data.get("title_japanese"):
alt_titles.append(kitsu_data["title_japanese"])
if kitsu_data.get("title_english"):
alt_titles.append(kitsu_data["title_english"])
if alt_titles:
metadata['alternative_titles'] = alt_titles
metadata["alternative_titles"] = alt_titles
return metadata
def _merge_metadata(
self,
provider_metadata: Dict,
kitsu_metadata: Dict
) -> Dict:
async def _translate_to_french(self, text: str) -> str:
    """Translate English text to French using the MyMemory API (free, no key).

    This is a best-effort operation: on any network, HTTP, or API-level
    failure the original text is returned unchanged.

    MyMemory limits the size of a single query, so text longer than
    ``max_chunk`` characters is split (preferably at sentence boundaries)
    and translated piece by piece. The translated pieces are re-joined
    with a single space, so paragraph breaks inside long texts are not
    preserved. If any piece fails, the whole original text is returned so
    the result is never a mix of French and English or a truncated synopsis.

    Args:
        text: English text to translate.

    Returns:
        The French translation, or ``text`` unchanged when translation is
        unavailable or the service returned the input untranslated.
    """
    # Stay safely below MyMemory's per-request query length limit.
    max_chunk = 490

    def split_chunks(source: str, limit: int) -> List[str]:
        """Split ``source`` into pieces of at most ``limit`` characters."""
        pieces = []
        rest = source
        while len(rest) > limit:
            window = rest[:limit]
            # Prefer cutting right after the last sentence terminator.
            cut = max(window.rfind(mark) for mark in (". ", "! ", "? "))
            if cut != -1:
                cut += 1  # keep the punctuation with its sentence
            else:
                # Otherwise fall back to the last whitespace in the window.
                cut = max(window.rfind(" "), window.rfind("\n"))
            if cut <= 0:
                cut = limit  # no usable boundary: hard cut
            pieces.append(rest[:cut].strip())
            rest = rest[cut:].lstrip()
        if rest:
            pieces.append(rest)
        return [piece for piece in pieces if piece]

    if not text or not text.strip():
        return text

    translated_parts = []
    try:
        async with httpx.AsyncClient(timeout=15.0) as client:
            for chunk in split_chunks(text, max_chunk):
                response = await client.get(
                    "https://api.mymemory.translated.net/get",
                    params={"q": chunk, "langpair": "en|fr"},
                )
                response.raise_for_status()
                data = response.json()

                # MyMemory reports quota/length errors in the JSON body and
                # puts the error message in translatedText; never accept
                # that message as a translation.
                status = data.get("responseStatus", 200)
                if str(status) != "200":
                    raise ValueError(f"MyMemory responseStatus {status}")

                translated = (data.get("responseData") or {}).get(
                    "translatedText"
                ) or ""
                if not translated:
                    return text
                translated_parts.append(translated)
    except Exception as e:
        # Deliberate best-effort boundary: translation must never break
        # metadata enrichment.
        logger.debug(f"Translation failed, using original: {e}")
        return text

    result = " ".join(translated_parts)
    # If the service echoed the input back, keep the original text.
    if result and result.lower() != text[: len(result)].lower():
        return result
    return text
def _merge_metadata(self, provider_metadata: Dict, kitsu_metadata: Dict) -> Dict:
"""
Merge provider and Kitsu metadata, preferring provider data.
@@ -285,16 +315,16 @@ class MetadataEnricher:
Based on completeness of critical fields.
"""
weights = {
'synopsis': 0.2,
'genres': 0.15,
'rating': 0.1,
'release_year': 0.1,
'studio': 0.1,
'poster_image': 0.15,
'banner_image': 0.05,
'total_episodes': 0.05,
'status': 0.05,
'alternative_titles': 0.05
"synopsis": 0.2,
"genres": 0.15,
"rating": 0.1,
"release_year": 0.1,
"studio": 0.1,
"poster_image": 0.15,
"banner_image": 0.05,
"total_episodes": 0.05,
"status": 0.05,
"alternative_titles": 0.05,
}
total_weight = sum(weights.values())
@@ -318,9 +348,7 @@ class MetadataEnricher:
return round(score / total_weight, 2) if total_weight > 0 else 0.0
async def enrich_search_results(
self,
results: List[Dict],
use_kitsu_fallback: bool = True
self, results: List[Dict], use_kitsu_fallback: bool = True
) -> List[Dict]:
"""
Enrich metadata for a list of search results.
@@ -338,22 +366,21 @@ class MetadataEnricher:
enrichment_tasks = []
for result in results:
# Skip if no metadata - will add later in order
if 'metadata' not in result:
if "metadata" not in result:
continue
task = self.enrich_metadata(
provider_metadata=result['metadata'],
title=result.get('title', ''),
url=result.get('url'),
use_kitsu_fallback=use_kitsu_fallback
provider_metadata=result["metadata"],
title=result.get("title", ""),
url=result.get("url"),
use_kitsu_fallback=use_kitsu_fallback,
)
enrichment_tasks.append(task)
# Wait for all enrichment tasks
if enrichment_tasks:
enriched_metadata_list = await asyncio.gather(
*enrichment_tasks,
return_exceptions=True
*enrichment_tasks, return_exceptions=True
)
# Update results with enriched metadata
@@ -361,7 +388,7 @@ class MetadataEnricher:
temp_results = {}
metadata_idx = 0
for i, result in enumerate(results):
if 'metadata' in result:
if "metadata" in result:
enriched_meta = enriched_metadata_list[metadata_idx]
if isinstance(enriched_meta, Exception):
@@ -372,7 +399,7 @@ class MetadataEnricher:
result_copy = result.copy()
else:
result_copy = result.copy()
result_copy['metadata'] = enriched_meta.model_dump()
result_copy["metadata"] = enriched_meta.model_dump()
temp_results[i] = result_copy
metadata_idx += 1
@@ -49,7 +49,7 @@
</div>
{% if group.synopsis %}
<p class="sr-synopsis">{{ group.synopsis[:200] }}{% if group.synopsis | length > 200 %}...{% endif %}</p>
<p class="sr-synopsis">{{ group.synopsis }}</p>
{% endif %}
{% if group.genres %}
@@ -125,7 +125,7 @@
.sr-top { display: flex; align-items: baseline; gap: 12px; }
.sr-title { font-size: 1.1rem; font-weight: 700; margin: 0; white-space: nowrap; overflow: hidden; text-overflow: ellipsis; }
.sr-rating { flex-shrink: 0; font-size: 0.8rem; font-weight: 700; color: #ffcc00; }
.sr-synopsis { font-size: 0.85rem; color: var(--text-dim); margin: 0; display: -webkit-box; -webkit-line-clamp: 2; -webkit-box-orient: vertical; overflow: hidden; }
.sr-synopsis { font-size: 0.85rem; color: var(--text-dim); margin: 0; display: -webkit-box; -webkit-line-clamp: 4; -webkit-box-orient: vertical; overflow: hidden; }
.sr-tags { display: flex; flex-wrap: wrap; gap: 4px; margin: 0; }
.sr-tag { font-size: 0.65rem; font-weight: 600; padding: 2px 8px; border-radius: 4px; background: rgba(255,255,255,0.06); color: var(--text-dim); }
.sr-providers { display: flex; flex-wrap: wrap; gap: 6px; }
@@ -36,7 +36,7 @@
<h3 class="sr-title">{{ group.title }}</h3>
{% if group.synopsis %}
<p class="sr-synopsis">{{ group.synopsis[:200] }}{% if group.synopsis | length > 200 %}...{% endif %}</p>
<p class="sr-synopsis">{{ group.synopsis }}</p>
{% endif %}
<div class="sr-providers">
@@ -102,7 +102,7 @@
.sr-poster-img { width: 100%; height: 100%; object-fit: cover; display: block; }
.sr-body { flex: 1; min-width: 0; display: flex; flex-direction: column; gap: 8px; }
.sr-title { font-size: 1.1rem; font-weight: 700; margin: 0; white-space: nowrap; overflow: hidden; text-overflow: ellipsis; }
.sr-synopsis { font-size: 0.85rem; color: var(--text-dim); margin: 0; display: -webkit-box; -webkit-line-clamp: 2; -webkit-box-orient: vertical; overflow: hidden; }
.sr-synopsis { font-size: 0.85rem; color: var(--text-dim); margin: 0; display: -webkit-box; -webkit-line-clamp: 4; -webkit-box-orient: vertical; overflow: hidden; }
.sr-providers { display: flex; flex-wrap: wrap; gap: 6px; }
.sr-provider-badge { font-size: 0.7rem; font-weight: 700; text-transform: uppercase; padding: 4px 12px; border-radius: 20px; border: 1px solid var(--sr-accent); color: var(--sr-accent); background: transparent; cursor: pointer; transition: var(--transition); letter-spacing: 0.5px; text-decoration: none; }
.sr-provider-badge:hover { background: var(--sr-accent); color: var(--bg-dark); }