feat: translate synopses to French and show full text
- Add MyMemory translation API to MetadataEnricher (free, no key)
- Translate English synopses to French after Kitsu enrichment
- Remove synopsis truncation (was 200 chars, now shows full text)
- Increase CSS line-clamp from 2 to 4 lines
This commit is contained in:
+105
-78
@@ -7,6 +7,7 @@ This module provides intelligent metadata enrichment by:
|
||||
3. Normalizing data formats across providers
|
||||
4. Caching enriched metadata to reduce API calls
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
from typing import Dict, Optional, List, Set
|
||||
@@ -15,6 +16,7 @@ from pathlib import Path
|
||||
import json
|
||||
import hashlib
|
||||
|
||||
import httpx
|
||||
from app.kitsu_api import KitsuAPI
|
||||
from app.models import AnimeMetadata
|
||||
|
||||
@@ -30,9 +32,15 @@ class MetadataEnricher:
|
||||
# Fields that Kitsu can provide as fallback
|
||||
# Note: studio is not included as Kitsu API requires separate calls
|
||||
KITSU_FIELDS = {
|
||||
'synopsis', 'genres', 'rating', 'release_year',
|
||||
'poster_image', 'banner_image', 'total_episodes', 'status',
|
||||
'alternative_titles'
|
||||
"synopsis",
|
||||
"genres",
|
||||
"rating",
|
||||
"release_year",
|
||||
"poster_image",
|
||||
"banner_image",
|
||||
"total_episodes",
|
||||
"status",
|
||||
"alternative_titles",
|
||||
}
|
||||
|
||||
# Cache duration in hours
|
||||
@@ -52,14 +60,15 @@ class MetadataEnricher:
|
||||
"""Load metadata cache from disk."""
|
||||
try:
|
||||
if self.cache_file.exists():
|
||||
with open(self.cache_file, 'r', encoding='utf-8') as f:
|
||||
with open(self.cache_file, "r", encoding="utf-8") as f:
|
||||
data = json.load(f)
|
||||
# Filter out expired entries
|
||||
now = datetime.now()
|
||||
self._cache = {
|
||||
k: v for k, v in data.items()
|
||||
if datetime.fromisoformat(v.get('cached_at', '')) >
|
||||
now - timedelta(hours=self.CACHE_DURATION_HOURS)
|
||||
k: v
|
||||
for k, v in data.items()
|
||||
if datetime.fromisoformat(v.get("cached_at", ""))
|
||||
> now - timedelta(hours=self.CACHE_DURATION_HOURS)
|
||||
}
|
||||
logger.info(f"Loaded {len(self._cache)} cached metadata entries")
|
||||
except Exception as e:
|
||||
@@ -73,7 +82,7 @@ class MetadataEnricher:
|
||||
|
||||
try:
|
||||
self.cache_dir.mkdir(parents=True, exist_ok=True)
|
||||
with open(self.cache_file, 'w', encoding='utf-8') as f:
|
||||
with open(self.cache_file, "w", encoding="utf-8") as f:
|
||||
json.dump(self._cache, f, ensure_ascii=False, indent=2)
|
||||
self._cache_dirty = False
|
||||
logger.debug("Saved metadata cache")
|
||||
@@ -90,10 +99,10 @@ class MetadataEnricher:
|
||||
"""Get cached metadata if available and not expired."""
|
||||
if cache_key in self._cache:
|
||||
entry = self._cache[cache_key]
|
||||
cached_at = datetime.fromisoformat(entry.get('cached_at', ''))
|
||||
cached_at = datetime.fromisoformat(entry.get("cached_at", ""))
|
||||
if cached_at > datetime.now() - timedelta(hours=self.CACHE_DURATION_HOURS):
|
||||
logger.debug(f"Cache hit for key: {cache_key}")
|
||||
return entry.get('metadata')
|
||||
return entry.get("metadata")
|
||||
else:
|
||||
# Remove expired entry
|
||||
del self._cache[cache_key]
|
||||
@@ -103,8 +112,8 @@ class MetadataEnricher:
|
||||
def _set_cached_metadata(self, cache_key: str, metadata: Dict):
|
||||
"""Cache enriched metadata."""
|
||||
self._cache[cache_key] = {
|
||||
'metadata': metadata,
|
||||
'cached_at': datetime.now().isoformat()
|
||||
"metadata": metadata,
|
||||
"cached_at": datetime.now().isoformat(),
|
||||
}
|
||||
self._cache_dirty = True
|
||||
|
||||
@@ -113,7 +122,7 @@ class MetadataEnricher:
|
||||
provider_metadata: Dict,
|
||||
title: str,
|
||||
url: Optional[str] = None,
|
||||
use_kitsu_fallback: bool = True
|
||||
use_kitsu_fallback: bool = True,
|
||||
) -> AnimeMetadata:
|
||||
"""
|
||||
Enrich provider metadata with Kitsu API fallback.
|
||||
@@ -140,7 +149,9 @@ class MetadataEnricher:
|
||||
missing_fields = self._get_missing_fields(enriched)
|
||||
|
||||
if missing_fields and use_kitsu_fallback:
|
||||
logger.info(f"Missing fields for '{title}': {missing_fields} - fetching from Kitsu")
|
||||
logger.info(
|
||||
f"Missing fields for '{title}': {missing_fields} - fetching from Kitsu"
|
||||
)
|
||||
try:
|
||||
# Fetch from Kitsu
|
||||
kitsu_metadata = await self._fetch_from_kitsu(title)
|
||||
@@ -148,19 +159,27 @@ class MetadataEnricher:
|
||||
if kitsu_metadata:
|
||||
# Merge Kitsu data
|
||||
enriched = self._merge_metadata(enriched, kitsu_metadata)
|
||||
enriched['_kitsu_enriched'] = True
|
||||
enriched['_enriched_fields'] = list(missing_fields)
|
||||
enriched["_kitsu_enriched"] = True
|
||||
enriched["_enriched_fields"] = list(missing_fields)
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to fetch Kitsu metadata for '{title}': {e}")
|
||||
|
||||
# Translate synopsis to French
|
||||
synopsis = enriched.get("synopsis")
|
||||
if synopsis and len(synopsis) > 20:
|
||||
enriched["synopsis"] = await self._translate_to_french(synopsis)
|
||||
|
||||
# Calculate quality score
|
||||
enriched['_quality_score'] = self._calculate_quality_score(enriched)
|
||||
enriched["_quality_score"] = self._calculate_quality_score(enriched)
|
||||
|
||||
# Convert to AnimeMetadata
|
||||
result = AnimeMetadata(**{
|
||||
k: v for k, v in enriched.items()
|
||||
if not k.startswith('_') # Exclude internal fields
|
||||
})
|
||||
result = AnimeMetadata(
|
||||
**{
|
||||
k: v
|
||||
for k, v in enriched.items()
|
||||
if not k.startswith("_") # Exclude internal fields
|
||||
}
|
||||
)
|
||||
|
||||
# Cache the result
|
||||
self._set_cached_metadata(cache_key, result.model_dump())
|
||||
@@ -176,7 +195,7 @@ class MetadataEnricher:
|
||||
missing = set()
|
||||
for field in self.KITSU_FIELDS:
|
||||
value = metadata.get(field)
|
||||
if value is None or value == [] or value == '':
|
||||
if value is None or value == [] or value == "":
|
||||
missing.add(field)
|
||||
return missing
|
||||
|
||||
@@ -202,68 +221,79 @@ class MetadataEnricher:
|
||||
metadata = {}
|
||||
|
||||
# Synopsis
|
||||
if kitsu_data.get('synopsis'):
|
||||
metadata['synopsis'] = kitsu_data['synopsis']
|
||||
if kitsu_data.get("synopsis"):
|
||||
metadata["synopsis"] = kitsu_data["synopsis"]
|
||||
|
||||
# Genres
|
||||
if kitsu_data.get('genres'):
|
||||
metadata['genres'] = kitsu_data['genres']
|
||||
if kitsu_data.get("genres"):
|
||||
metadata["genres"] = kitsu_data["genres"]
|
||||
|
||||
# Rating (Kitsu returns score out of 10, convert to string)
|
||||
if kitsu_data.get('score'):
|
||||
score = kitsu_data['score']
|
||||
if kitsu_data.get("score"):
|
||||
score = kitsu_data["score"]
|
||||
if score > 0:
|
||||
metadata['rating'] = f"{score:.1f}/10"
|
||||
metadata["rating"] = f"{score:.1f}/10"
|
||||
|
||||
# Release year
|
||||
if kitsu_data.get('year'):
|
||||
metadata['release_year'] = kitsu_data['year']
|
||||
if kitsu_data.get("year"):
|
||||
metadata["release_year"] = kitsu_data["year"]
|
||||
|
||||
# Poster image
|
||||
if kitsu_data.get('images', {}).get('jpg', {}).get('large_image_url'):
|
||||
metadata['poster_image'] = kitsu_data['images']['jpg']['large_image_url']
|
||||
elif kitsu_data.get('images', {}).get('jpg', {}).get('image_url'):
|
||||
metadata['poster_image'] = kitsu_data['images']['jpg']['image_url']
|
||||
if kitsu_data.get("images", {}).get("jpg", {}).get("large_image_url"):
|
||||
metadata["poster_image"] = kitsu_data["images"]["jpg"]["large_image_url"]
|
||||
elif kitsu_data.get("images", {}).get("jpg", {}).get("image_url"):
|
||||
metadata["poster_image"] = kitsu_data["images"]["jpg"]["image_url"]
|
||||
|
||||
# Banner image (Kitsu calls it coverImage)
|
||||
# Note: Kitsu API structure doesn't clearly separate poster vs banner,
|
||||
# but we can use different sizes if available
|
||||
if kitsu_data.get('images', {}).get('webp', {}).get('large_image_url'):
|
||||
metadata['banner_image'] = kitsu_data['images']['webp']['large_image_url']
|
||||
if kitsu_data.get("images", {}).get("webp", {}).get("large_image_url"):
|
||||
metadata["banner_image"] = kitsu_data["images"]["webp"]["large_image_url"]
|
||||
|
||||
# Total episodes
|
||||
if kitsu_data.get('episodes'):
|
||||
metadata['total_episodes'] = kitsu_data['episodes']
|
||||
if kitsu_data.get("episodes"):
|
||||
metadata["total_episodes"] = kitsu_data["episodes"]
|
||||
|
||||
# Status
|
||||
if kitsu_data.get('status'):
|
||||
if kitsu_data.get("status"):
|
||||
# Translate Kitsu status to our format
|
||||
status_map = {
|
||||
'Airing': 'Ongoing',
|
||||
'Finished Airing': 'Completed',
|
||||
'To Be Aired': 'Upcoming'
|
||||
"Airing": "Ongoing",
|
||||
"Finished Airing": "Completed",
|
||||
"To Be Aired": "Upcoming",
|
||||
}
|
||||
metadata['status'] = status_map.get(
|
||||
kitsu_data['status'],
|
||||
kitsu_data['status']
|
||||
metadata["status"] = status_map.get(
|
||||
kitsu_data["status"], kitsu_data["status"]
|
||||
)
|
||||
|
||||
# Alternative titles
|
||||
alt_titles = []
|
||||
if kitsu_data.get('title_japanese'):
|
||||
alt_titles.append(kitsu_data['title_japanese'])
|
||||
if kitsu_data.get('title_english'):
|
||||
alt_titles.append(kitsu_data['title_english'])
|
||||
if kitsu_data.get("title_japanese"):
|
||||
alt_titles.append(kitsu_data["title_japanese"])
|
||||
if kitsu_data.get("title_english"):
|
||||
alt_titles.append(kitsu_data["title_english"])
|
||||
if alt_titles:
|
||||
metadata['alternative_titles'] = alt_titles
|
||||
metadata["alternative_titles"] = alt_titles
|
||||
|
||||
return metadata
|
||||
|
||||
def _merge_metadata(
|
||||
self,
|
||||
provider_metadata: Dict,
|
||||
kitsu_metadata: Dict
|
||||
) -> Dict:
|
||||
async def _translate_to_french(self, text: str) -> str:
    """Translate English text to French using the MyMemory API (free, no key needed).

    The text is split into chunks of at most 490 characters, cut on sentence
    or word boundaries where possible. Each chunk is translated with its own
    request and the translated chunks are joined with single spaces, so long
    synopses are translated in full instead of being cut to their first
    490 characters.

    Translation is best-effort and all-or-nothing: the original text is
    returned untouched rather than a partially translated one.

    Args:
        text: The English text to translate.

    Returns:
        The French translation, or ``text`` unchanged when it is blank, when
        any request fails, when the service reports an error, or when the
        service returns the input unchanged.
    """
    if not text or not text.strip():
        return text

    # NOTE(review): MyMemory documents a ~500-byte cap on `q`; 490 characters
    # keeps the limit the original code used. Non-ASCII input can still
    # exceed the cap in bytes - confirm against the API specification.
    max_chunk_chars = 490

    # Split into pieces that fit in one request, preferring to cut after a
    # sentence terminator, then at a space, then (last resort) mid-word.
    chunks = []
    remaining = text
    while len(remaining) > max_chunk_chars:
        window = remaining[:max_chunk_chars]
        cut = max(window.rfind(". "), window.rfind("! "), window.rfind("? "))
        if cut != -1:
            cut += 1  # keep the punctuation mark with its sentence
        else:
            cut = window.rfind(" ")
        if cut <= 0:
            cut = max_chunk_chars
        piece = remaining[:cut].strip()
        if piece:
            chunks.append(piece)
        remaining = remaining[cut:].strip()
    if remaining:
        chunks.append(remaining)

    try:
        translated_parts = []
        # One client is shared by all chunk requests of this call.
        async with httpx.AsyncClient(timeout=15.0) as client:
            for chunk in chunks:
                response = await client.get(
                    "https://api.mymemory.translated.net/get",
                    params={"q": chunk, "langpair": "en|fr"},
                )
                response.raise_for_status()
                data = response.json()

                # NOTE(review): MyMemory appears to report quota and
                # validation errors inside the JSON body (non-200
                # `responseStatus`, message placed in `translatedText`) -
                # confirm. Reject them so an error message never ends up
                # displayed as the synopsis.
                status = data.get("responseStatus")
                if status is not None and str(status) != "200":
                    raise ValueError(f"MyMemory responseStatus={status}")

                # `responseData` may be present but null, hence `or {}`.
                translated = (data.get("responseData") or {}).get(
                    "translatedText"
                ) or ""
                if not translated:
                    raise ValueError("MyMemory returned an empty translation")
                if translated.upper().startswith("MYMEMORY WARNING"):
                    raise ValueError("MyMemory returned a warning as translation")

                translated_parts.append(translated)

        result = " ".join(translated_parts)
        # If the service echoed the input back, keep the caller's original
        # string (including its original whitespace).
        if result.lower() != " ".join(chunks).lower():
            return result
    except Exception as e:
        # Deliberately broad: translation is optional and must never break
        # metadata enrichment.
        logger.debug("Translation failed, using original: %s", e)
    return text
|
||||
|
||||
def _merge_metadata(self, provider_metadata: Dict, kitsu_metadata: Dict) -> Dict:
|
||||
"""
|
||||
Merge provider and Kitsu metadata, preferring provider data.
|
||||
|
||||
@@ -285,16 +315,16 @@ class MetadataEnricher:
|
||||
Based on completeness of critical fields.
|
||||
"""
|
||||
weights = {
|
||||
'synopsis': 0.2,
|
||||
'genres': 0.15,
|
||||
'rating': 0.1,
|
||||
'release_year': 0.1,
|
||||
'studio': 0.1,
|
||||
'poster_image': 0.15,
|
||||
'banner_image': 0.05,
|
||||
'total_episodes': 0.05,
|
||||
'status': 0.05,
|
||||
'alternative_titles': 0.05
|
||||
"synopsis": 0.2,
|
||||
"genres": 0.15,
|
||||
"rating": 0.1,
|
||||
"release_year": 0.1,
|
||||
"studio": 0.1,
|
||||
"poster_image": 0.15,
|
||||
"banner_image": 0.05,
|
||||
"total_episodes": 0.05,
|
||||
"status": 0.05,
|
||||
"alternative_titles": 0.05,
|
||||
}
|
||||
|
||||
total_weight = sum(weights.values())
|
||||
@@ -318,9 +348,7 @@ class MetadataEnricher:
|
||||
return round(score / total_weight, 2) if total_weight > 0 else 0.0
|
||||
|
||||
async def enrich_search_results(
|
||||
self,
|
||||
results: List[Dict],
|
||||
use_kitsu_fallback: bool = True
|
||||
self, results: List[Dict], use_kitsu_fallback: bool = True
|
||||
) -> List[Dict]:
|
||||
"""
|
||||
Enrich metadata for a list of search results.
|
||||
@@ -338,22 +366,21 @@ class MetadataEnricher:
|
||||
enrichment_tasks = []
|
||||
for result in results:
|
||||
# Skip if no metadata - will add later in order
|
||||
if 'metadata' not in result:
|
||||
if "metadata" not in result:
|
||||
continue
|
||||
|
||||
task = self.enrich_metadata(
|
||||
provider_metadata=result['metadata'],
|
||||
title=result.get('title', ''),
|
||||
url=result.get('url'),
|
||||
use_kitsu_fallback=use_kitsu_fallback
|
||||
provider_metadata=result["metadata"],
|
||||
title=result.get("title", ""),
|
||||
url=result.get("url"),
|
||||
use_kitsu_fallback=use_kitsu_fallback,
|
||||
)
|
||||
enrichment_tasks.append(task)
|
||||
|
||||
# Wait for all enrichment tasks
|
||||
if enrichment_tasks:
|
||||
enriched_metadata_list = await asyncio.gather(
|
||||
*enrichment_tasks,
|
||||
return_exceptions=True
|
||||
*enrichment_tasks, return_exceptions=True
|
||||
)
|
||||
|
||||
# Update results with enriched metadata
|
||||
@@ -361,7 +388,7 @@ class MetadataEnricher:
|
||||
temp_results = {}
|
||||
metadata_idx = 0
|
||||
for i, result in enumerate(results):
|
||||
if 'metadata' in result:
|
||||
if "metadata" in result:
|
||||
enriched_meta = enriched_metadata_list[metadata_idx]
|
||||
|
||||
if isinstance(enriched_meta, Exception):
|
||||
@@ -372,7 +399,7 @@ class MetadataEnricher:
|
||||
result_copy = result.copy()
|
||||
else:
|
||||
result_copy = result.copy()
|
||||
result_copy['metadata'] = enriched_meta.model_dump()
|
||||
result_copy["metadata"] = enriched_meta.model_dump()
|
||||
|
||||
temp_results[i] = result_copy
|
||||
metadata_idx += 1
|
||||
|
||||
@@ -49,7 +49,7 @@
|
||||
</div>
|
||||
|
||||
{% if group.synopsis %}
|
||||
<p class="sr-synopsis">{{ group.synopsis[:200] }}{% if group.synopsis | length > 200 %}...{% endif %}</p>
|
||||
<p class="sr-synopsis">{{ group.synopsis }}</p>
|
||||
{% endif %}
|
||||
|
||||
{% if group.genres %}
|
||||
@@ -125,7 +125,7 @@
|
||||
.sr-top { display: flex; align-items: baseline; gap: 12px; }
|
||||
.sr-title { font-size: 1.1rem; font-weight: 700; margin: 0; white-space: nowrap; overflow: hidden; text-overflow: ellipsis; }
|
||||
.sr-rating { flex-shrink: 0; font-size: 0.8rem; font-weight: 700; color: #ffcc00; }
|
||||
.sr-synopsis { font-size: 0.85rem; color: var(--text-dim); margin: 0; display: -webkit-box; -webkit-line-clamp: 2; -webkit-box-orient: vertical; overflow: hidden; }
|
||||
.sr-synopsis { font-size: 0.85rem; color: var(--text-dim); margin: 0; display: -webkit-box; -webkit-line-clamp: 4; -webkit-box-orient: vertical; overflow: hidden; }
|
||||
.sr-tags { display: flex; flex-wrap: wrap; gap: 4px; margin: 0; }
|
||||
.sr-tag { font-size: 0.65rem; font-weight: 600; padding: 2px 8px; border-radius: 4px; background: rgba(255,255,255,0.06); color: var(--text-dim); }
|
||||
.sr-providers { display: flex; flex-wrap: wrap; gap: 6px; }
|
||||
|
||||
@@ -36,7 +36,7 @@
|
||||
<h3 class="sr-title">{{ group.title }}</h3>
|
||||
|
||||
{% if group.synopsis %}
|
||||
<p class="sr-synopsis">{{ group.synopsis[:200] }}{% if group.synopsis | length > 200 %}...{% endif %}</p>
|
||||
<p class="sr-synopsis">{{ group.synopsis }}</p>
|
||||
{% endif %}
|
||||
|
||||
<div class="sr-providers">
|
||||
@@ -102,7 +102,7 @@
|
||||
.sr-poster-img { width: 100%; height: 100%; object-fit: cover; display: block; }
|
||||
.sr-body { flex: 1; min-width: 0; display: flex; flex-direction: column; gap: 8px; }
|
||||
.sr-title { font-size: 1.1rem; font-weight: 700; margin: 0; white-space: nowrap; overflow: hidden; text-overflow: ellipsis; }
|
||||
.sr-synopsis { font-size: 0.85rem; color: var(--text-dim); margin: 0; display: -webkit-box; -webkit-line-clamp: 2; -webkit-box-orient: vertical; overflow: hidden; }
|
||||
.sr-synopsis { font-size: 0.85rem; color: var(--text-dim); margin: 0; display: -webkit-box; -webkit-line-clamp: 4; -webkit-box-orient: vertical; overflow: hidden; }
|
||||
.sr-providers { display: flex; flex-wrap: wrap; gap: 6px; }
|
||||
.sr-provider-badge { font-size: 0.7rem; font-weight: 700; text-transform: uppercase; padding: 4px 12px; border-radius: 20px; border: 1px solid var(--sr-accent); color: var(--sr-accent); background: transparent; cursor: pointer; transition: var(--transition); letter-spacing: 0.5px; text-decoration: none; }
|
||||
.sr-provider-badge:hover { background: var(--sr-accent); color: var(--bg-dark); }
|
||||
|
||||
Reference in New Issue
Block a user