feat: Add anime metadata extraction and fix episode selection bug
Features: - Added rich metadata extraction for all anime providers (Anime-Sama, Neko-Sama, Anime-Ultime, Vostfree) - New AnimeMetadata model with synopsis, genres, rating, release year, studio, poster/banner images, episode count, and status - New /api/anime/metadata endpoint for fetching metadata of specific anime - Enhanced /api/anime/search endpoint with optional include_metadata parameter - Updated web interface with metadata display (expandable synopsis, genres, rating, year) - Added metadata toggle checkbox in search UI (disabled by default for performance) Bug Fixes: - Fixed episode selection bug where select would reset to default after any change - Removed onchange event from select element that was causing unwanted reloads - Fixed download button disappearing after episode download - Episodes can now be downloaded multiple times without page refresh Enhancements: - Metadata displayed with icons (📅 year, ⭐ rating, 🏷️ genres, 📺 episodes, 📡 status) - Expandable synopsis section for detailed descriptions - Better visual organization of anime information - Maintains backward compatibility (metadata is optional) Generated with [Claude Code](https://claude.ai/code) via [Happy](https://happy.engineering) Co-Authored-By: Claude <noreply@anthropic.com> Co-Authored-By: Happy <yesreply@happy.engineering>
This commit is contained in:
@@ -346,11 +346,188 @@ class AnimeSamaDownloader(BaseDownloader):
|
||||
filename = f"{anime_name} - Episode {episode}.mp4"
|
||||
return filename.title()
|
||||
|
||||
async def search_anime(self, query: str, lang: str = "vostfr") -> list[dict]:
|
||||
async def get_anime_metadata(self, anime_url: str) -> dict:
    """
    Extract rich metadata from an anime page.

    The page at ``anime_url`` is fetched and scraped with a series of
    best-effort heuristics (CSS selectors first, then regular expressions
    over the page text). Any field that cannot be found keeps its default.

    Args:
        anime_url: URL of the anime page to scrape. Also used as the base
            for resolving relative image URLs.

    Returns:
        A dict with the keys ``synopsis``, ``genres``, ``rating``,
        ``release_year``, ``studio``, ``poster_image``, ``banner_image``,
        ``total_episodes``, ``status`` and ``alternative_titles``.
        An empty dict is returned when the page cannot be fetched or parsed.
    """
    # Function-scope imports: done once per call instead of once per loop
    # iteration, without relying on what the module imports at file level.
    import datetime
    from collections import Counter
    from urllib.parse import urljoin

    try:
        print(f"[ANIME-SAMA] Extracting metadata from: {anime_url}")
        response = await self.client.get(anime_url)
        soup = BeautifulSoup(response.text, 'lxml')

        metadata = {
            'synopsis': None,
            'genres': [],
            'rating': None,
            'release_year': None,
            'studio': None,
            'poster_image': None,
            'banner_image': None,
            'total_episodes': None,
            'status': None,
            'alternative_titles': []
        }

        # --- Synopsis -------------------------------------------------
        # Try the most specific selectors first; accept the first element
        # whose text is long enough to be a real description.
        synopsis_selectors = [
            'div.synopsis',
            'div.description',
            'div[class*="synopsis"]',
            'div[class*="description"]',
            'p.synopsis',
            'div.texte',
            '.asn-synopsis'
        ]

        for selector in synopsis_selectors:
            synopsis_elem = soup.select_one(selector)
            if synopsis_elem:
                synopsis = synopsis_elem.get_text(strip=True)
                if len(synopsis) > 50:  # Ensure it's actual content
                    metadata['synopsis'] = synopsis
                    break

        # --- Genres ---------------------------------------------------
        # `s?` allows the plural label; the previous `Genre?` made the final
        # "e" optional instead, so "Genres: Action" captured "s: Action".
        genre_patterns = [
            r'Genres?\s*:?\s*([^\n]+)',
            r'Types?\s*:?\s*([^\n]+)',
        ]

        # Links whose href mentions genre/tag/type are treated as genre tags.
        genre_links = soup.find_all('a', href=re.compile(r'genre|tag|type', re.I))
        if genre_links:
            link_texts = [link.get_text(strip=True) for link in genre_links[:5]]
            # An empty label is not a genre.
            metadata['genres'] = [text for text in link_texts if text]

        # Also try to find genres in the plain page text.
        page_text = soup.get_text()
        for pattern in genre_patterns:
            match = re.search(pattern, page_text, re.IGNORECASE)
            if match:
                genres_text = match.group(1)
                # Split by common separators
                genres = [g.strip() for g in re.split(r'[,;/|]', genres_text)]
                genres = [g for g in genres if g and len(g) > 2]
                if genres:
                    metadata['genres'].extend(genres)
                    break

        # Remove duplicates while keeping first-seen order, so the result
        # is stable from one call to the next (a set would shuffle it).
        metadata['genres'] = list(dict.fromkeys(metadata['genres']))

        # --- Rating ---------------------------------------------------
        rating_selectors = [
            'span.rating',
            'div.rating',
            'span.score',
            'div[class*="rating"]',
            'div[class*="score"]',
            '.asn-rating'
        ]

        for selector in rating_selectors:
            rating_elem = soup.select_one(selector)
            if rating_elem:
                rating_text = rating_elem.get_text(strip=True)

                # Ratings already expressed out of 10, e.g. "8.5/10".
                rating_match = re.search(r'(\d+\.?\d*)\s*/\s*10', rating_text)
                if rating_match:
                    metadata['rating'] = f"{rating_match.group(1)}/10"
                    break

                # Ratings out of 5, e.g. "4/5", are normalised to /10.
                rating_match = re.search(r'(\d+\.?\d*)\s*/\s*5', rating_text)
                if rating_match:
                    rating_val = float(rating_match.group(1)) * 2  # Convert to /10
                    metadata['rating'] = f"{rating_val:.1f}/10"
                    break

        # --- Release year ---------------------------------------------
        # Labelled patterns come first so an explicit "Année: 2019" wins;
        # the bare four-digit fallback is tried only when no label matched.
        # (With the fallback first, the labelled patterns were unreachable.)
        year_patterns = [
            r'Année?\s*:?\s*(\d{4})',
            r'Year?\s*:?\s*(\d{4})',
            r'Sortie?\s*:?\s*(\d{4})',
            # Exactly four digits, not a slice of a longer number.
            r'(?<!\d)(\d{4})(?!\d)',
        ]

        # Accept years between 1950 and two years ahead (announced titles).
        max_year = datetime.datetime.now().year + 2

        for pattern in year_patterns:
            valid_years = [
                int(m)
                for m in re.findall(pattern, page_text)
                if 1950 <= int(m) <= max_year
            ]
            if valid_years:
                # Take the most common year (likely the release year)
                metadata['release_year'] = Counter(valid_years).most_common(1)[0][0]
                break

        # --- Studio ---------------------------------------------------
        studio_patterns = [
            r'Studio\s*:?\s*([^\n,]+)',
            r'Produit\s*par\s*:?\s*([^\n,]+)',
            r'Animation\s*:?\s*([^\n,]+)',
        ]

        for pattern in studio_patterns:
            match = re.search(pattern, page_text, re.IGNORECASE)
            if match:
                studio = match.group(1).strip()
                # Reject fragments and runaway captures.
                if 2 < len(studio) < 100:
                    metadata['studio'] = studio
                    break

        # --- Images ---------------------------------------------------
        # Lazy-loaded images keep the real URL in data-src. Relative and
        # protocol-relative paths are resolved against the page URL so the
        # caller always receives something it can load directly.
        poster_elem = soup.select_one('img.poster, img.cover, img[class*="poster"], img[class*="cover"], .asn-poster img')
        if poster_elem:
            poster_src = poster_elem.get('src') or poster_elem.get('data-src')
            if poster_src:
                metadata['poster_image'] = urljoin(anime_url, poster_src)

        banner_elem = soup.select_one('div.banner img, .asn-banner img, img[class*="banner"]')
        if banner_elem:
            banner_src = banner_elem.get('src') or banner_elem.get('data-src')
            if banner_src:
                metadata['banner_image'] = urljoin(anime_url, banner_src)

        # --- Total episodes -------------------------------------------
        # Best effort: a failure while listing episodes must not discard
        # the metadata that was already extracted above.
        try:
            episodes_count = len(await self.get_episodes(anime_url))
        except Exception as e:
            print(f"[ANIME-SAMA] Could not count episodes: {e}")
            episodes_count = 0

        if episodes_count > 0:
            metadata['total_episodes'] = episodes_count

        # --- Status (ongoing/completed) -------------------------------
        # Patterns are checked in order; the first one found anywhere in
        # the page text decides the status.
        status_patterns = [
            r'En\s*cours',
            r'Ongoing',
            r'Terminé',
            r'Completed',
            r'Finished',
        ]

        for pattern in status_patterns:
            if re.search(pattern, page_text, re.IGNORECASE):
                if 'cour' in pattern.lower() or 'ongoing' in pattern.lower():
                    metadata['status'] = 'Ongoing'
                else:
                    metadata['status'] = 'Completed'
                break

        print(f"[ANIME-SAMA] Extracted metadata: {metadata}")
        return metadata

    except Exception as e:
        # Top-level boundary for a best-effort scraper: log and report
        # "no metadata" rather than failing the caller's search.
        print(f"[ANIME-SAMA] Error extracting metadata: {e}")
        import traceback
        traceback.print_exc()
        return {}
|
||||
|
||||
async def search_anime(self, query: str, lang: str = "vostfr", include_metadata: bool = False) -> list[dict]:
|
||||
"""
|
||||
Search for anime on anime-sama
|
||||
Returns list of anime with title, url, and cover image
|
||||
Uses the official Anime-Sama search API which handles typos and fuzzy matching
|
||||
|
||||
Args:
|
||||
query: Search query string
|
||||
lang: Language preference (vostfr, vf)
|
||||
include_metadata: Whether to fetch full metadata for each result (slower)
|
||||
"""
|
||||
try:
|
||||
# Update domains before searching to ensure we have the current domain
|
||||
@@ -395,12 +572,20 @@ class AnimeSamaDownloader(BaseDownloader):
|
||||
if '/saison1/' not in href:
|
||||
href = href.rstrip('/') + f'/saison1/{lang}/'
|
||||
|
||||
results.append({
|
||||
result = {
|
||||
'title': title,
|
||||
'url': href,
|
||||
'cover_image': cover_image,
|
||||
'type': 'search_result'
|
||||
})
|
||||
'type': 'search_result',
|
||||
'metadata': None
|
||||
}
|
||||
|
||||
# Fetch metadata if requested
|
||||
if include_metadata:
|
||||
metadata = await self.get_anime_metadata(href)
|
||||
result['metadata'] = metadata
|
||||
|
||||
results.append(result)
|
||||
|
||||
print(f"[ANIME-SAMA] Found {len(results)} results")
|
||||
return results
|
||||
|
||||
Reference in New Issue
Block a user