feat: Add anime metadata extraction and fix episode selection bug

Features:
- Added rich metadata extraction for all anime providers (Anime-Sama, Neko-Sama, Anime-Ultime, Vostfree)
- New AnimeMetadata model with synopsis, genres, rating, release year, studio, poster/banner images, episode count, and status
- New /api/anime/metadata endpoint for fetching metadata of specific anime
- Enhanced /api/anime/search endpoint with optional include_metadata parameter
- Updated web interface with metadata display (expandable synopsis, genres, rating, year)
- Added metadata toggle checkbox in search UI (disabled by default for performance)

Bug Fixes:
- Fixed episode selection bug where select would reset to default after any change
- Removed onchange event from select element that was causing unwanted reloads
- Fixed download button disappearing after episode download
- Episodes can now be downloaded multiple times without page refresh

Enhancements:
- Metadata displayed with icons (📅 year, ⭐ rating, 🏷️ genres, 📺 episodes, 📡 status)
- Expandable synopsis section for detailed descriptions
- Better visual organization of anime information
- Maintains backward compatibility (metadata is optional)

Generated with [Claude Code](https://claude.ai/code)
via [Happy](https://happy.engineering)

Co-Authored-By: Claude <noreply@anthropic.com>
Co-Authored-By: Happy <yesreply@happy.engineering>
This commit is contained in:
root
2026-01-23 09:36:59 +00:00
parent 40977438ff
commit 20cad0b4fe
7 changed files with 693 additions and 29 deletions
+189 -4
View File
@@ -346,11 +346,188 @@ class AnimeSamaDownloader(BaseDownloader):
filename = f"{anime_name} - Episode {episode}.mp4"
return filename.title()
async def search_anime(self, query: str, lang: str = "vostfr") -> list[dict]:
async def get_anime_metadata(self, anime_url: str) -> dict:
    """
    Extract rich metadata from an Anime-Sama anime page.

    The page is scraped heuristically: each field is looked up through a
    list of candidate CSS selectors or text patterns and left at its
    default when nothing matches.

    Args:
        anime_url: URL of the anime page to fetch and parse.

    Returns:
        A dict with the keys synopsis, genres, rating, release_year,
        studio, poster_image, banner_image, total_episodes, status and
        alternative_titles. An empty dict is returned when fetching or
        parsing the page raises.
    """
    import datetime
    import traceback
    from collections import Counter

    try:
        print(f"[ANIME-SAMA] Extracting metadata from: {anime_url}")
        response = await self.client.get(anime_url)
        soup = BeautifulSoup(response.text, 'lxml')
        page_text = soup.get_text()

        metadata = {
            'synopsis': None,
            'genres': [],
            'rating': None,
            'release_year': None,
            'studio': None,
            'poster_image': None,
            'banner_image': None,
            'total_episodes': None,
            'status': None,
            'alternative_titles': []
        }

        # Synopsis: first candidate element holding more than 50 characters,
        # so short labels/placeholders are not mistaken for a description.
        synopsis_selectors = [
            'div.synopsis',
            'div.description',
            'div[class*="synopsis"]',
            'div[class*="description"]',
            'p.synopsis',
            'div.texte',
            '.asn-synopsis'
        ]
        for selector in synopsis_selectors:
            synopsis_elem = soup.select_one(selector)
            if synopsis_elem:
                synopsis = synopsis_elem.get_text(strip=True)
                if len(synopsis) > 50:
                    metadata['synopsis'] = synopsis
                    break

        # Genres: link texts first, then a labelled "Genre(s): a, b" line.
        # NOTE(review): the href filter is a plain substring match, so it
        # also accepts links such as "instagram" (contains "tag") - confirm
        # against the real page markup before tightening it.
        genre_links = soup.find_all('a', href=re.compile(r'genre|tag|type', re.I))
        genres = [link.get_text(strip=True) for link in genre_links[:5]]
        # "Genres?" makes the plural optional; the previous "Genre?" made the
        # final "e" optional and left the plural "s" inside the captured text.
        genre_patterns = [
            r'\bGenres?\s*:?\s*([^\n]+)',
            r'\bTypes?\s*:?\s*([^\n]+)',
        ]
        for pattern in genre_patterns:
            match = re.search(pattern, page_text, re.IGNORECASE)
            if not match:
                continue
            labelled = [g.strip() for g in re.split(r'[,;/|]', match.group(1))]
            labelled = [g for g in labelled if len(g) > 2]
            if labelled:
                genres.extend(labelled)
                break
        # dict.fromkeys de-duplicates while keeping first-seen order, so the
        # result is deterministic (a set is not).
        metadata['genres'] = list(dict.fromkeys(g for g in genres if g))

        # Rating: normalised to a "<value>/10" string. (?!\d) keeps "8/100"
        # or "4/50" from being read as a /10 or /5 score.
        rating_selectors = [
            'span.rating',
            'div.rating',
            'span.score',
            'div[class*="rating"]',
            'div[class*="score"]',
            '.asn-rating'
        ]
        for selector in rating_selectors:
            rating_elem = soup.select_one(selector)
            if not rating_elem:
                continue
            rating_text = rating_elem.get_text(strip=True)
            out_of_ten = re.search(r'(\d+\.?\d*)\s*/\s*10(?!\d)', rating_text)
            if out_of_ten:
                metadata['rating'] = f"{out_of_ten.group(1)}/10"
                break
            out_of_five = re.search(r'(\d+\.?\d*)\s*/\s*5(?!\d)', rating_text)
            if out_of_five:
                rating_val = float(out_of_five.group(1)) * 2  # Convert to /10
                metadata['rating'] = f"{rating_val:.1f}/10"
                break

        # Release year: explicitly labelled years win; the bare 4-digit
        # pattern is only a fallback. It used to be tried first, which made
        # the labelled patterns unreachable.
        max_year = datetime.datetime.now().year + 2  # allow announced titles
        year_patterns = [
            r'Années?\s*:?\s*(\d{4})(?!\d)',
            r'Years?\s*:?\s*(\d{4})(?!\d)',
            r'Sorties?\s*:?\s*(\d{4})(?!\d)',
            r'(?<!\d)(\d{4})(?!\d)',
        ]
        for pattern in year_patterns:
            found = re.findall(pattern, page_text, re.IGNORECASE)
            valid_years = [int(y) for y in found if 1950 <= int(y) <= max_year]
            if valid_years:
                # Most frequent candidate is taken as the release year.
                metadata['release_year'] = Counter(valid_years).most_common(1)[0][0]
                break

        # Studio: first labelled value of plausible length.
        studio_patterns = [
            r'Studio\s*:?\s*([^\n,]+)',
            r'Produit\s*par\s*:?\s*([^\n,]+)',
            r'Animation\s*:?\s*([^\n,]+)',
        ]
        for pattern in studio_patterns:
            match = re.search(pattern, page_text, re.IGNORECASE)
            if match:
                studio = match.group(1).strip()
                if 2 < len(studio) < 100:
                    metadata['studio'] = studio
                    break

        # Poster and banner images (lazy-loaded images expose data-src).
        poster_elem = soup.select_one('img.poster, img.cover, img[class*="poster"], img[class*="cover"], .asn-poster img')
        if poster_elem:
            metadata['poster_image'] = poster_elem.get('src') or poster_elem.get('data-src')

        banner_elem = soup.select_one('div.banner img, .asn-banner img, img[class*="banner"]')
        if banner_elem:
            metadata['banner_image'] = banner_elem.get('src') or banner_elem.get('data-src')

        # Episode count comes from a separate lookup; a failure there must
        # not throw away the fields already scraped above.
        try:
            episodes = await self.get_episodes(anime_url)
        except Exception as e:
            print(f"[ANIME-SAMA] Could not count episodes: {e}")
            episodes = None
        if episodes:
            metadata['total_episodes'] = len(episodes)

        # Status: the first marker found anywhere in the page text wins.
        status_patterns = [
            (r'En\s*cours', 'Ongoing'),
            (r'Ongoing', 'Ongoing'),
            (r'Terminé', 'Completed'),
            (r'Completed', 'Completed'),
            (r'Finished', 'Completed'),
        ]
        for pattern, label in status_patterns:
            if re.search(pattern, page_text, re.IGNORECASE):
                metadata['status'] = label
                break

        print(f"[ANIME-SAMA] Extracted metadata: {metadata}")
        return metadata
    except Exception as e:
        # Best-effort scrape: callers get an empty dict instead of an error.
        print(f"[ANIME-SAMA] Error extracting metadata: {e}")
        traceback.print_exc()
        return {}
async def search_anime(self, query: str, lang: str = "vostfr", include_metadata: bool = False) -> list[dict]:
"""
Search for anime on anime-sama
Returns list of anime with title, url, and cover image
Uses the official Anime-Sama search API which handles typos and fuzzy matching
Args:
query: Search query string
lang: Language preference (vostfr, vf)
include_metadata: Whether to fetch full metadata for each result (slower)
"""
try:
# Update domains before searching to ensure we have the current domain
@@ -395,12 +572,20 @@ class AnimeSamaDownloader(BaseDownloader):
if '/saison1/' not in href:
href = href.rstrip('/') + f'/saison1/{lang}/'
results.append({
result = {
'title': title,
'url': href,
'cover_image': cover_image,
'type': 'search_result'
})
'type': 'search_result',
'metadata': None
}
# Fetch metadata if requested
if include_metadata:
metadata = await self.get_anime_metadata(href)
result['metadata'] = metadata
results.append(result)
print(f"[ANIME-SAMA] Found {len(results)} results")
return results
+126 -4
View File
@@ -165,10 +165,124 @@ class AnimeUltimeDownloader(BaseDownloader):
filename = f"{anime_name} - Episode {episode}.mp4"
return filename.title()
async def search_anime(self, query: str, lang: str = "vostfr") -> list[dict]:
async def get_anime_metadata(self, anime_url: str) -> dict:
    """
    Extract rich metadata from an Anime-Ultime anime page.

    Each field is scraped heuristically and left at its default when
    nothing matches. studio, banner_image, status and alternative_titles
    are not extracted by this provider and keep their defaults.

    Args:
        anime_url: URL of the anime page to fetch and parse.

    Returns:
        A dict with the keys synopsis, genres, rating, release_year,
        studio, poster_image, banner_image, total_episodes, status and
        alternative_titles. An empty dict is returned when fetching or
        parsing the page raises.
    """
    import datetime
    from collections import Counter

    try:
        print(f"[ANIME-ULTIME] Extracting metadata from: {anime_url}")
        response = await self.client.get(anime_url)
        soup = BeautifulSoup(response.text, 'lxml')
        page_text = soup.get_text()

        metadata = {
            'synopsis': None,
            'genres': [],
            'rating': None,
            'release_year': None,
            'studio': None,
            'poster_image': None,
            'banner_image': None,
            'total_episodes': None,
            'status': None,
            'alternative_titles': []
        }

        # Synopsis: first candidate element holding more than 50 characters.
        # The list previously contained 'div[class*="synopsis"]' twice; the
        # second entry is now the description variant.
        synopsis_selectors = [
            'div.synopsis',
            'div.description',
            'div[class*="synopsis"]',
            'div[class*="description"]',
            'p.synopsis',
            '.info',
            'div.texte'
        ]
        for selector in synopsis_selectors:
            synopsis_elem = soup.select_one(selector)
            if synopsis_elem:
                synopsis = synopsis_elem.get_text(strip=True)
                if len(synopsis) > 50:
                    metadata['synopsis'] = synopsis
                    break

        # Genres: a <meta> genre tag first, then up to five matching links.
        genres = []
        genre_meta = soup.find('meta', property='genre') or soup.find('meta', attrs={'name': 'genre'})
        if genre_meta:
            genres_text = genre_meta.get('content', '')
            if genres_text:
                genres.extend(g.strip() for g in genres_text.split(','))
        genre_links = soup.find_all('a', href=re.compile(r'genre|tag|type|cat', re.I))
        genres.extend(link.get_text(strip=True) for link in genre_links[:5])
        # Drop empty entries (e.g. from "a,,b") and duplicates, keeping order.
        metadata['genres'] = list(dict.fromkeys(g for g in genres if g))

        # Rating: normalised to a "<value>/10" string. (?!\d) keeps "8/100"
        # or "4/50" from being read as a /10 or /5 score.
        rating_selectors = [
            'span.rating',
            'div.rating',
            'span.score',
            'div.note',
            '.rating'
        ]
        for selector in rating_selectors:
            rating_elem = soup.select_one(selector)
            if not rating_elem:
                continue
            rating_text = rating_elem.get_text(strip=True)
            out_of_ten = re.search(r'(\d+\.?\d*)\s*/\s*10(?!\d)', rating_text)
            if out_of_ten:
                metadata['rating'] = f"{out_of_ten.group(1)}/10"
                break
            out_of_five = re.search(r'(\d+\.?\d*)\s*/\s*5(?!\d)', rating_text)
            if out_of_five:
                rating_val = float(out_of_five.group(1)) * 2  # Convert to /10
                metadata['rating'] = f"{rating_val:.1f}/10"
                break

        # Release year: most frequent plausible year on the page. Only the
        # first year-like number used to be checked, so one out-of-range hit
        # (e.g. "1901") meant no year at all.
        max_year = datetime.datetime.now().year + 2  # allow announced titles
        year_matches = re.findall(r'\b(19\d{2}|20\d{2})\b', page_text)
        valid_years = [int(y) for y in year_matches if 1950 <= int(y) <= max_year]
        if valid_years:
            metadata['release_year'] = Counter(valid_years).most_common(1)[0][0]

        # Poster image from the Open Graph tag.
        og_image = soup.find('meta', property='og:image')
        if og_image:
            metadata['poster_image'] = og_image.get('content')

        # Episode count comes from a separate lookup; a failure there must
        # not throw away the fields already scraped above.
        try:
            episodes = await self.get_episodes(anime_url)
        except Exception as e:
            print(f"[ANIME-ULTIME] Could not count episodes: {e}")
            episodes = None
        if episodes:
            metadata['total_episodes'] = len(episodes)

        print(f"[ANIME-ULTIME] Extracted metadata: {metadata}")
        return metadata
    except Exception as e:
        # Best-effort scrape: callers get an empty dict instead of an error.
        print(f"[ANIME-ULTIME] Error extracting metadata: {e}")
        return {}
async def search_anime(self, query: str, lang: str = "vostfr", include_metadata: bool = False) -> list[dict]:
"""
Search for anime on anime-ultime
Returns list of anime with title, url, and cover image
Args:
query: Search query string
lang: Language preference (vostfr, vf)
include_metadata: Whether to fetch full metadata for each result (slower)
"""
try:
import time
@@ -231,11 +345,19 @@ class AnimeUltimeDownloader(BaseDownloader):
if not href.startswith('http'):
href = urljoin("https://www.anime-ultime.net/", href)
results.append({
result_item = {
'title': better_title,
'url': href,
'type': 'search_result'
})
'type': 'search_result',
'metadata': None
}
# Fetch metadata if requested
if include_metadata:
metadata = await self.get_anime_metadata(href)
result_item['metadata'] = metadata
results.append(result_item)
print(f"[ANIME-ULTIME] Found {len(results)} results")
return results
+109 -4
View File
@@ -111,9 +111,107 @@ class NekoSamaDownloader(BaseDownloader):
except Exception as e:
return []
async def search_anime(self, query: str, lang: str = "vostfr") -> list[dict]:
async def get_anime_metadata(self, anime_url: str) -> dict:
    """
    Extract rich metadata from a Neko-Sama anime page.

    Each field is scraped heuristically and left at its default when
    nothing matches. studio, banner_image, status and alternative_titles
    are not extracted by this provider and keep their defaults.

    Args:
        anime_url: URL of the anime page to fetch and parse.

    Returns:
        A dict with the keys synopsis, genres, rating, release_year,
        studio, poster_image, banner_image, total_episodes, status and
        alternative_titles. An empty dict is returned when fetching or
        parsing the page raises.
    """
    import datetime
    from collections import Counter

    try:
        print(f"[NEKO-SAMA] Extracting metadata from: {anime_url}")
        response = await self.client.get(anime_url)
        soup = BeautifulSoup(response.text, 'lxml')

        metadata = {
            'synopsis': None,
            'genres': [],
            'rating': None,
            'release_year': None,
            'studio': None,
            'poster_image': None,
            'banner_image': None,
            'total_episodes': None,
            'status': None,
            'alternative_titles': []
        }

        # Synopsis: first candidate element holding more than 50 characters.
        synopsis_selectors = [
            'div.synopsis',
            'div.description',
            'div[class*="synopsis"]',
            'div[class*="desc"]',
            'p.synopsis',
            '.anime-synopsis',
            '.summary'
        ]
        for selector in synopsis_selectors:
            synopsis_elem = soup.select_one(selector)
            if synopsis_elem:
                synopsis = synopsis_elem.get_text(strip=True)
                if len(synopsis) > 50:
                    metadata['synopsis'] = synopsis
                    break

        # Genres: text of up to five matching links, without empty entries
        # or duplicates (first-seen order is kept).
        genre_links = soup.find_all('a', href=re.compile(r'genre|tag|type', re.I))
        link_texts = (link.get_text(strip=True) for link in genre_links[:5])
        metadata['genres'] = list(dict.fromkeys(t for t in link_texts if t))

        # Rating: normalised to a "<value>/10" string. /5 scores are doubled,
        # matching the Anime-Sama and Anime-Ultime extractors; (?!\d) keeps
        # "8/100" or "4/50" from being read as a /10 or /5 score.
        rating_selectors = [
            'span.rating',
            'div.rating',
            'span.score',
            'div[class*="rating"]',
            'div[class*="score"]'
        ]
        for selector in rating_selectors:
            rating_elem = soup.select_one(selector)
            if not rating_elem:
                continue
            rating_text = rating_elem.get_text(strip=True)
            out_of_ten = re.search(r'(\d+\.?\d*)\s*/\s*10(?!\d)', rating_text)
            if out_of_ten:
                metadata['rating'] = f"{out_of_ten.group(1)}/10"
                break
            out_of_five = re.search(r'(\d+\.?\d*)\s*/\s*5(?!\d)', rating_text)
            if out_of_five:
                rating_val = float(out_of_five.group(1)) * 2  # Convert to /10
                metadata['rating'] = f"{rating_val:.1f}/10"
                break

        # Release year: most frequent plausible year found in the page text.
        page_text = soup.get_text()
        max_year = datetime.datetime.now().year + 2  # allow announced titles
        year_matches = re.findall(r'\b(19\d{2}|20\d{2})\b', page_text)
        valid_years = [int(y) for y in year_matches if 1950 <= int(y) <= max_year]
        if valid_years:
            metadata['release_year'] = Counter(valid_years).most_common(1)[0][0]

        # Poster image (lazy-loaded images expose data-src).
        poster_elem = soup.select_one('img.poster, img.cover, .anime-poster img')
        if poster_elem:
            metadata['poster_image'] = poster_elem.get('src') or poster_elem.get('data-src')

        # Episode count comes from a separate lookup; a failure there must
        # not throw away the fields already scraped above.
        try:
            episodes = await self.get_episodes(anime_url)
        except Exception as e:
            print(f"[NEKO-SAMA] Could not count episodes: {e}")
            episodes = None
        if episodes:
            metadata['total_episodes'] = len(episodes)

        print(f"[NEKO-SAMA] Extracted metadata: {metadata}")
        return metadata
    except Exception as e:
        # Best-effort scrape: callers get an empty dict instead of an error.
        print(f"[NEKO-SAMA] Error extracting metadata: {e}")
        return {}
async def search_anime(self, query: str, lang: str = "vostfr", include_metadata: bool = False) -> list[dict]:
"""
Search for anime on neko-sama
Args:
query: Search query string
lang: Language preference (vostfr, vf)
include_metadata: Whether to fetch full metadata for each result (slower)
"""
try:
import time
@@ -130,11 +228,18 @@ class NekoSamaDownloader(BaseDownloader):
if response.status_code == 200:
print(f"[NEKO-SAMA] Found anime at {str(response.url)}")
return [{
result = {
'title': query,
'url': str(response.url),
'type': 'direct'
}]
'type': 'direct',
'metadata': None
}
if include_metadata:
metadata = await self.get_anime_metadata(str(response.url))
result['metadata'] = metadata
return [result]
print(f"[NEKO-SAMA] No anime found")
return []
+113 -4
View File
@@ -111,9 +111,111 @@ class VostfreeDownloader(BaseDownloader):
except Exception as e:
return []
async def search_anime(self, query: str, lang: str = "vostfr") -> list[dict]:
async def get_anime_metadata(self, anime_url: str) -> dict:
    """
    Extract rich metadata from a Vostfree anime page.

    Each field is scraped heuristically and left at its default when
    nothing matches. studio, banner_image, status and alternative_titles
    are not extracted by this provider and keep their defaults.

    Args:
        anime_url: URL of the anime page to fetch and parse.

    Returns:
        A dict with the keys synopsis, genres, rating, release_year,
        studio, poster_image, banner_image, total_episodes, status and
        alternative_titles. An empty dict is returned when fetching or
        parsing the page raises.
    """
    import datetime
    from collections import Counter

    try:
        print(f"[VOSTFREE] Extracting metadata from: {anime_url}")
        response = await self.client.get(anime_url)
        soup = BeautifulSoup(response.text, 'lxml')

        metadata = {
            'synopsis': None,
            'genres': [],
            'rating': None,
            'release_year': None,
            'studio': None,
            'poster_image': None,
            'banner_image': None,
            'total_episodes': None,
            'status': None,
            'alternative_titles': []
        }

        # Synopsis: first candidate element holding more than 50 characters.
        synopsis_selectors = [
            'div.synopsis',
            'div.description',
            'div[class*="synopsis"]',
            'div[class*="desc"]',
            'p.synopsis',
            '.anime-synopsis'
        ]
        for selector in synopsis_selectors:
            synopsis_elem = soup.select_one(selector)
            if synopsis_elem:
                synopsis = synopsis_elem.get_text(strip=True)
                if len(synopsis) > 50:
                    metadata['synopsis'] = synopsis
                    break

        # Genres: text of up to five matching links, without empty entries
        # or duplicates (first-seen order is kept).
        genre_links = soup.find_all('a', href=re.compile(r'genre|tag|type', re.I))
        link_texts = (link.get_text(strip=True) for link in genre_links[:5])
        metadata['genres'] = list(dict.fromkeys(t for t in link_texts if t))

        # Rating: normalised to a "<value>/10" string. /5 scores are doubled,
        # matching the Anime-Sama and Anime-Ultime extractors; (?!\d) keeps
        # "8/100" or "4/50" from being read as a /10 or /5 score.
        rating_selectors = [
            'span.rating',
            'div.rating',
            'span.score',
            'div[class*="rating"]',
            'div[class*="score"]'
        ]
        for selector in rating_selectors:
            rating_elem = soup.select_one(selector)
            if not rating_elem:
                continue
            rating_text = rating_elem.get_text(strip=True)
            out_of_ten = re.search(r'(\d+\.?\d*)\s*/\s*10(?!\d)', rating_text)
            if out_of_ten:
                metadata['rating'] = f"{out_of_ten.group(1)}/10"
                break
            out_of_five = re.search(r'(\d+\.?\d*)\s*/\s*5(?!\d)', rating_text)
            if out_of_five:
                rating_val = float(out_of_five.group(1)) * 2  # Convert to /10
                metadata['rating'] = f"{rating_val:.1f}/10"
                break

        # Release year: most frequent plausible year found in the page text.
        page_text = soup.get_text()
        max_year = datetime.datetime.now().year + 2  # allow announced titles
        year_matches = re.findall(r'\b(19\d{2}|20\d{2})\b', page_text)
        valid_years = [int(y) for y in year_matches if 1950 <= int(y) <= max_year]
        if valid_years:
            metadata['release_year'] = Counter(valid_years).most_common(1)[0][0]

        # Poster image: an <img> on the page first (lazy-loaded images expose
        # data-src), then the Open Graph tag as a fallback.
        poster_elem = soup.select_one('img.poster, img.cover, .anime-poster img')
        if poster_elem:
            metadata['poster_image'] = poster_elem.get('src') or poster_elem.get('data-src')
        if not metadata['poster_image']:
            og_image = soup.find('meta', property='og:image')
            if og_image:
                metadata['poster_image'] = og_image.get('content')

        # Episode count comes from a separate lookup; a failure there must
        # not throw away the fields already scraped above.
        try:
            episodes = await self.get_episodes(anime_url)
        except Exception as e:
            print(f"[VOSTFREE] Could not count episodes: {e}")
            episodes = None
        if episodes:
            metadata['total_episodes'] = len(episodes)

        print(f"[VOSTFREE] Extracted metadata: {metadata}")
        return metadata
    except Exception as e:
        # Best-effort scrape: callers get an empty dict instead of an error.
        print(f"[VOSTFREE] Error extracting metadata: {e}")
        return {}
async def search_anime(self, query: str, lang: str = "vostfr", include_metadata: bool = False) -> list[dict]:
"""
Search for anime on vostfree
Args:
query: Search query string
lang: Language preference (vostfr, vf)
include_metadata: Whether to fetch full metadata for each result (slower)
"""
try:
import time
@@ -130,11 +232,18 @@ class VostfreeDownloader(BaseDownloader):
if response.status_code == 200:
print(f"[VOSTFREE] Found anime at {str(response.url)}")
return [{
result = {
'title': query,
'url': str(response.url),
'type': 'direct'
}]
'type': 'direct',
'metadata': None
}
if include_metadata:
metadata = await self.get_anime_metadata(str(response.url))
result['metadata'] = metadata
return [result]
print(f"[VOSTFREE] No anime found")
return []
+23
View File
@@ -40,3 +40,26 @@ class DownloadTask(BaseModel):
class DownloadRequest(BaseModel):
url: str
filename: Optional[str] = None
class AnimeMetadata(BaseModel):
"""
Metadata for an anime series.

Field names mirror the keys of the dict built by the providers'
get_anime_metadata() methods. Every field has a default, so a partial
or empty dict is also accepted.
"""
# Free-text description of the series.
synopsis: Optional[str] = None
# Genre labels; empty when none were found.
# NOTE(review): relies on BaseModel copying mutable defaults per instance - confirm for the pydantic version in use.
genres: list[str] = []
rating: Optional[str] = None # Could be "PG-13", "R", etc., or numeric like "8.5/10"
release_year: Optional[int] = None
studio: Optional[str] = None
# Image locations as found on the provider page.
poster_image: Optional[str] = None
banner_image: Optional[str] = None
# Number of episodes reported for the series.
total_episodes: Optional[int] = None
status: Optional[str] = None # "Ongoing", "Completed", etc.
alternative_titles: list[str] = []
class AnimeSearchResult(BaseModel):
"""
Enhanced search result with metadata.

One entry of the list returned by a provider's search_anime().
"""
# Display title of the result.
title: str
# URL of the anime page for this result.
url: str
# Cover image for the result; optional because not every provider supplies one.
cover_image: Optional[str] = None
type: str # "search_result" or "direct"
# Left as None unless the search was run with include_metadata enabled.
metadata: Optional[AnimeMetadata] = None