docs: Update CLAUDE.md with three-tier architecture and new providers
- Added new video players: Vidzy, LuLuvid, Uqload
- Added new anime site: French-Manga
- Added new series sites category with FS7
- Updated documentation to reflect three-tier architecture (anime sites → series sites → video players)
- Added BaseSeriesSite interface documentation
- Added "Adding New Series Site" section
- Updated test organization with test_french_manga.py

Generated with [Claude Code](https://claude.ai/code) via [Happy](https://happy.engineering)

Co-Authored-By: Claude <noreply@anthropic.com>
Co-Authored-By: Happy <yesreply@happy.engineering>
This commit is contained in:
@@ -0,0 +1,299 @@
|
||||
"""French-Manga.net anime streaming site downloader"""
|
||||
from .base import BaseAnimeSite
|
||||
from bs4 import BeautifulSoup
|
||||
import re
|
||||
from typing import List, Dict, Any
|
||||
from app.utils import sanitize_filename
|
||||
import logging
|
||||
|
||||
# Module-level logger, named after this module so log records can be filtered per site.
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class FrenchMangaDownloader(BaseAnimeSite):
    """Downloader for the french-manga.net anime streaming site.

    Provides search, episode listing, metadata scraping and extraction of the
    embedded video player URL. All HTTP traffic goes through ``self.client``,
    which is not defined in this class.
    NOTE(review): ``self.client`` is presumably an async HTTP client set up by
    ``BaseAnimeSite.__init__`` — confirm against the base class.
    """

    # Known domains for French-Manga
    BASE_DOMAINS = [
        "french-manga.net",
        "w16.french-manga.net",
        "w15.french-manga.net",
        "www.french-manga.net",
    ]

    # Matches "episode 12" / "épisode 12" in lower-cased link text and captures
    # the number that follows the keyword. The trailing "e" stays optional, as
    # in the original pattern, so "episod 12" is still accepted.
    _EPISODE_RE = re.compile(r'[eé]pisode?\s*(\d+)')

    # Patterns used to dig a player URL out of inline JavaScript, tried in order.
    _PLAYER_URL_PATTERNS = [
        re.compile(r'iframe.*?src=["\']([^"\']+)["\']', re.IGNORECASE),
        re.compile(r'video.*?src=["\']([^"\']+)["\']', re.IGNORECASE),
    ]

    def __init__(self):
        super().__init__()
        self.base_url = "https://w16.french-manga.net"

    def _absolute_url(self, url: str) -> str:
        """Turn a protocol-relative or root-relative URL into an absolute one.

        ``//host/path`` gets an ``https:`` scheme, ``/path`` is prefixed with
        ``self.base_url``; anything else is returned unchanged.
        """
        # Check "//" first: it also starts with "/", and prefixing the base URL
        # onto it would produce a broken "https://host//cdn..." address.
        if url.startswith('//'):
            return 'https:' + url
        if url.startswith('/'):
            return self.base_url + url
        return url

    @staticmethod
    def _empty_metadata() -> Dict[str, Any]:
        """Return a fresh metadata dict with every field at its empty default."""
        return {
            'title': '',
            'synopsis': '',
            'genres': [],
            'rating': '',
            'release_year': '',
            'studio': '',
            'poster_image': '',
            'total_episodes': 0,
            'status': '',
            'languages': ['vf', 'vostfr'],
        }

    def _find_player_url_in_scripts(self, soup):
        """Return the first iframe/video ``src`` found in inline scripts, or None."""
        for script in soup.find_all('script'):
            # script.string is None for empty or multi-child <script> tags.
            if not script.string:
                continue
            for pattern in self._PLAYER_URL_PATTERNS:
                match = pattern.search(script.string)
                if match:
                    return match.group(1)
        return None

    def can_handle(self, url: str) -> bool:
        """Check if this downloader can handle the given URL.

        Returns True when any entry of ``BASE_DOMAINS`` occurs as a
        case-insensitive substring of ``url``.
        """
        lowered = url.lower()
        return any(domain in lowered for domain in self.BASE_DOMAINS)

    async def search_anime(
        self,
        query: str,
        lang: str = "vostfr"
    ) -> List[Dict[str, str]]:
        """
        Search for anime on French-Manga.

        Args:
            query: Search query (anime title)
            lang: Language preference (vostfr, vf). It is not sent to the
                site; it is only echoed back in each result.

        Returns:
            List of anime dicts with title, url, cover_image and lang.
            An empty list is returned if the request or parsing fails.
        """
        try:
            # French-Manga uses a search endpoint
            search_url = f"{self.base_url}/index.php?do=search"
            params = {
                'do': 'search',
                'subaction': 'search',
                'story': query,
                'x': '0',
                'y': '0'
            }

            response = await self.client.post(search_url, data=params)
            response.raise_for_status()
            soup = BeautifulSoup(response.text, 'lxml')

            results = []

            # Look for search results in <article> tags whose class mentions "story"
            for item in soup.find_all('article', class_=lambda x: x and 'story' in x.lower()):
                title_elem = item.find(['h2', 'h3', 'h4'])
                link_elem = item.find('a', href=True)
                if not (title_elem and link_elem):
                    continue

                img_elem = item.find('img')
                cover_image = ""
                if img_elem and img_elem.get('src'):
                    cover_image = self._absolute_url(img_elem['src'])

                results.append({
                    'title': title_elem.get_text(strip=True),
                    'url': self._absolute_url(link_elem['href']),
                    'cover_image': cover_image,
                    'lang': lang
                })

            logger.info(f"Found {len(results)} anime results for query: {query}")
            return results

        except Exception as e:
            # Best-effort: a failed search is reported as "no results".
            logger.error(f"Error searching anime: {e}")
            return []

    async def get_episodes(
        self,
        anime_url: str,
        lang: str = "vostfr"
    ) -> List[Dict[str, Any]]:
        """
        Get episode list for an anime.

        Every ``<a href>`` whose text contains "episode"/"épisode" followed by
        a number is treated as an episode link.

        Args:
            anime_url: URL of the anime page
            lang: Language preference (currently unused by this method)

        Returns:
            List of episodes with episode_number (int), url, title and host,
            sorted by episode number. An empty list is returned on failure.
        """
        try:
            response = await self.client.get(anime_url)
            response.raise_for_status()
            soup = BeautifulSoup(response.text, 'lxml')

            episodes = []

            for link in soup.find_all('a', href=True):
                text = link.get_text(strip=True)

                # Use the number that follows the keyword, not the first number
                # in the text, so "Saison 2 Episode 5" is episode 5, not 2.
                match = self._EPISODE_RE.search(text.lower())
                if not match:
                    continue

                episodes.append({
                    'episode_number': int(match.group(1)),
                    'url': self._absolute_url(link['href']),
                    'title': text,
                    'host': 'french-manga'
                })

            # Sort by episode number
            episodes.sort(key=lambda x: x['episode_number'])

            logger.info(f"Found {len(episodes)} episodes for {anime_url}")
            return episodes

        except Exception as e:
            # Best-effort: a failed fetch is reported as "no episodes".
            logger.error(f"Error getting episodes: {e}")
            return []

    async def get_anime_metadata(self, anime_url: str) -> Dict[str, Any]:
        """
        Get detailed metadata for an anime.

        Args:
            anime_url: URL of the anime page

        Returns:
            Dict with title, synopsis, genres, rating, release_year, studio,
            poster_image, total_episodes, status and languages. Fields that
            are not scraped stay at their empty defaults; on any failure the
            whole dict is returned with defaults only.
        """
        try:
            response = await self.client.get(anime_url)
            response.raise_for_status()
            soup = BeautifulSoup(response.text, 'lxml')

            # Extract title
            title = ""
            title_elem = soup.find('h1') or soup.find('h2', class_='title')
            if title_elem:
                title = title_elem.get_text(strip=True)

            # Extract synopsis
            synopsis = ""
            synopsis_elem = soup.find('div', class_=lambda x: x and 'story' in x.lower())
            if synopsis_elem:
                synopsis = synopsis_elem.get_text(strip=True)

            # Extract cover image
            poster_image = ""
            img_elem = soup.find('img', class_=lambda x: x and 'poster' in x.lower())
            if img_elem and img_elem.get('src'):
                poster_image = self._absolute_url(img_elem['src'])

            # Extract genres (limited to the first 10 matching links)
            genres = []
            genre_links = soup.find_all('a', href=re.compile(r'/xfsearch/.*genre/'))
            for link in genre_links[:10]:
                genre = link.get_text(strip=True)
                if genre:
                    genres.append(genre)

            # Extract rating (if available)
            rating = ""
            rating_elem = soup.find(['span', 'div'], class_=lambda x: x and 'rating' in x.lower())
            if rating_elem:
                rating = rating_elem.get_text(strip=True)

            # NOTE(review): get_episodes() downloads anime_url a second time.
            # Kept as a call to the public method so overrides still take
            # effect; consider sharing the parsed page if that is not needed.
            episodes = await self.get_episodes(anime_url)

            metadata = self._empty_metadata()
            metadata.update({
                'title': title,
                'synopsis': synopsis,
                'genres': genres,
                'rating': rating,
                'poster_image': poster_image,
                'total_episodes': len(episodes),
            })
            return metadata

        except Exception as e:
            logger.error(f"Error getting anime metadata: {e}")
            return self._empty_metadata()

    async def get_download_link(self, url: str) -> tuple[str, str]:
        """
        Get download link from episode page.

        For French-Manga, this returns the video player URL.
        The actual video extraction will be handled by the video player downloaders.

        The player URL is taken from the first ``<iframe src>``; failing that,
        the first ``<video src>``; failing that, an iframe/video ``src`` found
        inside inline scripts.

        Args:
            url: Episode page URL

        Returns:
            Tuple of (video_player_url, episode_title)

        Raises:
            ValueError: If the page cannot be fetched or no player URL is found.
        """
        try:
            response = await self.client.get(url)
            response.raise_for_status()
            soup = BeautifulSoup(response.text, 'lxml')

            # Explicit sentinel instead of probing locals() for the name.
            video_url = None

            iframe = soup.find('iframe', src=True)
            if iframe:
                video_url = iframe['src']
            else:
                # Look for video tag directly
                video = soup.find('video', src=True)
                if video:
                    video_url = video['src']
                else:
                    # Try to find in script tags
                    video_url = self._find_player_url_in_scripts(soup)

            if video_url is None:
                raise ValueError("Could not find video player URL")

            # Ensure absolute URL
            video_url = self._absolute_url(video_url)

            # Extract episode title
            title_elem = soup.find('h1') or soup.find('h2')
            episode_title = title_elem.get_text(strip=True) if title_elem else "Episode"
            episode_title = sanitize_filename(episode_title)

            logger.info(f"Extracted video player URL: {video_url[:60]}...")
            return video_url, episode_title

        except Exception as e:
            logger.error(f"Error getting download link: {e}")
            # Chain explicitly so the original traceback is kept as __cause__.
            raise ValueError(f"Failed to extract download link: {str(e)}") from e
|
||||
Reference in New Issue
Block a user