docs: Update CLAUDE.md with three-tier architecture and new providers

- Added new video players: Vidzy, LuLuvid, Uqload
- Added new anime site: French-Manga
- Added new series sites category with FS7
- Updated documentation to reflect three-tier architecture (anime sites → series sites → video players)
- Added BaseSeriesSite interface documentation
- Added "Adding New Series Site" section
- Updated test organization with test_french_manga.py

Generated with [Claude Code](https://claude.ai/code)
via [Happy](https://happy.engineering)

Co-Authored-By: Claude <noreply@anthropic.com>
Co-Authored-By: Happy <yesreply@happy.engineering>
This commit is contained in:
root
2026-01-25 10:34:39 +00:00
parent 3afad41d46
commit 4d280b5239
16 changed files with 1507 additions and 53 deletions
+23
View File
@@ -0,0 +1,23 @@
"""Series streaming sites (catalogs) downloaders"""
from .base import BaseSeriesSite
# Import all series site downloaders
from .fs7 import FS7Downloader
__all__ = [
"BaseSeriesSite",
"FS7Downloader",
]
def get_series_site(url: str) -> BaseSeriesSite:
"""Factory function to get the appropriate series site for a URL"""
sites = [
FS7Downloader(),
]
for site in sites:
if site.can_handle(url):
return site
# Return None if no match (should not happen in normal flow)
return None
+131
View File
@@ -0,0 +1,131 @@
"""Base class for series streaming sites (catalogs)"""
from abc import abstractmethod
from typing import List, Dict, Any, Optional, Tuple
import logging
import httpx
from bs4 import BeautifulSoup
logger = logging.getLogger(__name__)
class BaseSeriesSite:
"""
Base class for series streaming sites.
Series sites provide catalogs, metadata, and episode listings.
They typically link to video players for actual file hosting.
Examples: FS7 (French Stream), etc.
KEY FEATURE: Provides rich metadata and episode management for TV series
"""
def __init__(self):
# Initialize HTTP client directly
self.client = httpx.AsyncClient(timeout=10.0, follow_redirects=True)
@abstractmethod
def can_handle(self, url: str) -> bool:
"""Check if this series site can handle the given URL"""
pass
@abstractmethod
async def search_anime(
self,
query: str,
lang: str = "vf"
) -> List[Dict[str, str]]:
"""
Search for series on this site.
Args:
query: Search query (series title)
lang: Language preference (vf, vostfr)
Returns:
List of series with keys:
- title: Series title
- url: Series page URL
- cover_image: Optional cover image URL
- lang: Available languages
"""
pass
@abstractmethod
async def get_episodes(
self,
anime_url: str,
lang: str = "vf"
) -> List[Dict[str, str]]:
"""
Get list of episodes for a series.
Args:
anime_url: URL of the series page
lang: Language preference
Returns:
List of episodes with keys:
- episode_number: Episode number
- url: Episode page URL
- title: Optional episode title
- host: Video player hosting the file
"""
pass
@abstractmethod
async def get_anime_metadata(self, anime_url: str) -> Dict[str, Any]:
"""
Get detailed metadata for a series.
Args:
anime_url: URL of the series page
Returns:
Dict with metadata:
- title: Series title
- synopsis: Plot summary
- genres: List of genres
- rating: Rating (e.g., "8.5/10")
- release_year: Release year
- studio: Production studio
- poster_image: Poster URL
- total_episodes: Total episode count
- status: Airing status (ongoing, completed)
- languages: Available languages
"""
pass
@abstractmethod
async def get_download_link(self, url: str) -> Tuple[str, str]:
"""
Get download link for a specific episode.
For series sites, this extracts the video player URL from an episode page.
Note: Returns video player URL, NOT direct download link!
Returns:
Tuple of (video_player_url, episode_title)
"""
pass
# Common methods for all series sites
async def close(self):
"""Close HTTP client"""
await self.client.aclose()
async def _fetch_page(self, url: str) -> str:
"""Fetch HTML page content"""
response = await self.client.get(url)
response.raise_for_status()
return response.text
def _parse_html(self, html: str) -> BeautifulSoup:
"""Parse HTML with BeautifulSoup"""
return BeautifulSoup(html, 'lxml')
def _extract_season_number(self, title: str) -> Optional[int]:
"""Extract season number from title (e.g., 'Saison 2' -> 2)"""
import re
match = re.search(r'saison\s*(\d+)', title.lower())
return int(match.group(1)) if match else None
+262
View File
@@ -0,0 +1,262 @@
"""FS7 (French Stream) series site downloader"""
import logging
import re
from typing import List, Dict, Any, Optional
from urllib.parse import urljoin, urlparse
from bs4 import BeautifulSoup
from app.utils import sanitize_filename
from .base import BaseSeriesSite
logger = logging.getLogger(__name__)
class FS7Downloader(BaseSeriesSite):
"""
Downloader for FS7 (French Stream) series site.
FS7 is a French streaming site for TV series and films.
"""
def __init__(self):
super().__init__()
self.base_url = "https://fs7.lol"
self.search_url = f"{self.base_url}/"
# Update client headers to mimic browser
self.client.headers.update({
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
'Accept-Language': 'fr-FR,fr;q=0.9,en-US;q=0.8,en;q=0.7',
'Accept-Encoding': 'gzip, deflate',
'Connection': 'keep-alive',
'Upgrade-Insecure-Requests': '1'
})
def can_handle(self, url: str) -> bool:
"""Check if this downloader can handle the given URL"""
return "fs7.lol" in url.lower() or "french-stream" in url.lower()
async def search_anime(
self,
query: str,
lang: str = "vf"
) -> List[Dict[str, str]]:
"""
Search for series on FS7.
Args:
query: Search query
lang: Language preference (vf, vostfr)
Returns:
List of series with title, url, cover_image
"""
try:
logger.info(f"Searching FS7 for: {query}")
# FS7 uses GET request with query parameters for search
response = await self.client.get(
self.search_url,
params={
"do": "search",
"subaction": "search",
"story": query
}
)
response.raise_for_status()
html = response.text
soup = BeautifulSoup(html, 'lxml')
results = []
# Look for series items (FS7 has both films and series in search results)
# We filter for /s-tv/ URLs ending with .html (actual series/season pages)
items = soup.find_all('a', href=re.compile(r'/s-tv/\d+-.+\.html'))
for item in items[:20]: # Limit to 20 results
url = item.get('href', '')
if not url.startswith('http'):
url = urljoin(self.base_url, url)
# Extract title from the item
title_elem = item.find('img', alt=True)
if title_elem:
title = title_elem.get('alt', '').strip()
else:
# Get text content and clean it
text = item.get_text(strip=True)
# Skip if it's just a category name
if any(cat in text.lower() for cat in ['séries', 'series', 'vf', 'vostfr', 'vo', 'netflix', 'disney', 'amazon', 'apple']):
continue
title = text
# Extract cover image
img = item.find('img')
cover_image = img.get('src', '') if img else ''
# Only add if we have a title and it's not empty
if title and len(title) > 5:
# Avoid duplicates
if not any(r['url'] == url for r in results):
results.append({
'title': title,
'url': url,
'cover_image': cover_image
})
logger.info(f"Found {len(results)} series on FS7")
return results
except Exception as e:
logger.error(f"Error searching FS7: {e}")
return []
async def get_episodes(
self,
anime_url: str,
lang: str = "vf"
) -> List[Dict[str, str]]:
"""
Get episode list for a series.
Args:
anime_url: URL of the series page
lang: Language preference
Returns:
List of episodes with episode number and url
"""
try:
logger.info(f"Fetching episodes from: {anime_url}")
response = await self.client.get(anime_url)
response.raise_for_status()
html = response.text
soup = BeautifulSoup(html, 'lxml')
episodes = []
# FS7 stores episode data in JavaScript div elements
# Format: <div data-ep="1" data-vidzy="..." data-uqload="..." data-netu="..." data-voe="..."></div>
episode_divs = soup.find_all('div', attrs={'data-ep': True})
for div in episode_divs:
ep_num = div.get('data-ep', '').strip()
# Try different video players in order of preference
video_url = None
for player in ['data-vidzy', 'data-uqload', 'data-voe', 'data-netu']:
player_url = div.get(player, '').strip()
if player_url:
video_url = player_url
logger.debug(f"Found episode {ep_num} on {player}")
break
if video_url and ep_num:
episodes.append({
'episode': ep_num,
'url': video_url
})
# Sort by episode number
episodes.sort(key=lambda x: int(x['episode']) if x['episode'].isdigit() else 0)
logger.info(f"Found {len(episodes)} episodes")
return episodes
except Exception as e:
logger.error(f"Error getting episodes from FS7: {e}")
return []
async def get_anime_metadata(
self,
anime_url: str
) -> Dict[str, Any]:
"""
Get metadata for a series.
Args:
anime_url: URL of the series page
Returns:
Dictionary with metadata
"""
try:
logger.info(f"Fetching metadata from: {anime_url}")
response = await self.client.get(anime_url)
response.raise_for_status()
html = response.text
soup = BeautifulSoup(html, 'lxml')
# Extract title
title = soup.find('h1')
title = title.get_text(strip=True) if title else "Unknown"
# Extract description/synopsis
description_elem = soup.find('div', class_='full-text')
description = description_elem.get_text(strip=True) if description_elem else ""
# Extract cover image
img = soup.find('img', class_='poster')
poster_image = img.get('src', '') if img else ''
# Try to get poster from meta tag if not found
if not poster_image:
meta_img = soup.find('meta', property='og:image')
poster_image = meta_img.get('content', '') if meta_img else ''
# Extract year
year_match = re.search(r'\b(19|20)\d{2}\b', description)
release_year = int(year_match.group()) if year_match else None
return {
'title': title,
'synopsis': description,
'poster_image': poster_image,
'release_year': release_year,
'genres': [],
'rating': None,
'studio': None,
'total_episodes': None,
'status': None
}
except Exception as e:
logger.error(f"Error getting metadata from FS7: {e}")
return {
'title': "Unknown",
'synopsis': "",
'poster_image': '',
'genres': [],
'rating': None,
'release_year': None,
'studio': None,
'total_episodes': None,
'status': None
}
async def get_download_link(
self,
url: str,
target_filename: Optional[str] = None
) -> tuple[str, str]:
"""
Extract download link from video player URL.
Args:
url: Video player URL
target_filename: Optional filename override
Returns:
Tuple of (download_url, filename)
"""
# FS7 uses embedded video players
# Delegate to the appropriate video player downloader
from app.downloaders.video_players import get_video_player
player = get_video_player(url)
if player:
return await player.get_download_link(url, target_filename)
else:
raise ValueError(f"No video player found for URL: {url}")