Files
ohm_streaming/app/downloaders/anime_sites/frenchmanga.py
T
root 4d280b5239 docs: Update CLAUDE.md with three-tier architecture and new providers
- Added new video players: Vidzy, LuLuvid, Uqload
- Added new anime site: French-Manga
- Added new series sites category with FS7
- Updated documentation to reflect three-tier architecture (anime sites → series sites → video players)
- Added BaseSeriesSite interface documentation
- Added "Adding New Series Site" section
- Updated test organization with test_french_manga.py

Generated with [Claude Code](https://claude.ai/code)
via [Happy](https://happy.engineering)

Co-Authored-By: Claude <noreply@anthropic.com>
Co-Authored-By: Happy <yesreply@happy.engineering>
2026-01-25 10:34:39 +00:00

300 lines
10 KiB
Python

"""French-Manga.net anime streaming site downloader"""
from .base import BaseAnimeSite
from bs4 import BeautifulSoup
import re
from typing import List, Dict, Any
from app.utils import sanitize_filename
import logging
# Module-level logger, named after this module via ``__name__``.
logger = logging.getLogger(__name__)
class FrenchMangaDownloader(BaseAnimeSite):
    """Downloader for the french-manga.net anime streaming site.

    The class scrapes search results, episode lists, page metadata and the
    embedded video-player URL from the site's HTML.  HTTP requests go through
    ``self.client`` (not defined in this class; presumably provided by
    ``BaseAnimeSite`` -- confirm against the base class).
    """

    # Known domains for French-Manga
    BASE_DOMAINS = [
        "french-manga.net",
        "w16.french-manga.net",
        "w15.french-manga.net",
        "www.french-manga.net",
    ]

    # Matches "Episode 12" / "Épisode 12" (case-insensitive, trailing "e"
    # optional as in the original pattern) and captures the episode number.
    _EPISODE_RE = re.compile(r'[eé]pisode?\s*(\d+)', re.IGNORECASE)

    # href filter used to pick out genre links on an anime page.
    _GENRE_HREF_RE = re.compile(r'/xfsearch/.*genre/')

    # Fallback patterns for finding a player URL inside inline JavaScript,
    # tried in this order for every <script> tag.
    _SCRIPT_URL_PATTERNS = (
        re.compile(r'iframe.*?src=["\']([^"\']+)["\']', re.IGNORECASE),
        re.compile(r'video.*?src=["\']([^"\']+)["\']', re.IGNORECASE),
    )

    def __init__(self):
        """Initialise the base downloader and set the site's base URL."""
        super().__init__()
        self.base_url = "https://w16.french-manga.net"

    def can_handle(self, url: str) -> bool:
        """Check if this downloader can handle the given URL.

        Args:
            url: Any URL string.

        Returns:
            True when one of ``BASE_DOMAINS`` occurs anywhere in the
            lower-cased URL (plain substring match).
        """
        return any(domain in url.lower() for domain in self.BASE_DOMAINS)

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------

    def _absolute_url(self, url: str) -> str:
        """Turn a protocol-relative or root-relative URL into an absolute one.

        ``//host/path`` gets an ``https:`` scheme, ``/path`` is prefixed with
        ``self.base_url``; anything else is returned unchanged.
        """
        # Check '//' first: it also starts with '/', and prefixing it with
        # base_url would produce a broken "https://site//host/path" URL.
        if url.startswith('//'):
            return 'https:' + url
        if url.startswith('/'):
            return self.base_url + url
        return url

    @staticmethod
    def _class_contains(fragment: str):
        """Build a BeautifulSoup ``class_`` filter.

        The returned callable accepts a CSS class value (or None) and is
        truthy when *fragment* occurs in the lower-cased value.
        """
        return lambda css_class: bool(css_class) and fragment in css_class.lower()

    @staticmethod
    def _empty_metadata() -> Dict[str, Any]:
        """Return a fresh metadata dict holding the default value of every key."""
        return {
            'title': '',
            'synopsis': '',
            'genres': [],
            'rating': '',
            'release_year': '',
            'studio': '',
            'poster_image': '',
            'total_episodes': 0,
            'status': '',
            'languages': ['vf', 'vostfr'],
        }

    def _parse_episodes(self, soup) -> List[Dict[str, Any]]:
        """Extract episode entries from an already parsed anime page.

        Every ``<a href>`` whose text matches ``_EPISODE_RE`` becomes one
        entry.  The episode number is the number that directly follows the
        word "episode", so leading numbers in the text (for example a season
        number) are not mistaken for the episode number.

        Returns:
            Entries sorted by ``episode_number`` (an int), each also carrying
            ``url``, ``title`` and ``host``.
        """
        episodes = []
        for link in soup.find_all('a', href=True):
            text = link.get_text(strip=True)
            match = self._EPISODE_RE.search(text)
            if not match:
                continue
            episodes.append({
                'episode_number': int(match.group(1)),
                'url': self._absolute_url(link['href']),
                'title': text,
                'host': 'french-manga',
            })
        episodes.sort(key=lambda episode: episode['episode_number'])
        return episodes

    def _find_player_url(self, soup):
        """Locate the video-player URL on an episode page.

        Tries, in order: the first ``<iframe>`` with a ``src``, the first
        ``<video>`` with a ``src``, then the ``_SCRIPT_URL_PATTERNS`` inside
        inline ``<script>`` contents.

        Returns:
            The URL string (possibly relative), or None if nothing was found.
        """
        for tag_name in ('iframe', 'video'):
            element = soup.find(tag_name, src=True)
            if element is None:
                continue
            src = element['src'].strip()
            # A blank src is useless as a player URL; keep looking instead
            # of returning an empty string.
            if src:
                return src

        for script in soup.find_all('script'):
            code = script.string
            if not code:
                continue
            for pattern in self._SCRIPT_URL_PATTERNS:
                match = pattern.search(code)
                if match:
                    return match.group(1)
        return None

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    async def search_anime(
        self,
        query: str,
        lang: str = "vostfr"
    ) -> List[Dict[str, str]]:
        """
        Search for anime on French-Manga.

        Args:
            query: Search query (anime title)
            lang: Language preference (vostfr, vf); it is only echoed back
                in each result and does not filter the search.

        Returns:
            List of dicts with ``title``, ``url``, ``cover_image`` and
            ``lang``.  Returns an empty list if anything goes wrong (the
            error is logged, not raised).
        """
        try:
            # Form fields posted to the site's search endpoint.
            search_url = f"{self.base_url}/index.php?do=search"
            params = {
                'do': 'search',
                'subaction': 'search',
                'story': query,
                'x': '0',
                'y': '0',
            }
            response = await self.client.post(search_url, data=params)
            response.raise_for_status()
            soup = BeautifulSoup(response.text, 'lxml')

            results = []
            # Results are <article> elements with a class containing "story".
            for item in soup.find_all('article', class_=self._class_contains('story')):
                title_elem = item.find(['h2', 'h3', 'h4'])
                link_elem = item.find('a', href=True)
                if not (title_elem and link_elem):
                    continue

                cover_image = ""
                img_elem = item.find('img')
                if img_elem and img_elem.get('src'):
                    cover_image = self._absolute_url(img_elem['src'])

                results.append({
                    'title': title_elem.get_text(strip=True),
                    'url': self._absolute_url(link_elem['href']),
                    'cover_image': cover_image,
                    'lang': lang,
                })

            logger.info("Found %s anime results for query: %s", len(results), query)
            return results
        except Exception as e:
            # Deliberate best-effort: a failed search yields no results.
            logger.error("Error searching anime: %s", e)
            return []

    async def get_episodes(
        self,
        anime_url: str,
        lang: str = "vostfr"
    ) -> List[Dict[str, str]]:
        """
        Get episode list for an anime.

        Args:
            anime_url: URL of the anime page
            lang: Language preference (currently unused by this method)

        Returns:
            List of episodes sorted by number, each with ``episode_number``
            (int), ``url``, ``title`` and ``host``.  Returns an empty list if
            anything goes wrong (the error is logged, not raised).
        """
        try:
            response = await self.client.get(anime_url)
            response.raise_for_status()
            soup = BeautifulSoup(response.text, 'lxml')

            episodes = self._parse_episodes(soup)
            logger.info("Found %s episodes for %s", len(episodes), anime_url)
            return episodes
        except Exception as e:
            # Deliberate best-effort: a failed fetch yields no episodes.
            logger.error("Error getting episodes: %s", e)
            return []

    async def get_anime_metadata(self, anime_url: str) -> Dict[str, Any]:
        """
        Get detailed metadata for an anime.

        Args:
            anime_url: URL of the anime page

        Returns:
            Dict with ``title``, ``synopsis``, ``genres``, ``rating``,
            ``release_year``, ``studio``, ``poster_image``,
            ``total_episodes``, ``status`` and ``languages``.  Fields that
            cannot be extracted keep their default; on any error a dict of
            defaults is returned (the error is logged, not raised).
        """
        try:
            response = await self.client.get(anime_url)
            response.raise_for_status()
            soup = BeautifulSoup(response.text, 'lxml')

            metadata = self._empty_metadata()

            # Title: first <h1>, else an <h2 class="title">.
            title_elem = soup.find('h1') or soup.find('h2', class_='title')
            if title_elem:
                metadata['title'] = title_elem.get_text(strip=True)

            # Synopsis: first <div> whose class contains "story".
            synopsis_elem = soup.find('div', class_=self._class_contains('story'))
            if synopsis_elem:
                metadata['synopsis'] = synopsis_elem.get_text(strip=True)

            # Poster: first <img> whose class contains "poster".
            img_elem = soup.find('img', class_=self._class_contains('poster'))
            if img_elem and img_elem.get('src'):
                metadata['poster_image'] = self._absolute_url(img_elem['src'])

            # Genres: text of at most the first 10 genre links, blanks dropped.
            genre_links = soup.find_all('a', href=self._GENRE_HREF_RE)
            genre_names = (link.get_text(strip=True) for link in genre_links[:10])
            metadata['genres'] = [name for name in genre_names if name]

            # Rating (if available)
            rating_elem = soup.find(['span', 'div'], class_=self._class_contains('rating'))
            if rating_elem:
                metadata['rating'] = rating_elem.get_text(strip=True)

            # Count episodes from the page already in hand rather than
            # downloading the same URL a second time.
            metadata['total_episodes'] = len(self._parse_episodes(soup))
            return metadata
        except Exception as e:
            # Deliberate best-effort: fall back to an all-defaults dict.
            logger.error("Error getting anime metadata: %s", e)
            return self._empty_metadata()

    async def get_download_link(self, url: str) -> tuple[str, str]:
        """
        Get download link from episode page.

        For French-Manga, this returns the video player URL.
        The actual video extraction will be handled by the video player
        downloaders.

        Args:
            url: Episode page URL

        Returns:
            Tuple of (video_player_url, episode_title); the title is passed
            through ``sanitize_filename``.

        Raises:
            ValueError: If the page cannot be fetched or no player URL is
                found; the original exception is chained as the cause.
        """
        try:
            response = await self.client.get(url)
            response.raise_for_status()
            soup = BeautifulSoup(response.text, 'lxml')

            video_url = self._find_player_url(soup)
            if video_url is None:
                raise ValueError("Could not find video player URL")
            video_url = self._absolute_url(video_url)

            # Episode title: first <h1>, else first <h2>, else a placeholder.
            title_elem = soup.find('h1') or soup.find('h2')
            episode_title = title_elem.get_text(strip=True) if title_elem else "Episode"
            episode_title = sanitize_filename(episode_title)

            logger.info("Extracted video player URL: %s...", video_url[:60])
            return video_url, episode_title
        except Exception as e:
            logger.error("Error getting download link: %s", e)
            raise ValueError(f"Failed to extract download link: {str(e)}") from e