feat: add multiple video player support for Frieren S2 downloads

- Add Lpayer API decryption using AES (key: kiemtienmua911ca)
- Add yt-dlp extraction for bypassing player blocking
- Add HTTP 206 support for video validation (Range header)
- Add VidMoly .biz domain support (alternative to .to)
- Add SendVid extraction (working - downloaded S1 and S2 E1)
- Add player fallback system with caching per anime URL
- Add video URL validation before returning to downloader
- Update HTTP clients with realistic browser headers
- Add pycryptodome to requirements.txt
- Add test file for fallback system

Download status: SendVid (primary, working), Lpayer (403 issue), VidMoly (testing)
2026-02-25 16:29:53 +00:00
parent 8b7a419b4c
commit 3cf2f8eca5
9 changed files with 1370 additions and 184 deletions
@@ -1,20 +1,55 @@
 from .base import BaseAnimeSite
 from bs4 import BeautifulSoup
 import re
+from typing import Optional
 from urllib.parse import urljoin


 class NekoSamaDownloader(BaseAnimeSite):
-    """Downloader for neko-sama.fr"""
+    """Downloader for neko-sama.org (anime streaming via Gupy)
+    
+    NOTE: neko-sama.org now redirects to Gupy, which is a legal streaming search engine.
+    It does NOT host video content - it provides metadata about where to watch legally.
+    This provider can search and get metadata but cannot provide direct download links.
+    """

-    BASE_DOMAINS = ["neko-sama.fr", "nekosama.fr", "www.neko-sama.fr"]
+    BASE_DOMAINS = ["neko-sama.org", "www.neko-sama.org", "neko-sama.fr", "nekosama.fr", "www.gupy.fr", "gupy.fr"]

    def can_handle(self, url: str) -> bool:
        """Report whether *url* refers to one of this provider's domains.

        The check is a case-insensitive substring test of every entry in
        ``BASE_DOMAINS`` against the full URL text.
        """
        lowered_url = url.lower()
        for known_domain in self.BASE_DOMAINS:
            if known_domain in lowered_url:
                return True
        return False

-    async def get_download_link(self, url: str) -> tuple[str, str]:
-        """Extract download link from neko-sama URL"""
+    async def get_download_link(self, url: str, target_filename: Optional[str] = None) -> tuple[str, str]:
+        """
+        Extract download link from neko-sama URL.
+        
+        NOTE: neko-sama.org/Gupy is a legal streaming search engine, NOT a video host.
+        This returns streaming platform information instead of direct video links.
+        """
        try:
+            # Check if this is a Gupy URL
+            if 'gupy.fr' in url or 'neko-sama.org' in url:
+                response = await self.client.get(url, follow_redirects=True)
+                soup = BeautifulSoup(response.text, 'lxml')
+                
+                # Look for streaming platform links
+                streaming_links = []
+                for link in soup.find_all('a', href=True):
+                    href = link.get('href', '')
+                    if '/out/' in href:
+                        text = link.get_text(strip=True)
+                        if text and 'Regarder' in text:
+                            streaming_links.append(f"{text}: {href}")
+                
+                if streaming_links:
+                    title_elem = soup.find('h1') or soup.find('title')
+                    title = title_elem.get_text(strip=True).split('|')[0].strip() if title_elem else "Unknown"
+                    info = "Available streaming platforms:\n" + "\n".join(streaming_links[:5])
+                    filename = target_filename or f"{title}_streaming_info.txt"
+                    return info, filename
+                
+                raise Exception("No streaming links found - Gupy is a legal streaming search, not a video host")
+            
+            # Legacy: try original method for other URLs
            response = await self.client.get(url, follow_redirects=True)
            soup = BeautifulSoup(response.text, 'lxml')

@@ -60,7 +95,7 @@ class NekoSamaDownloader(BaseAnimeSite):
                                filename = self._generate_filename(str(response.url))
                                return match, filename

-            raise Exception("Could not find video link")
+            raise Exception("Could not find video link - Neko-Sama/Gupy does not host video content")

        except Exception as e:
            raise Exception(f"Error extracting NekoSama link: {str(e)}")
@@ -80,11 +115,13 @@ class NekoSamaDownloader(BaseAnimeSite):
        return filename.title()

    async def get_episodes(self, anime_url: str, lang: str = "vostfr") -> list[dict]:
+        """Get list of episodes for an anime."""
        try:
            response = await self.client.get(anime_url)
            soup = BeautifulSoup(response.text, 'lxml')

            episodes = []
+            # Try to find episode links
            episode_links = soup.find_all('a', href=re.compile(r'episode'))

            for link in episode_links:
@@ -112,10 +149,7 @@ class NekoSamaDownloader(BaseAnimeSite):
            return []

    async def get_anime_metadata(self, anime_url: str) -> dict:
-        """
-        Extract rich metadata from anime page
-        Returns synopsis, genres, rating, release year, studio, etc.
-        """
+        """Extract rich metadata from anime page."""
        try:
            print(f"[NEKO-SAMA] Extracting metadata from: {anime_url}")
            response = await self.client.get(anime_url)
@@ -134,68 +168,55 @@ class NekoSamaDownloader(BaseAnimeSite):
                'alternative_titles': []
            }

-            # Extract synopsis
-            synopsis_selectors = [
-                'div.synopsis',
-                'div.description',
-                'div[class*="synopsis"]',
-                'div[class*="desc"]',
-                'p.synopsis',
-                '.anime-synopsis',
-                '.summary'
-            ]
+            # Extract title and year from h1
+            title_elem = soup.find('h1')
+            if title_elem:
+                title_text = title_elem.get_text(strip=True)
+                # Extract year from title like "Naruto (2002)"
+                year_match = re.search(r'\((\d{4})\)', title_text)
+                if year_match:
+                    metadata['release_year'] = int(year_match.group(1))
+            
+            # Extract synopsis - Gupy shows it as paragraphs
+            synopsis_elem = soup.find('p')
+            if synopsis_elem:
+                text = synopsis_elem.get_text(strip=True)
+                if len(text) > 50:
+                    metadata['synopsis'] = text

-            for selector in synopsis_selectors:
-                synopsis_elem = soup.select_one(selector)
-                if synopsis_elem:
-                    synopsis = synopsis_elem.get_text(strip=True)
-                    if len(synopsis) > 50:
-                        metadata['synopsis'] = synopsis
-                        break
-
-            # Extract genres
-            genre_links = soup.find_all('a', href=re.compile(r'genre|tag|type', re.I))
+            # Extract genres from meta tags or links
+            genre_links = soup.find_all('a', href=re.compile(r'serie-|genre|tag'))
            if genre_links:
-                metadata['genres'] = [link.get_text(strip=True) for link in genre_links[:5]]
+                genres = []
+                for link in genre_links[:5]:
+                    text = link.get_text(strip=True)
+                    if text and '/' not in text and len(text) < 30:
+                        genres.append(text)
+                metadata['genres'] = genres

-            # Extract rating
-            rating_selectors = [
-                'span.rating',
-                'div.rating',
-                'span.score',
-                'div[class*="rating"]',
-                'div[class*="score"]'
-            ]
-
-            for selector in rating_selectors:
-                rating_elem = soup.select_one(selector)
-                if rating_elem:
-                    rating_text = rating_elem.get_text(strip=True)
-                    rating_match = re.search(r'(\d+\.?\d*)\s*/\s*10', rating_text)
-                    if rating_match:
-                        metadata['rating'] = f"{rating_match.group(1)}/10"
-                        break
-
-            # Extract release year
-            page_text = soup.get_text()
-            year_matches = re.findall(r'\b(19\d{2}|20\d{2})\b', page_text)
-            if year_matches:
-                import datetime
-                current_year = datetime.datetime.now().year + 2
-                valid_years = [int(y) for y in year_matches if 1950 <= int(y) <= current_year]
-                if valid_years:
-                    from collections import Counter
-                    metadata['release_year'] = Counter(valid_years).most_common(1)[0][0]
+            # Extract rating from percentage
+            rating_elem = soup.find(string=re.compile(r'\d+(\.\d+)?%'))
+            if rating_elem:
+                match = re.search(r'(\d+(\.\d+)?)%', rating_elem)
+                if match:
+                    rating = float(match.group(1)) / 10
+                    metadata['rating'] = f"{rating:.1f}/10"

            # Extract poster image
-            poster_elem = soup.select_one('img.poster, img.cover, .anime-poster img')
+            poster_elem = soup.find('img', src=re.compile(r'poster|poster'))
            if poster_elem:
-                metadata['poster_image'] = poster_elem.get('src') or poster_elem.get('data-src')
+                metadata['poster_image'] = poster_elem.get('src')

-            # Extract total episodes
-            episodes_count = len(await self.get_episodes(anime_url))
-            if episodes_count > 0:
-                metadata['total_episodes'] = episodes_count
+            # Extract episode count from page text
+            page_text = soup.get_text()
+            ep_match = re.search(r'(\d+)\s*episodes?', page_text, re.I)
+            if ep_match:
+                metadata['total_episodes'] = int(ep_match.group(1))
+
+            # Extract studio/director
+            director_elem = soup.find('a', href=re.compile(r'person|réalisé'))
+            if director_elem:
+                metadata['studio'] = director_elem.get_text(strip=True)

            print(f"[NEKO-SAMA] Extracted metadata: {metadata}")
            return metadata
@@ -205,44 +226,59 @@ class NekoSamaDownloader(BaseAnimeSite):
            return {}

    async def search_anime(self, query: str, lang: str = "vostfr", include_metadata: bool = False) -> list[dict]:
-        """
-        Search for anime on neko-sama
-
-        Args:
-            query: Search query string
-            lang: Language preference (vostfr, vf)
-            include_metadata: Whether to fetch full metadata for each result (slower)
-        """
+        """Search for anime on neko-sama (uses Gupy backend)."""
        try:
            import time
+            from html import unescape
            start = time.time()
            print(f"[NEKO-SAMA] Searching for '{query}' ({lang})...")

-            # Neko-Sama URL pattern: https://neko-sama.fr/anime/{anime-name}
-            search_url = f"https://neko-sama.fr/anime/{query.lower().replace(' ', '-')}"
+            # Neko-Sama now uses Gupy - try the direct URL pattern
+            search_slug = query.lower().replace(' ', '-')
+            search_urls = [
+                f"https://www.gupy.fr/series/{search_slug}/",
+                f"https://neko-sama.org/series/{search_slug}/",
+            ]

-            response = await self.client.get(search_url)
+            results = []
+            for search_url in search_urls:
+                response = await self.client.get(search_url, follow_redirects=True)
+                print(f"[NEKO-SAMA] Tried {search_url} -> {response.status_code}")
+
+                if response.status_code == 200:
+                    final_url = str(response.url)
+                    print(f"[NEKO-SAMA] Found anime at {final_url}")
+
+                    # Extract title from page
+                    soup = BeautifulSoup(response.text, 'lxml')
+                    title_elem = soup.find('h1') or soup.find('title')
+                    title = unescape(title_elem.get_text(strip=True)) if title_elem else query
+                    # Clean up title
+                    title = title.split('|')[0].split('-')[0].strip()
+
+                    result = {
+                        'title': title,
+                        'url': final_url,
+                        'cover_image': None,
+                        'type': 'direct',
+                        'metadata': None
+                    }
+
+                    # Try to get poster
+                    poster = soup.find('img', src=re.compile(r'poster'))
+                    if poster:
+                        result['cover_image'] = poster.get('src')
+
+                    if include_metadata:
+                        metadata = await self.get_anime_metadata(final_url)
+                        result['metadata'] = metadata
+
+                    results.append(result)
+                    break

            elapsed = time.time() - start
-            print(f"[NEKO-SAMA] Got response {response.status_code} in {elapsed:.2f}s")
-
-            if response.status_code == 200:
-                print(f"[NEKO-SAMA] Found anime at {str(response.url)}")
-                result = {
-                    'title': query,
-                    'url': str(response.url),
-                    'type': 'direct',
-                    'metadata': None
-                }
-
-                if include_metadata:
-                    metadata = await self.get_anime_metadata(str(response.url))
-                    result['metadata'] = metadata
-
-                return [result]
-
-            print(f"[NEKO-SAMA] No anime found")
-            return []
+            print(f"[NEKO-SAMA] Search completed in {elapsed:.2f}s, found {len(results)} results")
+            return results

        except Exception as e:
            print(f"[NEKO-SAMA] Error: {str(e)}")