feat: Add SendVid downloader support

Add complete support for the SendVid video hosting service used by Anime-Sama for anime series like Hell's Paradise.

Changes:
- Create SendVidDownloader class with proper headers to avoid 403 errors
- Add SendVid detection and handling in AnimeSamaDownloader
- Update download_manager to include SendVid-specific headers
- Support custom episode naming (e.g., "Hells Paradise - Episode 01.mp4")

Technical details:
- SendVid embed pages require User-Agent and Referer headers
- Direct MP4 URLs are extracted from <source> tags with IP/time-based parameters
- Tested with Hell's Paradise Episode 01 (7MB, 24min, 1280x720)

Generated with [Claude Code](https://claude.ai/code) via [Happy](https://happy.engineering)

Co-Authored-By: Claude <noreply@anthropic.com>
Co-Authored-By: Happy <yesreply@happy.engineering>
2026-01-23 08:17:10 +00:00
commit cb3ea8d926
25 changed files with 4657 additions and 0 deletions
@@ -0,0 +1,475 @@
+from .base import BaseDownloader
+from bs4 import BeautifulSoup
+import re
+import httpx
+from urllib.parse import urljoin, unquote
+
+
+class AnimeSamaDownloader(BaseDownloader):
+    """Downloader for anime-sama.org / anime-sama.store"""
+
+    # Static list of known domains (will be updated dynamically)
+    BASE_DOMAINS = ["anime-sama.si", "www.anime-sama.si", "anime-sama.org", "anime-sama.store", "anime-sama.eu"]
+
+    @classmethod
+    async def get_current_domain(cls) -> str:
+        """
+        Fetch the currently active anime-sama domain from anime-sama.pw.
+
+        The landing page is downloaded and parsed. The host of the first
+        ``a.btn-primary`` link is preferred; otherwise the host of the first
+        absolute ``https://`` link containing ``anime-sama.`` that is not the
+        landing page itself is used.
+
+        Returns:
+            The host name (e.g. 'anime-sama.si'). Falls back to
+            'anime-sama.si' when no usable link is found or any error occurs.
+        """
+        # Imported locally: the module header only imports urljoin/unquote.
+        from urllib.parse import urlparse
+
+        default_domain = "anime-sama.si"
+        landing_hosts = ('anime-sama.pw', 'www.anime-sama.pw')
+
+        try:
+            async with httpx.AsyncClient(timeout=10.0, follow_redirects=True) as client:
+                response = await client.get("https://anime-sama.pw")
+
+            soup = BeautifulSoup(response.text, 'lxml')
+
+            # Preferred source: the primary button/link of the landing page
+            primary_link = soup.find('a', class_='btn-primary')
+            if primary_link and primary_link.get('href'):
+                domain = urlparse(primary_link['href']).netloc
+                # A relative href has no netloc. Returning '' here would end up
+                # in BASE_DOMAINS via update_domains(), and since can_handle()
+                # matches by substring it would then accept every URL.
+                if domain:
+                    print(f"[ANIME-SAMA] Current domain from anime-sama.pw: {domain}")
+                    return domain
+
+            # Fallback: any absolute anime-sama.* link that is not the landing page
+            for link in soup.find_all('a', href=True):
+                href = link['href']
+                if 'anime-sama.' in href and href.startswith('https://'):
+                    domain = urlparse(href).netloc
+                    if domain and domain not in landing_hosts:
+                        print(f"[ANIME-SAMA] Found domain via fallback: {domain}")
+                        return domain
+
+            print("[ANIME-SAMA] Could not determine current domain, using default")
+            return default_domain
+
+        except Exception as e:
+            # Deliberately best-effort: a failed lookup must not break callers.
+            print(f"[ANIME-SAMA] Error fetching current domain: {e}")
+            return default_domain
+
+    @classmethod
+    async def update_domains(cls) -> None:
+        """
+        Register the currently active domain in BASE_DOMAINS.
+
+        The domain reported by get_current_domain() and its ``www.`` variant
+        are placed at the front of the list (bare domain first) when they are
+        not already known. This should be called periodically to keep up with
+        domain changes. Errors are logged and swallowed.
+        """
+        try:
+            current_domain = await cls.get_current_domain()
+
+            # Never register an empty host: can_handle() matches by substring
+            # and '' is a substring of every URL.
+            if not current_domain:
+                print("[ANIME-SAMA] No current domain resolved, keeping known domains")
+                return
+
+            candidates = [current_domain]
+            if not current_domain.startswith('www.'):
+                candidates.append(f'www.{current_domain}')
+
+            new_domains = [domain for domain in candidates if domain not in cls.BASE_DOMAINS]
+
+            # Prepend in one step so the bare domain stays ahead of its www
+            # variant; the list object itself is mutated in place.
+            cls.BASE_DOMAINS[:0] = new_domains
+            for domain in new_domains:
+                print(f"[ANIME-SAMA] Added new domain: {domain}")
+
+        except Exception as e:
+            print(f"[ANIME-SAMA] Error updating domains: {e}")
+
+    def can_handle(self, url: str) -> bool:
+        """Return True when any entry of BASE_DOMAINS occurs in the lowercased URL."""
+        # Substring (not host) matching: the known domain may appear anywhere
+        # in the string that is passed in.
+        for known_domain in self.BASE_DOMAINS:
+            if known_domain in url.lower():
+                return True
+        return False
+
+    async def get_download_link(self, url: str) -> tuple[str, str]:
+        """
+        Resolve an anime-sama URL to a ``(video_url, filename)`` pair.
+
+        Accepted inputs:
+          * a combined string ``video_url|anime_page_url|episode_title`` (the
+            title part is optional);
+          * a bare vidmoly or sendvid player URL;
+          * an anime-sama page URL, which is scanned for player iframes and
+            for ``<video>`` / ``<source>`` tags.
+
+        Raises:
+            Exception: wrapping any failure, including the case where no
+                video link could be found.
+        """
+        try:
+            print(f"[ANIME-SAMA] Extracting link from: {url}")
+
+            # Combined format: video_url|anime_page_url|episode_title?
+            if '|' in url:
+                parts = url.split('|')
+                video_url = parts[0]
+                anime_page_url = parts[1]  # a '|' guarantees at least two parts
+                episode_title = parts[2] if len(parts) > 2 else None
+
+                print(f"[ANIME-SAMA] Split URL - video: {video_url[:60]}..., anime: {anime_page_url}, episode: {episode_title}")
+
+                # Known hosts are delegated; the anime context drives the filename
+                if 'vidmoly' in video_url:
+                    return await self._extract_from_vidmoly(video_url, anime_page_url, episode_title)
+                if 'sendvid.com' in video_url:
+                    return await self._extract_from_sendvid(video_url, anime_page_url, episode_title)
+
+                # Unknown host: hand the URL back untouched with a generated name
+                if episode_title:
+                    filename = f"{self._generate_anime_name(anime_page_url)} - {episode_title}.mp4"
+                else:
+                    filename = self._generate_filename_from_anime_url(anime_page_url)
+                return video_url, filename
+
+            # Bare third-party host URLs (no anime context available)
+            if 'vidmoly' in url:
+                return await self._extract_from_vidmoly(url)
+            if 'sendvid.com' in url:
+                return await self._extract_from_sendvid(url)
+
+            # If it's an anime-sama page, try to find the video
+            if 'anime-sama' in url.lower():
+                response = await self.client.get(url, follow_redirects=True)
+                final_url = str(response.url)
+                soup = BeautifulSoup(response.text, 'lxml')
+
+                # Look for an iframe that looks like a video player
+                player_hints = ('vidmoly', 'player', 'stream', 'play', 'embed')
+                for iframe in soup.find_all('iframe'):
+                    src = iframe.get('src', '')
+                    if not src or not any(hint in src for hint in player_hints):
+                        continue
+
+                    # Protocol-relative ("//host/...") and relative sources are
+                    # resolved against the page instead of being skipped.
+                    player_url = src if src.startswith('http') else urljoin(final_url, src)
+                    if not player_url.startswith('http'):
+                        continue
+
+                    print(f"[ANIME-SAMA] Found iframe: {player_url}")
+                    video_url = await self._extract_from_player(player_url)
+                    if video_url:
+                        if not video_url.startswith('http'):
+                            video_url = urljoin(player_url, video_url)
+                        return video_url, self._generate_filename(final_url)
+
+                # Look for <video> tags: the tag's own src first, then its <source> children
+                for video in soup.find_all('video'):
+                    candidates = [video.get('src', '')]
+                    candidates.extend(source.get('src', '') for source in video.find_all('source'))
+                    for src in candidates:
+                        if src:
+                            if not src.startswith('http'):
+                                src = urljoin(final_url, src)
+                            return src, self._generate_filename(final_url)
+
+            raise Exception("Could not find video link on page")
+
+        except Exception as e:
+            # Keep the original exception attached as the explicit cause.
+            raise Exception(f"Error extracting AnimeSama link: {str(e)}") from e
+
+    async def _extract_from_vidmoly(self, url: str, anime_page_url: str = None, episode_title: str = None) -> tuple[str, str]:
+        """
+        Resolve a vidmoly player URL by delegating to VidMolyDownloader.
+
+        A target filename is derived from the anime page URL (and episode
+        title, when given) and forwarded to the delegate. If the delegate
+        reports a different name and that file exists under ``downloads``,
+        it is renamed to the target name, replacing any existing file.
+
+        Returns:
+            ``(url, filename)`` - the original vidmoly URL, not a direct link.
+
+        Raises:
+            Exception: wrapping any failure of the delegate or the rename.
+        """
+        try:
+            print(f"[ANIME-SAMA] Extracting from vidmoly: {url}")
+            print("[ANIME-SAMA] Delegating to VidMolyDownloader...")
+
+            from .vidmoly import VidMolyDownloader
+
+            # Work out the desired filename before contacting the host
+            target_filename = None
+            if episode_title and anime_page_url:
+                series_name = self._generate_anime_name(anime_page_url)
+                target_filename = f"{series_name} - {episode_title}.mp4"
+                print(f"[ANIME-SAMA] Generated filename: {target_filename} (episode: {episode_title})")
+            elif anime_page_url:
+                target_filename = self._generate_filename_from_anime_url(anime_page_url)
+                print(f"[ANIME-SAMA] Generated filename: {target_filename} (no episode title)")
+            else:
+                print("[ANIME-SAMA] No target_filename generated")
+
+            delegate = VidMolyDownloader()
+
+            # The keyword is only passed when a target name exists
+            extra_kwargs = {'target_filename': target_filename} if target_filename else {}
+            _video_url, reported_name = await delegate.get_download_link(url, **extra_kwargs)
+
+            filename = target_filename or reported_name
+
+            print(f"[ANIME-SAMA] Got video: {filename}")
+
+            import os
+            if reported_name != filename:
+                # The delegate may report an absolute path or a bare file name
+                if os.path.isabs(reported_name):
+                    temp_path = reported_name
+                else:
+                    temp_path = os.path.join('downloads', reported_name)
+
+                if not os.path.exists(temp_path):
+                    print(f"[ANIME-SAMA] Warning: temp file not found: {temp_path}")
+                else:
+                    final_path = os.path.join('downloads', filename)
+                    if os.path.exists(final_path):
+                        os.remove(final_path)
+                    os.rename(temp_path, final_path)
+                    print(f"[ANIME-SAMA] Renamed {reported_name} -> {filename}")
+
+            # NOTE(review): the original URL is returned on the assumption that
+            # download_manager skips files that already exist - confirm there.
+            return url, filename
+
+        except Exception as e:
+            print(f"[ANIME-SAMA] Vidmoly extraction error: {e}")
+            raise Exception(f"Error extracting from vidmoly: {str(e)}")
+
+    async def _extract_from_sendvid(self, url: str, anime_page_url: str = None, episode_title: str = None) -> tuple[str, str]:
+        """
+        Resolve a sendvid player URL by delegating to SendVidDownloader.
+
+        A target filename is derived from the anime page URL (and episode
+        title, when given) and forwarded to the delegate; when none can be
+        derived, the filename reported by the delegate is used instead.
+
+        Returns:
+            ``(video_url, filename)`` with the video URL reported by the delegate.
+
+        Raises:
+            Exception: wrapping any failure of the delegate.
+        """
+        try:
+            print(f"[ANIME-SAMA] Extracting from sendvid: {url}")
+            print("[ANIME-SAMA] Delegating to SendVidDownloader...")
+
+            from .sendvid import SendVidDownloader
+
+            # Work out the desired filename before contacting the host
+            target_filename = None
+            if episode_title and anime_page_url:
+                series_name = self._generate_anime_name(anime_page_url)
+                target_filename = f"{series_name} - {episode_title}.mp4"
+                print(f"[ANIME-SAMA] Generated filename: {target_filename} (episode: {episode_title})")
+            elif anime_page_url:
+                target_filename = self._generate_filename_from_anime_url(anime_page_url)
+                print(f"[ANIME-SAMA] Generated filename: {target_filename} (no episode title)")
+            else:
+                print("[ANIME-SAMA] No target_filename generated")
+
+            delegate = SendVidDownloader()
+
+            # The keyword is only passed when a target name exists
+            extra_kwargs = {'target_filename': target_filename} if target_filename else {}
+            video_url, reported_name = await delegate.get_download_link(url, **extra_kwargs)
+
+            filename = target_filename or reported_name
+
+            print(f"[ANIME-SAMA] Got video: {filename}")
+
+            # The delegate's video URL is returned as-is; the actual download
+            # is left to the caller.
+            return video_url, filename
+
+        except Exception as e:
+            print(f"[ANIME-SAMA] SendVid extraction error: {e}")
+            raise Exception(f"Error extracting from sendvid: {str(e)}")
+
+    def _generate_filename_from_anime_url(self, anime_url: str) -> str:
+        """
+        Build a default episode filename from an anime page URL.
+
+        The path segment following ``catalogue`` (e.g. ``hells-paradise`` in
+        ``https://anime-sama.si/catalogue/hells-paradise/saison1/vostfr/``)
+        is title-cased and used as the anime name. The page URL carries no
+        episode number, so the episode is always "01".
+
+        Returns:
+            ``"<Anime Name> - Episode 01.mp4"``, or
+            ``"Anime - Episode 01.mp4"`` when no name can be derived
+            (including a ``None`` / non-string URL).
+        """
+        fallback = "Anime - Episode 01.mp4"
+        try:
+            parts = anime_url.split('/')
+        except (AttributeError, TypeError):
+            # None or a non-string URL: nothing to derive a name from
+            return fallback
+
+        # parts[:-1] guarantees that a segment follows 'catalogue'
+        for i, part in enumerate(parts[:-1]):
+            if part == 'catalogue':
+                # An empty segment (".../catalogue/") falls back to "Anime"
+                anime_name = parts[i + 1].replace('-', ' ').title() or "Anime"
+                return f"{anime_name} - Episode 01.mp4"
+
+        return fallback
+
+    def _generate_anime_name(self, anime_url: str) -> str:
+        """
+        Extract the title-cased anime name from an anime page URL.
+
+        The path segment following ``catalogue`` is used, with hyphens turned
+        into spaces (``.../catalogue/hells-paradise/...`` -> "Hells Paradise").
+
+        Returns:
+            The anime name, or "Anime" when the URL has no usable
+            ``catalogue/<name>`` segment or is ``None`` / not a string.
+        """
+        try:
+            parts = anime_url.split('/')
+        except (AttributeError, TypeError):
+            # None or a non-string URL: nothing to derive a name from
+            return "Anime"
+
+        # parts[:-1] guarantees that a segment follows 'catalogue'
+        for i, part in enumerate(parts[:-1]):
+            if part == 'catalogue':
+                # An empty segment (".../catalogue/") falls back to "Anime"
+                return parts[i + 1].replace('-', ' ').title() or "Anime"
+
+        return "Anime"
+
+    async def _extract_from_player(self, player_url: str) -> str | None:
+        """
+        Try to extract a direct video URL from a player iframe page.
+
+        Lookup order: ``<video>`` tags (``src`` or ``data-src``), then
+        ``<source>`` tags whose ``src`` mentions mp4/m3u8/mkv, then the first
+        ``.mp4`` / ``.m3u8`` URL found in an inline script.
+
+        Returns:
+            The URL as found in the page (it may be relative), or ``None``
+            when nothing matches or the page cannot be fetched or parsed.
+        """
+        try:
+            response = await self.client.get(player_url)
+            soup = BeautifulSoup(response.text, 'lxml')
+
+            # Check for video tags
+            for video in soup.find_all('video'):
+                src = video.get('src') or video.get('data-src')
+                if src:
+                    return src
+
+            # Check for source tags
+            for source in soup.find_all('source'):
+                src = source.get('src')
+                if src and any(ext in src for ext in ('mp4', 'm3u8', 'mkv')):
+                    return src
+
+            # Check inline scripts in the player page
+            for script in soup.find_all('script'):
+                if script.string:
+                    match = re.search(r'(https?://[^"\'>\s]+\.(?:mp4|m3u8)(?:\?[^"\'>\s]*)?)', script.string)
+                    if match:
+                        return match.group(1)
+
+        except Exception as e:
+            # Still best-effort, but no longer a bare ``except``: that would
+            # also swallow asyncio.CancelledError (a BaseException) and break
+            # task cancellation. The failure is logged instead of hidden.
+            print(f"[ANIME-SAMA] Player extraction error: {e}")
+
+        return None
+
+    def _generate_filename(self, url: str) -> str:
+        """
+        Generate an episode filename from an anime-sama episode URL.
+
+        Expected URL shape:
+        ``.../catalogue/{anime}/saison{N}/{vostfr|vf}/episode-{N}``.
+        The anime name defaults to "anime" and the episode to "1" when the
+        corresponding segment is missing.
+
+        Returns:
+            ``"<Anime Name> - Episode <N>.mp4"`` with the name part
+            title-cased and a lowercase ``.mp4`` extension.
+        """
+        parts = url.split('/')
+
+        anime_name = "anime"
+        episode = "1"
+
+        for i, part in enumerate(parts):
+            if part == 'catalogue' and i + 1 < len(parts):
+                anime_name = parts[i + 1].replace('-', ' ')
+            elif 'episode-' in part:
+                episode = part.replace('episode-', '')
+
+        # Title-case only the stem: applying .title() to the whole filename
+        # turned the extension into ".Mp4".
+        stem = f"{anime_name} - Episode {episode}".title()
+        return f"{stem}.mp4"
+
+    async def search_anime(self, query: str, lang: str = "vostfr") -> list[dict]:
+        """
+        Search for an anime on anime-sama.
+
+        No search endpoint is used: the query is turned into a catalogue slug
+        (lowercase, spaces -> hyphens, apostrophes and colons removed) and the
+        season-1 page for that slug is probed directly.
+
+        Args:
+            query: Anime title as typed by the user.
+            lang: Language segment of the catalogue URL (e.g. "vostfr", "vf").
+
+        Returns:
+            A one-element list ``[{'title', 'url', 'type': 'direct'}]`` when
+            the probed page looks like an anime page, otherwise ``[]``.
+            Errors are logged and also yield ``[]``.
+        """
+        try:
+            # Convert query to URL format (lowercase, replace spaces with hyphens)
+            query_formatted = query.lower().replace(' ', '-').replace("'", '').replace(':', '')
+            if not query_formatted:
+                # A blank query would probe ".../catalogue//saison1/..."
+                print("[ANIME-SAMA] Empty search query")
+                return []
+
+            # Update domains before searching to ensure we have the current domain
+            await self.update_domains()
+
+            import time
+            from urllib.parse import quote
+            start = time.perf_counter()  # monotonic clock for the elapsed-time log
+            print(f"[ANIME-SAMA] Searching for '{query}' ({lang})...")
+
+            # NOTE(review): update_domains() has already resolved the domain
+            # once; this second lookup costs another request to anime-sama.pw.
+            current_domain = await self.get_current_domain()
+
+            # Percent-encode the slug so characters such as '/', '?' or '#'
+            # in the query cannot alter the structure of the probed URL.
+            slug = quote(query_formatted, safe='')
+            search_url = f"https://{current_domain}/catalogue/{slug}/saison1/{lang}/"
+
+            response = await self.client.get(search_url, follow_redirects=True)
+
+            elapsed = time.perf_counter() - start
+            print(f"[ANIME-SAMA] Got response {response.status_code} in {elapsed:.2f}s")
+
+            if response.status_code == 200:
+                # A valid anime page references the episode selector / episodes.js
+                if 'selectEpisodes' in response.text or 'episodes.js' in response.text:
+                    print(f"[ANIME-SAMA] Found anime at {str(response.url)}")
+                    return [{
+                        'title': query,
+                        'url': str(response.url),
+                        'type': 'direct'
+                    }]
+
+            print(f"[ANIME-SAMA] No anime found (status: {response.status_code})")
+            return []
+
+        except Exception as e:
+            print(f"[ANIME-SAMA] Error: {str(e)}")
+            return []
+
+    async def get_episodes(self, anime_url: str, lang: str = "vostfr") -> list[dict]:
+        """
+        Get the list of episodes for an anime page.
+
+        Anime-Sama stores episode URLs in a JavaScript file (episodes.js)
+        containing arrays such as ``var eps1 = ['url1', 'url2', ...]``. The
+        first array that holds at least one http(s) URL is used; each episode
+        is numbered by its position in that array. If episodes.js is missing
+        or unusable, ``episode-N`` links in the page HTML are used instead.
+
+        Args:
+            anime_url: URL of the anime season page.
+            lang: Currently unused; kept for interface compatibility.
+
+        Returns:
+            A list of dicts. From episodes.js: ``{'episode', 'url', 'title'}``
+            where ``url`` is ``video_url|anime_page_url|episode_title``. From
+            the HTML fallback: ``{'episode', 'url'}``, de-duplicated and
+            sorted by episode number. ``[]`` on error.
+        """
+        try:
+            response = await self.client.get(anime_url)
+
+            episodes = []
+
+            # Try to find the episodes.js reference in the HTML
+            episodes_js_match = re.search(r'episodes\.js\?filever=(\d+)', response.text)
+            if episodes_js_match:
+                file_ver = episodes_js_match.group(1)
+                # Build the URL to episodes.js
+                episodes_js_url = f"{anime_url.rstrip('/')}/episodes.js?filever={file_ver}"
+
+                print(f"[ANIME-SAMA] Found episodes.js at {episodes_js_url}")
+
+                try:
+                    js_response = await self.client.get(episodes_js_url)
+                    js_content = js_response.text
+
+                    # One match per "var epsN = [...]" array
+                    eps_arrays = re.findall(r'var\s+eps\d+\s*=\s*(\[[^\]]+\])', js_content)
+
+                    for array_text in eps_arrays:
+                        # Every quoted entry, single- or double-quoted, so a
+                        # non-URL placeholder still occupies its position.
+                        entries = re.findall(r"""(['"])(.*?)\1""", array_text)
+
+                        for idx, (_quote_char, entry) in enumerate(entries, start=1):
+                            if not re.match(r'https?://.', entry):
+                                continue  # keep numbering aligned with the array position
+                            episode_num = str(idx).zfill(2)
+                            episode_title = f'Episode {episode_num}'
+                            # Format: video_url|anime_page_url|episode_title
+                            combined_url = f"{entry}|{anime_url}|{episode_title}"
+                            episodes.append({
+                                'episode': episode_num,
+                                'url': combined_url,
+                                'title': episode_title
+                            })
+
+                        # Use the first array that actually yields URLs
+                        if episodes:
+                            print(f"[ANIME-SAMA] Found {len(episodes)} episodes")
+                            return episodes
+
+                except Exception as e:
+                    print(f"[ANIME-SAMA] Error fetching episodes.js: {e}")
+
+            # Fallback: Try to find episode links in the HTML (old method)
+            soup = BeautifulSoup(response.text, 'lxml')
+            for link in soup.find_all('a', href=True):
+                href = link['href']
+                if 'episode-' in href:
+                    # Extract episode number
+                    match = re.search(r'episode-(\d+)', href)
+                    if match:
+                        episodes.append({
+                            'episode': match.group(1),
+                            'url': urljoin(anime_url, href)
+                        })
+
+            # Remove duplicates (first occurrence wins) and sort numerically
+            seen = set()
+            unique_episodes = []
+            for ep in episodes:
+                if ep['episode'] not in seen:
+                    seen.add(ep['episode'])
+                    unique_episodes.append(ep)
+
+            unique_episodes.sort(key=lambda x: int(x['episode']))
+
+            return unique_episodes
+
+        except Exception as e:
+            print(f"[ANIME-SAMA] Error getting episodes: {e}")
+            return []