# ohm_streaming/app/downloaders/video_players/rapidfile.py

import re
from urllib.parse import unquote, urljoin, urlparse

import httpx
from bs4 import BeautifulSoup

from .base import BaseVideoPlayer


class RapidFileDownloader(BaseVideoPlayer):
    """Downloader for rapidfile.net and similar hosts.

    The hoster page is scraped with a series of heuristics, tried in order,
    until one of them yields a candidate download URL.
    """

    # Substrings that identify a URL as belonging to a supported host.
    _HOST_MARKERS = ("rapidfile.net", "rapidfile.com", "rapid-file")
    # Substrings that mark an anchor's href as a likely download link.
    _LINK_KEYWORDS = ("download", "get_file", "file.php")
    # Absolute URL embedded in inline JavaScript whose path contains a
    # "/download..." or "/file..." segment.
    _SCRIPT_URL_RE = re.compile(r'(https?://[^\s\"\'<>]+/(?:download|file)[^\s\"\'<>]+)')
    # Filename used when neither the headers nor the URL provide one.
    _DEFAULT_FILENAME = "rapidfile_download"

    def can_handle(self, url: str) -> bool:
        """Return True if *url* contains one of the supported host markers."""
        lowered = url.lower()
        return any(marker in lowered for marker in self._HOST_MARKERS)

    async def get_download_link(self, url: str) -> tuple[str, str]:
        """Resolve a hoster page URL to a ``(download_url, filename)`` pair.

        The page at *url* is fetched and searched for a download link. When a
        candidate is found, a HEAD request is attempted to read the filename
        from the response headers; otherwise the filename is derived from the
        URL path. If no candidate is found, *url* itself is returned together
        with a filename derived from it.

        Raises:
            Exception: if fetching or parsing the page fails; the original
                error is attached as ``__cause__``.
        """
        try:
            response = await self.client.get(url)
            response.raise_for_status()
            soup = BeautifulSoup(response.text, 'lxml')

            download_url = self._find_rapidfile_link(soup, url)
            if not download_url:
                # If all else fails, return the original URL
                return url, self._filename_from_url_path(url)

            try:
                head_resp = await self.client.head(download_url, timeout=5.0)
                header_name = self._extract_filename_from_headers(head_resp.headers)
            except Exception:
                # Best effort only: a failed HEAD request must not prevent
                # returning the link, so fall back to the URL-derived name.
                header_name = None

            return download_url, header_name or self._filename_from_url_path(download_url)

        except Exception as e:
            raise Exception(f"Error extracting Rapidfile link: {str(e)}") from e

    @classmethod
    def _find_rapidfile_link(cls, soup: "BeautifulSoup", page_url: str) -> "str | None":
        """Return the first download URL found in *soup*, or None.

        Relative links found on the page are resolved against *page_url*.
        """
        # Method 1: dedicated download button/link.
        download_btn = soup.find('a', {'id': 'downloadbtn'}) or soup.find('a', class_='download-btn')
        if download_btn and download_btn.get('href'):
            return urljoin(page_url, download_btn['href'])

        # Method 2: form whose action looks like a download endpoint.
        for form in soup.find_all('form'):
            action = form.get('action', '')
            lowered = action.lower()
            if action and ('download' in lowered or 'file' in lowered):
                # urljoin handles absolute, root-relative and relative
                # actions; plain concatenation would yield a malformed URL.
                return urljoin(page_url, action)

        # Method 3: any absolute link with a download-like keyword in its URL.
        # Relative links are deliberately ignored here, as this heuristic is
        # loose and would otherwise match ordinary site navigation.
        for link in soup.find_all('a', href=True):
            href = link['href']
            lowered = href.lower()
            if href.startswith('http') and any(keyword in lowered for keyword in cls._LINK_KEYWORDS):
                return href

        # Method 4: direct file links embedded in inline scripts.
        for script in soup.find_all('script'):
            if script.string:
                match = cls._SCRIPT_URL_RE.search(script.string)
                if match:
                    return match.group(0)

        return None

    @classmethod
    def _filename_from_url_path(cls, link: str) -> str:
        """Return the last path segment of *link*, or the default filename.

        Only the path component is used, so query strings and fragments do
        not leak into the filename; percent-escapes are decoded.
        """
        decoded_path = unquote(urlparse(link).path)
        return decoded_path.rsplit('/', 1)[-1] or cls._DEFAULT_FILENAME