# ohm_streaming/app/downloaders/vidmoly_old.py

import asyncio
import os
import re
import subprocess
import tempfile
from pathlib import Path
from urllib.parse import urljoin

import httpx
from bs4 import BeautifulSoup

from .base import BaseDownloader


class VidMolyDownloader(BaseDownloader):
    """Downloader for vidmoly.to - Video streaming host with M3U8 to MP4 conversion.

    The embed page of a video is fetched, the first ``file:"..."`` source is
    extracted from it, and the result is either returned as-is (direct MP4
    link) or remuxed from an HLS playlist into a local MP4 file with ffmpeg.
    """

    # Substrings that mark a URL as belonging to this host.
    _SUPPORTED_DOMAINS = ("vidmoly.to", "vidmoly.org")

    # Upper bound, in seconds, for a single ffmpeg remux.
    _FFMPEG_TIMEOUT_SECONDS = 300

    # Quality label used when the playlist exposes no variant streams.
    _DEFAULT_QUALITY_LABEL = "720"

    # Headers sent with every HTTP request and forwarded to ffmpeg.
    _REQUEST_HEADERS = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36',
        'Referer': 'https://vidmoly.to/',
        'Accept': '*/*',
        'Accept-Language': 'en-US,en;q=0.9',
    }

    def can_handle(self, url: str) -> bool:
        """Return True when *url* contains one of the supported VidMoly domains."""
        lowered = url.lower()
        return any(domain in lowered for domain in self._SUPPORTED_DOMAINS)

    async def get_download_link(self, url: str) -> tuple[str, str]:
        """Resolve a VidMoly page URL to a downloadable video.

        Args:
            url: Any VidMoly URL form understood by ``_extract_vidmoly_id``.

        Returns:
            A ``(location, filename)`` tuple. ``location`` is the direct
            source URL when the page exposes a non-HLS source, otherwise the
            local path of the MP4 file produced by ffmpeg.

        Raises:
            Exception: On any failure; the message is prefixed with
                "Error extracting VidMoly link" and the original error is
                chained as ``__cause__``.
        """
        try:
            vidmoly_id = self._extract_vidmoly_id(url)
            if not vidmoly_id:
                raise Exception("Could not extract VidMoly ID from URL")

            embed_url = f"https://vidmoly.to/embed-{vidmoly_id}.html"
            headers = dict(self._REQUEST_HEADERS)

            response = await self.client.get(embed_url, headers=headers)
            response.raise_for_status()

            # The embed page may answer with a JavaScript redirect that
            # carries a token; follow it manually.
            redirect_match = re.search(r"window\.location\.replace\('([^']+)'", response.text)
            if redirect_match:
                # urljoin leaves absolute targets untouched and resolves
                # relative ones against the embed page.
                redirect_url = urljoin(embed_url, redirect_match.group(1))
                print("[VIDMOLY] Following redirect with token...")
                # follow_redirects handles the HTTP 302 that follows.
                response = await self.client.get(redirect_url, headers=headers, follow_redirects=True)
                response.raise_for_status()

            # Player config pattern: file:"URL"
            sources = re.findall(r'file:"([^"]+)"', response.text)
            if not sources:
                raise Exception("Could not find video source in page")
            video_source = sources[0]

            # Direct (non-HLS) source: hand the URL back unchanged. The
            # filename always carries exactly one ".mp4" suffix.
            if '.m3u8' not in video_source:
                return video_source, f"vidmoly_{vidmoly_id}.mp4"

            # HLS source: pick the best variant, or fall back to the playlist
            # itself when no variants could be listed.
            qualities = await self._get_m3u8_qualities(video_source, headers)
            if qualities:
                stream_url = qualities[0]['url']
                quality_label = qualities[0]['label']
            else:
                stream_url = video_source
                quality_label = self._DEFAULT_QUALITY_LABEL

            mp4_path = await self._convert_m3u8_to_mp4(stream_url, vidmoly_id, quality_label, headers)
            return mp4_path, f"vidmoly_{vidmoly_id}_{quality_label}p.mp4"

        except Exception as e:
            raise Exception(f"Error extracting VidMoly link: {str(e)}") from e

    async def _get_m3u8_qualities(self, master_m3u8_url: str, headers: dict) -> list[dict]:
        """Fetch a master M3U8 playlist and list its variant streams.

        Args:
            master_m3u8_url: URL of the master playlist.
            headers: HTTP headers to send with the request.

        Returns:
            A list of ``{'label': <height as str>, 'url': <absolute URL>}``
            dicts sorted by height, highest first. Variants without a
            ``RESOLUTION`` attribute are skipped. An empty list is returned
            when the playlist cannot be fetched or parsed (best effort).
        """
        try:
            response = await self.client.get(master_m3u8_url, headers=headers)
            response.raise_for_status()

            qualities = []
            pending_label = None  # height announced by the last STREAM-INF tag

            for raw_line in response.text.splitlines():
                line = raw_line.strip()
                if not line:
                    continue

                if line.startswith('#EXT-X-STREAM-INF'):
                    resolution_match = re.search(r'RESOLUTION=\d+x(\d+)', line)
                    # Reset on every tag so a label is never paired with the
                    # URI of a different variant.
                    pending_label = resolution_match.group(1) if resolution_match else None
                elif not line.startswith('#') and pending_label is not None:
                    # First URI line after the tag belongs to that variant.
                    # urljoin copes with absolute, root-relative and relative
                    # URIs, with or without query strings.
                    qualities.append({
                        'label': pending_label,
                        'url': urljoin(master_m3u8_url, line),
                    })
                    pending_label = None

            # Sort by resolution (descending)
            qualities.sort(key=lambda q: int(q['label']), reverse=True)
            return qualities
        except Exception as e:
            # Best effort: the caller falls back to the playlist URL itself.
            print(f"Error fetching M3U8 qualities: {e}")
            return []

    @staticmethod
    def _build_ffmpeg_command(m3u8_url: str, output_path: str, headers: dict) -> list[str]:
        """Build the ffmpeg argument vector that remuxes *m3u8_url* to MP4.

        The command is returned as a list so it can be executed without a
        shell; the URL and header values are passed through verbatim.
        """
        # ffmpeg expects one "Name: value" line per header, each CRLF-terminated.
        header_blob = ''.join(f'{name}: {value}\r\n' for name, value in headers.items())

        cmd = ['ffmpeg']
        if header_blob:
            cmd += ['-headers', header_blob]
        cmd += [
            '-i', m3u8_url,
            '-c', 'copy',
            '-bsf:a', 'aac_adtstoasc',
            '-y',  # Overwrite output file if exists
            output_path,
        ]
        return cmd

    async def _convert_m3u8_to_mp4(self, m3u8_url: str, vidmoly_id: str, quality: str, headers: dict) -> str:
        """Convert an M3U8 stream to an MP4 file using ffmpeg.

        Args:
            m3u8_url: URL of the HLS playlist to download.
            vidmoly_id: Video ID, used in the output filename.
            quality: Quality label, used in the output filename.
            headers: HTTP headers ffmpeg should send when fetching the stream.

        Returns:
            Path of the MP4 file written to the system temp directory.

        Raises:
            Exception: If ffmpeg times out, exits non-zero, cannot be
                started, or produces no output file.
        """
        output_path = os.path.join(tempfile.gettempdir(), f"vidmoly_{vidmoly_id}_{quality}p.mp4")
        cmd = self._build_ffmpeg_command(m3u8_url, output_path, headers)

        try:
            # Run in a worker thread so the event loop is not blocked while
            # ffmpeg works; no shell is involved, so the remote-supplied URL
            # cannot be interpreted as shell syntax.
            result = await asyncio.to_thread(
                subprocess.run,
                cmd,
                stdin=subprocess.DEVNULL,
                capture_output=True,
                text=True,
                errors='replace',
                timeout=self._FFMPEG_TIMEOUT_SECONDS,
            )

            if result.returncode != 0:
                raise Exception(f"FFmpeg conversion failed: {result.stderr}")

            if not os.path.exists(output_path):
                raise Exception("FFmpeg output file not created")

            return output_path

        except subprocess.TimeoutExpired as e:
            minutes = self._FFMPEG_TIMEOUT_SECONDS // 60
            raise Exception(f"FFmpeg conversion timeout ({minutes} minutes)") from e
        except Exception as e:
            raise Exception(f"Error converting M3U8 to MP4: {str(e)}") from e

    def _extract_vidmoly_id(self, url: str) -> "str | None":
        """Extract the VidMoly video ID from *url*.

        Recognised forms, tried in this order:
          - vidmoly.to/embed-ID.html
          - vidmoly.to/?v=ID
          - vidmoly.to/ID (also vidmoly.org)

        Returns:
            The alphanumeric ID, or None when no form matches.
        """
        id_patterns = (
            r'embed-([a-z0-9]+)',
            r'[?&]v=([a-z0-9]+)',
            r'vidmoly\.(?:to|org)/([a-z0-9]+)',
        )
        for pattern in id_patterns:
            match = re.search(pattern, url, re.IGNORECASE)
            if match:
                return match.group(1)
        return None