Files
ohm_streaming/tests/test_phase2_scraping.py
root 2b4cc617cb
CI / Test (Python 3.11) (push) Has been cancelled
CI / Test (Python 3.12) (push) Has been cancelled
CI / Lint (push) Has been cancelled
CI / Type Check (push) Has been cancelled
CI / Summary (push) Has been cancelled
feat: robust scraping DSL and health monitoring (Phase 2)
- Implemented YAML-driven GenericScraper for resilient scraping
- Added ProvidersManager to manage scraper health and active providers
- Modernized unified search with systematic Kitsu metadata enrichment
- Integrated automated health checks in the scheduler
- Added comprehensive tests for scraping DSL and provider health
2026-03-24 10:57:19 +00:00

154 lines
6.0 KiB
Python

"""
Tests for Phase 2: Robust Scraping (DSL, Health Checks, Unified Search)
"""
import pytest
import yaml
import os
from unittest.mock import AsyncMock, MagicMock, patch
from pathlib import Path
from app.downloaders.generic_scraper import GenericScraper
from app.providers_manager import ProvidersManager
from app.models import AnimeSearchResult, AnimeMetadata
@pytest.fixture
def mock_config_path(tmp_path):
    """Write a small provider config to a temporary YAML file and return its path as a string."""
    # Selectors used by the "search" section of the provider definition.
    search_section = {
        "path": "/search?q={query}",
        "container_selector": ".item",
        "title_selector": "h3",
        "url_selector": "a",
        "image_selector": "img",
    }
    provider_definition = {
        "name": "Test Site",
        "id": "testsite",
        "base_url": "https://test.com",
        "search": search_section,
    }
    target = tmp_path / "testsite.yaml"
    with open(target, "w", encoding="utf-8") as handle:
        yaml.dump(provider_definition, handle)
    return str(target)
class TestGenericScraper:
    """Exercise GenericScraper against the YAML config produced by mock_config_path."""

    def test_init_loads_config(self, mock_config_path):
        """The constructor exposes name, id and base_url read from the config file."""
        site = GenericScraper(mock_config_path)
        assert site.name == "Test Site"
        assert site.id == "testsite"
        assert site.base_url == "https://test.com"

    @pytest.mark.asyncio
    async def test_search_logic(self, mock_config_path):
        """search() yields a single Naruto result from the canned page."""
        site = GenericScraper(mock_config_path)

        # Canned HTTP body holding one element that matches the configured selectors.
        page_markup = (
            "\n"
            '<div class="item">\n'
            "<h3>Naruto</h3>\n"
            '<a href="/naruto-page">Link</a>\n'
            '<img src="/cover.jpg">\n'
            "</div>\n"
        )
        fake_response = MagicMock(text=page_markup)

        http_patch = patch.object(site.client, "get", return_value=fake_response)
        enricher_patch = patch("app.downloaders.generic_scraper.get_metadata_enricher")
        with http_patch, enricher_patch as enricher_factory:
            # Stub metadata enrichment so the test never reaches a real API.
            enricher = AsyncMock()
            enricher.enrich_metadata.return_value = AnimeMetadata(
                title="Naruto", poster_image="https://test.com/cover.jpg"
            )
            enricher_factory.return_value = enricher

            hits = await site.search("Naruto")

            assert len(hits) == 1
            first = hits[0]
            assert first.title == "Naruto"
            assert "test.com/naruto-page" in first.url
            assert first.cover_image == "https://test.com/cover.jpg"

    @pytest.mark.asyncio
    async def test_check_health_success(self, mock_config_path):
        """check_health() is True when the probe search returns at least one item."""
        site = GenericScraper(mock_config_path)
        search_patch = patch.object(site, "search", return_value=[MagicMock()])
        with search_patch as fake_search:
            verdict = await site.check_health()
            assert verdict is True
            fake_search.assert_called_once_with("One Piece")

    @pytest.mark.asyncio
    async def test_check_health_failure(self, mock_config_path):
        """check_health() is False when the probe search returns nothing."""
        site = GenericScraper(mock_config_path)
        with patch.object(site, "search", return_value=[]):
            verdict = await site.check_health()
            assert verdict is False
class TestProvidersManager:
    """Tests for ProvidersManager: provider discovery and health bookkeeping."""

    def test_load_providers(self, tmp_path):
        """Two YAML files in the config directory produce two providers keyed by id."""
        # Create a temp providers config dir
        config_dir = tmp_path / "config"
        config_dir.mkdir()

        # Create two mock configs
        for i in range(2):
            config = {"name": f"Site {i}", "id": f"site{i}", "base_url": "http://test.com"}
            # Explicit encoding: without it the file is written in the platform
            # locale encoding, which differs between machines.
            with open(config_dir / f"site{i}.yaml", "w", encoding="utf-8") as f:
                yaml.dump(config, f)

        manager = ProvidersManager(str(config_dir))

        assert len(manager.providers) == 2
        assert "site0" in manager.providers
        assert "site1" in manager.providers

    @pytest.mark.asyncio
    async def test_check_all_health(self, tmp_path):
        """check_all_health() records status "up" and a timestamp for a healthy provider."""
        config_dir = tmp_path / "config"
        config_dir.mkdir()

        config = {"name": "Site", "id": "site", "base_url": "http://test.com"}
        # Explicit encoding for the same locale-independence reason as above.
        with open(config_dir / "site.yaml", "w", encoding="utf-8") as f:
            yaml.dump(config, f)

        manager = ProvidersManager(str(config_dir))

        # Mock the health check of the scraper so no network access happens.
        with patch.object(manager.providers["site"], "check_health", return_value=True):
            await manager.check_all_health()

        assert manager.health_status["site"]["status"] == "up"
        assert manager.health_status["site"]["last_check"] is not None
@pytest.mark.asyncio
async def test_router_search_unified_modern(mock_config_path):
    """The unified search route groups results from an active provider under its id."""
    # Imported lazily inside the test, as in the original layout.
    from app.routers.router_anime import search_anime_unified
    from app.providers_manager import providers_manager

    # A real GenericScraper whose search() is replaced by a canned coroutine.
    scraper_stub = GenericScraper(mock_config_path)
    canned_hits = [
        AnimeSearchResult(title="Naruto", url="https://test.com/n", cover_image="", type="direct")
    ]
    scraper_stub.search = AsyncMock(return_value=canned_hits)

    providers_patch = patch.object(
        providers_manager, "get_active_providers", return_value=[scraper_stub]
    )
    legacy_patch = patch("app.routers.router_anime.AnimeUltimeDownloader")
    enricher_patch = patch("app.routers.router_anime.get_metadata_enricher")

    with providers_patch, legacy_patch as legacy_cls, enricher_patch as enricher_factory:
        # Legacy downloader contributes no results.
        legacy_cls.return_value.search_anime = AsyncMock(return_value=[])

        # Metadata enricher: fixed metadata, and search results passed through unchanged.
        enricher = AsyncMock()
        enricher.enrich_metadata = AsyncMock(return_value=AnimeMetadata(title="Naruto"))
        enricher.enrich_search_results = AsyncMock(side_effect=lambda x: x)
        enricher_factory.return_value = enricher

        response = await search_anime_unified("Naruto")

        assert "results" in response
        by_provider = response["results"]
        assert "testsite" in by_provider
        assert by_provider["testsite"][0]["title"] == "Naruto"