2b4cc617cb
- Implemented YAML-driven GenericScraper for resilient scraping - Added ProvidersManager to manage scraper health and active providers - Modernized unified search with systematic Kitsu metadata enrichment - Integrated automated health checks in the scheduler - Added comprehensive tests for scraping DSL and provider health
154 lines
6.0 KiB
Python
154 lines
6.0 KiB
Python
"""
|
|
Tests for Phase 2: Robust Scraping (DSL, Health Checks, Unified Search)
|
|
"""
|
|
import pytest
|
|
import yaml
|
|
import os
|
|
from unittest.mock import AsyncMock, MagicMock, patch
|
|
from pathlib import Path
|
|
|
|
from app.downloaders.generic_scraper import GenericScraper
|
|
from app.providers_manager import ProvidersManager
|
|
from app.models import AnimeSearchResult, AnimeMetadata
|
|
|
|
|
|
@pytest.fixture
def mock_config_path(tmp_path):
    """Write a minimal provider definition to ``testsite.yaml`` and return its path.

    The file is created inside pytest's per-test ``tmp_path`` directory and
    the path is returned as a plain string.
    """
    # CSS selectors and URL template used by the "search" section of the config.
    search_section = {
        "path": "/search?q={query}",
        "container_selector": ".item",
        "title_selector": "h3",
        "url_selector": "a",
        "image_selector": "img",
    }
    provider_definition = {
        "name": "Test Site",
        "id": "testsite",
        "base_url": "https://test.com",
        "search": search_section,
    }

    target = tmp_path / "testsite.yaml"
    with target.open("w", encoding="utf-8") as handle:
        yaml.dump(provider_definition, handle)

    return str(target)
|
|
|
|
|
|
class TestGenericScraper:
    """Exercise GenericScraper when it is configured from a YAML file."""

    def test_init_loads_config(self, mock_config_path):
        """The constructor exposes name, id and base_url read from the YAML file."""
        scraper = GenericScraper(mock_config_path)

        assert scraper.name == "Test Site"
        assert scraper.id == "testsite"
        assert scraper.base_url == "https://test.com"

    @pytest.mark.asyncio
    async def test_search_logic(self, mock_config_path):
        """search() turns one matching HTML container into one result."""
        scraper = GenericScraper(mock_config_path)

        # Canned page containing exactly one ".item" container.
        page_markup = """
        <div class="item">
            <h3>Naruto</h3>
            <a href="/naruto-page">Link</a>
            <img src="/cover.jpg">
        </div>
        """
        canned_response = MagicMock(text=page_markup)

        # Stand-in enricher so the test never reaches a real metadata API.
        enricher_stub = AsyncMock()
        enricher_stub.enrich_metadata.return_value = AnimeMetadata(
            title="Naruto",
            poster_image="https://test.com/cover.jpg",
        )

        with patch.object(scraper.client, 'get', return_value=canned_response), \
                patch('app.downloaders.generic_scraper.get_metadata_enricher',
                      return_value=enricher_stub):
            results = await scraper.search("Naruto")

            assert len(results) == 1
            first_hit = results[0]
            assert first_hit.title == "Naruto"
            assert "test.com/naruto-page" in first_hit.url
            assert first_hit.cover_image == "https://test.com/cover.jpg"

    @pytest.mark.asyncio
    async def test_check_health_success(self, mock_config_path):
        """check_health() is True when the probe search returns a result."""
        scraper = GenericScraper(mock_config_path)

        with patch.object(scraper, 'search', return_value=[MagicMock()]) as search_stub:
            healthy = await scraper.check_health()

            assert healthy is True
            # The health probe is expected to search for this fixed title.
            search_stub.assert_called_once_with("One Piece")

    @pytest.mark.asyncio
    async def test_check_health_failure(self, mock_config_path):
        """check_health() is False when the probe search returns nothing."""
        scraper = GenericScraper(mock_config_path)

        with patch.object(scraper, 'search', return_value=[]):
            healthy = await scraper.check_health()

            assert healthy is False
|
|
|
|
|
|
class TestProvidersManager:
    """Tests for ProvidersManager loading and health tracking."""

    def test_load_providers(self, tmp_path):
        """Every YAML file in the config directory becomes a provider keyed by its id."""
        # Create a temp providers config dir
        config_dir = tmp_path / "config"
        config_dir.mkdir()

        # Create two mock configs. The encoding is explicit (as in the
        # mock_config_path fixture) so the files do not depend on the
        # platform's locale default.
        for i in range(2):
            config = {"name": f"Site {i}", "id": f"site{i}", "base_url": "http://test.com"}
            with open(config_dir / f"site{i}.yaml", 'w', encoding='utf-8') as f:
                yaml.dump(config, f)

        manager = ProvidersManager(str(config_dir))

        assert len(manager.providers) == 2
        assert "site0" in manager.providers
        assert "site1" in manager.providers

    @pytest.mark.asyncio
    async def test_check_all_health(self, tmp_path):
        """check_all_health() records an "up" status and a check time for a healthy provider."""
        config_dir = tmp_path / "config"
        config_dir.mkdir()

        config = {"name": "Site", "id": "site", "base_url": "http://test.com"}
        with open(config_dir / "site.yaml", 'w', encoding='utf-8') as f:
            yaml.dump(config, f)

        manager = ProvidersManager(str(config_dir))

        # Mock the health check of the scraper so no network access happens.
        with patch.object(manager.providers["site"], 'check_health', return_value=True):
            await manager.check_all_health()

            assert manager.health_status["site"]["status"] == "up"
            assert manager.health_status["site"]["last_check"] is not None
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_router_search_unified_modern(mock_config_path):
    """Unified search groups results from active providers under the provider id."""
    from app.routers.router_anime import search_anime_unified
    from app.providers_manager import providers_manager

    # YAML-driven scraper whose search() is replaced by a canned result list.
    test_scraper = GenericScraper(mock_config_path)
    canned_hits = [
        AnimeSearchResult(title="Naruto", url="https://test.com/n", cover_image="", type="direct"),
    ]
    test_scraper.search = AsyncMock(return_value=canned_hits)

    # Enricher stand-in: fixed metadata, and search results passed through unchanged.
    enricher_stub = AsyncMock()
    enricher_stub.enrich_metadata = AsyncMock(return_value=AnimeMetadata(title="Naruto"))
    enricher_stub.enrich_search_results = AsyncMock(side_effect=lambda x: x)

    with patch.object(providers_manager, 'get_active_providers', return_value=[test_scraper]), \
            patch('app.routers.router_anime.AnimeUltimeDownloader') as legacy_downloader, \
            patch('app.routers.router_anime.get_metadata_enricher', return_value=enricher_stub):
        # The legacy downloader contributes no results.
        legacy_downloader.return_value.search_anime = AsyncMock(return_value=[])

        response = await search_anime_unified("Naruto")

        assert "results" in response
        assert "testsite" in response["results"]
        assert response["results"]["testsite"][0]["title"] == "Naruto"
|