feat: robust scraping DSL and health monitoring (Phase 2)
- Implemented YAML-driven GenericScraper for resilient scraping
- Added ProvidersManager to manage scraper health and active providers
- Modernized unified search with systematic Kitsu metadata enrichment
- Integrated automated health checks in the scheduler
- Added comprehensive tests for scraping DSL and provider health
This commit is contained in:
@@ -72,7 +72,6 @@ def mock_kitsu_api_raw():
|
||||
}
|
||||
|
||||
|
||||
@pytest.mark.skip(reason="New tests for non-implemented feature")
|
||||
class TestMetadataEnricher:
|
||||
"""Test MetadataEnricher functionality."""
|
||||
|
||||
@@ -389,7 +388,6 @@ class TestMetadataEnricher:
|
||||
assert result.rating is None
|
||||
|
||||
|
||||
@pytest.mark.skip(reason="New tests for non-implemented feature")
|
||||
class TestMetadataEnrichmentIntegration:
|
||||
"""Integration tests for metadata enrichment."""
|
||||
|
||||
|
||||
@@ -0,0 +1,153 @@
|
||||
"""
|
||||
Tests for Phase 2: Robust Scraping (DSL, Health Checks, Unified Search)
|
||||
"""
|
||||
import pytest
|
||||
import yaml
|
||||
import os
|
||||
from unittest.mock import AsyncMock, MagicMock, patch
|
||||
from pathlib import Path
|
||||
|
||||
from app.downloaders.generic_scraper import GenericScraper
|
||||
from app.providers_manager import ProvidersManager
|
||||
from app.models import AnimeSearchResult, AnimeMetadata
|
||||
|
||||
|
||||
@pytest.fixture
def mock_config_path(tmp_path):
    """Write a minimal provider definition to a temporary YAML file.

    The file is named ``testsite.yaml`` and lives under pytest's ``tmp_path``.

    Returns:
        The path of the written file, as a string.
    """
    # CSS selectors and URL template describing the search results page.
    search_section = {
        "path": "/search?q={query}",
        "container_selector": ".item",
        "title_selector": "h3",
        "url_selector": "a",
        "image_selector": "img",
    }
    provider_definition = {
        "name": "Test Site",
        "id": "testsite",
        "base_url": "https://test.com",
        "search": search_section,
    }

    target = tmp_path / "testsite.yaml"
    # yaml.dump without a stream returns the serialized document as a string.
    target.write_text(yaml.dump(provider_definition), encoding="utf-8")
    return str(target)
|
||||
|
||||
|
||||
class TestGenericScraper:
    """Tests for GenericScraper driven by YAML"""

    def test_init_loads_config(self, mock_config_path):
        """The scraper should expose name, id and base_url from the YAML file."""
        scraper = GenericScraper(mock_config_path)

        assert scraper.name == "Test Site"
        assert scraper.id == "testsite"
        assert scraper.base_url == "https://test.com"

    @pytest.mark.asyncio
    async def test_search_logic(self, mock_config_path):
        """A single matching container in the HTML should yield one result."""
        scraper = GenericScraper(mock_config_path)

        # Page body matching the selectors declared in the fixture config.
        mock_html = """
        <div class="item">
            <h3>Naruto</h3>
            <a href="/naruto-page">Link</a>
            <img src="/cover.jpg">
        </div>
        """

        # Stub the HTTP call and the metadata enricher so no real request is made.
        http_patch = patch.object(
            scraper.client, "get", return_value=MagicMock(text=mock_html)
        )
        enricher_patch = patch("app.downloaders.generic_scraper.get_metadata_enricher")

        with http_patch, enricher_patch as mock_get_enricher:
            mock_enricher = AsyncMock()
            mock_enricher.enrich_metadata.return_value = AnimeMetadata(
                title="Naruto", poster_image="https://test.com/cover.jpg"
            )
            mock_get_enricher.return_value = mock_enricher

            results = await scraper.search("Naruto")

        assert len(results) == 1
        assert results[0].title == "Naruto"
        assert "test.com/naruto-page" in results[0].url
        assert results[0].cover_image == "https://test.com/cover.jpg"

    @pytest.mark.asyncio
    async def test_check_health_success(self, mock_config_path):
        """check_health should report True when the probe search returns results."""
        scraper = GenericScraper(mock_config_path)

        with patch.object(scraper, "search", return_value=[MagicMock()]) as mock_search:
            is_healthy = await scraper.check_health()

        assert is_healthy is True
        # The health probe is expected to search for this fixed query.
        mock_search.assert_called_once_with("One Piece")

    @pytest.mark.asyncio
    async def test_check_health_failure(self, mock_config_path):
        """check_health should report False when the probe search returns nothing."""
        scraper = GenericScraper(mock_config_path)

        with patch.object(scraper, "search", return_value=[]):
            is_healthy = await scraper.check_health()

        assert is_healthy is False
|
||||
|
||||
|
||||
class TestProvidersManager:
    """Tests for ProvidersManager"""

    def test_load_providers(self, tmp_path):
        """Two YAML files in the config directory should yield two providers, keyed by id."""
        # Create a temp providers config dir
        config_dir = tmp_path / "config"
        config_dir.mkdir()

        # Create two mock configs
        for i in range(2):
            config = {"name": f"Site {i}", "id": f"site{i}", "base_url": "http://test.com"}
            # Explicit encoding keeps the file independent of the platform locale.
            with open(config_dir / f"site{i}.yaml", "w", encoding="utf-8") as f:
                yaml.dump(config, f)

        manager = ProvidersManager(str(config_dir))

        assert len(manager.providers) == 2
        assert "site0" in manager.providers
        assert "site1" in manager.providers

    @pytest.mark.asyncio
    async def test_check_all_health(self, tmp_path):
        """A provider whose health check passes should be recorded as "up" with a timestamp."""
        config_dir = tmp_path / "config"
        config_dir.mkdir()

        config = {"name": "Site", "id": "site", "base_url": "http://test.com"}
        with open(config_dir / "site.yaml", "w", encoding="utf-8") as f:
            yaml.dump(config, f)

        manager = ProvidersManager(str(config_dir))

        # Mock the health check of the scraper
        with patch.object(manager.providers["site"], "check_health", return_value=True):
            await manager.check_all_health()

        assert manager.health_status["site"]["status"] == "up"
        assert manager.health_status["site"]["last_check"] is not None
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_router_search_unified_modern(mock_config_path):
    """Unified search should list an active provider's results under its id."""
    from app.routers.router_anime import search_anime_unified
    from app.providers_manager import providers_manager

    # Scraper built from the fixture config, with its search stubbed out.
    test_scraper = GenericScraper(mock_config_path)
    test_scraper.search = AsyncMock(
        return_value=[
            AnimeSearchResult(title="Naruto", url="https://test.com/n", cover_image="", type="direct")
        ]
    )

    # Enricher stub: fixed metadata, and search results passed through unchanged.
    mock_enricher = AsyncMock()
    mock_enricher.enrich_metadata = AsyncMock(return_value=AnimeMetadata(title="Naruto"))
    mock_enricher.enrich_search_results = AsyncMock(side_effect=lambda x: x)

    # Patchers are created here and only activated by the `with` below,
    # in the order: providers manager, legacy downloader, metadata enricher.
    manager_patch = patch.object(
        providers_manager, "get_active_providers", return_value=[test_scraper]
    )
    legacy_patch = patch("app.routers.router_anime.AnimeUltimeDownloader")
    enricher_patch = patch("app.routers.router_anime.get_metadata_enricher")

    with manager_patch, legacy_patch as mock_dl, enricher_patch as mock_get_enricher:
        # Legacy downloader contributes no results.
        mock_dl.return_value.search_anime = AsyncMock(return_value=[])
        mock_get_enricher.return_value = mock_enricher

        response = await search_anime_unified("Naruto")

    assert "results" in response
    by_provider = response["results"]
    assert "testsite" in by_provider
    assert by_provider["testsite"][0]["title"] == "Naruto"
|
||||
Reference in New Issue
Block a user