feat: Add AGENTS.md and new downloaders with metadata enrichment
- Add AGENTS.md for agentic coding guidelines
- Add Oneupload and Smoothpre video player downloaders
- Add MetadataEnrichment service with Kitsu API fallback
- Add tests for metadata enrichment and provider detection
- Update .gitignore to ignore runtime config files
This commit is contained in:
@@ -0,0 +1,442 @@
|
||||
"""
|
||||
Tests for metadata enrichment with Kitsu API fallback.
|
||||
"""
|
||||
import pytest
|
||||
from unittest.mock import AsyncMock, MagicMock, patch
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
from app.metadata_enrichment import MetadataEnricher
|
||||
from app.models import AnimeMetadata
|
||||
|
||||
|
||||
@pytest.fixture
async def enricher(temp_dir):
    """Yield a MetadataEnricher whose cache lives in ``temp_dir``.

    The instance is closed after the requesting test finishes.
    """
    # NOTE(review): an ``async def`` fixture under plain ``@pytest.fixture``
    # presumably relies on pytest-asyncio running in auto mode -- confirm the
    # project's pytest configuration.
    instance = MetadataEnricher(cache_dir=temp_dir)
    yield instance
    # Teardown: everything after the yield runs once the test is done.
    await instance.close()
|
||||
|
||||
|
||||
@pytest.fixture
def mock_kitsu_api():
    """Return a sample anime record for the Kitsu conversion and merge tests."""
    jpg_images = {
        'large_image_url': 'https://kitsu.io/naruto-poster.jpg',
        'image_url': 'https://kitsu.io/naruto-poster-small.jpg',
    }
    webp_images = {
        'large_image_url': 'https://kitsu.io/naruto-banner.jpg',
    }
    return {
        'title': 'Naruto',
        'title_japanese': 'ナルト',
        'title_english': 'Naruto',
        'synopsis': 'A test synopsis from Kitsu',
        'genres': ['Action', 'Adventure'],
        'score': 8.5,
        'year': 2002,
        'episodes': 220,
        'status': 'Finished Airing',
        'images': {
            'jpg': jpg_images,
            'webp': webp_images,
        },
    }
|
||||
|
||||
|
||||
@pytest.fixture
def mock_kitsu_api_raw():
    """Return a sample search-result record, used as the patched ``search_anime`` payload."""
    images = {
        'jpg': {
            'image_url': 'https://kitsu.io/naruto-poster-small.jpg',
            'large_image_url': 'https://kitsu.io/naruto-poster.jpg',
        },
        'webp': {
            'image_url': 'https://kitsu.io/naruto-poster-small.webp',
            'large_image_url': 'https://kitsu.io/naruto-banner.jpg',
        },
    }
    record = {
        'mal_id': 123,
        'title': 'Naruto',
        'title_japanese': 'ナルト',
        'title_english': 'Naruto',
        'episodes': 220,
        'status': 'Finished Airing',
        'score': 8.5,
        'synopsis': 'A test synopsis from Kitsu',
        'genres': ['Action', 'Adventure'],
        'images': images,
        'url': 'https://kitsu.io/anime/123',
        'subtype': 'TV',
        'year': 2002,
    }
    return record
|
||||
|
||||
|
||||
class TestMetadataEnricher:
    """Unit tests for MetadataEnricher helpers, merging, caching and error paths."""

    def test_init_creates_cache_dir(self, enricher, temp_dir):
        """The enricher exposes the given cache dir and a cache file inside it."""
        assert enricher.cache_dir == temp_dir
        expected_cache_file = temp_dir / "metadata_cache.json"
        assert enricher.cache_file == expected_cache_file

    def test_get_cache_key(self, enricher):
        """Cache keys are stable for equal inputs and differ when the URL differs."""
        naruto_url = "https://example.com/naruto"
        first_key = enricher._get_cache_key("Naruto", naruto_url)
        repeated_key = enricher._get_cache_key("Naruto", naruto_url)
        other_url_key = enricher._get_cache_key("Naruto", "https://example.com/sasuke")

        # Identical title/URL pairs must map to the same key.
        assert first_key == repeated_key

        # Changing only the URL must change the key.
        assert first_key != other_url_key

    def test_get_missing_fields(self, enricher):
        """Populated metadata reports nothing missing; sparse metadata reports gaps."""
        # Every field filled in; alternative_titles is now required for completeness.
        populated = {
            'synopsis': 'Test synopsis',
            'genres': ['Action'],
            'rating': '8.5/10',
            'release_year': 2020,
            'studio': 'Studio Pierrot',
            'poster_image': 'https://example.com/poster.jpg',
            'banner_image': 'https://example.com/banner.jpg',
            'total_episodes': 12,
            'status': 'Completed',
            'alternative_titles': ['Japanese Title'],
        }
        nothing_missing = enricher._get_missing_fields(populated)
        assert len(nothing_missing) == 0

        # Only a synopsis plus an empty genre list (an empty list counts as missing).
        sparse = {
            'synopsis': 'Test synopsis',
            'genres': [],
        }
        gaps = enricher._get_missing_fields(sparse)

        # studio is intentionally not checked: it is not in KITSU_FIELDS, so it
        # is not detected as missing.
        for field in ('rating', 'release_year', 'status', 'genres'):
            assert field in gaps
        assert len(gaps) >= 4

    def test_convert_kitsu_to_metadata(self, enricher, mock_kitsu_api):
        """A Kitsu record is converted into the internal metadata field names."""
        converted = enricher._convert_kitsu_to_metadata(mock_kitsu_api)

        expected_fields = {
            'synopsis': 'A test synopsis from Kitsu',
            'genres': ['Action', 'Adventure'],
            'rating': '8.5/10',
            'release_year': 2002,
            'poster_image': 'https://kitsu.io/naruto-poster.jpg',
            'banner_image': 'https://kitsu.io/naruto-banner.jpg',
            'total_episodes': 220,
            'status': 'Completed',
        }
        for field, expected in expected_fields.items():
            assert converted[field] == expected

        alternative_titles = converted['alternative_titles']
        assert 'ナルト' in alternative_titles
        assert 'Naruto' in alternative_titles

    def test_convert_kitsu_status_translation(self, enricher):
        """Each Kitsu airing status maps onto the expected internal status."""
        translations = {
            'Airing': 'Ongoing',
            'Finished Airing': 'Completed',
            'To Be Aired': 'Upcoming',
        }

        for kitsu_status, expected_status in translations.items():
            converted = enricher._convert_kitsu_to_metadata({'status': kitsu_status})
            assert converted['status'] == expected_status

    def test_merge_metadata_prefer_provider(self, enricher, mock_kitsu_api):
        """Provider values win on conflict; Kitsu only fills fields the provider lacks."""
        from_provider = {
            'synopsis': 'Provider synopsis (better)',
            'genres': ['Action'],
            'rating': '9.0/10',  # Differs from the Kitsu value
            'release_year': 2002,
            'studio': 'Studio Pierrot',  # Absent from the Kitsu record
        }
        from_kitsu = enricher._convert_kitsu_to_metadata(mock_kitsu_api)

        merged = enricher._merge_metadata(from_provider, from_kitsu)

        # Values supplied by the provider are kept as-is.
        assert merged['synopsis'] == 'Provider synopsis (better)'
        assert merged['rating'] == '9.0/10'
        assert merged['studio'] == 'Studio Pierrot'

        # Fields the provider did not supply come from Kitsu.
        assert merged['total_episodes'] == 220
        assert merged['status'] == 'Completed'

    def test_calculate_quality_score(self, enricher):
        """Rich metadata scores above 0.8; near-empty metadata scores below 0.5."""
        rich = {
            'synopsis': 'A detailed synopsis of the anime with lots of information',
            'genres': ['Action', 'Adventure', 'Fantasy'],
            'rating': '8.5/10',
            'release_year': 2020,
            'studio': 'Studio Pierrot',
            'poster_image': 'https://example.com/poster.jpg',
            'banner_image': 'https://example.com/banner.jpg',
            'total_episodes': 12,
            'status': 'Completed',
            'alternative_titles': ['Japanese Title'],
        }
        rich_score = enricher._calculate_quality_score(rich)
        assert rich_score > 0.8  # Expected to rate as high quality

        sparse = {
            'synopsis': 'Short',
            'genres': ['Action'],
        }
        sparse_score = enricher._calculate_quality_score(sparse)
        assert sparse_score < 0.5  # Expected to rate as low quality

    @pytest.mark.asyncio
    async def test_enrich_metadata_with_kitsu_fallback(self, enricher, mock_kitsu_api_raw):
        """With the fallback enabled, Kitsu fills gaps while provider values are kept."""
        # The provider supplies no rating, release_year, poster_image, etc.
        provider_metadata = {
            'synopsis': 'Provider synopsis',
            'genres': ['Action'],
        }

        # Make the Kitsu search return the raw-format record.
        kitsu_search = patch.object(
            enricher.kitsu_api, 'search_anime', return_value=[mock_kitsu_api_raw]
        )
        with kitsu_search:
            enriched = await enricher.enrich_metadata(
                provider_metadata=provider_metadata,
                title='Naruto',
                url='https://example.com/naruto',
                use_kitsu_fallback=True,
            )

        # Gaps are filled from the Kitsu record.
        assert enriched.rating == '8.5/10'
        assert enriched.release_year == 2002
        assert enriched.poster_image is not None
        assert enriched.total_episodes == 220
        assert enriched.status == 'Completed'

        # The provider's own value is untouched.
        assert enriched.synopsis == 'Provider synopsis'

    @pytest.mark.asyncio
    async def test_enrich_metadata_without_kitsu_fallback(self, enricher):
        """With the fallback disabled, only provider-supplied fields are populated."""
        provider_metadata = {
            'synopsis': 'Provider synopsis',
            'genres': ['Action'],
        }

        enriched = await enricher.enrich_metadata(
            provider_metadata=provider_metadata,
            title='Naruto',
            url='https://example.com/naruto',
            use_kitsu_fallback=False,
        )

        # Nothing beyond the provider data is present.
        assert enriched.synopsis == 'Provider synopsis'
        assert enriched.genres == ['Action']
        assert enriched.rating is None  # No Kitsu fallback
        assert enriched.release_year is None

    @pytest.mark.asyncio
    async def test_enrich_metadata_caching(self, enricher, mock_kitsu_api_raw):
        """A repeated enrichment request is answered without a second Kitsu search."""
        request = {
            'provider_metadata': {
                'synopsis': 'Provider synopsis',
                'genres': ['Action'],
            },
            'title': 'Naruto',
            'url': 'https://example.com/naruto',
            'use_kitsu_fallback': True,
        }

        kitsu_search = patch.object(
            enricher.kitsu_api, 'search_anime', return_value=[mock_kitsu_api_raw]
        )
        with kitsu_search as mock_search:
            # The first request has to go to Kitsu.
            first = await enricher.enrich_metadata(**request)
            assert mock_search.call_count == 1

            # The identical second request must not trigger another search.
            second = await enricher.enrich_metadata(**request)
            assert mock_search.call_count == 1  # No additional call

        # Both requests yield the same metadata.
        assert first.model_dump() == second.model_dump()

    @pytest.mark.asyncio
    async def test_enrich_search_results(self, enricher, mock_kitsu_api_raw):
        """Results carrying metadata are enriched; a result without metadata stays bare."""
        with_metadata_a = {
            'title': 'Naruto',
            'url': 'https://example.com/naruto',
            'metadata': {
                'synopsis': 'Brief synopsis',
                'genres': ['Action'],
            },
        }
        with_metadata_b = {
            'title': 'One Piece',
            'url': 'https://example.com/onepiece',
            'metadata': {
                'synopsis': 'Another synopsis',
                'genres': ['Adventure'],
            },
        }
        without_metadata = {
            'title': 'No Metadata',
            'url': 'https://example.com/nometa',
        }
        search_results = [with_metadata_a, with_metadata_b, without_metadata]

        kitsu_search = patch.object(
            enricher.kitsu_api, 'search_anime', return_value=[mock_kitsu_api_raw]
        )
        with kitsu_search:
            enriched = await enricher.enrich_search_results(
                results=search_results,
                use_kitsu_fallback=True,
            )

        # Every input result is returned.
        assert len(enriched) == 3

        # The first result gained Kitsu fields.
        first_meta = enriched[0]['metadata']
        assert first_meta['rating'] == '8.5/10'
        assert first_meta['release_year'] == 2002

        # So did the second.
        assert enriched[1]['metadata']['rating'] == '8.5/10'

        # The third still has no metadata (key absent or None).
        third = enriched[2]
        assert 'metadata' not in third or third.get('metadata') is None

    @pytest.mark.asyncio
    async def test_cache_expiry(self, enricher, mock_kitsu_api_raw):
        """An entry cached 25 hours ago is ignored and Kitsu is queried again."""
        provider_metadata = {'synopsis': 'Test'}

        # Seed the cache with an entry stamped 25 hours in the past (expired).
        stale_timestamp = (datetime.now() - timedelta(hours=25)).isoformat()
        cache_key = enricher._get_cache_key('Test', 'https://example.com/test')
        enricher._cache[cache_key] = {
            'metadata': provider_metadata,
            'cached_at': stale_timestamp,
        }
        enricher._cache_dirty = True

        kitsu_search = patch.object(
            enricher.kitsu_api, 'search_anime', return_value=[mock_kitsu_api_raw]
        )
        with kitsu_search as mock_search:
            # The stale entry must not be used, so Kitsu is searched.
            enriched = await enricher.enrich_metadata(
                provider_metadata=provider_metadata,
                title='Test',
                url='https://example.com/test',
                use_kitsu_fallback=True,
            )

        assert mock_search.call_count == 1
        assert enriched.rating == '8.5/10'

    @pytest.mark.asyncio
    async def test_close_saves_cache(self, enricher):
        """Closing an enricher with a dirty cache leaves a cache file on disk."""
        # Put one entry in the cache and mark it as needing a save.
        entry = {
            'metadata': {'test': 'data'},
            'cached_at': datetime.now().isoformat(),
        }
        enricher._cache['test_key'] = entry
        enricher._cache_dirty = True

        await enricher.close()

        # The cache file must now exist.
        assert enricher.cache_file.exists()

    @pytest.mark.asyncio
    async def test_fetch_from_kitsu_error_handling(self, enricher):
        """If the Kitsu fetch raises, enrichment still returns the provider metadata."""
        provider_metadata = {'synopsis': 'Test'}

        failing_fetch = patch.object(
            enricher, '_fetch_from_kitsu', side_effect=Exception("API Error")
        )
        with failing_fetch:
            enriched = await enricher.enrich_metadata(
                provider_metadata=provider_metadata,
                title='NonExistent Anime',
                url='https://example.com/nonexistent',
                use_kitsu_fallback=True,
            )

        # The provider data survives the error; nothing was added from Kitsu.
        assert enriched.synopsis == 'Test'
        assert enriched.rating is None
|
||||
|
||||
|
||||
class TestMetadataEnrichmentIntegration:
    """Integration tests that call the real Kitsu API (marked ``slow``).

    Both tests request the module's ``enricher`` fixture instead of building
    ``MetadataEnricher()`` themselves. The fixture points the cache at a
    temporary directory and closes the enricher on teardown, so these tests
    neither read entries persisted by an earlier run nor write into the
    enricher's default cache location.
    """

    @pytest.mark.asyncio
    @pytest.mark.slow
    async def test_kitsu_api_integration(self, enricher):
        """Search the live Kitsu API for a well-known title and check the result shape."""
        results = await enricher.kitsu_api.search_anime('Naruto', limit=1)

        assert len(results) > 0
        top_hit = results[0]
        assert 'title' in top_hit
        assert 'synopsis' in top_hit or 'genres' in top_hit

    @pytest.mark.asyncio
    @pytest.mark.slow
    async def test_full_enrichment_flow(self, enricher):
        """Run the complete enrichment flow against live data for sparse provider metadata."""
        # Provider metadata with most fields left out on purpose.
        provider_metadata = {
            'synopsis': 'Naruto Uzumaki wants to be the best ninja.',
            'genres': ['Action'],
        }

        result = await enricher.enrich_metadata(
            provider_metadata=provider_metadata,
            title='Naruto',
            url='https://test.com/naruto',
            use_kitsu_fallback=True,
        )

        # The provider-supplied fields must still be present.
        assert result.synopsis is not None
        assert len(result.genres) > 0

        # Kitsu may have filled some gaps, but its responses can vary, so no
        # specific enriched field is asserted here.
        # NOTE(review): with a default of 0 this check passes whenever
        # '_quality_score' is absent from model_dump() -- confirm whether the
        # model actually exposes that key.
        quality_score = result.model_dump().get('_quality_score', 0)
        assert quality_score >= 0
|
||||
Reference in New Issue
Block a user