"""Tests for metadata enrichment with Kitsu API fallback."""

from datetime import datetime, timedelta
from unittest.mock import AsyncMock, MagicMock, patch

import pytest

from app.metadata_enrichment import MetadataEnricher
from app.models import AnimeMetadata


@pytest.fixture
async def enricher(temp_dir):
    """Yield a MetadataEnricher backed by ``temp_dir`` and close it afterwards.

    ``temp_dir`` is a fixture defined outside this module (presumably in a
    conftest -- verify).

    NOTE(review): this is an async generator registered with plain
    ``pytest.fixture``; that presumably relies on pytest-asyncio running in
    auto mode -- confirm against the project's pytest configuration.
    """
    instance = MetadataEnricher(cache_dir=temp_dir)
    yield instance
    await instance.close()


@pytest.fixture
def mock_kitsu_api():
    """Return a Kitsu payload holding only the fields the converter tests read.

    Unlike ``mock_kitsu_api_raw`` it carries no ``mal_id``/``url``/``subtype``
    and is passed straight to ``_convert_kitsu_to_metadata``.
    """
    return {
        'title': 'Naruto',
        'title_japanese': 'ナルト',
        'title_english': 'Naruto',
        'synopsis': 'A test synopsis from Kitsu',
        'genres': ['Action', 'Adventure'],
        'score': 8.5,
        'year': 2002,
        'episodes': 220,
        'status': 'Finished Airing',
        'images': {
            'jpg': {
                'large_image_url': 'https://kitsu.io/naruto-poster.jpg',
                'image_url': 'https://kitsu.io/naruto-poster-small.jpg',
            },
            'webp': {
                'large_image_url': 'https://kitsu.io/naruto-banner.jpg',
            },
        },
    }


@pytest.fixture
def mock_kitsu_api_raw():
    """Return one full search-result entry for a patched ``search_anime``.

    The tests below wrap it in a list and use it as the ``return_value`` of
    ``enricher.kitsu_api.search_anime``.
    """
    return {
        'mal_id': 123,
        'title': 'Naruto',
        'title_japanese': 'ナルト',
        'title_english': 'Naruto',
        'episodes': 220,
        'status': 'Finished Airing',
        'score': 8.5,
        'synopsis': 'A test synopsis from Kitsu',
        'genres': ['Action', 'Adventure'],
        'images': {
            'jpg': {
                'image_url': 'https://kitsu.io/naruto-poster-small.jpg',
                'large_image_url': 'https://kitsu.io/naruto-poster.jpg',
            },
            'webp': {
                'image_url': 'https://kitsu.io/naruto-poster-small.webp',
                'large_image_url': 'https://kitsu.io/naruto-banner.jpg',
            },
        },
        'url': 'https://kitsu.io/anime/123',
        'subtype': 'TV',
        'year': 2002,
    }


@pytest.mark.skip(reason="New tests for non-implemented feature")
class TestMetadataEnricher:
    """Unit tests for MetadataEnricher (currently skipped as a whole)."""

    def test_init_creates_cache_dir(self, enricher, temp_dir):
        """The enricher records the cache directory and derives the cache file path."""
        assert enricher.cache_dir == temp_dir
        assert enricher.cache_file == temp_dir / "metadata_cache.json"

    def test_get_cache_key(self, enricher):
        """Cache keys are stable for equal inputs and differ when the URL differs."""
        key1 = enricher._get_cache_key("Naruto", "https://example.com/naruto")
        key2 = enricher._get_cache_key("Naruto", "https://example.com/naruto")
        key3 = enricher._get_cache_key("Naruto", "https://example.com/sasuke")

        # Same inputs should produce same key
        assert key1 == key2

        # Different URL should produce different key
        assert key1 != key3

    def test_get_missing_fields(self, enricher):
        """Missing-field detection reports absent keys and empty lists."""
        # Complete metadata
        complete = {
            'synopsis': 'Test synopsis',
            'genres': ['Action'],
            'rating': '8.5/10',
            'release_year': 2020,
            'studio': 'Studio Pierrot',
            'poster_image': 'https://example.com/poster.jpg',
            'banner_image': 'https://example.com/banner.jpg',
            'total_episodes': 12,
            'status': 'Completed',
            'alternative_titles': ['Japanese Title'],  # Now required for completeness
        }

        missing = enricher._get_missing_fields(complete)
        assert not missing

        # Incomplete metadata
        incomplete = {
            'synopsis': 'Test synopsis',
            'genres': [],  # Empty list counts as missing
        }

        missing = enricher._get_missing_fields(incomplete)
        assert 'rating' in missing
        assert 'release_year' in missing
        # Note: studio is not in KITSU_FIELDS, so it won't be detected as missing
        assert 'status' in missing
        assert 'genres' in missing  # Empty list is considered missing
        assert len(missing) >= 4

    def test_convert_kitsu_to_metadata(self, enricher, mock_kitsu_api):
        """A Kitsu payload is mapped onto the provider metadata field names."""
        metadata = enricher._convert_kitsu_to_metadata(mock_kitsu_api)

        assert metadata['synopsis'] == 'A test synopsis from Kitsu'
        assert metadata['genres'] == ['Action', 'Adventure']
        assert metadata['rating'] == '8.5/10'
        assert metadata['release_year'] == 2002
        assert metadata['poster_image'] == 'https://kitsu.io/naruto-poster.jpg'
        assert metadata['banner_image'] == 'https://kitsu.io/naruto-banner.jpg'
        assert metadata['total_episodes'] == 220
        assert metadata['status'] == 'Completed'
        assert 'ナルト' in metadata['alternative_titles']
        assert 'Naruto' in metadata['alternative_titles']

    # Parametrized so that one failing status does not hide the other cases.
    @pytest.mark.parametrize(
        ("kitsu_status", "expected_status"),
        [
            ('Airing', 'Ongoing'),
            ('Finished Airing', 'Completed'),
            ('To Be Aired', 'Upcoming'),
        ],
    )
    def test_convert_kitsu_status_translation(self, enricher, kitsu_status, expected_status):
        """Each Kitsu status string is translated to the expected status."""
        metadata = enricher._convert_kitsu_to_metadata({'status': kitsu_status})
        assert metadata['status'] == expected_status

    def test_merge_metadata_prefer_provider(self, enricher, mock_kitsu_api):
        """Provider values win on conflict; Kitsu values only fill the gaps."""
        provider_meta = {
            'synopsis': 'Provider synopsis (better)',
            'genres': ['Action'],
            'rating': '9.0/10',  # Different from Kitsu
            'release_year': 2002,
            'studio': 'Studio Pierrot',  # Not in Kitsu
        }

        kitsu_meta = enricher._convert_kitsu_to_metadata(mock_kitsu_api)
        merged = enricher._merge_metadata(provider_meta, kitsu_meta)

        # Provider data should be preserved
        assert merged['synopsis'] == 'Provider synopsis (better)'
        assert merged['rating'] == '9.0/10'
        assert merged['studio'] == 'Studio Pierrot'

        # Kitsu data should fill gaps
        assert merged['total_episodes'] == 220
        assert merged['status'] == 'Completed'

    def test_calculate_quality_score(self, enricher):
        """Complete metadata scores above 0.8; minimal metadata scores below 0.5."""
        # Complete metadata should have high score
        complete = {
            'synopsis': 'A detailed synopsis of the anime with lots of information',
            'genres': ['Action', 'Adventure', 'Fantasy'],
            'rating': '8.5/10',
            'release_year': 2020,
            'studio': 'Studio Pierrot',
            'poster_image': 'https://example.com/poster.jpg',
            'banner_image': 'https://example.com/banner.jpg',
            'total_episodes': 12,
            'status': 'Completed',
            'alternative_titles': ['Japanese Title'],
        }

        score = enricher._calculate_quality_score(complete)
        assert score > 0.8  # Should be high quality

        # Minimal metadata should have low score
        minimal = {
            'synopsis': 'Short',
            'genres': ['Action'],
        }

        score = enricher._calculate_quality_score(minimal)
        assert score < 0.5  # Should be low quality

    @pytest.mark.asyncio
    async def test_enrich_metadata_with_kitsu_fallback(self, enricher, mock_kitsu_api_raw):
        """With the fallback enabled, Kitsu fills gaps and provider data is kept."""
        provider_metadata = {
            'synopsis': 'Provider synopsis',
            'genres': ['Action'],
            # Missing: rating, release_year, poster_image, etc.
        }

        # Mock the Kitsu API search to return raw format
        with patch.object(enricher.kitsu_api, 'search_anime', return_value=[mock_kitsu_api_raw]):
            result = await enricher.enrich_metadata(
                provider_metadata=provider_metadata,
                title='Naruto',
                url='https://example.com/naruto',
                use_kitsu_fallback=True
            )

        # Should have Kitsu data
        assert result.rating == '8.5/10'
        assert result.release_year == 2002
        assert result.poster_image is not None
        assert result.total_episodes == 220
        assert result.status == 'Completed'

        # Should preserve provider data
        assert result.synopsis == 'Provider synopsis'

    @pytest.mark.asyncio
    async def test_enrich_metadata_without_kitsu_fallback(self, enricher):
        """With the fallback disabled, only provider-supplied fields are set."""
        provider_metadata = {
            'synopsis': 'Provider synopsis',
            'genres': ['Action'],
        }

        result = await enricher.enrich_metadata(
            provider_metadata=provider_metadata,
            title='Naruto',
            url='https://example.com/naruto',
            use_kitsu_fallback=False
        )

        # Should only have provider data
        assert result.synopsis == 'Provider synopsis'
        assert result.genres == ['Action']
        assert result.rating is None  # No Kitsu fallback
        assert result.release_year is None

    @pytest.mark.asyncio
    async def test_enrich_metadata_caching(self, enricher, mock_kitsu_api_raw):
        """A repeated enrichment of the same title/URL does not hit Kitsu again."""
        provider_metadata = {
            'synopsis': 'Provider synopsis',
            'genres': ['Action'],
        }

        with patch.object(
            enricher.kitsu_api, 'search_anime', return_value=[mock_kitsu_api_raw]
        ) as mock_search:
            # First call should fetch from Kitsu
            result1 = await enricher.enrich_metadata(
                provider_metadata=provider_metadata,
                title='Naruto',
                url='https://example.com/naruto',
                use_kitsu_fallback=True
            )
            assert mock_search.call_count == 1

            # Second call should use cache
            result2 = await enricher.enrich_metadata(
                provider_metadata=provider_metadata,
                title='Naruto',
                url='https://example.com/naruto',
                use_kitsu_fallback=True
            )
            assert mock_search.call_count == 1  # No additional call

        # Results should be identical
        assert result1.model_dump() == result2.model_dump()

    @pytest.mark.asyncio
    async def test_enrich_search_results(self, enricher, mock_kitsu_api_raw):
        """Results carrying metadata are enriched; results without it stay bare."""
        search_results = [
            {
                'title': 'Naruto',
                'url': 'https://example.com/naruto',
                'metadata': {
                    'synopsis': 'Brief synopsis',
                    'genres': ['Action'],
                },
            },
            {
                'title': 'One Piece',
                'url': 'https://example.com/onepiece',
                'metadata': {
                    'synopsis': 'Another synopsis',
                    'genres': ['Adventure'],
                },
            },
            {
                'title': 'No Metadata',
                'url': 'https://example.com/nometa',
            },
        ]

        with patch.object(enricher.kitsu_api, 'search_anime', return_value=[mock_kitsu_api_raw]):
            enriched = await enricher.enrich_search_results(
                results=search_results,
                use_kitsu_fallback=True
            )

        # Should enrich results with metadata
        assert len(enriched) == 3

        # First result should be enriched
        assert enriched[0]['metadata']['rating'] == '8.5/10'
        assert enriched[0]['metadata']['release_year'] == 2002

        # Second result should also be enriched
        assert enriched[1]['metadata']['rating'] == '8.5/10'

        # Third result should have no metadata (key absent or explicitly None)
        assert enriched[2].get('metadata') is None

    @pytest.mark.asyncio
    async def test_cache_expiry(self, enricher, mock_kitsu_api_raw):
        """An entry cached 25 hours ago is ignored and Kitsu is queried again."""
        provider_metadata = {'synopsis': 'Test'}

        # Add an expired entry to cache
        cache_key = enricher._get_cache_key('Test', 'https://example.com/test')
        enricher._cache[cache_key] = {
            'metadata': provider_metadata,
            'cached_at': (datetime.now() - timedelta(hours=25)).isoformat(),  # Expired
        }
        enricher._cache_dirty = True

        with patch.object(
            enricher.kitsu_api, 'search_anime', return_value=[mock_kitsu_api_raw]
        ) as mock_search:
            # Should fetch from Kitsu since cache is expired
            result = await enricher.enrich_metadata(
                provider_metadata=provider_metadata,
                title='Test',
                url='https://example.com/test',
                use_kitsu_fallback=True
            )

            assert mock_search.call_count == 1

        assert result.rating == '8.5/10'

    @pytest.mark.asyncio
    async def test_close_saves_cache(self, enricher):
        """Closing an enricher with a dirty cache writes the cache file."""
        # Add something to cache
        cache_key = 'test_key'
        enricher._cache[cache_key] = {
            'metadata': {'test': 'data'},
            'cached_at': datetime.now().isoformat(),
        }
        enricher._cache_dirty = True

        # NOTE(review): the `enricher` fixture awaits close() again on teardown,
        # so this assumes close() is safe to call twice -- confirm.
        await enricher.close()

        # Cache file should exist
        assert enricher.cache_file.exists()

    @pytest.mark.asyncio
    async def test_fetch_from_kitsu_error_handling(self, enricher):
        """A failing Kitsu fetch degrades to returning the provider metadata."""
        provider_metadata = {'synopsis': 'Test'}

        with patch.object(enricher, '_fetch_from_kitsu', side_effect=Exception("API Error")):
            result = await enricher.enrich_metadata(
                provider_metadata=provider_metadata,
                title='NonExistent Anime',
                url='https://example.com/nonexistent',
                use_kitsu_fallback=True
            )

        # Should return provider metadata despite error
        assert result.synopsis == 'Test'
        assert result.rating is None


@pytest.mark.skip(reason="New tests for non-implemented feature")
class TestMetadataEnrichmentIntegration:
    """Integration tests that build a MetadataEnricher without mocking Kitsu."""

    @pytest.mark.asyncio
    @pytest.mark.slow
    async def test_kitsu_api_integration(self):
        """Search Kitsu for a well-known title and check the result shape (slow)."""
        enricher = MetadataEnricher()

        try:
            # Search for a well-known anime
            results = await enricher.kitsu_api.search_anime('Naruto', limit=1)

            assert len(results) > 0
            assert 'title' in results[0]
            assert 'synopsis' in results[0] or 'genres' in results[0]
        finally:
            await enricher.close()

    @pytest.mark.asyncio
    @pytest.mark.slow
    async def test_full_enrichment_flow(self):
        """Run the whole enrichment flow on provider metadata with gaps (slow)."""
        enricher = MetadataEnricher()

        try:
            # Simulate provider metadata with gaps
            provider_metadata = {
                'synopsis': 'Naruto Uzumaki wants to be the best ninja.',
                'genres': ['Action'],
                # Missing many fields
            }

            result = await enricher.enrich_metadata(
                provider_metadata=provider_metadata,
                title='Naruto',
                url='https://test.com/naruto',
                use_kitsu_fallback=True
            )

            # Should have enriched data
            assert result.synopsis is not None
            assert len(result.genres) > 0

            # Kitsu might have filled some gaps
            # (We can't assert specific fields as Kitsu responses may vary)
            quality_score = result.model_dump().get('_quality_score', 0)
            assert quality_score >= 0
        finally:
            await enricher.close()