feat: recherche améliorée - scoring fuzzy multi-niveaux (#7)
- Algorithme de scoring : exact > starts-with > substring > all words > any word > no match
- Scores : 1.0 > 0.95 > 0.85 > 0.7 > 0.5 > 0.3
- Tolérance aux fautes de frappe via matching partiel sur mots
- Résultats triés par pertinence décroissante
- Supporte les titres en français, anglais, romaji

Closes #7
This commit is contained in:
@@ -174,10 +174,28 @@ async def search_anime_unified(
|
|||||||
|
|
||||||
if url and url not in seen_urls:
|
if url and url not in seen_urls:
|
||||||
seen_urls.add(url)
|
seen_urls.add(url)
|
||||||
if q.lower() in (item_dict.get("title") or "").lower():
|
# Fuzzy relevance scoring
|
||||||
|
title = (item_dict.get("title") or "").lower()
|
||||||
|
query_lower = q.lower()
|
||||||
|
|
||||||
|
# Exact match
|
||||||
|
if query_lower == title:
|
||||||
item_dict["_relevance_boost"] = 1.0
|
item_dict["_relevance_boost"] = 1.0
|
||||||
else:
|
# Title starts with query
|
||||||
|
elif title.startswith(query_lower):
|
||||||
|
item_dict["_relevance_boost"] = 0.95
|
||||||
|
# Query is a substring of title
|
||||||
|
elif query_lower in title:
|
||||||
|
item_dict["_relevance_boost"] = 0.85
|
||||||
|
# Words from query all appear in title
|
||||||
|
elif all(word in title.split() for word in query_lower.split() if len(word) > 1):
|
||||||
|
item_dict["_relevance_boost"] = 0.7
|
||||||
|
# At least one word matches
|
||||||
|
elif any(word in title.split() for word in query_lower.split() if len(word) > 2):
|
||||||
item_dict["_relevance_boost"] = 0.5
|
item_dict["_relevance_boost"] = 0.5
|
||||||
|
else:
|
||||||
|
item_dict["_relevance_boost"] = 0.3
|
||||||
|
|
||||||
results[pid].append(item_dict)
|
results[pid].append(item_dict)
|
||||||
|
|
||||||
# Prepare enrichment task for top 15 results per provider
|
# Prepare enrichment task for top 15 results per provider
|
||||||
|
|||||||
Reference in New Issue
Block a user