Merge pull request #9 from lexitronic/fix-track-matching

Use `rapidfuzz` instead of `difflib`, use `_remove_parentheses()` during ISRC search, remove redundant `title_match` entries.
2025-08-19 19:53:38 -06:00
parent d81c57e357 5886904a72
commit cd16cbabf3
2 changed files with 10 additions and 14 deletions
--- a/deezspot/deezloader/init.py
+++ b/deezspot/deezloader/init.py
@@ -47,7 +47,6 @@ from deezspot.libutils.others_settings import (
 )
 from deezspot.libutils.logging_utils import ProgressReporter, logger, report_progress
 import requests
-from difflib import SequenceMatcher
 from deezspot.models.callback.callbacks import (
    trackCallbackObject,
@@ -64,14 +63,14 @@ from deezspot.models.callback.album import albumObject as albumCbObject
 from deezspot.models.callback.playlist import playlistObject as playlistCbObject
 from deezspot.models.callback.common import IDs
 from deezspot.models.callback.user import userObject
-
+from rapidfuzz import fuzz
 def _sim(a: str, b: str) -> float:
    a = (a or '').strip().lower()
    b = (b or '').strip().lower()
    if not a or not b:
        return 0.0
-    return SequenceMatcher(None, a, b).ratio()
+    return fuzz.partial_ratio(a, b) / 100
 # Clean for searching on Deezer
 def _remove_parentheses(string: str) -> str:
@@ -438,8 +437,8 @@ class DeeLogin:
                dz_json = dz
                tn = (dz_json.get('track_position') or dz_json.get('track_number') or 0)
                title_match = max(
-                    _sim(spo_title, dz_json.get('title', '')),
+                    _sim(_remove_parentheses(spo_title), _remove_parentheses(dz_json.get('title', ''))),
-                    _sim(spo_title, dz_json.get('title_short', '')),
+                    _sim(_remove_parentheses(spo_title), _remove_parentheses(dz_json.get('title_short', '')))
                )
                album_match = _sim(spo_album_title, (dz_json.get('album') or {}).get('title', ''))
                t_isrc = (dz_json.get('isrc') or '').upper()
@@ -460,15 +459,11 @@ class DeeLogin:
            candidates = []
        for cand in candidates:
-            title_match_1 = max(
+            title_match = max(
-                _sim(spo_title, dz_json.get('title', '')),
+                _sim(_remove_parentheses(spo_title), _remove_parentheses(cand.get('title', ''))),
-                _sim(spo_title, dz_json.get('title_short', ''))
+                _sim(_remove_parentheses(spo_title), _remove_parentheses(cand.get('title_short', '')))
            )
-            title_match_2 = max(
+            if title_match < 0.90:
-                _sim(_remove_parentheses(spo_title), _remove_parentheses(dz_json.get('title', ''))),
-                _sim(_remove_parentheses(spo_title), _remove_parentheses(dz_json.get('title_short', '')))
-            )
-            if max(title_match_1, title_match_2) < 0.90:
                continue
            c_id = cand.get('id')
            if not c_id:
--- a/setup.py
+++ b/setup.py
@@ -27,6 +27,7 @@ setup(
                "fastapi==0.116.1",
                "uvicorn[standard]==0.35.0",
                "spotipy-anon==1.5.2",
-                "librespot-spotizerr==0.3.0"
+                "librespot-spotizerr==0.3.0",
+                "rapidfuzz"
         ],
 )