Merge pull request #9 from lexitronic/fix-track-matching
Use `rapidfuzz` instead of `difflib`, use `_remove_parentheses()` during ISRC search, remove redundant `title_match` entries.
This commit is contained in:
@@ -47,7 +47,6 @@ from deezspot.libutils.others_settings import (
|
||||
)
|
||||
from deezspot.libutils.logging_utils import ProgressReporter, logger, report_progress
|
||||
import requests
|
||||
from difflib import SequenceMatcher
|
||||
|
||||
from deezspot.models.callback.callbacks import (
|
||||
trackCallbackObject,
|
||||
@@ -64,14 +63,14 @@ from deezspot.models.callback.album import albumObject as albumCbObject
|
||||
from deezspot.models.callback.playlist import playlistObject as playlistCbObject
|
||||
from deezspot.models.callback.common import IDs
|
||||
from deezspot.models.callback.user import userObject
|
||||
|
||||
from rapidfuzz import fuzz
|
||||
|
||||
def _sim(a: str, b: str) -> float:
|
||||
a = (a or '').strip().lower()
|
||||
b = (b or '').strip().lower()
|
||||
if not a or not b:
|
||||
return 0.0
|
||||
return SequenceMatcher(None, a, b).ratio()
|
||||
return fuzz.partial_ratio(a, b) / 100
|
||||
|
||||
# Clean for searching on Deezer
|
||||
def _remove_parentheses(string: str) -> str:
|
||||
@@ -438,8 +437,8 @@ class DeeLogin:
|
||||
dz_json = dz
|
||||
tn = (dz_json.get('track_position') or dz_json.get('track_number') or 0)
|
||||
title_match = max(
|
||||
_sim(spo_title, dz_json.get('title', '')),
|
||||
_sim(spo_title, dz_json.get('title_short', '')),
|
||||
_sim(_remove_parentheses(spo_title), _remove_parentheses(dz_json.get('title', ''))),
|
||||
_sim(_remove_parentheses(spo_title), _remove_parentheses(dz_json.get('title_short', '')))
|
||||
)
|
||||
album_match = _sim(spo_album_title, (dz_json.get('album') or {}).get('title', ''))
|
||||
t_isrc = (dz_json.get('isrc') or '').upper()
|
||||
@@ -460,15 +459,11 @@ class DeeLogin:
|
||||
candidates = []
|
||||
|
||||
for cand in candidates:
|
||||
title_match_1 = max(
|
||||
_sim(spo_title, dz_json.get('title', '')),
|
||||
_sim(spo_title, dz_json.get('title_short', ''))
|
||||
title_match = max(
|
||||
_sim(_remove_parentheses(spo_title), _remove_parentheses(cand.get('title', ''))),
|
||||
_sim(_remove_parentheses(spo_title), _remove_parentheses(cand.get('title_short', '')))
|
||||
)
|
||||
title_match_2 = max(
|
||||
_sim(_remove_parentheses(spo_title), _remove_parentheses(dz_json.get('title', ''))),
|
||||
_sim(_remove_parentheses(spo_title), _remove_parentheses(dz_json.get('title_short', '')))
|
||||
)
|
||||
if max(title_match_1, title_match_2) < 0.90:
|
||||
if title_match < 0.90:
|
||||
continue
|
||||
c_id = cand.get('id')
|
||||
if not c_id:
|
||||
|
||||
Reference in New Issue
Block a user