Merge pull request #9 from lexitronic/fix-track-matching
Use `rapidfuzz` instead of `difflib`, use `_remove_parentheses()` during ISRC search, and remove redundant `title_match` entries.
This commit is contained in:
@@ -47,7 +47,6 @@ from deezspot.libutils.others_settings import (
|
|||||||
)
|
)
|
||||||
from deezspot.libutils.logging_utils import ProgressReporter, logger, report_progress
|
from deezspot.libutils.logging_utils import ProgressReporter, logger, report_progress
|
||||||
import requests
|
import requests
|
||||||
from difflib import SequenceMatcher
|
|
||||||
|
|
||||||
from deezspot.models.callback.callbacks import (
|
from deezspot.models.callback.callbacks import (
|
||||||
trackCallbackObject,
|
trackCallbackObject,
|
||||||
@@ -64,14 +63,14 @@ from deezspot.models.callback.album import albumObject as albumCbObject
|
|||||||
from deezspot.models.callback.playlist import playlistObject as playlistCbObject
|
from deezspot.models.callback.playlist import playlistObject as playlistCbObject
|
||||||
from deezspot.models.callback.common import IDs
|
from deezspot.models.callback.common import IDs
|
||||||
from deezspot.models.callback.user import userObject
|
from deezspot.models.callback.user import userObject
|
||||||
|
from rapidfuzz import fuzz
|
||||||
|
|
||||||
def _sim(a: str, b: str) -> float:
|
def _sim(a: str, b: str) -> float:
|
||||||
a = (a or '').strip().lower()
|
a = (a or '').strip().lower()
|
||||||
b = (b or '').strip().lower()
|
b = (b or '').strip().lower()
|
||||||
if not a or not b:
|
if not a or not b:
|
||||||
return 0.0
|
return 0.0
|
||||||
return SequenceMatcher(None, a, b).ratio()
|
return fuzz.partial_ratio(a, b) / 100
|
||||||
|
|
||||||
# Clean for searching on Deezer
|
# Clean for searching on Deezer
|
||||||
def _remove_parentheses(string: str) -> str:
|
def _remove_parentheses(string: str) -> str:
|
||||||
@@ -438,8 +437,8 @@ class DeeLogin:
|
|||||||
dz_json = dz
|
dz_json = dz
|
||||||
tn = (dz_json.get('track_position') or dz_json.get('track_number') or 0)
|
tn = (dz_json.get('track_position') or dz_json.get('track_number') or 0)
|
||||||
title_match = max(
|
title_match = max(
|
||||||
_sim(spo_title, dz_json.get('title', '')),
|
_sim(_remove_parentheses(spo_title), _remove_parentheses(dz_json.get('title', ''))),
|
||||||
_sim(spo_title, dz_json.get('title_short', '')),
|
_sim(_remove_parentheses(spo_title), _remove_parentheses(dz_json.get('title_short', '')))
|
||||||
)
|
)
|
||||||
album_match = _sim(spo_album_title, (dz_json.get('album') or {}).get('title', ''))
|
album_match = _sim(spo_album_title, (dz_json.get('album') or {}).get('title', ''))
|
||||||
t_isrc = (dz_json.get('isrc') or '').upper()
|
t_isrc = (dz_json.get('isrc') or '').upper()
|
||||||
@@ -460,15 +459,11 @@ class DeeLogin:
|
|||||||
candidates = []
|
candidates = []
|
||||||
|
|
||||||
for cand in candidates:
|
for cand in candidates:
|
||||||
title_match_1 = max(
|
title_match = max(
|
||||||
_sim(spo_title, dz_json.get('title', '')),
|
_sim(_remove_parentheses(spo_title), _remove_parentheses(cand.get('title', ''))),
|
||||||
_sim(spo_title, dz_json.get('title_short', ''))
|
_sim(_remove_parentheses(spo_title), _remove_parentheses(cand.get('title_short', '')))
|
||||||
)
|
)
|
||||||
title_match_2 = max(
|
if title_match < 0.90:
|
||||||
_sim(_remove_parentheses(spo_title), _remove_parentheses(dz_json.get('title', ''))),
|
|
||||||
_sim(_remove_parentheses(spo_title), _remove_parentheses(dz_json.get('title_short', '')))
|
|
||||||
)
|
|
||||||
if max(title_match_1, title_match_2) < 0.90:
|
|
||||||
continue
|
continue
|
||||||
c_id = cand.get('id')
|
c_id = cand.get('id')
|
||||||
if not c_id:
|
if not c_id:
|
||||||
|
|||||||
Reference in New Issue
Block a user