feat: Reimplement per-group artist discography download on the artist page

2025-08-28 07:51:10 -06:00
parent 4476d39d39
commit 0b7c9d0da8
8 changed files with 184 additions and 125 deletions
--- a/routes/utils/album.py
+++ b/routes/utils/album.py
@@ -8,6 +8,7 @@ from routes.utils.credentials import (
 )
 from routes.utils.celery_queue_manager import get_existing_task_id
 from routes.utils.errors import DuplicateDownloadError
+from routes.utils.celery_config import get_config_params


 def download_album(
@@ -98,6 +99,7 @@ def download_album(
                        spotify_client_id=global_spotify_client_id,
                        spotify_client_secret=global_spotify_client_secret,
                        progress_callback=progress_callback,
+                        spotify_credentials_path=str(get_spotify_blob_path(main)),
                    )
                    dl.download_albumspo(
                        link_album=url,  # Spotify URL
@@ -257,6 +259,11 @@ def download_album(
                spotify_client_id=global_spotify_client_id,  # Global Spotify keys
                spotify_client_secret=global_spotify_client_secret,  # Global Spotify keys
                progress_callback=progress_callback,
+                spotify_credentials_path=(
+                    str(get_spotify_blob_path(get_config_params().get("spotify")))
+                    if get_config_params().get("spotify")
+                    else None
+                ),
            )
            dl.download_albumdee(  # Deezer URL, download via Deezer
                link_album=url,
--- a/routes/utils/artist.py
+++ b/routes/utils/artist.py
@@ -4,7 +4,7 @@ import logging
 from routes.utils.celery_queue_manager import download_queue_manager
 from routes.utils.credentials import get_credential, _get_global_spotify_api_creds
 from routes.utils.errors import DuplicateDownloadError
-from routes.utils.get_info import get_spotify_info
+from routes.utils.get_info import get_client, get_artist

 from deezspot.libutils.utils import get_ids, link_is_valid

@@ -77,10 +77,26 @@ def get_artist_discography(
        log_json({"status": "error", "message": msg})
        raise ValueError(msg)

+    # Fetch artist once and return grouped arrays without pagination
    try:
-        # Use the optimized get_spotify_info function
-        discography = get_spotify_info(artist_id, "artist_discography")
-        return discography
+        client = get_client()
+        artist_obj = get_artist(client, artist_id)
+
+        # Normalize groups as arrays of IDs; tolerate dict shape from some sources
+        def normalize_group(val):
+            if isinstance(val, list):
+                return val
+            if isinstance(val, dict):
+                items = val.get("items") or val.get("releases") or []
+                return items if isinstance(items, list) else []
+            return []
+
+        return {
+            "album_group": normalize_group(artist_obj.get("album_group")),
+            "single_group": normalize_group(artist_obj.get("single_group")),
+            "compilation_group": normalize_group(artist_obj.get("compilation_group")),
+            "appears_on_group": normalize_group(artist_obj.get("appears_on_group")),
+        }
    except Exception as fetch_error:
        msg = f"An error occurred while fetching the discography: {fetch_error}"
        log_json({"status": "error", "message": msg})
@@ -120,61 +136,55 @@ def download_artist_albums(url, album_type=None, request_args=None, username=Non
        raise ValueError(error_msg)

    # Get watch config to determine which album groups to download
-    watch_config = get_watch_config()
-    allowed_groups = [
-        g.lower()
-        for g in watch_config.get("watchedArtistAlbumGroup", ["album", "single"])
-    ]
+    valid_groups = {"album", "single", "compilation", "appears_on"}
+    if album_type and isinstance(album_type, str):
+        requested = [g.strip().lower() for g in album_type.split(",") if g.strip()]
+        allowed_groups = [g for g in requested if g in valid_groups]
+        if not allowed_groups:
+            logger.warning(
+                f"album_type query provided but no valid groups found in {requested}; falling back to watch config."
+            )
+    if not album_type or not isinstance(album_type, str) or not allowed_groups:
+        watch_config = get_watch_config()
+        allowed_groups = [
+            g.lower()
+            for g in watch_config.get("watchedArtistAlbumGroup", ["album", "single"])
+            if g.lower() in valid_groups
+        ]
    logger.info(
-        f"Filtering albums by watchedArtistAlbumGroup setting (exact album_group match): {allowed_groups}"
+        f"Filtering albums by album_type/watch setting (exact album_group match): {allowed_groups}"
    )

-    # Fetch all artist albums with pagination
+    # Fetch artist and aggregate group arrays without pagination
+    client = get_client()
+    artist_obj = get_artist(client, artist_id)
+
+    def normalize_group(val):
+        if isinstance(val, list):
+            return val
+        if isinstance(val, dict):
+            items = val.get("items") or val.get("releases") or []
+            return items if isinstance(items, list) else []
+        return []
+
+    group_key_to_type = [
+        ("album_group", "album"),
+        ("single_group", "single"),
+        ("compilation_group", "compilation"),
+        ("appears_on_group", "appears_on"),
+    ]
+
    all_artist_albums = []
-    offset = 0
-    limit = 50  # Spotify API limit for artist albums
-
-    logger.info(f"Fetching all albums for artist ID: {artist_id} with pagination")
-
-    while True:
-        logger.debug(
-            f"Fetching albums for {artist_id}. Limit: {limit}, Offset: {offset}"
-        )
-        artist_data_page = get_spotify_info(
-            artist_id, "artist_discography", limit=limit, offset=offset
-        )
-
-        if not artist_data_page or not isinstance(artist_data_page.get("items"), list):
-            logger.warning(
-                f"No album items found or invalid format for artist {artist_id} at offset {offset}. Response: {artist_data_page}"
+    for key, group_type in group_key_to_type:
+        ids = normalize_group(artist_obj.get(key))
+        # transform to minimal album objects with album_group tagging for filtering parity
+        for album_id in ids:
+            all_artist_albums.append(
+                {
+                    "id": album_id,
+                    "album_group": group_type,
+                }
            )
-            break
-
-        current_page_albums = artist_data_page.get("items", [])
-        if not current_page_albums:
-            logger.info(
-                f"No more albums on page for artist {artist_id} at offset {offset}. Total fetched so far: {len(all_artist_albums)}."
-            )
-            break
-
-        logger.debug(
-            f"Fetched {len(current_page_albums)} albums on current page for artist {artist_id}."
-        )
-        all_artist_albums.extend(current_page_albums)
-
-        # Check if Spotify indicates a next page URL
-        if artist_data_page.get("next"):
-            offset += limit  # Increment offset by the limit used for the request
-        else:
-            logger.info(
-                f"No next page URL for artist {artist_id}. Pagination complete. Total albums fetched: {len(all_artist_albums)}."
-            )
-            break
-
-    if not all_artist_albums:
-        raise ValueError(
-            f"Failed to retrieve artist data or no albums found for artist ID {artist_id}"
-        )

    # Filter albums based on the allowed types using album_group field (like in manager.py)
    filtered_albums = []
@@ -201,13 +211,23 @@ def download_artist_albums(url, album_type=None, request_args=None, username=Non
    duplicate_albums = []

    for album in filtered_albums:
-        album_url = album.get("external_urls", {}).get("spotify", "")
-        album_name = album.get("name", "Unknown Album")
-        album_artists = album.get("artists", [])
+        album_id = album.get("id")
+        if not album_id:
+            logger.warning("Skipping album without ID in filtered list.")
+            continue
+        # fetch album details to construct URL and names
+        try:
+            album_obj = download_queue_manager.client.get_album(
+                album_id, include_tracks=False
+            )  # type: ignore[attr-defined]
+        except AttributeError:
+            # If download_queue_manager lacks a client, fallback to shared client
+            album_obj = get_client().get_album(album_id, include_tracks=False)
+        album_url = album_obj.get("external_urls", {}).get("spotify", "")
+        album_name = album_obj.get("name", "Unknown Album")
+        artists = album_obj.get("artists", []) or []
        album_artist = (
-            album_artists[0].get("name", "Unknown Artist")
-            if album_artists
-            else "Unknown Artist"
+            artists[0].get("name", "Unknown Artist") if artists else "Unknown Artist"
        )

        if not album_url:
--- a/routes/utils/get_info.py
+++ b/routes/utils/get_info.py
@@ -93,57 +93,6 @@ def get_playlist(
    return client.get_playlist(playlist_in, expand_items=expand_items)


-def get_spotify_info(
-    spotify_id: str,
-    info_type: str,
-    limit: int = 50,
-    offset: int = 0,
-) -> Dict[str, Any]:
-    """
-    Thin, typed wrapper around common Spotify info lookups using the shared client.
-
-    Currently supports:
-    - "artist_discography": returns a paginated view over the artist's releases
-      combined across album_group/single_group/compilation_group/appears_on_group.
-
-    Returns a mapping with at least: items, total, limit, offset.
-    Also includes a truthy "next" key when more pages are available.
-    """
-    client = get_client()
-
-    if info_type == "artist_discography":
-        artist = client.get_artist(spotify_id)
-        all_items = []
-        for key in (
-            "album_group",
-            "single_group",
-            "compilation_group",
-            "appears_on_group",
-        ):
-            grp = artist.get(key)
-            if isinstance(grp, list):
-                all_items.extend(grp)
-            elif isinstance(grp, dict):
-                items = grp.get("items") or grp.get("releases") or []
-                if isinstance(items, list):
-                    all_items.extend(items)
-        total = len(all_items)
-        start = max(0, offset or 0)
-        page_limit = max(1, limit or 50)
-        end = min(total, start + page_limit)
-        page_items = all_items[start:end]
-        has_more = end < total
-        return {
-            "items": page_items,
-            "total": total,
-            "limit": page_limit,
-            "offset": start,
-            "next": bool(has_more),
-        }
-
-    raise ValueError(f"Unsupported info_type: {info_type}")
-
-
 def get_playlist_metadata(playlist_id: str) -> Dict[str, Any]:
    """
    Fetch playlist metadata using the shared client without expanding items.
--- a/routes/utils/playlist.py
+++ b/routes/utils/playlist.py
@@ -3,6 +3,8 @@ from deezspot.spotloader import SpoLogin
 from deezspot.deezloader import DeeLogin
 from pathlib import Path
 from routes.utils.credentials import get_credential, _get_global_spotify_api_creds
+from routes.utils.credentials import get_spotify_blob_path
+from routes.utils.celery_config import get_config_params
 from routes.utils.celery_queue_manager import get_existing_task_id
 from routes.utils.errors import DuplicateDownloadError

@@ -95,6 +97,7 @@ def download_playlist(
                        spotify_client_id=global_spotify_client_id,
                        spotify_client_secret=global_spotify_client_secret,
                        progress_callback=progress_callback,
+                        spotify_credentials_path=str(get_spotify_blob_path(main)),
                    )
                    dl.download_playlistspo(
                        link_playlist=url,  # Spotify URL
@@ -265,6 +268,11 @@ def download_playlist(
                spotify_client_id=global_spotify_client_id,  # Global Spotify keys
                spotify_client_secret=global_spotify_client_secret,  # Global Spotify keys
                progress_callback=progress_callback,
+                spotify_credentials_path=(
+                    str(get_spotify_blob_path(get_config_params().get("spotify")))
+                    if get_config_params().get("spotify")
+                    else None
+                ),
            )
            dl.download_playlistdee(  # Deezer URL, download via Deezer
                link_playlist=url,
--- a/routes/utils/track.py
+++ b/routes/utils/track.py
@@ -6,6 +6,7 @@ from routes.utils.credentials import (
    _get_global_spotify_api_creds,
    get_spotify_blob_path,
 )
+from routes.utils.celery_config import get_config_params


 def download_track(
@@ -90,6 +91,7 @@ def download_track(
                        spotify_client_id=global_spotify_client_id,  # Global creds
                        spotify_client_secret=global_spotify_client_secret,  # Global creds
                        progress_callback=progress_callback,
+                        spotify_credentials_path=str(get_spotify_blob_path(main)),
                    )
                    # download_trackspo means: Spotify URL, download via Deezer
                    dl.download_trackspo(
@@ -251,6 +253,11 @@ def download_track(
                spotify_client_id=global_spotify_client_id,  # Global Spotify keys for internal Spo use by DeeLogin
                spotify_client_secret=global_spotify_client_secret,  # Global Spotify keys
                progress_callback=progress_callback,
+                spotify_credentials_path=(
+                    str(get_spotify_blob_path(get_config_params().get("spotify")))
+                    if get_config_params().get("spotify")
+                    else None
+                ),
            )
            dl.download_trackdee(  # Deezer URL, download via Deezer
                link_track=url,