From 96b26bac6af81dfca433db2eda5b0a5542deada2 Mon Sep 17 00:00:00 2001
From: Xoconoch <github@cordovault.com>
Date: Thu, 21 Aug 2025 09:40:02 -0500
Subject: [PATCH] fix: Artist batch download now follows
 watchedArtistAlbumGroup and considers pagination

---
 routes/utils/artist.py | 98 ++++++++++++++++++++++++++++++------------
 1 file changed, 71 insertions(+), 27 deletions(-)

diff --git a/routes/utils/artist.py b/routes/utils/artist.py
index b9d8126..e08474e 100644
--- a/routes/utils/artist.py
+++ b/routes/utils/artist.py
@@ -1,4 +1,5 @@
 import json
+from routes.utils.watch.manager import get_watch_config
 import logging
 from routes.utils.celery_queue_manager import download_queue_manager
 from routes.utils.get_info import get_spotify_info
@@ -86,16 +87,16 @@ def get_artist_discography(
         raise
 
 
-def download_artist_albums(
-    url, album_type="album,single,compilation", request_args=None, username=None
-):
+def download_artist_albums(url, album_type=None, request_args=None, username=None):
     """
     Download albums by an artist, filtered by album types.
+    Albums are filtered by the watchedArtistAlbumGroup setting from watch config; album_type is not used for the filtering.
 
     Args:
         url (str): Spotify artist URL
         album_type (str): Comma-separated list of album types to download
                          (album, single, compilation, appears_on)
+                         Not used for filtering; the watchedArtistAlbumGroup setting decides
         request_args (dict): Original request arguments for tracking
         username (str | None): Username initiating the request, used for per-user separation
 
@@ -118,39 +119,82 @@ def download_artist_albums(
         logger.error(error_msg)
         raise ValueError(error_msg)
 
-    artist_data = get_spotify_info(artist_id, "artist_discography")
+    # Get watch config to determine which album groups to download
+    watch_config = get_watch_config()
+    allowed_groups = [
+        g.lower()
+        for g in watch_config.get("watchedArtistAlbumGroup", ["album", "single"])
+    ]
+    logger.info(
+        f"Filtering albums by watchedArtistAlbumGroup setting (exact album_group match): {allowed_groups}"
+    )
 
-    if not artist_data or "items" not in artist_data:
+    # Fetch all artist albums with pagination
+    all_artist_albums = []
+    offset = 0
+    limit = 50  # Spotify API limit for artist albums
+
+    logger.info(f"Fetching all albums for artist ID: {artist_id} with pagination")
+
+    while True:
+        logger.debug(
+            f"Fetching albums for {artist_id}. Limit: {limit}, Offset: {offset}"
+        )
+        artist_data_page = get_spotify_info(
+            artist_id, "artist_discography", limit=limit, offset=offset
+        )
+
+        if not artist_data_page or not isinstance(artist_data_page.get("items"), list):
+            logger.warning(
+                f"No album items found or invalid format for artist {artist_id} at offset {offset}. Response: {artist_data_page}"
+            )
+            break
+
+        current_page_albums = artist_data_page.get("items", [])
+        if not current_page_albums:
+            logger.info(
+                f"No more albums on page for artist {artist_id} at offset {offset}. Total fetched so far: {len(all_artist_albums)}."
+            )
+            break
+
+        logger.debug(
+            f"Fetched {len(current_page_albums)} albums on current page for artist {artist_id}."
+        )
+        all_artist_albums.extend(current_page_albums)
+
+        # Check if Spotify indicates a next page URL
+        if artist_data_page.get("next"):
+            offset += limit  # Increment offset by the limit used for the request
+        else:
+            logger.info(
+                f"No next page URL for artist {artist_id}. Pagination complete. Total albums fetched: {len(all_artist_albums)}."
+            )
+            break
+
+    if not all_artist_albums:
         raise ValueError(
             f"Failed to retrieve artist data or no albums found for artist ID {artist_id}"
         )
 
-    allowed_types = [t.strip().lower() for t in album_type.split(",")]
-    logger.info(f"Filtering albums by types: {allowed_types}")
-
+    # Keep only albums whose album_group is one of the allowed groups (like in manager.py)
     filtered_albums = []
-    for album in artist_data.get("items", []):
-        album_type_value = album.get("album_type", "").lower()
+    for album in all_artist_albums:
         album_group_value = album.get("album_group", "").lower()
+        album_name = album.get("name", "Unknown Album")
+        album_id = album.get("id", "Unknown ID")
 
-        if (
-            (
-                "album" in allowed_types
-                and album_type_value == "album"
-                and album_group_value == "album"
-            )
-            or (
-                "single" in allowed_types
-                and album_type_value == "single"
-                and album_group_value == "single"
-            )
-            or ("compilation" in allowed_types and album_type_value == "compilation")
-            or ("appears_on" in allowed_types and album_group_value == "appears_on")
-        ):
+        # Exact album_group match only (align with watch manager)
+        is_matching_group = album_group_value in allowed_groups
+
+        logger.debug(
+            f"Album {album_name} ({album_id}): album_group={album_group_value}. Allowed groups: {allowed_groups}. Match: {is_matching_group}."
+        )
+
+        if is_matching_group:
             filtered_albums.append(album)
 
     if not filtered_albums:
-        logger.warning(f"No albums match the specified types: {album_type}")
+        logger.warning(f"No albums match the specified groups: {allowed_groups}")
         return [], []
 
     successfully_queued_albums = []
@@ -168,7 +212,7 @@ def download_artist_albums(
 
         if not album_url:
             logger.warning(
-                f"Skipping album '{album_name}' because it has no Spotify URL."
+                f"Skipping album {album_name} because it has no Spotify URL."
             )
             continue
 
@@ -211,6 +255,6 @@ def download_artist_albums(
             )
 
     logger.info(
-        f"Artist album processing: {len(successfully_queued_albums)} queued, {len(duplicate_albums)} duplicates found."
+        f"Artist album processing: {len(successfully_queued_albums)} queued, {len(duplicate_albums)} duplicates found from {len(filtered_albums)} matching albums out of {len(all_artist_albums)} total albums."
     )
     return successfully_queued_albums, duplicate_albums