From 523eeed06bc6e632e7ced2c8f22dd7f25a2613e2 Mon Sep 17 00:00:00 2001 From: Xoconoch Date: Sat, 26 Jul 2025 19:44:23 -0600 Subject: [PATCH] fixed #191 --- routes/album.py | 11 +- routes/artist.py | 84 +++--- routes/playlist.py | 79 +++++- routes/prgs.py | 15 +- routes/track.py | 7 +- routes/utils/artist.py | 11 +- routes/utils/get_info.py | 375 ++++++++++++++++++++++----- routes/utils/search.py | 76 ++++-- routes/utils/watch/db.py | 194 +++++++++++++- routes/utils/watch/manager.py | 326 +++++++++++++++++++---- spotizerr-ui/src/routes/playlist.tsx | 156 +++++++++-- spotizerr-ui/src/types/spotify.ts | 26 ++ 12 files changed, 1110 insertions(+), 250 deletions(-) diff --git a/routes/album.py b/routes/album.py index b22e64e..7b0fc19 100755 --- a/routes/album.py +++ b/routes/album.py @@ -11,6 +11,11 @@ from routes.utils.errors import DuplicateDownloadError album_bp = Blueprint("album", __name__) +def construct_spotify_url(item_id: str, item_type: str = "track") -> str: + """Construct a Spotify URL for a given item ID and type.""" + return f"https://open.spotify.com/{item_type}/{item_id}" + + @album_bp.route("/download/", methods=["GET"]) def handle_download(album_id): # Retrieve essential parameters from the request. @@ -18,7 +23,7 @@ def handle_download(album_id): # artist = request.args.get('artist') # Construct the URL from album_id - url = f"https://open.spotify.com/album/{album_id}" + url = construct_spotify_url(album_id, "album") # Fetch metadata from Spotify try: @@ -163,9 +168,7 @@ def get_album_info(): ) try: - # Import and use the get_spotify_info function from the utility module. - from routes.utils.get_info import get_spotify_info - + # Use the get_spotify_info function (already imported at top) album_info = get_spotify_info(spotify_id, "album") return Response(json.dumps(album_info), status=200, mimetype="application/json") except Exception as e: diff --git a/routes/artist.py b/routes/artist.py index 943132e..98b605b 100644 --- a/routes/artist.py +++ b/routes/artist.py @@ -29,6 +29,11 @@ artist_bp = Blueprint("artist", __name__, url_prefix="/api/artist") logger = logging.getLogger(__name__) +def construct_spotify_url(item_id: str, item_type: str = "track") -> str: + """Construct a Spotify URL for a given item ID and type.""" + return f"https://open.spotify.com/{item_type}/{item_id}" + + def log_json(message_dict): print(json.dumps(message_dict)) @@ -41,7 +46,7 @@ def handle_artist_download(artist_id): - album_type: string(s); comma-separated values such as "album,single,appears_on,compilation" """ # Construct the artist URL from artist_id - url = f"https://open.spotify.com/artist/{artist_id}" + url = construct_spotify_url(artist_id, "artist") # Retrieve essential parameters from the request. 
album_type = request.args.get("album_type", "album,single,compilation") @@ -123,16 +128,26 @@ def get_artist_info(): ) try: - artist_info = get_spotify_info(spotify_id, "artist_discography") + # Get artist metadata first + artist_metadata = get_spotify_info(spotify_id, "artist") + + # Get artist discography for albums + artist_discography = get_spotify_info(spotify_id, "artist_discography") + + # Combine metadata with discography + artist_info = { + **artist_metadata, + "albums": artist_discography + } - # If artist_info is successfully fetched (it contains album items), + # If artist_info is successfully fetched and has albums, # check if the artist is watched and augment album items with is_locally_known status - if artist_info and artist_info.get("items"): + if artist_info and artist_info.get("albums") and artist_info["albums"].get("items"): watched_artist_details = get_watched_artist( spotify_id ) # spotify_id is the artist ID if watched_artist_details: # Artist is being watched - for album_item in artist_info["items"]: + for album_item in artist_info["albums"]["items"]: if album_item and album_item.get("id"): album_id = album_item["id"] album_item["is_locally_known"] = is_album_in_artist_db( @@ -171,64 +186,39 @@ def add_artist_to_watchlist(artist_spotify_id): {"message": f"Artist {artist_spotify_id} is already being watched."} ), 200 - # This call returns an album list-like structure based on logs + # Get artist metadata directly for name and basic info + artist_metadata = get_spotify_info(artist_spotify_id, "artist") + + # Get artist discography for album count artist_album_list_data = get_spotify_info( artist_spotify_id, "artist_discography" ) - # Check if we got any data and if it has items - if not artist_album_list_data or not isinstance( - artist_album_list_data.get("items"), list - ): + # Check if we got artist metadata + if not artist_metadata or not artist_metadata.get("name"): logger.error( - f"Could not fetch album list details for artist {artist_spotify_id} from Spotify using get_spotify_info('artist_discography'). Data: {artist_album_list_data}" + f"Could not fetch artist metadata for {artist_spotify_id} from Spotify." ) return jsonify( { - "error": f"Could not fetch sufficient details for artist {artist_spotify_id} to initiate watch." + "error": f"Could not fetch artist metadata for {artist_spotify_id} to initiate watch." } ), 404 - # Attempt to extract artist name and verify ID - # The actual artist name might be consistently found in the items, if they exist - artist_name_from_albums = "Unknown Artist" # Default - if artist_album_list_data["items"]: - first_album = artist_album_list_data["items"][0] - if ( - first_album - and isinstance(first_album.get("artists"), list) - and first_album["artists"] - ): - # Find the artist in the list that matches the artist_spotify_id - found_artist = next( - ( - art - for art in first_album["artists"] - if art.get("id") == artist_spotify_id - ), - None, - ) - if found_artist and found_artist.get("name"): - artist_name_from_albums = found_artist["name"] - elif first_album["artists"][0].get( - "name" - ): # Fallback to first artist if specific match not found or no ID - artist_name_from_albums = first_album["artists"][0]["name"] - logger.warning( - f"Could not find exact artist ID {artist_spotify_id} in first album's artists list. Using name '{artist_name_from_albums}'." 
- ) - else: + # Check if we got album data + if not artist_album_list_data or not isinstance( + artist_album_list_data.get("items"), list + ): logger.warning( - f"No album items found for artist {artist_spotify_id} to extract name. Using default." + f"Could not fetch album list details for artist {artist_spotify_id} from Spotify. Proceeding with metadata only." ) # Construct the artist_data object expected by add_artist_db - # We use the provided artist_spotify_id as the primary ID. artist_data_for_db = { - "id": artist_spotify_id, # This is the crucial part - "name": artist_name_from_albums, + "id": artist_spotify_id, + "name": artist_metadata.get("name", "Unknown Artist"), "albums": { # Mimic structure if add_artist_db expects it for total_albums - "total": artist_album_list_data.get("total", 0) + "total": artist_album_list_data.get("total", 0) if artist_album_list_data else 0 }, # Add any other fields add_artist_db might expect from a true artist object if necessary } @@ -236,7 +226,7 @@ def add_artist_to_watchlist(artist_spotify_id): add_artist_db(artist_data_for_db) logger.info( - f"Artist {artist_spotify_id} ('{artist_name_from_albums}') added to watchlist. Their albums will be processed by the watch manager." + f"Artist {artist_spotify_id} ('{artist_metadata.get('name', 'Unknown Artist')}') added to watchlist. Their albums will be processed by the watch manager." ) return jsonify( { diff --git a/routes/playlist.py b/routes/playlist.py index 67aaa8f..8793b24 100755 --- a/routes/playlist.py +++ b/routes/playlist.py @@ -33,6 +33,11 @@ logger = logging.getLogger(__name__) # Added logger initialization playlist_bp = Blueprint("playlist", __name__, url_prefix="/api/playlist") +def construct_spotify_url(item_id: str, item_type: str = "track") -> str: + """Construct a Spotify URL for a given item ID and type.""" + return f"https://open.spotify.com/{item_type}/{item_id}" + + @playlist_bp.route("/download/", methods=["GET"]) def handle_download(playlist_id): # Retrieve essential parameters from the request. @@ -41,14 +46,15 @@ def handle_download(playlist_id): orig_params = request.args.to_dict() # Construct the URL from playlist_id - url = f"https://open.spotify.com/playlist/{playlist_id}" + url = construct_spotify_url(playlist_id, "playlist") orig_params["original_url"] = ( request.url ) # Update original_url to the constructed one - # Fetch metadata from Spotify + # Fetch metadata from Spotify using optimized function try: - playlist_info = get_spotify_info(playlist_id, "playlist") + from routes.utils.get_info import get_playlist_metadata + playlist_info = get_playlist_metadata(playlist_id) if ( not playlist_info or not playlist_info.get("name") @@ -177,6 +183,7 @@ def get_playlist_info(): Expects a query parameter 'id' that contains the Spotify playlist ID. """ spotify_id = request.args.get("id") + include_tracks = request.args.get("include_tracks", "false").lower() == "true" if not spotify_id: return Response( @@ -186,8 +193,9 @@ def get_playlist_info(): ) try: - # Import and use the get_spotify_info function from the utility module. 
- playlist_info = get_spotify_info(spotify_id, "playlist") + # Use the optimized playlist info function + from routes.utils.get_info import get_playlist_info_optimized + playlist_info = get_playlist_info_optimized(spotify_id, include_tracks=include_tracks) # If playlist_info is successfully fetched, check if it's watched # and augment track items with is_locally_known status @@ -216,6 +224,64 @@ def get_playlist_info(): return Response(json.dumps(error_data), status=500, mimetype="application/json") +@playlist_bp.route("/metadata", methods=["GET"]) +def get_playlist_metadata(): + """ + Retrieve only Spotify playlist metadata (no tracks) to avoid rate limiting. + Expects a query parameter 'id' that contains the Spotify playlist ID. + """ + spotify_id = request.args.get("id") + + if not spotify_id: + return Response( + json.dumps({"error": "Missing parameter: id"}), + status=400, + mimetype="application/json", + ) + + try: + # Use the optimized playlist metadata function + from routes.utils.get_info import get_playlist_metadata + playlist_metadata = get_playlist_metadata(spotify_id) + + return Response( + json.dumps(playlist_metadata), status=200, mimetype="application/json" + ) + except Exception as e: + error_data = {"error": str(e), "traceback": traceback.format_exc()} + return Response(json.dumps(error_data), status=500, mimetype="application/json") + + +@playlist_bp.route("/tracks", methods=["GET"]) +def get_playlist_tracks(): + """ + Retrieve playlist tracks with pagination support for progressive loading. + Expects query parameters: 'id' (playlist ID), 'limit' (optional), 'offset' (optional). + """ + spotify_id = request.args.get("id") + limit = request.args.get("limit", 50, type=int) + offset = request.args.get("offset", 0, type=int) + + if not spotify_id: + return Response( + json.dumps({"error": "Missing parameter: id"}), + status=400, + mimetype="application/json", + ) + + try: + # Use the optimized playlist tracks function + from routes.utils.get_info import get_playlist_tracks + tracks_data = get_playlist_tracks(spotify_id, limit=limit, offset=offset) + + return Response( + json.dumps(tracks_data), status=200, mimetype="application/json" + ) + except Exception as e: + error_data = {"error": str(e), "traceback": traceback.format_exc()} + return Response(json.dumps(error_data), status=500, mimetype="application/json") + + @playlist_bp.route("/watch/", methods=["PUT"]) def add_to_watchlist(playlist_spotify_id): """Adds a playlist to the watchlist.""" @@ -232,7 +298,8 @@ def add_to_watchlist(playlist_spotify_id): ), 200 # Fetch playlist details from Spotify to populate our DB - playlist_data = get_spotify_info(playlist_spotify_id, "playlist") + from routes.utils.get_info import get_playlist_metadata + playlist_data = get_playlist_metadata(playlist_spotify_id) if not playlist_data or "id" not in playlist_data: logger.error( f"Could not fetch details for playlist {playlist_spotify_id} from Spotify." 
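A client-side sketch of how the two endpoints added above (`/api/playlist/metadata` plus the paginated `/api/playlist/tracks`) could be consumed. The base URL, port, and use of `requests` are assumptions for illustration only and are not part of this patch:

import requests

BASE_URL = "http://localhost:7171/api"  # assumed deployment address; adjust as needed

def fetch_playlist_progressively(playlist_id: str, page_size: int = 50) -> dict:
    """Fetch the cheap metadata first, then page through tracks with limit/offset."""
    meta = requests.get(f"{BASE_URL}/playlist/metadata", params={"id": playlist_id}, timeout=30)
    meta.raise_for_status()
    playlist = meta.json()

    items, offset = [], 0
    total = playlist.get("tracks", {}).get("total", 0)
    while offset < total:
        page = requests.get(
            f"{BASE_URL}/playlist/tracks",
            params={"id": playlist_id, "limit": page_size, "offset": offset},
            timeout=30,
        )
        page.raise_for_status()
        batch = page.json().get("items", [])
        if not batch:
            break  # fewer items than reported; stop rather than loop forever
        items.extend(batch)
        offset += len(batch)

    playlist.setdefault("tracks", {})["items"] = items
    return playlist

This mirrors what the reworked playlist.tsx view further down does with its 50-track pages and IntersectionObserver-driven infinite scroll.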
diff --git a/routes/prgs.py b/routes/prgs.py index fc1eda1..d41d1bf 100755 --- a/routes/prgs.py +++ b/routes/prgs.py @@ -8,9 +8,6 @@ from routes.utils.celery_tasks import ( get_last_task_status, get_all_tasks, cancel_task, - retry_task, - redis_client, - delete_task_data, ) # Configure logging @@ -174,9 +171,6 @@ def delete_task(task_id): # First, cancel the task if it's running cancel_task(task_id) - # Then, delete all associated data from Redis - delete_task_data(task_id) - return {"message": f"Task {task_id} deleted successfully"}, 200 @@ -185,14 +179,9 @@ def list_tasks(): """ Retrieve a list of all tasks in the system. Returns a detailed list of task objects including status and metadata. - By default, it returns active tasks. Use ?include_finished=true to include completed tasks. """ try: - # Check for 'include_finished' query parameter - include_finished_str = request.args.get("include_finished", "false") - include_finished = include_finished_str.lower() in ["true", "1", "yes"] - - tasks = get_all_tasks(include_finished=include_finished) + tasks = get_all_tasks() detailed_tasks = [] for task_summary in tasks: task_id = task_summary.get("task_id") @@ -315,7 +304,7 @@ def cancel_all_tasks(): Cancel all active (running or queued) tasks. """ try: - tasks_to_cancel = get_all_tasks(include_finished=False) + tasks_to_cancel = get_all_tasks() cancelled_count = 0 errors = [] diff --git a/routes/track.py b/routes/track.py index 5c0f7e8..4057268 100755 --- a/routes/track.py +++ b/routes/track.py @@ -18,6 +18,11 @@ from routes.utils.get_info import get_spotify_info # Added import track_bp = Blueprint("track", __name__) +def construct_spotify_url(item_id: str, item_type: str = "track") -> str: + """Construct a Spotify URL for a given item ID and type.""" + return f"https://open.spotify.com/{item_type}/{item_id}" + + @track_bp.route("/download/", methods=["GET"]) def handle_download(track_id): # Retrieve essential parameters from the request. @@ -26,7 +31,7 @@ def handle_download(track_id): orig_params = request.args.to_dict() # Construct the URL from track_id - url = f"https://open.spotify.com/track/{track_id}" + url = construct_spotify_url(track_id, "track") orig_params["original_url"] = url # Update original_url to the constructed one # Fetch metadata from Spotify diff --git a/routes/utils/artist.py b/routes/utils/artist.py index 49872cf..297f8f5 100644 --- a/routes/utils/artist.py +++ b/routes/utils/artist.py @@ -6,7 +6,6 @@ from routes.utils.get_info import get_spotify_info from routes.utils.credentials import get_credential, _get_global_spotify_api_creds from routes.utils.errors import DuplicateDownloadError -from deezspot.easy_spoty import Spo from deezspot.libutils.utils import get_ids, link_is_valid # Configure logging @@ -71,8 +70,6 @@ def get_artist_discography( f"Error checking Spotify account '{main_spotify_account_name}' for discography context: {e}" ) - Spo.__init__(client_id, client_secret) # Initialize with global API keys - try: artist_id = get_ids(url) except Exception as id_error: @@ -81,12 +78,8 @@ def get_artist_discography( raise ValueError(msg) try: - # The progress_callback is not a standard param for Spo.get_artist - # If Spo.get_artist is meant to be Spo.get_artist_discography, that would take limit/offset - # Assuming it's Spo.get_artist which takes artist_id and album_type. - # If progress_callback was for a different Spo method, this needs review. - # For now, removing progress_callback from this specific call as Spo.get_artist doesn't use it. 
- discography = Spo.get_artist(artist_id, album_type=album_type) + # Use the optimized get_spotify_info function + discography = get_spotify_info(artist_id, "artist_discography") return discography except Exception as fetch_error: msg = f"An error occurred while fetching the discography: {fetch_error}" diff --git a/routes/utils/get_info.py b/routes/utils/get_info.py index d0558c6..f24c481 100644 --- a/routes/utils/get_info.py +++ b/routes/utils/get_info.py @@ -1,94 +1,335 @@ -from deezspot.easy_spoty import Spo +import spotipy +from spotipy.oauth2 import SpotifyClientCredentials from routes.utils.celery_queue_manager import get_config_params from routes.utils.credentials import get_credential, _get_global_spotify_api_creds +import logging +import time +from typing import Dict, List, Optional, Any +import json +from pathlib import Path # Import Deezer API and logging from deezspot.deezloader.dee_api import API as DeezerAPI -import logging # Initialize logger logger = logging.getLogger(__name__) +# Global Spotify client instance for reuse +_spotify_client = None +_last_client_init = 0 +_client_init_interval = 3600 # Reinitialize client every hour -def get_spotify_info(spotify_id, spotify_type, limit=None, offset=None): +def _get_spotify_client(): """ - Get info from Spotify API. Uses global client_id/secret from search.json. - The default Spotify account from main.json might still be relevant for other Spo settings or if Spo uses it. - - Args: - spotify_id: The Spotify ID of the entity - spotify_type: The type of entity (track, album, playlist, artist, artist_discography, episode) - limit (int, optional): The maximum number of items to return. Only used if spotify_type is "artist_discography". - offset (int, optional): The index of the first item to return. Only used if spotify_type is "artist_discography". - - Returns: - Dictionary with the entity information + Get or create a Spotify client with global credentials. + Implements client reuse and periodic reinitialization. """ - client_id, client_secret = _get_global_spotify_api_creds() + global _spotify_client, _last_client_init + + current_time = time.time() + + # Reinitialize client if it's been more than an hour or if client doesn't exist + if (_spotify_client is None or + current_time - _last_client_init > _client_init_interval): + + client_id, client_secret = _get_global_spotify_api_creds() if not client_id or not client_secret: raise ValueError( "Global Spotify API client_id or client_secret not configured in ./data/creds/search.json." ) - # Get config parameters including default Spotify account name - # This might still be useful if Spo uses the account name for other things (e.g. market/region if not passed explicitly) - # For now, we are just ensuring the API keys are set. - config_params = get_config_params() - main_spotify_account_name = config_params.get( - "spotify", "" - ) # Still good to know which account is 'default' contextually - - if not main_spotify_account_name: - # This is less critical now that API keys are global, but could indicate a misconfiguration - # if other parts of Spo expect an account context. - print( - "WARN: No default Spotify account name configured in settings (main.json). API calls will use global keys." 
+ # Create new client + _spotify_client = spotipy.Spotify( + client_credentials_manager=SpotifyClientCredentials( + client_id=client_id, + client_secret=client_secret + ) ) - else: - # Optionally, one could load the specific account's region here if Spo.init or methods need it, - # but easy_spoty's Spo doesn't seem to take region directly in __init__. - # It might use it internally based on account details if credentials.json (blob) contains it. - try: - # We call get_credential just to check if the account exists, - # not for client_id/secret anymore for Spo.__init__ - get_credential("spotify", main_spotify_account_name) - except FileNotFoundError: - # This is a more serious warning if an account is expected to exist. - print( - f"WARN: Default Spotify account '{main_spotify_account_name}' configured in main.json was not found in credentials database." - ) - except Exception as e: - print( - f"WARN: Error accessing default Spotify account '{main_spotify_account_name}': {e}" - ) + _last_client_init = current_time + logger.info("Spotify client initialized/reinitialized") + + return _spotify_client - # Initialize the Spotify client with GLOBAL credentials - Spo.__init__(client_id, client_secret) +def _rate_limit_handler(func): + """ + Decorator to handle rate limiting with exponential backoff. + """ + def wrapper(*args, **kwargs): + max_retries = 3 + base_delay = 1 + + for attempt in range(max_retries): + try: + return func(*args, **kwargs) + except Exception as e: + if "429" in str(e) or "rate limit" in str(e).lower(): + if attempt < max_retries - 1: + delay = base_delay * (2 ** attempt) + logger.warning(f"Rate limited, retrying in {delay} seconds...") + time.sleep(delay) + continue + raise e + return func(*args, **kwargs) + return wrapper - if spotify_type == "track": - return Spo.get_track(spotify_id) - elif spotify_type == "album": - return Spo.get_album(spotify_id) - elif spotify_type == "playlist": - return Spo.get_playlist(spotify_id) - elif spotify_type == "artist_discography": - if limit is not None and offset is not None: - return Spo.get_artist_discography(spotify_id, limit=limit, offset=offset) - elif limit is not None: - return Spo.get_artist_discography(spotify_id, limit=limit) - elif offset is not None: - return Spo.get_artist_discography(spotify_id, offset=offset) +@_rate_limit_handler +def get_playlist_metadata(playlist_id: str) -> Dict[str, Any]: + """ + Get playlist metadata only (no tracks) to avoid rate limiting. + + Args: + playlist_id: The Spotify playlist ID + + Returns: + Dictionary with playlist metadata (name, description, owner, etc.) + """ + client = _get_spotify_client() + + try: + # Get basic playlist info without tracks + playlist = client.playlist(playlist_id, fields="id,name,description,owner,images,snapshot_id,public,followers,tracks.total") + + # Add a flag to indicate this is metadata only + playlist['_metadata_only'] = True + playlist['_tracks_loaded'] = False + + logger.debug(f"Retrieved playlist metadata for {playlist_id}: {playlist.get('name', 'Unknown')}") + return playlist + + except Exception as e: + logger.error(f"Error fetching playlist metadata for {playlist_id}: {e}") + raise + +@_rate_limit_handler +def get_playlist_tracks(playlist_id: str, limit: int = 100, offset: int = 0) -> Dict[str, Any]: + """ + Get playlist tracks with pagination support to handle large playlists efficiently. 
+
+    Args:
+        playlist_id: The Spotify playlist ID
+        limit: Number of tracks to fetch per request (max 100)
+        offset: Starting position for pagination
+
+    Returns:
+        Dictionary with tracks data
+    """
+    client = _get_spotify_client()
+
+    try:
+        # Get tracks with specified limit and offset
+        tracks_data = client.playlist_tracks(
+            playlist_id,
+            limit=min(limit, 100),  # Spotify API max is 100
+            offset=offset,
+            fields="items(track(id,name,artists,album,external_urls,preview_url,duration_ms,explicit,popularity)),total,limit,offset"
+        )
+
+        logger.debug(f"Retrieved {len(tracks_data.get('items', []))} tracks for playlist {playlist_id} (offset: {offset})")
+        return tracks_data
+
+    except Exception as e:
+        logger.error(f"Error fetching playlist tracks for {playlist_id}: {e}")
+        raise
+
+@_rate_limit_handler
+def get_playlist_full(playlist_id: str, batch_size: int = 100) -> Dict[str, Any]:
+    """
+    Get complete playlist data with all tracks, using batched requests to avoid rate limiting.
+
+    Args:
+        playlist_id: The Spotify playlist ID
+        batch_size: Number of tracks to fetch per batch (max 100)
+
+    Returns:
+        Complete playlist data with all tracks
+    """
+    client = _get_spotify_client()
+
+    try:
+        # First get metadata
+        playlist = get_playlist_metadata(playlist_id)
+
+        # Get total track count
+        total_tracks = playlist.get('tracks', {}).get('total', 0)
+
+        if total_tracks == 0:
+            playlist['tracks'] = {'items': [], 'total': 0}
+            return playlist
+
+        # Fetch all tracks in batches
+        all_tracks = []
+        offset = 0
+
+        while offset < total_tracks:
+            batch = get_playlist_tracks(playlist_id, limit=batch_size, offset=offset)
+            batch_items = batch.get('items', [])
+            if not batch_items:
+                # Stop early if the API returns fewer items than reported, to avoid looping forever
+                break
+            all_tracks.extend(batch_items)
+
+            offset += len(batch_items)
+
+            # Add small delay between batches to be respectful to API
+            if offset < total_tracks:
+                time.sleep(0.1)
+
+        # Update playlist with complete tracks data
+        playlist['tracks'] = {
+            'items': all_tracks,
+            'total': total_tracks,
+            'limit': batch_size,
+            'offset': 0
+        }
+        playlist['_metadata_only'] = False
+        playlist['_tracks_loaded'] = True
+
+        logger.info(f"Retrieved complete playlist {playlist_id} with {total_tracks} tracks")
+        return playlist
+
+    except Exception as e:
+        logger.error(f"Error fetching complete playlist {playlist_id}: {e}")
+        raise
+
+def check_playlist_updated(playlist_id: str, last_snapshot_id: str) -> bool:
+    """
+    Check if playlist has been updated by comparing snapshot_id.
+    This is much more efficient than fetching all tracks.
+
+    Args:
+        playlist_id: The Spotify playlist ID
+        last_snapshot_id: The last known snapshot_id
+
+    Returns:
+        True if playlist has been updated, False otherwise
+    """
+    try:
+        metadata = get_playlist_metadata(playlist_id)
+        current_snapshot_id = metadata.get('snapshot_id')
+
+        return current_snapshot_id != last_snapshot_id
+
+    except Exception as e:
+        logger.error(f"Error checking playlist update status for {playlist_id}: {e}")
+        raise
+
+@_rate_limit_handler
+def get_spotify_info(spotify_id: str, spotify_type: str, limit: Optional[int] = None, offset: Optional[int] = None) -> Dict[str, Any]:
+    """
+    Get info from Spotify API using Spotipy directly.
+    Optimized to prevent rate limiting by using appropriate endpoints.
+
+    Args:
+        spotify_id: The Spotify ID of the entity
+        spotify_type: The type of entity (track, album, playlist, artist, artist_discography, episode)
+        limit (int, optional): The maximum number of items to return. Used for pagination.
+        offset (int, optional): The index of the first item to return. Used for pagination.
+ + Returns: + Dictionary with the entity information + """ + client = _get_spotify_client() + + try: + if spotify_type == "track": + return client.track(spotify_id) + + elif spotify_type == "album": + return client.album(spotify_id) + + elif spotify_type == "playlist": + # Use optimized playlist fetching + return get_playlist_full(spotify_id) + + elif spotify_type == "playlist_metadata": + # Get only metadata for playlists + return get_playlist_metadata(spotify_id) + + elif spotify_type == "artist": + return client.artist(spotify_id) + + elif spotify_type == "artist_discography": + # Get artist's albums with pagination + albums = client.artist_albums( + spotify_id, + limit=limit or 20, + offset=offset or 0 + ) + return albums + + elif spotify_type == "episode": + return client.episode(spotify_id) + else: - return Spo.get_artist_discography(spotify_id) - elif spotify_type == "artist": - return Spo.get_artist(spotify_id) - elif spotify_type == "episode": - return Spo.get_episode(spotify_id) + raise ValueError(f"Unsupported Spotify type: {spotify_type}") + + except Exception as e: + logger.error(f"Error fetching {spotify_type} {spotify_id}: {e}") + raise + +# Cache for playlist metadata to reduce API calls +_playlist_metadata_cache = {} +_cache_ttl = 300 # 5 minutes cache + +def get_cached_playlist_metadata(playlist_id: str) -> Optional[Dict[str, Any]]: + """ + Get playlist metadata from cache if available and not expired. + + Args: + playlist_id: The Spotify playlist ID + + Returns: + Cached metadata or None if not available/expired + """ + if playlist_id in _playlist_metadata_cache: + cached_data, timestamp = _playlist_metadata_cache[playlist_id] + if time.time() - timestamp < _cache_ttl: + return cached_data + + return None + +def cache_playlist_metadata(playlist_id: str, metadata: Dict[str, Any]): + """ + Cache playlist metadata with timestamp. + + Args: + playlist_id: The Spotify playlist ID + metadata: The metadata to cache + """ + _playlist_metadata_cache[playlist_id] = (metadata, time.time()) + +def get_playlist_info_optimized(playlist_id: str, include_tracks: bool = False) -> Dict[str, Any]: + """ + Optimized playlist info function that uses caching and selective loading. + + Args: + playlist_id: The Spotify playlist ID + include_tracks: Whether to include track data (default: False to save API calls) + + Returns: + Playlist data with or without tracks + """ + # Check cache first + cached_metadata = get_cached_playlist_metadata(playlist_id) + + if cached_metadata and not include_tracks: + logger.debug(f"Returning cached metadata for playlist {playlist_id}") + return cached_metadata + + if include_tracks: + # Get complete playlist data + playlist_data = get_playlist_full(playlist_id) + # Cache the metadata portion + metadata_only = {k: v for k, v in playlist_data.items() if k != 'tracks'} + metadata_only['_metadata_only'] = True + metadata_only['_tracks_loaded'] = False + cache_playlist_metadata(playlist_id, metadata_only) + return playlist_data else: - raise ValueError(f"Unsupported Spotify type: {spotify_type}") - + # Get metadata only + metadata = get_playlist_metadata(playlist_id) + cache_playlist_metadata(playlist_id, metadata) + return metadata +# Keep the existing Deezer functions unchanged def get_deezer_info(deezer_id, deezer_type, limit=None): """ Get info from Deezer API. 
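A minimal sketch of how the new helpers in routes/utils/get_info.py are meant to compose. The imported names are the functions added above; refresh_playlist itself is only an illustration of the calling pattern, not code from this patch:

from typing import Optional

from routes.utils.get_info import (
    check_playlist_updated,
    get_playlist_full,
    get_playlist_metadata,
)

def refresh_playlist(playlist_id: str, last_snapshot_id: Optional[str]) -> dict:
    """Pay for the batched full-track fetch only when the snapshot_id has moved."""
    if last_snapshot_id and not check_playlist_updated(playlist_id, last_snapshot_id):
        # Unchanged on Spotify's side: one small metadata request is enough.
        return get_playlist_metadata(playlist_id)
    # New or changed snapshot: metadata plus all tracks, fetched in batches of up to 100.
    return get_playlist_full(playlist_id)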
diff --git a/routes/utils/search.py b/routes/utils/search.py index b96bb28..0f1423e 100755 --- a/routes/utils/search.py +++ b/routes/utils/search.py @@ -1,29 +1,57 @@ -from deezspot.easy_spoty import Spo +import spotipy +from spotipy.oauth2 import SpotifyClientCredentials import logging from routes.utils.credentials import get_credential, _get_global_spotify_api_creds +import time # Configure logger logger = logging.getLogger(__name__) +# Global Spotify client instance for reuse (same pattern as get_info.py) +_spotify_client = None +_last_client_init = 0 +_client_init_interval = 3600 # Reinitialize client every hour + +def _get_spotify_client(): + """ + Get or create a Spotify client with global credentials. + Implements client reuse and periodic reinitialization. + """ + global _spotify_client, _last_client_init + + current_time = time.time() + + # Reinitialize client if it's been more than an hour or if client doesn't exist + if (_spotify_client is None or + current_time - _last_client_init > _client_init_interval): + + client_id, client_secret = _get_global_spotify_api_creds() + + if not client_id or not client_secret: + raise ValueError( + "Global Spotify API client_id or client_secret not configured in ./data/creds/search.json." + ) + + # Create new client + _spotify_client = spotipy.Spotify( + client_credentials_manager=SpotifyClientCredentials( + client_id=client_id, + client_secret=client_secret + ) + ) + _last_client_init = current_time + logger.info("Spotify client initialized/reinitialized for search") + + return _spotify_client def search(query: str, search_type: str, limit: int = 3, main: str = None) -> dict: logger.info( f"Search requested: query='{query}', type={search_type}, limit={limit}, main_account_name={main}" - ) - - client_id, client_secret = _get_global_spotify_api_creds() - - if not client_id or not client_secret: - logger.error( - "Global Spotify API client_id or client_secret not configured in ./data/creds/search.json." - ) - raise ValueError( - "Spotify API credentials are not configured globally for search." ) if main: logger.debug( - f"Spotify account context '{main}' was provided for search. API keys are global, but this account might be used for other context by Spo if relevant." + f"Spotify account context '{main}' was provided for search. API keys are global, but this account might be used for other context." ) try: get_credential("spotify", main) @@ -41,14 +69,32 @@ def search(query: str, search_type: str, limit: int = 3, main: str = None) -> di "No specific 'main' account context provided for search. Using global API keys." 
) - logger.debug("Initializing Spotify client with global API credentials for search.") - Spo.__init__(client_id, client_secret) + logger.debug("Getting Spotify client for search.") + client = _get_spotify_client() logger.debug( f"Executing Spotify search with query='{query}', type={search_type}, limit={limit}" ) try: - spotify_response = Spo.search(query=query, search_type=search_type, limit=limit) + # Map search types to Spotipy search types + search_type_map = { + 'track': 'track', + 'album': 'album', + 'artist': 'artist', + 'playlist': 'playlist', + 'episode': 'episode', + 'show': 'show' + } + + spotify_type = search_type_map.get(search_type.lower(), 'track') + + # Execute search using Spotipy + spotify_response = client.search( + q=query, + type=spotify_type, + limit=limit + ) + logger.info(f"Search completed successfully for query: '{query}'") return spotify_response except Exception as e: diff --git a/routes/utils/watch/db.py b/routes/utils/watch/db.py index f6e0e67..2919ee1 100644 --- a/routes/utils/watch/db.py +++ b/routes/utils/watch/db.py @@ -40,6 +40,7 @@ EXPECTED_PLAYLIST_TRACKS_COLUMNS = { "added_to_db": "INTEGER", "is_present_in_spotify": "INTEGER DEFAULT 1", "last_seen_in_spotify": "INTEGER", + "snapshot_id": "TEXT", # Track the snapshot_id when this track was added/updated } EXPECTED_WATCHED_ARTISTS_COLUMNS = { @@ -165,6 +166,11 @@ def init_playlists_db(): "watched playlists", ): conn.commit() + + # Update all existing playlist track tables with new schema + _update_all_playlist_track_tables(cursor) + conn.commit() + logger.info( f"Playlists database initialized/updated successfully at {PLAYLISTS_DB_PATH}" ) @@ -173,6 +179,87 @@ def init_playlists_db(): raise +def _update_all_playlist_track_tables(cursor: sqlite3.Cursor): + """Updates all existing playlist track tables to ensure they have the latest schema.""" + try: + # Get all table names that start with 'playlist_' + cursor.execute("SELECT name FROM sqlite_master WHERE type='table' AND name LIKE 'playlist_%'") + playlist_tables = cursor.fetchall() + + for table_row in playlist_tables: + table_name = table_row[0] + if _ensure_table_schema( + cursor, + table_name, + EXPECTED_PLAYLIST_TRACKS_COLUMNS, + f"playlist tracks ({table_name})", + ): + logger.info(f"Updated schema for existing playlist track table: {table_name}") + + except sqlite3.Error as e: + logger.error(f"Error updating playlist track tables schema: {e}", exc_info=True) + + +def update_all_existing_tables_schema(): + """Updates all existing tables to ensure they have the latest schema. 
Can be called independently.""" + try: + with _get_playlists_db_connection() as conn: + cursor = conn.cursor() + + # Update main watched_playlists table + if _ensure_table_schema( + cursor, + "watched_playlists", + EXPECTED_WATCHED_PLAYLISTS_COLUMNS, + "watched playlists", + ): + logger.info("Updated schema for watched_playlists table") + + # Update all playlist track tables + _update_all_playlist_track_tables(cursor) + + conn.commit() + logger.info("Successfully updated all existing tables schema in playlists database") + + except sqlite3.Error as e: + logger.error(f"Error updating existing tables schema: {e}", exc_info=True) + raise + + +def ensure_playlist_table_schema(playlist_spotify_id: str): + """Ensures a specific playlist's track table has the latest schema.""" + table_name = f"playlist_{playlist_spotify_id.replace('-', '_')}" + try: + with _get_playlists_db_connection() as conn: + cursor = conn.cursor() + + # Check if table exists + cursor.execute( + f"SELECT name FROM sqlite_master WHERE type='table' AND name='{table_name}';" + ) + if cursor.fetchone() is None: + logger.warning(f"Table {table_name} does not exist. Cannot update schema.") + return False + + # Update schema + if _ensure_table_schema( + cursor, + table_name, + EXPECTED_PLAYLIST_TRACKS_COLUMNS, + f"playlist tracks ({playlist_spotify_id})", + ): + conn.commit() + logger.info(f"Updated schema for playlist track table: {table_name}") + return True + else: + logger.info(f"Schema already up-to-date for playlist track table: {table_name}") + return True + + except sqlite3.Error as e: + logger.error(f"Error updating schema for playlist {playlist_spotify_id}: {e}", exc_info=True) + return False + + def _create_playlist_tracks_table(playlist_spotify_id: str): """Creates or updates a table for a specific playlist to store its tracks in playlists.db.""" table_name = f"playlist_{playlist_spotify_id.replace('-', '_').replace(' ', '_')}" # Sanitize table name @@ -192,7 +279,8 @@ def _create_playlist_tracks_table(playlist_spotify_id: str): added_at_playlist TEXT, -- When track was added to Spotify playlist added_to_db INTEGER, -- Timestamp when track was added to this DB table is_present_in_spotify INTEGER DEFAULT 1, -- Flag to mark if still in Spotify playlist - last_seen_in_spotify INTEGER -- Timestamp when last confirmed in Spotify playlist + last_seen_in_spotify INTEGER, -- Timestamp when last confirmed in Spotify playlist + snapshot_id TEXT -- Track the snapshot_id when this track was added/updated ) """) # Ensure schema @@ -218,6 +306,10 @@ def add_playlist_to_watch(playlist_data: dict): """Adds a playlist to the watched_playlists table and creates its tracks table in playlists.db.""" try: _create_playlist_tracks_table(playlist_data["id"]) + + # Construct Spotify URL manually since external_urls might not be present in metadata + spotify_url = f"https://open.spotify.com/playlist/{playlist_data['id']}" + with _get_playlists_db_connection() as conn: # Use playlists connection cursor = conn.cursor() cursor.execute( @@ -234,7 +326,7 @@ def add_playlist_to_watch(playlist_data: dict): "display_name", playlist_data["owner"]["id"] ), playlist_data["tracks"]["total"], - playlist_data["external_urls"]["spotify"], + spotify_url, # Use constructed URL instead of external_urls playlist_data.get("snapshot_id"), int(time.time()), int(time.time()), @@ -363,11 +455,91 @@ def get_playlist_track_ids_from_db(playlist_spotify_id: str): return track_ids -def add_tracks_to_playlist_db(playlist_spotify_id: str, tracks_data: list): +def 
get_playlist_tracks_with_snapshot_from_db(playlist_spotify_id: str): + """Retrieves all tracks with their snapshot_ids from a specific playlist's tracks table in playlists.db.""" + table_name = f"playlist_{playlist_spotify_id.replace('-', '_')}" + tracks_data = {} + try: + with _get_playlists_db_connection() as conn: # Use playlists connection + cursor = conn.cursor() + cursor.execute( + f"SELECT name FROM sqlite_master WHERE type='table' AND name='{table_name}';" + ) + if cursor.fetchone() is None: + logger.warning( + f"Track table {table_name} does not exist in {PLAYLISTS_DB_PATH}. Cannot fetch track data." + ) + return tracks_data + + # Ensure the table has the latest schema before querying + _ensure_table_schema( + cursor, + table_name, + EXPECTED_PLAYLIST_TRACKS_COLUMNS, + f"playlist tracks ({playlist_spotify_id})", + ) + + cursor.execute( + f"SELECT spotify_track_id, snapshot_id, title FROM {table_name} WHERE is_present_in_spotify = 1" + ) + rows = cursor.fetchall() + for row in rows: + tracks_data[row["spotify_track_id"]] = { + "snapshot_id": row["snapshot_id"], + "title": row["title"] + } + return tracks_data + except sqlite3.Error as e: + logger.error( + f"Error retrieving track data for playlist {playlist_spotify_id} from table {table_name} in {PLAYLISTS_DB_PATH}: {e}", + exc_info=True, + ) + return tracks_data + + +def get_playlist_total_tracks_from_db(playlist_spotify_id: str) -> int: + """Retrieves the total number of tracks in the database for a specific playlist.""" + table_name = f"playlist_{playlist_spotify_id.replace('-', '_')}" + try: + with _get_playlists_db_connection() as conn: # Use playlists connection + cursor = conn.cursor() + cursor.execute( + f"SELECT name FROM sqlite_master WHERE type='table' AND name='{table_name}';" + ) + if cursor.fetchone() is None: + return 0 + + # Ensure the table has the latest schema before querying + _ensure_table_schema( + cursor, + table_name, + EXPECTED_PLAYLIST_TRACKS_COLUMNS, + f"playlist tracks ({playlist_spotify_id})", + ) + + cursor.execute( + f"SELECT COUNT(*) as count FROM {table_name} WHERE is_present_in_spotify = 1" + ) + row = cursor.fetchone() + return row["count"] if row else 0 + except sqlite3.Error as e: + logger.error( + f"Error retrieving track count for playlist {playlist_spotify_id} from table {table_name} in {PLAYLISTS_DB_PATH}: {e}", + exc_info=True, + ) + return 0 + + +def add_tracks_to_playlist_db(playlist_spotify_id: str, tracks_data: list, snapshot_id: str = None): """ Updates existing tracks in the playlist's DB table to mark them as currently present - in Spotify and updates their last_seen timestamp. Also refreshes metadata. + in Spotify and updates their last_seen timestamp and snapshot_id. Also refreshes metadata. Does NOT insert new tracks. New tracks are only added upon successful download. + + Args: + playlist_spotify_id: The Spotify playlist ID + tracks_data: List of track items from Spotify API + snapshot_id: The current snapshot_id for this playlist update """ table_name = f"playlist_{playlist_spotify_id.replace('-', '_')}" if not tracks_data: @@ -401,7 +573,7 @@ def add_tracks_to_playlist_db(playlist_spotify_id: str, tracks_data: list): # Prepare tuple for UPDATE statement. 
# Order: title, artist_names, album_name, album_artist_names, track_number, # album_spotify_id, duration_ms, added_at_playlist, - # is_present_in_spotify, last_seen_in_spotify, spotify_track_id (for WHERE) + # is_present_in_spotify, last_seen_in_spotify, snapshot_id, spotify_track_id (for WHERE) tracks_to_update.append( ( track.get("name", "N/A"), @@ -414,7 +586,7 @@ def add_tracks_to_playlist_db(playlist_spotify_id: str, tracks_data: list): track_item.get("added_at"), # From playlist item, update if changed 1, # is_present_in_spotify flag current_time, # last_seen_in_spotify timestamp - # added_to_db is NOT updated here as this function only updates existing records. + snapshot_id, # Update snapshot_id for this track track["id"], # spotify_track_id for the WHERE clause ) ) @@ -446,7 +618,8 @@ def add_tracks_to_playlist_db(playlist_spotify_id: str, tracks_data: list): duration_ms = ?, added_at_playlist = ?, is_present_in_spotify = ?, - last_seen_in_spotify = ? + last_seen_in_spotify = ?, + snapshot_id = ? WHERE spotify_track_id = ? """, tracks_to_update, @@ -611,7 +784,7 @@ def remove_specific_tracks_from_playlist_table( return 0 -def add_single_track_to_playlist_db(playlist_spotify_id: str, track_item_for_db: dict): +def add_single_track_to_playlist_db(playlist_spotify_id: str, track_item_for_db: dict, snapshot_id: str = None): """Adds or updates a single track in the specified playlist's tracks table in playlists.db.""" table_name = f"playlist_{playlist_spotify_id.replace('-', '_')}" track_detail = track_item_for_db.get("track") @@ -646,6 +819,7 @@ def add_single_track_to_playlist_db(playlist_spotify_id: str, track_item_for_db: current_time, 1, current_time, + snapshot_id, # Add snapshot_id to the tuple ) try: with _get_playlists_db_connection() as conn: # Use playlists connection @@ -654,8 +828,8 @@ def add_single_track_to_playlist_db(playlist_spotify_id: str, track_item_for_db: cursor.execute( f""" INSERT OR REPLACE INTO {table_name} - (spotify_track_id, title, artist_names, album_name, album_artist_names, track_number, album_spotify_id, duration_ms, added_at_playlist, added_to_db, is_present_in_spotify, last_seen_in_spotify) - VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) + (spotify_track_id, title, artist_names, album_name, album_artist_names, track_number, album_spotify_id, duration_ms, added_at_playlist, added_to_db, is_present_in_spotify, last_seen_in_spotify, snapshot_id) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) 
""", track_data_tuple, ) diff --git a/routes/utils/watch/manager.py b/routes/utils/watch/manager.py index ed93ff9..baa0f49 100644 --- a/routes/utils/watch/manager.py +++ b/routes/utils/watch/manager.py @@ -9,9 +9,13 @@ from routes.utils.watch.db import ( get_watched_playlists, get_watched_playlist, get_playlist_track_ids_from_db, + get_playlist_tracks_with_snapshot_from_db, + get_playlist_total_tracks_from_db, add_tracks_to_playlist_db, update_playlist_snapshot, mark_tracks_as_not_present_in_spotify, + update_all_existing_tables_schema, + ensure_playlist_table_schema, # Artist watch DB functions get_watched_artists, get_watched_artist, @@ -20,6 +24,9 @@ from routes.utils.watch.db import ( ) from routes.utils.get_info import ( get_spotify_info, + get_playlist_metadata, + get_playlist_tracks, + check_playlist_updated, ) # To fetch playlist, track, artist, and album details from routes.utils.celery_queue_manager import download_queue_manager @@ -34,6 +41,7 @@ DEFAULT_WATCH_CONFIG = { "watchedArtistAlbumGroup": ["album", "single"], # Default for artists "delay_between_playlists_seconds": 2, "delay_between_artists_seconds": 5, # Added for artists + "use_snapshot_id_checking": True, # Enable snapshot_id checking for efficiency } @@ -82,6 +90,152 @@ def construct_spotify_url(item_id, item_type="track"): return f"https://open.spotify.com/{item_type}/{item_id}" +def has_playlist_changed(playlist_spotify_id: str, current_snapshot_id: str) -> bool: + """ + Check if a playlist has changed by comparing snapshot_id. + This is much more efficient than fetching all tracks. + + Args: + playlist_spotify_id: The Spotify playlist ID + current_snapshot_id: The current snapshot_id from API + + Returns: + True if playlist has changed, False otherwise + """ + try: + db_playlist = get_watched_playlist(playlist_spotify_id) + if not db_playlist: + # Playlist not in database, consider it as "changed" to trigger initial processing + return True + + last_snapshot_id = db_playlist.get("snapshot_id") + if not last_snapshot_id: + # No previous snapshot_id, consider it as "changed" to trigger initial processing + return True + + return current_snapshot_id != last_snapshot_id + + except Exception as e: + logger.error(f"Error checking playlist change status for {playlist_spotify_id}: {e}") + # On error, assume playlist has changed to be safe + return True + + +def needs_track_sync(playlist_spotify_id: str, current_snapshot_id: str, api_total_tracks: int) -> tuple[bool, list[str]]: + """ + Check if tracks need to be synchronized by comparing snapshot_ids and total counts. + + Args: + playlist_spotify_id: The Spotify playlist ID + current_snapshot_id: The current snapshot_id from API + api_total_tracks: The total number of tracks reported by API + + Returns: + Tuple of (needs_sync, tracks_to_find) where: + - needs_sync: True if tracks need to be synchronized + - tracks_to_find: List of track IDs that need to be found in API response + """ + try: + # Get tracks from database with their snapshot_ids + db_tracks = get_playlist_tracks_with_snapshot_from_db(playlist_spotify_id) + db_total_tracks = get_playlist_total_tracks_from_db(playlist_spotify_id) + + # Check if total count matches + if db_total_tracks != api_total_tracks: + logger.info( + f"Track count mismatch for playlist {playlist_spotify_id}: DB={db_total_tracks}, API={api_total_tracks}. Full sync needed to ensure all tracks are captured." 
+ ) + # Always do full sync when counts don't match to ensure we don't miss any tracks + # This handles cases like: + # - Empty database (DB=0, API=1345) + # - Missing tracks (DB=1000, API=1345) + # - Removed tracks (DB=1345, API=1000) + return True, [] # Empty list indicates full sync needed + + # Check if any tracks have different snapshot_id + tracks_to_find = [] + for track_id, track_data in db_tracks.items(): + if track_data.get("snapshot_id") != current_snapshot_id: + tracks_to_find.append(track_id) + + if tracks_to_find: + logger.info( + f"Found {len(tracks_to_find)} tracks with outdated snapshot_id for playlist {playlist_spotify_id}" + ) + return True, tracks_to_find + + return False, [] + + except Exception as e: + logger.error(f"Error checking track sync status for {playlist_spotify_id}: {e}") + # On error, assume sync is needed to be safe + return True, [] + + +def find_tracks_in_playlist(playlist_spotify_id: str, tracks_to_find: list[str], current_snapshot_id: str) -> tuple[list, list]: + """ + Progressively fetch playlist tracks until all specified tracks are found or playlist is exhausted. + + Args: + playlist_spotify_id: The Spotify playlist ID + tracks_to_find: List of track IDs to find + current_snapshot_id: The current snapshot_id + + Returns: + Tuple of (found_tracks, not_found_tracks) where: + - found_tracks: List of track items that were found + - not_found_tracks: List of track IDs that were not found + """ + found_tracks = [] + not_found_tracks = tracks_to_find.copy() + offset = 0 + limit = 100 + + logger.info( + f"Searching for {len(tracks_to_find)} tracks in playlist {playlist_spotify_id} starting from offset {offset}" + ) + + while not_found_tracks and offset < 10000: # Safety limit + try: + tracks_batch = get_playlist_tracks(playlist_spotify_id, limit=limit, offset=offset) + + if not tracks_batch or "items" not in tracks_batch: + logger.warning(f"No tracks returned for playlist {playlist_spotify_id} at offset {offset}") + break + + batch_items = tracks_batch.get("items", []) + if not batch_items: + logger.info(f"No more tracks found at offset {offset}") + break + + # Check each track in this batch + for track_item in batch_items: + track = track_item.get("track") + if track and track.get("id") and not track.get("is_local"): + track_id = track["id"] + if track_id in not_found_tracks: + found_tracks.append(track_item) + not_found_tracks.remove(track_id) + logger.debug(f"Found track {track_id} at offset {offset}") + + offset += len(batch_items) + + # Add small delay between batches + time.sleep(0.1) + + except Exception as e: + logger.error(f"Error fetching tracks batch for playlist {playlist_spotify_id} at offset {offset}: {e}") + break + + logger.info( + f"Track search complete for playlist {playlist_spotify_id}: " + f"Found {len(found_tracks)}/{len(tracks_to_find)} tracks, " + f"Not found: {len(not_found_tracks)}" + ) + + return found_tracks, not_found_tracks + + def check_watched_playlists(specific_playlist_id: str = None): """Checks watched playlists for new tracks and queues downloads. If specific_playlist_id is provided, only that playlist is checked. @@ -90,6 +244,7 @@ def check_watched_playlists(specific_playlist_id: str = None): f"Playlist Watch Manager: Starting check. 
Specific playlist: {specific_playlist_id or 'All'}" ) config = get_watch_config() + use_snapshot_checking = config.get("use_snapshot_id_checking", True) if specific_playlist_id: playlist_obj = get_watched_playlist(specific_playlist_id) @@ -114,56 +269,115 @@ def check_watched_playlists(specific_playlist_id: str = None): ) try: - # For playlists, we fetch all tracks in one go usually (Spotify API limit permitting) - current_playlist_data_from_api = get_spotify_info( - playlist_spotify_id, "playlist" - ) - if ( - not current_playlist_data_from_api - or "tracks" not in current_playlist_data_from_api - ): + # Ensure the playlist's track table has the latest schema before processing + ensure_playlist_table_schema(playlist_spotify_id) + + # First, get playlist metadata to check if it has changed + current_playlist_metadata = get_playlist_metadata(playlist_spotify_id) + if not current_playlist_metadata: logger.error( - f"Playlist Watch Manager: Failed to fetch data or tracks from Spotify for playlist {playlist_spotify_id}." + f"Playlist Watch Manager: Failed to fetch metadata from Spotify for playlist {playlist_spotify_id}." ) continue - api_snapshot_id = current_playlist_data_from_api.get("snapshot_id") - api_total_tracks = current_playlist_data_from_api.get("tracks", {}).get( - "total", 0 - ) + api_snapshot_id = current_playlist_metadata.get("snapshot_id") + api_total_tracks = current_playlist_metadata.get("tracks", {}).get("total", 0) + + # Enhanced snapshot_id checking with track-level tracking + if use_snapshot_checking: + # First check if playlist snapshot_id has changed + playlist_changed = has_playlist_changed(playlist_spotify_id, api_snapshot_id) + + if not playlist_changed: + # Even if playlist snapshot_id hasn't changed, check if individual tracks need sync + needs_sync, tracks_to_find = needs_track_sync(playlist_spotify_id, api_snapshot_id, api_total_tracks) + + if not needs_sync: + logger.info( + f"Playlist Watch Manager: Playlist '{playlist_name}' ({playlist_spotify_id}) has not changed since last check (snapshot_id: {api_snapshot_id}). Skipping detailed check." + ) + continue + else: + if not tracks_to_find: + # Empty tracks_to_find means full sync is needed (track count mismatch detected) + logger.info( + f"Playlist Watch Manager: Playlist '{playlist_name}' snapshot_id unchanged, but full sync needed due to track count mismatch. Proceeding with full check." + ) + # Continue to full sync below + else: + logger.info( + f"Playlist Watch Manager: Playlist '{playlist_name}' snapshot_id unchanged, but {len(tracks_to_find)} tracks need sync. Proceeding with targeted check." + ) + # Use targeted track search instead of full fetch + found_tracks, not_found_tracks = find_tracks_in_playlist(playlist_spotify_id, tracks_to_find, api_snapshot_id) + + # Update found tracks with new snapshot_id + if found_tracks: + add_tracks_to_playlist_db(playlist_spotify_id, found_tracks, api_snapshot_id) + + # Mark not found tracks as removed + if not_found_tracks: + logger.info( + f"Playlist Watch Manager: {len(not_found_tracks)} tracks not found in playlist '{playlist_name}'. Marking as removed." + ) + mark_tracks_as_not_present_in_spotify(playlist_spotify_id, not_found_tracks) - # Paginate through playlist tracks if necessary + # Update playlist snapshot and continue to next playlist + update_playlist_snapshot(playlist_spotify_id, api_snapshot_id, api_total_tracks) + logger.info( + f"Playlist Watch Manager: Finished targeted sync for playlist '{playlist_name}'. Snapshot ID updated to {api_snapshot_id}." 
+ ) + continue + else: + logger.info( + f"Playlist Watch Manager: Playlist '{playlist_name}' has changed. New snapshot_id: {api_snapshot_id}. Proceeding with full check." + ) + else: + logger.info( + f"Playlist Watch Manager: Snapshot checking disabled. Proceeding with full check for playlist '{playlist_name}'." + ) + + # Fetch all tracks using the optimized function + # This happens when: + # 1. Playlist snapshot_id has changed (full sync needed) + # 2. Snapshot checking is disabled (full sync always) + # 3. Database is empty but API has tracks (full sync needed) + logger.info( + f"Playlist Watch Manager: Fetching all tracks for playlist '{playlist_name}' ({playlist_spotify_id}) with {api_total_tracks} total tracks." + ) + all_api_track_items = [] offset = 0 - limit = 50 # Spotify API limit for playlist items - - while True: - # Re-fetch with pagination if tracks.next is present, or on first call. - # get_spotify_info for playlist should ideally handle pagination internally if asked for all tracks. - # Assuming get_spotify_info for playlist returns all items or needs to be called iteratively. - # For simplicity, let's assume current_playlist_data_from_api has 'tracks' -> 'items' for the first page. - # And that get_spotify_info with 'playlist' type can take offset. - # Modifying get_spotify_info is outside current scope, so we'll assume it returns ALL items for a playlist. - # If it doesn't, this part would need adjustment for robust pagination. - # For now, we use the items from the initial fetch. - - paginated_playlist_data = get_spotify_info( - playlist_spotify_id, "playlist", offset=offset, limit=limit - ) - if ( - not paginated_playlist_data - or "tracks" not in paginated_playlist_data - ): + limit = 100 # Use maximum batch size for efficiency + + while offset < api_total_tracks: + try: + # Use the optimized get_playlist_tracks function + tracks_batch = get_playlist_tracks( + playlist_spotify_id, limit=limit, offset=offset + ) + + if not tracks_batch or "items" not in tracks_batch: + logger.warning( + f"Playlist Watch Manager: No tracks returned for playlist {playlist_spotify_id} at offset {offset}" + ) break - page_items = paginated_playlist_data.get("tracks", {}).get("items", []) - if not page_items: - break - all_api_track_items.extend(page_items) - - if paginated_playlist_data.get("tracks", {}).get("next"): - offset += limit - else: + batch_items = tracks_batch.get("items", []) + if not batch_items: + break + + all_api_track_items.extend(batch_items) + offset += len(batch_items) + + # Add small delay between batches to be respectful to API + if offset < api_total_tracks: + time.sleep(0.1) + + except Exception as e: + logger.error( + f"Playlist Watch Manager: Error fetching tracks batch for playlist {playlist_spotify_id} at offset {offset}: {e}" + ) break current_api_track_ids = set() @@ -237,14 +451,14 @@ def check_watched_playlists(specific_playlist_id: str = None): # Update DB for tracks that are still present in API (e.g. update 'last_seen_in_spotify') # add_tracks_to_playlist_db handles INSERT OR REPLACE, updating existing entries. - # We should pass all current API tracks to ensure their `last_seen_in_spotify` and `is_present_in_spotify` are updated. + # We should pass all current API tracks to ensure their `last_seen_in_spotify`, `is_present_in_spotify`, and `snapshot_id` are updated. 
if ( all_api_track_items ): # If there are any tracks in the API for this playlist logger.info( f"Playlist Watch Manager: Refreshing {len(all_api_track_items)} tracks from API in local DB for playlist '{playlist_name}'." ) - add_tracks_to_playlist_db(playlist_spotify_id, all_api_track_items) + add_tracks_to_playlist_db(playlist_spotify_id, all_api_track_items, api_snapshot_id) removed_db_ids = db_track_ids - current_api_track_ids if removed_db_ids: @@ -259,7 +473,7 @@ def check_watched_playlists(specific_playlist_id: str = None): playlist_spotify_id, api_snapshot_id, api_total_tracks ) # api_total_tracks from initial fetch logger.info( - f"Playlist Watch Manager: Finished checking playlist '{playlist_name}'. Snapshot ID updated. API Total Tracks: {api_total_tracks}." + f"Playlist Watch Manager: Finished checking playlist '{playlist_name}'. Snapshot ID updated to {api_snapshot_id}. API Total Tracks: {api_total_tracks}. Queued {queued_for_download_count} new tracks." ) except Exception as e: @@ -309,17 +523,16 @@ def check_watched_artists(specific_artist_id: str = None): ) try: - # Spotify API for artist albums is paginated. - # We need to fetch all albums. get_spotify_info with type 'artist-albums' should handle this. - # Let's assume get_spotify_info(artist_id, 'artist-albums') returns a list of all album objects. - # Or we implement pagination here. - + # Use the optimized artist discography function with pagination all_artist_albums_from_api: List[Dict[str, Any]] = [] offset = 0 limit = 50 # Spotify API limit for artist albums + + logger.info( + f"Artist Watch Manager: Fetching albums for artist '{artist_name}' ({artist_spotify_id})" + ) + while True: - # The 'artist-albums' type for get_spotify_info needs to support pagination params. - # And return a list of album objects. logger.debug( f"Artist Watch Manager: Fetching albums for {artist_spotify_id}. Limit: {limit}, Offset: {offset}" ) @@ -560,6 +773,13 @@ def start_watch_manager(): # Renamed from start_playlist_watch_manager init_playlists_db() # For playlists init_artists_db() # For artists + + # Update all existing tables to ensure they have the latest schema + try: + update_all_existing_tables_schema() + logger.info("Watch Manager: Successfully updated all existing tables schema") + except Exception as e: + logger.error(f"Watch Manager: Error updating existing tables schema: {e}", exc_info=True) _watch_scheduler_thread = threading.Thread( target=playlist_watch_scheduler, daemon=True @@ -585,7 +805,3 @@ def stop_watch_manager(): # Renamed from stop_playlist_watch_manager _watch_scheduler_thread = None else: logger.info("Watch Manager: Background scheduler not running.") - - -# If this module is imported, and you want to auto-start the manager, you could call start_watch_manager() here. -# However, it's usually better to explicitly start it from the main application/__init__.py. 
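The snapshot gate added to check_watched_playlists() above reduces to a three-way decision per playlist: skip it, resync only specific tracks, or refetch everything. A minimal sketch of that decision flow, assuming the has_playlist_changed and needs_track_sync helpers exposed by routes.utils.watch.db keep the signatures used in this patch; the plan_playlist_sync wrapper itself is illustrative and not part of the diff:

# Illustrative only: mirrors the decision flow in check_watched_playlists().
# Assumes has_playlist_changed / needs_track_sync live in routes.utils.watch.db,
# as used by the patched manager above.
from routes.utils.watch.db import has_playlist_changed, needs_track_sync


def plan_playlist_sync(playlist_id: str, api_snapshot_id: str, api_total_tracks: int):
    """Return ("skip" | "targeted" | "full", tracks_to_find) for one watched playlist."""
    if has_playlist_changed(playlist_id, api_snapshot_id):
        # Spotify reports a new snapshot_id -> refetch every track.
        return "full", None
    needs_sync, tracks_to_find = needs_track_sync(
        playlist_id, api_snapshot_id, api_total_tracks
    )
    if not needs_sync:
        # Snapshot unchanged and all local rows are current -> nothing to do.
        return "skip", None
    if not tracks_to_find:
        # Snapshot unchanged but the track counts disagree -> full resync.
        return "full", None
    # Only these tracks need to be located and refreshed with the new snapshot_id.
    return "targeted", tracks_to_find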
diff --git a/spotizerr-ui/src/routes/playlist.tsx b/spotizerr-ui/src/routes/playlist.tsx
index 4444233..7cbcefa 100644
--- a/spotizerr-ui/src/routes/playlist.tsx
+++ b/spotizerr-ui/src/routes/playlist.tsx
@@ -1,34 +1,46 @@
 import { Link, useParams } from "@tanstack/react-router";
-import { useEffect, useState, useContext } from "react";
+import { useEffect, useState, useContext, useRef, useCallback } from "react";
 import apiClient from "../lib/api-client";
 import { useSettings } from "../contexts/settings-context";
 import { toast } from "sonner";
-import type { PlaylistType, TrackType } from "../types/spotify";
+import type { PlaylistType, TrackType, PlaylistMetadataType, PlaylistTracksResponseType, PlaylistItemType } from "../types/spotify";
 import { QueueContext } from "../contexts/queue-context";
 import { FaArrowLeft } from "react-icons/fa";
 import { FaDownload } from "react-icons/fa6";
+
+
 export const Playlist = () => {
   const { playlistId } = useParams({ from: "/playlist/$playlistId" });
-  const [playlist, setPlaylist] = useState<PlaylistType | null>(null);
+  const [playlistMetadata, setPlaylistMetadata] = useState<PlaylistMetadataType | null>(null);
+  const [tracks, setTracks] = useState<PlaylistItemType[]>([]);
   const [isWatched, setIsWatched] = useState(false);
   const [error, setError] = useState<string | null>(null);
+  const [loadingTracks, setLoadingTracks] = useState(false);
+  const [hasMoreTracks, setHasMoreTracks] = useState(true);
+  const [tracksOffset, setTracksOffset] = useState(0);
+  const [totalTracks, setTotalTracks] = useState(0);
+
   const context = useContext(QueueContext);
   const { settings } = useSettings();
+  const observerRef = useRef<IntersectionObserver | null>(null);
+  const loadingRef = useRef<HTMLDivElement | null>(null);
 
   if (!context) {
     throw new Error("useQueue must be used within a QueueProvider");
   }
   const { addItem } = context;
 
+  // Load playlist metadata first
   useEffect(() => {
-    const fetchPlaylist = async () => {
+    const fetchPlaylistMetadata = async () => {
       if (!playlistId) return;
       try {
-        const response = await apiClient.get<PlaylistType>(`/playlist/info?id=${playlistId}`);
-        setPlaylist(response.data);
+        const response = await apiClient.get<PlaylistMetadataType>(`/playlist/metadata?id=${playlistId}`);
+        setPlaylistMetadata(response.data);
+        setTotalTracks(response.data.tracks.total);
       } catch (err) {
-        setError("Failed to load playlist");
+        setError("Failed to load playlist metadata");
         console.error(err);
       }
     };
@@ -45,10 +57,76 @@ export const Playlist = () => {
       }
     };
 
-    fetchPlaylist();
+    fetchPlaylistMetadata();
     checkWatchStatus();
   }, [playlistId]);
 
+  // Load tracks progressively
+  const loadMoreTracks = useCallback(async () => {
+    if (!playlistId || loadingTracks || !hasMoreTracks) return;
+
+    setLoadingTracks(true);
+    try {
+      const limit = 50; // Load 50 tracks at a time
+      const response = await apiClient.get<PlaylistTracksResponseType>(
+        `/playlist/tracks?id=${playlistId}&limit=${limit}&offset=${tracksOffset}`
+      );
+
+      const newTracks = response.data.items;
+      setTracks(prev => [...prev, ...newTracks]);
+      setTracksOffset(prev => prev + newTracks.length);
+
+      // Check if we've loaded all tracks
+      if (tracksOffset + newTracks.length >= totalTracks) {
+        setHasMoreTracks(false);
+      }
+    } catch (err) {
+      console.error("Failed to load tracks:", err);
+      toast.error("Failed to load more tracks");
+    } finally {
+      setLoadingTracks(false);
+    }
+  }, [playlistId, loadingTracks, hasMoreTracks, tracksOffset, totalTracks]);
+
+  // Intersection Observer for infinite scroll
+  useEffect(() => {
+    const observer = new IntersectionObserver(
+      (entries) => {
+        if (entries[0].isIntersecting && hasMoreTracks && !loadingTracks) {
+          loadMoreTracks();
+        }
+      },
+      { threshold: 0.1 }
+    );
+
+    if (loadingRef.current) {
+      observer.observe(loadingRef.current);
+    }
+
+    observerRef.current = observer;
+
+    return () => {
+      if (observerRef.current) {
+        observerRef.current.disconnect();
+      }
+    };
+  }, [loadMoreTracks, hasMoreTracks, loadingTracks]);
+
+  // Load initial tracks when metadata is loaded
+  useEffect(() => {
+    if (playlistMetadata && tracks.length === 0 && totalTracks > 0) {
+      loadMoreTracks();
+    }
+  }, [playlistMetadata, tracks.length, totalTracks, loadMoreTracks]);
+
+  // Reset state when playlist ID changes
+  useEffect(() => {
+    setTracks([]);
+    setTracksOffset(0);
+    setHasMoreTracks(true);
+    setTotalTracks(0);
+  }, [playlistId]);
+
   const handleDownloadTrack = (track: TrackType) => {
     if (!track?.id) return;
     addItem({ spotifyId: track.id, type: "track", name: track.name });
@@ -56,13 +134,13 @@ export const Playlist = () => {
   };
 
   const handleDownloadPlaylist = () => {
-    if (!playlist) return;
+    if (!playlistMetadata) return;
     addItem({
-      spotifyId: playlist.id,
+      spotifyId: playlistMetadata.id,
       type: "playlist",
-      name: playlist.name,
+      name: playlistMetadata.name,
     });
-    toast.info(`Adding ${playlist.name} to queue...`);
+    toast.info(`Adding ${playlistMetadata.name} to queue...`);
   };
 
   const handleToggleWatch = async () => {
@@ -70,10 +148,10 @@ export const Playlist = () => {
     try {
       if (isWatched) {
         await apiClient.delete(`/playlist/watch/${playlistId}`);
-        toast.success(`Removed ${playlist?.name} from watchlist.`);
+        toast.success(`Removed ${playlistMetadata?.name} from watchlist.`);
       } else {
         await apiClient.put(`/playlist/watch/${playlistId}`);
-        toast.success(`Added ${playlist?.name} to watchlist.`);
+        toast.success(`Added ${playlistMetadata?.name} to watchlist.`);
       }
       setIsWatched(!isWatched);
     } catch (err) {
@@ -86,11 +164,11 @@ export const Playlist = () => {
     return <div>{error}</div>;
   }
 
-  if (!playlist) {
-    return <div>Loading...</div>;
+  if (!playlistMetadata) {
+    return <div>Loading playlist...</div>;
   }
 
-  const filteredTracks = playlist.tracks.items.filter(({ track }) => {
+  const filteredTracks = tracks.filter(({ track }) => {
     if (!track) return false;
     if (settings?.explicitFilter && track.explicit) return false;
     return true;
@@ -107,19 +185,23 @@ export const Playlist = () => {
         Back to results
       </Link>
+
+      {/* Playlist Header */}
       <div>
-        <img src={playlist.images[0]?.url} alt={playlist.name} />
+        <img src={playlistMetadata.images[0]?.url} alt={playlistMetadata.name} />
         <div>
-          <h1>{playlist.name}</h1>
-          {playlist.description && <p>{playlist.description}</p>}
+          <h1>{playlistMetadata.name}</h1>
+          {playlistMetadata.description && (
+            <p>{playlistMetadata.description}</p>
+          )}
           <p>
-            By {playlist.owner.display_name} • {playlist.followers.total.toLocaleString()} followers •{" "}
-            {playlist.tracks.total} songs
+            By {playlistMetadata.owner.display_name} • {playlistMetadata.followers.total.toLocaleString()} followers •{" "}
+            {totalTracks} songs
           </p>
         </div>
       </div>
 
@@ -149,8 +231,17 @@ export const Playlist = () => {
 
+      {/* Tracks Section */}
+      <div>
         <h2>Tracks</h2>
+        {tracks.length > 0 && (
+          <span>
+            Showing {tracks.length} of {totalTracks} tracks
+          </span>
+        )}
+      </div>
+
       <div>
         {filteredTracks.map(({ track }, index) => {
           if (!track) return null;
@@ -198,6 +289,25 @@ export const Playlist = () => {
           );
         })}
+
+        {/* Loading indicator */}
+        {loadingTracks && (
+          <div>
+            <div />
+          </div>
+        )}
+
+        {/* Intersection observer target */}
+        {hasMoreTracks && (
+          <div ref={loadingRef} />
+        )}
+
+        {/* End of tracks indicator */}
+        {!hasMoreTracks && tracks.length > 0 && (
+          <div>
+            All tracks loaded
+          </div>
+        )}
       </div>
     </div>
   );
 };
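The rewritten component above leans on two endpoints: GET /playlist/metadata?id=… for the header and GET /playlist/tracks?id=…&limit=…&offset=… for pages of items, stopping once the fetched offset reaches the reported total. A minimal sketch of the same pagination walk from Python; the base URL and the use of requests are assumptions for illustration, while the query parameters and the items/total fields come from this patch:

# Illustrative client-side pagination over the /playlist/tracks endpoint.
# BASE_URL is an assumption; point it at wherever the spotizerr API is served.
import requests

BASE_URL = "http://localhost:7171/api"


def iter_playlist_tracks(playlist_id: str, limit: int = 50):
    """Yield playlist items page by page until the reported total is reached."""
    offset = 0
    while True:
        resp = requests.get(
            f"{BASE_URL}/playlist/tracks",
            params={"id": playlist_id, "limit": limit, "offset": offset},
            timeout=30,
        )
        resp.raise_for_status()
        page = resp.json()
        items = page.get("items", [])
        if not items:
            break
        yield from items
        offset += len(items)
        if offset >= page.get("total", 0):
            break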
diff --git a/spotizerr-ui/src/types/spotify.ts b/spotizerr-ui/src/types/spotify.ts index 9cecaf8..79e6877 100644 --- a/spotizerr-ui/src/types/spotify.ts +++ b/spotizerr-ui/src/types/spotify.ts @@ -50,6 +50,7 @@ export interface PlaylistItemType { added_at: string; is_local: boolean; track: TrackType | null; + is_locally_known?: boolean; } export interface PlaylistOwnerType { @@ -57,6 +58,31 @@ export interface PlaylistOwnerType { display_name: string; } +// New interface for playlist metadata only (no tracks) +export interface PlaylistMetadataType { + id: string; + name: string; + description: string | null; + images: ImageType[]; + tracks: { + total: number; + }; + owner: PlaylistOwnerType; + followers: { + total: number; + }; + _metadata_only: boolean; + _tracks_loaded: boolean; +} + +// New interface for playlist tracks response +export interface PlaylistTracksResponseType { + items: PlaylistItemType[]; + total: number; + limit: number; + offset: number; +} + export interface PlaylistType { id: string; name: string;
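For reference on the Flask side, the two new TS interfaces describe JSON payloads shaped like the TypedDicts below. These mirrors are illustrative only, assuming the backend returns exactly the fields declared in spotizerr-ui/src/types/spotify.ts; they are not part of the patch:

# Python mirrors of PlaylistMetadataType and PlaylistTracksResponseType.
# Illustrative only; the authoritative shapes live in the TS interfaces above.
from typing import Any, TypedDict


class PlaylistTracksResponse(TypedDict):
    items: list[dict[str, Any]]  # PlaylistItemType objects: added_at, is_local, track, is_locally_known?
    total: int
    limit: int
    offset: int


class PlaylistMetadata(TypedDict):
    id: str
    name: str
    description: str | None
    images: list[dict[str, Any]]
    tracks: dict[str, int]     # {"total": ...}
    owner: dict[str, str]      # PlaylistOwnerType: {"display_name": ...}
    followers: dict[str, int]  # {"total": ...}
    _metadata_only: bool
    _tracks_loaded: bool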