# Listing metadata: 1099 lines, 48 KiB, Python
import copy
import json
import logging
import re
import threading
import time
from pathlib import Path
from typing import Any, List, Dict

from routes.utils.watch.db import (
    get_watched_playlists,
    get_watched_playlist,
    get_playlist_track_ids_from_db,
    get_playlist_tracks_with_snapshot_from_db,
    get_playlist_total_tracks_from_db,
    add_tracks_to_playlist_db,
    update_playlist_snapshot,
    mark_tracks_as_not_present_in_spotify,
    update_all_existing_tables_schema,
    ensure_playlist_table_schema,
    # Artist watch DB functions
    get_watched_artists,
    get_watched_artist,
    get_artist_album_ids_from_db,
    update_artist_metadata_after_check,  # Renamed from update_artist_metadata
)
from routes.utils.get_info import (
    get_spotify_info,
    get_playlist_metadata,
    get_playlist_tracks,
)  # To fetch playlist, track, artist, and album details
from routes.utils.celery_queue_manager import download_queue_manager

logger = logging.getLogger(__name__)

# Main application config; the watch settings live under its "watch" key.
MAIN_CONFIG_FILE_PATH = Path("./data/config/main.json")
# Legacy standalone watch config that get_watch_config() folds into main.json.
WATCH_OLD_FILE_PATH = Path("./data/config/watch.json")
# Set to ask the background scheduler thread to leave its loop.
STOP_EVENT = threading.Event()


# Fallback values applied for any key missing from the "watch" config block.
DEFAULT_WATCH_CONFIG = {
    "enabled": False,
    "watchPollIntervalSeconds": 3600,
    "maxTracksPerRun": 50,
    "watchedArtistAlbumGroup": ["album", "single"],
    "delayBetweenPlaylistsSeconds": 2,
    "delayBetweenArtistsSeconds": 5,
    "useSnapshotIdChecking": True,
}


def get_watch_config():
    """Load the watch configuration from main.json's 'watch' key (camelCase).

    Creates main.json with the default watch block when the file is missing,
    merges a legacy ``watch.json`` (legacy values override existing ones),
    renames legacy snake_case keys to camelCase, fills in missing defaults,
    and writes the result back to main.json whenever a migration happened.

    Returns:
        The watch configuration dict. On any error the defaults are returned.
        The returned dict never shares mutable values with
        ``DEFAULT_WATCH_CONFIG``, so callers may mutate it freely.
    """
    # snake_case -> camelCase; keys that are absent here are kept unchanged.
    snake_to_camel = {
        "watch_poll_interval_seconds": "watchPollIntervalSeconds",
        "watched_artist_album_group": "watchedArtistAlbumGroup",
        "delay_between_playlists_seconds": "delayBetweenPlaylistsSeconds",
        "delay_between_artists_seconds": "delayBetweenArtistsSeconds",
        "use_snapshot_id_checking": "useSnapshotIdChecking",
        "max_tracks_per_run": "maxTracksPerRun",
    }

    try:
        MAIN_CONFIG_FILE_PATH.parent.mkdir(parents=True, exist_ok=True)
        if not MAIN_CONFIG_FILE_PATH.exists():
            # Create main config with default watch block
            with open(MAIN_CONFIG_FILE_PATH, "w") as f:
                json.dump({"watch": DEFAULT_WATCH_CONFIG}, f, indent=2)
            return copy.deepcopy(DEFAULT_WATCH_CONFIG)

        with open(MAIN_CONFIG_FILE_PATH, "r") as f:
            main_cfg = json.load(f) or {}

        watch_cfg = main_cfg.get("watch", {}) or {}

        # Detect legacy watch.json and migrate it into main.json's watch key
        legacy_migrated_ok = False
        if WATCH_OLD_FILE_PATH.exists():
            try:
                with open(WATCH_OLD_FILE_PATH, "r") as wf:
                    legacy_watch = json.load(wf) or {}
                migrated_watch = {
                    snake_to_camel.get(key, key): value
                    for key, value in legacy_watch.items()
                }
                # Merge with existing watch (legacy overrides existing)
                watch_cfg.update(migrated_watch)
                legacy_migrated_ok = True
            except Exception as le:
                logger.error(
                    f"Failed to migrate legacy watch.json: {le}", exc_info=True
                )

        # Migration: map legacy keys inside the watch block if present.
        # A legacy key is only moved when its camelCase twin is absent.
        migrated = legacy_migrated_ok
        for legacy_key, camel_key in snake_to_camel.items():
            if legacy_key in watch_cfg and camel_key not in watch_cfg:
                watch_cfg[camel_key] = watch_cfg.pop(legacy_key)
                migrated = True

        # Ensure defaults (deep-copied so list values are not shared with
        # the module-level DEFAULT_WATCH_CONFIG).
        for key, default_value in DEFAULT_WATCH_CONFIG.items():
            if key not in watch_cfg:
                watch_cfg[key] = copy.deepcopy(default_value)

        if migrated:
            # Persist migration back to main.json
            main_cfg["watch"] = watch_cfg
            with open(MAIN_CONFIG_FILE_PATH, "w") as f:
                json.dump(main_cfg, f, indent=2)

            # Rename legacy file to avoid re-migration next start
            if legacy_migrated_ok:
                migrated_path = WATCH_OLD_FILE_PATH.with_suffix(".migrated")
                try:
                    WATCH_OLD_FILE_PATH.rename(migrated_path)
                    logger.info(
                        f"Legacy watch.json migrated and renamed to {migrated_path}"
                    )
                except Exception as rename_err:
                    # Renaming can fail (e.g. target already exists); fall
                    # back to deleting the legacy file.
                    try:
                        WATCH_OLD_FILE_PATH.unlink()
                        logger.info("Legacy watch.json migrated and removed.")
                    except Exception as unlink_err:
                        logger.warning(
                            f"Legacy watch.json was migrated but could not be renamed ({rename_err}) or removed ({unlink_err}); it will be migrated again on next load."
                        )

        return watch_cfg
    except Exception as e:
        logger.error(
            f"Error loading watch config from {MAIN_CONFIG_FILE_PATH}: {e}",
            exc_info=True,
        )
        return copy.deepcopy(DEFAULT_WATCH_CONFIG)


def construct_spotify_url(item_id: str, item_type: str = "track") -> str:
    """Return the open.spotify.com URL for an item.

    Args:
        item_id: The Spotify ID of the item.
        item_type: The URL path segment for the item kind; this module
            passes "track" and "album".

    Returns:
        A URL of the form ``https://open.spotify.com/<item_type>/<item_id>``.
    """
    return f"https://open.spotify.com/{item_type}/{item_id}"


def has_playlist_changed(playlist_spotify_id: str, current_snapshot_id: str) -> bool:
    """
    Check if a playlist has changed by comparing snapshot_id.
    This is much more efficient than fetching all tracks.

    Args:
        playlist_spotify_id: The Spotify playlist ID
        current_snapshot_id: The current snapshot_id from API

    Returns:
        True if playlist has changed, False otherwise. Also True when the
        playlist is not in the watch database, has no stored snapshot_id,
        or the lookup raises.
    """
    try:
        db_playlist = get_watched_playlist(playlist_spotify_id)
        if not db_playlist:
            # Playlist not in database, consider it as "changed" to trigger initial processing
            return True

        last_snapshot_id = db_playlist.get("snapshot_id")
        if not last_snapshot_id:
            # No previous snapshot_id, consider it as "changed" to trigger initial processing
            return True

        return current_snapshot_id != last_snapshot_id

    except Exception as e:
        logger.error(
            f"Error checking playlist change status for {playlist_spotify_id}: {e}"
        )
        # On error, assume playlist has changed to be safe
        return True


def needs_track_sync(
    playlist_spotify_id: str, current_snapshot_id: str, api_total_tracks: int
) -> tuple[bool, list[str]]:
    """
    Check if tracks need to be synchronized by comparing snapshot_ids and total counts.

    Args:
        playlist_spotify_id: The Spotify playlist ID
        current_snapshot_id: The current snapshot_id from API
        api_total_tracks: The total number of tracks reported by API

    Returns:
        Tuple of (needs_sync, tracks_to_find) where:
        - needs_sync: True if tracks need to be synchronized
        - tracks_to_find: List of track IDs that need to be found in API response.
          An empty list together with needs_sync=True means a full sync is
          required (count mismatch, or an error while checking).
    """
    try:
        # Get tracks from database with their snapshot_ids
        db_tracks = get_playlist_tracks_with_snapshot_from_db(playlist_spotify_id)
        db_total_tracks = get_playlist_total_tracks_from_db(playlist_spotify_id)

        # Check if total count matches
        if db_total_tracks != api_total_tracks:
            logger.info(
                f"Track count mismatch for playlist {playlist_spotify_id}: DB={db_total_tracks}, API={api_total_tracks}. Full sync needed to ensure all tracks are captured."
            )
            # Always do full sync when counts don't match to ensure we don't miss any tracks
            # This handles cases like:
            # - Empty database (DB=0, API=1345)
            # - Missing tracks (DB=1000, API=1345)
            # - Removed tracks (DB=1345, API=1000)
            return True, []  # Empty list indicates full sync needed

        # Check if any tracks have different snapshot_id
        tracks_to_find = [
            track_id
            for track_id, track_data in db_tracks.items()
            if track_data.get("snapshot_id") != current_snapshot_id
        ]

        if tracks_to_find:
            logger.info(
                f"Found {len(tracks_to_find)} tracks with outdated snapshot_id for playlist {playlist_spotify_id}"
            )
            return True, tracks_to_find

        return False, []

    except Exception as e:
        logger.error(f"Error checking track sync status for {playlist_spotify_id}: {e}")
        # On error, assume sync is needed to be safe
        return True, []


def find_tracks_in_playlist(
    playlist_spotify_id: str, tracks_to_find: list[str], current_snapshot_id: str
) -> tuple[list, list]:
    """
    Progressively fetch playlist tracks until all specified tracks are found or playlist is exhausted.

    Args:
        playlist_spotify_id: The Spotify playlist ID
        tracks_to_find: List of track IDs to find
        current_snapshot_id: The current snapshot_id (not used by the search
            itself; kept for interface compatibility)

    Returns:
        Tuple of (found_tracks, not_found_tracks) where:
        - found_tracks: List of track items that were found
        - not_found_tracks: List of track IDs that were not found, in the
          order they appear in tracks_to_find
    """
    found_tracks = []
    # Set membership keeps the per-item check O(1) instead of scanning a list.
    remaining_ids = set(tracks_to_find)
    offset = 0
    limit = 100

    logger.info(
        f"Searching for {len(tracks_to_find)} tracks in playlist {playlist_spotify_id} starting from offset {offset}"
    )

    # NOTE(review): every early exit below (API error, missing "items", the
    # 10000 safety limit) leaves the still-unseen IDs in not_found_tracks,
    # exactly like a genuinely exhausted playlist. Callers cannot tell the
    # two apart - confirm that treating them as "not found" is acceptable.
    while remaining_ids and offset < 10000:  # Safety limit
        try:
            tracks_batch = get_playlist_tracks(
                playlist_spotify_id, limit=limit, offset=offset
            )

            if not tracks_batch or "items" not in tracks_batch:
                logger.warning(
                    f"No tracks returned for playlist {playlist_spotify_id} at offset {offset}"
                )
                break

            batch_items = tracks_batch.get("items", [])
            if not batch_items:
                logger.info(f"No more tracks found at offset {offset}")
                break

            # Check each track in this batch
            for track_item in batch_items:
                track = track_item.get("track")
                if track and track.get("id") and not track.get("is_local"):
                    track_id = track["id"]
                    if track_id in remaining_ids:
                        found_tracks.append(track_item)
                        remaining_ids.discard(track_id)
                        logger.debug(f"Found track {track_id} at offset {offset}")

            offset += len(batch_items)

            # Add small delay between batches
            time.sleep(0.1)

        except Exception as e:
            logger.error(
                f"Error fetching tracks batch for playlist {playlist_spotify_id} at offset {offset}: {e}"
            )
            break

    not_found_tracks = [
        track_id for track_id in tracks_to_find if track_id in remaining_ids
    ]

    logger.info(
        f"Track search complete for playlist {playlist_spotify_id}: "
        f"Found {len(found_tracks)}/{len(tracks_to_find)} tracks, "
        f"Not found: {len(not_found_tracks)}"
    )

    return found_tracks, not_found_tracks


def check_watched_playlists(specific_playlist_id: str = None):
    """Checks watched playlists for new tracks and queues downloads.

    If specific_playlist_id is provided, only that playlist is checked.

    For each playlist the Spotify metadata is fetched first. With snapshot
    checking enabled, an unchanged playlist is either skipped, synced with a
    targeted track search, or (on a track-count mismatch) fully synced. A
    full sync queues downloads for tracks missing from the DB, refreshes the
    DB rows, marks vanished tracks as removed and stores the new snapshot.

    If the full track fetch fails part-way, the partial list is still used
    to queue new tracks, but removal detection and the snapshot update are
    skipped so that tracks are not falsely marked as removed and the next
    run retries the sync.
    """
    logger.info(
        f"Playlist Watch Manager: Starting check. Specific playlist: {specific_playlist_id or 'All'}"
    )
    config = get_watch_config()
    use_snapshot_checking = config.get("useSnapshotIdChecking", True)

    if specific_playlist_id:
        playlist_obj = get_watched_playlist(specific_playlist_id)
        if not playlist_obj:
            logger.error(
                f"Playlist Watch Manager: Playlist {specific_playlist_id} not found in watch database."
            )
            return
        watched_playlists_to_check = [playlist_obj]
    else:
        watched_playlists_to_check = get_watched_playlists()

    if not watched_playlists_to_check:
        logger.info("Playlist Watch Manager: No playlists to check.")
        return

    for playlist_in_db in watched_playlists_to_check:
        playlist_spotify_id = playlist_in_db["spotify_id"]
        playlist_name = playlist_in_db["name"]
        logger.info(
            f"Playlist Watch Manager: Checking playlist '{playlist_name}' ({playlist_spotify_id})..."
        )

        # NOTE: every `continue` inside this try skips the inter-playlist
        # delay at the bottom of the loop; only full syncs (and errors) wait.
        try:
            # Ensure the playlist's track table has the latest schema before processing
            ensure_playlist_table_schema(playlist_spotify_id)

            # First, get playlist metadata to check if it has changed
            current_playlist_metadata = get_playlist_metadata(playlist_spotify_id)
            if not current_playlist_metadata:
                logger.error(
                    f"Playlist Watch Manager: Failed to fetch metadata from Spotify for playlist {playlist_spotify_id}."
                )
                continue

            api_snapshot_id = current_playlist_metadata.get("snapshot_id")
            # Tolerate an explicit null for "tracks" or "total".
            api_total_tracks = (
                current_playlist_metadata.get("tracks") or {}
            ).get("total") or 0

            # Enhanced snapshot_id checking with track-level tracking
            if not use_snapshot_checking:
                logger.info(
                    f"Playlist Watch Manager: Snapshot checking disabled. Proceeding with full check for playlist '{playlist_name}'."
                )
            elif has_playlist_changed(playlist_spotify_id, api_snapshot_id):
                logger.info(
                    f"Playlist Watch Manager: Playlist '{playlist_name}' has changed. New snapshot_id: {api_snapshot_id}. Proceeding with full check."
                )
            else:
                # Even if playlist snapshot_id hasn't changed, check if individual tracks need sync
                needs_sync, tracks_to_find = needs_track_sync(
                    playlist_spotify_id, api_snapshot_id, api_total_tracks
                )

                if not needs_sync:
                    logger.info(
                        f"Playlist Watch Manager: Playlist '{playlist_name}' ({playlist_spotify_id}) has not changed since last check (snapshot_id: {api_snapshot_id}). Skipping detailed check."
                    )
                    continue

                if tracks_to_find:
                    _run_targeted_playlist_sync(
                        playlist_spotify_id,
                        playlist_name,
                        tracks_to_find,
                        api_snapshot_id,
                        api_total_tracks,
                    )
                    continue

                # Empty tracks_to_find means full sync is needed (track count mismatch detected)
                logger.info(
                    f"Playlist Watch Manager: Playlist '{playlist_name}' snapshot_id unchanged, but full sync needed due to track count mismatch. Proceeding with full check."
                )

            # Fetch all tracks using the optimized function
            # This happens when:
            # 1. Playlist snapshot_id has changed (full sync needed)
            # 2. Snapshot checking is disabled (full sync always)
            # 3. Database is empty but API has tracks (full sync needed)
            logger.info(
                f"Playlist Watch Manager: Fetching all tracks for playlist '{playlist_name}' ({playlist_spotify_id}) with {api_total_tracks} total tracks."
            )
            all_api_track_items, fetch_complete = _fetch_all_playlist_items(
                playlist_spotify_id, api_total_tracks
            )

            current_api_track_ids = set()
            api_track_id_to_item_map = {}
            for item in all_api_track_items:
                track = item.get("track")
                if track and track.get("id") and not track.get("is_local"):
                    track_id = track["id"]
                    current_api_track_ids.add(track_id)
                    api_track_id_to_item_map[track_id] = item

            db_track_ids = get_playlist_track_ids_from_db(playlist_spotify_id)

            new_track_ids_for_download = current_api_track_ids - db_track_ids
            queued_for_download_count = 0
            if new_track_ids_for_download:
                logger.info(
                    f"Playlist Watch Manager: Found {len(new_track_ids_for_download)} new tracks for playlist '{playlist_name}' to download."
                )
                queued_for_download_count = _queue_new_playlist_tracks(
                    playlist_spotify_id,
                    playlist_name,
                    new_track_ids_for_download,
                    api_track_id_to_item_map,
                )
                logger.info(
                    f"Playlist Watch Manager: Attempted to queue {queued_for_download_count} new tracks for playlist '{playlist_name}'."
                )
            else:
                logger.info(
                    f"Playlist Watch Manager: No new tracks to download for playlist '{playlist_name}'."
                )

            # Update DB for tracks that are still present in API (e.g. update 'last_seen_in_spotify')
            # add_tracks_to_playlist_db handles INSERT OR REPLACE, updating existing entries.
            # We should pass all current API tracks to ensure their `last_seen_in_spotify`, `is_present_in_spotify`, and `snapshot_id` are updated.
            if all_api_track_items:
                logger.info(
                    f"Playlist Watch Manager: Refreshing {len(all_api_track_items)} tracks from API in local DB for playlist '{playlist_name}'."
                )
                add_tracks_to_playlist_db(
                    playlist_spotify_id, all_api_track_items, api_snapshot_id
                )

            # Only a complete listing can prove that a DB track is gone.
            removed_db_ids = (
                db_track_ids - current_api_track_ids if fetch_complete else set()
            )
            if removed_db_ids:
                logger.info(
                    f"Playlist Watch Manager: {len(removed_db_ids)} tracks removed from Spotify playlist '{playlist_name}'. Marking in DB."
                )
                mark_tracks_as_not_present_in_spotify(
                    playlist_spotify_id, list(removed_db_ids)
                )

            # Update the playlist's m3u file after any changes (new tracks queued or tracks removed)
            if new_track_ids_for_download or removed_db_ids:
                try:
                    logger.info(
                        f"Updating m3u file for playlist '{playlist_name}' after playlist changes."
                    )
                    update_playlist_m3u_file(playlist_spotify_id)
                except Exception as m3u_update_err:
                    logger.error(
                        f"Failed to update m3u file for playlist '{playlist_name}' after playlist changes: {m3u_update_err}",
                        exc_info=True,
                    )

            if fetch_complete:
                update_playlist_snapshot(
                    playlist_spotify_id, api_snapshot_id, api_total_tracks
                )  # api_total_tracks from initial fetch
                logger.info(
                    f"Playlist Watch Manager: Finished checking playlist '{playlist_name}'. Snapshot ID updated to {api_snapshot_id}. API Total Tracks: {api_total_tracks}. Queued {queued_for_download_count} new tracks."
                )
            else:
                # Leave the stored snapshot untouched so the next run
                # performs the sync again.
                logger.warning(
                    f"Playlist Watch Manager: Track fetch for playlist '{playlist_name}' was incomplete ({len(all_api_track_items)}/{api_total_tracks} items). Skipped removal detection and snapshot update. Queued {queued_for_download_count} new tracks."
                )

        except Exception as e:
            logger.error(
                f"Playlist Watch Manager: Error processing playlist {playlist_spotify_id}: {e}",
                exc_info=True,
            )

        time.sleep(max(1, config.get("delayBetweenPlaylistsSeconds", 2)))

    logger.info("Playlist Watch Manager: Finished checking all watched playlists.")


def _run_targeted_playlist_sync(
    playlist_spotify_id, playlist_name, tracks_to_find, api_snapshot_id, api_total_tracks
):
    """Re-sync only the given track IDs of an otherwise unchanged playlist."""
    logger.info(
        f"Playlist Watch Manager: Playlist '{playlist_name}' snapshot_id unchanged, but {len(tracks_to_find)} tracks need sync. Proceeding with targeted check."
    )
    # Use targeted track search instead of full fetch
    found_tracks, not_found_tracks = find_tracks_in_playlist(
        playlist_spotify_id, tracks_to_find, api_snapshot_id
    )

    # Update found tracks with new snapshot_id
    if found_tracks:
        add_tracks_to_playlist_db(playlist_spotify_id, found_tracks, api_snapshot_id)

    # Mark not found tracks as removed
    if not_found_tracks:
        logger.info(
            f"Playlist Watch Manager: {len(not_found_tracks)} tracks not found in playlist '{playlist_name}'. Marking as removed."
        )
        mark_tracks_as_not_present_in_spotify(playlist_spotify_id, not_found_tracks)

        # Update the playlist's m3u file after tracks are removed
        try:
            logger.info(
                f"Updating m3u file for playlist '{playlist_name}' after removing {len(not_found_tracks)} tracks."
            )
            update_playlist_m3u_file(playlist_spotify_id)
        except Exception as m3u_update_err:
            logger.error(
                f"Failed to update m3u file for playlist '{playlist_name}' after marking tracks as removed: {m3u_update_err}",
                exc_info=True,
            )

    update_playlist_snapshot(playlist_spotify_id, api_snapshot_id, api_total_tracks)
    logger.info(
        f"Playlist Watch Manager: Finished targeted sync for playlist '{playlist_name}'. Snapshot ID updated to {api_snapshot_id}."
    )


def _fetch_all_playlist_items(playlist_spotify_id, api_total_tracks):
    """Page through a playlist; return (items, fetch_complete).

    fetch_complete is False when a request raised or returned no "items"
    key, i.e. when the returned list may be missing tracks.
    """
    all_items = []
    offset = 0
    limit = 100  # Use maximum batch size for efficiency

    while offset < api_total_tracks:
        try:
            tracks_batch = get_playlist_tracks(
                playlist_spotify_id, limit=limit, offset=offset
            )

            if not tracks_batch or "items" not in tracks_batch:
                logger.warning(
                    f"Playlist Watch Manager: No tracks returned for playlist {playlist_spotify_id} at offset {offset}"
                )
                return all_items, False

            batch_items = tracks_batch.get("items", [])
            if not batch_items:
                break

            all_items.extend(batch_items)
            offset += len(batch_items)

            # Add small delay between batches to be respectful to API
            if offset < api_total_tracks:
                time.sleep(0.1)

        except Exception as e:
            logger.error(
                f"Playlist Watch Manager: Error fetching tracks batch for playlist {playlist_spotify_id} at offset {offset}: {e}"
            )
            return all_items, False

    return all_items, True


def _queue_new_playlist_tracks(
    playlist_spotify_id, playlist_name, new_track_ids, api_track_id_to_item_map
):
    """Queue a download task per new track; return how many were newly queued."""
    queued_count = 0
    for track_id in new_track_ids:
        api_item = api_track_id_to_item_map.get(track_id)
        if not api_item or not api_item.get("track"):
            logger.warning(
                f"Playlist Watch Manager: Missing track details in API map for new track_id {track_id} in playlist {playlist_spotify_id}. Cannot queue."
            )
            continue

        track_to_queue = api_item["track"]
        task_payload = {
            "download_type": "track",
            "url": construct_spotify_url(track_id, "track"),
            "name": track_to_queue.get("name", "Unknown Track"),
            "artist": ", ".join(
                a["name"]
                for a in (track_to_queue.get("artists") or [])
                if a.get("name")
            ),
            "orig_request": {
                "source": "playlist_watch",
                "playlist_id": playlist_spotify_id,
                "playlist_name": playlist_name,
                "track_spotify_id": track_id,
                "track_item_for_db": api_item,  # Pass full API item for DB update on completion
            },
        }
        try:
            task_id_or_none = download_queue_manager.add_task(
                task_payload, from_watch_job=True
            )
            if task_id_or_none:  # Task was newly queued
                logger.info(
                    f"Playlist Watch Manager: Queued download task {task_id_or_none} for new track {track_id} ('{track_to_queue.get('name')}') from playlist '{playlist_name}'."
                )
                queued_count += 1
            # If task_id_or_none is None, it was a duplicate and not re-queued, Celery manager handles logging.
        except Exception as e:
            logger.error(
                f"Playlist Watch Manager: Failed to queue download for new track {track_id} from playlist '{playlist_name}': {e}",
                exc_info=True,
            )
    return queued_count


def check_watched_artists(specific_artist_id: str = None):
    """Checks watched artists for new albums and queues downloads.

    If specific_artist_id is provided, only that artist is checked. Albums
    are fetched page by page, filtered by the configured
    ``watchedArtistAlbumGroup`` values, and a download task is queued for
    every matching album whose ID is not yet in the artist's DB table.
    """
    logger.info(
        f"Artist Watch Manager: Starting check. Specific artist: {specific_artist_id or 'All'}"
    )
    config = get_watch_config()
    watched_album_groups = [
        str(g).lower() for g in config.get("watchedArtistAlbumGroup", ["album", "single"])
    ]
    logger.info(
        f"Artist Watch Manager: Watching for album groups: {watched_album_groups}"
    )

    if specific_artist_id:
        artist_obj_in_db = get_watched_artist(specific_artist_id)
        if not artist_obj_in_db:
            logger.error(
                f"Artist Watch Manager: Artist {specific_artist_id} not found in watch database."
            )
            return
        artists_to_check = [artist_obj_in_db]
    else:
        artists_to_check = get_watched_artists()

    if not artists_to_check:
        logger.info("Artist Watch Manager: No artists to check.")
        return

    for artist_in_db in artists_to_check:
        artist_spotify_id = artist_in_db["spotify_id"]
        artist_name = artist_in_db["name"]
        logger.info(
            f"Artist Watch Manager: Checking artist '{artist_name}' ({artist_spotify_id})..."
        )

        try:
            _check_single_artist(artist_spotify_id, artist_name, watched_album_groups)
        except Exception as e:
            logger.error(
                f"Artist Watch Manager: Error processing artist {artist_spotify_id} ('{artist_name}'): {e}",
                exc_info=True,
            )

        time.sleep(max(1, config.get("delayBetweenArtistsSeconds", 5)))

    logger.info("Artist Watch Manager: Finished checking all watched artists.")


def _fetch_artist_discography(artist_spotify_id, artist_name):
    """Page through an artist's albums; return (albums, last_page_response)."""
    albums: List[Dict[str, Any]] = []
    offset = 0
    limit = 50  # Spotify API limit for artist albums
    page = None

    logger.info(
        f"Artist Watch Manager: Fetching albums for artist '{artist_name}' ({artist_spotify_id})"
    )

    while True:
        logger.debug(
            f"Artist Watch Manager: Fetching albums for {artist_spotify_id}. Limit: {limit}, Offset: {offset}"
        )
        page = get_spotify_info(
            artist_spotify_id, "artist_discography", limit=limit, offset=offset
        )

        if not page or not isinstance(page.get("items"), list):
            logger.warning(
                f"Artist Watch Manager: No album items found or invalid format for artist {artist_spotify_id} (name: '{artist_name}') at offset {offset}. Response: {page}"
            )
            break

        current_page_albums = page.get("items", [])
        if not current_page_albums:
            logger.info(
                f"Artist Watch Manager: No more albums on page for artist {artist_spotify_id} (name: '{artist_name}') at offset {offset}. Total fetched so far: {len(albums)}."
            )
            break

        logger.debug(
            f"Artist Watch Manager: Fetched {len(current_page_albums)} albums on current page for artist '{artist_name}'."
        )
        albums.extend(current_page_albums)

        # The `next` field in Spotify API responses is a URL to the next page or null.
        if not page.get("next"):
            logger.info(
                f"Artist Watch Manager: No 'next' page URL for artist '{artist_name}'. Pagination complete. Total albums fetched: {len(albums)}."
            )
            break
        offset += limit  # Increment offset by the limit used for the request

    return albums, page


def _queue_new_artist_album(artist_spotify_id, artist_name, album_data):
    """Queue a download for one album; return True if a new task was created."""
    album_id = album_data.get("id")
    album_name = album_data.get("name", "Unknown Album")
    album_artists_list = album_data.get("artists", [])
    album_main_artist_name = (
        album_artists_list[0].get("name", "Unknown Artist")
        if album_artists_list
        else "Unknown Artist"
    )

    task_payload = {
        "download_type": "album",
        "url": construct_spotify_url(album_id, "album"),
        "name": album_name,
        "artist": album_main_artist_name,  # Primary artist of the album
        "orig_request": {
            "source": "artist_watch",
            "artist_spotify_id": artist_spotify_id,  # Watched artist
            "artist_name": artist_name,
            "album_spotify_id": album_id,
            "album_data_for_db": album_data,  # Pass full API album object for DB update on completion/queuing
        },
    }
    try:
        # add_task returns a falsy value for duplicates; nothing is written
        # to the watch DB here.
        task_id_or_none = download_queue_manager.add_task(
            task_payload, from_watch_job=True
        )
        if task_id_or_none:  # Task was newly queued
            logger.info(
                f"Artist Watch Manager: Queued download task {task_id_or_none} for new album '{album_name}' from artist '{artist_name}'. DB entry will be created/updated on success."
            )
            return True
        # If task_id_or_none is None, it was a duplicate. Celery manager handles logging.
    except Exception as e:
        logger.error(
            f"Artist Watch Manager: Failed to queue download for new album {album_id} ('{album_name}') from artist '{artist_name}': {e}",
            exc_info=True,
        )
    return False


def _check_single_artist(artist_spotify_id, artist_name, watched_album_groups):
    """Fetch one artist's albums, queue the new matching ones, update metadata."""
    all_artist_albums_from_api, last_page = _fetch_artist_discography(
        artist_spotify_id, artist_name
    )

    # Use the 'total' field from the last API response when there is one;
    # otherwise fall back to the number of albums actually fetched.
    api_reported_total_albums = (
        last_page.get("total", 0) if last_page else len(all_artist_albums_from_api)
    )
    logger.info(
        f"Artist Watch Manager: Fetched {len(all_artist_albums_from_api)} albums in total from API for artist '{artist_name}'. API reports total: {api_reported_total_albums}."
    )

    db_album_ids = get_artist_album_ids_from_db(artist_spotify_id)
    logger.info(
        f"Artist Watch Manager: Found {len(db_album_ids)} albums in DB for artist '{artist_name}'. These will be skipped if re-encountered unless logic changes."
    )

    queued_for_download_count = 0
    # Guards against the API returning the same album on more than one page.
    processed_album_ids_in_run = set()

    for album_data in all_artist_albums_from_api:
        album_id = album_data.get("id")
        album_name = album_data.get("name", "Unknown Album")
        # `or "N/A"` also covers an explicit null, which would otherwise
        # raise on .lower() and abort the whole artist.
        album_group = (album_data.get("album_group") or "N/A").lower()
        album_type = (album_data.get("album_type") or "N/A").lower()

        if not album_id:
            logger.warning(
                f"Artist Watch Manager: Skipping album without ID for artist '{artist_name}'. Album data: {album_data}"
            )
            continue

        if album_id in processed_album_ids_in_run:
            logger.debug(
                f"Artist Watch Manager: Album '{album_name}' ({album_id}) already processed in this run. Skipping."
            )
            continue
        processed_album_ids_in_run.add(album_id)

        # Filter based on watchedArtistAlbumGroup
        is_matching_group = album_group in watched_album_groups

        logger.debug(
            f"Artist '{artist_name}', Album '{album_name}' ({album_id}): album_group='{album_group}', album_type='{album_type}'. Watched groups: {watched_album_groups}. Match: {is_matching_group}."
        )

        if not is_matching_group:
            logger.debug(
                f"Artist Watch Manager: Skipping album '{album_name}' ({album_id}) by '{artist_name}' - group '{album_group}' not in watched list: {watched_album_groups}."
            )
            continue

        logger.info(
            f"Artist Watch Manager: Album '{album_name}' ({album_id}) by '{artist_name}' (group: {album_group}) IS a matching group."
        )

        if album_id in db_album_ids:
            logger.info(
                f"Artist Watch Manager: Album '{album_name}' ({album_id}) by '{artist_name}' already known in DB (ID found in db_album_ids). Skipping queue."
            )
            continue

        logger.info(
            f"Artist Watch Manager: Found NEW matching album '{album_name}' ({album_id}) by '{artist_name}'. Queuing for download."
        )
        if _queue_new_artist_album(artist_spotify_id, artist_name, album_data):
            queued_for_download_count += 1

    logger.info(
        f"Artist Watch Manager: For artist '{artist_name}', processed {len(all_artist_albums_from_api)} API albums, attempted to queue {queued_for_download_count} new albums."
    )

    update_artist_metadata_after_check(artist_spotify_id, api_reported_total_albums)
    logger.info(
        f"Artist Watch Manager: Finished checking artist '{artist_name}'. DB metadata updated. API reported total albums (for API filter): {api_reported_total_albums}."
    )


def playlist_watch_scheduler():
    """Periodically calls check_watched_playlists and check_watched_artists.

    Runs until STOP_EVENT is set. The configuration is re-read on every
    iteration, so changes to the poll interval or the enabled flag take
    effect on the next cycle. Waiting is done on STOP_EVENT, so a stop
    request interrupts the pause between runs.
    """
    logger.info("Watch Scheduler: Thread started.")

    while not STOP_EVENT.is_set():
        current_config = get_watch_config()  # Get latest config for this run
        interval = _resolve_poll_interval(
            current_config.get("watchPollIntervalSeconds", 3600)
        )
        watch_enabled = current_config.get("enabled", False)  # Get enabled status

        if not watch_enabled:
            logger.info(
                "Watch Scheduler: Watch feature is disabled in config. Skipping checks."
            )
            # Still respect poll interval for checking config again
            STOP_EVENT.wait(interval)
            continue  # Skip to next iteration

        try:
            logger.info("Watch Scheduler: Starting playlist check run.")
            check_watched_playlists()
            logger.info("Watch Scheduler: Playlist check run completed.")
        except Exception as e:
            logger.error(
                f"Watch Scheduler: Unhandled exception during check_watched_playlists: {e}",
                exc_info=True,
            )

        if STOP_EVENT.is_set():
            break  # Check stop event again before starting artist check

        try:
            logger.info("Watch Scheduler: Starting artist check run.")
            check_watched_artists()
            logger.info("Watch Scheduler: Artist check run completed.")
        except Exception as e:
            logger.error(
                f"Watch Scheduler: Unhandled exception during check_watched_artists: {e}",
                exc_info=True,
            )

        logger.info(
            f"Watch Scheduler: All checks complete. Next run in {interval} seconds."
        )
        STOP_EVENT.wait(interval)
    logger.info("Watch Scheduler: Thread stopped.")


def _resolve_poll_interval(raw_value):
    """Return a usable positive poll interval in seconds.

    A non-numeric, non-finite, zero or negative value would either crash
    ``Event.wait`` or turn the scheduler loop into a busy loop, so such
    values are replaced by the default from DEFAULT_WATCH_CONFIG.
    """
    fallback = DEFAULT_WATCH_CONFIG["watchPollIntervalSeconds"]
    try:
        seconds = float(raw_value)
    except (TypeError, ValueError):
        seconds = 0.0
    # The chained comparison is False for NaN as well.
    if not 0 < seconds < float("inf"):
        logger.warning(
            f"Watch Scheduler: Invalid watchPollIntervalSeconds {raw_value!r}; using {fallback} seconds."
        )
        return fallback
    # Keep whole numbers as ints so log output reads "3600", not "3600.0".
    return int(seconds) if seconds.is_integer() else seconds


# --- Global thread for the scheduler ---
|
|
_watch_scheduler_thread = None # Renamed from _playlist_watch_thread
|
|
|
|
|
|
def start_watch_manager():  # Renamed from start_playlist_watch_manager
    """Start the background watch scheduler unless it is already running.

    When no live scheduler thread exists this clears ``STOP_EVENT``,
    initializes the playlist and artist databases, brings existing tables up
    to the current schema (a failure there is logged, not raised) and starts
    ``playlist_watch_scheduler`` in a daemon thread. When a live thread is
    already registered, the call only logs that fact.
    """
    global _watch_scheduler_thread

    already_running = (
        _watch_scheduler_thread is not None and _watch_scheduler_thread.is_alive()
    )
    if already_running:
        logger.info("Watch Manager: Background scheduler already running.")
        return

    STOP_EVENT.clear()

    # Database initialization happens on every (re)start of the scheduler.
    from routes.utils.watch.db import init_artists_db, init_playlists_db

    init_playlists_db()
    init_artists_db()

    # Schema migration is best-effort: the scheduler is started even if it fails.
    try:
        update_all_existing_tables_schema()
        logger.info("Watch Manager: Successfully updated all existing tables schema")
    except Exception as exc:
        logger.exception(
            f"Watch Manager: Error updating existing tables schema: {exc}"
        )

    _watch_scheduler_thread = threading.Thread(
        target=playlist_watch_scheduler,
        daemon=True,
    )
    _watch_scheduler_thread.start()
    logger.info(
        "Watch Manager: Background scheduler started (includes playlists and artists)."
    )
|
|
|
|
|
|
def stop_watch_manager():  # Renamed from stop_playlist_watch_manager
    """Ask the background scheduler to stop and wait briefly for it to exit.

    Sets ``STOP_EVENT`` and joins the scheduler thread for at most 10
    seconds. A warning is logged when the thread is still alive after the
    join; in either case the module-level thread handle is reset to None.
    When no live scheduler thread is registered, the call only logs that.
    """
    global _watch_scheduler_thread

    scheduler = _watch_scheduler_thread
    if not (scheduler and scheduler.is_alive()):
        logger.info("Watch Manager: Background scheduler not running.")
        return

    logger.info("Watch Manager: Stopping background scheduler...")
    STOP_EVENT.set()
    scheduler.join(timeout=10)

    if scheduler.is_alive():
        logger.warning("Watch Manager: Scheduler thread did not stop in time.")
    else:
        logger.info("Watch Manager: Background scheduler stopped.")

    # NOTE(review): the handle is dropped even when the join timed out.
    # start_watch_manager() clears STOP_EVENT, so a thread that outlived the
    # join could keep running next to a newly started one - confirm this is
    # acceptable.
    _watch_scheduler_thread = None
|
|
|
|
|
|
def get_playlist_tracks_for_m3u(playlist_spotify_id: str) -> List[Dict[str, Any]]:
    """
    Get all tracks for a playlist from the database with the metadata needed for m3u generation.

    Only rows with ``is_present_in_spotify = 1`` are returned, ordered by
    ``track_number`` and then ``title``. Empty text fields are replaced by
    "Unknown ..." placeholders and empty numeric fields by 0; ``final_path``
    is passed through unchanged and may be None.

    Args:
        playlist_spotify_id: The Spotify playlist ID

    Returns:
        List of track dictionaries with metadata. The list is empty when the
        playlist's track table does not exist or when reading the database
        fails (the error is logged, not raised).
    """
    table_name = f"playlist_{playlist_spotify_id.replace('-', '_')}"
    # A table name cannot be bound as a SQL parameter, so it is embedded as a
    # double-quoted identifier with embedded quotes doubled.
    quoted_table = '"' + table_name.replace('"', '""') + '"'

    try:
        from routes.utils.watch.db import (
            _get_playlists_db_connection,
            _ensure_table_schema,
            EXPECTED_PLAYLIST_TRACKS_COLUMNS,
        )

        with _get_playlists_db_connection() as conn:
            cursor = conn.cursor()

            # Check if table exists. The name is bound as a parameter, so an
            # ID with SQL metacharacters can only fail to match here and never
            # reaches the statements below.
            cursor.execute(
                "SELECT name FROM sqlite_master WHERE type='table' AND name=?;",
                (table_name,),
            )
            if cursor.fetchone() is None:
                logger.warning(
                    f"Track table {table_name} does not exist. Cannot generate m3u file."
                )
                return []

            # Ensure the table has the latest schema before querying
            _ensure_table_schema(
                cursor,
                table_name,
                EXPECTED_PLAYLIST_TRACKS_COLUMNS,
                f"playlist tracks ({playlist_spotify_id})",
            )

            # Get all tracks that are present in Spotify
            cursor.execute(f"""
                SELECT spotify_track_id, title, artist_names, album_name,
                       album_artist_names, track_number, duration_ms, final_path
                FROM {quoted_table}
                WHERE is_present_in_spotify = 1
                ORDER BY track_number, title
            """)

            # final_path is named in the SELECT, so it is always present in
            # each row; its value may still be NULL/None.
            return [
                {
                    "spotify_track_id": row["spotify_track_id"],
                    "title": row["title"] or "Unknown Track",
                    "artist_names": row["artist_names"] or "Unknown Artist",
                    "album_name": row["album_name"] or "Unknown Album",
                    "album_artist_names": row["album_artist_names"]
                    or "Unknown Artist",
                    "track_number": row["track_number"] or 0,
                    "duration_ms": row["duration_ms"] or 0,
                    "final_path": row["final_path"],
                }
                for row in cursor.fetchall()
            ]

    except Exception as e:
        logger.error(
            f"Error retrieving tracks for m3u generation for playlist {playlist_spotify_id}: {e}",
            exc_info=True,
        )
        return []
|
|
|
|
|
|
def _m3u_relative_path(final_path: Any) -> str:
    """Rewrite a stored track path so it is relative to the ``playlists`` folder.

    The m3u file is written to ``<downloads>/playlists``, so anything below
    the downloads root is reachable through ``../``. Paths without any
    ``downloads`` segment are returned unchanged apart from having
    backslashes converted to forward slashes.
    """
    normalized = str(final_path).replace("\\", "/")

    # Test the unambiguous relative prefixes first. Otherwise a path such as
    # "downloads/Artist/downloads/x.mp3" would be cut at its *last*
    # "/downloads/" segment by the generic search below.
    for prefix in ("./downloads/", "downloads/"):
        if normalized.startswith(prefix):
            return "../" + normalized[len(prefix) :]

    # Absolute or otherwise prefixed path: keep what follows the last
    # "/downloads/" segment (matched case-insensitively).
    idx = normalized.lower().rfind("/downloads/")
    if idx != -1:
        return "../" + normalized[idx + len("/downloads/") :]

    # As per assumption, everything is under downloads; if not, keep as-is
    return normalized


def _m3u_extinf_line(track: Dict[str, Any]) -> str:
    """Build the ``#EXTINF`` line (duration in seconds, "artist - title") for a track."""
    duration_ms = track.get("duration_ms")
    # -1 is written when the duration is missing or zero.
    duration_seconds = duration_ms // 1000 if duration_ms else -1
    artist = track.get("artist_names", "Unknown Artist")
    title = track.get("title", "Unknown Track")
    return f"#EXTINF:{duration_seconds},{artist} - {title}"


def update_playlist_m3u_file(playlist_spotify_id: str):
    """
    Generate/update the m3u file for a watched playlist based on tracks in the database.

    The file is written to ``./downloads/playlists/<sanitized name>.m3u`` and
    overwritten on each call. Tracks without a ``final_path`` are skipped.
    When the playlist has no tracks, a file containing only the ``#EXTM3U``
    header is written. Errors are logged and not raised.

    Args:
        playlist_spotify_id: The Spotify playlist ID
    """
    try:
        # Get playlist metadata
        playlist_info = get_watched_playlist(playlist_spotify_id)
        if not playlist_info:
            logger.warning(
                f"Playlist {playlist_spotify_id} not found in watched playlists. Cannot update m3u file."
            )
            return

        # dict.get() only applies its default when the key is absent; a stored
        # None or empty name must fall back too, otherwise re.sub() below
        # raises TypeError and no file is written.
        playlist_name = playlist_info.get("name") or "Unknown Playlist"

        output_dir = (
            "./downloads"  # This matches the output_dir used in download functions
        )

        # Get all tracks for the playlist
        tracks = get_playlist_tracks_for_m3u(playlist_spotify_id)
        if not tracks:
            logger.info(
                f"No tracks found for playlist '{playlist_name}'. M3U file will be empty or removed."
            )

        # Clean playlist name for filename
        safe_playlist_name = re.sub(
            r'[<>:"/\\|?*\x00-\x1f]', "_", playlist_name
        ).strip()

        # Create m3u file path
        playlists_dir = Path(output_dir) / "playlists"
        playlists_dir.mkdir(parents=True, exist_ok=True)
        m3u_file_path = playlists_dir / f"{safe_playlist_name}.m3u"

        # Generate m3u content
        m3u_lines = ["#EXTM3U"]
        included_count = 0
        skipped_missing_final_path = 0

        for track in tracks:
            final_path = track.get("final_path")
            if not final_path:
                # No recorded file location for this track: nothing to point at.
                skipped_missing_final_path += 1
                continue

            m3u_lines.append(_m3u_extinf_line(track))
            m3u_lines.append(_m3u_relative_path(final_path))
            included_count += 1

        # Write m3u file
        with open(m3u_file_path, "w", encoding="utf-8") as f:
            f.write("\n".join(m3u_lines))

        skipped_note = (
            f" Skipped {skipped_missing_final_path} without final_path."
            if skipped_missing_final_path
            else ""
        )
        logger.info(
            f"Updated m3u file for playlist '{playlist_name}' at {m3u_file_path} with {included_count} entries.{skipped_note}"
        )

    except Exception as e:
        logger.error(
            f"Error updating m3u file for playlist {playlist_spotify_id}: {e}",
            exc_info=True,
        )
|