spotizerr-dev/routes/prgs.py

from flask import Blueprint, abort, jsonify, Response, stream_with_context
import os
import json
import logging
import time
import random

from routes.utils.celery_tasks import (
    get_task_info,
    get_task_status,
    get_last_task_status,
    get_all_tasks,
    cancel_task,
    retry_task,
    ProgressState,
    redis_client,
)

# Configure logging
logger = logging.getLogger(__name__)

prgs_bp = Blueprint('prgs', __name__, url_prefix='/api/prgs')

# The old path for PRG files (kept for backward compatibility during the transition)
PRGS_DIR = os.path.join(os.getcwd(), 'prgs')
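
# For reference: the legacy files under PRGS_DIR are parsed below as
# line-oriented JSON - the first line holds the original request, the second
# line the resource metadata, and the last line the most recent status update.
# A hypothetical example of a PRG file's contents (values are illustrative,
# not taken from a real file):
#
#   {"display_title": "Some Album", "display_type": "album", "display_artist": "Some Artist"}
#   {"type": "album", "name": "Some Album", "artist": "Some Artist"}
#   {"status": "complete", "message": "Download complete"}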

@prgs_bp.route('/<task_id>', methods=['GET'])
def get_prg_file(task_id):
    """
    Return a JSON object with the resource type, its name (title),
    the last progress update, and, if available, the original request parameters.

    This function works with both the old PRG file system (for backward
    compatibility) and the new task ID based system.

    Args:
        task_id: Either a task UUID from Celery or a PRG filename from the old system
    """
    try:
        # First check if this is a task ID in the new system
        task_info = get_task_info(task_id)
        if task_info:
            # This is a task ID in the new system
            original_request = task_info.get("original_request", {})

            # Get the latest status update for this task
            last_status = get_last_task_status(task_id)
            logger.debug(f"API: Got last_status for {task_id}: {json.dumps(last_status) if last_status else None}")

            # Get all status updates for debugging
            all_statuses = get_task_status(task_id)
            status_count = len(all_statuses)
            logger.debug(f"API: Task {task_id} has {status_count} status updates")

            # Prepare the response with basic info
            response = {
                "type": task_info.get("type", ""),
                "name": task_info.get("name", ""),
                "artist": task_info.get("artist", ""),
                "last_line": last_status,
                "original_request": original_request,
                "display_title": original_request.get("display_title", task_info.get("name", "")),
                "display_type": original_request.get("display_type", task_info.get("type", "")),
                "display_artist": original_request.get("display_artist", task_info.get("artist", "")),
                "status_count": status_count,
            }
            # Handle different status types
            if last_status:
                status_type = last_status.get("status", "unknown")

                # For terminal statuses (complete, error, cancelled)
                if status_type in [ProgressState.COMPLETE, ProgressState.ERROR, ProgressState.CANCELLED]:
                    response["progress_message"] = last_status.get("message", f"Download {status_type}")

                # For progress status with track information
                elif status_type == "progress" and last_status.get("track"):
                    # Add explicit track progress fields to the top level for easy access
                    response["current_track"] = last_status.get("track", "")
                    response["track_number"] = last_status.get("parsed_current_track", 0)
                    response["total_tracks"] = last_status.get("parsed_total_tracks", 0)
                    response["progress_percent"] = last_status.get("overall_progress", 0)
                    response["album"] = last_status.get("album", "")

                    # Format a nice progress message for display
                    track_info = last_status.get("track", "")
                    current = last_status.get("parsed_current_track", 0)
                    total = last_status.get("parsed_total_tracks", 0)
                    progress = last_status.get("overall_progress", 0)
                    if current and total:
                        response["progress_message"] = f"Downloading track {current}/{total} ({progress}%): {track_info}"
                    elif track_info:
                        response["progress_message"] = f"Downloading: {track_info}"

                # For initializing status
                elif status_type == "initializing":
                    album = last_status.get("album", "")
                    if album:
                        response["progress_message"] = f"Initializing download for {album}"
                    else:
                        response["progress_message"] = "Initializing download..."

                # For processing status (default)
                elif status_type == "processing":
                    # Search for the most recent track progress in all statuses
                    has_progress = False
                    for status in reversed(all_statuses):
                        if status.get("status") == "progress" and status.get("track"):
                            # Use this track progress information
                            track_info = status.get("track", "")
                            current_raw = status.get("current_track", "")
                            response["current_track"] = track_info

                            # Try to parse track numbers if available
                            if isinstance(current_raw, str) and "/" in current_raw:
                                try:
                                    parts = current_raw.split("/")
                                    current = int(parts[0])
                                    total = int(parts[1])
                                    response["track_number"] = current
                                    response["total_tracks"] = total
                                    response["progress_percent"] = min(int((current / total) * 100), 100)
                                    response["progress_message"] = f"Processing track {current}/{total}: {track_info}"
                                except (ValueError, IndexError, ZeroDivisionError):
                                    response["progress_message"] = f"Processing: {track_info}"
                            else:
                                response["progress_message"] = f"Processing: {track_info}"
                            has_progress = True
                            break
                    if not has_progress:
                        # Just use the processing message
                        response["progress_message"] = last_status.get("message", "Processing download...")

                # For other status types
                else:
                    response["progress_message"] = last_status.get("message", f"Status: {status_type}")

            return jsonify(response)
        # If not found in the new system, try the old PRG file system.
        # Security check to prevent path traversal attacks.
        if '..' in task_id or '/' in task_id:
            abort(400, "Invalid file request")
        filepath = os.path.join(PRGS_DIR, task_id)
        with open(filepath, 'r') as f:
            content = f.read()
        lines = content.splitlines()

        # If the file is empty, return default values.
        if not lines:
            return jsonify({
                "type": "",
                "name": "",
                "artist": "",
                "last_line": None,
                "original_request": None,
                "display_title": "",
                "display_type": "",
                "display_artist": "",
            })
        # Attempt to extract the original request from the first line.
        original_request = None
        display_title = ""
        display_type = ""
        display_artist = ""
        try:
            first_line = json.loads(lines[0])
            if isinstance(first_line, dict):
                if "original_request" in first_line:
                    original_request = first_line["original_request"]
                else:
                    # The first line might be the original request itself
                    original_request = first_line

                # Extract display information from the original request
                if original_request:
                    display_title = original_request.get("display_title", original_request.get("name", ""))
                    display_type = original_request.get("display_type", original_request.get("type", ""))
                    display_artist = original_request.get("display_artist", original_request.get("artist", ""))
        except Exception as e:
            logger.warning(f"Error parsing first line of PRG file: {e}")
            original_request = None
        # For resource type and name, use the second line if available.
        resource_type = ""
        resource_name = ""
        resource_artist = ""
        if len(lines) > 1:
            try:
                second_line = json.loads(lines[1])
                # Directly extract 'type', 'name' and 'artist' from the JSON
                resource_type = second_line.get("type", "")
                resource_name = second_line.get("name", "")
                resource_artist = second_line.get("artist", "")
            except Exception:
                # Keep the empty defaults set above
                pass

        # Get the last line from the file.
        last_line_raw = lines[-1]
        try:
            last_line_parsed = json.loads(last_line_raw)
        except Exception:
            last_line_parsed = last_line_raw  # Fall back to the raw string if JSON parsing fails.

        return jsonify({
            "type": resource_type,
            "name": resource_name,
            "artist": resource_artist,
            "last_line": last_line_parsed,
            "original_request": original_request,
            "display_title": display_title,
            "display_type": display_type,
            "display_artist": display_artist,
        })
    except FileNotFoundError:
        abort(404, "Task or file not found")
    except Exception as e:
        abort(500, f"An error occurred: {e}")

@prgs_bp.route('/delete/<task_id>', methods=['DELETE'])
def delete_prg_file(task_id):
    """
    Delete a task's information and history.

    Works with both the old PRG file system and the new task ID based system.

    Args:
        task_id: Either a task UUID from Celery or a PRG filename from the old system
    """
    try:
        # First try to delete from Redis if it's a task ID
        task_info = get_task_info(task_id)
        if task_info:
            # This is a task ID in the new system - cancel it first in case it's
            # still running, then clear its data from Redis
            cancel_task(task_id)

            # Delete task info and status (redis_client is imported at module level)
            redis_client.delete(f"task:{task_id}:info")
            redis_client.delete(f"task:{task_id}:status")
            return {'message': f'Task {task_id} deleted successfully'}, 200

        # If not found in Redis, try the old PRG file system.
        # Security checks to prevent path traversal and ensure correct file type.
        if '..' in task_id or '/' in task_id:
            abort(400, "Invalid file request")
        if not task_id.endswith('.prg'):
            abort(400, "Only .prg files can be deleted")
        filepath = os.path.join(PRGS_DIR, task_id)
        if not os.path.isfile(filepath):
            abort(404, "File not found")
        os.remove(filepath)
        return {'message': f'File {task_id} deleted successfully'}, 200
    except FileNotFoundError:
        abort(404, "Task or file not found")
    except Exception as e:
        abort(500, f"An error occurred: {e}")

@prgs_bp.route('/list', methods=['GET'])
def list_prg_files():
    """
    Retrieve a list of all tasks in the system.

    Combines results from both the old PRG file system and the new task ID based system.
    """
    try:
        # Get tasks from the new system
        tasks = get_all_tasks()
        task_ids = [task["task_id"] for task in tasks]

        # Get PRG files from the old system
        prg_files = []
        if os.path.isdir(PRGS_DIR):
            with os.scandir(PRGS_DIR) as entries:
                for entry in entries:
                    if entry.is_file() and entry.name.endswith('.prg'):
                        prg_files.append(entry.name)

        # Combine both lists
        all_ids = task_ids + prg_files
        return jsonify(all_ids)
    except Exception as e:
        abort(500, f"An error occurred: {e}")

@prgs_bp.route('/retry/<task_id>', methods=['POST'])
def retry_task_endpoint(task_id):
    """
    Retry a failed task.

    Args:
        task_id: The ID of the task to retry
    """
    try:
        # First check if this is a task ID in the new system
        task_info = get_task_info(task_id)
        if task_info:
            # This is a task ID in the new system
            result = retry_task(task_id)
            return jsonify(result)

        # If not found in the new system, we would need to handle an old-system
        # retry. For now, return an error as we're transitioning to the new system.
        return jsonify({
            "status": "error",
            "message": "Retry for old system is not supported in the new API. Please use the new task ID format."
        }), 400
    except Exception as e:
        abort(500, f"An error occurred: {e}")

@prgs_bp.route('/cancel/<task_id>', methods=['POST'])
def cancel_task_endpoint(task_id):
    """
    Cancel a running or queued task.

    Args:
        task_id: The ID of the task to cancel
    """
    try:
        # First check if this is a task ID in the new system
        task_info = get_task_info(task_id)
        if task_info:
            # This is a task ID in the new system
            result = cancel_task(task_id)
            return jsonify(result)

        # If not found in the new system, we would need to handle an old-system
        # cancellation. For now, return an error as we're transitioning to the new system.
        return jsonify({
            "status": "error",
            "message": "Cancellation for old system is not supported in the new API. Please use the new task ID format."
        }), 400
    except Exception as e:
        abort(500, f"An error occurred: {e}")

@prgs_bp.route('/stream/<task_id>', methods=['GET'])
def stream_task_status(task_id):
    """
    Stream task status updates as Server-Sent Events (SSE).

    This endpoint opens a persistent connection and sends updates in real-time.

    Args:
        task_id: The ID of the task to stream updates for
    """
    def generate():
        try:
            # Get initial task info to send as the opening message
            task_info = get_task_info(task_id)
            if not task_info:
                # Check if this is an old PRG file
                if os.path.exists(os.path.join(PRGS_DIR, task_id)):
                    # Return an error - SSE is not supported for old PRG files
                    yield f"event: error\ndata: {json.dumps({'error': 'SSE streaming not supported for old PRG files'})}\n\n"
                    return
                else:
                    # Task not found
                    yield f"event: error\ndata: {json.dumps({'error': 'Task not found'})}\n\n"
                    return

            # Get the original request and other basic info for the opening message
            original_request = task_info.get("original_request", {})
            download_type = task_info.get("type", "")
            name = task_info.get("name", "")
            artist = task_info.get("artist", "")

            # Prepare the opening message with the required information
            opening_data = {
                "event": "start",
                "task_id": task_id,
                "type": download_type,
                "name": name,
                "artist": artist,
                "url": original_request.get("url", ""),
                "service": original_request.get("service", ""),
                "timestamp": time.time(),
                "status": "initializing",
                "message": f"Starting {download_type} download: {name}" + (f" by {artist}" if artist else "")
            }

            # Send the opening message
            yield f"event: start\ndata: {json.dumps(opening_data)}\n\n"

            # Get existing status updates to catch up
            all_updates = get_task_status(task_id)

            # Sort updates by id
            sorted_updates = sorted(all_updates, key=lambda x: x.get("id", 0))

            # Send only the 5 most recent updates to reduce the initial payload
            for update in sorted_updates[-5:]:
                # Add the task_id to each update message
                update["task_id"] = task_id
                yield f"event: update\ndata: {json.dumps(update)}\n\n"

            # Keep track of the last update ID we've sent
            last_sent_id = 0
            if sorted_updates:
                last_sent_id = sorted_updates[-1].get("id", 0)

            # Create a Redis connection for subscribing to updates
            redis_pubsub = redis_client.pubsub()
            redis_pubsub.subscribe(f"task_updates:{task_id}")

            # Hold the connection open and check for updates
            last_heartbeat = time.time()
            heartbeat_interval = 30  # Increased from 15 to 30 seconds to reduce overhead

            # Optimize polling with a more efficient loop structure
            check_interval = 0.2  # Check for messages every 200ms instead of continuously
            message_batch_size = 5  # Process up to 5 messages at a time
            while True:
                # Process a batch of messages to reduce CPU usage
                messages_processed = 0
                while messages_processed < message_batch_size:
                    # Check for new updates via Redis Pub/Sub with a timeout
                    message = redis_pubsub.get_message(timeout=check_interval)
                    if not message:
                        break  # No more messages to process
                    if message['type'] == 'message':
                        messages_processed += 1
                        # Got a new message from Redis Pub/Sub
                        try:
                            data = json.loads(message['data'].decode('utf-8'))
                            status_id = data.get('status_id', 0)

                            # Only process if this is a new status update
                            if status_id > last_sent_id:
                                # Efficient fetch - only check the last 10 entries of
                                # the status list for the specific update we need
                                for idx in range(-10, 0):
                                    status_data = redis_client.lindex(f"task:{task_id}:status", idx)
                                    if status_data:
                                        status = json.loads(status_data.decode('utf-8'))
                                        if status.get("id") == status_id:
                                            # Add the task_id to the update
                                            status["task_id"] = task_id

                                            # Choose the appropriate event type based on status
                                            status_type = status.get("status", "")
                                            event_type = "update"
                                            if status_type in [ProgressState.COMPLETE, ProgressState.DONE]:
                                                event_type = "complete"
                                            elif status_type == ProgressState.TRACK_COMPLETE:
                                                # Use a distinct event type for track completion to prevent UI issues
                                                event_type = "track_complete"
                                            elif status_type == ProgressState.ERROR:
                                                event_type = "error"
                                            elif status_type in [ProgressState.TRACK_PROGRESS, ProgressState.REAL_TIME]:
                                                event_type = "progress"

                                            # Send the update
                                            yield f"event: {event_type}\ndata: {json.dumps(status)}\n\n"
                                            last_sent_id = status_id
                                            break
                        except Exception as e:
                            logger.error(f"Error processing Redis Pub/Sub message: {e}")
                # Check if the task is complete, error, or cancelled - if so, end the stream.
                # To reduce load, only perform this check on ~20% of iterations
                # (i.e. every ~5 loops on average).
                if random.random() < 0.2:
                    last_status = get_last_task_status(task_id)
                    if last_status and last_status.get("status") in [ProgressState.COMPLETE, ProgressState.ERROR, ProgressState.CANCELLED, ProgressState.DONE]:
                        # Send the final message
                        final_data = {
                            "event": "end",
                            "task_id": task_id,
                            "status": last_status.get("status"),
                            "message": last_status.get("message", "Download complete"),
                            "timestamp": time.time()
                        }
                        yield f"event: end\ndata: {json.dumps(final_data)}\n\n"
                        break
                # Send a heartbeat periodically to keep the connection alive
                now = time.time()
                if now - last_heartbeat >= heartbeat_interval:
                    yield f"event: heartbeat\ndata: {json.dumps({'timestamp': now})}\n\n"
                    last_heartbeat = now

                # Sleep between batch checks
                time.sleep(check_interval)
        except Exception as e:
            logger.error(f"Error in SSE stream: {e}")
            yield f"event: error\ndata: {json.dumps({'error': str(e)})}\n\n"
        finally:
            # Clean up: unsubscribe and close the Redis Pub/Sub connection
            if 'redis_pubsub' in locals():
                try:
                    redis_pubsub.unsubscribe()
                    redis_pubsub.close()
                except Exception as e:
                    logger.error(f"Error closing Redis Pub/Sub: {e}")

    return Response(
        stream_with_context(generate()),
        mimetype='text/event-stream',
        headers={
            'Cache-Control': 'no-cache',
            'X-Accel-Buffering': 'no'  # Disable Nginx buffering
        }
    )
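
# A minimal client sketch for the SSE endpoint above (illustrative only:
# host/port are assumptions and the frame parsing is deliberately naive):
#
#   import requests
#
#   url = "http://localhost:5000/api/prgs/stream/<task_id>"
#   with requests.get(url, stream=True) as resp:
#       event = None
#       for raw in resp.iter_lines():
#           line = raw.decode("utf-8")
#           if line.startswith("event:"):
#               event = line.split(":", 1)[1].strip()
#           elif line.startswith("data:"):
#               print(event, line.split(":", 1)[1].strip())
#               if event in ("end", "error"):
#                   break
#
# The stream emits "start", then "update"/"progress"/"track_complete" frames,
# periodic "heartbeat" frames, and finally "complete", "error", or "end".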