Merge pull request #338 from Phlogi/performance-improvements

enh(api): add per-task sse throttling and batching for robust updates
Spotizerr authored on 2025-08-30 06:12:50 -06:00 · committed by GitHub
5 changed files with 152 additions and 28 deletions


@@ -4,7 +4,7 @@
 ### can leave the defaults as they are.
 ###
 ### If you plan on using for a server,
-### see [insert docs url]
+### see https://spotizerr.rtfd.io
 ###
 # Interface to bind to. Unless you know what you're doing, don't change this
@@ -62,4 +62,4 @@ GITHUB_CLIENT_SECRET=
 # Log level for application logging.
 # Possible values: debug, info, warning, error, critical
 # Set to 'info' or 'warning' for general use. Use 'debug' for troubleshooting.
 LOG_LEVEL=info
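For context, `LOG_LEVEL` is typically consumed once at application startup. A minimal sketch of that pattern (the `setup_logging` helper is illustrative, not Spotizerr's actual code):

```python
import logging
import os


def setup_logging() -> None:
    """Configure root logging from the LOG_LEVEL env var (illustrative helper)."""
    # Fall back to "info" when the variable is unset, matching the example file.
    level_name = os.environ.get("LOG_LEVEL", "info").upper()
    level = getattr(logging, level_name, logging.INFO)
    logging.basicConfig(
        level=level,
        format="%(asctime)s %(levelname)s %(name)s: %(message)s",
    )


setup_logging()
logging.getLogger(__name__).info("Logging configured")
```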

.github/workflows/pr-build.yml (new file, +60 lines)

@@ -0,0 +1,60 @@
+name: PR Dev/Test Container
+
+on:
+  pull_request:
+    types: [opened, synchronize, reopened]
+  workflow_dispatch:
+    inputs:
+      pr_number:
+        description: 'Pull request number (optional, for manual runs)'
+        required: false
+      branch:
+        description: 'Branch to build (optional, defaults to PR head or main)'
+        required: false
+
+env:
+  REGISTRY: ghcr.io
+  IMAGE_NAME: ${{ github.repository }}
+
+jobs:
+  build-and-push:
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+      packages: write
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+        with:
+          ref: ${{ github.event.inputs.branch || github.head_ref || github.ref }}
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v2
+
+      - name: Login to GHCR
+        uses: docker/login-action@v2
+        with:
+          registry: ${{ env.REGISTRY }}
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+
+      # Extract Docker metadata
+      - name: Extract Docker metadata
+        id: meta
+        uses: docker/metadata-action@v4
+        with:
+          images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
+          tags: |
+            type=raw,value=dev-pr-${{ github.event.inputs.pr_number || github.event.pull_request.number }}
+
+      # Build and push multi-arch dev image
+      - name: Build and push
+        uses: docker/build-push-action@v4
+        with:
+          context: .
+          platforms: linux/amd64,linux/arm64
+          push: true
+          tags: ${{ steps.meta.outputs.tags }}
+          labels: ${{ steps.meta.outputs.labels }}
+          cache-from: type=gha
+          cache-to: type=gha,mode=max
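Because the workflow also declares `workflow_dispatch`, a PR image can be rebuilt by hand. A sketch using GitHub's documented workflow-dispatch REST endpoint; `OWNER/REPO` and the token are placeholders, and the inputs mirror this PR:

```python
# Manually dispatch the PR build workflow via GitHub's REST API.
# OWNER/REPO and the GITHUB_TOKEN env var are placeholders to fill in.
import os

import requests

resp = requests.post(
    "https://api.github.com/repos/OWNER/REPO/actions/workflows/pr-build.yml/dispatches",
    headers={
        "Authorization": f"Bearer {os.environ['GITHUB_TOKEN']}",
        "Accept": "application/vnd.github+json",
    },
    json={
        "ref": "main",  # branch to run the workflow from
        "inputs": {"pr_number": "338", "branch": "performance-improvements"},
    },
    timeout=30,
)
resp.raise_for_status()  # 204 No Content on success
```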


@@ -27,6 +27,10 @@ If you self-host a music server with other users than yourself, you almost certa
 <img width="1588" height="994" alt="image" src="https://github.com/user-attachments/assets/e34d7dbb-29e3-4d75-bcbd-0cee03fa57dc" />
 </details>
+
+## How do I start?
+Docs are available at: https://spotizerr.rtfd.io
+
 ### Common Issues
 **Downloads not starting?**


@@ -8,7 +8,7 @@ from typing import Set, Optional
 import redis
 import threading
-from routes.utils.celery_config import REDIS_URL
+from routes.utils.celery_config import REDIS_URL, get_config_params
 from routes.utils.celery_tasks import (
     get_task_info,
@@ -37,6 +37,11 @@ router = APIRouter()
 class SSEBroadcaster:
     def __init__(self):
         self.clients: Set[asyncio.Queue] = set()
+        # Per-task throttling/batching/deduplication state
+        self._task_state = {}  # task_id -> dict with last_sent, last_event, last_send_time, scheduled_handle
+        # Load configurable interval
+        config = get_config_params()
+        self.sse_update_interval = float(config.get("sseUpdateIntervalSeconds", 1))
 
     async def add_client(self, queue: asyncio.Queue):
         """Add a new SSE client"""
@@ -49,51 +54,105 @@ class SSEBroadcaster:
logger.debug(f"SSE: Client disconnected (total: {len(self.clients)})") logger.debug(f"SSE: Client disconnected (total: {len(self.clients)})")
async def broadcast_event(self, event_data: dict): async def broadcast_event(self, event_data: dict):
"""Broadcast an event to all connected clients""" """
logger.debug( Throttle, batch, and deduplicate SSE events per task.
f"SSE Broadcaster: Attempting to broadcast to {len(self.clients)} clients" Only emit at most 1 update/sec per task, aggregate within window, suppress redundant updates.
) """
if not self.clients: if not self.clients:
logger.debug("SSE Broadcaster: No clients connected, skipping broadcast") logger.debug("SSE Broadcaster: No clients connected, skipping broadcast")
return return
+        # Defensive: always work with a list of tasks
+        tasks = event_data.get("tasks", [])
+        if not isinstance(tasks, list):
+            tasks = [tasks]
+
+        # For each task, throttle/batch/dedupe
+        for task in tasks:
+            task_id = task.get("task_id")
+            if not task_id:
+                continue
+
+            now = time.time()
+            state = self._task_state.setdefault(task_id, {
+                "last_sent": None,
+                "last_event": None,
+                "last_send_time": 0,
+                "scheduled_handle": None,
+            })
+
+            # Deduplication: if event is identical to last sent, skip
+            if state["last_sent"] is not None and self._events_equal(state["last_sent"], task):
+                logger.debug(f"SSE: Deduped event for task {task_id}")
+                continue
+
+            # Throttling: if within interval, batch (store as last_event, schedule send)
+            elapsed = now - state["last_send_time"]
+            if elapsed < self.sse_update_interval:
+                state["last_event"] = task
+                if state["scheduled_handle"] is None:
+                    delay = self.sse_update_interval - elapsed
+                    loop = asyncio.get_running_loop()
+                    # Bind task_id as a default argument so the deferred callback
+                    # keeps this iteration's value instead of the loop's last value
+                    state["scheduled_handle"] = loop.call_later(
+                        delay,
+                        lambda tid=task_id: asyncio.create_task(self._send_batched_event(tid)),
+                    )
+                continue
+
+            # Otherwise, send immediately
+            await self._send_event(task_id, task)
+            state["last_send_time"] = now
+            state["last_sent"] = task
+            state["last_event"] = None
+            if state["scheduled_handle"]:
+                state["scheduled_handle"].cancel()
+                state["scheduled_handle"] = None
+
+    async def _send_batched_event(self, task_id):
+        state = self._task_state.get(task_id)
+        if not state or not state["last_event"]:
+            return
+        await self._send_event(task_id, state["last_event"])
+        state["last_send_time"] = time.time()
+        state["last_sent"] = state["last_event"]
+        state["last_event"] = None
+        state["scheduled_handle"] = None
+
+    async def _send_event(self, task_id, task):
+        # Compose event_data for this task
+        event_data = {
+            "tasks": [task],
+            "current_timestamp": time.time(),
+            "change_type": "update",
+        }
-        # Add global task counts right before broadcasting - this is the single source of truth
-        enhanced_event_data = add_global_task_counts_to_event(event_data.copy())
+        # Skip expensive count recomputation for high-frequency callback/progress updates
+        try:
+            trigger_reason = event_data.get("trigger_reason")
+        except Exception:
+            trigger_reason = None
+        if trigger_reason and trigger_reason in {"callback_update", "progress_update"}:
+            enhanced_event_data = event_data.copy()
+        else:
+            enhanced_event_data = add_global_task_counts_to_event(event_data.copy())
         event_json = json.dumps(enhanced_event_data)
         sse_data = f"data: {event_json}\n\n"
-        logger.debug(
-            f"SSE Broadcaster: Broadcasting event: {enhanced_event_data.get('change_type', 'unknown')} with {enhanced_event_data.get('active_tasks', 0)} active tasks"
-        )
-        # Send to all clients, remove disconnected ones
         disconnected = set()
         sent_count = 0
         for client_queue in self.clients.copy():
             try:
                 await client_queue.put(sse_data)
                 sent_count += 1
-                logger.debug("SSE: Successfully sent to client queue")
             except Exception as e:
                 logger.error(f"SSE: Failed to send to client: {e}")
                 disconnected.add(client_queue)
-        # Clean up disconnected clients
         for client in disconnected:
             self.clients.discard(client)
         logger.debug(
-            f"SSE Broadcaster: Successfully sent to {sent_count} clients, removed {len(disconnected)} disconnected clients"
+            f"SSE Broadcaster: Sent throttled/batched event for task {task_id} to {sent_count} clients"
         )
+
+    def _events_equal(self, a, b):
+        # Compare two task dicts for deduplication (ignore timestamps)
+        if not isinstance(a, dict) or not isinstance(b, dict):
+            return False
+        a_copy = dict(a)
+        b_copy = dict(b)
+        a_copy.pop("timestamp", None)
+        b_copy.pop("timestamp", None)
+        return a_copy == b_copy
 # Global broadcaster instance
 sse_broadcaster = SSEBroadcaster()
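To see the windowing policy in isolation, the standalone sketch below mirrors (but is not) the broadcaster's logic: ten rapid progress updates for one task collapse into an immediate send plus one trailing batched send, and identical payloads are suppressed.

```python
import asyncio
import time


class MiniThrottler:
    """Toy per-key throttler mirroring the SSEBroadcaster windowing logic."""

    def __init__(self, interval: float = 1.0):
        self.interval = interval
        self.state = {}  # key -> {"last_sent", "last_event", "last_send_time", "handle"}

    async def submit(self, key: str, payload: dict) -> None:
        now = time.monotonic()
        st = self.state.setdefault(
            key, {"last_sent": None, "last_event": None, "last_send_time": 0.0, "handle": None}
        )
        if st["last_sent"] == payload:  # dedupe identical payloads
            return
        if now - st["last_send_time"] < self.interval:  # inside window: batch
            st["last_event"] = payload
            if st["handle"] is None:
                delay = self.interval - (now - st["last_send_time"])
                st["handle"] = asyncio.get_running_loop().call_later(
                    delay, lambda k=key: asyncio.create_task(self._flush(k))
                )
            return
        self._emit(key, payload, st)  # outside window: send immediately

    async def _flush(self, key: str) -> None:
        st = self.state[key]
        st["handle"] = None
        if st["last_event"] is not None:
            self._emit(key, st["last_event"], st)

    def _emit(self, key, payload, st):
        st["last_send_time"] = time.monotonic()
        st["last_sent"] = payload
        st["last_event"] = None
        print(f"emit {key}: {payload}")


async def main():
    t = MiniThrottler(interval=1.0)
    for pct in range(0, 100, 10):  # 10 rapid progress updates, 0.05 s apart
        await t.submit("task-1", {"progress": pct})
        await asyncio.sleep(0.05)
    await asyncio.sleep(1.5)  # let the trailing batched send fire


asyncio.run(main())  # expect 2 emissions (progress 0, then 90) instead of 10
```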


@@ -54,6 +54,7 @@ DEFAULT_MAIN_CONFIG = {
"watch": {}, "watch": {},
"realTimeMultiplier": 0, "realTimeMultiplier": 0,
"padNumberWidth": 3, "padNumberWidth": 3,
"sseUpdateIntervalSeconds": 1, # Configurable SSE update interval (default: 1s)
} }
@@ -190,7 +191,7 @@ task_annotations = {
"rate_limit": f"{MAX_CONCURRENT_DL}/m", "rate_limit": f"{MAX_CONCURRENT_DL}/m",
}, },
"routes.utils.celery_tasks.trigger_sse_update_task": { "routes.utils.celery_tasks.trigger_sse_update_task": {
"rate_limit": "500/m", # Allow high rate for real-time SSE updates "rate_limit": "60/m", # Throttle to 1 update/sec per task (matches SSE throttle)
"default_retry_delay": 1, # Quick retry for SSE updates "default_retry_delay": 1, # Quick retry for SSE updates
"max_retries": 1, # Limited retries for best-effort delivery "max_retries": 1, # Limited retries for best-effort delivery
"ignore_result": True, # Don't store results for SSE tasks "ignore_result": True, # Don't store results for SSE tasks