Merge branch 'dev' into main
This commit is contained in:
@@ -83,6 +83,89 @@ def get_config_params():
|
||||
}
|
||||
|
||||
|
||||
def get_existing_task_id(url, download_type=None):
|
||||
"""
|
||||
Check if an active task with the same URL (and optionally, type) already exists.
|
||||
This function ignores tasks that are in a terminal state (e.g., completed, cancelled, or failed).
|
||||
|
||||
Args:
|
||||
url (str): The URL to check for duplicates.
|
||||
download_type (str, optional): The type of download to check. Defaults to None.
|
||||
|
||||
Returns:
|
||||
str | None: The task ID of the existing active task, or None if no active duplicate is found.
|
||||
"""
|
||||
logger.debug(f"GET_EXISTING_TASK_ID: Checking for URL='{url}', type='{download_type}'")
|
||||
if not url:
|
||||
logger.debug("GET_EXISTING_TASK_ID: No URL provided, returning None.")
|
||||
return None
|
||||
|
||||
# Define terminal states. Tasks in these states are considered inactive and will be ignored.
|
||||
TERMINAL_STATES = {
|
||||
ProgressState.COMPLETE,
|
||||
ProgressState.DONE,
|
||||
ProgressState.CANCELLED,
|
||||
ProgressState.ERROR,
|
||||
ProgressState.ERROR_RETRIED,
|
||||
ProgressState.ERROR_AUTO_CLEANED,
|
||||
}
|
||||
logger.debug(f"GET_EXISTING_TASK_ID: Terminal states defined as: {TERMINAL_STATES}")
|
||||
|
||||
all_existing_tasks_summary = get_all_tasks() # This function already filters by default based on its own TERMINAL_STATES
|
||||
logger.debug(f"GET_EXISTING_TASK_ID: Found {len(all_existing_tasks_summary)} tasks from get_all_tasks(). Iterating...")
|
||||
|
||||
for task_summary in all_existing_tasks_summary:
|
||||
existing_task_id = task_summary.get("task_id")
|
||||
if not existing_task_id:
|
||||
logger.debug("GET_EXISTING_TASK_ID: Skipping summary with no task_id.")
|
||||
continue
|
||||
|
||||
logger.debug(f"GET_EXISTING_TASK_ID: Processing existing task_id='{existing_task_id}' from summary.")
|
||||
|
||||
# First, check the status of the task directly from its latest status record.
|
||||
# get_all_tasks() might have its own view of terminal, but we re-check here for absolute certainty.
|
||||
existing_last_status_obj = get_last_task_status(existing_task_id)
|
||||
if not existing_last_status_obj:
|
||||
logger.debug(f"GET_EXISTING_TASK_ID: No last status object for task_id='{existing_task_id}'. Skipping.")
|
||||
continue
|
||||
|
||||
existing_status = existing_last_status_obj.get("status")
|
||||
logger.debug(f"GET_EXISTING_TASK_ID: Task_id='{existing_task_id}', last_status_obj='{existing_last_status_obj}', extracted status='{existing_status}'.")
|
||||
|
||||
# If the task is in a terminal state, ignore it and move to the next one.
|
||||
if existing_status in TERMINAL_STATES:
|
||||
logger.debug(f"GET_EXISTING_TASK_ID: Task_id='{existing_task_id}' has terminal status='{existing_status}'. Skipping.")
|
||||
continue
|
||||
|
||||
logger.debug(f"GET_EXISTING_TASK_ID: Task_id='{existing_task_id}' has ACTIVE status='{existing_status}'. Proceeding to check URL/type.")
|
||||
|
||||
# If the task is active, then check if its URL and type match.
|
||||
existing_task_info = get_task_info(existing_task_id)
|
||||
if not existing_task_info:
|
||||
logger.debug(f"GET_EXISTING_TASK_ID: No task info for active task_id='{existing_task_id}'. Skipping.")
|
||||
continue
|
||||
|
||||
existing_url = existing_task_info.get("url")
|
||||
logger.debug(f"GET_EXISTING_TASK_ID: Task_id='{existing_task_id}', info_url='{existing_url}'. Comparing with target_url='{url}'.")
|
||||
if existing_url != url:
|
||||
logger.debug(f"GET_EXISTING_TASK_ID: Task_id='{existing_task_id}' URL mismatch. Skipping.")
|
||||
continue
|
||||
|
||||
if download_type:
|
||||
existing_type = existing_task_info.get("download_type")
|
||||
logger.debug(f"GET_EXISTING_TASK_ID: Task_id='{existing_task_id}', info_type='{existing_type}'. Comparing with target_type='{download_type}'.")
|
||||
if existing_type != download_type:
|
||||
logger.debug(f"GET_EXISTING_TASK_ID: Task_id='{existing_task_id}' type mismatch. Skipping.")
|
||||
continue
|
||||
|
||||
# Found an active task that matches the criteria.
|
||||
logger.info(f"GET_EXISTING_TASK_ID: Found ACTIVE duplicate: task_id='{existing_task_id}' for URL='{url}', type='{download_type}'. Returning this ID.")
|
||||
return existing_task_id
|
||||
|
||||
logger.debug(f"GET_EXISTING_TASK_ID: No active duplicate found for URL='{url}', type='{download_type}'. Returning None.")
|
||||
return None
|
||||
|
||||
|
||||
class CeleryDownloadQueueManager:
|
||||
"""
|
||||
Manages a queue of download tasks using Celery.
|
||||
@@ -125,14 +208,14 @@ class CeleryDownloadQueueManager:
|
||||
"Task being added with no URL. Duplicate check might be unreliable."
|
||||
)
|
||||
|
||||
NON_BLOCKING_STATES = [
|
||||
TERMINAL_STATES = { # Renamed and converted to a set for consistency
|
||||
ProgressState.COMPLETE,
|
||||
ProgressState.DONE,
|
||||
ProgressState.CANCELLED,
|
||||
ProgressState.ERROR,
|
||||
ProgressState.ERROR_RETRIED,
|
||||
ProgressState.ERROR_AUTO_CLEANED,
|
||||
]
|
||||
}
|
||||
|
||||
all_existing_tasks_summary = get_all_tasks()
|
||||
if incoming_url:
|
||||
@@ -155,7 +238,7 @@ class CeleryDownloadQueueManager:
|
||||
if (
|
||||
existing_url == incoming_url
|
||||
and existing_type == incoming_type
|
||||
and existing_status not in NON_BLOCKING_STATES
|
||||
and existing_status not in TERMINAL_STATES
|
||||
):
|
||||
message = f"Duplicate download: URL '{incoming_url}' (type: {incoming_type}) is already being processed by task {existing_task_id} (status: {existing_status})."
|
||||
logger.warning(message)
|
||||
|
||||
Reference in New Issue
Block a user