Skip to content

Commit cb2d0e3

Browse files
refactor to await results
1 parent 72e95c3 commit cb2d0e3

3 files changed

Lines changed: 59 additions & 28 deletions

File tree

dojo/finding/helper.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -365,7 +365,8 @@ def post_process_finding_save_signature(finding, dedupe_option=True, rules_optio
365365
issue_updater_option=True, push_to_jira=False, user=None, *args, **kwargs): # noqa: FBT002 - this is bit hard to fix nice have this universally fixed
366366
"""
367367
Returns a task signature for post-processing a finding. This is useful for creating task signatures
368-
that can be used in chords or groups.
368+
that can be used in chords or groups or to await results. We need this extra method because of our dojo_async decorator.
369+
If we use more of these celery features, we should probably move away from that decorator.
369370
"""
370371
return post_process_finding_save_internal(finding, dedupe_option, rules_option, product_grading_option,
371372
issue_updater_option, push_to_jira, user, *args, **kwargs)

dojo/importers/default_importer.py

Lines changed: 26 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77

88
import dojo.jira_link.helper as jira_helper
99
from dojo.decorators import we_want_async
10+
from dojo.finding import helper as finding_helper
1011
from dojo.importers.base_importer import BaseImporter, Parser
1112
from dojo.importers.options import ImporterOptions
1213
from dojo.models import (
@@ -16,6 +17,8 @@
1617
Test_Import,
1718
)
1819
from dojo.notifications.helper import create_notification
20+
from dojo.tasks import wait_for_tasks_and_calculate_grade
21+
from dojo.utils import calculate_grade
1922
from dojo.validators import clean_tags
2023

2124
logger = logging.getLogger(__name__)
@@ -155,11 +158,7 @@ def process_findings(
155158
parsed_findings: list[Finding],
156159
**kwargs: dict,
157160
) -> list[Finding]:
158-
from celery import chord
159-
160-
from dojo.finding import helper as finding_helper
161-
from dojo.utils import calculate_grade, calculate_grade_signature
162-
post_processing_task_signatures = []
161+
async_task_ids = []
163162

164163
"""
165164
Saves findings in memory that were parsed from the scan report into the database.
@@ -189,7 +188,7 @@ def process_findings(
189188
unsaved_finding.reporter = self.user
190189
unsaved_finding.last_reviewed_by = self.user
191190
unsaved_finding.last_reviewed = self.now
192-
logger.debug("process_parsed_findings: unique_id_from_tool: %s, hash_code: %s, active from report: %s, verified from report: %s", unsaved_finding.unique_id_from_tool, unsaved_finding.hash_code, unsaved_finding.active, unsaved_finding.verified)
191+
logger.debug("process_parsed_finding: unique_id_from_tool: %s, hash_code: %s, active from report: %s, verified from report: %s", unsaved_finding.unique_id_from_tool, unsaved_finding.hash_code, unsaved_finding.active, unsaved_finding.verified)
193192
# indicates an override. Otherwise, do not change the value of unsaved_finding.active
194193
if self.active is not None:
195194
unsaved_finding.active = self.active
@@ -238,20 +237,25 @@ def process_findings(
238237
new_findings.append(finding)
239238
# all data is already saved on the finding, we only need to trigger post processing
240239

241-
# Collect finding for parallel processing - we'll process them all at once after the loop
240+
# We create a signature for the post processing task so we can decide to apply it async or sync
242241
push_to_jira = self.push_to_jira and (not self.findings_groups_enabled or not self.group_by)
243-
# Always create signatures - we'll execute them sync or async later
244-
post_processing_task_signatures.append(
245-
finding_helper.post_process_finding_save_signature(
246-
finding,
247-
dedupe_option=True,
248-
rules_option=True,
249-
product_grading_option=False,
250-
issue_updater_option=True,
251-
push_to_jira=push_to_jira,
252-
),
242+
post_processing_task_signature = finding_helper.post_process_finding_save_signature(
243+
finding,
244+
dedupe_option=True,
245+
rules_option=True,
246+
product_grading_option=False,
247+
issue_updater_option=True,
248+
push_to_jira=push_to_jira,
253249
)
254250

251+
# We need to call apply_async to get the result of the task so we can collect the task ID
252+
if we_want_async(async_user=self.user):
253+
result = post_processing_task_signature.apply_async()
254+
async_task_ids.append(result.id)
255+
else:
256+
# Execute task immediately for synchronous processing
257+
post_processing_task_signature()
258+
255259
for (group_name, findings) in group_names_to_findings_dict.items():
256260
finding_helper.add_findings_to_auto_group(
257261
group_name,
@@ -268,17 +272,12 @@ def process_findings(
268272

269273
# Calculate product grade after all findings are processed
270274
product = self.test.engagement.product
271-
if post_processing_task_signatures:
272-
# If we have async tasks, use chord to wait for them before calculating grade
273-
if we_want_async(async_user=self.user):
274-
# Run the chord asynchronously and after completing post processing tasks, calculate grade ONCE
275-
chord(post_processing_task_signatures)(calculate_grade_signature(product))
276-
else:
277-
# Execute each task synchronously
278-
for task_sig in post_processing_task_signatures:
279-
task_sig()
280275

281-
# Calculate grade, which can be preliminarily calculated before the async tasks have finished
276+
if we_want_async(async_user=self.user) and async_task_ids:
277+
# Tasks were executed immediately during processing; now coordinate the final grade calculation
278+
wait_for_tasks_and_calculate_grade.delay(async_task_ids, product.id)
279+
280+
# Synchronous tasks were already executed during processing; just calculate the grade
282281
calculate_grade(product)
283282

284283
sync = kwargs.get("sync", True)

dojo/tasks.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
from datetime import date, timedelta
33

44
from auditlog.models import LogEntry
5+
from celery.result import AsyncResult
56
from celery.utils.log import get_task_logger
67
from dateutil.relativedelta import relativedelta
78
from django.conf import settings
@@ -192,6 +193,36 @@ def fix_loop_duplicates_task(*args, **kwargs):
192193
return fix_loop_duplicates()
193194

194195

196+
@app.task
197+
def wait_for_tasks_and_calculate_grade(task_ids, product_id, *args, **kwargs):
198+
"""
199+
Wait for all specified tasks to complete, then calculate product grade.
200+
This provides coordination for immediate task execution without using chord.
201+
"""
202+
logger.info(f"Waiting for {len(task_ids)} tasks to complete before calculating grade for product {product_id}")
203+
204+
# Wait for all tasks to complete
205+
results = [AsyncResult(task_id) for task_id in task_ids]
206+
207+
# This will block until all tasks are done
208+
for result in results:
209+
try:
210+
result.get(timeout=300) # 5 minute timeout per task
211+
except Exception as e:
212+
logger.warning(f"Task {result.id} failed: {e}")
213+
# Continue waiting for other tasks even if one fails
214+
215+
# All tasks completed, now calculate grade
216+
try:
217+
product = Product.objects.get(id=product_id)
218+
logger.info(f"All post-processing tasks completed, calculating grade for product {product.name}")
219+
calculate_grade(product)
220+
except Product.DoesNotExist:
221+
logger.error(f"Product {product_id} not found for grade calculation")
222+
except Exception as e:
223+
logger.error(f"Error calculating grade for product {product_id}: {e}")
224+
225+
195226
@app.task
196227
def evaluate_pro_proposition(*args, **kwargs):
197228
# Ensure we should be doing this

0 commit comments

Comments
 (0)