From a1ee613abcd562bf03be0ea2345c70ead373ef49 Mon Sep 17 00:00:00 2001 From: Valentijn Scholten Date: Sat, 26 Jul 2025 23:56:38 +0200 Subject: [PATCH 01/53] test cases: fix caching of system settings --- dojo/middleware.py | 5 +++++ unittests/dojo_test_case.py | 6 +++-- unittests/test_importers_performance.py | 30 ++++++++++++------------- 3 files changed, 24 insertions(+), 17 deletions(-) diff --git a/dojo/middleware.py b/dojo/middleware.py index ffae7dd9432..29e17a0104c 100644 --- a/dojo/middleware.py +++ b/dojo/middleware.py @@ -111,6 +111,11 @@ def load(cls): cls._thread_local.system_settings = system_settings return system_settings + @classmethod + def initialize_for_testing(cls, system_settings): + """Initialize system settings for test scenarios where middleware may not be processed normally""" + cls._thread_local.system_settings = system_settings + class System_Settings_Manager(models.Manager): diff --git a/unittests/dojo_test_case.py b/unittests/dojo_test_case.py index 5be0e1a5e3e..23fc18748a9 100644 --- a/unittests/dojo_test_case.py +++ b/unittests/dojo_test_case.py @@ -100,6 +100,7 @@ def get_test_admin(self, *args, **kwargs): def system_settings(self, **kwargs): ss = System_Settings.objects.get() + # only modify the any setting provided as kwargs for key, value in kwargs.items(): setattr(ss, key, value) ss.save() @@ -484,8 +485,9 @@ def __init__(self, *args, **kwargs): def setUp(self): super().setUp() - # Initialize middleware with fresh settings from db - DojoSytemSettingsMiddleware.load() + from dojo.middleware import DojoSytemSettingsMiddleware + from dojo.models import System_Settings + DojoSytemSettingsMiddleware.initialize_for_testing(System_Settings.objects.get()) def common_check_finding(self, finding): self.assertIn(finding.severity, SEVERITIES) diff --git a/unittests/test_importers_performance.py b/unittests/test_importers_performance.py index a629b7f97c3..5fb2fd23051 100644 --- a/unittests/test_importers_performance.py +++ 
b/unittests/test_importers_performance.py @@ -176,11 +176,11 @@ def import_reimport_performance(self, expected_num_queries1, expected_num_async_ # def test_import_reimport_reimport_performance_async(self, mock): def test_import_reimport_reimport_performance_async(self): self.import_reimport_performance( - expected_num_queries1=682, - expected_num_async_tasks1=10, - expected_num_queries2=610, - expected_num_async_tasks2=22, - expected_num_queries3=292, + expected_num_queries1=554, + expected_num_async_tasks1=15, + expected_num_queries2=469, + expected_num_async_tasks2=23, + expected_num_queries3=332, expected_num_async_tasks3=20, ) @@ -198,11 +198,11 @@ def test_import_reimport_reimport_performance_no_async(self): testuser.usercontactinfo.block_execution = True testuser.usercontactinfo.save() self.import_reimport_performance( - expected_num_queries1=682, - expected_num_async_tasks1=10, - expected_num_queries2=615, - expected_num_async_tasks2=22, - expected_num_queries3=297, + expected_num_queries1=554, + expected_num_async_tasks1=15, + expected_num_queries2=469, + expected_num_async_tasks2=23, + expected_num_queries3=332, expected_num_async_tasks3=20, ) @@ -222,10 +222,10 @@ def test_import_reimport_reimport_performance_no_async_with_product_grading(self self.system_settings(enable_product_grade=True) self.import_reimport_performance( - expected_num_queries1=702, - expected_num_async_tasks1=15, - expected_num_queries2=645, - expected_num_async_tasks2=28, - expected_num_queries3=322, + expected_num_queries1=594, + expected_num_async_tasks1=25, + expected_num_queries2=503, + expected_num_async_tasks2=30, + expected_num_queries3=357, expected_num_async_tasks3=25, ) From 811f37ad1f63e63ab9a77605d37a91b5f978d7bd Mon Sep 17 00:00:00 2001 From: Valentijn Scholten Date: Sun, 27 Jul 2025 09:18:41 +0200 Subject: [PATCH 02/53] fix tests --- dojo/middleware.py | 2 ++ unittests/dojo_test_case.py | 3 --- unittests/test_importers_performance.py | 12 ++++++++++++ 3 files changed, 
14 insertions(+), 3 deletions(-) diff --git a/dojo/middleware.py b/dojo/middleware.py index 29e17a0104c..7103a6c6c90 100644 --- a/dojo/middleware.py +++ b/dojo/middleware.py @@ -114,6 +114,8 @@ def load(cls): @classmethod def initialize_for_testing(cls, system_settings): """Initialize system settings for test scenarios where middleware may not be processed normally""" + # cleanup any existing settings first to ensure fresh state + cls.cleanup() cls._thread_local.system_settings = system_settings diff --git a/unittests/dojo_test_case.py b/unittests/dojo_test_case.py index 23fc18748a9..ae763809502 100644 --- a/unittests/dojo_test_case.py +++ b/unittests/dojo_test_case.py @@ -485,9 +485,6 @@ def __init__(self, *args, **kwargs): def setUp(self): super().setUp() - from dojo.middleware import DojoSytemSettingsMiddleware - from dojo.models import System_Settings - DojoSytemSettingsMiddleware.initialize_for_testing(System_Settings.objects.get()) def common_check_finding(self, finding): self.assertIn(finding.severity, SEVERITIES) diff --git a/unittests/test_importers_performance.py b/unittests/test_importers_performance.py index 5fb2fd23051..e19227af8d9 100644 --- a/unittests/test_importers_performance.py +++ b/unittests/test_importers_performance.py @@ -8,6 +8,7 @@ from dojo.decorators import dojo_async_task_counter from dojo.importers.default_importer import DefaultImporter from dojo.importers.default_reimporter import DefaultReImporter +from dojo.middleware import DojoSytemSettingsMiddleware from dojo.models import ( Development_Environment, Dojo_User, @@ -17,6 +18,7 @@ Finding, Product, Product_Type, + System_Settings, Test, User, UserContactInfo, @@ -48,6 +50,16 @@ def setUp(self): self.system_settings(enable_product_grade=False) self.system_settings(enable_github=False) + # # Configure system settings directly + # from dojo.middleware import DojoSytemSettingsMiddleware + # from dojo.models import System_Settings + # system_settings = System_Settings.objects.get() + # 
system_settings.enable_product_tag_inheritance = True + # system_settings.save() + + # Initialize middleware with modified settings + DojoSytemSettingsMiddleware.initialize_for_testing(System_Settings.objects.get()) + # Warm up ContentType cache for relevant models. This is needed if we want to be able to run the test in isolation # As part of the test suite the ContentTYpe ids will already be cached and won't affect the query count. # But if we run the test in isolation, the ContentType ids will not be cached and will result in more queries. From a9b04a3496983d781ee39c87db34abc226b92cae Mon Sep 17 00:00:00 2001 From: Valentijn Scholten Date: Sun, 27 Jul 2025 09:48:44 +0200 Subject: [PATCH 03/53] fix caching for github --- unittests/test_importers_performance.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unittests/test_importers_performance.py b/unittests/test_importers_performance.py index e19227af8d9..afc294a9310 100644 --- a/unittests/test_importers_performance.py +++ b/unittests/test_importers_performance.py @@ -234,7 +234,7 @@ def test_import_reimport_reimport_performance_no_async_with_product_grading(self self.system_settings(enable_product_grade=True) self.import_reimport_performance( - expected_num_queries1=594, + expected_num_queries1=59444, expected_num_async_tasks1=25, expected_num_queries2=503, expected_num_async_tasks2=30, From 5ee9aeb8ad2ab99833d8dd984f6b0a56a9155f24 Mon Sep 17 00:00:00 2001 From: Valentijn Scholten Date: Sun, 27 Jul 2025 09:53:25 +0200 Subject: [PATCH 04/53] fix caching for github --- unittests/test_importers_performance.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unittests/test_importers_performance.py b/unittests/test_importers_performance.py index afc294a9310..e19227af8d9 100644 --- a/unittests/test_importers_performance.py +++ b/unittests/test_importers_performance.py @@ -234,7 +234,7 @@ def test_import_reimport_reimport_performance_no_async_with_product_grading(self 
self.system_settings(enable_product_grade=True) self.import_reimport_performance( - expected_num_queries1=59444, + expected_num_queries1=594, expected_num_async_tasks1=25, expected_num_queries2=503, expected_num_async_tasks2=30, From 63badef81782a3fe35299b1ec4cb2cd3ef338f18 Mon Sep 17 00:00:00 2001 From: Valentijn Scholten Date: Sun, 27 Jul 2025 10:13:42 +0200 Subject: [PATCH 05/53] simplify cache loading --- dojo/middleware.py | 7 ------- unittests/dojo_test_case.py | 3 ++- unittests/test_importers_performance.py | 4 +--- 3 files changed, 3 insertions(+), 11 deletions(-) diff --git a/dojo/middleware.py b/dojo/middleware.py index 7103a6c6c90..ffae7dd9432 100644 --- a/dojo/middleware.py +++ b/dojo/middleware.py @@ -111,13 +111,6 @@ def load(cls): cls._thread_local.system_settings = system_settings return system_settings - @classmethod - def initialize_for_testing(cls, system_settings): - """Initialize system settings for test scenarios where middleware may not be processed normally""" - # cleanup any existing settings first to ensure fresh state - cls.cleanup() - cls._thread_local.system_settings = system_settings - class System_Settings_Manager(models.Manager): diff --git a/unittests/dojo_test_case.py b/unittests/dojo_test_case.py index ae763809502..5be0e1a5e3e 100644 --- a/unittests/dojo_test_case.py +++ b/unittests/dojo_test_case.py @@ -100,7 +100,6 @@ def get_test_admin(self, *args, **kwargs): def system_settings(self, **kwargs): ss = System_Settings.objects.get() - # only modify the any setting provided as kwargs for key, value in kwargs.items(): setattr(ss, key, value) ss.save() @@ -485,6 +484,8 @@ def __init__(self, *args, **kwargs): def setUp(self): super().setUp() + # Initialize middleware with fresh settings from db + DojoSytemSettingsMiddleware.load() def common_check_finding(self, finding): self.assertIn(finding.severity, SEVERITIES) diff --git a/unittests/test_importers_performance.py b/unittests/test_importers_performance.py index 
e19227af8d9..429d31c0507 100644 --- a/unittests/test_importers_performance.py +++ b/unittests/test_importers_performance.py @@ -8,7 +8,6 @@ from dojo.decorators import dojo_async_task_counter from dojo.importers.default_importer import DefaultImporter from dojo.importers.default_reimporter import DefaultReImporter -from dojo.middleware import DojoSytemSettingsMiddleware from dojo.models import ( Development_Environment, Dojo_User, @@ -18,7 +17,6 @@ Finding, Product, Product_Type, - System_Settings, Test, User, UserContactInfo, @@ -58,7 +56,7 @@ def setUp(self): # system_settings.save() # Initialize middleware with modified settings - DojoSytemSettingsMiddleware.initialize_for_testing(System_Settings.objects.get()) + # DojoSytemSettingsMiddleware.initialize_for_testing(System_Settings.objects.get()) # Warm up ContentType cache for relevant models. This is needed if we want to be able to run the test in isolation # As part of the test suite the ContentTYpe ids will already be cached and won't affect the query count. 
From e99e7d48f3d7d2f2bd08a9bce28738215b256d08 Mon Sep 17 00:00:00 2001 From: Valentijn Scholten Date: Sun, 27 Jul 2025 10:32:01 +0200 Subject: [PATCH 06/53] post process only when needed --- unittests/test_importers_performance.py | 21 +++++++-------------- 1 file changed, 7 insertions(+), 14 deletions(-) diff --git a/unittests/test_importers_performance.py b/unittests/test_importers_performance.py index 429d31c0507..8669c9e5639 100644 --- a/unittests/test_importers_performance.py +++ b/unittests/test_importers_performance.py @@ -48,16 +48,6 @@ def setUp(self): self.system_settings(enable_product_grade=False) self.system_settings(enable_github=False) - # # Configure system settings directly - # from dojo.middleware import DojoSytemSettingsMiddleware - # from dojo.models import System_Settings - # system_settings = System_Settings.objects.get() - # system_settings.enable_product_tag_inheritance = True - # system_settings.save() - - # Initialize middleware with modified settings - # DojoSytemSettingsMiddleware.initialize_for_testing(System_Settings.objects.get()) - # Warm up ContentType cache for relevant models. This is needed if we want to be able to run the test in isolation # As part of the test suite the ContentTYpe ids will already be cached and won't affect the query count. # But if we run the test in isolation, the ContentType ids will not be cached and will result in more queries. 
@@ -187,7 +177,7 @@ def import_reimport_performance(self, expected_num_queries1, expected_num_async_ def test_import_reimport_reimport_performance_async(self): self.import_reimport_performance( expected_num_queries1=554, - expected_num_async_tasks1=15, + expected_num_async_tasks1=10, expected_num_queries2=469, expected_num_async_tasks2=23, expected_num_queries3=332, @@ -209,7 +199,7 @@ def test_import_reimport_reimport_performance_no_async(self): testuser.usercontactinfo.save() self.import_reimport_performance( expected_num_queries1=554, - expected_num_async_tasks1=15, + expected_num_async_tasks1=10, expected_num_queries2=469, expected_num_async_tasks2=23, expected_num_queries3=332, @@ -230,10 +220,13 @@ def test_import_reimport_reimport_performance_no_async_with_product_grading(self testuser.usercontactinfo.block_execution = True testuser.usercontactinfo.save() self.system_settings(enable_product_grade=True) + # Refresh the cache with the new settings + from dojo.middleware import DojoSytemSettingsMiddleware + DojoSytemSettingsMiddleware.load() self.import_reimport_performance( - expected_num_queries1=594, - expected_num_async_tasks1=25, + expected_num_queries1=574, + expected_num_async_tasks1=15, expected_num_queries2=503, expected_num_async_tasks2=30, expected_num_queries3=357, From 348b69c6f8a86bb03c25ec893e48bc9cc9f216be Mon Sep 17 00:00:00 2001 From: Valentijn Scholten Date: Sun, 27 Jul 2025 12:23:36 +0200 Subject: [PATCH 07/53] set tags on (re)import --- unittests/test_importers_performance.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/unittests/test_importers_performance.py b/unittests/test_importers_performance.py index 8669c9e5639..85169dce747 100644 --- a/unittests/test_importers_performance.py +++ b/unittests/test_importers_performance.py @@ -176,9 +176,9 @@ def import_reimport_performance(self, expected_num_queries1, expected_num_async_ # def test_import_reimport_reimport_performance_async(self, mock): def 
test_import_reimport_reimport_performance_async(self): self.import_reimport_performance( - expected_num_queries1=554, - expected_num_async_tasks1=10, - expected_num_queries2=469, + expected_num_queries1=712, + expected_num_async_tasks1=15, + expected_num_queries2=656, expected_num_async_tasks2=23, expected_num_queries3=332, expected_num_async_tasks3=20, @@ -198,9 +198,9 @@ def test_import_reimport_reimport_performance_no_async(self): testuser.usercontactinfo.block_execution = True testuser.usercontactinfo.save() self.import_reimport_performance( - expected_num_queries1=554, - expected_num_async_tasks1=10, - expected_num_queries2=469, + expected_num_queries1=712, + expected_num_async_tasks1=15, + expected_num_queries2=656, expected_num_async_tasks2=23, expected_num_queries3=332, expected_num_async_tasks3=20, @@ -225,9 +225,9 @@ def test_import_reimport_reimport_performance_no_async_with_product_grading(self DojoSytemSettingsMiddleware.load() self.import_reimport_performance( - expected_num_queries1=574, - expected_num_async_tasks1=15, - expected_num_queries2=503, + expected_num_queries1=752, + expected_num_async_tasks1=25, + expected_num_queries2=690, expected_num_async_tasks2=30, expected_num_queries3=357, expected_num_async_tasks3=25, From 7a9326d107f32c0e9c89bd33bec8aacf3378c090 Mon Sep 17 00:00:00 2001 From: Valentijn Scholten Date: Sun, 27 Jul 2025 12:27:04 +0200 Subject: [PATCH 08/53] rebase set tags --- unittests/test_importers_performance.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/unittests/test_importers_performance.py b/unittests/test_importers_performance.py index 85169dce747..eba8ac6014d 100644 --- a/unittests/test_importers_performance.py +++ b/unittests/test_importers_performance.py @@ -177,7 +177,7 @@ def import_reimport_performance(self, expected_num_queries1, expected_num_async_ def test_import_reimport_reimport_performance_async(self): self.import_reimport_performance( expected_num_queries1=712, - 
expected_num_async_tasks1=15, + expected_num_async_tasks1=10, expected_num_queries2=656, expected_num_async_tasks2=23, expected_num_queries3=332, @@ -199,7 +199,7 @@ def test_import_reimport_reimport_performance_no_async(self): testuser.usercontactinfo.save() self.import_reimport_performance( expected_num_queries1=712, - expected_num_async_tasks1=15, + expected_num_async_tasks1=10, expected_num_queries2=656, expected_num_async_tasks2=23, expected_num_queries3=332, @@ -225,8 +225,8 @@ def test_import_reimport_reimport_performance_no_async_with_product_grading(self DojoSytemSettingsMiddleware.load() self.import_reimport_performance( - expected_num_queries1=752, - expected_num_async_tasks1=25, + expected_num_queries1=732, + expected_num_async_tasks1=15, expected_num_queries2=690, expected_num_async_tasks2=30, expected_num_queries3=357, From d9c4cc495d6c0aa473952bdd138cb05868b62185 Mon Sep 17 00:00:00 2001 From: Valentijn Scholten Date: Sun, 27 Jul 2025 12:31:39 +0200 Subject: [PATCH 09/53] reduce save with options --- unittests/test_importers_performance.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/unittests/test_importers_performance.py b/unittests/test_importers_performance.py index eba8ac6014d..82691b73cf1 100644 --- a/unittests/test_importers_performance.py +++ b/unittests/test_importers_performance.py @@ -179,7 +179,7 @@ def test_import_reimport_reimport_performance_async(self): expected_num_queries1=712, expected_num_async_tasks1=10, expected_num_queries2=656, - expected_num_async_tasks2=23, + expected_num_async_tasks2=22, expected_num_queries3=332, expected_num_async_tasks3=20, ) @@ -201,7 +201,7 @@ def test_import_reimport_reimport_performance_no_async(self): expected_num_queries1=712, expected_num_async_tasks1=10, expected_num_queries2=656, - expected_num_async_tasks2=23, + expected_num_async_tasks2=22, expected_num_queries3=332, expected_num_async_tasks3=20, ) @@ -228,7 +228,7 @@ def 
test_import_reimport_reimport_performance_no_async_with_product_grading(self expected_num_queries1=732, expected_num_async_tasks1=15, expected_num_queries2=690, - expected_num_async_tasks2=30, + expected_num_async_tasks2=28, expected_num_queries3=357, expected_num_async_tasks3=25, ) From e717d8fd2748c48e5e51887cbc1937235c20104d Mon Sep 17 00:00:00 2001 From: Valentijn Scholten Date: Sun, 27 Jul 2025 13:04:59 +0200 Subject: [PATCH 10/53] update counts, reduce saves with options --- unittests/test_importers_performance.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unittests/test_importers_performance.py b/unittests/test_importers_performance.py index 82691b73cf1..95270388fe1 100644 --- a/unittests/test_importers_performance.py +++ b/unittests/test_importers_performance.py @@ -227,7 +227,7 @@ def test_import_reimport_reimport_performance_no_async_with_product_grading(self self.import_reimport_performance( expected_num_queries1=732, expected_num_async_tasks1=15, - expected_num_queries2=690, + expected_num_queries2=686, expected_num_async_tasks2=28, expected_num_queries3=357, expected_num_async_tasks3=25, From eda6959231127d5bf4dc8ee6324cb23f786c16eb Mon Sep 17 00:00:00 2001 From: Valentijn Scholten Date: Sun, 27 Jul 2025 13:34:16 +0200 Subject: [PATCH 11/53] importers: do not save again, but postprocess directly --- dojo/importers/default_importer.py | 5 ++++- dojo/importers/default_reimporter.py | 5 ++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/dojo/importers/default_importer.py b/dojo/importers/default_importer.py index ed4106971b7..90358cc0996 100644 --- a/dojo/importers/default_importer.py +++ b/dojo/importers/default_importer.py @@ -223,7 +223,10 @@ def process_findings( finding = self.process_vulnerability_ids(finding) # Categorize this finding as a new one new_findings.append(finding) - # all data is already saved on the finding, we only need to trigger post processing + # by this time the finding has been processed 
and saved to the database. + # since the save above no changes have been made to the finding, only to related objects such as endpoints. + # we don't have to save the finding again and can trigger postprocessing directly + # this saves a database UDPATE which is costly (and may trigger extra processing via signals such as audit logging) # to avoid pushing a finding group multiple times, we push those outside of the loop push_to_jira = self.push_to_jira and (not self.findings_groups_enabled or not self.group_by) diff --git a/dojo/importers/default_reimporter.py b/dojo/importers/default_reimporter.py index 455730a647b..5c2f22abba9 100644 --- a/dojo/importers/default_reimporter.py +++ b/dojo/importers/default_reimporter.py @@ -236,7 +236,10 @@ def process_findings( finding, unsaved_finding, ) - # all data is already saved on the finding, we only need to trigger post processing + # by this time the finding has been processed and saved to the database. + # since the save above no changes have been made to the finding, only to related objects such as endpoints. 
+ # we don't have to save the finding again and can trigger postprocessing directly + # this saves a database UDPATE which is costly (and may trigger extra processing via signals such as audit logging) # to avoid pushing a finding group multiple times, we push those outside of the loop push_to_jira = self.push_to_jira and (not self.findings_groups_enabled or not self.group_by) From 395ac9e30a0ad70048bff11b370bb1fe4173e842 Mon Sep 17 00:00:00 2001 From: Valentijn Scholten Date: Sun, 27 Jul 2025 13:46:42 +0200 Subject: [PATCH 12/53] update counts --- unittests/test_importers_performance.py | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/unittests/test_importers_performance.py b/unittests/test_importers_performance.py index 95270388fe1..6f28fbe9e6f 100644 --- a/unittests/test_importers_performance.py +++ b/unittests/test_importers_performance.py @@ -176,11 +176,11 @@ def import_reimport_performance(self, expected_num_queries1, expected_num_async_ # def test_import_reimport_reimport_performance_async(self, mock): def test_import_reimport_reimport_performance_async(self): self.import_reimport_performance( - expected_num_queries1=712, + expected_num_queries1=682, expected_num_async_tasks1=10, - expected_num_queries2=656, - expected_num_async_tasks2=22, - expected_num_queries3=332, + expected_num_queries2=700, + expected_num_async_tasks2=12, + expected_num_queries3=292, expected_num_async_tasks3=20, ) @@ -198,11 +198,11 @@ def test_import_reimport_reimport_performance_no_async(self): testuser.usercontactinfo.block_execution = True testuser.usercontactinfo.save() self.import_reimport_performance( - expected_num_queries1=712, + expected_num_queries1=682, expected_num_async_tasks1=10, - expected_num_queries2=656, - expected_num_async_tasks2=22, - expected_num_queries3=332, + expected_num_queries2=700, + expected_num_async_tasks2=12, + expected_num_queries3=297, expected_num_async_tasks3=20, ) @@ -225,10 +225,10 @@ def 
test_import_reimport_reimport_performance_no_async_with_product_grading(self DojoSytemSettingsMiddleware.load() self.import_reimport_performance( - expected_num_queries1=732, + expected_num_queries1=702, expected_num_async_tasks1=15, - expected_num_queries2=686, - expected_num_async_tasks2=28, - expected_num_queries3=357, + expected_num_queries2=724, + expected_num_async_tasks2=18, + expected_num_queries3=322, expected_num_async_tasks3=25, ) From 30ad08b1193c2a872fc4d1b4d7ed2a83db820e7a Mon Sep 17 00:00:00 2001 From: Valentijn Scholten Date: Sun, 27 Jul 2025 15:51:17 +0200 Subject: [PATCH 13/53] optimize hash_code setting --- dojo/importers/default_importer.py | 6 ++---- dojo/importers/default_reimporter.py | 1 - unittests/test_importers_performance.py | 3 --- 3 files changed, 2 insertions(+), 8 deletions(-) diff --git a/dojo/importers/default_importer.py b/dojo/importers/default_importer.py index 90358cc0996..3be3657099b 100644 --- a/dojo/importers/default_importer.py +++ b/dojo/importers/default_importer.py @@ -198,14 +198,14 @@ def process_findings( # Force parsers to use unsaved_tags (stored in below after saving) unsaved_finding.tags = None + finding = unsaved_finding finding = self.process_cve(unsaved_finding) # Calculate hash_code before saving based on unsaved_endpoints and unsaved_vulnerability_ids finding.set_hash_code(True) # postprocessing will be done after processing related fields like endpoints, vulnerability ids, etc. 
- unsaved_finding.save_no_options() + finding.save_no_options() - finding = unsaved_finding # Determine how the finding should be grouped self.process_finding_groups( finding, @@ -213,8 +213,6 @@ def process_findings( ) # Process any request/response pairs self.process_request_response_pairs(finding) - # Process any endpoints on the endpoint, or added on the form - self.process_endpoints(finding, self.endpoints_to_add) # Parsers must use unsaved_tags to store tags, so we can clean them finding.tags = clean_tags(finding.unsaved_tags) # Process any files diff --git a/dojo/importers/default_reimporter.py b/dojo/importers/default_reimporter.py index 5c2f22abba9..59b1e6655d8 100644 --- a/dojo/importers/default_reimporter.py +++ b/dojo/importers/default_reimporter.py @@ -588,7 +588,6 @@ def process_matched_active_finding( ): existing_finding.component_name = existing_finding.component_name or component_name existing_finding.component_version = existing_finding.component_version or component_version - existing_finding.save_no_options() # Return False here to make sure further processing happens return existing_finding, False diff --git a/unittests/test_importers_performance.py b/unittests/test_importers_performance.py index 6f28fbe9e6f..029fdddebc2 100644 --- a/unittests/test_importers_performance.py +++ b/unittests/test_importers_performance.py @@ -220,9 +220,6 @@ def test_import_reimport_reimport_performance_no_async_with_product_grading(self testuser.usercontactinfo.block_execution = True testuser.usercontactinfo.save() self.system_settings(enable_product_grade=True) - # Refresh the cache with the new settings - from dojo.middleware import DojoSytemSettingsMiddleware - DojoSytemSettingsMiddleware.load() self.import_reimport_performance( expected_num_queries1=702, From 551f15359ae849341e776d56f30a2c1250f10b20 Mon Sep 17 00:00:00 2001 From: Valentijn Scholten Date: Sun, 27 Jul 2025 15:53:54 +0200 Subject: [PATCH 14/53] fix counts --- 
unittests/test_importers_performance.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/unittests/test_importers_performance.py b/unittests/test_importers_performance.py index 029fdddebc2..3b4ecfd2294 100644 --- a/unittests/test_importers_performance.py +++ b/unittests/test_importers_performance.py @@ -176,10 +176,10 @@ def import_reimport_performance(self, expected_num_queries1, expected_num_async_ # def test_import_reimport_reimport_performance_async(self, mock): def test_import_reimport_reimport_performance_async(self): self.import_reimport_performance( - expected_num_queries1=682, + expected_num_queries1=281, expected_num_async_tasks1=10, - expected_num_queries2=700, - expected_num_async_tasks2=12, + expected_num_queries2=816, + expected_num_async_tasks2=22, expected_num_queries3=292, expected_num_async_tasks3=20, ) @@ -198,10 +198,10 @@ def test_import_reimport_reimport_performance_no_async(self): testuser.usercontactinfo.block_execution = True testuser.usercontactinfo.save() self.import_reimport_performance( - expected_num_queries1=682, + expected_num_queries1=281, expected_num_async_tasks1=10, - expected_num_queries2=700, - expected_num_async_tasks2=12, + expected_num_queries2=821, + expected_num_async_tasks2=22, expected_num_queries3=297, expected_num_async_tasks3=20, ) @@ -222,10 +222,10 @@ def test_import_reimport_reimport_performance_no_async_with_product_grading(self self.system_settings(enable_product_grade=True) self.import_reimport_performance( - expected_num_queries1=702, - expected_num_async_tasks1=15, - expected_num_queries2=724, - expected_num_async_tasks2=18, + expected_num_queries1=321, + expected_num_async_tasks1=20, + expected_num_queries2=851, + expected_num_async_tasks2=28, expected_num_queries3=322, expected_num_async_tasks3=25, ) From a285f328e1c014d269e7487851126caa5d858a90 Mon Sep 17 00:00:00 2001 From: Valentijn Scholten Date: Sun, 27 Jul 2025 15:59:32 +0200 Subject: [PATCH 15/53] set hash code 
for new findings in reimport --- dojo/importers/default_importer.py | 8 ++++++++ dojo/importers/default_reimporter.py | 12 ++++++++---- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/dojo/importers/default_importer.py b/dojo/importers/default_importer.py index 3be3657099b..c32ca4e5757 100644 --- a/dojo/importers/default_importer.py +++ b/dojo/importers/default_importer.py @@ -225,6 +225,14 @@ def process_findings( # since the save above no changes have been made to the finding, only to related objects such as endpoints. # we don't have to save the finding again and can trigger postprocessing directly # this saves a database UDPATE which is costly (and may trigger extra processing via signals such as audit logging) + # all data is already saved on the finding, we only need to generate as store the hash_code + # this is an optimization to avoid a full UDPATE statement of the finding which is a quite a big object with lots of fields + # after that we tirgger the post processing directly + # the alternative is to not trigger the post processing or generate the hash_code on the finding, but just call finding.save() + # this would do a full UDPATE statement for the finding + + finding.set_hash_code(True) + finding.save(update_fields=["hash_code"]) # to avoid pushing a finding group multiple times, we push those outside of the loop push_to_jira = self.push_to_jira and (not self.findings_groups_enabled or not self.group_by) diff --git a/dojo/importers/default_reimporter.py b/dojo/importers/default_reimporter.py index 59b1e6655d8..8bc01406b0d 100644 --- a/dojo/importers/default_reimporter.py +++ b/dojo/importers/default_reimporter.py @@ -236,10 +236,13 @@ def process_findings( finding, unsaved_finding, ) - # by this time the finding has been processed and saved to the database. - # since the save above no changes have been made to the finding, only to related objects such as endpoints. 
- # we don't have to save the finding again and can trigger postprocessing directly - # this saves a database UDPATE which is costly (and may trigger extra processing via signals such as audit logging) + # all data is already saved on the finding, we only need to generate as store the hash_code + # this is an optimization to avoid a full UDPATE statement of the finding which is a quite a big object with lots of fields + # after that we tirgger the post processing directly + # the alternative is to not trigger the post processing or generate the hash_code on the finding, but just call finding.save() + # this would do a full UDPATE statement for the finding + finding.set_hash_code(True) + finding.save(update_fields=["hash_code"]) # to avoid pushing a finding group multiple times, we push those outside of the loop push_to_jira = self.push_to_jira and (not self.findings_groups_enabled or not self.group_by) @@ -588,6 +591,7 @@ def process_matched_active_finding( ): existing_finding.component_name = existing_finding.component_name or component_name existing_finding.component_version = existing_finding.component_version or component_version + existing_finding.save_no_options() # Return False here to make sure further processing happens return existing_finding, False From 77670898b86956ed10480727d3e2af2258846539 Mon Sep 17 00:00:00 2001 From: Valentijn Scholten Date: Fri, 1 Aug 2025 18:22:05 +0200 Subject: [PATCH 16/53] make smaller second save work --- dojo/importers/default_importer.py | 8 +++++++- dojo/importers/default_reimporter.py | 3 ++- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/dojo/importers/default_importer.py b/dojo/importers/default_importer.py index c32ca4e5757..40f8b286faf 100644 --- a/dojo/importers/default_importer.py +++ b/dojo/importers/default_importer.py @@ -198,6 +198,10 @@ def process_findings( # Force parsers to use unsaved_tags (stored in below after saving) unsaved_finding.tags = None + finding = 
self.process_cve(unsaved_finding) + # postprocessing will be done after processing related fields like endpoints, vulnerability ids, etc. + unsaved_finding.save_no_options() + finding = unsaved_finding finding = self.process_cve(unsaved_finding) # Calculate hash_code before saving based on unsaved_endpoints and unsaved_vulnerability_ids @@ -213,6 +217,8 @@ def process_findings( ) # Process any request/response pairs self.process_request_response_pairs(finding) + # Process any endpoints on the endpoint, or added on the form + self.process_endpoints(finding, self.endpoints_to_add) # Parsers must use unsaved_tags to store tags, so we can clean them finding.tags = clean_tags(finding.unsaved_tags) # Process any files @@ -232,7 +238,7 @@ def process_findings( # this would do a full UDPATE statement for the finding finding.set_hash_code(True) - finding.save(update_fields=["hash_code"]) + finding.save(update_fields=["hash_code", "cve"]) # to avoid pushing a finding group multiple times, we push those outside of the loop push_to_jira = self.push_to_jira and (not self.findings_groups_enabled or not self.group_by) diff --git a/dojo/importers/default_reimporter.py b/dojo/importers/default_reimporter.py index 8bc01406b0d..0a3f4ca0343 100644 --- a/dojo/importers/default_reimporter.py +++ b/dojo/importers/default_reimporter.py @@ -242,7 +242,7 @@ def process_findings( # the alternative is to not trigger the post processing or generate the hash_code on the finding, but just call finding.save() # this would do a full UDPATE statement for the finding finding.set_hash_code(True) - finding.save(update_fields=["hash_code"]) + finding.save(update_fields=["hash_code", "cve"]) # to avoid pushing a finding group multiple times, we push those outside of the loop push_to_jira = self.push_to_jira and (not self.findings_groups_enabled or not self.group_by) @@ -616,6 +616,7 @@ def process_finding_that_was_not_matched( unsaved_finding = self.process_cve(unsaved_finding) # Hash code is already 
calculated earlier as it's the primary matching criteria for reimport # Save it. Don't dedupe before endpoints are added. + unsaved_finding = self.process_cve(unsaved_finding) unsaved_finding.save_no_options() finding = unsaved_finding # Force parsers to use unsaved_tags (stored in finding_post_processing function below) From 8466ed8109695b1fa61968f2bdff25d9cd4d9345 Mon Sep 17 00:00:00 2001 From: Valentijn Scholten Date: Fri, 1 Aug 2025 19:19:48 +0200 Subject: [PATCH 17/53] make smaller second save work - add no_options --- dojo/importers/default_importer.py | 3 ++- dojo/importers/default_reimporter.py | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/dojo/importers/default_importer.py b/dojo/importers/default_importer.py index 40f8b286faf..44836b727d3 100644 --- a/dojo/importers/default_importer.py +++ b/dojo/importers/default_importer.py @@ -237,8 +237,9 @@ def process_findings( # the alternative is to not trigger the post processing or generate the hash_code on the finding, but just call finding.save() # this would do a full UDPATE statement for the finding + logger.debug("setting hash_code and cve for finding %s", finding.id) finding.set_hash_code(True) - finding.save(update_fields=["hash_code", "cve"]) + finding.save_no_options(update_fields=["hash_code"]) # to avoid pushing a finding group multiple times, we push those outside of the loop push_to_jira = self.push_to_jira and (not self.findings_groups_enabled or not self.group_by) diff --git a/dojo/importers/default_reimporter.py b/dojo/importers/default_reimporter.py index 0a3f4ca0343..90315ea90f7 100644 --- a/dojo/importers/default_reimporter.py +++ b/dojo/importers/default_reimporter.py @@ -242,7 +242,7 @@ def process_findings( # the alternative is to not trigger the post processing or generate the hash_code on the finding, but just call finding.save() # this would do a full UDPATE statement for the finding finding.set_hash_code(True) - finding.save(update_fields=["hash_code", "cve"]) + 
finding.save_no_options(update_fields=["hash_code"]) # to avoid pushing a finding group multiple times, we push those outside of the loop push_to_jira = self.push_to_jira and (not self.findings_groups_enabled or not self.group_by) From aec055fbe52e196b417ead1116a1f8553818c6ee Mon Sep 17 00:00:00 2001 From: Valentijn Scholten Date: Fri, 1 Aug 2025 19:27:03 +0200 Subject: [PATCH 18/53] update query counts --- unittests/test_importers_performance.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/unittests/test_importers_performance.py b/unittests/test_importers_performance.py index 3b4ecfd2294..dbb611b1356 100644 --- a/unittests/test_importers_performance.py +++ b/unittests/test_importers_performance.py @@ -176,11 +176,11 @@ def import_reimport_performance(self, expected_num_queries1, expected_num_async_ # def test_import_reimport_reimport_performance_async(self, mock): def test_import_reimport_reimport_performance_async(self): self.import_reimport_performance( - expected_num_queries1=281, + expected_num_queries1=712, expected_num_async_tasks1=10, - expected_num_queries2=816, + expected_num_queries2=655, expected_num_async_tasks2=22, - expected_num_queries3=292, + expected_num_queries3=332, expected_num_async_tasks3=20, ) @@ -198,11 +198,11 @@ def test_import_reimport_reimport_performance_no_async(self): testuser.usercontactinfo.block_execution = True testuser.usercontactinfo.save() self.import_reimport_performance( - expected_num_queries1=281, + expected_num_queries1=712, expected_num_async_tasks1=10, - expected_num_queries2=821, + expected_num_queries2=655, expected_num_async_tasks2=22, - expected_num_queries3=297, + expected_num_queries3=332, expected_num_async_tasks3=20, ) @@ -222,10 +222,10 @@ def test_import_reimport_reimport_performance_no_async_with_product_grading(self self.system_settings(enable_product_grade=True) self.import_reimport_performance( - expected_num_queries1=321, - expected_num_async_tasks1=20, - 
expected_num_queries2=851, + expected_num_queries1=732, + expected_num_async_tasks1=15, + expected_num_queries2=685, expected_num_async_tasks2=28, - expected_num_queries3=322, + expected_num_queries3=357, expected_num_async_tasks3=25, ) From 7029cce9d009b73fb084dee01060a1c4bd186ebb Mon Sep 17 00:00:00 2001 From: Valentijn Scholten Date: Fri, 1 Aug 2025 20:45:09 +0200 Subject: [PATCH 19/53] update counts --- unittests/test_importers_performance.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/unittests/test_importers_performance.py b/unittests/test_importers_performance.py index dbb611b1356..ae3190c5b8c 100644 --- a/unittests/test_importers_performance.py +++ b/unittests/test_importers_performance.py @@ -198,11 +198,11 @@ def test_import_reimport_reimport_performance_no_async(self): testuser.usercontactinfo.block_execution = True testuser.usercontactinfo.save() self.import_reimport_performance( - expected_num_queries1=712, + expected_num_queries1=682, expected_num_async_tasks1=10, expected_num_queries2=655, - expected_num_async_tasks2=22, - expected_num_queries3=332, + expected_num_async_tasks2=12, + expected_num_queries3=292, expected_num_async_tasks3=20, ) @@ -222,10 +222,10 @@ def test_import_reimport_reimport_performance_no_async_with_product_grading(self self.system_settings(enable_product_grade=True) self.import_reimport_performance( - expected_num_queries1=732, - expected_num_async_tasks1=15, + expected_num_queries1=682, + expected_num_async_tasks1=10, expected_num_queries2=685, - expected_num_async_tasks2=28, - expected_num_queries3=357, - expected_num_async_tasks3=25, + expected_num_async_tasks2=12, + expected_num_queries3=292, + expected_num_async_tasks3=20, ) From 739c84476a4b1f9efdedab64d8b7a96bac932f6f Mon Sep 17 00:00:00 2001 From: Valentijn Scholten Date: Fri, 1 Aug 2025 20:48:03 +0200 Subject: [PATCH 20/53] remove logging --- dojo/importers/default_importer.py | 1 - 1 file changed, 1 deletion(-) diff --git 
a/dojo/importers/default_importer.py b/dojo/importers/default_importer.py index 44836b727d3..dabc516a0a7 100644 --- a/dojo/importers/default_importer.py +++ b/dojo/importers/default_importer.py @@ -237,7 +237,6 @@ def process_findings( # the alternative is to not trigger the post processing or generate the hash_code on the finding, but just call finding.save() # this would do a full UDPATE statement for the finding - logger.debug("setting hash_code and cve for finding %s", finding.id) finding.set_hash_code(True) finding.save_no_options(update_fields=["hash_code"]) From f2aa5b20a8ae505eff0435c429f7f258bf3ca799 Mon Sep 17 00:00:00 2001 From: Valentijn Scholten Date: Fri, 1 Aug 2025 21:03:52 +0200 Subject: [PATCH 21/53] perf3b: compute hash_code on first save --- dojo/importers/default_importer.py | 16 ++++------------ dojo/importers/default_reimporter.py | 9 ++------- unittests/test_importers_performance.py | 16 ++++++++-------- 3 files changed, 14 insertions(+), 27 deletions(-) diff --git a/dojo/importers/default_importer.py b/dojo/importers/default_importer.py index dabc516a0a7..249b28231d8 100644 --- a/dojo/importers/default_importer.py +++ b/dojo/importers/default_importer.py @@ -199,6 +199,9 @@ def process_findings( # Force parsers to use unsaved_tags (stored in below after saving) unsaved_finding.tags = None finding = self.process_cve(unsaved_finding) + # Calculate hash_code before saving based on unsaved_endpoints and unsaved_vulnerability_ids + finding.set_hash_code(True) + # postprocessing will be done after processing related fields like endpoints, vulnerability ids, etc. unsaved_finding.save_no_options() @@ -227,18 +230,7 @@ def process_findings( finding = self.process_vulnerability_ids(finding) # Categorize this finding as a new one new_findings.append(finding) - # by this time the finding has been processed and saved to the database. - # since the save above no changes have been made to the finding, only to related objects such as endpoints. 
- # we don't have to save the finding again and can trigger postprocessing directly - # this saves a database UDPATE which is costly (and may trigger extra processing via signals such as audit logging) - # all data is already saved on the finding, we only need to generate as store the hash_code - # this is an optimization to avoid a full UDPATE statement of the finding which is a quite a big object with lots of fields - # after that we tirgger the post processing directly - # the alternative is to not trigger the post processing or generate the hash_code on the finding, but just call finding.save() - # this would do a full UDPATE statement for the finding - - finding.set_hash_code(True) - finding.save_no_options(update_fields=["hash_code"]) + # all data is already saved on the finding, we only need to trigger post processing # to avoid pushing a finding group multiple times, we push those outside of the loop push_to_jira = self.push_to_jira and (not self.findings_groups_enabled or not self.group_by) diff --git a/dojo/importers/default_reimporter.py b/dojo/importers/default_reimporter.py index 90315ea90f7..f8700eef5b1 100644 --- a/dojo/importers/default_reimporter.py +++ b/dojo/importers/default_reimporter.py @@ -236,13 +236,7 @@ def process_findings( finding, unsaved_finding, ) - # all data is already saved on the finding, we only need to generate as store the hash_code - # this is an optimization to avoid a full UDPATE statement of the finding which is a quite a big object with lots of fields - # after that we tirgger the post processing directly - # the alternative is to not trigger the post processing or generate the hash_code on the finding, but just call finding.save() - # this would do a full UDPATE statement for the finding - finding.set_hash_code(True) - finding.save_no_options(update_fields=["hash_code"]) + # all data is already saved on the finding, we only need to trigger post processing # to avoid pushing a finding group multiple times, we push those 
outside of the loop push_to_jira = self.push_to_jira and (not self.findings_groups_enabled or not self.group_by) @@ -617,6 +611,7 @@ def process_finding_that_was_not_matched( # Hash code is already calculated earlier as it's the primary matching criteria for reimport # Save it. Don't dedupe before endpoints are added. unsaved_finding = self.process_cve(unsaved_finding) + # Hash code is already calculated earlier as it's the primary matching criteria for reimport unsaved_finding.save_no_options() finding = unsaved_finding # Force parsers to use unsaved_tags (stored in finding_post_processing function below) diff --git a/unittests/test_importers_performance.py b/unittests/test_importers_performance.py index ae3190c5b8c..1d5273ec239 100644 --- a/unittests/test_importers_performance.py +++ b/unittests/test_importers_performance.py @@ -176,11 +176,11 @@ def import_reimport_performance(self, expected_num_queries1, expected_num_async_ # def test_import_reimport_reimport_performance_async(self, mock): def test_import_reimport_reimport_performance_async(self): self.import_reimport_performance( - expected_num_queries1=712, + expected_num_queries1=682, expected_num_async_tasks1=10, - expected_num_queries2=655, + expected_num_queries2=610, expected_num_async_tasks2=22, - expected_num_queries3=332, + expected_num_queries3=292, expected_num_async_tasks3=20, ) @@ -200,9 +200,9 @@ def test_import_reimport_reimport_performance_no_async(self): self.import_reimport_performance( expected_num_queries1=682, expected_num_async_tasks1=10, - expected_num_queries2=655, - expected_num_async_tasks2=12, - expected_num_queries3=292, + expected_num_queries2=615, + expected_num_async_tasks2=22, + expected_num_queries3=297, expected_num_async_tasks3=20, ) @@ -224,8 +224,8 @@ def test_import_reimport_reimport_performance_no_async_with_product_grading(self self.import_reimport_performance( expected_num_queries1=682, expected_num_async_tasks1=10, - expected_num_queries2=685, - 
expected_num_async_tasks2=12, + expected_num_queries2=610, + expected_num_async_tasks2=22, expected_num_queries3=292, expected_num_async_tasks3=20, ) From 44bbb4c50e17ecc979ce5136f29a23b1545a1aa5 Mon Sep 17 00:00:00 2001 From: Valentijn Scholten Date: Fri, 1 Aug 2025 21:26:36 +0200 Subject: [PATCH 22/53] fix cve for reimport --- dojo/importers/default_reimporter.py | 1 + 1 file changed, 1 insertion(+) diff --git a/dojo/importers/default_reimporter.py b/dojo/importers/default_reimporter.py index f8700eef5b1..96513f6d243 100644 --- a/dojo/importers/default_reimporter.py +++ b/dojo/importers/default_reimporter.py @@ -612,6 +612,7 @@ def process_finding_that_was_not_matched( # Save it. Don't dedupe before endpoints are added. unsaved_finding = self.process_cve(unsaved_finding) # Hash code is already calculated earlier as it's the primary matching criteria for reimport + # Save it. Don't dedupe before endpoints are added. unsaved_finding.save_no_options() finding = unsaved_finding # Force parsers to use unsaved_tags (stored in finding_post_processing function below) From 3cb5dafc44e2193917cf30880b2ffd009ce59a36 Mon Sep 17 00:00:00 2001 From: Valentijn Scholten Date: Fri, 1 Aug 2025 21:49:01 +0200 Subject: [PATCH 23/53] ruff --- dojo/templatetags/multiply.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dojo/templatetags/multiply.py b/dojo/templatetags/multiply.py index 641fa4889cc..de4b83c1c64 100644 --- a/dojo/templatetags/multiply.py +++ b/dojo/templatetags/multiply.py @@ -4,5 +4,5 @@ @register.filter -def multiply(value, arg): +def multiply(value, arg): # noqa: FURB118 return value * arg From ef6488746908c467f374bf7f99c667e4fdf47032 Mon Sep 17 00:00:00 2001 From: Valentijn Scholten Date: Sun, 3 Aug 2025 23:15:25 +0200 Subject: [PATCH 24/53] fix no async --- unittests/test_importers_performance.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/unittests/test_importers_performance.py 
b/unittests/test_importers_performance.py index 1d5273ec239..a629b7f97c3 100644 --- a/unittests/test_importers_performance.py +++ b/unittests/test_importers_performance.py @@ -222,10 +222,10 @@ def test_import_reimport_reimport_performance_no_async_with_product_grading(self self.system_settings(enable_product_grade=True) self.import_reimport_performance( - expected_num_queries1=682, - expected_num_async_tasks1=10, - expected_num_queries2=610, - expected_num_async_tasks2=22, - expected_num_queries3=292, - expected_num_async_tasks3=20, + expected_num_queries1=702, + expected_num_async_tasks1=15, + expected_num_queries2=645, + expected_num_async_tasks2=28, + expected_num_queries3=322, + expected_num_async_tasks3=25, ) From 10a82bad9007addbdc7db40f8f44104056bb4ab0 Mon Sep 17 00:00:00 2001 From: Valentijn Scholten Date: Mon, 4 Aug 2025 20:11:11 +0200 Subject: [PATCH 25/53] Merge remote-tracking branch 'upstream/dev' into perf3-reduce-saves --- dojo/templatetags/multiply.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dojo/templatetags/multiply.py b/dojo/templatetags/multiply.py index de4b83c1c64..641fa4889cc 100644 --- a/dojo/templatetags/multiply.py +++ b/dojo/templatetags/multiply.py @@ -4,5 +4,5 @@ @register.filter -def multiply(value, arg): # noqa: FURB118 +def multiply(value, arg): return value * arg From 5504c8ddd63e19057e83dd8fe1f976dcc47e6f4e Mon Sep 17 00:00:00 2001 From: Valentijn Scholten Date: Fri, 1 Aug 2025 18:22:05 +0200 Subject: [PATCH 26/53] make smaller second save work --- dojo/importers/default_reimporter.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/dojo/importers/default_reimporter.py b/dojo/importers/default_reimporter.py index 96513f6d243..8cd398c2353 100644 --- a/dojo/importers/default_reimporter.py +++ b/dojo/importers/default_reimporter.py @@ -611,8 +611,6 @@ def process_finding_that_was_not_matched( # Hash code is already calculated earlier as it's the primary matching criteria for reimport # Save it. 
Don't dedupe before endpoints are added. unsaved_finding = self.process_cve(unsaved_finding) - # Hash code is already calculated earlier as it's the primary matching criteria for reimport - # Save it. Don't dedupe before endpoints are added. unsaved_finding.save_no_options() finding = unsaved_finding # Force parsers to use unsaved_tags (stored in finding_post_processing function below) From f48b55f47ed1c2618bdd0bec24c983c1b5dd2181 Mon Sep 17 00:00:00 2001 From: Valentijn Scholten Date: Fri, 1 Aug 2025 21:26:36 +0200 Subject: [PATCH 27/53] fix cve for reimport --- dojo/importers/default_reimporter.py | 1 - 1 file changed, 1 deletion(-) diff --git a/dojo/importers/default_reimporter.py b/dojo/importers/default_reimporter.py index 8cd398c2353..455730a647b 100644 --- a/dojo/importers/default_reimporter.py +++ b/dojo/importers/default_reimporter.py @@ -610,7 +610,6 @@ def process_finding_that_was_not_matched( unsaved_finding = self.process_cve(unsaved_finding) # Hash code is already calculated earlier as it's the primary matching criteria for reimport # Save it. Don't dedupe before endpoints are added. 
- unsaved_finding = self.process_cve(unsaved_finding) unsaved_finding.save_no_options() finding = unsaved_finding # Force parsers to use unsaved_tags (stored in finding_post_processing function below) From c714da8178fbfbdd2362b4695090b1284c8c8514 Mon Sep 17 00:00:00 2001 From: Valentijn Scholten Date: Sun, 3 Aug 2025 23:10:52 +0200 Subject: [PATCH 28/53] initial --- dojo/decorators.py | 46 +++++++------ dojo/finding/helper.py | 20 ++++++ dojo/importers/default_importer.py | 50 +++++++++++++- dojo/utils.py | 14 ++++ unittests/test_importers_performance.py | 86 ++++++++++++------------- 5 files changed, 151 insertions(+), 65 deletions(-) diff --git a/dojo/decorators.py b/dojo/decorators.py index 1d1f6aac67c..7c8e11a59ea 100644 --- a/dojo/decorators.py +++ b/dojo/decorators.py @@ -79,25 +79,33 @@ def we_want_async(*args, func=None, **kwargs): # Defect Dojo performs all tasks asynchrnonously using celery # *unless* the user initiating the task has set block_execution to True in their usercontactinfo profile -def dojo_async_task(func): - @wraps(func) - def __wrapper__(*args, **kwargs): - from dojo.utils import get_current_user # noqa: PLC0415 circular import - user = get_current_user() - kwargs["async_user"] = user - - dojo_async_task_counter.incr( - func.__name__, - args=args, - kwargs=kwargs, - ) - - countdown = kwargs.pop("countdown", 0) - if we_want_async(*args, func=func, **kwargs): - return func.apply_async(args=args, kwargs=kwargs, countdown=countdown) - return func(*args, **kwargs) - - return __wrapper__ +def dojo_async_task(func=None, *, signature=False): + def decorator(func): + @wraps(func) + def __wrapper__(*args, **kwargs): + from dojo.utils import get_current_user # noqa: PLC0415 circular import + user = get_current_user() + kwargs["async_user"] = user + + dojo_async_task_counter.incr( + func.__name__, + args=args, + kwargs=kwargs, + ) + + countdown = kwargs.pop("countdown", 0) + if we_want_async(*args, func=func, **kwargs): + # Return a signature for use 
in chord/group if requested + if signature: + return func.si(*args, **kwargs) + # Execute the task + return func.apply_async(args=args, kwargs=kwargs, countdown=countdown) + return func(*args, **kwargs) + return __wrapper__ + + if func is None: + return decorator + return decorator(func) # decorator with parameters needs another wrapper layer diff --git a/dojo/finding/helper.py b/dojo/finding/helper.py index ad7ad916b2d..caff6049078 100644 --- a/dojo/finding/helper.py +++ b/dojo/finding/helper.py @@ -357,6 +357,19 @@ def add_findings_to_auto_group(name, findings, group_by, *, create_finding_group finding_group.findings.add(*findings) +@dojo_model_to_id +@dojo_async_task(signature=True) +@app.task +@dojo_model_from_id +def post_process_finding_save_signature(finding, dedupe_option=True, rules_option=True, product_grading_option=True, # noqa: FBT002 + issue_updater_option=True, push_to_jira=False, user=None, *args, **kwargs): # noqa: FBT002 - this is bit hard to fix nice have this universally fixed + """ + Returns a task signature for post-processing a finding. This is useful for creating task signatures + that can be used in chords or groups. 
+ """ + return post_process_finding_save_internal(finding, dedupe_option, rules_option, product_grading_option, + issue_updater_option, push_to_jira, user, *args, **kwargs) + @dojo_model_to_id @dojo_async_task @app.task @@ -364,6 +377,13 @@ def add_findings_to_auto_group(name, findings, group_by, *, create_finding_group def post_process_finding_save(finding, dedupe_option=True, rules_option=True, product_grading_option=True, # noqa: FBT002 issue_updater_option=True, push_to_jira=False, user=None, *args, **kwargs): # noqa: FBT002 - this is bit hard to fix nice have this universally fixed + return post_process_finding_save_internal(finding, dedupe_option, rules_option, product_grading_option, + issue_updater_option, push_to_jira, user, *args, **kwargs) + + +def post_process_finding_save_internal(finding, dedupe_option=True, rules_option=True, product_grading_option=True, # noqa: FBT002 + issue_updater_option=True, push_to_jira=False, user=None, *args, **kwargs): # noqa: FBT002 - this is bit hard to fix nice have this universally fixed + if not finding: logger.warning("post_process_finding_save called with finding==None, skipping post processing") return diff --git a/dojo/importers/default_importer.py b/dojo/importers/default_importer.py index 249b28231d8..eb91913f5a7 100644 --- a/dojo/importers/default_importer.py +++ b/dojo/importers/default_importer.py @@ -5,7 +5,7 @@ from django.db.models.query_utils import Q from django.urls import reverse -import dojo.finding.helper as finding_helper + import dojo.jira_link.helper as jira_helper from dojo.importers.base_importer import BaseImporter, Parser from dojo.importers.options import ImporterOptions @@ -155,6 +155,12 @@ def process_findings( parsed_findings: list[Finding], **kwargs: dict, ) -> list[Finding]: + from celery import chord + from dojo.finding import helper as finding_helper + from dojo.models import Dojo_User + from dojo.utils import calculate_grade, calculate_grade_signature + task_signatures = [] + """ Saves 
findings in memory that were parsed from the scan report into the database. This process involves first saving associated objects such as endpoints, files, @@ -232,9 +238,31 @@ def process_findings( new_findings.append(finding) # all data is already saved on the finding, we only need to trigger post processing - # to avoid pushing a finding group multiple times, we push those outside of the loop + # Collect finding for parallel processing - we'll process them all at once after the loop push_to_jira = self.push_to_jira and (not self.findings_groups_enabled or not self.group_by) - finding_helper.post_process_finding_save(finding, dedupe_option=True, rules_option=True, product_grading_option=True, issue_updater_option=True, push_to_jira=push_to_jira) + # Process finding - either sync or async based on block_execution + if Dojo_User.wants_block_execution(self.user): + # This will run synchronously, but we still call the dojo_async decorated function to count the task + finding_helper.post_process_finding_save( + finding, + dedupe_option=True, + rules_option=True, + product_grading_option=False, + issue_updater_option=True, + push_to_jira=push_to_jira, + ) + else: + # Add to task signatures for async execution + task_signatures.append( + finding_helper.post_process_finding_save_signature( + finding, + dedupe_option=True, + rules_option=True, + product_grading_option=False, + issue_updater_option=True, + push_to_jira=push_to_jira, + ), + ) for (group_name, findings) in group_names_to_findings_dict.items(): finding_helper.add_findings_to_auto_group( @@ -250,6 +278,22 @@ def process_findings( else: jira_helper.push_to_jira(findings[0]) + # Calculate product grade after all findings are processed + product = self.test.engagement.product + if task_signatures: + # If we have async tasks, use chord to wait for them before calculating grade + if Dojo_User.wants_block_execution(self.user): + # Run the chord synchronously by passing sync=True to each task + for task_sig in 
task_signatures: + task_sig.apply_async(sync=True).get() + calculate_grade(product, sync=True) + else: + # Run the chord asynchronously + chord(task_signatures)(calculate_grade_signature(product)) + else: + # If everything was sync, calculate grade now as post processing is done + calculate_grade(product) + sync = kwargs.get("sync", True) if not sync: return [serialize("json", [finding]) for finding in new_findings] diff --git a/dojo/utils.py b/dojo/utils.py index a8b6b1aa86b..f33f176a329 100644 --- a/dojo/utils.py +++ b/dojo/utils.py @@ -1555,11 +1555,25 @@ def get_setting(setting): return getattr(settings, setting) +@dojo_model_to_id +@dojo_async_task(signature=True) +@app.task +@dojo_model_from_id(model=Product) +def calculate_grade_signature(product, *args, **kwargs): + """Returns a signature for calculating product grade that can be used in chords or groups.""" + return calculate_grade_internal(product, *args, **kwargs) + + @dojo_model_to_id @dojo_async_task @app.task @dojo_model_from_id(model=Product) def calculate_grade(product, *args, **kwargs): + return calculate_grade_internal(product, *args, **kwargs) + + +def calculate_grade_internal(product, *args, **kwargs): + """Internal function for calculating product grade.""" system_settings = System_Settings.objects.get() if not product: logger.warning("ignoring calculate product for product None!") diff --git a/unittests/test_importers_performance.py b/unittests/test_importers_performance.py index a629b7f97c3..0129f3c45bb 100644 --- a/unittests/test_importers_performance.py +++ b/unittests/test_importers_performance.py @@ -127,49 +127,49 @@ def import_reimport_performance(self, expected_num_queries1, expected_num_async_ importer = DefaultImporter(**import_options) test, _, _len_new_findings, _len_closed_findings, _, _, _ = importer.process_scan(scan) - # use reimport with the full report so it add a finding and some endpoints - with ( - self.subTest("reimport1"), 
impersonate(Dojo_User.objects.get(username="admin")), - self.assertNumQueries(expected_num_queries2), - self.assertNumAsyncTask(expected_num_async_tasks2), - STACK_HAWK_FILENAME.open(encoding="utf-8") as scan, - ): - reimport_options = { - "test": test, - "user": lead, - "lead": lead, - "scan_date": None, - "minimum_severity": "Info", - "active": True, - "verified": True, - "sync": True, - "scan_type": STACK_HAWK_SCAN_TYPE, - "tags": ["performance-test-reimport", "reimport-tag-in-param", "reimport-go-faster"], - "apply_tags_to_findings": True, - } - reimporter = DefaultReImporter(**reimport_options) - test, _, _len_new_findings, _len_closed_findings, _, _, _ = reimporter.process_scan(scan) - - # use reimport with the subset again to close a finding and mitigate some endpoints - with ( - self.subTest("reimport2"), impersonate(Dojo_User.objects.get(username="admin")), - self.assertNumQueries(expected_num_queries3), - self.assertNumAsyncTask(expected_num_async_tasks3), - STACK_HAWK_SUBSET_FILENAME.open(encoding="utf-8") as scan, - ): - reimport_options = { - "test": test, - "user": lead, - "lead": lead, - "scan_date": None, - "minimum_severity": "Info", - "active": True, - "verified": True, - "sync": True, - "scan_type": STACK_HAWK_SCAN_TYPE, - } - reimporter = DefaultReImporter(**reimport_options) - test, _, _len_new_findings, _len_closed_findings, _, _, _ = reimporter.process_scan(scan) + # # use reimport with the full report so it add a finding and some endpoints + # with ( + # self.subTest("reimport1"), impersonate(Dojo_User.objects.get(username="admin")), + # self.assertNumQueries(expected_num_queries2), + # self.assertNumAsyncTask(expected_num_async_tasks2), + # STACK_HAWK_FILENAME.open(encoding="utf-8") as scan, + # ): + # reimport_options = { + # "test": test, + # "user": lead, + # "lead": lead, + # "scan_date": None, + # "minimum_severity": "Info", + # "active": True, + # "verified": True, + # "sync": True, + # "scan_type": STACK_HAWK_SCAN_TYPE, + # "tags": 
["performance-test-reimport", "reimport-tag-in-param", "reimport-go-faster"], + # "apply_tags_to_findings": True, + # } + # reimporter = DefaultReImporter(**reimport_options) + # test, _, _len_new_findings, _len_closed_findings, _, _, _ = reimporter.process_scan(scan) + + # # use reimport with the subset again to close a finding and mitigate some endpoints + # with ( + # self.subTest("reimport2"), impersonate(Dojo_User.objects.get(username="admin")), + # self.assertNumQueries(expected_num_queries3), + # self.assertNumAsyncTask(expected_num_async_tasks3), + # STACK_HAWK_SUBSET_FILENAME.open(encoding="utf-8") as scan, + # ): + # reimport_options = { + # "test": test, + # "user": lead, + # "lead": lead, + # "scan_date": None, + # "minimum_severity": "Info", + # "active": True, + # "verified": True, + # "sync": True, + # "scan_type": STACK_HAWK_SCAN_TYPE, + # } + # reimporter = DefaultReImporter(**reimport_options) + # test, _, _len_new_findings, _len_closed_findings, _, _, _ = reimporter.process_scan(scan) # patch the we_want_async decorator to always return True so we don't depend on block_execution flag shenanigans # @patch("dojo.decorators.we_want_async", return_value=True) From 2d35637540c3e950c565af4df502bafaf95cf63d Mon Sep 17 00:00:00 2001 From: Valentijn Scholten Date: Sun, 3 Aug 2025 23:30:32 +0200 Subject: [PATCH 29/53] fix counts --- unittests/test_importers_performance.py | 98 ++++++++++++------------- 1 file changed, 49 insertions(+), 49 deletions(-) diff --git a/unittests/test_importers_performance.py b/unittests/test_importers_performance.py index 0129f3c45bb..4fd134ba876 100644 --- a/unittests/test_importers_performance.py +++ b/unittests/test_importers_performance.py @@ -127,57 +127,57 @@ def import_reimport_performance(self, expected_num_queries1, expected_num_async_ importer = DefaultImporter(**import_options) test, _, _len_new_findings, _len_closed_findings, _, _, _ = importer.process_scan(scan) - # # use reimport with the full report so it add a 
finding and some endpoints - # with ( - # self.subTest("reimport1"), impersonate(Dojo_User.objects.get(username="admin")), - # self.assertNumQueries(expected_num_queries2), - # self.assertNumAsyncTask(expected_num_async_tasks2), - # STACK_HAWK_FILENAME.open(encoding="utf-8") as scan, - # ): - # reimport_options = { - # "test": test, - # "user": lead, - # "lead": lead, - # "scan_date": None, - # "minimum_severity": "Info", - # "active": True, - # "verified": True, - # "sync": True, - # "scan_type": STACK_HAWK_SCAN_TYPE, - # "tags": ["performance-test-reimport", "reimport-tag-in-param", "reimport-go-faster"], - # "apply_tags_to_findings": True, - # } - # reimporter = DefaultReImporter(**reimport_options) - # test, _, _len_new_findings, _len_closed_findings, _, _, _ = reimporter.process_scan(scan) - - # # use reimport with the subset again to close a finding and mitigate some endpoints - # with ( - # self.subTest("reimport2"), impersonate(Dojo_User.objects.get(username="admin")), - # self.assertNumQueries(expected_num_queries3), - # self.assertNumAsyncTask(expected_num_async_tasks3), - # STACK_HAWK_SUBSET_FILENAME.open(encoding="utf-8") as scan, - # ): - # reimport_options = { - # "test": test, - # "user": lead, - # "lead": lead, - # "scan_date": None, - # "minimum_severity": "Info", - # "active": True, - # "verified": True, - # "sync": True, - # "scan_type": STACK_HAWK_SCAN_TYPE, - # } - # reimporter = DefaultReImporter(**reimport_options) - # test, _, _len_new_findings, _len_closed_findings, _, _, _ = reimporter.process_scan(scan) + # use reimport with the full report so it add a finding and some endpoints + with ( + self.subTest("reimport1"), impersonate(Dojo_User.objects.get(username="admin")), + self.assertNumQueries(expected_num_queries2), + self.assertNumAsyncTask(expected_num_async_tasks2), + STACK_HAWK_FILENAME.open(encoding="utf-8") as scan, + ): + reimport_options = { + "test": test, + "user": lead, + "lead": lead, + "scan_date": None, + "minimum_severity": 
"Info", + "active": True, + "verified": True, + "sync": True, + "scan_type": STACK_HAWK_SCAN_TYPE, + "tags": ["performance-test-reimport", "reimport-tag-in-param", "reimport-go-faster"], + "apply_tags_to_findings": True, + } + reimporter = DefaultReImporter(**reimport_options) + test, _, _len_new_findings, _len_closed_findings, _, _, _ = reimporter.process_scan(scan) + + # use reimport with the subset again to close a finding and mitigate some endpoints + with ( + self.subTest("reimport2"), impersonate(Dojo_User.objects.get(username="admin")), + self.assertNumQueries(expected_num_queries3), + self.assertNumAsyncTask(expected_num_async_tasks3), + STACK_HAWK_SUBSET_FILENAME.open(encoding="utf-8") as scan, + ): + reimport_options = { + "test": test, + "user": lead, + "lead": lead, + "scan_date": None, + "minimum_severity": "Info", + "active": True, + "verified": True, + "sync": True, + "scan_type": STACK_HAWK_SCAN_TYPE, + } + reimporter = DefaultReImporter(**reimport_options) + test, _, _len_new_findings, _len_closed_findings, _, _, _ = reimporter.process_scan(scan) # patch the we_want_async decorator to always return True so we don't depend on block_execution flag shenanigans # @patch("dojo.decorators.we_want_async", return_value=True) # def test_import_reimport_reimport_performance_async(self, mock): def test_import_reimport_reimport_performance_async(self): self.import_reimport_performance( - expected_num_queries1=682, - expected_num_async_tasks1=10, + expected_num_queries1=684, + expected_num_async_tasks1=11, expected_num_queries2=610, expected_num_async_tasks2=22, expected_num_queries3=292, @@ -198,8 +198,8 @@ def test_import_reimport_reimport_performance_no_async(self): testuser.usercontactinfo.block_execution = True testuser.usercontactinfo.save() self.import_reimport_performance( - expected_num_queries1=682, - expected_num_async_tasks1=10, + expected_num_queries1=683, + expected_num_async_tasks1=11, expected_num_queries2=615, expected_num_async_tasks2=22, 
expected_num_queries3=297, @@ -222,8 +222,8 @@ def test_import_reimport_reimport_performance_no_async_with_product_grading(self self.system_settings(enable_product_grade=True) self.import_reimport_performance( - expected_num_queries1=702, - expected_num_async_tasks1=15, + expected_num_queries1=687, + expected_num_async_tasks1=11, expected_num_queries2=645, expected_num_async_tasks2=28, expected_num_queries3=322, From 6507a937b035bf9d11ba6c1b41079c1a41fd0941 Mon Sep 17 00:00:00 2001 From: Valentijn Scholten Date: Sun, 3 Aug 2025 23:33:44 +0200 Subject: [PATCH 30/53] fix counts --- dojo/finding/helper.py | 1 + dojo/importers/default_importer.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/dojo/finding/helper.py b/dojo/finding/helper.py index caff6049078..8bfd291e283 100644 --- a/dojo/finding/helper.py +++ b/dojo/finding/helper.py @@ -370,6 +370,7 @@ def post_process_finding_save_signature(finding, dedupe_option=True, rules_optio return post_process_finding_save_internal(finding, dedupe_option, rules_option, product_grading_option, issue_updater_option, push_to_jira, user, *args, **kwargs) + @dojo_model_to_id @dojo_async_task @app.task diff --git a/dojo/importers/default_importer.py b/dojo/importers/default_importer.py index eb91913f5a7..fba590e2f76 100644 --- a/dojo/importers/default_importer.py +++ b/dojo/importers/default_importer.py @@ -5,7 +5,6 @@ from django.db.models.query_utils import Q from django.urls import reverse - import dojo.jira_link.helper as jira_helper from dojo.importers.base_importer import BaseImporter, Parser from dojo.importers.options import ImporterOptions @@ -156,6 +155,7 @@ def process_findings( **kwargs: dict, ) -> list[Finding]: from celery import chord + from dojo.finding import helper as finding_helper from dojo.models import Dojo_User from dojo.utils import calculate_grade, calculate_grade_signature From e2194abbb5e331f36714bbe90f148a28d9f300a0 Mon Sep 17 00:00:00 2001 From: Valentijn Scholten Date: Mon, 4 Aug 
2025 00:11:02 +0200 Subject: [PATCH 31/53] simplify --- dojo/decorators.py | 5 +-- dojo/importers/default_importer.py | 46 +++++++++---------------- unittests/test_importers_performance.py | 2 +- 3 files changed, 21 insertions(+), 32 deletions(-) diff --git a/dojo/decorators.py b/dojo/decorators.py index 7c8e11a59ea..b7b84d59430 100644 --- a/dojo/decorators.py +++ b/dojo/decorators.py @@ -93,11 +93,12 @@ def __wrapper__(*args, **kwargs): kwargs=kwargs, ) + if signature: + return func.si(*args, **kwargs) + countdown = kwargs.pop("countdown", 0) if we_want_async(*args, func=func, **kwargs): # Return a signature for use in chord/group if requested - if signature: - return func.si(*args, **kwargs) # Execute the task return func.apply_async(args=args, kwargs=kwargs, countdown=countdown) return func(*args, **kwargs) diff --git a/dojo/importers/default_importer.py b/dojo/importers/default_importer.py index fba590e2f76..dbee002520e 100644 --- a/dojo/importers/default_importer.py +++ b/dojo/importers/default_importer.py @@ -15,6 +15,7 @@ Test_Import, ) from dojo.notifications.helper import create_notification +from dojo.decorators import we_want_async from dojo.validators import clean_tags logger = logging.getLogger(__name__) @@ -159,7 +160,7 @@ def process_findings( from dojo.finding import helper as finding_helper from dojo.models import Dojo_User from dojo.utils import calculate_grade, calculate_grade_signature - task_signatures = [] + post_processing_task_signatures = [] """ Saves findings in memory that were parsed from the scan report into the database. 
@@ -240,29 +241,17 @@ def process_findings( # Collect finding for parallel processing - we'll process them all at once after the loop push_to_jira = self.push_to_jira and (not self.findings_groups_enabled or not self.group_by) - # Process finding - either sync or async based on block_execution - if Dojo_User.wants_block_execution(self.user): - # This will run synchronously, but we still call the dojo_async decorated function to count the task - finding_helper.post_process_finding_save( + # Always create signatures - we'll execute them sync or async later + post_processing_task_signatures.append( + finding_helper.post_process_finding_save_signature( finding, dedupe_option=True, rules_option=True, product_grading_option=False, issue_updater_option=True, push_to_jira=push_to_jira, - ) - else: - # Add to task signatures for async execution - task_signatures.append( - finding_helper.post_process_finding_save_signature( - finding, - dedupe_option=True, - rules_option=True, - product_grading_option=False, - issue_updater_option=True, - push_to_jira=push_to_jira, - ), - ) + ), + ) for (group_name, findings) in group_names_to_findings_dict.items(): finding_helper.add_findings_to_auto_group( @@ -280,19 +269,18 @@ def process_findings( # Calculate product grade after all findings are processed product = self.test.engagement.product - if task_signatures: + if post_processing_task_signatures: # If we have async tasks, use chord to wait for them before calculating grade - if Dojo_User.wants_block_execution(self.user): - # Run the chord synchronously by passing sync=True to each task - for task_sig in task_signatures: - task_sig.apply_async(sync=True).get() - calculate_grade(product, sync=True) + if we_want_async(async_user=self.user): + # Run the chord asynchronously and after completing post processing tasks, calculate grade ONCE + chord(post_processing_task_signatures)(calculate_grade_signature(product)) else: - # Run the chord asynchronously - 
chord(task_signatures)(calculate_grade_signature(product)) - else: - # If everything was sync, calculate grade now as post processing is done - calculate_grade(product) + # Execute each task synchronously + for task_sig in post_processing_task_signatures: + task_sig() + + # Calculate grade, which can be prelimary calculated before the async tasks have finished + calculate_grade(product) sync = kwargs.get("sync", True) if not sync: diff --git a/unittests/test_importers_performance.py b/unittests/test_importers_performance.py index 4fd134ba876..fd8c5031f6c 100644 --- a/unittests/test_importers_performance.py +++ b/unittests/test_importers_performance.py @@ -177,7 +177,7 @@ def import_reimport_performance(self, expected_num_queries1, expected_num_async_ def test_import_reimport_reimport_performance_async(self): self.import_reimport_performance( expected_num_queries1=684, - expected_num_async_tasks1=11, + expected_num_async_tasks1=12, expected_num_queries2=610, expected_num_async_tasks2=22, expected_num_queries3=292, From 72e95c3cfba22f8de638c29f0e34a257607f1842 Mon Sep 17 00:00:00 2001 From: Valentijn Scholten Date: Mon, 4 Aug 2025 08:48:02 +0200 Subject: [PATCH 32/53] simplify --- dojo/importers/default_importer.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/dojo/importers/default_importer.py b/dojo/importers/default_importer.py index dbee002520e..73af7e2ef45 100644 --- a/dojo/importers/default_importer.py +++ b/dojo/importers/default_importer.py @@ -6,6 +6,7 @@ from django.urls import reverse import dojo.jira_link.helper as jira_helper +from dojo.decorators import we_want_async from dojo.importers.base_importer import BaseImporter, Parser from dojo.importers.options import ImporterOptions from dojo.models import ( @@ -15,7 +16,6 @@ Test_Import, ) from dojo.notifications.helper import create_notification -from dojo.decorators import we_want_async from dojo.validators import clean_tags logger = logging.getLogger(__name__) @@ -158,7 +158,6 @@ 
def process_findings( from celery import chord from dojo.finding import helper as finding_helper - from dojo.models import Dojo_User from dojo.utils import calculate_grade, calculate_grade_signature post_processing_task_signatures = [] From cb2d0e3a93d0e7c8285b75791a70aeddd38461a8 Mon Sep 17 00:00:00 2001 From: Valentijn Scholten Date: Sat, 13 Sep 2025 23:27:30 +0200 Subject: [PATCH 33/53] refactor to await results --- dojo/finding/helper.py | 3 +- dojo/importers/default_importer.py | 53 +++++++++++++++--------------- dojo/tasks.py | 31 +++++++++++++++++ 3 files changed, 59 insertions(+), 28 deletions(-) diff --git a/dojo/finding/helper.py b/dojo/finding/helper.py index 8bfd291e283..61c3c3945b8 100644 --- a/dojo/finding/helper.py +++ b/dojo/finding/helper.py @@ -365,7 +365,8 @@ def post_process_finding_save_signature(finding, dedupe_option=True, rules_optio issue_updater_option=True, push_to_jira=False, user=None, *args, **kwargs): # noqa: FBT002 - this is bit hard to fix nice have this universally fixed """ Returns a task signature for post-processing a finding. This is useful for creating task signatures - that can be used in chords or groups. + that can be used in chords or groups or to await results. We need this extra method because of our dojo_async decorator. + If we use more of these celery features, we should probably move away from that decorator. 
""" return post_process_finding_save_internal(finding, dedupe_option, rules_option, product_grading_option, issue_updater_option, push_to_jira, user, *args, **kwargs) diff --git a/dojo/importers/default_importer.py b/dojo/importers/default_importer.py index 73af7e2ef45..2a885fdc71f 100644 --- a/dojo/importers/default_importer.py +++ b/dojo/importers/default_importer.py @@ -7,6 +7,7 @@ import dojo.jira_link.helper as jira_helper from dojo.decorators import we_want_async +from dojo.finding import helper as finding_helper from dojo.importers.base_importer import BaseImporter, Parser from dojo.importers.options import ImporterOptions from dojo.models import ( @@ -16,6 +17,8 @@ Test_Import, ) from dojo.notifications.helper import create_notification +from dojo.tasks import wait_for_tasks_and_calculate_grade +from dojo.utils import calculate_grade from dojo.validators import clean_tags logger = logging.getLogger(__name__) @@ -155,11 +158,7 @@ def process_findings( parsed_findings: list[Finding], **kwargs: dict, ) -> list[Finding]: - from celery import chord - - from dojo.finding import helper as finding_helper - from dojo.utils import calculate_grade, calculate_grade_signature - post_processing_task_signatures = [] + async_task_ids = [] """ Saves findings in memory that were parsed from the scan report into the database. 
@@ -189,7 +188,7 @@ def process_findings( unsaved_finding.reporter = self.user unsaved_finding.last_reviewed_by = self.user unsaved_finding.last_reviewed = self.now - logger.debug("process_parsed_findings: unique_id_from_tool: %s, hash_code: %s, active from report: %s, verified from report: %s", unsaved_finding.unique_id_from_tool, unsaved_finding.hash_code, unsaved_finding.active, unsaved_finding.verified) + logger.debug("process_parsed_finding: unique_id_from_tool: %s, hash_code: %s, active from report: %s, verified from report: %s", unsaved_finding.unique_id_from_tool, unsaved_finding.hash_code, unsaved_finding.active, unsaved_finding.verified) # indicates an override. Otherwise, do not change the value of unsaved_finding.active if self.active is not None: unsaved_finding.active = self.active @@ -238,20 +237,25 @@ def process_findings( new_findings.append(finding) # all data is already saved on the finding, we only need to trigger post processing - # Collect finding for parallel processing - we'll process them all at once after the loop + # We create a signature for the post processing task so we can decide to apply it async or sync push_to_jira = self.push_to_jira and (not self.findings_groups_enabled or not self.group_by) - # Always create signatures - we'll execute them sync or async later - post_processing_task_signatures.append( - finding_helper.post_process_finding_save_signature( - finding, - dedupe_option=True, - rules_option=True, - product_grading_option=False, - issue_updater_option=True, - push_to_jira=push_to_jira, - ), + post_processing_task_signature = finding_helper.post_process_finding_save_signature( + finding, + dedupe_option=True, + rules_option=True, + product_grading_option=False, + issue_updater_option=True, + push_to_jira=push_to_jira, ) + # We need to call apply_async to get the result of the task so we can collect the task ID + if we_want_async(async_user=self.user): + result = post_processing_task_signature.apply_async() + 
async_task_ids.append(result.id) + else: + # Execute task immediately for synchronous processing + post_processing_task_signature() + for (group_name, findings) in group_names_to_findings_dict.items(): finding_helper.add_findings_to_auto_group( group_name, @@ -268,17 +272,12 @@ def process_findings( # Calculate product grade after all findings are processed product = self.test.engagement.product - if post_processing_task_signatures: - # If we have async tasks, use chord to wait for them before calculating grade - if we_want_async(async_user=self.user): - # Run the chord asynchronously and after completing post processing tasks, calculate grade ONCE - chord(post_processing_task_signatures)(calculate_grade_signature(product)) - else: - # Execute each task synchronously - for task_sig in post_processing_task_signatures: - task_sig() - # Calculate grade, which can be prelimary calculated before the async tasks have finished + if we_want_async(async_user=self.user) and async_task_ids: + # Tasks were executed immediately during processing, now coordinate final grade calculation + wait_for_tasks_and_calculate_grade.delay(async_task_ids, product.id) + + # Synchronous tasks were already executed during processing, just calculate grade calculate_grade(product) sync = kwargs.get("sync", True) diff --git a/dojo/tasks.py b/dojo/tasks.py index 0957ac40194..c0b5059db91 100644 --- a/dojo/tasks.py +++ b/dojo/tasks.py @@ -2,6 +2,7 @@ from datetime import date, timedelta from auditlog.models import LogEntry +from celery.result import AsyncResult from celery.utils.log import get_task_logger from dateutil.relativedelta import relativedelta from django.conf import settings @@ -192,6 +193,36 @@ def fix_loop_duplicates_task(*args, **kwargs): return fix_loop_duplicates() +@app.task +def wait_for_tasks_and_calculate_grade(task_ids, product_id, *args, **kwargs): + """ + Wait for all specified tasks to complete, then calculate product grade. 
+ This provides coordination for immediate task execution without using chord. + """ + logger.info(f"Waiting for {len(task_ids)} tasks to complete before calculating grade for product {product_id}") + + # Wait for all tasks to complete + results = [AsyncResult(task_id) for task_id in task_ids] + + # This will block until all tasks are done + for result in results: + try: + result.get(timeout=300) # 5 minute timeout per task + except Exception as e: + logger.warning(f"Task {result.id} failed: {e}") + # Continue waiting for other tasks even if one fails + + # All tasks completed, now calculate grade + try: + product = Product.objects.get(id=product_id) + logger.info(f"All post-processing tasks completed, calculating grade for product {product.name}") + calculate_grade(product) + except Product.DoesNotExist: + logger.error(f"Product {product_id} not found for grade calculation") + except Exception as e: + logger.error(f"Error calculating grade for product {product_id}: {e}") + + @app.task def evaluate_pro_proposition(*args, **kwargs): # Ensure we should be doing this From b9b481509606c3d07d1e233aaba41e20d4f68c4d Mon Sep 17 00:00:00 2001 From: Valentijn Scholten Date: Sat, 13 Sep 2025 23:38:57 +0200 Subject: [PATCH 34/53] handle reimport and close old findings --- dojo/importers/base_importer.py | 5 ++-- dojo/importers/default_importer.py | 6 +++++ dojo/importers/default_reimporter.py | 39 ++++++++++++++++++++++++++-- 3 files changed, 46 insertions(+), 4 deletions(-) diff --git a/dojo/importers/base_importer.py b/dojo/importers/base_importer.py index 503610b5426..428bc7f897f 100644 --- a/dojo/importers/base_importer.py +++ b/dojo/importers/base_importer.py @@ -743,6 +743,7 @@ def mitigate_finding( note_message: str, *, finding_groups_enabled: bool, + product_grading_option: bool = True, ) -> None: """ Mitigates a finding, all endpoint statuses, leaves a note on the finding @@ -764,9 +765,9 @@ def mitigate_finding( # to avoid pushing a finding group multiple times, we 
push those outside of the loop if finding_groups_enabled and finding.finding_group: # don't try to dedupe findings that we are closing - finding.save(dedupe_option=False) + finding.save(dedupe_option=False, product_grading_option=product_grading_option) else: - finding.save(dedupe_option=False, push_to_jira=self.push_to_jira) + finding.save(dedupe_option=False, push_to_jira=self.push_to_jira, product_grading_option=product_grading_option) def notify_scan_added( self, diff --git a/dojo/importers/default_importer.py b/dojo/importers/default_importer.py index 2a885fdc71f..e4a854f1eed 100644 --- a/dojo/importers/default_importer.py +++ b/dojo/importers/default_importer.py @@ -357,12 +357,18 @@ def close_old_findings( "as it is not present anymore in recent scans." ), finding_groups_enabled=self.findings_groups_enabled, + product_grading_option=False, ) # push finding groups to jira since we only only want to push whole groups if self.findings_groups_enabled and self.push_to_jira: for finding_group in {finding.finding_group for finding in old_findings if finding.finding_group is not None}: jira_helper.push_to_jira(finding_group) + # Calculate grade once after all findings have been closed + if old_findings: + product = self.test.engagement.product + calculate_grade(product) + return old_findings def parse_findings_static_test_type( diff --git a/dojo/importers/default_reimporter.py b/dojo/importers/default_reimporter.py index 455730a647b..13b9a257779 100644 --- a/dojo/importers/default_reimporter.py +++ b/dojo/importers/default_reimporter.py @@ -6,6 +6,7 @@ import dojo.finding.helper as finding_helper import dojo.jira_link.helper as jira_helper +from dojo.decorators import we_want_async from dojo.importers.base_importer import BaseImporter, Parser from dojo.importers.options import ImporterOptions from dojo.models import ( @@ -15,6 +16,8 @@ Test, Test_Import, ) +from dojo.tasks import wait_for_tasks_and_calculate_grade +from dojo.utils import calculate_grade from 
dojo.validators import clean_tags logger = logging.getLogger(__name__) @@ -176,6 +179,7 @@ def process_findings( self.reactivated_items = [] self.unchanged_items = [] self.group_names_to_findings_dict = {} + async_task_ids = [] logger.debug(f"starting reimport of {len(parsed_findings) if parsed_findings else 0} items.") logger.debug("STEP 1: looping over findings from the reimported report and trying to match them to existing findings") @@ -238,9 +242,24 @@ def process_findings( ) # all data is already saved on the finding, we only need to trigger post processing - # to avoid pushing a finding group multiple times, we push those outside of the loop + # Execute post-processing task immediately if async, otherwise execute synchronously push_to_jira = self.push_to_jira and (not self.findings_groups_enabled or not self.group_by) - finding_helper.post_process_finding_save(finding, dedupe_option=True, rules_option=True, product_grading_option=True, issue_updater_option=True, push_to_jira=push_to_jira) + + post_processing_task_signature = finding_helper.post_process_finding_save_signature( + finding, + dedupe_option=True, + rules_option=True, + product_grading_option=False, + issue_updater_option=True, + push_to_jira=push_to_jira, + ) + if we_want_async(async_user=self.user): + # Execute task immediately and collect task ID + result = post_processing_task_signature.apply_async() + async_task_ids.append(result.id) + else: + # Execute task immediately for synchronous processing + post_processing_task_signature() self.to_mitigate = (set(self.original_items) - set(self.reactivated_items) - set(self.unchanged_items)) # due to #3958 we can have duplicates inside the same report @@ -252,6 +271,16 @@ def process_findings( self.untouched = set(self.unchanged_items) - set(self.to_mitigate) - set(self.new_items) - set(self.reactivated_items) # Process groups self.process_groups_for_all_findings(**kwargs) + + # Calculate product grade once after all findings are processed + product = 
self.test.engagement.product + + if we_want_async(async_user=self.user) and async_task_ids: + # Tasks were executed immediately during processing, now coordinate final grade calculation + wait_for_tasks_and_calculate_grade.delay(async_task_ids, product.id) + # Synchronous tasks were already executed during processing, just calculate grade + calculate_grade(product) + # Process the results and return them back return self.process_results(**kwargs) @@ -286,6 +315,7 @@ def close_old_findings( finding, f"Mitigated by {self.test.test_type} re-upload.", finding_groups_enabled=self.findings_groups_enabled, + product_grading_option=False, ) mitigated_findings.append(finding) # push finding groups to jira since we only only want to push whole groups @@ -293,6 +323,11 @@ def close_old_findings( for finding_group in {finding.finding_group for finding in findings if finding.finding_group is not None}: jira_helper.push_to_jira(finding_group) + # Calculate grade once after all findings have been closed + if mitigated_findings: + product = self.test.engagement.product + calculate_grade(product) + return mitigated_findings def parse_findings_static_test_type( From 26295cbd7dd2c314d7a7198ca5526d1782db00a9 Mon Sep 17 00:00:00 2001 From: Valentijn Scholten Date: Sat, 13 Sep 2025 23:55:30 +0200 Subject: [PATCH 35/53] update query and task counts --- unittests/test_importers_performance.py | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/unittests/test_importers_performance.py b/unittests/test_importers_performance.py index fd8c5031f6c..ac29f8cf4a9 100644 --- a/unittests/test_importers_performance.py +++ b/unittests/test_importers_performance.py @@ -176,12 +176,12 @@ def import_reimport_performance(self, expected_num_queries1, expected_num_async_ # def test_import_reimport_reimport_performance_async(self, mock): def test_import_reimport_reimport_performance_async(self): self.import_reimport_performance( - expected_num_queries1=684, - 
expected_num_async_tasks1=12, + expected_num_queries1=713, + expected_num_async_tasks1=11, expected_num_queries2=610, - expected_num_async_tasks2=22, + expected_num_async_tasks2=23, expected_num_queries3=292, - expected_num_async_tasks3=20, + expected_num_async_tasks3=21, ) # @patch("dojo.decorators.we_want_async", return_value=False) @@ -198,12 +198,12 @@ def test_import_reimport_reimport_performance_no_async(self): testuser.usercontactinfo.block_execution = True testuser.usercontactinfo.save() self.import_reimport_performance( - expected_num_queries1=683, + expected_num_queries1=713, expected_num_async_tasks1=11, expected_num_queries2=615, - expected_num_async_tasks2=22, + expected_num_async_tasks2=23, expected_num_queries3=297, - expected_num_async_tasks3=20, + expected_num_async_tasks3=21, ) # @patch("dojo.decorators.we_want_async", return_value=False) @@ -222,10 +222,10 @@ def test_import_reimport_reimport_performance_no_async_with_product_grading(self self.system_settings(enable_product_grade=True) self.import_reimport_performance( - expected_num_queries1=687, + expected_num_queries1=717, expected_num_async_tasks1=11, - expected_num_queries2=645, - expected_num_async_tasks2=28, - expected_num_queries3=322, - expected_num_async_tasks3=25, + expected_num_queries2=619, + expected_num_async_tasks2=23, + expected_num_queries3=301, + expected_num_async_tasks3=21, ) From 87e5d457c2bc2ab2331950096deb6ee1beb1ed05 Mon Sep 17 00:00:00 2001 From: Valentijn Scholten Date: Sun, 14 Sep 2025 22:19:48 +0200 Subject: [PATCH 36/53] switch back to chords --- dojo/importers/default_importer.py | 44 +++++++++++++++++++++------- dojo/importers/default_reimporter.py | 43 +++++++++++++++++++++------ dojo/tasks.py | 31 -------------------- 3 files changed, 68 insertions(+), 50 deletions(-) diff --git a/dojo/importers/default_importer.py b/dojo/importers/default_importer.py index e4a854f1eed..dde117e9e39 100644 --- a/dojo/importers/default_importer.py +++ 
b/dojo/importers/default_importer.py @@ -1,11 +1,13 @@ import logging +from celery import chord from django.core.files.uploadedfile import TemporaryUploadedFile from django.core.serializers import serialize from django.db.models.query_utils import Q from django.urls import reverse import dojo.jira_link.helper as jira_helper +from dojo import utils from dojo.decorators import we_want_async from dojo.finding import helper as finding_helper from dojo.importers.base_importer import BaseImporter, Parser @@ -17,7 +19,6 @@ Test_Import, ) from dojo.notifications.helper import create_notification -from dojo.tasks import wait_for_tasks_and_calculate_grade from dojo.utils import calculate_grade from dojo.validators import clean_tags @@ -158,7 +159,11 @@ def process_findings( parsed_findings: list[Finding], **kwargs: dict, ) -> list[Finding]: - async_task_ids = [] + # Progressive batching for chord execution + post_processing_task_signatures = [] + current_batch_number = 1 + max_batch_size = 1024 + pending_grade_calculations = [] """ Saves findings in memory that were parsed from the scan report into the database. 
@@ -248,10 +253,25 @@ def process_findings( push_to_jira=push_to_jira, ) - # We need to call apply_async to get the result of the task so we can collect the task ID if we_want_async(async_user=self.user): - result = post_processing_task_signature.apply_async() - async_task_ids.append(result.id) + # Collect signatures for progressive batch execution + post_processing_task_signatures.append(post_processing_task_signature) + + # Calculate current batch size: 2^batch_number, capped at max_batch_size + current_batch_size = min(2 ** current_batch_number, max_batch_size) + + # Launch chord when batch is full + if len(post_processing_task_signatures) >= current_batch_size: + product = self.test.engagement.product + calculate_grade_signature = utils.calculate_grade_signature(product) + chord_result = chord(post_processing_task_signatures)(calculate_grade_signature) + pending_grade_calculations.append(chord_result) + + logger.debug(f"Launched chord with {len(post_processing_task_signatures)} tasks (batch #{current_batch_number}, size: {current_batch_size})") + + # Reset for next batch + post_processing_task_signatures = [] + current_batch_number += 1 else: # Execute task immediately for synchronous processing post_processing_task_signature() @@ -270,14 +290,18 @@ def process_findings( else: jira_helper.push_to_jira(findings[0]) - # Calculate product grade after all findings are processed + # Handle any remaining signatures in the final batch product = self.test.engagement.product - if we_want_async(async_user=self.user) and async_task_ids: - # Tasks were executed immediately during processing, now coordinate final grade calculation - wait_for_tasks_and_calculate_grade.delay(async_task_ids, product.id) + if we_want_async(async_user=self.user): + if post_processing_task_signatures: + # Launch final chord with remaining signatures + calculate_grade_signature = utils.calculate_grade_signature(product) + chord_result = 
chord(post_processing_task_signatures)(calculate_grade_signature) + pending_grade_calculations.append(chord_result) + logger.debug(f"Launched final chord with {len(post_processing_task_signatures)} remaining tasks") - # Synchronous tasks were already executed during processing, just calculate grade + # Always perform an initial grading, even though it might get overwritten alter. calculate_grade(product) sync = kwargs.get("sync", True) diff --git a/dojo/importers/default_reimporter.py b/dojo/importers/default_reimporter.py index 13b9a257779..9fa3b14ec93 100644 --- a/dojo/importers/default_reimporter.py +++ b/dojo/importers/default_reimporter.py @@ -1,11 +1,13 @@ import logging +from celery import chord from django.core.files.uploadedfile import TemporaryUploadedFile from django.core.serializers import serialize from django.db.models.query_utils import Q import dojo.finding.helper as finding_helper import dojo.jira_link.helper as jira_helper +from dojo import utils from dojo.decorators import we_want_async from dojo.importers.base_importer import BaseImporter, Parser from dojo.importers.options import ImporterOptions @@ -16,7 +18,6 @@ Test, Test_Import, ) -from dojo.tasks import wait_for_tasks_and_calculate_grade from dojo.utils import calculate_grade from dojo.validators import clean_tags @@ -179,7 +180,11 @@ def process_findings( self.reactivated_items = [] self.unchanged_items = [] self.group_names_to_findings_dict = {} - async_task_ids = [] + # Progressive batching for chord execution + post_processing_task_signatures = [] + current_batch_number = 1 + max_batch_size = 1024 + pending_grade_calculations = [] logger.debug(f"starting reimport of {len(parsed_findings) if parsed_findings else 0} items.") logger.debug("STEP 1: looping over findings from the reimported report and trying to match them to existing findings") @@ -254,9 +259,24 @@ def process_findings( push_to_jira=push_to_jira, ) if we_want_async(async_user=self.user): - # Execute task immediately and 
collect task ID - result = post_processing_task_signature.apply_async() - async_task_ids.append(result.id) + # Collect signatures for progressive batch execution + post_processing_task_signatures.append(post_processing_task_signature) + + # Calculate current batch size: 2^batch_number, capped at max_batch_size + current_batch_size = min(2 ** current_batch_number, max_batch_size) + + # Launch chord when batch is full + if len(post_processing_task_signatures) >= current_batch_size: + product = self.test.engagement.product + calculate_grade_signature = utils.calculate_grade_signature(product) + chord_result = chord(post_processing_task_signatures)(calculate_grade_signature) + pending_grade_calculations.append(chord_result) + + logger.debug(f"Launched chord with {len(post_processing_task_signatures)} tasks (batch #{current_batch_number}, size: {current_batch_size})") + + # Reset for next batch + post_processing_task_signatures = [] + current_batch_number += 1 else: # Execute task immediately for synchronous processing post_processing_task_signature() @@ -272,12 +292,17 @@ def process_findings( # Process groups self.process_groups_for_all_findings(**kwargs) - # Calculate product grade once after all findings are processed + # Handle any remaining signatures in the final batch product = self.test.engagement.product - if we_want_async(async_user=self.user) and async_task_ids: - # Tasks were executed immediately during processing, now coordinate final grade calculation - wait_for_tasks_and_calculate_grade.delay(async_task_ids, product.id) + if we_want_async(async_user=self.user): + if post_processing_task_signatures: + # Launch final chord with remaining signatures + calculate_grade_signature = utils.calculate_grade_signature(product) + chord_result = chord(post_processing_task_signatures)(calculate_grade_signature) + pending_grade_calculations.append(chord_result) + logger.debug(f"Launched final chord with {len(post_processing_task_signatures)} remaining tasks") + # 
Synchronous tasks were already executed during processing, just calculate grade calculate_grade(product) diff --git a/dojo/tasks.py b/dojo/tasks.py index c0b5059db91..0957ac40194 100644 --- a/dojo/tasks.py +++ b/dojo/tasks.py @@ -2,7 +2,6 @@ from datetime import date, timedelta from auditlog.models import LogEntry -from celery.result import AsyncResult from celery.utils.log import get_task_logger from dateutil.relativedelta import relativedelta from django.conf import settings @@ -193,36 +192,6 @@ def fix_loop_duplicates_task(*args, **kwargs): return fix_loop_duplicates() -@app.task -def wait_for_tasks_and_calculate_grade(task_ids, product_id, *args, **kwargs): - """ - Wait for all specified tasks to complete, then calculate product grade. - This provides coordination for immediate task execution without using chord. - """ - logger.info(f"Waiting for {len(task_ids)} tasks to complete before calculating grade for product {product_id}") - - # Wait for all tasks to complete - results = [AsyncResult(task_id) for task_id in task_ids] - - # This will block until all tasks are done - for result in results: - try: - result.get(timeout=300) # 5 minute timeout per task - except Exception as e: - logger.warning(f"Task {result.id} failed: {e}") - # Continue waiting for other tasks even if one fails - - # All tasks completed, now calculate grade - try: - product = Product.objects.get(id=product_id) - logger.info(f"All post-processing tasks completed, calculating grade for product {product.name}") - calculate_grade(product) - except Product.DoesNotExist: - logger.error(f"Product {product_id} not found for grade calculation") - except Exception as e: - logger.error(f"Error calculating grade for product {product_id}: {e}") - - @app.task def evaluate_pro_proposition(*args, **kwargs): # Ensure we should be doing this From 930792da3d7b2da67795a71ee3a3a24b11da322c Mon Sep 17 00:00:00 2001 From: Valentijn Scholten Date: Sun, 14 Sep 2025 22:57:27 +0200 Subject: [PATCH 37/53] simplify 
--- dojo/importers/default_importer.py | 49 ++++++++++---------- dojo/importers/default_reimporter.py | 60 +++++++++++++------------ unittests/test_importers_performance.py | 12 ++--- 3 files changed, 63 insertions(+), 58 deletions(-) diff --git a/dojo/importers/default_importer.py b/dojo/importers/default_importer.py index dde117e9e39..3329d5470b9 100644 --- a/dojo/importers/default_importer.py +++ b/dojo/importers/default_importer.py @@ -163,7 +163,6 @@ def process_findings( post_processing_task_signatures = [] current_batch_number = 1 max_batch_size = 1024 - pending_grade_calculations = [] """ Saves findings in memory that were parsed from the scan report into the database. @@ -176,7 +175,13 @@ def process_findings( logger.debug("starting import of %i parsed findings.", len(parsed_findings) if parsed_findings else 0) group_names_to_findings_dict = {} - for non_clean_unsaved_finding in parsed_findings: + # Create iterator over parsed findings + findings_iterator = iter(parsed_findings) + + # Get first finding to start the loop + non_clean_unsaved_finding = next(findings_iterator, None) + + while non_clean_unsaved_finding: # make sure the severity is something is digestible unsaved_finding = self.sanitize_severity(non_clean_unsaved_finding) # Filter on minimum severity if applicable @@ -253,25 +258,31 @@ def process_findings( push_to_jira=push_to_jira, ) - if we_want_async(async_user=self.user): - # Collect signatures for progressive batch execution - post_processing_task_signatures.append(post_processing_task_signature) + post_processing_task_signatures.append(post_processing_task_signature) + # Get next finding for next iteration + non_clean_unsaved_finding = next(findings_iterator, None) + is_final = not non_clean_unsaved_finding + + # Check if we should launch a chord (batch full or end of findings) + if we_want_async(async_user=self.user) and post_processing_task_signatures: # Calculate current batch size: 2^batch_number, capped at max_batch_size 
current_batch_size = min(2 ** current_batch_number, max_batch_size) - # Launch chord when batch is full - if len(post_processing_task_signatures) >= current_batch_size: + batch_full = len(post_processing_task_signatures) >= current_batch_size + + if batch_full or is_final: + # Launch chord with current batch of signatures product = self.test.engagement.product calculate_grade_signature = utils.calculate_grade_signature(product) - chord_result = chord(post_processing_task_signatures)(calculate_grade_signature) - pending_grade_calculations.append(chord_result) + chord(post_processing_task_signatures)(calculate_grade_signature) - logger.debug(f"Launched chord with {len(post_processing_task_signatures)} tasks (batch #{current_batch_number}, size: {current_batch_size})") + logger.debug(f"Launched chord with {len(post_processing_task_signatures)} tasks (batch #{current_batch_number}, size: {len(post_processing_task_signatures)})") - # Reset for next batch + # Reset for next batch (only if not final) post_processing_task_signatures = [] - current_batch_number += 1 + if not is_final: + current_batch_number += 1 else: # Execute task immediately for synchronous processing post_processing_task_signature() @@ -290,18 +301,10 @@ def process_findings( else: jira_helper.push_to_jira(findings[0]) - # Handle any remaining signatures in the final batch - product = self.test.engagement.product - - if we_want_async(async_user=self.user): - if post_processing_task_signatures: - # Launch final chord with remaining signatures - calculate_grade_signature = utils.calculate_grade_signature(product) - chord_result = chord(post_processing_task_signatures)(calculate_grade_signature) - pending_grade_calculations.append(chord_result) - logger.debug(f"Launched final chord with {len(post_processing_task_signatures)} remaining tasks") + # Note: All chord batching is now handled within the loop above - # Always perform an initial grading, even though it might get overwritten alter. 
+ # Always perform an initial grading, even though it might get overwritten later. + product = self.test.engagement.product calculate_grade(product) sync = kwargs.get("sync", True) diff --git a/dojo/importers/default_reimporter.py b/dojo/importers/default_reimporter.py index 9fa3b14ec93..ec456ee0acf 100644 --- a/dojo/importers/default_reimporter.py +++ b/dojo/importers/default_reimporter.py @@ -184,13 +184,18 @@ def process_findings( post_processing_task_signatures = [] current_batch_number = 1 max_batch_size = 1024 - pending_grade_calculations = [] logger.debug(f"starting reimport of {len(parsed_findings) if parsed_findings else 0} items.") logger.debug("STEP 1: looping over findings from the reimported report and trying to match them to existing findings") deduplicationLogger.debug(f"Algorithm used for matching new findings to existing findings: {self.deduplication_algorithm}") - for non_clean_unsaved_finding in parsed_findings: + # Create iterator over parsed findings + findings_iterator = iter(parsed_findings) + + # Get first finding to start the loop + non_clean_unsaved_finding = next(findings_iterator, None) + + while non_clean_unsaved_finding: # make sure the severity is something is digestible unsaved_finding = self.sanitize_severity(non_clean_unsaved_finding) # Filter on minimum severity if applicable @@ -258,28 +263,33 @@ def process_findings( issue_updater_option=True, push_to_jira=push_to_jira, ) - if we_want_async(async_user=self.user): - # Collect signatures for progressive batch execution - post_processing_task_signatures.append(post_processing_task_signature) + post_processing_task_signatures.append(post_processing_task_signature) + + # Get next finding for next iteration + non_clean_unsaved_finding = next(findings_iterator, None) - # Calculate current batch size: 2^batch_number, capped at max_batch_size - current_batch_size = min(2 ** current_batch_number, max_batch_size) + # Check if we should launch a chord (batch full or end of findings) + if 
we_want_async(async_user=self.user) and post_processing_task_signatures: + # Calculate current batch size: 2^batch_number, capped at max_batch_size + current_batch_size = min(2 ** current_batch_number, max_batch_size) - # Launch chord when batch is full - if len(post_processing_task_signatures) >= current_batch_size: - product = self.test.engagement.product - calculate_grade_signature = utils.calculate_grade_signature(product) - chord_result = chord(post_processing_task_signatures)(calculate_grade_signature) - pending_grade_calculations.append(chord_result) + batch_full = len(post_processing_task_signatures) >= current_batch_size + is_final = not non_clean_unsaved_finding - logger.debug(f"Launched chord with {len(post_processing_task_signatures)} tasks (batch #{current_batch_number}, size: {current_batch_size})") + if batch_full or is_final: + # Launch chord with current batch of signatures + product = self.test.engagement.product + calculate_grade_signature = utils.calculate_grade_signature(product) + chord(post_processing_task_signatures)(calculate_grade_signature) - # Reset for next batch - post_processing_task_signatures = [] + logger.debug(f"Launched chord with {len(post_processing_task_signatures)} tasks (batch #{current_batch_number}, size: {len(post_processing_task_signatures)})") + + # Reset for next batch (only if not final) + post_processing_task_signatures = [] + if not is_final: current_batch_number += 1 - else: - # Execute task immediately for synchronous processing - post_processing_task_signature() + else: + post_processing_task_signature() self.to_mitigate = (set(self.original_items) - set(self.reactivated_items) - set(self.unchanged_items)) # due to #3958 we can have duplicates inside the same report @@ -292,18 +302,10 @@ def process_findings( # Process groups self.process_groups_for_all_findings(**kwargs) - # Handle any remaining signatures in the final batch - product = self.test.engagement.product - - if we_want_async(async_user=self.user): - 
if post_processing_task_signatures: - # Launch final chord with remaining signatures - calculate_grade_signature = utils.calculate_grade_signature(product) - chord_result = chord(post_processing_task_signatures)(calculate_grade_signature) - pending_grade_calculations.append(chord_result) - logger.debug(f"Launched final chord with {len(post_processing_task_signatures)} remaining tasks") + # Note: All chord batching is now handled within the loop above # Synchronous tasks were already executed during processing, just calculate grade + product = self.test.engagement.product calculate_grade(product) # Process the results and return them back diff --git a/unittests/test_importers_performance.py b/unittests/test_importers_performance.py index ac29f8cf4a9..df32ca1c651 100644 --- a/unittests/test_importers_performance.py +++ b/unittests/test_importers_performance.py @@ -176,12 +176,12 @@ def import_reimport_performance(self, expected_num_queries1, expected_num_async_ # def test_import_reimport_reimport_performance_async(self, mock): def test_import_reimport_reimport_performance_async(self): self.import_reimport_performance( - expected_num_queries1=713, - expected_num_async_tasks1=11, - expected_num_queries2=610, - expected_num_async_tasks2=23, - expected_num_queries3=292, - expected_num_async_tasks3=21, + expected_num_queries1=715, + expected_num_async_tasks1=13, + expected_num_queries2=612, + expected_num_async_tasks2=25, + expected_num_queries3=294, + expected_num_async_tasks3=23, ) # @patch("dojo.decorators.we_want_async", return_value=False) From baac1717b74531215c5ed9ed59349b1ab0557c90 Mon Sep 17 00:00:00 2001 From: Valentijn Scholten Date: Sun, 14 Sep 2025 23:08:02 +0200 Subject: [PATCH 38/53] respect system settings product grading enabled --- dojo/importers/default_importer.py | 20 +++++++++++++++----- dojo/importers/default_reimporter.py | 20 +++++++++++++++----- unittests/test_importers_performance.py | 18 +++++++++--------- 3 files changed, 39 insertions(+), 19 
deletions(-) diff --git a/dojo/importers/default_importer.py b/dojo/importers/default_importer.py index 3329d5470b9..a7480e32142 100644 --- a/dojo/importers/default_importer.py +++ b/dojo/importers/default_importer.py @@ -1,6 +1,6 @@ import logging -from celery import chord +from celery import chord, group from django.core.files.uploadedfile import TemporaryUploadedFile from django.core.serializers import serialize from django.db.models.query_utils import Q @@ -15,6 +15,7 @@ from dojo.models import ( Engagement, Finding, + System_Settings, Test, Test_Import, ) @@ -274,8 +275,13 @@ def process_findings( if batch_full or is_final: # Launch chord with current batch of signatures product = self.test.engagement.product - calculate_grade_signature = utils.calculate_grade_signature(product) - chord(post_processing_task_signatures)(calculate_grade_signature) + system_settings = System_Settings.objects.get() + if system_settings.enable_product_grade: + calculate_grade_signature = utils.calculate_grade_signature(product) + chord(post_processing_task_signatures)(calculate_grade_signature) + # If product grading is disabled, just run the post-processing tasks without the grade calculation callback + elif post_processing_task_signatures: + group(post_processing_task_signatures).apply_async() logger.debug(f"Launched chord with {len(post_processing_task_signatures)} tasks (batch #{current_batch_number}, size: {len(post_processing_task_signatures)})") @@ -305,7 +311,9 @@ def process_findings( # Always perform an initial grading, even though it might get overwritten later. 
product = self.test.engagement.product - calculate_grade(product) + system_settings = System_Settings.objects.get() + if system_settings.enable_product_grade: + calculate_grade(product) sync = kwargs.get("sync", True) if not sync: @@ -394,7 +402,9 @@ def close_old_findings( # Calculate grade once after all findings have been closed if old_findings: product = self.test.engagement.product - calculate_grade(product) + system_settings = System_Settings.objects.get() + if system_settings.enable_product_grade: + calculate_grade(product) return old_findings diff --git a/dojo/importers/default_reimporter.py b/dojo/importers/default_reimporter.py index ec456ee0acf..25f6da5635d 100644 --- a/dojo/importers/default_reimporter.py +++ b/dojo/importers/default_reimporter.py @@ -1,6 +1,6 @@ import logging -from celery import chord +from celery import chord, group from django.core.files.uploadedfile import TemporaryUploadedFile from django.core.serializers import serialize from django.db.models.query_utils import Q @@ -15,6 +15,7 @@ Development_Environment, Finding, Notes, + System_Settings, Test, Test_Import, ) @@ -279,8 +280,13 @@ def process_findings( if batch_full or is_final: # Launch chord with current batch of signatures product = self.test.engagement.product - calculate_grade_signature = utils.calculate_grade_signature(product) - chord(post_processing_task_signatures)(calculate_grade_signature) + system_settings = System_Settings.objects.get() + if system_settings.enable_product_grade: + calculate_grade_signature = utils.calculate_grade_signature(product) + chord(post_processing_task_signatures)(calculate_grade_signature) + # If product grading is disabled, just run the post-processing tasks without the grade calculation callback + elif post_processing_task_signatures: + group(post_processing_task_signatures).apply_async() logger.debug(f"Launched chord with {len(post_processing_task_signatures)} tasks (batch #{current_batch_number}, size: 
{len(post_processing_task_signatures)})") @@ -306,7 +312,9 @@ def process_findings( # Synchronous tasks were already executed during processing, just calculate grade product = self.test.engagement.product - calculate_grade(product) + system_settings = System_Settings.objects.get() + if system_settings.enable_product_grade: + calculate_grade(product) # Process the results and return them back return self.process_results(**kwargs) @@ -353,7 +361,9 @@ def close_old_findings( # Calculate grade once after all findings have been closed if mitigated_findings: product = self.test.engagement.product - calculate_grade(product) + system_settings = System_Settings.objects.get() + if system_settings.enable_product_grade: + calculate_grade(product) return mitigated_findings diff --git a/unittests/test_importers_performance.py b/unittests/test_importers_performance.py index df32ca1c651..9c77e4b3800 100644 --- a/unittests/test_importers_performance.py +++ b/unittests/test_importers_performance.py @@ -176,12 +176,12 @@ def import_reimport_performance(self, expected_num_queries1, expected_num_async_ # def test_import_reimport_reimport_performance_async(self, mock): def test_import_reimport_reimport_performance_async(self): self.import_reimport_performance( - expected_num_queries1=715, - expected_num_async_tasks1=13, - expected_num_queries2=612, - expected_num_async_tasks2=25, - expected_num_queries3=294, - expected_num_async_tasks3=23, + expected_num_queries1=713, + expected_num_async_tasks1=10, + expected_num_queries2=610, + expected_num_async_tasks2=22, + expected_num_queries3=292, + expected_num_async_tasks3=20, ) # @patch("dojo.decorators.we_want_async", return_value=False) @@ -199,11 +199,11 @@ def test_import_reimport_reimport_performance_no_async(self): testuser.usercontactinfo.save() self.import_reimport_performance( expected_num_queries1=713, - expected_num_async_tasks1=11, + expected_num_async_tasks1=10, expected_num_queries2=615, - expected_num_async_tasks2=23, + 
expected_num_async_tasks2=22, expected_num_queries3=297, - expected_num_async_tasks3=21, + expected_num_async_tasks3=20, ) # @patch("dojo.decorators.we_want_async", return_value=False) From fdcd1ec20dbce27c14402f226892ac5e610f8ad1 Mon Sep 17 00:00:00 2001 From: Valentijn Scholten Date: Sun, 14 Sep 2025 23:19:28 +0200 Subject: [PATCH 39/53] finding/test delete grading only if enabled --- dojo/finding/helper.py | 1 - dojo/models.py | 8 ++++++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/dojo/finding/helper.py b/dojo/finding/helper.py index 61c3c3945b8..b9c9d1425b3 100644 --- a/dojo/finding/helper.py +++ b/dojo/finding/helper.py @@ -477,7 +477,6 @@ def finding_post_delete(sender, instance, **kwargs): # Catch instances in async delete where a single object is deleted more than once with suppress(Finding.DoesNotExist): logger.debug("finding post_delete, sender: %s instance: %s", to_str_typed(sender), to_str_typed(instance)) - # calculate_grade(instance.test.engagement.product) def reset_duplicate_before_delete(dupe): diff --git a/dojo/models.py b/dojo/models.py index 6cc1cf16b19..f837c8394d4 100644 --- a/dojo/models.py +++ b/dojo/models.py @@ -2260,7 +2260,9 @@ def delete(self, *args, **kwargs): with suppress(Test.DoesNotExist, Engagement.DoesNotExist, Product.DoesNotExist): # Suppressing a potential issue created from async delete removing # related objects in a separate task - calculate_grade(self.engagement.product) + system_settings = System_Settings.objects.get() + if system_settings.enable_product_grade: + calculate_grade(self.engagement.product) @property def statistics(self): @@ -2869,7 +2871,9 @@ def delete(self, *args, **kwargs): with suppress(Finding.DoesNotExist, Test.DoesNotExist, Engagement.DoesNotExist, Product.DoesNotExist): # Suppressing a potential issue created from async delete removing # related objects in a separate task - calculate_grade(self.test.engagement.product) + system_settings = System_Settings.objects.get() + if 
system_settings.enable_product_grade: + calculate_grade(self.test.engagement.product) # only used by bulk risk acceptance api @classmethod From a1f0005bfb94684d7f3189d6e9ba279a4d970eb9 Mon Sep 17 00:00:00 2001 From: Valentijn Scholten Date: Sun, 14 Sep 2025 23:26:49 +0200 Subject: [PATCH 40/53] optimize async_dupe_delete grading --- dojo/models.py | 30 ++++++++++++++++-------------- dojo/tasks.py | 16 ++++++++++++++-- 2 files changed, 30 insertions(+), 16 deletions(-) diff --git a/dojo/models.py b/dojo/models.py index f837c8394d4..92fef262a00 100644 --- a/dojo/models.py +++ b/dojo/models.py @@ -2254,15 +2254,16 @@ def hash_code_allows_null_cwe(self): deduplicationLogger.debug(f"HASHCODE_ALLOWS_NULL_CWE is: {hashCodeAllowsNullCwe}") return hashCodeAllowsNullCwe - def delete(self, *args, **kwargs): + def delete(self, *args, product_grading_option=True, **kwargs): logger.debug("%d test delete", self.id) super().delete(*args, **kwargs) - with suppress(Test.DoesNotExist, Engagement.DoesNotExist, Product.DoesNotExist): - # Suppressing a potential issue created from async delete removing - # related objects in a separate task - system_settings = System_Settings.objects.get() - if system_settings.enable_product_grade: - calculate_grade(self.engagement.product) + if product_grading_option: + with suppress(Test.DoesNotExist, Engagement.DoesNotExist, Product.DoesNotExist): + # Suppressing a potential issue created from async delete removing + # related objects in a separate task + system_settings = System_Settings.objects.get() + if system_settings.enable_product_grade: + calculate_grade(self.engagement.product) @property def statistics(self): @@ -2863,17 +2864,18 @@ def copy(self, test=None): return copy - def delete(self, *args, **kwargs): + def delete(self, *args, product_grading_option=True, **kwargs): logger.debug("%d finding delete", self.id) from dojo.finding import helper helper.finding_delete(self) super().delete(*args, **kwargs) - with suppress(Finding.DoesNotExist,
Test.DoesNotExist, Engagement.DoesNotExist, Product.DoesNotExist): - # Suppressing a potential issue created from async delete removing - # related objects in a separate task - system_settings = System_Settings.objects.get() - if system_settings.enable_product_grade: - calculate_grade(self.test.engagement.product) + if product_grading_option: + with suppress(Finding.DoesNotExist, Test.DoesNotExist, Engagement.DoesNotExist, Product.DoesNotExist): + # Suppressing a potential issue created from async delete removing + # related objects in a separate task + system_settings = System_Settings.objects.get() + if system_settings.enable_product_grade: + calculate_grade(self.test.engagement.product) # only used by bulk risk acceptance api @classmethod diff --git a/dojo/tasks.py b/dojo/tasks.py index 0957ac40194..7488fadf962 100644 --- a/dojo/tasks.py +++ b/dojo/tasks.py @@ -129,7 +129,7 @@ def async_dupe_delete(*args, **kwargs): logger.info("delete excess duplicates (max_dupes per finding: %s, max deletes per run: %s)", dupe_max, total_duplicate_delete_count_max_per_run) deduplicationLogger.info("delete excess duplicates (max_dupes per finding: %s, max deletes per run: %s)", dupe_max, total_duplicate_delete_count_max_per_run) - # limit to 100 to prevent overlapping jobs + # limit to settings.DUPE_DELETE_MAX_PER_RUN to prevent overlapping jobs results = Finding.objects \ .filter(duplicate=True) \ .order_by() \ @@ -146,13 +146,17 @@ def async_dupe_delete(*args, **kwargs): queryset=Finding.objects.filter(duplicate=True).order_by("date"))) total_deleted_count = 0 + affected_products = set() for original in originals_with_too_many_duplicates: duplicate_list = original.original_finding.all() dupe_count = len(duplicate_list) - dupe_max for finding in duplicate_list: deduplicationLogger.debug(f"deleting finding {finding.id}:{finding.title} ({finding.hash_code}))") - finding.delete() + # Collect the product for batch grading later + 
affected_products.add(finding.test.engagement.product) + # Skip individual product grading during deletion + finding.delete(product_grading_option=False) total_deleted_count += 1 dupe_count -= 1 if dupe_count <= 0: @@ -165,6 +169,14 @@ def async_dupe_delete(*args, **kwargs): logger.info("total number of excess duplicates deleted: %s", total_deleted_count) + # Batch product grading for all affected products + if affected_products: + system_settings = System_Settings.objects.get() + if system_settings.enable_product_grade: + logger.info("performing batch product grading for %s products", len(affected_products)) + for product in affected_products: + calculate_grade(product) + @app.task(ignore_result=False) def celery_status(): From bd997832ae788d1adba3fdce4a2de6fcba3ef5ea Mon Sep 17 00:00:00 2001 From: Valentijn Scholten Date: Sun, 14 Sep 2025 23:29:52 +0200 Subject: [PATCH 41/53] cleanup comments --- dojo/importers/default_importer.py | 4 +++- dojo/importers/default_reimporter.py | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/dojo/importers/default_importer.py b/dojo/importers/default_importer.py index a7480e32142..a6804b385db 100644 --- a/dojo/importers/default_importer.py +++ b/dojo/importers/default_importer.py @@ -268,6 +268,8 @@ def process_findings( # Check if we should launch a chord (batch full or end of findings) if we_want_async(async_user=self.user) and post_processing_task_signatures: # Calculate current batch size: 2^batch_number, capped at max_batch_size + # We do this because post processing only starts after all tasks have been added to the chord + # So we start with small batches to minmize the delay current_batch_size = min(2 ** current_batch_number, max_batch_size) batch_full = len(post_processing_task_signatures) >= current_batch_size @@ -279,8 +281,8 @@ def process_findings( if system_settings.enable_product_grade: calculate_grade_signature = utils.calculate_grade_signature(product) 
chord(post_processing_task_signatures)(calculate_grade_signature) - # If product grading is disabled, just run the post-processing tasks without the grade calculation callback elif post_processing_task_signatures: + # If product grading is disabled, just run the post-processing tasks without the grade calculation callback group(post_processing_task_signatures).apply_async() logger.debug(f"Launched chord with {len(post_processing_task_signatures)} tasks (batch #{current_batch_number}, size: {len(post_processing_task_signatures)})") diff --git a/dojo/importers/default_reimporter.py b/dojo/importers/default_reimporter.py index 25f6da5635d..1a707809d7c 100644 --- a/dojo/importers/default_reimporter.py +++ b/dojo/importers/default_reimporter.py @@ -272,6 +272,8 @@ def process_findings( # Check if we should launch a chord (batch full or end of findings) if we_want_async(async_user=self.user) and post_processing_task_signatures: # Calculate current batch size: 2^batch_number, capped at max_batch_size + # We do this because post processing only starts after all tasks have been added to the chord + # So we start with small batches to minmize the delay current_batch_size = min(2 ** current_batch_number, max_batch_size) batch_full = len(post_processing_task_signatures) >= current_batch_size @@ -284,8 +286,8 @@ def process_findings( if system_settings.enable_product_grade: calculate_grade_signature = utils.calculate_grade_signature(product) chord(post_processing_task_signatures)(calculate_grade_signature) - # If product grading is disabled, just run the post-processing tasks without the grade calculation callback elif post_processing_task_signatures: + # If product grading is disabled, just run the post-processing tasks without the grade calculation callback group(post_processing_task_signatures).apply_async() logger.debug(f"Launched chord with {len(post_processing_task_signatures)} tasks (batch #{current_batch_number}, size: {len(post_processing_task_signatures)})") From 
29de9529347d4d43826eaee06ad6e18eeac449e9 Mon Sep 17 00:00:00 2001 From: Valentijn Scholten Date: Mon, 15 Sep 2025 18:57:00 +0200 Subject: [PATCH 42/53] fix merge artifact --- dojo/importers/default_importer.py | 8 -------- unittests/test_importers_performance.py | 6 +++--- 2 files changed, 3 insertions(+), 11 deletions(-) diff --git a/dojo/importers/default_importer.py b/dojo/importers/default_importer.py index a6804b385db..4367efd9dcd 100644 --- a/dojo/importers/default_importer.py +++ b/dojo/importers/default_importer.py @@ -221,14 +221,6 @@ def process_findings( # postprocessing will be done after processing related fields like endpoints, vulnerability ids, etc. unsaved_finding.save_no_options() - finding = unsaved_finding - finding = self.process_cve(unsaved_finding) - # Calculate hash_code before saving based on unsaved_endpoints and unsaved_vulnerability_ids - finding.set_hash_code(True) - - # postprocessing will be done after processing related fields like endpoints, vulnerability ids, etc. 
- finding.save_no_options() - # Determine how the finding should be grouped self.process_finding_groups( finding, diff --git a/unittests/test_importers_performance.py b/unittests/test_importers_performance.py index 9c77e4b3800..24e3f302226 100644 --- a/unittests/test_importers_performance.py +++ b/unittests/test_importers_performance.py @@ -176,7 +176,7 @@ def import_reimport_performance(self, expected_num_queries1, expected_num_async_ # def test_import_reimport_reimport_performance_async(self, mock): def test_import_reimport_reimport_performance_async(self): self.import_reimport_performance( - expected_num_queries1=713, + expected_num_queries1=683, expected_num_async_tasks1=10, expected_num_queries2=610, expected_num_async_tasks2=22, @@ -198,7 +198,7 @@ def test_import_reimport_reimport_performance_no_async(self): testuser.usercontactinfo.block_execution = True testuser.usercontactinfo.save() self.import_reimport_performance( - expected_num_queries1=713, + expected_num_queries1=683, expected_num_async_tasks1=10, expected_num_queries2=615, expected_num_async_tasks2=22, @@ -222,7 +222,7 @@ def test_import_reimport_reimport_performance_no_async_with_product_grading(self self.system_settings(enable_product_grade=True) self.import_reimport_performance( - expected_num_queries1=717, + expected_num_queries1=687, expected_num_async_tasks1=11, expected_num_queries2=619, expected_num_async_tasks2=23, From b64f34be24aebe301313102515aa341cd6f47fa4 Mon Sep 17 00:00:00 2001 From: Valentijn Scholten Date: Wed, 17 Sep 2025 23:37:37 +0200 Subject: [PATCH 43/53] fix loop --- dojo/importers/default_importer.py | 8 ++++++++ dojo/settings/settings.dist.py | 6 +++++- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/dojo/importers/default_importer.py b/dojo/importers/default_importer.py index 4367efd9dcd..ed43e469568 100644 --- a/dojo/importers/default_importer.py +++ b/dojo/importers/default_importer.py @@ -188,6 +188,14 @@ def process_findings( # Filter on minimum severity 
if applicable if Finding.SEVERITIES[unsaved_finding.severity] > Finding.SEVERITIES[self.minimum_severity]: # finding's severity is below the configured threshold : ignoring the finding + logger.debug( + "skipping finding due to minimum_severity filter (finding=%s severity=%s min=%s)", + getattr(unsaved_finding, "unique_id_from_tool", None), + unsaved_finding.severity, + self.minimum_severity, + ) + # Advance iterator to avoid infinite loop when skipping + non_clean_unsaved_finding = next(findings_iterator, None) continue # Some parsers provide "mitigated" field but do not set timezone (because they are probably not available in the report) diff --git a/dojo/settings/settings.dist.py b/dojo/settings/settings.dist.py index 7b134e52ad3..e1c440b9b12 100644 --- a/dojo/settings/settings.dist.py +++ b/dojo/settings/settings.dist.py @@ -30,6 +30,10 @@ DD_SITE_URL=(str, "http://localhost:8080"), DD_DEBUG=(bool, False), DD_DJANGO_DEBUG_TOOLBAR_ENABLED=(bool, False), + # django-auditlog imports django-jsonfield-backport raises a warning that can be ignored, + # see https://github.com/laymonage/django-jsonfield-backport + # debug_toolbar.E001 is raised when running tests in dev mode via run-unittests.sh + DD_SILENCED_SYSTEM_CHECKS=(list, ["debug_toolbar.E001", "django_jsonfield_backport.W001"]), DD_TEMPLATE_DEBUG=(bool, False), DD_LOG_LEVEL=(str, ""), DD_DJANGO_METRICS_ENABLED=(bool, False), @@ -1821,7 +1825,7 @@ def saml2_attrib_map_format(din): # django-auditlog imports django-jsonfield-backport raises a warning that can be ignored, # see https://github.com/laymonage/django-jsonfield-backport -SILENCED_SYSTEM_CHECKS = ["django_jsonfield_backport.W001"] +SILENCED_SYSTEM_CHECKS = env("DD_SILENCED_SYSTEM_CHECKS") VULNERABILITY_URLS = { "ALAS": "https://alas.aws.amazon.com/AL2/&&.html", # e.g. 
https://alas.aws.amazon.com/alas2.html From 98ef20d8ef825d37bc8ca0e0e64926ae57df17ac Mon Sep 17 00:00:00 2001 From: Valentijn Scholten Date: Wed, 17 Sep 2025 23:47:07 +0200 Subject: [PATCH 44/53] simplify loop --- dojo/importers/default_importer.py | 38 ++++++++++-------------------- 1 file changed, 12 insertions(+), 26 deletions(-) diff --git a/dojo/importers/default_importer.py b/dojo/importers/default_importer.py index ed43e469568..dc6e9d4aef0 100644 --- a/dojo/importers/default_importer.py +++ b/dojo/importers/default_importer.py @@ -176,27 +176,17 @@ def process_findings( logger.debug("starting import of %i parsed findings.", len(parsed_findings) if parsed_findings else 0) group_names_to_findings_dict = {} - # Create iterator over parsed findings - findings_iterator = iter(parsed_findings) - - # Get first finding to start the loop - non_clean_unsaved_finding = next(findings_iterator, None) - - while non_clean_unsaved_finding: - # make sure the severity is something is digestible - unsaved_finding = self.sanitize_severity(non_clean_unsaved_finding) - # Filter on minimum severity if applicable - if Finding.SEVERITIES[unsaved_finding.severity] > Finding.SEVERITIES[self.minimum_severity]: - # finding's severity is below the configured threshold : ignoring the finding - logger.debug( - "skipping finding due to minimum_severity filter (finding=%s severity=%s min=%s)", - getattr(unsaved_finding, "unique_id_from_tool", None), - unsaved_finding.severity, - self.minimum_severity, - ) - # Advance iterator to avoid infinite loop when skipping - non_clean_unsaved_finding = next(findings_iterator, None) + # Pre-sanitize and filter by minimum severity + cleaned_findings = [] + for raw_finding in parsed_findings or []: + sanitized = self.sanitize_severity(raw_finding) + if Finding.SEVERITIES[sanitized.severity] > Finding.SEVERITIES[self.minimum_severity]: + logger.debug("skipping finding due to minimum severity filter (finding=%s severity=%s min=%s)", sanitized.title, 
sanitized.severity, self.minimum_severity) continue + cleaned_findings.append(sanitized) + + for idx, unsaved_finding in enumerate(cleaned_findings): + is_final_finding = idx == len(cleaned_findings) - 1 # Some parsers provide "mitigated" field but do not set timezone (because they are probably not available in the report) # Finding.mitigated is DateTimeField and it requires timezone @@ -261,10 +251,6 @@ def process_findings( post_processing_task_signatures.append(post_processing_task_signature) - # Get next finding for next iteration - non_clean_unsaved_finding = next(findings_iterator, None) - is_final = not non_clean_unsaved_finding - # Check if we should launch a chord (batch full or end of findings) if we_want_async(async_user=self.user) and post_processing_task_signatures: # Calculate current batch size: 2^batch_number, capped at max_batch_size @@ -274,7 +260,7 @@ def process_findings( batch_full = len(post_processing_task_signatures) >= current_batch_size - if batch_full or is_final: + if batch_full or is_final_finding: # Launch chord with current batch of signatures product = self.test.engagement.product system_settings = System_Settings.objects.get() @@ -289,7 +275,7 @@ def process_findings( # Reset for next batch (only if not final) post_processing_task_signatures = [] - if not is_final: + if not is_final_finding: current_batch_number += 1 else: # Execute task immediately for synchronous processing From 1a77366280df9dfe2115266ec603620fa8c8f37b Mon Sep 17 00:00:00 2001 From: Valentijn Scholten Date: Fri, 19 Sep 2025 21:48:59 +0200 Subject: [PATCH 45/53] fix reimport loop --- dojo/importers/default_reimporter.py | 31 ++++++++++++++-------------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/dojo/importers/default_reimporter.py b/dojo/importers/default_reimporter.py index 1a707809d7c..49744fd0c2e 100644 --- a/dojo/importers/default_reimporter.py +++ b/dojo/importers/default_reimporter.py @@ -190,19 +190,22 @@ def process_findings( 
logger.debug("STEP 1: looping over findings from the reimported report and trying to match them to existing findings") deduplicationLogger.debug(f"Algorithm used for matching new findings to existing findings: {self.deduplication_algorithm}") - # Create iterator over parsed findings - findings_iterator = iter(parsed_findings) - - # Get first finding to start the loop - non_clean_unsaved_finding = next(findings_iterator, None) - - while non_clean_unsaved_finding: - # make sure the severity is something is digestible - unsaved_finding = self.sanitize_severity(non_clean_unsaved_finding) - # Filter on minimum severity if applicable - if Finding.SEVERITIES[unsaved_finding.severity] > Finding.SEVERITIES[self.minimum_severity]: - # finding's severity is below the configured threshold : ignoring the finding + # Pre-sanitize and filter by minimum severity to avoid loop control pitfalls + cleaned_findings = [] + for raw_finding in parsed_findings or []: + sanitized = self.sanitize_severity(raw_finding) + if Finding.SEVERITIES[sanitized.severity] > Finding.SEVERITIES[self.minimum_severity]: + logger.debug( + "skipping finding due to minimum severity filter (finding=%s severity=%s min=%s)", + getattr(sanitized, "title", ""), + sanitized.severity, + self.minimum_severity, + ) continue + cleaned_findings.append(sanitized) + + for idx, unsaved_finding in enumerate(cleaned_findings): + is_final = idx == len(cleaned_findings) - 1 # Some parsers provide "mitigated" field but do not set timezone (because they are probably not available in the report) # Finding.mitigated is DateTimeField and it requires timezone if unsaved_finding.mitigated and not unsaved_finding.mitigated.tzinfo: @@ -266,9 +269,6 @@ def process_findings( ) post_processing_task_signatures.append(post_processing_task_signature) - # Get next finding for next iteration - non_clean_unsaved_finding = next(findings_iterator, None) - # Check if we should launch a chord (batch full or end of findings) if 
we_want_async(async_user=self.user) and post_processing_task_signatures: # Calculate current batch size: 2^batch_number, capped at max_batch_size @@ -277,7 +277,6 @@ def process_findings( current_batch_size = min(2 ** current_batch_number, max_batch_size) batch_full = len(post_processing_task_signatures) >= current_batch_size - is_final = not non_clean_unsaved_finding if batch_full or is_final: # Launch chord with current batch of signatures From 668ac10ddb62a1f634d2a726bf43e27239b5edd6 Mon Sep 17 00:00:00 2001 From: Valentijn Scholten Date: Fri, 19 Sep 2025 21:51:21 +0200 Subject: [PATCH 46/53] revert settings changes --- docker-compose.override.dev.yml | 2 +- dojo/settings/settings.dist.py | 6 +----- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/docker-compose.override.dev.yml b/docker-compose.override.dev.yml index 9acb558cb08..b3b81dcb068 100644 --- a/docker-compose.override.dev.yml +++ b/docker-compose.override.dev.yml @@ -11,7 +11,7 @@ services: environment: PYTHONWARNINGS: error # We are strict about Warnings during development DD_DEBUG: 'True' - DD_DJANGO_DEBUG_TOOLBAR_ENABLED: 'True' + DD_DJANGO_DEBUG_TOOLBAR_ENABLED: 'False' DD_ADMIN_USER: "${DD_ADMIN_USER:-admin}" DD_ADMIN_PASSWORD: "${DD_ADMIN_PASSWORD:-admin}" DD_EMAIL_URL: "smtp://mailhog:1025" diff --git a/dojo/settings/settings.dist.py b/dojo/settings/settings.dist.py index e1c440b9b12..7b134e52ad3 100644 --- a/dojo/settings/settings.dist.py +++ b/dojo/settings/settings.dist.py @@ -30,10 +30,6 @@ DD_SITE_URL=(str, "http://localhost:8080"), DD_DEBUG=(bool, False), DD_DJANGO_DEBUG_TOOLBAR_ENABLED=(bool, False), - # django-auditlog imports django-jsonfield-backport raises a warning that can be ignored, - # see https://github.com/laymonage/django-jsonfield-backport - # debug_toolbar.E001 is raised when running tests in dev mode via run-unittests.sh - DD_SILENCED_SYSTEM_CHECKS=(list, ["debug_toolbar.E001", "django_jsonfield_backport.W001"]), DD_TEMPLATE_DEBUG=(bool, False), 
DD_LOG_LEVEL=(str, ""), DD_DJANGO_METRICS_ENABLED=(bool, False), @@ -1825,7 +1821,7 @@ def saml2_attrib_map_format(din): # django-auditlog imports django-jsonfield-backport raises a warning that can be ignored, # see https://github.com/laymonage/django-jsonfield-backport -SILENCED_SYSTEM_CHECKS = env("DD_SILENCED_SYSTEM_CHECKS") +SILENCED_SYSTEM_CHECKS = ["django_jsonfield_backport.W001"] VULNERABILITY_URLS = { "ALAS": "https://alas.aws.amazon.com/AL2/&&.html", # e.g. https://alas.aws.amazon.com/alas2.html From 0b8f17939ba2766b3319aadfe4197708e5242755 Mon Sep 17 00:00:00 2001 From: Valentijn Scholten Date: Fri, 19 Sep 2025 21:51:45 +0200 Subject: [PATCH 47/53] revert settings changes --- docker-compose.override.dev.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker-compose.override.dev.yml b/docker-compose.override.dev.yml index b3b81dcb068..9acb558cb08 100644 --- a/docker-compose.override.dev.yml +++ b/docker-compose.override.dev.yml @@ -11,7 +11,7 @@ services: environment: PYTHONWARNINGS: error # We are strict about Warnings during development DD_DEBUG: 'True' - DD_DJANGO_DEBUG_TOOLBAR_ENABLED: 'False' + DD_DJANGO_DEBUG_TOOLBAR_ENABLED: 'True' DD_ADMIN_USER: "${DD_ADMIN_USER:-admin}" DD_ADMIN_PASSWORD: "${DD_ADMIN_PASSWORD:-admin}" DD_EMAIL_URL: "smtp://mailhog:1025" From edd58154432785c46ae8edcaf3f036e9f2dd8fe4 Mon Sep 17 00:00:00 2001 From: Valentijn Scholten Date: Tue, 23 Sep 2025 19:32:15 +0200 Subject: [PATCH 48/53] update counts --- unittests/test_importers_performance.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/unittests/test_importers_performance.py b/unittests/test_importers_performance.py index 24e3f302226..33c02b40bfe 100644 --- a/unittests/test_importers_performance.py +++ b/unittests/test_importers_performance.py @@ -222,10 +222,10 @@ def test_import_reimport_reimport_performance_no_async_with_product_grading(self self.system_settings(enable_product_grade=True) self.import_reimport_performance( 
- expected_num_queries1=687, + expected_num_queries1=684, expected_num_async_tasks1=11, - expected_num_queries2=619, + expected_num_queries2=616, expected_num_async_tasks2=23, - expected_num_queries3=301, + expected_num_queries3=298, expected_num_async_tasks3=21, ) From 4de0f6043f627fcbd4036c1639f8c562210eac75 Mon Sep 17 00:00:00 2001 From: Valentijn Scholten Date: Fri, 26 Sep 2025 17:08:08 +0200 Subject: [PATCH 49/53] extract product grading method call --- dojo/importers/base_importer.py | 9 ++++++++- dojo/importers/default_importer.py | 11 ++--------- dojo/importers/default_reimporter.py | 11 ++--------- 3 files changed, 12 insertions(+), 19 deletions(-) diff --git a/dojo/importers/base_importer.py b/dojo/importers/base_importer.py index 428bc7f897f..06dadf5a810 100644 --- a/dojo/importers/base_importer.py +++ b/dojo/importers/base_importer.py @@ -24,6 +24,7 @@ Endpoint, FileUpload, Finding, + System_Settings, Test, Test_Import, Test_Import_Finding_Action, @@ -33,7 +34,7 @@ from dojo.notifications.helper import create_notification from dojo.tools.factory import get_parser from dojo.tools.parser_test import ParserTest -from dojo.utils import max_safe +from dojo.utils import calculate_grade, max_safe logger = logging.getLogger(__name__) @@ -811,3 +812,9 @@ def notify_scan_added( url=reverse("view_test", args=(test.id,)), url_api=reverse("test-detail", args=(test.id,)), ) + + def perform_product_grading(self): + product = self.test.engagement.product + system_settings = System_Settings.objects.get() + if system_settings.enable_product_grade: + calculate_grade(product) diff --git a/dojo/importers/default_importer.py b/dojo/importers/default_importer.py index dc6e9d4aef0..e944b23f3fc 100644 --- a/dojo/importers/default_importer.py +++ b/dojo/importers/default_importer.py @@ -20,7 +20,6 @@ Test_Import, ) from dojo.notifications.helper import create_notification -from dojo.utils import calculate_grade from dojo.validators import clean_tags logger = 
logging.getLogger(__name__) @@ -298,10 +297,7 @@ def process_findings( # Note: All chord batching is now handled within the loop above # Always perform an initial grading, even though it might get overwritten later. - product = self.test.engagement.product - system_settings = System_Settings.objects.get() - if system_settings.enable_product_grade: - calculate_grade(product) + self.perform_product_grading() sync = kwargs.get("sync", True) if not sync: @@ -389,10 +385,7 @@ def close_old_findings( # Calculate grade once after all findings have been closed if old_findings: - product = self.test.engagement.product - system_settings = System_Settings.objects.get() - if system_settings.enable_product_grade: - calculate_grade(product) + self.perform_product_grading() return old_findings diff --git a/dojo/importers/default_reimporter.py b/dojo/importers/default_reimporter.py index 49744fd0c2e..14bb30f77d9 100644 --- a/dojo/importers/default_reimporter.py +++ b/dojo/importers/default_reimporter.py @@ -19,7 +19,6 @@ Test, Test_Import, ) -from dojo.utils import calculate_grade from dojo.validators import clean_tags logger = logging.getLogger(__name__) @@ -312,10 +311,7 @@ def process_findings( # Note: All chord batching is now handled within the loop above # Synchronous tasks were already executed during processing, just calculate grade - product = self.test.engagement.product - system_settings = System_Settings.objects.get() - if system_settings.enable_product_grade: - calculate_grade(product) + self.perform_product_grading() # Process the results and return them back return self.process_results(**kwargs) @@ -361,10 +357,7 @@ def close_old_findings( # Calculate grade once after all findings have been closed if mitigated_findings: - product = self.test.engagement.product - system_settings = System_Settings.objects.get() - if system_settings.enable_product_grade: - calculate_grade(product) + self.perform_product_grading() return mitigated_findings From 
d774ac75884bedc5ee93cd6f0cfc39e5fa4a17df Mon Sep 17 00:00:00 2001 From: Valentijn Scholten Date: Fri, 26 Sep 2025 17:32:30 +0200 Subject: [PATCH 50/53] cleanup model deletes --- dojo/importers/base_importer.py | 9 +-------- dojo/importers/default_importer.py | 5 +++-- dojo/importers/default_reimporter.py | 5 +++-- dojo/models.py | 14 ++++++-------- dojo/utils.py | 6 ++++++ 5 files changed, 19 insertions(+), 20 deletions(-) diff --git a/dojo/importers/base_importer.py b/dojo/importers/base_importer.py index 06dadf5a810..428bc7f897f 100644 --- a/dojo/importers/base_importer.py +++ b/dojo/importers/base_importer.py @@ -24,7 +24,6 @@ Endpoint, FileUpload, Finding, - System_Settings, Test, Test_Import, Test_Import_Finding_Action, @@ -34,7 +33,7 @@ from dojo.notifications.helper import create_notification from dojo.tools.factory import get_parser from dojo.tools.parser_test import ParserTest -from dojo.utils import calculate_grade, max_safe +from dojo.utils import max_safe logger = logging.getLogger(__name__) @@ -812,9 +811,3 @@ def notify_scan_added( url=reverse("view_test", args=(test.id,)), url_api=reverse("test-detail", args=(test.id,)), ) - - def perform_product_grading(self): - product = self.test.engagement.product - system_settings = System_Settings.objects.get() - if system_settings.enable_product_grade: - calculate_grade(product) diff --git a/dojo/importers/default_importer.py b/dojo/importers/default_importer.py index e944b23f3fc..d9a54aacead 100644 --- a/dojo/importers/default_importer.py +++ b/dojo/importers/default_importer.py @@ -20,6 +20,7 @@ Test_Import, ) from dojo.notifications.helper import create_notification +from dojo.utils import perform_product_grading from dojo.validators import clean_tags logger = logging.getLogger(__name__) @@ -297,7 +298,7 @@ def process_findings( # Note: All chord batching is now handled within the loop above # Always perform an initial grading, even though it might get overwritten later. 
- self.perform_product_grading() + perform_product_grading(self.test.engagement.product) sync = kwargs.get("sync", True) if not sync: @@ -385,7 +386,7 @@ def close_old_findings( # Calculate grade once after all findings have been closed if old_findings: - self.perform_product_grading() + perform_product_grading(self.test.engagement.product) return old_findings diff --git a/dojo/importers/default_reimporter.py b/dojo/importers/default_reimporter.py index 14bb30f77d9..0ad354b6af6 100644 --- a/dojo/importers/default_reimporter.py +++ b/dojo/importers/default_reimporter.py @@ -19,6 +19,7 @@ Test, Test_Import, ) +from dojo.utils import perform_product_grading from dojo.validators import clean_tags logger = logging.getLogger(__name__) @@ -311,7 +312,7 @@ def process_findings( # Note: All chord batching is now handled within the loop above # Synchronous tasks were already executed during processing, just calculate grade - self.perform_product_grading() + perform_product_grading(self.test.engagement.product) # Process the results and return them back return self.process_results(**kwargs) @@ -357,7 +358,7 @@ def close_old_findings( # Calculate grade once after all findings have been closed if mitigated_findings: - self.perform_product_grading() + perform_product_grading(self.test.engagement.product) return mitigated_findings diff --git a/dojo/models.py b/dojo/models.py index 7a4c69542d9..b51a9f69a6c 100644 --- a/dojo/models.py +++ b/dojo/models.py @@ -1652,7 +1652,8 @@ def delete(self, *args, **kwargs): with suppress(Engagement.DoesNotExist, Product.DoesNotExist): # Suppressing a potential issue created from async delete removing # related objects in a separate task - calculate_grade(self.product) + from dojo.utils import perform_product_grading # noqa: PLC0415 circular import + perform_product_grading(self.test.engagement.product) def inherit_tags(self, potentially_existing_tags): # get a copy of the tags to be inherited @@ -2258,9 +2259,8 @@ def delete(self, *args, 
product_grading_option=True, **kwargs): with suppress(Test.DoesNotExist, Engagement.DoesNotExist, Product.DoesNotExist): # Suppressing a potential issue created from async delete removing # related objects in a separate task - system_settings = System_Settings.objects.get() - if system_settings.enable_product_grade: - calculate_grade(self.engagement.product) + from dojo.utils import perform_product_grading # noqa: PLC0415 circular import + perform_product_grading(self.test.engagement.product) @property def statistics(self): @@ -2868,9 +2868,8 @@ def delete(self, *args, product_grading_option=True, **kwargs): with suppress(Finding.DoesNotExist, Test.DoesNotExist, Engagement.DoesNotExist, Product.DoesNotExist): # Suppressing a potential issue created from async delete removing # related objects in a separate task - system_settings = System_Settings.objects.get() - if system_settings.enable_product_grade: - calculate_grade(self.test.engagement.product) + from dojo.utils import perform_product_grading # noqa: PLC0415 circular import + perform_product_grading(self.test.engagement.product) # only used by bulk risk acceptance api @classmethod @@ -4713,7 +4712,6 @@ def __str__(self): from dojo.utils import ( # noqa: E402 # there is issue due to a circular import - calculate_grade, parse_cvss_data, to_str_typed, ) diff --git a/dojo/utils.py b/dojo/utils.py index 9fb962842b1..184987f125a 100644 --- a/dojo/utils.py +++ b/dojo/utils.py @@ -1619,6 +1619,12 @@ def calculate_grade_internal(product, *args, **kwargs): logger.debug("Product %s grade %i is up to date", product.id, prod_numeric_grade) +def perform_product_grading(product): + system_settings = System_Settings.objects.get() + if system_settings.enable_product_grade: + calculate_grade(product) + + def get_celery_worker_status(): from .tasks import celery_status # noqa: PLC0415 circular import res = celery_status.apply_async() From 360b7889fa3f0f7e9e51d6fee237c08dbab84eaf Mon Sep 17 00:00:00 2001 From: Valentijn Scholten 
Date: Fri, 26 Sep 2025 17:35:48 +0200 Subject: [PATCH 51/53] product grade logging fix --- dojo/utils.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dojo/utils.py b/dojo/utils.py index 184987f125a..c8cd1edd2aa 100644 --- a/dojo/utils.py +++ b/dojo/utils.py @@ -1616,7 +1616,8 @@ def calculate_grade_internal(product, *args, **kwargs): product.prod_numeric_grade = prod_numeric_grade super(Product, product).save() else: - logger.debug("Product %s grade %i is up to date", product.id, prod_numeric_grade) + # Use %s to safely handle None grades without formatter errors + logger.debug("Product %s grade %s is up to date", product.id, prod_numeric_grade) def perform_product_grading(product): From 6a2c516b7e1575f6258e0d04873bf52c15472fb7 Mon Sep 17 00:00:00 2001 From: Valentijn Scholten Date: Fri, 26 Sep 2025 17:52:43 +0200 Subject: [PATCH 52/53] extract chord orchestration into method --- dojo/importers/base_importer.py | 44 ++++++++++++++++++++++++++++ dojo/importers/default_importer.py | 33 ++++----------------- dojo/importers/default_reimporter.py | 33 ++++----------------- 3 files changed, 56 insertions(+), 54 deletions(-) diff --git a/dojo/importers/base_importer.py b/dojo/importers/base_importer.py index 428bc7f897f..8eba4036248 100644 --- a/dojo/importers/base_importer.py +++ b/dojo/importers/base_importer.py @@ -1,6 +1,7 @@ import base64 import logging +from celery import chord, group from django.conf import settings from django.core.exceptions import ValidationError from django.core.files.base import ContentFile @@ -10,6 +11,7 @@ from django.utils.timezone import make_aware import dojo.finding.helper as finding_helper +from dojo import utils from dojo.importers.endpoint_manager import EndpointManager from dojo.importers.options import ImporterOptions from dojo.models import ( @@ -24,6 +26,7 @@ Endpoint, FileUpload, Finding, + System_Settings, Test, Test_Import, Test_Import_Finding_Action, @@ -532,6 +535,47 @@ def 
update_test_type_from_internal_test(self, internal_test: ParserTest) -> None self.test.test_type.dynamic_tool = dynamic_tool self.test.test_type.save() + def maybe_launch_post_processing_chord( + self, + post_processing_task_signatures, + current_batch_number: int, + max_batch_size: int, + * + is_final_batch: bool, + ) -> tuple[list, int, bool]: + """ + Helper to optionally launch a chord of post-processing tasks with a calculate-grade callback + when async is desired. Uses exponential batch sizing up to the configured max batch size. + + Returns a tuple of (post_processing_task_signatures, current_batch_number, launched) + where launched indicates whether a chord/group was dispatched and signatures were reset. + """ + launched = False + if not post_processing_task_signatures: + return post_processing_task_signatures, current_batch_number, launched + + current_batch_size = min(2 ** current_batch_number, max_batch_size) + batch_full = len(post_processing_task_signatures) >= current_batch_size + + if batch_full or is_final_batch: + product = self.test.engagement.product + system_settings = System_Settings.objects.get() + if system_settings.enable_product_grade: + calculate_grade_signature = utils.calculate_grade_signature(product) + chord(post_processing_task_signatures)(calculate_grade_signature) + else: + group(post_processing_task_signatures).apply_async() + + logger.debug( + f"Launched chord with {len(post_processing_task_signatures)} tasks (batch #{current_batch_number}, size: {len(post_processing_task_signatures)})", + ) + post_processing_task_signatures = [] + if not is_final_batch: + current_batch_number += 1 + launched = True + + return post_processing_task_signatures, current_batch_number, launched + def verify_tool_configuration_from_test(self): """ Verify that the Tool_Configuration supplied along with the diff --git a/dojo/importers/default_importer.py b/dojo/importers/default_importer.py index d9a54aacead..d127ed33f6a 100644 --- 
a/dojo/importers/default_importer.py +++ b/dojo/importers/default_importer.py @@ -1,13 +1,11 @@ import logging -from celery import chord, group from django.core.files.uploadedfile import TemporaryUploadedFile from django.core.serializers import serialize from django.db.models.query_utils import Q from django.urls import reverse import dojo.jira_link.helper as jira_helper -from dojo import utils from dojo.decorators import we_want_async from dojo.finding import helper as finding_helper from dojo.importers.base_importer import BaseImporter, Parser @@ -15,7 +13,6 @@ from dojo.models import ( Engagement, Finding, - System_Settings, Test, Test_Import, ) @@ -253,30 +250,12 @@ def process_findings( # Check if we should launch a chord (batch full or end of findings) if we_want_async(async_user=self.user) and post_processing_task_signatures: - # Calculate current batch size: 2^batch_number, capped at max_batch_size - # We do this because post processing only starts after all tasks have been added to the chord - # So we start with small batches to minmize the delay - current_batch_size = min(2 ** current_batch_number, max_batch_size) - - batch_full = len(post_processing_task_signatures) >= current_batch_size - - if batch_full or is_final_finding: - # Launch chord with current batch of signatures - product = self.test.engagement.product - system_settings = System_Settings.objects.get() - if system_settings.enable_product_grade: - calculate_grade_signature = utils.calculate_grade_signature(product) - chord(post_processing_task_signatures)(calculate_grade_signature) - elif post_processing_task_signatures: - # If product grading is disabled, just run the post-processing tasks without the grade calculation callback - group(post_processing_task_signatures).apply_async() - - logger.debug(f"Launched chord with {len(post_processing_task_signatures)} tasks (batch #{current_batch_number}, size: {len(post_processing_task_signatures)})") - - # Reset for next batch (only if not final) - 
post_processing_task_signatures = [] - if not is_final_finding: - current_batch_number += 1 + post_processing_task_signatures, current_batch_number, _ = self.maybe_launch_post_processing_chord( + post_processing_task_signatures, + current_batch_number, + max_batch_size, + is_final_finding, + ) else: # Execute task immediately for synchronous processing post_processing_task_signature() diff --git a/dojo/importers/default_reimporter.py b/dojo/importers/default_reimporter.py index 0ad354b6af6..7adb2c65c48 100644 --- a/dojo/importers/default_reimporter.py +++ b/dojo/importers/default_reimporter.py @@ -1,13 +1,11 @@ import logging -from celery import chord, group from django.core.files.uploadedfile import TemporaryUploadedFile from django.core.serializers import serialize from django.db.models.query_utils import Q import dojo.finding.helper as finding_helper import dojo.jira_link.helper as jira_helper -from dojo import utils from dojo.decorators import we_want_async from dojo.importers.base_importer import BaseImporter, Parser from dojo.importers.options import ImporterOptions @@ -15,7 +13,6 @@ Development_Environment, Finding, Notes, - System_Settings, Test, Test_Import, ) @@ -271,30 +268,12 @@ def process_findings( # Check if we should launch a chord (batch full or end of findings) if we_want_async(async_user=self.user) and post_processing_task_signatures: - # Calculate current batch size: 2^batch_number, capped at max_batch_size - # We do this because post processing only starts after all tasks have been added to the chord - # So we start with small batches to minmize the delay - current_batch_size = min(2 ** current_batch_number, max_batch_size) - - batch_full = len(post_processing_task_signatures) >= current_batch_size - - if batch_full or is_final: - # Launch chord with current batch of signatures - product = self.test.engagement.product - system_settings = System_Settings.objects.get() - if system_settings.enable_product_grade: - calculate_grade_signature = 
utils.calculate_grade_signature(product) - chord(post_processing_task_signatures)(calculate_grade_signature) - elif post_processing_task_signatures: - # If product grading is disabled, just run the post-processing tasks without the grade calculation callback - group(post_processing_task_signatures).apply_async() - - logger.debug(f"Launched chord with {len(post_processing_task_signatures)} tasks (batch #{current_batch_number}, size: {len(post_processing_task_signatures)})") - - # Reset for next batch (only if not final) - post_processing_task_signatures = [] - if not is_final: - current_batch_number += 1 + post_processing_task_signatures, current_batch_number, _ = self.maybe_launch_post_processing_chord( + post_processing_task_signatures, + current_batch_number, + max_batch_size, + is_final, + ) else: post_processing_task_signature() From dba9a40e4107c46f530f5f1fd1686e28ec21079b Mon Sep 17 00:00:00 2001 From: Valentijn Scholten Date: Fri, 26 Sep 2025 18:34:48 +0200 Subject: [PATCH 53/53] fix model traversal --- dojo/models.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dojo/models.py b/dojo/models.py index b51a9f69a6c..5bdf195bedb 100644 --- a/dojo/models.py +++ b/dojo/models.py @@ -1653,7 +1653,7 @@ def delete(self, *args, **kwargs): # Suppressing a potential issue created from async delete removing # related objects in a separate task from dojo.utils import perform_product_grading # noqa: PLC0415 circular import - perform_product_grading(self.test.engagement.product) + perform_product_grading(self.product) def inherit_tags(self, potentially_existing_tags): # get a copy of the tags to be inherited @@ -2260,7 +2260,7 @@ def delete(self, *args, product_grading_option=True, **kwargs): # Suppressing a potential issue created from async delete removing # related objects in a separate task from dojo.utils import perform_product_grading # noqa: PLC0415 circular import - perform_product_grading(self.test.engagement.product) + 
perform_product_grading(self.engagement.product) @property def statistics(self):