Skip to content

Commit 57690b9

Browse files
Import history optimize (#13182)
* import history optimization * debugtoolbar: fix for requirements-dev.txt * import history optimization * fix fallback * add bulk existence check * add unit test file * update query counts * try without keepdb * clean database between test runs * clean database between test runs * remove keepdb * fix dojo_testdata fixture * restore entrypoint * revert testdata changes, run transactionaltest at the end * fix entrypoint * fix excludes * add comments * update counts
1 parent 2fc71eb commit 57690b9

6 files changed

Lines changed: 240 additions & 42 deletions

File tree

docker/entrypoint-unit-tests.sh

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -80,9 +80,16 @@ echo "Unit Tests"
8080
echo "------------------------------------------------------------"
8181

8282
# Removing parallel and shuffle for now to maintain stability
83-
python3 manage.py test unittests -v 3 --keepdb --no-input --exclude-tag="non-parallel" || {
83+
python3 manage.py test unittests -v 3 --keepdb --no-input --exclude-tag="non-parallel" --exclude-tag="transactional" || {
8484
exit 1;
8585
}
8686
python3 manage.py test unittests -v 3 --keepdb --no-input --tag="non-parallel" || {
8787
exit 1;
88-
}
88+
}
89+
# Running one unit test that inherits from TransactionTestCase somehow changes the behaviour of how Django loads fixtures into the database.
90+
# Meaning any test after this one would fail to load our dojo_testdata.json fixture. In a way this makes sense as it contains some data integrity problems.
91+
# I tried to fix these in https://github.com/DefectDojo/django-DefectDojo/pull/13217.
92+
# For now we run the only TransactionTestCase at the end to avoid the problem.
93+
python3 manage.py test unittests -v 3 --keepdb --no-input --tag="transactional" || {
94+
exit 1;
95+
}

dojo/finding/helper.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -166,6 +166,24 @@ def update_finding_status(new_state_finding, user, changed_fields=None):
166166
new_state_finding.last_status_update = now
167167

168168

169+
def filter_findings_by_existence(findings):
    """
    Return only findings that still exist in the database (by id).

    Centralized helper used by importers to avoid FK violations during
    bulk_create when a background task (e.g. async duplicate deletion)
    removes a finding mid-import.
    """
    if not findings:
        return []
    # Only saved findings can be checked against the database; unsaved
    # instances have id None. Compare with `is not None` so a hypothetical
    # pk of 0 is not silently discarded by truthiness.
    candidate_ids = [
        finding.id
        for finding in findings
        if getattr(finding, "id", None) is not None
    ]
    if not candidate_ids:
        return []
    # Single bulk existence query instead of one query per finding.
    existing_ids = set(
        Finding.objects.filter(id__in=candidate_ids).values_list("id", flat=True),
    )
    # Use getattr here as well, consistent with the candidate pass above, so
    # objects lacking an id attribute cannot raise AttributeError.
    return [
        finding
        for finding in findings
        if getattr(finding, "id", None) in existing_ids
    ]
185+
186+
169187
def can_edit_mitigated_data(user):
170188
return settings.EDITABLE_MITIGATED_DATA and user.is_superuser
171189

dojo/importers/base_importer.py

Lines changed: 48 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -333,6 +333,9 @@ def update_import_history(
333333
) -> Test_Import:
334334
"""Creates a record of the import or reimport operation that has occurred."""
335335
# Quick fail check to determine if we even wanted this
336+
if settings.TRACK_IMPORT_HISTORY is False:
337+
return None
338+
336339
if untouched_findings is None:
337340
untouched_findings = []
338341
if reactivated_findings is None:
@@ -341,8 +344,6 @@ def update_import_history(
341344
closed_findings = []
342345
if new_findings is None:
343346
new_findings = []
344-
if settings.TRACK_IMPORT_HISTORY is False:
345-
return None
346347
# Log the current state of what has occurred in case there could be
347348
# deviation from what is displayed in the view
348349
logger.debug(
@@ -374,30 +375,49 @@ def update_import_history(
374375
)
375376

376377
# Create a history record for each finding
377-
for finding in closed_findings:
378-
self.create_import_history_record_safe(Test_Import_Finding_Action(
379-
test_import=test_import,
380-
finding=finding,
381-
action=IMPORT_CLOSED_FINDING,
382-
))
383-
for finding in new_findings:
384-
self.create_import_history_record_safe(Test_Import_Finding_Action(
385-
test_import=test_import,
386-
finding=finding,
387-
action=IMPORT_CREATED_FINDING,
388-
))
389-
for finding in reactivated_findings:
390-
self.create_import_history_record_safe(Test_Import_Finding_Action(
391-
test_import=test_import,
392-
finding=finding,
393-
action=IMPORT_REACTIVATED_FINDING,
394-
))
395-
for finding in untouched_findings:
396-
self.create_import_history_record_safe(Test_Import_Finding_Action(
397-
test_import=test_import,
398-
finding=finding,
399-
action=IMPORT_UNTOUCHED_FINDING,
400-
))
378+
finding_action_mappings = [
379+
(closed_findings, IMPORT_CLOSED_FINDING),
380+
(new_findings, IMPORT_CREATED_FINDING),
381+
(reactivated_findings, IMPORT_REACTIVATED_FINDING),
382+
(untouched_findings, IMPORT_UNTOUCHED_FINDING),
383+
]
384+
385+
# In longer running imports it can happen that the async_dupe_delete task removes a finding before the history record is created
386+
# We filter out these findings here to avoid FK violations (IntegrityError)
387+
all_findings = []
388+
for _list, _ in finding_action_mappings:
389+
all_findings.extend(_list)
390+
existing_findings = finding_helper.filter_findings_by_existence(all_findings) if all_findings else []
391+
existing_ids = {f.id for f in existing_findings}
392+
393+
# Collect all import history records using the validated IDs
394+
import_history_records = []
395+
for findings, action in finding_action_mappings:
396+
import_history_records.extend(
397+
Test_Import_Finding_Action(
398+
test_import=test_import,
399+
finding_id=finding.id,
400+
action=action,
401+
)
402+
for finding in findings
403+
if finding.id in existing_ids
404+
)
405+
406+
# Bulk create all at once and let Django handle batching internally.
407+
# Still, in even rarer cases, a finding can be deleted by the time we arrive here.
408+
# If any integrity error occurs, fall back to inserting all records individually.
409+
# The bulk_create call is atomic, so either all batches succeed or all fail and roll back.
410+
try:
411+
# keep bulk failure contained so fallback can proceed in TestCase transaction
412+
Test_Import_Finding_Action.objects.bulk_create(
413+
import_history_records,
414+
ignore_conflicts=True,
415+
batch_size=100,
416+
)
417+
except IntegrityError:
418+
logger.warning("IntegrityError occurred while bulk creating Test_Import_Finding_Actions, falling back to individual inserts")
419+
for record in import_history_records:
420+
self.create_import_history_record_safe(record)
401421

402422
# Add any tags to the findings imported if necessary
403423
if self.apply_tags_to_findings and self.tags:
@@ -418,10 +438,10 @@ def create_import_history_record_safe(
418438
test_import_finding_action,
419439
):
420440
"""Creates an import history record, while catching any IntegrityErrors that might happen because of the background job having deleted a finding"""
421-
logger.debug(f"creating Test_Import_Finding_Action for finding: {test_import_finding_action.finding.id} action: {test_import_finding_action.action}")
441+
logger.debug(f"creating Test_Import_Finding_Action for finding_id: {test_import_finding_action.finding_id} action: {test_import_finding_action.action}")
422442
try:
423443
test_import_finding_action.save()
424-
except IntegrityError as e:
444+
except (IntegrityError, ValueError) as e:
425445
# This try/except makes it look like we don't know what we're doing, but in https://github.com/DefectDojo/django-DefectDojo/issues/6217 we decided that for now this is the best solution
426446
logger.warning("Error creating Test_Import_Finding_Action: %s", e)
427447
logger.debug("Error creating Test_Import_Finding_Action, finding marked as duplicate and deleted ?")

dojo/settings/settings.dist.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,10 @@
3030
DD_SITE_URL=(str, "http://localhost:8080"),
3131
DD_DEBUG=(bool, False),
3232
DD_DJANGO_DEBUG_TOOLBAR_ENABLED=(bool, False),
33+
# django-auditlog imports django-jsonfield-backport, which raises a warning that can be ignored,
34+
# see https://github.com/laymonage/django-jsonfield-backport
35+
# debug_toolbar.E001 is raised when running tests in dev mode via run-unittests.sh
36+
DD_SILENCED_SYSTEM_CHECKS=(list, ["debug_toolbar.E001", "django_jsonfield_backport.W001"]),
3337
DD_TEMPLATE_DEBUG=(bool, False),
3438
DD_LOG_LEVEL=(str, ""),
3539
DD_DJANGO_METRICS_ENABLED=(bool, False),
@@ -740,6 +744,7 @@ def generate_url(scheme, double_slashes, user, password, host, port, path, param
740744
# Override default Django behavior for incorrect URLs
741745
APPEND_SLASH = env("DD_APPEND_SLASH")
742746

747+
743748
# Whether to use a secure cookie for the CSRF cookie.
744749
CSRF_COOKIE_SECURE = env("DD_CSRF_COOKIE_SECURE")
745750
CSRF_COOKIE_SAMESITE = env("DD_CSRF_COOKIE_SAMESITE")
@@ -1814,9 +1819,7 @@ def saml2_attrib_map_format(din):
18141819
# for very large objects
18151820
DELETE_PREVIEW = env("DD_DELETE_PREVIEW")
18161821

1817-
# django-auditlog imports django-jsonfield-backport raises a warning that can be ignored,
1818-
# see https://github.com/laymonage/django-jsonfield-backport
1819-
SILENCED_SYSTEM_CHECKS = ["django_jsonfield_backport.W001"]
1822+
SILENCED_SYSTEM_CHECKS = env("DD_SILENCED_SYSTEM_CHECKS")
18201823

18211824
VULNERABILITY_URLS = {
18221825
"ALAS": "https://alas.aws.amazon.com/AL2/&&.html", # e.g. https://alas.aws.amazon.com/alas2.html

unittests/test_importers_performance.py

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -176,11 +176,11 @@ def import_reimport_performance(self, expected_num_queries1, expected_num_async_
176176
# def test_import_reimport_reimport_performance_async(self, mock):
177177
def test_import_reimport_reimport_performance_async(self):
178178
self.import_reimport_performance(
179-
expected_num_queries1=682,
179+
expected_num_queries1=679,
180180
expected_num_async_tasks1=10,
181-
expected_num_queries2=610,
181+
expected_num_queries2=606,
182182
expected_num_async_tasks2=22,
183-
expected_num_queries3=292,
183+
expected_num_queries3=289,
184184
expected_num_async_tasks3=20,
185185
)
186186

@@ -198,11 +198,11 @@ def test_import_reimport_reimport_performance_no_async(self):
198198
testuser.usercontactinfo.block_execution = True
199199
testuser.usercontactinfo.save()
200200
self.import_reimport_performance(
201-
expected_num_queries1=682,
201+
expected_num_queries1=679,
202202
expected_num_async_tasks1=10,
203-
expected_num_queries2=615,
203+
expected_num_queries2=611,
204204
expected_num_async_tasks2=22,
205-
expected_num_queries3=297,
205+
expected_num_queries3=294,
206206
expected_num_async_tasks3=20,
207207
)
208208

@@ -222,10 +222,10 @@ def test_import_reimport_reimport_performance_no_async_with_product_grading(self
222222
self.system_settings(enable_product_grade=True)
223223

224224
self.import_reimport_performance(
225-
expected_num_queries1=687,
225+
expected_num_queries1=684,
226226
expected_num_async_tasks1=15,
227-
expected_num_queries2=621,
227+
expected_num_queries2=617,
228228
expected_num_async_tasks2=28,
229-
expected_num_queries3=302,
229+
expected_num_queries3=299,
230230
expected_num_async_tasks3=25,
231231
)
Lines changed: 150 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,150 @@
1+
import logging
2+
from unittest.mock import patch
3+
4+
from django.contrib.auth.models import User as DjangoUser
5+
from django.test import TransactionTestCase, tag
6+
from django.utils import timezone
7+
8+
from dojo.importers.default_importer import DefaultImporter
9+
from dojo.models import (
10+
Development_Environment,
11+
Engagement,
12+
Finding,
13+
Product,
14+
Product_Type,
15+
SLA_Configuration,
16+
Test,
17+
Test_Import_Finding_Action,
18+
)
19+
20+
logger = logging.getLogger(__name__)
21+
22+
23+
# We need to run this as a TransactionTestCase to be able to mimic, at runtime, the behavior of the bulk_create fallback when a FK violation occurs
24+
25+
26+
@tag("transactional")
class UpdateImportHistoryTests(TransactionTestCase):
    """Tests for update_import_history: the bulk_create fast path and the
    per-record fallback taken when an IntegrityError occurs mid-bulk."""

    # Loading JSON fixtures fails in TransactionTestCase — possibly because
    # they are not up-to-date and are missing fields like sla_configuration —
    # so the test data is created in code instead, which is more robust here.
    def setUp(self):
        super().setUp()
        self.env, _ = Development_Environment.objects.get_or_create(name="Development")
        self.prod_type = Product_Type.objects.create(name="UpdateImportHistory PT")
        # Ensure a valid SLA configuration exists and is assigned explicitly to avoid default FK issues
        self.sla = SLA_Configuration.objects.create(name="UpdateImportHistory SLA")
        self.prod = Product.objects.create(
            name="UpdateImportHistory P",
            prod_type=self.prod_type,
            sla_configuration=self.sla,
        )
        self.eng = Engagement.objects.create(
            name="UpdateImportHistory E",
            product=self.prod,
            target_start=timezone.now(),
            target_end=timezone.now(),
        )
        # Ensure a reporter/lead user exists for FK constraints
        self.user = DjangoUser.objects.create(username="admin")

        # Minimal importer configuration; the scan type itself is irrelevant
        # to these tests, only the history bookkeeping is exercised.
        self.importer = DefaultImporter(
            user=self.user,
            lead=self.user,
            environment=self.env,
            engagement=self.eng,
            minimum_severity="Info",
            active=True,
            verified=True,
            sync=True,
            scan_type="StackHawk HawkScan",
        )
        # Explicitly create the Test similar to Engagement creation
        self.test = Test.objects.create(
            title="UpdateImportHistory T",
            engagement=self.eng,
            lead=self.user,
            environment=self.env,
            test_type=self.importer.get_or_create_test_type("StackHawk HawkScan"),
            scan_type="StackHawk HawkScan",
            target_start=timezone.now(),
            target_end=timezone.now(),
            percent_complete=0,
        )
        # Attach to importer so update_import_history records against it
        self.importer.test = self.test

    def _create_findings(self, count):
        # Helper: persist `count` minimal findings attached to the test.
        findings = []
        for i in range(count):
            f = Finding(
                title=f"F{i}",
                test=self.importer.test,
                severity="Low",
                reporter=self.user,
            )
            f.save()
            findings.append(f)
        return findings

    def test_success_path_creates_expected_actions(self):
        # Happy path: every finding exists, so bulk_create succeeds and one
        # Test_Import_Finding_Action is created per finding.
        new_findings = self._create_findings(5)
        closed_findings = self._create_findings(3)

        test_import = self.importer.update_import_history(
            new_findings=new_findings,
            closed_findings=closed_findings,
        )

        total_expected = len(new_findings) + len(closed_findings)
        created = Test_Import_Finding_Action.objects.filter(test_import=test_import).count()
        self.assertEqual(created, total_expected)

    def test_fk_violation_in_batch_results_in_partial_fallback(self):
        # One bad finding (deleted after the pre-check) triggers IntegrityError; the fallback saves the valid ones
        new_findings = self._create_findings(9)
        bad = self._create_findings(1)[0]

        # Patch the existence filter to return all findings as-if they exist, then delete one to simulate a race after the check
        with patch("dojo.finding.helper.filter_findings_by_existence", side_effect=lambda lst: lst):
            bad_id = bad.id
            Finding.objects.filter(id=bad_id).delete()
            test_import = self.importer.update_import_history(new_findings=[*new_findings, bad])

        created = Test_Import_Finding_Action.objects.filter(test_import=test_import).count()
        # Expect only the 9 valid ones to be created; the bad one is skipped/raises during fallback
        self.assertEqual(created, len(new_findings))

    def test_fk_violation_second_batch_results_in_partial_fallback(self):
        # Create 300 findings so Django's bulk_create will batch internally (batch_size=100)
        total = 300
        new_findings = self._create_findings(total)

        # Delete a finding in the second batch (index 150) after the existence check
        bad = new_findings[150]
        with patch("dojo.finding.helper.filter_findings_by_existence", side_effect=lambda lst: lst):
            Finding.objects.filter(id=bad.id).delete()
            test_import = self.importer.update_import_history(new_findings=new_findings)

        # Expect all but the deleted one to be created via the fallback
        created = Test_Import_Finding_Action.objects.filter(test_import=test_import).count()
        self.assertEqual(created, total - 1)

    def test_precheck_filters_out_deleted_findings_allows_successful_bulk(self):
        # If a finding is deleted before the existence check, it should be filtered out
        new_findings = self._create_findings(5)
        closed_findings = self._create_findings(3)

        # Delete one from new and one from closed before calling update_import_history
        Finding.objects.filter(id=new_findings[0].id).delete()
        Finding.objects.filter(id=closed_findings[0].id).delete()

        test_import = self.importer.update_import_history(
            new_findings=new_findings,
            closed_findings=closed_findings,
        )

        expected = (len(new_findings) - 1) + (len(closed_findings) - 1)
        created = Test_Import_Finding_Action.objects.filter(test_import=test_import).count()
        self.assertEqual(created, expected)

0 commit comments

Comments
 (0)