Skip to content

Commit 226e284

Browse files
perf: chunk close_old_findings status sync queries (1000 PKs per SELECT)
1 parent 2aaca49 commit 226e284

1 file changed

Lines changed: 32 additions & 24 deletions

File tree

dojo/importers/default_reimporter.py

Lines changed: 32 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import logging
2+
from itertools import batched
23

34
from django.conf import settings
45
from django.core.files.uploadedfile import TemporaryUploadedFile
@@ -30,6 +31,9 @@
3031
logger = logging.getLogger(__name__)
3132
deduplicationLogger = logging.getLogger("dojo.specific-loggers.deduplication")
3233

34+
# Bound IN-list size when bulk-loading status fields for close_old_findings.
35+
_CLOSE_OLD_FINDINGS_STATUS_FIELDS_CHUNK = 1000
36+
3337

3438
class DefaultReImporterOptions(ImporterOptions):
3539
def validate_test(
def _sync_close_old_finding_status_fields(self, findings: list[Finding]) -> None:
    """Reload ``false_p``, ``risk_accepted`` and ``out_of_scope`` from the database.

    These status fields can change during reimport (e.g. a finding marked
    false positive) while the in-memory instances are stale. Per-finding
    refresh_from_db in close_old_findings was added in
    https://github.com/DefectDojo/django-DefectDojo/pull/12291, which costs
    one SELECT per finding; here we instead issue one SELECT per chunk of
    primary keys and only fall back to refresh_from_db where the bulk query
    did not return a row.

    This really should be fixed differently, but for now we at least optimize it to be done in bulk.
    """
    unsaved = [finding for finding in findings if finding.pk is None]
    saved = [finding for finding in findings if finding.pk is not None]

    # No bulk path for instances without a pk; refresh them one by one.
    # NOTE(review): refresh_from_db on an unsaved instance raises — presumably
    # this list is always empty here; confirm against close_old_findings.
    for finding in unsaved:
        finding.refresh_from_db(fields=["false_p", "risk_accepted", "out_of_scope"])

    # NOTE(review): the strict= keyword on itertools.batched needs Python 3.13;
    # batched itself needs 3.12 — confirm the supported runtime versions.
    for chunk in batched(saved, _CLOSE_OLD_FINDINGS_STATUS_FIELDS_CHUNK, strict=False):
        rows = Finding.objects.filter(pk__in=[finding.pk for finding in chunk]).values(
            "id",
            "false_p",
            "risk_accepted",
            "out_of_scope",
        )
        fresh = {row["id"]: row for row in rows}
        for finding in chunk:
            current = fresh.get(finding.pk)
            if current is None:
                # Row not returned by the bulk query; fall back to a
                # per-finding refresh, matching the pre-chunking behavior.
                finding.refresh_from_db(fields=["false_p", "risk_accepted", "out_of_scope"])
            else:
                finding.false_p = current["false_p"]
                finding.risk_accepted = current["risk_accepted"]
                finding.out_of_scope = current["out_of_scope"]
500508

501509
def close_old_findings(
502510
self,

0 commit comments

Comments (0)