|
1 | 1 | import logging |
| 2 | +from itertools import batched |
2 | 3 |
|
3 | 4 | from django.conf import settings |
4 | 5 | from django.core.files.uploadedfile import TemporaryUploadedFile |
|
# Module-level loggers: the generic one plus the dedicated deduplication
# logger used by DefectDojo's dedupe diagnostics.
logger = logging.getLogger(__name__)
deduplicationLogger = logging.getLogger("dojo.specific-loggers.deduplication")


# Bound IN-list size when bulk-loading status fields for close_old_findings.
# Chunking keeps each `SELECT ... WHERE id IN (...)` bounded so very large
# reimports do not build an oversized query parameter list.
_CLOSE_OLD_FINDINGS_STATUS_FIELDS_CHUNK = 1000
33 | 37 |
|
34 | 38 | class DefaultReImporterOptions(ImporterOptions): |
35 | 39 | def validate_test( |
@@ -471,32 +475,36 @@ def _sync_close_old_finding_status_fields(self, findings: list[Finding]) -> None |
471 | 475 | These can change during reimport (e.g. false positive) while the in-memory instances |
472 | 476 | are stale. Per-finding refresh_from_db in close_old_findings was added in |
473 | 477 | https://github.com/DefectDojo/django-DefectDojo/pull/12291. A naive refresh per |
474 | | - finding issues one SELECT each; we batch one query for all primary keys and fall |
| 478 | + finding issues one SELECT each; we batch one query per chunk of primary keys and fall |
475 | 479 | back to refresh_from_db only when needed. |
476 | | - """ |
477 | | - ids = [f.pk for f in findings if f.pk is not None] |
478 | | - if not ids: |
479 | | - for finding in findings: |
480 | | - finding.refresh_from_db(fields=["false_p", "risk_accepted", "out_of_scope"]) |
481 | | - return |
482 | 480 |
|
483 | | - fresh_by_id = { |
484 | | - row["id"]: row |
485 | | - for row in Finding.objects.filter(pk__in=ids).values( |
486 | | - "id", |
487 | | - "false_p", |
488 | | - "risk_accepted", |
489 | | - "out_of_scope", |
490 | | - ) |
491 | | - } |
492 | | - for finding in findings: |
493 | | - row = fresh_by_id.get(finding.pk) |
494 | | - if row is not None: |
495 | | - finding.false_p = row["false_p"] |
496 | | - finding.risk_accepted = row["risk_accepted"] |
497 | | - finding.out_of_scope = row["out_of_scope"] |
498 | | - else: |
499 | | - finding.refresh_from_db(fields=["false_p", "risk_accepted", "out_of_scope"]) |
| 481 | + This really should be fixed differently, but for now we at least optimize it to be done in bulk. |
| 482 | + """ |
| 483 | + findings_without_pk = [f for f in findings if f.pk is None] |
| 484 | + findings_with_pk = [f for f in findings if f.pk is not None] |
| 485 | + |
| 486 | + for finding in findings_without_pk: |
| 487 | + finding.refresh_from_db(fields=["false_p", "risk_accepted", "out_of_scope"]) |
| 488 | + |
| 489 | + for chunk in batched(findings_with_pk, _CLOSE_OLD_FINDINGS_STATUS_FIELDS_CHUNK, strict=False): |
| 490 | + ids = [f.pk for f in chunk] |
| 491 | + fresh_by_id = { |
| 492 | + row["id"]: row |
| 493 | + for row in Finding.objects.filter(pk__in=ids).values( |
| 494 | + "id", |
| 495 | + "false_p", |
| 496 | + "risk_accepted", |
| 497 | + "out_of_scope", |
| 498 | + ) |
| 499 | + } |
| 500 | + for finding in chunk: |
| 501 | + row = fresh_by_id.get(finding.pk) |
| 502 | + if row is not None: |
| 503 | + finding.false_p = row["false_p"] |
| 504 | + finding.risk_accepted = row["risk_accepted"] |
| 505 | + finding.out_of_scope = row["out_of_scope"] |
| 506 | + else: |
| 507 | + finding.refresh_from_db(fields=["false_p", "risk_accepted", "out_of_scope"]) |
500 | 508 |
|
501 | 509 | def close_old_findings( |
502 | 510 | self, |
|
0 commit comments