Skip to content

Commit ebe181e

Browse files
Reimport: Do not reactivate endpoint statuses with special statuses (#14402)
* fix(endpoint_manager): exclude certain endpoint statuses from existing findings * Remove misleading comment * test(import_reimport): skip reactivation for special endpoint statuses * fix(location_manager): exclude special endpoint statuses from reactivation logic fix(location): optimize get_or_create methods for LocationFindingReference and LocationProductReference test(import_reimport): enhance tests to skip reactivation for special statuses in endpoint and location contexts * fix(test_importers_performance): update expected query and async task counts for performance tests * perf(endpoint_manager): avoid double queryset evaluation in update_endpoint_status Evaluate existing_finding_endpoint_status_list once into a list with select_related("endpoint") before the two list comprehensions, preventing a duplicate DB hit and N+1 endpoint lookups. Update expected performance test counts to reflect the reduced query counts. * perf(endpoint_manager): use prefetched status_finding_non_special to avoid extra DB queries Add a named Prefetch to build_candidate_scope_queryset that fetches only non-special endpoint statuses (excluding false_positive, out_of_scope, risk_accepted) with their endpoint joined in via select_related. This replaces the two separate "status_finding" and "status_finding__endpoint" prefetches with a single query and avoids per-finding DB hits in update_endpoint_status and process_matched_special_status_finding. Update expected performance test counts to reflect the reduced query counts. --------- Co-authored-by: Valentijn Scholten <valentijnscholten@gmail.com>
1 parent 544cdc3 commit ebe181e

7 files changed

Lines changed: 125 additions & 33 deletions

File tree

dojo/finding/deduplication.py

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
from django.db.models.query_utils import Q
99

1010
from dojo.celery import app
11-
from dojo.models import Finding, System_Settings
11+
from dojo.models import Endpoint_Status, Finding, System_Settings
1212

1313
logger = logging.getLogger(__name__)
1414
deduplicationLogger = logging.getLogger("dojo.specific-loggers.deduplication")
@@ -295,12 +295,19 @@ def build_candidate_scope_queryset(test, mode="deduplication", service=None):
295295
# Base prefetches for both modes
296296
prefetch_list = ["endpoints", "vulnerability_id_set", "found_by"]
297297

298-
# Additional prefetches for reimport mode
298+
# Additional prefetches for reimport mode: fetch only non-special endpoint statuses with their
299+
# endpoint joined in, so endpoint_manager can read status_finding_non_special directly without
300+
# any extra DB queries
299301
if mode == "reimport":
300-
prefetch_list.extend([
301-
"status_finding",
302-
"status_finding__endpoint",
303-
])
302+
prefetch_list.append(
303+
Prefetch(
304+
"status_finding",
305+
queryset=Endpoint_Status.objects.exclude(
306+
Q(false_positive=True) | Q(out_of_scope=True) | Q(risk_accepted=True),
307+
).select_related("endpoint"),
308+
to_attr="status_finding_non_special",
309+
),
310+
)
304311

305312
return (
306313
queryset

dojo/importers/default_reimporter.py

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -766,12 +766,8 @@ def process_matched_mitigated_finding(
766766
else:
767767
# TODO: Delete this after the move to Locations
768768
# Reactivate mitigated endpoints that are not false positives, out of scope, or risk accepted
769-
endpoint_statuses = existing_finding.status_finding.exclude(
770-
Q(false_positive=True)
771-
| Q(out_of_scope=True)
772-
| Q(risk_accepted=True),
773-
)
774-
self.endpoint_manager.chunk_endpoints_and_reactivate(endpoint_statuses)
769+
# status_finding_non_special is prefetched by build_candidate_scope_queryset
770+
self.endpoint_manager.chunk_endpoints_and_reactivate(existing_finding.status_finding_non_special)
775771
existing_finding.notes.add(note)
776772
self.reactivated_items.append(existing_finding)
777773
# The new finding is active while the existing one is mitigated. The existing finding needs to

dojo/importers/endpoint_manager.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -157,8 +157,9 @@ def update_endpoint_status(
157157
"""Update the list of endpoints from the new finding with the list that is in the old finding"""
158158
# New endpoints are already added in serializers.py / views.py (see comment "# for existing findings: make sure endpoints are present or created")
159159
# So we only need to mitigate endpoints that are no longer present
160-
# using `.all()` will mark as mitigated also `endpoint_status` with flags `false_positive`, `out_of_scope` and `risk_accepted`. This is a known issue. This is not a bug. This is a future.
161-
existing_finding_endpoint_status_list = existing_finding.status_finding.all()
160+
# status_finding_non_special is prefetched by build_candidate_scope_queryset with the
161+
# special-status exclusion and endpoint select_related already applied at the DB level
162+
existing_finding_endpoint_status_list = existing_finding.status_finding_non_special
162163
new_finding_endpoints_list = new_finding.unsaved_endpoints
163164
if new_finding.is_mitigated:
164165
# New finding is mitigated, so mitigate all old endpoints

dojo/importers/location_manager.py

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -127,17 +127,19 @@ def update_location_status(
127127
"""Update the list of locations from the new finding with the list that is in the old finding"""
128128
# New endpoints are already added in serializers.py / views.py (see comment "# for existing findings: make sure endpoints are present or created")
129129
# So we only need to mitigate endpoints that are no longer present
130-
# using `.all()` will mark as mitigated also `endpoint_status` with flags `false_positive`, `out_of_scope` and `risk_accepted`. This is a known issue. This is not a bug. This is a future.
131-
130+
existing_location_refs: QuerySet[LocationFindingReference] = existing_finding.locations.exclude(
131+
status__in=[
132+
FindingLocationStatus.FalsePositive,
133+
FindingLocationStatus.RiskAccepted,
134+
FindingLocationStatus.OutOfScope,
135+
],
136+
)
132137
if new_finding.is_mitigated:
133138
# New finding is mitigated, so mitigate all existing location refs
134-
self.chunk_locations_and_mitigate(existing_finding.locations.all(), user)
139+
self.chunk_locations_and_mitigate(existing_location_refs, user)
135140
else:
136141
# New finding not mitigated; so, reactivate all refs
137-
existing_location_refs: QuerySet[LocationFindingReference] = existing_finding.locations.all()
138-
139-
new_locations_values = [str(location) for location in new_finding.unsaved_locations]
140-
142+
new_locations_values = {str(location) for location in new_finding.unsaved_locations}
141143
# Reactivate endpoints in the old finding that are in the new finding
142144
location_refs_to_reactivate = existing_location_refs.filter(location__location_value__in=new_locations_values)
143145
# Mitigate endpoints in the existing finding not in the new finding

dojo/location/models.py

Lines changed: 23 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -111,9 +111,19 @@ def associate_with_finding(
111111
audit_time: datetime | None = None,
112112
) -> LocationFindingReference:
113113
"""
114-
Get or create a LocationFindingReference for this location and finding,
115-
updating the status each time. Also associates the related product.
114+
Get or create a LocationFindingReference for this location and finding.
115+
Also associates the related product.
116116
"""
117+
# Check if there is an existing reference for this finding and location
118+
# If this method is being used to set the status
119+
if LocationFindingReference.objects.filter(
120+
location=self,
121+
finding=finding,
122+
).exists():
123+
return LocationFindingReference.objects.get(
124+
location=self,
125+
finding=finding,
126+
)
117127
# Determine the status
118128
if status is None:
119129
status = self.status_from_finding(finding)
@@ -144,10 +154,17 @@ def associate_with_product(
144154
product: Product,
145155
status: ProductLocationStatus | None = None,
146156
) -> LocationProductReference:
147-
"""
148-
Get or create a LocationProductReference for this location and product,
149-
updating the status each time.
150-
"""
157+
"""Get or create a LocationProductReference for this location and product"""
158+
# If a reference already exists for this product and location,
159+
# return it as-is without updating its status
160+
if LocationProductReference.objects.filter(
161+
location=self,
162+
product=product,
163+
).exists():
164+
return LocationProductReference.objects.get(
165+
location=self,
166+
product=product,
167+
)
151168
if status is None:
152169
status = self.status_from_product(product)
153170
# Use a transaction for safety in concurrent scenarios

unittests/test_import_reimport.py

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
from django.test.client import Client
1212
from django.urls import reverse
1313
from django.utils import timezone
14+
from parameterized import parameterized
1415

1516
from dojo.models import Engagement, Finding, Product, Product_Type, Test, Test_Type, User
1617

@@ -2003,6 +2004,74 @@ def test_import_nuclei_emptyc(self):
20032004
test_id2 = reimport0["test"]
20042005
self.assertEqual(test_id, test_id2)
20052006

2007+
@parameterized.expand(
2008+
[
2009+
("Test False Positive Status (Endpoint Status)", {"false_positive": True}, "status_finding"),
2010+
("Test Out of Scope Status (Endpoint Status)", {"out_of_scope": True}, "status_finding"),
2011+
("Test Risk Accepted Status (Endpoint Status)", {"risk_accepted": True}, "status_finding"),
2012+
("Test False Positive Status (Locations)", {"status": "FalsePositive"}, "locations"),
2013+
("Test Out of Scope Status (Locations)", {"status": "OutOfScope"}, "locations"),
2014+
("Test Risk Accepted Status (Locations)", {"status": "RiskAccepted"}, "locations"),
2015+
],
2016+
)
2017+
def test_import_reimport_endpoint_where_eps_reactivation_skips_special_status(self, label: str, special_status_fields: dict, m2m_key: str):
2018+
"""
2019+
When Findings are set to False Positive, Out of Scope, or Risk Accepted, they are not reactivated
2020+
because these statuses are often set by humans. The same needs to apply for the Endpoint Status as
2021+
they are an extension of the finding being partially mitigated.
2022+
"""
2023+
if settings.V3_FEATURE_LOCATIONS:
2024+
# TODO: Delete this after the move to Locations
2025+
if m2m_key == "status_finding":
2026+
# This test will fail for endpoint statuses with locations enabled
2027+
# return early here
2028+
return
2029+
context = {
2030+
"auditor": User.objects.get(username="admin"),
2031+
"audit_time": timezone.now(),
2032+
}
2033+
# TODO: Delete this after the move to Locations
2034+
else:
2035+
if m2m_key == "locations":
2036+
# This test will fail for locations with locations disabled
2037+
# return early here
2038+
return
2039+
context = {
2040+
"mitigated": True,
2041+
"mitigated_by": User.objects.get(username="admin"),
2042+
"mitigated_time": timezone.now(),
2043+
}
2044+
# Now start the test
2045+
with assertTestImportModelsCreated(self, imports=1, affected_findings=1, created=1):
2046+
import0 = self.import_scan_with_params(
2047+
self.gitlab_dast_file_name, self.scan_type_gitlab_dast, active=True, verified=True,
2048+
)
2049+
test_id = import0["test"]
2050+
findings = self.get_test_findings_api(test_id)
2051+
self.assert_finding_count_json(1, findings)
2052+
finding = Finding.objects.get(id=findings["results"][0]["id"])
2053+
# Get the related objects on the finding
2054+
related_obects = getattr(finding, m2m_key).all()
2055+
self.assertEqual(len(related_obects), 1)
2056+
# Update the related objects with the special status fields
2057+
related_objects_context = {**context, **special_status_fields}
2058+
related_obects.update(**related_objects_context)
2059+
# Reimport the same file
2060+
reimport0 = self.reimport_scan_with_params(
2061+
test_id, self.gitlab_dast_file_name, scan_type=self.scan_type_gitlab_dast,
2062+
)
2063+
test_id = reimport0["test"]
2064+
findings = self.get_test_findings_api(test_id)
2065+
self.assert_finding_count_json(1, findings)
2066+
finding = Finding.objects.get(id=findings["results"][0]["id"])
2067+
# Get the related objects on the finding
2068+
related_obects = getattr(finding, m2m_key).all()
2069+
self.assertEqual(len(related_obects), 1)
2070+
related_object = related_obects.first()
2071+
# Ensure the status is the same as the baseline
2072+
for key, value in related_objects_context.items():
2073+
self.assertEqual(getattr(related_object, key), value)
2074+
20062075
def test_import_reimport_endpoint_where_eps_date_is_different(self):
20072076
endpoint_count_before = self.db_endpoint_count()
20082077
endpoint_status_count_before_active = self.db_endpoint_status_count(mitigated=False)

unittests/test_importers_performance.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -270,9 +270,9 @@ def test_import_reimport_reimport_performance_pghistory_async(self):
270270
self._import_reimport_performance(
271271
expected_num_queries1=295,
272272
expected_num_async_tasks1=6,
273-
expected_num_queries2=227,
273+
expected_num_queries2=226,
274274
expected_num_async_tasks2=17,
275-
expected_num_queries3=109,
275+
expected_num_queries3=108,
276276
expected_num_async_tasks3=16,
277277
)
278278

@@ -292,9 +292,9 @@ def test_import_reimport_reimport_performance_pghistory_no_async(self):
292292
self._import_reimport_performance(
293293
expected_num_queries1=302,
294294
expected_num_async_tasks1=6,
295-
expected_num_queries2=234,
295+
expected_num_queries2=233,
296296
expected_num_async_tasks2=17,
297-
expected_num_queries3=116,
297+
expected_num_queries3=115,
298298
expected_num_async_tasks3=16,
299299
)
300300

@@ -315,9 +315,9 @@ def test_import_reimport_reimport_performance_pghistory_no_async_with_product_gr
315315
self._import_reimport_performance(
316316
expected_num_queries1=309,
317317
expected_num_async_tasks1=8,
318-
expected_num_queries2=241,
318+
expected_num_queries2=240,
319319
expected_num_async_tasks2=19,
320-
expected_num_queries3=120,
320+
expected_num_queries3=119,
321321
expected_num_async_tasks3=18,
322322
)
323323

0 commit comments

Comments
 (0)