Skip to content

Commit f56f5bd

Browse files
committed
urls as unique
1 parent 9f364c8 commit f56f5bd

11 files changed

Lines changed: 349 additions & 288 deletions

File tree

dojo/db_migrations/0259_locations.py

Lines changed: 244 additions & 238 deletions
Large diffs are not rendered by default.

dojo/endpoint/utils.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -334,7 +334,7 @@ def endpoint_meta_import(file, product, create_endpoints, create_tags, create_me
334334
elif object_class == Location:
335335
endpoints = Location.objects.filter(url__host=host, products__product=product)
336336
if not endpoints.exists() and create_endpoints:
337-
url = URL.objects.create(host=host)
337+
url = URL.get_or_create_from_values(host=host)
338338
url.location.associate_with_product(product)
339339
endpoints = [url.location]
340340
meta = [(key, row.get(key)) for key in keys]

dojo/finding/helper.py

Lines changed: 1 addition & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -941,17 +941,7 @@ def get_value(field_name, default=None):
941941
for endpoint_url in endpoint_urls:
942942
try:
943943
if settings.V3_FEATURE_LOCATIONS:
944-
unsaved_url = URL.from_value(endpoint_url)
945-
saved_url, _ = URL.objects.get_or_create(
946-
protocol=unsaved_url.protocol,
947-
user_info=unsaved_url.user_info,
948-
host=unsaved_url.host,
949-
port=unsaved_url.port,
950-
path=unsaved_url.path,
951-
query=unsaved_url.query,
952-
fragment=unsaved_url.fragment,
953-
host_validation_failure=unsaved_url.host_validation_failure,
954-
)
944+
saved_url = URL.create_location_from_value(endpoint_url)
955945
saved_url.location.associate_with_finding(finding)
956946
else:
957947
# TODO: Delete this after the move to Locations

dojo/importers/location_manager.py

Lines changed: 1 addition & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -24,23 +24,9 @@
2424

2525
# test_notifications.py: Implement Locations
2626
class LocationManager:
27-
28-
def get_or_create_url(self, unsaved_url: URL) -> URL:
29-
saved_url, _ = URL.objects.get_or_create(
30-
protocol=unsaved_url.protocol,
31-
user_info=unsaved_url.user_info,
32-
host=unsaved_url.host,
33-
port=unsaved_url.port,
34-
path=unsaved_url.path,
35-
query=unsaved_url.query,
36-
fragment=unsaved_url.fragment,
37-
host_validation_failure=unsaved_url.host_validation_failure,
38-
)
39-
return saved_url
40-
4127
def get_or_create_location(self, unsaved_location: AbstractLocation) -> AbstractLocation | None:
4228
if isinstance(unsaved_location, URL):
43-
return self.get_or_create_url(unsaved_location)
29+
return URL.get_or_create_from_object(unsaved_location)
4430
logger.debug(f"IMPORT_SCAN: Unsupported location type: {type(unsaved_location)}")
4531
return None
4632

dojo/location/utils.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,7 @@ def url_get_or_create(**kwargs):
8585
matches = list(qs.order_by("id")[:2])
8686
if not matches:
8787
# Most common case: nothing exists yet
88-
return URL.objects.create(**kwargs), True
88+
return URL.get_or_create_from_values(**kwargs), True
8989
if len(matches) == 1:
9090
# Common case: exactly one existing URL
9191
return matches[0], False

dojo/management/commands/migrate_endpoints_to_locations.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,9 +36,9 @@ def _endpoint_to_url(self, endpoint: Endpoint) -> Location:
3636
# Create the raw URL object first
3737
# This should create the location object as well
3838
url = URL.objects.get_or_create(
39-
protocol=endpoint.protocol or "",
39+
protocol=(endpoint.protocol or "").lower(),
4040
user_info=endpoint.userinfo or "",
41-
host=endpoint.host,
41+
host=(endpoint.host or "").lower(),
4242
port=endpoint.port,
4343
path=endpoint.path or "",
4444
query=endpoint.query or "",

dojo/url/models.py

Lines changed: 92 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,8 @@
66

77
from django.core.exceptions import ValidationError
88
from django.core.validators import MaxValueValidator, MinValueValidator
9-
from django.db.models import BooleanField, CharField, Index, PositiveIntegerField
9+
from django.db.models import BooleanField, CharField, Index, PositiveIntegerField, UniqueConstraint
10+
from django.db.models.functions import Lower
1011

1112
# Ignoring the N811 error as this is an external library and we cannot change its name
1213
# We are already using "URL" in our own code so we need to alias this import
@@ -60,6 +61,10 @@ def parse(self, value: str) -> ParsedUrl:
6061
error_message = f"No host provided in URL: {parsed_url}"
6162
raise ValidationError(error_message)
6263

64+
if parsed_url.port is not None and (parsed_url.port < 1 or parsed_url.port > 65535):
65+
error_message = f"Invalid port: {parsed_url.port}"
66+
raise ValidationError(error_message)
67+
6368
return ParsedUrl(
6469
raw=value,
6570
protocol=parsed_url.scheme,
@@ -162,6 +167,19 @@ class Meta:
162167
verbose_name = "Locations - URL"
163168
verbose_name_plural = "Locations - URLs"
164169
indexes = (Index(fields=["host"]),)
170+
constraints = [
171+
UniqueConstraint(
172+
Lower("protocol"),
173+
"user_info",
174+
Lower("host"),
175+
"port",
176+
"path",
177+
"query",
178+
"fragment",
179+
"host_validation_failure",
180+
name="url_unique",
181+
),
182+
]
165183

166184
def manual_str(self):
167185
value = ""
@@ -206,10 +224,19 @@ def get_location_type(cls) -> str:
206224
def get_location_value(self) -> str:
207225
return str(self)
208226

227+
def normalize_url_parts(self):
228+
self.clean_protocol()
229+
self.clean_user_info()
230+
self.clean_host()
231+
self.clean_port()
232+
self.clean_path()
233+
self.clean_query()
234+
self.clean_fragment()
235+
self.clean_host_validation_failure()
236+
209237
def pre_save_logic(self) -> None:
210238
"""Allow for some pre save operations by other classes."""
211-
# Set default port based on protocol if not provided
212-
self.clean_port()
239+
self.normalize_url_parts()
213240
super().pre_save_logic()
214241

215242
@staticmethod
@@ -220,11 +247,25 @@ def _parse_string_value(value: str) -> ParsedUrl:
220247
def clean(self, *args: list, **kwargs: dict) -> None:
221248
"""Validate the input supplied."""
222249
super().clean(*args, **kwargs)
223-
# Ensure the full value is correctly parsable. If not, an exception will be raised
224-
self.clean_port()
225-
self.clean_path()
226-
self.clean_query()
227-
self.clean_fragment()
250+
self.normalize_url_parts()
251+
252+
def clean_protocol(self) -> None:
253+
if not self.protocol:
254+
self.protocol = ""
255+
else:
256+
self.protocol = self.protocol.lower()
257+
258+
def clean_user_info(self):
259+
if not self.user_info:
260+
self.user_info = ""
261+
else:
262+
self.user_info = self.remove_null_bytes(self.user_info.strip())
263+
264+
def clean_host(self) -> None:
265+
if not self.host:
266+
self.host = ""
267+
else:
268+
self.host = self.host.lower()
228269

229270
def clean_port(self) -> None:
230271
if self.port is None:
@@ -249,15 +290,54 @@ def clean_query(self) -> None:
249290
else:
250291
self.query = self.remove_null_bytes(self.query.strip().removeprefix("?"))
251292

293+
def clean_host_validation_failure(self):
294+
self.host_validation_failure = bool(self.host_validation_failure)
295+
252296
def remove_null_bytes(self, value: str) -> str:
253297
return value.replace("\x00", "%00")
254298

299+
@staticmethod
300+
def get_or_create_from_object(url: URL) -> URL:
301+
url.normalize_url_parts()
302+
url, _ = URL.objects.get_or_create(
303+
protocol=url.protocol,
304+
user_info=url.user_info,
305+
host=url.host,
306+
port=url.port,
307+
path=url.path,
308+
query=url.query,
309+
fragment=url.fragment,
310+
host_validation_failure=url.host_validation_failure,
311+
)
312+
return url
313+
314+
@staticmethod
315+
def get_or_create_from_values(
316+
protocol=None,
317+
user_info=None,
318+
host=None,
319+
port=None,
320+
path=None,
321+
query=None,
322+
fragment=None,
323+
host_validation_failure=None,
324+
) -> URL:
325+
return URL.get_or_create_from_object(URL(
326+
protocol=protocol,
327+
user_info=user_info,
328+
host=host,
329+
port=port,
330+
path=path,
331+
query=query,
332+
fragment=fragment,
333+
host_validation_failure=host_validation_failure,
334+
))
335+
255336
@staticmethod
256337
def create_location_from_value(value: str) -> URL:
257338
"""Parse a string URL and return the resulting *persisted* URL Model."""
258-
url = URL.from_value(value)
259-
url.save()
260-
return url
339+
unsaved_url = URL.from_value(value)
340+
return URL.get_or_create_from_object(unsaved_url)
261341

262342
@staticmethod
263343
def from_value(value: str) -> URL:
@@ -280,5 +360,5 @@ def from_value(value: str) -> URL:
280360
query=query,
281361
fragment=fragment,
282362
)
283-
url.full_clean()
363+
url.normalize_url_parts()
284364
return url

unittests/test_deduplication_logic.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -305,8 +305,7 @@ def create_and_associate_endpoints(self, finding, *endpoint_defs: dict):
305305
for e_def in endpoint_defs:
306306
e_def.pop("product", None)
307307
e_def.pop("finding", None)
308-
url = URL(**e_def)
309-
url.save()
308+
url = URL.get_or_create_from_values(**e_def)
310309
url.location.associate_with_finding(finding)
311310
else:
312311
# TODO: Delete this after the move to Locations

unittests/tools/test_awssecurityhub_parser.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,7 @@ def test_inspector_ec2(self):
7373
self.assertEqual("CVE-2022-3643", finding.unsaved_vulnerability_ids[0])
7474
self.assertEqual("- Update kernel-4.14.301\n\t- yum update kernel\n", finding.mitigation)
7575
location = self.get_unsaved_locations(finding)[0]
76-
self.assertEqual("AwsEc2Instance_arn_aws_ec2_us-east-1_XXXXXXXXXXXX_i-11111111111111111", location.host)
76+
self.assertEqual("AwsEc2Instance_arn_aws_ec2_us-east-1_XXXXXXXXXXXX_i-11111111111111111".lower(), location.host.lower())
7777

7878
def test_inspector_ec2_with_no_vulnerabilities(self):
7979
with sample_path("inspector_ec2_cve_no_vulnerabilities.json").open(encoding="utf-8") as test_file:
@@ -98,7 +98,7 @@ def test_inspector_ec2_ghsa(self):
9898
self.assertSetEqual({"CVE-2023-34256", "GHSA-p98r-538v-jgw5"}, set(finding.unsaved_vulnerability_ids))
9999
self.assertEqual("https://github.com/bottlerocket-os/bottlerocket/security/advisories/GHSA-p98r-538v-jgw5", finding.references)
100100
location = self.get_unsaved_locations(finding)[0]
101-
self.assertEqual("AwsEc2Instance_arn_aws_ec2_eu-central-1_012345678912_instance_i-07c11cc535d830123", location.host)
101+
self.assertEqual("AwsEc2Instance_arn_aws_ec2_eu-central-1_012345678912_instance_i-07c11cc535d830123".lower(), location.host.lower())
102102

103103
def test_inspector_ecr(self):
104104
with sample_path("inspector_ecr.json").open(encoding="utf-8") as test_file:
@@ -116,7 +116,7 @@ def test_inspector_ecr(self):
116116
self.assertIn("Repository: repo-os", finding.impact)
117117
self.assertEqual(0.0014, finding.epss_score)
118118
location = self.get_unsaved_locations(finding)[0]
119-
self.assertEqual("AwsEcrContainerImage_arn_aws_ecr_eu-central-1_123456789012_repository_repo-os_sha256_af965ef68c78374a5f987fce98c0ddfa45801df2395bf012c50b863e65978d74", location.host)
119+
self.assertEqual("AwsEcrContainerImage_arn_aws_ecr_eu-central-1_123456789012_repository_repo-os_sha256_af965ef68c78374a5f987fce98c0ddfa45801df2395bf012c50b863e65978d74".lower(), location.host.lower())
120120

121121
def test_guardduty(self):
122122
with sample_path("guardduty.json").open(encoding="utf-8") as test_file:
@@ -133,7 +133,7 @@ def test_guardduty(self):
133133
self.assertEqual("User AssumedRole : 123123123 is anomalously invoking APIs commonly used in Discovery tactics. - Resource: 123123123", finding.title)
134134
self.assertEqual("TTPs/Discovery/IAMUser-AnomalousBehavior\n[https://docs.aws.amazon.com/guardduty/latest/ug/guardduty_finding-types-active.html](https://docs.aws.amazon.com/guardduty/latest/ug/guardduty_finding-types-active.html)", finding.mitigation)
135135
location = self.get_unsaved_locations(findings[0])[0]
136-
self.assertEqual("AwsEc2Instance_arn_aws_ec2_us-east-1_123456789012_instance_i-1234567890", location.host)
136+
self.assertEqual("AwsEc2Instance_arn_aws_ec2_us-east-1_123456789012_instance_i-1234567890".lower(), location.host.lower())
137137
self.assertEqual("This is a GuardDuty Finding\nAPIs commonly used in Discovery tactics were invoked by user AssumedRole : 123123123, under anomalous circumstances. Such activity is not typically seen from this user.\n**AWS Finding ARN:** arn:aws:guardduty:us-east-1:123456789012:detector/123456789/finding/2123123123123\n**SourceURL:** [https://us-east-1.console.aws.amazon.com/guardduty/home?region=us-east-1#/findings?macros=current&fId=2123123123123](https://us-east-1.console.aws.amazon.com/guardduty/home?region=us-east-1#/findings?macros=current&fId=2123123123123)\n**AwsAccountId:** 123456789012\n**Region:** us-east-1\n**Generator ID:** arn:aws:guardduty:us-east-1:123456789012:detector/123456789\n", finding.description)
138138

139139
def test_issue_10956(self):

unittests/tools/test_ms_defender_parser.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,7 @@ def test_parser_defender_issue_11217(self):
9898
self.assertEqual(1, len(findings))
9999
finding = findings[0]
100100
self.assertEqual("Medium", finding.severity)
101-
self.assertEqual("Max_Mustermann_iPadAir_17zoll__2ndgeneration_", self.get_unsaved_locations(finding)[0].host)
101+
self.assertEqual("Max_Mustermann_iPadAir_17zoll__2ndgeneration_".lower(), self.get_unsaved_locations(finding)[0].host.lower())
102102

103103
def test_parser_defender_error_handling(self):
104104
"""https://github.com/DefectDojo/django-DefectDojo/issues/11896 handle missing values properly, i.e. defenderAvStatus"""

0 commit comments

Comments
 (0)