Skip to content

Commit 0b5354e

Browse files
authored
Tag: Update allowed characters for a unified format (#12194)
* Implement migration * Remove lower case requirement * Create common validator * UI: Apply validator * API: Apply validator * Add release notes * Add unit tests * Fixing ruff * Fix some migration updates * Applying feedback * Silly copy/paste
1 parent d902fbb commit 0b5354e

7 files changed

Lines changed: 236 additions & 51 deletions

File tree

docs/content/en/open_source/upgrading/2.46.md

Lines changed: 36 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,42 @@
22
title: 'Upgrading to DefectDojo Version 2.46.x'
33
toc_hide: true
44
weight: -20250407
5-
description: Import Payload Changes
5+
description: Tag Formatting Changes + Import Payload Changes
6+
---
7+
8+
### Tag Formatting Update
9+
10+
Tags can no longer contain the following characters:
11+
12+
- **Commas (,)**
13+
- **Quotations** (both single `'` and double `"`)
14+
- **Spaces**
15+
16+
#### Automatic Migration
17+
18+
To ensure a smooth transition, an automatic migration will be applied to existing tags as follows:
19+
20+
- **Commas** → Replaced with **hyphens (`-`)**
21+
- **Quotations** (single and double) → **Removed**
22+
- **Spaces** → Replaced with **underscores (`_`)**
23+
24+
#### Examples
25+
26+
- `example,tag``example-tag`
27+
- `'SingleQuoted'``SingleQuoted`
28+
- `"DoubleQuoted"``DoubleQuoted`
29+
- `space separated tag``space_separated_tag`
30+
31+
#### Why This Change?
32+
33+
This update improves consistency, enhances search capabilities, and aligns with best practices for tag formatting.
34+
35+
#### Recommended Actions
36+
37+
We recommend reviewing your current tags, specifically CI/CD processes before upgrading to ensure they align with the new format. Performing a database backup is also suggested as this migration cannot be reverted.
38+
39+
Following the deployment of these new behaviors, requests sent to the API or through the UI with any of the violations listed above will result in an error, with the details of the error raised in the response.
40+
641
---
742

843
### Asynchronous Import deprecation in 2.47.0

dojo/api_v2/serializers.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -118,7 +118,7 @@
118118
requires_tool_type,
119119
)
120120
from dojo.user.utils import get_configuration_permissions_codenames
121-
from dojo.utils import is_scan_file_too_large
121+
from dojo.utils import is_scan_file_too_large, tag_validator
122122

123123
logger = logging.getLogger(__name__)
124124
deduplicationLogger = logging.getLogger("dojo.specific-loggers.deduplication")
@@ -225,6 +225,8 @@ def to_internal_value(self, data):
225225
self.fail("not_a_str")
226226
# Run the children validation
227227
self.child.run_validation(s)
228+
# Validate the tag to ensure it doesn't contain invalid characters
229+
tag_validator(s, exception_class=RestFrameworkValidationError)
228230
substrings = re.findall(r'(?:"[^"]*"|[^",]+)', s)
229231
data_safe.extend(substrings)
230232

Lines changed: 122 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,122 @@
1+
# Generated by Django 5.0.8 on 2024-09-12 18:22
2+
3+
import logging
4+
from django.db import migrations
5+
from django.db.models import Q
6+
7+
logger = logging.getLogger(__name__)
8+
9+
# Only apply the process to models that _could_ have tags
10+
model_names = [
11+
"Product",
12+
"Endpoint",
13+
"Engagement",
14+
"Test",
15+
"Finding",
16+
"Finding_Template",
17+
"App_Analysis",
18+
"Objects_Product",
19+
]
20+
21+
22+
def clean_tag_value(tag: str) -> str:
23+
"""
24+
Clean each tag value by:
25+
- Converting all commas to hyphens
26+
- Converting all spaces to underscores
27+
- Removing all single/double quotes
28+
"""
29+
return tag.replace(",", "-").replace(" ", "_").replace('"', "").replace("'", "")
30+
31+
32+
def clean_all_tag_fields(apps, schema_editor):
33+
"""
34+
Cleans tag values for all models in the `model_names` list, removing unwanted characters.
35+
Updates both 'tags' and 'inherited_tags' fields where applicable.
36+
"""
37+
updated_count = {}
38+
for model_name in model_names:
39+
TaggedModel = apps.get_model("dojo", model_name)
40+
unique_tags_per_model = {}
41+
count_per_model = 0
42+
# Only fetch the objects with tags that contain a character in violation
43+
queryset = (
44+
TaggedModel.objects.filter(
45+
Q(**{"tags__name__icontains": ","})
46+
| Q(**{"tags__name__icontains": " "})
47+
| Q(**{"tags__name__icontains": '"'})
48+
| Q(**{"tags__name__icontains": "'"})
49+
)
50+
.distinct()
51+
.prefetch_related("tags")
52+
)
53+
# Iterate over each instance to clean the tags. The iterator is used here
54+
# to prevent loading the entire queryset into memory at once. Instead, we
55+
# will only process 500 objects at a time
56+
for instance in queryset.iterator(chunk_size=500):
57+
# Get the current list of tags to work with
58+
raw_tags = instance.tags.all()
59+
# Clean each tag here while preserving the original value
60+
cleaned_tags = {tag.name: clean_tag_value(tag.name) for tag in raw_tags}
61+
# Quick check to avoid writing things without impact
62+
if cleaned_tags:
63+
instance.tags.set(list(cleaned_tags.values()), clear=True)
64+
count_per_model += 1
65+
# Update the running list of cleaned tags with the changes on this model
66+
unique_tags_per_model.update(cleaned_tags)
67+
# Add a quick logging statement every 100 objects cleaned
68+
if count_per_model > 0 and count_per_model % 100 == 0:
69+
logger.info(
70+
f"{TaggedModel.__name__}.tags: cleaned {count_per_model} tags..."
71+
)
72+
# Update the final count of the tags cleaned for the given model
73+
if count_per_model:
74+
updated_count[f"{TaggedModel.__name__}"] = (
75+
count_per_model,
76+
unique_tags_per_model,
77+
)
78+
"""
79+
Write a helpful statement about what tags were changed for each model in the list.
80+
It looks something like this:
81+
82+
Product: 1 instances cleaned
83+
"quoted string with spaces" -> quoted_string_with_spaces
84+
"quoted with spaces, and also commas!" -> quoted_with_spaces-_and_also_commas!
85+
"quoted,comma,tag" -> quoted-comma-tag
86+
Engagement: 1 instances cleaned
87+
"quoted string with spaces" -> quoted_string_with_spaces
88+
"quoted with spaces, and also commas!" -> quoted_with_spaces-_and_also_commas!
89+
"quoted,comma,tag" -> quoted-comma-tag
90+
Test: 1 instances cleaned
91+
"quoted string with spaces" -> quoted_string_with_spaces
92+
"quoted with spaces, and also commas!" -> quoted_with_spaces-_and_also_commas!
93+
"quoted,comma,tag" -> quoted-comma-tag
94+
Finding: 1 instances cleaned
95+
"quoted string with spaces" -> quoted_string_with_spaces
96+
"quoted with spaces, and also commas!" -> quoted_with_spaces-_and_also_commas!
97+
"quoted,comma,tag" -> quoted-comma-tag
98+
"""
99+
for key, (count, tags) in updated_count.items():
100+
logger.info(f"{key}: {count} instances cleaned")
101+
for old, new in tags.items():
102+
if old != new:
103+
logger.info(f" {old} -> {new}")
104+
105+
106+
def cannot_turn_back_time(apps, schema_editor):
107+
"""
108+
We cannot possibly return to the original state without knowing
109+
the original value at the time the migration is revoked. Instead
110+
we will do nothing.
111+
"""
112+
pass
113+
114+
115+
class Migration(migrations.Migration):
116+
dependencies = [
117+
("dojo", "0226_import_history_left_untouched_rename"),
118+
]
119+
120+
operations = [
121+
migrations.RunPython(clean_all_tag_fields, cannot_turn_back_time),
122+
]

dojo/forms.py

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,7 @@
110110
get_system_setting,
111111
is_finding_groups_enabled,
112112
is_scan_file_too_large,
113+
tag_validator,
113114
)
114115
from dojo.widgets import TableCheckboxWidget
115116

@@ -338,6 +339,9 @@ class Meta:
338339
"business_criticality", "platform", "lifecycle", "origin", "user_records", "revenue", "external_audience", "enable_product_tag_inheritance",
339340
"internet_accessible", "enable_simple_risk_acceptance", "enable_full_risk_acceptance", "disable_sla_breach_notifications"]
340341

342+
def clean_tags(self):
343+
tag_validator(self.cleaned_data.get("tags"))
344+
341345

342346
class DeleteProductForm(forms.ModelForm):
343347
id = forms.IntegerField(required=True,
@@ -602,6 +606,9 @@ def clean(self):
602606

603607
return cleaned_data
604608

609+
def clean_tags(self):
610+
tag_validator(self.cleaned_data.get("tags"))
611+
605612
# date can only be today or in the past, not the future
606613
def clean_scan_date(self):
607614
date = self.cleaned_data.get("scan_date", None)
@@ -708,6 +715,9 @@ def clean(self):
708715

709716
return cleaned_data
710717

718+
def clean_tags(self):
719+
tag_validator(self.cleaned_data.get("tags"))
720+
711721
# date can only be today or in the past, not the future
712722
def clean_scan_date(self):
713723
date = self.cleaned_data.get("scan_date", None)
@@ -1021,6 +1031,9 @@ def is_valid(self):
10211031
return False
10221032
return True
10231033

1034+
def clean_tags(self):
1035+
tag_validator(self.cleaned_data.get("tags"))
1036+
10241037
class Meta:
10251038
model = Engagement
10261039
exclude = ("first_contacted", "real_start", "engagement_type", "inherited_tags",
@@ -1076,6 +1089,9 @@ class Meta:
10761089
"environment", "percent_complete", "tags", "lead", "version", "branch_tag", "build_id", "commit_hash",
10771090
"api_scan_configuration"]
10781091

1092+
def clean_tags(self):
1093+
tag_validator(self.cleaned_data.get("tags"))
1094+
10791095

10801096
class DeleteTestForm(forms.ModelForm):
10811097
id = forms.IntegerField(required=True,
@@ -1172,6 +1188,9 @@ def clean(self):
11721188

11731189
return cleaned_data
11741190

1191+
def clean_tags(self):
1192+
tag_validator(self.cleaned_data.get("tags"))
1193+
11751194
class Meta:
11761195
model = Finding
11771196
exclude = ("reporter", "url", "numerical_severity", "under_review", "reviewers", "cve", "inherited_tags",
@@ -1249,6 +1268,9 @@ def clean(self):
12491268

12501269
return cleaned_data
12511270

1271+
def clean_tags(self):
1272+
tag_validator(self.cleaned_data.get("tags"))
1273+
12521274
class Meta:
12531275
model = Finding
12541276
exclude = ("reporter", "url", "numerical_severity", "under_review", "reviewers", "cve", "inherited_tags",
@@ -1306,6 +1328,9 @@ def clean(self):
13061328

13071329
return cleaned_data
13081330

1331+
def clean_tags(self):
1332+
tag_validator(self.cleaned_data.get("tags"))
1333+
13091334
class Meta:
13101335
model = Finding
13111336
exclude = ("reporter", "url", "numerical_severity", "active", "false_p", "verified", "endpoint_status", "cve", "inherited_tags",
@@ -1428,6 +1453,9 @@ def clean(self):
14281453

14291454
return cleaned_data
14301455

1456+
def clean_tags(self):
1457+
tag_validator(self.cleaned_data.get("tags"))
1458+
14311459
def _post_clean(self):
14321460
super()._post_clean()
14331461

@@ -1504,6 +1532,9 @@ def clean(self):
15041532

15051533
return cleaned_data
15061534

1535+
def clean_tags(self):
1536+
tag_validator(self.cleaned_data.get("tags"))
1537+
15071538
class Meta:
15081539
fields = ["title", "cwe", "vulnerability_ids", "cvssv3", "severity", "description", "mitigation", "impact", "references", "tags"]
15091540
order = ("title", "cwe", "vulnerability_ids", "cvssv3", "severity", "description", "impact", "is_mitigated")
@@ -1534,6 +1565,9 @@ class Meta:
15341565
order = ("title", "cwe", "vulnerability_ids", "cvssv3", "severity", "description", "impact")
15351566
exclude = ("numerical_severity", "is_mitigated", "last_used", "endpoint_status", "cve")
15361567

1568+
def clean_tags(self):
1569+
tag_validator(self.cleaned_data.get("tags"))
1570+
15371571

15381572
class DeleteFindingTemplateForm(forms.ModelForm):
15391573
id = forms.IntegerField(required=True,
@@ -1587,6 +1621,9 @@ def clean(self):
15871621
raise forms.ValidationError(msg)
15881622
return cleaned_data
15891623

1624+
def clean_tags(self):
1625+
tag_validator(self.cleaned_data.get("tags"))
1626+
15901627
class Meta:
15911628
model = Finding
15921629
fields = ("severity", "date", "planned_remediation_date", "active", "verified", "false_p", "duplicate", "out_of_scope",
@@ -1637,6 +1674,9 @@ def clean(self):
16371674

16381675
return cleaned_data
16391676

1677+
def clean_tags(self):
1678+
tag_validator(self.cleaned_data.get("tags"))
1679+
16401680

16411681
class AddEndpointForm(forms.Form):
16421682
endpoint = forms.CharField(max_length=5000, required=True, label="Endpoint(s)",
@@ -1700,6 +1740,9 @@ def clean(self):
17001740

17011741
return cleaned_data
17021742

1743+
def clean_tags(self):
1744+
tag_validator(self.cleaned_data.get("tags"))
1745+
17031746

17041747
class DeleteEndpointForm(forms.ModelForm):
17051748
id = forms.IntegerField(required=True,

dojo/utils.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
from django.conf import settings
2626
from django.contrib import messages
2727
from django.contrib.auth.signals import user_logged_in, user_logged_out, user_login_failed
28+
from django.core.exceptions import ValidationError
2829
from django.core.paginator import Paginator
2930
from django.db.models import Case, Count, IntegerField, Q, Sum, Value, When
3031
from django.db.models.query import QuerySet
@@ -2697,3 +2698,21 @@ def generate_file_response_from_file_path(
26972698
response["Content-Disposition"] = f'attachment; filename="{full_file_name}"'
26982699
response["Content-Length"] = file_size
26992700
return response
2701+
2702+
2703+
def tag_validator(value: str | list[str], exception_class: Callable = ValidationError) -> None:
2704+
TAG_PATTERN = re.compile(r'[ ,\'"]')
2705+
error_messages = []
2706+
2707+
if isinstance(value, list):
2708+
for tag in value:
2709+
if TAG_PATTERN.search(tag):
2710+
error_messages.append(f"Invalid tag: '{tag}'. Tags should not contain spaces, commas, or quotes.")
2711+
elif isinstance(value, str):
2712+
if TAG_PATTERN.search(value):
2713+
error_messages.append(f"Invalid tag: '{value}'. Tags should not contain spaces, commas, or quotes.")
2714+
else:
2715+
error_messages.append(f"Value must be a string or list of strings: {value} - {type(value)}.")
2716+
2717+
if error_messages:
2718+
raise exception_class(error_messages)

unittests/dojo_test_case.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -632,15 +632,15 @@ def get_finding_api(self, finding_id):
632632
self.assertEqual(200, response.status_code, response.content[:1000])
633633
return response.data
634634

635-
def post_new_finding_api(self, finding_details, push_to_jira=None):
635+
def post_new_finding_api(self, finding_details: dict, push_to_jira=None, expected_status_code: int = 201):
636636
payload = copy.deepcopy(finding_details)
637637
if push_to_jira is not None:
638638
payload["push_to_jira"] = push_to_jira
639639

640640
# logger.debug('posting new finding push_to_jira: %s', payload.get('push_to_jira', None))
641641

642642
response = self.client.post(reverse("finding-list"), payload, format="json")
643-
self.assertEqual(201, response.status_code, response.content[:1000])
643+
self.assertEqual(expected_status_code, response.status_code, response.content[:1000])
644644
return response.data
645645

646646
def put_finding_api(self, finding_id, finding_details, push_to_jira=None):

0 commit comments

Comments
 (0)