12 changes: 8 additions & 4 deletions docs/content/en/open_source/upgrading/2.52.md
@@ -5,6 +5,10 @@ weight: -20251006
description: MobSF parsers & Helm chart changes.
---

## Deduplication fix of `UNIQUE_ID_OR_HASH_CODE`
A bug was fixed in the `UNIQUE_ID_OR_HASH_CODE` deduplication algorithm: it stopped processing candidate findings after the first candidate with an equal `unique_id_from_tool` or `hash_code` value, even when that candidate was not a duplicate.
Strictly speaking this is not a breaking change, but we want to make you aware that you may see more (and better) deduplication for parsers using this algorithm.
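
In practice the candidate loop now only stops once a duplicate has actually been recorded, instead of after the first candidate it inspects. Below is a minimal sketch of the corrected behaviour, simplified from the `dojo/utils.py` change in this pull request; the helper names come from that file, while the queryset construction and most logging are omitted, so treat it as an illustrative excerpt rather than standalone runnable code.

```python
# Simplified excerpt of the fixed loop in deduplicate_uid_or_hash_code() (dojo/utils.py).
# existing_findings holds the candidates sharing new_finding's unique_id_from_tool or hash_code.
for find in existing_findings:
    if is_deduplication_on_engagement_mismatch(new_finding, find):
        # candidate lives on another engagement while per-engagement dedupe is enabled: skip it
        continue
    try:
        if are_endpoints_duplicates(new_finding, find):
            set_duplicate(new_finding, find)
            # stop only once a duplicate has actually been set; before the fix the loop
            # stopped after the first candidate it inspected, duplicate or not
            break
    except Exception as e:
        deduplicationLogger.debug(str(e))
        continue
```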

## Merge of MobSF parsers

Mobsfscan Scan" has been merged into the "MobSF Scan" parser. The "Mobsfscan Scan" scan_type has been retained to keep deduplication working for existing Tests, but users are encouraged to move to the "MobSF Scan" scan_type.
@@ -17,16 +21,16 @@ This release introduces more important changes to the Helm chart configuration:

#### Tags

`tag` and `repositoryPrefix` fields have been deprecated. Currently, image tags used in containers are derived by default from the `appVersion` defined in the Chart.
This behavior can be overridden by setting the `tag` value in `images.django` and `images.nginx`.
If fine-tuning is necessary, each container’s image value can also be customized individually (`celery.beat.image`, `celery.worker.image`, `django.nginx.image`, `django.uwsgi.image`, `initializer.image`, and `dbMigrationChecker.image`).
Digest pinning is now supported as well.

#### Security context

This Helm chart extends security context capabilities to all deployed pods and containers.
You can define a default pod and container security context globally using `securityContext.podSecurityContext` and `securityContext.containerSecurityContext` keys.
Additionally, each deployment can specify its own pod and container security contexts, which will override or merge with the global ones.

#### Fine-grained resources

4 changes: 2 additions & 2 deletions dojo/utils.py
@@ -508,7 +508,7 @@ def deduplicate_uid_or_hash_code(new_finding):
id=new_finding.id).exclude(
duplicate=True).order_by("id")
deduplicationLogger.debug("Found "
+ str(len(existing_findings)) + " findings with either the same unique_id_from_tool or hash_code")
+ str(len(existing_findings)) + " findings with either the same unique_id_from_tool or hash_code: " + str([find.id for find in existing_findings]))
for find in existing_findings:
if is_deduplication_on_engagement_mismatch(new_finding, find):
deduplicationLogger.debug(
@@ -517,10 +517,10 @@
try:
if are_endpoints_duplicates(new_finding, find):
set_duplicate(new_finding, find)
break
except Exception as e:
deduplicationLogger.debug(str(e))
continue
break


def set_duplicate(new_finding, existing_finding):
133 changes: 107 additions & 26 deletions unittests/test_deduplication_logic.py
@@ -1181,55 +1181,136 @@ def test_dedupe_same_id_different_test_type_unique_id_or_hash_code(self):
# expect not duplicate as the matching finding is from another test_type, hash_code is also different
self.assert_finding(finding_new, not_pk=224, duplicate=True, duplicate_finding_id=224, hash_code=finding_224.hash_code)

def test_identical_different_endpoints_unique_id_or_hash_code(self):
def test_identical_different_endpoints_unique_id_or_hash_code_dynamic(self):
# create identical copy, so unique id is the same
finding_new, finding_224 = self.copy_and_reset_finding(find_id=224)
finding_new1, finding_224 = self.copy_and_reset_finding(find_id=224)

finding_new.save(dedupe_option=False)
ep1 = Endpoint(product=finding_new.test.engagement.product, finding=finding_new, host="myhost.com", protocol="https")
finding_new1.save(dedupe_option=False)
ep1 = Endpoint(product=finding_new1.test.engagement.product, finding=finding_new1, host="myhost.com", protocol="https")
ep1.save()
finding_new.endpoints.add(ep1)
finding_new.save()
finding_new1.endpoints.add(ep1)
finding_new1.save()

if settings.DEDUPE_ALGO_ENDPOINT_FIELDS == []:
# expect duplicate, as endpoints shouldn't affect dedupe and hash_code due to unique_id
self.assert_finding(finding_new, not_pk=224, duplicate=True, duplicate_finding_id=224, hash_code=finding_224.hash_code)
self.assert_finding(finding_new1, not_pk=224, duplicate=True, duplicate_finding_id=224, hash_code=finding_224.hash_code)
else:
self.assert_finding(finding_new, not_pk=224, duplicate=False, duplicate_finding_id=None, hash_code=finding_224.hash_code)
# endpoints don't match with 224, so not a duplicate
self.assert_finding(finding_new1, not_pk=224, duplicate=False, duplicate_finding_id=None, hash_code=finding_224.hash_code)

# remove the finding to prevent it from being duplicated by the next finding we create
finding_new1.delete()

# same scenario, now with different uid. and different endpoints, but hash will be different due to the endpoints because we set dynamic_finding to True
finding_new, finding_224 = self.copy_and_reset_finding(find_id=224)
finding_new2, finding_224 = self.copy_and_reset_finding(find_id=224)

finding_new.save(dedupe_option=False)
ep1 = Endpoint(product=finding_new.test.engagement.product, finding=finding_new, host="myhost.com", protocol="https")
finding_new2.save(dedupe_option=False)
ep1 = Endpoint(product=finding_new1.test.engagement.product, finding=finding_new2, host="myhost.com", protocol="https")
ep1.save()
finding_new.endpoints.add(ep1)
finding_new.unique_id_from_tool = 1
finding_new.dynamic_finding = True
finding_new.save()
finding_new2.endpoints.add(ep1)
finding_new2.unique_id_from_tool = 1
finding_new2.dynamic_finding = True
finding_new2.save()

if settings.DEDUPE_ALGO_ENDPOINT_FIELDS == []:
# different uid. and different endpoints, but endpoints not used for hash anymore -> duplicate
self.assert_finding(finding_new, not_pk=224, duplicate=True, hash_code=finding_224.hash_code)
self.assert_finding(finding_new2, not_pk=224, duplicate=True, duplicate_finding_id=224, hash_code=finding_224.hash_code)
else:
# endpoints do not match with 224
self.assert_finding(finding_new1, not_pk=224, duplicate=False, duplicate_finding_id=None, hash_code=finding_224.hash_code)

def test_identical_different_endpoints_unique_id_or_hash_code_static(self):
# create identical copy, so unique id is the same
finding_new1, finding_224 = self.copy_and_reset_finding(find_id=224)

finding_new1.save(dedupe_option=False)
ep1 = Endpoint(product=finding_new1.test.engagement.product, finding=finding_new1, host="myhost.com", protocol="https")
ep1.save()
finding_new1.endpoints.add(ep1)
finding_new1.save()

if settings.DEDUPE_ALGO_ENDPOINT_FIELDS == []:
# expect duplicate, as endpoints shouldn't affect dedupe and hash_code due to unique_id
self.assert_finding(finding_new1, not_pk=224, duplicate=True, duplicate_finding_id=224, hash_code=finding_224.hash_code)
else:
self.assert_finding(finding_new, not_pk=224, duplicate=False, hash_code=finding_224.hash_code)
# endpoints don't match with 224, so not a duplicate
self.assert_finding(finding_new1, not_pk=224, duplicate=False, duplicate_finding_id=None, hash_code=finding_224.hash_code)

# remove the finding to prevent it from being duplicated by the next finding we create
finding_new1.delete()

# same scenario, now with different uid. and different endpoints
finding_new, finding_224 = self.copy_and_reset_finding(find_id=224)
finding_new3, finding_224 = self.copy_and_reset_finding(find_id=224)

finding_new.save(dedupe_option=False)
ep1 = Endpoint(product=finding_new.test.engagement.product, finding=finding_new, host="myhost.com", protocol="https")
finding_new3.save(dedupe_option=False)
ep1 = Endpoint(product=finding_new3.test.engagement.product, finding=finding_new3, host="myhost.com", protocol="https")
ep1.save()
finding_new.endpoints.add(ep1)
finding_new.unique_id_from_tool = 1
finding_new.dynamic_finding = False
finding_new.save()
finding_new3.endpoints.add(ep1)
finding_new3.unique_id_from_tool = 1
finding_new3.dynamic_finding = False
finding_new3.save()

if settings.DEDUPE_ALGO_ENDPOINT_FIELDS == []:
# different uid. and different endpoints; dynamic_finding is set to False, so hash_code is still not affected by endpoints
self.assert_finding(finding_new3, not_pk=224, duplicate=True, duplicate_finding_id=224, hash_code=finding_224.hash_code)
else:
# endpoints do not match with 224
self.assert_finding(finding_new1, not_pk=224, duplicate=False, duplicate_finding_id=None, hash_code=finding_224.hash_code)

def test_identical_different_endpoints_unique_id_or_hash_code_multiple(self):
# create identical copy, so unique id is the same
finding_new1, finding_224 = self.copy_and_reset_finding(find_id=224)

finding_new1.save(dedupe_option=False)
ep1 = Endpoint(product=finding_new1.test.engagement.product, finding=finding_new1, host="myhost.com", protocol="https")
ep1.save()
finding_new1.endpoints.add(ep1)
finding_new1.save()

if settings.DEDUPE_ALGO_ENDPOINT_FIELDS == []:
# expect duplicate, as endpoints shouldn't affect dedupe and hash_code due to unique_id
self.assert_finding(finding_new1, not_pk=224, duplicate=True, duplicate_finding_id=224, hash_code=finding_224.hash_code)
else:
# endpoints don't match with 224, so not a duplicate
self.assert_finding(finding_new1, not_pk=224, duplicate=False, duplicate_finding_id=None, hash_code=finding_224.hash_code)

# same scenario, now with different uid. and different endpoints, but hash will be different due to the endpoints because we set dynamic_finding to True
finding_new2, finding_224 = self.copy_and_reset_finding(find_id=224)

finding_new2.save(dedupe_option=False)
ep1 = Endpoint(product=finding_new1.test.engagement.product, finding=finding_new2, host="myhost.com", protocol="https")
ep1.save()
finding_new2.endpoints.add(ep1)
finding_new2.unique_id_from_tool = 1
finding_new2.dynamic_finding = True
finding_new2.save()

if settings.DEDUPE_ALGO_ENDPOINT_FIELDS == []:
# different uid. and different endpoints, but endpoints not used for hash anymore -> duplicate
self.assert_finding(finding_new2, not_pk=224, duplicate=True, duplicate_finding_id=224, hash_code=finding_224.hash_code)
else:
# endpoints do not match with 224, but they do match with the finding just created. this proves that the dedupe algo considers more than only the first
# candidate https://github.com/DefectDojo/django-DefectDojo/issues/13497
self.assert_finding(finding_new2, not_pk=224, duplicate=True, duplicate_finding_id=finding_new1.pk, hash_code=finding_224.hash_code)

# same scenario, now with different uid. and different endpoints
finding_new3, finding_224 = self.copy_and_reset_finding(find_id=224)

finding_new3.save(dedupe_option=False)
ep1 = Endpoint(product=finding_new3.test.engagement.product, finding=finding_new3, host="myhost.com", protocol="https")
ep1.save()
finding_new3.endpoints.add(ep1)
finding_new3.unique_id_from_tool = 1
finding_new3.dynamic_finding = False
finding_new3.save()

if settings.DEDUPE_ALGO_ENDPOINT_FIELDS == []:
# different uid. and different endpoints; dynamic_finding is set to False, so hash_code is still not affected by endpoints
self.assert_finding(finding_new, not_pk=224, duplicate=True, duplicate_finding_id=224, hash_code=finding_224.hash_code)
self.assert_finding(finding_new3, not_pk=224, duplicate=True, duplicate_finding_id=224, hash_code=finding_224.hash_code)
else:
self.assert_finding(finding_new, not_pk=224, duplicate=False, duplicate_finding_id=None, hash_code=finding_224.hash_code)
# endpoints do not match with 224, but they do match with the finding just created. this proves that the dedupe algo considers more than only the first
# candidate https://github.com/DefectDojo/django-DefectDojo/issues/13497
self.assert_finding(finding_new3, not_pk=224, duplicate=True, duplicate_finding_id=finding_new1.pk, hash_code=finding_224.hash_code)

# # some extra tests
