@@ -36,14 +36,15 @@ def setUp(self):
3636 testuser .is_superuser = True
3737 testuser .is_staff = True
3838 testuser .save ()
39- UserContactInfo .objects .create (user = testuser , block_execution = False )
39+ UserContactInfo .objects .create (user = testuser , block_execution = True )
4040
4141 # Authenticate API client as admin for import endpoints
4242 self .login_as_admin ()
4343
4444 self .system_settings (enable_webhooks_notifications = False )
4545 self .system_settings (enable_product_grade = False )
4646 self .system_settings (enable_github = False )
47+ self .system_settings (enable_deduplication = True )
4748
4849 # Warm up ContentType cache for relevant models. This is needed if we want to be able to run the test in isolation
4950 # As part of the test suite the ContentType ids will already be cached and won't affect the query count.
@@ -52,20 +53,321 @@ def setUp(self):
5253 for model in [Development_Environment , Dojo_User , Endpoint , Endpoint_Status , Engagement , Finding , Product , Product_Type , User , Test ]:
5354 ContentType .objects .get_for_model (model )
5455
55- def test_one_import_no_duplicate_findings (self ):
56+ # Internal helper methods for reusable test logic
def _test_single_import_no_duplicates(self, filename, scan_type, scanner_name, expected_duplicates=0):
    """Import one report into a newly named context and check its duplicate count.

    Args:
        filename: Report file forwarded to ``import_scan_with_params``.
        scan_type: Scan type forwarded to ``import_scan_with_params``.
        scanner_name: Label embedded in the product type, product and
            engagement names used for this import.
        expected_duplicates: Number of findings of the imported test that
            must be flagged as duplicate (defaults to 0).
    """
    self.login_as_admin()

    # Names are suffixed with "Single" so this scenario gets its own context.
    context_names = {
        "product_type_name": f"PT {scanner_name} Single",
        "product_name": f"P {scanner_name} Single",
        "engagement_name": f"E {scanner_name} Single",
    }
    payload = self.import_scan_with_params(
        filename,
        scan_type=scan_type,
        minimum_severity="Info",
        active=True,
        verified=True,
        engagement=None,
        auto_create_context=True,
        **context_names,
    )

    imported_test = Test.objects.get(id=payload["test"])

    # Only findings that belong to the imported test are counted.
    flagged = Finding.objects.filter(test=imported_test, duplicate=True)
    self.assertEqual(expected_duplicates, flagged.count())
80+
def _test_full_then_subset_duplicates(self, full_filename, subset_filename, scan_type, scanner_name, expected_duplicates):
    """Import a full report, then a second report into the same engagement, and check duplicates.

    Args:
        full_filename: Report imported first, with auto-created context.
        subset_filename: Report imported second, into the engagement
            produced by the first import.
        scan_type: Scan type forwarded to both imports.
        scanner_name: Label embedded in the product type, product and
            engagement names of the first import.
        expected_duplicates: Duplicate count expected for the second test,
            for the engagement and for the product.
    """
    shared_options = {
        "scan_type": scan_type,
        "minimum_severity": "Info",
        "active": True,
        "verified": True,
    }

    def duplicate_count(**scope):
        # Count findings flagged as duplicate within the given lookup scope.
        return Finding.objects.filter(duplicate=True, **scope).count()

    # Step 1: the full report is imported with auto-created context.
    full_payload = self.import_scan_with_params(
        full_filename,
        engagement=None,
        product_type_name=f"PT {scanner_name} Full",
        product_name=f"P {scanner_name} Full",
        engagement_name=f"E {scanner_name} Full",
        auto_create_context=True,
        **shared_options,
    )
    full_test = Test.objects.get(id=full_payload["test"])

    # The first import is expected to be free of duplicates.
    self.assertEqual(0, duplicate_count(test=full_test))

    # Step 2: the second report targets the existing engagement by id, so no
    # names are supplied and no context is auto-created.
    engagement = full_test.engagement
    subset_payload = self.import_scan_with_params(
        subset_filename,
        engagement=engagement.id,
        product_type_name=None,
        product_name=None,
        engagement_name=None,
        auto_create_context=False,
        **shared_options,
    )
    subset_test = Test.objects.get(id=subset_payload["test"])

    # The same count is expected at test, engagement and product level.
    self.assertEqual(expected_duplicates, duplicate_count(test=subset_test))
    self.assertEqual(expected_duplicates, duplicate_count(test__engagement=engagement))
    self.assertEqual(expected_duplicates, duplicate_count(test__engagement__product=engagement.product))
132+
def _test_different_products_no_duplicates(self, filename, scan_type, scanner_name, expected_duplicates=0):
    """Import the same report into two separate products and check duplicate counts.

    Args:
        filename: Report file imported into both products.
        scan_type: Scan type forwarded to both imports.
        scanner_name: Label embedded in the "Product A" / "Product B" names.
        expected_duplicates: Duplicate count expected for each test and for
            each product (defaults to 0).
    """

    def import_into(suffix):
        # Run one import with auto-created context and return the resulting Test.
        payload = self.import_scan_with_params(
            filename,
            scan_type=scan_type,
            minimum_severity="Info",
            active=True,
            verified=True,
            engagement=None,
            product_type_name=f"PT {scanner_name} Product {suffix}",
            product_name=f"P {scanner_name} Product {suffix}",
            engagement_name=f"E {scanner_name} Product {suffix}",
            auto_create_context=True,
        )
        return Test.objects.get(id=payload["test"])

    def duplicate_count(**scope):
        # Count findings flagged as duplicate within the given lookup scope.
        return Finding.objects.filter(duplicate=True, **scope).count()

    # First import goes into Product A and is checked right away.
    test_a = import_into("A")
    self.assertEqual(expected_duplicates, duplicate_count(test=test_a))

    # Second import of the same report goes into Product B.
    test_b = import_into("B")
    self.assertEqual(expected_duplicates, duplicate_count(test=test_b))

    # Each product is then checked on its own.
    self.assertEqual(
        expected_duplicates,
        duplicate_count(test__engagement__product=test_a.engagement.product),
    )
    self.assertEqual(
        expected_duplicates,
        duplicate_count(test__engagement__product=test_b.engagement.product),
    )
184+
def _test_same_product_different_engagements_duplicates(self, filename, scan_type, scanner_name, expected_duplicates):
    """Import the same report into two engagements of one product and check the product-wide duplicate count.

    Args:
        filename: Report file imported into both engagements.
        scan_type: Scan type forwarded to both imports.
        scanner_name: Label embedded in the product and engagement names.
        expected_duplicates: Duplicate count expected across the whole product.
    """
    product_name = f"P {scanner_name} SameProd"
    shared_options = {
        "scan_type": scan_type,
        "minimum_severity": "Info",
        "active": True,
        "verified": True,
        "engagement": None,
        "auto_create_context": True,
    }

    # Engagement 1: product type, product and engagement are all named.
    first_payload = self.import_scan_with_params(
        filename,
        product_type_name=f"PT {scanner_name} SameProd",
        product_name=product_name,
        engagement_name=f"E {scanner_name} SameProd 1",
        **shared_options,
    )
    first_test = Test.objects.get(id=first_payload["test"])

    # Engagement 2: same product name, no product type name, new engagement name.
    second_payload = self.import_scan_with_params(
        filename,
        product_type_name=None,
        product_name=product_name,
        engagement_name=f"E {scanner_name} SameProd 2",
        **shared_options,
    )
    # Result unused; the lookup itself fails if the second test row is missing.
    Test.objects.get(id=second_payload["test"])

    product = first_test.engagement.product
    product_duplicates = Finding.objects.filter(test__engagement__product=product, duplicate=True)
    self.assertEqual(expected_duplicates, product_duplicates.count())
220+
def _test_same_product_different_engagements_dedupe_on_engagements_no_duplicates(self, filename, scan_type, scanner_name, expected_duplicates=0):
    """Check duplicates across two engagements when the first has ``deduplication_on_engagement`` enabled.

    The report is imported into engagement A, the flag is switched on for that
    engagement, and the same report is then imported into engagement B of the
    same product.

    Args:
        filename: Report file imported into both engagements.
        scan_type: Scan type forwarded to both imports.
        scanner_name: Label embedded in the product and engagement names.
        expected_duplicates: Duplicate count expected for the second test
            and for the whole product (defaults to 0).
    """
    product_name = f"P {scanner_name} DedupeEng"
    shared_options = {
        "scan_type": scan_type,
        "minimum_severity": "Info",
        "active": True,
        "verified": True,
        "engagement": None,
        "auto_create_context": True,
    }

    # Engagement A: product type, product and engagement are all named.
    payload_a = self.import_scan_with_params(
        filename,
        product_type_name=f"PT {scanner_name} DedupeEng",
        product_name=product_name,
        engagement_name=f"E {scanner_name} DedupeEng A",
        **shared_options,
    )
    test_a = Test.objects.get(id=payload_a["test"])

    # Enable the flag on engagement A before the second import runs.
    engagement_a = test_a.engagement
    engagement_a.deduplication_on_engagement = True
    engagement_a.save()

    # Engagement B: same product name, no product type name, new engagement name.
    payload_b = self.import_scan_with_params(
        filename,
        product_type_name=None,
        product_name=product_name,
        engagement_name=f"E {scanner_name} DedupeEng B",
        **shared_options,
    )
    test_b = Test.objects.get(id=payload_b["test"])

    # Checked first on the second test alone ...
    in_second_test = Finding.objects.filter(test=test_b, duplicate=True).count()
    self.assertEqual(expected_duplicates, in_second_test)

    # ... then across the whole product.
    in_product = Finding.objects.filter(test__engagement__product=engagement_a.product, duplicate=True).count()
    self.assertEqual(expected_duplicates, in_product)
264+
265+ # Test cases for ZAP (LEGACY algorithm)
def test_zap_single_import_no_duplicates(self):
    """A single ZAP import (LEGACY algorithm) must flag no finding as duplicate."""
    report = "scans/zap/5_zap_sample_one.xml"
    self._test_single_import_no_duplicates(report, "ZAP Scan", "ZAP")
269+
def test_zap_full_then_subset_duplicates(self):
    """Importing the ZAP report twice into one engagement must flag 2 duplicates."""
    # No dedicated subset report is available yet, so the same file serves as
    # both the full and the subset import.
    report = "scans/zap/5_zap_sample_one.xml"
    self._test_full_then_subset_duplicates(report, report, "ZAP Scan", "ZAP", expected_duplicates=2)
275+
def test_zap_different_products_no_duplicates(self):
    """The same ZAP report imported into two products must flag no duplicates."""
    report = "scans/zap/5_zap_sample_one.xml"
    self._test_different_products_no_duplicates(report, "ZAP Scan", "ZAP")
279+
def test_zap_same_product_different_engagements_duplicates(self):
    """The same ZAP report in two engagements of one product must flag 2 duplicates."""
    report = "scans/zap/5_zap_sample_one.xml"
    self._test_same_product_different_engagements_duplicates(report, "ZAP Scan", "ZAP", expected_duplicates=2)
283+
def test_zap_same_product_different_engagements_dedupe_on_engagements_no_duplicates(self):
    """With dedupe_on_engagements, the same ZAP report in two engagements must flag no duplicates."""
    report = "scans/zap/5_zap_sample_one.xml"
    self._test_same_product_different_engagements_dedupe_on_engagements_no_duplicates(report, "ZAP Scan", "ZAP")
287+
288+ # Test cases for Checkmarx (UNIQUE_ID_FROM_TOOL algorithm)
def test_checkmarx_single_import_no_duplicates(self):
    """A single Checkmarx import (UNIQUE_ID_FROM_TOOL algorithm) must flag no finding as duplicate."""
    report = "scans/checkmarx/single_finding.xml"
    self._test_single_import_no_duplicates(report, "Checkmarx Scan detailed", "Checkmarx")
292+
def test_checkmarx_full_then_subset_duplicates(self):
    """Importing the Checkmarx report twice into one engagement must flag 1 duplicate."""
    # The same file currently serves as both the full and the subset import.
    report = "scans/checkmarx/single_finding.xml"
    self._test_full_then_subset_duplicates(report, report, "Checkmarx Scan detailed", "Checkmarx", expected_duplicates=1)
297+
def test_checkmarx_different_products_no_duplicates(self):
    """The same Checkmarx report imported into two products must flag no duplicates."""
    report = "scans/checkmarx/single_finding.xml"
    self._test_different_products_no_duplicates(report, "Checkmarx Scan detailed", "Checkmarx")
301+
def test_checkmarx_same_product_different_engagements_duplicates(self):
    """The same Checkmarx report in two engagements of one product must flag 1 duplicate."""
    report = "scans/checkmarx/single_finding.xml"
    self._test_same_product_different_engagements_duplicates(report, "Checkmarx Scan detailed", "Checkmarx", expected_duplicates=1)
305+
def test_checkmarx_same_product_different_engagements_dedupe_on_engagements_no_duplicates(self):
    """With dedupe_on_engagements, the same Checkmarx report in two engagements must flag no duplicates."""
    report = "scans/checkmarx/single_finding.xml"
    self._test_same_product_different_engagements_dedupe_on_engagements_no_duplicates(report, "Checkmarx Scan detailed", "Checkmarx")
309+
310+ # Test cases for Trivy (HASH_CODE algorithm)
def test_trivy_single_import_no_duplicates(self):
    """A single Trivy import (HASH_CODE algorithm) must flag no finding as duplicate."""
    report = "scans/trivy/issue_9092.json"
    self._test_single_import_no_duplicates(report, "Trivy Scan", "Trivy")
314+
def test_trivy_full_then_subset_duplicates(self):
    """Importing the Trivy report twice into one engagement must flag 1 duplicate."""
    # The same file currently serves as both the full and the subset import.
    report = "scans/trivy/issue_9092.json"
    self._test_full_then_subset_duplicates(report, report, "Trivy Scan", "Trivy", expected_duplicates=1)
319+
def test_trivy_different_products_no_duplicates(self):
    """The same Trivy report imported into two products must flag no duplicates."""
    report = "scans/trivy/issue_9092.json"
    self._test_different_products_no_duplicates(report, "Trivy Scan", "Trivy")
323+
def test_trivy_same_product_different_engagements_duplicates(self):
    """The same Trivy report in two engagements of one product must flag 1 duplicate."""
    report = "scans/trivy/issue_9092.json"
    self._test_same_product_different_engagements_duplicates(report, "Trivy Scan", "Trivy", expected_duplicates=1)
327+
def test_trivy_same_product_different_engagements_dedupe_on_engagements_no_duplicates(self):
    """With dedupe_on_engagements, the same Trivy report in two engagements must flag no duplicates."""
    report = "scans/trivy/issue_9092.json"
    self._test_same_product_different_engagements_dedupe_on_engagements_no_duplicates(report, "Trivy Scan", "Trivy")
331+
332+ # Test cases for Veracode (UNIQUE_ID_FROM_TOOL_OR_HASH_CODE algorithm)
def test_veracode_single_import_no_duplicates(self):
    """A single Veracode import (UNIQUE_ID_FROM_TOOL_OR_HASH_CODE algorithm) must flag no finding as duplicate."""
    report = "scans/veracode/one_finding.xml"
    self._test_single_import_no_duplicates(report, "Veracode Scan", "Veracode")
336+
def test_veracode_full_then_subset_duplicates(self):
    """Importing the Veracode report twice into one engagement must flag 1 duplicate."""
    # The same file currently serves as both the full and the subset import.
    report = "scans/veracode/one_finding.xml"
    self._test_full_then_subset_duplicates(report, report, "Veracode Scan", "Veracode", expected_duplicates=1)
341+
def test_veracode_different_products_no_duplicates(self):
    """The same Veracode report imported into two products must flag no duplicates."""
    report = "scans/veracode/one_finding.xml"
    self._test_different_products_no_duplicates(report, "Veracode Scan", "Veracode")
345+
def test_veracode_same_product_different_engagements_duplicates(self):
    """The same Veracode report in two engagements of one product must flag 1 duplicate."""
    report = "scans/veracode/one_finding.xml"
    self._test_same_product_different_engagements_duplicates(report, "Veracode Scan", "Veracode", expected_duplicates=1)
349+
def test_veracode_same_product_different_engagements_dedupe_on_engagements_no_duplicates(self):
    """With dedupe_on_engagements, the same Veracode report in two engagements must flag no duplicates."""
    report = "scans/veracode/one_finding.xml"
    self._test_same_product_different_engagements_dedupe_on_engagements_no_duplicates(report, "Veracode Scan", "Veracode")
353+
354+ # Test cases for StackHawk (HASH_CODE algorithm)
def test_stackhawk_single_import_no_duplicates(self):
    """A single StackHawk import (HASH_CODE algorithm) must flag no finding as duplicate."""
    report = "scans/stackhawk/stackhawk_many_vul_without_duplicated_findings.json"
    self._test_single_import_no_duplicates(report, "StackHawk HawkScan", "StackHawk")
358+
def test_stackhawk_full_then_subset_duplicates(self):
    """Importing the full StackHawk report and then its subset must flag 5 duplicates."""
    full_report = "scans/stackhawk/stackhawk_many_vul_without_duplicated_findings.json"
    subset_report = "scans/stackhawk/stackhawk_many_vul_without_duplicated_findings_subset.json"
    self._test_full_then_subset_duplicates(full_report, subset_report, "StackHawk HawkScan", "StackHawk", expected_duplicates=5)
362+
def test_stackhawk_different_products_no_duplicates(self):
    """The same StackHawk report imported into two products must flag no duplicates."""
    report = "scans/stackhawk/stackhawk_many_vul_without_duplicated_findings.json"
    self._test_different_products_no_duplicates(report, "StackHawk HawkScan", "StackHawk")
366+
def test_stackhawk_same_product_different_engagements_duplicates(self):
    """The same StackHawk report in two engagements of one product must flag 6 duplicates."""
    report = "scans/stackhawk/stackhawk_many_vul_without_duplicated_findings.json"
    self._test_same_product_different_engagements_duplicates(report, "StackHawk HawkScan", "StackHawk", expected_duplicates=6)
370+
def test_stackhawk_same_product_different_engagements_dedupe_on_engagements_no_duplicates(self):
    """With dedupe_on_engagements, the same StackHawk report in two engagements must flag no duplicates."""
    report = "scans/stackhawk/stackhawk_many_vul_without_duplicated_findings.json"
    self._test_same_product_different_engagements_dedupe_on_engagements_no_duplicates(report, "StackHawk HawkScan", "StackHawk")
0 commit comments