@@ -170,6 +170,8 @@ def get_reimport_match_candidates_for_batch(
170170         can override candidate retrieval without copying the full `process_findings()`
171171         implementation.
172172
173+         This method is overridden in Dojo Pro.
174+
173175         Returns:
174176             (candidates_by_hash, candidates_by_uid, candidates_by_key)
175177
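As an aside on the extension point called out in this docstring: a downstream edition would typically subclass and widen the candidate dictionaries rather than replace `process_findings()`. Below is a minimal, hypothetical sketch of such an override; the `ProReImporter` name and the `extra_candidate_findings()` helper are invented for illustration, and the base class is assumed to be the `DefaultReImporter` this diff modifies.

```python
# Hypothetical downstream override; only get_reimport_match_candidates_for_batch
# comes from this diff, everything else here is illustrative.
class ProReImporter(DefaultReImporter):
    def get_reimport_match_candidates_for_batch(self, batch_findings):
        # Reuse the default candidate retrieval...
        by_hash, by_uid, by_key = super().get_reimport_match_candidates_for_batch(batch_findings)
        # ...then widen it, e.g. with findings from a broader scope (invented helper).
        for extra in self.extra_candidate_findings(batch_findings):
            if extra.hash_code:
                by_hash.setdefault(extra.hash_code, []).append(extra)
            if extra.unique_id_from_tool:
                by_uid.setdefault(extra.unique_id_from_tool, []).append(extra)
        return by_hash, by_uid, by_key
```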
@@ -201,6 +203,51 @@ def get_reimport_match_candidates_for_batch(
201203
202204         return candidates_by_hash, candidates_by_uid, candidates_by_key
203205
206+     def add_new_finding_to_candidates(
207+         self,
208+         finding: Finding,
209+         candidates_by_hash: dict,
210+         candidates_by_uid: dict,
211+         candidates_by_key: dict,
212+     ) -> None:
213+         """
214+         Add a newly created finding to candidate dictionaries for subsequent findings in the same batch.
215+
216+         This allows duplicates within the same scan report to be detected even when they're processed
217+         in the same batch. When a new finding is created (no match found), it is added to the candidate
218+         dictionaries so that subsequent findings in the same batch can match against it.
219+
220+         This is intentionally a separate method so downstream editions (e.g. Dojo Pro)
221+         can override candidate addition logic without copying the full `process_findings()`
222+         implementation.
223+
224+         Args:
225+             finding: The newly created finding to add to candidates
226+             candidates_by_hash: Dictionary mapping hash_code to list of findings (modified in-place)
227+             candidates_by_uid: Dictionary mapping unique_id_from_tool to list of findings (modified in-place)
228+             candidates_by_key: Dictionary mapping (title_lower, severity) to list of findings (modified in-place)
229+
230+         """
231+         if not finding:
232+             return
233+
234+         if finding.hash_code:
235+             candidates_by_hash.setdefault(finding.hash_code, []).append(finding)
236+             deduplicationLogger.debug(
237+                 f"Added finding {finding.id} (hash_code: {finding.hash_code}) to candidates for next findings in this report",
238+             )
239+         if finding.unique_id_from_tool:
240+             candidates_by_uid.setdefault(finding.unique_id_from_tool, []).append(finding)
241+             deduplicationLogger.debug(
242+                 f"Added finding {finding.id} (unique_id_from_tool: {finding.unique_id_from_tool}) to candidates for next findings in this report",
243+             )
244+         if finding.title:
245+             legacy_key = (finding.title.lower(), finding.severity)
246+             candidates_by_key.setdefault(legacy_key, []).append(finding)
247+             deduplicationLogger.debug(
248+                 f"Added finding {finding.id} (title: {finding.title}, severity: {finding.severity}) to candidates for next findings in this report",
249+             )
250+
204251     def process_findings(
205252         self,
206253         parsed_findings: list[Finding],
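To make the in-batch deduplication concrete, here is a minimal, self-contained sketch of the mechanism the new helper enables: the first occurrence of a finding is registered in the candidate dictionaries, so an identical finding later in the same batch hits the dictionaries instead of missing a match that is not yet in the database. `FakeFinding`, `register_candidate`, and the bare dictionaries are simplified stand-ins for illustration, not the real `Finding` model or matcher.

```python
from dataclasses import dataclass


@dataclass
class FakeFinding:  # stand-in for the Finding model
    id: int
    hash_code: str | None
    unique_id_from_tool: str | None
    title: str
    severity: str


def register_candidate(finding, by_hash, by_uid, by_key):
    # Mirrors add_new_finding_to_candidates() above, minus logging.
    if finding.hash_code:
        by_hash.setdefault(finding.hash_code, []).append(finding)
    if finding.unique_id_from_tool:
        by_uid.setdefault(finding.unique_id_from_tool, []).append(finding)
    if finding.title:
        by_key.setdefault((finding.title.lower(), finding.severity), []).append(finding)


by_hash, by_uid, by_key = {}, {}, {}
first = FakeFinding(1, "abc123", None, "SQL Injection", "High")
second = FakeFinding(2, "abc123", None, "SQL Injection", "High")

# First finding: no candidates yet, so it would be created, then registered.
register_candidate(first, by_hash, by_uid, by_key)

# Second finding in the same batch now finds the first via the hash bucket.
assert by_hash.get(second.hash_code) == [first]
```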
@@ -293,7 +340,7 @@ def process_findings(
293340             is_final = is_final_batch and idx == len(batch_findings) - 1
294341
295342             # Match any findings to this new one coming in using pre-fetched candidates
296-             matched_findings = self.match_finding_for_reimport(
343+             matched_findings = self.match_finding_to_candidate_reimport(
297344                 unsaved_finding,
298345                 candidates_by_hash=candidates_by_hash,
299346                 candidates_by_uid=candidates_by_uid,
@@ -325,23 +372,12 @@ def process_findings(
325372             finding = self.process_finding_that_was_not_matched(unsaved_finding)
326373
327374             # Add newly created finding to candidates for subsequent findings in this batch
328-             if finding:
329-                 if finding.hash_code:
330-                     candidates_by_hash.setdefault(finding.hash_code, []).append(finding)
331-                     deduplicationLogger.debug(
332-                         f"Added finding {finding.id} (hash_code: {finding.hash_code}) to candidates for next findings in this report",
333-                     )
334-                 if finding.unique_id_from_tool:
335-                     candidates_by_uid.setdefault(finding.unique_id_from_tool, []).append(finding)
336-                     deduplicationLogger.debug(
337-                         f"Added finding {finding.id} (unique_id_from_tool: {finding.unique_id_from_tool}) to candidates for next findings in this report",
338-                     )
339-                 if finding.title:
340-                     legacy_key = (finding.title.lower(), finding.severity)
341-                     candidates_by_key.setdefault(legacy_key, []).append(finding)
342-                     deduplicationLogger.debug(
343-                         f"Added finding {finding.id} (title: {finding.title}, severity: {finding.severity}) to candidates for next findings in this report",
344-                     )
375+             self.add_new_finding_to_candidates(
376+                 finding,
377+                 candidates_by_hash,
378+                 candidates_by_uid,
379+                 candidates_by_key,
380+             )
345381
346382             # This condition __appears__ to always be true, but am afraid to remove it
347383             if finding:
@@ -483,50 +519,7 @@ def parse_findings_dynamic_test_type(
483519         logger.debug("REIMPORT_SCAN parser v2: Create parse findings")
484520         return super().parse_findings_dynamic_test_type(scan, parser)
485521
486-     def match_new_finding_to_existing_finding(
487-         self,
488-         unsaved_finding: Finding,
489-     ) -> list[Finding]:
490-         """Matches a single new finding to N existing findings and then returns those matches"""
491-         # This code should match the logic used for deduplication out of the re-import feature.
492-         # See utils.py deduplicate_* functions
493-         deduplicationLogger.debug("return findings bases on algorithm: %s", self.deduplication_algorithm)
494-         if self.deduplication_algorithm == "hash_code":
495-             return Finding.objects.filter(
496-                 test=self.test,
497-                 hash_code=unsaved_finding.hash_code,
498-             ).exclude(hash_code=None).order_by("id")
499-         if self.deduplication_algorithm == "unique_id_from_tool":
500-             deduplicationLogger.debug(f"unique_id_from_tool: {unsaved_finding.unique_id_from_tool}")
501-             return Finding.objects.filter(
502-                 test=self.test,
503-                 unique_id_from_tool=unsaved_finding.unique_id_from_tool,
504-             ).exclude(unique_id_from_tool=None).order_by("id")
505-         if self.deduplication_algorithm == "unique_id_from_tool_or_hash_code":
506-             deduplicationLogger.debug(f"unique_id_from_tool: {unsaved_finding.unique_id_from_tool}")
507-             deduplicationLogger.debug(f"hash_code: {unsaved_finding.hash_code}")
508-             query = Finding.objects.filter(
509-                 Q(test=self.test),
510-                 (Q(hash_code__isnull=False) & Q(hash_code=unsaved_finding.hash_code))
511-                 | (Q(unique_id_from_tool__isnull=False) & Q(unique_id_from_tool=unsaved_finding.unique_id_from_tool)),
512-             ).order_by("id")
513-             deduplicationLogger.debug(query.query)
514-             return query
515-         if self.deduplication_algorithm == "legacy":
516-             # This is the legacy reimport behavior. Although it's pretty flawed and doesn't match the legacy algorithm for deduplication,
517-             # this is left as is for simplicity.
518-             # Re-writing the legacy deduplication here would be complicated and counter-productive.
519-             # If you have use cases going through this section, you're advised to create a deduplication configuration for your parser
520-             logger.warning("Legacy reimport. In case of issue, you're advised to create a deduplication configuration in order not to go through this section")
521-             return Finding.objects.filter(
522-                 title__iexact=unsaved_finding.title,
523-                 test=self.test,
524-                 severity=unsaved_finding.severity,
525-                 numerical_severity=Finding.get_numerical_severity(unsaved_finding.severity)).order_by("id")
526-         logger.error(f'Internal error: unexpected deduplication_algorithm: "{self.deduplication_algorithm}"')
527-         return None
528-
529-     def match_finding_for_reimport(
522+     def match_finding_to_candidate_reimport(
530523         self,
531524         unsaved_finding: Finding,
532525         candidates_by_hash: dict | None = None,
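For contrast with the removed `match_new_finding_to_existing_finding()` above, which issued one or more database queries per finding, the renamed matcher works against the pre-fetched candidate dictionaries. Below is a hypothetical sketch of what dict-based lookup per deduplication algorithm looks like; `lookup_candidates` is invented for illustration and is not the actual body of `match_finding_to_candidate_reimport()`.

```python
# Hypothetical illustration: per-finding matching becomes in-memory dict
# lookups instead of per-finding database queries.
def lookup_candidates(unsaved_finding, algorithm, by_hash, by_uid, by_key):
    if algorithm == "hash_code":
        return by_hash.get(unsaved_finding.hash_code, [])
    if algorithm == "unique_id_from_tool":
        return by_uid.get(unsaved_finding.unique_id_from_tool, [])
    if algorithm == "unique_id_from_tool_or_hash_code":
        merged = (by_hash.get(unsaved_finding.hash_code, [])
                  + by_uid.get(unsaved_finding.unique_id_from_tool, []))
        # De-duplicate while preserving order (findings are hashable objects).
        return list(dict.fromkeys(merged))
    # "legacy": fall back to the (title_lower, severity) key, assuming a title is set.
    return by_key.get((unsaved_finding.title.lower(), unsaved_finding.severity), [])
```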