@@ -159,6 +159,48 @@ def process_scan(
             test_import_history,
         )
 
+    def get_reimport_match_candidates_for_batch(
+        self,
+        batch_findings: list[Finding],
+    ) -> tuple[dict, dict, dict]:
+        """
+        Fetch candidate matches for a batch of *unsaved* findings during reimport.
+
+        This is intentionally a separate method so downstream editions (e.g. Dojo Pro)
+        can override candidate retrieval without copying the full `process_findings()`
+        implementation.
+
+        Returns:
+            (candidates_by_hash, candidates_by_uid, candidates_by_key)
+
+        """
+        candidates_by_hash: dict = {}
+        candidates_by_uid: dict = {}
+        candidates_by_key: dict = {}
+
+        if self.deduplication_algorithm == "hash_code":
+            candidates_by_hash = find_candidates_for_deduplication_hash(
+                self.test,
+                batch_findings,
+                mode="reimport",
+            )
+        elif self.deduplication_algorithm == "unique_id_from_tool":
+            candidates_by_uid = find_candidates_for_deduplication_unique_id(
+                self.test,
+                batch_findings,
+                mode="reimport",
+            )
+        elif self.deduplication_algorithm == "unique_id_from_tool_or_hash_code":
+            candidates_by_uid, candidates_by_hash = find_candidates_for_deduplication_uid_or_hash(
+                self.test,
+                batch_findings,
+                mode="reimport",
+            )
+        elif self.deduplication_algorithm == "legacy":
+            candidates_by_key = find_candidates_for_reimport_legacy(self.test, batch_findings)
+
+        return candidates_by_hash, candidates_by_uid, candidates_by_key
+
     def process_findings(
         self,
         parsed_findings: list[Finding],
@@ -242,24 +284,9 @@ def process_findings(
                 deduplicationLogger.debug(f"unsaved finding's hash_code: {unsaved_finding.hash_code}")
 
             # Fetch all candidates for this batch at once (batch candidate finding)
-            candidates_by_hash = {}
-            candidates_by_uid = {}
-            candidates_by_key = {}
-
-            if self.deduplication_algorithm == "hash_code":
-                candidates_by_hash = find_candidates_for_deduplication_hash(
-                    self.test, batch_findings, mode="reimport",
-                )
-            elif self.deduplication_algorithm == "unique_id_from_tool":
-                candidates_by_uid = find_candidates_for_deduplication_unique_id(
-                    self.test, batch_findings, mode="reimport",
-                )
-            elif self.deduplication_algorithm == "unique_id_from_tool_or_hash_code":
-                candidates_by_uid, candidates_by_hash = find_candidates_for_deduplication_uid_or_hash(
-                    self.test, batch_findings, mode="reimport",
-                )
-            elif self.deduplication_algorithm == "legacy":
-                candidates_by_key = find_candidates_for_reimport_legacy(self.test, batch_findings)
+            candidates_by_hash, candidates_by_uid, candidates_by_key = self.get_reimport_match_candidates_for_batch(
+                batch_findings,
+            )
 
             # Process each finding in the batch using pre-fetched candidates
             for idx, unsaved_finding in enumerate(batch_findings):
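The point of extracting the method is the override seam: a downstream edition only has to replace the candidate lookup, not the whole `process_findings()` loop. A minimal sketch of what that could look like, assuming the hook lands on `DefaultReImporter` and using a made-up `_extra_hash_candidates()` helper (both the subclass and the helper are illustrative, not part of this commit):

```python
from dojo.importers.default_reimporter import DefaultReImporter
from dojo.models import Finding


class ProReImporter(DefaultReImporter):  # hypothetical subclass, for illustration only
    def get_reimport_match_candidates_for_batch(
        self,
        batch_findings: list[Finding],
    ) -> tuple[dict, dict, dict]:
        # Reuse the default candidate retrieval introduced in this commit...
        candidates_by_hash, candidates_by_uid, candidates_by_key = (
            super().get_reimport_match_candidates_for_batch(batch_findings)
        )
        # ...then layer on an edition-specific lookup on top of the hash candidates.
        candidates_by_hash.update(self._extra_hash_candidates(batch_findings))
        return candidates_by_hash, candidates_by_uid, candidates_by_key

    def _extra_hash_candidates(self, batch_findings: list[Finding]) -> dict:
        # Placeholder: a real edition would query its own matching source here.
        return {}
```

Because `process_findings()` now goes through this single hook, the sketch above changes matching behavior without touching the batch loop itself.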