Skip to content

Commit daac9c8

Browse files
reimport: extract method to get candidates
1 parent 58d2abd commit daac9c8

1 file changed

Lines changed: 45 additions & 18 deletions

File tree

dojo/importers/default_reimporter.py

Lines changed: 45 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -159,6 +159,48 @@ def process_scan(
159159
test_import_history,
160160
)
161161

162+
def get_reimport_match_candidates_for_batch(
163+
self,
164+
batch_findings: list[Finding],
165+
) -> tuple[dict, dict, dict]:
166+
"""
167+
Fetch candidate matches for a batch of *unsaved* findings during reimport.
168+
169+
This is intentionally a separate method so downstream editions (e.g. Dojo Pro)
170+
can override candidate retrieval without copying the full `process_findings()`
171+
implementation.
172+
173+
Returns:
174+
(candidates_by_hash, candidates_by_uid, candidates_by_key)
175+
176+
"""
177+
candidates_by_hash: dict = {}
178+
candidates_by_uid: dict = {}
179+
candidates_by_key: dict = {}
180+
181+
if self.deduplication_algorithm == "hash_code":
182+
candidates_by_hash = find_candidates_for_deduplication_hash(
183+
self.test,
184+
batch_findings,
185+
mode="reimport",
186+
)
187+
elif self.deduplication_algorithm == "unique_id_from_tool":
188+
candidates_by_uid = find_candidates_for_deduplication_unique_id(
189+
self.test,
190+
batch_findings,
191+
mode="reimport",
192+
)
193+
elif self.deduplication_algorithm == "unique_id_from_tool_or_hash_code":
194+
candidates_by_uid, candidates_by_hash = find_candidates_for_deduplication_uid_or_hash(
195+
self.test,
196+
batch_findings,
197+
mode="reimport",
198+
)
199+
elif self.deduplication_algorithm == "legacy":
200+
candidates_by_key = find_candidates_for_reimport_legacy(self.test, batch_findings)
201+
202+
return candidates_by_hash, candidates_by_uid, candidates_by_key
203+
162204
def process_findings(
163205
self,
164206
parsed_findings: list[Finding],
@@ -242,24 +284,9 @@ def process_findings(
242284
deduplicationLogger.debug(f"unsaved finding's hash_code: {unsaved_finding.hash_code}")
243285

244286
# Fetch all candidates for this batch at once (batch candidate finding)
245-
candidates_by_hash = {}
246-
candidates_by_uid = {}
247-
candidates_by_key = {}
248-
249-
if self.deduplication_algorithm == "hash_code":
250-
candidates_by_hash = find_candidates_for_deduplication_hash(
251-
self.test, batch_findings, mode="reimport",
252-
)
253-
elif self.deduplication_algorithm == "unique_id_from_tool":
254-
candidates_by_uid = find_candidates_for_deduplication_unique_id(
255-
self.test, batch_findings, mode="reimport",
256-
)
257-
elif self.deduplication_algorithm == "unique_id_from_tool_or_hash_code":
258-
candidates_by_uid, candidates_by_hash = find_candidates_for_deduplication_uid_or_hash(
259-
self.test, batch_findings, mode="reimport",
260-
)
261-
elif self.deduplication_algorithm == "legacy":
262-
candidates_by_key = find_candidates_for_reimport_legacy(self.test, batch_findings)
287+
candidates_by_hash, candidates_by_uid, candidates_by_key = self.get_reimport_match_candidates_for_batch(
288+
batch_findings,
289+
)
263290

264291
# Process each finding in the batch using pre-fetched candidates
265292
for idx, unsaved_finding in enumerate(batch_findings):

0 commit comments

Comments
 (0)