Commit 0b2050c

OpenVAS parser improvements (DefectDojo#13214)

* feat: openvas parser version 2
* bugfix: fixed openvas linting
* bugfix: fixed dryrun findings
* bugfix: fix failing tests
1 parent 5b70f65 commit 0b2050c

17 files changed

Lines changed: 1030 additions & 101 deletions

docs/content/en/connecting_your_tools/parsers/file/openvas.md

Lines changed: 30 additions & 7 deletions
@@ -2,16 +2,39 @@
 title: "OpenVAS Parser"
 toc_hide: true
 ---
-You can either upload the exported results of an OpenVAS Scan in a .csv or .xml format.
+You can upload the results of an OpenVAS/Greenbone report in either .csv or .xml format.
 
 ### Sample Scan Data
 Sample OpenVAS scans can be found [here](https://github.com/DefectDojo/django-DefectDojo/tree/master/unittests/scans/openvas).
 
-### Default Deduplication Hashcode Fields
-By default, DefectDojo identifies duplicate Findings using these [hashcode fields](https://docs.defectdojo.com/en/working_with_findings/finding_deduplication/about_deduplication/):
+### Parser versions
+The OpenVAS parser has two versions: version 2 and the legacy version (version 1). Only version 2 should be used going forward, and this documentation assumes version 2.
+
+Version 2 comes with a number of improvements:
+- Uses a hash code algorithm for deduplication.
+- Parses XML and CSV reports more consistently.
+- Combines findings whose only differences lie in fields that cannot be hashed reliably because their values change between scans (e.g. fields containing timestamps or packet IDs). This prevents duplicates when the same vulnerability is found multiple times on the same endpoint.
+- Covers more report values.
+- Adds a heuristic for fix_available detection.
+- Updates the mapping to DefectDojo fields compared to version 1.
+
+### Deduplication Algorithm
+By default, parser v2 identifies duplicate findings using the following [hashcode fields](https://docs.defectdojo.com/en/working_with_findings/finding_deduplication/about_deduplication/):
 
 - title
-- cwe
-- line
-- file path
-- description
+- severity
+- vuln_id_from_tool
+- endpoints
+
+The legacy version (version 1) uses the legacy deduplication algorithm.
+
+### CSV and XML differences and similarities
+The parser attempts to handle XML and CSV files in the same way. However, this is not always possible. The differences between the two formats are:
+
+- EPSS scores and percentiles are only available in the CSV format.
+- CVSS vectors are only available in the XML format.
+- The CSV parser always reports the CVSS score as CVSS v3.
+- The references produced by the CSV parser never contain URLs.
+
+If no supported CVSS version is detected, the score (if present) is recorded as a CVSS v3 score, even if this is incorrect.
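As a rough illustration of how the documented hashcode fields drive deduplication, two findings that agree on all four fields would collapse into one. This is a sketch only, not DefectDojo's actual implementation; the OID-style `vuln_id_from_tool` and the hosts are made-up example values:

```python
import hashlib


def doc_hashcode(title: str, severity: str, vuln_id_from_tool: str, endpoints: list[str]) -> str:
    """Join the documented hashcode fields and hash them; duplicates share the same hash."""
    parts = [title, severity, vuln_id_from_tool, *sorted(endpoints)]
    return hashlib.sha256("|".join(parts).encode("utf-8")).hexdigest()


a = doc_hashcode("Outdated OpenSSL", "High", "1.3.6.1.4.1.25623.1.0.100001", ["10.0.0.5"])
b = doc_hashcode("Outdated OpenSSL", "High", "1.3.6.1.4.1.25623.1.0.100001", ["10.0.0.5"])
c = doc_hashcode("Outdated OpenSSL", "High", "1.3.6.1.4.1.25623.1.0.100001", ["10.0.0.6"])
print(a == b)  # True: identical fields, treated as duplicates
print(a == c)  # False: different endpoint, distinct finding
```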

dojo/settings/settings.dist.py

Lines changed: 3 additions & 0 deletions
@@ -1355,6 +1355,7 @@ def saml2_attrib_map_format(din):
     "Qualys Hacker Guardian Scan": ["title", "severity", "description"],
     "Cyberwatch scan (Galeax)": ["title", "description", "severity"],
     "Cycognito Scan": ["title", "severity"],
+    "OpenVAS Parser v2": ["title", "severity", "vuln_id_from_tool", "endpoints"],
 }
 
 # Override the hardcoded settings here via the env var
@@ -1426,6 +1427,7 @@ def saml2_attrib_map_format(din):
     "HCL AppScan on Cloud SAST XML": True,
     "AWS Inspector2 Scan": True,
     "Cyberwatch scan (Galeax)": True,
+    "OpenVAS Parser v2": True,
 }
 
 # List of fields that are known to be usable in hash_code computation)
@@ -1612,6 +1614,7 @@ def saml2_attrib_map_format(din):
     "Red Hat Satellite": DEDUPE_ALGO_HASH_CODE,
     "Qualys Hacker Guardian Scan": DEDUPE_ALGO_HASH_CODE,
     "Cyberwatch scan (Galeax)": DEDUPE_ALGO_HASH_CODE,
+    "OpenVAS Parser v2": DEDUPE_ALGO_HASH_CODE,
 }
 
 # Override the hardcoded settings here via the env var

dojo/tools/factory.py

Lines changed: 6 additions & 1 deletion
@@ -119,7 +119,12 @@ def requires_tool_type(scan_type):
         module = import_module(f"dojo.tools.{module_name}.parser")
         for attribute_name in dir(module):
             attribute = getattr(module, attribute_name)
-            if isclass(attribute) and attribute_name.lower() == module_name.replace("_", "") + "parser":
+            # Allow parser class names with an optional v[number] suffix (e.g., OpenVASParser, OpenVASParserV2)
+            expected_base = module_name.replace("_", "") + "parser"
+            if isclass(attribute) and (
+                attribute_name.lower() == expected_base or
+                re.match(rf"^{re.escape(expected_base)}v\d+$", attribute_name.lower())
+            ):
                 register(attribute)
     except:
         logger.exception("failed to load %s", module_name)
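The relaxed class-name check above can be exercised in isolation. A minimal sketch, with the matching logic pulled out into a standalone helper (the helper name is mine, not part of the codebase):

```python
import re


def matches_parser_class(module_name: str, attribute_name: str) -> bool:
    """Mirror the factory check: exact '<module>parser' name, or an optional v<number> suffix."""
    expected_base = module_name.replace("_", "") + "parser"
    name = attribute_name.lower()
    return name == expected_base or bool(re.match(rf"^{re.escape(expected_base)}v\d+$", name))


# For the "openvas" module, both the legacy class and the v2 class are registered:
print(matches_parser_class("openvas", "OpenVASParser"))    # True
print(matches_parser_class("openvas", "OpenVASParserV2"))  # True
# Helper classes that merely contain the word "parser" are still ignored:
print(matches_parser_class("openvas", "OpenVASCSVParser"))  # False
```

`re.escape` keeps module names with regex metacharacters safe, and the `$` anchor ensures only a pure numeric suffix (`v2`, `v10`, ...) is accepted.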

dojo/tools/openvas/parser.py

Lines changed: 22 additions & 2 deletions
@@ -1,5 +1,7 @@
-from dojo.tools.openvas.csv_parser import OpenVASCSVParser
-from dojo.tools.openvas.xml_parser import OpenVASXMLParser
+from dojo.tools.openvas.parser_v1.csv_parser import OpenVASCSVParser
+from dojo.tools.openvas.parser_v1.xml_parser import OpenVASXMLParser
+from dojo.tools.openvas.parser_v2.csv_parser import get_findings_from_csv
+from dojo.tools.openvas.parser_v2.xml_parser import get_findings_from_xml
 
 
 class OpenVASParser:
@@ -18,3 +20,21 @@ def get_findings(self, filename, test):
         if str(filename.name).endswith(".xml"):
             return OpenVASXMLParser().get_findings(filename, test)
         return None
+
+
+class OpenVASParserV2:
+    def get_scan_types(self):
+        return ["OpenVAS Parser v2"]
+
+    def get_label_for_scan_types(self, scan_type):
+        return scan_type
+
+    def get_description_for_scan_types(self, scan_type):
+        return "Import CSV or XML output of Greenbone OpenVAS report."
+
+    def get_findings(self, file, test):
+        if str(file.name).endswith(".csv"):
+            return get_findings_from_csv(file, test)
+        if str(file.name).endswith(".xml"):
+            return get_findings_from_xml(file, test)
+        return None

dojo/tools/openvas/parser_v1/__init__.py

Whitespace-only changes.
File renamed without changes.
File renamed without changes.

dojo/tools/openvas/parser_v2/__init__.py

Whitespace-only changes.
Lines changed: 112 additions & 0 deletions
@@ -0,0 +1,112 @@
+import hashlib
+from dataclasses import dataclass
+
+from dojo.models import Endpoint, Finding
+
+
+@dataclass
+class OpenVASFindingAuxData:
+
+    """Dataclass to contain all information added to a finding later"""
+
+    references: list[str]
+    summary: str = ""
+    qod: str = ""
+    openvas_result: str = ""
+    fallback_cvss_score: float | None = None
+
+
+def setup_finding(test) -> tuple[Finding, OpenVASFindingAuxData]:
+    """Base setup and init for findings and auxiliary data"""
+    finding = Finding(test=test, dynamic_finding=True, static_finding=False, severity="Info", nb_occurences=1, cwe=None)
+    finding.unsaved_vulnerability_ids = []
+    finding.unsaved_endpoints = [Endpoint()]
+
+    aux_info = OpenVASFindingAuxData([])
+
+    return finding, aux_info
+
+
+def is_valid_severity(severity: str) -> bool:
+    valid_severity = ("Info", "Low", "Medium", "High", "Critical")
+    return severity in valid_severity
+
+
+def cleanup_openvas_text(text: str) -> str:
+    """Removes unnecessary DefectDojo newlines"""
+    return text.replace("\n ", " ")
+
+
+def escape_restructured_text(text: str) -> str:
+    """Changes text so that reStructuredText symbols are not interpreted"""
+    # OpenVAS likes to include markdown-like tables in some fields.
+    # DefectDojo uses reStructuredText, which causes them to be rendered incorrectly.
+    text = text.replace("```", "")
+    return f"```\n{text}\n```"
+
+
+def postprocess_finding(finding: Finding, aux_info: OpenVASFindingAuxData):
+    """Update finding with AuxData content"""
+    if aux_info.openvas_result:
+        finding.steps_to_reproduce = escape_restructured_text(cleanup_openvas_text(aux_info.openvas_result))
+    if aux_info.summary:
+        finding.description += f"\n**Summary**: {cleanup_openvas_text(aux_info.summary)}"
+    if aux_info.qod:
+        finding.description += f"\n**QoD**: {aux_info.qod}"
+    if len(aux_info.references) > 0:
+        finding.references = "\n".join(["- " + ref for ref in aux_info.references])
+    # fallback in case no CVSS version is detected
+    if aux_info.fallback_cvss_score and not finding.cvssv3_score and not finding.cvssv4_score:
+        finding.cvssv3_score = aux_info.fallback_cvss_score
+
+    # heuristic for fix_available detection
+    if finding.mitigation:
+        search_terms = ["Update to version", "The vendor has released updates"]
+        if any(text in finding.mitigation for text in search_terms):
+            finding.fix_available = True
+
+
+def deduplicate(dupes: dict[str, Finding], finding: Finding):
+    """Combine multiple OpenVAS findings into one DefectDojo finding with potentially multiple endpoints"""
+    finding_hash = gen_finding_hash(finding)
+
+    if finding_hash not in dupes:
+        dupes[finding_hash] = finding
+    else:
+        # OpenVAS does not combine multiple findings into one.
+        # E.g. if two vulnerable Java runtimes are present on the host, this is reported as two findings.
+        # The only way to differentiate these findings when they are based on the same vulnerability
+        # is the data mapped to steps_to_reproduce.
+        # However, we cannot hash this field, as it can contain data that changes between scans,
+        # e.g. timestamps or packet IDs.
+        # We therefore combine them into one DefectDojo finding, because duplicates during reimport cause
+        # https://github.com/DefectDojo/django-DefectDojo/issues/3958
+        org = dupes[finding_hash]
+        org.nb_occurences += 1
+        if org.steps_to_reproduce != finding.steps_to_reproduce:
+            if "Endpoint" in org.steps_to_reproduce:
+                org.steps_to_reproduce += "\n---------------------------------------\n"
+                org.steps_to_reproduce += f"**Endpoint**: {finding.unsaved_endpoints[0].host}\n"
+                org.steps_to_reproduce += finding.steps_to_reproduce
+            else:
+                tmp = org.steps_to_reproduce
+                org.steps_to_reproduce = f"**Endpoint**: {org.unsaved_endpoints[0].host}\n"
+                org.steps_to_reproduce += tmp
+
+        # combine identical findings on different hosts into one with multiple hosts
+        endpoint = finding.unsaved_endpoints[0]
+        if endpoint not in org.unsaved_endpoints:
+            org.unsaved_endpoints += finding.unsaved_endpoints
+
+
+def gen_finding_hash(finding: Finding) -> str:
+    """Generate a hash for a finding, used to deduplicate findings inside the current report"""
+    endpoint = finding.unsaved_endpoints[0]
+    hash_data = [
+        str(endpoint),
+        finding.title,
+        finding.vuln_id_from_tool,
+        finding.severity,
+    ]
+    return hashlib.sha256("|".join(hash_data).encode("utf-8")).hexdigest()
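The merge behaviour of `deduplicate` and `gen_finding_hash` can be sketched without DefectDojo installed. `SimpleFinding` below is a simplified stand-in for `dojo.models.Finding` (and the merge keeps only the occurrence count and endpoint list, not the steps_to_reproduce handling):

```python
import hashlib
from dataclasses import dataclass, field


@dataclass
class SimpleFinding:
    # simplified stand-in for dojo.models.Finding
    title: str
    severity: str
    vuln_id_from_tool: str
    endpoints: list[str] = field(default_factory=list)
    nb_occurences: int = 1


def finding_hash(f: SimpleFinding) -> str:
    # same field choice as gen_finding_hash: first endpoint, title, tool id, severity
    data = [f.endpoints[0], f.title, f.vuln_id_from_tool, f.severity]
    return hashlib.sha256("|".join(data).encode("utf-8")).hexdigest()


def dedup(dupes: dict[str, SimpleFinding], f: SimpleFinding) -> None:
    h = finding_hash(f)
    if h not in dupes:
        dupes[h] = f
    else:
        org = dupes[h]
        org.nb_occurences += 1  # same vulnerability reported again
        for ep in f.endpoints:
            if ep not in org.endpoints:
                org.endpoints.append(ep)


dupes: dict[str, SimpleFinding] = {}
dedup(dupes, SimpleFinding("Vulnerable Java runtime", "High", "oid-1", ["10.0.0.5:443"]))
dedup(dupes, SimpleFinding("Vulnerable Java runtime", "High", "oid-1", ["10.0.0.5:443"]))
merged = next(iter(dupes.values()))
print(len(dupes), merged.nb_occurences)  # 1 2
```

Because the hash includes the first endpoint, only repeat reports against the same endpoint collapse; the second report bumps `nb_occurences` instead of creating a duplicate finding.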
