|
1 | 1 | import csv |
2 | 2 | import io |
| 3 | +import json |
| 4 | +import logging |
3 | 5 | import sys |
4 | 6 |
|
5 | | -from dojo.models import Finding |
| 7 | +from dateutil import parser as date_parser |
6 | 8 |
|
| 9 | +from dojo.models import SEVERITIES, Finding, Test |
7 | 10 |
|
8 | | -class WizParser: |
9 | | - def get_scan_types(self): |
10 | | - return ["Wiz Scan"] |
| 11 | +logger = logging.getLogger(__name__) |
11 | 12 |
|
12 | | - def get_label_for_scan_types(self, scan_type): |
13 | | - return "Wiz Scan" |
14 | 13 |
|
15 | | - def get_description_for_scan_types(self, scan_type): |
16 | | - return "Wiz scan results in csv file format." |
class WizParserByTitle:
    """Parse the Wiz CSV export where the "Title" column identifies a finding."""

    # CSV columns appended verbatim to the finding description, in this order.
    DESCRIPTION_FIELDS = (
        "Description",
        "Resource Type",
        "Resource external ID",
        "Subscription ID",
        "Project IDs",
        "Project Names",
        "Control ID",
        "Resource Name",
        "Resource Region",
        "Resource Status",
        "Resource Platform",
        "Resource OS",
        "Resource original JSON",
        "Issue ID",
        "Resource vertex ID",
        "Ticket URLs",
        "Note",
        "Due At",
        "Subscription Name",
        "Wiz URL",
        "Cloud Provider URL",
        "Resource Tags",
        "Kubernetes Cluster",
        "Kubernetes Namespace",
        "Container Service",
    )

    def parse_findings(self, test: "Test", reader: csv.DictReader) -> list["Finding"]:
        """Parse the CSV with the assumed format of the link below.

        test file: https://github.com/DefectDojo/django-DefectDojo/blob/master/unittests/scans/wiz/multiple_findings.csv

        Returns a list of unsaved Finding objects, one per row whose Status is "open".
        """
        findings = []
        for row in reader:
            # Skip anything that is not an open issue. Guard against a missing
            # Status column instead of raising AttributeError on None.
            status = row.get("Status") or ""
            if status.lower() != "open":
                continue
            title = row.get("Title")
            # .capitalize() already lowercases the rest of the string; default
            # to "Info" when the Severity column is absent or empty so a
            # malformed row does not crash the import.
            raw_severity = row.get("Severity")
            severity = raw_severity.capitalize() if raw_severity else "Info"
            mitigation = row.get("Remediation Recommendation")
            # Build the description from every populated description column.
            description = ""
            for field in self.DESCRIPTION_FIELDS:
                field_value = row.get(field)
                if field_value:
                    description += f"**{field}**: {field_value}\n"
            findings.append(
                Finding(
                    title=title,
                    description=description,
                    severity=severity,
                    static_finding=False,
                    dynamic_finding=True,
                    mitigation=mitigation,
                    test=test,
                ),
            )
        return findings
| 74 | + |
| 75 | + |
class WizParserByDetailedName:
    """Parse the Wiz CSV export where "DetailedName" and "Name" identify a finding."""

    # CSV column -> human-readable label rendered into the description.
    DESCRIPTION_FIELDS = {
        "WizURL": "Wiz URL",
        "HasExploit": "Has Exploit",
        "HasCisaKevExploit": "Has Cisa Kev Exploit",
        "LocationPath": "Location Path",
        "Version": "Version",
        "DetectionMethod": "Detection Method",
        "Link": "Link",
        "Projects": "Projects",
        "AssetID": "Asset ID",
        "AssetName": "Asset Name",
        "AssetRegion": "Asset Region",
        "ProviderUniqueId": "Provider Unique Id",
        "CloudProviderURL": "Cloud Provider URL",
        "CloudPlatform": "Cloud Platform",
        "SubscriptionExternalId": "Subscription External Id",
        "SubscriptionId": "Subscription Id",
        "SubscriptionName": "Subscription Name",
        "ExecutionControllers": "Execution Controllers",
        "ExecutionControllersSubscriptionExternalIds": "Execution Controllers Subscription External Ids",
        "ExecutionControllersSubscriptionNames": "Execution Controllers Subscription Names",
        "OperatingSystem": "Operating System",
        "IpAddresses": "Ip Addresses",
    }
    # CSV column -> human-readable label rendered into the mitigation.
    MITIGATION_FIELDS = {
        "LocationPath": "Location Path",
        "FixedVersion": "Fixed Version",
        "Remediation": "Remediation",
    }

    def parse_findings(self, test: "Test", reader: csv.DictReader) -> list["Finding"]:
        """Parse the CSV with the assumed format of the link below.

        test file: Coming soon!

        Returns a list of unsaved Finding objects, one per CSV row.
        """
        findings = []
        for row in reader:
            # Common fields
            vulnerability_id = row.get("Name")
            package_name = row.get("DetailedName")
            package_version = row.get("Version")
            severity = row.get("VendorSeverity")
            finding_id = row.get("ID")

            description = self._construct_string_field(self.DESCRIPTION_FIELDS, row)
            mitigation = self._construct_string_field(self.MITIGATION_FIELDS, row)
            status_dict = self._convert_status(row)
            # dateutil raises TypeError on None, so only parse a date when the
            # FirstDetected column is present and non-empty.
            first_detected = row.get("FirstDetected")
            # Create the finding object
            finding = Finding(
                title=f"{package_name}: {vulnerability_id}",
                description=description,
                mitigation=mitigation,
                severity=self._validate_severities(severity),
                static_finding=True,
                unique_id_from_tool=finding_id,
                component_name=package_name,
                component_version=package_version,
                date=date_parser.parse(first_detected) if first_detected else None,
                test=test,
                **status_dict,
            )
            finding.unsaved_vulnerability_ids = [vulnerability_id]
            finding.unsaved_tags = self._parse_tags(row.get("Tags", "[]"))
            findings.append(finding)
        return findings

    def _construct_string_field(self, fields: dict[str, str], row: dict) -> str:
        """Construct a formatted markdown string from the populated columns in *fields*."""
        return_string = ""
        for field, pretty_field in fields.items():
            if (field_value := row.get(field)) is not None and len(field_value) > 0:
                return_string += f"**{pretty_field}**: `{field_value}`\n"
        return return_string

    def _parse_tags(self, tags: str) -> list[str]:
        """Parse the Tag string dict, and convert to a list of "key: value" strings.

        The format of the tags is "{""key"":""value""}" format.
        Malformed or empty tag payloads yield an empty list instead of raising.
        """
        try:
            tag_dict = json.loads(tags or "{}")
        except json.JSONDecodeError:
            logger.warning("Tags column is not valid JSON: %s", tags)
            return []
        return [f"{key}: {value}" for key, value in tag_dict.items()]

    def _validate_severities(self, severity: str) -> str:
        """Ensure the supplied severity fits what DefectDojo is expecting."""
        if severity not in SEVERITIES:
            # Lazy %-formatting so the message is only built when emitted.
            logger.error("Severity is not supported: %s", severity)
            # Default to Info severity
            return "Info"
        return severity

    def _convert_status(self, row: dict) -> dict:
        """Convert the "FindingStatus" column to a dict of Finding kwargs.

        Every status currently maps to active (the original branched on "Open"
        but returned the same dict either way). Extend this table when other
        Wiz statuses need dedicated handling (e.g. Resolved -> is_mitigated).
        """
        status_map = {"Open": {"active": True}}
        # Default status is active
        return status_map.get(row.get("FindingStatus"), {"active": True})
| 181 | + |
| 182 | + |
class WizParser(
    WizParserByTitle,
    WizParserByDetailedName,
):
    """DefectDojo entry point: dispatches a Wiz CSV to the matching sub-parser."""

    def get_scan_types(self):
        return ["Wiz Scan"]

    def get_label_for_scan_types(self, scan_type):
        return "Wiz Scan"

    def get_description_for_scan_types(self, scan_type):
        return "Wiz scan results in csv file format."

    def get_findings(self, filename, test):
        """Read the uploaded CSV and dispatch on its header columns.

        Raises ValueError when the header matches neither supported format
        (including an empty upload, where DictReader.fieldnames is None).
        """
        content = filename.read()
        if isinstance(content, bytes):
            content = content.decode("utf-8")
        csv.field_size_limit(int(sys.maxsize / 10))  # the request/resp are big
        reader = csv.DictReader(io.StringIO(content))
        # fieldnames is None for an empty file; fall through to the
        # unsupported-format error instead of raising TypeError on `in None`.
        fieldnames = reader.fieldnames or []
        # Determine which parser to use
        if "Title" in fieldnames:
            return WizParserByTitle().parse_findings(test, reader)
        if all(field in fieldnames for field in ("Name", "DetailedName")):
            return WizParserByDetailedName().parse_findings(test, reader)
        msg = "This CSV format of Wiz is not supported"
        raise ValueError(msg)
0 commit comments