diff --git a/unittests/dojo_test_case.py b/unittests/dojo_test_case.py index 5be0e1a5e3e..4818dd798ce 100644 --- a/unittests/dojo_test_case.py +++ b/unittests/dojo_test_case.py @@ -499,7 +499,7 @@ def __init__(self, *args, **kwargs): def login_as_admin(self): testuser = self.get_test_admin() - token = Token.objects.get(user=testuser) + token, _ = Token.objects.get_or_create(user=testuser) self.client = APIClient() self.client.credentials(HTTP_AUTHORIZATION="Token " + token.key) diff --git a/unittests/scans/sarif/bash-report-subset-same-hash-code-same-unique-id.sarif b/unittests/scans/sarif/bash-report-subset-same-hash-code-same-unique-id.sarif new file mode 100644 index 00000000000..2482ebd6888 --- /dev/null +++ b/unittests/scans/sarif/bash-report-subset-same-hash-code-same-unique-id.sarif @@ -0,0 +1,405 @@ +{ + "runs": [ + { + "tool": { + "driver": { + "name": "Shell Script Analysis", + "rules": [ + { + "id": "2076", + "help": { + "text": "Don't quote right-hand side of =~, it'll match literally rather than as a regex.", + "markdown": "Don't quote right-hand side of =~, it'll match literally rather than as a regex." + }, + "name": "", + "properties": { + "tags": [ + "Scan" + ], + "precision": "high" + }, + "defaultConfiguration": { + "level": "note" + }, + "fullDescription": { + "text": "Don't quote right-hand side of =~, it'll match literally rather than as a regex." + }, + "helpUri": "https://github.com/koalaman/shellcheck/wiki/SC2076", + "shortDescription": { + "text": "Don't quote right-hand side of =~, it'll match literally rather than as a regex." + } + }, + { + "id": "2071", + "help": { + "text": "> is for string comparisons. Use -gt instead.", + "markdown": "> is for string comparisons. Use -gt instead." + }, + "name": "", + "properties": { + "tags": [ + "Scan" + ], + "precision": "high" + }, + "defaultConfiguration": { + "level": "note" + }, + "fullDescription": { + "text": "> is for string comparisons. Use -gt instead." + }, + "helpUri": "https://github.com/koalaman/shellcheck/wiki/SC2071", + "shortDescription": { + "text": "> is for string comparisons" + } + }, + { + "id": "2072", + "help": { + "text": "Decimals are not supported. Either use integers only, or use bc or awk to compare.", + "markdown": "Decimals are not supported. Either use integers only, or use bc or awk to compare." + }, + "name": "", + "properties": { + "tags": [ + "Scan" + ], + "precision": "high" + }, + "defaultConfiguration": { + "level": "note" + }, + "fullDescription": { + "text": "Decimals are not supported. Either use integers only, or use bc or awk to compare." + }, + "helpUri": "https://github.com/koalaman/shellcheck/wiki/SC2072", + "shortDescription": { + "text": "Decimals are not supported" + } + }, + { + "id": "2077", + "help": { + "text": "You need spaces around the comparison operator.", + "markdown": "You need spaces around the comparison operator." + }, + "name": "", + "properties": { + "tags": [ + "Scan" + ], + "precision": "high" + }, + "defaultConfiguration": { + "level": "note" + }, + "fullDescription": { + "text": "You need spaces around the comparison operator." + }, + "helpUri": "https://github.com/koalaman/shellcheck/wiki/SC2077", + "shortDescription": { + "text": "You need spaces around the comparison operator." + } + }, + { + "id": "1035", + "help": { + "text": "You are missing a required space here.", + "markdown": "You are missing a required space here." 
+ }, + "name": "", + "properties": { + "tags": [ + "Scan" + ], + "precision": "high" + }, + "defaultConfiguration": { + "level": "note" + }, + "fullDescription": { + "text": "You are missing a required space here." + }, + "helpUri": "https://github.com/koalaman/shellcheck/wiki/SC1035", + "shortDescription": { + "text": "You are missing a required space here." + } + } + ], + "version": "1.0.0-scan", + "fullName": "Shell Script Analysis" + } + }, + "conversion": { + "tool": { + "driver": { + "name": "@ShiftLeft/sast-scan" + } + }, + "invocation": { + "arguments": [ + "-a", + "--shell=bash", + "-f", + "json", + "-S", + "error", + "--color=never", + "/app/legacy-setup.bash", + "/app/test.sh", + "/app/upgrade.bash", + "/app/entrypoint_scripts/os/ubuntu.sh", + "/app/entrypoint_scripts/os/linux.sh", + "/app/entrypoint_scripts/common/config-vars.sh", + "/app/entrypoint_scripts/common/install-dojo.sh", + "/app/entrypoint_scripts/common/common-os.sh", + "/app/entrypoint_scripts/common/dojo-shared-resources.sh", + "/app/entrypoint_scripts/common/cmd-args.sh", + "/app/entrypoint_scripts/common/prompt.sh", + "/app/entrypoint_scripts/run/startup-docker.bash", + "/app/entrypoint_scripts/run/run-local-dojo.bash", + "/app/setup/setup.bash", + "/app/setup/upgrade.bash", + "/app/setup/scripts/os/ubuntu.sh", + "/app/setup/scripts/os/linux.sh", + "/app/setup/scripts/common/config-vars.sh", + "/app/setup/scripts/common/install-dojo.sh", + "/app/setup/scripts/common/common-os.sh", + "/app/setup/scripts/common/dojo-shared-resources.sh", + "/app/setup/scripts/common/cmd-args.sh", + "/app/setup/scripts/common/prompt.sh", + "/app/setup/scripts/run/startup-docker.bash", + "/app/setup/scripts/run/run-local-dojo.bash", + "/app/docker/entrypoint-uwsgi-dev.sh", + "/app/docker/entrypoint.sh", + "/app/docker/entrypoint-uwsgi.sh", + "/app/docker/entrypoint-uwsgi-ptvsd.sh", + "/app/docker/wait-for-it.sh", + "/app/docker/entrypoint-celery.sh", + "/app/docker/entrypoint-unit-tests.sh", + "/app/docker/entrypoint-nginx.sh", + "/app/docker/dojo-data.bash", + "/app/docker/entrypoint-unit-tests-devDocker.sh", + "/app/docker/setEnv.sh", + "/app/docker/entrypoint-celery-worker.sh", + "/app/docker/entrypoint-initializer.sh", + "/app/docker/entrypoint-celery-beat.sh", + "/app/docker/entrypoint-integration-tests.sh", + "/app/docker/unit-tests.sh" + ], + "executionSuccessful": true, + "commandLine": "-a --shell=bash -f json -S error --color=never /app/legacy-setup.bash /app/test.sh /app/upgrade.bash /app/entrypoint_scripts/os/ubuntu.sh /app/entrypoint_scripts/os/linux.sh /app/entrypoint_scripts/common/config-vars.sh /app/entrypoint_scripts/common/install-dojo.sh /app/entrypoint_scripts/common/common-os.sh /app/entrypoint_scripts/common/dojo-shared-resources.sh /app/entrypoint_scripts/common/cmd-args.sh /app/entrypoint_scripts/common/prompt.sh /app/entrypoint_scripts/run/startup-docker.bash /app/entrypoint_scripts/run/run-local-dojo.bash /app/setup/setup.bash /app/setup/upgrade.bash /app/setup/scripts/os/ubuntu.sh /app/setup/scripts/os/linux.sh /app/setup/scripts/common/config-vars.sh /app/setup/scripts/common/install-dojo.sh /app/setup/scripts/common/common-os.sh /app/setup/scripts/common/dojo-shared-resources.sh /app/setup/scripts/common/cmd-args.sh /app/setup/scripts/common/prompt.sh /app/setup/scripts/run/startup-docker.bash /app/setup/scripts/run/run-local-dojo.bash /app/docker/entrypoint-uwsgi-dev.sh /app/docker/entrypoint.sh /app/docker/entrypoint-uwsgi.sh /app/docker/entrypoint-uwsgi-ptvsd.sh /app/docker/wait-for-it.sh 
/app/docker/entrypoint-celery.sh /app/docker/entrypoint-unit-tests.sh /app/docker/entrypoint-nginx.sh /app/docker/dojo-data.bash /app/docker/entrypoint-unit-tests-devDocker.sh /app/docker/setEnv.sh /app/docker/entrypoint-celery-worker.sh /app/docker/entrypoint-initializer.sh /app/docker/entrypoint-celery-beat.sh /app/docker/entrypoint-integration-tests.sh /app/docker/unit-tests.sh", + "endTimeUtc": "2021-03-08T15:39:40Z", + "workingDirectory": { + "uri": "file:///home/damien/dd" + } + } + }, + "invocations": [ + { + "executionSuccessful": true, + "endTimeUtc": "2021-03-08T15:39:40Z", + "workingDirectory": { + "uri": "file:///home/damien/dd" + } + } + ], + "properties": { + "metrics": { + "total": 27, + "critical": 0, + "high": 0, + "medium": 0, + "low": 27 + } + }, + "results": [ + { + "message": { + "markdown": "", + "text": "Decimals are not supported. Either use integers only, or use bc or awk to compare." + }, + "level": "note", + "locations": [ + { + "physicalLocation": { + "region": { + "snippet": { + "text": " if [[ \"$PYV\"<\"2.7\" ]]; then\n" + }, + "startLine": 143 + }, + "artifactLocation": { + "uri": "file:///home/damien/dd/entrypoint_scripts/common/dojo-shared-resources.sh" + }, + "contextRegion": { + "snippet": { + "text": " PYV=`python -c \"import sys;t='{v[0]}.{v[1]}'.format(v=list(sys.version_info[:2]));sys.stdout.write(t)\";`\n if [[ \"$PYV\"<\"2.7\" ]]; then\n" + }, + "endLine": 143, + "startLine": 142 + } + } + } + ], + "properties": { + "issue_confidence": "MEDIUM", + "issue_severity": "LOW", + "issue_tags": {} + }, + "baselineState": "new", + "partialFingerprints": { + "scanPrimaryLocationHash": "4d655189c485c086", + "scanFileHash": "4ee28649c65c392d" + }, + "ruleId": "2072", + "ruleIndex": 2 + }, + { + "message": { + "markdown": "", + "text": "Decimals are not supported. Either use integers only, or use bc or awk to compare." + }, + "level": "note", + "locations": [ + { + "physicalLocation": { + "region": { + "snippet": { + "text": " if [[ \"$PYV\"<\"2.7\" ]]; then\n" + }, + "startLine": 142 + }, + "artifactLocation": { + "uri": "file:///home/damien/dd/setup/scripts/common/dojo-shared-resources.sh" + }, + "contextRegion": { + "snippet": { + "text": " PYV=`python -c \"import sys;t='{v[0]}.{v[1]}'.format(v=list(sys.version_info[:2]));sys.stdout.write(t)\";`\n if [[ \"$PYV\"<\"2.7\" ]]; then\n" + }, + "endLine": 143, + "startLine": 141 + } + } + } + ], + "properties": { + "issue_confidence": "MEDIUM", + "issue_severity": "LOW", + "issue_tags": {} + }, + "baselineState": "new", + "partialFingerprints": { + "scanPrimaryLocationHash": "4d655189c485c086", + "scanFileHash": "4ee28649c65c392d" + }, + "ruleId": "2072", + "ruleIndex": 2 + }, + { + "message": { + "markdown": "", + "text": "Decimals are not supported. Either use integers only, or use bc or awk to compare." 
+ }, + "level": "note", + "locations": [ + { + "physicalLocation": { + "region": { + "snippet": { + "text": " if [[ \"$PYV\"<\"2.7\" ]]; then\n" + }, + "startLine": 143 + }, + "artifactLocation": { + "uri": "file:///home/damien/dd/entrypoint_scripts/common/dojo-shared-resources.sh" + }, + "contextRegion": { + "snippet": { + "text": " PYV=`python -c \"import sys;t='{v[0]}.{v[1]}'.format(v=list(sys.version_info[:2]));sys.stdout.write(t)\";`\n if [[ \"$PYV\"<\"2.7\" ]]; then\n" + }, + "endLine": 143, + "startLine": 142 + } + } + } + ], + "properties": { + "issue_confidence": "MEDIUM", + "issue_severity": "LOW", + "issue_tags": {} + }, + "baselineState": "new", + "partialFingerprints": { + "scanPrimaryLocationHash": "4d655189c485c086", + "scanFileHash": "4ee28649c65c392d" + }, + "ruleId": "2072", + "ruleIndex": 2 + }, + { + "message": { + "markdown": "", + "text": "Decimals are not supported. Either use integers only, or use bc or awk to compare." + }, + "level": "note", + "locations": [ + { + "physicalLocation": { + "region": { + "snippet": { + "text": " if [[ \"$PYV\"<\"2.7\" ]]; then\n" + }, + "startLine": 143 + }, + "artifactLocation": { + "uri": "file:///home/damien/dd/entrypoint_scripts/common/dojo-shared-resources.sh" + }, + "contextRegion": { + "snippet": { + "text": " PYV=`python -c \"import sys;t='{v[0]}.{v[1]}'.format(v=list(sys.version_info[:2]));sys.stdout.write(t)\";`\n if [[ \"$PYV\"<\"2.7\" ]]; then\n" + }, + "endLine": 143, + "startLine": 143 + } + } + } + ], + "properties": { + "issue_confidence": "MEDIUM", + "issue_severity": "LOW", + "issue_tags": {} + }, + "baselineState": "new", + "partialFingerprints": { + "scanPrimaryLocationHash": "4d655189c485c086", + "scanFileHash": "4ee28649c65c392d" + }, + "ruleId": "2072", + "ruleIndex": 2 + } + ], + "automationDetails": { + "description": { + "text": "Static Analysis Security Test results using @ShiftLeft/sast-scan" + }, + "guid": "70d0f865-f0e4-406c-8837-40852afccaeb" + }, + "versionControlProvenance": [ + { + "branch": "dev", + "repositoryUri": "https://github.com/damiencarol/django-DefectDojo", + "revisionId": "288c68d1ba1f35ebeff1d1bdb032186a23f0ea5b" + } + ] + } + ], + "version": "2.1.0", + "$schema": "https://raw.githubusercontent.com/oasis-tcs/sarif-spec/master/Schemata/sarif-schema-2.1.0.json", + "inlineExternalProperties": [ + { + "guid": "70d0f865-f0e4-406c-8837-40852afccaeb", + "runGuid": "fbb1392e-e657-4572-ac07-0e107d1ff3f1" + } + ] +} \ No newline at end of file diff --git a/unittests/scans/zap/dvwa_baseline_dojo_subset.xml b/unittests/scans/zap/dvwa_baseline_dojo_subset.xml new file mode 100644 index 00000000000..c5819dc97d1 --- /dev/null +++ b/unittests/scans/zap/dvwa_baseline_dojo_subset.xml @@ -0,0 +1,662 @@ + + + 10020 + 10020 + X-Frame-Options Header Not Set + X-Frame-Options Header Not Set + 2 + 2 + Medium (Medium) + <p>X-Frame-Options header is not included in the HTTP response to protect against 'ClickJacking' attacks.</p> + + + http://172.17.0.2/vulnerabilities/brute/ + GET + X-Frame-Options + + + http://172.17.0.2/vulnerabilities/sqli_blind/ + GET + X-Frame-Options + + + http://172.17.0.2/vulnerabilities/exec/ + GET + X-Frame-Options + + + http://172.17.0.2/instructions.php + GET + X-Frame-Options + + + http://172.17.0.2/ + GET + X-Frame-Options + + + http://172.17.0.2/setup.php + GET + X-Frame-Options + + + http://172.17.0.2/vulnerabilities/upload/ + GET + X-Frame-Options + + + http://172.17.0.2/vulnerabilities/csrf/ + GET + X-Frame-Options + + + http://172.17.0.2/vulnerabilities/sqli/ + GET + 
X-Frame-Options + + + http://172.17.0.2/vulnerabilities/fi/?page=include.php + GET + X-Frame-Options + + + http://172.17.0.2/vulnerabilities/captcha/ + GET + X-Frame-Options + + + 11 + <p>Most modern Web browsers support the X-Frame-Options HTTP header. Ensure it's set on all web pages returned by your site (if you expect the page to be framed only by pages on your server (e.g. it's part of a FRAMESET) then you'll want to use SAMEORIGIN, otherwise if you never expect the page to be framed, you should use DENY. Alternatively consider implementing Content Security Policy's "frame-ancestors" directive. </p> + <p>https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/X-Frame-Options</p> + 16 + 15 + 3 + + + 10038 + 10038 + Content Security Policy (CSP) Header Not Set + Content Security Policy (CSP) Header Not Set + 2 + 3 + Medium (High) + <p>Content Security Policy (CSP) is an added layer of security that helps to detect and mitigate certain types of attacks, including Cross Site Scripting (XSS) and data injection attacks. These attacks are used for everything from data theft to site defacement or distribution of malware. CSP provides a set of standard HTTP headers that allow website owners to declare approved sources of content that browsers should be allowed to load on that page — covered types are JavaScript, CSS, HTML frames, fonts, images and embeddable objects such as Java applets, ActiveX, audio and video files.</p> + + + http://172.17.0.2/vulnerabilities/fi/?page=include.php + GET + + + http://172.17.0.2/instructions.php + GET + + + http://172.17.0.2/vulnerabilities/sqli/ + GET + + + http://172.17.0.2/vulnerabilities/exec/ + GET + + + http://172.17.0.2/sitemap.xml + GET + + + http://172.17.0.2/setup.php + GET + + + http://172.17.0.2/ + GET + + + http://172.17.0.2/vulnerabilities/captcha/ + GET + + + http://172.17.0.2/vulnerabilities/upload/ + GET + + + http://172.17.0.2/vulnerabilities/brute/ + GET + + + http://172.17.0.2/vulnerabilities/csrf/ + GET + + + 11 + <p>Ensure that your web server, application server, load balancer, etc. 
is configured to set the Content-Security-Policy header, to achieve optimal browser support: "Content-Security-Policy" for Chrome 25+, Firefox 23+ and Safari 7+, "X-Content-Security-Policy" for Firefox 4.0+ and Internet Explorer 10+, and "X-WebKit-CSP" for Chrome 14+ and Safari 6+.</p> + <p>https://developer.mozilla.org/en-US/docs/Web/Security/CSP/Introducing_Content_Security_Policy</p><p>https://cheatsheetseries.owasp.org/cheatsheets/Content_Security_Policy_Cheat_Sheet.html</p><p>http://www.w3.org/TR/CSP/</p><p>http://w3c.github.io/webappsec/specs/content-security-policy/csp-specification.dev.html</p><p>http://www.html5rocks.com/en/tutorials/security/content-security-policy/</p><p>http://caniuse.com/#feat=contentsecuritypolicy</p><p>http://content-security-policy.com/</p> + 16 + 15 + 3 + + + 10108 + 10108 + Reverse Tabnabbing + Reverse Tabnabbing + 2 + 2 + Medium (Medium) + <p>At least one link on this page is vulnerable to Reverse tabnabbing as it uses a target attribute without using both of the "noopener" and "noreferrer" keywords in the "rel" attribute, which allows the target page to take control of this page.</p> + + + http://172.17.0.2/vulnerabilities/brute/ + GET + <a href="https://www.owasp.org/index.php/Testing_for_Brute_Force_(OWASP-AT-004)" target="_blank">https://www.owasp.org/index.php/Testing_for_Brute_Force_(OWASP-AT-004)</a> + + + http://172.17.0.2/ + GET + <a href="https://www.virtualbox.org/" target="_blank">VirtualBox</a> + + + http://172.17.0.2/vulnerabilities/sqli/ + GET + <a href="http://www.securiteam.com/securityreviews/5DP0N1P76E.html" target="_blank">http://www.securiteam.com/securityreviews/5DP0N1P76E.html</a> + + + http://172.17.0.2/vulnerabilities/sqli_blind/ + GET + <a href="http://www.securiteam.com/securityreviews/5DP0N1P76E.html" target="_blank">http://www.securiteam.com/securityreviews/5DP0N1P76E.html</a> + + + http://172.17.0.2/vulnerabilities/xss_d/ + GET + <a href="https://www.owasp.org/index.php/Cross-site_Scripting_(XSS)" target="_blank">https://www.owasp.org/index.php/Cross-site_Scripting_(XSS)</a> + + + http://172.17.0.2/instructions.php + GET + <a href="https://www.virtualbox.org/" target="_blank">https://www.virtualbox.org/</a> + + + http://172.17.0.2/vulnerabilities/csrf/ + GET + <a href="https://www.owasp.org/index.php/Cross-Site_Request_Forgery" target="_blank">https://www.owasp.org/index.php/Cross-Site_Request_Forgery</a> + + + http://172.17.0.2/vulnerabilities/upload/ + GET + <a href="https://www.owasp.org/index.php/Unrestricted_File_Upload" target="_blank">https://www.owasp.org/index.php/Unrestricted_File_Upload</a> + + + http://172.17.0.2/vulnerabilities/fi/?page=include.php + GET + <a href="https://en.wikipedia.org/wiki/Remote_File_Inclusion" target="_blank">https://en.wikipedia.org/wiki/Remote_File_Inclusion</a> + + + http://172.17.0.2/vulnerabilities/captcha/ + GET + <a href="https://www.google.com/recaptcha/admin/create" target="_blank">https://www.google.com/recaptcha/admin/create</a> + + + http://172.17.0.2/vulnerabilities/exec/ + GET + <a href="http://www.scribd.com/doc/2530476/Php-Endangers-Remote-Code-Execution" target="_blank">http://www.scribd.com/doc/2530476/Php-Endangers-Remote-Code-Execution</a> + + + 11 + <p>Do not use a target attribute, or if you have to then also add the attribute: rel="noopener noreferrer".</p> + 
<p>https://owasp.org/www-community/attacks/Reverse_Tabnabbing</p><p>https://dev.to/ben/the-targetblank-vulnerability-by-example</p><p>https://mathiasbynens.github.io/rel-noopener/</p><p>https://medium.com/@jitbit/target-blank-the-most-underestimated-vulnerability-ever-96e328301f4c</p> + 3 + + + 10096 + 10096 + Timestamp Disclosure - Unix + Timestamp Disclosure - Unix + 0 + 1 + Informational (Low) + <p>A timestamp was disclosed by the application/web server - Unix</p> + + + http://172.17.0.2/vulnerabilities/javascript/ + GET + 1019803690 + + + http://172.17.0.2/vulnerabilities/javascript/ + GET + 1839030562 + + + http://172.17.0.2/vulnerabilities/javascript/ + GET + 909522486 + + + http://172.17.0.2/vulnerabilities/javascript/ + GET + 722521979 + + + http://172.17.0.2/vulnerabilities/javascript/ + GET + 40341101 + + + http://172.17.0.2/vulnerabilities/javascript/ + GET + 1309151649 + + + http://172.17.0.2/vulnerabilities/javascript/ + GET + 1732584194 + + + http://172.17.0.2/vulnerabilities/javascript/ + GET + 405537848 + + + http://172.17.0.2/vulnerabilities/javascript/ + GET + 1894986606 + + + http://172.17.0.2/vulnerabilities/javascript/ + GET + 1473231341 + + + http://172.17.0.2/vulnerabilities/javascript/ + GET + 155497632 + + + http://172.17.0.2/vulnerabilities/javascript/ + GET + 1990404162 + + + http://172.17.0.2/vulnerabilities/javascript/ + GET + 1700485571 + + + http://172.17.0.2/vulnerabilities/javascript/ + GET + 1069501632 + + + http://172.17.0.2/vulnerabilities/javascript/ + GET + 38016083 + + + http://172.17.0.2/vulnerabilities/javascript/ + GET + 2022574463 + + + http://172.17.0.2/vulnerabilities/javascript/ + GET + 373897302 + + + http://172.17.0.2/vulnerabilities/javascript/ + GET + 1163531501 + + + http://172.17.0.2/vulnerabilities/javascript/ + GET + 643717713 + + + http://172.17.0.2/vulnerabilities/javascript/ + GET + 1444681467 + + + 67 + <p>Manually confirm that the timestamp data is not sensitive, and that the data cannot be aggregated to disclose exploitable patterns.</p> + <p>1019803690, which evaluates to: 2002-04-26 06:48:10</p> + <p>http://projects.webappsec.org/w/page/13246936/Information%20Leakage</p> + 200 + 13 + 3 + + + 10036 + 10036 + Server Leaks Version Information via "Server" HTTP Response Header Field + Server Leaks Version Information via "Server" HTTP Response Header Field + 1 + 3 + Low (High) + <p>The web/application server is leaking version information via the "Server" HTTP response header. Access to such information may facilitate attackers identifying other vulnerabilities your web/application server is subject to.</p> + + + http://172.17.0.2/vulnerabilities/brute/ + GET + Apache/2.4.25 (Debian) + + + http://172.17.0.2/vulnerabilities/captcha/ + GET + Apache/2.4.25 (Debian) + + + http://172.17.0.2/vulnerabilities/csrf/ + GET + Apache/2.4.25 (Debian) + + + http://172.17.0.2/instructions.php + GET + Apache/2.4.25 (Debian) + + + http://172.17.0.2/ + GET + Apache/2.4.25 (Debian) + + + http://172.17.0.2/setup.php + GET + Apache/2.4.25 (Debian) + + + http://172.17.0.2/vulnerabilities/upload/ + GET + Apache/2.4.25 (Debian) + + + http://172.17.0.2/robots.txt + GET + Apache/2.4.25 (Debian) + + + http://172.17.0.2/sitemap.xml + GET + Apache/2.4.25 (Debian) + + + http://172.17.0.2/vulnerabilities/fi/?page=include.php + GET + Apache/2.4.25 (Debian) + + + http://172.17.0.2/vulnerabilities/exec/ + GET + Apache/2.4.25 (Debian) + + + 11 + <p>Ensure that your web server, application server, load balancer, etc. 
is configured to suppress the "Server" header or provide generic details.</p> + <p>http://httpd.apache.org/docs/current/mod/core.html#servertokens</p><p>http://msdn.microsoft.com/en-us/library/ff648552.aspx#ht_urlscan_007</p><p>http://blogs.msdn.com/b/varunm/archive/2013/04/23/remove-unwanted-http-response-headers.aspx</p><p>http://www.troyhunt.com/2012/02/shhh-dont-let-your-response-headers.html</p> + 200 + 13 + 3 + + + 10202 + 10202 + Absence of Anti-CSRF Tokens + Absence of Anti-CSRF Tokens + 1 + 2 + Low (Medium) + <p>No Anti-CSRF tokens were found in a HTML submission form.</p><p>A cross-site request forgery is an attack that involves forcing a victim to send an HTTP request to a target destination without their knowledge or intent in order to perform an action as the victim. The underlying cause is application functionality using predictable URL/form actions in a repeatable way. The nature of the attack is that CSRF exploits the trust that a web site has for a user. By contrast, cross-site scripting (XSS) exploits the trust that a user has for a web site. Like XSS, CSRF attacks are not necessarily cross-site, but they can be. Cross-site request forgery is also known as CSRF, XSRF, one-click attack, session riding, confused deputy, and sea surf.</p><p></p><p>CSRF attacks are effective in a number of situations, including:</p><p> * The victim has an active session on the target site.</p><p> * The victim is authenticated via HTTP auth on the target site.</p><p> * The victim is on the same local network as the target site.</p><p></p><p>CSRF has primarily been used to perform an action against a target site using the victim's privileges, but recent techniques have been discovered to disclose information by gaining access to the response. The risk of information disclosure is dramatically increased when the target site is vulnerable to XSS, because XSS can be used as a platform for CSRF, allowing the attack to operate within the bounds of the same-origin policy.</p> + + + http://172.17.0.2/vulnerabilities/xss_d/ + GET + <form name="XSS" method="GET"> + + + http://172.17.0.2/vulnerabilities/captcha/ + GET + <form action="#" method="POST" style="display:none;"> + + + http://172.17.0.2/vulnerabilities/xss_s/ + GET + <form method="post" name="guestform" "> + + + http://172.17.0.2/vulnerabilities/csrf/ + GET + <form action="#" method="GET"> + + + http://172.17.0.2/setup.php + GET + <form action="#" method="post"> + + + http://172.17.0.2/vulnerabilities/brute/ + GET + <form action="#" method="GET"> + + + http://172.17.0.2/vulnerabilities/sqli/ + GET + <form action="#" method="GET"> + + + http://172.17.0.2/vulnerabilities/weak_id/ + GET + <form method="post"> + + + http://172.17.0.2/vulnerabilities/exec/ + GET + <form name="ping" action="#" method="post"> + + + http://172.17.0.2/vulnerabilities/sqli_blind/ + GET + <form action="#" method="GET"> + + + http://172.17.0.2/vulnerabilities/xss_r/ + GET + <form name="XSS" action="#" method="GET"> + + + http://172.17.0.2/vulnerabilities/upload/ + GET + <form enctype="multipart/form-data" action="#" method="POST"> + + + 12 + <p>Phase: Architecture and Design</p><p>Use a vetted library or framework that does not allow this weakness to occur or provides constructs that make this weakness easier to avoid.</p><p>For example, use anti-CSRF packages such as the OWASP CSRFGuard.</p><p></p><p>Phase: Implementation</p><p>Ensure that your application is free of cross-site scripting issues, because most CSRF defenses can be bypassed using attacker-controlled 
script.</p><p></p><p>Phase: Architecture and Design</p><p>Generate a unique nonce for each form, place the nonce into the form, and verify the nonce upon receipt of the form. Be sure that the nonce is not predictable (CWE-330).</p><p>Note that this can be bypassed using XSS.</p><p></p><p>Identify especially dangerous operations. When the user performs a dangerous operation, send a separate confirmation request to ensure that the user intended to perform that operation.</p><p>Note that this can be bypassed using XSS.</p><p></p><p>Use the ESAPI Session Management control.</p><p>This control includes a component for CSRF.</p><p></p><p>Do not use the GET method for any request that triggers a state change.</p><p></p><p>Phase: Implementation</p><p>Check the HTTP Referer header to see if the request originated from an expected page. This could break legitimate functionality, because users or proxies may have disabled sending the Referer for privacy reasons.</p> + <p>No known Anti-CSRF token [anticsrf, CSRFToken, __RequestVerificationToken, csrfmiddlewaretoken, authenticity_token, OWASP_CSRFTOKEN, anoncsrf, csrf_token, _csrf, _csrfSecret, __csrf_magic, CSRF] was found in the following HTML form: [Form 1: "" ].</p> + <p>http://projects.webappsec.org/Cross-Site-Request-Forgery</p><p>http://cwe.mitre.org/data/definitions/352.html</p> + 352 + 9 + 3 + + + 10031 + 10031 + User Controllable HTML Element Attribute (Potential XSS) + User Controllable HTML Element Attribute (Potential XSS) + 0 + 1 + Informational (Low) + <p>This check looks at user-supplied input in query string parameters and POST data to identify where certain HTML attribute values might be controlled. This provides hot-spot detection for XSS (cross-site scripting) that will require further review by a security analyst to determine exploitability.</p> + + + http://172.17.0.2/vulnerabilities/captcha/ + POST + Change + + + http://172.17.0.2/vulnerabilities/exec/ + POST + Submit + + + http://172.17.0.2/vulnerabilities/csrf/?Change=Change&password_conf=ZAP&password_new=ZAP + GET + Change + + + http://172.17.0.2/vulnerabilities/captcha/ + POST + Change + + + http://172.17.0.2/vulnerabilities/exec/ + POST + Submit + + + http://172.17.0.2/vulnerabilities/brute/?Login=Login&password=ZAP&username=ZAP + GET + Login + + + http://172.17.0.2/vulnerabilities/upload/ + POST + MAX_FILE_SIZE + + + http://172.17.0.2/vulnerabilities/upload/ + POST + Upload + + + http://172.17.0.2/vulnerabilities/upload/ + POST + Upload + + + http://172.17.0.2/vulnerabilities/upload/ + POST + Upload + + + http://172.17.0.2/vulnerabilities/brute/?Login=Login&password=ZAP&username=ZAP + GET + Login + + + http://172.17.0.2/vulnerabilities/csrf/?Change=Change&password_conf=ZAP&password_new=ZAP + GET + Change + + + http://172.17.0.2/vulnerabilities/exec/ + POST + Submit + + + 13 + <p>Validate all input and sanitize output it before writing to any HTML attributes.</p> + <p>User-controlled HTML attribute values were found. Try injecting special characters to see if XSS might be possible. 
The page at the following URL:</p><p></p><p>http://172.17.0.2/vulnerabilities/captcha/</p><p></p><p>appears to include user input in: </p><p></p><p>a(n) [input] tag [name] attribute </p><p></p><p>The user input found was:</p><p>Change=Change</p><p></p><p>The user-controlled value was:</p><p>change</p> + <p>http://websecuritytool.codeplex.com/wikipage?title=Checks#user-controlled-html-attribute</p> + 20 + 20 + 3 + + + 10024 + 10024 + Information Disclosure - Sensitive Information in URL + Information Disclosure - Sensitive Information in URL + 0 + 2 + Informational (Medium) + <p>The request appeared to contain sensitive information leaked in the URL. This can violate PCI and most organizational compliance policies. You can configure the list of strings for this check to add or remove values specific to your environment.</p> + + + http://172.17.0.2/vulnerabilities/brute/?Login=Login&password=ZAP&username=ZAP + GET + username + username + + + http://172.17.0.2/vulnerabilities/csrf/?Change=Change&password_conf=ZAP&password_new=ZAP + GET + password_conf + password_conf + + + http://172.17.0.2/vulnerabilities/brute/?Login=Login&password=ZAP&username=ZAP + GET + password + password + + + http://172.17.0.2/vulnerabilities/csrf/?Change=Change&password_conf=ZAP&password_new=ZAP + GET + password_new + password_new + + + 4 + <p>Do not pass sensitive information in URIs.</p> + <p>The URL contains potentially sensitive information. The following string was found via the pattern: user</p><p>username</p> + <p></p> + 200 + 13 + 3 + + + 10054 + 10054 + Cookie Without SameSite Attribute + Cookie Without SameSite Attribute + 1 + 2 + Low (Medium) + <p>A cookie has been set without the SameSite attribute, which means that the cookie can be sent as a result of a 'cross-site' request. The SameSite attribute is an effective counter measure to cross-site request forgery, cross-site script inclusion, and timing attacks.</p> + + + http://172.17.0.2/security.php + POST + + + http://172.17.0.2/security.php + POST + PHPSESSID + Set-Cookie: PHPSESSID + + + http://172.17.0.2/vulnerabilities/weak_id/ + POST + + + 3 + <p>Ensure that the SameSite attribute is set to either 'lax' or ideally 'strict' for all cookies.</p> + <p>https://tools.ietf.org/html/draft-ietf-httpbis-cookie-same-site</p> + 16 + 13 + 3 + + + 10029 + 10029 + Cookie Poisoning + Cookie Poisoning + 0 + 1 + Informational (Low) + <p>This check looks at user-supplied input in query string parameters and POST data to identify where cookie parameters might be controlled. This is called a cookie poisoning attack, and becomes exploitable when an attacker can manipulate the cookie in various ways. In some cases this will not be exploitable, however, allowing URL parameters to set cookie values is generally considered a bug.</p> + + + http://172.17.0.2/security.php + POST + security + + + 1 + <p>Do not allow user input to control cookie names and values. If some query string parameters must be set in cookie values, be sure to filter out semicolon's that can serve as name/value pair delimiters.</p> + <p>An attacker may be able to poison cookie values through POST parameters. To test if this is a more serious issue, you should try resending that request as a GET, with the POST parameter included as a query string parameter. 
For example: http://nottrusted.com/page?value=maliciousInput.</p><p></p><p>This was identified at:</p><p></p><p>http://172.17.0.2/security.php</p><p></p><p>User-input was found in the following cookie:</p><p>security=low</p><p></p><p>The user input was:</p><p>security=low</p> + <p>http://websecuritytool.codeplex.com/wikipage?title=Checks#user-controlled-cookie</p> + 20 + 20 + 3 + + diff --git a/unittests/test_import_reimport.py b/unittests/test_import_reimport.py index 98ce581626c..e3130cc7efc 100644 --- a/unittests/test_import_reimport.py +++ b/unittests/test_import_reimport.py @@ -10,8 +10,6 @@ from django.test.client import Client from django.urls import reverse from django.utils import timezone -from rest_framework.authtoken.models import Token -from rest_framework.test import APIClient from dojo.models import Finding, Test, Test_Type, User @@ -1751,10 +1749,7 @@ def setUp(self): testuser = User.objects.get(username="admin") testuser.usercontactinfo.block_execution = True testuser.usercontactinfo.save() - - token = Token.objects.get(user=testuser) - self.client = APIClient() - self.client.credentials(HTTP_AUTHORIZATION="Token " + token.key) + self.login_as_admin() # self.url = reverse(self.viewname + '-list') # Statistics only available in API Response @@ -2029,10 +2024,7 @@ def setUp(self): testuser = User.objects.get(username="admin") testuser.usercontactinfo.block_execution = True testuser.usercontactinfo.save() - - token = Token.objects.get(user=testuser) - self.client = APIClient() - self.client.credentials(HTTP_AUTHORIZATION="Token " + token.key) + self.login_as_admin() # self.url = reverse(self.viewname + '-list') self.client_ui = Client() diff --git a/unittests/test_importers_deduplication.py b/unittests/test_importers_deduplication.py new file mode 100644 index 00000000000..f418a624d66 --- /dev/null +++ b/unittests/test_importers_deduplication.py @@ -0,0 +1,445 @@ +import logging + +from django.contrib.contenttypes.models import ContentType + +from dojo.models import ( + Development_Environment, + Dojo_User, + Endpoint, + Endpoint_Status, + Engagement, + Finding, + Product, + Product_Type, + Test, + User, + UserContactInfo, +) + +from .dojo_test_case import DojoAPITestCase, get_unit_tests_scans_path + +logging.basicConfig(level=logging.DEBUG) +logger = logging.getLogger(__name__) + + +STACK_HAWK_FILENAME = get_unit_tests_scans_path("stackhawk") / "stackhawk_many_vul_without_duplicated_findings.json" +STACK_HAWK_SUBSET_FILENAME = get_unit_tests_scans_path("stackhawk") / "stackhawk_many_vul_without_duplicated_findings_subset.json" +STACK_HAWK_SCAN_TYPE = "StackHawk HawkScan" + + +class TestDojoImportersDeduplication(DojoAPITestCase): + + def setUp(self): + super().setUp() + + testuser = User.objects.create(username="admin") + testuser.is_superuser = True + testuser.is_staff = True + testuser.save() + UserContactInfo.objects.create(user=testuser, block_execution=True) + + # Authenticate API client as admin for import endpoints + self.login_as_admin() + + self.system_settings(enable_webhooks_notifications=False) + self.system_settings(enable_product_grade=False) + self.system_settings(enable_github=False) + self.system_settings(enable_deduplication=True) + + # Warm up ContentType cache for relevant models. This is needed if we want to be able to run the test in isolation + # As part of the test suite the ContentType ids will already be cached and won't affect the query count.
+ # But if we run the test in isolation, the ContentType ids will not be cached and will result in more queries. + # By warming up the cache here, these queries are executed before we start counting queries + for model in [Development_Environment, Dojo_User, Endpoint, Endpoint_Status, Engagement, Finding, Product, Product_Type, User, Test]: + ContentType.objects.get_for_model(model) + + # Internal helper methods for reusable test logic + def _test_single_import_assess_duplicates(self, filename, scan_type, expected_duplicates): + """Internal method to test single import with expected duplicates""" + self.login_as_admin() + + response_json = self.import_scan_with_params( + filename, + scan_type=scan_type, + minimum_severity="Info", + active=True, + verified=True, + engagement=None, + product_type_name=f"PT {scan_type} Single", + product_name=f"P {scan_type} Single", + engagement_name=f"E {scan_type} Single", + auto_create_context=True, + ) + + test_id = response_json["test"] + test = Test.objects.get(id=test_id) + + # Verify expected duplicates were created + dup_count = Finding.objects.filter(test=test, duplicate=True).count() + self.assertEqual(expected_duplicates, dup_count) + + # duplicates should be sorted by id + if dup_count > 0: + for finding in test.finding_set.filter(duplicate=True): + self.assertTrue(finding.duplicate_finding.id < finding.id) + + return test_id + + def _test_full_then_subset_duplicates(self, full_filename, subset_filename, scan_type, expected_duplicates, first_import_duplicates=0): + """ + Internal method to test full scan then subset creates expected duplicates + + Args: + first_import_duplicates: Expected number of duplicates in the first import (for files with internal duplicates) + + """ + # First import: full scan + response_json = self.import_scan_with_params( + full_filename, + scan_type=scan_type, + minimum_severity="Info", + active=True, + verified=True, + engagement=None, + product_type_name=f"PT {scan_type} Full", + product_name=f"P {scan_type} Full", + engagement_name=f"E {scan_type} Full", + auto_create_context=True, + ) + + first_test_id = response_json["test"] + first_test = Test.objects.get(id=first_test_id) + + # Verify first import has expected duplicates (usually 0, but may have internal duplicates) + first_dup_count = Finding.objects.filter(test=first_test, duplicate=True).count() + self.assertEqual(first_import_duplicates, first_dup_count) + + # Second import: subset into the same engagement + response_json = self.import_scan_with_params( + subset_filename, + scan_type=scan_type, + minimum_severity="Info", + active=True, + verified=True, + engagement=first_test.engagement.id, # Same engagement ID + product_type_name=None, # Use existing + product_name=None, # Use existing + engagement_name=None, # Use existing + auto_create_context=False, + ) + + second_test_id = response_json["test"] + second_test = Test.objects.get(id=second_test_id) + + # The second test should contain expected duplicates + second_test_dup_count = Finding.objects.filter(test=second_test, duplicate=True).count() + self.assertEqual(expected_duplicates, second_test_dup_count) + + # Engagement should have total duplicates from both imports + total_expected_duplicates = first_import_duplicates + expected_duplicates + eng_dup_count = Finding.objects.filter(test__engagement=first_test.engagement, duplicate=True).count() + self.assertEqual(total_expected_duplicates, eng_dup_count) + + # Product should have total duplicates from both imports + prod_dup_count = 
Finding.objects.filter(test__engagement__product=first_test.engagement.product, duplicate=True).count() + self.assertEqual(total_expected_duplicates, prod_dup_count) + + return second_test_id + + def _test_different_products_no_duplicates(self, filename, scan_type, expected_duplicates): + """Internal method to test importing into different products creates expected duplicates""" + # First import: into Product A + response_json = self.import_scan_with_params( + filename, + scan_type=scan_type, + minimum_severity="Info", + active=True, + verified=True, + engagement=None, + product_type_name=f"PT {scan_type} Product A", + product_name=f"P {scan_type} Product A", + engagement_name=f"E {scan_type} Product A", + auto_create_context=True, + ) + + first_test_id = response_json["test"] + first_test = Test.objects.get(id=first_test_id) + + # Verify first import has expected duplicates + first_dup_count = Finding.objects.filter(test=first_test, duplicate=True).count() + self.assertEqual(expected_duplicates, first_dup_count) + + # Second import: same scan into Product B (different product) + response_json = self.import_scan_with_params( + filename, + scan_type=scan_type, + minimum_severity="Info", + active=True, + verified=True, + engagement=None, + product_type_name=f"PT {scan_type} Product B", + product_name=f"P {scan_type} Product B", + engagement_name=f"E {scan_type} Product B", + auto_create_context=True, + ) + + second_test_id = response_json["test"] + second_test = Test.objects.get(id=second_test_id) + + # The second test should contain expected duplicates (different products don't deduplicate) + second_test_dup_count = Finding.objects.filter(test=second_test, duplicate=True).count() + self.assertEqual(expected_duplicates, second_test_dup_count) + + # First product should still have expected duplicates + first_prod_dup_count = Finding.objects.filter(test__engagement__product=first_test.engagement.product, duplicate=True).count() + self.assertEqual(expected_duplicates, first_prod_dup_count) + + # Second product should have expected duplicates + second_prod_dup_count = Finding.objects.filter(test__engagement__product=second_test.engagement.product, duplicate=True).count() + self.assertEqual(expected_duplicates, second_prod_dup_count) + + return second_test_id + + def _test_same_product_different_engagements_duplicates(self, filename, scan_type, expected_duplicates): + """Internal method to test importing into same product but different engagements creates expected duplicates""" + # First import: into Engagement 1 + response_json = self.import_scan_with_params( + filename, + scan_type=scan_type, + minimum_severity="Info", + active=True, + verified=True, + engagement=None, + product_type_name=f"PT {scan_type} SameProd", + product_name=f"P {scan_type} SameProd", + engagement_name=f"E {scan_type} SameProd 1", + auto_create_context=True, + ) + first_test = Test.objects.get(id=response_json["test"]) + + # Second import: into Engagement 2 (same product) + response_json = self.import_scan_with_params( + filename, + scan_type=scan_type, + minimum_severity="Info", + active=True, + verified=True, + engagement=None, + product_type_name=None, # Use existing + product_name=f"P {scan_type} SameProd", # Same product + engagement_name=f"E {scan_type} SameProd 2", # Different engagement + auto_create_context=True, + ) + second_test = Test.objects.get(id=response_json["test"]) + + # Product should have expected duplicates total + prod_dup_count = 
Finding.objects.filter(test__engagement__product=first_test.engagement.product, duplicate=True).count() + self.assertEqual(expected_duplicates, prod_dup_count) + + return second_test.id + + def _test_same_product_different_engagements_dedupe_on_engagements_no_duplicates(self, filename, scan_type, expected_duplicates, first_import_duplicates=0): + """ + Internal method to test importing into same product but different engagements with dedupe_on_engagements creates expected duplicates + + Args: + expected_duplicates: Expected duplicates in second import (usually same as first for files with internal duplicates) + first_import_duplicates: Expected duplicates in first import (for files with internal duplicates) + + """ + # First import: into Engagement A + response_json = self.import_scan_with_params( + filename, + scan_type=scan_type, + minimum_severity="Info", + active=True, + verified=True, + engagement=None, + product_type_name=f"PT {scan_type} DedupeEng", + product_name=f"P {scan_type} DedupeEng", + engagement_name=f"E {scan_type} DedupeEng A", + auto_create_context=True, + ) + first_test = Test.objects.get(id=response_json["test"]) + + # Set deduplication_on_engagement to True for the engagement + first_test.engagement.deduplication_on_engagement = True + first_test.engagement.save() + + # Second import: into Engagement B (same product, different engagement) + response_json = self.import_scan_with_params( + filename, + scan_type=scan_type, + minimum_severity="Info", + active=True, + verified=True, + engagement=None, + product_type_name=None, # Use existing + product_name=f"P {scan_type} DedupeEng", # Same product + engagement_name=f"E {scan_type} DedupeEng B", # Different engagement + auto_create_context=True, + ) + second_test = Test.objects.get(id=response_json["test"]) + + # The second test should contain expected duplicates because deduplication_on_engagement is True + second_test_dup_count = Finding.objects.filter(test=second_test, duplicate=True).count() + self.assertEqual(expected_duplicates, second_test_dup_count) + + # Product should have total duplicates from both imports + total_expected_duplicates = first_import_duplicates + expected_duplicates + prod_dup_count = Finding.objects.filter(test__engagement__product=first_test.engagement.product, duplicate=True).count() + self.assertEqual(total_expected_duplicates, prod_dup_count) + + return second_test.id + + # Test cases for ZAP (LEGACY algorithm) + def test_zap_single_import_no_duplicates(self): + """Test that importing ZAP scan (LEGACY algorithm) creates 0 duplicate findings""" + self._test_single_import_assess_duplicates("scans/zap/dvwa_baseline_dojo_subset.xml", "ZAP Scan", 0) + + def test_zap_full_then_subset_duplicates(self): + """Test that importing full ZAP scan then subset creates duplicates""" + # For now, use the same file for both full and subset since we don't have a proper subset + # This will test the same file imported twice into the same engagement + self._test_full_then_subset_duplicates("scans/zap/dvwa_baseline_dojo_subset.xml", "scans/zap/dvwa_baseline_dojo_subset.xml", "ZAP Scan", 10) + + def test_zap_different_products_no_duplicates(self): + """Test that importing ZAP scan into different products creates 0 duplicates""" + self._test_different_products_no_duplicates("scans/zap/dvwa_baseline_dojo_subset.xml", "ZAP Scan", 0) + + def test_zap_same_product_different_engagements_duplicates(self): + """Test that importing ZAP scan into same product but different engagements creates duplicates""" + 
self._test_same_product_different_engagements_duplicates("scans/zap/dvwa_baseline_dojo_subset.xml", "ZAP Scan", 10) + + def test_zap_same_product_different_engagements_dedupe_on_engagements_no_duplicates(self): + """Test that importing ZAP scan into same product but different engagements with dedupe_on_engagements creates 0 duplicates""" + self._test_same_product_different_engagements_dedupe_on_engagements_no_duplicates("scans/zap/dvwa_baseline_dojo_subset.xml", "ZAP Scan", 0) + + # Test cases for Checkmarx (UNIQUE_ID_FROM_TOOL algorithm) + def test_checkmarx_single_import_no_duplicates(self): + """Test that importing Checkmarx scan (UNIQUE_ID_FROM_TOOL algorithm) creates 0 duplicate findings""" + self._test_single_import_assess_duplicates("scans/checkmarx/multiple_findings.json", "Checkmarx Scan detailed", 0) + + def test_checkmarx_full_then_subset_duplicates(self): + """Test that importing full Checkmarx scan then subset creates duplicates""" + # For now, use the same file for both full and subset + self._test_full_then_subset_duplicates("scans/checkmarx/multiple_findings.json", "scans/checkmarx/multiple_findings.json", + "Checkmarx Scan detailed", 10) + + def test_checkmarx_different_products_no_duplicates(self): + """Test that importing Checkmarx scan into different products creates 0 duplicates""" + self._test_different_products_no_duplicates("scans/checkmarx/multiple_findings.json", "Checkmarx Scan detailed", 0) + + def test_checkmarx_same_product_different_engagements_duplicates(self): + """Test that importing Checkmarx scan into same product but different engagements creates duplicates""" + self._test_same_product_different_engagements_duplicates("scans/checkmarx/multiple_findings.json", "Checkmarx Scan detailed", 10) + + def test_checkmarx_same_product_different_engagements_dedupe_on_engagements_no_duplicates(self): + """Test that importing Checkmarx scan into same product but different engagements with dedupe_on_engagements creates 0 duplicates""" + self._test_same_product_different_engagements_dedupe_on_engagements_no_duplicates("scans/checkmarx/multiple_findings.json", + "Checkmarx Scan detailed", 0) + + # Test cases for Trivy (HASH_CODE algorithm) + def test_trivy_single_import_no_duplicates(self): + """Test that importing Trivy scan (HASH_CODE algorithm) creates 0 duplicate findings""" + self._test_single_import_assess_duplicates("scans/trivy/kubernetes.json", "Trivy Scan", 0) + + def test_trivy_full_then_subset_duplicates(self): + """Test that importing full Trivy scan then subset creates duplicates""" + # For now, use the same file for both full and subset + self._test_full_then_subset_duplicates("scans/trivy/kubernetes.json", "scans/trivy/kubernetes.json", "Trivy Scan", 20) + + def test_trivy_different_products_no_duplicates(self): + """Test that importing Trivy scan into different products creates 0 duplicates""" + self._test_different_products_no_duplicates("scans/trivy/kubernetes.json", "Trivy Scan", 0) + + def test_trivy_same_product_different_engagements_duplicates(self): + """Test that importing Trivy scan into same product but different engagements creates duplicates""" + self._test_same_product_different_engagements_duplicates("scans/trivy/kubernetes.json", "Trivy Scan", 20) + + def test_trivy_same_product_different_engagements_dedupe_on_engagements_no_duplicates(self): + """Test that importing Trivy scan into same product but different engagements with dedupe_on_engagements creates 0 duplicates""" + 
self._test_same_product_different_engagements_dedupe_on_engagements_no_duplicates("scans/trivy/kubernetes.json", "Trivy Scan", 0) + + # Test cases for SARIF (UNIQUE_ID_FROM_TOOL_OR_HASH_CODE algorithm) + # The samples for SARIF is the bash report that has internal duplicates + # These are used on purpose so we capture the behaviour of import and reimport in this scenario. + def test_sarif_single_import_no_duplicates(self): + """Test that importing SARIF scan (UNIQUE_ID_FROM_TOOL_OR_HASH_CODE algorithm) creates 0 duplicate findings""" + # bash-report.sarif has 18 internal duplicates, so we expect 18 duplicates even on first import + test_id = self._test_single_import_assess_duplicates("scans/sarif/bash-report.sarif", "SARIF", 18) + + # duplicates should be sorted by id (currently not usefull as tests are running celery tasks in the foreground) + for finding in Finding.objects.filter(test_id=test_id, duplicate=True): + self.assertTrue(finding.duplicate_finding.id < finding.id) + + def test_sarif_full_then_subset_duplicates(self): + """Test that importing full SARIF scan then subset creates duplicates""" + # For now, use the same file for both full and subset + # First import has 18 internal duplicates, second import also has 18 internal duplicates + 9 cross-import duplicates = 27 total in second test + # Total = 18 (first) + 27 (second) = 45 + self._test_full_then_subset_duplicates("scans/sarif/bash-report.sarif", "scans/sarif/bash-report.sarif", "SARIF", 27, first_import_duplicates=18) + + def test_sarif_different_products_no_duplicates(self): + """Test that importing SARIF scan into different products creates 0 duplicates""" + # bash-report.sarif has 18 internal duplicates per import + self._test_different_products_no_duplicates("scans/sarif/bash-report.sarif", "SARIF", 18) + + def test_sarif_same_product_different_engagements_duplicates(self): + """Test that importing SARIF scan into same product but different engagements creates duplicates""" + # 18 internal duplicates in first import + 18 in second import + 9 cross-import duplicates = 45 total + self._test_same_product_different_engagements_duplicates("scans/sarif/bash-report.sarif", "SARIF", 45) + + def test_sarif_same_product_different_engagements_dedupe_on_engagements_no_duplicates(self): + """Test that importing SARIF scan into same product but different engagements with dedupe_on_engagements creates 0 duplicates""" + # bash-report.sarif has 18 internal duplicates per import + # Second test has 18 internal duplicates (no cross-engagement duplicates due to dedupe_on_engagements=True) + # Total product duplicates = 18 (first) + 18 (second) = 36 + self._test_same_product_different_engagements_dedupe_on_engagements_no_duplicates("scans/sarif/bash-report.sarif", "SARIF", + 18, first_import_duplicates=18) + + # Test cases for Veracode (UNIQUE_ID_FROM_TOOL_OR_HASH_CODE algorithm) + def test_veracode_single_import_no_duplicates(self): + """Test that importing Veracode scan (UNIQUE_ID_FROM_TOOL_OR_HASH_CODE algorithm) creates 0 duplicate findings""" + self._test_single_import_assess_duplicates("scans/veracode/veracode_scan.xml", "Veracode Scan", 0) + + def test_veracode_full_then_subset_duplicates(self): + """Test that importing full Veracode scan then subset creates duplicates""" + # For now, use the same file for both full and subset + self._test_full_then_subset_duplicates("scans/veracode/veracode_scan.xml", "scans/veracode/veracode_scan.xml", "Veracode Scan", 7) + + def test_veracode_different_products_no_duplicates(self): + """Test that 
importing Veracode scan into different products creates 0 duplicates""" + self._test_different_products_no_duplicates("scans/veracode/veracode_scan.xml", "Veracode Scan", 0) + + def test_veracode_same_product_different_engagements_duplicates(self): + """Test that importing Veracode scan into same product but different engagements creates duplicates""" + self._test_same_product_different_engagements_duplicates("scans/veracode/veracode_scan.xml", "Veracode Scan", 7) + + def test_veracode_same_product_different_engagements_dedupe_on_engagements_no_duplicates(self): + """Test that importing Veracode scan into same product but different engagements with dedupe_on_engagements creates 0 duplicates""" + self._test_same_product_different_engagements_dedupe_on_engagements_no_duplicates("scans/veracode/veracode_scan.xml", "Veracode Scan", 0) + + # Test cases for StackHawk (HASH_CODE algorithm) + def test_stackhawk_single_import_no_duplicates(self): + """Test that importing StackHawk scan (HASH_CODE algorithm) creates 0 duplicate findings""" + self._test_single_import_assess_duplicates("scans/stackhawk/stackhawk_many_vul_without_duplicated_findings.json", "StackHawk HawkScan", 0) + + def test_stackhawk_full_then_subset_duplicates(self): + """Test that importing full StackHawk scan then subset creates duplicates""" + self._test_full_then_subset_duplicates("scans/stackhawk/stackhawk_many_vul_without_duplicated_findings.json", + "scans/stackhawk/stackhawk_many_vul_without_duplicated_findings_subset.json", "StackHawk HawkScan", 5) + + def test_stackhawk_different_products_no_duplicates(self): + """Test that importing StackHawk scan into different products creates 0 duplicates""" + self._test_different_products_no_duplicates("scans/stackhawk/stackhawk_many_vul_without_duplicated_findings.json", "StackHawk HawkScan", 0) + + def test_stackhawk_same_product_different_engagements_duplicates(self): + """Test that importing StackHawk scan into same product but different engagements creates duplicates""" + self._test_same_product_different_engagements_duplicates("scans/stackhawk/stackhawk_many_vul_without_duplicated_findings.json", + "StackHawk HawkScan", 6) + + def test_stackhawk_same_product_different_engagements_dedupe_on_engagements_no_duplicates(self): + """Test that importing StackHawk scan into same product but different engagements with dedupe_on_engagements creates 0 duplicates""" + self._test_same_product_different_engagements_dedupe_on_engagements_no_duplicates("scans/stackhawk/stackhawk_many_vul_without_duplicated_findings.json", + "StackHawk HawkScan", 0) diff --git a/unittests/test_importers_performance.py b/unittests/test_importers_performance.py index 6e43f0e2d13..3b4ce357c85 100644 --- a/unittests/test_importers_performance.py +++ b/unittests/test_importers_performance.py @@ -34,9 +34,6 @@ STACK_HAWK_SUBSET_FILENAME = get_unit_tests_scans_path("stackhawk") / "stackhawk_many_vul_without_duplicated_findings_subset.json" STACK_HAWK_SCAN_TYPE = "StackHawk HawkScan" -NPM_AUDIT_NO_VULN_FILENAME = get_unit_tests_scans_path("npm_audit") / "one_vuln.json" -NPM_AUDIT_SCAN_TYPE = "NPM Audit Scan" - class TestDojoImporterPerformance(DojoTestCase): @@ -49,6 +46,7 @@ def setUp(self): self.system_settings(enable_webhooks_notifications=False) self.system_settings(enable_product_grade=False) self.system_settings(enable_github=False) + self.system_settings(enable_deduplication=True) # Warm up ContentType cache for relevant models. 
This is needed if we want to be able to run the test in isolation # As part of the test suite the ContentType ids will already be cached and won't affect the query count. @@ -58,7 +56,7 @@ def setUp(self): ContentType.objects.get_for_model(model) @contextmanager - def assertNumAsyncTask(self, num): + def _assertNumAsyncTask(self, num): dojo_async_task_counter.start() try: yield @@ -82,7 +80,7 @@ def assertNumAsyncTask(self, num): ) logger.debug(msg) - def import_reimport_performance(self, expected_num_queries1, expected_num_async_tasks1, expected_num_queries2, expected_num_async_tasks2, expected_num_queries3, expected_num_async_tasks3): + def _import_reimport_performance(self, expected_num_queries1, expected_num_async_tasks1, expected_num_queries2, expected_num_async_tasks2, expected_num_queries3, expected_num_async_tasks3): """ Log output can be quite large because, when assertNumQueries fails, all queries are printed. It could be useful to capture the output in `less`: @@ -109,7 +107,7 @@ def import_reimport_performance(self, expected_num_queries1, expected_num_async_ with ( self.subTest("import1"), impersonate(Dojo_User.objects.get(username="admin")), self.assertNumQueries(expected_num_queries1), - self.assertNumAsyncTask(expected_num_async_tasks1), + self._assertNumAsyncTask(expected_num_async_tasks1), STACK_HAWK_SUBSET_FILENAME.open(encoding="utf-8") as scan, ): import_options = { @@ -133,7 +131,7 @@ def import_reimport_performance(self, expected_num_queries1, expected_num_async_ with ( self.subTest("reimport1"), impersonate(Dojo_User.objects.get(username="admin")), self.assertNumQueries(expected_num_queries2), - self.assertNumAsyncTask(expected_num_async_tasks2), + self._assertNumAsyncTask(expected_num_async_tasks2), STACK_HAWK_FILENAME.open(encoding="utf-8") as scan, ): reimport_options = { @@ -156,7 +154,7 @@ def import_reimport_performance(self, expected_num_queries1, expected_num_async_ with ( self.subTest("reimport2"), impersonate(Dojo_User.objects.get(username="admin")), self.assertNumQueries(expected_num_queries3), - self.assertNumAsyncTask(expected_num_async_tasks3), + self._assertNumAsyncTask(expected_num_async_tasks3), STACK_HAWK_SUBSET_FILENAME.open(encoding="utf-8") as scan, ): reimport_options = { @@ -179,7 +177,7 @@ def test_import_reimport_reimport_performance_async(self): configure_audit_system() configure_pghistory_triggers() - self.import_reimport_performance( + self._import_reimport_performance( expected_num_queries1=593, expected_num_async_tasks1=10, expected_num_queries2=498, @@ -197,7 +195,7 @@ def test_import_reimport_reimport_performance_pghistory_async(self): configure_audit_system() configure_pghistory_triggers() - self.import_reimport_performance( + self._import_reimport_performance( expected_num_queries1=559, expected_num_async_tasks1=10, expected_num_queries2=491, @@ -221,12 +219,12 @@ def test_import_reimport_reimport_performance_no_async(self): testuser = User.objects.get(username="admin") testuser.usercontactinfo.block_execution = True testuser.usercontactinfo.save() - self.import_reimport_performance( - expected_num_queries1=593, + self._import_reimport_performance( + expected_num_queries1=603, expected_num_async_tasks1=10, - expected_num_queries2=503, + expected_num_queries2=515, expected_num_async_tasks2=22, - expected_num_queries3=294, + expected_num_queries3=304, expected_num_async_tasks3=20, ) @@ -243,12 +241,12 @@ def test_import_reimport_reimport_performance_pghistory_no_async(self): testuser.usercontactinfo.block_execution = True 
         testuser.usercontactinfo.block_execution = True
         testuser.usercontactinfo.save()
-        self.import_reimport_performance(
-            expected_num_queries1=559,
+        self._import_reimport_performance(
+            expected_num_queries1=569,
             expected_num_async_tasks1=10,
-            expected_num_queries2=496,
+            expected_num_queries2=508,
             expected_num_async_tasks2=22,
-            expected_num_queries3=289,
+            expected_num_queries3=299,
             expected_num_async_tasks3=20,
         )
 
@@ -269,12 +267,12 @@ def test_import_reimport_reimport_performance_no_async_with_product_grading(self
         testuser.usercontactinfo.save()
         self.system_settings(enable_product_grade=True)
 
-        self.import_reimport_performance(
-            expected_num_queries1=594,
+        self._import_reimport_performance(
+            expected_num_queries1=604,
             expected_num_async_tasks1=11,
-            expected_num_queries2=504,
+            expected_num_queries2=516,
             expected_num_async_tasks2=23,
-            expected_num_queries3=295,
+            expected_num_queries3=305,
             expected_num_async_tasks3=21,
         )
 
@@ -292,11 +290,190 @@ def test_import_reimport_reimport_performance_pghistory_no_async_with_product_gr
         testuser.usercontactinfo.save()
         self.system_settings(enable_product_grade=True)
 
-        self.import_reimport_performance(
-            expected_num_queries1=560,
+        self._import_reimport_performance(
+            expected_num_queries1=570,
             expected_num_async_tasks1=11,
-            expected_num_queries2=497,
+            expected_num_queries2=509,
             expected_num_async_tasks2=23,
-            expected_num_queries3=290,
+            expected_num_queries3=300,
             expected_num_async_tasks3=21,
         )
+
+    # Deduplication is enabled in the tests above, but to properly test it we must run the same import twice and capture the results.
+    def _deduplication_performance(self, expected_num_queries1, expected_num_async_tasks1, expected_num_queries2, expected_num_async_tasks2, *, check_duplicates=True):
+        """
+        Test method to measure deduplication performance by importing the same scan twice.
+        The second import should result in all findings being marked as duplicates.
+        This is different from reimport as we create a new test each time.
+        """
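+        # Both imports deliberately target the same engagement: deduplication only
+        # matches findings within the configured dedupe scope (the product, or the
+        # engagement when dedupe-on-engagement is set), so the second import's
+        # findings are eligible matches for the first import's.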
+ """ + product_type, _created = Product_Type.objects.get_or_create(name="test") + product, _created = Product.objects.get_or_create( + name="TestDojoDeduplicationPerformance", + prod_type=product_type, + ) + engagement, _created = Engagement.objects.get_or_create( + name="Test Deduplication Performance Engagement", + product=product, + target_start=timezone.now(), + target_end=timezone.now(), + ) + lead, _ = User.objects.get_or_create(username="admin") + environment, _ = Development_Environment.objects.get_or_create(name="Development") + + # First import - all findings should be new + with ( + self.subTest("first_import"), impersonate(Dojo_User.objects.get(username="admin")), + self.assertNumQueries(expected_num_queries1), + self._assertNumAsyncTask(expected_num_async_tasks1), + STACK_HAWK_FILENAME.open(encoding="utf-8") as scan, + ): + import_options = { + "user": lead, + "lead": lead, + "scan_date": None, + "environment": environment, + "minimum_severity": "Info", + "active": True, + "verified": True, + "scan_type": STACK_HAWK_SCAN_TYPE, + "engagement": engagement, + } + importer = DefaultImporter(**import_options) + _, _, len_new_findings1, len_closed_findings1, _, _, _ = importer.process_scan(scan) + + # Second import - all findings should be duplicates + with ( + self.subTest("second_import"), impersonate(Dojo_User.objects.get(username="admin")), + self.assertNumQueries(expected_num_queries2), + self._assertNumAsyncTask(expected_num_async_tasks2), + STACK_HAWK_FILENAME.open(encoding="utf-8") as scan, + ): + import_options = { + "user": lead, + "lead": lead, + "scan_date": None, + "environment": environment, + "minimum_severity": "Info", + "active": True, + "verified": True, + "scan_type": STACK_HAWK_SCAN_TYPE, + "engagement": engagement, + } + importer = DefaultImporter(**import_options) + _, _, len_new_findings2, len_closed_findings2, _, _, _ = importer.process_scan(scan) + + # Log the results for analysis + logger.debug(f"First import: {len_new_findings1} new findings, {len_closed_findings1} closed findings") + logger.debug(f"Second import: {len_new_findings2} new findings, {len_closed_findings2} closed findings") + + # Assert that process_scan results show no deduplication yet (deduplication happens asynchronously) + # The second import should report 6 new findings because deduplication is not visible in the stats from the importer + self.assertEqual(len_new_findings1, 6, "First import should create 6 new findings") + self.assertEqual(len_closed_findings1, 0, "First import should not close any findings") + self.assertEqual(len_new_findings2, 6, "Second import should report 6 new findings initially (before deduplication)") + self.assertEqual(len_closed_findings2, 0, "Second import should not close any findings") + + # Verify that second import resulted in duplicates by checking the database + # Only check duplicates in sync mode since deduplication happens asynchronously + if check_duplicates: + # Count active findings (non-duplicates) in the engagement + active_findings = Finding.objects.filter( + test__engagement=engagement, + active=True, + duplicate=False, + ).count() + + # Count duplicate findings in the engagement + duplicate_findings = Finding.objects.filter( + test__engagement=engagement, + duplicate=True, + ).count() + + # We should have 6 active findings (from first import) and 6 duplicate findings (from second import) + self.assertEqual(active_findings, 6, f"Expected 6 active findings, got {active_findings}") + self.assertEqual(duplicate_findings, 6, f"Expected 6 
+            # Count active findings (non-duplicates) in the engagement
+            active_findings = Finding.objects.filter(
+                test__engagement=engagement,
+                active=True,
+                duplicate=False,
+            ).count()
+
+            # Count duplicate findings in the engagement
+            duplicate_findings = Finding.objects.filter(
+                test__engagement=engagement,
+                duplicate=True,
+            ).count()
+
+            # We should have 6 active findings (from first import) and 6 duplicate findings (from second import)
+            self.assertEqual(active_findings, 6, f"Expected 6 active findings, got {active_findings}")
+            self.assertEqual(duplicate_findings, 6, f"Expected 6 duplicate findings, got {duplicate_findings}")
+
+            # Total findings should be 12 (6 active + 6 duplicates)
+            total_findings = Finding.objects.filter(test__engagement=engagement).count()
+            self.assertEqual(total_findings, 12, f"Expected 12 total findings, got {total_findings}")
+        else:
+            # In async mode, just verify we have 12 total findings (deduplication happens in celery tasks)
+            total_findings = Finding.objects.filter(test__engagement=engagement).count()
+            self.assertEqual(total_findings, 12, f"Expected 12 total findings, got {total_findings}")
+
+    @override_settings(ENABLE_AUDITLOG=True, AUDITLOG_TYPE="django-auditlog")
+    def test_deduplication_performance_async(self):
+        """
+        Test deduplication performance with async tasks enabled.
+        This test imports the same scan twice to measure deduplication query and task overhead.
+        """
+        configure_audit_system()
+        configure_pghistory_triggers()
+
+        # Enable deduplication
+        self.system_settings(enable_deduplication=True)
+
+        self._deduplication_performance(
+            expected_num_queries1=660,
+            expected_num_async_tasks1=12,
+            expected_num_queries2=519,
+            expected_num_async_tasks2=12,
+            check_duplicates=False,  # Async mode - deduplication happens later
+        )
+
+    @override_settings(ENABLE_AUDITLOG=True, AUDITLOG_TYPE="django-pghistory")
+    def test_deduplication_performance_pghistory_async(self):
+        """Test deduplication performance with django-pghistory and async tasks enabled."""
+        configure_audit_system()
+        configure_pghistory_triggers()
+
+        # Enable deduplication
+        self.system_settings(enable_deduplication=True)
+
+        self._deduplication_performance(
+            expected_num_queries1=624,
+            expected_num_async_tasks1=12,
+            expected_num_queries2=500,
+            expected_num_async_tasks2=12,
+            check_duplicates=False,  # Async mode - deduplication happens later
+        )
+
+    @override_settings(ENABLE_AUDITLOG=True, AUDITLOG_TYPE="django-auditlog")
+    def test_deduplication_performance_no_async(self):
+        """Test deduplication performance with async tasks disabled."""
+        configure_audit_system()
+        configure_pghistory_triggers()
+
+        # Enable deduplication
+        self.system_settings(enable_deduplication=True)
+
+        testuser = User.objects.get(username="admin")
+        testuser.usercontactinfo.block_execution = True
+        testuser.usercontactinfo.save()
+
+        self._deduplication_performance(
+            expected_num_queries1=672,
+            expected_num_async_tasks1=12,
+            expected_num_queries2=633,
+            expected_num_async_tasks2=12,
+        )
+
+    @override_settings(ENABLE_AUDITLOG=True, AUDITLOG_TYPE="django-pghistory")
+    def test_deduplication_performance_pghistory_no_async(self):
+        """Test deduplication performance with django-pghistory and async tasks disabled."""
+        configure_audit_system()
+        configure_pghistory_triggers()
+
+        # Enable deduplication
+        self.system_settings(enable_deduplication=True)
+
+        testuser = User.objects.get(username="admin")
+        testuser.usercontactinfo.block_execution = True
+        testuser.usercontactinfo.save()
+
+        self._deduplication_performance(
+            expected_num_queries1=636,
+            expected_num_async_tasks1=12,
+            expected_num_queries2=596,
+            expected_num_async_tasks2=12,
+        )