diff --git a/.github/workflows/helm-docs-updates.yml b/.github/workflows/helm-docs-updates.yml
new file mode 100644
index 00000000000..99677da43f0
--- /dev/null
+++ b/.github/workflows/helm-docs-updates.yml
@@ -0,0 +1,25 @@
+name: Update HELM docs for Renovate & Dependabot
+
+on:
+ pull_request:
+ branches:
+ - master
+ - dev
+ - bugfix
+ - release/**
+ - hotfix/**
+
+jobs:
+ docs_updates:
+ name: Update documentation
+ runs-on: ubuntu-latest
+ if: startsWith(github.head_ref, 'renovate/') || startsWith(github.head_ref, 'dependabot/')
+ steps:
+ - name: Checkout
+ uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
+
+ - name: Run helm-docs
+ uses: losisin/helm-docs-github-action@a57fae5676e4c55a228ea654a1bcaec8dd3cf5b5 # v1.6.2
+ with:
+ chart-search-root: "helm/defectdojo"
+ git-push: true
diff --git a/.github/workflows/release-nightly-dev.yml b/.github/workflows/release-nightly-dev.yml
index 51844d5e264..d78e3400191 100644
--- a/.github/workflows/release-nightly-dev.yml
+++ b/.github/workflows/release-nightly-dev.yml
@@ -8,7 +8,7 @@ on:
schedule:
# every day at 5:00 UTC
# in this case inputs are all null/empty, hence the default values are used below
- - cron: "* 5 * * *"
+ - cron: "0 5 * * *"
workflow_dispatch:
jobs:
diff --git a/.github/workflows/release-x-manual-helm-chart.yml b/.github/workflows/release-x-manual-helm-chart.yml
index bd09c558bf9..b6b88edc1c2 100644
--- a/.github/workflows/release-x-manual-helm-chart.yml
+++ b/.github/workflows/release-x-manual-helm-chart.yml
@@ -117,5 +117,5 @@ jobs:
fi
cp -f ./build/index.yaml ./index.yaml
git add ./index.yaml
- git commit -m "Update index.yaml"
+ git commit -m "Update index.yaml - ${{ inputs.release_number }}"
git push -u origin helm-charts
diff --git a/.github/workflows/test-helm-chart.yml b/.github/workflows/test-helm-chart.yml
index 7e89d2ac7fd..934602cf7f8 100644
--- a/.github/workflows/test-helm-chart.yml
+++ b/.github/workflows/test-helm-chart.yml
@@ -79,7 +79,7 @@ jobs:
target_annotation=$(git show "${{ env.ct-branch }}:helm/defectdojo/Chart.yaml" | yq e '.annotations."artifacthub.io/changes"' -)
if [[ "$current_annotation" == "$target_annotation" ]]; then
- echo "::error file=helm/defectdojo/Chart.yaml::The 'artifacthub.io/changes' annotation has not been updated compared to ${{ env.ct-branch }}"
+ echo "::error file=helm/defectdojo/Chart.yaml::The 'artifacthub.io/changes' annotation has not been updated compared to ${{ env.ct-branch }}. For more, check the hint in 'helm/defectdojo/Chart.yaml'"
exit 1
fi
diff --git a/components/package.json b/components/package.json
index 09954b463c9..f9b97fa55a4 100644
--- a/components/package.json
+++ b/components/package.json
@@ -1,6 +1,6 @@
{
"name": "defectdojo",
- "version": "2.51.0",
+ "version": "2.51.1",
"license" : "BSD-3-Clause",
"private": true,
"dependencies": {
diff --git a/docs/content/en/api/api-v2-docs.md b/docs/content/en/api/api-v2-docs.md
index 763ce61893a..557b7cbc63a 100644
--- a/docs/content/en/api/api-v2-docs.md
+++ b/docs/content/en/api/api-v2-docs.md
@@ -9,7 +9,7 @@ DefectDojo\'s API is created using [Django Rest
Framework](http://www.django-rest-framework.org/). The documentation of
each endpoint is available within each DefectDojo installation at
[`/api/v2/oa3/swagger-ui`](https://demo.defectdojo.org/api/v2/oa3/swagger-ui/) and can be accessed by choosing the API v2
-Docs link on the user drop down menu in the header.
+Docs link on the user drop down menu in the header.

@@ -155,7 +155,6 @@ Example for importing a scan result:
tags:test
scan_type:ZAP Scan
minimum_severity:Info
- skip_duplicates:true
close_old_findings:false
- Body tab
diff --git a/dojo/__init__.py b/dojo/__init__.py
index 3ca651bd880..7f55bf358b3 100644
--- a/dojo/__init__.py
+++ b/dojo/__init__.py
@@ -4,6 +4,6 @@
# Django starts so that shared_task will use this app.
from .celery import app as celery_app # noqa: F401
-__version__ = "2.51.0"
+__version__ = "2.51.1"
__url__ = "https://github.com/DefectDojo/django-DefectDojo"
__docs__ = "https://documentation.defectdojo.com"
diff --git a/dojo/jira_link/views.py b/dojo/jira_link/views.py
index 75949aec7bf..d30681bef27 100644
--- a/dojo/jira_link/views.py
+++ b/dojo/jira_link/views.py
@@ -327,6 +327,7 @@ def post(self, request):
return render(request, self.get_template(), {"jform": jform})
# authentication successful
# Get the open and close keys
+ msg = "Unable to find Open/Close ID's (invalid issue key specified?). They will need to be found manually"
try:
issue_id = jform.cleaned_data.get("issue_key")
key_url = jira_server.strip("/") + "/rest/api/latest/issue/" + issue_id + "/transitions?expand=transitions.fields"
@@ -339,8 +340,9 @@ def post(self, request):
if node["to"]["statusCategory"]["name"] == "Done":
close_key = close_key or int(node["id"])
except Exception:
- msg = "Unable to find Open/Close ID's (invalid issue key specified?). They will need to be found manually"
logger.exception(msg) # already logged in jira_helper
+
+ if not open_key or not close_key:
messages.add_message(
request,
messages.ERROR,
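
The reordering above changes when the user-facing message fires: it is now shown whenever either transition ID is missing, not only when the JIRA lookup raises. A minimal sketch of that pattern, with a hypothetical fetch_transitions callable standing in for the JIRA REST call:

    def resolve_transition_ids(fetch_transitions):
        """Return (open_key, close_key); either may be None (sketch)."""
        open_key = close_key = None
        try:
            for node in fetch_transitions():
                category = node["to"]["statusCategory"]["name"]
                if category == "To Do":
                    open_key = open_key or int(node["id"])
                if category == "Done":
                    close_key = close_key or int(node["id"])
        except Exception:
            # An invalid issue key raises here, but the IDs can also simply
            # be absent without an exception, so the caller must check both
            # keys either way.
            pass
        return open_key, close_key
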
diff --git a/dojo/management/commands/pghistory_backfill.py b/dojo/management/commands/pghistory_backfill.py
index 52367e32c1c..456cbe75c5d 100644
--- a/dojo/management/commands/pghistory_backfill.py
+++ b/dojo/management/commands/pghistory_backfill.py
@@ -4,6 +4,7 @@
This command creates initial snapshots for all existing records in tracked models.
"""
import logging
+import time
from django.apps import apps
from django.conf import settings
@@ -33,6 +34,16 @@ def add_arguments(self, parser):
action="store_true",
help="Show what would be done without actually creating events",
)
+ parser.add_argument(
+ "--log-queries",
+ action="store_true",
+ help="Enable database query logging (default: enabled)",
+ )
+ parser.add_argument(
+ "--no-log-queries",
+ action="store_true",
+ help="Disable database query logging",
+ )
def get_excluded_fields(self, model_name):
"""Get the list of excluded fields for a specific model from pghistory configuration."""
@@ -45,6 +56,89 @@ def get_excluded_fields(self, model_name):
}
return excluded_fields_map.get(model_name, [])
+ def process_batch(self, event_model, event_records, model_name, dry_run, batch_start_time, processed, backfill_count, *, is_final_batch=False):
+ """Process a batch of event records by bulk creating them in the database."""
+ if not event_records:
+ return 0, batch_start_time
+
+ if dry_run:
+ actually_created = len(event_records)
+ else:
+ try:
+ attempted = len(event_records)
+ # No need to pass batch_size since we're already batching ourselves
+ created_objects = event_model.objects.bulk_create(event_records)
+ actually_created = len(created_objects) if created_objects else 0
+
+ if actually_created != attempted:
+ logger.warning(
+ f"bulk_create for {model_name}: attempted {attempted}, "
+ f"actually created {actually_created} ({attempted - actually_created} skipped)",
+ )
+ except Exception:
+ logger.exception(f"Failed to bulk create events for {model_name}")
+ raise
+
+ # Calculate timing after the actual database operation
+ batch_end_time = time.time()
+ batch_duration = batch_end_time - batch_start_time
+ batch_records_per_second = len(event_records) / batch_duration if batch_duration > 0 else 0
+
+ # Log batch timing
+ if is_final_batch:
+ self.stdout.write(f" Final batch: {batch_duration:.2f}s ({batch_records_per_second:.1f} records/sec)")
+ else:
+ progress = (processed + actually_created) / backfill_count * 100
+ self.stdout.write(f" Processed {processed + actually_created:,}/{backfill_count:,} records needing backfill ({progress:.1f}%) - "
+ f"Last batch: {batch_duration:.2f}s ({batch_records_per_second:.1f} records/sec)")
+
+ return actually_created, batch_end_time
+
+ def enable_db_logging(self):
+ """Enable database query logging for this command."""
+ # Store original DEBUG setting
+ self.original_debug = settings.DEBUG
+
+ # Configure database query logging
+ db_logger = logging.getLogger("django.db.backends")
+ db_logger.setLevel(logging.DEBUG)
+
+ # Add a handler if one doesn't exist
+ if not db_logger.handlers:
+ handler = logging.StreamHandler()
+ formatter = logging.Formatter(
+ "%(asctime)s - %(name)s - %(levelname)s - %(message)s",
+ )
+ handler.setFormatter(formatter)
+ db_logger.addHandler(handler)
+
+ # Also enable the SQL logger specifically
+ sql_logger = logging.getLogger("django.db.backends.sql")
+ sql_logger.setLevel(logging.DEBUG)
+
+ # Ensure the root logger propagates to our handlers
+ if not sql_logger.handlers:
+ sql_logger.addHandler(handler)
+
+ # Enable query logging in Django settings
+ settings.DEBUG = True
+
+ self.stdout.write(
+ self.style.SUCCESS("Database query logging enabled"),
+ )
+
+ def disable_db_logging(self):
+ """Disable database query logging."""
+ # Restore original DEBUG setting
+ settings.DEBUG = self.original_debug
+
+ # Disable query logging by setting a higher level
+ logging.getLogger("django.db.backends").setLevel(logging.INFO)
+ logging.getLogger("django.db.backends.sql").setLevel(logging.INFO)
+ self.stdout.write(
+ self.style.SUCCESS("Database query logging disabled"),
+ )
+
def handle(self, *args, **options):
if not settings.ENABLE_AUDITLOG or settings.AUDITLOG_TYPE != "django-pghistory":
self.stdout.write(
@@ -55,6 +149,17 @@ def handle(self, *args, **options):
)
return
+ # Enable database query logging based on options
+ # Default to enabled unless explicitly disabled
+ enable_query_logging = not options.get("no_log_queries")
+
+ if enable_query_logging:
+ self.enable_db_logging()
+ else:
+ self.stdout.write(
+ self.style.WARNING("Database query logging disabled"),
+ )
+
# Models that are tracked by pghistory
tracked_models = [
"Dojo_User", "Endpoint", "Engagement", "Finding", "Finding_Group",
@@ -83,9 +188,11 @@ def handle(self, *args, **options):
)
total_processed = 0
+ total_start_time = time.time()
self.stdout.write(f"Starting backfill for {len(tracked_models)} model(s)...")
for model_name in tracked_models:
+ model_start_time = time.time()
self.stdout.write(f"\nProcessing {model_name}...")
try:
@@ -143,6 +250,7 @@ def handle(self, *args, **options):
processed = 0
event_records = []
failed_records = []
+ batch_start_time = time.time()
for instance in records_needing_backfill.iterator():
try:
@@ -156,8 +264,17 @@ def handle(self, *args, **options):
for field in instance._meta.fields:
field_name = field.name
if field_name not in excluded_fields:
- field_value = getattr(instance, field_name)
- event_data[field_name] = field_value
+ # Handle foreign key fields differently
+ if field.many_to_one: # ForeignKey field
+ # For foreign keys, use the _id field to get the raw ID value
+ # Store it under the _id field name for the Event model
+ field_id_name = f"{field_name}_id"
+ field_value = getattr(instance, field_id_name)
+ event_data[field_id_name] = field_value
+ else:
+ # For non-foreign key fields, use value_from_object() to avoid queries
+ field_value = field.value_from_object(instance)
+ event_data[field_name] = field_value
# Explicitly preserve created timestamp from the original instance
# Only if not excluded and exists
@@ -178,57 +295,32 @@ def handle(self, *args, **options):
event_records.append(EventModel(**event_data))
- except Exception as e:
+ except Exception:
failed_records.append(instance.id)
- logger.error(
- f"Failed to prepare event for {model_name} ID {instance.id}: {e}",
+ logger.exception(
+ f"Failed to prepare event for {model_name} ID {instance.id}",
)
# Bulk create when we hit batch_size records
if len(event_records) >= batch_size:
- if not dry_run and event_records:
- try:
- attempted = len(event_records)
- created_objects = EventModel.objects.bulk_create(event_records, batch_size=batch_size)
- actually_created = len(created_objects) if created_objects else 0
- processed += actually_created
-
- if actually_created != attempted:
- logger.warning(
- f"bulk_create for {model_name}: attempted {attempted}, "
- f"actually created {actually_created} ({attempted - actually_created} skipped)",
- )
- except Exception as e:
- logger.error(f"Failed to bulk create events for {model_name}: {e}")
- raise
- elif dry_run:
- processed += len(event_records)
+ # Process the batch
+ batch_processed, batch_start_time = self.process_batch(
+ EventModel, event_records, model_name, dry_run,
+ batch_start_time, processed, backfill_count,
+ )
+ processed += batch_processed
event_records = [] # Reset for next batch
-
- # Progress update
- progress = (processed / backfill_count) * 100
- self.stdout.write(f" Processed {processed:,}/{backfill_count:,} records needing backfill ({progress:.1f}%)")
+ batch_start_time = time.time() # Reset batch timer
# Handle remaining records
if event_records:
- if not dry_run:
- try:
- attempted = len(event_records)
- created_objects = EventModel.objects.bulk_create(event_records, batch_size=batch_size)
- actually_created = len(created_objects) if created_objects else 0
- processed += actually_created
-
- if actually_created != attempted:
- logger.warning(
- f"bulk_create final batch for {model_name}: attempted {attempted}, "
- f"actually created {actually_created} ({attempted - actually_created} skipped)",
- )
- except Exception as e:
- logger.error(f"Failed to bulk create final batch for {model_name}: {e}")
- raise
- else:
- processed += len(event_records)
+ # Process the final batch
+ batch_processed, _ = self.process_batch(
+ EventModel, event_records, model_name, dry_run,
+ batch_start_time, processed, backfill_count, is_final_batch=True,
+ )
+ processed += batch_processed
# Final progress update
if backfill_count > 0:
@@ -237,18 +329,25 @@ def handle(self, *args, **options):
total_processed += processed
- # Show completion summary
+ # Calculate timing for this model
+ model_end_time = time.time()
+ model_duration = model_end_time - model_start_time
+ records_per_second = processed / model_duration if model_duration > 0 else 0
+
+ # Show completion summary with timing
if failed_records:
self.stdout.write(
self.style.WARNING(
f" ⚠ Completed {model_name}: {processed:,} records processed, "
- f"{len(failed_records)} records failed",
+ f"{len(failed_records)} records failed in {model_duration:.2f}s "
+ f"({records_per_second:.1f} records/sec)",
),
)
else:
self.stdout.write(
self.style.SUCCESS(
- f" ✓ Completed {model_name}: {processed:,} records",
+ f" ✓ Completed {model_name}: {processed:,} records in {model_duration:.2f}s "
+ f"({records_per_second:.1f} records/sec)",
),
)
@@ -256,10 +355,20 @@ def handle(self, *args, **options):
self.stdout.write(
self.style.ERROR(f" ✗ Failed to process {model_name}: {e}"),
)
- logger.error(f"Error processing {model_name}: {e}")
+ logger.exception(f"Error processing {model_name}")
+
+ # Calculate total timing
+ total_end_time = time.time()
+ total_duration = total_end_time - total_start_time
+ total_records_per_second = total_processed / total_duration if total_duration > 0 else 0
+
+ # Disable database query logging if it was enabled
+ if enable_query_logging:
+ self.disable_db_logging()
self.stdout.write(
self.style.SUCCESS(
- f"\nBACKFILL COMPLETE: Processed {total_processed:,} records",
+ f"\nBACKFILL COMPLETE: Processed {total_processed:,} records in {total_duration:.2f}s "
+ f"({total_records_per_second:.1f} records/sec)",
),
)
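
The foreign-key branch in the hunk above leans on a Django detail: reading instance.<fk> lazily fetches the related row (one query per record), while the raw <fk>_id attribute is already loaded, and Field.value_from_object() likewise reads the local column without a query. A minimal sketch of that access pattern, assuming any model instance fetched via .iterator():

    def event_payload(instance, excluded_fields):
        """Collect column values without per-row FK queries (sketch)."""
        data = {}
        for field in instance._meta.fields:
            if field.name in excluded_fields:
                continue
            if field.many_to_one:
                # e.g. finding.test would query dojo_test, whereas
                # finding.test_id is the already-loaded column value.
                attname = f"{field.name}_id"
                data[attname] = getattr(instance, attname)
            else:
                # value_from_object() returns the loaded value directly.
                data[field.name] = field.value_from_object(instance)
        return data
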
diff --git a/dojo/management/commands/pghistory_backfill_fast.py b/dojo/management/commands/pghistory_backfill_fast.py
new file mode 100644
index 00000000000..a2f1921fc74
--- /dev/null
+++ b/dojo/management/commands/pghistory_backfill_fast.py
@@ -0,0 +1,543 @@
+"""
+Management command to backfill existing data into django-pghistory using COPY.
+
+This command creates initial snapshots for all existing records in tracked models
+using PostgreSQL COPY for maximum performance.
+"""
+import io
+import logging
+import time
+
+from django.conf import settings
+from django.core.management.base import BaseCommand
+from django.db import connection
+from django.utils import timezone
+
+logger = logging.getLogger(__name__)
+
+
+class Command(BaseCommand):
+ help = "Backfill existing data into django-pghistory using COPY"
+
+ def add_arguments(self, parser):
+ parser.add_argument(
+ "--model",
+ type=str,
+ help='Specific model to backfill (e.g., "Finding", "Product")',
+ )
+ parser.add_argument(
+ "--batch-size",
+ type=int,
+ default=10000,
+ help="Number of records to process in each batch (default: 10000)",
+ )
+ parser.add_argument(
+ "--dry-run",
+ action="store_true",
+ help="Show what would be done without actually creating events",
+ )
+ parser.add_argument(
+ "--log-queries",
+ action="store_true",
+ help="Enable database query logging (default: enabled)",
+ )
+ parser.add_argument(
+ "--no-log-queries",
+ action="store_true",
+ help="Disable database query logging",
+ )
+
+ def get_excluded_fields(self, model_name):
+ """Get the list of excluded fields for a specific model from pghistory configuration."""
+ # Define excluded fields for each model (matching auditlog.py)
+ excluded_fields_map = {
+ "Dojo_User": ["password"],
+ "Product": ["updated"], # This is the key change
+ "Cred_User": ["password"],
+ "Notification_Webhooks": ["header_name", "header_value"],
+ }
+ return excluded_fields_map.get(model_name, [])
+
+ def process_model_with_copy(self, model_name, batch_size, dry_run):
+ """Process a single model using COPY operations with raw SQL."""
+ try:
+ # Get table names using raw SQL
+ # Handle special cases for table naming
+ if model_name == "Dojo_User":
+ table_name = "dojo_dojo_user"
+ event_table_name = "dojo_dojo_userevent"
+ elif model_name == "Product_Type":
+ table_name = "dojo_product_type"
+ event_table_name = "dojo_product_typeevent"
+ elif model_name == "Finding_Group":
+ table_name = "dojo_finding_group"
+ event_table_name = "dojo_finding_groupevent"
+ elif model_name == "Risk_Acceptance":
+ table_name = "dojo_risk_acceptance"
+ event_table_name = "dojo_risk_acceptanceevent"
+ elif model_name == "Finding_Template":
+ table_name = "dojo_finding_template"
+ event_table_name = "dojo_finding_templateevent"
+ elif model_name == "Cred_User":
+ table_name = "dojo_cred_user"
+ event_table_name = "dojo_cred_userevent"
+ elif model_name == "Notification_Webhooks":
+ table_name = "dojo_notification_webhooks"
+ event_table_name = "dojo_notification_webhooksevent"
+ else:
+ table_name = f"dojo_{model_name.lower()}"
+ event_table_name = f"dojo_{model_name.lower()}event"
+
+ # Check if tables exist
+ with connection.cursor() as cursor:
+ cursor.execute("""
+ SELECT EXISTS (
+ SELECT FROM information_schema.tables
+ WHERE table_name = %s
+ )
+ """, [table_name])
+ table_exists = cursor.fetchone()[0]
+
+ cursor.execute("""
+ SELECT EXISTS (
+ SELECT FROM information_schema.tables
+ WHERE table_name = %s
+ )
+ """, [event_table_name])
+ event_table_exists = cursor.fetchone()[0]
+
+ if not table_exists:
+ self.stdout.write(f" Table {table_name} not found")
+ return 0, 0.0
+
+ if not event_table_exists:
+ self.stdout.write(
+ self.style.ERROR(
+ f" Event table {event_table_name} not found. "
+ f"Is {model_name} tracked by pghistory?",
+ ),
+ )
+ return 0, 0.0
+
+ # Get total count using raw SQL
+ with connection.cursor() as cursor:
+ cursor.execute(f"SELECT COUNT(*) FROM {table_name}")
+ total_count = cursor.fetchone()[0]
+
+ if total_count == 0:
+ self.stdout.write(f" No records found for {model_name}")
+ return 0, 0.0
+
+ self.stdout.write(f" Found {total_count:,} records")
+
+ # Get excluded fields
+ excluded_fields = self.get_excluded_fields(model_name)
+
+ # Check if records already have initial_import events using raw SQL
+ with connection.cursor() as cursor:
+ cursor.execute(f"SELECT COUNT(*) FROM {event_table_name} WHERE pgh_label = 'initial_import'")
+ existing_count = cursor.fetchone()[0]
+
+ # Get records that need backfill using raw SQL
+ with connection.cursor() as cursor:
+ cursor.execute(f"""
+ SELECT COUNT(*) FROM {table_name} t
+ WHERE NOT EXISTS (
+ SELECT 1 FROM {event_table_name} e
+ WHERE e.pgh_obj_id = t.id AND e.pgh_label = 'initial_import'
+ )
+ """)
+ backfill_count = cursor.fetchone()[0]
+
+ # Log the breakdown
+ self.stdout.write(f" Records with initial_import events: {existing_count:,}")
+ self.stdout.write(f" Records needing initial_import events: {backfill_count:,}")
+
+ if backfill_count == 0:
+ self.stdout.write(
+ self.style.SUCCESS(f" ✓ All {total_count:,} records already have initial_import events"),
+ )
+ return total_count, 0.0
+
+ if dry_run:
+ self.stdout.write(f" Would process {backfill_count:,} records using COPY...")
+ return backfill_count, 0.0
+
+ # Get event table columns using raw SQL (excluding auto-generated pgh_id)
+ with connection.cursor() as cursor:
+ cursor.execute("""
+ SELECT column_name
+ FROM information_schema.columns
+ WHERE table_name = %s AND column_name != 'pgh_id'
+ ORDER BY ordinal_position
+ """, [event_table_name])
+ event_columns = [row[0] for row in cursor.fetchall()]
+
+ # Get all IDs that need backfill first
+ with connection.cursor() as cursor:
+ cursor.execute(f"""
+ SELECT t.id FROM {table_name} t
+ WHERE NOT EXISTS (
+ SELECT 1 FROM {event_table_name} e
+ WHERE e.pgh_obj_id = t.id AND e.pgh_label = 'initial_import'
+ )
+ ORDER BY t.id
+ """)
+ ids_to_process = [row[0] for row in cursor.fetchall()]
+
+ if not ids_to_process:
+ self.stdout.write(" No records need backfill")
+ return 0, 0.0
+
+ # Process records in batches using raw SQL
+ processed = 0
+ batch_start_time = time.time()
+ model_start_time = time.time() # Track model start time
+
+ # Get column names for the source table
+ with connection.cursor() as cursor:
+ cursor.execute("""
+ SELECT column_name
+ FROM information_schema.columns
+ WHERE table_name = %s
+ ORDER BY ordinal_position
+ """, [table_name])
+ source_columns = [row[0] for row in cursor.fetchall()]
+
+ # Filter out excluded fields from source columns
+ source_columns = [col for col in source_columns if col not in excluded_fields]
+
+ # Process in batches
+ consecutive_failures = 0
+ max_failures = 3
+
+ for i in range(0, len(ids_to_process), batch_size):
+ batch_ids = ids_to_process[i:i + batch_size]
+
+ # Log progress every 10 batches
+ if i > 0 and i % (batch_size * 10) == 0:
+ self.stdout.write(f" Processing batch starting at index {i:,}...")
+
+ # Get batch of records using raw SQL with specific IDs
+ columns_str = ", ".join(source_columns)
+ placeholders = ", ".join(["%s"] * len(batch_ids))
+ query = f"""
+ SELECT {columns_str} FROM {table_name} t
+ WHERE t.id IN ({placeholders})
+ ORDER BY t.id
+ """
+
+ with connection.cursor() as cursor:
+ cursor.execute(query, batch_ids)
+ batch_rows = cursor.fetchall()
+
+ if not batch_rows:
+ self.stdout.write(f" No records found for batch at index {i}")
+ continue
+
+ # Use PostgreSQL COPY as described in the article
+ try:
+ # Prepare data for COPY using a custom file-like object
+ class FileLikeObject:
+ def __init__(self):
+ self.data = io.BytesIO()
+
+ def write(self, data):
+ return self.data.write(data)
+
+ def read(self, size=-1):
+ return self.data.read(size)
+
+ def seek(self, pos):
+ return self.data.seek(pos)
+
+ def tell(self):
+ return self.data.tell()
+
+ def __len__(self):
+ return len(self.data.getvalue())
+
+ def getvalue(self):
+ return self.data.getvalue()
+
+ copy_buffer = FileLikeObject()
+
+ for row in batch_rows:
+ row_data = []
+
+ # Create a mapping of source columns to values
+ source_values = {}
+ for idx, value in enumerate(row):
+ field_name = source_columns[idx]
+ # Convert value to string for COPY
+ if value is None:
+ source_values[field_name] = ""
+ elif isinstance(value, bool):
+ source_values[field_name] = "t" if value else "f"
+ elif hasattr(value, "isoformat"): # datetime objects
+ source_values[field_name] = value.isoformat()
+ else:
+ source_values[field_name] = str(value)
+
+ # Build row data in the order of event_columns
+ for col in event_columns:
+ if col == "pgh_created_at":
+ row_data.append(timezone.now().isoformat())
+ elif col == "pgh_label":
+ row_data.append("initial_import")
+ elif col == "pgh_obj_id":
+ row_data.append(str(row[0]) if row[0] is not None else "") # Assuming first column is id
+ elif col == "pgh_context_id":
+ row_data.append("") # Empty for backfilled events
+ elif col in source_values:
+ row_data.append(source_values[col])
+ else:
+ row_data.append("") # Default empty value
+
+ # Write tab-separated row to buffer as bytes
+ copy_buffer.write(("\t".join(row_data) + "\n").encode("utf-8"))
+
+ copy_buffer.seek(0)
+
+ # Debug: Show what we're about to copy
+ self.stdout.write(f" Batch {i // batch_size + 1}: Writing to table: {event_table_name}")
+
+ # Use PostgreSQL COPY with psycopg3 syntax
+ with connection.cursor() as cursor:
+ # Get the underlying raw cursor to bypass Django's wrapper
+ raw_cursor = cursor.cursor
+ # Use the copy method (psycopg3 syntax)
+ copy_sql = f"COPY {event_table_name} ({', '.join(event_columns)}) FROM STDIN WITH (FORMAT text, DELIMITER E'\\t')"
+
+ try:
+ # Use psycopg3 copy syntax as per documentation
+ # Prepare data as list of tuples for write_row()
+ records = []
+ for row in batch_rows:
+ row_data = []
+
+ # Create a mapping of source columns to values
+ source_values = {}
+ for idx, value in enumerate(row):
+ field_name = source_columns[idx]
+ source_values[field_name] = value
+
+ # Build row data in the order of event_columns
+ for col in event_columns:
+ if col == "pgh_created_at":
+ row_data.append(timezone.now())
+ elif col == "pgh_label":
+ row_data.append("initial_import")
+ elif col == "pgh_obj_id":
+ row_data.append(row[0]) # Assuming first column is id
+ elif col == "pgh_context_id":
+ row_data.append(None) # Empty for backfilled events
+ elif col in source_values:
+ row_data.append(source_values[col])
+ else:
+ row_data.append(None) # Default NULL value
+
+ records.append(tuple(row_data))
+
+ # Use COPY with write_row() as per psycopg3 docs
+ with raw_cursor.copy(copy_sql) as copy:
+ for record in records:
+ copy.write_row(record)
+ self.stdout.write(" COPY operation completed using write_row")
+
+ # Commit the transaction to persist the data
+ raw_cursor.connection.commit()
+
+ # Debug: Check if data was inserted
+ raw_cursor.execute(f"SELECT COUNT(*) FROM {event_table_name} WHERE pgh_label = 'initial_import'")
+ count = raw_cursor.fetchone()[0]
+ self.stdout.write(f" Records in event table after batch: {count}")
+
+ except Exception as copy_error:
+ self.stdout.write(f" COPY error: {copy_error}")
+ # Try to get more details about the error
+ raw_cursor.execute("SELECT * FROM pg_stat_activity WHERE state = 'active'")
+ self.stdout.write(f" Active queries: {raw_cursor.fetchall()}")
+ raise
+
+ batch_processed = len(batch_rows)
+ processed += batch_processed
+ consecutive_failures = 0 # Reset failure counter on success
+
+ # Calculate timing
+ batch_end_time = time.time()
+ batch_duration = batch_end_time - batch_start_time
+ batch_records_per_second = batch_processed / batch_duration if batch_duration > 0 else 0
+
+ # Log progress
+ progress = (processed / backfill_count) * 100
+ self.stdout.write(f" Processed {processed:,}/{backfill_count:,} records ({progress:.1f}%) - "
+ f"Last batch: {batch_duration:.2f}s ({batch_records_per_second:.1f} records/sec)")
+
+ batch_start_time = time.time() # Reset for next batch
+
+ except Exception as e:
+ consecutive_failures += 1
+ logger.error(f"Bulk insert failed for {model_name} batch: {e}")
+ self.stdout.write(f" Bulk insert failed: {e}")
+ # Log more details about the error
+ self.stdout.write(f" Processed {processed:,} records before failure")
+
+ if consecutive_failures >= max_failures:
+ self.stdout.write(f" Too many consecutive failures ({consecutive_failures}), stopping processing")
+ break
+
+ # Continue with next batch instead of breaking
+ continue
+
+ # Calculate total timing
+ model_end_time = time.time()
+ total_duration = model_end_time - model_start_time
+ records_per_second = processed / total_duration if total_duration > 0 else 0
+
+ self.stdout.write(
+ self.style.SUCCESS(
+ f" ✓ Completed {model_name}: {processed:,} records in {total_duration:.2f}s "
+ f"({records_per_second:.1f} records/sec)",
+ ),
+ )
+
+ return processed, records_per_second # noqa: TRY300
+
+ except Exception as e:
+ self.stdout.write(
+ self.style.ERROR(f" ✗ Failed to process {model_name}: {e}"),
+ )
+ logger.exception(f"Error processing {model_name}")
+ return 0, 0.0
+
+ def enable_db_logging(self):
+ """Enable database query logging for this command."""
+ # Store original DEBUG setting
+ self.original_debug = settings.DEBUG
+
+ # Configure database query logging
+ db_logger = logging.getLogger("django.db.backends")
+ db_logger.setLevel(logging.DEBUG)
+
+ # Add a handler if one doesn't exist
+ if not db_logger.handlers:
+ handler = logging.StreamHandler()
+ formatter = logging.Formatter(
+ "%(asctime)s - %(name)s - %(levelname)s - %(message)s",
+ )
+ handler.setFormatter(formatter)
+ db_logger.addHandler(handler)
+
+ # Also enable the SQL logger specifically
+ sql_logger = logging.getLogger("django.db.backends.sql")
+ sql_logger.setLevel(logging.DEBUG)
+
+ # Ensure the root logger propagates to our handlers
+ if not sql_logger.handlers:
+ sql_logger.addHandler(handler)
+
+ # Enable query logging in Django settings
+ settings.DEBUG = True
+
+ self.stdout.write(
+ self.style.SUCCESS("Database query logging enabled"),
+ )
+
+ def disable_db_logging(self):
+ """Disable database query logging."""
+ # Restore original DEBUG setting
+ settings.DEBUG = self.original_debug
+
+ # Disable query logging by setting a higher level
+ logging.getLogger("django.db.backends").setLevel(logging.INFO)
+ logging.getLogger("django.db.backends.sql").setLevel(logging.INFO)
+ self.stdout.write(
+ self.style.SUCCESS("Database query logging disabled"),
+ )
+
+ def handle(self, *args, **options):
+ if not settings.ENABLE_AUDITLOG or settings.AUDITLOG_TYPE != "django-pghistory":
+ self.stdout.write(
+ self.style.WARNING(
+ "pghistory is not enabled. Set DD_ENABLE_AUDITLOG=True and "
+ "DD_AUDITLOG_TYPE=django-pghistory",
+ ),
+ )
+ return
+
+ # Check if we can use COPY (PostgreSQL only)
+ if settings.DATABASES["default"]["ENGINE"] != "django.db.backends.postgresql":
+ self.stdout.write(
+ self.style.ERROR(
+ "COPY operations only available with PostgreSQL. "
+ "Please use the original pghistory_backfill command instead.",
+ ),
+ )
+ return
+
+ # Enable database query logging based on options
+ enable_query_logging = not options.get("no_log_queries")
+
+ if enable_query_logging:
+ self.enable_db_logging()
+ else:
+ self.stdout.write(
+ self.style.WARNING("Database query logging disabled"),
+ )
+
+ # Models that are tracked by pghistory
+ tracked_models = [
+ "Dojo_User", "Endpoint", "Engagement", "Finding", "Finding_Group",
+ "Product_Type", "Product", "Test", "Risk_Acceptance",
+ "Finding_Template", "Cred_User", "Notification_Webhooks",
+ ]
+
+ specific_model = options.get("model")
+ if specific_model:
+ if specific_model not in tracked_models:
+ self.stdout.write(
+ self.style.ERROR(
+ f'Model "{specific_model}" is not tracked by pghistory. '
+ f'Available models: {", ".join(tracked_models)}',
+ ),
+ )
+ return
+ tracked_models = [specific_model]
+
+ batch_size = options["batch_size"]
+ dry_run = options["dry_run"]
+
+ if dry_run:
+ self.stdout.write(
+ self.style.WARNING("DRY RUN MODE - No events will be created"),
+ )
+
+ total_processed = 0
+ total_start_time = time.time()
+ self.stdout.write(f"Starting backfill for {len(tracked_models)} model(s) using PostgreSQL COPY...")
+
+ for model_name in tracked_models:
+ self.stdout.write(f"\nProcessing {model_name}...")
+
+ processed, _ = self.process_model_with_copy(
+ model_name, batch_size, dry_run,
+ )
+ total_processed += processed
+
+ # Calculate total timing
+ total_end_time = time.time()
+ total_duration = total_end_time - total_start_time
+ total_records_per_second = total_processed / total_duration if total_duration > 0 else 0
+
+ # Disable database query logging if it was enabled
+ if enable_query_logging:
+ self.disable_db_logging()
+
+ self.stdout.write(
+ self.style.SUCCESS(
+ f"\nBACKFILL COMPLETE: Processed {total_processed:,} records in {total_duration:.2f}s "
+ f"({total_records_per_second:.1f} records/sec)",
+ ),
+ )
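
Stripped of the table-name and column plumbing, the COPY path above reduces to psycopg 3's Cursor.copy() context manager with write_row(), which handles type adaptation for native Python values (None, datetimes, booleans). A minimal standalone sketch, assuming a hypothetical events table in a local database:

    import psycopg  # psycopg 3

    # Hypothetical DSN and table, for illustration only.
    with psycopg.connect("dbname=defectdojo") as conn, conn.cursor() as cur:
        rows = [
            (1, "initial_import", None),
            (2, "initial_import", None),
        ]
        # COPY ... FROM STDIN streams rows in one round trip instead of
        # issuing an INSERT per row.
        with cur.copy(
            "COPY events (pgh_obj_id, pgh_label, pgh_context_id) FROM STDIN",
        ) as copy:
            for row in rows:
                copy.write_row(row)
        conn.commit()

This is also why the command's second code path passes native values to write_row() rather than the tab-separated text built into the earlier buffer: the driver does the text-format encoding itself.
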
diff --git a/dojo/management/commands/pghistory_backfill_simple.py b/dojo/management/commands/pghistory_backfill_simple.py
new file mode 100644
index 00000000000..0203b5506a7
--- /dev/null
+++ b/dojo/management/commands/pghistory_backfill_simple.py
@@ -0,0 +1,260 @@
+import logging
+import time
+
+from django.apps import apps
+from django.core.management.base import BaseCommand
+from django.db import connection
+
+logger = logging.getLogger(__name__)
+
+
+class Command(BaseCommand):
+ help = "Backfill pghistory events using direct SQL INSERT - much simpler and faster!"
+
+ def add_arguments(self, parser):
+ parser.add_argument(
+ "--batch-size",
+ type=int,
+ default=10000,
+ help="Number of records to process in each batch",
+ )
+ parser.add_argument(
+ "--dry-run",
+ action="store_true",
+ help="Show what would be processed without making changes",
+ )
+ parser.add_argument(
+ "--models",
+ nargs="+",
+ help="Specific models to process (default: all configured models)",
+ )
+
+ def handle(self, *args, **options):
+ batch_size = options["batch_size"]
+ dry_run = options["dry_run"]
+ specific_models = options.get("models")
+
+ # Define the models to process
+ models_to_process = [
+ "Test",
+ "Product",
+ "Finding",
+ "Endpoint",
+ "Dojo_User",
+ "Product_Type",
+ "Finding_Group",
+ "Risk_Acceptance",
+ "Finding_Template",
+ "Cred_User",
+ "Notification_Webhooks",
+ ]
+
+ if specific_models:
+ models_to_process = [m for m in models_to_process if m in specific_models]
+
+ self.stdout.write(
+ self.style.SUCCESS(
+ f"Starting backfill for {len(models_to_process)} model(s) using direct SQL INSERT...",
+ ),
+ )
+
+ total_processed = 0
+ total_start_time = time.time()
+
+ for model_name in models_to_process:
+ self.stdout.write(f"\nProcessing {model_name}...")
+ processed, _records_per_second = self.process_model_simple(
+ model_name, batch_size, dry_run,
+ )
+ total_processed += processed
+
+ total_duration = time.time() - total_start_time
+ total_records_per_second = total_processed / total_duration if total_duration > 0 else 0
+
+ self.stdout.write(
+ self.style.SUCCESS(
+ f"\n✓ Backfill completed: {total_processed:,} total records in {total_duration:.2f}s "
+ f"({total_records_per_second:.1f} records/sec)",
+ ),
+ )
+
+ def get_excluded_fields(self, model_name):
+ """Get the list of excluded fields for a specific model from pghistory configuration."""
+ excluded_fields_map = {
+ "Dojo_User": ["password"],
+ "Product": ["updated"],
+ "Cred_User": ["password"],
+ "Notification_Webhooks": ["header_name", "header_value"],
+ }
+ return excluded_fields_map.get(model_name, [])
+
+ def process_model_simple(self, model_name, batch_size, dry_run):
+ """Process a single model using direct SQL INSERT - much simpler!"""
+ try:
+ # Get table names
+ table_name, event_table_name = self.get_table_names(model_name)
+
+ if not table_name or not event_table_name:
+ self.stdout.write(f" Skipping {model_name}: table not found")
+ return 0, 0.0
+
+ # Check if event table exists
+ with connection.cursor() as cursor:
+ cursor.execute("""
+ SELECT EXISTS (
+ SELECT 1 FROM information_schema.tables
+ WHERE table_name = %s
+ )
+ """, [event_table_name])
+ if not cursor.fetchone()[0]:
+ self.stdout.write(f" Skipping {model_name}: event table {event_table_name} not found")
+ return 0, 0.0
+
+ # Get counts
+ with connection.cursor() as cursor:
+ cursor.execute(f"SELECT COUNT(*) FROM {table_name}")
+ total_count = cursor.fetchone()[0]
+
+ cursor.execute(f"""
+ SELECT COUNT(*) FROM {table_name} t
+ WHERE NOT EXISTS (
+ SELECT 1 FROM {event_table_name} e
+ WHERE e.pgh_obj_id = t.id AND e.pgh_label = 'initial_import'
+ )
+ """)
+ backfill_count = cursor.fetchone()[0]
+
+ if backfill_count == 0:
+ self.stdout.write(f" No records need backfill for {model_name}")
+ return 0, 0.0
+
+ self.stdout.write(f" {backfill_count:,} records need backfill out of {total_count:,} total")
+
+ if dry_run:
+ self.stdout.write(f" [DRY RUN] Would process {backfill_count:,} records")
+ return backfill_count, 0.0
+
+ # Get source columns (excluding pghistory-specific ones)
+ excluded_fields = self.get_excluded_fields(model_name)
+ with connection.cursor() as cursor:
+ cursor.execute("""
+ SELECT column_name
+ FROM information_schema.columns
+ WHERE table_name = %s
+ ORDER BY ordinal_position
+ """, [table_name])
+ source_columns = [row[0] for row in cursor.fetchall()]
+
+ # Filter out excluded fields
+ source_columns = [col for col in source_columns if col not in excluded_fields]
+
+ # Get event table columns (excluding pgh_id which is auto-generated)
+ with connection.cursor() as cursor:
+ cursor.execute("""
+ SELECT column_name
+ FROM information_schema.columns
+ WHERE table_name = %s AND column_name != 'pgh_id'
+ ORDER BY ordinal_position
+ """, [event_table_name])
+ event_columns = [row[0] for row in cursor.fetchall()]
+
+ # Build the INSERT query - this is the magic!
+ # We use INSERT INTO ... SELECT to directly generate the event data
+ select_columns = []
+ for col in event_columns:
+ if col == "pgh_created_at":
+ select_columns.append("NOW() as pgh_created_at")
+ elif col == "pgh_label":
+ select_columns.append("'initial_import' as pgh_label")
+ elif col == "pgh_obj_id":
+ select_columns.append("t.id as pgh_obj_id")
+ elif col == "pgh_context_id":
+ select_columns.append("NULL as pgh_context_id")
+ elif col in source_columns:
+ select_columns.append(f"t.{col}")
+ else:
+ select_columns.append("NULL as " + col)
+
+ # Get all IDs that need backfill
+ with connection.cursor() as cursor:
+ cursor.execute(f"""
+ SELECT t.id FROM {table_name} t
+ WHERE NOT EXISTS (
+ SELECT 1 FROM {event_table_name} e
+ WHERE e.pgh_obj_id = t.id AND e.pgh_label = 'initial_import'
+ )
+ ORDER BY t.id
+ """)
+ ids_to_process = [row[0] for row in cursor.fetchall()]
+
+ if not ids_to_process:
+ self.stdout.write(" No records need backfill")
+ return 0, 0.0
+
+ # Process in batches using direct SQL
+ processed = 0
+ model_start_time = time.time()
+
+ for i in range(0, len(ids_to_process), batch_size):
+ batch_ids = ids_to_process[i:i + batch_size]
+
+ # Log progress every 10 batches
+ if i > 0 and i % (batch_size * 10) == 0:
+ self.stdout.write(f" Processing batch starting at index {i:,}...")
+
+ # The magic happens here - direct SQL INSERT!
+ insert_sql = f"""
+ INSERT INTO {event_table_name} ({', '.join(event_columns)})
+ SELECT {', '.join(select_columns)}
+ FROM {table_name} t
+ WHERE t.id = ANY(%s)
+ ORDER BY t.id
+ """
+
+ with connection.cursor() as cursor:
+ cursor.execute(insert_sql, [batch_ids])
+ batch_processed = cursor.rowcount
+ processed += batch_processed
+
+ # Log progress every 10 batches
+ if i > 0 and i % (batch_size * 10) == 0:
+ progress = (i + batch_size) / len(ids_to_process) * 100
+ self.stdout.write(f" Processed {processed:,}/{backfill_count:,} records ({progress:.1f}%)")
+
+ # Calculate timing
+ model_end_time = time.time()
+ total_duration = model_end_time - model_start_time
+ records_per_second = processed / total_duration if total_duration > 0 else 0
+
+ self.stdout.write(
+ self.style.SUCCESS(
+ f" ✓ Completed {model_name}: {processed:,} records in {total_duration:.2f}s "
+ f"({records_per_second:.1f} records/sec)",
+ ),
+ )
+
+ return processed, records_per_second # noqa: TRY300
+
+ except Exception as e:
+ self.stdout.write(
+ self.style.ERROR(f" ✗ Failed to process {model_name}: {e}"),
+ )
+ logger.exception(f"Error processing {model_name}")
+ return 0, 0.0
+
+ def get_table_names(self, model_name):
+ """Get the actual table names for a model using Django's model metadata."""
+ try:
+ # Get the Django model
+ Model = apps.get_model("dojo", model_name)
+ table_name = Model._meta.db_table
+
+ # Get the corresponding Event model
+ event_table_name = f"{model_name}Event"
+ EventModel = apps.get_model("dojo", event_table_name)
+ event_table_name = EventModel._meta.db_table
+
+ return table_name, event_table_name # noqa: TRY300
+ except LookupError:
+ # Model not found, return None
+ return None, None
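
What makes this command fast is that INSERT INTO ... SELECT keeps the rows inside PostgreSQL: nothing is fetched, serialized in Python, and re-inserted. A minimal sketch of one batch through Django's connection, with hypothetical table and column names (the command derives the real ones from model metadata and information_schema at runtime):

    from django.db import connection

    def backfill_batch(batch_ids):
        """Insert initial_import events for one batch of ids (sketch)."""
        sql = """
            INSERT INTO dojo_testevent
                (pgh_created_at, pgh_label, pgh_obj_id, title)
            SELECT NOW(), 'initial_import', t.id, t.title
            FROM dojo_test t
            WHERE t.id = ANY(%s)
        """
        with connection.cursor() as cursor:
            cursor.execute(sql, [batch_ids])
            return cursor.rowcount  # rows inserted in this batch
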
diff --git a/dojo/middleware.py b/dojo/middleware.py
index aa954373c1c..5d63b1a35a0 100644
--- a/dojo/middleware.py
+++ b/dojo/middleware.py
@@ -254,17 +254,19 @@ def _close_search_context(self, request):
total_instances = sum(len(pk_list) for pk_list in captured_tasks.values())
threshold = getattr(settings, "WATSON_ASYNC_INDEX_UPDATE_THRESHOLD", 100)
- # If threshold is below 0, async updating is disabled
- if threshold < 0:
- logger.debug(f"AsyncSearchContextMiddleware: Async updating disabled (threshold={threshold}), using synchronous update")
- elif total_instances > threshold:
- logger.debug(f"AsyncSearchContextMiddleware: {total_instances} instances > {threshold} threshold, triggering async update")
- self._trigger_async_index_update(captured_tasks)
- # Invalidate to prevent synchronous index update by super()._close_search_context()
- search_context_manager.invalidate()
- else:
- logger.debug(f"AsyncSearchContextMiddleware: {total_instances} instances <= {threshold} threshold, using synchronous update")
- # Let watson handle synchronous update for small numbers
+ # only needed when at least one model instance is updated
+ if total_instances > 0:
+ # If threshold is below 0, async updating is disabled
+ if threshold < 0:
+ logger.debug(f"AsyncSearchContextMiddleware: Async updating disabled (threshold={threshold}), using synchronous update")
+ elif total_instances > threshold:
+ logger.debug(f"AsyncSearchContextMiddleware: {total_instances} instances > {threshold} threshold, triggering async update")
+ self._trigger_async_index_update(captured_tasks)
+ # Invalidate to prevent synchronous index update by super()._close_search_context()
+ search_context_manager.invalidate()
+ else:
+ logger.debug(f"AsyncSearchContextMiddleware: {total_instances} instances <= {threshold} threshold, using synchronous update")
+ # Let watson handle synchronous update for small numbers
super()._close_search_context(request)
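
The new outer guard means the sync-vs-async decision only runs when the request actually captured indexed instances; with nothing captured, the middleware falls straight through to watson's default close. The resulting dispatch, as a small sketch (the string labels are illustrative, not the real API):

    def choose_index_update(total_instances, threshold):
        """Mirror the middleware's dispatch logic (sketch)."""
        if total_instances == 0:
            return "default"   # nothing captured; plain close, no logging
        if threshold < 0:
            return "sync"      # async updating disabled entirely
        if total_instances > threshold:
            return "async"     # offload large batches to a background task
        return "sync"          # small batches are cheaper done inline

    assert choose_index_update(0, 100) == "default"
    assert choose_index_update(250, 100) == "async"
    assert choose_index_update(50, 100) == "sync"
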
diff --git a/dojo/templates/dojo/action_history.html b/dojo/templates/dojo/action_history.html
index c2c5e822e22..f3867024943 100644
--- a/dojo/templates/dojo/action_history.html
+++ b/dojo/templates/dojo/action_history.html
@@ -78,18 +78,22 @@
{{ field }}:
- {% if values.0 %}
- {{ values.0|truncatechars:50 }}
+ {% if values.0 is None %}
+ None
+ {% elif values.0 == "" %}
+ ''
{% else %}
- empty
+ {{ values.0|truncatechars:50 }}
{% endif %}
to
- {% if values.1 %}
- {{ values.1|truncatechars:50 }}
+ {% if values.1 is None %}
+ None
+ {% elif values.1 == "" %}
+ ''
{% else %}
- empty
+ {{ values.1|truncatechars:50 }}
{% endif %}
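
The template now distinguishes three cases that previously collapsed into one: a null value, an empty string, and a real value (which is truncated). As a sketch of the display rule:

    def display_value(value):
        """How the history template renders one side of a change (sketch)."""
        if value is None:
            return "None"
        if value == "":
            return "''"
        return str(value)[:50]  # approximation of Django's truncatechars:50
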
diff --git a/dojo/tools/acunetix/parse_acunetix_xml.py b/dojo/tools/acunetix/parse_acunetix_xml.py
index 1f7d76d4712..e5df13ae730 100644
--- a/dojo/tools/acunetix/parse_acunetix_xml.py
+++ b/dojo/tools/acunetix/parse_acunetix_xml.py
@@ -67,7 +67,7 @@ def get_findings(self, filename, test):
root = parse(filename).getroot()
for scan in root.findall("Scan"):
start_url = scan.findtext("StartURL")
- if ":" not in start_url:
+ if "://" not in start_url:
start_url = "//" + start_url
# get report date
if scan.findtext("StartTime") and scan.findtext("StartTime"):
diff --git a/dojo/user/views.py b/dojo/user/views.py
index 5e1682bbf42..603eb2e0db4 100644
--- a/dojo/user/views.py
+++ b/dojo/user/views.py
@@ -175,7 +175,7 @@ def logout_view(request):
@user_passes_test(lambda u: u.is_active)
def alerts(request):
- alerts = Alerts.objects.filter(user_id=request.user)
+ alerts = Alerts.objects.filter(user_id=request.user).order_by("-id")
if request.method == "POST":
removed_alerts = request.POST.getlist("alert_select")
@@ -194,7 +194,7 @@ def alerts(request):
def delete_alerts(request):
- alerts = Alerts.objects.filter(user_id=request.user)
+ alerts = Alerts.objects.filter(user_id=request.user).order_by("-id")
if request.method == "POST":
alerts.filter().delete()
diff --git a/helm/defectdojo/Chart.yaml b/helm/defectdojo/Chart.yaml
index 6401f7bb41a..510e34f9983 100644
--- a/helm/defectdojo/Chart.yaml
+++ b/helm/defectdojo/Chart.yaml
@@ -1,8 +1,8 @@
apiVersion: v2
-appVersion: "2.51.0"
+appVersion: "2.51.1"
description: A Helm chart for Kubernetes to install DefectDojo
name: defectdojo
-version: 1.7.0
+version: 1.7.1
icon: https://defectdojo.com/hubfs/DefectDojo_favicon.png
maintainers:
- name: madchap
@@ -18,5 +18,24 @@ dependencies:
repository: "oci://us-docker.pkg.dev/os-public-container-registry/defectdojo"
condition: redis.enabled
annotations:
- artifacthub.io/prerelease: "true"
- artifacthub.io/changes: ""
+ # For correct syntax, check https://artifacthub.io/docs/topics/annotations/helm/
+ # This is example for "artifacthub.io/changes"
+ # artifacthub.io/changes: |
+ # - kind: added
+ # description: Cool feature
+ # - kind: fixed
+ # description: Minor bug
+ # - kind: changed
+ # description: Broken feature
+ # - kind: removed
+ # description: Old bug
+ # - kind: deprecated
+ # description: Not-needed feature
+ # - kind: security
+ # description: Critical bug
+ artifacthub.io/prerelease: "false"
+ artifacthub.io/changes: |
+ - kind: added
+ description: Add support for automountServiceAccountToken
+ - kind: changed
+ description: Bump DefectDojo to 2.51.1
diff --git a/helm/defectdojo/README.md b/helm/defectdojo/README.md
index 3db53e1cb21..6fd4cdc2a2a 100644
--- a/helm/defectdojo/README.md
+++ b/helm/defectdojo/README.md
@@ -495,7 +495,7 @@ kubectl delete pvc data-defectdojo-redis-0 data-defectdojo-postgresql-0
# General information about chart values
 [chart badge images: markup lost in extraction]
A Helm chart for Kubernetes to install DefectDojo
@@ -528,6 +528,7 @@ A Helm chart for Kubernetes to install DefectDojo
| celery.annotations | object | `{}` | |
| celery.beat.affinity | object | `{}` | |
| celery.beat.annotations | object | `{}` | |
+| celery.beat.automountServiceAccountToken | bool | `false` | |
| celery.beat.extraEnv | list | `[]` | |
| celery.beat.extraInitContainers | list | `[]` | |
| celery.beat.extraVolumeMounts | list | `[]` | |
@@ -548,6 +549,7 @@ A Helm chart for Kubernetes to install DefectDojo
| celery.worker.affinity | object | `{}` | |
| celery.worker.annotations | object | `{}` | |
| celery.worker.appSettings.poolType | string | `"solo"` | |
+| celery.worker.automountServiceAccountToken | bool | `false` | |
| celery.worker.extraEnv | list | `[]` | |
| celery.worker.extraInitContainers | list | `[]` | |
| celery.worker.extraVolumeMounts | list | `[]` | |
@@ -582,6 +584,7 @@ A Helm chart for Kubernetes to install DefectDojo
| disableHooks | bool | `false` | |
| django.affinity | object | `{}` | |
| django.annotations | object | `{}` | |
+| django.automountServiceAccountToken | bool | `false` | |
| django.extraInitContainers | list | `[]` | |
| django.extraVolumes | list | `[]` | |
| django.ingress.activateTLS | bool | `true` | |
@@ -656,6 +659,7 @@ A Helm chart for Kubernetes to install DefectDojo
| imagePullSecrets | string | `nil` | |
| initializer.affinity | object | `{}` | |
| initializer.annotations | object | `{}` | |
+| initializer.automountServiceAccountToken | bool | `false` | |
| initializer.extraEnv | list | `[]` | |
| initializer.extraVolumeMounts | list | `[]` | |
| initializer.extraVolumes | list | `[]` | |
@@ -722,6 +726,7 @@ A Helm chart for Kubernetes to install DefectDojo
| serviceAccount.create | bool | `true` | |
| serviceAccount.labels | object | `{}` | |
| tag | string | `"latest"` | |
+| tests.unitTests.automountServiceAccountToken | bool | `false` | |
| tests.unitTests.resources.limits.cpu | string | `"500m"` | |
| tests.unitTests.resources.limits.memory | string | `"512Mi"` | |
| tests.unitTests.resources.requests.cpu | string | `"100m"` | |
diff --git a/helm/defectdojo/templates/celery-beat-deployment.yaml b/helm/defectdojo/templates/celery-beat-deployment.yaml
index 166f6c2afeb..4e5b4833331 100644
--- a/helm/defectdojo/templates/celery-beat-deployment.yaml
+++ b/helm/defectdojo/templates/celery-beat-deployment.yaml
@@ -52,6 +52,7 @@ spec:
{{- end }}
spec:
serviceAccountName: {{ include "defectdojo.serviceAccountName" . }}
+ automountServiceAccountToken: {{ .Values.celery.beat.automountServiceAccountToken }}
{{- with .Values.imagePullSecrets }}
imagePullSecrets:
- name: {{ . }}
diff --git a/helm/defectdojo/templates/celery-worker-deployment.yaml b/helm/defectdojo/templates/celery-worker-deployment.yaml
index ce4881094e9..68a9cfdf077 100644
--- a/helm/defectdojo/templates/celery-worker-deployment.yaml
+++ b/helm/defectdojo/templates/celery-worker-deployment.yaml
@@ -52,6 +52,7 @@ spec:
{{- end }}
spec:
serviceAccountName: {{ include "defectdojo.serviceAccountName" . }}
+ automountServiceAccountToken: {{ .Values.celery.worker.automountServiceAccountToken }}
{{- with .Values.imagePullSecrets }}
imagePullSecrets:
- name: {{ . }}
diff --git a/helm/defectdojo/templates/django-deployment.yaml b/helm/defectdojo/templates/django-deployment.yaml
index fb77e8f7e88..63f977bcbaf 100644
--- a/helm/defectdojo/templates/django-deployment.yaml
+++ b/helm/defectdojo/templates/django-deployment.yaml
@@ -59,6 +59,7 @@ spec:
{{- end }}
spec:
serviceAccountName: {{ include "defectdojo.serviceAccountName" . }}
+ automountServiceAccountToken: {{ .Values.django.automountServiceAccountToken }}
{{- with .Values.imagePullSecrets }}
imagePullSecrets:
- name: {{ quote . }}
diff --git a/helm/defectdojo/templates/initializer-job.yaml b/helm/defectdojo/templates/initializer-job.yaml
index 668812d1a08..aa4bff0cbd7 100644
--- a/helm/defectdojo/templates/initializer-job.yaml
+++ b/helm/defectdojo/templates/initializer-job.yaml
@@ -39,6 +39,7 @@ spec:
{{- end }}
spec:
serviceAccountName: {{ include "defectdojo.serviceAccountName" . }}
+ automountServiceAccountToken: {{ .Values.initializer.automountServiceAccountToken }}
{{- with .Values.imagePullSecrets }}
imagePullSecrets:
- name: {{ . }}
diff --git a/helm/defectdojo/templates/tests/unit-tests.yaml b/helm/defectdojo/templates/tests/unit-tests.yaml
index 2f390733b22..08939429008 100644
--- a/helm/defectdojo/templates/tests/unit-tests.yaml
+++ b/helm/defectdojo/templates/tests/unit-tests.yaml
@@ -12,6 +12,7 @@ metadata:
helm.sh/hook: test-success
spec:
serviceAccountName: {{ include "defectdojo.serviceAccountName" . }}
+ automountServiceAccountToken: {{ .Values.tests.unitTests.automountServiceAccountToken }}
{{- with .Values.imagePullSecrets }}
imagePullSecrets:
- name: {{ . }}
diff --git a/helm/defectdojo/values.schema.json b/helm/defectdojo/values.schema.json
index 93e7b3915ff..3d899e176e0 100644
--- a/helm/defectdojo/values.schema.json
+++ b/helm/defectdojo/values.schema.json
@@ -49,6 +49,9 @@
"annotations": {
"type": "object"
},
+ "automountServiceAccountToken": {
+ "type": "boolean"
+ },
"extraEnv": {
"type": "array"
},
@@ -134,6 +137,9 @@
}
}
},
+ "automountServiceAccountToken": {
+ "type": "boolean"
+ },
"extraEnv": {
"type": "array"
},
@@ -288,6 +294,9 @@
"annotations": {
"type": "object"
},
+ "automountServiceAccountToken": {
+ "type": "boolean"
+ },
"extraInitContainers": {
"type": "array"
},
@@ -616,6 +625,9 @@
"annotations": {
"type": "object"
},
+ "automountServiceAccountToken": {
+ "type": "boolean"
+ },
"extraEnv": {
"type": "array"
},
@@ -968,6 +980,9 @@
"unitTests": {
"type": "object",
"properties": {
+ "automountServiceAccountToken": {
+ "type": "boolean"
+ },
"resources": {
"type": "object",
"properties": {
diff --git a/helm/defectdojo/values.yaml b/helm/defectdojo/values.yaml
index 8415ea73067..dd47f65eea4 100644
--- a/helm/defectdojo/values.yaml
+++ b/helm/defectdojo/values.yaml
@@ -114,6 +114,7 @@ dbMigrationChecker:
tests:
unitTests:
+ automountServiceAccountToken: false
resources:
requests:
cpu: 100m
@@ -153,6 +154,7 @@ celery:
# Common annotations to worker and beat deployments and pods.
annotations: {}
beat:
+ automountServiceAccountToken: false
# Annotations for the Celery beat deployment.
annotations: {}
affinity: {}
@@ -192,6 +194,7 @@ celery:
startupProbe: {}
tolerations: []
worker:
+ automountServiceAccountToken: false
# Annotations for the Celery worker deployment.
annotations: {}
affinity: {}
@@ -241,6 +244,7 @@ celery:
# prefetchMultiplier: 128
django:
+ automountServiceAccountToken: false
annotations: {}
service:
annotations: {}
@@ -364,6 +368,7 @@ django:
initializer:
run: true
+ automountServiceAccountToken: false
jobAnnotations: {}
annotations: {}
labels: {}
diff --git a/requirements.txt b/requirements.txt
index 2c61575c349..94021e1365e 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -18,7 +18,7 @@ django_extensions==4.1
django-slack==5.19.0
django-watson==1.6.3
django-prometheus==2.4.1
-Django==5.1.12
+Django==5.1.13
djangorestframework==3.16.1
html2text==2025.4.15
humanize==4.13.0
diff --git a/unittests/dojo_test_case.py b/unittests/dojo_test_case.py
index 5be0e1a5e3e..4818dd798ce 100644
--- a/unittests/dojo_test_case.py
+++ b/unittests/dojo_test_case.py
@@ -499,7 +499,7 @@ def __init__(self, *args, **kwargs):
def login_as_admin(self):
testuser = self.get_test_admin()
- token = Token.objects.get(user=testuser)
+ token, _ = Token.objects.get_or_create(user=testuser)
self.client = APIClient()
self.client.credentials(HTTP_AUTHORIZATION="Token " + token.key)
diff --git a/unittests/scans/acunetix/XML_http_example_co_id_port_num.xml b/unittests/scans/acunetix/XML_http_example_co_id_port_num.xml
new file mode 100644
index 00000000000..2dcb4509bcd
--- /dev/null
+++ b/unittests/scans/acunetix/XML_http_example_co_id_port_num.xml
@@ -0,0 +1,1443 @@
+ [XML element markup stripped during extraction: scan metadata and report item elements]
+ Form action: /h/search
Form method: GET
Form inputs:
- sq [unknown]
- incShared [checkbox]
- search [submit]
]]>
+
+ ]]>
+
+CSRF is a type of 'confused deputy' attack which leverages the authentication and authorization of the victim when the forged request is being sent to the web server. Therefore, if a CSRF vulnerability could affect highly privileged users such as administrators full application compromise may be possible.]]>
+ This alert requires manual confirmation
+Cross-Site Request Forgery (CSRF, or XSRF) is a vulnerability wherein an attacker tricks a victim into making a request the victim did not intend to make. Therefore, with CSRF, an attacker abuses the trust a web application has with a victim's browser.
+Acunetix found an HTML form with no apparent anti-CSRF protection implemented. Consult the 'Attack details' section for more information about the affected HTML form.]]>
+
Upon sending an HTTP request (legitimate or otherwise), the victim's browser will include the Cookie header. Cookies are typically used to store a user's session identifier in order to prevent the user from authenticating for each request, which would obviously be impractical.
To such an extent, if the victim's authentication session is stored in a Cookie, and is still valid (a browser window/tab does not necessarily need to be open), if the application is vulnerable to CSRF, an attacker can leverage CSRF to launch any desired requests against the website, without the website being able to distinguish whether the requests are legitimate or not.
CSRF in GET requests
The following is a simple example of how CSRF can be abused in GET requests through the use of the <img> tag.
<img src="http://example.com/changePassword/?newPassword=attackerPassword">
The above is a CSRF attack using an HTTP GET request. If a victim visits a web page controlled by an attacker with the following payload, the browser will send a request containing the Cookie to the attacker crafted URL.
CSRF in GET requests
GET requests, however are not the only HTTP method an attacker can abuse. POST requests are equally susceptible to CSRF, however, an attacker will need to make use of a little bit of JavaScript to submit the POST request.
The following is a simple example of how CSRF can be abused POST requests through the use of an <iframe> tag. This code would be loaded in an iFrame which is made invisible to the victim.
iFrame <iframe src="http://attacker.com/csrfAttack" style="width:0;height:0;border:0;border:none;"></iframe>
iFrame Contents <body onload="document.getElementById('csrf').submit()"> <form id="csrf" action="http://example.com/changePassword" method="POST"> <input name="newPassword" value="attackerPassword" /> </form> </body> ]]>
+
+The recommended and most widely used technique for preventing CSRF attacks is known as an anti-CSRF token, also sometimes referred to as a synchronizer token. The characteristics of a well-designed anti-CSRF system include the following attributes.
+
+ - The anti-CSRF token should be unique for each user session
+ - The session should automatically expire after a suitable amount of time
+ - The anti-CSRF token should be a cryptographically random value of significant length
+ - The anti-CSRF token should be cryptographically secure, that is, generated by a strong Pseudo-Random Number Generator (PRNG) algorithm
+ - The anti-CSRF token is added as a hidden field for forms, or within URLs (only necessary if GET requests cause state changes, that is, GET requests are not idempotent)
+ - The server should reject the requested action if the anti-CSRF token fails validation
+
+When a user submits a form or makes some other authenticated request that requires a Cookie, the anti-CSRF token should be included in the request. The web application will then verify the existence and correctness of this token before processing the request. If the token is missing or incorrect, the request can be rejected.]]>
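To make the token scheme above concrete, here is a minimal sketch in Python, assuming a generic dict-like server-side session store (none of these names come from the scanner output or from DefectDojo):

import hmac
import secrets

def issue_csrf_token(session):
    # Generate a cryptographically random token once per session; the
    # secrets module draws from the OS CSPRNG, satisfying the PRNG
    # requirement listed above.
    if "csrf_token" not in session:
        session["csrf_token"] = secrets.token_urlsafe(32)
    return session["csrf_token"]

def validate_csrf_token(session, submitted_token):
    # Reject the request if the token is missing or does not match the
    # server-side copy; compare in constant time to avoid timing leaks.
    expected = session.get("csrf_token")
    if not expected or not submitted_token:
        return False
    return hmac.compare_digest(expected, submitted_token)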
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Form action: zmain
Form method: POST
Form inputs:
- search [hidden]
- crumb [hidden]
- sq [text]
- actionSearch [submit]
- st [select]
]]>
+
+ ]]>
+
+
+
+
+
+
+
+CSRF is a type of 'confused deputy' attack which leverages the authentication and authorization of the victim when the forged request is being sent to the web server. Therefore, if a CSRF vulnerability could affect highly privileged users such as administrators, full application compromise may be possible.]]>
+ This alert requires manual confirmation
+Cross-Site Request Forgery (CSRF, or XSRF) is a vulnerability wherein an attacker tricks a victim into making a request the victim did not intend to make. Therefore, with CSRF, an attacker abuses the trust a web application has with a victim's browser.
+Acunetix found an HTML form with no apparent anti-CSRF protection implemented. Consult the 'Attack details' section for more information about the affected HTML form.]]>
+
Upon sending an HTTP request (legitimate or otherwise), the victim's browser will include the Cookie header. Cookies are typically used to store a user's session identifier in order to prevent the user from authenticating for each request, which would obviously be impractical.
As such, if the victim's authentication session is stored in a Cookie and is still valid (a browser window/tab does not necessarily need to be open), and the application is vulnerable to CSRF, an attacker can leverage CSRF to launch any desired requests against the website, without the website being able to distinguish whether the requests are legitimate or not.
CSRF in GET requests
The following is a simple example of how CSRF can be abused in GET requests through the use of the <img> tag.
<img src="http://example.com/changePassword/?newPassword=attackerPassword">
The above is a CSRF attack using an HTTP GET request. If a victim visits a web page controlled by an attacker with the following payload, the browser will send a request containing the Cookie to the attacker-crafted URL.
CSRF in POST requests
GET requests, however, are not the only HTTP method an attacker can abuse. POST requests are equally susceptible to CSRF; however, an attacker will need to make use of a little bit of JavaScript to submit the POST request.
The following is a simple example of how CSRF can be abused in POST requests through the use of an <iframe> tag. This code would be loaded in an iFrame which is made invisible to the victim.
iFrame <iframe src="http://attacker.com/csrfAttack" style="width:0;height:0;border:0;border:none;"></iframe>
iFrame Contents <body onload="document.getElementById('csrf').submit()"> <form id="csrf" action="http://example.com/changePassword" method="POST"> <input name="newPassword" value="attackerPassword" /> </form> </body> ]]>
+
+The recommended and most widely used technique for preventing CSRF attacks is known as an anti-CSRF token, also sometimes referred to as a synchronizer token. The characteristics of a well-designed anti-CSRF system include the following attributes.
+
+ - The anti-CSRF token should be unique for each user session
+ - The session should automatically expire after a suitable amount of time
+ - The anti-CSRF token should be a cryptographically random value of significant length
+ - The anti-CSRF token should be cryptographically secure, that is, generated by a strong Pseudo-Random Number Generator (PRNG) algorithm
+ - The anti-CSRF token is added as a hidden field for forms, or within URLs (only necessary if GET requests cause state changes, that is, GET requests are not idempotent)
+ - The server should reject the requested action if the anti-CSRF token fails validation
+
+When a user submits a form or makes some other authenticated request that requires a Cookie, the anti-CSRF token should be included in the request. The web application will then verify the existence and correctness of this token before processing the request. If the token is missing or incorrect, the request can be rejected.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Form action: zmain
Form method: POST
Form inputs:
- query [hidden]
- st [hidden]
- doFolderAction [hidden]
- parentid [hidden]
- crumb [hidden]
- sname [text]
- actionSaveSearch [submit]
]]>
+
+ ]]>
+
+
+
+
+
+
+
+CSRF is a type of 'confused deputy' attack which leverages the authentication and authorization of the victim when the forged request is being sent to the web server. Therefore, if a CSRF vulnerability could affect highly privileged users such as administrators, full application compromise may be possible.]]>
+ This alert requires manual confirmation
+Cross-Site Request Forgery (CSRF, or XSRF) is a vulnerability wherein an attacker tricks a victim into making a request the victim did not intend to make. Therefore, with CSRF, an attacker abuses the trust a web application has with a victim's browser.
+Acunetix found an HTML form with no apparent anti-CSRF protection implemented. Consult the 'Attack details' section for more information about the affected HTML form.]]>
+
Upon sending an HTTP request (legitimate or otherwise), the victim's browser will include the Cookie header. Cookies are typically used to store a user's session identifier in order to prevent the user from authenticating for each request, which would obviously be impractical.
As such, if the victim's authentication session is stored in a Cookie and is still valid (a browser window/tab does not necessarily need to be open), and the application is vulnerable to CSRF, an attacker can leverage CSRF to launch any desired requests against the website, without the website being able to distinguish whether the requests are legitimate or not.
CSRF in GET requests
The following is a simple example of how CSRF can be abused in GET requests through the use of the <img> tag.
<img src="http://example.com/changePassword/?newPassword=attackerPassword">
The above is a CSRF attack using an HTTP GET request. If a victim visits a web page controlled by an attacker with the following payload, the browser will send a request containing the Cookie to the attacker-crafted URL.
CSRF in POST requests
GET requests, however, are not the only HTTP method an attacker can abuse. POST requests are equally susceptible to CSRF; however, an attacker will need to make use of a little bit of JavaScript to submit the POST request.
The following is a simple example of how CSRF can be abused in POST requests through the use of an <iframe> tag. This code would be loaded in an iFrame which is made invisible to the victim.
iFrame <iframe src="http://attacker.com/csrfAttack" style="width:0;height:0;border:0;border:none;"></iframe>
iFrame Contents <body onload="document.getElementById('csrf').submit()"> <form id="csrf" action="http://example.com/changePassword" method="POST"> <input name="newPassword" value="attackerPassword" /> </form> </body> ]]>
+
+The recommended and most widely used technique for preventing CSRF attacks is known as an anti-CSRF token, also sometimes referred to as a synchronizer token. The characteristics of a well-designed anti-CSRF system include the following attributes.
+
+ - The anti-CSRF token should be unique for each user session
+ - The session should automatically expire after a suitable amount of time
+ - The anti-CSRF token should be a cryptographically random value of significant length
+ - The anti-CSRF token should be cryptographically secure, that is, generated by a strong Pseudo-Random Number Generator (PRNG) algorithm
+ - The anti-CSRF token is added as a hidden field for forms, or within URLs (only necessary if GET requests cause state changes, that is, GET requests are not idempotent)
+ - The server should reject the requested action if the anti-CSRF token fails validation
+
+When a user submits a form or makes some other authenticated request that requires a Cookie, the anti-CSRF token should be included in the request. The web application will then verify the existence and correctness of this token before processing the request. If the token is missing or incorrect, the request can be rejected.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 10001 ms]]>
+
+
+
+
+
+
+
+
+
+
+Slowloris and Slow HTTP POST DoS attacks rely on the fact that the HTTP protocol, by design, requires requests to be completely received by the server before they are processed. If an HTTP request is not complete, or if the transfer rate is very low, the server keeps its resources busy waiting for the rest of the data. If the server keeps too many resources busy, this creates a denial of service.]]>
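On the mitigation side, a minimal sketch assuming Python's standard-library HTTP server (purely illustrative, not part of this fixture): bound how long the server waits for a complete request so a stalled client cannot hold a worker open indefinitely.

from http.server import BaseHTTPRequestHandler, HTTPServer

class TimeoutHandler(BaseHTTPRequestHandler):
    # socketserver applies this timeout to the client socket; if the
    # client stalls mid-request for longer than this, the read times out
    # and handle_one_request() closes the connection instead of waiting.
    timeout = 10

    def do_GET(self):
        self.send_response(200)
        self.end_headers()
        self.wfile.write(b"ok")

if __name__ == "__main__":
    HTTPServer(("127.0.0.1", 8000), TimeoutHandler).serve_forever()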
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ YUI version 2.7.0.
The version was detected from file content.
References: - http://www.cvedetails.com/cve/CVE-2010-4207/
]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Content Security Policy (CSP) can be implemented by adding a Content-Security-Policy header. The value of this header is a string containing the policy directives describing your Content Security Policy. To implement CSP, you should define lists of allowed origins for all of the types of resources that your site utilizes. For example, if you have a simple site that needs to load scripts, stylesheets, and images hosted locally, as well as the jQuery library from its CDN, the CSP header could look like the following:
+
+Content-Security-Policy:
+ default-src 'self';
+ script-src 'self' https://code.jquery.com;
+
+
+It was detected that your web application doesn't implement Content Security Policy (CSP), as the CSP header is missing from the response. It's recommended to implement Content Security Policy (CSP) in your web application.]]>
+
+ Content-Security-Policy HTTP header to a web page and giving it values to control resources the user agent is allowed to load for that page. ]]>
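A minimal sketch of the remediation for a Django application (the middleware name and CSP_POLICY value are assumptions for illustration, not existing DefectDojo code):

# Assumed example policy mirroring the header shown above.
CSP_POLICY = "default-src 'self'; script-src 'self' https://code.jquery.com"

class ContentSecurityPolicyMiddleware:
    def __init__(self, get_response):
        self.get_response = get_response

    def __call__(self, request):
        response = self.get_response(request)
        # setdefault() preserves any policy a view already set explicitly.
        response.setdefault("Content-Security-Policy", CSP_POLICY)
        return response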
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/unittests/scans/acunetix/many_findings_with_port_number.xml b/unittests/scans/acunetix/many_findings_with_port_number.xml
new file mode 100644
index 00000000000..2d4b517ae9b
--- /dev/null
+++ b/unittests/scans/acunetix/many_findings_with_port_number.xml
@@ -0,0 +1,335 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 10000 ms]]>
+
+
+
+
+
+
+
+
+
+
+Slowloris and Slow HTTP POST DoS attacks rely on the fact that the HTTP protocol, by design, requires requests to be completely received by the server before they are processed. If an HTTP request is not complete, or if the transfer rate is very low, the server keeps its resources busy waiting for the rest of the data. If the server keeps too many resources busy, this creates a denial of service.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta.itsecgames.com
Response: <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
+ "http://www.w3.org/TR/html4/strict.dtd">
+<html>
+<head>
+ <title>itsecgames.com</title>
+ <meta name="description" content="bWAPP">
+ <meta name="keywords" content="bWAPP">
+</head>
+<frameset rows="100%,*" border="0">
+ <frame src="http://www.mmebvba.com/sites/bwapp/" frameborder="0" />
+</frameset>
+</html>
+
]]>
+
+
+
+
+
+
+
+
+
+
+This web server is responding differently when the Host header is manipulated and various common virtual hosts are tested. This could indicate there is a Virtual Host present.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Content Security Policy (CSP) can be implemented by adding a Content-Security-Policy header. The value of this header is a string containing the policy directives describing your Content Security Policy. To implement CSP, you should define lists of allowed origins for all of the types of resources that your site utilizes. For example, if you have a simple site that needs to load scripts, stylesheets, and images hosted locally, as well as the jQuery library from its CDN, the CSP header could look like the following:
+
+Content-Security-Policy:
+ default-src 'self';
+ script-src 'self' https://code.jquery.com;
+
+
+It was detected that your web application doesn't implement Content Security Policy (CSP), as the CSP header is missing from the response. It's recommended to implement Content Security Policy (CSP) in your web application.]]>
+
+ Content-Security-Policy HTTP header to a web page and giving it values to control resources the user agent is allowed to load for that page. ]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/unittests/scans/acunetix/one_finding_with_port_num.xml b/unittests/scans/acunetix/one_finding_with_port_num.xml
new file mode 100644
index 00000000000..1d05c9ee211
--- /dev/null
+++ b/unittests/scans/acunetix/one_finding_with_port_num.xml
@@ -0,0 +1,43 @@
+
+
+
+ VijayTest
+ Vijay Short Name
+ https://vijaytest.com
+ 24/09/2018, 18:09:55
+ 24/09/2018, 21:42:41
+ 212 minutes, 4 seconds
+ False
+ True
+
+
+ Apache-Coyote/1.1
+
+
+ VijayReportItem1
+ VijayTestModule
+ Vijay Test
+
+
+
+
+
+
+ medium
+ csrf
+ Vijay Test Imapact
+ Vijay Test Detail information
+ Vijay Test Recommendation
+ Vijay Test Description
+
+
+
+
+
+ https://vijayref.com
+
+
+
+
+
+
\ No newline at end of file
diff --git a/unittests/scans/sarif/bash-report-subset-same-hash-code-same-unique-id.sarif b/unittests/scans/sarif/bash-report-subset-same-hash-code-same-unique-id.sarif
new file mode 100644
index 00000000000..2482ebd6888
--- /dev/null
+++ b/unittests/scans/sarif/bash-report-subset-same-hash-code-same-unique-id.sarif
@@ -0,0 +1,405 @@
+{
+ "runs": [
+ {
+ "tool": {
+ "driver": {
+ "name": "Shell Script Analysis",
+ "rules": [
+ {
+ "id": "2076",
+ "help": {
+ "text": "Don't quote right-hand side of =~, it'll match literally rather than as a regex.",
+ "markdown": "Don't quote right-hand side of =~, it'll match literally rather than as a regex."
+ },
+ "name": "",
+ "properties": {
+ "tags": [
+ "Scan"
+ ],
+ "precision": "high"
+ },
+ "defaultConfiguration": {
+ "level": "note"
+ },
+ "fullDescription": {
+ "text": "Don't quote right-hand side of =~, it'll match literally rather than as a regex."
+ },
+ "helpUri": "https://github.com/koalaman/shellcheck/wiki/SC2076",
+ "shortDescription": {
+ "text": "Don't quote right-hand side of =~, it'll match literally rather than as a regex."
+ }
+ },
+ {
+ "id": "2071",
+ "help": {
+ "text": "> is for string comparisons. Use -gt instead.",
+ "markdown": "> is for string comparisons. Use -gt instead."
+ },
+ "name": "",
+ "properties": {
+ "tags": [
+ "Scan"
+ ],
+ "precision": "high"
+ },
+ "defaultConfiguration": {
+ "level": "note"
+ },
+ "fullDescription": {
+ "text": "> is for string comparisons. Use -gt instead."
+ },
+ "helpUri": "https://github.com/koalaman/shellcheck/wiki/SC2071",
+ "shortDescription": {
+ "text": "> is for string comparisons"
+ }
+ },
+ {
+ "id": "2072",
+ "help": {
+ "text": "Decimals are not supported. Either use integers only, or use bc or awk to compare.",
+ "markdown": "Decimals are not supported. Either use integers only, or use bc or awk to compare."
+ },
+ "name": "",
+ "properties": {
+ "tags": [
+ "Scan"
+ ],
+ "precision": "high"
+ },
+ "defaultConfiguration": {
+ "level": "note"
+ },
+ "fullDescription": {
+ "text": "Decimals are not supported. Either use integers only, or use bc or awk to compare."
+ },
+ "helpUri": "https://github.com/koalaman/shellcheck/wiki/SC2072",
+ "shortDescription": {
+ "text": "Decimals are not supported"
+ }
+ },
+ {
+ "id": "2077",
+ "help": {
+ "text": "You need spaces around the comparison operator.",
+ "markdown": "You need spaces around the comparison operator."
+ },
+ "name": "",
+ "properties": {
+ "tags": [
+ "Scan"
+ ],
+ "precision": "high"
+ },
+ "defaultConfiguration": {
+ "level": "note"
+ },
+ "fullDescription": {
+ "text": "You need spaces around the comparison operator."
+ },
+ "helpUri": "https://github.com/koalaman/shellcheck/wiki/SC2077",
+ "shortDescription": {
+ "text": "You need spaces around the comparison operator."
+ }
+ },
+ {
+ "id": "1035",
+ "help": {
+ "text": "You are missing a required space here.",
+ "markdown": "You are missing a required space here."
+ },
+ "name": "",
+ "properties": {
+ "tags": [
+ "Scan"
+ ],
+ "precision": "high"
+ },
+ "defaultConfiguration": {
+ "level": "note"
+ },
+ "fullDescription": {
+ "text": "You are missing a required space here."
+ },
+ "helpUri": "https://github.com/koalaman/shellcheck/wiki/SC1035",
+ "shortDescription": {
+ "text": "You are missing a required space here."
+ }
+ }
+ ],
+ "version": "1.0.0-scan",
+ "fullName": "Shell Script Analysis"
+ }
+ },
+ "conversion": {
+ "tool": {
+ "driver": {
+ "name": "@ShiftLeft/sast-scan"
+ }
+ },
+ "invocation": {
+ "arguments": [
+ "-a",
+ "--shell=bash",
+ "-f",
+ "json",
+ "-S",
+ "error",
+ "--color=never",
+ "/app/legacy-setup.bash",
+ "/app/test.sh",
+ "/app/upgrade.bash",
+ "/app/entrypoint_scripts/os/ubuntu.sh",
+ "/app/entrypoint_scripts/os/linux.sh",
+ "/app/entrypoint_scripts/common/config-vars.sh",
+ "/app/entrypoint_scripts/common/install-dojo.sh",
+ "/app/entrypoint_scripts/common/common-os.sh",
+ "/app/entrypoint_scripts/common/dojo-shared-resources.sh",
+ "/app/entrypoint_scripts/common/cmd-args.sh",
+ "/app/entrypoint_scripts/common/prompt.sh",
+ "/app/entrypoint_scripts/run/startup-docker.bash",
+ "/app/entrypoint_scripts/run/run-local-dojo.bash",
+ "/app/setup/setup.bash",
+ "/app/setup/upgrade.bash",
+ "/app/setup/scripts/os/ubuntu.sh",
+ "/app/setup/scripts/os/linux.sh",
+ "/app/setup/scripts/common/config-vars.sh",
+ "/app/setup/scripts/common/install-dojo.sh",
+ "/app/setup/scripts/common/common-os.sh",
+ "/app/setup/scripts/common/dojo-shared-resources.sh",
+ "/app/setup/scripts/common/cmd-args.sh",
+ "/app/setup/scripts/common/prompt.sh",
+ "/app/setup/scripts/run/startup-docker.bash",
+ "/app/setup/scripts/run/run-local-dojo.bash",
+ "/app/docker/entrypoint-uwsgi-dev.sh",
+ "/app/docker/entrypoint.sh",
+ "/app/docker/entrypoint-uwsgi.sh",
+ "/app/docker/entrypoint-uwsgi-ptvsd.sh",
+ "/app/docker/wait-for-it.sh",
+ "/app/docker/entrypoint-celery.sh",
+ "/app/docker/entrypoint-unit-tests.sh",
+ "/app/docker/entrypoint-nginx.sh",
+ "/app/docker/dojo-data.bash",
+ "/app/docker/entrypoint-unit-tests-devDocker.sh",
+ "/app/docker/setEnv.sh",
+ "/app/docker/entrypoint-celery-worker.sh",
+ "/app/docker/entrypoint-initializer.sh",
+ "/app/docker/entrypoint-celery-beat.sh",
+ "/app/docker/entrypoint-integration-tests.sh",
+ "/app/docker/unit-tests.sh"
+ ],
+ "executionSuccessful": true,
+ "commandLine": "-a --shell=bash -f json -S error --color=never /app/legacy-setup.bash /app/test.sh /app/upgrade.bash /app/entrypoint_scripts/os/ubuntu.sh /app/entrypoint_scripts/os/linux.sh /app/entrypoint_scripts/common/config-vars.sh /app/entrypoint_scripts/common/install-dojo.sh /app/entrypoint_scripts/common/common-os.sh /app/entrypoint_scripts/common/dojo-shared-resources.sh /app/entrypoint_scripts/common/cmd-args.sh /app/entrypoint_scripts/common/prompt.sh /app/entrypoint_scripts/run/startup-docker.bash /app/entrypoint_scripts/run/run-local-dojo.bash /app/setup/setup.bash /app/setup/upgrade.bash /app/setup/scripts/os/ubuntu.sh /app/setup/scripts/os/linux.sh /app/setup/scripts/common/config-vars.sh /app/setup/scripts/common/install-dojo.sh /app/setup/scripts/common/common-os.sh /app/setup/scripts/common/dojo-shared-resources.sh /app/setup/scripts/common/cmd-args.sh /app/setup/scripts/common/prompt.sh /app/setup/scripts/run/startup-docker.bash /app/setup/scripts/run/run-local-dojo.bash /app/docker/entrypoint-uwsgi-dev.sh /app/docker/entrypoint.sh /app/docker/entrypoint-uwsgi.sh /app/docker/entrypoint-uwsgi-ptvsd.sh /app/docker/wait-for-it.sh /app/docker/entrypoint-celery.sh /app/docker/entrypoint-unit-tests.sh /app/docker/entrypoint-nginx.sh /app/docker/dojo-data.bash /app/docker/entrypoint-unit-tests-devDocker.sh /app/docker/setEnv.sh /app/docker/entrypoint-celery-worker.sh /app/docker/entrypoint-initializer.sh /app/docker/entrypoint-celery-beat.sh /app/docker/entrypoint-integration-tests.sh /app/docker/unit-tests.sh",
+ "endTimeUtc": "2021-03-08T15:39:40Z",
+ "workingDirectory": {
+ "uri": "file:///home/damien/dd"
+ }
+ }
+ },
+ "invocations": [
+ {
+ "executionSuccessful": true,
+ "endTimeUtc": "2021-03-08T15:39:40Z",
+ "workingDirectory": {
+ "uri": "file:///home/damien/dd"
+ }
+ }
+ ],
+ "properties": {
+ "metrics": {
+ "total": 27,
+ "critical": 0,
+ "high": 0,
+ "medium": 0,
+ "low": 27
+ }
+ },
+ "results": [
+ {
+ "message": {
+ "markdown": "",
+ "text": "Decimals are not supported. Either use integers only, or use bc or awk to compare."
+ },
+ "level": "note",
+ "locations": [
+ {
+ "physicalLocation": {
+ "region": {
+ "snippet": {
+ "text": " if [[ \"$PYV\"<\"2.7\" ]]; then\n"
+ },
+ "startLine": 143
+ },
+ "artifactLocation": {
+ "uri": "file:///home/damien/dd/entrypoint_scripts/common/dojo-shared-resources.sh"
+ },
+ "contextRegion": {
+ "snippet": {
+ "text": " PYV=`python -c \"import sys;t='{v[0]}.{v[1]}'.format(v=list(sys.version_info[:2]));sys.stdout.write(t)\";`\n if [[ \"$PYV\"<\"2.7\" ]]; then\n"
+ },
+ "endLine": 143,
+ "startLine": 142
+ }
+ }
+ }
+ ],
+ "properties": {
+ "issue_confidence": "MEDIUM",
+ "issue_severity": "LOW",
+ "issue_tags": {}
+ },
+ "baselineState": "new",
+ "partialFingerprints": {
+ "scanPrimaryLocationHash": "4d655189c485c086",
+ "scanFileHash": "4ee28649c65c392d"
+ },
+ "ruleId": "2072",
+ "ruleIndex": 2
+ },
+ {
+ "message": {
+ "markdown": "",
+ "text": "Decimals are not supported. Either use integers only, or use bc or awk to compare."
+ },
+ "level": "note",
+ "locations": [
+ {
+ "physicalLocation": {
+ "region": {
+ "snippet": {
+ "text": " if [[ \"$PYV\"<\"2.7\" ]]; then\n"
+ },
+ "startLine": 142
+ },
+ "artifactLocation": {
+ "uri": "file:///home/damien/dd/setup/scripts/common/dojo-shared-resources.sh"
+ },
+ "contextRegion": {
+ "snippet": {
+ "text": " PYV=`python -c \"import sys;t='{v[0]}.{v[1]}'.format(v=list(sys.version_info[:2]));sys.stdout.write(t)\";`\n if [[ \"$PYV\"<\"2.7\" ]]; then\n"
+ },
+ "endLine": 143,
+ "startLine": 141
+ }
+ }
+ }
+ ],
+ "properties": {
+ "issue_confidence": "MEDIUM",
+ "issue_severity": "LOW",
+ "issue_tags": {}
+ },
+ "baselineState": "new",
+ "partialFingerprints": {
+ "scanPrimaryLocationHash": "4d655189c485c086",
+ "scanFileHash": "4ee28649c65c392d"
+ },
+ "ruleId": "2072",
+ "ruleIndex": 2
+ },
+ {
+ "message": {
+ "markdown": "",
+ "text": "Decimals are not supported. Either use integers only, or use bc or awk to compare."
+ },
+ "level": "note",
+ "locations": [
+ {
+ "physicalLocation": {
+ "region": {
+ "snippet": {
+ "text": " if [[ \"$PYV\"<\"2.7\" ]]; then\n"
+ },
+ "startLine": 143
+ },
+ "artifactLocation": {
+ "uri": "file:///home/damien/dd/entrypoint_scripts/common/dojo-shared-resources.sh"
+ },
+ "contextRegion": {
+ "snippet": {
+ "text": " PYV=`python -c \"import sys;t='{v[0]}.{v[1]}'.format(v=list(sys.version_info[:2]));sys.stdout.write(t)\";`\n if [[ \"$PYV\"<\"2.7\" ]]; then\n"
+ },
+ "endLine": 143,
+ "startLine": 142
+ }
+ }
+ }
+ ],
+ "properties": {
+ "issue_confidence": "MEDIUM",
+ "issue_severity": "LOW",
+ "issue_tags": {}
+ },
+ "baselineState": "new",
+ "partialFingerprints": {
+ "scanPrimaryLocationHash": "4d655189c485c086",
+ "scanFileHash": "4ee28649c65c392d"
+ },
+ "ruleId": "2072",
+ "ruleIndex": 2
+ },
+ {
+ "message": {
+ "markdown": "",
+ "text": "Decimals are not supported. Either use integers only, or use bc or awk to compare."
+ },
+ "level": "note",
+ "locations": [
+ {
+ "physicalLocation": {
+ "region": {
+ "snippet": {
+ "text": " if [[ \"$PYV\"<\"2.7\" ]]; then\n"
+ },
+ "startLine": 143
+ },
+ "artifactLocation": {
+ "uri": "file:///home/damien/dd/entrypoint_scripts/common/dojo-shared-resources.sh"
+ },
+ "contextRegion": {
+ "snippet": {
+ "text": " PYV=`python -c \"import sys;t='{v[0]}.{v[1]}'.format(v=list(sys.version_info[:2]));sys.stdout.write(t)\";`\n if [[ \"$PYV\"<\"2.7\" ]]; then\n"
+ },
+ "endLine": 143,
+ "startLine": 143
+ }
+ }
+ }
+ ],
+ "properties": {
+ "issue_confidence": "MEDIUM",
+ "issue_severity": "LOW",
+ "issue_tags": {}
+ },
+ "baselineState": "new",
+ "partialFingerprints": {
+ "scanPrimaryLocationHash": "4d655189c485c086",
+ "scanFileHash": "4ee28649c65c392d"
+ },
+ "ruleId": "2072",
+ "ruleIndex": 2
+ }
+ ],
+ "automationDetails": {
+ "description": {
+ "text": "Static Analysis Security Test results using @ShiftLeft/sast-scan"
+ },
+ "guid": "70d0f865-f0e4-406c-8837-40852afccaeb"
+ },
+ "versionControlProvenance": [
+ {
+ "branch": "dev",
+ "repositoryUri": "https://github.com/damiencarol/django-DefectDojo",
+ "revisionId": "288c68d1ba1f35ebeff1d1bdb032186a23f0ea5b"
+ }
+ ]
+ }
+ ],
+ "version": "2.1.0",
+ "$schema": "https://raw.githubusercontent.com/oasis-tcs/sarif-spec/master/Schemata/sarif-schema-2.1.0.json",
+ "inlineExternalProperties": [
+ {
+ "guid": "70d0f865-f0e4-406c-8837-40852afccaeb",
+ "runGuid": "fbb1392e-e657-4572-ac07-0e107d1ff3f1"
+ }
+ ]
+}
\ No newline at end of file
diff --git a/unittests/scans/zap/dvwa_baseline_dojo_subset.xml b/unittests/scans/zap/dvwa_baseline_dojo_subset.xml
new file mode 100644
index 00000000000..c5819dc97d1
--- /dev/null
+++ b/unittests/scans/zap/dvwa_baseline_dojo_subset.xml
@@ -0,0 +1,662 @@
+
+
+ 10020
+ 10020
+ X-Frame-Options Header Not Set
+ X-Frame-Options Header Not Set
+ 2
+ 2
+ Medium (Medium)
+ <p>X-Frame-Options header is not included in the HTTP response to protect against 'ClickJacking' attacks.</p>
+
+
+ http://172.17.0.2/vulnerabilities/brute/
+ GET
+ X-Frame-Options
+
+
+ http://172.17.0.2/vulnerabilities/sqli_blind/
+ GET
+ X-Frame-Options
+
+
+ http://172.17.0.2/vulnerabilities/exec/
+ GET
+ X-Frame-Options
+
+
+ http://172.17.0.2/instructions.php
+ GET
+ X-Frame-Options
+
+
+ http://172.17.0.2/
+ GET
+ X-Frame-Options
+
+
+ http://172.17.0.2/setup.php
+ GET
+ X-Frame-Options
+
+
+ http://172.17.0.2/vulnerabilities/upload/
+ GET
+ X-Frame-Options
+
+
+ http://172.17.0.2/vulnerabilities/csrf/
+ GET
+ X-Frame-Options
+
+
+ http://172.17.0.2/vulnerabilities/sqli/
+ GET
+ X-Frame-Options
+
+
+ http://172.17.0.2/vulnerabilities/fi/?page=include.php
+ GET
+ X-Frame-Options
+
+
+ http://172.17.0.2/vulnerabilities/captcha/
+ GET
+ X-Frame-Options
+
+
+ 11
+ <p>Most modern Web browsers support the X-Frame-Options HTTP header. Ensure it's set on all web pages returned by your site. If you expect the page to be framed only by pages on your server (e.g. it's part of a FRAMESET), then you'll want to use SAMEORIGIN; otherwise, if you never expect the page to be framed, you should use DENY. Alternatively, consider implementing Content Security Policy's "frame-ancestors" directive. </p>
+ <p>https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/X-Frame-Options</p>
+ 16
+ 15
+ 3
+
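For a Django-based application, the solution above maps onto built-in settings; a minimal sketch, assuming stock Django (nothing DefectDojo-specific):

# settings.py: XFrameOptionsMiddleware stamps X-Frame-Options on every
# response using the value below.
MIDDLEWARE = [
    # ...existing middleware...
    "django.middleware.clickjacking.XFrameOptionsMiddleware",
]
X_FRAME_OPTIONS = "DENY"  # or "SAMEORIGIN" if the site frames its own pages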
+
+ 10038
+ 10038
+ Content Security Policy (CSP) Header Not Set
+ Content Security Policy (CSP) Header Not Set
+ 2
+ 3
+ Medium (High)
+ <p>Content Security Policy (CSP) is an added layer of security that helps to detect and mitigate certain types of attacks, including Cross Site Scripting (XSS) and data injection attacks. These attacks are used for everything from data theft to site defacement or distribution of malware. CSP provides a set of standard HTTP headers that allow website owners to declare approved sources of content that browsers should be allowed to load on that page — covered types are JavaScript, CSS, HTML frames, fonts, images and embeddable objects such as Java applets, ActiveX, audio and video files.</p>
+
+
+ http://172.17.0.2/vulnerabilities/fi/?page=include.php
+ GET
+
+
+ http://172.17.0.2/instructions.php
+ GET
+
+
+ http://172.17.0.2/vulnerabilities/sqli/
+ GET
+
+
+ http://172.17.0.2/vulnerabilities/exec/
+ GET
+
+
+ http://172.17.0.2/sitemap.xml
+ GET
+
+
+ http://172.17.0.2/setup.php
+ GET
+
+
+ http://172.17.0.2/
+ GET
+
+
+ http://172.17.0.2/vulnerabilities/captcha/
+ GET
+
+
+ http://172.17.0.2/vulnerabilities/upload/
+ GET
+
+
+ http://172.17.0.2/vulnerabilities/brute/
+ GET
+
+
+ http://172.17.0.2/vulnerabilities/csrf/
+ GET
+
+
+ 11
+ <p>Ensure that your web server, application server, load balancer, etc. is configured to set the Content-Security-Policy header, to achieve optimal browser support: "Content-Security-Policy" for Chrome 25+, Firefox 23+ and Safari 7+, "X-Content-Security-Policy" for Firefox 4.0+ and Internet Explorer 10+, and "X-WebKit-CSP" for Chrome 14+ and Safari 6+.</p>
+ <p>https://developer.mozilla.org/en-US/docs/Web/Security/CSP/Introducing_Content_Security_Policy</p><p>https://cheatsheetseries.owasp.org/cheatsheets/Content_Security_Policy_Cheat_Sheet.html</p><p>http://www.w3.org/TR/CSP/</p><p>http://w3c.github.io/webappsec/specs/content-security-policy/csp-specification.dev.html</p><p>http://www.html5rocks.com/en/tutorials/security/content-security-policy/</p><p>http://caniuse.com/#feat=contentsecuritypolicy</p><p>http://content-security-policy.com/</p>
+ 16
+ 15
+ 3
+
+
+ 10108
+ 10108
+ Reverse Tabnabbing
+ Reverse Tabnabbing
+ 2
+ 2
+ Medium (Medium)
+ <p>At least one link on this page is vulnerable to Reverse tabnabbing as it uses a target attribute without using both of the "noopener" and "noreferrer" keywords in the "rel" attribute, which allows the target page to take control of this page.</p>
+
+
+ http://172.17.0.2/vulnerabilities/brute/
+ GET
+ <a href="https://www.owasp.org/index.php/Testing_for_Brute_Force_(OWASP-AT-004)" target="_blank">https://www.owasp.org/index.php/Testing_for_Brute_Force_(OWASP-AT-004)</a>
+
+
+ http://172.17.0.2/
+ GET
+ <a href="https://www.virtualbox.org/" target="_blank">VirtualBox</a>
+
+
+ http://172.17.0.2/vulnerabilities/sqli/
+ GET
+ <a href="http://www.securiteam.com/securityreviews/5DP0N1P76E.html" target="_blank">http://www.securiteam.com/securityreviews/5DP0N1P76E.html</a>
+
+
+ http://172.17.0.2/vulnerabilities/sqli_blind/
+ GET
+ <a href="http://www.securiteam.com/securityreviews/5DP0N1P76E.html" target="_blank">http://www.securiteam.com/securityreviews/5DP0N1P76E.html</a>
+
+
+ http://172.17.0.2/vulnerabilities/xss_d/
+ GET
+ <a href="https://www.owasp.org/index.php/Cross-site_Scripting_(XSS)" target="_blank">https://www.owasp.org/index.php/Cross-site_Scripting_(XSS)</a>
+
+
+ http://172.17.0.2/instructions.php
+ GET
+ <a href="https://www.virtualbox.org/" target="_blank">https://www.virtualbox.org/</a>
+
+
+ http://172.17.0.2/vulnerabilities/csrf/
+ GET
+ <a href="https://www.owasp.org/index.php/Cross-Site_Request_Forgery" target="_blank">https://www.owasp.org/index.php/Cross-Site_Request_Forgery</a>
+
+
+ http://172.17.0.2/vulnerabilities/upload/
+ GET
+ <a href="https://www.owasp.org/index.php/Unrestricted_File_Upload" target="_blank">https://www.owasp.org/index.php/Unrestricted_File_Upload</a>
+
+
+ http://172.17.0.2/vulnerabilities/fi/?page=include.php
+ GET
+ <a href="https://en.wikipedia.org/wiki/Remote_File_Inclusion" target="_blank">https://en.wikipedia.org/wiki/Remote_File_Inclusion</a>
+
+
+ http://172.17.0.2/vulnerabilities/captcha/
+ GET
+ <a href="https://www.google.com/recaptcha/admin/create" target="_blank">https://www.google.com/recaptcha/admin/create</a>
+
+
+ http://172.17.0.2/vulnerabilities/exec/
+ GET
+ <a href="http://www.scribd.com/doc/2530476/Php-Endangers-Remote-Code-Execution" target="_blank">http://www.scribd.com/doc/2530476/Php-Endangers-Remote-Code-Execution</a>
+
+
+ 11
+ <p>Do not use a target attribute, or if you have to then also add the attribute: rel="noopener noreferrer".</p>
+ <p>https://owasp.org/www-community/attacks/Reverse_Tabnabbing</p><p>https://dev.to/ben/the-targetblank-vulnerability-by-example</p><p>https://mathiasbynens.github.io/rel-noopener/</p><p>https://medium.com/@jitbit/target-blank-the-most-underestimated-vulnerability-ever-96e328301f4c</p>
+ 3
+
+
+ 10096
+ 10096
+ Timestamp Disclosure - Unix
+ Timestamp Disclosure - Unix
+ 0
+ 1
+ Informational (Low)
+ <p>A timestamp was disclosed by the application/web server - Unix</p>
+
+
+ http://172.17.0.2/vulnerabilities/javascript/
+ GET
+ 1019803690
+
+
+ http://172.17.0.2/vulnerabilities/javascript/
+ GET
+ 1839030562
+
+
+ http://172.17.0.2/vulnerabilities/javascript/
+ GET
+ 909522486
+
+
+ http://172.17.0.2/vulnerabilities/javascript/
+ GET
+ 722521979
+
+
+ http://172.17.0.2/vulnerabilities/javascript/
+ GET
+ 40341101
+
+
+ http://172.17.0.2/vulnerabilities/javascript/
+ GET
+ 1309151649
+
+
+ http://172.17.0.2/vulnerabilities/javascript/
+ GET
+ 1732584194
+
+
+ http://172.17.0.2/vulnerabilities/javascript/
+ GET
+ 405537848
+
+
+ http://172.17.0.2/vulnerabilities/javascript/
+ GET
+ 1894986606
+
+
+ http://172.17.0.2/vulnerabilities/javascript/
+ GET
+ 1473231341
+
+
+ http://172.17.0.2/vulnerabilities/javascript/
+ GET
+ 155497632
+
+
+ http://172.17.0.2/vulnerabilities/javascript/
+ GET
+ 1990404162
+
+
+ http://172.17.0.2/vulnerabilities/javascript/
+ GET
+ 1700485571
+
+
+ http://172.17.0.2/vulnerabilities/javascript/
+ GET
+ 1069501632
+
+
+ http://172.17.0.2/vulnerabilities/javascript/
+ GET
+ 38016083
+
+
+ http://172.17.0.2/vulnerabilities/javascript/
+ GET
+ 2022574463
+
+
+ http://172.17.0.2/vulnerabilities/javascript/
+ GET
+ 373897302
+
+
+ http://172.17.0.2/vulnerabilities/javascript/
+ GET
+ 1163531501
+
+
+ http://172.17.0.2/vulnerabilities/javascript/
+ GET
+ 643717713
+
+
+ http://172.17.0.2/vulnerabilities/javascript/
+ GET
+ 1444681467
+
+
+ 67
+ <p>Manually confirm that the timestamp data is not sensitive, and that the data cannot be aggregated to disclose exploitable patterns.</p>
+ <p>1019803690, which evaluates to: 2002-04-26 06:48:10</p>
+ <p>http://projects.webappsec.org/w/page/13246936/Information%20Leakage</p>
+ 200
+ 13
+ 3
+
+
+ 10036
+ 10036
+ Server Leaks Version Information via "Server" HTTP Response Header Field
+ Server Leaks Version Information via "Server" HTTP Response Header Field
+ 1
+ 3
+ Low (High)
+ <p>The web/application server is leaking version information via the "Server" HTTP response header. Access to such information may facilitate attackers identifying other vulnerabilities your web/application server is subject to.</p>
+
+
+ http://172.17.0.2/vulnerabilities/brute/
+ GET
+ Apache/2.4.25 (Debian)
+
+
+ http://172.17.0.2/vulnerabilities/captcha/
+ GET
+ Apache/2.4.25 (Debian)
+
+
+ http://172.17.0.2/vulnerabilities/csrf/
+ GET
+ Apache/2.4.25 (Debian)
+
+
+ http://172.17.0.2/instructions.php
+ GET
+ Apache/2.4.25 (Debian)
+
+
+ http://172.17.0.2/
+ GET
+ Apache/2.4.25 (Debian)
+
+
+ http://172.17.0.2/setup.php
+ GET
+ Apache/2.4.25 (Debian)
+
+
+ http://172.17.0.2/vulnerabilities/upload/
+ GET
+ Apache/2.4.25 (Debian)
+
+
+ http://172.17.0.2/robots.txt
+ GET
+ Apache/2.4.25 (Debian)
+
+
+ http://172.17.0.2/sitemap.xml
+ GET
+ Apache/2.4.25 (Debian)
+
+
+ http://172.17.0.2/vulnerabilities/fi/?page=include.php
+ GET
+ Apache/2.4.25 (Debian)
+
+
+ http://172.17.0.2/vulnerabilities/exec/
+ GET
+ Apache/2.4.25 (Debian)
+
+
+ 11
+ <p>Ensure that your web server, application server, load balancer, etc. is configured to suppress the "Server" header or provide generic details.</p>
+ <p>http://httpd.apache.org/docs/current/mod/core.html#servertokens</p><p>http://msdn.microsoft.com/en-us/library/ff648552.aspx#ht_urlscan_007</p><p>http://blogs.msdn.com/b/varunm/archive/2013/04/23/remove-unwanted-http-response-headers.aspx</p><p>http://www.troyhunt.com/2012/02/shhh-dont-let-your-response-headers.html</p>
+ 200
+ 13
+ 3
+
+
+ 10202
+ 10202
+ Absence of Anti-CSRF Tokens
+ Absence of Anti-CSRF Tokens
+ 1
+ 2
+ Low (Medium)
+ <p>No Anti-CSRF tokens were found in a HTML submission form.</p><p>A cross-site request forgery is an attack that involves forcing a victim to send an HTTP request to a target destination without their knowledge or intent in order to perform an action as the victim. The underlying cause is application functionality using predictable URL/form actions in a repeatable way. The nature of the attack is that CSRF exploits the trust that a web site has for a user. By contrast, cross-site scripting (XSS) exploits the trust that a user has for a web site. Like XSS, CSRF attacks are not necessarily cross-site, but they can be. Cross-site request forgery is also known as CSRF, XSRF, one-click attack, session riding, confused deputy, and sea surf.</p><p></p><p>CSRF attacks are effective in a number of situations, including:</p><p> * The victim has an active session on the target site.</p><p> * The victim is authenticated via HTTP auth on the target site.</p><p> * The victim is on the same local network as the target site.</p><p></p><p>CSRF has primarily been used to perform an action against a target site using the victim's privileges, but recent techniques have been discovered to disclose information by gaining access to the response. The risk of information disclosure is dramatically increased when the target site is vulnerable to XSS, because XSS can be used as a platform for CSRF, allowing the attack to operate within the bounds of the same-origin policy.</p>
+
+
+ http://172.17.0.2/vulnerabilities/xss_d/
+ GET
+ <form name="XSS" method="GET">
+
+
+ http://172.17.0.2/vulnerabilities/captcha/
+ GET
+ <form action="#" method="POST" style="display:none;">
+
+
+ http://172.17.0.2/vulnerabilities/xss_s/
+ GET
+ <form method="post" name="guestform" ">
+
+
+ http://172.17.0.2/vulnerabilities/csrf/
+ GET
+ <form action="#" method="GET">
+
+
+ http://172.17.0.2/setup.php
+ GET
+ <form action="#" method="post">
+
+
+ http://172.17.0.2/vulnerabilities/brute/
+ GET
+ <form action="#" method="GET">
+
+
+ http://172.17.0.2/vulnerabilities/sqli/
+ GET
+ <form action="#" method="GET">
+
+
+ http://172.17.0.2/vulnerabilities/weak_id/
+ GET
+ <form method="post">
+
+
+ http://172.17.0.2/vulnerabilities/exec/
+ GET
+ <form name="ping" action="#" method="post">
+
+
+ http://172.17.0.2/vulnerabilities/sqli_blind/
+ GET
+ <form action="#" method="GET">
+
+
+ http://172.17.0.2/vulnerabilities/xss_r/
+ GET
+ <form name="XSS" action="#" method="GET">
+
+
+ http://172.17.0.2/vulnerabilities/upload/
+ GET
+ <form enctype="multipart/form-data" action="#" method="POST">
+
+
+ 12
+ <p>Phase: Architecture and Design</p><p>Use a vetted library or framework that does not allow this weakness to occur or provides constructs that make this weakness easier to avoid.</p><p>For example, use anti-CSRF packages such as the OWASP CSRFGuard.</p><p></p><p>Phase: Implementation</p><p>Ensure that your application is free of cross-site scripting issues, because most CSRF defenses can be bypassed using attacker-controlled script.</p><p></p><p>Phase: Architecture and Design</p><p>Generate a unique nonce for each form, place the nonce into the form, and verify the nonce upon receipt of the form. Be sure that the nonce is not predictable (CWE-330).</p><p>Note that this can be bypassed using XSS.</p><p></p><p>Identify especially dangerous operations. When the user performs a dangerous operation, send a separate confirmation request to ensure that the user intended to perform that operation.</p><p>Note that this can be bypassed using XSS.</p><p></p><p>Use the ESAPI Session Management control.</p><p>This control includes a component for CSRF.</p><p></p><p>Do not use the GET method for any request that triggers a state change.</p><p></p><p>Phase: Implementation</p><p>Check the HTTP Referer header to see if the request originated from an expected page. This could break legitimate functionality, because users or proxies may have disabled sending the Referer for privacy reasons.</p>
+ <p>No known Anti-CSRF token [anticsrf, CSRFToken, __RequestVerificationToken, csrfmiddlewaretoken, authenticity_token, OWASP_CSRFTOKEN, anoncsrf, csrf_token, _csrf, _csrfSecret, __csrf_magic, CSRF] was found in the following HTML form: [Form 1: "" ].</p>
+ <p>http://projects.webappsec.org/Cross-Site-Request-Forgery</p><p>http://cwe.mitre.org/data/definitions/352.html</p>
+ 352
+ 9
+ 3
+
+
+ 10031
+ 10031
+ User Controllable HTML Element Attribute (Potential XSS)
+ User Controllable HTML Element Attribute (Potential XSS)
+ 0
+ 1
+ Informational (Low)
+ <p>This check looks at user-supplied input in query string parameters and POST data to identify where certain HTML attribute values might be controlled. This provides hot-spot detection for XSS (cross-site scripting) that will require further review by a security analyst to determine exploitability.</p>
+
+
+ http://172.17.0.2/vulnerabilities/captcha/
+ POST
+ Change
+
+
+ http://172.17.0.2/vulnerabilities/exec/
+ POST
+ Submit
+
+
+ http://172.17.0.2/vulnerabilities/csrf/?Change=Change&password_conf=ZAP&password_new=ZAP
+ GET
+ Change
+
+
+ http://172.17.0.2/vulnerabilities/captcha/
+ POST
+ Change
+
+
+ http://172.17.0.2/vulnerabilities/exec/
+ POST
+ Submit
+
+
+ http://172.17.0.2/vulnerabilities/brute/?Login=Login&password=ZAP&username=ZAP
+ GET
+ Login
+
+
+ http://172.17.0.2/vulnerabilities/upload/
+ POST
+ MAX_FILE_SIZE
+
+
+ http://172.17.0.2/vulnerabilities/upload/
+ POST
+ Upload
+
+
+ http://172.17.0.2/vulnerabilities/upload/
+ POST
+ Upload
+
+
+ http://172.17.0.2/vulnerabilities/upload/
+ POST
+ Upload
+
+
+ http://172.17.0.2/vulnerabilities/brute/?Login=Login&password=ZAP&username=ZAP
+ GET
+ Login
+
+
+ http://172.17.0.2/vulnerabilities/csrf/?Change=Change&password_conf=ZAP&password_new=ZAP
+ GET
+ Change
+
+
+ http://172.17.0.2/vulnerabilities/exec/
+ POST
+ Submit
+
+
+ 13
+ <p>Validate all input and sanitize output before writing to any HTML attributes.</p>
+ <p>User-controlled HTML attribute values were found. Try injecting special characters to see if XSS might be possible. The page at the following URL:</p><p></p><p>http://172.17.0.2/vulnerabilities/captcha/</p><p></p><p>appears to include user input in: </p><p></p><p>a(n) [input] tag [name] attribute </p><p></p><p>The user input found was:</p><p>Change=Change</p><p></p><p>The user-controlled value was:</p><p>change</p>
+ <p>http://websecuritytool.codeplex.com/wikipage?title=Checks#user-controlled-html-attribute</p>
+ 20
+ 20
+ 3
+
+
+ 10024
+ 10024
+ Information Disclosure - Sensitive Information in URL
+ Information Disclosure - Sensitive Information in URL
+ 0
+ 2
+ Informational (Medium)
+ <p>The request appeared to contain sensitive information leaked in the URL. This can violate PCI and most organizational compliance policies. You can configure the list of strings for this check to add or remove values specific to your environment.</p>
+
+
+ http://172.17.0.2/vulnerabilities/brute/?Login=Login&password=ZAP&username=ZAP
+ GET
+ username
+ username
+
+
+ http://172.17.0.2/vulnerabilities/csrf/?Change=Change&password_conf=ZAP&password_new=ZAP
+ GET
+ password_conf
+ password_conf
+
+
+ http://172.17.0.2/vulnerabilities/brute/?Login=Login&password=ZAP&username=ZAP
+ GET
+ password
+ password
+
+
+ http://172.17.0.2/vulnerabilities/csrf/?Change=Change&password_conf=ZAP&password_new=ZAP
+ GET
+ password_new
+ password_new
+
+
+ 4
+ <p>Do not pass sensitive information in URIs.</p>
+ <p>The URL contains potentially sensitive information. The following string was found via the pattern: user</p><p>username</p>
+ <p></p>
+ 200
+ 13
+ 3
+
+
+ 10054
+ 10054
+ Cookie Without SameSite Attribute
+ Cookie Without SameSite Attribute
+ 1
+ 2
+ Low (Medium)
+ <p>A cookie has been set without the SameSite attribute, which means that the cookie can be sent as a result of a 'cross-site' request. The SameSite attribute is an effective countermeasure to cross-site request forgery, cross-site script inclusion, and timing attacks.</p>
+
+
+ http://172.17.0.2/security.php
+ POST
+
+
+ http://172.17.0.2/security.php
+ POST
+ PHPSESSID
+ Set-Cookie: PHPSESSID
+
+
+ http://172.17.0.2/vulnerabilities/weak_id/
+ POST
+
+
+ 3
+ <p>Ensure that the SameSite attribute is set to either 'lax' or ideally 'strict' for all cookies.</p>
+ <p>https://tools.ietf.org/html/draft-ietf-httpbis-cookie-same-site</p>
+ 16
+ 13
+ 3
+
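In Django terms, the SameSite recommendation above is a pair of settings; a minimal sketch, assuming stock Django:

# settings.py: SameSite for the session and CSRF cookies.
SESSION_COOKIE_SAMESITE = "Strict"  # "Lax" is a less disruptive default
CSRF_COOKIE_SAMESITE = "Lax"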
+
+ 10029
+ 10029
+ Cookie Poisoning
+ Cookie Poisoning
+ 0
+ 1
+ Informational (Low)
+ <p>This check looks at user-supplied input in query string parameters and POST data to identify where cookie parameters might be controlled. This is called a cookie poisoning attack, and becomes exploitable when an attacker can manipulate the cookie in various ways. In some cases this will not be exploitable, however, allowing URL parameters to set cookie values is generally considered a bug.</p>
+
+
+ http://172.17.0.2/security.php
+ POST
+ security
+
+
+ 1
+ <p>Do not allow user input to control cookie names and values. If some query string parameters must be set in cookie values, be sure to filter out semicolons that can serve as name/value pair delimiters.</p>
+ <p>An attacker may be able to poison cookie values through POST parameters. To test if this is a more serious issue, you should try resending that request as a GET, with the POST parameter included as a query string parameter. For example: http://nottrusted.com/page?value=maliciousInput.</p><p></p><p>This was identified at:</p><p></p><p>http://172.17.0.2/security.php</p><p></p><p>User-input was found in the following cookie:</p><p>security=low</p><p></p><p>The user input was:</p><p>security=low</p>
+ <p>http://websecuritytool.codeplex.com/wikipage?title=Checks#user-controlled-cookie</p>
+ 20
+ 20
+ 3
+
+
diff --git a/unittests/test_apiv2_scan_import_options.py b/unittests/test_apiv2_scan_import_options.py
index 078ed2b3050..5b4f6e250c6 100644
--- a/unittests/test_apiv2_scan_import_options.py
+++ b/unittests/test_apiv2_scan_import_options.py
@@ -10,10 +10,7 @@
class ScanImportOptionsTest(APITestCase):
- """
- Test the options `skip_duplicates` and `close_old_findings` for the scan
- import APIv2 endpoint with ZAP
- """
+ """Test uploading empty reports or no report at all"""
fixtures = ["dojo_testdata.json"]
EMPTY_ZAP_SCAN = """
diff --git a/unittests/test_import_reimport.py b/unittests/test_import_reimport.py
index 98ce581626c..e3130cc7efc 100644
--- a/unittests/test_import_reimport.py
+++ b/unittests/test_import_reimport.py
@@ -10,8 +10,6 @@
from django.test.client import Client
from django.urls import reverse
from django.utils import timezone
-from rest_framework.authtoken.models import Token
-from rest_framework.test import APIClient
from dojo.models import Finding, Test, Test_Type, User
@@ -1751,10 +1749,7 @@ def setUp(self):
testuser = User.objects.get(username="admin")
testuser.usercontactinfo.block_execution = True
testuser.usercontactinfo.save()
-
- token = Token.objects.get(user=testuser)
- self.client = APIClient()
- self.client.credentials(HTTP_AUTHORIZATION="Token " + token.key)
+ self.login_as_admin()
# self.url = reverse(self.viewname + '-list')
# Statistics only available in API Response
@@ -2029,10 +2024,7 @@ def setUp(self):
testuser = User.objects.get(username="admin")
testuser.usercontactinfo.block_execution = True
testuser.usercontactinfo.save()
-
- token = Token.objects.get(user=testuser)
- self.client = APIClient()
- self.client.credentials(HTTP_AUTHORIZATION="Token " + token.key)
+ self.login_as_admin()
# self.url = reverse(self.viewname + '-list')
self.client_ui = Client()
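The two hunks above swap the hand-rolled token wiring for a shared login_as_admin() helper. As a rough sketch of what such a helper plausibly does, based on the removed lines (the real one lives on the Dojo test base class and may differ):

from dojo.models import User
from rest_framework.authtoken.models import Token
from rest_framework.test import APIClient

def login_as_admin(self):
    # Authenticate the API client with the admin user's DRF token,
    # mirroring the boilerplate the diff removes from each setUp().
    testuser = User.objects.get(username="admin")
    token, _ = Token.objects.get_or_create(user=testuser)
    self.client = APIClient()
    self.client.credentials(HTTP_AUTHORIZATION="Token " + token.key)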
diff --git a/unittests/test_importers_deduplication.py b/unittests/test_importers_deduplication.py
new file mode 100644
index 00000000000..f418a624d66
--- /dev/null
+++ b/unittests/test_importers_deduplication.py
@@ -0,0 +1,445 @@
+import logging
+
+from django.contrib.contenttypes.models import ContentType
+
+from dojo.models import (
+ Development_Environment,
+ Dojo_User,
+ Endpoint,
+ Endpoint_Status,
+ Engagement,
+ Finding,
+ Product,
+ Product_Type,
+ Test,
+ User,
+ UserContactInfo,
+)
+
+from .dojo_test_case import DojoAPITestCase, get_unit_tests_scans_path
+
+logging.basicConfig(level=logging.DEBUG)
+logger = logging.getLogger(__name__)
+
+
+STACK_HAWK_FILENAME = get_unit_tests_scans_path("stackhawk") / "stackhawk_many_vul_without_duplicated_findings.json"
+STACK_HAWK_SUBSET_FILENAME = get_unit_tests_scans_path("stackhawk") / "stackhawk_many_vul_without_duplicated_findings_subset.json"
+STACK_HAWK_SCAN_TYPE = "StackHawk HawkScan"
+
+
+class TestDojoImportersDeduplication(DojoAPITestCase):
+
+ def setUp(self):
+ super().setUp()
+
+ testuser = User.objects.create(username="admin")
+ testuser.is_superuser = True
+ testuser.is_staff = True
+ testuser.save()
+ UserContactInfo.objects.create(user=testuser, block_execution=True)
+
+ # Authenticate API client as admin for import endpoints
+ self.login_as_admin()
+
+ self.system_settings(enable_webhooks_notifications=False)
+ self.system_settings(enable_product_grade=False)
+ self.system_settings(enable_github=False)
+ self.system_settings(enable_deduplication=True)
+
+        # Warm up the ContentType cache for the relevant models. This is needed to be able to run the test in isolation:
+        # as part of the full test suite, the ContentType ids will already be cached and won't affect the query count,
+        # but when the test runs in isolation they are not cached yet, which would result in extra queries.
+        # By warming up the cache here, those queries are executed before we start counting queries.
+ for model in [Development_Environment, Dojo_User, Endpoint, Endpoint_Status, Engagement, Finding, Product, Product_Type, User, Test]:
+ ContentType.objects.get_for_model(model)
+
+ # Internal helper methods for reusable test logic
+ def _test_single_import_assess_duplicates(self, filename, scan_type, expected_duplicates):
+ """Internal method to test single import with expected duplicates"""
+ self.login_as_admin()
+
+ response_json = self.import_scan_with_params(
+ filename,
+ scan_type=scan_type,
+ minimum_severity="Info",
+ active=True,
+ verified=True,
+ engagement=None,
+ product_type_name=f"PT {scan_type} Single",
+ product_name=f"P {scan_type} Single",
+ engagement_name=f"E {scan_type} Single",
+ auto_create_context=True,
+ )
+
+ test_id = response_json["test"]
+ test = Test.objects.get(id=test_id)
+
+ # Verify expected duplicates were created
+ dup_count = Finding.objects.filter(test=test, duplicate=True).count()
+ self.assertEqual(expected_duplicates, dup_count)
+
+        # each duplicate should point back to an original finding with a lower id
+ if dup_count > 0:
+ for finding in test.finding_set.filter(duplicate=True):
+ self.assertTrue(finding.duplicate_finding.id < finding.id)
+
+ return test_id
+
+ def _test_full_then_subset_duplicates(self, full_filename, subset_filename, scan_type, expected_duplicates, first_import_duplicates=0):
+ """
+ Internal method to test full scan then subset creates expected duplicates
+
+ Args:
+ first_import_duplicates: Expected number of duplicates in the first import (for files with internal duplicates)
+
+ """
+ # First import: full scan
+ response_json = self.import_scan_with_params(
+ full_filename,
+ scan_type=scan_type,
+ minimum_severity="Info",
+ active=True,
+ verified=True,
+ engagement=None,
+ product_type_name=f"PT {scan_type} Full",
+ product_name=f"P {scan_type} Full",
+ engagement_name=f"E {scan_type} Full",
+ auto_create_context=True,
+ )
+
+ first_test_id = response_json["test"]
+ first_test = Test.objects.get(id=first_test_id)
+
+ # Verify first import has expected duplicates (usually 0, but may have internal duplicates)
+ first_dup_count = Finding.objects.filter(test=first_test, duplicate=True).count()
+ self.assertEqual(first_import_duplicates, first_dup_count)
+
+ # Second import: subset into the same engagement
+ response_json = self.import_scan_with_params(
+ subset_filename,
+ scan_type=scan_type,
+ minimum_severity="Info",
+ active=True,
+ verified=True,
+ engagement=first_test.engagement.id, # Same engagement ID
+ product_type_name=None, # Use existing
+ product_name=None, # Use existing
+ engagement_name=None, # Use existing
+ auto_create_context=False,
+ )
+
+ second_test_id = response_json["test"]
+ second_test = Test.objects.get(id=second_test_id)
+
+ # The second test should contain expected duplicates
+ second_test_dup_count = Finding.objects.filter(test=second_test, duplicate=True).count()
+ self.assertEqual(expected_duplicates, second_test_dup_count)
+
+ # Engagement should have total duplicates from both imports
+ total_expected_duplicates = first_import_duplicates + expected_duplicates
+ eng_dup_count = Finding.objects.filter(test__engagement=first_test.engagement, duplicate=True).count()
+ self.assertEqual(total_expected_duplicates, eng_dup_count)
+
+ # Product should have total duplicates from both imports
+ prod_dup_count = Finding.objects.filter(test__engagement__product=first_test.engagement.product, duplicate=True).count()
+ self.assertEqual(total_expected_duplicates, prod_dup_count)
+
+ return second_test_id
+
+ def _test_different_products_no_duplicates(self, filename, scan_type, expected_duplicates):
+ """Internal method to test importing into different products creates expected duplicates"""
+ # First import: into Product A
+ response_json = self.import_scan_with_params(
+ filename,
+ scan_type=scan_type,
+ minimum_severity="Info",
+ active=True,
+ verified=True,
+ engagement=None,
+ product_type_name=f"PT {scan_type} Product A",
+ product_name=f"P {scan_type} Product A",
+ engagement_name=f"E {scan_type} Product A",
+ auto_create_context=True,
+ )
+
+ first_test_id = response_json["test"]
+ first_test = Test.objects.get(id=first_test_id)
+
+ # Verify first import has expected duplicates
+ first_dup_count = Finding.objects.filter(test=first_test, duplicate=True).count()
+ self.assertEqual(expected_duplicates, first_dup_count)
+
+ # Second import: same scan into Product B (different product)
+ response_json = self.import_scan_with_params(
+ filename,
+ scan_type=scan_type,
+ minimum_severity="Info",
+ active=True,
+ verified=True,
+ engagement=None,
+ product_type_name=f"PT {scan_type} Product B",
+ product_name=f"P {scan_type} Product B",
+ engagement_name=f"E {scan_type} Product B",
+ auto_create_context=True,
+ )
+
+ second_test_id = response_json["test"]
+ second_test = Test.objects.get(id=second_test_id)
+
+ # The second test should contain expected duplicates (different products don't deduplicate)
+ second_test_dup_count = Finding.objects.filter(test=second_test, duplicate=True).count()
+ self.assertEqual(expected_duplicates, second_test_dup_count)
+
+ # First product should still have expected duplicates
+ first_prod_dup_count = Finding.objects.filter(test__engagement__product=first_test.engagement.product, duplicate=True).count()
+ self.assertEqual(expected_duplicates, first_prod_dup_count)
+
+ # Second product should have expected duplicates
+ second_prod_dup_count = Finding.objects.filter(test__engagement__product=second_test.engagement.product, duplicate=True).count()
+ self.assertEqual(expected_duplicates, second_prod_dup_count)
+
+ return second_test_id
+
+ def _test_same_product_different_engagements_duplicates(self, filename, scan_type, expected_duplicates):
+ """Internal method to test importing into same product but different engagements creates expected duplicates"""
+ # First import: into Engagement 1
+ response_json = self.import_scan_with_params(
+ filename,
+ scan_type=scan_type,
+ minimum_severity="Info",
+ active=True,
+ verified=True,
+ engagement=None,
+ product_type_name=f"PT {scan_type} SameProd",
+ product_name=f"P {scan_type} SameProd",
+ engagement_name=f"E {scan_type} SameProd 1",
+ auto_create_context=True,
+ )
+ first_test = Test.objects.get(id=response_json["test"])
+
+ # Second import: into Engagement 2 (same product)
+ response_json = self.import_scan_with_params(
+ filename,
+ scan_type=scan_type,
+ minimum_severity="Info",
+ active=True,
+ verified=True,
+ engagement=None,
+ product_type_name=None, # Use existing
+ product_name=f"P {scan_type} SameProd", # Same product
+ engagement_name=f"E {scan_type} SameProd 2", # Different engagement
+ auto_create_context=True,
+ )
+ second_test = Test.objects.get(id=response_json["test"])
+
+ # Product should have expected duplicates total
+ prod_dup_count = Finding.objects.filter(test__engagement__product=first_test.engagement.product, duplicate=True).count()
+ self.assertEqual(expected_duplicates, prod_dup_count)
+
+ return second_test.id
+
+ def _test_same_product_different_engagements_dedupe_on_engagements_no_duplicates(self, filename, scan_type, expected_duplicates, first_import_duplicates=0):
+ """
+ Internal method to verify that importing into the same product but different engagements with deduplication_on_engagement enabled creates only the expected duplicates
+
+ Args:
+ expected_duplicates: Expected duplicates in second import (usually same as first for files with internal duplicates)
+ first_import_duplicates: Expected duplicates in first import (for files with internal duplicates)
+
+ """
+ # First import: into Engagement A
+ response_json = self.import_scan_with_params(
+ filename,
+ scan_type=scan_type,
+ minimum_severity="Info",
+ active=True,
+ verified=True,
+ engagement=None,
+ product_type_name=f"PT {scan_type} DedupeEng",
+ product_name=f"P {scan_type} DedupeEng",
+ engagement_name=f"E {scan_type} DedupeEng A",
+ auto_create_context=True,
+ )
+ first_test = Test.objects.get(id=response_json["test"])
+
+ # Set deduplication_on_engagement to True for the engagement
+ first_test.engagement.deduplication_on_engagement = True
+ first_test.engagement.save()
+
+ # Second import: into Engagement B (same product, different engagement)
+ response_json = self.import_scan_with_params(
+ filename,
+ scan_type=scan_type,
+ minimum_severity="Info",
+ active=True,
+ verified=True,
+ engagement=None,
+ product_type_name=None, # Use existing
+ product_name=f"P {scan_type} DedupeEng", # Same product
+ engagement_name=f"E {scan_type} DedupeEng B", # Different engagement
+ auto_create_context=True,
+ )
+ second_test = Test.objects.get(id=response_json["test"])
+
+ # The second test should contain expected duplicates because deduplication_on_engagement is True
+ second_test_dup_count = Finding.objects.filter(test=second_test, duplicate=True).count()
+ self.assertEqual(expected_duplicates, second_test_dup_count)
+
+ # Product should have total duplicates from both imports
+ total_expected_duplicates = first_import_duplicates + expected_duplicates
+ prod_dup_count = Finding.objects.filter(test__engagement__product=first_test.engagement.product, duplicate=True).count()
+ self.assertEqual(total_expected_duplicates, prod_dup_count)
+
+ return second_test.id
+
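+ # The scanner sections below each exercise one deduplication algorithm. As a hedged
+ # sketch (assuming the per-parser mapping defined in dojo/settings/settings.dist.py),
+ # the algorithm configured for a scan type could be looked up like this; the helper
+ # name is illustrative and not used by the tests:
+ def _dedupe_algorithm_for(self, scan_type):
+ # DEDUPLICATION_ALGORITHM_PER_PARSER maps scan types to DEDUPE_ALGO_* constants;
+ # unlisted parsers fall back to the legacy algorithm
+ from django.conf import settings
+ return settings.DEDUPLICATION_ALGORITHM_PER_PARSER.get(scan_type, settings.DEDUPE_ALGO_LEGACY)
+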
+ # Test cases for ZAP (LEGACY algorithm)
+ def test_zap_single_import_no_duplicates(self):
+ """Test that importing ZAP scan (LEGACY algorithm) creates 0 duplicate findings"""
+ self._test_single_import_assess_duplicates("scans/zap/dvwa_baseline_dojo_subset.xml", "ZAP Scan", 0)
+
+ def test_zap_full_then_subset_duplicates(self):
+ """Test that importing full ZAP scan then subset creates duplicates"""
+ # For now, use the same file for both full and subset since we don't have a proper subset
+ # This will test the same file imported twice into the same engagement
+ self._test_full_then_subset_duplicates("scans/zap/dvwa_baseline_dojo_subset.xml", "scans/zap/dvwa_baseline_dojo_subset.xml", "ZAP Scan", 10)
+
+ def test_zap_different_products_no_duplicates(self):
+ """Test that importing ZAP scan into different products creates 0 duplicates"""
+ self._test_different_products_no_duplicates("scans/zap/dvwa_baseline_dojo_subset.xml", "ZAP Scan", 0)
+
+ def test_zap_same_product_different_engagements_duplicates(self):
+ """Test that importing ZAP scan into same product but different engagements creates duplicates"""
+ self._test_same_product_different_engagements_duplicates("scans/zap/dvwa_baseline_dojo_subset.xml", "ZAP Scan", 10)
+
+ def test_zap_same_product_different_engagements_dedupe_on_engagements_no_duplicates(self):
+ """Test that importing ZAP scan into same product but different engagements with dedupe_on_engagements creates 0 duplicates"""
+ self._test_same_product_different_engagements_dedupe_on_engagements_no_duplicates("scans/zap/dvwa_baseline_dojo_subset.xml", "ZAP Scan", 0)
+
+ # Test cases for Checkmarx (UNIQUE_ID_FROM_TOOL algorithm)
+ def test_checkmarx_single_import_no_duplicates(self):
+ """Test that importing Checkmarx scan (UNIQUE_ID_FROM_TOOL algorithm) creates 0 duplicate findings"""
+ self._test_single_import_assess_duplicates("scans/checkmarx/multiple_findings.json", "Checkmarx Scan detailed", 0)
+
+ def test_checkmarx_full_then_subset_duplicates(self):
+ """Test that importing full Checkmarx scan then subset creates duplicates"""
+ # For now, use the same file for both full and subset
+ self._test_full_then_subset_duplicates("scans/checkmarx/multiple_findings.json", "scans/checkmarx/multiple_findings.json",
+ "Checkmarx Scan detailed", 10)
+
+ def test_checkmarx_different_products_no_duplicates(self):
+ """Test that importing Checkmarx scan into different products creates 0 duplicates"""
+ self._test_different_products_no_duplicates("scans/checkmarx/multiple_findings.json", "Checkmarx Scan detailed", 0)
+
+ def test_checkmarx_same_product_different_engagements_duplicates(self):
+ """Test that importing Checkmarx scan into same product but different engagements creates duplicates"""
+ self._test_same_product_different_engagements_duplicates("scans/checkmarx/multiple_findings.json", "Checkmarx Scan detailed", 10)
+
+ def test_checkmarx_same_product_different_engagements_dedupe_on_engagements_no_duplicates(self):
+ """Test that importing Checkmarx scan into same product but different engagements with dedupe_on_engagements creates 0 duplicates"""
+ self._test_same_product_different_engagements_dedupe_on_engagements_no_duplicates("scans/checkmarx/multiple_findings.json",
+ "Checkmarx Scan detailed", 0)
+
+ # Test cases for Trivy (HASH_CODE algorithm)
+ def test_trivy_single_import_no_duplicates(self):
+ """Test that importing Trivy scan (HASH_CODE algorithm) creates 0 duplicate findings"""
+ self._test_single_import_assess_duplicates("scans/trivy/kubernetes.json", "Trivy Scan", 0)
+
+ def test_trivy_full_then_subset_duplicates(self):
+ """Test that importing full Trivy scan then subset creates duplicates"""
+ # For now, use the same file for both full and subset
+ self._test_full_then_subset_duplicates("scans/trivy/kubernetes.json", "scans/trivy/kubernetes.json", "Trivy Scan", 20)
+
+ def test_trivy_different_products_no_duplicates(self):
+ """Test that importing Trivy scan into different products creates 0 duplicates"""
+ self._test_different_products_no_duplicates("scans/trivy/kubernetes.json", "Trivy Scan", 0)
+
+ def test_trivy_same_product_different_engagements_duplicates(self):
+ """Test that importing Trivy scan into same product but different engagements creates duplicates"""
+ self._test_same_product_different_engagements_duplicates("scans/trivy/kubernetes.json", "Trivy Scan", 20)
+
+ def test_trivy_same_product_different_engagements_dedupe_on_engagements_no_duplicates(self):
+ """Test that importing Trivy scan into same product but different engagements with dedupe_on_engagements creates 0 duplicates"""
+ self._test_same_product_different_engagements_dedupe_on_engagements_no_duplicates("scans/trivy/kubernetes.json", "Trivy Scan", 0)
+
+ # Test cases for SARIF (UNIQUE_ID_FROM_TOOL_OR_HASH_CODE algorithm)
+ # The sample for SARIF is the bash report, which has internal duplicates.
+ # It is used on purpose so we capture the behaviour of import and reimport in this scenario.
+ def test_sarif_single_import_no_duplicates(self):
+ """Test that importing SARIF scan (UNIQUE_ID_FROM_TOOL_OR_HASH_CODE algorithm) creates 0 duplicate findings"""
+ # bash-report.sarif has 18 internal duplicates, so we expect 18 duplicates even on first import
+ test_id = self._test_single_import_assess_duplicates("scans/sarif/bash-report.sarif", "SARIF", 18)
+
+ # duplicates should be sorted by id (currently not very useful, as the tests run celery tasks in the foreground)
+ for finding in Finding.objects.filter(test_id=test_id, duplicate=True):
+ self.assertLess(finding.duplicate_finding.id, finding.id)
+
+ def test_sarif_full_then_subset_duplicates(self):
+ """Test that importing full SARIF scan then subset creates duplicates"""
+ # For now, use the same file for both full and subset
+ # First import has 18 internal duplicates, second import also has 18 internal duplicates + 9 cross-import duplicates = 27 total in second test
+ # Total = 18 (first) + 27 (second) = 45
+ self._test_full_then_subset_duplicates("scans/sarif/bash-report.sarif", "scans/sarif/bash-report.sarif", "SARIF", 27, first_import_duplicates=18)
+
+ def test_sarif_different_products_no_duplicates(self):
+ """Test that importing SARIF scan into different products creates 0 duplicates"""
+ # bash-report.sarif has 18 internal duplicates per import
+ self._test_different_products_no_duplicates("scans/sarif/bash-report.sarif", "SARIF", 18)
+
+ def test_sarif_same_product_different_engagements_duplicates(self):
+ """Test that importing SARIF scan into same product but different engagements creates duplicates"""
+ # 18 internal duplicates in first import + 18 in second import + 9 cross-import duplicates = 45 total
+ self._test_same_product_different_engagements_duplicates("scans/sarif/bash-report.sarif", "SARIF", 45)
+
+ def test_sarif_same_product_different_engagements_dedupe_on_engagements_no_duplicates(self):
+ """Test that importing SARIF scan into same product but different engagements with dedupe_on_engagements creates 0 duplicates"""
+ # bash-report.sarif has 18 internal duplicates per import
+ # Second test has 18 internal duplicates (no cross-engagement duplicates due to dedupe_on_engagements=True)
+ # Total product duplicates = 18 (first) + 18 (second) = 36
+ self._test_same_product_different_engagements_dedupe_on_engagements_no_duplicates("scans/sarif/bash-report.sarif", "SARIF",
+ 18, first_import_duplicates=18)
+
+ # Test cases for Veracode (UNIQUE_ID_FROM_TOOL_OR_HASH_CODE algorithm)
+ def test_veracode_single_import_no_duplicates(self):
+ """Test that importing Veracode scan (UNIQUE_ID_FROM_TOOL_OR_HASH_CODE algorithm) creates 0 duplicate findings"""
+ self._test_single_import_assess_duplicates("scans/veracode/veracode_scan.xml", "Veracode Scan", 0)
+
+ def test_veracode_full_then_subset_duplicates(self):
+ """Test that importing full Veracode scan then subset creates duplicates"""
+ # For now, use the same file for both full and subset
+ self._test_full_then_subset_duplicates("scans/veracode/veracode_scan.xml", "scans/veracode/veracode_scan.xml", "Veracode Scan", 7)
+
+ def test_veracode_different_products_no_duplicates(self):
+ """Test that importing Veracode scan into different products creates 0 duplicates"""
+ self._test_different_products_no_duplicates("scans/veracode/veracode_scan.xml", "Veracode Scan", 0)
+
+ def test_veracode_same_product_different_engagements_duplicates(self):
+ """Test that importing Veracode scan into same product but different engagements creates duplicates"""
+ self._test_same_product_different_engagements_duplicates("scans/veracode/veracode_scan.xml", "Veracode Scan", 7)
+
+ def test_veracode_same_product_different_engagements_dedupe_on_engagements_no_duplicates(self):
+ """Test that importing Veracode scan into same product but different engagements with dedupe_on_engagements creates 0 duplicates"""
+ self._test_same_product_different_engagements_dedupe_on_engagements_no_duplicates("scans/veracode/veracode_scan.xml", "Veracode Scan", 0)
+
+ # Test cases for StackHawk (HASH_CODE algorithm)
+ def test_stackhawk_single_import_no_duplicates(self):
+ """Test that importing StackHawk scan (HASH_CODE algorithm) creates 0 duplicate findings"""
+ self._test_single_import_assess_duplicates("scans/stackhawk/stackhawk_many_vul_without_duplicated_findings.json", "StackHawk HawkScan", 0)
+
+ def test_stackhawk_full_then_subset_duplicates(self):
+ """Test that importing full StackHawk scan then subset creates duplicates"""
+ self._test_full_then_subset_duplicates("scans/stackhawk/stackhawk_many_vul_without_duplicated_findings.json",
+ "scans/stackhawk/stackhawk_many_vul_without_duplicated_findings_subset.json", "StackHawk HawkScan", 5)
+
+ def test_stackhawk_different_products_no_duplicates(self):
+ """Test that importing StackHawk scan into different products creates 0 duplicates"""
+ self._test_different_products_no_duplicates("scans/stackhawk/stackhawk_many_vul_without_duplicated_findings.json", "StackHawk HawkScan", 0)
+
+ def test_stackhawk_same_product_different_engagements_duplicates(self):
+ """Test that importing StackHawk scan into same product but different engagements creates duplicates"""
+ self._test_same_product_different_engagements_duplicates("scans/stackhawk/stackhawk_many_vul_without_duplicated_findings.json",
+ "StackHawk HawkScan", 6)
+
+ def test_stackhawk_same_product_different_engagements_dedupe_on_engagements_no_duplicates(self):
+ """Test that importing StackHawk scan into same product but different engagements with dedupe_on_engagements creates 0 duplicates"""
+ self._test_same_product_different_engagements_dedupe_on_engagements_no_duplicates("scans/stackhawk/stackhawk_many_vul_without_duplicated_findings.json",
+ "StackHawk HawkScan", 0)
diff --git a/unittests/test_importers_performance.py b/unittests/test_importers_performance.py
index 6e43f0e2d13..3b4ce357c85 100644
--- a/unittests/test_importers_performance.py
+++ b/unittests/test_importers_performance.py
@@ -34,9 +34,6 @@
STACK_HAWK_SUBSET_FILENAME = get_unit_tests_scans_path("stackhawk") / "stackhawk_many_vul_without_duplicated_findings_subset.json"
STACK_HAWK_SCAN_TYPE = "StackHawk HawkScan"
-NPM_AUDIT_NO_VULN_FILENAME = get_unit_tests_scans_path("npm_audit") / "one_vuln.json"
-NPM_AUDIT_SCAN_TYPE = "NPM Audit Scan"
-
class TestDojoImporterPerformance(DojoTestCase):
@@ -49,6 +46,7 @@ def setUp(self):
self.system_settings(enable_webhooks_notifications=False)
self.system_settings(enable_product_grade=False)
self.system_settings(enable_github=False)
+ self.system_settings(enable_deduplication=True)
# Warm up ContentType cache for relevant models. This is needed if we want to be able to run the test in isolation
# As part of the test suite the ContentType ids will already be cached and won't affect the query count.
@@ -58,7 +56,7 @@ def setUp(self):
ContentType.objects.get_for_model(model)
@contextmanager
- def assertNumAsyncTask(self, num):
+ def _assertNumAsyncTask(self, num):
dojo_async_task_counter.start()
try:
yield
@@ -82,7 +80,7 @@ def assertNumAsyncTask(self, num):
)
logger.debug(msg)
- def import_reimport_performance(self, expected_num_queries1, expected_num_async_tasks1, expected_num_queries2, expected_num_async_tasks2, expected_num_queries3, expected_num_async_tasks3):
+ def _import_reimport_performance(self, expected_num_queries1, expected_num_async_tasks1, expected_num_queries2, expected_num_async_tasks2, expected_num_queries3, expected_num_async_tasks3):
"""
Log output can be quite large because when assertNumQueries fails, all queries are printed.
It could be useful to capture the output in `less`:
@@ -109,7 +107,7 @@ def import_reimport_performance(self, expected_num_queries1, expected_num_async_
with (
self.subTest("import1"), impersonate(Dojo_User.objects.get(username="admin")),
self.assertNumQueries(expected_num_queries1),
- self.assertNumAsyncTask(expected_num_async_tasks1),
+ self._assertNumAsyncTask(expected_num_async_tasks1),
STACK_HAWK_SUBSET_FILENAME.open(encoding="utf-8") as scan,
):
import_options = {
@@ -133,7 +131,7 @@ def import_reimport_performance(self, expected_num_queries1, expected_num_async_
with (
self.subTest("reimport1"), impersonate(Dojo_User.objects.get(username="admin")),
self.assertNumQueries(expected_num_queries2),
- self.assertNumAsyncTask(expected_num_async_tasks2),
+ self._assertNumAsyncTask(expected_num_async_tasks2),
STACK_HAWK_FILENAME.open(encoding="utf-8") as scan,
):
reimport_options = {
@@ -156,7 +154,7 @@ def import_reimport_performance(self, expected_num_queries1, expected_num_async_
with (
self.subTest("reimport2"), impersonate(Dojo_User.objects.get(username="admin")),
self.assertNumQueries(expected_num_queries3),
- self.assertNumAsyncTask(expected_num_async_tasks3),
+ self._assertNumAsyncTask(expected_num_async_tasks3),
STACK_HAWK_SUBSET_FILENAME.open(encoding="utf-8") as scan,
):
reimport_options = {
@@ -179,7 +177,7 @@ def test_import_reimport_reimport_performance_async(self):
configure_audit_system()
configure_pghistory_triggers()
- self.import_reimport_performance(
+ self._import_reimport_performance(
expected_num_queries1=593,
expected_num_async_tasks1=10,
expected_num_queries2=498,
@@ -197,7 +195,7 @@ def test_import_reimport_reimport_performance_pghistory_async(self):
configure_audit_system()
configure_pghistory_triggers()
- self.import_reimport_performance(
+ self._import_reimport_performance(
expected_num_queries1=559,
expected_num_async_tasks1=10,
expected_num_queries2=491,
@@ -221,12 +219,12 @@ def test_import_reimport_reimport_performance_no_async(self):
testuser = User.objects.get(username="admin")
testuser.usercontactinfo.block_execution = True
testuser.usercontactinfo.save()
- self.import_reimport_performance(
- expected_num_queries1=593,
+ self._import_reimport_performance(
+ expected_num_queries1=603,
expected_num_async_tasks1=10,
- expected_num_queries2=503,
+ expected_num_queries2=515,
expected_num_async_tasks2=22,
- expected_num_queries3=294,
+ expected_num_queries3=304,
expected_num_async_tasks3=20,
)
@@ -243,12 +241,12 @@ def test_import_reimport_reimport_performance_pghistory_no_async(self):
testuser.usercontactinfo.block_execution = True
testuser.usercontactinfo.save()
- self.import_reimport_performance(
- expected_num_queries1=559,
+ self._import_reimport_performance(
+ expected_num_queries1=569,
expected_num_async_tasks1=10,
- expected_num_queries2=496,
+ expected_num_queries2=508,
expected_num_async_tasks2=22,
- expected_num_queries3=289,
+ expected_num_queries3=299,
expected_num_async_tasks3=20,
)
@@ -269,12 +267,12 @@ def test_import_reimport_reimport_performance_no_async_with_product_grading(self
testuser.usercontactinfo.save()
self.system_settings(enable_product_grade=True)
- self.import_reimport_performance(
- expected_num_queries1=594,
+ self._import_reimport_performance(
+ expected_num_queries1=604,
expected_num_async_tasks1=11,
- expected_num_queries2=504,
+ expected_num_queries2=516,
expected_num_async_tasks2=23,
- expected_num_queries3=295,
+ expected_num_queries3=305,
expected_num_async_tasks3=21,
)
@@ -292,11 +290,190 @@ def test_import_reimport_reimport_performance_pghistory_no_async_with_product_gr
testuser.usercontactinfo.save()
self.system_settings(enable_product_grade=True)
- self.import_reimport_performance(
- expected_num_queries1=560,
+ self._import_reimport_performance(
+ expected_num_queries1=570,
expected_num_async_tasks1=11,
- expected_num_queries2=497,
+ expected_num_queries2=509,
expected_num_async_tasks2=23,
- expected_num_queries3=290,
+ expected_num_queries3=300,
expected_num_async_tasks3=21,
)
+
+ # Deduplication is enabled in the tests above, but to properly test it we must run the same import twice and capture the results.
+ def _deduplication_performance(self, expected_num_queries1, expected_num_async_tasks1, expected_num_queries2, expected_num_async_tasks2, *, check_duplicates=True):
+ """
+ Helper method to measure deduplication performance by importing the same scan twice.
+ The second import should result in all findings being marked as duplicates.
+ This is different from reimport as we create a new test each time.
+ """
+ product_type, _created = Product_Type.objects.get_or_create(name="test")
+ product, _created = Product.objects.get_or_create(
+ name="TestDojoDeduplicationPerformance",
+ prod_type=product_type,
+ )
+ engagement, _created = Engagement.objects.get_or_create(
+ name="Test Deduplication Performance Engagement",
+ product=product,
+ target_start=timezone.now(),
+ target_end=timezone.now(),
+ )
+ lead, _ = User.objects.get_or_create(username="admin")
+ environment, _ = Development_Environment.objects.get_or_create(name="Development")
+
+ # First import - all findings should be new
+ with (
+ self.subTest("first_import"), impersonate(Dojo_User.objects.get(username="admin")),
+ self.assertNumQueries(expected_num_queries1),
+ self._assertNumAsyncTask(expected_num_async_tasks1),
+ STACK_HAWK_FILENAME.open(encoding="utf-8") as scan,
+ ):
+ import_options = {
+ "user": lead,
+ "lead": lead,
+ "scan_date": None,
+ "environment": environment,
+ "minimum_severity": "Info",
+ "active": True,
+ "verified": True,
+ "scan_type": STACK_HAWK_SCAN_TYPE,
+ "engagement": engagement,
+ }
+ importer = DefaultImporter(**import_options)
+ _, _, len_new_findings1, len_closed_findings1, _, _, _ = importer.process_scan(scan)
+
+ # Second import - all findings should be duplicates
+ with (
+ self.subTest("second_import"), impersonate(Dojo_User.objects.get(username="admin")),
+ self.assertNumQueries(expected_num_queries2),
+ self._assertNumAsyncTask(expected_num_async_tasks2),
+ STACK_HAWK_FILENAME.open(encoding="utf-8") as scan,
+ ):
+ import_options = {
+ "user": lead,
+ "lead": lead,
+ "scan_date": None,
+ "environment": environment,
+ "minimum_severity": "Info",
+ "active": True,
+ "verified": True,
+ "scan_type": STACK_HAWK_SCAN_TYPE,
+ "engagement": engagement,
+ }
+ importer = DefaultImporter(**import_options)
+ _, _, len_new_findings2, len_closed_findings2, _, _, _ = importer.process_scan(scan)
+
+ # Log the results for analysis
+ logger.debug(f"First import: {len_new_findings1} new findings, {len_closed_findings1} closed findings")
+ logger.debug(f"Second import: {len_new_findings2} new findings, {len_closed_findings2} closed findings")
+
+ # Assert that process_scan results show no deduplication yet (deduplication happens asynchronously)
+ # The second import should report 6 new findings because deduplication is not visible in the stats from the importer
+ self.assertEqual(len_new_findings1, 6, "First import should create 6 new findings")
+ self.assertEqual(len_closed_findings1, 0, "First import should not close any findings")
+ self.assertEqual(len_new_findings2, 6, "Second import should report 6 new findings initially (before deduplication)")
+ self.assertEqual(len_closed_findings2, 0, "Second import should not close any findings")
+
+ # Verify that second import resulted in duplicates by checking the database
+ # Only check duplicates in sync mode since deduplication happens asynchronously
+ if check_duplicates:
+ # Count active findings (non-duplicates) in the engagement
+ active_findings = Finding.objects.filter(
+ test__engagement=engagement,
+ active=True,
+ duplicate=False,
+ ).count()
+
+ # Count duplicate findings in the engagement
+ duplicate_findings = Finding.objects.filter(
+ test__engagement=engagement,
+ duplicate=True,
+ ).count()
+
+ # We should have 6 active findings (from first import) and 6 duplicate findings (from second import)
+ self.assertEqual(active_findings, 6, f"Expected 6 active findings, got {active_findings}")
+ self.assertEqual(duplicate_findings, 6, f"Expected 6 duplicate findings, got {duplicate_findings}")
+
+ # Total findings should be 12 (6 active + 6 duplicates)
+ total_findings = Finding.objects.filter(test__engagement=engagement).count()
+ self.assertEqual(total_findings, 12, f"Expected 12 total findings, got {total_findings}")
+ else:
+ # In async mode, just verify we have 12 total findings (deduplication happens in celery tasks)
+ total_findings = Finding.objects.filter(test__engagement=engagement).count()
+ self.assertEqual(total_findings, 12, f"Expected 12 total findings, got {total_findings}")
+
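+ # Hedged note: the *_no_async variants below force synchronous execution the same way
+ # the earlier tests do, by blocking execution for the admin user:
+ #
+ # testuser.usercontactinfo.block_execution = True
+ # testuser.usercontactinfo.save()
+ #
+ # With tasks running inline, deduplication results are visible immediately, so those
+ # variants keep check_duplicates=True.
+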
+ @override_settings(ENABLE_AUDITLOG=True, AUDITLOG_TYPE="django-auditlog")
+ def test_deduplication_performance_async(self):
+ """
+ Test deduplication performance with async tasks enabled.
+ This test imports the same scan twice to measure deduplication query and task overhead.
+ """
+ configure_audit_system()
+ configure_pghistory_triggers()
+
+ # Enable deduplication
+ self.system_settings(enable_deduplication=True)
+
+ self._deduplication_performance(
+ expected_num_queries1=660,
+ expected_num_async_tasks1=12,
+ expected_num_queries2=519,
+ expected_num_async_tasks2=12,
+ check_duplicates=False, # Async mode - deduplication happens later
+ )
+
+ @override_settings(ENABLE_AUDITLOG=True, AUDITLOG_TYPE="django-pghistory")
+ def test_deduplication_performance_pghistory_async(self):
+ """Test deduplication performance with django-pghistory and async tasks enabled."""
+ configure_audit_system()
+ configure_pghistory_triggers()
+
+ # Enable deduplication
+ self.system_settings(enable_deduplication=True)
+
+ self._deduplication_performance(
+ expected_num_queries1=624,
+ expected_num_async_tasks1=12,
+ expected_num_queries2=500,
+ expected_num_async_tasks2=12,
+ check_duplicates=False, # Async mode - deduplication happens later
+ )
+
+ @override_settings(ENABLE_AUDITLOG=True, AUDITLOG_TYPE="django-auditlog")
+ def test_deduplication_performance_no_async(self):
+ """Test deduplication performance with async tasks disabled."""
+ configure_audit_system()
+ configure_pghistory_triggers()
+
+ # Enable deduplication
+ self.system_settings(enable_deduplication=True)
+
+ testuser = User.objects.get(username="admin")
+ testuser.usercontactinfo.block_execution = True
+ testuser.usercontactinfo.save()
+
+ self._deduplication_performance(
+ expected_num_queries1=672,
+ expected_num_async_tasks1=12,
+ expected_num_queries2=633,
+ expected_num_async_tasks2=12,
+ )
+
+ @override_settings(ENABLE_AUDITLOG=True, AUDITLOG_TYPE="django-pghistory")
+ def test_deduplication_performance_pghistory_no_async(self):
+ """Test deduplication performance with django-pghistory and async tasks disabled."""
+ configure_audit_system()
+ configure_pghistory_triggers()
+
+ # Enable deduplication
+ self.system_settings(enable_deduplication=True)
+
+ testuser = User.objects.get(username="admin")
+ testuser.usercontactinfo.block_execution = True
+ testuser.usercontactinfo.save()
+
+ self._deduplication_performance(
+ expected_num_queries1=636,
+ expected_num_async_tasks1=12,
+ expected_num_queries2=596,
+ expected_num_async_tasks2=12,
+ )
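+
+ # Hedged usage note: assuming the repository's run-unittest.sh wrapper, this module can
+ # be run in isolation with something like:
+ # ./run-unittest.sh --test-case unittests.test_importers_performance.TestDojoImporterPerformance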
diff --git a/unittests/tools/test_acunetix_parser_port_num.py b/unittests/tools/test_acunetix_parser_port_num.py
new file mode 100644
index 00000000000..931d1376276
--- /dev/null
+++ b/unittests/tools/test_acunetix_parser_port_num.py
@@ -0,0 +1,203 @@
+from datetime import date
+
+from dojo.models import Test
+from dojo.tools.acunetix.parser import AcunetixParser
+from unittests.dojo_test_case import DojoTestCase, get_unit_tests_scans_path
+
+
+class TestAcunetixParser(DojoTestCase):
+
+ def test_parse_file_with_one_finding(self):
+ with (get_unit_tests_scans_path("acunetix") / "one_finding_with_port_num.xml").open(encoding="utf-8") as testfile:
+ parser = AcunetixParser()
+ findings = parser.get_findings(testfile, Test())
+ for finding in findings:
+ for endpoint in finding.unsaved_endpoints:
+ endpoint.clean()
+ self.assertEqual(1, len(findings))
+ with self.subTest(i=0):
+ finding = findings[0]
+ self.assertEqual("Medium", finding.severity)
+ self.assertEqual(352, finding.cwe)
+ self.assertEqual(date(2018, 9, 24), finding.date)
+ self.assertIsNotNone(finding.description)
+ self.assertGreater(len(finding.description), 0)
+ self.assertFalse(finding.false_p)
+ self.assertEqual("Vijay Test Imapact", finding.impact)
+ self.assertIsNotNone(finding.references)
+ self.assertGreater(len(finding.references), 0)
+ # check endpoints
+ self.assertEqual(1, len(finding.unsaved_endpoints))
+ endpoint = finding.unsaved_endpoints[0]
+ self.assertEqual("https", endpoint.protocol)
+ self.assertEqual(443, endpoint.port)
+ self.assertEqual("vijaytest.com", endpoint.host)
+ self.assertEqual("some/path", endpoint.path)
+
+ def test_parse_file_with_multiple_finding(self):
+ with (get_unit_tests_scans_path("acunetix") / "many_findings_with_port_number.xml").open(encoding="utf-8") as testfile:
+ parser = AcunetixParser()
+ findings = parser.get_findings(testfile, Test())
+ for finding in findings:
+ for endpoint in finding.unsaved_endpoints:
+ endpoint.clean()
+ self.assertEqual(4, len(findings))
+ with self.subTest(i=0):
+ finding = findings[0]
+ self.assertEqual("Medium", finding.severity)
+ self.assertEqual(date(2020, 2, 27), finding.date)
+ self.assertIsNotNone(finding.description)
+ self.assertEqual("CVSS:3.0/AV:N/AC:L/PR:N/UI:N/S:U/C:N/I:N/A:L", finding.cvssv3)
+ self.assertFalse(finding.false_p)
+ self.assertEqual("A single machine can take down another machine's web server with minimal bandwidth and side effects on unrelated services and ports.", finding.impact)
+ # check that this finding has references
+ self.assertIsNotNone(finding.references)
+ self.assertGreater(len(finding.references), 0)
+ # check endpoints
+ self.assertEqual(1, len(finding.unsaved_endpoints))
+ endpoint = finding.unsaved_endpoints[0]
+ self.assertIsNone(endpoint.protocol)
+ self.assertEqual(8080, endpoint.port)
+ self.assertEqual("www.itsecgames.com", endpoint.host)
+ self.assertIsNone(endpoint.path)
+ # check req/resp
+ self.assertEqual(1, len(finding.unsaved_req_resp))
+ req_resp = finding.unsaved_req_resp[0]
+ self.assertIn("req", req_resp)
+ self.assertIsNotNone(req_resp["req"])
+ self.assertIsInstance(req_resp["req"], str)
+ self.assertIn("resp", req_resp)
+ self.assertIsNotNone(req_resp["resp"])
+ self.assertIsInstance(req_resp["resp"], str)
+
+ with self.subTest(i=1):
+ finding = findings[1]
+ self.assertEqual("Possible virtual host found", finding.title)
+ self.assertEqual("Low", finding.severity)
+ self.assertEqual(200, finding.cwe)
+ self.assertEqual(date(2020, 2, 27), finding.date)
+ self.assertIsNotNone(finding.description)
+ self.assertEqual("CVSS:3.0/AV:N/AC:L/PR:N/UI:N/S:U/C:H/I:N/A:N", finding.cvssv3)
+ self.assertFalse(finding.false_p)
+ self.assertEqual("Possible sensitive information disclosure.", finding.impact)
+ # check that this finding has references
+ self.assertIsNotNone(finding.references)
+ self.assertGreater(len(finding.references), 0)
+ # check endpoints
+ self.assertEqual(1, len(finding.unsaved_endpoints))
+ endpoint = finding.unsaved_endpoints[0]
+ self.assertIsNone(endpoint.protocol)
+ self.assertEqual(8080, endpoint.port)
+ self.assertEqual("www.itsecgames.com", endpoint.host)
+ self.assertIsNone(endpoint.path)
+ # check req/resp
+ self.assertEqual(1, len(finding.unsaved_req_resp))
+ req_resp = finding.unsaved_req_resp[0]
+ self.assertIn("req", req_resp)
+ self.assertIsNotNone(req_resp["req"])
+ self.assertIsInstance(req_resp["req"], str)
+ self.assertIn("resp", req_resp)
+ self.assertIsNotNone(req_resp["resp"])
+ self.assertIsInstance(req_resp["resp"], str)
+
+ with self.subTest(i=2):
+ finding = findings[2]
+ self.assertEqual("Unencrypted connection (verified)", finding.title)
+ self.assertEqual("Low", finding.severity)
+ self.assertEqual(310, finding.cwe)
+ self.assertEqual(date(2020, 2, 27), finding.date)
+ self.assertIsNotNone(finding.description)
+ self.assertEqual("CVSS:3.0/AV:N/AC:L/PR:N/UI:N/S:U/C:H/I:H/A:N", finding.cvssv3)
+ self.assertFalse(finding.false_p)
+ self.assertEqual("Possible information disclosure.", finding.impact)
+ # check that this finding has no references
+ self.assertIsNone(finding.references)
+ # check endpoints
+ self.assertEqual(1, len(finding.unsaved_endpoints))
+ endpoint = finding.unsaved_endpoints[0]
+ self.assertIsNone(endpoint.protocol)
+ self.assertEqual(8080, endpoint.port)
+ self.assertEqual("www.itsecgames.com", endpoint.host)
+ self.assertIsNone(endpoint.path)
+ # check req/resp
+ self.assertEqual(1, len(finding.unsaved_req_resp))
+ req_resp = finding.unsaved_req_resp[0]
+ self.assertIn("req", req_resp)
+ self.assertIsNotNone(req_resp["req"])
+ self.assertIsInstance(req_resp["req"], str)
+ self.assertIn("resp", req_resp)
+ self.assertIsNotNone(req_resp["resp"])
+ self.assertIsInstance(req_resp["resp"], str)
+
+ def test_parse_file_with_example_com(self):
+ with (get_unit_tests_scans_path("acunetix") / "XML_http_example_co_id_port_num.xml").open(encoding="utf-8") as testfile:
+ parser = AcunetixParser()
+ findings = parser.get_findings(testfile, Test())
+ for finding in findings:
+ for endpoint in finding.unsaved_endpoints:
+ endpoint.clean()
+ self.assertEqual(7, len(findings))
+ with self.subTest(i=0):
+ finding = findings[0]
+ self.assertEqual("HTML form without CSRF protection", finding.title)
+ self.assertEqual("Medium", finding.severity)
+ self.assertEqual(date(2020, 4, 28), finding.date)
+ self.assertIsNotNone(finding.description)
+ self.assertEqual("CVSS:3.0/AV:N/AC:L/PR:N/UI:R/S:U/C:N/I:L/A:N", finding.cvssv3)
+ self.assertFalse(finding.false_p)
+ self.assertIn("An attacker could use CSRF to trick a victim into accessing a website hosted by the attacker,", finding.impact)
+ # aggregated finding: 3 occurrences grouped into one
+ self.assertEqual(3, finding.nb_occurences)
+ # check that this finding has references
+ self.assertIsNotNone(finding.references)
+ self.assertGreater(len(finding.references), 0)
+ # check endpoints
+ self.assertEqual(3, len(finding.unsaved_endpoints))
+ endpoint = finding.unsaved_endpoints[0]
+ self.assertIsNone(endpoint.protocol)
+ self.assertEqual(9000, endpoint.port)
+ self.assertEqual("example.co.id", endpoint.host)
+ self.assertEqual("h/search", endpoint.path)
+ endpoint = finding.unsaved_endpoints[1]
+ self.assertIsNone(endpoint.protocol)
+ self.assertEqual(9000, endpoint.port)
+ self.assertEqual("example.co.id", endpoint.host)
+ self.assertEqual("m/zmain", endpoint.path)
+ # check req/resp
+ self.assertEqual(3, len(finding.unsaved_req_resp))
+ for req_resp in finding.unsaved_req_resp:
+ self.assertIn("req", req_resp)
+ self.assertIsNotNone(req_resp["req"])
+ self.assertIsInstance(req_resp["req"], str)
+ self.assertIn("resp", req_resp)
+ self.assertIsNotNone(req_resp["resp"])
+ self.assertIsInstance(req_resp["resp"], str)
+
+ with self.subTest(i=6):
+ finding = findings[6]
+ self.assertEqual("Content Security Policy (CSP) not implemented", finding.title)
+ self.assertEqual("Info", finding.severity)
+ self.assertEqual(date(2020, 4, 28), finding.date)
+ self.assertIsNotNone(finding.description)
+ self.assertFalse(finding.false_p)
+ self.assertIn("CSP can be used to prevent and/or mitigate attacks that involve content/code injection,", finding.impact)
+ # check that this finding has references
+ self.assertIsNotNone(finding.references)
+ self.assertGreater(len(finding.references), 0)
+ # check endpoints
+ self.assertEqual(1, len(finding.unsaved_endpoints))
+ endpoint = finding.unsaved_endpoints[0]
+ self.assertIsNone(endpoint.protocol)
+ self.assertEqual(9000, endpoint.port)
+ self.assertEqual("example.co.id", endpoint.host)
+ self.assertIsNone(endpoint.path)
+ # check req/resp
+ self.assertEqual(1, len(finding.unsaved_req_resp))
+ req_resp = finding.unsaved_req_resp[0]
+ self.assertIn("req", req_resp)
+ self.assertIsNotNone(req_resp["req"])
+ self.assertIsInstance(req_resp["req"], str)
+ self.assertIn("resp", req_resp)
+ self.assertIsNotNone(req_resp["resp"])
+ self.assertIsInstance(req_resp["resp"], str)