Skip to content

Commit b0277e4

Browse files
add flushing of pghistory entries
1 parent 738de71 commit b0277e4

4 files changed

Lines changed: 111 additions & 46 deletions

File tree

docker-compose.yml

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,6 @@ services:
5353
DD_SECRET_KEY: "${DD_SECRET_KEY:-hhZCp@D28z!n@NED*yB!ROMt+WzsY*iq}"
5454
DD_CREDENTIAL_AES_256_KEY: "${DD_CREDENTIAL_AES_256_KEY:-&91a*agLqesc*0DJ+2*bAbsUZfR*4nLw}"
5555
DD_DATABASE_READINESS_TIMEOUT: "${DD_DATABASE_READINESS_TIMEOUT:-30}"
56-
DD_AUDITLOG_TYPE: "${DD_AUDITLOG_TYPE:-django-auditlog}"
5756
volumes:
5857
- type: bind
5958
source: ./docker/extra_settings
@@ -75,7 +74,6 @@ services:
7574
DD_SECRET_KEY: "${DD_SECRET_KEY:-hhZCp@D28z!n@NED*yB!ROMt+WzsY*iq}"
7675
DD_CREDENTIAL_AES_256_KEY: "${DD_CREDENTIAL_AES_256_KEY:-&91a*agLqesc*0DJ+2*bAbsUZfR*4nLw}"
7776
DD_DATABASE_READINESS_TIMEOUT: "${DD_DATABASE_READINESS_TIMEOUT:-30}"
78-
DD_AUDITLOG_TYPE: "${DD_AUDITLOG_TYPE:-django-auditlog}"
7977
volumes:
8078
- type: bind
8179
source: ./docker/extra_settings
@@ -96,7 +94,6 @@ services:
9694
DD_SECRET_KEY: "${DD_SECRET_KEY:-hhZCp@D28z!n@NED*yB!ROMt+WzsY*iq}"
9795
DD_CREDENTIAL_AES_256_KEY: "${DD_CREDENTIAL_AES_256_KEY:-&91a*agLqesc*0DJ+2*bAbsUZfR*4nLw}"
9896
DD_DATABASE_READINESS_TIMEOUT: "${DD_DATABASE_READINESS_TIMEOUT:-30}"
99-
DD_AUDITLOG_TYPE: "${DD_AUDITLOG_TYPE:-django-auditlog}"
10097
volumes:
10198
- type: bind
10299
source: ./docker/extra_settings
@@ -118,7 +115,6 @@ services:
118115
DD_SECRET_KEY: "${DD_SECRET_KEY:-hhZCp@D28z!n@NED*yB!ROMt+WzsY*iq}"
119116
DD_CREDENTIAL_AES_256_KEY: "${DD_CREDENTIAL_AES_256_KEY:-&91a*agLqesc*0DJ+2*bAbsUZfR*4nLw}"
120117
DD_DATABASE_READINESS_TIMEOUT: "${DD_DATABASE_READINESS_TIMEOUT:-30}"
121-
DD_AUDITLOG_TYPE: "${DD_AUDITLOG_TYPE:-django-auditlog}"
122118
volumes:
123119
- type: bind
124120
source: ./docker/extra_settings

dojo/auditlog.py

Lines changed: 103 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,10 @@
77
import contextlib
88
import logging
99
import sys
10-
from datetime import date, datetime, time
1110

1211
import pghistory
1312
from dateutil.relativedelta import relativedelta
13+
from django.apps import apps
1414
from django.conf import settings
1515
from django.core.management import call_command
1616
from django.db import models
@@ -19,51 +19,122 @@
1919
logger = logging.getLogger(__name__)
2020

2121

22-
def run_flush_auditlog(retention_period: int | None = None,
23-
batch_size: int | None = None,
24-
max_batches: int | None = None) -> tuple[int, int, bool]:
22+
def _flush_models_in_batches(models_to_flush, timestamp_field: str, retention_period: int, batch_size: int, max_batches: int, *, dry_run: bool = False) -> tuple[int, int, bool]:
    """
    Delete (or, when ``dry_run`` is set, merely count) rows older than the
    retention cutoff for every model in *models_to_flush*, paging through
    each table in primary-key order.

    Returns (deleted_or_would_delete_total, batches_done_or_needed, reached_limit).
    """
    # Compare against a full timestamp, not a date, so the database can make
    # efficient use of an index on the timestamp column.
    cutoff = timezone.now() - relativedelta(months=retention_period)
    logger.info("Audit flush cutoff datetime: %s (retention_period=%s months)", cutoff, retention_period)

    grand_deleted = 0
    grand_batches = 0
    any_limit = False
    action = "Would delete" if dry_run else "Deleted"

    for model_cls in models_to_flush:
        age_filter = {f"{timestamp_field}__lt": cutoff}
        removed = 0
        batches = 0
        # Keyset pagination: remember the highest pk handled so each page
        # starts after the previous one instead of re-scanning from the top.
        highest_pk = None

        for _ in range(max_batches):
            candidates = model_cls.objects.filter(**age_filter)
            if highest_pk is not None:
                candidates = candidates.filter(pk__gt=highest_pk)
            page = list(candidates.order_by("pk").values_list("pk", flat=True)[:batch_size])
            if not page:
                if batches == 0:
                    logger.info("No outdated %s entries found", model_cls._meta.object_name)
                break

            if dry_run:
                hit_count = len(page)
            else:
                doomed = model_cls.objects.filter(pk__in=page)
                # NOTE(review): _raw_delete is a private Django API that issues a
                # plain SQL DELETE (no signals / cascade collection) — presumably
                # chosen for speed; confirm on Django upgrades.
                hit_count = int(doomed._raw_delete(doomed.db))

            removed += hit_count
            batches += 1
            highest_pk = page[-1]
            logger.info(
                "%s %s batch %s (size ~%s), total %s: %s",
                action,
                model_cls._meta.object_name,
                batches,
                batch_size,
                action.lower(),
                removed,
            )

        grand_deleted += removed
        grand_batches += batches
        if batches >= max_batches:
            any_limit = True

    return grand_deleted, grand_batches, any_limit
81+
82+
83+
def _flush_django_auditlog(retention_period: int, batch_size: int, max_batches: int, *, dry_run: bool = False) -> tuple[int, int, bool]:
    """Flush outdated django-auditlog ``LogEntry`` rows; see ``_flush_models_in_batches``."""
    # Import inside to avoid model import issues at startup
    from auditlog.models import LogEntry  # noqa: PLC0415

    targets = [LogEntry]
    return _flush_models_in_batches(targets, "timestamp", retention_period, batch_size, max_batches, dry_run=dry_run)
88+
89+
90+
def _iter_pghistory_event_models():
    """Yield pghistory Event models registered under the dojo app."""
    for candidate in apps.get_app_config("dojo").get_models():
        # Event tables are recognised by name convention...
        looks_like_event = candidate._meta.object_name.endswith("Event")
        # ...and must carry the pghistory timestamp column we filter on.
        has_created_at = any(field.name == "pgh_created_at" for field in candidate._meta.fields)
        if looks_like_event and has_created_at:
            yield candidate
97+
98+
99+
def _flush_pghistory_events(retention_period: int, batch_size: int, max_batches: int, *, dry_run: bool = False) -> tuple[int, int, bool]:
    """Flush outdated rows from every dojo pghistory Event model."""
    event_models = list(_iter_pghistory_event_models())
    return _flush_models_in_batches(event_models, "pgh_created_at", retention_period, batch_size, max_batches, dry_run=dry_run)
102+
103+
104+
def run_flush_auditlog(retention_period: int | None = None,
                       batch_size: int | None = None,
                       max_batches: int | None = None,
                       *,
                       dry_run: bool = False) -> tuple[int, int, bool]:
    """
    Delete audit entries older than the configured retention period from
    both django-auditlog and django-pghistory backends.

    Any parameter left as None falls back to the corresponding Django
    setting.  With ``dry_run`` nothing is deleted; counts are reported only.

    Returns a tuple of (deleted_total, batches_done, reached_limit).
    """
    if retention_period is None:
        retention_period = getattr(settings, "AUDITLOG_FLUSH_RETENTION_PERIOD", -1)
    # A negative retention period means flushing is turned off entirely.
    if retention_period < 0:
        logger.info("Flushing audit logs is disabled")
        return 0, 0, False

    if batch_size is None:
        batch_size = getattr(settings, "AUDITLOG_FLUSH_BATCH_SIZE", 1000)
    if max_batches is None:
        max_batches = getattr(settings, "AUDITLOG_FLUSH_MAX_BATCHES", 100)

    phase = "DRY RUN" if dry_run else "Cleanup"
    logger.info("Running %s for django-auditlog entries with %d Months retention across all backends", phase, retention_period)
    d_deleted, d_batches, d_limit = _flush_django_auditlog(retention_period, batch_size, max_batches, dry_run=dry_run)
    logger.info("Running %s for django-pghistory entries with %d Months retention across all backends", phase, retention_period)
    p_deleted, p_batches, p_limit = _flush_pghistory_events(retention_period, batch_size, max_batches, dry_run=dry_run)

    grand_deleted = d_deleted + p_deleted
    grand_batches = d_batches + p_batches
    any_limit = bool(d_limit or p_limit)

    verb = "would delete" if dry_run else "deleted"
    logger.info("Audit flush summary: django-auditlog %s=%s batches=%s; pghistory %s=%s batches=%s; total_%s=%s total_batches=%s",
                verb, d_deleted, d_batches, verb, p_deleted, p_batches, verb.replace(" ", "_"), grand_deleted, grand_batches)

    return grand_deleted, grand_batches, any_limit
67138

68139

69140
def enable_django_auditlog():

dojo/management/commands/flush_auditlog.py

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
def add_arguments(self, parser):
    """Register CLI overrides for the audit-log flush settings."""
    # The three numeric overrides share type/default, so register them in bulk.
    int_overrides = {
        "--retention-months": "Override retention period in months",
        "--batch-size": "Override batch size",
        "--max-batches": "Override max batches per run",
    }
    for flag, help_text in int_overrides.items():
        parser.add_argument(flag, type=int, default=None, help=help_text)
    parser.add_argument("--dry-run", action="store_true", help="Only show how many entries would be deleted")
1314

1415
def handle(self, *args, **options):
    """Run the audit-log flush and report the outcome on stdout."""
    dry_run = options.get("dry_run", False)
    deleted_total, batches_done, reached_limit = run_flush_auditlog(
        retention_period=options.get("retention_months"),
        batch_size=options.get("batch_size"),
        max_batches=options.get("max_batches"),
        dry_run=dry_run,
    )
    # Dry runs are styled as warnings so they stand out from real deletions.
    verb = "Would delete" if dry_run else "Deleted"
    style = self.style.WARNING if dry_run else self.style.SUCCESS
    suffix = " (reached max batches)" if reached_limit else ""
    self.stdout.write(style(f"{verb} {deleted_total} audit log entries in {batches_done} batches{suffix}."))

dojo/settings/settings.dist.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -318,7 +318,7 @@
318318
# a big performance hit. Especially during (re-)imports.
319319
DD_ENABLE_AUDITLOG=(bool, True),
320320
# Audit logging system: "django-pghistory" (default) or "django-auditlog"
321-
DD_AUDITLOG_TYPE=(str, "django-auditlog"),
321+
DD_AUDITLOG_TYPE=(str, "django-pghistory"),
322322
# Specifies whether the "first seen" date of a given report should be used over the "last seen" date
323323
DD_USE_FIRST_SEEN=(bool, False),
324324
# When set to True, use the older version of the qualys parser that is a more heavy handed in setting severity
@@ -1187,7 +1187,7 @@ def saml2_attrib_map_format(din):
11871187
},
11881188
"flush_auditlog": {
11891189
"task": "dojo.tasks.flush_auditlog",
1190-
"schedule": timedelta(minutes=1),
1190+
"schedule": timedelta(hours=8),
11911191
},
11921192
"update-findings-from-source-issues": {
11931193
"task": "dojo.tools.tool_issue_updater.update_findings_from_source_issues",

0 commit comments

Comments
 (0)