Skip to content

Commit 0c39215

Browse files
optimize flush audit log
1 parent 5ddc508 commit 0c39215

3 files changed

Lines changed: 80 additions & 14 deletions

File tree

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
from django.core.management.base import BaseCommand
2+
3+
from dojo.tasks import run_flush_auditlog
4+
5+
6+
class Command(BaseCommand):
    """Management command that flushes old audit log entries via ``run_flush_auditlog``.

    Every option defaults to ``None`` and is handed to ``run_flush_auditlog`` as-is.
    """

    help = "Flush old audit log entries based on retention and batching settings"

    def add_arguments(self, parser):
        """Register the optional integer overrides accepted on the command line."""
        overrides = (
            ("--retention-months", "Override retention period in months"),
            ("--batch-size", "Override batch size"),
            ("--max-batches", "Override max batches per run"),
        )
        for flag, description in overrides:
            parser.add_argument(flag, type=int, default=None, help=description)

    def handle(self, *args, **options):
        """Run the flush once and report the outcome on stdout."""
        flush_kwargs = {
            "retention_period": options.get("retention_months"),
            "batch_size": options.get("batch_size"),
            "max_batches": options.get("max_batches"),
        }
        deleted_total, batches_done, reached_limit = run_flush_auditlog(**flush_kwargs)

        if not reached_limit:
            self.stdout.write(self.style.SUCCESS(
                f"Deleted {deleted_total} audit log entries in {batches_done} batches.",
            ))
            return

        # The run stopped at the batch cap, so report it as a warning
        self.stdout.write(self.style.WARNING(
            f"Reached max batches limit; deleted {deleted_total} entries in {batches_done} batches.",
        ))

dojo/settings/settings.dist.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -260,6 +260,10 @@
260260
DD_TRACK_IMPORT_HISTORY=(bool, True),
261261
# Delete audit logs older than x months; -1 to keep all logs
262262
DD_AUDITLOG_FLUSH_RETENTION_PERIOD=(int, -1),
263+
# Number of audit log entries deleted per batch when flushing
264+
DD_AUDITLOG_FLUSH_BATCH_SIZE=(int, 1000),
265+
# Maximum number of batches to process per task run
266+
DD_AUDITLOG_FLUSH_MAX_BATCHES=(int, 100),
263267
# Allow grouping of findings in the same test, for example to group findings per dependency
264268
# DD_FEATURE_FINDING_GROUPS feature is moved to system_settings, will be removed from settings file
265269
DD_FEATURE_FINDING_GROUPS=(bool, True),
@@ -1183,7 +1187,7 @@ def saml2_attrib_map_format(din):
11831187
},
11841188
"flush_auditlog": {
11851189
"task": "dojo.tasks.flush_auditlog",
1186-
"schedule": timedelta(hours=8),
1190+
"schedule": timedelta(minutes=1),
11871191
},
11881192
"update-findings-from-source-issues": {
11891193
"task": "dojo.tools.tool_issue_updater.update_findings_from_source_issues",
@@ -1914,6 +1918,8 @@ def saml2_attrib_map_format(din):
19141918
AUDITLOG_TYPE = env("DD_AUDITLOG_TYPE")
19151919
AUDITLOG_TWO_STEP_MIGRATION = False
19161920
AUDITLOG_USE_TEXT_CHANGES_IF_JSON_IS_NOT_PRESENT = False
1921+
AUDITLOG_FLUSH_BATCH_SIZE = env("DD_AUDITLOG_FLUSH_BATCH_SIZE")
1922+
AUDITLOG_FLUSH_MAX_BATCHES = env("DD_AUDITLOG_FLUSH_MAX_BATCHES")
19171923

19181924
USE_FIRST_SEEN = env("DD_USE_FIRST_SEEN")
19191925
USE_QUALYS_LEGACY_SEVERITY_PARSING = env("DD_QUALYS_LEGACY_SEVERITY_PARSING")

dojo/tasks.py

Lines changed: 46 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import logging
2-
from datetime import date, timedelta
2+
from datetime import date, datetime, time, timedelta
33

44
from auditlog.models import LogEntry
55
from celery.utils.log import get_task_logger
@@ -93,24 +93,57 @@ def cleanup_alerts(*args, **kwargs):
9393
logger.info("total number of alerts deleted: %s", total_deleted_count)
9494

9595

96-
@app.task(bind=True)
97-
def flush_auditlog(*args, **kwargs):
98-
retention_period = settings.AUDITLOG_FLUSH_RETENTION_PERIOD
96+
def run_flush_auditlog(retention_period: int | None = None,
                       batch_size: int | None = None,
                       max_batches: int | None = None) -> tuple[int, int, bool]:
    """
    Delete audit log entries older than the retention period, in bounded batches.

    Args:
        retention_period: Retention in months. ``None`` falls back to
            ``settings.AUDITLOG_FLUSH_RETENTION_PERIOD`` (-1 if unset); a negative
            value disables flushing.
        batch_size: Maximum number of entries deleted per batch. ``None`` falls back
            to ``settings.AUDITLOG_FLUSH_BATCH_SIZE`` (1000 if unset). Values below 1
            are rejected and nothing is deleted.
        max_batches: Maximum number of batches per call. ``None`` falls back to
            ``settings.AUDITLOG_FLUSH_MAX_BATCHES`` (100 if unset).

    Returns:
        A tuple ``(deleted_total, batches_done, reached_limit)``. ``reached_limit`` is
        True when the run stopped because of ``max_batches`` rather than because the
        outdated entries ran out. If the last permitted batch was completely full,
        the limit is reported as reached even when that batch happened to be the
        final one.
    """
    retention_period = retention_period if retention_period is not None else getattr(settings, "AUDITLOG_FLUSH_RETENTION_PERIOD", -1)
    if retention_period < 0:
        logger.info("Flushing auditlog is disabled")
        return 0, 0, False

    # Settings to control batching; sensible defaults if not configured
    batch_size = batch_size if batch_size is not None else getattr(settings, "AUDITLOG_FLUSH_BATCH_SIZE", 1000)
    max_batches = max_batches if max_batches is not None else getattr(settings, "AUDITLOG_FLUSH_MAX_BATCHES", 100)

    # A zero batch size would look like "nothing to delete" and a negative one would
    # fail inside the queryset slice, so reject both explicitly.
    if batch_size < 1:
        logger.warning("Invalid audit log flush batch size %s; nothing will be deleted", batch_size)
        return 0, 0, False

    logger.info("Running Cleanup Task for Logentries with %d Months retention", retention_period)
    # Compute a datetime cutoff at start of the cutoff day to keep index-usage friendly
    retention_day = date.today() - relativedelta(months=retention_period)
    # Use a timestamp to avoid postgres having to cast to a Date field
    cutoff_dt = datetime.combine(retention_day, time.min, tzinfo=timezone.get_current_timezone())

    # Delete in batches to avoid long-running transactions and table locks
    deleted_total = 0
    batches_done = 0
    exhausted = False
    while batches_done < max_batches:
        batch_qs = LogEntry.objects.filter(timestamp__lt=cutoff_dt).order_by("pk")
        pks = list(batch_qs.values_list("pk", flat=True)[:batch_size])
        if not pks:
            if batches_done == 0:
                logger.info("No outdated Logentries found")
            exhausted = True
            break
        qs = LogEntry.objects.filter(pk__in=pks)
        deleted_count = qs._raw_delete(qs.db)
        deleted_total += int(deleted_count)
        batches_done += 1
        logger.info("Deleted batch %s (size ~%s), total deleted: %s", batches_done, batch_size, deleted_total)
        if len(pks) < batch_size:
            # A short batch means the query found no further outdated entries, so
            # skip the extra round trip and do not report the limit as reached.
            exhausted = True
            break

    # The loop ended either by exhausting the outdated entries or by hitting the cap
    reached_limit = not exhausted
    if reached_limit:
        logger.info("Reached max batches limit (%s). Remaining audit log entries will be deleted in the next run.", max_batches)
    else:
        logger.info("Total number of audit log entries deleted: %s", deleted_total)

    return deleted_total, batches_done, reached_limit
142+
143+
144+
@app.task(bind=True)
def flush_auditlog(*args, **kwargs):
    """Task entry point: flush the audit log without any overrides."""
    # Arguments supplied by the task runner are accepted but not used; passing None
    # for every override is the same as calling run_flush_auditlog() with no arguments.
    run_flush_auditlog(retention_period=None, batch_size=None, max_batches=None)
114147

115148

116149
@app.task(bind=True)

0 commit comments

Comments
 (0)