|
7 | 7 | import contextlib |
8 | 8 | import logging |
9 | 9 | import sys |
10 | | -from datetime import date, datetime, time |
11 | 10 |
|
12 | 11 | import pghistory |
13 | 12 | from dateutil.relativedelta import relativedelta |
| 13 | +from django.apps import apps |
14 | 14 | from django.conf import settings |
15 | 15 | from django.core.management import call_command |
16 | 16 | from django.db import models |
|
19 | 19 | logger = logging.getLogger(__name__) |
20 | 20 |
|
21 | 21 |
|
def _flush_models_in_batches(
    models_to_flush,
    timestamp_field: str,
    retention_period: int,
    batch_size: int,
    max_batches: int,
    *,
    dry_run: bool = False,
) -> tuple[int, int, bool]:
    """
    Generic batched deletion by timestamp for a set of models.

    Args:
        models_to_flush: Iterable of Django model classes to purge.
        timestamp_field: Name of the datetime field compared against the cutoff.
        retention_period: Retention window in months.
        batch_size: Maximum number of rows processed per batch.
        max_batches: Per-model cap on batches in one run.
        dry_run: When true, only count the rows that would be deleted.

    Returns:
        (deleted_or_would_delete_total, batches_done_or_needed, reached_limit)
        aggregated over all models; ``reached_limit`` is true if any model hit
        its ``max_batches`` cap (more rows may remain for the next run).
    """
    # Use a timestamp and not a date: this allows for efficient database index use.
    cutoff_dt = timezone.now() - relativedelta(months=retention_period)
    logger.info("Audit flush cutoff datetime: %s (retention_period=%s months)", cutoff_dt, retention_period)

    total_deleted = 0
    total_batches = 0
    reached_any_limit = False

    for Model in models_to_flush:
        deleted_total = 0
        batches_done = 0
        filter_kwargs = {f"{timestamp_field}__lt": cutoff_dt}
        last_pk = None
        verb = "Would delete" if dry_run else "Deleted"

        while batches_done < max_batches:
            batch_qs = Model.objects.filter(**filter_kwargs)
            if last_pk is not None:
                # Keyset pagination: continue strictly after the last processed pk
                # so dry runs advance instead of recounting the same rows.
                batch_qs = batch_qs.filter(pk__gt=last_pk)
            batch_qs = batch_qs.order_by("pk")

            pks = list(batch_qs.values_list("pk", flat=True)[:batch_size])
            if not pks:
                if batches_done == 0:
                    logger.info("No outdated %s entries found", Model._meta.object_name)
                break

            if dry_run:
                deleted_count = len(pks)
            else:
                # NOTE(review): _raw_delete is a private Django API used here for
                # speed; it bypasses signals/cascades — confirm these audit/event
                # tables have no dependents that rely on cascade behavior.
                qs = Model.objects.filter(pk__in=pks)
                deleted_count = int(qs._raw_delete(qs.db))

            deleted_total += deleted_count
            batches_done += 1
            last_pk = pks[-1]

            logger.info(
                "%s %s batch %s (size ~%s), total %s: %s",
                verb,
                Model._meta.object_name,
                batches_done,
                batch_size,
                verb.lower(),
                deleted_total,
            )

        total_deleted += deleted_total
        total_batches += batches_done
        if batches_done >= max_batches:
            reached_any_limit = True

    return total_deleted, total_batches, reached_any_limit
| 81 | + |
| 82 | + |
def _flush_django_auditlog(retention_period: int, batch_size: int, max_batches: int, *, dry_run: bool = False) -> tuple[int, int, bool]:
    """Flush django-auditlog ``LogEntry`` rows older than the retention window."""
    # Import inside to avoid model import issues at startup
    from auditlog.models import LogEntry  # noqa: PLC0415

    return _flush_models_in_batches(
        [LogEntry],
        "timestamp",
        retention_period,
        batch_size,
        max_batches,
        dry_run=dry_run,
    )
| 88 | + |
| 89 | + |
def _iter_pghistory_event_models():
    """Yield pghistory Event models registered under the dojo app."""
    for candidate in apps.get_app_config("dojo").get_models():
        # Only *Event models carrying a pgh_created_at field can be flushed
        # by timestamp; skip everything else.
        if candidate._meta.object_name.endswith("Event") and any(
            field.name == "pgh_created_at" for field in candidate._meta.fields
        ):
            yield candidate
| 97 | + |
| 98 | + |
def _flush_pghistory_events(retention_period: int, batch_size: int, max_batches: int, *, dry_run: bool = False) -> tuple[int, int, bool]:
    """Flush all dojo pghistory Event models by their ``pgh_created_at`` timestamp."""
    event_models = list(_iter_pghistory_event_models())
    return _flush_models_in_batches(
        event_models, "pgh_created_at", retention_period, batch_size, max_batches, dry_run=dry_run,
    )
| 102 | + |
| 103 | + |
def run_flush_auditlog(retention_period: int | None = None,
                       batch_size: int | None = None,
                       max_batches: int | None = None,
                       *,
                       dry_run: bool = False) -> tuple[int, int, bool]:
    """
    Deletes audit entries older than the configured retention from both
    django-auditlog and django-pghistory log entries.

    Args:
        retention_period: Retention in months; defaults to
            ``settings.AUDITLOG_FLUSH_RETENTION_PERIOD`` (-1 disables flushing).
        batch_size: Rows per deletion batch; defaults to
            ``settings.AUDITLOG_FLUSH_BATCH_SIZE`` (1000).
        max_batches: Per-model batch cap; defaults to
            ``settings.AUDITLOG_FLUSH_MAX_BATCHES`` (100).
        dry_run: When true, only report what would be deleted.

    Returns a tuple of (deleted_total, batches_done, reached_limit).
    """
    retention_period = retention_period if retention_period is not None else getattr(settings, "AUDITLOG_FLUSH_RETENTION_PERIOD", -1)
    if retention_period < 0:
        # A negative retention period means the feature is switched off.
        logger.info("Flushing audit logs is disabled")
        return 0, 0, False

    batch_size = batch_size if batch_size is not None else getattr(settings, "AUDITLOG_FLUSH_BATCH_SIZE", 1000)
    max_batches = max_batches if max_batches is not None else getattr(settings, "AUDITLOG_FLUSH_MAX_BATCHES", 100)

    phase = "DRY RUN" if dry_run else "Cleanup"
    logger.info("Running %s for django-auditlog entries with %d Months retention across all backends", phase, retention_period)
    d_deleted, d_batches, d_limit = _flush_django_auditlog(retention_period, batch_size, max_batches, dry_run=dry_run)
    logger.info("Running %s for django-pghistory entries with %d Months retention across all backends", phase, retention_period)
    p_deleted, p_batches, p_limit = _flush_pghistory_events(retention_period, batch_size, max_batches, dry_run=dry_run)

    total_deleted = d_deleted + p_deleted
    total_batches = d_batches + p_batches
    reached_limit = bool(d_limit or p_limit)

    verb = "would delete" if dry_run else "deleted"
    logger.info("Audit flush summary: django-auditlog %s=%s batches=%s; pghistory %s=%s batches=%s; total_%s=%s total_batches=%s",
                verb, d_deleted, d_batches, verb, p_deleted, p_batches, verb.replace(" ", "_"), total_deleted, total_batches)
    if reached_limit:
        # Keep operators informed when the batch cap truncated the run; more
        # rows remain and will be handled by the next scheduled run.
        logger.info("Reached max batches limit (%s). Remaining audit log entries will be deleted in the next run.", max_batches)

    return total_deleted, total_batches, reached_limit
67 | 138 |
|
68 | 139 |
|
69 | 140 | def enable_django_auditlog(): |
|
0 commit comments