Re: [PATCH] audit: add backlog high water mark metric

All of lore.kernel.org
 help / color / mirror / Atom feed

From: Steve Grubb <sgrubb@redhat.com>
To: audit@vger.kernel.org, linux-kernel@vger.kernel.org,
	Ricardo Robaina <rrobaina@redhat.com>
Cc: paul@paul-moore.com, eparis@redhat.com,
	Ricardo Robaina <rrobaina@redhat.com>
Subject: Re: [PATCH] audit: add backlog high water mark metric
Date: Mon, 23 Mar 2026 12:48:40 -0400	[thread overview]
Message-ID: <22574086.4csPzL39Zc@x2> (raw)
In-Reply-To: <20260323150700.614139-1-rrobaina@redhat.com>

On Monday, March 23, 2026 11:07:00 AM Eastern Daylight Time Ricardo Robaina 
wrote:
> Currently, determining the optimal `audit_backlog_limit` relies on
> instantaneous polling of the queue size. This misses transient
> micro-bursts, making it difficult for system administrators to know
> if their queue is adequately sized or if they are at risk of
> dropping events.
> 
> This patch introduces `backlog_max_depth`, a high-water mark metric
> that tracks the maximum number of buffers in the audit queue since
> the system was booted or the metric was last reset. To minimize
> performance overhead in the fast-path, the metric is updated using
> a lockless cmpxchg loop in `__audit_log_end()`.
> 
> Userspace can read-and-clear this metric by sending an `AUDIT_SET`
> message with the `AUDIT_STATUS_BACKLOG_MAX_DEPTH` mask. To support
> periodic telemetry polling (e.g., statsd, Prometheus), the reset
> operation atomically returns the snapshot of the high-water mark
> right before zeroing it, ensuring no peaks are lost between polls.

From a user space point of view, this looks good. User space support was
co-developed alongside this patch to ensure it works as advertised.

Acked-by: Steve Grubb <sgrubb@redhat.com>

-Steve

> Link: https://github.com/linux-audit/audit-kernel/issues/63
> Suggested-by: Steve Grubb <sgrubb@redhat.com>
> Signed-off-by: Ricardo Robaina <rrobaina@redhat.com>
> ---
>  include/linux/audit.h      |  3 ++-
>  include/uapi/linux/audit.h |  2 ++
>  kernel/audit.c             | 32 ++++++++++++++++++++++++++++++++
>  3 files changed, 36 insertions(+), 1 deletion(-)
> 
> diff --git a/include/linux/audit.h b/include/linux/audit.h
> index d79218bf075a..53132b303c20 100644
> --- a/include/linux/audit.h
> +++ b/include/linux/audit.h
> @@ -22,7 +22,8 @@
>  			  AUDIT_STATUS_BACKLOG_LIMIT | \
>  			  AUDIT_STATUS_BACKLOG_WAIT_TIME | \
>  			  AUDIT_STATUS_LOST | \
> -			  AUDIT_STATUS_BACKLOG_WAIT_TIME_ACTUAL)
> +			  AUDIT_STATUS_BACKLOG_WAIT_TIME_ACTUAL | \
> +			  AUDIT_STATUS_BACKLOG_MAX_DEPTH)
> 
>  #define AUDIT_INO_UNSET ((unsigned long)-1)
>  #define AUDIT_DEV_UNSET ((dev_t)-1)
> diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h
> index e8f5ce677df7..862ca93c0c31 100644
> --- a/include/uapi/linux/audit.h
> +++ b/include/uapi/linux/audit.h
> @@ -355,6 +355,7 @@ enum {
>  #define AUDIT_STATUS_BACKLOG_WAIT_TIME		0x0020
>  #define AUDIT_STATUS_LOST			0x0040
>  #define AUDIT_STATUS_BACKLOG_WAIT_TIME_ACTUAL	0x0080
> +#define AUDIT_STATUS_BACKLOG_MAX_DEPTH		0x0100
> 
>  #define AUDIT_FEATURE_BITMAP_BACKLOG_LIMIT	0x00000001
>  #define AUDIT_FEATURE_BITMAP_BACKLOG_WAIT_TIME	0x00000002
> @@ -486,6 +487,7 @@ struct audit_status {
>  	__u32           backlog_wait_time_actual;/* time spent waiting while
>  						  * message limit exceeded
>  						  */
> +	__u32		backlog_max_depth; /* message queue max depth */
>  };
> 
>  struct audit_features {
> diff --git a/kernel/audit.c b/kernel/audit.c
> index e1d489bc2dff..256053cb6132 100644
> --- a/kernel/audit.c
> +++ b/kernel/audit.c
> @@ -163,6 +163,9 @@ static struct sk_buff_head audit_retry_queue;
>  /* queue msgs waiting for new auditd connection */
>  static struct sk_buff_head audit_hold_queue;
> 
> +/* audit queue high water mark since last startup or reset */
> +static atomic_t audit_backlog_max_depth __read_mostly = ATOMIC_INIT(0);
> +
>  /* queue servicing thread */
>  static struct task_struct *kauditd_task;
>  static DECLARE_WAIT_QUEUE_HEAD(kauditd_wait);
> @@ -1286,6 +1289,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
>  		s.backlog		   = skb_queue_len(&audit_queue);
>  		s.feature_bitmap	   = AUDIT_FEATURE_BITMAP_ALL;
>  		s.backlog_wait_time	   = audit_backlog_wait_time;
> +		s.backlog_max_depth	   = atomic_read(&audit_backlog_max_depth);
>  		s.backlog_wait_time_actual = atomic_read(&audit_backlog_wait_time_actual);
>  		audit_send_reply(skb, seq, AUDIT_GET, 0, 0, &s, sizeof(s));
>  		break;
> @@ -1399,6 +1403,12 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
>  			audit_log_config_change("backlog_wait_time_actual", 0, actual, 1);
>  			return actual;
>  		}
> +		if (s.mask == AUDIT_STATUS_BACKLOG_MAX_DEPTH) {
> +			u32 old_depth = atomic_xchg(&audit_backlog_max_depth, 0);
> +
> +			audit_log_config_change("backlog_max_depth", 0, old_depth, 1);
> +			return old_depth;
> +		}
>  		break;
>  	}
>  	case AUDIT_GET_FEATURE:
> @@ -2761,6 +2771,25 @@ int audit_signal_info(int sig, struct task_struct *t)
>  	return audit_signal_info_syscall(t);
>  }
> 
> +/*
> + * audit_update_backlog_max_depth - update the audit queue high water mark
> + *
> + * Safely updates the audit_backlog_max_depth metric using a lockless
> + * cmpxchg loop. This ensures the high-water mark is accurately tracked
> + * even when multiple CPUs are logging audit records concurrently.
> + */
> +static inline void audit_update_backlog_max_depth(void)
> +{
> +	u32 q_len = skb_queue_len(&audit_queue);
> +	u32 q_max = atomic_read(&audit_backlog_max_depth);
> +
> +	while (unlikely(q_len > q_max)) {
> +		if (likely(atomic_try_cmpxchg(&audit_backlog_max_depth,
> +					      &q_max, q_len)))
> +			break;
> +	}
> +}
> +
>  /**
>   * __audit_log_end - enqueue one audit record
>   * @skb: the buffer to send
> @@ -2777,6 +2806,9 @@ static void __audit_log_end(struct sk_buff *skb)
> 
>  		/* queue the netlink packet */
>  		skb_queue_tail(&audit_queue, skb);
> +
> +		/* update backlog high water mark */
> +		audit_update_backlog_max_depth();
>  	} else {
>  		audit_log_lost("rate limit exceeded");
>  		kfree_skb(skb);

next prev parent reply	other threads:[~2026-03-23 16:48 UTC|newest]

Thread overview: 11+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-03-23 15:07 [PATCH] audit: add backlog high water mark metric Ricardo Robaina
2026-03-23 16:48 ` Steve Grubb [this message]
2026-04-10 21:34 ` Paul Moore
2026-04-15  3:45   ` Steve Grubb
2026-04-15 15:19     ` Paul Moore
2026-04-15 15:21       ` Paul Moore
2026-04-16 20:33         ` Steve Grubb
2026-04-16 20:51           ` Paul Moore
2026-04-16 20:58             ` Paul Moore
2026-04-17 13:02               ` Ricardo Robaina
2026-05-12 15:54                 ` Paul Moore

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=22574086.4csPzL39Zc@x2 \
    --to=sgrubb@redhat.com \
    --cc=audit@vger.kernel.org \
    --cc=eparis@redhat.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=paul@paul-moore.com \
    --cc=rrobaina@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.