* [RFC PATCH 3/9] audit: queue netlink multicast sends just like we do for unicast sends
From: Paul Moore @ 2016-11-24 1:41 UTC (permalink / raw)
To: linux-audit
In-Reply-To: <147995162874.18851.11207690780223712694.stgit@sifl>
From: Paul Moore <paul@paul-moore.com>
Sending audit netlink multicast messages is bad for all the same
reasons that sending audit netlink unicast messages is bad, so this
patch reworks things so that we don't do the multicast send in
audit_log_end(), we do it from the dedicated kauditd_thread thread just
as we do for unicast messages.
See the GitHub issues below for more information/history:
* https://github.com/linux-audit/audit-kernel/issues/23
* https://github.com/linux-audit/audit-kernel/issues/22
Signed-off-by: Paul Moore <paul@paul-moore.com>
---
kernel/audit.c | 70 ++++++++++++++++++++++++++++----------------------------
1 file changed, 35 insertions(+), 35 deletions(-)
diff --git a/kernel/audit.c b/kernel/audit.c
index b61642b..801247a 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -511,26 +511,46 @@ static void flush_hold_queue(void)
static int kauditd_thread(void *dummy)
{
+ struct sk_buff *skb;
+ struct nlmsghdr *nlh;
+
set_freezable();
while (!kthread_should_stop()) {
- struct sk_buff *skb;
-
flush_hold_queue();
skb = skb_dequeue(&audit_skb_queue);
-
if (skb) {
- if (!audit_backlog_limit ||
- (skb_queue_len(&audit_skb_queue) <= audit_backlog_limit))
- wake_up(&audit_backlog_wait);
+ nlh = nlmsg_hdr(skb);
+
+ /* if nlh->nlmsg_len is zero then we haven't attempted
+ * to send the message to userspace yet, if nlmsg_len
+ * is non-zero then we have attempted to send it to
+ * the multicast listeners as well as auditd; keep
+ * trying to send to auditd but don't repeat the
+ * multicast send */
+ if (nlh->nlmsg_len == 0) {
+ nlh->nlmsg_len = skb->len;
+ kauditd_send_multicast_skb(skb, GFP_KERNEL);
+
+ /* see the note in kauditd_send_multicast_skb
+ * regarding the nlh->nlmsg_len value and why
+ * it differs between the multicast and unicast
+ * clients */
+ nlh->nlmsg_len -= NLMSG_HDRLEN;
+ }
+
if (audit_pid)
kauditd_send_skb(skb);
else
audit_printk_skb(skb);
- continue;
+ } else {
+ /* we have flushed the backlog so wake everyone up who
+ * is blocked and go to sleep until we have something
+ * in the backlog again */
+ wake_up(&audit_backlog_wait);
+ wait_event_freezable(kauditd_wait,
+ skb_queue_len(&audit_skb_queue));
}
-
- wait_event_freezable(kauditd_wait, skb_queue_len(&audit_skb_queue));
}
return 0;
}
@@ -1969,10 +1989,10 @@ out:
* audit_log_end - end one audit record
* @ab: the audit_buffer
*
- * netlink_unicast() cannot be called inside an irq context because it blocks
- * (last arg, flags, is not set to MSG_DONTWAIT), so the audit buffer is placed
- * on a queue and a tasklet is scheduled to remove them from the queue outside
- * the irq context. May be called in any context.
+ * We can not do a netlink send inside an irq context because it blocks (last
+ * arg, flags, is not set to MSG_DONTWAIT), so the audit buffer is placed on a
+ * queue and a tasklet is scheduled to remove them from the queue outside the
+ * irq context. May be called in any context.
*/
void audit_log_end(struct audit_buffer *ab)
{
@@ -1981,28 +2001,8 @@ void audit_log_end(struct audit_buffer *ab)
if (!audit_rate_check()) {
audit_log_lost("rate limit exceeded");
} else {
- struct nlmsghdr *nlh = nlmsg_hdr(ab->skb);
-
- nlh->nlmsg_len = ab->skb->len;
- kauditd_send_multicast_skb(ab->skb, ab->gfp_mask);
-
- /*
- * The original kaudit unicast socket sends up messages with
- * nlmsg_len set to the payload length rather than the entire
- * message length. This breaks the standard set by netlink.
- * The existing auditd daemon assumes this breakage. Fixing
- * this would require co-ordinating a change in the established
- * protocol between the kaudit kernel subsystem and the auditd
- * userspace code.
- */
- nlh->nlmsg_len -= NLMSG_HDRLEN;
-
- if (audit_pid) {
- skb_queue_tail(&audit_skb_queue, ab->skb);
- wake_up_interruptible(&kauditd_wait);
- } else {
- audit_printk_skb(ab->skb);
- }
+ skb_queue_tail(&audit_skb_queue, ab->skb);
+ wake_up_interruptible(&kauditd_wait);
ab->skb = NULL;
}
audit_buffer_free(ab);
^ permalink raw reply related
* [RFC PATCH 4/9] audit: rename the queues and kauditd related functions
From: Paul Moore @ 2016-11-24 1:41 UTC (permalink / raw)
To: linux-audit
In-Reply-To: <147995162874.18851.11207690780223712694.stgit@sifl>
From: Paul Moore <paul@paul-moore.com>
The audit queue names can be shortened and the record sending
helpers associated with the kauditd task could be named better, do
these small cleanups now to make life easier once we start reworking
the queues and kauditd code.
Signed-off-by: Paul Moore <paul@paul-moore.com>
---
kernel/audit.c | 40 ++++++++++++++++++++--------------------
1 file changed, 20 insertions(+), 20 deletions(-)
diff --git a/kernel/audit.c b/kernel/audit.c
index 801247a..6ac1df1 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -138,9 +138,9 @@ static DEFINE_SPINLOCK(audit_freelist_lock);
static int audit_freelist_count;
static LIST_HEAD(audit_freelist);
-static struct sk_buff_head audit_skb_queue;
+static struct sk_buff_head audit_queue;
/* queue of skbs to send to auditd when/if it comes back */
-static struct sk_buff_head audit_skb_hold_queue;
+static struct sk_buff_head audit_hold_queue;
static struct task_struct *kauditd_task;
static DECLARE_WAIT_QUEUE_HEAD(kauditd_wait);
static DECLARE_WAIT_QUEUE_HEAD(audit_backlog_wait);
@@ -377,8 +377,8 @@ static void audit_hold_skb(struct sk_buff *skb)
{
if (audit_default &&
(!audit_backlog_limit ||
- skb_queue_len(&audit_skb_hold_queue) < audit_backlog_limit))
- skb_queue_tail(&audit_skb_hold_queue, skb);
+ skb_queue_len(&audit_hold_queue) < audit_backlog_limit))
+ skb_queue_tail(&audit_hold_queue, skb);
else
kfree_skb(skb);
}
@@ -387,7 +387,7 @@ static void audit_hold_skb(struct sk_buff *skb)
* For one reason or another this nlh isn't getting delivered to the userspace
* audit daemon, just send it to printk.
*/
-static void audit_printk_skb(struct sk_buff *skb)
+static void kauditd_printk_skb(struct sk_buff *skb)
{
struct nlmsghdr *nlh = nlmsg_hdr(skb);
char *data = nlmsg_data(nlh);
@@ -402,7 +402,7 @@ static void audit_printk_skb(struct sk_buff *skb)
audit_hold_skb(skb);
}
-static void kauditd_send_skb(struct sk_buff *skb)
+static void kauditd_send_unicast_skb(struct sk_buff *skb)
{
int err;
int attempts = 0;
@@ -493,13 +493,13 @@ static void flush_hold_queue(void)
if (!audit_default || !audit_pid)
return;
- skb = skb_dequeue(&audit_skb_hold_queue);
+ skb = skb_dequeue(&audit_hold_queue);
if (likely(!skb))
return;
while (skb && audit_pid) {
- kauditd_send_skb(skb);
- skb = skb_dequeue(&audit_skb_hold_queue);
+ kauditd_send_unicast_skb(skb);
+ skb = skb_dequeue(&audit_hold_queue);
}
/*
@@ -518,7 +518,7 @@ static int kauditd_thread(void *dummy)
while (!kthread_should_stop()) {
flush_hold_queue();
- skb = skb_dequeue(&audit_skb_queue);
+ skb = skb_dequeue(&audit_queue);
if (skb) {
nlh = nlmsg_hdr(skb);
@@ -540,16 +540,16 @@ static int kauditd_thread(void *dummy)
}
if (audit_pid)
- kauditd_send_skb(skb);
+ kauditd_send_unicast_skb(skb);
else
- audit_printk_skb(skb);
+ kauditd_printk_skb(skb);
} else {
/* we have flushed the backlog so wake everyone up who
* is blocked and go to sleep until we have something
* in the backlog again */
wake_up(&audit_backlog_wait);
wait_event_freezable(kauditd_wait,
- skb_queue_len(&audit_skb_queue));
+ skb_queue_len(&audit_queue));
}
}
return 0;
@@ -865,7 +865,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
s.rate_limit = audit_rate_limit;
s.backlog_limit = audit_backlog_limit;
s.lost = atomic_read(&audit_lost);
- s.backlog = skb_queue_len(&audit_skb_queue);
+ s.backlog = skb_queue_len(&audit_queue);
s.feature_bitmap = AUDIT_FEATURE_BITMAP_ALL;
s.backlog_wait_time = audit_backlog_wait_time_master;
audit_send_reply(skb, seq, AUDIT_GET, 0, 0, &s, sizeof(s));
@@ -1200,8 +1200,8 @@ static int __init audit_init(void)
audit_default ? "enabled" : "disabled");
register_pernet_subsys(&audit_net_ops);
- skb_queue_head_init(&audit_skb_queue);
- skb_queue_head_init(&audit_skb_hold_queue);
+ skb_queue_head_init(&audit_queue);
+ skb_queue_head_init(&audit_hold_queue);
audit_initialized = AUDIT_INITIALIZED;
audit_enabled = audit_default;
audit_ever_enabled |= !!audit_default;
@@ -1357,7 +1357,7 @@ static long wait_for_auditd(long sleep_time)
DECLARE_WAITQUEUE(wait, current);
if (audit_backlog_limit &&
- skb_queue_len(&audit_skb_queue) > audit_backlog_limit) {
+ skb_queue_len(&audit_queue) > audit_backlog_limit) {
add_wait_queue_exclusive(&audit_backlog_wait, &wait);
set_current_state(TASK_UNINTERRUPTIBLE);
sleep_time = schedule_timeout(sleep_time);
@@ -1406,7 +1406,7 @@ struct audit_buffer *audit_log_start(struct audit_context *ctx, gfp_t gfp_mask,
}
while (audit_backlog_limit
- && skb_queue_len(&audit_skb_queue) > audit_backlog_limit + reserve) {
+ && skb_queue_len(&audit_queue) > audit_backlog_limit + reserve) {
if (gfp_mask & __GFP_DIRECT_RECLAIM && audit_backlog_wait_time) {
long sleep_time;
@@ -1419,7 +1419,7 @@ struct audit_buffer *audit_log_start(struct audit_context *ctx, gfp_t gfp_mask,
}
if (audit_rate_check() && printk_ratelimit())
pr_warn("audit_backlog=%d > audit_backlog_limit=%d\n",
- skb_queue_len(&audit_skb_queue),
+ skb_queue_len(&audit_queue),
audit_backlog_limit);
audit_log_lost("backlog limit exceeded");
audit_backlog_wait_time = 0;
@@ -2001,7 +2001,7 @@ void audit_log_end(struct audit_buffer *ab)
if (!audit_rate_check()) {
audit_log_lost("rate limit exceeded");
} else {
- skb_queue_tail(&audit_skb_queue, ab->skb);
+ skb_queue_tail(&audit_queue, ab->skb);
wake_up_interruptible(&kauditd_wait);
ab->skb = NULL;
}
^ permalink raw reply related
* [RFC PATCH 5/9] audit: rework the audit queue handling
From: Paul Moore @ 2016-11-24 1:41 UTC (permalink / raw)
To: linux-audit
In-Reply-To: <147995162874.18851.11207690780223712694.stgit@sifl>
From: Paul Moore <paul@paul-moore.com>
The audit record backlog queue has always been a bit of a mess, and
the moving the multicast send into kauditd_thread() from
audit_log_end() only makes things worse. This patch attempts to fix
the backlog queue with a better design that should hold up better
under load and have less of a performance impact at syscall
invocation time.
While it looks like there is a log going on in this patch, the main
change is the move from a single backlog queue to three queues:
* A queue for holding records generated from audit_log_end() that
haven't been consumed by kauditd_thread() (audit_queue).
* A queue for holding records that have been sent via multicast but
had a temporary failure when sending via unicast and need a resend
(audit_retry_queue).
* A queue for holding records that haven't been sent via unicast
because no one is listening (audit_hold_queue).
Special care is taken in this patch to ensure that the proper
record ordering is preserved, e.g. we send everything in the hold
queue first, then the retry queue, and finally the main queue.
Signed-off-by: Paul Moore <paul@paul-moore.com>
---
kernel/audit.c | 347 ++++++++++++++++++++++++++++++++++++--------------------
1 file changed, 226 insertions(+), 121 deletions(-)
diff --git a/kernel/audit.c b/kernel/audit.c
index 6ac1df1..f4056bc 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -138,11 +138,18 @@ static DEFINE_SPINLOCK(audit_freelist_lock);
static int audit_freelist_count;
static LIST_HEAD(audit_freelist);
+/* queue msgs to send via kauditd_task */
static struct sk_buff_head audit_queue;
-/* queue of skbs to send to auditd when/if it comes back */
+/* queue msgs due to temporary unicast send problems */
+static struct sk_buff_head audit_retry_queue;
+/* queue msgs waiting for new auditd connection */
static struct sk_buff_head audit_hold_queue;
+
+/* queue servicing thread */
static struct task_struct *kauditd_task;
static DECLARE_WAIT_QUEUE_HEAD(kauditd_wait);
+
+/* waitqueue for callers who are blocked on the audit backlog */
static DECLARE_WAIT_QUEUE_HEAD(audit_backlog_wait);
static struct audit_features af = {.vers = AUDIT_FEATURE_VERSION,
@@ -365,25 +372,6 @@ static int audit_set_failure(u32 state)
}
/*
- * Queue skbs to be sent to auditd when/if it comes back. These skbs should
- * already have been sent via prink/syslog and so if these messages are dropped
- * it is not a huge concern since we already passed the audit_log_lost()
- * notification and stuff. This is just nice to get audit messages during
- * boot before auditd is running or messages generated while auditd is stopped.
- * This only holds messages is audit_default is set, aka booting with audit=1
- * or building your kernel that way.
- */
-static void audit_hold_skb(struct sk_buff *skb)
-{
- if (audit_default &&
- (!audit_backlog_limit ||
- skb_queue_len(&audit_hold_queue) < audit_backlog_limit))
- skb_queue_tail(&audit_hold_queue, skb);
- else
- kfree_skb(skb);
-}
-
-/*
* For one reason or another this nlh isn't getting delivered to the userspace
* audit daemon, just send it to printk.
*/
@@ -398,58 +386,114 @@ static void kauditd_printk_skb(struct sk_buff *skb)
else
audit_log_lost("printk limit exceeded");
}
+}
+
+/**
+ * kauditd_hold_skb - Queue an audit record, waiting for auditd
+ * @skb: audit record
+ *
+ * Description:
+ * Queue the audit record, waiting for an instance of auditd. When this
+ * function is called we haven't given up yet on sending the record, but things
+ * are not looking good. The first thing we want to do is try to write the
+ * record via printk and then see if we want to try and hold on to the record
+ * and queue it, if we have room. If we want to hold on to the record, but we
+ * don't have room, record a record lost message.
+ */
+static void kauditd_hold_skb(struct sk_buff *skb)
+{
+ /* at this point it is uncertain if we will ever send this to auditd so
+ * try to send the message via printk before we go any further */
+ kauditd_printk_skb(skb);
+
+ /* can we just silently drop the message? */
+ if (!audit_default) {
+ kfree_skb(skb);
+ return;
+ }
+
+ /* if we have room, queue the message */
+ if (!audit_backlog_limit ||
+ skb_queue_len(&audit_hold_queue) < audit_backlog_limit) {
+ skb_queue_tail(&audit_hold_queue, skb);
+ return;
+ }
- audit_hold_skb(skb);
+ /* we have no other options - drop the message */
+ audit_log_lost("kauditd hold queue overflow");
+ kfree_skb(skb);
}
-static void kauditd_send_unicast_skb(struct sk_buff *skb)
+/**
+ * kauditd_retry_skb - Queue an audit record, attempt to send again to auditd
+ * @skb: audit record
+ *
+ * Description:
+ * Not as serious as kauditd_hold_skb() as we still have a connected auditd,
+ * but for some reason we are having problems sending it audit records so
+ * queue the given record and attempt to resend.
+ */
+static void kauditd_retry_skb(struct sk_buff *skb)
{
- int err;
- int attempts = 0;
-#define AUDITD_RETRIES 5
+ /* NOTE: because records should only live in the retry queue for a
+ * short period of time, before either being sent or moved to the hold
+ * queue, we don't currently enforce a limit on this queue */
+ skb_queue_tail(&audit_retry_queue, skb);
+}
+
+/**
+ * auditd_reset - Disconnect the auditd connection
+ *
+ * Description:
+ * Break the auditd/kauditd connection and move all the records in the retry
+ * queue into the hold queue in case auditd reconnects.
+ */
+static void auditd_reset(void)
+{
+ struct sk_buff *skb;
+
+ /* break the connection */
+ audit_pid = 0;
+ audit_sock = NULL;
+
+ /* flush all of the retry queue to the hold queue */
+ while ((skb = skb_dequeue(&audit_retry_queue)))
+ kauditd_hold_skb(skb);
+}
+
+/**
+ * kauditd_send_unicast_skb - Send a record via unicast to auditd
+ * @skb: audit record
+ */
+static int kauditd_send_unicast_skb(struct sk_buff *skb)
+{
+ int rc;
-restart:
- /* take a reference in case we can't send it and we want to hold it */
+ /* get an extra skb reference in case we fail to send */
skb_get(skb);
- err = netlink_unicast(audit_sock, skb, audit_nlk_portid, 0);
- if (err < 0) {
- pr_err("netlink_unicast sending to audit_pid=%d returned error: %d\n",
- audit_pid, err);
- if (audit_pid) {
- if (err == -ECONNREFUSED || err == -EPERM
- || ++attempts >= AUDITD_RETRIES) {
- char s[32];
-
- snprintf(s, sizeof(s), "audit_pid=%d reset", audit_pid);
- audit_log_lost(s);
- audit_pid = 0;
- audit_sock = NULL;
- } else {
- pr_warn("re-scheduling(#%d) write to audit_pid=%d\n",
- attempts, audit_pid);
- set_current_state(TASK_INTERRUPTIBLE);
- schedule();
- goto restart;
- }
- }
- /* we might get lucky and get this in the next auditd */
- audit_hold_skb(skb);
- } else
- /* drop the extra reference if sent ok */
+ rc = netlink_unicast(audit_sock, skb, audit_nlk_portid, 0);
+ if (rc >= 0) {
consume_skb(skb);
+ rc = 0;
+ }
+
+ return rc;
}
/*
- * kauditd_send_multicast_skb - send the skb to multicast userspace listeners
+ * kauditd_send_multicast_skb - Send a record to any multicast listeners
+ * @skb: audit record
*
+ * Description:
* This function doesn't consume an skb as might be expected since it has to
* copy it anyways.
*/
-static void kauditd_send_multicast_skb(struct sk_buff *skb, gfp_t gfp_mask)
+static void kauditd_send_multicast_skb(struct sk_buff *skb)
{
- struct sk_buff *copy;
- struct audit_net *aunet = net_generic(&init_net, audit_net_id);
- struct sock *sock = aunet->nlsk;
+ struct sk_buff *copy;
+ struct audit_net *aunet = net_generic(&init_net, audit_net_id);
+ struct sock *sock = aunet->nlsk;
+ struct nlmsghdr *nlh;
if (!netlink_has_listeners(sock, AUDIT_NLGRP_READLOG))
return;
@@ -464,94 +508,155 @@ static void kauditd_send_multicast_skb(struct sk_buff *skb, gfp_t gfp_mask)
* no reason for new multicast clients to continue with this
* non-compliance.
*/
- copy = skb_copy(skb, gfp_mask);
+ copy = skb_copy(skb, GFP_KERNEL);
if (!copy)
return;
+ nlh = nlmsg_hdr(copy);
+ nlh->nlmsg_len = skb->len;
- nlmsg_multicast(sock, copy, 0, AUDIT_NLGRP_READLOG, gfp_mask);
+ nlmsg_multicast(sock, copy, 0, AUDIT_NLGRP_READLOG, GFP_KERNEL);
}
-/*
- * flush_hold_queue - empty the hold queue if auditd appears
- *
- * If auditd just started, drain the queue of messages already
- * sent to syslog/printk. Remember loss here is ok. We already
- * called audit_log_lost() if it didn't go out normally. so the
- * race between the skb_dequeue and the next check for audit_pid
- * doesn't matter.
+/**
+ * kauditd_wake_condition - Return true when it is time to wake kauditd_thread
*
- * If you ever find kauditd to be too slow we can get a perf win
- * by doing our own locking and keeping better track if there
- * are messages in this queue. I don't see the need now, but
- * in 5 years when I want to play with this again I'll see this
- * note and still have no friggin idea what i'm thinking today.
+ * Description:
+ * This function is for use by the wait_event_freezable() call in
+ * kauditd_thread().
*/
-static void flush_hold_queue(void)
+static int kauditd_wake_condition(void)
{
- struct sk_buff *skb;
-
- if (!audit_default || !audit_pid)
- return;
-
- skb = skb_dequeue(&audit_hold_queue);
- if (likely(!skb))
- return;
+ static int pid_last = 0;
+ int rc;
+ int pid = audit_pid;
- while (skb && audit_pid) {
- kauditd_send_unicast_skb(skb);
- skb = skb_dequeue(&audit_hold_queue);
- }
+ /* wake on new messages or a change in the connected auditd */
+ rc = skb_queue_len(&audit_queue) || (pid && pid != pid_last);
+ if (rc)
+ pid_last = pid;
- /*
- * if auditd just disappeared but we
- * dequeued an skb we need to drop ref
- */
- consume_skb(skb);
+ return rc;
}
static int kauditd_thread(void *dummy)
{
+ int rc;
+ int auditd = 0;
+ int reschedule = 0;
struct sk_buff *skb;
struct nlmsghdr *nlh;
+#define UNICAST_RETRIES 5
+#define AUDITD_BAD(x,y) \
+ ((x) == -ECONNREFUSED || (x) == -EPERM || ++(y) >= UNICAST_RETRIES)
+
+ /* NOTE: we do invalidate the auditd connection flag on any sending
+ * errors, but we only "restore" the connection flag at specific places
+ * in the loop in order to help ensure proper ordering of audit
+ * records */
+
set_freezable();
while (!kthread_should_stop()) {
- flush_hold_queue();
+ /* NOTE: possible area for future improvement is to look at
+ * the hold and retry queues, since only this thread
+ * has access to these queues we might be able to do
+ * our own queuing and skip some/all of the locking */
+
+ /* NOTE: it might be a fun experiment to split the hold and
+ * retry queue handling to another thread, but the
+ * synchronization issues and other overhead might kill
+ * any performance gains */
+
+ /* attempt to flush the hold queue */
+ while (auditd && (skb = skb_dequeue(&audit_hold_queue))) {
+ rc = kauditd_send_unicast_skb(skb);
+ if (rc) {
+ /* requeue to the same spot */
+ skb_queue_head(&audit_hold_queue, skb);
+
+ auditd = 0;
+ if (AUDITD_BAD(rc, reschedule)) {
+ auditd_reset();
+ reschedule = 0;
+ }
+ } else
+ /* we were able to send successfully */
+ reschedule = 0;
+ }
+
+ /* attempt to flush the retry queue */
+ while (auditd && (skb = skb_dequeue(&audit_retry_queue))) {
+ rc = kauditd_send_unicast_skb(skb);
+ if (rc) {
+ auditd = 0;
+ if (AUDITD_BAD(rc, reschedule)) {
+ kauditd_hold_skb(skb);
+ auditd_reset();
+ reschedule = 0;
+ } else
+ /* temporary problem (we hope), queue
+ * to the same spot and retry */
+ skb_queue_head(&audit_retry_queue, skb);
+ } else
+ /* we were able to send successfully */
+ reschedule = 0;
+ }
+ /* standard queue processing, try to be as quick as possible */
+quick_loop:
skb = skb_dequeue(&audit_queue);
if (skb) {
+ /* setup the netlink header, see the comments in
+ * kauditd_send_multicast_skb() for length quirks */
nlh = nlmsg_hdr(skb);
-
- /* if nlh->nlmsg_len is zero then we haven't attempted
- * to send the message to userspace yet, if nlmsg_len
- * is non-zero then we have attempted to send it to
- * the multicast listeners as well as auditd; keep
- * trying to send to auditd but don't repeat the
- * multicast send */
- if (nlh->nlmsg_len == 0) {
- nlh->nlmsg_len = skb->len;
- kauditd_send_multicast_skb(skb, GFP_KERNEL);
-
- /* see the note in kauditd_send_multicast_skb
- * regarding the nlh->nlmsg_len value and why
- * it differs between the multicast and unicast
- * clients */
- nlh->nlmsg_len -= NLMSG_HDRLEN;
- }
-
- if (audit_pid)
- kauditd_send_unicast_skb(skb);
+ nlh->nlmsg_len = skb->len - NLMSG_HDRLEN;
+
+ /* attempt to send to any multicast listeners */
+ kauditd_send_multicast_skb(skb);
+
+ /* attempt to send to auditd, queue on failure */
+ if (auditd) {
+ rc = kauditd_send_unicast_skb(skb);
+ if (rc) {
+ auditd = 0;
+ if (AUDITD_BAD(rc, reschedule)) {
+ auditd_reset();
+ reschedule = 0;
+ }
+
+ /* move to the retry queue */
+ kauditd_retry_skb(skb);
+ } else
+ /* everything is working so go fast! */
+ goto quick_loop;
+ } else if (reschedule)
+ /* we are currently having problems, move to
+ * the retry queue */
+ kauditd_retry_skb(skb);
else
- kauditd_printk_skb(skb);
+ /* dump the message via printk and hold it */
+ kauditd_hold_skb(skb);
} else {
- /* we have flushed the backlog so wake everyone up who
- * is blocked and go to sleep until we have something
- * in the backlog again */
+ /* we have flushed the backlog so wake everyone */
wake_up(&audit_backlog_wait);
- wait_event_freezable(kauditd_wait,
- skb_queue_len(&audit_queue));
+
+ /* if everything is okay with auditd (if present), go
+ * to sleep until there is something new in the queue
+ * or we have a change in the connected auditd;
+ * otherwise simply reschedule to give things a chance
+ * to recover */
+ if (reschedule) {
+ set_current_state(TASK_INTERRUPTIBLE);
+ schedule();
+ } else
+ wait_event_freezable(kauditd_wait,
+ kauditd_wake_condition());
+
+ /* update the auditd connection status */
+ auditd = (audit_pid ? 1 : 0);
}
}
+
return 0;
}
@@ -616,6 +721,7 @@ static int audit_send_reply_thread(void *arg)
kfree(reply);
return 0;
}
+
/**
* audit_send_reply - send an audit reply message via netlink
* @request_skb: skb of request we are replying to (used to target the reply)
@@ -1171,10 +1277,8 @@ static void __net_exit audit_net_exit(struct net *net)
{
struct audit_net *aunet = net_generic(net, audit_net_id);
struct sock *sock = aunet->nlsk;
- if (sock == audit_sock) {
- audit_pid = 0;
- audit_sock = NULL;
- }
+ if (sock == audit_sock)
+ auditd_reset();
RCU_INIT_POINTER(aunet->nlsk, NULL);
synchronize_net();
@@ -1201,6 +1305,7 @@ static int __init audit_init(void)
register_pernet_subsys(&audit_net_ops);
skb_queue_head_init(&audit_queue);
+ skb_queue_head_init(&audit_retry_queue);
skb_queue_head_init(&audit_hold_queue);
audit_initialized = AUDIT_INITIALIZED;
audit_enabled = audit_default;
^ permalink raw reply related
* [RFC PATCH 6/9] audit: rework audit_log_start()
From: Paul Moore @ 2016-11-24 1:42 UTC (permalink / raw)
To: linux-audit
In-Reply-To: <147995162874.18851.11207690780223712694.stgit@sifl>
From: Paul Moore <paul@paul-moore.com>
The backlog queue handling in audit_log_start() is a little odd with
some questionable design decisions, this patch attempts to rectify
this with the following changes:
* Never make auditd wait, ignore any backlog limits as we need auditd
awake so it can drain the backlog queue.
* When we hit a backlog limit and start dropping records, don't wake
all the tasks sleeping on the backlog, that's silly. Instead, let
kauditd_thread() take care of waking everyone once it has had a chance
to drain the backlog queue.
* Don't keep a global backlog timeout countdown, make it per-task. A
per-task timer means we won't have all the sleeping tasks waking at
the same time and hammering on an already stressed backlog queue.
Signed-off-by: Paul Moore <paul@paul-moore.com>
---
kernel/audit.c | 92 ++++++++++++++++++++++----------------------------------
1 file changed, 36 insertions(+), 56 deletions(-)
diff --git a/kernel/audit.c b/kernel/audit.c
index f4056bc..e23ce6c 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -107,7 +107,6 @@ static u32 audit_rate_limit;
* When set to zero, this means unlimited. */
static u32 audit_backlog_limit = 64;
#define AUDIT_BACKLOG_WAIT_TIME (60 * HZ)
-static u32 audit_backlog_wait_time_master = AUDIT_BACKLOG_WAIT_TIME;
static u32 audit_backlog_wait_time = AUDIT_BACKLOG_WAIT_TIME;
/* The identity of the user shutting down the audit system. */
@@ -345,7 +344,7 @@ static int audit_set_backlog_limit(u32 limit)
static int audit_set_backlog_wait_time(u32 timeout)
{
return audit_do_config_change("audit_backlog_wait_time",
- &audit_backlog_wait_time_master, timeout);
+ &audit_backlog_wait_time, timeout);
}
static int audit_set_enabled(u32 state)
@@ -973,7 +972,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
s.lost = atomic_read(&audit_lost);
s.backlog = skb_queue_len(&audit_queue);
s.feature_bitmap = AUDIT_FEATURE_BITMAP_ALL;
- s.backlog_wait_time = audit_backlog_wait_time_master;
+ s.backlog_wait_time = audit_backlog_wait_time;
audit_send_reply(skb, seq, AUDIT_GET, 0, 0, &s, sizeof(s));
break;
}
@@ -1454,24 +1453,6 @@ static inline void audit_get_stamp(struct audit_context *ctx,
}
}
-/*
- * Wait for auditd to drain the queue a little
- */
-static long wait_for_auditd(long sleep_time)
-{
- DECLARE_WAITQUEUE(wait, current);
-
- if (audit_backlog_limit &&
- skb_queue_len(&audit_queue) > audit_backlog_limit) {
- add_wait_queue_exclusive(&audit_backlog_wait, &wait);
- set_current_state(TASK_UNINTERRUPTIBLE);
- sleep_time = schedule_timeout(sleep_time);
- remove_wait_queue(&audit_backlog_wait, &wait);
- }
-
- return sleep_time;
-}
-
/**
* audit_log_start - obtain an audit buffer
* @ctx: audit_context (may be NULL)
@@ -1490,12 +1471,9 @@ static long wait_for_auditd(long sleep_time)
struct audit_buffer *audit_log_start(struct audit_context *ctx, gfp_t gfp_mask,
int type)
{
- struct audit_buffer *ab = NULL;
- struct timespec t;
- unsigned int uninitialized_var(serial);
- int reserve = 5; /* Allow atomic callers to go up to five
- entries over the normal backlog limit */
- unsigned long timeout_start = jiffies;
+ struct audit_buffer *ab;
+ struct timespec t;
+ unsigned int uninitialized_var(serial);
if (audit_initialized != AUDIT_INITIALIZED)
return NULL;
@@ -1503,38 +1481,40 @@ struct audit_buffer *audit_log_start(struct audit_context *ctx, gfp_t gfp_mask,
if (unlikely(!audit_filter(type, AUDIT_FILTER_TYPE)))
return NULL;
- if (gfp_mask & __GFP_DIRECT_RECLAIM) {
- if (audit_pid && audit_pid == current->tgid)
- gfp_mask &= ~__GFP_DIRECT_RECLAIM;
- else
- reserve = 0;
- }
-
- while (audit_backlog_limit
- && skb_queue_len(&audit_queue) > audit_backlog_limit + reserve) {
- if (gfp_mask & __GFP_DIRECT_RECLAIM && audit_backlog_wait_time) {
- long sleep_time;
-
- sleep_time = timeout_start + audit_backlog_wait_time - jiffies;
- if (sleep_time > 0) {
- sleep_time = wait_for_auditd(sleep_time);
- if (sleep_time > 0)
- continue;
+ /* don't ever fail/sleep on auditd since we need auditd to drain the
+ * queue; also, when we are checking for auditd, compare PIDs using
+ * task_tgid_vnr() since auditd_pid is set in audit_receive_msg() using
+ * a PID anchored in the caller's namespace */
+ if (!(audit_pid && audit_pid == task_tgid_vnr(current))) {
+ long sleep_time = audit_backlog_wait_time;
+
+ while (audit_backlog_limit &&
+ (skb_queue_len(&audit_queue) > audit_backlog_limit)) {
+ /* wake kauditd to try and flush the queue */
+ wake_up_interruptible(&kauditd_wait);
+
+ /* sleep if we are allowed and we haven't exhausted our
+ * backlog wait limit */
+ if ((gfp_mask & __GFP_DIRECT_RECLAIM) &&
+ (sleep_time > 0)) {
+ DECLARE_WAITQUEUE(wait, current);
+
+ add_wait_queue_exclusive(&audit_backlog_wait,
+ &wait);
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ sleep_time = schedule_timeout(sleep_time);
+ remove_wait_queue(&audit_backlog_wait, &wait);
+ } else {
+ if (audit_rate_check() && printk_ratelimit())
+ pr_warn("audit_backlog=%d > audit_backlog_limit=%d\n",
+ skb_queue_len(&audit_queue),
+ audit_backlog_limit);
+ audit_log_lost("backlog limit exceeded");
+ return NULL;
}
}
- if (audit_rate_check() && printk_ratelimit())
- pr_warn("audit_backlog=%d > audit_backlog_limit=%d\n",
- skb_queue_len(&audit_queue),
- audit_backlog_limit);
- audit_log_lost("backlog limit exceeded");
- audit_backlog_wait_time = 0;
- wake_up(&audit_backlog_wait);
- return NULL;
}
- if (!reserve && !audit_backlog_wait_time)
- audit_backlog_wait_time = audit_backlog_wait_time_master;
-
ab = audit_buffer_alloc(ctx, gfp_mask, type);
if (!ab) {
audit_log_lost("out of memory in audit_log_start");
@@ -1542,9 +1522,9 @@ struct audit_buffer *audit_log_start(struct audit_context *ctx, gfp_t gfp_mask,
}
audit_get_stamp(ab->ctx, &t, &serial);
-
audit_log_format(ab, "audit(%lu.%03lu:%u): ",
t.tv_sec, t.tv_nsec/1000000, serial);
+
return ab;
}
^ permalink raw reply related
* [RFC PATCH 7/9] audit: wake up kauditd_thread after auditd registers
From: Paul Moore @ 2016-11-24 1:42 UTC (permalink / raw)
To: linux-audit
In-Reply-To: <147995162874.18851.11207690780223712694.stgit@sifl>
From: Paul Moore <paul@paul-moore.com>
This patch was suggested by Richard Briggs back in 2015, see the link
to the mail archive below. Unfortunately, that patch is no longer
even remotely valid due to other changes to the code.
* https://www.redhat.com/archives/linux-audit/2015-October/msg00075.html
Suggested-by: Richard Guy Briggs <rgb@redhat.com>
Signed-off-by: Paul Moore <paul@paul-moore.com>
---
kernel/audit.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/kernel/audit.c b/kernel/audit.c
index e23ce6c..0572e5d 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -1009,6 +1009,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
audit_pid = new_pid;
audit_nlk_portid = NETLINK_CB(skb).portid;
audit_sock = skb->sk;
+ wake_up_interruptible(&kauditd_wait);
}
if (s.mask & AUDIT_STATUS_RATE_LIMIT) {
err = audit_set_rate_limit(s.rate_limit);
^ permalink raw reply related
* [RFC PATCH 8/9] audit: handle a clean auditd shutdown with grace
From: Paul Moore @ 2016-11-24 1:42 UTC (permalink / raw)
To: linux-audit
In-Reply-To: <147995162874.18851.11207690780223712694.stgit@sifl>
From: Paul Moore <paul@paul-moore.com>
When auditd stops cleanly it sets 'auditd_pid' to 0 with an
AUDIT_SET message, in this case we should reset our backlog
queues via the auditd_reset() function. This patch also adds
a 'auditd_pid' check to the top of kauditd_send_unicast_skb()
so we can fail quicker.
Signed-off-by: Paul Moore <paul@paul-moore.com>
---
kernel/audit.c | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/kernel/audit.c b/kernel/audit.c
index 0572e5d..b447a6b 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -468,6 +468,10 @@ static int kauditd_send_unicast_skb(struct sk_buff *skb)
{
int rc;
+ /* if we know nothing is connected, don't even try the netlink call */
+ if (!audit_pid)
+ return -ECONNREFUSED;
+
/* get an extra skb reference in case we fail to send */
skb_get(skb);
rc = netlink_unicast(audit_sock, skb, audit_nlk_portid, 0);
@@ -1009,6 +1013,8 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
audit_pid = new_pid;
audit_nlk_portid = NETLINK_CB(skb).portid;
audit_sock = skb->sk;
+ if (!new_pid)
+ auditd_reset();
wake_up_interruptible(&kauditd_wait);
}
if (s.mask & AUDIT_STATUS_RATE_LIMIT) {
^ permalink raw reply related
* [RFC PATCH 9/9] audit: don't ever sleep on a command record/message
From: Paul Moore @ 2016-11-24 1:42 UTC (permalink / raw)
To: linux-audit
In-Reply-To: <147995162874.18851.11207690780223712694.stgit@sifl>
From: Paul Moore <paul@paul-moore.com>
Sleeping on a command record/message in audit_log_start() could slow
something, e.g. auditd, from doing something important, e.g. clean
shutdown, which could present problems on a heavily loaded system.
This patch allows tasks to bypass any queue restrictions if they are
logging a command record/message.
Signed-off-by: Paul Moore <paul@paul-moore.com>
---
kernel/audit.c | 18 +++++++++++++-----
1 file changed, 13 insertions(+), 5 deletions(-)
diff --git a/kernel/audit.c b/kernel/audit.c
index b447a6b..f20eee0 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -1488,11 +1488,19 @@ struct audit_buffer *audit_log_start(struct audit_context *ctx, gfp_t gfp_mask,
if (unlikely(!audit_filter(type, AUDIT_FILTER_TYPE)))
return NULL;
- /* don't ever fail/sleep on auditd since we need auditd to drain the
- * queue; also, when we are checking for auditd, compare PIDs using
- * task_tgid_vnr() since auditd_pid is set in audit_receive_msg() using
- * a PID anchored in the caller's namespace */
- if (!(audit_pid && audit_pid == task_tgid_vnr(current))) {
+ /* don't ever fail/sleep on these two conditions:
+ * 1. auditd generated record - since we need auditd to drain the
+ * queue; also, when we are checking for auditd, compare PIDs using
+ * task_tgid_vnr() since auditd_pid is set in audit_receive_msg()
+ * using a PID anchored in the caller's namespace
+ * 2. audit command message - record types 1000 through 1099 inclusive
+ * are command messages/records used to manage the kernel subsystem
+ * and the audit userspace, blocking on these messages could cause
+ * problems under load so don't do it (note: not all of these
+ * command types are valid as record types, but it is quicker to
+ * just check two ints than a series of ints in a if/switch stmt) */
+ if (!((audit_pid && audit_pid == task_tgid_vnr(current)) ||
+ (type >= 1000 && type <= 1099))) {
long sleep_time = audit_backlog_wait_time;
while (audit_backlog_limit &&
^ permalink raw reply related
* Re: [RFC PATCH 5/9] audit: rework the audit queue handling
From: Richard Guy Briggs @ 2016-11-24 6:35 UTC (permalink / raw)
To: Paul Moore; +Cc: linux-audit
In-Reply-To: <147995171690.18851.7542409069799319067.stgit@sifl>
On 2016-11-23 20:41, Paul Moore wrote:
> From: Paul Moore <paul@paul-moore.com>
>
> The audit record backlog queue has always been a bit of a mess, and
> the moving the multicast send into kauditd_thread() from
> audit_log_end() only makes things worse. This patch attempts to fix
> the backlog queue with a better design that should hold up better
> under load and have less of a performance impact at syscall
> invocation time.
>
> While it looks like there is a log going on in this patch, the main
> change is the move from a single backlog queue to three queues:
>
> * A queue for holding records generated from audit_log_end() that
> haven't been consumed by kauditd_thread() (audit_queue).
>
> * A queue for holding records that have been sent via multicast but
> had a temporary failure when sending via unicast and need a resend
> (audit_retry_queue).
>
> * A queue for holding records that haven't been sent via unicast
> because no one is listening (audit_hold_queue).
>
> Special care is taken in this patch to ensure that the proper
> record ordering is preserved, e.g. we send everything in the hold
> queue first, then the retry queue, and finally the main queue.
>
> Signed-off-by: Paul Moore <paul@paul-moore.com>
> ---
> kernel/audit.c | 347 ++++++++++++++++++++++++++++++++++++--------------------
> 1 file changed, 226 insertions(+), 121 deletions(-)
>
> diff --git a/kernel/audit.c b/kernel/audit.c
> index 6ac1df1..f4056bc 100644
> --- a/kernel/audit.c
> +++ b/kernel/audit.c
> @@ -138,11 +138,18 @@ static DEFINE_SPINLOCK(audit_freelist_lock);
> static int audit_freelist_count;
> static LIST_HEAD(audit_freelist);
>
> +/* queue msgs to send via kauditd_task */
> static struct sk_buff_head audit_queue;
> -/* queue of skbs to send to auditd when/if it comes back */
> +/* queue msgs due to temporary unicast send problems */
> +static struct sk_buff_head audit_retry_queue;
> +/* queue msgs waiting for new auditd connection */
> static struct sk_buff_head audit_hold_queue;
> +
> +/* queue servicing thread */
> static struct task_struct *kauditd_task;
> static DECLARE_WAIT_QUEUE_HEAD(kauditd_wait);
> +
> +/* waitqueue for callers who are blocked on the audit backlog */
> static DECLARE_WAIT_QUEUE_HEAD(audit_backlog_wait);
>
> static struct audit_features af = {.vers = AUDIT_FEATURE_VERSION,
> @@ -365,25 +372,6 @@ static int audit_set_failure(u32 state)
> }
>
> /*
> - * Queue skbs to be sent to auditd when/if it comes back. These skbs should
> - * already have been sent via prink/syslog and so if these messages are dropped
> - * it is not a huge concern since we already passed the audit_log_lost()
> - * notification and stuff. This is just nice to get audit messages during
> - * boot before auditd is running or messages generated while auditd is stopped.
> - * This only holds messages is audit_default is set, aka booting with audit=1
> - * or building your kernel that way.
> - */
> -static void audit_hold_skb(struct sk_buff *skb)
> -{
> - if (audit_default &&
> - (!audit_backlog_limit ||
> - skb_queue_len(&audit_hold_queue) < audit_backlog_limit))
> - skb_queue_tail(&audit_hold_queue, skb);
> - else
> - kfree_skb(skb);
> -}
> -
> -/*
> * For one reason or another this nlh isn't getting delivered to the userspace
> * audit daemon, just send it to printk.
> */
> @@ -398,58 +386,114 @@ static void kauditd_printk_skb(struct sk_buff *skb)
> else
> audit_log_lost("printk limit exceeded");
> }
> +}
> +
> +/**
> + * kauditd_hold_skb - Queue an audit record, waiting for auditd
> + * @skb: audit record
> + *
> + * Description:
> + * Queue the audit record, waiting for an instance of auditd. When this
> + * function is called we haven't given up yet on sending the record, but things
> + * are not looking good. The first thing we want to do is try to write the
> + * record via printk and then see if we want to try and hold on to the record
> + * and queue it, if we have room. If we want to hold on to the record, but we
> + * don't have room, record a record lost message.
> + */
> +static void kauditd_hold_skb(struct sk_buff *skb)
> +{
> + /* at this point it is uncertain if we will ever send this to auditd so
> + * try to send the message via printk before we go any further */
> + kauditd_printk_skb(skb);
> +
> + /* can we just silently drop the message? */
> + if (!audit_default) {
> + kfree_skb(skb);
> + return;
> + }
> +
> + /* if we have room, queue the message */
> + if (!audit_backlog_limit ||
> + skb_queue_len(&audit_hold_queue) < audit_backlog_limit) {
> + skb_queue_tail(&audit_hold_queue, skb);
> + return;
> + }
>
> - audit_hold_skb(skb);
> + /* we have no other options - drop the message */
> + audit_log_lost("kauditd hold queue overflow");
> + kfree_skb(skb);
> }
>
> -static void kauditd_send_unicast_skb(struct sk_buff *skb)
> +/**
> + * kauditd_retry_skb - Queue an audit record, attempt to send again to auditd
> + * @skb: audit record
> + *
> + * Description:
> + * Not as serious as kauditd_hold_skb() as we still have a connected auditd,
> + * but for some reason we are having problems sending it audit records so
> + * queue the given record and attempt to resend.
> + */
> +static void kauditd_retry_skb(struct sk_buff *skb)
> {
> - int err;
> - int attempts = 0;
> -#define AUDITD_RETRIES 5
> + /* NOTE: because records should only live in the retry queue for a
> + * short period of time, before either being sent or moved to the hold
> + * queue, we don't currently enforce a limit on this queue */
> + skb_queue_tail(&audit_retry_queue, skb);
> +}
Can kauditd_retry_skb() be eliminated entirely and just call
skb_queue_tail(&audit_retry_queue, skb) directly? The comment is
helpful, but found the code harder to follow in kauditd_thread() as a
result.
> +
> +/**
> + * auditd_reset - Disconnect the auditd connection
> + *
> + * Description:
> + * Break the auditd/kauditd connection and move all the records in the retry
> + * queue into the hold queue in case auditd reconnects.
> + */
> +static void auditd_reset(void)
> +{
> + struct sk_buff *skb;
> +
> + /* break the connection */
> + audit_pid = 0;
> + audit_sock = NULL;
> +
> + /* flush all of the retry queue to the hold queue */
> + while ((skb = skb_dequeue(&audit_retry_queue)))
> + kauditd_hold_skb(skb);
> +}
> +
> +/**
> + * kauditd_send_unicast_skb - Send a record via unicast to auditd
> + * @skb: audit record
> + */
> +static int kauditd_send_unicast_skb(struct sk_buff *skb)
> +{
> + int rc;
>
> -restart:
> - /* take a reference in case we can't send it and we want to hold it */
> + /* get an extra skb reference in case we fail to send */
> skb_get(skb);
> - err = netlink_unicast(audit_sock, skb, audit_nlk_portid, 0);
> - if (err < 0) {
> - pr_err("netlink_unicast sending to audit_pid=%d returned error: %d\n",
> - audit_pid, err);
> - if (audit_pid) {
> - if (err == -ECONNREFUSED || err == -EPERM
> - || ++attempts >= AUDITD_RETRIES) {
> - char s[32];
> -
> - snprintf(s, sizeof(s), "audit_pid=%d reset", audit_pid);
> - audit_log_lost(s);
> - audit_pid = 0;
> - audit_sock = NULL;
> - } else {
> - pr_warn("re-scheduling(#%d) write to audit_pid=%d\n",
> - attempts, audit_pid);
> - set_current_state(TASK_INTERRUPTIBLE);
> - schedule();
> - goto restart;
> - }
> - }
> - /* we might get lucky and get this in the next auditd */
> - audit_hold_skb(skb);
> - } else
> - /* drop the extra reference if sent ok */
> + rc = netlink_unicast(audit_sock, skb, audit_nlk_portid, 0);
> + if (rc >= 0) {
> consume_skb(skb);
> + rc = 0;
> + }
> +
> + return rc;
> }
>
> /*
> - * kauditd_send_multicast_skb - send the skb to multicast userspace listeners
> + * kauditd_send_multicast_skb - Send a record to any multicast listeners
> + * @skb: audit record
> *
> + * Description:
> * This function doesn't consume an skb as might be expected since it has to
> * copy it anyways.
> */
> -static void kauditd_send_multicast_skb(struct sk_buff *skb, gfp_t gfp_mask)
> +static void kauditd_send_multicast_skb(struct sk_buff *skb)
> {
> - struct sk_buff *copy;
> - struct audit_net *aunet = net_generic(&init_net, audit_net_id);
> - struct sock *sock = aunet->nlsk;
> + struct sk_buff *copy;
> + struct audit_net *aunet = net_generic(&init_net, audit_net_id);
> + struct sock *sock = aunet->nlsk;
> + struct nlmsghdr *nlh;
>
> if (!netlink_has_listeners(sock, AUDIT_NLGRP_READLOG))
> return;
> @@ -464,94 +508,155 @@ static void kauditd_send_multicast_skb(struct sk_buff *skb, gfp_t gfp_mask)
> * no reason for new multicast clients to continue with this
> * non-compliance.
> */
> - copy = skb_copy(skb, gfp_mask);
> + copy = skb_copy(skb, GFP_KERNEL);
> if (!copy)
> return;
> + nlh = nlmsg_hdr(copy);
> + nlh->nlmsg_len = skb->len;
>
> - nlmsg_multicast(sock, copy, 0, AUDIT_NLGRP_READLOG, gfp_mask);
> + nlmsg_multicast(sock, copy, 0, AUDIT_NLGRP_READLOG, GFP_KERNEL);
> }
>
> -/*
> - * flush_hold_queue - empty the hold queue if auditd appears
> - *
> - * If auditd just started, drain the queue of messages already
> - * sent to syslog/printk. Remember loss here is ok. We already
> - * called audit_log_lost() if it didn't go out normally. so the
> - * race between the skb_dequeue and the next check for audit_pid
> - * doesn't matter.
> +/**
> + * kauditd_wake_condition - Return true when it is time to wake kauditd_thread
> *
> - * If you ever find kauditd to be too slow we can get a perf win
> - * by doing our own locking and keeping better track if there
> - * are messages in this queue. I don't see the need now, but
> - * in 5 years when I want to play with this again I'll see this
> - * note and still have no friggin idea what i'm thinking today.
> + * Description:
> + * This function is for use by the wait_event_freezable() call in
> + * kauditd_thread().
> */
> -static void flush_hold_queue(void)
> +static int kauditd_wake_condition(void)
> {
> - struct sk_buff *skb;
> -
> - if (!audit_default || !audit_pid)
> - return;
> -
> - skb = skb_dequeue(&audit_hold_queue);
> - if (likely(!skb))
> - return;
> + static int pid_last = 0;
> + int rc;
> + int pid = audit_pid;
>
> - while (skb && audit_pid) {
> - kauditd_send_unicast_skb(skb);
> - skb = skb_dequeue(&audit_hold_queue);
> - }
> + /* wake on new messages or a change in the connected auditd */
> + rc = skb_queue_len(&audit_queue) || (pid && pid != pid_last);
> + if (rc)
> + pid_last = pid;
>
> - /*
> - * if auditd just disappeared but we
> - * dequeued an skb we need to drop ref
> - */
> - consume_skb(skb);
> + return rc;
> }
>
> static int kauditd_thread(void *dummy)
> {
> + int rc;
> + int auditd = 0;
> + int reschedule = 0;
> struct sk_buff *skb;
> struct nlmsghdr *nlh;
>
> +#define UNICAST_RETRIES 5
> +#define AUDITD_BAD(x,y) \
> + ((x) == -ECONNREFUSED || (x) == -EPERM || ++(y) >= UNICAST_RETRIES)
> +
> + /* NOTE: we do invalidate the auditd connection flag on any sending
> + * errors, but we only "restore" the connection flag at specific places
> + * in the loop in order to help ensure proper ordering of audit
> + * records */
> +
> set_freezable();
> while (!kthread_should_stop()) {
> - flush_hold_queue();
> + /* NOTE: possible area for future improvement is to look at
> + * the hold and retry queues, since only this thread
> + * has access to these queues we might be able to do
> + * our own queuing and skip some/all of the locking */
> +
> + /* NOTE: it might be a fun experiment to split the hold and
> + * retry queue handling to another thread, but the
> + * synchronization issues and other overhead might kill
> + * any performance gains */
> +
> + /* attempt to flush the hold queue */
> + while (auditd && (skb = skb_dequeue(&audit_hold_queue))) {
> + rc = kauditd_send_unicast_skb(skb);
> + if (rc) {
> + /* requeue to the same spot */
> + skb_queue_head(&audit_hold_queue, skb);
> +
> + auditd = 0;
> + if (AUDITD_BAD(rc, reschedule)) {
> + auditd_reset();
> + reschedule = 0;
> + }
> + } else
> + /* we were able to send successfully */
> + reschedule = 0;
> + }
> +
> + /* attempt to flush the retry queue */
> + while (auditd && (skb = skb_dequeue(&audit_retry_queue))) {
> + rc = kauditd_send_unicast_skb(skb);
> + if (rc) {
> + auditd = 0;
> + if (AUDITD_BAD(rc, reschedule)) {
> + kauditd_hold_skb(skb);
> + auditd_reset();
> + reschedule = 0;
> + } else
> + /* temporary problem (we hope), queue
> + * to the same spot and retry */
> + skb_queue_head(&audit_retry_queue, skb);
> + } else
> + /* we were able to send successfully */
> + reschedule = 0;
> + }
>
> + /* standard queue processing, try to be as quick as possible */
> +quick_loop:
> skb = skb_dequeue(&audit_queue);
> if (skb) {
> + /* setup the netlink header, see the comments in
> + * kauditd_send_multicast_skb() for length quirks */
> nlh = nlmsg_hdr(skb);
> -
> - /* if nlh->nlmsg_len is zero then we haven't attempted
> - * to send the message to userspace yet, if nlmsg_len
> - * is non-zero then we have attempted to send it to
> - * the multicast listeners as well as auditd; keep
> - * trying to send to auditd but don't repeat the
> - * multicast send */
> - if (nlh->nlmsg_len == 0) {
> - nlh->nlmsg_len = skb->len;
> - kauditd_send_multicast_skb(skb, GFP_KERNEL);
> -
> - /* see the note in kauditd_send_multicast_skb
> - * regarding the nlh->nlmsg_len value and why
> - * it differs between the multicast and unicast
> - * clients */
> - nlh->nlmsg_len -= NLMSG_HDRLEN;
> - }
> -
> - if (audit_pid)
> - kauditd_send_unicast_skb(skb);
> + nlh->nlmsg_len = skb->len - NLMSG_HDRLEN;
> +
> + /* attempt to send to any multicast listeners */
> + kauditd_send_multicast_skb(skb);
> +
> + /* attempt to send to auditd, queue on failure */
> + if (auditd) {
> + rc = kauditd_send_unicast_skb(skb);
> + if (rc) {
> + auditd = 0;
> + if (AUDITD_BAD(rc, reschedule)) {
> + auditd_reset();
> + reschedule = 0;
> + }
> +
> + /* move to the retry queue */
> + kauditd_retry_skb(skb);
> + } else
> + /* everything is working so go fast! */
> + goto quick_loop;
> + } else if (reschedule)
> + /* we are currently having problems, move to
> + * the retry queue */
> + kauditd_retry_skb(skb);
> else
> - kauditd_printk_skb(skb);
> + /* dump the message via printk and hold it */
> + kauditd_hold_skb(skb);
> } else {
> - /* we have flushed the backlog so wake everyone up who
> - * is blocked and go to sleep until we have something
> - * in the backlog again */
> + /* we have flushed the backlog so wake everyone */
> wake_up(&audit_backlog_wait);
> - wait_event_freezable(kauditd_wait,
> - skb_queue_len(&audit_queue));
> +
> + /* if everything is okay with auditd (if present), go
> + * to sleep until there is something new in the queue
> + * or we have a change in the connected auditd;
> + * otherwise simply reschedule to give things a chance
> + * to recover */
> + if (reschedule) {
> + set_current_state(TASK_INTERRUPTIBLE);
> + schedule();
> + } else
> + wait_event_freezable(kauditd_wait,
> + kauditd_wake_condition());
> +
> + /* update the auditd connection status */
> + auditd = (audit_pid ? 1 : 0);
> }
> }
> +
> return 0;
> }
>
> @@ -616,6 +721,7 @@ static int audit_send_reply_thread(void *arg)
> kfree(reply);
> return 0;
> }
> +
> /**
> * audit_send_reply - send an audit reply message via netlink
> * @request_skb: skb of request we are replying to (used to target the reply)
> @@ -1171,10 +1277,8 @@ static void __net_exit audit_net_exit(struct net *net)
> {
> struct audit_net *aunet = net_generic(net, audit_net_id);
> struct sock *sock = aunet->nlsk;
> - if (sock == audit_sock) {
> - audit_pid = 0;
> - audit_sock = NULL;
> - }
> + if (sock == audit_sock)
> + auditd_reset();
>
> RCU_INIT_POINTER(aunet->nlsk, NULL);
> synchronize_net();
> @@ -1201,6 +1305,7 @@ static int __init audit_init(void)
> register_pernet_subsys(&audit_net_ops);
>
> skb_queue_head_init(&audit_queue);
> + skb_queue_head_init(&audit_retry_queue);
> skb_queue_head_init(&audit_hold_queue);
> audit_initialized = AUDIT_INITIALIZED;
> audit_enabled = audit_default;
>
> --
> Linux-audit mailing list
> Linux-audit@redhat.com
> https://www.redhat.com/mailman/listinfo/linux-audit
- RGB
--
Richard Guy Briggs <rgb@redhat.com>
Kernel Security Engineering, Base Operating Systems, Red Hat
Remote, Ottawa, Canada
Voice: +1.647.777.2635, Internal: (81) 32635
^ permalink raw reply
* Re: [RFC PATCH 6/9] audit: rework audit_log_start()
From: Richard Guy Briggs @ 2016-11-24 6:41 UTC (permalink / raw)
To: Paul Moore; +Cc: linux-audit
In-Reply-To: <147995172323.18851.11955381649104136907.stgit@sifl>
On 2016-11-23 20:42, Paul Moore wrote:
> From: Paul Moore <paul@paul-moore.com>
>
> The backlog queue handling in audit_log_start() is a little odd with
> some questionable design decisions, this patch attempts to rectify
> this with the following changes:
>
> * Never make auditd wait, ignore any backlog limits as we need auditd
> awake so it can drain the backlog queue.
>
> * When we hit a backlog limit and start dropping records, don't wake
> all the tasks sleeping on the backlog, that's silly. Instead, let
> kauditd_thread() take care of waking everyone once it has had a chance
> to drain the backlog queue.
>
> * Don't keep a global backlog timeout countdown, make it per-task. A
> per-task timer means we won't have all the sleeping tasks waking at
> the same time and hammering on an already stressed backlog queue.
>
> Signed-off-by: Paul Moore <paul@paul-moore.com>
> ---
> kernel/audit.c | 92 ++++++++++++++++++++++----------------------------------
> 1 file changed, 36 insertions(+), 56 deletions(-)
>
> diff --git a/kernel/audit.c b/kernel/audit.c
> index f4056bc..e23ce6c 100644
> --- a/kernel/audit.c
> +++ b/kernel/audit.c
> @@ -107,7 +107,6 @@ static u32 audit_rate_limit;
> * When set to zero, this means unlimited. */
> static u32 audit_backlog_limit = 64;
> #define AUDIT_BACKLOG_WAIT_TIME (60 * HZ)
> -static u32 audit_backlog_wait_time_master = AUDIT_BACKLOG_WAIT_TIME;
> static u32 audit_backlog_wait_time = AUDIT_BACKLOG_WAIT_TIME;
>
> /* The identity of the user shutting down the audit system. */
> @@ -345,7 +344,7 @@ static int audit_set_backlog_limit(u32 limit)
> static int audit_set_backlog_wait_time(u32 timeout)
> {
> return audit_do_config_change("audit_backlog_wait_time",
> - &audit_backlog_wait_time_master, timeout);
> + &audit_backlog_wait_time, timeout);
> }
>
> static int audit_set_enabled(u32 state)
> @@ -973,7 +972,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
> s.lost = atomic_read(&audit_lost);
> s.backlog = skb_queue_len(&audit_queue);
> s.feature_bitmap = AUDIT_FEATURE_BITMAP_ALL;
> - s.backlog_wait_time = audit_backlog_wait_time_master;
> + s.backlog_wait_time = audit_backlog_wait_time;
> audit_send_reply(skb, seq, AUDIT_GET, 0, 0, &s, sizeof(s));
> break;
> }
> @@ -1454,24 +1453,6 @@ static inline void audit_get_stamp(struct audit_context *ctx,
> }
> }
>
> -/*
> - * Wait for auditd to drain the queue a little
> - */
> -static long wait_for_auditd(long sleep_time)
> -{
> - DECLARE_WAITQUEUE(wait, current);
> -
> - if (audit_backlog_limit &&
> - skb_queue_len(&audit_queue) > audit_backlog_limit) {
> - add_wait_queue_exclusive(&audit_backlog_wait, &wait);
> - set_current_state(TASK_UNINTERRUPTIBLE);
> - sleep_time = schedule_timeout(sleep_time);
> - remove_wait_queue(&audit_backlog_wait, &wait);
> - }
> -
> - return sleep_time;
> -}
> -
> /**
> * audit_log_start - obtain an audit buffer
> * @ctx: audit_context (may be NULL)
> @@ -1490,12 +1471,9 @@ static long wait_for_auditd(long sleep_time)
> struct audit_buffer *audit_log_start(struct audit_context *ctx, gfp_t gfp_mask,
> int type)
> {
> - struct audit_buffer *ab = NULL;
> - struct timespec t;
> - unsigned int uninitialized_var(serial);
> - int reserve = 5; /* Allow atomic callers to go up to five
> - entries over the normal backlog limit */
> - unsigned long timeout_start = jiffies;
> + struct audit_buffer *ab;
> + struct timespec t;
> + unsigned int uninitialized_var(serial);
>
> if (audit_initialized != AUDIT_INITIALIZED)
> return NULL;
> @@ -1503,38 +1481,40 @@ struct audit_buffer *audit_log_start(struct audit_context *ctx, gfp_t gfp_mask,
> if (unlikely(!audit_filter(type, AUDIT_FILTER_TYPE)))
> return NULL;
>
> - if (gfp_mask & __GFP_DIRECT_RECLAIM) {
> - if (audit_pid && audit_pid == current->tgid)
> - gfp_mask &= ~__GFP_DIRECT_RECLAIM;
> - else
> - reserve = 0;
> - }
> -
> - while (audit_backlog_limit
> - && skb_queue_len(&audit_queue) > audit_backlog_limit + reserve) {
> - if (gfp_mask & __GFP_DIRECT_RECLAIM && audit_backlog_wait_time) {
> - long sleep_time;
> -
> - sleep_time = timeout_start + audit_backlog_wait_time - jiffies;
> - if (sleep_time > 0) {
> - sleep_time = wait_for_auditd(sleep_time);
> - if (sleep_time > 0)
> - continue;
> + /* don't ever fail/sleep on auditd since we need auditd to drain the
> + * queue; also, when we are checking for auditd, compare PIDs using
> + * task_tgid_vnr() since auditd_pid is set in audit_receive_msg() using
> + * a PID anchored in the caller's namespace */
> + if (!(audit_pid && audit_pid == task_tgid_vnr(current))) {
Could the change from task_tgid() [should be same as current->tgid] to
task_tgid_vnr() be pulled out into a seperate patch to make the
namespace behaviour change implicaiton much more clear?
> + long sleep_time = audit_backlog_wait_time;
> +
> + while (audit_backlog_limit &&
> + (skb_queue_len(&audit_queue) > audit_backlog_limit)) {
> + /* wake kauditd to try and flush the queue */
> + wake_up_interruptible(&kauditd_wait);
> +
> + /* sleep if we are allowed and we haven't exhausted our
> + * backlog wait limit */
> + if ((gfp_mask & __GFP_DIRECT_RECLAIM) &&
> + (sleep_time > 0)) {
> + DECLARE_WAITQUEUE(wait, current);
> +
> + add_wait_queue_exclusive(&audit_backlog_wait,
> + &wait);
> + set_current_state(TASK_UNINTERRUPTIBLE);
> + sleep_time = schedule_timeout(sleep_time);
> + remove_wait_queue(&audit_backlog_wait, &wait);
> + } else {
> + if (audit_rate_check() && printk_ratelimit())
> + pr_warn("audit_backlog=%d > audit_backlog_limit=%d\n",
> + skb_queue_len(&audit_queue),
> + audit_backlog_limit);
> + audit_log_lost("backlog limit exceeded");
> + return NULL;
> }
> }
> - if (audit_rate_check() && printk_ratelimit())
> - pr_warn("audit_backlog=%d > audit_backlog_limit=%d\n",
> - skb_queue_len(&audit_queue),
> - audit_backlog_limit);
> - audit_log_lost("backlog limit exceeded");
> - audit_backlog_wait_time = 0;
> - wake_up(&audit_backlog_wait);
> - return NULL;
> }
>
> - if (!reserve && !audit_backlog_wait_time)
> - audit_backlog_wait_time = audit_backlog_wait_time_master;
> -
> ab = audit_buffer_alloc(ctx, gfp_mask, type);
> if (!ab) {
> audit_log_lost("out of memory in audit_log_start");
> @@ -1542,9 +1522,9 @@ struct audit_buffer *audit_log_start(struct audit_context *ctx, gfp_t gfp_mask,
> }
>
> audit_get_stamp(ab->ctx, &t, &serial);
> -
> audit_log_format(ab, "audit(%lu.%03lu:%u): ",
> t.tv_sec, t.tv_nsec/1000000, serial);
> +
> return ab;
> }
>
>
> --
> Linux-audit mailing list
> Linux-audit@redhat.com
> https://www.redhat.com/mailman/listinfo/linux-audit
- RGB
--
Richard Guy Briggs <rgb@redhat.com>
Kernel Security Engineering, Base Operating Systems, Red Hat
Remote, Ottawa, Canada
Voice: +1.647.777.2635, Internal: (81) 32635
^ permalink raw reply
* Re: [RFC PATCH 5/9] audit: rework the audit queue handling
From: Paul Moore @ 2016-11-25 16:33 UTC (permalink / raw)
To: Richard Guy Briggs; +Cc: linux-audit
In-Reply-To: <20161124063536.GB6897@madcap2.tricolour.ca>
On Thu, Nov 24, 2016 at 1:35 AM, Richard Guy Briggs <rgb@redhat.com> wrote:
> On 2016-11-23 20:41, Paul Moore wrote:
>> From: Paul Moore <paul@paul-moore.com>
>>
>> The audit record backlog queue has always been a bit of a mess, and
>> the moving the multicast send into kauditd_thread() from
>> audit_log_end() only makes things worse. This patch attempts to fix
>> the backlog queue with a better design that should hold up better
>> under load and have less of a performance impact at syscall
>> invocation time.
>>
>> While it looks like there is a log going on in this patch, the main
>> change is the move from a single backlog queue to three queues:
>>
>> * A queue for holding records generated from audit_log_end() that
>> haven't been consumed by kauditd_thread() (audit_queue).
>>
>> * A queue for holding records that have been sent via multicast but
>> had a temporary failure when sending via unicast and need a resend
>> (audit_retry_queue).
>>
>> * A queue for holding records that haven't been sent via unicast
>> because no one is listening (audit_hold_queue).
>>
>> Special care is taken in this patch to ensure that the proper
>> record ordering is preserved, e.g. we send everything in the hold
>> queue first, then the retry queue, and finally the main queue.
>>
>> Signed-off-by: Paul Moore <paul@paul-moore.com>
>> ---
>> kernel/audit.c | 347 ++++++++++++++++++++++++++++++++++++--------------------
>> 1 file changed, 226 insertions(+), 121 deletions(-)
...
>> +/**
>> + * kauditd_retry_skb - Queue an audit record, attempt to send again to auditd
>> + * @skb: audit record
>> + *
>> + * Description:
>> + * Not as serious as kauditd_hold_skb() as we still have a connected auditd,
>> + * but for some reason we are having problems sending it audit records so
>> + * queue the given record and attempt to resend.
>> + */
>> +static void kauditd_retry_skb(struct sk_buff *skb)
>> {
>> - int err;
>> - int attempts = 0;
>> -#define AUDITD_RETRIES 5
>> + /* NOTE: because records should only live in the retry queue for a
>> + * short period of time, before either being sent or moved to the hold
>> + * queue, we don't currently enforce a limit on this queue */
>> + skb_queue_tail(&audit_retry_queue, skb);
>> +}
>
> Can kauditd_retry_skb() be eliminated entirely and just call
> skb_queue_tail(&audit_retry_queue, skb) directly? The comment is
> helpful, but found the code harder to follow in kauditd_thread() as a
> result.
It could, yes, and I waffled on that quite a bit while cleaning up
these patches before posting. Ultimately I decided to keep it
separate both to mimic the similar hold function as well as draw more
attention to the comment inside the function. There should be zero
performance impact, especially since this is a bit of a corner case,
and the compiler is most likely going to inline this function anyway.
--
paul moore
www.paul-moore.com
^ permalink raw reply
* Re: [RFC PATCH 6/9] audit: rework audit_log_start()
From: Paul Moore @ 2016-11-25 16:38 UTC (permalink / raw)
To: Richard Guy Briggs; +Cc: linux-audit
In-Reply-To: <20161124064125.GC6897@madcap2.tricolour.ca>
On Thu, Nov 24, 2016 at 1:41 AM, Richard Guy Briggs <rgb@redhat.com> wrote:
> On 2016-11-23 20:42, Paul Moore wrote:
>> From: Paul Moore <paul@paul-moore.com>
>>
>> The backlog queue handling in audit_log_start() is a little odd with
>> some questionable design decisions, this patch attempts to rectify
>> this with the following changes:
>>
>> * Never make auditd wait, ignore any backlog limits as we need auditd
>> awake so it can drain the backlog queue.
>>
>> * When we hit a backlog limit and start dropping records, don't wake
>> all the tasks sleeping on the backlog, that's silly. Instead, let
>> kauditd_thread() take care of waking everyone once it has had a chance
>> to drain the backlog queue.
>>
>> * Don't keep a global backlog timeout countdown, make it per-task. A
>> per-task timer means we won't have all the sleeping tasks waking at
>> the same time and hammering on an already stressed backlog queue.
>>
>> Signed-off-by: Paul Moore <paul@paul-moore.com>
>> ---
>> kernel/audit.c | 92 ++++++++++++++++++++++----------------------------------
...
>> 1 file changed, 36 insertions(+), 56 deletions(-)
>> + /* don't ever fail/sleep on auditd since we need auditd to drain the
>> + * queue; also, when we are checking for auditd, compare PIDs using
>> + * task_tgid_vnr() since auditd_pid is set in audit_receive_msg() using
>> + * a PID anchored in the caller's namespace */
>> + if (!(audit_pid && audit_pid == task_tgid_vnr(current))) {
>
> Could the change from task_tgid() [should be same as current->tgid] to
> task_tgid_vnr() be pulled out into a seperate patch to make the
> namespace behaviour change implicaiton much more clear?
Considering the comment above the if-conditional I don't think there
is much to be gained by splitting it out to a separate patch.
--
paul moore
www.paul-moore.com
^ permalink raw reply
* Re: [RFC PATCH 6/9] audit: rework audit_log_start()
From: Richard Guy Briggs @ 2016-11-25 16:41 UTC (permalink / raw)
To: Paul Moore; +Cc: linux-audit
In-Reply-To: <CAHC9VhQzMUWCRsqgVk8ScOtzYfMHP6vwH6ajZKWRKD6z5YjPoQ@mail.gmail.com>
On 2016-11-25 11:38, Paul Moore wrote:
> On Thu, Nov 24, 2016 at 1:41 AM, Richard Guy Briggs <rgb@redhat.com> wrote:
> > On 2016-11-23 20:42, Paul Moore wrote:
> >> From: Paul Moore <paul@paul-moore.com>
> >>
> >> The backlog queue handling in audit_log_start() is a little odd with
> >> some questionable design decisions, this patch attempts to rectify
> >> this with the following changes:
> >>
> >> * Never make auditd wait, ignore any backlog limits as we need auditd
> >> awake so it can drain the backlog queue.
> >>
> >> * When we hit a backlog limit and start dropping records, don't wake
> >> all the tasks sleeping on the backlog, that's silly. Instead, let
> >> kauditd_thread() take care of waking everyone once it has had a chance
> >> to drain the backlog queue.
> >>
> >> * Don't keep a global backlog timeout countdown, make it per-task. A
> >> per-task timer means we won't have all the sleeping tasks waking at
> >> the same time and hammering on an already stressed backlog queue.
> >>
> >> Signed-off-by: Paul Moore <paul@paul-moore.com>
> >> ---
> >> kernel/audit.c | 92 ++++++++++++++++++++++----------------------------------
>
> ...
>
> >> 1 file changed, 36 insertions(+), 56 deletions(-)
> >> + /* don't ever fail/sleep on auditd since we need auditd to drain the
> >> + * queue; also, when we are checking for auditd, compare PIDs using
> >> + * task_tgid_vnr() since auditd_pid is set in audit_receive_msg() using
> >> + * a PID anchored in the caller's namespace */
> >> + if (!(audit_pid && audit_pid == task_tgid_vnr(current))) {
> >
> > Could the change from task_tgid() [should be same as current->tgid] to
> > task_tgid_vnr() be pulled out into a seperate patch to make the
> > namespace behaviour change implicaiton much more clear?
>
> Considering the comment above the if-conditional I don't think there
> is much to be gained by splitting it out to a separate patch.
Except the ability to find it when someone goes looking for things that
change namespace behaviour, which this patch objective should not
include.
I agree it is well explained in the comment, but that change is
unrelated to the goal of the rest of the patch.
> paul moore
- RGB
--
Richard Guy Briggs <rgb@redhat.com>
Kernel Security Engineering, Base Operating Systems, Red Hat
Remote, Ottawa, Canada
Voice: +1.647.777.2635, Internal: (81) 32635
^ permalink raw reply
* Re: [RFC PATCH 6/9] audit: rework audit_log_start()
From: Paul Moore @ 2016-11-25 16:50 UTC (permalink / raw)
To: Richard Guy Briggs; +Cc: linux-audit
In-Reply-To: <20161125164136.GA26673@madcap2.tricolour.ca>
On Fri, Nov 25, 2016 at 11:41 AM, Richard Guy Briggs <rgb@redhat.com> wrote:
> On 2016-11-25 11:38, Paul Moore wrote:
>> On Thu, Nov 24, 2016 at 1:41 AM, Richard Guy Briggs <rgb@redhat.com> wrote:
>> > On 2016-11-23 20:42, Paul Moore wrote:
>> >> From: Paul Moore <paul@paul-moore.com>
>> >>
>> >> The backlog queue handling in audit_log_start() is a little odd with
>> >> some questionable design decisions, this patch attempts to rectify
>> >> this with the following changes:
>> >>
>> >> * Never make auditd wait, ignore any backlog limits as we need auditd
>> >> awake so it can drain the backlog queue.
>> >>
>> >> * When we hit a backlog limit and start dropping records, don't wake
>> >> all the tasks sleeping on the backlog, that's silly. Instead, let
>> >> kauditd_thread() take care of waking everyone once it has had a chance
>> >> to drain the backlog queue.
>> >>
>> >> * Don't keep a global backlog timeout countdown, make it per-task. A
>> >> per-task timer means we won't have all the sleeping tasks waking at
>> >> the same time and hammering on an already stressed backlog queue.
>> >>
>> >> Signed-off-by: Paul Moore <paul@paul-moore.com>
>> >> ---
>> >> kernel/audit.c | 92 ++++++++++++++++++++++----------------------------------
>>
>> ...
>>
>> >> 1 file changed, 36 insertions(+), 56 deletions(-)
>> >> + /* don't ever fail/sleep on auditd since we need auditd to drain the
>> >> + * queue; also, when we are checking for auditd, compare PIDs using
>> >> + * task_tgid_vnr() since auditd_pid is set in audit_receive_msg() using
>> >> + * a PID anchored in the caller's namespace */
>> >> + if (!(audit_pid && audit_pid == task_tgid_vnr(current))) {
>> >
>> > Could the change from task_tgid() [should be same as current->tgid] to
>> > task_tgid_vnr() be pulled out into a seperate patch to make the
>> > namespace behaviour change implicaiton much more clear?
>>
>> Considering the comment above the if-conditional I don't think there
>> is much to be gained by splitting it out to a separate patch.
>
> Except the ability to find it when someone goes looking for things that
> change namespace behaviour, which this patch objective should not
> include.
>
> I agree it is well explained in the comment, but that change is
> unrelated to the goal of the rest of the patch.
It is very related to the goals of the patch, look at the title,
"rework audit_log_start()", as well as the description where I
specifically call out not making auditd wait on the backlog.
--
paul moore
www.paul-moore.com
^ permalink raw reply
* Re: [PATCH] audit: remove the audit freelist
From: Richard Guy Briggs @ 2016-11-29 16:12 UTC (permalink / raw)
To: Florian Westphal; +Cc: linux-kernel, linux-audit
In-Reply-To: <1479215774-29810-1-git-send-email-fw@strlen.de>
On 2016-11-15 14:16, Florian Westphal wrote:
> allows better debugging as freeing audit buffers now always honors slub
> debug hooks (e.g. object poisoning) and leak checker can detect the
> free operation.
>
> Removal also results in a small speedup (using
> single rule 'iptables -A INPUT -i lo -j AUDIT --type drop'):
>
> super_netperf 4 -H 127.0.0.1 -l 360 -t UDP_RR -- -R 1 -m 64
> Before:
> 294953
> After:
> 298013
>
> (alloc/free no longer serializes on spinlock, allocator can use percpu
> pool).
>
> Signed-off-by: Florian Westphal <fw@strlen.de>
I've got a minor concern about a skipped skb_free below, but other than
that, I like this simplification and in particular the patch stats. :)
> ---
> kernel/audit.c | 53 ++++++++---------------------------------------------
> 1 file changed, 8 insertions(+), 45 deletions(-)
>
> diff --git a/kernel/audit.c b/kernel/audit.c
> index f1ca11613379..396868dc523a 100644
> --- a/kernel/audit.c
> +++ b/kernel/audit.c
> @@ -131,13 +131,6 @@ static int audit_net_id;
> /* Hash for inode-based rules */
> struct list_head audit_inode_hash[AUDIT_INODE_BUCKETS];
>
> -/* The audit_freelist is a list of pre-allocated audit buffers (if more
> - * than AUDIT_MAXFREE are in use, the audit buffer is freed instead of
> - * being placed on the freelist). */
> -static DEFINE_SPINLOCK(audit_freelist_lock);
> -static int audit_freelist_count;
> -static LIST_HEAD(audit_freelist);
> -
> static struct sk_buff_head audit_skb_queue;
> /* queue of skbs to send to auditd when/if it comes back */
> static struct sk_buff_head audit_skb_hold_queue;
> @@ -164,17 +157,11 @@ DEFINE_MUTEX(audit_cmd_mutex);
> * should be at least that large. */
> #define AUDIT_BUFSIZ 1024
>
> -/* AUDIT_MAXFREE is the number of empty audit_buffers we keep on the
> - * audit_freelist. Doing so eliminates many kmalloc/kfree calls. */
> -#define AUDIT_MAXFREE (2*NR_CPUS)
> -
> -/* The audit_buffer is used when formatting an audit record. The caller
> - * locks briefly to get the record off the freelist or to allocate the
> - * buffer, and locks briefly to send the buffer to the netlink layer or
> +/* The audit_buffer is used when formatting an audit record.
> + * The caller locks briefly to send the buffer to the netlink layer or
> * to place it on a transmit queue. Multiple audit_buffers can be in
> * use simultaneously. */
> struct audit_buffer {
> - struct list_head list;
> struct sk_buff *skb; /* formatted skb ready to send */
> struct audit_context *ctx; /* NULL or associated context */
> gfp_t gfp_mask;
> @@ -1247,43 +1234,22 @@ __setup("audit_backlog_limit=", audit_backlog_limit_set);
>
> static void audit_buffer_free(struct audit_buffer *ab)
> {
> - unsigned long flags;
> -
> if (!ab)
> return;
>
> kfree_skb(ab->skb);
> - spin_lock_irqsave(&audit_freelist_lock, flags);
> - if (audit_freelist_count > AUDIT_MAXFREE)
> - kfree(ab);
> - else {
> - audit_freelist_count++;
> - list_add(&ab->list, &audit_freelist);
> - }
> - spin_unlock_irqrestore(&audit_freelist_lock, flags);
> + kfree(ab);
> }
>
> static struct audit_buffer * audit_buffer_alloc(struct audit_context *ctx,
> gfp_t gfp_mask, int type)
> {
> - unsigned long flags;
> - struct audit_buffer *ab = NULL;
> + struct audit_buffer *ab;
> struct nlmsghdr *nlh;
>
> - spin_lock_irqsave(&audit_freelist_lock, flags);
> - if (!list_empty(&audit_freelist)) {
> - ab = list_entry(audit_freelist.next,
> - struct audit_buffer, list);
> - list_del(&ab->list);
> - --audit_freelist_count;
> - }
> - spin_unlock_irqrestore(&audit_freelist_lock, flags);
> -
> - if (!ab) {
> - ab = kmalloc(sizeof(*ab), gfp_mask);
> - if (!ab)
> - goto err;
> - }
> + ab = kmalloc(sizeof(*ab), gfp_mask);
> + if (!ab)
> + return NULL;
>
> ab->ctx = ctx;
> ab->gfp_mask = gfp_mask;
> @@ -1294,13 +1260,10 @@ static struct audit_buffer * audit_buffer_alloc(struct audit_context *ctx,
>
> nlh = nlmsg_put(ab->skb, 0, 0, type, 0, 0);
> if (!nlh)
> - goto out_kfree_skb;
> + goto err;
>
> return ab;
>
> -out_kfree_skb:
> - kfree_skb(ab->skb);
> - ab->skb = NULL;
Why is the kfree_skb() skipped on error from nlmsg_put()? I don't see
much risk in nlmsg_put() failing considering the very simple arguments,
however the code path is not trivial either.
> err:
> audit_buffer_free(ab);
> return NULL;
- RGB
--
Richard Guy Briggs <rgb@redhat.com>
Kernel Security Engineering, Base Operating Systems, Red Hat
Remote, Ottawa, Canada
Voice: +1.647.777.2635, Internal: (81) 32635
^ permalink raw reply
* Re: [PATCH] audit: remove the audit freelist
From: Florian Westphal @ 2016-11-29 17:24 UTC (permalink / raw)
To: Richard Guy Briggs; +Cc: Florian Westphal, linux-kernel, linux-audit
In-Reply-To: <20161129161233.GG6897@madcap2.tricolour.ca>
Richard Guy Briggs <rgb@redhat.com> wrote:
> > static void audit_buffer_free(struct audit_buffer *ab)
> > {
> > - unsigned long flags;
> > -
> > if (!ab)
> > return;
> >
> > kfree_skb(ab->skb);
> > - spin_lock_irqsave(&audit_freelist_lock, flags);
> > - if (audit_freelist_count > AUDIT_MAXFREE)
> > - kfree(ab);
> > - else {
> > - audit_freelist_count++;
> > - list_add(&ab->list, &audit_freelist);
> > - }
> > - spin_unlock_irqrestore(&audit_freelist_lock, flags);
> > + kfree(ab);
> > }
[..]
> > nlh = nlmsg_put(ab->skb, 0, 0, type, 0, 0);
> > if (!nlh)
> > - goto out_kfree_skb;
> > + goto err;
> >
> > return ab;
> >
> > -out_kfree_skb:
> > - kfree_skb(ab->skb);
> > - ab->skb = NULL;
>
> Why is the kfree_skb() skipped on error from nlmsg_put()? I don't see
> much risk in nlmsg_put() failing considering the very simple arguments,
> however the code path is not trivial either.
if nlmsg_put fails we jump to err and ...
> > err:
> > audit_buffer_free(ab);
> > return NULL;
... ab->skb gets free'd by audit_buffer_free() here.
^ permalink raw reply
* Re: [RFC PATCH 0/9] Move the audit netlink multicast send to the kauditd_thread
From: Paul Moore @ 2016-11-29 22:08 UTC (permalink / raw)
To: Paul Moore; +Cc: linux-audit
In-Reply-To: <147995162874.18851.11207690780223712694.stgit@sifl>
On Wed, Nov 23, 2016 at 8:41 PM, Paul Moore <pmoore@redhat.com> wrote:
> This patchset started off innocently enough with the goal of moving
> the netlink multicast send from audit_log_end() to kauditd_thread().
> However, things escalated rather quickly as this uncovered, or made
> worse, a number of inherent problems in the audit backlog queues.
> This patchset attempts to address both the multicast and queue
> problems.
>
> I've spent a few weeks playing with this, stressing it a bit, and
> tweaking some of the logic and so far it is performing at least as
> well as the existing code for all the scenarios I've thrown at it;
> if you happen to have a particularly nasty audit test, I'd
> appreciate hearing about it, and I'd appreciate it even more if
> you could give it a test too.
>
> I'm posting this patchset as a RFC because this is a pretty big
> change to some rather critical code and I thought some review
> would be prudent; if I don't see anything substantial by next week
> I'll go ahead and merge this into audit#next, along with the
> patch from WANG Cong which started the little endeavor (see the
> links below). You'll note I'm not including the patch from WANG
> Cong in this patchset for the sake of clarity.
>
> Enough from me, please take a look at the patchset that follows
> and post any comments you may have to the list. In case you are
> running Fedora Rawhide, I've been building some kernels you can
> use to test at the link below:
>
> * GitHub Issue Trackers
> - https://github.com/linux-audit/audit-kernel/issues/23
> - https://github.com/linux-audit/audit-kernel/issues/22
>
> * Fedora Rawhide Kernel Builds
> - https://copr.fedorainfracloud.org/coprs/pcmoore/kernel-testing
As a FYI, I just merged these patches into audit#next.
--
paul moore
www.paul-moore.com
^ permalink raw reply
* [PATCH] netns: avoid disabling irq for netns id
From: Paul Moore @ 2016-11-29 22:11 UTC (permalink / raw)
To: netdev, linux-audit; +Cc: Cong Wang
From: Paul Moore <paul@paul-moore.com>
Bring back commit bc51dddf98c9 ("netns: avoid disabling irq for netns
id") now that we've fixed some audit multicast issues that caused
problems with original attempt. Additional information, and history,
can be found in the links below:
* https://github.com/linux-audit/audit-kernel/issues/22
* https://github.com/linux-audit/audit-kernel/issues/23
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: Paul Moore <paul@paul-moore.com>
---
net/core/net_namespace.c | 35 +++++++++++++++--------------------
1 file changed, 15 insertions(+), 20 deletions(-)
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 2c2eb1b..10608dd 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -213,14 +213,13 @@ static void rtnl_net_notifyid(struct net *net, int cmd, int id);
*/
int peernet2id_alloc(struct net *net, struct net *peer)
{
- unsigned long flags;
bool alloc;
int id;
- spin_lock_irqsave(&net->nsid_lock, flags);
+ spin_lock_bh(&net->nsid_lock);
alloc = atomic_read(&peer->count) == 0 ? false : true;
id = __peernet2id_alloc(net, peer, &alloc);
- spin_unlock_irqrestore(&net->nsid_lock, flags);
+ spin_unlock_bh(&net->nsid_lock);
if (alloc && id >= 0)
rtnl_net_notifyid(net, RTM_NEWNSID, id);
return id;
@@ -230,12 +229,11 @@ EXPORT_SYMBOL(peernet2id_alloc);
/* This function returns, if assigned, the id of a peer netns. */
int peernet2id(struct net *net, struct net *peer)
{
- unsigned long flags;
int id;
- spin_lock_irqsave(&net->nsid_lock, flags);
+ spin_lock_bh(&net->nsid_lock);
id = __peernet2id(net, peer);
- spin_unlock_irqrestore(&net->nsid_lock, flags);
+ spin_unlock_bh(&net->nsid_lock);
return id;
}
@@ -249,18 +247,17 @@ bool peernet_has_id(struct net *net, struct net *peer)
struct net *get_net_ns_by_id(struct net *net, int id)
{
- unsigned long flags;
struct net *peer;
if (id < 0)
return NULL;
rcu_read_lock();
- spin_lock_irqsave(&net->nsid_lock, flags);
+ spin_lock_bh(&net->nsid_lock);
peer = idr_find(&net->netns_ids, id);
if (peer)
get_net(peer);
- spin_unlock_irqrestore(&net->nsid_lock, flags);
+ spin_unlock_bh(&net->nsid_lock);
rcu_read_unlock();
return peer;
@@ -404,17 +401,17 @@ static void cleanup_net(struct work_struct *work)
for_each_net(tmp) {
int id;
- spin_lock_irq(&tmp->nsid_lock);
+ spin_lock_bh(&tmp->nsid_lock);
id = __peernet2id(tmp, net);
if (id >= 0)
idr_remove(&tmp->netns_ids, id);
- spin_unlock_irq(&tmp->nsid_lock);
+ spin_unlock_bh(&tmp->nsid_lock);
if (id >= 0)
rtnl_net_notifyid(tmp, RTM_DELNSID, id);
}
- spin_lock_irq(&net->nsid_lock);
+ spin_lock_bh(&net->nsid_lock);
idr_destroy(&net->netns_ids);
- spin_unlock_irq(&net->nsid_lock);
+ spin_unlock_bh(&net->nsid_lock);
}
rtnl_unlock();
@@ -542,7 +539,6 @@ static int rtnl_net_newid(struct sk_buff *skb, struct nlmsghdr *nlh)
{
struct net *net = sock_net(skb->sk);
struct nlattr *tb[NETNSA_MAX + 1];
- unsigned long flags;
struct net *peer;
int nsid, err;
@@ -563,15 +559,15 @@ static int rtnl_net_newid(struct sk_buff *skb, struct nlmsghdr *nlh)
if (IS_ERR(peer))
return PTR_ERR(peer);
- spin_lock_irqsave(&net->nsid_lock, flags);
+ spin_lock_bh(&net->nsid_lock);
if (__peernet2id(net, peer) >= 0) {
- spin_unlock_irqrestore(&net->nsid_lock, flags);
+ spin_unlock_bh(&net->nsid_lock);
err = -EEXIST;
goto out;
}
err = alloc_netid(net, peer, nsid);
- spin_unlock_irqrestore(&net->nsid_lock, flags);
+ spin_unlock_bh(&net->nsid_lock);
if (err >= 0) {
rtnl_net_notifyid(net, RTM_NEWNSID, err);
err = 0;
@@ -693,11 +689,10 @@ static int rtnl_net_dumpid(struct sk_buff *skb, struct netlink_callback *cb)
.idx = 0,
.s_idx = cb->args[0],
};
- unsigned long flags;
- spin_lock_irqsave(&net->nsid_lock, flags);
+ spin_lock_bh(&net->nsid_lock);
idr_for_each(&net->netns_ids, rtnl_net_dumpid_one, &net_cb);
- spin_unlock_irqrestore(&net->nsid_lock, flags);
+ spin_unlock_bh(&net->nsid_lock);
cb->args[0] = net_cb.idx;
return skb->len;
^ permalink raw reply related
* Re: [PATCH] netns: avoid disabling irq for netns id
From: Paul Moore @ 2016-11-29 22:14 UTC (permalink / raw)
To: netdev, linux-audit; +Cc: Cong Wang
In-Reply-To: <148045748887.22539.3188295553967836703.stgit@sifl>
On Tue, Nov 29, 2016 at 5:11 PM, Paul Moore <pmoore@redhat.com> wrote:
> From: Paul Moore <paul@paul-moore.com>
>
> Bring back commit bc51dddf98c9 ("netns: avoid disabling irq for netns
> id") now that we've fixed some audit multicast issues that caused
> problems with original attempt. Additional information, and history,
> can be found in the links below:
>
> * https://github.com/linux-audit/audit-kernel/issues/22
> * https://github.com/linux-audit/audit-kernel/issues/23
>
> Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
> Signed-off-by: Paul Moore <paul@paul-moore.com>
> ---
> net/core/net_namespace.c | 35 +++++++++++++++--------------------
> 1 file changed, 15 insertions(+), 20 deletions(-)
Cong Wang, I added your sign-off to the patch since you were the
original author, if you would prefer I leave it off or want it changed
just let me know.
David/netdev, since this depends on a bunch of audit changes (about
nine patches) I went ahead and just merged this into the audit#next
branch. If you have a problem with that let me know.
--
paul moore
security @ redhat
^ permalink raw reply
* Re: [PATCH] audit: remove the audit freelist
From: Richard Guy Briggs @ 2016-11-30 4:44 UTC (permalink / raw)
To: Florian Westphal; +Cc: linux-audit, linux-kernel
In-Reply-To: <20161129172450.GD26930@breakpoint.cc>
On 2016-11-29 18:24, Florian Westphal wrote:
> Richard Guy Briggs <rgb@redhat.com> wrote:
> > > static void audit_buffer_free(struct audit_buffer *ab)
> > > {
> > > - unsigned long flags;
> > > -
> > > if (!ab)
> > > return;
> > >
> > > kfree_skb(ab->skb);
> > > - spin_lock_irqsave(&audit_freelist_lock, flags);
> > > - if (audit_freelist_count > AUDIT_MAXFREE)
> > > - kfree(ab);
> > > - else {
> > > - audit_freelist_count++;
> > > - list_add(&ab->list, &audit_freelist);
> > > - }
> > > - spin_unlock_irqrestore(&audit_freelist_lock, flags);
> > > + kfree(ab);
> > > }
> [..]
>
> > > nlh = nlmsg_put(ab->skb, 0, 0, type, 0, 0);
> > > if (!nlh)
> > > - goto out_kfree_skb;
> > > + goto err;
> > >
> > > return ab;
> > >
> > > -out_kfree_skb:
> > > - kfree_skb(ab->skb);
> > > - ab->skb = NULL;
> >
> > Why is the kfree_skb() skipped on error from nlmsg_put()? I don't see
> > much risk in nlmsg_put() failing considering the very simple arguments,
> > however the code path is not trivial either.
>
> if nlmsg_put fails we jump to err and ...
>
> > > err:
> > > audit_buffer_free(ab);
> > > return NULL;
>
> ... ab->skb gets free'd by audit_buffer_free() here.
Duh, thank you! It was already redundant in plain sight in your patch.
Sorry for the brain fart. :)
Reviewed-by: Richard Guy Briggs <rgb@redhat.com>
- RGB
--
Richard Guy Briggs <rgb@redhat.com>
Kernel Security Engineering, Base Operating Systems, Red Hat
Remote, Ottawa, Canada
Voice: +1.647.777.2635, Internal: (81) 32635
^ permalink raw reply
* Re: [PATCH] netns: avoid disabling irq for netns id
From: Cong Wang @ 2016-11-30 6:51 UTC (permalink / raw)
To: Paul Moore; +Cc: Linux Kernel Network Developers, linux-audit
In-Reply-To: <CAGH-KgsK=Dc=Ptfg8P39pemFAsxVr9NKccev7sOW=w1suqKk+w@mail.gmail.com>
On Tue, Nov 29, 2016 at 2:14 PM, Paul Moore <pmoore@redhat.com> wrote:
> On Tue, Nov 29, 2016 at 5:11 PM, Paul Moore <pmoore@redhat.com> wrote:
>> From: Paul Moore <paul@paul-moore.com>
>>
>> Bring back commit bc51dddf98c9 ("netns: avoid disabling irq for netns
>> id") now that we've fixed some audit multicast issues that caused
>> problems with original attempt. Additional information, and history,
>> can be found in the links below:
>>
>> * https://github.com/linux-audit/audit-kernel/issues/22
>> * https://github.com/linux-audit/audit-kernel/issues/23
>>
>> Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
>> Signed-off-by: Paul Moore <paul@paul-moore.com>
>> ---
>> net/core/net_namespace.c | 35 +++++++++++++++--------------------
>> 1 file changed, 15 insertions(+), 20 deletions(-)
>
> Cong Wang, I added your sign-off to the patch since you were the
> original author, if you would prefer I leave it off or want it changed
> just let me know.
Thanks for not forgetting it. I am fine with signed-off-by.
^ permalink raw reply
* Problem regards event fields a0-a3 in audit logs
From: C.y @ 2016-11-30 8:40 UTC (permalink / raw)
To: linux-audit
[-- Attachment #1.1: Type: text/plain, Size: 2393 bytes --]
Hi all,
I looked into audit log and find myself having difficultly in understanding
the event field a0-a3.
I tried to look for answer but couldn't get any references for the
following question:
1. What are the initial value of a0-a3, and
2. Is there any way to get 5-th and above arguments?
Audit system reference from redhat (https://access.redhat.com/doc
umentation/en-US/Red_Hat_Enterprise_Linux/6/html/Security_
Guide/app-Audit_Reference.html) did mentioned that the first four arguments
of system call are recorded, but didn't mentions what their initial value
are when there a system call required less than four arguments.
For example, what will the value of a1, a2, a3 be when the system call
'close' are invoked? ('close' only takes 1 argument
http://man7.org/linux/man-pages/man2/close.2.html)
At first I am guessing that they might contain the a1-a3's value from
previous syscall record, so I starting doing some experiment on the audit
log I have collected (https://drive.google.com/file
/d/0B85cIbQMuvqDSHVGSHdDVjRWdHc/view):
`$ ausyscall --dump | grep 6 | head -n 1 ` returns
6 close
and below are part of my audit log that contains 2 syscall records:
...
type=SYSCALL msg=audit(1479471647.440:197): arch=40000028 syscall=192
per=800000 success=yes exit=1995763712 a0=76f4f000 a1=1000 a2=3 a3=812
items=0 ppid=891 pid=907 auid=1001 uid=0 gid=0 euid=0 suid=0 fsuid=0 egid=0
sgid=0 fsgid=0 tty=pts0 ses=3 comm="raspistill"
exe="/opt/vc/bin/raspistill" key=(null)
type=MMAP msg=audit(1479471647.440:197): fd=3 flags=0x812
type=PROCTITLE msg=audit(1479471647.440:197): proctitle="-bash"
type=SYSCALL msg=audit(1479471647.440:198): arch=40000028 syscall=6
per=800000 success=yes exit=0 a0=3 a1=5 a2=76f31000 a3=0 items=0 ppid=891
pid=907 auid=1001 uid=0 gid=0 euid=0 suid=0 fsuid=0 egid=0 sgid=0 fsgid=0
tty=pts0 ses=3 comm="raspistill" exe="/opt/vc/bin/raspistill" key=(null)
...
the second syscall record show that the a1's value are somehow modified,
which eventually proved that my guessing of that they will contain previous
invoked syscall a1's value(1000) is wrong. That's how I got myself confused
and write to seek for answer regards this question.
Second, I wonder if there are way for me to know the 5-th and above
argument of a particular system call, i.e _llseek that takes 5 arguments?
Thanks a lot for all your help in advance!
Cheers.
Ching-Yi Ng
[-- Attachment #1.2: Type: text/html, Size: 5264 bytes --]
[-- Attachment #2: Type: text/plain, Size: 0 bytes --]
^ permalink raw reply
* Re: [Patch net-next] audit: remove useless synchronize_net()
From: Richard Guy Briggs @ 2016-11-30 9:16 UTC (permalink / raw)
To: Cong Wang; +Cc: netdev, linux-audit, pmoore
In-Reply-To: <1480439696-21818-1-git-send-email-xiyou.wangcong@gmail.com>
On 2016-11-29 09:14, Cong Wang wrote:
> netlink kernel socket is protected by refcount, not RCU.
> Its rcv path is neither protected by RCU. So the synchronize_net()
> is just pointless.
If I understand correctly, xfrm_user_net_exit() usage of
RCU_INIT_POINTER() and synchronize_net() is similarly pointless? Also
net/phonet/socket.c? I probably modelled things based on the former...
> Cc: Richard Guy Briggs <rgb@redhat.com>
> Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
> ---
> kernel/audit.c | 3 +--
> 1 file changed, 1 insertion(+), 2 deletions(-)
>
> diff --git a/kernel/audit.c b/kernel/audit.c
> index 92c463d..67b9fbd8 100644
> --- a/kernel/audit.c
> +++ b/kernel/audit.c
> @@ -1172,9 +1172,8 @@ static void __net_exit audit_net_exit(struct net *net)
> audit_sock = NULL;
> }
>
> - RCU_INIT_POINTER(aunet->nlsk, NULL);
> - synchronize_net();
> netlink_kernel_release(sock);
> + aunet->nlsk = NULL;
> }
>
> static struct pernet_operations audit_net_ops __net_initdata = {
> --
> 2.1.0
>
- RGB
^ permalink raw reply
* Re: Problem regards event fields a0-a3 in audit logs
From: Steve Grubb @ 2016-11-30 15:10 UTC (permalink / raw)
To: linux-audit; +Cc: C.y
In-Reply-To: <CABYhOswoSALK3mJ3JoEakr_BYJUm+zcZWLk7qcJkWevZLyO67w@mail.gmail.com>
On Wednesday, November 30, 2016 4:40:01 PM EST C.y wrote:
> I looked into audit log and find myself having difficultly in understanding
> the event field a0-a3.
>
> I tried to look for answer but couldn't get any references for the
> following question:
> 1. What are the initial value of a0-a3, and
Whatever was passed as arguments for the current syscall.
> 2. Is there any way to get 5-th and above arguments?
Nope. There is only 1 syscall that I know of that has a security relevant
argument higher than what is recorded. This is mmap. What we do is record that
value in a special AUDIT_MMAP auxiliary record. If there were any other
syscalls that had security relevant arguments higher than the first 4 I suspect
we'd do the same thing again.
> Audit system reference from redhat (https://access.redhat.com/doc
> umentation/en-US/Red_Hat_Enterprise_Linux/6/html/Security_
> Guide/app-Audit_Reference.html) did mentioned that the first four arguments
> of system call are recorded, but didn't mentions what their initial value
> are when there a system call required less than four arguments.
> For example, what will the value of a1, a2, a3 be when the system call
> 'close' are invoked? ('close' only takes 1 argument
> http://man7.org/linux/man-pages/man2/close.2.html)
AFAIK, they are whatever is in memory. There is nothing that tells you they
are or are not valid. You would have to know that the syscall takes 2
arguments and just look at the first 2.
> At first I am guessing that they might contain the a1-a3's value from
> previous syscall record, so I starting doing some experiment on the audit
> log I have collected (https://drive.google.com/file
> /d/0B85cIbQMuvqDSHVGSHdDVjRWdHc/view):
>
> `$ ausyscall --dump | grep 6 | head -n 1 ` returns
> 6 close
>
> and below are part of my audit log that contains 2 syscall records:
> ...
> type=SYSCALL msg=audit(1479471647.440:197): arch=40000028 syscall=192
> per=800000 success=yes exit=1995763712 a0=76f4f000 a1=1000 a2=3 a3=812
> items=0 ppid=891 pid=907 auid=1001 uid=0 gid=0 euid=0 suid=0 fsuid=0 egid=0
> sgid=0 fsgid=0 tty=pts0 ses=3 comm="raspistill"
> exe="/opt/vc/bin/raspistill" key=(null)
> type=MMAP msg=audit(1479471647.440:197): fd=3 flags=0x812
> type=PROCTITLE msg=audit(1479471647.440:197): proctitle="-bash"
> type=SYSCALL msg=audit(1479471647.440:198): arch=40000028 syscall=6
> per=800000 success=yes exit=0 a0=3 a1=5 a2=76f31000 a3=0 items=0 ppid=891
> pid=907 auid=1001 uid=0 gid=0 euid=0 suid=0 fsuid=0 egid=0 sgid=0 fsgid=0
> tty=pts0 ses=3 comm="raspistill" exe="/opt/vc/bin/raspistill" key=(null)
> ...
>
> the second syscall record show that the a1's value are somehow modified,
> which eventually proved that my guessing of that they will contain previous
> invoked syscall a1's value(1000) is wrong. That's how I got myself confused
> and write to seek for answer regards this question.
Best way to look at the records is ausearch -i.
> Second, I wonder if there are way for me to know the 5-th and above
> argument of a particular system call, i.e _llseek that takes 5 arguments?
Answered above.
-Steve
^ permalink raw reply
* Re: [Patch net-next] audit: remove useless synchronize_net()
From: Cong Wang @ 2016-11-30 17:20 UTC (permalink / raw)
To: Richard Guy Briggs
Cc: Linux Kernel Network Developers, linux-audit, Paul Moore
In-Reply-To: <20161130091643.GA32562@madcap2.tricolour.ca>
On Wed, Nov 30, 2016 at 1:16 AM, Richard Guy Briggs <rgb@redhat.com> wrote:
> On 2016-11-29 09:14, Cong Wang wrote:
>> netlink kernel socket is protected by refcount, not RCU.
>> Its rcv path is neither protected by RCU. So the synchronize_net()
>> is just pointless.
>
> If I understand correctly, xfrm_user_net_exit() usage of
> RCU_INIT_POINTER() and synchronize_net() is similarly pointless? Also
> net/phonet/socket.c? I probably modelled things based on the former...
Possibly, but xfrm case is slightly different, it has two copies of the pointer
to the netlink socket, also it uses exit_batch(). I need to double check.
Take a look at better examples, fib_front, genetlink, rtnetlink.
^ permalink raw reply
* [PATCH 2/2] audit: log binding and unbinding to netlink multicast
From: Steve Grubb @ 2016-11-30 19:26 UTC (permalink / raw)
To: linux-audit
Log information about programs connecting and disconnecting to the audit
netlink multicast socket. This is needed so that during investigations a
security officer can tell who or what had access to the audit trail. This
helps to meet the FAU_SAR.2 requirement for Common Criteria.
Signed-off-by: sgrubb <sgrubb@redhat.com>
---
include/uapi/linux/audit.h | 1 +
kernel/audit.c | 31 +++++++++++++++++++++++++++----
2 files changed, 28 insertions(+), 4 deletions(-)
diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h
index 82e8aa5..e6dd046 100644
--- a/include/uapi/linux/audit.h
+++ b/include/uapi/linux/audit.h
@@ -111,6 +111,7 @@
#define AUDIT_PROCTITLE 1327 /* Proctitle emit event */
#define AUDIT_FEATURE_CHANGE 1328 /* audit log listing feature changes */
#define AUDIT_REPLACE 1329 /* Replace auditd if this packet unanswerd */
+#define AUDIT_EVENT_LISTENER 1330 /* Task joined multicast read socket */
#define AUDIT_AVC 1400 /* SE Linux avc denial or grant */
#define AUDIT_SELINUX_ERR 1401 /* Internal SE Linux Errors */
diff --git a/kernel/audit.c b/kernel/audit.c
index 22f8c3d..910a7c1 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -1161,22 +1161,45 @@ void audit_log_task_simple(struct audit_buffer *ab,
struct task_struct *tsk)
}
EXPORT_SYMBOL(audit_log_task_simple);
+/* Log information about who is connecting to the audit multicast socket */
+static void audit_log_multicast_bind(int group, const char *op, int err)
+{
+ struct audit_buffer *ab;
+
+ ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_EVENT_LISTENER);
+ if (!ab)
+ return;
+
+ audit_log_task_simple(ab, current);
+ audit_log_format(ab, " nlnk-grp=%d op=%s res=%d", group, op, !err);
+ audit_log_end(ab);
+}
+
/* Run custom bind function on netlink socket group connect or bind requests.
*/
-static int audit_bind(struct net *net, int group)
+static int audit_multicast_bind(struct net *net, int group)
{
+ int err = 0;
+
if (!capable(CAP_AUDIT_READ))
- return -EPERM;
+ err = -EPERM;
+ audit_log_multicast_bind(group, "connect", err);
- return 0;
+ return err;
+}
+
+static void audit_multicast_unbind(struct net *net, int group)
+{
+ audit_log_multicast_bind(group, "disconnect", 0);
}
static int __net_init audit_net_init(struct net *net)
{
struct netlink_kernel_cfg cfg = {
.input = audit_receive,
- .bind = audit_bind,
+ .bind = audit_multicast_bind,
.flags = NL_CFG_F_NONROOT_RECV,
.groups = AUDIT_NLGRP_MAX,
+ .unbind = audit_multicast_unbind,
};
struct audit_net *aunet = net_generic(net, audit_net_id);
--
2.7.4
^ permalink raw reply related
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox