All of lore.kernel.org
 help / color / mirror / Atom feed
From: Florian Westphal <fw@strlen.de>
To: <netfilter-devel@vger.kernel.org>
Cc: Florian Westphal <fw@strlen.de>, Yi Chen <yiche@redhat.com>
Subject: [PATCH nf] netfilter: nfnetlink_queue: fix rcu splat on program exit
Date: Tue, 14 May 2024 12:31:30 +0200	[thread overview]
Message-ID: <20240514103133.2784-1-fw@strlen.de> (raw)

If userspace program exits while the queue its subscribed to has packets
available we get following (harmless) RCU splat:

 net/netfilter/nfnetlink_queue.c:261 suspicious rcu_dereference_check() usage!
 other info that might help us debug this:
 rcu_scheduler_active = 2, debug_locks = 1
 2 locks held by swapper/0/0:
  #0: (rcu_callback){....}-{0:0}, at: rcu_core
  #1: (&inst->lock){+.-.}-{3:3}, at: instance_destroy_rcu
 [..] Call Trace:
  lockdep_rcu_suspicious+0x1ab/0x250
  nfqnl_reinject+0x5d3/0xfb0
  instance_destroy_rcu+0x1b5/0x220
  rcu_core+0xe32 [..]

This is harmless because the incorrectly-obtained pointer will not be
dereferenced in case nfqnl_reinject is called with NF_DROP verdict.

Fix this by open-coding skb+entry release without going through
nfqnl_reinject().  kfree_skb+release_ref is exactly what nfql_reinject
ends up doing when called with NF_DROP, except that it also does a
truckload of other things that are irrelevant for DROP.

A similar warning can be triggered by flushing the ruleset while
packets are being reinjected.

This is harmless as well, the WARN_ON_ONCE() should be removed.

Reported-by: Yi Chen <yiche@redhat.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
---
 Due to MR cloed this patch is actually vs nf-next tree.
 It will also conflict with the pending sctp checksum patch
 from Antonio Ojea (nft_queue.sh), I can resend if needed once
 Antonios patch is applied (conflict resulution is simple: use
 both changes).

 net/netfilter/nfnetlink_queue.c               | 12 ++++--
 .../selftests/net/netfilter/nft_queue.sh      | 37 +++++++++++++++++++
 2 files changed, 46 insertions(+), 3 deletions(-)

diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index 00f4bd21c59b..812117d837a7 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -323,7 +323,7 @@ static void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict)
 	hooks = nf_hook_entries_head(net, pf, entry->state.hook);
 
 	i = entry->hook_index;
-	if (WARN_ON_ONCE(!hooks || i >= hooks->num_hook_entries)) {
+	if (!hooks || i >= hooks->num_hook_entries) {
 		kfree_skb_reason(skb, SKB_DROP_REASON_NETFILTER_DROP);
 		nf_queue_entry_free(entry);
 		return;
@@ -401,16 +401,22 @@ static void
 nfqnl_flush(struct nfqnl_instance *queue, nfqnl_cmpfn cmpfn, unsigned long data)
 {
 	struct nf_queue_entry *entry, *next;
+	LIST_HEAD(head);
 
 	spin_lock_bh(&queue->lock);
 	list_for_each_entry_safe(entry, next, &queue->queue_list, list) {
 		if (!cmpfn || cmpfn(entry, data)) {
-			list_del(&entry->list);
+			list_move(&entry->list, &head);
 			queue->queue_total--;
-			nfqnl_reinject(entry, NF_DROP);
 		}
 	}
 	spin_unlock_bh(&queue->lock);
+
+	list_for_each_entry_safe(entry, next, &head, list) {
+		list_del(&entry->list);
+		kfree_skb_reason(entry->skb, SKB_DROP_REASON_NETFILTER_DROP);
+		nf_queue_entry_free(entry);
+	}
 }
 
 static int
diff --git a/tools/testing/selftests/net/netfilter/nft_queue.sh b/tools/testing/selftests/net/netfilter/nft_queue.sh
index 8538f08c64c2..c61d23a8c88d 100755
--- a/tools/testing/selftests/net/netfilter/nft_queue.sh
+++ b/tools/testing/selftests/net/netfilter/nft_queue.sh
@@ -375,6 +375,42 @@ EOF
 	wait 2>/dev/null
 }
 
+test_queue_removal()
+{
+	read tainted_then < /proc/sys/kernel/tainted
+
+	ip netns exec "$ns1" nft -f - <<EOF
+flush ruleset
+table ip filter {
+	chain output {
+		type filter hook output priority 0; policy accept;
+		ip protocol icmp queue num 0
+	}
+}
+EOF
+	ip netns exec "$ns1" ./nf_queue -q 0 -d 30000 -t "$timeout" &
+	local nfqpid=$!
+
+	busywait "$BUSYWAIT_TIMEOUT" nf_queue_wait "$ns1" 0
+
+	ip netns exec "$ns1" ping -w 2 -f -c 10 127.0.0.1 -q >/dev/null
+	kill $nfqpid
+
+	ip netns exec "$ns1" nft flush ruleset
+
+	if [ "$tainted_then" -ne 0 ];then
+		return
+	fi
+
+	read tainted_now < /proc/sys/kernel/tainted
+	if [ "$tainted_now" -eq 0 ];then
+		echo "PASS: queue program exiting while packets queued"
+	else
+		echo "TAINT: queue program exiting while packets queued"
+		ret=1
+	fi
+}
+
 ip netns exec "$nsrouter" sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
 ip netns exec "$nsrouter" sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null
 ip netns exec "$nsrouter" sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null
@@ -413,5 +449,6 @@ test_tcp_localhost
 test_tcp_localhost_connectclose
 test_tcp_localhost_requeue
 test_icmp_vrf
+test_queue_removal
 
 exit $ret
-- 
2.43.2


             reply	other threads:[~2024-05-14 10:45 UTC|newest]

Thread overview: 3+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-05-14 10:31 Florian Westphal [this message]
2024-05-14 11:34 ` [PATCH nf] netfilter: nfnetlink_queue: fix rcu splat on program exit Pablo Neira Ayuso
2024-05-14 11:43   ` Florian Westphal

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20240514103133.2784-1-fw@strlen.de \
    --to=fw@strlen.de \
    --cc=netfilter-devel@vger.kernel.org \
    --cc=yiche@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.