BPF List
 help / color / mirror / Atom feed
* [Bug Report] Packet drops after trimming skb using bpf_skb_adjust_room
@ 2024-04-04  0:22 Kumar Kartikeya Dwivedi
  2024-04-05  4:52 ` John Fastabend
  0 siblings, 1 reply; 3+ messages in thread
From: Kumar Kartikeya Dwivedi @ 2024-04-04  0:22 UTC (permalink / raw)
  To: John Fastabend, bpf
  Cc: Kashyap Sanidhya, rishabh.iyer, Daniel Borkmann, Jakub Sitnicki

[-- Attachment #1: Type: text/plain, Size: 2403 bytes --]

Hello John,

For background, I have a sk_skb program where for certain flows, a
subset of request handling logic (on Rx) that is typically done in
user space is handed over to a BPF program for doing it in-kernel.

Since the protocol is over TCP, I'm using sk_skb programs to handle
these requests and generate responses from within the kernel. The user
space application will decide which flow should be added to the
sockmap on socket accept and then let it be handled in the kernel for
some of the request types.

However, when generating replies and trimming away extra bytes in the
packet by using bpf_skb_adjust_room, I see packet drops because the
strp_msg->full_len is not updated unless tls_sw_has_ctx_rx is true for
the socket. Removing that condition makes everything work, but I am
not sure that is the right fix.

My understanding so far is that in sk_psock_backlog,
sk_psock_handle_skb returns an error since the stm->full_len is not
correct (and passed into it as len), and SK_PSOCK_TX_ENABLED gets
cleared due to this error. Then, on the redirect side
(sk_psock_skb_redirect), it does sock_drop because the test for the
SK_PSOCK_TX_ENABLED flag fails, which does kfree_skb.

I have attached a patch with a selftest (skb_adjust_room) that you can
run to verify the problem. It should basically hang on the second read
as the packet would be dropped by the kernel. The first read goes
through fine. Applying the diff below allows the selftest to pass.

I am not familiar with the code to know whether the fix below is
correct, but it seems to resolve the problem for me (and I carried it
for a while and ran my stuff on it until I got around to reporting
this now).

In case gmail screws up the formatting, I'm just removing the
tls_sw_has_ctx_rx(skb->sk) conditional on the strp_msg->full_len
update in sk_skb_adjust_room.

diff --git a/net/core/filter.c b/net/core/filter.c
index 524adf1fa6d0..e3009d0f3352 100644

--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -3622,11 +3622,7 @@ BPF_CALL_4(sk_skb_adjust_room, struct sk_buff
*, skb, s32, len_diff,

                        return -ENOMEM;

                __skb_pull(skb, len_diff_abs);

        }

-       if (tls_sw_has_ctx_rx(skb->sk)) {
-               struct strp_msg *rxm = strp_msg(skb);
-
-               rxm->full_len += len_diff;
-       }
+       strp_msg(skb)->full_len += len_diff;
        return ret;
 }

--

Thanks!

[-- Attachment #2: test.patch --]
[-- Type: application/octet-stream, Size: 4558 bytes --]

From eea2d1340f991b87ce65952184106bb93d3f9dda Mon Sep 17 00:00:00 2001
From: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Date: Thu, 4 Apr 2024 00:12:50 +0000
Subject: [PATCH] selftests/bpf: Test case to verify bpf_skb_adjust_room fix

Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
---
 .../bpf/prog_tests/skb_adjust_room.c          | 78 +++++++++++++++++++
 .../selftests/bpf/progs/skb_adjust_room.c     | 46 +++++++++++
 2 files changed, 124 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/skb_adjust_room.c
 create mode 100644 tools/testing/selftests/bpf/progs/skb_adjust_room.c

diff --git a/tools/testing/selftests/bpf/prog_tests/skb_adjust_room.c b/tools/testing/selftests/bpf/prog_tests/skb_adjust_room.c
new file mode 100644
index 000000000000..d13b9ac5dc1b
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/skb_adjust_room.c
@@ -0,0 +1,78 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+
+#include "skb_adjust_room.skel.h"
+
+void test_skb_adjust_room(void)
+{
+	struct skb_adjust_room *skel;
+	int sk[3] = {-1, -1, -1};
+	struct sockaddr_in serv;
+	char buf[8];
+	int err;
+
+	skel = skb_adjust_room__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "skb_adjust_room__open_and_load"))
+		return;
+	err = bpf_prog_attach(bpf_program__fd(skel->progs.prog_skb_parser), bpf_map__fd(skel->maps.sockmap), BPF_SK_SKB_STREAM_PARSER, 0);
+	if (!ASSERT_OK(err, "bpf_prog_attach parser"))
+		return;
+	err = bpf_prog_attach(bpf_program__fd(skel->progs.prog_skb_verdict), bpf_map__fd(skel->maps.sockmap), BPF_SK_SKB_STREAM_VERDICT, 0);
+	if (!ASSERT_OK(err, "bpf_prog_attach verdict"))
+		return;
+	// Server
+	sk[0] = socket(AF_INET, SOCK_STREAM, 0);
+	if (!ASSERT_GE(sk[0], 0, "server sock"))
+		return;
+	memset(&serv, 0, sizeof(serv));
+	serv.sin_family = AF_INET;
+	serv.sin_addr.s_addr = htonl(INADDR_ANY);
+	serv.sin_port = htons(6969);
+
+	err = bind(sk[0], (struct sockaddr *)&serv, sizeof(serv));
+	if (!ASSERT_OK(err, "bind localhost:6969"))
+		goto end;
+	err = listen(sk[0], 1);
+	if (!ASSERT_OK(err, "listen with backlog = 1"))
+		goto end;
+
+	// Client
+	sk[1] = socket(AF_INET, SOCK_STREAM, 0);
+	if (!ASSERT_GE(sk[1], 0, "client sock"))
+		goto end;
+	memset(&serv, 0, sizeof(serv));
+	serv.sin_family = AF_INET;
+	serv.sin_addr.s_addr = inet_addr("127.0.0.1");
+	serv.sin_port = htons(6969);
+
+	err = connect(sk[1], (struct sockaddr *)&serv, sizeof(serv));
+	if (!ASSERT_OK(err, "connect localhost:6969"))
+		goto end;
+
+	sk[2] = accept(sk[0], NULL, NULL);
+	if (!ASSERT_GE(sk[2], 0, "accept sock"))
+		goto end;
+	err = bpf_map__update_elem(skel->maps.sockmap, &(int){0}, 4, sk + 2, 4, 0);
+	if (!ASSERT_OK(err, "add server sock to sockmap"))
+		goto end;
+
+	for (int i = 0; i < 10; i++) {
+		err = write(sk[1], "1234567", 8);
+		if (!ASSERT_EQ(err, 8, "write from client"))
+			goto end;
+		err = read(sk[1], buf, 3);
+		if (!ASSERT_EQ(err, 3, "read from client"))
+			goto end;
+	}
+	if (!ASSERT_EQ(strcmp(buf, "OK"), 0, "strcmp response"))
+		goto end;
+	// Done!
+end:
+	if (sk[0] >= 0)
+		close(sk[0]);
+	if (sk[1] >= 0)
+		close(sk[1]);
+	if (sk[2] >= 0)
+		close(sk[2]);
+	return;
+}
diff --git a/tools/testing/selftests/bpf/progs/skb_adjust_room.c b/tools/testing/selftests/bpf/progs/skb_adjust_room.c
new file mode 100644
index 000000000000..6de9ae97e45e
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/skb_adjust_room.c
@@ -0,0 +1,46 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+
+struct {
+	__uint(type, BPF_MAP_TYPE_SOCKMAP);
+	__uint(max_entries, 1);
+	__type(key, int);
+	__type(value, int);
+} sockmap SEC(".maps");
+
+SEC("sk_skb/stream_parser")
+int prog_skb_parser(struct __sk_buff *skb)
+{
+	return skb->len;
+}
+
+SEC("sk_skb/stream_verdict")
+int prog_skb_verdict(struct __sk_buff *skb)
+{
+	void *data, *data_end;
+	int key = 0;
+
+	if (bpf_skb_pull_data(skb, skb->len)) {
+		bpf_printk("err len=%d\n", skb->len);
+		return SK_PASS;
+	}
+
+	data_end = (void *)(long)skb->data_end;
+	data = (void *)(long)skb->data;
+
+	if (data + 4 > data_end) {
+		bpf_printk("err len=%d data_end-data=%d\n", skb->len, data_end - data);
+		return SK_PASS;
+	}
+	char *buf = data;
+	bpf_printk("skb=0x%llx data: %c%c%c%c\n", (long long)skb, buf[0], buf[1], buf[2], buf[3]);
+	__builtin_memcpy(data, "OK", 3);
+	if (bpf_skb_adjust_room(skb, 3 - skb->len, 0, 0)) {
+		bpf_printk("err\n");
+		return SK_PASS;
+	}
+	return bpf_sk_redirect_map(skb, &sockmap, key, 0);
+}
+
+char _license[] SEC("license") = "GPL";
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2024-04-05 22:09 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2024-04-04  0:22 [Bug Report] Packet drops after trimming skb using bpf_skb_adjust_room Kumar Kartikeya Dwivedi
2024-04-05  4:52 ` John Fastabend
2024-04-05 22:08   ` Kumar Kartikeya Dwivedi

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox