Netdev List
 help / color / mirror / Atom feed
* [PATCH bpf] bpf, sockmap: keep sk_msg copy bitmap in sync
@ 2026-05-20 11:23 Junxi Qian
  2026-05-20 12:24 ` Jiayuan Chen
  0 siblings, 1 reply; 2+ messages in thread
From: Junxi Qian @ 2026-05-20 11:23 UTC (permalink / raw)
  To: bpf, netdev
  Cc: Alexei Starovoitov, Daniel Borkmann, Andrii Nakryiko,
	John Fastabend, Stanislav Fomichev, David S . Miller,
	Eric Dumazet, Jakub Kicinski, Paolo Abeni

The sk_msg scatterlist ring stores the actual page descriptors in
sg.data[] and stores the copy-required state for each slot in the
parallel sg.copy bitmap.  bpf_msg_pull_data() trusts this bitmap to
know whether the current slot can be exposed for direct packet access or
whether it has to be copied into a private page first.

Several SK_MSG helpers move or split sg.data[] entries without moving
the matching sg.copy bit.  In particular, bpf_msg_push_data() can split
a copy-marked entry and place the tail in a new slot with a stale clear
copy bit.  A later bpf_msg_pull_data() can then skip the private copy
and expose a direct writable pointer to the shared page.

Keep sg.copy synchronized whenever these helpers move, split, replace or
remove scatterlist entries.  Clear the bit for newly allocated private
pages and preserve it for descriptors that still refer to the original
shared backing page.

Fixes: 015632bb30da ("bpf: sk_msg program helper bpf_sk_msg_pull_data")
Fixes: 6fff607e2f14 ("bpf: sk_msg program helper bpf_msg_push_data")
Fixes: 7246d8ed4dcc ("bpf: helper to pop data from messages")
Reported-by: Junxi Qian <qjx1298677004@gmail.com>
Reported-by: Qi Tang <tpluszz77@gmail.com>
Signed-off-by: Junxi Qian <qjx1298677004@gmail.com>
Cc: stable@vger.kernel.org
---
 net/core/filter.c | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)

diff --git a/net/core/filter.c b/net/core/filter.c
index 5fa9189eb..2fc21b87c 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -2724,11 +2724,13 @@ BPF_CALL_4(bpf_msg_pull_data, struct sk_msg *, msg, u32, start,
 		poffset += len;
 		sge->length = 0;
 		put_page(sg_page(sge));
+		__clear_bit(i, msg->sg.copy);
 
 		sk_msg_iter_var_next(i);
 	} while (i != last_sge);
 
 	sg_set_page(&msg->sg.data[first_sge], page, copy, 0);
+	__clear_bit(first_sge, msg->sg.copy);
 
 	/* To repair sg ring we need to shift entries. If we only
 	 * had a single entry though we can just replace it and
@@ -2754,9 +2756,11 @@ BPF_CALL_4(bpf_msg_pull_data, struct sk_msg *, msg, u32, start,
 			break;
 
 		msg->sg.data[i] = msg->sg.data[move_from];
+		assign_bit(i, msg->sg.copy, test_bit(move_from, msg->sg.copy));
 		msg->sg.data[move_from].length = 0;
 		msg->sg.data[move_from].page_link = 0;
 		msg->sg.data[move_from].offset = 0;
+		__clear_bit(move_from, msg->sg.copy);
 		sk_msg_iter_var_next(i);
 	} while (1);
 
@@ -2784,6 +2788,7 @@ BPF_CALL_4(bpf_msg_push_data, struct sk_msg *, msg, u32, start,
 	   u32, len, u64, flags)
 {
 	struct scatterlist sge, nsge, nnsge, rsge = {0}, *psge;
+	bool sge_copy, nsge_copy, nnsge_copy, rsge_copy = false;
 	u32 new, i = 0, l = 0, space, copy = 0, offset = 0;
 	u8 *raw, *to, *from;
 	struct page *page;
@@ -2857,6 +2862,7 @@ BPF_CALL_4(bpf_msg_push_data, struct sk_msg *, msg, u32, start,
 			sk_msg_iter_var_prev(i);
 		psge = sk_msg_elem(msg, i);
 		rsge = sk_msg_elem_cpy(msg, i);
+		rsge_copy = test_bit(i, msg->sg.copy);
 
 		psge->length = start - offset;
 		rsge.length -= psge->length;
@@ -2881,24 +2887,32 @@ BPF_CALL_4(bpf_msg_push_data, struct sk_msg *, msg, u32, start,
 
 	/* Shift one or two slots as needed */
 	sge = sk_msg_elem_cpy(msg, new);
+	sge_copy = test_bit(new, msg->sg.copy);
 	sg_unmark_end(&sge);
 
 	nsge = sk_msg_elem_cpy(msg, i);
+	nsge_copy = test_bit(i, msg->sg.copy);
 	if (rsge.length) {
 		sk_msg_iter_var_next(i);
 		nnsge = sk_msg_elem_cpy(msg, i);
+		nnsge_copy = test_bit(i, msg->sg.copy);
 		sk_msg_iter_next(msg, end);
 	}
 
 	while (i != msg->sg.end) {
 		msg->sg.data[i] = sge;
+		assign_bit(i, msg->sg.copy, sge_copy);
 		sge = nsge;
+		sge_copy = nsge_copy;
 		sk_msg_iter_var_next(i);
 		if (rsge.length) {
 			nsge = nnsge;
+			nsge_copy = nnsge_copy;
 			nnsge = sk_msg_elem_cpy(msg, i);
+			nnsge_copy = test_bit(i, msg->sg.copy);
 		} else {
 			nsge = sk_msg_elem_cpy(msg, i);
+			nsge_copy = test_bit(i, msg->sg.copy);
 		}
 	}
 
@@ -2912,6 +2926,7 @@ BPF_CALL_4(bpf_msg_push_data, struct sk_msg *, msg, u32, start,
 		get_page(sg_page(&rsge));
 		sk_msg_iter_var_next(new);
 		msg->sg.data[new] = rsge;
+		assign_bit(new, msg->sg.copy, rsge_copy);
 	}
 
 	sk_msg_reset_curr(msg);
@@ -2939,25 +2954,33 @@ static void sk_msg_shift_left(struct sk_msg *msg, int i)
 		prev = i;
 		sk_msg_iter_var_next(i);
 		msg->sg.data[prev] = msg->sg.data[i];
+		assign_bit(prev, msg->sg.copy, test_bit(i, msg->sg.copy));
 	} while (i != msg->sg.end);
 
 	sk_msg_iter_prev(msg, end);
+	__clear_bit(msg->sg.end, msg->sg.copy);
 }
 
 static void sk_msg_shift_right(struct sk_msg *msg, int i)
 {
 	struct scatterlist tmp, sge;
+	bool tmp_copy, copy;
 
 	sk_msg_iter_next(msg, end);
 	sge = sk_msg_elem_cpy(msg, i);
+	copy = test_bit(i, msg->sg.copy);
 	sk_msg_iter_var_next(i);
 	tmp = sk_msg_elem_cpy(msg, i);
+	tmp_copy = test_bit(i, msg->sg.copy);
 
 	while (i != msg->sg.end) {
 		msg->sg.data[i] = sge;
+		assign_bit(i, msg->sg.copy, copy);
 		sk_msg_iter_var_next(i);
 		sge = tmp;
+		copy = tmp_copy;
 		tmp = sk_msg_elem_cpy(msg, i);
+		tmp_copy = test_bit(i, msg->sg.copy);
 	}
 }
 
@@ -3015,6 +3038,8 @@ BPF_CALL_4(bpf_msg_pop_data, struct sk_msg *, msg, u32, start,
 	 */
 	if (start != offset) {
 		struct scatterlist *nsge, *sge = sk_msg_elem(msg, i);
+		u32 sge_idx = i;
+		bool copy = test_bit(i, msg->sg.copy);
 		int a = start - offset;
 		int b = sge->length - pop - a;
 
@@ -3029,6 +3054,7 @@ BPF_CALL_4(bpf_msg_pop_data, struct sk_msg *, msg, u32, start,
 				sg_set_page(nsge,
 					    sg_page(sge),
 					    b, sge->offset + pop + a);
+				assign_bit(i, msg->sg.copy, copy);
 			} else {
 				struct page *page, *orig;
 				u8 *to, *from;
@@ -3045,6 +3071,7 @@ BPF_CALL_4(bpf_msg_pop_data, struct sk_msg *, msg, u32, start,
 				memcpy(to, from, a);
 				memcpy(to + a, from + a + pop, b);
 				sg_set_page(sge, page, a + b, 0);
+				__clear_bit(sge_idx, msg->sg.copy);
 				put_page(orig);
 			}
 			pop = 0;
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 2+ messages in thread

* Re: [PATCH bpf] bpf, sockmap: keep sk_msg copy bitmap in sync
  2026-05-20 11:23 [PATCH bpf] bpf, sockmap: keep sk_msg copy bitmap in sync Junxi Qian
@ 2026-05-20 12:24 ` Jiayuan Chen
  0 siblings, 0 replies; 2+ messages in thread
From: Jiayuan Chen @ 2026-05-20 12:24 UTC (permalink / raw)
  To: Junxi Qian, bpf, netdev
  Cc: Alexei Starovoitov, Daniel Borkmann, Andrii Nakryiko,
	John Fastabend, Stanislav Fomichev, David S . Miller,
	Eric Dumazet, Jakub Kicinski, Paolo Abeni


On 5/20/26 7:23 PM, Junxi Qian wrote:
> The sk_msg scatterlist ring stores the actual page descriptors in
> sg.data[] and stores the copy-required state for each slot in the
> parallel sg.copy bitmap.  bpf_msg_pull_data() trusts this bitmap to
> know whether the current slot can be exposed for direct packet access or
> whether it has to be copied into a private page first.
>
> Several SK_MSG helpers move or split sg.data[] entries without moving
> the matching sg.copy bit.  In particular, bpf_msg_push_data() can split
> a copy-marked entry and place the tail in a new slot with a stale clear
> copy bit.  A later bpf_msg_pull_data() can then skip the private copy
> and expose a direct writable pointer to the shared page.
>
> Keep sg.copy synchronized whenever these helpers move, split, replace or
> remove scatterlist entries.  Clear the bit for newly allocated private
> pages and preserve it for descriptors that still refer to the original
> shared backing page.
>
> Fixes: 015632bb30da ("bpf: sk_msg program helper bpf_sk_msg_pull_data")
> Fixes: 6fff607e2f14 ("bpf: sk_msg program helper bpf_msg_push_data")
> Fixes: 7246d8ed4dcc ("bpf: helper to pop data from messages")
> Reported-by: Junxi Qian <qjx1298677004@gmail.com>
> Reported-by: Qi Tang <tpluszz77@gmail.com>
> Signed-off-by: Junxi Qian <qjx1298677004@gmail.com>
> Cc: stable@vger.kernel.org
>

Thanks for the patch. However, this issue is already being addressed in 
another patch.

Please see:
https://lore.kernel.org/bpf/20260517121626.406516-1-rollkingzzc@gmail.com/
https://lore.kernel.org/bpf/20260520102715.3033936-1-rollkingzzc@gmail.com/
---
pw-bot: cr


^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2026-05-20 12:25 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed
-- links below jump to the message on this page --)
2026-05-20 11:23 [PATCH bpf] bpf, sockmap: keep sk_msg copy bitmap in sync Junxi Qian
2026-05-20 12:24 ` Jiayuan Chen

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox