Netdev List

Netdev List
 help / color / mirror / Atom feed

* [RFCv2 bpf-next 1/7] bpf: xfrm: Add bpf_xdp_get_xfrm_state() kfunc
From: Daniel Xu @ 2023-11-01 21:57 UTC (permalink / raw)
  To: kuba, hawk, edumazet, steffen.klassert, daniel, Herbert Xu, ast,
	john.fastabend, pabeni, davem, antony.antony
  Cc: linux-kernel, netdev, bpf, devel
In-Reply-To: <cover.1698875025.git.dxu@dxuuu.xyz>

This commit adds an unstable kfunc helper to access internal xfrm_state
associated with an SA. This is intended to be used for the upcoming
IPsec pcpu work to assign special pcpu SAs to a particular CPU. In other
words: for custom software RSS.

That being said, the function that this kfunc wraps is fairly generic
and used for a lot of xfrm tasks. I'm sure people will find uses
elsewhere over time.

Co-developed-by: Antony Antony <antony.antony@secunet.com>
Signed-off-by: Antony Antony <antony.antony@secunet.com>
Signed-off-by: Daniel Xu <dxu@dxuuu.xyz>
---
 include/net/xfrm.h        |   9 ++++
 net/xfrm/Makefile         |   1 +
 net/xfrm/xfrm_policy.c    |   2 +
 net/xfrm/xfrm_state_bpf.c | 105 ++++++++++++++++++++++++++++++++++++++
 4 files changed, 117 insertions(+)
 create mode 100644 net/xfrm/xfrm_state_bpf.c

diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index c9bb0f892f55..1d107241b901 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -2190,4 +2190,13 @@ static inline int register_xfrm_interface_bpf(void)
 
 #endif
 
+#if IS_ENABLED(CONFIG_DEBUG_INFO_BTF)
+int register_xfrm_state_bpf(void);
+#else
+static inline int register_xfrm_state_bpf(void)
+{
+	return 0;
+}
+#endif
+
 #endif	/* _NET_XFRM_H */
diff --git a/net/xfrm/Makefile b/net/xfrm/Makefile
index cd47f88921f5..547cec77ba03 100644
--- a/net/xfrm/Makefile
+++ b/net/xfrm/Makefile
@@ -21,3 +21,4 @@ obj-$(CONFIG_XFRM_USER_COMPAT) += xfrm_compat.o
 obj-$(CONFIG_XFRM_IPCOMP) += xfrm_ipcomp.o
 obj-$(CONFIG_XFRM_INTERFACE) += xfrm_interface.o
 obj-$(CONFIG_XFRM_ESPINTCP) += espintcp.o
+obj-$(CONFIG_DEBUG_INFO_BTF) += xfrm_state_bpf.o
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index c13dc3ef7910..1b7e75159727 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -4218,6 +4218,8 @@ void __init xfrm_init(void)
 #ifdef CONFIG_XFRM_ESPINTCP
 	espintcp_init();
 #endif
+
+	register_xfrm_state_bpf();
 }
 
 #ifdef CONFIG_AUDITSYSCALL
diff --git a/net/xfrm/xfrm_state_bpf.c b/net/xfrm/xfrm_state_bpf.c
new file mode 100644
index 000000000000..4aaac134b97a
--- /dev/null
+++ b/net/xfrm/xfrm_state_bpf.c
@@ -0,0 +1,105 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Unstable XFRM state BPF helpers.
+ *
+ * Note that it is allowed to break compatibility for these functions since the
+ * interface they are exposed through to BPF programs is explicitly unstable.
+ */
+
+#include <linux/bpf.h>
+#include <linux/btf_ids.h>
+#include <net/xdp.h>
+#include <net/xfrm.h>
+
+/* bpf_xfrm_state_opts - Options for XFRM state lookup helpers
+ *
+ * Members:
+ * @error      - Out parameter, set for any errors encountered
+ *		 Values:
+ *		   -EINVAL - netns_id is less than -1
+ *		   -EINVAL - Passed NULL for opts
+ *		   -EINVAL - opts__sz isn't BPF_XFRM_STATE_OPTS_SZ
+ *		   -ENONET - No network namespace found for netns_id
+ * @netns_id	- Specify the network namespace for lookup
+ *		 Values:
+ *		   BPF_F_CURRENT_NETNS (-1)
+ *		     Use namespace associated with ctx
+ *		   [0, S32_MAX]
+ *		     Network Namespace ID
+ * @mark	- XFRM mark to match on
+ * @daddr	- Destination address to match on
+ * @spi		- Security parameter index to match on
+ * @proto	- L3 protocol to match on
+ * @family	- L3 protocol family to match on
+ */
+struct bpf_xfrm_state_opts {
+	s32 error;
+	s32 netns_id;
+	u32 mark;
+	xfrm_address_t daddr;
+	__be32 spi;
+	u8 proto;
+	u16 family;
+};
+
+enum {
+	BPF_XFRM_STATE_OPTS_SZ = sizeof(struct bpf_xfrm_state_opts),
+};
+
+__diag_push();
+__diag_ignore_all("-Wmissing-prototypes",
+		  "Global functions as their definitions will be in xfrm_state BTF");
+
+/* bpf_xdp_get_xfrm_state - Get XFRM state
+ *
+ * Parameters:
+ * @ctx 	- Pointer to ctx (xdp_md) in XDP program
+ *		    Cannot be NULL
+ * @opts	- Options for lookup (documented above)
+ *		    Cannot be NULL
+ * @opts__sz	- Length of the bpf_xfrm_state_opts structure
+ *		    Must be BPF_XFRM_STATE_OPTS_SZ
+ */
+__bpf_kfunc struct xfrm_state *
+bpf_xdp_get_xfrm_state(struct xdp_md *ctx, struct bpf_xfrm_state_opts *opts, u32 opts__sz)
+{
+	struct xdp_buff *xdp = (struct xdp_buff *)ctx;
+	struct net *net = dev_net(xdp->rxq->dev);
+
+	if (!opts || opts__sz != BPF_XFRM_STATE_OPTS_SZ) {
+		opts->error = -EINVAL;
+		return NULL;
+	}
+
+	if (unlikely(opts->netns_id < BPF_F_CURRENT_NETNS)) {
+		opts->error = -EINVAL;
+		return NULL;
+	}
+
+	if (opts->netns_id >= 0) {
+		net = get_net_ns_by_id(net, opts->netns_id);
+		if (unlikely(!net)) {
+			opts->error = -ENONET;
+			return NULL;
+		}
+	}
+
+	return xfrm_state_lookup(net, opts->mark, &opts->daddr, opts->spi,
+				 opts->proto, opts->family);
+}
+
+__diag_pop()
+
+BTF_SET8_START(xfrm_state_kfunc_set)
+BTF_ID_FLAGS(func, bpf_xdp_get_xfrm_state, KF_RET_NULL | KF_ACQUIRE)
+BTF_SET8_END(xfrm_state_kfunc_set)
+
+static const struct btf_kfunc_id_set xfrm_state_xdp_kfunc_set = {
+	.owner = THIS_MODULE,
+	.set   = &xfrm_state_kfunc_set,
+};
+
+int __init register_xfrm_state_bpf(void)
+{
+	return register_btf_kfunc_id_set(BPF_PROG_TYPE_XDP,
+					 &xfrm_state_xdp_kfunc_set);
+}
-- 
2.42.0


^ permalink raw reply related

* [RFCv2 bpf-next 2/7] bpf: xfrm: Add bpf_xdp_xfrm_state_release() kfunc
From: Daniel Xu @ 2023-11-01 21:57 UTC (permalink / raw)
  To: kuba, hawk, edumazet, steffen.klassert, daniel, Herbert Xu, ast,
	john.fastabend, pabeni, davem, antony.antony
  Cc: netdev, linux-kernel, bpf, devel
In-Reply-To: <cover.1698875025.git.dxu@dxuuu.xyz>

This kfunc releases a previously acquired xfrm_state from
bpf_xdp_get_xfrm_state().

Co-developed-by: Antony Antony <antony.antony@secunet.com>
Signed-off-by: Antony Antony <antony.antony@secunet.com>
Signed-off-by: Daniel Xu <dxu@dxuuu.xyz>
---
 net/xfrm/xfrm_state_bpf.c | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/net/xfrm/xfrm_state_bpf.c b/net/xfrm/xfrm_state_bpf.c
index 4aaac134b97a..386167c86767 100644
--- a/net/xfrm/xfrm_state_bpf.c
+++ b/net/xfrm/xfrm_state_bpf.c
@@ -87,10 +87,26 @@ bpf_xdp_get_xfrm_state(struct xdp_md *ctx, struct bpf_xfrm_state_opts *opts, u32
 				 opts->proto, opts->family);
 }
 
+/* bpf_xdp_xfrm_state_release - Release acquired xfrm_state object
+ *
+ * This must be invoked for referenced PTR_TO_BTF_ID, and the verifier rejects
+ * the program if any references remain in the program in all of the explored
+ * states.
+ *
+ * Parameters:
+ * @x		- Pointer to referenced xfrm_state object, obtained using
+ *		  bpf_xdp_get_xfrm_state.
+ */
+__bpf_kfunc void bpf_xdp_xfrm_state_release(struct xfrm_state *x)
+{
+	xfrm_state_put(x);
+}
+
 __diag_pop()
 
 BTF_SET8_START(xfrm_state_kfunc_set)
 BTF_ID_FLAGS(func, bpf_xdp_get_xfrm_state, KF_RET_NULL | KF_ACQUIRE)
+BTF_ID_FLAGS(func, bpf_xdp_xfrm_state_release, KF_RELEASE)
 BTF_SET8_END(xfrm_state_kfunc_set)
 
 static const struct btf_kfunc_id_set xfrm_state_xdp_kfunc_set = {
-- 
2.42.0


^ permalink raw reply related

* [RFCv2 bpf-next 7/7] bpf: xfrm: Add selftest for bpf_xdp_get_xfrm_state()
From: Daniel Xu @ 2023-11-01 21:57 UTC (permalink / raw)
  To: shuah, kuba, hawk, daniel, ast, andrii, john.fastabend, davem,
	steffen.klassert, antony.antony
  Cc: martin.lau, song, yonghong.song, kpsingh, sdf, haoluo, jolsa,
	mykolal, bpf, linux-kselftest, linux-kernel, netdev, devel
In-Reply-To: <cover.1698875025.git.dxu@dxuuu.xyz>

This commit extends test_tunnel selftest to test the new XDP xfrm state
lookup kfunc.

Co-developed-by: Antony Antony <antony.antony@secunet.com>
Signed-off-by: Antony Antony <antony.antony@secunet.com>
Signed-off-by: Daniel Xu <dxu@dxuuu.xyz>
---
 .../selftests/bpf/progs/test_tunnel_kern.c    | 49 +++++++++++++++++++
 tools/testing/selftests/bpf/test_tunnel.sh    | 12 +++--
 2 files changed, 57 insertions(+), 4 deletions(-)

diff --git a/tools/testing/selftests/bpf/progs/test_tunnel_kern.c b/tools/testing/selftests/bpf/progs/test_tunnel_kern.c
index ec7e04e012ae..17bf9ce28460 100644
--- a/tools/testing/selftests/bpf/progs/test_tunnel_kern.c
+++ b/tools/testing/selftests/bpf/progs/test_tunnel_kern.c
@@ -35,6 +35,10 @@ int bpf_skb_set_fou_encap(struct __sk_buff *skb_ctx,
 			  struct bpf_fou_encap *encap, int type) __ksym;
 int bpf_skb_get_fou_encap(struct __sk_buff *skb_ctx,
 			  struct bpf_fou_encap *encap) __ksym;
+struct xfrm_state *
+bpf_xdp_get_xfrm_state(struct xdp_md *ctx, struct bpf_xfrm_state_opts *opts,
+		       u32 opts__sz) __ksym;
+void bpf_xdp_xfrm_state_release(struct xfrm_state *x) __ksym;
 
 struct {
 	__uint(type, BPF_MAP_TYPE_ARRAY);
@@ -948,4 +952,49 @@ int xfrm_get_state(struct __sk_buff *skb)
 	return TC_ACT_OK;
 }
 
+SEC("xdp")
+int xfrm_get_state_xdp(struct xdp_md *xdp)
+{
+	struct bpf_xfrm_state_opts opts = {};
+	struct xfrm_state *x = NULL;
+	struct ip_esp_hdr *esph;
+	struct bpf_dynptr ptr;
+	u8 esph_buf[8] = {};
+	u8 iph_buf[20] = {};
+	struct iphdr *iph;
+	u32 off;
+
+	if (bpf_dynptr_from_xdp(xdp, 0, &ptr))
+		goto out;
+
+	off = sizeof(struct ethhdr);
+	iph = bpf_dynptr_slice(&ptr, off, iph_buf, sizeof(iph_buf));
+	if (!iph || iph->protocol != IPPROTO_ESP)
+		goto out;
+
+	off += sizeof(struct iphdr);
+	esph = bpf_dynptr_slice(&ptr, off, esph_buf, sizeof(esph_buf));
+	if (!esph)
+		goto out;
+
+	opts.netns_id = BPF_F_CURRENT_NETNS,
+	opts.daddr.a4 = iph->daddr;
+	opts.spi = esph->spi;
+	opts.proto = IPPROTO_ESP;
+	opts.family = AF_INET;
+
+	x = bpf_xdp_get_xfrm_state(xdp, &opts, sizeof(opts));
+	if (!x || opts.error)
+		goto out;
+
+	if (!x->replay_esn)
+		goto out;
+
+	bpf_printk("replay-window %d\n", x->replay_esn->replay_window);
+out:
+	if (x)
+		bpf_xdp_xfrm_state_release(x);
+	return XDP_PASS;
+}
+
 char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_tunnel.sh b/tools/testing/selftests/bpf/test_tunnel.sh
index dd3c79129e87..17d263681c71 100755
--- a/tools/testing/selftests/bpf/test_tunnel.sh
+++ b/tools/testing/selftests/bpf/test_tunnel.sh
@@ -528,7 +528,7 @@ setup_xfrm_tunnel()
 	# at_ns0 -> root
 	ip netns exec at_ns0 \
 		ip xfrm state add src 172.16.1.100 dst 172.16.1.200 proto esp \
-			spi $spi_in_to_out reqid 1 mode tunnel \
+			spi $spi_in_to_out reqid 1 mode tunnel replay-window 42 \
 			auth-trunc 'hmac(sha1)' $auth 96 enc 'cbc(aes)' $enc
 	ip netns exec at_ns0 \
 		ip xfrm policy add src 10.1.1.100/32 dst 10.1.1.200/32 dir out \
@@ -537,7 +537,7 @@ setup_xfrm_tunnel()
 	# root -> at_ns0
 	ip netns exec at_ns0 \
 		ip xfrm state add src 172.16.1.200 dst 172.16.1.100 proto esp \
-			spi $spi_out_to_in reqid 2 mode tunnel \
+			spi $spi_out_to_in reqid 2 mode tunnel replay-window 42 \
 			auth-trunc 'hmac(sha1)' $auth 96 enc 'cbc(aes)' $enc
 	ip netns exec at_ns0 \
 		ip xfrm policy add src 10.1.1.200/32 dst 10.1.1.100/32 dir in \
@@ -553,14 +553,14 @@ setup_xfrm_tunnel()
 	# root namespace
 	# at_ns0 -> root
 	ip xfrm state add src 172.16.1.100 dst 172.16.1.200 proto esp \
-		spi $spi_in_to_out reqid 1 mode tunnel \
+		spi $spi_in_to_out reqid 1 mode tunnel replay-window 42 \
 		auth-trunc 'hmac(sha1)' $auth 96  enc 'cbc(aes)' $enc
 	ip xfrm policy add src 10.1.1.100/32 dst 10.1.1.200/32 dir in \
 		tmpl src 172.16.1.100 dst 172.16.1.200 proto esp reqid 1 \
 		mode tunnel
 	# root -> at_ns0
 	ip xfrm state add src 172.16.1.200 dst 172.16.1.100 proto esp \
-		spi $spi_out_to_in reqid 2 mode tunnel \
+		spi $spi_out_to_in reqid 2 mode tunnel replay-window 42 \
 		auth-trunc 'hmac(sha1)' $auth 96  enc 'cbc(aes)' $enc
 	ip xfrm policy add src 10.1.1.200/32 dst 10.1.1.100/32 dir out \
 		tmpl src 172.16.1.200 dst 172.16.1.100 proto esp reqid 2 \
@@ -585,6 +585,8 @@ test_xfrm_tunnel()
 	tc qdisc add dev veth1 clsact
 	tc filter add dev veth1 proto ip ingress bpf da object-pinned \
 		${BPF_PIN_TUNNEL_DIR}/xfrm_get_state
+	ip link set dev veth1 xdpdrv pinned \
+		${BPF_PIN_TUNNEL_DIR}/xfrm_get_state_xdp
 	ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
 	sleep 1
 	grep "reqid 1" ${TRACE}
@@ -593,6 +595,8 @@ test_xfrm_tunnel()
 	check_err $?
 	grep "remote ip 0xac100164" ${TRACE}
 	check_err $?
+	grep "replay-window 42" ${TRACE}
+	check_err $?
 	cleanup
 
 	if [ $ret -ne 0 ]; then
-- 
2.42.0


^ permalink raw reply related

* Re: [PATCH] net: phy: broadcom: Wire suspend/resume for BCM54612E
From: Andrew Lunn @ 2023-11-01 22:06 UTC (permalink / raw)
  To: Marco von Rosenberg
  Cc: Florian Fainelli, Broadcom internal kernel review list,
	Heiner Kallweit, Russell King, David S. Miller, Eric Dumazet,
	Jakub Kicinski, Paolo Abeni, netdev, linux-kernel
In-Reply-To: <5414570.Sb9uPGUboI@5cd116mnfx>

On Wed, Nov 01, 2023 at 10:42:52PM +0100, Marco von Rosenberg wrote:
> On Tuesday, October 31, 2023 1:31:11 AM CET Andrew Lunn wrote:
> > Are we talking about a device which as been suspended? The PHY has
> > been left running because there is no suspend callback? Something then
> > triggers a resume. The bootloader then suspends the active PHY? Linux
> > then boots, detects its a resume, so does not touch the hardware
> > because there is no resume callback? The suspended PHY is then
> > useless.
> 
> Hi Andrew,
> 
> thanks for your feedback. I guess a bit of context is missing here. The issue 
> has nothing to do with an ordinary suspension of the OS. The main point is 
> that on initial power-up, the bootloader suspends the PHY before booting 
> Linux. With a resume callback defined, Linux would call it on boot and make the 
> PHY usable.

Ah, so you rely on phy_attach_direct() calling phy_resume(phydev).

This seems an odd way to solve the problem. It was not Linux which
suspend the PHY, so using resume is asymmetric.

I think soft_reset() or config_init() should be taking the PHY out of
suspend.

	Andrew

^ permalink raw reply

* Re: [PATCH] rxrpc_find_service_conn_rcu: use read_seqbegin() rather than read_seqbegin_or_lock()
From: Oleg Nesterov @ 2023-11-01 22:15 UTC (permalink / raw)
  To: David Howells
  Cc: Marc Dionne, Alexander Viro, David S. Miller, Eric Dumazet,
	Jakub Kicinski, Paolo Abeni, Chuck Lever, linux-afs, netdev,
	linux-kernel
In-Reply-To: <1959032.1698873608@warthog.procyon.org.uk>

On 11/01, David Howells wrote:
>
> However, I think just changing all of these to always-lockless isn't
> necessarily the most optimal way.

Yes, but so far I am trying to change the users which never take the
lock for writing, so this patch doesn't change the current behaviour.

> I wonder if struct seqlock would make more sense with an rwlock rather than a
> spinlock.  As it is, it does an exclusive spinlock for the readpath which is
> kind of overkill.

Heh. Please see

	[PATCH 4/5] seqlock: introduce read_seqcount_begin_or_lock() and friends
	https://lore.kernel.org/all/20230913155005.GA26252@redhat.com/

I am going to return to this later.

Oleg.


^ permalink raw reply

* Re: [syzbot] [net?] KASAN: slab-use-after-free Read in ptp_release
From: syzbot @ 2023-11-01 22:21 UTC (permalink / raw)
  To: davem, linux-kernel, netdev, reibax, richardcochran,
	syzkaller-bugs
In-Reply-To: <000000000000ffc87a06086172a0@google.com>

syzbot has bisected this issue to:

commit 8f5de6fb245326704f37d91780b9a10253a8a100
Author: Xabier Marquiegui <reibax@gmail.com>
Date:   Wed Oct 11 22:39:55 2023 +0000

    ptp: support multiple timestamp event readers

bisection log:  https://syzkaller.appspot.com/x/bisect.txt?x=1019f36f680000
start commit:   89ed67ef126c Merge tag 'net-next-6.7' of git://git.kernel...
git tree:       upstream
final oops:     https://syzkaller.appspot.com/x/report.txt?x=1219f36f680000
console output: https://syzkaller.appspot.com/x/log.txt?x=1419f36f680000
kernel config:  https://syzkaller.appspot.com/x/.config?x=6e3b1d98cf5a2cca
dashboard link: https://syzkaller.appspot.com/bug?extid=8a676a50d4eee2f21539
syz repro:      https://syzkaller.appspot.com/x/repro.syz?x=13dd173b680000
C reproducer:   https://syzkaller.appspot.com/x/repro.c?x=16ce0840e80000

Reported-by: syzbot+8a676a50d4eee2f21539@syzkaller.appspotmail.com
Fixes: 8f5de6fb2453 ("ptp: support multiple timestamp event readers")

For information about bisection process see: https://goo.gl/tpsmEJ#bisection

^ permalink raw reply

* Re: Does anyone use Appletalk?
From: Arnd Bergmann @ 2023-11-01 22:29 UTC (permalink / raw)
  To: Dan Williams, John Paul Adrian Glaubitz, Geert Uytterhoeven
  Cc: linux-m68k, Jakub Kicinski, Netdev
In-Reply-To: <79b7f88e3dd6536fe69c63ed3b4cc1f2c551ce8d.camel@redhat.com>

On Wed, Nov 1, 2023, at 21:27, Dan Williams wrote:
> On Wed, 2023-11-01 at 13:26 +0100, John Paul Adrian Glaubitz wrote:
>> Hi Geert,
>> 
>> On Wed, 2023-11-01 at 13:19 +0100, Geert Uytterhoeven wrote:
>> > > Isn't that a bit late?
>> > 
>> > It can always be reverted...
>> 
>> Sure, but I'd rather see such discussions before merging the removal
>> patch. Best would have been to reach out to the netatalk project, for
>> example and ask [1]. They just released version 3.1.18 of the
>> netatalk
>> server in October 2023.

I think you mean netatalk 2.2 for appletalk support, as the 3.x
versions only implement AFP over IP, with no localtalk/appletalk
support.

>> It's an incredibly cool project because it allows you to replace the
>> expensive Apple TimeMachine hardware with a cheap Raspberry Pi ;-).
>
> But... Time Machine debuted with 10.5 and AppleTalk got removed in
> 10.6; did the actual TimeCapsules ever support AppleTalk, or were they
> always TCP/IP-based?
>
> (also TimeMachine-capable Airport Extremes [A1354] are like $15 on
> eBay; that's cheaper than a Raspberry Pi)
>
> This patch only removes the Linux-side ipddp driver (eg MacIP) so if
> Time Capsules never supported AppleTalk, this patch is unrelated to
> TimeMachine.

If we had not removed all localtalk support already, ipddp
might have been used to bridge between a pre-ethernet mac
running macip and an IP based AFP server (netatalk or time machine).
Without localtalk support, that is not all that interesting of
course.

> What this patch *may* break is Linux as a MacIP gateway, allowing
> AppleTalk-only machines to talk TCP/IP to systems. But that's like
> what, the 128/512/Plus and PowerBook Duo/1xx? Everything else had a
> PDS/NuBus slot or onboard Ethernet and could do native
> MacTCP/OpenTransport...

As far as I can tell, https://github.com/jasonking3/macipgw
should work fine as a replacement for ipddp.

     Arnd

^ permalink raw reply

* Re: [PATCH] rxrpc_find_service_conn_rcu: use read_seqbegin() rather than read_seqbegin_or_lock()
From: Oleg Nesterov @ 2023-11-01 22:29 UTC (permalink / raw)
  To: David Howells
  Cc: Marc Dionne, Alexander Viro, David S. Miller, Eric Dumazet,
	Jakub Kicinski, Paolo Abeni, Chuck Lever, linux-afs, netdev,
	linux-kernel
In-Reply-To: <20231101221502.GE32034@redhat.com>

sorry for noise, but in case I wasn't clear...

On 11/01, Oleg Nesterov wrote:
>
> On 11/01, David Howells wrote:
> >
> > However, I think just changing all of these to always-lockless isn't
> > necessarily the most optimal way.
>
> Yes, but so far I am trying to change the users which never take the
> lock for writing, so this patch doesn't change the current behaviour.
>
> > I wonder if struct seqlock would make more sense with an rwlock rather than a
> > spinlock.  As it is, it does an exclusive spinlock for the readpath which is
> > kind of overkill.
>
> Heh. Please see
>
> 	[PATCH 4/5] seqlock: introduce read_seqcount_begin_or_lock() and friends
> 	https://lore.kernel.org/all/20230913155005.GA26252@redhat.com/
>

I meant, we already have seqcount_rwlock_t, but currently you can't do
something like read_seqbegin_or_lock(&seqcount_rwlock_t).

> I am going to return to this later.

Yes.

Oleg.


^ permalink raw reply

* Re: [PATCH net] macsec: Abort MACSec Rx offload datapath when skb is not marked with MACSec metadata
From: Sabrina Dubroca @ 2023-11-01 22:31 UTC (permalink / raw)
  To: Rahul Rameshbabu
  Cc: netdev, Jakub Kicinski, Paolo Abeni, Eric Dumazet,
	David S. Miller, Saeed Mahameed
In-Reply-To: <20231101200217.121789-1-rrameshbabu@nvidia.com>

2023-11-01, 13:02:17 -0700, Rahul Rameshbabu wrote:
> When MACSec is configured on an outer netdev, traffic received directly
> through the underlying netdev should not be processed by the MACSec Rx
> datapath. When using MACSec offload on an outer netdev, traffic with no
> metadata indicator in the skb is mistakenly considered as MACSec traffic
> and incorrectly handled in the handle_not_macsec function. Treat skbs with
> no metadata type as non-MACSec packets rather than assuming they are MACSec
> packets.

What about the other drivers? mlx5 is the only driver that sets md_dst
on its macsec skbs, so their offloaded packets just get dropped now?

-- 
Sabrina


^ permalink raw reply

* Re: Does anyone use Appletalk?
From: John Paul Adrian Glaubitz @ 2023-11-01 22:33 UTC (permalink / raw)
  To: Dan Williams, Geert Uytterhoeven
  Cc: linux-m68k, Arnd Bergmann, Jakub Kicinski, netdev
In-Reply-To: <79b7f88e3dd6536fe69c63ed3b4cc1f2c551ce8d.camel@redhat.com>

On Wed, 2023-11-01 at 15:27 -0500, Dan Williams wrote:
> But... Time Machine debuted with 10.5 and AppleTalk got removed in
> 10.6; did the actual TimeCapsules ever support AppleTalk, or were they
> always TCP/IP-based?

netatalk has two actively maintained versions, one for AppleTalk (2.2.x
series) and one for TCP/IP (3.x series). Both are still being developed
and supported [1].

> (also TimeMachine-capable Airport Extremes [A1354] are like $15 on
> eBay; that's cheaper than a Raspberry Pi)

I know that commercial entities don't have interest in legacy architectures
and protocols. But Linux isn't a commercial-only project so legacy applications
have a valid use case. Most people in the Linux community don't have a use case
for IBM mainframes, yet they aren't in sending patches to get s390 support removed.

I understand that sometimes old code needs to be dropped when it becomes
a burden which is why I also agreed to drop ia64 support since I have
heard complaints from multiple upstream projects and I also know that a
lot of stuff there is broken with no one willing to fix it.

But I don't understand the removal in this case. What particular burden
does a legacy networking protocol pose if it can be easily disabled at
compile time to reduce the attack surface?

> This patch only removes the Linux-side ipddp driver (eg MacIP) so if
> Time Capsules never supported AppleTalk, this patch is unrelated to
> TimeMachine.
> 
> What this patch *may* break is Linux as a MacIP gateway, allowing
> AppleTalk-only machines to talk TCP/IP to systems. But that's like
> what, the 128/512/Plus and PowerBook Duo/1xx? Everything else had a
> PDS/NuBus slot or onboard Ethernet and could do native
> MacTCP/OpenTransport...

Which is a valid use case for people from the retro-computing community
as can be seen from the netatalk description above. I don't think that
Arnd reached out to the netatalk project and asked whether the code
is still needed, did he?

Adrian

> [1] https://en.wikipedia.org/wiki/Netatalk

-- 
 .''`.  John Paul Adrian Glaubitz
: :' :  Debian Developer
`. `'   Physicist
  `-    GPG: 62FF 8A75 84E0 2956 9546  0006 7426 3B37 F5B5 F913

^ permalink raw reply

* Re: [PATCH] rxrpc_find_service_conn_rcu: use read_seqbegin() rather than read_seqbegin_or_lock()
From: Oleg Nesterov @ 2023-11-01 22:38 UTC (permalink / raw)
  To: David Howells
  Cc: Marc Dionne, Alexander Viro, David S. Miller, Eric Dumazet,
	Jakub Kicinski, Paolo Abeni, Chuck Lever, linux-afs, netdev,
	linux-kernel
In-Reply-To: <1959105.1698873750@warthog.procyon.org.uk>

On 11/01, David Howells wrote:
>
> Oleg Nesterov <oleg@redhat.com> wrote:
>
> > Just none of read_seqbegin_or_lock/need_seqretry/done_seqretry
> > helpers make any sense in this code.
>
> I disagree.  I think in at least a couple of cases I do want a locked second
> path

Sorry for confusion. I never said that the 2nd locked pass makes no sense.

My only point is that rxrpc_find_service_conn_rcu() (and more) use
read_seqbegin_or_lock() incorrectly. They can use read_seqbegin() and this
won't change the current behaviour.

So lets change these users first? Then we can discuss the possible changes
in include/linux/seqlock.h and (perhaps) update the users which actually
want the locking on the 2nd pass.

Oleg.

^ permalink raw reply

* Re: [GIT PULL] io_uring support for get/setsockopt
From: pr-tracker-bot @ 2023-11-01 22:47 UTC (permalink / raw)
  To: Jens Axboe; +Cc: Linus Torvalds, io-uring, netdev, Breno Leitao
In-Reply-To: <7a0893f0-bae8-4aee-9e05-7c81354fc829@kernel.dk>

The pull request you sent on Mon, 30 Oct 2023 08:36:04 -0600:

> git://git.kernel.dk/linux.git tags/for-6.7/io_uring-sockopt-2023-10-30

has been merged into torvalds/linux.git:
https://git.kernel.org/torvalds/c/f5277ad1e9768dbd05b1ae8dcdba690215d8c5b7

Thank you!

-- 
Deet-doot-dot, I am a bot.
https://korg.docs.kernel.org/prtracker.html

^ permalink raw reply

* Re: [PATCH] rxrpc_find_service_conn_rcu: use read_seqbegin() rather than read_seqbegin_or_lock()
From: Al Viro @ 2023-11-01 22:48 UTC (permalink / raw)
  To: Oleg Nesterov
  Cc: David Howells, Marc Dionne, David S. Miller, Eric Dumazet,
	Jakub Kicinski, Paolo Abeni, Chuck Lever, linux-afs, netdev,
	linux-kernel
In-Reply-To: <20231101215214.GD32034@redhat.com>

On Wed, Nov 01, 2023 at 10:52:15PM +0100, Oleg Nesterov wrote:

> > Why would you want to force that "switch to locked on the second pass" policy
> > on every possible caller?
> 
> Because this is what (I think) read_seqbegin_or_lock() is supposed to do.
> It should take the lock for writing if the lockless access failed. At least
> according to the documentation.

Not really - it's literally seqbegin or lock, depending upon what the caller
tells it...  IMO the mistake in docs is the insistence on using do-while
loop for its users.

Take a look at d_walk() and try to shoehorn that into your variant.  Especially
the D_WALK_NORETRY handling...

^ permalink raw reply

* Re: [PATCH bpf-next v3 1/2] bpf: add skcipher API support to TC/XDP programs
From: Vadim Fedorenko @ 2023-11-01 22:50 UTC (permalink / raw)
  To: Martin KaFai Lau, David S. Miller, Herbert Xu
  Cc: bpf, netdev, linux-crypto, Jakub Kicinski, Andrii Nakryiko,
	Alexei Starovoitov, Mykola Lysenko, Vadim Fedorenko
In-Reply-To: <dac97b74-5ff1-172b-9cd5-4cdcf07386ec@linux.dev>

On 01/11/2023 21:49, Martin KaFai Lau wrote:
> On 10/31/23 6:48 AM, Vadim Fedorenko wrote:
>> --- /dev/null
>> +++ b/kernel/bpf/crypto.c
>> @@ -0,0 +1,280 @@
>> +// SPDX-License-Identifier: GPL-2.0-only
>> +/* Copyright (c) 2023 Meta, Inc */
>> +#include <linux/bpf.h>
>> +#include <linux/bpf_mem_alloc.h>
>> +#include <linux/btf.h>
>> +#include <linux/btf_ids.h>
>> +#include <linux/filter.h>
>> +#include <linux/scatterlist.h>
>> +#include <linux/skbuff.h>
>> +#include <crypto/skcipher.h>
>> +
>> +/**
>> + * struct bpf_crypto_skcipher_ctx - refcounted BPF sync skcipher 
>> context structure
>> + * @tfm:    The pointer to crypto_sync_skcipher struct.
>> + * @rcu:    The RCU head used to free the crypto context with RCU 
>> safety.
>> + * @usage:    Object reference counter. When the refcount goes to 0, the
>> + *        memory is released back to the BPF allocator, which provides
>> + *        RCU safety.
>> + */
>> +struct bpf_crypto_skcipher_ctx {
>> +    struct crypto_sync_skcipher *tfm;
>> +    struct rcu_head rcu;
>> +    refcount_t usage;
>> +};
>> +
>> +__diag_push();
>> +__diag_ignore_all("-Wmissing-prototypes",
>> +          "Global kfuncs as their definitions will be in BTF");
>> +
>> +static void *__bpf_dynptr_data_ptr(const struct bpf_dynptr_kern *ptr)
>> +{
>> +    enum bpf_dynptr_type type;
>> +
>> +    if (!ptr->data)
>> +        return NULL;
>> +
>> +    type = bpf_dynptr_get_type(ptr);
>> +
>> +    switch (type) {
>> +    case BPF_DYNPTR_TYPE_LOCAL:
>> +    case BPF_DYNPTR_TYPE_RINGBUF:
>> +        return ptr->data + ptr->offset;
>> +    case BPF_DYNPTR_TYPE_SKB:
>> +        return skb_pointer_if_linear(ptr->data, ptr->offset, 
>> __bpf_dynptr_size(ptr));
>> +    case BPF_DYNPTR_TYPE_XDP:
>> +    {
>> +        void *xdp_ptr = bpf_xdp_pointer(ptr->data, ptr->offset, 
>> __bpf_dynptr_size(ptr));
> 
> I suspect what it is doing here (for skb and xdp in particular) is very 
> similar to bpf_dynptr_slice. Please check if bpf_dynptr_slice(ptr, 0, 
> NULL, sz) will work.
> 

Well, yes, it's simplified version of bpf_dynptr_slice. The problem is
that bpf_dynptr_slice bpf_kfunc which cannot be used in another
bpf_kfunc. Should I refactor the code to use it in both places? Like
create __bpf_dynptr_slice() which will be internal part of bpf_kfunc?

> 
>> +        if (!IS_ERR_OR_NULL(xdp_ptr))
>> +            return xdp_ptr;
>> +
>> +        return NULL;
>> +    }
>> +    default:
>> +        WARN_ONCE(true, "unknown dynptr type %d\n", type);
>> +        return NULL;
>> +    }
>> +}
>> +
>> +/**
>> + * bpf_crypto_skcipher_ctx_create() - Create a mutable BPF crypto 
>> context.
>> + *
>> + * Allocates a crypto context that can be used, acquired, and 
>> released by
>> + * a BPF program. The crypto context returned by this function must 
>> either
>> + * be embedded in a map as a kptr, or freed with 
>> bpf_crypto_skcipher_ctx_release().
>> + *
>> + * bpf_crypto_skcipher_ctx_create() allocates memory using the BPF 
>> memory
>> + * allocator, and will not block. It may return NULL if no memory is 
>> available.
>> + * @algo: bpf_dynptr which holds string representation of algorithm.
>> + * @key:  bpf_dynptr which holds cipher key to do crypto.
>> + */
>> +__bpf_kfunc struct bpf_crypto_skcipher_ctx *
>> +bpf_crypto_skcipher_ctx_create(const struct bpf_dynptr_kern *palgo,
> 
> Song's patch on __const_str should help on the palgo (which is a string) 
> also:
> https://lore.kernel.org/bpf/20231024235551.2769174-4-song@kernel.org/
> 

Got it, I'll update it.

>> +                   const struct bpf_dynptr_kern *pkey, int *err)
>> +{
>> +    struct bpf_crypto_skcipher_ctx *ctx;
>> +    char *algo;
>> +
>> +    if (__bpf_dynptr_size(palgo) > CRYPTO_MAX_ALG_NAME) {
>> +        *err = -EINVAL;
>> +        return NULL;
>> +    }
>> +
>> +    algo = __bpf_dynptr_data_ptr(palgo);
>> +
>> +    if (!crypto_has_skcipher(algo, CRYPTO_ALG_TYPE_SKCIPHER, 
>> CRYPTO_ALG_TYPE_MASK)) {
>> +        *err = -EOPNOTSUPP;
>> +        return NULL;
>> +    }
>> +
>> +    ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
>> +    if (!ctx) {
>> +        *err = -ENOMEM;
>> +        return NULL;
>> +    }
>> +
>> +    memset(ctx, 0, sizeof(*ctx));
> 
> nit. kzalloc()
> 
>> +
>> +    ctx->tfm = crypto_alloc_sync_skcipher(algo, 0, 0);
>> +    if (IS_ERR(ctx->tfm)) {
>> +        *err = PTR_ERR(ctx->tfm);
>> +        ctx->tfm = NULL;
>> +        goto err;
>> +    }
>> +
>> +    *err = crypto_sync_skcipher_setkey(ctx->tfm, 
>> __bpf_dynptr_data_ptr(pkey),
>> +                       __bpf_dynptr_size(pkey));
>> +    if (*err)
>> +        goto err;
>> +
>> +    refcount_set(&ctx->usage, 1);
>> +
>> +    return ctx;
>> +err:
>> +    if (ctx->tfm)
>> +        crypto_free_sync_skcipher(ctx->tfm);
>> +    kfree(ctx);
>> +
>> +    return NULL;
>> +}
> 
> [ ... ]
> 
>> +static int bpf_crypto_skcipher_crypt(struct crypto_sync_skcipher *tfm,
>> +                     const struct bpf_dynptr_kern *src,
>> +                     struct bpf_dynptr_kern *dst,
>> +                     const struct bpf_dynptr_kern *iv,
>> +                     bool decrypt)
>> +{
>> +    struct skcipher_request *req = NULL;
>> +    struct scatterlist sgin, sgout;
>> +    int err;
>> +
>> +    if (crypto_sync_skcipher_get_flags(tfm) & CRYPTO_TFM_NEED_KEY)
>> +        return -EINVAL;
>> +
>> +    if (__bpf_dynptr_is_rdonly(dst))
>> +        return -EINVAL;
>> +
>> +    if (!__bpf_dynptr_size(dst) || !__bpf_dynptr_size(src))
>> +        return -EINVAL;
>> +
>> +    if (__bpf_dynptr_size(iv) != crypto_sync_skcipher_ivsize(tfm))
>> +        return -EINVAL;
>> +
>> +    req = skcipher_request_alloc(&tfm->base, GFP_ATOMIC);
> 
> Doing alloc per packet may kill performance. Is it possible to optimize 
> it somehow? What is the usual size of the req (e.g. the example in the 
> selftest)?
> 

In ktls code aead_request is allocated every time encryption is invoked, 
see tls_decrypt_sg(), apparently per skb. Doesn't look like performance
killer. For selftest it's only sizeof(struct skcipher_request).

>> +    if (!req)
>> +        return -ENOMEM;
>> +
>> +    sg_init_one(&sgin, __bpf_dynptr_data_ptr(src), 
>> __bpf_dynptr_size(src));
>> +    sg_init_one(&sgout, __bpf_dynptr_data_ptr(dst), 
>> __bpf_dynptr_size(dst));
>> +
>> +    skcipher_request_set_crypt(req, &sgin, &sgout, 
>> __bpf_dynptr_size(src),
>> +                   __bpf_dynptr_data_ptr(iv));
>> +
>> +    err = decrypt ? crypto_skcipher_decrypt(req) : 
>> crypto_skcipher_encrypt(req);
> 
> I am hitting this with the selftest in patch 2:
> 
> [   39.806675] =============================
> [   39.807243] WARNING: suspicious RCU usage
> [   39.807825] 6.6.0-rc7-02091-g17e023652cc1 #336 Tainted: G           O
> [   39.808798] -----------------------------
> [   39.809368] kernel/sched/core.c:10149 Illegal context switch in 
> RCU-bh read-side critical section!
> [   39.810589]
> [   39.810589] other info that might help us debug this:
> [   39.810589]
> [   39.811696]
> [   39.811696] rcu_scheduler_active = 2, debug_locks = 1
> [   39.812616] 2 locks held by test_progs/1769:
> [   39.813249]  #0: ffffffff84dce140 (rcu_read_lock){....}-{1:3}, at: 
> ip6_finish_output2+0x625/0x1b70
> [   39.814539]  #1: ffffffff84dce1a0 (rcu_read_lock_bh){....}-{1:3}, at: 
> __dev_queue_xmit+0x1df/0x2c40
> [   39.815813]
> [   39.815813] stack backtrace:
> [   39.816441] CPU: 1 PID: 1769 Comm: test_progs Tainted: G           O 
> 6.6.0-rc7-02091-g17e023652cc1 #336
> [   39.817774] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), 
> BIOS rel-1.14.0-0-g155821a1990b-prebuilt.qemu.org 04/01/2014
> [   39.819312] Call Trace:
> [   39.819655]  <TASK>
> [   39.819967]  dump_stack_lvl+0x130/0x1d0
> [   39.822669]  dump_stack+0x14/0x20
> [   39.823136]  lockdep_rcu_suspicious+0x220/0x350
> [   39.823777]  __might_resched+0xe0/0x660
> [   39.827915]  __might_sleep+0x89/0xf0
> [   39.828423]  skcipher_walk_virt+0x55/0x120
> [   39.828990]  crypto_ecb_decrypt+0x159/0x310
> [   39.833846]  crypto_skcipher_decrypt+0xa0/0xd0
> [   39.834481]  bpf_crypto_skcipher_crypt+0x29a/0x3c0
> [   39.837100]  bpf_crypto_skcipher_decrypt+0x56/0x70
> [   39.837770]  bpf_prog_fa576505ab32d186_decrypt_sanity+0x180/0x185
> [   39.838627]  cls_bpf_classify+0x3b6/0xf80
> [   39.839807]  tcf_classify+0x2f4/0x550
> 

That's odd. skcipher_walk_virt() checks for SKCIPHER_WALK_SLEEP which
depends on CRYPTO_TFM_REQ_MAY_SLEEP of tfm, which shouldn't be set for
crypto_alloc_sync_skcipher(). I think we need some crypto@ folks to jump
in and explain.

David, Herbert could you please take a look at the patchset to confirm
that crypto API is used properly.

>> +
>> +    skcipher_request_free(req);
>> +
>> +    return err;
>> +}
>> +
> 
> 


^ permalink raw reply

* Re: [PATCH bpf-next v3 2/2] selftests: bpf: crypto skcipher algo selftests
From: Martin KaFai Lau @ 2023-11-01 22:53 UTC (permalink / raw)
  To: Vadim Fedorenko
  Cc: bpf, netdev, linux-crypto, Vadim Fedorenko, Jakub Kicinski,
	Andrii Nakryiko, Alexei Starovoitov, Mykola Lysenko
In-Reply-To: <20231031134900.1432945-2-vadfed@meta.com>

On 10/31/23 6:49 AM, Vadim Fedorenko wrote:
> Add simple tc hook selftests to show the way to work with new crypto
> BPF API. Some weird structre and map are added to setup program to make
> verifier happy about dynptr initialization from memory. Simple AES-ECB
> algo is used to demonstrate encryption and decryption of fixed size
> buffers.
> 
> Signed-off-by: Vadim Fedorenko <vadfed@meta.com>
> ---
> v2 -> v3:
> - disable tests on s390 and aarch64 because of unknown Fatal exception
>    in sg_init_one
> v1 -> v2:
> - add CONFIG_CRYPTO_AES and CONFIG_CRYPTO_ECB to selftest build config
>    suggested by Daniel
> 
>   kernel/bpf/crypto.c                           |   5 +-
>   tools/testing/selftests/bpf/DENYLIST.aarch64  |   1 +
>   tools/testing/selftests/bpf/DENYLIST.s390x    |   1 +
>   tools/testing/selftests/bpf/config            |   3 +
>   .../selftests/bpf/prog_tests/crypto_sanity.c  | 129 +++++++++++++++
>   .../selftests/bpf/progs/crypto_common.h       | 103 ++++++++++++
>   .../selftests/bpf/progs/crypto_sanity.c       | 154 ++++++++++++++++++
>   7 files changed, 394 insertions(+), 2 deletions(-)
>   create mode 100644 tools/testing/selftests/bpf/prog_tests/crypto_sanity.c
>   create mode 100644 tools/testing/selftests/bpf/progs/crypto_common.h
>   create mode 100644 tools/testing/selftests/bpf/progs/crypto_sanity.c
> 
> diff --git a/kernel/bpf/crypto.c b/kernel/bpf/crypto.c
> index c2a0703934fc..4ee6193165ca 100644
> --- a/kernel/bpf/crypto.c
> +++ b/kernel/bpf/crypto.c
> @@ -65,8 +65,9 @@ static void *__bpf_dynptr_data_ptr(const struct bpf_dynptr_kern *ptr)
>    *
>    * bpf_crypto_skcipher_ctx_create() allocates memory using the BPF memory
>    * allocator, and will not block. It may return NULL if no memory is available.
> - * @algo: bpf_dynptr which holds string representation of algorithm.
> - * @key:  bpf_dynptr which holds cipher key to do crypto.
> + * @palgo: bpf_dynptr which holds string representation of algorithm.
> + * @pkey:  bpf_dynptr which holds cipher key to do crypto.
> + * @err:   integer to store error code when NULL is returned
>    */
>   __bpf_kfunc struct bpf_crypto_skcipher_ctx *
>   bpf_crypto_skcipher_ctx_create(const struct bpf_dynptr_kern *palgo,
> diff --git a/tools/testing/selftests/bpf/DENYLIST.aarch64 b/tools/testing/selftests/bpf/DENYLIST.aarch64
> index 5c2cc7e8c5d0..72c7ef3e5872 100644
> --- a/tools/testing/selftests/bpf/DENYLIST.aarch64
> +++ b/tools/testing/selftests/bpf/DENYLIST.aarch64
> @@ -1,5 +1,6 @@
>   bpf_cookie/multi_kprobe_attach_api               # kprobe_multi_link_api_subtest:FAIL:fentry_raw_skel_load unexpected error: -3
>   bpf_cookie/multi_kprobe_link_api                 # kprobe_multi_link_api_subtest:FAIL:fentry_raw_skel_load unexpected error: -3
> +crypto_sanity					 # sg_init_one has exception on aarch64

What is the exception? Is arm64 just lacking the support?

>   exceptions					 # JIT does not support calling kfunc bpf_throw: -524
>   fexit_sleep                                      # The test never returns. The remaining tests cannot start.
>   kprobe_multi_bench_attach                        # needs CONFIG_FPROBE
> diff --git a/tools/testing/selftests/bpf/DENYLIST.s390x b/tools/testing/selftests/bpf/DENYLIST.s390x
> index 1a63996c0304..8ab7485bedb1 100644
> --- a/tools/testing/selftests/bpf/DENYLIST.s390x
> +++ b/tools/testing/selftests/bpf/DENYLIST.s390x
> @@ -1,5 +1,6 @@
>   # TEMPORARY
>   # Alphabetical order
> +crypto_sanity				 # sg_init_one has exception on s390					       (exceptions)
>   exceptions				 # JIT does not support calling kfunc bpf_throw				       (exceptions)
>   get_stack_raw_tp                         # user_stack corrupted user stack                                             (no backchain userspace)
>   stacktrace_build_id                      # compare_map_keys stackid_hmap vs. stackmap err -2 errno 2                   (?)
> diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config
> index 02dd4409200e..48b570fd1752 100644
> --- a/tools/testing/selftests/bpf/config
> +++ b/tools/testing/selftests/bpf/config
> @@ -14,6 +14,9 @@ CONFIG_CGROUP_BPF=y
>   CONFIG_CRYPTO_HMAC=y
>   CONFIG_CRYPTO_SHA256=y
>   CONFIG_CRYPTO_USER_API_HASH=y
> +CONFIG_CRYPTO_SKCIPHER=y
> +CONFIG_CRYPTO_ECB=y
> +CONFIG_CRYPTO_AES=y
>   CONFIG_DEBUG_INFO=y
>   CONFIG_DEBUG_INFO_BTF=y
>   CONFIG_DEBUG_INFO_DWARF4=y
> diff --git a/tools/testing/selftests/bpf/prog_tests/crypto_sanity.c b/tools/testing/selftests/bpf/prog_tests/crypto_sanity.c
> new file mode 100644
> index 000000000000..a43969da6d15
> --- /dev/null
> +++ b/tools/testing/selftests/bpf/prog_tests/crypto_sanity.c
> @@ -0,0 +1,129 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */

s/2022/2023/ :)

> +
> +#include <sys/types.h>
> +#include <sys/socket.h>
> +#include <net/if.h>
> +#include <linux/in6.h>
> +
> +#include "test_progs.h"
> +#include "network_helpers.h"
> +#include "crypto_sanity.skel.h"
> +
> +#define NS_TEST "crypto_sanity_ns"
> +#define IPV6_IFACE_ADDR "face::1"
> +#define UDP_TEST_PORT 7777
> +static const char plain_text[] = "stringtoencrypt0";
> +static const char crypted_data[] = "\x5B\x59\x39\xEA\xD9\x7A\x2D\xAD\xA7\xE0\x43" \
> +				   "\x37\x8A\x77\x17\xB2";
> +
> +void test_crypto_sanity(void)
> +{
> +	LIBBPF_OPTS(bpf_tc_hook, qdisc_hook, .attach_point = BPF_TC_EGRESS);
> +	LIBBPF_OPTS(bpf_tc_opts, tc_attach_enc);
> +	LIBBPF_OPTS(bpf_tc_opts, tc_attach_dec);
> +	LIBBPF_OPTS(bpf_test_run_opts, opts,
> +		    .data_in = crypted_data,
> +		    .data_size_in = sizeof(crypted_data),

This should not be needed now because the test_run is on a tracing program.

> +		    .repeat = 1,
> +	);
> +	struct nstoken *nstoken = NULL;
> +	struct crypto_sanity *skel;
> +	struct sockaddr_in6 addr;
> +	int sockfd, err, pfd;
> +	socklen_t addrlen;
> +
> +	skel = crypto_sanity__open();
> +	if (!ASSERT_OK_PTR(skel, "skel open"))
> +		return;
> +
> +	bpf_program__set_autoload(skel->progs.skb_crypto_setup, true);

Remove the '?' from SEC("?fentry.s/bpf_fentry_test1") should save this 
set_autoload call.

> +
> +	SYS(fail, "ip netns add %s", NS_TEST);
> +	SYS(fail, "ip -net %s -6 addr add %s/128 dev lo nodad", NS_TEST, IPV6_IFACE_ADDR);
> +	SYS(fail, "ip -net %s link set dev lo up", NS_TEST);
> +
> +	err = crypto_sanity__load(skel);
> +	if (!ASSERT_OK(err, "crypto_sanity__load"))
> +		goto fail;
> +
> +	nstoken = open_netns(NS_TEST);
> +	if (!ASSERT_OK_PTR(nstoken, "open_netns"))
> +		goto fail;
> +
> +	qdisc_hook.ifindex = if_nametoindex("lo");
> +	if (!ASSERT_GT(qdisc_hook.ifindex, 0, "if_nametoindex lo"))
> +		goto fail;
> +
> +	err = crypto_sanity__attach(skel);
> +	if (!ASSERT_OK(err, "crypto_sanity__attach"))
> +		goto fail;
> +
> +	pfd = bpf_program__fd(skel->progs.skb_crypto_setup);
> +	if (!ASSERT_GT(pfd, 0, "skb_crypto_setup fd"))
> +		goto fail;
> +
> +	err = bpf_prog_test_run_opts(pfd, &opts);
> +	if (!ASSERT_OK(err, "skb_crypto_setup") ||
> +	    !ASSERT_OK(opts.retval, "skb_crypto_setup retval"))
> +		goto fail;
> +
> +	if (!ASSERT_OK(skel->bss->status, "skb_crypto_setup status"))
> +		goto fail;
> +
> +	err = bpf_tc_hook_create(&qdisc_hook);
> +	if (!ASSERT_OK(err, "create qdisc hook"))
> +		goto fail;
> +
> +	addrlen = sizeof(addr);
> +	err = make_sockaddr(AF_INET6, IPV6_IFACE_ADDR, UDP_TEST_PORT,
> +			    (void *)&addr, &addrlen);
> +	if (!ASSERT_OK(err, "make_sockaddr"))
> +		goto fail;
> +
> +	tc_attach_dec.prog_fd = bpf_program__fd(skel->progs.decrypt_sanity);
> +	err = bpf_tc_attach(&qdisc_hook, &tc_attach_dec);
> +	if (!ASSERT_OK(err, "attach decrypt filter"))
> +		goto fail;
> +
> +	sockfd = socket(AF_INET6, SOCK_DGRAM, 0);
> +	if (!ASSERT_NEQ(sockfd, -1, "decrypt socket"))
> +		goto fail;
> +	err = sendto(sockfd, crypted_data, 16, 0, (void *)&addr, addrlen);
> +	close(sockfd);
> +	if (!ASSERT_EQ(err, 16, "decrypt send"))
> +		goto fail;
> +
> +	bpf_tc_detach(&qdisc_hook, &tc_attach_dec);
> +	if (!ASSERT_OK(skel->bss->status, "decrypt status"))
> +		goto fail;
> +	if (!ASSERT_STRNEQ(skel->bss->dst, plain_text, sizeof(plain_text), "decrypt"))
> +		goto fail;
> +
> +	tc_attach_enc.prog_fd = bpf_program__fd(skel->progs.encrypt_sanity);
> +	err = bpf_tc_attach(&qdisc_hook, &tc_attach_enc);
> +	if (!ASSERT_OK(err, "attach encrypt filter"))
> +		goto fail;
> +
> +	sockfd = socket(AF_INET6, SOCK_DGRAM, 0);
> +	if (!ASSERT_NEQ(sockfd, -1, "encrypt socket"))
> +		goto fail;
> +	err = sendto(sockfd, plain_text, 16, 0, (void *)&addr, addrlen);
> +	close(sockfd);
> +	if (!ASSERT_EQ(err, 16, "encrypt send"))
> +		goto fail;
> +
> +	bpf_tc_detach(&qdisc_hook, &tc_attach_enc);
> +	if (!ASSERT_OK(skel->bss->status, "encrypt status"))
> +		goto fail;
> +	if (!ASSERT_STRNEQ(skel->bss->dst, crypted_data, sizeof(crypted_data), "encrypt"))
> +		goto fail;
> +
> +fail:
> +	if (nstoken) {
> +		bpf_tc_hook_destroy(&qdisc_hook);
> +		close_netns(nstoken);
> +	}
> +	SYS_NOFAIL("ip netns del " NS_TEST " &> /dev/null");
> +	crypto_sanity__destroy(skel);
> +}
> diff --git a/tools/testing/selftests/bpf/progs/crypto_common.h b/tools/testing/selftests/bpf/progs/crypto_common.h
> new file mode 100644
> index 000000000000..584378bb6df8
> --- /dev/null
> +++ b/tools/testing/selftests/bpf/progs/crypto_common.h
> @@ -0,0 +1,103 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
> +
> +#ifndef _CRYPTO_COMMON_H
> +#define _CRYPTO_COMMON_H
> +
> +#include "errno.h"
> +#include <stdbool.h>
> +
> +#define private(name) SEC(".bss." #name) __hidden __attribute__((aligned(8)))
> +private(CTX) static struct bpf_crypto_skcipher_ctx __kptr * global_crypto_ctx;

Add a test to show how it is used?

> +
> +struct bpf_crypto_skcipher_ctx *bpf_crypto_skcipher_ctx_create(const struct bpf_dynptr *algo,
> +							       const struct bpf_dynptr *key,
> +							       int *err) __ksym;
> +struct bpf_crypto_skcipher_ctx *bpf_crypto_skcipher_ctx_acquire(struct bpf_crypto_skcipher_ctx *ctx) __ksym;
> +void bpf_crypto_skcipher_ctx_release(struct bpf_crypto_skcipher_ctx *ctx) __ksym;

Same for acquire and release kfunc. Add test cases.

btw, does it have use cases to store the same bpf_crypto_skcipher_ctx in 
multiple maps?

> +int bpf_crypto_skcipher_encrypt(struct bpf_crypto_skcipher_ctx *ctx,
> +				const struct bpf_dynptr *src, struct bpf_dynptr *dst,
> +				const struct bpf_dynptr *iv) __ksym;
> +int bpf_crypto_skcipher_decrypt(struct bpf_crypto_skcipher_ctx *ctx,
> +				const struct bpf_dynptr *src, struct bpf_dynptr *dst,
> +				const struct bpf_dynptr *iv) __ksym;
> +
> +struct __crypto_skcipher_ctx_value {
> +	struct bpf_crypto_skcipher_ctx __kptr * ctx;
> +};
> +
> +struct crypto_conf_value {
> +	__u8 algo[32];
> +	__u32 algo_size;
> +	__u8 key[32];
> +	__u32 key_size;
> +	__u8 iv[32];
> +	__u32 iv_size;
> +	__u8 dst[32];
> +	__u32 dst_size;
> +};
> +
> +struct array_conf_map {
> +	__uint(type, BPF_MAP_TYPE_ARRAY);
> +	__type(key, int);
> +	__type(value, struct crypto_conf_value);
> +	__uint(max_entries, 1);
> +} __crypto_conf_map SEC(".maps");
> +
> +struct array_map {
> +	__uint(type, BPF_MAP_TYPE_ARRAY);
> +	__type(key, int);
> +	__type(value, struct __crypto_skcipher_ctx_value);
> +	__uint(max_entries, 1);
> +} __crypto_skcipher_ctx_map SEC(".maps");
> +
> +static inline struct crypto_conf_value *crypto_conf_lookup(void)
> +{
> +	struct crypto_conf_value *v, local = {};
> +	u32 key = 0;
> +
> +	v = bpf_map_lookup_elem(&__crypto_conf_map, &key);
> +	if (v)
> +		return v;
> +
> +	bpf_map_update_elem(&__crypto_conf_map, &key, &local, 0);
> +	return bpf_map_lookup_elem(&__crypto_conf_map, &key);

hmm... local is all 0 which became the map value. Where is it initialized?

> +}
> +
> +static inline struct __crypto_skcipher_ctx_value *crypto_skcipher_ctx_value_lookup(void)
> +{
> +	u32 key = 0;
> +
> +	return bpf_map_lookup_elem(&__crypto_skcipher_ctx_map, &key);
> +}
> +
> +static inline int crypto_skcipher_ctx_insert(struct bpf_crypto_skcipher_ctx *ctx)
> +{
> +	struct __crypto_skcipher_ctx_value local, *v;
> +	long status;

nit. s/status/err/

> +	struct bpf_crypto_skcipher_ctx *old;
> +	u32 key = 0;
> +
> +	local.ctx = NULL;
> +	status = bpf_map_update_elem(&__crypto_skcipher_ctx_map, &key, &local, 0);
> +	if (status) {
> +		bpf_crypto_skcipher_ctx_release(ctx);
> +		return status;
> +	}
> +
> +	v = bpf_map_lookup_elem(&__crypto_skcipher_ctx_map, &key);
> +	if (!v) {
> +		bpf_crypto_skcipher_ctx_release(ctx);
> +		return -ENOENT;
> +	}
> +
> +	old = bpf_kptr_xchg(&v->ctx, ctx);
> +	if (old) {
> +		bpf_crypto_skcipher_ctx_release(old);
> +		return -EEXIST;
> +	}
> +
> +	return 0;
> +}
> +
> +#endif /* _CRYPTO_COMMON_H */
> diff --git a/tools/testing/selftests/bpf/progs/crypto_sanity.c b/tools/testing/selftests/bpf/progs/crypto_sanity.c
> new file mode 100644
> index 000000000000..71a172d8d2a2
> --- /dev/null
> +++ b/tools/testing/selftests/bpf/progs/crypto_sanity.c
> @@ -0,0 +1,154 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
> +
> +#include "vmlinux.h"
> +#include "bpf_tracing_net.h"
> +#include <bpf/bpf_helpers.h>
> +#include <bpf/bpf_endian.h>
> +#include <bpf/bpf_tracing.h>
> +#include "bpf_misc.h"
> +#include "bpf_kfuncs.h"
> +#include "crypto_common.h"
> +
> +#define UDP_TEST_PORT 7777
> +unsigned char crypto_key[16] = "testtest12345678";
> +char crypto_algo[9] = "ecb(aes)";
> +char dst[32] = {};
> +int status;
> +
> +static inline int skb_validate_test(const struct __sk_buff *skb)
> +{
> +	struct ipv6hdr ip6h;
> +	struct udphdr udph;
> +	u32 offset;
> +
> +	if (skb->protocol != __bpf_constant_htons(ETH_P_IPV6))
> +		return -1;
> +
> +	if (bpf_skb_load_bytes(skb, ETH_HLEN, &ip6h, sizeof(ip6h)))
> +		return -1;
> +
> +	if (ip6h.nexthdr != IPPROTO_UDP)
> +		return -1;
> +
> +	if (bpf_skb_load_bytes(skb, ETH_HLEN + sizeof(ip6h), &udph, sizeof(udph)))
> +		return -1;
> +
> +	if (udph.dest != __bpf_constant_htons(UDP_TEST_PORT))
> +		return -1;
> +
> +	offset = ETH_HLEN + sizeof(ip6h) + sizeof(udph);
> +	if (skb->len < offset + 16)
> +		return -1;
> +
> +	return offset;
> +}
> +
> +SEC("?fentry.s/bpf_fentry_test1")

I wonder if others have better idea how to do this. This is basically setting up 
the algo and key which requires to call a kfunc in sleepable context. Otherwise, 
the tc's test_run would be a better fit.

> +int BPF_PROG(skb_crypto_setup)
> +{
> +	struct crypto_conf_value *c;
> +	struct bpf_dynptr algo, key;
> +	int err = 0;
> +
> +	status = 0;
> +
> +	c = crypto_conf_lookup();

"c" is not used. Why lookup is needed? May be properly setup the 
__crypto_conf_map so the example is more complete.

> +	if (!c) {
> +		status = -EINVAL;
> +		return 0;
> +	}
> +
> +	bpf_dynptr_from_mem(crypto_algo, sizeof(crypto_algo), 0, &algo);
> +	bpf_dynptr_from_mem(crypto_key, sizeof(crypto_key), 0, &key);
> +	struct bpf_crypto_skcipher_ctx *cctx = bpf_crypto_skcipher_ctx_create(&algo, &key, &err);
> +
> +	if (!cctx) {
> +		status = err;
> +		return 0;
> +	}
> +
> +	err = crypto_skcipher_ctx_insert(cctx);
> +	if (err && err != -EEXIST)
> +		status = err;
> +
> +	return 0;
> +}
> +
> +SEC("tc")
> +int decrypt_sanity(struct __sk_buff *skb)
> +{
> +	struct __crypto_skcipher_ctx_value *v;
> +	struct bpf_crypto_skcipher_ctx *ctx;
> +	struct bpf_dynptr psrc, pdst, iv;
> +	int err;

nit. s/err/offset/. "err" is actually the offset for the common case.

btw, considering dynptr psrc has to be created from skb anyway, is it easier to 
use bpf_dynptr_slice(psrc, 0, NULL, ETH_HLEN + sizeof(ip6h) + sizeof(udph)) in 
the above skb_validate_test()?

> +
> +	err = skb_validate_test(skb);
> +	if (err < 0) {
> +		status = err;
> +		return TC_ACT_SHOT;
> +	}
> +
> +	v = crypto_skcipher_ctx_value_lookup();
> +	if (!v) {
> +		status = -ENOENT;
> +		return TC_ACT_SHOT;
> +	}
> +
> +	ctx = v->ctx;
> +	if (!ctx) {
> +		status = -ENOENT;
> +		return TC_ACT_SHOT;
> +	}
> +
> +	bpf_dynptr_from_skb(skb, 0, &psrc);
> +	bpf_dynptr_adjust(&psrc, err, err + 16);
> +	bpf_dynptr_from_mem(dst, sizeof(dst), 0, &pdst);
> +	bpf_dynptr_from_mem(dst, 0, 0, &iv);
> +
> +	bpf_crypto_skcipher_decrypt(ctx, &psrc, &pdst, &iv);

check decrypt return value before setting "status = 0;' below.

> +
> +	status = 0;
> +
> +	return TC_ACT_SHOT;
> +}
> +
> +SEC("tc")
> +int encrypt_sanity(struct __sk_buff *skb)
> +{
> +	struct __crypto_skcipher_ctx_value *v;
> +	struct bpf_crypto_skcipher_ctx *ctx;
> +	struct bpf_dynptr psrc, pdst, iv;
> +	int err;
> +
> +	status = 0;
> +
> +	err = skb_validate_test(skb);
> +	if (err < 0) {
> +		status = err;
> +		return TC_ACT_SHOT;
> +	}
> +
> +	v = crypto_skcipher_ctx_value_lookup();
> +	if (!v) {
> +		status = -ENOENT;
> +		return TC_ACT_SHOT;
> +	}
> +
> +	ctx = v->ctx;
> +	if (!ctx) {
> +		status = -ENOENT;
> +		return TC_ACT_SHOT;
> +	}
> +
> +	bpf_dynptr_from_skb(skb, 0, &psrc);
> +	bpf_dynptr_adjust(&psrc, err, err + 16);
> +	bpf_dynptr_from_mem(dst, sizeof(dst), 0, &pdst);
> +	bpf_dynptr_from_mem(dst, 0, 0, &iv);
> +
> +	bpf_crypto_skcipher_encrypt(ctx, &psrc, &pdst, &iv);

Same here. check return value.

> +
> +	return TC_ACT_SHOT;
> +}
> +
> +char __license[] SEC("license") = "GPL";


^ permalink raw reply

* Re: [PATCH bpf-next v3 1/2] bpf: add skcipher API support to TC/XDP programs
From: Martin KaFai Lau @ 2023-11-01 22:59 UTC (permalink / raw)
  To: Vadim Fedorenko
  Cc: bpf, netdev, linux-crypto, Jakub Kicinski, Andrii Nakryiko,
	Alexei Starovoitov, Mykola Lysenko, Vadim Fedorenko,
	David S. Miller, Herbert Xu
In-Reply-To: <91a6d5a7-7b18-48a2-9a74-7c00509467f8@linux.dev>

On 11/1/23 3:50 PM, Vadim Fedorenko wrote:
>>> +static int bpf_crypto_skcipher_crypt(struct crypto_sync_skcipher *tfm,
>>> +                     const struct bpf_dynptr_kern *src,
>>> +                     struct bpf_dynptr_kern *dst,
>>> +                     const struct bpf_dynptr_kern *iv,
>>> +                     bool decrypt)
>>> +{
>>> +    struct skcipher_request *req = NULL;
>>> +    struct scatterlist sgin, sgout;
>>> +    int err;
>>> +
>>> +    if (crypto_sync_skcipher_get_flags(tfm) & CRYPTO_TFM_NEED_KEY)
>>> +        return -EINVAL;
>>> +
>>> +    if (__bpf_dynptr_is_rdonly(dst))
>>> +        return -EINVAL;
>>> +
>>> +    if (!__bpf_dynptr_size(dst) || !__bpf_dynptr_size(src))
>>> +        return -EINVAL;
>>> +
>>> +    if (__bpf_dynptr_size(iv) != crypto_sync_skcipher_ivsize(tfm))
>>> +        return -EINVAL;
>>> +
>>> +    req = skcipher_request_alloc(&tfm->base, GFP_ATOMIC);
>>
>> Doing alloc per packet may kill performance. Is it possible to optimize it 
>> somehow? What is the usual size of the req (e.g. the example in the selftest)?
>>
> 
> In ktls code aead_request is allocated every time encryption is invoked, see 
> tls_decrypt_sg(), apparently per skb. Doesn't look like performance
> killer. For selftest it's only sizeof(struct skcipher_request).

ktls is doing the en/decrypt on the userspace behalf to compensate the cost.

When this kfunc is used in xdp to decrypt a few bytes for each packet and then 
XDP_TX out, this extra alloc will be quite noticeable. If the size is usually 
small, can it be done in the stack memory?


^ permalink raw reply

* Re: [PATCH] rxrpc_find_service_conn_rcu: use read_seqbegin() rather than read_seqbegin_or_lock()
From: Oleg Nesterov @ 2023-11-01 23:17 UTC (permalink / raw)
  To: Al Viro
  Cc: David Howells, Marc Dionne, David S. Miller, Eric Dumazet,
	Jakub Kicinski, Paolo Abeni, Chuck Lever, linux-afs, netdev,
	linux-kernel
In-Reply-To: <20231101224855.GJ1957730@ZenIV>

On 11/01, Al Viro wrote:
>
> On Wed, Nov 01, 2023 at 10:52:15PM +0100, Oleg Nesterov wrote:
>
> > > Why would you want to force that "switch to locked on the second pass" policy
> > > on every possible caller?
> >
> > Because this is what (I think) read_seqbegin_or_lock() is supposed to do.
> > It should take the lock for writing if the lockless access failed. At least
> > according to the documentation.
>
> Not really - it's literally seqbegin or lock, depending upon what the caller
> tells it...

OK, I won't argue right now. But again, this patch doesn't change the current
behaviour. Exactly because the caller does NOT tell read_seqbegin_or_lock() that
it wants "or lock" on the 2nd pass.

> Take a look at d_walk() and try to shoehorn that into your variant.  Especially
> the D_WALK_NORETRY handling...

I am already sleeping, quite possibly I am wrong. But it seems that if we change
done_seqretry() then d_walk() needs something like

	--- a/fs/dcache.c
	+++ b/fs/dcache.c
	@@ -1420,7 +1420,7 @@ static void d_walk(struct dentry *parent, void *data,
			spin_lock(&this_parent->d_lock);
	 
			/* might go back up the wrong parent if we have had a rename. */
	-		if (need_seqretry(&rename_lock, seq))
	+		if (need_seqretry(&rename_lock, &seq))
				goto rename_retry;
			/* go into the first sibling still alive */
			do {
	@@ -1432,22 +1432,20 @@ static void d_walk(struct dentry *parent, void *data,
			rcu_read_unlock();
			goto resume;
		}
	-	if (need_seqretry(&rename_lock, seq))
	+	if (need_seqretry(&rename_lock, &seq))
			goto rename_retry;
		rcu_read_unlock();
	 
	 out_unlock:
		spin_unlock(&this_parent->d_lock);
	-	done_seqretry(&rename_lock, seq);
	+	done_seqretry(&rename_lock, &seq);
		return;
	 
	 rename_retry:
		spin_unlock(&this_parent->d_lock);
		rcu_read_unlock();
	-	BUG_ON(seq & 1);
		if (!retry)
			return;
	-	seq = 1;
		goto again;
	 }


But again, again, this is off-topic and needs another discussion. Right now I am just
trying to audit the users of read_seqbegin_or_lock/need_seqretry and change those who
use them incorrectly.

Oleg.


^ permalink raw reply

* [ANN] netdev development stats for 6.7
From: Jakub Kicinski @ 2023-11-01 23:29 UTC (permalink / raw)
  To: netdev; +Cc: netdev-driver-reviewers, Stanislav Fomichev

Hi!

General stats
-------------

The cycle started on August 29th and ended on Oct 31st, 1 day shorter
than previous one, due to the timing of our PR.

We have seen total of 14243 messages on the list (226 / day) which 
is 17% lower than the (very busy) 6.6 cycle. The number of commits
directly applied by netdev maintainers dropped by 13% to 19 commits 
a day, close to our long term average.

There was a fluctuation in the number of participants. While the number
of people "reviewing" (replying in threads) remained constant (around
410) the number of people exclusively starting threads ("authors"?)
decreased by 40 (344 -> 306).

Fraction of changes with Review/Ack tags has dropped again to 58%
(counting all tags) and 50% (counting tags from different company
than the author).

Rankings
--------

Top reviewers (thr):                 Top reviewers (msg):                
   1 (   ) [24] Simon Horman            1 ( +1) [43] Jakub Kicinski      
   2 (   ) [23] Jakub Kicinski          2 ( -1) [33] Simon Horman        
   3 (   ) [12] Andrew Lunn             3 (   ) [33] Andrew Lunn         
   4 ( +1) [10] Paolo Abeni             4 ( +5) [15] Eric Dumazet        
   5 ( +1) [ 8] Eric Dumazet            5 ( +1) [14] David Ahern         
   6 ( +1) [ 7] David Ahern             6 ( +2) [13] Paolo Abeni         
   7 (+21) [ 7] Kees Cook               7 (+21) [13] Florian Fainelli    
   8 (+12) [ 6] Jiri Pirko              8 ( +5) [12] Jiri Pirko          
   9 (+14) [ 6] Florian Fainelli        9 (+23) [12] Jacob Keller        
  10 (+19) [ 6] Jacob Keller           10 ( +8) [10] Vladimir Oltean     
  11 ( -2) [ 4] Russell King           11 (+24) [ 9] Kees Cook           
  12 ( -2) [ 4] Willem de Bruijn       12 ( +5) [ 8] Rob Herring         
  13 ( +2) [ 4] Vladimir Oltean        13 ( -8) [ 8] Russell King        
  14 ( -1) [ 4] Rob Herring            14 ( +1) [ 8] Jason Wang          
  15 (***) [ 3] Wojciech Drewek        15 ( -4) [ 7] Willem de Bruijn    

No surprises in the top 6. Kees takes #7, helping to review string op
safety and __conted_by patches. Wojciech enters the ranking at #15.
Thanks for all the review work, folks!

Top authors (thr):                   Top authors (msg):                  
   1 (***) [7] Justin Stitt             1 ( +3) [18] Eric Dumazet        
   2 ( +1) [6] Eric Dumazet             2 (***) [16] David Howells       
   3 ( -1) [6] Jakub Kicinski           3 ( +6) [16] Dmitry Safonov      
   4 (+10) [3] Jiri Pirko               4 ( -2) [14] Saeed Mahameed      
   5 ( -1) [2] Tony Nguyen              5 ( +9) [14] Herve Codina        
   6 (***) [2] Oleksij Rempel           6 (   ) [13] Jiri Pirko          
   7 (***) [2] Kees Cook                7 ( -4) [12] Jakub Kicinski      
   8 (***) [2] Ivan Vecera              8 (+26) [11] Aurelien Aptel      
   9 (***) [2] Dan Carpenter            9 ( -8) [11] Tony Nguyen         
  10 (+15) [2] MD Danish Anwar         10 (***) [10] Uwe Kleine-König    

Justin has posted the most individual patches, replacing the use of
unsafe string APIs throughout the drivers. Jiri jumps into top 5
with his devlink and YNL work. David H posted a few series for iov
and network file systems (somewhat netdev-adjacent). Dmitry contributed
the TCP Auth Option support. Herve worked on a HDLC framer for QMC. 

Top reviewers (thr):                 Top reviewers (msg):                
   1 ( +2) [42] RedHat                  1 ( +2) [71] RedHat              
   2 (   ) [27] Meta                    2 ( -1) [52] Meta                
   3 ( +2) [23] Intel                   3 ( +2) [46] Intel               
   4 ( +2) [15] Google                  4 ( +2) [33] Andrew Lunn         
   5 ( -1) [12] nVidia                  5 ( +2) [29] Google              
   6 ( +1) [12] Andrew Lunn             6 ( -2) [23] nVidia              
   7 ( +3) [ 7] Enfabrica               7 ( +4) [14] Broadcom            

The biggest change in the company statistics is the disappearance
of Corigine. Simon is now employed at Red Hat, giving Red Hat the
#1 spot with quite some margin.

With Corigine dropping out, Enfabrica (David Ahern) and Broadcom
(Florian Fainelli) ascend to the top #7.

Top authors (thr):                   Top authors (msg):                  
   1 ( +5) [22] Google                  1 (   ) [76] Intel               
   2 (   ) [19] Intel                   2 (   ) [59] nVidia              
   3 (   ) [17] RedHat                  3 (   ) [55] RedHat              
   4 (   ) [10] Meta                    4 ( +2) [50] Google              
   5 (   ) [ 9] nVidia                  5 (   ) [38] Meta                
   6 ( -5) [ 6] Huawei                  6 (+10) [26] Bootlin             
   7 ( +6) [ 5] Linaro                  7 ( -3) [23] Huawei       

Top scores (positive):               Top scores (negative):              
   1 ( +3) [341] RedHat                 1 (+13) [97] Bootlin             
   2 (   ) [219] Meta                   2 (***) [72] nVidia              
   3 (   ) [183] Andrew Lunn            3 ( -2) [66] Huawei              
   4 ( +2) [ 95] Enfabrica              4 ( +4) [59] Arista              
   5 ( +4) [ 59] Broadcom               5 (+10) [48] Alibaba             
   6 (+17) [ 52] Isovalent              6 (***) [41] Pengutronix         
   7 ( +3) [ 47] ARM             
   8 ( +5) [ 46] Oracle             
   9 ( -1) [ 44] Linux Foundation  
  10 ( -5) [ 32] Linaro         

A few things worth noting in the "community score" metrics.

Intel moved from the "negative" to the "positive" side (at #13, so not
high enough to make the "top"). Shout out to Jake, Wojciech and Przemek
for their review work! This move may have been helped slightly by the
lower volume of Intel patches and external contributions to Intel
drivers. So please do not rest on your laurels :)

nVidia makes the opposite switch and ironically takes negative spot #2,
the exact spot previously occupied by Intel. Jiri's efforts are not
enough to counter balance the flow of patches there :(

Arista is likely a blip as Dmitry had to repost his work a few times.

Bootlin and Pengutronix return to the same (negative) positions they
held in 6.5 cycles. It may be the time to carve out more review time
for folks working at those companies.
-- 
Code: https://github.com/kuba-moo/ml-stat

^ permalink raw reply

* Re: [PATCH bpf-next v3 1/2] bpf: add skcipher API support to TC/XDP programs
From: Martin KaFai Lau @ 2023-11-01 23:41 UTC (permalink / raw)
  To: Vadim Fedorenko
  Cc: bpf, netdev, linux-crypto, Jakub Kicinski, Andrii Nakryiko,
	Alexei Starovoitov, Mykola Lysenko, Vadim Fedorenko,
	David S. Miller, Herbert Xu
In-Reply-To: <91a6d5a7-7b18-48a2-9a74-7c00509467f8@linux.dev>

On 11/1/23 3:50 PM, Vadim Fedorenko wrote:
>>> +static void *__bpf_dynptr_data_ptr(const struct bpf_dynptr_kern *ptr)
>>> +{
>>> +    enum bpf_dynptr_type type;
>>> +
>>> +    if (!ptr->data)
>>> +        return NULL;
>>> +
>>> +    type = bpf_dynptr_get_type(ptr);
>>> +
>>> +    switch (type) {
>>> +    case BPF_DYNPTR_TYPE_LOCAL:
>>> +    case BPF_DYNPTR_TYPE_RINGBUF:
>>> +        return ptr->data + ptr->offset;
>>> +    case BPF_DYNPTR_TYPE_SKB:
>>> +        return skb_pointer_if_linear(ptr->data, ptr->offset, 
>>> __bpf_dynptr_size(ptr));
>>> +    case BPF_DYNPTR_TYPE_XDP:
>>> +    {
>>> +        void *xdp_ptr = bpf_xdp_pointer(ptr->data, ptr->offset, 
>>> __bpf_dynptr_size(ptr));
>>
>> I suspect what it is doing here (for skb and xdp in particular) is very 
>> similar to bpf_dynptr_slice. Please check if bpf_dynptr_slice(ptr, 0, NULL, 
>> sz) will work.
>>
> 
> Well, yes, it's simplified version of bpf_dynptr_slice. The problem is
> that bpf_dynptr_slice bpf_kfunc which cannot be used in another
> bpf_kfunc. Should I refactor the code to use it in both places? Like

Sorry, scrolled too fast in my earlier reply :(

I am not aware of this limitation. What error does it have?
The bpf_dynptr_slice_rdwr kfunc() is also calling the bpf_dynptr_slice() kfunc.

> create __bpf_dynptr_slice() which will be internal part of bpf_kfunc?







^ permalink raw reply

* Re: [PATCH net-next V2] ptp: fix corrupted list in ptp_open
From: Richard Cochran @ 2023-11-02  0:12 UTC (permalink / raw)
  To: Edward Adam Davis
  Cc: davem, linux-kernel, netdev, reibax, syzbot+df3f3ef31f60781fa911,
	syzkaller-bugs
In-Reply-To: <tencent_24C96E7894D0EBA2EDD2CFB87BB66EC02D0A@qq.com>

On Tue, Oct 31, 2023 at 05:07:08AM +0800, Edward Adam Davis wrote:
> There is no lock protection when writing ptp->tsevqs in ptp_open(),
> ptp_release(), which can cause data corruption,

Really?  How?

> use mutex lock to avoid this 
> issue.
> 
> Moreover, ptp_release() should not be used to release the queue in ptp_read(),
> and it should be deleted together.
> 
> Reported-and-tested-by: syzbot+df3f3ef31f60781fa911@syzkaller.appspotmail.com
> Fixes: 8f5de6fb2453 ("ptp: support multiple timestamp event readers")
> Signed-off-by: Edward Adam Davis <eadavis@qq.com>
> ---
>  drivers/ptp/ptp_chardev.c | 11 +++++++++--
>  drivers/ptp/ptp_clock.c   |  3 +++
>  drivers/ptp/ptp_private.h |  1 +
>  3 files changed, 13 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/ptp/ptp_chardev.c b/drivers/ptp/ptp_chardev.c
> index 282cd7d24077..e31551d2697d 100644
> --- a/drivers/ptp/ptp_chardev.c
> +++ b/drivers/ptp/ptp_chardev.c
> @@ -109,6 +109,9 @@ int ptp_open(struct posix_clock_context *pccontext, fmode_t fmode)
>  	struct timestamp_event_queue *queue;
>  	char debugfsname[32];
>  
> +	if (mutex_lock_interruptible(&ptp->tsevq_mux)) 
> +		return -ERESTARTSYS;
> +

This mutex is not needed.

Please don't ignore review comments.

Thanks,
Richard

^ permalink raw reply

* Re: [syzbot] [net?] [usb?] INFO: rcu detected stall in worker_thread (9)
From: syzbot @ 2023-11-02  0:14 UTC (permalink / raw)
  To: admini, bpf, davem, edumazet, gregkh, hdanton, horms, jiri, kuba,
	linux-kernel, linux-usb, netdev, pabeni, rafael, syzkaller-bugs,
	twuufnxlz
In-Reply-To: <0000000000003495bf060724994a@google.com>

syzbot has bisected this issue to:

commit c2368b19807affd7621f7c4638cd2e17fec13021
Author: Jiri Pirko <jiri@nvidia.com>
Date:   Fri Jul 29 07:10:35 2022 +0000

    net: devlink: introduce "unregistering" mark and use it during devlinks iteration

bisection log:  https://syzkaller.appspot.com/x/bisect.txt?x=1758e1e3680000
start commit:   1c8b86a3799f Merge tag 'xsa441-6.6-tag' of git://git.kerne..
git tree:       upstream
final oops:     https://syzkaller.appspot.com/x/report.txt?x=14d8e1e3680000
console output: https://syzkaller.appspot.com/x/log.txt?x=10d8e1e3680000
kernel config:  https://syzkaller.appspot.com/x/.config?x=11e478e28144788c
dashboard link: https://syzkaller.appspot.com/bug?extid=225bfad78b079744fd5e
syz repro:      https://syzkaller.appspot.com/x/repro.syz?x=155614de680000

Reported-by: syzbot+225bfad78b079744fd5e@syzkaller.appspotmail.com
Fixes: c2368b19807a ("net: devlink: introduce "unregistering" mark and use it during devlinks iteration")

For information about bisection process see: https://goo.gl/tpsmEJ#bisection

^ permalink raw reply

* Re: [PATCH bpf-next v8 07/10] bpf, net: switch to dynamic registration
From: Martin KaFai Lau @ 2023-11-02  0:17 UTC (permalink / raw)
  To: Kui-Feng Lee
  Cc: kuifeng, netdev, bpf, ast, song, kernel-team, andrii, thinker.li,
	drosen
In-Reply-To: <51be2e5e-8def-45c5-8864-6b0dcc794300@gmail.com>

On 10/31/23 5:19 PM, Kui-Feng Lee wrote:
> 
> 
> On 10/31/23 17:02, Martin KaFai Lau wrote:
>> On 10/31/23 4:34 PM, Kui-Feng Lee wrote:
>>>>> diff --git a/include/linux/btf.h b/include/linux/btf.h
>>>>> index a8813605f2f6..954536431e0b 100644
>>>>> --- a/include/linux/btf.h
>>>>> +++ b/include/linux/btf.h
>>>>> @@ -12,6 +12,8 @@
>>>>>   #include <uapi/linux/bpf.h>
>>>>>   #define BTF_TYPE_EMIT(type) ((void)(type *)0)
>>>>> +#define BTF_STRUCT_OPS_TYPE_EMIT(type) {((void)(struct type *)0);    \
>>>>
>>>> ((void)(struct type *)0); is new. Why is it needed?
>>>
>>> This is a trick of BTF to force compiler generate type info for
>>> the given type. Without trick, compiler may skip these types if these
>>> type are not used at all in the module.  For example, modules usually
>>> don't use value types of struct_ops directly.
>> It is not the value type and value type emit is understood. It is the 
>> struct_ops type itself and it is new addition in this patchset afaict. The 
>> value type emit is in the next line which was cut out from the context here.
>>
> I mean both of them are required.
> In the case of a dummy implementation, struct_ops type itself properly never 
> being used, only being declared by the module. Without this line,

Other than bpf_dummy_ops, after reg(), the struct_ops->func() must be used 
somewhere in the kernel or module. Like tcp must be using the tcp_congestion_ops 
after reg(). bpf_dummy_ops is very special and probably should be moved out to 
bpf_testmod somehow but this is for later. Even bpf_dummy_ops does not have an 
issue now. Why it is needed after the kmod support change?

or it is a preemptive addition to be future proof only?

Addition is fine if it is required to work. I am trying to understand why this 
new addition is needed after the kmod support change. The reason why this is 
needed after the kmod support change is not obvious from looking at the code. 
The commit message didn't mention why and what broke after this kmod change. If 
someone wants to clean it up a few months later, we will need to figure out why 
it was added in the first place.

> the module developer will fail to load a struct_ops map of the dummy
> type. This line is added to avoid this awful situation.
> 

^ permalink raw reply

* Re: [PATCH net-next V2] ptp: fix corrupted list in ptp_open
From: Richard Cochran @ 2023-11-02  0:18 UTC (permalink / raw)
  To: Edward Adam Davis
  Cc: habetsm.xilinx, davem, linux-kernel, netdev, reibax,
	syzbot+df3f3ef31f60781fa911, syzkaller-bugs
In-Reply-To: <tencent_2C67C6D2537B236F497823BCC457976F9705@qq.com>

On Tue, Oct 31, 2023 at 06:25:42PM +0800, Edward Adam Davis wrote:
> There is no lock protection when writing ptp->tsevqs in ptp_open(),
> ptp_release(), which can cause data corruption,

NAK.

You haven't identified any actual data corruption issue.

If there is an issue, please state what it is.

Thanks,
Richard



^ permalink raw reply

* Re: [PATCH bpf-next v3 1/2] bpf: add skcipher API support to TC/XDP programs
From: Vadim Fedorenko @ 2023-11-02  0:31 UTC (permalink / raw)
  To: Martin KaFai Lau
  Cc: bpf, netdev, linux-crypto, Jakub Kicinski, Andrii Nakryiko,
	Alexei Starovoitov, Mykola Lysenko, Vadim Fedorenko,
	David S. Miller, Herbert Xu
In-Reply-To: <4adea710-72ca-0908-d280-625bc3682aa1@linux.dev>

On 01.11.2023 22:59, Martin KaFai Lau wrote:
> On 11/1/23 3:50 PM, Vadim Fedorenko wrote:
>>>> +static int bpf_crypto_skcipher_crypt(struct crypto_sync_skcipher *tfm,
>>>> +                     const struct bpf_dynptr_kern *src,
>>>> +                     struct bpf_dynptr_kern *dst,
>>>> +                     const struct bpf_dynptr_kern *iv,
>>>> +                     bool decrypt)
>>>> +{
>>>> +    struct skcipher_request *req = NULL;
>>>> +    struct scatterlist sgin, sgout;
>>>> +    int err;
>>>> +
>>>> +    if (crypto_sync_skcipher_get_flags(tfm) & CRYPTO_TFM_NEED_KEY)
>>>> +        return -EINVAL;
>>>> +
>>>> +    if (__bpf_dynptr_is_rdonly(dst))
>>>> +        return -EINVAL;
>>>> +
>>>> +    if (!__bpf_dynptr_size(dst) || !__bpf_dynptr_size(src))
>>>> +        return -EINVAL;
>>>> +
>>>> +    if (__bpf_dynptr_size(iv) != crypto_sync_skcipher_ivsize(tfm))
>>>> +        return -EINVAL;
>>>> +
>>>> +    req = skcipher_request_alloc(&tfm->base, GFP_ATOMIC);
>>>
>>> Doing alloc per packet may kill performance. Is it possible to optimize it 
>>> somehow? What is the usual size of the req (e.g. the example in the selftest)?
>>>
>>
>> In ktls code aead_request is allocated every time encryption is invoked, see 
>> tls_decrypt_sg(), apparently per skb. Doesn't look like performance
>> killer. For selftest it's only sizeof(struct skcipher_request).
> 
> ktls is doing the en/decrypt on the userspace behalf to compensate the cost.
> 
> When this kfunc is used in xdp to decrypt a few bytes for each packet and then 
> XDP_TX out, this extra alloc will be quite noticeable. If the size is usually 
> small, can it be done in the stack memory?

Hmm... looks like SYNC_SKCIPHER_REQUEST_ON_STACK will help us. Ok, I'll change 
this part to use stack allocations.


^ permalink raw reply

* Re: [PATCH bpf-next v3 1/2] bpf: add skcipher API support to TC/XDP programs
From: Vadim Fedorenko @ 2023-11-02  0:38 UTC (permalink / raw)
  To: Martin KaFai Lau
  Cc: bpf, netdev, linux-crypto, Jakub Kicinski, Andrii Nakryiko,
	Alexei Starovoitov, Mykola Lysenko, Vadim Fedorenko
In-Reply-To: <6947046d-27e3-90ee-3419-0b480af0abb0@linux.dev>

On 01.11.2023 23:41, Martin KaFai Lau wrote:
> On 11/1/23 3:50 PM, Vadim Fedorenko wrote:
>>>> +static void *__bpf_dynptr_data_ptr(const struct bpf_dynptr_kern *ptr)
>>>> +{
>>>> +    enum bpf_dynptr_type type;
>>>> +
>>>> +    if (!ptr->data)
>>>> +        return NULL;
>>>> +
>>>> +    type = bpf_dynptr_get_type(ptr);
>>>> +
>>>> +    switch (type) {
>>>> +    case BPF_DYNPTR_TYPE_LOCAL:
>>>> +    case BPF_DYNPTR_TYPE_RINGBUF:
>>>> +        return ptr->data + ptr->offset;
>>>> +    case BPF_DYNPTR_TYPE_SKB:
>>>> +        return skb_pointer_if_linear(ptr->data, ptr->offset, 
>>>> __bpf_dynptr_size(ptr));
>>>> +    case BPF_DYNPTR_TYPE_XDP:
>>>> +    {
>>>> +        void *xdp_ptr = bpf_xdp_pointer(ptr->data, ptr->offset, 
>>>> __bpf_dynptr_size(ptr));
>>>
>>> I suspect what it is doing here (for skb and xdp in particular) is very 
>>> similar to bpf_dynptr_slice. Please check if bpf_dynptr_slice(ptr, 0, NULL, 
>>> sz) will work.
>>>
>>
>> Well, yes, it's simplified version of bpf_dynptr_slice. The problem is
>> that bpf_dynptr_slice bpf_kfunc which cannot be used in another
>> bpf_kfunc. Should I refactor the code to use it in both places? Like
> 
> Sorry, scrolled too fast in my earlier reply :(
> 
> I am not aware of this limitation. What error does it have?
> The bpf_dynptr_slice_rdwr kfunc() is also calling the bpf_dynptr_slice() kfunc.
> 

Yeah, but they are in the same module. We do not declare prototypes of kfuncs in
linux/bpf.h and that's why we cannot use them outside of helpers.c.
The same problem was with bpf_dynptr_is_rdonly() I believe.

>> create __bpf_dynptr_slice() which will be internal part of bpf_kfunc?
> 
> 
> 
> 
> 
> 


^ permalink raw reply

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox