Netdev List

Netdev List
 help / color / mirror / Atom feed

* [PATCH net-next v2 2/5] llc: convert to getsockopt_iter
From: Breno Leitao @ 2026-05-07 10:57 UTC (permalink / raw)
  To: Jeremy Kerr, Matt Johnston, Martin Schiller, David S. Miller,
	Eric Dumazet, Jakub Kicinski, Paolo Abeni, Simon Horman,
	Shuah Khan
  Cc: linux-x25, linux-kernel, netdev, linux-kselftest, Breno Leitao,
	kernel-team
In-Reply-To: <20260507-getsock_two-v2-0-5873111d9c12@debian.org>

Convert LLC socket's getsockopt implementation to use the new
getsockopt_iter callback with sockopt_t.

Key changes:
- Replace (char __user *optval, int __user *optlen) with sockopt_t *opt
- Use opt->optlen for buffer length (input) and returned size (output)
- Use copy_to_iter() instead of put_user()/copy_to_user()
- Add linux/uio.h for copy_to_iter()

Signed-off-by: Breno Leitao <leitao@debian.org>
---
 net/llc/af_llc.c | 18 ++++++++----------
 1 file changed, 8 insertions(+), 10 deletions(-)

diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c
index 1b210db3119e8..35278c519a305 100644
--- a/net/llc/af_llc.c
+++ b/net/llc/af_llc.c
@@ -27,6 +27,7 @@
 #include <linux/init.h>
 #include <linux/slab.h>
 #include <linux/sched/signal.h>
+#include <linux/uio.h>
 
 #include <net/llc.h>
 #include <net/llc_sap.h>
@@ -1166,25 +1167,21 @@ static int llc_ui_setsockopt(struct socket *sock, int level, int optname,
  *	@sock: Socket to get information from.
  *	@level: Socket level user is requesting operations on.
  *	@optname: Operation name.
- *	@optval: Variable to return operation data in.
- *	@optlen: Length of optval.
+ *	@opt: sockopt context with iterator and length for returning data.
  *
  *	Get connection specific socket information.
  */
 static int llc_ui_getsockopt(struct socket *sock, int level, int optname,
-			     char __user *optval, int __user *optlen)
+			     sockopt_t *opt)
 {
 	struct sock *sk = sock->sk;
 	struct llc_sock *llc = llc_sk(sk);
-	int val = 0, len = 0, rc = -EINVAL;
+	int val = 0, len, rc = -EINVAL;
 
 	lock_sock(sk);
 	if (unlikely(level != SOL_LLC))
 		goto out;
-	rc = get_user(len, optlen);
-	if (rc)
-		goto out;
-	rc = -EINVAL;
+	len = opt->optlen;
 	if (len != sizeof(int))
 		goto out;
 	switch (optname) {
@@ -1212,7 +1209,8 @@ static int llc_ui_getsockopt(struct socket *sock, int level, int optname,
 		goto out;
 	}
 	rc = 0;
-	if (put_user(len, optlen) || copy_to_user(optval, &val, len))
+	opt->optlen = len;
+	if (copy_to_iter(&val, len, &opt->iter_out) != len)
 		rc = -EFAULT;
 out:
 	release_sock(sk);
@@ -1239,7 +1237,7 @@ static const struct proto_ops llc_ui_ops = {
 	.listen      = llc_ui_listen,
 	.shutdown    = llc_ui_shutdown,
 	.setsockopt  = llc_ui_setsockopt,
-	.getsockopt  = llc_ui_getsockopt,
+	.getsockopt_iter = llc_ui_getsockopt,
 	.sendmsg     = llc_ui_sendmsg,
 	.recvmsg     = llc_ui_recvmsg,
 	.mmap	     = sock_no_mmap,

-- 
2.52.0


^ permalink raw reply related

* [PATCH net-next v2 3/5] x25: convert to getsockopt_iter
From: Breno Leitao @ 2026-05-07 10:57 UTC (permalink / raw)
  To: Jeremy Kerr, Matt Johnston, Martin Schiller, David S. Miller,
	Eric Dumazet, Jakub Kicinski, Paolo Abeni, Simon Horman,
	Shuah Khan
  Cc: linux-x25, linux-kernel, netdev, linux-kselftest, Breno Leitao,
	kernel-team
In-Reply-To: <20260507-getsock_two-v2-0-5873111d9c12@debian.org>

Convert X.25 socket's getsockopt implementation to use the new
getsockopt_iter callback with sockopt_t.

Key changes:
- Replace (char __user *optval, int __user *optlen) with sockopt_t *opt
- Use opt->optlen for buffer length (input) and returned size (output)
- Use copy_to_iter() instead of put_user()/copy_to_user()
- Add linux/uio.h for copy_to_iter()

Signed-off-by: Breno Leitao <leitao@debian.org>
---
 net/x25/af_x25.c | 16 ++++++----------
 1 file changed, 6 insertions(+), 10 deletions(-)

diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c
index af8762b24039d..c31d2af5dd223 100644
--- a/net/x25/af_x25.c
+++ b/net/x25/af_x25.c
@@ -53,6 +53,7 @@
 #include <linux/init.h>
 #include <linux/compat.h>
 #include <linux/ctype.h>
+#include <linux/uio.h>
 
 #include <net/x25.h>
 #include <net/compat.h>
@@ -448,7 +449,7 @@ static int x25_setsockopt(struct socket *sock, int level, int optname,
 }
 
 static int x25_getsockopt(struct socket *sock, int level, int optname,
-			  char __user *optval, int __user *optlen)
+			  sockopt_t *opt)
 {
 	struct sock *sk = sock->sk;
 	int val, len, rc = -ENOPROTOOPT;
@@ -456,22 +457,17 @@ static int x25_getsockopt(struct socket *sock, int level, int optname,
 	if (level != SOL_X25 || optname != X25_QBITINCL)
 		goto out;
 
-	rc = -EFAULT;
-	if (get_user(len, optlen))
-		goto out;
+	len = opt->optlen;
 
 	rc = -EINVAL;
 	if (len < 0)
 		goto out;
 
 	len = min_t(unsigned int, len, sizeof(int));
-
-	rc = -EFAULT;
-	if (put_user(len, optlen))
-		goto out;
+	opt->optlen = len;
 
 	val = test_bit(X25_Q_BIT_FLAG, &x25_sk(sk)->flags);
-	rc = copy_to_user(optval, &val, len) ? -EFAULT : 0;
+	rc = copy_to_iter(&val, len, &opt->iter_out) != len ? -EFAULT : 0;
 out:
 	return rc;
 }
@@ -1753,7 +1749,7 @@ static const struct proto_ops x25_proto_ops = {
 	.listen =	x25_listen,
 	.shutdown =	sock_no_shutdown,
 	.setsockopt =	x25_setsockopt,
-	.getsockopt =	x25_getsockopt,
+	.getsockopt_iter = x25_getsockopt,
 	.sendmsg =	x25_sendmsg,
 	.recvmsg =	x25_recvmsg,
 	.mmap =		sock_no_mmap,

-- 
2.52.0


^ permalink raw reply related

* [PATCH net-next v2 4/5] kcm: convert to getsockopt_iter
From: Breno Leitao @ 2026-05-07 10:57 UTC (permalink / raw)
  To: Jeremy Kerr, Matt Johnston, Martin Schiller, David S. Miller,
	Eric Dumazet, Jakub Kicinski, Paolo Abeni, Simon Horman,
	Shuah Khan
  Cc: linux-x25, linux-kernel, netdev, linux-kselftest, Breno Leitao,
	kernel-team
In-Reply-To: <20260507-getsock_two-v2-0-5873111d9c12@debian.org>

Convert KCM socket's getsockopt implementation to use the new
getsockopt_iter callback with sockopt_t.

Key changes:
- Replace (char __user *optval, int __user *optlen) with sockopt_t *opt
- Use opt->optlen for buffer length (input) and returned size (output)
- Use copy_to_iter() instead of put_user()/copy_to_user()
- Add linux/uio.h for copy_to_iter()

Signed-off-by: Breno Leitao <leitao@debian.org>
---
 net/kcm/kcmsock.c | 16 +++++++---------
 1 file changed, 7 insertions(+), 9 deletions(-)

diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c
index 3912e75079f5e..b273213cc68d1 100644
--- a/net/kcm/kcmsock.c
+++ b/net/kcm/kcmsock.c
@@ -24,6 +24,7 @@
 #include <linux/workqueue.h>
 #include <linux/syscalls.h>
 #include <linux/sched/signal.h>
+#include <linux/uio.h>
 
 #include <net/kcm.h>
 #include <net/netns/generic.h>
@@ -1167,7 +1168,7 @@ static int kcm_setsockopt(struct socket *sock, int level, int optname,
 }
 
 static int kcm_getsockopt(struct socket *sock, int level, int optname,
-			  char __user *optval, int __user *optlen)
+			  sockopt_t *opt)
 {
 	struct kcm_sock *kcm = kcm_sk(sock->sk);
 	int val, len;
@@ -1175,9 +1176,7 @@ static int kcm_getsockopt(struct socket *sock, int level, int optname,
 	if (level != SOL_KCM)
 		return -ENOPROTOOPT;
 
-	if (get_user(len, optlen))
-		return -EFAULT;
-
+	len = opt->optlen;
 	if (len < 0)
 		return -EINVAL;
 
@@ -1191,9 +1190,8 @@ static int kcm_getsockopt(struct socket *sock, int level, int optname,
 		return -ENOPROTOOPT;
 	}
 
-	if (put_user(len, optlen))
-		return -EFAULT;
-	if (copy_to_user(optval, &val, len))
+	opt->optlen = len;
+	if (copy_to_iter(&val, len, &opt->iter_out) != len)
 		return -EFAULT;
 	return 0;
 }
@@ -1755,7 +1753,7 @@ static const struct proto_ops kcm_dgram_ops = {
 	.listen =	sock_no_listen,
 	.shutdown =	sock_no_shutdown,
 	.setsockopt =	kcm_setsockopt,
-	.getsockopt =	kcm_getsockopt,
+	.getsockopt_iter = kcm_getsockopt,
 	.sendmsg =	kcm_sendmsg,
 	.recvmsg =	kcm_recvmsg,
 	.mmap =		sock_no_mmap,
@@ -1776,7 +1774,7 @@ static const struct proto_ops kcm_seqpacket_ops = {
 	.listen =	sock_no_listen,
 	.shutdown =	sock_no_shutdown,
 	.setsockopt =	kcm_setsockopt,
-	.getsockopt =	kcm_getsockopt,
+	.getsockopt_iter = kcm_getsockopt,
 	.sendmsg =	kcm_sendmsg,
 	.recvmsg =	kcm_recvmsg,
 	.mmap =		sock_no_mmap,

-- 
2.52.0


^ permalink raw reply related

* [PATCH net-next v2 5/5] selftests: net: getsockopt_iter: cleanup
From: Breno Leitao @ 2026-05-07 10:57 UTC (permalink / raw)
  To: Jeremy Kerr, Matt Johnston, Martin Schiller, David S. Miller,
	Eric Dumazet, Jakub Kicinski, Paolo Abeni, Simon Horman,
	Shuah Khan
  Cc: linux-x25, linux-kernel, netdev, linux-kselftest, Breno Leitao,
	kernel-team, Stanislav Fomichev, Bobby Eshleman
In-Reply-To: <20260507-getsock_two-v2-0-5873111d9c12@debian.org>

Apply two cleanups suggested by Stanislav and Bobby on the original
selftest series:

- Reorder local variable declarations into reverse christmas-tree
  order (longest line first). Because that ordering puts socklen_t
  optlen before the variable whose size it stores, the
  "optlen = sizeof(...)" initializer is moved out of the declaration
  to a plain assignment in the test body, as Stanislav suggested.

- Add ASSERT_EQ(optlen, ...) on every error path so the value the
  kernel writes back to the userspace optlen is pinned down even
  when the syscall returns -1. With do_sock_getsockopt() now writing
  opt->optlen back to userspace unconditionally, asserting that the
  netlink/vsock error paths leave the original input length untouched
  guards against future regressions.

Bobby Eshleman pointed out that
SO_VM_SOCKETS_CONNECT_TIMEOUT_NEW/OLD return a sock_timeval-shaped
payload (16 bytes on 64-bit), which is wider than the u64 case
already covered. Add four tests that exercise this path:

- connect_timeout_new_exact         exact-size buffer
- connect_timeout_new_oversize_clamped  oversize buffer, clamped
- connect_timeout_new_undersize     undersize -> -EINVAL, optlen
                                    untouched
- connect_timeout_old_exact         exact-size buffer for OLD optname

Suggested-by: Stanislav Fomichev <sdf@fomichev.me>
Suggested-by: Bobby Eshleman <bobbyeshleman@meta.com>
Signed-off-by: Breno Leitao <leitao@debian.org>
---
 tools/testing/selftests/net/getsockopt_iter.c | 109 +++++++++++++++++++++++---
 1 file changed, 98 insertions(+), 11 deletions(-)

diff --git a/tools/testing/selftests/net/getsockopt_iter.c b/tools/testing/selftests/net/getsockopt_iter.c
index 179f9e84926fd..209569354d0e3 100644
--- a/tools/testing/selftests/net/getsockopt_iter.c
+++ b/tools/testing/selftests/net/getsockopt_iter.c
@@ -22,6 +22,7 @@
 #include <unistd.h>
 #include <linux/netlink.h>
 #include <linux/rtnetlink.h>
+#include <linux/time_types.h>
 #include <linux/vm_sockets.h>
 #include <sys/socket.h>
 #include "kselftest_harness.h"
@@ -61,8 +62,10 @@ FIXTURE_TEARDOWN(netlink)
 
 TEST_F(netlink, pktinfo_exact)
 {
+	socklen_t optlen;
 	int val = -1;
-	socklen_t optlen = sizeof(val);
+
+	optlen = sizeof(val);
 
 	ASSERT_EQ(0, getsockopt(self->fd, SOL_NETLINK, NETLINK_PKTINFO,
 				&val, &optlen));
@@ -73,7 +76,9 @@ TEST_F(netlink, pktinfo_exact)
 TEST_F(netlink, pktinfo_oversize_clamped)
 {
 	char buf[16] = {};
-	socklen_t optlen = sizeof(buf);
+	socklen_t optlen;
+
+	optlen = sizeof(buf);
 
 	ASSERT_EQ(0, getsockopt(self->fd, SOL_NETLINK, NETLINK_PKTINFO,
 				buf, &optlen));
@@ -83,11 +88,14 @@ TEST_F(netlink, pktinfo_oversize_clamped)
 TEST_F(netlink, pktinfo_undersize)
 {
 	char buf[2] = {};
-	socklen_t optlen = sizeof(buf);
+	socklen_t optlen;
+
+	optlen = sizeof(buf);
 
 	ASSERT_EQ(-1, getsockopt(self->fd, SOL_NETLINK, NETLINK_PKTINFO,
 				 buf, &optlen));
 	ASSERT_EQ(EINVAL, errno);
+	ASSERT_EQ(sizeof(buf), optlen);
 }
 
 TEST_F(netlink, list_memberships_size_discovery)
@@ -105,7 +113,9 @@ TEST_F(netlink, list_memberships_size_discovery)
 TEST_F(netlink, list_memberships_full_read)
 {
 	__u32 buf[64] = {};
-	socklen_t optlen = sizeof(buf);
+	socklen_t optlen;
+
+	optlen = sizeof(buf);
 
 	ASSERT_EQ(0, getsockopt(self->fd, SOL_NETLINK,
 				NETLINK_LIST_MEMBERSHIPS,
@@ -117,22 +127,28 @@ TEST_F(netlink, list_memberships_full_read)
 
 TEST_F(netlink, bad_level)
 {
+	socklen_t optlen;
 	int val;
-	socklen_t optlen = sizeof(val);
+
+	optlen = sizeof(val);
 
 	ASSERT_EQ(-1, getsockopt(self->fd, SOL_SOCKET + 1, NETLINK_PKTINFO,
 				 &val, &optlen));
 	ASSERT_EQ(ENOPROTOOPT, errno);
+	ASSERT_EQ(sizeof(val), optlen);
 }
 
 TEST_F(netlink, bad_optname)
 {
+	socklen_t optlen;
 	int val;
-	socklen_t optlen = sizeof(val);
+
+	optlen = sizeof(val);
 
 	ASSERT_EQ(-1, getsockopt(self->fd, SOL_NETLINK, 0x7fff,
 				 &val, &optlen));
 	ASSERT_EQ(ENOPROTOOPT, errno);
+	ASSERT_EQ(sizeof(val), optlen);
 }
 
 /* ---------- vsock ---------- */
@@ -157,8 +173,10 @@ FIXTURE_TEARDOWN(vsock)
 
 TEST_F(vsock, buffer_size_exact)
 {
+	socklen_t optlen;
 	uint64_t val = 0;
-	socklen_t optlen = sizeof(val);
+
+	optlen = sizeof(val);
 
 	ASSERT_EQ(0, getsockopt(self->fd, AF_VSOCK,
 				SO_VM_SOCKETS_BUFFER_SIZE,
@@ -170,7 +188,9 @@ TEST_F(vsock, buffer_size_exact)
 TEST_F(vsock, buffer_size_oversize_clamped)
 {
 	char buf[16] = {};
-	socklen_t optlen = sizeof(buf);
+	socklen_t optlen;
+
+	optlen = sizeof(buf);
 
 	ASSERT_EQ(0, getsockopt(self->fd, AF_VSOCK,
 				SO_VM_SOCKETS_BUFFER_SIZE,
@@ -181,33 +201,100 @@ TEST_F(vsock, buffer_size_oversize_clamped)
 TEST_F(vsock, buffer_size_undersize)
 {
 	char buf[4] = {};
-	socklen_t optlen = sizeof(buf);
+	socklen_t optlen;
+
+	optlen = sizeof(buf);
 
 	ASSERT_EQ(-1, getsockopt(self->fd, AF_VSOCK,
 				 SO_VM_SOCKETS_BUFFER_SIZE,
 				 buf, &optlen));
 	ASSERT_EQ(EINVAL, errno);
+	ASSERT_EQ(sizeof(buf), optlen);
 }
 
 TEST_F(vsock, bad_level)
 {
+	socklen_t optlen;
 	uint64_t val;
-	socklen_t optlen = sizeof(val);
+
+	optlen = sizeof(val);
 
 	ASSERT_EQ(-1, getsockopt(self->fd, SOL_SOCKET + 1,
 				 SO_VM_SOCKETS_BUFFER_SIZE,
 				 &val, &optlen));
 	ASSERT_EQ(ENOPROTOOPT, errno);
+	ASSERT_EQ(sizeof(val), optlen);
 }
 
 TEST_F(vsock, bad_optname)
 {
+	socklen_t optlen;
 	uint64_t val;
-	socklen_t optlen = sizeof(val);
+
+	optlen = sizeof(val);
 
 	ASSERT_EQ(-1, getsockopt(self->fd, AF_VSOCK, 0x7fff,
 				 &val, &optlen));
 	ASSERT_EQ(ENOPROTOOPT, errno);
+	ASSERT_EQ(sizeof(val), optlen);
+}
+
+/* SO_VM_SOCKETS_CONNECT_TIMEOUT_{NEW,OLD} return a sock_timeval-shaped
+ * payload, which is wider than u64 on 64-bit. They exercise the path
+ * where the protocol's reported lv (16 bytes) is larger than the
+ * common 8-byte u64 case covered above.
+ */
+TEST_F(vsock, connect_timeout_new_exact)
+{
+	struct __kernel_sock_timeval tv = {};
+	socklen_t optlen;
+
+	optlen = sizeof(tv);
+
+	ASSERT_EQ(0, getsockopt(self->fd, AF_VSOCK,
+				SO_VM_SOCKETS_CONNECT_TIMEOUT_NEW,
+				&tv, &optlen));
+	ASSERT_EQ(sizeof(tv), optlen);
+}
+
+TEST_F(vsock, connect_timeout_new_oversize_clamped)
+{
+	char buf[sizeof(struct __kernel_sock_timeval) * 2] = {};
+	socklen_t optlen;
+
+	optlen = sizeof(buf);
+
+	ASSERT_EQ(0, getsockopt(self->fd, AF_VSOCK,
+				SO_VM_SOCKETS_CONNECT_TIMEOUT_NEW,
+				buf, &optlen));
+	ASSERT_EQ(sizeof(struct __kernel_sock_timeval), optlen);
+}
+
+TEST_F(vsock, connect_timeout_new_undersize)
+{
+	socklen_t optlen;
+	uint64_t val;
+
+	optlen = sizeof(val);
+
+	ASSERT_EQ(-1, getsockopt(self->fd, AF_VSOCK,
+				 SO_VM_SOCKETS_CONNECT_TIMEOUT_NEW,
+				 &val, &optlen));
+	ASSERT_EQ(EINVAL, errno);
+	ASSERT_EQ(sizeof(val), optlen);
+}
+
+TEST_F(vsock, connect_timeout_old_exact)
+{
+	struct __kernel_old_timeval tv = {};
+	socklen_t optlen;
+
+	optlen = sizeof(tv);
+
+	ASSERT_EQ(0, getsockopt(self->fd, AF_VSOCK,
+				SO_VM_SOCKETS_CONNECT_TIMEOUT_OLD,
+				&tv, &optlen));
+	ASSERT_EQ(sizeof(tv), optlen);
 }
 
 TEST_HARNESS_MAIN

-- 
2.52.0


^ permalink raw reply related

* [PATCH net-next v3 0/4] net: Fix protodown with macvlan
From: Ido Schimmel @ 2026-05-07 10:59 UTC (permalink / raw)
  To: netdev; +Cc: davem, kuba, pabeni, edumazet, horms, petrm, Ido Schimmel

When protodown is enabled on a macvlan, two bugs cause the macvlan to
incorrectly gain carrier:

1. Toggling the lower device's carrier while protodown is enabled on the
macvlan causes the macvlan to gain carrier, effectively bypassing the
protodown mechanism.

2. Toggling protodown on and then off on the macvlan while the lower
device has no carrier causes the macvlan to gain carrier, since
netif_change_proto_down() unconditionally turns the carrier on.

Patch #1 is a preparation.

Patch #2 solves the first problem by making netif_carrier_on() return
early when protodown is on.

Patch #3 solves the second problem by only calling netif_carrier_on()
when protodown is turned off if there is no linked net device or if the
linked net device has a carrier.

Patch #4 adds a selftest covering both bugs and the basic protodown
functionality.

Targeting net-next since these are not regressions (i.e., this never
worked).

Note that while these changes are in the core, they should only affect
macvlan as protodown is only supported by macvlan and vxlan and only the
former has a linked net device.

v3:
- Keep protodown restricted to carrier state and avoid changing /
  calling netif_stacked_transfer_operstate().
v2: https://lore.kernel.org/netdev/20260505081656.463158-1-idosch@nvidia.com/
- Move protodown handling away from drivers to the core (Jakub).
- Add a new test case for vxlan.
v1: https://lore.kernel.org/netdev/20260429124624.835335-1-idosch@nvidia.com/

Ido Schimmel (4):
  net: Set dev->proto_down before changing carrier state
  net: Do not turn on carrier when protodown is on
  net: Do not unconditionally turn on carrier when turning off protodown
  selftests: net: Add protodown tests

 net/core/dev.c                           |  23 ++-
 net/sched/sch_generic.c                  |   3 +
 tools/testing/selftests/net/Makefile     |   1 +
 tools/testing/selftests/net/protodown.sh | 182 +++++++++++++++++++++++
 4 files changed, 207 insertions(+), 2 deletions(-)
 create mode 100755 tools/testing/selftests/net/protodown.sh

-- 
2.54.0

^ permalink raw reply

* [PATCH net-next v3 1/4] net: Set dev->proto_down before changing carrier state
From: Ido Schimmel @ 2026-05-07 10:59 UTC (permalink / raw)
  To: netdev; +Cc: davem, kuba, pabeni, edumazet, horms, petrm, Ido Schimmel
In-Reply-To: <20260507105906.891817-1-idosch@nvidia.com>

A subsequent patch will make netif_carrier_on() a NOP for net devices
that have protodown turned on so that they will not accidentally gain
carrier. As a preparation, set dev->proto_down before calling
netif_carrier_{off,on}().

Note that the only driver that supports protodown and has a notion of a
carrier is macvlan and it is calling netif_carrier_{off,on}() with RTNL
held.

No functional changes intended.

Signed-off-by: Ido Schimmel <idosch@nvidia.com>
---
 net/core/dev.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/core/dev.c b/net/core/dev.c
index 06c195906231..cb6e90058619 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -10147,11 +10147,11 @@ int netif_change_proto_down(struct net_device *dev, bool proto_down)
 		return -EOPNOTSUPP;
 	if (!netif_device_present(dev))
 		return -ENODEV;
+	WRITE_ONCE(dev->proto_down, proto_down);
 	if (proto_down)
 		netif_carrier_off(dev);
 	else
 		netif_carrier_on(dev);
-	WRITE_ONCE(dev->proto_down, proto_down);
 	return 0;
 }

-- 
2.54.0

^ permalink raw reply related

* [PATCH net-next v3 2/4] net: Do not turn on carrier when protodown is on
From: Ido Schimmel @ 2026-05-07 10:59 UTC (permalink / raw)
  To: netdev; +Cc: davem, kuba, pabeni, edumazet, horms, petrm, Ido Schimmel
In-Reply-To: <20260507105906.891817-1-idosch@nvidia.com>

The protodown functionality allows user space to turn off the carrier of
a net device:

 # ip link add name dummy1 up type dummy
 # ip link add name macvlan1 up link dummy1 type macvlan mode bridge
 # ip link set dev macvlan1 protodown on
 $ ip -br link show dev macvlan1
 macvlan1@dummy1  DOWN           0a:5c:a3:05:c7:86 <NO-CARRIER,BROADCAST,MULTICAST,UP>

Different applications can set different protodown reasons, which
prevents an application from turning on the carrier of a net device as
long as others want it down:

 # ip link set dev macvlan1 protodown_reason 1 on
 # ip link set dev macvlan1 protodown_reason 2 on
 # ip link set dev macvlan1 protodown off
 Error: Cannot clear protodown, active reasons.
 # ip link set dev macvlan1 protodown_reason 2 off
 # ip link set dev macvlan1 protodown off
 Error: Cannot clear protodown, active reasons.
 # ip link set dev macvlan1 protodown_reason 1 off
 # ip link set dev macvlan1 protodown off
 $ ip -br link show dev macvlan1
 macvlan1@dummy1  UP             0a:5c:a3:05:c7:86 <BROADCAST,MULTICAST,UP,LOWER_UP>

Unfortunately, this mechanism is not very useful when the carrier of a
net device can be toggled by toggling the carrier of its lower device:

 # ip link set dev macvlan1 protodown on
 $ ip -br link show dev macvlan1
 macvlan1@dummy1  DOWN           0a:5c:a3:05:c7:86 <NO-CARRIER,BROADCAST,MULTICAST,UP>
 # ip link set dev dummy1 carrier off
 # ip link set dev dummy1 carrier on
 $ ip -br link show dev macvlan1
 macvlan1@dummy1  UP             0a:5c:a3:05:c7:86 <BROADCAST,MULTICAST,UP,LOWER_UP>

Obviously, this is not the intended behavior and it is unlikely to be
relied on by anyone. In fact, it is a problem for applications like FRR
that use protodown with macvlan on top of a bridge as part of Virtual
Router Redundancy Protocol (VRRP).

Solve this by preventing a net device configured with protodown on from
gaining carrier by making netif_carrier_on() a NOP when protodown is
turned on.

Output with the patch:

 # ip link add name dummy1 up type dummy
 # ip link add name macvlan1 up link dummy1 type macvlan mode bridge
 # ip link set dev macvlan1 protodown on
 $ ip -br link show dev macvlan1
 macvlan1@dummy1  DOWN           0a:5c:a3:05:c7:86 <NO-CARRIER,BROADCAST,MULTICAST,UP>
 # ip link set dev dummy1 carrier off
 # ip link set dev dummy1 carrier on
 $ ip -br link show dev macvlan1
 macvlan1@dummy1  DOWN           0a:5c:a3:05:c7:86 <NO-CARRIER,BROADCAST,MULTICAST,UP>
 # ip link set dev macvlan1 protodown off
 $ ip -br link show dev macvlan1
 macvlan1@dummy1  UP             0a:5c:a3:05:c7:86 <BROADCAST,MULTICAST,UP,LOWER_UP>

Signed-off-by: Ido Schimmel <idosch@nvidia.com>
---
 net/sched/sch_generic.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index a93321db8fd7..05c250c483f0 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -609,6 +609,9 @@ static void netdev_watchdog_down(struct net_device *dev)
  */
 void netif_carrier_on(struct net_device *dev)
 {
+	if (READ_ONCE(dev->proto_down))
+		return;
+
 	if (test_and_clear_bit(__LINK_STATE_NOCARRIER, &dev->state)) {
 		if (dev->reg_state == NETREG_UNINITIALIZED)
 			return;
-- 
2.54.0

^ permalink raw reply related

* [PATCH net-next v3 3/4] net: Do not unconditionally turn on carrier when turning off protodown
From: Ido Schimmel @ 2026-05-07 10:59 UTC (permalink / raw)
  To: netdev; +Cc: davem, kuba, pabeni, edumazet, horms, petrm, Ido Schimmel
In-Reply-To: <20260507105906.891817-1-idosch@nvidia.com>

The protodown functionality allows user space to turn off the carrier of
a net device:

 # ip link add name dummy1 up type dummy
 # ip link add name macvlan1 up link dummy1 type macvlan mode bridge
 # ip link set dev macvlan1 protodown on
 $ ip -br link show dev macvlan1
 macvlan1@dummy1  DOWN           0a:5c:a3:05:c7:86 <NO-CARRIER,BROADCAST,MULTICAST,UP>

When protodown is turned off, the core unconditionally turns on the
carrier of the net device:

 # ip link set dev macvlan1 protodown off
 $ ip -br link show dev macvlan1
 macvlan1@dummy1  UP             0a:5c:a3:05:c7:86 <BROADCAST,MULTICAST,UP,LOWER_UP>

This is wrong as it means that a macvlan can end up with a carrier when
its lower device does not have a carrier:

 # ip link set dev dummy1 carrier off
 $ ip -br link show dev macvlan1
 macvlan1@dummy1  LOWERLAYERDOWN 0a:5c:a3:05:c7:86 <NO-CARRIER,BROADCAST,MULTICAST,UP>
 # ip link set dev macvlan1 protodown on
 # ip link set dev macvlan1 protodown off
 $ ip -br link show dev macvlan1
 macvlan1@dummy1  UP             0a:5c:a3:05:c7:86 <BROADCAST,MULTICAST,UP,LOWER_UP>

Solve this by resolving the linked net device and if one exists, inherit
its carrier state when protodown is turned off. Otherwise, if no linked
net device exists, as before, simply turn on the carrier.

Resolve the linked net device using a new helper and have it return the
device itself (in a similar fashion to dev_get_iflink()) unless the device
implements both ndo_get_iflink() and get_link_net(). If the
latter is not implemented, it is unclear in which network namespace we
should look up the linked net device. Currently, this helper is only
used for net devices that support protodown (macvlan and vxlan) and for
both it returns the correct result.

Output with the patch:

 # ip link add name dummy1 up type dummy
 # ip link add name macvlan1 up link dummy1 type macvlan mode bridge
 # ip link set dev dummy1 carrier off
 $ ip -br link show dev macvlan1
 macvlan1@dummy1  LOWERLAYERDOWN 0a:5c:a3:05:c7:86 <NO-CARRIER,BROADCAST,MULTICAST,UP>
 # ip link set dev macvlan1 protodown on
 # ip link set dev macvlan1 protodown off
 $ ip -br link show dev macvlan1
 macvlan1@dummy1  LOWERLAYERDOWN 0a:5c:a3:05:c7:86 <NO-CARRIER,BROADCAST,MULTICAST,UP>
 # ip link set dev dummy1 carrier on
 $ ip -br link show dev macvlan1
 macvlan1@dummy1  UP             0a:5c:a3:05:c7:86 <BROADCAST,MULTICAST,UP,LOWER_UP>
 # ip link set dev macvlan1 protodown on
 # ip link set dev macvlan1 protodown off
 $ ip -br link show dev macvlan1
 macvlan1@dummy1  UP             0a:5c:a3:05:c7:86 <BROADCAST,MULTICAST,UP,LOWER_UP>

Signed-off-by: Ido Schimmel <idosch@nvidia.com>
---
 net/core/dev.c | 21 ++++++++++++++++++++-
 1 file changed, 20 insertions(+), 1 deletion(-)

diff --git a/net/core/dev.c b/net/core/dev.c
index cb6e90058619..0c272f6e9aaa 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -10141,16 +10141,35 @@ bool netdev_port_same_parent_id(struct net_device *a, struct net_device *b)
 }
 EXPORT_SYMBOL(netdev_port_same_parent_id);
 
+static struct net_device *dev_get_iflink_dev(struct net_device *dev)
+{
+	struct net *net;
+
+	ASSERT_RTNL();
+
+	if (!dev->netdev_ops->ndo_get_iflink || !dev->rtnl_link_ops ||
+	    !dev->rtnl_link_ops->get_link_net)
+		return dev;
+
+	net = dev->rtnl_link_ops->get_link_net(dev);
+	return __dev_get_by_index(net, dev_get_iflink(dev));
+}
+
 int netif_change_proto_down(struct net_device *dev, bool proto_down)
 {
+	struct net_device *iflink_dev;
+
 	if (!dev->change_proto_down)
 		return -EOPNOTSUPP;
 	if (!netif_device_present(dev))
 		return -ENODEV;
+	iflink_dev = dev_get_iflink_dev(dev);
+	if (!iflink_dev)
+		return -ENODEV;
 	WRITE_ONCE(dev->proto_down, proto_down);
 	if (proto_down)
 		netif_carrier_off(dev);
-	else
+	else if (dev == iflink_dev || netif_carrier_ok(iflink_dev))
 		netif_carrier_on(dev);
 	return 0;
 }
-- 
2.54.0


^ permalink raw reply related

* [PATCH net-next v3 4/4] selftests: net: Add protodown tests
From: Ido Schimmel @ 2026-05-07 10:59 UTC (permalink / raw)
  To: netdev; +Cc: davem, kuba, pabeni, edumazet, horms, petrm, Ido Schimmel
In-Reply-To: <20260507105906.891817-1-idosch@nvidia.com>

Add a selftest for the protodown mechanism.

Five test cases are included:

1. Basic protodown toggling: Verify that setting protodown on macvlan
   results in DOWN operational state and clearing it restores UP.

2. Same as the previous test case, but with vxlan.

3. Protodown reasons: Verify that protodown cannot be cleared while
   there are active protodown reasons, but can be cleared once all
   reasons are removed.

4. Protodown with lower device being toggled: Verify that toggling the
   lower device's carrier while protodown is on does not cause the
   macvlan to gain carrier.

5. Protodown with lower device down: Verify that toggling protodown
   while the lower device has no carrier does not cause the macvlan to
   gain carrier.

Note that the last two test cases fail without "net: Do not turn on
carrier when protodown is on" and "net: Do not unconditionally turn on
carrier when turning off protodown":

 # ./protodown.sh
 TEST: Basic protodown on/off with macvlan                           [ OK ]
 TEST: Basic protodown on/off with vxlan                             [ OK ]
 TEST: Protodown reasons                                             [ OK ]
 TEST: Protodown with lower device toggled                           [FAIL]
         Macvlan operational state is not DOWN despite protodown
 TEST: Protodown with lower device down                              [FAIL]
         Macvlan is not LOWERLAYERDOWN after clearing protodown

Assisted-by: Claude:claude-opus-4-6
Signed-off-by: Ido Schimmel <idosch@nvidia.com>
---
 tools/testing/selftests/net/Makefile     |   1 +
 tools/testing/selftests/net/protodown.sh | 182 +++++++++++++++++++++++
 2 files changed, 183 insertions(+)
 create mode 100755 tools/testing/selftests/net/protodown.sh

diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index 88c7573a8295..ff1d58625589 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -69,6 +69,7 @@ TEST_PROGS := \
 	nl_netdev.py \
 	nl_nlctrl.py \
 	pmtu.sh \
+	protodown.sh \
 	psock_snd.sh \
 	reuseaddr_ports_exhausted.sh \
 	reuseport_addr_any.sh \
diff --git a/tools/testing/selftests/net/protodown.sh b/tools/testing/selftests/net/protodown.sh
new file mode 100755
index 000000000000..0a7b78c63c37
--- /dev/null
+++ b/tools/testing/selftests/net/protodown.sh
@@ -0,0 +1,182 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test the protodown mechanism. Verify basic protodown toggling, protodown
+# reasons, operational state when the lower device carrier changes, and correct
+# operational state when the lower device has no carrier.
+
+# shellcheck disable=SC1091,SC2034,SC2154,SC2317
+source lib.sh
+
+require_command jq
+
+ALL_TESTS="
+	protodown_basic_macvlan
+	protodown_basic_vxlan
+	protodown_reasons
+	protodown_lower_toggle
+	protodown_lower_down
+"
+
+operstate_get()
+{
+	local ns=$1; shift
+	local dev=$1; shift
+
+	ip -n "$ns" -j link show dev "$dev" | jq -r '.[].operstate'
+}
+
+operstate_check()
+{
+	local ns=$1; shift
+	local dev=$1; shift
+	local expected=$1; shift
+
+	local current
+	current=$(operstate_get "$ns" "$dev")
+
+	[ "$current" = "$expected" ]
+}
+
+setup_prepare()
+{
+	setup_ns NS
+	defer cleanup_all_ns
+
+	ip -n "$NS" link add name dummy0 up type dummy
+
+	ip -n "$NS" link add name macvlan0 link dummy0 up type macvlan mode bridge
+
+	ip -n "$NS" link add name vxlan0 up type vxlan id 10010 dstport 4789
+}
+
+protodown_basic()
+{
+	local dev=$1; shift
+
+	ip -n "$NS" link set dev "$dev" protodown on
+	check_err $? "Failed to set protodown on"
+
+	busywait "$BUSYWAIT_TIMEOUT" operstate_check "$NS" "$dev" DOWN
+	check_err $? "Operational state is not DOWN after setting protodown"
+
+	ip -n "$NS" link set dev "$dev" protodown off
+	check_err $? "Failed to set protodown off"
+
+	busywait "$BUSYWAIT_TIMEOUT" operstate_check "$NS" "$dev" UP
+	check_err $? "Operational state is not UP after clearing protodown"
+}
+
+protodown_basic_macvlan()
+{
+	RET=0
+
+	protodown_basic macvlan0
+
+	log_test "Basic protodown on/off with macvlan"
+}
+
+protodown_basic_vxlan()
+{
+	RET=0
+
+	protodown_basic vxlan0
+
+	log_test "Basic protodown on/off with vxlan"
+}
+
+protodown_reasons()
+{
+	RET=0
+
+	ip -n "$NS" link set dev macvlan0 protodown on
+
+	ip -n "$NS" link set dev macvlan0 protodown_reason 0 on
+	check_err $? "Failed to set protodown reason bit 0"
+
+	# Cannot clear protodown while reasons are active.
+	ip -n "$NS" link set dev macvlan0 protodown off 2>/dev/null
+	check_fail $? "Clearing protodown succeeded with active reasons"
+
+	ip -n "$NS" link set dev macvlan0 protodown_reason 0 off
+	check_err $? "Failed to clear protodown reason bit 0"
+
+	# Can clear protodown when no reasons are active.
+	ip -n "$NS" link set dev macvlan0 protodown off
+	check_err $? "Failed to clear protodown with no active reasons"
+
+	busywait "$BUSYWAIT_TIMEOUT" operstate_check "$NS" macvlan0 UP
+	check_err $? "Operational state is not UP after clearing protodown"
+
+	log_test "Protodown reasons"
+}
+
+protodown_lower_toggle()
+{
+	RET=0
+
+	ip -n "$NS" link set dev macvlan0 protodown on
+
+	busywait "$BUSYWAIT_TIMEOUT" operstate_check "$NS" macvlan0 DOWN
+	check_err $? "Operational state is not DOWN after setting protodown"
+
+	# Toggle carrier on the lower device. The macvlan should stay DOWN
+	# because protodown is on.
+	ip -n "$NS" link set dev dummy0 carrier off
+	ip -n "$NS" link set dev dummy0 carrier on
+
+	busywait "$BUSYWAIT_TIMEOUT" operstate_check "$NS" dummy0 UP
+	check_err $? "Lower device is not UP after carrier on"
+
+	busywait "$BUSYWAIT_TIMEOUT" operstate_check "$NS" macvlan0 DOWN
+	check_err $? "Macvlan operational state is not DOWN despite protodown"
+
+	# Clear protodown and verify the macvlan comes back up.
+	ip -n "$NS" link set dev macvlan0 protodown off
+
+	busywait "$BUSYWAIT_TIMEOUT" operstate_check "$NS" macvlan0 UP
+	check_err $? "Operational state is not UP after clearing protodown"
+
+	log_test "Protodown with lower device toggled"
+}
+
+protodown_lower_down()
+{
+	RET=0
+
+	# Bring the lower device carrier down first.
+	ip -n "$NS" link set dev dummy0 carrier off
+
+	busywait "$BUSYWAIT_TIMEOUT" operstate_check "$NS" macvlan0 LOWERLAYERDOWN
+	check_err $? "Macvlan is not LOWERLAYERDOWN with lower carrier off"
+
+	# Toggle protodown on and off while lower has no carrier. The macvlan
+	# should not transition to UP.
+	ip -n "$NS" link set dev macvlan0 protodown on
+
+	busywait "$BUSYWAIT_TIMEOUT" operstate_check "$NS" macvlan0 LOWERLAYERDOWN
+	check_err $? "Macvlan is not LOWERLAYERDOWN after setting protodown"
+
+	ip -n "$NS" link set dev macvlan0 protodown off
+
+	busywait "$BUSYWAIT_TIMEOUT" operstate_check "$NS" macvlan0 LOWERLAYERDOWN
+	check_err $? "Macvlan is not LOWERLAYERDOWN after clearing protodown"
+
+	# Bring the lower device carrier up. The macvlan should transition to
+	# UP.
+	ip -n "$NS" link set dev dummy0 carrier on
+
+	busywait "$BUSYWAIT_TIMEOUT" operstate_check "$NS" dummy0 UP
+	check_err $? "Lower device is not UP after carrier on"
+
+	busywait "$BUSYWAIT_TIMEOUT" operstate_check "$NS" macvlan0 UP
+	check_err $? "Macvlan is not UP after lower device is UP"
+
+	log_test "Protodown with lower device down"
+}
+
+trap defer_scopes_cleanup EXIT
+setup_prepare
+tests_run
+
+exit "$EXIT_STATUS"
-- 
2.54.0


^ permalink raw reply related

* Re: [RFC net-next 0/4] devlink: Add boot-time defaults
From: Jiri Pirko @ 2026-05-07 11:03 UTC (permalink / raw)
  To: Mark Bloch
  Cc: Eric Dumazet, Jakub Kicinski, Paolo Abeni, Andrew Lunn,
	David S. Miller, Jonathan Corbet, Shuah Khan, Simon Horman,
	Saeed Mahameed, Leon Romanovsky, Tariq Toukan, Andrew Morton,
	Borislav Petkov (AMD), Randy Dunlap, Dave Hansen,
	Christian Brauner, Petr Mladek, Peter Zijlstra (Intel),
	Thomas Gleixner, Pawan Gupta, Dapeng Mi, Kees Cook, Marco Elver,
	Eric Biggers, Li RongQing, Paul E. McKenney, linux-doc,
	linux-kernel, netdev, linux-rdma
In-Reply-To: <3f9215c4-7c84-46d9-ba74-30dabe24db09@nvidia.com>

Wed, May 06, 2026 at 07:35:10PM +0200, mbloch@nvidia.com wrote:
>
>
>On 06/05/2026 18:22, Jiri Pirko wrote:
>> Wed, May 06, 2026 at 02:37:35PM +0200, mbloch@nvidia.com wrote:
>>> This series adds a devlink= kernel command line parameter for applying
>>> selected devlink settings during device initialization.
>>>
>>> Following a discussion with Jakub[1], I am sending this RFC to get the
>>> conversation moving. I started from Jakub's example/request and extended
>>> it to cover requirements from production systems and configurations that
>>> customers use.
>>>
>>> One important caveat is that the parsing logic in this RFC was written
>>> with AI assistance. I am also not sure whether the resulting syntax and
>>> parser are too complex for a kernel command line interface. This is part
>>> of why I am sending it as an RFC: to understand what direction and level
>>> of complexity would be acceptable to people.
>>>
>>> The implementation is intended to support the following properties:
>>>
>>> - A system may have multiple devlink devices that usually need the same
>>>  configuration. For a configuration such as eswitch mode switchdev, a
>>>  user should be able to specify multiple devices to which that
>>>  configuration applies.
>>>
>>> - There may be ordering dependencies between options. For example, in
>>>  mlx5, flow_steering_mode should be set before moving to switchdev.
>>>  With this in mind, defaults are applied per device in the left-to-right
>>>  order in which they appear on the command line.
>>>
>>> The intent is to let deployments set devlink defaults before normal
>>> userspace orchestration runs, while still using devlink concepts and
>> 
>> "defaults before normal userspace orchestrarion". I read it as config
>> before config, which eventually could be skipped.
>> 
>> 
>>> driver callbacks rather than adding driver-specific module parameters.
>>> A default is scoped to one or more devlink handles, for example:
>>>
>>>  devlink=[pci/0000:08:00.0]:esw:mode:switchdev
>>>  devlink=[pci/0000:08:00.0]:param:flow_steering_mode:smfs
>>>  devlink=[pci/0000:08:00.0,pci/0000:08:00.1]:param:flow_steering_mode:hmfs,[pci/0000:08:00.0,pci/0000:08:00.1]:esw:mode:switchdev
>> 
>> I don't like this. What you do, you are basically introducing user
>> configuration tool on kernel cmdline.
>> 
>> The same you would achieve with a proper userspace tool/daemon.
>> I did try to come up with it and push it here:
>> https://github.com/systemd/systemd/pull/37393
>> That didn't get merged for unknown reason, but the idea is sound. You
>> provide configuration files for devlink object and systemd-devlinkd
>> will apply when they appear. Wouldn't this help your case?
>
>I agree that systemd-devlinkd is the right shape for normal
>devlink configuration, and it could probably replace the udev/devlink
>plumbing we use today.
>
>The case I am trying to cover is earlier than that.
>
>On BlueField/ECPF/DPU systems, the host PF driver cannot always finish
>probing independently of the ECPF side. When the ECPF is the eswitch
>manager, the host PF is kept in initializing state until the ECPF eswitch
>side is set up and mlx5 enables the external host PF HCA. That happens as
>part of moving the ECPF to switchdev.
>
>Today userspace observes the ECPF instance and then switches the
>mode through devlink, usually via udev or similar plumbing. That still
>leaves a window where the ECPF has probed, userspace has not applied the
>mode yet, and the host PF is waiting. With many ECPFs this becomes visible
>in host PF probe/boot time. A daemon reacting to the devlink object
>appearing can make the userspace side cleaner, but it still runs after the
>device has appeared and after userspace scheduling/uevent handling.
>
>Long term, for these DPU deployments, we would like mlx5 to initialize
>directly in switchdev. I am hesitant to make that unconditional because it
>changes existing behavior and there is no early opt-out before probe. The
>cmdline parameter was meant as an explicit opt-in middle step: ask the
>driver to apply the same devlink operation during init, before this path
>depends on userspace.
>
>We previously tried to address this with an mlx5 module parameter. By
>design, that was too coarse: it applied to all mlx5 devices handled by the
>module. That makes it usable only for narrow DPU-only configurations. The
>devlink-handle based cmdline syntax was intended to keep the opt-in scoped
>to the specific devices that need this early switchdev transition.

The switchdev mode was introduced at roughly the time CX4 was out. What
stopped us from making it default for CX4+ ?

Introducing this horrible plumbing only because we were not able to
change the default sounds so absurd.

Can we write the default mode as a bit in ASIC NV memory perhaps? Simple
devlink cmode permanent param to write it, the driver can read this bit
during init to decide the init flow path?

^ permalink raw reply

* Re: [PATCH net-next v3 1/2] net: openvswitch: make flow_table an rcu pointer
From: Paolo Abeni @ 2026-05-07 11:03 UTC (permalink / raw)
  To: Adrian Moreno, netdev
  Cc: aconole, Eelco Chaudron, Ilya Maximets, David S. Miller,
	Eric Dumazet, Jakub Kicinski, Simon Horman, open list:OPENVSWITCH,
	open list
In-Reply-To: <20260505084253.998548-2-amorenoz@redhat.com>

On 5/5/26 10:42 AM, Adrian Moreno wrote:
> @@ -752,12 +759,16 @@ static struct genl_family dp_packet_genl_family __ro_after_init = {
>  static void get_dp_stats(const struct datapath *dp, struct ovs_dp_stats *stats,
>  			 struct ovs_dp_megaflow_stats *mega_stats)
>  {
> +	struct flow_table *table = ovsl_dereference(dp->table);
>  	int i;
>  
>  	memset(mega_stats, 0, sizeof(*mega_stats));
> +	memset(stats, 0, sizeof(*stats));

Is this an unrelated bugfix, preventing random values from stack
clobbering the stats? Should it land to a separate fix for net?

> @@ -1501,8 +1537,13 @@ static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
>  		rcu_read_unlock();
>  		return -ENODEV;
>  	}
> +	table = rcu_dereference_ovsl(dp->table);

Possibly:
	table = rcu_dereference(dp->table);
would be more accurate

> @@ -518,6 +524,7 @@ void ovs_flow_tbl_destroy(struct flow_table *table)
>  	call_rcu(&mc->rcu, mask_cache_rcu_cb);
>  	call_rcu(&ma->rcu, mask_array_rcu_cb);

This is always invoked after a rcu grace period, it would be probably
nice to follow-up, not necessarily in this series, moving the above
call_rcu together with the call_rcu(table). Such statements could be
bundled in a new helper.

/P


^ permalink raw reply

* Re: [PATCH net-next v3 1/2] dpll: add fractional frequency offset to pin-parent-device
From: Jiri Pirko @ 2026-05-07 11:08 UTC (permalink / raw)
  To: Jakub Kicinski
  Cc: Ivan Vecera, netdev, Andrew Lunn, Arkadiusz Kubalewski,
	David S. Miller, Donald Hunter, Eric Dumazet, Jonathan Corbet,
	Leon Romanovsky, Mark Bloch, Michal Schmidt, Paolo Abeni,
	Pasi Vaananen, Petr Oros, Prathosh Satish, Saeed Mahameed,
	Shuah Khan, Simon Horman, Tariq Toukan, Vadim Fedorenko,
	linux-doc, linux-kernel, linux-rdma
In-Reply-To: <20260506183342.767b5fbc@kernel.org>

Thu, May 07, 2026 at 03:33:42AM +0200, kuba@kernel.org wrote:
>On Mon,  4 May 2026 17:53:39 +0200 Ivan Vecera wrote:
>> +          At top level this represents the RX vs TX symbol rate
>> +          offset on the media associated with the pin.
>
>Isn't this a hacky hack? I'd think that pin is in or out.
>Having a freq offset between two pins or pin and parent's
>ref lock makes sense. This new interpretation sounds like
>we are trying to shove a difference between two pins into one?

The pin is in, but it is associated with SyncE port that has RX/TX
symbol rate offset. As the doc says, the "offset on the media associated
with the pin". Why is that hack?


>
>> @@ -299,6 +299,10 @@ zl3073x_dpll_input_pin_ffo_get(const struct dpll_pin *dpll_pin, void *pin_priv,
>>  {
>>  	struct zl3073x_dpll_pin *pin = pin_priv;
>>  
>> +	/* Only rx vs tx symbol rate FFO is supported */
>> +	if (dpll)
>> +		return -ENODATA;
>> +
>>  	*ffo = pin->freq_offset;
>
>It's easy for driver authors to forget this sort of validation.
>We should fail close, so it's better to have some "capability"
>bits or something for the driver to opt into getting given format 
>of the call.

^ permalink raw reply

* RE: [PATCH net-next v2 2/2] net: nfp: Drop PCI class entries with .class_mask = 0
From: Loktionov, Aleksandr @ 2026-05-07 11:11 UTC (permalink / raw)
  To: Uwe Kleine-König (The Capable Hub), Andrew Lunn,
	David S. Miller, Eric Dumazet, Paolo Abeni
  Cc: Petr Machata, Johannes Berg, Arend van Spriel, Marco Crivellari,
	Kees Cook, Jakub Kicinski, Simon Horman, oss-drivers@corigine.com,
	netdev@vger.kernel.org, linux-kernel@vger.kernel.org,
	Markus Schneider-Pargmann
In-Reply-To: <e853a41d232ca831a93a84545dab322cbe5d49f9.1778149923.git.u.kleine-koenig@baylibre.com>



> -----Original Message-----
> From: Uwe Kleine-König (The Capable Hub) <u.kleine-
> koenig@baylibre.com>
> Sent: Thursday, May 7, 2026 12:50 PM
> To: Andrew Lunn <andrew+netdev@lunn.ch>; David S. Miller
> <davem@davemloft.net>; Eric Dumazet <edumazet@google.com>; Paolo Abeni
> <pabeni@redhat.com>
> Cc: Loktionov, Aleksandr <aleksandr.loktionov@intel.com>; Petr Machata
> <petrm@nvidia.com>; Johannes Berg <johannes@sipsolutions.net>; Arend
> van Spriel <arend.vanspriel@broadcom.com>; Marco Crivellari
> <marco.crivellari@suse.com>; Kees Cook <kees@kernel.org>; Jakub
> Kicinski <kuba@kernel.org>; Simon Horman <horms@kernel.org>; oss-
> drivers@corigine.com; netdev@vger.kernel.org; linux-
> kernel@vger.kernel.org; Markus Schneider-Pargmann <msp@baylibre.com>
> Subject: [PATCH net-next v2 2/2] net: nfp: Drop PCI class entries with
> .class_mask = 0
> 
> With .class_mask being zero the value of .class doesn't matter because
> to check if a pci_device_id entry matches a given device the
> expression
> 
> 	(id->class ^ dev->class) & id->class_mask
> 
> is checked for being zero (see pci_match_one_device()). So drop the
> useless assignment for .class and .class_mask to match what (I think)
> all other drivers are doing.
> 
> This also resolves an ambiguity because PCI_VDEVICE_SUB() already
> contains values for .class and .class_mask (which gcc ignores in the
> presence of the named initializer).
> 
> Signed-off-by: Uwe Kleine-König (The Capable Hub) <u.kleine-
> koenig@baylibre.com>
> ---
>  drivers/net/ethernet/netronome/nfp/nfp_main.c    | 16 ---------------
> -
>  .../net/ethernet/netronome/nfp/nfp_netvf_main.c  |  8 --------
>  2 files changed, 24 deletions(-)
> 
> diff --git a/drivers/net/ethernet/netronome/nfp/nfp_main.c
> b/drivers/net/ethernet/netronome/nfp/nfp_main.c
> index e8e4b84d505a..5a9fb5bad405 100644
> --- a/drivers/net/ethernet/netronome/nfp/nfp_main.c
> +++ b/drivers/net/ethernet/netronome/nfp/nfp_main.c
> @@ -36,50 +36,34 @@ static const struct pci_device_id
> nfp_pci_device_ids[] = {
>  	{
>  		PCI_VDEVICE_SUB(NETRONOME, PCI_DEVICE_ID_NFP3800,
>  				PCI_VENDOR_ID_NETRONOME, PCI_ANY_ID),

...

>  		.driver_data = NFP_DEV_NFP6000_VF,
>  	},
>  	{ } /* Required last entry. */
> --
> 2.47.3


Reviewed-by: Aleksandr Loktionov <aleksandr.loktionov@intel.com>

^ permalink raw reply

* [net-next v3 0/5] Add StarFive jhb100 soc SGMII GMAC support
From: Minda Chen @ 2026-05-07  9:41 UTC (permalink / raw)
  To: Alexandre Torgue, Andrew Lunn, David S . Miller, Eric Dumazet,
	Jakub Kicinski, Paolo Abeni, Maxime Coquelin,
	Emil Renner Berthing, Rob Herring, Krzysztof Kozlowski,
	Conor Dooley, netdev
  Cc: linux-kernel, linux-stm32, devicetree, Minda Chen

jhb100 is a new StarFive RISC-V SoC for datacenter BMC (Baseboard
Management Controller), similar to the Aspeed 27x0.

The jhb100 minimal system upstream is in progress:
https://patchwork.kernel.org/project/linux-riscv/cover/20260403054945.467700-1-changhuang.liang@starfivetech.com/

jhb100 GMAC still using designware GMAC core like JH7100 and JH7110,
and contains 2 SGMII interfaces, 1 RGMII/RMII interface, 1 RMII
interface. In JH7100/JH7110 dwmac-starfive.c have supported RGMII/RMII
interface. So require to add SGMII support to dwmac-starfive.c for JHB100.

SGMII serdes PHY has been integrated in JHB100 and do not have driver
setting.

In JHB100 EVB board, SGMII connect with motorcomm YT8531s external PHY
and support RJ45 ethernet port.

The patch base in 7.1-rc2

previous patch link:
v2: https://patchwork.kernel.org/project/netdevbpf/cover/20260417024523.107786-1-minda.chen@starfivetech.com/

changes
v3:
patch2: Add jhb100 with a fallback to the jh7110 compatible, remove the
redundant commit message and just describe the jhb100 hardware.
patch3: Add oneOf 5 clocks / 6 clocks to claim the clocks and make it
pass yaml test.
patch4: Add review tag and unused tag to phy interface.

v2:
1. patch1 Add the remove reason
2. patch2 rename rx clock to sgmii_rx
3. patch4 confirm sgmii rx clock exist, or will probe error
   sgmii will not call starfive_dwmac_set_mode() 

Minda Chen (5):
  dt-bindings: net: starfive,jh7110-dwmac: Remove jh8100
  dt-bindings: net: starfive,jh7110-dwmac: Add jhb100 support
  dt-bindings: net: starfive,jh7110-dwmac: Add jhb100 sgmii rx clk
  net: stmmac: starfive: Add jhb100 SGMII interface
  net: stmmac: starfive: Add STMMAC_FLAG_SPH_DISABLE flag

 .../bindings/net/starfive,jh7110-dwmac.yaml   | 67 ++++++++++++++-----
 .../ethernet/stmicro/stmmac/dwmac-starfive.c  | 59 ++++++++++++----
 2 files changed, 96 insertions(+), 30 deletions(-)

base-commit: 74fe02ce122a6103f207d29fafc8b3a53de6abaf
-- 
2.17.1

^ permalink raw reply

* [net-next v3 2/5] dt-bindings: net: starfive,jh7110-dwmac: Add jhb100 support
From: Minda Chen @ 2026-05-07  9:41 UTC (permalink / raw)
  To: Alexandre Torgue, Andrew Lunn, David S . Miller, Eric Dumazet,
	Jakub Kicinski, Paolo Abeni, Maxime Coquelin,
	Emil Renner Berthing, Rob Herring, Krzysztof Kozlowski,
	Conor Dooley, netdev
  Cc: linux-kernel, linux-stm32, devicetree, Minda Chen
In-Reply-To: <20260507094115.8355-1-minda.chen@starfivetech.com>

The jhb100 GMAC still uses the Synopsys DesignWare GMAC core.
Hardware features are similar to jh7100.
Add the jhb100 GMAC compatible and its reset and interrupt constraints.
The jhb100 dwmac has only one reset signal and one interrupt
line.

Signed-off-by: Minda Chen <minda.chen@starfivetech.com>
---
 .../bindings/net/starfive,jh7110-dwmac.yaml   | 51 ++++++++++++++-----
 1 file changed, 37 insertions(+), 14 deletions(-)

diff --git a/Documentation/devicetree/bindings/net/starfive,jh7110-dwmac.yaml b/Documentation/devicetree/bindings/net/starfive,jh7110-dwmac.yaml
index 0d1962980f57..06aeaa0f6f00 100644
--- a/Documentation/devicetree/bindings/net/starfive,jh7110-dwmac.yaml
+++ b/Documentation/devicetree/bindings/net/starfive,jh7110-dwmac.yaml
@@ -30,6 +30,10 @@ properties:
       - items:
           - const: starfive,jh7110-dwmac
           - const: snps,dwmac-5.20
+      - items:
+          - const: starfive,jhb100-dwmac
+          - const: starfive,jh7110-dwmac
+          - const: snps,dwmac-5.20
 
   reg:
     maxItems: 1
@@ -107,20 +111,39 @@ allOf:
           contains:
             const: starfive,jh7110-dwmac
     then:
-      properties:
-        interrupts:
-          minItems: 3
-          maxItems: 3
-
-        interrupt-names:
-          minItems: 3
-          maxItems: 3
-
-        resets:
-          minItems: 2
-
-        reset-names:
-          minItems: 2
+      if:
+        properties:
+          compatible:
+            contains:
+              const: starfive,jhb100-dwmac
+      then:
+        properties:
+          interrupts:
+            maxItems: 1
+
+          interrupt-names:
+            const: macirq
+
+          resets:
+            maxItems: 1
+
+          reset-names:
+            const: stmmaceth
+      else:
+        properties:
+          interrupts:
+            minItems: 3
+            maxItems: 3
+
+          interrupt-names:
+            minItems: 3
+            maxItems: 3
+
+          resets:
+            minItems: 2
+
+          reset-names:
+            minItems: 2
 
 unevaluatedProperties: false
 
-- 
2.17.1


^ permalink raw reply related

* [net-next v3 1/5] dt-bindings: net: starfive,jh7110-dwmac: Remove jh8100
From: Minda Chen @ 2026-05-07  9:41 UTC (permalink / raw)
  To: Alexandre Torgue, Andrew Lunn, David S . Miller, Eric Dumazet,
	Jakub Kicinski, Paolo Abeni, Maxime Coquelin,
	Emil Renner Berthing, Rob Herring, Krzysztof Kozlowski,
	Conor Dooley, netdev
  Cc: linux-kernel, linux-stm32, devicetree, Minda Chen
In-Reply-To: <20260507094115.8355-1-minda.chen@starfivetech.com>

Remove the jh8100 dt-bindings because it is no longer supported.
StarFive has stopped jh8100 development and will not release
it externally.

Signed-off-by: Minda Chen <minda.chen@starfivetech.com>
---
 .../bindings/net/starfive,jh7110-dwmac.yaml   | 28 ++++---------------
 1 file changed, 5 insertions(+), 23 deletions(-)

diff --git a/Documentation/devicetree/bindings/net/starfive,jh7110-dwmac.yaml b/Documentation/devicetree/bindings/net/starfive,jh7110-dwmac.yaml
index 313a15331661..0d1962980f57 100644
--- a/Documentation/devicetree/bindings/net/starfive,jh7110-dwmac.yaml
+++ b/Documentation/devicetree/bindings/net/starfive,jh7110-dwmac.yaml
@@ -30,10 +30,6 @@ properties:
       - items:
           - const: starfive,jh7110-dwmac
           - const: snps,dwmac-5.20
-      - items:
-          - const: starfive,jh8100-dwmac
-          - const: starfive,jh7110-dwmac
-          - const: snps,dwmac-5.20
 
   reg:
     maxItems: 1
@@ -120,25 +116,11 @@ allOf:
           minItems: 3
           maxItems: 3
 
-      if:
-        properties:
-          compatible:
-            contains:
-              const: starfive,jh8100-dwmac
-      then:
-        properties:
-          resets:
-            maxItems: 1
-
-          reset-names:
-            const: stmmaceth
-      else:
-        properties:
-          resets:
-            minItems: 2
-
-          reset-names:
-            minItems: 2
+        resets:
+          minItems: 2
+
+        reset-names:
+          minItems: 2
 
 unevaluatedProperties: false
 
-- 
2.17.1


^ permalink raw reply related

* Re: [PATCH net v1] net/mlx5e: Fix PTP TX SQ cleanup on metadata DB failure
From: Tariq Toukan @ 2026-05-07 11:18 UTC (permalink / raw)
  To: Prathamesh Deshpande, Saeed Mahameed, Leon Romanovsky
  Cc: Richard Cochran, Tariq Toukan, Eran Ben Elisha, Jakub Kicinski,
	netdev, linux-kernel
In-Reply-To: <20260504223018.49556-1-prathameshdeshpande7@gmail.com>



On 05/05/2026 1:30, Prathamesh Deshpande wrote:
> mlx5e_ptp_open_txqsq() creates the hardware SQ before allocating the PTP
> traffic metadata database.
> 
> If mlx5e_ptp_alloc_traffic_db() fails, the error path frees the software
> TX queue state but skips destroying the already-created hardware SQ.
> 
> Add a dedicated unwind label that destroys the SQ before freeing the TXQ
> state.
> 
> Fixes: 1880bc4e4a96 ("net/mlx5e: Add TX port timestamp support")
> Signed-off-by: Prathamesh Deshpande <prathameshdeshpande7@gmail.com>
> ---
>   drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c | 4 +++-
>   1 file changed, 3 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
> index 723f66a6bd63..45db2dd7408d 100644
> --- a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
> +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
> @@ -489,12 +489,14 @@ static int mlx5e_ptp_open_txqsq(struct mlx5e_ptp *c, u32 tisn,
>   
>   	err = mlx5e_ptp_alloc_traffic_db(ptpsq, dev_to_node(mlx5_core_dma_dev(c->mdev)));
>   	if (err)
> -		goto err_free_txqsq;
> +		goto err_destroy_sq;
>   
>   	INIT_WORK(&ptpsq->report_unhealthy_work, mlx5e_ptpsq_unhealthy_work);
>   
>   	return 0;
>   
> +err_destroy_sq:
> +	mlx5e_ptp_destroy_sq(c->mdev, txqsq->sqn);
>   err_free_txqsq:
>   	mlx5e_free_txqsq(txqsq);
>   

Reviewed-by: Tariq Toukan <tariqt@nvidia.com>

Thanks.

^ permalink raw reply

* Re: [PATCH net v1 2/2] net: stmmac: eic7700: fix delay step calculation and ensure safe register initialization
From: Maxime Chevallier @ 2026-05-07 11:21 UTC (permalink / raw)
  To: lizhi2, andrew+netdev, davem, edumazet, kuba, pabeni, robh,
	krzk+dt, conor+dt, netdev, devicetree, linux-kernel,
	mcoquelin.stm32, alexandre.torgue, rmk+kernel, linux-stm32,
	linux-arm-kernel
  Cc: ningyu, linmin, pinkesh.vaghela, pritesh.patel, weishangjuan
In-Reply-To: <20260507083214.192-1-lizhi2@eswincomputing.com>

Hi,

On 07/05/2026 10:32, lizhi2@eswincomputing.com wrote:
> From: Zhi Li <lizhi2@eswincomputing.com>
> 
> Fix several issues in the EIC7700 DWMAC glue driver related to delay
> configuration and register initialization.
> 
> The hardware implements TX/RX delay with a granularity of 20 ps per
> step, but the driver previously assumed a 100 ps step. Update the
> definitions to match the actual hardware behaviour and align with
> the binding constraints.
> 
> Introduce explicit definitions for the maximum programmable delay
> range based on the hardware limits.
> 
> Move HSP CSR configuration into the initialization path after clocks
> are enabled. This ensures that all register accesses occur with the
> required clocks active, avoiding undefined behaviour.
> 
> Clear the TXD and RXD delay control registers during initialization
> to override any residual configuration left by the bootloader. This
> ensures deterministic RGMII timing and prevents unintended delay
> being applied.
> 
> The MAC RGMII delay programming is only required for 100Mbps and
> 1000Mbps modes, where precise clock-to-data alignment is necessary for
> reliable sampling.
> 
> For 10Mbps operation, timing margins are sufficiently relaxed and no
> additional delay compensation is required. In this case, the driver
> falls back to a safe default configuration with delay disabled.
> 
> For unsupported or unexpected link speeds, the driver avoids
> programming invalid delay values and falls back to a safe default
> state by explicitly clearing the delay configuration.
> 
> Explicitly programming zero ensures that no residual delay settings
> from previous configurations or bootloader state remain active.
> 
> These changes fix incorrect delay programming and initialization
> ordering for existing users.
> 
> This also aligns the driver implementation with the updated device
> tree binding.

There's a lot going on in this patch, can you split this into patches
that solve each of these individual issues ?

It's a mix of fixes (the reg access moved after clk config for example)
and non-fixes (the RGMII timings, you're improving the granularity of
the delays, is this required to fix existing setups, or is it a generic
improvement ?), splitting this would make it both easier to review, and
easier to bisect should problems arise in the future.

Thanks,

Maxime



^ permalink raw reply

* [PATCH net 0/6] pull-request: can 2026-05-07
From: Marc Kleine-Budde @ 2026-05-07  8:22 UTC (permalink / raw)
  To: netdev; +Cc: davem, kuba, linux-can, kernel

Hello netdev-team,

this is a pull request of 6 patches for net/main.

The first patch is by Oliver Hartkopp and adds missing locking to the
raw_setsockopt() function in the CAN raw protocol.

A patch by Lee Jones fixes a hrtimer Use-After-Free in the CAN
broadcast manager protocol.

Alexander Hölzl's patch fixes the RX timeout for CTS hold messages in
the CAN J1939 protocol.

Shuhao Fu contributes a patch that adds missing locking to the
local-destination check in the CAN J1939 protocol.

Stéphane Grosjean's patch updates his email address.

The last patch is by Ciprian Marian Costea and consistently
disables/enables all the IRQ lines while writing the IRQ enable
registers.

regards,
Marc

---

The following changes since commit b266bacba796ff5c4dcd2ae2fc08aacf7ab39153:

  net: ethernet: cortina: Drop half-assembled SKB (2026-05-06 18:43:41 -0700)

are available in the Git repository at:

  git://git.kernel.org/pub/scm/linux/kernel/git/mkl/linux-can.git tags/linux-can-fixes-for-7.1-20260507

for you to fetch changes up to f22a636357d5ff4fd2bb10d8079d851ccb320438:

  can: flexcan: disable all IRQ lines in flexcan_chip_interrupts_enable() (2026-05-07 10:05:55 +0200)

----------------------------------------------------------------
linux-can-fixes-for-7.1-20260507

----------------------------------------------------------------
Alexander Hölzl (1):
      can: j1939: fix wrong RX timeout for CTS hold messages

Ciprian Marian Costea (1):
      can: flexcan: disable all IRQ lines in flexcan_chip_interrupts_enable()

Lee Jones (1):
      can: bcm: prevent thrtimer UAF in rx path by checking RX_NO_AUTOTIMER

Oliver Hartkopp (1):
      can: raw: add locking for raw flags bitfield

Shuhao Fu (1):
      can: j1939: fix lockless local-destination check

Stéphane Grosjean (1):
      can: peak: Modification of references to email accounts being deleted

 .mailmap                                      |  4 +-
 drivers/net/can/flexcan/flexcan-core.c        | 14 ++++++
 drivers/net/can/peak_canfd/peak_canfd.c       |  2 +-
 drivers/net/can/peak_canfd/peak_canfd_user.h  |  2 +-
 drivers/net/can/peak_canfd/peak_pciefd_main.c |  4 +-
 drivers/net/can/sja1000/peak_pci.c            |  4 +-
 drivers/net/can/sja1000/peak_pcmcia.c         |  4 +-
 drivers/net/can/usb/peak_usb/pcan_usb.c       |  2 +-
 drivers/net/can/usb/peak_usb/pcan_usb_core.c  |  4 +-
 drivers/net/can/usb/peak_usb/pcan_usb_core.h  |  2 +-
 drivers/net/can/usb/peak_usb/pcan_usb_fd.c    |  2 +-
 drivers/net/can/usb/peak_usb/pcan_usb_pro.c   |  2 +-
 drivers/net/can/usb/peak_usb/pcan_usb_pro.h   |  2 +-
 include/linux/can/dev/peak_canfd.h            |  2 +-
 net/can/bcm.c                                 |  6 +++
 net/can/j1939/transport.c                     | 20 ++++++--
 net/can/raw.c                                 | 66 ++++++++++++---------------
 17 files changed, 83 insertions(+), 59 deletions(-)

^ permalink raw reply

* [PATCH net 6/6] can: flexcan: disable all IRQ lines in flexcan_chip_interrupts_enable()
From: Marc Kleine-Budde @ 2026-05-07  8:22 UTC (permalink / raw)
  To: netdev
  Cc: davem, kuba, linux-can, kernel, Ciprian Marian Costea,
	Marc Kleine-Budde
In-Reply-To: <20260507112321.439968-1-mkl@pengutronix.de>

From: Ciprian Marian Costea <ciprianmarian.costea@oss.nxp.com>

flexcan_chip_interrupts_enable() disables only the primary IRQ line while
writing to the IMASK and CTRL registers.

On multi-IRQ platforms (S32G2, MCF5441X), the additional IRQ lines (boff,
err, secondary-mb) remain active so their handlers can fire while
registers are inconsistent.

Disable all registered IRQ lines around the IMASK/CTRL writes. This
also fixes the resume path, which calls this function.

Signed-off-by: Ciprian Marian Costea <ciprianmarian.costea@oss.nxp.com>
Link: https://patch.msgid.link/20260326135825.3428856-3-ciprianmarian.costea@oss.nxp.com
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/flexcan/flexcan-core.c | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/drivers/net/can/flexcan/flexcan-core.c b/drivers/net/can/flexcan/flexcan-core.c
index f5d22c61503f..b3bb9acc5a2c 100644
--- a/drivers/net/can/flexcan/flexcan-core.c
+++ b/drivers/net/can/flexcan/flexcan-core.c
@@ -1430,14 +1430,28 @@ static void flexcan_chip_interrupts_enable(const struct net_device *dev)
 {
 	const struct flexcan_priv *priv = netdev_priv(dev);
 	struct flexcan_regs __iomem *regs = priv->regs;
+	u32 quirks = priv->devtype_data.quirks;
 	u64 reg_imask;
 
 	disable_irq(dev->irq);
+	if (quirks & FLEXCAN_QUIRK_NR_IRQ_3) {
+		disable_irq(priv->irq_boff);
+		disable_irq(priv->irq_err);
+	}
+	if (quirks & FLEXCAN_QUIRK_SECONDARY_MB_IRQ)
+		disable_irq(priv->irq_secondary_mb);
+
 	priv->write(priv->reg_ctrl_default, &regs->ctrl);
 	reg_imask = priv->rx_mask | priv->tx_mask;
 	priv->write(upper_32_bits(reg_imask), &regs->imask2);
 	priv->write(lower_32_bits(reg_imask), &regs->imask1);
 	enable_irq(dev->irq);
+	if (quirks & FLEXCAN_QUIRK_SECONDARY_MB_IRQ)
+		enable_irq(priv->irq_secondary_mb);
+	if (quirks & FLEXCAN_QUIRK_NR_IRQ_3) {
+		enable_irq(priv->irq_boff);
+		enable_irq(priv->irq_err);
+	}
 }
 
 static void flexcan_chip_interrupts_disable(const struct net_device *dev)
-- 
2.53.0


^ permalink raw reply related

* [PATCH net 3/6] can: j1939: fix wrong RX timeout for CTS hold messages
From: Marc Kleine-Budde @ 2026-05-07  8:22 UTC (permalink / raw)
  To: netdev
  Cc: davem, kuba, linux-can, kernel, Alexander Hölzl,
	Oleksij Rempel, Marc Kleine-Budde
In-Reply-To: <20260507112321.439968-1-mkl@pengutronix.de>

From: Alexander Hölzl <alexander.hoelzl@gmx.net>

In J1939 segmented transport, a CTS message with data byte 2 set to zero is
interpreted as a hold message. This instructs the transmitter of the
segmented message to hold the connection open but to delay sending.

According to the J1939-21 standard, section 5.10.2.4, the timeout T4 after
which a held-open session is invalidated is 1050 ms, not 550 ms as
currently implemented. The 550 ms timeout is problematic if a device uses
hold messages and assumes it can wait for more than 550 ms before it has to
resend the hold message.

Fix the RX timeout by changing the T4 timeout from 550 ms to 1050 ms.

Signed-off-by: Alexander Hölzl <alexander.hoelzl@gmx.net>
Acked-by: Oleksij Rempel <o.rempel@pengutronix.de>
Link: https://patch.msgid.link/20260421153152.87772-3-alexander.hoelzl@gmx.net
[mkl: rewrap long lines in patch description, use imperative mood in last section]
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 net/can/j1939/transport.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/can/j1939/transport.c b/net/can/j1939/transport.c
index df93d57907da..7ad56b5f17b9 100644
--- a/net/can/j1939/transport.c
+++ b/net/can/j1939/transport.c
@@ -1479,7 +1479,7 @@ j1939_xtp_rx_cts_one(struct j1939_session *session, struct sk_buff *skb)
 		}
 	} else {
 		/* CTS(0) */
-		j1939_tp_set_rxtimeout(session, 550);
+		j1939_tp_set_rxtimeout(session, 1050);
 	}
 	return;

-- 
2.53.0

^ permalink raw reply related

* [PATCH net 5/6] can: peak: Modification of references to email accounts being deleted
From: Marc Kleine-Budde @ 2026-05-07  8:22 UTC (permalink / raw)
  To: netdev
  Cc: davem, kuba, linux-can, kernel, Stéphane Grosjean,
	Marc Kleine-Budde
In-Reply-To: <20260507112321.439968-1-mkl@pengutronix.de>

From: Stéphane Grosjean <s.grosjean@peak-system.fr>

Following the sale of PEAK-System France by HMS-Networks, this update is
intended to change all my @hms-networks.com email addresses to my new
@peak-system.fr address.

Signed-off-by: Stéphane Grosjean <s.grosjean@peak-system.fr>
Link: https://patch.msgid.link/20260410124251.40506-1-stephane.grosjean@free.fr
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 .mailmap                                      | 4 ++--
 drivers/net/can/peak_canfd/peak_canfd.c       | 2 +-
 drivers/net/can/peak_canfd/peak_canfd_user.h  | 2 +-
 drivers/net/can/peak_canfd/peak_pciefd_main.c | 4 ++--
 drivers/net/can/sja1000/peak_pci.c            | 4 ++--
 drivers/net/can/sja1000/peak_pcmcia.c         | 4 ++--
 drivers/net/can/usb/peak_usb/pcan_usb.c       | 2 +-
 drivers/net/can/usb/peak_usb/pcan_usb_core.c  | 4 ++--
 drivers/net/can/usb/peak_usb/pcan_usb_core.h  | 2 +-
 drivers/net/can/usb/peak_usb/pcan_usb_fd.c    | 2 +-
 drivers/net/can/usb/peak_usb/pcan_usb_pro.c   | 2 +-
 drivers/net/can/usb/peak_usb/pcan_usb_pro.h   | 2 +-
 include/linux/can/dev/peak_canfd.h            | 2 +-
 13 files changed, 18 insertions(+), 18 deletions(-)

diff --git a/.mailmap b/.mailmap
index 34acd34bbf9b..9af8e6979164 100644
--- a/.mailmap
+++ b/.mailmap
@@ -803,8 +803,8 @@ Sriram Yagnaraman <sriram.yagnaraman@ericsson.com> <sriram.yagnaraman@est.tech>
 Stanislav Fomichev <sdf@fomichev.me> <sdf@google.com>
 Stanislav Fomichev <sdf@fomichev.me> <stfomichev@gmail.com>
 Stefan Wahren <wahrenst@gmx.net> <stefan.wahren@i2se.com>
-Stéphane Grosjean <stephane.grosjean@hms-networks.com> <s.grosjean@peak-system.com>
-Stéphane Grosjean <stephane.grosjean@hms-networks.com> <stephane.grosjean@free.fr>
+Stéphane Grosjean <s.grosjean@peak-system.fr> <s.grosjean@peak-system.com>
+Stéphane Grosjean <s.grosjean@peak-system.fr> <stephane.grosjean@free.fr>
 Stéphane Witzmann <stephane.witzmann@ubpmes.univ-bpclermont.fr>
 Stephen Hemminger <stephen@networkplumber.org> <shemminger@linux-foundation.org>
 Stephen Hemminger <stephen@networkplumber.org> <shemminger@osdl.org>
diff --git a/drivers/net/can/peak_canfd/peak_canfd.c b/drivers/net/can/peak_canfd/peak_canfd.c
index 06cb2629f66a..4fd1aefb780f 100644
--- a/drivers/net/can/peak_canfd/peak_canfd.c
+++ b/drivers/net/can/peak_canfd/peak_canfd.c
@@ -2,7 +2,7 @@
 /* Copyright (C) 2007, 2011 Wolfgang Grandegger <wg@grandegger.com>
  *
  * Copyright (C) 2016-2025 PEAK System-Technik GmbH
- * Author: Stéphane Grosjean <stephane.grosjean@hms-networks.com>
+ * Author: Stéphane Grosjean <s.grosjean@peak-system.fr>
  */
 
 #include <linux/can.h>
diff --git a/drivers/net/can/peak_canfd/peak_canfd_user.h b/drivers/net/can/peak_canfd/peak_canfd_user.h
index 60c6542028cf..dc0ecb566a85 100644
--- a/drivers/net/can/peak_canfd/peak_canfd_user.h
+++ b/drivers/net/can/peak_canfd/peak_canfd_user.h
@@ -2,7 +2,7 @@
 /* CAN driver for PEAK System micro-CAN based adapters
  *
  * Copyright (C) 2003-2025 PEAK System-Technik GmbH
- * Author: Stéphane Grosjean <stephane.grosjean@hms-networks.com>
+ * Author: Stéphane Grosjean <s.grosjean@peak-system.fr>
  */
 #ifndef PEAK_CANFD_USER_H
 #define PEAK_CANFD_USER_H
diff --git a/drivers/net/can/peak_canfd/peak_pciefd_main.c b/drivers/net/can/peak_canfd/peak_pciefd_main.c
index 93558e33bc02..7c749301ea84 100644
--- a/drivers/net/can/peak_canfd/peak_pciefd_main.c
+++ b/drivers/net/can/peak_canfd/peak_pciefd_main.c
@@ -4,7 +4,7 @@
  * Derived from the PCAN project file driver/src/pcan_pci.c:
  *
  * Copyright (C) 2001-2025 PEAK System-Technik GmbH
- * Author: Stéphane Grosjean <stephane.grosjean@hms-networks.com>
+ * Author: Stéphane Grosjean <s.grosjean@peak-system.fr>
  */
 
 #include <linux/kernel.h>
@@ -19,7 +19,7 @@
 
 #include "peak_canfd_user.h"
 
-MODULE_AUTHOR("Stéphane Grosjean <stephane.grosjean@hms-networks.com>");
+MODULE_AUTHOR("Stéphane Grosjean <s.grosjean@peak-system.fr>");
 MODULE_DESCRIPTION("Socket-CAN driver for PEAK PCAN PCIe/M.2 FD family cards");
 MODULE_LICENSE("GPL v2");
 
diff --git a/drivers/net/can/sja1000/peak_pci.c b/drivers/net/can/sja1000/peak_pci.c
index 4cc4a1581dd1..69c61ccf621d 100644
--- a/drivers/net/can/sja1000/peak_pci.c
+++ b/drivers/net/can/sja1000/peak_pci.c
@@ -5,7 +5,7 @@
  * Derived from the PCAN project file driver/src/pcan_pci.c:
  *
  * Copyright (C) 2001-2025 PEAK System-Technik GmbH
- * Author: Stéphane Grosjean <stephane.grosjean@hms-networks.com>
+ * Author: Stéphane Grosjean <s.grosjean@peak-system.fr>
  */
 
 #include <linux/kernel.h>
@@ -22,7 +22,7 @@
 
 #include "sja1000.h"
 
-MODULE_AUTHOR("Stéphane Grosjean <stephane.grosjean@hms-networks.com>");
+MODULE_AUTHOR("Stéphane Grosjean <s.grosjean@peak-system.fr>");
 MODULE_DESCRIPTION("Socket-CAN driver for PEAK PCAN PCI family cards");
 MODULE_LICENSE("GPL v2");
 
diff --git a/drivers/net/can/sja1000/peak_pcmcia.c b/drivers/net/can/sja1000/peak_pcmcia.c
index 42a77d435b39..c3c2aa21da47 100644
--- a/drivers/net/can/sja1000/peak_pcmcia.c
+++ b/drivers/net/can/sja1000/peak_pcmcia.c
@@ -4,7 +4,7 @@
  * Derived from the PCAN project file driver/src/pcan_pccard.c
  *
  * Copyright (C) 2006-2025 PEAK System-Technik GmbH
- * Author: Stéphane Grosjean <stephane.grosjean@hms-networks.com>
+ * Author: Stéphane Grosjean <s.grosjean@peak-system.fr>
  */
 #include <linux/kernel.h>
 #include <linux/module.h>
@@ -19,7 +19,7 @@
 #include <linux/can/dev.h>
 #include "sja1000.h"
 
-MODULE_AUTHOR("Stéphane Grosjean <stephane.grosjean@hms-networks.com>");
+MODULE_AUTHOR("Stéphane Grosjean <s.grosjean@peak-system.fr>");
 MODULE_DESCRIPTION("CAN driver for PEAK-System PCAN-PC Cards");
 MODULE_LICENSE("GPL v2");
 
diff --git a/drivers/net/can/usb/peak_usb/pcan_usb.c b/drivers/net/can/usb/peak_usb/pcan_usb.c
index 9278a1522aae..8fd058c32856 100644
--- a/drivers/net/can/usb/peak_usb/pcan_usb.c
+++ b/drivers/net/can/usb/peak_usb/pcan_usb.c
@@ -4,7 +4,7 @@
  * Derived from the PCAN project file driver/src/pcan_usb.c
  *
  * Copyright (C) 2003-2025 PEAK System-Technik GmbH
- * Author: Stéphane Grosjean <stephane.grosjean@hms-networks.com>
+ * Author: Stéphane Grosjean <s.grosjean@peak-system.fr>
  *
  * Many thanks to Klaus Hitschler <klaus.hitschler@gmx.de>
  */
diff --git a/drivers/net/can/usb/peak_usb/pcan_usb_core.c b/drivers/net/can/usb/peak_usb/pcan_usb_core.c
index cf48bb26d46d..c7933d1acc99 100644
--- a/drivers/net/can/usb/peak_usb/pcan_usb_core.c
+++ b/drivers/net/can/usb/peak_usb/pcan_usb_core.c
@@ -4,7 +4,7 @@
  * Derived from the PCAN project file driver/src/pcan_usb_core.c
  *
  * Copyright (C) 2003-2025 PEAK System-Technik GmbH
- * Author: Stéphane Grosjean <stephane.grosjean@hms-networks.com>
+ * Author: Stéphane Grosjean <s.grosjean@peak-system.fr>
  *
  * Many thanks to Klaus Hitschler <klaus.hitschler@gmx.de>
  */
@@ -24,7 +24,7 @@
 
 #include "pcan_usb_core.h"
 
-MODULE_AUTHOR("Stéphane Grosjean <stephane.grosjean@hms-networks.com>");
+MODULE_AUTHOR("Stéphane Grosjean <s.grosjean@peak-system.fr>");
 MODULE_DESCRIPTION("CAN driver for PEAK-System USB adapters");
 MODULE_LICENSE("GPL v2");
 
diff --git a/drivers/net/can/usb/peak_usb/pcan_usb_core.h b/drivers/net/can/usb/peak_usb/pcan_usb_core.h
index d1c1897d47b9..65999f04f4b7 100644
--- a/drivers/net/can/usb/peak_usb/pcan_usb_core.h
+++ b/drivers/net/can/usb/peak_usb/pcan_usb_core.h
@@ -4,7 +4,7 @@
  * Derived from the PCAN project file driver/src/pcan_usb_core.c
  *
  * Copyright (C) 2003-2025 PEAK System-Technik GmbH
- * Author: Stéphane Grosjean <stephane.grosjean@hms-networks.com>
+ * Author: Stéphane Grosjean <s.grosjean@peak-system.fr>
  *
  * Many thanks to Klaus Hitschler <klaus.hitschler@gmx.de>
  */
diff --git a/drivers/net/can/usb/peak_usb/pcan_usb_fd.c b/drivers/net/can/usb/peak_usb/pcan_usb_fd.c
index eb4f5884ad73..ef9fd693e9bd 100644
--- a/drivers/net/can/usb/peak_usb/pcan_usb_fd.c
+++ b/drivers/net/can/usb/peak_usb/pcan_usb_fd.c
@@ -3,7 +3,7 @@
  * CAN driver for PEAK System PCAN-USB FD / PCAN-USB Pro FD adapter
  *
  * Copyright (C) 2013-2025 PEAK System-Technik GmbH
- * Author: Stéphane Grosjean <stephane.grosjean@hms-networks.com>
+ * Author: Stéphane Grosjean <s.grosjean@peak-system.fr>
  */
 #include <linux/ethtool.h>
 #include <linux/module.h>
diff --git a/drivers/net/can/usb/peak_usb/pcan_usb_pro.c b/drivers/net/can/usb/peak_usb/pcan_usb_pro.c
index 4bfa8d0fbb32..aefcded8e12a 100644
--- a/drivers/net/can/usb/peak_usb/pcan_usb_pro.c
+++ b/drivers/net/can/usb/peak_usb/pcan_usb_pro.c
@@ -4,7 +4,7 @@
  * Derived from the PCAN project file driver/src/pcan_usbpro.c
  *
  * Copyright (C) 2003-2025 PEAK System-Technik GmbH
- * Author: Stéphane Grosjean <stephane.grosjean@hms-networks.com>
+ * Author: Stéphane Grosjean <s.grosjean@peak-system.fr>
  */
 #include <linux/ethtool.h>
 #include <linux/module.h>
diff --git a/drivers/net/can/usb/peak_usb/pcan_usb_pro.h b/drivers/net/can/usb/peak_usb/pcan_usb_pro.h
index 162c7546d3a8..d669c9e610c7 100644
--- a/drivers/net/can/usb/peak_usb/pcan_usb_pro.h
+++ b/drivers/net/can/usb/peak_usb/pcan_usb_pro.h
@@ -4,7 +4,7 @@
  * Derived from the PCAN project file driver/src/pcan_usbpro_fw.h
  *
  * Copyright (C) 2003-2025 PEAK System-Technik GmbH
- * Author: Stéphane Grosjean <stephane.grosjean@hms-networks.com>
+ * Author: Stéphane Grosjean <s.grosjean@peak-system.fr>
  */
 #ifndef PCAN_USB_PRO_H
 #define PCAN_USB_PRO_H
diff --git a/include/linux/can/dev/peak_canfd.h b/include/linux/can/dev/peak_canfd.h
index d3788a3d0942..056e0efa649f 100644
--- a/include/linux/can/dev/peak_canfd.h
+++ b/include/linux/can/dev/peak_canfd.h
@@ -3,7 +3,7 @@
  * CAN driver for PEAK System micro-CAN based adapters
  *
  * Copyright (C) 2003-2025 PEAK System-Technik GmbH
- * Author: Stéphane Grosjean <stephane.grosjean@hms-networks.com>
+ * Author: Stéphane Grosjean <s.grosjean@peak-system.fr>
  */
 #ifndef PUCAN_H
 #define PUCAN_H
-- 
2.53.0


^ permalink raw reply related

* [PATCH net 2/6] can: bcm: prevent thrtimer UAF in rx path by checking RX_NO_AUTOTIMER
From: Marc Kleine-Budde @ 2026-05-07  8:22 UTC (permalink / raw)
  To: netdev
  Cc: davem, kuba, linux-can, kernel, Lee Jones, Oliver Hartkopp,
	Marc Kleine-Budde
In-Reply-To: <20260507112321.439968-1-mkl@pengutronix.de>

From: Lee Jones <lee@kernel.org>

Commit f1b4e32aca08 ("can: bcm: use call_rcu() instead of costly
synchronize_rcu()") removed the synchronize_rcu() call from
bcm_delete_rx_op() and introduced the RX_NO_AUTOTIMER flag to prevent
timers from being rearmed during deletion.  However, it only applied
this check to op->timer via bcm_rx_starttimer().

It missed the fact that op->thrtimer can also be rearmed by an
in-flight bcm_rx_handler() (which runs as an RCU reader) via
bcm_rx_update_and_send().  This allows op->thrtimer to be queued after
bcm_remove_op() has already cancelled it, leading to a use-after-free
when the timer fires on the deferred-freed struct bcm_op.

Address the omission by checking the RX_NO_AUTOTIMER flag
in bcm_rx_update_and_send() before starting op->thrtimer, effectively
preventing it from being rearmed concurrently with teardown.

Signed-off-by: Lee Jones <lee@kernel.org>
Acked-by: Oliver Hartkopp <socketcan@hartkopp.net>
Fixes: f1b4e32aca08 ("can: bcm: use call_rcu() instead of costly synchronize_rcu()")
Link: https://patch.msgid.link/20260422102239.948594-1-lee@kernel.org
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 net/can/bcm.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/net/can/bcm.c b/net/can/bcm.c
index a4bef2c48a55..67e5b3149a8f 100644
--- a/net/can/bcm.c
+++ b/net/can/bcm.c
@@ -539,6 +539,12 @@ static void bcm_rx_update_and_send(struct bcm_op *op,
 	if (hrtimer_active(&op->thrtimer))
 		return;

+	/* bcm_remove_op() may have cancelled thrtimer concurrently with this
+	 * RCU-protected handler; do not rearm it. Mirrors bcm_rx_starttimer().
+	 */
+	if (op->flags & RX_NO_AUTOTIMER)
+		return;
+
 	/* first reception with enabled throttling mode */
 	if (!op->kt_lastmsg)
 		goto rx_changed_settime;
-- 
2.53.0

^ permalink raw reply related

* [PATCH net 4/6] can: j1939: fix lockless local-destination check
From: Marc Kleine-Budde @ 2026-05-07  8:22 UTC (permalink / raw)
  To: netdev
  Cc: davem, kuba, linux-can, kernel, Shuhao Fu, Oleksij Rempel,
	Marc Kleine-Budde
In-Reply-To: <20260507112321.439968-1-mkl@pengutronix.de>

From: Shuhao Fu <sfual@cse.ust.hk>

j1939_priv.ents[].nusers is documented as protected by priv->lock, and
its updates already happen under that lock. j1939_can_recv() also reads
it under read_lock_bh(). However, j1939_session_skb_queue() and
j1939_tp_send() still read priv->ents[da].nusers without taking the
lock.

Those transport-side checks decide whether to set J1939_ECU_LOCAL_DST, so
they can race with j1939_local_ecu_get() and j1939_local_ecu_put() while
userspace is binding or releasing sockets concurrently with TP traffic.
This can misclassify TP/ETP sessions as local or remote and take the wrong
transport path.

Fix both transport paths by routing the destination-locality check through
a helper that reads ents[].nusers under read_lock_bh(&priv->lock).

Fixes: 9d71dd0c7009 ("can: add support of SAE J1939 protocol")
Signed-off-by: Shuhao Fu <sfual@cse.ust.hk>
Tested-by: Oleksij Rempel <o.rempel@pengutronix.de>
Acked-by: Oleksij Rempel <o.rempel@pengutronix.de>
Link: https://patch.msgid.link/20260419140614.GA4041240@chcpu16
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 net/can/j1939/transport.c | 18 ++++++++++++++----
 1 file changed, 14 insertions(+), 4 deletions(-)

diff --git a/net/can/j1939/transport.c b/net/can/j1939/transport.c
index 7ad56b5f17b9..25d96976d9d0 100644
--- a/net/can/j1939/transport.c
+++ b/net/can/j1939/transport.c
@@ -351,6 +351,18 @@ static void j1939_session_skb_drop_old(struct j1939_session *session)
 	}
 }
 
+static bool j1939_address_is_local(struct j1939_priv *priv, u8 addr)
+{
+	bool local = false;
+
+	read_lock_bh(&priv->lock);
+	if (j1939_address_is_unicast(addr) && priv->ents[addr].nusers)
+		local = true;
+	read_unlock_bh(&priv->lock);
+
+	return local;
+}
+
 void j1939_session_skb_queue(struct j1939_session *session,
 			     struct sk_buff *skb)
 {
@@ -359,8 +371,7 @@ void j1939_session_skb_queue(struct j1939_session *session,
 
 	j1939_ac_fixup(priv, skb);
 
-	if (j1939_address_is_unicast(skcb->addr.da) &&
-	    priv->ents[skcb->addr.da].nusers)
+	if (j1939_address_is_local(priv, skcb->addr.da))
 		skcb->flags |= J1939_ECU_LOCAL_DST;
 
 	skcb->flags |= J1939_ECU_LOCAL_SRC;
@@ -2038,8 +2049,7 @@ struct j1939_session *j1939_tp_send(struct j1939_priv *priv,
 		return ERR_PTR(ret);
 
 	/* fix DST flags, it may be used there soon */
-	if (j1939_address_is_unicast(skcb->addr.da) &&
-	    priv->ents[skcb->addr.da].nusers)
+	if (j1939_address_is_local(priv, skcb->addr.da))
 		skcb->flags |= J1939_ECU_LOCAL_DST;
 
 	/* src is always local, I'm sending ... */
-- 
2.53.0


^ permalink raw reply related

* [PATCH net 1/6] can: raw: add locking for raw flags bitfield
From: Marc Kleine-Budde @ 2026-05-07  8:22 UTC (permalink / raw)
  To: netdev
  Cc: davem, kuba, linux-can, kernel, Oliver Hartkopp, Eulgyu Kim,
	Vincent Mailhol, Marc Kleine-Budde
In-Reply-To: <20260507112321.439968-1-mkl@pengutronix.de>

From: Oliver Hartkopp <socketcan@hartkopp.net>

With commit 890e5198a6e5 ("can: raw: use bitfields to store flags in
struct raw_sock") the formerly separate integer values have been integrated
into a single bitfield. This led to a read-modify-write operation when
changing a flag in raw_setsockopt(), which now needs locking to prevent
concurrent access.

Instead of adding a lock/unlock hell to each of the flag manipulations,
this patch introduces a wrapper around a new raw_setsockopt_locked()
function, analogous to the isotp_setsockopt[_locked]() approach in
net/can/isotp.c.

Fixes: 890e5198a6e5 ("can: raw: use bitfields to store flags in struct raw_sock")
Reported-by: Eulgyu Kim <eulgyukim@snu.ac.kr>
Closes: https://lore.kernel.org/linux-can/20260503112200.22727-1-eulgyukim@snu.ac.kr/
Tested-by: Eulgyu Kim <eulgyukim@snu.ac.kr>
Signed-off-by: Oliver Hartkopp <socketcan@hartkopp.net>
Reviewed-by: Vincent Mailhol <mailhol@kernel.org>
Tested-by: Vincent Mailhol <mailhol@kernel.org>
Link: https://patch.msgid.link/20260504111928.41856-1-socketcan@hartkopp.net
[mkl: use Closes tag instead of Link]
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 net/can/raw.c | 66 +++++++++++++++++++++++----------------------------
 1 file changed, 30 insertions(+), 36 deletions(-)

diff --git a/net/can/raw.c b/net/can/raw.c
index a26942e78e68..82d9c0499c95 100644
--- a/net/can/raw.c
+++ b/net/can/raw.c
@@ -562,8 +562,8 @@ static int raw_getname(struct socket *sock, struct sockaddr *uaddr,
 	return RAW_MIN_NAMELEN;
 }
 
-static int raw_setsockopt(struct socket *sock, int level, int optname,
-			  sockptr_t optval, unsigned int optlen)
+static int raw_setsockopt_locked(struct socket *sock, int optname,
+				 sockptr_t optval, unsigned int optlen)
 {
 	struct sock *sk = sock->sk;
 	struct raw_sock *ro = raw_sk(sk);
@@ -575,9 +575,6 @@ static int raw_setsockopt(struct socket *sock, int level, int optname,
 	int flag;
 	int err = 0;
 
-	if (level != SOL_CAN_RAW)
-		return -EINVAL;
-
 	switch (optname) {
 	case CAN_RAW_FILTER:
 		if (optlen % sizeof(struct can_filter) != 0)
@@ -598,17 +595,11 @@ static int raw_setsockopt(struct socket *sock, int level, int optname,
 				return -EFAULT;
 		}
 
-		rtnl_lock();
-		lock_sock(sk);
-
 		dev = ro->dev;
-		if (ro->bound && dev) {
-			if (dev->reg_state != NETREG_REGISTERED) {
-				if (count > 1)
-					kfree(filter);
-				err = -ENODEV;
-				goto out_fil;
-			}
+		if (ro->bound && dev && dev->reg_state != NETREG_REGISTERED) {
+			if (count > 1)
+				kfree(filter);
+			return -ENODEV;
 		}
 
 		if (ro->bound) {
@@ -622,7 +613,7 @@ static int raw_setsockopt(struct socket *sock, int level, int optname,
 			if (err) {
 				if (count > 1)
 					kfree(filter);
-				goto out_fil;
+				return err;
 			}
 
 			/* remove old filter registrations */
@@ -642,11 +633,6 @@ static int raw_setsockopt(struct socket *sock, int level, int optname,
 		}
 		ro->filter = filter;
 		ro->count  = count;
-
- out_fil:
-		release_sock(sk);
-		rtnl_unlock();
-
 		break;
 
 	case CAN_RAW_ERR_FILTER:
@@ -658,16 +644,9 @@ static int raw_setsockopt(struct socket *sock, int level, int optname,
 
 		err_mask &= CAN_ERR_MASK;
 
-		rtnl_lock();
-		lock_sock(sk);
-
 		dev = ro->dev;
-		if (ro->bound && dev) {
-			if (dev->reg_state != NETREG_REGISTERED) {
-				err = -ENODEV;
-				goto out_err;
-			}
-		}
+		if (ro->bound && dev && dev->reg_state != NETREG_REGISTERED)
+			return -ENODEV;
 
 		/* remove current error mask */
 		if (ro->bound) {
@@ -676,7 +655,7 @@ static int raw_setsockopt(struct socket *sock, int level, int optname,
 						   err_mask);
 
 			if (err)
-				goto out_err;
+				return err;
 
 			/* remove old err_mask registration */
 			raw_disable_errfilter(sock_net(sk), dev, sk,
@@ -685,11 +664,6 @@ static int raw_setsockopt(struct socket *sock, int level, int optname,
 
 		/* link new err_mask to the socket */
 		ro->err_mask = err_mask;
-
- out_err:
-		release_sock(sk);
-		rtnl_unlock();
-
 		break;
 
 	case CAN_RAW_LOOPBACK:
@@ -769,6 +743,26 @@ static int raw_setsockopt(struct socket *sock, int level, int optname,
 	return err;
 }
 
+static int raw_setsockopt(struct socket *sock, int level, int optname,
+			  sockptr_t optval, unsigned int optlen)
+{
+	struct sock *sk = sock->sk;
+	int err;
+
+	if (level != SOL_CAN_RAW)
+		return -EINVAL;
+
+	rtnl_lock();
+	lock_sock(sk);
+
+	err = raw_setsockopt_locked(sock, optname, optval, optlen);
+
+	release_sock(sk);
+	rtnl_unlock();
+
+	return err;
+}
+
 static int raw_getsockopt(struct socket *sock, int level, int optname,
 			  sockopt_t *opt)
 {

base-commit: b266bacba796ff5c4dcd2ae2fc08aacf7ab39153
-- 
2.53.0


^ permalink raw reply related

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox