Netdev List
 help / color / mirror / Atom feed
* [RFC net-next 07/17] tls: replace tcp_inq with socket peek_len
From: Geliang Tang @ 2026-06-22 10:43 UTC (permalink / raw)
  To: Matthieu Baerts, Mat Martineau, Geliang Tang, David S. Miller,
	Eric Dumazet, Jakub Kicinski, Paolo Abeni, Simon Horman,
	Neal Cardwell, Kuniyuki Iwashima, John Fastabend, Sabrina Dubroca,
	Hannes Reinecke
  Cc: Geliang Tang, netdev, mptcp, Gang Yan, Zqiang
In-Reply-To: <cover.1782123118.git.tanggeliang@kylinos.cn>

From: Geliang Tang <tanggeliang@kylinos.cn>

TLS (device, strparser, and software) calls tcp_inq() directly to
determine how much data is still pending in the socket receive queue.
This breaks when the underlying socket is not TCP (e.g., MPTCP).

Switch all occurrences of tcp_inq(sk) to sk->sk_socket->ops->
peek_len(sk->sk_socket). This operation is implemented for both TCP
and MPTCP (after the previous commits), making TLS transparently
usable over MPTCP connections.

The change is straightforward: every place where TLS needed the
available in-queue bytes now uses the protocol-specific peek_len
method instead of assuming a TCP socket.

Co-developed-by: Gang Yan <yangang@kylinos.cn>
Signed-off-by: Gang Yan <yangang@kylinos.cn>
Co-developed-by: Zqiang <qiang.zhang@linux.dev>
Signed-off-by: Zqiang <qiang.zhang@linux.dev>
Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn>
---
 net/tls/tls_device.c | 4 ++--
 net/tls/tls_strp.c   | 6 ++++--
 net/tls/tls_sw.c     | 4 +++-
 3 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c
index 741aef09bfd3..c44a59d9d715 100644
--- a/net/tls/tls_device.c
+++ b/net/tls/tls_device.c
@@ -805,7 +805,7 @@ void tls_device_rx_resync_new_rec(struct sock *sk, u32 rcd_len, u32 seq)
 		/* head of next rec is already in, note that the sock_inq will
 		 * include the currently parsed message when called from parser
 		 */
-		sock_data = tcp_inq(sk);
+		sock_data = sk->sk_socket->ops->peek_len(sk->sk_socket);
 		if (sock_data > rcd_len) {
 			trace_tls_device_rx_resync_nh_delay(sk, sock_data,
 							    rcd_len);
@@ -864,7 +864,7 @@ static void tls_device_core_ctrl_rx_resync(struct tls_context *tls_ctx,
 	rxm = strp_msg(skb);
 
 	/* head of next rec is already in, parser will sync for us */
-	if (tcp_inq(sk) > rxm->full_len) {
+	if (sk->sk_socket->ops->peek_len(sk->sk_socket) > rxm->full_len) {
 		trace_tls_device_rx_resync_nh_schedule(sk);
 		ctx->resync_nh_do_now = 1;
 	} else {
diff --git a/net/tls/tls_strp.c b/net/tls/tls_strp.c
index 61b10c697ecc..82a5b64b5f48 100644
--- a/net/tls/tls_strp.c
+++ b/net/tls/tls_strp.c
@@ -484,12 +484,14 @@ bool tls_strp_msg_load(struct tls_strparser *strp, bool force_refresh)
 {
 	struct strp_msg *rxm;
 	struct tls_msg *tlm;
+	int inq;
 
 	DEBUG_NET_WARN_ON_ONCE(!strp->msg_ready);
 	DEBUG_NET_WARN_ON_ONCE(!strp->stm.full_len);
 
 	if (!strp->copy_mode && force_refresh) {
-		if (unlikely(tcp_inq(strp->sk) < strp->stm.full_len)) {
+		inq = strp->sk->sk_socket->ops->peek_len(strp->sk->sk_socket);
+		if (unlikely(inq < strp->stm.full_len)) {
 			WRITE_ONCE(strp->msg_ready, 0);
 			strp->msg_announced = 0;
 			memset(&strp->stm, 0, sizeof(strp->stm));
@@ -513,7 +515,7 @@ static int tls_strp_read_sock(struct tls_strparser *strp)
 {
 	int sz, inq;
 
-	inq = tcp_inq(strp->sk);
+	inq = strp->sk->sk_socket->ops->peek_len(strp->sk->sk_socket);
 	if (inq < 1)
 		return 0;
 
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index 9324e4ed20a3..35fb0c3c965a 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -1706,12 +1706,14 @@ tls_read_flush_backlog(struct sock *sk, struct tls_prot_info *prot,
 		       size_t *flushed_at)
 {
 	size_t max_rec;
+	int inq;
 
 	if (len_left <= decrypted)
 		return false;
 
+	inq = sk->sk_socket->ops->peek_len(sk->sk_socket);
 	max_rec = prot->overhead_size - prot->tail_size + TLS_MAX_PAYLOAD_SIZE;
-	if (done - *flushed_at < SZ_128K && tcp_inq(sk) > max_rec)
+	if (done - *flushed_at < SZ_128K && inq > max_rec)
 		return false;
 
 	*flushed_at = done;
-- 
2.53.0


^ permalink raw reply related

* [RFC net-next 06/17] mptcp: implement peek_len for proto_ops
From: Geliang Tang @ 2026-06-22 10:43 UTC (permalink / raw)
  To: Matthieu Baerts, Mat Martineau, Geliang Tang, David S. Miller,
	Eric Dumazet, Jakub Kicinski, Paolo Abeni, Simon Horman,
	Neal Cardwell, Kuniyuki Iwashima, John Fastabend, Sabrina Dubroca,
	Hannes Reinecke
  Cc: Geliang Tang, netdev, mptcp, Gang Yan, Zqiang
In-Reply-To: <cover.1782123118.git.tanggeliang@kylinos.cn>

From: Geliang Tang <tanggeliang@kylinos.cn>

The TLS stack uses tcp_inq() to query the amount of data available
in the receive queue without consuming it. For MPTCP sockets, this
information is not directly available from a TCP subflow; it must be
computed from the MPTCP receive queue and the current mapping.

Introduce mptcp_peek_len() which returns the number of bytes that
can be peeked from the MPTCP socket. It reuses the existing
mptcp_inq() helper (used by ioctl SIOCINQ). The implementation
considers the first skb in the receive queue, the current ack_seq,
and handles the FIN case.

Assign .peek_len in both mptcp_stream_ops and mptcp_v6_stream_ops
so that upper layers (e.g., TLS) can obtain the correct in-queue
byte count for an MPTCP connection.

Co-developed-by: Gang Yan <yangang@kylinos.cn>
Signed-off-by: Gang Yan <yangang@kylinos.cn>
Co-developed-by: Zqiang <qiang.zhang@linux.dev>
Signed-off-by: Zqiang <qiang.zhang@linux.dev>
Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn>
---
 net/mptcp/protocol.c | 34 ++++++++++++++++++++++++++++++++++
 1 file changed, 34 insertions(+)

diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 7f0c560f6b7e..18c8b6c64c3f 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -4689,6 +4689,38 @@ static ssize_t mptcp_splice_read(struct socket *sock, loff_t *ppos,
 	return ret;
 }
 
+static int mptcp_inq(struct sock *sk)
+{
+	const struct mptcp_sock *msk = mptcp_sk(sk);
+	const struct sk_buff *skb;
+
+	if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV))
+		return 0;
+
+	skb = skb_peek(&sk->sk_receive_queue);
+	if (skb) {
+		u64 answ = READ_ONCE(msk->ack_seq) - MPTCP_SKB_CB(skb)->map_seq;
+
+		if (answ >= INT_MAX)
+			answ = INT_MAX;
+
+		/* Subtract 1, if FIN was received */
+		if (answ &&
+		    (sk->sk_state == TCP_CLOSE ||
+		     (sk->sk_shutdown & RCV_SHUTDOWN)))
+			answ--;
+
+		return (int)answ;
+	}
+
+	return 0;
+}
+
+static int mptcp_peek_len(struct socket *sock)
+{
+	return mptcp_inq(sock->sk);
+}
+
 static const struct proto_ops mptcp_stream_ops = {
 	.family		   = PF_INET,
 	.owner		   = THIS_MODULE,
@@ -4712,6 +4744,7 @@ static const struct proto_ops mptcp_stream_ops = {
 	.read_sock	   = mptcp_read_sock,
 	.splice_read	   = mptcp_splice_read,
 	.sendmsg_locked	   = mptcp_sendmsg_locked,
+	.peek_len	   = mptcp_peek_len,
 };
 
 static struct inet_protosw mptcp_protosw = {
@@ -4825,6 +4858,7 @@ static const struct proto_ops mptcp_v6_stream_ops = {
 	.read_sock	   = mptcp_read_sock,
 	.splice_read	   = mptcp_splice_read,
 	.sendmsg_locked	   = mptcp_sendmsg_locked,
+	.peek_len	   = mptcp_peek_len,
 };
 
 static struct proto mptcp_v6_prot;
-- 
2.53.0


^ permalink raw reply related

* [RFC net-next 05/17] tls: use sendmsg_locked from the underlying socket
From: Geliang Tang @ 2026-06-22 10:43 UTC (permalink / raw)
  To: Matthieu Baerts, Mat Martineau, Geliang Tang, David S. Miller,
	Eric Dumazet, Jakub Kicinski, Paolo Abeni, Simon Horman,
	Neal Cardwell, Kuniyuki Iwashima, John Fastabend, Sabrina Dubroca,
	Hannes Reinecke
  Cc: Geliang Tang, netdev, mptcp, Gang Yan, Zqiang
In-Reply-To: <cover.1782123118.git.tanggeliang@kylinos.cn>

From: Geliang Tang <tanggeliang@kylinos.cn>

TLS offload (device and sw) may call tcp_sendmsg_locked() directly
when pushing TLS records. This assumes the underlying socket is always
a TCP socket. With MPTCP, the socket can be an MPTCP socket, which
does not directly expose a sendmsg_locked method via its proto_ops.

Replace the hard-coded tcp_sendmsg_locked() call with
sk->sk_socket->ops->sendmsg_locked(). This enables TLS to work
transparently over any socket that implements .sendmsg_locked,
including MPTCP after the previous commit.

The change is safe because both TCP and MPTCP now provide a conformant
.sendmsg_locked implementation.

Co-developed-by: Gang Yan <yangang@kylinos.cn>
Signed-off-by: Gang Yan <yangang@kylinos.cn>
Co-developed-by: Zqiang <qiang.zhang@linux.dev>
Signed-off-by: Zqiang <qiang.zhang@linux.dev>
Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn>
---
 net/tls/tls_main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c
index 94133d62f73e..b6adfa67491b 100644
--- a/net/tls/tls_main.c
+++ b/net/tls/tls_main.c
@@ -205,7 +205,7 @@ int tls_push_sg(struct sock *sk,
 		bvec_set_page(&bvec, p, size, offset);
 		iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bvec, 1, size);
 
-		ret = tcp_sendmsg_locked(sk, &msg, size);
+		ret = sk->sk_socket->ops->sendmsg_locked(sk, &msg, size);
 
 		if (ret != size) {
 			if (ret > 0) {
-- 
2.53.0


^ permalink raw reply related

* [RFC net-next 04/17] mptcp: add sendmsg_locked to proto_ops
From: Geliang Tang @ 2026-06-22 10:43 UTC (permalink / raw)
  To: Matthieu Baerts, Mat Martineau, Geliang Tang, David S. Miller,
	Eric Dumazet, Jakub Kicinski, Paolo Abeni, Simon Horman,
	Neal Cardwell, Kuniyuki Iwashima, John Fastabend, Sabrina Dubroca,
	Hannes Reinecke
  Cc: Geliang Tang, netdev, mptcp, Gang Yan, Zqiang
In-Reply-To: <cover.1782123118.git.tanggeliang@kylinos.cn>

From: Geliang Tang <tanggeliang@kylinos.cn>

MPTCP currently provides a standard sendmsg() implementation which
acquires and releases the socket lock internally. However, certain
upper layers (e.g., TLS) need to call the sendmsg method while the
socket lock is already held.

Split the existing mptcp_sendmsg() into mptcp_sendmsg_locked() which
assumes the caller holds the socket lock, and a tiny wrapper
mptcp_sendmsg() that acquires the lock and calls the locked version.

Expose .sendmsg_locked in both mptcp_stream_ops and mptcp_v6_stream_ops.

Co-developed-by: Gang Yan <yangang@kylinos.cn>
Signed-off-by: Gang Yan <yangang@kylinos.cn>
Co-developed-by: Zqiang <qiang.zhang@linux.dev>
Signed-off-by: Zqiang <qiang.zhang@linux.dev>
Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn>
---
 net/mptcp/protocol.c | 18 ++++++++++++++----
 1 file changed, 14 insertions(+), 4 deletions(-)

diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index a4f7e99b30db..7f0c560f6b7e 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -1967,7 +1967,7 @@ static void mptcp_rps_record_subflows(const struct mptcp_sock *msk)
 	}
 }
 
-static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
+static int mptcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t len)
 {
 	struct mptcp_sock *msk = mptcp_sk(sk);
 	struct page_frag *pfrag;
@@ -1979,8 +1979,6 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 	msg->msg_flags &= MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL |
 			  MSG_FASTOPEN | MSG_EOR;
 
-	lock_sock(sk);
-
 	mptcp_rps_record_subflows(msk);
 
 	if (unlikely(inet_test_bit(DEFER_CONNECT, sk) ||
@@ -2096,7 +2094,6 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 	}
 
 out:
-	release_sock(sk);
 	return copied;
 
 do_error:
@@ -2107,6 +2104,17 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 	goto out;
 }
 
+static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
+{
+	int ret;
+
+	lock_sock(sk);
+	ret = mptcp_sendmsg_locked(sk, msg, len);
+	release_sock(sk);
+
+	return ret;
+}
+
 static void mptcp_rcv_space_adjust(struct mptcp_sock *msk, int copied);
 
 static void mptcp_eat_recv_skb(struct sock *sk, struct sk_buff *skb)
@@ -4703,6 +4711,7 @@ static const struct proto_ops mptcp_stream_ops = {
 	.set_rcvlowat	   = mptcp_set_rcvlowat,
 	.read_sock	   = mptcp_read_sock,
 	.splice_read	   = mptcp_splice_read,
+	.sendmsg_locked	   = mptcp_sendmsg_locked,
 };
 
 static struct inet_protosw mptcp_protosw = {
@@ -4815,6 +4824,7 @@ static const struct proto_ops mptcp_v6_stream_ops = {
 	.set_rcvlowat	   = mptcp_set_rcvlowat,
 	.read_sock	   = mptcp_read_sock,
 	.splice_read	   = mptcp_splice_read,
+	.sendmsg_locked	   = mptcp_sendmsg_locked,
 };
 
 static struct proto mptcp_v6_prot;
-- 
2.53.0


^ permalink raw reply related

* [RFC net-next 03/17] tls: add protocol dimension to tls operation cache
From: Geliang Tang @ 2026-06-22 10:43 UTC (permalink / raw)
  To: Matthieu Baerts, Mat Martineau, Geliang Tang, David S. Miller,
	Eric Dumazet, Jakub Kicinski, Paolo Abeni, Simon Horman,
	Neal Cardwell, Kuniyuki Iwashima, John Fastabend, Sabrina Dubroca,
	Hannes Reinecke
  Cc: Geliang Tang, netdev, mptcp, Gang Yan, Zqiang
In-Reply-To: <cover.1782123118.git.tanggeliang@kylinos.cn>

From: Geliang Tang <tanggeliang@kylinos.cn>

The current TLS operation cache is indexed solely by IP version
(IPv4/IPv6). This was sufficient when only TCP was supported.
Rename TLS_NUM_PROTS to TLS_NUM_FAMILY to accurately reflect that it
represents the number of address families.

With the introduction of MPTCP, both TCP and MPTCP sockets within the
same IP version now share the same cache entries. When an MPTCP socket
enables TLS, it overwrites the cache with MPTCP-specific operations,
causing existing TCP TLS sockets to use the wrong ops, leading to type
confusion and kernel panics.

Fix by extending the cache arrays with a protocol dimension to separate
TCP and MPTCP. Introduce TLSTCP and TLSMPTCP enum values, along with
separate saved protocol pointers and mutexes for MPTCP. update_sk_prot()
and __tls_build_proto() now select the appropriate cache based on
sk->sk_protocol.

Co-developed-by: Gang Yan <yangang@kylinos.cn>
Signed-off-by: Gang Yan <yangang@kylinos.cn>
Co-developed-by: Zqiang <qiang.zhang@linux.dev>
Signed-off-by: Zqiang <qiang.zhang@linux.dev>
Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn>
---
 net/tls/tls_main.c | 40 +++++++++++++++++++++++++++++-----------
 1 file changed, 29 insertions(+), 11 deletions(-)

diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c
index be824affd1b1..94133d62f73e 100644
--- a/net/tls/tls_main.c
+++ b/net/tls/tls_main.c
@@ -53,7 +53,13 @@ MODULE_ALIAS_TCP_ULP("tls");
 enum {
 	TLSV4,
 	TLSV6,
-	TLS_NUM_PROTS,
+	TLS_NUM_FAMILY,
+};
+
+enum {
+	TLSTCP,
+	TLSMPTCP,
+	TLS_NUM_PROTO,
 };
 
 #define CHECK_CIPHER_DESC(cipher,ci)				\
@@ -117,23 +123,30 @@ CHECK_CIPHER_DESC(TLS_CIPHER_SM4_CCM, tls12_crypto_info_sm4_ccm);
 CHECK_CIPHER_DESC(TLS_CIPHER_ARIA_GCM_128, tls12_crypto_info_aria_gcm_128);
 CHECK_CIPHER_DESC(TLS_CIPHER_ARIA_GCM_256, tls12_crypto_info_aria_gcm_256);
 
+static const struct proto *saved_mptcpv6_prot;
+static DEFINE_MUTEX(mptcpv6_prot_mutex);
 static const struct proto *saved_tcpv6_prot;
 static DEFINE_MUTEX(tcpv6_prot_mutex);
+static const struct proto *saved_mptcpv4_prot;
+static DEFINE_MUTEX(mptcpv4_prot_mutex);
 static const struct proto *saved_tcpv4_prot;
 static DEFINE_MUTEX(tcpv4_prot_mutex);
-static struct proto tls_prots[TLS_NUM_PROTS][TLS_NUM_CONFIG][TLS_NUM_CONFIG];
-static struct proto_ops tls_proto_ops[TLS_NUM_PROTS][TLS_NUM_CONFIG][TLS_NUM_CONFIG];
+static struct proto
+tls_prots[TLS_NUM_FAMILY][TLS_NUM_PROTO][TLS_NUM_CONFIG][TLS_NUM_CONFIG];
+static struct proto_ops
+tls_proto_ops[TLS_NUM_FAMILY][TLS_NUM_PROTO][TLS_NUM_CONFIG][TLS_NUM_CONFIG];
 static void build_protos(struct proto prot[TLS_NUM_CONFIG][TLS_NUM_CONFIG],
 			 const struct proto *base);
 
 static void update_sk_prot(struct sock *sk, struct tls_context *ctx)
 {
+	int proto = sk->sk_protocol == IPPROTO_MPTCP ? TLSMPTCP : TLSTCP;
 	int ip_ver = sk->sk_family == AF_INET6 ? TLSV6 : TLSV4;
 
 	WRITE_ONCE(sk->sk_prot,
-		   &tls_prots[ip_ver][ctx->tx_conf][ctx->rx_conf]);
+		   &tls_prots[ip_ver][proto][ctx->tx_conf][ctx->rx_conf]);
 	WRITE_ONCE(sk->sk_socket->ops,
-		   &tls_proto_ops[ip_ver][ctx->tx_conf][ctx->rx_conf]);
+		   &tls_proto_ops[ip_ver][proto][ctx->tx_conf][ctx->rx_conf]);
 }
 
 int wait_on_pending_writer(struct sock *sk, long *timeo)
@@ -971,18 +984,19 @@ static void build_proto_ops(struct proto_ops ops[TLS_NUM_CONFIG][TLS_NUM_CONFIG]
 static void __tls_build_proto(struct sock *sk,
 			      const struct proto *saved_prot,
 			      struct mutex *prot_mutex,
-			      int family)
+			      int family, int protocol)
 {
+	int proto = sk->sk_protocol == IPPROTO_MPTCP ? TLSMPTCP : TLSTCP;
 	int ip_ver = sk->sk_family == AF_INET6 ? TLSV6 : TLSV4;
 	struct proto *prot = READ_ONCE(sk->sk_prot);
 
-	if (ip_ver == family) {
+	if (ip_ver == family && proto == protocol) {
 		/* smp_load_acquire pairs with smp_store_release below */
 		if (unlikely(prot != smp_load_acquire(&saved_prot))) {
 			mutex_lock(prot_mutex);
 			if (likely(prot != saved_prot)) {
-				build_protos(tls_prots[family], prot);
-				build_proto_ops(tls_proto_ops[family],
+				build_protos(tls_prots[family][protocol], prot);
+				build_proto_ops(tls_proto_ops[family][protocol],
 						sk->sk_socket->ops);
 				/* pairs with smp_load_acquire above */
 				smp_store_release(&saved_prot, prot);
@@ -995,10 +1009,14 @@ static void __tls_build_proto(struct sock *sk,
 static void tls_build_proto(struct sock *sk)
 {
 	/* Build IPv6 TLS whenever the address of tcpv6 _prot changes */
+	__tls_build_proto(sk, saved_mptcpv6_prot, &mptcpv6_prot_mutex,
+			  TLSV6, TLSMPTCP);
 	__tls_build_proto(sk, saved_tcpv6_prot, &tcpv6_prot_mutex,
-			  TLSV6);
+			  TLSV6, TLSTCP);
+	__tls_build_proto(sk, saved_mptcpv4_prot, &mptcpv4_prot_mutex,
+			  TLSV4, TLSMPTCP);
 	__tls_build_proto(sk, saved_tcpv4_prot, &tcpv4_prot_mutex,
-			  TLSV4);
+			  TLSV4, TLSTCP);
 }
 
 static void build_protos(struct proto prot[TLS_NUM_CONFIG][TLS_NUM_CONFIG],
-- 
2.53.0


^ permalink raw reply related

* [RFC net-next 02/17] tls: factor out __tls_build_proto for mptcp support
From: Geliang Tang @ 2026-06-22 10:43 UTC (permalink / raw)
  To: Matthieu Baerts, Mat Martineau, Geliang Tang, David S. Miller,
	Eric Dumazet, Jakub Kicinski, Paolo Abeni, Simon Horman,
	Neal Cardwell, Kuniyuki Iwashima, John Fastabend, Sabrina Dubroca,
	Hannes Reinecke
  Cc: Geliang Tang, netdev, mptcp, Gang Yan, Zqiang
In-Reply-To: <cover.1782123118.git.tanggeliang@kylinos.cn>

From: Geliang Tang <tanggeliang@kylinos.cn>

tls_build_proto() contains duplicated logic for building IPv4 and IPv6
TLS protocol caches.

Factor out the common code into a new helper __tls_build_proto(), which
takes the saved protocol pointer, mutex, and IP family as parameters.
This prepares for adding MPTCP support by reducing the amount of
duplicated code needed when introducing additional protocol variants.

No functional change intended.

Co-developed-by: Gang Yan <yangang@kylinos.cn>
Signed-off-by: Gang Yan <yangang@kylinos.cn>
Co-developed-by: Zqiang <qiang.zhang@linux.dev>
Signed-off-by: Zqiang <qiang.zhang@linux.dev>
Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn>
---
 net/tls/tls_main.c | 46 ++++++++++++++++++++++++----------------------
 1 file changed, 24 insertions(+), 22 deletions(-)

diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c
index 9675c75bc50c..be824affd1b1 100644
--- a/net/tls/tls_main.c
+++ b/net/tls/tls_main.c
@@ -968,35 +968,37 @@ static void build_proto_ops(struct proto_ops ops[TLS_NUM_CONFIG][TLS_NUM_CONFIG]
 #endif
 }
 
-static void tls_build_proto(struct sock *sk)
+static void __tls_build_proto(struct sock *sk,
+			      const struct proto *saved_prot,
+			      struct mutex *prot_mutex,
+			      int family)
 {
 	int ip_ver = sk->sk_family == AF_INET6 ? TLSV6 : TLSV4;
 	struct proto *prot = READ_ONCE(sk->sk_prot);
 
-	/* Build IPv6 TLS whenever the address of tcpv6 _prot changes */
-	if (ip_ver == TLSV6 &&
-	    unlikely(prot != smp_load_acquire(&saved_tcpv6_prot))) {
-		mutex_lock(&tcpv6_prot_mutex);
-		if (likely(prot != saved_tcpv6_prot)) {
-			build_protos(tls_prots[TLSV6], prot);
-			build_proto_ops(tls_proto_ops[TLSV6],
-					sk->sk_socket->ops);
-			smp_store_release(&saved_tcpv6_prot, prot);
+	if (ip_ver == family) {
+		/* smp_load_acquire pairs with smp_store_release below */
+		if (unlikely(prot != smp_load_acquire(&saved_prot))) {
+			mutex_lock(prot_mutex);
+			if (likely(prot != saved_prot)) {
+				build_protos(tls_prots[family], prot);
+				build_proto_ops(tls_proto_ops[family],
+						sk->sk_socket->ops);
+				/* pairs with smp_load_acquire above */
+				smp_store_release(&saved_prot, prot);
+			}
+			mutex_unlock(prot_mutex);
 		}
-		mutex_unlock(&tcpv6_prot_mutex);
 	}
+}
 
-	if (ip_ver == TLSV4 &&
-	    unlikely(prot != smp_load_acquire(&saved_tcpv4_prot))) {
-		mutex_lock(&tcpv4_prot_mutex);
-		if (likely(prot != saved_tcpv4_prot)) {
-			build_protos(tls_prots[TLSV4], prot);
-			build_proto_ops(tls_proto_ops[TLSV4],
-					sk->sk_socket->ops);
-			smp_store_release(&saved_tcpv4_prot, prot);
-		}
-		mutex_unlock(&tcpv4_prot_mutex);
-	}
+static void tls_build_proto(struct sock *sk)
+{
+	/* Build IPv6 TLS whenever the address of tcpv6 _prot changes */
+	__tls_build_proto(sk, saved_tcpv6_prot, &tcpv6_prot_mutex,
+			  TLSV6);
+	__tls_build_proto(sk, saved_tcpv4_prot, &tcpv4_prot_mutex,
+			  TLSV4);
 }
 
 static void build_protos(struct proto prot[TLS_NUM_CONFIG][TLS_NUM_CONFIG],
-- 
2.53.0


^ permalink raw reply related

* [RFC net-next 01/17] tls: make tls_ctx_create and update_sk_prot static
From: Geliang Tang @ 2026-06-22 10:43 UTC (permalink / raw)
  To: Matthieu Baerts, Mat Martineau, Geliang Tang, David S. Miller,
	Eric Dumazet, Jakub Kicinski, Paolo Abeni, Simon Horman,
	Neal Cardwell, Kuniyuki Iwashima, John Fastabend, Sabrina Dubroca,
	Hannes Reinecke
  Cc: Geliang Tang, netdev, mptcp, Gang Yan, Zqiang
In-Reply-To: <cover.1782123118.git.tanggeliang@kylinos.cn>

From: Geliang Tang <tanggeliang@kylinos.cn>

The TLS TOE (TCP offload engine) support has been removed. As a result,
tls_ctx_create() and update_sk_prot() are no longer used outside of
tls_main.c.

Make them static and remove their prototypes from tls.h. This avoids
exporting unnecessary symbols and cleans up the internal API.

Co-developed-by: Gang Yan <yangang@kylinos.cn>
Signed-off-by: Gang Yan <yangang@kylinos.cn>
Co-developed-by: Zqiang <qiang.zhang@linux.dev>
Signed-off-by: Zqiang <qiang.zhang@linux.dev>
Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn>
---
 net/tls/tls.h      | 2 --
 net/tls/tls_main.c | 4 ++--
 2 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/net/tls/tls.h b/net/tls/tls.h
index 60a37bdaaa25..68dfe109808e 100644
--- a/net/tls/tls.h
+++ b/net/tls/tls.h
@@ -136,9 +136,7 @@ struct tls_rec {
 int __net_init tls_proc_init(struct net *net);
 void __net_exit tls_proc_fini(struct net *net);
 
-struct tls_context *tls_ctx_create(struct sock *sk);
 void tls_ctx_free(struct sock *sk, struct tls_context *ctx);
-void update_sk_prot(struct sock *sk, struct tls_context *ctx);
 
 int wait_on_pending_writer(struct sock *sk, long *timeo);
 void tls_err_abort(struct sock *sk, int err);
diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c
index 8c588cdab733..9675c75bc50c 100644
--- a/net/tls/tls_main.c
+++ b/net/tls/tls_main.c
@@ -126,7 +126,7 @@ static struct proto_ops tls_proto_ops[TLS_NUM_PROTS][TLS_NUM_CONFIG][TLS_NUM_CON
 static void build_protos(struct proto prot[TLS_NUM_CONFIG][TLS_NUM_CONFIG],
 			 const struct proto *base);
 
-void update_sk_prot(struct sock *sk, struct tls_context *ctx)
+static void update_sk_prot(struct sock *sk, struct tls_context *ctx)
 {
 	int ip_ver = sk->sk_family == AF_INET6 ? TLSV6 : TLSV4;
 
@@ -913,7 +913,7 @@ static int tls_disconnect(struct sock *sk, int flags)
 	return -EOPNOTSUPP;
 }
 
-struct tls_context *tls_ctx_create(struct sock *sk)
+static struct tls_context *tls_ctx_create(struct sock *sk)
 {
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	struct tls_context *ctx;
-- 
2.53.0


^ permalink raw reply related

* [RFC net-next 00/17] MPTCP KTLS support
From: Geliang Tang @ 2026-06-22 10:43 UTC (permalink / raw)
  To: Matthieu Baerts, Mat Martineau, Geliang Tang, David S. Miller,
	Eric Dumazet, Jakub Kicinski, Paolo Abeni, Simon Horman,
	Neal Cardwell, Kuniyuki Iwashima, John Fastabend, Sabrina Dubroca,
	Hannes Reinecke
  Cc: Geliang Tang, netdev, mptcp, Gang Yan, Zqiang

From: Geliang Tang <tanggeliang@kylinos.cn>

Prior to this work, MPTCP did not support TLS. The two protocols
conflicted because both MPTCP and TLS use the ULP (Upper Layer
Protocol) infrastructure in the Linux kernel. ULP settings, including
TLS configuration, were disabled in MPTCP. If an application attempted
to set TLS for an MPTCP socket, the system would return an error code
indicating EOPNOTSUPP (Operation not supported).

This series adds KTLS support for MPTCP. Since no ULP is currently
attached to the MPTCP socket (msk), KTLS can be configured directly on
the msk rather than on individual subflows. This does not affect its
existing communication, and leverages HMAC-based authentication to
ensure subflow security.

RFC versions of this series have gone through many iterations on the MPTCP
mailing list, mainly to address Sashiko's review comments. It is now mostly
stable.

A follow-up series will add MPTCP support to the TLS selftests
(tools/testing/selftests/net/tls.c). All existing TCP test cases have
already been verified to pass over MPTCP as well.

The primary validation use case for this work is NVMe over MPTCP with KTLS.
NVMe over TCP is a storage protocol that transports NVMe commands over TCP.
By combining it with MPTCP, multipath capabilities for storage traffic are 
gained. By adding KTLS, the storage traffic is secured with encryption.
Although NVMe over MPTCP is still under active development, I have already
verified that KTLS operates correctly on top of it.

All feedback is welcome.

Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/480

Co-developed-by: Gang Yan <yangang@kylinos.cn>
Signed-off-by: Gang Yan <yangang@kylinos.cn>
Co-developed-by: Zqiang <qiang.zhang@linux.dev>
Signed-off-by: Zqiang <qiang.zhang@linux.dev>
Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn>

Gang Yan (1):
  mptcp: update mptcp_check_readable helper

Geliang Tang (16):
  tls: make tls_ctx_create and update_sk_prot static
  tls: factor out __tls_build_proto for mptcp support
  tls: add protocol dimension to tls operation cache
  mptcp: add sendmsg_locked to proto_ops
  tls: use sendmsg_locked from the underlying socket
  mptcp: implement peek_len for proto_ops
  tls: replace tcp_inq with socket peek_len
  tls: store original read_sock for non-tcp sockets
  tls: introduce tls protocol ops structure
  tls: use protocol ops via tls_context
  mptcp: implement mptcp-specific tls protocol ops
  tls: add mptcp support for sk_poll
  tls: disable device offload for mptcp sockets
  mptcp: implement ulp getsockopt for tls support
  mptcp: implement ulp setsockopt for tls support
  selftests: mptcp: connect: use espintcp for ulp test

 include/net/mptcp.h                           |  11 +
 include/net/tcp.h                             |   1 +
 include/net/tls.h                             |  19 ++
 net/ipv4/tcp.c                                |   9 +-
 net/mptcp/protocol.c                          | 180 +++++++++++++-
 net/mptcp/protocol.h                          |   1 +
 net/mptcp/sockopt.c                           |  68 +++++-
 net/tls/tls.h                                 |   2 -
 net/tls/tls_device.c                          |  10 +-
 net/tls/tls_main.c                            | 227 +++++++++++++++---
 net/tls/tls_strp.c                            |  35 ++-
 net/tls/tls_sw.c                              |  10 +-
 tools/testing/selftests/net/mptcp/config      |   4 +
 .../selftests/net/mptcp/mptcp_connect.c       |   4 +-
 14 files changed, 516 insertions(+), 65 deletions(-)

-- 
2.53.0


^ permalink raw reply

* Re: [PATCH net] net: mana: Fall back to standard MTU when PF reports adapter_mtu of 0
From: Simon Horman @ 2026-06-22 10:41 UTC (permalink / raw)
  To: Erni Sri Satya Vennela
  Cc: kys, haiyangz, wei.liu, decui, longli, andrew+netdev, davem,
	edumazet, kuba, pabeni, dipayanroy, ssengar, jacob.e.keller,
	gargaditya, kees, linux-hyperv, netdev, linux-kernel, bpf
In-Reply-To: <20260619055348.467224-1-ernis@linux.microsoft.com>

On Thu, Jun 18, 2026 at 10:53:38PM -0700, Erni Sri Satya Vennela wrote:
> Commit d7709812e13d ("net: mana: hardening: Validate adapter_mtu from
> MANA_QUERY_DEV_CONFIG") rejected any adapter_mtu value smaller than
> ETH_MIN_MTU + ETH_HLEN, including 0, returning -EPROTO and failing
> mana_probe().
> 
> Some older PF firmware versions still in the field report
> adapter_mtu as 0 in the MANA_QUERY_DEV_CONFIG response. With the
> hardening check in place, the MANA VF driver now fails to load on
> those hosts, breaking networking entirely for guests.
> 
> MANA hardware always supports the standard Ethernet MTU. Treat a
> reported adapter_mtu of 0 as "the PF did not advertise a value" and
> fall back to ETH_FRAME_LEN, the same value used for the pre-V2
> message version path. Only jumbo frames remain unavailable until
> the PF reports a valid MTU.
> 
> Other small-but-nonzero bogus values are still rejected, preserving
> the original protection against the unsigned-subtraction wrap that
> would otherwise let ndev->max_mtu underflow to a huge value.
> 
> Fixes: d7709812e13d ("net: mana: hardening: Validate adapter_mtu from MANA_QUERY_DEV_CONFIG")
> Signed-off-by: Erni Sri Satya Vennela <ernis@linux.microsoft.com>

Reviewed-by: Simon Horman <horms@kernel.org>

FTR, I agree with your assessment that the issue flagged in the
AI-generated review of this patch on sashiko.dev can be
treated as a follow-up [1].

And I don't think the low priority issue flagged in the AI-generated
review on https://netdev-ai.bots.linux.dev/sashiko/ should impede progress
of this patch.

[1] https://lore.kernel.org/bpf/ajj+5mhswcqhI2z7@linuxonhyperv3.guj3yctzbm1etfxqx2vob5hsef.xx.internal.cloudapp.net/


^ permalink raw reply

* Re: "ip help" output is an error
From: Dmitri Seletski @ 2026-06-22 10:39 UTC (permalink / raw)
  To: David Laight, Stephen Hemminger; +Cc: netdev
In-Reply-To: <20260622084925.6f3dfc4f@pumpkin>

Hello David, (sorry for the duplicate; I keep forgetting to turn off HTML)

Yes, user could do the redirection, but it's less convenient and 
counterintuitive. IMHO, it's just wrong.

Once the user has run "ip help", IMHO the exit code should be 0, because
the user ran a command and it executed correctly with a reasonable
result (it provided the help text).

And if exit code is 0 - then it reasonably stands that output data is 
standard output and not an error.


"it do exit(0) is likely cause new scripts to fail on old systems." - do 
I understand correctly, concern is that existing scripts that depend on 
current behavior will fail?

First of all, I am guessing that there are not many scripts that depend 
on "ip help".

Second of all, if there are such scripts and we follow this logic - do 
we ever patch anything? Since any script out there can depend on broken 
behavior that will be patched?

Third of all, people who wrote such scripts, should have reported bug 
here in the first place. But it's neither here nor there.


Am I failing to see the bigger picture here? Because I feel silly talking on
this mailing list. (I submitted a change for this issue about half a day ago.)

Kind Regards

Dmitri Seletski


On 6/22/26 08:49, David Laight wrote:
> On Sun, 21 Jun 2026 08:21:05 -0700
> Stephen Hemminger <stephen@networkplumber.org> wrote:
>
>> On Sat, 20 Jun 2026 10:36:31 +0100
>> Dmitri Seletski <drjoms@gmail.com> wrote:
>>
>>> Hello iproute2 maintainers,
>>>
>>> I am reporting an inconsistency regarding the exit status of the ip help
>>> command.
>>>
>>> Current Behavior:
>>> When running ip help, the command prints the help documentation to
>>> stdout, but exits with a non-zero status (error). This causes issues in
>>> shell scripts that rely on exit codes for control flow.
>>>
>>> Steps to reproduce:
>>> bash
>>>
>>> # This returns "FAIL" because the exit code is non-zero
>>> if ip help > /dev/null; then
>>>       echo "SUCCESS"
>>> else
>>>       echo "FAIL"
>>> fi
>>>
>>> Expected Behavior:
>>> Since the command successfully performs the requested task (displaying
>>> help information) and does not encounter a system error, it should
>>> return an exit code of 0.
>>>
>>> Context:
>>> This behavior breaks standard Bash logic for automation. For example:
>>> ip help && echo "This will not execute"
>>>
>>> "ip help |grep br" - this will bring no result.
>>>
>>> Current version tested: iproute2-6.19.0
>>>
>>> Thank you for your time and for maintaining this tool.
>>>
>>> Regards,
>>> Dmitri Seletski
>>>
>>>    
>> Yes iproute2 doesn't do a great job of handling error codes
>> with usage vs help. Its a bug and no one has bothered to fix it.
>>
> The version I've got does write(2, "Usage...", 972); exit(-1);
> Changing it to do write(1, ...) is likely to break scripts, and making
> it do exit(0) is likely cause new scripts to fail on old systems.
>
> The 'grep' works fine if you redirect stderr to stdout.
>
> The exit(-1) is a bug; the parameter is only 8 bits and the high bit
> is expected to be used to indicate abnormal termination (eg by a signal).
> That should probably be changed to exit(1), there doesn't seem to be
> a standard way to differentiate between command line errors and
> operational ones.
>
> 	David
>

^ permalink raw reply

* Re: [PATCH v2 2/2] net: fman: use devm_kzalloc() for fman and rely on devres
From: Andrew Lunn @ 2026-06-22 10:36 UTC (permalink / raw)
  To: ZhaoJinming
  Cc: horms, andrew+netdev, davem, edumazet, kuba, linux-kernel,
	madalin.bucur, netdev, pabeni, sean.anderson
In-Reply-To: <20260622090505.2418478-2-zhaojinming@uniontech.com>

On Mon, Jun 22, 2026 at 05:05:05PM +0800, ZhaoJinming wrote:
> The driver now allocates the top-level struct fman with devm_kzalloc()
> so that its lifetime is bound to the device and resources are released
> automatically by the driver core on probe failure or device removal.
> 
> Remove the explicit kfree(fman) from the error paths in fman_config()
> and read_dts_node() to avoid double-free/use-after-free and to follow
> the devm_ allocation convention.
> 
> After of_find_matching_node() consumes fm_node's reference via
> of_node_put(from), the post-muram error paths no longer need to clean
> up fm_node, so replace goto fman_free with direct return ERR_PTR(err).
> 
> This change complements the existing use of devm_* resources (irq,
> ioremap, etc.) and simplifies the error handling paths.
> 
> Signed-off-by: ZhaoJinming <zhaojinming@uniontech.com>

Please take a read of:

https://www.kernel.org/doc/html/latest/process/maintainer-netdev.html

Please read it all, but see section 1.7.4.

    Andrew

---
pw-bot: cr

^ permalink raw reply

* Re: AW: AW: AW: AW: [PATCH net] net: usb: lan78xx: restore VLAN filter table after device reset
From: Nicolai Buchwitz @ 2026-06-22 10:34 UTC (permalink / raw)
  To: Sven Schuchmann
  Cc: Thangaraj Samynathan, Rengarajan Sundararajan, UNGLinuxDriver,
	Woojung.Huh, Andrew Lunn, David S . Miller, Eric Dumazet,
	Jakub Kicinski, Paolo Abeni, netdev, linux-usb, linux-kernel
In-Reply-To: <BEZP281MB22457C80F9D1AC7F788EC008D9EF2@BEZP281MB2245.DEUP281.PROD.OUTLOOK.COM>

Hi Sven

On 22.6.2026 12:07, Sven Schuchmann wrote:

> [...]

>> > looks good from my point of view
>> > (Calling the lan78xx_write_vlan_table() from
>> > lan78xx_mac_link_up() and from lan78xx_reset()).
>> 
>> Thanks.
> 
> Just to be clear I used this patch which is looking good:

> [...]

Thanks for testing! I've sent a v2 of my patch with your t-b:
https://lore.kernel.org/netdev/20260622102911.484045-1-nb@tipi-net.de/

Regards
Nicolai


^ permalink raw reply

* Re: [PATCH v2 1/2] net: fman: fix clk reference leak in read_dts_node()
From: Andrew Lunn @ 2026-06-22 10:33 UTC (permalink / raw)
  To: ZhaoJinming
  Cc: horms, andrew+netdev, davem, edumazet, kuba, linux-kernel,
	madalin.bucur, netdev, pabeni, sean.anderson
In-Reply-To: <20260622090505.2418478-1-zhaojinming@uniontech.com>

On Mon, Jun 22, 2026 at 05:05:04PM +0800, ZhaoJinming wrote:
> of_clk_get() returns a reference that must be released with clk_put()
> when the clock is no longer needed. The current code never calls
> clk_put(clk), leaking the reference on both the success path and the
> clk_rate == 0 error path.
> 
> Add clk_put(clk) after the clock rate is consumed on the success path,
> and jump to a new clk_put label on the error path to properly release
> the clock reference.

"When the clock is no longer needed": So once you know the rate the
clock ticks at, you no longer need the clock? It is O.K. for it to
disappear, since there is no reference to it?

    Andrew

---
pw-bot: cr

^ permalink raw reply

* Re: [PATCH net v6 0/4] Fix i40e/ice/iavf VF bonding after netdev lock changes
From: Simon Horman @ 2026-06-22 10:31 UTC (permalink / raw)
  To: Jose Ignacio Tornos Martinez
  Cc: netdev, intel-wired-lan, przemyslaw.kitszel, aleksandr.loktionov,
	jacob.e.keller, jesse.brandeburg, anthony.l.nguyen, davem,
	edumazet, kuba, pabeni
In-Reply-To: <20260619061321.8554-1-jtornosm@redhat.com>

On Fri, Jun 19, 2026 at 08:13:15AM +0200, Jose Ignacio Tornos Martinez wrote:
> This series fixes VF bonding failures introduced by commit ad7c7b2172c3
> ("net: hold netdev instance lock during sysfs operations").

...

Hi Jose,

Unfortunately the Netdev CI was unable to apply this series cleanly against net.
Would you be able to rebase and repost?

-- 
pw-bot: changes-requested



^ permalink raw reply

* [PATCH net v2] net: usb: lan78xx: restore VLAN and hash filters after link up
From: Nicolai Buchwitz @ 2026-06-22 10:29 UTC (permalink / raw)
  To: Thangaraj Samynathan, Rengarajan Sundararajan, UNGLinuxDriver,
	Woojung.Huh
  Cc: Andrew Lunn, David S . Miller, Eric Dumazet, Jakub Kicinski,
	Paolo Abeni, Sven Schuchmann, netdev, linux-usb, linux-kernel,
	Nicolai Buchwitz

Configured VLANs intermittently stop receiving traffic after a link
down/up cycle, e.g. when the network cable is unplugged and plugged back
in. VLAN filtering stays enabled but all VLAN-tagged frames are dropped
until a VLAN is added or removed again.

The LAN7801 datasheet (DS00002123E) states:

  "A portion of the MAC operates on clocks generated by the Ethernet
   PHY. During a PHY reset event, this portion of the MAC is designed to
   not be taken out of reset until the PHY clocks are operational"
  (section 8.10, MAC Reset Watchdog Timer)

  "After a reset event, the RFE will automatically initialize the
   contents of the VHF to 0h."
  (section 7.1.4, VHF Organization)

Thus a link down/up cycle stops and restarts the PHY clock, resets the
PHY-clocked portion of the MAC, and the RFE clears its VLAN/DA hash
filter (VHF) memory. The VHF holds both the VLAN filter table and the
multicast hash table, but the driver never reprograms either from its
shadow copy once the link is back, so both stay empty.

Reprogram the VLAN filter and multicast hash tables on link up.

Reported-by: Sven Schuchmann <schuchmann@schleissheimer.de>
Closes: https://lore.kernel.org/netdev/BEZP281MB224501E38B30BFDC4BD3D364D9E32@BEZP281MB2245.DEUP281.PROD.OUTLOOK.COM/T/#u
Tested-by: Sven Schuchmann <schuchmann@schleissheimer.de>
Fixes: 55d7de9de6c3 ("Microchip's LAN7800 family USB 2/3 to 10/100/1000 Ethernet device driver")
Signed-off-by: Nicolai Buchwitz <nb@tipi-net.de>
---
v2:
 - Reprogram in lan78xx_mac_link_up() instead of lan78xx_reset(); the
   table is lost on a plain link down/up cycle, where reset() is not
   called. This also avoids the usb_autopm_get_interface() -EACCES path
   in reset_resume() that was flagged on v1.
 - Also restore the multicast hash table: the RFE clears the whole VHF
   (VLAN + hash) memory, per the LAN7801 datasheet.

v1: https://lore.kernel.org/netdev/20260618191109.4086598-1-nb@tipi-net.de/

 drivers/net/usb/lan78xx.c | 37 +++++++++++++++++++++++++++++++------
 1 file changed, 31 insertions(+), 6 deletions(-)

diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c
index bcf293ea1bd3..c4cebacabcb5 100644
--- a/drivers/net/usb/lan78xx.c
+++ b/drivers/net/usb/lan78xx.c
@@ -1452,6 +1452,15 @@ static inline u32 lan78xx_hash(char addr[ETH_ALEN])
 	return (ether_crc(ETH_ALEN, addr) >> 23) & 0x1ff;
 }
 
+static int lan78xx_write_mchash_table(struct lan78xx_net *dev)
+{
+	struct lan78xx_priv *pdata = (struct lan78xx_priv *)(dev->data[0]);
+
+	return lan78xx_dataport_write(dev, DP_SEL_RSEL_VLAN_DA_,
+				      DP_SEL_VHF_VLAN_LEN,
+				      DP_SEL_VHF_HASH_LEN, pdata->mchash_table);
+}
+
 static void lan78xx_deferred_multicast_write(struct work_struct *param)
 {
 	struct lan78xx_priv *pdata =
@@ -1462,9 +1471,7 @@ static void lan78xx_deferred_multicast_write(struct work_struct *param)
 	netif_dbg(dev, drv, dev->net, "deferred multicast write 0x%08x\n",
 		  pdata->rfe_ctl);
 
-	ret = lan78xx_dataport_write(dev, DP_SEL_RSEL_VLAN_DA_,
-				     DP_SEL_VHF_VLAN_LEN,
-				     DP_SEL_VHF_HASH_LEN, pdata->mchash_table);
+	ret = lan78xx_write_mchash_table(dev);
 	if (ret < 0)
 		goto multicast_write_done;
 
@@ -1557,6 +1564,7 @@ static void lan78xx_set_multicast(struct net_device *netdev)
 }
 
 static void lan78xx_rx_urb_submit_all(struct lan78xx_net *dev);
+static int lan78xx_write_vlan_table(struct lan78xx_net *dev);
 
 static int lan78xx_mac_reset(struct lan78xx_net *dev)
 {
@@ -2514,6 +2522,17 @@ static void lan78xx_mac_link_up(struct phylink_config *config,
 	if (ret < 0)
 		goto link_up_fail;
 
+	/* The RFE clears the VLAN/DA hash filter (VHF) on a link down/up
+	 * cycle, so reprogram both tables from their shadow copies.
+	 */
+	ret = lan78xx_write_vlan_table(dev);
+	if (ret < 0)
+		goto link_up_fail;
+
+	ret = lan78xx_write_mchash_table(dev);
+	if (ret < 0)
+		goto link_up_fail;
+
 	netif_start_queue(net);
 
 	return;
@@ -3065,14 +3084,20 @@ static int lan78xx_set_features(struct net_device *netdev,
 	return lan78xx_write_reg(dev, RFE_CTL, pdata->rfe_ctl);
 }
 
+static int lan78xx_write_vlan_table(struct lan78xx_net *dev)
+{
+	struct lan78xx_priv *pdata = (struct lan78xx_priv *)(dev->data[0]);
+
+	return lan78xx_dataport_write(dev, DP_SEL_RSEL_VLAN_DA_, 0,
+				      DP_SEL_VHF_VLAN_LEN, pdata->vlan_table);
+}
+
 static void lan78xx_deferred_vlan_write(struct work_struct *param)
 {
 	struct lan78xx_priv *pdata =
 			container_of(param, struct lan78xx_priv, set_vlan);
-	struct lan78xx_net *dev = pdata->dev;
 
-	lan78xx_dataport_write(dev, DP_SEL_RSEL_VLAN_DA_, 0,
-			       DP_SEL_VHF_VLAN_LEN, pdata->vlan_table);
+	lan78xx_write_vlan_table(pdata->dev);
 }
 
 static int lan78xx_vlan_rx_add_vid(struct net_device *netdev,

base-commit: d07d80b6a129a44538cda1549b7acf95154fb197
-- 
2.53.0


^ permalink raw reply related

* Re: [PATCH v3 net] net: watchdog: fix refcount tracking races
From: Marek Szyprowski @ 2026-06-22 10:22 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: David S . Miller, Jakub Kicinski, Paolo Abeni, Simon Horman,
	netdev, eric.dumazet, syzbot+381d82bbf0253710b35d,
	syzbot+3479efbc2821cb2a79f2
In-Reply-To: <CANn89i+GVoQxFS26=s5w5vUa-ytRUgD1NM6MDZQdtB7FtcXv-w@mail.gmail.com>

On 22.06.2026 10:59, Eric Dumazet wrote:
> On Wed, Jun 17, 2026 at 3:48 AM Marek Szyprowski
> <m.szyprowski@samsung.com> wrote:
>> On 11.06.2026 17:27, Eric Dumazet wrote:
>>> Blamed commit converted the untracked dev_hold()/dev_put() calls
>>> in the watchdog code to use the tracked dev_hold_track()/dev_put_track()
>>> (which were later renamed/interfaced to netdev_hold() and netdev_put()).
>>>
>>> By introducing dev->watchdog_dev_tracker to store the
>>> reference tracking information without adding synchronization
>>> between netdev_watchdog_up() and dev_watchdog(), it enabled the
>>> race condition where this pointer could be overwritten or freed
>>> concurrently, leading to the list corruption crash syzbot reported:
>>>
>>> list_del corruption, ffff888114a18c00->next is NULL
>>>  kernel BUG at lib/list_debug.c:52 !
>>> Oops: invalid opcode: 0000 [#1] SMP KASAN PTI
>>> CPU: 1 UID: 0 PID: 91 Comm: kworker/u8:5 Not tainted syzkaller #0 PREEMPT(lazy)
>>> Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 05/09/2026
>>> Workqueue: events_unbound linkwatch_event
>>>  RIP: 0010:__list_del_entry_valid_or_report.cold+0x22/0x2a lib/list_debug.c:52
>>> Call Trace:
>>>  <TASK>
>>>   __list_del_entry_valid include/linux/list.h:132 [inline]
>>>   __list_del_entry include/linux/list.h:246 [inline]
>>>   list_move_tail include/linux/list.h:341 [inline]
>>>   ref_tracker_free+0x1a7/0x6c0 lib/ref_tracker.c:329
>>>   netdev_tracker_free include/linux/netdevice.h:4491 [inline]
>>>   netdev_put include/linux/netdevice.h:4508 [inline]
>>>   netdev_put include/linux/netdevice.h:4504 [inline]
>>>   netdev_watchdog_down net/sched/sch_generic.c:600 [inline]
>>>   dev_deactivate_many+0x28c/0xfe0 net/sched/sch_generic.c:1363
>>>   dev_deactivate+0x109/0x1d0 net/sched/sch_generic.c:1397
>>>   linkwatch_do_dev net/core/link_watch.c:184 [inline]
>>>   linkwatch_do_dev+0xd3/0x120 net/core/link_watch.c:166
>>>   __linkwatch_run_queue+0x3a5/0x810 net/core/link_watch.c:240
>>>   linkwatch_event+0x8f/0xc0 net/core/link_watch.c:314
>>>   process_one_work+0xa0e/0x1980 kernel/workqueue.c:3314
>>>   process_scheduled_works kernel/workqueue.c:3397 [inline]
>>>   worker_thread+0x5ef/0xe50 kernel/workqueue.c:3478
>>>   kthread+0x370/0x450 kernel/kthread.c:436
>>>   ret_from_fork+0x69a/0xc80 arch/x86/kernel/process.c:158
>>>   ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:245
>>>
>>> This patch has three coordinated parts:
>>>
>>> 1) Add dev->watchdog_lock and dev->watchdog_ref_held to serialize watchdog operations.
>>>
>>> 2) Remove netdev_watchdog_up() call from netif_carrier_on():
>>>    This ensures netdev_watchdog_up() is only called from process/BH context
>>>    (via linkwatch workqueue dev_activate()), allowing us to use
>>>    spin_lock_bh() for synchronization.
>>>
>>> 3) Synchronize watchdog up and watchdog timer:
>>>    Protect netdev_watchdog_up() with tx_global_lock and watchdog_lock.
>>>    Only allocate a new tracker in netdev_watchdog_up() if one is
>>>    not already present.
>>>    In dev_watchdog(), ensure we don't release the tracker if the
>>>    timer was rescheduled either by dev_watchdog() itself or concurrently
>>>    by netdev_watchdog_up().
>>>
>>> Fixes: f12bf6f3f942 ("net: watchdog: add net device refcount tracker")
>>> Reported-by: syzbot+381d82bbf0253710b35d@syzkaller.appspotmail.com
>>> Closes: https://lore.kernel.org/netdev/6a26b751.c25708ab.1b19ef.0013.GAE@google.com/T/#u
>>> Tested-by: syzbot+3479efbc2821cb2a79f2@syzkaller.appspotmail.com
>>> Signed-off-by: Eric Dumazet <edumazet@google.com>
>> This patch landed recently in linux-next as commit 8eed5519e496 ("net: watchdog:
>> fix refcount tracking races"). In my tests I found that it causes the following
>> deadlock during system suspend/resume on QEmu's ARM64bit 'virt' machine:
>>
>> root@target:~# time rtcwake -s10 -mmem
>> rtcwake: assuming RTC uses UTC ...
>> rtcwake: wakeup from "mem" using /dev/rtc0 at Wed Jun 17 10:46:12 2026
>> PM: suspend entry (s2idle)
>> Filesystems sync: 0.055 seconds
>> Freezing user space processes
>> Freezing user space processes completed (elapsed 0.006 seconds)
>> OOM killer disabled.
>> Freezing remaining freezable tasks
>> Freezing remaining freezable tasks completed (elapsed 0.003 seconds)
>>
>> ============================================
>> WARNING: possible recursive locking detected
>> 7.1.0-rc7+ #13003 Not tainted
>> --------------------------------------------
>> rtcwake/254 is trying to acquire lock:
>> ffff000006de64e8 (&dev->tx_global_lock){+.-.}-{3:3}, at: netdev_watchdog_up+0x40/0x108
>>
>> but task is already holding lock:
>> ffff000006de64e8 (&dev->tx_global_lock){+.-.}-{3:3}, at: netif_tx_lock+0x1c/0x34
>>
>> other info that might help us debug this:
>>  Possible unsafe locking scenario:
>>
>>        CPU0
>>        ----
>>   lock(&dev->tx_global_lock);
>>   lock(&dev->tx_global_lock);
>>
>>  *** DEADLOCK ***
>>
>>  May be due to missing lock nesting notation
>>
>> 6 locks held by rtcwake/254:
>>  #0: ffff0000071ab3e8 (sb_writers#5){.+.+}-{0:0}, at: vfs_write+0x1ec/0x35c
>>  #1: ffff00000d22c480 (&of->mutex#2){+.+.}-{4:4}, at: kernfs_fop_write_iter+0xf0/0x1c4
>>  #2: ffff0000049162c8 (kn->active#61){.+.+}-{0:0}, at: kernfs_fop_write_iter+0x100/0x1c4
>>  #3: ffffaa79533c03b0 (system_transition_mutex){+.+.}-{4:4}, at: pm_suspend+0x98/0x608
>>  #4: ffff000005e3a138 (&dev->mutex){....}-{4:4}, at: device_resume+0xb4/0x254
>>  #5: ffff000006de64e8 (&dev->tx_global_lock){+.-.}-{3:3}, at: netif_tx_lock+0x1c/0x34
>>
>> stack backtrace:
>> CPU: 1 UID: 0 PID: 254 Comm: rtcwake Not tainted 7.1.0-rc7+ #13003 PREEMPT
>> Hardware name: linux,dummy-virt (DT)
>> Call trace:
>>  show_stack+0x18/0x24 (C)
>>  dump_stack_lvl+0x90/0xd0
>>  dump_stack+0x18/0x24
>>  print_deadlock_bug+0x260/0x350
>>  __lock_acquire+0x11b8/0x225c
>>  lock_acquire+0x1c4/0x3f0
>>  _raw_spin_lock_bh+0x50/0x68
>>  netdev_watchdog_up+0x40/0x108
>>  netif_device_attach+0x9c/0xb0
>>  virtnet_restore+0x100/0x21c
>>  virtio_device_restore_priv+0x11c/0x1d0
>>  virtio_device_restore+0x14/0x20
>>  virtio_mmio_restore+0x34/0x40
>>  platform_pm_resume+0x2c/0x68
>>  dpm_run_callback+0xa0/0x240
>>  device_resume+0x120/0x254
>>  dpm_resume+0x1f8/0x2ec
>>  dpm_resume_end+0x18/0x34
>>  suspend_devices_and_enter+0x1d0/0x990
>>  pm_suspend+0x1ec/0x608
>>  state_store+0x8c/0x110
>>  kobj_attr_store+0x18/0x2c
>>  sysfs_kf_write+0x50/0x7c
>>  kernfs_fop_write_iter+0x130/0x1c4
>>  vfs_write+0x2b8/0x35c
>>  ksys_write+0x6c/0x104
>>  __arm64_sys_write+0x1c/0x28
>>  invoke_syscall+0x54/0x110
>>  el0_svc_common.constprop.0+0x40/0xe8
>>  do_el0_svc+0x20/0x2c
>>  el0_svc+0x54/0x338
>>  el0t_64_sync_handler+0xa0/0xe4
>>  el0t_64_sync+0x198/0x19c
>>
>>
>> Reverting $subject on top of linux-next fixes this issue.
> Thanks for the report Marek!
>
> Acquiring tx_global_lock in netdev_watchdog_up() appears unnecessary anyway
> because the critical state (timer and refcount tracker) is already
> protected by dev->watchdog_lock.
>
> Could you try this patch?

This fixes the observed issue. Thanks! Feel free to add:

Tested-by: Marek Szyprowski <m.szyprowski@samsung.com>


> diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
> index 3f1c510df850dbdbaf10d483547c7b1f3a5d5482..ef2b4bf51564173751c74fefe17e3913ed2fa056
> 100644
> --- a/net/sched/sch_generic.c
> +++ b/net/sched/sch_generic.c
> @@ -594,9 +594,8 @@ void netdev_watchdog_up(struct net_device *dev)
>                 return;
>         if (dev->watchdog_timeo <= 0)
>                 dev->watchdog_timeo = 5*HZ;
> -       spin_lock_bh(&dev->tx_global_lock);
>
> -       spin_lock(&dev->watchdog_lock);
> +       spin_lock_bh(&dev->watchdog_lock);
>         if (!mod_timer(&dev->watchdog_timer,
>                        round_jiffies(jiffies + dev->watchdog_timeo))) {
>                 if (!dev->watchdog_ref_held) {
> @@ -605,9 +604,7 @@ void netdev_watchdog_up(struct net_device *dev)
>                         dev->watchdog_ref_held = true;
>                 }
>         }
> -       spin_unlock(&dev->watchdog_lock);
> -
> -       spin_unlock_bh(&dev->tx_global_lock);
> +       spin_unlock_bh(&dev->watchdog_lock);
>  }
>  EXPORT_SYMBOL_GPL(netdev_watchdog_up);
>
Best regards
-- 
Marek Szyprowski, PhD
Samsung R&D Institute Poland


^ permalink raw reply

* [PATCH v3 3/3] drm/xe/xe_ras: Add error-event support for CRI
From: Riana Tauro @ 2026-06-22 10:17 UTC (permalink / raw)
  To: intel-xe, dri-devel, netdev
  Cc: aravind.iddamsetty, anshuman.gupta, rodrigo.vivi, joonas.lahtinen,
	kuba, simona.vetter, airlied, pratik.bari, joshua.santosh.ranjan,
	ashwin.kumar.kulkarni, shubham.kumar, ravi.kishore.koppuravuri,
	raag.jadav, maarten.lankhorst, mallesh.koujalagi, soham.purkait,
	Riana Tauro
In-Reply-To: <20260622101716.3313496-5-riana.tauro@intel.com>

Add error-event support for correctable errors in CRI.
When a correctable-error interrupt is received, an error-event is
reported to userspace for every error that crossed its threshold.

Signed-off-by: Riana Tauro <riana.tauro@intel.com>
---
 drivers/gpu/drm/xe/xe_ras.c | 53 +++++++++++++++++++++++++++++++++++++
 1 file changed, 53 insertions(+)

diff --git a/drivers/gpu/drm/xe/xe_ras.c b/drivers/gpu/drm/xe/xe_ras.c
index 44f4e1a3455b..acf3207aa2fd 100644
--- a/drivers/gpu/drm/xe/xe_ras.c
+++ b/drivers/gpu/drm/xe/xe_ras.c
@@ -77,6 +77,18 @@ static u8 drm_to_xe_ras_severity(u8 severity)
 	}
 }
 
+static u8 xe_to_drm_ras_severity(u8 severity)
+{
+	switch (severity) {
+	case XE_RAS_SEV_CORRECTABLE:
+		return DRM_XE_RAS_ERR_SEV_CORRECTABLE;
+	case XE_RAS_SEV_UNCORRECTABLE:
+		return DRM_XE_RAS_ERR_SEV_UNCORRECTABLE;
+	default:
+		return DRM_XE_RAS_ERR_SEV_MAX;
+	}
+}
+
 static u8 drm_to_xe_ras_component(u8 component)
 {
 	switch (component) {
@@ -95,6 +107,24 @@ static u8 drm_to_xe_ras_component(u8 component)
 	}
 }
 
+static u8 xe_to_drm_ras_component(u8 component)
+{
+	switch (component) {
+	case XE_RAS_COMP_DEVICE_MEMORY:
+		return DRM_XE_RAS_ERR_COMP_DEVICE_MEMORY;
+	case XE_RAS_COMP_CORE_COMPUTE:
+		return DRM_XE_RAS_ERR_COMP_CORE_COMPUTE;
+	case XE_RAS_COMP_PCIE:
+		return DRM_XE_RAS_ERR_COMP_PCIE;
+	case XE_RAS_COMP_FABRIC:
+		return DRM_XE_RAS_ERR_COMP_FABRIC;
+	case XE_RAS_COMP_SOC_INTERNAL:
+		return DRM_XE_RAS_ERR_COMP_SOC_INTERNAL;
+	default:
+		return DRM_XE_RAS_ERR_COMP_MAX;
+	}
+}
+
 static int ras_status_to_errno(u32 status)
 {
 	switch (status) {
@@ -131,6 +161,27 @@ static inline const char *comp_to_str(u8 component)
 	return xe_ras_components[component];
 }
 
+static void ras_send_error_event(struct xe_device *xe, u8 severity, u8 component)
+{
+	u8 drm_severity, drm_component;
+	u32 value;
+	int ret;
+
+	drm_severity = xe_to_drm_ras_severity(severity);
+	if (drm_severity == DRM_XE_RAS_ERR_SEV_MAX)
+		return;
+
+	drm_component = xe_to_drm_ras_component(component);
+	if (drm_component == DRM_XE_RAS_ERR_COMP_MAX)
+		return;
+
+	ret = xe_ras_get_counter(xe, severity, component, &value);
+	if (ret)
+		return;
+
+	xe_drm_ras_event(xe, drm_component, drm_severity, value, GFP_KERNEL);
+}
+
 void xe_ras_counter_threshold_crossed(struct xe_device *xe,
 				      struct xe_sysctrl_event_response *response)
 {
@@ -152,6 +203,8 @@ void xe_ras_counter_threshold_crossed(struct xe_device *xe,
 		severity = errors[id].common.severity;
 		component = errors[id].common.component;
 
+		ras_send_error_event(xe, severity, component);
+
 		xe_warn(xe, "[RAS]: %s %s detected\n",
 			comp_to_str(component), sev_to_str(severity));
 	}
-- 
2.47.1


^ permalink raw reply related

* [PATCH v3 2/3] drm/xe/xe_drm_ras: Add error-event support for PVC
From: Riana Tauro @ 2026-06-22 10:17 UTC (permalink / raw)
  To: intel-xe, dri-devel, netdev
  Cc: aravind.iddamsetty, anshuman.gupta, rodrigo.vivi, joonas.lahtinen,
	kuba, simona.vetter, airlied, pratik.bari, joshua.santosh.ranjan,
	ashwin.kumar.kulkarni, shubham.kumar, ravi.kishore.koppuravuri,
	raag.jadav, maarten.lankhorst, mallesh.koujalagi, soham.purkait,
	Riana Tauro
In-Reply-To: <20260622101716.3313496-5-riana.tauro@intel.com>

Report drm_ras error event to userspace when an error occurs.
Add support for core-compute and SoC errors in PVC.

$ sudo ynl --family drm_ras --output-json --subscribe error-report

{
    "name": "error-event",
     "msg": {
         "device-name": "0000:03:00.0",
         "node-id": 1,
         "node-name": "uncorrectable-errors",
         "error-id": 1,
         "error-name": "core-compute",
         "error-value": 1
     }
}

Signed-off-by: Riana Tauro <riana.tauro@intel.com>
Reviewed-by: Raag Jadav <raag.jadav@intel.com>
---
v2: use ynl (Raag)
    use value as function parameter
    move error event call to hw_error_source_handler 

v3: add has_drm_ras check
---
 drivers/gpu/drm/xe/xe_drm_ras.c  | 30 ++++++++++++++++++++++++++++++
 drivers/gpu/drm/xe/xe_drm_ras.h  |  3 +++
 drivers/gpu/drm/xe/xe_hw_error.c |  5 ++++-
 3 files changed, 37 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/xe_drm_ras.c b/drivers/gpu/drm/xe/xe_drm_ras.c
index 7937d8ba0ed9..36afdfb5e412 100644
--- a/drivers/gpu/drm/xe/xe_drm_ras.c
+++ b/drivers/gpu/drm/xe/xe_drm_ras.c
@@ -185,6 +185,36 @@ static int register_nodes(struct xe_device *xe)
 	return ret;
 }
 
+/**
+ * xe_drm_ras_event() - Report drm-ras error event to userspace
+ * @xe: xe device structure
+ * @component: error component (see &enum drm_xe_ras_error_component)
+ * @severity: error severity (see &enum drm_xe_ras_error_severity)
+ * @value: value of error counter
+ * @flags: flags for allocation
+ *
+ * Report an error-event to userspace.
+ */
+void xe_drm_ras_event(struct xe_device *xe, u32 component, u32 severity, u32 value, gfp_t flags)
+{
+	struct xe_drm_ras *ras = &xe->ras;
+	struct xe_drm_ras_counter *info = ras->info[severity];
+	struct drm_ras_node *node = &ras->node[severity];
+	int ret;
+
+	/* Event is supported only if drm_ras is enabled */
+	if (!xe->info.has_drm_ras)
+		return;
+
+	if (!info || !info[component].name)
+		return;
+
+	ret = drm_ras_nl_error_event(node, component, info[component].name, value, flags);
+	if (ret)
+		drm_err(&xe->drm, "RAS error-event failed: %d for %s %s\n", ret,
+			info[component].name, error_severity[severity]);
+}
+
 /**
  * xe_drm_ras_init() - Initialize DRM RAS
  * @xe: xe device instance
diff --git a/drivers/gpu/drm/xe/xe_drm_ras.h b/drivers/gpu/drm/xe/xe_drm_ras.h
index 365c70e93e82..2a694bf69478 100644
--- a/drivers/gpu/drm/xe/xe_drm_ras.h
+++ b/drivers/gpu/drm/xe/xe_drm_ras.h
@@ -5,11 +5,14 @@
 #ifndef _XE_DRM_RAS_H_
 #define _XE_DRM_RAS_H_
 
+#include <linux/types.h>
+
 struct xe_device;
 
 #define for_each_error_severity(i)	\
 	for (i = 0; i < DRM_XE_RAS_ERR_SEV_MAX; i++)
 
 int xe_drm_ras_init(struct xe_device *xe);
+void xe_drm_ras_event(struct xe_device *xe, u32 component, u32 severity, u32 value, gfp_t flags);
 
 #endif
diff --git a/drivers/gpu/drm/xe/xe_hw_error.c b/drivers/gpu/drm/xe/xe_hw_error.c
index 4a4b363fc844..a833cecc74ec 100644
--- a/drivers/gpu/drm/xe/xe_hw_error.c
+++ b/drivers/gpu/drm/xe/xe_hw_error.c
@@ -432,7 +432,7 @@ static void hw_error_source_handler(struct xe_tile *tile, const enum hardware_er
 	struct xe_drm_ras *ras = &xe->ras;
 	struct xe_drm_ras_counter *info = ras->info[severity];
 	unsigned long flags, err_src;
-	u32 err_bit;
+	u32 err_bit, value;
 
 	if (!IS_DGFX(xe))
 		return;
@@ -495,6 +495,9 @@ static void hw_error_source_handler(struct xe_tile *tile, const enum hardware_er
 			gt_hw_error_handler(tile, hw_err, error_id);
 		if (err_bit == XE_SOC_ERROR)
 			soc_hw_error_handler(tile, hw_err, error_id);
+
+		value = atomic_read(&info[error_id].counter);
+		xe_drm_ras_event(xe, error_id, severity, value, GFP_ATOMIC);
 	}
 
 clear_reg:
-- 
2.47.1


^ permalink raw reply related

* [PATCH v3 1/3] drm/drm_ras: Add drm_ras netlink error event
From: Riana Tauro @ 2026-06-22 10:17 UTC (permalink / raw)
  To: intel-xe, dri-devel, netdev
  Cc: aravind.iddamsetty, anshuman.gupta, rodrigo.vivi, joonas.lahtinen,
	kuba, simona.vetter, airlied, pratik.bari, joshua.santosh.ranjan,
	ashwin.kumar.kulkarni, shubham.kumar, ravi.kishore.koppuravuri,
	raag.jadav, maarten.lankhorst, mallesh.koujalagi, soham.purkait,
	Riana Tauro, Zack McKevitt, Lijo Lazar, Hawking Zhang,
	David S. Miller, Paolo Abeni, Eric Dumazet
In-Reply-To: <20260622101716.3313496-5-riana.tauro@intel.com>

Define a new netlink event 'error-event' and a new multicast group
'error-report' in drm_ras. Each event contains device name, node and
error information to identify the error triggering the event.

Add drm_ras_nl_error_event() to trigger an event from the driver.
Userspace must subscribe to 'error-report' to receive 'error-event'
notifications.

Usage:

$ sudo ynl --family drm_ras --subscribe error-report

Cc: Jakub Kicinski <kuba@kernel.org>
Cc: Zack McKevitt <zachary.mckevitt@oss.qualcomm.com>
Cc: Lijo Lazar <lijo.lazar@amd.com>
Cc: Hawking Zhang <Hawking.Zhang@amd.com>
Cc: David S. Miller <davem@davemloft.net>
Cc: Paolo Abeni <pabeni@redhat.com>
Cc: Eric Dumazet <edumazet@google.com>
Signed-off-by: Riana Tauro <riana.tauro@intel.com>
Reviewed-by: Raag Jadav <raag.jadav@intel.com>
---
v2: remove redundant initialization
    remove unnecessary space
    use ynl in commit message and doc (Raag)
    simplify doc for error-event attrs

v3: rename error-notify to error-report
    Replace notify with report across the file (Raag)
---
 Documentation/gpu/drm-ras.rst            | 21 ++++++
 Documentation/netlink/specs/drm_ras.yaml | 48 +++++++++++++
 drivers/gpu/drm/drm_ras.c                | 87 ++++++++++++++++++++++++
 drivers/gpu/drm/drm_ras_nl.c             |  6 ++
 drivers/gpu/drm/drm_ras_nl.h             |  4 ++
 include/drm/drm_ras.h                    |  5 ++
 include/uapi/drm/drm_ras.h               | 15 ++++
 7 files changed, 186 insertions(+)

diff --git a/Documentation/gpu/drm-ras.rst b/Documentation/gpu/drm-ras.rst
index 83c21853b74b..406e4c49bac1 100644
--- a/Documentation/gpu/drm-ras.rst
+++ b/Documentation/gpu/drm-ras.rst
@@ -56,6 +56,7 @@ User space tools can:
   ``node-id`` and ``error-id`` as parameters.
 * Clear specific error counters with the ``clear-error-counter`` command, using both
   ``node-id`` and ``error-id`` as parameters.
+* Subscribe to the ``error-report`` multicast group to receive ``error-event``.
 
 YAML-based Interface
 --------------------
@@ -111,3 +112,23 @@ Example: Clear an error counter for a given node
 
     sudo ynl --family drm_ras --do clear-error-counter --json '{"node-id":0, "error-id":1}'
     None
+
+Example: Subscribe to ``error-report`` multicast group
+
+.. code-block:: bash
+
+    sudo ynl --family drm_ras --output-json --subscribe error-report
+
+.. code-block:: json
+
+    {
+        "name": "error-event",
+        "msg": {
+            "device-name": "0000:03:00.0",
+            "node-id": 1,
+            "node-name": "uncorrectable-errors",
+            "error-id": 1,
+            "error-name": "error_name1",
+            "error-value": 1
+        }
+    }
diff --git a/Documentation/netlink/specs/drm_ras.yaml b/Documentation/netlink/specs/drm_ras.yaml
index e113056f8c01..8aed3d4515e5 100644
--- a/Documentation/netlink/specs/drm_ras.yaml
+++ b/Documentation/netlink/specs/drm_ras.yaml
@@ -69,6 +69,33 @@ attribute-sets:
         name: error-value
         type: u32
         doc: Current value of the requested error counter.
+  -
+    name: error-event-attrs
+    attributes:
+      -
+        name: device-name
+        type: string
+        doc: Device (PCI BDF, UUID) that reported the error.
+      -
+        name: node-id
+        type: u32
+        doc: ID of the node that reported the error.
+      -
+        name: node-name
+        type: string
+        doc: Name of the node that reported the error.
+      -
+        name: error-id
+        type: u32
+        doc: ID of the error counter.
+      -
+        name: error-name
+        type: string
+        doc: Name of the error.
+      -
+        name: error-value
+        type: u32
+        doc: Current value of the error counter.
 
 operations:
   list:
@@ -124,3 +151,24 @@ operations:
       do:
         request:
           attributes: *id-attrs
+    -
+      name: error-event
+      doc: >-
+           Report an error event to userspace.
+           The event includes the device, node and error information
+           of the error that triggered the event.
+      attribute-set: error-event-attrs
+      mcgrp: error-report
+      event:
+        attributes:
+          - device-name
+          - node-id
+          - node-name
+          - error-id
+          - error-name
+          - error-value
+
+mcast-groups:
+  list:
+    -
+      name: error-report
diff --git a/drivers/gpu/drm/drm_ras.c b/drivers/gpu/drm/drm_ras.c
index d6eab29a1394..77f912a4d101 100644
--- a/drivers/gpu/drm/drm_ras.c
+++ b/drivers/gpu/drm/drm_ras.c
@@ -41,6 +41,11 @@
  *    Userspace must provide Node ID, Error ID.
  *    Clears specific error counter of a node if supported.
  *
+ * 4. ERROR_REPORT: Subscribe to this multicast group to receive error events
+ *
+ * 5. ERROR_EVENT: Report an error event to userspace. The event contains device, node
+ *    and error information that triggered the event.
+ *
  * Node registration:
  *
  * - drm_ras_node_register(): Registers a new node and assigns
@@ -186,6 +191,34 @@ static int msg_reply_value(struct sk_buff *msg, u32 error_id,
 			   value);
 }
 
+static int msg_put_error_event_attrs(struct sk_buff *msg, struct drm_ras_node *node,
+				     u32 error_id, const char *error_name, u32 value)
+{
+	int ret;
+
+	ret = nla_put_string(msg, DRM_RAS_A_ERROR_EVENT_ATTRS_DEVICE_NAME, node->device_name);
+	if (ret)
+		return ret;
+
+	ret = nla_put_u32(msg, DRM_RAS_A_ERROR_EVENT_ATTRS_NODE_ID, node->id);
+	if (ret)
+		return ret;
+
+	ret = nla_put_string(msg, DRM_RAS_A_ERROR_EVENT_ATTRS_NODE_NAME, node->node_name);
+	if (ret)
+		return ret;
+
+	ret = nla_put_u32(msg, DRM_RAS_A_ERROR_EVENT_ATTRS_ERROR_ID, error_id);
+	if (ret)
+		return ret;
+
+	ret = nla_put_string(msg, DRM_RAS_A_ERROR_EVENT_ATTRS_ERROR_NAME, error_name);
+	if (ret)
+		return ret;
+
+	return nla_put_u32(msg, DRM_RAS_A_ERROR_EVENT_ATTRS_ERROR_VALUE, value);
+}
+
 static int doit_reply_value(struct genl_info *info, u32 node_id,
 			    u32 error_id)
 {
@@ -222,6 +255,60 @@ static int doit_reply_value(struct genl_info *info, u32 node_id,
 	return genlmsg_reply(msg, info);
 }
 
+/**
+ * drm_ras_nl_error_event() - Report an error event
+ * @node: Node structure
+ * @error_id: ID of the error
+ * @error_name: Name of the error
+ * @value: Value associated with the error
+ * @flags: GFP flags for memory allocation
+ *
+ * Report an error-event to userspace using the error-report multicast group.
+ *
+ * Return: 0 on success, or negative errno on failure.
+ */
+int drm_ras_nl_error_event(struct drm_ras_node *node, u32 error_id, const char *error_name,
+			   u32 value, gfp_t flags)
+{
+	struct genl_info info;
+	struct sk_buff *msg;
+	struct nlattr *hdr;
+	int ret;
+
+	if (!error_name)
+		return -EINVAL;
+
+	if (!genl_has_listeners(&drm_ras_nl_family, &init_net, DRM_RAS_NLGRP_ERROR_REPORT))
+		return 0;
+
+	genl_info_init_ntf(&info, &drm_ras_nl_family, DRM_RAS_CMD_ERROR_EVENT);
+
+	msg = genlmsg_new(NLMSG_GOODSIZE, flags);
+	if (!msg)
+		return -ENOMEM;
+
+	hdr = genlmsg_iput(msg, &info);
+	if (!hdr) {
+		ret = -EMSGSIZE;
+		goto free_msg;
+	}
+
+	ret = msg_put_error_event_attrs(msg, node, error_id, error_name, value);
+	if (ret)
+		goto cancel_msg;
+
+	genlmsg_end(msg, hdr);
+	genlmsg_multicast(&drm_ras_nl_family, msg, 0, DRM_RAS_NLGRP_ERROR_REPORT, flags);
+	return 0;
+
+cancel_msg:
+	genlmsg_cancel(msg, hdr);
+free_msg:
+	nlmsg_free(msg);
+	return ret;
+}
+EXPORT_SYMBOL(drm_ras_nl_error_event);
+
 /**
  * drm_ras_nl_get_error_counter_dumpit() - Dump all Error Counters
  * @skb: Netlink message buffer
diff --git a/drivers/gpu/drm/drm_ras_nl.c b/drivers/gpu/drm/drm_ras_nl.c
index dea1c1b2494e..9d3123cc9f9c 100644
--- a/drivers/gpu/drm/drm_ras_nl.c
+++ b/drivers/gpu/drm/drm_ras_nl.c
@@ -58,6 +58,10 @@ static const struct genl_split_ops drm_ras_nl_ops[] = {
 	},
 };
 
+static const struct genl_multicast_group drm_ras_nl_mcgrps[] = {
+	[DRM_RAS_NLGRP_ERROR_REPORT] = { "error-report", },
+};
+
 struct genl_family drm_ras_nl_family __ro_after_init = {
 	.name		= DRM_RAS_FAMILY_NAME,
 	.version	= DRM_RAS_FAMILY_VERSION,
@@ -66,4 +70,6 @@ struct genl_family drm_ras_nl_family __ro_after_init = {
 	.module		= THIS_MODULE,
 	.split_ops	= drm_ras_nl_ops,
 	.n_split_ops	= ARRAY_SIZE(drm_ras_nl_ops),
+	.mcgrps		= drm_ras_nl_mcgrps,
+	.n_mcgrps	= ARRAY_SIZE(drm_ras_nl_mcgrps),
 };
diff --git a/drivers/gpu/drm/drm_ras_nl.h b/drivers/gpu/drm/drm_ras_nl.h
index a398643572a5..03ec275aca92 100644
--- a/drivers/gpu/drm/drm_ras_nl.h
+++ b/drivers/gpu/drm/drm_ras_nl.h
@@ -21,6 +21,10 @@ int drm_ras_nl_get_error_counter_dumpit(struct sk_buff *skb,
 int drm_ras_nl_clear_error_counter_doit(struct sk_buff *skb,
 					struct genl_info *info);
 
+enum {
+	DRM_RAS_NLGRP_ERROR_REPORT,
+};
+
 extern struct genl_family drm_ras_nl_family;
 
 #endif /* _LINUX_DRM_RAS_GEN_H */
diff --git a/include/drm/drm_ras.h b/include/drm/drm_ras.h
index f2a787bc4f64..d4a275efdbb0 100644
--- a/include/drm/drm_ras.h
+++ b/include/drm/drm_ras.h
@@ -78,9 +78,14 @@ struct drm_device;
 #if IS_ENABLED(CONFIG_DRM_RAS)
 int drm_ras_node_register(struct drm_ras_node *node);
 void drm_ras_node_unregister(struct drm_ras_node *node);
+int drm_ras_nl_error_event(struct drm_ras_node *node, u32 error_id, const char *error_name,
+			   u32 value, gfp_t flags);
 #else
 static inline int drm_ras_node_register(struct drm_ras_node *node) { return 0; }
 static inline void drm_ras_node_unregister(struct drm_ras_node *node) { }
+static inline int drm_ras_nl_error_event(struct drm_ras_node *node, u32 error_id,
+					 const char *error_name, u32 value, gfp_t flags)
+{ return 0; }
 #endif
 
 #endif
diff --git a/include/uapi/drm/drm_ras.h b/include/uapi/drm/drm_ras.h
index 218a3ee86805..eab8231aa87c 100644
--- a/include/uapi/drm/drm_ras.h
+++ b/include/uapi/drm/drm_ras.h
@@ -38,13 +38,28 @@ enum {
 	DRM_RAS_A_ERROR_COUNTER_ATTRS_MAX = (__DRM_RAS_A_ERROR_COUNTER_ATTRS_MAX - 1)
 };
 
+enum {
+	DRM_RAS_A_ERROR_EVENT_ATTRS_DEVICE_NAME = 1,
+	DRM_RAS_A_ERROR_EVENT_ATTRS_NODE_ID,
+	DRM_RAS_A_ERROR_EVENT_ATTRS_NODE_NAME,
+	DRM_RAS_A_ERROR_EVENT_ATTRS_ERROR_ID,
+	DRM_RAS_A_ERROR_EVENT_ATTRS_ERROR_NAME,
+	DRM_RAS_A_ERROR_EVENT_ATTRS_ERROR_VALUE,
+
+	__DRM_RAS_A_ERROR_EVENT_ATTRS_MAX,
+	DRM_RAS_A_ERROR_EVENT_ATTRS_MAX = (__DRM_RAS_A_ERROR_EVENT_ATTRS_MAX - 1)
+};
+
 enum {
 	DRM_RAS_CMD_LIST_NODES = 1,
 	DRM_RAS_CMD_GET_ERROR_COUNTER,
 	DRM_RAS_CMD_CLEAR_ERROR_COUNTER,
+	DRM_RAS_CMD_ERROR_EVENT,
 
 	__DRM_RAS_CMD_MAX,
 	DRM_RAS_CMD_MAX = (__DRM_RAS_CMD_MAX - 1)
 };
 
+#define DRM_RAS_MCGRP_ERROR_REPORT	"error-report"
+
 #endif /* _UAPI_LINUX_DRM_RAS_H */
-- 
2.47.1


^ permalink raw reply related

* [PATCH v3 0/3] Add drm_ras netlink error event support
From: Riana Tauro @ 2026-06-22 10:17 UTC (permalink / raw)
  To: intel-xe, dri-devel, netdev
  Cc: aravind.iddamsetty, anshuman.gupta, rodrigo.vivi, joonas.lahtinen,
	kuba, simona.vetter, airlied, pratik.bari, joshua.santosh.ranjan,
	ashwin.kumar.kulkarni, shubham.kumar, ravi.kishore.koppuravuri,
	raag.jadav, maarten.lankhorst, mallesh.koujalagi, soham.purkait,
	Riana Tauro

Define a new netlink event 'error-event' and a new multicast group
'error-report' in drm_ras. Each event contains device name, node and
error information to identify the error triggering the event.

Add drm_ras_nl_error_event() to trigger an event from the driver.
Wire this support to xe drm_ras to notify userspace whenever a GT or
SoC error occurs in PVC. Also add support for correctable errors in
CRI.

$ sudo ynl --family drm_ras --output-json --subscribe error-report

{
    "name": "error-event",
     "msg": {
         "device-name": "0000:03:00.0",
         "node-id": 1,
         "node-name": "uncorrectable-errors",
         "error-id": 1,
         "error-name": "core-compute",
         "error-value": 1
     }
}

Rev2: use ynl in document and commit message
      fix cosmetic review comments
      simplify caller 

Rev3: replace error-event with error-report
      add has_drm_ras check
      add support for correctable errors in CRI

Riana Tauro (3):
  drm/drm_ras: Add drm_ras netlink error event
  drm/xe/xe_drm_ras: Add error-event support for PVC
  drm/xe/xe_ras: Add error-event support for Crescent Island

 Documentation/gpu/drm-ras.rst            | 21 ++++++
 Documentation/netlink/specs/drm_ras.yaml | 48 +++++++++++++
 drivers/gpu/drm/drm_ras.c                | 87 ++++++++++++++++++++++++
 drivers/gpu/drm/drm_ras_nl.c             |  6 ++
 drivers/gpu/drm/drm_ras_nl.h             |  4 ++
 drivers/gpu/drm/xe/xe_drm_ras.c          | 30 ++++++++
 drivers/gpu/drm/xe/xe_drm_ras.h          |  3 +
 drivers/gpu/drm/xe/xe_hw_error.c         |  5 +-
 drivers/gpu/drm/xe/xe_ras.c              | 53 +++++++++++++++
 include/drm/drm_ras.h                    |  5 ++
 include/uapi/drm/drm_ras.h               | 15 ++++
 11 files changed, 276 insertions(+), 1 deletion(-)

-- 
2.47.1


^ permalink raw reply

* AW: AW: AW: AW: [PATCH net] net: usb: lan78xx: restore VLAN filter table after device reset
From: Sven Schuchmann @ 2026-06-22 10:07 UTC (permalink / raw)
  To: Nicolai Buchwitz
  Cc: Thangaraj Samynathan, Rengarajan Sundararajan,
	UNGLinuxDriver@microchip.com, Woojung.Huh@microchip.com,
	Andrew Lunn, David S . Miller, Eric Dumazet, Jakub Kicinski,
	Paolo Abeni, netdev@vger.kernel.org, linux-usb@vger.kernel.org,
	linux-kernel@vger.kernel.org
In-Reply-To: <f76711d2f45c527f9ce0f5d288631bc6@tipi-net.de>

Hello Nicolai,

On 19.6.2026 16:01, Nicolai Buchwitz wrote:
> Hi Sven
> 
> On 19.6.2026 15:31, Sven Schuchmann wrote:
> > Hello Nicolai,
> >
> > looks good from my point of view
> > (Calling the lan78xx_write_vlan_table() from
> > lan78xx_mac_link_up() and from lan78xx_reset()).
> 
> Thanks.

Just to be clear I used this patch which is looking good:

---
 drivers/net/usb/lan78xx.c | 26 +++++++++++++++++++++++---
 1 file changed, 23 insertions(+), 3 deletions(-)

diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c
index a5132f2f9..a2db38650 100644
--- a/drivers/net/usb/lan78xx.c
+++ b/drivers/net/usb/lan78xx.c
@@ -1571,6 +1571,7 @@ static void lan78xx_set_multicast(struct net_device *netdev)
 }
 
 static void lan78xx_rx_urb_submit_all(struct lan78xx_net *dev);
+static int lan78xx_write_vlan_table(struct lan78xx_net *dev);
 
 static int lan78xx_mac_reset(struct lan78xx_net *dev)
 {
@@ -2528,6 +2529,10 @@ static void lan78xx_mac_link_up(struct phylink_config *config,
 	if (ret < 0)
 		goto link_up_fail;
 
+	ret = lan78xx_write_vlan_table(dev);
+	if (ret < 0)
+		goto link_up_fail;
+
 	netif_start_queue(net);
 
 	return;
@@ -3081,14 +3086,20 @@ static int lan78xx_set_features(struct net_device *netdev,
 	return lan78xx_write_reg(dev, RFE_CTL, pdata->rfe_ctl);
 }
 
+static int lan78xx_write_vlan_table(struct lan78xx_net *dev)
+{
+	struct lan78xx_priv *pdata = (struct lan78xx_priv *)(dev->data[0]);
+
+	return lan78xx_dataport_write(dev, DP_SEL_RSEL_VLAN_DA_, 0,
+				      DP_SEL_VHF_VLAN_LEN, pdata->vlan_table);
+}
+
 static void lan78xx_deferred_vlan_write(struct work_struct *param)
 {
 	struct lan78xx_priv *pdata =
 			container_of(param, struct lan78xx_priv, set_vlan);
-	struct lan78xx_net *dev = pdata->dev;
 
-	lan78xx_dataport_write(dev, DP_SEL_RSEL_VLAN_DA_, 0,
-			       DP_SEL_VHF_VLAN_LEN, pdata->vlan_table);
+	lan78xx_write_vlan_table(pdata->dev);
 }
 
 static int lan78xx_vlan_rx_add_vid(struct net_device *netdev,
@@ -3378,6 +3389,15 @@ static int lan78xx_reset(struct lan78xx_net *dev)
 
 	lan78xx_set_multicast(dev->net);
 
+	/* The chip reset above also clears the VLAN filter table held in the
+	 * shared VLAN/DA hash RAM. The network stack does not re-add VLANs
+	 * after a silent device reset (e.g. on reset_resume after USB
+	 * autosuspend), so restore the table from our shadow copy here.
+	 */
+	ret = lan78xx_write_vlan_table(dev);
+	if (ret < 0)
+		return ret;
+
 	/* reset PHY */
 	ret = lan78xx_read_reg(dev, PMT_CTL, &buf);
 	if (ret < 0)
-- 

> 
> > But I investigated a little more and it seems the hash table
> > (which is right behind the vlan table in the controllers memory)
> > also gets cleared. I wrote some random data into this table and have
> > seen that it gets also cleared. I think this needs to be fixed too.
> 
> Something like
> 
> static int lan78xx_write_mchash_table(struct lan78xx_net *dev)
> {
>         struct lan78xx_priv *pdata = (struct lan78xx_priv
> *)(dev->data[0]);
> 
>         return lan78xx_dataport_write(dev, DP_SEL_RSEL_VLAN_DA_,
>                                       DP_SEL_VHF_VLAN_LEN,
>                                       DP_SEL_VHF_HASH_LEN,
> pdata->mchash_table); // from lan78xx_deferred_multicast_write)
> }
> 
> with callers in lan78xx_deferred_multicast_write() and
> lan78xx_mac_link_up(), should
> do the trick?

I used this one which is also looking good:
---
 drivers/net/usb/lan78xx.c | 24 +++++++++++++++++++++---
 1 file changed, 21 insertions(+), 3 deletions(-)

diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c
index d449c1950fd3..6d7d349816a6 100644
--- a/drivers/net/usb/lan78xx.c
+++ b/drivers/net/usb/lan78xx.c
@@ -1466,6 +1466,8 @@ static inline u32 lan78xx_hash(char addr[ETH_ALEN])
 	return (ether_crc(ETH_ALEN, addr) >> 23) & 0x1ff;
 }
 
+static int lan78xx_write_mchash_table(struct lan78xx_net *dev);
+
 static void lan78xx_deferred_multicast_write(struct work_struct *param)
 {
 	struct lan78xx_priv *pdata =
@@ -1476,9 +1478,7 @@ static void lan78xx_deferred_multicast_write(struct work_struct *param)
 	netif_dbg(dev, drv, dev->net, "deferred multicast write 0x%08x\n",
 		  pdata->rfe_ctl);
 
-	ret = lan78xx_dataport_write(dev, DP_SEL_RSEL_VLAN_DA_,
-				     DP_SEL_VHF_VLAN_LEN,
-				     DP_SEL_VHF_HASH_LEN, pdata->mchash_table);
+	ret = lan78xx_write_mchash_table(dev);
 	if (ret < 0)
 		goto multicast_write_done;
 
@@ -2533,6 +2533,10 @@ static void lan78xx_mac_link_up(struct phylink_config *config,
 	if (ret < 0)
 		goto link_up_fail;
 
+	ret = lan78xx_write_mchash_table(dev);
+	if (ret < 0)
+		goto link_up_fail;
+
 	netif_start_queue(net);
 
 	return;
@@ -3094,6 +3098,16 @@ static int lan78xx_write_vlan_table(struct lan78xx_net *dev)
 				      DP_SEL_VHF_VLAN_LEN, pdata->vlan_table);
 }
 
+static int lan78xx_write_mchash_table(struct lan78xx_net *dev)
+{
+	struct lan78xx_priv *pdata = (struct lan78xx_priv *)(dev->data[0]);
+
+	return lan78xx_dataport_write(dev, DP_SEL_RSEL_VLAN_DA_,
+				      DP_SEL_VHF_VLAN_LEN,
+				      DP_SEL_VHF_HASH_LEN,
+				      pdata->mchash_table);
+}
+
 static void lan78xx_deferred_vlan_write(struct work_struct *param)
 {
 	struct lan78xx_priv *pdata =
@@ -3398,6 +3412,10 @@ static int lan78xx_reset(struct lan78xx_net *dev)
 	if (ret < 0)
 		return ret;
 
+	ret = lan78xx_write_mchash_table(dev);
+	if (ret < 0)
+		return ret;
+
 	/* reset PHY */
 	ret = lan78xx_read_reg(dev, PMT_CTL, &buf);
 	if (ret < 0)
-- 

> 
> >
> > In the Datasheet from the LAN7801 I can read:
> > "After a reset event, the RFE will automatically initialize the
> > contents of the VHF to 0h."
> > Where VHF also refers to the hash table.
> > But I still do not understand what reset is happening when I just
> > unplug the network cable....
> 
> I suspect it is triggered from the PHY:
> 
> 8.10 (MAC Reset Watchdog Timer):
> "A portion of the MAC operates on clocks generated by the Ethernet PHY
> [...] PHY Reset
> (PHY_RST) results in resetting the portion of the MAC operating on the
> PHY receive and
> transmit clocks."
> 
> So which PHY are you using?

I am using a DP83TC812R from TI. There is currently no driver available
so I ported this one
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/drivers/net/phy/dp83tg720.c
which is working fine (maybe I will also publish a patch for this).

The strange thing is that the MAC Reset Watchdog Timer seems
to occur "silently", so that neither the MAC nor the PHY driver knows
about this reset.

But nevertheless, the two patches fixed my problem and
I think they should go into mainline.

Regards,

   Sven

^ permalink raw reply related

* [PATCH v2 net-next] sctp: use sctp_auth_shkey_release() in error path for consistency
From: Wentao Liang @ 2026-06-22 10:02 UTC (permalink / raw)
  To: Marcelo Ricardo Leitner, Xin Long, David S . Miller, Eric Dumazet,
	Jakub Kicinski, Paolo Abeni
  Cc: Simon Horman, linux-sctp, netdev, linux-kernel, Wentao Liang

Use the proper refcount-aware helper sctp_auth_shkey_release() instead
of kfree() when freeing cur_key in the error path of sctp_auth_set_key().
While both are equivalent in the current code, using the helper maintains
abstraction consistency and prevents potential issues if the code is
reordered in the future.

Signed-off-by: Wentao Liang <vulab@iscas.ac.cn>
---
 net/sctp/auth.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/sctp/auth.c b/net/sctp/auth.c
index be9782760f50..84708f87392f 100644
--- a/net/sctp/auth.c
+++ b/net/sctp/auth.c
@@ -753,7 +753,7 @@ int sctp_auth_set_key(struct sctp_endpoint *ep,
 	/* Create a new key data based on the info passed in */
 	key = sctp_auth_create_key(auth_key->sca_keylength, GFP_KERNEL);
 	if (!key) {
-		kfree(cur_key);
+		sctp_auth_shkey_release(cur_key);
 		return -ENOMEM;
 	}
 
-- 
2.39.5 (Apple Git-154)


^ permalink raw reply related

* Re: [PATCH 0/2] Add bpf_sock_read_xattr() kfunc to read socket xattrs
From: Christian Brauner @ 2026-06-22 10:02 UTC (permalink / raw)
  To: David S. Miller, Eric Dumazet, Jakub Kicinski, Paolo Abeni,
	Alexei Starovoitov, Daniel Borkmann, Christian Brauner
  Cc: Alexander Viro, Jan Kara, Simon Horman, Kuniyuki Iwashima,
	Willem de Bruijn, linux-fsdevel, netdev, bpf, Andrii Nakryiko,
	Martin KaFai Lau, Eduard Zingerman, Kumar Kartikeya Dwivedi,
	Song Liu, Yonghong Song, Jiri Olsa
In-Reply-To: <20260617-work-bpf-sock-xattr-v1-0-a1276f7c9da3@kernel.org>

On Wed, 17 Jun 2026 13:18:26 +0200, Christian Brauner wrote:
> Add bpf_sock_read_xattr() kfunc to read socket xattrs
> 
> In c8db08110cbe ("Merge tag 'vfs-7.1-rc1.xattr' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs")
> we added support for extended attributes for sockets. This comes in two
> flavors: sockfs and non-sockfs/filesystem sockets. Filesystem sockets
> are actual filesystem objects so reading xattrs must use dedicated fs
> helpers such as bpf_get_dentry_xattr() and bpf_get_file_xattr(). Those
> are inherently sleeping operations. Sockfs sockets on the other hand
> don't need to use sleeping operations as the underlying data structure
> is lockless. In addition, retrieval of sockfs extended attributes often
> happens from LSM hooks that only provide struct socket and it's
> completely nonsensical to grab a reference to a file, then force a
> sleeping operation to retrieve the xattr and drop the reference. We know
> that the sockfs file cannot go away while the LSM hook runs.
> 
> [...]

Applied to the vfs-7.3.kfunc branch of the vfs/vfs.git tree.
Patches in the vfs-7.3.kfunc branch should appear in linux-next soon.

Please report any outstanding bugs that were missed during review in a
new review to the original patch series allowing us to drop it.

It's encouraged to provide Acked-bys and Reviewed-bys even though the
patch has now been applied. If possible patch trailers will be updated.

Note that commit hashes shown below are subject to change due to rebase,
trailer updates or similar. If in doubt, please check the listed branch.

tree:   https://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs.git
branch: vfs-7.3.kfunc

[1/2] fs: Add bpf_sock_read_xattr() kfunc to read socket xattrs
      https://git.kernel.org/vfs/vfs/c/f80386e3838e
[2/2] selftests/bpf: Add test for bpf_sock_read_xattr() kfunc
      https://git.kernel.org/vfs/vfs/c/99a63a6aff40


^ permalink raw reply

* [PATCH net v2] net: airoha: Add retry mechanism to airoha_qdma_set_trtcm_param()
From: Lorenzo Bianconi @ 2026-06-22  9:35 UTC (permalink / raw)
  To: Andrew Lunn, David S. Miller, Eric Dumazet, Jakub Kicinski,
	Paolo Abeni, Lorenzo Bianconi
  Cc: Leto Liu, linux-arm-kernel, linux-mediatek, netdev, Brown Huang

From: Brown Huang <brown.huang@airoha.com>

CPU accesses QDMA via the bus. When multiple modules are using the bus
simultaneously, CPU access to QDMA may encounter bus timeouts and fail,
resulting in QDMA configuration failures and potentially causing packet
transmission issues. In order to mitigate the issue, introduce a retry
mechanism to airoha_qdma_set_trtcm_param routine in order to ensure the
configuration is correctly applied to the hardware.

Fixes: ef1ca9271313b ("net: airoha: Add sched HTB offload support")
Signed-off-by: Brown Huang <brown.huang@airoha.com>
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
---
Changes in v2:
- Wait for write configuration to be completed before running
  airoha_qdma_get_trtcm_param() in airoha_qdma_set_trtcm_param().
- Link to v1: https://lore.kernel.org/r/20260608-airoha_qdma_set_trtcm_param-retry-fix-v1-1-f07704f0d8c5@kernel.org
---
 drivers/net/ethernet/airoha/airoha_eth.c | 28 ++++++++++++++++++++++------
 1 file changed, 22 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/airoha/airoha_eth.c b/drivers/net/ethernet/airoha/airoha_eth.c
index 3370c3df7c10..bb5c0599a4ee 100644
--- a/drivers/net/ethernet/airoha/airoha_eth.c
+++ b/drivers/net/ethernet/airoha/airoha_eth.c
@@ -2673,14 +2673,30 @@ static int airoha_qdma_set_trtcm_param(struct airoha_qdma *qdma, int channel,
 		     FIELD_PREP(TRTCM_METER_GROUP_MASK, group) |
 		     FIELD_PREP(TRTCM_PARAM_INDEX_MASK, idx) |
 		     FIELD_PREP(TRTCM_PARAM_RATE_TYPE_MASK, mode);
+	int i;
 
-	airoha_qdma_wr(qdma, REG_TRTCM_DATA_LOW(addr), val);
-	airoha_qdma_wr(qdma, REG_TRTCM_CFG_PARAM(addr), config);
+	for (i = 0; i < 100; i++) {
+		u32 data;
 
-	return read_poll_timeout(airoha_qdma_rr, val,
-				 val & TRTCM_PARAM_RW_DONE_MASK,
-				 USEC_PER_MSEC, 10 * USEC_PER_MSEC, true,
-				 qdma, REG_TRTCM_CFG_PARAM(addr));
+		airoha_qdma_wr(qdma, REG_TRTCM_DATA_LOW(addr), val);
+		wmb();
+		airoha_qdma_wr(qdma, REG_TRTCM_CFG_PARAM(addr), config);
+
+		if (read_poll_timeout(airoha_qdma_rr, data,
+				      data & TRTCM_PARAM_RW_DONE_MASK,
+				      USEC_PER_MSEC, 10 * USEC_PER_MSEC,
+				      true, qdma, REG_TRTCM_CFG_PARAM(addr)))
+			return -ETIMEDOUT;
+
+		if (airoha_qdma_get_trtcm_param(qdma, channel, addr, param,
+						mode, &data, NULL))
+			continue;
+
+		if (data == val)
+			return 0;
+	}
+
+	return -EBUSY;
 }
 
 static int airoha_qdma_set_trtcm_config(struct airoha_qdma *qdma, int channel,

---
base-commit: d07d80b6a129a44538cda1549b7acf95154fb197
change-id: 20260605-airoha_qdma_set_trtcm_param-retry-fix-a9d2956b9b2f

Best regards,
-- 
Lorenzo Bianconi <lorenzo@kernel.org>


^ permalink raw reply related

* Re: [PATCH net v2] net: wwan: iosm: bound device offsets in the MUX downlink decoder
From: Loic Poulain @ 2026-06-22  9:24 UTC (permalink / raw)
  To: Maoyi Xie
  Cc: Sergey Ryazanov, Johannes Berg, Andrew Lunn, David S. Miller,
	Eric Dumazet, Jakub Kicinski, Paolo Abeni, netdev, linux-kernel,
	stable
In-Reply-To: <178196118045.462404.11069139160448641355@maoyixie.com>

On Sat, Jun 20, 2026 at 3:13 PM Maoyi Xie <maoyixie.tju@gmail.com> wrote:
>
> mux_dl_adb_decode() walks a chain of aggregated datagram tables using
> offsets and lengths taken from the modem. first_table_index,
> next_table_index, table_length, datagram_index and datagram_length are
> all device supplied le values. Only first_table_index was checked, and
> only for being non zero. The decoder then formed adth = block +
> adth_index and read the table header and the datagram entries with no
> bound against the received skb. A modem that reports an index or a
> length past the downlink buffer makes the decoder read out of bounds.
>
> The buffer is IPC_MEM_MAX_DL_MUX_LITE_BUF_SIZE and skb->len is at most
> that, so skb->len is the real limit, but none of these in band offsets
> were checked against it.
>
> Validate every device offset and length against skb->len before use.
> The block header must fit. Each table header, on entry and after every
> next_table_index, must lie inside the skb. The datagram table must fit.
> Each datagram index and length must stay inside the skb. The header
> padding must not exceed the datagram length so the receive length does
> not wrap.
>
> This was reproduced under KASAN as a slab out of bounds read on a normal
> downlink receive once the iosm net device is up.
>
> Fixes: 1f52d7b62285 ("net: wwan: iosm: Enable M.2 7360 WWAN card support")
> Suggested-by: Loic Poulain <loic.poulain@oss.qualcomm.com>
> Cc: stable@vger.kernel.org
> Signed-off-by: Maoyi Xie <maoyixie.tju@gmail.com>

Reviewed-by: Loic Poulain <loic.poulain@oss.qualcomm.com>


> ---
> Changes in v2:
> - mux_dl_process_dg now uses intermediate native endian locals dg_index
>   and dg_len so the bound checks read cleaner and avoid the repeated
>   le32_to_cpu conversions, per Loic Poulain's review. No functional
>   change.
>
> Link to v1: https://lore.kernel.org/all/178185979029.4044562.9993615975949055530@maoyixie.com/
>
>  drivers/net/wwan/iosm/iosm_ipc_mux_codec.c | 33 ++++++++++++++++------
>  1 file changed, 24 insertions(+), 9 deletions(-)
>
> diff --git a/drivers/net/wwan/iosm/iosm_ipc_mux_codec.c b/drivers/net/wwan/iosm/iosm_ipc_mux_codec.c
> index bff46f7ca59f..ff9a4bc52f29 100644
> --- a/drivers/net/wwan/iosm/iosm_ipc_mux_codec.c
> +++ b/drivers/net/wwan/iosm/iosm_ipc_mux_codec.c
> @@ -553,19 +553,21 @@ static int mux_dl_process_dg(struct iosm_mux *ipc_mux, struct mux_adbh *adbh,
>         u32 packet_offset, i, rc, dg_len;
>
>         for (i = 0; i < nr_of_dg; i++, dg++) {
> -               if (le32_to_cpu(dg->datagram_index)
> -                               < sizeof(struct mux_adbh))
> +               u32 dg_index = le32_to_cpu(dg->datagram_index);
> +
> +               dg_len = le16_to_cpu(dg->datagram_length);
> +
> +               if (dg_index < sizeof(struct mux_adbh))
>                         goto dg_error;
>
> -               /* Is the packet inside of the ADB */
> -               if (le32_to_cpu(dg->datagram_index) >=
> -                                       le32_to_cpu(adbh->block_length)) {
> +               /* Is the packet inside of the ADB and the received skb ? */
> +               if (dg_index >= le32_to_cpu(adbh->block_length) ||
> +                   dg_index >= skb->len ||
> +                   dg_len > skb->len - dg_index ||
> +                   dl_head_pad_len >= dg_len) {
>                         goto dg_error;
>                 } else {
> -                       packet_offset =
> -                               le32_to_cpu(dg->datagram_index) +
> -                               dl_head_pad_len;
> -                       dg_len = le16_to_cpu(dg->datagram_length);
> +                       packet_offset = dg_index + dl_head_pad_len;
>                         /* Pass the packet to the netif layer. */
>                         rc = ipc_mux_net_receive(ipc_mux, if_id, ipc_mux->wwan,
>                                                  packet_offset,
> @@ -595,6 +597,10 @@ static void mux_dl_adb_decode(struct iosm_mux *ipc_mux,
>         block = skb->data;
>         adbh = (struct mux_adbh *)block;
>
> +       /* The block header itself must fit in the received skb. */
> +       if (skb->len < sizeof(struct mux_adbh))
> +               goto adb_decode_err;
> +
>         /* Process the aggregated datagram tables. */
>         adth_index = le32_to_cpu(adbh->first_table_index);
>
> @@ -606,6 +612,11 @@ static void mux_dl_adb_decode(struct iosm_mux *ipc_mux,
>
>         /* Loop through mixed session tables. */
>         while (adth_index) {
> +               /* The table header must lie within the received skb. */
> +               if (adth_index < sizeof(struct mux_adbh) ||
> +                   adth_index > skb->len - sizeof(struct mux_adth))
> +                       goto adb_decode_err;
> +
>                 /* Get the reference to the table header. */
>                 adth = (struct mux_adth *)(block + adth_index);
>
> @@ -629,6 +640,10 @@ static void mux_dl_adb_decode(struct iosm_mux *ipc_mux,
>                 if (le16_to_cpu(adth->table_length) < sizeof(struct mux_adth))
>                         goto adb_decode_err;
>
> +               /* The whole datagram table must fit in the received skb. */
> +               if (le16_to_cpu(adth->table_length) > skb->len - adth_index)
> +                       goto adb_decode_err;
> +
>                 /* Calculate the number of datagrams. */
>                 nr_of_dg = (le16_to_cpu(adth->table_length) -
>                                         sizeof(struct mux_adth)) /
> --
> 2.34.1
>

^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox