Netdev List

Netdev List
 help / color / mirror / Atom feed

* [PATCH net 11/14] netfilter: nf_tables: use list_del_rcu for netlink hooks
From: Pablo Neira Ayuso @ 2026-04-16  1:30 UTC (permalink / raw)
  To: netfilter-devel; +Cc: davem, netdev, kuba, pabeni, edumazet, fw, horms
In-Reply-To: <20260416013101.221555-1-pablo@netfilter.org>

From: Florian Westphal <fw@strlen.de>

nft_netdev_unregister_hooks() and __nft_unregister_flowtable_net_hooks()
need to use list_del_rcu() because this list can be walked by concurrent
dumpers.

Add a new helper and use it consistently.

Fixes: f9a43007d3f7 ("netfilter: nf_tables: double hook unregistration in netns path")
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_tables_api.c | 44 ++++++++++++++---------------------
 1 file changed, 18 insertions(+), 26 deletions(-)

diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 8c42247a176c..090d4d688a33 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -374,6 +374,12 @@ static void nft_netdev_hook_free_rcu(struct nft_hook *hook)
 	call_rcu(&hook->rcu, __nft_netdev_hook_free_rcu);
 }
 
+static void nft_netdev_hook_unlink_free_rcu(struct nft_hook *hook)
+{
+	list_del_rcu(&hook->list);
+	nft_netdev_hook_free_rcu(hook);
+}
+
 static void nft_netdev_unregister_hooks(struct net *net,
 					struct list_head *hook_list,
 					bool release_netdev)
@@ -384,10 +390,8 @@ static void nft_netdev_unregister_hooks(struct net *net,
 	list_for_each_entry_safe(hook, next, hook_list, list) {
 		list_for_each_entry(ops, &hook->ops_list, list)
 			nf_unregister_net_hook(net, ops);
-		if (release_netdev) {
-			list_del(&hook->list);
-			nft_netdev_hook_free_rcu(hook);
-		}
+		if (release_netdev)
+			nft_netdev_hook_unlink_free_rcu(hook);
 	}
 }
 
@@ -2323,10 +2327,8 @@ void nf_tables_chain_destroy(struct nft_chain *chain)
 
 		if (nft_base_chain_netdev(table->family, basechain->ops.hooknum)) {
 			list_for_each_entry_safe(hook, next,
-						 &basechain->hook_list, list) {
-				list_del_rcu(&hook->list);
-				nft_netdev_hook_free_rcu(hook);
-			}
+						 &basechain->hook_list, list)
+				nft_netdev_hook_unlink_free_rcu(hook);
 		}
 		module_put(basechain->type->owner);
 		if (rcu_access_pointer(basechain->stats)) {
@@ -3026,6 +3028,7 @@ static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy,
 				list_for_each_entry(ops, &h->ops_list, list)
 					nf_unregister_net_hook(ctx->net, ops);
 			}
+			/* hook.list is on stack, no need for list_del_rcu() */
 			list_del(&h->list);
 			nft_netdev_hook_free_rcu(h);
 		}
@@ -8903,10 +8906,8 @@ static void __nft_unregister_flowtable_net_hooks(struct net *net,
 	list_for_each_entry_safe(hook, next, hook_list, list) {
 		list_for_each_entry(ops, &hook->ops_list, list)
 			nft_unregister_flowtable_ops(net, flowtable, ops);
-		if (release_netdev) {
-			list_del(&hook->list);
-			nft_netdev_hook_free_rcu(hook);
-		}
+		if (release_netdev)
+			nft_netdev_hook_unlink_free_rcu(hook);
 	}
 }
 
@@ -8977,8 +8978,7 @@ static int nft_register_flowtable_net_hooks(struct net *net,
 
 			nft_unregister_flowtable_ops(net, flowtable, ops);
 		}
-		list_del_rcu(&hook->list);
-		nft_netdev_hook_free_rcu(hook);
+		nft_netdev_hook_unlink_free_rcu(hook);
 	}
 
 	return err;
@@ -8988,10 +8988,8 @@ static void nft_hooks_destroy(struct list_head *hook_list)
 {
 	struct nft_hook *hook, *next;
 
-	list_for_each_entry_safe(hook, next, hook_list, list) {
-		list_del_rcu(&hook->list);
-		nft_netdev_hook_free_rcu(hook);
-	}
+	list_for_each_entry_safe(hook, next, hook_list, list)
+		nft_netdev_hook_unlink_free_rcu(hook);
 }
 
 static int nft_flowtable_update(struct nft_ctx *ctx, const struct nlmsghdr *nlh,
@@ -9079,8 +9077,7 @@ static int nft_flowtable_update(struct nft_ctx *ctx, const struct nlmsghdr *nlh,
 				nft_unregister_flowtable_ops(ctx->net,
 							     flowtable, ops);
 		}
-		list_del_rcu(&hook->list);
-		nft_netdev_hook_free_rcu(hook);
+		nft_netdev_hook_unlink_free_rcu(hook);
 	}
 
 	return err;
@@ -9586,13 +9583,8 @@ static void nf_tables_flowtable_notify(struct nft_ctx *ctx,
 
 static void nf_tables_flowtable_destroy(struct nft_flowtable *flowtable)
 {
-	struct nft_hook *hook, *next;
-
 	flowtable->data.type->free(&flowtable->data);
-	list_for_each_entry_safe(hook, next, &flowtable->hook_list, list) {
-		list_del_rcu(&hook->list);
-		nft_netdev_hook_free_rcu(hook);
-	}
+	nft_hooks_destroy(&flowtable->hook_list);
 	kfree(flowtable->name);
 	module_put(flowtable->data.type->owner);
 	kfree(flowtable);
-- 
2.47.3


^ permalink raw reply related

* [PATCH net 12/14] rculist: add list_splice_rcu() for private lists
From: Pablo Neira Ayuso @ 2026-04-16  1:30 UTC (permalink / raw)
  To: netfilter-devel; +Cc: davem, netdev, kuba, pabeni, edumazet, fw, horms
In-Reply-To: <20260416013101.221555-1-pablo@netfilter.org>

This patch adds a helper function, list_splice_rcu(), to safely splice
a private (non-RCU-protected) list into an RCU-protected list.

The function ensures that only the pointer visible to RCU readers
(prev->next) is updated using rcu_assign_pointer(), while the rest of
the list manipulations are performed with regular assignments, as the
source list is private and not visible to concurrent RCU readers.

This is useful for moving elements from a private list into a global
RCU-protected list, ensuring safe publication for RCU readers.
Subsystems with some sort of batching mechanism from userspace can
benefit from this new function.

The function __list_splice_rcu() has been added for clarity and to
follow the same pattern as in the existing list_splice*() interfaces,
where there is a check to ensure that the list to splice is not
empty. Note that __list_splice_rcu() has no documentation for this
reason.

Reviewed-by: Paul E. McKenney <paulmck@kernel.org>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/rculist.h | 29 +++++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)

diff --git a/include/linux/rculist.h b/include/linux/rculist.h
index 2abba7552605..e3bc44225692 100644
--- a/include/linux/rculist.h
+++ b/include/linux/rculist.h
@@ -261,6 +261,35 @@ static inline void list_replace_rcu(struct list_head *old,
 	old->prev = LIST_POISON2;
 }

+static inline void __list_splice_rcu(struct list_head *list,
+				     struct list_head *prev,
+				     struct list_head *next)
+{
+	struct list_head *first = list->next;
+	struct list_head *last = list->prev;
+
+	last->next = next;
+	first->prev = prev;
+	next->prev = last;
+	rcu_assign_pointer(list_next_rcu(prev), first);
+}
+
+/**
+ * list_splice_rcu - splice a non-RCU list into an RCU-protected list,
+ *                   designed for stacks.
+ * @list:	the non RCU-protected list to splice
+ * @head:	the place in the existing RCU-protected list to splice
+ *
+ * The list pointed to by @head can be RCU-read traversed concurrently with
+ * this function.
+ */
+static inline void list_splice_rcu(struct list_head *list,
+				   struct list_head *head)
+{
+	if (!list_empty(list))
+		__list_splice_rcu(list, head, head->next);
+}
+
 /**
  * __list_splice_init_rcu - join an RCU-protected list into an existing list.
  * @list:	the RCU-protected list to splice
-- 
2.47.3

^ permalink raw reply related

* [PATCH net 13/14] netfilter: nf_tables: join hook list via splice_list_rcu() in commit phase
From: Pablo Neira Ayuso @ 2026-04-16  1:31 UTC (permalink / raw)
  To: netfilter-devel; +Cc: davem, netdev, kuba, pabeni, edumazet, fw, horms
In-Reply-To: <20260416013101.221555-1-pablo@netfilter.org>

Publish the new hooks into the basechain/flowtable hook list using
list_splice_rcu() to ensure that netlink dump list traversal via RCU is
safe while a concurrent ruleset update is going on.

Fixes: 78d9f48f7f44 ("netfilter: nf_tables: add devices to existing flowtable")
Fixes: b9703ed44ffb ("netfilter: nf_tables: support for adding new devices to an existing netdev chain")
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_tables_api.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 090d4d688a33..8c0706d6d887 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -10904,8 +10904,8 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
 				nft_chain_commit_update(nft_trans_container_chain(trans));
 				nf_tables_chain_notify(&ctx, NFT_MSG_NEWCHAIN,
 						       &nft_trans_chain_hooks(trans));
-				list_splice(&nft_trans_chain_hooks(trans),
-					    &nft_trans_basechain(trans)->hook_list);
+				list_splice_rcu(&nft_trans_chain_hooks(trans),
+						&nft_trans_basechain(trans)->hook_list);
 				/* trans destroyed after rcu grace period */
 			} else {
 				nft_chain_commit_drop_policy(nft_trans_container_chain(trans));
@@ -11034,8 +11034,8 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
 							   nft_trans_flowtable(trans),
 							   &nft_trans_flowtable_hooks(trans),
 							   NFT_MSG_NEWFLOWTABLE);
-				list_splice(&nft_trans_flowtable_hooks(trans),
-					    &nft_trans_flowtable(trans)->hook_list);
+				list_splice_rcu(&nft_trans_flowtable_hooks(trans),
+						&nft_trans_flowtable(trans)->hook_list);
 			} else {
 				nft_clear(net, nft_trans_flowtable(trans));
 				nf_tables_flowtable_notify(&ctx,
-- 
2.47.3


^ permalink raw reply related

* [PATCH net 14/14] netfilter: nf_tables: add hook transactions for device deletions
From: Pablo Neira Ayuso @ 2026-04-16  1:31 UTC (permalink / raw)
  To: netfilter-devel; +Cc: davem, netdev, kuba, pabeni, edumazet, fw, horms
In-Reply-To: <20260416013101.221555-1-pablo@netfilter.org>

Restore the flag that indicates that the hook is going away, i.e.
NFT_HOOK_REMOVE, but add a new transaction object to track deletion
of hooks without altering the basechain/flowtable hook_list during
the preparation phase.

The existing approach that moves the hook from the basechain/flowtable
hook_list to transaction hook_list breaks netlink dump path readers
of this RCU-protected list.

It should be possible to use an array for nft_trans_hook to store the
deleted hooks to compact the representation, but I am not expecting
many hook objects, especially now that wildcard support for devices
is in place.

Note that the nft_trans_chain_hooks() list contains a list of struct
nft_trans_hook objects for DELCHAIN and DELFLOWTABLE commands, while
this list stores struct nft_hook objects for NEWCHAIN and NEWFLOWTABLE.
Note that new commands can be updated to use nft_trans_hook for
consistency.

Fixes: 7d937b107108 ("netfilter: nf_tables: support for deleting devices in an existing netdev chain")
Fixes: b6d9014a3335 ("netfilter: nf_tables: delete flowtable hooks via transaction list")
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_tables.h |  13 ++++
 net/netfilter/nf_tables_api.c     | 118 +++++++++++++++++++++++++-----
 2 files changed, 114 insertions(+), 17 deletions(-)

diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index ec8a8ec9c0aa..3ec41574af77 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -1216,12 +1216,15 @@ struct nft_stats {
 	struct u64_stats_sync	syncp;
 };
 
+#define NFT_HOOK_REMOVE	(1 << 0)
+
 struct nft_hook {
 	struct list_head	list;
 	struct list_head	ops_list;
 	struct rcu_head		rcu;
 	char			ifname[IFNAMSIZ];
 	u8			ifnamelen;
+	u8			flags;
 };
 
 struct nf_hook_ops *nft_hook_find_ops(const struct nft_hook *hook,
@@ -1676,6 +1679,16 @@ struct nft_trans {
 	u8				put_net:1;
 };
 
+/**
+ * struct nft_trans_hook - nf_tables hook update in transaction
+ * @list: used internally
+ * @hook: struct nft_hook with the device hook
+ */
+struct nft_trans_hook {
+	struct list_head		list;
+	struct nft_hook			*hook;
+};
+
 /**
  * struct nft_trans_binding - nf_tables object with binding support in transaction
  * @nft_trans:    base structure, MUST be first member
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 8c0706d6d887..34640933dd55 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -380,6 +380,29 @@ static void nft_netdev_hook_unlink_free_rcu(struct nft_hook *hook)
 	nft_netdev_hook_free_rcu(hook);
 }
 
+static void nft_trans_hook_destroy(struct nft_trans_hook *trans_hook)
+{
+	list_del(&trans_hook->list);
+	kfree(trans_hook);
+}
+
+static void nft_netdev_unregister_trans_hook(struct net *net,
+					     struct list_head *hook_list)
+{
+	struct nft_trans_hook *trans_hook, *next;
+	struct nf_hook_ops *ops;
+	struct nft_hook *hook;
+
+	list_for_each_entry_safe(trans_hook, next, hook_list, list) {
+		hook = trans_hook->hook;
+		list_for_each_entry(ops, &hook->ops_list, list)
+			nf_unregister_net_hook(net, ops);
+
+		nft_netdev_hook_unlink_free_rcu(hook);
+		nft_trans_hook_destroy(trans_hook);
+	}
+}
+
 static void nft_netdev_unregister_hooks(struct net *net,
 					struct list_head *hook_list,
 					bool release_netdev)
@@ -2397,8 +2420,12 @@ static struct nft_hook *nft_hook_list_find(struct list_head *hook_list,
 
 	list_for_each_entry(hook, hook_list, list) {
 		if (!strncmp(hook->ifname, this->ifname,
-			     min(hook->ifnamelen, this->ifnamelen)))
+			     min(hook->ifnamelen, this->ifnamelen))) {
+			if (hook->flags & NFT_HOOK_REMOVE)
+				continue;
+
 			return hook;
+		}
 	}
 
 	return NULL;
@@ -3157,6 +3184,32 @@ static int nf_tables_newchain(struct sk_buff *skb, const struct nfnl_info *info,
 	return nf_tables_addchain(&ctx, family, policy, flags, extack);
 }
 
+static int nft_trans_delhook(struct nft_hook *hook,
+			     struct list_head *del_list)
+{
+	struct nft_trans_hook *trans_hook;
+
+	trans_hook = kmalloc_obj(*trans_hook, GFP_KERNEL);
+	if (!trans_hook)
+		return -ENOMEM;
+
+	trans_hook->hook = hook;
+	list_add_tail(&trans_hook->list, del_list);
+	hook->flags |= NFT_HOOK_REMOVE;
+
+	return 0;
+}
+
+static void nft_trans_delhook_release(struct list_head *del_list)
+{
+	struct nft_trans_hook *trans_hook, *next;
+
+	list_for_each_entry_safe(trans_hook, next, del_list, list) {
+		trans_hook->hook->flags &= ~NFT_HOOK_REMOVE;
+		nft_trans_hook_destroy(trans_hook);
+	}
+}
+
 static int nft_delchain_hook(struct nft_ctx *ctx,
 			     struct nft_base_chain *basechain,
 			     struct netlink_ext_ack *extack)
@@ -3183,7 +3236,10 @@ static int nft_delchain_hook(struct nft_ctx *ctx,
 			err = -ENOENT;
 			goto err_chain_del_hook;
 		}
-		list_move(&hook->list, &chain_del_list);
+		if (nft_trans_delhook(hook, &chain_del_list) < 0) {
+			err = -ENOMEM;
+			goto err_chain_del_hook;
+		}
 	}
 
 	trans = nft_trans_alloc_chain(ctx, NFT_MSG_DELCHAIN);
@@ -3203,7 +3259,7 @@ static int nft_delchain_hook(struct nft_ctx *ctx,
 	return 0;
 
 err_chain_del_hook:
-	list_splice(&chain_del_list, &basechain->hook_list);
+	nft_trans_delhook_release(&chain_del_list);
 	nft_chain_release_hook(&chain_hook);
 
 	return err;
@@ -8984,6 +9040,16 @@ static int nft_register_flowtable_net_hooks(struct net *net,
 	return err;
 }
 
+static void nft_trans_hook_list_destroy(struct list_head *hook_list)
+{
+	struct nft_trans_hook *trans_hook, *next;
+
+	list_for_each_entry_safe(trans_hook, next, hook_list, list) {
+		nft_netdev_hook_unlink_free_rcu(trans_hook->hook);
+		nft_trans_hook_destroy(trans_hook);
+	}
+}
+
 static void nft_hooks_destroy(struct list_head *hook_list)
 {
 	struct nft_hook *hook, *next;
@@ -8992,6 +9058,24 @@ static void nft_hooks_destroy(struct list_head *hook_list)
 		nft_netdev_hook_unlink_free_rcu(hook);
 }
 
+static void nft_flowtable_unregister_trans_hook(struct net *net,
+						struct nft_flowtable *flowtable,
+						struct list_head *hook_list)
+{
+	struct nft_trans_hook *trans_hook, *next;
+	struct nf_hook_ops *ops;
+	struct nft_hook *hook;
+
+	list_for_each_entry_safe(trans_hook, next, hook_list, list) {
+		hook = trans_hook->hook;
+		list_for_each_entry(ops, &hook->ops_list, list)
+			nft_unregister_flowtable_ops(net, flowtable, ops);
+
+		nft_netdev_hook_unlink_free_rcu(hook);
+		nft_trans_hook_destroy(trans_hook);
+	}
+}
+
 static int nft_flowtable_update(struct nft_ctx *ctx, const struct nlmsghdr *nlh,
 				struct nft_flowtable *flowtable,
 				struct netlink_ext_ack *extack)
@@ -9250,7 +9334,10 @@ static int nft_delflowtable_hook(struct nft_ctx *ctx,
 			err = -ENOENT;
 			goto err_flowtable_del_hook;
 		}
-		list_move(&hook->list, &flowtable_del_list);
+		if (nft_trans_delhook(hook, &flowtable_del_list) < 0) {
+			err = -ENOMEM;
+			goto err_flowtable_del_hook;
+		}
 	}
 
 	trans = nft_trans_alloc(ctx, NFT_MSG_DELFLOWTABLE,
@@ -9271,7 +9358,7 @@ static int nft_delflowtable_hook(struct nft_ctx *ctx,
 	return 0;
 
 err_flowtable_del_hook:
-	list_splice(&flowtable_del_list, &flowtable->hook_list);
+	nft_trans_delhook_release(&flowtable_del_list);
 	nft_flowtable_hook_release(&flowtable_hook);
 
 	return err;
@@ -10104,7 +10191,7 @@ static void nft_commit_release(struct nft_trans *trans)
 	case NFT_MSG_DELCHAIN:
 	case NFT_MSG_DESTROYCHAIN:
 		if (nft_trans_chain_update(trans))
-			nft_hooks_destroy(&nft_trans_chain_hooks(trans));
+			nft_trans_hook_list_destroy(&nft_trans_chain_hooks(trans));
 		else
 			nf_tables_chain_destroy(nft_trans_chain(trans));
 		break;
@@ -10127,7 +10214,7 @@ static void nft_commit_release(struct nft_trans *trans)
 	case NFT_MSG_DELFLOWTABLE:
 	case NFT_MSG_DESTROYFLOWTABLE:
 		if (nft_trans_flowtable_update(trans))
-			nft_hooks_destroy(&nft_trans_flowtable_hooks(trans));
+			nft_trans_hook_list_destroy(&nft_trans_flowtable_hooks(trans));
 		else
 			nf_tables_flowtable_destroy(nft_trans_flowtable(trans));
 		break;
@@ -10920,9 +11007,8 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
 				nf_tables_chain_notify(&ctx, NFT_MSG_DELCHAIN,
 						       &nft_trans_chain_hooks(trans));
 				if (!(table->flags & NFT_TABLE_F_DORMANT)) {
-					nft_netdev_unregister_hooks(net,
-								    &nft_trans_chain_hooks(trans),
-								    true);
+					nft_netdev_unregister_trans_hook(net,
+									 &nft_trans_chain_hooks(trans));
 				}
 			} else {
 				nft_chain_del(nft_trans_chain(trans));
@@ -11052,9 +11138,9 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
 							   nft_trans_flowtable(trans),
 							   &nft_trans_flowtable_hooks(trans),
 							   trans->msg_type);
-				nft_unregister_flowtable_net_hooks(net,
-								   nft_trans_flowtable(trans),
-								   &nft_trans_flowtable_hooks(trans));
+				nft_flowtable_unregister_trans_hook(net,
+								    nft_trans_flowtable(trans),
+								    &nft_trans_flowtable_hooks(trans));
 			} else {
 				list_del_rcu(&nft_trans_flowtable(trans)->list);
 				nf_tables_flowtable_notify(&ctx,
@@ -11223,8 +11309,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
 		case NFT_MSG_DELCHAIN:
 		case NFT_MSG_DESTROYCHAIN:
 			if (nft_trans_chain_update(trans)) {
-				list_splice(&nft_trans_chain_hooks(trans),
-					    &nft_trans_basechain(trans)->hook_list);
+				nft_trans_delhook_release(&nft_trans_chain_hooks(trans));
 			} else {
 				nft_use_inc_restore(&table->use);
 				nft_clear(trans->net, nft_trans_chain(trans));
@@ -11338,8 +11423,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
 		case NFT_MSG_DELFLOWTABLE:
 		case NFT_MSG_DESTROYFLOWTABLE:
 			if (nft_trans_flowtable_update(trans)) {
-				list_splice(&nft_trans_flowtable_hooks(trans),
-					    &nft_trans_flowtable(trans)->hook_list);
+				nft_trans_delhook_release(&nft_trans_flowtable_hooks(trans));
 			} else {
 				nft_use_inc_restore(&table->use);
 				nft_clear(trans->net, nft_trans_flowtable(trans));
-- 
2.47.3


^ permalink raw reply related

* NULL pointer dereference in map_kptr_match_type when storing scalar values into kptr slots
From: Hiker Cl @ 2026-04-16  2:16 UTC (permalink / raw)
  To: bpf; +Cc: linux-kernel, netdev

Hi BPF maintainers,

I'm reporting a bug I encountered in the BPF subsystem on Linux kernel
version 7.0.0-g1f5ffc672165.

### Summary
A NULL pointer dereference vulnerability was discovered in the eBPF
verifier. A local user can trigger this by loading a BPF program that
attempts to store a scalar value (non-pointer) into a map slot
designated as a kptr (kernel pointer). This leads to an immediate
kernel crash (DoS).
### Environment
- Kernel version: 7.0.0-rc6 (Commit: 71b500afd2f7 from bpf-next tree),
7.0.0-g1f5ffc672165 (Commit: 1f5ffc672165 from linux tree)
- Architecture: x86_64
- Config: BPF_SYSCALL=y, DEBUG_INFO_BTF=y

### Steps to Reproduce (poc.c)
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
/* BTF type tags for kptrs */
#ifndef __kptr_untrusted
#define __kptr_untrusted __attribute__((btf_type_tag("kptr_untrusted")))
#endif
struct map_value {
struct task_struct __kptr_untrusted *ptr;
};
struct {
__uint(type, BPF_MAP_TYPE_LRU_HASH);
__uint(max_entries, 1);
__type(key, int);
__type(value, struct map_value);
} crashing_map SEC(".maps");
SEC("kprobe/htab_map_get_next_key")
int trigger_crash(struct pt_regs *ctx)
{
int key = 0;
u64 *val = bpf_map_lookup_elem(&crashing_map, &key);
if (val) {
/*
* Trigger: Store a scalar (non-pointer) into a slot
* designated as a kptr. The verifier's map_kptr_match_type
* fails to handle the NULL reg->btf for scalars.
*/
*val = 0xdeadbeef;
}
return 0;
}
char LICENSE[] SEC("license") = "GPL";

### Kernel Log Extract
[   91.277247][ T7627] Oops: general protection fault, probably for
non-canonical address 0xdffffc0000I
[   91.279715][ T7627] KASAN: null-ptr-deref in range
[0x00000000000000e8-0x00000000000000ef]
[   91.280906][ T7627] CPU: 0 UID: 0 PID: 7627 Comm: bpftool Not
tainted 7.0.0-g1f5ffc672165 #5 PREEMPT(full)
[   91.282421][ T7627] Hardware name: QEMU Standard PC (i440FX + PIIX,
1996), BIOS 1.15.0-1 04/01/2014
[   91.283556][ T7627] RIP: 0010:btf_is_kernel+0x2a/0x50
...

### Actual Results
The kernel crashes during the verification phase. The verifier calls
`map_kptr_match_type`, which subsequently calls
`btf_is_kernel(reg->btf)`. Since the source register is a scalar,
`reg->btf` is NULL, leading to a NULL pointer dereference.

Detailed information, including a reproducible BPF program and kernel
logs, has been filed on Bugzilla:

  https://bugzilla.kernel.org/show_bug.cgi?id=221372

Please let me know if you need more information or if I can help test a patch.

^ permalink raw reply

* [PATCH iwl-next v2 0/3] igc: add support for forcing link speed without autonegotiation
From: KhaiWenTan @ 2026-04-16  1:55 UTC (permalink / raw)
  To: anthony.l.nguyen, przemyslaw.kitszel, andrew+netdev, davem,
	edumazet, kuba, pabeni
  Cc: intel-wired-lan, netdev, linux-kernel, faizal.abdul.rahim,
	hong.aun.looi, khai.wen.tan, Faizal Rahim

From: Faizal Rahim <faizal.abdul.rahim@linux.intel.com>

This series adds support for forcing 10/100 Mb/s link speed via ethtool
when autonegotiation is disabled on the igc driver.

Changes in v2:
- Simon Horman's review comment: when forcing half-duplex, set
  hw->fc.requested_mode = igc_fc_none, since half-duplex cannot support
  flow control per IEEE 802.3.
- Split the original single patch into three patches for clarity:
  patches 1 and 2 are preparatory cleanups; patch 3 carries the
  functional change.

v1 at:
https://patchwork.ozlabs.org/project/intel-wired-lan/patch/20260409072747.217836-1-khai.wen.tan@linux.intel.com/

Faizal Rahim (3):
  igc: remove unused autoneg_failed field
  igc: move autoneg-enabled settings into igc_handle_autoneg_enabled()
  igc: add support for forcing link speed without autonegotiation

 drivers/net/ethernet/intel/igc/igc_base.c    |  35 +++-
 drivers/net/ethernet/intel/igc/igc_defines.h |   9 +-
 drivers/net/ethernet/intel/igc/igc_ethtool.c | 203 +++++++++++++------
 drivers/net/ethernet/intel/igc/igc_hw.h      |  10 +-
 drivers/net/ethernet/intel/igc/igc_mac.c     |  16 +-
 drivers/net/ethernet/intel/igc/igc_main.c    |   2 +-
 drivers/net/ethernet/intel/igc/igc_phy.c     |  65 +++++-
 drivers/net/ethernet/intel/igc/igc_phy.h     |   1 +
 8 files changed, 251 insertions(+), 90 deletions(-)

--
2.43.0


^ permalink raw reply

* [PATCH iwl-next v2 1/3] igc: remove unused autoneg_failed field
From: KhaiWenTan @ 2026-04-16  1:55 UTC (permalink / raw)
  To: anthony.l.nguyen, przemyslaw.kitszel, andrew+netdev, davem,
	edumazet, kuba, pabeni
  Cc: intel-wired-lan, netdev, linux-kernel, faizal.abdul.rahim,
	hong.aun.looi, khai.wen.tan, Faizal Rahim, Looi, KhaiWenTan
In-Reply-To: <20260416015520.6090-1-khai.wen.tan@linux.intel.com>

From: Faizal Rahim <faizal.abdul.rahim@linux.intel.com>

autoneg_failed in struct igc_mac_info is never set in the igc driver.
Remove the field and the dead code checking it in
igc_config_fc_after_link_up().

Reviewed-by: Looi, Hong Aun <hong.aun.looi@intel.com>
Signed-off-by: Faizal Rahim <faizal.abdul.rahim@linux.intel.com>
Signed-off-by: KhaiWenTan <khai.wen.tan@linux.intel.com>
---
 drivers/net/ethernet/intel/igc/igc_hw.h  |  1 -
 drivers/net/ethernet/intel/igc/igc_mac.c | 16 +---------------
 2 files changed, 1 insertion(+), 16 deletions(-)

diff --git a/drivers/net/ethernet/intel/igc/igc_hw.h b/drivers/net/ethernet/intel/igc/igc_hw.h
index be8a49a86d09..86ab8f566f44 100644
--- a/drivers/net/ethernet/intel/igc/igc_hw.h
+++ b/drivers/net/ethernet/intel/igc/igc_hw.h
@@ -92,7 +92,6 @@ struct igc_mac_info {
 	bool asf_firmware_present;
 	bool arc_subsystem_valid;

-	bool autoneg_failed;
 	bool get_link_status;
 };

diff --git a/drivers/net/ethernet/intel/igc/igc_mac.c b/drivers/net/ethernet/intel/igc/igc_mac.c
index 7ac6637f8db7..142beb9ae557 100644
--- a/drivers/net/ethernet/intel/igc/igc_mac.c
+++ b/drivers/net/ethernet/intel/igc/igc_mac.c
@@ -438,28 +438,14 @@ void igc_config_collision_dist(struct igc_hw *hw)
  * Checks the status of auto-negotiation after link up to ensure that the
  * speed and duplex were not forced.  If the link needed to be forced, then
  * flow control needs to be forced also.  If auto-negotiation is enabled
- * and did not fail, then we configure flow control based on our link
- * partner.
+ * then we configure flow control based on our link partner.
  */
 s32 igc_config_fc_after_link_up(struct igc_hw *hw)
 {
 	u16 mii_status_reg, mii_nway_adv_reg, mii_nway_lp_ability_reg;
-	struct igc_mac_info *mac = &hw->mac;
 	u16 speed, duplex;
 	s32 ret_val = 0;

-	/* Check for the case where we have fiber media and auto-neg failed
-	 * so we had to force link.  In this case, we need to force the
-	 * configuration of the MAC to match the "fc" parameter.
-	 */
-	if (mac->autoneg_failed)
-		ret_val = igc_force_mac_fc(hw);
-
-	if (ret_val) {
-		hw_dbg("Error forcing flow control settings\n");
-		goto out;
-	}
-
 	/* In auto-neg, we need to check and see if Auto-Neg has completed,
 	 * and if so, how the PHY and link partner has flow control
 	 * configured.
--
2.43.0


^ permalink raw reply related

* [PATCH iwl-next v2 2/3] igc: move autoneg-enabled settings into igc_handle_autoneg_enabled()
From: KhaiWenTan @ 2026-04-16  1:55 UTC (permalink / raw)
  To: anthony.l.nguyen, przemyslaw.kitszel, andrew+netdev, davem,
	edumazet, kuba, pabeni
  Cc: intel-wired-lan, netdev, linux-kernel, faizal.abdul.rahim,
	hong.aun.looi, khai.wen.tan, Faizal Rahim, Looi, KhaiWenTan
In-Reply-To: <20260416015520.6090-1-khai.wen.tan@linux.intel.com>

From: Faizal Rahim <faizal.abdul.rahim@linux.intel.com>

Move the advertised link modes and flow control configuration from
igc_ethtool_set_link_ksettings() into igc_handle_autoneg_enabled().

No functional change.

Reviewed-by: Looi, Hong Aun <hong.aun.looi@intel.com>
Signed-off-by: Faizal Rahim <faizal.abdul.rahim@linux.intel.com>
Signed-off-by: KhaiWenTan <khai.wen.tan@linux.intel.com>
---
 drivers/net/ethernet/intel/igc/igc_ethtool.c | 72 ++++++++++++--------
 1 file changed, 44 insertions(+), 28 deletions(-)

diff --git a/drivers/net/ethernet/intel/igc/igc_ethtool.c b/drivers/net/ethernet/intel/igc/igc_ethtool.c
index 0122009bedd0..cfcbf2fdad6e 100644
--- a/drivers/net/ethernet/intel/igc/igc_ethtool.c
+++ b/drivers/net/ethernet/intel/igc/igc_ethtool.c
@@ -2000,6 +2000,49 @@ static int igc_ethtool_get_link_ksettings(struct net_device *netdev,
 	return 0;
 }

+/**
+ * igc_handle_autoneg_enabled - Configure autonegotiation advertisement
+ * @adapter: private driver structure
+ * @cmd: ethtool link ksettings from user
+ *
+ * Records advertised speeds and flow control settings when autoneg
+ * is enabled.
+ */
+static void igc_handle_autoneg_enabled(struct igc_adapter *adapter,
+				       const struct ethtool_link_ksettings *cmd)
+{
+	struct igc_hw *hw = &adapter->hw;
+	u16 advertised = 0;
+
+	if (ethtool_link_ksettings_test_link_mode(cmd, advertising,
+						  2500baseT_Full))
+		advertised |= ADVERTISE_2500_FULL;
+
+	if (ethtool_link_ksettings_test_link_mode(cmd, advertising,
+						  1000baseT_Full))
+		advertised |= ADVERTISE_1000_FULL;
+
+	if (ethtool_link_ksettings_test_link_mode(cmd, advertising,
+						  100baseT_Full))
+		advertised |= ADVERTISE_100_FULL;
+
+	if (ethtool_link_ksettings_test_link_mode(cmd, advertising,
+						  100baseT_Half))
+		advertised |= ADVERTISE_100_HALF;
+
+	if (ethtool_link_ksettings_test_link_mode(cmd, advertising,
+						  10baseT_Full))
+		advertised |= ADVERTISE_10_FULL;
+
+	if (ethtool_link_ksettings_test_link_mode(cmd, advertising,
+						  10baseT_Half))
+		advertised |= ADVERTISE_10_HALF;
+
+	hw->phy.autoneg_advertised = advertised;
+	if (adapter->fc_autoneg)
+		hw->fc.requested_mode = igc_fc_default;
+}
+
 static int
 igc_ethtool_set_link_ksettings(struct net_device *netdev,
 			       const struct ethtool_link_ksettings *cmd)
@@ -2007,7 +2050,6 @@ igc_ethtool_set_link_ksettings(struct net_device *netdev,
 	struct igc_adapter *adapter = netdev_priv(netdev);
 	struct net_device *dev = adapter->netdev;
 	struct igc_hw *hw = &adapter->hw;
-	u16 advertised = 0;

 	/* When adapter in resetting mode, autoneg/speed/duplex
 	 * cannot be changed
@@ -2032,34 +2074,8 @@ igc_ethtool_set_link_ksettings(struct net_device *netdev,
 	while (test_and_set_bit(__IGC_RESETTING, &adapter->state))
 		usleep_range(1000, 2000);

-	if (ethtool_link_ksettings_test_link_mode(cmd, advertising,
-						  2500baseT_Full))
-		advertised |= ADVERTISE_2500_FULL;
-
-	if (ethtool_link_ksettings_test_link_mode(cmd, advertising,
-						  1000baseT_Full))
-		advertised |= ADVERTISE_1000_FULL;
-
-	if (ethtool_link_ksettings_test_link_mode(cmd, advertising,
-						  100baseT_Full))
-		advertised |= ADVERTISE_100_FULL;
-
-	if (ethtool_link_ksettings_test_link_mode(cmd, advertising,
-						  100baseT_Half))
-		advertised |= ADVERTISE_100_HALF;
-
-	if (ethtool_link_ksettings_test_link_mode(cmd, advertising,
-						  10baseT_Full))
-		advertised |= ADVERTISE_10_FULL;
-
-	if (ethtool_link_ksettings_test_link_mode(cmd, advertising,
-						  10baseT_Half))
-		advertised |= ADVERTISE_10_HALF;
-
 	if (cmd->base.autoneg == AUTONEG_ENABLE) {
-		hw->phy.autoneg_advertised = advertised;
-		if (adapter->fc_autoneg)
-			hw->fc.requested_mode = igc_fc_default;
+		igc_handle_autoneg_enabled(adapter, cmd);
 	} else {
 		netdev_info(dev, "Force mode currently not supported\n");
 	}
--
2.43.0


^ permalink raw reply related

* [PATCH iwl-next v2 3/3] igc: add support for forcing link speed without autonegotiation
From: KhaiWenTan @ 2026-04-16  1:55 UTC (permalink / raw)
  To: anthony.l.nguyen, przemyslaw.kitszel, andrew+netdev, davem,
	edumazet, kuba, pabeni
  Cc: intel-wired-lan, netdev, linux-kernel, faizal.abdul.rahim,
	hong.aun.looi, khai.wen.tan, Faizal Rahim, Looi, KhaiWenTan
In-Reply-To: <20260416015520.6090-1-khai.wen.tan@linux.intel.com>

From: Faizal Rahim <faizal.abdul.rahim@linux.intel.com>

Allow users to force 10/100 Mb/s link speed and duplex via ethtool
when autonegotiation is disabled. Previously, the driver rejected
these requests with "Force mode currently not supported".

Forcing at 1000 Mb/s and 2500 Mb/s is not supported.

Reviewed-by: Looi, Hong Aun <hong.aun.looi@intel.com>
Signed-off-by: Faizal Rahim <faizal.abdul.rahim@linux.intel.com>
Signed-off-by: KhaiWenTan <khai.wen.tan@linux.intel.com>
---
 drivers/net/ethernet/intel/igc/igc_base.c    |  35 ++++-
 drivers/net/ethernet/intel/igc/igc_defines.h |   9 +-
 drivers/net/ethernet/intel/igc/igc_ethtool.c | 131 +++++++++++++------
 drivers/net/ethernet/intel/igc/igc_hw.h      |   9 ++
 drivers/net/ethernet/intel/igc/igc_mac.c     |  10 ++
 drivers/net/ethernet/intel/igc/igc_main.c    |   2 +-
 drivers/net/ethernet/intel/igc/igc_phy.c     |  65 ++++++++-
 drivers/net/ethernet/intel/igc/igc_phy.h     |   1 +
 8 files changed, 211 insertions(+), 51 deletions(-)

diff --git a/drivers/net/ethernet/intel/igc/igc_base.c b/drivers/net/ethernet/intel/igc/igc_base.c
index 1613b562d17c..ab9120a3127f 100644
--- a/drivers/net/ethernet/intel/igc/igc_base.c
+++ b/drivers/net/ethernet/intel/igc/igc_base.c
@@ -114,11 +114,35 @@ static s32 igc_setup_copper_link_base(struct igc_hw *hw)
 	u32 ctrl;

 	ctrl = rd32(IGC_CTRL);
-	ctrl |= IGC_CTRL_SLU;
-	ctrl &= ~(IGC_CTRL_FRCSPD | IGC_CTRL_FRCDPX);
-	wr32(IGC_CTRL, ctrl);
-
-	ret_val = igc_setup_copper_link(hw);
+	ctrl &= ~(IGC_CTRL_FRCSPD | IGC_CTRL_FRCDPX |
+		  IGC_CTRL_SPEED_MASK | IGC_CTRL_FD);
+
+	if (hw->mac.autoneg_enabled) {
+		ctrl |= IGC_CTRL_SLU;
+		wr32(IGC_CTRL, ctrl);
+		ret_val = igc_setup_copper_link(hw);
+	} else {
+		ctrl |= IGC_CTRL_SLU | IGC_CTRL_FRCSPD | IGC_CTRL_FRCDPX;
+
+		switch (hw->mac.forced_speed_duplex) {
+		case IGC_FORCED_10H:
+			ctrl |= IGC_CTRL_SPEED_10;
+			break;
+		case IGC_FORCED_10F:
+			ctrl |= IGC_CTRL_SPEED_10 | IGC_CTRL_FD;
+			break;
+		case IGC_FORCED_100H:
+			ctrl |= IGC_CTRL_SPEED_100;
+			break;
+		case IGC_FORCED_100F:
+			ctrl |= IGC_CTRL_SPEED_100 | IGC_CTRL_FD;
+			break;
+		default:
+			return -IGC_ERR_CONFIG;
+		}
+		wr32(IGC_CTRL, ctrl);
+		ret_val = igc_setup_copper_link(hw);
+	}

 	return ret_val;
 }
@@ -443,6 +467,7 @@ static const struct igc_phy_operations igc_phy_ops_base = {
 	.reset			= igc_phy_hw_reset,
 	.read_reg		= igc_read_phy_reg_gpy,
 	.write_reg		= igc_write_phy_reg_gpy,
+	.force_speed_duplex	= igc_force_speed_duplex,
 };

 const struct igc_info igc_base_info = {
diff --git a/drivers/net/ethernet/intel/igc/igc_defines.h b/drivers/net/ethernet/intel/igc/igc_defines.h
index 9482ab11f050..3f504751c2d9 100644
--- a/drivers/net/ethernet/intel/igc/igc_defines.h
+++ b/drivers/net/ethernet/intel/igc/igc_defines.h
@@ -129,10 +129,13 @@
 #define IGC_ERR_SWFW_SYNC		13

 /* Device Control */
+#define IGC_CTRL_FD		BIT(0)  /* Full Duplex */
 #define IGC_CTRL_RST		0x04000000  /* Global reset */
-
 #define IGC_CTRL_PHY_RST	0x80000000  /* PHY Reset */
 #define IGC_CTRL_SLU		0x00000040  /* Set link up (Force Link) */
+#define IGC_CTRL_SPEED_MASK	GENMASK(10, 8)
+#define IGC_CTRL_SPEED_10	FIELD_PREP(IGC_CTRL_SPEED_MASK, 0)
+#define IGC_CTRL_SPEED_100	FIELD_PREP(IGC_CTRL_SPEED_MASK, 1)
 #define IGC_CTRL_FRCSPD		0x00000800  /* Force Speed */
 #define IGC_CTRL_FRCDPX		0x00001000  /* Force Duplex */
 #define IGC_CTRL_VME		0x40000000  /* IEEE VLAN mode enable */
@@ -673,6 +676,10 @@
 #define IGC_GEN_POLL_TIMEOUT	1920

 /* PHY Control Register */
+#define MII_CR_SPEED_MASK	(BIT(6) | BIT(13))
+#define MII_CR_SPEED_10		0x0000	/* SSM=0, SSL=0: 10 Mb/s */
+#define MII_CR_SPEED_100	BIT(13)	/* SSM=0, SSL=1: 100 Mb/s */
+#define MII_CR_DUPLEX_EN	BIT(8)	/* 0 = Half Duplex, 1 = Full Duplex */
 #define MII_CR_RESTART_AUTO_NEG	0x0200  /* Restart auto negotiation */
 #define MII_CR_POWER_DOWN	0x0800  /* Power down */
 #define MII_CR_AUTO_NEG_EN	0x1000  /* Auto Neg Enable */
diff --git a/drivers/net/ethernet/intel/igc/igc_ethtool.c b/drivers/net/ethernet/intel/igc/igc_ethtool.c
index cfcbf2fdad6e..5bd37d1be168 100644
--- a/drivers/net/ethernet/intel/igc/igc_ethtool.c
+++ b/drivers/net/ethernet/intel/igc/igc_ethtool.c
@@ -1914,44 +1914,58 @@ static int igc_ethtool_get_link_ksettings(struct net_device *netdev,
 	ethtool_link_ksettings_add_link_mode(cmd, supported, TP);
 	ethtool_link_ksettings_add_link_mode(cmd, advertising, TP);

-	/* advertising link modes */
-	if (hw->phy.autoneg_advertised & ADVERTISE_10_HALF)
-		ethtool_link_ksettings_add_link_mode(cmd, advertising, 10baseT_Half);
-	if (hw->phy.autoneg_advertised & ADVERTISE_10_FULL)
-		ethtool_link_ksettings_add_link_mode(cmd, advertising, 10baseT_Full);
-	if (hw->phy.autoneg_advertised & ADVERTISE_100_HALF)
-		ethtool_link_ksettings_add_link_mode(cmd, advertising, 100baseT_Half);
-	if (hw->phy.autoneg_advertised & ADVERTISE_100_FULL)
-		ethtool_link_ksettings_add_link_mode(cmd, advertising, 100baseT_Full);
-	if (hw->phy.autoneg_advertised & ADVERTISE_1000_FULL)
-		ethtool_link_ksettings_add_link_mode(cmd, advertising, 1000baseT_Full);
-	if (hw->phy.autoneg_advertised & ADVERTISE_2500_FULL)
-		ethtool_link_ksettings_add_link_mode(cmd, advertising, 2500baseT_Full);
-
 	/* set autoneg settings */
 	ethtool_link_ksettings_add_link_mode(cmd, supported, Autoneg);
-	ethtool_link_ksettings_add_link_mode(cmd, advertising, Autoneg);
+	if (hw->mac.autoneg_enabled) {
+		ethtool_link_ksettings_add_link_mode(cmd, advertising, Autoneg);
+		cmd->base.autoneg = AUTONEG_ENABLE;
+
+		/* advertising link modes only apply when autoneg is on */
+		if (hw->phy.autoneg_advertised & ADVERTISE_10_HALF)
+			ethtool_link_ksettings_add_link_mode(cmd, advertising,
+							     10baseT_Half);
+		if (hw->phy.autoneg_advertised & ADVERTISE_10_FULL)
+			ethtool_link_ksettings_add_link_mode(cmd, advertising,
+							     10baseT_Full);
+		if (hw->phy.autoneg_advertised & ADVERTISE_100_HALF)
+			ethtool_link_ksettings_add_link_mode(cmd, advertising,
+							     100baseT_Half);
+		if (hw->phy.autoneg_advertised & ADVERTISE_100_FULL)
+			ethtool_link_ksettings_add_link_mode(cmd, advertising,
+							     100baseT_Full);
+		if (hw->phy.autoneg_advertised & ADVERTISE_1000_FULL)
+			ethtool_link_ksettings_add_link_mode(cmd, advertising,
+							     1000baseT_Full);
+		if (hw->phy.autoneg_advertised & ADVERTISE_2500_FULL)
+			ethtool_link_ksettings_add_link_mode(cmd, advertising,
+							     2500baseT_Full);
+
+		/* Set pause flow control advertising */
+		switch (hw->fc.requested_mode) {
+		case igc_fc_full:
+			ethtool_link_ksettings_add_link_mode(cmd, advertising,
+							     Pause);
+			break;
+		case igc_fc_rx_pause:
+			ethtool_link_ksettings_add_link_mode(cmd, advertising,
+							     Pause);
+			ethtool_link_ksettings_add_link_mode(cmd, advertising,
+							     Asym_Pause);
+			break;
+		case igc_fc_tx_pause:
+			ethtool_link_ksettings_add_link_mode(cmd, advertising,
+							     Asym_Pause);
+			break;
+		default:
+			break;
+		}
+	} else {
+		cmd->base.autoneg = AUTONEG_DISABLE;
+	}

-	/* Set pause flow control settings */
+	/* Pause is always supported */
 	ethtool_link_ksettings_add_link_mode(cmd, supported, Pause);

-	switch (hw->fc.requested_mode) {
-	case igc_fc_full:
-		ethtool_link_ksettings_add_link_mode(cmd, advertising, Pause);
-		break;
-	case igc_fc_rx_pause:
-		ethtool_link_ksettings_add_link_mode(cmd, advertising, Pause);
-		ethtool_link_ksettings_add_link_mode(cmd, advertising,
-						     Asym_Pause);
-		break;
-	case igc_fc_tx_pause:
-		ethtool_link_ksettings_add_link_mode(cmd, advertising,
-						     Asym_Pause);
-		break;
-	default:
-		break;
-	}
-
 	status = pm_runtime_suspended(&adapter->pdev->dev) ?
 		 0 : rd32(IGC_STATUS);

@@ -1983,7 +1997,6 @@ static int igc_ethtool_get_link_ksettings(struct net_device *netdev,
 		cmd->base.duplex = DUPLEX_UNKNOWN;
 	}
 	cmd->base.speed = speed;
-	cmd->base.autoneg = AUTONEG_ENABLE;

 	/* MDI-X => 2; MDI =>1; Invalid =>0 */
 	if (hw->phy.media_type == igc_media_type_copper)
@@ -2000,6 +2013,41 @@ static int igc_ethtool_get_link_ksettings(struct net_device *netdev,
 	return 0;
 }

+/**
+ * igc_handle_autoneg_disabled - Configure forced speed/duplex settings
+ * @adapter: private driver structure
+ * @speed: requested speed (must be SPEED_10 or SPEED_100)
+ * @duplex: requested duplex
+ *
+ * Records forced speed/duplex when autoneg is disabled.
+ * Caller must validate speed before calling this function.
+ */
+static void igc_handle_autoneg_disabled(struct igc_adapter *adapter, u32 speed,
+					u8 duplex)
+{
+	struct igc_mac_info *mac = &adapter->hw.mac;
+
+	switch (speed) {
+	case SPEED_10:
+		mac->forced_speed_duplex = (duplex == DUPLEX_FULL) ?
+			IGC_FORCED_10F : IGC_FORCED_10H;
+		break;
+	case SPEED_100:
+		mac->forced_speed_duplex = (duplex == DUPLEX_FULL) ?
+			IGC_FORCED_100F : IGC_FORCED_100H;
+		break;
+	default:
+		WARN_ONCE(1, "Unsupported speed %u\n", speed);
+		return;
+	}
+
+	mac->autoneg_enabled = false;
+
+	/* Half-duplex cannot support flow control per IEEE 802.3 */
+	if (duplex == DUPLEX_HALF)
+		adapter->hw.fc.requested_mode = igc_fc_none;
+}
+
 /**
  * igc_handle_autoneg_enabled - Configure autonegotiation advertisement
  * @adapter: private driver structure
@@ -2038,6 +2086,7 @@ static void igc_handle_autoneg_enabled(struct igc_adapter *adapter,
 						  10baseT_Half))
 		advertised |= ADVERTISE_10_HALF;

+	hw->mac.autoneg_enabled = true;
 	hw->phy.autoneg_advertised = advertised;
 	if (adapter->fc_autoneg)
 		hw->fc.requested_mode = igc_fc_default;
@@ -2071,14 +2120,20 @@ igc_ethtool_set_link_ksettings(struct net_device *netdev,
 		}
 	}

+	if (cmd->base.autoneg == AUTONEG_DISABLE &&
+	    cmd->base.speed != SPEED_10 && cmd->base.speed != SPEED_100) {
+		netdev_info(dev, "Unsupported speed for forced link\n");
+		return -EINVAL;
+	}
+
 	while (test_and_set_bit(__IGC_RESETTING, &adapter->state))
 		usleep_range(1000, 2000);

-	if (cmd->base.autoneg == AUTONEG_ENABLE) {
+	if (cmd->base.autoneg == AUTONEG_ENABLE)
 		igc_handle_autoneg_enabled(adapter, cmd);
-	} else {
-		netdev_info(dev, "Force mode currently not supported\n");
-	}
+	else
+		igc_handle_autoneg_disabled(adapter, cmd->base.speed,
+					    cmd->base.duplex);

 	/* MDI-X => 2; MDI => 1; Auto => 3 */
 	if (cmd->base.eth_tp_mdix_ctrl) {
diff --git a/drivers/net/ethernet/intel/igc/igc_hw.h b/drivers/net/ethernet/intel/igc/igc_hw.h
index 86ab8f566f44..62aaee55668a 100644
--- a/drivers/net/ethernet/intel/igc/igc_hw.h
+++ b/drivers/net/ethernet/intel/igc/igc_hw.h
@@ -73,6 +73,13 @@ struct igc_info {

 extern const struct igc_info igc_base_info;

+enum igc_forced_speed_duplex {
+	IGC_FORCED_10H,
+	IGC_FORCED_10F,
+	IGC_FORCED_100H,
+	IGC_FORCED_100F,
+};
+
 struct igc_mac_info {
 	struct igc_mac_operations ops;

@@ -93,6 +100,8 @@ struct igc_mac_info {
 	bool arc_subsystem_valid;

 	bool get_link_status;
+	bool autoneg_enabled;
+	enum igc_forced_speed_duplex forced_speed_duplex;
 };

 struct igc_nvm_operations {
diff --git a/drivers/net/ethernet/intel/igc/igc_mac.c b/drivers/net/ethernet/intel/igc/igc_mac.c
index 142beb9ae557..8bbb6d5581c7 100644
--- a/drivers/net/ethernet/intel/igc/igc_mac.c
+++ b/drivers/net/ethernet/intel/igc/igc_mac.c
@@ -446,6 +446,16 @@ s32 igc_config_fc_after_link_up(struct igc_hw *hw)
 	u16 speed, duplex;
 	s32 ret_val = 0;

+	/* When autoneg is disabled, force the MAC flow control settings
+	 * to match the "fc" parameter.
+	 */
+	if (!hw->mac.autoneg_enabled) {
+		ret_val = igc_force_mac_fc(hw);
+		if (ret_val)
+			hw_dbg("Error forcing flow control settings\n");
+		goto out;
+	}
+
 	/* In auto-neg, we need to check and see if Auto-Neg has completed,
 	 * and if so, how the PHY and link partner has flow control
 	 * configured.
diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
index 72bc5128d8b8..437e1d1ef1e4 100644
--- a/drivers/net/ethernet/intel/igc/igc_main.c
+++ b/drivers/net/ethernet/intel/igc/igc_main.c
@@ -7298,7 +7298,7 @@ static int igc_probe(struct pci_dev *pdev,
 	/* Initialize link properties that are user-changeable */
 	adapter->fc_autoneg = true;
 	hw->phy.autoneg_advertised = 0xaf;
-
+	hw->mac.autoneg_enabled = true;
 	hw->fc.requested_mode = igc_fc_default;
 	hw->fc.current_mode = igc_fc_default;

diff --git a/drivers/net/ethernet/intel/igc/igc_phy.c b/drivers/net/ethernet/intel/igc/igc_phy.c
index 6c4d204aecfa..4cf737fb3b21 100644
--- a/drivers/net/ethernet/intel/igc/igc_phy.c
+++ b/drivers/net/ethernet/intel/igc/igc_phy.c
@@ -494,12 +494,20 @@ s32 igc_setup_copper_link(struct igc_hw *hw)
 	s32 ret_val = 0;
 	bool link;

-	/* Setup autoneg and flow control advertisement and perform
-	 * autonegotiation.
-	 */
-	ret_val = igc_copper_link_autoneg(hw);
-	if (ret_val)
-		goto out;
+	if (hw->mac.autoneg_enabled) {
+		/* Setup autoneg and flow control advertisement and perform
+		 * autonegotiation.
+		 */
+		ret_val = igc_copper_link_autoneg(hw);
+		if (ret_val)
+			goto out;
+	} else {
+		ret_val = hw->phy.ops.force_speed_duplex(hw);
+		if (ret_val) {
+			hw_dbg("Error Forcing Speed/Duplex\n");
+			goto out;
+		}
+	}

 	/* Check link status. Wait up to 100 microseconds for link to become
 	 * valid.
@@ -778,3 +786,48 @@ u16 igc_read_phy_fw_version(struct igc_hw *hw)

 	return gphy_version;
 }
+
+/**
+ * igc_force_speed_duplex - Force PHY speed and duplex settings
+ * @hw: pointer to the HW structure
+ *
+ * Programs the GPY PHY control register to disable autonegotiation
+ * and force the speed/duplex indicated by hw->mac.forced_speed_duplex.
+ */
+s32 igc_force_speed_duplex(struct igc_hw *hw)
+{
+	struct igc_phy_info *phy = &hw->phy;
+	u16 phy_ctrl;
+	s32 ret_val;
+
+	ret_val = phy->ops.read_reg(hw, PHY_CONTROL, &phy_ctrl);
+	if (ret_val)
+		return ret_val;
+
+	phy_ctrl &= ~(MII_CR_SPEED_MASK | MII_CR_DUPLEX_EN |
+		      MII_CR_AUTO_NEG_EN | MII_CR_RESTART_AUTO_NEG);
+
+	switch (hw->mac.forced_speed_duplex) {
+	case IGC_FORCED_10H:
+		phy_ctrl |= MII_CR_SPEED_10;
+		break;
+	case IGC_FORCED_10F:
+		phy_ctrl |= MII_CR_SPEED_10 | MII_CR_DUPLEX_EN;
+		break;
+	case IGC_FORCED_100H:
+		phy_ctrl |= MII_CR_SPEED_100;
+		break;
+	case IGC_FORCED_100F:
+		phy_ctrl |= MII_CR_SPEED_100 | MII_CR_DUPLEX_EN;
+		break;
+	default:
+		return -IGC_ERR_CONFIG;
+	}
+
+	ret_val = phy->ops.write_reg(hw, PHY_CONTROL, phy_ctrl);
+	if (ret_val)
+		return ret_val;
+
+	hw->mac.get_link_status = true;
+	return 0;
+}
diff --git a/drivers/net/ethernet/intel/igc/igc_phy.h b/drivers/net/ethernet/intel/igc/igc_phy.h
index 832a7e359f18..d37a89174826 100644
--- a/drivers/net/ethernet/intel/igc/igc_phy.h
+++ b/drivers/net/ethernet/intel/igc/igc_phy.h
@@ -18,5 +18,6 @@ void igc_power_down_phy_copper(struct igc_hw *hw);
 s32 igc_write_phy_reg_gpy(struct igc_hw *hw, u32 offset, u16 data);
 s32 igc_read_phy_reg_gpy(struct igc_hw *hw, u32 offset, u16 *data);
 u16 igc_read_phy_fw_version(struct igc_hw *hw);
+s32 igc_force_speed_duplex(struct igc_hw *hw);

 #endif
--
2.43.0


^ permalink raw reply related

* [PATCH net v5] openvswitch: cap upcall PID array size and pre-size vport replies
From: Weiming Shi @ 2026-04-16  2:46 UTC (permalink / raw)
  To: Aaron Conole, Eelco Chaudron, Ilya Maximets, David S . Miller,
	Eric Dumazet, Jakub Kicinski, Paolo Abeni
  Cc: Simon Horman, Pravin B Shelar, Thomas Graf, Alex Wang, netdev,
	dev, Xiang Mei, Weiming Shi

The vport netlink reply helpers allocate a fixed-size skb with
nlmsg_new(NLMSG_DEFAULT_SIZE, ...) but serialize the full upcall PID
array via ovs_vport_get_upcall_portids().  Since
ovs_vport_set_upcall_portids() accepts any non-zero multiple of
sizeof(u32) with no upper bound, a CAP_NET_ADMIN user can install a PID
array large enough to overflow the reply buffer, causing nla_put() to
fail with -EMSGSIZE and hitting BUG_ON(err < 0).  On systems with
unprivileged user namespaces enabled (e.g., Ubuntu default), this is
reachable via unshare -Urn since OVS vport mutation operations use
GENL_UNS_ADMIN_PERM.

 kernel BUG at net/openvswitch/datapath.c:2414!
 Oops: invalid opcode: 0000 [#1] SMP KASAN NOPTI
 CPU: 1 UID: 0 PID: 65 Comm: poc Not tainted 7.0.0-rc7-00195-geb216e422044 #1
 RIP: 0010:ovs_vport_cmd_set+0x34c/0x400
 Call Trace:
  <TASK>
  genl_family_rcv_msg_doit (net/netlink/genetlink.c:1116)
  genl_rcv_msg (net/netlink/genetlink.c:1194)
  netlink_rcv_skb (net/netlink/af_netlink.c:2550)
  genl_rcv (net/netlink/genetlink.c:1219)
  netlink_unicast (net/netlink/af_netlink.c:1344)
  netlink_sendmsg (net/netlink/af_netlink.c:1894)
  __sys_sendto (net/socket.c:2206)
  __x64_sys_sendto (net/socket.c:2209)
  do_syscall_64 (arch/x86/entry/syscall_64.c:63)
  entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:130)
  </TASK>
 Kernel panic - not syncing: Fatal exception

Reject attempts to set more PIDs than nr_cpu_ids in
ovs_vport_set_upcall_portids(), and pre-compute the worst-case reply
size in ovs_vport_cmd_msg_size() based on that bound, similar to the
existing ovs_dp_cmd_msg_size().  nr_cpu_ids matches the cap already
used by the per-CPU dispatch configuration on the datapath side
(ovs_dp_cmd_fill_info() serialises at most nr_cpu_ids PIDs), so the
two sides stay consistent.

Fixes: 5cd667b0a456 ("openvswitch: Allow each vport to have an array of 'port_id's.")
Reported-by: Xiang Mei <xmei5@asu.edu>
Assisted-by: Claude:claude-opus-4-6
Signed-off-by: Weiming Shi <bestswngs@gmail.com>
---
v5 (per Ilya):
- Add blank lines before multi-line comment blocks in
  ovs_vport_cmd_msg_size() for readability.
- Drop parenthetical from the OVS_VPORT_ATTR_UPCALL_PID comment.
- Add lore links for previous versions.
v4: https://lore.kernel.org/netdev/20260415125121.110874-2-bestswngs@gmail.com
- Use nr_cpu_ids instead of num_possible_cpus() for consistency with
  the per-CPU dispatch on the datapath side.
- Annotate ovs_vport_cmd_msg_size() per-attribute; split nested sums.
v3: https://lore.kernel.org/netdev/20260413035514.2113886-3-bestswngs@gmail.com
- Cap at num_possible_cpus(); add ovs_vport_cmd_msg_size(); keep
  BUG_ON(); fix Fixes tag.
v2: https://lore.kernel.org/netdev/20260411141448.1479933-3-bestswngs@gmail.com
- Dynamically size reply skb; drop WARN_ON_ONCE, return plain errors.
v1: https://lore.kernel.org/netdev/20260411055915.1224902-2-bestswngs@gmail.com
---
 net/openvswitch/datapath.c | 35 +++++++++++++++++++++++++++++++++--
 net/openvswitch/vport.c    |  3 +++
 2 files changed, 36 insertions(+), 2 deletions(-)

diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index e209099218b4..bbbde50fc649 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -2184,9 +2184,40 @@ static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb,
 	return err;
 }

+static size_t ovs_vport_cmd_msg_size(void)
+{
+	size_t msgsize = NLMSG_ALIGN(sizeof(struct ovs_header));
+
+	msgsize += nla_total_size(sizeof(u32)); /* OVS_VPORT_ATTR_PORT_NO */
+	msgsize += nla_total_size(sizeof(u32)); /* OVS_VPORT_ATTR_TYPE */
+	msgsize += nla_total_size(IFNAMSIZ);    /* OVS_VPORT_ATTR_NAME */
+	msgsize += nla_total_size(sizeof(u32)); /* OVS_VPORT_ATTR_IFINDEX */
+	msgsize += nla_total_size(sizeof(s32)); /* OVS_VPORT_ATTR_NETNSID */
+
+	/* OVS_VPORT_ATTR_STATS */
+	msgsize += nla_total_size_64bit(sizeof(struct ovs_vport_stats));
+
+	/* OVS_VPORT_ATTR_UPCALL_STATS(OVS_VPORT_UPCALL_ATTR_SUCCESS +
+	 *                             OVS_VPORT_UPCALL_ATTR_FAIL)
+	 */
+	msgsize += nla_total_size(nla_total_size_64bit(sizeof(u64)) +
+				  nla_total_size_64bit(sizeof(u64)));
+
+	/* OVS_VPORT_ATTR_UPCALL_PID */
+	msgsize += nla_total_size(nr_cpu_ids * sizeof(u32));
+
+	/* OVS_VPORT_ATTR_OPTIONS(OVS_TUNNEL_ATTR_DST_PORT +
+	 *                        OVS_TUNNEL_ATTR_EXTENSION(OVS_VXLAN_EXT_GBP))
+	 */
+	msgsize += nla_total_size(nla_total_size(sizeof(u16)) +
+				  nla_total_size(nla_total_size(0)));
+
+	return msgsize;
+}
+
 static struct sk_buff *ovs_vport_cmd_alloc_info(void)
 {
-	return nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	return genlmsg_new(ovs_vport_cmd_msg_size(), GFP_KERNEL);
 }

 /* Called with ovs_mutex, only via ovs_dp_notify_wq(). */
@@ -2196,7 +2227,7 @@ struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, struct net *net,
 	struct sk_buff *skb;
 	int retval;

-	skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	skb = ovs_vport_cmd_alloc_info();
 	if (!skb)
 		return ERR_PTR(-ENOMEM);

diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c
index 23f629e94a36..56b2e2d1a749 100644
--- a/net/openvswitch/vport.c
+++ b/net/openvswitch/vport.c
@@ -406,6 +406,9 @@ int ovs_vport_set_upcall_portids(struct vport *vport, const struct nlattr *ids)
 	if (!nla_len(ids) || nla_len(ids) % sizeof(u32))
 		return -EINVAL;

+	if (nla_len(ids) / sizeof(u32) > nr_cpu_ids)
+		return -EINVAL;
+
 	old = ovsl_dereference(vport->upcall_portids);

 	vport_portids = kmalloc(sizeof(*vport_portids) + nla_len(ids),
-- 
2.43.0

^ permalink raw reply related

* [PATCH net] sctp: fix OOB write to userspace in sctp_getsockopt_peer_auth_chunks
From: Michael Bommarito @ 2026-04-16  3:19 UTC (permalink / raw)
  To: linux-sctp, Marcelo Ricardo Leitner, Xin Long
  Cc: David S . Miller, Eric Dumazet, Jakub Kicinski, Paolo Abeni,
	Simon Horman, netdev, linux-kernel, stable

sctp_getsockopt_peer_auth_chunks() checks that the caller's optval
buffer is large enough for the peer AUTH chunk list with

    if (len < num_chunks)
            return -EINVAL;

but then writes num_chunks bytes to p->gauth_chunks, which lives
at offset offsetof(struct sctp_authchunks, gauth_chunks) == 8
inside optval.  The check is missing the sizeof(struct
sctp_authchunks) = 8-byte header.  When the caller supplies
len == num_chunks (for any num_chunks > 0) the test passes but
copy_to_user() writes sizeof(struct sctp_authchunks) = 8 bytes
past the declared buffer.

The sibling function sctp_getsockopt_local_auth_chunks(), defined
immediately below it, already has the correct check:

    if (len < sizeof(struct sctp_authchunks) + num_chunks)
            return -EINVAL;

Align the peer variant with its sibling.

Reproducer confirms on v7.0-13-generic: an unprivileged userspace
caller that opens a loopback SCTP association with AUTH enabled,
queries num_chunks with a short optval, then issues the real
getsockopt with len == num_chunks and sentinel bytes painted past
the buffer observes those sentinel bytes overwritten with the
peer's AUTH chunk type.  The bytes written are under the peer's
control but land in the caller's own userspace; this is not a
kernel memory corruption, but it is a kernel-side contract
violation that can silently corrupt adjacent userspace data.

Fixes: 65b07e5d0d09 ("[SCTP]: API updates to suport SCTP-AUTH extensions.")
Cc: stable@vger.kernel.org
Assisted-by: Claude:claude-opus-4-6
Signed-off-by: Michael Bommarito <michael.bommarito@gmail.com>
---
 net/sctp/socket.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 05fb00c9c335..f5d442753dc9 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -7033,7 +7033,7 @@ static int sctp_getsockopt_peer_auth_chunks(struct sock *sk, int len,

 	/* See if the user provided enough room for all the data */
 	num_chunks = ntohs(ch->param_hdr.length) - sizeof(struct sctp_paramhdr);
-	if (len < num_chunks)
+	if (len < sizeof(struct sctp_authchunks) + num_chunks)
 		return -EINVAL;

 	if (copy_to_user(to, ch->chunks, num_chunks))
-- 
2.53.0

^ permalink raw reply related

* [PATCH net v1 1/2] net: tunnel: fix stale transport header after GRE/TEB decap
From: Jiayuan Chen @ 2026-04-16  3:46 UTC (permalink / raw)
  To: netdev
  Cc: Jiayuan Chen, syzbot+83181a31faf9455499c5, David S. Miller,
	Eric Dumazet, Jakub Kicinski, Paolo Abeni, Simon Horman,
	David Ahern, Pravin B Shelar, Tom Herbert, linux-kernel

syzbot reported a BUG.

I found that after GRE decapsulation in gretap/ip6gretap paths, the
transport_header becomes stale with a negative offset. The sequence is:

1. Before decap, transport_header points to the outer L4 (GRE) header.
2. __iptunnel_pull_header() calls skb_pull_rcsum() to advance skb->data
   past the GRE header, but does not update transport_header.
3. For TEB (gretap/ip6gretap), eth_type_trans() in ip_tunnel_rcv() /
   __ip6_tnl_rcv() further pulls ETH_HLEN (14 bytes) from skb->data.

After these two pulls, skb->data has moved forward while transport_header
still points to the old (now behind skb->data) position, resulting in a
negative skb_transport_offset(): typically -4 after GRE pull alone, or
-18 after GRE + inner Ethernet pull.

In the normal case where the inner frame is a recognizable protocol
(e.g., IPv4/TCP), inet_gro_receive() in net/ipv4/af_inet.c corrects the
transport_header via skb_set_transport_header() during GRO processing.
However, if the inner frame cannot be parsed (e.g., eth_type_trans()
classifies it as ETH_P_802_2 due to a zero/invalid inner Ethernet
header), no GRO callback resets the transport_header, and the stale
offset persists into __netif_receive_skb_core().

When this stale offset is combined with contradictory GSO metadata (e.g.,
SKB_GSO_TCPV4 injected via virtio_net_hdr from a tun device),
qdisc_pkt_len_segs_init() trusts the negative offset: the unsigned
wraparound makes pskb_may_pull() effectively a no-op, and __tcp_hdrlen()
then reads from an invalid memory location, causing a use-after-free.

Fix this by introducing iptunnel_rebuild_transport_header() which resets
and re-probes the transport header after tunnel decapsulation. If the
transport header cannot be rebuilt and the skb carries GSO metadata, the
inconsistent GSO fields are cleared to prevent downstream consumers from
trusting stale offsets.

reproducer: https://gist.github.com/mrpre/5ba943fd86367af748b70de99263da4b

Link: https://syzkaller.appspot.com/bug?extid=83181a31faf9455499c5
Fixes: c54419321455 ("GRE: Refactor GRE tunneling code.")
Fixes: 0d3c703a9d17 ("ipv6: Cleanup IPv6 tunnel receive path")
Reported-by: syzbot+83181a31faf9455499c5@syzkaller.appspotmail.com
Signed-off-by: Jiayuan Chen <jiayuan.chen@linux.dev>
---
 include/net/ip_tunnels.h | 14 ++++++++++++++
 net/ipv4/ip_tunnel.c     |  2 ++
 net/ipv6/ip6_tunnel.c    |  2 ++
 3 files changed, 18 insertions(+)

diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h
index d708b66e55cd..f160d82e6196 100644
--- a/include/net/ip_tunnels.h
+++ b/include/net/ip_tunnels.h
@@ -662,6 +662,20 @@ static inline int iptunnel_pull_offloads(struct sk_buff *skb)
 	return 0;
 }

+static inline void iptunnel_rebuild_transport_header(struct sk_buff *skb)
+{
+	skb->transport_header = (typeof(skb->transport_header))~0U;
+	skb_probe_transport_header(skb);
+
+	if (!skb_transport_header_was_set(skb) && skb_is_gso(skb)) {
+		struct skb_shared_info *shinfo = skb_shinfo(skb);
+
+		shinfo->gso_type = 0;
+		shinfo->gso_size = 0;
+		shinfo->gso_segs = 0;
+	}
+}
+
 static inline void iptunnel_xmit_stats(struct net_device *dev, int pkt_len)
 {
 	if (pkt_len > 0) {
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index 50d0f5fe4e4c..c46be68cfafa 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -445,6 +445,8 @@ int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
 	if (tun_dst)
 		skb_dst_set(skb, (struct dst_entry *)tun_dst);

+	iptunnel_rebuild_transport_header(skb);
+
 	gro_cells_receive(&tunnel->gro_cells, skb);
 	return 0;

diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 46bc06506470..f95348cf3c77 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -879,6 +879,8 @@ static int __ip6_tnl_rcv(struct ip6_tnl *tunnel, struct sk_buff *skb,
 	if (tun_dst)
 		skb_dst_set(skb, (struct dst_entry *)tun_dst);

+	iptunnel_rebuild_transport_header(skb);
+
 	gro_cells_receive(&tunnel->gro_cells, skb);
 	return 0;

-- 
2.43.0

^ permalink raw reply related

* [PATCH net-next v1 2/2] net: add DEBUG_NET_WARN_ON_ONCE for negative transport offset
From: Jiayuan Chen @ 2026-04-16  3:46 UTC (permalink / raw)
  To: netdev
  Cc: Jiayuan Chen, David S. Miller, Eric Dumazet, Jakub Kicinski,
	Paolo Abeni, Simon Horman, David Ahern, Pravin B Shelar,
	Tom Herbert, linux-kernel
In-Reply-To: <20260416034610.8873-1-jiayuan.chen@linux.dev>

skb_transport_offset() can silently return a negative
value when the transport_header becomes stale after tunnel
decapsulation. A negative offset is never valid — it means
transport_header points before skb->data, which will cause unsigned
wraparound in any caller that assigns the result to an unsigned
variable.

Add a DEBUG_NET_WARN_ON_ONCE(off < 0) check so that such cases are
caught early in CONFIG_DEBUG_NET=y builds (e.g., syzkaller, kernel test
bots) with a full stack trace pointing to the caller, rather than
silently propagating a bogus offset until something crashes downstream.

Signed-off-by: Jiayuan Chen <jiayuan.chen@linux.dev>
---
 include/linux/skbuff.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 2bcf78a4de7b..0b1aeacc25f7 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -3234,7 +3234,10 @@ static inline unsigned char *skb_checksum_start(const struct sk_buff *skb)

 static inline int skb_transport_offset(const struct sk_buff *skb)
 {
-	return skb_transport_header(skb) - skb->data;
+	int off = skb_transport_header(skb) - skb->data;
+
+	DEBUG_NET_WARN_ON_ONCE(off < 0);
+	return off;
 }

 static inline u32 skb_network_header_len(const struct sk_buff *skb)
-- 
2.43.0

^ permalink raw reply related

* Re: [PATCH net v2 2/2] bnge: remove unsupported backing store type
From: Przemek Kitszel @ 2026-04-16  3:54 UTC (permalink / raw)
  To: Vikas Gupta, dharmender.garg
  Cc: netdev, davem, edumazet, kuba, pabeni, andrew+netdev, horms,
	linux-kernel, vsrama-krishna.nemani, bhargava.marreddy,
	rajashekar.hudumula, ajit.khaparde, rahul-rg.gupta
In-Reply-To: <20260415151621.1104956-3-vikas.gupta@broadcom.com>

On 4/15/26 17:16, Vikas Gupta wrote:
> The backing store type, BNGE_CTX_MRAV, is not applicable in Thor Ultra
> devices. Remove it from the backing store configuration, as the firmware

I guess the removed code was needed for previous devices; what is the
impact for them?

> will not populate entities in this backing store type, due to which the
> driver load fails.
> 
> Fixes: 29c5b358f385 ("bng_en: Add backing store support")
> Signed-off-by: Vikas Gupta <vikas.gupta@broadcom.com>
> Reviewed-by: Dharmender Garg <dharmender.garg@broadcom.com>
> ---
>   drivers/net/ethernet/broadcom/bnge/bnge_rmem.c | 16 ----------------
>   1 file changed, 16 deletions(-)
> 
> diff --git a/drivers/net/ethernet/broadcom/bnge/bnge_rmem.c b/drivers/net/ethernet/broadcom/bnge/bnge_rmem.c
> index 94f15e08a88c..b066ee887a09 100644
> --- a/drivers/net/ethernet/broadcom/bnge/bnge_rmem.c
> +++ b/drivers/net/ethernet/broadcom/bnge/bnge_rmem.c
> @@ -324,7 +324,6 @@ int bnge_alloc_ctx_mem(struct bnge_dev *bd)
>   	u32 l2_qps, qp1_qps, max_qps;
>   	u32 ena, entries_sp, entries;
>   	u32 srqs, max_srqs, min;
> -	u32 num_mr, num_ah;
>   	u32 extra_srqs = 0;
>   	u32 extra_qps = 0;
>   	u32 fast_qpmd_qps;
> @@ -390,21 +389,6 @@ int bnge_alloc_ctx_mem(struct bnge_dev *bd)
>   	if (!bnge_is_roce_en(bd))
>   		goto skip_rdma;
>   
> -	ctxm = &ctx->ctx_arr[BNGE_CTX_MRAV];
> -	/* 128K extra is needed to accommodate static AH context
> -	 * allocation by f/w.
> -	 */
> -	num_mr = min_t(u32, ctxm->max_entries / 2, 1024 * 256);
> -	num_ah = min_t(u32, num_mr, 1024 * 128);
> -	ctxm->split_entry_cnt = BNGE_CTX_MRAV_AV_SPLIT_ENTRY + 1;
> -	if (!ctxm->mrav_av_entries || ctxm->mrav_av_entries > num_ah)
> -		ctxm->mrav_av_entries = num_ah;
> -
> -	rc = bnge_setup_ctxm_pg_tbls(bd, ctxm, num_mr + num_ah, 2);
> -	if (rc)
> -		return rc;
> -	ena |= FUNC_BACKING_STORE_CFG_REQ_ENABLES_MRAV;
> -
>   	ctxm = &ctx->ctx_arr[BNGE_CTX_TIM];
>   	rc = bnge_setup_ctxm_pg_tbls(bd, ctxm, l2_qps + qp1_qps + extra_qps, 1);
>   	if (rc)


^ permalink raw reply

* [PATCH net 1/3] octeontx2-af: npc: cn20k: Handle npc_mcam_idx_2_key_type() failures
From: Ratheesh Kannoth @ 2026-04-16  3:53 UTC (permalink / raw)
  To: netdev, linux-kernel
  Cc: sgoutham, davem, edumazet, kuba, pabeni, andrew+netdev,
	dan.carpenter, Ratheesh Kannoth, Dan Carpenter
In-Reply-To: <20260416035352.333808-1-rkannoth@marvell.com>

npc_mcam_idx_2_key_type() can fail; ignoring its return value left
kw_type unchecked in MCAM enable, configure, copy, and read paths.
Return early on error so we do not program or interpret MCAM state
with an invalid key type.

CC: Dan Carpenter <error27@gmail.com>
Fixes: 6d1e70282f76 ("octeontx2-af: npc: cn20k: Use common APIs")
Link: https://lore.kernel.org/netdev/adiQJvuKlEhq2ILx@stanley.mountain/
Signed-off-by: Ratheesh Kannoth <rkannoth@marvell.com>
---
 .../ethernet/marvell/octeontx2/af/cn20k/npc.c | 20 ++++++++++++++-----
 1 file changed, 15 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npc.c b/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npc.c
index 7291fdb89b03..2fcd0ee2b1e1 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npc.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npc.c
@@ -808,7 +808,9 @@ npc_cn20k_enable_mcam_entry(struct rvu *rvu, int blkaddr,
 	u64 cfg, hw_prio;
 	u8 kw_type;
 
-	npc_mcam_idx_2_key_type(rvu, index, &kw_type);
+	if (npc_mcam_idx_2_key_type(rvu, index, &kw_type))
+		return;
+
 	if (kw_type == NPC_MCAM_KEY_X2) {
 		cfg = rvu_read64(rvu, blkaddr,
 				 NPC_AF_CN20K_MCAMEX_BANKX_CFG_EXT(mcam_idx,
@@ -1052,10 +1054,12 @@ void npc_cn20k_config_mcam_entry(struct rvu *rvu, int blkaddr, int index,
 	int kw = 0;
 	u8 kw_type;
 
+	if (npc_mcam_idx_2_key_type(rvu, index, &kw_type))
+		return;
+
 	/* Disable before mcam entry update */
 	npc_cn20k_enable_mcam_entry(rvu, blkaddr, index, false);
 
-	npc_mcam_idx_2_key_type(rvu, index, &kw_type);
 	/* CAM1 takes the comparison value and
 	 * CAM0 specifies match for a bit in key being '0' or '1' or 'dontcare'.
 	 * CAM1<n> = 0 & CAM0<n> = 1 => match if key<n> = 0
@@ -1132,8 +1136,13 @@ void npc_cn20k_copy_mcam_entry(struct rvu *rvu, int blkaddr, u16 src, u16 dest)
 
 	dbank = npc_get_bank(mcam, dest);
 	sbank = npc_get_bank(mcam, src);
-	npc_mcam_idx_2_key_type(rvu, src, &src_kwtype);
-	npc_mcam_idx_2_key_type(rvu, dest, &dest_kwtype);
+
+	if (npc_mcam_idx_2_key_type(rvu, src, &src_kwtype))
+		return;
+
+	if (npc_mcam_idx_2_key_type(rvu, dest, &dest_kwtype))
+		return;
+
 	if (src_kwtype != dest_kwtype)
 		return;
 
@@ -1188,7 +1197,8 @@ void npc_cn20k_read_mcam_entry(struct rvu *rvu, int blkaddr, u16 index,
 	int kw = 0, bank;
 	u8 kw_type;
 
-	npc_mcam_idx_2_key_type(rvu, index, &kw_type);
+	if (npc_mcam_idx_2_key_type(rvu, index, &kw_type))
+		return;
 
 	bank = npc_get_bank(mcam, index);
 	index &= (mcam->banksize - 1);
-- 
2.43.0


^ permalink raw reply related

* [PATCH net 2/3] octeontx2-af: npc: cn20k: Drop debugfs_create_file() error checks in init
From: Ratheesh Kannoth @ 2026-04-16  3:53 UTC (permalink / raw)
  To: netdev, linux-kernel
  Cc: sgoutham, davem, edumazet, kuba, pabeni, andrew+netdev,
	dan.carpenter, Ratheesh Kannoth, Dan Carpenter
In-Reply-To: <20260416035352.333808-1-rkannoth@marvell.com>

debugfs is not intended to be checked for allocation failures the way
other kernel APIs are: callers should not fail probe or subsystem init
because a debugfs node could not be created, including when debugfs is
disabled in Kconfig.  Replacing NULL checks with IS_ERR() checks is
similarly wrong for optional debugfs.

Remove dentry checks and -EFAULT returns from npc_cn20k_debugfs_init().
https://staticthinking.wordpress.com/2023/07/24/debugfs-functions-are-not-supposed-to-be-checked/

CC: Dan Carpenter <error27@gmail.com>
Link: https://lore.kernel.org/netdev/adjNGPWKMOk3KgWL@stanley.mountain/
Fixes: 528530dff56b ("octeontx2-af: npc: cn20k: add debugfs support")
Signed-off-by: Ratheesh Kannoth <rkannoth@marvell.com>
---
 .../marvell/octeontx2/af/cn20k/debugfs.c      | 33 ++++++-------------
 1 file changed, 10 insertions(+), 23 deletions(-)

diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cn20k/debugfs.c b/drivers/net/ethernet/marvell/octeontx2/af/cn20k/debugfs.c
index 3debf2fae1a4..6f13296303cb 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/cn20k/debugfs.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/cn20k/debugfs.c
@@ -249,34 +249,21 @@ DEFINE_SHOW_ATTRIBUTE(npc_defrag);
 int npc_cn20k_debugfs_init(struct rvu *rvu)
 {
 	struct npc_priv_t *npc_priv = npc_priv_get();
-	struct dentry *npc_dentry;
 
-	npc_dentry = debugfs_create_file("mcam_layout", 0444, rvu->rvu_dbg.npc,
-					 npc_priv, &npc_mcam_layout_fops);
+	debugfs_create_file("mcam_layout", 0444, rvu->rvu_dbg.npc,
+			    npc_priv, &npc_mcam_layout_fops);
 
-	if (!npc_dentry)
-		return -EFAULT;
+	debugfs_create_file("mcam_default", 0444, rvu->rvu_dbg.npc,
+			    rvu, &npc_mcam_default_fops);
 
-	npc_dentry = debugfs_create_file("mcam_default", 0444, rvu->rvu_dbg.npc,
-					 rvu, &npc_mcam_default_fops);
+	debugfs_create_file("vidx2idx", 0444, rvu->rvu_dbg.npc,
+			    npc_priv, &npc_vidx2idx_map_fops);
 
-	if (!npc_dentry)
-		return -EFAULT;
+	debugfs_create_file("idx2vidx", 0444, rvu->rvu_dbg.npc,
+			    npc_priv, &npc_idx2vidx_map_fops);
 
-	npc_dentry = debugfs_create_file("vidx2idx", 0444, rvu->rvu_dbg.npc,
-					 npc_priv, &npc_vidx2idx_map_fops);
-	if (!npc_dentry)
-		return -EFAULT;
-
-	npc_dentry = debugfs_create_file("idx2vidx", 0444, rvu->rvu_dbg.npc,
-					 npc_priv, &npc_idx2vidx_map_fops);
-	if (!npc_dentry)
-		return -EFAULT;
-
-	npc_dentry = debugfs_create_file("defrag", 0444, rvu->rvu_dbg.npc,
-					 npc_priv, &npc_defrag_fops);
-	if (!npc_dentry)
-		return -EFAULT;
+	debugfs_create_file("defrag", 0444, rvu->rvu_dbg.npc,
+			    npc_priv, &npc_defrag_fops);
 
 	return 0;
 }
-- 
2.43.0


^ permalink raw reply related

* [PATCH net 0/3] octeontx2-af: Fix smatch reported errors
From: Ratheesh Kannoth @ 2026-04-16  3:53 UTC (permalink / raw)
  To: netdev, linux-kernel
  Cc: sgoutham, davem, edumazet, kuba, pabeni, andrew+netdev,
	dan.carpenter, Ratheesh Kannoth

This series tightens error handling in the Marvell OcteonTX2 AF CN20K NPC
layer: MCAM paths now respect npc_mcam_idx_2_key_type() failures, debugfs
setup follows the usual "optional, do not fail init" convention, and
defrag rollback reports failure when freeing slots after a partial
allocation does not complete cleanly.

Patch 1 returns early when resolving an MCAM index to a key type fails,
so enable, configure, copy, and read paths do not program or interpret
hardware with a stale or undefined key type.

Patch 2 removes dentry and allocation-failure checks around
debugfs_create_file() in npc_cn20k_debugfs_init().  Debugfs entries are
diagnostic; callers should not abort probe or subsystem init when they
cannot be created (including when debugfs is disabled).

Patch 3 sets the error code when __npc_subbank_free() fails inside the
rollback loop in npc_defrag_alloc_free_slots(), so the function does not
return success after a failed cleanup.

Signed-off-by: Ratheesh Kannoth <rkannoth@marvell.com>

Ratheesh Kannoth (3):
  octeontx2-af: npc: cn20k: Handle npc_mcam_idx_2_key_type() failures
  octeontx2-af: npc: cn20k: Drop debugfs_create_file() error checks in
    init
  octeontx2-af: npc: cn20k: Return error when defrag rollback free fails

---
 .../marvell/octeontx2/af/cn20k/debugfs.c      | 33 ++++++-------------
 .../ethernet/marvell/octeontx2/af/cn20k/npc.c | 21 +++++++++---
 2 files changed, 26 insertions(+), 28 deletions(-)

--
2.43.0

^ permalink raw reply

* [PATCH net 3/3] octeontx2-af: npc: cn20k: Return error when defrag rollback free fails
From: Ratheesh Kannoth @ 2026-04-16  3:53 UTC (permalink / raw)
  To: netdev, linux-kernel
  Cc: sgoutham, davem, edumazet, kuba, pabeni, andrew+netdev,
	dan.carpenter, Ratheesh Kannoth, Dan Carpenter
In-Reply-To: <20260416035352.333808-1-rkannoth@marvell.com>

In npc_defrag_alloc_free_slots(), the fail_free_alloc rollback loop frees
previously allocated MCAM entries after a partial allocation failure.  If
__npc_subbank_free() fails, we break out of the loop but rc was still zero
from the successful npc_mcam_idx_2_subbank_idx() lookup, so the function
incorrectly returned success.  Set rc to -EFAULT so the failure is visible
to callers.

CC: Dan Carpenter <error27@gmail.com>
Link: https://lore.kernel.org/netdev/adjNJEpILRZATB2N@stanley.mountain/
Fixes: 645c6e3c1999 ("octeontx2-af: npc: cn20k: virtual index support")
Signed-off-by: Ratheesh Kannoth <rkannoth@marvell.com>
---
 drivers/net/ethernet/marvell/octeontx2/af/cn20k/npc.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npc.c b/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npc.c
index 2fcd0ee2b1e1..df192729ac1d 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npc.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npc.c
@@ -3541,6 +3541,7 @@ static int npc_defrag_alloc_free_slots(struct rvu *rvu,
 			dev_err(rvu->dev,
 				"%s: Error to free mcam idx=%u\n",
 				__func__, save[i]);
+			rc = -EFAULT;
 			break;
 		}
 	}
-- 
2.43.0


^ permalink raw reply related

* Re: [PATCH v11 net-next 2/5] psp: add new netlink cmd for dev-assoc and dev-disassoc
From: Wei Wang @ 2026-04-16  3:55 UTC (permalink / raw)
  To: Paolo Abeni
  Cc: netdev, Jakub Kicinski, Daniel Zahka, Willem de Bruijn, David Wei,
	Andrew Lunn, David S . Miller, Eric Dumazet, Simon Horman,
	Wei Wang
In-Reply-To: <bc8ef831-a4ea-4d85-ab7c-a287c9b80e61@redhat.com>

On Mon, Apr 13, 2026 at 3:37 AM Paolo Abeni <pabeni@redhat.com> wrote:
>
>
>
> On 4/9/26 1:14 AM, Wei Wang wrote:
> > From: Wei Wang <weibunny@fb.com>
> >
> > The main purpose of this cmd is to be able to associate a
> > non-psp-capable device (e.g. veth or netkit) with a psp device.
> > One use case is if we create a pair of veth/netkit, and assign 1 end
> > inside a netns, while leaving the other end within the default netns,
> > with a real PSP device, e.g. netdevsim or a physical PSP-capable NIC.
> > With this command, we could associate the veth/netkit inside the netns
> > with PSP device, so the virtual device could act as PSP-capable device
> > to initiate PSP connections, and perform PSP encryption/decryption on
> > the real PSP device.
> >
> > Signed-off-by: Wei Wang <weibunny@fb.com>
> > Reviewed-by: Daniel Zahka <daniel.zahka@gmail.com>
> > ---
> >  Documentation/netlink/specs/psp.yaml |  67 +++++-
> >  include/net/psp/types.h              |  15 ++
> >  include/uapi/linux/psp.h             |  13 ++
> >  net/psp/psp-nl-gen.c                 |  32 +++
> >  net/psp/psp-nl-gen.h                 |   2 +
> >  net/psp/psp_main.c                   |  20 ++
> >  net/psp/psp_nl.c                     | 325 ++++++++++++++++++++++++++-
> >  7 files changed, 462 insertions(+), 12 deletions(-)
> >
> > diff --git a/Documentation/netlink/specs/psp.yaml b/Documentation/netlink/specs/psp.yaml
> > index c54e1202cbe0..3d1b7223e084 100644
> > --- a/Documentation/netlink/specs/psp.yaml
> > +++ b/Documentation/netlink/specs/psp.yaml
> > @@ -13,6 +13,17 @@ definitions:
> >                hdr0-aes-gmac-128, hdr0-aes-gmac-256]
> >
> >  attribute-sets:
> > +  -
> > +    name: assoc-dev-info
> > +    attributes:
> > +      -
> > +        name: ifindex
> > +        doc: ifindex of an associated network device.
> > +        type: u32
> > +      -
> > +        name: nsid
> > +        doc: Network namespace ID of the associated device.
> > +        type: s32
> >    -
> >      name: dev
> >      attributes:
> > @@ -24,7 +35,9 @@ attribute-sets:
> >            min: 1
> >        -
> >          name: ifindex
> > -        doc: ifindex of the main netdevice linked to the PSP device.
> > +        doc: |
> > +          ifindex of the main netdevice linked to the PSP device,
> > +          or the ifindex to associate with the PSP device.
> >          type: u32
> >        -
> >          name: psp-versions-cap
> > @@ -38,6 +51,28 @@ attribute-sets:
> >          type: u32
> >          enum: version
> >          enum-as-flags: true
> > +      -
> > +        name: assoc-list
> > +        doc: List of associated virtual devices.
> > +        type: nest
> > +        nested-attributes: assoc-dev-info
> > +        multi-attr: true
> > +      -
> > +        name: nsid
> > +        doc: |
> > +          Network namespace ID for the device to associate/disassociate.
> > +          Optional for dev-assoc and dev-disassoc; if not present, the
> > +          device is looked up in the caller's network namespace.
> > +        type: s32
> > +      -
> > +        name: by-association
> > +        doc: |
> > +          Flag indicating the PSP device is an associated device from a
> > +          different network namespace.
> > +          Present when in associated namespace, absent when in primary/host
> > +          namespace.
> > +        type: flag
> > +
> >    -
> >      name: assoc
> >      attributes:
> > @@ -170,6 +205,8 @@ operations:
> >              - ifindex
> >              - psp-versions-cap
> >              - psp-versions-ena
> > +            - assoc-list
> > +            - by-association
> >          pre: psp-device-get-locked
> >          post: psp-device-unlock
> >        dump:
> > @@ -279,6 +316,34 @@ operations:
> >          post: psp-device-unlock
> >        dump:
> >          reply: *stats-all
> > +    -
> > +      name: dev-assoc
> > +      doc: Associate a network device with a PSP device.
> > +      attribute-set: dev
> > +      do:
> > +        request:
> > +          attributes:
> > +            - id
> > +            - ifindex
> > +            - nsid
> > +        reply:
> > +          attributes: []
> > +        pre: psp-device-get-locked
> > +        post: psp-device-unlock
> > +    -
> > +      name: dev-disassoc
> > +      doc: Disassociate a network device from a PSP device.
> > +      attribute-set: dev
> > +      do:
> > +        request:
> > +          attributes:
> > +            - id
> > +            - ifindex
> > +            - nsid
> > +        reply:
> > +          attributes: []
> > +        pre: psp-device-get-locked
> > +        post: psp-device-unlock
> >
> >  mcast-groups:
> >    list:
> > diff --git a/include/net/psp/types.h b/include/net/psp/types.h
> > index 25a9096d4e7d..4bd432ed107a 100644
> > --- a/include/net/psp/types.h
> > +++ b/include/net/psp/types.h
> > @@ -5,6 +5,7 @@
> >
> >  #include <linux/mutex.h>
> >  #include <linux/refcount.h>
> > +#include <net/net_trackers.h>
> >
> >  struct netlink_ext_ack;
> >
> > @@ -43,9 +44,22 @@ struct psp_dev_config {
> >       u32 versions;
> >  };
> >
> > +/**
> > + * struct psp_assoc_dev - wrapper for associated net_device
> > + * @dev_list: list node for psp_dev::assoc_dev_list
> > + * @assoc_dev: the associated net_device
> > + * @dev_tracker: tracker for the net_device reference
> > + */
> > +struct psp_assoc_dev {
> > +     struct list_head dev_list;
> > +     struct net_device *assoc_dev;
> > +     netdevice_tracker dev_tracker;
> > +};
> > +
> >  /**
> >   * struct psp_dev - PSP device struct
> >   * @main_netdev: original netdevice of this PSP device
> > + * @assoc_dev_list: list of psp_assoc_dev entries associated with this PSP device
> >   * @ops:     driver callbacks
> >   * @caps:    device capabilities
> >   * @drv_priv:        driver priv pointer
> > @@ -67,6 +81,7 @@ struct psp_dev_config {
> >   */
> >  struct psp_dev {
> >       struct net_device *main_netdev;
> > +     struct list_head assoc_dev_list;
> >
> >       struct psp_dev_ops *ops;
> >       struct psp_dev_caps *caps;
> > diff --git a/include/uapi/linux/psp.h b/include/uapi/linux/psp.h
> > index a3a336488dc3..1c8899cd4da5 100644
> > --- a/include/uapi/linux/psp.h
> > +++ b/include/uapi/linux/psp.h
> > @@ -17,11 +17,22 @@ enum psp_version {
> >       PSP_VERSION_HDR0_AES_GMAC_256,
> >  };
> >
> > +enum {
> > +     PSP_A_ASSOC_DEV_INFO_IFINDEX = 1,
> > +     PSP_A_ASSOC_DEV_INFO_NSID,
> > +
> > +     __PSP_A_ASSOC_DEV_INFO_MAX,
> > +     PSP_A_ASSOC_DEV_INFO_MAX = (__PSP_A_ASSOC_DEV_INFO_MAX - 1)
> > +};
> > +
> >  enum {
> >       PSP_A_DEV_ID = 1,
> >       PSP_A_DEV_IFINDEX,
> >       PSP_A_DEV_PSP_VERSIONS_CAP,
> >       PSP_A_DEV_PSP_VERSIONS_ENA,
> > +     PSP_A_DEV_ASSOC_LIST,
> > +     PSP_A_DEV_NSID,
> > +     PSP_A_DEV_BY_ASSOCIATION,
> >
> >       __PSP_A_DEV_MAX,
> >       PSP_A_DEV_MAX = (__PSP_A_DEV_MAX - 1)
> > @@ -74,6 +85,8 @@ enum {
> >       PSP_CMD_RX_ASSOC,
> >       PSP_CMD_TX_ASSOC,
> >       PSP_CMD_GET_STATS,
> > +     PSP_CMD_DEV_ASSOC,
> > +     PSP_CMD_DEV_DISASSOC,
> >
> >       __PSP_CMD_MAX,
> >       PSP_CMD_MAX = (__PSP_CMD_MAX - 1)
> > diff --git a/net/psp/psp-nl-gen.c b/net/psp/psp-nl-gen.c
> > index 1f5e73e7ccc1..114299c64423 100644
> > --- a/net/psp/psp-nl-gen.c
> > +++ b/net/psp/psp-nl-gen.c
> > @@ -53,6 +53,20 @@ static const struct nla_policy psp_get_stats_nl_policy[PSP_A_STATS_DEV_ID + 1] =
> >       [PSP_A_STATS_DEV_ID] = NLA_POLICY_MIN(NLA_U32, 1),
> >  };
> >
> > +/* PSP_CMD_DEV_ASSOC - do */
> > +static const struct nla_policy psp_dev_assoc_nl_policy[PSP_A_DEV_NSID + 1] = {
> > +     [PSP_A_DEV_ID] = NLA_POLICY_MIN(NLA_U32, 1),
> > +     [PSP_A_DEV_IFINDEX] = { .type = NLA_U32, },
> > +     [PSP_A_DEV_NSID] = { .type = NLA_S32, },
> > +};
> > +
> > +/* PSP_CMD_DEV_DISASSOC - do */
> > +static const struct nla_policy psp_dev_disassoc_nl_policy[PSP_A_DEV_NSID + 1] = {
> > +     [PSP_A_DEV_ID] = NLA_POLICY_MIN(NLA_U32, 1),
> > +     [PSP_A_DEV_IFINDEX] = { .type = NLA_U32, },
> > +     [PSP_A_DEV_NSID] = { .type = NLA_S32, },
> > +};
> > +
> >  /* Ops table for psp */
> >  static const struct genl_split_ops psp_nl_ops[] = {
> >       {
> > @@ -119,6 +133,24 @@ static const struct genl_split_ops psp_nl_ops[] = {
> >               .dumpit = psp_nl_get_stats_dumpit,
> >               .flags  = GENL_CMD_CAP_DUMP,
> >       },
> > +     {
> > +             .cmd            = PSP_CMD_DEV_ASSOC,
> > +             .pre_doit       = psp_device_get_locked,
> > +             .doit           = psp_nl_dev_assoc_doit,
> > +             .post_doit      = psp_device_unlock,
> > +             .policy         = psp_dev_assoc_nl_policy,
> > +             .maxattr        = PSP_A_DEV_NSID,
> > +             .flags          = GENL_CMD_CAP_DO,
> > +     },
> > +     {
> > +             .cmd            = PSP_CMD_DEV_DISASSOC,
> > +             .pre_doit       = psp_device_get_locked,
> > +             .doit           = psp_nl_dev_disassoc_doit,
> > +             .post_doit      = psp_device_unlock,
> > +             .policy         = psp_dev_disassoc_nl_policy,
> > +             .maxattr        = PSP_A_DEV_NSID,
> > +             .flags          = GENL_CMD_CAP_DO,
>
> Sashiko notes that the above allows deleting associations, bypassing
> the netns boundaries. Do you need the ADMIN_PERM flag or explicit checks in
> the doit cb?

I think the concern is if we are calling this from an assoc_dev's
netns, it should not allow it to delete any assoc_dev from other
assoc_dev's netns. Right?
I will add a check to only allow deletion of assoc_dev from its own
netns.  (except main_dev's netns).

>
> > @@ -292,6 +455,145 @@ int psp_nl_key_rotate_doit(struct sk_buff *skb, struct genl_info *info)
> >       return err;
> >  }
> >
> > +int psp_nl_dev_assoc_doit(struct sk_buff *skb, struct genl_info *info)
> > +{
> > +     struct psp_dev *psd = info->user_ptr[0];
> > +     struct psp_assoc_dev *psp_assoc_dev;
> > +     struct net_device *assoc_dev;
> > +     struct sk_buff *rsp;
> > +     u32 assoc_ifindex;
> > +     struct net *net;
> > +     int nsid, err;
> > +
> > +     if (GENL_REQ_ATTR_CHECK(info, PSP_A_DEV_IFINDEX))
> > +             return -EINVAL;
> > +
> > +     if (info->attrs[PSP_A_DEV_NSID]) {
> > +             nsid = nla_get_s32(info->attrs[PSP_A_DEV_NSID]);
> > +
> > +             net = get_net_ns_by_id(genl_info_net(info), nsid);
> > +             if (!net) {
> > +                     NL_SET_BAD_ATTR(info->extack,
> > +                                     info->attrs[PSP_A_DEV_NSID]);
> > +                     return -EINVAL;
> > +             }
> > +     } else {
> > +             net = get_net(genl_info_net(info));
> > +     }
>
> psp_nl_dev_disassoc_doit() has the same code; perhaps it would be worth
> moving it into a common helper, called via pre_doit()? It should also
> simplify the cleanup paths.
>

Ack.

> > +
> > +     psp_assoc_dev = kzalloc(sizeof(*psp_assoc_dev), GFP_KERNEL);
> > +     if (!psp_assoc_dev) {
> > +             err = -ENOMEM;
> > +             goto alloc_err;
> > +     }
> > +
> > +     assoc_ifindex = nla_get_u32(info->attrs[PSP_A_DEV_IFINDEX]);
> > +     assoc_dev = netdev_get_by_index(net, assoc_ifindex,
> > +                                     &psp_assoc_dev->dev_tracker,
> > +                                     GFP_KERNEL);
> > +     if (!assoc_dev) {
> > +             NL_SET_BAD_ATTR(info->extack, info->attrs[PSP_A_DEV_IFINDEX]);
> > +             err = -ENODEV;
> > +             goto assoc_dev_err;
> > +     }
> > +
> > +     /* Check if device is already associated with a PSP device */
> > +     if (cmpxchg(&assoc_dev->psp_dev, NULL, RCU_INITIALIZER(psd))) {
> > +             NL_SET_ERR_MSG(info->extack,
> > +                            "Device already associated with a PSP device");
> > +             err = -EBUSY;
> > +             goto cmpxchg_err;
> > +     }
> > +
> > +     psp_assoc_dev->assoc_dev = assoc_dev;
> > +     rsp = psp_nl_reply_new(info);
> > +     if (!rsp) {
> > +             err = -ENOMEM;
> > +             goto rsp_err;
> > +     }
> > +
> > +     list_add_tail(&psp_assoc_dev->dev_list, &psd->assoc_dev_list);
>
> Sashiko says:
>
> ---
> list_add_tail(&psp_assoc_dev->dev_list, &psd->assoc_dev_list);
> There doesn't seem to be a limit on the number of devices that can be
> associated with a single PSP device.
> If a user repeatedly associates devices, could the generated netlink message
> in psp_nl_dev_fill() exceed the maximum allowed size (GENLMSG_DEFAULT_SIZE),
> causing it to fail with -EMSGSIZE and permanently break PSP_CMD_DEV_GET
> and management notifications for the device?
> --

Ack. Will enforce a max allowed number on the assoc_dev_list to fit
into GENLMSG_DEFAULT_SIZE.

>
> /P
>

^ permalink raw reply

* Re: [PATCH net 10/13] i40e: fix napi_enable/disable skipping ringless q_vectors
From: Przemek Kitszel @ 2026-04-16  4:20 UTC (permalink / raw)
  To: Jacob Keller, Andrew Lunn, David S. Miller, Eric Dumazet,
	Jakub Kicinski, Paolo Abeni
  Cc: netdev, Aleksandr Loktionov, stable, Sunitha Mekala,
	Maciej Fijalkowski
In-Reply-To: <20260414-iwl-net-submission-2026-04-14-v1-10-852f38e7da39@intel.com>

On 4/15/26 07:48, Jacob Keller wrote:
> From: Aleksandr Loktionov <aleksandr.loktionov@intel.com>
> 
> After ethtool -L reduces the queue count, i40e_napi_disable_all() sets
> NAPI_STATE_SCHED on all q_vectors, then i40e_vsi_map_rings_to_vectors()
> clears ring pointers on the excess ones.  i40e_napi_enable_all() skips
> those with:
> 
> 	if (q_vector->rx.ring || q_vector->tx.ring)
> 		napi_enable(&q_vector->napi);
> 
> leaving them on dev->napi_list with NAPI_STATE_SCHED permanently set.
> 
> Writing to /sys/class/net/<iface>/threaded calls napi_stop_kthread()
> on every entry in dev->napi_list.  The function loops on msleep(20)
> waiting for NAPI_STATE_SCHED to clear -- which never happens for the
> stale q_vectors.  The task hangs in D state forever; a concurrent write
> deadlocks on dev->lock held by the first.
> 
> Commit 13a8cd191a2b ("i40e: Do not enable NAPI on q_vectors that have no
> rings") added the guard to prevent a divide-by-zero in i40e_napi_poll()
> when epoll busy-poll iterated all device NAPIs (4.x era). Since
> 7adc3d57fe2b ("net: Introduce preferred busy-polling"), from v5.11,
> napi_busy_loop() polls by napi_id keyed to the socket, so ringless
> q_vectors are never selected.  i40e_msix_clean_rings() also independently
> avoids scheduling NAPI for them.  The guard is safe to remove.
> 
> Add an early return in i40e_napi_poll() for num_ringpairs == 0 so the
> function is self-defending against a NULL tx.ring dereference at the
> WB_ON_ITR check, should the NAPI ever fire through an unexpected path.
> 
> Reported-by: Jakub Kicinski <kuba@kernel.org>
> Closes: https://lore.kernel.org/intel-wired-lan/20260316133100.6054a11f@kernel.org/

Maciej developed a better fix for the problem, and he explicitly asked
to not include this patch. Please drop it from this series.

Maciej's fix:
https://lore.kernel.org/intel-wired-lan/20260414121405.631092-1-maciej.fijalkowski@intel.com/T/#u

ask for reject:
https://lore.kernel.org/intel-wired-lan/PH0PR11MB75223C8A00C3183C5082A096A0252@PH0PR11MB7522.namprd11.prod.outlook.com/T/#mbac55f7219d7855a2e5d1527904b2da43ad080cb

> Fixes: 13a8cd191a2b ("i40e: Do not enable NAPI on q_vectors that have no rings")
> Cc: stable@vger.kernel.org
> Signed-off-by: Aleksandr Loktionov <aleksandr.loktionov@intel.com>
> Tested-by: Sunitha Mekala <sunithax.d.mekala@intel.com>
> Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
> ---
>   drivers/net/ethernet/intel/i40e/i40e_main.c | 28 ++++++++++++++++------------
>   drivers/net/ethernet/intel/i40e/i40e_txrx.c | 10 ++++++++++
>   2 files changed, 26 insertions(+), 12 deletions(-)
> 


^ permalink raw reply

* Re: [PATCH nf] netfilter: nf_tables: use RCU-safe list primitives for basechain hook list
From: Xiang Mei @ 2026-04-16  4:30 UTC (permalink / raw)
  To: Pablo Neira Ayuso
  Cc: Weiming Shi, Florian Westphal, David S . Miller, Eric Dumazet,
	Jakub Kicinski, Paolo Abeni, Phil Sutter, Simon Horman,
	netfilter-devel, coreteam, netdev, linux-kernel
In-Reply-To: <ad_C1f2cW5-kctHi@chamomile>

On Wed, Apr 15, 2026 at 9:55 AM Pablo Neira Ayuso <pablo@netfilter.org> wrote:
>
> On Fri, Apr 10, 2026 at 06:13:22PM +0800, Weiming Shi wrote:
> > NFT_MSG_GETCHAIN runs as an NFNL_CB_RCU callback, so chain dumps
> > traverse basechain->hook_list under rcu_read_lock() without holding
> > commit_mutex. Meanwhile, nft_delchain_hook() mutates that same live
> > hook_list with plain list_move() and list_splice(), and the commit/abort
> > paths splice hooks back with plain list_splice(). None of these are
> > RCU-safe list operations.
> >
> > A concurrent GETCHAIN dump can observe partially updated list pointers,
> > follow them into stack-local or transaction-private list heads, and
> > crash when container_of() produces a bogus struct nft_hook pointer.
>
> For the record, v1 of proposed series to fix this is here:
>
> https://patchwork.ozlabs.org/project/netfilter-devel/list/?series=499757

Hi Pablo,

Thanks for working on this.
If this addresses the issue I originally reported, could you please
consider adding:
Reported-by: Xiang Mei <xmei5@asu.edu>

Thanks,
Xiang

^ permalink raw reply

* Re: [PATCH iwl-net] ice: fix infinite recursion in ice_cfg_tx_topo via ice_init_dev_hw
From: Przemek Kitszel @ 2026-04-16  4:36 UTC (permalink / raw)
  To: Jacob Keller, Simon Horman, Petr Oros
  Cc: netdev, Tony Nguyen, Andrew Lunn, David S. Miller, Eric Dumazet,
	Jakub Kicinski, Paolo Abeni, Aleksandr Loktionov,
	Nikolay Aleksandrov, Daniel Zahka, Paul Greenwalt, Dave Ertman,
	Michal Swiatkowski, intel-wired-lan, linux-kernel
In-Reply-To: <f30ad78e-1eb9-4c9d-9034-c8873966de66@intel.com>

On 4/15/26 23:22, Jacob Keller wrote:
> On 4/15/2026 9:30 AM, Simon Horman wrote:
>> On Mon, Apr 13, 2026 at 09:14:20PM +0200, Petr Oros wrote:
>>> On certain E810 configurations where firmware supports Tx scheduler
>>> topology switching (tx_sched_topo_comp_mode_en), ice_cfg_tx_topo()
>>> may need to apply a new 5-layer or 9-layer topology from the DDP
>>> package. If the AQ command to set the topology fails (e.g. due to
>>> invalid DDP data or firmware limitations), the global configuration
>>> lock must still be cleared via a CORER reset.
>>>
>>> Commit 86aae43f21cf ("ice: don't leave device non-functional if Tx
>>> scheduler config fails") correctly fixed this by refactoring
>>> ice_cfg_tx_topo() to always trigger CORER after acquiring the global
>>> lock and re-initialize hardware via ice_init_hw() afterwards.
>>>
>>> However, commit 8a37f9e2ff40 ("ice: move ice_deinit_dev() to the end
>>> of deinit paths") later moved ice_init_dev_hw() into ice_init_hw(),
>>> breaking the reinit path introduced by 86aae43f21cf. This creates an
>>> infinite recursive call chain:
>>>
>>>    ice_init_hw()
>>>      ice_init_dev_hw()
>>>        ice_cfg_tx_topo()         # topology change needed
>>>          ice_deinit_hw()
>>>          ice_init_hw()           # reinit after CORER
>>>            ice_init_dev_hw()     # recurse
>>>              ice_cfg_tx_topo()
>>>                ...               # stack overflow
>>>
>>> Fix by moving ice_init_dev_hw() back out of ice_init_hw() and calling
>>> it explicitly from ice_probe() and ice_devlink_reinit_up(). The third
>>> caller, ice_cfg_tx_topo(), intentionally does not need ice_init_dev_hw()

ice_cfg_tx_topo() stops calling ice_init_dev_hw(), that is the real
change that patch does, OK

>>> during its reinit, it only needs the core HW reinitialization. This
>>> breaks the recursion cleanly without adding flags or guards.
>>>
>>> The deinit ordering changes from commit 8a37f9e2ff40 ("ice: move
>>> ice_deinit_dev() to the end of deinit paths") which fixed slow rmmod
>>> are preserved, only the init-side placement of ice_init_dev_hw() is
>>> reverted.
>>>
>>> Fixes: 8a37f9e2ff40 ("ice: move ice_deinit_dev() to the end of deinit paths")
>>> Signed-off-by: Petr Oros <poros@redhat.com>
>>
>> Hi Petr,
>>
>> I don't intend to delay this patch.
>> But could you follow-up by looking over the AI generated
>> review of this patch on sashiko.dev?
>>
>> Thanks!
> 
> I'll take a look as well. I recently included this fix in Intel Wired
> LAN update last night, so hopefully nothing too problematic...
> 
> Sashiko says:
> 
>> While this code wasn't introduced by this patch, the restructuring makes it
>> more visible: can this cause a use-after-free if the nested hardware
>> initialization fails?
>> If ice_cfg_tx_topo() triggers a topology change, it performs a CORER reset
>> followed by an unroll (ice_deinit_hw) and re-initialization (ice_init_hw). If
>> that nested ice_init_hw() fails, its unroll path frees hw->port_info and
>> destroys control queues and mutexes.

this part describes the "prerequisite for the problem"

>> Because ice_init_dev_hw() returns void, it swallows the -ENODEV error and

and this part is about code that Petr's patch removes; IOW, it does not apply


Plausible sounding comments, yeah, I hope we will not drown in the sea
of AI content :(

for the patch:
I have tested that it does not break my test suite (I was the one who started
touching ice_init_hw() and friends), and both the code and the human-written
commit message look good,

Reviewed-by: Przemek Kitszel <przemyslaw.kitszel@intel.com>

thank you for fixing the code after me!

^ permalink raw reply

* [PATCH v5 net] nfc: hci: fix out-of-bounds read in HCP header parsing
From: Ashutosh Desai @ 2026-04-16  5:15 UTC (permalink / raw)
  To: netdev
  Cc: kuba, edumazet, davem, pabeni, horms, stable, linux-kernel,
	Ashutosh Desai

nfc_hci_recv_from_llc() and nci_hci_data_received_cb() cast skb->data
to struct hcp_packet and read the message header byte without checking
that enough data is present in the linear sk_buff area. A malicious NFC
peer can send a 1-byte HCP frame that passes through the SHDLC layer
and reaches these functions, causing an out-of-bounds heap read.

Fix this by adding pskb_may_pull() before each cast to ensure the full
2-byte HCP header is pulled into the linear area before it is accessed.

Fixes: 8b8d2e08bf0d ("NFC: HCI support")
Fixes: 11f54f228643 ("NFC: nci: Add HCI over NCI protocol support")
Cc: stable@vger.kernel.org
Signed-off-by: Ashutosh Desai <ashutoshdesai993@gmail.com>
---
V4 -> V5: fix whitespace damage
V3 -> V4: add Fixes tags
V2 -> V3: drop redundant checks from nfc_hci_msg_rx_work/nci_hci_msg_rx_work;
          remove incorrect Suggested-by tag
V1 -> V2: use pskb_may_pull() instead of skb->len check

v4: https://lore.kernel.org/netdev/177614425081.3600288.2536320552978506086@gmail.com/
v3: https://lore.kernel.org/netdev/20260413024329.3293075-1-ashutoshdesai993@gmail.com/
v2: https://lore.kernel.org/netdev/20260409150825.2217133-1-ashutoshdesai993@gmail.com/
v1: https://lore.kernel.org/netdev/20260408223113.2009304-1-ashutoshdesai993@gmail.com/

 net/nfc/hci/core.c | 5 +++++
 net/nfc/nci/hci.c  | 5 +++++
 2 files changed, 10 insertions(+)

diff --git a/net/nfc/hci/core.c b/net/nfc/hci/core.c
index 0d33c81a15fe..cd9cf6c94a50 100644
--- a/net/nfc/hci/core.c
+++ b/net/nfc/hci/core.c
@@ -904,6 +904,11 @@ static void nfc_hci_recv_from_llc(struct nfc_hci_dev *hdev, struct sk_buff *skb)
 	 * unblock waiting cmd context. Otherwise, enqueue to dispatch
 	 * in separate context where handler can also execute command.
 	 */
+	if (!pskb_may_pull(hcp_skb, NFC_HCI_HCP_HEADER_LEN)) {
+		kfree_skb(hcp_skb);
+		return;
+	}
+
 	packet = (struct hcp_packet *)hcp_skb->data;
 	type = HCP_MSG_GET_TYPE(packet->message.header);
 	if (type == NFC_HCI_HCP_RESPONSE) {
diff --git a/net/nfc/nci/hci.c b/net/nfc/nci/hci.c
index 40ae8e5a7ec7..6e633da257d1 100644
--- a/net/nfc/nci/hci.c
+++ b/net/nfc/nci/hci.c
@@ -482,6 +482,11 @@ void nci_hci_data_received_cb(void *context,
 	 * unblock waiting cmd context. Otherwise, enqueue to dispatch
 	 * in separate context where handler can also execute command.
 	 */
+	if (!pskb_may_pull(hcp_skb, NCI_HCI_HCP_HEADER_LEN)) {
+		kfree_skb(hcp_skb);
+		return;
+	}
+
 	packet = (struct nci_hcp_packet *)hcp_skb->data;
 	type = NCI_HCP_MSG_GET_TYPE(packet->message.header);
 	if (type == NCI_HCI_HCP_RESPONSE) {
-- 
2.34.1


^ permalink raw reply related

* Re: [PATCH v4] nfc: hci: fix out-of-bounds read in HCP header parsing
From: Ashutosh Desai @ 2026-04-16  5:21 UTC (permalink / raw)
  To: Simon Horman; +Cc: netdev, kuba, edumazet, davem, pabeni, linux-kernel
In-Reply-To: <20260415162641.GO772670@horms.kernel.org>

Apologies for the noise; I fixed the whitespace damage and sent v5.

^ permalink raw reply

* Re: [PATCH net v2 2/2] bnge: remove unsupported backing store type
From: Vikas Gupta @ 2026-04-16  5:22 UTC (permalink / raw)
  To: Przemek Kitszel
  Cc: dharmender.garg, netdev, davem, edumazet, kuba, pabeni,
	andrew+netdev, horms, linux-kernel, vsrama-krishna.nemani,
	bhargava.marreddy, rajashekar.hudumula, ajit.khaparde,
	rahul-rg.gupta
In-Reply-To: <b2735cbf-34ac-4ad8-b524-2aa0f57511f8@intel.com>

[-- Attachment #1: Type: text/plain, Size: 2548 bytes --]

On Thu, Apr 16, 2026 at 9:24 AM Przemek Kitszel
<przemyslaw.kitszel@intel.com> wrote:
>
> On 4/15/26 17:16, Vikas Gupta wrote:
> > The backing store type, BNGE_CTX_MRAV, is not applicable in Thor Ultra
> > devices. Remove it from the backing store configuration, as the firmware
>
> I guess the removed code was needed for previous devices, what is the
> impact for them?

This driver does not support previous devices. Thor Ultra devices have
split MRAV into two separate contexts, MR and AV. Support for them will
be added in a future patch series.

>
> > will not populate entities in this backing store type, due to which the
> > driver load fails.
> >
> > Fixes: 29c5b358f385 ("bng_en: Add backing store support")
> > Signed-off-by: Vikas Gupta <vikas.gupta@broadcom.com>
> > Reviewed-by: Dharmender Garg <dharmender.garg@broadcom.com>
> > ---
> >   drivers/net/ethernet/broadcom/bnge/bnge_rmem.c | 16 ----------------
> >   1 file changed, 16 deletions(-)
> >
> > diff --git a/drivers/net/ethernet/broadcom/bnge/bnge_rmem.c b/drivers/net/ethernet/broadcom/bnge/bnge_rmem.c
> > index 94f15e08a88c..b066ee887a09 100644
> > --- a/drivers/net/ethernet/broadcom/bnge/bnge_rmem.c
> > +++ b/drivers/net/ethernet/broadcom/bnge/bnge_rmem.c
> > @@ -324,7 +324,6 @@ int bnge_alloc_ctx_mem(struct bnge_dev *bd)
> >       u32 l2_qps, qp1_qps, max_qps;
> >       u32 ena, entries_sp, entries;
> >       u32 srqs, max_srqs, min;
> > -     u32 num_mr, num_ah;
> >       u32 extra_srqs = 0;
> >       u32 extra_qps = 0;
> >       u32 fast_qpmd_qps;
> > @@ -390,21 +389,6 @@ int bnge_alloc_ctx_mem(struct bnge_dev *bd)
> >       if (!bnge_is_roce_en(bd))
> >               goto skip_rdma;
> >
> > -     ctxm = &ctx->ctx_arr[BNGE_CTX_MRAV];
> > -     /* 128K extra is needed to accommodate static AH context
> > -      * allocation by f/w.
> > -      */
> > -     num_mr = min_t(u32, ctxm->max_entries / 2, 1024 * 256);
> > -     num_ah = min_t(u32, num_mr, 1024 * 128);
> > -     ctxm->split_entry_cnt = BNGE_CTX_MRAV_AV_SPLIT_ENTRY + 1;
> > -     if (!ctxm->mrav_av_entries || ctxm->mrav_av_entries > num_ah)
> > -             ctxm->mrav_av_entries = num_ah;
> > -
> > -     rc = bnge_setup_ctxm_pg_tbls(bd, ctxm, num_mr + num_ah, 2);
> > -     if (rc)
> > -             return rc;
> > -     ena |= FUNC_BACKING_STORE_CFG_REQ_ENABLES_MRAV;
> > -
> >       ctxm = &ctx->ctx_arr[BNGE_CTX_TIM];
> >       rc = bnge_setup_ctxm_pg_tbls(bd, ctxm, l2_qps + qp1_qps + extra_qps, 1);
> >       if (rc)
>

[-- Attachment #2: S/MIME Cryptographic Signature --]
[-- Type: application/pkcs7-signature, Size: 5465 bytes --]

^ permalink raw reply

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox