* [PATCH net-next v1 1/2] net: Save kthread of threaded NAPI in napi_config and restore it when trying to create a new kthread.
2026-06-29 19:20 [PATCH net-next v1 0/2] Reuse threaded NAPI kthread across napi_del()/napi_add() Shuhao Tan
@ 2026-06-29 19:20 ` Shuhao Tan
2026-06-29 19:20 ` [PATCH net-next v1 2/2] selftests: net: Add kthread preserving test in napi_threaded and busy_poll_test Shuhao Tan
2026-06-29 23:26 ` [PATCH net-next v1 0/2] Reuse threaded NAPI kthread across napi_del()/napi_add() Jakub Kicinski
2 siblings, 0 replies; 6+ messages in thread
From: Shuhao Tan @ 2026-06-29 19:20 UTC (permalink / raw)
To: David S . Miller, Eric Dumazet, Jakub Kicinski, Paolo Abeni,
Simon Horman, Andrew Lunn, Shuah Khan
Cc: Shuhao Tan, Mina Almasry, Samiullah Khawaja, Kuniyuki Iwashima,
netdev, linux-kernel, linux-kselftest
Add a napi_thread_ctx struct that has a back pointer to napi_struct.
Make the NAPI kthread use the thread_ctx as its data pointer so that
it can poll different NAPIs throughout its lifetime.
Mirror the thread and thread_ctx in napi_config all the time.
Park the thread on napi_del instead of stopping it if napi_config is
available.
Restore the thread and context when trying to create a new NAPI
kthread.
Signed-off-by: Shuhao Tan <tanshuhao@google.com>
---
include/linux/netdevice.h | 12 +++++
net/core/dev.c | 106 +++++++++++++++++++++++++++++++-------
2 files changed, 99 insertions(+), 19 deletions(-)
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 9981d637f8b5..05e430f10aba 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -63,6 +63,7 @@ struct dsa_port;
struct ip_tunnel_parm_kern;
struct macsec_context;
struct macsec_ops;
+struct napi_struct;
struct netdev_config;
struct netdev_name_node;
struct sd_flow_limit;
@@ -363,6 +364,10 @@ struct gro_node {
u32 cached_napi_id;
};
+struct napi_thread_ctx {
+ struct napi_struct *napi;
+};
+
/*
* Structure for per-NAPI config
*/
@@ -371,6 +376,12 @@ struct napi_config {
u64 irq_suspend_timeout;
u32 defer_hard_irqs;
cpumask_t affinity_mask;
+ /* thread and thread_ctx mirror the napi_struct fields of the same name
+ * while the napi_struct is alive. When the napi_struct gets destroyed,
+ * napi_config holds the sole reference to the now parked kthread.
+ */
+ struct task_struct *thread;
+ struct napi_thread_ctx *thread_ctx;
u8 threaded;
unsigned int napi_id;
};
@@ -404,6 +415,7 @@ struct napi_struct {
struct hrtimer timer;
/* all fields past this point are write-protected by netdev_lock */
struct task_struct *thread;
+ struct napi_thread_ctx *thread_ctx;
unsigned long gro_flush_timeout;
unsigned long irq_suspend_timeout;
u32 defer_hard_irqs;
diff --git a/net/core/dev.c b/net/core/dev.c
index 4b3d5cfdf6e0..c81992c929d9 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1647,20 +1647,45 @@ static int napi_threaded_poll(void *data);
static int napi_kthread_create(struct napi_struct *n)
{
+ struct napi_thread_ctx *thread_ctx = NULL;
int err = 0;
+ if (n->config && n->config->thread) {
+ n->thread_ctx = n->config->thread_ctx;
+ n->thread = n->config->thread;
+ WRITE_ONCE(n->thread_ctx->napi, n);
+ kthread_unpark(n->thread);
+ return 0;
+ }
+
+ thread_ctx = kvzalloc_obj(*thread_ctx);
+ if (!thread_ctx)
+ return -ENOMEM;
+
/* Create and wake up the kthread once to put it in
* TASK_INTERRUPTIBLE mode to avoid the blocked task
* warning and work with loadavg.
*/
- n->thread = kthread_run(napi_threaded_poll, n, "napi/%s-%d",
+ thread_ctx->napi = n;
+ n->thread = kthread_run(napi_threaded_poll, thread_ctx, "napi/%s-%d",
n->dev->name, n->napi_id);
if (IS_ERR(n->thread)) {
err = PTR_ERR(n->thread);
pr_err("kthread_run failed with err %d\n", err);
n->thread = NULL;
+ goto free_thread_ctx;
+ }
+ n->thread_ctx = thread_ctx;
+ if (n->config) {
+ n->config->thread = n->thread;
+ n->config->thread_ctx = thread_ctx;
}
+ return 0;
+
+free_thread_ctx:
+ kvfree(thread_ctx);
+
return err;
}
@@ -7183,7 +7208,13 @@ static void napi_stop_kthread(struct napi_struct *napi)
}
kthread_stop(napi->thread);
+ kvfree(napi->thread_ctx);
napi->thread = NULL;
+ napi->thread_ctx = NULL;
+ if (napi->config) {
+ napi->config->thread = NULL;
+ napi->config->thread_ctx = NULL;
+ }
}
static void napi_set_threaded_state(struct napi_struct *napi,
@@ -7199,13 +7230,11 @@ static void napi_set_threaded_state(struct napi_struct *napi,
int napi_set_threaded(struct napi_struct *napi,
enum netdev_napi_threaded threaded)
{
- if (threaded) {
- if (!napi->thread) {
- int err = napi_kthread_create(napi);
+ if (threaded && !napi->thread) {
+ int err = napi_kthread_create(napi);
- if (err)
- return err;
- }
+ if (err)
+ return err;
}
if (napi->config)
@@ -7255,8 +7284,15 @@ int netif_set_threaded(struct net_device *dev,
WARN_ON_ONCE(napi_set_threaded(napi, threaded));
/* Override the config for all NAPIs even if currently not listed */
- for (i = 0; i < dev->num_napi_configs; i++)
+ for (i = 0; i < dev->num_napi_configs; i++) {
dev->napi_config[i].threaded = threaded;
+ if (!threaded && dev->napi_config[i].thread) {
+ kthread_stop(dev->napi_config[i].thread);
+ kvfree(dev->napi_config[i].thread_ctx);
+ dev->napi_config[i].thread = NULL;
+ dev->napi_config[i].thread_ctx = NULL;
+ }
+ }
return err;
}
@@ -7501,6 +7537,8 @@ static void napi_save_config(struct napi_struct *n)
n->config->defer_hard_irqs = n->defer_hard_irqs;
n->config->gro_flush_timeout = n->gro_flush_timeout;
n->config->irq_suspend_timeout = n->irq_suspend_timeout;
+ n->config->thread = n->thread;
+ n->config->thread_ctx = n->thread_ctx;
napi_hash_del(n);
}
@@ -7695,6 +7733,21 @@ void __netif_napi_del_locked(struct napi_struct *napi)
if (test_and_clear_bit(NAPI_STATE_HAS_NOTIFIER, &napi->state))
irq_set_affinity_notifier(napi->irq, NULL);
+ if (napi->thread) {
+ if (napi->config) {
+ kthread_park(napi->thread);
+ /* napi->config holds the only reference to the thread
+ * from now on.
+ */
+ napi->thread_ctx->napi = NULL;
+ } else {
+ kthread_stop(napi->thread);
+ kvfree(napi->thread_ctx);
+ }
+ napi->thread = NULL;
+ napi->thread_ctx = NULL;
+ }
+
if (napi->config) {
napi->index = -1;
napi->config = NULL;
@@ -7704,11 +7757,6 @@ void __netif_napi_del_locked(struct napi_struct *napi)
napi_free_frags(napi);
gro_cleanup(&napi->gro);
-
- if (napi->thread) {
- kthread_stop(napi->thread);
- napi->thread = NULL;
- }
}
EXPORT_SYMBOL(__netif_napi_del_locked);
@@ -7804,11 +7852,18 @@ static int napi_poll(struct napi_struct *n, struct list_head *repoll)
return work;
}
-static int napi_thread_wait(struct napi_struct *napi)
+static struct napi_struct *napi_thread_wait(struct napi_thread_ctx *thread_ctx)
{
+ struct napi_struct *napi = READ_ONCE(thread_ctx->napi);
set_current_state(TASK_INTERRUPTIBLE);
while (!kthread_should_stop()) {
+ if (kthread_should_park()) {
+ kthread_parkme();
+ napi = READ_ONCE(thread_ctx->napi);
+ /* Might be awakened for stopping */
+ continue;
+ }
/* Testing SCHED_THREADED bit here to make sure the current
* kthread owns this napi and could poll on this napi.
* Testing SCHED bit is not enough because SCHED bit might be
@@ -7817,7 +7872,7 @@ static int napi_thread_wait(struct napi_struct *napi)
if (test_bit(NAPI_STATE_SCHED_THREADED, &napi->state)) {
WARN_ON(!list_empty(&napi->poll_list));
__set_current_state(TASK_RUNNING);
- return 0;
+ return napi;
}
schedule();
@@ -7825,7 +7880,7 @@ static int napi_thread_wait(struct napi_struct *napi)
}
__set_current_state(TASK_RUNNING);
- return -1;
+ return NULL;
}
static void napi_threaded_poll_loop(struct napi_struct *napi,
@@ -7882,13 +7937,18 @@ static void napi_threaded_poll_loop(struct napi_struct *napi,
static int napi_threaded_poll(void *data)
{
- struct napi_struct *napi = data;
+ struct napi_thread_ctx *thread_ctx = data;
unsigned long last_qs = jiffies;
+ struct napi_struct *napi;
bool want_busy_poll;
bool in_busy_poll;
unsigned long val;
- while (!napi_thread_wait(napi)) {
+ while (1) {
+ napi = napi_thread_wait(thread_ctx);
+ if (!napi)
+ break;
+
val = READ_ONCE(napi->state);
want_busy_poll = val & NAPIF_STATE_THREADED_BUSY_POLL;
@@ -12128,11 +12188,11 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
goto free_all;
dev->cfg_pending = dev->cfg;
- dev->num_napi_configs = maxqs;
napi_config_sz = array_size(maxqs, sizeof(*dev->napi_config));
dev->napi_config = kvzalloc(napi_config_sz, GFP_KERNEL_ACCOUNT);
if (!dev->napi_config)
goto free_all;
+ dev->num_napi_configs = maxqs;
strscpy(dev->name, name);
dev->name_assign_type = name_assign_type;
@@ -12160,6 +12220,8 @@ EXPORT_SYMBOL(alloc_netdev_mqs);
static void netdev_napi_exit(struct net_device *dev)
{
+ unsigned int i;
+
if (!list_empty(&dev->napi_list)) {
struct napi_struct *p, *n;
@@ -12171,6 +12233,12 @@ static void netdev_napi_exit(struct net_device *dev)
synchronize_net();
}
+ for (i = 0; i < dev->num_napi_configs; i++) {
+ if (dev->napi_config[i].thread) {
+ kthread_stop(dev->napi_config[i].thread);
+ kvfree(dev->napi_config[i].thread_ctx);
+ }
+ }
kvfree(dev->napi_config);
}
--
2.55.0.rc0.799.gd6f94ed593-goog
^ permalink raw reply related [flat|nested] 6+ messages in thread* [PATCH net-next v1 2/2] selftests: net: Add kthread preserving test in napi_threaded and busy_poll_test
2026-06-29 19:20 [PATCH net-next v1 0/2] Reuse threaded NAPI kthread across napi_del()/napi_add() Shuhao Tan
2026-06-29 19:20 ` [PATCH net-next v1 1/2] net: Save kthread of threaded NAPI in napi_config and restore it when trying to create a new kthread Shuhao Tan
@ 2026-06-29 19:20 ` Shuhao Tan
2026-06-29 23:26 ` [PATCH net-next v1 0/2] Reuse threaded NAPI kthread across napi_del()/napi_add() Jakub Kicinski
2 siblings, 0 replies; 6+ messages in thread
From: Shuhao Tan @ 2026-06-29 19:20 UTC (permalink / raw)
To: David S . Miller, Eric Dumazet, Jakub Kicinski, Paolo Abeni,
Simon Horman, Andrew Lunn, Shuah Khan
Cc: Shuhao Tan, Mina Almasry, Samiullah Khawaja, Kuniyuki Iwashima,
netdev, linux-kernel, linux-kselftest
Add a testcase to ensure the kthread stays the same across a NIC link
flap.
Add a testcase to ensure the same kthread can poll different NAPIs
across a NIC link flap.
Signed-off-by: Shuhao Tan <tanshuhao@google.com>
---
.../selftests/drivers/net/napi_threaded.py | 41 ++++++++++++++++++-
tools/testing/selftests/net/busy_poll_test.sh | 24 +++++++++++
2 files changed, 64 insertions(+), 1 deletion(-)
diff --git a/tools/testing/selftests/drivers/net/napi_threaded.py b/tools/testing/selftests/drivers/net/napi_threaded.py
index f4be72b2145a..20110fb6942e 100755
--- a/tools/testing/selftests/drivers/net/napi_threaded.py
+++ b/tools/testing/selftests/drivers/net/napi_threaded.py
@@ -127,6 +127,44 @@ def change_num_queues(cfg, nl) -> None:
_assert_napi_threaded_enabled(nl, napi0_id)
_assert_napi_threaded_enabled(nl, napi1_id)
+def nic_link_flap(cfg, nl) -> None:
+ """
+ Test that if threaded is enabled, and NIC goes through
+ a reset, the kthread stays unchanged across the link flap.
+ """
+ napis = nl.napi_get({'ifindex': cfg.ifindex}, dump=True)
+ ksft_ge(len(napis), 2)
+
+ napi0_id = napis[0]['id']
+ napi1_id = napis[1]['id']
+
+ _setup_deferred_cleanup(cfg)
+
+ # set threaded
+ _set_threaded_state(cfg, 1)
+ napis = nl.napi_get({'ifindex': cfg.ifindex}, dump=True)
+
+ # check napi threaded is set for both napis
+ _assert_napi_threaded_enabled(nl, napi0_id)
+ _assert_napi_threaded_enabled(nl, napi1_id)
+
+ pid0 = napis[0].get('pid')
+ pid1 = napis[1].get('pid')
+
+ cmd(f"ip link set {cfg.ifname} down")
+ cmd(f"ip link set {cfg.ifname} up")
+
+ # re-acquire napi info
+ napis = nl.napi_get({'ifindex': cfg.ifindex}, dump=True)
+ ksft_ge(len(napis), 2)
+
+ # check napi threaded is set for both napis
+ _assert_napi_threaded_enabled(nl, napi0_id)
+ _assert_napi_threaded_enabled(nl, napi1_id)
+
+ # check the kthread remains the same
+ ksft_eq(napis[0].get('pid'), pid0)
+ ksft_eq(napis[1].get('pid'), pid1)
def main() -> None:
""" Ksft boiler plate main """
@@ -134,7 +172,8 @@ def main() -> None:
with NetDrvEnv(__file__, queue_count=2) as cfg:
ksft_run([napi_init,
change_num_queues,
- enable_dev_threaded_disable_napi_threaded],
+ enable_dev_threaded_disable_napi_threaded,
+ nic_link_flap],
args=(cfg, NetdevFamily()))
ksft_exit()
diff --git a/tools/testing/selftests/net/busy_poll_test.sh b/tools/testing/selftests/net/busy_poll_test.sh
index 5ec1c85c1623..897ce6700601 100755
--- a/tools/testing/selftests/net/busy_poll_test.sh
+++ b/tools/testing/selftests/net/busy_poll_test.sh
@@ -124,6 +124,23 @@ test_busypoll_with_napi_threaded()
return $?
}
+test_busypoll_with_napi_threaded_link_flap()
+{
+ # Only enable napi threaded poll. Set suspend timeout and prefer busy
+ # poll to 0. Run again after a link flap.
+ test_busypoll 0 ${NAPI_THREADED_MODE_BUSY_POLL} 0 || return $?
+
+ ip netns exec nssv ip link set dev $NSIM_SV_NAME down
+ ip netns exec nscl ip link set dev $NSIM_CL_NAME down
+
+ ip netns exec nssv ip link set dev $NSIM_SV_NAME up
+ ip netns exec nscl ip link set dev $NSIM_CL_NAME up
+
+ test_busypoll 0 ${NAPI_THREADED_MODE_BUSY_POLL} 0
+
+ return $?
+}
+
###
### Code start
###
@@ -176,6 +193,13 @@ if [ $? -ne 0 ]; then
exit 1
fi
+test_busypoll_with_napi_threaded_link_flap
+if [ $? -ne 0 ]; then
+ echo "test_busypoll_with_napi_threaded_link_flap failed"
+ cleanup_ns
+ exit 1
+fi
+
echo "$NSIM_SV_FD:$NSIM_SV_IFIDX" > $NSIM_DEV_SYS_UNLINK
echo $NSIM_CL_ID > $NSIM_DEV_SYS_DEL
--
2.55.0.rc0.799.gd6f94ed593-goog
^ permalink raw reply related [flat|nested] 6+ messages in thread* Re: [PATCH net-next v1 0/2] Reuse threaded NAPI kthread across napi_del()/napi_add().
2026-06-29 19:20 [PATCH net-next v1 0/2] Reuse threaded NAPI kthread across napi_del()/napi_add() Shuhao Tan
2026-06-29 19:20 ` [PATCH net-next v1 1/2] net: Save kthread of threaded NAPI in napi_config and restore it when trying to create a new kthread Shuhao Tan
2026-06-29 19:20 ` [PATCH net-next v1 2/2] selftests: net: Add kthread preserving test in napi_threaded and busy_poll_test Shuhao Tan
@ 2026-06-29 23:26 ` Jakub Kicinski
2026-06-30 0:47 ` Shuhao Tan
2 siblings, 1 reply; 6+ messages in thread
From: Jakub Kicinski @ 2026-06-29 23:26 UTC (permalink / raw)
To: Shuhao Tan
Cc: David S . Miller, Eric Dumazet, Paolo Abeni, Simon Horman,
Andrew Lunn, Shuah Khan, Mina Almasry, Samiullah Khawaja,
Kuniyuki Iwashima, netdev, linux-kernel, linux-kselftest
On Mon, 29 Jun 2026 12:20:25 -0700 Shuhao Tan wrote:
> These drivers destroy and recreate queues during configuration
> changes. If a NAPI was threaded before destruction, during the
> creation, a new kthread will be spawned for the NAPI.
>
> Some drivers do not have this problem, e.g. netdevsim. But these
> drivers and the drivers mentioned above will still lose kthread
> during link flap (ndo_stop/ndo_open).
>
> Because the kthreads before and after these configuration changes are
> different, all the attributes associated with the kthread are lost.
> These include CPU mask, priority, scheduler policy, etc.. If the
> threaded state is preserved for a NAPI, it makes sense to want to
> preserve the attributes of the thread as well.
Send a netdev Netlink notification when NAPI is re-created and
let the userspace re-apply the settings? Keeping a few u32s
around is one thing but keeping a thread running and visible
in /procfs for the lifetime of a machine feels a little bit much.
IDK.
^ permalink raw reply [flat|nested] 6+ messages in thread