From: Wei Wang <weiwan@google.com>
To: "David S . Miller" <davem@davemloft.net>, netdev@vger.kernel.org
Cc: Jakub Kicinski <kuba@kernel.org>,
Eric Dumazet <edumazet@google.com>,
Paolo Abeni <pabeni@redhat.com>,
Hannes Frederic Sowa <hannes@stressinduktion.org>,
Felix Fietkau <nbd@nbd.name>, Wei Wang <weiwan@google.com>
Subject: [RFC PATCH net-next 1/6] net: implement threaded-able napi poll loop support
Date: Mon, 14 Sep 2020 10:24:48 -0700 [thread overview]
Message-ID: <20200914172453.1833883-2-weiwan@google.com> (raw)
In-Reply-To: <20200914172453.1833883-1-weiwan@google.com>
From: Paolo Abeni <pabeni@redhat.com>
This patch allows running each napi poll loop inside its
own kernel thread.
The rx mode can be enabled per napi instance via the
newly added napi_set_threaded() API; the requested kthread
will be created on demand and shut down on device stop.
Once threaded mode is enabled and the kthread is
started, napi_schedule() will wake up that thread instead
of scheduling the softirq.
The threaded poll loop behaves much like net_rx_action(),
but it does not have to manipulate local irqs and uses
an explicit scheduling point based on netdev_budget.
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: Wei Wang <weiwan@google.com>
---
include/linux/netdevice.h | 5 ++
net/core/dev.c | 113 ++++++++++++++++++++++++++++++++++++++
2 files changed, 118 insertions(+)
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 157e0242e9ee..6797eb356e2e 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -348,6 +348,7 @@ struct napi_struct {
struct list_head dev_list;
struct hlist_node napi_hash_node;
unsigned int napi_id;
+ struct task_struct *thread;
};
enum {
@@ -358,6 +359,7 @@ enum {
NAPI_STATE_LISTED, /* NAPI added to system lists */
NAPI_STATE_NO_BUSY_POLL,/* Do not add in napi_hash, no busy polling */
NAPI_STATE_IN_BUSY_POLL,/* sk_busy_loop() owns this NAPI */
+ NAPI_STATE_THREADED, /* The poll is performed inside its own thread*/
};
enum {
@@ -368,6 +370,7 @@ enum {
NAPIF_STATE_LISTED = BIT(NAPI_STATE_LISTED),
NAPIF_STATE_NO_BUSY_POLL = BIT(NAPI_STATE_NO_BUSY_POLL),
NAPIF_STATE_IN_BUSY_POLL = BIT(NAPI_STATE_IN_BUSY_POLL),
+ NAPIF_STATE_THREADED = BIT(NAPI_STATE_THREADED),
};
enum gro_result {
@@ -489,6 +492,8 @@ static inline bool napi_complete(struct napi_struct *n)
return napi_complete_done(n, 0);
}
+/**
+ * napi_set_threaded - enable or disable threaded polling for a NAPI instance
+ * @n: NAPI context
+ * @threaded: true to poll @n from its own kernel thread, false otherwise
+ *
+ * Must be called with RTNL held. Returns -EBUSY if the device is up,
+ * 0 otherwise.
+ */
+int napi_set_threaded(struct napi_struct *n, bool threaded);
+
/**
* napi_disable - prevent NAPI from scheduling
* @n: NAPI context
diff --git a/net/core/dev.c b/net/core/dev.c
index 03624192862a..0fe4c531b682 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -91,6 +91,7 @@
#include <linux/etherdevice.h>
#include <linux/ethtool.h>
#include <linux/skbuff.h>
+#include <linux/kthread.h>
#include <linux/bpf.h>
#include <linux/bpf_trace.h>
#include <net/net_namespace.h>
@@ -1486,9 +1487,19 @@ void netdev_notify_peers(struct net_device *dev)
}
EXPORT_SYMBOL(netdev_notify_peers);
+static int napi_threaded_poll(void *data);
+
+/* Create the poll kthread for @n if threaded mode has been requested and no
+ * thread exists yet. The thread is only created here, not woken.
+ *
+ * kthread_create() reports failure with an ERR_PTR() value, never NULL, so
+ * the result must not be stored blindly: ____napi_schedule() and
+ * napi_thread_stop() treat any non-NULL n->thread as a valid task. On
+ * failure n->thread stays NULL and the napi keeps using the softirq path.
+ */
+static void napi_thread_start(struct napi_struct *n)
+{
+	struct task_struct *thread;
+
+	if (!test_bit(NAPI_STATE_THREADED, &n->state) || n->thread)
+		return;
+
+	thread = kthread_create(napi_threaded_poll, n, "%s-%d",
+				n->dev->name, n->napi_id);
+	if (IS_ERR(thread)) {
+		pr_err("%s: failed to create napi poll thread (%ld)\n",
+		       n->dev->name, PTR_ERR(thread));
+		return;
+	}
+
+	n->thread = thread;
+}
+
static int __dev_open(struct net_device *dev, struct netlink_ext_ack *extack)
{
const struct net_device_ops *ops = dev->netdev_ops;
+ struct napi_struct *n;
int ret;
ASSERT_RTNL();
@@ -1520,6 +1531,9 @@ static int __dev_open(struct net_device *dev, struct netlink_ext_ack *extack)
if (!ret && ops->ndo_open)
ret = ops->ndo_open(dev);
+ list_for_each_entry(n, &dev->napi_list, dev_list)
+ napi_thread_start(n);
+
netpoll_poll_enable(dev);
if (ret)
@@ -1565,6 +1579,14 @@ int dev_open(struct net_device *dev, struct netlink_ext_ack *extack)
}
EXPORT_SYMBOL(dev_open);
+/* Tear down the poll kthread attached to @n, if any: kthread_stop() it and
+ * clear n->thread. Does nothing when no thread is attached.
+ */
+static void napi_thread_stop(struct napi_struct *n)
+{
+	struct task_struct *thread = n->thread;
+
+	if (thread) {
+		kthread_stop(thread);
+		n->thread = NULL;
+	}
+}
+
static void __dev_close_many(struct list_head *head)
{
struct net_device *dev;
@@ -1593,6 +1615,7 @@ static void __dev_close_many(struct list_head *head)
list_for_each_entry(dev, head, close_list) {
const struct net_device_ops *ops = dev->netdev_ops;
+ struct napi_struct *n;
/*
* Call the device specific close. This cannot fail.
@@ -1604,6 +1627,9 @@ static void __dev_close_many(struct list_head *head)
if (ops->ndo_stop)
ops->ndo_stop(dev);
+ list_for_each_entry(n, &dev->napi_list, dev_list)
+ napi_thread_stop(n);
+
dev->flags &= ~IFF_UP;
netpoll_poll_enable(dev);
}
@@ -4240,6 +4266,11 @@ int gro_normal_batch __read_mostly = 8;
static inline void ____napi_schedule(struct softnet_data *sd,
struct napi_struct *napi)
{
+ if (napi->thread) { /* this napi has a poll kthread attached */
+ wake_up_process(napi->thread); /* wake it rather than raising NET_RX_SOFTIRQ */
+ return;
+ }
+
list_add_tail(&napi->poll_list, &sd->poll_list);
__raise_softirq_irqoff(NET_RX_SOFTIRQ);
}
@@ -6590,6 +6621,30 @@ static void init_gro_hash(struct napi_struct *napi)
napi->gro_bitmask = 0;
}
+/* Switch threaded polling on or off for @n. Must be called under RTNL.
+ *
+ * Returns -EBUSY if the owning device has IFF_UP set, 0 otherwise
+ * (including when @n is already in the requested mode).
+ */
+int napi_set_threaded(struct napi_struct *n, bool threaded)
+{
+	bool enabled;
+
+	ASSERT_RTNL();
+
+	if (n->dev->flags & IFF_UP)
+		return -EBUSY;
+
+	enabled = test_bit(NAPI_STATE_THREADED, &n->state);
+	if (enabled == threaded)
+		return 0;
+
+	/* The mode really changes: flip the state bit. */
+	if (enabled)
+		clear_bit(NAPI_STATE_THREADED, &n->state);
+	else
+		set_bit(NAPI_STATE_THREADED, &n->state);
+
+	/* With __LINK_STATE_START set there is nothing more to do here;
+	 * otherwise drop any existing thread and create one if now needed.
+	 */
+	if (!test_bit(__LINK_STATE_START, &n->dev->state)) {
+		napi_thread_stop(n);
+		napi_thread_start(n);
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(napi_set_threaded);
+
void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
int (*poll)(struct napi_struct *, int), int weight)
{
@@ -6730,6 +6785,64 @@ static int napi_poll(struct napi_struct *n, struct list_head *repoll)
return work;
}
+/* Sleep until @napi has work for its poll thread.
+ *
+ * Returns 0 once NAPI_STATE_SCHED is observed, or -1 when the thread is
+ * asked to stop or a napi disable is pending. The task state is set to
+ * TASK_INTERRUPTIBLE before each round of checks and restored to
+ * TASK_RUNNING before returning.
+ */
+static int napi_thread_wait(struct napi_struct *napi)
+{
+	int ret = -1;
+
+	for (;;) {
+		set_current_state(TASK_INTERRUPTIBLE);
+
+		if (kthread_should_stop() || napi_disable_pending(napi))
+			break;
+
+		if (test_bit(NAPI_STATE_SCHED, &napi->state)) {
+			ret = 0;
+			break;
+		}
+
+		schedule();
+	}
+
+	__set_current_state(TASK_RUNNING);
+	return ret;
+}
+
+/* Thread function of a per-napi poll kthread; @data is the napi_struct.
+ *
+ * Each time napi_thread_wait() reports that the napi is scheduled, poll it
+ * with bottom halves disabled until it is no longer scheduled or a disable
+ * is pending. When the work reported by napi_poll() uses up netdev_budget,
+ * or two jiffies have elapsed, insert a scheduling point and start a fresh
+ * budget/time window. Returns 0 once napi_thread_wait() returns non-zero.
+ */
+static int napi_threaded_poll(void *data)
+{
+	struct napi_struct *napi = data;
+
+	while (!napi_thread_wait(napi)) {
+		struct list_head dummy_repoll;
+		int budget = netdev_budget;
+		unsigned long time_limit;
+		bool again = true;
+
+		INIT_LIST_HEAD(&dummy_repoll);
+		local_bh_disable();
+		time_limit = jiffies + 2;
+		do {
+			/* ensure that the poll list is not empty */
+			if (list_empty(&dummy_repoll))
+				list_add(&napi->poll_list, &dummy_repoll);
+
+			budget -= napi_poll(napi, &dummy_repoll);
+			if (unlikely(budget <= 0 ||
+				     time_after_eq(jiffies, time_limit))) {
+				/* cond_resched() cannot reschedule (and trips
+				 * the might_sleep() debug check) while BH is
+				 * disabled, so flush deferred skb frees and
+				 * re-enable BH around the scheduling point.
+				 */
+				__kfree_skb_flush();
+				local_bh_enable();
+				cond_resched();
+				local_bh_disable();
+
+				/* refresh the budget */
+				budget = netdev_budget;
+				time_limit = jiffies + 2;
+			}
+
+			/* Keep polling only while still scheduled and no
+			 * disable request is pending.
+			 */
+			if (napi_disable_pending(napi))
+				again = false;
+			else if (!test_bit(NAPI_STATE_SCHED, &napi->state))
+				again = false;
+		} while (again);
+
+		__kfree_skb_flush();
+		local_bh_enable();
+	}
+	return 0;
+}
+
static __latent_entropy void net_rx_action(struct softirq_action *h)
{
struct softnet_data *sd = this_cpu_ptr(&softnet_data);
--
2.28.0.618.gf4bc123cb7-goog
next prev parent reply other threads:[~2020-09-14 17:26 UTC|newest]
Thread overview: 32+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-09-14 17:24 [RFC PATCH net-next 0/6] implement kthread based napi poll Wei Wang
2020-09-14 17:24 ` Wei Wang [this message]
2020-09-25 19:45 ` [RFC PATCH net-next 1/6] net: implement threaded-able napi poll loop support Hannes Frederic Sowa
2020-09-25 23:50 ` Wei Wang
2020-09-26 14:22 ` Hannes Frederic Sowa
2020-09-28 8:45 ` Paolo Abeni
2020-09-28 18:13 ` Wei Wang
2020-09-14 17:24 ` [RFC PATCH net-next 2/6] net: add sysfs attribute to control napi threaded mode Wei Wang
2020-09-15 2:50 ` kernel test robot
2020-09-15 3:47 ` kernel test robot
2020-09-14 17:24 ` [RFC PATCH net-next 3/6] net: extract napi poll functionality to __napi_poll() Wei Wang
2020-09-14 17:24 ` [RFC PATCH net-next 4/6] net: modify kthread handler to use __napi_poll() Wei Wang
2020-09-14 17:24 ` [RFC PATCH net-next 5/6] net: process RPS/RFS work in kthread context Wei Wang
2020-09-18 22:44 ` Wei Wang
2020-09-21 8:11 ` Eric Dumazet
2020-09-14 17:24 ` [RFC PATCH net-next 6/6] net: improve napi threaded config Wei Wang
2020-09-25 13:48 ` [RFC PATCH net-next 0/6] implement kthread based napi poll Magnus Karlsson
2020-09-25 17:15 ` Wei Wang
2020-09-25 17:30 ` Eric Dumazet
2020-09-25 18:16 ` Stephen Hemminger
2020-09-25 18:23 ` Eric Dumazet
2020-09-25 19:00 ` Stephen Hemminger
2020-09-25 19:06 ` Jakub Kicinski
2020-09-28 14:07 ` Magnus Karlsson
2020-09-28 17:43 ` Eric Dumazet
2020-09-28 18:15 ` Wei Wang
2020-09-29 19:19 ` Jakub Kicinski
2020-09-29 20:16 ` Wei Wang
2020-09-29 21:48 ` Jakub Kicinski
2020-09-30 8:23 ` David Laight
2020-09-30 8:58 ` Paolo Abeni
2020-09-30 15:58 ` Jakub Kicinski
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20200914172453.1833883-2-weiwan@google.com \
--to=weiwan@google.com \
--cc=davem@davemloft.net \
--cc=edumazet@google.com \
--cc=hannes@stressinduktion.org \
--cc=kuba@kernel.org \
--cc=nbd@nbd.name \
--cc=netdev@vger.kernel.org \
--cc=pabeni@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.