[PATCH v5] net: dev_weight: TX/RX orthogonality

netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed

From: Matthias Tafelmeier <matthias.tafelmeier@gmx.net>
To: netdev@vger.kernel.org
Cc: hagen@jauu.net, fw@strlen.de, edumazet@google.com, daniel@iogearbox.net
Subject: [PATCH v5] net: dev_weight: TX/RX orthogonality
Date: Thu, 29 Dec 2016 21:37:21 +0100	[thread overview]
Message-ID: <1483043841-10016-1-git-send-email-matthias.tafelmeier@gmx.net> (raw)
In-Reply-To: <20161229.144555.1740958763290967121.davem@davemloft.net>

Oftenly, introducing side effects on packet processing on the other half
of the stack by adjusting one of TX/RX via sysctl is not desirable.
There are cases of demand for asymmetric, orthogonal configurability.

This holds true especially for nodes where RPS for RFS usage on top is
configured and therefore use the 'old dev_weight'. This is quite a
common base configuration setup nowadays, even with NICs of superior processing
support (e.g. aRFS).

A good example use case are nodes acting as noSQL data bases with a
large number of tiny requests and rather fewer but large packets as responses.
It's affordable to have large budget and rx dev_weights for the
requests. But as a side effect having this large a number on TX
processed in one run can overwhelm drivers.

This patch therefore introduces an independent configurability via sysctl to
userland.

Signed-off-by: Matthias Tafelmeier <matthias.tafelmeier@gmx.net>
---
 Documentation/sysctl/net.txt | 21 +++++++++++++++++++++
 include/linux/netdevice.h    |  4 ++++
 net/core/dev.c               |  8 ++++++--
 net/core/sysctl_net_core.c   | 31 ++++++++++++++++++++++++++++++-
 net/sched/sch_generic.c      |  2 +-
 5 files changed, 62 insertions(+), 4 deletions(-)

diff --git a/Documentation/sysctl/net.txt b/Documentation/sysctl/net.txt
index f0480f7..53cef32 100644
--- a/Documentation/sysctl/net.txt
+++ b/Documentation/sysctl/net.txt
@@ -61,6 +61,27 @@ The maximum number of packets that kernel can handle on a NAPI interrupt,
 it's a Per-CPU variable.
 Default: 64
 
+dev_weight_rx_bias
+--------------
+
+RPS (e.g. RFS, aRFS) processing is competing with the registered NAPI poll function
+of the driver for the per softirq cycle netdev_budget. This parameter influences
+the proportion of the configured netdev_budget that is spent on RPS based packet
+processing during RX softirq cycles. It is further meant for making current
+dev_weight adaptable for asymmetric CPU needs on RX/TX side of the network stack.
+(see dev_weight_tx_bias) It is effective on a per CPU basis. Determination is based
+on dev_weight and is calculated multiplicative (dev_weight * dev_weight_rx_bias).
+Default: 1
+
+dev_weight_tx_bias
+--------------
+
+Scales the maximum number of packets that can be processed during a TX softirq cycle.
+Effective on a per CPU basis. Allows scaling of current dev_weight for asymmetric
+net stack processing needs. Be careful to avoid making TX softirq processing a CPU hog.
+Calculation is based on dev_weight (dev_weight * dev_weight_tx_bias).
+Default: 1
+
 default_qdisc
 --------------
 
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 994f742..ecd78b3 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -3795,6 +3795,10 @@ void netdev_stats_to_stats64(struct rtnl_link_stats64 *stats64,
 extern int		netdev_max_backlog;
 extern int		netdev_tstamp_prequeue;
 extern int		weight_p;
+extern int		dev_weight_rx_bias;
+extern int		dev_weight_tx_bias;
+extern int		dev_rx_weight;
+extern int		dev_tx_weight;
 
 bool netdev_has_upper_dev(struct net_device *dev, struct net_device *upper_dev);
 struct net_device *netdev_upper_get_next_dev_rcu(struct net_device *dev,
diff --git a/net/core/dev.c b/net/core/dev.c
index 8db5a0b..0d34e1c 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3427,7 +3427,11 @@ EXPORT_SYMBOL(netdev_max_backlog);
 
 int netdev_tstamp_prequeue __read_mostly = 1;
 int netdev_budget __read_mostly = 300;
-int weight_p __read_mostly = 64;            /* old backlog weight */
+int weight_p __read_mostly = 64;           /* old backlog weight */
+int dev_weight_rx_bias __read_mostly = 1;  /* bias for backlog weight */
+int dev_weight_tx_bias __read_mostly = 1;  /* bias for output_queue quota */
+int dev_rx_weight __read_mostly = 64;
+int dev_tx_weight __read_mostly = 64;
 
 /* Called with irq disabled */
 static inline void ____napi_schedule(struct softnet_data *sd,
@@ -4833,7 +4837,7 @@ static int process_backlog(struct napi_struct *napi, int quota)
 		net_rps_action_and_irq_enable(sd);
 	}
 
-	napi->weight = weight_p;
+	napi->weight = dev_rx_weight;
 	while (again) {
 		struct sk_buff *skb;
 
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index 2a46e40..698ddd7 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -222,6 +222,21 @@ static int set_default_qdisc(struct ctl_table *table, int write,
 }
 #endif
 
+static int proc_do_dev_weight(struct ctl_table *table, int write,
+			   void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+	int ret;
+
+	ret = proc_dointvec(table, write, buffer, lenp, ppos);
+	if (ret != 0)
+		return ret;
+
+	dev_rx_weight = weight_p * dev_weight_rx_bias;
+	dev_tx_weight = weight_p * dev_weight_tx_bias;
+
+	return ret;
+}
+
 static int proc_do_rss_key(struct ctl_table *table, int write,
 			   void __user *buffer, size_t *lenp, loff_t *ppos)
 {
@@ -273,7 +288,21 @@ static struct ctl_table net_core_table[] = {
 		.data		= &weight_p,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec
+		.proc_handler	= proc_do_dev_weight,
+	},
+	{
+		.procname	= "dev_weight_rx_bias",
+		.data		= &dev_weight_rx_bias,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_do_dev_weight,
+	},
+	{
+		.procname	= "dev_weight_tx_bias",
+		.data		= &dev_weight_tx_bias,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_do_dev_weight,
 	},
 	{
 		.procname	= "netdev_max_backlog",
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 6eb9c8e..b052b27 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -247,7 +247,7 @@ static inline int qdisc_restart(struct Qdisc *q, int *packets)
 
 void __qdisc_run(struct Qdisc *q)
 {
-	int quota = weight_p;
+	int quota = dev_tx_weight;
 	int packets;
 
 	while (qdisc_restart(q, &packets)) {
-- 
2.7.4

next prev parent reply	other threads:[~2016-12-29 20:37 UTC|newest]

Thread overview: 17+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-12-26  9:49 [PATCH v1] net: dev_weight: TX/RX orthogonality Matthias Tafelmeier
2016-12-26 15:52 ` David Miller
     [not found]   ` <ae0712c3-61c6-432e-78d9-665d0c291c9f@gmx.net>
2016-12-26 16:58     ` [PATCH v1] net: dev_weight: TX/RX orthogonality,Re: " David Miller
2016-12-27  8:25       ` [PATCH] " Matthias Tafelmeier
2016-12-27 16:47         ` Marcelo Ricardo Leitner
2016-12-27 17:29           ` Matthias Tafelmeier
2016-12-28  9:42           ` [PATCH v3] " Matthias Tafelmeier
2016-12-28 19:17             ` David Miller
2016-12-29  9:58               ` [PATCH v4] " Matthias Tafelmeier
2016-12-29 19:08                 ` David Miller
2016-12-29 19:23                   ` Matthias Tafelmeier
2016-12-29 19:44                     ` David Miller
2016-12-29 19:45                       ` David Miller
2016-12-29 19:53                         ` Matthias Tafelmeier
2016-12-29 20:37                         ` Matthias Tafelmeier [this message]
2016-12-30  1:16                           ` [PATCH v5] " David Miller
2017-02-13 20:22                             ` Matthias Tafelmeier

find likely ancestor, descendant, or conflicting patches for this message:
( dfblob:f0480f7 dfblob:53cef32 dfblob:994f742 dfblob:ecd78b3
dfblob:8db5a0b dfblob:0d34e1c dfblob:2a46e40 dfblob:698ddd7
dfblob:6eb9c8e dfblob:b052b27 )
 OR (
bs:"[PATCH v5] net: dev_weight: TX/RX orthogonality" )
	(help)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1483043841-10016-1-git-send-email-matthias.tafelmeier@gmx.net \
    --to=matthias.tafelmeier@gmx.net \
    --cc=daniel@iogearbox.net \
    --cc=edumazet@google.com \
    --cc=fw@strlen.de \
    --cc=hagen@jauu.net \
    --cc=netdev@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).