netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Breno Leitao <leitao@debian.org>
To: Jakub Kicinski <kuba@kernel.org>, edumazet@google.com
Cc: "Eric Dumazet" <edumazet@google.com>,
	davem@davemloft.net, pabeni@redhat.com,
	"Steven Rostedt" <rostedt@goodmis.org>,
	"Masami Hiramatsu" <mhiramat@kernel.org>,
	"Mathieu Desnoyers" <mathieu.desnoyers@efficios.com>,
	"Andrew Morton" <akpm@linux-foundation.org>,
	weiwan@google.com, netdev@vger.kernel.org,
	linux-kernel@vger.kernel.org, horms@kernel.org,
	"Jonathan Corbet" <corbet@lwn.net>,
	"Randy Dunlap" <rdunlap@infradead.org>,
	"Bjorn Helgaas" <bhelgaas@google.com>,
	"Johannes Berg" <johannes.berg@intel.com>,
	"Thomas Weißschuh" <linux@weissschuh.net>,
	"open list:TRACING" <linux-trace-kernel@vger.kernel.org>
Subject: Re: [PATCH net-next v3] net: dqs: add NIC stall detector based on BQL
Date: Wed, 14 Feb 2024 06:45:36 -0800	[thread overview]
Message-ID: <ZczSEBFtq6E6APUJ@gmail.com> (raw)
In-Reply-To: <20240213100457.6648a8e0@kernel.org>

On Tue, Feb 13, 2024 at 10:04:57AM -0800, Jakub Kicinski wrote:
> On Tue, 13 Feb 2024 14:57:49 +0100 Eric Dumazet wrote:
> > Please note that adding other sysfs entries is expensive for workloads
> > creating/deleting netdev and netns often.
> > 
> > I _think_ we should find a way for not creating
> > /sys/class/net/<interface>/queues/tx-{Q}/byte_queue_limits  directory
> > and files
> > for non BQL enabled devices (like loopback !)
> 
> We should try, see if anyone screams. We could use IFF_NO_QUEUE, and
> NETIF_F_LLTX as a proxy for "device doesn't have a real queue so BQL 
> would be pointless"? Obviously better to annotate the drivers which
> do have BQL support, but there's >50 of them on a quick count..

Let me make sure I understand the suggestion above. We want to disable
BQL completely for devices that has dev->features & NETIF_F_LLTX or
dev->priv_flags & IFF_NO_QUEUE, right?

Maybe we can add a ->enabled field in struct dql, and set it according
to the features above. Then we can created the sysfs and process the dql
operations based on that field. This should avoid some unnecessary calls
also, if we are not display sysfs.

Here is a very simple PoC to represent what I had in mind. Am I in the
right direction?

diff --git a/include/linux/dynamic_queue_limits.h b/include/linux/dynamic_queue_limits.h
index 407c2f281b64..a9d17597ea80 100644
--- a/include/linux/dynamic_queue_limits.h
+++ b/include/linux/dynamic_queue_limits.h
@@ -62,6 +62,7 @@ struct dql {
 	unsigned int	max_limit;		/* Max limit */
 	unsigned int	min_limit;		/* Minimum limit */
 	unsigned int	slack_hold_time;	/* Time to measure slack */
+	bool		enabled;		/* Queue is active */
 };
 
 /* Set some static maximums */
@@ -101,7 +102,7 @@ void dql_completed(struct dql *dql, unsigned int count);
 void dql_reset(struct dql *dql);
 
 /* Initialize dql state */
-void dql_init(struct dql *dql, unsigned int hold_time);
+void dql_init(struct net_device *dev, struct dql *dql, unsigned int hold_time);
 
 #endif /* _KERNEL_ */
 
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index ef7bfbb98497..5c69bbf4267d 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -3541,6 +3541,9 @@ static inline void netdev_tx_sent_queue(struct netdev_queue *dev_queue,
 					unsigned int bytes)
 {
 #ifdef CONFIG_BQL
+	if (!dev_queue->dql.enabled)
+		return
+
 	dql_queued(&dev_queue->dql, bytes);
 
 	if (likely(dql_avail(&dev_queue->dql) >= 0))
@@ -3573,7 +3576,8 @@ static inline bool __netdev_tx_sent_queue(struct netdev_queue *dev_queue,
 {
 	if (xmit_more) {
 #ifdef CONFIG_BQL
-		dql_queued(&dev_queue->dql, bytes);
+		if (dev_queue->dql.enabled)
+			dql_queued(&dev_queue->dql, bytes);
 #endif
 		return netif_tx_queue_stopped(dev_queue);
 	}
@@ -3617,7 +3621,7 @@ static inline void netdev_tx_completed_queue(struct netdev_queue *dev_queue,
 					     unsigned int pkts, unsigned int bytes)
 {
 #ifdef CONFIG_BQL
-	if (unlikely(!bytes))
+	if (unlikely(!bytes) || !dev_queue->dql.enabled)
 		return;
 
 	dql_completed(&dev_queue->dql, bytes);
@@ -3656,6 +3660,9 @@ static inline void netdev_completed_queue(struct net_device *dev,
 static inline void netdev_tx_reset_queue(struct netdev_queue *q)
 {
 #ifdef CONFIG_BQL
+	if (!q->dql.enabled)
+		return;
+
 	clear_bit(__QUEUE_STATE_STACK_XOFF, &q->state);
 	dql_reset(&q->dql);
 #endif
diff --git a/lib/dynamic_queue_limits.c b/lib/dynamic_queue_limits.c
index fde0aa244148..0a0a51f06c3b 100644
--- a/lib/dynamic_queue_limits.c
+++ b/lib/dynamic_queue_limits.c
@@ -10,6 +10,7 @@
 #include <linux/dynamic_queue_limits.h>
 #include <linux/compiler.h>
 #include <linux/export.h>
+#include <linux/netdevice.h>
 
 #define POSDIFF(A, B) ((int)((A) - (B)) > 0 ? (A) - (B) : 0)
 #define AFTER_EQ(A, B) ((int)((A) - (B)) >= 0)
@@ -128,11 +129,21 @@ void dql_reset(struct dql *dql)
 }
 EXPORT_SYMBOL(dql_reset);
 
-void dql_init(struct dql *dql, unsigned int hold_time)
+static bool netdev_dql_supported(struct net_device *dev)
+{
+	if (dev->features & NETIF_F_LLTX ||
+	    dev->priv_flags & IFF_NO_QUEUE)
+		return false;
+
+	return true;
+}
+
+void dql_init(struct net_device *dev, struct dql *dql, unsigned int hold_time)
 {
 	dql->max_limit = DQL_MAX_LIMIT;
 	dql->min_limit = 0;
 	dql->slack_hold_time = hold_time;
 	dql_reset(dql);
+	dql->enabled = netdev_dql_supported(dev);
 }
 EXPORT_SYMBOL(dql_init);
diff --git a/net/core/dev.c b/net/core/dev.c
index 9bb792cecc16..76aa70ee2c87 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -10052,7 +10052,7 @@ static void netdev_init_one_queue(struct net_device *dev,
 	netdev_queue_numa_node_write(queue, NUMA_NO_NODE);
 	queue->dev = dev;
 #ifdef CONFIG_BQL
-	dql_init(&queue->dql, HZ);
+	dql_init(dev, &queue->dql, HZ);
 #endif
 }
 
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index a09d507c5b03..144ce4bb57bc 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -1709,9 +1709,11 @@ static int netdev_queue_add_kobject(struct net_device *dev, int index)
 		goto err;
 
 #ifdef CONFIG_BQL
-	error = sysfs_create_group(kobj, &dql_group);
-	if (error)
-		goto err;
+	if (queue->dql.enabled) {
+		error = sysfs_create_group(kobj, &dql_group);
+		if (error)
+			goto err;
+	}
 #endif
 
 	kobject_uevent(kobj, KOBJ_ADD);

  reply	other threads:[~2024-02-14 14:45 UTC|newest]

Thread overview: 10+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-02-02 16:53 [PATCH net-next v3] net: dqs: add NIC stall detector based on BQL Breno Leitao
2024-02-06 11:40 ` Paolo Abeni
2024-02-13 13:44   ` Breno Leitao
2024-02-13 13:57 ` Eric Dumazet
2024-02-13 18:04   ` Jakub Kicinski
2024-02-14 14:45     ` Breno Leitao [this message]
2024-02-14 15:41       ` Eric Dumazet
2024-02-14 16:49         ` Breno Leitao
2024-02-14 16:58           ` Eric Dumazet
2024-02-14 17:31             ` Breno Leitao

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=ZczSEBFtq6E6APUJ@gmail.com \
    --to=leitao@debian.org \
    --cc=akpm@linux-foundation.org \
    --cc=bhelgaas@google.com \
    --cc=corbet@lwn.net \
    --cc=davem@davemloft.net \
    --cc=edumazet@google.com \
    --cc=horms@kernel.org \
    --cc=johannes.berg@intel.com \
    --cc=kuba@kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-trace-kernel@vger.kernel.org \
    --cc=linux@weissschuh.net \
    --cc=mathieu.desnoyers@efficios.com \
    --cc=mhiramat@kernel.org \
    --cc=netdev@vger.kernel.org \
    --cc=pabeni@redhat.com \
    --cc=rdunlap@infradead.org \
    --cc=rostedt@goodmis.org \
    --cc=weiwan@google.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).