[PATCH net v1] net: taprio offload: enforce qdisc to netdev queue mapping

netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed

From: Yannick Vignon <yannick.vignon@oss.nxp.com>
To: Jamal Hadi Salim <jhs@mojatatu.com>,
	Cong Wang <xiyou.wangcong@gmail.com>,
	Jiri Pirko <jiri@resnulli.us>,
	"David S. Miller" <davem@davemloft.net>,
	Jakub Kicinski <kuba@kernel.org>,
	netdev@vger.kernel.org, Joakim Zhang <qiangqing.zhang@nxp.com>,
	sebastien.laveze@oss.nxp.com
Cc: Yannick Vignon <yannick.vignon@nxp.com>
Subject: [PATCH net v1] net: taprio offload: enforce qdisc to netdev queue mapping
Date: Tue, 11 May 2021 19:18:29 +0200	[thread overview]
Message-ID: <20210511171829.17181-1-yannick.vignon@oss.nxp.com> (raw)

From: Yannick Vignon <yannick.vignon@nxp.com>

Even though the taprio qdisc is designed for multiqueue devices, all the
queues still point to the same top-level taprio qdisc. This works and is
probably required for software taprio, but at least with offload taprio,
it has an undesirable side effect: because the whole qdisc is run when a
packet has to be sent, it allows packets in a best-effort class to be
processed in the context of a task sending higher priority traffic. If
there are packets left in the qdisc after that first run, the NET_TX
softirq is raised and gets executed immediately in the same process
context. As with any other softirq, it runs up to 10 times and for up to
2ms, during which the calling process is waiting for the sendmsg call (or
similar) to return. In my use case, that calling process is a real-time
task scheduled to send a packet every 2ms, so the long sendmsg calls are
leading to missed timeslots.

By attaching each netdev queue to its own qdisc, as it is done with
the "classic" mq qdisc, each traffic class can be processed independently
without touching the other classes. A high-priority process can then send
packets without getting stuck in the sendmsg call anymore.

Signed-off-by: Yannick Vignon <yannick.vignon@nxp.com>
---

This patch fixes an issue I observed while verifying the behavior of the
taprio qdisc in a real-time networking situation.
I am wondering if implementing separate taprio qdiscs for the software
and accelerated cases wouldn't be a better solution, but that would
require changes to the iproute2 package as well, and would break
backwards compatibility.

---
 net/sched/sch_taprio.c | 85 ++++++++++++++++++++++--------------------
 1 file changed, 45 insertions(+), 40 deletions(-)

diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c
index 5c91df52b8c2..0bfb03052429 100644
--- a/net/sched/sch_taprio.c
+++ b/net/sched/sch_taprio.c
@@ -438,6 +438,11 @@ static int taprio_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 	struct Qdisc *child;
 	int queue;
 
+	if (unlikely(FULL_OFFLOAD_IS_ENABLED(q->flags))) {
+		WARN_ONCE(1, "Trying to enqueue skb into the root of a taprio qdisc configured with full offload\n");
+		return qdisc_drop(skb, sch, to_free);
+	}
+
 	queue = skb_get_queue_mapping(skb);
 
 	child = q->qdiscs[queue];
@@ -529,23 +534,7 @@ static struct sk_buff *taprio_peek_soft(struct Qdisc *sch)
 
 static struct sk_buff *taprio_peek_offload(struct Qdisc *sch)
 {
-	struct taprio_sched *q = qdisc_priv(sch);
-	struct net_device *dev = qdisc_dev(sch);
-	struct sk_buff *skb;
-	int i;
-
-	for (i = 0; i < dev->num_tx_queues; i++) {
-		struct Qdisc *child = q->qdiscs[i];
-
-		if (unlikely(!child))
-			continue;
-
-		skb = child->ops->peek(child);
-		if (!skb)
-			continue;
-
-		return skb;
-	}
+	WARN_ONCE(1, "Trying to peek into the root of a taprio qdisc configured with full offload\n");
 
 	return NULL;
 }
@@ -654,27 +643,7 @@ static struct sk_buff *taprio_dequeue_soft(struct Qdisc *sch)
 
 static struct sk_buff *taprio_dequeue_offload(struct Qdisc *sch)
 {
-	struct taprio_sched *q = qdisc_priv(sch);
-	struct net_device *dev = qdisc_dev(sch);
-	struct sk_buff *skb;
-	int i;
-
-	for (i = 0; i < dev->num_tx_queues; i++) {
-		struct Qdisc *child = q->qdiscs[i];
-
-		if (unlikely(!child))
-			continue;
-
-		skb = child->ops->dequeue(child);
-		if (unlikely(!skb))
-			continue;
-
-		qdisc_bstats_update(sch, skb);
-		qdisc_qstats_backlog_dec(sch, skb);
-		sch->q.qlen--;
-
-		return skb;
-	}
+	WARN_ONCE(1, "Trying to dequeue from the root of a taprio qdisc configured with full offload\n");
 
 	return NULL;
 }
@@ -1759,6 +1728,37 @@ static int taprio_init(struct Qdisc *sch, struct nlattr *opt,
 	return taprio_change(sch, opt, extack);
 }
 
+static void taprio_attach(struct Qdisc *sch)
+{
+	struct taprio_sched *q = qdisc_priv(sch);
+	struct net_device *dev = qdisc_dev(sch);
+	unsigned int ntx;
+
+	/* Attach underlying qdisc */
+	for (ntx = 0; ntx < dev->num_tx_queues; ntx++) {
+		struct Qdisc *qdisc = q->qdiscs[ntx];
+		struct Qdisc *old;
+
+		if (FULL_OFFLOAD_IS_ENABLED(q->flags)) {
+			qdisc->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
+			old = dev_graft_qdisc(qdisc->dev_queue, qdisc);
+			if (ntx < dev->real_num_tx_queues)
+				qdisc_hash_add(qdisc, false);
+		} else {
+			old = dev_graft_qdisc(qdisc->dev_queue, sch);
+			qdisc_refcount_inc(sch);
+		}
+		if (old)
+			qdisc_put(old);
+	}
+
+	/* access to the child qdiscs is not needed in offload mode */
+	if (FULL_OFFLOAD_IS_ENABLED(q->flags)) {
+		kfree(q->qdiscs);
+		q->qdiscs = NULL;
+	}
+}
+
 static struct netdev_queue *taprio_queue_get(struct Qdisc *sch,
 					     unsigned long cl)
 {
@@ -1785,8 +1785,12 @@ static int taprio_graft(struct Qdisc *sch, unsigned long cl,
 	if (dev->flags & IFF_UP)
 		dev_deactivate(dev);
 
-	*old = q->qdiscs[cl - 1];
-	q->qdiscs[cl - 1] = new;
+	if (FULL_OFFLOAD_IS_ENABLED(q->flags)) {
+		*old = dev_graft_qdisc(dev_queue, new);
+	} else {
+		*old = q->qdiscs[cl - 1];
+		q->qdiscs[cl - 1] = new;
+	}
 
 	if (new)
 		new->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
@@ -2020,6 +2024,7 @@ static struct Qdisc_ops taprio_qdisc_ops __read_mostly = {
 	.change		= taprio_change,
 	.destroy	= taprio_destroy,
 	.reset		= taprio_reset,
+	.attach		= taprio_attach,
 	.peek		= taprio_peek,
 	.dequeue	= taprio_dequeue,
 	.enqueue	= taprio_enqueue,
-- 
2.17.1

next             reply	other threads:[~2021-05-11 17:19 UTC|newest]

Thread overview: 11+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-05-11 17:18 Yannick Vignon [this message]
2021-05-14 15:32 ` [PATCH net v1] net: taprio offload: enforce qdisc to netdev queue mapping Jakub Kicinski
2021-05-14 20:40   ` Vinicius Costa Gomes
2021-05-14 21:01     ` Jakub Kicinski
2021-05-14 23:47       ` Vinicius Costa Gomes
2021-05-17 17:06         ` Yannick Vignon
2021-05-17 17:34           ` Michael Walle
2021-05-17 21:38             ` Vinicius Costa Gomes
2021-05-17 22:42           ` Vinicius Costa Gomes
2021-05-18 14:34             ` Yannick Vignon
2021-07-09  9:41               ` Yannick Vignon

find likely ancestor, descendant, or conflicting patches for this message:
( dfblob:5c91df52b8c dfblob:0bfb0305242 )
 OR (
bs:"[PATCH net v1] net: taprio offload: enforce qdisc to netdev queue mapping" )
	(help)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210511171829.17181-1-yannick.vignon@oss.nxp.com \
    --to=yannick.vignon@oss.nxp.com \
    --cc=davem@davemloft.net \
    --cc=jhs@mojatatu.com \
    --cc=jiri@resnulli.us \
    --cc=kuba@kernel.org \
    --cc=netdev@vger.kernel.org \
    --cc=qiangqing.zhang@nxp.com \
    --cc=sebastien.laveze@oss.nxp.com \
    --cc=xiyou.wangcong@gmail.com \
    --cc=yannick.vignon@nxp.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).