From: John Fastabend <john.fastabend@gmail.com>
To: willemdebruijn.kernel@gmail.com, daniel@iogearbox.net,
eric.dumazet@gmail.com, davem@davemloft.net
Cc: netdev@vger.kernel.org, jiri@resnulli.us, xiyou.wangcong@gmail.com
Subject: [net-next PATCH 14/14] net: sched: pfifo_fast use skb_array
Date: Thu, 07 Dec 2017 09:58:19 -0800 [thread overview]
Message-ID: <20171207175819.5771.31262.stgit@john-Precision-Tower-5810> (raw)
In-Reply-To: <20171207173500.5771.41198.stgit@john-Precision-Tower-5810>
This converts the pfifo_fast qdisc to use the skb_array data structure
and set the lockless qdisc bit. pfifo_fast is the first qdisc to support
the lockless bit that can be a child of a qdisc requiring locking. So
we add logic to clear the lock bit on initialization in these cases when
the qdisc graft operation occurs.
This also removes the logic used to pick the next band to dequeue from
and instead just checks a per priority array for packets from top priority
to lowest. This might need to be a bit more clever but seems to work
for now.
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
---
net/sched/sch_api.c | 5 ++
net/sched/sch_generic.c | 140 +++++++++++++++++++++++++++++------------------
2 files changed, 92 insertions(+), 53 deletions(-)
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 5cb64d2..81f61f5 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -955,6 +955,11 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
} else {
const struct Qdisc_class_ops *cops = parent->ops->cl_ops;
+ /* Only support running class lockless if parent is lockless */
+ if (new && (new->flags & TCQ_F_NOLOCK) &&
+ parent && !(parent->flags & TCQ_F_NOLOCK))
+ new->flags &= ~TCQ_F_NOLOCK;
+
err = -EOPNOTSUPP;
if (cops && cops->graft) {
unsigned long cl = cops->find(parent, classid);
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 5ff93c2..ff6a5ac 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -26,6 +26,7 @@
#include <linux/list.h>
#include <linux/slab.h>
#include <linux/if_vlan.h>
+#include <linux/skb_array.h>
#include <net/sch_generic.h>
#include <net/pkt_sched.h>
#include <net/dst.h>
@@ -578,93 +579,93 @@ struct Qdisc_ops noqueue_qdisc_ops __read_mostly = {
/*
* Private data for a pfifo_fast scheduler containing:
- * - queues for the three band
- * - bitmap indicating which of the bands contain skbs
+ * - rings for priority bands
*/
struct pfifo_fast_priv {
- u32 bitmap;
- struct qdisc_skb_head q[PFIFO_FAST_BANDS];
+ struct skb_array q[PFIFO_FAST_BANDS];
};
-/*
- * Convert a bitmap to the first band number where an skb is queued, where:
- * bitmap=0 means there are no skbs on any band.
- * bitmap=1 means there is an skb on band 0.
- * bitmap=7 means there are skbs on all 3 bands, etc.
- */
-static const int bitmap2band[] = {-1, 0, 1, 0, 2, 0, 1, 0};
-
-static inline struct qdisc_skb_head *band2list(struct pfifo_fast_priv *priv,
- int band)
+static inline struct skb_array *band2list(struct pfifo_fast_priv *priv,
+ int band)
{
- return priv->q + band;
+ return &priv->q[band];
}
static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc *qdisc,
struct sk_buff **to_free)
{
- if (qdisc->q.qlen < qdisc_dev(qdisc)->tx_queue_len) {
- int band = prio2band[skb->priority & TC_PRIO_MAX];
- struct pfifo_fast_priv *priv = qdisc_priv(qdisc);
- struct qdisc_skb_head *list = band2list(priv, band);
-
- priv->bitmap |= (1 << band);
- qdisc->q.qlen++;
- return __qdisc_enqueue_tail(skb, qdisc, list);
- }
+ int band = prio2band[skb->priority & TC_PRIO_MAX];
+ struct pfifo_fast_priv *priv = qdisc_priv(qdisc);
+ struct skb_array *q = band2list(priv, band);
+ int err;
- return qdisc_drop(skb, qdisc, to_free);
+ err = skb_array_produce(q, skb);
+
+ if (unlikely(err))
+ return qdisc_drop_cpu(skb, qdisc, to_free);
+
+ qdisc_qstats_cpu_qlen_inc(qdisc);
+ qdisc_qstats_cpu_backlog_inc(qdisc, skb);
+ return NET_XMIT_SUCCESS;
}
static struct sk_buff *pfifo_fast_dequeue(struct Qdisc *qdisc)
{
struct pfifo_fast_priv *priv = qdisc_priv(qdisc);
- int band = bitmap2band[priv->bitmap];
-
- if (likely(band >= 0)) {
- struct qdisc_skb_head *qh = band2list(priv, band);
- struct sk_buff *skb = __qdisc_dequeue_head(qh);
+ struct sk_buff *skb = NULL;
+ int band;
- if (likely(skb != NULL)) {
- qdisc_qstats_backlog_dec(qdisc, skb);
- qdisc_bstats_update(qdisc, skb);
- }
+ for (band = 0; band < PFIFO_FAST_BANDS && !skb; band++) {
+ struct skb_array *q = band2list(priv, band);
- qdisc->q.qlen--;
- if (qh->qlen == 0)
- priv->bitmap &= ~(1 << band);
+ if (__skb_array_empty(q))
+ continue;
- return skb;
+ skb = skb_array_consume_bh(q);
+ }
+ if (likely(skb)) {
+ qdisc_qstats_cpu_backlog_dec(qdisc, skb);
+ qdisc_bstats_cpu_update(qdisc, skb);
+ qdisc_qstats_cpu_qlen_dec(qdisc);
}
- return NULL;
+ return skb;
}
static struct sk_buff *pfifo_fast_peek(struct Qdisc *qdisc)
{
struct pfifo_fast_priv *priv = qdisc_priv(qdisc);
- int band = bitmap2band[priv->bitmap];
+ struct sk_buff *skb = NULL;
+ int band;
- if (band >= 0) {
- struct qdisc_skb_head *qh = band2list(priv, band);
+ for (band = 0; band < PFIFO_FAST_BANDS && !skb; band++) {
+ struct skb_array *q = band2list(priv, band);
- return qh->head;
+ skb = __skb_array_peek(q);
}
- return NULL;
+ return skb;
}
static void pfifo_fast_reset(struct Qdisc *qdisc)
{
- int prio;
+ int i, band;
struct pfifo_fast_priv *priv = qdisc_priv(qdisc);
- for (prio = 0; prio < PFIFO_FAST_BANDS; prio++)
- __qdisc_reset_queue(band2list(priv, prio));
+ for (band = 0; band < PFIFO_FAST_BANDS; band++) {
+ struct skb_array *q = band2list(priv, band);
+ struct sk_buff *skb;
- priv->bitmap = 0;
- qdisc->qstats.backlog = 0;
- qdisc->q.qlen = 0;
+ while ((skb = skb_array_consume_bh(q)) != NULL)
+ kfree_skb(skb);
+ }
+
+ for_each_possible_cpu(i) {
+ struct gnet_stats_queue *q = per_cpu_ptr(qdisc->cpu_qstats, i);
+
+ q->backlog = 0;
+ q->qlen = 0;
+ }
}
static int pfifo_fast_dump(struct Qdisc *qdisc, struct sk_buff *skb)
@@ -682,17 +683,48 @@ static int pfifo_fast_dump(struct Qdisc *qdisc, struct sk_buff *skb)
static int pfifo_fast_init(struct Qdisc *qdisc, struct nlattr *opt)
{
- int prio;
+ unsigned int qlen = qdisc_dev(qdisc)->tx_queue_len;
struct pfifo_fast_priv *priv = qdisc_priv(qdisc);
+ int prio;
+
+ /* guard against zero length rings */
+ if (!qlen)
+ return -EINVAL;
- for (prio = 0; prio < PFIFO_FAST_BANDS; prio++)
- qdisc_skb_head_init(band2list(priv, prio));
+ for (prio = 0; prio < PFIFO_FAST_BANDS; prio++) {
+ struct skb_array *q = band2list(priv, prio);
+ int err;
+
+ err = skb_array_init(q, qlen, GFP_KERNEL);
+ if (err)
+ return -ENOMEM;
+ }
/* Can by-pass the queue discipline */
qdisc->flags |= TCQ_F_CAN_BYPASS;
return 0;
}
+static void pfifo_fast_destroy(struct Qdisc *sch)
+{
+ struct pfifo_fast_priv *priv = qdisc_priv(sch);
+ int prio;
+
+ for (prio = 0; prio < PFIFO_FAST_BANDS; prio++) {
+ struct skb_array *q = band2list(priv, prio);
+
+ /* NULL ring is possible if destroy path is due to a failed
+ * skb_array_init() in pfifo_fast_init() case.
+ */
+ if (!&q->ring.queue)
+ continue;
+ /* Destroy ring but no need to kfree_skb because a call to
+ * pfifo_fast_reset() has already done that work.
+ */
+ ptr_ring_cleanup(&q->ring, NULL);
+ }
+}
+
struct Qdisc_ops pfifo_fast_ops __read_mostly = {
.id = "pfifo_fast",
.priv_size = sizeof(struct pfifo_fast_priv),
@@ -700,9 +732,11 @@ struct Qdisc_ops pfifo_fast_ops __read_mostly = {
.dequeue = pfifo_fast_dequeue,
.peek = pfifo_fast_peek,
.init = pfifo_fast_init,
+ .destroy = pfifo_fast_destroy,
.reset = pfifo_fast_reset,
.dump = pfifo_fast_dump,
.owner = THIS_MODULE,
+ .static_flags = TCQ_F_NOLOCK | TCQ_F_CPUSTATS,
};
EXPORT_SYMBOL(pfifo_fast_ops);
next prev parent reply other threads:[~2017-12-07 17:58 UTC|newest]
Thread overview: 18+ messages / expand[flat|nested] mbox.gz Atom feed top
2017-12-07 17:53 [net-next PATCH 00/14] lockless qdisc series John Fastabend
2017-12-07 17:54 ` [net-next PATCH 01/14] net: sched: cleanup qdisc_run and __qdisc_run semantics John Fastabend
2017-12-07 17:54 ` [net-next PATCH 02/14] net: sched: allow qdiscs to handle locking John Fastabend
2017-12-07 17:54 ` [net-next PATCH 03/14] net: sched: remove remaining uses for qdisc_qlen in xmit path John Fastabend
2017-12-07 17:55 ` [net-next PATCH 04/14] net: sched: provide per cpu qstat helpers John Fastabend
2017-12-07 17:55 ` [net-next PATCH 05/14] net: sched: a dflt qdisc may be used with per cpu stats John Fastabend
2017-12-07 17:55 ` [net-next PATCH 06/14] net: sched: explicit locking in gso_cpu fallback John Fastabend
2017-12-07 17:56 ` [net-next PATCH 07/14] net: sched: drop qdisc_reset from dev_graft_qdisc John Fastabend
2017-12-07 17:56 ` [net-next PATCH 08/14] net: sched: use skb list for skb_bad_tx John Fastabend
2017-12-07 17:56 ` [net-next PATCH 09/14] net: sched: check for frozen queue before skb_bad_txq check John Fastabend
2017-12-07 17:57 ` [net-next PATCH 10/14] net: sched: helpers to sum qlen and qlen for per cpu logic John Fastabend
2017-12-07 17:57 ` [net-next PATCH 11/14] net: sched: add support for TCQ_F_NOLOCK subqueues to sch_mq John Fastabend
2017-12-07 17:57 ` [net-next PATCH 12/14] net: sched: add support for TCQ_F_NOLOCK subqueues to sch_mqprio John Fastabend
2017-12-07 17:57 ` [net-next PATCH 13/14] net: skb_array: expose peek API John Fastabend
2017-12-07 17:58 ` John Fastabend [this message]
2017-12-12 22:06 ` [net-next PATCH 14/14] net: sched: pfifo_fast use skb_array Cong Wang
2017-12-07 18:09 ` [net-next PATCH 00/14] lockless qdisc series David Miller
2017-12-08 18:50 ` David Miller
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20171207175819.5771.31262.stgit@john-Precision-Tower-5810 \
--to=john.fastabend@gmail.com \
--cc=daniel@iogearbox.net \
--cc=davem@davemloft.net \
--cc=eric.dumazet@gmail.com \
--cc=jiri@resnulli.us \
--cc=netdev@vger.kernel.org \
--cc=willemdebruijn.kernel@gmail.com \
--cc=xiyou.wangcong@gmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox