From: Patrick McHardy <kaber@trash.net>
To: netdev@vger.kernel.org
Cc: Patrick McHardy <kaber@trash.net>
Subject: net_sched 07/07: add classful multiqueue dummy scheduler
Date: Fri, 4 Sep 2009 18:41:21 +0200 (MEST) [thread overview]
Message-ID: <20090904164119.27300.23137.sendpatchset@x2.localnet> (raw)
In-Reply-To: <20090904164111.27300.29929.sendpatchset@x2.localnet>
commit f114d0f02c9e72fea7bbc4d28a113946183fc65f
Author: Patrick McHardy <kaber@trash.net>
Date: Fri Sep 4 18:25:04 2009 +0200
net_sched: add classful multiqueue dummy scheduler
This patch adds a classful dummy scheduler which can be used as root qdisc
for multiqueue devices and exposes each device queue as a child class.
This allows to address queues individually and graft them similar to regular
classes. Additionally it presents an accumulated view of the statistics of
all real root qdiscs in the dummy root.
Two new callbacks are added to the qdisc_ops and qdisc_class_ops:
- cl_ops->select_queue selects the tx queue number for new child classes.
- qdisc_ops->attach() overrides root qdisc device grafting to attach
non-shared qdiscs to the queues.
Signed-off-by: Patrick McHardy <kaber@trash.net>
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index a92dc62..9c69585 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -80,6 +80,7 @@ struct Qdisc
struct Qdisc_class_ops
{
/* Child qdisc manipulation */
+ unsigned int (*select_queue)(struct Qdisc *, struct tcmsg *);
int (*graft)(struct Qdisc *, unsigned long cl,
struct Qdisc *, struct Qdisc **);
struct Qdisc * (*leaf)(struct Qdisc *, unsigned long cl);
@@ -122,6 +123,7 @@ struct Qdisc_ops
void (*reset)(struct Qdisc *);
void (*destroy)(struct Qdisc *);
int (*change)(struct Qdisc *, struct nlattr *arg);
+ void (*attach)(struct Qdisc *);
int (*dump)(struct Qdisc *, struct sk_buff *);
int (*dump_stats)(struct Qdisc *, struct gnet_dump *);
@@ -255,6 +257,8 @@ static inline void sch_tree_unlock(struct Qdisc *q)
extern struct Qdisc noop_qdisc;
extern struct Qdisc_ops noop_qdisc_ops;
+extern struct Qdisc_ops pfifo_fast_ops;
+extern struct Qdisc_ops mq_qdisc_ops;
struct Qdisc_class_common
{
diff --git a/net/sched/Makefile b/net/sched/Makefile
index 54d950c..f14e71b 100644
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -2,7 +2,7 @@
# Makefile for the Linux Traffic Control Unit.
#
-obj-y := sch_generic.o
+obj-y := sch_generic.o sch_mq.o
obj-$(CONFIG_NET_SCHED) += sch_api.o sch_blackhole.o
obj-$(CONFIG_NET_CLS) += cls_api.o
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index d71f12b..2a78d54 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -678,6 +678,11 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
if (dev->flags & IFF_UP)
dev_deactivate(dev);
+ if (new && new->ops->attach) {
+ new->ops->attach(new);
+ num_q = 0;
+ }
+
for (i = 0; i < num_q; i++) {
struct netdev_queue *dev_queue = &dev->rx_queue;
@@ -692,7 +697,7 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
}
notify_and_destroy(skb, n, classid, dev->qdisc, new);
- if (new)
+ if (new && !new->ops->attach)
atomic_inc(&new->refcnt);
dev->qdisc = new ? : &noop_qdisc;
@@ -1095,10 +1100,16 @@ create_n_graft:
q = qdisc_create(dev, &dev->rx_queue,
tcm->tcm_parent, tcm->tcm_parent,
tca, &err);
- else
- q = qdisc_create(dev, netdev_get_tx_queue(dev, 0),
+ else {
+ unsigned int ntx = 0;
+
+ if (p && p->ops->cl_ops && p->ops->cl_ops->select_queue)
+ ntx = p->ops->cl_ops->select_queue(p, tcm);
+
+ q = qdisc_create(dev, netdev_get_tx_queue(dev, ntx),
tcm->tcm_parent, tcm->tcm_handle,
tca, &err);
+ }
if (q == NULL) {
if (err == -EAGAIN)
goto replay;
@@ -1674,6 +1685,7 @@ static int __init pktsched_init(void)
{
register_qdisc(&pfifo_qdisc_ops);
register_qdisc(&bfifo_qdisc_ops);
+ register_qdisc(&mq_qdisc_ops);
proc_net_fops_create(&init_net, "psched", 0, &psched_fops);
rtnl_register(PF_UNSPEC, RTM_NEWQDISC, tc_modify_qdisc, NULL);
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index e7c47ce..4ae6aa5 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -514,7 +514,7 @@ static int pfifo_fast_init(struct Qdisc *qdisc, struct nlattr *opt)
return 0;
}
-static struct Qdisc_ops pfifo_fast_ops __read_mostly = {
+struct Qdisc_ops pfifo_fast_ops __read_mostly = {
.id = "pfifo_fast",
.priv_size = sizeof(struct pfifo_fast_priv),
.enqueue = pfifo_fast_enqueue,
@@ -670,6 +670,26 @@ static void attach_one_default_qdisc(struct net_device *dev,
dev_queue->qdisc_sleeping = qdisc;
}
+static void attach_default_qdiscs(struct net_device *dev)
+{
+ struct netdev_queue *txq;
+ struct Qdisc *qdisc;
+
+ txq = netdev_get_tx_queue(dev, 0);
+
+ if (!netif_is_multiqueue(dev) || dev->tx_queue_len == 0) {
+ netdev_for_each_tx_queue(dev, attach_one_default_qdisc, NULL);
+ dev->qdisc = txq->qdisc_sleeping;
+ atomic_inc(&dev->qdisc->refcnt);
+ } else {
+ qdisc = qdisc_create_dflt(dev, txq, &mq_qdisc_ops, TC_H_ROOT);
+ if (qdisc) {
+ qdisc->ops->attach(qdisc);
+ dev->qdisc = qdisc;
+ }
+ }
+}
+
static void transition_one_qdisc(struct net_device *dev,
struct netdev_queue *dev_queue,
void *_need_watchdog)
@@ -689,7 +709,6 @@ static void transition_one_qdisc(struct net_device *dev,
void dev_activate(struct net_device *dev)
{
- struct netdev_queue *txq;
int need_watchdog;
/* No queueing discipline is attached to device;
@@ -698,13 +717,8 @@ void dev_activate(struct net_device *dev)
virtual interfaces
*/
- if (dev->qdisc == &noop_qdisc) {
- netdev_for_each_tx_queue(dev, attach_one_default_qdisc, NULL);
-
- txq = netdev_get_tx_queue(dev, 0);
- dev->qdisc = txq->qdisc_sleeping;
- atomic_inc(&dev->qdisc->refcnt);
- }
+ if (dev->qdisc == &noop_qdisc)
+ attach_default_qdiscs(dev);
if (!netif_carrier_ok(dev))
/* Delay activation until next carrier-on event */
diff --git a/net/sched/sch_mq.c b/net/sched/sch_mq.c
new file mode 100644
index 0000000..5e453fd
--- /dev/null
+++ b/net/sched/sch_mq.c
@@ -0,0 +1,234 @@
+/*
+ * net/sched/sch_mq.c Classful multiqueue dummy scheduler
+ *
+ * Copyright (c) 2009 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ */
+
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+#include <linux/skbuff.h>
+#include <net/netlink.h>
+#include <net/pkt_sched.h>
+
+struct mq_sched {
+ struct Qdisc **qdiscs;
+};
+
+static void mq_destroy(struct Qdisc *sch)
+{
+ struct net_device *dev = qdisc_dev(sch);
+ struct mq_sched *priv = qdisc_priv(sch);
+ unsigned int ntx;
+
+ if (priv->qdiscs)
+ return;
+ for (ntx = 0; ntx < dev->num_tx_queues && priv->qdiscs[ntx]; ntx++)
+ qdisc_destroy(priv->qdiscs[ntx]);
+ kfree(priv->qdiscs);
+}
+
+static int mq_init(struct Qdisc *sch, struct nlattr *opt)
+{
+ struct net_device *dev = qdisc_dev(sch);
+ struct mq_sched *priv = qdisc_priv(sch);
+ struct netdev_queue *dev_queue;
+ struct Qdisc *qdisc;
+ unsigned int ntx;
+
+ if (sch->parent != TC_H_ROOT)
+ return -EOPNOTSUPP;
+
+ if (!netif_is_multiqueue(dev))
+ return -EOPNOTSUPP;
+
+ /* pre-allocate qdiscs, attachment can't fail */
+ priv->qdiscs = kcalloc(dev->num_tx_queues, sizeof(priv->qdiscs[0]),
+ GFP_KERNEL);
+ if (priv->qdiscs == NULL)
+ return -ENOMEM;
+
+ for (ntx = 0; ntx < dev->num_tx_queues; ntx++) {
+ dev_queue = netdev_get_tx_queue(dev, ntx);
+ qdisc = qdisc_create_dflt(dev, dev_queue, &pfifo_fast_ops,
+ TC_H_MAKE(TC_H_MAJ(sch->handle),
+ TC_H_MIN(ntx + 1)));
+ if (qdisc == NULL)
+ goto err;
+ qdisc->flags |= TCQ_F_CAN_BYPASS;
+ priv->qdiscs[ntx] = qdisc;
+ }
+
+ return 0;
+
+err:
+ mq_destroy(sch);
+ return -ENOMEM;
+}
+
+static void mq_attach(struct Qdisc *sch)
+{
+ struct net_device *dev = qdisc_dev(sch);
+ struct mq_sched *priv = qdisc_priv(sch);
+ struct Qdisc *qdisc;
+ unsigned int ntx;
+
+ for (ntx = 0; ntx < dev->num_tx_queues; ntx++) {
+ qdisc = priv->qdiscs[ntx];
+ qdisc = dev_graft_qdisc(qdisc->dev_queue, qdisc);
+ if (qdisc)
+ qdisc_destroy(qdisc);
+ }
+ kfree(priv->qdiscs);
+ priv->qdiscs = NULL;
+}
+
+static int mq_dump(struct Qdisc *sch, struct sk_buff *skb)
+{
+ struct net_device *dev = qdisc_dev(sch);
+ struct Qdisc *qdisc;
+ unsigned int ntx;
+
+ sch->q.qlen = 0;
+ memset(&sch->bstats, 0, sizeof(sch->bstats));
+ memset(&sch->qstats, 0, sizeof(sch->qstats));
+
+ for (ntx = 0; ntx < dev->num_tx_queues; ntx++) {
+ qdisc = netdev_get_tx_queue(dev, ntx)->qdisc_sleeping;
+ spin_lock_bh(qdisc_lock(qdisc));
+ sch->q.qlen += qdisc->q.qlen;
+ sch->bstats.bytes += qdisc->bstats.bytes;
+ sch->bstats.packets += qdisc->bstats.packets;
+ sch->qstats.qlen += qdisc->qstats.qlen;
+ sch->qstats.backlog += qdisc->qstats.backlog;
+ sch->qstats.drops += qdisc->qstats.drops;
+ sch->qstats.requeues += qdisc->qstats.requeues;
+ sch->qstats.overlimits += qdisc->qstats.overlimits;
+ spin_unlock_bh(qdisc_lock(qdisc));
+ }
+ return 0;
+}
+
+static struct netdev_queue *mq_queue_get(struct Qdisc *sch, unsigned long cl)
+{
+ struct net_device *dev = qdisc_dev(sch);
+ unsigned long ntx = cl - 1;
+
+ if (ntx >= dev->num_tx_queues)
+ return NULL;
+ return netdev_get_tx_queue(dev, ntx);
+}
+
+static unsigned int mq_select_queue(struct Qdisc *sch, struct tcmsg *tcm)
+{
+ unsigned int ntx = TC_H_MIN(tcm->tcm_parent);
+
+ if (!mq_queue_get(sch, ntx))
+ return 0;
+ return ntx - 1;
+}
+
+static int mq_graft(struct Qdisc *sch, unsigned long cl, struct Qdisc *new,
+ struct Qdisc **old)
+{
+ struct netdev_queue *dev_queue = mq_queue_get(sch, cl);
+ struct net_device *dev = qdisc_dev(sch);
+
+ if (dev->flags & IFF_UP)
+ dev_deactivate(dev);
+
+ *old = dev_graft_qdisc(dev_queue, new);
+
+ if (dev->flags & IFF_UP)
+ dev_activate(dev);
+ return 0;
+}
+
+static struct Qdisc *mq_leaf(struct Qdisc *sch, unsigned long cl)
+{
+ struct netdev_queue *dev_queue = mq_queue_get(sch, cl);
+
+ return dev_queue->qdisc_sleeping;
+}
+
+static unsigned long mq_get(struct Qdisc *sch, u32 classid)
+{
+ unsigned int ntx = TC_H_MIN(classid);
+
+ if (!mq_queue_get(sch, ntx))
+ return 0;
+ return ntx;
+}
+
+static void mq_put(struct Qdisc *sch, unsigned long cl)
+{
+ return;
+}
+
+static int mq_dump_class(struct Qdisc *sch, unsigned long cl,
+ struct sk_buff *skb, struct tcmsg *tcm)
+{
+ struct netdev_queue *dev_queue = mq_queue_get(sch, cl);
+
+ tcm->tcm_parent = TC_H_ROOT;
+ tcm->tcm_handle |= TC_H_MIN(cl);
+ tcm->tcm_info = dev_queue->qdisc_sleeping->handle;
+ return 0;
+}
+
+static int mq_dump_class_stats(struct Qdisc *sch, unsigned long cl,
+ struct gnet_dump *d)
+{
+ struct netdev_queue *dev_queue = mq_queue_get(sch, cl);
+
+ sch = dev_queue->qdisc_sleeping;
+ if (gnet_stats_copy_basic(d, &sch->bstats) < 0 ||
+ gnet_stats_copy_queue(d, &sch->qstats) < 0)
+ return -1;
+ return 0;
+}
+
+static void mq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
+{
+ struct net_device *dev = qdisc_dev(sch);
+ unsigned int ntx;
+
+ if (arg->stop)
+ return;
+
+ arg->count = arg->skip;
+ for (ntx = arg->skip; ntx < dev->num_tx_queues; ntx++) {
+ if (arg->fn(sch, ntx + 1, arg) < 0) {
+ arg->stop = 1;
+ break;
+ }
+ arg->count++;
+ }
+}
+
+static const struct Qdisc_class_ops mq_class_ops = {
+ .select_queue = mq_select_queue,
+ .graft = mq_graft,
+ .leaf = mq_leaf,
+ .get = mq_get,
+ .put = mq_put,
+ .walk = mq_walk,
+ .dump = mq_dump_class,
+ .dump_stats = mq_dump_class_stats,
+};
+
+struct Qdisc_ops mq_qdisc_ops __read_mostly = {
+ .cl_ops = &mq_class_ops,
+ .id = "mq",
+ .priv_size = sizeof(struct mq_sched),
+ .init = mq_init,
+ .destroy = mq_destroy,
+ .attach = mq_attach,
+ .dump = mq_dump,
+ .owner = THIS_MODULE,
+};
next prev parent reply other threads:[~2009-09-04 16:41 UTC|newest]
Thread overview: 46+ messages / expand[flat|nested] mbox.gz Atom feed top
2009-09-04 16:41 net_sched 00/07: classful multiqueue dummy scheduler Patrick McHardy
2009-09-04 16:41 ` net_sched 01/07: fix class grafting errno codes Patrick McHardy
2009-09-04 16:41 ` net_sched 02/07: make cls_ops->tcf_chain() optional Patrick McHardy
2009-09-05 8:13 ` Jarek Poplawski
2009-09-05 11:57 ` Jarek Poplawski
2009-09-05 12:32 ` Jarek Poplawski
2009-09-05 17:03 ` Patrick McHardy
2009-09-06 9:06 ` David Miller
2009-09-04 16:41 ` net_sched 03/07: make cls_ops->change and cls_ops->delete optional Patrick McHardy
2009-09-04 16:41 ` net_sched 04/07: remove some unnecessary checks in classful schedulers Patrick McHardy
2009-09-04 16:41 ` net_sched 05/07: reintroduce dev->qdisc for use by sch_api Patrick McHardy
2009-09-06 18:57 ` Jarek Poplawski
2009-09-07 13:16 ` Patrick McHardy
2009-09-07 16:49 ` Jarek Poplawski
2009-09-04 16:41 ` net_sched 06/07: move dev_graft_qdisc() to sch_generic.c Patrick McHardy
2009-09-04 16:41 ` Patrick McHardy [this message]
2009-09-06 20:04 ` net_sched 07/07: add classful multiqueue dummy scheduler Jarek Poplawski
2009-09-07 13:27 ` Patrick McHardy
2009-09-07 18:22 ` Jarek Poplawski
2009-09-07 19:24 ` Jarek Poplawski
2009-09-07 19:49 ` Eric Dumazet
2009-09-09 16:02 ` Patrick McHardy
2009-09-09 19:52 ` Jarek Poplawski
2009-09-10 11:28 ` Patrick McHardy
2009-09-11 21:38 ` Jarek Poplawski
2009-09-11 22:10 ` David Miller
2009-09-11 22:21 ` Jarek Poplawski
2009-09-11 22:27 ` David Miller
2009-09-09 16:01 ` Patrick McHardy
2009-09-04 16:42 ` net_sched 00/07: " Patrick McHardy
2009-09-07 8:50 ` David Miller
2009-09-07 9:46 ` Jarek Poplawski
2009-09-07 13:00 ` Eric Dumazet
2009-09-07 13:29 ` Patrick McHardy
2009-09-07 14:23 ` Patrick McHardy
2009-09-07 17:21 ` Eric Dumazet
2009-09-07 17:28 ` Patrick McHardy
2009-09-07 17:30 ` Eric Dumazet
2009-09-07 17:33 ` Patrick McHardy
2009-09-07 17:38 ` Eric Dumazet
2009-09-07 17:46 ` Patrick McHardy
2009-09-08 9:31 ` David Miller
2009-09-08 15:53 ` Patrick McHardy
2009-09-05 7:27 ` David Miller
2009-09-05 17:02 ` Patrick McHardy
2009-09-06 9:01 ` David Miller
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20090904164119.27300.23137.sendpatchset@x2.localnet \
--to=kaber@trash.net \
--cc=netdev@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox