From: PJ Waskiewicz <peter.p.waskiewicz.jr@intel.com>
To: davem@davemloft.net
Cc: netdev@vger.kernel.org, jeff@garzik.org,
auke-jan.h.kok@intel.com, hadi@cyberus.ca, kaber@trash.net
Subject: [PATCH 3/3] NET: [SCHED] Qdisc changes and sch_rr added for multiqueue
Date: Thu, 28 Jun 2007 09:21:18 -0700 [thread overview]
Message-ID: <20070628162118.18728.64391.stgit@localhost.localdomain> (raw)
In-Reply-To: <20070628162056.18728.20195.stgit@localhost.localdomain>
Updated: Cleaned up Kconfig options for multiqueue. Cleaned up
sch_rr and sch_prio multiqueue handling. Added nested compat netlink
attributes for the new options. Allow a band count of 0 for prio and rr
when in multiqueue mode, so that it defaults to the number of queues on
the NIC.
Add the new sch_rr qdisc for multiqueue network device support.
Allow sch_prio and sch_rr to be compiled with or without multiqueue
hardware support.
sch_rr is implemented inside sch_prio and is exposed under its own name
via MODULE_ALIAS. This was done because sch_prio and sch_rr differ only
in their dequeue routine.
Signed-off-by: Peter P Waskiewicz Jr <peter.p.waskiewicz.jr@intel.com>
---
include/linux/pkt_sched.h | 9 +++
net/sched/Kconfig | 23 +++++++
net/sched/sch_prio.c | 147 +++++++++++++++++++++++++++++++++++++++++----
3 files changed, 166 insertions(+), 13 deletions(-)
diff --git a/include/linux/pkt_sched.h b/include/linux/pkt_sched.h
index d10f353..268c515 100644
--- a/include/linux/pkt_sched.h
+++ b/include/linux/pkt_sched.h
@@ -101,6 +101,15 @@ struct tc_prio_qopt
__u8 priomap[TC_PRIO_MAX+1]; /* Map: logical priority -> PRIO band */
};
+enum
+{
+ TCA_PRIO_UNSPEC,
+ TCA_PRIO_MQ,
+ __TCA_PRIO_MAX
+};
+
+#define TCA_PRIO_MAX (__TCA_PRIO_MAX - 1)
+
/* TBF section */
struct tc_tbf_qopt
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index 475df84..65ee9e7 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -111,6 +111,29 @@ config NET_SCH_PRIO
To compile this code as a module, choose M here: the
module will be called sch_prio.
+config NET_SCH_RR
+ tristate "Multi Band Round Robin Queuing (RR)"
+ select NET_SCH_PRIO
+ ---help---
+ Say Y here if you want to use an n-band round robin packet
+ scheduler.
+
+ The module uses sch_prio for its framework and is aliased as
+ sch_rr, so it will load sch_prio, although it is referred
+ to using sch_rr.
+
+config NET_SCH_MULTIQUEUE
+ bool "Multiple hardware queue support"
+ ---help---
+ Say Y here if you want to allow supported qdiscs to assign flows to
+ multiple hardware queues on an ethernet device. This will
+ still work on devices with 1 queue.
+
+ Current qdiscs supporting this feature are NET_SCH_PRIO and
+ NET_SCH_RR.
+
+ Most people will say N here.
+
config NET_SCH_RED
tristate "Random Early Detection (RED)"
---help---
diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
index 6d7542c..2ceba92 100644
--- a/net/sched/sch_prio.c
+++ b/net/sched/sch_prio.c
@@ -40,9 +40,13 @@
struct prio_sched_data
{
int bands;
+ int curband; /* for round-robin */
struct tcf_proto *filter_list;
u8 prio2band[TC_PRIO_MAX+1];
struct Qdisc *queues[TCQ_PRIO_BANDS];
+#ifdef CONFIG_NET_SCH_MULTIQUEUE
+ unsigned char mq;
+#endif
};
@@ -70,14 +74,34 @@ prio_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
#endif
if (TC_H_MAJ(band))
band = 0;
+#ifdef CONFIG_NET_SCH_MULTIQUEUE
+ if (q->mq)
+ skb->queue_mapping =
+ q->prio2band[band&TC_PRIO_MAX];
+ else
+ skb->queue_mapping = 0;
+#endif
return q->queues[q->prio2band[band&TC_PRIO_MAX]];
}
band = res.classid;
}
band = TC_H_MIN(band) - 1;
- if (band >= q->bands)
+ if (band >= q->bands) {
+#ifdef CONFIG_NET_SCH_MULTIQUEUE
+ if (q->mq)
+ skb->queue_mapping = q->prio2band[0];
+ else
+ skb->queue_mapping = 0;
+#endif
return q->queues[q->prio2band[0]];
+ }
+#ifdef CONFIG_NET_SCH_MULTIQUEUE
+ if (q->mq)
+ skb->queue_mapping = band;
+ else
+ skb->queue_mapping = 0;
+#endif
return q->queues[band];
}
@@ -144,17 +168,65 @@ prio_dequeue(struct Qdisc* sch)
struct Qdisc *qdisc;
for (prio = 0; prio < q->bands; prio++) {
- qdisc = q->queues[prio];
- skb = qdisc->dequeue(qdisc);
- if (skb) {
- sch->q.qlen--;
- return skb;
+#ifdef CONFIG_NET_SCH_MULTIQUEUE
+ /* Check if the target subqueue is available before
+ * pulling an skb. This way we avoid excessive requeues
+ * for slower queues.
+ */
+ if (!netif_subqueue_stopped(sch->dev, (q->mq ? prio : 0))) {
+#endif
+ qdisc = q->queues[prio];
+ skb = qdisc->dequeue(qdisc);
+ if (skb) {
+ sch->q.qlen--;
+ return skb;
+ }
+#ifdef CONFIG_NET_SCH_MULTIQUEUE
}
+#endif
}
return NULL;
}
+static struct sk_buff *rr_dequeue(struct Qdisc* sch)
+{
+ struct sk_buff *skb;
+ struct prio_sched_data *q = qdisc_priv(sch);
+ struct Qdisc *qdisc;
+ int bandcount;
+
+ /* Only take one pass through the queues. If nothing is available,
+ * return nothing.
+ */
+ for (bandcount = 0; bandcount < q->bands; bandcount++) {
+#ifdef CONFIG_NET_SCH_MULTIQUEUE
+ /* Check if the target subqueue is available before
+ * pulling an skb. This way we avoid excessive requeues
+ * for slower queues. If the queue is stopped, try the
+ * next queue.
+ */
+ if (!netif_subqueue_stopped(sch->dev, (q->mq ? q->curband : 0))) {
+#endif
+ qdisc = q->queues[q->curband];
+ skb = qdisc->dequeue(qdisc);
+ if (skb) {
+ sch->q.qlen--;
+ q->curband++;
+ if (q->curband >= q->bands)
+ q->curband = 0;
+ return skb;
+ }
+#ifdef CONFIG_NET_SCH_MULTIQUEUE
+ }
+#endif
+ q->curband++;
+ if (q->curband >= q->bands)
+ q->curband = 0;
+ }
+ return NULL;
+}
+
static unsigned int prio_drop(struct Qdisc* sch)
{
struct prio_sched_data *q = qdisc_priv(sch);
@@ -198,21 +270,39 @@ prio_destroy(struct Qdisc* sch)
static int prio_tune(struct Qdisc *sch, struct rtattr *opt)
{
struct prio_sched_data *q = qdisc_priv(sch);
- struct tc_prio_qopt *qopt = RTA_DATA(opt);
+ struct tc_prio_qopt *qopt;
+ struct rtattr *tb[TCA_PRIO_MAX];
int i;
- if (opt->rta_len < RTA_LENGTH(sizeof(*qopt)))
+ if (rtattr_parse_nested_compat(tb, TCA_PRIO_MAX, opt, qopt,
+ sizeof(*qopt)))
return -EINVAL;
- if (qopt->bands > TCQ_PRIO_BANDS || qopt->bands < 2)
+ q->bands = qopt->bands;
+#ifdef CONFIG_NET_SCH_MULTIQUEUE
+ /* If we're multiqueue, make sure the number of incoming bands
+ * matches the number of queues on the device we're associating with.
+ * If the number of bands requested is zero, then set q->bands to
+ * dev->egress_subqueue_count.
+ */
+ q->mq = RTA_GET_FLAG(tb[TCA_PRIO_MQ - 1]);
+
+ if (q->mq) {
+ if (q->bands == 0)
+ q->bands = sch->dev->egress_subqueue_count;
+ else if (q->bands != sch->dev->egress_subqueue_count)
+ return -EINVAL;
+ }
+#endif
+
+ if (q->bands > TCQ_PRIO_BANDS || q->bands < 2)
return -EINVAL;
for (i=0; i<=TC_PRIO_MAX; i++) {
- if (qopt->priomap[i] >= qopt->bands)
+ if (qopt->priomap[i] >= q->bands)
return -EINVAL;
}
sch_tree_lock(sch);
- q->bands = qopt->bands;
memcpy(q->prio2band, qopt->priomap, TC_PRIO_MAX+1);
for (i=q->bands; i<TCQ_PRIO_BANDS; i++) {
@@ -268,11 +358,19 @@ static int prio_dump(struct Qdisc *sch, struct sk_buff *skb)
{
struct prio_sched_data *q = qdisc_priv(sch);
unsigned char *b = skb_tail_pointer(skb);
+ struct rtattr *nest;
struct tc_prio_qopt opt;
opt.bands = q->bands;
memcpy(&opt.priomap, q->prio2band, TC_PRIO_MAX+1);
- RTA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
+
+ nest = RTA_NEST_COMPAT(skb, TCA_OPTIONS, sizeof(opt), &opt);
+#ifdef CONFIG_NET_SCH_MULTIQUEUE
+ if (q->mq)
+ RTA_PUT_FLAG(skb, TCA_PRIO_MQ);
+#endif
+ RTA_NEST_COMPAT_END(skb, nest);
+
return skb->len;
rtattr_failure:
@@ -443,17 +541,40 @@ static struct Qdisc_ops prio_qdisc_ops = {
.owner = THIS_MODULE,
};
+static struct Qdisc_ops rr_qdisc_ops = {
+ .next = NULL,
+ .cl_ops = &prio_class_ops,
+ .id = "rr",
+ .priv_size = sizeof(struct prio_sched_data),
+ .enqueue = prio_enqueue,
+ .dequeue = rr_dequeue,
+ .requeue = prio_requeue,
+ .drop = prio_drop,
+ .init = prio_init,
+ .reset = prio_reset,
+ .destroy = prio_destroy,
+ .change = prio_tune,
+ .dump = prio_dump,
+ .owner = THIS_MODULE,
+};
+
static int __init prio_module_init(void)
{
- return register_qdisc(&prio_qdisc_ops);
+ int err;
+ err = register_qdisc(&prio_qdisc_ops);
+ if (!err)
+ err = register_qdisc(&rr_qdisc_ops);
+ return err;
}
static void __exit prio_module_exit(void)
{
unregister_qdisc(&prio_qdisc_ops);
+ unregister_qdisc(&rr_qdisc_ops);
}
module_init(prio_module_init)
module_exit(prio_module_exit)
MODULE_LICENSE("GPL");
+MODULE_ALIAS("sch_rr");
next prev parent reply other threads:[~2007-06-28 16:21 UTC|newest]
Thread overview: 78+ messages / expand[flat|nested] mbox.gz Atom feed top
2007-06-28 16:20 [PATCH] NET: Multiple queue hardware support PJ Waskiewicz
2007-06-28 16:21 ` [PATCH 1/3] NET: [DOC] Multiqueue hardware support documentation PJ Waskiewicz
2007-06-28 16:21 ` [PATCH 2/3] NET: [CORE] Stack changes to add multiqueue hardware support API PJ Waskiewicz
2007-06-28 16:31 ` Patrick McHardy
2007-06-28 17:00 ` Patrick McHardy
2007-06-28 19:00 ` Waskiewicz Jr, Peter P
2007-06-28 19:03 ` Patrick McHardy
2007-06-28 19:06 ` Waskiewicz Jr, Peter P
2007-06-28 19:20 ` Patrick McHardy
2007-06-28 19:32 ` Jeff Garzik
2007-06-28 19:37 ` Patrick McHardy
2007-06-28 21:11 ` Waskiewicz Jr, Peter P
2007-06-28 21:18 ` Patrick McHardy
2007-06-28 23:08 ` Waskiewicz Jr, Peter P
2007-06-28 23:31 ` David Miller
2007-06-28 20:39 ` David Miller
2007-06-29 3:39 ` David Miller
2007-06-29 10:54 ` Jeff Garzik
2007-06-28 16:21 ` PJ Waskiewicz [this message]
2007-06-28 16:35 ` [PATCH 3/3] NET: [SCHED] Qdisc changes and sch_rr added for multiqueue Patrick McHardy
2007-06-28 16:43 ` Waskiewicz Jr, Peter P
2007-06-28 16:46 ` Patrick McHardy
2007-06-28 16:50 ` Waskiewicz Jr, Peter P
2007-06-28 16:53 ` Patrick McHardy
2007-06-28 16:50 ` Patrick McHardy
2007-06-28 17:13 ` Patrick McHardy
2007-06-28 19:04 ` Waskiewicz Jr, Peter P
2007-06-28 19:17 ` Patrick McHardy
2007-06-28 19:21 ` Waskiewicz Jr, Peter P
2007-06-28 19:24 ` Patrick McHardy
2007-06-28 19:27 ` Waskiewicz Jr, Peter P
2007-06-29 4:20 ` David Miller
2007-06-29 8:45 ` Waskiewicz Jr, Peter P
2007-06-29 11:43 ` Multiqueue and virtualization WAS(Re: " jamal
2007-06-29 11:59 ` Patrick McHardy
2007-06-29 12:54 ` jamal
2007-06-29 13:08 ` Patrick McHardy
2007-06-29 13:19 ` jamal
2007-06-29 15:33 ` Ben Greear
2007-06-29 15:58 ` Patrick McHardy
2007-06-29 16:16 ` Ben Greear
2007-06-29 21:36 ` David Miller
2007-06-30 7:51 ` Benny Amorsen
2007-06-29 21:31 ` David Miller
2007-06-30 1:30 ` jamal
2007-06-30 4:35 ` David Miller
2007-06-30 14:52 ` jamal
2007-06-30 20:33 ` David Miller
2007-07-03 12:42 ` jamal
2007-07-03 21:24 ` David Miller
2007-07-04 2:20 ` jamal
2007-07-06 7:32 ` Rusty Russell
2007-07-06 14:39 ` jamal
2007-07-06 15:59 ` James Chapman
2007-07-08 2:30 ` Rusty Russell
2007-07-08 6:03 ` David Miller
2007-06-30 14:33 ` Patrick McHardy
2007-06-30 14:37 ` Waskiewicz Jr, Peter P
2007-06-28 17:57 ` [CORE] Stack changes to add multiqueue hardware support API Patrick McHardy
2007-06-28 17:57 ` [SCHED] Qdisc changes and sch_rr added for multiqueue Patrick McHardy
-- strict thread matches above, loose matches on Subject: below --
2007-06-23 21:36 [PATCH] NET: Multiple queue hardware support PJ Waskiewicz
2007-06-23 21:36 ` [PATCH 3/3] NET: [SCHED] Qdisc changes and sch_rr added for multiqueue PJ Waskiewicz
2007-06-24 12:16 ` Patrick McHardy
2007-06-25 17:27 ` Waskiewicz Jr, Peter P
2007-06-25 17:29 ` Patrick McHardy
2007-06-25 21:53 ` Waskiewicz Jr, Peter P
2007-06-25 21:58 ` Patrick McHardy
2007-06-25 22:07 ` Waskiewicz Jr, Peter P
2007-06-24 22:22 ` Patrick McHardy
2007-06-25 17:29 ` Waskiewicz Jr, Peter P
2007-06-21 21:26 [PATCH] NET: Multiple queue hardware support PJ Waskiewicz
2007-06-21 21:26 ` [PATCH 3/3] NET: [SCHED] Qdisc changes and sch_rr added for multiqueue PJ Waskiewicz
2007-06-21 23:47 ` Patrick McHardy
2007-06-22 0:01 ` Waskiewicz Jr, Peter P
2007-06-22 0:26 ` Patrick McHardy
2007-06-22 18:00 ` Waskiewicz Jr, Peter P
2007-06-22 18:42 ` Patrick McHardy
2007-06-22 18:44 ` Patrick McHardy
2007-06-22 18:53 ` Patrick McHardy
2007-06-22 21:03 ` Waskiewicz Jr, Peter P
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20070628162118.18728.64391.stgit@localhost.localdomain \
--to=peter.p.waskiewicz.jr@intel.com \
--cc=auke-jan.h.kok@intel.com \
--cc=davem@davemloft.net \
--cc=hadi@cyberus.ca \
--cc=jeff@garzik.org \
--cc=kaber@trash.net \
--cc=netdev@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.