From: PJ Waskiewicz <peter.p.waskiewicz.jr@intel.com>
To: davem@davemloft.net
Cc: netdev@vger.kernel.org, jeff@garzik.org,
auke-jan.h.kok@intel.com, kaber@trash.net, hadi@cyberus.ca
Subject: [PATCH 3/3] NET: [SCHED] Qdisc changes and sch_rr added for multiqueue
Date: Thu, 21 Jun 2007 14:26:47 -0700 [thread overview]
Message-ID: <20070621212647.31066.61074.stgit@localhost.localdomain> (raw)
In-Reply-To: <20070621212629.31066.92148.stgit@localhost.localdomain>
Add the new sch_rr qdisc for multiqueue network device support.
Allow sch_prio to be compiled with or without multiqueue hardware
support.
sch_rr is part of sch_prio, and is referenced from MODULE_ALIAS. This
was done since sch_prio and sch_rr only differ in their dequeue routine.
Signed-off-by: Peter P Waskiewicz Jr <peter.p.waskiewicz.jr@intel.com>
---
net/sched/Kconfig | 32 ++++++++++++
net/sched/sch_generic.c | 3 +
net/sched/sch_prio.c | 123 ++++++++++++++++++++++++++++++++++++++++++++---
3 files changed, 150 insertions(+), 8 deletions(-)
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index 475df84..ca0b352 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -102,8 +102,16 @@ config NET_SCH_ATM
To compile this code as a module, choose M here: the
module will be called sch_atm.
+config NET_SCH_BANDS
+ bool "Multi Band Queueing (PRIO and RR)"
+ ---help---
+ Say Y here if you want to use n-band multiqueue packet
+ schedulers. These include a priority-based scheduler and
+ a round-robin scheduler.
+
config NET_SCH_PRIO
tristate "Multi Band Priority Queueing (PRIO)"
+ depends on NET_SCH_BANDS
---help---
Say Y here if you want to use an n-band priority queue packet
scheduler.
@@ -111,6 +119,30 @@ config NET_SCH_PRIO
To compile this code as a module, choose M here: the
module will be called sch_prio.
+config NET_SCH_PRIO_MQ
+ bool "Multiple hardware queue support for PRIO"
+ depends on NET_SCH_PRIO
+ ---help---
+ Say Y here if you want to allow the PRIO qdisc to assign
+ flows to multiple hardware queues on an ethernet device. This
+ will still work on devices with 1 queue.
+
+ Consider this scheduler for devices that do not use
+ hardware-based scheduling policies. Otherwise, use NET_SCH_RR.
+
+ Most people will say N here.
+
+config NET_SCH_RR
+ bool "Multi Band Round Robin Queuing (RR)"
+ depends on NET_SCH_BANDS && NET_SCH_PRIO
+ ---help---
+ Say Y here if you want to use an n-band round robin packet
+ scheduler.
+
+ The module uses sch_prio for its framework and is aliased as
+ sch_rr, so it will load sch_prio, although it is referred
+ to using sch_rr.
+
config NET_SCH_RED
tristate "Random Early Detection (RED)"
---help---
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 9461e8a..203d5c4 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -168,7 +168,8 @@ static inline int qdisc_restart(struct net_device *dev)
spin_unlock(&dev->queue_lock);
ret = NETDEV_TX_BUSY;
- if (!netif_queue_stopped(dev))
+ if (!netif_queue_stopped(dev) &&
+ !netif_subqueue_stopped(dev, skb->queue_mapping))
/* churn baby churn .. */
ret = dev_hard_start_xmit(skb, dev);
diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
index 6d7542c..4eb3ba5 100644
--- a/net/sched/sch_prio.c
+++ b/net/sched/sch_prio.c
@@ -9,6 +9,8 @@
* Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
* Fixes: 19990609: J Hadi Salim <hadi@nortelnetworks.com>:
* Init -- EINVAL when opt undefined
+ * Additions: Peter P. Waskiewicz Jr. <peter.p.waskiewicz.jr@intel.com>
+ * Added round-robin scheduling for selection at load-time
*/
#include <linux/module.h>
@@ -40,9 +42,13 @@
struct prio_sched_data
{
int bands;
+#ifdef CONFIG_NET_SCH_RR
+ int curband; /* for round-robin */
+#endif
struct tcf_proto *filter_list;
u8 prio2band[TC_PRIO_MAX+1];
struct Qdisc *queues[TCQ_PRIO_BANDS];
+ u16 band2queue[TC_PRIO_MAX + 1];
};
@@ -70,14 +76,19 @@ prio_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
#endif
if (TC_H_MAJ(band))
band = 0;
+ skb->queue_mapping =
+ q->band2queue[q->prio2band[band&TC_PRIO_MAX]];
return q->queues[q->prio2band[band&TC_PRIO_MAX]];
}
band = res.classid;
}
band = TC_H_MIN(band) - 1;
- if (band >= q->bands)
+ if (band >= q->bands) {
+ skb->queue_mapping = q->band2queue[q->prio2band[0]];
return q->queues[q->prio2band[0]];
+ }
+ skb->queue_mapping = q->band2queue[band];
return q->queues[band];
}
@@ -144,17 +155,59 @@ prio_dequeue(struct Qdisc* sch)
struct Qdisc *qdisc;
for (prio = 0; prio < q->bands; prio++) {
- qdisc = q->queues[prio];
- skb = qdisc->dequeue(qdisc);
- if (skb) {
- sch->q.qlen--;
- return skb;
+ /* Check if the target subqueue is available before
+ * pulling an skb. This way we avoid excessive requeues
+ * for slower queues.
+ */
+ if (!netif_subqueue_stopped(sch->dev, q->band2queue[prio])) {
+ qdisc = q->queues[prio];
+ skb = qdisc->dequeue(qdisc);
+ if (skb) {
+ sch->q.qlen--;
+ return skb;
+ }
}
}
return NULL;
}
+#ifdef CONFIG_NET_SCH_RR
+static struct sk_buff *rr_dequeue(struct Qdisc* sch)
+{
+ struct sk_buff *skb;
+ struct prio_sched_data *q = qdisc_priv(sch);
+ struct Qdisc *qdisc;
+ int bandcount;
+
+ /* Only take one pass through the queues. If nothing is available,
+ * return nothing.
+ */
+ for (bandcount = 0; bandcount < q->bands; bandcount++) {
+ /* Check if the target subqueue is available before
+ * pulling an skb. This way we avoid excessive requeues
+ * for slower queues. If the queue is stopped, try the
+ * next queue.
+ */
+ if (!netif_subqueue_stopped(sch->dev, q->band2queue[q->curband])) {
+ qdisc = q->queues[q->curband];
+ skb = qdisc->dequeue(qdisc);
+ if (skb) {
+ sch->q.qlen--;
+ q->curband++;
+ if (q->curband >= q->bands)
+ q->curband = 0;
+ return skb;
+ }
+ }
+ q->curband++;
+ if (q->curband >= q->bands)
+ q->curband = 0;
+ }
+ return NULL;
+}
+#endif
+
static unsigned int prio_drop(struct Qdisc* sch)
{
struct prio_sched_data *q = qdisc_priv(sch);
@@ -200,6 +253,7 @@ static int prio_tune(struct Qdisc *sch, struct rtattr *opt)
struct prio_sched_data *q = qdisc_priv(sch);
struct tc_prio_qopt *qopt = RTA_DATA(opt);
int i;
+ int queue;
if (opt->rta_len < RTA_LENGTH(sizeof(*qopt)))
return -EINVAL;
@@ -211,6 +265,22 @@ static int prio_tune(struct Qdisc *sch, struct rtattr *opt)
return -EINVAL;
}
+ /* If we're prio multiqueue or are using round-robin, make
+ * sure the number of incoming bands matches the number of
+ * queues on the device we're associating with.
+ */
+#ifdef CONFIG_NET_SCH_RR
+ if (strcmp("rr", sch->ops->id) == 0)
+ if (qopt->bands != sch->dev->egress_subqueue_count)
+ return -EINVAL;
+#endif
+
+#ifdef CONFIG_NET_SCH_PRIO_MQ
+ if (strcmp("prio", sch->ops->id) == 0)
+ if (qopt->bands != sch->dev->egress_subqueue_count)
+ return -EINVAL;
+#endif
+
sch_tree_lock(sch);
q->bands = qopt->bands;
memcpy(q->prio2band, qopt->priomap, TC_PRIO_MAX+1);
@@ -242,6 +312,18 @@ static int prio_tune(struct Qdisc *sch, struct rtattr *opt)
}
}
}
+
+ /* setup queue to band mapping */
+ for (i = 0, queue = 0; i < q->bands; i++, queue++)
+ q->band2queue[i] = queue;
+
+#ifndef CONFIG_NET_SCH_PRIO_MQ
+ /* for non-mq prio */
+ if (strcmp("prio", sch->ops->id) == 0)
+ for (i = 0; i < q->bands; i++)
+ q->band2queue[i] = 0;
+#endif
+
return 0;
}
@@ -443,17 +525,44 @@ static struct Qdisc_ops prio_qdisc_ops = {
.owner = THIS_MODULE,
};
+#ifdef CONFIG_NET_SCH_RR
+static struct Qdisc_ops rr_qdisc_ops = {
+ .next = NULL,
+ .cl_ops = &prio_class_ops,
+ .id = "rr",
+ .priv_size = sizeof(struct prio_sched_data),
+ .enqueue = prio_enqueue,
+ .dequeue = rr_dequeue,
+ .requeue = prio_requeue,
+ .drop = prio_drop,
+ .init = prio_init,
+ .reset = prio_reset,
+ .destroy = prio_destroy,
+ .change = prio_tune,
+ .dump = prio_dump,
+ .owner = THIS_MODULE,
+};
+#endif
+
static int __init prio_module_init(void)
{
- return register_qdisc(&prio_qdisc_ops);
+ register_qdisc(&prio_qdisc_ops);
+#ifdef CONFIG_NET_SCH_RR
+ register_qdisc(&rr_qdisc_ops);
+#endif
+ return 0;
}
static void __exit prio_module_exit(void)
{
unregister_qdisc(&prio_qdisc_ops);
+#ifdef CONFIG_NET_SCH_RR
+ unregister_qdisc(&rr_qdisc_ops);
+#endif
}
module_init(prio_module_init)
module_exit(prio_module_exit)
MODULE_LICENSE("GPL");
+MODULE_ALIAS("sch_rr");
next prev parent reply other threads:[~2007-06-21 21:26 UTC|newest]
Thread overview: 40+ messages / expand[flat|nested] mbox.gz Atom feed top
2007-06-21 21:26 [PATCH] NET: Multiple queue hardware support PJ Waskiewicz
2007-06-21 21:26 ` [PATCH 1/3] NET: [DOC] Multiqueue hardware support documentation PJ Waskiewicz
2007-06-21 21:26 ` [PATCH 2/3] NET: [CORE] Stack changes to add multiqueue hardware support API PJ Waskiewicz
2007-06-21 21:26 ` PJ Waskiewicz [this message]
2007-06-21 23:47 ` [PATCH 3/3] NET: [SCHED] Qdisc changes and sch_rr added for multiqueue Patrick McHardy
2007-06-22 0:01 ` Waskiewicz Jr, Peter P
2007-06-22 0:26 ` Patrick McHardy
2007-06-22 18:00 ` Waskiewicz Jr, Peter P
2007-06-22 18:42 ` Patrick McHardy
2007-06-22 18:44 ` Patrick McHardy
2007-06-22 18:53 ` Patrick McHardy
2007-06-22 21:03 ` Waskiewicz Jr, Peter P
2007-06-21 21:31 ` [PATCH] NET: Multiple queue hardware support Patrick McHardy
2007-06-21 23:27 ` Waskiewicz Jr, Peter P
-- strict thread matches above, loose matches on Subject: below --
2007-06-23 21:36 PJ Waskiewicz
2007-06-23 21:36 ` [PATCH 3/3] NET: [SCHED] Qdisc changes and sch_rr added for multiqueue PJ Waskiewicz
2007-06-24 12:16 ` Patrick McHardy
2007-06-25 17:27 ` Waskiewicz Jr, Peter P
2007-06-25 17:29 ` Patrick McHardy
2007-06-25 21:53 ` Waskiewicz Jr, Peter P
2007-06-25 21:58 ` Patrick McHardy
2007-06-25 22:07 ` Waskiewicz Jr, Peter P
2007-06-24 22:22 ` Patrick McHardy
2007-06-25 17:29 ` Waskiewicz Jr, Peter P
2007-06-28 16:20 [PATCH] NET: Multiple queue hardware support PJ Waskiewicz
2007-06-28 16:21 ` [PATCH 3/3] NET: [SCHED] Qdisc changes and sch_rr added for multiqueue PJ Waskiewicz
2007-06-28 16:35 ` Patrick McHardy
2007-06-28 16:43 ` Waskiewicz Jr, Peter P
2007-06-28 16:46 ` Patrick McHardy
2007-06-28 16:50 ` Waskiewicz Jr, Peter P
2007-06-28 16:53 ` Patrick McHardy
2007-06-28 16:50 ` Patrick McHardy
2007-06-28 17:13 ` Patrick McHardy
2007-06-28 19:04 ` Waskiewicz Jr, Peter P
2007-06-28 19:17 ` Patrick McHardy
2007-06-28 19:21 ` Waskiewicz Jr, Peter P
2007-06-28 19:24 ` Patrick McHardy
2007-06-28 19:27 ` Waskiewicz Jr, Peter P
2007-06-29 4:20 ` David Miller
2007-06-29 8:45 ` Waskiewicz Jr, Peter P
2007-06-30 14:33 ` Patrick McHardy
2007-06-30 14:37 ` Waskiewicz Jr, Peter P
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20070621212647.31066.61074.stgit@localhost.localdomain \
--to=peter.p.waskiewicz.jr@intel.com \
--cc=auke-jan.h.kok@intel.com \
--cc=davem@davemloft.net \
--cc=hadi@cyberus.ca \
--cc=jeff@garzik.org \
--cc=kaber@trash.net \
--cc=netdev@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).