netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [NET_NEXT PATCH 1/2] pkt_sched: Add multiqueue scheduler support
@ 2008-08-28  1:12 Jeff Kirsher
  2008-08-28  1:12 ` [NET_NEXT PATCH 2/2] pkt_action: add new action skbedit Jeff Kirsher
  2008-08-28  8:21 ` [NET_NEXT PATCH 1/2] pkt_sched: Add multiqueue scheduler support David Miller
  0 siblings, 2 replies; 8+ messages in thread
From: Jeff Kirsher @ 2008-08-28  1:12 UTC (permalink / raw)
  To: jeff; +Cc: netdev, davem, Alexander Duyck, Jeff Kirsher

From: Alexander Duyck <alexander.h.duyck@intel.com>

This patch is intended to add a qdisc to support the new tx multiqueue
architecture by providing a band for each hardware queue.  By doing
this it is possible to support a different qdisc per physical hardware
queue.

This qdisc uses the skb->queue_mapping to select which band to place
the traffic onto.  It then uses a round robin w/ a check to see if the
subqueue is stopped to determine which band to dequeue the packet from.

Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---

 include/linux/pkt_sched.h |    6 +
 net/sched/Kconfig         |    9 +
 net/sched/Makefile        |    1 
 net/sched/sch_multiq.c    |  474 +++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 490 insertions(+), 0 deletions(-)
 create mode 100644 net/sched/sch_multiq.c

diff --git a/include/linux/pkt_sched.h b/include/linux/pkt_sched.h
index e5de421..7fbc952 100644
--- a/include/linux/pkt_sched.h
+++ b/include/linux/pkt_sched.h
@@ -123,6 +123,12 @@ struct tc_prio_qopt
 	__u8	priomap[TC_PRIO_MAX+1];	/* Map: logical priority -> PRIO band */
 };
 
+/* MULTIQ section */
+
+struct tc_multiq_qopt {
+	int	bands;			/* Number of bands */
+};
+
 /* TBF section */
 
 struct tc_tbf_qopt
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index 9437b27..efaa7a7 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -106,6 +106,15 @@ config NET_SCH_PRIO
 	  To compile this code as a module, choose M here: the
 	  module will be called sch_prio.
 
+config NET_SCH_MULTIQ
+	tristate "Hardware Multiqueue-aware Multi Band Queuing (MULTIQ)"
+	---help---
+	  Say Y here if you want to use an n-band queue packet scheduler
+	  to support devices that have multiple hardware transmit queues.
+
+	  To compile this code as a module, choose M here: the
+	  module will be called sch_multiq.
+
 config NET_SCH_RED
 	tristate "Random Early Detection (RED)"
 	---help---
diff --git a/net/sched/Makefile b/net/sched/Makefile
index 1d2b0f7..3d9b953 100644
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -26,6 +26,7 @@ obj-$(CONFIG_NET_SCH_SFQ)	+= sch_sfq.o
 obj-$(CONFIG_NET_SCH_TBF)	+= sch_tbf.o
 obj-$(CONFIG_NET_SCH_TEQL)	+= sch_teql.o
 obj-$(CONFIG_NET_SCH_PRIO)	+= sch_prio.o
+obj-$(CONFIG_NET_SCH_MULTIQ)	+= sch_multiq.o
 obj-$(CONFIG_NET_SCH_ATM)	+= sch_atm.o
 obj-$(CONFIG_NET_SCH_NETEM)	+= sch_netem.o
 obj-$(CONFIG_NET_CLS_U32)	+= cls_u32.o
diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c
new file mode 100644
index 0000000..886690e
--- /dev/null
+++ b/net/sched/sch_multiq.c
@@ -0,0 +1,474 @@
+/*
+ * net/sched/sch_multiq.c
+ * 		This qdisc is based off of the rr qdisc and is meant to
+ * 		prevent head-of-line blocking on devices that have multiple
+ * 		hardware queues.
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ *
+ * Authors:	Alexander Duyck <alexander.h.duyck@intel.com>
+ */
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+#include <linux/skbuff.h>
+#include <net/netlink.h>
+#include <net/pkt_sched.h>
+
+
+struct multiq_sched_data {
+	int bands;
+	int curband;
+	struct tcf_proto *filter_list;
+	struct Qdisc **queues;
+
+};
+
+
+static struct Qdisc *
+multiq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
+{
+	struct multiq_sched_data *q = qdisc_priv(sch);
+	u32 band;
+	struct tcf_result res;
+	int err;
+
+	*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
+	err = tc_classify(skb, q->filter_list, &res);
+#ifdef CONFIG_NET_CLS_ACT
+	switch (err) {
+	case TC_ACT_STOLEN:
+	case TC_ACT_QUEUED:
+		*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
+	case TC_ACT_SHOT:
+		return NULL;
+	}
+#endif
+	band = skb_get_queue_mapping(skb);
+
+	if (band >= q->bands)
+		return q->queues[0];
+
+	return q->queues[band];
+}
+
+static int
+multiq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
+{
+	struct Qdisc *qdisc;
+	int ret;
+
+	qdisc = multiq_classify(skb, sch, &ret);
+#ifdef CONFIG_NET_CLS_ACT
+	if (qdisc == NULL) {
+
+		if (ret & __NET_XMIT_BYPASS)
+			sch->qstats.drops++;
+		kfree_skb(skb);
+		return ret;
+	}
+#endif
+
+	ret = qdisc_enqueue(skb, qdisc);
+	if (ret == NET_XMIT_SUCCESS) {
+		sch->bstats.bytes += qdisc_pkt_len(skb);
+		sch->bstats.packets++;
+		sch->q.qlen++;
+		return NET_XMIT_SUCCESS;
+	}
+	if (net_xmit_drop_count(ret))
+		sch->qstats.drops++;
+	return ret;
+}
+
+
+static int
+multiq_requeue(struct sk_buff *skb, struct Qdisc *sch)
+{
+	struct Qdisc *qdisc;
+	int ret;
+
+	qdisc = multiq_classify(skb, sch, &ret);
+#ifdef CONFIG_NET_CLS_ACT
+	if (qdisc == NULL) {
+		if (ret & __NET_XMIT_BYPASS)
+			sch->qstats.drops++;
+		kfree_skb(skb);
+		return ret;
+	}
+#endif
+
+	ret = qdisc->ops->requeue(skb, qdisc);
+	if (ret == NET_XMIT_SUCCESS) {
+		sch->q.qlen++;
+		sch->qstats.requeues++;
+		return NET_XMIT_SUCCESS;
+	}
+	if (net_xmit_drop_count(ret))
+		sch->qstats.drops++;
+	return ret;
+}
+
+
+static struct sk_buff *multiq_dequeue(struct Qdisc *sch)
+{
+	struct multiq_sched_data *q = qdisc_priv(sch);
+	struct Qdisc *qdisc;
+	struct sk_buff *skb;
+	int band;
+
+	for (band = 0; band < q->bands; band++) {
+		/* cycle through bands to ensure fairness */
+		q->curband++;
+		if (q->curband >= q->bands)
+			q->curband = 0;
+
+		/* Check that target subqueue is available before
+		 * pulling an skb to avoid excessive requeues
+		 */
+		if (!__netif_subqueue_stopped(qdisc_dev(sch), q->curband)) {
+			qdisc = q->queues[q->curband];
+			skb = qdisc->dequeue(qdisc);
+			if (skb) {
+				sch->q.qlen--;
+				return skb;
+			}
+		}
+	}
+	return NULL;
+
+}
+
+static unsigned int multiq_drop(struct Qdisc *sch)
+{
+	struct multiq_sched_data *q = qdisc_priv(sch);
+	int band;
+	unsigned int len;
+	struct Qdisc *qdisc;
+
+	for (band = q->bands-1; band >= 0; band--) {
+		qdisc = q->queues[band];
+		if (qdisc->ops->drop) {
+			len = qdisc->ops->drop(qdisc);
+			if (len != 0) {
+				sch->q.qlen--;
+				return len;
+			}
+		}
+	}
+	return 0;
+}
+
+
+static void
+multiq_reset(struct Qdisc *sch)
+{
+	int band;
+	struct multiq_sched_data *q = qdisc_priv(sch);
+
+	for (band = 0; band < q->bands; band++)
+		qdisc_reset(q->queues[band]);
+	sch->q.qlen = 0;
+}
+
+static void
+multiq_destroy(struct Qdisc *sch)
+{
+	int band;
+	struct multiq_sched_data *q = qdisc_priv(sch);
+
+	tcf_destroy_chain(&q->filter_list);
+	for (band = 0; band < q->bands; band++)
+		qdisc_destroy(q->queues[band]);
+
+	kfree(q->queues);
+}
+
+static int multiq_tune(struct Qdisc *sch, struct nlattr *opt)
+{
+	struct multiq_sched_data *q = qdisc_priv(sch);
+	struct tc_multiq_qopt *qopt;
+	struct Qdisc **queues;
+	int i;
+
+	if (sch->parent != TC_H_ROOT)
+		return -EINVAL;
+	if (!netif_is_multiqueue(qdisc_dev(sch)))
+		return -EINVAL;
+	if (nla_len(opt) < sizeof(*qopt))
+		return -EINVAL;
+
+	qopt = nla_data(opt);
+
+	if (!qopt->bands)
+		qopt->bands = qdisc_dev(sch)->real_num_tx_queues;
+	if (qopt->bands != qdisc_dev(sch)->real_num_tx_queues ||
+	    qopt->bands < 2)
+		return -EINVAL;
+
+	queues = kzalloc(sizeof(struct Qdisc *)*qopt->bands, GFP_KERNEL);
+	if (!queues)
+		return -ENOBUFS;
+
+	for (i = 0; i < qopt->bands; i++)
+		queues[i] = &noop_qdisc;
+
+	sch_tree_lock(sch);
+	q->queues = xchg(&queues, q->queues);
+	if (queues != NULL) {
+		for (i = 0; i < q->bands; i++) {
+			if (queues[i] != &noop_qdisc) {
+				qdisc_tree_decrease_qlen(queues[i],
+							 queues[i]->q.qlen);
+				qdisc_destroy(queues[i]);
+			}
+		}
+		kfree(queues);
+
+	}
+	q->bands = qopt->bands;
+
+	sch_tree_unlock(sch);
+
+	for (i = 0; i < q->bands; i++) {
+		if (q->queues[i] == &noop_qdisc) {
+			struct Qdisc *child;
+			child = qdisc_create_dflt(qdisc_dev(sch),
+						  sch->dev_queue,
+						  &pfifo_qdisc_ops,
+						  TC_H_MAKE(sch->handle,
+							    i + 1));
+			if (child) {
+				sch_tree_lock(sch);
+				child = xchg(&q->queues[i], child);
+
+				if (child != &noop_qdisc) {
+					qdisc_tree_decrease_qlen(child,
+								 child->q.qlen);
+					qdisc_destroy(child);
+				}
+				sch_tree_unlock(sch);
+			}
+		}
+	}
+	return 0;
+}
+
+static int multiq_init(struct Qdisc *sch, struct nlattr *opt)
+{
+	struct multiq_sched_data *q = qdisc_priv(sch);
+
+	q->queues = NULL;
+
+	if (opt == NULL)
+		return -EINVAL;
+
+	return multiq_tune(sch, opt);
+}
+
+static int multiq_dump(struct Qdisc *sch, struct sk_buff *skb)
+{
+	struct multiq_sched_data *q = qdisc_priv(sch);
+	unsigned char *b = skb_tail_pointer(skb);
+	struct nlattr *nest;
+	struct tc_multiq_qopt opt;
+
+	opt.bands = q->bands;
+
+	nest = nla_nest_compat_start(skb, TCA_OPTIONS, sizeof(opt), &opt);
+	if (nest == NULL)
+		goto nla_put_failure;
+	nla_nest_compat_end(skb, nest);
+
+	return skb->len;
+
+nla_put_failure:
+	nlmsg_trim(skb, b);
+	return -1;
+}
+
+static int multiq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
+		      struct Qdisc **old)
+{
+	struct multiq_sched_data *q = qdisc_priv(sch);
+	unsigned long band = arg - 1;
+
+	if (band >= q->bands)
+		return -EINVAL;
+
+	if (new == NULL)
+		new = &noop_qdisc;
+
+	sch_tree_lock(sch);
+	*old = q->queues[band];
+	q->queues[band] = new;
+	qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
+	qdisc_reset(*old);
+	sch_tree_unlock(sch);
+
+	return 0;
+}
+
+static struct Qdisc *
+multiq_leaf(struct Qdisc *sch, unsigned long arg)
+{
+	struct multiq_sched_data *q = qdisc_priv(sch);
+	unsigned long band = arg - 1;
+
+	if (band >= q->bands)
+		return NULL;
+
+	return q->queues[band];
+}
+
+static unsigned long multiq_get(struct Qdisc *sch, u32 classid)
+{
+	struct multiq_sched_data *q = qdisc_priv(sch);
+	unsigned long band = TC_H_MIN(classid);
+
+	if (band - 1 >= q->bands)
+		return 0;
+	return band;
+}
+
+static unsigned long multiq_bind(struct Qdisc *sch, unsigned long parent,
+				 u32 classid)
+{
+	return multiq_get(sch, classid);
+}
+
+
+static void multiq_put(struct Qdisc *q, unsigned long cl)
+{
+	return;
+}
+
+static int multiq_change(struct Qdisc *sch, u32 handle, u32 parent,
+			 struct nlattr **tca, unsigned long *arg)
+{
+	unsigned long cl = *arg;
+	struct multiq_sched_data *q = qdisc_priv(sch);
+
+	if (cl - 1 > q->bands)
+		return -ENOENT;
+	return 0;
+}
+
+static int multiq_delete(struct Qdisc *sch, unsigned long cl)
+{
+	struct multiq_sched_data *q = qdisc_priv(sch);
+	if (cl - 1 > q->bands)
+		return -ENOENT;
+	return 0;
+}
+
+
+static int multiq_dump_class(struct Qdisc *sch, unsigned long cl,
+			     struct sk_buff *skb, struct tcmsg *tcm)
+{
+	struct multiq_sched_data *q = qdisc_priv(sch);
+
+	if (cl - 1 > q->bands)
+		return -ENOENT;
+	tcm->tcm_handle |= TC_H_MIN(cl);
+	if (q->queues[cl-1])
+		tcm->tcm_info = q->queues[cl-1]->handle;
+	return 0;
+}
+
+static int multiq_dump_class_stats(struct Qdisc *sch, unsigned long cl,
+				 struct gnet_dump *d)
+{
+	struct multiq_sched_data *q = qdisc_priv(sch);
+	struct Qdisc *cl_q;
+
+	cl_q = q->queues[cl - 1];
+	if (gnet_stats_copy_basic(d, &cl_q->bstats) < 0 ||
+	    gnet_stats_copy_queue(d, &cl_q->qstats) < 0)
+		return -1;
+
+	return 0;
+}
+
+static void multiq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
+{
+	struct multiq_sched_data *q = qdisc_priv(sch);
+	int band;
+
+	if (arg->stop)
+		return;
+
+	for (band = 0; band < q->bands; band++) {
+		if (arg->count < arg->skip) {
+			arg->count++;
+			continue;
+		}
+		if (arg->fn(sch, band+1, arg) < 0) {
+			arg->stop = 1;
+			break;
+		}
+		arg->count++;
+	}
+}
+
+static struct tcf_proto **multiq_find_tcf(struct Qdisc *sch, unsigned long cl)
+{
+	struct multiq_sched_data *q = qdisc_priv(sch);
+
+	if (cl)
+		return NULL;
+	return &q->filter_list;
+}
+
+static const struct Qdisc_class_ops multiq_class_ops = {
+	.graft		=	multiq_graft,
+	.leaf		=	multiq_leaf,
+	.get		=	multiq_get,
+	.put		=	multiq_put,
+	.change		=	multiq_change,
+	.delete		=	multiq_delete,
+	.walk		=	multiq_walk,
+	.tcf_chain	=	multiq_find_tcf,
+	.bind_tcf	=	multiq_bind,
+	.unbind_tcf	=	multiq_put,
+	.dump		=	multiq_dump_class,
+	.dump_stats	=	multiq_dump_class_stats,
+};
+
+static struct Qdisc_ops multiq_qdisc_ops __read_mostly = {
+	.next		=	NULL,
+	.cl_ops		=	&multiq_class_ops,
+	.id		=	"multiq",
+	.priv_size	=	sizeof(struct multiq_sched_data),
+	.enqueue	=	multiq_enqueue,
+	.dequeue	=	multiq_dequeue,
+	.requeue	=	multiq_requeue,
+	.drop		=	multiq_drop,
+	.init		=	multiq_init,
+	.reset		=	multiq_reset,
+	.destroy	=	multiq_destroy,
+	.change		=	multiq_tune,
+	.dump		=	multiq_dump,
+	.owner		=	THIS_MODULE,
+};
+
+static int __init multiq_module_init(void)
+{
+	return register_qdisc(&multiq_qdisc_ops);
+}
+
+static void __exit multiq_module_exit(void)
+{
+	unregister_qdisc(&multiq_qdisc_ops);
+}
+
+module_init(multiq_module_init)
+module_exit(multiq_module_exit)
+
+MODULE_LICENSE("GPL");


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [NET_NEXT PATCH 2/2] pkt_action: add new action skbedit
  2008-08-28  1:12 [NET_NEXT PATCH 1/2] pkt_sched: Add multiqueue scheduler support Jeff Kirsher
@ 2008-08-28  1:12 ` Jeff Kirsher
  2008-08-28  8:27   ` David Miller
  2008-08-30  3:13   ` Bill Fink
  2008-08-28  8:21 ` [NET_NEXT PATCH 1/2] pkt_sched: Add multiqueue scheduler support David Miller
  1 sibling, 2 replies; 8+ messages in thread
From: Jeff Kirsher @ 2008-08-28  1:12 UTC (permalink / raw)
  To: jeff; +Cc: netdev, davem, Alexander Duyck, Jeff Kirsher

From: Alexander Duyck <alexander.h.duyck@intel.com>

This new action will have the ability to change the priority and/or
queue_mapping fields on an sk_buff.

Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---

 include/linux/tc_act/tc_skbedit.h |   23 ++++
 include/net/tc_act/tc_skbedit.h   |   15 +++
 net/sched/Kconfig                 |   11 ++
 net/sched/Makefile                |    1 
 net/sched/act_skbedit.c           |  199 +++++++++++++++++++++++++++++++++++++
 5 files changed, 249 insertions(+), 0 deletions(-)
 create mode 100644 include/linux/tc_act/tc_skbedit.h
 create mode 100644 include/net/tc_act/tc_skbedit.h
 create mode 100644 net/sched/act_skbedit.c

diff --git a/include/linux/tc_act/tc_skbedit.h b/include/linux/tc_act/tc_skbedit.h
new file mode 100644
index 0000000..3965636
--- /dev/null
+++ b/include/linux/tc_act/tc_skbedit.h
@@ -0,0 +1,23 @@
+#ifndef __LINUX_TC_SKBEDIT_H
+#define __LINUX_TC_SKBEDIT_H
+
+#include <linux/pkt_cls.h>
+
+#define SKBEDIT_F_PRIORITY		0x1
+#define SKBEDIT_F_QUEUE_MAPPING		0x2
+
+struct tc_skbedit {
+	tc_gen;
+};
+
+enum {
+	TCA_SKBEDIT_UNSPEC,
+	TCA_SKBEDIT_TM,
+	TCA_SKBEDIT_PARMS,
+	TCA_SKBEDIT_PRIORITY,
+	TCA_SKBEDIT_QUEUE_MAPPING,
+	__TCA_SKBEDIT_MAX
+};
+#define TCA_SKBEDIT_MAX (__TCA_SKBEDIT_MAX - 1)
+
+#endif
diff --git a/include/net/tc_act/tc_skbedit.h b/include/net/tc_act/tc_skbedit.h
new file mode 100644
index 0000000..cf4fd9f
--- /dev/null
+++ b/include/net/tc_act/tc_skbedit.h
@@ -0,0 +1,15 @@
+#ifndef __NET_TC_SKBEDIT_H
+#define __NET_TC_SKBEDIT_H
+
+#include <net/act_api.h>
+
+struct tcf_skbedit {
+	struct tcf_common	common;
+	u32			flags;
+	u32     		priority;
+	u16			queue_mapping;
+};
+#define to_skbedit(pc) \
+	container_of(pc, struct tcf_skbedit, common)
+
+#endif /* __NET_TC_SKBEDIT_H */
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index efaa7a7..613823b 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -485,6 +485,17 @@ config NET_ACT_SIMP
 	  To compile this code as a module, choose M here: the
 	  module will be called simple.
 
+config NET_ACT_SKBEDIT
+        tristate "SKB Editing"
+        depends on NET_CLS_ACT
+        ---help---
+	  Say Y here to change skb priority or queue_mapping settings.
+
+	  If unsure, say N.
+
+	  To compile this code as a module, choose M here: the
+	  module will be called simple.
+
 config NET_CLS_IND
 	bool "Incoming device classification"
 	depends on NET_CLS_U32 || NET_CLS_FW
diff --git a/net/sched/Makefile b/net/sched/Makefile
index 3d9b953..e60c992 100644
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -14,6 +14,7 @@ obj-$(CONFIG_NET_ACT_IPT)	+= act_ipt.o
 obj-$(CONFIG_NET_ACT_NAT)	+= act_nat.o
 obj-$(CONFIG_NET_ACT_PEDIT)	+= act_pedit.o
 obj-$(CONFIG_NET_ACT_SIMP)	+= act_simple.o
+obj-$(CONFIG_NET_ACT_SKBEDIT)	+= act_skbedit.o
 obj-$(CONFIG_NET_SCH_FIFO)	+= sch_fifo.o
 obj-$(CONFIG_NET_SCH_CBQ)	+= sch_cbq.o
 obj-$(CONFIG_NET_SCH_HTB)	+= sch_htb.o
diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c
new file mode 100644
index 0000000..54eeb27
--- /dev/null
+++ b/net/sched/act_skbedit.c
@@ -0,0 +1,199 @@
+/*
+ * net/sched/act_skbedit.c	SKB Editing
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ *
+ * Authors: Alexander Duyck <alexander.h.duyck@intel.com>
+ *
+ * Original Authors:	Jamal Hadi Salim (2005-8) (act_simple)
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/skbuff.h>
+#include <linux/rtnetlink.h>
+#include <net/netlink.h>
+#include <net/pkt_sched.h>
+
+#define TCA_ACT_SKBEDIT 11
+
+#include <linux/tc_act/tc_skbedit.h>
+#include <net/tc_act/tc_skbedit.h>
+
+#define SKBEDIT_TAB_MASK     15
+static struct tcf_common *tcf_skbedit_ht[SKBEDIT_TAB_MASK + 1];
+static u32 skbedit_idx_gen;
+static DEFINE_RWLOCK(skbedit_lock);
+
+static struct tcf_hashinfo skbedit_hash_info = {
+	.htab	=	tcf_skbedit_ht,
+	.hmask	=	SKBEDIT_TAB_MASK,
+	.lock	=	&skbedit_lock,
+};
+
+static int tcf_skbedit(struct sk_buff *skb, struct tc_action *a,
+		       struct tcf_result *res)
+{
+	struct tcf_skbedit *d = a->priv;
+
+	spin_lock(&d->tcf_lock);
+	d->tcf_tm.lastuse = jiffies;
+	d->tcf_bstats.bytes += qdisc_pkt_len(skb);
+	d->tcf_bstats.packets++;
+
+	if (d->flags & SKBEDIT_F_PRIORITY)
+		skb->priority = d->priority;
+	if (d->flags & SKBEDIT_F_QUEUE_MAPPING)
+		skb_set_queue_mapping(skb, d->queue_mapping);
+
+	spin_unlock(&d->tcf_lock);
+	return d->tcf_action;
+}
+
+static const struct nla_policy skbedit_policy[TCA_SKBEDIT_MAX + 1] = {
+	[TCA_SKBEDIT_PARMS]		= { .len = sizeof(struct tc_skbedit) },
+	[TCA_SKBEDIT_PRIORITY]		= { .len = sizeof(u32) },
+	[TCA_SKBEDIT_QUEUE_MAPPING]	= { .len = sizeof(u16) },
+};
+
+static int tcf_skbedit_init(struct nlattr *nla, struct nlattr *est,
+			 struct tc_action *a, int ovr, int bind)
+{
+	struct nlattr *tb[TCA_SKBEDIT_MAX + 1];
+	struct tc_skbedit *parm;
+	struct tcf_skbedit *d;
+	struct tcf_common *pc;
+	u32 flags = 0, *priority = NULL;
+	u16 *queue_mapping = NULL;
+	int ret = 0, err;
+
+	if (nla == NULL)
+		return -EINVAL;
+
+	err = nla_parse_nested(tb, TCA_SKBEDIT_MAX, nla, skbedit_policy);
+	if (err < 0)
+		return err;
+
+	if (tb[TCA_SKBEDIT_PARMS] == NULL)
+		return -EINVAL;
+
+	if (tb[TCA_SKBEDIT_PRIORITY] != NULL) {
+		flags |= SKBEDIT_F_PRIORITY;
+		priority = nla_data(tb[TCA_SKBEDIT_PRIORITY]);
+	}
+
+	if (tb[TCA_SKBEDIT_QUEUE_MAPPING] != NULL) {
+		flags |= SKBEDIT_F_QUEUE_MAPPING;
+		queue_mapping = nla_data(tb[TCA_SKBEDIT_QUEUE_MAPPING]);
+	}
+	if (!flags)
+		return -EINVAL;
+
+	parm = nla_data(tb[TCA_SKBEDIT_PARMS]);
+
+	pc = tcf_hash_check(parm->index, a, bind, &skbedit_hash_info);
+	if (!pc) {
+		pc = tcf_hash_create(parm->index, est, a, sizeof(*d), bind,
+				     &skbedit_idx_gen, &skbedit_hash_info);
+		if (unlikely(!pc))
+			return -ENOMEM;
+
+		d = to_skbedit(pc);
+		ret = ACT_P_CREATED;
+	} else {
+		d = to_skbedit(pc);
+		if (!ovr) {
+			tcf_hash_release(pc, bind, &skbedit_hash_info);
+			return -EEXIST;
+		}
+	}
+
+	spin_lock_bh(&d->tcf_lock);
+
+	d->flags = flags;
+	if (flags & SKBEDIT_F_PRIORITY)
+		d->priority = *priority;
+	if (flags & SKBEDIT_F_QUEUE_MAPPING)
+		d->queue_mapping = *queue_mapping;
+	d->tcf_action = parm->action;
+
+	spin_unlock_bh(&d->tcf_lock);
+
+	if (ret == ACT_P_CREATED)
+		tcf_hash_insert(pc, &skbedit_hash_info);
+	return ret;
+}
+
+static inline int tcf_skbedit_cleanup(struct tc_action *a, int bind)
+{
+	struct tcf_skbedit *d = a->priv;
+
+	if (d)
+		return tcf_hash_release(&d->common, bind, &skbedit_hash_info);
+	return 0;
+}
+
+static inline int tcf_skbedit_dump(struct sk_buff *skb, struct tc_action *a,
+				int bind, int ref)
+{
+	unsigned char *b = skb_tail_pointer(skb);
+	struct tcf_skbedit *d = a->priv;
+	struct tc_skbedit opt;
+	struct tcf_t t;
+
+	opt.index = d->tcf_index;
+	opt.refcnt = d->tcf_refcnt - ref;
+	opt.bindcnt = d->tcf_bindcnt - bind;
+	opt.action = d->tcf_action;
+	NLA_PUT(skb, TCA_SKBEDIT_PARMS, sizeof(opt), &opt);
+	if (d->flags & SKBEDIT_F_PRIORITY)
+		NLA_PUT(skb, TCA_SKBEDIT_PRIORITY, sizeof(d->priority),
+			&d->priority);
+	if (d->flags & SKBEDIT_F_QUEUE_MAPPING)
+		NLA_PUT(skb, TCA_SKBEDIT_QUEUE_MAPPING,
+			sizeof(d->queue_mapping), &d->queue_mapping);
+	t.install = jiffies_to_clock_t(jiffies - d->tcf_tm.install);
+	t.lastuse = jiffies_to_clock_t(jiffies - d->tcf_tm.lastuse);
+	t.expires = jiffies_to_clock_t(d->tcf_tm.expires);
+	NLA_PUT(skb, TCA_SKBEDIT_TM, sizeof(t), &t);
+	return skb->len;
+
+nla_put_failure:
+	nlmsg_trim(skb, b);
+	return -1;
+}
+
+static struct tc_action_ops act_skbedit_ops = {
+	.kind		=	"skbedit",
+	.hinfo		=	&skbedit_hash_info,
+	.type		=	TCA_ACT_SKBEDIT,
+	.capab		=	TCA_CAP_NONE,
+	.owner		=	THIS_MODULE,
+	.act		=	tcf_skbedit,
+	.dump		=	tcf_skbedit_dump,
+	.cleanup	=	tcf_skbedit_cleanup,
+	.init		=	tcf_skbedit_init,
+	.walk		=	tcf_generic_walker,
+};
+
+MODULE_AUTHOR("Alexander Duyck(2008)");
+MODULE_DESCRIPTION("SKB Editing");
+MODULE_LICENSE("GPL");
+
+static int __init skbedit_init_module(void)
+{
+	return tcf_register_action(&act_skbedit_ops);
+}
+
+static void __exit skbedit_cleanup_module(void)
+{
+	tcf_unregister_action(&act_skbedit_ops);
+}
+
+module_init(skbedit_init_module);
+module_exit(skbedit_cleanup_module);


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [NET_NEXT PATCH 1/2] pkt_sched: Add multiqueue scheduler support
  2008-08-28  1:12 [NET_NEXT PATCH 1/2] pkt_sched: Add multiqueue scheduler support Jeff Kirsher
  2008-08-28  1:12 ` [NET_NEXT PATCH 2/2] pkt_action: add new action skbedit Jeff Kirsher
@ 2008-08-28  8:21 ` David Miller
  2008-08-28 16:07   ` Alexander Duyck
  1 sibling, 1 reply; 8+ messages in thread
From: David Miller @ 2008-08-28  8:21 UTC (permalink / raw)
  To: jeffrey.t.kirsher; +Cc: jeff, netdev, alexander.h.duyck

From: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Date: Wed, 27 Aug 2008 18:12:53 -0700

> This patch is intended to add a qdisc to support the new tx multiqueue
> architecture by providing a band for each hardware queue.  By doing
> this it is possible to support a different qdisc per physical hardware
> queue.
> 
> This qdisc uses the skb->queue_mapping to select which band to place
> the traffic onto.  It then uses a round robin w/ a check to see if the
> subqueue is stopped to determine which band to dequeue the packet from.
> 
> Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
> Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>

It looks mostly fine.

But I wonder how useful this "bands" parameter is.
It behaves as follows:

1) "1" is always allowed, essentially non-multiqueue
2) ->real_num_tx_queues is always allowed
3) 0 means auto detect, which is also ->real_num_tx_queues

Everything else is rejected.

It's effectively a boolean, use all the TX queues or just one.  So
maybe that's how it should be presented and implemented.

That makes sense because the user has no way to figure out how many TX
queues are currently available.

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [NET_NEXT PATCH 2/2] pkt_action: add new action skbedit
  2008-08-28  1:12 ` [NET_NEXT PATCH 2/2] pkt_action: add new action skbedit Jeff Kirsher
@ 2008-08-28  8:27   ` David Miller
  2008-08-28 16:12     ` Alexander Duyck
  2008-08-30  3:13   ` Bill Fink
  1 sibling, 1 reply; 8+ messages in thread
From: David Miller @ 2008-08-28  8:27 UTC (permalink / raw)
  To: jeffrey.t.kirsher; +Cc: jeff, netdev, alexander.h.duyck

From: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Date: Wed, 27 Aug 2008 18:12:57 -0700

> From: Alexander Duyck <alexander.h.duyck@intel.com>
> 
> This new action will have the ability to change the priority and/or
> queue_mapping fields on an sk_buff.
> 
> Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
> Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>

I think you'll need to do range checking on the queue mapping being
set in tcf_skbedit().

qdisc_restart() will accept any skb_queue_mapping() value it
sees in packets obtained via ->dequeue().


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [NET_NEXT PATCH 1/2] pkt_sched: Add multiqueue scheduler support
  2008-08-28  8:21 ` [NET_NEXT PATCH 1/2] pkt_sched: Add multiqueue scheduler support David Miller
@ 2008-08-28 16:07   ` Alexander Duyck
  0 siblings, 0 replies; 8+ messages in thread
From: Alexander Duyck @ 2008-08-28 16:07 UTC (permalink / raw)
  To: David Miller; +Cc: jeffrey.t.kirsher, jeff, netdev, alexander.h.duyck

> It looks mostly fine.
>
> But I wonder how useful this "bands" parameter is.
> It behaves as follows:
>
> 1) "1" is always allowed, essentially non-multiqueue
> 2) ->real_num_tx_queues is always allowed
> 3) 0 means auto detect, which is also ->real_num_tx_queues
>
> Everything else is rejected.
>

Actually you give me too much credit.  I don't think bands even works
as a boolean, or at least it wasn't meant to.  It was supposed to be 0
or the number of queues which either way ends up the number of queues.
 It was a bit of leftover interface from the rr/prio configuration.  I
realized I left that in there when I was driving home last night but
by then it was too late.

I will probably just have it default the number of bands to always
be real_num_tx_queues and save the user from having to enter
bands in the tc command line.

Thanks,

Alex

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [NET_NEXT PATCH 2/2] pkt_action: add new action skbedit
  2008-08-28  8:27   ` David Miller
@ 2008-08-28 16:12     ` Alexander Duyck
  0 siblings, 0 replies; 8+ messages in thread
From: Alexander Duyck @ 2008-08-28 16:12 UTC (permalink / raw)
  To: David Miller; +Cc: jeffrey.t.kirsher, jeff, netdev, alexander.h.duyck

On Thu, Aug 28, 2008 at 1:27 AM, David Miller <davem@davemloft.net> wrote:

> I think you'll need to do range checking on the queue mapping being
> set in tcf_skbedit().
>
> qdisc_restart() will accept any skb_queue_mapping() value it
> sees in packets obtained via ->dequeue().
>

Noted.  I'll change this so that the action only occurs if
queue_mapping is less than skb->dev->real_num_tx_queues.

Thanks,

Alex

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [NET_NEXT PATCH 2/2] pkt_action: add new action skbedit
  2008-08-28  1:12 ` [NET_NEXT PATCH 2/2] pkt_action: add new action skbedit Jeff Kirsher
  2008-08-28  8:27   ` David Miller
@ 2008-08-30  3:13   ` Bill Fink
  2008-08-30  5:47     ` Jeff Kirsher
  1 sibling, 1 reply; 8+ messages in thread
From: Bill Fink @ 2008-08-30  3:13 UTC (permalink / raw)
  To: Jeff Kirsher; +Cc: jeff, netdev, davem, Alexander Duyck

On Wed, 27 Aug 2008, Jeff Kirsher wrote:

> From: Alexander Duyck <alexander.h.duyck@intel.com>
> 
> This new action will have the ability to change the priority and/or
> queue_mapping fields on an sk_buff.
> 
> Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
> Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
> ---
> 
>  include/linux/tc_act/tc_skbedit.h |   23 ++++
>  include/net/tc_act/tc_skbedit.h   |   15 +++
>  net/sched/Kconfig                 |   11 ++
>  net/sched/Makefile                |    1 
>  net/sched/act_skbedit.c           |  199 +++++++++++++++++++++++++++++++++++++
>  5 files changed, 249 insertions(+), 0 deletions(-)
>  create mode 100644 include/linux/tc_act/tc_skbedit.h
>  create mode 100644 include/net/tc_act/tc_skbedit.h
>  create mode 100644 net/sched/act_skbedit.c

> diff --git a/net/sched/Kconfig b/net/sched/Kconfig
> index efaa7a7..613823b 100644
> --- a/net/sched/Kconfig
> +++ b/net/sched/Kconfig
> @@ -485,6 +485,17 @@ config NET_ACT_SIMP
>  	  To compile this code as a module, choose M here: the
>  	  module will be called simple.
>  
> +config NET_ACT_SKBEDIT
> +        tristate "SKB Editing"
> +        depends on NET_CLS_ACT
> +        ---help---
> +	  Say Y here to change skb priority or queue_mapping settings.
> +
> +	  If unsure, say N.
> +
> +	  To compile this code as a module, choose M here: the
> +	  module will be called simple.
> +
>  config NET_CLS_IND
>  	bool "Incoming device classification"
>  	depends on NET_CLS_U32 || NET_CLS_FW

I'm rather backlogged on e-mail at the moment, so apologies if someone
else already pointed this out, but I'm guessing there was a copy and
paste error here, as the module shouldn't be called simple (that's for
the preceding NET_ACT_SIMP).

						-Bill

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [NET_NEXT PATCH 2/2] pkt_action: add new action skbedit
  2008-08-30  3:13   ` Bill Fink
@ 2008-08-30  5:47     ` Jeff Kirsher
  0 siblings, 0 replies; 8+ messages in thread
From: Jeff Kirsher @ 2008-08-30  5:47 UTC (permalink / raw)
  To: Bill Fink; +Cc: jeff, netdev, davem, Alexander Duyck

On Fri, Aug 29, 2008 at 8:13 PM, Bill Fink <billfink@mindspring.com> wrote:
> On Wed, 27 Aug 2008, Jeff Kirsher wrote:
>
>> From: Alexander Duyck <alexander.h.duyck@intel.com>
>>
>> This new action will have the ability to change the priority and/or
>> queue_mapping fields on an sk_buff.
>>
>> Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
>> Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
>> ---
>>
>>  include/linux/tc_act/tc_skbedit.h |   23 ++++
>>  include/net/tc_act/tc_skbedit.h   |   15 +++
>>  net/sched/Kconfig                 |   11 ++
>>  net/sched/Makefile                |    1
>>  net/sched/act_skbedit.c           |  199 +++++++++++++++++++++++++++++++++++++
>>  5 files changed, 249 insertions(+), 0 deletions(-)
>>  create mode 100644 include/linux/tc_act/tc_skbedit.h
>>  create mode 100644 include/net/tc_act/tc_skbedit.h
>>  create mode 100644 net/sched/act_skbedit.c
>
>> diff --git a/net/sched/Kconfig b/net/sched/Kconfig
>> index efaa7a7..613823b 100644
>> --- a/net/sched/Kconfig
>> +++ b/net/sched/Kconfig
>> @@ -485,6 +485,17 @@ config NET_ACT_SIMP
>>         To compile this code as a module, choose M here: the
>>         module will be called simple.
>>
>> +config NET_ACT_SKBEDIT
>> +        tristate "SKB Editing"
>> +        depends on NET_CLS_ACT
>> +        ---help---
>> +       Say Y here to change skb priority or queue_mapping settings.
>> +
>> +       If unsure, say N.
>> +
>> +       To compile this code as a module, choose M here: the
>> +       module will be called simple.
>> +
>>  config NET_CLS_IND
>>       bool "Incoming device classification"
>>       depends on NET_CLS_U32 || NET_CLS_FW
>
> I'm rather backlogged on e-mail at the moment, so apologies if someone
> else already pointed this out, but I'm guessing there was a copy and
> paste error here, as the module shouldn't be called simple (that's for
> the preceding NET_ACT_SIMP).
>
>                                                -Bill
>

Nice catch.  I was just about to post an updated patch set from Alex,
and it was a good thing I checked mail before sending the patches out.

I have corrected this in the next patch set (coming in about 10 minutes).

-- 
Cheers,
Jeff

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2008-08-30  5:47 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2008-08-28  1:12 [NET_NEXT PATCH 1/2] pkt_sched: Add multiqueue scheduler support Jeff Kirsher
2008-08-28  1:12 ` [NET_NEXT PATCH 2/2] pkt_action: add new action skbedit Jeff Kirsher
2008-08-28  8:27   ` David Miller
2008-08-28 16:12     ` Alexander Duyck
2008-08-30  3:13   ` Bill Fink
2008-08-30  5:47     ` Jeff Kirsher
2008-08-28  8:21 ` [NET_NEXT PATCH 1/2] pkt_sched: Add multiqueue scheduler support David Miller
2008-08-28 16:07   ` Alexander Duyck

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).