All of lore.kernel.org
 help / color / mirror / Atom feed
From: Nishanth Devarajan <ndev2021@gmail.com>
To: xiyou.wangcong@gmail.com, jhs@mojatatu.com, jiri@resnulli.us,
	davem@davemloft.net
Cc: netdev@vger.kernel.org, doucette@bu.edu, michel@digirati.com.br
Subject: [PATCH v5 net-next] net/sched: add skbprio scheduler
Date: Mon, 23 Jul 2018 19:37:41 +0530	[thread overview]
Message-ID: <20180723140735.GA6935@gmail.com> (raw)

net/sched: add skbprio scheduler

Skbprio (SKB Priority Queue) is a queueing discipline that prioritizes packets
according to their skb->priority field. Under congestion, already-enqueued lower
priority packets will be dropped to make space available for higher priority
packets. Skbprio was conceived as a solution for denial-of-service defenses that
need to route packets with different priorities as a means to overcome DoS
attacks.

v5
*Do not reference qdisc_dev(sch)->tx_queue_len for setting limit. Instead set
default sch->limit to 64.

v4
*Drop Documentation/networking/sch_skbprio.txt doc file to move it to tc man
page for Skbprio, in iproute2.

v3
*Drop max_limit parameter in struct skbprio_sched_data and instead use
sch->limit.

*Reference qdisc_dev(sch)->tx_queue_len only once, during initialisation for
qdisc (previously being referenced every time qdisc changes).

*Move qdisc's detailed description from in-code to Documentation/networking.

*When qdisc is saturated, enqueue incoming packet first before dequeueing
lowest priority packet in queue - improves usage of call stack registers.

*Introduce and use overlimit stat to keep track of number of dropped packets.

v2
*Use skb->priority field rather than DS field. Rename queueing discipline as
SKB Priority Queue (previously Gatekeeper Priority Queue).

*Queueing discipline is made classful to expose Skbprio's internal priority
queues.

Signed-off-by: Nishanth Devarajan <ndev2021@gmail.com>
Reviewed-by: Sachin Paryani <sachin.paryani@gmail.com>
Reviewed-by: Cody Doucette <doucette@bu.edu>
Reviewed-by: Michel Machado <michel@digirati.com.br>
---
 include/uapi/linux/pkt_sched.h |  15 ++
 net/sched/Kconfig              |  13 ++
 net/sched/Makefile             |   1 +
 net/sched/sch_skbprio.c        | 320 +++++++++++++++++++++++++++++++++++++++++
 4 files changed, 349 insertions(+)
 create mode 100644 net/sched/sch_skbprio.c

diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h
index d9cc9dc..8975fd1 100644
--- a/include/uapi/linux/pkt_sched.h
+++ b/include/uapi/linux/pkt_sched.h
@@ -124,6 +124,21 @@ struct tc_fifo_qopt {
 	__u32	limit;	/* Queue length: bytes for bfifo, packets for pfifo */
 };
 
+/* SKBPRIO section */
+
+/*
+ * Priorities go from zero to (SKBPRIO_MAX_PRIORITY - 1).
+ * SKBPRIO_MAX_PRIORITY should be at least 64 in order for skbprio to be able
+ * to map one to one the DS field of IPV4 and IPV6 headers.
+ * Memory allocation grows linearly with SKBPRIO_MAX_PRIORITY.
+ */
+
+#define SKBPRIO_MAX_PRIORITY 64
+
+struct tc_skbprio_qopt {
+	__u32	limit;		/* Queue length in packets. */
+};
+
 /* PRIO section */
 
 #define TCQ_PRIO_BANDS	16
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index 7af2467..7699344 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -251,6 +251,19 @@ config NET_SCH_MQPRIO
 
 	  If unsure, say N.
 
+config NET_SCH_SKBPRIO
+	tristate "SKB priority queue scheduler (SKBPRIO)"
+	help
+	  Say Y here if you want to use the SKB priority queue
+	  scheduler. This schedules packets according to skb->priority,
+	  which is useful for request packets in DoS mitigation systems such
+	  as Gatekeeper.
+
+	  To compile this driver as a module, choose M here: the module will
+	  be called sch_skbprio.
+
+	  If unsure, say N.
+
 config NET_SCH_CHOKE
 	tristate "CHOose and Keep responsive flow scheduler (CHOKE)"
 	help
diff --git a/net/sched/Makefile b/net/sched/Makefile
index 673ee7d..112ef70 100644
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -46,6 +46,7 @@ obj-$(CONFIG_NET_SCH_NETEM)	+= sch_netem.o
 obj-$(CONFIG_NET_SCH_DRR)	+= sch_drr.o
 obj-$(CONFIG_NET_SCH_PLUG)	+= sch_plug.o
 obj-$(CONFIG_NET_SCH_MQPRIO)	+= sch_mqprio.o
+obj-$(CONFIG_NET_SCH_SKBPRIO)	+= sch_skbprio.o
 obj-$(CONFIG_NET_SCH_CHOKE)	+= sch_choke.o
 obj-$(CONFIG_NET_SCH_QFQ)	+= sch_qfq.o
 obj-$(CONFIG_NET_SCH_CODEL)	+= sch_codel.o
diff --git a/net/sched/sch_skbprio.c b/net/sched/sch_skbprio.c
new file mode 100644
index 0000000..52c0b6d
--- /dev/null
+++ b/net/sched/sch_skbprio.c
@@ -0,0 +1,320 @@
+/*
+ * net/sched/sch_skbprio.c  SKB Priority Queue.
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ *
+ * Authors:	Nishanth Devarajan, <ndev2021@gmail.com>
+ *		Cody Doucette, <doucette@bu.edu>
+ *	        original idea by Michel Machado, Cody Doucette, and Qiaobin Fu
+ */
+
+#include <linux/string.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/skbuff.h>
+#include <net/pkt_sched.h>
+#include <net/sch_generic.h>
+#include <net/inet_ecn.h>
+
+/*		SKB Priority Queue
+ *	=================================
+ *
+ * Skbprio (SKB Priority Queue) is a queueing discipline that prioritizes
+ * packets according to their skb->priority field. Under congestion,
+ * Skbprio drops already-enqueued lower priority packets to make space
+ * available for higher priority packets; it was conceived as a solution
+ * for denial-of-service defenses that need to route packets with different
+ * priorities as a mean to overcome DoS attacks.
+ */
+
+struct skbprio_sched_data {
+	/* Queue state. */
+	struct sk_buff_head qdiscs[SKBPRIO_MAX_PRIORITY];
+	struct gnet_stats_queue qstats[SKBPRIO_MAX_PRIORITY];
+	u16 highest_prio;
+	u16 lowest_prio;
+};
+
+static u16 calc_new_high_prio(const struct skbprio_sched_data *q)
+{
+	int prio;
+
+	for (prio = q->highest_prio - 1; prio >= q->lowest_prio; prio--) {
+		if (!skb_queue_empty(&q->qdiscs[prio]))
+			return prio;
+	}
+
+	/* SKB queue is empty, return 0 (default highest priority setting). */
+	return 0;
+}
+
+static u16 calc_new_low_prio(const struct skbprio_sched_data *q)
+{
+	int prio;
+
+	for (prio = q->lowest_prio + 1; prio <= q->highest_prio; prio++) {
+		if (!skb_queue_empty(&q->qdiscs[prio]))
+			return prio;
+	}
+
+	/* SKB queue is empty, return SKBPRIO_MAX_PRIORITY - 1
+	 * (default lowest priority setting).
+	 */
+	return SKBPRIO_MAX_PRIORITY - 1;
+}
+
+static int skbprio_enqueue(struct sk_buff *skb, struct Qdisc *sch,
+			  struct sk_buff **to_free)
+{
+	const unsigned int max_priority = SKBPRIO_MAX_PRIORITY - 1;
+	struct skbprio_sched_data *q = qdisc_priv(sch);
+	struct sk_buff_head *qdisc;
+	struct sk_buff_head *lp_qdisc;
+	struct sk_buff *to_drop;
+	u16 prio, lp;
+
+	/* Obtain the priority of @skb. */
+	prio = min(skb->priority, max_priority);
+
+	qdisc = &q->qdiscs[prio];
+	if (sch->q.qlen < sch->limit) {
+		__skb_queue_tail(qdisc, skb);
+		qdisc_qstats_backlog_inc(sch, skb);
+		q->qstats[prio].backlog += qdisc_pkt_len(skb);
+
+		/* Check to update highest and lowest priorities. */
+		if (prio > q->highest_prio)
+			q->highest_prio = prio;
+
+		if (prio < q->lowest_prio)
+			q->lowest_prio = prio;
+
+		sch->q.qlen++;
+		return NET_XMIT_SUCCESS;
+	}
+
+	/* If this packet has the lowest priority, drop it. */
+	lp = q->lowest_prio;
+	if (prio <= lp) {
+		q->qstats[prio].drops++;
+		q->qstats[prio].overlimits++;
+		return qdisc_drop(skb, sch, to_free);
+	}
+
+	__skb_queue_tail(qdisc, skb);
+	qdisc_qstats_backlog_inc(sch, skb);
+	q->qstats[prio].backlog += qdisc_pkt_len(skb);
+
+	/* Drop the packet at the tail of the lowest priority qdisc. */
+	lp_qdisc = &q->qdiscs[lp];
+	to_drop = __skb_dequeue_tail(lp_qdisc);
+	BUG_ON(!to_drop);
+	qdisc_qstats_backlog_dec(sch, to_drop);
+	qdisc_drop(to_drop, sch, to_free);
+
+	q->qstats[lp].backlog -= qdisc_pkt_len(to_drop);
+	q->qstats[lp].drops++;
+	q->qstats[lp].overlimits++;
+
+	/* Check to update highest and lowest priorities. */
+	if (skb_queue_empty(lp_qdisc)) {
+		if (q->lowest_prio == q->highest_prio) {
+			/* The incoming packet is the only packet in queue. */
+			BUG_ON(sch->q.qlen != 1);
+			q->lowest_prio = prio;
+			q->highest_prio = prio;
+		} else {
+			q->lowest_prio = calc_new_low_prio(q);
+		}
+	}
+
+	if (prio > q->highest_prio)
+		q->highest_prio = prio;
+
+	return NET_XMIT_CN;
+}
+
+static struct sk_buff *skbprio_dequeue(struct Qdisc *sch)
+{
+	struct skbprio_sched_data *q = qdisc_priv(sch);
+	struct sk_buff_head *hpq = &q->qdiscs[q->highest_prio];
+	struct sk_buff *skb = __skb_dequeue(hpq);
+
+	if (unlikely(!skb))
+		return NULL;
+
+	sch->q.qlen--;
+	qdisc_qstats_backlog_dec(sch, skb);
+	qdisc_bstats_update(sch, skb);
+
+	q->qstats[q->highest_prio].backlog -= qdisc_pkt_len(skb);
+
+	/* Update highest priority field. */
+	if (skb_queue_empty(hpq)) {
+		if (q->lowest_prio == q->highest_prio) {
+			BUG_ON(sch->q.qlen);
+			q->highest_prio = 0;
+			q->lowest_prio = SKBPRIO_MAX_PRIORITY - 1;
+		} else {
+			q->highest_prio = calc_new_high_prio(q);
+		}
+	}
+	return skb;
+}
+
+static int skbprio_change(struct Qdisc *sch, struct nlattr *opt,
+			struct netlink_ext_ack *extack)
+{
+	struct tc_skbprio_qopt *ctl = nla_data(opt);
+
+	sch->limit = ctl->limit;
+	return 0;
+}
+
+static int skbprio_init(struct Qdisc *sch, struct nlattr *opt,
+			struct netlink_ext_ack *extack)
+{
+	struct skbprio_sched_data *q = qdisc_priv(sch);
+	int prio;
+
+	/* Initialise all queues, one for each possible priority. */
+	for (prio = 0; prio < SKBPRIO_MAX_PRIORITY; prio++)
+		__skb_queue_head_init(&q->qdiscs[prio]);
+
+	memset(&q->qstats, 0, sizeof(q->qstats));
+	q->highest_prio = 0;
+	q->lowest_prio = SKBPRIO_MAX_PRIORITY - 1;
+	sch->limit = 64;
+	if (!opt)
+		return 0;
+
+	return skbprio_change(sch, opt, extack);
+}
+
+static int skbprio_dump(struct Qdisc *sch, struct sk_buff *skb)
+{
+	struct tc_skbprio_qopt opt;
+
+	opt.limit = sch->limit;
+
+	if (nla_put(skb, TCA_OPTIONS, sizeof(opt), &opt))
+		return -1;
+
+	return skb->len;
+}
+
+static void skbprio_reset(struct Qdisc *sch)
+{
+	struct skbprio_sched_data *q = qdisc_priv(sch);
+	int prio;
+
+	sch->qstats.backlog = 0;
+	sch->q.qlen = 0;
+
+	for (prio = 0; prio < SKBPRIO_MAX_PRIORITY; prio++)
+		__skb_queue_purge(&q->qdiscs[prio]);
+
+	memset(&q->qstats, 0, sizeof(q->qstats));
+	q->highest_prio = 0;
+	q->lowest_prio = SKBPRIO_MAX_PRIORITY - 1;
+}
+
+static void skbprio_destroy(struct Qdisc *sch)
+{
+	struct skbprio_sched_data *q = qdisc_priv(sch);
+	int prio;
+
+	for (prio = 0; prio < SKBPRIO_MAX_PRIORITY; prio++)
+		__skb_queue_purge(&q->qdiscs[prio]);
+}
+
+static struct Qdisc *skbprio_leaf(struct Qdisc *sch, unsigned long arg)
+{
+	return NULL;
+}
+
+static unsigned long skbprio_find(struct Qdisc *sch, u32 classid)
+{
+	return 0;
+}
+
+static int skbprio_dump_class(struct Qdisc *sch, unsigned long cl,
+			     struct sk_buff *skb, struct tcmsg *tcm)
+{
+	tcm->tcm_handle |= TC_H_MIN(cl);
+	return 0;
+}
+
+static int skbprio_dump_class_stats(struct Qdisc *sch, unsigned long cl,
+				   struct gnet_dump *d)
+{
+	struct skbprio_sched_data *q = qdisc_priv(sch);
+	if (gnet_stats_copy_queue(d, NULL, &q->qstats[cl - 1],
+		q->qstats[cl - 1].qlen) < 0)
+		return -1;
+	return 0;
+}
+
+static void skbprio_walk(struct Qdisc *sch, struct qdisc_walker *arg)
+{
+	unsigned int i;
+
+	if (arg->stop)
+		return;
+
+	for (i = 0; i < SKBPRIO_MAX_PRIORITY; i++) {
+		if (arg->count < arg->skip) {
+			arg->count++;
+			continue;
+		}
+		if (arg->fn(sch, i + 1, arg) < 0) {
+			arg->stop = 1;
+			break;
+		}
+		arg->count++;
+	}
+}
+
+static const struct Qdisc_class_ops skbprio_class_ops = {
+	.leaf		=	skbprio_leaf,
+	.find		=	skbprio_find,
+	.dump		=	skbprio_dump_class,
+	.dump_stats	=	skbprio_dump_class_stats,
+	.walk		=	skbprio_walk,
+};
+
+static struct Qdisc_ops skbprio_qdisc_ops __read_mostly = {
+	.cl_ops		=	&skbprio_class_ops,
+	.id		=	"skbprio",
+	.priv_size	=	sizeof(struct skbprio_sched_data),
+	.enqueue	=	skbprio_enqueue,
+	.dequeue	=	skbprio_dequeue,
+	.peek		=	qdisc_peek_dequeued,
+	.init		=	skbprio_init,
+	.reset		=	skbprio_reset,
+	.change		=	skbprio_change,
+	.dump		=	skbprio_dump,
+	.destroy	=	skbprio_destroy,
+	.owner		=	THIS_MODULE,
+};
+
+static int __init skbprio_module_init(void)
+{
+	return register_qdisc(&skbprio_qdisc_ops);
+}
+
+static void __exit skbprio_module_exit(void)
+{
+	unregister_qdisc(&skbprio_qdisc_ops);
+}
+
+module_init(skbprio_module_init)
+module_exit(skbprio_module_exit)
+
+MODULE_LICENSE("GPL");
-- 
1.9.1

             reply	other threads:[~2018-07-23 15:09 UTC|newest]

Thread overview: 3+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-07-23 14:07 Nishanth Devarajan [this message]
2018-07-23 20:47 ` [PATCH v5 net-next] net/sched: add skbprio scheduler Cong Wang
2018-07-24 21:43 ` David Miller

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20180723140735.GA6935@gmail.com \
    --to=ndev2021@gmail.com \
    --cc=davem@davemloft.net \
    --cc=doucette@bu.edu \
    --cc=jhs@mojatatu.com \
    --cc=jiri@resnulli.us \
    --cc=michel@digirati.com.br \
    --cc=netdev@vger.kernel.org \
    --cc=xiyou.wangcong@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.