netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* SFQ: backport some features from ESFQ (try 2)
@ 2007-07-30  0:21 Corey Hickey
  2007-07-30  0:21 ` [PATCH 1/7] Preparatory refactoring part 1 Corey Hickey
                   ` (7 more replies)
  0 siblings, 8 replies; 18+ messages in thread
From: Corey Hickey @ 2007-07-30  0:21 UTC (permalink / raw)
  To: netdev

Hello,

Patchset try 2 addresses the review by Michael Buesch.

This set of patches adds some of ESFQ's modifications to the original 
SFQ. Thus far, I have received support for this approach rather than for 
trying to get ESFQ included as a separate qdisc.

http://mailman.ds9a.nl/pipermail/lartc/2007q2/021056.html

My patches here implement "tc qdisc change", user-configurable depth 
(number of flows), and user-configurable divisor (for setting hash table 
size). I've left out the remaining ESFQ features (usage of jhash and 
different hashing methods) because Patrick McHardy intends to submit a 
patch that will supersede that functionality; see the URL above.

Default values remain the same, and SFQ's default behavior remains the 
same, so there should be no user disruption.

A patch for iproute2 is included after the end of the kernel patch series.

Thanks for your consideration,
Corey


 include/linux/pkt_sched.h |    8 --
 net/sched/sch_sfq.c       |  296 ++++++++++++++++++++++++++++-----------------
 2 files changed, 187 insertions(+), 117 deletions(-)

[PATCH 1/7] Preparatory refactoring part 1.
[PATCH 2/7] Preparatory refactoring part 2.
[PATCH 3/7] Move two functions.
[PATCH 4/7] Add "depth".
[PATCH 5/7] Add divisor.
[PATCH 6/7] Make qdisc changeable.
[PATCH 7/7] Remove comments about hardcoded values.
[PATCH] [iproute2] SFQ: Support changing depth and divisor.

^ permalink raw reply	[flat|nested] 18+ messages in thread

* [PATCH 1/7] Preparatory refactoring part 1.
  2007-07-30  0:21 SFQ: backport some features from ESFQ (try 2) Corey Hickey
@ 2007-07-30  0:21 ` Corey Hickey
  2007-07-30 13:51   ` Patrick McHardy
  2007-07-30  0:21 ` [PATCH 2/7] Preparatory refactoring part 2 Corey Hickey
                   ` (6 subsequent siblings)
  7 siblings, 1 reply; 18+ messages in thread
From: Corey Hickey @ 2007-07-30  0:21 UTC (permalink / raw)
  To: netdev; +Cc: Corey Hickey

Make a new function sfq_q_enqueue() that operates directly on the
queue data. This will be useful for implementing sfq_change() in
a later patch. A pleasant side-effect is reducing most of the
duplicate code in sfq_enqueue() and sfq_requeue().

Similarly, make a new function sfq_q_dequeue().

Signed-off-by: Corey Hickey <bugfood-ml@fatooh.org>
---
 net/sched/sch_sfq.c |   70 ++++++++++++++++++++++++++------------------------
 1 files changed, 36 insertions(+), 34 deletions(-)

diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index 9579573..8ae077f 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -77,6 +77,9 @@
 #define SFQ_DEPTH		128
 #define SFQ_HASH_DIVISOR	1024
 
+#define SFQ_HEAD 0
+#define SFQ_TAIL 1
+
 /* This type should contain at least SFQ_DEPTH*2 values */
 typedef unsigned char sfq_index;
 
@@ -244,10 +247,8 @@ static unsigned int sfq_drop(struct Qdisc *sch)
 	return 0;
 }
 
-static int
-sfq_enqueue(struct sk_buff *skb, struct Qdisc* sch)
+static void sfq_q_enqueue(struct sk_buff *skb, struct sfq_sched_data *q, unsigned int end)
 {
-	struct sfq_sched_data *q = qdisc_priv(sch);
 	unsigned hash = sfq_hash(q, skb);
 	sfq_index x;
 
@@ -256,8 +257,12 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc* sch)
 		q->ht[hash] = x = q->dep[SFQ_DEPTH].next;
 		q->hash[x] = hash;
 	}
-	sch->qstats.backlog += skb->len;
-	__skb_queue_tail(&q->qs[x], skb);
+
+	if (end == SFQ_TAIL)
+		__skb_queue_tail(&q->qs[x], skb);
+	else
+		__skb_queue_head(&q->qs[x], skb);
+
 	sfq_inc(q, x);
 	if (q->qs[x].qlen == 1) {		/* The flow is new */
 		if (q->tail == SFQ_DEPTH) {	/* It is the first flow */
@@ -270,12 +275,21 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc* sch)
 			q->tail = x;
 		}
 	}
+}
+
+static int
+sfq_enqueue(struct sk_buff *skb, struct Qdisc* sch)
+{
+	struct sfq_sched_data *q = qdisc_priv(sch);
+	sfq_q_enqueue(skb, q, SFQ_TAIL);
+	sch->qstats.backlog += skb->len;
 	if (++sch->q.qlen < q->limit-1) {
 		sch->bstats.bytes += skb->len;
 		sch->bstats.packets++;
 		return 0;
 	}
 
+	sch->qstats.drops++;
 	sfq_drop(sch);
 	return NET_XMIT_CN;
 }
@@ -284,28 +298,8 @@ static int
 sfq_requeue(struct sk_buff *skb, struct Qdisc* sch)
 {
 	struct sfq_sched_data *q = qdisc_priv(sch);
-	unsigned hash = sfq_hash(q, skb);
-	sfq_index x;
-
-	x = q->ht[hash];
-	if (x == SFQ_DEPTH) {
-		q->ht[hash] = x = q->dep[SFQ_DEPTH].next;
-		q->hash[x] = hash;
-	}
+	sfq_q_enqueue(skb, q, SFQ_HEAD);
 	sch->qstats.backlog += skb->len;
-	__skb_queue_head(&q->qs[x], skb);
-	sfq_inc(q, x);
-	if (q->qs[x].qlen == 1) {		/* The flow is new */
-		if (q->tail == SFQ_DEPTH) {	/* It is the first flow */
-			q->tail = x;
-			q->next[x] = x;
-			q->allot[x] = q->quantum;
-		} else {
-			q->next[x] = q->next[q->tail];
-			q->next[q->tail] = x;
-			q->tail = x;
-		}
-	}
 	if (++sch->q.qlen < q->limit - 1) {
 		sch->qstats.requeues++;
 		return 0;
@@ -316,13 +310,8 @@ sfq_requeue(struct sk_buff *skb, struct Qdisc* sch)
 	return NET_XMIT_CN;
 }
 
-
-
-
-static struct sk_buff *
-sfq_dequeue(struct Qdisc* sch)
+static struct sk_buff *sfq_q_dequeue(struct sfq_sched_data *q)
 {
-	struct sfq_sched_data *q = qdisc_priv(sch);
 	struct sk_buff *skb;
 	sfq_index a, old_a;
 
@@ -335,8 +324,6 @@ sfq_dequeue(struct Qdisc* sch)
 	/* Grab packet */
 	skb = __skb_dequeue(&q->qs[a]);
 	sfq_dec(q, a);
-	sch->q.qlen--;
-	sch->qstats.backlog -= skb->len;
 
 	/* Is the slot empty? */
 	if (q->qs[a].qlen == 0) {
@@ -353,6 +340,21 @@ sfq_dequeue(struct Qdisc* sch)
 		a = q->next[a];
 		q->allot[a] += q->quantum;
 	}
+
+	return skb;
+}
+
+static struct sk_buff
+*sfq_dequeue(struct Qdisc* sch)
+{
+	struct sfq_sched_data *q = qdisc_priv(sch);
+	struct sk_buff *skb;
+
+	skb = sfq_q_dequeue(q);
+	if (skb == NULL)
+		return NULL;
+	sch->q.qlen--;
+	sch->qstats.backlog -= skb->len;
 	return skb;
 }
 
-- 
1.5.2.4


^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH 2/7] Preparatory refactoring part 2.
  2007-07-30  0:21 SFQ: backport some features from ESFQ (try 2) Corey Hickey
  2007-07-30  0:21 ` [PATCH 1/7] Preparatory refactoring part 1 Corey Hickey
@ 2007-07-30  0:21 ` Corey Hickey
  2007-07-30 13:59   ` Patrick McHardy
  2007-07-30  0:21 ` [PATCH 3/7] Move two functions Corey Hickey
                   ` (5 subsequent siblings)
  7 siblings, 1 reply; 18+ messages in thread
From: Corey Hickey @ 2007-07-30  0:21 UTC (permalink / raw)
  To: netdev; +Cc: Corey Hickey

Factor code out of sfq_init() and sfq_destroy(), again so that the
new functions can be used by sfq_change() later.

Actually, as the diff itself shows, most of the sfq_q_init() code
comes from the original sfq_change(), but sfq_change() is only
called by sfq_init() right now. Thus, it is safe to remove
sfq_change(); "tc qdisc change" doesn't yet work for sfq anyway.

The sfq_destroy() --> sfq_q_destroy() change looks pointless here,
but it's cleaner to split now and add code to sfq_q_destroy() in a
later patch.

Signed-off-by: Corey Hickey <bugfood-ml@fatooh.org>
---
 net/sched/sch_sfq.c |   80 +++++++++++++++++++++++++-------------------------
 1 files changed, 40 insertions(+), 40 deletions(-)

diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index 8ae077f..0c46938 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -380,71 +380,71 @@ static void sfq_perturbation(unsigned long arg)
 	}
 }
 
-static int sfq_change(struct Qdisc *sch, struct rtattr *opt)
+static int sfq_q_init(struct sfq_sched_data *q, struct rtattr *opt)
 {
-	struct sfq_sched_data *q = qdisc_priv(sch);
 	struct tc_sfq_qopt *ctl = RTA_DATA(opt);
-	unsigned int qlen;
+	int i;
 
-	if (opt->rta_len < RTA_LENGTH(sizeof(*ctl)))
+	if (opt && opt->rta_len < RTA_LENGTH(sizeof(*ctl)))
 		return -EINVAL;
 
-	sch_tree_lock(sch);
-	q->quantum = ctl->quantum ? : psched_mtu(sch->dev);
-	q->perturb_period = ctl->perturb_period*HZ;
-	if (ctl->limit)
-		q->limit = min_t(u32, ctl->limit, SFQ_DEPTH);
+	q->perturbation = 0;
+	q->max_depth = 0;
+	q->tail = q->limit = SFQ_DEPTH;
+	if (opt == NULL) {
+		q->perturb_period = 0;
+	} else {
+		struct tc_sfq_qopt *ctl = RTA_DATA(opt);
+		if (ctl->quantum)
+			q->quantum = ctl->quantum;
+		q->perturb_period = ctl->perturb_period*HZ;
 
-	qlen = sch->q.qlen;
-	while (sch->q.qlen >= q->limit-1)
-		sfq_drop(sch);
-	qdisc_tree_decrease_qlen(sch, qlen - sch->q.qlen);
+		if (ctl->limit)
+			q->limit = min_t(u32, ctl->limit, SFQ_DEPTH);
+	}
 
-	del_timer(&q->perturb_timer);
-	if (q->perturb_period) {
-		q->perturb_timer.expires = jiffies + q->perturb_period;
-		add_timer(&q->perturb_timer);
+	for (i=0; i<SFQ_HASH_DIVISOR; i++)
+		q->ht[i] = SFQ_DEPTH;
+	for (i=0; i<SFQ_DEPTH; i++) {
+		skb_queue_head_init(&q->qs[i]);
+		q->dep[i+SFQ_DEPTH].next = i+SFQ_DEPTH;
+		q->dep[i+SFQ_DEPTH].prev = i+SFQ_DEPTH;
 	}
-	sch_tree_unlock(sch);
+
+	for (i=0; i<SFQ_DEPTH; i++)
+		sfq_link(q, i);
 	return 0;
 }
 
 static int sfq_init(struct Qdisc *sch, struct rtattr *opt)
 {
 	struct sfq_sched_data *q = qdisc_priv(sch);
-	int i;
+	int err;
+	
+	q->quantum = psched_mtu(sch->dev); /* default */
+	if ((err = sfq_q_init(q, opt)))
+		return err;
 
 	init_timer(&q->perturb_timer);
 	q->perturb_timer.data = (unsigned long)sch;
 	q->perturb_timer.function = sfq_perturbation;
-
-	for (i=0; i<SFQ_HASH_DIVISOR; i++)
-		q->ht[i] = SFQ_DEPTH;
-	for (i=0; i<SFQ_DEPTH; i++) {
-		skb_queue_head_init(&q->qs[i]);
-		q->dep[i+SFQ_DEPTH].next = i+SFQ_DEPTH;
-		q->dep[i+SFQ_DEPTH].prev = i+SFQ_DEPTH;
-	}
-	q->limit = SFQ_DEPTH;
-	q->max_depth = 0;
-	q->tail = SFQ_DEPTH;
-	if (opt == NULL) {
-		q->quantum = psched_mtu(sch->dev);
-		q->perturb_period = 0;
-	} else {
-		int err = sfq_change(sch, opt);
-		if (err)
-			return err;
+	if (q->perturb_period) {
+		q->perturb_timer.expires = jiffies + q->perturb_period;
+		add_timer(&q->perturb_timer);
 	}
-	for (i=0; i<SFQ_DEPTH; i++)
-		sfq_link(q, i);
+
 	return 0;
 }
 
+static void sfq_q_destroy(struct sfq_sched_data *q)
+{
+	del_timer(&q->perturb_timer);
+}
+
 static void sfq_destroy(struct Qdisc *sch)
 {
 	struct sfq_sched_data *q = qdisc_priv(sch);
-	del_timer(&q->perturb_timer);
+	sfq_q_destroy(q);
 }
 
 static int sfq_dump(struct Qdisc *sch, struct sk_buff *skb)
-- 
1.5.2.4


^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH 3/7] Move two functions.
  2007-07-30  0:21 SFQ: backport some features from ESFQ (try 2) Corey Hickey
  2007-07-30  0:21 ` [PATCH 1/7] Preparatory refactoring part 1 Corey Hickey
  2007-07-30  0:21 ` [PATCH 2/7] Preparatory refactoring part 2 Corey Hickey
@ 2007-07-30  0:21 ` Corey Hickey
  2007-07-30  0:21 ` [PATCH 4/7] Add "depth" Corey Hickey
                   ` (4 subsequent siblings)
  7 siblings, 0 replies; 18+ messages in thread
From: Corey Hickey @ 2007-07-30  0:21 UTC (permalink / raw)
  To: netdev; +Cc: Corey Hickey

Move sfq_q_destroy() to above sfq_q_init() so that it can be used
by an error case in a later patch.

Move sfq_destroy() as well, for clarity.

Signed-off-by: Corey Hickey <bugfood-ml@fatooh.org>
---
 net/sched/sch_sfq.c |   24 ++++++++++++------------
 1 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index 0c46938..583f925 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -380,6 +380,17 @@ static void sfq_perturbation(unsigned long arg)
 	}
 }
 
+static void sfq_q_destroy(struct sfq_sched_data *q)
+{
+	del_timer(&q->perturb_timer);
+}
+
+static void sfq_destroy(struct Qdisc *sch)
+{
+	struct sfq_sched_data *q = qdisc_priv(sch);
+	sfq_q_destroy(q);
+}
+
 static int sfq_q_init(struct sfq_sched_data *q, struct rtattr *opt)
 {
 	struct tc_sfq_qopt *ctl = RTA_DATA(opt);
@@ -420,7 +431,7 @@ static int sfq_init(struct Qdisc *sch, struct rtattr *opt)
 {
 	struct sfq_sched_data *q = qdisc_priv(sch);
 	int err;
-	
+
 	q->quantum = psched_mtu(sch->dev); /* default */
 	if ((err = sfq_q_init(q, opt)))
 		return err;
@@ -436,17 +447,6 @@ static int sfq_init(struct Qdisc *sch, struct rtattr *opt)
 	return 0;
 }
 
-static void sfq_q_destroy(struct sfq_sched_data *q)
-{
-	del_timer(&q->perturb_timer);
-}
-
-static void sfq_destroy(struct Qdisc *sch)
-{
-	struct sfq_sched_data *q = qdisc_priv(sch);
-	sfq_q_destroy(q);
-}
-
 static int sfq_dump(struct Qdisc *sch, struct sk_buff *skb)
 {
 	struct sfq_sched_data *q = qdisc_priv(sch);
-- 
1.5.2.4


^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH 4/7] Add "depth".
  2007-07-30  0:21 SFQ: backport some features from ESFQ (try 2) Corey Hickey
                   ` (2 preceding siblings ...)
  2007-07-30  0:21 ` [PATCH 3/7] Move two functions Corey Hickey
@ 2007-07-30  0:21 ` Corey Hickey
  2007-07-30  0:21 ` [PATCH 5/7] Add divisor Corey Hickey
                   ` (3 subsequent siblings)
  7 siblings, 0 replies; 18+ messages in thread
From: Corey Hickey @ 2007-07-30  0:21 UTC (permalink / raw)
  To: netdev; +Cc: Corey Hickey

Make "depth" (number of queues) user-configurable:

* replace #define with a parameter
* use old hardcoded value as a default
* kcalloc() arrays in sfq_q_init()
* free() arrays in sfq_q_destroy()

Signed-off-by: Corey Hickey <bugfood-ml@fatooh.org>
---
 net/sched/sch_sfq.c |   81 +++++++++++++++++++++++++++++++++++----------------
 1 files changed, 56 insertions(+), 25 deletions(-)

diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index 583f925..70124ac 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -74,14 +74,16 @@
 
 	It is easy to increase these values, but not in flight.  */
 
-#define SFQ_DEPTH		128
+#define SFQ_DEPTH_DEFAULT	128
 #define SFQ_HASH_DIVISOR	1024
 
 #define SFQ_HEAD 0
 #define SFQ_TAIL 1
 
-/* This type should contain at least SFQ_DEPTH*2 values */
-typedef unsigned char sfq_index;
+/* This type must contain greater than depth*2 values, so depth is constrained 
+ * accordingly. */
+typedef unsigned int sfq_index;
+#define SFQ_MAX_DEPTH (UINT_MAX / 2 - 1)
 
 struct sfq_head
 {
@@ -95,6 +97,7 @@ struct sfq_sched_data
 	int		perturb_period;
 	unsigned	quantum;	/* Allotment per round: MUST BE >= MTU */
 	int		limit;
+	unsigned	depth;
 
 /* Variables */
 	struct timer_list perturb_timer;
@@ -103,11 +106,11 @@ struct sfq_sched_data
 	sfq_index	max_depth;	/* Maximal depth */
 
 	sfq_index	ht[SFQ_HASH_DIVISOR];	/* Hash table */
-	sfq_index	next[SFQ_DEPTH];	/* Active slots link */
-	short		allot[SFQ_DEPTH];	/* Current allotment per slot */
-	unsigned short	hash[SFQ_DEPTH];	/* Hash value indexed by slots */
-	struct sk_buff_head	qs[SFQ_DEPTH];		/* Slot queue */
-	struct sfq_head	dep[SFQ_DEPTH*2];	/* Linked list of slots, indexed by depth */
+	sfq_index	*next;			/* Active slots link */
+	short		*allot;			/* Current allotment per slot */
+	unsigned short	*hash;			/* Hash value indexed by slots */
+	struct sk_buff_head	*qs;		/* Slot queue */
+	struct sfq_head	*dep;			/* Linked list of slots, indexed by depth */
 };
 
 static __inline__ unsigned sfq_fold_hash(struct sfq_sched_data *q, u32 h, u32 h1)
@@ -164,7 +167,7 @@ static unsigned sfq_hash(struct sfq_sched_data *q, struct sk_buff *skb)
 static inline void sfq_link(struct sfq_sched_data *q, sfq_index x)
 {
 	sfq_index p, n;
-	int d = q->qs[x].qlen + SFQ_DEPTH;
+	int d = q->qs[x].qlen + q->depth;
 
 	p = d;
 	n = q->dep[d].next;
@@ -215,7 +218,7 @@ static unsigned int sfq_drop(struct Qdisc *sch)
 	   drop a packet from it */
 
 	if (d > 1) {
-		sfq_index x = q->dep[d+SFQ_DEPTH].next;
+		sfq_index x = q->dep[d + q->depth].next;
 		skb = q->qs[x].prev;
 		len = skb->len;
 		__skb_unlink(skb, &q->qs[x]);
@@ -238,7 +241,7 @@ static unsigned int sfq_drop(struct Qdisc *sch)
 		kfree_skb(skb);
 		sfq_dec(q, d);
 		sch->q.qlen--;
-		q->ht[q->hash[d]] = SFQ_DEPTH;
+		q->ht[q->hash[d]] = q->depth;
 		sch->qstats.drops++;
 		sch->qstats.backlog -= len;
 		return len;
@@ -253,8 +256,8 @@ static void sfq_q_enqueue(struct sk_buff *skb, struct sfq_sched_data *q, unsigne
 	sfq_index x;
 
 	x = q->ht[hash];
-	if (x == SFQ_DEPTH) {
-		q->ht[hash] = x = q->dep[SFQ_DEPTH].next;
+	if (x == q->depth) {
+		q->ht[hash] = x = q->dep[q->depth].next;
 		q->hash[x] = hash;
 	}
 
@@ -265,7 +268,7 @@ static void sfq_q_enqueue(struct sk_buff *skb, struct sfq_sched_data *q, unsigne
 
 	sfq_inc(q, x);
 	if (q->qs[x].qlen == 1) {		/* The flow is new */
-		if (q->tail == SFQ_DEPTH) {	/* It is the first flow */
+		if (q->tail == q->depth) {	/* It is the first flow */
 			q->tail = x;
 			q->next[x] = x;
 			q->allot[x] = q->quantum;
@@ -316,7 +319,7 @@ static struct sk_buff *sfq_q_dequeue(struct sfq_sched_data *q)
 	sfq_index a, old_a;
 
 	/* No active slots */
-	if (q->tail == SFQ_DEPTH)
+	if (q->tail == q->depth)
 		return NULL;
 
 	a = old_a = q->next[q->tail];
@@ -327,10 +330,10 @@ static struct sk_buff *sfq_q_dequeue(struct sfq_sched_data *q)
 
 	/* Is the slot empty? */
 	if (q->qs[a].qlen == 0) {
-		q->ht[q->hash[a]] = SFQ_DEPTH;
+		q->ht[q->hash[a]] = q->depth;
 		a = q->next[a];
 		if (a == old_a) {
-			q->tail = SFQ_DEPTH;
+			q->tail = q->depth;
 			return skb;
 		}
 		q->next[q->tail] = a;
@@ -383,6 +386,11 @@ static void sfq_perturbation(unsigned long arg)
 static void sfq_q_destroy(struct sfq_sched_data *q)
 {
 	del_timer(&q->perturb_timer);
+	kfree(q->dep);
+	kfree(q->next);
+	kfree(q->allot);
+	kfree(q->hash);
+	kfree(q->qs);
 }
 
 static void sfq_destroy(struct Qdisc *sch)
@@ -401,30 +409,53 @@ static int sfq_q_init(struct sfq_sched_data *q, struct rtattr *opt)
 
 	q->perturbation = 0;
 	q->max_depth = 0;
-	q->tail = q->limit = SFQ_DEPTH;
 	if (opt == NULL) {
 		q->perturb_period = 0;
+		q->tail = q->limit = q->depth = SFQ_DEPTH_DEFAULT;
 	} else {
 		struct tc_sfq_qopt *ctl = RTA_DATA(opt);
 		if (ctl->quantum)
 			q->quantum = ctl->quantum;
 		q->perturb_period = ctl->perturb_period*HZ;
+		q->tail = q->limit = q->depth = ctl->flows ? : SFQ_DEPTH_DEFAULT;
+
+		if (q->depth > SFQ_MAX_DEPTH)
+			return -EINVAL;
 
 		if (ctl->limit)
-			q->limit = min_t(u32, ctl->limit, SFQ_DEPTH);
+			q->limit = min_t(u32, ctl->limit, q->depth);
 	}
 
+	q->dep = kcalloc(1 + q->depth*2, sizeof(struct sfq_head), GFP_KERNEL);
+	if (!q->dep)
+		goto err_case;
+	q->next = kcalloc(q->depth, sizeof(sfq_index), GFP_KERNEL);
+	if (!q->next)
+		goto err_case;
+	q->allot = kcalloc(q->depth, sizeof(short), GFP_KERNEL);
+	if (!q->allot)
+		goto err_case;
+	q->hash = kcalloc(q->depth, sizeof(unsigned short), GFP_KERNEL);
+	if (!q->hash)
+		goto err_case;
+	q->qs = kcalloc(q->depth, sizeof(struct sk_buff_head), GFP_KERNEL);
+	if (!q->qs)
+		goto err_case;
+
 	for (i=0; i<SFQ_HASH_DIVISOR; i++)
-		q->ht[i] = SFQ_DEPTH;
-	for (i=0; i<SFQ_DEPTH; i++) {
+		q->ht[i] = q->depth;
+	for (i=0; i < q->depth; i++) {
 		skb_queue_head_init(&q->qs[i]);
-		q->dep[i+SFQ_DEPTH].next = i+SFQ_DEPTH;
-		q->dep[i+SFQ_DEPTH].prev = i+SFQ_DEPTH;
+		q->dep[i + q->depth].next = i + q->depth;
+		q->dep[i + q->depth].prev = i + q->depth;
 	}
 
-	for (i=0; i<SFQ_DEPTH; i++)
+	for (i=0; i < q->depth; i++)
 		sfq_link(q, i);
 	return 0;
+err_case:
+	sfq_q_destroy(q);
+	return -ENOBUFS;
 }
 
 static int sfq_init(struct Qdisc *sch, struct rtattr *opt)
@@ -458,7 +489,7 @@ static int sfq_dump(struct Qdisc *sch, struct sk_buff *skb)
 
 	opt.limit = q->limit;
 	opt.divisor = SFQ_HASH_DIVISOR;
-	opt.flows = q->limit;
+	opt.flows = q->depth;
 
 	RTA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
 
-- 
1.5.2.4


^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH 5/7] Add divisor.
  2007-07-30  0:21 SFQ: backport some features from ESFQ (try 2) Corey Hickey
                   ` (3 preceding siblings ...)
  2007-07-30  0:21 ` [PATCH 4/7] Add "depth" Corey Hickey
@ 2007-07-30  0:21 ` Corey Hickey
  2007-07-30  0:21 ` [PATCH 6/7] Make qdisc changeable Corey Hickey
                   ` (2 subsequent siblings)
  7 siblings, 0 replies; 18+ messages in thread
From: Corey Hickey @ 2007-07-30  0:21 UTC (permalink / raw)
  To: netdev; +Cc: Corey Hickey

Make hash divisor user-configurable.

Signed-off-by: Corey Hickey <bugfood-ml@fatooh.org>
---
 net/sched/sch_sfq.c |   17 ++++++++++++-----
 1 files changed, 12 insertions(+), 5 deletions(-)

diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index 70124ac..e6a6a21 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -75,7 +75,7 @@
 	It is easy to increase these values, but not in flight.  */
 
 #define SFQ_DEPTH_DEFAULT	128
-#define SFQ_HASH_DIVISOR	1024
+#define SFQ_DIVISOR_DEFAULT	1024
 
 #define SFQ_HEAD 0
 #define SFQ_TAIL 1
@@ -98,6 +98,7 @@ struct sfq_sched_data
 	unsigned	quantum;	/* Allotment per round: MUST BE >= MTU */
 	int		limit;
 	unsigned	depth;
+	unsigned	hash_divisor;
 
 /* Variables */
 	struct timer_list perturb_timer;
@@ -105,7 +106,7 @@ struct sfq_sched_data
 	sfq_index	tail;		/* Index of current slot in round */
 	sfq_index	max_depth;	/* Maximal depth */
 
-	sfq_index	ht[SFQ_HASH_DIVISOR];	/* Hash table */
+	sfq_index	*ht;			/* Hash table */
 	sfq_index	*next;			/* Active slots link */
 	short		*allot;			/* Current allotment per slot */
 	unsigned short	*hash;			/* Hash value indexed by slots */
@@ -120,7 +121,7 @@ static __inline__ unsigned sfq_fold_hash(struct sfq_sched_data *q, u32 h, u32 h1
 	/* Have we any rotation primitives? If not, WHY? */
 	h ^= (h1<<pert) ^ (h1>>(0x1F - pert));
 	h ^= h>>10;
-	return h & 0x3FF;
+	return h & (q->hash_divisor-1);
 }
 
 static unsigned sfq_hash(struct sfq_sched_data *q, struct sk_buff *skb)
@@ -386,6 +387,7 @@ static void sfq_perturbation(unsigned long arg)
 static void sfq_q_destroy(struct sfq_sched_data *q)
 {
 	del_timer(&q->perturb_timer);
+	kfree(q->ht);
 	kfree(q->dep);
 	kfree(q->next);
 	kfree(q->allot);
@@ -411,12 +413,14 @@ static int sfq_q_init(struct sfq_sched_data *q, struct rtattr *opt)
 	q->max_depth = 0;
 	if (opt == NULL) {
 		q->perturb_period = 0;
+		q->hash_divisor = SFQ_DIVISOR_DEFAULT;
 		q->tail = q->limit = q->depth = SFQ_DEPTH_DEFAULT;
 	} else {
 		struct tc_sfq_qopt *ctl = RTA_DATA(opt);
 		if (ctl->quantum)
 			q->quantum = ctl->quantum;
 		q->perturb_period = ctl->perturb_period*HZ;
+		q->hash_divisor = ctl->divisor ? : SFQ_DIVISOR_DEFAULT;
 		q->tail = q->limit = q->depth = ctl->flows ? : SFQ_DEPTH_DEFAULT;
 
 		if (q->depth > SFQ_MAX_DEPTH)
@@ -426,6 +430,9 @@ static int sfq_q_init(struct sfq_sched_data *q, struct rtattr *opt)
 			q->limit = min_t(u32, ctl->limit, q->depth);
 	}
 
+	q->ht = kcalloc(q->hash_divisor, sizeof(sfq_index), GFP_KERNEL);
+	if (!q->ht)
+		goto err_case;
 	q->dep = kcalloc(1 + q->depth*2, sizeof(struct sfq_head), GFP_KERNEL);
 	if (!q->dep)
 		goto err_case;
@@ -442,7 +449,7 @@ static int sfq_q_init(struct sfq_sched_data *q, struct rtattr *opt)
 	if (!q->qs)
 		goto err_case;
 
-	for (i=0; i<SFQ_HASH_DIVISOR; i++)
+	for (i=0; i<q->hash_divisor; i++)
 		q->ht[i] = q->depth;
 	for (i=0; i < q->depth; i++) {
 		skb_queue_head_init(&q->qs[i]);
@@ -488,7 +495,7 @@ static int sfq_dump(struct Qdisc *sch, struct sk_buff *skb)
 	opt.perturb_period = q->perturb_period/HZ;
 
 	opt.limit = q->limit;
-	opt.divisor = SFQ_HASH_DIVISOR;
+	opt.divisor = q->hash_divisor;
 	opt.flows = q->depth;
 
 	RTA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
-- 
1.5.2.4


^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH 6/7] Make qdisc changeable.
  2007-07-30  0:21 SFQ: backport some features from ESFQ (try 2) Corey Hickey
                   ` (4 preceding siblings ...)
  2007-07-30  0:21 ` [PATCH 5/7] Add divisor Corey Hickey
@ 2007-07-30  0:21 ` Corey Hickey
  2007-07-30 14:11   ` Patrick McHardy
  2007-07-30  0:21 ` [PATCH 7/7] Remove comments about hardcoded values Corey Hickey
  2007-07-30  0:21 ` [PATCH] [iproute2] SFQ: Support changing depth and divisor Corey Hickey
  7 siblings, 1 reply; 18+ messages in thread
From: Corey Hickey @ 2007-07-30  0:21 UTC (permalink / raw)
  To: netdev; +Cc: Corey Hickey

Re-implement sfq_change() and enable Qdisc_opts.change so "tc qdisc
change" will work.

Signed-off-by: Corey Hickey <bugfood-ml@fatooh.org>
---
 net/sched/sch_sfq.c |   51 ++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 files changed, 50 insertions(+), 1 deletions(-)

diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index e6a6a21..e042cd0 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -485,6 +485,55 @@ static int sfq_init(struct Qdisc *sch, struct rtattr *opt)
 	return 0;
 }
 
+static int sfq_change(struct Qdisc *sch, struct rtattr *opt)
+{
+	struct sfq_sched_data *q = qdisc_priv(sch);
+	struct sfq_sched_data tmp;
+	struct sk_buff *skb;
+	int err;
+	
+	/* set up tmp queue */
+	memset(&tmp, 0, sizeof(struct sfq_sched_data));
+	tmp.quantum = psched_mtu(sch->dev); /* default */
+	if ((err = sfq_q_init(&tmp, opt)))
+		return err;
+
+	/* copy packets from the old queue to the tmp queue */
+	sch_tree_lock(sch);
+	while (sch->q.qlen >= tmp.limit - 1)
+		sfq_drop(sch);
+	while ((skb = sfq_q_dequeue(q)) != NULL)
+		sfq_q_enqueue(skb, &tmp, SFQ_TAIL);
+	
+	/* clean up the old queue */
+	sfq_q_destroy(q);
+
+	/* copy elements of the tmp queue into the old queue */
+	q->perturb_period = tmp.perturb_period;
+	q->quantum        = tmp.quantum;
+	q->limit          = tmp.limit;
+	q->depth          = tmp.depth;
+	q->hash_divisor   = tmp.hash_divisor;
+	q->tail           = tmp.tail;
+	q->max_depth      = tmp.max_depth;
+	q->ht    = tmp.ht;
+	q->dep   = tmp.dep;
+	q->next  = tmp.next;
+	q->allot = tmp.allot;
+	q->hash  = tmp.hash;
+	q->qs    = tmp.qs;
+
+	/* finish up */
+	if (q->perturb_period) {
+		q->perturb_timer.expires = jiffies + q->perturb_period;
+		add_timer(&q->perturb_timer);
+	} else {
+		q->perturbation = 0;
+	}
+	sch_tree_unlock(sch);
+	return 0;
+}
+
 static int sfq_dump(struct Qdisc *sch, struct sk_buff *skb)
 {
 	struct sfq_sched_data *q = qdisc_priv(sch);
@@ -519,7 +568,7 @@ static struct Qdisc_ops sfq_qdisc_ops = {
 	.init		=	sfq_init,
 	.reset		=	sfq_reset,
 	.destroy	=	sfq_destroy,
-	.change		=	NULL,
+	.change		=	sfq_change,
 	.dump		=	sfq_dump,
 	.owner		=	THIS_MODULE,
 };
-- 
1.5.2.4


^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH 7/7] Remove comments about hardcoded values.
  2007-07-30  0:21 SFQ: backport some features from ESFQ (try 2) Corey Hickey
                   ` (5 preceding siblings ...)
  2007-07-30  0:21 ` [PATCH 6/7] Make qdisc changeable Corey Hickey
@ 2007-07-30  0:21 ` Corey Hickey
  2007-07-30  0:21 ` [PATCH] [iproute2] SFQ: Support changing depth and divisor Corey Hickey
  7 siblings, 0 replies; 18+ messages in thread
From: Corey Hickey @ 2007-07-30  0:21 UTC (permalink / raw)
  To: netdev; +Cc: Corey Hickey

None of these are true anymore (hooray!).

Signed-off-by: Corey Hickey <bugfood-ml@fatooh.org>
---
 include/linux/pkt_sched.h |    8 --------
 net/sched/sch_sfq.c       |   17 +++--------------
 2 files changed, 3 insertions(+), 22 deletions(-)

diff --git a/include/linux/pkt_sched.h b/include/linux/pkt_sched.h
index 268c515..58a0ea6 100644
--- a/include/linux/pkt_sched.h
+++ b/include/linux/pkt_sched.h
@@ -148,14 +148,6 @@ struct tc_sfq_qopt
 	unsigned	flows;		/* Maximal number of flows  */
 };
 
-/*
- *  NOTE: limit, divisor and flows are hardwired to code at the moment.
- *
- *	limit=flows=128, divisor=1024;
- *
- *	The only reason for this is efficiency, it is possible
- *	to change these parameters in compile time.
- */
 
 /* RED section */
 
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index e042cd0..3890452 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -61,18 +61,7 @@
 
 	We still need true WFQ for top level CSZ, but using WFQ
 	for the best effort traffic is absolutely pointless:
-	SFQ is superior for this purpose.
-
-	IMPLEMENTATION:
-	This implementation limits maximal queue length to 128;
-	maximal mtu to 2^15-1; number of hash buckets to 1024.
-	The only goal of this restrictions was that all data
-	fit into one 4K page :-). Struct sfq_sched_data is
-	organized in anti-cache manner: all the data for a bucket
-	are scattered over different locations. This is not good,
-	but it allowed me to put it into 4K.
-
-	It is easy to increase these values, but not in flight.  */
+	SFQ is superior for this purpose. */
 
 #define SFQ_DEPTH_DEFAULT	128
 #define SFQ_DIVISOR_DEFAULT	1024
@@ -491,7 +480,7 @@ static int sfq_change(struct Qdisc *sch, struct rtattr *opt)
 	struct sfq_sched_data tmp;
 	struct sk_buff *skb;
 	int err;
-	
+
 	/* set up tmp queue */
 	memset(&tmp, 0, sizeof(struct sfq_sched_data));
 	tmp.quantum = psched_mtu(sch->dev); /* default */
@@ -504,7 +493,7 @@ static int sfq_change(struct Qdisc *sch, struct rtattr *opt)
 		sfq_drop(sch);
 	while ((skb = sfq_q_dequeue(q)) != NULL)
 		sfq_q_enqueue(skb, &tmp, SFQ_TAIL);
-	
+
 	/* clean up the old queue */
 	sfq_q_destroy(q);
 
-- 
1.5.2.4


^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH] [iproute2] SFQ: Support changing depth and divisor.
  2007-07-30  0:21 SFQ: backport some features from ESFQ (try 2) Corey Hickey
                   ` (6 preceding siblings ...)
  2007-07-30  0:21 ` [PATCH 7/7] Remove comments about hardcoded values Corey Hickey
@ 2007-07-30  0:21 ` Corey Hickey
  7 siblings, 0 replies; 18+ messages in thread
From: Corey Hickey @ 2007-07-30  0:21 UTC (permalink / raw)
  To: netdev

This can safely be applied either before or after the kernel
patches because the tc_sfq_qopt struct is unchanged:

- old kernels will ignore the parameters from new iproute2
- new kernels will use the same default parameters
---
 include/linux/pkt_sched.h |    9 ---------
 tc/q_sfq.c                |   21 ++++++++++++++++++++-
 2 files changed, 20 insertions(+), 10 deletions(-)

diff --git a/include/linux/pkt_sched.h b/include/linux/pkt_sched.h
index d10f353..37946d4 100644
--- a/include/linux/pkt_sched.h
+++ b/include/linux/pkt_sched.h
@@ -139,15 +139,6 @@ struct tc_sfq_qopt
 	unsigned	flows;		/* Maximal number of flows  */
 };
 
-/*
- *  NOTE: limit, divisor and flows are hardwired to code at the moment.
- *
- *	limit=flows=128, divisor=1024;
- *
- *	The only reason for this is efficiency, it is possible
- *	to change these parameters in compile time.
- */
-
 /* RED section */
 
 enum
diff --git a/tc/q_sfq.c b/tc/q_sfq.c
index 05385cf..7754db7 100644
--- a/tc/q_sfq.c
+++ b/tc/q_sfq.c
@@ -25,7 +25,7 @@
 
 static void explain(void)
 {
-	fprintf(stderr, "Usage: ... sfq [ limit NUMBER ] [ perturb SECS ] [ quantum BYTES ]\n");
+	fprintf(stderr, "Usage: ... sfq [ limit NUMBER ] [ depth FLOWS ] [ divisor HASHBITS ] [ perturb SECS ] [ quantum BYTES ]\n");
 }
 
 #define usage() return(-1)
@@ -63,6 +63,25 @@ static int sfq_parse_opt(struct qdisc_util *qu, int argc, char **argv, struct nl
 				return -1;
 			}
 			ok++;
+		} else if (strcmp(*argv, "depth") == 0) {
+			NEXT_ARG();
+			if (get_unsigned(&opt.flows, *argv, 0)) {
+				fprintf(stderr, "Illegal \"depth\"\n");
+				return -1;
+			}
+			ok++;
+		} else if (strcmp(*argv, "divisor") == 0) {
+			NEXT_ARG();
+			if (get_unsigned(&opt.divisor, *argv, 0)) {
+				fprintf(stderr, "Illegal \"divisor\"\n");
+				return -1;
+			}
+			if (opt.divisor >= 15) {
+				fprintf(stderr, "Illegal \"divisor\", must be < 15\n");
+				return -1;
+			}
+			opt.divisor = 1<<opt.divisor;
+			ok++;
 		} else if (strcmp(*argv, "help") == 0) {
 			explain();
 			return -1;
-- 
1.5.2.4


^ permalink raw reply related	[flat|nested] 18+ messages in thread

* Re: [PATCH 1/7] Preparatory refactoring part 1.
  2007-07-30  0:21 ` [PATCH 1/7] Preparatory refactoring part 1 Corey Hickey
@ 2007-07-30 13:51   ` Patrick McHardy
  2007-07-31  1:26     ` Corey Hickey
  0 siblings, 1 reply; 18+ messages in thread
From: Patrick McHardy @ 2007-07-30 13:51 UTC (permalink / raw)
  To: Corey Hickey; +Cc: netdev

Corey Hickey wrote:
> diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
> index 9579573..8ae077f 100644
> --- a/net/sched/sch_sfq.c
> +++ b/net/sched/sch_sfq.c
> @@ -77,6 +77,9 @@
>  #define SFQ_DEPTH		128
>  #define SFQ_HASH_DIVISOR	1024
>  
> +#define SFQ_HEAD 0
> +#define SFQ_TAIL 1
> +
>  /* This type should contain at least SFQ_DEPTH*2 values */
>  typedef unsigned char sfq_index;
>  
> @@ -244,10 +247,8 @@ static unsigned int sfq_drop(struct Qdisc *sch)
>  	return 0;
>  }
>  
> -static int
> -sfq_enqueue(struct sk_buff *skb, struct Qdisc* sch)
> +static void sfq_q_enqueue(struct sk_buff *skb, struct sfq_sched_data *q, unsigned int end)


Please make sure to break at 80 chars and to keep the style
in this file consistent (newline before function name).

>  {
> -	struct sfq_sched_data *q = qdisc_priv(sch);
>  	unsigned hash = sfq_hash(q, skb);
>  	sfq_index x;
>  
> @@ -256,8 +257,12 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc* sch)
>  		q->ht[hash] = x = q->dep[SFQ_DEPTH].next;
>  		q->hash[x] = hash;
>  	}
> -	sch->qstats.backlog += skb->len;


Why not keep this instead of having both callers do it?

> -	__skb_queue_tail(&q->qs[x], skb);
> +
> +	if (end == SFQ_TAIL)
> +		__skb_queue_tail(&q->qs[x], skb);
> +	else
> +		__skb_queue_head(&q->qs[x], skb);
> +
>  	sfq_inc(q, x);
>  	if (q->qs[x].qlen == 1) {		/* The flow is new */
>  		if (q->tail == SFQ_DEPTH) {	/* It is the first flow */
> @@ -270,12 +275,21 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc* sch)
>  			q->tail = x;
>  		}
>  	}
> +}
> +
> +static int
> +sfq_enqueue(struct sk_buff *skb, struct Qdisc* sch)
> +{
> +	struct sfq_sched_data *q = qdisc_priv(sch);


newline please.

> +	sfq_q_enqueue(skb, q, SFQ_TAIL);
> +	sch->qstats.backlog += skb->len;
>  	if (++sch->q.qlen < q->limit-1) {
>  		sch->bstats.bytes += skb->len;
>  		sch->bstats.packets++;
>  		return 0;
>  	}
>  
> +	sch->qstats.drops++;


sfq_drop already increments this.

>  	sfq_drop(sch);
>  	return NET_XMIT_CN;
>  }
> @@ -284,28 +298,8 @@ static int
>  sfq_requeue(struct sk_buff *skb, struct Qdisc* sch)
>  {
>  	struct sfq_sched_data *q = qdisc_priv(sch);

newline please

> -	unsigned hash = sfq_hash(q, skb);
> -	sfq_index x;
> -
> -	x = q->ht[hash];
> -	if (x == SFQ_DEPTH) {
> -		q->ht[hash] = x = q->dep[SFQ_DEPTH].next;
> -		q->hash[x] = hash;
> -	}
> +	sfq_q_enqueue(skb, q, SFQ_HEAD);
>  	sch->qstats.backlog += skb->len;
> -	__skb_queue_head(&q->qs[x], skb);
> -	sfq_inc(q, x);
> -	if (q->qs[x].qlen == 1) {		/* The flow is new */
> -		if (q->tail == SFQ_DEPTH) {	/* It is the first flow */
> -			q->tail = x;
> -			q->next[x] = x;
> -			q->allot[x] = q->quantum;
> -		} else {
> -			q->next[x] = q->next[q->tail];
> -			q->next[q->tail] = x;
> -			q->tail = x;
> -		}
> -	}
>  	if (++sch->q.qlen < q->limit - 1) {
>  		sch->qstats.requeues++;
>  		return 0;
> @@ -316,13 +310,8 @@ sfq_requeue(struct sk_buff *skb, struct Qdisc* sch)
>  	return NET_XMIT_CN;
>  }
>  
> -
> -
> -
> -static struct sk_buff *
> -sfq_dequeue(struct Qdisc* sch)
> +static struct sk_buff *sfq_q_dequeue(struct sfq_sched_data *q)


Keep style consistent please.

>  {
> -	struct sfq_sched_data *q = qdisc_priv(sch);
>  	struct sk_buff *skb;
>  	sfq_index a, old_a;
>  
> @@ -335,8 +324,6 @@ sfq_dequeue(struct Qdisc* sch)
>  	/* Grab packet */
>  	skb = __skb_dequeue(&q->qs[a]);
>  	sfq_dec(q, a);
> -	sch->q.qlen--;
> -	sch->qstats.backlog -= skb->len;
>  
>  	/* Is the slot empty? */
>  	if (q->qs[a].qlen == 0) {
> @@ -353,6 +340,21 @@ sfq_dequeue(struct Qdisc* sch)
>  		a = q->next[a];
>  		q->allot[a] += q->quantum;
>  	}
> +
> +	return skb;
> +}
> +
> +static struct sk_buff
> +*sfq_dequeue(struct Qdisc* sch)
> +{
> +	struct sfq_sched_data *q = qdisc_priv(sch);
> +	struct sk_buff *skb;
> +
> +	skb = sfq_q_dequeue(q);
> +	if (skb == NULL)
> +		return NULL;
> +	sch->q.qlen--;
> +	sch->qstats.backlog -= skb->len;
>  	return skb;
>  }
>  


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 2/7] Preparatory refactoring part 2.
  2007-07-30  0:21 ` [PATCH 2/7] Preparatory refactoring part 2 Corey Hickey
@ 2007-07-30 13:59   ` Patrick McHardy
  2007-07-31  7:43     ` Corey Hickey
  0 siblings, 1 reply; 18+ messages in thread
From: Patrick McHardy @ 2007-07-30 13:59 UTC (permalink / raw)
  To: Corey Hickey; +Cc: netdev

Corey Hickey wrote:
> diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
> index 8ae077f..0c46938 100644
> --- a/net/sched/sch_sfq.c
> +++ b/net/sched/sch_sfq.c
> @@ -380,71 +380,71 @@ static void sfq_perturbation(unsigned long arg)
>  	}
>  }
>  
> -static int sfq_change(struct Qdisc *sch, struct rtattr *opt)
> +static int sfq_q_init(struct sfq_sched_data *q, struct rtattr *opt)
>  {
> -	struct sfq_sched_data *q = qdisc_priv(sch);
>  	struct tc_sfq_qopt *ctl = RTA_DATA(opt);
> -	unsigned int qlen;
> +	int i;
>  
> -	if (opt->rta_len < RTA_LENGTH(sizeof(*ctl)))
> +	if (opt && opt->rta_len < RTA_LENGTH(sizeof(*ctl)))


opt is dereferenced above (RTA_DATA), so if it is NULL we've already
crashed.

>  		return -EINVAL;
>  
> -	sch_tree_lock(sch);
> -	q->quantum = ctl->quantum ? : psched_mtu(sch->dev);
> -	q->perturb_period = ctl->perturb_period*HZ;
> -	if (ctl->limit)
> -		q->limit = min_t(u32, ctl->limit, SFQ_DEPTH);
> +	q->perturbation = 0;
> +	q->max_depth = 0;
> +	q->tail = q->limit = SFQ_DEPTH;
> +	if (opt == NULL) {
> +		q->perturb_period = 0;
> +	} else {
> +		struct tc_sfq_qopt *ctl = RTA_DATA(opt);
> +		if (ctl->quantum)
> +			q->quantum = ctl->quantum;
> +		q->perturb_period = ctl->perturb_period*HZ;
>  
> -	qlen = sch->q.qlen;
> -	while (sch->q.qlen >= q->limit-1)
> -		sfq_drop(sch);
> -	qdisc_tree_decrease_qlen(sch, qlen - sch->q.qlen);


I hope that patch that makes changing possible brings this back ..
<checking> .. it doesn't. Please either keep this or fix up 6/7
to bring it back.

> +		if (ctl->limit)
> +			q->limit = min_t(u32, ctl->limit, SFQ_DEPTH);
> +	}
>  
> -	del_timer(&q->perturb_timer);
> -	if (q->perturb_period) {
> -		q->perturb_timer.expires = jiffies + q->perturb_period;
> -		add_timer(&q->perturb_timer);
> +	for (i=0; i<SFQ_HASH_DIVISOR; i++)
> +		q->ht[i] = SFQ_DEPTH;
> +	for (i=0; i<SFQ_DEPTH; i++) {
> +		skb_queue_head_init(&q->qs[i]);
> +		q->dep[i+SFQ_DEPTH].next = i+SFQ_DEPTH;
> +		q->dep[i+SFQ_DEPTH].prev = i+SFQ_DEPTH;
>  	}
> -	sch_tree_unlock(sch);
> +
> +	for (i=0; i<SFQ_DEPTH; i++)
> +		sfq_link(q, i);
>  	return 0;
>  }
>  

> +static void sfq_q_destroy(struct sfq_sched_data *q)
> +{
> +	del_timer(&q->perturb_timer);
> +}
> +
>  static void sfq_destroy(struct Qdisc *sch)
>  {
>  	struct sfq_sched_data *q = qdisc_priv(sch);
> -	del_timer(&q->perturb_timer);
> +	sfq_q_destroy(q);
>  }

That really does look a bit pointless.

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 6/7] Make qdisc changeable.
  2007-07-30  0:21 ` [PATCH 6/7] Make qdisc changeable Corey Hickey
@ 2007-07-30 14:11   ` Patrick McHardy
  2007-07-31  7:43     ` Corey Hickey
  2007-08-06  2:47     ` Corey Hickey
  0 siblings, 2 replies; 18+ messages in thread
From: Patrick McHardy @ 2007-07-30 14:11 UTC (permalink / raw)
  To: Corey Hickey; +Cc: netdev

Corey Hickey wrote:
> Re-implement sfq_change() and enable Qdisc_opts.change so "tc qdisc
> change" will work.
> 
> Signed-off-by: Corey Hickey <bugfood-ml@fatooh.org>
> ---
>  net/sched/sch_sfq.c |   51 ++++++++++++++++++++++++++++++++++++++++++++++++++-
>  1 files changed, 50 insertions(+), 1 deletions(-)
> 
> diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
> index e6a6a21..e042cd0 100644
> --- a/net/sched/sch_sfq.c
> +++ b/net/sched/sch_sfq.c
> @@ -485,6 +485,55 @@ static int sfq_init(struct Qdisc *sch, struct rtattr *opt)
>  	return 0;
>  }
>  
> +static int sfq_change(struct Qdisc *sch, struct rtattr *opt)
> +{
> +	struct sfq_sched_data *q = qdisc_priv(sch);
> +	struct sfq_sched_data tmp;
> +	struct sk_buff *skb;
> +	int err;
> +	
> +	/* set up tmp queue */
> +	memset(&tmp, 0, sizeof(struct sfq_sched_data));
> +	tmp.quantum = psched_mtu(sch->dev); /* default */


If no value is given it should use the old value instead of
reinitializing to the default.

> +	if ((err = sfq_q_init(&tmp, opt)))
> +		return err;


This will also use defaults for all unspecified values. It would
be more consistent with other qdiscs to only change those values
that are actually specified, so something like "tc qdisc change ...
perturb 10" will *only* change the perturbation parameter.
I'm not sure reusing the initialization function and copying the
parameters is the best way to do this.

> +
> +	/* copy packets from the old queue to the tmp queue */
> +	sch_tree_lock(sch);
> +	while (sch->q.qlen >= tmp.limit - 1)
> +		sfq_drop(sch);
> +	while ((skb = sfq_q_dequeue(q)) != NULL)
> +		sfq_q_enqueue(skb, &tmp, SFQ_TAIL);
> +	
> +	/* clean up the old queue */
> +	sfq_q_destroy(q);
> +
> +	/* copy elements of the tmp queue into the old queue */
> +	q->perturb_period = tmp.perturb_period;
> +	q->quantum        = tmp.quantum;
> +	q->limit          = tmp.limit;
> +	q->depth          = tmp.depth;
> +	q->hash_divisor   = tmp.hash_divisor;
> +	q->tail           = tmp.tail;
> +	q->max_depth      = tmp.max_depth;
> +	q->ht    = tmp.ht;
> +	q->dep   = tmp.dep;
> +	q->next  = tmp.next;
> +	q->allot = tmp.allot;
> +	q->hash  = tmp.hash;
> +	q->qs    = tmp.qs;
> +
> +	/* finish up */
> +	if (q->perturb_period) {
> +		q->perturb_timer.expires = jiffies + q->perturb_period;
> +		add_timer(&q->perturb_timer);
> +	} else {
> +		q->perturbation = 0;
> +	}
> +	sch_tree_unlock(sch);
> +	return 0;
> +}
> +

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 1/7] Preparatory refactoring part 1.
  2007-07-30 13:51   ` Patrick McHardy
@ 2007-07-31  1:26     ` Corey Hickey
  2007-07-31 10:46       ` Patrick McHardy
  0 siblings, 1 reply; 18+ messages in thread
From: Corey Hickey @ 2007-07-31  1:26 UTC (permalink / raw)
  To: Patrick McHardy; +Cc: netdev

Patrick McHardy wrote:
>> -static int
>> -sfq_enqueue(struct sk_buff *skb, struct Qdisc* sch)
>> +static void sfq_q_enqueue(struct sk_buff *skb, struct sfq_sched_data *q, unsigned int end)
> 
> 
> Please make sure to break at 80 chars and to keep the style
> in this file consistent (newline before function name).

Ok. For what it's worth, though, most of the original functions in the 
file don't have a newline before the function name. Omitting the newline 
  would thus make the new/changed functions more consistent with the 
rest of the file. I don't have a preference either way, so unless you 
change your mind I'll put the newline back in..

>>  {
>> -	struct sfq_sched_data *q = qdisc_priv(sch);
>>  	unsigned hash = sfq_hash(q, skb);
>>  	sfq_index x;
>>  
>> @@ -256,8 +257,12 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc* sch)
>>  		q->ht[hash] = x = q->dep[SFQ_DEPTH].next;
>>  		q->hash[x] = hash;
>>  	}
>> -	sch->qstats.backlog += skb->len;
> 
> Why not keep this instead of having both callers do it?

My idea was to have all the sfq_q_* functions operate on "struct 
sfq_sched_data" and have no knowledge of the "struct Qdisc". I did this 
in order to be able to use the new functions in sfq_change() when the 
temporary sfq_sched_data doesn't have a parent Qdisc.

There's probably a better way, and I am of course open to suggestions, 
but what I did made sense to me.

>> -	__skb_queue_tail(&q->qs[x], skb);
>> +
>> +	if (end == SFQ_TAIL)
>> +		__skb_queue_tail(&q->qs[x], skb);
>> +	else
>> +		__skb_queue_head(&q->qs[x], skb);
>> +
>>  	sfq_inc(q, x);
>>  	if (q->qs[x].qlen == 1) {		/* The flow is new */
>>  		if (q->tail == SFQ_DEPTH) {	/* It is the first flow */
>> @@ -270,12 +275,21 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc* sch)
>>  			q->tail = x;
>>  		}
>>  	}
>> +}
>> +
>> +static int
>> +sfq_enqueue(struct sk_buff *skb, struct Qdisc* sch)
>> +{
>> +	struct sfq_sched_data *q = qdisc_priv(sch);
> 
> newline please.

Ok.

>> +	sfq_q_enqueue(skb, q, SFQ_TAIL);
>> +	sch->qstats.backlog += skb->len;
>>  	if (++sch->q.qlen < q->limit-1) {
>>  		sch->bstats.bytes += skb->len;
>>  		sch->bstats.packets++;
>>  		return 0;
>>  	}
>>  
>> +	sch->qstats.drops++;
> 
> sfq_drop already increments this.

You're right, of course. When I look at the original sfq_requeue(), 
though, I see that same line right before sfq_drop(). That's probably 
why it ended up in my patch. Is that a bug? The original sfq_enqueue() 
doesn't have that line.

http://git.kernel.org/?p=linux/kernel/git/jgarzik/netdev-2.6.git;a=blob;f=net/sched/sch_sfq.c
line 314

>>  	sfq_drop(sch);
>>  	return NET_XMIT_CN;
>>  }
>> @@ -284,28 +298,8 @@ static int
>>  sfq_requeue(struct sk_buff *skb, struct Qdisc* sch)
>>  {
>>  	struct sfq_sched_data *q = qdisc_priv(sch);
> 
> newline please

Ok.

>> -static struct sk_buff *
>> -sfq_dequeue(struct Qdisc* sch)
>> +static struct sk_buff *sfq_q_dequeue(struct sfq_sched_data *q)
> 
> 
> Keep style consistent please.

Ok.

Thank you for the review. I'll address the rest of your comments when I 
have time later.

-Corey

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 2/7] Preparatory refactoring part 2.
  2007-07-30 13:59   ` Patrick McHardy
@ 2007-07-31  7:43     ` Corey Hickey
  0 siblings, 0 replies; 18+ messages in thread
From: Corey Hickey @ 2007-07-31  7:43 UTC (permalink / raw)
  To: Patrick McHardy, Linux Netdev List

Patrick McHardy wrote:
> Corey Hickey wrote:
>> diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
>> index 8ae077f..0c46938 100644
>> --- a/net/sched/sch_sfq.c
>> +++ b/net/sched/sch_sfq.c
>> @@ -380,71 +380,71 @@ static void sfq_perturbation(unsigned long arg)
>>  	}
>>  }
>>  
>> -static int sfq_change(struct Qdisc *sch, struct rtattr *opt)
>> +static int sfq_q_init(struct sfq_sched_data *q, struct rtattr *opt)
>>  {
>> -	struct sfq_sched_data *q = qdisc_priv(sch);
>>  	struct tc_sfq_qopt *ctl = RTA_DATA(opt);
>> -	unsigned int qlen;
>> +	int i;
>>  
>> -	if (opt->rta_len < RTA_LENGTH(sizeof(*ctl)))
>> +	if (opt && opt->rta_len < RTA_LENGTH(sizeof(*ctl)))
> 
> 
> opt is dereferenced above (RTA_DATA), so if it is NULL we've already
> crashed.

I think that test made ESFQ not crash when I did a "qdisc change" 
without giving any parameters, but that was a while ago and I might be 
mistaken. I'll need to rewrite much of this function anyway, and I'll 
pay attention to what happens when I get there.

>>  		return -EINVAL;
>>  
>> -	sch_tree_lock(sch);
>> -	q->quantum = ctl->quantum ? : psched_mtu(sch->dev);
>> -	q->perturb_period = ctl->perturb_period*HZ;
>> -	if (ctl->limit)
>> -		q->limit = min_t(u32, ctl->limit, SFQ_DEPTH);
>> +	q->perturbation = 0;
>> +	q->max_depth = 0;
>> +	q->tail = q->limit = SFQ_DEPTH;
>> +	if (opt == NULL) {
>> +		q->perturb_period = 0;
>> +	} else {
>> +		struct tc_sfq_qopt *ctl = RTA_DATA(opt);
>> +		if (ctl->quantum)
>> +			q->quantum = ctl->quantum;
>> +		q->perturb_period = ctl->perturb_period*HZ;
>>  
>> -	qlen = sch->q.qlen;
>> -	while (sch->q.qlen >= q->limit-1)
>> -		sfq_drop(sch);
>> -	qdisc_tree_decrease_qlen(sch, qlen - sch->q.qlen);
> 
> 
> I hope that patch that makes changing possible brings this back ..
> <checking> .. it doesn't. Please either keep this or fix up 6/7
> to bring it back.

It got lost in translation; I will add it to 6/7.

Thanks,
Corey

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 6/7] Make qdisc changeable.
  2007-07-30 14:11   ` Patrick McHardy
@ 2007-07-31  7:43     ` Corey Hickey
  2007-08-06  2:47     ` Corey Hickey
  1 sibling, 0 replies; 18+ messages in thread
From: Corey Hickey @ 2007-07-31  7:43 UTC (permalink / raw)
  To: Patrick McHardy; +Cc: netdev

Patrick McHardy wrote:
> Corey Hickey wrote:
>> Re-implement sfq_change() and enable Qdisc_opts.change so "tc qdisc
>> change" will work.
>>
>> Signed-off-by: Corey Hickey <bugfood-ml@fatooh.org>
>> ---
>>  net/sched/sch_sfq.c |   51 ++++++++++++++++++++++++++++++++++++++++++++++++++-
>>  1 files changed, 50 insertions(+), 1 deletions(-)
>>
>> diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
>> index e6a6a21..e042cd0 100644
>> --- a/net/sched/sch_sfq.c
>> +++ b/net/sched/sch_sfq.c
>> @@ -485,6 +485,55 @@ static int sfq_init(struct Qdisc *sch, struct rtattr *opt)
>>  	return 0;
>>  }
>>  
>> +static int sfq_change(struct Qdisc *sch, struct rtattr *opt)
>> +{
>> +	struct sfq_sched_data *q = qdisc_priv(sch);
>> +	struct sfq_sched_data tmp;
>> +	struct sk_buff *skb;
>> +	int err;
>> +	
>> +	/* set up tmp queue */
>> +	memset(&tmp, 0, sizeof(struct sfq_sched_data));
>> +	tmp.quantum = psched_mtu(sch->dev); /* default */
> 
> 
> If no value is given it should use the old value instead of
> reinitializing to the default.
> 
>> +	if ((err = sfq_q_init(&tmp, opt)))
>> +		return err;
> 
> 
> This will also use defaults for all unspecified values. It would
> be more consistent with other qdiscs to only change those values
> that are actually specified, so something like "tc qdisc change ...
> perturb 10" will *only* change the perturbation parameter.

I somehow had it in my head that a qdisc change was supposed to work 
that way. I'll change my patch.

> I'm not sure reusing the initialization function and copying the
> parameters is the best way to do this.

I'm not sure either, but I do like that it's conceptually simple and it 
keeps the parameter handling in one place. I've thought and stared for a 
while, and I don't see a better way, but that could of course be due to 
my limited understanding and general newbiehood in that regard.

Thanks again for the review. I'll try to get a new batch of patches sent 
off tomorrow.

-Corey

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 1/7] Preparatory refactoring part 1.
  2007-07-31  1:26     ` Corey Hickey
@ 2007-07-31 10:46       ` Patrick McHardy
  0 siblings, 0 replies; 18+ messages in thread
From: Patrick McHardy @ 2007-07-31 10:46 UTC (permalink / raw)
  To: Corey Hickey; +Cc: netdev

Corey Hickey wrote:
> Patrick McHardy wrote:
> 
>>> -static int
>>> -sfq_enqueue(struct sk_buff *skb, struct Qdisc* sch)
>>> +static void sfq_q_enqueue(struct sk_buff *skb, struct sfq_sched_data
>>> *q, unsigned int end)
>>
>>
>>
>> Please make sure to break at 80 chars and to keep the style
>> in this file consistent (newline before function name).
> 
> 
> Ok. For what it's worth, though, most of the original functions in the
> file don't have a newline before the function name. Omitting the newline
>  would thus make the new/changed functions more consistent with the rest
> of the file. I don't have a preference either way, so unless you change
> your mind I'll put the newline back in..


You're right, just keep it consistent please and break at 80 chars.

>>> -    sch->qstats.backlog += skb->len;
>>
>>
>> Why not keep this instead of having both callers do it?
> 
> 
> My idea was to have all the sfq_q_* functions operate on "struct
> sfq_sched_data" and have no knowledge of the "struct Qdisc". I did this
> in order to be able to use the new functions in sfq_change() when the
> temporary sfq_sched_data doesn't have a parent Qdisc.
> 
> There's probably a better way, and I am of course open to suggestions,
> but what I did made sense to me.


Also sounds fine.

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 6/7] Make qdisc changeable.
  2007-07-30 14:11   ` Patrick McHardy
  2007-07-31  7:43     ` Corey Hickey
@ 2007-08-06  2:47     ` Corey Hickey
  2007-08-06 12:06       ` Patrick McHardy
  1 sibling, 1 reply; 18+ messages in thread
From: Corey Hickey @ 2007-08-06  2:47 UTC (permalink / raw)
  To: Patrick McHardy; +Cc: netdev

Patrick McHardy wrote:
>> +	if ((err = sfq_q_init(&tmp, opt)))
>> +		return err;
> 
> 
> This will also use defaults for all unspecified values. It would
> be more consistent with other qdiscs to only change those values
> that are actually specified, so something like "tc qdisc change ...
> perturb 10" will *only* change the perturbation parameter.

I'm fixed this for all the parameters except one--your example above.
Since 0 is a valid value for perturb, I can't see any clever way to
differentiate between the user specifying "perturb 0" or leaving perturb
unspecified. Either way, opt->perturb_period is 0.

The only way I can see would be to add another member, say,
opt->perturb_specified, and use that accordingly. Unfortunately, this
would break usage of sfq with older versions of tc, so I'm hoping
there's a better approach.

Do you have any suggestions? I've looked at the other qdisc files and I 
don't see any other instances like this.

Thanks,
Corey

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 6/7] Make qdisc changeable.
  2007-08-06  2:47     ` Corey Hickey
@ 2007-08-06 12:06       ` Patrick McHardy
  0 siblings, 0 replies; 18+ messages in thread
From: Patrick McHardy @ 2007-08-06 12:06 UTC (permalink / raw)
  To: Corey Hickey; +Cc: netdev

Corey Hickey wrote:
> Patrick McHardy wrote:
> 
>>> +    if ((err = sfq_q_init(&tmp, opt)))
>>> +        return err;
>>
>>
>>
>> This will also use defaults for all unspecified values. It would
>> be more consistent with other qdiscs to only change those values
>> that are actually specified, so something like "tc qdisc change ...
>> perturb 10" will *only* change the perturbation parameter.
> 
> 
> I'm fixed this for all the parameters except one--your example above.
> Since 0 is a valid value for perturb, I can't see any clever way to
> differentiate between the user specifying "perturb 0" or leaving perturb
> unspecified. Either way, opt->perturb_period is 0.
> 
> The only way I can see would be to add another member, say,
> opt->perturb_specified, and use that accordingly. Unfortunately, this
> would break usage of sfq with older versions of tc, so I'm hoping
> there's a better approach.
> 
> Do you have any suggestions? I've looked at the other qdisc files and I
> don't see any other instances like this.


Maybe use the nested compat attribute and split the base SFQ
struct in the individual members ..

^ permalink raw reply	[flat|nested] 18+ messages in thread

end of thread, other threads:[~2007-08-06 12:06 UTC | newest]

Thread overview: 18+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2007-07-30  0:21 SFQ: backport some features from ESFQ (try 2) Corey Hickey
2007-07-30  0:21 ` [PATCH 1/7] Preparatory refactoring part 1 Corey Hickey
2007-07-30 13:51   ` Patrick McHardy
2007-07-31  1:26     ` Corey Hickey
2007-07-31 10:46       ` Patrick McHardy
2007-07-30  0:21 ` [PATCH 2/7] Preparatory refactoring part 2 Corey Hickey
2007-07-30 13:59   ` Patrick McHardy
2007-07-31  7:43     ` Corey Hickey
2007-07-30  0:21 ` [PATCH 3/7] Move two functions Corey Hickey
2007-07-30  0:21 ` [PATCH 4/7] Add "depth" Corey Hickey
2007-07-30  0:21 ` [PATCH 5/7] Add divisor Corey Hickey
2007-07-30  0:21 ` [PATCH 6/7] Make qdisc changeable Corey Hickey
2007-07-30 14:11   ` Patrick McHardy
2007-07-31  7:43     ` Corey Hickey
2007-08-06  2:47     ` Corey Hickey
2007-08-06 12:06       ` Patrick McHardy
2007-07-30  0:21 ` [PATCH 7/7] Remove comments about hardcoded values Corey Hickey
2007-07-30  0:21 ` [PATCH] [iproute2] SFQ: Support changing depth and divisor Corey Hickey

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).