* [PATCH 0/7] SFQ: backport some features from ESFQ
@ 2007-07-29 7:08 Corey Hickey
2007-07-29 7:08 ` [PATCH 1/7] Preparatory refactoring part 1 Corey Hickey
2007-07-29 7:17 ` [PATCH 0/7] SFQ: backport some features from ESFQ Corey Hickey
0 siblings, 2 replies; 19+ messages in thread
From: Corey Hickey @ 2007-07-29 7:08 UTC (permalink / raw)
To: netdev
Hello,
This set of patches adds some of ESFQ's modifications to the original
SFQ. Thus far, I have received support for this approach rather than for
trying to get ESFQ included as a separate qdisc.
http://mailman.ds9a.nl/pipermail/lartc/2007q2/021056.html
My patches here implement "tc qdisc change", user-configurable depth
(number of flows), and user-configurable divisor (for setting hash table
size). I've left out the remaining ESFQ features (usage of jhash and
different hashing methods) because Patrick McHardy intends to submit a
patch that will supersede that functionality; see the URL above.
Default values remain the same, and SFQ's default behavior remains the
same, so there should be no user disruption.
A patch for iproute2 is included after the end of the kernel patch series.
Thanks for your consideration,
Corey
include/linux/pkt_sched.h | 8 --
net/sched/sch_sfq.c | 301 +++++++++++++++++++++++++++++----------------
2 files changed, 192 insertions(+), 117 deletions(-)
[PATCH 1/7] Preparatory refactoring part 1.
[PATCH 2/7] Preparatory refactoring part 2.
[PATCH 3/7] Move two functions.
[PATCH 4/7] Add "depth".
[PATCH 5/7] Add divisor.
[PATCH 6/7] Make qdisc changeable.
[PATCH 7/7] Remove comments about hardcoded values.
[PATCH] [iproute2] SFQ: Support changing depth and divisor.
^ permalink raw reply [flat|nested] 19+ messages in thread
* [PATCH 1/7] Preparatory refactoring part 1.
2007-07-29 7:08 [PATCH 0/7] SFQ: backport some features from ESFQ Corey Hickey
@ 2007-07-29 7:08 ` Corey Hickey
2007-07-29 7:08 ` [PATCH 2/7] Preparatory refactoring part 2 Corey Hickey
2007-07-29 7:17 ` [PATCH 0/7] SFQ: backport some features from ESFQ Corey Hickey
1 sibling, 1 reply; 19+ messages in thread
From: Corey Hickey @ 2007-07-29 7:08 UTC (permalink / raw)
To: netdev; +Cc: Corey Hickey
Make a new function sfq_q_enqueue() that operates directly on the
queue data. This will be useful for implementing sfq_change() in
a later patch. A pleasant side-effect is reducing most of the
duplicate code in sfq_enqueue() and sfq_requeue().
Similarly, make a new function sfq_q_dequeue().
Signed-off-by: Corey Hickey <bugfood-ml@fatooh.org>
---
net/sched/sch_sfq.c | 70 ++++++++++++++++++++++++++------------------------
1 files changed, 36 insertions(+), 34 deletions(-)
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index 9579573..8ae077f 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -77,6 +77,9 @@
#define SFQ_DEPTH 128
#define SFQ_HASH_DIVISOR 1024
+#define SFQ_HEAD 0
+#define SFQ_TAIL 1
+
/* This type should contain at least SFQ_DEPTH*2 values */
typedef unsigned char sfq_index;
@@ -244,10 +247,8 @@ static unsigned int sfq_drop(struct Qdisc *sch)
return 0;
}
-static int
-sfq_enqueue(struct sk_buff *skb, struct Qdisc* sch)
+static void sfq_q_enqueue(struct sk_buff *skb, struct sfq_sched_data *q, unsigned int end)
{
- struct sfq_sched_data *q = qdisc_priv(sch);
unsigned hash = sfq_hash(q, skb);
sfq_index x;
@@ -256,8 +257,12 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc* sch)
q->ht[hash] = x = q->dep[SFQ_DEPTH].next;
q->hash[x] = hash;
}
- sch->qstats.backlog += skb->len;
- __skb_queue_tail(&q->qs[x], skb);
+
+ if (end == SFQ_TAIL)
+ __skb_queue_tail(&q->qs[x], skb);
+ else
+ __skb_queue_head(&q->qs[x], skb);
+
sfq_inc(q, x);
if (q->qs[x].qlen == 1) { /* The flow is new */
if (q->tail == SFQ_DEPTH) { /* It is the first flow */
@@ -270,12 +275,21 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc* sch)
q->tail = x;
}
}
+}
+
+static int
+sfq_enqueue(struct sk_buff *skb, struct Qdisc* sch)
+{
+ struct sfq_sched_data *q = qdisc_priv(sch);
+ sfq_q_enqueue(skb, q, SFQ_TAIL);
+ sch->qstats.backlog += skb->len;
if (++sch->q.qlen < q->limit-1) {
sch->bstats.bytes += skb->len;
sch->bstats.packets++;
return 0;
}
+ sch->qstats.drops++;
sfq_drop(sch);
return NET_XMIT_CN;
}
@@ -284,28 +298,8 @@ static int
sfq_requeue(struct sk_buff *skb, struct Qdisc* sch)
{
struct sfq_sched_data *q = qdisc_priv(sch);
- unsigned hash = sfq_hash(q, skb);
- sfq_index x;
-
- x = q->ht[hash];
- if (x == SFQ_DEPTH) {
- q->ht[hash] = x = q->dep[SFQ_DEPTH].next;
- q->hash[x] = hash;
- }
+ sfq_q_enqueue(skb, q, SFQ_HEAD);
sch->qstats.backlog += skb->len;
- __skb_queue_head(&q->qs[x], skb);
- sfq_inc(q, x);
- if (q->qs[x].qlen == 1) { /* The flow is new */
- if (q->tail == SFQ_DEPTH) { /* It is the first flow */
- q->tail = x;
- q->next[x] = x;
- q->allot[x] = q->quantum;
- } else {
- q->next[x] = q->next[q->tail];
- q->next[q->tail] = x;
- q->tail = x;
- }
- }
if (++sch->q.qlen < q->limit - 1) {
sch->qstats.requeues++;
return 0;
@@ -316,13 +310,8 @@ sfq_requeue(struct sk_buff *skb, struct Qdisc* sch)
return NET_XMIT_CN;
}
-
-
-
-static struct sk_buff *
-sfq_dequeue(struct Qdisc* sch)
+static struct sk_buff *sfq_q_dequeue(struct sfq_sched_data *q)
{
- struct sfq_sched_data *q = qdisc_priv(sch);
struct sk_buff *skb;
sfq_index a, old_a;
@@ -335,8 +324,6 @@ sfq_dequeue(struct Qdisc* sch)
/* Grab packet */
skb = __skb_dequeue(&q->qs[a]);
sfq_dec(q, a);
- sch->q.qlen--;
- sch->qstats.backlog -= skb->len;
/* Is the slot empty? */
if (q->qs[a].qlen == 0) {
@@ -353,6 +340,21 @@ sfq_dequeue(struct Qdisc* sch)
a = q->next[a];
q->allot[a] += q->quantum;
}
+
+ return skb;
+}
+
+static struct sk_buff
+*sfq_dequeue(struct Qdisc* sch)
+{
+ struct sfq_sched_data *q = qdisc_priv(sch);
+ struct sk_buff *skb;
+
+ skb = sfq_q_dequeue(q);
+ if (skb == NULL)
+ return NULL;
+ sch->q.qlen--;
+ sch->qstats.backlog -= skb->len;
return skb;
}
--
1.5.2.4
^ permalink raw reply related [flat|nested] 19+ messages in thread
* [PATCH 2/7] Preparatory refactoring part 2.
2007-07-29 7:08 ` [PATCH 1/7] Preparatory refactoring part 1 Corey Hickey
@ 2007-07-29 7:08 ` Corey Hickey
2007-07-29 7:08 ` [PATCH 3/7] Move two functions Corey Hickey
0 siblings, 1 reply; 19+ messages in thread
From: Corey Hickey @ 2007-07-29 7:08 UTC (permalink / raw)
To: netdev; +Cc: Corey Hickey
Factor code out of sfq_init() and sfq_destroy(), again so that the
new functions can be used by sfq_change() later.
Actually, as the diff itself shows, most of the sfq_q_init() code
comes from the original sfq_change(), but sfq_change() is only
called by sfq_init() right now. Thus, it is safe to remove
sfq_change(); "tc qdisc change" doesn't yet work for sfq anyway.
The sfq_destroy() --> sfq_q_destroy() change looks pointless here,
but it's cleaner to split now and add code to sfq_q_destroy() in a
later patch.
Signed-off-by: Corey Hickey <bugfood-ml@fatooh.org>
---
net/sched/sch_sfq.c | 80 +++++++++++++++++++++++++-------------------------
1 files changed, 40 insertions(+), 40 deletions(-)
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index 8ae077f..0c46938 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -380,71 +380,71 @@ static void sfq_perturbation(unsigned long arg)
}
}
-static int sfq_change(struct Qdisc *sch, struct rtattr *opt)
+static int sfq_q_init(struct sfq_sched_data *q, struct rtattr *opt)
{
- struct sfq_sched_data *q = qdisc_priv(sch);
struct tc_sfq_qopt *ctl = RTA_DATA(opt);
- unsigned int qlen;
+ int i;
- if (opt->rta_len < RTA_LENGTH(sizeof(*ctl)))
+ if (opt && opt->rta_len < RTA_LENGTH(sizeof(*ctl)))
return -EINVAL;
- sch_tree_lock(sch);
- q->quantum = ctl->quantum ? : psched_mtu(sch->dev);
- q->perturb_period = ctl->perturb_period*HZ;
- if (ctl->limit)
- q->limit = min_t(u32, ctl->limit, SFQ_DEPTH);
+ q->perturbation = 0;
+ q->max_depth = 0;
+ q->tail = q->limit = SFQ_DEPTH;
+ if (opt == NULL) {
+ q->perturb_period = 0;
+ } else {
+ struct tc_sfq_qopt *ctl = RTA_DATA(opt);
+ if (ctl->quantum)
+ q->quantum = ctl->quantum;
+ q->perturb_period = ctl->perturb_period*HZ;
- qlen = sch->q.qlen;
- while (sch->q.qlen >= q->limit-1)
- sfq_drop(sch);
- qdisc_tree_decrease_qlen(sch, qlen - sch->q.qlen);
+ if (ctl->limit)
+ q->limit = min_t(u32, ctl->limit, SFQ_DEPTH);
+ }
- del_timer(&q->perturb_timer);
- if (q->perturb_period) {
- q->perturb_timer.expires = jiffies + q->perturb_period;
- add_timer(&q->perturb_timer);
+ for (i=0; i<SFQ_HASH_DIVISOR; i++)
+ q->ht[i] = SFQ_DEPTH;
+ for (i=0; i<SFQ_DEPTH; i++) {
+ skb_queue_head_init(&q->qs[i]);
+ q->dep[i+SFQ_DEPTH].next = i+SFQ_DEPTH;
+ q->dep[i+SFQ_DEPTH].prev = i+SFQ_DEPTH;
}
- sch_tree_unlock(sch);
+
+ for (i=0; i<SFQ_DEPTH; i++)
+ sfq_link(q, i);
return 0;
}
static int sfq_init(struct Qdisc *sch, struct rtattr *opt)
{
struct sfq_sched_data *q = qdisc_priv(sch);
- int i;
+ int err;
+
+ q->quantum = psched_mtu(sch->dev); /* default */
+ if ((err = sfq_q_init(q, opt)))
+ return err;
init_timer(&q->perturb_timer);
q->perturb_timer.data = (unsigned long)sch;
q->perturb_timer.function = sfq_perturbation;
-
- for (i=0; i<SFQ_HASH_DIVISOR; i++)
- q->ht[i] = SFQ_DEPTH;
- for (i=0; i<SFQ_DEPTH; i++) {
- skb_queue_head_init(&q->qs[i]);
- q->dep[i+SFQ_DEPTH].next = i+SFQ_DEPTH;
- q->dep[i+SFQ_DEPTH].prev = i+SFQ_DEPTH;
- }
- q->limit = SFQ_DEPTH;
- q->max_depth = 0;
- q->tail = SFQ_DEPTH;
- if (opt == NULL) {
- q->quantum = psched_mtu(sch->dev);
- q->perturb_period = 0;
- } else {
- int err = sfq_change(sch, opt);
- if (err)
- return err;
+ if (q->perturb_period) {
+ q->perturb_timer.expires = jiffies + q->perturb_period;
+ add_timer(&q->perturb_timer);
}
- for (i=0; i<SFQ_DEPTH; i++)
- sfq_link(q, i);
+
return 0;
}
+static void sfq_q_destroy(struct sfq_sched_data *q)
+{
+ del_timer(&q->perturb_timer);
+}
+
static void sfq_destroy(struct Qdisc *sch)
{
struct sfq_sched_data *q = qdisc_priv(sch);
- del_timer(&q->perturb_timer);
+ sfq_q_destroy(q);
}
static int sfq_dump(struct Qdisc *sch, struct sk_buff *skb)
--
1.5.2.4
^ permalink raw reply related [flat|nested] 19+ messages in thread
* [PATCH 3/7] Move two functions.
2007-07-29 7:08 ` [PATCH 2/7] Preparatory refactoring part 2 Corey Hickey
@ 2007-07-29 7:08 ` Corey Hickey
2007-07-29 7:08 ` [PATCH 4/7] Add "depth" Corey Hickey
0 siblings, 1 reply; 19+ messages in thread
From: Corey Hickey @ 2007-07-29 7:08 UTC (permalink / raw)
To: netdev; +Cc: Corey Hickey
Move sfq_q_destroy() to above sfq_q_init() so that it can be used
by an error case in a later patch.
Move sfq_destroy() as well, for clarity.
Signed-off-by: Corey Hickey <bugfood-ml@fatooh.org>
---
net/sched/sch_sfq.c | 24 ++++++++++++------------
1 files changed, 12 insertions(+), 12 deletions(-)
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index 0c46938..583f925 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -380,6 +380,17 @@ static void sfq_perturbation(unsigned long arg)
}
}
+static void sfq_q_destroy(struct sfq_sched_data *q)
+{
+ del_timer(&q->perturb_timer);
+}
+
+static void sfq_destroy(struct Qdisc *sch)
+{
+ struct sfq_sched_data *q = qdisc_priv(sch);
+ sfq_q_destroy(q);
+}
+
static int sfq_q_init(struct sfq_sched_data *q, struct rtattr *opt)
{
struct tc_sfq_qopt *ctl = RTA_DATA(opt);
@@ -420,7 +431,7 @@ static int sfq_init(struct Qdisc *sch, struct rtattr *opt)
{
struct sfq_sched_data *q = qdisc_priv(sch);
int err;
-
+
q->quantum = psched_mtu(sch->dev); /* default */
if ((err = sfq_q_init(q, opt)))
return err;
@@ -436,17 +447,6 @@ static int sfq_init(struct Qdisc *sch, struct rtattr *opt)
return 0;
}
-static void sfq_q_destroy(struct sfq_sched_data *q)
-{
- del_timer(&q->perturb_timer);
-}
-
-static void sfq_destroy(struct Qdisc *sch)
-{
- struct sfq_sched_data *q = qdisc_priv(sch);
- sfq_q_destroy(q);
-}
-
static int sfq_dump(struct Qdisc *sch, struct sk_buff *skb)
{
struct sfq_sched_data *q = qdisc_priv(sch);
--
1.5.2.4
^ permalink raw reply related [flat|nested] 19+ messages in thread
* [PATCH 4/7] Add "depth".
2007-07-29 7:08 ` [PATCH 3/7] Move two functions Corey Hickey
@ 2007-07-29 7:08 ` Corey Hickey
2007-07-29 7:08 ` [PATCH 5/7] Add divisor Corey Hickey
2007-07-29 18:41 ` [PATCH 4/7] Add "depth" Michael Buesch
0 siblings, 2 replies; 19+ messages in thread
From: Corey Hickey @ 2007-07-29 7:08 UTC (permalink / raw)
To: netdev; +Cc: Corey Hickey
Make "depth" (number of queues) user-configurable:
* replace #define with a parameter
* use old hardcoded value as a default
* kmalloc() arrays in sfq_q_init()
* free() arrays in sfq_q_destroy()
Signed-off-by: Corey Hickey <bugfood-ml@fatooh.org>
---
net/sched/sch_sfq.c | 85 ++++++++++++++++++++++++++++++++++++---------------
1 files changed, 60 insertions(+), 25 deletions(-)
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index 583f925..2ff6a27 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -74,14 +74,14 @@
It is easy to increase these values, but not in flight. */
-#define SFQ_DEPTH 128
+#define SFQ_DEPTH_DEFAULT 128
#define SFQ_HASH_DIVISOR 1024
#define SFQ_HEAD 0
#define SFQ_TAIL 1
-/* This type should contain at least SFQ_DEPTH*2 values */
-typedef unsigned char sfq_index;
+/* This type should contain at least depth*2 values */
+typedef unsigned int sfq_index;
struct sfq_head
{
@@ -95,6 +95,7 @@ struct sfq_sched_data
int perturb_period;
unsigned quantum; /* Allotment per round: MUST BE >= MTU */
int limit;
+ unsigned depth;
/* Variables */
struct timer_list perturb_timer;
@@ -103,11 +104,11 @@ struct sfq_sched_data
sfq_index max_depth; /* Maximal depth */
sfq_index ht[SFQ_HASH_DIVISOR]; /* Hash table */
- sfq_index next[SFQ_DEPTH]; /* Active slots link */
- short allot[SFQ_DEPTH]; /* Current allotment per slot */
- unsigned short hash[SFQ_DEPTH]; /* Hash value indexed by slots */
- struct sk_buff_head qs[SFQ_DEPTH]; /* Slot queue */
- struct sfq_head dep[SFQ_DEPTH*2]; /* Linked list of slots, indexed by depth */
+ sfq_index *next; /* Active slots link */
+ short *allot; /* Current allotment per slot */
+ unsigned short *hash; /* Hash value indexed by slots */
+ struct sk_buff_head *qs; /* Slot queue */
+ struct sfq_head *dep; /* Linked list of slots, indexed by depth */
};
static __inline__ unsigned sfq_fold_hash(struct sfq_sched_data *q, u32 h, u32 h1)
@@ -164,7 +165,7 @@ static unsigned sfq_hash(struct sfq_sched_data *q, struct sk_buff *skb)
static inline void sfq_link(struct sfq_sched_data *q, sfq_index x)
{
sfq_index p, n;
- int d = q->qs[x].qlen + SFQ_DEPTH;
+ int d = q->qs[x].qlen + q->depth;
p = d;
n = q->dep[d].next;
@@ -215,7 +216,7 @@ static unsigned int sfq_drop(struct Qdisc *sch)
drop a packet from it */
if (d > 1) {
- sfq_index x = q->dep[d+SFQ_DEPTH].next;
+ sfq_index x = q->dep[d+q->depth].next;
skb = q->qs[x].prev;
len = skb->len;
__skb_unlink(skb, &q->qs[x]);
@@ -238,7 +239,7 @@ static unsigned int sfq_drop(struct Qdisc *sch)
kfree_skb(skb);
sfq_dec(q, d);
sch->q.qlen--;
- q->ht[q->hash[d]] = SFQ_DEPTH;
+ q->ht[q->hash[d]] = q->depth;
sch->qstats.drops++;
sch->qstats.backlog -= len;
return len;
@@ -253,8 +254,8 @@ static void sfq_q_enqueue(struct sk_buff *skb, struct sfq_sched_data *q, unsigne
sfq_index x;
x = q->ht[hash];
- if (x == SFQ_DEPTH) {
- q->ht[hash] = x = q->dep[SFQ_DEPTH].next;
+ if (x == q->depth) {
+ q->ht[hash] = x = q->dep[q->depth].next;
q->hash[x] = hash;
}
@@ -265,7 +266,7 @@ static void sfq_q_enqueue(struct sk_buff *skb, struct sfq_sched_data *q, unsigne
sfq_inc(q, x);
if (q->qs[x].qlen == 1) { /* The flow is new */
- if (q->tail == SFQ_DEPTH) { /* It is the first flow */
+ if (q->tail == q->depth) { /* It is the first flow */
q->tail = x;
q->next[x] = x;
q->allot[x] = q->quantum;
@@ -316,7 +317,7 @@ static struct sk_buff *sfq_q_dequeue(struct sfq_sched_data *q)
sfq_index a, old_a;
/* No active slots */
- if (q->tail == SFQ_DEPTH)
+ if (q->tail == q->depth)
return NULL;
a = old_a = q->next[q->tail];
@@ -327,10 +328,10 @@ static struct sk_buff *sfq_q_dequeue(struct sfq_sched_data *q)
/* Is the slot empty? */
if (q->qs[a].qlen == 0) {
- q->ht[q->hash[a]] = SFQ_DEPTH;
+ q->ht[q->hash[a]] = q->depth;
a = q->next[a];
if (a == old_a) {
- q->tail = SFQ_DEPTH;
+ q->tail = q->depth;
return skb;
}
q->next[q->tail] = a;
@@ -383,6 +384,16 @@ static void sfq_perturbation(unsigned long arg)
static void sfq_q_destroy(struct sfq_sched_data *q)
{
del_timer(&q->perturb_timer);
+ if(q->dep)
+ kfree(q->dep);
+ if(q->next)
+ kfree(q->next);
+ if(q->allot)
+ kfree(q->allot);
+ if(q->hash)
+ kfree(q->hash);
+ if(q->qs)
+ kfree(q->qs);
}
static void sfq_destroy(struct Qdisc *sch)
@@ -394,6 +405,7 @@ static void sfq_destroy(struct Qdisc *sch)
static int sfq_q_init(struct sfq_sched_data *q, struct rtattr *opt)
{
struct tc_sfq_qopt *ctl = RTA_DATA(opt);
+ sfq_index p = ~0U/2;
int i;
if (opt && opt->rta_len < RTA_LENGTH(sizeof(*ctl)))
@@ -401,30 +413,53 @@ static int sfq_q_init(struct sfq_sched_data *q, struct rtattr *opt)
q->perturbation = 0;
q->max_depth = 0;
- q->tail = q->limit = SFQ_DEPTH;
if (opt == NULL) {
q->perturb_period = 0;
+ q->tail = q->limit = q->depth = SFQ_DEPTH_DEFAULT;
} else {
struct tc_sfq_qopt *ctl = RTA_DATA(opt);
if (ctl->quantum)
q->quantum = ctl->quantum;
q->perturb_period = ctl->perturb_period*HZ;
+ q->tail = q->limit = q->depth = ctl->flows ? : SFQ_DEPTH_DEFAULT;
+
+ if (q->depth > p - 1)
+ return -EINVAL;
if (ctl->limit)
- q->limit = min_t(u32, ctl->limit, SFQ_DEPTH);
+ q->limit = min_t(u32, ctl->limit, q->depth);
}
+ q->dep = kmalloc((1+q->depth*2)*sizeof(struct sfq_head), GFP_KERNEL);
+ if (!q->dep)
+ goto err_case;
+ q->next = kmalloc(q->depth*sizeof(sfq_index), GFP_KERNEL);
+ if (!q->next)
+ goto err_case;
+ q->allot = kmalloc(q->depth*sizeof(short), GFP_KERNEL);
+ if (!q->allot)
+ goto err_case;
+ q->hash = kmalloc(q->depth*sizeof(unsigned short), GFP_KERNEL);
+ if (!q->hash)
+ goto err_case;
+ q->qs = kmalloc(q->depth*sizeof(struct sk_buff_head), GFP_KERNEL);
+ if (!q->qs)
+ goto err_case;
+
for (i=0; i<SFQ_HASH_DIVISOR; i++)
- q->ht[i] = SFQ_DEPTH;
- for (i=0; i<SFQ_DEPTH; i++) {
+ q->ht[i] = q->depth;
+ for (i=0; i<q->depth; i++) {
skb_queue_head_init(&q->qs[i]);
- q->dep[i+SFQ_DEPTH].next = i+SFQ_DEPTH;
- q->dep[i+SFQ_DEPTH].prev = i+SFQ_DEPTH;
+ q->dep[i+q->depth].next = i+q->depth;
+ q->dep[i+q->depth].prev = i+q->depth;
}
- for (i=0; i<SFQ_DEPTH; i++)
+ for (i=0; i<q->depth; i++)
sfq_link(q, i);
return 0;
+err_case:
+ sfq_q_destroy(q);
+ return -ENOBUFS;
}
static int sfq_init(struct Qdisc *sch, struct rtattr *opt)
@@ -458,7 +493,7 @@ static int sfq_dump(struct Qdisc *sch, struct sk_buff *skb)
opt.limit = q->limit;
opt.divisor = SFQ_HASH_DIVISOR;
- opt.flows = q->limit;
+ opt.flows = q->depth;
RTA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
--
1.5.2.4
^ permalink raw reply related [flat|nested] 19+ messages in thread
* [PATCH 5/7] Add divisor.
2007-07-29 7:08 ` [PATCH 4/7] Add "depth" Corey Hickey
@ 2007-07-29 7:08 ` Corey Hickey
2007-07-29 7:08 ` [PATCH 6/7] Make qdisc changeable Corey Hickey
2007-07-29 18:41 ` [PATCH 4/7] Add "depth" Michael Buesch
1 sibling, 1 reply; 19+ messages in thread
From: Corey Hickey @ 2007-07-29 7:08 UTC (permalink / raw)
To: netdev; +Cc: Corey Hickey
Make hash divisor user-configurable.
Signed-off-by: Corey Hickey <bugfood-ml@fatooh.org>
---
net/sched/sch_sfq.c | 18 +++++++++++++-----
1 files changed, 13 insertions(+), 5 deletions(-)
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index 2ff6a27..3e67a68 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -75,7 +75,7 @@
It is easy to increase these values, but not in flight. */
#define SFQ_DEPTH_DEFAULT 128
-#define SFQ_HASH_DIVISOR 1024
+#define SFQ_DIVISOR_DEFAULT 1024
#define SFQ_HEAD 0
#define SFQ_TAIL 1
@@ -96,6 +96,7 @@ struct sfq_sched_data
unsigned quantum; /* Allotment per round: MUST BE >= MTU */
int limit;
unsigned depth;
+ unsigned hash_divisor;
/* Variables */
struct timer_list perturb_timer;
@@ -103,7 +104,7 @@ struct sfq_sched_data
sfq_index tail; /* Index of current slot in round */
sfq_index max_depth; /* Maximal depth */
- sfq_index ht[SFQ_HASH_DIVISOR]; /* Hash table */
+ sfq_index *ht; /* Hash table */
sfq_index *next; /* Active slots link */
short *allot; /* Current allotment per slot */
unsigned short *hash; /* Hash value indexed by slots */
@@ -118,7 +119,7 @@ static __inline__ unsigned sfq_fold_hash(struct sfq_sched_data *q, u32 h, u32 h1
/* Have we any rotation primitives? If not, WHY? */
h ^= (h1<<pert) ^ (h1>>(0x1F - pert));
h ^= h>>10;
- return h & 0x3FF;
+ return h & (q->hash_divisor-1);
}
static unsigned sfq_hash(struct sfq_sched_data *q, struct sk_buff *skb)
@@ -384,6 +385,8 @@ static void sfq_perturbation(unsigned long arg)
static void sfq_q_destroy(struct sfq_sched_data *q)
{
del_timer(&q->perturb_timer);
+ if(q->ht)
+ kfree(q->ht);
if(q->dep)
kfree(q->dep);
if(q->next)
@@ -415,12 +418,14 @@ static int sfq_q_init(struct sfq_sched_data *q, struct rtattr *opt)
q->max_depth = 0;
if (opt == NULL) {
q->perturb_period = 0;
+ q->hash_divisor = SFQ_DIVISOR_DEFAULT;
q->tail = q->limit = q->depth = SFQ_DEPTH_DEFAULT;
} else {
struct tc_sfq_qopt *ctl = RTA_DATA(opt);
if (ctl->quantum)
q->quantum = ctl->quantum;
q->perturb_period = ctl->perturb_period*HZ;
+ q->hash_divisor = ctl->divisor ? : SFQ_DIVISOR_DEFAULT;
q->tail = q->limit = q->depth = ctl->flows ? : SFQ_DEPTH_DEFAULT;
if (q->depth > p - 1)
@@ -430,6 +435,9 @@ static int sfq_q_init(struct sfq_sched_data *q, struct rtattr *opt)
q->limit = min_t(u32, ctl->limit, q->depth);
}
+ q->ht = kmalloc(q->hash_divisor*sizeof(sfq_index), GFP_KERNEL);
+ if (!q->ht)
+ goto err_case;
q->dep = kmalloc((1+q->depth*2)*sizeof(struct sfq_head), GFP_KERNEL);
if (!q->dep)
goto err_case;
@@ -446,7 +454,7 @@ static int sfq_q_init(struct sfq_sched_data *q, struct rtattr *opt)
if (!q->qs)
goto err_case;
- for (i=0; i<SFQ_HASH_DIVISOR; i++)
+ for (i=0; i<q->hash_divisor; i++)
q->ht[i] = q->depth;
for (i=0; i<q->depth; i++) {
skb_queue_head_init(&q->qs[i]);
@@ -492,7 +500,7 @@ static int sfq_dump(struct Qdisc *sch, struct sk_buff *skb)
opt.perturb_period = q->perturb_period/HZ;
opt.limit = q->limit;
- opt.divisor = SFQ_HASH_DIVISOR;
+ opt.divisor = q->hash_divisor;
opt.flows = q->depth;
RTA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
--
1.5.2.4
^ permalink raw reply related [flat|nested] 19+ messages in thread
* [PATCH 6/7] Make qdisc changeable.
2007-07-29 7:08 ` [PATCH 5/7] Add divisor Corey Hickey
@ 2007-07-29 7:08 ` Corey Hickey
2007-07-29 7:08 ` [PATCH 7/7] Remove comments about hardcoded values Corey Hickey
0 siblings, 1 reply; 19+ messages in thread
From: Corey Hickey @ 2007-07-29 7:08 UTC (permalink / raw)
To: netdev; +Cc: Corey Hickey
Re-implement sfq_change() and enable Qdisc_opts.change so "tc qdisc
change" will work.
Signed-off-by: Corey Hickey <bugfood-ml@fatooh.org>
---
net/sched/sch_sfq.c | 51 ++++++++++++++++++++++++++++++++++++++++++++++++++-
1 files changed, 50 insertions(+), 1 deletions(-)
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index 3e67a68..4cd523f 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -490,6 +490,55 @@ static int sfq_init(struct Qdisc *sch, struct rtattr *opt)
return 0;
}
+static int sfq_change(struct Qdisc *sch, struct rtattr *opt)
+{
+ struct sfq_sched_data *q = qdisc_priv(sch);
+ struct sfq_sched_data tmp;
+ struct sk_buff *skb;
+ int err;
+
+ /* set up tmp queue */
+ memset(&tmp, 0, sizeof(struct sfq_sched_data));
+ tmp.quantum = psched_mtu(sch->dev); /* default */
+ if ((err = sfq_q_init(&tmp, opt)))
+ return err;
+
+ /* copy packets from the old queue to the tmp queue */
+ sch_tree_lock(sch);
+ while (sch->q.qlen >= tmp.limit - 1)
+ sfq_drop(sch);
+ while ((skb = sfq_q_dequeue(q)) != NULL)
+ sfq_q_enqueue(skb, &tmp, SFQ_TAIL);
+
+ /* clean up the old queue */
+ sfq_q_destroy(q);
+
+ /* copy elements of the tmp queue into the old queue */
+ q->perturb_period = tmp.perturb_period;
+ q->quantum = tmp.quantum;
+ q->limit = tmp.limit;
+ q->depth = tmp.depth;
+ q->hash_divisor = tmp.hash_divisor;
+ q->tail = tmp.tail;
+ q->max_depth = tmp.max_depth;
+ q->ht = tmp.ht;
+ q->dep = tmp.dep;
+ q->next = tmp.next;
+ q->allot = tmp.allot;
+ q->hash = tmp.hash;
+ q->qs = tmp.qs;
+
+ /* finish up */
+ if (q->perturb_period) {
+ q->perturb_timer.expires = jiffies + q->perturb_period;
+ add_timer(&q->perturb_timer);
+ } else {
+ q->perturbation = 0;
+ }
+ sch_tree_unlock(sch);
+ return 0;
+}
+
static int sfq_dump(struct Qdisc *sch, struct sk_buff *skb)
{
struct sfq_sched_data *q = qdisc_priv(sch);
@@ -524,7 +573,7 @@ static struct Qdisc_ops sfq_qdisc_ops = {
.init = sfq_init,
.reset = sfq_reset,
.destroy = sfq_destroy,
- .change = NULL,
+ .change = sfq_change,
.dump = sfq_dump,
.owner = THIS_MODULE,
};
--
1.5.2.4
^ permalink raw reply related [flat|nested] 19+ messages in thread
* [PATCH 7/7] Remove comments about hardcoded values.
2007-07-29 7:08 ` [PATCH 6/7] Make qdisc changeable Corey Hickey
@ 2007-07-29 7:08 ` Corey Hickey
2007-07-29 7:08 ` [PATCH] [iproute2] SFQ: Support changing depth and divisor Corey Hickey
0 siblings, 1 reply; 19+ messages in thread
From: Corey Hickey @ 2007-07-29 7:08 UTC (permalink / raw)
To: netdev; +Cc: Corey Hickey
None of these are true anymore (hooray!).
Signed-off-by: Corey Hickey <bugfood-ml@fatooh.org>
---
include/linux/pkt_sched.h | 8 --------
net/sched/sch_sfq.c | 17 +++--------------
2 files changed, 3 insertions(+), 22 deletions(-)
diff --git a/include/linux/pkt_sched.h b/include/linux/pkt_sched.h
index 268c515..58a0ea6 100644
--- a/include/linux/pkt_sched.h
+++ b/include/linux/pkt_sched.h
@@ -148,14 +148,6 @@ struct tc_sfq_qopt
unsigned flows; /* Maximal number of flows */
};
-/*
- * NOTE: limit, divisor and flows are hardwired to code at the moment.
- *
- * limit=flows=128, divisor=1024;
- *
- * The only reason for this is efficiency, it is possible
- * to change these parameters in compile time.
- */
/* RED section */
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index 4cd523f..8e84881 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -61,18 +61,7 @@
We still need true WFQ for top level CSZ, but using WFQ
for the best effort traffic is absolutely pointless:
- SFQ is superior for this purpose.
-
- IMPLEMENTATION:
- This implementation limits maximal queue length to 128;
- maximal mtu to 2^15-1; number of hash buckets to 1024.
- The only goal of this restrictions was that all data
- fit into one 4K page :-). Struct sfq_sched_data is
- organized in anti-cache manner: all the data for a bucket
- are scattered over different locations. This is not good,
- but it allowed me to put it into 4K.
-
- It is easy to increase these values, but not in flight. */
+ SFQ is superior for this purpose. */
#define SFQ_DEPTH_DEFAULT 128
#define SFQ_DIVISOR_DEFAULT 1024
@@ -496,7 +485,7 @@ static int sfq_change(struct Qdisc *sch, struct rtattr *opt)
struct sfq_sched_data tmp;
struct sk_buff *skb;
int err;
-
+
/* set up tmp queue */
memset(&tmp, 0, sizeof(struct sfq_sched_data));
tmp.quantum = psched_mtu(sch->dev); /* default */
@@ -509,7 +498,7 @@ static int sfq_change(struct Qdisc *sch, struct rtattr *opt)
sfq_drop(sch);
while ((skb = sfq_q_dequeue(q)) != NULL)
sfq_q_enqueue(skb, &tmp, SFQ_TAIL);
-
+
/* clean up the old queue */
sfq_q_destroy(q);
--
1.5.2.4
^ permalink raw reply related [flat|nested] 19+ messages in thread
* [PATCH] [iproute2] SFQ: Support changing depth and divisor.
2007-07-29 7:08 ` [PATCH 7/7] Remove comments about hardcoded values Corey Hickey
@ 2007-07-29 7:08 ` Corey Hickey
0 siblings, 0 replies; 19+ messages in thread
From: Corey Hickey @ 2007-07-29 7:08 UTC (permalink / raw)
To: netdev; +Cc: Corey Hickey
This can safely be applied either before or after the kernel
patches because the tc_sfq_qopt struct is unchanged:
- old kernels will ignore the parameters from new iproute2
- new kernels will use the same default parameters
---
include/linux/pkt_sched.h | 9 ---------
tc/q_sfq.c | 21 ++++++++++++++++++++-
2 files changed, 20 insertions(+), 10 deletions(-)
diff --git a/include/linux/pkt_sched.h b/include/linux/pkt_sched.h
index d10f353..37946d4 100644
--- a/include/linux/pkt_sched.h
+++ b/include/linux/pkt_sched.h
@@ -139,15 +139,6 @@ struct tc_sfq_qopt
unsigned flows; /* Maximal number of flows */
};
-/*
- * NOTE: limit, divisor and flows are hardwired to code at the moment.
- *
- * limit=flows=128, divisor=1024;
- *
- * The only reason for this is efficiency, it is possible
- * to change these parameters in compile time.
- */
-
/* RED section */
enum
diff --git a/tc/q_sfq.c b/tc/q_sfq.c
index 05385cf..7754db7 100644
--- a/tc/q_sfq.c
+++ b/tc/q_sfq.c
@@ -25,7 +25,7 @@
static void explain(void)
{
- fprintf(stderr, "Usage: ... sfq [ limit NUMBER ] [ perturb SECS ] [ quantum BYTES ]\n");
+ fprintf(stderr, "Usage: ... sfq [ limit NUMBER ] [ depth FLOWS ] [ divisor HASHBITS ] [ perturb SECS ] [ quantum BYTES ]\n");
}
#define usage() return(-1)
@@ -63,6 +63,25 @@ static int sfq_parse_opt(struct qdisc_util *qu, int argc, char **argv, struct nl
return -1;
}
ok++;
+ } else if (strcmp(*argv, "depth") == 0) {
+ NEXT_ARG();
+ if (get_unsigned(&opt.flows, *argv, 0)) {
+ fprintf(stderr, "Illegal \"depth\"\n");
+ return -1;
+ }
+ ok++;
+ } else if (strcmp(*argv, "divisor") == 0) {
+ NEXT_ARG();
+ if (get_unsigned(&opt.divisor, *argv, 0)) {
+ fprintf(stderr, "Illegal \"divisor\"\n");
+ return -1;
+ }
+ if (opt.divisor >= 15) {
+ fprintf(stderr, "Illegal \"divisor\", must be < 15\n");
+ return -1;
+ }
+ opt.divisor = 1<<opt.divisor;
+ ok++;
} else if (strcmp(*argv, "help") == 0) {
explain();
return -1;
--
1.5.2.4
^ permalink raw reply related [flat|nested] 19+ messages in thread
* Re: [PATCH 0/7] SFQ: backport some features from ESFQ
2007-07-29 7:08 [PATCH 0/7] SFQ: backport some features from ESFQ Corey Hickey
2007-07-29 7:08 ` [PATCH 1/7] Preparatory refactoring part 1 Corey Hickey
@ 2007-07-29 7:17 ` Corey Hickey
2007-07-29 7:19 ` David Miller
1 sibling, 1 reply; 19+ messages in thread
From: Corey Hickey @ 2007-07-29 7:17 UTC (permalink / raw)
To: netdev
Corey Hickey wrote:
> Hello,
>
> This set of patches adds some of ESFQ's modifications to the original
> SFQ. Thus far, I have received support for this approach rather than for
> trying to get ESFQ included as a separate qdisc.
Crud; I wasn't expecting git-send-email to thread the messages that way.
I guess I should have used --no-chain-reply-to. If I ought re-send this
series with that option, somebody please say the word and I'll do so.
-Corey
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH 0/7] SFQ: backport some features from ESFQ
2007-07-29 7:17 ` [PATCH 0/7] SFQ: backport some features from ESFQ Corey Hickey
@ 2007-07-29 7:19 ` David Miller
0 siblings, 0 replies; 19+ messages in thread
From: David Miller @ 2007-07-29 7:19 UTC (permalink / raw)
To: bugfood-ml; +Cc: netdev
From: Corey Hickey <bugfood-ml@fatooh.org>
Date: Sun, 29 Jul 2007 00:17:15 -0700
> Corey Hickey wrote:
> > Hello,
> >
> > This set of patches adds some of ESFQ's modifications to the original
> > SFQ. Thus far, I have received support for this approach rather than for
> > trying to get ESFQ included as a separate qdisc.
>
> Crud; I wasn't expecting git-send-email to thread the messages that way.
> I guess I should have used --no-chain-reply-to. If I ought re-send this
> series with that option, somebody please say the word and I'll do so.
It's not necessary, let some feedback come in first.
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH 4/7] Add "depth".
2007-07-29 7:08 ` [PATCH 4/7] Add "depth" Corey Hickey
2007-07-29 7:08 ` [PATCH 5/7] Add divisor Corey Hickey
@ 2007-07-29 18:41 ` Michael Buesch
2007-07-29 20:21 ` Corey Hickey
1 sibling, 1 reply; 19+ messages in thread
From: Michael Buesch @ 2007-07-29 18:41 UTC (permalink / raw)
To: Corey Hickey; +Cc: netdev
On Sunday 29 July 2007 09:08:51 Corey Hickey wrote:
> p = d;
> n = q->dep[d].next;
> @@ -215,7 +216,7 @@ static unsigned int sfq_drop(struct Qdisc *sch)
> drop a packet from it */
>
> if (d > 1) {
> - sfq_index x = q->dep[d+SFQ_DEPTH].next;
> + sfq_index x = q->dep[d+q->depth].next;
Please q->dep[d + q->depth]
Makes it _much_ more readable. And doesn't confuse my brain with a
minus and a BiggerThan sign ;)
> @@ -383,6 +384,16 @@ static void sfq_perturbation(unsigned long arg)
> static void sfq_q_destroy(struct sfq_sched_data *q)
> {
> del_timer(&q->perturb_timer);
> + if(q->dep)
> + kfree(q->dep);
> + if(q->next)
> + kfree(q->next);
> + if(q->allot)
> + kfree(q->allot);
> + if(q->hash)
> + kfree(q->hash);
> + if(q->qs)
> + kfree(q->qs);
No need to check for !=NULL. kfree handles NULL.
> }
>
> static void sfq_destroy(struct Qdisc *sch)
> @@ -394,6 +405,7 @@ static void sfq_destroy(struct Qdisc *sch)
> static int sfq_q_init(struct sfq_sched_data *q, struct rtattr *opt)
> {
> struct tc_sfq_qopt *ctl = RTA_DATA(opt);
> + sfq_index p = ~0U/2;
> int i;
>
> if (opt && opt->rta_len < RTA_LENGTH(sizeof(*ctl)))
> @@ -401,30 +413,53 @@ static int sfq_q_init(struct sfq_sched_data *q, struct rtattr *opt)
>
> q->perturbation = 0;
> q->max_depth = 0;
> - q->tail = q->limit = SFQ_DEPTH;
> if (opt == NULL) {
> q->perturb_period = 0;
> + q->tail = q->limit = q->depth = SFQ_DEPTH_DEFAULT;
> } else {
> struct tc_sfq_qopt *ctl = RTA_DATA(opt);
> if (ctl->quantum)
> q->quantum = ctl->quantum;
> q->perturb_period = ctl->perturb_period*HZ;
> + q->tail = q->limit = q->depth = ctl->flows ? : SFQ_DEPTH_DEFAULT;
> +
> + if (q->depth > p - 1)
> + return -EINVAL;
Compare depth against (~0U/2)-1? What's that doing? Should probably add a comment.
>
> if (ctl->limit)
> - q->limit = min_t(u32, ctl->limit, SFQ_DEPTH);
> + q->limit = min_t(u32, ctl->limit, q->depth);
> }
>
> + q->dep = kmalloc((1+q->depth*2)*sizeof(struct sfq_head), GFP_KERNEL);
> + if (!q->dep)
> + goto err_case;
> + q->next = kmalloc(q->depth*sizeof(sfq_index), GFP_KERNEL);
> + if (!q->next)
> + goto err_case;
> + q->allot = kmalloc(q->depth*sizeof(short), GFP_KERNEL);
> + if (!q->allot)
> + goto err_case;
> + q->hash = kmalloc(q->depth*sizeof(unsigned short), GFP_KERNEL);
> + if (!q->hash)
> + goto err_case;
> + q->qs = kmalloc(q->depth*sizeof(struct sk_buff_head), GFP_KERNEL);
> + if (!q->qs)
> + goto err_case;
You may chose to use kcalloc for array allocations.
> for (i=0; i<SFQ_HASH_DIVISOR; i++)
> - q->ht[i] = SFQ_DEPTH;
> - for (i=0; i<SFQ_DEPTH; i++) {
> + q->ht[i] = q->depth;
> + for (i=0; i<q->depth; i++) {
> skb_queue_head_init(&q->qs[i]);
> - q->dep[i+SFQ_DEPTH].next = i+SFQ_DEPTH;
> - q->dep[i+SFQ_DEPTH].prev = i+SFQ_DEPTH;
> + q->dep[i+q->depth].next = i+q->depth;
> + q->dep[i+q->depth].prev = i+q->depth;
> }
>
> - for (i=0; i<SFQ_DEPTH; i++)
> + for (i=0; i<q->depth; i++)
> sfq_link(q, i);
> return 0;
> +err_case:
This leaks a few kmallocs.
> + sfq_q_destroy(q);
> + return -ENOBUFS;
> }
>
> static int sfq_init(struct Qdisc *sch, struct rtattr *opt)
> @@ -458,7 +493,7 @@ static int sfq_dump(struct Qdisc *sch, struct sk_buff *skb)
>
> opt.limit = q->limit;
> opt.divisor = SFQ_HASH_DIVISOR;
> - opt.flows = q->limit;
> + opt.flows = q->depth;
>
> RTA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
>
--
Greetings Michael.
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH 4/7] Add "depth".
2007-07-29 18:41 ` [PATCH 4/7] Add "depth" Michael Buesch
@ 2007-07-29 20:21 ` Corey Hickey
2007-07-29 20:54 ` Michael Buesch
0 siblings, 1 reply; 19+ messages in thread
From: Corey Hickey @ 2007-07-29 20:21 UTC (permalink / raw)
To: Michael Buesch; +Cc: Linux Netdev List
Michael Buesch wrote:
> On Sunday 29 July 2007 09:08:51 Corey Hickey wrote:
>> p = d;
>> n = q->dep[d].next;
>> @@ -215,7 +216,7 @@ static unsigned int sfq_drop(struct Qdisc *sch)
>> drop a packet from it */
>>
>> if (d > 1) {
>> - sfq_index x = q->dep[d+SFQ_DEPTH].next;
>> + sfq_index x = q->dep[d+q->depth].next;
>
> Please q->dep[d + q->depth]
> Makes it _much_ more readable. And doesn't confuse my brain with a
> minus and a BiggerThan sign ;)
Ok.
>> @@ -383,6 +384,16 @@ static void sfq_perturbation(unsigned long arg)
>> static void sfq_q_destroy(struct sfq_sched_data *q)
>> {
>> del_timer(&q->perturb_timer);
>> + if(q->dep)
>> + kfree(q->dep);
>> + if(q->next)
>> + kfree(q->next);
>> + if(q->allot)
>> + kfree(q->allot);
>> + if(q->hash)
>> + kfree(q->hash);
>> + if(q->qs)
>> + kfree(q->qs);
>
> No need to check for !=NULL. kfree handles NULL.
Ok. Thanks.
>> }
>>
>> static void sfq_destroy(struct Qdisc *sch)
>> @@ -394,6 +405,7 @@ static void sfq_destroy(struct Qdisc *sch)
>> static int sfq_q_init(struct sfq_sched_data *q, struct rtattr *opt)
>> {
>> struct tc_sfq_qopt *ctl = RTA_DATA(opt);
>> + sfq_index p = ~0U/2;
>> int i;
>>
>> if (opt && opt->rta_len < RTA_LENGTH(sizeof(*ctl)))
>> @@ -401,30 +413,53 @@ static int sfq_q_init(struct sfq_sched_data *q, struct rtattr *opt)
>>
>> q->perturbation = 0;
>> q->max_depth = 0;
>> - q->tail = q->limit = SFQ_DEPTH;
>> if (opt == NULL) {
>> q->perturb_period = 0;
>> + q->tail = q->limit = q->depth = SFQ_DEPTH_DEFAULT;
>> } else {
>> struct tc_sfq_qopt *ctl = RTA_DATA(opt);
>> if (ctl->quantum)
>> q->quantum = ctl->quantum;
>> q->perturb_period = ctl->perturb_period*HZ;
>> + q->tail = q->limit = q->depth = ctl->flows ? : SFQ_DEPTH_DEFAULT;
>> +
>> + if (q->depth > p - 1)
>> + return -EINVAL;
>
> Compare depth against (~0U/2)-1? What's that doing? Should probably add a comment.
~0U/2 - 1 is the maximum value depth can be, based on how it is used in
indexing q->dep. I agree, though, that deserves a comment. Actually,
I'll also change it to '#define SFQ_DEPTH_MAX (~0U/2 - 1)' and put it
near the top of the file next to the 'typedef unsigned int sfq_index;'.
I could also include limits.h and use UINT_MAX instead of ~0U; would
that be preferable?
>>
>> if (ctl->limit)
>> - q->limit = min_t(u32, ctl->limit, SFQ_DEPTH);
>> + q->limit = min_t(u32, ctl->limit, q->depth);
>> }
>>
>> + q->dep = kmalloc((1+q->depth*2)*sizeof(struct sfq_head), GFP_KERNEL);
>> + if (!q->dep)
>> + goto err_case;
>> + q->next = kmalloc(q->depth*sizeof(sfq_index), GFP_KERNEL);
>> + if (!q->next)
>> + goto err_case;
>> + q->allot = kmalloc(q->depth*sizeof(short), GFP_KERNEL);
>> + if (!q->allot)
>> + goto err_case;
>> + q->hash = kmalloc(q->depth*sizeof(unsigned short), GFP_KERNEL);
>> + if (!q->hash)
>> + goto err_case;
>> + q->qs = kmalloc(q->depth*sizeof(struct sk_buff_head), GFP_KERNEL);
>> + if (!q->qs)
>> + goto err_case;
>
> You may chose to use kcalloc for array allocations.
The arrays in the original code don't get zeroed either, so that
shouldn't be necessary (and I haven't heard of any problems so far). Do
you suggest I use kcalloc() anyway, just as a good practice?
>> for (i=0; i<SFQ_HASH_DIVISOR; i++)
>> - q->ht[i] = SFQ_DEPTH;
>> - for (i=0; i<SFQ_DEPTH; i++) {
>> + q->ht[i] = q->depth;
>> + for (i=0; i<q->depth; i++) {
>> skb_queue_head_init(&q->qs[i]);
>> - q->dep[i+SFQ_DEPTH].next = i+SFQ_DEPTH;
>> - q->dep[i+SFQ_DEPTH].prev = i+SFQ_DEPTH;
>> + q->dep[i+q->depth].next = i+q->depth;
>> + q->dep[i+q->depth].prev = i+q->depth;
>> }
>>
>> - for (i=0; i<SFQ_DEPTH; i++)
>> + for (i=0; i<q->depth; i++)
>> sfq_link(q, i);
>> return 0;
>> +err_case:
>
> This leaks a few kmallocs.
Are you saying that the 'err_case:' leaks kmallocs? It calls
sfq_q_destroy(q), which kfrees each of the arrays: dep, next, allot,
hash, and qs. Is that sufficient, or am I missing something or
misunderstanding you?
>> + sfq_q_destroy(q);
>> + return -ENOBUFS;
>> }
Thank you for your review. Could you please clarify the questions I
have? I'll make, test, and submit a revision of this patch after that.
-Corey
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH 4/7] Add "depth".
2007-07-29 20:21 ` Corey Hickey
@ 2007-07-29 20:54 ` Michael Buesch
0 siblings, 0 replies; 19+ messages in thread
From: Michael Buesch @ 2007-07-29 20:54 UTC (permalink / raw)
To: Corey Hickey; +Cc: Linux Netdev List
On Sunday 29 July 2007 22:21, Corey Hickey wrote:
> > Compare depth against (~0U/2)-1? What's that doing? Should probably add a comment.
>
> ~0U/2 - 1 is the maximum value depth can be, based on how it is used in
> indexing q->dep. I agree, though, that deserves a comment. Actually,
> I'll also change it to '#define SFQ_DEPTH_MAX (~0U/2 - 1)' and put it
> near the top of the file next to the 'typedef unsigned int sfq_index;'.
>
> I could also include limits.h and use UINT_MAX instead of ~0U; would
> that be preferable?
Seems like a good idea.
> >>
> >> if (ctl->limit)
> >> - q->limit = min_t(u32, ctl->limit, SFQ_DEPTH);
> >> + q->limit = min_t(u32, ctl->limit, q->depth);
> >> }
> >>
> >> + q->dep = kmalloc((1+q->depth*2)*sizeof(struct sfq_head), GFP_KERNEL);
> >> + if (!q->dep)
> >> + goto err_case;
> >> + q->next = kmalloc(q->depth*sizeof(sfq_index), GFP_KERNEL);
> >> + if (!q->next)
> >> + goto err_case;
> >> + q->allot = kmalloc(q->depth*sizeof(short), GFP_KERNEL);
> >> + if (!q->allot)
> >> + goto err_case;
> >> + q->hash = kmalloc(q->depth*sizeof(unsigned short), GFP_KERNEL);
> >> + if (!q->hash)
> >> + goto err_case;
> >> + q->qs = kmalloc(q->depth*sizeof(struct sk_buff_head), GFP_KERNEL);
> >> + if (!q->qs)
> >> + goto err_case;
> >
> > You may chose to use kcalloc for array allocations.
>
> The arrays in the original code don't get zeroed either, so that
> shouldn't be necessary (and I haven't heard of any problems so far). Do
> you suggest I use kcalloc() anyway, just as a good practice?
Well, I think we don't have strict rules on that, so it depends
on the developer's taste. The advantage of kcalloc is, that it might
catch errors in the args better than this opencoded multiplication.
(There's some BUG_ON logic in kcalloc)
> >> for (i=0; i<SFQ_HASH_DIVISOR; i++)
> >> - q->ht[i] = SFQ_DEPTH;
> >> - for (i=0; i<SFQ_DEPTH; i++) {
> >> + q->ht[i] = q->depth;
> >> + for (i=0; i<q->depth; i++) {
> >> skb_queue_head_init(&q->qs[i]);
> >> - q->dep[i+SFQ_DEPTH].next = i+SFQ_DEPTH;
> >> - q->dep[i+SFQ_DEPTH].prev = i+SFQ_DEPTH;
> >> + q->dep[i+q->depth].next = i+q->depth;
> >> + q->dep[i+q->depth].prev = i+q->depth;
> >> }
> >>
> >> - for (i=0; i<SFQ_DEPTH; i++)
> >> + for (i=0; i<q->depth; i++)
> >> sfq_link(q, i);
> >> return 0;
> >> +err_case:
> >
> > This leaks a few kmallocs.
>
> Are you saying that the 'err_case:' leaks kmallocs? It calls
> sfq_q_destroy(q), which kfrees each of the arrays: dep, next, allot,
> hash, and qs. Is that sufficient, or am I missing something or
> misunderstanding you?
Ok, I didn't see that. So this should be ok.
> >> + sfq_q_destroy(q);
> >> + return -ENOBUFS;
> >> }
^ permalink raw reply [flat|nested] 19+ messages in thread
* [PATCH 6/7] Make qdisc changeable.
2007-07-30 0:21 SFQ: backport some features from ESFQ (try 2) Corey Hickey
@ 2007-07-30 0:21 ` Corey Hickey
2007-07-30 14:11 ` Patrick McHardy
0 siblings, 1 reply; 19+ messages in thread
From: Corey Hickey @ 2007-07-30 0:21 UTC (permalink / raw)
To: netdev; +Cc: Corey Hickey
Re-implement sfq_change() and enable Qdisc_opts.change so "tc qdisc
change" will work.
Signed-off-by: Corey Hickey <bugfood-ml@fatooh.org>
---
net/sched/sch_sfq.c | 51 ++++++++++++++++++++++++++++++++++++++++++++++++++-
1 files changed, 50 insertions(+), 1 deletions(-)
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index e6a6a21..e042cd0 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -485,6 +485,55 @@ static int sfq_init(struct Qdisc *sch, struct rtattr *opt)
return 0;
}
+static int sfq_change(struct Qdisc *sch, struct rtattr *opt)
+{
+ struct sfq_sched_data *q = qdisc_priv(sch);
+ struct sfq_sched_data tmp;
+ struct sk_buff *skb;
+ int err;
+
+ /* set up tmp queue */
+ memset(&tmp, 0, sizeof(struct sfq_sched_data));
+ tmp.quantum = psched_mtu(sch->dev); /* default */
+ if ((err = sfq_q_init(&tmp, opt)))
+ return err;
+
+ /* copy packets from the old queue to the tmp queue */
+ sch_tree_lock(sch);
+ while (sch->q.qlen >= tmp.limit - 1)
+ sfq_drop(sch);
+ while ((skb = sfq_q_dequeue(q)) != NULL)
+ sfq_q_enqueue(skb, &tmp, SFQ_TAIL);
+
+ /* clean up the old queue */
+ sfq_q_destroy(q);
+
+ /* copy elements of the tmp queue into the old queue */
+ q->perturb_period = tmp.perturb_period;
+ q->quantum = tmp.quantum;
+ q->limit = tmp.limit;
+ q->depth = tmp.depth;
+ q->hash_divisor = tmp.hash_divisor;
+ q->tail = tmp.tail;
+ q->max_depth = tmp.max_depth;
+ q->ht = tmp.ht;
+ q->dep = tmp.dep;
+ q->next = tmp.next;
+ q->allot = tmp.allot;
+ q->hash = tmp.hash;
+ q->qs = tmp.qs;
+
+ /* finish up */
+ if (q->perturb_period) {
+ q->perturb_timer.expires = jiffies + q->perturb_period;
+ add_timer(&q->perturb_timer);
+ } else {
+ q->perturbation = 0;
+ }
+ sch_tree_unlock(sch);
+ return 0;
+}
+
static int sfq_dump(struct Qdisc *sch, struct sk_buff *skb)
{
struct sfq_sched_data *q = qdisc_priv(sch);
@@ -519,7 +568,7 @@ static struct Qdisc_ops sfq_qdisc_ops = {
.init = sfq_init,
.reset = sfq_reset,
.destroy = sfq_destroy,
- .change = NULL,
+ .change = sfq_change,
.dump = sfq_dump,
.owner = THIS_MODULE,
};
--
1.5.2.4
^ permalink raw reply related [flat|nested] 19+ messages in thread
* Re: [PATCH 6/7] Make qdisc changeable.
2007-07-30 0:21 ` [PATCH 6/7] Make qdisc changeable Corey Hickey
@ 2007-07-30 14:11 ` Patrick McHardy
2007-07-31 7:43 ` Corey Hickey
2007-08-06 2:47 ` Corey Hickey
0 siblings, 2 replies; 19+ messages in thread
From: Patrick McHardy @ 2007-07-30 14:11 UTC (permalink / raw)
To: Corey Hickey; +Cc: netdev
Corey Hickey wrote:
> Re-implement sfq_change() and enable Qdisc_opts.change so "tc qdisc
> change" will work.
>
> Signed-off-by: Corey Hickey <bugfood-ml@fatooh.org>
> ---
> net/sched/sch_sfq.c | 51 ++++++++++++++++++++++++++++++++++++++++++++++++++-
> 1 files changed, 50 insertions(+), 1 deletions(-)
>
> diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
> index e6a6a21..e042cd0 100644
> --- a/net/sched/sch_sfq.c
> +++ b/net/sched/sch_sfq.c
> @@ -485,6 +485,55 @@ static int sfq_init(struct Qdisc *sch, struct rtattr *opt)
> return 0;
> }
>
> +static int sfq_change(struct Qdisc *sch, struct rtattr *opt)
> +{
> + struct sfq_sched_data *q = qdisc_priv(sch);
> + struct sfq_sched_data tmp;
> + struct sk_buff *skb;
> + int err;
> +
> + /* set up tmp queue */
> + memset(&tmp, 0, sizeof(struct sfq_sched_data));
> + tmp.quantum = psched_mtu(sch->dev); /* default */
If no value is given it should use the old value instead of
reinitializing to the default.
> + if ((err = sfq_q_init(&tmp, opt)))
> + return err;
This will also use defaults for all unspecified values. It would
be more consistent with other qdiscs to only change those values
that are actually specified, so something like "tc qdisc change ...
perturb 10" will *only* change the perturbation parameter.
I'm not sure reusing the initialization function and copying the
parameters is the best way to do this.
> +
> + /* copy packets from the old queue to the tmp queue */
> + sch_tree_lock(sch);
> + while (sch->q.qlen >= tmp.limit - 1)
> + sfq_drop(sch);
> + while ((skb = sfq_q_dequeue(q)) != NULL)
> + sfq_q_enqueue(skb, &tmp, SFQ_TAIL);
> +
> + /* clean up the old queue */
> + sfq_q_destroy(q);
> +
> + /* copy elements of the tmp queue into the old queue */
> + q->perturb_period = tmp.perturb_period;
> + q->quantum = tmp.quantum;
> + q->limit = tmp.limit;
> + q->depth = tmp.depth;
> + q->hash_divisor = tmp.hash_divisor;
> + q->tail = tmp.tail;
> + q->max_depth = tmp.max_depth;
> + q->ht = tmp.ht;
> + q->dep = tmp.dep;
> + q->next = tmp.next;
> + q->allot = tmp.allot;
> + q->hash = tmp.hash;
> + q->qs = tmp.qs;
> +
> + /* finish up */
> + if (q->perturb_period) {
> + q->perturb_timer.expires = jiffies + q->perturb_period;
> + add_timer(&q->perturb_timer);
> + } else {
> + q->perturbation = 0;
> + }
> + sch_tree_unlock(sch);
> + return 0;
> +}
> +
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH 6/7] Make qdisc changeable.
2007-07-30 14:11 ` Patrick McHardy
@ 2007-07-31 7:43 ` Corey Hickey
2007-08-06 2:47 ` Corey Hickey
1 sibling, 0 replies; 19+ messages in thread
From: Corey Hickey @ 2007-07-31 7:43 UTC (permalink / raw)
To: Patrick McHardy; +Cc: netdev
Patrick McHardy wrote:
> Corey Hickey wrote:
>> Re-implement sfq_change() and enable Qdisc_opts.change so "tc qdisc
>> change" will work.
>>
>> Signed-off-by: Corey Hickey <bugfood-ml@fatooh.org>
>> ---
>> net/sched/sch_sfq.c | 51 ++++++++++++++++++++++++++++++++++++++++++++++++++-
>> 1 files changed, 50 insertions(+), 1 deletions(-)
>>
>> diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
>> index e6a6a21..e042cd0 100644
>> --- a/net/sched/sch_sfq.c
>> +++ b/net/sched/sch_sfq.c
>> @@ -485,6 +485,55 @@ static int sfq_init(struct Qdisc *sch, struct rtattr *opt)
>> return 0;
>> }
>>
>> +static int sfq_change(struct Qdisc *sch, struct rtattr *opt)
>> +{
>> + struct sfq_sched_data *q = qdisc_priv(sch);
>> + struct sfq_sched_data tmp;
>> + struct sk_buff *skb;
>> + int err;
>> +
>> + /* set up tmp queue */
>> + memset(&tmp, 0, sizeof(struct sfq_sched_data));
>> + tmp.quantum = psched_mtu(sch->dev); /* default */
>
>
> If no value is given it should use the old value instead of
> reinitializing to the default.
>
>> + if ((err = sfq_q_init(&tmp, opt)))
>> + return err;
>
>
> This will also use defaults for all unspecified values. It would
> be more consistent with other qdiscs to only change those values
> that are actually specified, so something like "tc qdisc change ...
> perturb 10" will *only* change the perturbation parameter.
I somehow had it in my head that a qdisc change was supposed to work
that way. I'll change my patch.
> I'm not sure reusing the initialization function and copying the
> parameters is the best way to do this.
I'm not sure either, but I do like that it's conceptually simple and it
keeps the parameter handling in one place. I've thought and stared for a
while, and I don't see a better way, but that could of course be due to
my limited understanding and general newbiehood in that regard.
Thanks again for the review. I'll try to get a new batch of patches sent
off tomorrow.
-Corey
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH 6/7] Make qdisc changeable.
2007-07-30 14:11 ` Patrick McHardy
2007-07-31 7:43 ` Corey Hickey
@ 2007-08-06 2:47 ` Corey Hickey
2007-08-06 12:06 ` Patrick McHardy
1 sibling, 1 reply; 19+ messages in thread
From: Corey Hickey @ 2007-08-06 2:47 UTC (permalink / raw)
To: Patrick McHardy; +Cc: netdev
Patrick McHardy wrote:
>> + if ((err = sfq_q_init(&tmp, opt)))
>> + return err;
>
>
> This will also use defaults for all unspecified values. It would
> be more consistent with other qdiscs to only change those values
> that are actually specified, so something like "tc qdisc change ...
> perturb 10" will *only* change the perturbation parameter.
I'm fixed this for all the parameters except one--your example above.
Since 0 is a valid value for perturb, I can't see any clever way to
differentiate between the user specifying "perturb 0" or leaving perturb
unspecified. Either way, opt->perturb_period is 0.
The only way I can see would be to add another member, say,
opt->perturb_specified, and use that accordingly. Unfortunately, this
would break usage of sfq with older versions of tc, so I'm hoping
there's a better approach.
Do you have any suggestions? I've looked at the other qdisc files and I
don't see any other instances like this.
Thanks,
Corey
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH 6/7] Make qdisc changeable.
2007-08-06 2:47 ` Corey Hickey
@ 2007-08-06 12:06 ` Patrick McHardy
0 siblings, 0 replies; 19+ messages in thread
From: Patrick McHardy @ 2007-08-06 12:06 UTC (permalink / raw)
To: Corey Hickey; +Cc: netdev
Corey Hickey wrote:
> Patrick McHardy wrote:
>
>>> + if ((err = sfq_q_init(&tmp, opt)))
>>> + return err;
>>
>>
>>
>> This will also use defaults for all unspecified values. It would
>> be more consistent with other qdiscs to only change those values
>> that are actually specified, so something like "tc qdisc change ...
>> perturb 10" will *only* change the perturbation parameter.
>
>
> I'm fixed this for all the parameters except one--your example above.
> Since 0 is a valid value for perturb, I can't see any clever way to
> differentiate between the user specifying "perturb 0" or leaving perturb
> unspecified. Either way, opt->perturb_period is 0.
>
> The only way I can see would be to add another member, say,
> opt->perturb_specified, and use that accordingly. Unfortunately, this
> would break usage of sfq with older versions of tc, so I'm hoping
> there's a better approach.
>
> Do you have any suggestions? I've looked at the other qdisc files and I
> don't see any other instances like this.
Maybe use the nested compat attribute and split the base SFQ
struct in the individual members ..
^ permalink raw reply [flat|nested] 19+ messages in thread
end of thread, other threads:[~2007-08-06 12:06 UTC | newest]
Thread overview: 19+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2007-07-29 7:08 [PATCH 0/7] SFQ: backport some features from ESFQ Corey Hickey
2007-07-29 7:08 ` [PATCH 1/7] Preparatory refactoring part 1 Corey Hickey
2007-07-29 7:08 ` [PATCH 2/7] Preparatory refactoring part 2 Corey Hickey
2007-07-29 7:08 ` [PATCH 3/7] Move two functions Corey Hickey
2007-07-29 7:08 ` [PATCH 4/7] Add "depth" Corey Hickey
2007-07-29 7:08 ` [PATCH 5/7] Add divisor Corey Hickey
2007-07-29 7:08 ` [PATCH 6/7] Make qdisc changeable Corey Hickey
2007-07-29 7:08 ` [PATCH 7/7] Remove comments about hardcoded values Corey Hickey
2007-07-29 7:08 ` [PATCH] [iproute2] SFQ: Support changing depth and divisor Corey Hickey
2007-07-29 18:41 ` [PATCH 4/7] Add "depth" Michael Buesch
2007-07-29 20:21 ` Corey Hickey
2007-07-29 20:54 ` Michael Buesch
2007-07-29 7:17 ` [PATCH 0/7] SFQ: backport some features from ESFQ Corey Hickey
2007-07-29 7:19 ` David Miller
-- strict thread matches above, loose matches on Subject: below --
2007-07-30 0:21 SFQ: backport some features from ESFQ (try 2) Corey Hickey
2007-07-30 0:21 ` [PATCH 6/7] Make qdisc changeable Corey Hickey
2007-07-30 14:11 ` Patrick McHardy
2007-07-31 7:43 ` Corey Hickey
2007-08-06 2:47 ` Corey Hickey
2007-08-06 12:06 ` Patrick McHardy
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).