netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Eric Dumazet <eric.dumazet@gmail.com>
To: Stephen Hemminger <shemminger@vyatta.com>
Cc: David Miller <davem@davemloft.net>, netdev@vger.kernel.org
Subject: Re: [PATCH] CHOKe flow scheduler (0.8)
Date: Fri, 14 Jan 2011 04:29:53 +0100	[thread overview]
Message-ID: <1294975793.3403.117.camel@edumazet-laptop> (raw)
In-Reply-To: <20110113153436.70d3c0a3@s6510>

Le jeudi 13 janvier 2011 à 15:34 -0800, Stephen Hemminger a écrit :
> CHOKe ("CHOose and Kill" or "CHOose and Keep") is an alternative
> packet scheduler based on the Random Exponential Drop (RED) algorithm.
> 
> The core idea is:
>   For every packet arrival:
>   	Calculate Qave
> 	if (Qave < minth) 
> 	     Queue the new packet
> 	else 
> 	     Select randomly a packet from the queue 
> 	     if (both packets from same flow)
> 	     then Drop both the packets
> 	     else if (Qave > maxth)
> 	          Drop packet
> 	     else
> 	       	  Admit packet with probability P (same as RED)
> 
> See also:
>   Rong Pan, Balaji Prabhakar, Konstantinos Psounis, "CHOKe: a stateless active
>    queue management scheme for approximating fair bandwidth allocation", 
>   Proceeding of INFOCOM'2000, March 2000.
> 
> Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
> 
> ---
> 0.8 change queue length and holes account.
>     keep sch->q.qlen updated, and holes counter not needed.
> 
> ---
>  net/sched/Kconfig     |   11 +
>  net/sched/Makefile    |    1 
>  net/sched/sch_choke.c |  536 ++++++++++++++++++++++++++++++++++++++++++++++++++
>  3 files changed, 548 insertions(+)
> 

Hi Stephen

Your diffstat was an old one, here the right one.

 include/linux/pkt_sched.h |   29 +
 net/sched/Kconfig         |   11 
 net/sched/Makefile        |    1 
 net/sched/sch_choke.c     |  552 ++++++++++++++++++++++++++++++++++++


I tested v8 and found several serious problems, please find a diff of my
latest changes :

- wrong oskb/skb used in choke_enqueue()
- choke_zap_head_holes() is called from choke_dequeue() and crash if we
dequeued last packet. (!!!)
- out of bound access in choke_zap_tail_holes()
- choke_dequeue() can be shorter
- choke_change() must dequeue/drop in excess packets or risk new array
overfill (if we reduce queue limit by tc qdisc change ...)
- inline is not needed, space errors in include file

Thanks !

 include/linux/pkt_sched.h |    8 +++---
 net/sched/sch_choke.c     |   43 ++++++++++++++++++++++--------------
 2 files changed, 31 insertions(+), 20 deletions(-)

diff --git a/include/linux/pkt_sched.h b/include/linux/pkt_sched.h
index 83bac92..498c798 100644
--- a/include/linux/pkt_sched.h
+++ b/include/linux/pkt_sched.h
@@ -269,10 +269,10 @@ struct tc_choke_qopt {
 };
 
 struct tc_choke_xstats {
-	__u32           early;          /* Early drops */
-	__u32           pdrop;          /* Drops due to queue limits */
-	__u32           other;          /* Drops due to drop() calls */
-	__u32           marked;         /* Marked packets */
+	__u32		early;          /* Early drops */
+	__u32		pdrop;          /* Drops due to queue limits */
+	__u32		other;          /* Drops due to drop() calls */
+	__u32		marked;         /* Marked packets */
 	__u32		matched;	/* Drops due to flow match */
 };
 
diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c
index 136d4e5..c710c57 100644
--- a/net/sched/sch_choke.c
+++ b/net/sched/sch_choke.c
@@ -74,7 +74,7 @@ struct choke_sched_data {
 };
 
 /* deliver a random number between 0 and N - 1 */
-static inline u32 random_N(unsigned int N)
+static u32 random_N(unsigned int N)
 {
 	return reciprocal_divide(random32(), N);
 }
@@ -99,13 +99,13 @@ static struct sk_buff *choke_peek_random(struct Qdisc *sch,
 }
 
 /* Is ECN parameter configured */
-static inline int use_ecn(const struct choke_sched_data *q)
+static int use_ecn(const struct choke_sched_data *q)
 {
 	return q->flags & TC_RED_ECN;
 }
 
 /* Should packets over max just be dropped (versus marked) */
-static inline int use_harddrop(const struct choke_sched_data *q)
+static int use_harddrop(const struct choke_sched_data *q)
 {
 	return q->flags & TC_RED_HARDDROP;
 }
@@ -113,20 +113,21 @@ static inline int use_harddrop(const struct choke_sched_data *q)
 /* Move head pointer forward to skip over holes */
 static void choke_zap_head_holes(struct choke_sched_data *q)
 {
-	while (q->tab[q->head] == NULL) {
+	do {
 		q->head = (q->head + 1) & q->tab_mask;
-
-		BUG_ON(q->head == q->tail);
-	}
+		if (q->head == q->tail)
+			break;
+	} while (q->tab[q->head] == NULL);
 }
 
 /* Move tail pointer backwards to reuse holes */
 static void choke_zap_tail_holes(struct choke_sched_data *q)
 {
-	while (q->tab[q->tail - 1] == NULL) {
+	do {
 		q->tail = (q->tail - 1) & q->tab_mask;
-		BUG_ON(q->head == q->tail);
-	}
+		if (q->head == q->tail)
+			break;
+	} while (q->tab[q->tail] == NULL);
 }
 
 /* Drop packet from queue array by creating a "hole" */
@@ -145,7 +146,7 @@ static void choke_drop_by_idx(struct choke_sched_data *q, unsigned int idx)
    2. fast internal classification
    3. use TC filter based classification
 */
-static inline unsigned int choke_classify(struct sk_buff *skb,
+static unsigned int choke_classify(struct sk_buff *skb,
 					  struct Qdisc *sch, int *qerr)
 
 {
@@ -218,7 +219,7 @@ static int choke_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 			/* Drop both packets */
 			q->stats.matched++;
 			choke_drop_by_idx(q, idx);
-			sch->qstats.backlog -= qdisc_pkt_len(skb);
+			sch->qstats.backlog -= qdisc_pkt_len(oskb);
 			--sch->q.qlen;
 			qdisc_drop(oskb, sch);
 			goto congestion_drop;
@@ -285,8 +286,7 @@ static struct sk_buff *choke_dequeue(struct Qdisc *sch)
 	}
 
 	skb = q->tab[q->head];
-	q->tab[q->head] = NULL; /* not really needed */
-	q->head = (q->head + 1) & q->tab_mask;
+	q->tab[q->head] = NULL;
 	choke_zap_head_holes(q);
 	--sch->q.qlen;
 	sch->qstats.backlog -= qdisc_pkt_len(skb);
@@ -371,12 +371,23 @@ static int choke_change(struct Qdisc *sch, struct nlattr *opt)
 		sch_tree_lock(sch);
 		old = q->tab;
 		if (old) {
-			unsigned int tail = 0;
+			unsigned int oqlen = sch->q.qlen, tail = 0;
 
 			while (q->head != q->tail) {
-				ntab[tail++] = q->tab[q->head];
+				struct sk_buff *skb = q->tab[q->head];
+
 				q->head = (q->head + 1) & q->tab_mask;
+				if (!skb)
+					continue;
+				if (tail < mask) {
+					ntab[tail++] = skb;
+					continue;
+				}
+				sch->qstats.backlog -= qdisc_pkt_len(skb);
+				--sch->q.qlen;
+				qdisc_drop(skb, sch);
 			}
+			qdisc_tree_decrease_qlen(sch, oqlen - sch->q.qlen);
 			q->head = 0;
 			q->tail = tail;
 		}



  reply	other threads:[~2011-01-14  3:29 UTC|newest]

Thread overview: 26+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2011-01-13 17:27 [PATCH] CHOKe flow scheduler (0.7) Stephen Hemminger
2011-01-13 17:48 ` [PATCH] CHOKe flow scheduler (iproute) Stephen Hemminger
2011-01-13 21:01   ` Eric Dumazet
2011-01-13 18:00 ` [PATCH] CHOKe flow scheduler (0.7) Eric Dumazet
2011-01-13 18:02   ` Eric Dumazet
2011-01-13 20:37 ` Eric Dumazet
2011-01-13 23:34   ` [PATCH] CHOKe flow scheduler (0.8) Stephen Hemminger
2011-01-14  3:29     ` Eric Dumazet [this message]
2011-01-14  3:34       ` Eric Dumazet
2011-01-14  3:58         ` Eric Dumazet
2011-01-14 11:32           ` Eric Dumazet
2011-01-14 13:54     ` Patrick McHardy
2011-01-14 13:55       ` Patrick McHardy
2011-01-14 14:24         ` Eric Dumazet
2011-01-14 23:45           ` [PATCH] CHOKe flow scheduler (0.9) Stephen Hemminger
2011-01-15  7:45             ` Eric Dumazet
2011-01-17 17:25               ` Stephen Hemminger
2011-01-17 17:54                 ` Eric Dumazet
2011-01-18 19:06                   ` Stephen Hemminger
2011-01-18 19:34                     ` Eric Dumazet
2011-01-20 17:38                       ` [PATCH] CHOKe flow scheduler (0.10) Stephen Hemminger
2011-01-20 18:19                         ` Eric Dumazet
2011-01-20 22:46                           ` Eric Dumazet
2011-02-03  1:21                             ` [PATCH net-next] CHOKe flow scheduler (0.11) Stephen Hemminger
2011-02-03  1:59                               ` Eric Dumazet
2011-02-03  4:53                                 ` David Miller

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1294975793.3403.117.camel@edumazet-laptop \
    --to=eric.dumazet@gmail.com \
    --cc=davem@davemloft.net \
    --cc=netdev@vger.kernel.org \
    --cc=shemminger@vyatta.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).