netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Patrick McHardy <kaber@trash.net>
To: Russell Stuart <russell@stuart.id.au>
Cc: Russell Stuart <russell-tcatm@stuart.id.au>,
	hadi@cyberus.ca, Alan Cox <alan@lxorguk.ukuu.org.uk>,
	Stephen Hemminger <shemminger@osdl.org>,
	netdev@vger.kernel.org, Jesper Dangaard Brouer <hawk@diku.dk>
Subject: Re: [PATCH 0/2] NET: Accurate packet scheduling for ATM/ADSL
Date: Tue, 04 Jul 2006 15:29:09 +0200	[thread overview]
Message-ID: <44AA6D25.9000707@trash.net> (raw)
In-Reply-To: <44A0CE01.4010109@stuart.id.au>

[-- Attachment #1: Type: text/plain, Size: 1192 bytes --]

Russell Stuart wrote:
> On 26/06/2006 9:10 PM, Patrick McHardy wrote:
> 
>>> 5.  We still did have to modify the kernel for ATM.  That was
>>>    because of its rather unusual characteristics.  However,
>>>    it you look at the size of modifications made to the kernel
>>>    verses the size made to the user space tool, (37 lines
>>>    versus 303 lines,) the bulk of the work was does in user
>>>    space.
>>
>>
>> I'm sorry, but arguing that a limited special case solution is
>> better because it needs slightly less code is just not reasonable.
> 
> 
> Without seeing your actual proposal it is difficult to
> judge whether this is a reasonable trade-off or not.
> Hopefully we will see your code soon.  Do you have any
> idea when?

Unfortunately I still haven't gotten around to cleaning them up, so I'm
sending them in their preliminary state. It's not much that is missing, but
the netem usage of skb->cb needs to be integrated better; I failed
to move it to the qdisc_skb_cb so far because of circular includes.
But nothing unfixable. I'm mostly interested in whether the current
size-tables can express what you need for ATM, since I wasn't able to
understand the big comment in tc_core.c in your patch.


[-- Attachment #2: 01.diff --]
[-- Type: text/plain, Size: 13169 bytes --]

[NET_SCHED]: Add accessor function for packet length for qdiscs

Signed-off-by: Patrick McHardy <kaber@trash.net>

---
commit 2a6508576111d82246ee018edbcc4b0f0d18acad
tree 8be27ab6040ea90ed11728763e5b8fcf9e221b67
parent 31304c909e6945b005af62cd55a582e9c010a0b4
author Patrick McHardy <kaber@trash.net> Tue, 04 Jul 2006 15:03:01 +0200
committer Patrick McHardy <kaber@trash.net> Tue, 04 Jul 2006 15:03:01 +0200

 include/net/sch_generic.h |    9 +++++++--
 net/sched/sch_atm.c       |    4 ++--
 net/sched/sch_cbq.c       |   12 ++++++------
 net/sched/sch_dsmark.c    |    2 +-
 net/sched/sch_fifo.c      |    2 +-
 net/sched/sch_gred.c      |   12 ++++++------
 net/sched/sch_hfsc.c      |    8 ++++----
 net/sched/sch_htb.c       |    8 ++++----
 net/sched/sch_netem.c     |    6 +++---
 net/sched/sch_prio.c      |    2 +-
 net/sched/sch_red.c       |    2 +-
 net/sched/sch_sfq.c       |   14 +++++++-------
 net/sched/sch_tbf.c       |    6 +++---
 net/sched/sch_teql.c      |    4 ++--
 14 files changed, 48 insertions(+), 43 deletions(-)

diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index b0e9108..75d7a55 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -184,12 +184,17 @@ tcf_destroy(struct tcf_proto *tp)
 	kfree(tp);
 }
 
+static inline unsigned int qdisc_tx_len(struct sk_buff *skb)
+{
+	return skb->len;
+}
+
 static inline int __qdisc_enqueue_tail(struct sk_buff *skb, struct Qdisc *sch,
 				       struct sk_buff_head *list)
 {
 	__skb_queue_tail(list, skb);
-	sch->qstats.backlog += skb->len;
-	sch->bstats.bytes += skb->len;
+	sch->qstats.backlog += qdisc_tx_len(skb);
+	sch->bstats.bytes += qdisc_tx_len(skb);
 	sch->bstats.packets++;
 
 	return NET_XMIT_SUCCESS;
diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c
index dbf44da..4df305e 100644
--- a/net/sched/sch_atm.c
+++ b/net/sched/sch_atm.c
@@ -453,9 +453,9 @@ #endif
 		if (flow) flow->qstats.drops++;
 		return ret;
 	}
-	sch->bstats.bytes += skb->len;
+	sch->bstats.bytes += qdisc_tx_len(skb);
 	sch->bstats.packets++;
-	flow->bstats.bytes += skb->len;
+	flow->bstats.bytes += qdisc_tx_len(skb);
 	flow->bstats.packets++;
 	/*
 	 * Okay, this may seem weird. We pretend we've dropped the packet if
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index 80b7f6a..5d705e2 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -404,7 +404,7 @@ static int
 cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 {
 	struct cbq_sched_data *q = qdisc_priv(sch);
-	int len = skb->len;
+	int len = qdisc_tx_len(skb);
 	int ret;
 	struct cbq_class *cl = cbq_classify(skb, sch, &ret);
 
@@ -688,7 +688,7 @@ #ifdef CONFIG_NET_CLS_POLICE
 
 static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child)
 {
-	int len = skb->len;
+	int len = qdisc_tx_len(skb);
 	struct Qdisc *sch = child->__parent;
 	struct cbq_sched_data *q = qdisc_priv(sch);
 	struct cbq_class *cl = q->rx_class;
@@ -915,7 +915,7 @@ cbq_dequeue_prio(struct Qdisc *sch, int 
 			if (skb == NULL)
 				goto skip_class;
 
-			cl->deficit -= skb->len;
+			cl->deficit -= qdisc_tx_len(skb);
 			q->tx_class = cl;
 			q->tx_borrowed = borrow;
 			if (borrow != cl) {
@@ -923,11 +923,11 @@ #ifndef CBQ_XSTATS_BORROWS_BYTES
 				borrow->xstats.borrows++;
 				cl->xstats.borrows++;
 #else
-				borrow->xstats.borrows += skb->len;
-				cl->xstats.borrows += skb->len;
+				borrow->xstats.borrows += qdisc_tx_len(skb);
+				cl->xstats.borrows += qdisc_tx_len(skb);
 #endif
 			}
-			q->tx_len = skb->len;
+			q->tx_len = qdisc_tx_len(skb);
 
 			if (cl->deficit <= 0) {
 				q->active[prio] = cl;
diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c
index 11c8a21..53346c6 100644
--- a/net/sched/sch_dsmark.c
+++ b/net/sched/sch_dsmark.c
@@ -265,7 +265,7 @@ #endif
 		return err;
 	}
 
-	sch->bstats.bytes += skb->len;
+	sch->bstats.bytes += qdisc_tx_len(skb);
 	sch->bstats.packets++;
 	sch->q.qlen++;
 
diff --git a/net/sched/sch_fifo.c b/net/sched/sch_fifo.c
index c2689f4..ec99321 100644
--- a/net/sched/sch_fifo.c
+++ b/net/sched/sch_fifo.c
@@ -28,7 +28,7 @@ static int bfifo_enqueue(struct sk_buff 
 {
 	struct fifo_sched_data *q = qdisc_priv(sch);
 
-	if (likely(sch->qstats.backlog + skb->len <= q->limit))
+	if (likely(sch->qstats.backlog + qdisc_tx_len(skb) <= q->limit))
 		return qdisc_enqueue_tail(skb, sch);
 
 	return qdisc_reshape_fail(skb, sch);
diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c
index 0cafdd5..f0bf5d7 100644
--- a/net/sched/sch_gred.c
+++ b/net/sched/sch_gred.c
@@ -189,7 +189,7 @@ static int gred_enqueue(struct sk_buff *
 	}
 
 	q->packetsin++;
-	q->bytesin += skb->len;
+	q->bytesin += qdisc_tx_len(skb);
 
 	if (gred_wred_mode(t))
 		gred_load_wred_set(t, q);
@@ -227,8 +227,8 @@ static int gred_enqueue(struct sk_buff *
 			break;
 	}
 
-	if (q->backlog + skb->len <= q->limit) {
-		q->backlog += skb->len;
+	if (q->backlog + qdisc_tx_len(skb) <= q->limit) {
+		q->backlog += qdisc_tx_len(skb);
 		return qdisc_enqueue_tail(skb, sch);
 	}
 
@@ -255,7 +255,7 @@ static int gred_requeue(struct sk_buff *
 	} else {
 		if (red_is_idling(&q->parms))
 			red_end_of_idle_period(&q->parms);
-		q->backlog += skb->len;
+		q->backlog += qdisc_tx_len(skb);
 	}
 
 	return qdisc_requeue(skb, sch);
@@ -278,7 +278,7 @@ static struct sk_buff *gred_dequeue(stru
 				       "VQ 0x%x after dequeue, screwing up "
 				       "backlog.\n", tc_index_to_dp(skb));
 		} else {
-			q->backlog -= skb->len;
+			q->backlog -= qdisc_tx_len(skb);
 
 			if (!q->backlog && !gred_wred_mode(t))
 				red_start_of_idle_period(&q->parms);
@@ -300,7 +300,7 @@ static unsigned int gred_drop(struct Qdi
 
 	skb = qdisc_dequeue_tail(sch);
 	if (skb) {
-		unsigned int len = skb->len;
+		unsigned int len = qdisc_tx_len(skb);
 		struct gred_sched_data *q;
 		u16 dp = tc_index_to_dp(skb);
 
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index 6b1b4a9..3fc8351 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -942,7 +942,7 @@ qdisc_peek_len(struct Qdisc *sch)
 			printk("qdisc_peek_len: non work-conserving qdisc ?\n");
 		return 0;
 	}
-	len = skb->len;
+	len = qdisc_tx_len(skb);
 	if (unlikely(sch->ops->requeue(skb, sch) != NET_XMIT_SUCCESS)) {
 		if (net_ratelimit())
 			printk("qdisc_peek_len: failed to requeue\n");
@@ -1648,7 +1648,7 @@ hfsc_enqueue(struct sk_buff *skb, struct
 		return err;
 	}
 
-	len = skb->len;
+	len = qdisc_tx_len(skb);
 	err = cl->qdisc->enqueue(skb, cl->qdisc);
 	if (unlikely(err != NET_XMIT_SUCCESS)) {
 		cl->qstats.drops++;
@@ -1712,9 +1712,9 @@ hfsc_dequeue(struct Qdisc *sch)
 		return NULL;
 	}
 
-	update_vf(cl, skb->len, cur_time);
+	update_vf(cl, qdisc_tx_len(skb), cur_time);
 	if (realtime)
-		cl->cl_cumul += skb->len;
+		cl->cl_cumul += qdisc_tx_len(skb);
 
 	if (cl->qdisc->q.qlen != 0) {
 		if (cl->cl_flags & HFSC_RSC) {
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 34afe41..b26fa9a 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -733,12 +733,12 @@ #endif
 	cl->qstats.drops++;
 	return NET_XMIT_DROP;
     } else {
-	cl->bstats.packets++; cl->bstats.bytes += skb->len;
+	cl->bstats.packets++; cl->bstats.bytes += qdisc_tx_len(skb);
 	htb_activate (q,cl);
     }
 
     sch->q.qlen++;
-    sch->bstats.packets++; sch->bstats.bytes += skb->len;
+    sch->bstats.packets++; sch->bstats.bytes += qdisc_tx_len(skb);
     HTB_DBG(1,1,"htb_enq_ok cl=%X skb=%p\n",(cl && cl != HTB_DIRECT)?cl->classid:0,skb);
     return NET_XMIT_SUCCESS;
 }
@@ -1067,7 +1067,7 @@ next:
 	} while (cl != start);
 
 	if (likely(skb != NULL)) {
-		if ((cl->un.leaf.deficit[level] -= skb->len) < 0) {
+		if ((cl->un.leaf.deficit[level] -= qdisc_tx_len(skb)) < 0) {
 			HTB_DBG(4,2,"htb_next_cl oldptr=%p quant_add=%d\n",
 				level?cl->parent->un.inner.ptr[prio]:q->ptr[0][prio],cl->un.leaf.quantum);
 			cl->un.leaf.deficit[level] += cl->un.leaf.quantum;
@@ -1077,7 +1077,7 @@ next:
 		   gives us slightly better performance */
 		if (!cl->un.leaf.q->q.qlen)
 			htb_deactivate (q,cl);
-		htb_charge_class (q,cl,level,skb->len);
+		htb_charge_class (q,cl,level,qdisc_tx_len(skb));
 	}
 	return skb;
 }
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index c5bd806..aa97ecb 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -225,7 +225,7 @@ static int netem_enqueue(struct sk_buff 
 
 	if (likely(ret == NET_XMIT_SUCCESS)) {
 		sch->q.qlen++;
-		sch->bstats.bytes += skb->len;
+		sch->bstats.bytes += qdisc_tx_len(skb);
 		sch->bstats.packets++;
 	} else
 		sch->qstats.drops++;
@@ -507,8 +507,8 @@ static int tfifo_enqueue(struct sk_buff 
 
 		__skb_queue_after(list, skb, nskb);
 
-		sch->qstats.backlog += nskb->len;
-		sch->bstats.bytes += nskb->len;
+		sch->qstats.backlog += qdisc_tx_len(nskb);
+		sch->bstats.bytes += qdisc_tx_len(nskb);
 		sch->bstats.packets++;
 
 		return NET_XMIT_SUCCESS;
diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
index a5fa03c..2175732 100644
--- a/net/sched/sch_prio.c
+++ b/net/sched/sch_prio.c
@@ -99,7 +99,7 @@ #ifdef CONFIG_NET_CLS_ACT
 #endif
 
 	if ((ret = qdisc->enqueue(skb, qdisc)) == NET_XMIT_SUCCESS) {
-		sch->bstats.bytes += skb->len;
+		sch->bstats.bytes += qdisc_tx_len(skb);
 		sch->bstats.packets++;
 		sch->q.qlen++;
 		return NET_XMIT_SUCCESS;
diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c
index d65cadd..24ec0b2 100644
--- a/net/sched/sch_red.c
+++ b/net/sched/sch_red.c
@@ -95,7 +95,7 @@ static int red_enqueue(struct sk_buff *s
 
 	ret = child->enqueue(skb, child);
 	if (likely(ret == NET_XMIT_SUCCESS)) {
-		sch->bstats.bytes += skb->len;
+		sch->bstats.bytes += qdisc_tx_len(skb);
 		sch->bstats.packets++;
 		sch->q.qlen++;
 	} else {
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index d0d6e59..2a57d0d 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -225,7 +225,7 @@ static unsigned int sfq_drop(struct Qdis
 	if (d > 1) {
 		sfq_index x = q->dep[d+SFQ_DEPTH].next;
 		skb = q->qs[x].prev;
-		len = skb->len;
+		len = qdisc_tx_len(skb);
 		__skb_unlink(skb, &q->qs[x]);
 		kfree_skb(skb);
 		sfq_dec(q, x);
@@ -241,7 +241,7 @@ static unsigned int sfq_drop(struct Qdis
 		q->next[q->tail] = q->next[d];
 		q->allot[q->next[d]] += q->quantum;
 		skb = q->qs[d].prev;
-		len = skb->len;
+		len = qdisc_tx_len(skb);
 		__skb_unlink(skb, &q->qs[d]);
 		kfree_skb(skb);
 		sfq_dec(q, d);
@@ -267,7 +267,7 @@ sfq_enqueue(struct sk_buff *skb, struct 
 		q->ht[hash] = x = q->dep[SFQ_DEPTH].next;
 		q->hash[x] = hash;
 	}
-	sch->qstats.backlog += skb->len;
+	sch->qstats.backlog += qdisc_tx_len(skb);
 	__skb_queue_tail(&q->qs[x], skb);
 	sfq_inc(q, x);
 	if (q->qs[x].qlen == 1) {		/* The flow is new */
@@ -282,7 +282,7 @@ sfq_enqueue(struct sk_buff *skb, struct 
 		}
 	}
 	if (++sch->q.qlen < q->limit-1) {
-		sch->bstats.bytes += skb->len;
+		sch->bstats.bytes += qdisc_tx_len(skb);
 		sch->bstats.packets++;
 		return 0;
 	}
@@ -303,7 +303,7 @@ sfq_requeue(struct sk_buff *skb, struct 
 		q->ht[hash] = x = q->dep[SFQ_DEPTH].next;
 		q->hash[x] = hash;
 	}
-	sch->qstats.backlog += skb->len;
+	sch->qstats.backlog += qdisc_tx_len(skb);
 	__skb_queue_head(&q->qs[x], skb);
 	sfq_inc(q, x);
 	if (q->qs[x].qlen == 1) {		/* The flow is new */
@@ -347,7 +347,7 @@ sfq_dequeue(struct Qdisc* sch)
 	skb = __skb_dequeue(&q->qs[a]);
 	sfq_dec(q, a);
 	sch->q.qlen--;
-	sch->qstats.backlog -= skb->len;
+	sch->qstats.backlog -= qdisc_tx_len(skb);
 
 	/* Is the slot empty? */
 	if (q->qs[a].qlen == 0) {
@@ -359,7 +359,7 @@ sfq_dequeue(struct Qdisc* sch)
 		}
 		q->next[q->tail] = a;
 		q->allot[a] += q->quantum;
-	} else if ((q->allot[a] -= skb->len) <= 0) {
+	} else if ((q->allot[a] -= qdisc_tx_len(skb)) <= 0) {
 		q->tail = a;
 		a = q->next[a];
 		q->allot[a] += q->quantum;
diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c
index d9a5d29..c87b0e6 100644
--- a/net/sched/sch_tbf.c
+++ b/net/sched/sch_tbf.c
@@ -139,7 +139,7 @@ static int tbf_enqueue(struct sk_buff *s
 	struct tbf_sched_data *q = qdisc_priv(sch);
 	int ret;
 
-	if (skb->len > q->max_size) {
+	if (qdisc_tx_len(skb) > q->max_size) {
 		sch->qstats.drops++;
 #ifdef CONFIG_NET_CLS_POLICE
 		if (sch->reshape_fail == NULL || sch->reshape_fail(skb, sch))
@@ -155,7 +155,7 @@ #endif
 	}
 
 	sch->q.qlen++;
-	sch->bstats.bytes += skb->len;
+	sch->bstats.bytes += qdisc_tx_len(skb);
 	sch->bstats.packets++;
 	return 0;
 }
@@ -204,7 +204,7 @@ static struct sk_buff *tbf_dequeue(struc
 		psched_time_t now;
 		long toks, delay;
 		long ptoks = 0;
-		unsigned int len = skb->len;
+		unsigned int len = qdisc_tx_len(skb);
 
 		PSCHED_GET_TIME(now);
 
diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c
index 4c16ad5..538f63f 100644
--- a/net/sched/sch_teql.c
+++ b/net/sched/sch_teql.c
@@ -97,7 +97,7 @@ teql_enqueue(struct sk_buff *skb, struct
 
 	__skb_queue_tail(&q->q, skb);
 	if (q->q.qlen <= dev->tx_queue_len) {
-		sch->bstats.bytes += skb->len;
+		sch->bstats.bytes += qdisc_tx_len(skb);
 		sch->bstats.packets++;
 		return 0;
 	}
@@ -278,7 +278,7 @@ static int teql_master_xmit(struct sk_bu
 	struct Qdisc *start, *q;
 	int busy;
 	int nores;
-	int len = skb->len;
+	int len = qdisc_tx_len(skb);
 	struct sk_buff *skb_res = NULL;
 
 	start = master->slaves;

[-- Attachment #3: 02.diff --]
[-- Type: text/plain, Size: 2212 bytes --]

[NET_SCHED]: Move top-level device queueing code to separate function

Signed-off-by: Patrick McHardy <kaber@trash.net>

---
commit a39585afe71dafab96208515a8fa99c92b108fee
tree fbb7672a3061a38edc9f75d3fb8f34652796b109
parent 2a6508576111d82246ee018edbcc4b0f0d18acad
author Patrick McHardy <kaber@trash.net> Tue, 04 Jul 2006 15:03:28 +0200
committer Patrick McHardy <kaber@trash.net> Tue, 04 Jul 2006 15:03:28 +0200

 include/net/pkt_sched.h |    1 +
 net/core/dev.c          |   10 +---------
 net/sched/sch_generic.c |   12 ++++++++++++
 3 files changed, 14 insertions(+), 9 deletions(-)

diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h
index 1925c65..44cf69e 100644
--- a/include/net/pkt_sched.h
+++ b/include/net/pkt_sched.h
@@ -224,6 +224,7 @@ extern struct qdisc_rate_table *qdisc_ge
 		struct rtattr *tab);
 extern void qdisc_put_rtab(struct qdisc_rate_table *tab);
 
+extern int qdisc_enqueue_root(struct net_device *dev, struct sk_buff *skb);
 extern void __qdisc_run(struct net_device *dev);
 
 static inline void qdisc_run(struct net_device *dev)
diff --git a/net/core/dev.c b/net/core/dev.c
index 066a60a..8599120 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1449,15 +1449,7 @@ #ifdef CONFIG_NET_CLS_ACT
 	skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_EGRESS);
 #endif
 	if (q->enqueue) {
-		/* Grab device queue */
-		spin_lock(&dev->queue_lock);
-
-		rc = q->enqueue(skb, q);
-
-		qdisc_run(dev);
-
-		spin_unlock(&dev->queue_lock);
-		rc = rc == NET_XMIT_BYPASS ? NET_XMIT_SUCCESS : rc;
+		rc = qdisc_enqueue_root(dev, skb);
 		goto out;
 	}
 
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index d735f51..2bab466 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -77,6 +77,18 @@ void qdisc_unlock_tree(struct net_device
    if one is grabbed, another must be free.
  */
 
+int qdisc_enqueue_root(struct net_device *dev, struct sk_buff *skb)
+{
+	int ret;
+
+	spin_lock(&dev->queue_lock);
+	ret = dev->qdisc->enqueue(skb, dev->qdisc);
+	qdisc_run(dev);
+	spin_unlock(&dev->queue_lock);
+
+	return ret == NET_XMIT_BYPASS ? NET_XMIT_SUCCESS : ret;
+}
+
 
 /* Kick device.
    Note, that this procedure can be called by a watchdog timer, so that

[-- Attachment #4: 03.diff --]
[-- Type: text/plain, Size: 8494 bytes --]

diff --git a/include/linux/pkt_sched.h b/include/linux/pkt_sched.h
index d10f353..2ce55d5 100644
--- a/include/linux/pkt_sched.h
+++ b/include/linux/pkt_sched.h
@@ -83,6 +83,21 @@ struct tc_ratespec
 	__u32		rate;
 };
 
+struct tc_sizespec
+{
+	unsigned int	cell_log;
+	unsigned int	addend;
+};
+
+enum {
+	TCA_STAB_UNSPEC,
+	TCA_STAB_BASE,
+	TCA_STAB_DATA,
+	__TCA_STAB_MAX
+};
+
+#define TCA_STAB_MAX (__TCA_STAB_MAX - 1)
+
 /* FIFO section */
 
 struct tc_fifo_qopt
diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h
index facd9ee..167cc22 100644
--- a/include/linux/rtnetlink.h
+++ b/include/linux/rtnetlink.h
@@ -821,6 +821,7 @@ enum
 	TCA_RATE,
 	TCA_FCNT,
 	TCA_STATS2,
+	TCA_STAB,
 	__TCA_MAX
 };
 
diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h
index 44cf69e..8fd9a42 100644
--- a/include/net/pkt_sched.h
+++ b/include/net/pkt_sched.h
@@ -223,6 +223,7 @@ extern struct Qdisc *qdisc_lookup_class(
 extern struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r,
 		struct rtattr *tab);
 extern void qdisc_put_rtab(struct qdisc_rate_table *tab);
+extern void qdisc_put_stab(struct qdisc_size_table *tab);
 
 extern int qdisc_enqueue_root(struct net_device *dev, struct sk_buff *skb);
 extern void __qdisc_run(struct net_device *dev);
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 75d7a55..76c50a1 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -23,6 +23,15 @@ struct qdisc_rate_table
 	int		refcnt;
 };
 
+struct qdisc_size_table
+{
+	struct list_head	list;
+	struct tc_sizespec	size;
+	int			refcnt;
+	unsigned int		tsize;
+	u32			data[];
+};
+
 struct Qdisc
 {
 	int 			(*enqueue)(struct sk_buff *skb, struct Qdisc *dev);
@@ -33,6 +42,7 @@ #define TCQ_F_THROTTLED	2
 #define TCQ_F_INGRESS	4
 	int			padded;
 	struct Qdisc_ops	*ops;
+	struct qdisc_size_table	*stab;
 	u32			handle;
 	u32			parent;
 	atomic_t		refcnt;
@@ -184,9 +194,19 @@ tcf_destroy(struct tcf_proto *tp)
 	kfree(tp);
 }
 
+struct qdisc_skb_cb {
+	unsigned int	len;
+	char		data[];
+};
+
+static inline struct qdisc_skb_cb *qdisc_skb_cb(struct sk_buff *skb)
+{
+	return (struct qdisc_skb_cb *)skb->cb;
+}
+
 static inline unsigned int qdisc_tx_len(struct sk_buff *skb)
 {
-	return skb->len;
+	return qdisc_skb_cb(skb)->len;
 }
 
 static inline int __qdisc_enqueue_tail(struct sk_buff *skb, struct Qdisc *sch,
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index c7844ba..479fc85 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -286,6 +286,78 @@ void qdisc_put_rtab(struct qdisc_rate_ta
 	}
 }
 
+static LIST_HEAD(qdisc_stab_list);
+
+/* Look up or create a shared size table from a nested TCA_STAB attribute.
+ * Returns the table with a reference held, or NULL with *err set. */
+static struct qdisc_size_table *qdisc_get_stab(struct rtattr *tab, int *err)
+{
+	struct qdisc_size_table *stab;
+	struct rtattr *tb[TCA_STAB_MAX];
+	struct tc_sizespec *size;
+	unsigned int tsize = 0;
+	u32 *data = NULL;
+
+	*err = -EINVAL;
+	if (rtattr_parse_nested(tb, TCA_STAB_MAX, tab))
+		return NULL;
+	if (tb[TCA_STAB_BASE-1] == NULL ||
+	    RTA_PAYLOAD(tb[TCA_STAB_BASE-1]) < sizeof(struct tc_sizespec))
+		return NULL;
+	size = RTA_DATA(tb[TCA_STAB_BASE-1]);
+	if (tb[TCA_STAB_DATA-1] != NULL) {
+		data = RTA_DATA(tb[TCA_STAB_DATA-1]);
+		tsize = RTA_PAYLOAD(tb[TCA_STAB_DATA-1]) / sizeof(u32);
+	}
+	list_for_each_entry(stab, &qdisc_stab_list, list) {
+		if (stab->tsize != tsize ||
+		    memcmp(&stab->size, size, sizeof(*size)) ||
+		    (tsize && memcmp(stab->data, data, sizeof(u32) * tsize)))
+			continue;
+		stab->refcnt++;	/* identical table exists: share it */
+		*err = 0;
+		return stab;
+	}
+
+	*err = -ENOMEM;
+	stab = kmalloc(sizeof(*stab) + sizeof(u32) * tsize, GFP_KERNEL);
+	if (stab == NULL)
+		return NULL;
+	memcpy(&stab->size, size, sizeof(stab->size));
+	stab->tsize = tsize;
+	stab->refcnt = 1;	/* kmalloc() does not zero the allocation */
+	if (tsize > 0)
+		memcpy(stab->data, data, sizeof(u32) * tsize);
+	list_add_tail(&stab->list, &qdisc_stab_list);
+	*err = 0;
+	return stab;
+}
+
+void qdisc_put_stab(struct qdisc_size_table *stab)
+{
+	if (!stab || --stab->refcnt)
+		return;
+	list_del(&stab->list);
+	kfree(stab);
+}
+
+static int
+qdisc_dump_stab(struct sk_buff *skb, struct qdisc_size_table *stab)
+{
+	unsigned char *b = skb->tail;
+	struct rtattr *rta = (struct rtattr *)b;
+
+	RTA_PUT(skb, TCA_STAB, 0, NULL);
+	RTA_PUT(skb, TCA_STAB_BASE, sizeof(stab->size), &stab->size);
+	RTA_PUT(skb, TCA_STAB_DATA, sizeof(stab->data[0]) * stab->tsize,
+		stab->data);
+	rta->rta_len = skb->tail - b;
+	return skb->len;
+
+rtattr_failure:
+	skb_trim(skb, b - skb->data);
+	return -1;
+}
 
 /* Allocate an unique handle from space managed by kernel */
 
@@ -453,6 +525,11 @@ #endif
 	sch->handle = handle;
 
 	if (!ops->init || (err = ops->init(sch, tca[TCA_OPTIONS-1])) == 0) {
+		if (tca[TCA_STAB-1]) {
+			sch->stab = qdisc_get_stab(tca[TCA_STAB-1], &err);
+			if (sch->stab == NULL)
+				goto err_out3;
+		}
 #ifdef CONFIG_NET_ESTIMATOR
 		if (tca[TCA_RATE-1]) {
 			err = gen_new_estimator(&sch->bstats, &sch->rate_est,
@@ -477,6 +554,7 @@ #endif
 		return sch;
 	}
 err_out3:
+	qdisc_put_stab(sch->stab);
 	dev_put(dev);
 	kfree((char *) sch - sch->padded);
 err_out2:
@@ -488,15 +566,26 @@ err_out:
 
 static int qdisc_change(struct Qdisc *sch, struct rtattr **tca)
 {
-	if (tca[TCA_OPTIONS-1]) {
-		int err;
+	int err;
 
+	if (tca[TCA_OPTIONS-1]) {
 		if (sch->ops->change == NULL)
 			return -EINVAL;
 		err = sch->ops->change(sch, tca[TCA_OPTIONS-1]);
 		if (err)
 			return err;
 	}
+	if (tca[TCA_STAB-1]) {
+		struct qdisc_size_table *stab;
+
+		stab = qdisc_get_stab(tca[TCA_STAB-1], &err);
+		if (stab == NULL)
+			return err;
+		spin_lock_bh(&sch->dev->queue_lock);
+		qdisc_put_stab(sch->stab);
+		sch->stab = stab;
+		spin_unlock_bh(&sch->dev->queue_lock);
+	}
 #ifdef CONFIG_NET_ESTIMATOR
 	if (tca[TCA_RATE-1])
 		gen_replace_estimator(&sch->bstats, &sch->rate_est,
@@ -769,6 +858,9 @@ static int tc_fill_qdisc(struct sk_buff 
 		goto rtattr_failure;
 	q->qstats.qlen = q->q.qlen;
 
+	if (q->stab != NULL && qdisc_dump_stab(skb, q->stab) < 0)
+		goto rtattr_failure;
+
 	if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS,
 			TCA_XSTATS, q->stats_lock, &d) < 0)
 		goto rtattr_failure;
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 2bab466..9022650 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -67,6 +67,21 @@ void qdisc_unlock_tree(struct net_device
 	write_unlock_bh(&qdisc_tree_lock);
 }
 
+static void qdisc_init_len(struct sk_buff *skb, struct Qdisc *q)
+{
+	unsigned int idx, len = skb->len;
+	struct qdisc_size_table *stab = q->stab;
+
+	if (stab == NULL)
+		goto out;
+	idx = len >> stab->size.cell_log;
+	if (idx < stab->tsize)
+		len = stab->data[idx];
+	len += stab->size.addend;
+out:
+	((struct qdisc_skb_cb *)skb->cb)->len = len;
+}
+
 /* 
    dev->queue_lock serializes queue accesses for this device
    AND dev->qdisc pointer itself.
@@ -82,6 +97,7 @@ int qdisc_enqueue_root(struct net_device
 	int ret;
 
 	spin_lock(&dev->queue_lock);
+	qdisc_init_len(skb, dev->qdisc);
 	ret = dev->qdisc->enqueue(skb, dev->qdisc);
 	qdisc_run(dev);
 	spin_unlock(&dev->queue_lock);
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index aa97ecb..15dde88 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -148,7 +148,7 @@ static long tabledist(unsigned long mu, 
 static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 {
 	struct netem_sched_data *q = qdisc_priv(sch);
-	struct netem_skb_cb *cb = (struct netem_skb_cb *)skb->cb;
+	struct netem_skb_cb *cb = (struct netem_skb_cb *)qdisc_skb_cb(skb)->data;
 	struct sk_buff *skb2;
 	int ret;
 	int count = 1;
@@ -268,7 +268,7 @@ static struct sk_buff *netem_dequeue(str
 	skb = q->qdisc->dequeue(q->qdisc);
 	if (skb) {
 		const struct netem_skb_cb *cb
-			= (const struct netem_skb_cb *)skb->cb;
+			= (const struct netem_skb_cb *)qdisc_skb_cb(skb)->data;
 		psched_time_t now;
 
 		/* if more time remaining? */
@@ -493,13 +493,13 @@ static int tfifo_enqueue(struct sk_buff 
 	struct fifo_sched_data *q = qdisc_priv(sch);
 	struct sk_buff_head *list = &sch->q;
 	const struct netem_skb_cb *ncb
-		= (const struct netem_skb_cb *)nskb->cb;
+		= (const struct netem_skb_cb *)qdisc_skb_cb(nskb)->data;
 	struct sk_buff *skb;
 
 	if (likely(skb_queue_len(list) < q->limit)) {
 		skb_queue_reverse_walk(list, skb) {
 			const struct netem_skb_cb *cb
-				= (const struct netem_skb_cb *)skb->cb;
+				= (const struct netem_skb_cb *)qdisc_skb_cb(skb)->data;
 
 			if (!PSCHED_TLESS(ncb->time_to_send, cb->time_to_send))
 				break;

[-- Attachment #5: iproute.diff --]
[-- Type: text/plain, Size: 4483 bytes --]

diff --git a/include/linux/pkt_sched.h b/include/linux/pkt_sched.h
index d10f353..2ce55d5 100644
--- a/include/linux/pkt_sched.h
+++ b/include/linux/pkt_sched.h
@@ -83,6 +83,21 @@ struct tc_ratespec
 	__u32		rate;
 };
 
+struct tc_sizespec
+{
+	unsigned int	cell_log;
+	unsigned int	addend;
+};
+
+enum {
+	TCA_STAB_UNSPEC,
+	TCA_STAB_BASE,
+	TCA_STAB_DATA,
+	__TCA_STAB_MAX
+};
+
+#define TCA_STAB_MAX (__TCA_STAB_MAX - 1)
+
 /* FIFO section */
 
 struct tc_fifo_qopt
diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h
index 5e33a20..addf5fb 100644
--- a/include/linux/rtnetlink.h
+++ b/include/linux/rtnetlink.h
@@ -821,6 +821,7 @@ enum
 	TCA_RATE,
 	TCA_FCNT,
 	TCA_STATS2,
+	TCA_STAB,
 	__TCA_MAX
 };
 
diff --git a/tc/tc_qdisc.c b/tc/tc_qdisc.c
index e9174ab..c38fa87 100644
--- a/tc/tc_qdisc.c
+++ b/tc/tc_qdisc.c
@@ -41,10 +41,79 @@ static int usage(void)
 	return -1;
 }
 
+static int parse_stab(int *argcp, char ***argvp, struct tc_sizespec *stab,
+		      __u32 **datap)
+{
+	int argc = *argcp;
+	char **argv = *argvp;
+
+	NEXT_ARG();
+	while (argc > 0) {
+		if (matches("overhead", *argv) == 0) {
+			NEXT_ARG();
+			if (stab->addend)
+				duparg("overhead", *argv);
+			if (get_size(&stab->addend, *argv))
+				return -1;
+			NEXT_ARG();
+		} else if (matches("cell_log", *argv) == 0) {
+			NEXT_ARG();
+			if (stab->cell_log)
+				duparg("cell_log", *argv);
+			if (get_u32(&stab->cell_log, *argv, 0))
+				return -1;
+			NEXT_ARG();
+		} else if (get_size(*datap, *argv) == 0) {
+			argv++, argc--;
+			++*datap;
+		} else
+			break;
+	}
+	if (!stab->addend && !stab->cell_log)
+		return -1;
+	*argcp = argc;
+	*argvp = argv;
+	return 0;
+}
+
+static void print_stab(FILE *f, char *prefix, struct rtattr *tab)
+{
+	struct rtattr *tb[TCA_STAB_MAX+1];
+	struct tc_sizespec *size;
+	unsigned int i;
+	__u32 *data;
+	SPRINT_BUF(buf);
+
+	parse_rtattr_nested(tb, TCA_STAB_MAX, tab);
+	if (tb[TCA_STAB_BASE] == NULL ||
+	    RTA_PAYLOAD(tb[TCA_STAB_BASE]) < sizeof(struct tc_sizespec))
+		return;
+	fprintf(f, "%s", prefix);
+	size = RTA_DATA(tb[TCA_STAB_BASE]);
+	if (size->addend) {
+		print_size(buf, SPRINT_BSIZE-1, size->addend);
+		fprintf(f, "overhead %s ", buf);
+	}
+       	if (size->cell_log)
+		fprintf(f, "cell_log %u ", size->cell_log);
+	if (tb[TCA_STAB_DATA] == NULL)
+		return;
+	data = RTA_DATA(tb[TCA_STAB_DATA]);
+	for (i = 0; i < RTA_PAYLOAD(tb[TCA_STAB_DATA]) / sizeof(__u32); i++) {
+		print_size(buf, SPRINT_BSIZE-1, data[i]);
+		fprintf(f, "%s ", buf);
+	}
+}
+
 int tc_qdisc_modify(int cmd, unsigned flags, int argc, char **argv)
 {
 	struct qdisc_util *q = NULL;
 	struct tc_estimator est;
+	struct {
+		struct tc_sizespec size;
+		__u32 data[256];
+	} stab;
+	__u32 *stabdata = &stab.data[0];
 	char  d[16];
 	char  k[16];
 	struct {
@@ -55,6 +124,7 @@ int tc_qdisc_modify(int cmd, unsigned fl
 
 	memset(&req, 0, sizeof(req));
 	memset(&est, 0, sizeof(est));
+	memset(&stab, 0, sizeof(stab));
 	memset(&d, 0, sizeof(d));
 	memset(&k, 0, sizeof(k));
 
@@ -108,6 +178,10 @@ #endif
 		} else if (matches(*argv, "estimator") == 0) {
 			if (parse_estimator(&argc, &argv, &est))
 				return -1;
+		} else if (matches(*argv, "stab") == 0) {
+			if (parse_stab(&argc, &argv, &stab.size, &stabdata))
+				return -1;
+			continue;
 		} else if (matches(*argv, "help") == 0) {
 			usage();
 		} else {
@@ -124,6 +198,16 @@ #endif
 		addattr_l(&req.n, sizeof(req), TCA_KIND, k, strlen(k)+1);
 	if (est.ewma_log)
 		addattr_l(&req.n, sizeof(req), TCA_RATE, &est, sizeof(est));
+	if (stab.size.addend || stab.size.cell_log) {
+		struct rtattr *tail = NLMSG_TAIL(&req.n);
+
+		addattr_l(&req.n, sizeof(req), TCA_STAB, NULL, 0);
+		addattr_l(&req.n, sizeof(req), TCA_STAB_BASE, &stab.size,
+			  sizeof(stab.size));
+		addattr_l(&req.n, sizeof(req), TCA_STAB_DATA, stab.data,
+		          (void *)stabdata - (void *)stab.data);
+		tail->rta_len = (void *)NLMSG_TAIL(&req.n) - (void *)tail;
+	}
 
 	if (q) {
 		if (!q->parse_qopt) {
@@ -215,7 +299,7 @@ static int print_qdisc(const struct sock
 		q = get_qdisc_kind("prio");
 	else
 		q = get_qdisc_kind(RTA_DATA(tb[TCA_KIND]));
-	
+
 	if (tb[TCA_OPTIONS]) {
 		if (q)
 			q->print_qopt(q, fp, tb[TCA_OPTIONS]);
@@ -223,6 +307,12 @@ static int print_qdisc(const struct sock
 			fprintf(fp, "[cannot parse qdisc parameters]");
 	}
 	fprintf(fp, "\n");
+
+	if (tb[TCA_STAB]) {
+		print_stab(fp, " ", tb[TCA_STAB]);
+		fprintf(fp, "\n");
+	}
+
 	if (show_stats) {
 		struct rtattr *xstats = NULL;
 

  parent reply	other threads:[~2006-07-04 13:29 UTC|newest]

Thread overview: 56+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2006-06-14  9:40 [PATCH 0/2] NET: Accurate packet scheduling for ATM/ADSL Jesper Dangaard Brouer
2006-06-14 12:06 ` jamal
2006-06-14 12:55   ` Jesper Dangaard Brouer
2006-06-15 12:57     ` jamal
2006-06-15 13:16     ` jamal
2006-06-20  1:04       ` Patrick McHardy
2006-06-20 14:59         ` jamal
2006-06-20 15:16           ` Patrick McHardy
2006-06-21 12:21             ` Krzysztof Matusik
2006-06-21 12:54               ` Patrick McHardy
2006-06-21 14:33                 ` Krzysztof Matusik
2006-06-14 15:32   ` Andy Furniss
2006-06-20  0:54   ` Patrick McHardy
2006-06-20 14:56     ` jamal
2006-06-20 15:09       ` Patrick McHardy
2006-06-22 18:41         ` jamal
2006-06-23 14:32           ` Patrick McHardy
2006-06-24 14:39             ` jamal
2006-06-26 11:21               ` Patrick McHardy
2006-06-27 13:01                 ` jamal
2006-07-02  4:23                   ` Patrick McHardy
2006-07-02 13:59                     ` jamal
     [not found]   ` <1150287983.3246.27.camel@ras.pc.brisbane.lube>
     [not found]     ` <1150292693.5197.1.camel@jzny2>
     [not found]       ` <1150843471.17455.2.camel@ras.pc.brisbane.lube>
     [not found]         ` <15653CE98281AD4FBD7F70BCEE3666E53CD54A@comxexch01.comx.local>
     [not found]           ` <1151000966.5392.34.camel@jzny2>
2006-06-23 12:37             ` Russell Stuart
2006-06-23 15:21               ` Patrick McHardy
2006-06-26  0:45                 ` Russell Stuart
2006-06-26 11:10                   ` Patrick McHardy
2006-06-27  6:19                     ` Russell Stuart
2006-06-27 17:18                       ` Patrick McHardy
2006-07-04 13:29                       ` Patrick McHardy [this message]
2006-07-04 19:29                         ` jamal
2006-07-04 23:53                           ` Patrick McHardy
2006-07-06  0:39                         ` Russell Stuart
2006-07-07  8:00                           ` Patrick McHardy
2006-07-10  8:44                             ` Russell Stuart
2006-06-24 14:13               ` jamal
2006-06-26  4:23                 ` Russell Stuart
2006-07-18  2:06                 ` Russell Stuart
2006-07-18 13:35                   ` jamal
2006-07-18 21:46                   ` Andy Furniss
2006-07-19  1:02                     ` Russell Stuart
2006-07-19 14:42                       ` Andy Furniss
2006-07-19 14:54                         ` Patrick McHardy
2006-07-19 20:26                         ` [PATCH 0/2] NET: Accurate packet scheduling for ATM/ADSL (RTAB BUG) Jesper Dangaard Brouer
2006-07-19 21:00                           ` Alexey Kuznetsov
2006-07-20  5:47                             ` Russell Stuart
2006-07-20 23:49                               ` Alexey Kuznetsov
2006-07-19 14:50                       ` [PATCH 0/2] NET: Accurate packet scheduling for ATM/ADSL Patrick McHardy
2006-07-20  4:56                         ` Russell Stuart
2006-07-30 23:06                           ` Russell Stuart
2006-08-08 22:01                             ` Russell Stuart
2006-08-09 11:33                               ` jamal
2006-09-04 10:37                                 ` Russell Stuart
2006-06-14 14:27 ` Phillip Susi
2006-06-14 15:08   ` Jesper Dangaard Brouer
2006-06-20  5:35 ` Chris Wedgwood
2006-06-20  7:33   ` Jesper Dangaard Brouer

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=44AA6D25.9000707@trash.net \
    --to=kaber@trash.net \
    --cc=alan@lxorguk.ukuu.org.uk \
    --cc=hadi@cyberus.ca \
    --cc=hawk@diku.dk \
    --cc=netdev@vger.kernel.org \
    --cc=russell-tcatm@stuart.id.au \
    --cc=russell@stuart.id.au \
    --cc=shemminger@osdl.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).