netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v2 0/3] Add size table feature for qdiscs
@ 2008-07-19 23:34 Jussi Kivilinna
  2008-07-19 23:35 ` [PATCH v2 1/3] net_sched: Add qdisc_enqueue wrapper Jussi Kivilinna
                   ` (3 more replies)
  0 siblings, 4 replies; 29+ messages in thread
From: Jussi Kivilinna @ 2008-07-19 23:34 UTC (permalink / raw)
  To: Patrick McHardy; +Cc: netdev

Patch adds generic size table for qdiscs that is similar to the rate table
used by htb/tbf/cbq.

Changes since v1:
 - ingress qdisc / actions handled too
 - size of table is now dynamic
 - add qdisc_enqueue_root wrapper

---

Jussi Kivilinna (3):
      net_sched: Add size table for qdiscs
      net_sched: Add accessor function for packet length for qdiscs
      net_sched: Add qdisc_enqueue wrapper


 include/linux/pkt_sched.h |   20 ++++++
 include/linux/rtnetlink.h |    1 
 include/net/pkt_sched.h   |    1 
 include/net/sch_generic.h |   50 +++++++++++++--
 net/core/dev.c            |    4 +
 net/mac80211/wme.c        |    2 -
 net/sched/act_gact.c      |    2 -
 net/sched/act_ipt.c       |    2 -
 net/sched/act_mirred.c    |    4 +
 net/sched/act_nat.c       |    2 -
 net/sched/act_pedit.c     |    2 -
 net/sched/act_police.c    |    8 +-
 net/sched/act_simple.c    |    2 -
 net/sched/sch_api.c       |  151 ++++++++++++++++++++++++++++++++++++++++++++-
 net/sched/sch_atm.c       |    6 +-
 net/sched/sch_cbq.c       |   19 +++---
 net/sched/sch_dsmark.c    |    4 +
 net/sched/sch_fifo.c      |    2 -
 net/sched/sch_generic.c   |    1 
 net/sched/sch_gred.c      |   12 ++--
 net/sched/sch_hfsc.c      |   16 ++---
 net/sched/sch_htb.c       |   12 ++--
 net/sched/sch_ingress.c   |    2 -
 net/sched/sch_netem.c     |   27 +++++---
 net/sched/sch_prio.c      |    5 +
 net/sched/sch_red.c       |    4 +
 net/sched/sch_sfq.c       |   16 ++---
 net/sched/sch_tbf.c       |    9 +--
 net/sched/sch_teql.c      |    6 +-
 29 files changed, 302 insertions(+), 90 deletions(-)

-- 

^ permalink raw reply	[flat|nested] 29+ messages in thread

* [PATCH v2 1/3] net_sched: Add qdisc_enqueue wrapper
  2008-07-19 23:34 [PATCH v2 0/3] Add size table feature for qdiscs Jussi Kivilinna
@ 2008-07-19 23:35 ` Jussi Kivilinna
  2008-07-19 23:35 ` [PATCH v2 2/3] net_sched: Add accessor function for packet length for qdiscs Jussi Kivilinna
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 29+ messages in thread
From: Jussi Kivilinna @ 2008-07-19 23:35 UTC (permalink / raw)
  To: Patrick McHardy; +Cc: netdev

Signed-off-by: Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
---

 include/net/sch_generic.h |   10 ++++++++++
 net/core/dev.c            |    4 ++--
 net/mac80211/wme.c        |    2 +-
 net/sched/sch_atm.c       |    2 +-
 net/sched/sch_cbq.c       |    5 +++--
 net/sched/sch_dsmark.c    |    2 +-
 net/sched/sch_hfsc.c      |    2 +-
 net/sched/sch_htb.c       |    3 +--
 net/sched/sch_netem.c     |   20 ++++++++++++--------
 net/sched/sch_prio.c      |    3 ++-
 net/sched/sch_red.c       |    2 +-
 net/sched/sch_tbf.c       |    3 ++-
 12 files changed, 37 insertions(+), 21 deletions(-)

diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 8a44386..f396dff 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -306,6 +306,16 @@ static inline bool qdisc_tx_is_noop(const struct net_device *dev)
 	return true;
 }
 
+static inline int qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch)
+{
+	return sch->enqueue(skb, sch);
+}
+
+static inline int qdisc_enqueue_root(struct sk_buff *skb, struct Qdisc *sch)
+{
+	return qdisc_enqueue(skb, sch);
+}
+
 static inline int __qdisc_enqueue_tail(struct sk_buff *skb, struct Qdisc *sch,
 				       struct sk_buff_head *list)
 {
diff --git a/net/core/dev.c b/net/core/dev.c
index 065b981..2eed17b 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1781,7 +1781,7 @@ gso:
 
 		spin_lock(root_lock);
 
-		rc = q->enqueue(skb, q);
+		rc = qdisc_enqueue_root(skb, q);
 		qdisc_run(q);
 
 		spin_unlock(root_lock);
@@ -2083,7 +2083,7 @@ static int ing_filter(struct sk_buff *skb)
 	q = rxq->qdisc;
 	if (q) {
 		spin_lock(qdisc_lock(q));
-		result = q->enqueue(skb, q);
+		result = qdisc_enqueue_root(skb, q);
 		spin_unlock(qdisc_lock(q));
 	}
 
diff --git a/net/mac80211/wme.c b/net/mac80211/wme.c
index 6e8099e..07edda0 100644
--- a/net/mac80211/wme.c
+++ b/net/mac80211/wme.c
@@ -289,7 +289,7 @@ void ieee80211_requeue(struct ieee80211_local *local, int queue)
 		root_lock = qdisc_root_lock(qdisc);
 
 		spin_lock(root_lock);
-		qdisc->enqueue(skb, qdisc);
+		qdisc_enqueue_root(skb, qdisc);
 		spin_unlock(root_lock);
 	}
 
diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c
index 0de757e..68ed35e 100644
--- a/net/sched/sch_atm.c
+++ b/net/sched/sch_atm.c
@@ -429,7 +429,7 @@ static int atm_tc_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 #endif
 	}
 
-	ret = flow->q->enqueue(skb, flow->q);
+	ret = qdisc_enqueue(skb, flow->q);
 	if (ret != 0) {
 drop: __maybe_unused
 		sch->qstats.drops++;
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index a3953bb..1afe3ee 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -387,7 +387,8 @@ cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 #ifdef CONFIG_NET_CLS_ACT
 	cl->q->__parent = sch;
 #endif
-	if ((ret = cl->q->enqueue(skb, cl->q)) == NET_XMIT_SUCCESS) {
+	ret = qdisc_enqueue(skb, cl->q);
+	if (ret == NET_XMIT_SUCCESS) {
 		sch->q.qlen++;
 		sch->bstats.packets++;
 		sch->bstats.bytes+=len;
@@ -671,7 +672,7 @@ static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child)
 		q->rx_class = cl;
 		cl->q->__parent = sch;
 
-		if (cl->q->enqueue(skb, cl->q) == 0) {
+		if (qdisc_enqueue(skb, cl->q) == 0) {
 			sch->q.qlen++;
 			sch->bstats.packets++;
 			sch->bstats.bytes+=len;
diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c
index 3aafbd1..44d347e 100644
--- a/net/sched/sch_dsmark.c
+++ b/net/sched/sch_dsmark.c
@@ -252,7 +252,7 @@ static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 		}
 	}
 
-	err = p->q->enqueue(skb, p->q);
+	err = qdisc_enqueue(skb, p->q);
 	if (err != NET_XMIT_SUCCESS) {
 		sch->qstats.drops++;
 		return err;
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index 5090708..fd61ed6 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -1586,7 +1586,7 @@ hfsc_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 	}
 
 	len = skb->len;
-	err = cl->qdisc->enqueue(skb, cl->qdisc);
+	err = qdisc_enqueue(skb, cl->qdisc);
 	if (unlikely(err != NET_XMIT_SUCCESS)) {
 		cl->qstats.drops++;
 		sch->qstats.drops++;
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index ee48457..72b5a94 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -572,8 +572,7 @@ static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 		kfree_skb(skb);
 		return ret;
 #endif
-	} else if (cl->un.leaf.q->enqueue(skb, cl->un.leaf.q) !=
-		   NET_XMIT_SUCCESS) {
+	} else if (qdisc_enqueue(skb, cl->un.leaf.q) != NET_XMIT_SUCCESS) {
 		sch->qstats.drops++;
 		cl->qstats.drops++;
 		return NET_XMIT_DROP;
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index c5ea40c..13c4821 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -82,6 +82,12 @@ struct netem_skb_cb {
 	psched_time_t	time_to_send;
 };
 
+static inline struct netem_skb_cb *netem_skb_cb(struct sk_buff *skb)
+{
+	BUILD_BUG_ON(sizeof(skb->cb) < sizeof(struct netem_skb_cb));
+	return (struct netem_skb_cb *)skb->cb;
+}
+
 /* init_crandom - initialize correlated random number generator
  * Use entropy source for initial seed.
  */
@@ -184,7 +190,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 		u32 dupsave = q->duplicate; /* prevent duplicating a dup... */
 		q->duplicate = 0;
 
-		rootq->enqueue(skb2, rootq);
+		qdisc_enqueue_root(skb2, rootq);
 		q->duplicate = dupsave;
 	}
 
@@ -205,7 +211,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 		skb->data[net_random() % skb_headlen(skb)] ^= 1<<(net_random() % 8);
 	}
 
-	cb = (struct netem_skb_cb *)skb->cb;
+	cb = netem_skb_cb(skb);
 	if (q->gap == 0 		/* not doing reordering */
 	    || q->counter < q->gap 	/* inside last reordering gap */
 	    || q->reorder < get_crandom(&q->reorder_cor)) {
@@ -218,7 +224,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 		now = psched_get_time();
 		cb->time_to_send = now + delay;
 		++q->counter;
-		ret = q->qdisc->enqueue(skb, q->qdisc);
+		ret = qdisc_enqueue(skb, q->qdisc);
 	} else {
 		/*
 		 * Do re-ordering by putting one out of N packets at the front
@@ -277,8 +283,7 @@ static struct sk_buff *netem_dequeue(struct Qdisc *sch)
 
 	skb = q->qdisc->dequeue(q->qdisc);
 	if (skb) {
-		const struct netem_skb_cb *cb
-			= (const struct netem_skb_cb *)skb->cb;
+		const struct netem_skb_cb *cb = netem_skb_cb(skb);
 		psched_time_t now = psched_get_time();
 
 		/* if more time remaining? */
@@ -457,7 +462,7 @@ static int tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch)
 {
 	struct fifo_sched_data *q = qdisc_priv(sch);
 	struct sk_buff_head *list = &sch->q;
-	psched_time_t tnext = ((struct netem_skb_cb *)nskb->cb)->time_to_send;
+	psched_time_t tnext = netem_skb_cb(nskb)->time_to_send;
 	struct sk_buff *skb;
 
 	if (likely(skb_queue_len(list) < q->limit)) {
@@ -468,8 +473,7 @@ static int tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch)
 		}
 
 		skb_queue_reverse_walk(list, skb) {
-			const struct netem_skb_cb *cb
-				= (const struct netem_skb_cb *)skb->cb;
+			const struct netem_skb_cb *cb = netem_skb_cb(skb);
 
 			if (tnext >= cb->time_to_send)
 				break;
diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
index 536ca47..d29c2f8 100644
--- a/net/sched/sch_prio.c
+++ b/net/sched/sch_prio.c
@@ -81,7 +81,8 @@ prio_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 	}
 #endif
 
-	if ((ret = qdisc->enqueue(skb, qdisc)) == NET_XMIT_SUCCESS) {
+	ret = qdisc_enqueue(skb, qdisc);
+	if (ret == NET_XMIT_SUCCESS) {
 		sch->bstats.bytes += skb->len;
 		sch->bstats.packets++;
 		sch->q.qlen++;
diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c
index 77098ac..b48a391 100644
--- a/net/sched/sch_red.c
+++ b/net/sched/sch_red.c
@@ -92,7 +92,7 @@ static int red_enqueue(struct sk_buff *skb, struct Qdisc* sch)
 			break;
 	}
 
-	ret = child->enqueue(skb, child);
+	ret = qdisc_enqueue(skb, child);
 	if (likely(ret == NET_XMIT_SUCCESS)) {
 		sch->bstats.bytes += skb->len;
 		sch->bstats.packets++;
diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c
index 444c227..7d705b8 100644
--- a/net/sched/sch_tbf.c
+++ b/net/sched/sch_tbf.c
@@ -133,7 +133,8 @@ static int tbf_enqueue(struct sk_buff *skb, struct Qdisc* sch)
 		return NET_XMIT_DROP;
 	}
 
-	if ((ret = q->qdisc->enqueue(skb, q->qdisc)) != 0) {
+	ret = qdisc_enqueue(skb, q->qdisc);
+	if (ret != 0) {
 		sch->qstats.drops++;
 		return ret;
 	}


^ permalink raw reply related	[flat|nested] 29+ messages in thread

* [PATCH v2 2/3] net_sched: Add accessor function for packet length for qdiscs
  2008-07-19 23:34 [PATCH v2 0/3] Add size table feature for qdiscs Jussi Kivilinna
  2008-07-19 23:35 ` [PATCH v2 1/3] net_sched: Add qdisc_enqueue wrapper Jussi Kivilinna
@ 2008-07-19 23:35 ` Jussi Kivilinna
  2008-07-25 10:57   ` Jarek Poplawski
  2008-07-19 23:35 ` [PATCH v2 3/3] net_sched: Add size table " Jussi Kivilinna
  2008-07-20  7:09 ` [PATCH v2 0/3] Add size table feature " David Miller
  3 siblings, 1 reply; 29+ messages in thread
From: Jussi Kivilinna @ 2008-07-19 23:35 UTC (permalink / raw)
  To: Patrick McHardy; +Cc: netdev

Signed-off-by: Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
---

 include/net/sch_generic.h |   17 +++++++++++------
 net/sched/act_gact.c      |    2 +-
 net/sched/act_ipt.c       |    2 +-
 net/sched/act_mirred.c    |    4 ++--
 net/sched/act_nat.c       |    2 +-
 net/sched/act_pedit.c     |    2 +-
 net/sched/act_police.c    |    8 ++++----
 net/sched/act_simple.c    |    2 +-
 net/sched/sch_atm.c       |    4 ++--
 net/sched/sch_cbq.c       |   14 ++++++--------
 net/sched/sch_dsmark.c    |    2 +-
 net/sched/sch_fifo.c      |    2 +-
 net/sched/sch_gred.c      |   12 ++++++------
 net/sched/sch_hfsc.c      |   14 ++++++--------
 net/sched/sch_htb.c       |    9 +++++----
 net/sched/sch_ingress.c   |    2 +-
 net/sched/sch_netem.c     |    6 +++---
 net/sched/sch_prio.c      |    2 +-
 net/sched/sch_red.c       |    2 +-
 net/sched/sch_sfq.c       |   16 ++++++++--------
 net/sched/sch_tbf.c       |    6 +++---
 net/sched/sch_teql.c      |    6 +++---
 22 files changed, 69 insertions(+), 67 deletions(-)

diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index f396dff..8229520 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -306,6 +306,11 @@ static inline bool qdisc_tx_is_noop(const struct net_device *dev)
 	return true;
 }
 
+static inline unsigned int qdisc_pkt_len(struct sk_buff *skb)
+{
+	return skb->len;
+}
+
 static inline int qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 {
 	return sch->enqueue(skb, sch);
@@ -320,8 +325,8 @@ static inline int __qdisc_enqueue_tail(struct sk_buff *skb, struct Qdisc *sch,
 				       struct sk_buff_head *list)
 {
 	__skb_queue_tail(list, skb);
-	sch->qstats.backlog += skb->len;
-	sch->bstats.bytes += skb->len;
+	sch->qstats.backlog += qdisc_pkt_len(skb);
+	sch->bstats.bytes += qdisc_pkt_len(skb);
 	sch->bstats.packets++;
 
 	return NET_XMIT_SUCCESS;
@@ -338,7 +343,7 @@ static inline struct sk_buff *__qdisc_dequeue_head(struct Qdisc *sch,
 	struct sk_buff *skb = __skb_dequeue(list);
 
 	if (likely(skb != NULL))
-		sch->qstats.backlog -= skb->len;
+		sch->qstats.backlog -= qdisc_pkt_len(skb);
 
 	return skb;
 }
@@ -354,7 +359,7 @@ static inline struct sk_buff *__qdisc_dequeue_tail(struct Qdisc *sch,
 	struct sk_buff *skb = __skb_dequeue_tail(list);
 
 	if (likely(skb != NULL))
-		sch->qstats.backlog -= skb->len;
+		sch->qstats.backlog -= qdisc_pkt_len(skb);
 
 	return skb;
 }
@@ -368,7 +373,7 @@ static inline int __qdisc_requeue(struct sk_buff *skb, struct Qdisc *sch,
 				  struct sk_buff_head *list)
 {
 	__skb_queue_head(list, skb);
-	sch->qstats.backlog += skb->len;
+	sch->qstats.backlog += qdisc_pkt_len(skb);
 	sch->qstats.requeues++;
 
 	return NET_XMIT_SUCCESS;
@@ -401,7 +406,7 @@ static inline unsigned int __qdisc_queue_drop(struct Qdisc *sch,
 	struct sk_buff *skb = __qdisc_dequeue_tail(sch, list);
 
 	if (likely(skb != NULL)) {
-		unsigned int len = skb->len;
+		unsigned int len = qdisc_pkt_len(skb);
 		kfree_skb(skb);
 		return len;
 	}
diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c
index 422872c..ac04289 100644
--- a/net/sched/act_gact.c
+++ b/net/sched/act_gact.c
@@ -139,7 +139,7 @@ static int tcf_gact(struct sk_buff *skb, struct tc_action *a, struct tcf_result
 #else
 	action = gact->tcf_action;
 #endif
-	gact->tcf_bstats.bytes += skb->len;
+	gact->tcf_bstats.bytes += qdisc_pkt_len(skb);
 	gact->tcf_bstats.packets++;
 	if (action == TC_ACT_SHOT)
 		gact->tcf_qstats.drops++;
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index da696fd..d1263b3 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -205,7 +205,7 @@ static int tcf_ipt(struct sk_buff *skb, struct tc_action *a,
 	spin_lock(&ipt->tcf_lock);
 
 	ipt->tcf_tm.lastuse = jiffies;
-	ipt->tcf_bstats.bytes += skb->len;
+	ipt->tcf_bstats.bytes += qdisc_pkt_len(skb);
 	ipt->tcf_bstats.packets++;
 
 	/* yes, we have to worry about both in and out dev
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index 1aff005..70341c0 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -164,7 +164,7 @@ bad_mirred:
 		if (skb2 != NULL)
 			kfree_skb(skb2);
 		m->tcf_qstats.overlimits++;
-		m->tcf_bstats.bytes += skb->len;
+		m->tcf_bstats.bytes += qdisc_pkt_len(skb);
 		m->tcf_bstats.packets++;
 		spin_unlock(&m->tcf_lock);
 		/* should we be asking for packet to be dropped?
@@ -184,7 +184,7 @@ bad_mirred:
 		goto bad_mirred;
 	}
 
-	m->tcf_bstats.bytes += skb2->len;
+	m->tcf_bstats.bytes += qdisc_pkt_len(skb2);
 	m->tcf_bstats.packets++;
 	if (!(at & AT_EGRESS))
 		if (m->tcfm_ok_push)
diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c
index 0a3c833..7b39ed4 100644
--- a/net/sched/act_nat.c
+++ b/net/sched/act_nat.c
@@ -124,7 +124,7 @@ static int tcf_nat(struct sk_buff *skb, struct tc_action *a,
 	egress = p->flags & TCA_NAT_FLAG_EGRESS;
 	action = p->tcf_action;
 
-	p->tcf_bstats.bytes += skb->len;
+	p->tcf_bstats.bytes += qdisc_pkt_len(skb);
 	p->tcf_bstats.packets++;
 
 	spin_unlock(&p->tcf_lock);
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index 3cc4cb9..d5f4e34 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -182,7 +182,7 @@ static int tcf_pedit(struct sk_buff *skb, struct tc_action *a,
 bad:
 	p->tcf_qstats.overlimits++;
 done:
-	p->tcf_bstats.bytes += skb->len;
+	p->tcf_bstats.bytes += qdisc_pkt_len(skb);
 	p->tcf_bstats.packets++;
 	spin_unlock(&p->tcf_lock);
 	return p->tcf_action;
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index 0898120..32c3f9d 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -272,7 +272,7 @@ static int tcf_act_police(struct sk_buff *skb, struct tc_action *a,
 
 	spin_lock(&police->tcf_lock);
 
-	police->tcf_bstats.bytes += skb->len;
+	police->tcf_bstats.bytes += qdisc_pkt_len(skb);
 	police->tcf_bstats.packets++;
 
 	if (police->tcfp_ewma_rate &&
@@ -282,7 +282,7 @@ static int tcf_act_police(struct sk_buff *skb, struct tc_action *a,
 		return police->tcf_action;
 	}
 
-	if (skb->len <= police->tcfp_mtu) {
+	if (qdisc_pkt_len(skb) <= police->tcfp_mtu) {
 		if (police->tcfp_R_tab == NULL) {
 			spin_unlock(&police->tcf_lock);
 			return police->tcfp_result;
@@ -295,12 +295,12 @@ static int tcf_act_police(struct sk_buff *skb, struct tc_action *a,
 			ptoks = toks + police->tcfp_ptoks;
 			if (ptoks > (long)L2T_P(police, police->tcfp_mtu))
 				ptoks = (long)L2T_P(police, police->tcfp_mtu);
-			ptoks -= L2T_P(police, skb->len);
+			ptoks -= L2T_P(police, qdisc_pkt_len(skb));
 		}
 		toks += police->tcfp_toks;
 		if (toks > (long)police->tcfp_burst)
 			toks = police->tcfp_burst;
-		toks -= L2T(police, skb->len);
+		toks -= L2T(police, qdisc_pkt_len(skb));
 		if ((toks|ptoks) >= 0) {
 			police->tcfp_t_c = now;
 			police->tcfp_toks = toks;
diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c
index 1d421d0..e7851ce 100644
--- a/net/sched/act_simple.c
+++ b/net/sched/act_simple.c
@@ -41,7 +41,7 @@ static int tcf_simp(struct sk_buff *skb, struct tc_action *a, struct tcf_result
 
 	spin_lock(&d->tcf_lock);
 	d->tcf_tm.lastuse = jiffies;
-	d->tcf_bstats.bytes += skb->len;
+	d->tcf_bstats.bytes += qdisc_pkt_len(skb);
 	d->tcf_bstats.packets++;
 
 	/* print policy string followed by _ then packet count
diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c
index 68ed35e..04faa83 100644
--- a/net/sched/sch_atm.c
+++ b/net/sched/sch_atm.c
@@ -437,9 +437,9 @@ drop: __maybe_unused
 			flow->qstats.drops++;
 		return ret;
 	}
-	sch->bstats.bytes += skb->len;
+	sch->bstats.bytes += qdisc_pkt_len(skb);
 	sch->bstats.packets++;
-	flow->bstats.bytes += skb->len;
+	flow->bstats.bytes += qdisc_pkt_len(skb);
 	flow->bstats.packets++;
 	/*
 	 * Okay, this may seem weird. We pretend we've dropped the packet if
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index 1afe3ee..f1d2f8e 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -370,7 +370,6 @@ static int
 cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 {
 	struct cbq_sched_data *q = qdisc_priv(sch);
-	int len = skb->len;
 	int uninitialized_var(ret);
 	struct cbq_class *cl = cbq_classify(skb, sch, &ret);
 
@@ -391,7 +390,7 @@ cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 	if (ret == NET_XMIT_SUCCESS) {
 		sch->q.qlen++;
 		sch->bstats.packets++;
-		sch->bstats.bytes+=len;
+		sch->bstats.bytes += qdisc_pkt_len(skb);
 		cbq_mark_toplevel(q, cl);
 		if (!cl->next_alive)
 			cbq_activate_class(cl);
@@ -658,7 +657,6 @@ static enum hrtimer_restart cbq_undelay(struct hrtimer *timer)
 #ifdef CONFIG_NET_CLS_ACT
 static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child)
 {
-	int len = skb->len;
 	struct Qdisc *sch = child->__parent;
 	struct cbq_sched_data *q = qdisc_priv(sch);
 	struct cbq_class *cl = q->rx_class;
@@ -675,7 +673,7 @@ static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child)
 		if (qdisc_enqueue(skb, cl->q) == 0) {
 			sch->q.qlen++;
 			sch->bstats.packets++;
-			sch->bstats.bytes+=len;
+			sch->bstats.bytes += qdisc_pkt_len(skb);
 			if (!cl->next_alive)
 				cbq_activate_class(cl);
 			return 0;
@@ -881,7 +879,7 @@ cbq_dequeue_prio(struct Qdisc *sch, int prio)
 			if (skb == NULL)
 				goto skip_class;
 
-			cl->deficit -= skb->len;
+			cl->deficit -= qdisc_pkt_len(skb);
 			q->tx_class = cl;
 			q->tx_borrowed = borrow;
 			if (borrow != cl) {
@@ -889,11 +887,11 @@ cbq_dequeue_prio(struct Qdisc *sch, int prio)
 				borrow->xstats.borrows++;
 				cl->xstats.borrows++;
 #else
-				borrow->xstats.borrows += skb->len;
-				cl->xstats.borrows += skb->len;
+				borrow->xstats.borrows += qdisc_pkt_len(skb);
+				cl->xstats.borrows += qdisc_pkt_len(skb);
 #endif
 			}
-			q->tx_len = skb->len;
+			q->tx_len = qdisc_pkt_len(skb);
 
 			if (cl->deficit <= 0) {
 				q->active[prio] = cl;
diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c
index 44d347e..a935676 100644
--- a/net/sched/sch_dsmark.c
+++ b/net/sched/sch_dsmark.c
@@ -258,7 +258,7 @@ static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 		return err;
 	}
 
-	sch->bstats.bytes += skb->len;
+	sch->bstats.bytes += qdisc_pkt_len(skb);
 	sch->bstats.packets++;
 	sch->q.qlen++;
 
diff --git a/net/sched/sch_fifo.c b/net/sched/sch_fifo.c
index 1d97fa4..23d258b 100644
--- a/net/sched/sch_fifo.c
+++ b/net/sched/sch_fifo.c
@@ -27,7 +27,7 @@ static int bfifo_enqueue(struct sk_buff *skb, struct Qdisc* sch)
 {
 	struct fifo_sched_data *q = qdisc_priv(sch);
 
-	if (likely(sch->qstats.backlog + skb->len <= q->limit))
+	if (likely(sch->qstats.backlog + qdisc_pkt_len(skb) <= q->limit))
 		return qdisc_enqueue_tail(skb, sch);
 
 	return qdisc_reshape_fail(skb, sch);
diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c
index 39fa285..c1ad6b8 100644
--- a/net/sched/sch_gred.c
+++ b/net/sched/sch_gred.c
@@ -188,7 +188,7 @@ static int gred_enqueue(struct sk_buff *skb, struct Qdisc* sch)
 	}
 
 	q->packetsin++;
-	q->bytesin += skb->len;
+	q->bytesin += qdisc_pkt_len(skb);
 
 	if (gred_wred_mode(t))
 		gred_load_wred_set(t, q);
@@ -226,8 +226,8 @@ static int gred_enqueue(struct sk_buff *skb, struct Qdisc* sch)
 			break;
 	}
 
-	if (q->backlog + skb->len <= q->limit) {
-		q->backlog += skb->len;
+	if (q->backlog + qdisc_pkt_len(skb) <= q->limit) {
+		q->backlog += qdisc_pkt_len(skb);
 		return qdisc_enqueue_tail(skb, sch);
 	}
 
@@ -254,7 +254,7 @@ static int gred_requeue(struct sk_buff *skb, struct Qdisc* sch)
 	} else {
 		if (red_is_idling(&q->parms))
 			red_end_of_idle_period(&q->parms);
-		q->backlog += skb->len;
+		q->backlog += qdisc_pkt_len(skb);
 	}
 
 	return qdisc_requeue(skb, sch);
@@ -277,7 +277,7 @@ static struct sk_buff *gred_dequeue(struct Qdisc* sch)
 				       "VQ 0x%x after dequeue, screwing up "
 				       "backlog.\n", tc_index_to_dp(skb));
 		} else {
-			q->backlog -= skb->len;
+			q->backlog -= qdisc_pkt_len(skb);
 
 			if (!q->backlog && !gred_wred_mode(t))
 				red_start_of_idle_period(&q->parms);
@@ -299,7 +299,7 @@ static unsigned int gred_drop(struct Qdisc* sch)
 
 	skb = qdisc_dequeue_tail(sch);
 	if (skb) {
-		unsigned int len = skb->len;
+		unsigned int len = qdisc_pkt_len(skb);
 		struct gred_sched_data *q;
 		u16 dp = tc_index_to_dp(skb);
 
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index fd61ed6..0ae7d19 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -895,7 +895,7 @@ qdisc_peek_len(struct Qdisc *sch)
 			printk("qdisc_peek_len: non work-conserving qdisc ?\n");
 		return 0;
 	}
-	len = skb->len;
+	len = qdisc_pkt_len(skb);
 	if (unlikely(sch->ops->requeue(skb, sch) != NET_XMIT_SUCCESS)) {
 		if (net_ratelimit())
 			printk("qdisc_peek_len: failed to requeue\n");
@@ -1574,7 +1574,6 @@ static int
 hfsc_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 {
 	struct hfsc_class *cl;
-	unsigned int len;
 	int err;
 
 	cl = hfsc_classify(skb, sch, &err);
@@ -1585,7 +1584,6 @@ hfsc_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 		return err;
 	}
 
-	len = skb->len;
 	err = qdisc_enqueue(skb, cl->qdisc);
 	if (unlikely(err != NET_XMIT_SUCCESS)) {
 		cl->qstats.drops++;
@@ -1594,12 +1592,12 @@ hfsc_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 	}
 
 	if (cl->qdisc->q.qlen == 1)
-		set_active(cl, len);
+		set_active(cl, qdisc_pkt_len(skb));
 
 	cl->bstats.packets++;
-	cl->bstats.bytes += len;
+	cl->bstats.bytes += qdisc_pkt_len(skb);
 	sch->bstats.packets++;
-	sch->bstats.bytes += len;
+	sch->bstats.bytes += qdisc_pkt_len(skb);
 	sch->q.qlen++;
 
 	return NET_XMIT_SUCCESS;
@@ -1649,9 +1647,9 @@ hfsc_dequeue(struct Qdisc *sch)
 		return NULL;
 	}
 
-	update_vf(cl, skb->len, cur_time);
+	update_vf(cl, qdisc_pkt_len(skb), cur_time);
 	if (realtime)
-		cl->cl_cumul += skb->len;
+		cl->cl_cumul += qdisc_pkt_len(skb);
 
 	if (cl->qdisc->q.qlen != 0) {
 		if (cl->cl_flags & HFSC_RSC) {
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 72b5a94..30c999c 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -579,13 +579,13 @@ static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 	} else {
 		cl->bstats.packets +=
 			skb_is_gso(skb)?skb_shinfo(skb)->gso_segs:1;
-		cl->bstats.bytes += skb->len;
+		cl->bstats.bytes += qdisc_pkt_len(skb);
 		htb_activate(q, cl);
 	}
 
 	sch->q.qlen++;
 	sch->bstats.packets += skb_is_gso(skb)?skb_shinfo(skb)->gso_segs:1;
-	sch->bstats.bytes += skb->len;
+	sch->bstats.bytes += qdisc_pkt_len(skb);
 	return NET_XMIT_SUCCESS;
 }
 
@@ -642,7 +642,7 @@ static int htb_requeue(struct sk_buff *skb, struct Qdisc *sch)
 static void htb_charge_class(struct htb_sched *q, struct htb_class *cl,
 			     int level, struct sk_buff *skb)
 {
-	int bytes = skb->len;
+	int bytes = qdisc_pkt_len(skb);
 	long toks, diff;
 	enum htb_cmode old_mode;
 
@@ -855,7 +855,8 @@ next:
 	} while (cl != start);
 
 	if (likely(skb != NULL)) {
-		if ((cl->un.leaf.deficit[level] -= skb->len) < 0) {
+		cl->un.leaf.deficit[level] -= qdisc_pkt_len(skb);
+		if (cl->un.leaf.deficit[level] < 0) {
 			cl->un.leaf.deficit[level] += cl->un.leaf.quantum;
 			htb_next_rb_node((level ? cl->parent->un.inner.ptr : q->
 					  ptr[0]) + prio);
diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c
index 956c80a..4a2b773 100644
--- a/net/sched/sch_ingress.c
+++ b/net/sched/sch_ingress.c
@@ -77,7 +77,7 @@ static int ingress_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 	result = tc_classify(skb, p->filter_list, &res);
 
 	sch->bstats.packets++;
-	sch->bstats.bytes += skb->len;
+	sch->bstats.bytes += qdisc_pkt_len(skb);
 	switch (result) {
 	case TC_ACT_SHOT:
 		result = TC_ACT_SHOT;
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index 13c4821..ae49be0 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -237,7 +237,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 
 	if (likely(ret == NET_XMIT_SUCCESS)) {
 		sch->q.qlen++;
-		sch->bstats.bytes += skb->len;
+		sch->bstats.bytes += qdisc_pkt_len(skb);
 		sch->bstats.packets++;
 	} else
 		sch->qstats.drops++;
@@ -481,8 +481,8 @@ static int tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch)
 
 		__skb_queue_after(list, skb, nskb);
 
-		sch->qstats.backlog += nskb->len;
-		sch->bstats.bytes += nskb->len;
+		sch->qstats.backlog += qdisc_pkt_len(nskb);
+		sch->bstats.bytes += qdisc_pkt_len(nskb);
 		sch->bstats.packets++;
 
 		return NET_XMIT_SUCCESS;
diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
index d29c2f8..f849243 100644
--- a/net/sched/sch_prio.c
+++ b/net/sched/sch_prio.c
@@ -83,7 +83,7 @@ prio_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 
 	ret = qdisc_enqueue(skb, qdisc);
 	if (ret == NET_XMIT_SUCCESS) {
-		sch->bstats.bytes += skb->len;
+		sch->bstats.bytes += qdisc_pkt_len(skb);
 		sch->bstats.packets++;
 		sch->q.qlen++;
 		return NET_XMIT_SUCCESS;
diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c
index b48a391..3f2d1d7 100644
--- a/net/sched/sch_red.c
+++ b/net/sched/sch_red.c
@@ -94,7 +94,7 @@ static int red_enqueue(struct sk_buff *skb, struct Qdisc* sch)
 
 	ret = qdisc_enqueue(skb, child);
 	if (likely(ret == NET_XMIT_SUCCESS)) {
-		sch->bstats.bytes += skb->len;
+		sch->bstats.bytes += qdisc_pkt_len(skb);
 		sch->bstats.packets++;
 		sch->q.qlen++;
 	} else {
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index 8458f63..8589da6 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -245,7 +245,7 @@ static unsigned int sfq_drop(struct Qdisc *sch)
 	if (d > 1) {
 		sfq_index x = q->dep[d + SFQ_DEPTH].next;
 		skb = q->qs[x].prev;
-		len = skb->len;
+		len = qdisc_pkt_len(skb);
 		__skb_unlink(skb, &q->qs[x]);
 		kfree_skb(skb);
 		sfq_dec(q, x);
@@ -261,7 +261,7 @@ static unsigned int sfq_drop(struct Qdisc *sch)
 		q->next[q->tail] = q->next[d];
 		q->allot[q->next[d]] += q->quantum;
 		skb = q->qs[d].prev;
-		len = skb->len;
+		len = qdisc_pkt_len(skb);
 		__skb_unlink(skb, &q->qs[d]);
 		kfree_skb(skb);
 		sfq_dec(q, d);
@@ -305,7 +305,7 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 	if (q->qs[x].qlen >= q->limit)
 		return qdisc_drop(skb, sch);
 
-	sch->qstats.backlog += skb->len;
+	sch->qstats.backlog += qdisc_pkt_len(skb);
 	__skb_queue_tail(&q->qs[x], skb);
 	sfq_inc(q, x);
 	if (q->qs[x].qlen == 1) {		/* The flow is new */
@@ -320,7 +320,7 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 		}
 	}
 	if (++sch->q.qlen <= q->limit) {
-		sch->bstats.bytes += skb->len;
+		sch->bstats.bytes += qdisc_pkt_len(skb);
 		sch->bstats.packets++;
 		return 0;
 	}
@@ -352,7 +352,7 @@ sfq_requeue(struct sk_buff *skb, struct Qdisc *sch)
 		q->hash[x] = hash;
 	}
 
-	sch->qstats.backlog += skb->len;
+	sch->qstats.backlog += qdisc_pkt_len(skb);
 	__skb_queue_head(&q->qs[x], skb);
 	/* If selected queue has length q->limit+1, this means that
 	 * all another queues are empty and we do simple tail drop.
@@ -363,7 +363,7 @@ sfq_requeue(struct sk_buff *skb, struct Qdisc *sch)
 		skb = q->qs[x].prev;
 		__skb_unlink(skb, &q->qs[x]);
 		sch->qstats.drops++;
-		sch->qstats.backlog -= skb->len;
+		sch->qstats.backlog -= qdisc_pkt_len(skb);
 		kfree_skb(skb);
 		return NET_XMIT_CN;
 	}
@@ -411,7 +411,7 @@ sfq_dequeue(struct Qdisc *sch)
 	skb = __skb_dequeue(&q->qs[a]);
 	sfq_dec(q, a);
 	sch->q.qlen--;
-	sch->qstats.backlog -= skb->len;
+	sch->qstats.backlog -= qdisc_pkt_len(skb);
 
 	/* Is the slot empty? */
 	if (q->qs[a].qlen == 0) {
@@ -423,7 +423,7 @@ sfq_dequeue(struct Qdisc *sch)
 		}
 		q->next[q->tail] = a;
 		q->allot[a] += q->quantum;
-	} else if ((q->allot[a] -= skb->len) <= 0) {
+	} else if ((q->allot[a] -= qdisc_pkt_len(skb)) <= 0) {
 		q->tail = a;
 		a = q->next[a];
 		q->allot[a] += q->quantum;
diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c
index 7d705b8..b296672 100644
--- a/net/sched/sch_tbf.c
+++ b/net/sched/sch_tbf.c
@@ -123,7 +123,7 @@ static int tbf_enqueue(struct sk_buff *skb, struct Qdisc* sch)
 	struct tbf_sched_data *q = qdisc_priv(sch);
 	int ret;
 
-	if (skb->len > q->max_size) {
+	if (qdisc_pkt_len(skb) > q->max_size) {
 		sch->qstats.drops++;
 #ifdef CONFIG_NET_CLS_ACT
 		if (sch->reshape_fail == NULL || sch->reshape_fail(skb, sch))
@@ -140,7 +140,7 @@ static int tbf_enqueue(struct sk_buff *skb, struct Qdisc* sch)
 	}
 
 	sch->q.qlen++;
-	sch->bstats.bytes += skb->len;
+	sch->bstats.bytes += qdisc_pkt_len(skb);
 	sch->bstats.packets++;
 	return 0;
 }
@@ -181,7 +181,7 @@ static struct sk_buff *tbf_dequeue(struct Qdisc* sch)
 		psched_time_t now;
 		long toks;
 		long ptoks = 0;
-		unsigned int len = skb->len;
+		unsigned int len = qdisc_pkt_len(skb);
 
 		now = psched_get_time();
 		toks = psched_tdiff_bounded(now, q->t_c, q->buffer);
diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c
index 8b0ff34..5372236 100644
--- a/net/sched/sch_teql.c
+++ b/net/sched/sch_teql.c
@@ -83,7 +83,7 @@ teql_enqueue(struct sk_buff *skb, struct Qdisc* sch)
 
 	if (q->q.qlen < dev->tx_queue_len) {
 		__skb_queue_tail(&q->q, skb);
-		sch->bstats.bytes += skb->len;
+		sch->bstats.bytes += qdisc_pkt_len(skb);
 		sch->bstats.packets++;
 		return 0;
 	}
@@ -278,7 +278,6 @@ static int teql_master_xmit(struct sk_buff *skb, struct net_device *dev)
 	struct Qdisc *start, *q;
 	int busy;
 	int nores;
-	int len = skb->len;
 	int subq = skb_get_queue_mapping(skb);
 	struct sk_buff *skb_res = NULL;
 
@@ -313,7 +312,8 @@ restart:
 					master->slaves = NEXT_SLAVE(q);
 					netif_wake_queue(dev);
 					master->stats.tx_packets++;
-					master->stats.tx_bytes += len;
+					master->stats.tx_bytes +=
+						qdisc_pkt_len(skb);
 					return 0;
 				}
 				netif_tx_unlock(slave);


^ permalink raw reply related	[flat|nested] 29+ messages in thread

* [PATCH v2 3/3] net_sched: Add size table for qdiscs
  2008-07-19 23:34 [PATCH v2 0/3] Add size table feature for qdiscs Jussi Kivilinna
  2008-07-19 23:35 ` [PATCH v2 1/3] net_sched: Add qdisc_enqueue wrapper Jussi Kivilinna
  2008-07-19 23:35 ` [PATCH v2 2/3] net_sched: Add accessor function for packet length for qdiscs Jussi Kivilinna
@ 2008-07-19 23:35 ` Jussi Kivilinna
  2008-07-20  7:09 ` [PATCH v2 0/3] Add size table feature " David Miller
  3 siblings, 0 replies; 29+ messages in thread
From: Jussi Kivilinna @ 2008-07-19 23:35 UTC (permalink / raw)
  To: Patrick McHardy; +Cc: netdev

Add size table functions for qdiscs and calculate packet size in
qdisc_enqueue().

Based on patch by Patrick McHardy
 http://marc.info/?l=linux-netdev&m=115201979221729&w=2

Signed-off-by: Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
---

 include/linux/pkt_sched.h |   20 ++++++
 include/linux/rtnetlink.h |    1 
 include/net/pkt_sched.h   |    1 
 include/net/sch_generic.h |   25 +++++++
 net/sched/sch_api.c       |  151 ++++++++++++++++++++++++++++++++++++++++++++-
 net/sched/sch_generic.c   |    1 
 net/sched/sch_netem.c     |    5 +
 7 files changed, 199 insertions(+), 5 deletions(-)

diff --git a/include/linux/pkt_sched.h b/include/linux/pkt_sched.h
index 87f4e0f..e5de421 100644
--- a/include/linux/pkt_sched.h
+++ b/include/linux/pkt_sched.h
@@ -85,6 +85,26 @@ struct tc_ratespec
 
 #define TC_RTAB_SIZE	1024
 
+struct tc_sizespec {
+	unsigned char	cell_log;
+	unsigned char	size_log;
+	short		cell_align;
+	int		overhead;
+	unsigned int	linklayer;
+	unsigned int	mpu;
+	unsigned int	mtu;
+	unsigned int	tsize;
+};
+
+enum {
+	TCA_STAB_UNSPEC,
+	TCA_STAB_BASE,
+	TCA_STAB_DATA,
+	__TCA_STAB_MAX
+};
+
+#define TCA_STAB_MAX (__TCA_STAB_MAX - 1)
+
 /* FIFO section */
 
 struct tc_fifo_qopt
diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h
index b358c70..f4d386c 100644
--- a/include/linux/rtnetlink.h
+++ b/include/linux/rtnetlink.h
@@ -482,6 +482,7 @@ enum
 	TCA_RATE,
 	TCA_FCNT,
 	TCA_STATS2,
+	TCA_STAB,
 	__TCA_MAX
 };
 
diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h
index e4e3005..6affcfa 100644
--- a/include/net/pkt_sched.h
+++ b/include/net/pkt_sched.h
@@ -83,6 +83,7 @@ extern struct Qdisc *qdisc_lookup_class(struct net_device *dev, u32 handle);
 extern struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r,
 		struct nlattr *tab);
 extern void qdisc_put_rtab(struct qdisc_rate_table *tab);
+extern void qdisc_put_stab(struct qdisc_size_table *tab);
 
 extern void __qdisc_run(struct Qdisc *q);
 
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 8229520..db9ad65 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -29,6 +29,13 @@ enum qdisc_state_t
 	__QDISC_STATE_SCHED,
 };
 
+struct qdisc_size_table {
+	struct list_head	list;
+	struct tc_sizespec	szopts;
+	int			refcnt;
+	u16			data[];
+};
+
 struct Qdisc
 {
 	int 			(*enqueue)(struct sk_buff *skb, struct Qdisc *dev);
@@ -39,6 +46,7 @@ struct Qdisc
 #define TCQ_F_INGRESS	4
 	int			padded;
 	struct Qdisc_ops	*ops;
+	struct qdisc_size_table	*stab;
 	u32			handle;
 	u32			parent;
 	atomic_t		refcnt;
@@ -165,6 +173,16 @@ struct tcf_proto
 	struct tcf_proto_ops	*ops;
 };
 
+struct qdisc_skb_cb {
+	unsigned int		pkt_len;
+	char			data[];
+};
+
+static inline struct qdisc_skb_cb *qdisc_skb_cb(struct sk_buff *skb)
+{
+	return (struct qdisc_skb_cb *)skb->cb;
+}
+
 static inline spinlock_t *qdisc_lock(struct Qdisc *qdisc)
 {
 	return &qdisc->q.lock;
@@ -257,6 +275,8 @@ extern struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
 extern struct Qdisc *qdisc_create_dflt(struct net_device *dev,
 				       struct netdev_queue *dev_queue,
 				       struct Qdisc_ops *ops, u32 parentid);
+extern void qdisc_calculate_pkt_len(struct sk_buff *skb,
+				   struct qdisc_size_table *stab);
 extern void tcf_destroy(struct tcf_proto *tp);
 extern void tcf_destroy_chain(struct tcf_proto **fl);
 
@@ -308,16 +328,19 @@ static inline bool qdisc_tx_is_noop(const struct net_device *dev)
 
 static inline unsigned int qdisc_pkt_len(struct sk_buff *skb)
 {
-	return skb->len;
+	return qdisc_skb_cb(skb)->pkt_len;
 }
 
 static inline int qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 {
+	if (sch->stab)
+		qdisc_calculate_pkt_len(skb, sch->stab);
 	return sch->enqueue(skb, sch);
 }
 
 static inline int qdisc_enqueue_root(struct sk_buff *skb, struct Qdisc *sch)
 {
+	qdisc_skb_cb(skb)->pkt_len = skb->len;
 	return qdisc_enqueue(skb, sch);
 }
 
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index fb43731..5219d5f 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -286,6 +286,129 @@ void qdisc_put_rtab(struct qdisc_rate_table *tab)
 }
 EXPORT_SYMBOL(qdisc_put_rtab);
 
+static LIST_HEAD(qdisc_stab_list);
+static DEFINE_SPINLOCK(qdisc_stab_lock);
+
+static const struct nla_policy stab_policy[TCA_STAB_MAX + 1] = {
+	[TCA_STAB_BASE]	= { .len = sizeof(struct tc_sizespec) },
+	[TCA_STAB_DATA] = { .type = NLA_BINARY },
+};
+
+static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt)
+{
+	struct nlattr *tb[TCA_STAB_MAX + 1];
+	struct qdisc_size_table *stab;
+	struct tc_sizespec *s;
+	unsigned int tsize = 0;
+	u16 *tab = NULL;
+	int err;
+
+	err = nla_parse_nested(tb, TCA_STAB_MAX, opt, stab_policy);
+	if (err < 0)
+		return ERR_PTR(err);
+	if (!tb[TCA_STAB_BASE])
+		return ERR_PTR(-EINVAL);
+
+	s = nla_data(tb[TCA_STAB_BASE]);
+
+	if (s->tsize > 0) {
+		if (!tb[TCA_STAB_DATA])
+			return ERR_PTR(-EINVAL);
+		tab = nla_data(tb[TCA_STAB_DATA]);
+		tsize = nla_len(tb[TCA_STAB_DATA]) / sizeof(u16);
+	}
+
+	if (!s || tsize != s->tsize || (!tab && tsize > 0))
+		return ERR_PTR(-EINVAL);
+
+	spin_lock(&qdisc_stab_lock);
+
+	list_for_each_entry(stab, &qdisc_stab_list, list) {
+		if (memcmp(&stab->szopts, s, sizeof(*s)))
+			continue;
+		if (tsize > 0 && memcmp(stab->data, tab, tsize * sizeof(u16)))
+			continue;
+		stab->refcnt++;
+		spin_unlock(&qdisc_stab_lock);
+		return stab;
+	}
+
+	spin_unlock(&qdisc_stab_lock);
+
+	stab = kmalloc(sizeof(*stab) + tsize * sizeof(u16), GFP_KERNEL);
+	if (!stab)
+		return ERR_PTR(-ENOMEM);
+
+	stab->refcnt = 1;
+	stab->szopts = *s;
+	if (tsize > 0)
+		memcpy(stab->data, tab, tsize * sizeof(u16));
+
+	spin_lock(&qdisc_stab_lock);
+	list_add_tail(&stab->list, &qdisc_stab_list);
+	spin_unlock(&qdisc_stab_lock);
+
+	return stab;
+}
+
+void qdisc_put_stab(struct qdisc_size_table *tab)
+{
+	if (!tab)
+		return;
+
+	spin_lock(&qdisc_stab_lock);
+
+	if (--tab->refcnt == 0) {
+		list_del(&tab->list);
+		kfree(tab);
+	}
+
+	spin_unlock(&qdisc_stab_lock);
+}
+EXPORT_SYMBOL(qdisc_put_stab);
+
+static int qdisc_dump_stab(struct sk_buff *skb, struct qdisc_size_table *stab)
+{
+	struct nlattr *nest;
+
+	nest = nla_nest_start(skb, TCA_STAB);
+	NLA_PUT(skb, TCA_STAB_BASE, sizeof(stab->szopts), &stab->szopts);
+	nla_nest_end(skb, nest);
+
+	return skb->len;
+
+nla_put_failure:
+	return -1;
+}
+
+void qdisc_calculate_pkt_len(struct sk_buff *skb, struct qdisc_size_table *stab)
+{
+	int pkt_len, slot;
+
+	pkt_len = skb->len + stab->szopts.overhead;
+	if (unlikely(!stab->szopts.tsize))
+		goto out;
+
+	slot = pkt_len + stab->szopts.cell_align;
+	if (unlikely(slot < 0))
+		slot = 0;
+
+	slot >>= stab->szopts.cell_log;
+	if (likely(slot < stab->szopts.tsize))
+		pkt_len = stab->data[slot];
+	else
+		pkt_len = stab->data[stab->szopts.tsize - 1] *
+				(slot / stab->szopts.tsize) +
+				stab->data[slot % stab->szopts.tsize];
+
+	pkt_len <<= stab->szopts.size_log;
+out:
+	if (unlikely(pkt_len < 1))
+		pkt_len = 1;
+	qdisc_skb_cb(skb)->pkt_len = pkt_len;
+}
+EXPORT_SYMBOL(qdisc_calculate_pkt_len);
+
 static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer)
 {
 	struct qdisc_watchdog *wd = container_of(timer, struct qdisc_watchdog,
@@ -613,6 +736,7 @@ qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue,
 	struct nlattr *kind = tca[TCA_KIND];
 	struct Qdisc *sch;
 	struct Qdisc_ops *ops;
+	struct qdisc_size_table *stab;
 
 	ops = qdisc_lookup_ops(kind);
 #ifdef CONFIG_KMOD
@@ -670,6 +794,14 @@ qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue,
 	sch->handle = handle;
 
 	if (!ops->init || (err = ops->init(sch, tca[TCA_OPTIONS])) == 0) {
+		if (tca[TCA_STAB]) {
+			stab = qdisc_get_stab(tca[TCA_STAB]);
+			if (IS_ERR(stab)) {
+				err = PTR_ERR(stab);
+				goto err_out3;
+			}
+			sch->stab = stab;
+		}
 		if (tca[TCA_RATE]) {
 			err = gen_new_estimator(&sch->bstats, &sch->rate_est,
 						qdisc_root_lock(sch),
@@ -691,6 +823,7 @@ qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue,
 		return sch;
 	}
 err_out3:
+	qdisc_put_stab(sch->stab);
 	dev_put(dev);
 	kfree((char *) sch - sch->padded);
 err_out2:
@@ -702,15 +835,26 @@ err_out:
 
 static int qdisc_change(struct Qdisc *sch, struct nlattr **tca)
 {
-	if (tca[TCA_OPTIONS]) {
-		int err;
+	struct qdisc_size_table *stab = NULL;
+	int err = 0;
 
+	if (tca[TCA_OPTIONS]) {
 		if (sch->ops->change == NULL)
 			return -EINVAL;
 		err = sch->ops->change(sch, tca[TCA_OPTIONS]);
 		if (err)
 			return err;
 	}
+
+	if (tca[TCA_STAB]) {
+		stab = qdisc_get_stab(tca[TCA_STAB]);
+		if (IS_ERR(stab))
+			return PTR_ERR(stab);
+	}
+
+	qdisc_put_stab(sch->stab);
+	sch->stab = stab;
+
 	if (tca[TCA_RATE])
 		gen_replace_estimator(&sch->bstats, &sch->rate_est,
 				      qdisc_root_lock(sch), tca[TCA_RATE]);
@@ -994,6 +1138,9 @@ static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
 		goto nla_put_failure;
 	q->qstats.qlen = q->q.qlen;
 
+	if (q->stab && qdisc_dump_stab(skb, q->stab) < 0)
+		goto nla_put_failure;
+
 	if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS,
 					 TCA_XSTATS, qdisc_root_lock(q), &d) < 0)
 		goto nla_put_failure;
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 522a41a..27a51f0 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -469,6 +469,7 @@ static void __qdisc_destroy(struct rcu_head *head)
 	struct Qdisc *qdisc = container_of(head, struct Qdisc, q_rcu);
 	const struct Qdisc_ops  *ops = qdisc->ops;
 
+	qdisc_put_stab(qdisc->stab);
 	gen_kill_estimator(&qdisc->bstats, &qdisc->rate_est);
 	if (ops->reset)
 		ops->reset(qdisc);
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index ae49be0..a590857 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -84,8 +84,9 @@ struct netem_skb_cb {
 
 static inline struct netem_skb_cb *netem_skb_cb(struct sk_buff *skb)
 {
-	BUILD_BUG_ON(sizeof(skb->cb) < sizeof(struct netem_skb_cb));
-	return (struct netem_skb_cb *)skb->cb;
+	BUILD_BUG_ON(sizeof(skb->cb) <
+		sizeof(struct qdisc_skb_cb) + sizeof(struct netem_skb_cb));
+	return (struct netem_skb_cb *)qdisc_skb_cb(skb)->data;
 }
 
 /* init_crandom - initialize correlated random number generator


^ permalink raw reply related	[flat|nested] 29+ messages in thread

* Re: [PATCH v2 0/3] Add size table feature for qdiscs
  2008-07-19 23:34 [PATCH v2 0/3] Add size table feature for qdiscs Jussi Kivilinna
                   ` (2 preceding siblings ...)
  2008-07-19 23:35 ` [PATCH v2 3/3] net_sched: Add size table " Jussi Kivilinna
@ 2008-07-20  7:09 ` David Miller
  3 siblings, 0 replies; 29+ messages in thread
From: David Miller @ 2008-07-20  7:09 UTC (permalink / raw)
  To: jussi.kivilinna; +Cc: kaber, netdev

From: Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
Date: Sun, 20 Jul 2008 02:34:55 +0300

> Patch adds generic size table for qdiscs that is similar to rate table
> used by htb/tbf/cbq.
> 
> Changes since v1:
>  - ingress qdisc / actions handled too
>  - size of table is now dynamic
>  - add qdisc_enqueue_root wrapper

This stuff looks fundamentally fine to me, so I've added
these changes to net-2.6

Please send any further fixups as relative changes.

Thanks.

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v2 2/3] net_sched: Add accessor function for packet length for qdiscs
  2008-07-25 10:57   ` Jarek Poplawski
@ 2008-07-25 10:57     ` David Miller
  2008-07-25 11:37       ` Jarek Poplawski
  0 siblings, 1 reply; 29+ messages in thread
From: David Miller @ 2008-07-25 10:57 UTC (permalink / raw)
  To: jarkao2; +Cc: jussi.kivilinna, kaber, netdev

From: Jarek Poplawski <jarkao2@gmail.com>
Date: Fri, 25 Jul 2008 10:57:48 +0000

> On 20-07-2008 01:35, Jussi Kivilinna wrote:
> > Signed-off-by: Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
> ...
> > diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
> > index 1afe3ee..f1d2f8e 100644
> > --- a/net/sched/sch_cbq.c
> > +++ b/net/sched/sch_cbq.c
> > @@ -370,7 +370,6 @@ static int
> >  cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
> >  {
> >  	struct cbq_sched_data *q = qdisc_priv(sch);
> > -	int len = skb->len;
> >  	int uninitialized_var(ret);
> >  	struct cbq_class *cl = cbq_classify(skb, sch, &ret);
> >  
> > @@ -391,7 +390,7 @@ cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
> >  	if (ret == NET_XMIT_SUCCESS) {
> >  		sch->q.qlen++;
> >  		sch->bstats.packets++;
> > -		sch->bstats.bytes+=len;
> > +		sch->bstats.bytes += qdisc_pkt_len(skb);
> 
> Alas I didn't manage to read this earlier, but this type of changes
> here and elsewhere in this patch looks wrong to me: we shouldn't
> use skb pointer after ->enqueue().

It should be OK here, we have the root qdisc locked, so nobody
can remove it from the queue and if we got NET_XMIT_SUCCESS that
thing must not have been freed.

And anyways, we can't know the qdisc_pkt_len() until the enqueue
function has been called.

Even TCP depends upon that NET_XMIT_* return value having some real
meaning, remember the HTB bug we tracked down last week? :-)

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v2 2/3] net_sched: Add accessor function for packet length for   qdiscs
  2008-07-19 23:35 ` [PATCH v2 2/3] net_sched: Add accessor function for packet length for qdiscs Jussi Kivilinna
@ 2008-07-25 10:57   ` Jarek Poplawski
  2008-07-25 10:57     ` David Miller
  0 siblings, 1 reply; 29+ messages in thread
From: Jarek Poplawski @ 2008-07-25 10:57 UTC (permalink / raw)
  To: Jussi Kivilinna; +Cc: Patrick McHardy, netdev

On 20-07-2008 01:35, Jussi Kivilinna wrote:
> Signed-off-by: Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
...
> diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
> index 1afe3ee..f1d2f8e 100644
> --- a/net/sched/sch_cbq.c
> +++ b/net/sched/sch_cbq.c
> @@ -370,7 +370,6 @@ static int
>  cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
>  {
>  	struct cbq_sched_data *q = qdisc_priv(sch);
> -	int len = skb->len;
>  	int uninitialized_var(ret);
>  	struct cbq_class *cl = cbq_classify(skb, sch, &ret);
>  
> @@ -391,7 +390,7 @@ cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
>  	if (ret == NET_XMIT_SUCCESS) {
>  		sch->q.qlen++;
>  		sch->bstats.packets++;
> -		sch->bstats.bytes+=len;
> +		sch->bstats.bytes += qdisc_pkt_len(skb);

Alas I didn't manage to read this earlier, but this type of changes
here and elsewhere in this patch looks wrong to me: we shouldn't
use skb pointer after ->enqueue().

Jarek P.

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v2 2/3] net_sched: Add accessor function for packet length for qdiscs
  2008-07-25 10:57     ` David Miller
@ 2008-07-25 11:37       ` Jarek Poplawski
  2008-07-25 11:49         ` David Miller
                           ` (2 more replies)
  0 siblings, 3 replies; 29+ messages in thread
From: Jarek Poplawski @ 2008-07-25 11:37 UTC (permalink / raw)
  To: David Miller; +Cc: jussi.kivilinna, kaber, netdev

On Fri, Jul 25, 2008 at 03:57:12AM -0700, David Miller wrote:
> From: Jarek Poplawski <jarkao2@gmail.com>
> Date: Fri, 25 Jul 2008 10:57:48 +0000
> 
> > On 20-07-2008 01:35, Jussi Kivilinna wrote:
> > > Signed-off-by: Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
> > ...
> > > diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
> > > index 1afe3ee..f1d2f8e 100644
> > > --- a/net/sched/sch_cbq.c
> > > +++ b/net/sched/sch_cbq.c
> > > @@ -370,7 +370,6 @@ static int
> > >  cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
> > >  {
> > >  	struct cbq_sched_data *q = qdisc_priv(sch);
> > > -	int len = skb->len;
> > >  	int uninitialized_var(ret);
> > >  	struct cbq_class *cl = cbq_classify(skb, sch, &ret);
> > >  
> > > @@ -391,7 +390,7 @@ cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
> > >  	if (ret == NET_XMIT_SUCCESS) {
> > >  		sch->q.qlen++;
> > >  		sch->bstats.packets++;
> > > -		sch->bstats.bytes+=len;
> > > +		sch->bstats.bytes += qdisc_pkt_len(skb);
> > 
> > Alas I didn't manage to read this earlier, but this type of changes
> > here and elsewhere in this patch looks wrong to me: we shouldn't
> > use skb pointer after ->enqueue().
> 
> It should be OK here, we have the root qdisc locked, so nobody
> can remove it from the queue and if we got NET_XMIT_SUCCESS that
> thing must not have been freed.
> 
> And anyways, we can't know the qdisc_pkt_len() until the enqueue
> function has been called.

OK, I see htb did this earlier with ".packets", so it looks like I'm
wrong with this - sorry! (Anyway, it seems to unnecessarily restrict
the way qdisc are working.)

> Even TCP depends upon that NET_XMIT_* return value having some real
> meaning, remember the HTB bug we tracked down last week? :-)

Do you mean this bug you've forgotten to fix yet?

Thanks,
Jarek P.

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v2 2/3] net_sched: Add accessor function for packet length for qdiscs
  2008-07-25 11:37       ` Jarek Poplawski
@ 2008-07-25 11:49         ` David Miller
  2008-07-26 13:21           ` Patrick McHardy
  2008-07-25 11:53         ` Jarek Poplawski
  2008-07-25 12:29         ` Jussi Kivilinna
  2 siblings, 1 reply; 29+ messages in thread
From: David Miller @ 2008-07-25 11:49 UTC (permalink / raw)
  To: jarkao2; +Cc: jussi.kivilinna, kaber, netdev

From: Jarek Poplawski <jarkao2@gmail.com>
Date: Fri, 25 Jul 2008 11:37:57 +0000

> On Fri, Jul 25, 2008 at 03:57:12AM -0700, David Miller wrote:
> > Even TCP depends upon that NET_XMIT_* return value having some real
> > meaning, remember the HTB bug we tracked down last week? :-)
> 
> Do you mean this bug you've forgotten to fix yet?

I have not forgotten, it is in TODO list. :)

I want to also inquire with Patrick about whether he is going to
perform the audit in this area which he said he wanted to do :-)

Well, we have patch which fixes the problem for the tester, so we
could just merge that and at least have HTB fixed.  But we know there
are other instances of the same return value propagation problem and
Patrick's audit is sure to turn up other similar turds...

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v2 2/3] net_sched: Add accessor function for packet length for qdiscs
  2008-07-25 11:53         ` Jarek Poplawski
@ 2008-07-25 11:52           ` David Miller
  2008-07-25 12:08             ` Jarek Poplawski
  0 siblings, 1 reply; 29+ messages in thread
From: David Miller @ 2008-07-25 11:52 UTC (permalink / raw)
  To: jarkao2; +Cc: jussi.kivilinna, kaber, netdev

From: Jarek Poplawski <jarkao2@gmail.com>
Date: Fri, 25 Jul 2008 11:53:55 +0000

> On Fri, Jul 25, 2008 at 11:37:57AM +0000, Jarek Poplawski wrote:
>
> As a matter of fact it's a good example: in your patch we have this:
> 
> +		ret = cl->un.leaf.q->enqueue(skb, cl->un.leaf.q);
> +		if (ret == NET_XMIT_DROP) {
> +			sch->qstats.drops++;
> +			cl->qstats.drops++;
> +		} else {
> +			cl->bstats.packets +=
> +				skb_is_gso(skb)?skb_shinfo(skb)->gso_segs:1;
> +			cl->bstats.bytes += skb->len;
> 
> So, if something works like noop_enqueue() and returns NET_XMIT_CN
> here, we're just using skb after kfree...

noop_enqueue() can only be a root qdisc, never a leaf hanging off of a
class

What TCP depends upon is that DROP means DROP, and that the
packet never reached the device and has been freed.

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v2 2/3] net_sched: Add accessor function for packet length for qdiscs
  2008-07-25 11:37       ` Jarek Poplawski
  2008-07-25 11:49         ` David Miller
@ 2008-07-25 11:53         ` Jarek Poplawski
  2008-07-25 11:52           ` David Miller
  2008-07-25 12:29         ` Jussi Kivilinna
  2 siblings, 1 reply; 29+ messages in thread
From: Jarek Poplawski @ 2008-07-25 11:53 UTC (permalink / raw)
  To: David Miller; +Cc: jussi.kivilinna, kaber, netdev

On Fri, Jul 25, 2008 at 11:37:57AM +0000, Jarek Poplawski wrote:
> On Fri, Jul 25, 2008 at 03:57:12AM -0700, David Miller wrote:
...
> > It should be OK here, we have the root qdisc locked, so nobody
> > can remove it from the queue and if we got NET_XMIT_SUCCESS that
> > thing must not have been freed.
> > 
> > And anyways, we can't know the qdisc_pkt_len() until the enqueue
> > fucntion has been called.
> 
> OK, I see htb did this earlier with ".packets", so it looks like I'm
> wrong with this - sorry! (Anyway, it seems to unnecessarily restrict
> the way qdisc are working.)
> 
> > Even TCP depends upon that NET_XMIT_* return value having some real
> > meaning, remember the HTB bug we tracked down last week? :-)

As a matter of fact it's a good example: in your patch we have this:

+		ret = cl->un.leaf.q->enqueue(skb, cl->un.leaf.q);
+		if (ret == NET_XMIT_DROP) {
+			sch->qstats.drops++;
+			cl->qstats.drops++;
+		} else {
+			cl->bstats.packets +=
+				skb_is_gso(skb)?skb_shinfo(skb)->gso_segs:1;
+			cl->bstats.bytes += skb->len;

So, if something works like noop_enqueue() and returns NET_XMIT_CN
here, we're just using skb after kfree...

Jarek P.

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v2 2/3] net_sched: Add accessor function for packet length for qdiscs
  2008-07-25 11:52           ` David Miller
@ 2008-07-25 12:08             ` Jarek Poplawski
  2008-07-25 12:16               ` David Miller
  0 siblings, 1 reply; 29+ messages in thread
From: Jarek Poplawski @ 2008-07-25 12:08 UTC (permalink / raw)
  To: David Miller; +Cc: jussi.kivilinna, kaber, netdev

On Fri, Jul 25, 2008 at 04:52:15AM -0700, David Miller wrote:
> From: Jarek Poplawski <jarkao2@gmail.com>
> Date: Fri, 25 Jul 2008 11:53:55 +0000
> 
> > On Fri, Jul 25, 2008 at 11:37:57AM +0000, Jarek Poplawski wrote:
> >
> > As a matter of fact it's a good example: in your patch we have this:
> > 
> > +		ret = cl->un.leaf.q->enqueue(skb, cl->un.leaf.q);
> > +		if (ret == NET_XMIT_DROP) {
> > +			sch->qstats.drops++;
> > +			cl->qstats.drops++;
> > +		} else {
> > +			cl->bstats.packets +=
> > +				skb_is_gso(skb)?skb_shinfo(skb)->gso_segs:1;
> > +			cl->bstats.bytes += skb->len;
> > 
> > So, if something works like noop_enqueue() and returns NET_XMIT_CN
> > here, we're just using skb after kfree...
> 
> noop_enqueue() can only be a root qdisc, never a leaf hanging off of a
> class

OK, sfq does the same.

> What TCP depends upon is that DROP means DROP, and that the
> packet never reached the device and has been freed.

IMHO, TCP should depend on its skb "refcount". A qdisc, after
passing skb to other qdisc shouldn't depend on TCP's "refcount"
because not everything has to work like TCP.

Jarek P.

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v2 2/3] net_sched: Add accessor function for packet length for qdiscs
  2008-07-25 12:08             ` Jarek Poplawski
@ 2008-07-25 12:16               ` David Miller
  0 siblings, 0 replies; 29+ messages in thread
From: David Miller @ 2008-07-25 12:16 UTC (permalink / raw)
  To: jarkao2; +Cc: jussi.kivilinna, kaber, netdev

From: Jarek Poplawski <jarkao2@gmail.com>
Date: Fri, 25 Jul 2008 12:08:49 +0000

> On Fri, Jul 25, 2008 at 04:52:15AM -0700, David Miller wrote:
> > What TCP depends upon is that DROP means DROP, and that the
> > packet never reached the device and has been freed.
> 
> IMHO, TCP should depend on it's skb "refcount". A qdisc, after
> passing skb to other qdisc shouldn't depend on TCP's "refcount"
> because not everything has to work like TCP.

I think it is reasonable for TCP to expect lower layers to
not return garbage.

We have cases like this all over the stack, where some code gives some
specific return value, which means "the packet was freed and not
further processed."

There is no difference for this case.

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v2 2/3] net_sched: Add accessor function for packet length for qdiscs
  2008-07-25 11:37       ` Jarek Poplawski
  2008-07-25 11:49         ` David Miller
  2008-07-25 11:53         ` Jarek Poplawski
@ 2008-07-25 12:29         ` Jussi Kivilinna
  2008-07-25 12:54           ` Jarek Poplawski
  2 siblings, 1 reply; 29+ messages in thread
From: Jussi Kivilinna @ 2008-07-25 12:29 UTC (permalink / raw)
  To: Jarek Poplawski; +Cc: David Miller, kaber, netdev

Quoting "Jarek Poplawski" <jarkao2@gmail.com>:

> OK, I see htb did this earlier with ".packets", so it looks like I'm
> wrong with this - sorry! (Anyway, it seems to unnecessarily restrict
> the way qdisc are working.)
>

Would it be better for qdisc_enqueue() to pass packet length to caller?

  - Jussi



^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v2 2/3] net_sched: Add accessor function for packet length for qdiscs
  2008-07-25 12:29         ` Jussi Kivilinna
@ 2008-07-25 12:54           ` Jarek Poplawski
  2008-07-25 13:15             ` Jussi Kivilinna
  0 siblings, 1 reply; 29+ messages in thread
From: Jarek Poplawski @ 2008-07-25 12:54 UTC (permalink / raw)
  To: Jussi Kivilinna; +Cc: David Miller, kaber, netdev

On Fri, Jul 25, 2008 at 03:29:32PM +0300, Jussi Kivilinna wrote:
> Quoting "Jarek Poplawski" <jarkao2@gmail.com>:
>
>> OK, I see htb did this earlier with ".packets", so it looks like I'm
>> wrong with this - sorry! (Anyway, it seems to unnecessarily restrict
>> the way qdisc are working.)
>>
>
> Would it be better for qdisc_enqueue() to pass packet length to caller?
>

Is it really needed? Actually, if it's only for stats, I really
can't see the reason why not to get this info old way - before
enqueing? (And this could be not enough if we want packets count
like in htb.)

Jarek P.

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v2 2/3] net_sched: Add accessor function for packet length for qdiscs
  2008-07-25 12:54           ` Jarek Poplawski
@ 2008-07-25 13:15             ` Jussi Kivilinna
  2008-07-25 17:46               ` Jarek Poplawski
  0 siblings, 1 reply; 29+ messages in thread
From: Jussi Kivilinna @ 2008-07-25 13:15 UTC (permalink / raw)
  To: Jarek Poplawski; +Cc: David Miller, kaber, netdev

Quoting "Jarek Poplawski" <jarkao2@gmail.com>:

> On Fri, Jul 25, 2008 at 03:29:32PM +0300, Jussi Kivilinna wrote:
>> Quoting "Jarek Poplawski" <jarkao2@gmail.com>:
>>
>>> OK, I see htb did this earlier with ".packets", so it looks like I'm
>>> wrong with this - sorry! (Anyway, it seems to unnecessarily restrict
>>> the way qdisc are working.)
>>>
>>
>> Would it be better for qdisc_enqueue() to pass packet length to caller?
>>
>
> Is it really needed? Actually, if it's only for stats, I really
> can't see the reason why not to get this info old way - before
> enqueing? (And this could be not enough if we want packets count
> like in htb.)
>

qdisc_enqueue recalculates packet size if inner qdisc has size table  
set, so qdisc_pkt_len might return different length after  
qdisc_enqueue than what it was before.

  - Jussi



^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v2 2/3] net_sched: Add accessor function for packet length for qdiscs
  2008-07-25 13:15             ` Jussi Kivilinna
@ 2008-07-25 17:46               ` Jarek Poplawski
  2008-07-25 18:02                 ` Jarek Poplawski
  0 siblings, 1 reply; 29+ messages in thread
From: Jarek Poplawski @ 2008-07-25 17:46 UTC (permalink / raw)
  To: Jussi Kivilinna; +Cc: David Miller, kaber, netdev

On Fri, Jul 25, 2008 at 04:15:03PM +0300, Jussi Kivilinna wrote:
> Quoting "Jarek Poplawski" <jarkao2@gmail.com>:
...
>> Is it's really needed? Actually, if it's only for stats, I really
>> can't see the reason why not to get this info old way - before
>> enqueing? (And this could be not enough if we want packets count
>> like in htb.)
>>
>
> qdisc_enqueue recalculates packet size if inner qdisc has size table  
> set, so qdisc_pkt_len might return different length after qdisc_enqueue 
> than what it was before.

The question is how much does it matter for statistics. Packets' size
(and number) can change many times in many places. Does it mean all
these places should count the same or "real" size? But, if it's really
necessary here, IMHO, it should be done "clean" way, like additional
parameter(s) to qdisc_enqueue(), because in cases like NET_XMIT_CN we
don't even know what happened with an skb.

Jarek P.

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v2 2/3] net_sched: Add accessor function for packet length for qdiscs
  2008-07-25 17:46               ` Jarek Poplawski
@ 2008-07-25 18:02                 ` Jarek Poplawski
  0 siblings, 0 replies; 29+ messages in thread
From: Jarek Poplawski @ 2008-07-25 18:02 UTC (permalink / raw)
  To: Jussi Kivilinna; +Cc: David Miller, kaber, netdev

On Fri, Jul 25, 2008 at 07:46:44PM +0200, Jarek Poplawski wrote:
...
> But, if it's really
> necessary here, IMHO, it should be done "clean" way, like additional
> parameter(s) to qdisc_enqueue(), because in cases like NET_XMIT_CN we
> don't even know what happened with an skb.

...And, we should consider that in such cases, even if it's possible
to find this out, it could additionally complicate algorithms.

Jarek P.

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v2 2/3] net_sched: Add accessor function for packet length for qdiscs
  2008-07-25 11:49         ` David Miller
@ 2008-07-26 13:21           ` Patrick McHardy
  2008-07-26 14:18             ` Jarek Poplawski
  0 siblings, 1 reply; 29+ messages in thread
From: Patrick McHardy @ 2008-07-26 13:21 UTC (permalink / raw)
  To: David Miller; +Cc: jarkao2, jussi.kivilinna, netdev

David Miller wrote:
> From: Jarek Poplawski <jarkao2@gmail.com>
> Date: Fri, 25 Jul 2008 11:37:57 +0000
> 
>> On Fri, Jul 25, 2008 at 03:57:12AM -0700, David Miller wrote:
>>> Even TCP depends upon that NET_XMIT_* return value having some real
>>> meaning, remember the HTB bug we tracked down last week? :-)
>> Do you mean this bug you've forgotten to fix yet?
> 
> I have not forgotten, it is in TODO list. :)
> 
> I want to also inquire with Patrick about whether he is going to
> perform the audit in this area which he said he wanted to do :-)

I'm a bit backlogged, so it will take some more time.

> Well, we have patch which fixes the problem for the tester, so we
> could just merge that and at least have HTB fixed.  But we know there
> are other instances of the same return value propagation problem and
> Patrick's audit is sure to turn up other similar turds...

The other problem that affects all qdiscs supporting actions is
TC_ACT_QUEUED/TC_ACT_STOLEN getting mapped to NET_XMIT_SUCCESS
even though the packet is not queued, corrupting upper qdiscs'
qlen counters.

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v2 2/3] net_sched: Add accessor function for packet length for qdiscs
  2008-07-26 13:21           ` Patrick McHardy
@ 2008-07-26 14:18             ` Jarek Poplawski
  2008-07-30 10:47               ` Patrick McHardy
  0 siblings, 1 reply; 29+ messages in thread
From: Jarek Poplawski @ 2008-07-26 14:18 UTC (permalink / raw)
  To: Patrick McHardy; +Cc: David Miller, jussi.kivilinna, netdev

On Sat, Jul 26, 2008 at 03:21:42PM +0200, Patrick McHardy wrote:
> David Miller wrote:
>> From: Jarek Poplawski <jarkao2@gmail.com>
>> Date: Fri, 25 Jul 2008 11:37:57 +0000
>>
>>> On Fri, Jul 25, 2008 at 03:57:12AM -0700, David Miller wrote:
>>>> Even TCP depends upon that NET_XMIT_* return value having some real
>>>> meaning, remember the HTB bug we tracked down last week? :-)
>>> Do you mean this bug you've forgotten to fix yet?
>>
>> I have not forgotten, it is in TODO list. :)
>>
>> I want to also inquire with Patrick about whether he is going to
>> perform the audit in this area which he said he wanted to do :-)
>
> I'm a bit backlogged, so it will take some more time.
>
>> Well, we have patch which fixes the problem for the tester, so we
>> could just merge that and at least have HTB fixed.  But we know there
>> are other instances of the same return value propagation problem and
>> Patrick's audit is sure to turn up other similar turds...

My proposal (as before) is to apply this patch now (it could hit more
people than we know), but I would do small changes yet:
- to do htb_activate() if (cl->un.leaf.q->q.qlen),
- to skip bstats.packets and .bytes updating for anything but
  NET_XMIT_SUCCESS,
so, both things just like in sch_hfsc.

> The other problem that affects all qdiscs supporting actions is
> TC_ACT_QUEUED/TC_ACT_STOLEN getting mapped to NET_XMIT_SUCCESS
> even though the packet is not queued, corrupting upper qdiscs'
> qlen counters.

Why can't we (temporarily) simply check such cl->un.leaf.q->q.qlen
before and after enqueuing?

Jarek P.

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v2 2/3] net_sched: Add accessor function for packet length for qdiscs
  2008-07-26 14:18             ` Jarek Poplawski
@ 2008-07-30 10:47               ` Patrick McHardy
  2008-07-30 11:19                 ` Jarek Poplawski
  0 siblings, 1 reply; 29+ messages in thread
From: Patrick McHardy @ 2008-07-30 10:47 UTC (permalink / raw)
  To: Jarek Poplawski; +Cc: David Miller, jussi.kivilinna, netdev

Jarek Poplawski wrote:
> On Sat, Jul 26, 2008 at 03:21:42PM +0200, Patrick McHardy wrote:
>> David Miller wrote:
>>> From: Jarek Poplawski <jarkao2@gmail.com>
>>> Date: Fri, 25 Jul 2008 11:37:57 +0000
>>>
>>>> On Fri, Jul 25, 2008 at 03:57:12AM -0700, David Miller wrote:
>>>>> Even TCP depends upon that NET_XMIT_* return value having some real
>>>>> meaning, remember the HTB bug we tracked down last week? :-)
>>>> Do you mean this bug you've forgotten to fix yet?
>>> I have not forgotten, it is in TODO list. :)
>>>
>>> I want to also inquire with Patrick about whether he is going to
>>> perform the audit in this area which he said he wanted to do :-)
>> I'm a bit backlogged, so it will take some more time.
>>
>>> Well, we have patch which fixes the problem for the tester, so we
>>> could just merge that and at least have HTB fixed.  But we know there
>>> are other instances of the same return value propagation problem and
>>> Patrick's audit is sure to turn up other similar turds...
> 
> My proposal (as before) is to apply this patch now (it could hit more
> people than we know), but I would do small changes yet:
> - to do htb_activate() if (cl->un.leaf.q->q.qlen),
> - to skip bstats.packets and .bytes updating for anything but
>   NET_XMIT_SUCCESS,
> so, both things just like in sch_hfsc.
> 
>> The other problem that affects all qdiscs supporting actions is
>> TC_ACT_QUEUED/TC_ACT_STOLEN getting mapped to NET_XMIT_SUCCESS
>> even though the packet is not queued, corrupting upper qdiscs'
>> qlen counters.
> 
> Why can't we (temporarily) simply check such cl->un.leaf.q->q.qlen
> before and after enqueuing?

That's really ugly, why not simply fix it correctly by
not lying to upper qdiscs?

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v2 2/3] net_sched: Add accessor function for packet length for qdiscs
  2008-07-30 10:47               ` Patrick McHardy
@ 2008-07-30 11:19                 ` Jarek Poplawski
  2008-07-30 11:21                   ` Patrick McHardy
  0 siblings, 1 reply; 29+ messages in thread
From: Jarek Poplawski @ 2008-07-30 11:19 UTC (permalink / raw)
  To: Patrick McHardy; +Cc: David Miller, jussi.kivilinna, netdev

On Wed, Jul 30, 2008 at 12:47:31PM +0200, Patrick McHardy wrote:
> Jarek Poplawski wrote:
>> On Sat, Jul 26, 2008 at 03:21:42PM +0200, Patrick McHardy wrote:
...
>>> The other problem that affects all qdiscs supporting actions is
>>> TC_ACT_QUEUED/TC_ACT_STOLEN getting mapped to NET_XMIT_SUCCESS
>>> even though the packet is not queued, corrupting upper qdiscs'
>>> qlen counters.
>>
>> Why can't we (temporarily) simply check such cl->un.leaf.q->q.qlen
>> before and after enqueuing?
>
> That's really ugly, why not simply fix it correctly by
> not lying to upper qdiscs?

I thought it needs to wait for your audit. (Considering current
state of NET_XMIT statuses it's simple and reliable.) Otherwise
I'd prefer nice methods too.

Jarek P.

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v2 2/3] net_sched: Add accessor function for packet length for qdiscs
  2008-07-30 11:19                 ` Jarek Poplawski
@ 2008-07-30 11:21                   ` Patrick McHardy
  2008-07-30 11:37                     ` Jarek Poplawski
  0 siblings, 1 reply; 29+ messages in thread
From: Patrick McHardy @ 2008-07-30 11:21 UTC (permalink / raw)
  To: Jarek Poplawski; +Cc: David Miller, jussi.kivilinna, netdev

Jarek Poplawski wrote:
> On Wed, Jul 30, 2008 at 12:47:31PM +0200, Patrick McHardy wrote:
>   
>> Jarek Poplawski wrote:
>>     
>>> On Sat, Jul 26, 2008 at 03:21:42PM +0200, Patrick McHardy wrote:
>>>       
> ...
>   
>>>> The other problem that affects all qdiscs supporting actions is
>>>> TC_ACT_QUEUED/TC_ACT_STOLEN getting mapped to NET_XMIT_SUCCESS
>>>> even though the packet is not queued, corrupting upper qdiscs'
>>>> qlen counters.
>>>>         
>>> Why can't we (temporarily) simply check such cl->un.leaf.q->q.qlen
>>> before and after enqueuing?
>>>       
>> That's really ugly, why not simply fix it correctly by
>> not lying to upper qdiscs?
>>     
>
> I thought it needs to wait for your audit. (Considering current
> state of NET_XMIT statuses it's simple and reliable.) Otherwise
> I'd prefer nice methods too.
>   
Well, the problems are already clear, someone just needs to fix them :)
I won't be able to do this until next week.



^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v2 2/3] net_sched: Add accessor function for packet length for qdiscs
  2008-07-30 11:21                   ` Patrick McHardy
@ 2008-07-30 11:37                     ` Jarek Poplawski
  2008-07-30 11:40                       ` Patrick McHardy
  0 siblings, 1 reply; 29+ messages in thread
From: Jarek Poplawski @ 2008-07-30 11:37 UTC (permalink / raw)
  To: Patrick McHardy; +Cc: David Miller, jussi.kivilinna, netdev

On Wed, Jul 30, 2008 at 01:21:22PM +0200, Patrick McHardy wrote:
> Jarek Poplawski wrote:
...
>> I thought it needs to wait for your audit. (Considering current
>> state of NET_XMIT statuses it's simple and reliable.) Otherwise
>> I'd prefer nice methods too.
>>   
> Well, the problems are already clear, someone just needs to fix them :)
> I won't be able to do this until next week.

...alas to me the solution looks less clear...

Jarek P.

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v2 2/3] net_sched: Add accessor function for packet length for qdiscs
  2008-07-30 11:37                     ` Jarek Poplawski
@ 2008-07-30 11:40                       ` Patrick McHardy
  2008-07-30 11:48                         ` David Miller
  0 siblings, 1 reply; 29+ messages in thread
From: Patrick McHardy @ 2008-07-30 11:40 UTC (permalink / raw)
  To: Jarek Poplawski; +Cc: David Miller, jussi.kivilinna, netdev

Jarek Poplawski wrote:
> On Wed, Jul 30, 2008 at 01:21:22PM +0200, Patrick McHardy wrote:
>> Jarek Poplawski wrote:
> ...
>>> I thought it needs to wait for your audit. (Considering current
>>> state of NET_XMIT statuses it's simple and reliable.) Otherwise
>>> I'd prefer nice methods too.
>>>   
>> Well, the problems are already clear, someone just needs to fix them :)
>> I won't be able to do this until next week.
> 
> ...alas to me the solution looks less clear...

The reason why it translates it at all seems to be to not increase
the drops counter. Within a single qdisc this could be avoided by
other means easily, upper qdiscs would still increase the counter
when we return anything besides NET_XMIT_SUCCESS though.

This means we need a new NET_XMIT return value to indicate this to
the upper qdiscs. So I'd suggest to introduce NET_XMIT_STOLEN,
return that to upper qdiscs and translate it to NET_XMIT_SUCCESS
in dev_queue_xmit, similar to NET_XMIT_BYPASS.


^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v2 2/3] net_sched: Add accessor function for packet length for qdiscs
  2008-07-30 11:40                       ` Patrick McHardy
@ 2008-07-30 11:48                         ` David Miller
  2008-07-30 11:52                           ` Patrick McHardy
  0 siblings, 1 reply; 29+ messages in thread
From: David Miller @ 2008-07-30 11:48 UTC (permalink / raw)
  To: kaber; +Cc: jarkao2, jussi.kivilinna, netdev

From: Patrick McHardy <kaber@trash.net>
Date: Wed, 30 Jul 2008 13:40:22 +0200

> The reason why it translates it at all seems to be to not increase
> the drops counter. Within a single qdisc this could be avoided by
> other means easily, upper qdiscs would still increase the counter
> when we return anything besides NET_XMIT_SUCCESS though.
> 
> This means we need a new NET_XMIT return value to indicate this to
> the upper qdiscs. So I'd suggest to introduce NET_XMIT_STOLEN,
> return that to upper qdiscs and translate it to NET_XMIT_SUCCESS
> in dev_queue_xmit, similar to NET_XMIT_BYPASS.

Maybe these NET_XMIT_* values being passed around should be a set of
bits.

They could be composed of base meanings, combined with specific
attributes.

So you could say "NET_XMIT_DROP | __NET_XMIT_NO_DROP_COUNT"

The attributes get masked out by the top-level ->enqueue() caller,
such that the base meanings are the only thing that make their
way up into the stack.

If it's only about communication within the qdisc tree, let's
simply code it that way.


^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v2 2/3] net_sched: Add accessor function for packet length for qdiscs
  2008-07-30 11:48                         ` David Miller
@ 2008-07-30 11:52                           ` Patrick McHardy
  2008-07-30 12:19                             ` Jarek Poplawski
  2008-07-30 20:50                             ` Jarek Poplawski
  0 siblings, 2 replies; 29+ messages in thread
From: Patrick McHardy @ 2008-07-30 11:52 UTC (permalink / raw)
  To: David Miller; +Cc: jarkao2, jussi.kivilinna, netdev

David Miller wrote:
> From: Patrick McHardy <kaber@trash.net>
> Date: Wed, 30 Jul 2008 13:40:22 +0200
> 
>> The reason why it translates it at all seems to be to not increase
>> the drops counter. Within a single qdisc this could be avoided by
>> other means easily, upper qdiscs would still increase the counter
>> when we return anything besides NET_XMIT_SUCCESS though.
>>
>> This means we need a new NET_XMIT return value to indicate this to
>> the upper qdiscs. So I'd suggest to introduce NET_XMIT_STOLEN,
>> return that to upper qdiscs and translate it to NET_XMIT_SUCCESS
>> in dev_queue_xmit, similar to NET_XMIT_BYPASS.
> 
> Maybe these NET_XMIT_* values being passed around should be a set of
> bits.
> 
> They could be composed of base meanings, combined with specific
> attributes.
> 
> So you could say "NET_XMIT_DROP | __NET_XMIT_NO_DROP_COUNT"
> 
> The attributes get masked out by the top-level ->enqueue() caller,
> such that the base meanings are the only thing that make their
> way up into the stack.
> 
> If it's only about communication within the qdisc tree, let's
> simply code it that way.

That's a good suggestion. I think this should work.


^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v2 2/3] net_sched: Add accessor function for packet length for qdiscs
  2008-07-30 11:52                           ` Patrick McHardy
@ 2008-07-30 12:19                             ` Jarek Poplawski
  2008-07-30 20:50                             ` Jarek Poplawski
  1 sibling, 0 replies; 29+ messages in thread
From: Jarek Poplawski @ 2008-07-30 12:19 UTC (permalink / raw)
  To: Patrick McHardy; +Cc: David Miller, jussi.kivilinna, netdev

On Wed, Jul 30, 2008 at 01:52:01PM +0200, Patrick McHardy wrote:
> David Miller wrote:
>> From: Patrick McHardy <kaber@trash.net>
>> Date: Wed, 30 Jul 2008 13:40:22 +0200
>>
>>> The reason why it translates it at all seems to be to not increase
>>> the drops counter. Within a single qdisc this could be avoided by
>>> other means easily, upper qdiscs would still increase the counter
>>> when we return anything besides NET_XMIT_SUCCESS though.
>>>
>>> This means we need a new NET_XMIT return value to indicate this to
>>> the upper qdiscs. So I'd suggest to introduce NET_XMIT_STOLEN,
>>> return that to upper qdiscs and translate it to NET_XMIT_SUCCESS
>>> in dev_queue_xmit, similar to NET_XMIT_BYPASS.
>>
>> Maybe these NET_XMIT_* values being passed around should be a set of
>> bits.
>>
>> They could be composed of base meanings, combined with specific
>> attributes.
>>
>> So you could say "NET_XMIT_DROP | __NET_XMIT_NO_DROP_COUNT"
>>
>> The attributes get masked out by the top-level ->enqueue() caller,
>> such that the base meanings are the only thing that make their
>> way up into the stack.
>>
>> If it's only about communication within the qdisc tree, let's
>> simply code it that way.
>
> That's a good suggestion. I think this should work.
>

Yes, very nice! (I guess I can look at this more...)

Jarek P.

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v2 2/3] net_sched: Add accessor function for packet length for qdiscs
  2008-07-30 11:52                           ` Patrick McHardy
  2008-07-30 12:19                             ` Jarek Poplawski
@ 2008-07-30 20:50                             ` Jarek Poplawski
  1 sibling, 0 replies; 29+ messages in thread
From: Jarek Poplawski @ 2008-07-30 20:50 UTC (permalink / raw)
  To: Patrick McHardy; +Cc: David Miller, jussi.kivilinna, netdev

Patrick McHardy wrote, On 07/30/2008 01:52 PM:

> David Miller wrote:
>> From: Patrick McHardy <kaber@trash.net>
>> Date: Wed, 30 Jul 2008 13:40:22 +0200
>>
>>> The reason why it translates it at all seems to be to not increase
>>> the drops counter. Within a single qdisc this could be avoided by
>>> other means easily, upper qdiscs would still increase the counter
>>> when we return anything besides NET_XMIT_SUCCESS though.
>>>
>>> This means we need a new NET_XMIT return value to indicate this to
>>> the upper qdiscs. So I'd suggest to introduce NET_XMIT_STOLEN,
>>> return that to upper qdiscs and translate it to NET_XMIT_SUCCESS
>>> in dev_queue_xmit, similar to NET_XMIT_BYPASS.
>> Maybe these NET_XMIT_* values being passed around should be a set of
>> bits.
>>
>> They could be composed of base meanings, combined with specific
>> attributes.
>>
>> So you could say "NET_XMIT_DROP | __NET_XMIT_NO_DROP_COUNT"
>>
>> The attributes get masked out by the top-level ->enqueue() caller,
>> such that the base meanings are the only thing that make their
>> way up into the stack.
>>
>> If it's only about communication within the qdisc tree, let's
>> simply code it that way.
> 
> That's a good suggestion. I think this should work.

I hope I didn't misunderstand who is expected to do this, so here is
my preliminary proposal. I guess I did misunderstand some details,
so it's not final...

BTW, htb_enqueue()/requeue() is unfinished because it will conflict
with David's patch... 

Any suggestions are welcome.

Thanks,
Jarek P.

---

 include/linux/netdevice.h |    1 +
 include/net/sch_generic.h |   15 ++++++++++++++-
 net/sched/sch_atm.c       |   12 +++++++-----
 net/sched/sch_cbq.c       |   16 ++++++++++------
 net/sched/sch_dsmark.c    |    8 +++++---
 net/sched/sch_hfsc.c      |    8 +++++---
 net/sched/sch_htb.c       |    2 +-
 net/sched/sch_prio.c      |    8 +++++---
 net/sched/sch_sfq.c       |    2 +-
 9 files changed, 49 insertions(+), 23 deletions(-)

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index b4d056c..188e4e4 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -64,6 +64,7 @@ struct wireless_dev;
 #define NET_XMIT_BYPASS		4	/* packet does not leave via dequeue;
 					   (TC use only - dev_queue_xmit
 					   returns this as NET_XMIT_SUCCESS) */
+#define NET_XMIT_MASK		0xFFFF	/* qdisc flags in net/sch_generic.h */
 
 /* Backlog congestion levels */
 #define NET_RX_SUCCESS		0   /* keep 'em coming, baby */
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index b5f40d7..b79bd04 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -331,6 +331,19 @@ static inline unsigned int qdisc_pkt_len(struct sk_buff *skb)
 	return qdisc_skb_cb(skb)->pkt_len;
 }
 
+#ifdef CONFIG_NET_CLS_ACT
+/* additional qdisc xmit flags */
+enum net_xmit_qdisc_t
+{
+	__NET_XMIT_STOLEN = NET_XMIT_MASK + 1,
+};
+
+#define net_xmit_drop_count(e)	((e) & __NET_XMIT_STOLEN ? 0 : 1)
+
+#else
+#define net_xmit_drop_count(e)	(1)
+#endif
+
 static inline int qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 {
 #ifdef CONFIG_NET_SCHED
@@ -343,7 +356,7 @@ static inline int qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 static inline int qdisc_enqueue_root(struct sk_buff *skb, struct Qdisc *sch)
 {
 	qdisc_skb_cb(skb)->pkt_len = skb->len;
-	return qdisc_enqueue(skb, sch);
+	return qdisc_enqueue(skb, sch) & NET_XMIT_MASK;
 }
 
 static inline int __qdisc_enqueue_tail(struct sk_buff *skb, struct Qdisc *sch,
diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c
index 6b517b9..27dd773 100644
--- a/net/sched/sch_atm.c
+++ b/net/sched/sch_atm.c
@@ -415,7 +415,7 @@ static int atm_tc_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 		case TC_ACT_QUEUED:
 		case TC_ACT_STOLEN:
 			kfree_skb(skb);
-			return NET_XMIT_SUCCESS;
+			return NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
 		case TC_ACT_SHOT:
 			kfree_skb(skb);
 			goto drop;
@@ -432,9 +432,11 @@ static int atm_tc_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 	ret = qdisc_enqueue(skb, flow->q);
 	if (ret != 0) {
 drop: __maybe_unused
-		sch->qstats.drops++;
-		if (flow)
-			flow->qstats.drops++;
+		if (net_xmit_drop_count(ret)) {
+			sch->qstats.drops++;
+			if (flow)
+				flow->qstats.drops++;
+		}
 		return ret;
 	}
 	sch->bstats.bytes += qdisc_pkt_len(skb);
@@ -530,7 +532,7 @@ static int atm_tc_requeue(struct sk_buff *skb, struct Qdisc *sch)
 	if (!ret) {
 		sch->q.qlen++;
 		sch->qstats.requeues++;
-	} else {
+	} else if (net_xmit_drop_count(ret)) {
 		sch->qstats.drops++;
 		p->link.qstats.drops++;
 	}
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index 14954bf..00a0af6 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -256,7 +256,7 @@ cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
 		switch (result) {
 		case TC_ACT_QUEUED:
 		case TC_ACT_STOLEN:
-			*qerr = NET_XMIT_SUCCESS;
+			*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
 		case TC_ACT_SHOT:
 			return NULL;
 		case TC_ACT_RECLASSIFY:
@@ -397,9 +397,11 @@ cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 		return ret;
 	}
 
-	sch->qstats.drops++;
-	cbq_mark_toplevel(q, cl);
-	cl->qstats.drops++;
+	if (net_xmit_drop_count(ret)) {
+		sch->qstats.drops++;
+		cbq_mark_toplevel(q, cl);
+		cl->qstats.drops++;
+	}
 	return ret;
 }
 
@@ -430,8 +432,10 @@ cbq_requeue(struct sk_buff *skb, struct Qdisc *sch)
 			cbq_activate_class(cl);
 		return 0;
 	}
-	sch->qstats.drops++;
-	cl->qstats.drops++;
+	if (net_xmit_drop_count(ret)) {
+		sch->qstats.drops++;
+		cl->qstats.drops++;
+	}
 	return ret;
 }
 
diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c
index a935676..7170275 100644
--- a/net/sched/sch_dsmark.c
+++ b/net/sched/sch_dsmark.c
@@ -236,7 +236,7 @@ static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 		case TC_ACT_QUEUED:
 		case TC_ACT_STOLEN:
 			kfree_skb(skb);
-			return NET_XMIT_SUCCESS;
+			return NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
 
 		case TC_ACT_SHOT:
 			goto drop;
@@ -254,7 +254,8 @@ static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 
 	err = qdisc_enqueue(skb, p->q);
 	if (err != NET_XMIT_SUCCESS) {
-		sch->qstats.drops++;
+		if (net_xmit_drop_count(err))
+			sch->qstats.drops++;
 		return err;
 	}
 
@@ -321,7 +322,8 @@ static int dsmark_requeue(struct sk_buff *skb, struct Qdisc *sch)
 
 	err = p->q->ops->requeue(skb, p->q);
 	if (err != NET_XMIT_SUCCESS) {
-		sch->qstats.drops++;
+		if (net_xmit_drop_count(err))
+			sch->qstats.drops++;
 		return err;
 	}
 
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index 0ae7d19..5cf9ae7 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -1166,7 +1166,7 @@ hfsc_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
 		switch (result) {
 		case TC_ACT_QUEUED:
 		case TC_ACT_STOLEN:
-			*qerr = NET_XMIT_SUCCESS;
+			*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
 		case TC_ACT_SHOT:
 			return NULL;
 		}
@@ -1586,8 +1586,10 @@ hfsc_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 
 	err = qdisc_enqueue(skb, cl->qdisc);
 	if (unlikely(err != NET_XMIT_SUCCESS)) {
-		cl->qstats.drops++;
-		sch->qstats.drops++;
+		if (net_xmit_drop_count(err)) {
+			cl->qstats.drops++;
+			sch->qstats.drops++;
+		}
 		return err;
 	}
 
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 75a4095..635e8ce 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -221,7 +221,7 @@ static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch,
 		switch (result) {
 		case TC_ACT_QUEUED:
 		case TC_ACT_STOLEN:
-			*qerr = NET_XMIT_SUCCESS;
+			*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
 		case TC_ACT_SHOT:
 			return NULL;
 		}
diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
index f849243..adb1a52 100644
--- a/net/sched/sch_prio.c
+++ b/net/sched/sch_prio.c
@@ -45,7 +45,7 @@ prio_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
 		switch (err) {
 		case TC_ACT_STOLEN:
 		case TC_ACT_QUEUED:
-			*qerr = NET_XMIT_SUCCESS;
+			*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
 		case TC_ACT_SHOT:
 			return NULL;
 		}
@@ -88,7 +88,8 @@ prio_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 		sch->q.qlen++;
 		return NET_XMIT_SUCCESS;
 	}
-	sch->qstats.drops++;
+	if (net_xmit_drop_count(ret))
+		sch->qstats.drops++;
 	return ret;
 }
 
@@ -114,7 +115,8 @@ prio_requeue(struct sk_buff *skb, struct Qdisc* sch)
 		sch->qstats.requeues++;
 		return 0;
 	}
-	sch->qstats.drops++;
+	if (net_xmit_drop_count(ret))
+		sch->qstats.drops++;
 	return NET_XMIT_DROP;
 }
 
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index 8589da6..3a456e1 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -178,7 +178,7 @@ static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch,
 		switch (result) {
 		case TC_ACT_STOLEN:
 		case TC_ACT_QUEUED:
-			*qerr = NET_XMIT_SUCCESS;
+			*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
 		case TC_ACT_SHOT:
 			return 0;
 		}

^ permalink raw reply related	[flat|nested] 29+ messages in thread

end of thread, other threads:[~2008-07-30 20:48 UTC | newest]

Thread overview: 29+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2008-07-19 23:34 [PATCH v2 0/3] Add size table feature for qdiscs Jussi Kivilinna
2008-07-19 23:35 ` [PATCH v2 1/3] net_sched: Add qdisc_enqueue wrapper Jussi Kivilinna
2008-07-19 23:35 ` [PATCH v2 2/3] net_sched: Add accessor function for packet length for qdiscs Jussi Kivilinna
2008-07-25 10:57   ` Jarek Poplawski
2008-07-25 10:57     ` David Miller
2008-07-25 11:37       ` Jarek Poplawski
2008-07-25 11:49         ` David Miller
2008-07-26 13:21           ` Patrick McHardy
2008-07-26 14:18             ` Jarek Poplawski
2008-07-30 10:47               ` Patrick McHardy
2008-07-30 11:19                 ` Jarek Poplawski
2008-07-30 11:21                   ` Patrick McHardy
2008-07-30 11:37                     ` Jarek Poplawski
2008-07-30 11:40                       ` Patrick McHardy
2008-07-30 11:48                         ` David Miller
2008-07-30 11:52                           ` Patrick McHardy
2008-07-30 12:19                             ` Jarek Poplawski
2008-07-30 20:50                             ` Jarek Poplawski
2008-07-25 11:53         ` Jarek Poplawski
2008-07-25 11:52           ` David Miller
2008-07-25 12:08             ` Jarek Poplawski
2008-07-25 12:16               ` David Miller
2008-07-25 12:29         ` Jussi Kivilinna
2008-07-25 12:54           ` Jarek Poplawski
2008-07-25 13:15             ` Jussi Kivilinna
2008-07-25 17:46               ` Jarek Poplawski
2008-07-25 18:02                 ` Jarek Poplawski
2008-07-19 23:35 ` [PATCH v2 3/3] net_sched: Add size table " Jussi Kivilinna
2008-07-20  7:09 ` [PATCH v2 0/3] Add size table feature " David Miller

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).