* [PATCH net-next-2.6 v4 1/2] net_sched: add size table functions
@ 2008-07-10 19:34 Jussi Kivilinna
2008-07-10 19:34 ` [PATCH net-next-2.6 v4 2/2] hfsc: add link layer overhead adaption Jussi Kivilinna
0 siblings, 1 reply; 9+ messages in thread
From: Jussi Kivilinna @ 2008-07-10 19:34 UTC (permalink / raw)
To: Patrick McHardy; +Cc: netdev
Patch adds a size table that is similar to the rate table, with the difference
that the size table stores link layer packet size. It's needed for the HFSC link
layer adaption patch as it converts skb->len to link layer packet size
directly, unlike HTB/CBQ/etc that convert packet length to link layer
transfer time using rate tables.
Signed-off-by: Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
---
include/linux/pkt_sched.h | 10 ++++++++++
include/net/pkt_sched.h | 11 +++++++++++
include/net/sch_generic.h | 23 ++++++++++++++++++++++
net/sched/sch_api.c | 47 +++++++++++++++++++++++++++++++++++++++++++++
4 files changed, 91 insertions(+), 0 deletions(-)
diff --git a/include/linux/pkt_sched.h b/include/linux/pkt_sched.h
index dbb7ac3..5bf1444 100644
--- a/include/linux/pkt_sched.h
+++ b/include/linux/pkt_sched.h
@@ -85,6 +85,16 @@ struct tc_ratespec
#define TC_RTAB_SIZE 1024
+struct tc_sizespec {
+ unsigned char cell_log;
+ unsigned char size_log;
+ short overhead;
+ short cell_align;
+ unsigned short mpu;
+};
+
+#define TC_STAB_SIZE 1024
+
/* FIFO section */
struct tc_fifo_qopt
diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h
index d58c1a5..90e1826 100644
--- a/include/net/pkt_sched.h
+++ b/include/net/pkt_sched.h
@@ -82,7 +82,18 @@ extern struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle);
extern struct Qdisc *qdisc_lookup_class(struct net_device *dev, u32 handle);
extern struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r,
struct nlattr *tab);
+extern struct qdisc_size_table *__qdisc_get_stab(struct tc_sizespec *s,
+ void *tab, unsigned int tablen);
extern void qdisc_put_rtab(struct qdisc_rate_table *tab);
+extern void qdisc_put_stab(struct qdisc_size_table *tab);
+
+static inline struct qdisc_size_table *qdisc_get_stab(struct tc_sizespec *s,
+ struct nlattr *tab)
+{
+ if (tab == NULL)
+ return NULL;
+ return __qdisc_get_stab(s, nla_data(tab), nla_len(tab));
+}
extern void __qdisc_run(struct netdev_queue *txq);
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 5ba66b5..db54abe 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -23,6 +23,13 @@ struct qdisc_rate_table
int refcnt;
};
+struct qdisc_size_table {
+ struct tc_sizespec szopts;
+ u16 data[512];
+ struct qdisc_size_table *next;
+ int refcnt;
+};
+
struct Qdisc
{
int (*enqueue)(struct sk_buff *skb, struct Qdisc *dev);
@@ -394,6 +401,22 @@ static inline u32 qdisc_l2t(struct qdisc_rate_table* rtab, unsigned int pktlen)
return rtab->data[slot];
}
+/* Length to link layer size lookup in a qdisc_size_table, to determine
+ what size a packet takes on the link layer.
+ */
+static inline u32 qdisc_linklayer_sz(struct qdisc_size_table *stab, u32 pktlen)
+{
+ int slot = pktlen + stab->szopts.cell_align + stab->szopts.overhead;
+ unsigned char size_log = stab->szopts.size_log;
+ if (unlikely(slot < 0))
+ slot = 0;
+ slot >>= stab->szopts.cell_log;
+ if (unlikely(slot > 511))
+ return ((u32)stab->data[511] << size_log) * (slot >> 9) +
+ ((u32)stab->data[slot & 0x1FF] << size_log);
+ return (u32)stab->data[slot] << size_log;
+}
+
#ifdef CONFIG_NET_CLS_ACT
static inline struct sk_buff *skb_act_clone(struct sk_buff *skb, gfp_t gfp_mask)
{
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 95873f8..aea3473 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -278,6 +278,53 @@ void qdisc_put_rtab(struct qdisc_rate_table *tab)
}
EXPORT_SYMBOL(qdisc_put_rtab);
+static struct qdisc_size_table *qdisc_stab_list;
+
+struct qdisc_size_table *__qdisc_get_stab(struct tc_sizespec *s,
+ void *tab, unsigned int tablen)
+{
+ struct qdisc_size_table *stab;
+
+ for (stab = qdisc_stab_list; stab; stab = stab->next) {
+ if (memcmp(&stab->szopts, s, sizeof(struct tc_sizespec)) == 0) {
+ stab->refcnt++;
+ return stab;
+ }
+ }
+
+ if (tab == NULL || tablen != TC_STAB_SIZE)
+ return NULL;
+
+ stab = kmalloc(sizeof(*stab), GFP_KERNEL);
+ if (stab) {
+ stab->szopts = *s;
+ stab->refcnt = 1;
+ memcpy(stab->data, tab, TC_STAB_SIZE);
+ stab->next = qdisc_stab_list;
+ qdisc_stab_list = stab;
+ }
+ return stab;
+}
+EXPORT_SYMBOL(__qdisc_get_stab);
+
+void qdisc_put_stab(struct qdisc_size_table *tab)
+{
+ struct qdisc_size_table *stab, **stabp;
+
+ if (!tab || --tab->refcnt)
+ return;
+
+ for (stabp = &qdisc_stab_list; (stab = *stabp) != NULL;
+ stabp = &stab->next) {
+ if (stab == tab) {
+ *stabp = stab->next;
+ kfree(stab);
+ return;
+ }
+ }
+}
+EXPORT_SYMBOL(qdisc_put_stab);
+
static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer)
{
struct qdisc_watchdog *wd = container_of(timer, struct qdisc_watchdog,
^ permalink raw reply related [flat|nested] 9+ messages in thread
* [PATCH net-next-2.6 v4 2/2] hfsc: add link layer overhead adaption
2008-07-10 19:34 [PATCH net-next-2.6 v4 1/2] net_sched: add size table functions Jussi Kivilinna
@ 2008-07-10 19:34 ` Jussi Kivilinna
2008-07-14 14:24 ` Patrick McHardy
0 siblings, 1 reply; 9+ messages in thread
From: Jussi Kivilinna @ 2008-07-10 19:34 UTC (permalink / raw)
To: Patrick McHardy; +Cc: netdev
CBQ and HTB have options for emulating overhead of underlying link layer
(mpu/overhead/linklayer options). This patch makes sch_hfsc use size table
to emulate link layer overhead.
Patch uses size table to convert packet length to emulated link layer packet
length. Converted packet length is passed to hfsc calculations instead of
real. If size table isn't passed to kernel, hfsc works as before.
Signed-off-by: Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
---
include/linux/pkt_sched.h | 5 +
net/sched/sch_hfsc.c | 158 +++++++++++++++++++++++++++++++++++++--------
2 files changed, 135 insertions(+), 28 deletions(-)
diff --git a/include/linux/pkt_sched.h b/include/linux/pkt_sched.h
index 5bf1444..46db55d 100644
--- a/include/linux/pkt_sched.h
+++ b/include/linux/pkt_sched.h
@@ -303,6 +303,9 @@ struct tc_htb_xstats
struct tc_hfsc_qopt
{
__u16 defcls; /* default class */
+ __u16 __reserved;
+ struct tc_sizespec szopts;
+ __u16 stab[512];
};
struct tc_service_curve
@@ -326,6 +329,8 @@ enum
TCA_HFSC_RSC,
TCA_HFSC_FSC,
TCA_HFSC_USC,
+ TCA_HFSC_SZOPTS,
+ TCA_HFSC_STAB,
__TCA_HFSC_MAX,
};
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index 997d520..1dcee08 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -68,6 +68,9 @@
#include <net/pkt_cls.h>
#include <asm/div64.h>
+#define endof(type, member) \
+ (offsetof(type, member) + sizeof(((type *)0)->member))
+
/*
* kernel internal service curve representation:
* coordinates are given by 64 bit unsigned integers.
@@ -128,6 +131,8 @@ struct hfsc_class
struct list_head siblings; /* sibling classes */
struct list_head children; /* child classes */
struct Qdisc *qdisc; /* leaf qdisc */
+ struct qdisc_size_table *stab; /* size table used for link layer
+ overhead adaption */
struct rb_node el_node; /* qdisc's eligible tree member */
struct rb_root vt_tree; /* active children sorted by cl_vt */
@@ -493,6 +498,21 @@ sc2isc(struct tc_service_curve *sc, struct internal_sc *isc)
isc->ism2 = m2ism(sc->m2);
}
+/* convert packet length to link layer packet length */
+static unsigned int get_linklayer_len(struct hfsc_class *cl, unsigned int len)
+{
+ if (unlikely(!len))
+ return len;
+
+ while (!cl->stab) {
+ cl = cl->cl_parent;
+ if (!cl)
+ return len;
+ }
+
+ return qdisc_linklayer_sz(cl->stab, len);
+}
+
/*
* initialize the runtime service curve with the given internal
* service curve starting at (x, y).
@@ -974,9 +994,11 @@ hfsc_change_usc(struct hfsc_class *cl, struct tc_service_curve *usc,
}
static const struct nla_policy hfsc_policy[TCA_HFSC_MAX + 1] = {
- [TCA_HFSC_RSC] = { .len = sizeof(struct tc_service_curve) },
- [TCA_HFSC_FSC] = { .len = sizeof(struct tc_service_curve) },
- [TCA_HFSC_USC] = { .len = sizeof(struct tc_service_curve) },
+ [TCA_HFSC_RSC] = { .len = sizeof(struct tc_service_curve) },
+ [TCA_HFSC_FSC] = { .len = sizeof(struct tc_service_curve) },
+ [TCA_HFSC_USC] = { .len = sizeof(struct tc_service_curve) },
+ [TCA_HFSC_SZOPTS] = { .len = sizeof(struct tc_sizespec) },
+ [TCA_HFSC_STAB] = { .type = NLA_BINARY, .len = TC_STAB_SIZE }
};
static int
@@ -989,6 +1011,8 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
struct nlattr *opt = tca[TCA_OPTIONS];
struct nlattr *tb[TCA_HFSC_MAX + 1];
struct tc_service_curve *rsc = NULL, *fsc = NULL, *usc = NULL;
+ struct tc_sizespec *szopts = NULL;
+ struct qdisc_size_table *stab = NULL;
u64 cur_time;
int err;
@@ -999,6 +1023,7 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
if (err < 0)
return err;
+ err = -EINVAL;
if (tb[TCA_HFSC_RSC]) {
rsc = nla_data(tb[TCA_HFSC_RSC]);
if (rsc->m1 == 0 && rsc->m2 == 0)
@@ -1017,13 +1042,19 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
usc = NULL;
}
+ if (tb[TCA_HFSC_SZOPTS]) {
+ szopts = nla_data(tb[TCA_HFSC_SZOPTS]);
+ stab = qdisc_get_stab(szopts, tb[TCA_HFSC_STAB]);
+ }
+
if (cl != NULL) {
if (parentid) {
+ err = -EINVAL;
if (cl->cl_parent &&
cl->cl_parent->cl_common.classid != parentid)
- return -EINVAL;
+ goto failure;
if (cl->cl_parent == NULL && parentid != TC_H_ROOT)
- return -EINVAL;
+ goto failure;
}
cur_time = psched_get_time();
@@ -1035,9 +1066,14 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
if (usc != NULL)
hfsc_change_usc(cl, usc, cur_time);
+ if (cl->stab)
+ qdisc_put_stab(cl->stab);
+ cl->stab = stab;
+
if (cl->qdisc->q.qlen != 0) {
if (cl->cl_flags & HFSC_RSC)
- update_ed(cl, qdisc_peek_len(cl->qdisc));
+ update_ed(cl, get_linklayer_len(cl,
+ qdisc_peek_len(cl->qdisc)));
if (cl->cl_flags & HFSC_FSC)
update_vf(cl, 0, cur_time);
}
@@ -1050,27 +1086,39 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
return 0;
}
- if (parentid == TC_H_ROOT)
- return -EEXIST;
+ if (parentid == TC_H_ROOT) {
+ err = -EEXIST;
+ goto failure;
+ }
parent = &q->root;
if (parentid) {
parent = hfsc_find_class(parentid, sch);
- if (parent == NULL)
- return -ENOENT;
+ if (parent == NULL) {
+ err = -ENOENT;
+ goto failure;
+ }
}
- if (classid == 0 || TC_H_MAJ(classid ^ sch->handle) != 0)
- return -EINVAL;
- if (hfsc_find_class(classid, sch))
- return -EEXIST;
+ if (classid == 0 || TC_H_MAJ(classid ^ sch->handle) != 0) {
+ err = -EINVAL;
+ goto failure;
+ }
+ if (hfsc_find_class(classid, sch)) {
+ err = -EEXIST;
+ goto failure;
+ }
- if (rsc == NULL && fsc == NULL)
- return -EINVAL;
+ if (rsc == NULL && fsc == NULL) {
+ err = -EINVAL;
+ goto failure;
+ }
cl = kzalloc(sizeof(struct hfsc_class), GFP_KERNEL);
- if (cl == NULL)
- return -ENOBUFS;
+ if (cl == NULL) {
+ err = -ENOBUFS;
+ goto failure;
+ }
if (rsc != NULL)
hfsc_change_rsc(cl, rsc, 0);
@@ -1098,6 +1146,9 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
hfsc_purge_queue(sch, parent);
hfsc_adjust_levels(parent);
cl->cl_pcvtoff = parent->cl_cvtoff;
+ if (cl->stab)
+ qdisc_put_stab(cl->stab);
+ cl->stab = stab;
sch_tree_unlock(sch);
qdisc_class_hash_grow(sch, &q->clhash);
@@ -1107,6 +1158,10 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
&sch->dev_queue->lock, tca[TCA_RATE]);
*arg = (unsigned long)cl;
return 0;
+failure:
+ if (stab)
+ qdisc_put_stab(stab);
+ return err;
}
static void
@@ -1117,6 +1172,8 @@ hfsc_destroy_class(struct Qdisc *sch, struct hfsc_class *cl)
tcf_destroy_chain(&cl->filter_list);
qdisc_destroy(cl->qdisc);
gen_kill_estimator(&cl->bstats, &cl->rate_est);
+ if (cl->stab)
+ qdisc_put_stab(cl->stab);
if (cl != &q->root)
kfree(cl);
}
@@ -1330,6 +1387,21 @@ hfsc_dump_curves(struct sk_buff *skb, struct hfsc_class *cl)
return -1;
}
+static inline int
+hfsc_dump_szopts(struct sk_buff *skb, struct hfsc_class *cl)
+{
+ if (!cl->stab)
+ return 0;
+
+ NLA_PUT(skb, TCA_HFSC_SZOPTS, sizeof(cl->stab->szopts),
+ &cl->stab->szopts);
+
+ return skb->len;
+
+ nla_put_failure:
+ return -1;
+}
+
static int
hfsc_dump_class(struct Qdisc *sch, unsigned long arg, struct sk_buff *skb,
struct tcmsg *tcm)
@@ -1348,6 +1420,8 @@ hfsc_dump_class(struct Qdisc *sch, unsigned long arg, struct sk_buff *skb,
goto nla_put_failure;
if (hfsc_dump_curves(skb, cl) < 0)
goto nla_put_failure;
+ if (hfsc_dump_szopts(skb, cl) < 0)
+ goto nla_put_failure;
nla_nest_end(skb, nest);
return skb->len;
@@ -1427,13 +1501,18 @@ static int
hfsc_init_qdisc(struct Qdisc *sch, struct nlattr *opt)
{
struct hfsc_sched *q = qdisc_priv(sch);
+ struct qdisc_size_table *stab = NULL;
struct tc_hfsc_qopt *qopt;
int err;
- if (opt == NULL || nla_len(opt) < sizeof(*qopt))
+ if (opt == NULL || nla_len(opt) < endof(struct tc_hfsc_qopt, defcls))
return -EINVAL;
qopt = nla_data(opt);
+ if (nla_len(opt) >= endof(struct tc_hfsc_qopt, stab))
+ stab = __qdisc_get_stab(&qopt->szopts,
+ qopt->stab, TC_STAB_SIZE);
+
q->defcls = qopt->defcls;
err = qdisc_class_hash_init(&q->clhash);
if (err < 0)
@@ -1445,6 +1524,7 @@ hfsc_init_qdisc(struct Qdisc *sch, struct nlattr *opt)
q->root.cl_common.classid = sch->handle;
q->root.refcnt = 1;
q->root.sched = q;
+ q->root.stab = stab;
q->root.qdisc = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
&pfifo_qdisc_ops,
sch->handle);
@@ -1466,14 +1546,22 @@ static int
hfsc_change_qdisc(struct Qdisc *sch, struct nlattr *opt)
{
struct hfsc_sched *q = qdisc_priv(sch);
+ struct qdisc_size_table *stab = NULL;
struct tc_hfsc_qopt *qopt;
- if (opt == NULL || nla_len(opt) < sizeof(*qopt))
+ if (opt == NULL || nla_len(opt) < endof(struct tc_hfsc_qopt, defcls))
return -EINVAL;
qopt = nla_data(opt);
+ if (nla_len(opt) >= endof(struct tc_hfsc_qopt, stab))
+ stab = __qdisc_get_stab(&qopt->szopts,
+ qopt->stab, TC_STAB_SIZE);
+
sch_tree_lock(sch);
q->defcls = qopt->defcls;
+ if (q->root.stab)
+ qdisc_put_stab(q->root.stab);
+ q->root.stab = stab;
sch_tree_unlock(sch);
return 0;
@@ -1559,10 +1647,22 @@ hfsc_dump_qdisc(struct Qdisc *sch, struct sk_buff *skb)
{
struct hfsc_sched *q = qdisc_priv(sch);
unsigned char *b = skb_tail_pointer(skb);
- struct tc_hfsc_qopt qopt;
+ u8 qopt_buf[endof(struct tc_hfsc_qopt, szopts)];
+ struct tc_hfsc_qopt *qopt = (struct tc_hfsc_qopt *)qopt_buf;
+ unsigned int qopt_len;
+
+ memset(qopt, 0, sizeof(qopt_buf));
+
+ qopt->defcls = q->defcls;
+ if (q->root.stab) {
+ qopt_len = sizeof(qopt_buf);
+ qopt->szopts = q->root.stab->szopts;
+ } else {
+ qopt_len = endof(struct tc_hfsc_qopt, defcls);
+ }
+
+ NLA_PUT(skb, TCA_OPTIONS, qopt_len, qopt);
- qopt.defcls = q->defcls;
- NLA_PUT(skb, TCA_OPTIONS, sizeof(qopt), &qopt);
return skb->len;
nla_put_failure:
@@ -1594,7 +1694,7 @@ hfsc_enqueue(struct sk_buff *skb, struct Qdisc *sch)
}
if (cl->qdisc->q.qlen == 1)
- set_active(cl, len);
+ set_active(cl, get_linklayer_len(cl, len));
cl->bstats.packets++;
cl->bstats.bytes += len;
@@ -1612,7 +1712,7 @@ hfsc_dequeue(struct Qdisc *sch)
struct hfsc_class *cl;
struct sk_buff *skb;
u64 cur_time;
- unsigned int next_len;
+ unsigned int next_len, cur_len;
int realtime = 0;
if (sch->q.qlen == 0)
@@ -1649,14 +1749,16 @@ hfsc_dequeue(struct Qdisc *sch)
return NULL;
}
- update_vf(cl, skb->len, cur_time);
+ cur_len = get_linklayer_len(cl, skb->len);
+ update_vf(cl, cur_len, cur_time);
if (realtime)
- cl->cl_cumul += skb->len;
+ cl->cl_cumul += cur_len;
if (cl->qdisc->q.qlen != 0) {
if (cl->cl_flags & HFSC_RSC) {
/* update ed */
- next_len = qdisc_peek_len(cl->qdisc);
+ next_len = get_linklayer_len(cl,
+ qdisc_peek_len(cl->qdisc));
if (realtime)
update_ed(cl, next_len);
else
^ permalink raw reply related [flat|nested] 9+ messages in thread
* Re: [PATCH net-next-2.6 v4 2/2] hfsc: add link layer overhead adaption
2008-07-10 19:34 ` [PATCH net-next-2.6 v4 2/2] hfsc: add link layer overhead adaption Jussi Kivilinna
@ 2008-07-14 14:24 ` Patrick McHardy
2008-07-15 5:11 ` David Miller
2008-07-15 10:52 ` Jussi Kivilinna
0 siblings, 2 replies; 9+ messages in thread
From: Patrick McHardy @ 2008-07-14 14:24 UTC (permalink / raw)
To: Jussi Kivilinna; +Cc: netdev
Jussi Kivilinna wrote:
> CBQ and HTB have options for emulating overhead of underlying link layer
> (mpu/overhead/linklayer options). This patch makes sch_hfsc use size table
> to emulate link layer overhead.
>
> Patch uses size table to convert packet length to emulated link layer packet
> length. Converted packet length is passed to hfsc calculations instead of
> real. If size table isn't passed to kernel, hfsc works as before.
>
> Signed-off-by: Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
> ---
>
> include/linux/pkt_sched.h | 5 +
> net/sched/sch_hfsc.c | 158 +++++++++++++++++++++++++++++++++++++--------
> 2 files changed, 135 insertions(+), 28 deletions(-)
>
> diff --git a/include/linux/pkt_sched.h b/include/linux/pkt_sched.h
> index 5bf1444..46db55d 100644
> --- a/include/linux/pkt_sched.h
> +++ b/include/linux/pkt_sched.h
> @@ -303,6 +303,9 @@ struct tc_htb_xstats
> struct tc_hfsc_qopt
> {
> __u16 defcls; /* default class */
> + __u16 __reserved;
> + struct tc_sizespec szopts;
> + __u16 stab[512];
> };
>
> struct tc_service_curve
> @@ -326,6 +329,8 @@ enum
> TCA_HFSC_RSC,
> TCA_HFSC_FSC,
> TCA_HFSC_USC,
> + TCA_HFSC_SZOPTS,
> + TCA_HFSC_STAB,
I thought you were going to make this a generic qdisc feature.
Why is configuration still qdisc specific?
I was thinking of something like this:
- add generic attributes for configuring size tables for any
qdisc, handle those in sch_api
- add qdisc_enqueue() wrapper that calculates the size and
stores it in skb->cb, convert direct calls of sch->enqueue
to use it
- change direct uses of skb->len to use the size from the cb
The second step might need a bit more thought to make sure
qdiscs don't get confused when the dequeued packet is larger
than the enqueued one, but I think they usually don't care.
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH net-next-2.6 v4 2/2] hfsc: add link layer overhead adaption
2008-07-14 14:24 ` Patrick McHardy
@ 2008-07-15 5:11 ` David Miller
2008-07-15 10:52 ` Jussi Kivilinna
1 sibling, 0 replies; 9+ messages in thread
From: David Miller @ 2008-07-15 5:11 UTC (permalink / raw)
To: kaber; +Cc: jussi.kivilinna, netdev
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 14 Jul 2008 16:24:41 +0200
> I thought you were going to make this a generic qdisc feature.
> Why is configuration still qdisc specific?
>
> I was thinking of something like this:
>
> - add generic attributes for configuring size tables for any
> qdisc, handle those in sch_api
>
> - add qdisc_enqueue() wrapper that calculates the size and
> stores it in skb->cb, convert direct calls of sch->enqueue
> to use it
>
> - change direct uses of skb->len to use the size from the cb
>
> The second step might need a bit more thought to make sure
> qdiscs don't get confused when the dequeued packet is larger
> than the enqueued one, but I think they usually don't care.
This sounds sane.
After having to edit every damn file in net/sched/ over the weekend,
a few things really bugged me. One of which was how so much logic
is duplicated in the configuration support code.
The worst offender is TCA_RATE, it's scattered all over the place.
If I get ambitious after all the multiqueue bits are sorted I might
try to consolidate some of this stuff.
But definitely, let's not add more of that kind of stuff.
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH net-next-2.6 v4 2/2] hfsc: add link layer overhead adaption
2008-07-14 14:24 ` Patrick McHardy
2008-07-15 5:11 ` David Miller
@ 2008-07-15 10:52 ` Jussi Kivilinna
2008-07-15 10:55 ` Patrick McHardy
1 sibling, 1 reply; 9+ messages in thread
From: Jussi Kivilinna @ 2008-07-15 10:52 UTC (permalink / raw)
To: Patrick McHardy; +Cc: netdev
Quoting "Patrick McHardy" <kaber@trash.net>:
>
> I thought you were going to make this a generic qdisc feature.
> Why is configuration still qdisc specific?
>
I was thinking of keeping the scope of this patch within hfsc, as done with
the rest of the rate table qdiscs. So it would be better to have them all
use a generic size table instead of duplicating rtab code.
> I was thinking of something like this:
>
> - add generic attributes for configuring size tables for any
> qdisc, handle those in sch_api
>
> - add qdisc_enqueue() wrapper that calculates the size and
> stores it in skb->cb, convert direct calls of sch->enqueue
> to use it
>
> - change direct uses of skb->len to use the size from the cb
>
> The second step might need a bit more thought to make sure
> qdiscs don't get confused when the dequeued packet is larger
> than the enqueued one, but I think they usually don't care.
>
I guess I'll start over using your size table patch as base then.
- Jussi
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH net-next-2.6 v4 2/2] hfsc: add link layer overhead adaption
2008-07-15 10:52 ` Jussi Kivilinna
@ 2008-07-15 10:55 ` Patrick McHardy
2008-07-16 23:40 ` [PATCH RFC] net_sched: add generic qdisc size table Jussi Kivilinna
0 siblings, 1 reply; 9+ messages in thread
From: Patrick McHardy @ 2008-07-15 10:55 UTC (permalink / raw)
To: Jussi Kivilinna; +Cc: netdev
Jussi Kivilinna wrote:
> Quoting "Patrick McHardy" <kaber@trash.net>:
>
>>
>> I thought you were going to make this a generic qdisc feature.
>> Why is configuration still qdisc specific?
>>
>
> I was thinking of keeping the scope of this patch within hfsc, as done with
> the rest of the rate table qdiscs. So it would be better to have them all
> use a generic size table instead of duplicating rtab code.
Well, to use this feature consistently (meaning sizes propagate
down to inner qdiscs) it needs to be a generic feature.
>> I was thinking of something like this:
>>
>> - add generic attributes for configuring size tables for any
>> qdisc, handle those in sch_api
>>
>> - add qdisc_enqueue() wrapper that calculates the size and
>> stores it in skb->cb, convert direct calls of sch->enqueue
>> to use it
>>
>> - change direct uses of skb->len to use the size from the cb
>>
>> The second step might need a bit more thought to make sure
>> qdiscs don't get confused when the dequeued packet is larger
>> than the enqueued one, but I think they usually don't care.
>>
>
> I guess I'll start over using your size table patch as base then.
I'm not sure that part is correct in my patch :)
^ permalink raw reply [flat|nested] 9+ messages in thread
* [PATCH RFC] net_sched: add generic qdisc size table
2008-07-15 10:55 ` Patrick McHardy
@ 2008-07-16 23:40 ` Jussi Kivilinna
2008-07-17 0:32 ` Jussi Kivilinna
2008-07-17 9:39 ` Patrick McHardy
0 siblings, 2 replies; 9+ messages in thread
From: Jussi Kivilinna @ 2008-07-16 23:40 UTC (permalink / raw)
To: Patrick McHardy; +Cc: netdev
[-- Attachment #1: Type: text/plain, Size: 1451 bytes --]
Quoting "Patrick McHardy" <kaber@trash.net>:
> Jussi Kivilinna wrote:
>> Quoting "Patrick McHardy" <kaber@trash.net>:
>>
>>>
>>> I thought you were going to make this a generic qdisc feature.
>>> Why is configuration still qdisc specific?
>>>
>>
>> I was thinking of keeping the scope of this patch within hfsc, as done
>> with the rest of the rate table qdiscs. So it would be better to have
>> them all use a generic size table instead of duplicating rtab code.
>
> Well, to use this feature consistently (meaning sizes propagate
> down to inner qdiscs) it needs to be a generic feature.
>
>>> I was thinking of something like this:
>>>
>>> - add generic attributes for configuring size tables for any
>>> qdisc, handle those in sch_api
>>>
>>> - add qdisc_enqueue() wrapper that calculates the size and
>>> stores it in skb->cb, convert direct calls of sch->enqueue
>>> to use it
>>>
>>> - change direct uses of skb->len to use the size from the cb
>>>
>>> The second step might need a bit more thought to make sure
>>> qdiscs don't get confused when the dequeued packet is larger
>>> than the enqueued one, but I think they usually don't care.
>>>
>>
>> I guess I'll start over using your size table patch as base then.
>
> I'm not sure that part is correct in my patch :)
>
Here's the first version of the generic size table feature. I didn't get a
chance to test it much yet, but hfsc and sfq seem to be OK.
- Jussi
[-- Attachment #2: 01-add-qdisc_enqueue-wrapper.diff --]
[-- Type: text/x-patch, Size: 8420 bytes --]
net_sched: Add qdisc_enqueue wrapper
From: Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
Signed-off-by: Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
---
include/net/sch_generic.h | 5 +++++
net/core/dev.c | 2 +-
net/mac80211/wme.c | 2 +-
net/sched/sch_atm.c | 2 +-
net/sched/sch_cbq.c | 5 +++--
net/sched/sch_dsmark.c | 2 +-
net/sched/sch_hfsc.c | 2 +-
net/sched/sch_htb.c | 3 +--
net/sched/sch_netem.c | 19 +++++++++++--------
net/sched/sch_prio.c | 3 ++-
net/sched/sch_red.c | 2 +-
net/sched/sch_tbf.c | 3 ++-
12 files changed, 30 insertions(+), 20 deletions(-)
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 5ba66b5..e3b0d71 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -258,6 +258,11 @@ static inline bool qdisc_tx_is_noop(const struct net_device *dev)
return (txq->qdisc == &noop_qdisc);
}
+static inline int qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch)
+{
+ return sch->enqueue(skb, sch);
+}
+
static inline int __qdisc_enqueue_tail(struct sk_buff *skb, struct Qdisc *sch,
struct sk_buff_head *list)
{
diff --git a/net/core/dev.c b/net/core/dev.c
index 9b49f74..ee2df34 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1733,7 +1733,7 @@ gso:
if (q->enqueue) {
/* reset queue_mapping to zero */
skb_set_queue_mapping(skb, 0);
- rc = q->enqueue(skb, q);
+ rc = qdisc_enqueue(skb, q);
qdisc_run(txq);
spin_unlock(&txq->lock);
diff --git a/net/mac80211/wme.c b/net/mac80211/wme.c
index 6ae43a3..e263917 100644
--- a/net/mac80211/wme.c
+++ b/net/mac80211/wme.c
@@ -212,7 +212,7 @@ static int wme_qdiscop_enqueue(struct sk_buff *skb, struct Qdisc* qd)
} else {
skb_set_queue_mapping(skb, queue);
qdisc = q->queues[queue];
- err = qdisc->enqueue(skb, qdisc);
+ err = qdisc_enqueue(skb, qdisc);
if (err == NET_XMIT_SUCCESS) {
qd->q.qlen++;
qd->bstats.bytes += skb->len;
diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c
index 0de757e..68ed35e 100644
--- a/net/sched/sch_atm.c
+++ b/net/sched/sch_atm.c
@@ -429,7 +429,7 @@ static int atm_tc_enqueue(struct sk_buff *skb, struct Qdisc *sch)
#endif
}
- ret = flow->q->enqueue(skb, flow->q);
+ ret = qdisc_enqueue(skb, flow->q);
if (ret != 0) {
drop: __maybe_unused
sch->qstats.drops++;
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index 4efc836..1bcb3e8 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -387,7 +387,8 @@ cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
#ifdef CONFIG_NET_CLS_ACT
cl->q->__parent = sch;
#endif
- if ((ret = cl->q->enqueue(skb, cl->q)) == NET_XMIT_SUCCESS) {
+ ret = qdisc_enqueue(skb, cl->q);
+ if (ret == NET_XMIT_SUCCESS) {
sch->q.qlen++;
sch->bstats.packets++;
sch->bstats.bytes+=len;
@@ -671,7 +672,7 @@ static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child)
q->rx_class = cl;
cl->q->__parent = sch;
- if (cl->q->enqueue(skb, cl->q) == 0) {
+ if (qdisc_enqueue(skb, cl->q) == 0) {
sch->q.qlen++;
sch->bstats.packets++;
sch->bstats.bytes+=len;
diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c
index 3aafbd1..44d347e 100644
--- a/net/sched/sch_dsmark.c
+++ b/net/sched/sch_dsmark.c
@@ -252,7 +252,7 @@ static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch)
}
}
- err = p->q->enqueue(skb, p->q);
+ err = qdisc_enqueue(skb, p->q);
if (err != NET_XMIT_SUCCESS) {
sch->qstats.drops++;
return err;
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index 997d520..0ded6d0 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -1586,7 +1586,7 @@ hfsc_enqueue(struct sk_buff *skb, struct Qdisc *sch)
}
len = skb->len;
- err = cl->qdisc->enqueue(skb, cl->qdisc);
+ err = qdisc_enqueue(skb, cl->qdisc);
if (unlikely(err != NET_XMIT_SUCCESS)) {
cl->qstats.drops++;
sch->qstats.drops++;
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index c8ca54c..5fc0325 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -572,8 +572,7 @@ static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch)
kfree_skb(skb);
return ret;
#endif
- } else if (cl->un.leaf.q->enqueue(skb, cl->un.leaf.q) !=
- NET_XMIT_SUCCESS) {
+ } else if (qdisc_enqueue(skb, cl->un.leaf.q) != NET_XMIT_SUCCESS) {
sch->qstats.drops++;
cl->qstats.drops++;
return NET_XMIT_DROP;
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index bc585f2..cbdbc6a 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -82,6 +82,11 @@ struct netem_skb_cb {
psched_time_t time_to_send;
};
+static inline struct netem_skb_cb *netem_skb_cb(struct sk_buff *skb)
+{
+ return (struct netem_skb_cb *)skb->cb;
+}
+
/* init_crandom - initialize correlated random number generator
* Use entropy source for initial seed.
*/
@@ -184,7 +189,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
u32 dupsave = q->duplicate; /* prevent duplicating a dup... */
q->duplicate = 0;
- rootq->enqueue(skb2, rootq);
+ qdisc_enqueue(skb2, rootq);
q->duplicate = dupsave;
}
@@ -205,7 +210,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
skb->data[net_random() % skb_headlen(skb)] ^= 1<<(net_random() % 8);
}
- cb = (struct netem_skb_cb *)skb->cb;
+ cb = netem_skb_cb(skb);
if (q->gap == 0 /* not doing reordering */
|| q->counter < q->gap /* inside last reordering gap */
|| q->reorder < get_crandom(&q->reorder_cor)) {
@@ -218,7 +223,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
now = psched_get_time();
cb->time_to_send = now + delay;
++q->counter;
- ret = q->qdisc->enqueue(skb, q->qdisc);
+ ret = qdisc_enqueue(skb, q->qdisc);
} else {
/*
* Do re-ordering by putting one out of N packets at the front
@@ -277,8 +282,7 @@ static struct sk_buff *netem_dequeue(struct Qdisc *sch)
skb = q->qdisc->dequeue(q->qdisc);
if (skb) {
- const struct netem_skb_cb *cb
- = (const struct netem_skb_cb *)skb->cb;
+ const struct netem_skb_cb *cb = netem_skb_cb(skb);
psched_time_t now = psched_get_time();
/* if more time remaining? */
@@ -454,7 +458,7 @@ static int tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch)
{
struct fifo_sched_data *q = qdisc_priv(sch);
struct sk_buff_head *list = &sch->q;
- psched_time_t tnext = ((struct netem_skb_cb *)nskb->cb)->time_to_send;
+ psched_time_t tnext = netem_skb_cb(nskb)->time_to_send;
struct sk_buff *skb;
if (likely(skb_queue_len(list) < q->limit)) {
@@ -465,8 +469,7 @@ static int tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch)
}
skb_queue_reverse_walk(list, skb) {
- const struct netem_skb_cb *cb
- = (const struct netem_skb_cb *)skb->cb;
+ const struct netem_skb_cb *cb = netem_skb_cb(skb);
if (tnext >= cb->time_to_send)
break;
diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
index 39157f7..918d090 100644
--- a/net/sched/sch_prio.c
+++ b/net/sched/sch_prio.c
@@ -86,7 +86,8 @@ prio_enqueue(struct sk_buff *skb, struct Qdisc *sch)
}
#endif
- if ((ret = qdisc->enqueue(skb, qdisc)) == NET_XMIT_SUCCESS) {
+ ret = qdisc_enqueue(skb, qdisc);
+ if (ret == NET_XMIT_SUCCESS) {
sch->bstats.bytes += skb->len;
sch->bstats.packets++;
sch->q.qlen++;
diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c
index 77098ac..b48a391 100644
--- a/net/sched/sch_red.c
+++ b/net/sched/sch_red.c
@@ -92,7 +92,7 @@ static int red_enqueue(struct sk_buff *skb, struct Qdisc* sch)
break;
}
- ret = child->enqueue(skb, child);
+ ret = qdisc_enqueue(skb, child);
if (likely(ret == NET_XMIT_SUCCESS)) {
sch->bstats.bytes += skb->len;
sch->bstats.packets++;
diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c
index 444c227..7d705b8 100644
--- a/net/sched/sch_tbf.c
+++ b/net/sched/sch_tbf.c
@@ -133,7 +133,8 @@ static int tbf_enqueue(struct sk_buff *skb, struct Qdisc* sch)
return NET_XMIT_DROP;
}
- if ((ret = q->qdisc->enqueue(skb, q->qdisc)) != 0) {
+ ret = qdisc_enqueue(skb, q->qdisc);
+ if (ret != 0) {
sch->qstats.drops++;
return ret;
}
[-- Attachment #3: 02-add-accessor-function-for-packet-len-for-qdiscs.diff --]
[-- Type: text/x-patch, Size: 17232 bytes --]
net_sched: Add accessor function for packet length for qdiscs
From: Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
Signed-off-by: Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
---
include/net/sch_generic.h | 17 +++++++++++------
net/sched/sch_atm.c | 4 ++--
net/sched/sch_cbq.c | 14 ++++++--------
net/sched/sch_dsmark.c | 2 +-
net/sched/sch_fifo.c | 2 +-
net/sched/sch_gred.c | 12 ++++++------
net/sched/sch_hfsc.c | 14 ++++++--------
net/sched/sch_htb.c | 9 +++++----
net/sched/sch_netem.c | 6 +++---
net/sched/sch_prio.c | 2 +-
net/sched/sch_red.c | 2 +-
net/sched/sch_sfq.c | 16 ++++++++--------
net/sched/sch_tbf.c | 9 ++++++---
net/sched/sch_teql.c | 6 +++---
14 files changed, 60 insertions(+), 55 deletions(-)
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index e3b0d71..1f1de3b 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -258,6 +258,11 @@ static inline bool qdisc_tx_is_noop(const struct net_device *dev)
return (txq->qdisc == &noop_qdisc);
}
+static inline unsigned int qdisc_tx_len(struct sk_buff *skb)
+{
+ return skb->len;
+}
+
static inline int qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
return sch->enqueue(skb, sch);
@@ -267,8 +272,8 @@ static inline int __qdisc_enqueue_tail(struct sk_buff *skb, struct Qdisc *sch,
struct sk_buff_head *list)
{
__skb_queue_tail(list, skb);
- sch->qstats.backlog += skb->len;
- sch->bstats.bytes += skb->len;
+ sch->qstats.backlog += qdisc_tx_len(skb);
+ sch->bstats.bytes += qdisc_tx_len(skb);
sch->bstats.packets++;
return NET_XMIT_SUCCESS;
@@ -285,7 +290,7 @@ static inline struct sk_buff *__qdisc_dequeue_head(struct Qdisc *sch,
struct sk_buff *skb = __skb_dequeue(list);
if (likely(skb != NULL))
- sch->qstats.backlog -= skb->len;
+ sch->qstats.backlog -= qdisc_tx_len(skb);
return skb;
}
@@ -301,7 +306,7 @@ static inline struct sk_buff *__qdisc_dequeue_tail(struct Qdisc *sch,
struct sk_buff *skb = __skb_dequeue_tail(list);
if (likely(skb != NULL))
- sch->qstats.backlog -= skb->len;
+ sch->qstats.backlog -= qdisc_tx_len(skb);
return skb;
}
@@ -315,7 +320,7 @@ static inline int __qdisc_requeue(struct sk_buff *skb, struct Qdisc *sch,
struct sk_buff_head *list)
{
__skb_queue_head(list, skb);
- sch->qstats.backlog += skb->len;
+ sch->qstats.backlog += qdisc_tx_len(skb);
sch->qstats.requeues++;
return NET_XMIT_SUCCESS;
@@ -348,7 +353,7 @@ static inline unsigned int __qdisc_queue_drop(struct Qdisc *sch,
struct sk_buff *skb = __qdisc_dequeue_tail(sch, list);
if (likely(skb != NULL)) {
- unsigned int len = skb->len;
+ unsigned int len = qdisc_tx_len(skb);
kfree_skb(skb);
return len;
}
diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c
index 68ed35e..fdadbf0 100644
--- a/net/sched/sch_atm.c
+++ b/net/sched/sch_atm.c
@@ -437,9 +437,9 @@ drop: __maybe_unused
flow->qstats.drops++;
return ret;
}
- sch->bstats.bytes += skb->len;
+ sch->bstats.bytes += qdisc_tx_len(skb);
sch->bstats.packets++;
- flow->bstats.bytes += skb->len;
+ flow->bstats.bytes += qdisc_tx_len(skb);
flow->bstats.packets++;
/*
* Okay, this may seem weird. We pretend we've dropped the packet if
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index 1bcb3e8..a4b7351 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -370,7 +370,6 @@ static int
cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
struct cbq_sched_data *q = qdisc_priv(sch);
- int len = skb->len;
int uninitialized_var(ret);
struct cbq_class *cl = cbq_classify(skb, sch, &ret);
@@ -391,7 +390,7 @@ cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
if (ret == NET_XMIT_SUCCESS) {
sch->q.qlen++;
sch->bstats.packets++;
- sch->bstats.bytes+=len;
+ sch->bstats.bytes += qdisc_tx_len(skb);
cbq_mark_toplevel(q, cl);
if (!cl->next_alive)
cbq_activate_class(cl);
@@ -658,7 +657,6 @@ static enum hrtimer_restart cbq_undelay(struct hrtimer *timer)
#ifdef CONFIG_NET_CLS_ACT
static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child)
{
- int len = skb->len;
struct Qdisc *sch = child->__parent;
struct cbq_sched_data *q = qdisc_priv(sch);
struct cbq_class *cl = q->rx_class;
@@ -675,7 +673,7 @@ static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child)
if (qdisc_enqueue(skb, cl->q) == 0) {
sch->q.qlen++;
sch->bstats.packets++;
- sch->bstats.bytes+=len;
+ sch->bstats.bytes += qdisc_tx_len(skb);
if (!cl->next_alive)
cbq_activate_class(cl);
return 0;
@@ -881,7 +879,7 @@ cbq_dequeue_prio(struct Qdisc *sch, int prio)
if (skb == NULL)
goto skip_class;
- cl->deficit -= skb->len;
+ cl->deficit -= qdisc_tx_len(skb);
q->tx_class = cl;
q->tx_borrowed = borrow;
if (borrow != cl) {
@@ -889,11 +887,11 @@ cbq_dequeue_prio(struct Qdisc *sch, int prio)
borrow->xstats.borrows++;
cl->xstats.borrows++;
#else
- borrow->xstats.borrows += skb->len;
- cl->xstats.borrows += skb->len;
+ borrow->xstats.borrows += qdisc_tx_len(skb);
+ cl->xstats.borrows += qdisc_tx_len(skb);
#endif
}
- q->tx_len = skb->len;
+ q->tx_len = qdisc_tx_len(skb);
if (cl->deficit <= 0) {
q->active[prio] = cl;
diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c
index 44d347e..4a4cd62 100644
--- a/net/sched/sch_dsmark.c
+++ b/net/sched/sch_dsmark.c
@@ -258,7 +258,7 @@ static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch)
return err;
}
- sch->bstats.bytes += skb->len;
+ sch->bstats.bytes += qdisc_tx_len(skb);
sch->bstats.packets++;
sch->q.qlen++;
diff --git a/net/sched/sch_fifo.c b/net/sched/sch_fifo.c
index 1d97fa4..9f63fb4 100644
--- a/net/sched/sch_fifo.c
+++ b/net/sched/sch_fifo.c
@@ -27,7 +27,7 @@ static int bfifo_enqueue(struct sk_buff *skb, struct Qdisc* sch)
{
struct fifo_sched_data *q = qdisc_priv(sch);
- if (likely(sch->qstats.backlog + skb->len <= q->limit))
+ if (likely(sch->qstats.backlog + qdisc_tx_len(skb) <= q->limit))
return qdisc_enqueue_tail(skb, sch);
return qdisc_reshape_fail(skb, sch);
diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c
index 39fa285..36f4326 100644
--- a/net/sched/sch_gred.c
+++ b/net/sched/sch_gred.c
@@ -188,7 +188,7 @@ static int gred_enqueue(struct sk_buff *skb, struct Qdisc* sch)
}
q->packetsin++;
- q->bytesin += skb->len;
+ q->bytesin += qdisc_tx_len(skb);
if (gred_wred_mode(t))
gred_load_wred_set(t, q);
@@ -226,8 +226,8 @@ static int gred_enqueue(struct sk_buff *skb, struct Qdisc* sch)
break;
}
- if (q->backlog + skb->len <= q->limit) {
- q->backlog += skb->len;
+ if (q->backlog + qdisc_tx_len(skb) <= q->limit) {
+ q->backlog += qdisc_tx_len(skb);
return qdisc_enqueue_tail(skb, sch);
}
@@ -254,7 +254,7 @@ static int gred_requeue(struct sk_buff *skb, struct Qdisc* sch)
} else {
if (red_is_idling(&q->parms))
red_end_of_idle_period(&q->parms);
- q->backlog += skb->len;
+ q->backlog += qdisc_tx_len(skb);
}
return qdisc_requeue(skb, sch);
@@ -277,7 +277,7 @@ static struct sk_buff *gred_dequeue(struct Qdisc* sch)
"VQ 0x%x after dequeue, screwing up "
"backlog.\n", tc_index_to_dp(skb));
} else {
- q->backlog -= skb->len;
+ q->backlog -= qdisc_tx_len(skb);
if (!q->backlog && !gred_wred_mode(t))
red_start_of_idle_period(&q->parms);
@@ -299,7 +299,7 @@ static unsigned int gred_drop(struct Qdisc* sch)
skb = qdisc_dequeue_tail(sch);
if (skb) {
- unsigned int len = skb->len;
+ unsigned int len = qdisc_tx_len(skb);
struct gred_sched_data *q;
u16 dp = tc_index_to_dp(skb);
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index 0ded6d0..76f8278 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -895,7 +895,7 @@ qdisc_peek_len(struct Qdisc *sch)
printk("qdisc_peek_len: non work-conserving qdisc ?\n");
return 0;
}
- len = skb->len;
+ len = qdisc_tx_len(skb);
if (unlikely(sch->ops->requeue(skb, sch) != NET_XMIT_SUCCESS)) {
if (net_ratelimit())
printk("qdisc_peek_len: failed to requeue\n");
@@ -1574,7 +1574,6 @@ static int
hfsc_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
struct hfsc_class *cl;
- unsigned int len;
int err;
cl = hfsc_classify(skb, sch, &err);
@@ -1585,7 +1584,6 @@ hfsc_enqueue(struct sk_buff *skb, struct Qdisc *sch)
return err;
}
- len = skb->len;
err = qdisc_enqueue(skb, cl->qdisc);
if (unlikely(err != NET_XMIT_SUCCESS)) {
cl->qstats.drops++;
@@ -1594,12 +1592,12 @@ hfsc_enqueue(struct sk_buff *skb, struct Qdisc *sch)
}
if (cl->qdisc->q.qlen == 1)
- set_active(cl, len);
+ set_active(cl, qdisc_tx_len(skb));
cl->bstats.packets++;
- cl->bstats.bytes += len;
+ cl->bstats.bytes += qdisc_tx_len(skb);
sch->bstats.packets++;
- sch->bstats.bytes += len;
+ sch->bstats.bytes += qdisc_tx_len(skb);
sch->q.qlen++;
return NET_XMIT_SUCCESS;
@@ -1649,9 +1647,9 @@ hfsc_dequeue(struct Qdisc *sch)
return NULL;
}
- update_vf(cl, skb->len, cur_time);
+ update_vf(cl, qdisc_tx_len(skb), cur_time);
if (realtime)
- cl->cl_cumul += skb->len;
+ cl->cl_cumul += qdisc_tx_len(skb);
if (cl->qdisc->q.qlen != 0) {
if (cl->cl_flags & HFSC_RSC) {
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 5fc0325..5e1dcf8 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -579,13 +579,13 @@ static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch)
} else {
cl->bstats.packets +=
skb_is_gso(skb)?skb_shinfo(skb)->gso_segs:1;
- cl->bstats.bytes += skb->len;
+ cl->bstats.bytes += qdisc_tx_len(skb);
htb_activate(q, cl);
}
sch->q.qlen++;
sch->bstats.packets += skb_is_gso(skb)?skb_shinfo(skb)->gso_segs:1;
- sch->bstats.bytes += skb->len;
+ sch->bstats.bytes += qdisc_tx_len(skb);
return NET_XMIT_SUCCESS;
}
@@ -642,7 +642,7 @@ static int htb_requeue(struct sk_buff *skb, struct Qdisc *sch)
static void htb_charge_class(struct htb_sched *q, struct htb_class *cl,
int level, struct sk_buff *skb)
{
- int bytes = skb->len;
+ int bytes = qdisc_tx_len(skb);
long toks, diff;
enum htb_cmode old_mode;
@@ -855,7 +855,8 @@ next:
} while (cl != start);
if (likely(skb != NULL)) {
- if ((cl->un.leaf.deficit[level] -= skb->len) < 0) {
+ cl->un.leaf.deficit[level] -= qdisc_tx_len(skb);
+ if (cl->un.leaf.deficit[level] < 0) {
cl->un.leaf.deficit[level] += cl->un.leaf.quantum;
htb_next_rb_node((level ? cl->parent->un.inner.ptr : q->
ptr[0]) + prio);
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index cbdbc6a..bc9d6af 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -236,7 +236,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
if (likely(ret == NET_XMIT_SUCCESS)) {
sch->q.qlen++;
- sch->bstats.bytes += skb->len;
+ sch->bstats.bytes += qdisc_tx_len(skb);
sch->bstats.packets++;
} else
sch->qstats.drops++;
@@ -477,8 +477,8 @@ static int tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch)
__skb_queue_after(list, skb, nskb);
- sch->qstats.backlog += nskb->len;
- sch->bstats.bytes += nskb->len;
+ sch->qstats.backlog += qdisc_tx_len(nskb);
+ sch->bstats.bytes += qdisc_tx_len(nskb);
sch->bstats.packets++;
return NET_XMIT_SUCCESS;
diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
index 918d090..b5889e8 100644
--- a/net/sched/sch_prio.c
+++ b/net/sched/sch_prio.c
@@ -88,7 +88,7 @@ prio_enqueue(struct sk_buff *skb, struct Qdisc *sch)
ret = qdisc_enqueue(skb, qdisc);
if (ret == NET_XMIT_SUCCESS) {
- sch->bstats.bytes += skb->len;
+ sch->bstats.bytes += qdisc_tx_len(skb);
sch->bstats.packets++;
sch->q.qlen++;
return NET_XMIT_SUCCESS;
diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c
index b48a391..6f34e80 100644
--- a/net/sched/sch_red.c
+++ b/net/sched/sch_red.c
@@ -94,7 +94,7 @@ static int red_enqueue(struct sk_buff *skb, struct Qdisc* sch)
ret = qdisc_enqueue(skb, child);
if (likely(ret == NET_XMIT_SUCCESS)) {
- sch->bstats.bytes += skb->len;
+ sch->bstats.bytes += qdisc_tx_len(skb);
sch->bstats.packets++;
sch->q.qlen++;
} else {
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index 8458f63..ca3876b 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -245,7 +245,7 @@ static unsigned int sfq_drop(struct Qdisc *sch)
if (d > 1) {
sfq_index x = q->dep[d + SFQ_DEPTH].next;
skb = q->qs[x].prev;
- len = skb->len;
+ len = qdisc_tx_len(skb);
__skb_unlink(skb, &q->qs[x]);
kfree_skb(skb);
sfq_dec(q, x);
@@ -261,7 +261,7 @@ static unsigned int sfq_drop(struct Qdisc *sch)
q->next[q->tail] = q->next[d];
q->allot[q->next[d]] += q->quantum;
skb = q->qs[d].prev;
- len = skb->len;
+ len = qdisc_tx_len(skb);
__skb_unlink(skb, &q->qs[d]);
kfree_skb(skb);
sfq_dec(q, d);
@@ -305,7 +305,7 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
if (q->qs[x].qlen >= q->limit)
return qdisc_drop(skb, sch);
- sch->qstats.backlog += skb->len;
+ sch->qstats.backlog += qdisc_tx_len(skb);
__skb_queue_tail(&q->qs[x], skb);
sfq_inc(q, x);
if (q->qs[x].qlen == 1) { /* The flow is new */
@@ -320,7 +320,7 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
}
}
if (++sch->q.qlen <= q->limit) {
- sch->bstats.bytes += skb->len;
+ sch->bstats.bytes += qdisc_tx_len(skb);
sch->bstats.packets++;
return 0;
}
@@ -352,7 +352,7 @@ sfq_requeue(struct sk_buff *skb, struct Qdisc *sch)
q->hash[x] = hash;
}
- sch->qstats.backlog += skb->len;
+ sch->qstats.backlog += qdisc_tx_len(skb);
__skb_queue_head(&q->qs[x], skb);
/* If selected queue has length q->limit+1, this means that
* all another queues are empty and we do simple tail drop.
@@ -363,7 +363,7 @@ sfq_requeue(struct sk_buff *skb, struct Qdisc *sch)
skb = q->qs[x].prev;
__skb_unlink(skb, &q->qs[x]);
sch->qstats.drops++;
- sch->qstats.backlog -= skb->len;
+ sch->qstats.backlog -= qdisc_tx_len(skb);
kfree_skb(skb);
return NET_XMIT_CN;
}
@@ -411,7 +411,7 @@ sfq_dequeue(struct Qdisc *sch)
skb = __skb_dequeue(&q->qs[a]);
sfq_dec(q, a);
sch->q.qlen--;
- sch->qstats.backlog -= skb->len;
+ sch->qstats.backlog -= qdisc_tx_len(skb);
/* Is the slot empty? */
if (q->qs[a].qlen == 0) {
@@ -423,7 +423,7 @@ sfq_dequeue(struct Qdisc *sch)
}
q->next[q->tail] = a;
q->allot[a] += q->quantum;
- } else if ((q->allot[a] -= skb->len) <= 0) {
+ } else if ((q->allot[a] -= qdisc_tx_len(skb)) <= 0) {
q->tail = a;
a = q->next[a];
q->allot[a] += q->quantum;
diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c
index 7d705b8..1e3d52e 100644
--- a/net/sched/sch_tbf.c
+++ b/net/sched/sch_tbf.c
@@ -123,7 +123,10 @@ static int tbf_enqueue(struct sk_buff *skb, struct Qdisc* sch)
struct tbf_sched_data *q = qdisc_priv(sch);
int ret;
- if (skb->len > q->max_size) {
+ /* qdisc_tx_len() before qdisc_enqueue() wrapper, might return different
+ * length than after wrapper. Should recalculate tx_len here if q->qdisc
+ * has size table? */
+ if (qdisc_tx_len(skb) > q->max_size) {
sch->qstats.drops++;
#ifdef CONFIG_NET_CLS_ACT
if (sch->reshape_fail == NULL || sch->reshape_fail(skb, sch))
@@ -140,7 +143,7 @@ static int tbf_enqueue(struct sk_buff *skb, struct Qdisc* sch)
}
sch->q.qlen++;
- sch->bstats.bytes += skb->len;
+ sch->bstats.bytes += qdisc_tx_len(skb);
sch->bstats.packets++;
return 0;
}
@@ -181,7 +184,7 @@ static struct sk_buff *tbf_dequeue(struct Qdisc* sch)
psched_time_t now;
long toks;
long ptoks = 0;
- unsigned int len = skb->len;
+ unsigned int len = qdisc_tx_len(skb);
now = psched_get_time();
toks = psched_tdiff_bounded(now, q->t_c, q->buffer);
diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c
index 8ac0598..2b25275 100644
--- a/net/sched/sch_teql.c
+++ b/net/sched/sch_teql.c
@@ -83,7 +83,7 @@ teql_enqueue(struct sk_buff *skb, struct Qdisc* sch)
if (q->q.qlen < dev->tx_queue_len) {
__skb_queue_tail(&q->q, skb);
- sch->bstats.bytes += skb->len;
+ sch->bstats.bytes += qdisc_tx_len(skb);
sch->bstats.packets++;
return 0;
}
@@ -270,7 +270,6 @@ static int teql_master_xmit(struct sk_buff *skb, struct net_device *dev)
struct Qdisc *start, *q;
int busy;
int nores;
- int len = skb->len;
int subq = skb_get_queue_mapping(skb);
struct sk_buff *skb_res = NULL;
@@ -305,7 +304,8 @@ restart:
master->slaves = NEXT_SLAVE(q);
netif_wake_queue(dev);
master->stats.tx_packets++;
- master->stats.tx_bytes += len;
+ master->stats.tx_bytes +=
+ qdisc_tx_len(skb);
return 0;
}
netif_tx_unlock(slave);
[-- Attachment #4: 03-add-size-table-for-qdiscs.diff --]
[-- Type: text/x-patch, Size: 11014 bytes --]
net_sched: Add size table for qdiscs
From: Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
Add size table functions for qdiscs and calculate packet size in
qdisc_enqueue().
Based on patch by Patrick McHardy
http://marc.info/?l=linux-netdev&m=115201979221729&w=2
Signed-off-by: Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
---
include/linux/pkt_sched.h | 21 ++++++++
include/linux/rtnetlink.h | 1
include/net/pkt_sched.h | 1
include/net/sch_generic.h | 30 +++++++++++-
net/core/dev.c | 1
net/sched/sch_api.c | 117 ++++++++++++++++++++++++++++++++++++++++++++-
net/sched/sch_generic.c | 1
net/sched/sch_netem.c | 3 +
net/sched/sch_tbf.c | 6 +-
9 files changed, 174 insertions(+), 7 deletions(-)
diff --git a/include/linux/pkt_sched.h b/include/linux/pkt_sched.h
index dbb7ac3..eae53bf 100644
--- a/include/linux/pkt_sched.h
+++ b/include/linux/pkt_sched.h
@@ -85,6 +85,27 @@ struct tc_ratespec
#define TC_RTAB_SIZE 1024
+struct tc_sizespec {
+ unsigned char cell_log;
+ unsigned char size_log;
+ short cell_align;
+ int overhead;
+ unsigned linklayer;
+ unsigned mpu;
+ unsigned mtu;
+};
+
+#define TC_STAB_DATA_SIZE 1024
+
+enum {
+ TCA_STAB_UNSPEC,
+ TCA_STAB_BASE,
+ TCA_STAB_DATA,
+ __TCA_STAB_MAX
+};
+
+#define TCA_STAB_MAX (__TCA_STAB_MAX - 1)
+
/* FIFO section */
struct tc_fifo_qopt
diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h
index b358c70..f4d386c 100644
--- a/include/linux/rtnetlink.h
+++ b/include/linux/rtnetlink.h
@@ -482,6 +482,7 @@ enum
TCA_RATE,
TCA_FCNT,
TCA_STATS2,
+ TCA_STAB,
__TCA_MAX
};
diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h
index d58c1a5..7a8a2a0 100644
--- a/include/net/pkt_sched.h
+++ b/include/net/pkt_sched.h
@@ -83,6 +83,7 @@ extern struct Qdisc *qdisc_lookup_class(struct net_device *dev, u32 handle);
extern struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r,
struct nlattr *tab);
extern void qdisc_put_rtab(struct qdisc_rate_table *tab);
+extern void qdisc_put_stab(struct qdisc_size_table *tab);
extern void __qdisc_run(struct netdev_queue *txq);
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 1f1de3b..81805d0 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -23,6 +23,13 @@ struct qdisc_rate_table
int refcnt;
};
+struct qdisc_size_table {
+ struct list_head list;
+ struct tc_sizespec szopts;
+ int refcnt;
+ u16 data[512];
+};
+
struct Qdisc
{
int (*enqueue)(struct sk_buff *skb, struct Qdisc *dev);
@@ -33,6 +40,7 @@ struct Qdisc
#define TCQ_F_INGRESS 4
int padded;
struct Qdisc_ops *ops;
+ struct qdisc_size_table *stab;
u32 handle;
u32 parent;
atomic_t refcnt;
@@ -154,6 +162,16 @@ struct tcf_proto
struct tcf_proto_ops *ops;
};
+struct qdisc_skb_cb {
+ unsigned int tx_len;
+ char data[];
+};
+
+static inline struct qdisc_skb_cb *qdisc_skb_cb(struct sk_buff *skb)
+{
+ return (struct qdisc_skb_cb *)skb->cb;
+}
+
static inline struct net_device *qdisc_dev(struct Qdisc *qdisc)
{
return qdisc->dev_queue->dev;
@@ -224,6 +242,8 @@ extern struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
extern struct Qdisc *qdisc_create_dflt(struct net_device *dev,
struct netdev_queue *dev_queue,
struct Qdisc_ops *ops, u32 parentid);
+extern void qdisc_calculate_tx_len(struct sk_buff *skb,
+ struct qdisc_size_table *stab);
extern void tcf_destroy(struct tcf_proto *tp);
extern void tcf_destroy_chain(struct tcf_proto **fl);
@@ -258,13 +278,21 @@ static inline bool qdisc_tx_is_noop(const struct net_device *dev)
return (txq->qdisc == &noop_qdisc);
}
+static inline void qdisc_root_init_tx_len(struct sk_buff *skb,
+ struct Qdisc *sch)
+{
+ qdisc_skb_cb(skb)->tx_len = skb->len;
+}
+
static inline unsigned int qdisc_tx_len(struct sk_buff *skb)
{
- return skb->len;
+ return qdisc_skb_cb(skb)->tx_len;
}
static inline int qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
+ if (sch->stab)
+ qdisc_calculate_tx_len(skb, sch->stab);
return sch->enqueue(skb, sch);
}
diff --git a/net/core/dev.c b/net/core/dev.c
index ee2df34..8223b56 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1733,6 +1733,7 @@ gso:
if (q->enqueue) {
/* reset queue_mapping to zero */
skb_set_queue_mapping(skb, 0);
+ qdisc_root_init_tx_len(skb, q);
rc = qdisc_enqueue(skb, q);
qdisc_run(txq);
spin_unlock(&txq->lock);
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 95873f8..4d98cb7 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -278,6 +278,97 @@ void qdisc_put_rtab(struct qdisc_rate_table *tab)
}
EXPORT_SYMBOL(qdisc_put_rtab);
+static LIST_HEAD(qdisc_stab_list);
+
+static const struct nla_policy stab_policy[TCA_STAB_MAX + 1] = {
+ [TCA_STAB_BASE] = { .len = sizeof(struct tc_sizespec) },
+ [TCA_STAB_DATA] = { .type = NLA_BINARY, .len = TC_STAB_DATA_SIZE },
+};
+
+/*
+ * Look up, or create, the shared size table described by the nested
+ * TCA_STAB attribute @opt.
+ *
+ * An existing table with identical parameters and data is reused and its
+ * reference count is bumped; otherwise a new table is allocated and added
+ * to qdisc_stab_list.
+ *
+ * Returns the table and sets *@err to 0 on success. On failure returns
+ * NULL and sets *@err to the parse error, -EINVAL (missing or short
+ * attribute) or -ENOMEM.
+ */
+static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt, int *err)
+{
+ struct nlattr *tb[TCA_STAB_MAX + 1];
+ struct qdisc_size_table *stab;
+ struct tc_sizespec *s;
+ u16 *tab;
+
+ *err = nla_parse_nested(tb, TCA_STAB_MAX, opt, stab_policy);
+ if (*err < 0)
+ return NULL;
+
+ /*
+ * Both attributes are mandatory. Test the attribute pointers, not the
+ * nla_data() results: nla_data() merely adds the header length, so it
+ * yields a non-NULL pointer even for a missing (NULL) attribute, and
+ * nla_len() would dereference that NULL attribute.
+ */
+ *err = -EINVAL;
+ if (!tb[TCA_STAB_BASE] || !tb[TCA_STAB_DATA])
+ return NULL;
+ if (nla_len(tb[TCA_STAB_DATA]) < TC_STAB_DATA_SIZE)
+ return NULL;
+
+ s = nla_data(tb[TCA_STAB_BASE]);
+ tab = nla_data(tb[TCA_STAB_DATA]);
+
+ /* Share an already registered table when spec and data both match */
+ list_for_each_entry(stab, &qdisc_stab_list, list) {
+ if (memcmp(&stab->szopts, s, sizeof(*s)) == 0 &&
+ memcmp(stab->data, tab, TC_STAB_DATA_SIZE) == 0) {
+ stab->refcnt++;
+ *err = 0;
+ return stab;
+ }
+ }
+
+ *err = -ENOMEM;
+ stab = kmalloc(sizeof(*stab), GFP_KERNEL);
+ if (stab) {
+ stab->szopts = *s;
+ stab->refcnt = 1;
+ memcpy(stab->data, tab, TC_STAB_DATA_SIZE);
+ list_add_tail(&stab->list, &qdisc_stab_list);
+ *err = 0;
+ }
+ return stab;
+}
+
+void qdisc_put_stab(struct qdisc_size_table *tab)
+{
+ if (!tab || --tab->refcnt)
+ return;
+ list_del(&tab->list);
+ kfree(tab);
+}
+EXPORT_SYMBOL(qdisc_put_stab);
+
+/*
+ * Append a nested TCA_STAB attribute to @skb holding the parameters
+ * (TCA_STAB_BASE) and the table data (TCA_STAB_DATA) of @stab.
+ *
+ * Returns skb->len on success and -1 when the attribute does not fit.
+ */
+static int qdisc_dump_stab(struct sk_buff *skb, struct qdisc_size_table *stab)
+{
+ struct nlattr *nest;
+
+ nest = nla_nest_start(skb, TCA_STAB);
+ /* No room for the nest header: do not hand NULL to nla_nest_end() */
+ if (nest == NULL)
+ goto nla_put_failure;
+ NLA_PUT(skb, TCA_STAB_BASE, sizeof(stab->szopts), &stab->szopts);
+ NLA_PUT(skb, TCA_STAB_DATA, TC_STAB_DATA_SIZE, stab->data);
+ nla_nest_end(skb, nest);
+
+ return skb->len;
+
+nla_put_failure:
+ return -1;
+}
+
+void qdisc_calculate_tx_len(struct sk_buff *skb, struct qdisc_size_table *stab)
+{
+ unsigned int pktlen = skb->len;
+ unsigned char size_log;
+ int slot;
+
+ slot = pktlen + stab->szopts.cell_align + stab->szopts.overhead;
+ if (unlikely(slot < 0))
+ slot = 0;
+ slot >>= stab->szopts.cell_log;
+ size_log = stab->szopts.size_log;
+ if (unlikely(slot > 511))
+ pktlen = ((u32)stab->data[511] << size_log) * (slot >> 9) +
+ ((u32)stab->data[slot & 0x1FF] << size_log);
+ else
+ pktlen = (u32)stab->data[slot] << size_log;
+
+ qdisc_skb_cb(skb)->tx_len = pktlen;
+}
+EXPORT_SYMBOL(qdisc_calculate_tx_len);
+
static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer)
{
struct qdisc_watchdog *wd = container_of(timer, struct qdisc_watchdog,
@@ -619,6 +710,11 @@ qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue,
sch->handle = handle;
if (!ops->init || (err = ops->init(sch, tca[TCA_OPTIONS])) == 0) {
+ if (tca[TCA_STAB]) {
+ sch->stab = qdisc_get_stab(tca[TCA_STAB], &err);
+ if (sch->stab == NULL)
+ goto err_out3;
+ }
if (tca[TCA_RATE]) {
err = gen_new_estimator(&sch->bstats, &sch->rate_est,
&sch->dev_queue->lock,
@@ -641,6 +737,7 @@ qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue,
return sch;
}
err_out3:
+ qdisc_put_stab(sch->stab);
dev_put(dev);
kfree((char *) sch - sch->padded);
err_out2:
@@ -652,15 +749,28 @@ err_out:
static int qdisc_change(struct Qdisc *sch, struct nlattr **tca)
{
- if (tca[TCA_OPTIONS]) {
- int err;
+ struct qdisc_size_table *stab = NULL;
+ int err = 0;
+ if (tca[TCA_OPTIONS]) {
if (sch->ops->change == NULL)
return -EINVAL;
err = sch->ops->change(sch, tca[TCA_OPTIONS]);
if (err)
return err;
}
+
+ if (tca[TCA_STAB]) {
+ stab = qdisc_get_stab(tca[TCA_STAB], &err);
+ if (stab == NULL)
+ return err;
+ }
+
+ spin_lock_bh(&sch->dev_queue->lock);
+ qdisc_put_stab(sch->stab);
+ sch->stab = stab;
+ spin_unlock_bh(&sch->dev_queue->lock);
+
if (tca[TCA_RATE])
gen_replace_estimator(&sch->bstats, &sch->rate_est,
&sch->dev_queue->lock, tca[TCA_RATE]);
@@ -952,6 +1062,9 @@ static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
goto nla_put_failure;
q->qstats.qlen = q->q.qlen;
+ if (q->stab != NULL && qdisc_dump_stab(skb, q->stab) < 0)
+ goto nla_put_failure;
+
if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS,
TCA_XSTATS, &q->dev_queue->lock, &d) < 0)
goto nla_put_failure;
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 243de93..53e941b 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -530,6 +530,7 @@ void qdisc_destroy(struct Qdisc *qdisc)
return;
list_del(&qdisc->list);
+ qdisc_put_stab(qdisc->stab);
gen_kill_estimator(&qdisc->bstats, &qdisc->rate_est);
if (ops->reset)
ops->reset(qdisc);
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index bc9d6af..f75ba82 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -84,7 +84,7 @@ struct netem_skb_cb {
static inline struct netem_skb_cb *netem_skb_cb(struct sk_buff *skb)
{
- return (struct netem_skb_cb *)skb->cb;
+ return (struct netem_skb_cb *)qdisc_skb_cb(skb)->data;
}
/* init_crandom - initialize correlated random number generator
@@ -189,6 +189,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
u32 dupsave = q->duplicate; /* prevent duplicating a dup... */
q->duplicate = 0;
+ qdisc_root_init_tx_len(skb2, rootq);
qdisc_enqueue(skb2, rootq);
q->duplicate = dupsave;
}
diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c
index 1e3d52e..7f7a626 100644
--- a/net/sched/sch_tbf.c
+++ b/net/sched/sch_tbf.c
@@ -123,9 +123,9 @@ static int tbf_enqueue(struct sk_buff *skb, struct Qdisc* sch)
struct tbf_sched_data *q = qdisc_priv(sch);
int ret;
- /* qdisc_tx_len() before qdisc_enqueue() wrapper, might return different
- * length than after wrapper. Should recalculate tx_len here if q->qdisc
- * has size table? */
+ if (q->qdisc->stab)
+ qdisc_calculate_tx_len(skb, q->qdisc->stab);
+
if (qdisc_tx_len(skb) > q->max_size) {
sch->qstats.drops++;
#ifdef CONFIG_NET_CLS_ACT
[-- Attachment #5: 01-iproute-add-size-table.diff --]
[-- Type: text/x-patch, Size: 11346 bytes --]
[iproute2/tc] tc_core: add size table
From: Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
Patch adds generic size table that is similar to rate table, with
difference that size table stores link layer packet size.
Based on patch by Patrick McHardy
http://marc.info/?l=linux-netdev&m=115201979221729&w=2
Signed-off-by: Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
---
include/linux/pkt_sched.h | 21 +++++++++++++++
include/linux/rtnetlink.h | 1 +
tc/Makefile | 1 +
tc/tc_class.c | 1 +
tc/tc_common.h | 5 ++++
tc/tc_core.c | 64 ++++++++++++++++++++++++++++++++++-----------
tc/tc_core.h | 6 +++-
tc/tc_qdisc.c | 32 +++++++++++++++++++++++
tc/tc_util.c | 26 ++++++++++++++++++
tc/tc_util.h | 2 +
10 files changed, 141 insertions(+), 18 deletions(-)
diff --git a/include/linux/pkt_sched.h b/include/linux/pkt_sched.h
index dbb7ac3..eae53bf 100644
--- a/include/linux/pkt_sched.h
+++ b/include/linux/pkt_sched.h
@@ -85,6 +85,27 @@ struct tc_ratespec
#define TC_RTAB_SIZE 1024
+struct tc_sizespec {
+ unsigned char cell_log;
+ unsigned char size_log;
+ short cell_align;
+ int overhead;
+ unsigned linklayer;
+ unsigned mpu;
+ unsigned mtu;
+};
+
+#define TC_STAB_DATA_SIZE 1024
+
+enum {
+ TCA_STAB_UNSPEC,
+ TCA_STAB_BASE,
+ TCA_STAB_DATA,
+ __TCA_STAB_MAX
+};
+
+#define TCA_STAB_MAX (__TCA_STAB_MAX - 1)
+
/* FIFO section */
struct tc_fifo_qopt
diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h
index c1f2d50..a125692 100644
--- a/include/linux/rtnetlink.h
+++ b/include/linux/rtnetlink.h
@@ -482,6 +482,7 @@ enum
TCA_RATE,
TCA_FCNT,
TCA_STATS2,
+ TCA_STAB,
__TCA_MAX
};
diff --git a/tc/Makefile b/tc/Makefile
index bf2df00..a5ac841 100644
--- a/tc/Makefile
+++ b/tc/Makefile
@@ -45,6 +45,7 @@ TCLIB := tc_core.o
TCLIB += tc_red.o
TCLIB += tc_cbq.o
TCLIB += tc_estimator.o
+TCLIB += tc_stab.o
CFLAGS += -DCONFIG_GACT -DCONFIG_GACT_PROB
diff --git a/tc/tc_class.c b/tc/tc_class.c
index 774497a..6cf19d4 100644
--- a/tc/tc_class.c
+++ b/tc/tc_class.c
@@ -31,6 +31,7 @@ static void usage(void)
{
fprintf(stderr, "Usage: tc class [ add | del | change | replace | show ] dev STRING\n");
fprintf(stderr, " [ classid CLASSID ] [ root | parent CLASSID ]\n");
+ fprintf(stderr, " [ estimator INTERVAL TIME_CONSTANT ]\n");
fprintf(stderr, " [ [ QDISC_KIND ] [ help | OPTIONS ] ]\n");
fprintf(stderr, "\n");
fprintf(stderr, " tc class show [ dev STRING ] [ root | parent CLASSID ]\n");
diff --git a/tc/tc_common.h b/tc/tc_common.h
index e01b037..4f88856 100644
--- a/tc/tc_common.h
+++ b/tc/tc_common.h
@@ -11,6 +11,11 @@ extern int print_action(const struct sockaddr_nl *who, struct nlmsghdr *n, void
extern int print_filter(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg);
extern int print_qdisc(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg);
extern int print_class(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg);
+extern void print_size_table(FILE *fp, const char *prefix, struct rtattr *rta);
struct tc_estimator;
extern int parse_estimator(int *p_argc, char ***p_argv, struct tc_estimator *est);
+
+struct tc_sizespec;
+extern int parse_size_table(int *p_argc, char ***p_argv, struct tc_sizespec *s);
+extern int check_size_table_opts(struct tc_sizespec *s);
diff --git a/tc/tc_core.c b/tc/tc_core.c
index 855c115..dd7885c 100644
--- a/tc/tc_core.c
+++ b/tc/tc_core.c
@@ -87,6 +87,21 @@ unsigned tc_align_to_atm(unsigned size)
return linksize;
}
+unsigned tc_adjust_size(unsigned sz, unsigned mpu, enum link_layer linklayer)
+{
+ if (sz < mpu)
+ sz = mpu;
+
+ switch (linklayer) {
+ case LINKLAYER_ATM:
+ return tc_align_to_atm(sz);
+ case LINKLAYER_ETHERNET:
+ default:
+ // No size adjustments on Ethernet
+ return sz;
+ }
+}
+
/*
rtab[pkt_len>>cell_log] = pkt_xmit_time
*/
@@ -96,6 +111,7 @@ int tc_calc_rtable(struct tc_ratespec *r, __u32 *rtab,
enum link_layer linklayer)
{
int i;
+ unsigned sz;
unsigned bps = r->rate;
unsigned mpu = r->mpu;
@@ -109,21 +125,7 @@ int tc_calc_rtable(struct tc_ratespec *r, __u32 *rtab,
}
for (i=0; i<256; i++) {
- unsigned sz = (i+1)<<cell_log;
- if (sz < mpu)
- sz = mpu;
-
- switch (linklayer) {
- case LINKLAYER_ATM:
- sz = tc_align_to_atm(sz);
- break;
- case LINKLAYER_ETHERNET:
- // No size adjustments on Ethernet
- break;
- default:
- break;
- }
-
+ sz = tc_adjust_size((i + 1) << cell_log, mpu, linklayer);
rtab[i] = tc_calc_xmittime(bps, sz);
}
@@ -132,6 +134,38 @@ int tc_calc_rtable(struct tc_ratespec *r, __u32 *rtab,
return cell_log;
}
+/*
+ stab[pkt_len>>cell_log] = pkt_xmit_size>>size_log
+ */
+
+int tc_calc_size_table(struct tc_sizespec *s, __u16 *stab)
+{
+ int i;
+ enum link_layer linklayer = s->linklayer;
+ unsigned mtu = s->mtu;
+ unsigned sz;
+
+ if (mtu == 0)
+ mtu = 2047;
+
+ s->cell_log = 0;
+ while ((mtu >> s->cell_log) > 512 - 1)
+ s->cell_log++;
+
+again:
+ for (i = 512 - 1; i >= 0; i--) {
+ sz = tc_adjust_size((i + 1) << s->cell_log, s->mpu, linklayer);
+ if ((sz >> s->size_log) > UINT16_MAX) {
+ s->size_log++;
+ goto again;
+ }
+ stab[i] = sz >> s->size_log;
+ }
+
+ s->cell_align = -1; // Due to the sz calc
+ return s->cell_log;
+}
+
int tc_core_init()
{
FILE *fp;
diff --git a/tc/tc_core.h b/tc/tc_core.h
index 9f835e8..b82d2b8 100644
--- a/tc/tc_core.h
+++ b/tc/tc_core.h
@@ -7,8 +7,9 @@
#define TIME_UNITS_PER_SEC 1000000
enum link_layer {
- LINKLAYER_ETHERNET=1,
- LINKLAYER_ATM =2,
+ LINKLAYER_UNSPEC,
+ LINKLAYER_ETHERNET,
+ LINKLAYER_ATM,
};
@@ -21,6 +22,7 @@ unsigned tc_calc_xmittime(unsigned rate, unsigned size);
unsigned tc_calc_xmitsize(unsigned rate, unsigned ticks);
int tc_calc_rtable(struct tc_ratespec *r, __u32 *rtab,
int cell_log, unsigned mtu, enum link_layer link_layer);
+int tc_calc_size_table(struct tc_sizespec *s, __u16 *stab);
int tc_setup_estimator(unsigned A, unsigned time_const, struct tc_estimator *est);
diff --git a/tc/tc_qdisc.c b/tc/tc_qdisc.c
index 1256f07..60388a4 100644
--- a/tc/tc_qdisc.c
+++ b/tc/tc_qdisc.c
@@ -32,12 +32,14 @@ static int usage(void)
fprintf(stderr, "Usage: tc qdisc [ add | del | replace | change | show ] dev STRING\n");
fprintf(stderr, " [ handle QHANDLE ] [ root | ingress | parent CLASSID ]\n");
fprintf(stderr, " [ estimator INTERVAL TIME_CONSTANT ]\n");
+ fprintf(stderr, " [ stab [ help | STAB_OPTIONS] ]\n");
fprintf(stderr, " [ [ QDISC_KIND ] [ help | OPTIONS ] ]\n");
fprintf(stderr, "\n");
fprintf(stderr, " tc qdisc show [ dev STRING ] [ingress]\n");
fprintf(stderr, "Where:\n");
fprintf(stderr, "QDISC_KIND := { [p|b]fifo | tbf | prio | cbq | red | etc. }\n");
fprintf(stderr, "OPTIONS := ... try tc qdisc add <desired QDISC_KIND> help\n");
+ fprintf(stderr, "STAB_OPTIONS := ... try tc qdisc add stab help\n");
return -1;
}
@@ -45,6 +47,10 @@ int tc_qdisc_modify(int cmd, unsigned flags, int argc, char **argv)
{
struct qdisc_util *q = NULL;
struct tc_estimator est;
+ struct {
+ struct tc_sizespec szopts;
+ __u16 data[512];
+ } stab;
char d[16];
char k[16];
struct {
@@ -54,6 +60,7 @@ int tc_qdisc_modify(int cmd, unsigned flags, int argc, char **argv)
} req;
memset(&req, 0, sizeof(req));
+ memset(&stab, 0, sizeof(stab));
memset(&est, 0, sizeof(est));
memset(&d, 0, sizeof(d));
memset(&k, 0, sizeof(k));
@@ -108,6 +115,10 @@ int tc_qdisc_modify(int cmd, unsigned flags, int argc, char **argv)
} else if (matches(*argv, "estimator") == 0) {
if (parse_estimator(&argc, &argv, &est))
return -1;
+ } else if (matches(*argv, "stab") == 0) {
+ if (parse_size_table(&argc, &argv, &stab.szopts) < 0)
+ return -1;
+ continue;
} else if (matches(*argv, "help") == 0) {
usage();
} else {
@@ -142,6 +153,23 @@ int tc_qdisc_modify(int cmd, unsigned flags, int argc, char **argv)
}
}
+ if (check_size_table_opts(&stab.szopts)) {
+ struct rtattr *tail;
+
+ if (tc_calc_size_table(&stab.szopts, stab.data) < 0) {
+ fprintf(stderr, "failed to calculate size table.\n");
+ return -1;
+ }
+
+ tail = NLMSG_TAIL(&req.n);
+ addattr_l(&req.n, sizeof(req), TCA_STAB, NULL, 0);
+ addattr_l(&req.n, sizeof(req), TCA_STAB_BASE, &stab.szopts,
+ sizeof(stab.szopts));
+ addattr_l(&req.n, sizeof(req), TCA_STAB_DATA, stab.data,
+ TC_STAB_DATA_SIZE);
+ tail->rta_len = (void *)NLMSG_TAIL(&req.n) - (void *)tail;
+ }
+
if (d[0]) {
int idx;
@@ -223,6 +251,10 @@ int print_qdisc(const struct sockaddr_nl *who,
fprintf(fp, "[cannot parse qdisc parameters]");
}
fprintf(fp, "\n");
+ if (tb[TCA_STAB]) {
+ print_size_table(fp, " ", tb[TCA_STAB]);
+ fprintf(fp, "\n");
+ }
if (show_stats) {
struct rtattr *xstats = NULL;
diff --git a/tc/tc_util.c b/tc/tc_util.c
index cd9dd59..8ec8ec2 100644
--- a/tc/tc_util.c
+++ b/tc/tc_util.c
@@ -435,7 +435,7 @@ int action_a2n(char *arg, int *result)
return 0;
}
-int get_linklayer(unsigned int *val, const char *arg)
+int get_linklayer(unsigned *val, const char *arg)
{
int res;
@@ -452,6 +452,30 @@ int get_linklayer(unsigned int *val, const char *arg)
return 0;
}
+void print_linklayer(char *buf, int len, unsigned linklayer)
+{
+ switch (linklayer) {
+ case LINKLAYER_UNSPEC:
+ snprintf(buf, len, "%s", "unspec");
+ return;
+ case LINKLAYER_ETHERNET:
+ snprintf(buf, len, "%s", "ethernet");
+ return;
+ case LINKLAYER_ATM:
+ snprintf(buf, len, "%s", "atm");
+ return;
+ default:
+ snprintf(buf, len, "%s", "unknown");
+ return;
+ }
+}
+
+char *sprint_linklayer(unsigned linklayer, char *buf)
+{
+ print_linklayer(buf, SPRINT_BSIZE-1, linklayer);
+ return buf;
+}
+
void print_tm(FILE * f, const struct tcf_t *tm)
{
int hz = get_user_hz();
diff --git a/tc/tc_util.h b/tc/tc_util.h
index 796da54..c4a386c 100644
--- a/tc/tc_util.h
+++ b/tc/tc_util.h
@@ -57,6 +57,7 @@ extern void print_size(char *buf, int len, __u32 size);
extern void print_percent(char *buf, int len, __u32 percent);
extern void print_qdisc_handle(char *buf, int len, __u32 h);
extern void print_time(char *buf, int len, __u32 time);
+extern void print_linklayer(char *buf, int len, unsigned linklayer);
extern char * sprint_rate(__u32 rate, char *buf);
extern char * sprint_size(__u32 size, char *buf);
extern char * sprint_qdisc_handle(__u32 h, char *buf);
@@ -64,6 +65,7 @@ extern char * sprint_tc_classid(__u32 h, char *buf);
extern char * sprint_time(__u32 time, char *buf);
extern char * sprint_ticks(__u32 ticks, char *buf);
extern char * sprint_percent(__u32 percent, char *buf);
+extern char * sprint_linklayer(unsigned linklayer, char *buf);
extern void print_tcstats_attr(FILE *fp, struct rtattr *tb[], char *prefix, struct rtattr **xstats);
extern void print_tcstats2_attr(FILE *fp, struct rtattr *rta, char *prefix, struct rtattr **xstats);
^ permalink raw reply related [flat|nested] 9+ messages in thread
* Re: [PATCH RFC] net_sched: add generic qdisc size table
2008-07-16 23:40 ` [PATCH RFC] net_sched: add generic qdisc size table Jussi Kivilinna
@ 2008-07-17 0:32 ` Jussi Kivilinna
2008-07-17 9:39 ` Patrick McHardy
1 sibling, 0 replies; 9+ messages in thread
From: Jussi Kivilinna @ 2008-07-17 0:32 UTC (permalink / raw)
To: Patrick McHardy; +Cc: netdev
[-- Attachment #1: Type: text/plain, Size: 267 bytes --]
Quoting "Jussi Kivilinna" <jussi.kivilinna@mbnet.fi>:
>
> Here's first version of generic size table feature. Didn't get chance
> to test too much yet but hfsc&sfq seems to be ok.
>
> - Jussi
>
Resending iproute2/tc patch, as it was missing one file.
- Jussi
[-- Attachment #2: 01-iproute-add-size-table.diff --]
[-- Type: text/x-patch, Size: 15452 bytes --]
[iproute2/tc] tc_core: add size table
From: Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
This patch adds a generic size table that is similar to the rate table, with the
difference that the size table stores the link layer packet size.
Based on patch by Patrick McHardy
http://marc.info/?l=linux-netdev&m=115201979221729&w=2
Signed-off-by: Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
---
include/linux/pkt_sched.h | 21 ++++++
include/linux/rtnetlink.h | 1
tc/Makefile | 1
tc/tc_class.c | 1
tc/tc_common.h | 5 ++
tc/tc_core.c | 64 +++++++++++++++----
tc/tc_core.h | 6 +-
tc/tc_qdisc.c | 32 ++++++++++
tc/tc_stab.c | 149 +++++++++++++++++++++++++++++++++++++++++++++
tc/tc_util.c | 26 ++++++++
tc/tc_util.h | 2 +
11 files changed, 290 insertions(+), 18 deletions(-)
create mode 100644 tc/tc_stab.c
diff --git a/include/linux/pkt_sched.h b/include/linux/pkt_sched.h
index dbb7ac3..eae53bf 100644
--- a/include/linux/pkt_sched.h
+++ b/include/linux/pkt_sched.h
@@ -85,6 +85,27 @@ struct tc_ratespec
#define TC_RTAB_SIZE 1024
+struct tc_sizespec {
+ unsigned char cell_log;
+ unsigned char size_log;
+ short cell_align;
+ int overhead;
+ unsigned linklayer;
+ unsigned mpu;
+ unsigned mtu;
+};
+
+#define TC_STAB_DATA_SIZE 1024
+
+enum {
+ TCA_STAB_UNSPEC,
+ TCA_STAB_BASE,
+ TCA_STAB_DATA,
+ __TCA_STAB_MAX
+};
+
+#define TCA_STAB_MAX (__TCA_STAB_MAX - 1)
+
/* FIFO section */
struct tc_fifo_qopt
diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h
index c1f2d50..a125692 100644
--- a/include/linux/rtnetlink.h
+++ b/include/linux/rtnetlink.h
@@ -482,6 +482,7 @@ enum
TCA_RATE,
TCA_FCNT,
TCA_STATS2,
+ TCA_STAB,
__TCA_MAX
};
diff --git a/tc/Makefile b/tc/Makefile
index bf2df00..a5ac841 100644
--- a/tc/Makefile
+++ b/tc/Makefile
@@ -45,6 +45,7 @@ TCLIB := tc_core.o
TCLIB += tc_red.o
TCLIB += tc_cbq.o
TCLIB += tc_estimator.o
+TCLIB += tc_stab.o
CFLAGS += -DCONFIG_GACT -DCONFIG_GACT_PROB
diff --git a/tc/tc_class.c b/tc/tc_class.c
index 774497a..6cf19d4 100644
--- a/tc/tc_class.c
+++ b/tc/tc_class.c
@@ -31,6 +31,7 @@ static void usage(void)
{
fprintf(stderr, "Usage: tc class [ add | del | change | replace | show ] dev STRING\n");
fprintf(stderr, " [ classid CLASSID ] [ root | parent CLASSID ]\n");
+ fprintf(stderr, " [ estimator INTERVAL TIME_CONSTANT ]\n");
fprintf(stderr, " [ [ QDISC_KIND ] [ help | OPTIONS ] ]\n");
fprintf(stderr, "\n");
fprintf(stderr, " tc class show [ dev STRING ] [ root | parent CLASSID ]\n");
diff --git a/tc/tc_common.h b/tc/tc_common.h
index e01b037..4f88856 100644
--- a/tc/tc_common.h
+++ b/tc/tc_common.h
@@ -11,6 +11,11 @@ extern int print_action(const struct sockaddr_nl *who, struct nlmsghdr *n, void
extern int print_filter(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg);
extern int print_qdisc(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg);
extern int print_class(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg);
+extern void print_size_table(FILE *fp, const char *prefix, struct rtattr *rta);
struct tc_estimator;
extern int parse_estimator(int *p_argc, char ***p_argv, struct tc_estimator *est);
+
+struct tc_sizespec;
+extern int parse_size_table(int *p_argc, char ***p_argv, struct tc_sizespec *s);
+extern int check_size_table_opts(struct tc_sizespec *s);
diff --git a/tc/tc_core.c b/tc/tc_core.c
index 855c115..dd7885c 100644
--- a/tc/tc_core.c
+++ b/tc/tc_core.c
@@ -87,6 +87,21 @@ unsigned tc_align_to_atm(unsigned size)
return linksize;
}
+unsigned tc_adjust_size(unsigned sz, unsigned mpu, enum link_layer linklayer)
+{
+ if (sz < mpu)
+ sz = mpu;
+
+ switch (linklayer) {
+ case LINKLAYER_ATM:
+ return tc_align_to_atm(sz);
+ case LINKLAYER_ETHERNET:
+ default:
+ // No size adjustments on Ethernet
+ return sz;
+ }
+}
+
/*
rtab[pkt_len>>cell_log] = pkt_xmit_time
*/
@@ -96,6 +111,7 @@ int tc_calc_rtable(struct tc_ratespec *r, __u32 *rtab,
enum link_layer linklayer)
{
int i;
+ unsigned sz;
unsigned bps = r->rate;
unsigned mpu = r->mpu;
@@ -109,21 +125,7 @@ int tc_calc_rtable(struct tc_ratespec *r, __u32 *rtab,
}
for (i=0; i<256; i++) {
- unsigned sz = (i+1)<<cell_log;
- if (sz < mpu)
- sz = mpu;
-
- switch (linklayer) {
- case LINKLAYER_ATM:
- sz = tc_align_to_atm(sz);
- break;
- case LINKLAYER_ETHERNET:
- // No size adjustments on Ethernet
- break;
- default:
- break;
- }
-
+ sz = tc_adjust_size((i + 1) << cell_log, mpu, linklayer);
rtab[i] = tc_calc_xmittime(bps, sz);
}
@@ -132,6 +134,38 @@ int tc_calc_rtable(struct tc_ratespec *r, __u32 *rtab,
return cell_log;
}
+/*
+ stab[pkt_len>>cell_log] = pkt_xmit_size>>size_log
+ */
+
+int tc_calc_size_table(struct tc_sizespec *s, __u16 *stab)
+{
+ int i;
+ enum link_layer linklayer = s->linklayer;
+ unsigned mtu = s->mtu;
+ unsigned sz;
+
+ if (mtu == 0)
+ mtu = 2047;
+
+ s->cell_log = 0;
+ while ((mtu >> s->cell_log) > 512 - 1)
+ s->cell_log++;
+
+again:
+ for (i = 512 - 1; i >= 0; i--) {
+ sz = tc_adjust_size((i + 1) << s->cell_log, s->mpu, linklayer);
+ if ((sz >> s->size_log) > UINT16_MAX) {
+ s->size_log++;
+ goto again;
+ }
+ stab[i] = sz >> s->size_log;
+ }
+
+ s->cell_align = -1; // Due to the sz calc
+ return s->cell_log;
+}
+
int tc_core_init()
{
FILE *fp;
diff --git a/tc/tc_core.h b/tc/tc_core.h
index 9f835e8..b82d2b8 100644
--- a/tc/tc_core.h
+++ b/tc/tc_core.h
@@ -7,8 +7,9 @@
#define TIME_UNITS_PER_SEC 1000000
enum link_layer {
- LINKLAYER_ETHERNET=1,
- LINKLAYER_ATM =2,
+ LINKLAYER_UNSPEC,
+ LINKLAYER_ETHERNET,
+ LINKLAYER_ATM,
};
@@ -21,6 +22,7 @@ unsigned tc_calc_xmittime(unsigned rate, unsigned size);
unsigned tc_calc_xmitsize(unsigned rate, unsigned ticks);
int tc_calc_rtable(struct tc_ratespec *r, __u32 *rtab,
int cell_log, unsigned mtu, enum link_layer link_layer);
+int tc_calc_size_table(struct tc_sizespec *s, __u16 *stab);
int tc_setup_estimator(unsigned A, unsigned time_const, struct tc_estimator *est);
diff --git a/tc/tc_qdisc.c b/tc/tc_qdisc.c
index 1256f07..60388a4 100644
--- a/tc/tc_qdisc.c
+++ b/tc/tc_qdisc.c
@@ -32,12 +32,14 @@ static int usage(void)
fprintf(stderr, "Usage: tc qdisc [ add | del | replace | change | show ] dev STRING\n");
fprintf(stderr, " [ handle QHANDLE ] [ root | ingress | parent CLASSID ]\n");
fprintf(stderr, " [ estimator INTERVAL TIME_CONSTANT ]\n");
+ fprintf(stderr, " [ stab [ help | STAB_OPTIONS] ]\n");
fprintf(stderr, " [ [ QDISC_KIND ] [ help | OPTIONS ] ]\n");
fprintf(stderr, "\n");
fprintf(stderr, " tc qdisc show [ dev STRING ] [ingress]\n");
fprintf(stderr, "Where:\n");
fprintf(stderr, "QDISC_KIND := { [p|b]fifo | tbf | prio | cbq | red | etc. }\n");
fprintf(stderr, "OPTIONS := ... try tc qdisc add <desired QDISC_KIND> help\n");
+ fprintf(stderr, "STAB_OPTIONS := ... try tc qdisc add stab help\n");
return -1;
}
@@ -45,6 +47,10 @@ int tc_qdisc_modify(int cmd, unsigned flags, int argc, char **argv)
{
struct qdisc_util *q = NULL;
struct tc_estimator est;
+ struct {
+ struct tc_sizespec szopts;
+ __u16 data[512];
+ } stab;
char d[16];
char k[16];
struct {
@@ -54,6 +60,7 @@ int tc_qdisc_modify(int cmd, unsigned flags, int argc, char **argv)
} req;
memset(&req, 0, sizeof(req));
+ memset(&stab, 0, sizeof(stab));
memset(&est, 0, sizeof(est));
memset(&d, 0, sizeof(d));
memset(&k, 0, sizeof(k));
@@ -108,6 +115,10 @@ int tc_qdisc_modify(int cmd, unsigned flags, int argc, char **argv)
} else if (matches(*argv, "estimator") == 0) {
if (parse_estimator(&argc, &argv, &est))
return -1;
+ } else if (matches(*argv, "stab") == 0) {
+ if (parse_size_table(&argc, &argv, &stab.szopts) < 0)
+ return -1;
+ continue;
} else if (matches(*argv, "help") == 0) {
usage();
} else {
@@ -142,6 +153,23 @@ int tc_qdisc_modify(int cmd, unsigned flags, int argc, char **argv)
}
}
+ if (check_size_table_opts(&stab.szopts)) {
+ struct rtattr *tail;
+
+ if (tc_calc_size_table(&stab.szopts, stab.data) < 0) {
+ fprintf(stderr, "failed to calculate size table.\n");
+ return -1;
+ }
+
+ tail = NLMSG_TAIL(&req.n);
+ addattr_l(&req.n, sizeof(req), TCA_STAB, NULL, 0);
+ addattr_l(&req.n, sizeof(req), TCA_STAB_BASE, &stab.szopts,
+ sizeof(stab.szopts));
+ addattr_l(&req.n, sizeof(req), TCA_STAB_DATA, stab.data,
+ TC_STAB_DATA_SIZE);
+ tail->rta_len = (void *)NLMSG_TAIL(&req.n) - (void *)tail;
+ }
+
if (d[0]) {
int idx;
@@ -223,6 +251,10 @@ int print_qdisc(const struct sockaddr_nl *who,
fprintf(fp, "[cannot parse qdisc parameters]");
}
fprintf(fp, "\n");
+ if (tb[TCA_STAB]) {
+ print_size_table(fp, " ", tb[TCA_STAB]);
+ fprintf(fp, "\n");
+ }
if (show_stats) {
struct rtattr *xstats = NULL;
diff --git a/tc/tc_stab.c b/tc/tc_stab.c
new file mode 100644
index 0000000..9ca9f5e
--- /dev/null
+++ b/tc/tc_stab.c
@@ -0,0 +1,149 @@
+/*
+ * tc_stab.c "tc qdisc ... stab *".
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Authors: Jussi Kivilinna, <jussi.kivilinna@mbnet.fi>
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <syslog.h>
+#include <fcntl.h>
+#include <math.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <string.h>
+
+#include "utils.h"
+#include "tc_util.h"
+#include "tc_core.h"
+#include "tc_common.h"
+
+static void stab_help(void)
+{
+ fprintf(stderr,
+ "Usage: ... stab [ default CLASSID ] [ mtu BYTES] [ mpu BYTES ]\n"
+ " [ overhead BYTES ] [ linklayer TYPE ] ...\n"
+ " mtu : max packet size we create rate map for {2047}\n"
+ " mpu : minimum packet size used in rate computations\n"
+ " overhead : per-packet size overhead used in rate computations\n"
+ " linklayer : adapting to a linklayer e.g. atm\n"
+ "Example: ... stab overhead 20b linklayer atm\n");
+
+ return;
+}
+
+int check_size_table_opts(struct tc_sizespec *s)
+{
+ return s->linklayer >= LINKLAYER_ETHERNET || s->mpu != 0 ||
+ s->overhead != 0;
+}
+
+int parse_size_table(int *argcp, char ***argvp, struct tc_sizespec *sp)
+{
+ char **argv = *argvp;
+ int argc = *argcp;
+ unsigned linklayer = LINKLAYER_UNSPEC;
+ struct tc_sizespec s;
+
+ memset(&s, 0, sizeof(s));
+
+ NEXT_ARG();
+ if (matches(*argv, "help") == 0) {
+ stab_help();
+ return -1;
+ }
+ while (argc > 0) {
+ if (matches(*argv, "mtu") == 0) {
+ NEXT_ARG();
+ if (s.mtu)
+ duparg("mtu", *argv);
+ if (get_u32(&s.mtu, *argv, 10)) {
+ invarg("mtu", "invalid mtu");
+ return -1;
+ }
+ } else if (matches(*argv, "mpu") == 0) {
+ NEXT_ARG();
+ if (s.mpu)
+ duparg("mpu", *argv);
+ if (get_u32(&s.mpu, *argv, 10)) {
+ invarg("mpu", "invalid mpu");
+ return -1;
+ }
+ } else if (matches(*argv, "overhead") == 0) {
+ NEXT_ARG();
+ if (s.overhead)
+ duparg("overhead", *argv);
+ if (get_integer(&s.overhead, *argv, 10)) {
+ invarg("overhead", "invalid overhead");
+ return -1;
+ }
+ } else if (matches(*argv, "linklayer") == 0) {
+ NEXT_ARG();
+ if (linklayer != LINKLAYER_UNSPEC)
+ duparg("linklayer", *argv);
+ if (get_linklayer(&linklayer, *argv)) {
+ invarg("linklayer", "invalid linklayer");
+ return -1;
+ }
+ } else
+ break;
+ argc--; argv++;
+ }
+
+ if (!check_size_table_opts(&s))
+ return -1;
+
+ s.linklayer = linklayer;
+ *sp = s;
+ *argvp = argv;
+ *argcp = argc;
+ return 0;
+}
+
+void print_size_table(FILE *fp, const char *prefix, struct rtattr *rta)
+{
+ struct rtattr *tb[TCA_STAB_MAX + 1];
+
+ SPRINT_BUF(b1);
+
+ parse_rtattr_nested(tb, TCA_STAB_MAX, rta);
+
+ if (tb[TCA_STAB_BASE]) {
+ struct tc_sizespec s = {0};
+ memcpy(&s, RTA_DATA(tb[TCA_STAB_BASE]), MIN(RTA_PAYLOAD(tb[TCA_STAB_BASE]), sizeof(s)));
+
+ fprintf(fp, "%s", prefix);
+ if (s.mtu)
+ fprintf(fp, "mtu %u ", s.mtu);
+ if (s.mpu)
+ fprintf(fp, "mpu %u ", s.mpu);
+ if (s.overhead)
+ fprintf(fp, "overhead %d ", s.overhead);
+ if (s.linklayer)
+ fprintf(fp, "linklayer %s ", sprint_linklayer(s.linklayer, b1));
+ }
+
+#if 0
+ if (show_details && tb[TCA_STAB_DATA]) {
+ unsigned i, j, dlen;
+ __u16 *data = RTA_DATA(tb[TCA_STAB_DATA]);
+ dlen = RTA_PAYLOAD(tb[TCA_STAB_DATA]) / sizeof(__u16);
+
+ fprintf(fp, "\n%sstab data:", prefix);
+ for (i = 0; i < dlen/12; i++) {
+ fprintf(fp, "\n%s %3u:", prefix, i * 12);
+ for (j = 0; i * 12 + j < dlen; j++)
+ fprintf(fp, " %05x", data[i * 12 + j]);
+ }
+ }
+#endif
+}
+
diff --git a/tc/tc_util.c b/tc/tc_util.c
index cd9dd59..8ec8ec2 100644
--- a/tc/tc_util.c
+++ b/tc/tc_util.c
@@ -435,7 +435,7 @@ int action_a2n(char *arg, int *result)
return 0;
}
-int get_linklayer(unsigned int *val, const char *arg)
+int get_linklayer(unsigned *val, const char *arg)
{
int res;
@@ -452,6 +452,30 @@ int get_linklayer(unsigned int *val, const char *arg)
return 0;
}
+void print_linklayer(char *buf, int len, unsigned linklayer)
+{
+ switch (linklayer) {
+ case LINKLAYER_UNSPEC:
+ snprintf(buf, len, "%s", "unspec");
+ return;
+ case LINKLAYER_ETHERNET:
+ snprintf(buf, len, "%s", "ethernet");
+ return;
+ case LINKLAYER_ATM:
+ snprintf(buf, len, "%s", "atm");
+ return;
+ default:
+ snprintf(buf, len, "%s", "unknown");
+ return;
+ }
+}
+
+char *sprint_linklayer(unsigned linklayer, char *buf)
+{
+ print_linklayer(buf, SPRINT_BSIZE-1, linklayer);
+ return buf;
+}
+
void print_tm(FILE * f, const struct tcf_t *tm)
{
int hz = get_user_hz();
diff --git a/tc/tc_util.h b/tc/tc_util.h
index 796da54..c4a386c 100644
--- a/tc/tc_util.h
+++ b/tc/tc_util.h
@@ -57,6 +57,7 @@ extern void print_size(char *buf, int len, __u32 size);
extern void print_percent(char *buf, int len, __u32 percent);
extern void print_qdisc_handle(char *buf, int len, __u32 h);
extern void print_time(char *buf, int len, __u32 time);
+extern void print_linklayer(char *buf, int len, unsigned linklayer);
extern char * sprint_rate(__u32 rate, char *buf);
extern char * sprint_size(__u32 size, char *buf);
extern char * sprint_qdisc_handle(__u32 h, char *buf);
@@ -64,6 +65,7 @@ extern char * sprint_tc_classid(__u32 h, char *buf);
extern char * sprint_time(__u32 time, char *buf);
extern char * sprint_ticks(__u32 ticks, char *buf);
extern char * sprint_percent(__u32 percent, char *buf);
+extern char * sprint_linklayer(unsigned linklayer, char *buf);
extern void print_tcstats_attr(FILE *fp, struct rtattr *tb[], char *prefix, struct rtattr **xstats);
extern void print_tcstats2_attr(FILE *fp, struct rtattr *rta, char *prefix, struct rtattr **xstats);
^ permalink raw reply related [flat|nested] 9+ messages in thread
* Re: [PATCH RFC] net_sched: add generic qdisc size table
2008-07-16 23:40 ` [PATCH RFC] net_sched: add generic qdisc size table Jussi Kivilinna
2008-07-17 0:32 ` Jussi Kivilinna
@ 2008-07-17 9:39 ` Patrick McHardy
1 sibling, 0 replies; 9+ messages in thread
From: Patrick McHardy @ 2008-07-17 9:39 UTC (permalink / raw)
To: Jussi Kivilinna; +Cc: netdev
Jussi Kivilinna wrote:
> Here's first version of generic size table feature. Didn't get chance to
> test too much yet but hfsc&sfq seems to be ok.
Could you please send as one patch per mail? That makes review
and commenting easier.
^ permalink raw reply [flat|nested] 9+ messages in thread
end of thread, other threads:[~2008-07-17 9:39 UTC | newest]
Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2008-07-10 19:34 [PATCH net-next-2.6 v4 1/2] net_sched: add size table functions Jussi Kivilinna
2008-07-10 19:34 ` [PATCH net-next-2.6 v4 2/2] hfsc: add link layer overhead adaption Jussi Kivilinna
2008-07-14 14:24 ` Patrick McHardy
2008-07-15 5:11 ` David Miller
2008-07-15 10:52 ` Jussi Kivilinna
2008-07-15 10:55 ` Patrick McHardy
2008-07-16 23:40 ` [PATCH RFC] net_sched: add generic qdisc size table Jussi Kivilinna
2008-07-17 0:32 ` Jussi Kivilinna
2008-07-17 9:39 ` Patrick McHardy
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).