diff --git a/include/linux/pkt_sched.h b/include/linux/pkt_sched.h
index d10f353..2ce55d5 100644
--- a/include/linux/pkt_sched.h
+++ b/include/linux/pkt_sched.h
@@ -83,6 +83,21 @@ struct tc_ratespec
 	__u32	rate;
 };
 
+struct tc_sizespec
+{
+	unsigned int	cell_log;
+	unsigned int	addend;
+};
+
+enum {
+	TCA_STAB_UNSPEC,
+	TCA_STAB_BASE,
+	TCA_STAB_DATA,
+	__TCA_STAB_MAX
+};
+
+#define TCA_STAB_MAX (__TCA_STAB_MAX - 1)
+
 /* FIFO section */
 
 struct tc_fifo_qopt
diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h
index facd9ee..167cc22 100644
--- a/include/linux/rtnetlink.h
+++ b/include/linux/rtnetlink.h
@@ -821,6 +821,7 @@ enum
 	TCA_RATE,
 	TCA_FCNT,
 	TCA_STATS2,
+	TCA_STAB,
 	__TCA_MAX
 };
 
diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h
index 44cf69e..8fd9a42 100644
--- a/include/net/pkt_sched.h
+++ b/include/net/pkt_sched.h
@@ -223,6 +223,7 @@ extern struct Qdisc *qdisc_lookup_class(
 extern struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r,
 		struct rtattr *tab);
 extern void qdisc_put_rtab(struct qdisc_rate_table *tab);
+extern void qdisc_put_stab(struct qdisc_size_table *tab);
 
 extern int qdisc_enqueue_root(struct net_device *dev, struct sk_buff *skb);
 extern void __qdisc_run(struct net_device *dev);
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 75d7a55..76c50a1 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -23,6 +23,15 @@ struct qdisc_rate_table
 	int		refcnt;
 };
 
+struct qdisc_size_table
+{
+	struct list_head	list;
+	struct tc_sizespec	size;
+	int			refcnt;
+	unsigned int		tsize;
+	u32			data[];
+};
+
 struct Qdisc
 {
 	int 			(*enqueue)(struct sk_buff *skb, struct Qdisc *dev);
@@ -33,6 +42,7 @@ #define TCQ_F_THROTTLED	2
 #define TCQ_F_INGRESS	4
 	int			padded;
 	struct Qdisc_ops	*ops;
+	struct qdisc_size_table	*stab;
 	u32			handle;
 	u32			parent;
 	atomic_t		refcnt;
@@ -184,9 +194,19 @@ tcf_destroy(struct tcf_proto *tp)
 	kfree(tp);
 }
 
+struct qdisc_skb_cb {
+	unsigned int	len;
+	char		data[];
+};
+
+static inline struct qdisc_skb_cb *qdisc_skb_cb(struct sk_buff *skb)
+{
+	return (struct qdisc_skb_cb *)skb->cb;
+}
+
 static inline unsigned int qdisc_tx_len(struct sk_buff *skb)
 {
-	return skb->len;
+	return qdisc_skb_cb(skb)->len;
 }
 
 static inline int __qdisc_enqueue_tail(struct sk_buff *skb, struct Qdisc *sch,
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index c7844ba..479fc85 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -286,6 +286,78 @@ void qdisc_put_rtab(struct qdisc_rate_ta
 	}
 }
 
+static LIST_HEAD(qdisc_stab_list);
+
+static struct qdisc_size_table *qdisc_get_stab(struct rtattr *tab, int *err)
+{
+	struct qdisc_size_table *stab;
+	struct rtattr *tb[TCA_STAB_MAX];
+	unsigned int tsize;
+
+	*err = -EINVAL;
+	if (rtattr_parse_nested(tb, TCA_STAB_MAX, tab))
+		return NULL;
+	if (tb[TCA_STAB_BASE-1] == NULL ||
+	    RTA_PAYLOAD(tb[TCA_STAB_BASE-1]) < sizeof(struct tc_sizespec))
+		return NULL;
+
+	tsize = 0;
+	if (tb[TCA_STAB_DATA-1] != NULL)
+		tsize = RTA_PAYLOAD(tb[TCA_STAB_DATA-1]) / sizeof(u32);
+
+	list_for_each_entry(stab, &qdisc_stab_list, list) {
+		if (stab->tsize != tsize)
+			continue;
+		if (memcmp(&stab->size, RTA_DATA(tb[TCA_STAB_BASE-1]),
+			   sizeof(stab->size)))
+			continue;
+		if (tsize > 0 &&
+		    memcmp(stab->data, RTA_DATA(tb[TCA_STAB_DATA-1]),
+			   sizeof(u32) * tsize))
+			continue;
+		stab->refcnt++;
+		return stab;
+	}
+
+	*err = -ENOMEM;
+	stab = kmalloc(sizeof(*stab) + sizeof(u32) * tsize, GFP_KERNEL);
+	if (stab == NULL)
+		return stab;
+	stab->refcnt = 1;
+	memcpy(&stab->size, RTA_DATA(tb[TCA_STAB_BASE-1]), sizeof(stab->size));
+	stab->tsize = tsize;
+	if (tsize > 0)
+		memcpy(stab->data, RTA_DATA(tb[TCA_STAB_DATA-1]),
+		       sizeof(u32) * tsize);
+	list_add_tail(&stab->list, &qdisc_stab_list);
+	*err = 0;
+	return stab;
+}
+
+void qdisc_put_stab(struct qdisc_size_table *stab)
+{
+	if (!stab || --stab->refcnt)
+		return;
+	list_del(&stab->list);
+	kfree(stab);
+}
+
+static int
+qdisc_dump_stab(struct sk_buff *skb, struct qdisc_size_table *stab)
+{
+	unsigned char *b = skb->tail;
+	struct rtattr *rta = (struct rtattr *)b;
+
+	RTA_PUT(skb, TCA_STAB, 0, NULL);
+	RTA_PUT(skb, TCA_STAB_BASE, sizeof(stab->size), &stab->size);
+	RTA_PUT(skb, TCA_STAB_DATA, sizeof(stab->data[0]) * stab->tsize,
+		stab->data);
+	rta->rta_len = skb->tail - b;
+	return skb->len;
+
+rtattr_failure:
+	skb_trim(skb, b - skb->data);
+	return -1;
+}
+
 /* Allocate an unique handle from space managed by kernel */
@@ -453,6 +525,11 @@ #endif
 	sch->handle = handle;
 
 	if (!ops->init || (err = ops->init(sch, tca[TCA_OPTIONS-1])) == 0) {
+		if (tca[TCA_STAB-1]) {
+			sch->stab = qdisc_get_stab(tca[TCA_STAB-1], &err);
+			if (sch->stab == NULL)
+				goto err_out3;
+		}
 #ifdef CONFIG_NET_ESTIMATOR
 		if (tca[TCA_RATE-1]) {
 			err = gen_new_estimator(&sch->bstats, &sch->rate_est,
@@ -477,6 +554,7 @@ #endif
 		return sch;
 	}
 err_out3:
+	qdisc_put_stab(sch->stab);
 	dev_put(dev);
 	kfree((char *) sch - sch->padded);
 err_out2:
@@ -488,15 +566,26 @@ err_out:
 
 static int qdisc_change(struct Qdisc *sch, struct rtattr **tca)
 {
-	if (tca[TCA_OPTIONS-1]) {
-		int err;
+	int err;
 
+	if (tca[TCA_OPTIONS-1]) {
 		if (sch->ops->change == NULL)
 			return -EINVAL;
 		err = sch->ops->change(sch, tca[TCA_OPTIONS-1]);
 		if (err)
 			return err;
 	}
+	if (tca[TCA_STAB-1]) {
+		struct qdisc_size_table *stab;
+
+		stab = qdisc_get_stab(tca[TCA_STAB-1], &err);
+		if (stab == NULL)
+			return err;
+		spin_lock_bh(&sch->dev->queue_lock);
+		qdisc_put_stab(sch->stab);
+		sch->stab = stab;
+		spin_unlock_bh(&sch->dev->queue_lock);
+	}
 #ifdef CONFIG_NET_ESTIMATOR
 	if (tca[TCA_RATE-1])
 		gen_replace_estimator(&sch->bstats, &sch->rate_est,
@@ -769,6 +858,9 @@ static int tc_fill_qdisc(struct sk_buff
 		goto rtattr_failure;
 	q->qstats.qlen = q->q.qlen;
 
+	if (q->stab != NULL && qdisc_dump_stab(skb, q->stab) < 0)
+		goto rtattr_failure;
+
 	if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS,
 			TCA_XSTATS, q->stats_lock, &d) < 0)
 		goto rtattr_failure;
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 2bab466..9022650 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -67,6 +67,21 @@ void qdisc_unlock_tree(struct net_device
 	write_unlock_bh(&qdisc_tree_lock);
 }
 
+static void qdisc_init_len(struct sk_buff *skb, struct Qdisc *q)
+{
+	unsigned int idx, len = skb->len;
+	struct qdisc_size_table *stab = q->stab;
+
+	if (stab == NULL)
+		goto out;
+	idx = len >> stab->size.cell_log;
+	if (idx < stab->tsize)
+		len = stab->data[idx];
+	len += stab->size.addend;
+out:
+	((struct qdisc_skb_cb *)skb->cb)->len = len;
+}
+
 /*
    dev->queue_lock serializes queue accesses for this device
    AND dev->qdisc pointer itself.
@@ -82,6 +97,7 @@ int qdisc_enqueue_root(struct net_device
 	int ret;
 
 	spin_lock(&dev->queue_lock);
+	qdisc_init_len(skb, dev->qdisc);
 	ret = dev->qdisc->enqueue(skb, dev->qdisc);
 	qdisc_run(dev);
 	spin_unlock(&dev->queue_lock);
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index aa97ecb..15dde88 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -148,7 +148,7 @@ static long tabledist(unsigned long mu,
 static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 {
 	struct netem_sched_data *q = qdisc_priv(sch);
-	struct netem_skb_cb *cb = (struct netem_skb_cb *)skb->cb;
+	struct netem_skb_cb *cb = (struct netem_skb_cb *)qdisc_skb_cb(skb)->data;
 	struct sk_buff *skb2;
 	int ret;
 	int count = 1;
@@ -268,7 +268,7 @@ static struct sk_buff *netem_dequeue(str
 	skb = q->qdisc->dequeue(q->qdisc);
 	if (skb) {
 		const struct netem_skb_cb *cb
-			= (const struct netem_skb_cb *)skb->cb;
+			= (const struct netem_skb_cb *)qdisc_skb_cb(skb)->data;
 		psched_time_t now;
 
 		/* if more time remaining? */
@@ -493,13 +493,13 @@ static int tfifo_enqueue(struct sk_buff
 	struct fifo_sched_data *q = qdisc_priv(sch);
 	struct sk_buff_head *list = &sch->q;
 	const struct netem_skb_cb *ncb
-		= (const struct netem_skb_cb *)nskb->cb;
+		= (const struct netem_skb_cb *)qdisc_skb_cb(nskb)->data;
 	struct sk_buff *skb;
 
 	if (likely(skb_queue_len(list) < q->limit)) {
 		skb_queue_reverse_walk(list, skb) {
 			const struct netem_skb_cb *cb
-				= (const struct netem_skb_cb *)skb->cb;
+				= (const struct netem_skb_cb *)qdisc_skb_cb(skb)->data;
 
 			if (!PSCHED_TLESS(ncb->time_to_send, cb->time_to_send))
 				break;
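
Note (not part of the patch): the length translation done by qdisc_init_len() is just a table lookup indexed by len >> cell_log, with the fixed addend applied on top. The standalone sketch below mirrors that arithmetic in userspace. The ATM-style table it builds (48-byte payload carried in 53-byte cells, one entry per 128-byte bucket) is only an assumed example of how the TCA_STAB_BASE/TCA_STAB_DATA contents might be generated; the patch itself does not prescribe any particular table.

/*
 * Userspace sketch (illustration only, not kernel code) of the lookup
 * performed by qdisc_init_len(): index the table by len >> cell_log,
 * substitute the table entry when one exists, then add the fixed addend.
 */
#include <stdio.h>
#include <stdint.h>

struct tc_sizespec {			/* mirrors the struct added to pkt_sched.h */
	unsigned int cell_log;
	unsigned int addend;
};

static unsigned int stab_translate(const struct tc_sizespec *s,
				   const uint32_t *data, unsigned int tsize,
				   unsigned int len)
{
	unsigned int idx = len >> s->cell_log;

	if (idx < tsize)
		len = data[idx];
	return len + s->addend;
}

int main(void)
{
	/* Hypothetical table: one entry per 128-byte bucket (cell_log = 7),
	 * each giving the on-wire size if that bucket's worth of payload
	 * were carried in 53-byte ATM cells holding 48 bytes each. */
	struct tc_sizespec s = { .cell_log = 7, .addend = 0 };
	uint32_t data[16];
	unsigned int i;

	for (i = 0; i < 16; i++) {
		unsigned int bytes = (i + 1) << s.cell_log;
		data[i] = (bytes + 47) / 48 * 53;
	}

	printf("len  100 -> %u\n", stab_translate(&s, data, 16, 100));
	printf("len 1500 -> %u\n", stab_translate(&s, data, 16, 1500));
	return 0;
}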