* [patch net-next RFC] tbf: handle gso skbs properly
@ 2013-02-15 12:44 Jiri Pirko
2013-02-15 14:18 ` Eric Dumazet
2013-02-15 15:30 ` Eric Dumazet
0 siblings, 2 replies; 4+ messages in thread
From: Jiri Pirko @ 2013-02-15 12:44 UTC (permalink / raw)
To: netdev; +Cc: davem, edumazet, jhs, kuznet, j.vimal
According to Eric's suggestion, when gso skb can't be sent in one mtu
time, resegment it.
Note that helper will be moved to sch_api.c probably so it can be used
by other code.
Also, I'm not sure similar patch to this can be done for act_police. I
have to look at that more closely.
Thanks for review.
Signed-off-by: Jiri Pirko <jiri@resnulli.us>
---
include/net/sch_generic.h | 1 +
net/core/dev.c | 24 ------------------------
net/sched/sch_api.c | 27 +++++++++++++++++++++++++++
net/sched/sch_tbf.c | 38 +++++++++++++++++++++++++++++++++++++-
4 files changed, 65 insertions(+), 25 deletions(-)
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 2761c90..de6db57 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -372,6 +372,7 @@ extern struct Qdisc *qdisc_create_dflt(struct netdev_queue *dev_queue,
struct Qdisc_ops *ops, u32 parentid);
extern void __qdisc_calculate_pkt_len(struct sk_buff *skb,
const struct qdisc_size_table *stab);
+extern void qdisc_pkt_len_init(struct sk_buff *skb);
extern void tcf_destroy(struct tcf_proto *tp);
extern void tcf_destroy_chain(struct tcf_proto **fl);
diff --git a/net/core/dev.c b/net/core/dev.c
index f444736..8f86b1c 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2680,30 +2680,6 @@ out:
return rc;
}
-static void qdisc_pkt_len_init(struct sk_buff *skb)
-{
- const struct skb_shared_info *shinfo = skb_shinfo(skb);
-
- qdisc_skb_cb(skb)->pkt_len = skb->len;
-
- /* To get more precise estimation of bytes sent on wire,
- * we add to pkt_len the headers size of all segments
- */
- if (shinfo->gso_size) {
- unsigned int hdr_len;
-
- /* mac layer + network layer */
- hdr_len = skb_transport_header(skb) - skb_mac_header(skb);
-
- /* + transport layer */
- if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)))
- hdr_len += tcp_hdrlen(skb);
- else
- hdr_len += sizeof(struct udphdr);
- qdisc_skb_cb(skb)->pkt_len += (shinfo->gso_segs - 1) * hdr_len;
- }
-}
-
static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
struct net_device *dev,
struct netdev_queue *txq)
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index fe1ba54..7672259 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -29,6 +29,8 @@
#include <linux/hrtimer.h>
#include <linux/lockdep.h>
#include <linux/slab.h>
+#include <linux/tcp.h>
+#include <linux/udp.h>
#include <net/net_namespace.h>
#include <net/sock.h>
@@ -464,6 +466,31 @@ out:
}
EXPORT_SYMBOL(__qdisc_calculate_pkt_len);
+void qdisc_pkt_len_init(struct sk_buff *skb)
+{
+ const struct skb_shared_info *shinfo = skb_shinfo(skb);
+
+ qdisc_skb_cb(skb)->pkt_len = skb->len;
+
+ /* To get more precise estimation of bytes sent on wire,
+ * we add to pkt_len the headers size of all segments
+ */
+ if (shinfo->gso_size) {
+ unsigned int hdr_len;
+
+ /* mac layer + network layer */
+ hdr_len = skb_transport_header(skb) - skb_mac_header(skb);
+
+ /* + transport layer */
+ if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)))
+ hdr_len += tcp_hdrlen(skb);
+ else
+ hdr_len += sizeof(struct udphdr);
+ qdisc_skb_cb(skb)->pkt_len += (shinfo->gso_segs - 1) * hdr_len;
+ }
+}
+EXPORT_SYMBOL(qdisc_pkt_len_init);
+
void qdisc_warn_nonwc(char *txt, struct Qdisc *qdisc)
{
if (!(qdisc->flags & TCQ_F_WARN_NONWC)) {
diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c
index c8388f3..bfc89be 100644
--- a/net/sched/sch_tbf.c
+++ b/net/sched/sch_tbf.c
@@ -121,7 +121,7 @@ static int tbf_enqueue(struct sk_buff *skb, struct Qdisc *sch)
struct tbf_sched_data *q = qdisc_priv(sch);
int ret;
- if (qdisc_pkt_len(skb) > q->max_size)
+ if (qdisc_pkt_len(skb) > q->max_size && !skb_is_gso(skb))
return qdisc_reshape_fail(skb, sch);
ret = qdisc_enqueue(skb, q->qdisc);
@@ -147,6 +147,33 @@ static unsigned int tbf_drop(struct Qdisc *sch)
return len;
}
+static bool qdisc_gso_segment(struct Qdisc *qdisc, struct sk_buff *skb)
+{
+ struct sk_buff *segs;
+ struct sk_buff *next_skb;
+ struct sk_buff *prev_skb;
+ int num_skbs = 0;
+
+ segs = skb_gso_segment(skb, 0);
+ if (IS_ERR(segs) || !segs)
+ return false;
+ __skb_unlink(skb, &qdisc->q);
+ kfree_skb(skb);
+ skb = segs;
+ prev_skb = (struct sk_buff *) &qdisc->q;
+ do {
+ next_skb = skb->next;
+ qdisc_pkt_len_init(skb);
+ qdisc_calculate_pkt_len(skb, qdisc);
+ __skb_queue_after(&qdisc->q, prev_skb, skb);
+ prev_skb = skb;
+ skb = next_skb;
+ num_skbs++;
+ } while (skb);
+ qdisc_tree_decrease_qlen(qdisc, 1 - num_skbs);
+ return true;
+}
+
static struct sk_buff *tbf_dequeue(struct Qdisc *sch)
{
struct tbf_sched_data *q = qdisc_priv(sch);
@@ -167,6 +194,15 @@ static struct sk_buff *tbf_dequeue(struct Qdisc *sch)
ptoks = toks + q->ptokens;
if (ptoks > q->mtu)
ptoks = q->mtu;
+ if (skb_is_gso(skb) &&
+ (s64) psched_l2t_ns(&q->peak, len) > q->mtu &&
+ qdisc_gso_segment(q->qdisc, skb)) {
+ q->qdisc->gso_skb = NULL;
+ skb = q->qdisc->ops->peek(q->qdisc);
+ if (unlikely(!skb))
+ return NULL;
+ len = qdisc_pkt_len(skb);
+ }
ptoks -= (s64) psched_l2t_ns(&q->peak, len);
}
toks += q->tokens;
--
1.8.1.2
^ permalink raw reply related [flat|nested] 4+ messages in thread* Re: [patch net-next RFC] tbf: handle gso skbs properly
2013-02-15 12:44 [patch net-next RFC] tbf: handle gso skbs properly Jiri Pirko
@ 2013-02-15 14:18 ` Eric Dumazet
2013-02-15 14:28 ` Jiri Pirko
2013-02-15 15:30 ` Eric Dumazet
1 sibling, 1 reply; 4+ messages in thread
From: Eric Dumazet @ 2013-02-15 14:18 UTC (permalink / raw)
To: Jiri Pirko; +Cc: netdev, davem, edumazet, jhs, kuznet, j.vimal
On Fri, 2013-02-15 at 13:44 +0100, Jiri Pirko wrote:
> According to Eric's suggestion, when gso skb can't be sent in one mtu
> time, resegment it.
>
> Note that helper will be moved to sch_api.c probably so it can be used
> by other code.
>
> Also, I'm not sure similar patch to this can be done for act_police. I
> have to look at that more closely.
>
> Thanks for review.
>
> Signed-off-by: Jiri Pirko <jiri@resnulli.us>
> ---
> include/net/sch_generic.h | 1 +
> net/core/dev.c | 24 ------------------------
> net/sched/sch_api.c | 27 +++++++++++++++++++++++++++
> net/sched/sch_tbf.c | 38 +++++++++++++++++++++++++++++++++++++-
> 4 files changed, 65 insertions(+), 25 deletions(-)
>
> diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
> index 2761c90..de6db57 100644
> --- a/include/net/sch_generic.h
> +++ b/include/net/sch_generic.h
> @@ -372,6 +372,7 @@ extern struct Qdisc *qdisc_create_dflt(struct netdev_queue *dev_queue,
> struct Qdisc_ops *ops, u32 parentid);
> extern void __qdisc_calculate_pkt_len(struct sk_buff *skb,
> const struct qdisc_size_table *stab);
> +extern void qdisc_pkt_len_init(struct sk_buff *skb);
> extern void tcf_destroy(struct tcf_proto *tp);
> extern void tcf_destroy_chain(struct tcf_proto **fl);
>
> diff --git a/net/core/dev.c b/net/core/dev.c
> index f444736..8f86b1c 100644
> --- a/net/core/dev.c
> +++ b/net/core/dev.c
> @@ -2680,30 +2680,6 @@ out:
> return rc;
> }
>
> -static void qdisc_pkt_len_init(struct sk_buff *skb)
> -{
> - const struct skb_shared_info *shinfo = skb_shinfo(skb);
> -
> - qdisc_skb_cb(skb)->pkt_len = skb->len;
> -
> - /* To get more precise estimation of bytes sent on wire,
> - * we add to pkt_len the headers size of all segments
> - */
> - if (shinfo->gso_size) {
> - unsigned int hdr_len;
> -
> - /* mac layer + network layer */
> - hdr_len = skb_transport_header(skb) - skb_mac_header(skb);
> -
> - /* + transport layer */
> - if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)))
> - hdr_len += tcp_hdrlen(skb);
> - else
> - hdr_len += sizeof(struct udphdr);
> - qdisc_skb_cb(skb)->pkt_len += (shinfo->gso_segs - 1) * hdr_len;
> - }
> -}
> -
Please dont move this function, its called in fast path and should be
inlined by compiler
> static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
> struct net_device *dev,
> struct netdev_queue *txq)
> diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
> index fe1ba54..7672259 100644
> --- a/net/sched/sch_api.c
> +++ b/net/sched/sch_api.c
> @@ -29,6 +29,8 @@
> #include <linux/hrtimer.h>
> #include <linux/lockdep.h>
> #include <linux/slab.h>
> +#include <linux/tcp.h>
> +#include <linux/udp.h>
>
> #include <net/net_namespace.h>
> #include <net/sock.h>
> @@ -464,6 +466,31 @@ out:
> }
> EXPORT_SYMBOL(__qdisc_calculate_pkt_len);
>
> +
> void qdisc_warn_nonwc(char *txt, struct Qdisc *qdisc)
> {
> if (!(qdisc->flags & TCQ_F_WARN_NONWC)) {
> diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c
> index c8388f3..bfc89be 100644
> --- a/net/sched/sch_tbf.c
> +++ b/net/sched/sch_tbf.c
> @@ -121,7 +121,7 @@ static int tbf_enqueue(struct sk_buff *skb, struct Qdisc *sch)
> struct tbf_sched_data *q = qdisc_priv(sch);
> int ret;
>
> - if (qdisc_pkt_len(skb) > q->max_size)
> + if (qdisc_pkt_len(skb) > q->max_size && !skb_is_gso(skb))
> return qdisc_reshape_fail(skb, sch);
>
> ret = qdisc_enqueue(skb, q->qdisc);
> @@ -147,6 +147,33 @@ static unsigned int tbf_drop(struct Qdisc *sch)
> return len;
> }
>
> +static bool qdisc_gso_segment(struct Qdisc *qdisc, struct sk_buff *skb)
> +{
> + struct sk_buff *segs;
> + struct sk_buff *next_skb;
> + struct sk_buff *prev_skb;
> + int num_skbs = 0;
> +
> + segs = skb_gso_segment(skb, 0);
> + if (IS_ERR(segs) || !segs)
> + return false;
> + __skb_unlink(skb, &qdisc->q);
> + kfree_skb(skb);
> + skb = segs;
> + prev_skb = (struct sk_buff *) &qdisc->q;
> + do {
> + next_skb = skb->next;
> + qdisc_pkt_len_init(skb);
> + qdisc_calculate_pkt_len(skb, qdisc);
You don't need this, packet is non gso, so you only have to
qdisc_skb_cb(skb)->pkt_len = skb->len;
qdisc_calculate_pkt_len();
> + __skb_queue_after(&qdisc->q, prev_skb, skb);
Yes, this might need different strategy for non fifo qdisc, we can
see that later.
> + prev_skb = skb;
> + skb = next_skb;
> + num_skbs++;
> + } while (skb);
> + qdisc_tree_decrease_qlen(qdisc, 1 - num_skbs);
> + return true;
> +}
> +
> static struct sk_buff *tbf_dequeue(struct Qdisc *sch)
> {
> struct tbf_sched_data *q = qdisc_priv(sch);
> @@ -167,6 +194,15 @@ static struct sk_buff *tbf_dequeue(struct Qdisc *sch)
> ptoks = toks + q->ptokens;
> if (ptoks > q->mtu)
> ptoks = q->mtu;
Cache psched_l2t_ns(&q->peak, len) in a variable here to avoid double
call.
> + if (skb_is_gso(skb) &&
> + (s64) psched_l2t_ns(&q->peak, len) > q->mtu &&
> + qdisc_gso_segment(q->qdisc, skb)) {
> + q->qdisc->gso_skb = NULL;
> + skb = q->qdisc->ops->peek(q->qdisc);
> + if (unlikely(!skb))
> + return NULL;
> + len = qdisc_pkt_len(skb);
> + }
> ptoks -= (s64) psched_l2t_ns(&q->peak, len);
> }
> toks += q->tokens;
Thanks !
^ permalink raw reply [flat|nested] 4+ messages in thread* Re: [patch net-next RFC] tbf: handle gso skbs properly
2013-02-15 14:18 ` Eric Dumazet
@ 2013-02-15 14:28 ` Jiri Pirko
0 siblings, 0 replies; 4+ messages in thread
From: Jiri Pirko @ 2013-02-15 14:28 UTC (permalink / raw)
To: Eric Dumazet; +Cc: netdev, davem, edumazet, jhs, kuznet, j.vimal
Thanks for review Eric. I will process your suggestions and submit the
patch.
Fri, Feb 15, 2013 at 03:18:49PM CET, eric.dumazet@gmail.com wrote:
>On Fri, 2013-02-15 at 13:44 +0100, Jiri Pirko wrote:
>> According to Eric's suggestion, when gso skb can't be sent in one mtu
>> time, resegment it.
>>
>> Note that helper will be moved to sch_api.c probably so it can be used
>> by other code.
>>
>> Also, I'm not sure similar patch to this can be done for act_police. I
>> have to look at that more closely.
Any idea how to fix this in act_police? I believe we can't use the same
approach since there is no queue there.
Thanks.
Jiri
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [patch net-next RFC] tbf: handle gso skbs properly
2013-02-15 12:44 [patch net-next RFC] tbf: handle gso skbs properly Jiri Pirko
2013-02-15 14:18 ` Eric Dumazet
@ 2013-02-15 15:30 ` Eric Dumazet
1 sibling, 0 replies; 4+ messages in thread
From: Eric Dumazet @ 2013-02-15 15:30 UTC (permalink / raw)
To: Jiri Pirko; +Cc: netdev, davem, edumazet, jhs, kuznet, j.vimal
On Fri, 2013-02-15 at 13:44 +0100, Jiri Pirko wrote:
> + segs = skb_gso_segment(skb, 0);
Try to find out how to avoid a full copy, if the device is SG capable.
maybe using
segs = skb_gso_segment(skb,
netif_skb_features(skb) & ~NETIF_F_GSO_MASK);
Or something like that...
> + if (IS_ERR(segs) || !segs)
> + return false;
> + __skb_unlink(skb, &qdisc->q);
> + kfree_skb(skb);
consume_skb() would be nice here.
^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2013-02-15 15:30 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2013-02-15 12:44 [patch net-next RFC] tbf: handle gso skbs properly Jiri Pirko
2013-02-15 14:18 ` Eric Dumazet
2013-02-15 14:28 ` Jiri Pirko
2013-02-15 15:30 ` Eric Dumazet
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).