All of lore.kernel.org
 help / color / mirror / Atom feed
From: Yang Yingliang <yangyingliang@huawei.com>
To: Eric Dumazet <eric.dumazet@gmail.com>
Cc: netdev <netdev@vger.kernel.org>, <davem@davemloft.net>,
	<brouer@redhat.com>, <jpirko@redhat.com>, <jbrouer@redhat.com>
Subject: [PATCH] net: sched: tbf: fix calculation of max_size
Date: Mon, 25 Nov 2013 20:04:23 +0800	[thread overview]
Message-ID: <52933CC7.9070805@huawei.com> (raw)
In-Reply-To: <5292C76E.1070701@huawei.com>

From: Yang Yingliang <yangyingliang@huawei.com>

Current max_size is caluated from rate table. Now, the rate table
has been replaced and it's wrong to caculate max_size based on this
rate table. It can lead wrong calculation of max_size.

The burst in kernel may be lower than user asked, because burst may gets
some loss when transform it to buffer(E.g. "burst 40kb rate 30mbit/s")
and it seems we cannot avoid this loss. And burst's value(max_size) based
on rate table may be equal user asked. If a packet's length is max_size,
this packet will be stalled in tbf_dequeue() because its length is above
the burst in kernel so that it cannot get enough tokens. The max_size guards
against enqueuing packet sizes above q->buffer "time" in tbf_enqueue().

This patch fixes the calculation of max_size by using psched_l2t_ns() and
q->buffer to recalculate burst(max_size).

Also, add support to get burst from userland directly. We can avoid loss
in byte-to-time transform by using burst directly. Iproute2 will need a
patch to send burst to kernel.

Suggested-by: Jesper Dangaard Brouer <jbrouer@redhat.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
---
 include/uapi/linux/pkt_sched.h |   2 +
 net/sched/sch_tbf.c            | 110 ++++++++++++++++++++++++++---------------
 2 files changed, 72 insertions(+), 40 deletions(-)

diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h
index 307f293..b5a0976 100644
--- a/include/uapi/linux/pkt_sched.h
+++ b/include/uapi/linux/pkt_sched.h
@@ -173,6 +173,8 @@ enum {
 	TCA_TBF_PTAB,
 	TCA_TBF_RATE64,
 	TCA_TBF_PRATE64,
+	TCA_TBF_BURST,
+	TCA_TBF_PBURST,
 	__TCA_TBF_MAX,
 };
 
diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c
index a609005..3d01bf0 100644
--- a/net/sched/sch_tbf.c
+++ b/net/sched/sch_tbf.c
@@ -281,8 +281,12 @@ static const struct nla_policy tbf_policy[TCA_TBF_MAX + 1] = {
 	[TCA_TBF_PTAB]	= { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
 	[TCA_TBF_RATE64]	= { .type = NLA_U64 },
 	[TCA_TBF_PRATE64]	= { .type = NLA_U64 },
+	[TCA_TBF_BURST]		= { .type = NLA_U32 },
+	[TCA_TBF_PBURST]	= { .type = NLA_U32 },
 };
 
+#define MAX_PKT_LEN 65535
+
 static int tbf_change(struct Qdisc *sch, struct nlattr *opt)
 {
 	int err;
@@ -292,7 +296,7 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt)
 	struct qdisc_rate_table *rtab = NULL;
 	struct qdisc_rate_table *ptab = NULL;
 	struct Qdisc *child = NULL;
-	int max_size, n;
+	int max_size;
 	u64 rate64 = 0, prate64 = 0;
 
 	err = nla_parse_nested(tb, TCA_TBF_MAX, opt, tbf_policy);
@@ -304,38 +308,20 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt)
 		goto done;
 
 	qopt = nla_data(tb[TCA_TBF_PARMS]);
-	rtab = qdisc_get_rtab(&qopt->rate, tb[TCA_TBF_RTAB]);
-	if (rtab == NULL)
-		goto done;
-
-	if (qopt->peakrate.rate) {
-		if (qopt->peakrate.rate > qopt->rate.rate)
-			ptab = qdisc_get_rtab(&qopt->peakrate, tb[TCA_TBF_PTAB]);
-		if (ptab == NULL)
-			goto done;
+	if (qopt->rate.linklayer == TC_LINKLAYER_UNAWARE) {
+		rtab = qdisc_get_rtab(&qopt->rate, tb[TCA_TBF_RTAB]);
+		if (rtab) {
+			qdisc_put_rtab(rtab);
+			rtab = NULL;
+		}
 	}
-
-	for (n = 0; n < 256; n++)
-		if (rtab->data[n] > qopt->buffer)
-			break;
-	max_size = (n << qopt->rate.cell_log) - 1;
-	if (ptab) {
-		int size;
-
-		for (n = 0; n < 256; n++)
-			if (ptab->data[n] > qopt->mtu)
-				break;
-		size = (n << qopt->peakrate.cell_log) - 1;
-		if (size < max_size)
-			max_size = size;
+	if (qopt->peakrate.linklayer == TC_LINKLAYER_UNAWARE) {
+		ptab = qdisc_get_rtab(&qopt->peakrate, tb[TCA_TBF_PTAB]);
+		if (ptab) {
+			qdisc_put_rtab(ptab);
+			ptab = NULL;
+		}
 	}
-	if (max_size < 0)
-		goto done;
-
-	if (max_size < psched_mtu(qdisc_dev(sch)))
-		pr_warn_ratelimited("sch_tbf: burst %u is lower than device %s mtu (%u) !\n",
-				    max_size, qdisc_dev(sch)->name,
-				    psched_mtu(qdisc_dev(sch)));
 
 	if (q->qdisc != &noop_qdisc) {
 		err = fifo_set_limit(q->qdisc, qopt->limit);
@@ -357,30 +343,74 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt)
 	}
 	q->limit = qopt->limit;
 	q->mtu = PSCHED_TICKS2NS(qopt->mtu);
-	q->max_size = max_size;
 	q->buffer = PSCHED_TICKS2NS(qopt->buffer);
 	q->tokens = q->buffer;
 	q->ptokens = q->mtu;
 
 	if (tb[TCA_TBF_RATE64])
 		rate64 = nla_get_u64(tb[TCA_TBF_RATE64]);
-	psched_ratecfg_precompute(&q->rate, &rtab->rate, rate64);
-	if (ptab) {
+	psched_ratecfg_precompute(&q->rate, &qopt->rate, rate64);
+	if (!q->rate.rate_bytes_ps)
+		goto unlock_done;
+
+	if (tb[TCA_TBF_BURST]) {
+		u32 burst = nla_get_u32(tb[TCA_TBF_BURST]);
+		q->buffer = psched_l2t_ns(&q->rate, burst);
+		max_size = min_t(u32, burst, MAX_PKT_LEN);
+	} else {
+		for (max_size = 0; max_size < MAX_PKT_LEN; max_size++)
+			if (psched_l2t_ns(&q->rate, max_size) > q->buffer)
+				break;
+		max_size--;
+		if (max_size < 0)
+			goto unlock_done;
+	}
+
+	if (qopt->peakrate.rate) {
+		int size = 0;
 		if (tb[TCA_TBF_PRATE64])
 			prate64 = nla_get_u64(tb[TCA_TBF_PRATE64]);
-		psched_ratecfg_precompute(&q->peak, &ptab->rate, prate64);
+		psched_ratecfg_precompute(&q->peak, &qopt->peakrate, prate64);
+		if (q->peak.rate_bytes_ps <= q->rate.rate_bytes_ps) {
+			pr_err("Rate must be lower than peakrate!\n");
+			goto unlock_done;
+		}
+
+		if (tb[TCA_TBF_PBURST]) {
+			u32 pburst = nla_get_u32(tb[TCA_TBF_PBURST]);
+			q->mtu = psched_l2t_ns(&q->peak, pburst);
+			size = min_t(u32, pburst, MAX_PKT_LEN);
+		} else {
+			for (size = 0; size < MAX_PKT_LEN; size++)
+				if (psched_l2t_ns(&q->peak, size) > q->mtu)
+					break;
+			size--;
+			if (size < 0)
+				goto unlock_done;
+		}
+		max_size = min_t(u32, max_size, size);
 		q->peak_present = true;
 	} else {
 		q->peak_present = false;
 	}
 
+	if (!max_size)
+		goto unlock_done;
+
+	if (max_size < psched_mtu(qdisc_dev(sch)))
+		pr_warn_ratelimited("sch_tbf: burst %u is lower than device %s mtu (%u) !\n",
+				    max_size, qdisc_dev(sch)->name,
+				    psched_mtu(qdisc_dev(sch)));
+
+	q->max_size = max_size;
+
 	sch_tree_unlock(sch);
-	err = 0;
+	return 0;
+
+unlock_done:
+	sch_tree_unlock(sch);
+	err = -EINVAL;
 done:
-	if (rtab)
-		qdisc_put_rtab(rtab);
-	if (ptab)
-		qdisc_put_rtab(ptab);
 	return err;
 }
 
-- 1.8.0

  reply	other threads:[~2013-11-25 12:04 UTC|newest]

Thread overview: 19+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2013-11-19  7:25 [PATCH net v3 0/2] net: sched: fix some issues Yang Yingliang
2013-11-19  7:25 ` [PATCH net v3 1/2] net: sched: tbf: fix calculation of max_size Yang Yingliang
2013-11-19  9:38   ` Jesper Dangaard Brouer
2013-11-20  2:14     ` Yang Yingliang
2013-11-20 10:04       ` Jesper Dangaard Brouer
2013-11-20 12:50         ` Yang Yingliang
2013-11-23 19:06   ` Eric Dumazet
2013-11-24  7:28     ` Yang Yingliang
2013-11-24 18:40       ` Eric Dumazet
2013-11-25  3:43         ` Yang Yingliang
2013-11-25 12:04           ` Yang Yingliang [this message]
2013-11-25 12:22             ` [PATCH] " David Laight
2013-11-26  1:28               ` Yang Yingliang
2013-11-26  2:35                 ` Yang Yingliang
2013-12-02  1:11               ` David Miller
2013-12-02 10:29                 ` David Laight
2013-12-02 16:45             ` David Miller
2013-12-03  0:59               ` Yang Yingliang
2013-11-19  7:25 ` [PATCH net v3 2/2] net: sched: htb: fix calculation of quantum Yang Yingliang

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=52933CC7.9070805@huawei.com \
    --to=yangyingliang@huawei.com \
    --cc=brouer@redhat.com \
    --cc=davem@davemloft.net \
    --cc=eric.dumazet@gmail.com \
    --cc=jbrouer@redhat.com \
    --cc=jpirko@redhat.com \
    --cc=netdev@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.