stable.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
To: linux-kernel@vger.kernel.org
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>,
	stable@vger.kernel.org,
	Jesper Dangaard Brouer <brouer@redhat.com>,
	"David S. Miller" <davem@davemloft.net>
Subject: [ 25/46] net_sched: restore "linklayer atm" handling
Date: Thu, 12 Sep 2013 10:58:36 -0700	[thread overview]
Message-ID: <20130912175723.743750832@linuxfoundation.org> (raw)
In-Reply-To: <20130912175721.001906199@linuxfoundation.org>

3.10-stable review patch.  If anyone has any objections, please let me know.

------------------

From: Jesper Dangaard Brouer <brouer@redhat.com>

[ Upstream commit 8a8e3d84b1719a56f9151909e80ea6ebc5b8e318 ]

commit 56b765b79 ("htb: improved accuracy at high rates")
broke the "linklayer atm" handling.

 tc class add ... htb rate X ceil Y linklayer atm

The linklayer setting is implemented by modifying the rate table
which is send to the kernel.  No direct parameter were
transferred to the kernel indicating the linklayer setting.

The commit 56b765b79 ("htb: improved accuracy at high rates")
removed the use of the rate table system.

To keep compatible with older iproute2 utils, this patch detects
the linklayer by parsing the rate table.  It also supports future
versions of iproute2 to send this linklayer parameter to the
kernel directly. This is done by using the __reserved field in
struct tc_ratespec, to convey the choosen linklayer option, but
only using the lower 4 bits of this field.

Linklayer detection is limited to speeds below 100Mbit/s, because
at high rates the rtab is gets too inaccurate, so bad that
several fields contain the same values, this resembling the ATM
detect.  Fields even start to contain "0" time to send, e.g. at
1000Mbit/s sending a 96 bytes packet cost "0", thus the rtab have
been more broken than we first realized.

Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/net/sch_generic.h      |    9 ++++++++-
 include/uapi/linux/pkt_sched.h |   10 +++++++++-
 net/sched/sch_api.c            |   41 +++++++++++++++++++++++++++++++++++++++++
 net/sched/sch_generic.c        |    1 +
 net/sched/sch_htb.c            |   13 +++++++++++++
 5 files changed, 72 insertions(+), 2 deletions(-)

--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -682,13 +682,19 @@ struct psched_ratecfg {
 	u64	rate_bps;
 	u32	mult;
 	u16	overhead;
+	u8	linklayer;
 	u8	shift;
 };
 
 static inline u64 psched_l2t_ns(const struct psched_ratecfg *r,
 				unsigned int len)
 {
-	return ((u64)(len + r->overhead) * r->mult) >> r->shift;
+	len += r->overhead;
+
+	if (unlikely(r->linklayer == TC_LINKLAYER_ATM))
+		return ((u64)(DIV_ROUND_UP(len,48)*53) * r->mult) >> r->shift;
+
+	return ((u64)len * r->mult) >> r->shift;
 }
 
 extern void psched_ratecfg_precompute(struct psched_ratecfg *r, const struct tc_ratespec *conf);
@@ -699,6 +705,7 @@ static inline void psched_ratecfg_getrat
 	memset(res, 0, sizeof(*res));
 	res->rate = r->rate_bps >> 3;
 	res->overhead = r->overhead;
+	res->linklayer = (r->linklayer & TC_LINKLAYER_MASK);
 }
 
 #endif
--- a/include/uapi/linux/pkt_sched.h
+++ b/include/uapi/linux/pkt_sched.h
@@ -73,9 +73,17 @@ struct tc_estimator {
 #define TC_H_ROOT	(0xFFFFFFFFU)
 #define TC_H_INGRESS    (0xFFFFFFF1U)
 
+/* Need to corrospond to iproute2 tc/tc_core.h "enum link_layer" */
+enum tc_link_layer {
+	TC_LINKLAYER_UNAWARE, /* Indicate unaware old iproute2 util */
+	TC_LINKLAYER_ETHERNET,
+	TC_LINKLAYER_ATM,
+};
+#define TC_LINKLAYER_MASK 0x0F /* limit use to lower 4 bits */
+
 struct tc_ratespec {
 	unsigned char	cell_log;
-	unsigned char	__reserved;
+	__u8		linklayer; /* lower 4 bits */
 	unsigned short	overhead;
 	short		cell_align;
 	unsigned short	mpu;
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -285,6 +285,45 @@ static struct Qdisc_ops *qdisc_lookup_op
 	return q;
 }
 
+/* The linklayer setting were not transferred from iproute2, in older
+ * versions, and the rate tables lookup systems have been dropped in
+ * the kernel. To keep backward compatible with older iproute2 tc
+ * utils, we detect the linklayer setting by detecting if the rate
+ * table were modified.
+ *
+ * For linklayer ATM table entries, the rate table will be aligned to
+ * 48 bytes, thus some table entries will contain the same value.  The
+ * mpu (min packet unit) is also encoded into the old rate table, thus
+ * starting from the mpu, we find low and high table entries for
+ * mapping this cell.  If these entries contain the same value, when
+ * the rate tables have been modified for linklayer ATM.
+ *
+ * This is done by rounding mpu to the nearest 48 bytes cell/entry,
+ * and then roundup to the next cell, calc the table entry one below,
+ * and compare.
+ */
+static __u8 __detect_linklayer(struct tc_ratespec *r, __u32 *rtab)
+{
+	int low       = roundup(r->mpu, 48);
+	int high      = roundup(low+1, 48);
+	int cell_low  = low >> r->cell_log;
+	int cell_high = (high >> r->cell_log) - 1;
+
+	/* rtab is too inaccurate at rates > 100Mbit/s */
+	if ((r->rate > (100000000/8)) || (rtab[0] == 0)) {
+		pr_debug("TC linklayer: Giving up ATM detection\n");
+		return TC_LINKLAYER_ETHERNET;
+	}
+
+	if ((cell_high > cell_low) && (cell_high < 256)
+	    && (rtab[cell_low] == rtab[cell_high])) {
+		pr_debug("TC linklayer: Detected ATM, low(%d)=high(%d)=%u\n",
+			 cell_low, cell_high, rtab[cell_high]);
+		return TC_LINKLAYER_ATM;
+	}
+	return TC_LINKLAYER_ETHERNET;
+}
+
 static struct qdisc_rate_table *qdisc_rtab_list;
 
 struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r, struct nlattr *tab)
@@ -308,6 +347,8 @@ struct qdisc_rate_table *qdisc_get_rtab(
 		rtab->rate = *r;
 		rtab->refcnt = 1;
 		memcpy(rtab->data, nla_data(tab), 1024);
+		if (r->linklayer == TC_LINKLAYER_UNAWARE)
+			r->linklayer = __detect_linklayer(r, rtab->data);
 		rtab->next = qdisc_rtab_list;
 		qdisc_rtab_list = rtab;
 	}
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -908,6 +908,7 @@ void psched_ratecfg_precompute(struct ps
 	memset(r, 0, sizeof(*r));
 	r->overhead = conf->overhead;
 	r->rate_bps = (u64)conf->rate << 3;
+	r->linklayer = (conf->linklayer & TC_LINKLAYER_MASK);
 	r->mult = 1;
 	/*
 	 * Calibrate mult, shift so that token counting is accurate
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -1312,6 +1312,7 @@ static int htb_change_class(struct Qdisc
 	struct htb_sched *q = qdisc_priv(sch);
 	struct htb_class *cl = (struct htb_class *)*arg, *parent;
 	struct nlattr *opt = tca[TCA_OPTIONS];
+	struct qdisc_rate_table *rtab = NULL, *ctab = NULL;
 	struct nlattr *tb[TCA_HTB_MAX + 1];
 	struct tc_htb_opt *hopt;
 
@@ -1333,6 +1334,18 @@ static int htb_change_class(struct Qdisc
 	if (!hopt->rate.rate || !hopt->ceil.rate)
 		goto failure;
 
+	/* Keeping backward compatible with rate_table based iproute2 tc */
+	if (hopt->rate.linklayer == TC_LINKLAYER_UNAWARE) {
+		rtab = qdisc_get_rtab(&hopt->rate, tb[TCA_HTB_RTAB]);
+		if (rtab)
+			qdisc_put_rtab(rtab);
+	}
+	if (hopt->ceil.linklayer == TC_LINKLAYER_UNAWARE) {
+		ctab = qdisc_get_rtab(&hopt->ceil, tb[TCA_HTB_CTAB]);
+		if (ctab)
+			qdisc_put_rtab(ctab);
+	}
+
 	if (!cl) {		/* new class */
 		struct Qdisc *new_q;
 		int prio;



  parent reply	other threads:[~2013-09-12 17:58 UTC|newest]

Thread overview: 50+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2013-09-12 17:58 [ 00/46] 3.10.12-stable review Greg Kroah-Hartman
2013-09-12 17:58 ` [ 01/46] htb: fix sign extension bug Greg Kroah-Hartman
2013-09-12 17:58 ` [ 02/46] net: rtm_to_ifaddr: free ifa if ifa_cacheinfo processing fails Greg Kroah-Hartman
2013-09-12 17:58 ` [ 03/46] net: check net.core.somaxconn sysctl values Greg Kroah-Hartman
2013-09-12 17:58 ` [ 04/46] macvlan: validate flags Greg Kroah-Hartman
2013-09-12 17:58 ` [ 05/46] neighbour: populate neigh_parms on alloc before calling ndo_neigh_setup Greg Kroah-Hartman
2013-09-12 17:58 ` [ 06/46] bonding: modify only neigh_parms owned by us Greg Kroah-Hartman
2013-09-12 17:58 ` [ 07/46] fib_trie: remove potential out of bound access Greg Kroah-Hartman
2013-09-12 17:58 ` [ 08/46] bridge: dont try to update timers in case of broken MLD queries Greg Kroah-Hartman
2013-09-12 17:58 ` [ 09/46] tcp: cubic: fix overflow error in bictcp_update() Greg Kroah-Hartman
2013-09-12 17:58 ` [ 10/46] tcp: cubic: fix bug in bictcp_acked() Greg Kroah-Hartman
2013-09-12 17:58 ` [ 11/46] ipv6: dont stop backtracking in fib6_lookup_1 if subtree does not match Greg Kroah-Hartman
2013-09-12 17:58 ` [ 12/46] ip_gre: fix ipgre_header to return correct offset MIME-Version: 1.0 Greg Kroah-Hartman
2013-09-12 17:58 ` [ 13/46] 8139cp: Fix skb leak in rx_status_loop failure path Greg Kroah-Hartman
2013-09-12 17:58 ` [ 14/46] rtnetlink: Fix inverted check in ndo_dflt_fdb_del() Greg Kroah-Hartman
2013-09-12 17:58 ` [ 15/46] genl: Fix genl dumpit() locking Greg Kroah-Hartman
2013-09-12 17:58 ` [ 16/46] genl: Hold reference on correct module while netlink-dump Greg Kroah-Hartman
2013-09-12 17:58 ` [ 17/46] ip_tunnel: Do not use inner ip-header-id for tunnel ip-header-id Greg Kroah-Hartman
2013-09-12 17:58 ` [ 18/46] rtnetlink: rtnl_bridge_getlink: Call nlmsg_find_attr() with ifinfomsg header Greg Kroah-Hartman
2013-09-12 17:58 ` [ 19/46] tun: signedness bug in tun_get_user() Greg Kroah-Hartman
2013-09-12 17:58 ` [ 20/46] ipv6: remove max_addresses check from ipv6_create_tempaddr Greg Kroah-Hartman
2013-09-12 17:58 ` [ 21/46] ipv6: drop packets with multiple fragmentation headers Greg Kroah-Hartman
2013-09-12 17:58 ` [ 22/46] tcp: set timestamps for restored skb-s Greg Kroah-Hartman
2013-09-12 17:58 ` [ 23/46] packet: restore packet statistics tp_packets to include drops Greg Kroah-Hartman
2013-09-12 17:58 ` [ 24/46] bridge: Use the correct bit length for bitmap functions in the VLAN code Greg Kroah-Hartman
2013-09-12 17:58 ` Greg Kroah-Hartman [this message]
2013-09-12 17:58 ` [ 26/46] sfc: Fix lookup of default RX MAC filters when steered using ethtool Greg Kroah-Hartman
2013-09-12 17:58 ` [ 27/46] be2net: fix disabling TX in be_close() Greg Kroah-Hartman
2013-09-12 17:58 ` [ 28/46] net: usb: Add HP hs2434 device to ZLP exception table Greg Kroah-Hartman
2013-09-12 17:58 ` [ 29/46] tcp: initialize rcv_tstamp for restored sockets Greg Kroah-Hartman
2013-09-12 17:58 ` [ 30/46] tcp: dont apply tsoffset if rcv_tsecr is zero Greg Kroah-Hartman
2013-09-12 17:58 ` [ 31/46] ipv4: sendto/hdrincl: dont use destination address found in header Greg Kroah-Hartman
2013-09-12 17:58 ` [ 32/46] ipv6: Dont depend on per socket memory for neighbour discovery messages Greg Kroah-Hartman
2013-09-12 17:58 ` [ 33/46] tcp: tcp_make_synack() should use sock_wmalloc Greg Kroah-Hartman
2013-09-12 17:58 ` [ 34/46] tipc: set sk_err correctly when connection fails Greg Kroah-Hartman
2013-09-12 17:58 ` [ 35/46] net: revert 8728c544a9c ("net: dev_pick_tx() fix") Greg Kroah-Hartman
2013-09-12 17:58 ` [ 36/46] net: bridge: convert MLDv2 Query MRC into msecs_to_jiffies for max_delay Greg Kroah-Hartman
2013-09-12 17:58 ` [ 37/46] ICMPv6: treat dest unreachable codes 5 and 6 as EACCES, not EPROTO Greg Kroah-Hartman
2013-09-12 17:58 ` [ 38/46] tg3: Dont turn off led on 5719 serdes port 0 Greg Kroah-Hartman
2013-09-12 17:58 ` [ 39/46] vhost_net: poll vhost queue after marking DMA is done Greg Kroah-Hartman
2013-09-12 17:58 ` [ 40/46] ipv6: fix null pointer dereference in __ip6addrlbl_add Greg Kroah-Hartman
2013-09-12 17:58 ` [ 41/46] net: ipv6: tcp: fix potential use after free in tcp_v6_do_rcv Greg Kroah-Hartman
2013-09-12 17:58 ` [ 42/46] net: mvneta: properly disable HW PHY polling and ensure adjust_link() works Greg Kroah-Hartman
2013-09-12 17:58 ` [ 43/46] crypto: xor - Check for osxsave as well as avx in crypto/xor Greg Kroah-Hartman
2013-09-12 17:58 ` [ 44/46] drivers/rtc/rtc-max77686.c: Fix wrong register Greg Kroah-Hartman
2013-09-12 17:58 ` [ 45/46] mwifiex: do not create AP and P2P interfaces upon driver loading Greg Kroah-Hartman
2013-09-12 17:58 ` [ 46/46] ARM: at91: dt: sam9260: add i2c gpio pinctrl Greg Kroah-Hartman
2013-09-12 22:35 ` [ 00/46] 3.10.12-stable review Guenter Roeck
2013-09-12 23:07   ` Greg Kroah-Hartman
2013-09-13 23:02 ` Shuah Khan

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20130912175723.743750832@linuxfoundation.org \
    --to=gregkh@linuxfoundation.org \
    --cc=brouer@redhat.com \
    --cc=davem@davemloft.net \
    --cc=linux-kernel@vger.kernel.org \
    --cc=stable@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).