From mboxrd@z Thu Jan  1 00:00:00 1970
From: Toke =?utf-8?Q?H=C3=B8iland-J=C3=B8rgensen?= <toke@toke.dk>
Subject: Re: [PATCH net-next v10 6/7] sch_cake: Add overhead compensation support to the rate shaper
Date: Tue, 15 May 2018 12:22:24 +0200
Message-ID: <878t8lw74v.fsf@toke.dk>
References: <152632431302.4861.16657365789045735410.stgit@alrua-kau> <152632442995.4861.6626480556330393864.stgit@alrua-kau>
Mime-Version: 1.0
Content-Type: text/plain; charset=utf-8
Content-Transfer-Encoding: quoted-printable
Cc: cake@lists.bufferbloat.net
To: netdev@vger.kernel.org
Return-path: <netdev-owner@vger.kernel.org>
Received: from mail.toke.dk ([52.28.52.200]:48265 "EHLO mail.toke.dk"
        rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP
        id S1752059AbeEOKWd (ORCPT <rfc822;netdev@vger.kernel.org>);
        Tue, 15 May 2018 06:22:33 -0400
In-Reply-To: <152632442995.4861.6626480556330393864.stgit@alrua-kau>
Sender: netdev-owner@vger.kernel.org
List-ID: <netdev.vger.kernel.org>

Toke H=C3=B8iland-J=C3=B8rgensen <toke@toke.dk> writes:

> This commit adds configurable overhead compensation support to the rate
> shaper. With this feature, userspace can configure the actual bottleneck
> link overhead and encapsulation mode used, which will be used by the shap=
er
> to calculate the precise duration of each packet on the wire.
>
> This feature is needed because CAKE is often deployed one or two hops
> upstream of the actual bottleneck (which can be, e.g., inside a DSL or
> cable modem). In this case, the link layer characteristics and overhead
> reported by the kernel does not match the actual bottleneck. Being able to
> set the actual values in use makes it possible to configure the shaper ra=
te
> much closer to the actual bottleneck rate (our experience shows it is
> possible to get with 0.1% of the actual physical bottleneck rate), thus
> keeping latency low without sacrificing bandwidth.
>
> The overhead compensation has three tunables: A fixed per-packet overhead
> size (which, if set, will be accounted from the IP packet header), a
> minimum packet size (MPU) and a framing mode supporting either ATM or PTM
> framing. We include a set of common keywords in TC to help users configure
> the right parameters. If no overhead value is set, the value reported by
> the kernel is used.
>
> Signed-off-by: Toke H=C3=B8iland-J=C3=B8rgensen <toke@toke.dk>
> ---
>  net/sched/sch_cake.c |  123 ++++++++++++++++++++++++++++++++++++++++++++=
++++++
>  1 file changed, 122 insertions(+), 1 deletion(-)
>
> diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c
> index ccc6f26b306c..6314a089a204 100644
> --- a/net/sched/sch_cake.c
> +++ b/net/sched/sch_cake.c
> @@ -275,6 +275,7 @@ enum {
>=20=20
>  struct cobalt_skb_cb {
>  	cobalt_time_t enqueue_time;
> +	u32           adjusted_len;
>  };
>=20=20
>  static cobalt_time_t cobalt_get_time(void)
> @@ -1130,6 +1131,87 @@ static cobalt_time_t cake_ewma(cobalt_time_t avg, =
cobalt_time_t sample,
>  	return avg;
>  }
>=20=20
> +static u32 cake_overhead(struct cake_sched_data *q, struct sk_buff *skb)
> +{
> +	const struct skb_shared_info *shinfo =3D skb_shinfo(skb);
> +	u32 off =3D skb_network_offset(skb);
> +	u32 len =3D qdisc_pkt_len(skb);
> +	u16 segs =3D 1;
> +
> +	if (unlikely(shinfo->gso_size)) {
> +		/* borrowed from qdisc_pkt_len_init() */
> +		unsigned int hdr_len;
> +
> +		hdr_len =3D skb_transport_header(skb) - skb_mac_header(skb);
> +
> +		/* + transport layer */
> +		if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 |
> +					       SKB_GSO_TCPV6))) {
> +			const struct tcphdr *th;
> +			struct tcphdr _tcphdr;
> +
> +			th =3D skb_header_pointer(skb, skb_transport_offset(skb),
> +						sizeof(_tcphdr), &_tcphdr);
> +			if (likely(th))
> +				hdr_len +=3D __tcp_hdrlen(th);
> +		} else {
> +			struct udphdr _udphdr;
> +
> +			if (skb_header_pointer(skb, skb_transport_offset(skb),
> +					       sizeof(_udphdr), &_udphdr))
> +				hdr_len +=3D sizeof(struct udphdr);
> +		}
> +
> +		if (unlikely(shinfo->gso_type & SKB_GSO_DODGY))
> +			segs =3D DIV_ROUND_UP(skb->len - hdr_len,
> +					    shinfo->gso_size);
> +		else
> +			segs =3D shinfo->gso_segs;
> +
> +		/* The last segment may be shorter; we ignore this, which means
> +		 * that we will over-estimate the size of the whole GSO segment
> +		 * by the difference in size. This is conservative, so we live
> +		 * with that to avoid the complexity of dealing with it.
> +		 */
> +		len =3D shinfo->gso_size + hdr_len;
> +	}
> +
> +	q->avg_netoff =3D cake_ewma(q->avg_netoff, off << 16, 8);
> +
> +	if (q->rate_flags & CAKE_FLAG_OVERHEAD)
> +		len -=3D off;
> +
> +	if (q->max_netlen < len)
> +		q->max_netlen =3D len;
> +	if (q->min_netlen > len)
> +		q->min_netlen =3D len;
> +
> +	len +=3D q->rate_overhead;
> +
> +	if (len < q->rate_mpu)
> +		len =3D q->rate_mpu;
> +
> +	if (q->atm_mode =3D=3D CAKE_ATM_ATM) {
> +		len +=3D 47;
> +		len /=3D 48;
> +		len *=3D 53;
> +	} else if (q->atm_mode =3D=3D CAKE_ATM_PTM) {
> +		/* Add one byte per 64 bytes or part thereof.
> +		 * This is conservative and easier to calculate than the
> +		 * precise value.
> +		 */
> +		len +=3D (len + 63) / 64;
> +	}
> +
> +	if (q->max_adjlen < len)
> +		q->max_adjlen =3D len;
> +	if (q->min_adjlen > len)
> +		q->min_adjlen =3D len;
> +
> +	get_cobalt_cb(skb)->adjusted_len =3D len * segs;
> +	return len;

Well, this is embarrassing; seems that I broke this somewhere along the
way. Will resend with a fix...

-Toke