netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Patrick McHardy <kaber@trash.net>
To: Herbert Xu <herbert@gondor.apana.org.au>
Cc: Kernel Netdev Mailing List <netdev@vger.kernel.org>
Subject: [XFRM]: Improve MTU estimation
Date: Fri, 04 Aug 2006 10:50:16 +0200	[thread overview]
Message-ID: <44D30A48.4050403@trash.net> (raw)

[-- Attachment #1: Type: text/plain, Size: 376 bytes --]

While trying to track down a PMTUD problem with IPsec, I
misread the current MTU estimation code and initially thought
it would overestimate the MTU. I then noticed that my reading
was wrong, but by that time I had already replaced the code with
an exact calculation. The patch fixes the common underestimation
of the MTU by two bytes with ESP and should be a bit faster, so
it still looks useful.


[-- Attachment #2: x --]
[-- Type: text/plain, Size: 5677 bytes --]

[XFRM]: Improve MTU estimation

Replace the probing-based MTU estimation, which usually takes 2-3
iterations to find a fitting value and may underestimate the MTU,
with an exact calculation.

Signed-off-by: Patrick McHardy <kaber@trash.net>

---
commit d5722ea7c8c7d3526788cd4fc3ab3e1237273fa8
tree 56ddec256902370864e93bb7bd095281162aea3b
parent a205729e2cd8e51257cd0ea738524c64da99b9e0
author Patrick McHardy <kaber@trash.net> Fri, 04 Aug 2006 10:37:09 +0200
committer Patrick McHardy <kaber@trash.net> Fri, 04 Aug 2006 10:37:09 +0200

 include/net/xfrm.h    |    3 +--
 net/ipv4/esp4.c       |   28 +++++++++++++++-------------
 net/ipv6/esp6.c       |   25 +++++++++++--------------
 net/xfrm/xfrm_state.c |   36 ++++++++----------------------------
 4 files changed, 35 insertions(+), 57 deletions(-)

diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 9c5ee9f..ea1b028 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -262,8 +262,7 @@ struct xfrm_type
 	void			(*destructor)(struct xfrm_state *);
 	int			(*input)(struct xfrm_state *, struct sk_buff *skb);
 	int			(*output)(struct xfrm_state *, struct sk_buff *pskb);
-	/* Estimate maximal size of result of transformation of a dgram */
-	u32			(*get_max_size)(struct xfrm_state *, int size);
+	u32			(*get_mtu)(struct xfrm_state *, int size);
 };
 
 extern int xfrm_register_type(struct xfrm_type *type, unsigned short family);
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index fc2f8ce..5393dc2 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -251,21 +251,19 @@ out:
 	return -EINVAL;
 }
 
-static u32 esp4_get_max_size(struct xfrm_state *x, int mtu)
+static u32 esp4_get_mtu(struct xfrm_state *x, int mtu)
 {
 	struct esp_data *esp = x->data;
-	u32 blksize = ALIGN(crypto_tfm_alg_blocksize(esp->conf.tfm), 4);
+	u32 align = ALIGN(crypto_tfm_alg_blocksize(esp->conf.tfm), 4);
 
-	if (x->props.mode) {
-		mtu = ALIGN(mtu + 2, blksize);
-	} else {
-		/* The worst case. */
-		mtu = ALIGN(mtu + 2, 4) + blksize - 4;
-	}
-	if (esp->conf.padlen)
-		mtu = ALIGN(mtu, esp->conf.padlen);
+	if (esp->conf.padlen > align)
+		align = esp->conf.padlen;
 
-	return mtu + x->props.header_len + esp->auth.icv_trunc_len;
+	mtu -= x->props.header_len + esp->auth.icv_trunc_len;
+	mtu &= ~(align - 1);
+	mtu -= 2;
+
+	return mtu;
 }
 
 static void esp4_err(struct sk_buff *skb, u32 info)
@@ -307,6 +305,7 @@ static void esp_destroy(struct xfrm_stat
 static int esp_init_state(struct xfrm_state *x)
 {
 	struct esp_data *esp = NULL;
+	u32 align;
 
 	/* null auth and encryption can have zero length keys */
 	if (x->aalg) {
@@ -385,7 +384,10 @@ static int esp_init_state(struct xfrm_st
 		}
 	}
 	x->data = esp;
-	x->props.trailer_len = esp4_get_max_size(x, 0) - x->props.header_len;
+	align = ALIGN(crypto_tfm_alg_blocksize(esp->conf.tfm), 4);
+	if (esp->conf.padlen)
+		align = ALIGN(align, esp->conf.padlen);
+	x->props.trailer_len = align - 1 + esp->auth.icv_trunc_len;
 	return 0;
 
 error:
@@ -402,7 +404,7 @@ static struct xfrm_type esp_type =
 	.proto	     	= IPPROTO_ESP,
 	.init_state	= esp_init_state,
 	.destructor	= esp_destroy,
-	.get_max_size	= esp4_get_max_size,
+	.get_mtu	= esp4_get_mtu,
 	.input		= esp_input,
 	.output		= esp_output
 };
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index a278d5e..b8d0a05 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -222,22 +222,19 @@ out:
 	return ret;
 }
 
-static u32 esp6_get_max_size(struct xfrm_state *x, int mtu)
+static u32 esp6_get_mtu(struct xfrm_state *x, int mtu)
 {
 	struct esp_data *esp = x->data;
-	u32 blksize = ALIGN(crypto_tfm_alg_blocksize(esp->conf.tfm), 4);
-
-	if (x->props.mode) {
-		mtu = ALIGN(mtu + 2, blksize);
-	} else {
-		/* The worst case. */
-		u32 padsize = ((blksize - 1) & 7) + 1;
-		mtu = ALIGN(mtu + 2, padsize) + blksize - padsize;
-	}
-	if (esp->conf.padlen)
-		mtu = ALIGN(mtu, esp->conf.padlen);
+	u32 align = ALIGN(crypto_tfm_alg_blocksize(esp->conf.tfm), 4);
+
+	if (esp->conf.padlen > align)
+		align = esp->conf.padlen;
+	
+	mtu -= x->props.header_len + esp->auth.icv_trunc_len;
+	mtu &= ~(align - 1);
+	mtu -= 2;
 
-	return mtu + x->props.header_len + esp->auth.icv_trunc_len;
+	return mtu;
 }
 
 static void esp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
@@ -363,7 +360,7 @@ static struct xfrm_type esp6_type =
 	.proto	     	= IPPROTO_ESP,
 	.init_state	= esp6_init_state,
 	.destructor	= esp6_destroy,
-	.get_max_size	= esp6_get_max_size,
+	.get_mtu	= esp6_get_mtu,
 	.input		= esp6_input,
 	.output		= esp6_output
 };
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 0021aad..39d9169 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -1129,37 +1129,17 @@ void xfrm_state_delete_tunnel(struct xfr
 }
 EXPORT_SYMBOL(xfrm_state_delete_tunnel);
 
-/*
- * This function is NOT optimal.  For example, with ESP it will give an
- * MTU that's usually two bytes short of being optimal.  However, it will
- * usually give an answer that's a multiple of 4 provided the input is
- * also a multiple of 4.
- */
 int xfrm_state_mtu(struct xfrm_state *x, int mtu)
 {
-	int res = mtu;
-
-	res -= x->props.header_len;
-
-	for (;;) {
-		int m = res;
-
-		if (m < 68)
-			return 68;
-
-		spin_lock_bh(&x->lock);
-		if (x->km.state == XFRM_STATE_VALID &&
-		    x->type && x->type->get_max_size)
-			m = x->type->get_max_size(x, m);
-		else
-			m += x->props.header_len;
-		spin_unlock_bh(&x->lock);
-
-		if (m <= mtu)
-			break;
-		res -= (m - mtu);
-	}
+	int res;
 
+	spin_lock_bh(&x->lock);
+	if (x->km.state == XFRM_STATE_VALID &&
+	    x->type && x->type->get_mtu)
+		res = x->type->get_mtu(x, mtu);
+	else
+		res = mtu;
+	spin_unlock_bh(&x->lock);
 	return res;
 }
 

             reply	other threads:[~2006-08-04  8:52 UTC|newest]

Thread overview: 13+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2006-08-04  8:50 Patrick McHardy [this message]
2006-08-04  9:27 ` [XFRM]: Improve MTU estimation Patrick McHardy
2006-08-04 10:01   ` Herbert Xu
2006-08-04 10:09     ` Patrick McHardy
2006-08-04 10:13       ` Herbert Xu
2006-08-04 11:11         ` Patrick McHardy
2006-08-04 11:16           ` Herbert Xu
2006-08-04 11:21             ` Patrick McHardy
2006-08-04 11:25             ` Herbert Xu
2006-08-04 11:50               ` Patrick McHardy
2006-08-04 11:51                 ` Patrick McHardy
2006-08-04 11:55                   ` Herbert Xu
2006-08-04 11:54                 ` Herbert Xu

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=44D30A48.4050403@trash.net \
    --to=kaber@trash.net \
    --cc=herbert@gondor.apana.org.au \
    --cc=netdev@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).