* [XFRM]: Improve MTU estimation
@ 2006-08-04 8:50 Patrick McHardy
2006-08-04 9:27 ` Patrick McHardy
0 siblings, 1 reply; 13+ messages in thread
From: Patrick McHardy @ 2006-08-04 8:50 UTC (permalink / raw)
To: Herbert Xu; +Cc: Kernel Netdev Mailing List
[-- Attachment #1: Type: text/plain, Size: 376 bytes --]
While trying to track down a PMTUD problem with IPsec, I
misread the current MTU estimation code and initially thought
it would overestimate the MTU. I then noticed that this was
wrong, but by that time had already replaced it by an exact
calculation. It fixes the common underestimation of the MTU
by two bytes with ESP and should be a bit faster, so it still
looks useful.
[-- Attachment #2: x --]
[-- Type: text/plain, Size: 5677 bytes --]
[XFRM]: Improve MTU estimation
Replace the probing based MTU estimation, which usually takes 2-3
iterations to find a fitting value and may underestimate the MTU,
by an exact calculation.
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
commit d5722ea7c8c7d3526788cd4fc3ab3e1237273fa8
tree 56ddec256902370864e93bb7bd095281162aea3b
parent a205729e2cd8e51257cd0ea738524c64da99b9e0
author Patrick McHardy <kaber@trash.net> Fri, 04 Aug 2006 10:37:09 +0200
committer Patrick McHardy <kaber@trash.net> Fri, 04 Aug 2006 10:37:09 +0200
include/net/xfrm.h | 3 +--
net/ipv4/esp4.c | 28 +++++++++++++++-------------
net/ipv6/esp6.c | 25 +++++++++++--------------
net/xfrm/xfrm_state.c | 36 ++++++++----------------------------
4 files changed, 35 insertions(+), 57 deletions(-)
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 9c5ee9f..ea1b028 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -262,8 +262,7 @@ struct xfrm_type
void (*destructor)(struct xfrm_state *);
int (*input)(struct xfrm_state *, struct sk_buff *skb);
int (*output)(struct xfrm_state *, struct sk_buff *pskb);
- /* Estimate maximal size of result of transformation of a dgram */
- u32 (*get_max_size)(struct xfrm_state *, int size);
+ u32 (*get_mtu)(struct xfrm_state *, int size);
};
extern int xfrm_register_type(struct xfrm_type *type, unsigned short family);
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index fc2f8ce..5393dc2 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -251,21 +251,19 @@ out:
return -EINVAL;
}
-static u32 esp4_get_max_size(struct xfrm_state *x, int mtu)
+static u32 esp4_get_mtu(struct xfrm_state *x, int mtu)
{
struct esp_data *esp = x->data;
- u32 blksize = ALIGN(crypto_tfm_alg_blocksize(esp->conf.tfm), 4);
+ u32 align = ALIGN(crypto_tfm_alg_blocksize(esp->conf.tfm), 4);
- if (x->props.mode) {
- mtu = ALIGN(mtu + 2, blksize);
- } else {
- /* The worst case. */
- mtu = ALIGN(mtu + 2, 4) + blksize - 4;
- }
- if (esp->conf.padlen)
- mtu = ALIGN(mtu, esp->conf.padlen);
+ if (esp->conf.padlen > align)
+ align = esp->conf.padlen;
- return mtu + x->props.header_len + esp->auth.icv_trunc_len;
+ mtu -= x->props.header_len + esp->auth.icv_trunc_len;
+ mtu &= ~(align - 1);
+ mtu -= 2;
+
+ return mtu;
}
static void esp4_err(struct sk_buff *skb, u32 info)
@@ -307,6 +305,7 @@ static void esp_destroy(struct xfrm_stat
static int esp_init_state(struct xfrm_state *x)
{
struct esp_data *esp = NULL;
+ u32 align;
/* null auth and encryption can have zero length keys */
if (x->aalg) {
@@ -385,7 +384,10 @@ static int esp_init_state(struct xfrm_st
}
}
x->data = esp;
- x->props.trailer_len = esp4_get_max_size(x, 0) - x->props.header_len;
+ align = ALIGN(crypto_tfm_alg_blocksize(esp->conf.tfm), 4);
+ if (esp->conf.padlen)
+ align = ALIGN(align, esp->conf.padlen);
+ x->props.trailer_len = align - 1 + esp->auth.icv_trunc_len;
return 0;
error:
@@ -402,7 +404,7 @@ static struct xfrm_type esp_type =
.proto = IPPROTO_ESP,
.init_state = esp_init_state,
.destructor = esp_destroy,
- .get_max_size = esp4_get_max_size,
+ .get_mtu = esp4_get_mtu,
.input = esp_input,
.output = esp_output
};
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index a278d5e..b8d0a05 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -222,22 +222,19 @@ out:
return ret;
}
-static u32 esp6_get_max_size(struct xfrm_state *x, int mtu)
+static u32 esp6_get_mtu(struct xfrm_state *x, int mtu)
{
struct esp_data *esp = x->data;
- u32 blksize = ALIGN(crypto_tfm_alg_blocksize(esp->conf.tfm), 4);
-
- if (x->props.mode) {
- mtu = ALIGN(mtu + 2, blksize);
- } else {
- /* The worst case. */
- u32 padsize = ((blksize - 1) & 7) + 1;
- mtu = ALIGN(mtu + 2, padsize) + blksize - padsize;
- }
- if (esp->conf.padlen)
- mtu = ALIGN(mtu, esp->conf.padlen);
+ u32 align = ALIGN(crypto_tfm_alg_blocksize(esp->conf.tfm), 4);
+
+ if (esp->conf.padlen > align)
+ align = esp->conf.padlen;
+
+ mtu -= x->props.header_len + esp->auth.icv_trunc_len;
+ mtu &= ~(align - 1);
+ mtu -= 2;
- return mtu + x->props.header_len + esp->auth.icv_trunc_len;
+ return mtu;
}
static void esp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
@@ -363,7 +360,7 @@ static struct xfrm_type esp6_type =
.proto = IPPROTO_ESP,
.init_state = esp6_init_state,
.destructor = esp6_destroy,
- .get_max_size = esp6_get_max_size,
+ .get_mtu = esp6_get_mtu,
.input = esp6_input,
.output = esp6_output
};
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 0021aad..39d9169 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -1129,37 +1129,17 @@ void xfrm_state_delete_tunnel(struct xfr
}
EXPORT_SYMBOL(xfrm_state_delete_tunnel);
-/*
- * This function is NOT optimal. For example, with ESP it will give an
- * MTU that's usually two bytes short of being optimal. However, it will
- * usually give an answer that's a multiple of 4 provided the input is
- * also a multiple of 4.
- */
int xfrm_state_mtu(struct xfrm_state *x, int mtu)
{
- int res = mtu;
-
- res -= x->props.header_len;
-
- for (;;) {
- int m = res;
-
- if (m < 68)
- return 68;
-
- spin_lock_bh(&x->lock);
- if (x->km.state == XFRM_STATE_VALID &&
- x->type && x->type->get_max_size)
- m = x->type->get_max_size(x, m);
- else
- m += x->props.header_len;
- spin_unlock_bh(&x->lock);
-
- if (m <= mtu)
- break;
- res -= (m - mtu);
- }
+ int res;
+ spin_lock_bh(&x->lock);
+ if (x->km.state == XFRM_STATE_VALID &&
+ x->type && x->type->get_mtu)
+ res = x->type->get_mtu(x, mtu);
+ else
+ res = mtu;
+ spin_unlock_bh(&x->lock);
return res;
}
^ permalink raw reply related [flat|nested] 13+ messages in thread
* Re: [XFRM]: Improve MTU estimation
2006-08-04 8:50 [XFRM]: Improve MTU estimation Patrick McHardy
@ 2006-08-04 9:27 ` Patrick McHardy
2006-08-04 10:01 ` Herbert Xu
0 siblings, 1 reply; 13+ messages in thread
From: Patrick McHardy @ 2006-08-04 9:27 UTC (permalink / raw)
To: Herbert Xu; +Cc: Kernel Netdev Mailing List
[-- Attachment #1: Type: text/plain, Size: 803 bytes --]
Patrick McHardy wrote:
> int xfrm_state_mtu(struct xfrm_state *x, int mtu)
> {
> - int res = mtu;
> -
> - res -= x->props.header_len;
> -
> - for (;;) {
> - int m = res;
> -
> - if (m < 68)
> - return 68;
> -
> - spin_lock_bh(&x->lock);
> - if (x->km.state == XFRM_STATE_VALID &&
> - x->type && x->type->get_max_size)
> - m = x->type->get_max_size(x, m);
> - else
> - m += x->props.header_len;
> - spin_unlock_bh(&x->lock);
> -
> - if (m <= mtu)
> - break;
> - res -= (m - mtu);
> - }
> + int res;
>
> + spin_lock_bh(&x->lock);
> + if (x->km.state == XFRM_STATE_VALID &&
> + x->type && x->type->get_mtu)
> + res = x->type->get_mtu(x, mtu);
> + else
> + res = mtu;
> + spin_unlock_bh(&x->lock);
> return res;
> }
That broke estimation for AH. This one should be fine.
[-- Attachment #2: x --]
[-- Type: text/plain, Size: 5729 bytes --]
[XFRM]: Improve MTU estimation
Replace the probing based MTU estimation, which usually takes 2-3
iterations to find a fitting value and may underestimate the MTU,
by an exact calculation.
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
commit 503d86d9420b2c21121a5c2cbda9e42cc559031f
tree a65939379cd3f200070a784fb079854270d26940
parent a205729e2cd8e51257cd0ea738524c64da99b9e0
author Patrick McHardy <kaber@trash.net> Fri, 04 Aug 2006 11:27:20 +0200
committer Patrick McHardy <kaber@trash.net> Fri, 04 Aug 2006 11:27:20 +0200
include/net/xfrm.h | 3 +--
net/ipv4/esp4.c | 28 +++++++++++++++-------------
net/ipv6/esp6.c | 25 +++++++++++--------------
net/xfrm/xfrm_state.c | 37 ++++++++++---------------------------
4 files changed, 37 insertions(+), 56 deletions(-)
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 9c5ee9f..ea1b028 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -262,8 +262,7 @@ struct xfrm_type
void (*destructor)(struct xfrm_state *);
int (*input)(struct xfrm_state *, struct sk_buff *skb);
int (*output)(struct xfrm_state *, struct sk_buff *pskb);
- /* Estimate maximal size of result of transformation of a dgram */
- u32 (*get_max_size)(struct xfrm_state *, int size);
+ u32 (*get_mtu)(struct xfrm_state *, int size);
};
extern int xfrm_register_type(struct xfrm_type *type, unsigned short family);
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index fc2f8ce..5393dc2 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -251,21 +251,19 @@ out:
return -EINVAL;
}
-static u32 esp4_get_max_size(struct xfrm_state *x, int mtu)
+static u32 esp4_get_mtu(struct xfrm_state *x, int mtu)
{
struct esp_data *esp = x->data;
- u32 blksize = ALIGN(crypto_tfm_alg_blocksize(esp->conf.tfm), 4);
+ u32 align = ALIGN(crypto_tfm_alg_blocksize(esp->conf.tfm), 4);
- if (x->props.mode) {
- mtu = ALIGN(mtu + 2, blksize);
- } else {
- /* The worst case. */
- mtu = ALIGN(mtu + 2, 4) + blksize - 4;
- }
- if (esp->conf.padlen)
- mtu = ALIGN(mtu, esp->conf.padlen);
+ if (esp->conf.padlen > align)
+ align = esp->conf.padlen;
- return mtu + x->props.header_len + esp->auth.icv_trunc_len;
+ mtu -= x->props.header_len + esp->auth.icv_trunc_len;
+ mtu &= ~(align - 1);
+ mtu -= 2;
+
+ return mtu;
}
static void esp4_err(struct sk_buff *skb, u32 info)
@@ -307,6 +305,7 @@ static void esp_destroy(struct xfrm_stat
static int esp_init_state(struct xfrm_state *x)
{
struct esp_data *esp = NULL;
+ u32 align;
/* null auth and encryption can have zero length keys */
if (x->aalg) {
@@ -385,7 +384,10 @@ static int esp_init_state(struct xfrm_st
}
}
x->data = esp;
- x->props.trailer_len = esp4_get_max_size(x, 0) - x->props.header_len;
+ align = ALIGN(crypto_tfm_alg_blocksize(esp->conf.tfm), 4);
+ if (esp->conf.padlen)
+ align = ALIGN(align, esp->conf.padlen);
+ x->props.trailer_len = align - 1 + esp->auth.icv_trunc_len;
return 0;
error:
@@ -402,7 +404,7 @@ static struct xfrm_type esp_type =
.proto = IPPROTO_ESP,
.init_state = esp_init_state,
.destructor = esp_destroy,
- .get_max_size = esp4_get_max_size,
+ .get_mtu = esp4_get_mtu,
.input = esp_input,
.output = esp_output
};
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index a278d5e..b8d0a05 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -222,22 +222,19 @@ out:
return ret;
}
-static u32 esp6_get_max_size(struct xfrm_state *x, int mtu)
+static u32 esp6_get_mtu(struct xfrm_state *x, int mtu)
{
struct esp_data *esp = x->data;
- u32 blksize = ALIGN(crypto_tfm_alg_blocksize(esp->conf.tfm), 4);
-
- if (x->props.mode) {
- mtu = ALIGN(mtu + 2, blksize);
- } else {
- /* The worst case. */
- u32 padsize = ((blksize - 1) & 7) + 1;
- mtu = ALIGN(mtu + 2, padsize) + blksize - padsize;
- }
- if (esp->conf.padlen)
- mtu = ALIGN(mtu, esp->conf.padlen);
+ u32 align = ALIGN(crypto_tfm_alg_blocksize(esp->conf.tfm), 4);
+
+ if (esp->conf.padlen > align)
+ align = esp->conf.padlen;
+
+ mtu -= x->props.header_len + esp->auth.icv_trunc_len;
+ mtu &= ~(align - 1);
+ mtu -= 2;
- return mtu + x->props.header_len + esp->auth.icv_trunc_len;
+ return mtu;
}
static void esp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
@@ -363,7 +360,7 @@ static struct xfrm_type esp6_type =
.proto = IPPROTO_ESP,
.init_state = esp6_init_state,
.destructor = esp6_destroy,
- .get_max_size = esp6_get_max_size,
+ .get_mtu = esp6_get_mtu,
.input = esp6_input,
.output = esp6_output
};
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 0021aad..7b0b100 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -1129,37 +1129,20 @@ void xfrm_state_delete_tunnel(struct xfr
}
EXPORT_SYMBOL(xfrm_state_delete_tunnel);
-/*
- * This function is NOT optimal. For example, with ESP it will give an
- * MTU that's usually two bytes short of being optimal. However, it will
- * usually give an answer that's a multiple of 4 provided the input is
- * also a multiple of 4.
- */
int xfrm_state_mtu(struct xfrm_state *x, int mtu)
{
- int res = mtu;
-
- res -= x->props.header_len;
-
- for (;;) {
- int m = res;
-
- if (m < 68)
- return 68;
-
- spin_lock_bh(&x->lock);
- if (x->km.state == XFRM_STATE_VALID &&
- x->type && x->type->get_max_size)
- m = x->type->get_max_size(x, m);
- else
- m += x->props.header_len;
- spin_unlock_bh(&x->lock);
+ int res;
- if (m <= mtu)
- break;
- res -= (m - mtu);
- }
+ spin_lock_bh(&x->lock);
+ if (x->km.state == XFRM_STATE_VALID &&
+ x->type && x->type->get_mtu)
+ res = x->type->get_mtu(x, mtu);
+ else
+ res = mtu - x->props.header_len;
+ spin_unlock_bh(&x->lock);
+ if (res < 68)
+ res = 68;
return res;
}
^ permalink raw reply related [flat|nested] 13+ messages in thread
* Re: [XFRM]: Improve MTU estimation
2006-08-04 9:27 ` Patrick McHardy
@ 2006-08-04 10:01 ` Herbert Xu
2006-08-04 10:09 ` Patrick McHardy
0 siblings, 1 reply; 13+ messages in thread
From: Herbert Xu @ 2006-08-04 10:01 UTC (permalink / raw)
To: Patrick McHardy; +Cc: Kernel Netdev Mailing List
On Fri, Aug 04, 2006 at 11:27:11AM +0200, Patrick McHardy wrote:
>
> [XFRM]: Improve MTU estimation
>
> Replace the probing based MTU estimation, which usually takes 2-3
> iterations to find a fitting value and may underestimate the MTU,
> by an exact calculation.
Thanks Patrick, this is definitely the better way to do it.
> -static u32 esp4_get_max_size(struct xfrm_state *x, int mtu)
> +static u32 esp4_get_mtu(struct xfrm_state *x, int mtu)
> {
> struct esp_data *esp = x->data;
> - u32 blksize = ALIGN(crypto_tfm_alg_blocksize(esp->conf.tfm), 4);
> + u32 align = ALIGN(crypto_tfm_alg_blocksize(esp->conf.tfm), 4);
>
> - if (x->props.mode) {
> - mtu = ALIGN(mtu + 2, blksize);
> - } else {
> - /* The worst case. */
> - mtu = ALIGN(mtu + 2, 4) + blksize - 4;
> - }
> - if (esp->conf.padlen)
> - mtu = ALIGN(mtu, esp->conf.padlen);
> + if (esp->conf.padlen > align)
> + align = esp->conf.padlen;
>
> - return mtu + x->props.header_len + esp->auth.icv_trunc_len;
> + mtu -= x->props.header_len + esp->auth.icv_trunc_len;
> + mtu &= ~(align - 1);
> + mtu -= 2;
> +
> + return mtu;
I haven't actually done the math, but I don't think this can be right
from a quick look. The reason is that transport mode is fundamentally
different from tunnel mode in that the IP options are not encrypted and
therefore do not contribute to the encryption block padding.
So as the code doesn't distinguish between transport mode and tunnel
mode, you might be producing an overestimate for transport mode.
Cheers,
--
Visit Openswan at http://www.openswan.org/
Email: Herbert Xu ~{PmV>HI~} <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [XFRM]: Improve MTU estimation
2006-08-04 10:01 ` Herbert Xu
@ 2006-08-04 10:09 ` Patrick McHardy
2006-08-04 10:13 ` Herbert Xu
0 siblings, 1 reply; 13+ messages in thread
From: Patrick McHardy @ 2006-08-04 10:09 UTC (permalink / raw)
To: Herbert Xu; +Cc: Kernel Netdev Mailing List
Herbert Xu wrote:
> On Fri, Aug 04, 2006 at 11:27:11AM +0200, Patrick McHardy wrote:
>
>>-static u32 esp4_get_max_size(struct xfrm_state *x, int mtu)
>>+static u32 esp4_get_mtu(struct xfrm_state *x, int mtu)
>> {
>> struct esp_data *esp = x->data;
>>- u32 blksize = ALIGN(crypto_tfm_alg_blocksize(esp->conf.tfm), 4);
>>+ u32 align = ALIGN(crypto_tfm_alg_blocksize(esp->conf.tfm), 4);
>>
>>- if (x->props.mode) {
>>- mtu = ALIGN(mtu + 2, blksize);
>>- } else {
>>- /* The worst case. */
>>- mtu = ALIGN(mtu + 2, 4) + blksize - 4;
>>- }
>>- if (esp->conf.padlen)
>>- mtu = ALIGN(mtu, esp->conf.padlen);
>>+ if (esp->conf.padlen > align)
>>+ align = esp->conf.padlen;
>>
>>- return mtu + x->props.header_len + esp->auth.icv_trunc_len;
>>+ mtu -= x->props.header_len + esp->auth.icv_trunc_len;
>>+ mtu &= ~(align - 1);
>>+ mtu -= 2;
>>+
>>+ return mtu;
>
>
> I haven't actually done the math, but I don't think this can be right
> from a quick look. The reason is that transport mode is fundamentally
> different from tunnel mode in that the IP options are not encrypted and
> therefore do not contribute to the encryption block padding.
>
> So as the code doesn't distinguish between transport mode and tunnel
> mode, you might be producing an overestimate for transport mode.
I was wondering why the old code distinguished between transport mode
and tunnel mode, I couldn't spot anything that would be affected. I'll
look into the transport mode case again.
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [XFRM]: Improve MTU estimation
2006-08-04 10:09 ` Patrick McHardy
@ 2006-08-04 10:13 ` Herbert Xu
2006-08-04 11:11 ` Patrick McHardy
0 siblings, 1 reply; 13+ messages in thread
From: Herbert Xu @ 2006-08-04 10:13 UTC (permalink / raw)
To: Patrick McHardy; +Cc: Kernel Netdev Mailing List
On Fri, Aug 04, 2006 at 12:09:18PM +0200, Patrick McHardy wrote:
>
> I was wondering why the old code distinguished between transport mode
> and tunnel mode, I couldn't spot anything that would be affected. I'll
> look into the transport mode case again.
The problem is basically you don't know a priori the size of the IP
options. So you assume the worst case where the IP options causes
the largest amount of padding for encryption (IP option length itself
must be a multiple of 4, so for a block size of 8 the worst is 4,
and the worst is 12 for a block size of 16).
Of course it gets hairier if you have ESP padding. I'm not even sure
if the current code gets that right.
Cheers,
--
Visit Openswan at http://www.openswan.org/
Email: Herbert Xu ~{PmV>HI~} <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [XFRM]: Improve MTU estimation
2006-08-04 10:13 ` Herbert Xu
@ 2006-08-04 11:11 ` Patrick McHardy
2006-08-04 11:16 ` Herbert Xu
0 siblings, 1 reply; 13+ messages in thread
From: Patrick McHardy @ 2006-08-04 11:11 UTC (permalink / raw)
To: Herbert Xu; +Cc: Kernel Netdev Mailing List
Herbert Xu wrote:
> On Fri, Aug 04, 2006 at 12:09:18PM +0200, Patrick McHardy wrote:
>
>>I was wondering why the old code distinguished between transport mode
>>and tunnel mode, I couldn't spot anything that would be affected. I'll
>>look into the transport mode case again.
>
>
> The problem is basically you don't know a priori the size of the IP
> options. So you assume the worst case where the IP options causes
> the largest amount of padding for encryption (IP option length itself
> must be a multiple of 4, so for a block size of 8 the worst is 4,
> and the worst is 12 for a block size of 16).
>
> Of course it gets hairier if you have ESP padding. I'm not even sure
> if the current code gets that right.
Unless I'm missing something, the padding caused by IP options
is always less than the worst case that can happen anyway
(max(block size, padlen)-1), so it can simply be ignored.
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [XFRM]: Improve MTU estimation
2006-08-04 11:11 ` Patrick McHardy
@ 2006-08-04 11:16 ` Herbert Xu
2006-08-04 11:21 ` Patrick McHardy
2006-08-04 11:25 ` Herbert Xu
0 siblings, 2 replies; 13+ messages in thread
From: Herbert Xu @ 2006-08-04 11:16 UTC (permalink / raw)
To: Patrick McHardy; +Cc: Kernel Netdev Mailing List
On Fri, Aug 04, 2006 at 01:11:19PM +0200, Patrick McHardy wrote:
>
> > Of course it gets hairier if you have ESP padding. I'm not even sure
> > if the current code gets that right.
>
> Unless I'm missing something, the padding caused by IP options
> is always less than the worst case that can happen anyway
> (max(block size, padlen)-1), so it can simply be ignored.
Are you talking about the ESP padding case, or transport mode in
general?
Cheers,
--
Visit Openswan at http://www.openswan.org/
Email: Herbert Xu ~{PmV>HI~} <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [XFRM]: Improve MTU estimation
2006-08-04 11:16 ` Herbert Xu
@ 2006-08-04 11:21 ` Patrick McHardy
2006-08-04 11:25 ` Herbert Xu
1 sibling, 0 replies; 13+ messages in thread
From: Patrick McHardy @ 2006-08-04 11:21 UTC (permalink / raw)
To: Herbert Xu; +Cc: Kernel Netdev Mailing List
Herbert Xu wrote:
> On Fri, Aug 04, 2006 at 01:11:19PM +0200, Patrick McHardy wrote:
>
>>>Of course it gets hairier if you have ESP padding. I'm not even sure
>>>if the current code gets that right.
>>
>>Unless I'm missing something, the padding caused by IP options
>>is always less than the worst case that can happen anyway
>>(max(block size, padlen)-1), so it can simply be ignored.
>
>
> Are you talking about the ESP padding case, or transport mode in
> general?
Transport mode in general.
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [XFRM]: Improve MTU estimation
2006-08-04 11:16 ` Herbert Xu
2006-08-04 11:21 ` Patrick McHardy
@ 2006-08-04 11:25 ` Herbert Xu
2006-08-04 11:50 ` Patrick McHardy
1 sibling, 1 reply; 13+ messages in thread
From: Herbert Xu @ 2006-08-04 11:25 UTC (permalink / raw)
To: Patrick McHardy; +Cc: Kernel Netdev Mailing List
On Fri, Aug 04, 2006 at 09:16:04PM +1000, herbert wrote:
>
> Are you talking about the ESP padding case, or transport mode in
> general?
I've reread your patches and your handling of ESP padding is spot on.
It's anyone's guess whether the current code gets it right or not :)
However, I believe that the transport mode handling does run into
problems with IP options. Basically, your calculation returns a
length that is a precise multiple of block size minus 2.
Now imagine that we have 4 bytes of IP options, given a block size
of 8 taking away 4 bytes from inside the encrypted area simply causes
them to be padded out so the encrypted length does not change. However,
we have to put those 4 bytes outside the encrypted area. The problem is
that we may not have those 4 bytes given the MTU.
For a standard 1500 MTU and the block size of 8 it just happens that
we do have 4 bytes (because 1500 % 8 == 4). However, this breaks down
if you start with say 1480 (standard MTU for 1500 with IPIP on the
outside).
You run into problems even with 1500 if your block size happens to be
16 (AES).
Cheers,
--
Visit Openswan at http://www.openswan.org/
Email: Herbert Xu ~{PmV>HI~} <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [XFRM]: Improve MTU estimation
2006-08-04 11:25 ` Herbert Xu
@ 2006-08-04 11:50 ` Patrick McHardy
2006-08-04 11:51 ` Patrick McHardy
2006-08-04 11:54 ` Herbert Xu
0 siblings, 2 replies; 13+ messages in thread
From: Patrick McHardy @ 2006-08-04 11:50 UTC (permalink / raw)
To: Herbert Xu; +Cc: Kernel Netdev Mailing List
Herbert Xu wrote:
> I've reread your patches and your handling of ESP padding is spot on.
> It's anyone's guess whether the current code gets it right or not :)
>
> However, I believe that the transport mode handling does run into
> problems with IP options. Basically, your calculation returns a
> length that is a precise multiple of block size minus 2.
>
> Now imagine that we have 4 bytes of IP options, given a block size
> of 8 taking away 4 bytes from inside the encrypted area simply causes
> them to be padded out so the encrypted length does not change. However,
> we have to put those 4 bytes outside the encrypted area. The problem is
> that we may not have those 4 bytes given the MTU.
>
> For a standard 1500 MTU and the block size of 8 it just happens that
> we do have 4 bytes (because 1500 % 8 == 4). However, this breaks down
> if you start with say 1480 (standard MTU for 1500 with IPIP on the
> outside).
>
> You run into problems even with 1500 if your block size happens to be
> 16 (AES).
Now I get it, thanks :) I missed that the IP header isn't part of the
length when it is aligned. So the worst-case increases by block-size
- 4 (- 8 for IPv6). How does this look?
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [XFRM]: Improve MTU estimation
2006-08-04 11:50 ` Patrick McHardy
@ 2006-08-04 11:51 ` Patrick McHardy
2006-08-04 11:55 ` Herbert Xu
2006-08-04 11:54 ` Herbert Xu
1 sibling, 1 reply; 13+ messages in thread
From: Patrick McHardy @ 2006-08-04 11:51 UTC (permalink / raw)
To: Herbert Xu; +Cc: Kernel Netdev Mailing List
[-- Attachment #1: Type: text/plain, Size: 233 bytes --]
Patrick McHardy wrote:
> Now I get it, thanks :) I missed that the IP header isn't part of the
> length when it is aligned. So the worst-case increases by block-size
> - 4 (- 8 for IPv6). How does this look?
Forgot the patch ..
[-- Attachment #2: x --]
[-- Type: text/plain, Size: 4965 bytes --]
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 9c5ee9f..ea1b028 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -262,8 +262,7 @@ struct xfrm_type
void (*destructor)(struct xfrm_state *);
int (*input)(struct xfrm_state *, struct sk_buff *skb);
int (*output)(struct xfrm_state *, struct sk_buff *pskb);
- /* Estimate maximal size of result of transformation of a dgram */
- u32 (*get_max_size)(struct xfrm_state *, int size);
+ u32 (*get_mtu)(struct xfrm_state *, int size);
};
extern int xfrm_register_type(struct xfrm_type *type, unsigned short family);
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index fc2f8ce..1ce06cc 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -251,21 +251,23 @@ out:
return -EINVAL;
}
-static u32 esp4_get_max_size(struct xfrm_state *x, int mtu)
+static u32 esp4_get_mtu(struct xfrm_state *x, int mtu)
{
struct esp_data *esp = x->data;
u32 blksize = ALIGN(crypto_tfm_alg_blocksize(esp->conf.tfm), 4);
+ u32 align;
- if (x->props.mode) {
- mtu = ALIGN(mtu + 2, blksize);
- } else {
- /* The worst case. */
- mtu = ALIGN(mtu + 2, 4) + blksize - 4;
- }
- if (esp->conf.padlen)
- mtu = ALIGN(mtu, esp->conf.padlen);
+ align = blksize;
+ if (esp->conf.padlen > align)
+ align = esp->conf.padlen;
- return mtu + x->props.header_len + esp->auth.icv_trunc_len;
+ mtu -= x->props.header_len + esp->auth.icv_trunc_len;
+ mtu &= ~(align - 1);
+ mtu -= 2;
+ if (!x->props.mode)
+ mtu -= blksize - 4;
+
+ return mtu;
}
static void esp4_err(struct sk_buff *skb, u32 info)
@@ -307,6 +309,7 @@ static void esp_destroy(struct xfrm_stat
static int esp_init_state(struct xfrm_state *x)
{
struct esp_data *esp = NULL;
+ u32 align;
/* null auth and encryption can have zero length keys */
if (x->aalg) {
@@ -385,7 +388,10 @@ static int esp_init_state(struct xfrm_st
}
}
x->data = esp;
- x->props.trailer_len = esp4_get_max_size(x, 0) - x->props.header_len;
+ align = ALIGN(crypto_tfm_alg_blocksize(esp->conf.tfm), 4);
+ if (esp->conf.padlen)
+ align = ALIGN(align, esp->conf.padlen);
+ x->props.trailer_len = align - 1 + esp->auth.icv_trunc_len;
return 0;
error:
@@ -402,7 +408,7 @@ static struct xfrm_type esp_type =
.proto = IPPROTO_ESP,
.init_state = esp_init_state,
.destructor = esp_destroy,
- .get_max_size = esp4_get_max_size,
+ .get_mtu = esp4_get_mtu,
.input = esp_input,
.output = esp_output
};
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index a278d5e..0c9c159 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -222,22 +222,23 @@ out:
return ret;
}
-static u32 esp6_get_max_size(struct xfrm_state *x, int mtu)
+static u32 esp6_get_mtu(struct xfrm_state *x, int mtu)
{
struct esp_data *esp = x->data;
u32 blksize = ALIGN(crypto_tfm_alg_blocksize(esp->conf.tfm), 4);
-
- if (x->props.mode) {
- mtu = ALIGN(mtu + 2, blksize);
- } else {
- /* The worst case. */
- u32 padsize = ((blksize - 1) & 7) + 1;
- mtu = ALIGN(mtu + 2, padsize) + blksize - padsize;
- }
- if (esp->conf.padlen)
- mtu = ALIGN(mtu, esp->conf.padlen);
-
- return mtu + x->props.header_len + esp->auth.icv_trunc_len;
+ u32 align;
+
+ align = blksize;
+ if (esp->conf.padlen > align)
+ align = esp->conf.padlen;
+
+ mtu -= x->props.header_len + esp->auth.icv_trunc_len;
+ mtu &= ~(align - 1);
+ mtu -= 2;
+ if (!x->props.mode && blksize >= 8)
+ mtu -= blksize - 8;
+
+ return mtu;
}
static void esp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
@@ -363,7 +364,7 @@ static struct xfrm_type esp6_type =
.proto = IPPROTO_ESP,
.init_state = esp6_init_state,
.destructor = esp6_destroy,
- .get_max_size = esp6_get_max_size,
+ .get_mtu = esp6_get_mtu,
.input = esp6_input,
.output = esp6_output
};
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 0021aad..7b0b100 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -1129,37 +1129,20 @@ void xfrm_state_delete_tunnel(struct xfr
}
EXPORT_SYMBOL(xfrm_state_delete_tunnel);
-/*
- * This function is NOT optimal. For example, with ESP it will give an
- * MTU that's usually two bytes short of being optimal. However, it will
- * usually give an answer that's a multiple of 4 provided the input is
- * also a multiple of 4.
- */
int xfrm_state_mtu(struct xfrm_state *x, int mtu)
{
- int res = mtu;
-
- res -= x->props.header_len;
-
- for (;;) {
- int m = res;
-
- if (m < 68)
- return 68;
-
- spin_lock_bh(&x->lock);
- if (x->km.state == XFRM_STATE_VALID &&
- x->type && x->type->get_max_size)
- m = x->type->get_max_size(x, m);
- else
- m += x->props.header_len;
- spin_unlock_bh(&x->lock);
+ int res;
- if (m <= mtu)
- break;
- res -= (m - mtu);
- }
+ spin_lock_bh(&x->lock);
+ if (x->km.state == XFRM_STATE_VALID &&
+ x->type && x->type->get_mtu)
+ res = x->type->get_mtu(x, mtu);
+ else
+ res = mtu - x->props.header_len;
+ spin_unlock_bh(&x->lock);
+ if (res < 68)
+ res = 68;
return res;
}
^ permalink raw reply related [flat|nested] 13+ messages in thread
* Re: [XFRM]: Improve MTU estimation
2006-08-04 11:50 ` Patrick McHardy
2006-08-04 11:51 ` Patrick McHardy
@ 2006-08-04 11:54 ` Herbert Xu
1 sibling, 0 replies; 13+ messages in thread
From: Herbert Xu @ 2006-08-04 11:54 UTC (permalink / raw)
To: Patrick McHardy; +Cc: Kernel Netdev Mailing List
On Fri, Aug 04, 2006 at 01:50:15PM +0200, Patrick McHardy wrote:
>
> Now I get it, thanks :) I missed that the IP header isn't part of the
> length when it is aligned. So the worst-case increases by block-size
> - 4 (- 8 for IPv6). How does this look?
That should work.
If you want to be fancy you can also take any bits you shaved off outside
the encrypted block into account. For example, in the 1500 case we have
to shave off 4 bytes to make it a multiple of 8, therefore there is no
extra overhead for transport mode at all.
Cheers,
--
Visit Openswan at http://www.openswan.org/
Email: Herbert Xu ~{PmV>HI~} <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [XFRM]: Improve MTU estimation
2006-08-04 11:51 ` Patrick McHardy
@ 2006-08-04 11:55 ` Herbert Xu
0 siblings, 0 replies; 13+ messages in thread
From: Herbert Xu @ 2006-08-04 11:55 UTC (permalink / raw)
To: Patrick McHardy; +Cc: Kernel Netdev Mailing List
On Fri, Aug 04, 2006 at 01:51:24PM +0200, Patrick McHardy wrote:
>
> + if (!x->props.mode)
> + mtu -= blksize - 4;
You probably want to check for overflows here since mtu is unsigned.
Cheers,
--
Visit Openswan at http://www.openswan.org/
Email: Herbert Xu ~{PmV>HI~} <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
^ permalink raw reply [flat|nested] 13+ messages in thread
end of thread, other threads:[~2006-08-04 11:55 UTC | newest]
Thread overview: 13+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2006-08-04 8:50 [XFRM]: Improve MTU estimation Patrick McHardy
2006-08-04 9:27 ` Patrick McHardy
2006-08-04 10:01 ` Herbert Xu
2006-08-04 10:09 ` Patrick McHardy
2006-08-04 10:13 ` Herbert Xu
2006-08-04 11:11 ` Patrick McHardy
2006-08-04 11:16 ` Herbert Xu
2006-08-04 11:21 ` Patrick McHardy
2006-08-04 11:25 ` Herbert Xu
2006-08-04 11:50 ` Patrick McHardy
2006-08-04 11:51 ` Patrick McHardy
2006-08-04 11:55 ` Herbert Xu
2006-08-04 11:54 ` Herbert Xu
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).