netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Fan Du <fan.du@intel.com>
To: davem@davemloft.net
Cc: netdev@vger.kernel.org, fengyuleidian0615@gmail.com
Subject: [PATCH net-next 3/3] ipv4: Create probe timer for tcp PMTU as per RFC4821
Date: Fri, 13 Feb 2015 16:16:45 +0800	[thread overview]
Message-ID: <1423815405-32644-4-git-send-email-fan.du@intel.com> (raw)
In-Reply-To: <1423815405-32644-1-git-send-email-fan.du@intel.com>

As per RFC4821 7.3.  Selecting Probe Size, a probe timer should
be armed once probing has converged. Once this timer expires,
probing starts again to take advantage of any path PMTU change. The
recommended probing interval is 10 minutes per RFC1981.

Signed-off-by: Fan Du <fan.du@intel.com>
---
 include/net/inet_connection_sock.h |    2 ++
 include/net/netns/ipv4.h           |    1 +
 include/net/tcp.h                  |    3 +++
 net/ipv4/sysctl_net_ipv4.c         |    7 +++++++
 net/ipv4/tcp.c                     |    2 ++
 net/ipv4/tcp_ipv4.c                |    1 +
 net/ipv4/tcp_output.c              |   23 ++++++++++++++++++++++-
 7 files changed, 38 insertions(+), 1 deletions(-)

diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
index 3d0932e..e78e5ab 100644
--- a/include/net/inet_connection_sock.h
+++ b/include/net/inet_connection_sock.h
@@ -126,6 +126,8 @@ struct inet_connection_sock {
 
 		int		  search_high_sav;
 		int		  search_low_sav;
+
+		struct timer_list probe_timer;
 
 		/* Information on the current probe. */
 		int		  probe_size;
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index dbe2254..bb2c2d1 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -84,6 +84,7 @@ struct netns_ipv4 {
 	int sysctl_tcp_fwmark_accept;
 	int sysctl_tcp_mtu_probing;
 	int sysctl_tcp_base_mss;
+	u32 sysctl_tcp_probe_interval;
 
 	struct ping_group_range ping_group_range;
 
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 7b57e5b..16fa2e6 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -67,6 +67,9 @@ void tcp_time_wait(struct sock *sk, int state, int timeo);
 /* The least MTU to use for probing */
 #define TCP_BASE_MSS		1024
 
+/* probing interval, default to 10 minutes as per RFC4821 */
+#define TCP_PROBE_INTERVAL	600
+
 /* After receiving this amount of duplicate ACKs fast retransmit starts. */
 #define TCP_FASTRETRANS_THRESH 3
 
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index d151539..4fa5d31 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -883,6 +883,13 @@ static struct ctl_table ipv4_net_table[] = {
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec,
 	},
+	{
+		.procname	= "tcp_probe_interval",
+		.data		= &init_net.ipv4.sysctl_tcp_probe_interval,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
 	{ }
 };
 
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 9d72a0f..46413ee 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1986,6 +1986,7 @@ void tcp_close(struct sock *sk, long timeout)
 	struct sk_buff *skb;
 	int data_was_unread = 0;
 	int state;
+	struct inet_connection_sock *icsk = inet_csk(sk);
 
 	lock_sock(sk);
 	sk->sk_shutdown = SHUTDOWN_MASK;
@@ -2149,6 +2150,7 @@ adjudge_to_death:
 	/* Otherwise, socket is reprieved until protocol close. */
 
 out:
+	del_timer(&icsk->icsk_mtup.probe_timer);
 	bh_unlock_sock(sk);
 	local_bh_enable();
 	sock_put(sk);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 5a2dfed..3cc71b3 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -2460,6 +2460,7 @@ static int __net_init tcp_sk_init(struct net *net)
 	}
 	net->ipv4.sysctl_tcp_ecn = 2;
 	net->ipv4.sysctl_tcp_base_mss = TCP_BASE_MSS;
+	net->ipv4.sysctl_tcp_probe_interval = TCP_PROBE_INTERVAL;
 	return 0;
 
 fail:
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 0a60deb..461b4a4 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1342,6 +1342,18 @@ int tcp_mss_to_mtu(struct sock *sk, int mss)
 	return mtu;
 }
 
+static void icsk_mtup_probe_timer(unsigned long arg)
+{
+	struct sock *sk = (struct sock *)arg;
+	struct net *net = sock_net(sk);
+	struct inet_connection_sock *icsk = inet_csk(sk);
+
+	/* Restore the original search range saved by tcp_mtup_init() */
+	icsk->icsk_mtup.search_high = icsk->icsk_mtup.search_high_sav;
+	icsk->icsk_mtup.search_low = icsk->icsk_mtup.search_low_sav;
+	icsk->icsk_mtup.probe_size = 0;
+}
+
 /* MTU probing init per socket */
 void tcp_mtup_init(struct sock *sk)
 {
@@ -1357,6 +1369,9 @@ void tcp_mtup_init(struct sock *sk)
 	icsk->icsk_mtup.search_high_sav = icsk->icsk_mtup.search_high;
 	icsk->icsk_mtup.search_low_sav = icsk->icsk_mtup.search_low;
 	icsk->icsk_mtup.probe_size = 0;
+
+	setup_timer(&icsk->icsk_mtup.probe_timer, icsk_mtup_probe_timer,
+		    (unsigned long)sk);
 }
 EXPORT_SYMBOL(tcp_mtup_init);
 
@@ -1840,6 +1855,7 @@ static int tcp_mtu_probe(struct sock *sk)
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	struct sk_buff *skb, *nskb, *next;
+	struct net *net = sock_net(sk);
 	int len;
 	int probe_size;
 	int size_needed;
@@ -1865,7 +1881,12 @@ static int tcp_mtu_probe(struct sock *sk)
 	probe_size = (icsk->icsk_mtup.search_high + icsk->icsk_mtup.search_low) >> 1;
 	size_needed = probe_size + (tp->reordering + 1) * tp->mss_cache;
 	if (probe_size > tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_high)) {
-		/* TODO: set timer for probe_converge_event */
+		u32 probe_interval = net->ipv4.sysctl_tcp_probe_interval;
+
+		/* Search has converged; start the timer so that a
+		 * later re-probe can take advantage of path changes */
+		mod_timer(&icsk->icsk_mtup.probe_timer,
+			  jiffies + msecs_to_jiffies(1000*probe_interval));
 		return -1;
 	}
 
-- 
1.7.1

  parent reply	other threads:[~2015-02-13  8:21 UTC|newest]

Thread overview: 24+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-02-13  8:16 [PATCH net-next 0/3] Small fix for TCP PMTU Fan Du
2015-02-13  8:16 ` [PATCH net-next 1/3] ipv4: Raise tcp PMTU probe mss base size Fan Du
2015-02-13  9:49   ` yzhu1
2015-02-16  5:15     ` Fan Du
2015-02-13  8:16 ` [PATCH net-next 2/3] ipv4: Use binary search to choose tcp PMTU probe_size Fan Du
2015-02-13 17:52   ` John Heffner
2015-02-16  5:27     ` Fan Du
2015-02-16 23:59       ` John Heffner
2015-02-13  8:16 ` Fan Du [this message]
2015-02-13  9:59   ` [PATCH net-next 3/3] ipv4: Create probe timer for tcp PMTU as per RFC4821 Ying Xue
2015-02-16  5:28     ` Fan Du
2015-02-13 12:31   ` Eric Dumazet
2015-02-16  5:38     ` Fan Du
2015-02-16 12:19       ` Eric Dumazet
2015-02-26  3:49 ` [PATCHv2 net-next 0/4] Small fix for TCP PMTU Fan Du
2015-02-26  3:49   ` [PATCHv2 net-next 1/4] ipv4: Raise tcp PMTU probe mss base size Fan Du
2015-02-26  3:49   ` [PATCHv2 net-next 2/4] ipv4: Use binary search to choose tcp PMTU probe_size Fan Du
2015-02-27 22:17     ` David Miller
2015-02-26  3:49   ` [PATCHv2 net-next 3/4] ipv4: shrink current mss for tcp PMTU blackhole detection Fan Du
2015-02-26  3:49   ` [PATCHv2 net-next 4/4] ipv4: Create probe timer for tcp PMTU as per RFC4821 Fan Du
2015-02-26  4:19     ` Eric Dumazet
2015-02-26  6:24       ` Fan Du
2015-02-26 13:40   ` [PATCHv2 net-next 0/4] Small fix for TCP PMTU David Laight
2015-02-27  5:37     ` Fan Du

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1423815405-32644-4-git-send-email-fan.du@intel.com \
    --to=fan.du@intel.com \
    --cc=davem@davemloft.net \
    --cc=fengyuleidian0615@gmail.com \
    --cc=netdev@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).