All of lore.kernel.org
 help / color / mirror / Atom feed
From: Pavel Emelyanov <xemul@parallels.com>
To: Linux Netdev List <netdev@vger.kernel.org>,
	Tejun Heo <tj@kernel.org>, Eric Dumazet <eric.dumazet@gmail.com>
Cc: David Miller <davem@davemloft.net>
Subject: [PATCH 2/2] tcp: Initial repair mode
Date: Wed, 29 Feb 2012 19:14:22 +0400	[thread overview]
Message-ID: <4F4E40CE.9080205@parallels.com> (raw)
In-Reply-To: <4F4E4084.9080804@parallels.com>

This includes (according the the previous description):

* TCP_REPAIR sockoption
* Sequences sockoptions
* Ability to forcibly bind a socket to a port
* Immediate connect modification
* Silent close modification

Signed-off-by: Pavel Emelyanov <xemul@parallels.com>

---
 include/linux/tcp.h             |    6 ++++-
 net/ipv4/inet_connection_sock.c |    3 ++
 net/ipv4/tcp.c                  |   43 ++++++++++++++++++++++++++++++++++++++-
 net/ipv4/tcp_ipv4.c             |   19 ++++++++++++++--
 net/ipv4/tcp_output.c           |    1 -
 5 files changed, 66 insertions(+), 6 deletions(-)

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 115389e..0b2e01c 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -106,6 +106,9 @@ enum {
 #define TCP_THIN_LINEAR_TIMEOUTS 16      /* Use linear timeouts for thin streams*/
 #define TCP_THIN_DUPACK         17      /* Fast retrans. after 1 dupack */
 #define TCP_USER_TIMEOUT	18	/* How long for loss retry before timeout */
+#define TCP_REPAIR		19	/* TCP sock is under repair right now */
+#define TCP_WRITE_SEQ		20
+#define TCP_RCV_NXT		21
 
 /* for TCP_INFO socket option */
 #define TCPI_OPT_TIMESTAMPS	1
@@ -353,7 +356,8 @@ struct tcp_sock {
 	u8	nonagle     : 4,/* Disable Nagle algorithm?             */
 		thin_lto    : 1,/* Use linear timeouts for thin streams */
 		thin_dupack : 1,/* Fast retransmit on first dupack      */
-		unused      : 2;
+		repair      : 1,
+		unused      : 1;
 
 /* RTT measurement */
 	u32	srtt;		/* smoothed round trip time << 3	*/
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 19d66ce..92788af 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -172,6 +172,9 @@ have_snum:
 	goto tb_not_found;
 tb_found:
 	if (!hlist_empty(&tb->owners)) {
+		if (sk->sk_reuse == 2)
+			goto success;
+
 		if (tb->fastreuse > 0 &&
 		    sk->sk_reuse && sk->sk_state != TCP_LISTEN &&
 		    smallest_size == -1) {
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 22ef5f9..768306d 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1932,7 +1932,9 @@ void tcp_close(struct sock *sk, long timeout)
 	 * advertise a zero window, then kill -9 the FTP client, wheee...
 	 * Note: timeout is always zero in such a case.
 	 */
-	if (data_was_unread) {
+	if (tcp_sk(sk)->repair) {
+		sk->sk_prot->disconnect(sk, 0);
+	} else if (data_was_unread) {
 		/* Unread data was tossed, zap the connection. */
 		NET_INC_STATS_USER(sock_net(sk), LINUX_MIB_TCPABORTONCLOSE);
 		tcp_set_state(sk, TCP_CLOSE);
@@ -2071,6 +2073,8 @@ int tcp_disconnect(struct sock *sk, int flags)
 	/* ABORT function of RFC793 */
 	if (old_state == TCP_LISTEN) {
 		inet_csk_listen_stop(sk);
+	} else if (unlikely(tp->repair)) {
+		sk->sk_err = ECONNABORTED;
 	} else if (tcp_need_reset(old_state) ||
 		   (tp->snd_nxt != tp->write_seq &&
 		    (1 << old_state) & (TCPF_CLOSING | TCPF_LAST_ACK))) {
@@ -2294,6 +2298,33 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
 			tp->thin_dupack = val;
 		break;
 
+	case TCP_REPAIR:
+		if (!capable(CAP_SYS_ADMIN))
+			err = -EPERM;
+		else if (val < 0 || val > 1)
+			err = -EINVAL;
+		else {
+			tp->repair = val;
+			sk->sk_reuse = (val << 1);
+			if (val == 0)
+				tcp_send_window_probe(sk);
+		}
+		break;
+
+	case TCP_WRITE_SEQ:
+		if (!tp->repair)
+			err = -EPERM;
+		else
+			tp->write_seq = val;
+		break;
+
+	case TCP_RCV_NXT:
+		if (!tp->repair)
+			err = -EPERM;
+		else
+			tp->copied_seq = tp->rcv_nxt = val;
+		break;
+
 	case TCP_CORK:
 		/* When set indicates to always queue non-full frames.
 		 * Later the user clears this option and we transmit
@@ -2629,6 +2658,18 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
 		val = tp->thin_dupack;
 		break;
 
+	case TCP_REPAIR:
+		val = tp->repair;
+		break;
+
+	case TCP_WRITE_SEQ:
+		val = tp->write_seq;
+		break;
+
+	case TCP_RCV_NXT:
+		val = tp->rcv_nxt;
+		break;
+
 	case TCP_USER_TIMEOUT:
 		val = jiffies_to_msecs(icsk->icsk_user_timeout);
 		break;
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 94abee8..6118486 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -137,6 +137,14 @@ int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
 }
 EXPORT_SYMBOL_GPL(tcp_twsk_unique);
 
+static int tcp_repair_connect(struct sock *sk)
+{
+	tcp_connect_init(sk);
+	tcp_finish_connect(sk, NULL);
+
+	return 0;
+}
+
 /* This will initiate an outgoing connection. */
 int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 {
@@ -195,7 +203,8 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 		/* Reset inherited state */
 		tp->rx_opt.ts_recent	   = 0;
 		tp->rx_opt.ts_recent_stamp = 0;
-		tp->write_seq		   = 0;
+		if (!tp->repair)
+			tp->write_seq	   = 0;
 	}
 
 	if (tcp_death_row.sysctl_tw_recycle &&
@@ -246,7 +255,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 	sk->sk_gso_type = SKB_GSO_TCPV4;
 	sk_setup_caps(sk, &rt->dst);
 
-	if (!tp->write_seq)
+	if (!tp->write_seq && !tp->repair)
 		tp->write_seq = secure_tcp_sequence_number(inet->inet_saddr,
 							   inet->inet_daddr,
 							   inet->inet_sport,
@@ -254,7 +263,11 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 
 	inet->inet_id = tp->write_seq ^ jiffies;
 
-	err = tcp_connect(sk);
+	if (likely(!tp->repair))
+		err = tcp_connect(sk);
+	else
+		err = tcp_repair_connect(sk);
+
 	rt = NULL;
 	if (err)
 		goto failure;
-- 
1.5.5.6

  parent reply	other threads:[~2012-02-29 15:14 UTC|newest]

Thread overview: 4+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2012-02-29 15:13 [RFC][PATCH 0/2] TCP connection repair Pavel Emelyanov
2012-02-29 15:13 ` [PATCH 1/2] tcp: Move code around Pavel Emelyanov
2012-02-29 15:14 ` Pavel Emelyanov [this message]
2012-03-05  2:42   ` [PATCH 2/2] tcp: Initial repair mode David Miller

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=4F4E40CE.9080205@parallels.com \
    --to=xemul@parallels.com \
    --cc=davem@davemloft.net \
    --cc=eric.dumazet@gmail.com \
    --cc=netdev@vger.kernel.org \
    --cc=tj@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.