netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH,RFC] explicit connection confirmation
@ 2002-11-07  9:32 Lennert Buytenhek
  2002-11-07 11:27 ` bert hubert
  2003-08-14 13:11 ` Lennert Buytenhek
  0 siblings, 2 replies; 14+ messages in thread
From: Lennert Buytenhek @ 2002-11-07  9:32 UTC (permalink / raw)
  To: netdev

(please CC on replies, I am not on this list)

Hi,

This patch gives userland the ability to decide whether to respond
to an incoming TCP SYN with a SYN-ACK or a RST.  It was hacked
up after Linux Kongress 2001 and has been sitting on my patch
pile since April this year or something.

The basic idea is this:
- Put the listening TCP socket in TCP_CONFIRM_CONNECT mode.
- Sockets returned from accept() on this socket after this will be
  sockets in the SYN_RECV state instead of the ESTABLISHED state
  (unless syncookies had to be used).  By writing to the socket,
  you cause a SYN-ACK to be sent, and by immediately closing the
  socket you cause a RST to be sent.

There are two issues left, AFAICS:
- SYN_RECV sockets currently don't time out for some reason
- it deadlocks instantly on SMP

It's against 2.4.18.  Could someone have a look at it please?  I
unfortunately haven't had any time at all lately, so I would be
really happy if someone else could take this over.  (Well, I can
dream, can't I?)


cheers,
Lennert



--- linux-2.4.18-11umpr/include/linux/tcp.h.orig	Thu Nov 22 20:47:11 2001
+++ linux-2.4.18-11umpr/include/linux/tcp.h	Thu Apr 18 19:33:19 2002
@@ -127,6 +127,7 @@
 #define TCP_WINDOW_CLAMP	10	/* Bound advertised window */
 #define TCP_INFO		11	/* Information about this connection. */
 #define TCP_QUICKACK		12	/* Block/reenable quick acks */
+#define TCP_CONFIRM_CONNECT	13	/* Let user control connection acceptance */
 
 #define TCPI_OPT_TIMESTAMPS	1
 #define TCPI_OPT_SACK		2
--- linux-2.4.18-11umpr/include/net/sock.h.orig	Fri Dec 21 18:42:04 2001
+++ linux-2.4.18-11umpr/include/net/sock.h	Thu Apr 18 19:37:52 2002
@@ -302,6 +302,7 @@
 	__u8	reordering;	/* Packet reordering metric.		*/
 	__u8	queue_shrunk;	/* Write queue has been shrunk recently.*/
 	__u8	defer_accept;	/* User waits for some data after accept() */
+	__u8	confirm_connect;/* User wants control over conn. acceptance */
 
 /* RTT measurement */
 	__u8	backoff;	/* backoff				*/
@@ -411,6 +412,11 @@
 	struct open_request	*accept_queue;
 	struct open_request	*accept_queue_tail;
 
+	/* Our corresponding open_request if this socket is unconfirmed
+	 * (i.e. if we haven't sent SYN-ACK or RST yet)
+	 */
+	struct open_request	*unconfirmed_openreq;
+
 	int			write_pending;	/* A write to socket waits to start. */
 
 	unsigned int		keepalive_time;	  /* time before keep alive takes place */
--- linux-2.4.18-11umpr/include/net/tcp.h.orig	Thu Nov 22 20:47:22 2001
+++ linux-2.4.18-11umpr/include/net/tcp.h	Fri Apr 19 10:42:51 2002
@@ -505,7 +505,8 @@
 		sack_ok : 1,
 		wscale_ok : 1,
 		ecn_ok : 1,
-		acked : 1;
+		acked : 1,
+		unconfirmed : 1;
 	/* The following two fields can be easily recomputed I think -AK */
 	__u32			window_clamp;	/* window clamp at creation time */
 	__u32			rcv_wnd;	/* rcv_wnd offered first time */
@@ -533,6 +534,17 @@
 	tcp_openreq_fastfree(req);
 }
 
+static inline int tcp_is_unconfirmed(struct tcp_opt *tp)
+{
+	struct open_request *req;
+
+	req = tp->unconfirmed_openreq;
+	if (req != NULL && req->unconfirmed)
+		return 1;
+
+	return 0;
+}
+
 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
 #define TCP_INET_FAMILY(fam) ((fam) == AF_INET)
 #else
@@ -1661,6 +1673,7 @@
 	req->acked = 0;
 	req->ecn_ok = 0;
 	req->rmt_port = skb->h.th->source;
+	req->unconfirmed = 0;
 }
 
 #define TCP_MEM_QUANTUM	((int)PAGE_SIZE)
--- linux-2.4.18-11umpr/net/ipv4/tcp.c.orig	Fri Dec 21 18:42:05 2001
+++ linux-2.4.18-11umpr/net/ipv4/tcp.c	Fri Apr 19 20:50:29 2002
@@ -204,6 +204,7 @@
  *		Andi Kleen 	:	Make poll agree with SIGIO
  *	Salvatore Sanfilippo	:	Support SO_LINGER with linger == 1 and
  *					lingertime == 0 (RFC 793 ABORT Call)
+ *	Lennert Buytenhek	:	Explicit connection confirmation
  *					
  *		This program is free software; you can redistribute it and/or
  *		modify it under the terms of the GNU General Public License
@@ -366,6 +367,15 @@
 	return sk->tp_pinfo.af_tcp.accept_queue ? (POLLIN | POLLRDNORM) : 0;
 }
 
+static void tcp_confirm(struct sock *sk)
+{
+	struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
+	struct open_request *req = tp->unconfirmed_openreq;
+
+	req->unconfirmed = 0;
+	req->class->rtx_syn_ack(sk, req, NULL);
+}
+
 /*
  *	Wait for a TCP event.
  *
@@ -650,6 +660,9 @@
 	struct task_struct *tsk = current;
 	DECLARE_WAITQUEUE(wait, tsk);
 
+	if (tcp_is_unconfirmed(tp))
+		tcp_confirm(sk);
+
 	while((1 << sk->state) & ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)) {
 		if(sk->err)
 			return sock_error(sk);
@@ -1814,7 +1827,7 @@
 void tcp_close(struct sock *sk, long timeout)
 {
 	struct sk_buff *skb;
-	int data_was_unread = 0;
+	int should_send_rst = 0;
 
 	lock_sock(sk);
 	sk->shutdown = SHUTDOWN_MASK;
@@ -1834,12 +1847,19 @@
 	 */
 	while((skb=__skb_dequeue(&sk->receive_queue))!=NULL) {
 		u32 len = TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq - skb->h.th->fin;
-		data_was_unread += len;
+		should_send_rst += len;
 		__kfree_skb(skb);
 	}
 
 	tcp_mem_reclaim(sk);
 
+	if (sk->tp_pinfo.af_tcp.unconfirmed_openreq != NULL) {
+		if (tcp_is_unconfirmed(&(sk->tp_pinfo.af_tcp)))
+			should_send_rst = 1;
+		tcp_openreq_free(sk->tp_pinfo.af_tcp.unconfirmed_openreq);
+		sk->tp_pinfo.af_tcp.unconfirmed_openreq = NULL;
+	}
+
 	/* As outlined in draft-ietf-tcpimpl-prob-03.txt, section
 	 * 3.10, we send a RST here because data was lost.  To
 	 * witness the awful effects of the old behavior of always
@@ -1849,7 +1869,7 @@
 	 * the FTP client, wheee...  Note: timeout is always zero
 	 * in such a case.
 	 */
-	if(data_was_unread != 0) {
+	if(should_send_rst) {
 		/* Unread data was tossed, zap the connection. */
 		NET_INC_STATS_USER(TCPAbortOnClose);
 		tcp_set_state(sk, TCP_CLOSE);
@@ -2026,6 +2046,11 @@
 #endif
 	}
 
+	if (tp->unconfirmed_openreq) {
+		tcp_openreq_free(tp->unconfirmed_openreq);
+		tp->unconfirmed_openreq = NULL;
+	}
+
 	sk->shutdown = 0;
 	sk->done = 0;
 	tp->srtt = 0;
@@ -2139,8 +2164,10 @@
 
  	newsk = req->sk;
 	tcp_acceptq_removed(sk);
-	tcp_openreq_fastfree(req);
-	BUG_TRAP(newsk->state != TCP_SYN_RECV);
+	if (newsk->tp_pinfo.af_tcp.unconfirmed_openreq == NULL)
+		tcp_openreq_fastfree(req);
+	BUG_TRAP(newsk->tp_pinfo.af_tcp.unconfirmed_openreq ||
+		 newsk->state != TCP_SYN_RECV);
 	release_sock(sk);
 	return newsk;
 
@@ -2305,6 +2332,10 @@
 		}
 		break;
 
+	case TCP_CONFIRM_CONNECT:
+		tp->confirm_connect = !!val;
+		break;
+
 	default:
 		err = -ENOPROTOOPT;
 		break;
@@ -2429,6 +2460,9 @@
 	case TCP_QUICKACK:
 		val = !tp->ack.pingpong;
 		break;
+	case TCP_CONFIRM_CONNECT:
+		val = tp->confirm_connect || tcp_is_unconfirmed(tp);
+		break;
 	default:
 		return -ENOPROTOOPT;
 	};
--- linux-2.4.18-11umpr/net/ipv4/tcp_input.c.orig	Mon Feb 25 20:38:14 2002
+++ linux-2.4.18-11umpr/net/ipv4/tcp_input.c	Fri Apr 19 10:52:27 2002
@@ -3749,6 +3749,11 @@
 		switch(sk->state) {
 		case TCP_SYN_RECV:
 			if (acceptable) {
+				if (tp->unconfirmed_openreq != NULL) {
+					tcp_openreq_free(tp->unconfirmed_openreq);
+					tp->unconfirmed_openreq = NULL;
+				}
+
 				tp->copied_seq = tp->rcv_nxt;
 				mb();
 				tcp_set_state(sk, TCP_ESTABLISHED);
--- linux-2.4.18-11umpr/net/ipv4/tcp_minisocks.c.orig	Mon Oct  1 18:19:57 2001
+++ linux-2.4.18-11umpr/net/ipv4/tcp_minisocks.c	Fri Apr 19 10:24:22 2002
@@ -696,6 +696,7 @@
 		tcp_init_wl(newtp, req->snt_isn, req->rcv_isn);
 
 		newtp->retransmits = 0;
+		newtp->confirm_connect = 0;
 		newtp->backoff = 0;
 		newtp->srtt = 0;
 		newtp->mdev = TCP_TIMEOUT_INIT;
@@ -839,7 +840,8 @@
 		 * Enforce "SYN-ACK" according to figure 8, figure 6
 		 * of RFC793, fixed by RFC1122.
 		 */
-		req->class->rtx_syn_ack(sk, req, NULL);
+		if (!req->unconfirmed)
+			req->class->rtx_syn_ack(sk, req, NULL);
 		return NULL;
 	}
 
@@ -864,7 +866,7 @@
 	if (paws_reject || !tcp_in_window(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq,
 					  req->rcv_isn+1, req->rcv_isn+1+req->rcv_wnd)) {
 		/* Out of window: send ACK and drop. */
-		if (!(flg & TCP_FLAG_RST))
+		if (!req->unconfirmed && !(flg & TCP_FLAG_RST))
 			req->class->send_ack(skb, req);
 		if (paws_reject)
 			NET_INC_STATS_BH(PAWSEstabRejected);
@@ -907,6 +909,12 @@
 		return NULL;
 	}
 
+	/* @@@ If we are in SYN_RECV and haven't confirmed/rejected
+	 * the connection yet, this ACK is acking a never-sent packet.
+	 */
+	if (tcp_is_unconfirmed(tp))
+		return NULL;
+
 	/* OK, ACK is valid, create big socket and
 	 * feed this segment to it. It will repeat all
 	 * the tests. THIS SEGMENT MUST MOVE SOCKET TO
--- linux-2.4.18-11umpr/net/ipv4/tcp_ipv4.c.orig	Mon Feb 25 20:38:14 2002
+++ linux-2.4.18-11umpr/net/ipv4/tcp_ipv4.c	Fri Apr 19 18:56:45 2002
@@ -1270,12 +1270,14 @@
 
 int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 {
+	struct tcp_opt *master_tp = &(sk->tp_pinfo.af_tcp);
 	struct tcp_opt tp;
 	struct open_request *req;
 	__u32 saddr = skb->nh.iph->saddr;
 	__u32 daddr = skb->nh.iph->daddr;
 	__u32 isn = TCP_SKB_CB(skb)->when;
 	struct dst_entry *dst = NULL;
+	int dont_confirm = 0;
 #ifdef CONFIG_SYN_COOKIES
 	int want_cookie = 0;
 #else
@@ -1312,6 +1314,9 @@
 	if (req == NULL)
 		goto drop;
 
+	if (!want_cookie && master_tp->confirm_connect)
+		dont_confirm = 1;
+
 	tcp_clear_options(&tp);
 	tp.mss_clamp = 536;
 	tp.user_mss = sk->tp_pinfo.af_tcp.user_mss;
@@ -1396,11 +1401,31 @@
 	}
 	req->snt_isn = isn;
 
-	if (tcp_v4_send_synack(sk, req, dst))
+	if (!dont_confirm && tcp_v4_send_synack(sk, req, dst))
 		goto drop_and_free;
 
 	if (want_cookie) {
 	   	tcp_openreq_free(req); 
+	} else if (dont_confirm) {
+		struct sock *child;
+		__u8 rcv_wscale;
+
+		req->window_clamp = dst?dst->window:0;
+		tcp_select_initial_window(tcp_full_space(sk), req->mss,
+				&req->rcv_wnd, &req->window_clamp,
+				0, &rcv_wscale);
+		req->rcv_wscale = rcv_wscale;
+
+		child = tcp_v4_syn_recv_sock(sk, skb, req, NULL);
+		if (child != NULL) {
+			req->unconfirmed = 1;
+			child->tp_pinfo.af_tcp.unconfirmed_openreq = req;
+			tcp_acceptq_queue(sk, req, child);
+			sk->data_ready(sk, 0);
+			sock_put(child);
+		} else {
+			tcp_openreq_free(req);
+		}
 	} else {
 		tcp_v4_synq_add(sk, req);
 	}
--- linux-2.4.18-11umpr/net/ipv4/tcp_timer.c.orig	Mon Oct  1 18:19:57 2001
+++ linux-2.4.18-11umpr/net/ipv4/tcp_timer.c	Thu Apr 18 19:49:06 2002
@@ -512,7 +512,8 @@
 			if ((long)(now - req->expires) >= 0) {
 				if ((req->retrans < thresh ||
 				     (req->acked && req->retrans < max_retries))
-				    && !req->class->rtx_syn_ack(sk, req, NULL)) {
+				    && (req->unconfirmed ||
+					!req->class->rtx_syn_ack(sk, req, NULL))) {
 					unsigned long timeo;
 
 					if (req->retrans++ == 0)
--- linux-2.4.18-11umpr/net/ipv4/af_inet.c.orig	Fri Dec 21 18:42:05 2001
+++ linux-2.4.18-11umpr/net/ipv4/af_inet.c	Wed Apr 17 20:45:06 2002
@@ -693,7 +693,7 @@
 
 	lock_sock(sk2);
 
-	BUG_TRAP((1<<sk2->state)&(TCPF_ESTABLISHED|TCPF_CLOSE_WAIT|TCPF_CLOSE));
+	BUG_TRAP((1<<sk2->state)&(TCPF_SYN_RECV|TCPF_ESTABLISHED|TCPF_CLOSE_WAIT|TCPF_CLOSE));
 
 	sock_graft(sk2, newsock);
 

^ permalink raw reply	[flat|nested] 14+ messages in thread

end of thread, other threads:[~2003-08-25 11:09 UTC | newest]

Thread overview: 14+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2002-11-07  9:32 [PATCH,RFC] explicit connection confirmation Lennert Buytenhek
2002-11-07 11:27 ` bert hubert
2002-11-07 12:09   ` Lennert Buytenhek
2002-11-07 13:36     ` jamal
2002-11-07 15:27       ` Lennert Buytenhek
2002-11-08 11:22         ` jamal
2002-11-08 11:52           ` bert hubert
2002-11-08 11:56             ` Marc Boucher
2002-11-08 18:28           ` Lennert Buytenhek
2002-11-07 13:49     ` bert hubert
2002-11-07 14:30       ` Lennert Buytenhek
2002-11-07 16:24         ` bert hubert
2003-08-14 13:11 ` Lennert Buytenhek
2003-08-25 11:09   ` Harald Welte

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).