netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH V2 net-next 0/3] TCP experimental option for SMC rendezvous
@ 2017-10-18 14:29 Ursula Braun
  2017-10-18 14:29 ` [PATCH V2 net-next 1/3] smc: fix mutex unlocks during link group creation Ursula Braun
                   ` (2 more replies)
  0 siblings, 3 replies; 5+ messages in thread
From: Ursula Braun @ 2017-10-18 14:29 UTC (permalink / raw)
  To: davem; +Cc: netdev, linux-s390, jwi, schwidefsky, heiko.carstens, raspl,
	ubraun

Dave,

SMC-capability is to be negotiated with a TCP experimental option.
As requested during code review of our previous approach using
netfilter hooks, here's a new version. It touches tcp-code in the
first patch and exploits the new tcp flag in the smc-code.

Kind regards, Ursula

Changelog:

V2:
* switch to current jump labels API
* remove static key checking in smc_set_capability()
  (comment from Eric Dumazet)
* use inet_request_sock parameter for smc_set_option_cond()
* smc_listen_work(): replace local variable lgr_lock_taken by new labels
                     and separate this change into a prerequisite first
                     patch

Ursula Braun (3):
  smc: fix mutex unlocks during link group creation 
  tcp: TCP experimental option for SMC
  smc: add SMC rendezvous protocol

 include/linux/tcp.h      |  9 +++++--
 include/net/inet_sock.h  |  3 ++-
 include/net/tcp.h        |  8 ++++++
 net/ipv4/tcp.c           |  6 +++++
 net/ipv4/tcp_input.c     | 34 +++++++++++++++++++++++++
 net/ipv4/tcp_minisocks.c | 19 ++++++++++++++
 net/ipv4/tcp_output.c    | 66 +++++++++++++++++++++++++++++++++++++++++++++---
 net/smc/af_smc.c         | 42 ++++++++++++++++++++++--------
 8 files changed, 170 insertions(+), 17 deletions(-)

-- 
2.13.5

^ permalink raw reply	[flat|nested] 5+ messages in thread

* [PATCH V2 net-next 1/3] smc: fix mutex unlocks during link group creation
  2017-10-18 14:29 [PATCH V2 net-next 0/3] TCP experimental option for SMC rendezvous Ursula Braun
@ 2017-10-18 14:29 ` Ursula Braun
  2017-10-18 14:29 ` [PATCH V2 net-next 2/3] tcp: TCP experimental option for SMC Ursula Braun
  2017-10-18 14:30 ` [PATCH V2 net-next 3/3] smc: add SMC rendezvous protocol Ursula Braun
  2 siblings, 0 replies; 5+ messages in thread
From: Ursula Braun @ 2017-10-18 14:29 UTC (permalink / raw)
  To: davem; +Cc: netdev, linux-s390, jwi, schwidefsky, heiko.carstens, raspl,
	ubraun

Link group creation is synchronized with the smc_create_lgr_pending
lock. In smc_listen_work() this mutex is sometimes unlocked, even
though it has not been locked before. This issue will surface in
presence of the SMC rendezvous code.

Signed-off-by: Ursula Braun <ubraun@linux.vnet.ibm.com>
---
 net/smc/af_smc.c | 26 +++++++++++++++-----------
 1 file changed, 15 insertions(+), 11 deletions(-)

diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index 745f145d4c4d..70f7640f5090 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -808,7 +808,7 @@ static void smc_listen_work(struct work_struct *work)
 		rc = local_contact;
 		if (rc == -ENOMEM)
 			reason_code = SMC_CLC_DECL_MEM;/* insufficient memory*/
-		goto decline_rdma;
+		goto decline_rdma_unlock;
 	}
 	link = &new_smc->conn.lgr->lnk[SMC_SINGLE_LINK];
 
@@ -816,7 +816,7 @@ static void smc_listen_work(struct work_struct *work)
 	rc = smc_buf_create(new_smc);
 	if (rc) {
 		reason_code = SMC_CLC_DECL_MEM;
-		goto decline_rdma;
+		goto decline_rdma_unlock;
 	}
 
 	smc_close_init(new_smc);
@@ -831,7 +831,7 @@ static void smc_listen_work(struct work_struct *work)
 					     buf_desc->mr_rx[SMC_SINGLE_LINK]);
 			if (rc) {
 				reason_code = SMC_CLC_DECL_INTERR;
-				goto decline_rdma;
+				goto decline_rdma_unlock;
 			}
 		}
 	}
@@ -839,15 +839,15 @@ static void smc_listen_work(struct work_struct *work)
 
 	rc = smc_clc_send_accept(new_smc, local_contact);
 	if (rc)
-		goto out_err;
+		goto out_err_unlock;
 
 	/* receive SMC Confirm CLC message */
 	reason_code = smc_clc_wait_msg(new_smc, &cclc, sizeof(cclc),
 				       SMC_CLC_CONFIRM);
 	if (reason_code < 0)
-		goto out_err;
+		goto out_err_unlock;
 	if (reason_code > 0)
-		goto decline_rdma;
+		goto decline_rdma_unlock;
 	smc_conn_save_peer_info(new_smc, &cclc);
 	if (local_contact == SMC_FIRST_CONTACT)
 		smc_link_save_peer_info(link, &cclc);
@@ -855,34 +855,34 @@ static void smc_listen_work(struct work_struct *work)
 	rc = smc_rmb_rtoken_handling(&new_smc->conn, &cclc);
 	if (rc) {
 		reason_code = SMC_CLC_DECL_INTERR;
-		goto decline_rdma;
+		goto decline_rdma_unlock;
 	}
 
 	if (local_contact == SMC_FIRST_CONTACT) {
 		rc = smc_ib_ready_link(link);
 		if (rc) {
 			reason_code = SMC_CLC_DECL_INTERR;
-			goto decline_rdma;
+			goto decline_rdma_unlock;
 		}
 		/* QP confirmation over RoCE fabric */
 		reason_code = smc_serv_conf_first_link(new_smc);
 		if (reason_code < 0) {
 			/* peer is not aware of a problem */
 			rc = reason_code;
-			goto out_err;
+			goto out_err_unlock;
 		}
 		if (reason_code > 0)
-			goto decline_rdma;
+			goto decline_rdma_unlock;
 	}
 
 	smc_tx_init(new_smc);
+	mutex_unlock(&smc_create_lgr_pending);
 
 out_connected:
 	sk_refcnt_debug_inc(newsmcsk);
 	if (newsmcsk->sk_state == SMC_INIT)
 		newsmcsk->sk_state = SMC_ACTIVE;
 enqueue:
-	mutex_unlock(&smc_create_lgr_pending);
 	lock_sock_nested(&lsmc->sk, SINGLE_DEPTH_NESTING);
 	if (lsmc->sk.sk_state == SMC_LISTEN) {
 		smc_accept_enqueue(&lsmc->sk, newsmcsk);
@@ -896,6 +896,8 @@ static void smc_listen_work(struct work_struct *work)
 	sock_put(&lsmc->sk); /* sock_hold in smc_tcp_listen_work */
 	return;
 
+decline_rdma_unlock:
+	mutex_unlock(&smc_create_lgr_pending);
 decline_rdma:
 	/* RDMA setup failed, switch back to TCP */
 	smc_conn_free(&new_smc->conn);
@@ -907,6 +909,8 @@ static void smc_listen_work(struct work_struct *work)
 	}
 	goto out_connected;
 
+out_err_unlock:
+	mutex_unlock(&smc_create_lgr_pending);
 out_err:
 	newsmcsk->sk_state = SMC_CLOSED;
 	smc_conn_free(&new_smc->conn);
-- 
2.13.5

^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [PATCH V2 net-next 2/3] tcp: TCP experimental option for SMC
  2017-10-18 14:29 [PATCH V2 net-next 0/3] TCP experimental option for SMC rendezvous Ursula Braun
  2017-10-18 14:29 ` [PATCH V2 net-next 1/3] smc: fix mutex unlocks during link group creation Ursula Braun
@ 2017-10-18 14:29 ` Ursula Braun
  2017-10-25  0:55   ` David Miller
  2017-10-18 14:30 ` [PATCH V2 net-next 3/3] smc: add SMC rendezvous protocol Ursula Braun
  2 siblings, 1 reply; 5+ messages in thread
From: Ursula Braun @ 2017-10-18 14:29 UTC (permalink / raw)
  To: davem; +Cc: netdev, linux-s390, jwi, schwidefsky, heiko.carstens, raspl,
	ubraun

The SMC protocol [1] relies on the use of a new TCP experimental
option [2, 3]. With this option, SMC capabilities are exchanged
between peers during the TCP three way handshake. This patch adds
support for this experimental option to TCP.

References:
[1] SMC-R Informational RFC: http://www.rfc-editor.org/info/rfc7609
[2] Shared Use of TCP Experimental Options RFC 6994:
    https://tools.ietf.org/rfc/rfc6994.txt
[3] IANA ExID SMCR:
http://www.iana.org/assignments/tcp-parameters/tcp-parameters.xhtml#tcp-exids

Signed-off-by: Ursula Braun <ubraun@linux.vnet.ibm.com>
---
 include/linux/tcp.h      |  9 +++++--
 include/net/inet_sock.h  |  3 ++-
 include/net/tcp.h        |  8 ++++++
 net/ipv4/tcp.c           |  6 +++++
 net/ipv4/tcp_input.c     | 34 ++++++++++++++++++++++++++
 net/ipv4/tcp_minisocks.c | 19 +++++++++++++++
 net/ipv4/tcp_output.c    | 63 +++++++++++++++++++++++++++++++++++++++++++++---
 7 files changed, 136 insertions(+), 6 deletions(-)

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 1d2c44e09e31..3fb28954a1b7 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -98,7 +98,8 @@ struct tcp_options_received {
 		tstamp_ok : 1,	/* TIMESTAMP seen on SYN packet		*/
 		dsack : 1,	/* D-SACK is scheduled			*/
 		wscale_ok : 1,	/* Wscale seen on SYN packet		*/
-		sack_ok : 4,	/* SACK seen on SYN packet		*/
+		sack_ok : 3,	/* SACK seen on SYN packet		*/
+		smc_ok : 1,	/* SMC seen on SYN packet		*/
 		snd_wscale : 4,	/* Window scaling received from sender	*/
 		rcv_wscale : 4;	/* Window scaling to send to receiver	*/
 	u8	num_sacks;	/* Number of SACK blocks		*/
@@ -110,6 +111,9 @@ static inline void tcp_clear_options(struct tcp_options_received *rx_opt)
 {
 	rx_opt->tstamp_ok = rx_opt->sack_ok = 0;
 	rx_opt->wscale_ok = rx_opt->snd_wscale = 0;
+#if IS_ENABLED(CONFIG_SMC)
+	rx_opt->smc_ok = 0;
+#endif
 }
 
 /* This is the max number of SACKS that we'll generate and process. It's safe
@@ -228,7 +232,8 @@ struct tcp_sock {
 		syn_fastopen_ch:1, /* Active TFO re-enabling probe */
 		syn_data_acked:1,/* data in SYN is acked by SYN-ACK */
 		save_syn:1,	/* Save headers of SYN packet */
-		is_cwnd_limited:1;/* forward progress limited by snd_cwnd? */
+		is_cwnd_limited:1,/* forward progress limited by snd_cwnd? */
+		syn_smc:1;	/* SYN includes SMC */
 	u32	tlp_high_seq;	/* snd_nxt at the time of TLP retransmit. */
 
 /* RTT measurement */
diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
index aa95053dfc78..600a7626eba0 100644
--- a/include/net/inet_sock.h
+++ b/include/net/inet_sock.h
@@ -92,7 +92,8 @@ struct inet_request_sock {
 				wscale_ok  : 1,
 				ecn_ok	   : 1,
 				acked	   : 1,
-				no_srccheck: 1;
+				no_srccheck: 1,
+				smc_ok	   : 1;
 	kmemcheck_bitfield_end(flags);
 	u32                     ir_mark;
 	union {
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 3b3b9b968e2d..a2e87ac8cdc0 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -45,6 +45,7 @@
 
 #include <linux/seq_file.h>
 #include <linux/memcontrol.h>
+#include <linux/unaligned/access_ok.h>
 
 #include <linux/bpf.h>
 #include <linux/filter.h>
@@ -191,6 +192,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo);
  * experimental options. See draft-ietf-tcpm-experimental-options-00.txt
  */
 #define TCPOPT_FASTOPEN_MAGIC	0xF989
+#define TCPOPT_SMC_MAGIC	0xE2D4C3D9
 
 /*
  *     TCP option lengths
@@ -203,6 +205,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo);
 #define TCPOLEN_MD5SIG         18
 #define TCPOLEN_FASTOPEN_BASE  2
 #define TCPOLEN_EXP_FASTOPEN_BASE  4
+#define TCPOLEN_EXP_SMC_BASE   6
 
 /* But this is what stacks really send out. */
 #define TCPOLEN_TSTAMP_ALIGNED		12
@@ -213,6 +216,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo);
 #define TCPOLEN_SACK_PERBLOCK		8
 #define TCPOLEN_MD5SIG_ALIGNED		20
 #define TCPOLEN_MSS_ALIGNED		4
+#define TCPOLEN_EXP_SMC_BASE_ALIGNED	8
 
 /* Flags in tp->nonagle */
 #define TCP_NAGLE_OFF		1	/* Nagle's algo is disabled */
@@ -2101,4 +2105,8 @@ static inline bool tcp_bpf_ca_needs_ecn(struct sock *sk)
 {
 	return (tcp_call_bpf(sk, BPF_SOCK_OPS_NEEDS_ECN) == 1);
 }
+
+#if IS_ENABLED(CONFIG_SMC)
+extern struct static_key_false tcp_have_smc;
+#endif
 #endif	/* _TCP_H */
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 3b34850d361f..e782e7d4088f 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -270,6 +270,7 @@
 #include <linux/time.h>
 #include <linux/slab.h>
 #include <linux/errqueue.h>
+#include <linux/static_key.h>
 
 #include <net/icmp.h>
 #include <net/inet_common.h>
@@ -300,6 +301,11 @@ EXPORT_SYMBOL(sysctl_tcp_wmem);
 atomic_long_t tcp_memory_allocated;	/* Current allocated memory. */
 EXPORT_SYMBOL(tcp_memory_allocated);
 
+#if IS_ENABLED(CONFIG_SMC)
+DEFINE_STATIC_KEY_FALSE(tcp_have_smc);
+EXPORT_SYMBOL(tcp_have_smc);
+#endif
+
 /*
  * Current number of TCP sockets.
  */
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index d0682ce2a5d6..d507ab4ecc34 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -75,6 +75,7 @@
 #include <linux/ipsec.h>
 #include <asm/unaligned.h>
 #include <linux/errqueue.h>
+#include <linux/static_key.h>
 
 int sysctl_tcp_fack __read_mostly;
 int sysctl_tcp_max_reordering __read_mostly = 300;
@@ -3735,6 +3736,21 @@ static void tcp_parse_fastopen_option(int len, const unsigned char *cookie,
 	foc->exp = exp_opt;
 }
 
+static void smc_parse_options(const struct tcphdr *th,
+			      struct tcp_options_received *opt_rx,
+			      const unsigned char *ptr,
+			      int opsize)
+{
+#if IS_ENABLED(CONFIG_SMC)
+	if (static_branch_unlikely(&tcp_have_smc)) {
+		if (th->syn && !(opsize & 1) &&
+		    opsize >= TCPOLEN_EXP_SMC_BASE &&
+		    get_unaligned_be32(ptr) == TCPOPT_SMC_MAGIC)
+			opt_rx->smc_ok = 1;
+	}
+#endif
+}
+
 /* Look for tcp options. Normally only called on SYN and SYNACK packets.
  * But, this can also be called on packets in the established flow when
  * the fast version below fails.
@@ -3842,6 +3858,9 @@ void tcp_parse_options(const struct net *net,
 					tcp_parse_fastopen_option(opsize -
 						TCPOLEN_EXP_FASTOPEN_BASE,
 						ptr + 2, th->syn, foc, true);
+				else
+					smc_parse_options(th, opt_rx, ptr,
+							  opsize);
 				break;
 
 			}
@@ -5594,6 +5613,16 @@ static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack,
 	return false;
 }
 
+static void smc_check_reset_syn(struct tcp_sock *tp)
+{
+#if IS_ENABLED(CONFIG_SMC)
+	if (static_branch_unlikely(&tcp_have_smc)) {
+		if (tp->syn_smc && !tp->rx_opt.smc_ok)
+			tp->syn_smc = 0;
+	}
+#endif
+}
+
 static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
 					 const struct tcphdr *th)
 {
@@ -5700,6 +5729,8 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
 		 * is initialized. */
 		tp->copied_seq = tp->rcv_nxt;
 
+		smc_check_reset_syn(tp);
+
 		smp_mb();
 
 		tcp_finish_connect(sk, skb);
@@ -6152,6 +6183,9 @@ static void tcp_openreq_init(struct request_sock *req,
 	ireq->ir_rmt_port = tcp_hdr(skb)->source;
 	ireq->ir_num = ntohs(tcp_hdr(skb)->dest);
 	ireq->ir_mark = inet_request_mark(sk, skb);
+#if IS_ENABLED(CONFIG_SMC)
+	ireq->smc_ok = rx_opt->smc_ok;
+#endif
 }
 
 struct request_sock *inet_reqsk_alloc(const struct request_sock_ops *ops,
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 2341b9f857b6..edca83d4c9ec 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -23,6 +23,7 @@
 #include <linux/slab.h>
 #include <linux/sysctl.h>
 #include <linux/workqueue.h>
+#include <linux/static_key.h>
 #include <net/tcp.h>
 #include <net/inet_common.h>
 #include <net/xfrm.h>
@@ -417,6 +418,21 @@ void tcp_ca_openreq_child(struct sock *sk, const struct dst_entry *dst)
 }
 EXPORT_SYMBOL_GPL(tcp_ca_openreq_child);
 
+static void smc_check_reset_syn_req(struct tcp_sock *oldtp,
+				    struct request_sock *req,
+				    struct tcp_sock *newtp)
+{
+#if IS_ENABLED(CONFIG_SMC)
+	struct inet_request_sock *ireq;
+
+	if (static_branch_unlikely(&tcp_have_smc)) {
+		ireq = inet_rsk(req);
+		if (oldtp->syn_smc && !ireq->smc_ok)
+			newtp->syn_smc = 0;
+	}
+#endif
+}
+
 /* This is not only more efficient than what we used to do, it eliminates
  * a lot of code duplication between IPv4/IPv6 SYN recv processing. -DaveM
  *
@@ -434,6 +450,9 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
 		struct tcp_request_sock *treq = tcp_rsk(req);
 		struct inet_connection_sock *newicsk = inet_csk(newsk);
 		struct tcp_sock *newtp = tcp_sk(newsk);
+		struct tcp_sock *oldtp = tcp_sk(sk);
+
+		smc_check_reset_syn_req(oldtp, req, newtp);
 
 		/* Now setup tcp_sock */
 		newtp->pred_flags = 0;
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 6c74f2a39778..29b3b4ae60fa 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -41,6 +41,7 @@
 #include <linux/compiler.h>
 #include <linux/gfp.h>
 #include <linux/module.h>
+#include <linux/static_key.h>
 
 #include <trace/events/tcp.h>
 
@@ -422,6 +423,22 @@ static inline bool tcp_urg_mode(const struct tcp_sock *tp)
 #define OPTION_MD5		(1 << 2)
 #define OPTION_WSCALE		(1 << 3)
 #define OPTION_FAST_OPEN_COOKIE	(1 << 8)
+#define OPTION_SMC		(1 << 9)
+
+static void smc_options_write(__be32 *ptr, u16 *options)
+{
+#if IS_ENABLED(CONFIG_SMC)
+	if (static_branch_unlikely(&tcp_have_smc)) {
+		if (unlikely(OPTION_SMC & *options)) {
+			*ptr++ = htonl((TCPOPT_NOP  << 24) |
+				       (TCPOPT_NOP  << 16) |
+				       (TCPOPT_EXP <<  8) |
+				       (TCPOLEN_EXP_SMC_BASE));
+			*ptr++ = htonl(TCPOPT_SMC_MAGIC);
+		}
+	}
+#endif
+}
 
 struct tcp_out_options {
 	u16 options;		/* bit field of OPTION_* */
@@ -540,6 +557,41 @@ static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp,
 		}
 		ptr += (len + 3) >> 2;
 	}
+
+	smc_options_write(ptr, &options);
+}
+
+static void smc_set_option(const struct tcp_sock *tp,
+			   struct tcp_out_options *opts,
+			   unsigned int *remaining)
+{
+#if IS_ENABLED(CONFIG_SMC)
+	if (static_branch_unlikely(&tcp_have_smc)) {
+		if (tp->syn_smc) {
+			if (*remaining >= TCPOLEN_EXP_SMC_BASE_ALIGNED) {
+				opts->options |= OPTION_SMC;
+				*remaining -= TCPOLEN_EXP_SMC_BASE_ALIGNED;
+			}
+		}
+	}
+#endif
+}
+
+static void smc_set_option_cond(const struct tcp_sock *tp,
+				const struct inet_request_sock *ireq,
+				struct tcp_out_options *opts,
+				unsigned int *remaining)
+{
+#if IS_ENABLED(CONFIG_SMC)
+	if (static_branch_unlikely(&tcp_have_smc)) {
+		if (tp->syn_smc && ireq->smc_ok) {
+			if (*remaining >= TCPOLEN_EXP_SMC_BASE_ALIGNED) {
+				opts->options |= OPTION_SMC;
+				*remaining -= TCPOLEN_EXP_SMC_BASE_ALIGNED;
+			}
+		}
+	}
+#endif
 }
 
 /* Compute TCP options for SYN packets. This is not the final
@@ -607,11 +659,14 @@ static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb,
 		}
 	}
 
+	smc_set_option(tp, opts, &remaining);
+
 	return MAX_TCP_OPTION_SPACE - remaining;
 }
 
 /* Set up TCP options for SYN-ACKs. */
-static unsigned int tcp_synack_options(struct request_sock *req,
+static unsigned int tcp_synack_options(const struct sock *sk,
+				       struct request_sock *req,
 				       unsigned int mss, struct sk_buff *skb,
 				       struct tcp_out_options *opts,
 				       const struct tcp_md5sig_key *md5,
@@ -667,6 +722,8 @@ static unsigned int tcp_synack_options(struct request_sock *req,
 		}
 	}
 
+	smc_set_option_cond(tcp_sk(sk), ireq, opts, &remaining);
+
 	return MAX_TCP_OPTION_SPACE - remaining;
 }
 
@@ -3193,8 +3250,8 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst,
 	md5 = tcp_rsk(req)->af_specific->req_md5_lookup(sk, req_to_sk(req));
 #endif
 	skb_set_hash(skb, tcp_rsk(req)->txhash, PKT_HASH_TYPE_L4);
-	tcp_header_size = tcp_synack_options(req, mss, skb, &opts, md5, foc) +
-			  sizeof(*th);
+	tcp_header_size = tcp_synack_options(sk, req, mss, skb, &opts, md5,
+					     foc) + sizeof(*th);
 
 	skb_push(skb, tcp_header_size);
 	skb_reset_transport_header(skb);
-- 
2.13.5

^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [PATCH V2 net-next 3/3] smc: add SMC rendezvous protocol
  2017-10-18 14:29 [PATCH V2 net-next 0/3] TCP experimental option for SMC rendezvous Ursula Braun
  2017-10-18 14:29 ` [PATCH V2 net-next 1/3] smc: fix mutex unlocks during link group creation Ursula Braun
  2017-10-18 14:29 ` [PATCH V2 net-next 2/3] tcp: TCP experimental option for SMC Ursula Braun
@ 2017-10-18 14:30 ` Ursula Braun
  2 siblings, 0 replies; 5+ messages in thread
From: Ursula Braun @ 2017-10-18 14:30 UTC (permalink / raw)
  To: davem; +Cc: netdev, linux-s390, jwi, schwidefsky, heiko.carstens, raspl,
	ubraun

The SMC protocol [1] uses a rendezvous protocol to negotiate SMC
capability between peers. The current Linux implementation does not yet
use this rendezvous protocol and, thus, is not compliant to RFC7609 and
incompatible with other SMC implementations like in zOS.
This patch adds support for the SMC rendezvous protocol. It uses a new
TCP experimental option. With this option, SMC capabilities are
exchanged between the peers during the TCP three way handshake.

[1] SMC-R Informational RFC: http://www.rfc-editor.org/info/rfc7609

Signed-off-by: Ursula Braun <ubraun@linux.vnet.ibm.com>
---
 net/smc/af_smc.c | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index 70f7640f5090..6451c5013e06 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -390,6 +390,12 @@ static int smc_connect_rdma(struct smc_sock *smc)
 	int rc = 0;
 	u8 ibport;
 
+	if (!tcp_sk(smc->clcsock->sk)->syn_smc) {
+		/* peer has not signalled SMC-capability */
+		smc->use_fallback = true;
+		goto out_connected;
+	}
+
 	/* IPSec connections opt out of SMC-R optimizations */
 	if (using_ipsec(smc)) {
 		reason_code = SMC_CLC_DECL_IPSEC;
@@ -555,6 +561,7 @@ static int smc_connect(struct socket *sock, struct sockaddr *addr,
 	}
 
 	smc_copy_sock_settings_to_clc(smc);
+	tcp_sk(smc->clcsock->sk)->syn_smc = 1;
 	rc = kernel_connect(smc->clcsock, addr, alen, flags);
 	if (rc)
 		goto out;
@@ -759,6 +766,12 @@ static void smc_listen_work(struct work_struct *work)
 	u8 prefix_len;
 	u8 ibport;
 
+	/* check if peer is smc capable */
+	if (!tcp_sk(newclcsock->sk)->syn_smc) {
+		new_smc->use_fallback = true;
+		goto out_connected;
+	}
+
 	/* do inband token exchange -
 	 *wait for and receive SMC Proposal CLC message
 	 */
@@ -967,6 +980,7 @@ static int smc_listen(struct socket *sock, int backlog)
 	 * them to the clc socket -- copy smc socket options to clc socket
 	 */
 	smc_copy_sock_settings_to_clc(smc);
+	tcp_sk(smc->clcsock->sk)->syn_smc = 1;
 
 	rc = kernel_listen(smc->clcsock, backlog);
 	if (rc)
@@ -1409,6 +1423,7 @@ static int __init smc_init(void)
 		goto out_sock;
 	}
 
+	static_branch_enable(&tcp_have_smc);
 	return 0;
 
 out_sock:
@@ -1433,6 +1448,7 @@ static void __exit smc_exit(void)
 		list_del_init(&lgr->list);
 		smc_lgr_free(lgr); /* free link group */
 	}
+	static_branch_disable(&tcp_have_smc);
 	smc_ib_unregister_client();
 	sock_unregister(PF_SMC);
 	proto_unregister(&smc_proto);
-- 
2.13.5

^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH V2 net-next 2/3] tcp: TCP experimental option for SMC
  2017-10-18 14:29 ` [PATCH V2 net-next 2/3] tcp: TCP experimental option for SMC Ursula Braun
@ 2017-10-25  0:55   ` David Miller
  0 siblings, 0 replies; 5+ messages in thread
From: David Miller @ 2017-10-25  0:55 UTC (permalink / raw)
  To: ubraun; +Cc: netdev, linux-s390, jwi, schwidefsky, heiko.carstens, raspl

From: Ursula Braun <ubraun@linux.vnet.ibm.com>
Date: Wed, 18 Oct 2017 16:29:59 +0200

> diff --git a/include/net/tcp.h b/include/net/tcp.h
> index 3b3b9b968e2d..a2e87ac8cdc0 100644
> --- a/include/net/tcp.h
> +++ b/include/net/tcp.h
> @@ -45,6 +45,7 @@
>  
>  #include <linux/seq_file.h>
>  #include <linux/memcontrol.h>
> +#include <linux/unaligned/access_ok.h>

Only tcp_input.c needs this include.  If you include it here, then the
rest of the kernel pays the price of this include.

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2017-10-25  0:55 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2017-10-18 14:29 [PATCH V2 net-next 0/3] TCP experimental option for SMC rendezvous Ursula Braun
2017-10-18 14:29 ` [PATCH V2 net-next 1/3] smc: fix mutex unlocks during link group creation Ursula Braun
2017-10-18 14:29 ` [PATCH V2 net-next 2/3] tcp: TCP experimental option for SMC Ursula Braun
2017-10-25  0:55   ` David Miller
2017-10-18 14:30 ` [PATCH V2 net-next 3/3] smc: add SMC rendezvous protocol Ursula Braun

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).