netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Ursula Braun <ubraun@linux.ibm.com>
To: davem@davemloft.net
Cc: netdev@vger.kernel.org, linux-s390@vger.kernel.org,
	schwidefsky@de.ibm.com, heiko.carstens@de.ibm.com,
	raspl@linux.vnet.ibm.com, ubraun@linux.vnet.ibm.com
Subject: [PATCH net-next 2/4] net/smc: handle sockopt TCP_NODELAY
Date: Tue, 17 Apr 2018 17:18:13 +0200	[thread overview]
Message-ID: <20180417151815.77191-3-ubraun@linux.ibm.com> (raw)
In-Reply-To: <20180417151815.77191-1-ubraun@linux.ibm.com>

From: Ursula Braun <ubraun@linux.vnet.ibm.com>

TCP sockopts must not interfere with the CLC handshake on the
CLC socket. Therefore, some of them, such as resetting TCP_NODELAY,
are deferred until the CLC handshake has completed.

While touching setsockopt, the TCP_FASTOPEN sockopts are
ignored, since SMC-connection setup is based on the TCP
three-way-handshake.

Signed-off-by: Ursula Braun <ubraun@linux.vnet.ibm.com>
---
 net/smc/af_smc.c | 109 ++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 net/smc/smc.h    |   4 ++
 2 files changed, 111 insertions(+), 2 deletions(-)

diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index 5f8046c62d90..96f4d182f998 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -377,6 +377,22 @@ static void smc_link_save_peer_info(struct smc_link *link,
 	link->peer_mtu = clc->qp_mtu;
 }
 
+/* Apply sockopts deferred during the clc handshake: clear TCP_NODELAY on
+ * the CLC socket if a reset is pending. The flag is read from and cleared
+ * on listen_smc when set - NOTE(review): confirm clearing there is intended.
+ */
+static void smc_apply_deferred_sockopts(struct smc_sock *smc)
+{
+	struct smc_sock *opt_smc = smc->listen_smc ? smc->listen_smc : smc;
+	int val = 0; /* TCP_NODELAY expects an int-sized optval */
+
+	if (opt_smc->deferred_nodelay_reset) {
+		kernel_setsockopt(smc->clcsock, SOL_TCP, TCP_NODELAY,
+				  (char *)&val, sizeof(val));
+		opt_smc->deferred_nodelay_reset = 0;
+	}
+}
+
 /* setup for RDMA connection of client */
 static int smc_connect_rdma(struct smc_sock *smc)
 {
@@ -506,6 +522,7 @@ static int smc_connect_rdma(struct smc_sock *smc)
 	smc_tx_init(smc);
 
 out_connected:
+	smc_apply_deferred_sockopts(smc);
 	smc_copy_sock_settings_to_clc(smc);
 	if (smc->sk.sk_state == SMC_INIT)
 		smc->sk.sk_state = SMC_ACTIVE;
@@ -908,6 +925,7 @@ static void smc_listen_work(struct work_struct *work)
 	mutex_unlock(&smc_create_lgr_pending);
 
 out_connected:
+	smc_apply_deferred_sockopts(new_smc);
 	sk_refcnt_debug_inc(newsmcsk);
 	if (newsmcsk->sk_state == SMC_INIT)
 		newsmcsk->sk_state = SMC_ACTIVE;
@@ -1280,9 +1298,60 @@ static int smc_setsockopt(struct socket *sock, int level, int optname,
 {
 	struct sock *sk = sock->sk;
 	struct smc_sock *smc;
+	int val;
 
 	smc = smc_sk(sk);
+	if (smc->use_fallback || level != SOL_TCP)
+		goto clcsock;
+
+	/* level SOL_TCP */
+	switch (optname) {
+	case TCP_CONGESTION:
+	case TCP_ULP:
+		/* sockopts without integer value; do not apply to SMC */
+		goto clcsock;
+	default:
+		break;
+	}
 
+	if (optlen < sizeof(int))
+		return -EINVAL;
+	if (get_user(val, (int __user *)optval))
+		return -EFAULT;
+
+	lock_sock(sk);
+	switch (optname) {
+	case TCP_NODELAY:
+		if (sk->sk_state != SMC_INIT && sk->sk_state != SMC_LISTEN) {
+			release_sock(sk);
+			goto clcsock;
+		}
+		/* for the CLC-handshake TCP_NODELAY is desired;
+		 * in case of fallback to TCP, a nodelay reset is
+		 * triggered afterwards.
+		 */
+		if (val)
+			smc->deferred_nodelay_reset = 0;
+		else
+			smc->deferred_nodelay_reset = 1;
+		break;
+	case TCP_FASTOPEN:
+	case TCP_FASTOPEN_CONNECT:
+	case TCP_FASTOPEN_KEY:
+	case TCP_FASTOPEN_NO_COOKIE:
+		/* ignore these options; 3-way handshake shouldn't be
+		 * bypassed with SMC
+		 */
+		break;
+	default:
+		/* apply option to the CLC socket */
+		release_sock(sk);
+		goto clcsock;
+	}
+	release_sock(sk);
+	return 0;
+
+clcsock:
 	/* generic setsockopts reaching us here always apply to the
 	 * CLC socket
 	 */
@@ -1293,10 +1362,41 @@ static int smc_setsockopt(struct socket *sock, int level, int optname,
 static int smc_getsockopt(struct socket *sock, int level, int optname,
 			  char __user *optval, int __user *optlen)
 {
+	struct sock *sk = sock->sk;
 	struct smc_sock *smc;
+	int val, len;
 
-	smc = smc_sk(sock->sk);
-	/* socket options apply to the CLC socket */
+	smc = smc_sk(sk);
+
+	if (smc->use_fallback || level != SOL_TCP)
+		goto clcsock;
+
+	if (get_user(len, optlen))
+		return -EFAULT;
+	len = min_t(unsigned int, len, sizeof(int));
+	if (len < 0)
+		return -EINVAL;
+
+	/* level SOL_TCP */
+	switch (optname) {
+	case TCP_NODELAY:
+		if (smc->deferred_nodelay_reset)
+			val = 0;
+		else
+			goto clcsock;
+		break;
+	default:
+		goto clcsock;
+	}
+
+	if (put_user(len, optlen))
+		return -EFAULT;
+	if (copy_to_user(optval, &val, len))
+		return -EFAULT;
+	return 0;
+
+clcsock:
+	/* socket options applying to the CLC socket */
 	return smc->clcsock->ops->getsockopt(smc->clcsock, level, optname,
 					     optval, optlen);
 }
@@ -1387,6 +1487,7 @@ static int smc_create(struct net *net, struct socket *sock, int protocol,
 	int family = (protocol == SMCPROTO_SMC6) ? PF_INET6 : PF_INET;
 	struct smc_sock *smc;
 	struct sock *sk;
+	u8 val = 1;
 	int rc;
 
 	rc = -ESOCKTNOSUPPORT;
@@ -1412,6 +1513,10 @@ static int smc_create(struct net *net, struct socket *sock, int protocol,
 		sk_common_release(sk);
 		goto out;
 	}
+	/* clc handshake should run with disabled Nagle algorithm */
+	kernel_setsockopt(smc->clcsock, SOL_TCP, TCP_NODELAY, &val,
+			  sizeof(val));
+	smc->deferred_nodelay_reset = 1; /* TCP_NODELAY is not the default */
 	smc->sk.sk_sndbuf = max(smc->clcsock->sk->sk_sndbuf, SMC_BUF_MIN_SIZE);
 	smc->sk.sk_rcvbuf = max(smc->clcsock->sk->sk_rcvbuf, SMC_BUF_MIN_SIZE);
 
diff --git a/net/smc/smc.h b/net/smc/smc.h
index e4829a2f46ba..6dfc1c90bed2 100644
--- a/net/smc/smc.h
+++ b/net/smc/smc.h
@@ -185,6 +185,10 @@ struct smc_sock {				/* smc sock container */
 						 * started, waiting for unsent
 						 * data to be sent
 						 */
+	u8			deferred_nodelay_reset : 1;
+						/* defer Nagle after CLC
+						 * handshake
+						 */
 };
 
 static inline struct smc_sock *smc_sk(const struct sock *sk)
-- 
2.13.5

  parent reply	other threads:[~2018-04-17 15:18 UTC|newest]

Thread overview: 6+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-04-17 15:18 [PATCH net-next 0/4] net/smc: fixes 2018-04-17 Ursula Braun
2018-04-17 15:18 ` [PATCH net-next 1/4] net/smc: fix structure size Ursula Braun
2018-04-17 15:18 ` Ursula Braun [this message]
2018-04-17 19:23   ` [PATCH net-next 2/4] net/smc: handle sockopt TCP_NODELAY David Miller
2018-04-17 15:18 ` [PATCH net-next 3/4] net/smc: handle sockopt TCP_CORK Ursula Braun
2018-04-17 15:18 ` [PATCH net-next 4/4] net/smc: handle sockopt TCP_DEFER_ACCEPT Ursula Braun

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20180417151815.77191-3-ubraun@linux.ibm.com \
    --to=ubraun@linux.ibm.com \
    --cc=davem@davemloft.net \
    --cc=heiko.carstens@de.ibm.com \
    --cc=linux-s390@vger.kernel.org \
    --cc=netdev@vger.kernel.org \
    --cc=raspl@linux.vnet.ibm.com \
    --cc=schwidefsky@de.ibm.com \
    --cc=ubraun@linux.vnet.ibm.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).