public inbox for kernel-tls-handshake@lists.linux.dev
 help / color / mirror / Atom feed
From: Chuck Lever <cel@kernel.org>
To: Hannes Reinecke <hare@suse.de>, Olga Kornievskaia <okorniev@redhat.com>
Cc: kernel-tls-handshake@lists.linux.dev,
	Chuck Lever <chuck.lever@oracle.com>
Subject: [RFC PATCH 4/4] sunrpc: Remove sock_recvmsg path from svcsock TCP receives
Date: Tue, 17 Feb 2026 17:20:33 -0500	[thread overview]
Message-ID: <20260217222033.1929211-5-cel@kernel.org> (raw)
In-Reply-To: <20260217222033.1929211-1-cel@kernel.org>

From: Chuck Lever <chuck.lever@oracle.com>

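The svcsock TCP receive path maintains two code paths: one
using read_sock/read_sock_cmsg and a legacy path using
sock_recvmsg. Plain TCP sockets already provide read_sock
(tcp_read_sock) in their proto_ops, so the read_sock_cmsg
path can handle all cases relevant to NFSD by falling back
to read_sock when kTLS is not active.

Remove the sock_recvmsg-based receive path and the helpers
that only it used, and rename svc_tcp_recvfrom_readsock() to
svc_tcp_recvfrom(). When the socket's proto_ops does not
provide ->read_sock_cmsg, call ->read_sock with the same
receive actor instead.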

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 net/sunrpc/svcsock.c | 314 +++----------------------------------------
 1 file changed, 22 insertions(+), 292 deletions(-)

diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 9600d15287e7..7d614dc44a05 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -238,140 +238,6 @@ static int svc_one_sock_name(struct svc_sock *svsk, char *buf, int remaining)
 	return len;
 }
 
-static int
-svc_tcp_sock_process_cmsg(struct socket *sock, struct msghdr *msg,
-			  struct cmsghdr *cmsg, int ret)
-{
-	u8 content_type = tls_get_record_type(sock->sk, cmsg);
-	u8 level, description;
-
-	switch (content_type) {
-	case 0:
-		break;
-	case TLS_RECORD_TYPE_DATA:
-		/* TLS sets EOR at the end of each application data
-		 * record, even though there might be more frames
-		 * waiting to be decrypted.
-		 */
-		msg->msg_flags &= ~MSG_EOR;
-		break;
-	case TLS_RECORD_TYPE_ALERT:
-		tls_alert_recv(sock->sk, msg, &level, &description);
-		ret = (level == TLS_ALERT_LEVEL_FATAL) ?
-			-ENOTCONN : -EAGAIN;
-		break;
-	default:
-		/* discard this record type */
-		ret = -EAGAIN;
-	}
-	return ret;
-}
-
-static int
-svc_tcp_sock_recv_cmsg(struct socket *sock, unsigned int *msg_flags)
-{
-	union {
-		struct cmsghdr	cmsg;
-		u8		buf[CMSG_SPACE(sizeof(u8))];
-	} u;
-	u8 alert[2];
-	struct kvec alert_kvec = {
-		.iov_base = alert,
-		.iov_len = sizeof(alert),
-	};
-	struct msghdr msg = {
-		.msg_flags = *msg_flags,
-		.msg_control = &u,
-		.msg_controllen = sizeof(u),
-	};
-	int ret;
-
-	iov_iter_kvec(&msg.msg_iter, ITER_DEST, &alert_kvec, 1,
-		      alert_kvec.iov_len);
-	ret = sock_recvmsg(sock, &msg, MSG_DONTWAIT);
-	if (ret > 0 &&
-	    tls_get_record_type(sock->sk, &u.cmsg) == TLS_RECORD_TYPE_ALERT) {
-		iov_iter_revert(&msg.msg_iter, ret);
-		ret = svc_tcp_sock_process_cmsg(sock, &msg, &u.cmsg, -EAGAIN);
-	}
-	return ret;
-}
-
-static int
-svc_tcp_sock_recvmsg(struct svc_sock *svsk, struct msghdr *msg)
-{
-	int ret;
-	struct socket *sock = svsk->sk_sock;
-
-	ret = sock_recvmsg(sock, msg, MSG_DONTWAIT);
-	if (msg->msg_flags & MSG_CTRUNC) {
-		msg->msg_flags &= ~(MSG_CTRUNC | MSG_EOR);
-		if (ret == 0 || ret == -EIO)
-			ret = svc_tcp_sock_recv_cmsg(sock, &msg->msg_flags);
-	}
-	return ret;
-}
-
-#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE
-static void svc_flush_bvec(const struct bio_vec *bvec, size_t size, size_t seek)
-{
-	struct bvec_iter bi = {
-		.bi_size	= size + seek,
-	};
-	struct bio_vec bv;
-
-	bvec_iter_advance(bvec, &bi, seek & PAGE_MASK);
-	for_each_bvec(bv, bvec, bi, bi)
-		flush_dcache_page(bv.bv_page);
-}
-#else
-static inline void svc_flush_bvec(const struct bio_vec *bvec, size_t size,
-				  size_t seek)
-{
-}
-#endif
-
-/*
- * Read from @rqstp's transport socket. The incoming message fills whole
- * pages in @rqstp's rq_pages array until the last page of the message
- * has been received into a partial page.
- */
-static ssize_t svc_tcp_read_msg(struct svc_rqst *rqstp, size_t buflen,
-				size_t seek)
-{
-	struct svc_sock *svsk =
-		container_of(rqstp->rq_xprt, struct svc_sock, sk_xprt);
-	struct bio_vec *bvec = rqstp->rq_bvec;
-	struct msghdr msg = { NULL };
-	unsigned int i;
-	ssize_t len;
-	size_t t;
-
-	clear_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
-
-	for (i = 0, t = 0; t < buflen; i++, t += PAGE_SIZE)
-		bvec_set_page(&bvec[i], rqstp->rq_pages[i], PAGE_SIZE, 0);
-	rqstp->rq_respages = &rqstp->rq_pages[i];
-	rqstp->rq_next_page = rqstp->rq_respages + 1;
-
-	iov_iter_bvec(&msg.msg_iter, ITER_DEST, bvec, i, buflen);
-	if (seek) {
-		iov_iter_advance(&msg.msg_iter, seek);
-		buflen -= seek;
-	}
-	len = svc_tcp_sock_recvmsg(svsk, &msg);
-	if (len > 0)
-		svc_flush_bvec(bvec, len, seek);
-
-	/* If we read a full record, then assume there may be more
-	 * data to read (stream based sockets only!)
-	 */
-	if (len == buflen)
-		set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
-
-	return len;
-}
-
 /*
  * Set socket snd and rcv buffer lengths
  */
@@ -1038,50 +904,6 @@ static void svc_tcp_clear_pages(struct svc_sock *svsk)
 	svsk->sk_datalen = 0;
 }
 
-/*
- * Receive fragment record header into sk_marker.
- */
-static ssize_t svc_tcp_read_marker(struct svc_sock *svsk,
-				   struct svc_rqst *rqstp)
-{
-	ssize_t want, len;
-
-	/* If we haven't gotten the record length yet,
-	 * get the next four bytes.
-	 */
-	if (svsk->sk_tcplen < sizeof(rpc_fraghdr)) {
-		struct msghdr	msg = { NULL };
-		struct kvec	iov;
-
-		want = sizeof(rpc_fraghdr) - svsk->sk_tcplen;
-		iov.iov_base = ((char *)&svsk->sk_marker) + svsk->sk_tcplen;
-		iov.iov_len  = want;
-		iov_iter_kvec(&msg.msg_iter, ITER_DEST, &iov, 1, want);
-		len = svc_tcp_sock_recvmsg(svsk, &msg);
-		if (len < 0)
-			return len;
-		svsk->sk_tcplen += len;
-		if (len < want) {
-			/* call again to read the remaining bytes */
-			goto err_short;
-		}
-		trace_svcsock_marker(&svsk->sk_xprt, svsk->sk_marker);
-		if (svc_sock_reclen(svsk) + svsk->sk_datalen >
-		    svsk->sk_xprt.xpt_server->sv_max_mesg)
-			goto err_too_large;
-	}
-	return svc_sock_reclen(svsk);
-
-err_too_large:
-	net_notice_ratelimited("svc: %s oversized RPC fragment (%u octets) from %pISpc\n",
-			       svsk->sk_xprt.xpt_server->sv_name,
-			       svc_sock_reclen(svsk),
-			       (struct sockaddr *)&svsk->sk_xprt.xpt_remote);
-	svc_xprt_deferred_close(&svsk->sk_xprt);
-err_short:
-	return -EAGAIN;
-}
-
 static int receive_cb_reply(struct svc_sock *svsk, struct svc_rqst *rqstp)
 {
 	struct rpc_xprt *bc_xprt = svsk->sk_xprt.xpt_bc_xprt;
@@ -1252,7 +1074,21 @@ static int svc_tcp_cmsg_actor(read_descriptor_t *desc,
 	return -EAGAIN;
 }
 
-static int svc_tcp_recvfrom_readsock(struct svc_rqst *rqstp)
+/**
+ * svc_tcp_recvfrom - Receive data from a TCP socket
+ * @rqstp: request structure into which to receive an RPC Call
+ *
+ * Called in a loop when XPT_DATA has been set.
+ *
+ * Returns:
+ *   On success, the number of bytes in a received RPC Call, or
+ *   %0 if a complete RPC Call message was not ready to return
+ *
+ * The zero return case handles partial receives and callback Replies.
+ * The state of a partial receive is preserved in the svc_sock for
+ * the next call to svc_tcp_recvfrom.
+ */
+static int svc_tcp_recvfrom(struct svc_rqst *rqstp)
 {
 	struct svc_sock *svsk =
 		container_of(rqstp->rq_xprt, struct svc_sock, sk_xprt);
@@ -1278,9 +1114,13 @@ static int svc_tcp_recvfrom_readsock(struct svc_rqst *rqstp)
 
 	desc.count = serv->sv_max_mesg;
 	lock_sock(sk);
-	len = svsk->sk_sock->ops->read_sock_cmsg(sk, &desc,
-						  svc_tcp_recv_actor,
-						  svc_tcp_cmsg_actor);
+	if (svsk->sk_sock->ops->read_sock_cmsg)
+		len = svsk->sk_sock->ops->read_sock_cmsg(sk, &desc,
+							  svc_tcp_recv_actor,
+							  svc_tcp_cmsg_actor);
+	else
+		len = svsk->sk_sock->ops->read_sock(sk, &desc,
+						     svc_tcp_recv_actor);
 	release_sock(sk);
 
 	if (desc.error == -EMSGSIZE)
@@ -1366,116 +1206,6 @@ static int svc_tcp_recvfrom_readsock(struct svc_rqst *rqstp)
 	return 0;
 }
 
-/**
- * svc_tcp_recvfrom - Receive data from a TCP socket
- * @rqstp: request structure into which to receive an RPC Call
- *
- * Called in a loop when XPT_DATA has been set.
- *
- * Read the 4-byte stream record marker, then use the record length
- * in that marker to set up exactly the resources needed to receive
- * the next RPC message into @rqstp.
- *
- * Returns:
- *   On success, the number of bytes in a received RPC Call, or
- *   %0 if a complete RPC Call message was not ready to return
- *
- * The zero return case handles partial receives and callback Replies.
- * The state of a partial receive is preserved in the svc_sock for
- * the next call to svc_tcp_recvfrom.
- */
-static int svc_tcp_recvfrom(struct svc_rqst *rqstp)
-{
-	struct svc_sock	*svsk =
-		container_of(rqstp->rq_xprt, struct svc_sock, sk_xprt);
-	struct svc_serv	*serv = svsk->sk_xprt.xpt_server;
-	size_t want, base;
-	ssize_t len;
-	__be32 *p;
-	__be32 calldir;
-
-	if (svsk->sk_sock->ops->read_sock_cmsg)
-		return svc_tcp_recvfrom_readsock(rqstp);
-
-	clear_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
-	len = svc_tcp_read_marker(svsk, rqstp);
-	if (len < 0)
-		goto error;
-
-	base = svc_tcp_restore_pages(svsk, rqstp);
-	want = len - (svsk->sk_tcplen - sizeof(rpc_fraghdr));
-	len = svc_tcp_read_msg(rqstp, base + want, base);
-	if (len >= 0) {
-		trace_svcsock_tcp_recv(&svsk->sk_xprt, len);
-		svsk->sk_tcplen += len;
-		svsk->sk_datalen += len;
-	}
-	if (len != want || !svc_sock_final_rec(svsk))
-		goto err_incomplete;
-	if (svsk->sk_datalen < 8)
-		goto err_nuts;
-
-	rqstp->rq_arg.len = svsk->sk_datalen;
-	rqstp->rq_arg.page_base = 0;
-	if (rqstp->rq_arg.len <= rqstp->rq_arg.head[0].iov_len) {
-		rqstp->rq_arg.head[0].iov_len = rqstp->rq_arg.len;
-		rqstp->rq_arg.page_len = 0;
-	} else
-		rqstp->rq_arg.page_len = rqstp->rq_arg.len - rqstp->rq_arg.head[0].iov_len;
-
-	rqstp->rq_xprt_ctxt   = NULL;
-	rqstp->rq_prot	      = IPPROTO_TCP;
-	if (test_bit(XPT_LOCAL, &svsk->sk_xprt.xpt_flags))
-		set_bit(RQ_LOCAL, &rqstp->rq_flags);
-	else
-		clear_bit(RQ_LOCAL, &rqstp->rq_flags);
-
-	p = (__be32 *)rqstp->rq_arg.head[0].iov_base;
-	calldir = p[1];
-	if (calldir)
-		len = receive_cb_reply(svsk, rqstp);
-
-	/* Reset TCP read info */
-	svsk->sk_datalen = 0;
-	svc_tcp_fragment_received(svsk);
-
-	if (len < 0)
-		goto error;
-
-	svc_xprt_copy_addrs(rqstp, &svsk->sk_xprt);
-	if (serv->sv_stats)
-		serv->sv_stats->nettcpcnt++;
-
-	svc_sock_secure_port(rqstp);
-	svc_xprt_received(rqstp->rq_xprt);
-	return rqstp->rq_arg.len;
-
-err_incomplete:
-	svc_tcp_save_pages(svsk, rqstp);
-	if (len < 0 && len != -EAGAIN)
-		goto err_delete;
-	if (len == want)
-		svc_tcp_fragment_received(svsk);
-	else
-		trace_svcsock_tcp_recv_short(&svsk->sk_xprt,
-				svc_sock_reclen(svsk),
-				svsk->sk_tcplen - sizeof(rpc_fraghdr));
-	goto err_noclose;
-error:
-	if (len != -EAGAIN)
-		goto err_delete;
-	trace_svcsock_tcp_recv_eagain(&svsk->sk_xprt, 0);
-	goto err_noclose;
-err_nuts:
-	svsk->sk_datalen = 0;
-err_delete:
-	trace_svcsock_tcp_recv_err(&svsk->sk_xprt, len);
-	svc_xprt_deferred_close(&svsk->sk_xprt);
-err_noclose:
-	svc_xprt_received(rqstp->rq_xprt);
-	return 0;	/* record not complete */
-}
-
 /*
  * MSG_SPLICE_PAGES is used exclusively to reduce the number of
  * copy operations in this path. Therefore the caller must ensure
-- 
2.53.0


      parent reply	other threads:[~2026-02-17 22:20 UTC|newest]

Thread overview: 14+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-02-17 22:20 [RFC PATCH 0/4] ->read_sock with cmsg Chuck Lever
2026-02-17 22:20 ` [RFC PATCH 1/4] net: Introduce read_sock_cmsg proto_ops for control message delivery Chuck Lever
2026-02-18  7:29   ` Hannes Reinecke
2026-02-18 14:33     ` Chuck Lever
2026-02-18 15:52       ` Hannes Reinecke
2026-02-18 16:12         ` Chuck Lever
2026-02-19  4:06           ` Alistair Francis
2026-02-19  8:05             ` Hannes Reinecke
2026-02-19  8:10           ` Hannes Reinecke
2026-02-19 13:59             ` Chuck Lever
2026-02-28 11:09             ` Alistair Francis
2026-02-17 22:20 ` [RFC PATCH 2/4] tls: Implement read_sock_cmsg for kTLS software path Chuck Lever
2026-02-17 22:20 ` [RFC PATCH 3/4] sunrpc: Use read_sock_cmsg for svcsock TCP receives Chuck Lever
2026-02-17 22:20 ` Chuck Lever [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260217222033.1929211-5-cel@kernel.org \
    --to=cel@kernel.org \
    --cc=chuck.lever@oracle.com \
    --cc=hare@suse.de \
    --cc=kernel-tls-handshake@lists.linux.dev \
    --cc=okorniev@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox.