netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Jakub Kicinski <kuba@kernel.org>
To: davem@davemloft.net
Cc: netdev@vger.kernel.org, edumazet@google.com, pabeni@redhat.com,
	borisp@nvidia.com, john.fastabend@gmail.com, maximmi@nvidia.com,
	tariqt@nvidia.com, vfedorenko@novek.ru,
	Jakub Kicinski <kuba@kernel.org>
Subject: [PATCH net-next v2 01/11] tls: rx: allow only one reader at a time
Date: Thu, 14 Jul 2022 22:22:25 -0700	[thread overview]
Message-ID: <20220715052235.1452170-2-kuba@kernel.org> (raw)
In-Reply-To: <20220715052235.1452170-1-kuba@kernel.org>

recvmsg() in TLS gets data from the skb list (rx_list) or fresh
skbs we read from TCP via strparser. The former holds skbs which were
already decrypted for peek or decrypted and partially consumed.

tls_wait_data() only notices appearance of fresh skbs coming out
of TCP (or psock). It is possible, if there is a concurrent call
to peek() and recv() that the peek() will move the data from input
to rx_list without recv() noticing. recv() will then read data out
of order or never wake up.

This is not a practical use case/concern, but it makes the self
tests less reliable. This patch solves the problem by allowing
only one reader in.

Because having multiple processes calling read()/peek() is not
normal avoid adding a lock and try to fast-path the single reader
case.

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/tls.h |  3 +++
 net/tls/tls_sw.c  | 61 +++++++++++++++++++++++++++++++++++++++++------
 2 files changed, 57 insertions(+), 7 deletions(-)

diff --git a/include/net/tls.h b/include/net/tls.h
index 8742e13bc362..e8935cfe0cd6 100644
--- a/include/net/tls.h
+++ b/include/net/tls.h
@@ -116,11 +116,14 @@ struct tls_sw_context_rx {
 	void (*saved_data_ready)(struct sock *sk);
 
 	struct sk_buff *recv_pkt;
+	u8 reader_present;
 	u8 async_capable:1;
 	u8 zc_capable:1;
+	u8 reader_contended:1;
 	atomic_t decrypt_pending;
 	/* protect crypto_wait with decrypt_pending*/
 	spinlock_t decrypt_compl_lock;
+	struct wait_queue_head wq;
 };
 
 struct tls_record_info {
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index 68d79ee48a56..761a63751616 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -1753,6 +1753,51 @@ tls_read_flush_backlog(struct sock *sk, struct tls_prot_info *prot,
 	sk_flush_backlog(sk);
 }
 
+static long tls_rx_reader_lock(struct sock *sk, struct tls_sw_context_rx *ctx,
+			       bool nonblock)
+{
+	long timeo;
+
+	lock_sock(sk);
+
+	timeo = sock_rcvtimeo(sk, nonblock);
+
+	while (unlikely(ctx->reader_present)) {
+		DEFINE_WAIT_FUNC(wait, woken_wake_function);
+
+		ctx->reader_contended = 1;
+
+		add_wait_queue(&ctx->wq, &wait);
+		sk_wait_event(sk, &timeo,
+			      !READ_ONCE(ctx->reader_present), &wait);
+		remove_wait_queue(&ctx->wq, &wait);
+
+		if (!timeo)
+			return -EAGAIN;
+		if (signal_pending(current))
+			return sock_intr_errno(timeo);
+	}
+
+	WRITE_ONCE(ctx->reader_present, 1);
+
+	return timeo;
+}
+
+static void tls_rx_reader_unlock(struct sock *sk, struct tls_sw_context_rx *ctx)
+{
+	if (unlikely(ctx->reader_contended)) {
+		if (wq_has_sleeper(&ctx->wq))
+			wake_up(&ctx->wq);
+		else
+			ctx->reader_contended = 0;
+
+		WARN_ON_ONCE(!ctx->reader_present);
+	}
+
+	WRITE_ONCE(ctx->reader_present, 0);
+	release_sock(sk);
+}
+
 int tls_sw_recvmsg(struct sock *sk,
 		   struct msghdr *msg,
 		   size_t len,
@@ -1782,7 +1827,9 @@ int tls_sw_recvmsg(struct sock *sk,
 		return sock_recv_errqueue(sk, msg, len, SOL_IP, IP_RECVERR);
 
 	psock = sk_psock_get(sk);
-	lock_sock(sk);
+	timeo = tls_rx_reader_lock(sk, ctx, flags & MSG_DONTWAIT);
+	if (timeo < 0)
+		return timeo;
 	bpf_strp_enabled = sk_psock_strp_enabled(psock);
 
 	/* If crypto failed the connection is broken */
@@ -1801,7 +1848,6 @@ int tls_sw_recvmsg(struct sock *sk,
 
 	target = sock_rcvlowat(sk, flags & MSG_WAITALL, len);
 	len = len - copied;
-	timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
 
 	zc_capable = !bpf_strp_enabled && !is_kvec && !is_peek &&
 		ctx->zc_capable;
@@ -1956,7 +2002,7 @@ int tls_sw_recvmsg(struct sock *sk,
 	copied += decrypted;
 
 end:
-	release_sock(sk);
+	tls_rx_reader_unlock(sk, ctx);
 	if (psock)
 		sk_psock_put(sk, psock);
 	return copied ? : err;
@@ -1978,9 +2024,9 @@ ssize_t tls_sw_splice_read(struct socket *sock,  loff_t *ppos,
 	long timeo;
 	int chunk;
 
-	lock_sock(sk);
-
-	timeo = sock_rcvtimeo(sk, flags & SPLICE_F_NONBLOCK);
+	timeo = tls_rx_reader_lock(sk, ctx, flags & SPLICE_F_NONBLOCK);
+	if (timeo < 0)
+		return timeo;
 
 	from_queue = !skb_queue_empty(&ctx->rx_list);
 	if (from_queue) {
@@ -2029,7 +2075,7 @@ ssize_t tls_sw_splice_read(struct socket *sock,  loff_t *ppos,
 	}
 
 splice_read_end:
-	release_sock(sk);
+	tls_rx_reader_unlock(sk, ctx);
 	return copied ? : err;
 }
 
@@ -2371,6 +2417,7 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx)
 	} else {
 		crypto_init_wait(&sw_ctx_rx->async_wait);
 		spin_lock_init(&sw_ctx_rx->decrypt_compl_lock);
+		init_waitqueue_head(&sw_ctx_rx->wq);
 		crypto_info = &ctx->crypto_recv.info;
 		cctx = &ctx->rx;
 		skb_queue_head_init(&sw_ctx_rx->rx_list);
-- 
2.36.1


  reply	other threads:[~2022-07-15  5:22 UTC|newest]

Thread overview: 17+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-07-15  5:22 [PATCH net-next v2 00/11] tls: rx: avoid skb_cow_data() Jakub Kicinski
2022-07-15  5:22 ` Jakub Kicinski [this message]
2022-07-20  8:37   ` [PATCH net-next v2 01/11] tls: rx: allow only one reader at a time Eric Dumazet
2022-07-20 16:59     ` Jakub Kicinski
2022-07-20 17:09       ` Eric Dumazet
2022-07-20 17:19         ` Eric Dumazet
2022-07-15  5:22 ` [PATCH net-next v2 02/11] tls: rx: don't try to keep the skbs always on the list Jakub Kicinski
2022-07-15  5:22 ` [PATCH net-next v2 03/11] tls: rx: don't keep decrypted skbs on ctx->recv_pkt Jakub Kicinski
2022-07-15  5:22 ` [PATCH net-next v2 04/11] tls: rx: remove the message decrypted tracking Jakub Kicinski
2022-07-15  5:22 ` [PATCH net-next v2 05/11] tls: rx: factor out device darg update Jakub Kicinski
2022-07-15  5:22 ` [PATCH net-next v2 06/11] tls: rx: read the input skb from ctx->recv_pkt Jakub Kicinski
2022-07-15  5:22 ` [PATCH net-next v2 07/11] tls: rx: return the decrypted skb via darg Jakub Kicinski
2022-07-15  5:22 ` [PATCH net-next v2 08/11] tls: rx: async: adjust record geometry immediately Jakub Kicinski
2022-07-15  5:22 ` [PATCH net-next v2 09/11] tls: rx: async: hold onto the input skb Jakub Kicinski
2022-07-15  5:22 ` [PATCH net-next v2 10/11] tls: rx: async: don't put async zc on the list Jakub Kicinski
2022-07-15  5:22 ` [PATCH net-next v2 11/11] tls: rx: decrypt into a fresh skb Jakub Kicinski
2022-07-18 10:40 ` [PATCH net-next v2 00/11] tls: rx: avoid skb_cow_data() patchwork-bot+netdevbpf

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20220715052235.1452170-2-kuba@kernel.org \
    --to=kuba@kernel.org \
    --cc=borisp@nvidia.com \
    --cc=davem@davemloft.net \
    --cc=edumazet@google.com \
    --cc=john.fastabend@gmail.com \
    --cc=maximmi@nvidia.com \
    --cc=netdev@vger.kernel.org \
    --cc=pabeni@redhat.com \
    --cc=tariqt@nvidia.com \
    --cc=vfedorenko@novek.ru \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).