From: Hannes Reinecke <hare@suse.de>
To: Christoph Hellwig <hch@lst.de>
Cc: Sagi Grimberg <sagi@grimberg.me>, Keith Busch <kbusch@kernel.org>,
	linux-nvme@lists.infradead.org, Jakub Kicinski <kuba@kernel.org>,
	Eric Dumazet <edumazet@google.com>,
	Paolo Abeni <pabeni@redhat.com>,
	netdev@vger.kernel.org, Hannes Reinecke <hare@suse.de>
Subject: [PATCH 14/17] nvmet-tcp: reference counting for queues
Date: Fri, 11 Aug 2023 14:17:52 +0200
Message-ID: <20230811121755.24715-15-hare@suse.de>
In-Reply-To: <20230811121755.24715-1-hare@suse.de>

The 'queue' structure is referenced from various places and passed
as an argument to asynchronous functions, so it is hard to tell
whether the queue is still valid by the time such a function
actually runs.
Add reference counting to the queue structure so that its lifetime
is tracked explicitly.

Signed-off-by: Hannes Reinecke <hare@suse.de>
---
 drivers/nvme/target/tcp.c | 74 ++++++++++++++++++++++++++++++---------
 1 file changed, 57 insertions(+), 17 deletions(-)
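
Note (below the cut, not part of the commit): the queue lifetime
handling here follows the standard kref idiom. A minimal sketch of
that pattern is shown below, using hypothetical 'foo' names rather
than the actual driver types.

	#include <linux/kref.h>
	#include <linux/slab.h>

	struct foo {
		struct kref kref;
		/* payload ... */
	};

	/* Invoked by kref_put() when the last reference is dropped. */
	static void foo_release(struct kref *kref)
	{
		struct foo *f = container_of(kref, struct foo, kref);

		kfree(f);
	}

	/* Take a reference only if the object is still alive. */
	static int foo_get(struct foo *f)
	{
		return f && kref_get_unless_zero(&f->kref);
	}

	static void foo_put(struct foo *f)
	{
		kref_put(&f->kref, foo_release);
	}

kref_init() in the allocation path sets the count to 1; the final
foo_put() then calls foo_release(). kref_get_unless_zero() fails once
the count has reached zero, which is what makes it safe to call from
asynchronous callbacks racing with teardown.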

diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c
index f19ea9d923fd..84b726dfc1c4 100644
--- a/drivers/nvme/target/tcp.c
+++ b/drivers/nvme/target/tcp.c
@@ -127,6 +127,7 @@ enum nvmet_tcp_queue_state {
 };
 
 struct nvmet_tcp_queue {
+	struct kref		kref;
 	struct socket		*sock;
 	struct nvmet_tcp_port	*port;
 	struct work_struct	io_work;
@@ -192,6 +193,9 @@ static struct workqueue_struct *nvmet_tcp_wq;
 static const struct nvmet_fabrics_ops nvmet_tcp_ops;
 static void nvmet_tcp_free_cmd(struct nvmet_tcp_cmd *c);
 static void nvmet_tcp_free_cmd_buffers(struct nvmet_tcp_cmd *cmd);
+static int nvmet_tcp_get_queue(struct nvmet_tcp_queue *queue);
+static void nvmet_tcp_put_queue(struct nvmet_tcp_queue *queue);
+static void nvmet_tcp_data_ready(struct sock *sk);
 
 static inline u16 nvmet_tcp_cmd_tag(struct nvmet_tcp_queue *queue,
 		struct nvmet_tcp_cmd *cmd)
@@ -1437,11 +1441,21 @@ static void nvmet_tcp_restore_socket_callbacks(struct nvmet_tcp_queue *queue)
 	struct socket *sock = queue->sock;
 
 	write_lock_bh(&sock->sk->sk_callback_lock);
+	/*
+	 * Check if nvmet_tcp_set_queue_sock() has been called;
+	 * if not, the queue reference has not been taken,
+	 * and dropping it here would underflow the refcount.
+	 */
+	if (sock->sk->sk_data_ready != nvmet_tcp_data_ready) {
+		write_unlock_bh(&sock->sk->sk_callback_lock);
+		return;
+	}
 	sock->sk->sk_data_ready =  queue->data_ready;
 	sock->sk->sk_state_change = queue->state_change;
 	sock->sk->sk_write_space = queue->write_space;
 	sock->sk->sk_user_data = NULL;
 	write_unlock_bh(&sock->sk->sk_callback_lock);
+	nvmet_tcp_put_queue(queue);
 }
 
 static void nvmet_tcp_uninit_data_in_cmds(struct nvmet_tcp_queue *queue)
@@ -1474,6 +1488,30 @@ static void nvmet_tcp_free_cmd_data_in_buffers(struct nvmet_tcp_queue *queue)
 		nvmet_tcp_free_cmd_buffers(&queue->connect);
 }
 
+static void nvmet_tcp_release_queue_final(struct kref *kref)
+{
+	struct nvmet_tcp_queue *queue = container_of(kref, struct nvmet_tcp_queue, kref);
+
+	WARN_ON(queue->state != NVMET_TCP_Q_DISCONNECTING);
+	nvmet_tcp_free_cmds(queue);
+	ida_free(&nvmet_tcp_queue_ida, queue->idx);
+	/* ->sock will be released by fput() */
+	fput(queue->sock->file);
+	kfree(queue);
+}
+
+static int nvmet_tcp_get_queue(struct nvmet_tcp_queue *queue)
+{
+	if (!queue)
+		return 0;
+	return kref_get_unless_zero(&queue->kref);
+}
+
+static void nvmet_tcp_put_queue(struct nvmet_tcp_queue *queue)
+{
+	kref_put(&queue->kref, nvmet_tcp_release_queue_final);
+}
+
 static void nvmet_tcp_release_queue_work(struct work_struct *w)
 {
 	struct page *page;
@@ -1493,15 +1531,11 @@ static void nvmet_tcp_release_queue_work(struct work_struct *w)
 	nvmet_sq_destroy(&queue->nvme_sq);
 	cancel_work_sync(&queue->io_work);
 	nvmet_tcp_free_cmd_data_in_buffers(queue);
-	/* ->sock will be released by fput() */
-	fput(queue->sock->file);
-	nvmet_tcp_free_cmds(queue);
 	if (queue->hdr_digest || queue->data_digest)
 		nvmet_tcp_free_crypto(queue);
-	ida_free(&nvmet_tcp_queue_ida, queue->idx);
 	page = virt_to_head_page(queue->pf_cache.va);
 	__page_frag_cache_drain(page, queue->pf_cache.pagecnt_bias);
-	kfree(queue);
+	nvmet_tcp_put_queue(queue);
 }
 
 static void nvmet_tcp_data_ready(struct sock *sk)
@@ -1512,8 +1546,10 @@ static void nvmet_tcp_data_ready(struct sock *sk)
 
 	read_lock_bh(&sk->sk_callback_lock);
 	queue = sk->sk_user_data;
-	if (likely(queue))
+	if (likely(nvmet_tcp_get_queue(queue))) {
 		queue_work_on(queue_cpu(queue), nvmet_tcp_wq, &queue->io_work);
+		nvmet_tcp_put_queue(queue);
+	}
 	read_unlock_bh(&sk->sk_callback_lock);
 }
 
@@ -1523,18 +1559,16 @@ static void nvmet_tcp_write_space(struct sock *sk)
 
 	read_lock_bh(&sk->sk_callback_lock);
 	queue = sk->sk_user_data;
-	if (unlikely(!queue))
+	if (unlikely(!nvmet_tcp_get_queue(queue)))
 		goto out;
 
 	if (unlikely(queue->state == NVMET_TCP_Q_CONNECTING)) {
 		queue->write_space(sk);
-		goto out;
-	}
-
-	if (sk_stream_is_writeable(sk)) {
+	} else if (sk_stream_is_writeable(sk)) {
 		clear_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
 		queue_work_on(queue_cpu(queue), nvmet_tcp_wq, &queue->io_work);
 	}
+	nvmet_tcp_put_queue(queue);
 out:
 	read_unlock_bh(&sk->sk_callback_lock);
 }
@@ -1545,7 +1579,7 @@ static void nvmet_tcp_state_change(struct sock *sk)
 
 	read_lock_bh(&sk->sk_callback_lock);
 	queue = sk->sk_user_data;
-	if (!queue)
+	if (!nvmet_tcp_get_queue(queue))
 		goto done;
 
 	switch (sk->sk_state) {
@@ -1562,6 +1596,7 @@ static void nvmet_tcp_state_change(struct sock *sk)
 		pr_warn("queue %d unhandled state %d\n",
 			queue->idx, sk->sk_state);
 	}
+	nvmet_tcp_put_queue(queue);
 done:
 	read_unlock_bh(&sk->sk_callback_lock);
 }
@@ -1582,6 +1617,9 @@ static int nvmet_tcp_set_queue_sock(struct nvmet_tcp_queue *queue)
 	if (ret < 0)
 		return ret;
 
+	if (unlikely(!nvmet_tcp_get_queue(queue)))
+		return -ECONNRESET;
+
 	/*
 	 * Cleanup whatever is sitting in the TCP transmit queue on socket
 	 * close. This is done to prevent stale data from being sent should
@@ -1603,6 +1641,7 @@ static int nvmet_tcp_set_queue_sock(struct nvmet_tcp_queue *queue)
 		 * If the socket is already closing, don't even start
 		 * consuming it
 		 */
+		nvmet_tcp_put_queue(queue);
 		ret = -ENOTCONN;
 	} else {
 		sock->sk->sk_user_data = queue;
@@ -1636,6 +1675,7 @@ static void nvmet_tcp_alloc_queue(struct nvmet_tcp_port *port,
 
 	INIT_WORK(&queue->release_work, nvmet_tcp_release_queue_work);
 	INIT_WORK(&queue->io_work, nvmet_tcp_io_work);
+	kref_init(&queue->kref);
 	queue->sock = newsock;
 	queue->port = port;
 	queue->nr_cmds = 0;
@@ -1672,15 +1712,15 @@ static void nvmet_tcp_alloc_queue(struct nvmet_tcp_port *port,
 	mutex_unlock(&nvmet_tcp_queue_mutex);
 
 	ret = nvmet_tcp_set_queue_sock(queue);
-	if (ret)
-		goto out_destroy_sq;
+	if (!ret)
+		return;
 
-	return;
-out_destroy_sq:
+	queue->state = NVMET_TCP_Q_DISCONNECTING;
 	mutex_lock(&nvmet_tcp_queue_mutex);
 	list_del_init(&queue->queue_list);
 	mutex_unlock(&nvmet_tcp_queue_mutex);
-	nvmet_sq_destroy(&queue->nvme_sq);
+	nvmet_tcp_put_queue(queue);
+	return;
 out_free_connect:
 	nvmet_tcp_free_cmd(&queue->connect);
 out_ida_remove:
-- 
2.35.3

