From: sagi@grimberg.me (Sagi Grimberg)
Subject: nvmet: race condition while CQEs are processed concurrently with the DISCONNECTED event
Date: Thu, 9 Mar 2017 13:45:37 +0200
Message-ID: <7c08d54c-7a2d-74dd-79df-807113b032c3@grimberg.me>
In-Reply-To: <CY1PR12MB07747089AFFCB6115BBCF5ECBC210@CY1PR12MB0774.namprd12.prod.outlook.com>

> Hi Sagi,

Hi Raju (CC'ing Yi)

>
> I tried each of the patches below individually; the issue is still seen with both patches.
>
> With patch #1, dmesg shows that the NULL pointer dereference is hit before steps 3, 4 and 5 (see below) completed successfully for that queue
>
> Where :
> 1. rdma_disconnect
> 2. nvmet_sq_destroy
> 3. ib_drain_qp
> 4. rdma_destroy_qp
> 5. ib_free_cq (which flushes the cq worker)
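
To be clear about where those steps live: 3-5 all run from the queue
release path. Roughly (trimmed from nvmet_rdma_destroy_queue_ib(),
modulo differences between trees):

static void nvmet_rdma_destroy_queue_ib(struct nvmet_rdma_queue *queue)
{
        ib_drain_qp(queue->cm_id->qp);  /* (3) drain inflight work */
        rdma_destroy_qp(queue->cm_id);  /* (4) */
        ib_free_cq(queue->cq);          /* (5) flushes the cq worker */
}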

I took a deeper look here, and I think the root cause has nothing to
do with the two (still useful) patches I sent. Actually, the fact that
with patch (1) you hit the NULL deref even before steps 3, 4 and 5
completed tells me that the qp and cq are not freed at all, and that
for some reason we are dereferencing something that is NULL.

So in nvmet_rdma_recv_done(), if the queue is not in state
NVMET_RDMA_Q_LIVE, we simply return the rsp to the queue's free
list:

static inline void
nvmet_rdma_put_rsp(struct nvmet_rdma_rsp *rsp)
{
         unsigned long flags;

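         /* NOTE: rsp->queue may still be NULL at this point; it is
          * only assigned once nvmet_rdma_handle_command() runs */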
         spin_lock_irqsave(&rsp->queue->rsps_lock, flags);
         list_add_tail(&rsp->free_list, &rsp->queue->free_rsps);
         spin_unlock_irqrestore(&rsp->queue->rsps_lock, flags);
}

However, rsp->queue is only assigned in nvmet_rdma_handle_command(),
which never ran for this rsp because, as I mentioned, we are in the
disconnect state...
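
To illustrate, this is roughly the early-return path we take in
nvmet_rdma_recv_done() on a dying queue (trimmed):

         if (unlikely(queue->state != NVMET_RDMA_Q_LIVE)) {
                 unsigned long flags;

                 spin_lock_irqsave(&queue->state_lock, flags);
                 if (queue->state == NVMET_RDMA_Q_CONNECTING)
                         list_add_tail(&rsp->wait_list,
                                         &queue->rsp_wait_list);
                 else
                         nvmet_rdma_put_rsp(rsp); /* rsp->queue == NULL */
                 spin_unlock_irqrestore(&queue->state_lock, flags);
                 return;
         }

So the rsp is recycled before rsp->queue was ever assigned, and the
spin_lock_irqsave() in nvmet_rdma_put_rsp() dereferences NULL.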

I think this patch should make this issue go away:
--
diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c
index 973b674ab55b..06a8c6114098 100644
--- a/drivers/nvme/target/rdma.c
+++ b/drivers/nvme/target/rdma.c
@@ -180,9 +180,9 @@ nvmet_rdma_put_rsp(struct nvmet_rdma_rsp *rsp)
  {
         unsigned long flags;

-       spin_lock_irqsave(&rsp->queue->rsps_lock, flags);
-       list_add_tail(&rsp->free_list, &rsp->queue->free_rsps);
-       spin_unlock_irqrestore(&rsp->queue->rsps_lock, flags);
+       spin_lock_irqsave(&rsp->cmd->queue->rsps_lock, flags);
+       list_add_tail(&rsp->free_list, &rsp->cmd->queue->free_rsps);
+       spin_unlock_irqrestore(&rsp->cmd->queue->rsps_lock, flags);
  }

  static void nvmet_rdma_free_sgl(struct scatterlist *sgl, unsigned int nents)
@@ -473,7 +473,7 @@ static void nvmet_rdma_process_wr_wait_list(struct nvmet_rdma_queue *queue)

  static void nvmet_rdma_release_rsp(struct nvmet_rdma_rsp *rsp)
  {
-       struct nvmet_rdma_queue *queue = rsp->queue;
+       struct nvmet_rdma_queue *queue = rsp->cmd->queue;

         atomic_add(1 + rsp->n_rdma, &queue->sq_wr_avail);

@@ -517,7 +517,7 @@ static void nvmet_rdma_send_done(struct ib_cq *cq, struct ib_wc *wc)
                      wc->status != IB_WC_WR_FLUSH_ERR)) {
                 pr_err("SEND for CQE 0x%p failed with status %s (%d).\n",
                        wc->wr_cqe, ib_wc_status_msg(wc->status), wc->status);
-               nvmet_rdma_error_comp(rsp->queue);
+               nvmet_rdma_error_comp(rsp->cmd->queue);
         }
  }

@@ -525,7 +525,7 @@ static void nvmet_rdma_queue_response(struct nvmet_req *req)
  {
         struct nvmet_rdma_rsp *rsp =
                 container_of(req, struct nvmet_rdma_rsp, req);
-       struct rdma_cm_id *cm_id = rsp->queue->cm_id;
+       struct rdma_cm_id *cm_id = rsp->cmd->queue->cm_id;
         struct ib_send_wr *first_wr, *bad_wr;

         if (rsp->flags & NVMET_RDMA_REQ_INVALIDATE_RKEY) {
@@ -541,9 +541,9 @@ static void nvmet_rdma_queue_response(struct nvmet_req *req)
         else
                 first_wr = &rsp->send_wr;

-       nvmet_rdma_post_recv(rsp->queue->dev, rsp->cmd);
+       nvmet_rdma_post_recv(rsp->cmd->queue->dev, rsp->cmd);

-       ib_dma_sync_single_for_device(rsp->queue->dev->device,
+       ib_dma_sync_single_for_device(rsp->cmd->queue->dev->device,
                 rsp->send_sge.addr, rsp->send_sge.length,
                 DMA_TO_DEVICE);

@@ -614,7 +614,7 @@ static u16 nvmet_rdma_map_sgl_inline(struct nvmet_rdma_rsp *rsp)
  static u16 nvmet_rdma_map_sgl_keyed(struct nvmet_rdma_rsp *rsp,
                 struct nvme_keyed_sgl_desc *sgl, bool invalidate)
  {
-       struct rdma_cm_id *cm_id = rsp->queue->cm_id;
+       struct rdma_cm_id *cm_id = rsp->cmd->queue->cm_id;
         u64 addr = le64_to_cpu(sgl->addr);
         u32 len = get_unaligned_le24(sgl->length);
         u32 key = get_unaligned_le32(sgl->key);
@@ -676,7 +676,7 @@ static u16 nvmet_rdma_map_sgl(struct nvmet_rdma_rsp *rsp)

  static bool nvmet_rdma_execute_command(struct nvmet_rdma_rsp *rsp)
  {
-       struct nvmet_rdma_queue *queue = rsp->queue;
+       struct nvmet_rdma_queue *queue = rsp->cmd->queue;

         if (unlikely(atomic_sub_return(1 + rsp->n_rdma,
                         &queue->sq_wr_avail) < 0)) {
@@ -703,11 +703,6 @@ static void nvmet_rdma_handle_command(struct nvmet_rdma_queue *queue,
  {
         u16 status;

-       cmd->queue = queue;
-       cmd->n_rdma = 0;
-       cmd->req.port = queue->port;
-
-
         ib_dma_sync_single_for_cpu(queue->dev->device,
                 cmd->cmd->sge[0].addr, cmd->cmd->sge[0].length,
                 DMA_FROM_DEVICE);
@@ -763,6 +758,8 @@ static void nvmet_rdma_recv_done(struct ib_cq *cq, struct ib_wc *wc)
         rsp->cmd = cmd;
         rsp->flags = 0;
         rsp->req.cmd = cmd->nvme_cmd;
+       rsp->n_rdma = 0;
+       rsp->req.port = queue->port;

         if (unlikely(queue->state != NVMET_RDMA_Q_LIVE)) {
                 unsigned long flags;
--
