public inbox for linux-nvme@lists.infradead.org
 help / color / mirror / Atom feed
From: Max Gurtovoy <mgurtovoy@nvidia.com>
To: Sagi Grimberg <sagi@grimberg.me>,
	<linux-nvme@lists.infradead.org>, <hch@lst.de>,
	<kbusch@kernel.org>
Cc: <chaitanyak@nvidia.com>, <israelr@nvidia.com>, <oren@nvidia.com>,
	<jsmart2021@gmail.com>
Subject: Re: [PATCH 5/7] nvme/nvme-fabrics: introduce nvmf_error_recovery_work API
Date: Tue, 19 Oct 2021 16:17:12 +0300	[thread overview]
Message-ID: <7ed884cd-7f54-a719-36dd-5151655d05da@nvidia.com> (raw)
In-Reply-To: <f170b07c-0307-eeb4-72d8-196a471b660b@grimberg.me>


On 10/19/2021 3:43 PM, Sagi Grimberg wrote:
>
>
> On 10/18/21 4:40 PM, Max Gurtovoy wrote:
>> Error recovery work is duplicated in RDMA and TCP transports. Move this
>> logic to common code. For that, introduce 2 new ctrl ops to teardown IO
>> and admin queue.
>>
>> Also update the RDMA/TCP transport drivers to use this API and remove
>> the duplicated code.
>>
>> Reviewed-by: Israel Rukshin <israelr@nvidia.com>
>> Signed-off-by: Max Gurtovoy <mgurtovoy@nvidia.com>
>> ---
>>   drivers/nvme/host/fabrics.c | 23 +++++++++++++++
>>   drivers/nvme/host/fabrics.h |  1 +
>>   drivers/nvme/host/nvme.h    |  4 +++
>>   drivers/nvme/host/rdma.c    | 56 ++++++++++++++++---------------------
>>   drivers/nvme/host/tcp.c     | 56 +++++++++++++++----------------------
>>   5 files changed, 75 insertions(+), 65 deletions(-)
>
> Diffstat dry stats are not in your favor...
>
>>
>> diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c
>> index 2edd086fa922..544195369c97 100644
>> --- a/drivers/nvme/host/fabrics.c
>> +++ b/drivers/nvme/host/fabrics.c
>> @@ -493,6 +493,29 @@ void nvmf_reconnect_or_remove(struct nvme_ctrl 
>> *ctrl)
>>   }
>>   EXPORT_SYMBOL_GPL(nvmf_reconnect_or_remove);
>>   +void nvmf_error_recovery_work(struct work_struct *work)
>> +{
>> +    struct nvme_ctrl *ctrl = container_of(work,
>> +                struct nvme_ctrl, err_work);
>> +
>> +    nvme_stop_keep_alive(ctrl);
>> +    ctrl->ops->teardown_ctrl_io_queues(ctrl);
>> +    /* unquiesce to fail fast pending requests */
>> +    nvme_start_queues(ctrl);
>> +    ctrl->ops->teardown_ctrl_admin_queue(ctrl);
>> +    blk_mq_unquiesce_queue(ctrl->admin_q);
>> +
>> +    if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_CONNECTING)) {
>> +        /* state change failure is ok if we started ctrl delete */
>> +        WARN_ON_ONCE(ctrl->state != NVME_CTRL_DELETING &&
>> +                 ctrl->state != NVME_CTRL_DELETING_NOIO);
>> +        return;
>> +    }
>> +
>> +    nvmf_reconnect_or_remove(ctrl);
>
> We need James to provide feedback on how this can be useful for FC.
>
>> +}
>> +EXPORT_SYMBOL_GPL(nvmf_error_recovery_work);
>> +
>>   void nvmf_error_recovery(struct nvme_ctrl *ctrl)
>>   {
>>       if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_RESETTING))
>> diff --git a/drivers/nvme/host/fabrics.h b/drivers/nvme/host/fabrics.h
>> index 3d8ec7133fc8..8655eff74ed0 100644
>> --- a/drivers/nvme/host/fabrics.h
>> +++ b/drivers/nvme/host/fabrics.h
>> @@ -190,6 +190,7 @@ int nvmf_get_address(struct nvme_ctrl *ctrl, char 
>> *buf, int size);
>>   bool nvmf_should_reconnect(struct nvme_ctrl *ctrl);
>>   void nvmf_reconnect_or_remove(struct nvme_ctrl *ctrl);
>>   void nvmf_error_recovery(struct nvme_ctrl *ctrl);
>> +void nvmf_error_recovery_work(struct work_struct *work);
>>   bool nvmf_ip_options_match(struct nvme_ctrl *ctrl,
>>           struct nvmf_ctrl_options *opts);
>>   diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
>> index f9e1ce93d61d..1573edf6e97f 100644
>> --- a/drivers/nvme/host/nvme.h
>> +++ b/drivers/nvme/host/nvme.h
>> @@ -493,6 +493,10 @@ struct nvme_ctrl_ops {
>>       void (*submit_async_event)(struct nvme_ctrl *ctrl);
>>       void (*delete_ctrl)(struct nvme_ctrl *ctrl);
>>       int (*get_address)(struct nvme_ctrl *ctrl, char *buf, int size);
>> +
>> +    /* Fabrics only */
>> +    void (*teardown_ctrl_io_queues)(struct nvme_ctrl *ctrl);
>> +    void (*teardown_ctrl_admin_queue)(struct nvme_ctrl *ctrl);
>
> It is strange that we have teardown without a setup callback...

We can do it incrementally.

It's not the first time we've done it :)

>
>>   };
>>     /*
>> diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
>> index 1c57e371af61..f4e4ebf673d2 100644
>> --- a/drivers/nvme/host/rdma.c
>> +++ b/drivers/nvme/host/rdma.c
>> @@ -1031,6 +1031,11 @@ static void 
>> nvme_rdma_teardown_admin_queue(struct nvme_rdma_ctrl *ctrl,
>>       nvme_rdma_destroy_admin_queue(ctrl, remove);
>>   }
>>   +static void _nvme_rdma_teardown_admin_queue(struct nvme_ctrl *ctrl)
>> +{
>> +    nvme_rdma_teardown_admin_queue(to_rdma_ctrl(ctrl), false);
>> +}
>> +
>>   static void nvme_rdma_teardown_io_queues(struct nvme_rdma_ctrl *ctrl,
>>           bool remove)
>>   {
>> @@ -1046,6 +1051,11 @@ static void 
>> nvme_rdma_teardown_io_queues(struct nvme_rdma_ctrl *ctrl,
>>       }
>>   }
>>   +static void _nvme_rdma_teardown_io_queues(struct nvme_ctrl *ctrl)
>> +{
>> +    nvme_rdma_teardown_io_queues(to_rdma_ctrl(ctrl), false);
>> +}
>> +
>>   static void nvme_rdma_free_ctrl(struct nvme_ctrl *nctrl)
>>   {
>>       struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(nctrl);
>> @@ -1164,27 +1174,6 @@ static void 
>> nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
>>       nvmf_reconnect_or_remove(&ctrl->ctrl);
>>   }
>>   -static void nvme_rdma_error_recovery_work(struct work_struct *work)
>> -{
>> -    struct nvme_rdma_ctrl *ctrl = container_of(work,
>> -            struct nvme_rdma_ctrl, ctrl.err_work);
>> -
>> -    nvme_stop_keep_alive(&ctrl->ctrl);
>> -    nvme_rdma_teardown_io_queues(ctrl, false);
>> -    nvme_start_queues(&ctrl->ctrl);
>> -    nvme_rdma_teardown_admin_queue(ctrl, false);
>> -    blk_mq_unquiesce_queue(ctrl->ctrl.admin_q);
>> -
>> -    if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING)) {
>> -        /* state change failure is ok if we started ctrl delete */
>> -        WARN_ON_ONCE(ctrl->ctrl.state != NVME_CTRL_DELETING &&
>> -                 ctrl->ctrl.state != NVME_CTRL_DELETING_NOIO);
>> -        return;
>> -    }
>> -
>> -    nvmf_reconnect_or_remove(&ctrl->ctrl);
>> -}
>> -
>>   static void nvme_rdma_end_request(struct nvme_rdma_request *req)
>>   {
>>       struct request *rq = blk_mq_rq_from_pdu(req);
>> @@ -2240,16 +2229,19 @@ static void nvme_rdma_reset_ctrl_work(struct 
>> work_struct *work)
>>   }
>>     static const struct nvme_ctrl_ops nvme_rdma_ctrl_ops = {
>> -    .name            = "rdma",
>> -    .module            = THIS_MODULE,
>> -    .flags            = NVME_F_FABRICS | NVME_F_METADATA_SUPPORTED,
>> -    .reg_read32        = nvmf_reg_read32,
>> -    .reg_read64        = nvmf_reg_read64,
>> -    .reg_write32        = nvmf_reg_write32,
>> -    .free_ctrl        = nvme_rdma_free_ctrl,
>> -    .submit_async_event    = nvme_rdma_submit_async_event,
>> -    .delete_ctrl        = nvme_rdma_delete_ctrl,
>> -    .get_address        = nvmf_get_address,
>> +    .name                = "rdma",
>> +    .module                = THIS_MODULE,
>> +    .flags                = NVME_F_FABRICS |
>> +                      NVME_F_METADATA_SUPPORTED,
>> +    .reg_read32            = nvmf_reg_read32,
>> +    .reg_read64            = nvmf_reg_read64,
>> +    .reg_write32            = nvmf_reg_write32,
>> +    .free_ctrl            = nvme_rdma_free_ctrl,
>> +    .submit_async_event        = nvme_rdma_submit_async_event,
>> +    .delete_ctrl            = nvme_rdma_delete_ctrl,
>> +    .get_address            = nvmf_get_address,
>> +    .teardown_ctrl_io_queues    = _nvme_rdma_teardown_io_queues,
>> +    .teardown_ctrl_admin_queue    = _nvme_rdma_teardown_admin_queue,
>>   };
>>     /*
>> @@ -2329,7 +2321,7 @@ static struct nvme_ctrl 
>> *nvme_rdma_create_ctrl(struct device *dev,
>>         INIT_DELAYED_WORK(&ctrl->ctrl.connect_work,
>>               nvme_rdma_reconnect_ctrl_work);
>> -    INIT_WORK(&ctrl->ctrl.err_work, nvme_rdma_error_recovery_work);
>> +    INIT_WORK(&ctrl->ctrl.err_work, nvmf_error_recovery_work);
>
> This initialization needs to move to the core or fabrics lib.

It's done in the next patches.



  reply	other threads:[~2021-10-19 13:17 UTC|newest]

Thread overview: 34+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-10-18 13:40 [PATCH v1 0/7] Centralize common fabrics code to core drivers Max Gurtovoy
2021-10-18 13:40 ` [PATCH 1/7] nvme: add connect_work attribute to nvme ctrl Max Gurtovoy
2021-10-19 12:32   ` Sagi Grimberg
2021-10-19 13:20   ` Hannes Reinecke
2021-10-20 13:34   ` Himanshu Madhani
2021-10-18 13:40 ` [PATCH 2/7] nvme-fabrics: introduce nvmf_reconnect_or_remove API Max Gurtovoy
2021-10-19  6:26   ` Chaitanya Kulkarni
2021-10-19 12:36   ` Sagi Grimberg
2021-10-19 12:58     ` Max Gurtovoy
2021-10-19 13:21   ` Hannes Reinecke
2021-10-20 13:34   ` Himanshu Madhani
2021-10-18 13:40 ` [PATCH 3/7] nvme: add err_work attribute to nvme ctrl Max Gurtovoy
2021-10-19 12:36   ` Sagi Grimberg
2021-10-20 13:34   ` Himanshu Madhani
2021-10-18 13:40 ` [PATCH 4/7] nvme-fabrics: introduce nvmf_error_recovery API Max Gurtovoy
2021-10-19 13:27   ` Hannes Reinecke
2021-10-20 13:34   ` Himanshu Madhani
2021-10-18 13:40 ` [PATCH 5/7] nvme/nvme-fabrics: introduce nvmf_error_recovery_work API Max Gurtovoy
2021-10-19  6:29   ` Chaitanya Kulkarni
2021-10-19 12:43   ` Sagi Grimberg
2021-10-19 13:17     ` Max Gurtovoy [this message]
2021-10-19 13:34   ` Hannes Reinecke
2021-10-18 13:40 ` [PATCH 6/7] nvme/nvme-fabrics: introduce nvmf_reconnect_ctrl_work API Max Gurtovoy
2021-10-19  6:29   ` Chaitanya Kulkarni
2021-10-19 12:44   ` Sagi Grimberg
2021-10-19 13:18     ` Max Gurtovoy
2021-10-19 13:41   ` Hannes Reinecke
2021-10-18 13:40 ` [PATCH 7/7] nvme-fabrics: add nvmf_init_ctrl/nvmf_teardown_ctrl API Max Gurtovoy
2021-10-19 12:46   ` Sagi Grimberg
2021-10-19 13:20     ` Max Gurtovoy
2021-10-18 14:08 ` [PATCH v1 0/7] Centralize common fabrics code to core drivers James Smart
2021-10-19  5:36   ` Christoph Hellwig
2021-10-19  6:24 ` Chaitanya Kulkarni
2021-10-19 12:32 ` Sagi Grimberg

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=7ed884cd-7f54-a719-36dd-5151655d05da@nvidia.com \
    --to=mgurtovoy@nvidia.com \
    --cc=chaitanyak@nvidia.com \
    --cc=hch@lst.de \
    --cc=israelr@nvidia.com \
    --cc=jsmart2021@gmail.com \
    --cc=kbusch@kernel.org \
    --cc=linux-nvme@lists.infradead.org \
    --cc=oren@nvidia.com \
    --cc=sagi@grimberg.me \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox