* [PATCH v2 1/8] nvme-rdma: use ib_client API to wrap ib_device
2018-06-04 12:29 [PATCH v2 0/8] use ib_client API to wrap ib_device Roman Pen
@ 2018-06-04 12:29 ` Roman Pen
2018-06-04 21:38 ` Jason Gunthorpe
2018-06-04 12:29 ` [PATCH v2 2/8] nvmet-rdma: wrap raw kref_get/put() with corresponding helpers Roman Pen
2018-06-04 12:29 ` [PATCH v2 3/8] nvmet-rdma: use ib_client API to wrap ib_device Roman Pen
2 siblings, 1 reply; 12+ messages in thread
From: Roman Pen @ 2018-06-04 12:29 UTC (permalink / raw)
ib_client API provides a way to wrap an ib_device with a specific ULP
structure. Using that API local lists and mutexes can be completely
avoided and allocation/removal paths become a bit cleaner.
Signed-off-by: Roman Pen <roman.penyaev at profitbricks.de>
Cc: Christoph Hellwig <hch at lst.de>
Cc: Steve Wise <swise at opengridcomputing.com>
Cc: Bart Van Assche <bart.vanassche at sandisk.com>
Cc: Sagi Grimberg <sagi at grimberg.me>
Cc: Doug Ledford <dledford at redhat.com>
Cc: linux-nvme at lists.infradead.org
---
drivers/nvme/host/rdma.c | 82 ++++++++++++++++++++++--------------------------
1 file changed, 38 insertions(+), 44 deletions(-)
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 1eb4438a8763..dd79250c9df4 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -46,7 +46,6 @@ struct nvme_rdma_device {
struct ib_device *dev;
struct ib_pd *pd;
struct kref ref;
- struct list_head entry;
};
struct nvme_rdma_qe {
@@ -124,9 +123,7 @@ static inline struct nvme_rdma_ctrl *to_rdma_ctrl(struct nvme_ctrl *ctrl)
return container_of(ctrl, struct nvme_rdma_ctrl, ctrl);
}
-static LIST_HEAD(device_list);
-static DEFINE_MUTEX(device_list_mutex);
-
+static struct ib_client nvme_rdma_ib_client;
static LIST_HEAD(nvme_rdma_ctrl_list);
static DEFINE_MUTEX(nvme_rdma_ctrl_mutex);
@@ -325,17 +322,14 @@ static void nvme_rdma_free_dev(struct kref *ref)
struct nvme_rdma_device *ndev =
container_of(ref, struct nvme_rdma_device, ref);
- mutex_lock(&device_list_mutex);
- list_del(&ndev->entry);
- mutex_unlock(&device_list_mutex);
-
+ ib_set_client_data(ndev->dev, &nvme_rdma_ib_client, NULL);
ib_dealloc_pd(ndev->pd);
kfree(ndev);
}
-static void nvme_rdma_dev_put(struct nvme_rdma_device *dev)
+static int nvme_rdma_dev_put(struct nvme_rdma_device *dev)
{
- kref_put(&dev->ref, nvme_rdma_free_dev);
+ return kref_put(&dev->ref, nvme_rdma_free_dev);
}
static int nvme_rdma_dev_get(struct nvme_rdma_device *dev)
@@ -348,43 +342,42 @@ nvme_rdma_find_get_device(struct rdma_cm_id *cm_id)
{
struct nvme_rdma_device *ndev;
- mutex_lock(&device_list_mutex);
- list_for_each_entry(ndev, &device_list, entry) {
- if (ndev->dev->node_guid == cm_id->device->node_guid &&
- nvme_rdma_dev_get(ndev))
- goto out_unlock;
+ ndev = ib_get_client_data(cm_id->device, &nvme_rdma_ib_client);
+ if (ndev && WARN_ON(!nvme_rdma_dev_get(ndev)))
+ ndev = NULL;
+
+ return ndev;
+}
+
+static struct nvme_rdma_device *
+nvme_rdma_alloc_device(struct ib_device *device)
+{
+ struct nvme_rdma_device *ndev;
+
+ if (!(device->attrs.device_cap_flags &
+ IB_DEVICE_MEM_MGT_EXTENSIONS)) {
+ pr_err("Memory registrations not supported.\n");
+ return NULL;
}
ndev = kzalloc(sizeof(*ndev), GFP_KERNEL);
- if (!ndev)
- goto out_err;
+ if (unlikely(!ndev))
+ return NULL;
- ndev->dev = cm_id->device;
+ ndev->dev = device;
kref_init(&ndev->ref);
ndev->pd = ib_alloc_pd(ndev->dev,
register_always ? 0 : IB_PD_UNSAFE_GLOBAL_RKEY);
- if (IS_ERR(ndev->pd))
+ if (unlikely(IS_ERR(ndev->pd)))
goto out_free_dev;
- if (!(ndev->dev->attrs.device_cap_flags &
- IB_DEVICE_MEM_MGT_EXTENSIONS)) {
- dev_err(&ndev->dev->dev,
- "Memory registrations not supported.\n");
- goto out_free_pd;
- }
+ ib_set_client_data(ndev->dev, &nvme_rdma_ib_client, ndev);
- list_add(&ndev->entry, &device_list);
-out_unlock:
- mutex_unlock(&device_list_mutex);
return ndev;
-out_free_pd:
- ib_dealloc_pd(ndev->pd);
out_free_dev:
kfree(ndev);
-out_err:
- mutex_unlock(&device_list_mutex);
return NULL;
}
@@ -2017,22 +2010,21 @@ static struct nvmf_transport_ops nvme_rdma_transport = {
.create_ctrl = nvme_rdma_create_ctrl,
};
-static void nvme_rdma_remove_one(struct ib_device *ib_device, void *client_data)
+static void nvme_rdma_add_one(struct ib_device *ib_device)
{
- struct nvme_rdma_ctrl *ctrl;
struct nvme_rdma_device *ndev;
- bool found = false;
- mutex_lock(&device_list_mutex);
- list_for_each_entry(ndev, &device_list, entry) {
- if (ndev->dev == ib_device) {
- found = true;
- break;
- }
- }
- mutex_unlock(&device_list_mutex);
+ ndev = nvme_rdma_alloc_device(ib_device);
+ if (unlikely(!ndev))
+ pr_info("Allocation of a device failed.\n");
+}
+
+static void nvme_rdma_remove_one(struct ib_device *ib_device, void *client_data)
+{
+ struct nvme_rdma_device *ndev = client_data;
+ struct nvme_rdma_ctrl *ctrl;
- if (!found)
+ if (unlikely(!ndev))
return;
/* Delete all controllers using this device */
@@ -2045,10 +2037,12 @@ static void nvme_rdma_remove_one(struct ib_device *ib_device, void *client_data)
mutex_unlock(&nvme_rdma_ctrl_mutex);
flush_workqueue(nvme_delete_wq);
+ WARN_ON(!nvme_rdma_dev_put(ndev));
}
static struct ib_client nvme_rdma_ib_client = {
.name = "nvme_rdma",
+ .add = nvme_rdma_add_one,
.remove = nvme_rdma_remove_one
};
--
2.13.1
^ permalink raw reply related [flat|nested] 12+ messages in thread* [PATCH v2 1/8] nvme-rdma: use ib_client API to wrap ib_device
2018-06-04 12:29 ` [PATCH v2 1/8] nvme-rdma: " Roman Pen
@ 2018-06-04 21:38 ` Jason Gunthorpe
2018-06-05 8:24 ` Max Gurtovoy
2018-06-05 10:18 ` Roman Penyaev
0 siblings, 2 replies; 12+ messages in thread
From: Jason Gunthorpe @ 2018-06-04 21:38 UTC (permalink / raw)
On Mon, Jun 04, 2018 at 02:29:56PM +0200, Roman Pen wrote:
> ib_client API provides a way to wrap an ib_device with a specific ULP
> structure. Using that API local lists and mutexes can be completely
> avoided and allocation/removal paths become a bit cleaner.
>
> Signed-off-by: Roman Pen <roman.penyaev at profitbricks.de>
> Cc: Christoph Hellwig <hch at lst.de>
> Cc: Steve Wise <swise at opengridcomputing.com>
> Cc: Bart Van Assche <bart.vanassche at sandisk.com>
> Cc: Sagi Grimberg <sagi at grimberg.me>
> Cc: Doug Ledford <dledford at redhat.com>
> Cc: linux-nvme at lists.infradead.org
> drivers/nvme/host/rdma.c | 82 ++++++++++++++++++++++--------------------------
> 1 file changed, 38 insertions(+), 44 deletions(-)
>
> diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
> index 1eb4438a8763..dd79250c9df4 100644
> +++ b/drivers/nvme/host/rdma.c
> @@ -46,7 +46,6 @@ struct nvme_rdma_device {
> struct ib_device *dev;
> struct ib_pd *pd;
> struct kref ref;
> - struct list_head entry;
> };
>
> struct nvme_rdma_qe {
> @@ -124,9 +123,7 @@ static inline struct nvme_rdma_ctrl *to_rdma_ctrl(struct nvme_ctrl *ctrl)
> return container_of(ctrl, struct nvme_rdma_ctrl, ctrl);
> }
>
> -static LIST_HEAD(device_list);
> -static DEFINE_MUTEX(device_list_mutex);
> -
> +static struct ib_client nvme_rdma_ib_client;
> static LIST_HEAD(nvme_rdma_ctrl_list);
> static DEFINE_MUTEX(nvme_rdma_ctrl_mutex);
>
> @@ -325,17 +322,14 @@ static void nvme_rdma_free_dev(struct kref *ref)
> struct nvme_rdma_device *ndev =
> container_of(ref, struct nvme_rdma_device, ref);
>
> - mutex_lock(&device_list_mutex);
> - list_del(&ndev->entry);
> - mutex_unlock(&device_list_mutex);
> -
> + ib_set_client_data(ndev->dev, &nvme_rdma_ib_client, NULL);
> ib_dealloc_pd(ndev->pd);
> kfree(ndev);
> }
>
> -static void nvme_rdma_dev_put(struct nvme_rdma_device *dev)
> +static int nvme_rdma_dev_put(struct nvme_rdma_device *dev)
> {
> - kref_put(&dev->ref, nvme_rdma_free_dev);
> + return kref_put(&dev->ref, nvme_rdma_free_dev);
> }
>
> static int nvme_rdma_dev_get(struct nvme_rdma_device *dev)
> @@ -348,43 +342,42 @@ nvme_rdma_find_get_device(struct rdma_cm_id *cm_id)
> {
> struct nvme_rdma_device *ndev;
>
> - mutex_lock(&device_list_mutex);
> - list_for_each_entry(ndev, &device_list, entry) {
> - if (ndev->dev->node_guid == cm_id->device->node_guid &&
> - nvme_rdma_dev_get(ndev))
> - goto out_unlock;
> + ndev = ib_get_client_data(cm_id->device, &nvme_rdma_ib_client);
> + if (ndev && WARN_ON(!nvme_rdma_dev_get(ndev)))
> + ndev = NULL;
I think this is a much better idea to use the client data than
maintaining an internal list - this is what client data was for..
But I wonder if the allocation of the client data should be deferred
until the ulp actually needs to use the device?
Jason
^ permalink raw reply [flat|nested] 12+ messages in thread* [PATCH v2 1/8] nvme-rdma: use ib_client API to wrap ib_device
2018-06-04 21:38 ` Jason Gunthorpe
@ 2018-06-05 8:24 ` Max Gurtovoy
2018-06-05 10:18 ` Roman Penyaev
2018-06-05 10:18 ` Roman Penyaev
1 sibling, 1 reply; 12+ messages in thread
From: Max Gurtovoy @ 2018-06-05 8:24 UTC (permalink / raw)
On 6/5/2018 12:38 AM, Jason Gunthorpe wrote:
> On Mon, Jun 04, 2018@02:29:56PM +0200, Roman Pen wrote:
>> ib_client API provides a way to wrap an ib_device with a specific ULP
>> structure. Using that API local lists and mutexes can be completely
>> avoided and allocation/removal paths become a bit cleaner.
>>
>> Signed-off-by: Roman Pen <roman.penyaev at profitbricks.de>
>> Cc: Christoph Hellwig <hch at lst.de>
>> Cc: Steve Wise <swise at opengridcomputing.com>
>> Cc: Bart Van Assche <bart.vanassche at sandisk.com>
>> Cc: Sagi Grimberg <sagi at grimberg.me>
>> Cc: Doug Ledford <dledford at redhat.com>
>> Cc: linux-nvme at lists.infradead.org
>> drivers/nvme/host/rdma.c | 82 ++++++++++++++++++++++--------------------------
>> 1 file changed, 38 insertions(+), 44 deletions(-)
>>
>> diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
>> index 1eb4438a8763..dd79250c9df4 100644
>> +++ b/drivers/nvme/host/rdma.c
>> @@ -46,7 +46,6 @@ struct nvme_rdma_device {
>> struct ib_device *dev;
>> struct ib_pd *pd;
>> struct kref ref;
>> - struct list_head entry;
>> };
>>
>> struct nvme_rdma_qe {
>> @@ -124,9 +123,7 @@ static inline struct nvme_rdma_ctrl *to_rdma_ctrl(struct nvme_ctrl *ctrl)
>> return container_of(ctrl, struct nvme_rdma_ctrl, ctrl);
>> }
>>
>> -static LIST_HEAD(device_list);
>> -static DEFINE_MUTEX(device_list_mutex);
>> -
>> +static struct ib_client nvme_rdma_ib_client;
>> static LIST_HEAD(nvme_rdma_ctrl_list);
>> static DEFINE_MUTEX(nvme_rdma_ctrl_mutex);
>>
>> @@ -325,17 +322,14 @@ static void nvme_rdma_free_dev(struct kref *ref)
>> struct nvme_rdma_device *ndev =
>> container_of(ref, struct nvme_rdma_device, ref);
>>
>> - mutex_lock(&device_list_mutex);
>> - list_del(&ndev->entry);
>> - mutex_unlock(&device_list_mutex);
>> -
>> + ib_set_client_data(ndev->dev, &nvme_rdma_ib_client, NULL);
>> ib_dealloc_pd(ndev->pd);
>> kfree(ndev);
>> }
>>
>> -static void nvme_rdma_dev_put(struct nvme_rdma_device *dev)
>> +static int nvme_rdma_dev_put(struct nvme_rdma_device *dev)
>> {
>> - kref_put(&dev->ref, nvme_rdma_free_dev);
>> + return kref_put(&dev->ref, nvme_rdma_free_dev);
>> }
>>
>> static int nvme_rdma_dev_get(struct nvme_rdma_device *dev)
>> @@ -348,43 +342,42 @@ nvme_rdma_find_get_device(struct rdma_cm_id *cm_id)
>> {
>> struct nvme_rdma_device *ndev;
>>
>> - mutex_lock(&device_list_mutex);
>> - list_for_each_entry(ndev, &device_list, entry) {
>> - if (ndev->dev->node_guid == cm_id->device->node_guid &&
>> - nvme_rdma_dev_get(ndev))
>> - goto out_unlock;
>> + ndev = ib_get_client_data(cm_id->device, &nvme_rdma_ib_client);
>> + if (ndev && WARN_ON(!nvme_rdma_dev_get(ndev)))
>> + ndev = NULL;
>
> I think this is a much better idea to use the client data than
> maintaining an internal list - this is what client data was for..
>
> But I wonder if the allocation of the client data should be deferred
> until the ulp actually needs to use the device?
I agree with Jason here. You will create ndev for every ib_device and
this is redundant.
Also, is there any reason you use likely/unlikely statements outside of the fast path?
Max
>
> Jason
> --
> To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
> the body of a message to majordomo at vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
>
^ permalink raw reply [flat|nested] 12+ messages in thread* [PATCH v2 1/8] nvme-rdma: use ib_client API to wrap ib_device
2018-06-05 8:24 ` Max Gurtovoy
@ 2018-06-05 10:18 ` Roman Penyaev
0 siblings, 0 replies; 12+ messages in thread
From: Roman Penyaev @ 2018-06-05 10:18 UTC (permalink / raw)
On Tue, Jun 5, 2018 at 10:24 AM, Max Gurtovoy <maxg@mellanox.com> wrote:
>
>
> On 6/5/2018 12:38 AM, Jason Gunthorpe wrote:
>>
>> On Mon, Jun 04, 2018@02:29:56PM +0200, Roman Pen wrote:
>>>
>>> ib_client API provides a way to wrap an ib_device with a specific ULP
>>> structure. Using that API local lists and mutexes can be completely
>>> avoided and allocation/removal paths become a bit cleaner.
>>>
>>> Signed-off-by: Roman Pen <roman.penyaev at profitbricks.de>
>>> Cc: Christoph Hellwig <hch at lst.de>
>>> Cc: Steve Wise <swise at opengridcomputing.com>
>>> Cc: Bart Van Assche <bart.vanassche at sandisk.com>
>>> Cc: Sagi Grimberg <sagi at grimberg.me>
>>> Cc: Doug Ledford <dledford at redhat.com>
>>> Cc: linux-nvme at lists.infradead.org
>>> drivers/nvme/host/rdma.c | 82
>>> ++++++++++++++++++++++--------------------------
>>> 1 file changed, 38 insertions(+), 44 deletions(-)
>>>
>>> diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
>>> index 1eb4438a8763..dd79250c9df4 100644
>>> +++ b/drivers/nvme/host/rdma.c
>>> @@ -46,7 +46,6 @@ struct nvme_rdma_device {
>>> struct ib_device *dev;
>>> struct ib_pd *pd;
>>> struct kref ref;
>>> - struct list_head entry;
>>> };
>>> struct nvme_rdma_qe {
>>> @@ -124,9 +123,7 @@ static inline struct nvme_rdma_ctrl
>>> *to_rdma_ctrl(struct nvme_ctrl *ctrl)
>>> return container_of(ctrl, struct nvme_rdma_ctrl, ctrl);
>>> }
>>> -static LIST_HEAD(device_list);
>>> -static DEFINE_MUTEX(device_list_mutex);
>>> -
>>> +static struct ib_client nvme_rdma_ib_client;
>>> static LIST_HEAD(nvme_rdma_ctrl_list);
>>> static DEFINE_MUTEX(nvme_rdma_ctrl_mutex);
>>> @@ -325,17 +322,14 @@ static void nvme_rdma_free_dev(struct kref *ref)
>>> struct nvme_rdma_device *ndev =
>>> container_of(ref, struct nvme_rdma_device, ref);
>>> - mutex_lock(&device_list_mutex);
>>> - list_del(&ndev->entry);
>>> - mutex_unlock(&device_list_mutex);
>>> -
>>> + ib_set_client_data(ndev->dev, &nvme_rdma_ib_client, NULL);
>>> ib_dealloc_pd(ndev->pd);
>>> kfree(ndev);
>>> }
>>> -static void nvme_rdma_dev_put(struct nvme_rdma_device *dev)
>>> +static int nvme_rdma_dev_put(struct nvme_rdma_device *dev)
>>> {
>>> - kref_put(&dev->ref, nvme_rdma_free_dev);
>>> + return kref_put(&dev->ref, nvme_rdma_free_dev);
>>> }
>>> static int nvme_rdma_dev_get(struct nvme_rdma_device *dev)
>>> @@ -348,43 +342,42 @@ nvme_rdma_find_get_device(struct rdma_cm_id *cm_id)
>>> {
>>> struct nvme_rdma_device *ndev;
>>> - mutex_lock(&device_list_mutex);
>>> - list_for_each_entry(ndev, &device_list, entry) {
>>> - if (ndev->dev->node_guid == cm_id->device->node_guid &&
>>> - nvme_rdma_dev_get(ndev))
>>> - goto out_unlock;
>>> + ndev = ib_get_client_data(cm_id->device, &nvme_rdma_ib_client);
>>> + if (ndev && WARN_ON(!nvme_rdma_dev_get(ndev)))
>>> + ndev = NULL;
>>
>>
>> I think this is a much better idea to use the client data than
>> maintaining an internal list - this is what client data was for..
>>
>> But I wonder if the allocation of the client data should be deferred
>> until the ulp actually needs to use the device?
>
>
> I agree with Jason here. You will create ndev for every ib_device and this
> is redundant.
It seems this is a common pattern: wrap all ib devices in *add_one()
and use them by calling ib_get_client_data(). But of course the ulp dev
can be created on demand, when it is not found. I sent a chunk of
the code in reply to Jason's email, please take a look.
>
> Also, any reason you use likely/unlikely statements not in the fast path ?
IMO it sometimes makes the code easier to read, e.g. error branches are
better distinguishable.
--
Roman
^ permalink raw reply [flat|nested] 12+ messages in thread
* [PATCH v2 1/8] nvme-rdma: use ib_client API to wrap ib_device
2018-06-04 21:38 ` Jason Gunthorpe
2018-06-05 8:24 ` Max Gurtovoy
@ 2018-06-05 10:18 ` Roman Penyaev
1 sibling, 0 replies; 12+ messages in thread
From: Roman Penyaev @ 2018-06-05 10:18 UTC (permalink / raw)
On Mon, Jun 4, 2018 at 11:38 PM, Jason Gunthorpe <jgg@ziepe.ca> wrote:
> On Mon, Jun 04, 2018@02:29:56PM +0200, Roman Pen wrote:
>> ib_client API provides a way to wrap an ib_device with a specific ULP
>> structure. Using that API local lists and mutexes can be completely
>> avoided and allocation/removal paths become a bit cleaner.
>>
>> Signed-off-by: Roman Pen <roman.penyaev at profitbricks.de>
>> Cc: Christoph Hellwig <hch at lst.de>
>> Cc: Steve Wise <swise at opengridcomputing.com>
>> Cc: Bart Van Assche <bart.vanassche at sandisk.com>
>> Cc: Sagi Grimberg <sagi at grimberg.me>
>> Cc: Doug Ledford <dledford at redhat.com>
>> Cc: linux-nvme at lists.infradead.org
>> drivers/nvme/host/rdma.c | 82 ++++++++++++++++++++++--------------------------
>> 1 file changed, 38 insertions(+), 44 deletions(-)
>>
>> diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
>> index 1eb4438a8763..dd79250c9df4 100644
>> +++ b/drivers/nvme/host/rdma.c
>> @@ -46,7 +46,6 @@ struct nvme_rdma_device {
>> struct ib_device *dev;
>> struct ib_pd *pd;
>> struct kref ref;
>> - struct list_head entry;
>> };
>>
>> struct nvme_rdma_qe {
>> @@ -124,9 +123,7 @@ static inline struct nvme_rdma_ctrl *to_rdma_ctrl(struct nvme_ctrl *ctrl)
>> return container_of(ctrl, struct nvme_rdma_ctrl, ctrl);
>> }
>>
>> -static LIST_HEAD(device_list);
>> -static DEFINE_MUTEX(device_list_mutex);
>> -
>> +static struct ib_client nvme_rdma_ib_client;
>> static LIST_HEAD(nvme_rdma_ctrl_list);
>> static DEFINE_MUTEX(nvme_rdma_ctrl_mutex);
>>
>> @@ -325,17 +322,14 @@ static void nvme_rdma_free_dev(struct kref *ref)
>> struct nvme_rdma_device *ndev =
>> container_of(ref, struct nvme_rdma_device, ref);
>>
>> - mutex_lock(&device_list_mutex);
>> - list_del(&ndev->entry);
>> - mutex_unlock(&device_list_mutex);
>> -
>> + ib_set_client_data(ndev->dev, &nvme_rdma_ib_client, NULL);
>> ib_dealloc_pd(ndev->pd);
>> kfree(ndev);
>> }
>>
>> -static void nvme_rdma_dev_put(struct nvme_rdma_device *dev)
>> +static int nvme_rdma_dev_put(struct nvme_rdma_device *dev)
>> {
>> - kref_put(&dev->ref, nvme_rdma_free_dev);
>> + return kref_put(&dev->ref, nvme_rdma_free_dev);
>> }
>>
>> static int nvme_rdma_dev_get(struct nvme_rdma_device *dev)
>> @@ -348,43 +342,42 @@ nvme_rdma_find_get_device(struct rdma_cm_id *cm_id)
>> {
>> struct nvme_rdma_device *ndev;
>>
>> - mutex_lock(&device_list_mutex);
>> - list_for_each_entry(ndev, &device_list, entry) {
>> - if (ndev->dev->node_guid == cm_id->device->node_guid &&
>> - nvme_rdma_dev_get(ndev))
>> - goto out_unlock;
>> + ndev = ib_get_client_data(cm_id->device, &nvme_rdma_ib_client);
>> + if (ndev && WARN_ON(!nvme_rdma_dev_get(ndev)))
>> + ndev = NULL;
>
> I think this is a much better idea to use the client data than
> maintaining an internal list - this is what client data was for..
>
> But I wonder if the allocation of the client data should be deferred
> until the ulp actually needs to use the device?
No, actually it should not; I've reused a common pattern: wrap all ib
devices in *add_one() and use them by calling ib_get_client_data().
Code might be rewritten as following with the help of a global mutex:
static DEFINE_MUTEX(nvme_devices_mutex);
static struct nvme_rdma_device *
nvme_rdma_find_get_device(struct rdma_cm_id *cm_id)
{
struct nvme_rdma_device *ndev;
mutex_lock(&nvme_devices_mutex);
ndev = ib_get_client_data(cm_id->device, &nvme_rdma_ib_client);
if (ndev && WARN_ON(!nvme_rdma_dev_get(ndev)))
ndev = NULL;
else if (!ndev) {
ndev = nvme_rdma_alloc_device(cmd_id->device);
if (likely(ndev))
ib_set_client_data(cm_id->device,
&nvmet_rdma_ib_client,
ndev);
}
mutex_unlock(&nvme_devices_mutex);
return ndev;
}
static void nvme_rdma_free_dev(struct kref *ref)
{
...
/* Remove pointer from internal lists before actual free */
ib_set_client_data(ndev->dev, &nvme_rdma_ib_client, NULL);
kfree(ndev);
}
static void
nvme_rdma_remove_one(struct ib_device *ib_device, void *client_data)
{
...
flush_workqueue(nvme_delete_wq);
/*
* Here at this point all refs on ndev should be put,
* ndev freed and client_data set to NULL.
*/
WARN_ON(ib_get_client_data(ib_device, &nvme_rdma_ib_client));
}
--
Roman
^ permalink raw reply [flat|nested] 12+ messages in thread
* [PATCH v2 2/8] nvmet-rdma: wrap raw kref_get/put() with corresponding helpers
2018-06-04 12:29 [PATCH v2 0/8] use ib_client API to wrap ib_device Roman Pen
2018-06-04 12:29 ` [PATCH v2 1/8] nvme-rdma: " Roman Pen
@ 2018-06-04 12:29 ` Roman Pen
2018-06-04 21:49 ` Jason Gunthorpe
2018-06-04 12:29 ` [PATCH v2 3/8] nvmet-rdma: use ib_client API to wrap ib_device Roman Pen
2 siblings, 1 reply; 12+ messages in thread
From: Roman Pen @ 2018-06-04 12:29 UTC (permalink / raw)
This is just a preparation patch: put kref_put() and kref_get_unless_zero()
inside helpers with more suitable names.
Signed-off-by: Roman Pen <roman.penyaev at profitbricks.de>
Cc: Christoph Hellwig <hch at lst.de>
Cc: Steve Wise <swise at opengridcomputing.com>
Cc: Bart Van Assche <bart.vanassche at sandisk.com>
Cc: Sagi Grimberg <sagi at grimberg.me>
Cc: Doug Ledford <dledford at redhat.com>
Cc: linux-nvme at lists.infradead.org
---
drivers/nvme/target/rdma.c | 19 ++++++++++++++-----
1 file changed, 14 insertions(+), 5 deletions(-)
diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c
index 52e0c5d579a7..4304b8d8d027 100644
--- a/drivers/nvme/target/rdma.c
+++ b/drivers/nvme/target/rdma.c
@@ -775,7 +775,7 @@ static int nvmet_rdma_init_srq(struct nvmet_rdma_device *ndev)
return ret;
}
-static void nvmet_rdma_free_dev(struct kref *ref)
+static void nvmet_rdma_free_device(struct kref *ref)
{
struct nvmet_rdma_device *ndev =
container_of(ref, struct nvmet_rdma_device, ref);
@@ -790,6 +790,16 @@ static void nvmet_rdma_free_dev(struct kref *ref)
kfree(ndev);
}
+static int nvmet_rdma_dev_put(struct nvmet_rdma_device *dev)
+{
+ return kref_put(&dev->ref, nvmet_rdma_free_device);
+}
+
+static int nvmet_rdma_dev_get(struct nvmet_rdma_device *dev)
+{
+ return kref_get_unless_zero(&dev->ref);
+}
+
static struct nvmet_rdma_device *
nvmet_rdma_find_get_device(struct rdma_cm_id *cm_id)
{
@@ -799,7 +809,7 @@ nvmet_rdma_find_get_device(struct rdma_cm_id *cm_id)
mutex_lock(&device_list_mutex);
list_for_each_entry(ndev, &device_list, entry) {
if (ndev->device->node_guid == cm_id->device->node_guid &&
- kref_get_unless_zero(&ndev->ref))
+ nvmet_rdma_dev_get(ndev))
goto out_unlock;
}
@@ -945,8 +955,7 @@ static void nvmet_rdma_release_queue_work(struct work_struct *w)
struct nvmet_rdma_device *dev = queue->dev;
nvmet_rdma_free_queue(queue);
-
- kref_put(&dev->ref, nvmet_rdma_free_dev);
+ nvmet_rdma_dev_put(dev);
}
static int
@@ -1163,7 +1172,7 @@ static int nvmet_rdma_queue_connect(struct rdma_cm_id *cm_id,
return 0;
put_device:
- kref_put(&ndev->ref, nvmet_rdma_free_dev);
+ nvmet_rdma_dev_put(ndev);
return ret;
}
--
2.13.1
^ permalink raw reply related [flat|nested] 12+ messages in thread* [PATCH v2 2/8] nvmet-rdma: wrap raw kref_get/put() with corresponding helpers
2018-06-04 12:29 ` [PATCH v2 2/8] nvmet-rdma: wrap raw kref_get/put() with corresponding helpers Roman Pen
@ 2018-06-04 21:49 ` Jason Gunthorpe
2018-06-05 4:43 ` Christoph Hellwig
2018-06-05 10:19 ` Roman Penyaev
0 siblings, 2 replies; 12+ messages in thread
From: Jason Gunthorpe @ 2018-06-04 21:49 UTC (permalink / raw)
On Mon, Jun 04, 2018 at 02:29:57PM +0200, Roman Pen wrote:
> This is just a preparation patch: put kref_put() and kref_get_unless_zero()
> inside helpers with more suitable names.
>
> Signed-off-by: Roman Pen <roman.penyaev at profitbricks.de>
> Cc: Christoph Hellwig <hch at lst.de>
> Cc: Steve Wise <swise at opengridcomputing.com>
> Cc: Bart Van Assche <bart.vanassche at sandisk.com>
> Cc: Sagi Grimberg <sagi at grimberg.me>
> Cc: Doug Ledford <dledford at redhat.com>
> Cc: linux-nvme at lists.infradead.org
> drivers/nvme/target/rdma.c | 19 ++++++++++++++-----
> 1 file changed, 14 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c
> index 52e0c5d579a7..4304b8d8d027 100644
> +++ b/drivers/nvme/target/rdma.c
> @@ -775,7 +775,7 @@ static int nvmet_rdma_init_srq(struct nvmet_rdma_device *ndev)
> return ret;
> }
>
> -static void nvmet_rdma_free_dev(struct kref *ref)
> +static void nvmet_rdma_free_device(struct kref *ref)
> {
> struct nvmet_rdma_device *ndev =
> container_of(ref, struct nvmet_rdma_device, ref);
> @@ -790,6 +790,16 @@ static void nvmet_rdma_free_dev(struct kref *ref)
> kfree(ndev);
> }
>
> +static int nvmet_rdma_dev_put(struct nvmet_rdma_device *dev)
> +{
> + return kref_put(&dev->ref, nvmet_rdma_free_device);
> +}
> +
> +static int nvmet_rdma_dev_get(struct nvmet_rdma_device *dev)
> +{
> + return kref_get_unless_zero(&dev->ref);
> +}
Don't wrap get_unless_zero with something called get..
unless_zero should only be used in special situations where something tricky
is going on.. In this case it looks like it is because a
nvmet_rdma_device can continue to reside in the device_list() even after
being fully 'put'..
Which does pose a question to this whole series, what have you done to
ensure the client data access isn't racing with the removal of the
device/client?
We might want to adjust the core code to make this a little saner, eg
maybe the client_data should have an option that manages a kref'd
memory blob internally with proper locking or something..
Jason
^ permalink raw reply [flat|nested] 12+ messages in thread* [PATCH v2 2/8] nvmet-rdma: wrap raw kref_get/put() with corresponding helpers
2018-06-04 21:49 ` Jason Gunthorpe
@ 2018-06-05 4:43 ` Christoph Hellwig
2018-06-05 10:19 ` Roman Penyaev
2018-06-05 10:19 ` Roman Penyaev
1 sibling, 1 reply; 12+ messages in thread
From: Christoph Hellwig @ 2018-06-05 4:43 UTC (permalink / raw)
On Mon, Jun 04, 2018 at 03:49:16PM -0600, Jason Gunthorpe wrote:
> > +{
> > + return kref_put(&dev->ref, nvmet_rdma_free_device);
> > +}
> > +
> > +static int nvmet_rdma_dev_get(struct nvmet_rdma_device *dev)
> > +{
> > + return kref_get_unless_zero(&dev->ref);
> > +}
>
> Don't wrap get_unless_zero with something called get..
Agreed. And I hate all these wrappers. Either we use kref_*
or we use wrappers and plain refcount_* ops, not both.
^ permalink raw reply [flat|nested] 12+ messages in thread* [PATCH v2 2/8] nvmet-rdma: wrap raw kref_get/put() with corresponding helpers
2018-06-05 4:43 ` Christoph Hellwig
@ 2018-06-05 10:19 ` Roman Penyaev
0 siblings, 0 replies; 12+ messages in thread
From: Roman Penyaev @ 2018-06-05 10:19 UTC (permalink / raw)
On Tue, Jun 5, 2018 at 6:43 AM, Christoph Hellwig <hch@lst.de> wrote:
> On Mon, Jun 04, 2018@03:49:16PM -0600, Jason Gunthorpe wrote:
>> > +{
>> > + return kref_put(&dev->ref, nvmet_rdma_free_device);
>> > +}
>> > +
>> > +static int nvmet_rdma_dev_get(struct nvmet_rdma_device *dev)
>> > +{
>> > + return kref_get_unless_zero(&dev->ref);
>> > +}
>>
>> Don't wrap get_unless_zero with something called get..
>
> Agreed. And I hate all these wrappers. Either we use kref_*
> or we use wrappers and plain refcount_* ops, not both.
What I did not mention in the commit message is that this is
done only to unify the code with host/rdma.c, which has
exactly the same wrappers. Since I am touching both sides
(host and target), this code difference is annoying.
Do you think it is better to remove the wrappers from the nvme host,
or just leave everything as is?
--
Roman
^ permalink raw reply [flat|nested] 12+ messages in thread
* [PATCH v2 2/8] nvmet-rdma: wrap raw kref_get/put() with corresponding helpers
2018-06-04 21:49 ` Jason Gunthorpe
2018-06-05 4:43 ` Christoph Hellwig
@ 2018-06-05 10:19 ` Roman Penyaev
1 sibling, 0 replies; 12+ messages in thread
From: Roman Penyaev @ 2018-06-05 10:19 UTC (permalink / raw)
On Mon, Jun 4, 2018 at 11:49 PM, Jason Gunthorpe <jgg@ziepe.ca> wrote:
> On Mon, Jun 04, 2018@02:29:57PM +0200, Roman Pen wrote:
>> This is just a preparation patch: put kref_put() and kref_get_unless_zero()
>> inside helpers with more suitable names.
>>
>> Signed-off-by: Roman Pen <roman.penyaev at profitbricks.de>
>> Cc: Christoph Hellwig <hch at lst.de>
>> Cc: Steve Wise <swise at opengridcomputing.com>
>> Cc: Bart Van Assche <bart.vanassche at sandisk.com>
>> Cc: Sagi Grimberg <sagi at grimberg.me>
>> Cc: Doug Ledford <dledford at redhat.com>
>> Cc: linux-nvme at lists.infradead.org
>> drivers/nvme/target/rdma.c | 19 ++++++++++++++-----
>> 1 file changed, 14 insertions(+), 5 deletions(-)
>>
>> diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c
>> index 52e0c5d579a7..4304b8d8d027 100644
>> +++ b/drivers/nvme/target/rdma.c
>> @@ -775,7 +775,7 @@ static int nvmet_rdma_init_srq(struct nvmet_rdma_device *ndev)
>> return ret;
>> }
>>
>> -static void nvmet_rdma_free_dev(struct kref *ref)
>> +static void nvmet_rdma_free_device(struct kref *ref)
>> {
>> struct nvmet_rdma_device *ndev =
>> container_of(ref, struct nvmet_rdma_device, ref);
>> @@ -790,6 +790,16 @@ static void nvmet_rdma_free_dev(struct kref *ref)
>> kfree(ndev);
>> }
>>
>> +static int nvmet_rdma_dev_put(struct nvmet_rdma_device *dev)
>> +{
>> + return kref_put(&dev->ref, nvmet_rdma_free_device);
>> +}
>> +
>> +static int nvmet_rdma_dev_get(struct nvmet_rdma_device *dev)
>> +{
>> + return kref_get_unless_zero(&dev->ref);
>> +}
>
> Don't wrap get_unless_zero with something called get..
>
> unless_zero should only be used in special situations where something tricky
> is going on.. In this case it looks like it is because a
> nvmet_rdma_device can continue to reside in the device_list() even after
> being fully 'put'..
>
> Which does pose a question to this whole series, what have you done to
> ensure the client data access isn't racing with the removal of the
> device/client?
It races, but nvmet_rdma_remove_one() guarantees that all queued
release work items will finish, so no users of ndev are left at the point
when flush_scheduled_work() returns control:
static void
nvme_rdma_remove_one(struct ib_device *ib_device, void *client_data)
{
...
flush_workqueue(nvme_delete_wq);
/*
* Here at this point no users of ndev are left
*/
}
--
Roman
^ permalink raw reply [flat|nested] 12+ messages in thread
* [PATCH v2 3/8] nvmet-rdma: use ib_client API to wrap ib_device
2018-06-04 12:29 [PATCH v2 0/8] use ib_client API to wrap ib_device Roman Pen
2018-06-04 12:29 ` [PATCH v2 1/8] nvme-rdma: " Roman Pen
2018-06-04 12:29 ` [PATCH v2 2/8] nvmet-rdma: wrap raw kref_get/put() with corresponding helpers Roman Pen
@ 2018-06-04 12:29 ` Roman Pen
2 siblings, 0 replies; 12+ messages in thread
From: Roman Pen @ 2018-06-04 12:29 UTC (permalink / raw)
ib_client API provides a way to wrap an ib_device with a specific ULP
structure. Using that API local lists and mutexes can be completely
avoided and allocation/removal paths become a bit cleaner.
Signed-off-by: Roman Pen <roman.penyaev at profitbricks.de>
Cc: Christoph Hellwig <hch at lst.de>
Cc: Steve Wise <swise at opengridcomputing.com>
Cc: Bart Van Assche <bart.vanassche at sandisk.com>
Cc: Sagi Grimberg <sagi at grimberg.me>
Cc: Doug Ledford <dledford at redhat.com>
Cc: linux-nvme at lists.infradead.org
---
drivers/nvme/target/rdma.c | 70 ++++++++++++++++++++++------------------------
1 file changed, 33 insertions(+), 37 deletions(-)
diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c
index 4304b8d8d027..5699b544b23e 100644
--- a/drivers/nvme/target/rdma.c
+++ b/drivers/nvme/target/rdma.c
@@ -125,9 +125,7 @@ MODULE_PARM_DESC(use_srq, "Use shared receive queue.");
static DEFINE_IDA(nvmet_rdma_queue_ida);
static LIST_HEAD(nvmet_rdma_queue_list);
static DEFINE_MUTEX(nvmet_rdma_queue_mutex);
-
-static LIST_HEAD(device_list);
-static DEFINE_MUTEX(device_list_mutex);
+static struct ib_client nvmet_rdma_ib_client;
static bool nvmet_rdma_execute_command(struct nvmet_rdma_rsp *rsp);
static void nvmet_rdma_send_done(struct ib_cq *cq, struct ib_wc *wc);
@@ -780,13 +778,9 @@ static void nvmet_rdma_free_device(struct kref *ref)
struct nvmet_rdma_device *ndev =
container_of(ref, struct nvmet_rdma_device, ref);
- mutex_lock(&device_list_mutex);
- list_del(&ndev->entry);
- mutex_unlock(&device_list_mutex);
-
+ ib_set_client_data(ndev->device, &nvmet_rdma_ib_client, NULL);
nvmet_rdma_destroy_srq(ndev);
ib_dealloc_pd(ndev->pd);
-
kfree(ndev);
}
@@ -804,24 +798,29 @@ static struct nvmet_rdma_device *
nvmet_rdma_find_get_device(struct rdma_cm_id *cm_id)
{
struct nvmet_rdma_device *ndev;
- int ret;
- mutex_lock(&device_list_mutex);
- list_for_each_entry(ndev, &device_list, entry) {
- if (ndev->device->node_guid == cm_id->device->node_guid &&
- nvmet_rdma_dev_get(ndev))
- goto out_unlock;
- }
+ ndev = ib_get_client_data(cm_id->device, &nvmet_rdma_ib_client);
+ if (ndev && WARN_ON(!nvmet_rdma_dev_get(ndev)))
+ ndev = NULL;
+
+ return ndev;
+}
+
+static struct nvmet_rdma_device *
+nvmet_rdma_alloc_device(struct ib_device *device)
+{
+ struct nvmet_rdma_device *ndev;
+ int ret;
ndev = kzalloc(sizeof(*ndev), GFP_KERNEL);
- if (!ndev)
- goto out_err;
+ if (unlikely(!ndev))
+ return NULL;
- ndev->device = cm_id->device;
+ ndev->device = device;
kref_init(&ndev->ref);
ndev->pd = ib_alloc_pd(ndev->device, 0);
- if (IS_ERR(ndev->pd))
+ if (unlikely(IS_ERR(ndev->pd)))
goto out_free_dev;
if (nvmet_rdma_use_srq) {
@@ -829,19 +828,15 @@ nvmet_rdma_find_get_device(struct rdma_cm_id *cm_id)
if (ret)
goto out_free_pd;
}
-
- list_add(&ndev->entry, &device_list);
-out_unlock:
- mutex_unlock(&device_list_mutex);
+ ib_set_client_data(ndev->device, &nvmet_rdma_ib_client, ndev);
pr_debug("added %s.\n", ndev->device->name);
+
return ndev;
out_free_pd:
ib_dealloc_pd(ndev->pd);
out_free_dev:
kfree(ndev);
-out_err:
- mutex_unlock(&device_list_mutex);
return NULL;
}
@@ -1475,22 +1470,21 @@ static const struct nvmet_fabrics_ops nvmet_rdma_ops = {
.disc_traddr = nvmet_rdma_disc_port_addr,
};
-static void nvmet_rdma_remove_one(struct ib_device *ib_device, void *client_data)
+static void nvmet_rdma_add_one(struct ib_device *ib_device)
{
- struct nvmet_rdma_queue *queue, *tmp;
struct nvmet_rdma_device *ndev;
- bool found = false;
- mutex_lock(&device_list_mutex);
- list_for_each_entry(ndev, &device_list, entry) {
- if (ndev->device == ib_device) {
- found = true;
- break;
- }
- }
- mutex_unlock(&device_list_mutex);
+ ndev = nvmet_rdma_alloc_device(ib_device);
+ if (unlikely(!ndev))
+ pr_info("Allocation of a device failed.\n");
+}
+
+static void nvmet_rdma_remove_one(struct ib_device *ib_device, void *client_data)
+{
+ struct nvmet_rdma_device *ndev = client_data;
+ struct nvmet_rdma_queue *queue, *tmp;
- if (!found)
+ if (unlikely(!ndev))
return;
/*
@@ -1510,10 +1504,12 @@ static void nvmet_rdma_remove_one(struct ib_device *ib_device, void *client_data
mutex_unlock(&nvmet_rdma_queue_mutex);
flush_scheduled_work();
+ WARN_ON(!nvmet_rdma_dev_put(ndev));
}
static struct ib_client nvmet_rdma_ib_client = {
.name = "nvmet_rdma",
+ .add = nvmet_rdma_add_one,
.remove = nvmet_rdma_remove_one
};
--
2.13.1
^ permalink raw reply related [flat|nested] 12+ messages in thread