All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/3] nvme: add virt boundary per ctrl
@ 2017-08-16 10:20 Max Gurtovoy
  2017-08-16 10:20 ` [PATCH 2/3] nvme: introduce max_segments controller attribute Max Gurtovoy
                   ` (2 more replies)
  0 siblings, 3 replies; 12+ messages in thread
From: Max Gurtovoy @ 2017-08-16 10:20 UTC (permalink / raw)


Prepare an infrastructure for controllers that can handle arbitrarily
sized bios (e.g advanced RDMA and PCI ctrls). Add virt_boundary_mask
according to each ctrl constraints.

Signed-off-by: Max Gurtovoy <maxg at mellanox.com>
---
 drivers/nvme/host/core.c   |    2 +-
 drivers/nvme/host/fc.c     |    1 +
 drivers/nvme/host/nvme.h   |    1 +
 drivers/nvme/host/pci.c    |    1 +
 drivers/nvme/host/rdma.c   |    1 +
 drivers/nvme/target/loop.c |    1 +
 6 files changed, 6 insertions(+), 1 deletions(-)

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 37046ac..8cf37e7 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -1503,7 +1503,7 @@ static void nvme_set_queue_limits(struct nvme_ctrl *ctrl,
 	}
 	if (ctrl->quirks & NVME_QUIRK_STRIPE_SIZE)
 		blk_queue_chunk_sectors(q, ctrl->max_hw_sectors);
-	blk_queue_virt_boundary(q, ctrl->page_size - 1);
+	blk_queue_virt_boundary(q, ctrl->virt_boundary_mask);
 	if (ctrl->vwc & NVME_CTRL_VWC_PRESENT)
 		vwc = true;
 	blk_queue_write_cache(q, vwc, vwc);
diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index 5c2a08e..707db4d 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -2354,6 +2354,7 @@ enum {
 	segs = min_t(u32, NVME_FC_MAX_SEGMENTS,
 			ctrl->lport->ops->max_sgl_segments);
 	ctrl->ctrl.max_hw_sectors = (segs - 1) << (PAGE_SHIFT - 9);
+	ctrl->ctrl.virt_boundary_mask = ctrl->ctrl.page_size - 1;
 
 	ret = nvme_init_identify(&ctrl->ctrl);
 	if (ret)
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 8f2a168..803c211 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -167,6 +167,7 @@ struct nvme_ctrl {
 	struct work_struct scan_work;
 	struct work_struct async_event_work;
 	struct delayed_work ka_work;
+	unsigned long virt_boundary_mask;
 
 	/* Power saving configuration */
 	u64 ps_max_latency_us;
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 74a124a..da4e130 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -2140,6 +2140,7 @@ static void nvme_reset_work(struct work_struct *work)
 	if (result)
 		goto out;
 
+	dev->ctrl.virt_boundary_mask = dev->ctrl.page_size - 1;
 	result = nvme_init_identify(&dev->ctrl);
 	if (result)
 		goto out;
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index da04df1..04a0d33 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -1584,6 +1584,7 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl)
 
 	ctrl->ctrl.max_hw_sectors =
 		(ctrl->max_fr_pages - 1) << (PAGE_SHIFT - 9);
+	ctrl->ctrl.virt_boundary_mask = ctrl->ctrl.page_size - 1;
 
 	error = nvme_init_identify(&ctrl->ctrl);
 	if (error)
diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c
index 717ed7d..b0a831d 100644
--- a/drivers/nvme/target/loop.c
+++ b/drivers/nvme/target/loop.c
@@ -402,6 +402,7 @@ static int nvme_loop_configure_admin_queue(struct nvme_loop_ctrl *ctrl)
 
 	ctrl->ctrl.max_hw_sectors =
 		(NVME_LOOP_MAX_SEGMENTS - 1) << (PAGE_SHIFT - 9);
+	ctrl->ctrl.virt_boundary_mask = ctrl->ctrl.page_size - 1;
 
 	error = nvme_init_identify(&ctrl->ctrl);
 	if (error)
-- 
1.7.1

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH 2/3] nvme: introduce max_segments controller attribute
  2017-08-16 10:20 [PATCH 1/3] nvme: add virt boundary per ctrl Max Gurtovoy
@ 2017-08-16 10:20 ` Max Gurtovoy
  2017-08-16 13:29   ` Sagi Grimberg
  2017-08-16 10:21 ` [PATCH 3/3] nvme-rdma: fix virtual boundary calculation Max Gurtovoy
  2017-08-21 22:45 ` [PATCH 1/3] nvme: add virt boundary per ctrl James Smart
  2 siblings, 1 reply; 12+ messages in thread
From: Max Gurtovoy @ 2017-08-16 10:20 UTC (permalink / raw)


Each ctrl will limit max_segments according to it's hardware
restrictions. Implement the restriction for RDMA transport.

Signed-off-by: Max Gurtovoy <maxg at mellanox.com>
---
 drivers/nvme/host/core.c |    4 ++--
 drivers/nvme/host/nvme.h |    1 +
 drivers/nvme/host/rdma.c |    1 +
 3 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 8cf37e7..cc0681e 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -1495,8 +1495,8 @@ static void nvme_set_queue_limits(struct nvme_ctrl *ctrl,
 	bool vwc = false;
 
 	if (ctrl->max_hw_sectors) {
-		u32 max_segments =
-			(ctrl->max_hw_sectors / (ctrl->page_size >> 9)) + 1;
+		u32 max_segments = min_not_zero(ctrl->max_segments,
+			(ctrl->max_hw_sectors / (ctrl->page_size >> 9)) + 1);
 
 		blk_queue_max_hw_sectors(q, ctrl->max_hw_sectors);
 		blk_queue_max_segments(q, min_t(u32, max_segments, USHRT_MAX));
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 803c211..0d6a91a 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -147,6 +147,7 @@ struct nvme_ctrl {
 	u64 cap;
 	u32 page_size;
 	u32 max_hw_sectors;
+	u32 max_segments;
 	u16 oncs;
 	u16 vid;
 	u16 oacs;
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 04a0d33..cf71895 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -1582,6 +1582,7 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl)
 	if (error)
 		goto out_cleanup_queue;
 
+	ctrl->ctrl.max_segments = ctrl->max_fr_pages;
 	ctrl->ctrl.max_hw_sectors =
 		(ctrl->max_fr_pages - 1) << (PAGE_SHIFT - 9);
 	ctrl->ctrl.virt_boundary_mask = ctrl->ctrl.page_size - 1;
-- 
1.7.1

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH 3/3] nvme-rdma: fix virtual boundary calculation
  2017-08-16 10:20 [PATCH 1/3] nvme: add virt boundary per ctrl Max Gurtovoy
  2017-08-16 10:20 ` [PATCH 2/3] nvme: introduce max_segments controller attribute Max Gurtovoy
@ 2017-08-16 10:21 ` Max Gurtovoy
  2017-08-21 22:45 ` [PATCH 1/3] nvme: add virt boundary per ctrl James Smart
  2 siblings, 0 replies; 12+ messages in thread
From: Max Gurtovoy @ 2017-08-16 10:21 UTC (permalink / raw)


Set the virtual boundary to correspond the mapping restrictions
of the HCA. This fixes the mismatch in systems with page size > 4KiB.

Signed-off-by: Max Gurtovoy <maxg at mellanox.com>
---
 drivers/nvme/host/rdma.c |    2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index cf71895..8ab5a71 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -1585,7 +1585,7 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl)
 	ctrl->ctrl.max_segments = ctrl->max_fr_pages;
 	ctrl->ctrl.max_hw_sectors =
 		(ctrl->max_fr_pages - 1) << (PAGE_SHIFT - 9);
-	ctrl->ctrl.virt_boundary_mask = ctrl->ctrl.page_size - 1;
+	ctrl->ctrl.virt_boundary_mask = PAGE_SIZE - 1;
 
 	error = nvme_init_identify(&ctrl->ctrl);
 	if (error)
-- 
1.7.1

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH 2/3] nvme: introduce max_segments controller attribute
  2017-08-16 10:20 ` [PATCH 2/3] nvme: introduce max_segments controller attribute Max Gurtovoy
@ 2017-08-16 13:29   ` Sagi Grimberg
  2017-08-16 14:35     ` Max Gurtovoy
  0 siblings, 1 reply; 12+ messages in thread
From: Sagi Grimberg @ 2017-08-16 13:29 UTC (permalink / raw)



> Each ctrl will limit max_segments according to it's hardware
> restrictions. Implement the restriction for RDMA transport.
> 
> Signed-off-by: Max Gurtovoy <maxg at mellanox.com>
> ---
>   drivers/nvme/host/core.c |    4 ++--
>   drivers/nvme/host/nvme.h |    1 +
>   drivers/nvme/host/rdma.c |    1 +
>   3 files changed, 4 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
> index 8cf37e7..cc0681e 100644
> --- a/drivers/nvme/host/core.c
> +++ b/drivers/nvme/host/core.c
> @@ -1495,8 +1495,8 @@ static void nvme_set_queue_limits(struct nvme_ctrl *ctrl,
>   	bool vwc = false;
>   
>   	if (ctrl->max_hw_sectors) {
> -		u32 max_segments =
> -			(ctrl->max_hw_sectors / (ctrl->page_size >> 9)) + 1;
> +		u32 max_segments = min_not_zero(ctrl->max_segments,
> +			(ctrl->max_hw_sectors / (ctrl->page_size >> 9)) + 1);

Why is this needed with patches 1,3 applied?

We set the page_size and we set max_hw_sectors accordingly.

^ permalink raw reply	[flat|nested] 12+ messages in thread

* [PATCH 2/3] nvme: introduce max_segments controller attribute
  2017-08-16 13:29   ` Sagi Grimberg
@ 2017-08-16 14:35     ` Max Gurtovoy
  2017-08-16 19:04       ` Sagi Grimberg
  0 siblings, 1 reply; 12+ messages in thread
From: Max Gurtovoy @ 2017-08-16 14:35 UTC (permalink / raw)




On 8/16/2017 4:29 PM, Sagi Grimberg wrote:
>
>> Each ctrl will limit max_segments according to it's hardware
>> restrictions. Implement the restriction for RDMA transport.
>>
>> Signed-off-by: Max Gurtovoy <maxg at mellanox.com>
>> ---
>>   drivers/nvme/host/core.c |    4 ++--
>>   drivers/nvme/host/nvme.h |    1 +
>>   drivers/nvme/host/rdma.c |    1 +
>>   3 files changed, 4 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
>> index 8cf37e7..cc0681e 100644
>> --- a/drivers/nvme/host/core.c
>> +++ b/drivers/nvme/host/core.c
>> @@ -1495,8 +1495,8 @@ static void nvme_set_queue_limits(struct
>> nvme_ctrl *ctrl,
>>       bool vwc = false;
>>         if (ctrl->max_hw_sectors) {
>> -        u32 max_segments =
>> -            (ctrl->max_hw_sectors / (ctrl->page_size >> 9)) + 1;
>> +        u32 max_segments = min_not_zero(ctrl->max_segments,
>> +            (ctrl->max_hw_sectors / (ctrl->page_size >> 9)) + 1);
>
> Why is this needed with patches 1,3 applied?
>
> We set the page_size and we set max_hw_sectors accordingly.

We didn't touch the ctrl->page_size. It's always 4k (the target checks it).
This may cause max_segments to be bigger than hw limitation (in PPC for 
example).

^ permalink raw reply	[flat|nested] 12+ messages in thread

* [PATCH 2/3] nvme: introduce max_segments controller attribute
  2017-08-16 14:35     ` Max Gurtovoy
@ 2017-08-16 19:04       ` Sagi Grimberg
  2017-08-17  7:40         ` Christoph Hellwig
  0 siblings, 1 reply; 12+ messages in thread
From: Sagi Grimberg @ 2017-08-16 19:04 UTC (permalink / raw)



>> Why is this needed with patches 1,3 applied?
>>
>> We set the page_size and we set max_hw_sectors accordingly.
> 
> We didn't touch the ctrl->page_size. It's always 4k (the target checks it).
> This may cause max_segments to be bigger than hw limitation (in PPC for 
> example).

Right, got confused for a second, but why shouldn't we modify the
ctrl page_size in rdma? Its not really a controller attribute, but
rather the local HCA attribute really.

Or, we could simply fix nvme-core to take PAGE_SIZE if supported by
the ctrl and fallback to 4k otherwise.

^ permalink raw reply	[flat|nested] 12+ messages in thread

* [PATCH 2/3] nvme: introduce max_segments controller attribute
  2017-08-16 19:04       ` Sagi Grimberg
@ 2017-08-17  7:40         ` Christoph Hellwig
  2017-08-20  6:35           ` Sagi Grimberg
  0 siblings, 1 reply; 12+ messages in thread
From: Christoph Hellwig @ 2017-08-17  7:40 UTC (permalink / raw)


On Wed, Aug 16, 2017@10:04:09PM +0300, Sagi Grimberg wrote:
>
>>> Why is this needed with patches 1,3 applied?
>>>
>>> We set the page_size and we set max_hw_sectors accordingly.
>>
>> We didn't touch the ctrl->page_size. It's always 4k (the target checks it).
>> This may cause max_segments to be bigger than hw limitation (in PPC for 
>> example).
>
> Right, got confused for a second, but why shouldn't we modify the
> ctrl page_size in rdma? Its not really a controller attribute, but
> rather the local HCA attribute really.

But the controller needs to support it, and NVMe controllers are
only required to support a 4k page size, the rest is optional.

> Or, we could simply fix nvme-core to take PAGE_SIZE if supported by
> the ctrl and fallback to 4k otherwise.

For PCIe (before fabrics existed) we used to do that, but we changed
it for ppc64 in this commit:

c5c9f25b98 ("NVMe: default to 4k device page size")

^ permalink raw reply	[flat|nested] 12+ messages in thread

* [PATCH 2/3] nvme: introduce max_segments controller attribute
  2017-08-17  7:40         ` Christoph Hellwig
@ 2017-08-20  6:35           ` Sagi Grimberg
  2017-08-22  6:32             ` Christoph Hellwig
  0 siblings, 1 reply; 12+ messages in thread
From: Sagi Grimberg @ 2017-08-20  6:35 UTC (permalink / raw)



>>>> Why is this needed with patches 1,3 applied?
>>>>
>>>> We set the page_size and we set max_hw_sectors accordingly.
>>>
>>> We didn't touch the ctrl->page_size. It's always 4k (the target checks it).
>>> This may cause max_segments to be bigger than hw limitation (in PPC for
>>> example).
>>
>> Right, got confused for a second, but why shouldn't we modify the
>> ctrl page_size in rdma? Its not really a controller attribute, but
>> rather the local HCA attribute really.
> 
> But the controller needs to support it, and NVMe controllers are
> only required to support a 4k page size, the rest is optional.

How exacly is the controller required to support the pag_size for
fabrics?

It could have implications on mdts, but AFAIR, mdts is reported in
units of MPSMIN so I'm not exactly sure how exactly the controller
needs to support it.

>> Or, we could simply fix nvme-core to take PAGE_SIZE if supported by
>> the ctrl and fallback to 4k otherwise.
> 
> For PCIe (before fabrics existed) we used to do that, but we changed
> it for ppc64 in this commit:
> 
> c5c9f25b98 ("NVMe: default to 4k device page size")

Yea... I'm wandering if this is specific to NVMe or it can trigger
in RDMA as well.

Note that for iSER we always use 4k page vectors, but that was
historically done before we had gaps detection in the block layer
and we had higher chance to do bounce buffering in iSER.

^ permalink raw reply	[flat|nested] 12+ messages in thread

* [PATCH 1/3] nvme: add virt boundary per ctrl
  2017-08-16 10:20 [PATCH 1/3] nvme: add virt boundary per ctrl Max Gurtovoy
  2017-08-16 10:20 ` [PATCH 2/3] nvme: introduce max_segments controller attribute Max Gurtovoy
  2017-08-16 10:21 ` [PATCH 3/3] nvme-rdma: fix virtual boundary calculation Max Gurtovoy
@ 2017-08-21 22:45 ` James Smart
  2 siblings, 0 replies; 12+ messages in thread
From: James Smart @ 2017-08-21 22:45 UTC (permalink / raw)


On 8/16/2017 3:20 AM, Max Gurtovoy wrote:
> Prepare an infrastructure for controllers that can handle arbitrarily
> sized bios (e.g advanced RDMA and PCI ctrls). Add virt_boundary_mask
> according to each ctrl constraints.
>
> Signed-off-by: Max Gurtovoy <maxg at mellanox.com>
> ---
>   drivers/nvme/host/core.c   |    2 +-
>   drivers/nvme/host/fc.c     |    1 +
>   drivers/nvme/host/nvme.h   |    1 +
>   drivers/nvme/host/pci.c    |    1 +
>   drivers/nvme/host/rdma.c   |    1 +
>   drivers/nvme/target/loop.c |    1 +
>   6 files changed, 6 insertions(+), 1 deletions(-)
>
>
Looks fine to me.

Reviewed-By: James Smart <james.smart at broadcom.com>

^ permalink raw reply	[flat|nested] 12+ messages in thread

* [PATCH 2/3] nvme: introduce max_segments controller attribute
  2017-08-20  6:35           ` Sagi Grimberg
@ 2017-08-22  6:32             ` Christoph Hellwig
  2017-08-22  9:36               ` Max Gurtovoy
  0 siblings, 1 reply; 12+ messages in thread
From: Christoph Hellwig @ 2017-08-22  6:32 UTC (permalink / raw)


On Sun, Aug 20, 2017@09:35:25AM +0300, Sagi Grimberg wrote:
>> But the controller needs to support it, and NVMe controllers are
>> only required to support a 4k page size, the rest is optional.
>
> How exacly is the controller required to support the pag_size for
> fabrics?

It doesn't matter at all, except as the scaling factor for a few
reported values.

>> For PCIe (before fabrics existed) we used to do that, but we changed
>> it for ppc64 in this commit:
>>
>> c5c9f25b98 ("NVMe: default to 4k device page size")
>
> Yea... I'm wandering if this is specific to NVMe or it can trigger
> in RDMA as well.

As far as I can tell we could hit it with RDMA under the same
circumstances (weird iommu with a smaller page size than the system
page size), but for RDMA the nvme controller page size isn't the
one that matter, but rather the MR page size.

So for now to be safe it might be a good idea to always use a 4k
MR page size..

^ permalink raw reply	[flat|nested] 12+ messages in thread

* [PATCH 2/3] nvme: introduce max_segments controller attribute
  2017-08-22  6:32             ` Christoph Hellwig
@ 2017-08-22  9:36               ` Max Gurtovoy
  2017-08-22  9:40                 ` Christoph Hellwig
  0 siblings, 1 reply; 12+ messages in thread
From: Max Gurtovoy @ 2017-08-22  9:36 UTC (permalink / raw)


> So for now to be safe it might be a good idea to always use a 4k
> MR page size..
>

So should I send a fix only for the MR page size (4k) for now ?
Or should we add the infrastructure for virt_boundary and max_segments 
per ctrl too ?

^ permalink raw reply	[flat|nested] 12+ messages in thread

* [PATCH 2/3] nvme: introduce max_segments controller attribute
  2017-08-22  9:36               ` Max Gurtovoy
@ 2017-08-22  9:40                 ` Christoph Hellwig
  0 siblings, 0 replies; 12+ messages in thread
From: Christoph Hellwig @ 2017-08-22  9:40 UTC (permalink / raw)


On Tue, Aug 22, 2017@12:36:49PM +0300, Max Gurtovoy wrote:
>> So for now to be safe it might be a good idea to always use a 4k
>> MR page size..
>>
>
> So should I send a fix only for the MR page size (4k) for now ?
> Or should we add the infrastructure for virt_boundary and max_segments per 
> ctrl too ?

We'll eventually want both, but for 4.13 fixing the MR page size to
4k should be fine.

^ permalink raw reply	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2017-08-22  9:40 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2017-08-16 10:20 [PATCH 1/3] nvme: add virt boundary per ctrl Max Gurtovoy
2017-08-16 10:20 ` [PATCH 2/3] nvme: introduce max_segments controller attribute Max Gurtovoy
2017-08-16 13:29   ` Sagi Grimberg
2017-08-16 14:35     ` Max Gurtovoy
2017-08-16 19:04       ` Sagi Grimberg
2017-08-17  7:40         ` Christoph Hellwig
2017-08-20  6:35           ` Sagi Grimberg
2017-08-22  6:32             ` Christoph Hellwig
2017-08-22  9:36               ` Max Gurtovoy
2017-08-22  9:40                 ` Christoph Hellwig
2017-08-16 10:21 ` [PATCH 3/3] nvme-rdma: fix virtual boundary calculation Max Gurtovoy
2017-08-21 22:45 ` [PATCH 1/3] nvme: add virt boundary per ctrl James Smart

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.