Linux RDMA and InfiniBand development

Linux RDMA and InfiniBand development
 help / color / mirror / Atom feed

* [PATCH 1/2] mm: add locked parameter to get_user_pages_remote()
From: Lorenzo Stoakes @ 2016-10-27  9:51 UTC (permalink / raw)
  To: linux-mm
  Cc: Michal Hocko, Linus Torvalds, Jan Kara, Hugh Dickins, Dave Hansen,
	Rik van Riel, Mel Gorman, Andrew Morton, Paolo Bonzini,
	Radim Krčmář, kvm, linux-kernel,
	linux-security-module, linux-rdma, dri-devel, linux-fsdevel,
	Lorenzo Stoakes
In-Reply-To: <20161027095141.2569-1-lstoakes@gmail.com>

This patch adds an int *locked parameter to get_user_pages_remote() to allow
VM_FAULT_RETRY faulting behaviour similar to get_user_pages_[un]locked().

Taking into account the previous adjustments to get_user_pages*() functions
allowing for the passing of gup_flags, we are now in a position where
__get_user_pages_unlocked() need only be exported for its ability to allow
VM_FAULT_RETRY behaviour, this adjustment allows us to subsequently unexport
__get_user_pages_unlocked() as well as allowing for future flexibility in the
use of get_user_pages_remote().

Signed-off-by: Lorenzo Stoakes <lstoakes@gmail.com>
---
 drivers/gpu/drm/etnaviv/etnaviv_gem.c   |  2 +-
 drivers/gpu/drm/i915/i915_gem_userptr.c |  2 +-
 drivers/infiniband/core/umem_odp.c      |  2 +-
 fs/exec.c                               |  2 +-
 include/linux/mm.h                      |  2 +-
 kernel/events/uprobes.c                 |  4 ++--
 mm/gup.c                                | 12 ++++++++----
 mm/memory.c                             |  2 +-
 security/tomoyo/domain.c                |  2 +-
 9 files changed, 17 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem.c b/drivers/gpu/drm/etnaviv/etnaviv_gem.c
index 0370b84..0c69a97f 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gem.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gem.c
@@ -763,7 +763,7 @@ static struct page **etnaviv_gem_userptr_do_get_pages(
 	down_read(&mm->mmap_sem);
 	while (pinned < npages) {
 		ret = get_user_pages_remote(task, mm, ptr, npages - pinned,
-					    flags, pvec + pinned, NULL);
+					    flags, pvec + pinned, NULL, NULL);
 		if (ret < 0)
 			break;

diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c
index c6f780f..836b525 100644
--- a/drivers/gpu/drm/i915/i915_gem_userptr.c
+++ b/drivers/gpu/drm/i915/i915_gem_userptr.c
@@ -522,7 +522,7 @@ __i915_gem_userptr_get_pages_worker(struct work_struct *_work)
 					 obj->userptr.ptr + pinned * PAGE_SIZE,
 					 npages - pinned,
 					 flags,
-					 pvec + pinned, NULL);
+					 pvec + pinned, NULL, NULL);
 				if (ret < 0)
 					break;

diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c
index 1f0fe32..6b079a3 100644
--- a/drivers/infiniband/core/umem_odp.c
+++ b/drivers/infiniband/core/umem_odp.c
@@ -578,7 +578,7 @@ int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 user_virt, u64 bcnt,
 		 */
 		npages = get_user_pages_remote(owning_process, owning_mm,
 				user_virt, gup_num_pages,
-				flags, local_page_list, NULL);
+				flags, local_page_list, NULL, NULL);
 		up_read(&owning_mm->mmap_sem);

 		if (npages < 0)
diff --git a/fs/exec.c b/fs/exec.c
index 4e497b9..2cf049d 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -209,7 +209,7 @@ static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
 	 * doing the exec and bprm->mm is the new process's mm.
 	 */
 	ret = get_user_pages_remote(current, bprm->mm, pos, 1, gup_flags,
-			&page, NULL);
+			&page, NULL, NULL);
 	if (ret <= 0)
 		return NULL;

diff --git a/include/linux/mm.h b/include/linux/mm.h
index a92c8d7..cc15445 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1274,7 +1274,7 @@ extern int access_remote_vm(struct mm_struct *mm, unsigned long addr,
 long get_user_pages_remote(struct task_struct *tsk, struct mm_struct *mm,
 			    unsigned long start, unsigned long nr_pages,
 			    unsigned int gup_flags, struct page **pages,
-			    struct vm_area_struct **vmas);
+			    struct vm_area_struct **vmas, int *locked);
 long get_user_pages(unsigned long start, unsigned long nr_pages,
 			    unsigned int gup_flags, struct page **pages,
 			    struct vm_area_struct **vmas);
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index f9ec9ad..215871b 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -301,7 +301,7 @@ int uprobe_write_opcode(struct mm_struct *mm, unsigned long vaddr,
 retry:
 	/* Read the page with vaddr into memory */
 	ret = get_user_pages_remote(NULL, mm, vaddr, 1, FOLL_FORCE, &old_page,
-			&vma);
+			&vma, NULL);
 	if (ret <= 0)
 		return ret;

@@ -1712,7 +1712,7 @@ static int is_trap_at_addr(struct mm_struct *mm, unsigned long vaddr)
 	 * essentially a kernel access to the memory.
 	 */
 	result = get_user_pages_remote(NULL, mm, vaddr, 1, FOLL_FORCE, &page,
-			NULL);
+			NULL, NULL);
 	if (result < 0)
 		return result;

diff --git a/mm/gup.c b/mm/gup.c
index ec4f827..0567851 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -920,6 +920,9 @@ EXPORT_SYMBOL(get_user_pages_unlocked);
  *		only intends to ensure the pages are faulted in.
  * @vmas:	array of pointers to vmas corresponding to each page.
  *		Or NULL if the caller does not require them.
+ * @locked:	pointer to lock flag indicating whether lock is held and
+ *		subsequently whether VM_FAULT_RETRY functionality can be
+ *		utilised. Lock must initially be held.
  *
  * Returns number of pages pinned. This may be fewer than the number
  * requested. If nr_pages is 0 or negative, returns 0. If no pages
@@ -963,10 +966,10 @@ EXPORT_SYMBOL(get_user_pages_unlocked);
 long get_user_pages_remote(struct task_struct *tsk, struct mm_struct *mm,
 		unsigned long start, unsigned long nr_pages,
 		unsigned int gup_flags, struct page **pages,
-		struct vm_area_struct **vmas)
+		struct vm_area_struct **vmas, int *locked)
 {
 	return __get_user_pages_locked(tsk, mm, start, nr_pages, pages, vmas,
-				       NULL, false,
+				       locked, true,
 				       gup_flags | FOLL_TOUCH | FOLL_REMOTE);
 }
 EXPORT_SYMBOL(get_user_pages_remote);
@@ -974,8 +977,9 @@ EXPORT_SYMBOL(get_user_pages_remote);
 /*
  * This is the same as get_user_pages_remote(), just with a
  * less-flexible calling convention where we assume that the task
- * and mm being operated on are the current task's.  We also
- * obviously don't pass FOLL_REMOTE in here.
+ * and mm being operated on are the current task's and don't allow
+ * passing of a locked parameter.  We also obviously don't pass
+ * FOLL_REMOTE in here.
  */
 long get_user_pages(unsigned long start, unsigned long nr_pages,
 		unsigned int gup_flags, struct page **pages,
diff --git a/mm/memory.c b/mm/memory.c
index e18c57b..2f3949b 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3883,7 +3883,7 @@ static int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm,
 		struct page *page = NULL;

 		ret = get_user_pages_remote(tsk, mm, addr, 1,
-				gup_flags, &page, &vma);
+				gup_flags, &page, &vma, NULL);
 		if (ret <= 0) {
 #ifndef CONFIG_HAVE_IOREMAP_PROT
 			break;
diff --git a/security/tomoyo/domain.c b/security/tomoyo/domain.c
index 682b73a..838ffa7 100644
--- a/security/tomoyo/domain.c
+++ b/security/tomoyo/domain.c
@@ -881,7 +881,7 @@ bool tomoyo_dump_page(struct linux_binprm *bprm, unsigned long pos,
 	 * the execve().
 	 */
 	if (get_user_pages_remote(current, bprm->mm, pos, 1,
-				FOLL_FORCE, &page, NULL) <= 0)
+				FOLL_FORCE, &page, NULL, NULL) <= 0)
 		return false;
 #else
 	page = bprm->page[pos / PAGE_SIZE];
--
2.10.1

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply related

* [PATCH 0/2] mm: unexport __get_user_pages_unlocked()
From: Lorenzo Stoakes @ 2016-10-27  9:51 UTC (permalink / raw)
  To: linux-mm
  Cc: Michal Hocko, Linus Torvalds, Jan Kara, Hugh Dickins, Dave Hansen,
	Rik van Riel, Mel Gorman, Andrew Morton, Paolo Bonzini,
	Radim Krčmář, kvm, linux-kernel,
	linux-security-module, linux-rdma, dri-devel, linux-fsdevel

This patch series continues the cleanup of get_user_pages*() functions taking
advantage of the fact we can now pass gup_flags as we please.

It firstly adds an additional 'locked' parameter to get_user_pages_remote() to
allow for its callers to utilise VM_FAULT_RETRY functionality. This is necessary
as the invocation of __get_user_pages_unlocked() in process_vm_rw_single_vec()
makes use of this and no other existing higher level function would allow it to
do so.

Secondly existing callers of __get_user_pages_unlocked() are replaced with the
appropriate higher-level replacement - get_user_pages_unlocked() if the current
task and memory descriptor are referenced, or get_user_pages_remote() if other
task/memory descriptors are referenced (having acquired mmap_sem.)

Lorenzo Stoakes (2):
  mm: add locked parameter to get_user_pages_remote()
  mm: unexport __get_user_pages_unlocked()

 drivers/gpu/drm/etnaviv/etnaviv_gem.c   |  2 +-
 drivers/gpu/drm/i915/i915_gem_userptr.c |  2 +-
 drivers/infiniband/core/umem_odp.c      |  2 +-
 fs/exec.c                               |  2 +-
 include/linux/mm.h                      |  5 +----
 kernel/events/uprobes.c                 |  4 ++--
 mm/gup.c                                | 20 ++++++++++++--------
 mm/memory.c                             |  2 +-
 mm/nommu.c                              |  7 +++----
 mm/process_vm_access.c                  | 12 ++++++++----
 security/tomoyo/domain.c                |  2 +-
 virt/kvm/async_pf.c                     | 10 +++++++---
 virt/kvm/kvm_main.c                     |  5 ++---
 13 files changed, 41 insertions(+), 34 deletions(-)

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply

* Re: A question regarding "multiple SGL"
From: Sagi Grimberg @ 2016-10-27  9:02 UTC (permalink / raw)
  To: Christoph Hellwig, Qiuxin (robert)
  Cc: Bart Van Assche, Jens Axboe,
	linux-block-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
	James Bottomley, Martin K. Petersen, Mike Snitzer,
	linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org, Ming Lei,
	linux-nvme-IAPFreCvJWM7uuMidbF8XUB+6BGkLq7r@public.gmane.org,
	Keith Busch, Doug Ledford,
	linux-scsi-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
	Laurence Oberman, Tiger zhao
In-Reply-To: <20161027071009.GA6434-jcswGhMUV9g@public.gmane.org>


> Hi Robert,

Hey Robert, Christoph,

> please explain your use case that isn't handled.  The one and only
> reason to set MSDBD to 1 is to make the code a lot simpler given that
> there is no real use case for supporting more.
>
> RDMA uses memory registrations to register large and possibly
> discontiguous data regions for a single rkey, aka single SGL descriptor
> in NVMe terms.  There would be two reasons to support multiple SGL
> descriptors:  a) to support a larger I/O size than supported by a single
> MR, or b) to support a data region format not mappable by a single
> MR.
>
> iSER only supports a single rkey (or stag in IETF terminology) and has
> been doing fine on a) and mostly fine on b).   There are a few possible
> data layouts not supported by the traditional IB/iWarp FR WRs, but the
> limit is in fact exactly the same as imposed by the NVMe PRPs used for
> PCIe NVMe devices, so the Linux block layer has support to not generate
> them.  Also with modern Mellanox IB/RoCE hardware we can actually
> register completely arbitrary SGLs.  iSER supports using this registration
> mode already with a trivial code addition, but for NVMe we didn't have a
> pressing need yet.

Good explanation :)

The IO transfer size is a bit more pressing on some devices (e.g.
cxgb3/4) where the number of pages per-MR can be indeed lower than
a reasonable transfer size (Steve can correct me if I'm wrong).

However, if there is a real demand for this we'll happily accept
patches :)

Just a note, having this feature in-place can bring unexpected behavior
depending on how we implement it:
- If we can use multiple MRs per IO (for multiple SGLs) we can either
prepare for the worst-case and allocate enough MRs to satisfy the
various IO patterns. This will be much heavier in terms of resource
allocation and can limit the scalability of the host driver.
- Or we can implement a shared MR pool with a reasonable number of MRs.
In this case each IO can consume one or more MRs at the expense of
other IOs. In this case we may need to requeue the IO later when we
have enough available MRs to satisfy the IO. This can yield some
unexpected performance gaps for some workloads.

Cheers,
Sagi.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* Re: Trouble enabling iSER for ConnectX-4 Lx
From: Sagi Grimberg @ 2016-10-27  8:48 UTC (permalink / raw)
  To: Robert LeBlanc, linux-rdma
In-Reply-To: <CAANLjFr_we+33Nen-NYp1xQPzQ-wbR=GL4LBkEZb9azMUN-_=Q@mail.gmail.com>

Hi Robert,

AFAIK, MLNX_OFED includes isert only for specific distros.

This is probably a compat issue between stock isert and MLNX
provided RDMA stack.

Any specific reason not to use upstream (or stock 4.4.27) kernel?
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* Re: [PATCH 06/12] blk-mq: Add a kick_requeue_list argument to blk_mq_requeue_request()
From: Johannes Thumshirn @ 2016-10-27  8:28 UTC (permalink / raw)
  To: Bart Van Assche
  Cc: Jens Axboe, Christoph Hellwig, James Bottomley,
	Martin K. Petersen, Mike Snitzer, Doug Ledford, Keith Busch,
	Ming Lei, Laurence Oberman, linux-block@vger.kernel.org,
	linux-scsi@vger.kernel.org, linux-rdma@vger.kernel.org,
	linux-nvme@lists.infradead.org
In-Reply-To: <3944826d-bfde-f1e6-40ec-2c9a3c259e3a@sandisk.com>

On Wed, Oct 26, 2016 at 03:53:39PM -0700, Bart Van Assche wrote:
> Most blk_mq_requeue_request() and blk_mq_add_to_requeue_list() calls
> are followed by kicking the requeue list. Hence add an argument to
> these two functions that allows to kick the requeue list. This was
> proposed by Christoph Hellwig.
> 
> Signed-off-by: Bart Van Assche <bart.vanassche@sandisk.com>
> Cc: Christoph Hellwig <hch@lst.de>

Looks good,
Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de>

-- 
Johannes Thumshirn                                          Storage
jthumshirn@suse.de                                +49 911 74053 689
SUSE LINUX GmbH, Maxfeldstr. 5, 90409 Nürnberg
GF: Felix Imendörffer, Jane Smithard, Graham Norton
HRB 21284 (AG Nürnberg)
Key fingerprint = EC38 9CAB C2C4 F25D 8600 D0D0 0393 969D 2D76 0850

^ permalink raw reply

* Re: [PATCH 07/12] dm: Use BLK_MQ_S_STOPPED instead of QUEUE_FLAG_STOPPED in blk-mq code
From: Johannes Thumshirn @ 2016-10-27  8:28 UTC (permalink / raw)
  To: Bart Van Assche
  Cc: Jens Axboe, Christoph Hellwig, James Bottomley,
	Martin K. Petersen, Mike Snitzer, Doug Ledford, Keith Busch,
	Ming Lei, Laurence Oberman,
	linux-block-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
	linux-scsi-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
	linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
	linux-nvme-IAPFreCvJWM7uuMidbF8XUB+6BGkLq7r@public.gmane.org
In-Reply-To: <a0733adf-065e-2099-3850-cb1c55df1e35-XdAiOPVOjttBDgjK7y7TUQ@public.gmane.org>

On Wed, Oct 26, 2016 at 03:54:07PM -0700, Bart Van Assche wrote:
> Instead of manipulating both QUEUE_FLAG_STOPPED and BLK_MQ_S_STOPPED
> in the dm start and stop queue functions, only manipulate the latter
> flag. Change blk_queue_stopped() tests into blk_mq_queue_stopped().
> 
> Signed-off-by: Bart Van Assche <bart.vanassche-XdAiOPVOjttBDgjK7y7TUQ@public.gmane.org>
> Reviewed-by: Christoph Hellwig <hch-jcswGhMUV9g@public.gmane.org>

Looks good,
Reviewed-by: Johannes Thumshirn <jthumshirn-l3A5Bk7waGM@public.gmane.org>

-- 
Johannes Thumshirn                                          Storage
jthumshirn-l3A5Bk7waGM@public.gmane.org                                +49 911 74053 689
SUSE LINUX GmbH, Maxfeldstr. 5, 90409 Nürnberg
GF: Felix Imendörffer, Jane Smithard, Graham Norton
HRB 21284 (AG Nürnberg)
Key fingerprint = EC38 9CAB C2C4 F25D 8600 D0D0 0393 969D 2D76 0850
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* Re: [PATCH 09/12] SRP transport: Move queuecommand() wait code to SCSI core
From: Johannes Thumshirn @ 2016-10-27  8:27 UTC (permalink / raw)
  To: Bart Van Assche
  Cc: Jens Axboe, Christoph Hellwig, James Bottomley,
	Martin K. Petersen, Mike Snitzer, Doug Ledford, Keith Busch,
	Ming Lei, Laurence Oberman,
	linux-block-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
	linux-scsi-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
	linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
	linux-nvme-IAPFreCvJWM7uuMidbF8XUB+6BGkLq7r@public.gmane.org
In-Reply-To: <e86cdaf9-6305-d2cb-6068-0a050c023d73-XdAiOPVOjttBDgjK7y7TUQ@public.gmane.org>

On Wed, Oct 26, 2016 at 03:55:00PM -0700, Bart Van Assche wrote:
> Additionally, rename srp_wait_for_queuecommand() into
> scsi_wait_for_queuecommand() and add a comment about the
> queuecommand() call from scsi_send_eh_cmnd().
> 
> Signed-off-by: Bart Van Assche <bart.vanassche-XdAiOPVOjttBDgjK7y7TUQ@public.gmane.org>
> Reviewed-by: Christoph Hellwig <hch-jcswGhMUV9g@public.gmane.org>

Looks good,
Reviewed-by: Johannes Thumshirn <jthumshirn-l3A5Bk7waGM@public.gmane.org>

-- 
Johannes Thumshirn                                          Storage
jthumshirn-l3A5Bk7waGM@public.gmane.org                                +49 911 74053 689
SUSE LINUX GmbH, Maxfeldstr. 5, 90409 Nürnberg
GF: Felix Imendörffer, Jane Smithard, Graham Norton
HRB 21284 (AG Nürnberg)
Key fingerprint = EC38 9CAB C2C4 F25D 8600 D0D0 0393 969D 2D76 0850
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* Re: [PATCH 10/12] SRP transport, scsi-mq: Wait for .queue_rq() if necessary
From: Johannes Thumshirn @ 2016-10-27  8:27 UTC (permalink / raw)
  To: Bart Van Assche
  Cc: Jens Axboe, Christoph Hellwig, James Bottomley,
	Martin K. Petersen, Mike Snitzer, Doug Ledford, Keith Busch,
	Ming Lei, Laurence Oberman,
	linux-block-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
	linux-scsi-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
	linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
	linux-nvme-IAPFreCvJWM7uuMidbF8XUB+6BGkLq7r@public.gmane.org
In-Reply-To: <0cd77719-1f11-d5c3-3186-1c7c3cfd6886-XdAiOPVOjttBDgjK7y7TUQ@public.gmane.org>

On Wed, Oct 26, 2016 at 03:55:34PM -0700, Bart Van Assche wrote:
> Ensure that if scsi-mq is enabled that scsi_wait_for_queuecommand()
> waits until ongoing shost->hostt->queuecommand() calls have finished.
> 
> Signed-off-by: Bart Van Assche <bart.vanassche-XdAiOPVOjttBDgjK7y7TUQ@public.gmane.org>
> Reviewed-by: Christoph Hellwig <hch-jcswGhMUV9g@public.gmane.org>

Looks good,
Reviewed-by: Johannes Thumshirn <jthumshirn-l3A5Bk7waGM@public.gmane.org>

-- 
Johannes Thumshirn                                          Storage
jthumshirn-l3A5Bk7waGM@public.gmane.org                                +49 911 74053 689
SUSE LINUX GmbH, Maxfeldstr. 5, 90409 Nürnberg
GF: Felix Imendörffer, Jane Smithard, Graham Norton
HRB 21284 (AG Nürnberg)
Key fingerprint = EC38 9CAB C2C4 F25D 8600 D0D0 0393 969D 2D76 0850
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* Re: [PATCH 04/12] blk-mq: Move more code into blk_mq_direct_issue_request()
From: Johannes Thumshirn @ 2016-10-27  8:18 UTC (permalink / raw)
  To: Bart Van Assche
  Cc: Jens Axboe, Christoph Hellwig, James Bottomley,
	Martin K. Petersen, Mike Snitzer, Doug Ledford, Keith Busch,
	Ming Lei, Laurence Oberman, linux-block@vger.kernel.org,
	linux-scsi@vger.kernel.org, linux-rdma@vger.kernel.org,
	linux-nvme@lists.infradead.org
In-Reply-To: <32b0bd88-cb8e-754a-89fc-b1825778b05a@sandisk.com>

On Wed, Oct 26, 2016 at 03:52:35PM -0700, Bart Van Assche wrote:
> Move the "hctx stopped" test and the insert request calls into
> blk_mq_direct_issue_request(). Rename that function into
> blk_mq_try_issue_directly() to reflect its new semantics. Pass
> the hctx pointer to that function instead of looking it up a
> second time. These changes avoid that code has to be duplicated
> in the next patch.
> 
> Signed-off-by: Bart Van Assche <bart.vanassche@sandisk.com>
> Cc: Christoph Hellwig <hch@lst.de>

Looks good,
Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de>

-- 
Johannes Thumshirn                                          Storage
jthumshirn@suse.de                                +49 911 74053 689
SUSE LINUX GmbH, Maxfeldstr. 5, 90409 Nürnberg
GF: Felix Imendörffer, Jane Smithard, Graham Norton
HRB 21284 (AG Nürnberg)
Key fingerprint = EC38 9CAB C2C4 F25D 8600 D0D0 0393 969D 2D76 0850

^ permalink raw reply

* Re: [PATCH 04/12] blk-mq: Move more code into blk_mq_direct_issue_request()
From: Johannes Thumshirn @ 2016-10-27  8:17 UTC (permalink / raw)
  To: Bart Van Assche
  Cc: Jens Axboe, Christoph Hellwig, James Bottomley,
	Martin K. Petersen, Mike Snitzer, Doug Ledford, Keith Busch,
	Ming Lei, Laurence Oberman, linux-block@vger.kernel.org,
	linux-scsi@vger.kernel.org, linux-rdma@vger.kernel.org,
	linux-nvme@lists.infradead.org
In-Reply-To: <32b0bd88-cb8e-754a-89fc-b1825778b05a@sandisk.com>

On Wed, Oct 26, 2016 at 03:52:35PM -0700, Bart Van Assche wrote:
> Move the "hctx stopped" test and the insert request calls into
> blk_mq_direct_issue_request(). Rename that function into
> blk_mq_try_issue_directly() to reflect its new semantics. Pass
> the hctx pointer to that function instead of looking it up a
> second time. These changes avoid that code has to be duplicated
> in the next patch.
> 
> Signed-off-by: Bart Van Assche <bart.vanassche@sandisk.com>
> Cc: Christoph Hellwig <hch@lst.de>

Looks good,
Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de>

-- 
Johannes Thumshirn                                          Storage
jthumshirn@suse.de                                +49 911 74053 689
SUSE LINUX GmbH, Maxfeldstr. 5, 90409 Nürnberg
GF: Felix Imendörffer, Jane Smithard, Graham Norton
HRB 21284 (AG Nürnberg)
Key fingerprint = EC38 9CAB C2C4 F25D 8600 D0D0 0393 969D 2D76 0850

^ permalink raw reply

* Re: [PATCH 03/12] blk-mq: Introduce blk_mq_queue_stopped()
From: Johannes Thumshirn @ 2016-10-27  8:16 UTC (permalink / raw)
  To: Bart Van Assche
  Cc: Jens Axboe, Christoph Hellwig, James Bottomley,
	Martin K. Petersen, Mike Snitzer, Doug Ledford, Keith Busch,
	Ming Lei, Laurence Oberman,
	linux-block-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
	linux-scsi-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
	linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
	linux-nvme-IAPFreCvJWM7uuMidbF8XUB+6BGkLq7r@public.gmane.org
In-Reply-To: <f68b2997-8b0d-7aea-2859-5fbda4f6bf71-XdAiOPVOjttBDgjK7y7TUQ@public.gmane.org>

On Wed, Oct 26, 2016 at 03:52:05PM -0700, Bart Van Assche wrote:
> The function blk_queue_stopped() allows to test whether or not a
> traditional request queue has been stopped. Introduce a helper
> function that allows block drivers to query easily whether or not
> one or more hardware contexts of a blk-mq queue have been stopped.
> 
> Signed-off-by: Bart Van Assche <bart.vanassche-XdAiOPVOjttBDgjK7y7TUQ@public.gmane.org>
> Reviewed-by: Hannes Reinecke <hare-IBi9RG/b67k@public.gmane.org>

Looks good,
Reviewed-by: Johannes Thumshirn <jthumshirn-l3A5Bk7waGM@public.gmane.org>

-- 
Johannes Thumshirn                                          Storage
jthumshirn-l3A5Bk7waGM@public.gmane.org                                +49 911 74053 689
SUSE LINUX GmbH, Maxfeldstr. 5, 90409 Nürnberg
GF: Felix Imendörffer, Jane Smithard, Graham Norton
HRB 21284 (AG Nürnberg)
Key fingerprint = EC38 9CAB C2C4 F25D 8600 D0D0 0393 969D 2D76 0850
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* Re: [PATCH 02/12] blk-mq: Introduce blk_mq_hctx_stopped()
From: Johannes Thumshirn @ 2016-10-27  8:15 UTC (permalink / raw)
  To: Bart Van Assche
  Cc: Jens Axboe, Christoph Hellwig, James Bottomley,
	Martin K. Petersen, Mike Snitzer, Doug Ledford, Keith Busch,
	Ming Lei, Laurence Oberman, linux-block@vger.kernel.org,
	linux-scsi@vger.kernel.org, linux-rdma@vger.kernel.org,
	linux-nvme@lists.infradead.org
In-Reply-To: <0de50789-e3b7-0a07-73c1-4fb87b1f957e@sandisk.com>

On Wed, Oct 26, 2016 at 03:51:33PM -0700, Bart Van Assche wrote:
> Multiple functions test the BLK_MQ_S_STOPPED bit so introduce
> a helper function that performs this test.
> 
> Signed-off-by: Bart Van Assche <bart.vanassche@sandisk.com>
> Cc: Christoph Hellwig <hch@lst.de>

Looks good,
Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de>

-- 
Johannes Thumshirn                                          Storage
jthumshirn@suse.de                                +49 911 74053 689
SUSE LINUX GmbH, Maxfeldstr. 5, 90409 Nürnberg
GF: Felix Imendörffer, Jane Smithard, Graham Norton
HRB 21284 (AG Nürnberg)
Key fingerprint = EC38 9CAB C2C4 F25D 8600 D0D0 0393 969D 2D76 0850

^ permalink raw reply

* Re: [PATCH 01/12] blk-mq: Do not invoke .queue_rq() for a stopped queue
From: Johannes Thumshirn @ 2016-10-27  8:14 UTC (permalink / raw)
  To: Bart Van Assche
  Cc: Jens Axboe, Christoph Hellwig, James Bottomley,
	Martin K. Petersen, Mike Snitzer, Doug Ledford, Keith Busch,
	Ming Lin, Laurence Oberman,
	linux-block-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
	linux-scsi-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
	linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
	linux-nvme-IAPFreCvJWM7uuMidbF8XUB+6BGkLq7r@public.gmane.org
In-Reply-To: <1debcf7f-c950-308b-d297-3e48a77e08d7-XdAiOPVOjttBDgjK7y7TUQ@public.gmane.org>

On Wed, Oct 26, 2016 at 03:50:44PM -0700, Bart Van Assche wrote:
> The meaning of the BLK_MQ_S_STOPPED flag is "do not call
> .queue_rq()". Hence modify blk_mq_make_request() such that requests
> are queued instead of issued if a queue has been stopped.
> 
> Reported-by: Ming Lei <tom.leiming-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
> Signed-off-by: Bart Van Assche <bart.vanassche-XdAiOPVOjttBDgjK7y7TUQ@public.gmane.org>
> Reviewed-by: Christoph Hellwig <hch-jcswGhMUV9g@public.gmane.org>

Looks good,
Reviewed-by: Johannes Thumshirn <jthumshirn-l3A5Bk7waGM@public.gmane.org>

-- 
Johannes Thumshirn                                          Storage
jthumshirn-l3A5Bk7waGM@public.gmane.org                                +49 911 74053 689
SUSE LINUX GmbH, Maxfeldstr. 5, 90409 Nürnberg
GF: Felix Imendörffer, Jane Smithard, Graham Norton
HRB 21284 (AG Nürnberg)
Key fingerprint = EC38 9CAB C2C4 F25D 8600 D0D0 0393 969D 2D76 0850
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* Re: [PATCH v2 perftest] Support for Chelsio T6 devices
From: Leon Romanovsky @ 2016-10-27  7:54 UTC (permalink / raw)
  To: Steve Wise
  Cc: 'Gil Rockah', 'Zohar Ben Aharon',
	linux-rdma-u79uwXL29TY76Z2rM5mHXA
In-Reply-To: <00c501d22ecc$aca8dec0$05fa9c40$@opengridcomputing.com>

[-- Attachment #1: Type: text/plain, Size: 1575 bytes --]

On Tue, Oct 25, 2016 at 09:32:55AM -0500, Steve Wise wrote:
> Hey guys,
>
> Has this patch been integrated yet?  Also, where is the official upstream
> perftest git repo now?

Hi Steve,

Sorry for the late response, due to the holidays our responses are
delayed a little bit.

We moved perftest repo to be under github's linux-rdma organization [1]
and it is now [2].

I'll remind Zohar to take it.

[1] https://github.com/linux-rdma/
[2] https://github.com/linux-rdma/perftest

>
> Thanks,
>
> Steve.
>
>
> > -----Original Message-----
> > From: Steve Wise [mailto:swise-7bPotxP6k4+P2YhJcF5u+vpXobYPEAuW@public.gmane.org]
> > Sent: Tuesday, October 11, 2016 4:34 PM
> > To: 'Gil Rockah'; 'Zohar Ben Aharon'
> > Subject: RE: [PATCH v2 perftest] Support for Chelsio T6 devices
> >
> >
> > > -----Original Message-----
> > > From: Gil Rockah [mailto:gilr-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org]
> > > Sent: Monday, September 26, 2016 2:01 AM
> > > To: Zohar Ben Aharon; swise-7bPotxP6k4+P2YhJcF5u+vpXobYPEAuW@public.gmane.org
> > > Subject: FW: [PATCH v2 perftest] Support for Chelsio T6 devices
> > >
> > > Hi Steve,
> > > Thanks for the patch.
> > > Please notice that Zohar is the new owner of perftest.
> > >
> > > Thanks,
> > > Gil
> > >
> >
> > Hey Zohar,
> >
> > Where is the formal perftest git repo now?
> >
> > Thanks,
> >
> > Steve.
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
> the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 819 bytes --]

^ permalink raw reply

* RE: [bug report] qedr: Add GSI support
From: Amrani, Ram @ 2016-10-27  7:46 UTC (permalink / raw)
  To: Dan Carpenter; +Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
In-Reply-To: <20161026132501.GA3628-mgFCXtclrQlZLf2FXnZxJA@public.gmane.org>

> The patch 048867793046: "qedr: Add GSI support" from Oct 10, 2016, leads to
> the following Smatch complaint:
> 
> drivers/infiniband/hw/qedr/qedr_cm.c:284 qedr_gsi_build_header()
> 	 warn: variable dereferenced before check 'sgid_attr.ndev' (see line 281)
> 
> drivers/infiniband/hw/qedr/qedr_cm.c
>    280
>    281		vlan_id = rdma_vlan_dev_vlan_id(sgid_attr.ndev);
>                                                 ^^^^^^^^^^^^^^ Dereference inside function.
> 
>    282		if (vlan_id < VLAN_CFI_MASK)
>    283			has_vlan = true;
>    284		if (sgid_attr.ndev)
>                     ^^^^^^^^^^^^^^
> Check too late.
> 
>    285			dev_put(sgid_attr.ndev);
>    286

Thanks Dan. I'll fix this.
Ram
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* Re: [PATCH v2] Avoid possible hang on device removal
From: Leon Romanovsky @ 2016-10-27  7:29 UTC (permalink / raw)
  To: Mustafa Ismail
  Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA,
	sean.hefty-ral2JQCrhuEAvxtiuMwx3w,
	hal-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb,
	dledford-H+wXaHxf7aLQT0dZR+AlfA,
	swise-7bPotxP6k4+P2YhJcF5u+vpXobYPEAuW
In-Reply-To: <1477438557-15884-1-git-send-email-mustafa.ismail-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>

[-- Attachment #1: Type: text/plain, Size: 644 bytes --]

On Tue, Oct 25, 2016 at 06:35:57PM -0500, Mustafa Ismail wrote:
> When we get an RDMA_CM_EVENT_DEVICE_REMOVAL the cm_thread will
> exit and because flush errors are ignored the cb->sem may not get signaled.
> So just signal on device removal event.
>
> v1 -> v2: Add Fixes tag
>
> Fixes: 612eae1f6fe3 ("rping: ignore flushed completions")
> Signed-off-by: Mustafa Ismail <mustafa.ismail-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
> ---
>  librdmacm/examples/rping.c | 2 ++
>  1 file changed, 2 insertions(+)

Mustafa,

For future submissions, please use rdma-core notation in [PATCH ..]
section, so we will be able to handle them faster.

Thanks

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 819 bytes --]

^ permalink raw reply

* Re: Trouble enabling iSER for ConnectX-4 Lx
From: Alaa Hleihel @ 2016-10-27  7:29 UTC (permalink / raw)
  To: Robert LeBlanc, linux-rdma
In-Reply-To: <CAANLjFr_we+33Nen-NYp1xQPzQ-wbR=GL4LBkEZb9azMUN-_=Q@mail.gmail.com>

Hi Robert,

You've installed mlnx-en package, which does not provide iSER modules.
Instead, you should get MLNX_OFED from:
http://www.mellanox.com/page/products_dyn?product_family=26&mtag=linux_sw_drivers

Note that ib_isert in MLNX_OFED is not backported to all kernels, and it's enabled by default only for specific kernels.
To force building the module against your kernel, use the following command for MLNX_OFED installation:
# MLNX_EXTRA_FLAGS=--with-isert ./mlnxofedinstall --force --add-kernel-support  --with-isert --skip-repo

As for configuring RoCE, please refer to the MLNX_OFED User Manual:
http://www.mellanox.com/related-docs/prod_software/Mellanox_OFED_Linux_User_Manual_v3.40.pdf

Regards,
Alaa


On 10/27/2016 02:13, Robert LeBlanc wrote:
> We have some ConnectX-4 Lx cards that I'm trying to test RoCE and iSER
> on. I downloaded and installed the Mellanox drivers with VMA [0]. I
> was able to run the ib_read_bw tests over the adapters after
> installing the infiniband-diags and perftest RPMs. When I went to
> configure LIO for iSER, I'm getting the message "Cannot change iser"
> on step 6 in the procedure here [1] which I've done many times with
> Infiniband without issues. I navigated to
> /sys/kernel/config/target/iscsi/{iqn}/tpgt_1/np/{portal_ip:port} and
> sure enough, I can't write '1' into iser. The kernel is not giving any
> messages and the ib_isert module is loaded. This is on 4.4.27,
> Mellanox driver 3.4-1.0.0.3 built with `./install --add-kernel-support
> --skip-repo --tmpdir /root/junk --vma`
>
> # mstflint -d 4:00.0 q
> Image type:          FS3
> FW Version:          14.16.1020
> FW Release Date:     20.6.2016
> Rom Info:            type=UEFI version=14.10.16
>                     type=PXE version=3.4.812 devid=4117
> Description:         UID                GuidsNumber
> Base GUID:           0cc47a000089f706        4
> Base MAC:            00000cc47a89f706        4
> Image VSD:
> Device VSD:
> PSID:                SM_2001000001034
>
> # ibstatus
> Infiniband device 'mlx5_0' port 1 status:
>        default gid:     fe80:0000:0000:0000:0ec4:7aff:fe89:f706
>        base lid:        0x0
>        sm lid:          0x0
>        state:           4: ACTIVE
>        phys state:      5: LinkUp
>        rate:            25 Gb/sec (1X EDR)
>        link_layer:      Ethernet
>
> Infiniband device 'mlx5_1' port 1 status:
>        default gid:     fe80:0000:0000:0000:0ec4:7aff:fe89:f707
>        base lid:        0x0
>        sm lid:          0x0
>        state:           4: ACTIVE
>        phys state:      5: LinkUp
>        rate:            25 Gb/sec (1X EDR)
>        link_layer:      Ethernet
>
> Any ideas of what I'm doing wrong here? I don't have any experience
> with RoCE, so I'm sure I'm doing something wrong. And the manual has
> nothing about configuring RoCE other than enabling --vma when
> installing the drivers [2].
>
> Thanks,
> Robert LeBlanc
>
> [0] http://www.mellanox.com/page/products_dyn?product_family=27
> [1] https://community.mellanox.com/docs/DOC-1472
> [2] http://www.mellanox.com/related-docs/prod_software/Mellanox_EN_for_Linux_User_Manual_v3_40.pdf
> ----------------
> Robert LeBlanc
> PGP Fingerprint 79A2 9CA4 6CC4 45DD A904  C70E E654 3BB2 FA62 B9F1
> --
> To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
> the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* Re: [PATCH 07/12] dm: Use BLK_MQ_S_STOPPED instead of QUEUE_FLAG_STOPPED in blk-mq code
From: Hannes Reinecke @ 2016-10-27  7:27 UTC (permalink / raw)
  To: Bart Van Assche, Jens Axboe
  Cc: Christoph Hellwig, James Bottomley, Martin K. Petersen,
	Mike Snitzer, Doug Ledford, Keith Busch, Ming Lei,
	Laurence Oberman, linux-block@vger.kernel.org,
	linux-scsi@vger.kernel.org, linux-rdma@vger.kernel.org,
	linux-nvme@lists.infradead.org
In-Reply-To: <a0733adf-065e-2099-3850-cb1c55df1e35@sandisk.com>

On 10/27/2016 12:54 AM, Bart Van Assche wrote:
> Instead of manipulating both QUEUE_FLAG_STOPPED and BLK_MQ_S_STOPPED
> in the dm start and stop queue functions, only manipulate the latter
> flag. Change blk_queue_stopped() tests into blk_mq_queue_stopped().
> 
> Signed-off-by: Bart Van Assche <bart.vanassche@sandisk.com>
> Reviewed-by: Christoph Hellwig <hch@lst.de>
> Cc: Mike Snitzer <snitzer@redhat.com>
> ---
>  drivers/md/dm-rq.c | 18 ++----------------
>  1 file changed, 2 insertions(+), 16 deletions(-)
> 
> diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c
> index fbd37b4..d47a504 100644
> --- a/drivers/md/dm-rq.c
> +++ b/drivers/md/dm-rq.c
> @@ -75,12 +75,6 @@ static void dm_old_start_queue(struct request_queue *q)
>  
>  static void dm_mq_start_queue(struct request_queue *q)
>  {
> -	unsigned long flags;
> -
> -	spin_lock_irqsave(q->queue_lock, flags);
> -	queue_flag_clear(QUEUE_FLAG_STOPPED, q);
> -	spin_unlock_irqrestore(q->queue_lock, flags);
> -
>  	blk_mq_start_stopped_hw_queues(q, true);
>  	blk_mq_kick_requeue_list(q);
>  }
> @@ -105,16 +99,8 @@ static void dm_old_stop_queue(struct request_queue *q)
>  
>  static void dm_mq_stop_queue(struct request_queue *q)
>  {
> -	unsigned long flags;
> -
> -	spin_lock_irqsave(q->queue_lock, flags);
> -	if (blk_queue_stopped(q)) {
> -		spin_unlock_irqrestore(q->queue_lock, flags);
> +	if (blk_mq_queue_stopped(q))
>  		return;
> -	}
> -
> -	queue_flag_set(QUEUE_FLAG_STOPPED, q);
> -	spin_unlock_irqrestore(q->queue_lock, flags);
>  
>  	/* Avoid that requeuing could restart the queue. */
>  	blk_mq_cancel_requeue_work(q);
> @@ -341,7 +327,7 @@ static void __dm_mq_kick_requeue_list(struct request_queue *q, unsigned long mse
>  	unsigned long flags;
>  
>  	spin_lock_irqsave(q->queue_lock, flags);
> -	if (!blk_queue_stopped(q))
> +	if (!blk_mq_queue_stopped(q))
>  		blk_mq_delay_kick_requeue_list(q, msecs);
>  	spin_unlock_irqrestore(q->queue_lock, flags);
>  }
> 
Ah. Right. That answers my previous question.

Reviewed-by: Hannes Reinecke <hare@suse.com>

Cheers,

Hannes
-- 
Dr. Hannes Reinecke		   Teamlead Storage & Networking
hare@suse.de			               +49 911 74053 688
SUSE LINUX GmbH, Maxfeldstr. 5, 90409 Nürnberg
GF: F. Imendörffer, J. Smithard, J. Guild, D. Upmanyu, G. Norton
HRB 21284 (AG Nürnberg)

^ permalink raw reply

* Re: [PATCH 06/12] blk-mq: Add a kick_requeue_list argument to blk_mq_requeue_request()
From: Hannes Reinecke @ 2016-10-27  7:26 UTC (permalink / raw)
  To: Bart Van Assche, Jens Axboe
  Cc: Christoph Hellwig, James Bottomley, Martin K. Petersen,
	Mike Snitzer, Doug Ledford, Keith Busch, Ming Lei,
	Laurence Oberman, linux-block@vger.kernel.org,
	linux-scsi@vger.kernel.org, linux-rdma@vger.kernel.org,
	linux-nvme@lists.infradead.org
In-Reply-To: <3944826d-bfde-f1e6-40ec-2c9a3c259e3a@sandisk.com>

On 10/27/2016 12:53 AM, Bart Van Assche wrote:
> Most blk_mq_requeue_request() and blk_mq_add_to_requeue_list() calls
> are followed by kicking the requeue list. Hence add an argument to
> these two functions that allows kicking the requeue list. This was
> proposed by Christoph Hellwig.
> 
> Signed-off-by: Bart Van Assche <bart.vanassche@sandisk.com>
> Cc: Christoph Hellwig <hch@lst.de>
> Cc: Hannes Reinecke <hare@suse.com>
> Cc: Sagi Grimberg <sagi@grimberg.me>
> Cc: Johannes Thumshirn <jthumshirn@suse.de>
> ---
>  block/blk-flush.c            |  5 +----
>  block/blk-mq.c               | 10 +++++++---
>  drivers/block/xen-blkfront.c |  2 +-
>  drivers/md/dm-rq.c           |  2 +-
>  drivers/nvme/host/core.c     |  2 +-
>  drivers/scsi/scsi_lib.c      |  4 +---
>  include/linux/blk-mq.h       |  5 +++--
>  7 files changed, 15 insertions(+), 15 deletions(-)
> 
> diff --git a/block/blk-flush.c b/block/blk-flush.c
> index 6a14b68..a834aed 100644
> --- a/block/blk-flush.c
> +++ b/block/blk-flush.c
> @@ -134,10 +134,7 @@ static void blk_flush_restore_request(struct request *rq)
>  static bool blk_flush_queue_rq(struct request *rq, bool add_front)
>  {
>  	if (rq->q->mq_ops) {
> -		struct request_queue *q = rq->q;
> -
> -		blk_mq_add_to_requeue_list(rq, add_front);
> -		blk_mq_kick_requeue_list(q);
> +		blk_mq_add_to_requeue_list(rq, add_front, true);
>  		return false;
>  	} else {
>  		if (add_front)
> diff --git a/block/blk-mq.c b/block/blk-mq.c
> index 4945437..688bcc3 100644
> --- a/block/blk-mq.c
> +++ b/block/blk-mq.c
> @@ -492,12 +492,12 @@ static void __blk_mq_requeue_request(struct request *rq)
>  	}
>  }
>  
> -void blk_mq_requeue_request(struct request *rq)
> +void blk_mq_requeue_request(struct request *rq, bool kick_requeue_list)
>  {
>  	__blk_mq_requeue_request(rq);
>  
>  	BUG_ON(blk_queued_rq(rq));
> -	blk_mq_add_to_requeue_list(rq, true);
> +	blk_mq_add_to_requeue_list(rq, true, kick_requeue_list);
>  }
>  EXPORT_SYMBOL(blk_mq_requeue_request);
>  
> @@ -535,7 +535,8 @@ static void blk_mq_requeue_work(struct work_struct *work)
>  	blk_mq_start_hw_queues(q);
>  }
>  
> -void blk_mq_add_to_requeue_list(struct request *rq, bool at_head)
> +void blk_mq_add_to_requeue_list(struct request *rq, bool at_head,
> +				bool kick_requeue_list)
>  {
>  	struct request_queue *q = rq->q;
>  	unsigned long flags;
> @@ -554,6 +555,9 @@ void blk_mq_add_to_requeue_list(struct request *rq, bool at_head)
>  		list_add_tail(&rq->queuelist, &q->requeue_list);
>  	}
>  	spin_unlock_irqrestore(&q->requeue_lock, flags);
> +
> +	if (kick_requeue_list)
> +		blk_mq_kick_requeue_list(q);
>  }
>  EXPORT_SYMBOL(blk_mq_add_to_requeue_list);
>  
> diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
> index 9908597..1ca702d 100644
> --- a/drivers/block/xen-blkfront.c
> +++ b/drivers/block/xen-blkfront.c
> @@ -2043,7 +2043,7 @@ static int blkif_recover(struct blkfront_info *info)
>  		/* Requeue pending requests (flush or discard) */
>  		list_del_init(&req->queuelist);
>  		BUG_ON(req->nr_phys_segments > segs);
> -		blk_mq_requeue_request(req);
> +		blk_mq_requeue_request(req, false);
>  	}
>  	blk_mq_kick_requeue_list(info->rq);
>  
> diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c
> index dc75bea..fbd37b4 100644
> --- a/drivers/md/dm-rq.c
> +++ b/drivers/md/dm-rq.c
> @@ -354,7 +354,7 @@ EXPORT_SYMBOL(dm_mq_kick_requeue_list);
>  
>  static void dm_mq_delay_requeue_request(struct request *rq, unsigned long msecs)
>  {
> -	blk_mq_requeue_request(rq);
> +	blk_mq_requeue_request(rq, false);
>  	__dm_mq_kick_requeue_list(rq->q, msecs);
>  }
>  
Hmm. __dm_mq_kick_requeue_list() does essentially the same.
Have you checked if you can use 'true' here and drop the call to it?
However, it does take the queue_lock when doing so.
Is that required? None of the other drivers do it ...

Cheers,

Hannes
-- 
Dr. Hannes Reinecke		   Teamlead Storage & Networking
hare@suse.de			               +49 911 74053 688
SUSE LINUX GmbH, Maxfeldstr. 5, 90409 Nürnberg
GF: F. Imendörffer, J. Smithard, J. Guild, D. Upmanyu, G. Norton
HRB 21284 (AG Nürnberg)

^ permalink raw reply

* Re: [PATCH v4 00/10] infiniband: Remove semaphores
From: Leon Romanovsky @ 2016-10-27  7:13 UTC (permalink / raw)
  To: Binoy Jayan
  Cc: Doug Ledford, Sean Hefty, Hal Rosenstock, Arnd Bergmann,
	linux-rdma-u79uwXL29TY76Z2rM5mHXA,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA
In-Reply-To: <1477551554-30349-1-git-send-email-binoy.jayan-QSEj5FYQhm4dnm+yROfE0A@public.gmane.org>

[-- Attachment #1: Type: text/plain, Size: 449 bytes --]

On Thu, Oct 27, 2016 at 12:29:04PM +0530, Binoy Jayan wrote:
> Hi,
>
> This is a set of patches [v4] which removes semaphores from infiniband.
> These are part of a bigger effort to eliminate all semaphores from the
> linux kernel.
>
> v3 -> v4:
>
> IB/mlx5: Added patch - Replace semaphore umr_common:sem with wait_event
> IB/mlx5: Fixed a bug pointed out by Leon Romanovsky

Please keep full changelog for your next submissions/respins.

Thanks

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 819 bytes --]

^ permalink raw reply

* Re: A question regarding "multiple SGL"
From: Christoph Hellwig @ 2016-10-27  7:10 UTC (permalink / raw)
  To: Qiuxin (robert)
  Cc: Bart Van Assche, Jens Axboe, linux-block@vger.kernel.org,
	James Bottomley, Martin K. Petersen, Mike Snitzer,
	linux-rdma@vger.kernel.org, Ming Lei,
	linux-nvme@lists.infradead.org, Keith Busch, Doug Ledford,
	linux-scsi@vger.kernel.org, Laurence Oberman, Tiger zhao
In-Reply-To: <B78D1A958924E54594997EFF975251625BD49445@szxeml592-mbx.china.huawei.com>

Hi Robert,

please explain your use case that isn't handled.  The one and only
reason to set MSDBD to 1 is to make the code a lot simpler given that
there is no real use case for supporting more.

RDMA uses memory registrations to register large and possibly
discontiguous data regions for a single rkey, aka single SGL descriptor
in NVMe terms.  There would be two reasons to support multiple SGL
descriptors:  a) to support a larger I/O size than supported by a single
MR, or b) to support a data region format not mappable by a single
MR.

iSER only supports a single rkey (or stag in IETF terminology) and has
been doing fine on a) and mostly fine on b).   There are a few possible
data layouts not supported by the traditional IB/iWarp FR WRs, but the
limit is in fact exactly the same as imposed by the NVMe PRPs used for
PCIe NVMe devices, so the Linux block layer has support to not generate
them.  Also with modern Mellanox IB/RoCE hardware we can actually
register completely arbitrary SGLs.  iSER supports using this registration
mode already with a trivial code addition, but for NVMe we didn't have a
pressing need yet.

^ permalink raw reply

* [PATCH v4 10/10] IB/mlx5: Simplify completion into a wait_event
From: Binoy Jayan @ 2016-10-27  6:59 UTC (permalink / raw)
  To: Doug Ledford, Sean Hefty, Hal Rosenstock
  Cc: Arnd Bergmann, linux-rdma, linux-kernel, Binoy Jayan
In-Reply-To: <1477551554-30349-1-git-send-email-binoy.jayan@linaro.org>

Convert the completion 'mlx5_ib_umr_context:done' to a wait_event as it
just waits for the return value to be filled.

Signed-off-by: Binoy Jayan <binoy.jayan@linaro.org>
---
 drivers/infiniband/hw/mlx5/mlx5_ib.h | 2 +-
 drivers/infiniband/hw/mlx5/mr.c      | 9 +++++----
 include/rdma/ib_verbs.h              | 1 +
 3 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index de31b5f..cf496b5 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -524,7 +524,7 @@ struct mlx5_ib_mw {
 struct mlx5_ib_umr_context {
 	struct ib_cqe		cqe;
 	enum ib_wc_status	status;
-	struct completion	done;
+	wait_queue_head_t	wq;
 };
 
 struct umr_common {
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index dfaf6f6..49ff2af 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -846,14 +846,14 @@ static void mlx5_ib_umr_done(struct ib_cq *cq, struct ib_wc *wc)
 		container_of(wc->wr_cqe, struct mlx5_ib_umr_context, cqe);
 
 	context->status = wc->status;
-	complete(&context->done);
+	wake_up(&context->wq);
 }
 
 static inline void mlx5_ib_init_umr_context(struct mlx5_ib_umr_context *context)
 {
 	context->cqe.done = mlx5_ib_umr_done;
-	context->status = -1;
-	init_completion(&context->done);
+	context->status = IB_WC_STATUS_NONE;
+	init_waitqueue_head(&context->wq);
 }
 
 static inline int mlx5_ib_post_send_wait(struct mlx5_ib_dev *dev,
@@ -873,7 +873,8 @@ static inline int mlx5_ib_post_send_wait(struct mlx5_ib_dev *dev,
 	if (err) {
 		mlx5_ib_warn(dev, "UMR post send failed, err %d\n", err);
 	} else {
-		wait_for_completion(&umr_context.done);
+		wait_event(umr_context.wq,
+			   umr_context.status != IB_WC_STATUS_NONE);
 		if (umr_context.status != IB_WC_SUCCESS) {
 			mlx5_ib_warn(dev, "reg umr failed (%u)\n",
 				     umr_context.status);
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 5ad43a4..8b15b6f 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -823,6 +823,7 @@ struct ib_ah_attr {
 };
 
 enum ib_wc_status {
+	IB_WC_STATUS_NONE = -1,
 	IB_WC_SUCCESS,
 	IB_WC_LOC_LEN_ERR,
 	IB_WC_LOC_QP_OP_ERR,
-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project

^ permalink raw reply related

* [PATCH v4 09/10] IB/mlx5: Replace semaphore umr_common:sem with wait_event
From: Binoy Jayan @ 2016-10-27  6:59 UTC (permalink / raw)
  To: Doug Ledford, Sean Hefty, Hal Rosenstock
  Cc: Arnd Bergmann, linux-rdma-u79uwXL29TY76Z2rM5mHXA,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA, Binoy Jayan
In-Reply-To: <1477551554-30349-1-git-send-email-binoy.jayan-QSEj5FYQhm4dnm+yROfE0A@public.gmane.org>

Remove semaphore umr_common:sem used to limit concurrent access to umr qp
and introduce an atomic value 'users' to keep track of the same. Use a
wait_event to block when the limit is reached.

Signed-off-by: Binoy Jayan <binoy.jayan-QSEj5FYQhm4dnm+yROfE0A@public.gmane.org>
---
 drivers/infiniband/hw/mlx5/main.c    | 6 +-----
 drivers/infiniband/hw/mlx5/mlx5_ib.h | 7 ++++++-
 drivers/infiniband/hw/mlx5/mr.c      | 6 ++++--
 3 files changed, 11 insertions(+), 8 deletions(-)

diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 2217477..eb72bff 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -2437,10 +2437,6 @@ static void destroy_umrc_res(struct mlx5_ib_dev *dev)
 	ib_dealloc_pd(dev->umrc.pd);
 }
 
-enum {
-	MAX_UMR_WR = 128,
-};
-
 static int create_umr_res(struct mlx5_ib_dev *dev)
 {
 	struct ib_qp_init_attr *init_attr = NULL;
@@ -2520,7 +2516,7 @@ static int create_umr_res(struct mlx5_ib_dev *dev)
 	dev->umrc.cq = cq;
 	dev->umrc.pd = pd;
 
-	sema_init(&dev->umrc.sem, MAX_UMR_WR);
+	init_waitqueue_head(&dev->umrc.wq);
 	ret = mlx5_mr_cache_init(dev);
 	if (ret) {
 		mlx5_ib_warn(dev, "mr cache init failed %d\n", ret);
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index dcdcd19..de31b5f 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -533,7 +533,12 @@ struct umr_common {
 	struct ib_qp	*qp;
 	/* control access to UMR QP
 	 */
-	struct semaphore	sem;
+	wait_queue_head_t	wq;
+	atomic_t		users;
+};
+
+enum {
+	MAX_UMR_WR = 128,
 };
 
 enum {
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index 1593856..dfaf6f6 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -867,7 +867,8 @@ static inline int mlx5_ib_post_send_wait(struct mlx5_ib_dev *dev,
 	mlx5_ib_init_umr_context(&umr_context);
 	umrwr->wr.wr_cqe = &umr_context.cqe;
 
-	down(&umrc->sem);
+	/* limit number of concurrent ib_post_send() on qp */
+	wait_event(umrc->wq, atomic_add_unless(&umrc->users, 1, MAX_UMR_WR));
 	err = ib_post_send(umrc->qp, &umrwr->wr, &bad);
 	if (err) {
 		mlx5_ib_warn(dev, "UMR post send failed, err %d\n", err);
@@ -879,7 +880,8 @@ static inline int mlx5_ib_post_send_wait(struct mlx5_ib_dev *dev,
 			err = -EFAULT;
 		}
 	}
-	up(&umrc->sem);
+	atomic_dec(&umrc->users);
+	wake_up(&umrc->wq);
 	return err;
 }
 
-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related

* [PATCH v4 08/10] IB/mlx5: Add helper mlx5_ib_post_send_wait
From: Binoy Jayan @ 2016-10-27  6:59 UTC (permalink / raw)
  To: Doug Ledford, Sean Hefty, Hal Rosenstock
  Cc: Arnd Bergmann, linux-rdma-u79uwXL29TY76Z2rM5mHXA,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA, Binoy Jayan
In-Reply-To: <1477551554-30349-1-git-send-email-binoy.jayan-QSEj5FYQhm4dnm+yROfE0A@public.gmane.org>

Clean up the following common code (to post a list of work requests to the
send queue of the specified QP) at various places and add a helper function
'mlx5_ib_post_send_wait' to implement the same.

 - Initialize 'mlx5_ib_umr_context' on stack
 - Assign 'mlx5_umr_wr:wr:wr_cqe' to umr_context.cqe
 - Acquire the semaphore
 - call ib_post_send with a single ib_send_wr
 - wait_for_completion()
 - Check for umr_context.status
 - Release the semaphore

As semaphores are going away in the future, moving all of these into the
shared helper leaves only a single function using the semaphore, which
can then be rewritten to use something else.

Signed-off-by: Binoy Jayan <binoy.jayan-QSEj5FYQhm4dnm+yROfE0A@public.gmane.org>
---
 drivers/infiniband/hw/mlx5/mr.c | 115 +++++++++++-----------------------------
 1 file changed, 32 insertions(+), 83 deletions(-)

diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index d4ad672..1593856 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -856,16 +856,40 @@ static inline void mlx5_ib_init_umr_context(struct mlx5_ib_umr_context *context)
 	init_completion(&context->done);
 }
 
+static inline int mlx5_ib_post_send_wait(struct mlx5_ib_dev *dev,
+					 struct mlx5_umr_wr *umrwr)
+{
+	struct umr_common *umrc = &dev->umrc;
+	struct ib_send_wr *bad;
+	int err;
+	struct mlx5_ib_umr_context umr_context;
+
+	mlx5_ib_init_umr_context(&umr_context);
+	umrwr->wr.wr_cqe = &umr_context.cqe;
+
+	down(&umrc->sem);
+	err = ib_post_send(umrc->qp, &umrwr->wr, &bad);
+	if (err) {
+		mlx5_ib_warn(dev, "UMR post send failed, err %d\n", err);
+	} else {
+		wait_for_completion(&umr_context.done);
+		if (umr_context.status != IB_WC_SUCCESS) {
+			mlx5_ib_warn(dev, "reg umr failed (%u)\n",
+				     umr_context.status);
+			err = -EFAULT;
+		}
+	}
+	up(&umrc->sem);
+	return err;
+}
+
 static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem,
 				  u64 virt_addr, u64 len, int npages,
 				  int page_shift, int order, int access_flags)
 {
 	struct mlx5_ib_dev *dev = to_mdev(pd->device);
 	struct device *ddev = dev->ib_dev.dma_device;
-	struct umr_common *umrc = &dev->umrc;
-	struct mlx5_ib_umr_context umr_context;
 	struct mlx5_umr_wr umrwr = {};
-	struct ib_send_wr *bad;
 	struct mlx5_ib_mr *mr;
 	struct ib_sge sg;
 	int size;
@@ -894,24 +918,12 @@ static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem,
 	if (err)
 		goto free_mr;
 
-	mlx5_ib_init_umr_context(&umr_context);
-
-	umrwr.wr.wr_cqe = &umr_context.cqe;
 	prep_umr_reg_wqe(pd, &umrwr.wr, &sg, dma, npages, mr->mmkey.key,
 			 page_shift, virt_addr, len, access_flags);
 
-	down(&umrc->sem);
-	err = ib_post_send(umrc->qp, &umrwr.wr, &bad);
-	if (err) {
-		mlx5_ib_warn(dev, "post send failed, err %d\n", err);
+	err = mlx5_ib_post_send_wait(dev, &umrwr);
+	if (err && err != -EFAULT)
 		goto unmap_dma;
-	} else {
-		wait_for_completion(&umr_context.done);
-		if (umr_context.status != IB_WC_SUCCESS) {
-			mlx5_ib_warn(dev, "reg umr failed\n");
-			err = -EFAULT;
-		}
-	}
 
 	mr->mmkey.iova = virt_addr;
 	mr->mmkey.size = len;
@@ -920,7 +932,6 @@ static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem,
 	mr->live = 1;
 
 unmap_dma:
-	up(&umrc->sem);
 	dma_unmap_single(ddev, dma, size, DMA_TO_DEVICE);
 
 	kfree(mr_pas);
@@ -940,13 +951,10 @@ int mlx5_ib_update_mtt(struct mlx5_ib_mr *mr, u64 start_page_index, int npages,
 {
 	struct mlx5_ib_dev *dev = mr->dev;
 	struct device *ddev = dev->ib_dev.dma_device;
-	struct umr_common *umrc = &dev->umrc;
-	struct mlx5_ib_umr_context umr_context;
 	struct ib_umem *umem = mr->umem;
 	int size;
 	__be64 *pas;
 	dma_addr_t dma;
-	struct ib_send_wr *bad;
 	struct mlx5_umr_wr wr;
 	struct ib_sge sg;
 	int err = 0;
@@ -1011,10 +1019,7 @@ int mlx5_ib_update_mtt(struct mlx5_ib_mr *mr, u64 start_page_index, int npages,
 
 		dma_sync_single_for_device(ddev, dma, size, DMA_TO_DEVICE);
 
-		mlx5_ib_init_umr_context(&umr_context);
-
 		memset(&wr, 0, sizeof(wr));
-		wr.wr.wr_cqe = &umr_context.cqe;
 
 		sg.addr = dma;
 		sg.length = ALIGN(npages * sizeof(u64),
@@ -1031,19 +1036,7 @@ int mlx5_ib_update_mtt(struct mlx5_ib_mr *mr, u64 start_page_index, int npages,
 		wr.mkey = mr->mmkey.key;
 		wr.target.offset = start_page_index;
 
-		down(&umrc->sem);
-		err = ib_post_send(umrc->qp, &wr.wr, &bad);
-		if (err) {
-			mlx5_ib_err(dev, "UMR post send failed, err %d\n", err);
-		} else {
-			wait_for_completion(&umr_context.done);
-			if (umr_context.status != IB_WC_SUCCESS) {
-				mlx5_ib_err(dev, "UMR completion failed, code %d\n",
-					    umr_context.status);
-				err = -EFAULT;
-			}
-		}
-		up(&umrc->sem);
+		err = mlx5_ib_post_send_wait(dev, &wr);
 	}
 	dma_unmap_single(ddev, dma, size, DMA_TO_DEVICE);
 
@@ -1210,39 +1203,14 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 static int unreg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
 {
 	struct mlx5_core_dev *mdev = dev->mdev;
-	struct umr_common *umrc = &dev->umrc;
-	struct mlx5_ib_umr_context umr_context;
 	struct mlx5_umr_wr umrwr = {};
-	struct ib_send_wr *bad;
-	int err;
 
 	if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)
 		return 0;
 
-	mlx5_ib_init_umr_context(&umr_context);
-
-	umrwr.wr.wr_cqe = &umr_context.cqe;
 	prep_umr_unreg_wqe(dev, &umrwr.wr, mr->mmkey.key);
 
-	down(&umrc->sem);
-	err = ib_post_send(umrc->qp, &umrwr.wr, &bad);
-	if (err) {
-		up(&umrc->sem);
-		mlx5_ib_dbg(dev, "err %d\n", err);
-		goto error;
-	} else {
-		wait_for_completion(&umr_context.done);
-		up(&umrc->sem);
-	}
-	if (umr_context.status != IB_WC_SUCCESS) {
-		mlx5_ib_warn(dev, "unreg umr failed\n");
-		err = -EFAULT;
-		goto error;
-	}
-	return 0;
-
-error:
-	return err;
+	return mlx5_ib_post_send_wait(dev, &umrwr);
 }
 
 static int rereg_umr(struct ib_pd *pd, struct mlx5_ib_mr *mr, u64 virt_addr,
@@ -1251,19 +1219,13 @@ static int rereg_umr(struct ib_pd *pd, struct mlx5_ib_mr *mr, u64 virt_addr,
 {
 	struct mlx5_ib_dev *dev = to_mdev(pd->device);
 	struct device *ddev = dev->ib_dev.dma_device;
-	struct mlx5_ib_umr_context umr_context;
-	struct ib_send_wr *bad;
 	struct mlx5_umr_wr umrwr = {};
 	struct ib_sge sg;
-	struct umr_common *umrc = &dev->umrc;
 	dma_addr_t dma = 0;
 	__be64 *mr_pas = NULL;
 	int size;
 	int err;
 
-	mlx5_ib_init_umr_context(&umr_context);
-
-	umrwr.wr.wr_cqe = &umr_context.cqe;
 	umrwr.wr.send_flags = MLX5_IB_SEND_UMR_FAIL_IF_FREE;
 
 	if (flags & IB_MR_REREG_TRANS) {
@@ -1291,21 +1253,8 @@ static int rereg_umr(struct ib_pd *pd, struct mlx5_ib_mr *mr, u64 virt_addr,
 	}
 
 	/* post send request to UMR QP */
-	down(&umrc->sem);
-	err = ib_post_send(umrc->qp, &umrwr.wr, &bad);
+	err = mlx5_ib_post_send_wait(dev, &umrwr);
 
-	if (err) {
-		mlx5_ib_warn(dev, "post send failed, err %d\n", err);
-	} else {
-		wait_for_completion(&umr_context.done);
-		if (umr_context.status != IB_WC_SUCCESS) {
-			mlx5_ib_warn(dev, "reg umr failed (%u)\n",
-				     umr_context.status);
-			err = -EFAULT;
-		}
-	}
-
-	up(&umrc->sem);
 	if (flags & IB_MR_REREG_TRANS) {
 		dma_unmap_single(ddev, dma, size, DMA_TO_DEVICE);
 		kfree(mr_pas);
-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related

* [PATCH v4 07/10] IB/mthca: Replace counting semaphore event_sem with wait_event
From: Binoy Jayan @ 2016-10-27  6:59 UTC (permalink / raw)
  To: Doug Ledford, Sean Hefty, Hal Rosenstock
  Cc: Arnd Bergmann, linux-rdma, linux-kernel, Binoy Jayan
In-Reply-To: <1477551554-30349-1-git-send-email-binoy.jayan@linaro.org>

Counting semaphores are going away in the future, so replace the semaphore
mthca_cmd::event_sem with a conditional wait_event.

Signed-off-by: Binoy Jayan <binoy.jayan@linaro.org>
---
 drivers/infiniband/hw/mthca/mthca_cmd.c | 47 ++++++++++++++++++++++-----------
 drivers/infiniband/hw/mthca/mthca_dev.h |  3 ++-
 2 files changed, 34 insertions(+), 16 deletions(-)

diff --git a/drivers/infiniband/hw/mthca/mthca_cmd.c b/drivers/infiniband/hw/mthca/mthca_cmd.c
index 49c6e19..d6a048a 100644
--- a/drivers/infiniband/hw/mthca/mthca_cmd.c
+++ b/drivers/infiniband/hw/mthca/mthca_cmd.c
@@ -405,6 +405,34 @@ void mthca_cmd_event(struct mthca_dev *dev,
 	complete(&context->done);
 }
 
+static inline struct mthca_cmd_context *
+mthca_try_get_context(struct mthca_cmd *cmd)
+{
+	struct mthca_cmd_context *context = NULL;
+
+	spin_lock(&cmd->context_lock);
+
+	if (cmd->free_head < 0)
+		goto out;
+
+	context = &cmd->context[cmd->free_head];
+	context->token += cmd->token_mask + 1;
+	cmd->free_head = context->next;
+out:
+	spin_unlock(&cmd->context_lock);
+	return context;
+}
+
+/* wait for and acquire a free context */
+static inline struct mthca_cmd_context *
+mthca_get_free_context(struct mthca_cmd *cmd)
+{
+	struct mthca_cmd_context *context;
+
+	wait_event(cmd->wq, (context = mthca_try_get_context(cmd)));
+	return context;
+}
+
 static int mthca_cmd_wait(struct mthca_dev *dev,
 			  u64 in_param,
 			  u64 *out_param,
@@ -417,15 +445,7 @@ static int mthca_cmd_wait(struct mthca_dev *dev,
 	int err = 0;
 	struct mthca_cmd_context *context;
 
-	down(&dev->cmd.event_sem);
-
-	spin_lock(&dev->cmd.context_lock);
-	BUG_ON(dev->cmd.free_head < 0);
-	context = &dev->cmd.context[dev->cmd.free_head];
-	context->token += dev->cmd.token_mask + 1;
-	dev->cmd.free_head = context->next;
-	spin_unlock(&dev->cmd.context_lock);
-
+	context = mthca_get_free_context(&dev->cmd);
 	init_completion(&context->done);
 
 	err = mthca_cmd_post(dev, in_param,
@@ -458,8 +478,8 @@ static int mthca_cmd_wait(struct mthca_dev *dev,
 	context->next = dev->cmd.free_head;
 	dev->cmd.free_head = context - dev->cmd.context;
 	spin_unlock(&dev->cmd.context_lock);
+	wake_up(&dev->cmd.wq);
 
-	up(&dev->cmd.event_sem);
 	return err;
 }
 
@@ -571,7 +591,7 @@ int mthca_cmd_use_events(struct mthca_dev *dev)
 	dev->cmd.context[dev->cmd.max_cmds - 1].next = -1;
 	dev->cmd.free_head = 0;
 
-	sema_init(&dev->cmd.event_sem, dev->cmd.max_cmds);
+	init_waitqueue_head(&dev->cmd.wq);
 	spin_lock_init(&dev->cmd.context_lock);
 
 	for (dev->cmd.token_mask = 1;
@@ -590,12 +610,9 @@ int mthca_cmd_use_events(struct mthca_dev *dev)
  */
 void mthca_cmd_use_polling(struct mthca_dev *dev)
 {
-	int i;
-
 	dev->cmd.flags &= ~MTHCA_CMD_USE_EVENTS;
 
-	for (i = 0; i < dev->cmd.max_cmds; ++i)
-		down(&dev->cmd.event_sem);
+	dev->cmd.free_head = -1;
 
 	kfree(dev->cmd.context);
 }
diff --git a/drivers/infiniband/hw/mthca/mthca_dev.h b/drivers/infiniband/hw/mthca/mthca_dev.h
index 87ab964..2fc86db 100644
--- a/drivers/infiniband/hw/mthca/mthca_dev.h
+++ b/drivers/infiniband/hw/mthca/mthca_dev.h
@@ -46,6 +46,7 @@
 #include <linux/list.h>
 #include <linux/semaphore.h>
 
+#include <rdma/ib_sa.h>
 #include "mthca_provider.h"
 #include "mthca_doorbell.h"
 
@@ -121,7 +122,7 @@ struct mthca_cmd {
 	struct pci_pool          *pool;
 	struct mutex              hcr_mutex;
 	struct mutex		  poll_mutex;
-	struct semaphore 	  event_sem;
+	wait_queue_head_t	  wq;
 	int              	  max_cmds;
 	spinlock_t                context_lock;
 	int                       free_head;
-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project

^ permalink raw reply related

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox