linux-rdma.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Leon Romanovsky <leon-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
To: Doug Ledford <dledford-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>
Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
	Artemy Kovalyov
	<artemyko-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
Subject: [PATCH rdma-next V1 06/10] IB/umem: Add contiguous ODP support
Date: Wed,  5 Apr 2017 09:23:55 +0300	[thread overview]
Message-ID: <20170405062359.26623-7-leon@kernel.org> (raw)
In-Reply-To: <20170405062359.26623-1-leon-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>

From: Artemy Kovalyov <artemyko-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>

Currently ODP supports only regular MMU pages.
Add ODP support for regions consisting of physically contiguous chunks
of arbitrary order (huge pages for instance) to improve performance.

Signed-off-by: Artemy Kovalyov <artemyko-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
Signed-off-by: Leon Romanovsky <leon-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
---
 drivers/infiniband/core/umem_odp.c | 50 +++++++++++++++++++++++---------------
 include/rdma/ib_umem.h             |  4 +--
 2 files changed, 33 insertions(+), 21 deletions(-)

diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c
index 8ee30163497d..73053c8a9e3b 100644
--- a/drivers/infiniband/core/umem_odp.c
+++ b/drivers/infiniband/core/umem_odp.c
@@ -504,7 +504,6 @@ void ib_umem_odp_release(struct ib_umem *umem)
 static int ib_umem_odp_map_dma_single_page(
 		struct ib_umem *umem,
 		int page_index,
-		u64 base_virt_addr,
 		struct page *page,
 		u64 access_mask,
 		unsigned long current_seq)
@@ -527,7 +526,7 @@ static int ib_umem_odp_map_dma_single_page(
 	if (!(umem->odp_data->dma_list[page_index])) {
 		dma_addr = ib_dma_map_page(dev,
 					   page,
-					   0, PAGE_SIZE,
+					   0, BIT(umem->page_shift),
 					   DMA_BIDIRECTIONAL);
 		if (ib_dma_mapping_error(dev, dma_addr)) {
 			ret = -EFAULT;
@@ -555,8 +554,9 @@ static int ib_umem_odp_map_dma_single_page(
 	if (remove_existing_mapping && umem->context->invalidate_range) {
 		invalidate_page_trampoline(
 			umem,
-			base_virt_addr + (page_index * PAGE_SIZE),
-			base_virt_addr + ((page_index+1)*PAGE_SIZE),
+			ib_umem_start(umem) + (page_index >> umem->page_shift),
+			ib_umem_start(umem) + ((page_index + 1) >>
+					       umem->page_shift),
 			NULL);
 		ret = -EAGAIN;
 	}
@@ -595,10 +595,10 @@ int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 user_virt, u64 bcnt,
 	struct task_struct *owning_process  = NULL;
 	struct mm_struct   *owning_mm       = NULL;
 	struct page       **local_page_list = NULL;
-	u64 off;
-	int j, k, ret = 0, start_idx, npages = 0;
-	u64 base_virt_addr;
+	u64 page_mask, off;
+	int j, k, ret = 0, start_idx, npages = 0, page_shift;
 	unsigned int flags = 0;
+	phys_addr_t p = 0;
 
 	if (access_mask == 0)
 		return -EINVAL;
@@ -611,9 +611,10 @@ int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 user_virt, u64 bcnt,
 	if (!local_page_list)
 		return -ENOMEM;
 
-	off = user_virt & (~PAGE_MASK);
-	user_virt = user_virt & PAGE_MASK;
-	base_virt_addr = user_virt;
+	page_shift = umem->page_shift;
+	page_mask = ~(BIT(page_shift) - 1);
+	off = user_virt & (~page_mask);
+	user_virt = user_virt & page_mask;
 	bcnt += off; /* Charge for the first page offset as well. */
 
 	owning_process = get_pid_task(umem->context->tgid, PIDTYPE_PID);
@@ -631,13 +632,13 @@ int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 user_virt, u64 bcnt,
 	if (access_mask & ODP_WRITE_ALLOWED_BIT)
 		flags |= FOLL_WRITE;
 
-	start_idx = (user_virt - ib_umem_start(umem)) >> PAGE_SHIFT;
+	start_idx = (user_virt - ib_umem_start(umem)) >> page_shift;
 	k = start_idx;
 
 	while (bcnt > 0) {
-		const size_t gup_num_pages =
-			min_t(size_t, ALIGN(bcnt, PAGE_SIZE) / PAGE_SIZE,
-			      PAGE_SIZE / sizeof(struct page *));
+		const size_t gup_num_pages = min_t(size_t,
+				(bcnt + BIT(page_shift) - 1) >> page_shift,
+				PAGE_SIZE / sizeof(struct page *));
 
 		down_read(&owning_mm->mmap_sem);
 		/*
@@ -656,14 +657,25 @@ int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 user_virt, u64 bcnt,
 			break;
 
 		bcnt -= min_t(size_t, npages << PAGE_SHIFT, bcnt);
-		user_virt += npages << PAGE_SHIFT;
 		mutex_lock(&umem->odp_data->umem_mutex);
-		for (j = 0; j < npages; ++j) {
+		for (j = 0; j < npages; j++, user_virt += PAGE_SIZE) {
+			if (user_virt & ~page_mask) {
+				p += PAGE_SIZE;
+				if (page_to_phys(local_page_list[j]) != p) {
+					ret = -EFAULT;
+					break;
+				}
+				put_page(local_page_list[j]);
+				continue;
+			}
+
 			ret = ib_umem_odp_map_dma_single_page(
-				umem, k, base_virt_addr, local_page_list[j],
-				access_mask, current_seq);
+					umem, k, local_page_list[j],
+					access_mask, current_seq);
 			if (ret < 0)
 				break;
+
+			p = page_to_phys(local_page_list[j]);
 			k++;
 		}
 		mutex_unlock(&umem->odp_data->umem_mutex);
@@ -708,7 +720,7 @@ void ib_umem_odp_unmap_dma_pages(struct ib_umem *umem, u64 virt,
 	 * once. */
 	mutex_lock(&umem->odp_data->umem_mutex);
 	for (addr = virt; addr < bound; addr += BIT(umem->page_shift)) {
-		idx = (addr - ib_umem_start(umem)) / PAGE_SIZE;
+		idx = (addr - ib_umem_start(umem)) >> umem->page_shift;
 		if (umem->odp_data->page_list[idx]) {
 			struct page *page = umem->odp_data->page_list[idx];
 			dma_addr_t dma = umem->odp_data->dma_list[idx];
diff --git a/include/rdma/ib_umem.h b/include/rdma/ib_umem.h
index 7f4af1e1ae64..23159dd5be18 100644
--- a/include/rdma/ib_umem.h
+++ b/include/rdma/ib_umem.h
@@ -72,12 +72,12 @@ static inline unsigned long ib_umem_start(struct ib_umem *umem)
 /* Returns the address of the page after the last one of an ODP umem. */
 static inline unsigned long ib_umem_end(struct ib_umem *umem)
 {
-	return PAGE_ALIGN(umem->address + umem->length);
+	return ALIGN(umem->address + umem->length, BIT(umem->page_shift));
 }
 
 static inline size_t ib_umem_num_pages(struct ib_umem *umem)
 {
-	return (ib_umem_end(umem) - ib_umem_start(umem)) >> PAGE_SHIFT;
+	return (ib_umem_end(umem) - ib_umem_start(umem)) >> umem->page_shift;
 }
 
 #ifdef CONFIG_INFINIBAND_USER_MEM
-- 
2.12.0

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

  parent reply	other threads:[~2017-04-05  6:23 UTC|newest]

Thread overview: 18+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-04-05  6:23 [PATCH rdma-next V1 00/10] ODP Fixes and Improvements Leon Romanovsky
     [not found] ` <20170405062359.26623-1-leon-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
2017-04-05  6:23   ` [PATCH rdma-next V1 01/10] IB: Replace ib_umem page_size by page_shift Leon Romanovsky
     [not found]     ` <20170405062359.26623-2-leon-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
2017-04-05  9:48       ` Amrani, Ram
2017-04-05 16:38       ` Saleem, Shiraz
2017-04-05 17:18       ` Selvin Xavier
2017-04-05 17:30       ` Adit Ranadive
2017-04-05  6:23   ` [PATCH rdma-next V1 02/10] IB/mlx5: Fix function updating xlt emergency path Leon Romanovsky
2017-04-05  6:23   ` [PATCH rdma-next V1 03/10] IB/mlx5: Fix UMR size calculation Leon Romanovsky
2017-04-05  6:23   ` [PATCH rdma-next V1 04/10] IB/mlx5: Fix implicit MR GC Leon Romanovsky
2017-04-05  6:23   ` [PATCH rdma-next V1 05/10] IB/mlx5: Decrease verbosity level of ODP errors Leon Romanovsky
2017-04-05  6:23   ` Leon Romanovsky [this message]
2017-04-05  6:23   ` [PATCH rdma-next V1 07/10] IB/mlx5: Add contiguous ODP support Leon Romanovsky
2017-04-05  6:23   ` [PATCH rdma-next V1 08/10] IB/umem: Add support to huge ODP Leon Romanovsky
     [not found]     ` <20170405062359.26623-9-leon-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
2017-04-05 16:45       ` Shiraz Saleem
     [not found]         ` <20170405164539.GA9232-GOXS9JX10wfOxmVO0tvppfooFf0ArEBIu+b9c/7xato@public.gmane.org>
2017-04-05 17:33           ` Leon Romanovsky
2017-04-05  6:23   ` [PATCH rdma-next V1 09/10] IB/mlx5: Extract page fault code Leon Romanovsky
2017-04-05  6:23   ` [PATCH rdma-next V1 10/10] IB/mlx5: Add ODP support to MW Leon Romanovsky
2017-04-25 19:41   ` [PATCH rdma-next V1 00/10] ODP Fixes and Improvements Doug Ledford

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20170405062359.26623-7-leon@kernel.org \
    --to=leon-dgejt+ai2ygdnm+yrofe0a@public.gmane.org \
    --cc=artemyko-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org \
    --cc=dledford-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org \
    --cc=linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).