From: Dennis Dalessandro <dennis.dalessandro-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
To: dledford-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org
Cc: Harish Chegondi
<harish.chegondi-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>,
linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
Subject: [PATCH for-next 07/16] IB/hfi1: Clean up hfi1_user_exp_rcv_setup function
Date: Mon, 21 Aug 2017 18:26:51 -0700 [thread overview]
Message-ID: <20170822012650.32701.82203.stgit@scvm10.sc.intel.com> (raw)
In-Reply-To: <20170822011657.32701.22207.stgit-9QXIwq+3FY+1XWohqUldA0EOCMrvLtNR@public.gmane.org>
From: Harish Chegondi <harish.chegondi-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
Clean up hfi1_user_exp_rcv_setup function by moving page pinning and
unpinning related code to separate functions. In order to reduce the
number of parameters passed between functions, a new data structure
struct tid_user_buf is defined and used.
Reviewed-by: Dennis Dalessandro <dennis.dalessandro-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
Signed-off-by: Harish Chegondi <harish.chegondi-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
---
drivers/infiniband/hw/hfi1/user_exp_rcv.c | 232 ++++++++++++++++++-----------
drivers/infiniband/hw/hfi1/user_exp_rcv.h | 9 +
2 files changed, 153 insertions(+), 88 deletions(-)
diff --git a/drivers/infiniband/hw/hfi1/user_exp_rcv.c b/drivers/infiniband/hw/hfi1/user_exp_rcv.c
index d9036ba..04be178 100644
--- a/drivers/infiniband/hw/hfi1/user_exp_rcv.c
+++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.c
@@ -75,20 +75,21 @@ struct tid_pageset {
static void unlock_exp_tids(struct hfi1_ctxtdata *uctxt,
struct exp_tid_set *set,
struct hfi1_filedata *fd);
-static u32 find_phys_blocks(struct page **pages, unsigned npages,
- struct tid_pageset *list);
-static int set_rcvarray_entry(struct hfi1_filedata *fd, unsigned long vaddr,
+static u32 find_phys_blocks(struct tid_user_buf *tidbuf, unsigned int npages);
+static int set_rcvarray_entry(struct hfi1_filedata *fd,
+ struct tid_user_buf *tbuf,
u32 rcventry, struct tid_group *grp,
- struct page **pages, unsigned npages);
+ u16 pageidx, unsigned int npages);
static int tid_rb_insert(void *arg, struct mmu_rb_node *node);
static void cacheless_tid_rb_remove(struct hfi1_filedata *fdata,
struct tid_rb_node *tnode);
static void tid_rb_remove(void *arg, struct mmu_rb_node *node);
static int tid_rb_invalidate(void *arg, struct mmu_rb_node *mnode);
-static int program_rcvarray(struct hfi1_filedata *fd, unsigned long vaddr,
- struct tid_group *grp, struct tid_pageset *sets,
- unsigned start, u16 count, struct page **pages,
- u32 *tidlist, unsigned *tididx, unsigned *pmapped);
+static int program_rcvarray(struct hfi1_filedata *fd, struct tid_user_buf *,
+ struct tid_group *grp,
+ unsigned int start, u16 count,
+ u32 *tidlist, unsigned int *tididx,
+ unsigned int *pmapped);
static int unprogram_rcvarray(struct hfi1_filedata *fd, u32 tidinfo,
struct tid_group **grp);
static void clear_tid_node(struct hfi1_filedata *fd, struct tid_rb_node *node);
@@ -198,6 +199,92 @@ void hfi1_user_exp_rcv_free(struct hfi1_filedata *fd)
fd->entry_to_rb = NULL;
}
+/**
+ * unpin_rcv_pages() - Release pinned receive buffer pages.
+ * @fd: per-file data; supplies the mm, the device (via fd->uctxt->dd) and
+ *      the tid_n_pinned counter that is decremented here
+ * @tidbuf: user buffer whose pages[] array is used when @mapped is false;
+ *          not dereferenced when @mapped is true (callers pass NULL)
+ * @node: TID node whose pages[] array and DMA mapping are used when @mapped
+ *        is true; not dereferenced when @mapped is false (callers pass NULL)
+ * @idx: index of the first page to unpin
+ * @npages: number of pages to unpin
+ * @mapped: true if the pages have been DMA mapped, false otherwise
+ *
+ * If the pages have been DMA mapped (indicated by @mapped), their info is
+ * taken from @node: the mapping at node->dma_addr of length node->mmu.len is
+ * removed first, then pages are released starting at node->pages[idx].
+ * If they haven't been mapped, pages are released starting at
+ * tidbuf->pages[idx].
+ *
+ * In both cases fd->tid_n_pinned is reduced by @npages.
+ *
+ * NOTE(review): @mapped is also forwarded as the last argument of
+ * hfi1_release_user_pages(); presumably that argument is its "dirty" flag,
+ * i.e. only pages the device may have written are marked dirty - confirm.
+ */
+static void unpin_rcv_pages(struct hfi1_filedata *fd,
+ struct tid_user_buf *tidbuf,
+ struct tid_rb_node *node,
+ unsigned int idx,
+ unsigned int npages,
+ bool mapped)
+{
+ struct page **pages;
+ struct hfi1_devdata *dd = fd->uctxt->dd;
+
+ if (mapped) {
+ pci_unmap_single(dd->pcidev, node->dma_addr,
+ node->mmu.len, PCI_DMA_FROMDEVICE);
+ pages = &node->pages[idx];
+ } else {
+ pages = &tidbuf->pages[idx];
+ }
+ hfi1_release_user_pages(fd->mm, pages, npages, mapped);
+ fd->tid_n_pinned -= npages;
+}
+
+/**
+ * pin_rcv_pages() - Pin receive buffer pages.
+ * @fd: per-file data; supplies the mm, the context (fd->uctxt) and the
+ *      tid_n_pinned counter that is incremented on success
+ * @tidbuf: user buffer description; vaddr and length are read, pages and
+ *          npages are written on success only
+ *
+ * On success a freshly allocated array of page pointers is stored in
+ * tidbuf->pages and ownership passes to the caller, who must kfree() it.
+ * tidbuf->npages is set to the number of pages the buffer spans, which may
+ * be larger than the number actually pinned; fd->tid_n_pinned grows by the
+ * number actually pinned. On any failure the array is freed here and
+ * @tidbuf is left unchanged.
+ *
+ * Return: the number of pages pinned (> 0) on success;
+ * -EINVAL if the buffer spans no pages or more than
+ * fd->uctxt->expected_count pages;
+ * -EFAULT if access_ok() rejects the user range;
+ * -ENOMEM if the page pointer array cannot be allocated or
+ * hfi1_can_pin_pages() refuses the request;
+ * otherwise the zero or negative value returned by
+ * hfi1_acquire_user_pages().
+ */
+static int pin_rcv_pages(struct hfi1_filedata *fd, struct tid_user_buf *tidbuf)
+{
+ int pinned;
+ unsigned int npages;
+ unsigned long vaddr = tidbuf->vaddr;
+ struct page **pages = NULL;
+ struct hfi1_devdata *dd = fd->uctxt->dd;
+
+ /* Get the number of pages the user buffer spans */
+ npages = num_user_pages(vaddr, tidbuf->length);
+ if (!npages)
+ return -EINVAL;
+
+ if (npages > fd->uctxt->expected_count) {
+ dd_dev_err(dd, "Expected buffer too big\n");
+ return -EINVAL;
+ }
+
+ /* Verify that access is OK for the user buffer */
+ if (!access_ok(VERIFY_WRITE, (void __user *)vaddr,
+ npages * PAGE_SIZE)) {
+ dd_dev_err(dd, "Fail vaddr %p, %u pages, !access_ok\n",
+ (void *)vaddr, npages);
+ return -EFAULT;
+ }
+ /* Allocate the array of struct page pointers needed for pinning */
+ pages = kcalloc(npages, sizeof(*pages), GFP_KERNEL);
+ if (!pages)
+ return -ENOMEM;
+
+ /*
+ * Pin all the pages of the user buffer. If we can't pin all the
+ * pages, accept the amount pinned so far and program only that.
+ * User space knows how to deal with partially programmed buffers.
+ */
+ if (!hfi1_can_pin_pages(dd, fd->mm, fd->tid_n_pinned, npages)) {
+ kfree(pages);
+ return -ENOMEM;
+ }
+
+ pinned = hfi1_acquire_user_pages(fd->mm, vaddr, npages, true, pages);
+ if (pinned <= 0) {
+ kfree(pages);
+ return pinned;
+ }
+ tidbuf->pages = pages;
+ tidbuf->npages = npages;
+ fd->tid_n_pinned += pinned;
+ return pinned;
+}
+
/*
* RcvArray entry allocation for Expected Receives is done by the
* following algorithm:
@@ -253,62 +340,33 @@ int hfi1_user_exp_rcv_setup(struct hfi1_filedata *fd,
int ret = 0, need_group = 0, pinned;
struct hfi1_ctxtdata *uctxt = fd->uctxt;
struct hfi1_devdata *dd = uctxt->dd;
- unsigned npages, ngroups, pageidx = 0, pageset_count, npagesets,
+ unsigned int ngroups, pageidx = 0, pageset_count,
tididx = 0, mapped, mapped_pages = 0;
- unsigned long vaddr = tinfo->vaddr;
- struct page **pages = NULL;
u32 *tidlist = NULL;
- struct tid_pageset *pagesets = NULL;
-
- /* Get the number of pages the user buffer spans */
- npages = num_user_pages(vaddr, tinfo->length);
- if (!npages)
- return -EINVAL;
-
- if (npages > uctxt->expected_count) {
- dd_dev_err(dd, "Expected buffer too big\n");
- return -EINVAL;
- }
-
- /* Verify that access is OK for the user buffer */
- if (!access_ok(VERIFY_WRITE, (void __user *)vaddr,
- npages * PAGE_SIZE)) {
- dd_dev_err(dd, "Fail vaddr %p, %u pages, !access_ok\n",
- (void *)vaddr, npages);
- return -EFAULT;
- }
+ struct tid_user_buf *tidbuf;
- pagesets = kcalloc(uctxt->expected_count, sizeof(*pagesets),
- GFP_KERNEL);
- if (!pagesets)
+ tidbuf = kzalloc(sizeof(*tidbuf), GFP_KERNEL);
+ if (!tidbuf)
return -ENOMEM;
- /* Allocate the array of struct page pointers needed for pinning */
- pages = kcalloc(npages, sizeof(*pages), GFP_KERNEL);
- if (!pages) {
- ret = -ENOMEM;
- goto bail;
- }
-
- /*
- * Pin all the pages of the user buffer. If we can't pin all the
- * pages, accept the amount pinned so far and program only that.
- * User space knows how to deal with partially programmed buffers.
- */
- if (!hfi1_can_pin_pages(dd, fd->mm, fd->tid_n_pinned, npages)) {
- ret = -ENOMEM;
- goto bail;
+ tidbuf->vaddr = tinfo->vaddr;
+ tidbuf->length = tinfo->length;
+ tidbuf->psets = kcalloc(uctxt->expected_count, sizeof(*tidbuf->psets),
+ GFP_KERNEL);
+ if (!tidbuf->psets) {
+ kfree(tidbuf);
+ return -ENOMEM;
}
- pinned = hfi1_acquire_user_pages(fd->mm, vaddr, npages, true, pages);
+ pinned = pin_rcv_pages(fd, tidbuf);
if (pinned <= 0) {
- ret = pinned;
- goto bail;
+ kfree(tidbuf->psets);
+ kfree(tidbuf);
+ return pinned;
}
- fd->tid_n_pinned += npages;
/* Find sets of physically contiguous pages */
- npagesets = find_phys_blocks(pages, pinned, pagesets);
+ tidbuf->n_psets = find_phys_blocks(tidbuf, pinned);
/*
* We don't need to access this under a lock since tid_used is per
@@ -316,10 +374,10 @@ int hfi1_user_exp_rcv_setup(struct hfi1_filedata *fd,
* and hfi1_user_exp_rcv_setup() at the same time.
*/
spin_lock(&fd->tid_lock);
- if (fd->tid_used + npagesets > fd->tid_limit)
+ if (fd->tid_used + tidbuf->n_psets > fd->tid_limit)
pageset_count = fd->tid_limit - fd->tid_used;
else
- pageset_count = npagesets;
+ pageset_count = tidbuf->n_psets;
spin_unlock(&fd->tid_lock);
if (!pageset_count)
@@ -347,9 +405,9 @@ int hfi1_user_exp_rcv_setup(struct hfi1_filedata *fd,
struct tid_group *grp =
tid_group_pop(&uctxt->tid_group_list);
- ret = program_rcvarray(fd, vaddr, grp, pagesets,
+ ret = program_rcvarray(fd, tidbuf, grp,
pageidx, dd->rcv_entries.group_size,
- pages, tidlist, &tididx, &mapped);
+ tidlist, &tididx, &mapped);
/*
* If there was a failure to program the RcvArray
* entries for the entire group, reset the grp fields
@@ -393,8 +451,8 @@ int hfi1_user_exp_rcv_setup(struct hfi1_filedata *fd,
unsigned use = min_t(unsigned, pageset_count - pageidx,
grp->size - grp->used);
- ret = program_rcvarray(fd, vaddr, grp, pagesets,
- pageidx, use, pages, tidlist,
+ ret = program_rcvarray(fd, tidbuf, grp,
+ pageidx, use, tidlist,
&tididx, &mapped);
if (ret < 0) {
hfi1_cdbg(TID,
@@ -454,16 +512,14 @@ int hfi1_user_exp_rcv_setup(struct hfi1_filedata *fd,
* If not everything was mapped (due to insufficient RcvArray entries,
* for example), unpin all unmapped pages so we can pin them nex time.
*/
- if (mapped_pages != pinned) {
- hfi1_release_user_pages(fd->mm, &pages[mapped_pages],
- pinned - mapped_pages,
- false);
- fd->tid_n_pinned -= pinned - mapped_pages;
- }
+ if (mapped_pages != pinned)
+ unpin_rcv_pages(fd, tidbuf, NULL, mapped_pages,
+ (pinned - mapped_pages), false);
bail:
- kfree(pagesets);
- kfree(pages);
+ kfree(tidbuf->psets);
kfree(tidlist);
+ kfree(tidbuf->pages);
+ kfree(tidbuf);
return ret > 0 ? 0 : ret;
}
@@ -553,11 +609,12 @@ int hfi1_user_exp_rcv_invalid(struct hfi1_filedata *fd,
return ret;
}
-static u32 find_phys_blocks(struct page **pages, unsigned npages,
- struct tid_pageset *list)
+static u32 find_phys_blocks(struct tid_user_buf *tidbuf, unsigned int npages)
{
unsigned pagecount, pageidx, setcount = 0, i;
unsigned long pfn, this_pfn;
+ struct page **pages = tidbuf->pages;
+ struct tid_pageset *list = tidbuf->psets;
if (!npages)
return 0;
@@ -620,13 +677,13 @@ static u32 find_phys_blocks(struct page **pages, unsigned npages,
/**
* program_rcvarray() - program an RcvArray group with receive buffers
* @fd: filedata pointer
- * @vaddr: starting user virtual address
+ * @tbuf: pointer to struct tid_user_buf that has the user buffer starting
+ * virtual address, buffer length, page pointers, pagesets (array of
+ * struct tid_pageset holding information on physically contiguous
+ * chunks from the user buffer), and other fields.
* @grp: RcvArray group
- * @sets: array of struct tid_pageset holding information on physically
- * contiguous chunks from the user buffer
* @start: starting index into sets array
* @count: number of struct tid_pageset's to program
- * @pages: an array of struct page * for the user buffer
* @tidlist: the array of u32 elements when the information about the
* programmed RcvArray entries is to be encoded.
* @tididx: starting offset into tidlist
@@ -644,11 +701,11 @@ static u32 find_phys_blocks(struct page **pages, unsigned npages,
* -ENOMEM or -EFAULT on error from set_rcvarray_entry(), or
* number of RcvArray entries programmed.
*/
-static int program_rcvarray(struct hfi1_filedata *fd, unsigned long vaddr,
+static int program_rcvarray(struct hfi1_filedata *fd, struct tid_user_buf *tbuf,
struct tid_group *grp,
- struct tid_pageset *sets,
- unsigned start, u16 count, struct page **pages,
- u32 *tidlist, unsigned *tididx, unsigned *pmapped)
+ unsigned int start, u16 count,
+ u32 *tidlist, unsigned int *tididx,
+ unsigned int *pmapped)
{
struct hfi1_ctxtdata *uctxt = fd->uctxt;
struct hfi1_devdata *dd = uctxt->dd;
@@ -687,11 +744,11 @@ static int program_rcvarray(struct hfi1_filedata *fd, unsigned long vaddr,
}
rcventry = grp->base + useidx;
- npages = sets[setidx].count;
- pageidx = sets[setidx].idx;
+ npages = tbuf->psets[setidx].count;
+ pageidx = tbuf->psets[setidx].idx;
- ret = set_rcvarray_entry(fd, vaddr + (pageidx * PAGE_SIZE),
- rcventry, grp, pages + pageidx,
+ ret = set_rcvarray_entry(fd, tbuf,
+ rcventry, grp, pageidx,
npages);
if (ret)
return ret;
@@ -712,15 +769,17 @@ static int program_rcvarray(struct hfi1_filedata *fd, unsigned long vaddr,
return idx;
}
-static int set_rcvarray_entry(struct hfi1_filedata *fd, unsigned long vaddr,
+static int set_rcvarray_entry(struct hfi1_filedata *fd,
+ struct tid_user_buf *tbuf,
u32 rcventry, struct tid_group *grp,
- struct page **pages, unsigned npages)
+ u16 pageidx, unsigned int npages)
{
int ret;
struct hfi1_ctxtdata *uctxt = fd->uctxt;
struct tid_rb_node *node;
struct hfi1_devdata *dd = uctxt->dd;
dma_addr_t phys;
+ struct page **pages = tbuf->pages + pageidx;
/*
* Allocate the node first so we can handle a potential
@@ -741,7 +800,7 @@ static int set_rcvarray_entry(struct hfi1_filedata *fd, unsigned long vaddr,
return -EFAULT;
}
- node->mmu.addr = vaddr;
+ node->mmu.addr = tbuf->vaddr + (pageidx * PAGE_SIZE);
node->mmu.len = npages * PAGE_SIZE;
node->phys = page_to_phys(pages[0]);
node->npages = npages;
@@ -820,10 +879,7 @@ static void clear_tid_node(struct hfi1_filedata *fd, struct tid_rb_node *node)
*/
hfi1_put_tid(dd, node->rcventry, PT_INVALID_FLUSH, 0, 0);
- pci_unmap_single(dd->pcidev, node->dma_addr, node->mmu.len,
- PCI_DMA_FROMDEVICE);
- hfi1_release_user_pages(fd->mm, node->pages, node->npages, true);
- fd->tid_n_pinned -= node->npages;
+ unpin_rcv_pages(fd, NULL, node, 0, node->npages, true);
node->grp->used--;
node->grp->map &= ~(1 << (node->rcventry - node->grp->base));
diff --git a/drivers/infiniband/hw/hfi1/user_exp_rcv.h b/drivers/infiniband/hw/hfi1/user_exp_rcv.h
index 6cbaa4c..8c4eb5d 100644
--- a/drivers/infiniband/hw/hfi1/user_exp_rcv.h
+++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.h
@@ -51,6 +51,15 @@
#include "exp_rcv.h"
+struct tid_user_buf {
+ unsigned long vaddr; /* starting user virtual address (copied from tinfo->vaddr) */
+ unsigned long length; /* buffer length (copied from tinfo->length) */
+ unsigned int npages; /* number of pages the buffer spans, set by pin_rcv_pages() */
+ struct page **pages; /* page pointer array allocated by pin_rcv_pages() */
+ struct tid_pageset *psets; /* array of uctxt->expected_count pageset entries */
+ unsigned int n_psets; /* number of entries in psets filled by find_phys_blocks() */
+};
+
int hfi1_user_exp_rcv_init(struct hfi1_filedata *fd,
struct hfi1_ctxtdata *uctxt);
void hfi1_user_exp_rcv_free(struct hfi1_filedata *fd);
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
next prev parent reply other threads:[~2017-08-22 1:26 UTC|newest]
Thread overview: 24+ messages / expand[flat|nested] mbox.gz Atom feed top
2017-08-22 1:26 [PATCH for-next 00/16] IB/hfi1, qib, rdmavt: patches for next 08/21/2017 Dennis Dalessandro
2017-08-22 1:26 ` [PATCH for-next 02/16] IB/{qib, hfi1}: Avoid flow control testing for RDMA write operation Dennis Dalessandro
[not found] ` <20170822011657.32701.22207.stgit-9QXIwq+3FY+1XWohqUldA0EOCMrvLtNR@public.gmane.org>
2017-08-22 1:26 ` [PATCH for-next 01/16] IB/rdmavt: Use rvt_put_swqe() in rvt_clear_mr_ref() Dennis Dalessandro
2017-08-22 1:26 ` [PATCH for-next 03/16] IB/qib: Remove unnecessary memory allocation for boardname Dennis Dalessandro
2017-08-22 1:26 ` [PATCH for-next 04/16] IB/qib: Stricter bounds checking for copy and array access Dennis Dalessandro
2017-08-22 1:26 ` [PATCH for-next 05/16] IB/hfi1: Ratelimit prints from sdma_interrupt Dennis Dalessandro
2017-08-22 1:26 ` [PATCH for-next 06/16] IB/hfi1: Improve local kmem_cache_alloc performance Dennis Dalessandro
2017-08-22 1:26 ` Dennis Dalessandro [this message]
2017-08-22 1:26 ` [PATCH for-next 08/16] IB/hfi1: Clean up user_sdma_send_pkts() function Dennis Dalessandro
2017-08-22 1:27 ` [PATCH for-next 09/16] IB/hfi1: Clean up pin_vector_pages() function Dennis Dalessandro
[not found] ` <20170822012702.32701.90032.stgit-9QXIwq+3FY+1XWohqUldA0EOCMrvLtNR@public.gmane.org>
2017-08-22 15:46 ` Leon Romanovsky
[not found] ` <20170822154659.GE1724-U/DQcQFIOTAAJjI8aNfphQ@public.gmane.org>
2017-08-22 18:39 ` Harish Chegondi
[not found] ` <599C7A74.9010301-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
2017-08-23 4:49 ` Leon Romanovsky
[not found] ` <20170823044900.GK1724-U/DQcQFIOTAAJjI8aNfphQ@public.gmane.org>
2017-08-28 5:00 ` Harish Chegondi
2017-08-22 1:27 ` [PATCH for-next 10/16] IB/hfi1: Fix the bail out code in " Dennis Dalessandro
2017-08-22 1:27 ` [PATCH for-next 11/16] IB/hfi1: Remove duplicate definitions of num_user_pages() function Dennis Dalessandro
2017-08-22 1:27 ` [PATCH for-next 12/16] IB/hfi1: Move structure definitions from user_exp_rcv.c to user_exp_rcv.h Dennis Dalessandro
2017-08-22 1:27 ` [PATCH for-next 13/16] IB/hfi1: Move structure and MACRO definitions in user_sdma.c to user_sdma.h Dennis Dalessandro
[not found] ` <20170822012728.32701.38661.stgit-9QXIwq+3FY+1XWohqUldA0EOCMrvLtNR@public.gmane.org>
2017-08-22 15:40 ` Leon Romanovsky
[not found] ` <20170822154025.GD1724-U/DQcQFIOTAAJjI8aNfphQ@public.gmane.org>
2017-08-28 5:04 ` Harish Chegondi
2017-08-22 1:27 ` [PATCH for-next 14/16] IB/hfi1: Fix whitespace alignment issue for MAD Dennis Dalessandro
2017-08-22 1:27 ` [PATCH for-next 15/16] IB/hfi1: Add received request info to qp_stats Dennis Dalessandro
2017-08-22 1:27 ` [PATCH for-next 16/16] IB/hfi1: Add opcode states " Dennis Dalessandro
2017-08-28 23:16 ` [PATCH for-next 00/16] IB/hfi1, qib, rdmavt: patches for next 08/21/2017 Doug Ledford
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20170822012650.32701.82203.stgit@scvm10.sc.intel.com \
--to=dennis.dalessandro-ral2jqcrhueavxtiumwx3w@public.gmane.org \
--cc=dledford-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org \
--cc=harish.chegondi-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org \
--cc=linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox