* [PATCH v3] vhost-vdpa: fix page pinning leakage in error path (rework)
@ 2020-11-05 23:26 ` Si-Wei Liu
0 siblings, 0 replies; 12+ messages in thread
From: Si-Wei Liu @ 2020-11-05 23:26 UTC (permalink / raw)
To: mst, jasowang, lingshan.zhu
Cc: si-wei.liu, virtualization, boris.ostrovsky, linux-kernel
Pinned pages are not properly accounted particularly when
mapping error occurs on IOTLB update. Clean up dangling
pinned pages for the error path.
The memory usage for bookkeeping pinned pages is reverted
to what it was before: only one single free page is needed.
This helps reduce the host memory demand for VM with a large
amount of memory, or in the situation where host is running
short of free memory.
Fixes: 4c8cf31885f6 ("vhost: introduce vDPA-based backend")
Signed-off-by: Si-Wei Liu <si-wei.liu@oracle.com>
---
Changes in v3:
- Turn explicit last_pfn check to a WARN_ON() (Jason)
Changes in v2:
- Drop the reversion patch
- Fix unhandled page leak towards the end of page_list
drivers/vhost/vdpa.c | 80 ++++++++++++++++++++++++++++++++++++++++------------
1 file changed, 62 insertions(+), 18 deletions(-)
diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c
index b6d9016..5b13dfd 100644
--- a/drivers/vhost/vdpa.c
+++ b/drivers/vhost/vdpa.c
@@ -560,6 +560,8 @@ static int vhost_vdpa_map(struct vhost_vdpa *v,
if (r)
vhost_iotlb_del_range(dev->iotlb, iova, iova + size - 1);
+ else
+ atomic64_add(size >> PAGE_SHIFT, &dev->mm->pinned_vm);
return r;
}
@@ -591,14 +593,16 @@ static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v,
unsigned long list_size = PAGE_SIZE / sizeof(struct page *);
unsigned int gup_flags = FOLL_LONGTERM;
unsigned long npages, cur_base, map_pfn, last_pfn = 0;
- unsigned long locked, lock_limit, pinned, i;
+ unsigned long lock_limit, sz2pin, nchunks, i;
u64 iova = msg->iova;
+ long pinned;
int ret = 0;
if (vhost_iotlb_itree_first(iotlb, msg->iova,
msg->iova + msg->size - 1))
return -EEXIST;
+ /* Limit the use of memory for bookkeeping */
page_list = (struct page **) __get_free_page(GFP_KERNEL);
if (!page_list)
return -ENOMEM;
@@ -607,52 +611,75 @@ static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v,
gup_flags |= FOLL_WRITE;
npages = PAGE_ALIGN(msg->size + (iova & ~PAGE_MASK)) >> PAGE_SHIFT;
- if (!npages)
- return -EINVAL;
+ if (!npages) {
+ ret = -EINVAL;
+ goto free;
+ }
mmap_read_lock(dev->mm);
- locked = atomic64_add_return(npages, &dev->mm->pinned_vm);
lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
-
- if (locked > lock_limit) {
+ if (npages + atomic64_read(&dev->mm->pinned_vm) > lock_limit) {
ret = -ENOMEM;
- goto out;
+ goto unlock;
}
cur_base = msg->uaddr & PAGE_MASK;
iova &= PAGE_MASK;
+ nchunks = 0;
while (npages) {
- pinned = min_t(unsigned long, npages, list_size);
- ret = pin_user_pages(cur_base, pinned,
- gup_flags, page_list, NULL);
- if (ret != pinned)
+ sz2pin = min_t(unsigned long, npages, list_size);
+ pinned = pin_user_pages(cur_base, sz2pin,
+ gup_flags, page_list, NULL);
+ if (sz2pin != pinned) {
+ if (pinned < 0) {
+ ret = pinned;
+ } else {
+ unpin_user_pages(page_list, pinned);
+ ret = -ENOMEM;
+ }
goto out;
+ }
+ nchunks++;
if (!last_pfn)
map_pfn = page_to_pfn(page_list[0]);
- for (i = 0; i < ret; i++) {
+ for (i = 0; i < pinned; i++) {
unsigned long this_pfn = page_to_pfn(page_list[i]);
u64 csize;
if (last_pfn && (this_pfn != last_pfn + 1)) {
/* Pin a contiguous chunk of memory */
csize = (last_pfn - map_pfn + 1) << PAGE_SHIFT;
- if (vhost_vdpa_map(v, iova, csize,
- map_pfn << PAGE_SHIFT,
- msg->perm))
+ ret = vhost_vdpa_map(v, iova, csize,
+ map_pfn << PAGE_SHIFT,
+ msg->perm);
+ if (ret) {
+ /*
+ * Unpin the pages that are left unmapped
+ * from this point on in the current
+ * page_list. The remaining outstanding
+ * ones which may stride across several
+ * chunks will be covered in the common
+ * error path subsequently.
+ */
+ unpin_user_pages(&page_list[i],
+ pinned - i);
goto out;
+ }
+
map_pfn = this_pfn;
iova += csize;
+ nchunks = 0;
}
last_pfn = this_pfn;
}
- cur_base += ret << PAGE_SHIFT;
- npages -= ret;
+ cur_base += pinned << PAGE_SHIFT;
+ npages -= pinned;
}
/* Pin the rest chunk */
@@ -660,10 +687,27 @@ static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v,
map_pfn << PAGE_SHIFT, msg->perm);
out:
if (ret) {
+ if (nchunks) {
+ unsigned long pfn;
+
+ /*
+ * Unpin the outstanding pages which are yet to be
+ * mapped but haven't due to vdpa_map() or
+ * pin_user_pages() failure.
+ *
+ * Mapped pages are accounted in vdpa_map(), hence
+ * the corresponding unpinning will be handled by
+ * vdpa_unmap().
+ */
+ WARN_ON(!last_pfn);
+ for (pfn = map_pfn; pfn <= last_pfn; pfn++)
+ unpin_user_page(pfn_to_page(pfn));
+ }
vhost_vdpa_unmap(v, msg->iova, msg->size);
- atomic64_sub(npages, &dev->mm->pinned_vm);
}
+unlock:
mmap_read_unlock(dev->mm);
+free:
free_page((unsigned long)page_list);
return ret;
}
--
1.8.3.1
_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization
^ permalink raw reply related [flat|nested] 12+ messages in thread* [PATCH v3] vhost-vdpa: fix page pinning leakage in error path (rework) @ 2020-11-05 23:26 ` Si-Wei Liu 0 siblings, 0 replies; 12+ messages in thread From: Si-Wei Liu @ 2020-11-05 23:26 UTC (permalink / raw) To: mst, jasowang, lingshan.zhu Cc: joao.m.martins, boris.ostrovsky, linux-kernel, virtualization, si-wei.liu Pinned pages are not properly accounted particularly when mapping error occurs on IOTLB update. Clean up dangling pinned pages for the error path. The memory usage for bookkeeping pinned pages is reverted to what it was before: only one single free page is needed. This helps reduce the host memory demand for VM with a large amount of memory, or in the situation where host is running short of free memory. Fixes: 4c8cf31885f6 ("vhost: introduce vDPA-based backend") Signed-off-by: Si-Wei Liu <si-wei.liu@oracle.com> --- Changes in v3: - Turn explicit last_pfn check to a WARN_ON() (Jason) Changes in v2: - Drop the reversion patch - Fix unhandled page leak towards the end of page_list drivers/vhost/vdpa.c | 80 ++++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 62 insertions(+), 18 deletions(-) diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c index b6d9016..5b13dfd 100644 --- a/drivers/vhost/vdpa.c +++ b/drivers/vhost/vdpa.c @@ -560,6 +560,8 @@ static int vhost_vdpa_map(struct vhost_vdpa *v, if (r) vhost_iotlb_del_range(dev->iotlb, iova, iova + size - 1); + else + atomic64_add(size >> PAGE_SHIFT, &dev->mm->pinned_vm); return r; } @@ -591,14 +593,16 @@ static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v, unsigned long list_size = PAGE_SIZE / sizeof(struct page *); unsigned int gup_flags = FOLL_LONGTERM; unsigned long npages, cur_base, map_pfn, last_pfn = 0; - unsigned long locked, lock_limit, pinned, i; + unsigned long lock_limit, sz2pin, nchunks, i; u64 iova = msg->iova; + long pinned; int ret = 0; if (vhost_iotlb_itree_first(iotlb, msg->iova, msg->iova + msg->size - 1)) return -EEXIST; + /* Limit the use of memory for bookkeeping */ page_list = (struct page **) __get_free_page(GFP_KERNEL); if (!page_list) return -ENOMEM; @@ -607,52 +611,75 @@ static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v, gup_flags |= FOLL_WRITE; npages = PAGE_ALIGN(msg->size + (iova & ~PAGE_MASK)) >> PAGE_SHIFT; - if (!npages) - return -EINVAL; + if (!npages) { + ret = -EINVAL; + goto free; + } mmap_read_lock(dev->mm); - locked = atomic64_add_return(npages, &dev->mm->pinned_vm); lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; - - if (locked > lock_limit) { + if (npages + atomic64_read(&dev->mm->pinned_vm) > lock_limit) { ret = -ENOMEM; - goto out; + goto unlock; } cur_base = msg->uaddr & PAGE_MASK; iova &= PAGE_MASK; + nchunks = 0; while (npages) { - pinned = min_t(unsigned long, npages, list_size); - ret = pin_user_pages(cur_base, pinned, - gup_flags, page_list, NULL); - if (ret != pinned) + sz2pin = min_t(unsigned long, npages, list_size); + pinned = pin_user_pages(cur_base, sz2pin, + gup_flags, page_list, NULL); + if (sz2pin != pinned) { + if (pinned < 0) { + ret = pinned; + } else { + unpin_user_pages(page_list, pinned); + ret = -ENOMEM; + } goto out; + } + nchunks++; if (!last_pfn) map_pfn = page_to_pfn(page_list[0]); - for (i = 0; i < ret; i++) { + for (i = 0; i < pinned; i++) { unsigned long this_pfn = page_to_pfn(page_list[i]); u64 csize; if (last_pfn && (this_pfn != last_pfn + 1)) { /* Pin a contiguous chunk of memory */ csize = (last_pfn - map_pfn + 1) << PAGE_SHIFT; - if (vhost_vdpa_map(v, iova, csize, - map_pfn << PAGE_SHIFT, - msg->perm)) + ret = vhost_vdpa_map(v, iova, csize, + map_pfn << PAGE_SHIFT, + msg->perm); + if (ret) { + /* + * Unpin the pages that are left unmapped + * from this point on in the current + * page_list. The remaining outstanding + * ones which may stride across several + * chunks will be covered in the common + * error path subsequently. + */ + unpin_user_pages(&page_list[i], + pinned - i); goto out; + } + map_pfn = this_pfn; iova += csize; + nchunks = 0; } last_pfn = this_pfn; } - cur_base += ret << PAGE_SHIFT; - npages -= ret; + cur_base += pinned << PAGE_SHIFT; + npages -= pinned; } /* Pin the rest chunk */ @@ -660,10 +687,27 @@ static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v, map_pfn << PAGE_SHIFT, msg->perm); out: if (ret) { + if (nchunks) { + unsigned long pfn; + + /* + * Unpin the outstanding pages which are yet to be + * mapped but haven't due to vdpa_map() or + * pin_user_pages() failure. + * + * Mapped pages are accounted in vdpa_map(), hence + * the corresponding unpinning will be handled by + * vdpa_unmap(). + */ + WARN_ON(!last_pfn); + for (pfn = map_pfn; pfn <= last_pfn; pfn++) + unpin_user_page(pfn_to_page(pfn)); + } vhost_vdpa_unmap(v, msg->iova, msg->size); - atomic64_sub(npages, &dev->mm->pinned_vm); } +unlock: mmap_read_unlock(dev->mm); +free: free_page((unsigned long)page_list); return ret; } -- 1.8.3.1 ^ permalink raw reply related [flat|nested] 12+ messages in thread
* Re: [PATCH v3] vhost-vdpa: fix page pinning leakage in error path (rework) 2020-11-05 23:26 ` Si-Wei Liu @ 2020-11-10 3:28 ` Jason Wang -1 siblings, 0 replies; 12+ messages in thread From: Jason Wang @ 2020-11-10 3:28 UTC (permalink / raw) To: Si-Wei Liu, mst, lingshan.zhu Cc: virtualization, boris.ostrovsky, linux-kernel On 2020/11/6 上午7:26, Si-Wei Liu wrote: > Pinned pages are not properly accounted particularly when > mapping error occurs on IOTLB update. Clean up dangling > pinned pages for the error path. > > The memory usage for bookkeeping pinned pages is reverted > to what it was before: only one single free page is needed. > This helps reduce the host memory demand for VM with a large > amount of memory, or in the situation where host is running > short of free memory. > > Fixes: 4c8cf31885f6 ("vhost: introduce vDPA-based backend") > Signed-off-by: Si-Wei Liu <si-wei.liu@oracle.com> > --- > Changes in v3: > - Turn explicit last_pfn check to a WARN_ON() (Jason) > > Changes in v2: > - Drop the reversion patch > - Fix unhandled page leak towards the end of page_list Acked-by: Jason Wang <jasowang@redhat.com> Thanks > > drivers/vhost/vdpa.c | 80 ++++++++++++++++++++++++++++++++++++++++------------ > 1 file changed, 62 insertions(+), 18 deletions(-) > > diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c > index b6d9016..5b13dfd 100644 > --- a/drivers/vhost/vdpa.c > +++ b/drivers/vhost/vdpa.c > @@ -560,6 +560,8 @@ static int vhost_vdpa_map(struct vhost_vdpa *v, > > if (r) > vhost_iotlb_del_range(dev->iotlb, iova, iova + size - 1); > + else > + atomic64_add(size >> PAGE_SHIFT, &dev->mm->pinned_vm); > > return r; > } > @@ -591,14 +593,16 @@ static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v, > unsigned long list_size = PAGE_SIZE / sizeof(struct page *); > unsigned int gup_flags = FOLL_LONGTERM; > unsigned long npages, cur_base, map_pfn, last_pfn = 0; > - unsigned long locked, lock_limit, pinned, i; > + unsigned long lock_limit, sz2pin, nchunks, i; > u64 iova = msg->iova; > + long pinned; > int ret = 0; > > if (vhost_iotlb_itree_first(iotlb, msg->iova, > msg->iova + msg->size - 1)) > return -EEXIST; > > + /* Limit the use of memory for bookkeeping */ > page_list = (struct page **) __get_free_page(GFP_KERNEL); > if (!page_list) > return -ENOMEM; > @@ -607,52 +611,75 @@ static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v, > gup_flags |= FOLL_WRITE; > > npages = PAGE_ALIGN(msg->size + (iova & ~PAGE_MASK)) >> PAGE_SHIFT; > - if (!npages) > - return -EINVAL; > + if (!npages) { > + ret = -EINVAL; > + goto free; > + } > > mmap_read_lock(dev->mm); > > - locked = atomic64_add_return(npages, &dev->mm->pinned_vm); > lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; > - > - if (locked > lock_limit) { > + if (npages + atomic64_read(&dev->mm->pinned_vm) > lock_limit) { > ret = -ENOMEM; > - goto out; > + goto unlock; > } > > cur_base = msg->uaddr & PAGE_MASK; > iova &= PAGE_MASK; > + nchunks = 0; > > while (npages) { > - pinned = min_t(unsigned long, npages, list_size); > - ret = pin_user_pages(cur_base, pinned, > - gup_flags, page_list, NULL); > - if (ret != pinned) > + sz2pin = min_t(unsigned long, npages, list_size); > + pinned = pin_user_pages(cur_base, sz2pin, > + gup_flags, page_list, NULL); > + if (sz2pin != pinned) { > + if (pinned < 0) { > + ret = pinned; > + } else { > + unpin_user_pages(page_list, pinned); > + ret = -ENOMEM; > + } > goto out; > + } > + nchunks++; > > if (!last_pfn) > map_pfn = page_to_pfn(page_list[0]); > > - for (i = 0; i < ret; i++) { > + for (i = 0; i < pinned; i++) { > unsigned long this_pfn = page_to_pfn(page_list[i]); > u64 csize; > > if (last_pfn && (this_pfn != last_pfn + 1)) { > /* Pin a contiguous chunk of memory */ > csize = (last_pfn - map_pfn + 1) << PAGE_SHIFT; > - if (vhost_vdpa_map(v, iova, csize, > - map_pfn << PAGE_SHIFT, > - msg->perm)) > + ret = vhost_vdpa_map(v, iova, csize, > + map_pfn << PAGE_SHIFT, > + msg->perm); > + if (ret) { > + /* > + * Unpin the pages that are left unmapped > + * from this point on in the current > + * page_list. The remaining outstanding > + * ones which may stride across several > + * chunks will be covered in the common > + * error path subsequently. > + */ > + unpin_user_pages(&page_list[i], > + pinned - i); > goto out; > + } > + > map_pfn = this_pfn; > iova += csize; > + nchunks = 0; > } > > last_pfn = this_pfn; > } > > - cur_base += ret << PAGE_SHIFT; > - npages -= ret; > + cur_base += pinned << PAGE_SHIFT; > + npages -= pinned; > } > > /* Pin the rest chunk */ > @@ -660,10 +687,27 @@ static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v, > map_pfn << PAGE_SHIFT, msg->perm); > out: > if (ret) { > + if (nchunks) { > + unsigned long pfn; > + > + /* > + * Unpin the outstanding pages which are yet to be > + * mapped but haven't due to vdpa_map() or > + * pin_user_pages() failure. > + * > + * Mapped pages are accounted in vdpa_map(), hence > + * the corresponding unpinning will be handled by > + * vdpa_unmap(). > + */ > + WARN_ON(!last_pfn); > + for (pfn = map_pfn; pfn <= last_pfn; pfn++) > + unpin_user_page(pfn_to_page(pfn)); > + } > vhost_vdpa_unmap(v, msg->iova, msg->size); > - atomic64_sub(npages, &dev->mm->pinned_vm); > } > +unlock: > mmap_read_unlock(dev->mm); > +free: > free_page((unsigned long)page_list); > return ret; > } _______________________________________________ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization ^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH v3] vhost-vdpa: fix page pinning leakage in error path (rework) @ 2020-11-10 3:28 ` Jason Wang 0 siblings, 0 replies; 12+ messages in thread From: Jason Wang @ 2020-11-10 3:28 UTC (permalink / raw) To: Si-Wei Liu, mst, lingshan.zhu Cc: joao.m.martins, boris.ostrovsky, linux-kernel, virtualization On 2020/11/6 上午7:26, Si-Wei Liu wrote: > Pinned pages are not properly accounted particularly when > mapping error occurs on IOTLB update. Clean up dangling > pinned pages for the error path. > > The memory usage for bookkeeping pinned pages is reverted > to what it was before: only one single free page is needed. > This helps reduce the host memory demand for VM with a large > amount of memory, or in the situation where host is running > short of free memory. > > Fixes: 4c8cf31885f6 ("vhost: introduce vDPA-based backend") > Signed-off-by: Si-Wei Liu <si-wei.liu@oracle.com> > --- > Changes in v3: > - Turn explicit last_pfn check to a WARN_ON() (Jason) > > Changes in v2: > - Drop the reversion patch > - Fix unhandled page leak towards the end of page_list Acked-by: Jason Wang <jasowang@redhat.com> Thanks > > drivers/vhost/vdpa.c | 80 ++++++++++++++++++++++++++++++++++++++++------------ > 1 file changed, 62 insertions(+), 18 deletions(-) > > diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c > index b6d9016..5b13dfd 100644 > --- a/drivers/vhost/vdpa.c > +++ b/drivers/vhost/vdpa.c > @@ -560,6 +560,8 @@ static int vhost_vdpa_map(struct vhost_vdpa *v, > > if (r) > vhost_iotlb_del_range(dev->iotlb, iova, iova + size - 1); > + else > + atomic64_add(size >> PAGE_SHIFT, &dev->mm->pinned_vm); > > return r; > } > @@ -591,14 +593,16 @@ static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v, > unsigned long list_size = PAGE_SIZE / sizeof(struct page *); > unsigned int gup_flags = FOLL_LONGTERM; > unsigned long npages, cur_base, map_pfn, last_pfn = 0; > - unsigned long locked, lock_limit, pinned, i; > + unsigned long lock_limit, sz2pin, nchunks, i; > u64 iova = msg->iova; > + long pinned; > int ret = 0; > > if (vhost_iotlb_itree_first(iotlb, msg->iova, > msg->iova + msg->size - 1)) > return -EEXIST; > > + /* Limit the use of memory for bookkeeping */ > page_list = (struct page **) __get_free_page(GFP_KERNEL); > if (!page_list) > return -ENOMEM; > @@ -607,52 +611,75 @@ static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v, > gup_flags |= FOLL_WRITE; > > npages = PAGE_ALIGN(msg->size + (iova & ~PAGE_MASK)) >> PAGE_SHIFT; > - if (!npages) > - return -EINVAL; > + if (!npages) { > + ret = -EINVAL; > + goto free; > + } > > mmap_read_lock(dev->mm); > > - locked = atomic64_add_return(npages, &dev->mm->pinned_vm); > lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; > - > - if (locked > lock_limit) { > + if (npages + atomic64_read(&dev->mm->pinned_vm) > lock_limit) { > ret = -ENOMEM; > - goto out; > + goto unlock; > } > > cur_base = msg->uaddr & PAGE_MASK; > iova &= PAGE_MASK; > + nchunks = 0; > > while (npages) { > - pinned = min_t(unsigned long, npages, list_size); > - ret = pin_user_pages(cur_base, pinned, > - gup_flags, page_list, NULL); > - if (ret != pinned) > + sz2pin = min_t(unsigned long, npages, list_size); > + pinned = pin_user_pages(cur_base, sz2pin, > + gup_flags, page_list, NULL); > + if (sz2pin != pinned) { > + if (pinned < 0) { > + ret = pinned; > + } else { > + unpin_user_pages(page_list, pinned); > + ret = -ENOMEM; > + } > goto out; > + } > + nchunks++; > > if (!last_pfn) > map_pfn = page_to_pfn(page_list[0]); > > - for (i = 0; i < ret; i++) { > + for (i = 0; i < pinned; i++) { > unsigned long this_pfn = page_to_pfn(page_list[i]); > u64 csize; > > if (last_pfn && (this_pfn != last_pfn + 1)) { > /* Pin a contiguous chunk of memory */ > csize = (last_pfn - map_pfn + 1) << PAGE_SHIFT; > - if (vhost_vdpa_map(v, iova, csize, > - map_pfn << PAGE_SHIFT, > - msg->perm)) > + ret = vhost_vdpa_map(v, iova, csize, > + map_pfn << PAGE_SHIFT, > + msg->perm); > + if (ret) { > + /* > + * Unpin the pages that are left unmapped > + * from this point on in the current > + * page_list. The remaining outstanding > + * ones which may stride across several > + * chunks will be covered in the common > + * error path subsequently. > + */ > + unpin_user_pages(&page_list[i], > + pinned - i); > goto out; > + } > + > map_pfn = this_pfn; > iova += csize; > + nchunks = 0; > } > > last_pfn = this_pfn; > } > > - cur_base += ret << PAGE_SHIFT; > - npages -= ret; > + cur_base += pinned << PAGE_SHIFT; > + npages -= pinned; > } > > /* Pin the rest chunk */ > @@ -660,10 +687,27 @@ static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v, > map_pfn << PAGE_SHIFT, msg->perm); > out: > if (ret) { > + if (nchunks) { > + unsigned long pfn; > + > + /* > + * Unpin the outstanding pages which are yet to be > + * mapped but haven't due to vdpa_map() or > + * pin_user_pages() failure. > + * > + * Mapped pages are accounted in vdpa_map(), hence > + * the corresponding unpinning will be handled by > + * vdpa_unmap(). > + */ > + WARN_ON(!last_pfn); > + for (pfn = map_pfn; pfn <= last_pfn; pfn++) > + unpin_user_page(pfn_to_page(pfn)); > + } > vhost_vdpa_unmap(v, msg->iova, msg->size); > - atomic64_sub(npages, &dev->mm->pinned_vm); > } > +unlock: > mmap_read_unlock(dev->mm); > +free: > free_page((unsigned long)page_list); > return ret; > } ^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH v3] vhost-vdpa: fix page pinning leakage in error path (rework) 2020-11-10 3:28 ` Jason Wang @ 2020-11-10 22:41 ` si-wei liu -1 siblings, 0 replies; 12+ messages in thread From: si-wei liu @ 2020-11-10 22:41 UTC (permalink / raw) To: Jason Wang, mst, lingshan.zhu Cc: virtualization, boris.ostrovsky, linux-kernel On 11/9/2020 7:28 PM, Jason Wang wrote: > > On 2020/11/6 上午7:26, Si-Wei Liu wrote: >> Pinned pages are not properly accounted particularly when >> mapping error occurs on IOTLB update. Clean up dangling >> pinned pages for the error path. >> >> The memory usage for bookkeeping pinned pages is reverted >> to what it was before: only one single free page is needed. >> This helps reduce the host memory demand for VM with a large >> amount of memory, or in the situation where host is running >> short of free memory. >> >> Fixes: 4c8cf31885f6 ("vhost: introduce vDPA-based backend") >> Signed-off-by: Si-Wei Liu <si-wei.liu@oracle.com> >> --- >> Changes in v3: >> - Turn explicit last_pfn check to a WARN_ON() (Jason) >> >> Changes in v2: >> - Drop the reversion patch >> - Fix unhandled page leak towards the end of page_list > > > Acked-by: Jason Wang <jasowang@redhat.com> > > Thanks > Thanks Jason for your review! -Siwei > >> >> drivers/vhost/vdpa.c | 80 >> ++++++++++++++++++++++++++++++++++++++++------------ >> 1 file changed, 62 insertions(+), 18 deletions(-) >> >> diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c >> index b6d9016..5b13dfd 100644 >> --- a/drivers/vhost/vdpa.c >> +++ b/drivers/vhost/vdpa.c >> @@ -560,6 +560,8 @@ static int vhost_vdpa_map(struct vhost_vdpa *v, >> if (r) >> vhost_iotlb_del_range(dev->iotlb, iova, iova + size - 1); >> + else >> + atomic64_add(size >> PAGE_SHIFT, &dev->mm->pinned_vm); >> return r; >> } >> @@ -591,14 +593,16 @@ static int >> vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v, >> unsigned long list_size = PAGE_SIZE / sizeof(struct page *); >> unsigned int gup_flags = FOLL_LONGTERM; >> unsigned long npages, cur_base, map_pfn, last_pfn = 0; >> - unsigned long locked, lock_limit, pinned, i; >> + unsigned long lock_limit, sz2pin, nchunks, i; >> u64 iova = msg->iova; >> + long pinned; >> int ret = 0; >> if (vhost_iotlb_itree_first(iotlb, msg->iova, >> msg->iova + msg->size - 1)) >> return -EEXIST; >> + /* Limit the use of memory for bookkeeping */ >> page_list = (struct page **) __get_free_page(GFP_KERNEL); >> if (!page_list) >> return -ENOMEM; >> @@ -607,52 +611,75 @@ static int >> vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v, >> gup_flags |= FOLL_WRITE; >> npages = PAGE_ALIGN(msg->size + (iova & ~PAGE_MASK)) >> >> PAGE_SHIFT; >> - if (!npages) >> - return -EINVAL; >> + if (!npages) { >> + ret = -EINVAL; >> + goto free; >> + } >> mmap_read_lock(dev->mm); >> - locked = atomic64_add_return(npages, &dev->mm->pinned_vm); >> lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; >> - >> - if (locked > lock_limit) { >> + if (npages + atomic64_read(&dev->mm->pinned_vm) > lock_limit) { >> ret = -ENOMEM; >> - goto out; >> + goto unlock; >> } >> cur_base = msg->uaddr & PAGE_MASK; >> iova &= PAGE_MASK; >> + nchunks = 0; >> while (npages) { >> - pinned = min_t(unsigned long, npages, list_size); >> - ret = pin_user_pages(cur_base, pinned, >> - gup_flags, page_list, NULL); >> - if (ret != pinned) >> + sz2pin = min_t(unsigned long, npages, list_size); >> + pinned = pin_user_pages(cur_base, sz2pin, >> + gup_flags, page_list, NULL); >> + if (sz2pin != pinned) { >> + if (pinned < 0) { >> + ret = pinned; >> + } else { >> + unpin_user_pages(page_list, pinned); >> + ret = -ENOMEM; >> + } >> goto out; >> + } >> + nchunks++; >> if (!last_pfn) >> map_pfn = page_to_pfn(page_list[0]); >> - for (i = 0; i < ret; i++) { >> + for (i = 0; i < pinned; i++) { >> unsigned long this_pfn = page_to_pfn(page_list[i]); >> u64 csize; >> if (last_pfn && (this_pfn != last_pfn + 1)) { >> /* Pin a contiguous chunk of memory */ >> csize = (last_pfn - map_pfn + 1) << PAGE_SHIFT; >> - if (vhost_vdpa_map(v, iova, csize, >> - map_pfn << PAGE_SHIFT, >> - msg->perm)) >> + ret = vhost_vdpa_map(v, iova, csize, >> + map_pfn << PAGE_SHIFT, >> + msg->perm); >> + if (ret) { >> + /* >> + * Unpin the pages that are left unmapped >> + * from this point on in the current >> + * page_list. The remaining outstanding >> + * ones which may stride across several >> + * chunks will be covered in the common >> + * error path subsequently. >> + */ >> + unpin_user_pages(&page_list[i], >> + pinned - i); >> goto out; >> + } >> + >> map_pfn = this_pfn; >> iova += csize; >> + nchunks = 0; >> } >> last_pfn = this_pfn; >> } >> - cur_base += ret << PAGE_SHIFT; >> - npages -= ret; >> + cur_base += pinned << PAGE_SHIFT; >> + npages -= pinned; >> } >> /* Pin the rest chunk */ >> @@ -660,10 +687,27 @@ static int >> vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v, >> map_pfn << PAGE_SHIFT, msg->perm); >> out: >> if (ret) { >> + if (nchunks) { >> + unsigned long pfn; >> + >> + /* >> + * Unpin the outstanding pages which are yet to be >> + * mapped but haven't due to vdpa_map() or >> + * pin_user_pages() failure. >> + * >> + * Mapped pages are accounted in vdpa_map(), hence >> + * the corresponding unpinning will be handled by >> + * vdpa_unmap(). >> + */ >> + WARN_ON(!last_pfn); >> + for (pfn = map_pfn; pfn <= last_pfn; pfn++) >> + unpin_user_page(pfn_to_page(pfn)); >> + } >> vhost_vdpa_unmap(v, msg->iova, msg->size); >> - atomic64_sub(npages, &dev->mm->pinned_vm); >> } >> +unlock: >> mmap_read_unlock(dev->mm); >> +free: >> free_page((unsigned long)page_list); >> return ret; >> } > _______________________________________________ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization ^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH v3] vhost-vdpa: fix page pinning leakage in error path (rework) @ 2020-11-10 22:41 ` si-wei liu 0 siblings, 0 replies; 12+ messages in thread From: si-wei liu @ 2020-11-10 22:41 UTC (permalink / raw) To: Jason Wang, mst, lingshan.zhu Cc: joao.m.martins, boris.ostrovsky, linux-kernel, virtualization On 11/9/2020 7:28 PM, Jason Wang wrote: > > On 2020/11/6 上午7:26, Si-Wei Liu wrote: >> Pinned pages are not properly accounted particularly when >> mapping error occurs on IOTLB update. Clean up dangling >> pinned pages for the error path. >> >> The memory usage for bookkeeping pinned pages is reverted >> to what it was before: only one single free page is needed. >> This helps reduce the host memory demand for VM with a large >> amount of memory, or in the situation where host is running >> short of free memory. >> >> Fixes: 4c8cf31885f6 ("vhost: introduce vDPA-based backend") >> Signed-off-by: Si-Wei Liu <si-wei.liu@oracle.com> >> --- >> Changes in v3: >> - Turn explicit last_pfn check to a WARN_ON() (Jason) >> >> Changes in v2: >> - Drop the reversion patch >> - Fix unhandled page leak towards the end of page_list > > > Acked-by: Jason Wang <jasowang@redhat.com> > > Thanks > Thanks Jason for your review! -Siwei > >> >> drivers/vhost/vdpa.c | 80 >> ++++++++++++++++++++++++++++++++++++++++------------ >> 1 file changed, 62 insertions(+), 18 deletions(-) >> >> diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c >> index b6d9016..5b13dfd 100644 >> --- a/drivers/vhost/vdpa.c >> +++ b/drivers/vhost/vdpa.c >> @@ -560,6 +560,8 @@ static int vhost_vdpa_map(struct vhost_vdpa *v, >> if (r) >> vhost_iotlb_del_range(dev->iotlb, iova, iova + size - 1); >> + else >> + atomic64_add(size >> PAGE_SHIFT, &dev->mm->pinned_vm); >> return r; >> } >> @@ -591,14 +593,16 @@ static int >> vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v, >> unsigned long list_size = PAGE_SIZE / sizeof(struct page *); >> unsigned int gup_flags = FOLL_LONGTERM; >> unsigned long npages, cur_base, map_pfn, last_pfn = 0; >> - unsigned long locked, lock_limit, pinned, i; >> + unsigned long lock_limit, sz2pin, nchunks, i; >> u64 iova = msg->iova; >> + long pinned; >> int ret = 0; >> if (vhost_iotlb_itree_first(iotlb, msg->iova, >> msg->iova + msg->size - 1)) >> return -EEXIST; >> + /* Limit the use of memory for bookkeeping */ >> page_list = (struct page **) __get_free_page(GFP_KERNEL); >> if (!page_list) >> return -ENOMEM; >> @@ -607,52 +611,75 @@ static int >> vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v, >> gup_flags |= FOLL_WRITE; >> npages = PAGE_ALIGN(msg->size + (iova & ~PAGE_MASK)) >> >> PAGE_SHIFT; >> - if (!npages) >> - return -EINVAL; >> + if (!npages) { >> + ret = -EINVAL; >> + goto free; >> + } >> mmap_read_lock(dev->mm); >> - locked = atomic64_add_return(npages, &dev->mm->pinned_vm); >> lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; >> - >> - if (locked > lock_limit) { >> + if (npages + atomic64_read(&dev->mm->pinned_vm) > lock_limit) { >> ret = -ENOMEM; >> - goto out; >> + goto unlock; >> } >> cur_base = msg->uaddr & PAGE_MASK; >> iova &= PAGE_MASK; >> + nchunks = 0; >> while (npages) { >> - pinned = min_t(unsigned long, npages, list_size); >> - ret = pin_user_pages(cur_base, pinned, >> - gup_flags, page_list, NULL); >> - if (ret != pinned) >> + sz2pin = min_t(unsigned long, npages, list_size); >> + pinned = pin_user_pages(cur_base, sz2pin, >> + gup_flags, page_list, NULL); >> + if (sz2pin != pinned) { >> + if (pinned < 0) { >> + ret = pinned; >> + } else { >> + unpin_user_pages(page_list, pinned); >> + ret = -ENOMEM; >> + } >> goto out; >> + } >> + nchunks++; >> if (!last_pfn) >> map_pfn = page_to_pfn(page_list[0]); >> - for (i = 0; i < ret; i++) { >> + for (i = 0; i < pinned; i++) { >> unsigned long this_pfn = page_to_pfn(page_list[i]); >> u64 csize; >> if (last_pfn && (this_pfn != last_pfn + 1)) { >> /* Pin a contiguous chunk of memory */ >> csize = (last_pfn - map_pfn + 1) << PAGE_SHIFT; >> - if (vhost_vdpa_map(v, iova, csize, >> - map_pfn << PAGE_SHIFT, >> - msg->perm)) >> + ret = vhost_vdpa_map(v, iova, csize, >> + map_pfn << PAGE_SHIFT, >> + msg->perm); >> + if (ret) { >> + /* >> + * Unpin the pages that are left unmapped >> + * from this point on in the current >> + * page_list. The remaining outstanding >> + * ones which may stride across several >> + * chunks will be covered in the common >> + * error path subsequently. >> + */ >> + unpin_user_pages(&page_list[i], >> + pinned - i); >> goto out; >> + } >> + >> map_pfn = this_pfn; >> iova += csize; >> + nchunks = 0; >> } >> last_pfn = this_pfn; >> } >> - cur_base += ret << PAGE_SHIFT; >> - npages -= ret; >> + cur_base += pinned << PAGE_SHIFT; >> + npages -= pinned; >> } >> /* Pin the rest chunk */ >> @@ -660,10 +687,27 @@ static int >> vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v, >> map_pfn << PAGE_SHIFT, msg->perm); >> out: >> if (ret) { >> + if (nchunks) { >> + unsigned long pfn; >> + >> + /* >> + * Unpin the outstanding pages which are yet to be >> + * mapped but haven't due to vdpa_map() or >> + * pin_user_pages() failure. >> + * >> + * Mapped pages are accounted in vdpa_map(), hence >> + * the corresponding unpinning will be handled by >> + * vdpa_unmap(). >> + */ >> + WARN_ON(!last_pfn); >> + for (pfn = map_pfn; pfn <= last_pfn; pfn++) >> + unpin_user_page(pfn_to_page(pfn)); >> + } >> vhost_vdpa_unmap(v, msg->iova, msg->size); >> - atomic64_sub(npages, &dev->mm->pinned_vm); >> } >> +unlock: >> mmap_read_unlock(dev->mm); >> +free: >> free_page((unsigned long)page_list); >> return ret; >> } > ^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH v3] vhost-vdpa: fix page pinning leakage in error path (rework) 2020-11-10 3:28 ` Jason Wang @ 2020-11-19 23:26 ` si-wei liu -1 siblings, 0 replies; 12+ messages in thread From: si-wei liu @ 2020-11-19 23:26 UTC (permalink / raw) To: mst; +Cc: lingshan.zhu, linux-kernel, virtualization, boris.ostrovsky A gentle reminder. Any chance this patch will get picked soon? Thanks, -Siwei On 11/9/2020 7:28 PM, Jason Wang wrote: > > On 2020/11/6 上午7:26, Si-Wei Liu wrote: >> Pinned pages are not properly accounted particularly when >> mapping error occurs on IOTLB update. Clean up dangling >> pinned pages for the error path. >> >> The memory usage for bookkeeping pinned pages is reverted >> to what it was before: only one single free page is needed. >> This helps reduce the host memory demand for VM with a large >> amount of memory, or in the situation where host is running >> short of free memory. >> >> Fixes: 4c8cf31885f6 ("vhost: introduce vDPA-based backend") >> Signed-off-by: Si-Wei Liu <si-wei.liu@oracle.com> >> --- >> Changes in v3: >> - Turn explicit last_pfn check to a WARN_ON() (Jason) >> >> Changes in v2: >> - Drop the reversion patch >> - Fix unhandled page leak towards the end of page_list > > > Acked-by: Jason Wang <jasowang@redhat.com> > > Thanks > > >> >> drivers/vhost/vdpa.c | 80 >> ++++++++++++++++++++++++++++++++++++++++------------ >> 1 file changed, 62 insertions(+), 18 deletions(-) >> >> diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c >> index b6d9016..5b13dfd 100644 >> --- a/drivers/vhost/vdpa.c >> +++ b/drivers/vhost/vdpa.c >> @@ -560,6 +560,8 @@ static int vhost_vdpa_map(struct vhost_vdpa *v, >> if (r) >> vhost_iotlb_del_range(dev->iotlb, iova, iova + size - 1); >> + else >> + atomic64_add(size >> PAGE_SHIFT, &dev->mm->pinned_vm); >> return r; >> } >> @@ -591,14 +593,16 @@ static int >> vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v, >> unsigned long list_size = PAGE_SIZE / sizeof(struct page *); >> unsigned int gup_flags = FOLL_LONGTERM; >> unsigned long npages, cur_base, map_pfn, last_pfn = 0; >> - unsigned long locked, lock_limit, pinned, i; >> + unsigned long lock_limit, sz2pin, nchunks, i; >> u64 iova = msg->iova; >> + long pinned; >> int ret = 0; >> if (vhost_iotlb_itree_first(iotlb, msg->iova, >> msg->iova + msg->size - 1)) >> return -EEXIST; >> + /* Limit the use of memory for bookkeeping */ >> page_list = (struct page **) __get_free_page(GFP_KERNEL); >> if (!page_list) >> return -ENOMEM; >> @@ -607,52 +611,75 @@ static int >> vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v, >> gup_flags |= FOLL_WRITE; >> npages = PAGE_ALIGN(msg->size + (iova & ~PAGE_MASK)) >> >> PAGE_SHIFT; >> - if (!npages) >> - return -EINVAL; >> + if (!npages) { >> + ret = -EINVAL; >> + goto free; >> + } >> mmap_read_lock(dev->mm); >> - locked = atomic64_add_return(npages, &dev->mm->pinned_vm); >> lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; >> - >> - if (locked > lock_limit) { >> + if (npages + atomic64_read(&dev->mm->pinned_vm) > lock_limit) { >> ret = -ENOMEM; >> - goto out; >> + goto unlock; >> } >> cur_base = msg->uaddr & PAGE_MASK; >> iova &= PAGE_MASK; >> + nchunks = 0; >> while (npages) { >> - pinned = min_t(unsigned long, npages, list_size); >> - ret = pin_user_pages(cur_base, pinned, >> - gup_flags, page_list, NULL); >> - if (ret != pinned) >> + sz2pin = min_t(unsigned long, npages, list_size); >> + pinned = pin_user_pages(cur_base, sz2pin, >> + gup_flags, page_list, NULL); >> + if (sz2pin != pinned) { >> + if (pinned < 0) { >> + ret = pinned; >> + } else { >> + unpin_user_pages(page_list, pinned); >> + ret = -ENOMEM; >> + } >> goto out; >> + } >> + nchunks++; >> if (!last_pfn) >> map_pfn = page_to_pfn(page_list[0]); >> - for (i = 0; i < ret; i++) { >> + for (i = 0; i < pinned; i++) { >> unsigned long this_pfn = page_to_pfn(page_list[i]); >> u64 csize; >> if (last_pfn && (this_pfn != last_pfn + 1)) { >> /* Pin a contiguous chunk of memory */ >> csize = (last_pfn - map_pfn + 1) << PAGE_SHIFT; >> - if (vhost_vdpa_map(v, iova, csize, >> - map_pfn << PAGE_SHIFT, >> - msg->perm)) >> + ret = vhost_vdpa_map(v, iova, csize, >> + map_pfn << PAGE_SHIFT, >> + msg->perm); >> + if (ret) { >> + /* >> + * Unpin the pages that are left unmapped >> + * from this point on in the current >> + * page_list. The remaining outstanding >> + * ones which may stride across several >> + * chunks will be covered in the common >> + * error path subsequently. >> + */ >> + unpin_user_pages(&page_list[i], >> + pinned - i); >> goto out; >> + } >> + >> map_pfn = this_pfn; >> iova += csize; >> + nchunks = 0; >> } >> last_pfn = this_pfn; >> } >> - cur_base += ret << PAGE_SHIFT; >> - npages -= ret; >> + cur_base += pinned << PAGE_SHIFT; >> + npages -= pinned; >> } >> /* Pin the rest chunk */ >> @@ -660,10 +687,27 @@ static int >> vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v, >> map_pfn << PAGE_SHIFT, msg->perm); >> out: >> if (ret) { >> + if (nchunks) { >> + unsigned long pfn; >> + >> + /* >> + * Unpin the outstanding pages which are yet to be >> + * mapped but haven't due to vdpa_map() or >> + * pin_user_pages() failure. >> + * >> + * Mapped pages are accounted in vdpa_map(), hence >> + * the corresponding unpinning will be handled by >> + * vdpa_unmap(). >> + */ >> + WARN_ON(!last_pfn); >> + for (pfn = map_pfn; pfn <= last_pfn; pfn++) >> + unpin_user_page(pfn_to_page(pfn)); >> + } >> vhost_vdpa_unmap(v, msg->iova, msg->size); >> - atomic64_sub(npages, &dev->mm->pinned_vm); >> } >> +unlock: >> mmap_read_unlock(dev->mm); >> +free: >> free_page((unsigned long)page_list); >> return ret; >> } > _______________________________________________ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization ^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH v3] vhost-vdpa: fix page pinning leakage in error path (rework) @ 2020-11-19 23:26 ` si-wei liu 0 siblings, 0 replies; 12+ messages in thread From: si-wei liu @ 2020-11-19 23:26 UTC (permalink / raw) To: mst Cc: Jason Wang, lingshan.zhu, joao.m.martins, boris.ostrovsky, linux-kernel, virtualization A gentle reminder. Any chance this patch will get picked soon? Thanks, -Siwei On 11/9/2020 7:28 PM, Jason Wang wrote: > > On 2020/11/6 上午7:26, Si-Wei Liu wrote: >> Pinned pages are not properly accounted particularly when >> mapping error occurs on IOTLB update. Clean up dangling >> pinned pages for the error path. >> >> The memory usage for bookkeeping pinned pages is reverted >> to what it was before: only one single free page is needed. >> This helps reduce the host memory demand for VM with a large >> amount of memory, or in the situation where host is running >> short of free memory. >> >> Fixes: 4c8cf31885f6 ("vhost: introduce vDPA-based backend") >> Signed-off-by: Si-Wei Liu <si-wei.liu@oracle.com> >> --- >> Changes in v3: >> - Turn explicit last_pfn check to a WARN_ON() (Jason) >> >> Changes in v2: >> - Drop the reversion patch >> - Fix unhandled page leak towards the end of page_list > > > Acked-by: Jason Wang <jasowang@redhat.com> > > Thanks > > >> >> drivers/vhost/vdpa.c | 80 >> ++++++++++++++++++++++++++++++++++++++++------------ >> 1 file changed, 62 insertions(+), 18 deletions(-) >> >> diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c >> index b6d9016..5b13dfd 100644 >> --- a/drivers/vhost/vdpa.c >> +++ b/drivers/vhost/vdpa.c >> @@ -560,6 +560,8 @@ static int vhost_vdpa_map(struct vhost_vdpa *v, >> if (r) >> vhost_iotlb_del_range(dev->iotlb, iova, iova + size - 1); >> + else >> + atomic64_add(size >> PAGE_SHIFT, &dev->mm->pinned_vm); >> return r; >> } >> @@ -591,14 +593,16 @@ static int >> vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v, >> unsigned long list_size = PAGE_SIZE / sizeof(struct page *); >> unsigned int gup_flags = FOLL_LONGTERM; >> unsigned long npages, cur_base, map_pfn, last_pfn = 0; >> - unsigned long locked, lock_limit, pinned, i; >> + unsigned long lock_limit, sz2pin, nchunks, i; >> u64 iova = msg->iova; >> + long pinned; >> int ret = 0; >> if (vhost_iotlb_itree_first(iotlb, msg->iova, >> msg->iova + msg->size - 1)) >> return -EEXIST; >> + /* Limit the use of memory for bookkeeping */ >> page_list = (struct page **) __get_free_page(GFP_KERNEL); >> if (!page_list) >> return -ENOMEM; >> @@ -607,52 +611,75 @@ static int >> vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v, >> gup_flags |= FOLL_WRITE; >> npages = PAGE_ALIGN(msg->size + (iova & ~PAGE_MASK)) >> >> PAGE_SHIFT; >> - if (!npages) >> - return -EINVAL; >> + if (!npages) { >> + ret = -EINVAL; >> + goto free; >> + } >> mmap_read_lock(dev->mm); >> - locked = atomic64_add_return(npages, &dev->mm->pinned_vm); >> lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; >> - >> - if (locked > lock_limit) { >> + if (npages + atomic64_read(&dev->mm->pinned_vm) > lock_limit) { >> ret = -ENOMEM; >> - goto out; >> + goto unlock; >> } >> cur_base = msg->uaddr & PAGE_MASK; >> iova &= PAGE_MASK; >> + nchunks = 0; >> while (npages) { >> - pinned = min_t(unsigned long, npages, list_size); >> - ret = pin_user_pages(cur_base, pinned, >> - gup_flags, page_list, NULL); >> - if (ret != pinned) >> + sz2pin = min_t(unsigned long, npages, list_size); >> + pinned = pin_user_pages(cur_base, sz2pin, >> + gup_flags, page_list, NULL); >> + if (sz2pin != pinned) { >> + if (pinned < 0) { >> + ret = pinned; >> + } else { >> + unpin_user_pages(page_list, pinned); >> + ret = -ENOMEM; >> + } >> goto out; >> + } >> + nchunks++; >> if (!last_pfn) >> map_pfn = page_to_pfn(page_list[0]); >> - for (i = 0; i < ret; i++) { >> + for (i = 0; i < pinned; i++) { >> unsigned long this_pfn = page_to_pfn(page_list[i]); >> u64 csize; >> if (last_pfn && (this_pfn != last_pfn + 1)) { >> /* Pin a contiguous chunk of memory */ >> csize = (last_pfn - map_pfn + 1) << PAGE_SHIFT; >> - if (vhost_vdpa_map(v, iova, csize, >> - map_pfn << PAGE_SHIFT, >> - msg->perm)) >> + ret = vhost_vdpa_map(v, iova, csize, >> + map_pfn << PAGE_SHIFT, >> + msg->perm); >> + if (ret) { >> + /* >> + * Unpin the pages that are left unmapped >> + * from this point on in the current >> + * page_list. The remaining outstanding >> + * ones which may stride across several >> + * chunks will be covered in the common >> + * error path subsequently. >> + */ >> + unpin_user_pages(&page_list[i], >> + pinned - i); >> goto out; >> + } >> + >> map_pfn = this_pfn; >> iova += csize; >> + nchunks = 0; >> } >> last_pfn = this_pfn; >> } >> - cur_base += ret << PAGE_SHIFT; >> - npages -= ret; >> + cur_base += pinned << PAGE_SHIFT; >> + npages -= pinned; >> } >> /* Pin the rest chunk */ >> @@ -660,10 +687,27 @@ static int >> vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v, >> map_pfn << PAGE_SHIFT, msg->perm); >> out: >> if (ret) { >> + if (nchunks) { >> + unsigned long pfn; >> + >> + /* >> + * Unpin the outstanding pages which are yet to be >> + * mapped but haven't due to vdpa_map() or >> + * pin_user_pages() failure. >> + * >> + * Mapped pages are accounted in vdpa_map(), hence >> + * the corresponding unpinning will be handled by >> + * vdpa_unmap(). >> + */ >> + WARN_ON(!last_pfn); >> + for (pfn = map_pfn; pfn <= last_pfn; pfn++) >> + unpin_user_page(pfn_to_page(pfn)); >> + } >> vhost_vdpa_unmap(v, msg->iova, msg->size); >> - atomic64_sub(npages, &dev->mm->pinned_vm); >> } >> +unlock: >> mmap_read_unlock(dev->mm); >> +free: >> free_page((unsigned long)page_list); >> return ret; >> } > ^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH v3] vhost-vdpa: fix page pinning leakage in error path (rework) 2020-11-05 23:26 ` Si-Wei Liu @ 2020-11-25 9:30 ` Michael S. Tsirkin -1 siblings, 0 replies; 12+ messages in thread From: Michael S. Tsirkin @ 2020-11-25 9:30 UTC (permalink / raw) To: Si-Wei Liu; +Cc: lingshan.zhu, linux-kernel, virtualization, boris.ostrovsky On Thu, Nov 05, 2020 at 06:26:33PM -0500, Si-Wei Liu wrote: > Pinned pages are not properly accounted particularly when > mapping error occurs on IOTLB update. Clean up dangling > pinned pages for the error path. > > The memory usage for bookkeeping pinned pages is reverted > to what it was before: only one single free page is needed. > This helps reduce the host memory demand for VM with a large > amount of memory, or in the situation where host is running > short of free memory. > > Fixes: 4c8cf31885f6 ("vhost: introduce vDPA-based backend") > Signed-off-by: Si-Wei Liu <si-wei.liu@oracle.com> Not sure which tree this is against, I had to apply this with minor tweaks. Pls take a look at the vhost tree and let me know whether it looks ok to you. > --- > Changes in v3: > - Turn explicit last_pfn check to a WARN_ON() (Jason) > > Changes in v2: > - Drop the reversion patch > - Fix unhandled page leak towards the end of page_list > > drivers/vhost/vdpa.c | 80 ++++++++++++++++++++++++++++++++++++++++------------ > 1 file changed, 62 insertions(+), 18 deletions(-) > > diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c > index b6d9016..5b13dfd 100644 > --- a/drivers/vhost/vdpa.c > +++ b/drivers/vhost/vdpa.c > @@ -560,6 +560,8 @@ static int vhost_vdpa_map(struct vhost_vdpa *v, > > if (r) > vhost_iotlb_del_range(dev->iotlb, iova, iova + size - 1); > + else > + atomic64_add(size >> PAGE_SHIFT, &dev->mm->pinned_vm); > > return r; > } > @@ -591,14 +593,16 @@ static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v, > unsigned long list_size = PAGE_SIZE / sizeof(struct page *); > unsigned int gup_flags = FOLL_LONGTERM; > unsigned long npages, cur_base, map_pfn, last_pfn = 0; > - unsigned long locked, lock_limit, pinned, i; > + unsigned long lock_limit, sz2pin, nchunks, i; > u64 iova = msg->iova; > + long pinned; > int ret = 0; > > if (vhost_iotlb_itree_first(iotlb, msg->iova, > msg->iova + msg->size - 1)) > return -EEXIST; > > + /* Limit the use of memory for bookkeeping */ > page_list = (struct page **) __get_free_page(GFP_KERNEL); > if (!page_list) > return -ENOMEM; > @@ -607,52 +611,75 @@ static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v, > gup_flags |= FOLL_WRITE; > > npages = PAGE_ALIGN(msg->size + (iova & ~PAGE_MASK)) >> PAGE_SHIFT; > - if (!npages) > - return -EINVAL; > + if (!npages) { > + ret = -EINVAL; > + goto free; > + } > > mmap_read_lock(dev->mm); > > - locked = atomic64_add_return(npages, &dev->mm->pinned_vm); > lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; > - > - if (locked > lock_limit) { > + if (npages + atomic64_read(&dev->mm->pinned_vm) > lock_limit) { > ret = -ENOMEM; > - goto out; > + goto unlock; > } > > cur_base = msg->uaddr & PAGE_MASK; > iova &= PAGE_MASK; > + nchunks = 0; > > while (npages) { > - pinned = min_t(unsigned long, npages, list_size); > - ret = pin_user_pages(cur_base, pinned, > - gup_flags, page_list, NULL); > - if (ret != pinned) > + sz2pin = min_t(unsigned long, npages, list_size); > + pinned = pin_user_pages(cur_base, sz2pin, > + gup_flags, page_list, NULL); > + if (sz2pin != pinned) { > + if (pinned < 0) { > + ret = pinned; > + } else { > + unpin_user_pages(page_list, pinned); > + ret = -ENOMEM; > + } > goto out; > + } > + nchunks++; > > if (!last_pfn) > map_pfn = page_to_pfn(page_list[0]); > > - for (i = 0; i < ret; i++) { > + for (i = 0; i < pinned; i++) { > unsigned long this_pfn = page_to_pfn(page_list[i]); > u64 csize; > > if (last_pfn && (this_pfn != last_pfn + 1)) { > /* Pin a contiguous chunk of memory */ > csize = (last_pfn - map_pfn + 1) << PAGE_SHIFT; > - if (vhost_vdpa_map(v, iova, csize, > - map_pfn << PAGE_SHIFT, > - msg->perm)) > + ret = vhost_vdpa_map(v, iova, csize, > + map_pfn << PAGE_SHIFT, > + msg->perm); > + if (ret) { > + /* > + * Unpin the pages that are left unmapped > + * from this point on in the current > + * page_list. The remaining outstanding > + * ones which may stride across several > + * chunks will be covered in the common > + * error path subsequently. > + */ > + unpin_user_pages(&page_list[i], > + pinned - i); > goto out; > + } > + > map_pfn = this_pfn; > iova += csize; > + nchunks = 0; > } > > last_pfn = this_pfn; > } > > - cur_base += ret << PAGE_SHIFT; > - npages -= ret; > + cur_base += pinned << PAGE_SHIFT; > + npages -= pinned; > } > > /* Pin the rest chunk */ > @@ -660,10 +687,27 @@ static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v, > map_pfn << PAGE_SHIFT, msg->perm); > out: > if (ret) { > + if (nchunks) { > + unsigned long pfn; > + > + /* > + * Unpin the outstanding pages which are yet to be > + * mapped but haven't due to vdpa_map() or > + * pin_user_pages() failure. > + * > + * Mapped pages are accounted in vdpa_map(), hence > + * the corresponding unpinning will be handled by > + * vdpa_unmap(). > + */ > + WARN_ON(!last_pfn); > + for (pfn = map_pfn; pfn <= last_pfn; pfn++) > + unpin_user_page(pfn_to_page(pfn)); > + } > vhost_vdpa_unmap(v, msg->iova, msg->size); > - atomic64_sub(npages, &dev->mm->pinned_vm); > } > +unlock: > mmap_read_unlock(dev->mm); > +free: > free_page((unsigned long)page_list); > return ret; > } > -- > 1.8.3.1 _______________________________________________ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization ^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH v3] vhost-vdpa: fix page pinning leakage in error path (rework) @ 2020-11-25 9:30 ` Michael S. Tsirkin 0 siblings, 0 replies; 12+ messages in thread From: Michael S. Tsirkin @ 2020-11-25 9:30 UTC (permalink / raw) To: Si-Wei Liu Cc: jasowang, lingshan.zhu, joao.m.martins, boris.ostrovsky, linux-kernel, virtualization On Thu, Nov 05, 2020 at 06:26:33PM -0500, Si-Wei Liu wrote: > Pinned pages are not properly accounted particularly when > mapping error occurs on IOTLB update. Clean up dangling > pinned pages for the error path. > > The memory usage for bookkeeping pinned pages is reverted > to what it was before: only one single free page is needed. > This helps reduce the host memory demand for VM with a large > amount of memory, or in the situation where host is running > short of free memory. > > Fixes: 4c8cf31885f6 ("vhost: introduce vDPA-based backend") > Signed-off-by: Si-Wei Liu <si-wei.liu@oracle.com> Not sure which tree this is against, I had to apply this with minor tweaks. Pls take a look at the vhost tree and let me know whether it looks ok to you. > --- > Changes in v3: > - Turn explicit last_pfn check to a WARN_ON() (Jason) > > Changes in v2: > - Drop the reversion patch > - Fix unhandled page leak towards the end of page_list > > drivers/vhost/vdpa.c | 80 ++++++++++++++++++++++++++++++++++++++++------------ > 1 file changed, 62 insertions(+), 18 deletions(-) > > diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c > index b6d9016..5b13dfd 100644 > --- a/drivers/vhost/vdpa.c > +++ b/drivers/vhost/vdpa.c > @@ -560,6 +560,8 @@ static int vhost_vdpa_map(struct vhost_vdpa *v, > > if (r) > vhost_iotlb_del_range(dev->iotlb, iova, iova + size - 1); > + else > + atomic64_add(size >> PAGE_SHIFT, &dev->mm->pinned_vm); > > return r; > } > @@ -591,14 +593,16 @@ static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v, > unsigned long list_size = PAGE_SIZE / sizeof(struct page *); > unsigned int gup_flags = FOLL_LONGTERM; > unsigned long npages, cur_base, map_pfn, last_pfn = 0; > - unsigned long locked, lock_limit, pinned, i; > + unsigned long lock_limit, sz2pin, nchunks, i; > u64 iova = msg->iova; > + long pinned; > int ret = 0; > > if (vhost_iotlb_itree_first(iotlb, msg->iova, > msg->iova + msg->size - 1)) > return -EEXIST; > > + /* Limit the use of memory for bookkeeping */ > page_list = (struct page **) __get_free_page(GFP_KERNEL); > if (!page_list) > return -ENOMEM; > @@ -607,52 +611,75 @@ static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v, > gup_flags |= FOLL_WRITE; > > npages = PAGE_ALIGN(msg->size + (iova & ~PAGE_MASK)) >> PAGE_SHIFT; > - if (!npages) > - return -EINVAL; > + if (!npages) { > + ret = -EINVAL; > + goto free; > + } > > mmap_read_lock(dev->mm); > > - locked = atomic64_add_return(npages, &dev->mm->pinned_vm); > lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; > - > - if (locked > lock_limit) { > + if (npages + atomic64_read(&dev->mm->pinned_vm) > lock_limit) { > ret = -ENOMEM; > - goto out; > + goto unlock; > } > > cur_base = msg->uaddr & PAGE_MASK; > iova &= PAGE_MASK; > + nchunks = 0; > > while (npages) { > - pinned = min_t(unsigned long, npages, list_size); > - ret = pin_user_pages(cur_base, pinned, > - gup_flags, page_list, NULL); > - if (ret != pinned) > + sz2pin = min_t(unsigned long, npages, list_size); > + pinned = pin_user_pages(cur_base, sz2pin, > + gup_flags, page_list, NULL); > + if (sz2pin != pinned) { > + if (pinned < 0) { > + ret = pinned; > + } else { > + unpin_user_pages(page_list, pinned); > + ret = -ENOMEM; > + } > goto out; > + } > + nchunks++; > > if (!last_pfn) > map_pfn = page_to_pfn(page_list[0]); > > - for (i = 0; i < ret; i++) { > + for (i = 0; i < pinned; i++) { > unsigned long this_pfn = page_to_pfn(page_list[i]); > u64 csize; > > if (last_pfn && (this_pfn != last_pfn + 1)) { > /* Pin a contiguous chunk of memory */ > csize = (last_pfn - map_pfn + 1) << PAGE_SHIFT; > - if (vhost_vdpa_map(v, iova, csize, > - map_pfn << PAGE_SHIFT, > - msg->perm)) > + ret = vhost_vdpa_map(v, iova, csize, > + map_pfn << PAGE_SHIFT, > + msg->perm); > + if (ret) { > + /* > + * Unpin the pages that are left unmapped > + * from this point on in the current > + * page_list. The remaining outstanding > + * ones which may stride across several > + * chunks will be covered in the common > + * error path subsequently. > + */ > + unpin_user_pages(&page_list[i], > + pinned - i); > goto out; > + } > + > map_pfn = this_pfn; > iova += csize; > + nchunks = 0; > } > > last_pfn = this_pfn; > } > > - cur_base += ret << PAGE_SHIFT; > - npages -= ret; > + cur_base += pinned << PAGE_SHIFT; > + npages -= pinned; > } > > /* Pin the rest chunk */ > @@ -660,10 +687,27 @@ static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v, > map_pfn << PAGE_SHIFT, msg->perm); > out: > if (ret) { > + if (nchunks) { > + unsigned long pfn; > + > + /* > + * Unpin the outstanding pages which are yet to be > + * mapped but haven't due to vdpa_map() or > + * pin_user_pages() failure. > + * > + * Mapped pages are accounted in vdpa_map(), hence > + * the corresponding unpinning will be handled by > + * vdpa_unmap(). > + */ > + WARN_ON(!last_pfn); > + for (pfn = map_pfn; pfn <= last_pfn; pfn++) > + unpin_user_page(pfn_to_page(pfn)); > + } > vhost_vdpa_unmap(v, msg->iova, msg->size); > - atomic64_sub(npages, &dev->mm->pinned_vm); > } > +unlock: > mmap_read_unlock(dev->mm); > +free: > free_page((unsigned long)page_list); > return ret; > } > -- > 1.8.3.1 ^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH v3] vhost-vdpa: fix page pinning leakage in error path (rework) 2020-11-25 9:30 ` Michael S. Tsirkin @ 2020-11-25 19:48 ` si-wei liu -1 siblings, 0 replies; 12+ messages in thread From: si-wei liu @ 2020-11-25 19:48 UTC (permalink / raw) To: Michael S. Tsirkin Cc: lingshan.zhu, linux-kernel, virtualization, boris.ostrovsky On 11/25/2020 1:30 AM, Michael S. Tsirkin wrote: > On Thu, Nov 05, 2020 at 06:26:33PM -0500, Si-Wei Liu wrote: >> Pinned pages are not properly accounted particularly when >> mapping error occurs on IOTLB update. Clean up dangling >> pinned pages for the error path. >> >> The memory usage for bookkeeping pinned pages is reverted >> to what it was before: only one single free page is needed. >> This helps reduce the host memory demand for VM with a large >> amount of memory, or in the situation where host is running >> short of free memory. >> >> Fixes: 4c8cf31885f6 ("vhost: introduce vDPA-based backend") >> Signed-off-by: Si-Wei Liu <si-wei.liu@oracle.com> > > Not sure which tree this is against, I had to apply this with > minor tweaks. Pls take a look at the vhost tree and > let me know whether it looks ok to you. Thanks Michael, the commit ad89653f79f1882d55d9df76c9b2b94f008c4e27 in the vhost tree looks good. Sorry, I don't think I ever attempted to merge with linux-next when v3 was posted, although I did it for the first two submissions. I will pay attention to it next time. Thanks, -Siwei > >> --- >> Changes in v3: >> - Turn explicit last_pfn check to a WARN_ON() (Jason) >> >> Changes in v2: >> - Drop the reversion patch >> - Fix unhandled page leak towards the end of page_list >> >> drivers/vhost/vdpa.c | 80 ++++++++++++++++++++++++++++++++++++++++------------ >> 1 file changed, 62 insertions(+), 18 deletions(-) >> >> diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c >> index b6d9016..5b13dfd 100644 >> --- a/drivers/vhost/vdpa.c >> +++ b/drivers/vhost/vdpa.c >> @@ -560,6 +560,8 @@ static int vhost_vdpa_map(struct vhost_vdpa *v, >> >> if (r) >> vhost_iotlb_del_range(dev->iotlb, iova, iova + size - 1); >> + else >> + atomic64_add(size >> PAGE_SHIFT, &dev->mm->pinned_vm); >> >> return r; >> } >> @@ -591,14 +593,16 @@ static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v, >> unsigned long list_size = PAGE_SIZE / sizeof(struct page *); >> unsigned int gup_flags = FOLL_LONGTERM; >> unsigned long npages, cur_base, map_pfn, last_pfn = 0; >> - unsigned long locked, lock_limit, pinned, i; >> + unsigned long lock_limit, sz2pin, nchunks, i; >> u64 iova = msg->iova; >> + long pinned; >> int ret = 0; >> >> if (vhost_iotlb_itree_first(iotlb, msg->iova, >> msg->iova + msg->size - 1)) >> return -EEXIST; >> >> + /* Limit the use of memory for bookkeeping */ >> page_list = (struct page **) __get_free_page(GFP_KERNEL); >> if (!page_list) >> return -ENOMEM; >> @@ -607,52 +611,75 @@ static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v, >> gup_flags |= FOLL_WRITE; >> >> npages = PAGE_ALIGN(msg->size + (iova & ~PAGE_MASK)) >> PAGE_SHIFT; >> - if (!npages) >> - return -EINVAL; >> + if (!npages) { >> + ret = -EINVAL; >> + goto free; >> + } >> >> mmap_read_lock(dev->mm); >> >> - locked = atomic64_add_return(npages, &dev->mm->pinned_vm); >> lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; >> - >> - if (locked > lock_limit) { >> + if (npages + atomic64_read(&dev->mm->pinned_vm) > lock_limit) { >> ret = -ENOMEM; >> - goto out; >> + goto unlock; >> } >> >> cur_base = msg->uaddr & PAGE_MASK; >> iova &= PAGE_MASK; >> + nchunks = 0; >> >> while (npages) { >> - pinned = min_t(unsigned long, npages, list_size); >> - ret = pin_user_pages(cur_base, pinned, >> - gup_flags, page_list, NULL); >> - if (ret != pinned) >> + sz2pin = min_t(unsigned long, npages, list_size); >> + pinned = pin_user_pages(cur_base, sz2pin, >> + gup_flags, page_list, NULL); >> + if (sz2pin != pinned) { >> + if (pinned < 0) { >> + ret = pinned; >> + } else { >> + unpin_user_pages(page_list, pinned); >> + ret = -ENOMEM; >> + } >> goto out; >> + } >> + nchunks++; >> >> if (!last_pfn) >> map_pfn = page_to_pfn(page_list[0]); >> >> - for (i = 0; i < ret; i++) { >> + for (i = 0; i < pinned; i++) { >> unsigned long this_pfn = page_to_pfn(page_list[i]); >> u64 csize; >> >> if (last_pfn && (this_pfn != last_pfn + 1)) { >> /* Pin a contiguous chunk of memory */ >> csize = (last_pfn - map_pfn + 1) << PAGE_SHIFT; >> - if (vhost_vdpa_map(v, iova, csize, >> - map_pfn << PAGE_SHIFT, >> - msg->perm)) >> + ret = vhost_vdpa_map(v, iova, csize, >> + map_pfn << PAGE_SHIFT, >> + msg->perm); >> + if (ret) { >> + /* >> + * Unpin the pages that are left unmapped >> + * from this point on in the current >> + * page_list. The remaining outstanding >> + * ones which may stride across several >> + * chunks will be covered in the common >> + * error path subsequently. >> + */ >> + unpin_user_pages(&page_list[i], >> + pinned - i); >> goto out; >> + } >> + >> map_pfn = this_pfn; >> iova += csize; >> + nchunks = 0; >> } >> >> last_pfn = this_pfn; >> } >> >> - cur_base += ret << PAGE_SHIFT; >> - npages -= ret; >> + cur_base += pinned << PAGE_SHIFT; >> + npages -= pinned; >> } >> >> /* Pin the rest chunk */ >> @@ -660,10 +687,27 @@ static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v, >> map_pfn << PAGE_SHIFT, msg->perm); >> out: >> if (ret) { >> + if (nchunks) { >> + unsigned long pfn; >> + >> + /* >> + * Unpin the outstanding pages which are yet to be >> + * mapped but haven't due to vdpa_map() or >> + * pin_user_pages() failure. >> + * >> + * Mapped pages are accounted in vdpa_map(), hence >> + * the corresponding unpinning will be handled by >> + * vdpa_unmap(). >> + */ >> + WARN_ON(!last_pfn); >> + for (pfn = map_pfn; pfn <= last_pfn; pfn++) >> + unpin_user_page(pfn_to_page(pfn)); >> + } >> vhost_vdpa_unmap(v, msg->iova, msg->size); >> - atomic64_sub(npages, &dev->mm->pinned_vm); >> } >> +unlock: >> mmap_read_unlock(dev->mm); >> +free: >> free_page((unsigned long)page_list); >> return ret; >> } >> -- >> 1.8.3.1 _______________________________________________ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization ^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH v3] vhost-vdpa: fix page pinning leakage in error path (rework) @ 2020-11-25 19:48 ` si-wei liu 0 siblings, 0 replies; 12+ messages in thread From: si-wei liu @ 2020-11-25 19:48 UTC (permalink / raw) To: Michael S. Tsirkin Cc: jasowang, lingshan.zhu, joao.m.martins, boris.ostrovsky, linux-kernel, virtualization On 11/25/2020 1:30 AM, Michael S. Tsirkin wrote: > On Thu, Nov 05, 2020 at 06:26:33PM -0500, Si-Wei Liu wrote: >> Pinned pages are not properly accounted particularly when >> mapping error occurs on IOTLB update. Clean up dangling >> pinned pages for the error path. >> >> The memory usage for bookkeeping pinned pages is reverted >> to what it was before: only one single free page is needed. >> This helps reduce the host memory demand for VM with a large >> amount of memory, or in the situation where host is running >> short of free memory. >> >> Fixes: 4c8cf31885f6 ("vhost: introduce vDPA-based backend") >> Signed-off-by: Si-Wei Liu <si-wei.liu@oracle.com> > > Not sure which tree this is against, I had to apply this with > minor tweaks. Pls take a look at the vhost tree and > let me know whether it looks ok to you. Thanks Michael, the commit ad89653f79f1882d55d9df76c9b2b94f008c4e27 in the vhost tree looks good. Sorry, I don't think I ever attempted to merge with linux-next when v3 was posted, although I did it for the first two submissions. I will pay attention to it next time. Thanks, -Siwei > >> --- >> Changes in v3: >> - Turn explicit last_pfn check to a WARN_ON() (Jason) >> >> Changes in v2: >> - Drop the reversion patch >> - Fix unhandled page leak towards the end of page_list >> >> drivers/vhost/vdpa.c | 80 ++++++++++++++++++++++++++++++++++++++++------------ >> 1 file changed, 62 insertions(+), 18 deletions(-) >> >> diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c >> index b6d9016..5b13dfd 100644 >> --- a/drivers/vhost/vdpa.c >> +++ b/drivers/vhost/vdpa.c >> @@ -560,6 +560,8 @@ static int vhost_vdpa_map(struct vhost_vdpa *v, >> >> if (r) >> vhost_iotlb_del_range(dev->iotlb, iova, iova + size - 1); >> + else >> + atomic64_add(size >> PAGE_SHIFT, &dev->mm->pinned_vm); >> >> return r; >> } >> @@ -591,14 +593,16 @@ static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v, >> unsigned long list_size = PAGE_SIZE / sizeof(struct page *); >> unsigned int gup_flags = FOLL_LONGTERM; >> unsigned long npages, cur_base, map_pfn, last_pfn = 0; >> - unsigned long locked, lock_limit, pinned, i; >> + unsigned long lock_limit, sz2pin, nchunks, i; >> u64 iova = msg->iova; >> + long pinned; >> int ret = 0; >> >> if (vhost_iotlb_itree_first(iotlb, msg->iova, >> msg->iova + msg->size - 1)) >> return -EEXIST; >> >> + /* Limit the use of memory for bookkeeping */ >> page_list = (struct page **) __get_free_page(GFP_KERNEL); >> if (!page_list) >> return -ENOMEM; >> @@ -607,52 +611,75 @@ static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v, >> gup_flags |= FOLL_WRITE; >> >> npages = PAGE_ALIGN(msg->size + (iova & ~PAGE_MASK)) >> PAGE_SHIFT; >> - if (!npages) >> - return -EINVAL; >> + if (!npages) { >> + ret = -EINVAL; >> + goto free; >> + } >> >> mmap_read_lock(dev->mm); >> >> - locked = atomic64_add_return(npages, &dev->mm->pinned_vm); >> lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; >> - >> - if (locked > lock_limit) { >> + if (npages + atomic64_read(&dev->mm->pinned_vm) > lock_limit) { >> ret = -ENOMEM; >> - goto out; >> + goto unlock; >> } >> >> cur_base = msg->uaddr & PAGE_MASK; >> iova &= PAGE_MASK; >> + nchunks = 0; >> >> while (npages) { >> - pinned = min_t(unsigned long, npages, list_size); >> - ret = pin_user_pages(cur_base, pinned, >> - gup_flags, page_list, NULL); >> - if (ret != pinned) >> + sz2pin = min_t(unsigned long, npages, list_size); >> + pinned = pin_user_pages(cur_base, sz2pin, >> + gup_flags, page_list, NULL); >> + if (sz2pin != pinned) { >> + if (pinned < 0) { >> + ret = pinned; >> + } else { >> + unpin_user_pages(page_list, pinned); >> + ret = -ENOMEM; >> + } >> goto out; >> + } >> + nchunks++; >> >> if (!last_pfn) >> map_pfn = page_to_pfn(page_list[0]); >> >> - for (i = 0; i < ret; i++) { >> + for (i = 0; i < pinned; i++) { >> unsigned long this_pfn = page_to_pfn(page_list[i]); >> u64 csize; >> >> if (last_pfn && (this_pfn != last_pfn + 1)) { >> /* Pin a contiguous chunk of memory */ >> csize = (last_pfn - map_pfn + 1) << PAGE_SHIFT; >> - if (vhost_vdpa_map(v, iova, csize, >> - map_pfn << PAGE_SHIFT, >> - msg->perm)) >> + ret = vhost_vdpa_map(v, iova, csize, >> + map_pfn << PAGE_SHIFT, >> + msg->perm); >> + if (ret) { >> + /* >> + * Unpin the pages that are left unmapped >> + * from this point on in the current >> + * page_list. The remaining outstanding >> + * ones which may stride across several >> + * chunks will be covered in the common >> + * error path subsequently. >> + */ >> + unpin_user_pages(&page_list[i], >> + pinned - i); >> goto out; >> + } >> + >> map_pfn = this_pfn; >> iova += csize; >> + nchunks = 0; >> } >> >> last_pfn = this_pfn; >> } >> >> - cur_base += ret << PAGE_SHIFT; >> - npages -= ret; >> + cur_base += pinned << PAGE_SHIFT; >> + npages -= pinned; >> } >> >> /* Pin the rest chunk */ >> @@ -660,10 +687,27 @@ static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v, >> map_pfn << PAGE_SHIFT, msg->perm); >> out: >> if (ret) { >> + if (nchunks) { >> + unsigned long pfn; >> + >> + /* >> + * Unpin the outstanding pages which are yet to be >> + * mapped but haven't due to vdpa_map() or >> + * pin_user_pages() failure. >> + * >> + * Mapped pages are accounted in vdpa_map(), hence >> + * the corresponding unpinning will be handled by >> + * vdpa_unmap(). >> + */ >> + WARN_ON(!last_pfn); >> + for (pfn = map_pfn; pfn <= last_pfn; pfn++) >> + unpin_user_page(pfn_to_page(pfn)); >> + } >> vhost_vdpa_unmap(v, msg->iova, msg->size); >> - atomic64_sub(npages, &dev->mm->pinned_vm); >> } >> +unlock: >> mmap_read_unlock(dev->mm); >> +free: >> free_page((unsigned long)page_list); >> return ret; >> } >> -- >> 1.8.3.1 ^ permalink raw reply [flat|nested] 12+ messages in thread
end of thread, other threads:[~2020-11-25 19:48 UTC | newest] Thread overview: 12+ messages (download: mbox.gz follow: Atom feed -- links below jump to the message on this page -- 2020-11-05 23:26 [PATCH v3] vhost-vdpa: fix page pinning leakage in error path (rework) Si-Wei Liu 2020-11-05 23:26 ` Si-Wei Liu 2020-11-10 3:28 ` Jason Wang 2020-11-10 3:28 ` Jason Wang 2020-11-10 22:41 ` si-wei liu 2020-11-10 22:41 ` si-wei liu 2020-11-19 23:26 ` si-wei liu 2020-11-19 23:26 ` si-wei liu 2020-11-25 9:30 ` Michael S. Tsirkin 2020-11-25 9:30 ` Michael S. Tsirkin 2020-11-25 19:48 ` si-wei liu 2020-11-25 19:48 ` si-wei liu
This is an external index of several public inboxes, see mirroring instructions on how to clone and mirror all data and code used by this external index.