stable.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
To: linux-kernel@vger.kernel.org
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>,
	stable@vger.kernel.org, Peter Xu <peterx@redhat.com>,
	Kirti Wankhede <kwankhede@nvidia.com>,
	Alex Williamson <alex.williamson@redhat.com>
Subject: [PATCH 4.10 41/93] vfio/type1: Remove locked page accounting workqueue
Date: Thu, 18 May 2017 12:47:10 +0200	[thread overview]
Message-ID: <20170518104744.814832465@linuxfoundation.org> (raw)
In-Reply-To: <20170518104743.163522815@linuxfoundation.org>

4.10-stable review patch.  If anyone has any objections, please let me know.

------------------

From: Alex Williamson <alex.williamson@redhat.com>

commit 0cfef2b7410b64d7a430947e0b533314c4f97153 upstream.

If the mmap_sem is contended then the vfio type1 IOMMU backend will
defer locked page accounting updates to a workqueue task.  This has a
few problems, and depending on which side the user tries to play, they
might be over-penalized for unmaps that haven't yet been accounted or
race the workqueue to enter more mappings than they're allowed.  The
original intent of this workqueue mechanism seems to be focused on
reducing latency through the ioctl, but we cannot do so at the cost
of correctness.  Remove this workqueue mechanism and update the
callers to allow for failure.  We can also now recheck the limit under
write lock to make sure we don't exceed it.

vfio_pin_pages_remote() also now necessarily includes an unwind path
which we can jump to directly if the consecutive page pinning finds
that we're exceeding the user's memory limits.  This avoids the
current lazy approach which does accounting and mapping up to the
fault, only to return an error on the next iteration to unwind the
entire vfio_dma.

Reviewed-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Kirti Wankhede <kwankhede@nvidia.com>
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

---
 drivers/vfio/vfio_iommu_type1.c |  110 ++++++++++++++++++----------------------
 1 file changed, 51 insertions(+), 59 deletions(-)

--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -243,69 +243,46 @@ static int vfio_iova_put_vfio_pfn(struct
 	return ret;
 }
 
-struct vwork {
-	struct mm_struct	*mm;
-	long			npage;
-	struct work_struct	work;
-};
-
-/* delayed decrement/increment for locked_vm */
-static void vfio_lock_acct_bg(struct work_struct *work)
-{
-	struct vwork *vwork = container_of(work, struct vwork, work);
-	struct mm_struct *mm;
-
-	mm = vwork->mm;
-	down_write(&mm->mmap_sem);
-	mm->locked_vm += vwork->npage;
-	up_write(&mm->mmap_sem);
-	mmput(mm);
-	kfree(vwork);
-}
-
-static void vfio_lock_acct(struct task_struct *task, long npage)
+static int vfio_lock_acct(struct task_struct *task, long npage, bool *lock_cap)
 {
-	struct vwork *vwork;
 	struct mm_struct *mm;
 	bool is_current;
+	int ret;
 
 	if (!npage)
-		return;
+		return 0;
 
 	is_current = (task->mm == current->mm);
 
 	mm = is_current ? task->mm : get_task_mm(task);
 	if (!mm)
-		return; /* process exited */
+		return -ESRCH; /* process exited */
 
-	if (down_write_trylock(&mm->mmap_sem)) {
-		mm->locked_vm += npage;
-		up_write(&mm->mmap_sem);
-		if (!is_current)
-			mmput(mm);
-		return;
-	}
+	ret = down_write_killable(&mm->mmap_sem);
+	if (!ret) {
+		if (npage > 0) {
+			if (lock_cap ? !*lock_cap :
+			    !has_capability(task, CAP_IPC_LOCK)) {
+				unsigned long limit;
+
+				limit = task_rlimit(task,
+						RLIMIT_MEMLOCK) >> PAGE_SHIFT;
 
-	if (is_current) {
-		mm = get_task_mm(task);
-		if (!mm)
-			return;
+				if (mm->locked_vm + npage > limit)
+					ret = -ENOMEM;
+			}
+		}
+
+		if (!ret)
+			mm->locked_vm += npage;
+
+		up_write(&mm->mmap_sem);
 	}
 
-	/*
-	 * Couldn't get mmap_sem lock, so must setup to update
-	 * mm->locked_vm later. If locked_vm were atomic, we
-	 * wouldn't need this silliness
-	 */
-	vwork = kmalloc(sizeof(struct vwork), GFP_KERNEL);
-	if (WARN_ON(!vwork)) {
+	if (!is_current)
 		mmput(mm);
-		return;
-	}
-	INIT_WORK(&vwork->work, vfio_lock_acct_bg);
-	vwork->mm = mm;
-	vwork->npage = npage;
-	schedule_work(&vwork->work);
+
+	return ret;
 }
 
 /*
@@ -402,7 +379,7 @@ static int vaddr_get_pfn(struct mm_struc
 static long vfio_pin_pages_remote(struct vfio_dma *dma, unsigned long vaddr,
 				  long npage, unsigned long *pfn_base)
 {
-	unsigned long limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
+	unsigned long pfn = 0, limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
 	bool lock_cap = capable(CAP_IPC_LOCK);
 	long ret, pinned = 0, lock_acct = 0;
 	bool rsvd;
@@ -439,8 +416,6 @@ static long vfio_pin_pages_remote(struct
 	/* Lock all the consecutive pages from pfn_base */
 	for (vaddr += PAGE_SIZE, iova += PAGE_SIZE; pinned < npage;
 	     pinned++, vaddr += PAGE_SIZE, iova += PAGE_SIZE) {
-		unsigned long pfn = 0;
-
 		ret = vaddr_get_pfn(current->mm, vaddr, dma->prot, &pfn);
 		if (ret)
 			break;
@@ -457,14 +432,25 @@ static long vfio_pin_pages_remote(struct
 				put_pfn(pfn, dma->prot);
 				pr_warn("%s: RLIMIT_MEMLOCK (%ld) exceeded\n",
 					__func__, limit << PAGE_SHIFT);
-				break;
+				ret = -ENOMEM;
+				goto unpin_out;
 			}
 			lock_acct++;
 		}
 	}
 
 out:
-	vfio_lock_acct(current, lock_acct);
+	ret = vfio_lock_acct(current, lock_acct, &lock_cap);
+
+unpin_out:
+	if (ret) {
+		if (!rsvd) {
+			for (pfn = *pfn_base ; pinned ; pfn++, pinned--)
+				put_pfn(pfn, dma->prot);
+		}
+
+		return ret;
+	}
 
 	return pinned;
 }
@@ -485,7 +471,7 @@ static long vfio_unpin_pages_remote(stru
 	}
 
 	if (do_accounting)
-		vfio_lock_acct(dma->task, locked - unlocked);
+		vfio_lock_acct(dma->task, locked - unlocked, NULL);
 
 	return unlocked;
 }
@@ -519,8 +505,14 @@ static int vfio_pin_page_external(struct
 		goto pin_page_exit;
 	}
 
-	if (!rsvd && do_accounting)
-		vfio_lock_acct(dma->task, 1);
+	if (!rsvd && do_accounting) {
+		ret = vfio_lock_acct(dma->task, 1, &lock_cap);
+		if (ret) {
+			put_pfn(*pfn_base, dma->prot);
+			goto pin_page_exit;
+		}
+	}
+
 	ret = 1;
 
 pin_page_exit:
@@ -540,7 +532,7 @@ static int vfio_unpin_page_external(stru
 	unlocked = vfio_iova_put_vfio_pfn(dma, vpfn);
 
 	if (do_accounting)
-		vfio_lock_acct(dma->task, -unlocked);
+		vfio_lock_acct(dma->task, -unlocked, NULL);
 
 	return unlocked;
 }
@@ -737,7 +729,7 @@ static long vfio_unmap_unpin(struct vfio
 
 	dma->iommu_mapped = false;
 	if (do_accounting) {
-		vfio_lock_acct(dma->task, -unlocked);
+		vfio_lock_acct(dma->task, -unlocked, NULL);
 		return 0;
 	}
 	return unlocked;
@@ -1346,7 +1338,7 @@ static void vfio_iommu_unmap_unpin_reacc
 			if (!is_invalid_reserved_pfn(vpfn->pfn))
 				locked++;
 		}
-		vfio_lock_acct(dma->task, locked - unlocked);
+		vfio_lock_acct(dma->task, locked - unlocked, NULL);
 	}
 }
 

  parent reply	other threads:[~2017-05-18 10:47 UTC|newest]

Thread overview: 92+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-05-18 10:46 [PATCH 4.10 00/93] 4.10.17-stable review Greg Kroah-Hartman
2017-05-18 10:46 ` [PATCH 4.10 01/93] xen: adjust early dom0 p2m handling to xen hypervisor behavior Greg Kroah-Hartman
2017-05-18 10:46 ` [PATCH 4.10 02/93] target: Fix compare_and_write_callback handling for non GOOD status Greg Kroah-Hartman
2017-05-18 10:46 ` [PATCH 4.10 03/93] target/fileio: Fix zero-length READ and WRITE handling Greg Kroah-Hartman
2017-05-18 10:46 ` [PATCH 4.10 04/93] iscsi-target: Set session_fall_back_to_erl0 when forcing reinstatement Greg Kroah-Hartman
2017-05-18 10:46 ` [PATCH 4.10 05/93] usb: xhci: bInterval quirk for TI TUSB73x0 Greg Kroah-Hartman
2017-05-18 10:46 ` [PATCH 4.10 06/93] usb: host: xhci: print correct command ring address Greg Kroah-Hartman
2017-05-18 10:46 ` [PATCH 4.10 07/93] USB: serial: ftdi_sio: add device ID for Microsemi/Arrow SF2PLUS Dev Kit Greg Kroah-Hartman
2017-05-18 10:46 ` [PATCH 4.10 08/93] USB: Proper handling of Race Condition when two USB class drivers try to call init_usb_class simultaneously Greg Kroah-Hartman
2017-05-18 10:46 ` [PATCH 4.10 10/93] staging: vt6656: use off stack for in buffer USB transfers Greg Kroah-Hartman
2017-05-18 10:46 ` [PATCH 4.10 11/93] staging: vt6656: use off stack for out " Greg Kroah-Hartman
2017-05-18 10:46 ` [PATCH 4.10 12/93] staging: gdm724x: gdm_mux: fix use-after-free on module unload Greg Kroah-Hartman
2017-05-18 10:46 ` [PATCH 4.10 13/93] staging: wilc1000: Fix problem with wrong vif index Greg Kroah-Hartman
2017-05-18 10:46 ` [PATCH 4.10 14/93] staging: comedi: jr3_pci: fix possible null pointer dereference Greg Kroah-Hartman
2017-05-18 10:46 ` [PATCH 4.10 15/93] staging: comedi: jr3_pci: cope with jiffies wraparound Greg Kroah-Hartman
2017-05-18 10:46 ` [PATCH 4.10 16/93] usb: misc: add missing continue in switch Greg Kroah-Hartman
2017-05-18 10:46 ` [PATCH 4.10 17/93] usb: gadget: legacy gadgets are optional Greg Kroah-Hartman
2017-05-18 10:46 ` [PATCH 4.10 18/93] usb: Make sure usb/phy/of gets built-in Greg Kroah-Hartman
2017-05-18 10:46 ` [PATCH 4.10 19/93] usb: hub: Fix error loop seen after hub communication errors Greg Kroah-Hartman
2017-05-18 10:46 ` [PATCH 4.10 20/93] usb: hub: Do not attempt to autosuspend disconnected devices Greg Kroah-Hartman
2017-05-18 10:46 ` [PATCH 4.10 21/93] usb: misc: legousbtower: Fix buffers on stack Greg Kroah-Hartman
2017-05-18 10:46 ` [PATCH 4.10 22/93] x86/boot: Fix BSS corruption/overwrite bug in early x86 kernel startup Greg Kroah-Hartman
2017-05-18 10:46 ` [PATCH 4.10 23/93] selftests/x86/ldt_gdt_32: Work around a glibc sigaction() bug Greg Kroah-Hartman
2017-05-18 10:46 ` [PATCH 4.10 24/93] x86, pmem: Fix cache flushing for iovec write < 8 bytes Greg Kroah-Hartman
2017-05-18 10:46 ` [PATCH 4.10 25/93] um: Fix PTRACE_POKEUSER on x86_64 Greg Kroah-Hartman
2017-05-18 10:46 ` [PATCH 4.10 26/93] perf/x86: Fix Broadwell-EP DRAM RAPL events Greg Kroah-Hartman
2017-05-18 10:46 ` [PATCH 4.10 28/93] KVM: arm/arm64: fix races in kvm_psci_vcpu_on Greg Kroah-Hartman
2017-05-18 10:46 ` [PATCH 4.10 29/93] arm64: KVM: Fix decoding of Rt/Rt2 when trapping AArch32 CP accesses Greg Kroah-Hartman
2017-05-18 10:46 ` [PATCH 4.10 30/93] block: fix blk_integrity_register to use templates interval_exp if not 0 Greg Kroah-Hartman
2017-05-18 10:47 ` [PATCH 4.10 31/93] crypto: s5p-sss - Close possible race for completed requests Greg Kroah-Hartman
2017-05-18 10:47 ` [PATCH 4.10 32/93] crypto: algif_aead - Require setkey before accept(2) Greg Kroah-Hartman
2017-05-18 10:47 ` [PATCH 4.10 33/93] crypto: ccp - Use only the relevant interrupt bits Greg Kroah-Hartman
2017-05-18 10:47 ` [PATCH 4.10 34/93] crypto: ccp - Disable interrupts early on unload Greg Kroah-Hartman
2017-05-18 10:47 ` [PATCH 4.10 35/93] crypto: ccp - Change ISR handler method for a v3 CCP Greg Kroah-Hartman
2017-05-18 10:47 ` [PATCH 4.10 36/93] crypto: ccp - Change ISR handler method for a v5 CCP Greg Kroah-Hartman
2017-05-18 10:47 ` [PATCH 4.10 37/93] dm crypt: rewrite (wipe) key in crypto layer using random data Greg Kroah-Hartman
2017-05-18 10:47 ` [PATCH 4.10 38/93] dm era: save spacemap metadata root after the pre-commit Greg Kroah-Hartman
2017-05-18 10:47 ` [PATCH 4.10 39/93] dm rq: check blk_mq_register_dev() return value in dm_mq_init_request_queue() Greg Kroah-Hartman
2017-05-18 10:47 ` [PATCH 4.10 40/93] dm thin: fix a memory leak when passing discard bio down Greg Kroah-Hartman
2017-05-18 10:47 ` Greg Kroah-Hartman [this message]
2017-05-18 10:47 ` [PATCH 4.10 42/93] iov_iter: dont revert iov buffer if csum error Greg Kroah-Hartman
2017-05-18 10:47 ` [PATCH 4.10 43/93] IB/core: Fix sysfs registration error flow Greg Kroah-Hartman
2017-05-18 10:47 ` [PATCH 4.10 44/93] IB/core: For multicast functions, verify that LIDs are multicast LIDs Greg Kroah-Hartman
2017-05-18 10:47 ` [PATCH 4.10 45/93] IB/IPoIB: ibX: failed to create mcg debug file Greg Kroah-Hartman
2017-05-18 10:47 ` [PATCH 4.10 46/93] IB/mlx4: Fix ib device initialization error flow Greg Kroah-Hartman
2017-05-18 10:47 ` [PATCH 4.10 47/93] IB/mlx4: Reduce SRIOV multicast cleanup warning message to debug level Greg Kroah-Hartman
2017-05-18 10:47 ` [PATCH 4.10 48/93] IB/hfi1: Prevent kernel QP post send hard lockups Greg Kroah-Hartman
2017-05-18 10:47 ` [PATCH 4.10 49/93] perf auxtrace: Fix no_size logic in addr_filter__resolve_kernel_syms() Greg Kroah-Hartman
2017-05-18 10:47 ` [PATCH 4.10 50/93] perf annotate s390: Fix perf annotate error -95 (4.10 regression) Greg Kroah-Hartman
2017-05-18 10:47 ` [PATCH 4.10 51/93] perf annotate s390: Implement jump types for perf annotate Greg Kroah-Hartman
2017-05-18 10:47 ` [PATCH 4.10 52/93] jbd2: fix dbench4 performance regression for nobarrier mounts Greg Kroah-Hartman
2017-05-18 10:47 ` [PATCH 4.10 53/93] ext4: evict inline data when writing to memory map Greg Kroah-Hartman
2017-05-18 10:47 ` [PATCH 4.10 54/93] orangefs: fix bounds check for listxattr Greg Kroah-Hartman
2017-05-18 10:47 ` [PATCH 4.10 55/93] orangefs: clean up oversize xattr validation Greg Kroah-Hartman
2017-05-18 10:47 ` [PATCH 4.10 56/93] orangefs: do not set getattr_time on orangefs_lookup Greg Kroah-Hartman
2017-05-18 10:47 ` [PATCH 4.10 57/93] orangefs: do not check possibly stale size on truncate Greg Kroah-Hartman
2017-05-18 10:47 ` [PATCH 4.10 58/93] fs/xattr.c: zero out memory copied to userspace in getxattr Greg Kroah-Hartman
2017-05-18 10:47 ` [PATCH 4.10 59/93] ceph: fix memory leak in __ceph_setxattr() Greg Kroah-Hartman
2017-05-18 10:47 ` [PATCH 4.10 60/93] fs/block_dev: always invalidate cleancache in invalidate_bdev() Greg Kroah-Hartman
2017-05-18 10:47 ` [PATCH 4.10 61/93] mm: prevent potential recursive reclaim due to clearing PF_MEMALLOC Greg Kroah-Hartman
2017-05-18 10:47 ` [PATCH 4.10 62/93] Fix match_prepath() Greg Kroah-Hartman
2017-05-18 10:47 ` [PATCH 4.10 63/93] Set unicode flag on cifs echo request to avoid Mac error Greg Kroah-Hartman
2017-05-18 10:47 ` [PATCH 4.10 64/93] SMB3: Work around mount failure when using SMB3 dialect to Macs Greg Kroah-Hartman
2017-05-18 10:47 ` [PATCH 4.10 66/93] cifs: fix leak in FSCTL_ENUM_SNAPS response handling Greg Kroah-Hartman
2017-05-18 10:47 ` [PATCH 4.10 67/93] cifs: fix CIFS_ENUMERATE_SNAPSHOTS oops Greg Kroah-Hartman
2017-05-18 10:47 ` [PATCH 4.10 68/93] CIFS: fix oplock break deadlocks Greg Kroah-Hartman
2017-05-18 10:47 ` [PATCH 4.10 69/93] cifs: fix CIFS_IOC_GET_MNT_INFO oops Greg Kroah-Hartman
2017-05-18 10:47 ` [PATCH 4.10 71/93] ovl: do not set overlay.opaque on non-dir create Greg Kroah-Hartman
2017-05-18 10:47 ` [PATCH 4.10 72/93] padata: free correct variable Greg Kroah-Hartman
2017-05-18 10:47 ` [PATCH 4.10 73/93] md/raid1: avoid reusing a resync bio after error handling Greg Kroah-Hartman
2017-05-18 10:47 ` [PATCH 4.10 74/93] device-dax: fix cdev leak Greg Kroah-Hartman
2017-05-18 10:47 ` [PATCH 4.10 75/93] device-dax: fix sysfs attribute deadlock Greg Kroah-Hartman
2017-05-18 10:47 ` [PATCH 4.10 76/93] dax: prevent invalidation of mapped DAX entries Greg Kroah-Hartman
2017-05-18 10:47 ` [PATCH 4.10 77/93] mm: fix data corruption due to stale mmap reads Greg Kroah-Hartman
2017-05-18 10:47 ` [PATCH 4.10 78/93] f2fs: fix fs corruption due to zero inode page Greg Kroah-Hartman
2017-05-18 10:47 ` [PATCH 4.10 79/93] fscrypt: fix context consistency check when key(s) unavailable Greg Kroah-Hartman
2017-05-18 10:47 ` [PATCH 4.10 80/93] serial: samsung: Use right device for DMA-mapping calls Greg Kroah-Hartman
2017-05-18 10:47 ` [PATCH 4.10 81/93] serial: omap: fix runtime-pm handling on unbind Greg Kroah-Hartman
2017-05-18 10:47 ` [PATCH 4.10 82/93] serial: omap: suspend device on probe errors Greg Kroah-Hartman
2017-05-18 10:47 ` [PATCH 4.10 83/93] tty: pty: Fix ldisc flush after userspace become aware of the data already Greg Kroah-Hartman
2017-05-18 10:47 ` [PATCH 4.10 84/93] Bluetooth: Fix user channel for 32bit userspace on 64bit kernel Greg Kroah-Hartman
2017-05-18 10:47 ` [PATCH 4.10 85/93] Bluetooth: hci_bcm: add missing tty-device sanity check Greg Kroah-Hartman
2017-05-18 10:47 ` [PATCH 4.10 86/93] Bluetooth: hci_intel: " Greg Kroah-Hartman
2017-05-18 10:47 ` [PATCH 4.10 87/93] ipmi: Fix kernel panic at ipmi_ssif_thread() Greg Kroah-Hartman
2017-05-18 10:47 ` [PATCH 4.10 88/93] libnvdimm, region: fix flush hint detection crash Greg Kroah-Hartman
2017-05-18 10:47 ` [PATCH 4.10 89/93] libnvdimm, pmem: fix a NULL pointer BUG in nd_pmem_notify Greg Kroah-Hartman
2017-05-18 10:47 ` [PATCH 4.10 90/93] libnvdimm: fix nvdimm_bus_lock() vs device_lock() ordering Greg Kroah-Hartman
2017-05-18 10:48 ` [PATCH 4.10 91/93] libnvdimm, pfn: fix npfns vs section alignment Greg Kroah-Hartman
2017-05-18 10:48 ` [PATCH 4.10 92/93] pstore: Fix flags to enable dumps on powerpc Greg Kroah-Hartman
2017-05-18 10:48 ` [PATCH 4.10 93/93] pstore: Shut down worker when unregistering Greg Kroah-Hartman
2017-05-18 17:31 ` [PATCH 4.10 00/93] 4.10.17-stable review Shuah Khan
2017-05-19  1:10 ` Guenter Roeck

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20170518104744.814832465@linuxfoundation.org \
    --to=gregkh@linuxfoundation.org \
    --cc=alex.williamson@redhat.com \
    --cc=kwankhede@nvidia.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=peterx@redhat.com \
    --cc=stable@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).