stable.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
To: linux-kernel@vger.kernel.org
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>,
	stable@vger.kernel.org, Peter Xu <peterx@redhat.com>,
	Kirti Wankhede <kwankhede@nvidia.com>,
	Alex Williamson <alex.williamson@redhat.com>
Subject: [PATCH 4.4 28/56] vfio/type1: Remove locked page accounting workqueue
Date: Thu, 18 May 2017 12:48:45 +0200	[thread overview]
Message-ID: <20170518104841.596751047@linuxfoundation.org> (raw)
In-Reply-To: <20170518104840.395932131@linuxfoundation.org>

4.4-stable review patch.  If anyone has any objections, please let me know.

------------------

From: Alex Williamson <alex.williamson@redhat.com>

commit 0cfef2b7410b64d7a430947e0b533314c4f97153 upstream.

If the mmap_sem is contented then the vfio type1 IOMMU backend will
defer locked page accounting updates to a workqueue task.  This has a
few problems and depending on which side the user tries to play, they
might be over-penalized for unmaps that haven't yet been accounted or
race the workqueue to enter more mappings than they're allowed.  The
original intent of this workqueue mechanism seems to be focused on
reducing latency through the ioctl, but we cannot do so at the cost
of correctness.  Remove this workqueue mechanism and update the
callers to allow for failure.  We can also now recheck the limit under
write lock to make sure we don't exceed it.

vfio_pin_pages_remote() also now necessarily includes an unwind path
which we can jump to directly if the consecutive page pinning finds
that we're exceeding the user's memory limits.  This avoids the
current lazy approach which does accounting and mapping up to the
fault, only to return an error on the next iteration to unwind the
entire vfio_dma.

Cc: stable@vger.kernel.org
Reviewed-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Kirti Wankhede <kwankhede@nvidia.com>
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>


---
 drivers/vfio/vfio_iommu_type1.c |  100 ++++++++++++++++------------------------
 1 file changed, 42 insertions(+), 58 deletions(-)

--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -130,57 +130,34 @@ static void vfio_unlink_dma(struct vfio_
 	rb_erase(&old->node, &iommu->dma_list);
 }
 
-struct vwork {
-	struct mm_struct	*mm;
-	long			npage;
-	struct work_struct	work;
-};
-
-/* delayed decrement/increment for locked_vm */
-static void vfio_lock_acct_bg(struct work_struct *work)
+static int vfio_lock_acct(long npage, bool *lock_cap)
 {
-	struct vwork *vwork = container_of(work, struct vwork, work);
-	struct mm_struct *mm;
+	int ret = 0;
 
-	mm = vwork->mm;
-	down_write(&mm->mmap_sem);
-	mm->locked_vm += vwork->npage;
-	up_write(&mm->mmap_sem);
-	mmput(mm);
-	kfree(vwork);
-}
+	if (!npage)
+		return 0;
 
-static void vfio_lock_acct(long npage)
-{
-	struct vwork *vwork;
-	struct mm_struct *mm;
+	if (!current->mm)
+		return -ESRCH; /* process exited */
 
-	if (!current->mm || !npage)
-		return; /* process exited or nothing to do */
+	down_write(&current->mm->mmap_sem);
+	if (npage > 0) {
+		if (lock_cap ? !*lock_cap : !capable(CAP_IPC_LOCK)) {
+			unsigned long limit;
 
-	if (down_write_trylock(&current->mm->mmap_sem)) {
-		current->mm->locked_vm += npage;
-		up_write(&current->mm->mmap_sem);
-		return;
-	}
+			limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
 
-	/*
-	 * Couldn't get mmap_sem lock, so must setup to update
-	 * mm->locked_vm later. If locked_vm were atomic, we
-	 * wouldn't need this silliness
-	 */
-	vwork = kmalloc(sizeof(struct vwork), GFP_KERNEL);
-	if (!vwork)
-		return;
-	mm = get_task_mm(current);
-	if (!mm) {
-		kfree(vwork);
-		return;
+			if (current->mm->locked_vm + npage > limit)
+				ret = -ENOMEM;
+		}
 	}
-	INIT_WORK(&vwork->work, vfio_lock_acct_bg);
-	vwork->mm = mm;
-	vwork->npage = npage;
-	schedule_work(&vwork->work);
+
+	if (!ret)
+		current->mm->locked_vm += npage;
+
+	up_write(&current->mm->mmap_sem);
+
+	return ret;
 }
 
 /*
@@ -262,9 +239,9 @@ static int vaddr_get_pfn(unsigned long v
 static long vfio_pin_pages(unsigned long vaddr, long npage,
 			   int prot, unsigned long *pfn_base)
 {
-	unsigned long limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
+	unsigned long pfn = 0, limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
 	bool lock_cap = capable(CAP_IPC_LOCK);
-	long ret, i;
+	long ret, i = 1;
 	bool rsvd;
 
 	if (!current->mm)
@@ -283,16 +260,11 @@ static long vfio_pin_pages(unsigned long
 		return -ENOMEM;
 	}
 
-	if (unlikely(disable_hugepages)) {
-		if (!rsvd)
-			vfio_lock_acct(1);
-		return 1;
-	}
+	if (unlikely(disable_hugepages))
+		goto out;
 
 	/* Lock all the consecutive pages from pfn_base */
-	for (i = 1, vaddr += PAGE_SIZE; i < npage; i++, vaddr += PAGE_SIZE) {
-		unsigned long pfn = 0;
-
+	for (vaddr += PAGE_SIZE; i < npage; i++, vaddr += PAGE_SIZE) {
 		ret = vaddr_get_pfn(vaddr, prot, &pfn);
 		if (ret)
 			break;
@@ -308,12 +280,24 @@ static long vfio_pin_pages(unsigned long
 			put_pfn(pfn, prot);
 			pr_warn("%s: RLIMIT_MEMLOCK (%ld) exceeded\n",
 				__func__, limit << PAGE_SHIFT);
-			break;
+			ret = -ENOMEM;
+			goto unpin_out;
 		}
 	}
 
+out:
 	if (!rsvd)
-		vfio_lock_acct(i);
+		ret = vfio_lock_acct(i, &lock_cap);
+
+unpin_out:
+	if (ret) {
+		if (!rsvd) {
+			for (pfn = *pfn_base ; i ; pfn++, i--)
+				put_pfn(pfn, prot);
+		}
+
+		return ret;
+	}
 
 	return i;
 }
@@ -328,7 +312,7 @@ static long vfio_unpin_pages(unsigned lo
 		unlocked += put_pfn(pfn++, prot);
 
 	if (do_accounting)
-		vfio_lock_acct(-unlocked);
+		vfio_lock_acct(-unlocked, NULL);
 
 	return unlocked;
 }
@@ -390,7 +374,7 @@ static void vfio_unmap_unpin(struct vfio
 		cond_resched();
 	}
 
-	vfio_lock_acct(-unlocked);
+	vfio_lock_acct(-unlocked, NULL);
 }
 
 static void vfio_remove_dma(struct vfio_iommu *iommu, struct vfio_dma *dma)

  parent reply	other threads:[~2017-05-18 10:48 UTC|newest]

Thread overview: 58+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-05-18 10:48 [PATCH 4.4 00/56] 4.4.69-stable review Greg Kroah-Hartman
2017-05-18 10:48 ` [PATCH 4.4 01/56] xen: adjust early dom0 p2m handling to xen hypervisor behavior Greg Kroah-Hartman
2017-05-18 10:48 ` [PATCH 4.4 02/56] target: Fix compare_and_write_callback handling for non GOOD status Greg Kroah-Hartman
2017-05-18 10:48 ` [PATCH 4.4 03/56] target/fileio: Fix zero-length READ and WRITE handling Greg Kroah-Hartman
2017-05-18 10:48 ` [PATCH 4.4 04/56] target: Convert ACL change queue_depth se_session reference usage Greg Kroah-Hartman
2017-05-18 10:48 ` [PATCH 4.4 05/56] iscsi-target: Set session_fall_back_to_erl0 when forcing reinstatement Greg Kroah-Hartman
2017-05-18 10:48 ` [PATCH 4.4 06/56] usb: host: xhci: print correct command ring address Greg Kroah-Hartman
2017-05-18 10:48 ` [PATCH 4.4 07/56] USB: serial: ftdi_sio: add device ID for Microsemi/Arrow SF2PLUS Dev Kit Greg Kroah-Hartman
2017-05-18 10:48 ` [PATCH 4.4 08/56] USB: Proper handling of Race Condition when two USB class drivers try to call init_usb_class simultaneously Greg Kroah-Hartman
2017-05-18 10:48 ` [PATCH 4.4 09/56] staging: vt6656: use off stack for in buffer USB transfers Greg Kroah-Hartman
2017-05-18 10:48 ` [PATCH 4.4 10/56] staging: vt6656: use off stack for out " Greg Kroah-Hartman
2017-05-18 10:48 ` [PATCH 4.4 11/56] staging: gdm724x: gdm_mux: fix use-after-free on module unload Greg Kroah-Hartman
2017-05-18 10:48 ` [PATCH 4.4 12/56] staging: comedi: jr3_pci: fix possible null pointer dereference Greg Kroah-Hartman
2017-05-18 10:48 ` [PATCH 4.4 13/56] staging: comedi: jr3_pci: cope with jiffies wraparound Greg Kroah-Hartman
2017-05-18 10:48 ` [PATCH 4.4 14/56] usb: misc: add missing continue in switch Greg Kroah-Hartman
2017-05-18 10:48 ` [PATCH 4.4 15/56] usb: Make sure usb/phy/of gets built-in Greg Kroah-Hartman
2017-05-18 10:48 ` [PATCH 4.4 16/56] usb: hub: Fix error loop seen after hub communication errors Greg Kroah-Hartman
2017-05-18 10:48 ` [PATCH 4.4 17/56] usb: hub: Do not attempt to autosuspend disconnected devices Greg Kroah-Hartman
2017-05-18 10:48 ` [PATCH 4.4 18/56] usb: misc: legousbtower: Fix buffers on stack Greg Kroah-Hartman
2017-05-18 10:48 ` [PATCH 4.4 19/56] x86/boot: Fix BSS corruption/overwrite bug in early x86 kernel startup Greg Kroah-Hartman
2017-05-18 10:48 ` [PATCH 4.4 20/56] selftests/x86/ldt_gdt_32: Work around a glibc sigaction() bug Greg Kroah-Hartman
2017-05-18 10:48 ` [PATCH 4.4 21/56] x86, pmem: Fix cache flushing for iovec write < 8 bytes Greg Kroah-Hartman
2017-05-18 10:48 ` [PATCH 4.4 22/56] um: Fix PTRACE_POKEUSER on x86_64 Greg Kroah-Hartman
2017-05-18 10:48 ` [PATCH 4.4 24/56] KVM: arm/arm64: fix races in kvm_psci_vcpu_on Greg Kroah-Hartman
2017-05-18 10:48 ` [PATCH 4.4 25/56] block: fix blk_integrity_register to use templates interval_exp if not 0 Greg Kroah-Hartman
2017-05-18 10:48 ` [PATCH 4.4 26/56] crypto: algif_aead - Require setkey before accept(2) Greg Kroah-Hartman
2017-05-18 10:48 ` [PATCH 4.4 27/56] dm era: save spacemap metadata root after the pre-commit Greg Kroah-Hartman
2017-05-18 10:48 ` Greg Kroah-Hartman [this message]
2017-05-18 10:48 ` [PATCH 4.4 29/56] IB/core: Fix sysfs registration error flow Greg Kroah-Hartman
2017-05-18 10:48 ` [PATCH 4.4 30/56] IB/IPoIB: ibX: failed to create mcg debug file Greg Kroah-Hartman
2017-05-18 10:48 ` [PATCH 4.4 31/56] IB/mlx4: Fix ib device initialization error flow Greg Kroah-Hartman
2017-05-18 10:48 ` [PATCH 4.4 32/56] IB/mlx4: Reduce SRIOV multicast cleanup warning message to debug level Greg Kroah-Hartman
2017-05-18 10:48 ` [PATCH 4.4 33/56] ext4: evict inline data when writing to memory map Greg Kroah-Hartman
2017-05-18 10:48 ` [PATCH 4.4 34/56] fs/xattr.c: zero out memory copied to userspace in getxattr Greg Kroah-Hartman
2017-05-18 10:48 ` [PATCH 4.4 35/56] ceph: fix memory leak in __ceph_setxattr() Greg Kroah-Hartman
2017-05-18 10:48 ` [PATCH 4.4 36/56] fs/block_dev: always invalidate cleancache in invalidate_bdev() Greg Kroah-Hartman
2017-05-18 10:48 ` [PATCH 4.4 37/56] Set unicode flag on cifs echo request to avoid Mac error Greg Kroah-Hartman
2017-05-18 10:48 ` [PATCH 4.4 38/56] SMB3: Work around mount failure when using SMB3 dialect to Macs Greg Kroah-Hartman
2017-05-18 10:48 ` [PATCH 4.4 40/56] cifs: fix CIFS_IOC_GET_MNT_INFO oops Greg Kroah-Hartman
2017-05-18 10:48 ` [PATCH 4.4 42/56] padata: free correct variable Greg Kroah-Hartman
2017-05-18 10:49 ` [PATCH 4.4 43/56] arm64: KVM: Fix decoding of Rt/Rt2 when trapping AArch32 CP accesses Greg Kroah-Hartman
2017-05-18 10:49 ` [PATCH 4.4 44/56] serial: samsung: Use right device for DMA-mapping calls Greg Kroah-Hartman
2017-05-18 10:49 ` [PATCH 4.4 45/56] serial: omap: fix runtime-pm handling on unbind Greg Kroah-Hartman
2017-05-18 10:49 ` [PATCH 4.4 46/56] serial: omap: suspend device on probe errors Greg Kroah-Hartman
2017-05-18 10:49 ` [PATCH 4.4 47/56] tty: pty: Fix ldisc flush after userspace become aware of the data already Greg Kroah-Hartman
2017-05-18 10:49 ` [PATCH 4.4 48/56] Bluetooth: Fix user channel for 32bit userspace on 64bit kernel Greg Kroah-Hartman
2017-05-18 10:49 ` [PATCH 4.4 49/56] Bluetooth: hci_bcm: add missing tty-device sanity check Greg Kroah-Hartman
2017-05-24  1:36   ` Ben Hutchings
2017-05-24 10:19     ` Johan Hovold
2017-05-18 10:49 ` [PATCH 4.4 50/56] Bluetooth: hci_intel: " Greg Kroah-Hartman
2017-05-18 10:49 ` [PATCH 4.4 51/56] mac80211: pass RX aggregation window size to driver Greg Kroah-Hartman
2017-05-18 10:49 ` [PATCH 4.4 52/56] mac80211: pass block ack session timeout to " Greg Kroah-Hartman
2017-05-18 10:49 ` [PATCH 4.4 53/56] mac80211: RX BA support for sta max_rx_aggregation_subframes Greg Kroah-Hartman
2017-05-18 10:49 ` [PATCH 4.4 54/56] wlcore: Pass win_size taken from ieee80211_sta to FW Greg Kroah-Hartman
2017-05-18 10:49 ` [PATCH 4.4 55/56] wlcore: Add RX_BA_WIN_SIZE_CHANGE_EVENT event Greg Kroah-Hartman
2017-05-18 10:49 ` [PATCH 4.4 56/56] ipmi: Fix kernel panic at ipmi_ssif_thread() Greg Kroah-Hartman
2017-05-18 17:33 ` [PATCH 4.4 00/56] 4.4.69-stable review Shuah Khan
2017-05-19  1:10 ` Guenter Roeck

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20170518104841.596751047@linuxfoundation.org \
    --to=gregkh@linuxfoundation.org \
    --cc=alex.williamson@redhat.com \
    --cc=kwankhede@nvidia.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=peterx@redhat.com \
    --cc=stable@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).