From: Alexey Kardashevskiy <aik@ozlabs.ru>
To: linuxppc-dev@lists.ozlabs.org
Cc: Alexey Kardashevskiy <aik@ozlabs.ru>,
Alex Williamson <alex.williamson@redhat.com>,
Benjamin Herrenschmidt <benh@kernel.crashing.org>,
David Gibson <david@gibson.dropbear.id.au>,
Gavin Shan <gwshan@linux.vnet.ibm.com>,
Paul Mackerras <paulus@samba.org>,
Wei Yang <weiyang@linux.vnet.ibm.com>,
linux-kernel@vger.kernel.org
Subject: [PATCH kernel v12 09/34] vfio: powerpc/spapr: Move locked_vm accounting to helpers
Date: Fri, 5 Jun 2015 16:35:01 +1000 [thread overview]
Message-ID: <1433486126-23551-10-git-send-email-aik@ozlabs.ru> (raw)
In-Reply-To: <1433486126-23551-1-git-send-email-aik@ozlabs.ru>
There moves locked pages accounting to helpers.
Later they will be reused for Dynamic DMA windows (DDW).
This reworks debug messages to show the current value and the limit.
This stores the locked pages number in the container so when unlocking
the iommu table pointer won't be needed. This does not have an effect
now but it will with the multiple tables per container as then we will
allow attaching/detaching groups on fly and we may end up having
a container with no group attached but with the counter incremented.
While we are here, update the comment explaining why RLIMIT_MEMLOCK
might be required to be bigger than the guest RAM. This also prints
pid of the current process in pr_warn/pr_debug.
Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
[aw: for the vfio related changes]
Acked-by: Alex Williamson <alex.williamson@redhat.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
---
Changes:
v12:
* added WARN_ON_ONCE() to decrement_locked_vm() for the sake of documentation
v4:
* new helpers do nothing if @npages == 0
* tce_iommu_disable() now can decrement the counter if the group was
detached (not possible now but will be in the future)
---
drivers/vfio/vfio_iommu_spapr_tce.c | 82 ++++++++++++++++++++++++++++---------
1 file changed, 63 insertions(+), 19 deletions(-)
diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c
index 64300cc..6e2e15f 100644
--- a/drivers/vfio/vfio_iommu_spapr_tce.c
+++ b/drivers/vfio/vfio_iommu_spapr_tce.c
@@ -29,6 +29,51 @@
static void tce_iommu_detach_group(void *iommu_data,
struct iommu_group *iommu_group);
+static long try_increment_locked_vm(long npages)
+{
+ long ret = 0, locked, lock_limit;
+
+ if (!current || !current->mm)
+ return -ESRCH; /* process exited */
+
+ if (!npages)
+ return 0;
+
+ down_write(¤t->mm->mmap_sem);
+ locked = current->mm->locked_vm + npages;
+ lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
+ if (locked > lock_limit && !capable(CAP_IPC_LOCK))
+ ret = -ENOMEM;
+ else
+ current->mm->locked_vm += npages;
+
+ pr_debug("[%d] RLIMIT_MEMLOCK +%ld %ld/%ld%s\n", current->pid,
+ npages << PAGE_SHIFT,
+ current->mm->locked_vm << PAGE_SHIFT,
+ rlimit(RLIMIT_MEMLOCK),
+ ret ? " - exceeded" : "");
+
+ up_write(¤t->mm->mmap_sem);
+
+ return ret;
+}
+
+static void decrement_locked_vm(long npages)
+{
+ if (!current || !current->mm || !npages)
+ return; /* process exited */
+
+ down_write(¤t->mm->mmap_sem);
+ if (WARN_ON_ONCE(npages > current->mm->locked_vm))
+ npages = current->mm->locked_vm;
+ current->mm->locked_vm -= npages;
+ pr_debug("[%d] RLIMIT_MEMLOCK -%ld %ld/%ld\n", current->pid,
+ npages << PAGE_SHIFT,
+ current->mm->locked_vm << PAGE_SHIFT,
+ rlimit(RLIMIT_MEMLOCK));
+ up_write(¤t->mm->mmap_sem);
+}
+
/*
* VFIO IOMMU fd for SPAPR_TCE IOMMU implementation
*
@@ -45,6 +90,7 @@ struct tce_container {
struct mutex lock;
struct iommu_table *tbl;
bool enabled;
+ unsigned long locked_pages;
};
static bool tce_page_is_contained(struct page *page, unsigned page_shift)
@@ -60,7 +106,7 @@ static bool tce_page_is_contained(struct page *page, unsigned page_shift)
static int tce_iommu_enable(struct tce_container *container)
{
int ret = 0;
- unsigned long locked, lock_limit, npages;
+ unsigned long locked;
struct iommu_table *tbl = container->tbl;
if (!container->tbl)
@@ -89,21 +135,22 @@ static int tce_iommu_enable(struct tce_container *container)
* Also we don't have a nice way to fail on H_PUT_TCE due to ulimits,
* that would effectively kill the guest at random points, much better
* enforcing the limit based on the max that the guest can map.
+ *
+ * Unfortunately at the moment it counts whole tables, no matter how
+ * much memory the guest has. I.e. for 4GB guest and 4 IOMMU groups
+ * each with 2GB DMA window, 8GB will be counted here. The reason for
+ * this is that we cannot tell here the amount of RAM used by the guest
+ * as this information is only available from KVM and VFIO is
+ * KVM agnostic.
*/
- down_write(¤t->mm->mmap_sem);
- npages = (tbl->it_size << tbl->it_page_shift) >> PAGE_SHIFT;
- locked = current->mm->locked_vm + npages;
- lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
- if (locked > lock_limit && !capable(CAP_IPC_LOCK)) {
- pr_warn("RLIMIT_MEMLOCK (%ld) exceeded\n",
- rlimit(RLIMIT_MEMLOCK));
- ret = -ENOMEM;
- } else {
+ locked = (tbl->it_size << tbl->it_page_shift) >> PAGE_SHIFT;
+ ret = try_increment_locked_vm(locked);
+ if (ret)
+ return ret;
- current->mm->locked_vm += npages;
- container->enabled = true;
- }
- up_write(¤t->mm->mmap_sem);
+ container->locked_pages = locked;
+
+ container->enabled = true;
return ret;
}
@@ -115,13 +162,10 @@ static void tce_iommu_disable(struct tce_container *container)
container->enabled = false;
- if (!container->tbl || !current->mm)
+ if (!current->mm)
return;
- down_write(¤t->mm->mmap_sem);
- current->mm->locked_vm -= (container->tbl->it_size <<
- container->tbl->it_page_shift) >> PAGE_SHIFT;
- up_write(¤t->mm->mmap_sem);
+ decrement_locked_vm(container->locked_pages);
}
static void *tce_iommu_open(unsigned long arg)
--
2.4.0.rc3.8.gfb3e7d5
next prev parent reply other threads:[~2015-06-05 6:37 UTC|newest]
Thread overview: 46+ messages / expand[flat|nested] mbox.gz Atom feed top
2015-06-05 6:34 [PATCH kernel v12 00/34] powerpc/iommu/vfio: Enable Dynamic DMA windows Alexey Kardashevskiy
2015-06-05 6:34 ` [PATCH kernel v12 01/34] powerpc/eeh/ioda2: Use device::iommu_group to check IOMMU group Alexey Kardashevskiy
2015-06-05 6:34 ` [PATCH kernel v12 02/34] powerpc/iommu/powernv: Get rid of set_iommu_table_base_and_group Alexey Kardashevskiy
2015-06-05 6:34 ` [PATCH kernel v12 03/34] powerpc/powernv/ioda: Clean up IOMMU group registration Alexey Kardashevskiy
2015-06-05 6:34 ` [PATCH kernel v12 04/34] powerpc/iommu: Put IOMMU group explicitly Alexey Kardashevskiy
2015-06-05 6:34 ` [PATCH kernel v12 05/34] powerpc/iommu: Always release iommu_table in iommu_free_table() Alexey Kardashevskiy
2015-06-05 6:34 ` [PATCH kernel v12 06/34] vfio: powerpc/spapr: Move page pinning from arch code to VFIO IOMMU driver Alexey Kardashevskiy
2015-06-05 6:34 ` [PATCH kernel v12 07/34] vfio: powerpc/spapr: Check that IOMMU page is fully contained by system page Alexey Kardashevskiy
2015-06-05 6:35 ` [PATCH kernel v12 08/34] vfio: powerpc/spapr: Use it_page_size Alexey Kardashevskiy
2015-06-05 6:35 ` Alexey Kardashevskiy [this message]
2015-06-09 4:22 ` [PATCH kernel v12 09/34] vfio: powerpc/spapr: Move locked_vm accounting to helpers David Gibson
2015-06-05 6:35 ` [PATCH kernel v12 10/34] vfio: powerpc/spapr: Disable DMA mappings on disabled container Alexey Kardashevskiy
2015-06-05 6:35 ` [PATCH kernel v12 11/34] vfio: powerpc/spapr: Moving pinning/unpinning to helpers Alexey Kardashevskiy
2015-06-05 6:35 ` [PATCH kernel v12 12/34] vfio: powerpc/spapr: Rework groups attaching Alexey Kardashevskiy
2015-06-05 6:35 ` [PATCH kernel v12 13/34] powerpc/powernv: Do not set "read" flag if direction==DMA_NONE Alexey Kardashevskiy
2015-06-05 6:35 ` [PATCH kernel v12 14/34] powerpc/iommu: Move tce_xxx callbacks from ppc_md to iommu_table Alexey Kardashevskiy
2015-06-05 6:35 ` [PATCH kernel v12 15/34] powerpc/powernv/ioda/ioda2: Rework TCE invalidation in tce_build()/tce_free() Alexey Kardashevskiy
2015-06-05 6:35 ` [PATCH kernel v12 16/34] powerpc/spapr: vfio: Replace iommu_table with iommu_table_group Alexey Kardashevskiy
2015-06-05 6:35 ` [PATCH kernel v12 17/34] powerpc/spapr: vfio: Switch from iommu_table to new iommu_table_group Alexey Kardashevskiy
2015-06-09 2:36 ` David Gibson
2015-06-09 5:59 ` [PATCH kernel] powerpc/pseries: Fix compile error when CONFIG_IOMMU_API if off Alexey Kardashevskiy
2015-06-09 12:23 ` [PATCH kernel v12 17/34] powerpc/spapr: vfio: Switch from iommu_table to new iommu_table_group Michael Ellerman
2015-06-10 3:08 ` [PATCH kernel] powerpc/powernv: Fix crash when CONFIG_IOMMU_API is off Alexey Kardashevskiy
2015-06-10 7:33 ` [kernel, v12, 17/34] powerpc/spapr: vfio: Switch from iommu_table to new iommu_table_group Michael Ellerman
2015-06-11 4:28 ` [PATCH kernel v12.2] powerpc/powernv: Fix crash when CONFIG_IOMMU_API is off Alexey Kardashevskiy
2015-06-05 6:35 ` [PATCH kernel v12 18/34] vfio: powerpc/spapr/iommu/powernv/ioda2: Rework IOMMU ownership control Alexey Kardashevskiy
2015-06-05 6:35 ` [PATCH kernel v12 19/34] powerpc/iommu: Fix IOMMU ownership control functions Alexey Kardashevskiy
2015-06-05 6:35 ` [PATCH kernel v12 20/34] powerpc/powernv/ioda2: Move TCE kill register address to PE Alexey Kardashevskiy
2015-06-05 6:35 ` [PATCH kernel v12 21/34] powerpc/powernv/ioda2: Add TCE invalidation for all attached groups Alexey Kardashevskiy
2015-06-05 6:35 ` [PATCH kernel v12 22/34] powerpc/powernv: Implement accessor to TCE entry Alexey Kardashevskiy
2015-06-05 6:35 ` [PATCH kernel v12 23/34] powerpc/iommu/powernv: Release replaced TCE Alexey Kardashevskiy
2015-06-05 6:35 ` [PATCH kernel v12 24/34] powerpc/powernv/ioda2: Rework iommu_table creation Alexey Kardashevskiy
2015-06-05 6:35 ` [PATCH kernel v12 25/34] powerpc/powernv/ioda2: Introduce helpers to allocate TCE pages Alexey Kardashevskiy
2015-06-05 6:35 ` [PATCH kernel v12 26/34] powerpc/powernv/ioda2: Introduce pnv_pci_ioda2_set_window Alexey Kardashevskiy
2015-06-09 4:23 ` David Gibson
2015-06-05 6:35 ` [PATCH kernel v12 27/34] powerpc/powernv: Implement multilevel TCE tables Alexey Kardashevskiy
2015-06-09 3:57 ` David Gibson
2015-06-05 6:35 ` [PATCH kernel v12 28/34] vfio: powerpc/spapr: powerpc/powernv/ioda: Define and implement DMA windows API Alexey Kardashevskiy
2015-06-05 6:35 ` [PATCH kernel v12 29/34] powerpc/powernv/ioda2: Use new helpers to do proper cleanup on PE release Alexey Kardashevskiy
2015-06-05 6:35 ` [PATCH kernel v12 30/34] powerpc/iommu/ioda2: Add get_table_size() to calculate the size of future table Alexey Kardashevskiy
2015-06-05 6:35 ` [PATCH kernel v12 31/34] vfio: powerpc/spapr: powerpc/powernv/ioda2: Use DMA windows API in ownership control Alexey Kardashevskiy
2015-06-05 6:35 ` [PATCH kernel v12 32/34] powerpc/mmu: Add userspace-to-physical addresses translation cache Alexey Kardashevskiy
2015-06-09 4:05 ` David Gibson
2015-06-05 6:35 ` [PATCH kernel v12 33/34] vfio: powerpc/spapr: Register memory and define IOMMU v2 Alexey Kardashevskiy
2015-06-09 4:21 ` David Gibson
2015-06-05 6:35 ` [PATCH kernel v12 34/34] vfio: powerpc/spapr: Support Dynamic DMA windows Alexey Kardashevskiy
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1433486126-23551-10-git-send-email-aik@ozlabs.ru \
--to=aik@ozlabs.ru \
--cc=alex.williamson@redhat.com \
--cc=benh@kernel.crashing.org \
--cc=david@gibson.dropbear.id.au \
--cc=gwshan@linux.vnet.ibm.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linuxppc-dev@lists.ozlabs.org \
--cc=paulus@samba.org \
--cc=weiyang@linux.vnet.ibm.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).