From: Jike Song <jike.song@intel.com>
To: Alex Williamson <alex.williamson@redhat.com>
Cc: Kirti Wankhede <kwankhede@nvidia.com>,
pbonzini@redhat.com, kraxel@redhat.com, cjia@nvidia.com,
qemu-devel@nongnu.org, kvm@vger.kernel.org, kevin.tian@intel.com,
bjsdjshi@linux.vnet.ibm.com, linux-kernel@vger.kernel.org
Subject: Re: [PATCH v11 10/22] vfio iommu type1: Add support for mediated devices
Date: Tue, 08 Nov 2016 10:20:14 +0800 [thread overview]
Message-ID: <5821365E.6020304@intel.com> (raw)
In-Reply-To: <20161107161619.66e03d8f@t450s.home>
On 11/08/2016 07:16 AM, Alex Williamson wrote:
> On Sat, 5 Nov 2016 02:40:44 +0530
> Kirti Wankhede <kwankhede@nvidia.com> wrote:
>
>> VFIO IOMMU drivers are designed for the devices which are IOMMU capable.
>> Mediated device only uses IOMMU APIs, the underlying hardware can be
>> managed by an IOMMU domain.
>>
>> Aim of this change is:
>> - To use most of the code of TYPE1 IOMMU driver for mediated devices
>> - To support direct assigned device and mediated device in single module
>>
>> This change adds pin and unpin support for mediated device to TYPE1 IOMMU
>> backend module. More details:
>> - vfio_pin_pages() callback here uses task and address space of vfio_dma,
>> that is, of the process who mapped that iova range.
>> - Added pfn_list tracking logic to address space structure. All pages
>> pinned through this interface are trached in its address space.
> ^ k
> ------------------------------------------|
>
>> - Pinned pages list is used to verify unpinning request and to unpin
>> remaining pages while detaching the group for that device.
>> - Page accounting is updated to account in its address space where the
>> pages are pinned/unpinned.
>> - Accounting for mdev device is only done if there is no iommu capable
>> domain in the container. When there is a direct device assigned to the
>> container and that domain is iommu capable, all pages are already pinned
>> during DMA_MAP.
>> - Page accounting is updated on hot plug and unplug mdev device and pass
>> through device.
>>
>> Tested by assigning below combinations of devices to a single VM:
>> - GPU pass through only
>> - vGPU device only
>> - One GPU pass through and one vGPU device
>> - Linux VM hot plug and unplug vGPU device while GPU pass through device
>> exist
>> - Linux VM hot plug and unplug GPU pass through device while vGPU device
>> exist
>>
>> Signed-off-by: Kirti Wankhede <kwankhede@nvidia.com>
>> Signed-off-by: Neo Jia <cjia@nvidia.com>
>> Change-Id: I295d6f0f2e0579b8d9882bfd8fd5a4194b97bd9a
>> ---
>> drivers/vfio/vfio_iommu_type1.c | 538 +++++++++++++++++++++++++++++++++++++---
>> 1 file changed, 500 insertions(+), 38 deletions(-)
>>
>> diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
>> index 8d64528dcc22..e511073446a0 100644
>> --- a/drivers/vfio/vfio_iommu_type1.c
>> +++ b/drivers/vfio/vfio_iommu_type1.c
>> @@ -36,6 +36,7 @@
>> #include <linux/uaccess.h>
>> #include <linux/vfio.h>
>> #include <linux/workqueue.h>
>> +#include <linux/mdev.h>
>>
>> #define DRIVER_VERSION "0.2"
>> #define DRIVER_AUTHOR "Alex Williamson <alex.williamson@redhat.com>"
>> @@ -56,6 +57,7 @@ MODULE_PARM_DESC(disable_hugepages,
>> struct vfio_iommu {
>> struct list_head domain_list;
>> struct list_head addr_space_list;
>> + struct vfio_domain *external_domain; /* domain for external user */
>> struct mutex lock;
>> struct rb_root dma_list;
>> bool v2;
>> @@ -67,6 +69,9 @@ struct vfio_addr_space {
>> struct mm_struct *mm;
>> struct list_head next;
>> atomic_t ref_count;
>> + /* external user pinned pfns */
>> + struct rb_root pfn_list; /* pinned Host pfn list */
>> + struct mutex pfn_list_lock; /* mutex for pfn_list */
>> };
>>
>> struct vfio_domain {
>> @@ -83,6 +88,7 @@ struct vfio_dma {
>> unsigned long vaddr; /* Process virtual addr */
>> size_t size; /* Map size (bytes) */
>> int prot; /* IOMMU_READ/WRITE */
>> + bool iommu_mapped;
>> struct vfio_addr_space *addr_space;
>> struct task_struct *task;
>> bool mlock_cap;
>> @@ -94,6 +100,19 @@ struct vfio_group {
>> };
>>
>> /*
>> + * Guest RAM pinning working set or DMA target
>> + */
>> +struct vfio_pfn {
>> + struct rb_node node;
>> + unsigned long pfn; /* Host pfn */
>> + int prot;
>> + atomic_t ref_count;
>> +};
>> +
>> +#define IS_IOMMU_CAP_DOMAIN_IN_CONTAINER(iommu) \
>> + (!list_empty(&iommu->domain_list))
>> +
>> +/*
>> * This code handles mapping and unmapping of user data buffers
>> * into DMA'ble space using the IOMMU
>> */
>> @@ -153,6 +172,93 @@ static struct vfio_addr_space *vfio_find_addr_space(struct vfio_iommu *iommu,
>> return NULL;
>> }
>>
>> +/*
>> + * Helper Functions for host pfn list
>> + */
>> +static struct vfio_pfn *vfio_find_pfn(struct vfio_addr_space *addr_space,
>> + unsigned long pfn)
>> +{
>> + struct vfio_pfn *vpfn;
>> + struct rb_node *node = addr_space->pfn_list.rb_node;
>> +
>> + while (node) {
>> + vpfn = rb_entry(node, struct vfio_pfn, node);
>> +
>> + if (pfn < vpfn->pfn)
>> + node = node->rb_left;
>> + else if (pfn > vpfn->pfn)
>> + node = node->rb_right;
>> + else
>> + return vpfn;
>> + }
>> +
>> + return NULL;
>> +}
>> +
>> +static void vfio_link_pfn(struct vfio_addr_space *addr_space,
>> + struct vfio_pfn *new)
>> +{
>> + struct rb_node **link, *parent = NULL;
>> + struct vfio_pfn *vpfn;
>> +
>> + link = &addr_space->pfn_list.rb_node;
>> + while (*link) {
>> + parent = *link;
>> + vpfn = rb_entry(parent, struct vfio_pfn, node);
>> +
>> + if (new->pfn < vpfn->pfn)
>> + link = &(*link)->rb_left;
>> + else
>> + link = &(*link)->rb_right;
>> + }
>> +
>> + rb_link_node(&new->node, parent, link);
>> + rb_insert_color(&new->node, &addr_space->pfn_list);
>> +}
>> +
>> +static void vfio_unlink_pfn(struct vfio_addr_space *addr_space,
>> + struct vfio_pfn *old)
>> +{
>> + rb_erase(&old->node, &addr_space->pfn_list);
>> +}
>> +
>> +static int vfio_add_to_pfn_list(struct vfio_addr_space *addr_space,
>> + unsigned long pfn, int prot)
>> +{
>> + struct vfio_pfn *vpfn;
>> +
>> + vpfn = kzalloc(sizeof(*vpfn), GFP_KERNEL);
>> + if (!vpfn)
>> + return -ENOMEM;
>> +
>> + vpfn->pfn = pfn;
>> + vpfn->prot = prot;
>> + atomic_set(&vpfn->ref_count, 1);
>> + vfio_link_pfn(addr_space, vpfn);
>> + return 0;
>> +}
>> +
>> +static void vfio_remove_from_pfn_list(struct vfio_addr_space *addr_space,
>> + struct vfio_pfn *vpfn)
>> +{
>> + vfio_unlink_pfn(addr_space, vpfn);
>> + kfree(vpfn);
>> +}
>> +
>> +static int vfio_pfn_account(struct vfio_addr_space *addr_space,
>> + unsigned long pfn)
>> +{
>> + struct vfio_pfn *p;
>> + int ret = 1;
>> +
>> + mutex_lock(&addr_space->pfn_list_lock);
>> + p = vfio_find_pfn(addr_space, pfn);
>> + if (p)
>> + ret = 0;
>> + mutex_unlock(&addr_space->pfn_list_lock);
>> + return ret;
>> +}
>> +
>> struct vwork {
>> struct mm_struct *mm;
>> long npage;
>> @@ -304,16 +410,18 @@ static long __vfio_pin_pages_remote(struct vfio_dma *dma, unsigned long vaddr,
>> unsigned long limit = task_rlimit(task, RLIMIT_MEMLOCK) >> PAGE_SHIFT;
>> bool lock_cap = dma->mlock_cap;
>> struct mm_struct *mm = dma->addr_space->mm;
>> - long ret, i;
>> + long ret, i, lock_acct;
>> bool rsvd;
>>
>> ret = vaddr_get_pfn(mm, vaddr, prot, pfn_base);
>> if (ret)
>> return ret;
>>
>> + lock_acct = vfio_pfn_account(dma->addr_space, *pfn_base);
>> +
>> rsvd = is_invalid_reserved_pfn(*pfn_base);
>>
>> - if (!rsvd && !lock_cap && mm->locked_vm + 1 > limit) {
>> + if (!rsvd && !lock_cap && mm->locked_vm + lock_acct > limit) {
>> put_pfn(*pfn_base, prot);
>> pr_warn("%s: RLIMIT_MEMLOCK (%ld) exceeded\n", __func__,
>> limit << PAGE_SHIFT);
>> @@ -340,8 +448,10 @@ static long __vfio_pin_pages_remote(struct vfio_dma *dma, unsigned long vaddr,
>> break;
>> }
>>
>> + lock_acct += vfio_pfn_account(dma->addr_space, pfn);
>> +
>> if (!rsvd && !lock_cap &&
>> - mm->locked_vm + i + 1 > limit) {
>> + mm->locked_vm + lock_acct + 1 > limit) {
>> put_pfn(pfn, prot);
>> pr_warn("%s: RLIMIT_MEMLOCK (%ld) exceeded\n",
>> __func__, limit << PAGE_SHIFT);
>> @@ -350,7 +460,7 @@ static long __vfio_pin_pages_remote(struct vfio_dma *dma, unsigned long vaddr,
>> }
>>
>> if (!rsvd)
>> - vfio_lock_acct(mm, i);
>> + vfio_lock_acct(mm, lock_acct);
>>
>> return i;
>> }
>> @@ -370,14 +480,214 @@ static long __vfio_unpin_pages_remote(struct vfio_dma *dma, unsigned long pfn,
>> return unlocked;
>> }
>>
>> -static void vfio_unmap_unpin(struct vfio_iommu *iommu, struct vfio_dma *dma)
>> +static int __vfio_pin_page_external(struct vfio_dma *dma, unsigned long vaddr,
>> + int prot, unsigned long *pfn_base,
>> + bool do_accounting)
>> +{
>> + struct task_struct *task = dma->task;
>> + unsigned long limit = task_rlimit(task, RLIMIT_MEMLOCK) >> PAGE_SHIFT;
>> + bool lock_cap = dma->mlock_cap;
>> + struct mm_struct *mm = dma->addr_space->mm;
>> + int ret;
>> + bool rsvd;
>> +
>> + ret = vaddr_get_pfn(mm, vaddr, prot, pfn_base);
>> + if (ret)
>> + return ret;
>> +
>> + rsvd = is_invalid_reserved_pfn(*pfn_base);
>> +
>> + if (!rsvd && !lock_cap && mm->locked_vm + 1 > limit) {
>> + put_pfn(*pfn_base, prot);
>> + pr_warn("%s: Task %s (%d) RLIMIT_MEMLOCK (%ld) exceeded\n",
>> + __func__, task->comm, task_pid_nr(task),
>> + limit << PAGE_SHIFT);
>> + return -ENOMEM;
>> + }
>> +
>> + if (!rsvd && do_accounting)
>> + vfio_lock_acct(mm, 1);
>> +
>> + return 1;
>> +}
>> +
>> +static void __vfio_unpin_page_external(struct vfio_addr_space *addr_space,
>> + unsigned long pfn, int prot,
>> + bool do_accounting)
>> +{
>> + put_pfn(pfn, prot);
>> +
>> + if (do_accounting)
>> + vfio_lock_acct(addr_space->mm, -1);
>
> Can't we batch this like we do elsewhere? Intel folks, AIUI you intend
> to pin all VM memory through this side channel, have you tested the
> scalability and performance of this with larger VMs? Our vfio_pfn
> data structure alone is 40 bytes per pinned page, which means for
> each 1GB of VM memory, we have 10MBs worth of struct vfio_pfn!
> Additionally, unmapping each 1GB of VM memory will result in 256k
> separate vfio_lock_acct() callbacks. I'm concerned that we're not
> being efficient enough in either space or time.
Hi Alex,
Sorry for the confusion: Intel vGPU doesn't actually need to pin all
guest memory. A vGPU has its own page table (GTT), and accesses to it
are trapped. Whenever the guest driver wants to use a page for DMA, it
writes the corresponding GTT entry, so we see that event and can pin
only that page.
Performance data will be shared once available. Thanks :)
--
Thanks,
Jike
WARNING: multiple messages have this Message-ID (diff)
From: Jike Song <jike.song@intel.com>
To: Alex Williamson <alex.williamson@redhat.com>
Cc: Kirti Wankhede <kwankhede@nvidia.com>,
pbonzini@redhat.com, kraxel@redhat.com, cjia@nvidia.com,
qemu-devel@nongnu.org, kvm@vger.kernel.org, kevin.tian@intel.com,
bjsdjshi@linux.vnet.ibm.com, linux-kernel@vger.kernel.org
Subject: Re: [Qemu-devel] [PATCH v11 10/22] vfio iommu type1: Add support for mediated devices
Date: Tue, 08 Nov 2016 10:20:14 +0800 [thread overview]
Message-ID: <5821365E.6020304@intel.com> (raw)
In-Reply-To: <20161107161619.66e03d8f@t450s.home>
On 11/08/2016 07:16 AM, Alex Williamson wrote:
> On Sat, 5 Nov 2016 02:40:44 +0530
> Kirti Wankhede <kwankhede@nvidia.com> wrote:
>
>> VFIO IOMMU drivers are designed for the devices which are IOMMU capable.
>> Mediated device only uses IOMMU APIs, the underlying hardware can be
>> managed by an IOMMU domain.
>>
>> Aim of this change is:
>> - To use most of the code of TYPE1 IOMMU driver for mediated devices
>> - To support direct assigned device and mediated device in single module
>>
>> This change adds pin and unpin support for mediated device to TYPE1 IOMMU
>> backend module. More details:
>> - vfio_pin_pages() callback here uses task and address space of vfio_dma,
>> that is, of the process who mapped that iova range.
>> - Added pfn_list tracking logic to address space structure. All pages
>> pinned through this interface are trached in its address space.
> ^ k
> ------------------------------------------|
>
>> - Pinned pages list is used to verify unpinning request and to unpin
>> remaining pages while detaching the group for that device.
>> - Page accounting is updated to account in its address space where the
>> pages are pinned/unpinned.
>> - Accounting for mdev device is only done if there is no iommu capable
>> domain in the container. When there is a direct device assigned to the
>> container and that domain is iommu capable, all pages are already pinned
>> during DMA_MAP.
>> - Page accounting is updated on hot plug and unplug mdev device and pass
>> through device.
>>
>> Tested by assigning below combinations of devices to a single VM:
>> - GPU pass through only
>> - vGPU device only
>> - One GPU pass through and one vGPU device
>> - Linux VM hot plug and unplug vGPU device while GPU pass through device
>> exist
>> - Linux VM hot plug and unplug GPU pass through device while vGPU device
>> exist
>>
>> Signed-off-by: Kirti Wankhede <kwankhede@nvidia.com>
>> Signed-off-by: Neo Jia <cjia@nvidia.com>
>> Change-Id: I295d6f0f2e0579b8d9882bfd8fd5a4194b97bd9a
>> ---
>> drivers/vfio/vfio_iommu_type1.c | 538 +++++++++++++++++++++++++++++++++++++---
>> 1 file changed, 500 insertions(+), 38 deletions(-)
>>
>> diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
>> index 8d64528dcc22..e511073446a0 100644
>> --- a/drivers/vfio/vfio_iommu_type1.c
>> +++ b/drivers/vfio/vfio_iommu_type1.c
>> @@ -36,6 +36,7 @@
>> #include <linux/uaccess.h>
>> #include <linux/vfio.h>
>> #include <linux/workqueue.h>
>> +#include <linux/mdev.h>
>>
>> #define DRIVER_VERSION "0.2"
>> #define DRIVER_AUTHOR "Alex Williamson <alex.williamson@redhat.com>"
>> @@ -56,6 +57,7 @@ MODULE_PARM_DESC(disable_hugepages,
>> struct vfio_iommu {
>> struct list_head domain_list;
>> struct list_head addr_space_list;
>> + struct vfio_domain *external_domain; /* domain for external user */
>> struct mutex lock;
>> struct rb_root dma_list;
>> bool v2;
>> @@ -67,6 +69,9 @@ struct vfio_addr_space {
>> struct mm_struct *mm;
>> struct list_head next;
>> atomic_t ref_count;
>> + /* external user pinned pfns */
>> + struct rb_root pfn_list; /* pinned Host pfn list */
>> + struct mutex pfn_list_lock; /* mutex for pfn_list */
>> };
>>
>> struct vfio_domain {
>> @@ -83,6 +88,7 @@ struct vfio_dma {
>> unsigned long vaddr; /* Process virtual addr */
>> size_t size; /* Map size (bytes) */
>> int prot; /* IOMMU_READ/WRITE */
>> + bool iommu_mapped;
>> struct vfio_addr_space *addr_space;
>> struct task_struct *task;
>> bool mlock_cap;
>> @@ -94,6 +100,19 @@ struct vfio_group {
>> };
>>
>> /*
>> + * Guest RAM pinning working set or DMA target
>> + */
>> +struct vfio_pfn {
>> + struct rb_node node;
>> + unsigned long pfn; /* Host pfn */
>> + int prot;
>> + atomic_t ref_count;
>> +};
>> +
>> +#define IS_IOMMU_CAP_DOMAIN_IN_CONTAINER(iommu) \
>> + (!list_empty(&iommu->domain_list))
>> +
>> +/*
>> * This code handles mapping and unmapping of user data buffers
>> * into DMA'ble space using the IOMMU
>> */
>> @@ -153,6 +172,93 @@ static struct vfio_addr_space *vfio_find_addr_space(struct vfio_iommu *iommu,
>> return NULL;
>> }
>>
>> +/*
>> + * Helper Functions for host pfn list
>> + */
>> +static struct vfio_pfn *vfio_find_pfn(struct vfio_addr_space *addr_space,
>> + unsigned long pfn)
>> +{
>> + struct vfio_pfn *vpfn;
>> + struct rb_node *node = addr_space->pfn_list.rb_node;
>> +
>> + while (node) {
>> + vpfn = rb_entry(node, struct vfio_pfn, node);
>> +
>> + if (pfn < vpfn->pfn)
>> + node = node->rb_left;
>> + else if (pfn > vpfn->pfn)
>> + node = node->rb_right;
>> + else
>> + return vpfn;
>> + }
>> +
>> + return NULL;
>> +}
>> +
>> +static void vfio_link_pfn(struct vfio_addr_space *addr_space,
>> + struct vfio_pfn *new)
>> +{
>> + struct rb_node **link, *parent = NULL;
>> + struct vfio_pfn *vpfn;
>> +
>> + link = &addr_space->pfn_list.rb_node;
>> + while (*link) {
>> + parent = *link;
>> + vpfn = rb_entry(parent, struct vfio_pfn, node);
>> +
>> + if (new->pfn < vpfn->pfn)
>> + link = &(*link)->rb_left;
>> + else
>> + link = &(*link)->rb_right;
>> + }
>> +
>> + rb_link_node(&new->node, parent, link);
>> + rb_insert_color(&new->node, &addr_space->pfn_list);
>> +}
>> +
>> +static void vfio_unlink_pfn(struct vfio_addr_space *addr_space,
>> + struct vfio_pfn *old)
>> +{
>> + rb_erase(&old->node, &addr_space->pfn_list);
>> +}
>> +
>> +static int vfio_add_to_pfn_list(struct vfio_addr_space *addr_space,
>> + unsigned long pfn, int prot)
>> +{
>> + struct vfio_pfn *vpfn;
>> +
>> + vpfn = kzalloc(sizeof(*vpfn), GFP_KERNEL);
>> + if (!vpfn)
>> + return -ENOMEM;
>> +
>> + vpfn->pfn = pfn;
>> + vpfn->prot = prot;
>> + atomic_set(&vpfn->ref_count, 1);
>> + vfio_link_pfn(addr_space, vpfn);
>> + return 0;
>> +}
>> +
>> +static void vfio_remove_from_pfn_list(struct vfio_addr_space *addr_space,
>> + struct vfio_pfn *vpfn)
>> +{
>> + vfio_unlink_pfn(addr_space, vpfn);
>> + kfree(vpfn);
>> +}
>> +
>> +static int vfio_pfn_account(struct vfio_addr_space *addr_space,
>> + unsigned long pfn)
>> +{
>> + struct vfio_pfn *p;
>> + int ret = 1;
>> +
>> + mutex_lock(&addr_space->pfn_list_lock);
>> + p = vfio_find_pfn(addr_space, pfn);
>> + if (p)
>> + ret = 0;
>> + mutex_unlock(&addr_space->pfn_list_lock);
>> + return ret;
>> +}
>> +
>> struct vwork {
>> struct mm_struct *mm;
>> long npage;
>> @@ -304,16 +410,18 @@ static long __vfio_pin_pages_remote(struct vfio_dma *dma, unsigned long vaddr,
>> unsigned long limit = task_rlimit(task, RLIMIT_MEMLOCK) >> PAGE_SHIFT;
>> bool lock_cap = dma->mlock_cap;
>> struct mm_struct *mm = dma->addr_space->mm;
>> - long ret, i;
>> + long ret, i, lock_acct;
>> bool rsvd;
>>
>> ret = vaddr_get_pfn(mm, vaddr, prot, pfn_base);
>> if (ret)
>> return ret;
>>
>> + lock_acct = vfio_pfn_account(dma->addr_space, *pfn_base);
>> +
>> rsvd = is_invalid_reserved_pfn(*pfn_base);
>>
>> - if (!rsvd && !lock_cap && mm->locked_vm + 1 > limit) {
>> + if (!rsvd && !lock_cap && mm->locked_vm + lock_acct > limit) {
>> put_pfn(*pfn_base, prot);
>> pr_warn("%s: RLIMIT_MEMLOCK (%ld) exceeded\n", __func__,
>> limit << PAGE_SHIFT);
>> @@ -340,8 +448,10 @@ static long __vfio_pin_pages_remote(struct vfio_dma *dma, unsigned long vaddr,
>> break;
>> }
>>
>> + lock_acct += vfio_pfn_account(dma->addr_space, pfn);
>> +
>> if (!rsvd && !lock_cap &&
>> - mm->locked_vm + i + 1 > limit) {
>> + mm->locked_vm + lock_acct + 1 > limit) {
>> put_pfn(pfn, prot);
>> pr_warn("%s: RLIMIT_MEMLOCK (%ld) exceeded\n",
>> __func__, limit << PAGE_SHIFT);
>> @@ -350,7 +460,7 @@ static long __vfio_pin_pages_remote(struct vfio_dma *dma, unsigned long vaddr,
>> }
>>
>> if (!rsvd)
>> - vfio_lock_acct(mm, i);
>> + vfio_lock_acct(mm, lock_acct);
>>
>> return i;
>> }
>> @@ -370,14 +480,214 @@ static long __vfio_unpin_pages_remote(struct vfio_dma *dma, unsigned long pfn,
>> return unlocked;
>> }
>>
>> -static void vfio_unmap_unpin(struct vfio_iommu *iommu, struct vfio_dma *dma)
>> +static int __vfio_pin_page_external(struct vfio_dma *dma, unsigned long vaddr,
>> + int prot, unsigned long *pfn_base,
>> + bool do_accounting)
>> +{
>> + struct task_struct *task = dma->task;
>> + unsigned long limit = task_rlimit(task, RLIMIT_MEMLOCK) >> PAGE_SHIFT;
>> + bool lock_cap = dma->mlock_cap;
>> + struct mm_struct *mm = dma->addr_space->mm;
>> + int ret;
>> + bool rsvd;
>> +
>> + ret = vaddr_get_pfn(mm, vaddr, prot, pfn_base);
>> + if (ret)
>> + return ret;
>> +
>> + rsvd = is_invalid_reserved_pfn(*pfn_base);
>> +
>> + if (!rsvd && !lock_cap && mm->locked_vm + 1 > limit) {
>> + put_pfn(*pfn_base, prot);
>> + pr_warn("%s: Task %s (%d) RLIMIT_MEMLOCK (%ld) exceeded\n",
>> + __func__, task->comm, task_pid_nr(task),
>> + limit << PAGE_SHIFT);
>> + return -ENOMEM;
>> + }
>> +
>> + if (!rsvd && do_accounting)
>> + vfio_lock_acct(mm, 1);
>> +
>> + return 1;
>> +}
>> +
>> +static void __vfio_unpin_page_external(struct vfio_addr_space *addr_space,
>> + unsigned long pfn, int prot,
>> + bool do_accounting)
>> +{
>> + put_pfn(pfn, prot);
>> +
>> + if (do_accounting)
>> + vfio_lock_acct(addr_space->mm, -1);
>
> Can't we batch this like we do elsewhere? Intel folks, AIUI you intend
> to pin all VM memory through this side channel, have you tested the
> scalability and performance of this with larger VMs? Our vfio_pfn
> data structure alone is 40 bytes per pinned page, which means for
> each 1GB of VM memory, we have 10MBs worth of struct vfio_pfn!
> Additionally, unmapping each 1GB of VM memory will result in 256k
> separate vfio_lock_acct() callbacks. I'm concerned that we're not
> being efficient enough in either space or time.
Hi Alex,
Sorry for the confusion: Intel vGPU doesn't actually need to pin all
guest memory. A vGPU has its own page table (GTT), and accesses to it
are trapped. Whenever the guest driver wants to use a page for DMA, it
writes the corresponding GTT entry, so we see that event and can pin
only that page.
Performance data will be shared once available. Thanks :)
--
Thanks,
Jike
next prev parent reply other threads:[~2016-11-08 2:20 UTC|newest]
Thread overview: 149+ messages / expand[flat|nested] mbox.gz Atom feed top
2016-11-04 21:10 [PATCH v11 00/22] Add Mediated device support Kirti Wankhede
2016-11-04 21:10 ` [Qemu-devel] " Kirti Wankhede
2016-11-04 21:10 ` [PATCH v11 01/22] vfio: Mediated device Core driver Kirti Wankhede
2016-11-04 21:10 ` [Qemu-devel] " Kirti Wankhede
2016-11-07 6:40 ` Tian, Kevin
2016-11-07 6:40 ` [Qemu-devel] " Tian, Kevin
2016-11-08 9:25 ` Dong Jia Shi
2016-11-08 9:25 ` [Qemu-devel] " Dong Jia Shi
2016-11-08 21:06 ` Kirti Wankhede
2016-11-08 21:06 ` [Qemu-devel] " Kirti Wankhede
2016-11-09 1:09 ` Dong Jia Shi
2016-11-09 1:09 ` [Qemu-devel] " Dong Jia Shi
2016-11-04 21:10 ` [PATCH v11 02/22] vfio: VFIO based driver for Mediated devices Kirti Wankhede
2016-11-04 21:10 ` [Qemu-devel] " Kirti Wankhede
2016-11-09 3:25 ` Dong Jia Shi
2016-11-09 3:25 ` Dong Jia Shi
2016-11-04 21:10 ` [PATCH v11 03/22] vfio: Rearrange functions to get vfio_group from dev Kirti Wankhede
2016-11-04 21:10 ` [Qemu-devel] " Kirti Wankhede
2016-11-04 21:10 ` [PATCH v11 04/22] vfio: Common function to increment container_users Kirti Wankhede
2016-11-04 21:10 ` [Qemu-devel] " Kirti Wankhede
2016-11-04 21:10 ` [PATCH v11 05/22] vfio iommu: Added pin and unpin callback functions to vfio_iommu_driver_ops Kirti Wankhede
2016-11-04 21:10 ` [Qemu-devel] " Kirti Wankhede
2016-11-07 19:36 ` Alex Williamson
2016-11-07 19:36 ` [Qemu-devel] " Alex Williamson
2016-11-08 13:55 ` Kirti Wankhede
2016-11-08 13:55 ` [Qemu-devel] " Kirti Wankhede
2016-11-08 16:39 ` Alex Williamson
2016-11-08 16:39 ` [Qemu-devel] " Alex Williamson
2016-11-08 18:47 ` Kirti Wankhede
2016-11-08 18:47 ` [Qemu-devel] " Kirti Wankhede
2016-11-08 19:14 ` Alex Williamson
2016-11-08 19:14 ` [Qemu-devel] " Alex Williamson
2016-11-04 21:10 ` [PATCH v11 06/22] vfio iommu type1: Update arguments of vfio_lock_acct Kirti Wankhede
2016-11-04 21:10 ` [Qemu-devel] " Kirti Wankhede
2016-11-04 21:10 ` [PATCH v11 07/22] vfio iommu type1: Update argument of vaddr_get_pfn() Kirti Wankhede
2016-11-04 21:10 ` [Qemu-devel] " Kirti Wankhede
2016-11-07 8:42 ` Alexey Kardashevskiy
2016-11-07 8:42 ` [Qemu-devel] " Alexey Kardashevskiy
2016-11-04 21:10 ` [PATCH v11 08/22] vfio iommu type1: Add find_iommu_group() function Kirti Wankhede
2016-11-04 21:10 ` [Qemu-devel] " Kirti Wankhede
2016-11-10 7:29 ` Dong Jia Shi
2016-11-10 7:29 ` [Qemu-devel] " Dong Jia Shi
2016-11-04 21:10 ` [PATCH v11 09/22] vfio iommu type1: Add task structure to vfio_dma Kirti Wankhede
2016-11-04 21:10 ` [Qemu-devel] " Kirti Wankhede
2016-11-07 21:03 ` Alex Williamson
2016-11-07 21:03 ` [Qemu-devel] " Alex Williamson
2016-11-08 14:13 ` Kirti Wankhede
2016-11-08 14:13 ` [Qemu-devel] " Kirti Wankhede
2016-11-08 16:43 ` Alex Williamson
2016-11-08 16:43 ` [Qemu-devel] " Alex Williamson
2016-11-10 8:24 ` Dong Jia Shi
2016-11-10 8:24 ` Dong Jia Shi
2016-11-04 21:10 ` [PATCH v11 10/22] vfio iommu type1: Add support for mediated devices Kirti Wankhede
2016-11-04 21:10 ` [Qemu-devel] " Kirti Wankhede
2016-11-07 23:16 ` Alex Williamson
2016-11-07 23:16 ` [Qemu-devel] " Alex Williamson
2016-11-08 2:20 ` Jike Song [this message]
2016-11-08 2:20 ` Jike Song
2016-11-08 16:18 ` Alex Williamson
2016-11-08 16:18 ` [Qemu-devel] " Alex Williamson
2016-11-08 15:06 ` Kirti Wankhede
2016-11-08 15:06 ` [Qemu-devel] " Kirti Wankhede
2016-11-08 17:05 ` Alex Williamson
2016-11-08 17:05 ` [Qemu-devel] " Alex Williamson
2016-11-08 6:52 ` Alexey Kardashevskiy
2016-11-08 6:52 ` [Qemu-devel] " Alexey Kardashevskiy
2016-11-15 5:17 ` Alexey Kardashevskiy
2016-11-15 5:17 ` [Qemu-devel] " Alexey Kardashevskiy
2016-11-15 6:33 ` Kirti Wankhede
2016-11-15 6:33 ` [Qemu-devel] " Kirti Wankhede
2016-11-15 7:27 ` Alexey Kardashevskiy
2016-11-15 7:27 ` [Qemu-devel] " Alexey Kardashevskiy
2016-11-15 7:56 ` Kirti Wankhede
2016-11-15 7:56 ` [Qemu-devel] " Kirti Wankhede
2016-11-14 2:49 ` Dong Jia Shi
2016-11-14 2:49 ` Dong Jia Shi
2016-11-04 21:10 ` [PATCH v11 11/22] vfio iommu: Add blocking notifier to notify DMA_UNMAP Kirti Wankhede
2016-11-04 21:10 ` [Qemu-devel] " Kirti Wankhede
2016-11-07 23:45 ` Alex Williamson
2016-11-07 23:45 ` [Qemu-devel] " Alex Williamson
2016-11-08 16:26 ` Kirti Wankhede
2016-11-08 16:26 ` [Qemu-devel] " Kirti Wankhede
2016-11-08 17:46 ` Alex Williamson
2016-11-08 17:46 ` [Qemu-devel] " Alex Williamson
2016-11-08 19:59 ` Kirti Wankhede
2016-11-08 19:59 ` [Qemu-devel] " Kirti Wankhede
2016-11-08 21:28 ` Alex Williamson
2016-11-08 21:28 ` [Qemu-devel] " Alex Williamson
2016-11-14 7:52 ` Kirti Wankhede
2016-11-14 7:52 ` [Qemu-devel] " Kirti Wankhede
2016-11-14 7:52 ` Kirti Wankhede
2016-11-14 15:37 ` Alex Williamson
2016-11-14 15:37 ` [Qemu-devel] " Alex Williamson
2016-11-04 21:10 ` [PATCH v11 12/22] vfio: Add notifier callback to parent's ops structure of mdev Kirti Wankhede
2016-11-04 21:10 ` [Qemu-devel] " Kirti Wankhede
2016-11-07 23:51 ` Alex Williamson
2016-11-07 23:51 ` [Qemu-devel] " Alex Williamson
2016-11-04 21:10 ` [PATCH v11 13/22] vfio: Introduce common function to add capabilities Kirti Wankhede
2016-11-04 21:10 ` [Qemu-devel] " Kirti Wankhede
2016-11-08 7:29 ` Alexey Kardashevskiy
2016-11-08 7:29 ` [Qemu-devel] " Alexey Kardashevskiy
2016-11-08 20:46 ` Kirti Wankhede
2016-11-08 20:46 ` [Qemu-devel] " Kirti Wankhede
2016-11-08 21:42 ` Alex Williamson
2016-11-08 21:42 ` [Qemu-devel] " Alex Williamson
2016-11-09 2:23 ` Alexey Kardashevskiy
2016-11-09 2:23 ` [Qemu-devel] " Alexey Kardashevskiy
2016-11-04 21:10 ` [PATCH v11 14/22] vfio_pci: Update vfio_pci to use vfio_info_add_capability() Kirti Wankhede
2016-11-04 21:10 ` [Qemu-devel] " Kirti Wankhede
2016-11-04 21:10 ` [PATCH v11 15/22] vfio: Introduce vfio_set_irqs_validate_and_prepare() Kirti Wankhede
2016-11-04 21:10 ` [Qemu-devel] " Kirti Wankhede
2016-11-08 8:46 ` Alexey Kardashevskiy
2016-11-08 8:46 ` [Qemu-devel] " Alexey Kardashevskiy
2016-11-08 20:22 ` Kirti Wankhede
2016-11-08 20:22 ` [Qemu-devel] " Kirti Wankhede
2016-11-09 3:07 ` Alexey Kardashevskiy
2016-11-09 3:07 ` [Qemu-devel] " Alexey Kardashevskiy
2016-11-09 3:35 ` Alex Williamson
2016-11-09 3:35 ` [Qemu-devel] " Alex Williamson
2016-11-04 21:10 ` [PATCH v11 16/22] vfio_pci: Updated to use vfio_set_irqs_validate_and_prepare() Kirti Wankhede
2016-11-04 21:10 ` [Qemu-devel] " Kirti Wankhede
2016-11-04 21:10 ` [PATCH v11 17/22] vfio_platform: " Kirti Wankhede
2016-11-04 21:10 ` [Qemu-devel] " Kirti Wankhede
2016-11-08 8:52 ` Alexey Kardashevskiy
2016-11-08 8:52 ` [Qemu-devel] " Alexey Kardashevskiy
2016-11-08 20:41 ` Kirti Wankhede
2016-11-08 20:41 ` [Qemu-devel] " Kirti Wankhede
2016-11-04 21:10 ` [PATCH v11 18/22] vfio: Define device_api strings Kirti Wankhede
2016-11-04 21:10 ` [Qemu-devel] " Kirti Wankhede
2016-11-04 21:10 ` [PATCH v11 19/22] docs: Add Documentation for Mediated devices Kirti Wankhede
2016-11-04 21:10 ` [Qemu-devel] " Kirti Wankhede
2016-11-04 21:10 ` [PATCH v11 20/22] docs: Sysfs ABI for mediated device framework Kirti Wankhede
2016-11-04 21:10 ` [Qemu-devel] " Kirti Wankhede
2016-11-04 21:10 ` [PATCH v11 21/22] docs: Sample driver to demonstrate how to use Mediated " Kirti Wankhede
2016-11-04 21:10 ` [Qemu-devel] " Kirti Wankhede
2016-11-04 21:10 ` [PATCH v11 22/22] MAINTAINERS: Add entry VFIO based Mediated device drivers Kirti Wankhede
2016-11-04 21:10 ` [Qemu-devel] " Kirti Wankhede
2016-11-07 3:30 ` [PATCH v11 00/22] Add Mediated device support Alexey Kardashevskiy
2016-11-07 3:30 ` [Qemu-devel] " Alexey Kardashevskiy
2016-11-07 3:59 ` Kirti Wankhede
2016-11-07 3:59 ` [Qemu-devel] " Kirti Wankhede
2016-11-07 5:06 ` Kirti Wankhede
2016-11-07 5:06 ` [Qemu-devel] " Kirti Wankhede
2016-11-07 6:15 ` Alexey Kardashevskiy
2016-11-07 6:15 ` [Qemu-devel] " Alexey Kardashevskiy
2016-11-07 6:36 ` Kirti Wankhede
2016-11-07 6:36 ` [Qemu-devel] " Kirti Wankhede
2016-11-07 6:46 ` Alexey Kardashevskiy
2016-11-07 6:46 ` [Qemu-devel] " Alexey Kardashevskiy
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=5821365E.6020304@intel.com \
--to=jike.song@intel.com \
--cc=alex.williamson@redhat.com \
--cc=bjsdjshi@linux.vnet.ibm.com \
--cc=cjia@nvidia.com \
--cc=kevin.tian@intel.com \
--cc=kraxel@redhat.com \
--cc=kvm@vger.kernel.org \
--cc=kwankhede@nvidia.com \
--cc=linux-kernel@vger.kernel.org \
--cc=pbonzini@redhat.com \
--cc=qemu-devel@nongnu.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.