* [RFC] Balloon support for device assignment
@ 2010-02-17 9:43 borove
2010-02-17 9:43 ` [RFC] KVM: " borove
0 siblings, 1 reply; 8+ messages in thread
From: borove @ 2010-02-17 9:43 UTC (permalink / raw)
To: kvm; +Cc: borove, benami
Currently, device assignment forces pinning of the entire guest memory. The following kernel and qemu patches add
balloon support for device assignment. When the balloon inflates, the corresponding pages are unmapped from the IOMMU and unpinned; when the balloon deflates, they are remapped and pinned again.
The kernel patch applies to tag v2.6.32
Comments appreciated.
Regards,
Eran.
^ permalink raw reply [flat|nested] 8+ messages in thread
* [RFC] KVM: Balloon support for device assignment
2010-02-17 9:43 [RFC] Balloon support for device assignment borove
@ 2010-02-17 9:43 ` borove
2010-02-17 9:43 ` [RFC] QEMU: " borove
2010-02-17 10:27 ` [RFC] KVM: " Avi Kivity
0 siblings, 2 replies; 8+ messages in thread
From: borove @ 2010-02-17 9:43 UTC (permalink / raw)
To: kvm; +Cc: borove, benami
From: Eran Borovik <borove@il.ibm.com>
This patch adds modifications to allow correct
balloon operation when a virtual guest uses a directly assigned device.
The modifications include a new interface between qemu and kvm that allows
mapping pages into and unmapping them from the IOMMU, as well as pinning and unpinning them as needed.
Signed-off-by: Eran Borovik <borove@il.ibm.com>
---
include/linux/kvm.h | 3 ++
include/linux/kvm_host.h | 4 ++
virt/kvm/iommu.c | 86 +++++++++++++++++++++++++++++++++++++++++++--
virt/kvm/kvm_main.c | 9 +++++
4 files changed, 98 insertions(+), 4 deletions(-)
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index f8f8900..567f5f8 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -514,6 +514,9 @@ struct kvm_irqfd {
struct kvm_userspace_memory_region)
#define KVM_SET_TSS_ADDR _IO(KVMIO, 0x47)
#define KVM_SET_IDENTITY_MAP_ADDR _IOW(KVMIO, 0x48, __u64)
+#define KVM_IOMMU_UNMAP_PAGE _IOW(KVMIO, 0x49, __u64)
+#define KVM_IOMMU_MAP_PAGE _IOW(KVMIO, 0x50, __u64)
+
/* Device model IOC */
#define KVM_CREATE_IRQCHIP _IO(KVMIO, 0x60)
#define KVM_IRQ_LINE _IOW(KVMIO, 0x61, struct kvm_irq_level)
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index b7bbb5d..ad904ec 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -411,6 +411,10 @@ int kvm_assign_device(struct kvm *kvm,
struct kvm_assigned_dev_kernel *assigned_dev);
int kvm_deassign_device(struct kvm *kvm,
struct kvm_assigned_dev_kernel *assigned_dev);
+void kvm_iommu_unmap_page(struct kvm *kvm,
+ gfn_t base_gfn);
+int kvm_iommu_map_page(struct kvm *kvm,
+ gfn_t base_gfn);
#else /* CONFIG_IOMMU_API */
static inline int kvm_iommu_map_pages(struct kvm *kvm,
gfn_t base_gfn,
diff --git a/virt/kvm/iommu.c b/virt/kvm/iommu.c
index 1514758..54cfd33 100644
--- a/virt/kvm/iommu.c
+++ b/virt/kvm/iommu.c
@@ -190,23 +190,101 @@ static void kvm_iommu_put_pages(struct kvm *kvm,
gfn_t gfn = base_gfn;
pfn_t pfn;
struct iommu_domain *domain = kvm->arch.iommu_domain;
- unsigned long i;
+ unsigned long i, iommu_pages;
u64 phys;
/* check if iommu exists and in use */
if (!domain)
return;
- for (i = 0; i < npages; i++) {
+ for (i = 0, iommu_pages = 0; i < npages; i++, gfn++) {
phys = iommu_iova_to_phys(domain, gfn_to_gpa(gfn));
+
+ /*Because of ballooning, there can be holes in the
+ range. In that case, we simply unmap everything
+ till now, and continue forward.
+ */
+ if (!phys) {
+
+ /*No consecutive IOMMU pages here*/
+ if (iommu_pages == 0)
+ continue;
+ iommu_unmap_range(domain,
+ gfn_to_gpa(base_gfn),
+ PAGE_SIZE*iommu_pages);
+
+ /*Reset consecutive iommu range counters*/
+ base_gfn = gfn + 1;
+ iommu_pages = 0;
+ continue;
+ }
pfn = phys >> PAGE_SHIFT;
kvm_release_pfn_clean(pfn);
- gfn++;
+ ++iommu_pages;
}
- iommu_unmap_range(domain, gfn_to_gpa(base_gfn), PAGE_SIZE * npages);
+ /*Unmap the last iommu range if any*/
+ if (iommu_pages != 0)
+ iommu_unmap_range(domain,
+ gfn_to_gpa(base_gfn),
+ PAGE_SIZE * iommu_pages);
+}
+
+/*Called to map a page into the IOMMU */
+int kvm_iommu_map_page(struct kvm *kvm,
+ gfn_t base_gfn)
+{
+ gfn_t gfn = base_gfn;
+ pfn_t pfn;
+ struct iommu_domain *domain = kvm->arch.iommu_domain;
+ u64 phys;
+ int rc;
+ int flags;
+
+ /* check if iommu exists and in use */
+ if (!domain)
+ return 0;
+ phys = iommu_iova_to_phys(domain, gfn_to_gpa(gfn));
+
+ /*Verify address is not mapped already*/
+ if (phys)
+ return 0;
+ flags = IOMMU_READ | IOMMU_WRITE;
+ if (kvm->arch.iommu_flags & KVM_IOMMU_CACHE_COHERENCY)
+ flags |= IOMMU_CACHE;
+ pfn = gfn_to_pfn(kvm, gfn);
+ rc = iommu_map_range(domain,
+ gfn_to_gpa(gfn),
+ pfn_to_hpa(pfn),
+ PAGE_SIZE, flags);
+ return rc;
+}
+
+
+
+/*Called to unmap a page from IOMMU */
+void kvm_iommu_unmap_page(struct kvm *kvm,
+ gfn_t base_gfn)
+{
+ gfn_t gfn = base_gfn;
+ pfn_t pfn;
+ struct iommu_domain *domain = kvm->arch.iommu_domain;
+ u64 phys;
+
+ /* check if iommu exists and in use */
+ if (!domain)
+ return;
+ phys = iommu_iova_to_phys(domain, gfn_to_gpa(gfn));
+
+ /*Verify address is mapped*/
+ if (!phys)
+ return;
+ pfn = phys >> PAGE_SHIFT;
+ kvm_release_pfn_clean(pfn);
+ iommu_unmap_range(domain, gfn_to_gpa(gfn), PAGE_SIZE);
}
+
static int kvm_iommu_unmap_memslots(struct kvm *kvm)
{
int i;
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 7495ce3..560a3ff 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -2184,6 +2184,15 @@ static long kvm_vm_ioctl(struct file *filp,
r = -EOPNOTSUPP;
break;
}
+ case KVM_IOMMU_MAP_PAGE: {
+ r = kvm_iommu_map_page(kvm, arg);
+ break;
+ }
+ case KVM_IOMMU_UNMAP_PAGE:{
+ kvm_iommu_unmap_page(kvm, arg);
+ r = 0;
+ break;
+ }
#ifdef KVM_CAP_ASSIGN_DEV_IRQ
case KVM_ASSIGN_DEV_IRQ: {
struct kvm_assigned_irq assigned_irq;
--
1.6.0.4
^ permalink raw reply related [flat|nested] 8+ messages in thread
* [RFC] QEMU: Balloon support for device assignment
2010-02-17 9:43 ` [RFC] KVM: " borove
@ 2010-02-17 9:43 ` borove
2010-02-17 10:28 ` Avi Kivity
2010-02-17 10:27 ` [RFC] KVM: " Avi Kivity
1 sibling, 1 reply; 8+ messages in thread
From: borove @ 2010-02-17 9:43 UTC (permalink / raw)
To: kvm; +Cc: borove, benami
From: Eran Borovik <borove@il.ibm.com>
This patch adds modifications to allow correct
balloon operation when a virtual guest uses a directly assigned device.
The modifications include a new interface between qemu and kvm that allows
mapping pages into and unmapping them from the IOMMU, as well as pinning and unpinning them as needed.
Signed-off-by: Eran Borovik <borove@il.ibm.com>
---
hw/virtio-balloon.c | 13 ++++++++++---
kvm/include/linux/kvm.h | 2 ++
kvm/libkvm/libkvm.h | 4 ++++
qemu-kvm.c | 10 ++++++++++
qemu-kvm.h | 4 ++++
5 files changed, 30 insertions(+), 3 deletions(-)
diff --git a/hw/virtio-balloon.c b/hw/virtio-balloon.c
index 3792012..337f717 100644
--- a/hw/virtio-balloon.c
+++ b/hw/virtio-balloon.c
@@ -132,6 +132,7 @@ static void virtio_balloon_handle_output(VirtIODevice *vdev, VirtQueue *vq)
elem.out_sg, elem.out_num) == 4) {
ram_addr_t pa;
ram_addr_t addr;
+ bool deflate;
pa = (ram_addr_t)ldl_p(&pfn) << VIRTIO_BALLOON_PFN_SHIFT;
offset += 4;
@@ -139,12 +140,18 @@ static void virtio_balloon_handle_output(VirtIODevice *vdev, VirtQueue *vq)
addr = cpu_get_physical_page_desc(pa);
if ((addr & ~TARGET_PAGE_MASK) != IO_MEM_RAM)
continue;
+ deflate = !!(vq == s->dvq);
+# ifdef KVM_CAP_DEVICE_ASSIGNMENT
+ if (deflate)
+ kvm_map_pfn(NULL, pfn);
+ else
+ kvm_unmap_pfn(NULL, pfn);
+# endif
/* Using qemu_get_ram_ptr is bending the rules a bit, but
should be OK because we only want a single page. */
- balloon_page(qemu_get_ram_ptr(addr), !!(vq == s->dvq));
- }
-
+ balloon_page(qemu_get_ram_ptr(addr), deflate);
+ }
virtqueue_push(vq, &elem, offset);
virtio_notify(vdev, vq);
}
diff --git a/kvm/include/linux/kvm.h b/kvm/include/linux/kvm.h
index 6485981..90f7723 100644
--- a/kvm/include/linux/kvm.h
+++ b/kvm/include/linux/kvm.h
@@ -595,6 +595,8 @@ struct kvm_clock_data {
struct kvm_userspace_memory_region)
#define KVM_SET_TSS_ADDR _IO(KVMIO, 0x47)
#define KVM_SET_IDENTITY_MAP_ADDR _IOW(KVMIO, 0x48, __u64)
+#define KVM_IOMMU_UNMAP_PAGE _IOW(KVMIO, 0x49, __u64)
+#define KVM_IOMMU_MAP_PAGE _IOW(KVMIO, 0x50, __u64)
/* Device model IOC */
#define KVM_CREATE_IRQCHIP _IO(KVMIO, 0x60)
#define KVM_IRQ_LINE _IOW(KVMIO, 0x61, struct kvm_irq_level)
diff --git a/kvm/libkvm/libkvm.h b/kvm/libkvm/libkvm.h
index 4821a1e..7fa83b5 100644
--- a/kvm/libkvm/libkvm.h
+++ b/kvm/libkvm/libkvm.h
@@ -714,6 +714,10 @@ int kvm_s390_store_status(kvm_context_t kvm, int slot, unsigned long addr);
int kvm_assign_pci_device(kvm_context_t kvm,
struct kvm_assigned_pci_dev *assigned_dev);
+int kvm_deflate_pfn(kvm_context_t kvm, uint32_t pfn);
+
+int kvm_inflate_pfn(kvm_context_t kvm, uint32_t pfn);
+
/*!
* \brief Assign IRQ for an assigned device
*
diff --git a/qemu-kvm.c b/qemu-kvm.c
index a305907..a5ca029 100644
--- a/qemu-kvm.c
+++ b/qemu-kvm.c
@@ -1081,6 +1081,16 @@ static int kvm_old_assign_irq(kvm_context_t kvm,
return kvm_vm_ioctl(kvm_state, KVM_ASSIGN_IRQ, assigned_irq);
}
+int kvm_unmap_pfn(kvm_context_t kvm, uint32_t pfn)
+{
+ return kvm_vm_ioctl(kvm_state, KVM_IOMMU_UNMAP_PAGE, pfn);
+}
+
+int kvm_map_pfn(kvm_context_t kvm, uint32_t pfn)
+{
+ return kvm_vm_ioctl(kvm_state, KVM_IOMMU_MAP_PAGE, pfn);
+}
+
#ifdef KVM_CAP_ASSIGN_DEV_IRQ
int kvm_assign_irq(kvm_context_t kvm, struct kvm_assigned_irq *assigned_irq)
{
diff --git a/qemu-kvm.h b/qemu-kvm.h
index 6b3e5a1..861c336 100644
--- a/qemu-kvm.h
+++ b/qemu-kvm.h
@@ -691,6 +691,10 @@ int kvm_s390_store_status(kvm_context_t kvm, int slot, unsigned long addr);
int kvm_assign_pci_device(kvm_context_t kvm,
struct kvm_assigned_pci_dev *assigned_dev);
+int kvm_unmap_pfn(kvm_context_t kvm, uint32_t pfn);
+
+int kvm_map_pfn(kvm_context_t kvm, uint32_t pfn);
+
/*!
* \brief Assign IRQ for an assigned device
*
--
1.6.0.4
^ permalink raw reply related [flat|nested] 8+ messages in thread
* Re: [RFC] KVM: Balloon support for device assignment
2010-02-17 9:43 ` [RFC] KVM: " borove
2010-02-17 9:43 ` [RFC] QEMU: " borove
@ 2010-02-17 10:27 ` Avi Kivity
2010-02-17 17:52 ` Muli Ben-Yehuda
1 sibling, 1 reply; 8+ messages in thread
From: Avi Kivity @ 2010-02-17 10:27 UTC (permalink / raw)
To: borove; +Cc: kvm, benami, Gerd Hoffmann
On 02/17/2010 11:43 AM, borove@il.ibm.com wrote:
> From: Eran Borovik<borove@il.ibm.com>
>
> This patch adds modifications to allow correct
> balloon operation when a virtual guest uses a direct assigned device.
> The modifications include a new interface between qemu and kvm to allow
> mapping and unmapping the pages from the IOMMU as well as pinning and unpinning as needed.
>
The plan for iommu support is to push it into uio. Instead of kvm
managing the iommu directly, I'd like qemu to open a uio device and set
up an iommu mapping there, which will just happen to match the kvm
memory slots. Similarly, interrupts will be forwarded using irqfds.
This will allow using the iommu without kvm, and reduce the amount of
special purpose kvm code.
These patches make the transition more difficult which worries me. I
know Gerd looked at making the move, but no longer.
--
error compiling committee.c: too many arguments to function
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [RFC] QEMU: Balloon support for device assignment
2010-02-17 9:43 ` [RFC] QEMU: " borove
@ 2010-02-17 10:28 ` Avi Kivity
0 siblings, 0 replies; 8+ messages in thread
From: Avi Kivity @ 2010-02-17 10:28 UTC (permalink / raw)
To: borove; +Cc: kvm, benami
On 02/17/2010 11:43 AM, borove@il.ibm.com wrote:
> From: Eran Borovik<borove@il.ibm.com>
>
> This patch adds modifications to allow correct
> balloon operation when a virtual guest uses a direct assigned device.
> The modifications include a new interface between qemu and kvm to allow
> mapping and unmapping the pages from the IOMMU as well as pinning and unpinning as needed.
>
Note, on reset we deflate the balloon completely, since the BIOS and
boot loader (and possibly the OS post-reboot) are not aware of ballooning.
--
error compiling committee.c: too many arguments to function
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [RFC] KVM: Balloon support for device assignment
2010-02-17 10:27 ` [RFC] KVM: " Avi Kivity
@ 2010-02-17 17:52 ` Muli Ben-Yehuda
2010-02-21 8:53 ` Avi Kivity
0 siblings, 1 reply; 8+ messages in thread
From: Muli Ben-Yehuda @ 2010-02-17 17:52 UTC (permalink / raw)
To: Avi Kivity; +Cc: Eran Borovik, kvm, Ben-Ami Yassour, Gerd Hoffmann
On Wed, Feb 17, 2010 at 12:27:09PM +0200, Avi Kivity wrote:
> On 02/17/2010 11:43 AM, borove@il.ibm.com wrote:
> >From: Eran Borovik<borove@il.ibm.com>
> >
> >This patch adds modifications to allow correct
> >balloon operation when a virtual guest uses a direct assigned device.
> >The modifications include a new interface between qemu and kvm to allow
> >mapping and unmapping the pages from the IOMMU as well as pinning and unpinning as needed.
>
> The plan for iommu support is to push it into uio. Instead of kvm
> managing the iommu directly, I'd like qemu to open a uio device and
> set up an iommu mapping there, which will just happen to match the
> kvm memory slots. Similarly, interrupts will be forwarded using
> irqfds. This will allow using the iommu without kvm, and reduce the
> amount of special purpose kvm code.
>
> These patches make the transition more difficult which worries me.
That's a fair point, but they also address a real short-coming of the
current device assignment code, which pins all of the guest's memory
unconditionally. Unless the uio effort is in progress and expected to
complete shortly, I would think the benefit of these simple patches
trumps the cost.
Cheers,
Muli
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [RFC] KVM: Balloon support for device assignment
2010-02-17 17:52 ` Muli Ben-Yehuda
@ 2010-02-21 8:53 ` Avi Kivity
[not found] ` <20100221131732.GC7677@il.ibm.com>
0 siblings, 1 reply; 8+ messages in thread
From: Avi Kivity @ 2010-02-21 8:53 UTC (permalink / raw)
To: Muli Ben-Yehuda; +Cc: Eran Borovik, kvm, Ben-Ami Yassour, Gerd Hoffmann
On 02/17/2010 07:52 PM, Muli Ben-Yehuda wrote:
> On Wed, Feb 17, 2010 at 12:27:09PM +0200, Avi Kivity wrote:
>
>> On 02/17/2010 11:43 AM, borove@il.ibm.com wrote:
>>
>>> From: Eran Borovik<borove@il.ibm.com>
>>>
>>> This patch adds modifications to allow correct
>>> balloon operation when a virtual guest uses a direct assigned device.
>>> The modifications include a new interface between qemu and kvm to allow
>>> mapping and unmapping the pages from the IOMMU as well as pinning and unpinning as needed.
>>>
>> The plan for iommu support is to push it into uio. Instead of kvm
>> managing the iommu directly, I'd like qemu to open a uio device and
>> set up an iommu mapping there, which will just happen to match the
>> kvm memory slots. Similarly, interrupts will be forwarded using
>> irqfds. This will allow using the iommu without kvm, and reduce the
>> amount of special purpose kvm code.
>>
>> These patches make the transition more difficult which worries me.
>>
> That's a fair point, but they also address a real short-coming of the
> current device assignment code, which pins all of the guest's memory
> unconditionally. Unless the uio effort is in progress and expected to
> complete shortly, I would think the benefit of these simple patches
> trumps the cost.
>
I'm not worried about transition cost (that's for whoever makes the
transition to pay) but about how the APIs would translate to the new way
of doing things.
For example, it might be done using mmu notifiers. We might use the
existing madvise(MADV_DONTNEED) as the entry point (though I don't know
how we would indicate deflation).
--
error compiling committee.c: too many arguments to function
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [RFC] KVM: Balloon support for device assignment
[not found] ` <20100221131732.GC7677@il.ibm.com>
@ 2010-02-21 13:25 ` Avi Kivity
0 siblings, 0 replies; 8+ messages in thread
From: Avi Kivity @ 2010-02-21 13:25 UTC (permalink / raw)
To: Muli Ben-Yehuda; +Cc: Eran Borovik, kvm, Ben-Ami Yassour, Gerd Hoffmann
On 02/21/2010 03:17 PM, Muli Ben-Yehuda wrote:
> On Sun, Feb 21, 2010 at 10:53:25AM +0200, Avi Kivity wrote:
>
>
>> I'm not worried about transition cost (that's for whoever makes the
>> transition to pay) but about how the APIs would translate to the new
>> way of doing things.
>>
>> For example, it might be done using mmu notifiers. We might use the
>> existing madvise(MADV_DONTNEED) as the entry point (though I don't
>> know how we would indicate deflation).
>>
> I guess we could extend mmu notifiers to notify on any PTE transition,
> but that seems like a lot of VM complexity for a little gain, given
> the simple KVM-internal alternatives.
>
mmu notifiers already notify on any pte transition.
--
error compiling committee.c: too many arguments to function
^ permalink raw reply [flat|nested] 8+ messages in thread
end of thread, other threads:[~2010-02-21 13:25 UTC | newest]
Thread overview: 8+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2010-02-17 9:43 [RFC] Balloon support for device assignment borove
2010-02-17 9:43 ` [RFC] KVM: " borove
2010-02-17 9:43 ` [RFC] QEMU: " borove
2010-02-17 10:28 ` Avi Kivity
2010-02-17 10:27 ` [RFC] KVM: " Avi Kivity
2010-02-17 17:52 ` Muli Ben-Yehuda
2010-02-21 8:53 ` Avi Kivity
[not found] ` <20100221131732.GC7677@il.ibm.com>
2010-02-21 13:25 ` Avi Kivity
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox