* [PATCH 0/2][RFC][v2] Batch writes to MMIO
@ 2008-05-15 13:52 Laurent Vivier
  2008-05-15 13:52 ` [PATCH 1/2][RFC][v2] kvm: " Laurent Vivier
  0 siblings, 1 reply; 9+ messages in thread
From: Laurent Vivier @ 2008-05-15 13:52 UTC (permalink / raw)
To: kvm-devel; +Cc: Laurent Vivier, Avi Kivity

These two patches allow batching of writes to MMIO. When the kernel has to
send MMIO writes to userspace, it stores them in memory until it has to
return control to userspace for another reason. This avoids excessive
context switches for operations that can wait.

These patches introduce an ioctl() to define which MMIO zones are allowed
to be delayed.

I ran some benchmarks with iperf and e1000 (average of 10 runs):

	WITH PATCH	WITHOUT PATCH
	257.2 MB/s	193.7 MB/s	(33% faster)

I measured host_state_reload during a WinXP boot:

	WITH PATCH	WITHOUT PATCH
	561397		739708		(24% fewer)

I measured host_state_reload during a VGA text scroll:

	WITH PATCH	WITHOUT PATCH
	3976242		13779849	(70% fewer...)

[PATCH 1/2] kvm: Batch writes to MMIO - kernel part
[PATCH 2/2] kvm-userspace: Batch writes to MMIO - userspace part

Signed-off-by: Laurent Vivier <Laurent.Vivier@bull.net>

-------------------------------------------------------------------------
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse0120000070mrt/direct/01/

^ permalink raw reply	[flat|nested] 9+ messages in thread
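The cover letter's scheme batches delayed MMIO writes into a shared ring buffer with `first`/`last` indices and one slot always left unused to distinguish "empty" from "full". A minimal user-space sketch of that ring discipline (the names and the tiny `MAX_BATCH` of 4 are illustrative, not the patch's actual constants, and the kernel's locking is omitted):

```c
#include <assert.h>
#include <stdint.h>
#include <string.h>

#define MAX_BATCH 4	/* illustrative; the patch sizes the ring to fill a page */

struct mmio_entry {
	uint64_t phys_addr;
	uint32_t len;
	uint8_t  data[8];
};

struct mmio_ring {
	uint32_t first, last;	/* first used slot, first free slot */
	struct mmio_entry e[MAX_BATCH];
};

/* Producer (kernel side): returns 1 if the write was batched, 0 if the
 * ring is full. One slot is always left unused, so first == last means
 * "empty" and (last + 1) % MAX_BATCH == first means "full". */
static int ring_push(struct mmio_ring *r, uint64_t addr,
		     const void *data, uint32_t len)
{
	uint32_t next = (r->last + 1) % MAX_BATCH;

	if (next == r->first)
		return 0;	/* full: caller must exit to userspace */
	r->e[r->last].phys_addr = addr;
	r->e[r->last].len = len;
	memcpy(r->e[r->last].data, data, len);
	r->last = next;
	return 1;
}

/* Consumer (userspace side): drains all pending entries, the way the
 * loop in patch 2/2 replays them through mmio_write(). */
static int ring_drain(struct mmio_ring *r)
{
	int n = 0;

	while (r->first != r->last) {
		/* replay r->e[r->first] against the device model here */
		r->first = (r->first + 1) % MAX_BATCH;
		n++;
	}
	return n;
}
```

With `MAX_BATCH` of 4, three writes can be queued before the producer reports "full" and forces an exit to userspace, matching the "always one unused entry" comment in the kernel patch.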
* [PATCH 1/2][RFC][v2] kvm: Batch writes to MMIO
  2008-05-15 13:52 [PATCH 0/2][RFC][v2] Batch writes to MMIO Laurent Vivier
@ 2008-05-15 13:52 ` Laurent Vivier
  2008-05-15 13:52   ` [PATCH 2/2][RFC][v2] kvm-userspace: " Laurent Vivier
  [not found]        ` <482FDD1E.2090304@qumranet.com>
  0 siblings, 2 replies; 9+ messages in thread
From: Laurent Vivier @ 2008-05-15 13:52 UTC (permalink / raw)
To: kvm-devel; +Cc: Laurent Vivier, Avi Kivity

This patch is the kernel part of the "batch writes to MMIO" patch.

It introduces the ioctl interface used to define the MMIO zones that are
allowed to be delayed. Inside a zone, we can define sub-parts that must
not be delayed.

If an MMIO write can be delayed, it is stored in a ring buffer which is
common to all VCPUs.

Signed-off-by: Laurent Vivier <Laurent.Vivier@bull.net>
---
 arch/x86/kvm/x86.c         |  172 ++++++++++++++++++++++++++++++++++++++++++++
 include/asm-x86/kvm.h      |    7 ++
 include/asm-x86/kvm_host.h |   23 ++++++
 include/linux/kvm.h        |   16 ++++
 virt/kvm/kvm_main.c        |    3 +
 5 files changed, 221 insertions(+), 0 deletions(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index dab3d4f..930986b 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1518,6 +1518,103 @@ out:
 	return r;
 }
 
+static struct kvm_delayed_mmio_zone *kvm_mmio_find_zone(struct kvm *kvm,
+							u64 addr, u32 size)
+{
+	int i;
+	struct kvm_delayed_mmio_zone *zone;
+
+	for (i = 0; i < kvm->arch.nb_mmio_zones; i++) {
+		zone = &kvm->arch.mmio_zone[i];
+
+		/* (addr,size) is fully included in
+		 * (zone->addr, zone->size)
+		 */
+
+		if (zone->addr <= addr &&
+		    addr + size <= zone->addr + zone->size)
+			return zone;
+	}
+	return NULL;
+}
+
+static struct kvm_excluded_mmio_zone *
+kvm_mmio_find_excluded(struct kvm_delayed_mmio_zone *zone, u64 addr, u32 size)
+{
+	static struct kvm_excluded_mmio_zone *excluded;
+	int i;
+
+	addr -= zone->addr;
+	for (i = 0; i < zone->nb_excluded_zones; i++) {
+		excluded = &zone->excluded[i];
+
+		if ((excluded->offset <= addr &&
+		     addr < excluded->offset + excluded->size) ||
+		    (excluded->offset < addr + size &&
+		     addr + size <= excluded->offset + excluded->size))
+			return excluded;
+	}
+	return NULL;
+}
+
+static int kvm_is_delayed_mmio(struct kvm *kvm, u64 addr, u32 size)
+{
+	struct kvm_delayed_mmio_zone *zone;
+	struct kvm_excluded_mmio_zone *excluded;
+
+	zone = kvm_mmio_find_zone(kvm, addr, size);
+	if (zone == NULL)
+		return 0;	/* not a delayed MMIO address */
+
+	excluded = kvm_mmio_find_excluded(zone, addr, size);
+	return excluded == NULL;
+}
+
+static int kvm_vm_ioctl_set_mmio(struct kvm *kvm,
+				 struct kvm_mmio_zone *zone)
+{
+	struct kvm_delayed_mmio_zone *z;
+
+	if (zone->is_delayed &&
+	    kvm->arch.nb_mmio_zones >= KVM_MAX_DELAYED_MMIO_ZONE)
+		return -ENOMEM;
+
+	if (zone->is_delayed) {
+
+		/* already defined ? */
+
+		if (kvm_mmio_find_zone(kvm, zone->addr, 1) ||
+		    kvm_mmio_find_zone(kvm, zone->addr + zone->size - 1, 1))
+			return 0;
+
+		z = &kvm->arch.mmio_zone[kvm->arch.nb_mmio_zones];
+		z->addr = zone->addr;
+		z->size = zone->size;
+		kvm->arch.nb_mmio_zones++;
+		return 0;
+	}
+
+	/* exclude some parts of the delayed MMIO zone */
+
+	z = kvm_mmio_find_zone(kvm, zone->addr, zone->size);
+	if (z == NULL)
+		return -EINVAL;
+
+	if (z->nb_excluded_zones >= KVM_MAX_EXCLUDED_MMIO_ZONE)
+		return -ENOMEM;
+
+	if (kvm_mmio_find_excluded(z, zone->addr, 1) ||
+	    kvm_mmio_find_excluded(z, zone->addr + zone->size - 1, 1))
+		return 0;
+
+	z->excluded[z->nb_excluded_zones].offset = zone->addr - z->addr;
+	z->excluded[z->nb_excluded_zones].size = zone->size;
+	z->nb_excluded_zones++;
+
+	return 0;
+}
+
 long kvm_arch_vm_ioctl(struct file *filp,
 		       unsigned int ioctl, unsigned long arg)
 {
@@ -1671,6 +1768,18 @@ long kvm_arch_vm_ioctl(struct file *filp,
 		r = 0;
 		break;
 	}
+	case KVM_SET_MMIO: {
+		struct kvm_mmio_zone zone;
+		r = -EFAULT;
+		if (copy_from_user(&zone, argp, sizeof zone))
+			goto out;
+		r = -ENXIO;
+		r = kvm_vm_ioctl_set_mmio(kvm, &zone);
+		if (r)
+			goto out;
+		r = 0;
+		break;
+	}
 	default:
 		;
 	}
@@ -2706,6 +2815,52 @@ static void vapic_exit(struct kvm_vcpu *vcpu)
 	mark_page_dirty(vcpu->kvm, apic->vapic_addr >> PAGE_SHIFT);
 }
 
+static int batch_mmio(struct kvm_vcpu *vcpu)
+{
+	struct kvm_batch *batch = vcpu->kvm->arch.batch;
+	spinlock_t *lock = &vcpu->kvm->arch.batch_lock;
+	int next;
+
+	/* check if this MMIO can be delayed */
+
+	if (!kvm_is_delayed_mmio(vcpu->kvm,
+				 vcpu->mmio_phys_addr, vcpu->mmio_size))
+		return 0;
+
+	/* check if ring is full
+	 * we have no lock on "first"
+	 * as it can only increase we can only have
+	 * a false "full".
+	 */
+
+	spin_lock(lock);
+
+	/* last is the first free entry
+	 * check if we don't meet the first used entry
+	 * there is always one unused entry in the buffer
+	 */
+
+	next = (batch->last + 1) % KVM_MAX_BATCH;
+	if (next == batch->first) {
+		/* full */
+		spin_unlock(lock);
+		return 0;
+	}
+
+	/* batch it */
+
+	/* copy data in first free entry of the ring */
+
+	batch->mmio[batch->last].phys_addr = vcpu->mmio_phys_addr;
+	batch->mmio[batch->last].len = vcpu->mmio_size;
+	memcpy(batch->mmio[batch->last].data, vcpu->mmio_data, vcpu->mmio_size);
+	batch->last = next;
+
+	spin_unlock(lock);
+
+	return 1;
+}
+
 static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
 	int r;
@@ -2857,6 +3012,11 @@ again:
 		goto again;
 	}
 
+	if (!r &&
+	    vcpu->mmio_is_write && kvm_run->exit_reason == KVM_EXIT_MMIO
+	    && !need_resched() && batch_mmio(vcpu))
+		goto again;
+
 out:
 	up_read(&vcpu->kvm->slots_lock);
 	if (r > 0) {
@@ -3856,12 +4016,22 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
 
 struct kvm *kvm_arch_create_vm(void)
 {
 	struct kvm *kvm = kzalloc(sizeof(struct kvm), GFP_KERNEL);
+	struct page *page;
 
 	if (!kvm)
 		return ERR_PTR(-ENOMEM);
 
+	page = alloc_page(GFP_KERNEL | __GFP_ZERO);
+	if (!page) {
+		kfree(kvm);
+		return ERR_PTR(-ENOMEM);
+	}
+
 	INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
 
+	kvm->arch.batch_lock = __SPIN_LOCK_UNLOCKED(batch_lock);
+	kvm->arch.batch = (struct kvm_batch *)page_address(page);
+
 	return kvm;
 }
 
@@ -3902,6 +4072,8 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
 		put_page(kvm->arch.apic_access_page);
 	if (kvm->arch.ept_identity_pagetable)
 		put_page(kvm->arch.ept_identity_pagetable);
+	if (kvm->arch.batch)
+		free_page((unsigned long)kvm->arch.batch);
 	kfree(kvm);
 }
 
diff --git a/include/asm-x86/kvm.h b/include/asm-x86/kvm.h
index 6f18408..3c4a611 100644
--- a/include/asm-x86/kvm.h
+++ b/include/asm-x86/kvm.h
@@ -209,6 +209,13 @@ struct kvm_pit_state {
 	struct kvm_pit_channel_state channels[3];
 };
 
+struct kvm_mmio_zone {
+	__u8 is_delayed;
+	__u8 pad[3];
+	__u32 size;
+	__u64 addr;
+};
+
 #define KVM_TRC_INJ_VIRQ         (KVM_TRC_HANDLER + 0x02)
 #define KVM_TRC_REDELIVER_EVT    (KVM_TRC_HANDLER + 0x03)
 #define KVM_TRC_PEND_INTR        (KVM_TRC_HANDLER + 0x04)
diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h
index 1466c3f..df42cdb 100644
--- a/include/asm-x86/kvm_host.h
+++ b/include/asm-x86/kvm_host.h
@@ -26,6 +26,7 @@
 #define KVM_PRIVATE_MEM_SLOTS 4
 
 #define KVM_PIO_PAGE_OFFSET 1
+#define KVM_MMIO_PAGE_OFFSET 2
 
 #define CR3_PAE_RESERVED_BITS ((X86_CR3_PWT | X86_CR3_PCD) - 1)
 #define CR3_NONPAE_RESERVED_BITS ((PAGE_SIZE-1) & ~(X86_CR3_PWT | X86_CR3_PCD))
@@ -293,6 +294,21 @@ struct kvm_mem_alias {
 	gfn_t target_gfn;
 };
 
+#define KVM_MAX_DELAYED_MMIO_ZONE 10
+#define KVM_MAX_EXCLUDED_MMIO_ZONE 10
+
+struct kvm_excluded_mmio_zone {
+	u32 offset;
+	u32 size;
+};
+
+struct kvm_delayed_mmio_zone {
+	u64 addr;
+	u32 size;
+	u32 nb_excluded_zones;
+	struct kvm_excluded_mmio_zone excluded[KVM_MAX_EXCLUDED_MMIO_ZONE];
+};
+
 struct kvm_arch{
 	int naliases;
 	struct kvm_mem_alias aliases[KVM_ALIAS_SLOTS];
@@ -317,6 +333,13 @@ struct kvm_arch{
 	struct page *ept_identity_pagetable;
 	bool ept_identity_pagetable_done;
+
+	/* MMIO batch */
+
+	spinlock_t batch_lock;
+	struct kvm_batch *batch;
+	int nb_mmio_zones;
+	struct kvm_delayed_mmio_zone mmio_zone[KVM_MAX_DELAYED_MMIO_ZONE];
 };
 
 struct kvm_vm_stat {
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index a281afe..b57010d 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -173,6 +173,21 @@ struct kvm_run {
 	};
 };
 
+struct kvm_mmio {
+	__u64 phys_addr;
+	__u32 len;
+	__u32 pad;
+	__u8  data[8];
+};
+
+struct kvm_batch {
+	__u32 first, last;
+	struct kvm_mmio mmio[0];
+};
+
+#define KVM_MAX_BATCH ((PAGE_SIZE - sizeof(struct kvm_batch)) / \
+		       sizeof(struct kvm_mmio))
+
 /* for KVM_TRANSLATE */
 struct kvm_translation {
 	/* in */
@@ -371,6 +386,7 @@ struct kvm_trace_rec {
 #define KVM_CREATE_PIT	  _IO(KVMIO,  0x64)
 #define KVM_GET_PIT	  _IOWR(KVMIO, 0x65, struct kvm_pit_state)
 #define KVM_SET_PIT	  _IOR(KVMIO,  0x66, struct kvm_pit_state)
+#define KVM_SET_MMIO	  _IOW(KVMIO,  0x67, struct kvm_mmio_zone)
 
 /*
  * ioctls for vcpu fds
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 64ed402..c8f1bdf 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -824,6 +824,8 @@ static int kvm_vcpu_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 #ifdef CONFIG_X86
 	else if (vmf->pgoff == KVM_PIO_PAGE_OFFSET)
 		page = virt_to_page(vcpu->arch.pio_data);
+	else if (vmf->pgoff == KVM_MMIO_PAGE_OFFSET)
+		page = virt_to_page(vcpu->kvm->arch.batch);
 #endif
 	else
 		return VM_FAULT_SIGBUS;
@@ -1230,6 +1232,7 @@ static long kvm_dev_ioctl(struct file *filp,
 		r = PAGE_SIZE;     /* struct kvm_run */
 #ifdef CONFIG_X86
 		r += PAGE_SIZE;    /* pio data page */
+		r += PAGE_SIZE;    /* mmio batch page */
 #endif
 		break;
 	case KVM_TRACE_ENABLE:
-- 
1.5.2.4

^ permalink raw reply related	[flat|nested] 9+ messages in thread
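A quick way to sanity-check the `KVM_MAX_BATCH` sizing in the patch above: mirroring the `include/linux/kvm.h` layout in userspace, `struct kvm_mmio` comes out at 24 bytes and the `kvm_batch` header at 8, so one page holds 170 batched writes. This is a sketch under two assumptions not stated in the patch: 4 KiB pages and x86-64 structure alignment.

```c
#include <assert.h>
#include <stdint.h>

/* Userspace mirror of the structures added to include/linux/kvm.h. */
struct kvm_mmio {
	uint64_t phys_addr;
	uint32_t len;
	uint32_t pad;
	uint8_t  data[8];
};

struct kvm_batch {
	uint32_t first, last;
	struct kvm_mmio mmio[];	/* flexible array filling the rest of the page */
};

#define PAGE_SIZE_ASSUMED 4096	/* assumption: 4 KiB pages */

/* Same formula as the patch's KVM_MAX_BATCH. */
#define KVM_MAX_BATCH ((PAGE_SIZE_ASSUMED - sizeof(struct kvm_batch)) / \
		       sizeof(struct kvm_mmio))
```

Under those assumptions the ring capacity is (4096 - 8) / 24 = 170 entries, of which one always stays unused, so up to 169 MMIO writes can be coalesced per exit.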
* [PATCH 2/2][RFC][v2] kvm-userspace: Batch writes to MMIO
  2008-05-15 13:52 ` [PATCH 1/2][RFC][v2] kvm: " Laurent Vivier
@ 2008-05-15 13:52   ` Laurent Vivier
  [not found]        ` <482FDD1E.2090304@qumranet.com>
  1 sibling, 0 replies; 9+ messages in thread
From: Laurent Vivier @ 2008-05-15 13:52 UTC (permalink / raw)
To: kvm-devel; +Cc: Laurent Vivier, Avi Kivity

This patch is the userspace part of the "batch writes to MMIO" patch.

It defines the delayed MMIO zones using kvm_set_mmio() (for VGA and e1000),
and it empties the ring buffer and processes the MMIO accesses.

Signed-off-by: Laurent Vivier <Laurent.Vivier@bull.net>
---
 libkvm/libkvm-x86.c  |   18 ++++++++++++++++++
 libkvm/libkvm.c      |   13 +++++++++++++
 libkvm/libkvm.h      |    2 ++
 qemu/hw/cirrus_vga.c |    2 ++
 qemu/hw/e1000.c      |    8 ++++++++
 qemu/hw/vga.c        |    4 ++++
 qemu/qemu-kvm.c      |    6 ++++++
 qemu/qemu-kvm.h      |    2 ++
 8 files changed, 55 insertions(+), 0 deletions(-)

diff --git a/libkvm/libkvm-x86.c b/libkvm/libkvm-x86.c
index d46fdcc..911e079 100644
--- a/libkvm/libkvm-x86.c
+++ b/libkvm/libkvm-x86.c
@@ -391,6 +391,24 @@ int kvm_set_pit(kvm_context_t kvm, struct kvm_pit_state *s)
 
 #endif
 
+int kvm_set_mmio(kvm_context_t kvm,
+		 uint8_t is_delayed, uint64_t addr, uint32_t size)
+{
+	struct kvm_mmio_zone zone;
+	int r;
+
+	zone.is_delayed = is_delayed;
+	zone.addr = addr;
+	zone.size = size;
+
+	r = ioctl(kvm->vm_fd, KVM_SET_MMIO, &zone);
+	if (r == -1) {
+		r = -errno;
+		perror("kvm_set_mmio");
+	}
+	return r;
+}
+
 void kvm_show_code(kvm_context_t kvm, int vcpu)
 {
 #define SHOW_CODE_LEN 50
diff --git a/libkvm/libkvm.c b/libkvm/libkvm.c
index d1e95a4..b891630 100644
--- a/libkvm/libkvm.c
+++ b/libkvm/libkvm.c
@@ -861,6 +861,9 @@ int kvm_run(kvm_context_t kvm, int vcpu)
 	int r;
 	int fd = kvm->vcpu_fd[vcpu];
 	struct kvm_run *run = kvm->run[vcpu];
+#if defined(__x86_64__) || defined(__i386__)
+	struct kvm_batch *batch = (void *)run + 2 * PAGE_SIZE;
+#endif
 
 again:
 	if (!kvm->irqchip_in_kernel)
@@ -879,6 +882,16 @@ again:
 
 	post_kvm_run(kvm, vcpu);
 
+#if defined(__x86_64__) || defined(__i386__)
+	while (batch->first != batch->last) {
+		kvm->callbacks->mmio_write(kvm->opaque,
+				batch->mmio[batch->first].phys_addr,
+				&batch->mmio[batch->first].data[0],
+				batch->mmio[batch->first].len);
+		batch->first = (batch->first + 1) % KVM_MAX_BATCH;
+	}
+#endif
+
 	if (r == -1) {
 		r = handle_io_window(kvm);
 		goto more;
diff --git a/libkvm/libkvm.h b/libkvm/libkvm.h
index 31c0d59..1f453e1 100644
--- a/libkvm/libkvm.h
+++ b/libkvm/libkvm.h
@@ -448,6 +448,8 @@ int kvm_get_dirty_pages_range(kvm_context_t kvm, unsigned long phys_addr,
 			      unsigned long end_addr, void *buf, void *opaque,
 			      int (*cb)(unsigned long start, unsigned long len,
 					void *bitmap, void *opaque));
+int kvm_set_mmio(kvm_context_t kvm,
+		 uint8_t is_delayed, uint64_t addr, uint32_t size);
 
 /*!
  * \brief Create a memory alias
diff --git a/qemu/hw/cirrus_vga.c b/qemu/hw/cirrus_vga.c
index 2c4aeec..4ef8085 100644
--- a/qemu/hw/cirrus_vga.c
+++ b/qemu/hw/cirrus_vga.c
@@ -3291,6 +3291,8 @@ static void cirrus_init_common(CirrusVGAState * s, int device_id, int is_pci)
 					   cirrus_vga_mem_write, s);
     cpu_register_physical_memory(isa_mem_base + 0x000a0000, 0x20000,
				 vga_io_memory);
+    if (kvm_enabled())
+	qemu_kvm_set_mmio(1, isa_mem_base + 0x000a0000, 0x20000);
 
     s->sr[0x06] = 0x0f;
     if (device_id == CIRRUS_ID_CLGD5446) {
diff --git a/qemu/hw/e1000.c b/qemu/hw/e1000.c
index 0728539..d223631 100644
--- a/qemu/hw/e1000.c
+++ b/qemu/hw/e1000.c
@@ -26,6 +26,7 @@
 #include "hw.h"
 #include "pci.h"
 #include "net.h"
+#include "qemu-kvm.h"
 
 #include "e1000_hw.h"
 
@@ -938,6 +939,13 @@ e1000_mmio_map(PCIDevice *pci_dev, int region_num,
     d->mmio_base = addr;
     cpu_register_physical_memory(addr, PNPMMIO_SIZE, d->mmio_index);
+
+    if (kvm_enabled()) {
+	qemu_kvm_set_mmio(1, addr, PNPMMIO_SIZE);
+	qemu_kvm_set_mmio(0, addr + E1000_TCTL, 4);
+	qemu_kvm_set_mmio(0, addr + E1000_TDT, 4);
+	qemu_kvm_set_mmio(0, addr + E1000_ICR, 4);
+    }
 }
 
 static int
diff --git a/qemu/hw/vga.c b/qemu/hw/vga.c
index 3a49573..844c2a7 100644
--- a/qemu/hw/vga.c
+++ b/qemu/hw/vga.c
@@ -2257,6 +2257,8 @@ void vga_init(VGAState *s)
     vga_io_memory = cpu_register_io_memory(0, vga_mem_read, vga_mem_write, s);
     cpu_register_physical_memory(isa_mem_base + 0x000a0000, 0x20000,
                                 vga_io_memory);
+    if (kvm_enabled())
+	qemu_kvm_set_mmio(1, isa_mem_base + 0x000a0000, 0x20000);
 }
 
 /* Memory mapped interface */
@@ -2332,6 +2334,8 @@ static void vga_mm_init(VGAState *s, target_phys_addr_t vram_base,
     cpu_register_physical_memory(ctrl_base, 0x100000, s_ioport_ctrl);
     s->bank_offset = 0;
     cpu_register_physical_memory(vram_base + 0x000a0000, 0x20000,
                                 vga_io_memory);
+    if (kvm_enabled())
+	qemu_kvm_set_mmio(1, vram_base + 0x000a0000, 0x20000);
 }
 
 int isa_vga_init(DisplayState *ds, uint8_t *vga_ram_base,
diff --git a/qemu/qemu-kvm.c b/qemu/qemu-kvm.c
index e522269..9f03ab1 100644
--- a/qemu/qemu-kvm.c
+++ b/qemu/qemu-kvm.c
@@ -955,3 +955,9 @@ void kvm_mutex_lock(void)
     pthread_mutex_lock(&qemu_mutex);
     cpu_single_env = NULL;
 }
+
+int qemu_kvm_set_mmio(int is_delayed,
+		      target_phys_addr_t addr, unsigned int size)
+{
+    return kvm_set_mmio(kvm_context, is_delayed, addr, size);
+}
diff --git a/qemu/qemu-kvm.h b/qemu/qemu-kvm.h
index 21606e9..37d4d11 100644
--- a/qemu/qemu-kvm.h
+++ b/qemu/qemu-kvm.h
@@ -75,6 +75,8 @@ int handle_tpr_access(void *opaque, int vcpu,
 void kvm_tpr_vcpu_start(CPUState *env);
 
 int qemu_kvm_get_dirty_pages(unsigned long phys_addr, void *buf);
+int qemu_kvm_set_mmio(int is_delayed,
+		      target_phys_addr_t addr, unsigned int size);
 
 void qemu_kvm_system_reset_request(void);
-- 
1.5.2.4

^ permalink raw reply related	[flat|nested] 9+ messages in thread
[parent not found: <482FDD1E.2090304@qumranet.com>]
* Re: [PATCH 1/2][RFC][v2] kvm: Batch writes to MMIO
  [not found] ` <482FDD1E.2090304@qumranet.com>
@ 2008-05-19  8:36   ` Laurent Vivier
  2008-05-19 10:21     ` Avi Kivity
  2008-05-20  3:04     ` Hollis Blanchard
  0 siblings, 2 replies; 9+ messages in thread
From: Laurent Vivier @ 2008-05-19  8:36 UTC (permalink / raw)
To: Avi Kivity; +Cc: kvm, Hollis Blanchard, Zhang, Xiantao

Thank you for your comments. I agree with all of them.

I'll move my code to emulator_write_emulated_onepage().
I'll use the kvm_io_device mechanism: is there an existing ioctl() to
register an MMIO zone?
I'll add a KVM_CAP_

Regards,
Laurent

Le dimanche 18 mai 2008 à 10:39 +0300, Avi Kivity a écrit :
> [resend to new list]
>
> Laurent Vivier wrote:
> > This patch is the kernel part of the "batch writes to MMIO" patch.
> >
> > It introduces the ioctl interface used to define the MMIO zones that
> > are allowed to be delayed. Inside a zone, we can define sub-parts that
> > must not be delayed.
> >
> > If an MMIO write can be delayed, it is stored in a ring buffer which
> > is common to all VCPUs.
> >
> > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> > index dab3d4f..930986b 100644
> > --- a/arch/x86/kvm/x86.c
> > +++ b/arch/x86/kvm/x86.c
> > @@ -1518,6 +1518,103 @@ out:
> >  	return r;
> >  }
> >
> > +static struct kvm_delayed_mmio_zone *kvm_mmio_find_zone(struct kvm *kvm,
> > +							u64 addr, u32 size)
>
> gpa_t addr
>
> > +{
> > +	int i;
> > +	struct kvm_delayed_mmio_zone *zone;
> > +
> > +	for (i = 0; i < kvm->arch.nb_mmio_zones; i++) {
> > +		zone = &kvm->arch.mmio_zone[i];
> > +
> > +		/* (addr,size) is fully included in
> > +		 * (zone->addr, zone->size)
> > +		 */
> > +
> > +		if (zone->addr <= addr &&
> > +		    addr + size <= zone->addr + zone->size)
> > +			return zone;
> > +	}
> > +	return NULL;
> > +}
> > +
> > +static struct kvm_excluded_mmio_zone *
> > +kvm_mmio_find_excluded(struct kvm_delayed_mmio_zone *zone, u64 addr, u32 size)
> > +{
> > +	static struct kvm_excluded_mmio_zone *excluded;
> > +	int i;
> > +
> > +	addr -= zone->addr;
> > +	for (i = 0; i < zone->nb_excluded_zones; i++) {
> > +		excluded = &zone->excluded[i];
> > +
> > +		if ((excluded->offset <= addr &&
> > +		     addr < excluded->offset + excluded->size) ||
> > +		    (excluded->offset < addr + size &&
> > +		     addr + size <= excluded->offset +
> > +		     excluded->size))
> > +			return excluded;
> > +	}
> > +	return NULL;
> > +}
> > +
>
> Instead of included/excluded zones (with the default excluded), I
> suggest having just included zones (with the default excluded). If the
> user specifies an excluded zone within an included zone, simply split
> the included zone into two:
>
> [100, 200) + ^[150, 160) = [100, 150) + [160, 200)
>
> Or maybe, push all this logic to userspace and only allow adding include
> zones (and clearing previously set include zones).
>
> > +static int kvm_vm_ioctl_set_mmio(struct kvm *kvm,
> > +				 struct kvm_mmio_zone *zone)
>
> "mmio" is too generic a name. Please use delayed mmio or batched mmio
> consistently.
>
> > +{
> > +	struct kvm_delayed_mmio_zone *z;
> > +
> > +	if (zone->is_delayed &&
> > +	    kvm->arch.nb_mmio_zones >= KVM_MAX_DELAYED_MMIO_ZONE)
> > +		return -ENOMEM;
>
> -ENOBUFS. -ENOMEM is generally not recoverable, ENOBUFS means just this
> specific operation failed.
>
> > +
> > +	if (zone->is_delayed) {
> > +
> > +		/* already defined ? */
> > +
> > +		if (kvm_mmio_find_zone(kvm, zone->addr, 1) ||
> > +		    kvm_mmio_find_zone(kvm, zone->addr + zone->size - 1, 1))
> > +			return 0;
> > +
> > +		z = &kvm->arch.mmio_zone[kvm->arch.nb_mmio_zones];
> > +		z->addr = zone->addr;
> > +		z->size = zone->size;
> > +		kvm->arch.nb_mmio_zones++;
> > +		return 0;
> > +	}
> > +
> > +	/* exclude some parts of the delayed MMIO zone */
> > +
> > +	z = kvm_mmio_find_zone(kvm, zone->addr, zone->size);
> > +	if (z == NULL)
> > +		return -EINVAL;
> > +
> > +	if (z->nb_excluded_zones >= KVM_MAX_EXCLUDED_MMIO_ZONE)
> > +		return -ENOMEM;
> > +
> > +	if (kvm_mmio_find_excluded(z, zone->addr, 1) ||
> > +	    kvm_mmio_find_excluded(z, zone->addr + zone->size - 1, 1))
> > +		return 0;
> > +
> > +	z->excluded[z->nb_excluded_zones].offset = zone->addr - z->addr;
> > +	z->excluded[z->nb_excluded_zones].size = zone->size;
> > +	z->nb_excluded_zones++;
> > +
> > +	return 0;
> > +}
> > +
>
> The whole thing needs locking against concurrent calls and against users
> of the structure.
>
> > +static int batch_mmio(struct kvm_vcpu *vcpu)
> > +{
> > +	struct kvm_batch *batch = vcpu->kvm->arch.batch;
> > +	spinlock_t *lock = &vcpu->kvm->arch.batch_lock;
> > +	int next;
>
> Not sure a new lock is warranted. I think we can reuse kvm->lock.
>
> > +
> > +	/* check if this MMIO can be delayed */
> > +
> > +	if (!kvm_is_delayed_mmio(vcpu->kvm,
> > +				 vcpu->mmio_phys_addr, vcpu->mmio_size))
> > +		return 0;
>
> This check is performed without any locks.
>
> > +
> >  static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
> >  {
> >  	int r;
> > @@ -2857,6 +3012,11 @@ again:
> >  		goto again;
> >  	}
> >
> > +	if (!r &&
> > +	    vcpu->mmio_is_write && kvm_run->exit_reason == KVM_EXIT_MMIO
> > +	    && !need_resched() && batch_mmio(vcpu))
> > +		goto again;
> > +
>
> Better to do this in emulator_write_emulated_onepage(), similar to how
> local mmio is handled.
>
> In fact, we can register batched mmio regions using the existing
> kvm_io_device mechanism.
>
> > +#define KVM_MAX_DELAYED_MMIO_ZONE 10
> > +#define KVM_MAX_EXCLUDED_MMIO_ZONE 10
>
> We'll want more, esp. as they're cheap.
>
> >  /*
> >   * ioctls for vcpu fds
> > diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> > index 64ed402..c8f1bdf 100644
> > --- a/virt/kvm/kvm_main.c
> > +++ b/virt/kvm/kvm_main.c
> > @@ -824,6 +824,8 @@ static int kvm_vcpu_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
> >  #ifdef CONFIG_X86
> >  	else if (vmf->pgoff == KVM_PIO_PAGE_OFFSET)
> >  		page = virt_to_page(vcpu->arch.pio_data);
> > +	else if (vmf->pgoff == KVM_MMIO_PAGE_OFFSET)
> > +		page = virt_to_page(vcpu->kvm->arch.batch);
> >  #endif
>
> I expect this will be interesting for ppc and ia64.
>
> I didn't see a KVM_CAP_ for this.
>
-- 
------------- Laurent.Vivier@bull.net ---------------
"The best way to predict the future is to invent it."
	- Alan Kay

^ permalink raw reply	[flat|nested] 9+ messages in thread
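Avi's alternative above, keeping only included zones and splitting one in two when a sub-range is carved out, is straightforward to implement. A hypothetical sketch (illustrative names, not code from the patch), using his half-open notation [100, 200) + ^[150, 160) = [100, 150) + [160, 200):

```c
#include <assert.h>
#include <stdint.h>

/* A half-open delayed-MMIO zone [start, end). */
struct zone {
	uint64_t start, end;
};

/* Removes the overlap with the excluded range [xs, xe) from z, writing
 * the surviving pieces (at most two) into out[]; returns how many
 * pieces remain. An exclusion that covers z entirely yields 0; one that
 * misses z entirely yields the original zone unchanged. */
static int zone_exclude(struct zone z, uint64_t xs, uint64_t xe,
			struct zone out[2])
{
	int n = 0;

	if (xs > z.start)	/* piece to the left of the excluded range */
		out[n++] = (struct zone){ z.start, xs < z.end ? xs : z.end };
	if (xe < z.end)		/* piece to the right of the excluded range */
		out[n++] = (struct zone){ xe > z.start ? xe : z.start, z.end };
	return n;
}
```

This removes the need for a separate excluded-zone array and its second lookup: `kvm_is_delayed_mmio()` would only have to check membership in the included list.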
* Re: [PATCH 1/2][RFC][v2] kvm: Batch writes to MMIO
  2008-05-19  8:36 ` [PATCH 1/2][RFC][v2] kvm: " Laurent Vivier
@ 2008-05-19 10:21   ` Avi Kivity
  2008-05-20  3:04   ` Hollis Blanchard
  1 sibling, 0 replies; 9+ messages in thread
From: Avi Kivity @ 2008-05-19 10:21 UTC (permalink / raw)
To: Laurent Vivier; +Cc: kvm, Hollis Blanchard, Zhang, Xiantao

Laurent Vivier wrote:
> I'll use kvm_io_device mechanism: is there an existing ioctl() to
> register MMIO zone

No, you still need your new ioctl.

-- 
error compiling committee.c: too many arguments to function

^ permalink raw reply	[flat|nested] 9+ messages in thread
* Re: [PATCH 1/2][RFC][v2] kvm: Batch writes to MMIO
  2008-05-19  8:36 ` [PATCH 1/2][RFC][v2] kvm: " Laurent Vivier
  2008-05-19 10:21 ` Avi Kivity
@ 2008-05-20  3:04   ` Hollis Blanchard
  2008-05-20  7:39     ` Laurent Vivier
  1 sibling, 1 reply; 9+ messages in thread
From: Hollis Blanchard @ 2008-05-20  3:04 UTC (permalink / raw)
To: Laurent Vivier; +Cc: Avi Kivity, kvm, Zhang, Xiantao

On Monday 19 May 2008 03:36:30 Laurent Vivier wrote:
> Thank you for your comments.
>
> I agree with all.
>
> I'll move my code to emulator_write_emulated_onepage().
> I'll use the kvm_io_device mechanism: is there an existing ioctl() to
> register an MMIO zone?
> I'll add a KVM_CAP_

Does that mean you'll move "batch" from kvm_vcpu_arch to kvm_vcpu as well?

-- 
Hollis Blanchard
IBM Linux Technology Center

^ permalink raw reply	[flat|nested] 9+ messages in thread
* Re: [PATCH 1/2][RFC][v2] kvm: Batch writes to MMIO
  2008-05-20  3:04 ` Hollis Blanchard
@ 2008-05-20  7:39   ` Laurent Vivier
  2008-05-20 15:43     ` Hollis Blanchard
  0 siblings, 1 reply; 9+ messages in thread
From: Laurent Vivier @ 2008-05-20  7:39 UTC (permalink / raw)
To: Hollis Blanchard; +Cc: Avi Kivity, kvm, Zhang, Xiantao

Le lundi 19 mai 2008 à 22:04 -0500, Hollis Blanchard a écrit :
> On Monday 19 May 2008 03:36:30 Laurent Vivier wrote:
> > Thank you for your comments.
> >
> > I agree with all.
> >
> > I'll move my code to emulator_write_emulated_onepage().
> > I'll use the kvm_io_device mechanism: is there an existing ioctl() to
> > register an MMIO zone?
> > I'll add a KVM_CAP_
>
> Does that mean you'll move "batch" from kvm_vcpu_arch to kvm_vcpu as well?

No, except if people like you (non-x86 developers) ask for it
(but I can't compile/test it).

Regards,
Laurent
-- 
------------- Laurent.Vivier@bull.net ---------------
"The best way to predict the future is to invent it."
	- Alan Kay

^ permalink raw reply	[flat|nested] 9+ messages in thread
* Re: [PATCH 1/2][RFC][v2] kvm: Batch writes to MMIO
  2008-05-20  7:39 ` Laurent Vivier
@ 2008-05-20 15:43   ` Hollis Blanchard
  2008-05-20 15:52     ` Laurent Vivier
  0 siblings, 1 reply; 9+ messages in thread
From: Hollis Blanchard @ 2008-05-20 15:43 UTC (permalink / raw)
To: Laurent Vivier; +Cc: Avi Kivity, kvm, Zhang, Xiantao

On Tuesday 20 May 2008 02:39:42 Laurent Vivier wrote:
> Le lundi 19 mai 2008 à 22:04 -0500, Hollis Blanchard a écrit :
> > On Monday 19 May 2008 03:36:30 Laurent Vivier wrote:
> > > Thank you for your comments.
> > >
> > > I agree with all.
> > >
> > > I'll move my code to emulator_write_emulated_onepage().
> > > I'll use the kvm_io_device mechanism: is there an existing ioctl() to
> > > register an MMIO zone?
> > > I'll add a KVM_CAP_
> >
> > Does that mean you'll move "batch" from kvm_vcpu_arch to kvm_vcpu as well?
>
> No, except if people like you (non-x86 developers) ask for it
> (but I can't compile/test it).

Please include it in the existing CONFIG_HAS_IOMEM block inside struct
kvm_vcpu.

Most of the new functions you've introduced should similarly go into
kvm_main.c, since there is nothing x86-specific in them.

-- 
Hollis Blanchard
IBM Linux Technology Center

^ permalink raw reply	[flat|nested] 9+ messages in thread
* Re: [PATCH 1/2][RFC][v2] kvm: Batch writes to MMIO
  2008-05-20 15:43 ` Hollis Blanchard
@ 2008-05-20 15:52   ` Laurent Vivier
  0 siblings, 0 replies; 9+ messages in thread
From: Laurent Vivier @ 2008-05-20 15:52 UTC (permalink / raw)
To: Hollis Blanchard; +Cc: Avi Kivity, kvm, Zhang, Xiantao

Le mardi 20 mai 2008 à 10:43 -0500, Hollis Blanchard a écrit :
> Please include it in the existing CONFIG_HAS_IOMEM block inside struct
> kvm_vcpu.
>
> Most of the new functions you've introduced should similarly go into
> kvm_main.c, since there is nothing x86-specific in them.

OK, I'll do it.

Laurent
-- 
------------- Laurent.Vivier@bull.net ---------------
"The best way to predict the future is to invent it."
	- Alan Kay

^ permalink raw reply	[flat|nested] 9+ messages in thread
end of thread, other threads:[~2008-05-20 15:52 UTC | newest]
Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2008-05-15 13:52 [PATCH 0/2][RFC][v2] Batch writes to MMIO Laurent Vivier
2008-05-15 13:52 ` [PATCH 1/2][RFC][v2] kvm: " Laurent Vivier
2008-05-15 13:52 ` [PATCH 2/2][RFC][v2] kvm-userspace: " Laurent Vivier
[not found] ` <482FDD1E.2090304@qumranet.com>
2008-05-19 8:36 ` [PATCH 1/2][RFC][v2] kvm: " Laurent Vivier
2008-05-19 10:21 ` Avi Kivity
2008-05-20 3:04 ` Hollis Blanchard
2008-05-20 7:39 ` Laurent Vivier
2008-05-20 15:43 ` Hollis Blanchard
2008-05-20 15:52 ` Laurent Vivier
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox