public inbox for kvm@vger.kernel.org
 help / color / mirror / Atom feed
* KVM: PCIPT: direct mmio
@ 2008-06-03 11:21 Ben-Ami Yassour
  2008-06-04 14:16 ` Avi Kivity
  0 siblings, 1 reply; 4+ messages in thread
From: Ben-Ami Yassour @ 2008-06-03 11:21 UTC (permalink / raw)
  To: Amit Shah; +Cc: kvm, Han Weidong, Kay, Allen M, Muli Ben-Yehuda

Amit,

Below is the patch for the PCI passthrough tree. It enables a guest to access
a device's memory-mapped I/O regions directly, without requiring the host to
trap and emulate every MMIO access.

This patch requires only userspace changes and it relies on the
kernel patch by Anthony: "Handle vma regions with no backing page". Note 
that this patch requires CONFIG_NUMA to be set. It does require a change 
to the VT-d patch that Allen sent a while ago, to avoid mapping of memory
slots with no backing page.

This patch was tested with the pci-passthrough VT-d using an e1000 NIC.

Regards,
Ben

From 8fe13bcea014a3b896a79fca5d15ddd32050694c Mon Sep 17 00:00:00 2001
From: Ben-Ami Yassour <benami@il.ibm.com>
Date: Tue, 3 Jun 2008 12:34:51 +0300
Subject: [PATCH] KVM: PCIPT: direct mmio

This patch for PCI passthrough devices enables a guest to access a
device's memory mapped I/O regions directly, without requiring the host to trap
and emulate every MMIO access.

Signed-off-by: Ben-Ami Yassour <benami@il.ibm.com>
Signed-off-by: Muli Ben-Yehuda <muli@il.ibm.com>
---
  libkvm/libkvm.c           |   24 ++++++++----
  qemu/hw/pci-passthrough.c |   89 +++++++++++----------------------------------
  qemu/hw/pci-passthrough.h |    2 +
  3 files changed, 40 insertions(+), 75 deletions(-)

diff --git a/libkvm/libkvm.c b/libkvm/libkvm.c
index d1e95a4..ce062cb 100644
--- a/libkvm/libkvm.c
+++ b/libkvm/libkvm.c
@@ -400,7 +400,7 @@ void *kvm_create_userspace_phys_mem(kvm_context_t kvm, unsigned long phys_start,
  {
  	int r;
  	int prot = PROT_READ;
-	void *ptr;
+	void *ptr = NULL;
  	struct kvm_userspace_memory_region memory = {
  		.memory_size = len,
  		.guest_phys_addr = phys_start,
@@ -410,16 +410,24 @@ void *kvm_create_userspace_phys_mem(kvm_context_t kvm, unsigned long phys_start,
  	if (writable)
  		prot |= PROT_WRITE;

-	ptr = mmap(NULL, len, prot, MAP_ANONYMOUS | MAP_SHARED, -1, 0);
-	if (ptr == MAP_FAILED) {
-		fprintf(stderr, "create_userspace_phys_mem: %s", strerror(errno));
-		return 0;
-	}
+	if (len > 0) {
+		ptr = mmap(NULL, len, prot, MAP_ANONYMOUS | MAP_SHARED, -1, 0);
+		if (ptr == MAP_FAILED) {
+			fprintf(stderr, "create_userspace_phys_mem: %s",
+				strerror(errno));
+			return 0;
+		}

-	memset(ptr, 0, len);
+		memset(ptr, 0, len);
+	}

  	memory.userspace_addr = (unsigned long)ptr;
-	memory.slot = get_free_slot(kvm);
+
+	if (len > 0)
+		memory.slot = get_free_slot(kvm);
+	else
+		memory.slot = get_slot(phys_start);
+
  	r = ioctl(kvm->vm_fd, KVM_SET_USER_MEMORY_REGION, &memory);
  	if (r == -1) {
  		fprintf(stderr, "create_userspace_phys_mem: %s", strerror(errno));
diff --git a/qemu/hw/pci-passthrough.c b/qemu/hw/pci-passthrough.c
index 62953ae..1a3e50c 100644
--- a/qemu/hw/pci-passthrough.c
+++ b/qemu/hw/pci-passthrough.c
@@ -25,18 +25,6 @@ typedef __u64 resource_size_t;
  extern kvm_context_t kvm_context;
  extern FILE *logfile;

-CPUReadMemoryFunc *pt_mmio_read_cb[3] = {
-	pt_mmio_readb,
-	pt_mmio_readw,
-	pt_mmio_readl
-};
-
-CPUWriteMemoryFunc *pt_mmio_write_cb[3] = {
-	pt_mmio_writeb,
-	pt_mmio_writew,
-	pt_mmio_writel
-};
-
  //#define PT_DEBUG

  #ifdef PT_DEBUG
@@ -45,47 +33,6 @@ CPUWriteMemoryFunc *pt_mmio_write_cb[3] = {
  #define DEBUG(fmt, args...)
  #endif

-#define pt_mmio_write(suffix, type)					\
-void pt_mmio_write##suffix(void *opaque, target_phys_addr_t e_phys,	\
-				uint32_t value)				\
-{									\
-	pt_region_t *r_access = (pt_region_t *)opaque;			\
-	void *r_virt = (u8 *)r_access->r_virtbase + 			\
-			(e_phys - r_access->e_physbase);		\
-	if (r_access->debug & PT_DEBUG_MMIO) {				\
-		fprintf(logfile, "pt_mmio_write" #suffix		\
-			": e_physbase=%p e_phys=%p r_virt=%p value=%08x\n", \
-			(void *)r_access->e_physbase, (void *)e_phys,	\
-			r_virt, value);					\
-	}								\
-	*(type *)r_virt = (type)value;					\
-}
-
-pt_mmio_write(b, u8)
-pt_mmio_write(w, u16)
-pt_mmio_write(l, u32)
-
-#define pt_mmio_read(suffix, type)					\
-uint32_t pt_mmio_read##suffix(void *opaque, target_phys_addr_t e_phys)	\
-{									\
-	pt_region_t *r_access = (pt_region_t *)opaque;			\
-	void *r_virt = (u8 *)r_access->r_virtbase + 			\
-			(e_phys - r_access->e_physbase);		\
-	uint32_t value = (u32) (*(type *) r_virt);			\
-	if (r_access->debug & PT_DEBUG_MMIO) {				\
-		fprintf(logfile,					\
-			"pt_mmio_read" #suffix ": e_physbase=%p "	\
-			"e_phys=%p r_virt=%p value=%08x\n",		\
-			(void *)r_access->e_physbase,			\
-			(void *)e_phys, r_virt, value);			\
-	}								\
-	return value;							\
-}
-
-pt_mmio_read(b, u8)
-pt_mmio_read(w, u16)
-pt_mmio_read(l, u32)
-
  #define pt_ioport_write(suffix)						\
  void pt_ioport_write##suffix(void *opaque, uint32_t addr, uint32_t value) \
  {									\
@@ -127,22 +74,33 @@ pt_ioport_read(b)
  pt_ioport_read(w)
  pt_ioport_read(l)

-static void pt_iomem_map(PCIDevice * d, int region_num,
-			 uint32_t e_phys, uint32_t e_size, int type)
+void pt_iomem_map(PCIDevice * pci_dev, int region_num, uint32_t e_phys,
+		  uint32_t e_size, int type)
  {
-	pt_dev_t *r_dev = (pt_dev_t *) d;
-
-	r_dev->v_addrs[region_num].e_physbase = e_phys;
+	pt_dev_t *r_dev = (pt_dev_t *) pci_dev;
+	pt_region_t *region = &r_dev->v_addrs[region_num];
+	int first_map = (region->e_size == 0);
+	int ret = 0;

  	DEBUG("e_phys=%08x r_virt=%p type=%d len=%08x region_num=%d \n",
  	      e_phys, r_dev->v_addrs[region_num].r_virtbase, type, e_size,
  	      region_num);

-	cpu_register_physical_memory(e_phys,
-				     r_dev->dev.io_regions[region_num].size,
-				     r_dev->v_addrs[region_num].memory_index);
-}
+	region->e_physbase = e_phys;
+	region->e_size = e_size;
+
+	if (!first_map)
+		kvm_destroy_phys_mem(kvm_context, e_phys, e_size);

+	if (e_size > 0)
+		ret = kvm_register_userspace_phys_mem(kvm_context,
+						      e_phys,
+						      region->r_virtbase,
+						      e_size,
+						      0);
+	if (ret != 0)
+		fprintf(logfile, "Error: create new mapping failed\n");
+}

  static void pt_ioport_map(PCIDevice * pci_dev, int region_num,
  			  uint32_t addr, uint32_t size, int type)
@@ -265,6 +223,8 @@ static int pt_register_regions(pci_region_t * io_regions,
  					(uint32_t) (cur_region->base_addr));
  				return (-1);
  			}
+			pci_dev->v_addrs[i].r_size = cur_region->size;
+			pci_dev->v_addrs[i].e_size = 0;

  			/* add offset */
  			pci_dev->v_addrs[i].r_virtbase +=
@@ -274,11 +234,6 @@ static int pt_register_regions(pci_region_t * io_regions,
  					       cur_region->size, t,
  					       pt_iomem_map);

-			pci_dev->v_addrs[i].memory_index =
-			    cpu_register_io_memory(0, pt_mmio_read_cb,
-						   pt_mmio_write_cb,
-						   (void *) &(pci_dev->v_addrs[i]));
-
  			continue;
  		}
  		/* handle port io regions */
diff --git a/qemu/hw/pci-passthrough.h b/qemu/hw/pci-passthrough.h
index 1dcf752..67ac4dc 100644
--- a/qemu/hw/pci-passthrough.h
+++ b/qemu/hw/pci-passthrough.h
@@ -54,6 +54,8 @@ typedef struct pt_region_s {
  	uint32_t memory_index;
  	void *r_virtbase;	/* mmapped access address */
  	int num;		/* our index within v_addrs[] */
+	uint32_t e_size;          /* emulated size of region in bytes */
+	uint32_t r_size;          /* real size of region in bytes */
  	uint32_t debug;
  } pt_region_t;

-- 
1.5.5.1


^ permalink raw reply related	[flat|nested] 4+ messages in thread
[parent not found: <OF02BEEB69.3AC5BC32-ONC225745E.007719ED-C225745E.00772C47@il.ibm.com>]

end of thread, other threads:[~2008-06-05 13:20 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2008-06-03 11:21 KVM: PCIPT: direct mmio Ben-Ami Yassour
2008-06-04 14:16 ` Avi Kivity
     [not found] <OF02BEEB69.3AC5BC32-ONC225745E.007719ED-C225745E.00772C47@il.ibm.com>
2008-06-04 22:01 ` Ben-Ami Yassour
2008-06-05 13:20   ` Avi Kivity

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox