* [PATCH 1/1] Enble a guest to access a device's memory mapped I/O regions directly. @ 2008-04-16 13:26 benami 2008-04-16 13:26 ` benami 2008-04-18 15:56 ` Avi Kivity 0 siblings, 2 replies; 9+ messages in thread From: benami @ 2008-04-16 13:26 UTC (permalink / raw) To: amit.shah; +Cc: kvm-devel, allen.m.kay, andrea, benami From: Ben-Ami Yassour <benami@il.ibm.com> Signed-off-by: Ben-Ami Yassour <benami@il.ibm.com> Signed-off-by: Muli Ben-Yehuda <muli@il.ibm.com> --- libkvm/libkvm.c | 24 ++++++++---- qemu/hw/pci-passthrough.c | 89 +++++++++++---------------------------------- qemu/hw/pci-passthrough.h | 2 + 3 files changed, 40 insertions(+), 75 deletions(-) diff --git a/libkvm/libkvm.c b/libkvm/libkvm.c index de91328..8c02af9 100644 --- a/libkvm/libkvm.c +++ b/libkvm/libkvm.c @@ -400,7 +400,7 @@ void *kvm_create_userspace_phys_mem(kvm_context_t kvm, unsigned long phys_start, { int r; int prot = PROT_READ; - void *ptr; + void *ptr = NULL; struct kvm_userspace_memory_region memory = { .memory_size = len, .guest_phys_addr = phys_start, @@ -410,16 +410,24 @@ void *kvm_create_userspace_phys_mem(kvm_context_t kvm, unsigned long phys_start, if (writable) prot |= PROT_WRITE; - ptr = mmap(NULL, len, prot, MAP_ANONYMOUS | MAP_SHARED, -1, 0); - if (ptr == MAP_FAILED) { - fprintf(stderr, "create_userspace_phys_mem: %s", strerror(errno)); - return 0; - } + if (len > 0) { + ptr = mmap(NULL, len, prot, MAP_ANONYMOUS | MAP_SHARED, -1, 0); + if (ptr == MAP_FAILED) { + fprintf(stderr, "create_userspace_phys_mem: %s", + strerror(errno)); + return 0; + } - memset(ptr, 0, len); + memset(ptr, 0, len); + } memory.userspace_addr = (unsigned long)ptr; - memory.slot = get_free_slot(kvm); + + if (len > 0) + memory.slot = get_free_slot(kvm); + else + memory.slot = get_slot(phys_start); + r = ioctl(kvm->vm_fd, KVM_SET_USER_MEMORY_REGION, &memory); if (r == -1) { fprintf(stderr, "create_userspace_phys_mem: %s", strerror(errno)); diff --git a/qemu/hw/pci-passthrough.c b/qemu/hw/pci-passthrough.c 
index 7ffcc7b..a5894d9 100644 --- a/qemu/hw/pci-passthrough.c +++ b/qemu/hw/pci-passthrough.c @@ -25,18 +25,6 @@ typedef __u64 resource_size_t; extern kvm_context_t kvm_context; extern FILE *logfile; -CPUReadMemoryFunc *pt_mmio_read_cb[3] = { - pt_mmio_readb, - pt_mmio_readw, - pt_mmio_readl -}; - -CPUWriteMemoryFunc *pt_mmio_write_cb[3] = { - pt_mmio_writeb, - pt_mmio_writew, - pt_mmio_writel -}; - //#define PT_DEBUG #ifdef PT_DEBUG @@ -45,47 +33,6 @@ CPUWriteMemoryFunc *pt_mmio_write_cb[3] = { #define DEBUG(fmt, args...) #endif -#define pt_mmio_write(suffix, type) \ -void pt_mmio_write##suffix(void *opaque, target_phys_addr_t e_phys, \ - uint32_t value) \ -{ \ - pt_region_t *r_access = (pt_region_t *)opaque; \ - void *r_virt = (u8 *)r_access->r_virtbase + \ - (e_phys - r_access->e_physbase); \ - if (r_access->debug & PT_DEBUG_MMIO) { \ - fprintf(logfile, "pt_mmio_write" #suffix \ - ": e_physbase=%p e_phys=%p r_virt=%p value=%08x\n", \ - (void *)r_access->e_physbase, (void *)e_phys, \ - r_virt, value); \ - } \ - *(type *)r_virt = (type)value; \ -} - -pt_mmio_write(b, u8) -pt_mmio_write(w, u16) -pt_mmio_write(l, u32) - -#define pt_mmio_read(suffix, type) \ -uint32_t pt_mmio_read##suffix(void *opaque, target_phys_addr_t e_phys) \ -{ \ - pt_region_t *r_access = (pt_region_t *)opaque; \ - void *r_virt = (u8 *)r_access->r_virtbase + \ - (e_phys - r_access->e_physbase); \ - uint32_t value = (u32) (*(type *) r_virt); \ - if (r_access->debug & PT_DEBUG_MMIO) { \ - fprintf(logfile, \ - "pt_mmio_read" #suffix ": e_physbase=%p " \ - "e_phys=%p r_virt=%p value=%08x\n", \ - (void *)r_access->e_physbase, \ - (void *)e_phys, r_virt, value); \ - } \ - return value; \ -} - -pt_mmio_read(b, u8) -pt_mmio_read(w, u16) -pt_mmio_read(l, u32) - #define pt_ioport_write(suffix) \ void pt_ioport_write##suffix(void *opaque, uint32_t addr, uint32_t value) \ { \ @@ -127,22 +74,33 @@ pt_ioport_read(b) pt_ioport_read(w) pt_ioport_read(l) -static void pt_iomem_map(PCIDevice * d, int region_num, 
- uint32_t e_phys, uint32_t e_size, int type) +void pt_iomem_map(PCIDevice * pci_dev, int region_num, uint32_t e_phys, + uint32_t e_size, int type) { - pt_dev_t *r_dev = (pt_dev_t *) d; - - r_dev->v_addrs[region_num].e_physbase = e_phys; + pt_dev_t *r_dev = (pt_dev_t *) pci_dev; + pt_region_t *region = &r_dev->v_addrs[region_num]; + int first_map = (region->e_size == 0); + int ret = 0; DEBUG("e_phys=%08x r_virt=%p type=%d len=%08x region_num=%d \n", e_phys, r_dev->v_addrs[region_num].r_virtbase, type, e_size, region_num); - cpu_register_physical_memory(e_phys, - r_dev->dev.io_regions[region_num].size, - r_dev->v_addrs[region_num].memory_index); -} + region->e_physbase = e_phys; + region->e_size = e_size; + + if (!first_map) + kvm_destroy_phys_mem(kvm_context, e_phys, e_size); + if (e_size > 0) + ret = kvm_register_userspace_phys_mem(kvm_context, + e_phys, + region->r_virtbase, + e_size, + 0); + if (ret != 0) + fprintf(logfile, "Error: create new mapping failed\n"); +} static void pt_ioport_map(PCIDevice * pci_dev, int region_num, uint32_t addr, uint32_t size, int type) @@ -265,6 +223,8 @@ static int pt_register_regions(pci_region_t * io_regions, (uint32_t) (cur_region->base_addr)); return (-1); } + pci_dev->v_addrs[i].r_size = cur_region->size; + pci_dev->v_addrs[i].e_size = 0; /* add offset */ pci_dev->v_addrs[i].r_virtbase += @@ -274,11 +234,6 @@ static int pt_register_regions(pci_region_t * io_regions, cur_region->size, t, pt_iomem_map); - pci_dev->v_addrs[i].memory_index = - cpu_register_io_memory(0, pt_mmio_read_cb, - pt_mmio_write_cb, - (void *) &(pci_dev->v_addrs[i])); - continue; } /* handle port io regions */ diff --git a/qemu/hw/pci-passthrough.h b/qemu/hw/pci-passthrough.h index 012014a..49db1d2 100644 --- a/qemu/hw/pci-passthrough.h +++ b/qemu/hw/pci-passthrough.h @@ -54,6 +54,8 @@ typedef struct pt_region_s { uint32_t memory_index; void *r_virtbase; /* mmapped access address */ int num; /* our index within v_addrs[] */ + uint32_t e_size; /* emulated 
size of region in bytes */ + uint32_t r_size; /* real size of region in bytes */ uint32_t debug; } pt_region_t; -- 1.5.4.5 ------------------------------------------------------------------------- This SF.net email is sponsored by the 2008 JavaOne(SM) Conference Don't miss this year's exciting event. There's still time to save $100. Use priority code J8TL2D2. http://ad.doubleclick.net/clk;198757673;13503038;p?http://java.sun.com/javaone ^ permalink raw reply related [flat|nested] 9+ messages in thread
* [PATCH 1/1] Enble a guest to access a device's memory mapped I/O regions directly. 2008-04-16 13:26 [PATCH 1/1] Enble a guest to access a device's memory mapped I/O regions directly benami @ 2008-04-16 13:26 ` benami 2008-04-18 15:56 ` Avi Kivity 1 sibling, 0 replies; 9+ messages in thread From: benami @ 2008-04-16 13:26 UTC (permalink / raw) To: amit.shah; +Cc: kvm-devel, allen.m.kay, andrea, benami From: Ben-Ami Yassour <benami@il.ibm.com> Signed-off-by: Ben-Ami Yassour <benami@il.ibm.com> Signed-off-by: Muli Ben-Yehuda <muli@il.ibm.com> --- libkvm/libkvm.c | 24 ++++++++---- qemu/hw/pci-passthrough.c | 89 +++++++++++---------------------------------- qemu/hw/pci-passthrough.h | 2 + 3 files changed, 40 insertions(+), 75 deletions(-) diff --git a/libkvm/libkvm.c b/libkvm/libkvm.c index de91328..8c02af9 100644 --- a/libkvm/libkvm.c +++ b/libkvm/libkvm.c @@ -400,7 +400,7 @@ void *kvm_create_userspace_phys_mem(kvm_context_t kvm, unsigned long phys_start, { int r; int prot = PROT_READ; - void *ptr; + void *ptr = NULL; struct kvm_userspace_memory_region memory = { .memory_size = len, .guest_phys_addr = phys_start, @@ -410,16 +410,24 @@ void *kvm_create_userspace_phys_mem(kvm_context_t kvm, unsigned long phys_start, if (writable) prot |= PROT_WRITE; - ptr = mmap(NULL, len, prot, MAP_ANONYMOUS | MAP_SHARED, -1, 0); - if (ptr == MAP_FAILED) { - fprintf(stderr, "create_userspace_phys_mem: %s", strerror(errno)); - return 0; - } + if (len > 0) { + ptr = mmap(NULL, len, prot, MAP_ANONYMOUS | MAP_SHARED, -1, 0); + if (ptr == MAP_FAILED) { + fprintf(stderr, "create_userspace_phys_mem: %s", + strerror(errno)); + return 0; + } - memset(ptr, 0, len); + memset(ptr, 0, len); + } memory.userspace_addr = (unsigned long)ptr; - memory.slot = get_free_slot(kvm); + + if (len > 0) + memory.slot = get_free_slot(kvm); + else + memory.slot = get_slot(phys_start); + r = ioctl(kvm->vm_fd, KVM_SET_USER_MEMORY_REGION, &memory); if (r == -1) { fprintf(stderr, "create_userspace_phys_mem: %s", 
strerror(errno)); diff --git a/qemu/hw/pci-passthrough.c b/qemu/hw/pci-passthrough.c index 7ffcc7b..a5894d9 100644 --- a/qemu/hw/pci-passthrough.c +++ b/qemu/hw/pci-passthrough.c @@ -25,18 +25,6 @@ typedef __u64 resource_size_t; extern kvm_context_t kvm_context; extern FILE *logfile; -CPUReadMemoryFunc *pt_mmio_read_cb[3] = { - pt_mmio_readb, - pt_mmio_readw, - pt_mmio_readl -}; - -CPUWriteMemoryFunc *pt_mmio_write_cb[3] = { - pt_mmio_writeb, - pt_mmio_writew, - pt_mmio_writel -}; - //#define PT_DEBUG #ifdef PT_DEBUG @@ -45,47 +33,6 @@ CPUWriteMemoryFunc *pt_mmio_write_cb[3] = { #define DEBUG(fmt, args...) #endif -#define pt_mmio_write(suffix, type) \ -void pt_mmio_write##suffix(void *opaque, target_phys_addr_t e_phys, \ - uint32_t value) \ -{ \ - pt_region_t *r_access = (pt_region_t *)opaque; \ - void *r_virt = (u8 *)r_access->r_virtbase + \ - (e_phys - r_access->e_physbase); \ - if (r_access->debug & PT_DEBUG_MMIO) { \ - fprintf(logfile, "pt_mmio_write" #suffix \ - ": e_physbase=%p e_phys=%p r_virt=%p value=%08x\n", \ - (void *)r_access->e_physbase, (void *)e_phys, \ - r_virt, value); \ - } \ - *(type *)r_virt = (type)value; \ -} - -pt_mmio_write(b, u8) -pt_mmio_write(w, u16) -pt_mmio_write(l, u32) - -#define pt_mmio_read(suffix, type) \ -uint32_t pt_mmio_read##suffix(void *opaque, target_phys_addr_t e_phys) \ -{ \ - pt_region_t *r_access = (pt_region_t *)opaque; \ - void *r_virt = (u8 *)r_access->r_virtbase + \ - (e_phys - r_access->e_physbase); \ - uint32_t value = (u32) (*(type *) r_virt); \ - if (r_access->debug & PT_DEBUG_MMIO) { \ - fprintf(logfile, \ - "pt_mmio_read" #suffix ": e_physbase=%p " \ - "e_phys=%p r_virt=%p value=%08x\n", \ - (void *)r_access->e_physbase, \ - (void *)e_phys, r_virt, value); \ - } \ - return value; \ -} - -pt_mmio_read(b, u8) -pt_mmio_read(w, u16) -pt_mmio_read(l, u32) - #define pt_ioport_write(suffix) \ void pt_ioport_write##suffix(void *opaque, uint32_t addr, uint32_t value) \ { \ @@ -127,22 +74,33 @@ pt_ioport_read(b) 
pt_ioport_read(w) pt_ioport_read(l) -static void pt_iomem_map(PCIDevice * d, int region_num, - uint32_t e_phys, uint32_t e_size, int type) +void pt_iomem_map(PCIDevice * pci_dev, int region_num, uint32_t e_phys, + uint32_t e_size, int type) { - pt_dev_t *r_dev = (pt_dev_t *) d; - - r_dev->v_addrs[region_num].e_physbase = e_phys; + pt_dev_t *r_dev = (pt_dev_t *) pci_dev; + pt_region_t *region = &r_dev->v_addrs[region_num]; + int first_map = (region->e_size == 0); + int ret = 0; DEBUG("e_phys=%08x r_virt=%p type=%d len=%08x region_num=%d \n", e_phys, r_dev->v_addrs[region_num].r_virtbase, type, e_size, region_num); - cpu_register_physical_memory(e_phys, - r_dev->dev.io_regions[region_num].size, - r_dev->v_addrs[region_num].memory_index); -} + region->e_physbase = e_phys; + region->e_size = e_size; + + if (!first_map) + kvm_destroy_phys_mem(kvm_context, e_phys, e_size); + if (e_size > 0) + ret = kvm_register_userspace_phys_mem(kvm_context, + e_phys, + region->r_virtbase, + e_size, + 0); + if (ret != 0) + fprintf(logfile, "Error: create new mapping failed\n"); +} static void pt_ioport_map(PCIDevice * pci_dev, int region_num, uint32_t addr, uint32_t size, int type) @@ -265,6 +223,8 @@ static int pt_register_regions(pci_region_t * io_regions, (uint32_t) (cur_region->base_addr)); return (-1); } + pci_dev->v_addrs[i].r_size = cur_region->size; + pci_dev->v_addrs[i].e_size = 0; /* add offset */ pci_dev->v_addrs[i].r_virtbase += @@ -274,11 +234,6 @@ static int pt_register_regions(pci_region_t * io_regions, cur_region->size, t, pt_iomem_map); - pci_dev->v_addrs[i].memory_index = - cpu_register_io_memory(0, pt_mmio_read_cb, - pt_mmio_write_cb, - (void *) &(pci_dev->v_addrs[i])); - continue; } /* handle port io regions */ diff --git a/qemu/hw/pci-passthrough.h b/qemu/hw/pci-passthrough.h index 012014a..49db1d2 100644 --- a/qemu/hw/pci-passthrough.h +++ b/qemu/hw/pci-passthrough.h @@ -54,6 +54,8 @@ typedef struct pt_region_s { uint32_t memory_index; void *r_virtbase; /* mmapped 
access address */ int num; /* our index within v_addrs[] */ + uint32_t e_size; /* emulated size of region in bytes */ + uint32_t r_size; /* real size of region in bytes */ uint32_t debug; } pt_region_t; -- 1.5.4.5 ------------------------------------------------------------------------- This SF.net email is sponsored by the 2008 JavaOne(SM) Conference Don't miss this year's exciting event. There's still time to save $100. Use priority code J8TL2D2. http://ad.doubleclick.net/clk;198757673;13503038;p?http://java.sun.com/javaone ^ permalink raw reply related [flat|nested] 9+ messages in thread
* Re: [PATCH 1/1] Enble a guest to access a device's memory mapped I/O regions directly. 2008-04-16 13:26 [PATCH 1/1] Enble a guest to access a device's memory mapped I/O regions directly benami 2008-04-16 13:26 ` benami @ 2008-04-18 15:56 ` Avi Kivity 2008-04-19 14:56 ` Muli Ben-Yehuda 1 sibling, 1 reply; 9+ messages in thread From: Avi Kivity @ 2008-04-18 15:56 UTC (permalink / raw) To: benami; +Cc: kvm-devel, andrea, allen.m.kay benami@il.ibm.com wrote: > From: Ben-Ami Yassour <benami@il.ibm.com> > > Signed-off-by: Ben-Ami Yassour <benami@il.ibm.com> > Signed-off-by: Muli Ben-Yehuda <muli@il.ibm.com> > --- > libkvm/libkvm.c | 24 ++++++++---- > qemu/hw/pci-passthrough.c | 89 +++++++++++---------------------------------- > qemu/hw/pci-passthrough.h | 2 + > 3 files changed, 40 insertions(+), 75 deletions(-) > > diff --git a/libkvm/libkvm.c b/libkvm/libkvm.c > index de91328..8c02af9 100644 > --- a/libkvm/libkvm.c > +++ b/libkvm/libkvm.c > @@ -400,7 +400,7 @@ void *kvm_create_userspace_phys_mem(kvm_context_t kvm, unsigned long phys_start, > { > int r; > int prot = PROT_READ; > - void *ptr; > + void *ptr = NULL; > struct kvm_userspace_memory_region memory = { > .memory_size = len, > .guest_phys_addr = phys_start, > @@ -410,16 +410,24 @@ void *kvm_create_userspace_phys_mem(kvm_context_t kvm, unsigned long phys_start, > if (writable) > prot |= PROT_WRITE; > > - ptr = mmap(NULL, len, prot, MAP_ANONYMOUS | MAP_SHARED, -1, 0); > - if (ptr == MAP_FAILED) { > - fprintf(stderr, "create_userspace_phys_mem: %s", strerror(errno)); > - return 0; > - } > + if (len > 0) { > + ptr = mmap(NULL, len, prot, MAP_ANONYMOUS | MAP_SHARED, -1, 0); > + if (ptr == MAP_FAILED) { > + fprintf(stderr, "create_userspace_phys_mem: %s", > + strerror(errno)); > + return 0; > + } > > - memset(ptr, 0, len); > + memset(ptr, 0, len); > + } > > memory.userspace_addr = (unsigned long)ptr; > - memory.slot = get_free_slot(kvm); > + > + if (len > 0) > + memory.slot = get_free_slot(kvm); > + else > + 
memory.slot = get_slot(phys_start); > + > r = ioctl(kvm->vm_fd, KVM_SET_USER_MEMORY_REGION, &memory); > if (r == -1) { > fprintf(stderr, "create_userspace_phys_mem: %s", strerror(errno)); > This looks like support for zero-length memory slots? Why is it needed? It needs to be in a separate patch. > diff --git a/qemu/hw/pci-passthrough.c b/qemu/hw/pci-passthrough.c > index 7ffcc7b..a5894d9 100644 > --- a/qemu/hw/pci-passthrough.c > +++ b/qemu/hw/pci-passthrough.c > @@ -25,18 +25,6 @@ typedef __u64 resource_size_t; > extern kvm_context_t kvm_context; > extern FILE *logfile; > > -CPUReadMemoryFunc *pt_mmio_read_cb[3] = { > - pt_mmio_readb, > - pt_mmio_readw, > - pt_mmio_readl > -}; > - > -CPUWriteMemoryFunc *pt_mmio_write_cb[3] = { > - pt_mmio_writeb, > - pt_mmio_writew, > - pt_mmio_writel > -}; > - > There's at least one use case for keeping mmio in userspace: reverse-engineering a device driver. So if it doesn't cause too much trouble, please keep this an option. -- Any sufficiently difficult bug is indistinguishable from a feature. ------------------------------------------------------------------------- This SF.net email is sponsored by the 2008 JavaOne(SM) Conference Don't miss this year's exciting event. There's still time to save $100. Use priority code J8TL2D2. http://ad.doubleclick.net/clk;198757673;13503038;p?http://java.sun.com/javaone ^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH 1/1] Enble a guest to access a device's memory mapped I/O regions directly. 2008-04-18 15:56 ` Avi Kivity @ 2008-04-19 14:56 ` Muli Ben-Yehuda 0 siblings, 0 replies; 9+ messages in thread From: Muli Ben-Yehuda @ 2008-04-19 14:56 UTC (permalink / raw) To: Avi Kivity; +Cc: kvm-devel, allen.m.kay, andrea, Ben-Ami Yassour1 On Fri, Apr 18, 2008 at 06:56:41PM +0300, Avi Kivity wrote: > benami@il.ibm.com wrote: > > From: Ben-Ami Yassour <benami@il.ibm.com> > > > > Signed-off-by: Ben-Ami Yassour <benami@il.ibm.com> > > Signed-off-by: Muli Ben-Yehuda <muli@il.ibm.com> > > --- > > libkvm/libkvm.c | 24 ++++++++---- > > qemu/hw/pci-passthrough.c | 89 +++++++++++---------------------------------- > > qemu/hw/pci-passthrough.h | 2 + > > 3 files changed, 40 insertions(+), 75 deletions(-) > > > > diff --git a/libkvm/libkvm.c b/libkvm/libkvm.c > > index de91328..8c02af9 100644 > > --- a/libkvm/libkvm.c > > +++ b/libkvm/libkvm.c > > @@ -400,7 +400,7 @@ void *kvm_create_userspace_phys_mem(kvm_context_t kvm, unsigned long phys_start, > > { > > int r; > > int prot = PROT_READ; > > - void *ptr; > > + void *ptr = NULL; > > struct kvm_userspace_memory_region memory = { > > .memory_size = len, > > .guest_phys_addr = phys_start, > > @@ -410,16 +410,24 @@ void *kvm_create_userspace_phys_mem(kvm_context_t kvm, unsigned long phys_start, > > if (writable) > > prot |= PROT_WRITE; > > > > - ptr = mmap(NULL, len, prot, MAP_ANONYMOUS | MAP_SHARED, -1, 0); > > - if (ptr == MAP_FAILED) { > > - fprintf(stderr, "create_userspace_phys_mem: %s", strerror(errno)); > > - return 0; > > - } > > + if (len > 0) { > > + ptr = mmap(NULL, len, prot, MAP_ANONYMOUS | MAP_SHARED, -1, 0); > > + if (ptr == MAP_FAILED) { > > + fprintf(stderr, "create_userspace_phys_mem: %s", > > + strerror(errno)); > > + return 0; > > + } > > > > - memset(ptr, 0, len); > > + memset(ptr, 0, len); > > + } > > > > memory.userspace_addr = (unsigned long)ptr; > > - memory.slot = get_free_slot(kvm); > > + > > + if (len > 0) > > 
+ memory.slot = get_free_slot(kvm); > > + else > > + memory.slot = get_slot(phys_start); > > + > > r = ioctl(kvm->vm_fd, KVM_SET_USER_MEMORY_REGION, &memory); > > if (r == -1) { > > fprintf(stderr, "create_userspace_phys_mem: %s", strerror(errno)); > > > > This looks like support for zero-length memory slots? Why is it > needed? > > It needs to be in a separate patch. We need an interface to remove a memslot. When the guest writes to a direct assigned device's BAR and changes an MMIO region, we need to remove the old slot and establish a new one. The kernel side treats 0-sized memslot as "remove", but the userspace side didn't quite handle it properly. Personally I would've preferred a proper "remove" interface, rather than shoehorning it into kvm_create_userspace_phys_mem and a 0-sized slot. If that's acceptable, we'll put together a patch. > > diff --git a/qemu/hw/pci-passthrough.c b/qemu/hw/pci-passthrough.c > > index 7ffcc7b..a5894d9 100644 > > --- a/qemu/hw/pci-passthrough.c > > +++ b/qemu/hw/pci-passthrough.c > > @@ -25,18 +25,6 @@ typedef __u64 resource_size_t; > > extern kvm_context_t kvm_context; > > extern FILE *logfile; > > > > -CPUReadMemoryFunc *pt_mmio_read_cb[3] = { > > - pt_mmio_readb, > > - pt_mmio_readw, > > - pt_mmio_readl > > -}; > > - > > -CPUWriteMemoryFunc *pt_mmio_write_cb[3] = { > > - pt_mmio_writeb, > > - pt_mmio_writew, > > - pt_mmio_writel > > -}; > > - > > > > There's at least one use case for keeping mmio in userspace: > reverse-engineering a device driver. So if it doesn't cause too much > trouble, please keep this an option. I don't think it's a big deal to support both, although I'm not sure how useful it would be (especially considering mmiotrace). Did you have a user-interface for specifying it in mind? Cheers, Muli ------------------------------------------------------------------------- This SF.net email is sponsored by the 2008 JavaOne(SM) Conference Don't miss this year's exciting event. There's still time to save $100. 
Use priority code J8TL2D2. http://ad.doubleclick.net/clk;198757673;13503038;p?http://java.sun.com/javaone ^ permalink raw reply [flat|nested] 9+ messages in thread
* direct mmio for passthrough - kernel part @ 2008-04-16 13:25 benami 2008-04-16 13:25 ` [PATCH 1/1] Enble a guest to access a device's memory mapped I/O regions directly benami 0 siblings, 1 reply; 9+ messages in thread From: benami @ 2008-04-16 13:25 UTC (permalink / raw) To: amit.shah; +Cc: kvm-devel, allen.m.kay, andrea, benami This patch for PCI passthrough devices enables a guest to access a device's memory mapped I/O regions directly, without requiring the host to trap and emulate every MMIO access. Updated from last version: we create a memory slot for each MMIO region of the guest's devices, and then use the /sys/bus/pci/.../resource# mapping to find the hfn for that MMIO region. The kernel part and the userspace part of this patchset apply to Amit's pv-dma tree. Tested on a Lenovo M57p with an e1000 NIC assigned directly to an FC8 guest. Comments are appreciated. ------------------------------------------------------------------------- This SF.net email is sponsored by the 2008 JavaOne(SM) Conference Don't miss this year's exciting event. There's still time to save $100. Use priority code J8TL2D2. http://ad.doubleclick.net/clk;198757673;13503038;p?http://java.sun.com/javaone ^ permalink raw reply [flat|nested] 9+ messages in thread
* [PATCH 1/1] Enble a guest to access a device's memory mapped I/O regions directly. 2008-04-16 13:25 direct mmio for passthrough - kernel part benami @ 2008-04-16 13:25 ` benami 2008-04-16 13:25 ` benami 2008-04-18 15:50 ` Avi Kivity 0 siblings, 2 replies; 9+ messages in thread From: benami @ 2008-04-16 13:25 UTC (permalink / raw) To: amit.shah; +Cc: kvm-devel, allen.m.kay, andrea, benami From: Ben-Ami Yassour <benami@il.ibm.com> Signed-off-by: Ben-Ami Yassour <benami@il.ibm.com> Signed-off-by: Muli Ben-Yehuda <muli@il.ibm.com> --- arch/x86/kvm/mmu.c | 59 +++++++++++++++++++++++++++++-------------- arch/x86/kvm/paging_tmpl.h | 19 +++++++++---- include/linux/kvm_host.h | 2 +- virt/kvm/kvm_main.c | 17 +++++++++++- 4 files changed, 69 insertions(+), 28 deletions(-) diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 078a7f1..c89029d 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -112,6 +112,8 @@ static int dbg = 1; #define PT_FIRST_AVAIL_BITS_SHIFT 9 #define PT64_SECOND_AVAIL_BITS_SHIFT 52 +#define PT_SHADOW_IO_MARK (1ULL << PT_FIRST_AVAIL_BITS_SHIFT) + #define VALID_PAGE(x) ((x) != INVALID_PAGE) #define PT64_LEVEL_BITS 9 @@ -237,6 +239,9 @@ static int is_dirty_pte(unsigned long pte) static int is_rmap_pte(u64 pte) { + if (pte & PT_SHADOW_IO_MARK) + return false; + return is_shadow_present_pte(pte); } @@ -1034,7 +1039,8 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte, unsigned pt_access, unsigned pte_access, int user_fault, int write_fault, int dirty, int *ptwrite, int largepage, gfn_t gfn, - pfn_t pfn, bool speculative) + pfn_t pfn, bool speculative, + int direct_mmio) { u64 spte; int was_rmapped = 0; @@ -1114,6 +1120,9 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte, } } + if (direct_mmio) + spte |= PT_SHADOW_IO_MARK; + unshadowed: if (pte_access & ACC_WRITE_MASK) @@ -1129,16 +1138,19 @@ unshadowed: ++vcpu->kvm->stat.lpages; page_header_update_slot(vcpu->kvm, shadow_pte, gfn); - if (!was_rmapped) { - 
rmap_add(vcpu, shadow_pte, gfn, largepage); - if (!is_rmap_pte(*shadow_pte)) - kvm_release_pfn_clean(pfn); - } else { - if (was_writeble) - kvm_release_pfn_dirty(pfn); - else - kvm_release_pfn_clean(pfn); + if (!direct_mmio) { + if (!was_rmapped) { + rmap_add(vcpu, shadow_pte, gfn, largepage); + if (!is_rmap_pte(*shadow_pte)) + kvm_release_pfn_clean(pfn); + } else { + if (was_writeble) + kvm_release_pfn_dirty(pfn); + else + kvm_release_pfn_clean(pfn); + } } + if (!ptwrite || !*ptwrite) vcpu->arch.last_pte_updated = shadow_pte; } @@ -1149,7 +1161,7 @@ static void nonpaging_new_cr3(struct kvm_vcpu *vcpu) static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write, int largepage, gfn_t gfn, pfn_t pfn, - int level) + int level, int direct_mmio) { hpa_t table_addr = vcpu->arch.mmu.root_hpa; int pt_write = 0; @@ -1163,13 +1175,15 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write, if (level == 1) { mmu_set_spte(vcpu, &table[index], ACC_ALL, ACC_ALL, - 0, write, 1, &pt_write, 0, gfn, pfn, false); + 0, write, 1, &pt_write, 0, gfn, pfn, + false, direct_mmio); return pt_write; } if (largepage && level == 2) { mmu_set_spte(vcpu, &table[index], ACC_ALL, ACC_ALL, - 0, write, 1, &pt_write, 1, gfn, pfn, false); + 0, write, 1, &pt_write, 1, gfn, pfn, + false, direct_mmio); return pt_write; } @@ -1200,6 +1214,7 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn) int r; int largepage = 0; pfn_t pfn; + int direct_mmio = 0; down_read(¤t->mm->mmap_sem); if (is_largepage_backed(vcpu, gfn & ~(KVM_PAGES_PER_HPAGE-1))) { @@ -1207,10 +1222,10 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn) largepage = 1; } - pfn = gfn_to_pfn(vcpu->kvm, gfn); + pfn = gfn_to_pfn(vcpu->kvm, gfn, &direct_mmio); up_read(¤t->mm->mmap_sem); - /* mmio */ + /* handle emulated mmio */ if (is_error_pfn(pfn)) { kvm_release_pfn_clean(pfn); return 1; @@ -1219,7 +1234,7 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int 
write, gfn_t gfn) spin_lock(&vcpu->kvm->mmu_lock); kvm_mmu_free_some_pages(vcpu); r = __direct_map(vcpu, v, write, largepage, gfn, pfn, - PT32E_ROOT_LEVEL); + PT32E_ROOT_LEVEL, direct_mmio); spin_unlock(&vcpu->kvm->mmu_lock); @@ -1355,6 +1370,7 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, int r; int largepage = 0; gfn_t gfn = gpa >> PAGE_SHIFT; + int direct_mmio = 0; ASSERT(vcpu); ASSERT(VALID_PAGE(vcpu->arch.mmu.root_hpa)); @@ -1368,7 +1384,7 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, gfn &= ~(KVM_PAGES_PER_HPAGE-1); largepage = 1; } - pfn = gfn_to_pfn(vcpu->kvm, gfn); + pfn = gfn_to_pfn(vcpu->kvm, gfn, &direct_mmio); up_read(¤t->mm->mmap_sem); if (is_error_pfn(pfn)) { kvm_release_pfn_clean(pfn); @@ -1377,7 +1393,8 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, spin_lock(&vcpu->kvm->mmu_lock); kvm_mmu_free_some_pages(vcpu); r = __direct_map(vcpu, gpa, error_code & PFERR_WRITE_MASK, - largepage, gfn, pfn, TDP_ROOT_LEVEL); + largepage, gfn, pfn, TDP_ROOT_LEVEL, + direct_mmio); spin_unlock(&vcpu->kvm->mmu_lock); return r; @@ -1643,6 +1660,7 @@ static void mmu_guess_page_from_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, int r; u64 gpte = 0; pfn_t pfn; + int direct_mmio = 0; vcpu->arch.update_pte.largepage = 0; @@ -1678,9 +1696,12 @@ static void mmu_guess_page_from_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, gfn &= ~(KVM_PAGES_PER_HPAGE-1); vcpu->arch.update_pte.largepage = 1; } - pfn = gfn_to_pfn(vcpu->kvm, gfn); + pfn = gfn_to_pfn(vcpu->kvm, gfn, &direct_mmio); up_read(¤t->mm->mmap_sem); + if (direct_mmio) + return; + if (is_error_pfn(pfn)) { kvm_release_pfn_clean(pfn); return; diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 156fe10..e85d8ae 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -264,9 +264,10 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page, if (is_error_pfn(pfn)) return; kvm_get_pfn(pfn); + mmu_set_spte(vcpu, spte, 
page->role.access, pte_access, 0, 0, gpte & PT_DIRTY_MASK, NULL, largepage, gpte_to_gfn(gpte), - pfn, true); + pfn, true, false); } /* @@ -275,7 +276,7 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page, static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, struct guest_walker *walker, int user_fault, int write_fault, int largepage, - int *ptwrite, pfn_t pfn) + int *ptwrite, pfn_t pfn, int direct_mmio) { hpa_t shadow_addr; int level; @@ -349,11 +350,15 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, mmu_set_spte(vcpu, shadow_ent, access, walker->pte_access & access, user_fault, write_fault, walker->ptes[walker->level-1] & PT_DIRTY_MASK, - ptwrite, largepage, walker->gfn, pfn, false); + ptwrite, largepage, walker->gfn, pfn, false, + direct_mmio); return shadow_ent; } +static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr); + + /* * Page fault handler. There are several causes for a page fault: * - there is no shadow pte for the guest pte @@ -380,6 +385,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, int r; pfn_t pfn; int largepage = 0; + int direct_mmio = 0; pgprintk("%s: addr %lx err %x\n", __func__, addr, error_code); kvm_mmu_audit(vcpu, "pre page fault"); @@ -413,10 +419,10 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, largepage = 1; } } - pfn = gfn_to_pfn(vcpu->kvm, walker.gfn); + pfn = gfn_to_pfn(vcpu->kvm, walker.gfn, &direct_mmio); up_read(¤t->mm->mmap_sem); - /* mmio */ + /* handle emulated mmio */ if (is_error_pfn(pfn)) { pgprintk("gfn %x is mmio\n", walker.gfn); kvm_release_pfn_clean(pfn); @@ -426,7 +432,8 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, spin_lock(&vcpu->kvm->mmu_lock); kvm_mmu_free_some_pages(vcpu); shadow_pte = FNAME(fetch)(vcpu, addr, &walker, user_fault, write_fault, - largepage, &write_pt, pfn); + largepage, &write_pt, pfn, + direct_mmio); pgprintk("%s: shadow pte %p %llx ptwrite %d\n", __func__, shadow_pte, 
*shadow_pte, write_pt); diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 578c363..0910cc1 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -173,7 +173,7 @@ void kvm_release_page_dirty(struct page *page); void kvm_set_page_dirty(struct page *page); void kvm_set_page_accessed(struct page *page); -pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn); +pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn, int *direct_mmio); void kvm_release_pfn_dirty(pfn_t); void kvm_release_pfn_clean(pfn_t pfn); void kvm_set_pfn_dirty(pfn_t pfn); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 6a52c08..07b95f7 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -526,20 +526,33 @@ unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn) /* * Requires current->mm->mmap_sem to be held */ -pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn) +pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn, int *direct_mmio) { struct page *page[1]; unsigned long addr; int npages; + struct vm_area_struct *vma; might_sleep(); + if (direct_mmio) + *direct_mmio = 0; + addr = gfn_to_hva(kvm, gfn); if (kvm_is_error_hva(addr)) { get_page(bad_page); return page_to_pfn(bad_page); } + /* handle mmio */ + vma = find_vma(current->mm, addr); + if (vma->vm_flags & VM_IO) { + if (direct_mmio) + *direct_mmio = 1; + + return ((addr - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; + } + npages = get_user_pages(current, current->mm, addr, 1, 1, 1, page, NULL); @@ -555,7 +568,7 @@ EXPORT_SYMBOL_GPL(gfn_to_pfn); struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn) { - return pfn_to_page(gfn_to_pfn(kvm, gfn)); + return pfn_to_page(gfn_to_pfn(kvm, gfn, NULL)); } EXPORT_SYMBOL_GPL(gfn_to_page); -- 1.5.4.5 ------------------------------------------------------------------------- This SF.net email is sponsored by the 2008 JavaOne(SM) Conference Don't miss this year's exciting event. There's still time to save $100. Use priority code J8TL2D2. 
http://ad.doubleclick.net/clk;198757673;13503038;p?http://java.sun.com/javaone ^ permalink raw reply related [flat|nested] 9+ messages in thread
* [PATCH 1/1] Enble a guest to access a device's memory mapped I/O regions directly. 2008-04-16 13:25 ` [PATCH 1/1] Enble a guest to access a device's memory mapped I/O regions directly benami @ 2008-04-16 13:25 ` benami 2008-04-18 15:50 ` Avi Kivity 1 sibling, 0 replies; 9+ messages in thread From: benami @ 2008-04-16 13:25 UTC (permalink / raw) To: amit.shah; +Cc: kvm-devel, allen.m.kay, andrea, benami From: Ben-Ami Yassour <benami@il.ibm.com> Signed-off-by: Ben-Ami Yassour <benami@il.ibm.com> Signed-off-by: Muli Ben-Yehuda <muli@il.ibm.com> --- arch/x86/kvm/mmu.c | 59 +++++++++++++++++++++++++++++-------------- arch/x86/kvm/paging_tmpl.h | 19 +++++++++---- include/linux/kvm_host.h | 2 +- virt/kvm/kvm_main.c | 17 +++++++++++- 4 files changed, 69 insertions(+), 28 deletions(-) diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 078a7f1..c89029d 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -112,6 +112,8 @@ static int dbg = 1; #define PT_FIRST_AVAIL_BITS_SHIFT 9 #define PT64_SECOND_AVAIL_BITS_SHIFT 52 +#define PT_SHADOW_IO_MARK (1ULL << PT_FIRST_AVAIL_BITS_SHIFT) + #define VALID_PAGE(x) ((x) != INVALID_PAGE) #define PT64_LEVEL_BITS 9 @@ -237,6 +239,9 @@ static int is_dirty_pte(unsigned long pte) static int is_rmap_pte(u64 pte) { + if (pte & PT_SHADOW_IO_MARK) + return false; + return is_shadow_present_pte(pte); } @@ -1034,7 +1039,8 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte, unsigned pt_access, unsigned pte_access, int user_fault, int write_fault, int dirty, int *ptwrite, int largepage, gfn_t gfn, - pfn_t pfn, bool speculative) + pfn_t pfn, bool speculative, + int direct_mmio) { u64 spte; int was_rmapped = 0; @@ -1114,6 +1120,9 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte, } } + if (direct_mmio) + spte |= PT_SHADOW_IO_MARK; + unshadowed: if (pte_access & ACC_WRITE_MASK) @@ -1129,16 +1138,19 @@ unshadowed: ++vcpu->kvm->stat.lpages; page_header_update_slot(vcpu->kvm, shadow_pte, gfn); - if 
(!was_rmapped) { - rmap_add(vcpu, shadow_pte, gfn, largepage); - if (!is_rmap_pte(*shadow_pte)) - kvm_release_pfn_clean(pfn); - } else { - if (was_writeble) - kvm_release_pfn_dirty(pfn); - else - kvm_release_pfn_clean(pfn); + if (!direct_mmio) { + if (!was_rmapped) { + rmap_add(vcpu, shadow_pte, gfn, largepage); + if (!is_rmap_pte(*shadow_pte)) + kvm_release_pfn_clean(pfn); + } else { + if (was_writeble) + kvm_release_pfn_dirty(pfn); + else + kvm_release_pfn_clean(pfn); + } } + if (!ptwrite || !*ptwrite) vcpu->arch.last_pte_updated = shadow_pte; } @@ -1149,7 +1161,7 @@ static void nonpaging_new_cr3(struct kvm_vcpu *vcpu) static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write, int largepage, gfn_t gfn, pfn_t pfn, - int level) + int level, int direct_mmio) { hpa_t table_addr = vcpu->arch.mmu.root_hpa; int pt_write = 0; @@ -1163,13 +1175,15 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write, if (level == 1) { mmu_set_spte(vcpu, &table[index], ACC_ALL, ACC_ALL, - 0, write, 1, &pt_write, 0, gfn, pfn, false); + 0, write, 1, &pt_write, 0, gfn, pfn, + false, direct_mmio); return pt_write; } if (largepage && level == 2) { mmu_set_spte(vcpu, &table[index], ACC_ALL, ACC_ALL, - 0, write, 1, &pt_write, 1, gfn, pfn, false); + 0, write, 1, &pt_write, 1, gfn, pfn, + false, direct_mmio); return pt_write; } @@ -1200,6 +1214,7 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn) int r; int largepage = 0; pfn_t pfn; + int direct_mmio = 0; down_read(¤t->mm->mmap_sem); if (is_largepage_backed(vcpu, gfn & ~(KVM_PAGES_PER_HPAGE-1))) { @@ -1207,10 +1222,10 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn) largepage = 1; } - pfn = gfn_to_pfn(vcpu->kvm, gfn); + pfn = gfn_to_pfn(vcpu->kvm, gfn, &direct_mmio); up_read(¤t->mm->mmap_sem); - /* mmio */ + /* handle emulated mmio */ if (is_error_pfn(pfn)) { kvm_release_pfn_clean(pfn); return 1; @@ -1219,7 +1234,7 @@ static int nonpaging_map(struct kvm_vcpu 
*vcpu, gva_t v, int write, gfn_t gfn) spin_lock(&vcpu->kvm->mmu_lock); kvm_mmu_free_some_pages(vcpu); r = __direct_map(vcpu, v, write, largepage, gfn, pfn, - PT32E_ROOT_LEVEL); + PT32E_ROOT_LEVEL, direct_mmio); spin_unlock(&vcpu->kvm->mmu_lock); @@ -1355,6 +1370,7 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, int r; int largepage = 0; gfn_t gfn = gpa >> PAGE_SHIFT; + int direct_mmio = 0; ASSERT(vcpu); ASSERT(VALID_PAGE(vcpu->arch.mmu.root_hpa)); @@ -1368,7 +1384,7 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, gfn &= ~(KVM_PAGES_PER_HPAGE-1); largepage = 1; } - pfn = gfn_to_pfn(vcpu->kvm, gfn); + pfn = gfn_to_pfn(vcpu->kvm, gfn, &direct_mmio); up_read(¤t->mm->mmap_sem); if (is_error_pfn(pfn)) { kvm_release_pfn_clean(pfn); @@ -1377,7 +1393,8 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, spin_lock(&vcpu->kvm->mmu_lock); kvm_mmu_free_some_pages(vcpu); r = __direct_map(vcpu, gpa, error_code & PFERR_WRITE_MASK, - largepage, gfn, pfn, TDP_ROOT_LEVEL); + largepage, gfn, pfn, TDP_ROOT_LEVEL, + direct_mmio); spin_unlock(&vcpu->kvm->mmu_lock); return r; @@ -1643,6 +1660,7 @@ static void mmu_guess_page_from_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, int r; u64 gpte = 0; pfn_t pfn; + int direct_mmio = 0; vcpu->arch.update_pte.largepage = 0; @@ -1678,9 +1696,12 @@ static void mmu_guess_page_from_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, gfn &= ~(KVM_PAGES_PER_HPAGE-1); vcpu->arch.update_pte.largepage = 1; } - pfn = gfn_to_pfn(vcpu->kvm, gfn); + pfn = gfn_to_pfn(vcpu->kvm, gfn, &direct_mmio); up_read(¤t->mm->mmap_sem); + if (direct_mmio) + return; + if (is_error_pfn(pfn)) { kvm_release_pfn_clean(pfn); return; diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 156fe10..e85d8ae 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -264,9 +264,10 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page, if (is_error_pfn(pfn)) return; kvm_get_pfn(pfn); + 
mmu_set_spte(vcpu, spte, page->role.access, pte_access, 0, 0, gpte & PT_DIRTY_MASK, NULL, largepage, gpte_to_gfn(gpte), - pfn, true); + pfn, true, false); } /* @@ -275,7 +276,7 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page, static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, struct guest_walker *walker, int user_fault, int write_fault, int largepage, - int *ptwrite, pfn_t pfn) + int *ptwrite, pfn_t pfn, int direct_mmio) { hpa_t shadow_addr; int level; @@ -349,11 +350,15 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, mmu_set_spte(vcpu, shadow_ent, access, walker->pte_access & access, user_fault, write_fault, walker->ptes[walker->level-1] & PT_DIRTY_MASK, - ptwrite, largepage, walker->gfn, pfn, false); + ptwrite, largepage, walker->gfn, pfn, false, + direct_mmio); return shadow_ent; } +static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr); + + /* * Page fault handler. There are several causes for a page fault: * - there is no shadow pte for the guest pte @@ -380,6 +385,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, int r; pfn_t pfn; int largepage = 0; + int direct_mmio = 0; pgprintk("%s: addr %lx err %x\n", __func__, addr, error_code); kvm_mmu_audit(vcpu, "pre page fault"); @@ -413,10 +419,10 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, largepage = 1; } } - pfn = gfn_to_pfn(vcpu->kvm, walker.gfn); + pfn = gfn_to_pfn(vcpu->kvm, walker.gfn, &direct_mmio); up_read(¤t->mm->mmap_sem); - /* mmio */ + /* handle emulated mmio */ if (is_error_pfn(pfn)) { pgprintk("gfn %x is mmio\n", walker.gfn); kvm_release_pfn_clean(pfn); @@ -426,7 +432,8 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, spin_lock(&vcpu->kvm->mmu_lock); kvm_mmu_free_some_pages(vcpu); shadow_pte = FNAME(fetch)(vcpu, addr, &walker, user_fault, write_fault, - largepage, &write_pt, pfn); + largepage, &write_pt, pfn, + direct_mmio); pgprintk("%s: shadow pte %p %llx ptwrite %d\n", 
__func__, shadow_pte, *shadow_pte, write_pt); diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 578c363..0910cc1 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -173,7 +173,7 @@ void kvm_release_page_dirty(struct page *page); void kvm_set_page_dirty(struct page *page); void kvm_set_page_accessed(struct page *page); -pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn); +pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn, int *direct_mmio); void kvm_release_pfn_dirty(pfn_t); void kvm_release_pfn_clean(pfn_t pfn); void kvm_set_pfn_dirty(pfn_t pfn); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 6a52c08..07b95f7 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -526,20 +526,33 @@ unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn) /* * Requires current->mm->mmap_sem to be held */ -pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn) +pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn, int *direct_mmio) { struct page *page[1]; unsigned long addr; int npages; + struct vm_area_struct *vma; might_sleep(); + if (direct_mmio) + *direct_mmio = 0; + addr = gfn_to_hva(kvm, gfn); if (kvm_is_error_hva(addr)) { get_page(bad_page); return page_to_pfn(bad_page); } + /* handle mmio */ + vma = find_vma(current->mm, addr); + if (vma->vm_flags & VM_IO) { + if (direct_mmio) + *direct_mmio = 1; + + return ((addr - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; + } + npages = get_user_pages(current, current->mm, addr, 1, 1, 1, page, NULL); @@ -555,7 +568,7 @@ EXPORT_SYMBOL_GPL(gfn_to_pfn); struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn) { - return pfn_to_page(gfn_to_pfn(kvm, gfn)); + return pfn_to_page(gfn_to_pfn(kvm, gfn, NULL)); } EXPORT_SYMBOL_GPL(gfn_to_page); -- 1.5.4.5 ------------------------------------------------------------------------- This SF.net email is sponsored by the 2008 JavaOne(SM) Conference Don't miss this year's exciting event. There's still time to save $100. Use priority code J8TL2D2. 
http://ad.doubleclick.net/clk;198757673;13503038;p?http://java.sun.com/javaone ^ permalink raw reply related [flat|nested] 9+ messages in thread
* Re: [PATCH 1/1] Enble a guest to access a device's memory mapped I/O regions directly. 2008-04-16 13:25 ` [PATCH 1/1] Enble a guest to access a device's memory mapped I/O regions directly benami 2008-04-16 13:25 ` benami @ 2008-04-18 15:50 ` Avi Kivity 2008-04-19 14:35 ` Muli Ben-Yehuda 1 sibling, 1 reply; 9+ messages in thread From: Avi Kivity @ 2008-04-18 15:50 UTC (permalink / raw) To: benami; +Cc: kvm-devel, andrea, allen.m.kay benami@il.ibm.com wrote: > From: Ben-Ami Yassour <benami@il.ibm.com> > > Signed-off-by: Ben-Ami Yassour <benami@il.ibm.com> > Signed-off-by: Muli Ben-Yehuda <muli@il.ibm.com> > --- > arch/x86/kvm/mmu.c | 59 +++++++++++++++++++++++++++++-------------- > arch/x86/kvm/paging_tmpl.h | 19 +++++++++---- > include/linux/kvm_host.h | 2 +- > virt/kvm/kvm_main.c | 17 +++++++++++- > 4 files changed, 69 insertions(+), 28 deletions(-) > > diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c > index 078a7f1..c89029d 100644 > --- a/arch/x86/kvm/mmu.c > +++ b/arch/x86/kvm/mmu.c > @@ -112,6 +112,8 @@ static int dbg = 1; > #define PT_FIRST_AVAIL_BITS_SHIFT 9 > #define PT64_SECOND_AVAIL_BITS_SHIFT 52 > > +#define PT_SHADOW_IO_MARK (1ULL << PT_FIRST_AVAIL_BITS_SHIFT) > + > Please rename this PT_SHADOW_MMIO_MASK. > #define VALID_PAGE(x) ((x) != INVALID_PAGE) > > #define PT64_LEVEL_BITS 9 > @@ -237,6 +239,9 @@ static int is_dirty_pte(unsigned long pte) > > static int is_rmap_pte(u64 pte) > { > + if (pte & PT_SHADOW_IO_MARK) > + return false; > + > return is_shadow_present_pte(pte); > } > Why avoid rmap on mmio pages? Sure it's unnecessary work, but having less cases improves overall reliability. You can use pfn_valid() in gfn_to_pfn() and kvm_release_pfn_*() to conditionally update the page refcounts. -- Any sufficiently difficult bug is indistinguishable from a feature. ------------------------------------------------------------------------- This SF.net email is sponsored by the 2008 JavaOne(SM) Conference Don't miss this year's exciting event. 
There's still time to save $100. Use priority code J8TL2D2. http://ad.doubleclick.net/clk;198757673;13503038;p?http://java.sun.com/javaone ^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH 1/1] Enble a guest to access a device's memory mapped I/O regions directly. 2008-04-18 15:50 ` Avi Kivity @ 2008-04-19 14:35 ` Muli Ben-Yehuda 2008-04-20 10:29 ` Avi Kivity 0 siblings, 1 reply; 9+ messages in thread From: Muli Ben-Yehuda @ 2008-04-19 14:35 UTC (permalink / raw) To: Avi Kivity; +Cc: kvm-devel, allen.m.kay, andrea, Ben-Ami Yassour1 On Fri, Apr 18, 2008 at 06:50:03PM +0300, Avi Kivity wrote: > benami@il.ibm.com wrote: > > From: Ben-Ami Yassour <benami@il.ibm.com> > > > > Signed-off-by: Ben-Ami Yassour <benami@il.ibm.com> > > Signed-off-by: Muli Ben-Yehuda <muli@il.ibm.com> > > --- > > arch/x86/kvm/mmu.c | 59 +++++++++++++++++++++++++++++-------------- > > arch/x86/kvm/paging_tmpl.h | 19 +++++++++---- > > include/linux/kvm_host.h | 2 +- > > virt/kvm/kvm_main.c | 17 +++++++++++- > > 4 files changed, 69 insertions(+), 28 deletions(-) > > > > diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c > > index 078a7f1..c89029d 100644 > > --- a/arch/x86/kvm/mmu.c > > +++ b/arch/x86/kvm/mmu.c > > @@ -112,6 +112,8 @@ static int dbg = 1; > > #define PT_FIRST_AVAIL_BITS_SHIFT 9 > > #define PT64_SECOND_AVAIL_BITS_SHIFT 52 > > > > +#define PT_SHADOW_IO_MARK (1ULL << PT_FIRST_AVAIL_BITS_SHIFT) > > + > > > > Please rename this PT_SHADOW_MMIO_MASK. Sure thing. > > #define VALID_PAGE(x) ((x) != INVALID_PAGE) > > > > #define PT64_LEVEL_BITS 9 > > @@ -237,6 +239,9 @@ static int is_dirty_pte(unsigned long pte) > > > > static int is_rmap_pte(u64 pte) > > { > > + if (pte & PT_SHADOW_IO_MARK) > > + return false; > > + > > return is_shadow_present_pte(pte); > > } > > > > Why avoid rmap on mmio pages? Sure it's unnecessary work, but > having less cases improves overall reliability. The rmap functions already have a check to bail out if the pte is not an rmap pte, so in that sense, we aren't adding a new case for the code to handle, just adding direct MMIO ptes to the existing list of non-rmap ptes. 
> You can use pfn_valid() in gfn_to_pfn() and kvm_release_pfn_*() to > conditionally update the page refcounts. Since rmap isn't useful for direct MMIO ptes, doesn't it make more sense to "bail out" early rather than in the bowels of the rmap code? Chag Same'ach Muli ------------------------------------------------------------------------- This SF.net email is sponsored by the 2008 JavaOne(SM) Conference Don't miss this year's exciting event. There's still time to save $100. Use priority code J8TL2D2. http://ad.doubleclick.net/clk;198757673;13503038;p?http://java.sun.com/javaone ^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH 1/1] Enble a guest to access a device's memory mapped I/O regions directly. 2008-04-19 14:35 ` Muli Ben-Yehuda @ 2008-04-20 10:29 ` Avi Kivity 0 siblings, 0 replies; 9+ messages in thread From: Avi Kivity @ 2008-04-20 10:29 UTC (permalink / raw) To: Muli Ben-Yehuda; +Cc: kvm-devel, allen.m.kay, andrea, Ben-Ami Yassour1 Muli Ben-Yehuda wrote: >>> >>> >> Why avoid rmap on mmio pages? Sure it's unnecessary work, but >> having less cases improves overall reliability. >> > > The rmap functions already have a check to bail out if the pte is not > an rmap pte, so in that sense, we aren't adding a new case for the > code to handle, just adding direct MMIO ptes to the existing list of > non-rmap ptes. > > I'm worried about the huge chain of direct_mmio parameters passed to functions, impact on the audit code (at the end of mmu.c), and the poor souls who debug the mmu. >> You can use pfn_valid() in gfn_to_pfn() and kvm_release_pfn_*() to >> conditionally update the page refcounts. >> > > Since rmap isn't useful for direct MMIO ptes, doesn't it make more > sense to "bail out" early rather than in the bowels of the rmap code? > It does, from a purist point of view (which also favors explicit parameters a la direct_mmio rather than indirect parameters like pfn_valid()), but I'm looking from the practical point of view now. With mmu notifiers, we don't need to hold the refcount at all. So presuming we drop the refcounting code completely, are any changes actually necessary here? -- Do not meddle in the internals of kernels, for they are subtle and quick to panic. ------------------------------------------------------------------------- This SF.net email is sponsored by the 2008 JavaOne(SM) Conference Don't miss this year's exciting event. There's still time to save $100. Use priority code J8TL2D2. http://ad.doubleclick.net/clk;198757673;13503038;p?http://java.sun.com/javaone ^ permalink raw reply [flat|nested] 9+ messages in thread
end of thread, other threads:[~2008-04-20 10:29 UTC | newest] Thread overview: 9+ messages (download: mbox.gz follow: Atom feed -- links below jump to the message on this page -- 2008-04-16 13:26 [PATCH 1/1] Enble a guest to access a device's memory mapped I/O regions directly benami 2008-04-16 13:26 ` benami 2008-04-18 15:56 ` Avi Kivity 2008-04-19 14:56 ` Muli Ben-Yehuda -- strict thread matches above, loose matches on Subject: below -- 2008-04-16 13:25 direct mmio for passthrough - kernel part benami 2008-04-16 13:25 ` [PATCH 1/1] Enble a guest to access a device's memory mapped I/O regions directly benami 2008-04-16 13:25 ` benami 2008-04-18 15:50 ` Avi Kivity 2008-04-19 14:35 ` Muli Ben-Yehuda 2008-04-20 10:29 ` Avi Kivity
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox