All of lore.kernel.org
 help / color / mirror / Atom feed
From: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
To: Zhi Wang <zhi.a.wang@intel.com>,
	intel-gfx@lists.freedesktop.org, igvt-g@lists.01.org
Cc: daniel.vetter@ffwll.ch, david.j.cowperthwaite@intel.com
Subject: Re: [RFC 17/29] gvt: Xen hypervisor GVT-g MPT module
Date: Thu, 28 Jan 2016 13:33:33 +0200	[thread overview]
Message-ID: <1453980813.5004.18.camel@linux.intel.com> (raw)
In-Reply-To: <1453976511-27322-18-git-send-email-zhi.a.wang@intel.com>

Hi,

See the file MAINTAINERS and add Cc: lines according to "XEN HYPERVISOR
INTERFACE". Also I think it'll be useful to split the i915 changes into a
separate patch next in the series (as the reviewer will be different).

We will have to wait for Xen maintainers to take a position on this. Are
there KVM counterparts for this stuff incoming?

On to, 2016-01-28 at 18:21 +0800, Zhi Wang wrote:
> This is the xen hypervisor MPT module which let GVT-g be able to run
> under
> Xen hypervisor.
> 

Cc: xen-devel@lists.xenproject.org
...and so on...

Regards, Joonas

> Signed-off-by: Zhi Wang <zhi.a.wang@intel.com>
> ---
>  arch/x86/include/asm/xen/hypercall.h |    7 +
>  arch/x86/include/asm/xen/interface.h |    1 +
>  arch/x86/xen/mmu.c                   |   83 +++
>  drivers/gpu/drm/i915/gvt/gvt.c       |   10 +
>  drivers/gpu/drm/i915/gvt/gvt.h       |   14 +
>  drivers/xen/Kconfig                  |    5 +
>  drivers/xen/Makefile                 |    6 +
>  drivers/xen/xengt.c                  | 1153
> ++++++++++++++++++++++++++++++++++
>  include/xen/interface/hvm/hvm_op.h   |  177 +++++-
>  include/xen/interface/hvm/ioreq.h    |  132 ++++
>  include/xen/interface/memory.h       |   28 +
>  include/xen/interface/xen.h          |  106 ++++
>  include/xen/xen-ops.h                |    5 +
>  13 files changed, 1726 insertions(+), 1 deletion(-)
>  create mode 100644 drivers/xen/xengt.c
>  create mode 100644 include/xen/interface/hvm/ioreq.h
> 
> diff --git a/arch/x86/include/asm/xen/hypercall.h
> b/arch/x86/include/asm/xen/hypercall.h
> index 3bcdcc8..aea97e3 100644
> --- a/arch/x86/include/asm/xen/hypercall.h
> +++ b/arch/x86/include/asm/xen/hypercall.h
> @@ -459,6 +459,13 @@ HYPERVISOR_hvm_op(int op, void *arg)
>  }
>  
>  static inline int
> +HYPERVISOR_domctl(
> +        struct xen_domctl *arg)
> +{
> +        return _hypercall1(int, domctl, arg);
> +}
> +
> +static inline int
>  HYPERVISOR_tmem_op(
>  	struct tmem_op *op)
>  {
> diff --git a/arch/x86/include/asm/xen/interface.h
> b/arch/x86/include/asm/xen/interface.h
> index 6ff4986..a4ee3f4 100644
> --- a/arch/x86/include/asm/xen/interface.h
> +++ b/arch/x86/include/asm/xen/interface.h
> @@ -89,6 +89,7 @@ typedef long xen_long_t;
>  /* Guest handles for primitive C types. */
>  __DEFINE_GUEST_HANDLE(uchar, unsigned char);
>  __DEFINE_GUEST_HANDLE(uint,  unsigned int);
> +__DEFINE_GUEST_HANDLE(ulong,  unsigned long);
>  DEFINE_GUEST_HANDLE(char);
>  DEFINE_GUEST_HANDLE(int);
>  DEFINE_GUEST_HANDLE(void);
> diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
> index c913ca4..da95d45 100644
> --- a/arch/x86/xen/mmu.c
> +++ b/arch/x86/xen/mmu.c
> @@ -2931,3 +2931,86 @@ int xen_unmap_domain_gfn_range(struct
> vm_area_struct *vma,
>  #endif
>  }
>  EXPORT_SYMBOL_GPL(xen_unmap_domain_gfn_range);
> +
> +/* Note: here 'mfn' is actually gfn!!! */
> +struct vm_struct * xen_remap_domain_mfn_range_in_kernel(unsigned
> long mfn,
> +		int nr, unsigned domid)
> +{
> +	struct vm_struct *area;
> +	struct remap_data rmd;
> +	struct mmu_update mmu_update[REMAP_BATCH_SIZE];
> +	int batch;
> +	unsigned long range, addr;
> +	pgprot_t prot;
> +	int err;
> +
> +	WARN_ON(in_interrupt() || irqs_disabled());
> +
> +	area = alloc_vm_area(nr << PAGE_SHIFT, NULL);
> +	if (!area)
> +		return NULL;
> +
> +	addr = (unsigned long)area->addr;
> +
> +	prot = __pgprot(pgprot_val(PAGE_KERNEL));
> +
> +	rmd.mfn = &mfn;
> +	rmd.prot = prot;
> +
> +	while (nr) {
> +		batch = min(REMAP_BATCH_SIZE, nr);
> +		range = (unsigned long)batch << PAGE_SHIFT;
> +
> +		rmd.mmu_update = mmu_update;
> +		err = apply_to_page_range(&init_mm, addr, range,
> +				remap_area_mfn_pte_fn, &rmd);
> +		if (err || HYPERVISOR_mmu_update(mmu_update, batch,
> NULL, domid) < 0)
> +			goto err;
> +
> +		nr -= batch;
> +		addr += range;
> +	}
> +
> +	xen_flush_tlb_all();
> +	return area;
> +err:
> +	free_vm_area(area);
> +	xen_flush_tlb_all();
> +	return NULL;
> +}
> +EXPORT_SYMBOL(xen_remap_domain_mfn_range_in_kernel);
> +
> +void xen_unmap_domain_mfn_range_in_kernel(struct vm_struct *area,
> int nr,
> +		unsigned domid)
> +{
> +	struct remap_data rmd;
> +	struct mmu_update mmu_update[REMAP_BATCH_SIZE];
> +	int batch;
> +	unsigned long range, addr = (unsigned long)area->addr;
> +#define INVALID_MFN (~0UL)
> +	unsigned long invalid_mfn = INVALID_MFN;
> +	int err;
> +
> +	WARN_ON(in_interrupt() || irqs_disabled());
> +
> +	rmd.mfn = &invalid_mfn;
> +	rmd.prot = PAGE_NONE;
> +
> +	while (nr) {
> +		batch = min(REMAP_BATCH_SIZE, nr);
> +		range = (unsigned long)batch << PAGE_SHIFT;
> +
> +		rmd.mmu_update = mmu_update;
> +		err = apply_to_page_range(&init_mm, addr, range,
> +				remap_area_mfn_pte_fn, &rmd);
> +		BUG_ON(err);
> +		BUG_ON(HYPERVISOR_mmu_update(mmu_update, batch,
> NULL, domid) < 0);
> +
> +		nr -= batch;
> +		addr += range;
> +	}
> +
> +	free_vm_area(area);
> +	xen_flush_tlb_all();
> +}
> +EXPORT_SYMBOL_GPL(xen_unmap_domain_mfn_range_in_kernel);
> diff --git a/drivers/gpu/drm/i915/gvt/gvt.c
> b/drivers/gpu/drm/i915/gvt/gvt.c
> index a71873c..28a51d9 100644
> --- a/drivers/gpu/drm/i915/gvt/gvt.c
> +++ b/drivers/gpu/drm/i915/gvt/gvt.c
> @@ -21,12 +21,14 @@
>   * SOFTWARE.
>   */
>  
> +#include <linux/types.h>
>  #include <xen/xen.h>
>  #include <linux/kthread.h>
>  
>  #include "gvt.h"
>  
>  struct gvt_host gvt_host;
> +EXPORT_SYMBOL(gvt_host);
>  
>  extern struct gvt_kernel_dm xengt_kdm;
>  extern struct gvt_kernel_dm kvmgt_kdm;
> @@ -36,6 +38,13 @@ static struct gvt_io_emulation_ops
> default_io_emulation_ops = {
>  	.emulate_mmio_write = gvt_emulate_mmio_write,
>  };
>  
> +unsigned int pa_to_mmio_offset(struct vgt_device *vgt,
> +               uint64_t pa);
> +
> +static struct gvt_mpt_ops default_export_mpt_ops = {
> +	.pa_to_mmio_offset = pa_to_mmio_offset,
> +};
> +
>  static const char *supported_hypervisors[] = {
>  	[GVT_HYPERVISOR_TYPE_XEN] = "Xen Hypervisor",
>  	[GVT_HYPERVISOR_TYPE_KVM] = "KVM",
> @@ -78,6 +87,7 @@ static bool gvt_init_host(void)
>  			supported_hypervisors[host-
> >hypervisor_type]);
>  
>  	host->emulate_ops = &default_io_emulation_ops;
> +	host->mpt_ops = &default_export_mpt_ops;
>  	idr_init(&host->device_idr);
>  	mutex_init(&host->device_idr_lock);
>  
> diff --git a/drivers/gpu/drm/i915/gvt/gvt.h
> b/drivers/gpu/drm/i915/gvt/gvt.h
> index eb5fd47..83f90a2 100644
> --- a/drivers/gpu/drm/i915/gvt/gvt.h
> +++ b/drivers/gpu/drm/i915/gvt/gvt.h
> @@ -58,6 +58,10 @@ struct gvt_io_emulation_ops {
>  	bool (*emulate_cfg_write)(struct vgt_device *, unsigned int,
> void *, int);
>  };
>  
> +struct gvt_mpt_ops {
> +	unsigned int (*pa_to_mmio_offset)(struct vgt_device *, u64);
> +};
> +
>  struct gvt_host {
>  	bool initialized;
>  	int hypervisor_type;
> @@ -65,6 +69,7 @@ struct gvt_host {
>  	struct idr device_idr;
>  	struct gvt_kernel_dm *kdm;
>  	struct gvt_io_emulation_ops *emulate_ops;
> +	struct gvt_mpt_ops *mpt_ops;
>  };
>  
>  extern struct gvt_host gvt_host;
> @@ -123,6 +128,9 @@ struct vgt_device {
>  	struct gvt_virtual_device_state state;
>  	struct gvt_statistics stat;
>  	struct gvt_vgtt_info gtt;
> +	void *hypervisor_data;
> +	unsigned long low_mem_max_gpfn;
> +	atomic_t crashing;
>  };
>  
>  struct gvt_gm_allocator {
> @@ -423,6 +431,12 @@ static inline int gvt_pci_mmio_is_enabled(struct
> vgt_device *vgt)
>  		_REGBIT_CFG_COMMAND_MEMORY;
>  }
>  
> +static inline uint64_t gvt_mmio_bar_base(struct vgt_device *vgt)
> +{
> +        char *cfg_space = &vgt->state.cfg.space[0];
> +        return *(u64 *)(cfg_space + GVT_REG_CFG_SPACE_BAR0);
> +}
> +
>  #define __vreg(vgt, off) (*(u32*)(vgt->state.mmio.vreg + off))
>  #define __vreg8(vgt, off) (*(u8*)(vgt->state.mmio.vreg + off))
>  #define __vreg16(vgt, off) (*(u16*)(vgt->state.mmio.vreg + off))
> diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig
> index 73708ac..9ee2033 100644
> --- a/drivers/xen/Kconfig
> +++ b/drivers/xen/Kconfig
> @@ -291,4 +291,9 @@ config XEN_SYMS
>  config XEN_HAVE_VPMU
>         bool
>  
> +config XENGT
> +        tristate "Xen Dom0 support for i915 gvt device model"
> +        depends on XEN_DOM0 && I915_GVT
> +        default m
> +
>  endmenu
> diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile
> index 9b7a35c..ff75c36 100644
> --- a/drivers/xen/Makefile
> +++ b/drivers/xen/Makefile
> @@ -9,6 +9,10 @@ CFLAGS_features.o			:=
> $(nostackp)
>  
>  CFLAGS_efi.o				+= -fshort-wchar
>  
> +
> +I915                     := drivers/gpu/drm/i915
> +CFLAGS_xengt.o          += -Wall -Werror -I$(I915) -I$(I915)/gvt
> +
>  dom0-$(CONFIG_PCI) += pci.o
>  dom0-$(CONFIG_USB_SUPPORT) += dbgp.o
>  dom0-$(CONFIG_XEN_ACPI) += acpi.o $(xen-pad-y)
> @@ -36,6 +40,8 @@ obj-$(CONFIG_XEN_ACPI_PROCESSOR)	+= xen-
> acpi-processor.o
>  obj-$(CONFIG_XEN_EFI)			+= efi.o
>  obj-$(CONFIG_XEN_SCSI_BACKEND)		+= xen-scsiback.o
>  obj-$(CONFIG_XEN_AUTO_XLATE)		+= xlate_mmu.o
> +obj-$(CONFIG_XENGT)                     += xengt.o
> +
>  xen-evtchn-y				:= evtchn.o
>  xen-gntdev-y				:= gntdev.o
>  xen-gntalloc-y				:= gntalloc.o
> diff --git a/drivers/xen/xengt.c b/drivers/xen/xengt.c
> new file mode 100644
> index 0000000..6c600adc
> --- /dev/null
> +++ b/drivers/xen/xengt.c
> @@ -0,0 +1,1153 @@
> +/*
> + * Copyright(c) 2011-2016 Intel Corporation. All rights reserved.
> + *
> + * This program is free software; you can redistribute it and/or
> modify
> + * it under the terms of Version 2 of the GNU General Public License
> as
> + * published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope that it will be useful,
> but
> + * WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + * General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-
> 1301 USA.
> + */
> +
> +/*
> + * NOTE:
> + * This file contains hypervisor specific interactions to
> + * implement the concept of mediated pass-through framework.
> + * What this file provides is actually a general abstraction
> + * of in-kernel device model, which is not vgt specific.
> + *
> + * Now temporarily in vgt code. long-term this should be
> + * in hypervisor (xen/kvm) specific directory
> + */
> +#include <linux/kernel.h>
> +#include <linux/mm.h>
> +#include <linux/types.h>
> +#include <linux/kthread.h>
> +#include <linux/time.h>
> +#include <linux/freezer.h>
> +#include <linux/wait.h>
> +#include <linux/sched.h>
> +
> +#include <asm/xen/hypercall.h>
> +#include <asm/xen/page.h>
> +#include <xen/xen-ops.h>
> +#include <xen/events.h>
> +#include <xen/interface/hvm/params.h>
> +#include <xen/interface/hvm/hvm_op.h>
> +#include <xen/interface/hvm/ioreq.h>
> +#include <xen/interface/memory.h>
> +#include <xen/interface/platform.h>
> +#include <xen/interface/vcpu.h>
> +
> +#include "gvt.h"
> +
> +MODULE_AUTHOR("Intel Corporation");
> +MODULE_DESCRIPTION("XenGT mediated passthrough driver");
> +MODULE_LICENSE("GPL");
> +MODULE_VERSION("0.1");
> +
> +#define MAX_HVM_VCPUS_SUPPORTED 128
> +struct gvt_hvm_info {
> +	/* iopage_vma->addr is just iopage. We need iopage_vma on VM
> destroy */
> +	shared_iopage_t *iopage;
> +	struct vm_struct *iopage_vma;
> +	int *evtchn_irq; /* the event channle irqs to handle HVM io
> request
> +				index is vcpu id */
> +
> +	DECLARE_BITMAP(ioreq_pending, MAX_HVM_VCPUS_SUPPORTED);
> +	wait_queue_head_t io_event_wq;
> +	struct task_struct *emulation_thread;
> +
> +	int nr_vcpu;
> +
> +	ioservid_t iosrv_id;    /* io-request server id */
> +
> +#define VMEM_1MB		(1ULL << 20)	/* the size of
> the first 1MB */
> +#define VMEM_BUCK_SHIFT		20
> +#define VMEM_BUCK_SIZE		(1ULL << VMEM_BUCK_SHIFT)
> +#define VMEM_BUCK_MASK		(~(VMEM_BUCK_SIZE - 1))
> +	uint64_t vmem_sz;
> +	/* for the 1st 1MB memory of HVM: each vm_struct means one
> 4K-page */
> +	struct vm_struct **vmem_vma_low_1mb;
> +	/* for >1MB memory of HVM: each vm_struct means 1MB */
> +	struct vm_struct **vmem_vma;
> +	/* for >1MB memory of HVM: each vm_struct means 4KB */
> +	struct vm_struct **vmem_vma_4k;
> +};
> +
> +static int xen_pause_domain(int vm_id);
> +static int xen_shutdown_domain(int vm_id);
> +static void *xen_gpa_to_va(struct vgt_device *vgt, unsigned long
> gpa);
> +
> +#define XEN_ASSERT_VM(x, vgt)					
> 	\
> +	do {								
> \
> +		if (!(x)) {						
> \
> +			printk("Assert at %s line %d\n",		
> \
> +				__FILE__, __LINE__);			
> \
> +			if (atomic_cmpxchg(&(vgt)->crashing, 0, 1))	
> \
> +				break;				
> 	\
> +			gvt_err("Killing VM%d\n", (vgt)->vm_id);	
> \
> +			if (!xen_pause_domain((vgt->vm_id)))		
> \
> +				xen_shutdown_domain((vgt->vm_id));	
> \
> +		}							
> \
> +	} while (0)
> +
> +/* Translate from VM's guest pfn to machine pfn */
> +static unsigned long xen_g2m_pfn(int vm_id, unsigned long g_pfn)
> +{
> +	struct xen_get_mfn_from_pfn pfn_arg;
> +	int rc;
> +	unsigned long pfn_list[1];
> +
> +	pfn_list[0] = g_pfn;
> +
> +	set_xen_guest_handle(pfn_arg.pfn_list, pfn_list);
> +	pfn_arg.nr_pfns = 1;
> +	pfn_arg.domid = vm_id;
> +
> +	rc = HYPERVISOR_memory_op(XENMEM_get_mfn_from_pfn,
> &pfn_arg);
> +	if(rc < 0){
> +		printk("failed to get mfn for gpfn(0x%lx)\n,
> errno=%d\n", g_pfn, rc);
> +		return INVALID_MFN;
> +	}
> +
> +	return pfn_list[0];
> +}
> +
> +static int xen_get_max_gpfn(int vm_id)
> +{
> +	domid_t dom_id = vm_id;
> +	int max_gpfn = HYPERVISOR_memory_op(XENMEM_maximum_gpfn,
> &dom_id);
> +	BUG_ON(max_gpfn < 0);
> +	return max_gpfn;
> +}
> +
> +static int xen_pause_domain(int vm_id)
> +{
> +	int rc;
> +	struct xen_domctl domctl;
> +
> +	domctl.domain = vm_id;
> +	domctl.cmd = XEN_DOMCTL_pausedomain;
> +	domctl.interface_version = XEN_DOMCTL_INTERFACE_VERSION;
> +
> +	rc = HYPERVISOR_domctl(&domctl);
> +	if (rc != 0)
> +		printk("HYPERVISOR_domctl pausedomain fail with
> %d!\n", rc);
> +
> +	return rc;
> +}
> +
> +static int xen_shutdown_domain(int vm_id)
> +{
> +	int rc;
> +	struct sched_remote_shutdown r;
> +
> +	r.reason = SHUTDOWN_crash;
> +	r.domain_id = vm_id;
> +	rc = HYPERVISOR_sched_op(SCHEDOP_remote_shutdown, &r);
> +	if (rc != 0)
> +		printk("HYPERVISOR_sched_op failed: %d\n", rc);
> +	return rc;
> +}
> +
> +static int xen_domain_iomem_perm(uint32_t domain_id, uint64_t
> first_mfn,
> +                               uint64_t nr_mfns, uint8_t
> allow_access)
> +{
> +	struct xen_domctl arg;
> +	int rc;
> +
> +	arg.domain = domain_id;
> +	arg.cmd = XEN_DOMCTL_iomem_permission;
> +	arg.interface_version = XEN_DOMCTL_INTERFACE_VERSION;
> +	arg.u.iomem_perm.first_mfn = first_mfn;
> +	arg.u.iomem_perm.nr_mfns = nr_mfns;
> +	arg.u.iomem_perm.allow_access = allow_access;
> +	rc = HYPERVISOR_domctl(&arg);
> +
> +	return rc;
> +}
> +
> +static int xen_hvm_memory_mapping(int vm_id, uint64_t first_gfn,
> uint64_t first_mfn,
> +				  uint32_t nr_mfns, uint32_t
> add_mapping)
> +{
> +	struct xen_domctl arg;
> +	int rc;
> +
> +	if (add_mapping) {
> +		rc = xen_domain_iomem_perm(vm_id, first_mfn,
> nr_mfns, 1);
> +	        if (rc < 0) {
> +			printk(KERN_ERR "xen_domain_iomem_perm
> failed: %d\n", rc);
> +			return rc;
> +		}
> +	}
> +
> +	arg.domain = vm_id;
> +	arg.cmd = XEN_DOMCTL_memory_mapping;
> +	arg.interface_version = XEN_DOMCTL_INTERFACE_VERSION;
> +	arg.u.memory_mapping.first_gfn = first_gfn;
> +	arg.u.memory_mapping.first_mfn = first_mfn;
> +	arg.u.memory_mapping.nr_mfns = nr_mfns;
> +	arg.u.memory_mapping.add_mapping = add_mapping;
> +
> +	rc = HYPERVISOR_domctl(&arg);
> +	if (rc < 0) {
> +		printk(KERN_ERR "HYPERVISOR_domctl failed: %d\n",
> rc);
> +		return rc;
> +	}
> +
> +	if (!add_mapping) {
> +		rc = xen_domain_iomem_perm(vm_id, first_mfn,
> nr_mfns, 0);
> +	        if (rc < 0) {
> +			printk(KERN_ERR "xen_domain_iomem_perm
> failed: %d\n", rc);
> +			return rc;
> +		}
> +	}
> +
> +	return rc;
> +}
> +
> +static int xen_map_mfn_to_gpfn(int vm_id, unsigned long gpfn,
> +	unsigned long mfn, int nr, int map, enum map_type type)
> +{
> +	int rc;
> +	rc = xen_hvm_memory_mapping(vm_id, gpfn, mfn, nr,
> +			map ? DPCI_ADD_MAPPING :
> DPCI_REMOVE_MAPPING);
> +	if (rc != 0)
> +		printk("xen_hvm_memory_mapping failed: %d\n", rc);
> +	return rc;
> +}
> +
> +static int xen_get_nr_vcpu(int vm_id)
> +{
> +	struct xen_domctl arg;
> +	int rc;
> +
> +	arg.domain = vm_id;
> +	arg.cmd = XEN_DOMCTL_getdomaininfo;
> +	arg.interface_version = XEN_DOMCTL_INTERFACE_VERSION;
> +
> +	rc = HYPERVISOR_domctl(&arg);
> +	if (rc<0){
> +		printk(KERN_ERR "HYPERVISOR_domctl fail
> ret=%d\n",rc);
> +		/* assume it is UP */
> +		return 1;
> +	}
> +
> +	return arg.u.getdomaininfo.max_vcpu_id + 1;
> +}
> +
> +static int hvm_create_iorequest_server(struct vgt_device *vgt)
> +{
> +	struct gvt_hvm_info *info = vgt->hypervisor_data;
> +	struct xen_hvm_create_ioreq_server arg;
> +	int r;
> +
> +	arg.domid = vgt->vm_id;
> +	arg.handle_bufioreq = 0;
> +	r = HYPERVISOR_hvm_op(HVMOP_create_ioreq_server, &arg);
> +	if (r < 0) {
> +		printk(KERN_ERR "Cannot create io-requset server:
> %d!\n", r);
> +		return r;
> +	}
> +	info->iosrv_id = arg.id;
> +
> +	return r;
> +}
> +
> +static int hvm_toggle_iorequest_server(struct vgt_device *vgt, bool
> enable)
> +{
> +	struct gvt_hvm_info *info = vgt->hypervisor_data;
> +	struct xen_hvm_set_ioreq_server_state arg;
> +	int r;
> +
> +	arg.domid = vgt->vm_id;
> +	arg.id = info->iosrv_id;
> +	arg.enabled = enable;
> +	r = HYPERVISOR_hvm_op(HVMOP_set_ioreq_server_state, &arg);
> +	if (r < 0) {
> +		printk(KERN_ERR "Cannot %s io-request server:
> %d!\n",
> +			enable ? "enable" : "disbale",  r);
> +		return r;
> +	}
> +
> +       return r;
> +}
> +
> +static int hvm_get_ioreq_pfn(struct vgt_device *vgt, uint64_t
> *value)
> +{
> +	struct gvt_hvm_info *info = vgt->hypervisor_data;
> +	struct xen_hvm_get_ioreq_server_info arg;
> +	int r;
> +
> +	arg.domid = vgt->vm_id;
> +	arg.id = info->iosrv_id;
> +	r = HYPERVISOR_hvm_op(HVMOP_get_ioreq_server_info, &arg);
> +	if (r < 0) {
> +		printk(KERN_ERR "Cannot get ioreq pfn: %d!\n", r);
> +		return r;
> +	}
> +	*value = arg.ioreq_pfn;
> +	return r;
> +}
> +
> +static int hvm_destroy_iorequest_server(struct vgt_device *vgt)
> +{
> +	struct gvt_hvm_info *info = vgt->hypervisor_data;
> +	struct xen_hvm_destroy_ioreq_server arg;
> +	int r;
> +
> +	arg.domid = vgt->vm_id;
> +	arg.id = info->iosrv_id;
> +	r = HYPERVISOR_hvm_op(HVMOP_destroy_ioreq_server, &arg);
> +	if (r < 0) {
> +		printk(KERN_ERR "Cannot destroy io-request
> server(%d): %d!\n",
> +			info->iosrv_id, r);
> +		return r;
> +	}
> +	info->iosrv_id = 0;
> +
> +	return r;
> +}
> +
> +static int hvm_map_io_range_to_ioreq_server(struct vgt_device *vgt,
> +	int is_mmio, uint64_t start, uint64_t end, int map)
> +{
> +	struct gvt_hvm_info *info = vgt->hypervisor_data;
> +	xen_hvm_io_range_t arg;
> +	int rc;
> +
> +	arg.domid = vgt->vm_id;
> +	arg.id = info->iosrv_id;
> +	arg.type = is_mmio ? HVMOP_IO_RANGE_MEMORY :
> HVMOP_IO_RANGE_PORT;
> +	arg.start = start;
> +	arg.end = end;
> +
> +	if (map)
> +		rc =
> HYPERVISOR_hvm_op(HVMOP_map_io_range_to_ioreq_server, &arg);
> +	else
> +		rc =
> HYPERVISOR_hvm_op(HVMOP_unmap_io_range_from_ioreq_server, &arg);
> +
> +	return rc;
> +}
> +
> +static int hvm_map_pcidev_to_ioreq_server(struct vgt_device *vgt,
> uint64_t sbdf)
> +{
> +	struct gvt_hvm_info *info = vgt->hypervisor_data;
> +	xen_hvm_io_range_t arg;
> +	int rc;
> +
> +	arg.domid = vgt->vm_id;
> +	arg.id = info->iosrv_id;
> +	arg.type = HVMOP_IO_RANGE_PCI;
> +	arg.start = arg.end = sbdf;
> +	rc = HYPERVISOR_hvm_op(HVMOP_map_io_range_to_ioreq_server,
> &arg);
> +	if (rc < 0) {
> +		printk(KERN_ERR "Cannot map pci_dev to ioreq_server:
> %d!\n", rc);
> +		return rc;
> +	}
> +
> +	return rc;
> +}
> +
> +static int hvm_set_mem_type(struct vgt_device *vgt,
> +	uint16_t mem_type, uint64_t first_pfn, uint64_t nr)
> +{
> +	xen_hvm_set_mem_type_t args;
> +	int rc;
> +
> +	args.domid = vgt->vm_id;
> +	args.hvmmem_type = mem_type;
> +	args.first_pfn = first_pfn;
> +	args.nr = 1;
> +	rc = HYPERVISOR_hvm_op(HVMOP_set_mem_type, &args);
> +
> +	return rc;
> +}
> +
> +static int hvm_wp_page_to_ioreq_server(struct vgt_device *vgt,
> unsigned long page, int set)
> +{
> +	int rc = 0;
> +	uint64_t start, end;
> +	uint16_t mem_type;
> +
> +	start = page << PAGE_SHIFT;
> +	end = ((page + 1) << PAGE_SHIFT) - 1;
> +
> +	rc = hvm_map_io_range_to_ioreq_server(vgt, 1, start, end,
> set);
> +	if (rc < 0) {
> +		printk(KERN_ERR "Failed to %s page 0x%lx to
> ioreq_server: %d!\n",
> +			set ? "map":"unmap", page , rc);
> +		return rc;
> +	}
> +
> +	mem_type = set ? HVMMEM_mmio_write_dm : HVMMEM_ram_rw;
> +	rc = hvm_set_mem_type(vgt, mem_type, page, 1);
> +	if (rc < 0) {
> +		printk(KERN_ERR "Failed to set mem type of page
> 0x%lx to %s!\n", page,
> +			set ?
> "HVMMEM_mmio_write_dm":"HVMMEM_ram_rw");
> +		return rc;
> +	}
> +	return rc;
> +}
> +
> +static int xen_set_trap_area(struct vgt_device *vgt, uint64_t start,
> uint64_t end, bool map)
> +{
> +	if (!gvt_pci_mmio_is_enabled(vgt))
> +		return 0;
> +
> +	return hvm_map_io_range_to_ioreq_server(vgt, 1, start, end,
> map);
> +}
> +
> +static struct vm_struct *xen_map_iopage(struct vgt_device *vgt)
> +{
> +	uint64_t ioreq_pfn;
> +	int rc;
> +
> +	rc = hvm_create_iorequest_server(vgt);
> +	if (rc < 0)
> +		return NULL;
> +	rc = hvm_get_ioreq_pfn(vgt, &ioreq_pfn);
> +	if (rc < 0) {
> +		hvm_destroy_iorequest_server(vgt);
> +		return NULL;
> +	}
> +
> +	return xen_remap_domain_mfn_range_in_kernel(ioreq_pfn, 1,
> vgt->vm_id);
> +}
> +
> +static bool xen_set_guest_page_writeprotection(struct vgt_device
> *vgt,
> +		guest_page_t *guest_page)
> +{
> +	int r;
> +
> +	if (guest_page->writeprotection)
> +		return true;
> +
> +	r = hvm_wp_page_to_ioreq_server(vgt, guest_page->gfn, 1);
> +	if (r) {
> +		gvt_err("fail to set write protection.\n");
> +		return false;
> +	}
> +
> +	guest_page->writeprotection = true;
> +
> +	atomic_inc(&vgt->gtt.n_write_protected_guest_page);
> +
> +	return true;
> +}
> +
> +static bool xen_clear_guest_page_writeprotection(struct vgt_device
> *vgt,
> +		guest_page_t *guest_page)
> +{
> +	int r;
> +
> +	if (!guest_page->writeprotection)
> +		return true;
> +
> +	r = hvm_wp_page_to_ioreq_server(vgt, guest_page->gfn, 0);
> +	if (r) {
> +		gvt_err("fail to clear write protection.\n");
> +		return false;
> +	}
> +
> +	guest_page->writeprotection = false;
> +
> +	atomic_dec(&vgt->gtt.n_write_protected_guest_page);
> +
> +	return true;
> +}
> +
> +static int xen_detect_host(void)
> +{
> +	return xen_initial_domain();
> +}
> +
> +static int xen_virt_to_mfn(void *addr)
> +{
> +	return virt_to_mfn(addr);
> +}
> +
> +static void *xen_mfn_to_virt(int mfn)
> +{
> +	return mfn_to_virt(mfn);
> +}
> +
> +static int xen_inject_msi(int vm_id, u32 addr_lo, u16 data)
> +{
> +	struct xen_hvm_inject_msi info = {
> +		.domid	= vm_id,
> +		.addr	= addr_lo, /* only low addr used */
> +		.data	= data,
> +	};
> +
> +	return HYPERVISOR_hvm_op(HVMOP_inject_msi, &info);
> +}
> +
> +static int vgt_hvm_vmem_init(struct vgt_device *vgt)
> +{
> +	unsigned long i, j, gpfn, count;
> +	unsigned long nr_low_1mb_bkt, nr_high_bkt, nr_high_4k_bkt;
> +	struct gvt_hvm_info *info = vgt->hypervisor_data;
> +
> +	if (!vgt->vm_id)
> +		return 0;
> +
> +	ASSERT(info->vmem_vma == NULL && info->vmem_vma_low_1mb ==
> NULL);
> +
> +	info->vmem_sz = xen_get_max_gpfn(vgt->vm_id) + 1;
> +	info->vmem_sz <<= PAGE_SHIFT;
> +
> +	/* warn on non-1MB-aligned memory layout of HVM */
> +	if (info->vmem_sz & ~VMEM_BUCK_MASK)
> +		gvt_err("VM%d: vmem_sz=0x%llx!\n", vgt->vm_id, info-
> >vmem_sz);
> +
> +	nr_low_1mb_bkt = VMEM_1MB >> PAGE_SHIFT;
> +	nr_high_bkt = (info->vmem_sz >> VMEM_BUCK_SHIFT);
> +	nr_high_4k_bkt = (info->vmem_sz >> PAGE_SHIFT);
> +
> +	info->vmem_vma_low_1mb =
> +		vzalloc(sizeof(*info->vmem_vma) * nr_low_1mb_bkt);
> +	info->vmem_vma =
> +		vzalloc(sizeof(*info->vmem_vma) * nr_high_bkt);
> +	info->vmem_vma_4k =
> +		vzalloc(sizeof(*info->vmem_vma) * nr_high_4k_bkt);
> +
> +	if (info->vmem_vma_low_1mb == NULL || info->vmem_vma == NULL
> ||
> +		info->vmem_vma_4k == NULL) {
> +		gvt_err("Insufficient memory for vmem_vma,
> vmem_sz=0x%llx\n",
> +				info->vmem_sz );
> +		goto err;
> +	}
> +
> +	/* map the low 1MB memory */
> +	for (i = 0; i < nr_low_1mb_bkt; i++) {
> +		info->vmem_vma_low_1mb[i] =
> +			xen_remap_domain_mfn_range_in_kernel(i, 1,
> vgt->vm_id);
> +
> +		if (info->vmem_vma_low_1mb[i] != NULL)
> +			continue;
> +
> +		/* Don't warn on [0xa0000, 0x100000): a known non-
> RAM hole */
> +		if (i < (0xa0000 >> PAGE_SHIFT))
> +			printk(KERN_ERR "GVT: VM%d: can't map GPFN
> %ld!\n",
> +				vgt->vm_id, i);
> +	}
> +
> +	printk("start vmem_map\n");
> +	count = 0;
> +	/* map the >1MB memory */
> +	for (i = 1; i < nr_high_bkt; i++) {
> +		gpfn = i << (VMEM_BUCK_SHIFT - PAGE_SHIFT);
> +		info->vmem_vma[i] =
> xen_remap_domain_mfn_range_in_kernel(
> +				gpfn, VMEM_BUCK_SIZE >> PAGE_SHIFT,
> vgt->vm_id);
> +
> +		if (info->vmem_vma[i] != NULL)
> +			continue;
> +
> +
> +		/* for <4G GPFNs: skip the hole after
> low_mem_max_gpfn */
> +		if (gpfn < (1 << (32 - PAGE_SHIFT)) &&
> +			vgt->low_mem_max_gpfn != 0 &&
> +			gpfn > vgt->low_mem_max_gpfn)
> +			continue;
> +
> +		for (j = gpfn;
> +		     j < ((i + 1) << (VMEM_BUCK_SHIFT -
> PAGE_SHIFT));
> +		     j++) {
> +			info->vmem_vma_4k[j] =
> xen_remap_domain_mfn_range_in_kernel(j, 1, vgt->vm_id);
> +
> +			if (info->vmem_vma_4k[j]) {
> +				count++;
> +				printk(KERN_ERR "map 4k gpa
> (%lx)\n", j << PAGE_SHIFT);
> +			}
> +		}
> +
> +		/* To reduce the number of err messages(some of
> them, due to
> +		 * the MMIO hole, are spurious and harmless) we only
> print a
> +		 * message if it's at every 64MB boundary or >4GB
> memory.
> +		 */
> +		if ((i % 64 == 0) || (i >= (1ULL << (32 -
> VMEM_BUCK_SHIFT))))
> +			printk(KERN_ERR "GVT: VM%d: can't map
> %ldKB\n",
> +				vgt->vm_id, i);
> +	}
> +	printk("end vmem_map (%ld 4k mappings)\n", count);
> +
> +	return 0;
> +err:
> +	vfree(info->vmem_vma);
> +	vfree(info->vmem_vma_low_1mb);
> +	vfree(info->vmem_vma_4k);
> +	info->vmem_vma = info->vmem_vma_low_1mb = info->vmem_vma_4k
> = NULL;
> +	return -ENOMEM;
> +}
> +
> +static void vgt_vmem_destroy(struct vgt_device *vgt)
> +{
> +	int i, j;
> +	unsigned long nr_low_1mb_bkt, nr_high_bkt, nr_high_bkt_4k;
> +	struct gvt_hvm_info *info = vgt->hypervisor_data;
> +
> +	if (vgt->vm_id == 0)
> +		return;
> +
> +	/*
> +	 * Maybe the VM hasn't accessed GEN MMIO(e.g., still in the
> legacy VGA
> +	 * mode), so no mapping is created yet.
> +	 */
> +	if (info->vmem_vma == NULL && info->vmem_vma_low_1mb ==
> NULL)
> +		return;
> +
> +	ASSERT(info->vmem_vma != NULL && info->vmem_vma_low_1mb !=
> NULL);
> +
> +	nr_low_1mb_bkt = VMEM_1MB >> PAGE_SHIFT;
> +	nr_high_bkt = (info->vmem_sz >> VMEM_BUCK_SHIFT);
> +	nr_high_bkt_4k = (info->vmem_sz >> PAGE_SHIFT);
> +
> +	for (i = 0; i < nr_low_1mb_bkt; i++) {
> +		if (info->vmem_vma_low_1mb[i] == NULL)
> +			continue;
> +		xen_unmap_domain_mfn_range_in_kernel(info-
> >vmem_vma_low_1mb[i],
> +				1, vgt->vm_id);
> +	}
> +
> +	for (i = 1; i < nr_high_bkt; i++) {
> +		if (info->vmem_vma[i] == NULL) {
> +			for (j = (i << (VMEM_BUCK_SHIFT -
> PAGE_SHIFT));
> +			     j < ((i + 1) << (VMEM_BUCK_SHIFT -
> PAGE_SHIFT));
> +			     j++) {
> +				if (info->vmem_vma_4k[j] == NULL)
> +					continue;
> +				xen_unmap_domain_mfn_range_in_kernel
> (
> +					info->vmem_vma_4k[j], 1,
> vgt->vm_id);
> +			}
> +			continue;
> +		}
> +		xen_unmap_domain_mfn_range_in_kernel(
> +			info->vmem_vma[i], VMEM_BUCK_SIZE >>
> PAGE_SHIFT,
> +			vgt->vm_id);
> +	}
> +
> +	vfree(info->vmem_vma);
> +	vfree(info->vmem_vma_low_1mb);
> +	vfree(info->vmem_vma_4k);
> +}
> +
> +static int _hvm_mmio_emulation(struct vgt_device *vgt, struct ioreq
> *req)
> +{
> +	int i, sign;
> +	void *gva;
> +	unsigned long gpa;
> +	uint64_t base = gvt_mmio_bar_base(vgt);
> +	uint64_t tmp;
> +	int pvinfo_page;
> +	struct gvt_hvm_info *info = vgt->hypervisor_data;
> +
> +	if (info->vmem_vma == NULL) {
> +		tmp = gvt_host.mpt_ops->pa_to_mmio_offset(vgt, req-
> >addr);
> +		pvinfo_page = (tmp >= VGT_PVINFO_PAGE
> +				&& tmp < (VGT_PVINFO_PAGE +
> VGT_PVINFO_SIZE));
> +		/*
> +		 * hvmloader will read PVINFO to identify if HVM is
> in GVT
> +		 * or VTD. So we don't trigger HVM mapping logic
> here.
> +		 */
> +		if (!pvinfo_page && vgt_hvm_vmem_init(vgt) < 0) {
> +			gvt_err("can not map the memory of
> VM%d!!!\n", vgt->vm_id);
> +			XEN_ASSERT_VM(info->vmem_vma != NULL, vgt);
> +			return -EINVAL;
> +		}
> +	}
> +
> +	sign = req->df ? -1 : 1;
> +
> +	if (req->dir == IOREQ_READ) {
> +		/* MMIO READ */
> +		if (!req->data_is_ptr) {
> +			if (req->count != 1)
> +				goto err_ioreq_count;
> +
> +			//vgt_dbg(GVT_DBG_GENERIC,"HVM_MMIO_read:
> target register (%lx).\n",
> +			//	(unsigned long)req->addr);
> +			if (!gvt_host.emulate_ops-
> >emulate_mmio_read(vgt, req->addr, &req->data, req->size))
> +				return -EINVAL;
> +		}
> +		else {
> +			if ((req->addr + sign * req->count * req-
> >size < base)
> +			   || (req->addr + sign * req->count * req-
> >size >=
> +				base + vgt->state.cfg.bar_size[0]))
> +				goto err_ioreq_range;
> +			//vgt_dbg(GVT_DBG_GENERIC,"HVM_MMIO_read:
> rep %d target memory %lx, slow!\n",
> +			//	req->count, (unsigned long)req-
> >addr);
> +
> +			for (i = 0; i < req->count; i++) {
> +				if (!gvt_host.emulate_ops-
> >emulate_mmio_read(vgt, req->addr + sign * i * req->size,
> +					&tmp, req->size))
> +					return -EINVAL;
> +				gpa = req->data + sign * i * req-
> >size;
> +				if(!vgt->vm_id)
> +					gva = (char
> *)xen_mfn_to_virt(gpa >> PAGE_SHIFT) + offset_in_page(gpa);
> +				else
> +					gva = xen_gpa_to_va(vgt,
> gpa);
> +				if (gva) {
> +					memcpy(gva, &tmp, req-
> >size);
> +				} else
> +					gvt_err("VM %d is trying to
> store mmio data block to invalid gpa: 0x%lx.\n", vgt->vm_id, gpa);
> +			}
> +		}
> +	}
> +	else { /* MMIO Write */
> +		if (!req->data_is_ptr) {
> +			if (req->count != 1)
> +				goto err_ioreq_count;
> +			//vgt_dbg(GVT_DBG_GENERIC,"HVM_MMIO_write:
> target register (%lx).\n", (unsigned long)req->addr);
> +			if (!gvt_host.emulate_ops-
> >emulate_mmio_write(vgt, req->addr, &req->data, req->size))
> +				return -EINVAL;
> +		}
> +		else {
> +			if ((req->addr + sign * req->count * req-
> >size < base)
> +			    || (req->addr + sign * req->count * req-
> >size >=
> +				base + vgt->state.cfg.bar_size[0]))
> +				goto err_ioreq_range;
> +			//vgt_dbg(GVT_DBG_GENERIC,"HVM_MMIO_write:
> rep %d target memory %lx, slow!\n",
> +			//	req->count, (unsigned long)req-
> >addr);
> +
> +			for (i = 0; i < req->count; i++) {
> +				gpa = req->data + sign * i * req-
> >size;
> +				if(!vgt->vm_id)
> +					gva = (char
> *)xen_mfn_to_virt(gpa >> PAGE_SHIFT) + offset_in_page(gpa);
> +				else
> +					gva = xen_gpa_to_va(vgt,
> gpa);
> +
> +				if (gva != NULL)
> +					memcpy(&tmp, gva, req-
> >size);
> +				else {
> +					tmp = 0;
> +					printk(KERN_ERR "GVT: can
> not read gpa = 0x%lx!!!\n", gpa);
> +				}
> +				if (!gvt_host.emulate_ops-
> >emulate_mmio_write(vgt, req->addr + sign * i * req->size, &tmp, req-
> >size))
> +					return -EINVAL;
> +			}
> +		}
> +	}
> +
> +	return 0;
> +
> +err_ioreq_count:
> +	gvt_err("VM(%d): Unexpected %s request count(%d)\n",
> +		vgt->vm_id, req->dir == IOREQ_READ ? "read" :
> "write",
> +		req->count);
> +	return -EINVAL;
> +
> +err_ioreq_range:
> +	gvt_err("VM(%d): Invalid %s request addr end(%016llx)\n",
> +		vgt->vm_id, req->dir == IOREQ_READ ? "read" :
> "write",
> +		req->addr + sign * req->count * req->size);
> +	return -ERANGE;
> +}
> +
> +static bool vgt_hvm_write_cfg_space(struct vgt_device *vgt,
> +	uint64_t addr, unsigned int bytes, unsigned long val)
> +{
> +	/* Low 32 bit of addr is real address, high 32 bit is bdf */
> +	unsigned int port = addr & 0xffffffff;
> +
> +	ASSERT(((bytes == 4) && ((port & 3) == 0)) ||
> +		((bytes == 2) && ((port & 1) == 0)) || (bytes ==
> 1));
> +	gvt_host.emulate_ops->emulate_cfg_write(vgt, port, &val,
> bytes);
> +	return true;
> +}
> +
> +static bool vgt_hvm_read_cfg_space(struct vgt_device *vgt,
> +	uint64_t addr, unsigned int bytes, unsigned long *val)
> +{
> +	unsigned long data;
> +	/* Low 32 bit of addr is real address, high 32 bit is bdf */
> +	unsigned int port = addr & 0xffffffff;
> +
> +	ASSERT (((bytes == 4) && ((port & 3) == 0)) ||
> +		((bytes == 2) && ((port & 1) == 0)) || (bytes ==
> 1));
> +	gvt_host.emulate_ops->emulate_cfg_read(vgt, port, &data,
> bytes);
> +	memcpy(val, &data, bytes);
> +	return true;
> +}
> +
> +static int _hvm_pio_emulation(struct vgt_device *vgt, struct ioreq
> *ioreq)
> +{
> +	int sign;
> +
> +	sign = ioreq->df ? -1 : 1;
> +
> +	if (ioreq->dir == IOREQ_READ) {
> +		/* PIO READ */
> +		if (!ioreq->data_is_ptr) {
> +			if(!vgt_hvm_read_cfg_space(vgt,
> +				ioreq->addr,
> +				ioreq->size,
> +				(unsigned long*)&ioreq->data))
> +				return -EINVAL;
> +		} else {
> +			printk(KERN_ERR "GVT: _hvm_pio_emulation
> read data_ptr %lx\n",
> +			(long)ioreq->data);
> +			goto err_data_ptr;
> +		}
> +	} else {
> +		/* PIO WRITE */
> +		if (!ioreq->data_is_ptr) {
> +			if (!vgt_hvm_write_cfg_space(vgt,
> +				ioreq->addr,
> +				ioreq->size,
> +				(unsigned long)ioreq->data))
> +				return -EINVAL;
> +		} else {
> +			printk(KERN_ERR "GVT: _hvm_pio_emulation
> write data_ptr %lx\n",
> +			(long)ioreq->data);
> +			goto err_data_ptr;
> +		}
> +	}
> +	return 0;
> +err_data_ptr:
> +	/* The data pointer of emulation is guest physical address
> +	 * so far, which goes to Qemu emulation, but hard for
> +	 * GVT driver which doesn't know gpn_2_mfn translation.
> +	 * We may ask hypervisor to use mfn for GVT driver.
> +	 * We mark it as unsupported in case the guest really uses it.
> +	 */
> +	gvt_err("VM(%d): Unsupported %s data_ptr(%lx)\n",
> +		vgt->vm_id, ioreq->dir == IOREQ_READ ? "read" :
> "write",
> +		(long)ioreq->data);
> +	return -EINVAL;
> +}
> +
> +#define PCI_BDF2(b,df)  ((((b) & 0xff) << 8) | ((df) & 0xff))
> +
> +static int vgt_hvm_do_ioreq(struct vgt_device *vgt, struct ioreq
> *ioreq)
> +{
> +	struct pgt_device *pdev = vgt->pdev;
> +	struct pci_dev *pci_dev = pdev->dev_priv->dev->pdev;
> +	uint64_t bdf = PCI_BDF2(pci_dev->bus->number, pci_dev-
> >devfn);
> +
> +	/* When using ioreq-server, sometimes an event channel
> +	 * notification is received with invalid ioreq. Don't
> +	 * know the root cause. Put the workaround here.
> +	 */
> +	if (ioreq->state == STATE_IOREQ_NONE)
> +		return 0;
> +
> +	if (ioreq->type == IOREQ_TYPE_INVALIDATE)
> +		return 0;
> +
> +	switch (ioreq->type) {
> +		case IOREQ_TYPE_PCI_CONFIG:
> +		/* High 32 bit of ioreq->addr is bdf */
> +		if ((ioreq->addr >> 32) != bdf) {
> +			printk(KERN_ERR "GVT: Unexpected PCI Dev %lx
> emulation\n",
> +				(unsigned long) (ioreq->addr>>32));
> +				return -EINVAL;
> +			} else
> +				return _hvm_pio_emulation(vgt,
> ioreq);
> +			break;
> +		case IOREQ_TYPE_COPY:	/* MMIO */
> +			return _hvm_mmio_emulation(vgt, ioreq);
> +			break;
> +		default:
> +			printk(KERN_ERR "GVT: Unknown ioreq type %x
> addr %llx size %u state %u\n",
> +				ioreq->type, ioreq->addr, ioreq-
> >size, ioreq->state);
> +			return -EINVAL;
> +	}
> +
> +	return 0;
> +}
> +
> +static struct ioreq *vgt_get_hvm_ioreq(struct vgt_device *vgt, int
> vcpu)
> +{
> +	struct gvt_hvm_info *info = vgt->hypervisor_data;
> +	return &(info->iopage->vcpu_ioreq[vcpu]);
> +}
> +
> +static int vgt_emulation_thread(void *priv)
> +{
> +	struct vgt_device *vgt = (struct vgt_device *)priv;
> +	struct gvt_hvm_info *info = vgt->hypervisor_data;
> +
> +	int vcpu;
> +	int nr_vcpus = info->nr_vcpu;
> +
> +	struct ioreq *ioreq;
> +	int irq, ret;
> +
> +	gvt_info("start kthread for VM%d\n", vgt->vm_id);
> +
> +	ASSERT(info->nr_vcpu <= MAX_HVM_VCPUS_SUPPORTED);
> +
> +	set_freezable();
> +	while (1) {
> +		ret = wait_event_freezable(info->io_event_wq,
> +			kthread_should_stop() ||
> +			bitmap_weight(info->ioreq_pending,
> nr_vcpus));
> +
> +		if (kthread_should_stop())
> +			return 0;
> +
> +		if (ret)
> +			gvt_err("Emulation thread(%d) waken up"
> +				 "by unexpected signal!\n", vgt-
> >vm_id);
> +
> +		for (vcpu = 0; vcpu < nr_vcpus; vcpu++) {
> +			if (!test_and_clear_bit(vcpu, info-
> >ioreq_pending))
> +				continue;
> +
> +			ioreq = vgt_get_hvm_ioreq(vgt, vcpu);
> +
> +			if (vgt_hvm_do_ioreq(vgt, ioreq)) {
> +				xen_pause_domain(vgt->vm_id);
> +				xen_shutdown_domain(vgt->vm_id);
> +			}
> +
> +			ioreq->state = STATE_IORESP_READY;
> +
> +			irq = info->evtchn_irq[vcpu];
> +			notify_remote_via_irq(irq);
> +		}
> +	}
> +
> +	BUG(); /* It's actually impossible to reach here */
> +	return 0;
> +}
> +
> +static inline void vgt_raise_emulation_request(struct vgt_device
> *vgt,
> +	int vcpu)
> +{
> +	struct gvt_hvm_info *info = vgt->hypervisor_data;
> +	set_bit(vcpu, info->ioreq_pending);
> +	if (waitqueue_active(&info->io_event_wq))
> +		wake_up(&info->io_event_wq);
> +}
> +
> +static irqreturn_t vgt_hvm_io_req_handler(int irq, void* dev)
> +{
> +	struct vgt_device *vgt;
> +	struct gvt_hvm_info *info;
> +	int vcpu;
> +
> +	vgt = (struct vgt_device *)dev;
> +	info = vgt->hypervisor_data;
> +
> +	for(vcpu=0; vcpu < info->nr_vcpu; vcpu++){
> +		if(info->evtchn_irq[vcpu] == irq)
> +			break;
> +	}
> +	if (vcpu == info->nr_vcpu){
> +		/* oops, irq is not the registered one */
> +		gvt_info("Received a IOREQ w/o vcpu target\n");
> +		gvt_info("Possible a false request from event
> binding\n");
> +		return IRQ_NONE;
> +	}
> +
> +	vgt_raise_emulation_request(vgt, vcpu);
> +
> +	return IRQ_HANDLED;
> +}
> +
> +static void xen_hvm_exit(struct vgt_device *vgt)
> +{
> +	struct gvt_hvm_info *info;
> +	int vcpu;
> +
> +	info = vgt->hypervisor_data;
> +
> +	if (info == NULL)
> +		return;
> +
> +	if (info->emulation_thread != NULL)
> +		kthread_stop(info->emulation_thread);
> +
> +	if (!info->nr_vcpu || info->evtchn_irq == NULL)
> +		goto out1;
> +
> +	if (info->iosrv_id != 0)
> +		hvm_destroy_iorequest_server(vgt);
> +
> +	for (vcpu = 0; vcpu < info->nr_vcpu; vcpu++){
> +		if(info->evtchn_irq[vcpu] >= 0)
> +			unbind_from_irqhandler(info-
> >evtchn_irq[vcpu], vgt);
> +	}
> +
> +	if (info->iopage_vma != NULL)
> +		xen_unmap_domain_mfn_range_in_kernel(info-
> >iopage_vma, 1, vgt->vm_id);
> +
> +	kfree(info->evtchn_irq);
> +
> +out1:
> +	vgt_vmem_destroy(vgt);
> +	kfree(info);
> +}
> +
> +static int xen_hvm_init(struct vgt_device *vgt)
> +{
> +	struct gvt_hvm_info *info;
> +	int vcpu, irq, rc = 0;
> +	struct task_struct *thread;
> +	struct pgt_device *pdev = vgt->pdev;
> +	struct pci_dev *pci_dev = pdev->dev_priv->dev->pdev;
> +
> +	info = kzalloc(sizeof(struct gvt_hvm_info), GFP_KERNEL);
> +	if (info == NULL)
> +		return -ENOMEM;
> +
> +	vgt->hypervisor_data = info;
> +
> +	info->iopage_vma = xen_map_iopage(vgt);
> +	if (info->iopage_vma == NULL) {
> +		printk(KERN_ERR "Failed to map HVM I/O page for
> VM%d\n", vgt->vm_id);
> +		rc = -EFAULT;
> +		goto err;
> +	}
> +	info->iopage = info->iopage_vma->addr;
> +
> +	init_waitqueue_head(&info->io_event_wq);
> +
> +	info->nr_vcpu = xen_get_nr_vcpu(vgt->vm_id);
> +	ASSERT(info->nr_vcpu > 0);
> +	ASSERT(info->nr_vcpu <= MAX_HVM_VCPUS_SUPPORTED);
> +
> +	info->evtchn_irq = kmalloc(info->nr_vcpu * sizeof(int),
> GFP_KERNEL);
> +	if (info->evtchn_irq == NULL){
> +		rc = -ENOMEM;
> +		goto err;
> +	}
> +	for( vcpu = 0; vcpu < info->nr_vcpu; vcpu++ )
> +		info->evtchn_irq[vcpu] = -1;
> +
> +	rc = hvm_map_pcidev_to_ioreq_server(vgt, PCI_BDF2(pci_dev-
> >bus->number, pci_dev->devfn));
> +	if (rc < 0)
> +		goto err;
> +	rc = hvm_toggle_iorequest_server(vgt, 1);
> +	if (rc < 0)
> +		goto err;
> +
> +	for (vcpu = 0; vcpu < info->nr_vcpu; vcpu++){
> +		irq = bind_interdomain_evtchn_to_irqhandler( vgt-
> >vm_id,
> +				info->iopage-
> >vcpu_ioreq[vcpu].vp_eport,
> +				vgt_hvm_io_req_handler, 0,
> +				"vgt", vgt );
> +		if ( irq < 0 ){
> +			rc = irq;
> +			printk(KERN_ERR "Failed to bind event
> channle for vgt HVM IO handler, rc=%d\n", rc);
> +			goto err;
> +		}
> +		info->evtchn_irq[vcpu] = irq;
> +	}
> +
> +	thread = kthread_run(vgt_emulation_thread, vgt,
> +			"vgt_emulation:%d", vgt->vm_id);
> +	if(IS_ERR(thread))
> +		goto err;
> +	info->emulation_thread = thread;
> +
> +	return 0;
> +
> +err:
> +	xen_hvm_exit(vgt);
> +	return rc;
> +}
> +
> +static void *xen_gpa_to_va(struct vgt_device *vgt, unsigned long
> gpa)
> +{
> +	unsigned long buck_index, buck_4k_index;
> +	struct gvt_hvm_info *info = vgt->hypervisor_data;
> +
> +	if (!vgt->vm_id)
> +		return (char*)xen_mfn_to_virt(gpa>>PAGE_SHIFT) +
> (gpa & (PAGE_SIZE-1));
> +	/*
> +	 * At the beginning of _hvm_mmio_emulation(), we already
> initialize
> +	 * info->vmem_vma and info->vmem_vma_low_1mb.
> +	 */
> +	ASSERT(info->vmem_vma != NULL && info->vmem_vma_low_1mb !=
> NULL);
> +
> +	/* handle the low 1MB memory */
> +	if (gpa < VMEM_1MB) {
> +		buck_index = gpa >> PAGE_SHIFT;
> +		if (!info->vmem_vma_low_1mb[buck_index])
> +			return NULL;
> +
> +		return (char*)(info->vmem_vma_low_1mb[buck_index]-
> >addr) +
> +			(gpa & ~PAGE_MASK);
> +
> +	}
> +
> +	/* handle the >1MB memory */
> +	buck_index = gpa >> VMEM_BUCK_SHIFT;
> +
> +	if (!info->vmem_vma[buck_index]) {
> +		buck_4k_index = gpa >> PAGE_SHIFT;
> +		if (!info->vmem_vma_4k[buck_4k_index]) {
> +			if (buck_4k_index > vgt->low_mem_max_gpfn)
> +				gvt_err("GVT failed to map
> gpa=0x%lx?\n", gpa);
> +			return NULL;
> +		}
> +
> +		return (char*)(info->vmem_vma_4k[buck_4k_index]-
> >addr) +
> +			(gpa & ~PAGE_MASK);
> +	}
> +
> +	return (char*)(info->vmem_vma[buck_index]->addr) +
> +		(gpa & (VMEM_BUCK_SIZE -1));
> +}
> +
> +static bool xen_read_va(struct vgt_device *vgt, void *va, void *val,
> +		int len, int atomic)
> +{
> +	memcpy(val, va, len);
> +
> +	return true;
> +}
> +
> +static bool xen_write_va(struct vgt_device *vgt, void *va, void
> *val,
> +		int len, int atomic)
> +{
> +	memcpy(va, val, len);
> +	return true;
> +}
> +
> +static struct gvt_kernel_dm xengt_kdm = {
> +	.name = "xengt_kdm",
> +	.g2m_pfn = xen_g2m_pfn,
> +	.pause_domain = xen_pause_domain,
> +	.shutdown_domain = xen_shutdown_domain,
> +	.map_mfn_to_gpfn = xen_map_mfn_to_gpfn,
> +	.set_trap_area = xen_set_trap_area,
> +	.set_wp_pages = xen_set_guest_page_writeprotection,
> +	.unset_wp_pages = xen_clear_guest_page_writeprotection,
> +	.detect_host = xen_detect_host,
> +	.from_virt_to_mfn = xen_virt_to_mfn,
> +	.from_mfn_to_virt = xen_mfn_to_virt,
> +	.inject_msi = xen_inject_msi,
> +	.hvm_init = xen_hvm_init,
> +	.hvm_exit = xen_hvm_exit,
> +	.gpa_to_va = xen_gpa_to_va,
> +	.read_va = xen_read_va,
> +	.write_va = xen_write_va,
> +};
> +EXPORT_SYMBOL(xengt_kdm);
> +
> +static int __init xengt_init(void)
> +{
> +       if (!xen_initial_domain())
> +               return -EINVAL;
> +       printk(KERN_INFO "xengt: loaded\n");
> +       return 0;
> +}
> +
> +static void __exit xengt_exit(void)
> +{
> +	printk(KERN_INFO "xengt: unloaded\n");
> +}
> +
> +module_init(xengt_init);
> +module_exit(xengt_exit);
> diff --git a/include/xen/interface/hvm/hvm_op.h
> b/include/xen/interface/hvm/hvm_op.h
> index 956a046..20577cc 100644
> --- a/include/xen/interface/hvm/hvm_op.h
> +++ b/include/xen/interface/hvm/hvm_op.h
> @@ -21,6 +21,8 @@
>  #ifndef __XEN_PUBLIC_HVM_HVM_OP_H__
>  #define __XEN_PUBLIC_HVM_HVM_OP_H__
>  
> +#include <xen/interface/event_channel.h>
> +
>  /* Get/set subcommands: the second argument of the hypercall is a
>   * pointer to a xen_hvm_param struct. */
>  #define HVMOP_set_param           0
> @@ -42,12 +44,41 @@ struct xen_hvm_pagetable_dying {
>  };
>  typedef struct xen_hvm_pagetable_dying xen_hvm_pagetable_dying_t;
>  DEFINE_GUEST_HANDLE_STRUCT(xen_hvm_pagetable_dying_t);
> - 
> +
> +/* MSI injection for emulated devices */
> +#define HVMOP_inject_msi         16
> +struct xen_hvm_inject_msi {
> +    /* Domain to be injected */
> +    domid_t   domid;
> +    /* Data -- lower 32 bits */
> +    uint32_t  data;
> +    /* Address (0xfeexxxxx) */
> +    uint64_t  addr;
> +};
> +typedef struct xen_hvm_inject_msi xen_hvm_inject_msi_t;
> +DEFINE_GUEST_HANDLE_STRUCT(xen_hvm_inject_msi_t);
> +
>  enum hvmmem_type_t {
>      HVMMEM_ram_rw,             /* Normal read/write guest RAM */
>      HVMMEM_ram_ro,             /* Read-only; writes are discarded */
>      HVMMEM_mmio_dm,            /* Reads and write go to the device
> model */
> +    HVMMEM_mmio_write_dm       /* Read-only; writes go to the device
> model */
> +};
> +
> +#define HVMOP_set_mem_type    8
> +/* Notify that a region of memory is to be treated in a specific
> way. */
> +struct xen_hvm_set_mem_type {
> +        /* Domain to be updated. */
> +        domid_t domid;
> +        /* Memory type */
> +        uint16_t hvmmem_type;
> +        /* Number of pages. */
> +        uint32_t nr;
> +        /* First pfn. */
> +        uint64_t first_pfn;
>  };
> +typedef struct xen_hvm_set_mem_type xen_hvm_set_mem_type_t;
> +DEFINE_GUEST_HANDLE_STRUCT(xen_hvm_set_mem_type_t);
>  
>  #define HVMOP_get_mem_type    15
>  /* Return hvmmem_type_t for the specified pfn. */
> @@ -62,4 +93,148 @@ struct xen_hvm_get_mem_type {
>  };
>  DEFINE_GUEST_HANDLE_STRUCT(xen_hvm_get_mem_type);
>  
> +#define HVMOP_vgt_wp_pages         27  /* writeprotection to guest
> pages */
> +#define MAX_WP_BATCH_PAGES         128
> +struct xen_hvm_vgt_wp_pages {
> +	uint16_t domid;
> +	uint16_t set;            /* 1: set WP, 0: remove WP */
> +	uint16_t nr_pages;
> +	unsigned long  wp_pages[MAX_WP_BATCH_PAGES];
> +};
> +typedef struct xen_hvm_vgt_wp_pages xen_hvm_vgt_wp_pages_t;
> +
> +/*
> + * IOREQ Servers
> + *
> + * The interface between an I/O emulator and Xen is called an IOREQ
> Server.
> + * A domain supports a single 'legacy' IOREQ Server which is
> instantiated if
> + * parameter...
> + *
> + * HVM_PARAM_IOREQ_PFN is read (to get the gmfn containing the
> synchronous
> + * ioreq structures), or...
> + * HVM_PARAM_BUFIOREQ_PFN is read (to get the gmfn containing the
> buffered
> + * ioreq ring), or...
> + * HVM_PARAM_BUFIOREQ_EVTCHN is read (to get the event channel that
> Xen uses
> + * to request buffered I/O emulation).
> + *
> + * The following hypercalls facilitate the creation of IOREQ Servers
> for
> + * 'secondary' emulators which are invoked to implement port I/O,
> memory, or
> + * PCI config space ranges which they explicitly register.
> + */
> +typedef uint16_t ioservid_t;
> +
> +/*
> + * HVMOP_create_ioreq_server: Instantiate a new IOREQ Server for a
> secondary
> + *                            emulator servicing domain <domid>.
> + *
> + * The <id> handed back is unique for <domid>. If <handle_bufioreq>
> is zero
> + * the buffered ioreq ring will not be allocated and hence all
> emulation
> + * requests to this server will be synchronous.
> + */
> +#define HVMOP_create_ioreq_server 17
> +struct xen_hvm_create_ioreq_server {
> +    domid_t domid;           /* IN - domain to be serviced */
> +    uint8_t handle_bufioreq; /* IN - should server handle buffered
> ioreqs */
> +    ioservid_t id;           /* OUT - server id */
> +};
> +typedef struct xen_hvm_create_ioreq_server
> xen_hvm_create_ioreq_server_t;
> +DEFINE_GUEST_HANDLE_STRUCT(xen_hvm_create_ioreq_server_t);
> +
> +/*
> + * HVMOP_get_ioreq_server_info: Get all the information necessary to
> access
> + *                              IOREQ Server <id>.
> + *
> + * The emulator needs to map the synchronous ioreq structures and
> buffered
> + * ioreq ring (if it exists) that Xen uses to request emulation.
> These are
> + * hosted in domain <domid>'s gmfns <ioreq_pfn> and <bufioreq_pfn>
> + * respectively. In addition, if the IOREQ Server is handling
> buffered
> + * emulation requests, the emulator needs to bind to event channel
> + * <bufioreq_port> to listen for them. (The event channels used for
> + * synchronous emulation requests are specified in the per-CPU ioreq
> + * structures in <ioreq_pfn>).
> + * If the IOREQ Server is not handling buffered emulation requests
> then the
> + * values handed back in <bufioreq_pfn> and <bufioreq_port> will
> both be 0.
> + */
> +#define HVMOP_get_ioreq_server_info 18
> +struct xen_hvm_get_ioreq_server_info {
> +    domid_t domid;                 /* IN - domain to be serviced */
> +    ioservid_t id;                 /* IN - server id */
> +    evtchn_port_t bufioreq_port;   /* OUT - buffered ioreq port */
> +    uint64_t ioreq_pfn;    /* OUT - sync ioreq pfn */
> +    uint64_t bufioreq_pfn; /* OUT - buffered ioreq pfn */
> +};
> +typedef struct xen_hvm_get_ioreq_server_info
> xen_hvm_get_ioreq_server_info_t;
> +DEFINE_GUEST_HANDLE_STRUCT(xen_hvm_get_ioreq_server_info_t);
> +
> +/*
> + * HVM_map_io_range_to_ioreq_server: Register an I/O range of domain
> <domid>
> + *                                   for emulation by the client of
> IOREQ
> + *                                   Server <id>
> + * HVM_unmap_io_range_from_ioreq_server: Deregister an I/O range of
> <domid>
> + *                                       for emulation by the client
> of IOREQ
> + *                                       Server <id>
> + *
> + * There are three types of I/O that can be emulated: port I/O,
> memory accesses
> + * and PCI config space accesses. The <type> field denotes which
> type of range
> + * the <start> and <end> (inclusive) fields are specifying.
> + * PCI config space ranges are specified by
> segment/bus/device/function values
> + * which should be encoded using the HVMOP_PCI_SBDF helper macro
> below.
> + *
> + * NOTE: unless an emulation request falls entirely within a range
> mapped
> + * by a secondary emulator, it will not be passed to that emulator.
> + */
> +#define HVMOP_map_io_range_to_ioreq_server 19
> +#define HVMOP_unmap_io_range_from_ioreq_server 20
> +struct xen_hvm_io_range {
> +    domid_t domid;               /* IN - domain to be serviced */
> +    ioservid_t id;               /* IN - server id */
> +    uint32_t type;               /* IN - type of range */
> +# define HVMOP_IO_RANGE_PORT   0 /* I/O port range */
> +# define HVMOP_IO_RANGE_MEMORY 1 /* MMIO range */
> +# define HVMOP_IO_RANGE_PCI    2 /* PCI segment/bus/dev/func range
> */
> +    uint64_t start, end; /* IN - inclusive start and end of range */
> +};
> +typedef struct xen_hvm_io_range xen_hvm_io_range_t;
> +DEFINE_GUEST_HANDLE_STRUCT(xen_hvm_io_range_t);
> +
> +#define HVMOP_PCI_SBDF(s,b,d,f)                 \
> +       ((((s) & 0xffff) << 16) |                   \
> +        (((b) & 0xff) << 8) |                      \
> +        (((d) & 0x1f) << 3) |                      \
> +        ((f) & 0x07))
> +
> +/*
> + * HVMOP_destroy_ioreq_server: Destroy the IOREQ Server <id>
> servicing domain
> + *                             <domid>.
> + *
> + * Any registered I/O ranges will be automatically deregistered.
> + */
> +#define HVMOP_destroy_ioreq_server 21
> +struct xen_hvm_destroy_ioreq_server {
> +    domid_t domid; /* IN - domain to be serviced */
> +    ioservid_t id; /* IN - server id */
> +};
> +typedef struct xen_hvm_destroy_ioreq_server
> xen_hvm_destroy_ioreq_server_t;
> +DEFINE_GUEST_HANDLE_STRUCT(xen_hvm_destroy_ioreq_server_t);
> +
> +
> +/*
> + * HVMOP_set_ioreq_server_state: Enable or disable the IOREQ Server
> <id> servicing
> + *                               domain <domid>.
> + *
> + * The IOREQ Server will not be passed any emulation requests until
> it is in the
> + * enabled state.
> + * Note that the contents of the ioreq_pfn and bufioreq_pfn (see
> + * HVMOP_get_ioreq_server_info) are not meaningful until the IOREQ
> Server is in
> + * the enabled state.
> + */
> +#define HVMOP_set_ioreq_server_state 22
> +struct xen_hvm_set_ioreq_server_state {
> +    domid_t domid;   /* IN - domain to be serviced */
> +    ioservid_t id;   /* IN - server id */
> +    uint8_t enabled; /* IN - enabled? */
> +};
> +typedef struct xen_hvm_set_ioreq_server_state
> xen_hvm_set_ioreq_server_state_t;
> +DEFINE_GUEST_HANDLE_STRUCT(xen_hvm_set_ioreq_server_state_t);
> +
>  #endif /* __XEN_PUBLIC_HVM_HVM_OP_H__ */
> diff --git a/include/xen/interface/hvm/ioreq.h
> b/include/xen/interface/hvm/ioreq.h
> new file mode 100644
> index 0000000..6bbf4e4
> --- /dev/null
> +++ b/include/xen/interface/hvm/ioreq.h
> @@ -0,0 +1,132 @@
> +/*
> + * This program is free software; you can redistribute it and/or
> modify it
> + * under the terms and conditions of the GNU General Public License,
> + * version 2, as published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope it will be useful, but
> WITHOUT
> + * ANY WARRANTY; without even the implied warranty of
> MERCHANTABILITY or
> + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
> License for
> + * more details.
> + *
> + * You should have received a copy of the GNU General Public License
> along with
> + * this program; if not, write to the Free Software Foundation,
> Inc.,
> + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
> + */
> +#ifndef _IOREQ_H_
> +#define _IOREQ_H_
> +
> +#define IOREQ_READ      1
> +#define IOREQ_WRITE     0
> +
> +#define STATE_IOREQ_NONE        0
> +#define STATE_IOREQ_READY       1
> +#define STATE_IOREQ_INPROCESS   2
> +#define STATE_IORESP_READY      3
> +
> +#define IOREQ_TYPE_PIO          0 /* pio */
> +#define IOREQ_TYPE_COPY         1 /* mmio ops */
> +#define IOREQ_TYPE_PCI_CONFIG   2
> +#define IOREQ_TYPE_TIMEOFFSET   7
> +#define IOREQ_TYPE_INVALIDATE   8 /* mapcache */
> +
> +/*
> + * VMExit dispatcher should cooperate with instruction decoder to
> + * prepare this structure and notify service OS and DM by sending
> + * virq
> + */
> +struct ioreq {
> +    uint64_t addr;          /* physical address */
> +    uint64_t data;          /* data (or paddr of data) */
> +    uint32_t count;         /* for rep prefixes */
> +    uint32_t size;          /* size in bytes */
> +    uint32_t vp_eport;      /* evtchn for notifications to/from
> device model */
> +    uint16_t _pad0;
> +    uint8_t state:4;
> +    uint8_t data_is_ptr:1;  /* if 1, data above is the guest paddr
> +                             * of the real data to use. */
> +    uint8_t dir:1;          /* 1=read, 0=write */
> +    uint8_t df:1;
> +    uint8_t _pad1:1;
> +    uint8_t type;           /* I/O type */
> +};
> +typedef struct ioreq ioreq_t;
> +
> +struct shared_iopage {
> +    struct ioreq vcpu_ioreq[1];
> +};
> +typedef struct shared_iopage shared_iopage_t;
> +
> +struct buf_ioreq {
> +    uint8_t  type;   /* I/O type                    */
> +    uint8_t  pad:1;
> +    uint8_t  dir:1;  /* 1=read, 0=write             */
> +    uint8_t  size:2; /* 0=>1, 1=>2, 2=>4, 3=>8. If 8, use two
> buf_ioreqs */
> +    uint32_t addr:20;/* physical address            */
> +    uint32_t data;   /* data                        */
> +};
> +typedef struct buf_ioreq buf_ioreq_t;
> +
> +#define IOREQ_BUFFER_SLOT_NUM     511 /* 8 bytes each, plus 2 4-byte 
> indexes */
> +struct buffered_iopage {
> +    unsigned int read_pointer;
> +    unsigned int write_pointer;
> +    buf_ioreq_t buf_ioreq[IOREQ_BUFFER_SLOT_NUM];
> +}; /* NB. Size of this structure must be no greater than one page.
> */
> +typedef struct buffered_iopage buffered_iopage_t;
> +
> +#if defined(__ia64__)
> +struct pio_buffer {
> +    uint32_t page_offset;
> +    uint32_t pointer;
> +    uint32_t data_end;
> +    uint32_t buf_size;
> +    void *opaque;
> +};
> +
> +#define PIO_BUFFER_IDE_PRIMARY   0 /* I/O port = 0x1F0 */
> +#define PIO_BUFFER_IDE_SECONDARY 1 /* I/O port = 0x170 */
> +#define PIO_BUFFER_ENTRY_NUM     2
> +struct buffered_piopage {
> +    struct pio_buffer pio[PIO_BUFFER_ENTRY_NUM];
> +    uint8_t buffer[1];
> +};
> +#endif /* defined(__ia64__) */
> +
> +/*
> + * ACPI Control/Event register locations. Location is controlled by
> a
> + * version number in HVM_PARAM_ACPI_IOPORTS_LOCATION.
> + */
> +
> +/* Version 0 (default): Traditional Xen locations. */
> +#define ACPI_PM1A_EVT_BLK_ADDRESS_V0 0x1f40
> +#define ACPI_PM1A_CNT_BLK_ADDRESS_V0 (ACPI_PM1A_EVT_BLK_ADDRESS_V0 +
> 0x04)
> +#define ACPI_PM_TMR_BLK_ADDRESS_V0   (ACPI_PM1A_EVT_BLK_ADDRESS_V0 +
> 0x08)
> +#define ACPI_GPE0_BLK_ADDRESS_V0     (ACPI_PM_TMR_BLK_ADDRESS_V0 +
> 0x20)
> +#define ACPI_GPE0_BLK_LEN_V0         0x08
> +
> +/* Version 1: Locations preferred by modern Qemu. */
> +#define ACPI_PM1A_EVT_BLK_ADDRESS_V1 0xb000
> +#define ACPI_PM1A_CNT_BLK_ADDRESS_V1 (ACPI_PM1A_EVT_BLK_ADDRESS_V1 +
> 0x04)
> +#define ACPI_PM_TMR_BLK_ADDRESS_V1   (ACPI_PM1A_EVT_BLK_ADDRESS_V1 +
> 0x08)
> +#define ACPI_GPE0_BLK_ADDRESS_V1     0xafe0
> +#define ACPI_GPE0_BLK_LEN_V1         0x04
> +
> +/* Compatibility definitions for the default location (version 0).
> */
> +#define ACPI_PM1A_EVT_BLK_ADDRESS    ACPI_PM1A_EVT_BLK_ADDRESS_V0
> +#define ACPI_PM1A_CNT_BLK_ADDRESS    ACPI_PM1A_CNT_BLK_ADDRESS_V0
> +#define ACPI_PM_TMR_BLK_ADDRESS      ACPI_PM_TMR_BLK_ADDRESS_V0
> +#define ACPI_GPE0_BLK_ADDRESS        ACPI_GPE0_BLK_ADDRESS_V0
> +#define ACPI_GPE0_BLK_LEN            ACPI_GPE0_BLK_LEN_V0
> +
> +
> +#endif /* _IOREQ_H_ */
> +
> +/*
> + * Local variables:
> + * mode: C
> + * c-set-style: "BSD"
> + * c-basic-offset: 4
> + * tab-width: 4
> + * indent-tabs-mode: nil
> + * End:
> + */
> diff --git a/include/xen/interface/memory.h
> b/include/xen/interface/memory.h
> index 2ecfe4f..92f18c5 100644
> --- a/include/xen/interface/memory.h
> +++ b/include/xen/interface/memory.h
> @@ -9,6 +9,7 @@
>  #ifndef __XEN_PUBLIC_MEMORY_H__
>  #define __XEN_PUBLIC_MEMORY_H__
>  
> +#include <xen/interface/event_channel.h>
>  #include <linux/spinlock.h>
>  
>  /*
> @@ -141,6 +142,11 @@ struct xen_machphys_mfn_list {
>  DEFINE_GUEST_HANDLE_STRUCT(xen_machphys_mfn_list);
>  
>  /*
> + * Returns the maximum GPFN in use by the guest, or -ve errcode on
> failure.
> + */
> +#define XENMEM_maximum_gpfn         14
> +
> +/*
>   * Returns the location in virtual address space of the
> machine_to_phys
>   * mapping table. Architectures which do not have a m2p table, or
> which do not
>   * map it by default into guest address space, do not implement this
> command.
> @@ -263,4 +269,26 @@ struct xen_remove_from_physmap {
>  };
>  DEFINE_GUEST_HANDLE_STRUCT(xen_remove_from_physmap);
>  
> +/*
> + * Translate the given guest PFNs to MFNs
> + */
> +#define XENMEM_get_mfn_from_pfn    25
> +struct xen_get_mfn_from_pfn {
> +    /*
> +     * Pointer to buffer to fill with list of pfn.
> +     * for IN, it contains the guest PFNs that need to be translated
> +     * for OUT, it contains the translated MFN. or INVALID_MFN if no
> valid translation
> +     */
> +    GUEST_HANDLE(ulong) pfn_list;
> +
> +    /*
> +     * IN: Size of the pfn_array.
> +     */
> +    unsigned int nr_pfns;
> +
> +    /* IN: which domain */
> +    domid_t domid;
> +};
> +DEFINE_GUEST_HANDLE_STRUCT(xen_get_mfn_from_pfn);
> +
>  #endif /* __XEN_PUBLIC_MEMORY_H__ */
> diff --git a/include/xen/interface/xen.h
> b/include/xen/interface/xen.h
> index 78a38f1..c7e0f32 100644
> --- a/include/xen/interface/xen.h
> +++ b/include/xen/interface/xen.h
> @@ -756,6 +756,112 @@ struct tmem_op {
>  
>  DEFINE_GUEST_HANDLE(u64);
>  
> +/* XEN_DOMCTL_getdomaininfo */
> +struct xen_domctl_getdomaininfo {
> +        /* OUT variables. */
> +        domid_t  domain;              /* Also echoed in
> domctl.domain */
> +        /* Domain is scheduled to die. */
> +#define _XEN_DOMINF_dying     0
> +#define XEN_DOMINF_dying      (1U<<_XEN_DOMINF_dying)
> +        /* Domain is an HVM guest (as opposed to a PV guest). */
> +#define _XEN_DOMINF_hvm_guest 1
> +#define XEN_DOMINF_hvm_guest  (1U<<_XEN_DOMINF_hvm_guest)
> +        /* The guest OS has shut down. */
> +#define _XEN_DOMINF_shutdown  2
> +#define XEN_DOMINF_shutdown   (1U<<_XEN_DOMINF_shutdown)
> +        /* Currently paused by control software. */
> +#define _XEN_DOMINF_paused    3
> +#define XEN_DOMINF_paused     (1U<<_XEN_DOMINF_paused)
> +        /* Currently blocked pending an event.     */
> +#define _XEN_DOMINF_blocked   4
> +#define XEN_DOMINF_blocked    (1U<<_XEN_DOMINF_blocked)
> +        /* Domain is currently running.            */
> +#define _XEN_DOMINF_running   5
> +#define XEN_DOMINF_running    (1U<<_XEN_DOMINF_running)
> +        /* Being debugged.  */
> +#define _XEN_DOMINF_debugged  6
> +#define XEN_DOMINF_debugged   (1U<<_XEN_DOMINF_debugged)
> +        /* XEN_DOMINF_shutdown guest-supplied code.  */
> +#define XEN_DOMINF_shutdownmask 255
> +#define XEN_DOMINF_shutdownshift 16
> +        uint32_t flags;              /* XEN_DOMINF_* */
> +        aligned_u64 tot_pages;
> +        aligned_u64 max_pages;
> +        aligned_u64 outstanding_pages;
> +        aligned_u64 shr_pages;
> +        aligned_u64 paged_pages;
> +        aligned_u64 shared_info_frame; /* GMFN of shared_info struct
> */
> +        aligned_u64 cpu_time;
> +        uint32_t nr_online_vcpus;    /* Number of VCPUs currently
> online. */
> +        uint32_t max_vcpu_id;        /* Maximum VCPUID in use by
> this domain. */
> +        uint32_t ssidref;
> +        xen_domain_handle_t handle;
> +        uint32_t cpupool;
> +};
> +DEFINE_GUEST_HANDLE_STRUCT(xen_domctl_getdomaininfo);
> +
> +#define XEN_DOMCTL_INTERFACE_VERSION 0x0000000a
> +#define XEN_DOMCTL_pausedomain                    3
> +#define XEN_DOMCTL_getdomaininfo                  5
> +#define XEN_DOMCTL_memory_mapping                 39
> +#define XEN_DOMCTL_iomem_permission               20
> +
> +
> +#define XEN_DOMCTL_vgt_io_trap                    700
> +
> +#define MAX_VGT_IO_TRAP_INFO 4
> +
> +struct vgt_io_trap_info {
> +        uint64_t s;
> +        uint64_t e;
> +};
> +
> +struct xen_domctl_vgt_io_trap {
> +        uint32_t n_pio;
> +        struct vgt_io_trap_info pio[MAX_VGT_IO_TRAP_INFO];
> +
> +        uint32_t n_mmio;
> +        struct vgt_io_trap_info mmio[MAX_VGT_IO_TRAP_INFO];
> +};
> +
> +/* Bind machine I/O address range -> HVM address range. */
> +/* XEN_DOMCTL_memory_mapping */
> +#define DPCI_ADD_MAPPING        1
> +#define DPCI_REMOVE_MAPPING     0
> +struct xen_domctl_memory_mapping {
> +        aligned_u64 first_gfn; /* first page (hvm guest phys page)
> in range */
> +        aligned_u64 first_mfn; /* first page (machine page) in
> range. */
> +        aligned_u64 nr_mfns;   /* number of pages in range (>0) */
> +        uint32_t add_mapping;  /* Add or remove mapping */
> +        uint32_t padding;      /* padding for 64-bit aligned struct
> */
> +};
> +typedef struct xen_domctl_memory_mapping
> xen_domctl_memory_mapping_t;
> +DEFINE_GUEST_HANDLE_STRUCT(xen_domctl_memory_mapping_t);
> +
> +/* XEN_DOMCTL_iomem_permission */
> +struct xen_domctl_iomem_permission {
> +    aligned_u64 first_mfn;/* first page (physical page number) in
> range */
> +    aligned_u64 nr_mfns;  /* number of pages in range (>0) */
> +    uint8_t  allow_access;     /* allow (!0) or deny (0) access to
> range? */
> +};
> +typedef struct xen_domctl_iomem_permission
> xen_domctl_iomem_permission_t;
> +DEFINE_GUEST_HANDLE_STRUCT(xen_domctl_iomem_permission_t);
> +
> +struct xen_domctl {
> +        uint32_t cmd;
> +        uint32_t interface_version; /* XEN_DOMCTL_INTERFACE_VERSION
> */
> +        domid_t  domain;
> +        union {
> +                struct xen_domctl_getdomaininfo     getdomaininfo;
> +                struct xen_domctl_vgt_io_trap       vgt_io_trap;
> +                struct xen_domctl_memory_mapping    memory_mapping;
> +                struct xen_domctl_iomem_permission      iomem_perm;
> +                uint8_t                             pad[256];
> +        }u;
> +};
> +DEFINE_GUEST_HANDLE_STRUCT(xen_domctl);
> +
> +
>  #else /* __ASSEMBLY__ */
>  
>  /* In assembly code we cannot use C numeric constant suffixes. */
> diff --git a/include/xen/xen-ops.h b/include/xen/xen-ops.h
> index 86abe07..dde9eb0 100644
> --- a/include/xen/xen-ops.h
> +++ b/include/xen/xen-ops.h
> @@ -123,4 +123,9 @@ static inline void xen_preemptible_hcall_end(void)
>  
>  #endif /* CONFIG_PREEMPT */
>  
> +struct vm_struct * xen_remap_domain_mfn_range_in_kernel(unsigned long mfn,
> +        int nr, unsigned domid);
> +void xen_unmap_domain_mfn_range_in_kernel(struct vm_struct *area, int nr,
> +                unsigned domid);
> +
>  #endif /* INCLUDE_XEN_OPS_H */
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

  reply	other threads:[~2016-01-28 11:32 UTC|newest]

Thread overview: 49+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-01-28 10:21 [RFC 00/29] iGVT-g implementation in i915 Zhi Wang
2016-01-28 10:21 ` [RFC 01/29] drm/i915/gvt: Introduce the basic architecture of GVT-g Zhi Wang
2016-01-29 13:57   ` Joonas Lahtinen
2016-01-29 16:48     ` Chris Wilson
2016-02-03  6:28       ` Zhi Wang
2016-02-05  7:02       ` Zhiyuan Lv
2016-02-03  6:01     ` Zhi Wang
2016-02-03  7:01       ` Zhiyuan Lv
2016-02-04 11:25       ` Joonas Lahtinen
2016-02-16  9:54       ` Zhi Wang
2016-02-16 12:44         ` Jani Nikula
2016-02-16 14:08         ` Joonas Lahtinen
2016-01-28 10:21 ` [RFC 02/29] drm/i915: Introduce host graphics memory balloon for gvt Zhi Wang
2016-02-04 11:27   ` Joonas Lahtinen
2016-02-05 10:03     ` Zhiyuan Lv
2016-02-05 13:40       ` Joonas Lahtinen
2016-02-05 14:16         ` Zhiyuan Lv
2016-02-08 11:52           ` Joonas Lahtinen
2016-02-10  8:08   ` Daniel Vetter
2016-01-28 10:21 ` [RFC 03/29] drm/i915: Introduce GVT context creation API Zhi Wang
2016-01-28 10:21 ` [RFC 04/29] drm/i915: Ondemand populate context addressing mode bit Zhi Wang
2016-01-28 10:21 ` [RFC 05/29] drm/i915: Do not populate PPGTT root pointers for GVT context Zhi Wang
2016-01-28 10:21 ` [RFC 06/29] drm/i915: Do not initialize the engine state of " Zhi Wang
2016-01-28 10:21 ` [RFC 07/29] drm/i915: GVT context scheduling Zhi Wang
2016-01-28 10:21 ` [RFC 08/29] drm/i915: Support vGPU guest framebuffer GEM object Zhi Wang
2016-01-28 10:21 ` [RFC 09/29] drm/i915: gvt: Resource allocator Zhi Wang
2016-01-28 10:21 ` [RFC 10/29] drm/i915: gvt: Basic mmio emulation state Zhi Wang
2016-01-28 10:21 ` [RFC 11/29] drm/i915: gvt: update PVINFO page definition in i915_vgpu.h Zhi Wang
2016-01-28 10:21 ` [RFC 12/29] drm/i915: gvt: vGPU life cycle management Zhi Wang
2016-01-28 10:21 ` [RFC 13/29] drm/i915: gvt: trace stub Zhi Wang
2016-01-28 10:21 ` [RFC 14/29] drm/i915: gvt: vGPU interrupt emulation framework Zhi Wang
2016-01-28 10:21 ` [RFC 15/29] drm/i915: gvt: vGPU graphics memory " Zhi Wang
2016-01-28 10:21 ` [RFC 16/29] drm/i915: gvt: Generic MPT framework Zhi Wang
2016-01-28 10:21 ` [RFC 17/29] gvt: Xen hypervisor GVT-g MPT module Zhi Wang
2016-01-28 11:33   ` Joonas Lahtinen [this message]
2016-01-28 12:50     ` Zhiyuan Lv
2016-01-28 10:21 ` [RFC 18/29] drm/i915: gvt: vGPU configuration emulation Zhi Wang
2016-01-28 10:21 ` [RFC 19/29] drm/i915: gvt: vGPU OpRegion emulation Zhi Wang
2016-01-28 10:21 ` [RFC 20/29] drm/i915: gvt: vGPU framebuffer format decoder Zhi Wang
2016-01-28 10:21 ` [RFC 21/29] drm/i915: gvt: vGPU MMIO register emulation Zhi Wang
2016-01-28 10:21 ` [RFC 22/29] drm/i915: gvt: Full display virtualization Zhi Wang
2016-01-28 10:21 ` [RFC 23/29] drm/i915: gvt: Introduce GVT control interface Zhi Wang
2016-01-28 10:21 ` [RFC 24/29] drm/i915: gvt: Full execlist status emulation Zhi Wang
2016-01-28 10:21 ` [RFC 25/29] drm/i915: gvt: vGPU execlist workload submission Zhi Wang
2016-01-28 10:21 ` [RFC 26/29] drm/i915: gvt: workload scheduler Zhi Wang
2016-01-28 10:21 ` [RFC 27/29] drm/i915: gvt: vGPU schedule policy framework Zhi Wang
2016-01-28 10:21 ` [RFC 28/29] drm/i915: gvt: vGPU context switch Zhi Wang
2016-01-28 10:21 ` [RFC 29/29] drm/i915: gvt: vGPU command scanner Zhi Wang
2016-01-28 17:15 ` ✗ Fi.CI.BAT: failure for iGVT-g implementation in i915 Patchwork

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1453980813.5004.18.camel@linux.intel.com \
    --to=joonas.lahtinen@linux.intel.com \
    --cc=daniel.vetter@ffwll.ch \
    --cc=david.j.cowperthwaite@intel.com \
    --cc=igvt-g@lists.01.org \
    --cc=intel-gfx@lists.freedesktop.org \
    --cc=zhi.a.wang@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.