From: Mukesh Rathor <mukesh.rathor@oracle.com>
To: Ian Campbell <Ian.Campbell@citrix.com>
Cc: "Xen-devel@lists.xensource.com" <Xen-devel@lists.xensource.com>,
	Stefano Stabellini <Stefano.Stabellini@eu.citrix.com>,
	Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Subject: Re: [PATCH v1 2/8]: PVH mmu changes
Date: Fri, 5 Oct 2012 19:00:11 -0700	[thread overview]
Message-ID: <20121005190011.2653bcfa@mantra.us.oracle.com> (raw)
In-Reply-To: <1349339279.650.214.camel@zakaz.uk.xensource.com>


On Thu, 4 Oct 2012 09:27:59 +0100
Ian Campbell <Ian.Campbell@citrix.com> wrote:

> On Wed, 2012-10-03 at 23:29 +0100, Mukesh Rathor wrote:
> > On Wed, 3 Oct 2012 16:42:43 +0100
> > Ian Campbell <Ian.Campbell@citrix.com> wrote:
> > 
> While implementing the ARM version of this interface it occurred to me
> that the num_pgs and next_todo fields here are not really needed
> across the arch interface e.g. you already get pi_num_pgs from the nr
> argument to xen_remap_domain_mfn_range and pi_next_todo is really
> state which fits in struct pvh_remap_data. That would mean that
> remap_foo could take a struct page * directly and I think you would
> save an allocation.

OK, take a look at the inlined diffs below. I redid it, passing
struct page to the API and getting rid of the pi_* struct completely.
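
For reference, the resulting interface (as declared in the mmu.c diff
below) now looks like this; the PVH path consumes the pages array and
the classic PV path simply ignores it (callers pass NULL):

    /* pages: ballooned local pages backing the mapping on PVH;
     * NULL for classic PV, where the mfn is mapped directly. */
    int xen_remap_domain_mfn_range(struct vm_area_struct *vma,
                                   unsigned long addr,
                                   unsigned long mfn, int nr,
                                   pgprot_t prot, unsigned domid,
                                   struct page **pages);

    /* Tears down the p2m entries backing the vma; returns 0. */
    int xen_unmap_domain_mfn_range(struct vm_area_struct *vma);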

thanks
Mukesh


diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c
index ef63895..6cbf59a 100644
--- a/drivers/xen/privcmd.c
+++ b/drivers/xen/privcmd.c
@@ -33,6 +33,7 @@
 #include <xen/features.h>
 #include <xen/page.h>
 #include <xen/xen-ops.h>
+#include <xen/balloon.h>
 
 #include "privcmd.h"
 
@@ -178,7 +179,7 @@ static int mmap_mfn_range(void *data, void *state)
 					msg->va & PAGE_MASK,
 					msg->mfn, msg->npages,
 					vma->vm_page_prot,
-					st->domain);
+					st->domain, NULL);
 	if (rc < 0)
 		return rc;
 
@@ -199,6 +200,10 @@ static long privcmd_ioctl_mmap(void __user *udata)
 	if (!xen_initial_domain())
 		return -EPERM;
 
+	/* Auto-translated guests only support privcmd_ioctl_mmap_batch. */
+	if (xen_feature(XENFEAT_auto_translated_physmap))
+		return -ENOSYS;
+
 	if (copy_from_user(&mmapcmd, udata, sizeof(mmapcmd)))
 		return -EFAULT;
 
@@ -246,6 +251,7 @@ struct mmap_batch_state {
 	domid_t domain;
 	unsigned long va;
 	struct vm_area_struct *vma;
+	int index;
 	/* A tristate:
 	 *      0 for no errors
 	 *      1 if at least one error has happened (and no
@@ -260,14 +266,20 @@ struct mmap_batch_state {
 	xen_pfn_t __user *user_mfn;
 };
 
+/* PVH dom0 note: if the domU being created is PV, then mfn is a real mfn
+ * (an address on the bus). If it is PVH, then mfn is a pfn (the input to
+ * HAP). */
 static int mmap_batch_fn(void *data, void *state)
 {
 	xen_pfn_t *mfnp = data;
 	struct mmap_batch_state *st = state;
+	struct vm_area_struct *vma = st->vma;
+	struct page *cur_page = NULL;
 	int ret;
 
+	/* Only the auto-translated path keeps a page array in the vma;
+	 * for classic PV, vm_private_data is the singleshot marker. */
+	if (xen_feature(XENFEAT_auto_translated_physmap)) {
+		struct page **pages = vma->vm_private_data;
+
+		cur_page = pages[st->index++];
+	}
+
 	ret = xen_remap_domain_mfn_range(st->vma, st->va & PAGE_MASK, *mfnp, 1,
-					 st->vma->vm_page_prot, st->domain);
+					 st->vma->vm_page_prot, st->domain,
+					 &cur_page);
 
 	/* Store error code for second pass. */
 	*(st->err++) = ret;
@@ -303,6 +315,32 @@ static int mmap_return_errors_v1(void *data, void *state)
 	return __put_user(*mfnp, st->user_mfn++);
 }
 
+/* Allocate pfns that are then mapped with gmfns from the foreign domid.
+ * Update the vma with the page info to use later.
+ * Returns 0 on success, otherwise -errno.
+ */
+static int pvh_privcmd_resv_pfns(struct vm_area_struct *vma, int numpgs)
+{
+	int rc;
+	struct page **pages;
+
+	pages = kcalloc(numpgs, sizeof(pages[0]), GFP_KERNEL);
+	if (pages == NULL)
+		return -ENOMEM;
+
+	rc = alloc_xenballooned_pages(numpgs, pages, 0);
+	if (rc != 0) {
+		pr_warn("%s: could not alloc %d pfns rc:%d\n", __func__,
+			numpgs, rc);
+		kfree(pages);
+		return -ENOMEM;
+	}
+	BUG_ON(vma->vm_private_data);
+	vma->vm_private_data = pages;
+
+	return 0;
+}
+
 static struct vm_operations_struct privcmd_vm_ops;
 
 static long privcmd_ioctl_mmap_batch(void __user *udata, int version)
@@ -370,10 +408,17 @@ static long privcmd_ioctl_mmap_batch(void __user *udata, int version)
 		up_write(&mm->mmap_sem);
 		goto out;
 	}
+	if (xen_feature(XENFEAT_auto_translated_physmap)) {
+		ret = pvh_privcmd_resv_pfns(vma, m.num);
+		if (ret < 0) {
+			up_write(&mm->mmap_sem);
+			goto out;
+		}
+	}
 
 	state.domain        = m.dom;
 	state.vma           = vma;
 	state.va            = m.addr;
+	state.index         = 0;
 	state.global_error  = 0;
 	state.err           = err_array;
 
@@ -438,6 +483,20 @@ static long privcmd_ioctl(struct file *file,
 	return ret;
 }
 
+static void privcmd_close(struct vm_area_struct *vma)
+{
+	struct page **pages = vma->vm_private_data;
+	int numpgs = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
+
+	if (!pages || !numpgs || !xen_feature(XENFEAT_auto_translated_physmap))
+		return;
+
+	xen_unmap_domain_mfn_range(vma);
+	while (numpgs--)
+		free_xenballooned_pages(1, &pages[numpgs]);
+	kfree(pages);
+}
+
 static int privcmd_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 {
 	printk(KERN_DEBUG "privcmd_fault: vma=%p %lx-%lx, pgoff=%lx, uv=%p\n",
@@ -448,6 +507,7 @@ static int privcmd_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 }
 
 static struct vm_operations_struct privcmd_vm_ops = {
+	.close = privcmd_close,
 	.fault = privcmd_fault
 };
 
@@ -464,6 +524,10 @@ static int privcmd_mmap(struct file *file, struct vm_area_struct *vma)
 
 static int privcmd_enforce_singleshot_mapping(struct vm_area_struct *vma)
 {
+	if (xen_feature(XENFEAT_auto_translated_physmap)) {
+		int sz = sizeof(vma->vm_private_data);
+		return (!__cmpxchg(&vma->vm_private_data, NULL, NULL, sz));
+	}
 	return (xchg(&vma->vm_private_data, (void *)1) == NULL);
 }
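
For auto-translated guests, the privcmd pieces above tie together as
follows (a reading aid summarizing this patch, not new code):

    /*
     * privcmd_ioctl_mmap_batch()
     *   -> pvh_privcmd_resv_pfns()      reserve local pfns from the
     *                                   balloon, stash the page array
     *                                   in vma->vm_private_data
     *   -> mmap_batch_fn()              for each foreign gmfn:
     *        -> xen_remap_domain_mfn_range(..., &cur_page)
     *
     * privcmd_close()                   on munmap/process exit:
     *   -> xen_unmap_domain_mfn_range() drop the p2m entries
     *   -> free_xenballooned_pages()    return the pfns to the balloon
     */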
 
===============================================================================================

diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 5a16824..d5e53ad 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -73,6 +73,7 @@
 #include <xen/interface/version.h>
 #include <xen/interface/memory.h>
 #include <xen/hvc-console.h>
+#include <xen/balloon.h>
 
 #include "multicalls.h"
 #include "mmu.h"
@@ -331,6 +332,20 @@ static void xen_set_pte(pte_t *ptep, pte_t pteval)
 	__xen_set_pte(ptep, pteval);
 }
 
+void xen_set_clr_mmio_pvh_pte(unsigned long pfn, unsigned long mfn,
+			      int nr_mfns, int add_mapping)
+{
+	struct physdev_map_iomem iomem;
+
+	iomem.first_gfn = pfn;
+	iomem.first_mfn = mfn;
+	iomem.nr_mfns = nr_mfns;
+	iomem.add_mapping = add_mapping;
+
+	if (HYPERVISOR_physdev_op(PHYSDEVOP_pvh_map_iomem, &iomem))
+		BUG();
+}
+
 static void xen_set_pte_at(struct mm_struct *mm, unsigned long addr,
 		    pte_t *ptep, pte_t pteval)
 {
@@ -1220,6 +1235,8 @@ static void __init xen_pagetable_init(void)
 #endif
 	paging_init();
 	xen_setup_shared_info();
+	if (xen_feature(XENFEAT_auto_translated_physmap))
+		return;
 #ifdef CONFIG_X86_64
 	if (!xen_feature(XENFEAT_auto_translated_physmap)) {
 		unsigned long new_mfn_list;
@@ -1527,6 +1544,10 @@ static void __init xen_set_pte_init(pte_t *ptep, pte_t pte)
 static void pin_pagetable_pfn(unsigned cmd, unsigned long pfn)
 {
 	struct mmuext_op op;
+
+	if (xen_feature(XENFEAT_writable_page_tables))
+		return;
+
 	op.cmd = cmd;
 	op.arg1.mfn = pfn_to_mfn(pfn);
 	if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF))
@@ -1724,6 +1745,10 @@ static void set_page_prot(void *addr, pgprot_t prot)
 	unsigned long pfn = __pa(addr) >> PAGE_SHIFT;
 	pte_t pte = pfn_pte(pfn, prot);
 
+	/* Recall that for PVH, page tables are native. */
+	if (xen_feature(XENFEAT_auto_translated_physmap))
+		return;
+
 	if (HYPERVISOR_update_va_mapping((unsigned long)addr, pte, 0))
 		BUG();
 }
@@ -1801,6 +1826,9 @@ static void convert_pfn_mfn(void *v)
 	pte_t *pte = v;
 	int i;
 
+	if (xen_feature(XENFEAT_auto_translated_physmap))
+		return;
+
 	/* All levels are converted the same way, so just treat them
 	   as ptes. */
 	for (i = 0; i < PTRS_PER_PTE; i++)
@@ -1820,6 +1848,7 @@ static void __init check_pt_base(unsigned long *pt_base, unsigned long *pt_end,
 		(*pt_end)--;
 	}
 }
+
 /*
  * Set up the initial kernel pagetable.
  *
@@ -1830,6 +1859,7 @@ static void __init check_pt_base(unsigned long *pt_base, unsigned long *pt_end,
  * but that's enough to get __va working.  We need to fill in the rest
  * of the physical mapping once some sort of allocator has been set
  * up.
+ * NOTE: for PVH, the page tables are native.
  */
 void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
 {
@@ -1907,10 +1937,13 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
 	 * structure to attach it to, so make sure we just set kernel
 	 * pgd.
 	 */
-	xen_mc_batch();
-	__xen_write_cr3(true, __pa(init_level4_pgt));
-	xen_mc_issue(PARAVIRT_LAZY_CPU);
-
+	if (xen_feature(XENFEAT_writable_page_tables)) {
+		native_write_cr3(__pa(init_level4_pgt));
+	} else {
+		xen_mc_batch();
+		__xen_write_cr3(true, __pa(init_level4_pgt));
+		xen_mc_issue(PARAVIRT_LAZY_CPU);
+	}
 	/* We can't that easily rip out L3 and L2, as the Xen pagetables are
 	 * set out this way: [L4], [L1], [L2], [L3], [L1], [L1] ...  for
 	 * the initial domain. For guests using the toolstack, they are in:
@@ -2177,8 +2210,19 @@ static const struct pv_mmu_ops xen_mmu_ops __initconst = {
 
 void __init xen_init_mmu_ops(void)
 {
-	x86_init.mapping.pagetable_reserve = xen_mapping_pagetable_reserve;
 	x86_init.paging.pagetable_init = xen_pagetable_init;
+
+	if (xen_feature(XENFEAT_auto_translated_physmap)) {
+		pv_mmu_ops.flush_tlb_others = xen_flush_tlb_others;
+
+		/* set_pte* for PCI devices to map iomem. */
+		if (xen_initial_domain()) {
+			pv_mmu_ops.set_pte = native_set_pte;
+			pv_mmu_ops.set_pte_at = native_set_pte_at;
+		}
+		return;
+	}
+	x86_init.mapping.pagetable_reserve = xen_mapping_pagetable_reserve;
 	pv_mmu_ops = xen_mmu_ops;
 
 	memset(dummy_mapping, 0xff, PAGE_SIZE);
@@ -2414,6 +2458,94 @@ void __init xen_hvm_init_mmu_ops(void)
 }
 #endif
 
+/* Map a foreign gmfn (fgmfn) to a local pfn (lpfn). This is for user space
+ * on PVH dom0 creating a new guest, which needs to map domU pages.
+ */
+static int pvh_add_to_xen_p2m(unsigned long lpfn, unsigned long fgmfn,
+			      unsigned int domid)
+{
+	int rc;
+	struct xen_add_to_physmap xatp = { .u.foreign_domid = domid };
+
+	xatp.gpfn = lpfn;
+	xatp.idx = fgmfn;
+	xatp.domid = DOMID_SELF;
+	xatp.space = XENMAPSPACE_gmfn_foreign;
+	rc = HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp);
+	if (rc)
+		pr_warn("d0: Failed to map pfn to mfn rc:%d pfn:%lx mfn:%lx\n",
+			rc, lpfn, fgmfn);
+	return rc;
+}
+
+int pvh_rem_xen_p2m(unsigned long spfn, int count)
+{
+	struct xen_remove_from_physmap xrp;
+	int i, rc;
+
+	for (i = 0; i < count; i++) {
+		xrp.domid = DOMID_SELF;
+		xrp.gpfn = spfn+i;
+		rc = HYPERVISOR_memory_op(XENMEM_remove_from_physmap, &xrp);
+		if (rc) {
+			pr_warn("Failed to unmap pfn:%lx rc:%d done:%d\n",
+				spfn+i, rc, i);
+			return 1;
+		}
+	}
+	return 0;
+}
+EXPORT_SYMBOL_GPL(pvh_rem_xen_p2m);
+
+struct pvh_remap_data {
+	unsigned long fgmfn;		/* foreign domain's gmfn */
+	pgprot_t prot;
+	domid_t  domid;
+	int	 index;
+	struct page **pages;
+};
+
+static int pvh_map_pte_fn(pte_t *ptep, pgtable_t token, unsigned long addr,
+			  void *data)
+{
+	int rc;
+	struct pvh_remap_data *remap = data;
+	unsigned long pfn = page_to_pfn(remap->pages[remap->index++]);
+	pte_t pteval = pte_mkspecial(pfn_pte(pfn, remap->prot));
+
+	rc = pvh_add_to_xen_p2m(pfn, remap->fgmfn, remap->domid);
+	if (rc)
+		return rc;
+	native_set_pte(ptep, pteval);
+
+	return 0;
+}
+
+/* The only caller at the moment passes one gmfn at a time.
+ * PVH TBD/FIXME: expand this in the future to honor batch requests.
+ */
+static int pvh_remap_gmfn_range(struct vm_area_struct *vma,
+				unsigned long addr, unsigned long mfn, int nr,
+				pgprot_t prot, unsigned domid,
+				struct page **pages)
+{
+	int err;
+	struct pvh_remap_data pvhdata;
+
+	if (nr > 1)
+		return -EINVAL;
+
+	pvhdata.fgmfn = mfn;
+	pvhdata.prot = prot;
+	pvhdata.domid = domid;
+	pvhdata.index = 0;
+	pvhdata.pages = pages;
+	err = apply_to_page_range(vma->vm_mm, addr, nr << PAGE_SHIFT,
+				  pvh_map_pte_fn, &pvhdata);
+	/* TBD: flush_tlb_page(vma, addr) should suffice for a single page. */
+	flush_tlb_all();
+	return err;
+}
+
 #define REMAP_BATCH_SIZE 16
 
 struct remap_data {
@@ -2438,7 +2570,9 @@ static int remap_area_mfn_pte_fn(pte_t *ptep, pgtable_t token,
 int xen_remap_domain_mfn_range(struct vm_area_struct *vma,
 			       unsigned long addr,
 			       unsigned long mfn, int nr,
-			       pgprot_t prot, unsigned domid)
+			       pgprot_t prot, unsigned domid,
+			       struct page **pages)
 {
 	struct remap_data rmd;
 	struct mmu_update mmu_update[REMAP_BATCH_SIZE];
@@ -2446,14 +2580,17 @@ int xen_remap_domain_mfn_range(struct vm_area_struct *vma,
 	unsigned long range;
 	int err = 0;
 
-	if (xen_feature(XENFEAT_auto_translated_physmap))
-		return -EINVAL;
-
 	prot = __pgprot(pgprot_val(prot) | _PAGE_IOMAP);
 
 	BUG_ON(!((vma->vm_flags & (VM_PFNMAP | VM_RESERVED | VM_IO)) ==
 				(VM_PFNMAP | VM_RESERVED | VM_IO)));
 
+	if (xen_feature(XENFEAT_auto_translated_physmap)) {
+		/* We need to update the local page tables and the Xen HAP. */
+		return pvh_remap_gmfn_range(vma, addr, mfn, nr, prot, domid,
+					    pages);
+	}
+
 	rmd.mfn = mfn;
 	rmd.prot = prot;
 
@@ -2483,3 +2620,25 @@ out:
 	return err;
 }
 EXPORT_SYMBOL_GPL(xen_remap_domain_mfn_range);
+
+/* Returns 0 on success. */
+int xen_unmap_domain_mfn_range(struct vm_area_struct *vma)
+{
+	int numpgs = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
+	struct page **pages = vma->vm_private_data;
+
+	if (!pages || !xen_feature(XENFEAT_auto_translated_physmap))
+		return 0;
+
+	while (numpgs--) {
+		/* The MMU has already cleaned up the process MMU resources
+		 * at this point (lookup_address() will return NULL). */
+		unsigned long pfn = page_to_pfn(pages[numpgs]);
+
+		pvh_rem_xen_p2m(pfn, 1);
+	}
+	flush_tlb_all();
+	return 0;
+}
+EXPORT_SYMBOL_GPL(xen_unmap_domain_mfn_range);
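
As a usage sketch (the caller below is hypothetical; only the two
exported helpers are from this patch), mapping a single foreign page
into a privcmd-style vma and tearing it down pairs up like this:

    #include <xen/xen-ops.h>

    /* Hypothetical caller: assumes a vma set up with
     * VM_PFNMAP | VM_RESERVED | VM_IO, with one ballooned page already
     * reserved and the page array stashed in vma->vm_private_data
     * (as pvh_privcmd_resv_pfns() does). */
    static int example_map_one(struct vm_area_struct *vma,
                               unsigned long va, unsigned long fgmfn,
                               domid_t domid, struct page *page)
    {
        struct page *cur = page;
        int rc;

        rc = xen_remap_domain_mfn_range(vma, va & PAGE_MASK, fgmfn, 1,
                                        vma->vm_page_prot, domid, &cur);
        if (rc)
            return rc;

        /* ... mapping is live; use it ... */

        /* Drops every p2m entry backing this vma, not just one page. */
        return xen_unmap_domain_mfn_range(vma);
    }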


