Re: [PATCH v1 2/8]: PVH mmu changes - Konrad Rzeszutek Wilk

All of lore.kernel.org
 help / color / mirror / Atom feed

From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
To: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
Cc: "Xen-devel@lists.xensource.com" <Xen-devel@lists.xensource.com>,
	Ian Campbell <Ian.Campbell@citrix.com>
Subject: Re: [PATCH v1 2/8]: PVH mmu changes
Date: Mon, 24 Sep 2012 09:50:31 -0400	[thread overview]
Message-ID: <20120924135031.GD31618@phenom.dumpdata.com> (raw)
In-Reply-To: <alpine.DEB.2.02.1209241228460.29232@kaball.uk.xensource.com>

On Mon, Sep 24, 2012 at 12:55:31PM +0100, Stefano Stabellini wrote:
> There are few code style issues on this patch, I suggest you run it
> through scripts/checkpatch.pl, it should be able to catch all these
> errors.
> 
> On Fri, 21 Sep 2012, Mukesh Rathor wrote:
> > ---
> >  arch/x86/xen/mmu.c    |  180 +++++++++++++++++++++++++++++++++++++++++++++++--
> >  arch/x86/xen/mmu.h    |    2 +
> >  include/xen/xen-ops.h |   12 +++-
> >  3 files changed, 188 insertions(+), 6 deletions(-)
> > 
> > diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
> > index b65a761..9b5a5ef 100644
> > --- a/arch/x86/xen/mmu.c
> > +++ b/arch/x86/xen/mmu.c
> > @@ -73,6 +73,7 @@
> >  #include <xen/interface/version.h>
> >  #include <xen/interface/memory.h>
> >  #include <xen/hvc-console.h>
> > +#include <xen/balloon.h>
> >  
> >  #include "multicalls.h"
> >  #include "mmu.h"
> > @@ -330,6 +331,26 @@ static void xen_set_pte(pte_t *ptep, pte_t pteval)
> >  	__xen_set_pte(ptep, pteval);
> >  }
> >  
> > +void xen_set_clr_mmio_pvh_pte(unsigned long pfn, unsigned long mfn, 
> > +			      int nr_mfns, int add_mapping)
> > +{
> > +	struct physdev_map_iomem iomem;
> > +
> > +	iomem.first_gfn = pfn;
> > +	iomem.first_mfn = mfn;
> > +	iomem.nr_mfns = nr_mfns;
> > +	iomem.add_mapping = add_mapping;
> > +
> > +	if (HYPERVISOR_physdev_op(PHYSDEVOP_pvh_map_iomem, &iomem))
> > +		BUG();
> > +}
> > +
> > +static void xen_dom0pvh_set_pte_at(struct mm_struct *mm, unsigned long addr,
> > +		    		   pte_t *ptep, pte_t pteval)
> > +{
> > +	native_set_pte(ptep, pteval);
> > +}
> 
> can't you just set set_pte_at to native_set_pte?
> 
> 
> >  static void xen_set_pte_at(struct mm_struct *mm, unsigned long addr,
> >  		    pte_t *ptep, pte_t pteval)
> >  {
> > @@ -1197,6 +1218,10 @@ static void xen_post_allocator_init(void);
> >  static void __init xen_pagetable_setup_done(pgd_t *base)
> >  {
> >  	xen_setup_shared_info();
> > +
> > +	if (xen_feature(XENFEAT_auto_translated_physmap))
> > +		return;
> > +
> >  	xen_post_allocator_init();
> >  }
> 
> Can we move the if..return at the beginning of xen_post_allocator_init?
> It would make it easier to understand what it is for.
> Or maybe we could have xen_setup_shared_info take a pgd_t *base
> parameter so that you can set pagetable_setup_done to
> xen_setup_shared_info directly on pvh.

It actually ends up nicely aligning with my patches since I end up
using 'if (xen_feature(..)'. But you can't see that since these patches
are not rebased on that - which is OK.

> 
> 
> > @@ -1455,6 +1480,10 @@ static void __init xen_set_pte_init(pte_t *ptep, pte_t pte)
> >  static void pin_pagetable_pfn(unsigned cmd, unsigned long pfn)
> >  {
> >  	struct mmuext_op op;
> > +
> > +	if (xen_feature(XENFEAT_writable_page_tables))
> > +		return;
> > +
> >  	op.cmd = cmd;
> >  	op.arg1.mfn = pfn_to_mfn(pfn);
> >  	if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF))
> 
> given the very limited set of mmu pvops that we initialize on pvh, I
> cannot find who would actually call pin_pagetable_pfn on pvh.
> 
> 
> > @@ -1652,6 +1681,10 @@ static void set_page_prot(void *addr, pgprot_t prot)
> >  	unsigned long pfn = __pa(addr) >> PAGE_SHIFT;
> >  	pte_t pte = pfn_pte(pfn, prot);
> >  
> > +	/* recall for PVH, page tables are native. */
> > +	if (xen_feature(XENFEAT_auto_translated_physmap))
> > +		return;
> > +
> >  	if (HYPERVISOR_update_va_mapping((unsigned long)addr, pte, 0))
> >  		BUG();
> >  }
> > @@ -1729,6 +1762,9 @@ static void convert_pfn_mfn(void *v)
> >  	pte_t *pte = v;
> >  	int i;
> >  
> > +	if (xen_feature(XENFEAT_auto_translated_physmap))
> > +		return;
> > +
> >  	/* All levels are converted the same way, so just treat them
> >  	   as ptes. */
> >  	for (i = 0; i < PTRS_PER_PTE; i++)
> > @@ -1745,6 +1781,7 @@ static void convert_pfn_mfn(void *v)
> >   * but that's enough to get __va working.  We need to fill in the rest
> >   * of the physical mapping once some sort of allocator has been set
> >   * up.
> > + * NOTE: for PVH, the page tables are native.
> >   */
> >  pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd,
> >  					 unsigned long max_pfn)
> > @@ -1802,9 +1839,13 @@ pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd,
> >  	 * structure to attach it to, so make sure we just set kernel
> >  	 * pgd.
> >  	 */
> > -	xen_mc_batch();
> > -	__xen_write_cr3(true, __pa(pgd));
> > -	xen_mc_issue(PARAVIRT_LAZY_CPU);
> > +	if (xen_feature(XENFEAT_writable_page_tables)) {
> > +		native_write_cr3(__pa(pgd));
> > +	} else {
> > +		xen_mc_batch();
> > +		__xen_write_cr3(true, __pa(pgd));
> > +		xen_mc_issue(PARAVIRT_LAZY_CPU);
> > +	}
> >  
> >  	memblock_reserve(__pa(xen_start_info->pt_base),
> >  			 xen_start_info->nr_pt_frames * PAGE_SIZE);
> > @@ -2067,9 +2108,21 @@ static const struct pv_mmu_ops xen_mmu_ops __initconst = {
> >  
> >  void __init xen_init_mmu_ops(void)
> >  {
> > +	x86_init.paging.pagetable_setup_done = xen_pagetable_setup_done;
> > +
> > +	if (xen_feature(XENFEAT_auto_translated_physmap)) {
> > +		pv_mmu_ops.flush_tlb_others = xen_flush_tlb_others;
> > +
> > +		/* set_pte* for PCI devices to map iomem. */
> > +		if (xen_initial_domain()) {
> > +			pv_mmu_ops.set_pte = native_set_pte;
> > +			pv_mmu_ops.set_pte_at = xen_dom0pvh_set_pte_at;
> > +		}
> > +		return;
> > +	}
> > +
> >  	x86_init.mapping.pagetable_reserve = xen_mapping_pagetable_reserve;
> >  	x86_init.paging.pagetable_setup_start = xen_pagetable_setup_start;
> > -	x86_init.paging.pagetable_setup_done = xen_pagetable_setup_done;
> >  	pv_mmu_ops = xen_mmu_ops;
> >  
> >  	memset(dummy_mapping, 0xff, PAGE_SIZE);
> 
> I wonder whether it would make sense to have a xen_pvh_init_mmu_ops,
> given that they end up being so different...
> 
> 
> > @@ -2305,6 +2358,92 @@ void __init xen_hvm_init_mmu_ops(void)
> >  }
> >  #endif
> >  
> > +/* Map foreign gmfn, fgmfn, to local pfn, lpfn. This for the user space 
> > + * creating new guest on PVH dom0 and needs to map domU pages. 
> > + */
> > +static int pvh_add_to_xen_p2m(unsigned long lpfn, unsigned long fgmfn,
> > +			      unsigned int domid)
> > +{
> > +	int rc;
> > +	struct xen_add_to_physmap xatp = { .u.foreign_domid = domid };
> > +
> > +	xatp.gpfn = lpfn;
> > +	xatp.idx = fgmfn;
> > +	xatp.domid = DOMID_SELF;
> > +	xatp.space = XENMAPSPACE_gmfn_foreign;
> > +	rc = HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp);
> > +	if (rc)
> > +		pr_warn("d0: Failed to map pfn to mfn rc:%d pfn:%lx mfn:%lx\n", 
> > +			rc, lpfn, fgmfn); 
> > +	return rc;
> > +}
> > +
> > +int pvh_rem_xen_p2m(unsigned long spfn, int count)
> > +{
> > +	struct xen_remove_from_physmap xrp;
> > +	int i, rc;
> > +
> > +	for (i=0; i < count; i++) {
> > +		xrp.domid = DOMID_SELF;
> > +		xrp.gpfn = spfn+i;
> > +		rc = HYPERVISOR_memory_op(XENMEM_remove_from_physmap, &xrp);
> > +		if (rc) {
> > +			pr_warn("Failed to unmap pfn:%lx rc:%d done:%d\n",
> > +				spfn+i, rc, i);
> > +			return 1;
> > +		}
> > +	}
> > +	return 0;
> > +}
> > +EXPORT_SYMBOL_GPL(pvh_rem_xen_p2m);
> > +
> > +struct pvh_remap_data {
> > +	unsigned long fgmfn;		/* foreign domain's gmfn */
> > +	pgprot_t prot;
> > +	domid_t  domid;
> > +	struct xen_pvh_pfn_info *pvhinfop;
> > +};
> > +
> > +static int pvh_map_pte_fn(pte_t *ptep, pgtable_t token, unsigned long addr, 
> > +			void *data)
> > +{
> > +	int rc;
> > +	struct pvh_remap_data *remapp = data;
> > +	struct xen_pvh_pfn_info *pvhp = remapp->pvhinfop;
> > +	unsigned long pfn = page_to_pfn(pvhp->pi_paga[pvhp->pi_next_todo++]);
> > +	pte_t pteval = pte_mkspecial(pfn_pte(pfn, remapp->prot));
> > +
> > +	if ((rc=pvh_add_to_xen_p2m(pfn, remapp->fgmfn, remapp->domid)))
> > +		return rc;
> > +	native_set_pte(ptep, pteval);
> 
> Do we actually need the pte to be "special"?
> I would think that being in the guest p2m, it is actually quite a normal
> page.
> 
> 
> > +	return 0;
> > +}
> > +
> > +/* The only caller at moment passes one gmfn at a time.
> > + * PVH TBD/FIXME: expand this in future to honor batch requests.
> > + */
> > +static int pvh_remap_gmfn_range(struct vm_area_struct *vma,
> > +				unsigned long addr, unsigned long mfn, int nr,
> > +				pgprot_t prot, unsigned domid,
> > +				struct xen_pvh_pfn_info *pvhp)
> > +{
> > +	int err;
> > +	struct pvh_remap_data pvhdata;
> > +
> > +	if (nr > 1)
> > +		return -EINVAL;
> > +
> > +	pvhdata.fgmfn = mfn;
> > +	pvhdata.prot = prot;
> > +	pvhdata.domid = domid;
> > +	pvhdata.pvhinfop = pvhp;
> > +	err = apply_to_page_range(vma->vm_mm, addr, nr << PAGE_SHIFT,
> > +				  pvh_map_pte_fn, &pvhdata);
> > +	flush_tlb_all();
> 
> Maybe we can use one of the flush_tlb_range varieties instead of a
> flush_tlb_all?
> 
> 
> > +	return err;
> > +}
> > +
> >  #define REMAP_BATCH_SIZE 16
> >  
> >  struct remap_data {
> > @@ -2329,7 +2468,9 @@ static int remap_area_mfn_pte_fn(pte_t *ptep, pgtable_t token,
> >  int xen_remap_domain_mfn_range(struct vm_area_struct *vma,
> >  			       unsigned long addr,
> >  			       unsigned long mfn, int nr,
> > -			       pgprot_t prot, unsigned domid)
> > +			       pgprot_t prot, unsigned domid,
> > +			       struct xen_pvh_pfn_info *pvhp)
> > +
> >  {
> >  	struct remap_data rmd;
> >  	struct mmu_update mmu_update[REMAP_BATCH_SIZE];
> > @@ -2342,6 +2483,12 @@ int xen_remap_domain_mfn_range(struct vm_area_struct *vma,
> >  	BUG_ON(!((vma->vm_flags & (VM_PFNMAP | VM_RESERVED | VM_IO)) ==
> >  				(VM_PFNMAP | VM_RESERVED | VM_IO)));
> >  
> > +	if (xen_feature(XENFEAT_auto_translated_physmap)) {
> > +		/* We need to update the local page tables and the xen HAP */
> > +		return pvh_remap_gmfn_range(vma, addr, mfn, nr, prot, domid,
> > +					    pvhp);
> > +	}
> > +
> >  	rmd.mfn = mfn;
> >  	rmd.prot = prot;
> >  
> > @@ -2371,3 +2518,26 @@ out:
> >  	return err;
> >  }
> >  EXPORT_SYMBOL_GPL(xen_remap_domain_mfn_range);
> > +
> > +/* Returns: Number of pages unmapped */
> > +int xen_unmap_domain_mfn_range(struct vm_area_struct *vma,
> > +			       struct xen_pvh_pfn_info *pvhp)
> > +{
> > +	int count = 0;
> > +
> > +	if (!pvhp || !xen_feature(XENFEAT_auto_translated_physmap))
> > +		return 0;
> > +
> > +	while (pvhp->pi_next_todo--) {
> > +		unsigned long pfn;
> > +
> > +		/* the mmu has already cleaned up the process mmu resources at
> > +		 * this point (lookup_address will return NULL). */
> > +		pfn = page_to_pfn(pvhp->pi_paga[pvhp->pi_next_todo]);
> > +		pvh_rem_xen_p2m(pfn, 1);
> > +		count++;
> > +	}
> > +	flush_tlb_all();
> > +	return count;
> 
> Who is going to remove the corresponding mapping from the vma?
> Also we might be able to replace the flush_tlb_all with a
> flush_tlb_range.
> 
> 
> > +EXPORT_SYMBOL_GPL(xen_unmap_domain_mfn_range);
> > diff --git a/arch/x86/xen/mmu.h b/arch/x86/xen/mmu.h
> > index 73809bb..6d0bb56 100644
> > --- a/arch/x86/xen/mmu.h
> > +++ b/arch/x86/xen/mmu.h
> > @@ -23,4 +23,6 @@ unsigned long xen_read_cr2_direct(void);
> >  
> >  extern void xen_init_mmu_ops(void);
> >  extern void xen_hvm_init_mmu_ops(void);
> > +extern void xen_set_clr_mmio_pvh_pte(unsigned long pfn, unsigned long mfn,
> > +				     int nr_mfns, int add_mapping);
> >  #endif	/* _XEN_MMU_H */
> > diff --git a/include/xen/xen-ops.h b/include/xen/xen-ops.h
> > index 6a198e4..6c5ad83 100644
> > --- a/include/xen/xen-ops.h
> > +++ b/include/xen/xen-ops.h
> > @@ -24,9 +24,19 @@ int xen_create_contiguous_region(unsigned long vstart, unsigned int order,
> >  void xen_destroy_contiguous_region(unsigned long vstart, unsigned int order);
> >  
> >  struct vm_area_struct;
> > +struct xen_pvh_pfn_info;
> >  int xen_remap_domain_mfn_range(struct vm_area_struct *vma,
> >  			       unsigned long addr,
> >  			       unsigned long mfn, int nr,
> > -			       pgprot_t prot, unsigned domid);
> > +			       pgprot_t prot, unsigned domid,
> > +			       struct xen_pvh_pfn_info *pvhp);
> > +int xen_unmap_domain_mfn_range(struct vm_area_struct *vma,
> > +			       struct xen_pvh_pfn_info *pvhp);
> > +
> > +struct xen_pvh_pfn_info {
> > +	struct page **pi_paga;		/* pfn info page array */
> > +	int 	      pi_num_pgs;
> > +	int 	      pi_next_todo;
> > +};
> >  
> >  #endif /* INCLUDE_XEN_OPS_H */
> > -- 
> > 1.7.2.3
> >

next prev parent reply	other threads:[~2012-09-24 13:50 UTC|newest]

Thread overview: 36+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2012-09-21 19:15 [PATCH v1 2/8]: PVH mmu changes Mukesh Rathor
2012-09-24 11:55 ` Stefano Stabellini
2012-09-24 12:16   ` Ian Campbell
2012-09-24 12:24     ` Stefano Stabellini
2012-09-26  0:27       ` Mukesh Rathor
2012-09-26 11:10         ` George Dunlap
2012-09-26 11:16         ` Stefano Stabellini
2012-09-24 13:50   ` Konrad Rzeszutek Wilk [this message]
2012-09-26  0:09   ` Mukesh Rathor
2012-09-26 11:15     ` Stefano Stabellini
2012-10-01 21:32   ` Mukesh Rathor
2012-10-01 21:44     ` Mukesh Rathor
2012-10-02 11:23       ` Stefano Stabellini
2012-10-02 22:22         ` Mukesh Rathor
2012-10-02 10:46     ` Stefano Stabellini
2012-10-03 18:27   ` Mukesh Rathor
2012-10-04  8:18     ` Ian Campbell
2012-09-24 14:04 ` Stefano Stabellini
2012-09-24 14:13   ` Ian Campbell
2012-09-26  0:33   ` Mukesh Rathor
2012-09-26 11:25     ` Stefano Stabellini
2012-09-26 13:54 ` Konrad Rzeszutek Wilk
2012-10-02 10:49 ` Stefano Stabellini
2012-10-03 15:42 ` Ian Campbell
2012-10-03 22:29   ` Mukesh Rathor
2012-10-04  8:27     ` Ian Campbell
2012-10-04 18:27       ` Mukesh Rathor
2012-10-04 18:42         ` Konrad Rzeszutek Wilk
2012-10-05  9:25         ` Ian Campbell
2012-10-06  2:00       ` Mukesh Rathor
2012-10-08  9:31         ` Ian Campbell
2012-10-04  8:31     ` Ian Campbell
2012-10-05  1:17       ` Mukesh Rathor
2012-10-05  9:26         ` Ian Campbell
2012-10-04 13:54 ` Ian Campbell
2012-10-05  1:51   ` Mukesh Rathor

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20120924135031.GD31618@phenom.dumpdata.com \
    --to=konrad.wilk@oracle.com \
    --cc=Ian.Campbell@citrix.com \
    --cc=Xen-devel@lists.xensource.com \
    --cc=stefano.stabellini@eu.citrix.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.