From mboxrd@z Thu Jan  1 00:00:00 1970
From: Andrew Cooper
Subject: Re: [PATCH v5] x86/p2m: use large pages for MMIO mappings
Date: Wed, 27 Jan 2016 12:32:02 +0000
Message-ID: <56A8B8C2.5010905@citrix.com>
References: <56A25C0602000078000CA367@prv-mh.provo.novell.com>
 <1453724207.4320.137.camel@citrix.com>
 <56A6371802000078000CAA6B@prv-mh.provo.novell.com>
 <1453730752.4320.164.camel@citrix.com>
 <56A63C4002000078000CAAA7@prv-mh.provo.novell.com>
 <1453731704.4320.173.camel@citrix.com>
 <56A658FE02000078000CAC3D@prv-mh.provo.novell.com>
Mime-Version: 1.0
Content-Type: text/plain; charset="us-ascii"
Content-Transfer-Encoding: 7bit
Received: from mail6.bemta3.messagelabs.com ([195.245.230.39])
 by lists.xen.org with esmtp (Exim 4.72) (envelope-from )
 id 1aOPGa-00067k-5t
 for xen-devel@lists.xenproject.org; Wed, 27 Jan 2016 12:32:08 +0000
In-Reply-To: <56A658FE02000078000CAC3D@prv-mh.provo.novell.com>
Sender: xen-devel-bounces@lists.xen.org
Errors-To: xen-devel-bounces@lists.xen.org
To: Jan Beulich, xen-devel
Cc: Kevin Tian, Wei Liu, Ian Campbell, Stefano Stabellini, George Dunlap,
 Tim Deegan, Ian Jackson, Jun Nakajima, Keir Fraser
List-Id: xen-devel@lists.xenproject.org

On 25/01/16 16:18, Jan Beulich wrote:
> --- a/xen/arch/x86/hvm/vmx/vmx.c
> +++ b/xen/arch/x86/hvm/vmx/vmx.c
> @@ -2491,7 +2491,7 @@ static int vmx_alloc_vlapic_mapping(stru
>      share_xen_page_with_guest(pg, d, XENSHARE_writable);
>      d->arch.hvm_domain.vmx.apic_access_mfn = mfn;
>      set_mmio_p2m_entry(d, paddr_to_pfn(APIC_DEFAULT_PHYS_BASE), _mfn(mfn),
> -                       p2m_get_hostp2m(d)->default_access);
> +                       PAGE_ORDER_4K, p2m_get_hostp2m(d)->default_access);

This should ASSERT() success, in case we make further changes to the error
handling.

>      return 0;
>  }
> --- a/xen/arch/x86/mm/p2m.c
> +++ b/xen/arch/x86/mm/p2m.c
> @@ -899,48 +899,62 @@ void p2m_change_type_range(struct domain
>      p2m_unlock(p2m);
>  }
>  
> -/* Returns: 0 for success, -errno for failure */
> +/*
> + * Returns:
> + *    0        for success
> + *    -errno   for failure
> + *    order+1  for caller to retry with order (guaranteed smaller than
> + *             the order value passed in)
> + */
>  static int set_typed_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn,
> -                               p2m_type_t gfn_p2mt, p2m_access_t access)
> +                               unsigned int order, p2m_type_t gfn_p2mt,
> +                               p2m_access_t access)
>  {
>      int rc = 0;
>      p2m_access_t a;
>      p2m_type_t ot;
>      mfn_t omfn;
> +    unsigned int cur_order = 0;
>      struct p2m_domain *p2m = p2m_get_hostp2m(d);
>  
>      if ( !paging_mode_translate(d) )
>          return -EIO;
>  
> -    gfn_lock(p2m, gfn, 0);
> -    omfn = p2m->get_entry(p2m, gfn, &ot, &a, 0, NULL, NULL);
> +    gfn_lock(p2m, gfn, order);
> +    omfn = p2m->get_entry(p2m, gfn, &ot, &a, 0, &cur_order, NULL);
> +    if ( cur_order < order )
> +    {
> +        gfn_unlock(p2m, gfn, order);
> +        return cur_order + 1;

Your comment states that the return value is guaranteed to be less than the
passed-in order, but this is not the case here.  cur_order could, in
principle, be just 1 less than order, in which case cur_order + 1 == order
and the documentation is incorrect.  Does this rely on the x86 architectural
orders to function as documented?

> +    }
>      if ( p2m_is_grant(ot) || p2m_is_foreign(ot) )
>      {
> -        gfn_unlock(p2m, gfn, 0);
> +        gfn_unlock(p2m, gfn, order);
>          domain_crash(d);
>          return -ENOENT;
>      }
>      else if ( p2m_is_ram(ot) )
>      {
> +        unsigned long i;
> +
>          ASSERT(mfn_valid(omfn));

Shouldn't this check be extended to the top of the order?

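Something along these lines, perhaps (illustrative only; it could equally be
folded into the M2P loop you add just below):

    for ( i = 0; i < (1UL << order); ++i )
        ASSERT(mfn_valid(_mfn(mfn_x(omfn) + i)));
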
> -        set_gpfn_from_mfn(mfn_x(omfn), INVALID_M2P_ENTRY);
> +        for ( i = 0; i < (1UL << order); ++i )
> +            set_gpfn_from_mfn(mfn_x(omfn) + i, INVALID_M2P_ENTRY);
>      }
>  
>      P2M_DEBUG("set %d %lx %lx\n", gfn_p2mt, gfn, mfn_x(mfn));
> -    rc = p2m_set_entry(p2m, gfn, mfn, PAGE_ORDER_4K, gfn_p2mt,
> -                       access);
> +    rc = p2m_set_entry(p2m, gfn, mfn, order, gfn_p2mt, access);
>      if ( rc )
> -        gdprintk(XENLOG_ERR,
> -                 "p2m_set_entry failed! mfn=%08lx rc:%d\n",
> -                 mfn_x(get_gfn_query_unlocked(p2m->domain, gfn, &ot)), rc);
> +        gdprintk(XENLOG_ERR, "p2m_set_entry: %#lx:%u -> %d (0x%"PRI_mfn")\n",
> +                 gfn, order, rc, mfn_x(mfn));
>      else if ( p2m_is_pod(ot) )
>      {
>          pod_lock(p2m);
> -        p2m->pod.entry_count--;
> +        p2m->pod.entry_count -= 1UL << order;
>          BUG_ON(p2m->pod.entry_count < 0);
>          pod_unlock(p2m);
>      }
> -    gfn_unlock(p2m, gfn, 0);
> +    gfn_unlock(p2m, gfn, order);
>  
>      return rc;
>  }
> @@ -949,14 +963,21 @@ static int set_typed_p2m_entry(struct do
>  static int set_foreign_p2m_entry(struct domain *d, unsigned long gfn,
>                                   mfn_t mfn)
>  {
> -    return set_typed_p2m_entry(d, gfn, mfn, p2m_map_foreign,
> +    return set_typed_p2m_entry(d, gfn, mfn, PAGE_ORDER_4K, p2m_map_foreign,
>                                 p2m_get_hostp2m(d)->default_access);
>  }
>  
>  int set_mmio_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn,
> -                       p2m_access_t access)
> +                       unsigned int order, p2m_access_t access)
>  {
> -    return set_typed_p2m_entry(d, gfn, mfn, p2m_mmio_direct, access);
> +    if ( order &&
> +         rangeset_overlaps_range(mmio_ro_ranges, mfn_x(mfn),
> +                                 mfn_x(mfn) + (1UL << order) - 1) &&
> +         !rangeset_contains_range(mmio_ro_ranges, mfn_x(mfn),
> +                                  mfn_x(mfn) + (1UL << order) - 1) )
> +        return order;

Should this not be a hard error?  Even retrying with a lower order is going
to fail.

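I.e. perhaps something along these lines (only a sketch of the suggestion;
the exact errno value is a matter of taste):

    if ( order &&
         rangeset_overlaps_range(mmio_ro_ranges, mfn_x(mfn),
                                 mfn_x(mfn) + (1UL << order) - 1) &&
         !rangeset_contains_range(mmio_ro_ranges, mfn_x(mfn),
                                  mfn_x(mfn) + (1UL << order) - 1) )
        return -EPERM; /* mixed r/o and r/w MMIO within one large mapping */
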
> +
> +    return set_typed_p2m_entry(d, gfn, mfn, order, p2m_mmio_direct, access);
>  }
>  
>  int set_identity_p2m_entry(struct domain *d, unsigned long gfn,
> @@ -1009,20 +1030,33 @@ int set_identity_p2m_entry(struct domain
>      return ret;
>  }
>  
> -/* Returns: 0 for success, -errno for failure */
> -int clear_mmio_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn)
> +/*
> + * Returns:
> + *    0        for success
> + *    -errno   for failure
> + *    order+1  for caller to retry with order (guaranteed smaller than
> + *             the order value passed in)
> + */
> +int clear_mmio_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn,
> +                         unsigned int order)
>  {
>      int rc = -EINVAL;
>      mfn_t actual_mfn;
>      p2m_access_t a;
>      p2m_type_t t;
> +    unsigned int cur_order = 0;
>      struct p2m_domain *p2m = p2m_get_hostp2m(d);
>  
>      if ( !paging_mode_translate(d) )
>          return -EIO;
>  
> -    gfn_lock(p2m, gfn, 0);
> -    actual_mfn = p2m->get_entry(p2m, gfn, &t, &a, 0, NULL, NULL);
> +    gfn_lock(p2m, gfn, order);
> +    actual_mfn = p2m->get_entry(p2m, gfn, &t, &a, 0, &cur_order, NULL);
> +    if ( cur_order < order )
> +    {
> +        rc = cur_order + 1;
> +        goto out;
> +    }
>  
>      /* Do not use mfn_valid() here as it will usually fail for MMIO pages. */
>      if ( (INVALID_MFN == mfn_x(actual_mfn)) || (t != p2m_mmio_direct) )
> @@ -1035,11 +1069,11 @@ int clear_mmio_p2m_entry(struct domain *
>          gdprintk(XENLOG_WARNING,
>                   "no mapping between mfn %08lx and gfn %08lx\n",
>                   mfn_x(mfn), gfn);
> -    rc = p2m_set_entry(p2m, gfn, _mfn(INVALID_MFN), PAGE_ORDER_4K, p2m_invalid,
> +    rc = p2m_set_entry(p2m, gfn, _mfn(INVALID_MFN), order, p2m_invalid,
>                         p2m->default_access);
>  
>   out:
> -    gfn_unlock(p2m, gfn, 0);
> +    gfn_unlock(p2m, gfn, order);
>  
>      return rc;
>  }
> @@ -2095,6 +2129,25 @@ void *map_domain_gfn(struct p2m_domain *
>      return map_domain_page(*mfn);
>  }
>  
> +static unsigned int mmio_order(const struct domain *d,
> +                               unsigned long start_fn, unsigned long nr)
> +{
> +    if ( !need_iommu(d) || !iommu_use_hap_pt(d) ||
> +         (start_fn & ((1UL << PAGE_ORDER_2M) - 1)) || !(nr >> PAGE_ORDER_2M) )
> +        return 0;

Perhaps PAGE_ORDER_4K for consistency?

~Andrew