From: Andrew Cooper <andrew.cooper3@citrix.com>
To: Jan Beulich <JBeulich@suse.com>,
xen-devel <xen-devel@lists.xenproject.org>
Cc: Wei Liu <wei.liu2@citrix.com>,
Stefano Stabellini <stefano.stabellini@eu.citrix.com>,
George Dunlap <George.Dunlap@eu.citrix.com>,
Ian Jackson <Ian.Jackson@eu.citrix.com>,
Ian Campbell <Ian.Campbell@eu.citrix.com>,
Tiejun Chen <tiejun.chen@intel.com>,
Malcolm Crossley <malcolm.crossley@citrix.com>,
Keir Fraser <keir@xen.org>
Subject: Re: [PATCH 3/4 RFC] x86/p2m: use large pages for MMIO mappings
Date: Thu, 17 Sep 2015 17:37:43 +0100 [thread overview]
Message-ID: <55FAEC57.4070207@citrix.com> (raw)
In-Reply-To: <55F7E63702000078000A2BD6@prv-mh.provo.novell.com>
On 15/09/15 08:34, Jan Beulich wrote:
> When mapping large BARs (e.g. the frame buffer of a graphics card) the
> overhead of establishing such mappings using only 4k pages has,
> particularly after the XSA-125 fix, become unacceptable. Alter the
> XEN_DOMCTL_memory_mapping semantics once again, so that there's no
> longer a fixed amount of guest frames that represents the upper limit
> of what a single invocation can map. Instead bound execution time by
> limiting the number of iterations (regardless of page size).
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
> ---
> RFC reasons:
> - ARM side unimplemented (and hence libxc for now made cope with both
> models), the main issue (besides my inability to test any change
> there) being the many internal uses of map_mmio_regions())
> - error unmapping in map_mmio_regions() and error propagation to caller
> from unmap_mmio_regions() are not satisfactory (for the latter a
> possible model might be to have the function - and hence the domctl -
> return the [non-zero] number of completed entries upon error,
> requiring the caller to re-invoke the hypercall to then obtain the
> actual error for the failed slot)
Doesn't this mean the caller must always make two hypercalls to confirm
success?
> --- a/xen/arch/x86/mm/p2m.c
> +++ b/xen/arch/x86/mm/p2m.c
> @@ -897,39 +897,47 @@ void p2m_change_type_range(struct domain
>
> /* Returns: 0 for success, -errno for failure */
> static int set_typed_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn,
> - p2m_type_t gfn_p2mt, p2m_access_t access)
> + unsigned int order, p2m_type_t gfn_p2mt,
> + p2m_access_t access)
> {
> int rc = 0;
> p2m_access_t a;
> p2m_type_t ot;
> mfn_t omfn;
> + unsigned int cur_order = 0;
> struct p2m_domain *p2m = p2m_get_hostp2m(d);
>
> if ( !paging_mode_translate(d) )
> return -EIO;
>
> - gfn_lock(p2m, gfn, 0);
> - omfn = p2m->get_entry(p2m, gfn, &ot, &a, 0, NULL, NULL);
> + gfn_lock(p2m, gfn, order);
> + omfn = p2m->get_entry(p2m, gfn, &ot, &a, 0, &cur_order, NULL);
> + if ( cur_order < order )
> + {
> + gfn_unlock(p2m, gfn, order);
> + return cur_order + 1;
This appears to change the error semantics, and therefore warrants an update
to the function comment.
> + }
> if ( p2m_is_grant(ot) || p2m_is_foreign(ot) )
> {
> - gfn_unlock(p2m, gfn, 0);
> + gfn_unlock(p2m, gfn, order);
> domain_crash(d);
> return -ENOENT;
> }
> else if ( p2m_is_ram(ot) )
> {
> + unsigned long i;
> +
> ASSERT(mfn_valid(omfn));
> - set_gpfn_from_mfn(mfn_x(omfn), INVALID_M2P_ENTRY);
> + for ( i = 0; i < (1UL << order); ++i )
> + set_gpfn_from_mfn(mfn_x(omfn) + i, INVALID_M2P_ENTRY);
> }
>
> P2M_DEBUG("set %d %lx %lx\n", gfn_p2mt, gfn, mfn_x(mfn));
> - rc = p2m_set_entry(p2m, gfn, mfn, PAGE_ORDER_4K, gfn_p2mt,
> - access);
> - gfn_unlock(p2m, gfn, 0);
> + rc = p2m_set_entry(p2m, gfn, mfn, order, gfn_p2mt, access);
> + gfn_unlock(p2m, gfn, order);
> if ( rc )
> - gdprintk(XENLOG_ERR,
> - "p2m_set_entry failed! mfn=%08lx rc:%d\n",
> - mfn_x(get_gfn_query_unlocked(p2m->domain, gfn, &ot)), rc);
> + gdprintk(XENLOG_ERR, "p2m_set_entry: %#lx:%u -> %d (%#lx)\n",
PRI_mfn
> @@ -2037,6 +2052,25 @@ unsigned long paging_gva_to_gfn(struct v
> return hostmode->gva_to_gfn(v, hostp2m, va, pfec);
> - iommu_{,un}map_page() interfaces don't support "order" (hence
> mmio_order() for now returns zero when !iommu_hap_pt_share, which in
> particular means the AMD side isn't being taken care of just yet)
>
> --- a/tools/libxc/xc_domain.c
> +++ b/tools/libxc/xc_domain.c
> @@ -2215,7 +2215,7 @@ int xc_domain_memory_mapping(
> {
> DECLARE_DOMCTL;
> xc_dominfo_t info;
> - int ret = 0, err;
> + int ret = 0, rc;
> unsigned long done = 0, nr, max_batch_sz;
>
> if ( xc_domain_getinfo(xch, domid, 1, &info) != 1 ||
> @@ -2240,19 +2240,24 @@ int xc_domain_memory_mapping(
> domctl.u.memory_mapping.nr_mfns = nr;
> domctl.u.memory_mapping.first_gfn = first_gfn + done;
> domctl.u.memory_mapping.first_mfn = first_mfn + done;
> - err = do_domctl(xch, &domctl);
> - if ( err && errno == E2BIG )
> + rc = do_domctl(xch, &domctl);
> + if ( rc < 0 && errno == E2BIG )
> {
> if ( max_batch_sz <= 1 )
> break;
> max_batch_sz >>= 1;
> continue;
> }
> + if ( rc > 0 )
> + {
> + done += rc;
> + continue;
> + }
> /* Save the first error... */
> if ( !ret )
> - ret = err;
> + ret = rc;
> /* .. and ignore the rest of them when removing. */
> - if ( err && add_mapping != DPCI_REMOVE_MAPPING )
> + if ( rc && add_mapping != DPCI_REMOVE_MAPPING )
> break;
>
> done += nr;
> --- a/xen/arch/x86/domain_build.c
> +++ b/xen/arch/x86/domain_build.c
> @@ -436,7 +436,7 @@ static __init void pvh_add_mem_mapping(s
> else
> a = p2m_access_rwx;
>
> - if ( (rc = set_mmio_p2m_entry(d, gfn + i, _mfn(mfn + i), a)) )
> + if ( (rc = set_mmio_p2m_entry(d, gfn + i, _mfn(mfn + i), 0, a)) )
> panic("pvh_add_mem_mapping: gfn:%lx mfn:%lx i:%ld rc:%d\n",
> gfn, mfn, i, rc);
> if ( !(i & 0xfffff) )
> --- a/xen/arch/x86/hvm/vmx/vmx.c
> +++ b/xen/arch/x86/hvm/vmx/vmx.c
> @@ -2396,7 +2396,8 @@ static int vmx_alloc_vlapic_mapping(stru
> share_xen_page_with_guest(virt_to_page(apic_va), d, XENSHARE_writable);
> d->arch.hvm_domain.vmx.apic_access_mfn = virt_to_mfn(apic_va);
> set_mmio_p2m_entry(d, paddr_to_pfn(APIC_DEFAULT_PHYS_BASE),
> - _mfn(virt_to_mfn(apic_va)), p2m_get_hostp2m(d)->default_access);
> + _mfn(virt_to_mfn(apic_va)), 0,
> + p2m_get_hostp2m(d)->default_access);
>
> return 0;
> }
>
> }
>
> +static unsigned int mmio_order(const struct domain *d,
> + unsigned long start_fn, unsigned long nr)
Do you mean "start_gfn" ?
> +{
> + if ( !hap_enabled(d) || !need_iommu(d) || !iommu_hap_pt_share ||
> + (start_fn & ((1UL << PAGE_ORDER_2M) - 1)) || !(nr >> PAGE_ORDER_2M) )
> + return 0;
> +
> + if ( !(start_fn & ((1UL << PAGE_ORDER_1G) - 1)) && (nr >> PAGE_ORDER_1G) &&
> + opt_hap_1gb && hvm_hap_has_1gb(d) )
opt_hap_1gb should be made to be redundant with hvm_hap_has_1gb() to
avoid all the double checks. The only place where it is interesting for
them to be different is in hvm_enable().
I will throw together a patch.
> + return PAGE_ORDER_1G;
> +
> + if ( opt_hap_2mb && hvm_hap_has_2mb(d) )
> + return PAGE_ORDER_2M;
> +
> + return 0;
> +}
> +
> +#define MAP_MMIO_MAX_ITER 64 /* pretty arbitrary */
> +
> int map_mmio_regions(struct domain *d,
> unsigned long start_gfn,
> unsigned long nr,
> @@ -2044,22 +2078,45 @@ int map_mmio_regions(struct domain *d,
> {
> int ret = 0;
> unsigned long i;
> + unsigned int iter, order;
>
> if ( !paging_mode_translate(d) )
> return 0;
>
> - for ( i = 0; !ret && i < nr; i++ )
> + for ( iter = i = 0; i < nr && iter < MAP_MMIO_MAX_ITER;
> + i += 1UL << order, ++iter )
> {
> - ret = set_mmio_p2m_entry(d, start_gfn + i, _mfn(mfn + i),
> - p2m_get_hostp2m(d)->default_access);
> - if ( ret )
> + for ( order = mmio_order(d, (start_gfn + i) | (mfn + i), nr - i); ;
> + order = ret - 1 )
It is hard to reason as to whether this loop will terminate. All it
would take is a bug in set_mmio_p2m_entry() which causes it to
unilaterally return 1 and this loop would never terminate.
Is there any other condition which can be used as a safety check?
~Andrew
next prev parent reply other threads:[~2015-09-17 16:37 UTC|newest]
Thread overview: 26+ messages / expand[flat|nested] mbox.gz Atom feed top
[not found] <55F70C9A02000078000A2A58@prv-mh.provo.novell.com>
2015-09-15 7:13 ` [PATCH 0/4] x86/p2m: use large pages for MMIO mappings Jan Beulich
2015-09-15 7:30 ` [PATCH 1/4] x86/EPT: always return proper order value from ept_get_entry() Jan Beulich
2015-09-16 7:15 ` Tian, Kevin
2015-09-17 16:13 ` Andrew Cooper
2015-09-15 7:31 ` [PATCH 2/4] x86/NPT: always return proper order value from p2m_pt_get_entry() Jan Beulich
2015-09-15 7:35 ` Jan Beulich
2015-09-15 7:32 ` Jan Beulich
2015-09-17 16:14 ` Andrew Cooper
2015-09-15 7:34 ` [PATCH 3/4 RFC] x86/p2m: use large pages for MMIO mappings Jan Beulich
2015-09-16 10:02 ` Julien Grall
2015-09-17 16:37 ` Andrew Cooper [this message]
2015-09-17 17:59 ` Jan Beulich
2015-09-22 8:32 ` Jan Beulich
2015-09-29 11:33 ` Julien Grall
2015-09-29 11:44 ` Jan Beulich
2015-09-29 12:16 ` Julien Grall
2015-09-29 12:46 ` Jan Beulich
2015-09-29 12:52 ` Julien Grall
2015-09-29 13:00 ` Jan Beulich
2015-09-29 13:06 ` Julien Grall
2015-09-29 13:27 ` Jan Beulich
2015-09-30 10:15 ` Julien Grall
2015-09-15 7:37 ` [PATCH 4/4] x86/PoD: shorten certain operations on higher order ranges Jan Beulich
2015-09-23 17:10 ` George Dunlap
2015-09-23 17:16 ` George Dunlap
2015-09-24 8:42 ` Jan Beulich
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=55FAEC57.4070207@citrix.com \
--to=andrew.cooper3@citrix.com \
--cc=George.Dunlap@eu.citrix.com \
--cc=Ian.Campbell@eu.citrix.com \
--cc=Ian.Jackson@eu.citrix.com \
--cc=JBeulich@suse.com \
--cc=keir@xen.org \
--cc=malcolm.crossley@citrix.com \
--cc=stefano.stabellini@eu.citrix.com \
--cc=tiejun.chen@intel.com \
--cc=wei.liu2@citrix.com \
--cc=xen-devel@lists.xenproject.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.