From: Andrew Cooper <andrew.cooper3@citrix.com>
To: Jan Beulich <JBeulich@suse.com>,
xen-devel <xen-devel@lists.xenproject.org>
Cc: Wei Liu <wei.liu2@citrix.com>,
Stefano Stabellini <stefano.stabellini@eu.citrix.com>,
George Dunlap <George.Dunlap@eu.citrix.com>,
Ian Jackson <Ian.Jackson@eu.citrix.com>,
Ian Campbell <Ian.Campbell@eu.citrix.com>,
Tiejun Chen <tiejun.chen@intel.com>,
Malcolm Crossley <malcolm.crossley@citrix.com>,
Keir Fraser <keir@xen.org>
Subject: Re: [PATCH 3/4 RFC] x86/p2m: use large pages for MMIO mappings
Date: Thu, 17 Sep 2015 17:37:43 +0100
Message-ID: <55FAEC57.4070207@citrix.com>
In-Reply-To: <55F7E63702000078000A2BD6@prv-mh.provo.novell.com>
On 15/09/15 08:34, Jan Beulich wrote:
> When mapping large BARs (e.g. the frame buffer of a graphics card) the
> overhead of establishing such mappings using only 4k pages has,
> particularly after the XSA-125 fix, become unacceptable. Alter the
> XEN_DOMCTL_memory_mapping semantics once again, so that there's no
> longer a fixed number of guest frames that represents the upper limit
> of what a single invocation can map. Instead bound execution time by
> limiting the number of iterations (regardless of page size).
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
> ---
> RFC reasons:
> - ARM side unimplemented (and hence libxc for now made to cope with both
>   models), the main issue (besides my inability to test any change
>   there) being the many internal uses of map_mmio_regions()
> - error unmapping in map_mmio_regions() and error propagation to caller
>   from unmap_mmio_regions() are not satisfactory (for the latter a
>   possible model might be to have the function - and hence the domctl -
>   return the [non-zero] number of completed entries upon error,
>   requiring the caller to re-invoke the hypercall to then obtain the
>   actual error for the failed slot)
> - iommu_{,un}map_page() interfaces don't support "order" (hence
>   mmio_order() for now returns zero when !iommu_hap_pt_share, which in
>   particular means the AMD side isn't being taken care of just yet)
Doesn't the model proposed for unmap_mmio_regions() mean that the
caller must always make two hypercalls to confirm success?
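i.e. as far as I can tell, every caller ends up needing a loop along
these lines (rough sketch of how I read the proposed model;
mapping_domctl() is just a stand-in for issuing
XEN_DOMCTL_memory_mapping over the given range):

    /* Rough sketch - mapping_domctl() stands in for invoking
     * XEN_DOMCTL_memory_mapping over the given range. */
    while ( nr )
    {
        long rc = mapping_domctl(d, first_gfn, first_mfn, nr);

        if ( rc < 0 )
            return rc;   /* definitive error for the failed slot */
        if ( rc == 0 )
            break;       /* nothing (further) to do */

        /* A positive value only says "rc entries completed"; whether
         * the remainder failed or merely hit the iteration limit can
         * only be discovered by re-invoking - so even a fully
         * successful mapping needs a further call returning 0 before
         * success is confirmed. */
        first_gfn += rc;
        first_mfn += rc;
        nr -= rc;
    }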
> --- a/xen/arch/x86/mm/p2m.c
> +++ b/xen/arch/x86/mm/p2m.c
> @@ -897,39 +897,47 @@ void p2m_change_type_range(struct domain
>
> /* Returns: 0 for success, -errno for failure */
> static int set_typed_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn,
> - p2m_type_t gfn_p2mt, p2m_access_t access)
> + unsigned int order, p2m_type_t gfn_p2mt,
> + p2m_access_t access)
> {
> int rc = 0;
> p2m_access_t a;
> p2m_type_t ot;
> mfn_t omfn;
> + unsigned int cur_order = 0;
> struct p2m_domain *p2m = p2m_get_hostp2m(d);
>
> if ( !paging_mode_translate(d) )
> return -EIO;
>
> - gfn_lock(p2m, gfn, 0);
> - omfn = p2m->get_entry(p2m, gfn, &ot, &a, 0, NULL, NULL);
> + gfn_lock(p2m, gfn, order);
> + omfn = p2m->get_entry(p2m, gfn, &ot, &a, 0, &cur_order, NULL);
> + if ( cur_order < order )
> + {
> + gfn_unlock(p2m, gfn, order);
> + return cur_order + 1;
This appears to change the error semantics, and therefore warrants an
update to the function comment.
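Something along these lines, perhaps (wording only a suggestion):

    /*
     * Returns: 0 for success, -errno for failure, or a positive value
     * N when the pre-existing entry has an order smaller than the
     * requested one, in which case N - 1 is that smaller order, which
     * the caller may retry with.
     */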
> + }
> if ( p2m_is_grant(ot) || p2m_is_foreign(ot) )
> {
> - gfn_unlock(p2m, gfn, 0);
> + gfn_unlock(p2m, gfn, order);
> domain_crash(d);
> return -ENOENT;
> }
> else if ( p2m_is_ram(ot) )
> {
> + unsigned long i;
> +
> ASSERT(mfn_valid(omfn));
> - set_gpfn_from_mfn(mfn_x(omfn), INVALID_M2P_ENTRY);
> + for ( i = 0; i < (1UL << order); ++i )
> + set_gpfn_from_mfn(mfn_x(omfn) + i, INVALID_M2P_ENTRY);
> }
>
> P2M_DEBUG("set %d %lx %lx\n", gfn_p2mt, gfn, mfn_x(mfn));
> - rc = p2m_set_entry(p2m, gfn, mfn, PAGE_ORDER_4K, gfn_p2mt,
> - access);
> - gfn_unlock(p2m, gfn, 0);
> + rc = p2m_set_entry(p2m, gfn, mfn, order, gfn_p2mt, access);
> + gfn_unlock(p2m, gfn, order);
> if ( rc )
> - gdprintk(XENLOG_ERR,
> - "p2m_set_entry failed! mfn=%08lx rc:%d\n",
> - mfn_x(get_gfn_query_unlocked(p2m->domain, gfn, &ot)), rc);
> + gdprintk(XENLOG_ERR, "p2m_set_entry: %#lx:%u -> %d (%#lx)\n",
PRI_mfn, please, for printing the mfn.
> --- a/tools/libxc/xc_domain.c
> +++ b/tools/libxc/xc_domain.c
> @@ -2215,7 +2215,7 @@ int xc_domain_memory_mapping(
> {
> DECLARE_DOMCTL;
> xc_dominfo_t info;
> - int ret = 0, err;
> + int ret = 0, rc;
> unsigned long done = 0, nr, max_batch_sz;
>
> if ( xc_domain_getinfo(xch, domid, 1, &info) != 1 ||
> @@ -2240,19 +2240,24 @@ int xc_domain_memory_mapping(
> domctl.u.memory_mapping.nr_mfns = nr;
> domctl.u.memory_mapping.first_gfn = first_gfn + done;
> domctl.u.memory_mapping.first_mfn = first_mfn + done;
> - err = do_domctl(xch, &domctl);
> - if ( err && errno == E2BIG )
> + rc = do_domctl(xch, &domctl);
> + if ( rc < 0 && errno == E2BIG )
> {
> if ( max_batch_sz <= 1 )
> break;
> max_batch_sz >>= 1;
> continue;
> }
> + if ( rc > 0 )
> + {
> + done += rc;
> + continue;
> + }
> /* Save the first error... */
> if ( !ret )
> - ret = err;
> + ret = rc;
> /* .. and ignore the rest of them when removing. */
> - if ( err && add_mapping != DPCI_REMOVE_MAPPING )
> + if ( rc && add_mapping != DPCI_REMOVE_MAPPING )
> break;
>
> done += nr;
> --- a/xen/arch/x86/domain_build.c
> +++ b/xen/arch/x86/domain_build.c
> @@ -436,7 +436,7 @@ static __init void pvh_add_mem_mapping(s
> else
> a = p2m_access_rwx;
>
> - if ( (rc = set_mmio_p2m_entry(d, gfn + i, _mfn(mfn + i), a)) )
> + if ( (rc = set_mmio_p2m_entry(d, gfn + i, _mfn(mfn + i), 0, a)) )
> panic("pvh_add_mem_mapping: gfn:%lx mfn:%lx i:%ld rc:%d\n",
> gfn, mfn, i, rc);
> if ( !(i & 0xfffff) )
> --- a/xen/arch/x86/hvm/vmx/vmx.c
> +++ b/xen/arch/x86/hvm/vmx/vmx.c
> @@ -2396,7 +2396,8 @@ static int vmx_alloc_vlapic_mapping(stru
> share_xen_page_with_guest(virt_to_page(apic_va), d, XENSHARE_writable);
> d->arch.hvm_domain.vmx.apic_access_mfn = virt_to_mfn(apic_va);
> set_mmio_p2m_entry(d, paddr_to_pfn(APIC_DEFAULT_PHYS_BASE),
> - _mfn(virt_to_mfn(apic_va)), p2m_get_hostp2m(d)->default_access);
> + _mfn(virt_to_mfn(apic_va)), 0,
> + p2m_get_hostp2m(d)->default_access);
>
> return 0;
> }
>
> --- a/xen/arch/x86/mm/p2m.c
> +++ b/xen/arch/x86/mm/p2m.c
> @@ -2037,6 +2052,25 @@ unsigned long paging_gva_to_gfn(struct v
>      return hostmode->gva_to_gfn(v, hostp2m, va, pfec);
>  }
>
> +static unsigned int mmio_order(const struct domain *d,
> + unsigned long start_fn, unsigned long nr)
Do you mean "start_gfn"?
> +{
> + if ( !hap_enabled(d) || !need_iommu(d) || !iommu_hap_pt_share ||
> + (start_fn & ((1UL << PAGE_ORDER_2M) - 1)) || !(nr >> PAGE_ORDER_2M) )
> + return 0;
> +
> + if ( !(start_fn & ((1UL << PAGE_ORDER_1G) - 1)) && (nr >> PAGE_ORDER_1G) &&
> + opt_hap_1gb && hvm_hap_has_1gb(d) )
opt_hap_1gb should be made redundant with hvm_hap_has_1gb(), to avoid
all these double checks.  The only place where it is interesting for
the two to differ is in hvm_enable().
I will throw together a patch.
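Roughly what I have in mind (sketch, untested):

    /* In hvm_enable(), mask the advertised capabilities once, based
     * on the command line options, after which a plain
     * hvm_hap_has_{1gb,2mb}() check suffices everywhere else. */
    if ( !opt_hap_1gb )
        hvm_funcs.hap_capabilities &= ~HVM_HAP_SUPERPAGE_1GB;
    if ( !opt_hap_2mb )
        hvm_funcs.hap_capabilities &= ~HVM_HAP_SUPERPAGE_2MB;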
> + return PAGE_ORDER_1G;
> +
> + if ( opt_hap_2mb && hvm_hap_has_2mb(d) )
> + return PAGE_ORDER_2M;
> +
> + return 0;
> +}
> +
> +#define MAP_MMIO_MAX_ITER 64 /* pretty arbitrary */
> +
> int map_mmio_regions(struct domain *d,
> unsigned long start_gfn,
> unsigned long nr,
> @@ -2044,22 +2078,45 @@ int map_mmio_regions(struct domain *d,
> {
> int ret = 0;
> unsigned long i;
> + unsigned int iter, order;
>
> if ( !paging_mode_translate(d) )
> return 0;
>
> - for ( i = 0; !ret && i < nr; i++ )
> + for ( iter = i = 0; i < nr && iter < MAP_MMIO_MAX_ITER;
> + i += 1UL << order, ++iter )
> {
> - ret = set_mmio_p2m_entry(d, start_gfn + i, _mfn(mfn + i),
> - p2m_get_hostp2m(d)->default_access);
> - if ( ret )
> + for ( order = mmio_order(d, (start_gfn + i) | (mfn + i), nr - i); ;
> + order = ret - 1 )
It is hard to reason about whether this loop will terminate.  All it
would take is a bug in set_mmio_p2m_entry() causing it to unilaterally
return 1, and this loop would never terminate.
Is there any other condition which could be used as a safety check?
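One option might be to insist that the order strictly decreases on
each retry, e.g. (sketch only; the choice of errno is illustrative):

    for ( order = mmio_order(d, (start_gfn + i) | (mfn + i), nr - i); ;
          order = ret - 1 )
    {
        ret = set_mmio_p2m_entry(d, start_gfn + i, _mfn(mfn + i), order,
                                 p2m_get_hostp2m(d)->default_access);
        if ( ret <= 0 )
            break;
        if ( ret > order )   /* retry order wouldn't shrink - bail */
        {
            ret = -EILSEQ;   /* illustrative errno */
            break;
        }
    }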
~Andrew