From: "Alejandro Vallejo" <alejandro.vallejo@cloud.com>
To: "Roger Pau Monne" <roger.pau@citrix.com>,
<xen-devel@lists.xenproject.org>
Cc: "Jan Beulich" <jbeulich@suse.com>,
"Andrew Cooper" <andrew.cooper3@citrix.com>
Subject: Re: [PATCH v2 15/18] x86/mm: introduce a per-vCPU mapcache when using ASI
Date: Thu, 09 Jan 2025 15:08:15 +0000 [thread overview]
Message-ID: <D6XMXUBHE5UI.16YI6AVTYXNUM@cloud.com> (raw)
In-Reply-To: <20250108142659.99490-16-roger.pau@citrix.com>
On Wed Jan 8, 2025 at 2:26 PM GMT, Roger Pau Monne wrote:
> When using a unique per-vCPU root page table the per-domain region becomes
> per-vCPU, and hence the mapcache is no longer shared between all vCPUs of a
> domain. Introduce per-vCPU mapcache structures, and modify map_domain_page()
> to create per-vCPU mappings when possible. Note the lock is also not needed
> with using per-vCPU map caches, as the structure is no longer shared.
>
> This introduces some duplication in the domain and vcpu structures, as both
> contain a mapcache field to support running with and without per-vCPU
> page-tables.
>
> Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
> ---
> xen/arch/x86/domain_page.c | 90 ++++++++++++++++++++-----------
> xen/arch/x86/include/asm/domain.h | 20 ++++---
> 2 files changed, 71 insertions(+), 39 deletions(-)
>
> diff --git a/xen/arch/x86/domain_page.c b/xen/arch/x86/domain_page.c
> index 1372be20224e..65900d6218f8 100644
> --- a/xen/arch/x86/domain_page.c
> +++ b/xen/arch/x86/domain_page.c
> @@ -74,7 +74,9 @@ void *map_domain_page(mfn_t mfn)
> struct vcpu *v;
> struct mapcache_domain *dcache;
> struct mapcache_vcpu *vcache;
> + struct mapcache *cache;
> struct vcpu_maphash_entry *hashent;
> + struct domain *d;
>
> #ifdef NDEBUG
> if ( mfn_x(mfn) <= PFN_DOWN(__pa(HYPERVISOR_VIRT_END - 1)) )
> @@ -85,9 +87,12 @@ void *map_domain_page(mfn_t mfn)
> if ( !v || !is_pv_vcpu(v) )
> return mfn_to_virt(mfn_x(mfn));
>
> - dcache = &v->domain->arch.pv.mapcache;
> + d = v->domain;
> + dcache = &d->arch.pv.mapcache;
> vcache = &v->arch.pv.mapcache;
> - if ( !dcache->inuse )
> + cache = d->arch.vcpu_pt ? &v->arch.pv.mapcache.cache
> + : &d->arch.pv.mapcache.cache;
> + if ( !cache->inuse )
> return mfn_to_virt(mfn_x(mfn));
>
> perfc_incr(map_domain_page_count);
> @@ -98,17 +103,18 @@ void *map_domain_page(mfn_t mfn)
> if ( hashent->mfn == mfn_x(mfn) )
> {
> idx = hashent->idx;
> - ASSERT(idx < dcache->entries);
> + ASSERT(idx < cache->entries);
> hashent->refcnt++;
> ASSERT(hashent->refcnt);
> ASSERT(mfn_eq(l1e_get_mfn(MAPCACHE_L1ENT(idx)), mfn));
> goto out;
> }
>
> - spin_lock(&dcache->lock);
> + if ( !d->arch.vcpu_pt )
> + spin_lock(&dcache->lock);
Hmmm. I wonder whether we might not want a nospec here...
>
> /* Has some other CPU caused a wrap? We must flush if so. */
> - if ( unlikely(dcache->epoch != vcache->shadow_epoch) )
> + if ( unlikely(!d->arch.vcpu_pt && dcache->epoch != vcache->shadow_epoch) )
> {
> vcache->shadow_epoch = dcache->epoch;
> if ( NEED_FLUSH(this_cpu(tlbflush_time), dcache->tlbflush_timestamp) )
> @@ -118,21 +124,21 @@ void *map_domain_page(mfn_t mfn)
> }
> }
>
> - idx = find_next_zero_bit(dcache->inuse, dcache->entries, dcache->cursor);
> - if ( unlikely(idx >= dcache->entries) )
> + idx = find_next_zero_bit(cache->inuse, cache->entries, cache->cursor);
> + if ( unlikely(idx >= cache->entries) )
> {
> unsigned long accum = 0, prev = 0;
>
> /* /First/, clean the garbage map and update the inuse list. */
> - for ( i = 0; i < BITS_TO_LONGS(dcache->entries); i++ )
> + for ( i = 0; i < BITS_TO_LONGS(cache->entries); i++ )
> {
> accum |= prev;
> - dcache->inuse[i] &= ~xchg(&dcache->garbage[i], 0);
> - prev = ~dcache->inuse[i];
> + cache->inuse[i] &= ~xchg(&cache->garbage[i], 0);
> + prev = ~cache->inuse[i];
> }
>
> - if ( accum | (prev & BITMAP_LAST_WORD_MASK(dcache->entries)) )
> - idx = find_first_zero_bit(dcache->inuse, dcache->entries);
> + if ( accum | (prev & BITMAP_LAST_WORD_MASK(cache->entries)) )
> + idx = find_first_zero_bit(cache->inuse, cache->entries);
> else
> {
> /* Replace a hash entry instead. */
> @@ -152,19 +158,23 @@ void *map_domain_page(mfn_t mfn)
> i = 0;
> } while ( i != MAPHASH_HASHFN(mfn_x(mfn)) );
> }
> - BUG_ON(idx >= dcache->entries);
> + BUG_ON(idx >= cache->entries);
>
> /* /Second/, flush TLBs. */
> perfc_incr(domain_page_tlb_flush);
> flush_tlb_local();
> - vcache->shadow_epoch = ++dcache->epoch;
> - dcache->tlbflush_timestamp = tlbflush_current_time();
> + if ( !d->arch.vcpu_pt )
> + {
> + vcache->shadow_epoch = ++dcache->epoch;
> + dcache->tlbflush_timestamp = tlbflush_current_time();
> + }
> }
>
> - set_bit(idx, dcache->inuse);
> - dcache->cursor = idx + 1;
> + set_bit(idx, cache->inuse);
> + cache->cursor = idx + 1;
>
> - spin_unlock(&dcache->lock);
> + if ( !d->arch.vcpu_pt )
> + spin_unlock(&dcache->lock);
... and here.
>
> l1e_write(&MAPCACHE_L1ENT(idx), l1e_from_mfn(mfn, __PAGE_HYPERVISOR_RW));
>
> @@ -178,6 +188,7 @@ void unmap_domain_page(const void *ptr)
> unsigned int idx;
> struct vcpu *v;
> struct mapcache_domain *dcache;
> + struct mapcache *cache;
> unsigned long va = (unsigned long)ptr, mfn, flags;
> struct vcpu_maphash_entry *hashent;
>
> @@ -190,7 +201,9 @@ void unmap_domain_page(const void *ptr)
> ASSERT(v && is_pv_vcpu(v));
>
> dcache = &v->domain->arch.pv.mapcache;
> - ASSERT(dcache->inuse);
> + cache = v->domain->arch.vcpu_pt ? &v->arch.pv.mapcache.cache
> + : &v->domain->arch.pv.mapcache.cache;
> + ASSERT(cache->inuse);
>
> idx = PFN_DOWN(va - MAPCACHE_VIRT_START);
> mfn = l1e_get_pfn(MAPCACHE_L1ENT(idx));
> @@ -213,7 +226,7 @@ void unmap_domain_page(const void *ptr)
> hashent->mfn);
> l1e_write(&MAPCACHE_L1ENT(hashent->idx), l1e_empty());
> /* /Second/, mark as garbage. */
> - set_bit(hashent->idx, dcache->garbage);
> + set_bit(hashent->idx, cache->garbage);
> }
>
> /* Add newly-freed mapping to the maphash. */
> @@ -225,7 +238,7 @@ void unmap_domain_page(const void *ptr)
> /* /First/, zap the PTE. */
> l1e_write(&MAPCACHE_L1ENT(idx), l1e_empty());
> /* /Second/, mark as garbage. */
> - set_bit(idx, dcache->garbage);
> + set_bit(idx, cache->garbage);
> }
>
> local_irq_restore(flags);
> @@ -234,7 +247,6 @@ void unmap_domain_page(const void *ptr)
> void mapcache_domain_init(struct domain *d)
> {
> struct mapcache_domain *dcache = &d->arch.pv.mapcache;
> - unsigned int bitmap_pages;
>
> ASSERT(is_pv_domain(d));
>
> @@ -243,13 +255,12 @@ void mapcache_domain_init(struct domain *d)
> return;
> #endif
>
> + if ( d->arch.vcpu_pt )
> + return;
> +
> BUILD_BUG_ON(MAPCACHE_VIRT_END + PAGE_SIZE * (3 +
> 2 * PFN_UP(BITS_TO_LONGS(MAPCACHE_ENTRIES) * sizeof(long))) >
> MAPCACHE_VIRT_START + (PERDOMAIN_SLOT_MBYTES << 20));
> - bitmap_pages = PFN_UP(BITS_TO_LONGS(MAPCACHE_ENTRIES) * sizeof(long));
> - dcache->inuse = (void *)MAPCACHE_VIRT_END + PAGE_SIZE;
> - dcache->garbage = dcache->inuse +
> - (bitmap_pages + 1) * PAGE_SIZE / sizeof(long);
>
> spin_lock_init(&dcache->lock);
> }
> @@ -258,30 +269,45 @@ int mapcache_vcpu_init(struct vcpu *v)
> {
> struct domain *d = v->domain;
> struct mapcache_domain *dcache = &d->arch.pv.mapcache;
> + struct mapcache *cache;
> unsigned long i;
> - unsigned int ents = d->max_vcpus * MAPCACHE_VCPU_ENTRIES;
> + unsigned int ents = (d->arch.vcpu_pt ? 1 : d->max_vcpus) *
> + MAPCACHE_VCPU_ENTRIES;
> unsigned int nr = PFN_UP(BITS_TO_LONGS(ents) * sizeof(long));
>
> - if ( !is_pv_vcpu(v) || !dcache->inuse )
> + if ( !is_pv_vcpu(v) )
> return 0;
>
> - if ( ents > dcache->entries )
> + cache = d->arch.vcpu_pt ? &v->arch.pv.mapcache.cache
> + : &dcache->cache;
> +
> + if ( !cache->inuse )
> + return 0;
> +
> + if ( ents > cache->entries )
> {
> /* Populate page tables. */
> int rc = create_perdomain_mapping(v, MAPCACHE_VIRT_START, ents, false);
> + const unsigned int bitmap_pages =
> + PFN_UP(BITS_TO_LONGS(MAPCACHE_ENTRIES) * sizeof(long));
> +
> + cache->inuse = (void *)MAPCACHE_VIRT_END + PAGE_SIZE;
> + cache->garbage = cache->inuse +
> + (bitmap_pages + 1) * PAGE_SIZE / sizeof(long);
> +
>
> /* Populate bit maps. */
> if ( !rc )
> - rc = create_perdomain_mapping(v, (unsigned long)dcache->inuse,
> + rc = create_perdomain_mapping(v, (unsigned long)cache->inuse,
> nr, true);
> if ( !rc )
> - rc = create_perdomain_mapping(v, (unsigned long)dcache->garbage,
> + rc = create_perdomain_mapping(v, (unsigned long)cache->garbage,
> nr, true);
>
> if ( rc )
> return rc;
>
> - dcache->entries = ents;
> + cache->entries = ents;
> }
>
> /* Mark all maphash entries as not in use. */
Cheers,
Alejandro
next prev parent reply other threads:[~2025-01-09 15:08 UTC|newest]
Thread overview: 55+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-01-08 14:26 [PATCH v2 00/18] x86: adventures in Address Space Isolation Roger Pau Monne
2025-01-08 14:26 ` [PATCH v2 01/18] x86/mm: purge unneeded destroy_perdomain_mapping() Roger Pau Monne
2025-01-08 15:59 ` Alejandro Vallejo
2025-01-08 14:26 ` [PATCH v2 02/18] x86/domain: limit window where curr_vcpu != current on context switch Roger Pau Monne
2025-01-08 16:26 ` Alejandro Vallejo
2025-01-09 17:39 ` Roger Pau Monné
2025-01-09 8:59 ` Jan Beulich
2025-01-09 17:33 ` Roger Pau Monné
2025-01-14 15:02 ` Jan Beulich
2025-01-17 14:57 ` Roger Pau Monné
2025-01-08 14:26 ` [PATCH v2 03/18] x86/mm: introduce helper to detect per-domain L1 entries that need freeing Roger Pau Monne
2025-01-09 9:03 ` Jan Beulich
2025-01-08 14:26 ` [PATCH v2 04/18] x86/pv: introduce function to populate perdomain area and use it to map Xen GDT Roger Pau Monne
2025-01-09 9:10 ` Jan Beulich
2025-01-10 14:15 ` Roger Pau Monné
2025-01-09 9:55 ` Alejandro Vallejo
2025-01-10 14:29 ` Roger Pau Monné
2025-01-10 15:50 ` Alejandro Vallejo
2025-01-08 14:26 ` [PATCH v2 05/18] x86/mm: switch destroy_perdomain_mapping() parameter from domain to vCPU Roger Pau Monne
2025-01-09 10:02 ` Alejandro Vallejo
2025-01-10 14:30 ` Roger Pau Monné
2025-01-08 14:26 ` [PATCH v2 06/18] x86/pv: set/clear guest GDT mappings using {populate,destroy}_perdomain_mapping() Roger Pau Monne
2025-01-08 15:11 ` [PATCH v2.1 " Roger Pau Monne
2025-01-09 10:25 ` Alejandro Vallejo
2025-01-10 14:33 ` Roger Pau Monné
2025-01-14 15:30 ` Jan Beulich
2025-01-08 14:26 ` [PATCH v2 07/18] x86/pv: update guest LDT mappings using the linear entries Roger Pau Monne
2025-01-09 14:34 ` Alejandro Vallejo
2025-01-10 14:44 ` Roger Pau Monné
2025-01-10 15:36 ` Alejandro Vallejo
2025-01-14 15:42 ` Jan Beulich
2025-01-08 14:26 ` [PATCH v2 08/18] x86/pv: remove stashing of GDT/LDT L1 page-tables Roger Pau Monne
2025-01-08 14:26 ` [PATCH v2 09/18] x86/mm: simplify create_perdomain_mapping() interface Roger Pau Monne
2025-01-09 11:01 ` Alejandro Vallejo
2025-01-10 14:45 ` Roger Pau Monné
2025-01-08 14:26 ` [PATCH v2 10/18] x86/mm: switch {create,destroy}_perdomain_mapping() domain parameter to vCPU Roger Pau Monne
2025-01-14 16:27 ` Jan Beulich
2025-01-08 14:26 ` [PATCH v2 11/18] x86/pv: untie issuing FLUSH_ROOT_PGTBL from XPTI Roger Pau Monne
2025-01-08 14:26 ` [PATCH v2 12/18] x86/mm: move FLUSH_ROOT_PGTBL handling before TLB flush Roger Pau Monne
2025-01-08 14:26 ` [PATCH v2 13/18] x86/spec-ctrl: introduce Address Space Isolation command line option Roger Pau Monne
2025-01-09 14:58 ` Alejandro Vallejo
2025-01-10 14:55 ` Roger Pau Monné
2025-01-10 15:51 ` Alejandro Vallejo
2025-01-08 14:26 ` [PATCH v2 14/18] x86/mm: introduce per-vCPU L3 page-table Roger Pau Monne
2025-01-08 14:26 ` [PATCH v2 15/18] x86/mm: introduce a per-vCPU mapcache when using ASI Roger Pau Monne
2025-01-09 15:08 ` Alejandro Vallejo [this message]
2025-01-10 15:02 ` Roger Pau Monné
2025-01-10 16:12 ` Alejandro Vallejo
2025-01-10 16:19 ` Alejandro Vallejo
2025-01-10 18:43 ` Roger Pau Monné
2025-01-08 14:26 ` [PATCH v2 16/18] x86/pv: allow using a unique per-pCPU root page table (L4) Roger Pau Monne
2025-01-08 14:26 ` [PATCH v2 17/18] x86/mm: switch to a per-CPU mapped stack when using ASI Roger Pau Monne
2025-01-08 14:26 ` [PATCH v2 18/18] x86/mm: zero stack on context switch Roger Pau Monne
2025-01-14 16:20 ` [PATCH v2 00/18] x86: adventures in Address Space Isolation Jan Beulich
2025-01-17 14:45 ` Roger Pau Monné
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=D6XMXUBHE5UI.16YI6AVTYXNUM@cloud.com \
--to=alejandro.vallejo@cloud.com \
--cc=andrew.cooper3@citrix.com \
--cc=jbeulich@suse.com \
--cc=roger.pau@citrix.com \
--cc=xen-devel@lists.xenproject.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.