From: "Alejandro Vallejo" <alejandro.vallejo@cloud.com>
To: "Elias El Yandouzi" <eliasely@amazon.com>,
<xen-devel@lists.xenproject.org>
Cc: <julien@xen.org>, <pdurrant@amazon.com>, <dwmw@amazon.com>,
"Hongyan Xia" <hongyxia@amazon.com>,
"Julien Grall" <jgrall@amazon.com>
Subject: Re: [PATCH V4 01/15] x86: Create per-domain mapping for guest_root_pt
Date: Mon, 16 Dec 2024 16:01:23 +0000 [thread overview]
Message-ID: <D6D91G01K94X.2FF0VJ29P09FA@cloud.com> (raw)
In-Reply-To: <20241111131148.52568-2-eliasely@amazon.com>
On Mon Nov 11, 2024 at 1:11 PM GMT, Elias El Yandouzi wrote:
> From: Hongyan Xia <hongyxia@amazon.com>
>
> This patch introduces a per-domain mapping for the `guest_root_pt` in PV
> guests as part of the effort to remove the direct map in Xen.
>
> For the time being, the `root_pgt` is not mapped or unmapped, as it remains
> a Xenheap page. This will be addressed in subsequent patches.
>
> Signed-off-by: Hongyan Xia <hongyxia@amazon.com>
> Signed-off-by: Julien Grall <jgrall@amazon.com>
> Signed-off-by: Elias El Yandouzi <eliasely@amazon.com>
>
> ----
> Changes in V4:
> * Fix over-allocation issue
> * Update the mappings when switching from kernel to user-mode
>
> Changes in V3:
> * Rename SHADOW_ROOT
> * Haven't addressed the potentially over-allocation issue as I don't get it
>
> Changes in V2:
> * Rework the shadow perdomain mapping solution in the follow-up patches
>
> Changes since Hongyan's version:
> * Remove the final dot in the commit title
>
> diff --git a/xen/arch/x86/include/asm/config.h b/xen/arch/x86/include/asm/config.h
> index f8a5a4913b07..bd360ec4141e 100644
> --- a/xen/arch/x86/include/asm/config.h
> +++ b/xen/arch/x86/include/asm/config.h
> @@ -174,7 +174,7 @@
> /* Slot 260: per-domain mappings (including map cache). */
> #define PERDOMAIN_VIRT_START (PML4_ADDR(260))
> #define PERDOMAIN_SLOT_MBYTES (PML4_ENTRY_BYTES >> (20 + PAGETABLE_ORDER))
> -#define PERDOMAIN_SLOTS 3
> +#define PERDOMAIN_SLOTS 4
> #define PERDOMAIN_VIRT_SLOT(s) (PERDOMAIN_VIRT_START + (s) * \
> (PERDOMAIN_SLOT_MBYTES << 20))
> /* Slot 4: mirror of per-domain mappings (for compat xlat area accesses). */
> @@ -288,6 +288,14 @@ extern unsigned long xen_phys_start;
> #define ARG_XLAT_START(v) \
> (ARG_XLAT_VIRT_START + ((v)->vcpu_id << ARG_XLAT_VA_SHIFT))
>
> +/* pv_root_pt mapping area. The fourth per-domain-mapping sub-area */
> +#define PV_ROOT_PT_MAPPING_VIRT_START PERDOMAIN_VIRT_SLOT(3)
> +#define PV_ROOT_PT_MAPPING_ENTRIES MAX_VIRT_CPUS
> +
> +/* The address of a particular VCPU's PV_ROOT_PT */
> +#define PV_ROOT_PT_MAPPING_VCPU_VIRT_START(v) \
> + (PV_ROOT_PT_MAPPING_VIRT_START + ((v)->vcpu_id * PAGE_SIZE))
> +
> #define ELFSIZE 64
>
> #define ARCH_CRASH_SAVE_VMCOREINFO
> diff --git a/xen/arch/x86/include/asm/domain.h b/xen/arch/x86/include/asm/domain.h
> index b79d6badd71c..478ce41ad8ca 100644
> --- a/xen/arch/x86/include/asm/domain.h
> +++ b/xen/arch/x86/include/asm/domain.h
> @@ -272,6 +272,7 @@ struct time_scale {
> struct pv_domain
> {
> l1_pgentry_t **gdt_ldt_l1tab;
> + l1_pgentry_t **root_pt_l1tab;
>
> atomic_t nr_l4_pages;
>
> diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
> index d537a799bced..a152e21bb086 100644
> --- a/xen/arch/x86/mm.c
> +++ b/xen/arch/x86/mm.c
> @@ -516,6 +516,13 @@ void make_cr3(struct vcpu *v, mfn_t mfn)
> v->arch.cr3 |= get_pcid_bits(v, false);
> }
>
> +#define pv_root_pt_idx(v) \
> + ((v)->vcpu_id >> PAGETABLE_ORDER)
> +
> +#define pv_root_pt_pte(v) \
> + ((v)->domain->arch.pv.root_pt_l1tab[pv_root_pt_idx(v)] + \
> + ((v)->vcpu_id & (L1_PAGETABLE_ENTRIES - 1)))
> +
> void write_ptbase(struct vcpu *v)
> {
> const struct domain *d = v->domain;
> @@ -527,11 +534,16 @@ void write_ptbase(struct vcpu *v)
>
> if ( is_pv_domain(d) && d->arch.pv.xpti )
> {
> + mfn_t guest_root_pt = _mfn(MASK_EXTR(v->arch.cr3, X86_CR3_ADDR_MASK));
> + l1_pgentry_t *pte = pv_root_pt_pte(v);
> +
> cpu_info->root_pgt_changed = true;
> cpu_info->pv_cr3 = __pa(this_cpu(root_pgt));
> if ( new_cr4 & X86_CR4_PCIDE )
> cpu_info->pv_cr3 |= get_pcid_bits(v, true);
> switch_cr3_cr4(v->arch.cr3, new_cr4);
> +
> + l1e_write(pte, l1e_from_mfn(guest_root_pt, __PAGE_HYPERVISOR_RO));
> }
> else
> {
> diff --git a/xen/arch/x86/pv/domain.c b/xen/arch/x86/pv/domain.c
> index d5a8564c1cbe..1a1c999743ac 100644
> --- a/xen/arch/x86/pv/domain.c
> +++ b/xen/arch/x86/pv/domain.c
> @@ -289,6 +289,21 @@ static void pv_destroy_gdt_ldt_l1tab(struct vcpu *v)
> 1U << GDT_LDT_VCPU_SHIFT);
> }
>
> +static int pv_create_root_pt_l1tab(const struct vcpu *v)
> +{
> + return create_perdomain_mapping(v->domain,
> + PV_ROOT_PT_MAPPING_VCPU_VIRT_START(v),
> + 1, v->domain->arch.pv.root_pt_l1tab,
> + NULL);
> +}
> +
> +static void pv_destroy_root_pt_l1tab(const struct vcpu *v)
> +
> +{
> + destroy_perdomain_mapping(v->domain,
> + PV_ROOT_PT_MAPPING_VCPU_VIRT_START(v), 1);
> +}
> +
> void pv_vcpu_destroy(struct vcpu *v)
> {
> if ( is_pv_32bit_vcpu(v) )
> @@ -298,6 +313,7 @@ void pv_vcpu_destroy(struct vcpu *v)
> }
>
> pv_destroy_gdt_ldt_l1tab(v);
> + pv_destroy_root_pt_l1tab(v);
> XFREE(v->arch.pv.trap_ctxt);
> }
>
> @@ -312,6 +328,13 @@ int pv_vcpu_initialise(struct vcpu *v)
> if ( rc )
> return rc;
>
> + if ( v->domain->arch.pv.xpti )
> + {
> + rc = pv_create_root_pt_l1tab(v);
> + if ( rc )
> + goto done;
> + }
> +
> BUILD_BUG_ON(X86_NR_VECTORS * sizeof(*v->arch.pv.trap_ctxt) >
> PAGE_SIZE);
> v->arch.pv.trap_ctxt = xzalloc_array(struct trap_info, X86_NR_VECTORS);
> @@ -347,10 +370,12 @@ void pv_domain_destroy(struct domain *d)
>
> destroy_perdomain_mapping(d, GDT_LDT_VIRT_START,
> GDT_LDT_MBYTES << (20 - PAGE_SHIFT));
> + destroy_perdomain_mapping(d, PV_ROOT_PT_MAPPING_VIRT_START, d->max_vcpus);
>
> XFREE(d->arch.pv.cpuidmasks);
>
> FREE_XENHEAP_PAGE(d->arch.pv.gdt_ldt_l1tab);
> + FREE_XENHEAP_PAGE(d->arch.pv.root_pt_l1tab);
With root_pt_l1tab coming from xzalloc_array() rather than being a xenheap
page, FREE_XENHEAP_PAGE() is the wrong deallocator; this must use XFREE()
instead:
XFREE(d->arch.pv.root_pt_l1tab);
> }
>
> void noreturn cf_check continue_pv_domain(void);
> @@ -382,8 +407,22 @@ int pv_domain_initialise(struct domain *d)
> if ( rc )
> goto fail;
>
> + rc = create_perdomain_mapping(d, PV_ROOT_PT_MAPPING_VIRT_START,
> + d->max_vcpus, NULL, NULL);
> + if ( rc )
> + goto fail;
> +
> d->arch.ctxt_switch = &pv_csw;
>
> + if ( d->arch.pv.xpti )
> + {
> + d->arch.pv.root_pt_l1tab =
> + xzalloc_array(l1_pgentry_t *,
> + DIV_ROUND_UP(d->max_vcpus, L1_PAGETABLE_ENTRIES));
> + if ( !d->arch.pv.root_pt_l1tab )
> + goto fail;
> + }
> +
> if ( !is_pv_32bit_domain(d) && use_invpcid && cpu_has_pcid )
> switch ( ACCESS_ONCE(opt_pcid) )
> {
> @@ -457,7 +496,8 @@ static void _toggle_guest_pt(struct vcpu *v)
> guest_update = false;
> }
> }
> - write_cr3(cr3);
> +
> + write_ptbase(v);
>
> if ( !pagetable_is_null(old_shadow) )
> shadow_put_top_level(v->domain, old_shadow);
> @@ -497,9 +537,6 @@ void toggle_guest_mode(struct vcpu *v)
> {
> struct cpu_info *cpu_info = get_cpu_info();
>
> - cpu_info->root_pgt_changed = true;
> - cpu_info->pv_cr3 = __pa(this_cpu(root_pgt)) |
> - (d->arch.pv.pcid ? get_pcid_bits(v, true) : 0);
> /*
> * As in _toggle_guest_pt() the XPTI CR3 write needs to be a TLB-
> * flushing one too for shadow mode guests.
> diff --git a/xen/arch/x86/x86_64/asm-offsets.c b/xen/arch/x86/x86_64/asm-offsets.c
> index 630bdc39451d..c1ae5013af96 100644
> --- a/xen/arch/x86/x86_64/asm-offsets.c
> +++ b/xen/arch/x86/x86_64/asm-offsets.c
> @@ -80,6 +80,7 @@ void __dummy__(void)
>
> #undef OFFSET_EF
>
> + OFFSET(VCPU_id, struct vcpu, vcpu_id);
> OFFSET(VCPU_processor, struct vcpu, processor);
> OFFSET(VCPU_domain, struct vcpu, domain);
> OFFSET(VCPU_vcpu_info, struct vcpu, vcpu_info_area.map);
> diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S
> index c5c723b5f4d4..91413b905768 100644
> --- a/xen/arch/x86/x86_64/entry.S
> +++ b/xen/arch/x86/x86_64/entry.S
> @@ -170,9 +170,16 @@ FUNC_LOCAL(restore_all_guest)
> movabs $PADDR_MASK & PAGE_MASK, %rsi
> movabs $DIRECTMAP_VIRT_START, %rcx
> and %rsi, %rdi
> - and %r9, %rsi
> add %rcx, %rdi
> +
> + /*
> + * The address in the vCPU cr3 is always mapped in the per-domain
> + * pv_root_pt virt area.
> + */
> + imul $PAGE_SIZE, VCPU_id(%rbx), %esi
> + movabs $PV_ROOT_PT_MAPPING_VIRT_START, %rcx
> add %rcx, %rsi
> +
> mov $ROOT_PAGETABLE_FIRST_XEN_SLOT, %ecx
> mov root_table_offset(SH_LINEAR_PT_VIRT_START)*8(%rsi), %r8
> mov %r8, root_table_offset(SH_LINEAR_PT_VIRT_START)*8(%rdi)
next prev parent reply other threads:[~2024-12-16 16:01 UTC|newest]
Thread overview: 25+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-11-11 13:11 [PATCH V4 00/15] Remove the directmap Elias El Yandouzi
2024-11-11 13:11 ` [PATCH V4 01/15] x86: Create per-domain mapping for guest_root_pt Elias El Yandouzi
2024-12-16 16:01 ` Alejandro Vallejo [this message]
2024-11-11 13:11 ` [PATCH V4 02/15] x86/pv: Use copy_domain_page() to manage domheap pages during initrd relocation Elias El Yandouzi
2024-11-11 13:11 ` [PATCH V4 03/15] x86/pv: Rewrite how building PV dom0 handles domheap mappings Elias El Yandouzi
2024-12-09 17:42 ` Alejandro Vallejo
2024-11-11 13:11 ` [PATCH V4 04/15] x86: Initialize mapcache for PV, HVM, and idle domains Elias El Yandouzi
2024-11-11 18:46 ` Andrew Cooper
2024-11-11 13:11 ` [PATCH V4 05/15] x86: Add a boot option to enable and disable the direct map Elias El Yandouzi
2024-11-11 13:11 ` [PATCH V4 06/15] xen/x86: Add support for the PMAP Elias El Yandouzi
2024-11-11 13:11 ` [PATCH V4 07/15] x86/domain_page: Remove the fast paths when mfn is not in the directmap Elias El Yandouzi
2024-11-18 18:08 ` Alejandro Vallejo
2024-11-19 7:55 ` Jan Beulich
2024-11-11 13:11 ` [PATCH V4 08/15] xen/page_alloc: Add a path for xenheap when there is no direct map Elias El Yandouzi
2024-11-11 13:11 ` [PATCH V4 09/15] x86/setup: Leave early boot slightly earlier Elias El Yandouzi
2024-11-11 13:11 ` [PATCH V4 10/15] xen/page_alloc: vmap heap nodes when they are outside the direct map Elias El Yandouzi
2024-12-13 13:46 ` Alejandro Vallejo
2024-12-13 14:59 ` Alejandro Vallejo
2024-11-11 13:11 ` [PATCH V4 11/15] x86/setup: Do not create valid mappings when directmap=no Elias El Yandouzi
2024-11-11 13:11 ` [PATCH V4 12/15] xen/arm32: mm: Rename 'first' to 'root' in init_secondary_pagetables() Elias El Yandouzi
2024-11-21 10:34 ` Michal Orzel
2024-11-11 13:11 ` [PATCH V4 13/15] xen/arm64: mm: Use per-pCPU page-tables Elias El Yandouzi
2024-11-11 13:11 ` [PATCH V4 14/15] xen/arm64: Implement a mapcache for arm64 Elias El Yandouzi
2024-11-11 13:11 ` [PATCH V4 15/15] xen/arm64: Allow the admin to enable/disable the directmap Elias El Yandouzi
2024-11-11 19:03 ` [PATCH V4 00/15] Remove " Andrew Cooper
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=D6D91G01K94X.2FF0VJ29P09FA@cloud.com \
--to=alejandro.vallejo@cloud.com \
--cc=dwmw@amazon.com \
--cc=eliasely@amazon.com \
--cc=hongyxia@amazon.com \
--cc=jgrall@amazon.com \
--cc=julien@xen.org \
--cc=pdurrant@amazon.com \
--cc=xen-devel@lists.xenproject.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.