* [PATCH v2] KVM: arm64: Remove size-order align in the nVHE hyp private VA range
From: Vincent Donnefort @ 2023-08-11 11:20 UTC
To: maz, oliver.upton
Cc: kvmarm, qperret, smostafa, kaleshsingh, linux-arm-kernel,
kernel-team, will, Vincent Donnefort
commit f922c13e778d ("KVM: arm64: Introduce
pkvm_alloc_private_va_range()") and commit 92abe0f81e13 ("KVM: arm64:
Introduce hyp_alloc_private_va_range()") added an alignment for the
start address of any allocation into the nVHE hypervisor private VA
range.
This alignment (order of the size of the allocation) intends to enable
efficient stack verification (if the PAGE_SHIFT bit is zero, the stack
pointer is on the guard page and a stack overflow occurred).
But this is only necessary for stack allocation and can waste a lot of
VA space. So instead make stack-specific functions, handling the guard
page requirements, while other users (e.g. fixmap) will only get page
alignment.
Signed-off-by: Vincent Donnefort <vdonnefort@google.com>
---
v1 -> v2:
* hyp_create_stack() -> pkvm_create_stack() (Kalesh)
* Add !protected version as well (Kalesh)
* phys_addr_t as type for PAs
---
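For context on the check this alignment enables: the stack page is mapped at
base + PAGE_SIZE and the guard page at base is left unbacked, with base
aligned on 2 * PAGE_SIZE. Any valid stack address therefore has the
PAGE_SHIFT bit set, while guard-page addresses have it clear. A minimal C
sketch of that test (illustrative only; the helper name is made up and this
is not the actual in-tree check):

static inline bool hyp_stack_overflowed(unsigned long sp)
{
	/* PAGE_SIZE == BIT(PAGE_SHIFT): bit clear => sp is on the guard page */
	return !(sp & PAGE_SIZE);
}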
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index 0e1e1ab17b4d..96a80e8f6226 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -168,6 +168,7 @@ int create_hyp_io_mappings(phys_addr_t phys_addr, size_t size,
void __iomem **haddr);
int create_hyp_exec_mappings(phys_addr_t phys_addr, size_t size,
void **haddr);
+int create_hyp_stack(phys_addr_t phys_addr, unsigned long *haddr);
void __init free_hyp_pgds(void);
void stage2_unmap_vm(struct kvm *kvm);
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index 72dc53a75d1c..9acce45df2b5 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -2283,30 +2283,8 @@ static int __init init_hyp_mode(void)
for_each_possible_cpu(cpu) {
struct kvm_nvhe_init_params *params = per_cpu_ptr_nvhe_sym(kvm_init_params, cpu);
char *stack_page = (char *)per_cpu(kvm_arm_hyp_stack_page, cpu);
- unsigned long hyp_addr;
- /*
- * Allocate a contiguous HYP private VA range for the stack
- * and guard page. The allocation is also aligned based on
- * the order of its size.
- */
- err = hyp_alloc_private_va_range(PAGE_SIZE * 2, &hyp_addr);
- if (err) {
- kvm_err("Cannot allocate hyp stack guard page\n");
- goto out_err;
- }
-
- /*
- * Since the stack grows downwards, map the stack to the page
- * at the higher address and leave the lower guard page
- * unbacked.
- *
- * Any valid stack address now has the PAGE_SHIFT bit as 1
- * and addresses corresponding to the guard page have the
- * PAGE_SHIFT bit as 0 - this is used for overflow detection.
- */
- err = __create_hyp_mappings(hyp_addr + PAGE_SIZE, PAGE_SIZE,
- __pa(stack_page), PAGE_HYP);
+ err = create_hyp_stack(__pa(stack_page), &params->stack_hyp_va);
if (err) {
kvm_err("Cannot map hyp stack\n");
goto out_err;
@@ -2319,8 +2297,6 @@ static int __init init_hyp_mode(void)
* has been mapped in the flexible private VA space.
*/
params->stack_pa = __pa(stack_page);
-
- params->stack_hyp_va = hyp_addr + (2 * PAGE_SIZE);
}
for_each_possible_cpu(cpu) {
diff --git a/arch/arm64/kvm/hyp/include/nvhe/mm.h b/arch/arm64/kvm/hyp/include/nvhe/mm.h
index d5ec972b5c1e..230e4f2527de 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/mm.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/mm.h
@@ -26,6 +26,7 @@ int pkvm_create_mappings_locked(void *from, void *to, enum kvm_pgtable_prot prot
int __pkvm_create_private_mapping(phys_addr_t phys, size_t size,
enum kvm_pgtable_prot prot,
unsigned long *haddr);
+int pkvm_create_stack(phys_addr_t phys, unsigned long *haddr);
int pkvm_alloc_private_va_range(size_t size, unsigned long *haddr);
#endif /* __KVM_HYP_MM_H */
diff --git a/arch/arm64/kvm/hyp/nvhe/mm.c b/arch/arm64/kvm/hyp/nvhe/mm.c
index 318298eb3d6b..65a7a186d7b2 100644
--- a/arch/arm64/kvm/hyp/nvhe/mm.c
+++ b/arch/arm64/kvm/hyp/nvhe/mm.c
@@ -44,6 +44,27 @@ static int __pkvm_create_mappings(unsigned long start, unsigned long size,
return err;
}
+static int __pkvm_alloc_private_va_range(unsigned long start, size_t size)
+{
+ unsigned long cur;
+
+ hyp_assert_lock_held(&pkvm_pgd_lock);
+
+ if (!start || start < __io_map_base)
+ return -EINVAL;
+
+ /* The allocated size is always a multiple of PAGE_SIZE */
+ cur = start + PAGE_ALIGN(size);
+
+ /* Are we overflowing on the vmemmap ? */
+ if (cur > __hyp_vmemmap)
+ return -ENOMEM;
+
+ __io_map_base = cur;
+
+ return 0;
+}
+
/**
* pkvm_alloc_private_va_range - Allocates a private VA range.
* @size: The size of the VA range to reserve.
@@ -56,27 +77,16 @@ static int __pkvm_create_mappings(unsigned long start, unsigned long size,
*/
int pkvm_alloc_private_va_range(size_t size, unsigned long *haddr)
{
- unsigned long base, addr;
- int ret = 0;
+ unsigned long addr;
+ int ret;
hyp_spin_lock(&pkvm_pgd_lock);
-
- /* Align the allocation based on the order of its size */
- addr = ALIGN(__io_map_base, PAGE_SIZE << get_order(size));
-
- /* The allocated size is always a multiple of PAGE_SIZE */
- base = addr + PAGE_ALIGN(size);
-
- /* Are we overflowing on the vmemmap ? */
- if (!addr || base > __hyp_vmemmap)
- ret = -ENOMEM;
- else {
- __io_map_base = base;
- *haddr = addr;
- }
-
+ addr = __io_map_base;
+ ret = __pkvm_alloc_private_va_range(addr, size);
hyp_spin_unlock(&pkvm_pgd_lock);
+ *haddr = addr;
+
return ret;
}
@@ -340,6 +350,45 @@ int hyp_create_idmap(u32 hyp_va_bits)
return __pkvm_create_mappings(start, end - start, start, PAGE_HYP_EXEC);
}
+int pkvm_create_stack(phys_addr_t phys, unsigned long *haddr)
+{
+ unsigned long addr, prev_base;
+ size_t size;
+ int ret;
+
+ hyp_spin_lock(&pkvm_pgd_lock);
+
+ prev_base = __io_map_base;
+ /*
+ * Efficient stack verification using the PAGE_SHIFT bit implies
+ * an alignment of our allocation on the order of the size.
+ */
+ size = PAGE_SIZE * 2;
+ addr = ALIGN(__io_map_base, size);
+
+ ret = __pkvm_alloc_private_va_range(addr, size);
+ if (!ret) {
+ /*
+ * Since the stack grows downwards, map the stack to the page
+ * at the higher address and leave the lower guard page
+ * unbacked.
+ *
+ * Any valid stack address now has the PAGE_SHIFT bit as 1
+ * and addresses corresponding to the guard page have the
+ * PAGE_SHIFT bit as 0 - this is used for overflow detection.
+ */
+ ret = kvm_pgtable_hyp_map(&pkvm_pgtable, addr + PAGE_SIZE,
+ PAGE_SIZE, phys, PAGE_HYP);
+ if (ret)
+ __io_map_base = prev_base;
+ }
+ hyp_spin_unlock(&pkvm_pgd_lock);
+
+ *haddr = addr + size;
+
+ return ret;
+}
+
static void *admit_host_page(void *arg)
{
struct kvm_hyp_memcache *host_mc = arg;
diff --git a/arch/arm64/kvm/hyp/nvhe/setup.c b/arch/arm64/kvm/hyp/nvhe/setup.c
index bb98630dfeaf..0d5e0a89ddce 100644
--- a/arch/arm64/kvm/hyp/nvhe/setup.c
+++ b/arch/arm64/kvm/hyp/nvhe/setup.c
@@ -113,7 +113,6 @@ static int recreate_hyp_mappings(phys_addr_t phys, unsigned long size,
for (i = 0; i < hyp_nr_cpus; i++) {
struct kvm_nvhe_init_params *params = per_cpu_ptr(&kvm_init_params, i);
- unsigned long hyp_addr;
start = (void *)kern_hyp_va(per_cpu_base[i]);
end = start + PAGE_ALIGN(hyp_percpu_size);
@@ -121,33 +120,9 @@ static int recreate_hyp_mappings(phys_addr_t phys, unsigned long size,
if (ret)
return ret;
- /*
- * Allocate a contiguous HYP private VA range for the stack
- * and guard page. The allocation is also aligned based on
- * the order of its size.
- */
- ret = pkvm_alloc_private_va_range(PAGE_SIZE * 2, &hyp_addr);
+ ret = pkvm_create_stack(params->stack_pa, &params->stack_hyp_va);
if (ret)
return ret;
-
- /*
- * Since the stack grows downwards, map the stack to the page
- * at the higher address and leave the lower guard page
- * unbacked.
- *
- * Any valid stack address now has the PAGE_SHIFT bit as 1
- * and addresses corresponding to the guard page have the
- * PAGE_SHIFT bit as 0 - this is used for overflow detection.
- */
- hyp_spin_lock(&pkvm_pgd_lock);
- ret = kvm_pgtable_hyp_map(&pkvm_pgtable, hyp_addr + PAGE_SIZE,
- PAGE_SIZE, params->stack_pa, PAGE_HYP);
- hyp_spin_unlock(&pkvm_pgd_lock);
- if (ret)
- return ret;
-
- /* Update stack_hyp_va to end of the stack's private VA range */
- params->stack_hyp_va = hyp_addr + (2 * PAGE_SIZE);
}
/*
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index d3b4feed460c..32c2f1665c6e 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -592,6 +592,25 @@ int create_hyp_mappings(void *from, void *to, enum kvm_pgtable_prot prot)
return 0;
}
+static int __hyp_alloc_private_va_range(unsigned long base)
+{
+ lockdep_assert_held(&kvm_hyp_pgd_mutex);
+
+ if (!PAGE_ALIGNED(base))
+ return -EINVAL;
+
+ /*
+ * Verify that BIT(VA_BITS - 1) hasn't been flipped by
+ * allocating the new area, as it would indicate we've
+ * overflowed the idmap/IO address range.
+ */
+ if ((base ^ io_map_base) & BIT(VA_BITS - 1))
+ return -ENOMEM;
+
+ io_map_base = base;
+
+ return 0;
+}
/**
* hyp_alloc_private_va_range - Allocates a private VA range.
@@ -612,26 +631,16 @@ int hyp_alloc_private_va_range(size_t size, unsigned long *haddr)
/*
* This assumes that we have enough space below the idmap
- * page to allocate our VAs. If not, the check below will
- * kick. A potential alternative would be to detect that
- * overflow and switch to an allocation above the idmap.
+ * page to allocate our VAs. If not, the check in
+ * __hyp_alloc_private_va_range() will kick. A potential
+ * alternative would be to detect that overflow and switch
+ * to an allocation above the idmap.
*
* The allocated size is always a multiple of PAGE_SIZE.
*/
- base = io_map_base - PAGE_ALIGN(size);
-
- /* Align the allocation based on the order of its size */
- base = ALIGN_DOWN(base, PAGE_SIZE << get_order(size));
-
- /*
- * Verify that BIT(VA_BITS - 1) hasn't been flipped by
- * allocating the new area, as it would indicate we've
- * overflowed the idmap/IO address range.
- */
- if ((base ^ io_map_base) & BIT(VA_BITS - 1))
- ret = -ENOMEM;
- else
- *haddr = io_map_base = base;
+ size = PAGE_ALIGN(size);
+ base = io_map_base - size;
+ ret = __hyp_alloc_private_va_range(base);
mutex_unlock(&kvm_hyp_pgd_mutex);
@@ -668,6 +677,48 @@ static int __create_hyp_private_mapping(phys_addr_t phys_addr, size_t size,
return ret;
}
+int create_hyp_stack(phys_addr_t phys_addr, unsigned long *haddr)
+{
+ unsigned long base;
+ size_t size;
+ int ret;
+
+ mutex_lock(&kvm_hyp_pgd_mutex);
+ /*
+ * Efficient stack verification using the PAGE_SHIFT bit implies
+ * an alignment of our allocation on the order of the size.
+ */
+ size = PAGE_SIZE * 2;
+ base = ALIGN_DOWN(io_map_base - size, size);
+
+ ret = __hyp_alloc_private_va_range(base);
+
+ mutex_unlock(&kvm_hyp_pgd_mutex);
+
+ if (ret) {
+ kvm_err("Cannot allocate hyp stack guard page\n");
+ return ret;
+ }
+
+ /*
+ * Since the stack grows downwards, map the stack to the page
+ * at the higher address and leave the lower guard page
+ * unbacked.
+ *
+ * Any valid stack address now has the PAGE_SHIFT bit as 1
+ * and addresses corresponding to the guard page have the
+ * PAGE_SHIFT bit as 0 - this is used for overflow detection.
+ */
+ ret = __create_hyp_mappings(base + PAGE_SIZE, PAGE_SIZE, phys_addr,
+ PAGE_HYP);
+ if (ret)
+ kvm_err("Cannot map hyp stack\n");
+
+ *haddr = base + size;
+
+ return ret;
+}
+
/**
* create_hyp_io_mappings - Map IO into both kernel and HYP
* @phys_addr: The physical start address which gets mapped
base-commit: 52a93d39b17dc7eb98b6aa3edb93943248e03b2f
--
2.41.0.640.ga95def55d0-goog
* Re: [PATCH v2] KVM: arm64: Remove size-order align in the nVHE hyp private VA range
From: Kalesh Singh @ 2023-08-11 21:55 UTC
To: Vincent Donnefort
Cc: maz, oliver.upton, kvmarm, qperret, smostafa, linux-arm-kernel,
kernel-team, will
On Fri, Aug 11, 2023 at 4:20 AM Vincent Donnefort <vdonnefort@google.com> wrote:
>
> [...]
>
> @@ -668,6 +677,48 @@ static int __create_hyp_private_mapping(phys_addr_t phys_addr, size_t size,
> return ret;
> }
>
> +int create_hyp_stack(phys_addr_t phys_addr, unsigned long *haddr)
> +{
> + unsigned long base;
> + size_t size;
> + int ret;
> +
> + mutex_lock(&kvm_hyp_pgd_mutex);
> + /*
> + * Efficient stack verification using the PAGE_SHIFT bit implies
> + * an alignment of our allocation on the order of the size.
> + */
> + size = PAGE_SIZE * 2;
> + base = ALIGN_DOWN(io_map_base - size, size);
> +
> + ret = __hyp_alloc_private_va_range(base);
> +
> + mutex_unlock(&kvm_hyp_pgd_mutex);
> +
> + if (ret) {
> + kvm_err("Cannot allocate hyp stack guard page\n");
> + return ret;
> + }
> +
> + /*
> + * Since the stack grows downwards, map the stack to the page
> + * at the higher address and leave the lower guard page
> + * unbacked.
> + *
> + * Any valid stack address now has the PAGE_SHIFT bit as 1
> + * and addresses corresponding to the guard page have the
> + * PAGE_SHIFT bit as 0 - this is used for overflow detection.
> + */
> + ret = __create_hyp_mappings(base + PAGE_SIZE, PAGE_SIZE, phys_addr,
> + PAGE_HYP);
> + if (ret)
> + kvm_err("Cannot map hyp stack\n");
Should we reset the io_map_base if the mapping failed here as well?
Otherwise lgtm, Reviewed-by: Kalesh Singh <kaleshsingh@google.com>
Thanks,
Kalesh
* Re: [PATCH v2] KVM: arm64: Remove size-order align in the nVHE hyp private VA range
From: Vincent Donnefort @ 2023-08-14 7:40 UTC
To: Kalesh Singh
Cc: maz, oliver.upton, kvmarm, qperret, smostafa, linux-arm-kernel,
kernel-team, will
[...]
> > +int create_hyp_stack(phys_addr_t phys_addr, unsigned long *haddr)
> > +{
> > + unsigned long base;
> > + size_t size;
> > + int ret;
> > +
> > + mutex_lock(&kvm_hyp_pgd_mutex);
> > + /*
> > + * Efficient stack verification using the PAGE_SHIFT bit implies
> > + * an alignment of our allocation on the order of the size.
> > + */
> > + size = PAGE_SIZE * 2;
> > + base = ALIGN_DOWN(io_map_base - size, size);
> > +
> > + ret = __hyp_alloc_private_va_range(base);
> > +
> > + mutex_unlock(&kvm_hyp_pgd_mutex);
> > +
> > + if (ret) {
> > + kvm_err("Cannot allocate hyp stack guard page\n");
> > + return ret;
> > + }
> > +
> > + /*
> > + * Since the stack grows downwards, map the stack to the page
> > + * at the higher address and leave the lower guard page
> > + * unbacked.
> > + *
> > + * Any valid stack address now has the PAGE_SHIFT bit as 1
> > + * and addresses corresponding to the guard page have the
> > + * PAGE_SHIFT bit as 0 - this is used for overflow detection.
> > + */
> > + ret = __create_hyp_mappings(base + PAGE_SIZE, PAGE_SIZE, phys_addr,
> > + PAGE_HYP);
> > + if (ret)
> > + kvm_err("Cannot map hyp stack\n");
>
> Should we reset the io_map_base if the mapping failed here as well?
I left it out on purpose. Here, I'm releasing the lock and I didn't want to
add a non-locked __create_hyp_mappings() just for that reset, which is
probably not bringing much.
>
> Otherwise lgtm, Reviewed-by: Kalesh Singh <kaleshsingh@google.com>
Thanks for the review!
>
> Thanks,
> Kalesh
>
> > +
> > + *haddr = base + size;
> > +
> > + return ret;
> > +}
> > +
> > /**
> > * create_hyp_io_mappings - Map IO into both kernel and HYP
> > * @phys_addr: The physical start address which gets mapped
> >
> > base-commit: 52a93d39b17dc7eb98b6aa3edb93943248e03b2f
> > --
> > 2.41.0.640.ga95def55d0-goog
> >
* Re: [PATCH v2] KVM: arm64: Remove size-order align in the nVHE hyp private VA range
From: Marc Zyngier @ 2023-08-26 11:00 UTC
To: Kalesh Singh
Cc: Vincent Donnefort, oliver.upton, kvmarm, qperret, smostafa,
linux-arm-kernel, kernel-team, will
Hey Kalesh,
On Fri, 11 Aug 2023 22:55:11 +0100,
Kalesh Singh <kaleshsingh@google.com> wrote:
>
> On Fri, Aug 11, 2023 at 4:20 AM Vincent Donnefort <vdonnefort@google.com> wrote:
> >
> > commit f922c13e778d ("KVM: arm64: Introduce
> > pkvm_alloc_private_va_range()") and commit 92abe0f81e13 ("KVM: arm64:
> > Introduce hyp_alloc_private_va_range()") added an alignment for the
> > start address of any allocation into the nVHE hypervisor private VA
> > range.
[...]
>
> Otherwise lgtm, Reviewed-by: Kalesh Singh <kaleshsingh@google.com>
Thanks for taking the time to review this.
Just a small nit: in the future, can you make sure your tags appear on
a separate line? This allows tools like 'b4' to automatically collect
them. It's not a huge deal, but I'd like to make sure you get the
credit you deserve.
Thanks again,
M.
--
Without deviation from the norm, progress is not possible.
* Re: [PATCH v2] KVM: arm64: Remove size-order align in the nVHE hyp private VA range
From: Marc Zyngier @ 2023-08-26 13:02 UTC
To: Vincent Donnefort, oliver.upton
Cc: smostafa, kernel-team, kvmarm, qperret, will, kaleshsingh,
linux-arm-kernel
On Fri, 11 Aug 2023 12:20:37 +0100, Vincent Donnefort wrote:
> commit f922c13e778d ("KVM: arm64: Introduce
> pkvm_alloc_private_va_range()") and commit 92abe0f81e13 ("KVM: arm64:
> Introduce hyp_alloc_private_va_range()") added an alignment for the
> start address of any allocation into the nVHE hypervisor private VA
> range.
>
> This alignment (order of the size of the allocation) intends to enable
> efficient stack verification (if the PAGE_SHIFT bit is zero, the stack
> pointer is on the guard page and a stack overflow occurred).
>
> [...]
Applied to next, thanks!
[1/1] KVM: arm64: Remove size-order align in the nVHE hyp private VA range
commit: f156a7d13fc35d0078cd644b8cf0a6f97cbbe2e2
Cheers,
M.
--
Without deviation from the norm, progress is not possible.
* Re: [PATCH v2] KVM: arm64: Remove size-order align in the nVHE hyp private VA range
From: Marek Szyprowski @ 2023-08-28 12:16 UTC
To: Vincent Donnefort, maz, oliver.upton
Cc: kvmarm, qperret, smostafa, kaleshsingh, linux-arm-kernel,
kernel-team, will
On 11.08.2023 13:20, Vincent Donnefort wrote:
> commit f922c13e778d ("KVM: arm64: Introduce
> pkvm_alloc_private_va_range()") and commit 92abe0f81e13 ("KVM: arm64:
> Introduce hyp_alloc_private_va_range()") added an alignment for the
> start address of any allocation into the nVHE hypervisor private VA
> range.
>
> This alignment (order of the size of the allocation) intends to enable
> efficient stack verification (if the PAGE_SHIFT bit is zero, the stack
> pointer is on the guard page and a stack overflow occurred).
>
> But this is only necessary for stack allocation and can waste a lot of
> VA space. So instead make stack-specific functions, handling the guard
> page requirements, while other users (e.g. fixmap) will only get page
> alignment.
>
> Signed-off-by: Vincent Donnefort <vdonnefort@google.com>
This change, merged to linux-next as f156a7d13fc3 ("KVM: arm64: Remove
size-order align in the nVHE hyp private VA range"), introduced the
following regression on Raspberry Pi4b board. Here is a log observed on
linux next-20230828 release:
--->8---
kvm [1]: IPA Size Limit: 44 bits
------------[ cut here ]------------
WARNING: CPU: 0 PID: 1 at arch/arm64/kvm/hyp/pgtable.c:453
hyp_map_walker+0xb0/0x120
Modules linked in:
CPU: 0 PID: 1 Comm: swapper/0 Not tainted 6.5.0-next-20230828 #13889
Hardware name: Raspberry Pi 4 Model B (DT)
pstate: 00000005 (nzcv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--)
pc : hyp_map_walker+0xb0/0x120
lr : hyp_map_walker+0x30/0x120
...
Call trace:
hyp_map_walker+0xb0/0x120
kvm_pgtable_visitor_cb.isra.0+0x38/0x5c
__kvm_pgtable_walk+0x1dc/0x32c
__kvm_pgtable_walk+0xd0/0x32c
__kvm_pgtable_walk+0xd0/0x32c
__kvm_pgtable_walk+0xd0/0x32c
kvm_pgtable_walk+0xd0/0x224
kvm_pgtable_hyp_map+0x8c/0xf0
__create_hyp_mappings+0x98/0xc0
__create_hyp_private_mapping+0xf0/0x14c
create_hyp_io_mappings+0x8c/0x118
vgic_v2_probe+0xbc/0x1ec
kvm_vgic_hyp_init+0xcc/0x210
kvm_arm_init+0x6a4/0x1040
do_one_initcall+0x74/0x2f0
kernel_init_freeable+0x28c/0x4dc
kernel_init+0x24/0x1dc
ret_from_fork+0x10/0x20
irq event stamp: 72194
hardirqs last enabled at (72193): [<ffff8000811f2c48>]
_raw_spin_unlock_irqrestore+0x74/0x78
hardirqs last disabled at (72194): [<ffff8000811df128>] el1_dbg+0x24/0x8c
softirqs last enabled at (72182): [<ffff800080010a10>]
__do_softirq+0x438/0x4ec
softirqs last disabled at (72173): [<ffff800080016980>]
____do_softirq+0x10/0x1c
---[ end trace 0000000000000000 ]---
------------[ cut here ]------------
WARNING: CPU: 0 PID: 1 at arch/arm64/kvm/hyp/pgtable.c:470
hyp_map_walker+0x10c/0x120
Modules linked in:
CPU: 0 PID: 1 Comm: swapper/0 Tainted: G W 6.5.0-next-20230828 #13889
Hardware name: Raspberry Pi 4 Model B (DT)
pstate: 60000005 (nZCv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--)
pc : hyp_map_walker+0x10c/0x120
lr : hyp_map_walker+0x30/0x120
...
Call trace:
hyp_map_walker+0x10c/0x120
kvm_pgtable_visitor_cb.isra.0+0x38/0x5c
__kvm_pgtable_walk+0x1dc/0x32c
__kvm_pgtable_walk+0xd0/0x32c
__kvm_pgtable_walk+0xd0/0x32c
__kvm_pgtable_walk+0xd0/0x32c
kvm_pgtable_walk+0xd0/0x224
kvm_pgtable_hyp_map+0x8c/0xf0
__create_hyp_mappings+0x98/0xc0
__create_hyp_private_mapping+0xf0/0x14c
create_hyp_io_mappings+0x8c/0x118
vgic_v2_probe+0xbc/0x1ec
kvm_vgic_hyp_init+0xcc/0x210
kvm_arm_init+0x6a4/0x1040
do_one_initcall+0x74/0x2f0
kernel_init_freeable+0x28c/0x4dc
kernel_init+0x24/0x1dc
ret_from_fork+0x10/0x20
irq event stamp: 72214
hardirqs last enabled at (72213): [<ffff8000811df8f8>]
exit_to_kernel_mode+0x34/0x124
hardirqs last disabled at (72214): [<ffff8000811df128>] el1_dbg+0x24/0x8c
softirqs last enabled at (72212): [<ffff800080010a10>]
__do_softirq+0x438/0x4ec
softirqs last disabled at (72197): [<ffff800080016980>]
____do_softirq+0x10/0x1c
---[ end trace 0000000000000000 ]---
kvm [1]: Cannot map VCTRL into hyp
-------
With $subject patch reverted on top of next-20230828, kvm initializes
properly on Raspberry Pi4b:
--->8---
kvm [1]: IPA Size Limit: 44 bits
kvm [1]: vgic interrupt IRQ9
kvm [1]: Hyp mode initialized successfully
-------
Let me know if I can help somehow debugging this issue.
Best regards
--
Marek Szyprowski, PhD
Samsung R&D Institute Poland
* Re: [PATCH v2] KVM: arm64: Remove size-order align in the nVHE hyp private VA range
From: Marc Zyngier @ 2023-08-28 13:19 UTC
To: Marek Szyprowski
Cc: Vincent Donnefort, oliver.upton, kvmarm, qperret, smostafa,
kaleshsingh, linux-arm-kernel, kernel-team, will
On Mon, 28 Aug 2023 13:16:51 +0100,
Marek Szyprowski <m.szyprowski@samsung.com> wrote:
>
> On 11.08.2023 13:20, Vincent Donnefort wrote:
> > [...]
>
> This change, merged to linux-next as f156a7d13fc3 ("KVM: arm64: Remove
> size-order align in the nVHE hyp private VA range"), introduced the
> following regression on Raspberry Pi4b board. Here is a log observed on
> linux next-20230828 release:
>
> --->8---
>
> kvm [1]: IPA Size Limit: 44 bits
> ------------[ cut here ]------------
> WARNING: CPU: 0 PID: 1 at arch/arm64/kvm/hyp/pgtable.c:453
> hyp_map_walker+0xb0/0x120
> Modules linked in:
> CPU: 0 PID: 1 Comm: swapper/0 Not tainted 6.5.0-next-20230828 #13889
> Hardware name: Raspberry Pi 4 Model B (DT)
> pstate: 00000005 (nzcv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--)
> pc : hyp_map_walker+0xb0/0x120
> lr : hyp_map_walker+0x30/0x120
> ...
> Call trace:
> hyp_map_walker+0xb0/0x120
> kvm_pgtable_visitor_cb.isra.0+0x38/0x5c
> __kvm_pgtable_walk+0x1dc/0x32c
> __kvm_pgtable_walk+0xd0/0x32c
> __kvm_pgtable_walk+0xd0/0x32c
> __kvm_pgtable_walk+0xd0/0x32c
> kvm_pgtable_walk+0xd0/0x224
> kvm_pgtable_hyp_map+0x8c/0xf0
> __create_hyp_mappings+0x98/0xc0
> __create_hyp_private_mapping+0xf0/0x14c
> create_hyp_io_mappings+0x8c/0x118
> vgic_v2_probe+0xbc/0x1ec
> kvm_vgic_hyp_init+0xcc/0x210
> kvm_arm_init+0x6a4/0x1040
Ah, wonderful. Can you give the snippet below a go?
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index 11c1d786c506..50be51cc40cc 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -652,6 +652,9 @@ int hyp_alloc_private_va_range(size_t size, unsigned long *haddr)
mutex_unlock(&kvm_hyp_pgd_mutex);
+ if (!ret)
+ *haddr = base;
+
return ret;
}
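To spell out the root cause (a simplified sketch reconstructed from the
hunks in the patch, not verbatim mmu.c): after the refactoring, the
non-stack allocator updates io_map_base but never writes the allocated base
back to its caller, so the address __create_hyp_private_mapping() goes on to
map is left uninitialized:

int hyp_alloc_private_va_range(size_t size, unsigned long *haddr)
{
	unsigned long base;
	int ret;

	mutex_lock(&kvm_hyp_pgd_mutex);

	size = PAGE_ALIGN(size);
	base = io_map_base - size;
	ret = __hyp_alloc_private_va_range(base);

	mutex_unlock(&kvm_hyp_pgd_mutex);

	/* *haddr is never set -- the hunk above restores the write-back */
	return ret;
}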
Thanks,
M.
--
Without deviation from the norm, progress is not possible.
* Re: [PATCH v2] KVM: arm64: Remove size-order align in the nVHE hyp private VA range
From: Marek Szyprowski @ 2023-08-28 13:43 UTC
To: Marc Zyngier
Cc: Vincent Donnefort, oliver.upton, kvmarm, qperret, smostafa,
kaleshsingh, linux-arm-kernel, kernel-team, will
On 28.08.2023 15:19, Marc Zyngier wrote:
> On Mon, 28 Aug 2023 13:16:51 +0100,
> Marek Szyprowski <m.szyprowski@samsung.com> wrote:
>> [...]
>
> Ah, wonderful. Can you give the snippet below a go?
>
> diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
> index 11c1d786c506..50be51cc40cc 100644
> --- a/arch/arm64/kvm/mmu.c
> +++ b/arch/arm64/kvm/mmu.c
> @@ -652,6 +652,9 @@ int hyp_alloc_private_va_range(size_t size, unsigned long *haddr)
>
> mutex_unlock(&kvm_hyp_pgd_mutex);
>
> + if (!ret)
> + *haddr = base;
> +
> return ret;
> }
>
This fixed the regression. Feel free to add:
Reported-by: Marek Szyprowski <m.szyprowski@samsung.com>
Tested-by: Marek Szyprowski <m.szyprowski@samsung.com>
Best regards
--
Marek Szyprowski, PhD
Samsung R&D Institute Poland