* [PATCH v2 1/2] KVM: riscv: Check hugetlb block mappings against memslot bounds
2026-06-04 14:26 [PATCH v2 0/2] KVM: riscv: Fix hugetlb G-stage block mapping eligibility Jinyu Tang
@ 2026-06-04 14:26 ` Jinyu Tang
2026-06-04 14:52 ` sashiko-bot
2026-06-04 14:26 ` [PATCH v2 2/2] KVM: selftests: Add a hugetlb memslot alignment test mode Jinyu Tang
1 sibling, 1 reply; 5+ messages in thread
From: Jinyu Tang @ 2026-06-04 14:26 UTC (permalink / raw)
To: Anup Patel, Anup Patel, Paolo Bonzini
Cc: Atish Patra, Paul Walmsley, Paul Walmsley, Palmer Dabbelt,
Albert Ou, Alexandre Ghiti, Shuah Khan, Sean Christopherson,
Radim Krčmář, Andrew Jones, Conor Dooley,
Yong-Xuan Wang, Nutty Liu, kvm, kvm-riscv, linux-riscv,
linux-kernel, linux-kselftest, Jinyu Tang
RISC-V KVM has used the hugetlb VMA size directly as the G-stage
mapping size since stage-2 page table support was added. That is safe
only if the block covered by the fault is fully contained in the
memslot and the userspace address has the same offset as the GPA
within that block.
The THP path already checks those constraints before installing a PMD
block mapping. The hugetlb path did not, so an unaligned memslot could
make KVM install a PMD or PUD sized G-stage block that covers memory
outside the slot or maps the wrong host pages.
Pass the target mapping size into fault_supports_gstage_huge_mapping().
The same helper can be used for both THP PMD mappings and hugetlb
PMD/PUD mappings.
Select hugetlb mapping sizes through the same memslot-boundary check,
falling back from PUD to PMD to PAGE_SIZE. When a smaller hugetlb
mapping size is selected, fault the GFN aligned to that selected size
instead of the original VMA size.
Also keep hugetlb mappings out of transparent_hugepage_adjust(). Once
the hugetlb path has fallen back to PAGE_SIZE, promoting the mapping
again through the THP helper would override that fallback decision.
Fixes: 9d05c1fee837 ("RISC-V: KVM: Implement stage2 page table programming")
Signed-off-by: Jinyu Tang <tjytimi@163.com>
Reviewed-by: Nutty Liu <nutty.liu@hotmail.com>
---
v1 -> v2:
- Squash the helper parameterization into this hugetlb fix.
- Use the ALIGN()/ALIGN_DOWN() form suggested by Nutty Liu and Anup
for the memslot boundary check.
arch/riscv/kvm/mmu.c | 54 ++++++++++++++++++++++++++++++++++++--------
1 file changed, 44 insertions(+), 10 deletions(-)
diff --git a/arch/riscv/kvm/mmu.c b/arch/riscv/kvm/mmu.c
index 2d3def024..0adf017a2 100644
--- a/arch/riscv/kvm/mmu.c
+++ b/arch/riscv/kvm/mmu.c
@@ -286,7 +286,8 @@ bool kvm_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
}
static bool fault_supports_gstage_huge_mapping(struct kvm_memory_slot *memslot,
- unsigned long hva)
+ unsigned long hva,
+ unsigned long map_size)
{
hva_t uaddr_start, uaddr_end;
gpa_t gpa_start;
@@ -300,8 +301,8 @@ static bool fault_supports_gstage_huge_mapping(struct kvm_memory_slot *memslot,
/*
* Pages belonging to memslots that don't have the same alignment
- * within a PMD for userspace and GPA cannot be mapped with g-stage
- * PMD entries, because we'll end up mapping the wrong pages.
+ * within a huge page for userspace and GPA cannot be mapped with
+ * g-stage block entries, because we'll end up mapping the wrong pages.
*
* Consider a layout like the following:
*
@@ -321,7 +322,7 @@ static bool fault_supports_gstage_huge_mapping(struct kvm_memory_slot *memslot,
* e -> g
* f -> h
*/
- if ((gpa_start & (PMD_SIZE - 1)) != (uaddr_start & (PMD_SIZE - 1)))
+ if ((gpa_start & (map_size - 1)) != (uaddr_start & (map_size - 1)))
return false;
/*
@@ -336,7 +337,8 @@ static bool fault_supports_gstage_huge_mapping(struct kvm_memory_slot *memslot,
* userspace_addr or the base_gfn, as both are equally aligned (per
* the check above) and equally sized.
*/
- return (hva >= ALIGN(uaddr_start, PMD_SIZE)) && (hva < ALIGN_DOWN(uaddr_end, PMD_SIZE));
+ return (hva >= ALIGN(uaddr_start, map_size)) &&
+ (hva < ALIGN_DOWN(uaddr_end, map_size));
}
static int get_hva_mapping_size(struct kvm *kvm,
@@ -404,7 +406,7 @@ static unsigned long transparent_hugepage_adjust(struct kvm *kvm,
* sure that the HVA and GPA are sufficiently aligned and that the
* block map is contained within the memslot.
*/
- if (fault_supports_gstage_huge_mapping(memslot, hva)) {
+ if (fault_supports_gstage_huge_mapping(memslot, hva, PMD_SIZE)) {
int sz;
sz = get_hva_mapping_size(kvm, hva);
@@ -421,12 +423,33 @@ static unsigned long transparent_hugepage_adjust(struct kvm *kvm,
return PAGE_SIZE;
}
+/*
+ * Select the G-stage mapping size for a hugetlb-backed fault.
+ *
+ * @memslot:  memslot containing the faulting address
+ * @hva:      faulting host virtual address
+ * @map_size: hugetlb page size of the backing VMA
+ *
+ * Returns PUD_SIZE or PMD_SIZE when @map_size is at least that large and
+ * fault_supports_gstage_huge_mapping() accepts a block of that size at
+ * @hva; otherwise falls back to PAGE_SIZE. A @map_size that is neither
+ * PUD_SIZE nor PMD_SIZE (including PAGE_SIZE) is returned unchanged.
+ */
+static unsigned long hugetlb_mapping_size(struct kvm_memory_slot *memslot,
+					  unsigned long hva,
+					  unsigned long map_size)
+{
+	/*
+	 * Deliberately an if-chain rather than a switch: when the PMD level
+	 * is folded, PUD_SIZE == PMD_SIZE and "case PUD_SIZE:" followed by
+	 * "case PMD_SIZE:" would be a duplicate case value and fail to build.
+	 */
+	if (map_size != PUD_SIZE && map_size != PMD_SIZE)
+		return map_size;
+
+	if (map_size == PUD_SIZE &&
+	    fault_supports_gstage_huge_mapping(memslot, hva, PUD_SIZE))
+		return PUD_SIZE;
+
+	/* Either the VMA is PMD sized or the PUD block did not fit. */
+	if (fault_supports_gstage_huge_mapping(memslot, hva, PMD_SIZE))
+		return PMD_SIZE;
+
+	return PAGE_SIZE;
+}
+
int kvm_riscv_mmu_map(struct kvm_vcpu *vcpu, struct kvm_memory_slot *memslot,
gpa_t gpa, unsigned long hva, bool is_write,
struct kvm_gstage_mapping *out_map)
{
int ret;
kvm_pfn_t hfn;
+ bool is_hugetlb;
bool writable;
short vma_pageshift;
gfn_t gfn = gpa >> PAGE_SHIFT;
@@ -460,16 +483,23 @@ int kvm_riscv_mmu_map(struct kvm_vcpu *vcpu, struct kvm_memory_slot *memslot,
return -EFAULT;
}
- if (is_vm_hugetlb_page(vma))
+ is_hugetlb = is_vm_hugetlb_page(vma);
+ if (is_hugetlb)
vma_pageshift = huge_page_shift(hstate_vma(vma));
else
vma_pageshift = PAGE_SHIFT;
vma_pagesize = 1ULL << vma_pageshift;
if (logging || (vma->vm_flags & VM_PFNMAP))
vma_pagesize = PAGE_SIZE;
+ else if (is_hugetlb)
+ vma_pagesize = hugetlb_mapping_size(memslot, hva, vma_pagesize);
+ /*
+ * For hugetlb mappings, vma_pagesize might have been reduced from the
+ * VMA size to a smaller safe mapping size.
+ */
if (vma_pagesize == PMD_SIZE || vma_pagesize == PUD_SIZE)
- gfn = (gpa & huge_page_mask(hstate_vma(vma))) >> PAGE_SHIFT;
+ gfn = ALIGN_DOWN(gpa, vma_pagesize) >> PAGE_SHIFT;
/*
* Read mmu_invalidate_seq so that KVM can detect if the results of
@@ -511,8 +541,12 @@ int kvm_riscv_mmu_map(struct kvm_vcpu *vcpu, struct kvm_memory_slot *memslot,
if (mmu_invalidate_retry(kvm, mmu_seq))
goto out_unlock;
- /* Check if we are backed by a THP and thus use block mapping if possible */
- if (!logging && (vma_pagesize == PAGE_SIZE))
+ /*
+ * Check if we are backed by a THP and thus use block mapping if
+ * possible. Hugetlb mappings already selected their target size above,
+ * so do not promote them through the THP helper.
+ */
+ if (!logging && !is_hugetlb && vma_pagesize == PAGE_SIZE)
vma_pagesize = transparent_hugepage_adjust(kvm, memslot, hva, &hfn, &gpa);
if (writable) {
--
2.43.0
^ permalink raw reply related [flat|nested] 5+ messages in thread
* [PATCH v2 2/2] KVM: selftests: Add a hugetlb memslot alignment test mode
2026-06-04 14:26 [PATCH v2 0/2] KVM: riscv: Fix hugetlb G-stage block mapping eligibility Jinyu Tang
2026-06-04 14:26 ` [PATCH v2 1/2] KVM: riscv: Check hugetlb block mappings against memslot bounds Jinyu Tang
@ 2026-06-04 14:26 ` Jinyu Tang
2026-06-04 14:42 ` sashiko-bot
1 sibling, 1 reply; 5+ messages in thread
From: Jinyu Tang @ 2026-06-04 14:26 UTC (permalink / raw)
To: Anup Patel, Anup Patel, Paolo Bonzini
Cc: Atish Patra, Paul Walmsley, Paul Walmsley, Palmer Dabbelt,
Albert Ou, Alexandre Ghiti, Shuah Khan, Sean Christopherson,
Radim Krčmář, Andrew Jones, Conor Dooley,
Yong-Xuan Wang, Nutty Liu, kvm, kvm-riscv, linux-riscv,
linux-kernel, linux-kselftest, Jinyu Tang
kvm_page_table_test can already exercise hugetlb-backed guest memory,
but it always creates the test memslot with GPA alignment matching the
hugetlb backing size. That misses the case where a valid hugetlb
memslot is later moved so that the memslot GPA and HVA no longer have
the same offset within the backing huge page.
Add a -u option that moves the test memslot GPA by one guest page after
creating the hugetlb memslot. The memslot is created through the normal
helper first, so the backing allocation remains valid and hugetlb aligned.
Moving the memslot then creates a deliberate HVA/GPA offset mismatch
before the guest mapping is installed.
This mode is useful for checking that architecture MMUs do not install
a block mapping when the block would map the wrong host pages or cover
memory outside the memslot. The option is restricted to hugetlb-backed
test memory because it's specifically about hugetlb block mapping
eligibility.
Signed-off-by: Jinyu Tang <tjytimi@163.com>
---
v1 -> v2:
- No changes to the selftest since v1.
.../selftests/kvm/kvm_page_table_test.c | 28 +++++++++++++++----
1 file changed, 22 insertions(+), 6 deletions(-)
diff --git a/tools/testing/selftests/kvm/kvm_page_table_test.c b/tools/testing/selftests/kvm/kvm_page_table_test.c
index fc5242fb9..a910e3abb 100644
--- a/tools/testing/selftests/kvm/kvm_page_table_test.c
+++ b/tools/testing/selftests/kvm/kvm_page_table_test.c
@@ -230,6 +230,7 @@ struct test_params {
u64 phys_offset;
u64 test_mem_size;
enum vm_mem_backing_src_type src_type;
+ bool misalign_slot_gpa;
};
static struct kvm_vm *pre_init_before_test(enum vm_guest_mode mode, void *arg)
@@ -244,6 +245,7 @@ static struct kvm_vm *pre_init_before_test(enum vm_guest_mode mode, void *arg)
u64 guest_num_pages;
u64 alignment;
void *host_test_mem;
+ struct userspace_mem_region *region;
struct kvm_vm *vm;
/* Align up the test memory size */
@@ -276,13 +278,22 @@ static struct kvm_vm *pre_init_before_test(enum vm_guest_mode mode, void *arg)
/* Add an extra memory slot with specified backing src type */
vm_userspace_mem_region_add(vm, src_type, guest_test_phys_mem,
TEST_MEM_SLOT_INDEX, guest_num_pages, 0);
+ region = memslot2region(vm, TEST_MEM_SLOT_INDEX);
+ host_test_mem = region->host_mem;
+
+ if (p->misalign_slot_gpa) {
+ TEST_ASSERT(is_backing_src_hugetlb(src_type),
+ "Memslot GPA misalignment requires hugetlb backing");
+ TEST_ASSERT(guest_num_pages > 1,
+ "Need at least two guest pages to misalign memslot GPA");
+
+ guest_test_phys_mem += guest_page_size;
+ vm_mem_region_move(vm, TEST_MEM_SLOT_INDEX, guest_test_phys_mem);
+ }
/* Do mapping(GVA->GPA) for the testing memory slot */
virt_map(vm, guest_test_virt_mem, guest_test_phys_mem, guest_num_pages);
- /* Cache the HVA pointer of the region */
- host_test_mem = addr_gpa2hva(vm, (gpa_t)guest_test_phys_mem);
-
/* Export shared structure test_args to guest */
sync_global_to_guest(vm, test_args);
@@ -417,8 +428,8 @@ static void run_test(enum vm_guest_mode mode, void *arg)
static void help(char *name)
{
puts("");
- printf("usage: %s [-h] [-p offset] [-m mode] "
- "[-b mem-size] [-v vcpus] [-s mem-type]\n", name);
+ printf("usage: %s [-h] [-p offset] [-m mode] [-b mem-size]\n", name);
+ printf(" [-v vcpus] [-s mem-type] [-u]\n");
puts("");
printf(" -p: specify guest physical test memory offset\n"
" Warning: a low offset can conflict with the loaded test code.\n");
@@ -428,6 +439,8 @@ static void help(char *name)
printf(" -v: specify the number of vCPUs to run\n"
" (default: 1)\n");
backing_src_help("-s");
+ printf(" -u: move the test memslot GPA by one guest page after creating\n"
+ " the memslot, forcing a hugetlb HVA/GPA offset mismatch\n");
puts("");
}
@@ -442,7 +455,7 @@ int main(int argc, char *argv[])
guest_modes_append_default();
- while ((opt = getopt(argc, argv, "hp:m:b:v:s:")) != -1) {
+ while ((opt = getopt(argc, argv, "hp:m:b:v:s:u")) != -1) {
switch (opt) {
case 'p':
p.phys_offset = strtoull(optarg, NULL, 0);
@@ -461,6 +474,9 @@ int main(int argc, char *argv[])
case 's':
p.src_type = parse_backing_src_type(optarg);
break;
+ case 'u':
+ p.misalign_slot_gpa = true;
+ break;
case 'h':
default:
help(argv[0]);
--
2.43.0
^ permalink raw reply related [flat|nested] 5+ messages in thread