public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH] kvm: selftests: do not use bitfields larger than 32-bits for PTEs
@ 2022-04-20 10:36 Paolo Bonzini
  2022-04-20 14:01 ` Peter Xu
  2022-04-20 15:47 ` Sean Christopherson
  0 siblings, 2 replies; 3+ messages in thread
From: Paolo Bonzini @ 2022-04-20 10:36 UTC (permalink / raw)
  To: linux-kernel, kvm; +Cc: peterx, seanjc

Red Hat's QE team reported test failure on access_tracking_perf_test:

Testing guest mode: PA-bits:ANY, VA-bits:48,  4K pages
guest physical test memory offset: 0x3fffbffff000

Populating memory             : 0.684014577s
Writing to populated memory   : 0.006230175s
Reading from populated memory : 0.004557805s
==== Test Assertion Failure ====
  lib/kvm_util.c:1411: false
  pid=125806 tid=125809 errno=4 - Interrupted system call
     1  0x0000000000402f7c: addr_gpa2hva at kvm_util.c:1411
     2   (inlined by) addr_gpa2hva at kvm_util.c:1405
     3  0x0000000000401f52: lookup_pfn at access_tracking_perf_test.c:98
     4   (inlined by) mark_vcpu_memory_idle at access_tracking_perf_test.c:152
     5   (inlined by) vcpu_thread_main at access_tracking_perf_test.c:232
     6  0x00007fefe9ff81ce: ?? ??:0
     7  0x00007fefe9c64d82: ?? ??:0
  No vm physical memory at 0xffbffff000

I can easily reproduce it with a Intel(R) Xeon(R) CPU E5-2630 with 46 bits
PA.

It turns out that the address translation for clearing idle page tracking
returned a wrong result; addr_gva2gpa()'s last step, which is based on
"pte[index[0]].pfn", did the calculation with 40 bits length and the
high 12 bits got truncated.  In above case the GPA address to be returned
should be 0x3fffbffff000 for GVA 0xc0000000, but it got truncated into
0xffbffff000 and the subsequent gpa2hva lookup failed.

The width of operations on bit fields greater than 32-bit is
implementation defined, and differs between GCC (which uses the bitfield
precision) and clang (which uses 64-bit arithmetic), so this is a
potential minefield.  Remove the bit fields and using manual masking
instead.

Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2075036
Reported-by: Peter Xu <peterx@redhat.com>
Message-Id: <20220414152837.83320-1-peterx@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 .../selftests/kvm/lib/x86_64/processor.c      | 202 ++++++++----------
 1 file changed, 89 insertions(+), 113 deletions(-)

diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c
index 9f000dfb5594..90c3d34ce80b 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/processor.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c
@@ -20,36 +20,18 @@
 vm_vaddr_t exception_handlers;
 
 /* Virtual translation table structure declarations */
-struct pageUpperEntry {
-	uint64_t present:1;
-	uint64_t writable:1;
-	uint64_t user:1;
-	uint64_t write_through:1;
-	uint64_t cache_disable:1;
-	uint64_t accessed:1;
-	uint64_t ignored_06:1;
-	uint64_t page_size:1;
-	uint64_t ignored_11_08:4;
-	uint64_t pfn:40;
-	uint64_t ignored_62_52:11;
-	uint64_t execute_disable:1;
-};
-
-struct pageTableEntry {
-	uint64_t present:1;
-	uint64_t writable:1;
-	uint64_t user:1;
-	uint64_t write_through:1;
-	uint64_t cache_disable:1;
-	uint64_t accessed:1;
-	uint64_t dirty:1;
-	uint64_t reserved_07:1;
-	uint64_t global:1;
-	uint64_t ignored_11_09:3;
-	uint64_t pfn:40;
-	uint64_t ignored_62_52:11;
-	uint64_t execute_disable:1;
-};
+#define PTE_PRESENT_MASK  (1ULL << 0)
+#define PTE_WRITABLE_MASK (1ULL << 1)
+#define PTE_USER_MASK     (1ULL << 2)
+#define PTE_ACCESSED_MASK (1ULL << 5)
+#define PTE_DIRTY_MASK    (1ULL << 6)
+#define PTE_LARGE_MASK    (1ULL << 7)
+#define PTE_GLOBAL_MASK   (1ULL << 8)
+#define PTE_NX_MASK       (1ULL << 63)
+
+#define PTE_PFN_MASK      0xFFFFFFFFFF000ULL
+#define PTE_PFN_SHIFT     12
+#define PTE_GET_PFN(pte) (((pte) & PTE_PFN_MASK) >> PTE_PFN_SHIFT)
 
 void regs_dump(FILE *stream, struct kvm_regs *regs,
 	       uint8_t indent)
@@ -195,23 +177,21 @@ static void *virt_get_pte(struct kvm_vm *vm, uint64_t pt_pfn, uint64_t vaddr,
 	return &page_table[index];
 }
 
-static struct pageUpperEntry *virt_create_upper_pte(struct kvm_vm *vm,
-						    uint64_t pt_pfn,
-						    uint64_t vaddr,
-						    uint64_t paddr,
-						    int level,
-						    enum x86_page_size page_size)
+static uint64_t *virt_create_upper_pte(struct kvm_vm *vm,
+				       uint64_t pt_pfn,
+				       uint64_t vaddr,
+				       uint64_t paddr,
+				       int level,
+				       enum x86_page_size page_size)
 {
-	struct pageUpperEntry *pte = virt_get_pte(vm, pt_pfn, vaddr, level);
-
-	if (!pte->present) {
-		pte->writable = true;
-		pte->present = true;
-		pte->page_size = (level == page_size);
-		if (pte->page_size)
-			pte->pfn = paddr >> vm->page_shift;
+	uint64_t *pte = virt_get_pte(vm, pt_pfn, vaddr, level);
+
+	if (!(*pte & PTE_PRESENT_MASK)) {
+		*pte = PTE_PRESENT_MASK | PTE_WRITABLE_MASK;
+		if (level == page_size)
+			*pte |= PTE_LARGE_MASK | (paddr & PTE_PFN_MASK);
 		else
-			pte->pfn = vm_alloc_page_table(vm) >> vm->page_shift;
+			*pte |= vm_alloc_page_table(vm) & PTE_PFN_MASK;
 	} else {
 		/*
 		 * Entry already present.  Assert that the caller doesn't want
@@ -221,7 +201,7 @@ static struct pageUpperEntry *virt_create_upper_pte(struct kvm_vm *vm,
 		TEST_ASSERT(level != page_size,
 			    "Cannot create hugepage at level: %u, vaddr: 0x%lx\n",
 			    page_size, vaddr);
-		TEST_ASSERT(!pte->page_size,
+		TEST_ASSERT(!(*pte & PTE_LARGE_MASK),
 			    "Cannot create page table at level: %u, vaddr: 0x%lx\n",
 			    level, vaddr);
 	}
@@ -232,8 +212,8 @@ void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
 		   enum x86_page_size page_size)
 {
 	const uint64_t pg_size = 1ull << ((page_size * 9) + 12);
-	struct pageUpperEntry *pml4e, *pdpe, *pde;
-	struct pageTableEntry *pte;
+	uint64_t *pml4e, *pdpe, *pde;
+	uint64_t *pte;
 
 	TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K,
 		    "Unknown or unsupported guest mode, mode: 0x%x", vm->mode);
@@ -257,24 +237,22 @@ void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
 	 */
 	pml4e = virt_create_upper_pte(vm, vm->pgd >> vm->page_shift,
 				      vaddr, paddr, 3, page_size);
-	if (pml4e->page_size)
+	if (*pml4e & PTE_LARGE_MASK)
 		return;
 
-	pdpe = virt_create_upper_pte(vm, pml4e->pfn, vaddr, paddr, 2, page_size);
-	if (pdpe->page_size)
+	pdpe = virt_create_upper_pte(vm, PTE_GET_PFN(*pml4e), vaddr, paddr, 2, page_size);
+	if (*pdpe & PTE_LARGE_MASK)
 		return;
 
-	pde = virt_create_upper_pte(vm, pdpe->pfn, vaddr, paddr, 1, page_size);
-	if (pde->page_size)
+	pde = virt_create_upper_pte(vm, PTE_GET_PFN(*pdpe), vaddr, paddr, 1, page_size);
+	if (*pde & PTE_LARGE_MASK)
 		return;
 
 	/* Fill in page table entry. */
-	pte = virt_get_pte(vm, pde->pfn, vaddr, 0);
-	TEST_ASSERT(!pte->present,
+	pte = virt_get_pte(vm, PTE_GET_PFN(*pde), vaddr, 0);
+	TEST_ASSERT(!(*pte & PTE_PRESENT_MASK),
 		    "PTE already present for 4k page at vaddr: 0x%lx\n", vaddr);
-	pte->pfn = paddr >> vm->page_shift;
-	pte->writable = true;
-	pte->present = 1;
+	*pte = PTE_PRESENT_MASK | PTE_WRITABLE_MASK | (paddr & PTE_PFN_MASK);
 }
 
 void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr)
@@ -282,12 +260,12 @@ void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr)
 	__virt_pg_map(vm, vaddr, paddr, X86_PAGE_SIZE_4K);
 }
 
-static struct pageTableEntry *_vm_get_page_table_entry(struct kvm_vm *vm, int vcpuid,
+static uint64_t *_vm_get_page_table_entry(struct kvm_vm *vm, int vcpuid,
 						       uint64_t vaddr)
 {
 	uint16_t index[4];
-	struct pageUpperEntry *pml4e, *pdpe, *pde;
-	struct pageTableEntry *pte;
+	uint64_t *pml4e, *pdpe, *pde;
+	uint64_t *pte;
 	struct kvm_cpuid_entry2 *entry;
 	struct kvm_sregs sregs;
 	int max_phy_addr;
@@ -329,30 +307,29 @@ static struct pageTableEntry *_vm_get_page_table_entry(struct kvm_vm *vm, int vc
 	index[3] = (vaddr >> 39) & 0x1ffu;
 
 	pml4e = addr_gpa2hva(vm, vm->pgd);
-	TEST_ASSERT(pml4e[index[3]].present,
+	TEST_ASSERT(pml4e[index[3]] & PTE_PRESENT_MASK,
 		"Expected pml4e to be present for gva: 0x%08lx", vaddr);
-	TEST_ASSERT((*(uint64_t*)(&pml4e[index[3]]) &
-		(rsvd_mask | (1ull << 7))) == 0,
+	TEST_ASSERT((pml4e[index[3]] & (rsvd_mask | PTE_LARGE_MASK)) == 0,
 		"Unexpected reserved bits set.");
 
-	pdpe = addr_gpa2hva(vm, pml4e[index[3]].pfn * vm->page_size);
-	TEST_ASSERT(pdpe[index[2]].present,
+	pdpe = addr_gpa2hva(vm, PTE_GET_PFN(pml4e[index[3]]) * vm->page_size);
+	TEST_ASSERT(pdpe[index[2]] & PTE_PRESENT_MASK,
 		"Expected pdpe to be present for gva: 0x%08lx", vaddr);
-	TEST_ASSERT(pdpe[index[2]].page_size == 0,
+	TEST_ASSERT(!(pdpe[index[2]] & PTE_LARGE_MASK),
 		"Expected pdpe to map a pde not a 1-GByte page.");
-	TEST_ASSERT((*(uint64_t*)(&pdpe[index[2]]) & rsvd_mask) == 0,
+	TEST_ASSERT((pdpe[index[2]] & rsvd_mask) == 0,
 		"Unexpected reserved bits set.");
 
-	pde = addr_gpa2hva(vm, pdpe[index[2]].pfn * vm->page_size);
-	TEST_ASSERT(pde[index[1]].present,
+	pde = addr_gpa2hva(vm, PTE_GET_PFN(pdpe[index[2]]) * vm->page_size);
+	TEST_ASSERT(pde[index[1]] & PTE_PRESENT_MASK,
 		"Expected pde to be present for gva: 0x%08lx", vaddr);
-	TEST_ASSERT(pde[index[1]].page_size == 0,
+	TEST_ASSERT(!(pde[index[1]] & PTE_LARGE_MASK),
 		"Expected pde to map a pte not a 2-MByte page.");
-	TEST_ASSERT((*(uint64_t*)(&pde[index[1]]) & rsvd_mask) == 0,
+	TEST_ASSERT((pde[index[1]] & rsvd_mask) == 0,
 		"Unexpected reserved bits set.");
 
-	pte = addr_gpa2hva(vm, pde[index[1]].pfn * vm->page_size);
-	TEST_ASSERT(pte[index[0]].present,
+	pte = addr_gpa2hva(vm, PTE_GET_PFN(pde[index[1]]) * vm->page_size);
+	TEST_ASSERT(pte[index[0]] & PTE_PRESENT_MASK,
 		"Expected pte to be present for gva: 0x%08lx", vaddr);
 
 	return &pte[index[0]];
@@ -360,7 +337,7 @@ static struct pageTableEntry *_vm_get_page_table_entry(struct kvm_vm *vm, int vc
 
 uint64_t vm_get_page_table_entry(struct kvm_vm *vm, int vcpuid, uint64_t vaddr)
 {
-	struct pageTableEntry *pte = _vm_get_page_table_entry(vm, vcpuid, vaddr);
+	uint64_t *pte = _vm_get_page_table_entry(vm, vcpuid, vaddr);
 
 	return *(uint64_t *)pte;
 }
@@ -368,18 +345,17 @@ uint64_t vm_get_page_table_entry(struct kvm_vm *vm, int vcpuid, uint64_t vaddr)
 void vm_set_page_table_entry(struct kvm_vm *vm, int vcpuid, uint64_t vaddr,
 			     uint64_t pte)
 {
-	struct pageTableEntry *new_pte = _vm_get_page_table_entry(vm, vcpuid,
-								  vaddr);
+	uint64_t *new_pte = _vm_get_page_table_entry(vm, vcpuid, vaddr);
 
 	*(uint64_t *)new_pte = pte;
 }
 
 void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
 {
-	struct pageUpperEntry *pml4e, *pml4e_start;
-	struct pageUpperEntry *pdpe, *pdpe_start;
-	struct pageUpperEntry *pde, *pde_start;
-	struct pageTableEntry *pte, *pte_start;
+	uint64_t *pml4e, *pml4e_start;
+	uint64_t *pdpe, *pdpe_start;
+	uint64_t *pde, *pde_start;
+	uint64_t *pte, *pte_start;
 
 	if (!vm->pgd_created)
 		return;
@@ -389,58 +365,58 @@ void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
 	fprintf(stream, "%*s      index hvaddr         gpaddr         "
 		"addr         w exec dirty\n",
 		indent, "");
-	pml4e_start = (struct pageUpperEntry *) addr_gpa2hva(vm, vm->pgd);
+	pml4e_start = (uint64_t *) addr_gpa2hva(vm, vm->pgd);
 	for (uint16_t n1 = 0; n1 <= 0x1ffu; n1++) {
 		pml4e = &pml4e_start[n1];
-		if (!pml4e->present)
+		if (!(*pml4e & PTE_PRESENT_MASK))
 			continue;
-		fprintf(stream, "%*spml4e 0x%-3zx %p 0x%-12lx 0x%-10lx %u "
+		fprintf(stream, "%*spml4e 0x%-3zx %p 0x%-12lx 0x%-10llx %u "
 			" %u\n",
 			indent, "",
 			pml4e - pml4e_start, pml4e,
-			addr_hva2gpa(vm, pml4e), (uint64_t) pml4e->pfn,
-			pml4e->writable, pml4e->execute_disable);
+			addr_hva2gpa(vm, pml4e), PTE_GET_PFN(*pml4e),
+			!!(*pml4e & PTE_WRITABLE_MASK), !!(*pml4e & PTE_NX_MASK));
 
-		pdpe_start = addr_gpa2hva(vm, pml4e->pfn * vm->page_size);
+		pdpe_start = addr_gpa2hva(vm, *pml4e & PTE_PFN_MASK);
 		for (uint16_t n2 = 0; n2 <= 0x1ffu; n2++) {
 			pdpe = &pdpe_start[n2];
-			if (!pdpe->present)
+			if (!(*pdpe & PTE_PRESENT_MASK))
 				continue;
-			fprintf(stream, "%*spdpe  0x%-3zx %p 0x%-12lx 0x%-10lx "
+			fprintf(stream, "%*spdpe  0x%-3zx %p 0x%-12lx 0x%-10llx "
 				"%u  %u\n",
 				indent, "",
 				pdpe - pdpe_start, pdpe,
 				addr_hva2gpa(vm, pdpe),
-				(uint64_t) pdpe->pfn, pdpe->writable,
-				pdpe->execute_disable);
+				PTE_GET_PFN(*pdpe), !!(*pdpe & PTE_WRITABLE_MASK),
+				!!(*pdpe & PTE_NX_MASK));
 
-			pde_start = addr_gpa2hva(vm, pdpe->pfn * vm->page_size);
+			pde_start = addr_gpa2hva(vm, *pdpe & PTE_PFN_MASK);
 			for (uint16_t n3 = 0; n3 <= 0x1ffu; n3++) {
 				pde = &pde_start[n3];
-				if (!pde->present)
+				if (!(*pde & PTE_PRESENT_MASK))
 					continue;
 				fprintf(stream, "%*spde   0x%-3zx %p "
-					"0x%-12lx 0x%-10lx %u  %u\n",
+					"0x%-12lx 0x%-10llx %u  %u\n",
 					indent, "", pde - pde_start, pde,
 					addr_hva2gpa(vm, pde),
-					(uint64_t) pde->pfn, pde->writable,
-					pde->execute_disable);
+					PTE_GET_PFN(*pde), !!(*pde & PTE_WRITABLE_MASK),
+					!!(*pde & PTE_NX_MASK));
 
-				pte_start = addr_gpa2hva(vm, pde->pfn * vm->page_size);
+				pte_start = addr_gpa2hva(vm, *pde & PTE_PFN_MASK);
 				for (uint16_t n4 = 0; n4 <= 0x1ffu; n4++) {
 					pte = &pte_start[n4];
-					if (!pte->present)
+					if (!(*pte & PTE_PRESENT_MASK))
 						continue;
 					fprintf(stream, "%*spte   0x%-3zx %p "
-						"0x%-12lx 0x%-10lx %u  %u "
+						"0x%-12lx 0x%-10llx %u  %u "
 						"    %u    0x%-10lx\n",
 						indent, "",
 						pte - pte_start, pte,
 						addr_hva2gpa(vm, pte),
-						(uint64_t) pte->pfn,
-						pte->writable,
-						pte->execute_disable,
-						pte->dirty,
+						PTE_GET_PFN(*pte),
+						!!(*pte & PTE_WRITABLE_MASK),
+						!!(*pte & PTE_NX_MASK),
+						!!(*pte & PTE_DIRTY_MASK),
 						((uint64_t) n1 << 27)
 							| ((uint64_t) n2 << 18)
 							| ((uint64_t) n3 << 9)
@@ -558,8 +534,8 @@ static void kvm_seg_set_kernel_data_64bit(struct kvm_vm *vm, uint16_t selector,
 vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
 {
 	uint16_t index[4];
-	struct pageUpperEntry *pml4e, *pdpe, *pde;
-	struct pageTableEntry *pte;
+	uint64_t *pml4e, *pdpe, *pde;
+	uint64_t *pte;
 
 	TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use "
 		"unknown or unsupported guest mode, mode: 0x%x", vm->mode);
@@ -572,22 +548,22 @@ vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
 	if (!vm->pgd_created)
 		goto unmapped_gva;
 	pml4e = addr_gpa2hva(vm, vm->pgd);
-	if (!pml4e[index[3]].present)
+	if (!(pml4e[index[3]] & PTE_PRESENT_MASK))
 		goto unmapped_gva;
 
-	pdpe = addr_gpa2hva(vm, pml4e[index[3]].pfn * vm->page_size);
-	if (!pdpe[index[2]].present)
+	pdpe = addr_gpa2hva(vm, PTE_GET_PFN(pml4e[index[3]]) * vm->page_size);
+	if (!(pdpe[index[2]] & PTE_PRESENT_MASK))
 		goto unmapped_gva;
 
-	pde = addr_gpa2hva(vm, pdpe[index[2]].pfn * vm->page_size);
-	if (!pde[index[1]].present)
+	pde = addr_gpa2hva(vm, PTE_GET_PFN(pdpe[index[2]]) * vm->page_size);
+	if (!(pde[index[1]] & PTE_PRESENT_MASK))
 		goto unmapped_gva;
 
-	pte = addr_gpa2hva(vm, pde[index[1]].pfn * vm->page_size);
-	if (!pte[index[0]].present)
+	pte = addr_gpa2hva(vm, PTE_GET_PFN(pde[index[1]]) * vm->page_size);
+	if (!(pte[index[0]] & PTE_PRESENT_MASK))
 		goto unmapped_gva;
 
-	return (pte[index[0]].pfn * vm->page_size) + (gva & 0xfffu);
+	return (PTE_GET_PFN(pte[index[0]]) * vm->page_size) + (gva & 0xfffu);
 
 unmapped_gva:
 	TEST_FAIL("No mapping for vm virtual address, gva: 0x%lx", gva);
-- 
2.31.1


^ permalink raw reply related	[flat|nested] 3+ messages in thread

* Re: [PATCH] kvm: selftests: do not use bitfields larger than 32-bits for PTEs
  2022-04-20 10:36 [PATCH] kvm: selftests: do not use bitfields larger than 32-bits for PTEs Paolo Bonzini
@ 2022-04-20 14:01 ` Peter Xu
  2022-04-20 15:47 ` Sean Christopherson
  1 sibling, 0 replies; 3+ messages in thread
From: Peter Xu @ 2022-04-20 14:01 UTC (permalink / raw)
  To: Paolo Bonzini; +Cc: linux-kernel, kvm, seanjc

On Wed, Apr 20, 2022 at 06:36:24AM -0400, Paolo Bonzini wrote:
> Red Hat's QE team reported test failure on access_tracking_perf_test:
> 
> Testing guest mode: PA-bits:ANY, VA-bits:48,  4K pages
> guest physical test memory offset: 0x3fffbffff000
> 
> Populating memory             : 0.684014577s
> Writing to populated memory   : 0.006230175s
> Reading from populated memory : 0.004557805s
> ==== Test Assertion Failure ====
>   lib/kvm_util.c:1411: false
>   pid=125806 tid=125809 errno=4 - Interrupted system call
>      1  0x0000000000402f7c: addr_gpa2hva at kvm_util.c:1411
>      2   (inlined by) addr_gpa2hva at kvm_util.c:1405
>      3  0x0000000000401f52: lookup_pfn at access_tracking_perf_test.c:98
>      4   (inlined by) mark_vcpu_memory_idle at access_tracking_perf_test.c:152
>      5   (inlined by) vcpu_thread_main at access_tracking_perf_test.c:232
>      6  0x00007fefe9ff81ce: ?? ??:0
>      7  0x00007fefe9c64d82: ?? ??:0
>   No vm physical memory at 0xffbffff000
> 
> I can easily reproduce it with a Intel(R) Xeon(R) CPU E5-2630 with 46 bits
> PA.
> 
> It turns out that the address translation for clearing idle page tracking
> returned a wrong result; addr_gva2gpa()'s last step, which is based on
> "pte[index[0]].pfn", did the calculation with 40 bits length and the
> high 12 bits got truncated.  In above case the GPA address to be returned
> should be 0x3fffbffff000 for GVA 0xc0000000, but it got truncated into
> 0xffbffff000 and the subsequent gpa2hva lookup failed.
> 
> The width of operations on bit fields greater than 32-bit is
> implementation defined, and differs between GCC (which uses the bitfield
> precision) and clang (which uses 64-bit arithmetic), so this is a
> potential minefield.  Remove the bit fields and using manual masking
> instead.
> 
> Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2075036
> Reported-by: Peter Xu <peterx@redhat.com>

Should be:

Reported-by: Nana Liu <nanliu@redhat.com>

Meanwhile:

Reviewed-by: Peter Xu <peterx@redhat.com>
Tested-by: Peter Xu <peterx@redhat.com>

Thanks,

-- 
Peter Xu


^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH] kvm: selftests: do not use bitfields larger than 32-bits for PTEs
  2022-04-20 10:36 [PATCH] kvm: selftests: do not use bitfields larger than 32-bits for PTEs Paolo Bonzini
  2022-04-20 14:01 ` Peter Xu
@ 2022-04-20 15:47 ` Sean Christopherson
  1 sibling, 0 replies; 3+ messages in thread
From: Sean Christopherson @ 2022-04-20 15:47 UTC (permalink / raw)
  To: Paolo Bonzini; +Cc: linux-kernel, kvm, peterx

On Wed, Apr 20, 2022, Paolo Bonzini wrote:
> ---
>  .../selftests/kvm/lib/x86_64/processor.c      | 202 ++++++++----------
>  1 file changed, 89 insertions(+), 113 deletions(-)
> 
> diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c
> index 9f000dfb5594..90c3d34ce80b 100644
> --- a/tools/testing/selftests/kvm/lib/x86_64/processor.c
> +++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c
> @@ -20,36 +20,18 @@
>  vm_vaddr_t exception_handlers;
>  
>  /* Virtual translation table structure declarations */

Stale comment.

> -struct pageUpperEntry {
> -	uint64_t present:1;
> -	uint64_t writable:1;
> -	uint64_t user:1;
> -	uint64_t write_through:1;
> -	uint64_t cache_disable:1;
> -	uint64_t accessed:1;
> -	uint64_t ignored_06:1;
> -	uint64_t page_size:1;
> -	uint64_t ignored_11_08:4;
> -	uint64_t pfn:40;
> -	uint64_t ignored_62_52:11;
> -	uint64_t execute_disable:1;
> -};
> -
> -struct pageTableEntry {
> -	uint64_t present:1;
> -	uint64_t writable:1;
> -	uint64_t user:1;
> -	uint64_t write_through:1;
> -	uint64_t cache_disable:1;
> -	uint64_t accessed:1;
> -	uint64_t dirty:1;
> -	uint64_t reserved_07:1;
> -	uint64_t global:1;
> -	uint64_t ignored_11_09:3;
> -	uint64_t pfn:40;
> -	uint64_t ignored_62_52:11;
> -	uint64_t execute_disable:1;
> -};
> +#define PTE_PRESENT_MASK  (1ULL << 0)
> +#define PTE_WRITABLE_MASK (1ULL << 1)
> +#define PTE_USER_MASK     (1ULL << 2)
> +#define PTE_ACCESSED_MASK (1ULL << 5)
> +#define PTE_DIRTY_MASK    (1ULL << 6)
> +#define PTE_LARGE_MASK    (1ULL << 7)
> +#define PTE_GLOBAL_MASK   (1ULL << 8)
> +#define PTE_NX_MASK       (1ULL << 63)

Any objection to using BIT_ULL()?  And tab(s) after the MASK so that there's some
breathing room in the unlikely scenario we need a new, longer flag?

> +#define PTE_PFN_MASK      0xFFFFFFFFFF000ULL

GENMASK_ULL(52, 12), or maybe use the PAGE_SHIFT in the generation, though I find
that more difficult to read for whatever reason.

> +#define PTE_PFN_SHIFT     12

Can we use the kernel's nomenclature?  That way if selftests ever find a way to
pull in arch/x86/include/asm/page_types.h, we don't need to do a bunch of renames.
And IMO it will make it easier to context switch between KVM and selftests.


#define PTE_PRESENT_MASK	BIT_ULL(0)
#define PTE_WRITABLE_MASK	BIT_ULL(1)
#define PTE_USER_MASK		BIT_ULL(2)
#define PTE_ACCESSED_MASK	BIT_ULL(5)
#define PTE_DIRTY_MASK		BIT_ULL(6)
#define PTE_LARGE_MASK		BIT_ULL(7)
#define PTE_GLOBAL_MASK		BIT_ULL(8)
#define PTE_NX_MASK		BIT_ULL(63)

#define PAGE_SHIFT		12
#define PAGE_SIZE		(1ULL << PAGE_SHIFT)
#define PHYSICAL_PAGE_MASK	GENMASK_ULL(51, 12)

#define PTE_GET_PFN(pte) (((pte) & PHYSICAL_PAGE_MASK) >> PAGE_SHIFT)


I'd also vote to move these (in a different patch) to processor.h so that selftests
can use PAGE_SIZE in particular.

  tools/testing/selftests/kvm/x86_64 $ git grep -E "define\s+PAGE_SIZE" | wc -l
  6

And _vm_get_page_table_entry() has several gross open-coded masks/shifts that can
be opportunistically converted now

@@ -269,8 +270,7 @@ static uint64_t *_vm_get_page_table_entry(struct kvm_vm *vm, int vcpuid,
        struct kvm_cpuid_entry2 *entry;
        struct kvm_sregs sregs;
        int max_phy_addr;
-       /* Set the bottom 52 bits. */
-       uint64_t rsvd_mask = 0x000fffffffffffff;
+       uint64_t rsvd_mask = PHYSICAL_PAGE_MASK;
 
        entry = kvm_get_supported_cpuid_index(0x80000008, 0);
        max_phy_addr = entry->eax & 0x000000ff;
@@ -284,9 +284,8 @@ static uint64_t *_vm_get_page_table_entry(struct kvm_vm *vm, int vcpuid,
         * the XD flag (bit 63) is reserved.
         */
        vcpu_sregs_get(vm, vcpuid, &sregs);
-       if ((sregs.efer & EFER_NX) == 0) {
-               rsvd_mask |= (1ull << 63);
-       }
+       if (!(sregs.efer & EFER_NX))
+               rsvd_mask |= PTE_NX_MASK;
 


and even more that can/should be cleaned up in the future, e.g. this pile, though
that can be left for a different day.

	/*
	 * Based on the mode check above there are 48 bits in the vaddr, so
	 * shift 16 to sign extend the last bit (bit-47),
	 */
	TEST_ASSERT(vaddr == (((int64_t)vaddr << 16) >> 16),
		"Canonical check failed.  The virtual address is invalid.");

	index[0] = (vaddr >> 12) & 0x1ffu;
	index[1] = (vaddr >> 21) & 0x1ffu;
	index[2] = (vaddr >> 30) & 0x1ffu;
	index[3] = (vaddr >> 39) & 0x1ffu;
  
> -	return (pte[index[0]].pfn * vm->page_size) + (gva & 0xfffu);
> +	return (PTE_GET_PFN(pte[index[0]]) * vm->page_size) + (gva & 0xfffu);

Yeesh, and yet more cleanup.  Probably worth adding

#define PAGE_MASK		(~(PAGE_SIZE-1))

and using ~PAGE_MASK here?  Or defining PAGE_OFFSET?  Though that would conflict
with the kernel's use of PAGE_OFFSET.

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2022-04-20 15:47 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2022-04-20 10:36 [PATCH] kvm: selftests: do not use bitfields larger than 32-bits for PTEs Paolo Bonzini
2022-04-20 14:01 ` Peter Xu
2022-04-20 15:47 ` Sean Christopherson

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox