From: Quentin Perret <qperret@google.com>
To: Marc Zyngier <maz@kernel.org>,
Oliver Upton <oliver.upton@linux.dev>,
Joey Gouly <joey.gouly@arm.com>,
Suzuki K Poulose <suzuki.poulose@arm.com>,
Zenghui Yu <yuzenghui@huawei.com>,
Catalin Marinas <catalin.marinas@arm.com>,
Will Deacon <will@kernel.org>
Cc: Fuad Tabba <tabba@google.com>,
Vincent Donnefort <vdonnefort@google.com>,
Sebastian Ene <sebastianene@google.com>,
linux-arm-kernel@lists.infradead.org, kvmarm@lists.linux.dev,
linux-kernel@vger.kernel.org
Subject: [PATCH 04/18] KVM: arm64: Move host page ownership tracking to the hyp vmemmap
Date: Mon, 4 Nov 2024 13:31:50 +0000 [thread overview]
Message-ID: <20241104133204.85208-5-qperret@google.com> (raw)
In-Reply-To: <20241104133204.85208-1-qperret@google.com>
We currently store part of the page-tracking state in PTE software bits
for the host, guests and the hypervisor. This is sub-optimal when e.g.
sharing pages as this forces to break block mappings purely to support
this software tracking. This causes an unnecessarily fragmented stage-2
page-table for the host in particular when it shares pages with Secure,
which can lead to measurable regressions. Moreover, having this state
stored in the page-table forces us to do multiple costly walks on the
page transition path, hence causing overhead.
In order to work around these problems, move the host-side page-tracking
logic from SW bits in its stage-2 PTEs to the hypervisor's vmemmap.
Signed-off-by: Quentin Perret <qperret@google.com>
---
arch/arm64/kvm/hyp/include/nvhe/memory.h | 6 +-
arch/arm64/kvm/hyp/nvhe/mem_protect.c | 94 ++++++++++++++++--------
arch/arm64/kvm/hyp/nvhe/setup.c | 7 +-
3 files changed, 71 insertions(+), 36 deletions(-)
diff --git a/arch/arm64/kvm/hyp/include/nvhe/memory.h b/arch/arm64/kvm/hyp/include/nvhe/memory.h
index 88cb8ff9e769..08f3a0416d4c 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/memory.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/memory.h
@@ -8,7 +8,7 @@
#include <linux/types.h>
/*
- * SW bits 0-1 are reserved to track the memory ownership state of each page:
+ * Bits 0-1 are reserved to track the memory ownership state of each page:
* 00: The page is owned exclusively by the page-table owner.
* 01: The page is owned by the page-table owner, but is shared
* with another entity.
@@ -44,7 +44,9 @@ static inline enum pkvm_page_state pkvm_getstate(enum kvm_pgtable_prot prot)
struct hyp_page {
u16 refcount;
u8 order;
- u8 reserved;
+
+ /* Host (non-meta) state. Guarded by the host stage-2 lock. */
+ enum pkvm_page_state host_state : 8;
};
extern u64 __hyp_vmemmap;
diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
index caba3e4bd09e..1595081c4f6b 100644
--- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c
+++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
@@ -201,8 +201,8 @@ static void *guest_s2_zalloc_page(void *mc)
memset(addr, 0, PAGE_SIZE);
p = hyp_virt_to_page(addr);
- memset(p, 0, sizeof(*p));
p->refcount = 1;
+ p->order = 0;
return addr;
}
@@ -268,6 +268,7 @@ int kvm_guest_prepare_stage2(struct pkvm_hyp_vm *vm, void *pgd)
void reclaim_guest_pages(struct pkvm_hyp_vm *vm, struct kvm_hyp_memcache *mc)
{
+ struct hyp_page *page;
void *addr;
/* Dump all pgtable pages in the hyp_pool */
@@ -279,7 +280,9 @@ void reclaim_guest_pages(struct pkvm_hyp_vm *vm, struct kvm_hyp_memcache *mc)
/* Drain the hyp_pool into the memcache */
addr = hyp_alloc_pages(&vm->pool, 0);
while (addr) {
- memset(hyp_virt_to_page(addr), 0, sizeof(struct hyp_page));
+ page = hyp_virt_to_page(addr);
+ page->refcount = 0;
+ page->order = 0;
push_hyp_memcache(mc, addr, hyp_virt_to_phys);
WARN_ON(__pkvm_hyp_donate_host(hyp_virt_to_pfn(addr), 1));
addr = hyp_alloc_pages(&vm->pool, 0);
@@ -382,19 +385,25 @@ bool addr_is_memory(phys_addr_t phys)
return !!find_mem_range(phys, &range);
}
-static bool addr_is_allowed_memory(phys_addr_t phys)
+static bool is_in_mem_range(u64 addr, struct kvm_mem_range *range)
+{
+ return range->start <= addr && addr < range->end;
+}
+
+static int range_is_allowed_memory(u64 start, u64 end)
{
struct memblock_region *reg;
struct kvm_mem_range range;
- reg = find_mem_range(phys, &range);
+ /* Can't check the state of both MMIO and memory regions at once */
+ reg = find_mem_range(start, &range);
+ if (!is_in_mem_range(end - 1, &range))
+ return -EINVAL;
- return reg && !(reg->flags & MEMBLOCK_NOMAP);
-}
+ if (!reg || reg->flags & MEMBLOCK_NOMAP)
+ return -EPERM;
-static bool is_in_mem_range(u64 addr, struct kvm_mem_range *range)
-{
- return range->start <= addr && addr < range->end;
+ return 0;
}
static bool range_is_memory(u64 start, u64 end)
@@ -454,8 +463,11 @@ static int host_stage2_adjust_range(u64 addr, struct kvm_mem_range *range)
if (kvm_pte_valid(pte))
return -EAGAIN;
- if (pte)
+ if (pte) {
+ WARN_ON(addr_is_memory(addr) &&
+ !(hyp_phys_to_page(addr)->host_state & PKVM_NOPAGE));
return -EPERM;
+ }
do {
u64 granule = kvm_granule_size(level);
@@ -477,10 +489,29 @@ int host_stage2_idmap_locked(phys_addr_t addr, u64 size,
return host_stage2_try(__host_stage2_idmap, addr, addr + size, prot);
}
+static void __host_update_page_state(phys_addr_t addr, u64 size, enum pkvm_page_state state)
+{
+ phys_addr_t end = addr + size;
+ for (; addr < end; addr += PAGE_SIZE)
+ hyp_phys_to_page(addr)->host_state = state;
+}
+
int host_stage2_set_owner_locked(phys_addr_t addr, u64 size, u8 owner_id)
{
- return host_stage2_try(kvm_pgtable_stage2_set_owner, &host_mmu.pgt,
- addr, size, &host_s2_pool, owner_id);
+ int ret;
+
+ ret = host_stage2_try(kvm_pgtable_stage2_set_owner, &host_mmu.pgt,
+ addr, size, &host_s2_pool, owner_id);
+ if (ret || !addr_is_memory(addr))
+ return ret;
+
+ /* Don't forget to update the vmemmap tracking for the host */
+ if (owner_id == PKVM_ID_HOST)
+ __host_update_page_state(addr, size, PKVM_PAGE_OWNED);
+ else
+ __host_update_page_state(addr, size, PKVM_NOPAGE);
+
+ return 0;
}
static bool host_stage2_force_pte_cb(u64 addr, u64 end, enum kvm_pgtable_prot prot)
@@ -604,35 +635,38 @@ static int check_page_state_range(struct kvm_pgtable *pgt, u64 addr, u64 size,
return kvm_pgtable_walk(pgt, addr, size, &walker);
}
-static enum pkvm_page_state host_get_page_state(kvm_pte_t pte, u64 addr)
-{
- if (!addr_is_allowed_memory(addr))
- return PKVM_NOPAGE;
-
- if (!kvm_pte_valid(pte) && pte)
- return PKVM_NOPAGE;
-
- return pkvm_getstate(kvm_pgtable_stage2_pte_prot(pte));
-}
-
static int __host_check_page_state_range(u64 addr, u64 size,
enum pkvm_page_state state)
{
- struct check_walk_data d = {
- .desired = state,
- .get_page_state = host_get_page_state,
- };
+ u64 end = addr + size;
+ int ret;
+
+ ret = range_is_allowed_memory(addr, end);
+ if (ret)
+ return ret;
hyp_assert_lock_held(&host_mmu.lock);
- return check_page_state_range(&host_mmu.pgt, addr, size, &d);
+ for (; addr < end; addr += PAGE_SIZE) {
+ if (hyp_phys_to_page(addr)->host_state != state)
+ return -EPERM;
+ }
+
+ return 0;
}
static int __host_set_page_state_range(u64 addr, u64 size,
enum pkvm_page_state state)
{
- enum kvm_pgtable_prot prot = pkvm_mkstate(PKVM_HOST_MEM_PROT, state);
+ if (hyp_phys_to_page(addr)->host_state & PKVM_NOPAGE) {
+ int ret = host_stage2_idmap_locked(addr, size, PKVM_HOST_MEM_PROT);
- return host_stage2_idmap_locked(addr, size, prot);
+ if (ret)
+ return ret;
+ }
+
+ __host_update_page_state(addr, size, state);
+
+ return 0;
}
static int host_request_owned_transition(u64 *completer_addr,
diff --git a/arch/arm64/kvm/hyp/nvhe/setup.c b/arch/arm64/kvm/hyp/nvhe/setup.c
index 174007f3fadd..c315710f57ad 100644
--- a/arch/arm64/kvm/hyp/nvhe/setup.c
+++ b/arch/arm64/kvm/hyp/nvhe/setup.c
@@ -198,7 +198,6 @@ static void hpool_put_page(void *addr)
static int fix_host_ownership_walker(const struct kvm_pgtable_visit_ctx *ctx,
enum kvm_pgtable_walk_flags visit)
{
- enum kvm_pgtable_prot prot;
enum pkvm_page_state state;
phys_addr_t phys;
@@ -221,16 +220,16 @@ static int fix_host_ownership_walker(const struct kvm_pgtable_visit_ctx *ctx,
case PKVM_PAGE_OWNED:
return host_stage2_set_owner_locked(phys, PAGE_SIZE, PKVM_ID_HYP);
case PKVM_PAGE_SHARED_OWNED:
- prot = pkvm_mkstate(PKVM_HOST_MEM_PROT, PKVM_PAGE_SHARED_BORROWED);
+ hyp_phys_to_page(phys)->host_state = PKVM_PAGE_SHARED_BORROWED;
break;
case PKVM_PAGE_SHARED_BORROWED:
- prot = pkvm_mkstate(PKVM_HOST_MEM_PROT, PKVM_PAGE_SHARED_OWNED);
+ hyp_phys_to_page(phys)->host_state = PKVM_PAGE_SHARED_OWNED;
break;
default:
return -EINVAL;
}
- return host_stage2_idmap_locked(phys, PAGE_SIZE, prot);
+ return 0;
}
static int fix_hyp_pgtable_refcnt_walker(const struct kvm_pgtable_visit_ctx *ctx,
--
2.47.0.163.g1226f6d8fa-goog
next prev parent reply other threads:[~2024-11-04 13:32 UTC|newest]
Thread overview: 24+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-11-04 13:31 [PATCH 00/18] KVM: arm64: Non-protected guest stage-2 support for pKVM Quentin Perret
2024-11-04 13:31 ` [PATCH 01/18] KVM: arm64: Change the layout of enum pkvm_page_state Quentin Perret
2024-11-04 17:31 ` Sebastian Ene
2024-11-04 17:46 ` Quentin Perret
2024-11-04 13:31 ` [PATCH 02/18] KVM: arm64: Move enum pkvm_page_state to memory.h Quentin Perret
2024-11-04 13:31 ` [PATCH 03/18] KVM: arm64: Make hyp_page::order a u8 Quentin Perret
2024-11-04 13:31 ` Quentin Perret [this message]
2024-11-04 13:31 ` [PATCH 05/18] KVM: arm64: Pass walk flags to kvm_pgtable_stage2_mkyoung Quentin Perret
2024-11-04 13:31 ` [PATCH 06/18] KVM: arm64: Pass walk flags to kvm_pgtable_stage2_relax_perms Quentin Perret
2024-11-04 13:31 ` [PATCH 07/18] KVM: arm64: Make kvm_pgtable_stage2_init() a static inline function Quentin Perret
2024-11-04 13:31 ` [PATCH 08/18] KVM: arm64: Introduce pkvm_vcpu_{load,put}() Quentin Perret
2024-11-04 13:31 ` [PATCH 09/18] KVM: arm64: Introduce {get,put}_pkvm_hyp_vm() helpers Quentin Perret
2024-11-04 13:31 ` [PATCH 10/18] KVM: arm64: Introduce __pkvm_host_share_guest() Quentin Perret
2024-11-04 13:31 ` [PATCH 11/18] KVM: arm64: Introduce __pkvm_host_unshare_guest() Quentin Perret
2024-11-04 13:31 ` [PATCH 12/18] KVM: arm64: Introduce __pkvm_host_relax_guest_perms() Quentin Perret
2024-11-04 13:31 ` [PATCH 13/18] KVM: arm64: Introduce __pkvm_host_wrprotect_guest() Quentin Perret
2024-11-04 13:32 ` [PATCH 14/18] KVM: arm64: Introduce __pkvm_host_test_clear_young_guest() Quentin Perret
2024-11-04 13:32 ` [PATCH 15/18] KVM: arm64: Introduce __pkvm_host_mkyoung_guest() Quentin Perret
2024-11-04 13:32 ` [PATCH 16/18] KVM: arm64: Introduce __pkvm_tlb_flush_vmid() Quentin Perret
2024-11-04 13:32 ` [PATCH 17/18] KVM: arm64: Introduce the EL1 pKVM MMU Quentin Perret
2024-11-06 16:58 ` Quentin Perret
2024-11-04 13:32 ` [PATCH 18/18] KVM: arm64: Plumb the pKVM MMU in KVM Quentin Perret
2024-11-05 5:53 ` kernel test robot
2024-11-05 16:07 ` Quentin Perret
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20241104133204.85208-5-qperret@google.com \
--to=qperret@google.com \
--cc=catalin.marinas@arm.com \
--cc=joey.gouly@arm.com \
--cc=kvmarm@lists.linux.dev \
--cc=linux-arm-kernel@lists.infradead.org \
--cc=linux-kernel@vger.kernel.org \
--cc=maz@kernel.org \
--cc=oliver.upton@linux.dev \
--cc=sebastianene@google.com \
--cc=suzuki.poulose@arm.com \
--cc=tabba@google.com \
--cc=vdonnefort@google.com \
--cc=will@kernel.org \
--cc=yuzenghui@huawei.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.