From: Yi-De Wu <yi-de.wu@mediatek.com>
To: Yingshiuan Pan <yingshiuan.pan@mediatek.com>,
Ze-Yu Wang <ze-yu.wang@mediatek.com>,
Yi-De Wu <yi-de.wu@mediatek.com>,
Rob Herring <robh+dt@kernel.org>,
Krzysztof Kozlowski <krzysztof.kozlowski+dt@linaro.org>,
Conor Dooley <conor+dt@kernel.org>,
Jonathan Corbet <corbet@lwn.net>,
Catalin Marinas <catalin.marinas@arm.com>,
Wihl Deacon <will@kernel.org>,
Steven Rostedt <rostedt@goodmis.org>,
Masami Hiramatsu <mhiramat@kernel.org>,
Mathieu Desnoyers <mathieu.desnoyers@efficios.com>,
Richard Cochran <richardcochran@gmail.com>,
Matthias Brugger <matthias.bgg@gmail.com>,
AngeloGioacchino Del Regno
<angelogioacchino.delregno@collabora.com>
Cc: <devicetree@vger.kernel.org>, <linux-kernel@vger.kernel.org>,
<linux-doc@vger.kernel.org>,
<linux-arm-kernel@lists.infradead.org>,
<linux-trace-kernel@vger.kernel.org>, <netdev@vger.kernel.org>,
<linux-mediatek@lists.infradead.org>,
David Bradil <dbrazdil@google.com>,
Trilok Soni <quic_tsoni@quicinc.com>,
Jade Shih <jades.shih@mediatek.com>,
Ivan Tseng <ivan.tseng@mediatek.com>,
My Chuang <my.chuang@mediatek.com>,
Shawn Hsiao <shawn.hsiao@mediatek.com>,
PeiLun Suei <peilun.suei@mediatek.com>,
Liju Chen <liju-clr.chen@mediatek.com>,
Willix Yeh <chi-shen.yeh@mediatek.com>,
Kevenny Hsieh <kevenny.hsieh@mediatek.com>
Subject: [PATCH v8 16/20] virt: geniezone: Add memory pin/unpin support
Date: Thu, 28 Dec 2023 18:51:43 +0800 [thread overview]
Message-ID: <20231228105147.13752-17-yi-de.wu@mediatek.com> (raw)
In-Reply-To: <20231228105147.13752-1-yi-de.wu@mediatek.com>
From: "Jerry Wang" <ze-yu.wang@mediatek.com>
Protected VM's memory cannot be swapped out because the memory pages are
protected from host access.
Once host accesses to those protected pages, the hardware exception is
triggered and may crash the host. So, we have to make those protected
pages be ineligible for swapping or merging by the host kernel to avoid
host access. To do so, we pin the page when it is assigned (donated) to
VM and unpin when VM relinquish the pages or is destroyed. Besides, the
protected VM’s memory requires hypervisor to clear the content before
returning to host, but VMM may free those memory before clearing, it
will result in those memory pages are reclaimed and reused before
totally clearing. Using pin/unpin can also avoid the above problems.
The implementation is described as follows.
- Use rb_tree to store pinned memory pages.
- Pin the page when handling page fault.
- Unpin the pages when VM relinquish the pages or is destroyed.
Signed-off-by: Jerry Wang <ze-yu.wang@mediatek.com>
Signed-off-by: Yingshiuan Pan <yingshiuan.pan@mediatek.com>
Signed-off-by: Liju Chen <liju-clr.chen@mediatek.com>
Signed-off-by: Yi-De Wu <yi-de.wu@mediatek.com>
---
drivers/virt/geniezone/gzvm_mmu.c | 74 ++++++++++++++++++++++++++++++-
drivers/virt/geniezone/gzvm_vm.c | 21 +++++++++
include/linux/gzvm_drv.h | 12 +++++
3 files changed, 106 insertions(+), 1 deletion(-)
diff --git a/drivers/virt/geniezone/gzvm_mmu.c b/drivers/virt/geniezone/gzvm_mmu.c
index 1f8471509be2..857cc68f76dc 100644
--- a/drivers/virt/geniezone/gzvm_mmu.c
+++ b/drivers/virt/geniezone/gzvm_mmu.c
@@ -107,8 +107,73 @@ int gzvm_gfn_to_pfn_memslot(struct gzvm_memslot *memslot, u64 gfn,
return 0;
}
+static int cmp_ppages(struct rb_node *node, const struct rb_node *parent)
+{
+ struct gzvm_pinned_page *a = container_of(node,
+ struct gzvm_pinned_page,
+ node);
+ struct gzvm_pinned_page *b = container_of(parent,
+ struct gzvm_pinned_page,
+ node);
+
+ if (a->ipa < b->ipa)
+ return -1;
+ if (a->ipa > b->ipa)
+ return 1;
+ return 0;
+}
+
+static int rb_ppage_cmp(const void *key, const struct rb_node *node)
+{
+ struct gzvm_pinned_page *p = container_of(node,
+ struct gzvm_pinned_page,
+ node);
+ phys_addr_t ipa = (phys_addr_t)key;
+
+ return (ipa < p->ipa) ? -1 : (ipa > p->ipa);
+}
+
+/* Invoker of this function is responsible for locking */
+static int gzvm_insert_ppage(struct gzvm *vm, struct gzvm_pinned_page *ppage)
+{
+ if (rb_find_add(&ppage->node, &vm->pinned_pages, cmp_ppages))
+ return -EEXIST;
+ return 0;
+}
+
+static int pin_one_page(struct gzvm *vm, unsigned long hva, u64 gpa)
+{
+ unsigned int flags = FOLL_HWPOISON | FOLL_LONGTERM | FOLL_WRITE;
+ struct gzvm_pinned_page *ppage = NULL;
+ struct mm_struct *mm = current->mm;
+ struct page *page = NULL;
+
+ ppage = kmalloc(sizeof(*ppage), GFP_KERNEL_ACCOUNT);
+ if (!ppage)
+ return -ENOMEM;
+
+ mmap_read_lock(mm);
+ pin_user_pages(hva, 1, flags, &page);
+ mmap_read_unlock(mm);
+
+ if (!page) {
+ kfree(ppage);
+ return -EFAULT;
+ }
+
+ ppage->page = page;
+ ppage->ipa = gpa;
+
+ mutex_lock(&vm->mem_lock);
+ gzvm_insert_ppage(vm, ppage);
+ mutex_unlock(&vm->mem_lock);
+
+ return 0;
+}
+
static int handle_block_demand_page(struct gzvm *vm, int memslot_id, u64 gfn)
{
+ unsigned long hva;
u64 pfn, __gfn;
int ret, i;
@@ -131,6 +196,11 @@ static int handle_block_demand_page(struct gzvm *vm, int memslot_id, u64 gfn)
goto err_unlock;
}
vm->demand_page_buffer[i] = pfn;
+
+ hva = gzvm_gfn_to_hva_memslot(&vm->memslot[memslot_id], __gfn);
+ ret = pin_one_page(vm, hva, PFN_PHYS(__gfn));
+ if (ret)
+ goto err_unlock;
}
ret = gzvm_arch_map_guest_block(vm->vm_id, memslot_id, start_gfn,
@@ -148,6 +218,7 @@ static int handle_block_demand_page(struct gzvm *vm, int memslot_id, u64 gfn)
static int handle_single_demand_page(struct gzvm *vm, int memslot_id, u64 gfn)
{
+ unsigned long hva;
int ret;
u64 pfn;
@@ -159,7 +230,8 @@ static int handle_single_demand_page(struct gzvm *vm, int memslot_id, u64 gfn)
if (unlikely(ret))
return -EFAULT;
- return 0;
+ hva = gzvm_gfn_to_hva_memslot(&vm->memslot[memslot_id], gfn);
+ return pin_one_page(vm, hva, PFN_PHYS(gfn));
}
/**
diff --git a/drivers/virt/geniezone/gzvm_vm.c b/drivers/virt/geniezone/gzvm_vm.c
index 485d1e2097aa..1af78f161eb3 100644
--- a/drivers/virt/geniezone/gzvm_vm.c
+++ b/drivers/virt/geniezone/gzvm_vm.c
@@ -292,6 +292,22 @@ static long gzvm_vm_ioctl(struct file *filp, unsigned int ioctl,
return ret;
}
+/* Invoker of this function is responsible for locking */
+static void gzvm_destroy_all_ppage(struct gzvm *gzvm)
+{
+ struct gzvm_pinned_page *ppage;
+ struct rb_node *node;
+
+ node = rb_first(&gzvm->pinned_pages);
+ while (node) {
+ ppage = rb_entry(node, struct gzvm_pinned_page, node);
+ unpin_user_pages_dirty_lock(&ppage->page, 1, true);
+ node = rb_next(node);
+ rb_erase(&ppage->node, &gzvm->pinned_pages);
+ kfree(ppage);
+ }
+}
+
static void gzvm_destroy_vm(struct gzvm *gzvm)
{
size_t allocated_size;
@@ -315,6 +331,9 @@ static void gzvm_destroy_vm(struct gzvm *gzvm)
mutex_unlock(&gzvm->lock);
+ /* No need to lock here becauese it's single-threaded execution */
+ gzvm_destroy_all_ppage(gzvm);
+
kfree(gzvm);
}
@@ -390,6 +409,8 @@ static struct gzvm *gzvm_create_vm(unsigned long vm_type)
gzvm->vm_id = ret;
gzvm->mm = current->mm;
mutex_init(&gzvm->lock);
+ mutex_init(&gzvm->mem_lock);
+ gzvm->pinned_pages = RB_ROOT;
ret = gzvm_vm_irqfd_init(gzvm);
if (ret) {
diff --git a/include/linux/gzvm_drv.h b/include/linux/gzvm_drv.h
index 0a8b21e1ed1a..116d1d7c1562 100644
--- a/include/linux/gzvm_drv.h
+++ b/include/linux/gzvm_drv.h
@@ -12,6 +12,7 @@
#include <linux/mutex.h>
#include <linux/gzvm.h>
#include <linux/srcu.h>
+#include <linux/rbtree.h>
/*
* For the normal physical address, the highest 12 bits should be zero, so we
@@ -82,6 +83,12 @@ struct gzvm_vcpu {
struct gzvm_vcpu_hwstate *hwstate;
};
+struct gzvm_pinned_page {
+ struct rb_node node;
+ struct page *page;
+ u64 ipa;
+};
+
struct gzvm {
struct gzvm_vcpu *vcpus[GZVM_MAX_VCPUS];
/* userspace tied to this vm */
@@ -121,6 +128,11 @@ struct gzvm {
* at the same time
*/
struct mutex demand_paging_lock;
+
+ /* Use rb-tree to record pin/unpin page */
+ struct rb_root pinned_pages;
+ /* lock for memory operations */
+ struct mutex mem_lock;
};
long gzvm_dev_ioctl_check_extension(struct gzvm *gzvm, unsigned long args);
--
2.18.0
next prev parent reply other threads:[~2023-12-28 10:52 UTC|newest]
Thread overview: 23+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-12-28 10:51 [PATCH v8 00/20] GenieZone hypervisor drivers Yi-De Wu
2023-12-28 10:51 ` [PATCH v8 01/20] docs: geniezone: Introduce GenieZone hypervisor Yi-De Wu
2023-12-28 10:51 ` [PATCH v8 02/20] dt-bindings: hypervisor: Add MediaTek " Yi-De Wu
2023-12-28 10:51 ` [PATCH v8 03/20] virt: geniezone: Add GenieZone hypervisor driver Yi-De Wu
2023-12-28 10:51 ` [PATCH v8 04/20] virt: geniezone: Add vm support Yi-De Wu
2023-12-28 10:51 ` [PATCH v8 05/20] virt: geniezone: Add set_user_memory_region for vm Yi-De Wu
2023-12-28 10:51 ` [PATCH v8 06/20] virt: geniezone: Add vm capability check Yi-De Wu
2023-12-28 10:51 ` [PATCH v8 07/20] virt: geniezone: Optimize performance of protected VM memory Yi-De Wu
2023-12-28 10:51 ` [PATCH v8 08/20] virt: geniezone: Add vcpu support Yi-De Wu
2024-01-04 17:52 ` Simon Horman
2023-12-28 10:51 ` [PATCH v8 09/20] virt: geniezone: Add irqchip support for virtual interrupt injection Yi-De Wu
2023-12-28 10:51 ` [PATCH v8 10/20] virt: geniezone: Add irqfd support Yi-De Wu
2023-12-28 10:51 ` [PATCH v8 11/20] virt: geniezone: Add ioeventfd support Yi-De Wu
2023-12-28 10:51 ` [PATCH v8 12/20] virt: geniezone: Add memory region support Yi-De Wu
2023-12-28 10:51 ` [PATCH v8 13/20] virt: geniezone: Add dtb config support Yi-De Wu
2023-12-28 10:51 ` [PATCH v8 14/20] virt: geniezone: Add demand paging support Yi-De Wu
2023-12-28 10:51 ` [PATCH v8 15/20] virt: geniezone: Add block-based " Yi-De Wu
2023-12-28 10:51 ` Yi-De Wu [this message]
2023-12-28 10:51 ` [PATCH v8 17/20] virt: geniezone: Add memory relinquish support Yi-De Wu
2023-12-28 10:51 ` [PATCH v8 18/20] virt: geniezone: Provide individual VM memory statistics within debugfs Yi-De Wu
2023-12-28 10:51 ` [PATCH v8 19/20] virt: geniezone: Add tracing support for hyp call and vcpu exit_reason Yi-De Wu
2024-03-28 15:40 ` Steven Rostedt
2023-12-28 10:51 ` [PATCH v8 20/20] virt: geniezone: Enable PTP for synchronizing time between host and guest VMs Yi-De Wu
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20231228105147.13752-17-yi-de.wu@mediatek.com \
--to=yi-de.wu@mediatek.com \
--cc=angelogioacchino.delregno@collabora.com \
--cc=catalin.marinas@arm.com \
--cc=chi-shen.yeh@mediatek.com \
--cc=conor+dt@kernel.org \
--cc=corbet@lwn.net \
--cc=dbrazdil@google.com \
--cc=devicetree@vger.kernel.org \
--cc=ivan.tseng@mediatek.com \
--cc=jades.shih@mediatek.com \
--cc=kevenny.hsieh@mediatek.com \
--cc=krzysztof.kozlowski+dt@linaro.org \
--cc=liju-clr.chen@mediatek.com \
--cc=linux-arm-kernel@lists.infradead.org \
--cc=linux-doc@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mediatek@lists.infradead.org \
--cc=linux-trace-kernel@vger.kernel.org \
--cc=mathieu.desnoyers@efficios.com \
--cc=matthias.bgg@gmail.com \
--cc=mhiramat@kernel.org \
--cc=my.chuang@mediatek.com \
--cc=netdev@vger.kernel.org \
--cc=peilun.suei@mediatek.com \
--cc=quic_tsoni@quicinc.com \
--cc=richardcochran@gmail.com \
--cc=robh+dt@kernel.org \
--cc=rostedt@goodmis.org \
--cc=shawn.hsiao@mediatek.com \
--cc=will@kernel.org \
--cc=yingshiuan.pan@mediatek.com \
--cc=ze-yu.wang@mediatek.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).