public inbox for kvm@vger.kernel.org
 help / color / mirror / Atom feed
From: Lilit Janpoladyan <lilitj@amazon.com>
To: <kvm@vger.kernel.org>, <maz@kernel.org>, <oliver.upton@linux.dev>,
	<james.morse@arm.com>, <suzuki.poulose@arm.com>,
	<yuzenghui@huawei.com>, <nh-open-source@amazon.com>,
	<lilitj@amazon.com>
Subject: [PATCH 1/8] arm64: add an interface for stage-2 page tracking
Date: Wed, 18 Sep 2024 15:28:00 +0000	[thread overview]
Message-ID: <20240918152807.25135-2-lilitj@amazon.com> (raw)
In-Reply-To: <20240918152807.25135-1-lilitj@amazon.com>

Add an interface for tracking stage-2 page accesses. The interface
can be implemented by a driver for a device that has such capabilities,
e.g. the AWS Graviton Page Tracking Agent accelerator. When a device
implementing the page_tracking_device interface is available, KVM will
use it to accelerate dirty logging. The initial version of the
interface supports dirty logging only, but the interface can be
extended to other use cases, such as WSS calculation.

page_tracking_device supports tracking stage-2 translations by VMID
and by CPU ID. While VMID filter is required, CPU ID is optional.
CPU ID == -1 denotes any CPU. Similarly, page_tracking_device allows
getting pages logged for either a particular CPU or for all. KVM
can use CPU ID of -1 to populate dirty bitmaps and a specific
CPU ID for per vCPU dirty rings.

Signed-off-by: Lilit Janpoladyan <lilitj@amazon.com>
---
 arch/arm64/include/asm/page_tracking.h |  79 +++++++++++++
 arch/arm64/kvm/Kconfig                 |  12 ++
 arch/arm64/kvm/Makefile                |   1 +
 arch/arm64/kvm/page_tracking.c         | 158 +++++++++++++++++++++++++
 4 files changed, 250 insertions(+)
 create mode 100644 arch/arm64/include/asm/page_tracking.h
 create mode 100644 arch/arm64/kvm/page_tracking.c

diff --git a/arch/arm64/include/asm/page_tracking.h b/arch/arm64/include/asm/page_tracking.h
new file mode 100644
index 000000000000..5162fb5b648e
--- /dev/null
+++ b/arch/arm64/include/asm/page_tracking.h
@@ -0,0 +1,79 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ARM64_PAGE_TRACKING_DEVICE_H
+#define _ARM64_PAGE_TRACKING_DEVICE_H
+
+#include <linux/types.h>
+#include <linux/kvm_types.h>
+
+/* Page tracking modes; dirty page logging is the only mode defined so far */
+enum pt_mode {
+	dirty_pages,
+};
+
+/* Configuration of a per-VM page tracker, passed by value when allocating one */
+struct pt_config {
+	enum pt_mode mode; /* Tracking mode, see enum pt_mode */
+	u32 vmid;	/* VMID to track */
+};
+
+/* Callbacks provided by the page tracking device; @ctx is an opaque per-VM tracker */
+struct page_tracking_device {
+
+	/* Allocates a per-VM tracker for @config, returns its tracking context */
+	void* (*allocate_tracker)(struct pt_config config);
+
+	/* Releases the per-VM tracker identified by @ctx */
+	int (*release_tracker)(void *ctx);
+
+	/*
+	 * Enables tracking for the specified @ctx and the specified @cpu,
+	 * @cpu = -1 enables tracking for all cpus
+	 *
+	 * The function may be called for the same @ctx and @cpu multiple
+	 * times and the implementation has to do reference counting to
+	 * correctly disable the tracking.
+	 * @returns 0 on success, negative errno in case of a failure
+	 */
+	int (*enable_tracking)(void *ctx, int cpu);
+
+	/*
+	 * Disables tracking for the specified @ctx and the specified @cpu
+	 *
+	 * Tracking of the @ctx and the @cpu is actually disabled only when
+	 * the number of disable and enable calls matches, i.e. when the
+	 * reference counter is at 0. @returns 0 in this case, -EBUSY while
+	 * reference counter > 0 and negative errno in case of a failure
+	 */
+	int (*disable_tracking)(void *ctx, int cpu);
+
+	/*
+	 * Flushes any tracking data available for the @ctx,
+	 * @returns 0 on success, negative errno in case of a failure
+	 */
+	int (*flush)(void *ctx);
+
+	/*
+	 * Reads up to @max dirty pages available for the @ctx into @pages
+	 * In case @cpu id is not -1, reads only pages dirtied by the specified cpu
+	 * @returns number of read pages or -errno in case of a failure
+	 */
+	int (*read_dirty_pages)(void *ctx,
+				int cpu,
+				gpa_t *pages,
+				u32 max);
+};
+
+/* Page tracking device tear-down, bring-up and existence checks */
+void page_tracking_device_unregister(struct page_tracking_device *pt_dev);
+int page_tracking_device_register(struct page_tracking_device *pt_dev);
+int page_tracking_device_registered(void);
+
+/* Page tracking device wrappers */
+void *page_tracking_allocate(struct pt_config config);
+int page_tracking_release(void *ctx);
+int page_tracking_enable(void *ctx, int cpu);
+int page_tracking_disable(void *ctx, int cpu);
+int page_tracking_flush(void *ctx);
+int page_tracking_read_dirty_pages(void *ctx, int cpu, gpa_t *pages, u32 max);
+
+#endif /* _ARM64_PAGE_TRACKING_DEVICE_H */
diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig
index 8304eb342be9..33844658279b 100644
--- a/arch/arm64/kvm/Kconfig
+++ b/arch/arm64/kvm/Kconfig
@@ -66,4 +66,16 @@ config PROTECTED_NVHE_STACKTRACE
 
 	  If unsure, or not using protected nVHE (pKVM), say N.
 
+config HAVE_KVM_PAGE_TRACKING_DEVICE
+	bool "Support for hardware accelerated dirty tracking"
+	default n
+	help
+	  Say Y to enable hardware accelerated dirty tracking
+
+	  Adds support for hardware accelerated dirty tracking during live
+	  migration of a virtual machine. Requires a hardware accelerator.
+
+	  If there is no required hardware, say N.
+
+
 endif # VIRTUALIZATION
diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile
index 86a629aaf0a1..4e4f5c63baf2 100644
--- a/arch/arm64/kvm/Makefile
+++ b/arch/arm64/kvm/Makefile
@@ -18,6 +18,7 @@ kvm-y += arm.o mmu.o mmio.o psci.o hypercalls.o pvtime.o \
 	 guest.o debug.o reset.o sys_regs.o stacktrace.o \
 	 vgic-sys-reg-v3.o fpsimd.o pkvm.o \
 	 arch_timer.o trng.o vmid.o emulate-nested.o nested.o \
+	 page_tracking.o \
 	 vgic/vgic.o vgic/vgic-init.o \
 	 vgic/vgic-irqfd.o vgic/vgic-v2.o \
 	 vgic/vgic-v3.o vgic/vgic-v4.o \
diff --git a/arch/arm64/kvm/page_tracking.c b/arch/arm64/kvm/page_tracking.c
new file mode 100644
index 000000000000..a81c917d4faa
--- /dev/null
+++ b/arch/arm64/kvm/page_tracking.c
@@ -0,0 +1,158 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <asm/page_tracking.h>
+#include <linux/mutex.h>
+#include <linux/rcupdate.h>
+
+#ifndef CONFIG_HAVE_KVM_PAGE_TRACKING_DEVICE
+/* Page tracking support is compiled out: no-op stubs returning 0 (NULL for allocate) */
+int page_tracking_device_register(struct page_tracking_device *dev) { return 0; }
+void page_tracking_device_unregister(struct page_tracking_device *dev) {}
+int page_tracking_device_registered(void) { return 0; }
+void *page_tracking_allocate(struct pt_config config) { return NULL; }
+int page_tracking_release(void *ctx) { return 0; }
+int page_tracking_enable(void *ctx, int cpu) { return 0; }
+int page_tracking_disable(void *ctx, int cpu) { return 0; }
+int page_tracking_flush(void *ctx) { return 0; }
+int page_tracking_read_dirty_pages(void *ctx, int cpu, gpa_t *pages, u32 max) { return 0; }
+
+#else
+
+static DEFINE_MUTEX(page_tracking_device_mutex);
+static struct page_tracking_device __rcu *pt_dev __read_mostly;
+
+/* Registers @dev as the page tracking device; @returns 0, or -EBUSY if one already exists */
+int page_tracking_device_register(struct page_tracking_device *dev)
+{
+	int rc = -EBUSY;
+
+	mutex_lock(&page_tracking_device_mutex);
+
+	/* Only a single device can be registered at a time */
+	if (!rcu_dereference_protected(pt_dev, lockdep_is_held(&page_tracking_device_mutex))) {
+		rcu_assign_pointer(pt_dev, dev);
+		rc = 0;
+	}
+	mutex_unlock(&page_tracking_device_mutex);
+	return rc;
+}
+EXPORT_SYMBOL_GPL(page_tracking_device_register);
+
+void page_tracking_device_unregister(struct page_tracking_device *dev)
+{
+	struct page_tracking_device *cur;
+
+	mutex_lock(&page_tracking_device_mutex);
+	cur = rcu_dereference_protected(pt_dev, lockdep_is_held(&page_tracking_device_mutex));
+	if (cur == dev) {	/* ignore a @dev that is not the registered device */
+		RCU_INIT_POINTER(pt_dev, NULL);
+		synchronize_rcu();	/* wait out readers still using the old pointer */
+	}
+	mutex_unlock(&page_tracking_device_mutex);
+}
+EXPORT_SYMBOL_GPL(page_tracking_device_unregister);
+
+/*
+ * Tells whether a page tracking device is registered (1) or not (0).
+ * The pointer is only tested, never dereferenced, so rcu_access_pointer()
+ * is enough and no RCU read-side critical section is required.
+ */
+int page_tracking_device_registered(void)
+{
+	return rcu_access_pointer(pt_dev) != NULL;
+}
+EXPORT_SYMBOL_GPL(page_tracking_device_registered);
+
+/* Allocates a per-VM tracker for @config; returns its context, NULL if no device is registered */
+void *page_tracking_allocate(struct pt_config config)
+{
+	struct page_tracking_device *tracker_dev;
+	void *tracker;
+
+	/* NOTE(review): the callback runs under rcu_read_lock() - confirm it never sleeps */
+	rcu_read_lock();
+	tracker_dev = rcu_dereference(pt_dev);
+	tracker = likely(tracker_dev) ? tracker_dev->allocate_tracker(config) : NULL;
+	rcu_read_unlock();
+	return tracker;
+}
+EXPORT_SYMBOL_GPL(page_tracking_allocate);
+
+/* Releases a per-VM tracker; returns -ENODEV if no device is registered */
+int page_tracking_release(void *ctx)
+{
+	int r = -ENODEV;	/* kept when no device is registered */
+	struct page_tracking_device *dev;
+
+	rcu_read_lock();
+	dev = rcu_dereference(pt_dev);
+	if (likely(dev))
+		r = dev->release_tracker(ctx);
+	rcu_read_unlock();
+	return r;
+}
+EXPORT_SYMBOL_GPL(page_tracking_release);
+
+/* Enables tracking for @ctx and @cpu (-1 for all cpus); -ENODEV if no device is registered */
+int page_tracking_enable(void *ctx, int cpu)
+{
+	int r = -ENODEV;	/* kept when no device is registered */
+	struct page_tracking_device *dev;
+
+	rcu_read_lock();
+	dev = rcu_dereference(pt_dev);
+	if (likely(dev))
+		r = dev->enable_tracking(ctx, cpu);
+	rcu_read_unlock();
+	return r;
+}
+EXPORT_SYMBOL_GPL(page_tracking_enable);
+
+/* Disables tracking for the @ctx and @cpu; returns -ENODEV if no device is registered */
+int page_tracking_disable(void *ctx, int cpu)
+{
+	int r = -ENODEV;	/* kept when no device is registered */
+	struct page_tracking_device *dev;
+
+	rcu_read_lock();
+	dev = rcu_dereference(pt_dev);
+	if (likely(dev))
+		r = dev->disable_tracking(ctx, cpu);
+	rcu_read_unlock();
+	return r;
+}
+EXPORT_SYMBOL_GPL(page_tracking_disable);
+
+/* Flushes any available data for the @ctx; returns -ENODEV if no device is registered */
+int page_tracking_flush(void *ctx)
+{
+	int r = -ENODEV;	/* kept when no device is registered */
+	struct page_tracking_device *dev;
+
+	rcu_read_lock();
+	dev = rcu_dereference(pt_dev);
+	if (likely(dev))
+		r = dev->flush(ctx);
+	rcu_read_unlock();
+	return r;
+}
+EXPORT_SYMBOL_GPL(page_tracking_flush);
+
+/*
+ * Reads up to @max dirty pages available for the @ctx and @cpu (-1 for all cpus)
+ * @returns number of read pages, or -errno on error (-ENODEV if no device is registered)
+ */
+int page_tracking_read_dirty_pages(void *ctx, int cpu, gpa_t *pages, u32 max)
+{
+	int r = -ENODEV;	/* kept when no device is registered */
+	struct page_tracking_device *dev;
+
+	rcu_read_lock();
+	dev = rcu_dereference(pt_dev);
+	if (likely(dev))
+		r = dev->read_dirty_pages(ctx, cpu, pages, max);
+	rcu_read_unlock();
+	return r;
+}
+EXPORT_SYMBOL_GPL(page_tracking_read_dirty_pages);
+
+#endif
-- 
2.40.1


  reply	other threads:[~2024-09-18 15:28 UTC|newest]

Thread overview: 16+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-09-18 15:27 [PATCH 0/8] *** RFC: ARM KVM dirty tracking device *** Lilit Janpoladyan
2024-09-18 15:28 ` Lilit Janpoladyan [this message]
2024-09-18 15:28 ` [PATCH 2/8] KVM: arm64: add page tracking device as a capability Lilit Janpoladyan
2024-09-18 15:28 ` [PATCH 3/8] KVM: arm64: use page tracking interface to enable dirty logging Lilit Janpoladyan
2024-09-22  7:31   ` Sean Christopherson
2024-09-18 15:28 ` [PATCH 4/8] KVM: return value from kvm_arch_sync_dirty_log Lilit Janpoladyan
2024-09-19  1:50   ` kernel test robot
2024-09-19  2:32   ` kernel test robot
2024-09-18 15:28 ` [PATCH 5/8] KVM: arm64: get dirty pages from the page tracking device Lilit Janpoladyan
2024-09-18 15:28 ` [PATCH 6/8] KVM: arm64: flush dirty logging data Lilit Janpoladyan
2024-09-18 15:28 ` [PATCH 7/8] KVM: arm64: enable hardware dirty state management for stage-2 Lilit Janpoladyan
2024-09-18 15:28 ` [PATCH 8/8] KVM: arm64: make hardware manage dirty state after write faults Lilit Janpoladyan
2024-09-19  9:11 ` [PATCH 0/8] *** RFC: ARM KVM dirty tracking device *** Oliver Upton
2024-09-20 10:12   ` Janpoladyan, Lilit
2024-09-26 10:00   ` David Woodhouse
2024-09-30 17:33     ` Oliver Upton

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20240918152807.25135-2-lilitj@amazon.com \
    --to=lilitj@amazon.com \
    --cc=james.morse@arm.com \
    --cc=kvm@vger.kernel.org \
    --cc=maz@kernel.org \
    --cc=nh-open-source@amazon.com \
    --cc=oliver.upton@linux.dev \
    --cc=suzuki.poulose@arm.com \
    --cc=yuzenghui@huawei.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox