public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: Yan Zhao <yan.y.zhao@intel.com>
To: iommu@lists.linux.dev, kvm@vger.kernel.org, linux-kernel@vger.kernel.org
Cc: alex.williamson@redhat.com, jgg@nvidia.com, pbonzini@redhat.com,
	seanjc@google.com, joro@8bytes.org, will@kernel.org,
	robin.murphy@arm.com, kevin.tian@intel.com,
	baolu.lu@linux.intel.com, dwmw2@infradead.org,
	yi.l.liu@intel.com, Yan Zhao <yan.y.zhao@intel.com>
Subject: [RFC PATCH 04/42] KVM: Skeleton of KVM TDP FD object
Date: Sat,  2 Dec 2023 17:16:15 +0800	[thread overview]
Message-ID: <20231202091615.13643-1-yan.y.zhao@intel.com> (raw)
In-Reply-To: <20231202091211.13376-1-yan.y.zhao@intel.com>

This is a skeleton implementation of the KVM TDP FD object.
The KVM TDP FD object is created by the KVM_CREATE_TDP_FD ioctl in
kvm_create_tdp_fd(), and consists of:

Public part (defined in <linux/kvm_tdp_fd.h>):
- A file object used for reference counting.
  The file reference count is 1 when the KVM TDP FD object is created.
  When the reference count of the file object drops to 0, its .release()
  handler destroys the KVM TDP FD object.
- ops kvm_exported_tdp_ops (empty implementation in this patch).

Private part (kvm_exported_tdp object defined in this patch) :
  The kvm_exported_tdp object is linked in kvm->exported_tdp_list, one for
  each KVM address space. It records the address space id and the "kvm"
  pointer for the TDP FD object, and a KVM VM reference is held for the
  lifetime of the object.
  In later patches, this kvm_exported_tdp object will be associated with a
  TDP page table exported by KVM.

Two symbols kvm_tdp_fd_get() and kvm_tdp_fd_put() are implemented and
exported to external components to get/put KVM TDP FD object.

Signed-off-by: Yan Zhao <yan.y.zhao@intel.com>
---
 include/linux/kvm_host.h |  18 ++++
 virt/kvm/Kconfig         |   3 +
 virt/kvm/Makefile.kvm    |   1 +
 virt/kvm/kvm_main.c      |   5 +
 virt/kvm/tdp_fd.c        | 208 +++++++++++++++++++++++++++++++++++++++
 virt/kvm/tdp_fd.h        |   5 +
 6 files changed, 240 insertions(+)
 create mode 100644 virt/kvm/tdp_fd.c

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 4944136efaa22..122f47c94ecae 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -44,6 +44,7 @@
 
 #include <asm/kvm_host.h>
 #include <linux/kvm_dirty_ring.h>
+#include <linux/kvm_tdp_fd.h>
 
 #ifndef KVM_MAX_VCPU_IDS
 #define KVM_MAX_VCPU_IDS KVM_MAX_VCPUS
@@ -808,6 +809,11 @@ struct kvm {
 	struct notifier_block pm_notifier;
 #endif
 	char stats_id[KVM_STATS_NAME_SIZE];
+
+#ifdef CONFIG_HAVE_KVM_EXPORTED_TDP
+	struct list_head exported_tdp_list;
+	spinlock_t exported_tdplist_lock;
+#endif
 };
 
 #define kvm_err(fmt, ...) \
@@ -2318,4 +2324,16 @@ static inline void kvm_account_pgtable_pages(void *virt, int nr)
 /* Max number of entries allowed for each kvm dirty ring */
 #define  KVM_DIRTY_RING_MAX_ENTRIES  65536
 
+#ifdef CONFIG_HAVE_KVM_EXPORTED_TDP
+
+struct kvm_exported_tdp {
+	struct kvm_tdp_fd *tdp_fd;	/* back-pointer to the TDP FD object whose ->priv is this struct */
+
+	struct kvm *kvm;	/* owning VM; kvm_create_tdp_fd() takes a VM reference before setting it */
+	u32 as_id;	/* KVM address space id this exported TDP was created for */
+	/* entry in kvm->exported_tdp_list; protected by kvm->exported_tdplist_lock */
+	struct list_head list_node;
+};
+
+#endif /* CONFIG_HAVE_KVM_EXPORTED_TDP */
 #endif
diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig
index 484d0873061ca..63b5d55c84e95 100644
--- a/virt/kvm/Kconfig
+++ b/virt/kvm/Kconfig
@@ -92,3 +92,6 @@ config HAVE_KVM_PM_NOTIFIER
 
 config KVM_GENERIC_HARDWARE_ENABLING
        bool
+
+config HAVE_KVM_EXPORTED_TDP
+       bool
diff --git a/virt/kvm/Makefile.kvm b/virt/kvm/Makefile.kvm
index 2c27d5d0c367c..fad4638e407c5 100644
--- a/virt/kvm/Makefile.kvm
+++ b/virt/kvm/Makefile.kvm
@@ -12,3 +12,4 @@ kvm-$(CONFIG_KVM_ASYNC_PF) += $(KVM)/async_pf.o
 kvm-$(CONFIG_HAVE_KVM_IRQ_ROUTING) += $(KVM)/irqchip.o
 kvm-$(CONFIG_HAVE_KVM_DIRTY_RING) += $(KVM)/dirty_ring.o
 kvm-$(CONFIG_HAVE_KVM_PFNCACHE) += $(KVM)/pfncache.o
+kvm-$(CONFIG_HAVE_KVM_EXPORTED_TDP) += $(KVM)/tdp_fd.o
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 494b6301a6065..9fa9132055807 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1232,6 +1232,11 @@ static struct kvm *kvm_create_vm(unsigned long type, const char *fdname)
 	INIT_HLIST_HEAD(&kvm->irq_ack_notifier_list);
 #endif
 
+#ifdef CONFIG_HAVE_KVM_EXPORTED_TDP
+	INIT_LIST_HEAD(&kvm->exported_tdp_list);
+	spin_lock_init(&kvm->exported_tdplist_lock);
+#endif
+
 	r = kvm_init_mmu_notifier(kvm);
 	if (r)
 		goto out_err_no_mmu_notifier;
diff --git a/virt/kvm/tdp_fd.c b/virt/kvm/tdp_fd.c
new file mode 100644
index 0000000000000..a5c4c3597e94f
--- /dev/null
+++ b/virt/kvm/tdp_fd.c
@@ -0,0 +1,208 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * KVM TDP FD
+ *
+ */
+#include <linux/anon_inodes.h>
+#include <uapi/linux/kvm.h>
+#include <linux/kvm_host.h>
+
+#include "tdp_fd.h"
+
+static inline int is_tdp_fd_file(struct file *file);
+static const struct file_operations kvm_tdp_fd_fops;
+static const struct kvm_exported_tdp_ops exported_tdp_ops;
+
+/*
+ * kvm_create_tdp_fd - create the KVM TDP FD object for one address space.
+ *
+ * @kvm: VM that the new object belongs to; a VM reference is taken and is
+ *       dropped again by kvm_tdp_fd_release().
+ * @ct:  ioctl argument. as_id selects the address space, pad and mode must
+ *       be zero, and fd receives the new file descriptor on success.
+ *
+ * Only one exported TDP may exist per address space.
+ *
+ * Returns 0 on success, -EINVAL for bad arguments, -ENOMEM on allocation
+ * failure, -ENODEV if no VM reference could be taken, -EEXIST if the
+ * address space already has an exported TDP, or the error reported by
+ * get_unused_fd_flags() / anon_inode_getfile().
+ */
+int kvm_create_tdp_fd(struct kvm *kvm, struct kvm_create_tdp_fd *ct)
+{
+	struct kvm_exported_tdp *tdp, *cur;
+	struct kvm_tdp_fd *tdp_fd;
+	struct file *file;
+	int ret, fd;
+
+	/*
+	 * Range-check the field itself rather than an int copy of it.
+	 * NOTE(review): assumes as_id is an unsigned uapi field, in which
+	 * case the int copy let values above INT_MAX pass - confirm.
+	 */
+	if (ct->as_id >= KVM_ADDRESS_SPACE_NUM || ct->pad || ct->mode)
+		return -EINVAL;
+
+	tdp_fd = kzalloc(sizeof(*tdp_fd), GFP_KERNEL_ACCOUNT);
+	tdp = kzalloc(sizeof(*tdp), GFP_KERNEL_ACCOUNT);
+	if (!tdp_fd || !tdp) {
+		ret = -ENOMEM;
+		goto err_free;
+	}
+	tdp_fd->priv = tdp;
+	tdp_fd->ops = &exported_tdp_ops;
+	tdp->tdp_fd = tdp_fd;
+	tdp->as_id = ct->as_id;
+
+	if (!kvm_get_kvm_safe(kvm)) {
+		ret = -ENODEV;
+		goto err_free;
+	}
+	tdp->kvm = kvm;
+
+	/*
+	 * Do the duplicate check and the insertion under a single lock hold
+	 * so that two racing creators cannot both succeed. The node is
+	 * linked before any file exists because kvm_tdp_fd_release()
+	 * unconditionally unlinks it.
+	 */
+	spin_lock(&kvm->exported_tdplist_lock);
+	list_for_each_entry(cur, &kvm->exported_tdp_list, list_node) {
+		if (cur->as_id == tdp->as_id) {
+			spin_unlock(&kvm->exported_tdplist_lock);
+			ret = -EEXIST;
+			goto err_put_kvm;
+		}
+	}
+	list_add(&tdp->list_node, &kvm->exported_tdp_list);
+	spin_unlock(&kvm->exported_tdplist_lock);
+
+	/* Reserve the fd first: undoing it needs no file and no .release(). */
+	fd = get_unused_fd_flags(O_RDWR | O_CLOEXEC);
+	if (fd < 0) {
+		ret = fd;
+		goto err_unlink;
+	}
+
+	/* anon_inode_getfile() reports failure with ERR_PTR(), never NULL. */
+	file = anon_inode_getfile("tdp_fd", &kvm_tdp_fd_fops, tdp_fd,
+				  O_RDWR | O_CLOEXEC);
+	if (IS_ERR(file)) {
+		ret = PTR_ERR(file);
+		put_unused_fd(fd);
+		goto err_unlink;
+	}
+	tdp_fd->file = file;
+	ct->fd = fd;
+
+	/*
+	 * Must be the last step: once the fd is installed, closing it runs
+	 * kvm_tdp_fd_release(), which unlinks and frees everything.
+	 */
+	fd_install(fd, file);
+	return 0;
+
+	/*
+	 * No file was ever created on these paths, so .release() cannot run
+	 * and each resource is undone here exactly once.
+	 */
+err_unlink:
+	spin_lock(&kvm->exported_tdplist_lock);
+	list_del(&tdp->list_node);
+	spin_unlock(&kvm->exported_tdplist_lock);
+err_put_kvm:
+	kvm_put_kvm_no_destroy(kvm);
+err_free:
+	kfree(tdp);
+	kfree(tdp_fd);
+	return ret;
+}
+
+/*
+ * .release() handler of a KVM TDP FD file.
+ *
+ * Unlinks the exported TDP from its VM's exported_tdp_list, drops the VM
+ * reference and frees both the private and the public object.
+ *
+ * Returns 0 on success, -EINVAL if @file is not a KVM TDP FD file, or
+ * -EFAULT (with a warning) if the private object or its VM is missing.
+ */
+static int kvm_tdp_fd_release(struct inode *inode, struct file *file)
+{
+	struct kvm_exported_tdp *exported;
+	struct kvm_tdp_fd *fd_obj;
+	struct kvm *kvm;
+
+	if (!is_tdp_fd_file(file))
+		return -EINVAL;
+
+	fd_obj = file->private_data;
+	exported = fd_obj->priv;
+	if (WARN_ON(!exported || !exported->kvm))
+		return -EFAULT;
+
+	kvm = exported->kvm;
+
+	/* Take the node off the per-VM list under the list lock. */
+	spin_lock(&kvm->exported_tdplist_lock);
+	list_del(&exported->list_node);
+	spin_unlock(&kvm->exported_tdplist_lock);
+
+	kvm_put_kvm(kvm);
+	kfree(exported);
+	kfree(fd_obj);
+	return 0;
+}
+
+/* ioctl handler of a KVM TDP FD file; every command is rejected with -ENODEV. */
+static long kvm_tdp_fd_ioctl(struct file *file, unsigned int cmd,
+			     unsigned long arg)
+{
+	/* No ioctl is supported currently; some may be added in future. */
+	long ret = -ENODEV;
+
+	return ret;
+}
+
+/* mmap handler of a KVM TDP FD file; always fails with -ENODEV. */
+static int kvm_tdp_fd_mmap(struct file *filp, struct vm_area_struct *vma)
+{
+	int ret = -ENODEV;
+
+	return ret;
+}
+
+/*
+ * File operations of a KVM TDP FD file. The address of this table is also
+ * what is_tdp_fd_file() compares against to recognise such files.
+ */
+static const struct file_operations kvm_tdp_fd_fops = {
+	.release	= kvm_tdp_fd_release,
+	.mmap		= kvm_tdp_fd_mmap,
+	.unlocked_ioctl	= kvm_tdp_fd_ioctl,
+	.compat_ioctl	= compat_ptr_ioctl,
+};
+
+/* Return non-zero if @file uses kvm_tdp_fd_fops, i.e. is a KVM TDP FD file. */
+static inline int is_tdp_fd_file(struct file *file)
+{
+	const struct file_operations *fops = file->f_op;
+
+	return fops == &kvm_tdp_fd_fops;
+}
+
+/*
+ * Placeholder callbacks behind exported_tdp_ops. None of them does any
+ * real work yet: each one either reports -EOPNOTSUPP or is a no-op.
+ */
+
+/* Register an importer on @tdp_fd; not supported yet. */
+static int kvm_tdp_register_importer(struct kvm_tdp_fd *tdp_fd,
+				     struct kvm_tdp_importer_ops *ops, void *data)
+{
+	int ret = -EOPNOTSUPP;
+
+	return ret;
+}
+
+/* Unregister an importer from @tdp_fd; currently nothing to undo. */
+static void kvm_tdp_unregister_importer(struct kvm_tdp_fd *tdp_fd,
+					struct kvm_tdp_importer_ops *ops)
+{
+}
+
+/* Get the metadata of @tdp_fd; currently always ERR_PTR(-EOPNOTSUPP). */
+static void *kvm_tdp_get_metadata(struct kvm_tdp_fd *tdp_fd)
+{
+	void *meta = ERR_PTR(-EOPNOTSUPP);
+
+	return meta;
+}
+
+/* Handle a fault on @gfn for @tdp_fd; not supported yet. */
+static int kvm_tdp_fault(struct kvm_tdp_fd *tdp_fd, struct mm_struct *mm,
+			 unsigned long gfn, struct kvm_tdp_fault_type type)
+{
+	int ret = -EOPNOTSUPP;
+
+	return ret;
+}
+
+/* Ops table that kvm_create_tdp_fd() stores in kvm_tdp_fd->ops. */
+static const struct kvm_exported_tdp_ops exported_tdp_ops = {
+	.fault			= kvm_tdp_fault,
+	.get_metadata		= kvm_tdp_get_metadata,
+	.register_importer	= kvm_tdp_register_importer,
+	.unregister_importer	= kvm_tdp_unregister_importer,
+};
+
+/**
+ * kvm_tdp_fd_get - Public interface to look up a KVM TDP FD object by fd.
+ * @fd: file descriptor expected to refer to a KVM TDP FD file.
+ *
+ * On success the reference taken on the underlying file by fget() is kept
+ * on behalf of the caller, so the returned object has its reference count
+ * increased.
+ *
+ * Return: the KVM TDP FD object if @fd corresponds to a valid KVM TDP FD
+ * file, ERR_PTR(-EBADF) if @fd does not correspond to a struct file, or
+ * ERR_PTR(-EINVAL) if @fd does not correspond to a KVM TDP FD file.
+ */
+struct kvm_tdp_fd *kvm_tdp_fd_get(int fd)
+{
+	struct file *filp = fget(fd);
+
+	if (!filp)
+		return ERR_PTR(-EBADF);
+
+	if (is_tdp_fd_file(filp))
+		return filp->private_data;
+
+	/* Not a KVM TDP FD file: give back the reference fget() took. */
+	fput(filp);
+	return ERR_PTR(-EINVAL);
+}
+EXPORT_SYMBOL_GPL(kvm_tdp_fd_get);
+
+/**
+ * kvm_tdp_fd_put - Public interface to drop a reference on a KVM TDP FD object.
+ * @tdp_fd: KVM TDP FD object.
+ *
+ * Drops one reference on the file backing @tdp_fd. After the last
+ * reference on the file goes away, kvm_tdp_fd_release() is called to
+ * decrease the KVM VM reference count and destroy the KVM TDP FD object.
+ *
+ * Warns and does nothing if @tdp_fd is NULL, has no file, or its file is
+ * not a KVM TDP FD file.
+ */
+void kvm_tdp_fd_put(struct kvm_tdp_fd *tdp_fd)
+{
+	struct file *filp = tdp_fd ? tdp_fd->file : NULL;
+
+	if (WARN_ON(!filp || !is_tdp_fd_file(filp)))
+		return;
+
+	fput(filp);
+}
+EXPORT_SYMBOL_GPL(kvm_tdp_fd_put);
diff --git a/virt/kvm/tdp_fd.h b/virt/kvm/tdp_fd.h
index 05c8a6d767469..85da9d8cc1ce4 100644
--- a/virt/kvm/tdp_fd.h
+++ b/virt/kvm/tdp_fd.h
@@ -2,9 +2,14 @@
 #ifndef __TDP_FD_H
 #define __TDP_FD_H
 
+#ifdef CONFIG_HAVE_KVM_EXPORTED_TDP
+int kvm_create_tdp_fd(struct kvm *kvm, struct kvm_create_tdp_fd *ct);
+
+#else
 static inline int kvm_create_tdp_fd(struct kvm *kvm, struct kvm_create_tdp_fd *ct)
 {
 	return -EOPNOTSUPP;
 }
+#endif /* CONFIG_HAVE_KVM_EXPORTED_TDP */
 
 #endif /* __TDP_FD_H */
-- 
2.17.1


  parent reply	other threads:[~2023-12-02  9:45 UTC|newest]

Thread overview: 73+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-12-02  9:12 [RFC PATCH 00/42] Sharing KVM TDP to IOMMU Yan Zhao
2023-12-02  9:13 ` [RFC PATCH 01/42] KVM: Public header for KVM to export TDP Yan Zhao
2023-12-02  9:15 ` [RFC PATCH 02/42] KVM: x86: Arch header for kvm to export TDP for Intel Yan Zhao
2023-12-02  9:15 ` [RFC PATCH 03/42] KVM: Introduce VM ioctl KVM_CREATE_TDP_FD Yan Zhao
2023-12-02  9:16 ` Yan Zhao [this message]
2023-12-02  9:16 ` [RFC PATCH 05/42] KVM: Embed "arch" object and call arch init/destroy in TDP FD Yan Zhao
2023-12-02  9:17 ` [RFC PATCH 06/42] KVM: Register/Unregister importers to KVM exported TDP Yan Zhao
2023-12-02  9:18 ` [RFC PATCH 07/42] KVM: Forward page fault requests to arch specific code for " Yan Zhao
2023-12-02  9:18 ` [RFC PATCH 08/42] KVM: Add a helper to notify importers that KVM exported TDP is flushed Yan Zhao
2023-12-02  9:19 ` [RFC PATCH 09/42] iommu: Add IOMMU_DOMAIN_KVM Yan Zhao
2023-12-02  9:20 ` [RFC PATCH 10/42] iommu: Add new iommu op to create domains managed by KVM Yan Zhao
2023-12-04 15:09   ` Jason Gunthorpe
2023-12-02  9:20 ` [RFC PATCH 11/42] iommu: Add new domain op cache_invalidate_kvm Yan Zhao
2023-12-04 15:09   ` Jason Gunthorpe
2023-12-05  6:40     ` Yan Zhao
2023-12-05 14:52       ` Jason Gunthorpe
2023-12-06  1:00         ` Yan Zhao
2023-12-02  9:21 ` [RFC PATCH 12/42] iommufd: Introduce allocation data info and flag for KVM managed HWPT Yan Zhao
2023-12-04 18:29   ` Jason Gunthorpe
2023-12-05  7:08     ` Yan Zhao
2023-12-05 14:53       ` Jason Gunthorpe
2023-12-06  0:58         ` Yan Zhao
2023-12-02  9:21 ` [RFC PATCH 13/42] iommufd: Add a KVM HW pagetable object Yan Zhao
2023-12-02  9:22 ` [RFC PATCH 14/42] iommufd: Enable KVM HW page table object to be proxy between KVM and IOMMU Yan Zhao
2023-12-04 18:34   ` Jason Gunthorpe
2023-12-05  7:09     ` Yan Zhao
2023-12-02  9:22 ` [RFC PATCH 15/42] iommufd: Add iopf handler to KVM hw pagetable Yan Zhao
2023-12-02  9:23 ` [RFC PATCH 16/42] iommufd: Enable device feature IOPF during device attachment to KVM HWPT Yan Zhao
2023-12-04 18:36   ` Jason Gunthorpe
2023-12-05  7:14     ` Yan Zhao
2023-12-05 14:53       ` Jason Gunthorpe
2023-12-06  0:55         ` Yan Zhao
2023-12-02  9:23 ` [RFC PATCH 17/42] iommu/vt-d: Make some macros and helpers to be extern Yan Zhao
2023-12-02  9:24 ` [RFC PATCH 18/42] iommu/vt-d: Support of IOMMU_DOMAIN_KVM domain in Intel IOMMU Yan Zhao
2023-12-02  9:24 ` [RFC PATCH 19/42] iommu/vt-d: Set bit PGSNP in PASIDTE if domain cache coherency is enforced Yan Zhao
2023-12-02  9:25 ` [RFC PATCH 20/42] iommu/vt-d: Support attach devices to IOMMU_DOMAIN_KVM domain Yan Zhao
2023-12-02  9:26 ` [RFC PATCH 21/42] iommu/vt-d: Check reserved bits for " Yan Zhao
2023-12-02  9:26 ` [RFC PATCH 22/42] iommu/vt-d: Support cache invalidate of " Yan Zhao
2023-12-02  9:26 ` [RFC PATCH 23/42] iommu/vt-d: Allow pasid 0 in IOPF Yan Zhao
2023-12-02  9:27 ` [RFC PATCH 24/42] KVM: x86/mmu: Move bit SPTE_MMU_PRESENT from bit 11 to bit 59 Yan Zhao
2023-12-02  9:27 ` [RFC PATCH 25/42] KVM: x86/mmu: Abstract "struct kvm_mmu_common" from "struct kvm_mmu" Yan Zhao
2023-12-02  9:28 ` [RFC PATCH 26/42] KVM: x86/mmu: introduce new op get_default_mt_mask to kvm_x86_ops Yan Zhao
2023-12-02  9:28 ` [RFC PATCH 27/42] KVM: x86/mmu: change param "vcpu" to "kvm" in kvm_mmu_hugepage_adjust() Yan Zhao
2023-12-02  9:29 ` [RFC PATCH 28/42] KVM: x86/mmu: change "vcpu" to "kvm" in page_fault_handle_page_track() Yan Zhao
2023-12-02  9:29 ` [RFC PATCH 29/42] KVM: x86/mmu: remove param "vcpu" from kvm_mmu_get_tdp_level() Yan Zhao
2023-12-02  9:30 ` [RFC PATCH 30/42] KVM: x86/mmu: remove param "vcpu" from kvm_calc_tdp_mmu_root_page_role() Yan Zhao
2023-12-02  9:30 ` [RFC PATCH 31/42] KVM: x86/mmu: add extra param "kvm" to kvm_faultin_pfn() Yan Zhao
2023-12-02  9:31 ` [RFC PATCH 32/42] KVM: x86/mmu: add extra param "kvm" to make_mmio_spte() Yan Zhao
2023-12-02  9:31 ` [RFC PATCH 33/42] KVM: x86/mmu: add extra param "kvm" to make_spte() Yan Zhao
2023-12-02  9:32 ` [RFC PATCH 34/42] KVM: x86/mmu: add extra param "kvm" to tdp_mmu_map_handle_target_level() Yan Zhao
2023-12-02  9:32 ` [RFC PATCH 35/42] KVM: x86/mmu: Get/Put TDP root page to be exported Yan Zhao
2023-12-02  9:33 ` [RFC PATCH 36/42] KVM: x86/mmu: Keep exported TDP root valid Yan Zhao
2023-12-02  9:33 ` [RFC PATCH 37/42] KVM: x86: Implement KVM exported TDP fault handler on x86 Yan Zhao
2023-12-02  9:35 ` [RFC PATCH 38/42] KVM: x86: "compose" and "get" interface for meta data of exported TDP Yan Zhao
2023-12-02  9:35 ` [RFC PATCH 39/42] KVM: VMX: add config KVM_INTEL_EXPORTED_EPT Yan Zhao
2023-12-02  9:36 ` [RFC PATCH 40/42] KVM: VMX: Compose VMX specific meta data for KVM exported TDP Yan Zhao
2023-12-02  9:36 ` [RFC PATCH 41/42] KVM: VMX: Implement ops .flush_remote_tlbs* in VMX when EPT is on Yan Zhao
2023-12-02  9:37 ` [RFC PATCH 42/42] KVM: VMX: Notify importers of exported TDP to flush TLBs on KVM flushes EPT Yan Zhao
2023-12-04 15:08 ` [RFC PATCH 00/42] Sharing KVM TDP to IOMMU Jason Gunthorpe
2023-12-04 16:38   ` Sean Christopherson
2023-12-05  1:31     ` Yan Zhao
2023-12-05  6:45       ` Tian, Kevin
2023-12-05  1:52   ` Yan Zhao
2023-12-05  6:30   ` Tian, Kevin
2023-12-04 17:00 ` Sean Christopherson
2023-12-04 17:30   ` Jason Gunthorpe
2023-12-04 19:22     ` Sean Christopherson
2023-12-04 19:50       ` Jason Gunthorpe
2023-12-04 20:11         ` Sean Christopherson
2023-12-04 23:49           ` Jason Gunthorpe
2023-12-05  7:17         ` Tian, Kevin
2023-12-05  5:53       ` Yan Zhao
2023-12-05  3:51   ` Yan Zhao

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20231202091615.13643-1-yan.y.zhao@intel.com \
    --to=yan.y.zhao@intel.com \
    --cc=alex.williamson@redhat.com \
    --cc=baolu.lu@linux.intel.com \
    --cc=dwmw2@infradead.org \
    --cc=iommu@lists.linux.dev \
    --cc=jgg@nvidia.com \
    --cc=joro@8bytes.org \
    --cc=kevin.tian@intel.com \
    --cc=kvm@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=pbonzini@redhat.com \
    --cc=robin.murphy@arm.com \
    --cc=seanjc@google.com \
    --cc=will@kernel.org \
    --cc=yi.l.liu@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox