From: Chao Peng <chao.p.peng@linux.intel.com>
To: kvm@vger.kernel.org, linux-kernel@vger.kernel.org,
linux-mm@kvack.org, linux-fsdevel@vger.kernel.org,
qemu-devel@nongnu.org
Cc: Paolo Bonzini <pbonzini@redhat.com>,
Jonathan Corbet <corbet@lwn.net>,
Sean Christopherson <seanjc@google.com>,
Vitaly Kuznetsov <vkuznets@redhat.com>,
Wanpeng Li <wanpengli@tencent.com>,
Jim Mattson <jmattson@google.com>, Joerg Roedel <joro@8bytes.org>,
Thomas Gleixner <tglx@linutronix.de>,
Ingo Molnar <mingo@redhat.com>, Borislav Petkov <bp@alien8.de>,
x86@kernel.org, "H . Peter Anvin" <hpa@zytor.com>,
Hugh Dickins <hughd@google.com>, Jeff Layton <jlayton@kernel.org>,
"J . Bruce Fields" <bfields@fieldses.org>,
Andrew Morton <akpm@linux-foundation.org>,
Yu Zhang <yu.c.zhang@linux.intel.com>,
Chao Peng <chao.p.peng@linux.intel.com>,
"Kirill A . Shutemov" <kirill.shutemov@linux.intel.com>,
luto@kernel.org, jun.nakajima@intel.com, dave.hansen@intel.com,
ak@linux.intel.com, david@redhat.com
Subject: [PATCH v4 03/12] mm: Introduce memfile_notifier
Date: Tue, 18 Jan 2022 21:21:12 +0800 [thread overview]
Message-ID: <20220118132121.31388-4-chao.p.peng@linux.intel.com> (raw)
In-Reply-To: <20220118132121.31388-1-chao.p.peng@linux.intel.com>
This patch introduces the memfile_notifier facility so that existing memory
file subsystems (e.g. tmpfs/hugetlbfs) can provide memory pages to a third
kernel component, which can then make use of memory bookmarked in the
memory file and get notified when the pages in the memory file become
allocated/invalidated.
It will be used for KVM to use a file descriptor as the guest memory
backing store and KVM will use this memfile_notifier interface to
interact with memory file subsystems. In the future there might be other
consumers (e.g. VFIO with encrypted device memory).
It consists of two sets of callbacks:
- memfile_notifier_ops: callbacks for memory backing store to notify
KVM when memory gets allocated/invalidated.
- memfile_pfn_ops: callbacks for KVM to call into memory backing store
to request memory pages for guest private memory.
Userspace is in charge of guest memory lifecycle: it first allocates
pages in memory backing store and then passes the fd to KVM and lets KVM
register each memory slot to memory backing store via
memfile_register_notifier.
A supported memory backing store should maintain a memfile_notifier list
and provide a routine through which memfile_notifier can obtain the list head
address and the memfile_pfn_ops callbacks for memfile_register_notifier. It
should also call memfile_notifier_fallocate/memfile_notifier_invalidate when
the bookmarked memory gets allocated/invalidated.
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Signed-off-by: Chao Peng <chao.p.peng@linux.intel.com>
---
include/linux/memfile_notifier.h | 53 +++++++++++++++++++
mm/Kconfig | 4 ++
mm/Makefile | 1 +
mm/memfile_notifier.c | 89 ++++++++++++++++++++++++++++++++
4 files changed, 147 insertions(+)
create mode 100644 include/linux/memfile_notifier.h
create mode 100644 mm/memfile_notifier.c
diff --git a/include/linux/memfile_notifier.h b/include/linux/memfile_notifier.h
new file mode 100644
index 000000000000..a03bebdd1322
--- /dev/null
+++ b/include/linux/memfile_notifier.h
@@ -0,0 +1,53 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_MEMFILE_NOTIFIER_H
+#define _LINUX_MEMFILE_NOTIFIER_H
+
+#include <linux/rculist.h>
+#include <linux/spinlock.h>
+#include <linux/srcu.h>
+#include <linux/fs.h>
+
+struct memfile_notifier;
+
+/*
+ * Callbacks a registered notifier supplies so that the memory backing store
+ * can tell it when memory in the file gets allocated or invalidated.
+ * Either hook may be left NULL; the notify helpers skip missing hooks.
+ */
+struct memfile_notifier_ops {
+ /* Invoked by memfile_notifier_invalidate() with the range it was given. */
+ void (*invalidate)(struct memfile_notifier *notifier,
+ pgoff_t start, pgoff_t end);
+ /* Invoked by memfile_notifier_fallocate() with the range it was given. */
+ void (*fallocate)(struct memfile_notifier *notifier,
+ pgoff_t start, pgoff_t end);
+};
+
+/*
+ * Callbacks a notifier consumer (e.g. KVM) uses to call into the memory
+ * backing store to request memory pages. A pointer to this table is passed
+ * back through the pfn_ops argument of memfile_register_notifier().
+ */
+struct memfile_pfn_ops {
+ /*
+  * NOTE(review): the name suggests the pfn for @offset is returned with
+  * its page locked, and that a negative value is an error and *order
+  * reports the page order - none of this is visible here; confirm
+  * against the backing-store implementation.
+  */
+ long (*get_lock_pfn)(struct inode *inode, pgoff_t offset, int *order);
+ /* NOTE(review): presumably the release counterpart of get_lock_pfn - confirm. */
+ void (*put_unlock_pfn)(unsigned long pfn);
+};
+
+/*
+ * One listener registered on a memory file.
+ */
+struct memfile_notifier {
+ /* Link in the owning memfile_notifier_list (added/removed with the RCU list helpers). */
+ struct list_head list;
+ /* Callback table; may be NULL, in which case the notify helpers skip this entry. */
+ struct memfile_notifier_ops *ops;
+};
+
+/*
+ * Per-file collection of notifiers, maintained by the memory backing store.
+ */
+struct memfile_notifier_list {
+ /* Head of the list of struct memfile_notifier entries. */
+ struct list_head head;
+ /* Taken around list_add_rcu()/list_del_rcu() on @head; traversal uses SRCU instead. */
+ spinlock_t lock;
+};
+
+#ifdef CONFIG_MEMFILE_NOTIFIER
+/*
+ * Prepare an empty notifier list: the update lock is initialised and the
+ * list head is made to point at itself. Must run before the list is used
+ * for registration or notification.
+ */
+static inline void memfile_notifier_list_init(struct memfile_notifier_list *list)
+{
+	spin_lock_init(&list->lock);
+	INIT_LIST_HEAD(&list->head);
+}
+
+/* Notification entry points, to be called by the memory backing store. */
+void memfile_notifier_invalidate(struct memfile_notifier_list *list,
+				 pgoff_t start, pgoff_t end);
+void memfile_notifier_fallocate(struct memfile_notifier_list *list,
+				pgoff_t start, pgoff_t end);
+
+/* Registration interface for notifier consumers. */
+int memfile_register_notifier(struct inode *inode,
+			      struct memfile_notifier *notifier,
+			      struct memfile_pfn_ops **pfn_ops);
+void memfile_unregister_notifier(struct inode *inode,
+				 struct memfile_notifier *notifier);
+
+#endif /* CONFIG_MEMFILE_NOTIFIER */
+
+#endif /* _LINUX_MEMFILE_NOTIFIER_H */
diff --git a/mm/Kconfig b/mm/Kconfig
index 28edafc820ad..fa31eda3c895 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -900,6 +900,10 @@ config IO_MAPPING
config SECRETMEM
def_bool ARCH_HAS_SET_DIRECT_MAP && !EMBEDDED
+config MEMFILE_NOTIFIER
+ bool
+ select SRCU
+
source "mm/damon/Kconfig"
endmenu
diff --git a/mm/Makefile b/mm/Makefile
index d6c0042e3aa0..80588f7c3bc2 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -130,3 +130,4 @@ obj-$(CONFIG_PAGE_REPORTING) += page_reporting.o
obj-$(CONFIG_IO_MAPPING) += io-mapping.o
obj-$(CONFIG_HAVE_BOOTMEM_INFO_NODE) += bootmem_info.o
obj-$(CONFIG_GENERIC_IOREMAP) += ioremap.o
+obj-$(CONFIG_MEMFILE_NOTIFIER) += memfile_notifier.o
diff --git a/mm/memfile_notifier.c b/mm/memfile_notifier.c
new file mode 100644
index 000000000000..8171d4601a04
--- /dev/null
+++ b/mm/memfile_notifier.c
@@ -0,0 +1,89 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * linux/mm/memfile_notifier.c
+ *
+ * Copyright (C) 2022 Intel Corporation.
+ * Chao Peng <chao.p.peng@linux.intel.com>
+ */
+
+#include <linux/memfile_notifier.h>
+#include <linux/srcu.h>
+
+DEFINE_STATIC_SRCU(srcu);
+
+/*
+ * memfile_notifier_invalidate - report an invalidated range to all listeners
+ * @list:  notifier list of the memory file
+ * @start: start of the range, passed unchanged to each callback
+ * @end:   end of the range, passed unchanged to each callback
+ *
+ * Walks @list inside an SRCU read-side critical section and calls the
+ * ->invalidate() hook of every notifier that provides one.
+ */
+void memfile_notifier_invalidate(struct memfile_notifier_list *list,
+				 pgoff_t start, pgoff_t end)
+{
+	struct memfile_notifier *cur;
+	int srcu_idx = srcu_read_lock(&srcu);
+
+	list_for_each_entry_srcu(cur, &list->head, list,
+				 srcu_read_lock_held(&srcu)) {
+		/* Entries without an ops table or without this hook are skipped. */
+		if (!cur->ops || !cur->ops->invalidate)
+			continue;
+
+		cur->ops->invalidate(cur, start, end);
+	}
+
+	srcu_read_unlock(&srcu, srcu_idx);
+}
+
+/*
+ * memfile_notifier_fallocate - report an allocated range to all listeners
+ * @list:  notifier list of the memory file
+ * @start: start of the range, passed unchanged to each callback
+ * @end:   end of the range, passed unchanged to each callback
+ *
+ * Walks @list inside an SRCU read-side critical section and calls the
+ * ->fallocate() hook of every notifier that provides one.
+ */
+void memfile_notifier_fallocate(struct memfile_notifier_list *list,
+				pgoff_t start, pgoff_t end)
+{
+	struct memfile_notifier *cur;
+	int srcu_idx = srcu_read_lock(&srcu);
+
+	list_for_each_entry_srcu(cur, &list->head, list,
+				 srcu_read_lock_held(&srcu)) {
+		/* Entries without an ops table or without this hook are skipped. */
+		if (!cur->ops || !cur->ops->fallocate)
+			continue;
+
+		cur->ops->fallocate(cur, start, end);
+	}
+
+	srcu_read_unlock(&srcu, srcu_idx);
+}
+
+/*
+ * Look up the notifier list and pfn callbacks that belong to @inode.
+ *
+ * No memory backing store is hooked up in this file, so the lookup fails
+ * unconditionally with -EOPNOTSUPP and neither @list nor @ops is written.
+ * Callers must not use *list unless 0 is returned.
+ */
+static int memfile_get_notifier_info(struct inode *inode,
+ struct memfile_notifier_list **list,
+ struct memfile_pfn_ops **ops)
+{
+ return -EOPNOTSUPP;
+}
+
+/**
+ * memfile_register_notifier - attach a notifier to a memory file
+ * @inode:    inode of the memory file
+ * @notifier: notifier to add to the file's notifier list
+ * @pfn_ops:  out parameter forwarded to the backing-store lookup, which is
+ *            expected to store the file's memfile_pfn_ops there
+ *
+ * Return: 0 on success, -EINVAL if any argument is NULL, otherwise the error
+ * returned by the backing-store lookup (in which case nothing is registered).
+ */
+int memfile_register_notifier(struct inode *inode,
+			      struct memfile_notifier *notifier,
+			      struct memfile_pfn_ops **pfn_ops)
+{
+	struct memfile_notifier_list *list;
+	int ret;
+
+	if (!inode || !notifier || !pfn_ops)
+		return -EINVAL;
+
+	ret = memfile_get_notifier_info(inode, &list, pfn_ops);
+	if (ret)
+		return ret;
+
+	/* Updaters are serialised by the list lock; walkers rely on SRCU. */
+	spin_lock(&list->lock);
+	list_add_rcu(&notifier->list, &list->head);
+	spin_unlock(&list->lock);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(memfile_register_notifier);
+
+/**
+ * memfile_unregister_notifier - detach a notifier from a memory file
+ * @inode:    inode of the memory file the notifier was registered on
+ * @notifier: notifier to remove
+ *
+ * Does nothing if either argument is NULL. If the notifier list for @inode
+ * cannot be looked up, a warning is emitted once and the function returns
+ * without touching @notifier.
+ *
+ * On return, no notification walk that started before the removal is still
+ * running, because the function waits for an SRCU grace period.
+ */
+void memfile_unregister_notifier(struct inode *inode,
+				 struct memfile_notifier *notifier)
+{
+	struct memfile_notifier_list *list;
+
+	if (!inode || !notifier)
+		return;
+
+	/*
+	 * A failed lookup leaves 'list' unset. Warn and bail out instead of
+	 * bringing the whole machine down with BUG_ON().
+	 */
+	if (WARN_ON_ONCE(memfile_get_notifier_info(inode, &list, NULL)))
+		return;
+
+	spin_lock(&list->lock);
+	list_del_rcu(&notifier->list);
+	spin_unlock(&list->lock);
+
+	/* Wait for SRCU readers that may still be walking over @notifier. */
+	synchronize_srcu(&srcu);
+}
+EXPORT_SYMBOL_GPL(memfile_unregister_notifier);
--
2.17.1
next prev parent reply other threads:[~2022-01-18 13:22 UTC|newest]
Thread overview: 46+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-01-18 13:21 [PATCH v4 00/12] KVM: mm: fd-based approach for supporting KVM guest private memory Chao Peng
2022-01-18 13:21 ` [PATCH v4 01/12] mm/shmem: Introduce F_SEAL_INACCESSIBLE Chao Peng
2022-02-07 12:24 ` Vlastimil Babka
2022-02-17 12:56 ` Chao Peng
2022-02-11 23:33 ` Andy Lutomirski
2022-02-17 13:06 ` Chao Peng
2022-02-17 19:09 ` Andy Lutomirski
2022-02-23 11:49 ` Chao Peng
2022-02-23 12:05 ` Steven Price
2022-03-04 19:24 ` Andy Lutomirski
2022-03-07 13:26 ` Chao Peng
2022-03-08 12:17 ` Paolo Bonzini
2022-01-18 13:21 ` [PATCH v4 02/12] mm/memfd: Introduce MFD_INACCESSIBLE flag Chao Peng
2022-01-21 15:50 ` Steven Price
2022-01-24 13:29 ` Chao Peng
2022-02-07 18:51 ` Vlastimil Babka
2022-02-08 8:49 ` David Hildenbrand
2022-02-08 18:22 ` Mike Rapoport
2022-01-18 13:21 ` Chao Peng [this message]
2022-03-07 15:42 ` [PATCH v4 03/12] mm: Introduce memfile_notifier Vlastimil Babka
2022-03-08 1:45 ` Chao Peng
2022-01-18 13:21 ` [PATCH v4 04/12] mm/shmem: Support memfile_notifier Chao Peng
2022-02-08 18:29 ` Mike Rapoport
2022-02-17 13:10 ` Chao Peng
2022-02-11 23:40 ` Andy Lutomirski
2022-02-17 13:23 ` Chao Peng
2022-01-18 13:21 ` [PATCH v4 05/12] KVM: Extend the memslot to support fd-based private memory Chao Peng
2022-01-18 13:21 ` [PATCH v4 06/12] KVM: Use kvm_userspace_memory_region_ext Chao Peng
2022-01-18 13:21 ` [PATCH v4 07/12] KVM: Add KVM_EXIT_MEMORY_ERROR exit Chao Peng
2022-01-18 13:21 ` [PATCH v4 08/12] KVM: Use memfile_pfn_ops to obtain pfn for private pages Chao Peng
2022-01-18 13:21 ` [PATCH v4 09/12] KVM: Handle page fault for private memory Chao Peng
2022-01-18 13:21 ` [PATCH v4 10/12] KVM: Register private memslot to memory backing store Chao Peng
2022-01-18 13:21 ` [PATCH v4 11/12] KVM: Zap existing KVM mappings when pages changed in the private fd Chao Peng
2022-01-18 13:21 ` [PATCH v4 12/12] KVM: Expose KVM_MEM_PRIVATE Chao Peng
2022-01-25 20:20 ` Maciej S. Szmigiero
2022-02-17 13:45 ` Chao Peng
2022-02-22 1:16 ` Maciej S. Szmigiero
2022-02-23 12:00 ` Chao Peng
2022-02-23 18:32 ` Maciej S. Szmigiero
2022-02-24 8:07 ` Chao Peng
2022-01-28 16:47 ` [PATCH v4 00/12] KVM: mm: fd-based approach for supporting KVM guest private memory Steven Price
2022-02-02 2:28 ` Nakajima, Jun
2022-02-02 9:23 ` Steven Price
2022-02-02 20:47 ` Nakajima, Jun
2022-02-08 18:33 ` Mike Rapoport
2022-02-17 13:47 ` Chao Peng
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20220118132121.31388-4-chao.p.peng@linux.intel.com \
--to=chao.p.peng@linux.intel.com \
--cc=ak@linux.intel.com \
--cc=akpm@linux-foundation.org \
--cc=bfields@fieldses.org \
--cc=bp@alien8.de \
--cc=corbet@lwn.net \
--cc=dave.hansen@intel.com \
--cc=david@redhat.com \
--cc=hpa@zytor.com \
--cc=hughd@google.com \
--cc=jlayton@kernel.org \
--cc=jmattson@google.com \
--cc=joro@8bytes.org \
--cc=jun.nakajima@intel.com \
--cc=kirill.shutemov@linux.intel.com \
--cc=kvm@vger.kernel.org \
--cc=linux-fsdevel@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=luto@kernel.org \
--cc=mingo@redhat.com \
--cc=pbonzini@redhat.com \
--cc=qemu-devel@nongnu.org \
--cc=seanjc@google.com \
--cc=tglx@linutronix.de \
--cc=vkuznets@redhat.com \
--cc=wanpengli@tencent.com \
--cc=x86@kernel.org \
--cc=yu.c.zhang@linux.intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.