From: Sean Christopherson <seanjc@google.com>
To: Paolo Bonzini <pbonzini@redhat.com>
Cc: linux-kernel@vger.kernel.org, kvm@vger.kernel.org, michael.roth@amd.com
Subject: Re: [PATCH 1/3] KVM: gmem: allocate private data for the gmem inode
Date: Wed, 13 Nov 2024 16:14:05 -0800 [thread overview]
Message-ID: <ZzVAzc3rVTW9OCJP@google.com> (raw)
In-Reply-To: <20241108155056.332412-2-pbonzini@redhat.com>
+Ackerley, who's also working on resurrecting the file system[*]. At a glance,
there appear to be non-trivial differences, e.g. Ackerley's version has a call
to security_inode_init_security_anon(). I've paged out much of the inode stuff,
so I trust Ackerley's judgment far, far more than my own :-)
[*] https://lore.kernel.org/all/d1940d466fc69472c8b6dda95df2e0522b2d8744.1726009989.git.ackerleytng@google.com
On Fri, Nov 08, 2024, Paolo Bonzini wrote:
> In preparation for removing the usage of the uptodate flag,
> reintroduce the gmem filesystem type. We need it in order to
> free the private inode information.
>
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> ---
> include/uapi/linux/magic.h | 1 +
> virt/kvm/guest_memfd.c | 117 +++++++++++++++++++++++++++++++++----
> virt/kvm/kvm_main.c | 7 ++-
> virt/kvm/kvm_mm.h | 8 ++-
> 4 files changed, 119 insertions(+), 14 deletions(-)
>
> diff --git a/include/uapi/linux/magic.h b/include/uapi/linux/magic.h
> index bb575f3ab45e..d856dd6a7ed9 100644
> --- a/include/uapi/linux/magic.h
> +++ b/include/uapi/linux/magic.h
> @@ -103,5 +103,6 @@
> #define DEVMEM_MAGIC 0x454d444d /* "DMEM" */
> #define SECRETMEM_MAGIC 0x5345434d /* "SECM" */
> #define PID_FS_MAGIC 0x50494446 /* "PIDF" */
> +#define KVM_GUEST_MEM_MAGIC 0x474d454d /* "GMEM" */
>
> #endif /* __LINUX_MAGIC_H__ */
> diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c
> index 8f079a61a56d..3ea5a7597fd4 100644
> --- a/virt/kvm/guest_memfd.c
> +++ b/virt/kvm/guest_memfd.c
> @@ -4,9 +4,74 @@
> #include <linux/kvm_host.h>
> #include <linux/pagemap.h>
> #include <linux/anon_inodes.h>
> +#include <linux/pseudo_fs.h>
>
> #include "kvm_mm.h"
>
> +/* Do all the filesystem crap just for evict_inode... */
> +
> +static struct vfsmount *kvm_gmem_mnt __read_mostly;
> +
> +static void gmem_evict_inode(struct inode *inode)
> +{
> + kvfree(inode->i_private);
> + truncate_inode_pages_final(&inode->i_data);
> + clear_inode(inode);
> +}
> +
> +static const struct super_operations gmem_super_operations = {
> + .drop_inode = generic_delete_inode,
> + .evict_inode = gmem_evict_inode,
> + .statfs = simple_statfs,
> +};
> +
> +static int gmem_init_fs_context(struct fs_context *fc)
> +{
> + struct pseudo_fs_context *ctx = init_pseudo(fc, KVM_GUEST_MEM_MAGIC);
> + if (!ctx)
> + return -ENOMEM;
> +
> + ctx->ops = &gmem_super_operations;
> + return 0;
> +}
> +
> +static struct file_system_type kvm_gmem_fs_type = {
> + .name = "kvm_gmemfs",
> + .init_fs_context = gmem_init_fs_context,
> + .kill_sb = kill_anon_super,
> +};
> +
> +static struct file *kvm_gmem_create_file(const char *name, const struct file_operations *fops)
> +{
> + struct inode *inode;
> + struct file *file;
> +
> + if (fops->owner && !try_module_get(fops->owner))
> + return ERR_PTR(-ENOENT);
> +
> + inode = alloc_anon_inode(kvm_gmem_mnt->mnt_sb);
> + if (IS_ERR(inode)) {
> + file = ERR_CAST(inode);
> + goto err;
> + }
> + file = alloc_file_pseudo(inode, kvm_gmem_mnt, name, O_RDWR, fops);
> + if (IS_ERR(file))
> + goto err_iput;
> +
> + return file;
> +
> +err_iput:
> + iput(inode);
> +err:
> + module_put(fops->owner);
> + return file;
> +}
> +
> +
> +struct kvm_gmem_inode {
> + unsigned long flags;
> +};
> +
> struct kvm_gmem {
> struct kvm *kvm;
> struct xarray bindings;
> @@ -308,9 +373,31 @@ static struct file_operations kvm_gmem_fops = {
> .fallocate = kvm_gmem_fallocate,
> };
>
> -void kvm_gmem_init(struct module *module)
> +int kvm_gmem_init(struct module *module)
> {
> + int ret;
> +
> + ret = register_filesystem(&kvm_gmem_fs_type);
> + if (ret) {
> + pr_err("kvm-gmem: cannot register file system (%d)\n", ret);
> + return ret;
> + }
> +
> + kvm_gmem_mnt = kern_mount(&kvm_gmem_fs_type);
> + if (IS_ERR(kvm_gmem_mnt)) {
> + pr_err("kvm-gmem: kernel mount failed (%ld)\n", PTR_ERR(kvm_gmem_mnt));
> + return PTR_ERR(kvm_gmem_mnt);
> + }
> +
> kvm_gmem_fops.owner = module;
> +
> + return 0;
> +}
> +
> +void kvm_gmem_exit(void)
> +{
> + kern_unmount(kvm_gmem_mnt);
> + unregister_filesystem(&kvm_gmem_fs_type);
> }
>
> static int kvm_gmem_migrate_folio(struct address_space *mapping,
> @@ -394,15 +481,23 @@ static const struct inode_operations kvm_gmem_iops = {
>
> static int __kvm_gmem_create(struct kvm *kvm, loff_t size, u64 flags)
> {
> - const char *anon_name = "[kvm-gmem]";
> + const char *gmem_name = "[kvm-gmem]";
> + struct kvm_gmem_inode *i_gmem;
> struct kvm_gmem *gmem;
> struct inode *inode;
> struct file *file;
> int fd, err;
>
> + i_gmem = kvzalloc(sizeof(struct kvm_gmem_inode), GFP_KERNEL);
> + if (!i_gmem)
> + return -ENOMEM;
> + i_gmem->flags = flags;
> +
> fd = get_unused_fd_flags(0);
> - if (fd < 0)
> - return fd;
> + if (fd < 0) {
> + err = fd;
> + goto err_i_gmem;
> + }
>
> gmem = kzalloc(sizeof(*gmem), GFP_KERNEL);
> if (!gmem) {
> @@ -410,19 +505,19 @@ static int __kvm_gmem_create(struct kvm *kvm, loff_t size, u64 flags)
> goto err_fd;
> }
>
> - file = anon_inode_create_getfile(anon_name, &kvm_gmem_fops, gmem,
> - O_RDWR, NULL);
> + file = kvm_gmem_create_file(gmem_name, &kvm_gmem_fops);
> if (IS_ERR(file)) {
> err = PTR_ERR(file);
> goto err_gmem;
> }
>
> + inode = file->f_inode;
> +
> + file->f_mapping = inode->i_mapping;
> + file->private_data = gmem;
> file->f_flags |= O_LARGEFILE;
>
> - inode = file->f_inode;
> - WARN_ON(file->f_mapping != inode->i_mapping);
> -
> - inode->i_private = (void *)(unsigned long)flags;
> + inode->i_private = i_gmem;
> inode->i_op = &kvm_gmem_iops;
> inode->i_mapping->a_ops = &kvm_gmem_aops;
> inode->i_mode |= S_IFREG;
> @@ -444,6 +539,8 @@ static int __kvm_gmem_create(struct kvm *kvm, loff_t size, u64 flags)
> kfree(gmem);
> err_fd:
> put_unused_fd(fd);
> +err_i_gmem:
> + kvfree(i_gmem);
> return err;
> }
>
> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> index 279e03029ce1..8b7b4e0eb639 100644
> --- a/virt/kvm/kvm_main.c
> +++ b/virt/kvm/kvm_main.c
> @@ -6504,7 +6504,9 @@ int kvm_init(unsigned vcpu_size, unsigned vcpu_align, struct module *module)
> if (WARN_ON_ONCE(r))
> goto err_vfio;
>
> - kvm_gmem_init(module);
> + r = kvm_gmem_init(module);
> + if (r)
> + goto err_gmem;
>
> r = kvm_init_virtualization();
> if (r)
> @@ -6525,6 +6527,8 @@ int kvm_init(unsigned vcpu_size, unsigned vcpu_align, struct module *module)
> err_register:
> kvm_uninit_virtualization();
> err_virt:
> + kvm_gmem_exit();
> +err_gmem:
> kvm_vfio_ops_exit();
> err_vfio:
> kvm_async_pf_deinit();
> @@ -6556,6 +6560,7 @@ void kvm_exit(void)
> for_each_possible_cpu(cpu)
> free_cpumask_var(per_cpu(cpu_kick_mask, cpu));
> kmem_cache_destroy(kvm_vcpu_cache);
> + kvm_gmem_exit();
> kvm_vfio_ops_exit();
> kvm_async_pf_deinit();
> kvm_irqfd_exit();
> diff --git a/virt/kvm/kvm_mm.h b/virt/kvm/kvm_mm.h
> index 715f19669d01..91e4202574a8 100644
> --- a/virt/kvm/kvm_mm.h
> +++ b/virt/kvm/kvm_mm.h
> @@ -36,15 +36,17 @@ static inline void gfn_to_pfn_cache_invalidate_start(struct kvm *kvm,
> #endif /* HAVE_KVM_PFNCACHE */
>
> #ifdef CONFIG_KVM_PRIVATE_MEM
> -void kvm_gmem_init(struct module *module);
> +int kvm_gmem_init(struct module *module);
> +void kvm_gmem_exit(void);
> int kvm_gmem_create(struct kvm *kvm, struct kvm_create_guest_memfd *args);
> int kvm_gmem_bind(struct kvm *kvm, struct kvm_memory_slot *slot,
> unsigned int fd, loff_t offset);
> void kvm_gmem_unbind(struct kvm_memory_slot *slot);
> #else
> -static inline void kvm_gmem_init(struct module *module)
> +static inline void kvm_gmem_exit(void) {}
> +static inline int kvm_gmem_init(struct module *module)
> {
> -
> + return 0;
> }
>
> static inline int kvm_gmem_bind(struct kvm *kvm,
> --
> 2.43.5
>
>
next prev parent reply other threads:[~2024-11-14 0:14 UTC|newest]
Thread overview: 9+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-11-08 15:50 [PATCH 0/3] KVM: gmem: track preparedness a page at a time Paolo Bonzini
2024-11-08 15:50 ` [PATCH 1/3] KVM: gmem: allocate private data for the gmem inode Paolo Bonzini
2024-11-14 0:14 ` Sean Christopherson [this message]
2024-11-08 15:50 ` [PATCH 2/3] KVM: gmem: add a complete set of functions to query page preparedness Paolo Bonzini
2024-11-14 1:42 ` Sean Christopherson
2024-11-08 15:50 ` [PATCH 3/3] KVM: gmem: track preparedness a page at a time Paolo Bonzini
2024-11-14 1:31 ` Sean Christopherson
2024-11-14 1:41 ` Sean Christopherson
2024-11-08 16:32 ` [PATCH 2.5/3] KVM: gmem: limit hole-punching to ranges within the file Paolo Bonzini
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=ZzVAzc3rVTW9OCJP@google.com \
--to=seanjc@google.com \
--cc=kvm@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=michael.roth@amd.com \
--cc=pbonzini@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes;
see the mirroring instructions for how to clone and mirror
all data and code used by this external index.