linux-fsdevel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: James Gowans <jgowans@amazon.com>
To: <linux-kernel@vger.kernel.org>
Cc: James Gowans <jgowans@amazon.com>,
	Sean Christopherson <seanjc@google.com>,
	Paolo Bonzini <pbonzini@redhat.com>,
	Alexander Viro <viro@zeniv.linux.org.uk>,
	Steve Sistare <steven.sistare@oracle.com>,
	Christian Brauner <brauner@kernel.org>, Jan Kara <jack@suse.cz>,
	"Anthony Yznaga" <anthony.yznaga@oracle.com>,
	Mike Rapoport <rppt@kernel.org>,
	"Andrew Morton" <akpm@linux-foundation.org>, <linux-mm@kvack.org>,
	Jason Gunthorpe <jgg@ziepe.ca>, <linux-fsdevel@vger.kernel.org>,
	Usama Arif <usama.arif@bytedance.com>, <kvm@vger.kernel.org>,
	Alexander Graf <graf@amazon.com>,
	David Woodhouse <dwmw@amazon.co.uk>,
	Paul Durrant <pdurrant@amazon.co.uk>,
	Nicolas Saenz Julienne <nsaenz@amazon.es>
Subject: [PATCH 01/10] guestmemfs: Introduce filesystem skeleton
Date: Mon, 5 Aug 2024 11:32:36 +0200	[thread overview]
Message-ID: <20240805093245.889357-2-jgowans@amazon.com> (raw)
In-Reply-To: <20240805093245.889357-1-jgowans@amazon.com>

Add an in-memory filesystem: guestmemfs. Memory is donated to guestmemfs
by carving it out of the normal System RAM range with the memmap= cmdline
parameter and then giving that same physical range to guestmemfs with the
guestmemfs= cmdline parameter.

A new filesystem is added; so far it doesn't do much except persist a
super block at the start of the donated memory and allows itself to be
mounted.

A hook to x86 mm init is added to reserve the memory really early on via
memblock allocator. There is probably a better arch-independent place to
do this...

Signed-off-by: James Gowans <jgowans@amazon.com>
---
 arch/x86/mm/init_64.c      |   2 +
 fs/Kconfig                 |   1 +
 fs/Makefile                |   1 +
 fs/guestmemfs/Kconfig      |  11 ++++
 fs/guestmemfs/Makefile     |   6 ++
 fs/guestmemfs/guestmemfs.c | 116 +++++++++++++++++++++++++++++++++++++
 fs/guestmemfs/guestmemfs.h |   9 +++
 include/linux/guestmemfs.h |  16 +++++
 8 files changed, 162 insertions(+)
 create mode 100644 fs/guestmemfs/Kconfig
 create mode 100644 fs/guestmemfs/Makefile
 create mode 100644 fs/guestmemfs/guestmemfs.c
 create mode 100644 fs/guestmemfs/guestmemfs.h
 create mode 100644 include/linux/guestmemfs.h

diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 8932ba8f5cdd..39fcf017c90c 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -18,6 +18,7 @@
 #include <linux/mm.h>
 #include <linux/swap.h>
 #include <linux/smp.h>
+#include <linux/guestmemfs.h>
 #include <linux/init.h>
 #include <linux/initrd.h>
 #include <linux/kexec.h>
@@ -1331,6 +1332,7 @@ static void __init preallocate_vmalloc_pages(void)
 
 void __init mem_init(void)
 {
+	guestmemfs_reserve_mem();
 	pci_iommu_alloc();
 
 	/* clear_bss() already clear the empty_zero_page */
diff --git a/fs/Kconfig b/fs/Kconfig
index a46b0cbc4d8f..727359901da8 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -321,6 +321,7 @@ source "fs/befs/Kconfig"
 source "fs/bfs/Kconfig"
 source "fs/efs/Kconfig"
 source "fs/jffs2/Kconfig"
+source "fs/guestmemfs/Kconfig"
 # UBIFS File system configuration
 source "fs/ubifs/Kconfig"
 source "fs/cramfs/Kconfig"
diff --git a/fs/Makefile b/fs/Makefile
index 6ecc9b0a53f2..044524b17d63 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -129,3 +129,4 @@ obj-$(CONFIG_EFIVAR_FS)		+= efivarfs/
 obj-$(CONFIG_EROFS_FS)		+= erofs/
 obj-$(CONFIG_VBOXSF_FS)		+= vboxsf/
 obj-$(CONFIG_ZONEFS_FS)		+= zonefs/
+obj-$(CONFIG_GUESTMEMFS_FS)	+= guestmemfs/
diff --git a/fs/guestmemfs/Kconfig b/fs/guestmemfs/Kconfig
new file mode 100644
index 000000000000..d87fca4822cb
--- /dev/null
+++ b/fs/guestmemfs/Kconfig
@@ -0,0 +1,11 @@
+# SPDX-License-Identifier: GPL-2.0-only
+
+config GUESTMEMFS_FS
+	bool "Persistent Guest memory filesystem (guestmemfs)"
+	help
+	  An in-memory filesystem on top of reserved memory specified via
+	  guestmemfs= cmdline argument.  Used for storing kernel state and
+	  userspace memory which is preserved across kexec to support
+	  live update of a hypervisor when running guest virtual machines.
+	  Select this if you need the ability to persist memory for guest VMs
+	  across kexec to do live update.
diff --git a/fs/guestmemfs/Makefile b/fs/guestmemfs/Makefile
new file mode 100644
index 000000000000..6dc820a9d4fe
--- /dev/null
+++ b/fs/guestmemfs/Makefile
@@ -0,0 +1,6 @@
+# SPDX-License-Identifier: GPL-2.0-only
+#
+# Makefile for persistent kernel filesystem
+#
+
+obj-y += guestmemfs.o
diff --git a/fs/guestmemfs/guestmemfs.c b/fs/guestmemfs/guestmemfs.c
new file mode 100644
index 000000000000..3aaada1b8df6
--- /dev/null
+++ b/fs/guestmemfs/guestmemfs.c
@@ -0,0 +1,116 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include "guestmemfs.h"
+#include <linux/dcache.h>
+#include <linux/fs.h>
+#include <linux/module.h>
+#include <linux/fs_context.h>
+#include <linux/io.h>
+#include <linux/memblock.h>
+#include <linux/statfs.h>
+
+static phys_addr_t guestmemfs_base, guestmemfs_size;
+struct guestmemfs_sb *psb;
+
+static int statfs(struct dentry *root, struct kstatfs *buf)
+{
+	simple_statfs(root, buf);
+	buf->f_bsize = PMD_SIZE;
+	buf->f_blocks = guestmemfs_size / PMD_SIZE;
+	buf->f_bfree = buf->f_bavail = buf->f_blocks;
+	return 0;
+}
+
+static const struct super_operations guestmemfs_super_ops = {
+	.statfs = statfs,
+};
+
+static int guestmemfs_fill_super(struct super_block *sb, struct fs_context *fc)
+{
+	struct inode *inode;
+	struct dentry *dentry;
+
+	psb = kzalloc(sizeof(*psb), GFP_KERNEL);
+	/*
+	 * Keep a reference to the persistent super block in the
+	 * ephemeral super block.
+	 */
+	sb->s_fs_info = psb;
+	sb->s_op = &guestmemfs_super_ops;
+
+	inode = new_inode(sb);
+	if (!inode)
+		return -ENOMEM;
+
+	inode->i_ino = 1;
+	inode->i_mode = S_IFDIR;
+	inode->i_op = &simple_dir_inode_operations;
+	inode->i_fop = &simple_dir_operations;
+	simple_inode_init_ts(inode);
+	/* directory inodes start off with i_nlink == 2 (for "." entry) */
+	inc_nlink(inode);
+
+	dentry = d_make_root(inode);
+	if (!dentry)
+		return -ENOMEM;
+	sb->s_root = dentry;
+
+	return 0;
+}
+
+static int guestmemfs_get_tree(struct fs_context *fc)
+{
+	return get_tree_nodev(fc, guestmemfs_fill_super);
+}
+
+static const struct fs_context_operations guestmemfs_context_ops = {
+	.get_tree	= guestmemfs_get_tree,
+};
+
+static int guestmemfs_init_fs_context(struct fs_context *const fc)
+{
+	fc->ops = &guestmemfs_context_ops;
+	return 0;
+}
+
+static struct file_system_type guestmemfs_fs_type = {
+	.owner                  = THIS_MODULE,
+	.name                   = "guestmemfs",
+	.init_fs_context        = guestmemfs_init_fs_context,
+	.kill_sb                = kill_litter_super,
+	.fs_flags               = FS_USERNS_MOUNT,
+};
+
+static int __init guestmemfs_init(void)
+{
+	int ret;
+
+	ret = register_filesystem(&guestmemfs_fs_type);
+	return ret;
+}
+
+/**
+ * Format: guestmemfs=<size>:<base>
+ * Just like: memmap=nn[KMG]!ss[KMG]
+ */
+static int __init parse_guestmemfs_extents(char *p)
+{
+	guestmemfs_size = memparse(p, &p);
+	return 0;
+}
+
+early_param("guestmemfs", parse_guestmemfs_extents);
+
+void __init guestmemfs_reserve_mem(void)
+{
+	guestmemfs_base = memblock_phys_alloc(guestmemfs_size, 4 << 10);
+	if (guestmemfs_base) {
+		memblock_reserved_mark_noinit(guestmemfs_base, guestmemfs_size);
+		memblock_mark_nomap(guestmemfs_base, guestmemfs_size);
+	} else {
+		pr_warn("Failed to alloc %llu bytes for guestmemfs\n", guestmemfs_size);
+	}
+}
+
+MODULE_ALIAS_FS("guestmemfs");
+module_init(guestmemfs_init);
diff --git a/fs/guestmemfs/guestmemfs.h b/fs/guestmemfs/guestmemfs.h
new file mode 100644
index 000000000000..37d8cf630e0a
--- /dev/null
+++ b/fs/guestmemfs/guestmemfs.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#define pr_fmt(fmt) "guestmemfs: " KBUILD_MODNAME ": " fmt
+
+#include <linux/guestmemfs.h>
+
+struct guestmemfs_sb {
+    /* Will be populated soon... */
+};
diff --git a/include/linux/guestmemfs.h b/include/linux/guestmemfs.h
new file mode 100644
index 000000000000..60e769c8e533
--- /dev/null
+++ b/include/linux/guestmemfs.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: MIT */
+
+#ifndef _LINUX_GUESTMEMFS_H
+#define _LINUX_GUESTMEMFS_H
+
+/*
+ * Carves out chunks of memory from memblocks for guestmemfs.
+ * Must be called in early boot before memblocks are freed.
+ */
+# ifdef CONFIG_GUESTMEMFS_FS
+void guestmemfs_reserve_mem(void);
+#else
+void guestmemfs_reserve_mem(void) { }
+#endif
+
+#endif
-- 
2.34.1


  reply	other threads:[~2024-08-05  9:33 UTC|newest]

Thread overview: 35+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-08-05  9:32 [PATCH 00/10] Introduce guestmemfs: persistent in-memory filesystem James Gowans
2024-08-05  9:32 ` James Gowans [this message]
2024-08-05 10:20   ` [PATCH 01/10] guestmemfs: Introduce filesystem skeleton Christian Brauner
2024-08-05  9:32 ` [PATCH 02/10] guestmemfs: add inode store, files and dirs James Gowans
2024-08-05  9:32 ` [PATCH 03/10] guestmemfs: add persistent data block allocator James Gowans
2024-08-05  9:32 ` [PATCH 04/10] guestmemfs: support file truncation James Gowans
2024-08-05  9:32 ` [PATCH 05/10] guestmemfs: add file mmap callback James Gowans
2024-10-29 23:05   ` Elliot Berman
2024-10-30 22:18     ` Frank van der Linden
2024-11-01 12:55       ` Gowans, James
2024-10-31 15:30     ` Gowans, James
2024-10-31 16:06       ` Jason Gunthorpe
2024-11-01 13:01         ` Gowans, James
2024-11-01 13:42           ` Jason Gunthorpe
2024-11-02  8:24             ` Gowans, James
2024-11-04 11:11               ` Mike Rapoport
2024-11-04 14:39               ` Jason Gunthorpe
2024-11-04 10:49             ` Mike Rapoport
2024-08-05  9:32 ` [PATCH 06/10] kexec/kho: Add addr flag to not initialise memory James Gowans
2024-08-05  9:32 ` [PATCH 07/10] guestmemfs: Persist filesystem metadata via KHO James Gowans
2024-08-05  9:32 ` [PATCH 08/10] guestmemfs: Block modifications when serialised James Gowans
2024-08-05  9:32 ` [PATCH 09/10] guestmemfs: Add documentation and usage instructions James Gowans
2024-08-05  9:32 ` [PATCH 10/10] MAINTAINERS: Add maintainers for guestmemfs James Gowans
2024-08-05 14:32 ` [PATCH 00/10] Introduce guestmemfs: persistent in-memory filesystem Theodore Ts'o
2024-08-05 14:41   ` Paolo Bonzini
2024-08-05 19:47     ` Gowans, James
2024-08-05 19:53   ` Gowans, James
2024-08-05 20:01 ` Jan Kara
2024-08-05 23:29   ` Jason Gunthorpe
2024-08-06  8:26     ` Gowans, James
2024-08-06  8:12   ` Gowans, James
2024-08-06 13:43     ` David Hildenbrand
2024-08-07 23:45 ` David Matlack
2024-10-17  4:53 ` Vishal Annapurve
2024-11-01 12:53   ` Gowans, James

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20240805093245.889357-2-jgowans@amazon.com \
    --to=jgowans@amazon.com \
    --cc=akpm@linux-foundation.org \
    --cc=anthony.yznaga@oracle.com \
    --cc=brauner@kernel.org \
    --cc=dwmw@amazon.co.uk \
    --cc=graf@amazon.com \
    --cc=jack@suse.cz \
    --cc=jgg@ziepe.ca \
    --cc=kvm@vger.kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=nsaenz@amazon.es \
    --cc=pbonzini@redhat.com \
    --cc=pdurrant@amazon.co.uk \
    --cc=rppt@kernel.org \
    --cc=seanjc@google.com \
    --cc=steven.sistare@oracle.com \
    --cc=usama.arif@bytedance.com \
    --cc=viro@zeniv.linux.org.uk \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).