public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: madvenka@linux.microsoft.com
To: gregkh@linuxfoundation.org, pbonzini@redhat.com, rppt@kernel.org,
	jgowans@amazon.com, graf@amazon.de, arnd@arndb.de,
	keescook@chromium.org, stanislav.kinsburskii@gmail.com,
	anthony.yznaga@oracle.com, linux-mm@kvack.org,
	linux-kernel@vger.kernel.org, madvenka@linux.microsoft.com,
	jamorris@linux.microsoft.com
Subject: [RFC PATCH v1 09/10] mm/prmem: Implement DAX support for Persistent Ramdisks.
Date: Mon, 16 Oct 2023 18:32:14 -0500	[thread overview]
Message-ID: <20231016233215.13090-10-madvenka@linux.microsoft.com> (raw)
In-Reply-To: <20231016233215.13090-1-madvenka@linux.microsoft.com>

From: "Madhavan T. Venkataraman" <madvenka@linux.microsoft.com>

One problem with using a ramdisk is that the page cache will contain
redundant copies of ramdisk data. To avoid this, implement DAX support
for persistent ramdisks.

To take advantage of this, the filesystem installed on the ramdisk must
support DAX (ext4, for example) and must be mounted with the dax option. E.g.,

	sudo mount -t ext4 -o dax /dev/pram0 /path/to/mountpoint

Signed-off-by: Madhavan T. Venkataraman <madvenka@linux.microsoft.com>
---
 drivers/block/brd.c | 106 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 106 insertions(+)

diff --git a/drivers/block/brd.c b/drivers/block/brd.c
index 3a05e56ca16f..d4a42d3bd212 100644
--- a/drivers/block/brd.c
+++ b/drivers/block/brd.c
@@ -25,6 +25,9 @@
 #include <linux/backing-dev.h>
 #include <linux/debugfs.h>
 #include <linux/prmem.h>
+#include <linux/pfn_t.h>
+#include <linux/dax.h>
+#include <linux/uio.h>
 
 #include <linux/uaccess.h>
 
@@ -42,6 +45,7 @@ struct brd_device {
 	enum brd_type		brd_type;
 	struct gendisk		*brd_disk;
 	struct list_head	brd_list;
+	struct dax_device	*brd_dax;
 
 	/*
 	 * Backing store of pages. This is the contents of the block device.
@@ -58,6 +62,8 @@ static void brd_free_page(struct brd_device *brd, struct page *page);
 static void brd_xa_init(struct brd_device *brd);
 static void brd_init_name(struct brd_device *brd, char *name);
 static void brd_set_capacity(struct brd_device *brd);
+static int brd_dax_init(struct brd_device *brd);
+static void brd_dax_cleanup(struct brd_device *brd);
 
 /*
  * Look up and return a brd's page for a given sector.
@@ -408,6 +414,9 @@ static int brd_alloc(int i)
 	strscpy(disk->disk_name, buf, DISK_NAME_LEN);
 	brd_set_capacity(brd);
 	
+	if (brd_dax_init(brd))
+		goto out_clean_dax;
+
 	/*
 	 * This is so fdisk will align partitions on 4k, because of
 	 * direct_access API needing 4k alignment, returning a PFN
@@ -421,6 +430,8 @@ static int brd_alloc(int i)
 	blk_queue_flag_set(QUEUE_FLAG_NONROT, disk->queue);
 	blk_queue_flag_set(QUEUE_FLAG_SYNCHRONOUS, disk->queue);
 	blk_queue_flag_set(QUEUE_FLAG_NOWAIT, disk->queue);
+	if (brd->brd_dax)
+		blk_queue_flag_set(QUEUE_FLAG_DAX, disk->queue);
 	err = add_disk(disk);
 	if (err)
 		goto out_cleanup_disk;
@@ -429,6 +440,8 @@ static int brd_alloc(int i)
 
 out_cleanup_disk:
 	put_disk(disk);
+out_clean_dax:
+	brd_dax_cleanup(brd);
 out_free_dev:
 	list_del(&brd->brd_list);
 	brd_free_device(brd);
@@ -447,6 +460,7 @@ static void brd_cleanup(void)
 	debugfs_remove_recursive(brd_debugfs_dir);
 
 	list_for_each_entry_safe(brd, next, &brd_devices, brd_list) {
+		brd_dax_cleanup(brd);
 		del_gendisk(brd->brd_disk);
 		put_disk(brd->brd_disk);
 		brd_free_pages(brd);
@@ -659,3 +673,95 @@ static void brd_set_capacity(struct brd_device *brd)
 		disksize = prd_data[brd->brd_number].size;
 	set_capacity(brd->brd_disk, disksize * 2);
 }
+
+/*
+ * Gate checked by the DAX setup/teardown helpers; true when CONFIG_FS_DAX
+ * is enabled in the kernel configuration.
+ */
+static bool prd_dax_enabled = IS_ENABLED(CONFIG_FS_DAX);
+
+/* Forward declarations of the callbacks referenced by the ops table below. */
+static long brd_dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff,
+		long nr_pages, enum dax_access_mode mode,
+		void **kaddr, pfn_t *pfn);
+static int brd_dax_zero_page_range(struct dax_device *dax_dev, pgoff_t pgoff,
+		size_t nr_pages);
+
+/* DAX operations handed to alloc_dax() for each ramdisk. */
+static const struct dax_operations brd_dax_ops = {
+	.zero_page_range	= brd_dax_zero_page_range,
+	.direct_access		= brd_dax_direct_access,
+};
+
+/*
+ * Allocate a DAX device for @brd and register it as the DAX host of the
+ * brd's gendisk.
+ *
+ * Nothing is done (and 0 is returned) when DAX is disabled or the device
+ * is of type BRD_NORMAL; brd->brd_dax is left untouched in that case.
+ *
+ * Returns 0 on success, or the negative errno reported by alloc_dax() or
+ * dax_add_host(). On failure brd->brd_dax is NULL, so nothing is left
+ * behind for the caller to release.
+ */
+static int brd_dax_init(struct brd_device *brd)
+{
+	struct dax_device *dax_dev;
+	int err;
+
+	if (!prd_dax_enabled || brd->brd_type == BRD_NORMAL)
+		return 0;
+
+	dax_dev = alloc_dax(brd, &brd_dax_ops);
+	if (IS_ERR(dax_dev)) {
+		pr_warn("%s: DAX failed\n", __func__);
+		brd->brd_dax = NULL;
+		/* Hand the real reason back instead of a blanket -ENOMEM. */
+		return PTR_ERR(dax_dev);
+	}
+
+	err = dax_add_host(dax_dev, brd->brd_disk);
+	if (err) {
+		pr_warn("%s: DAX add failed\n", __func__);
+		/* Undo alloc_dax() so no half-registered device remains. */
+		kill_dax(dax_dev);
+		put_dax(dax_dev);
+		brd->brd_dax = NULL;
+		return err;
+	}
+
+	/* Publish the device only once it is fully set up. */
+	brd->brd_dax = dax_dev;
+	return 0;
+}
+
+/*
+ * Tear down the DAX state of @brd: unregister the DAX host of the brd's
+ * gendisk, then kill and drop the DAX device. Does nothing when DAX is
+ * disabled, the device is of type BRD_NORMAL, or no DAX device is set.
+ */
+static void brd_dax_cleanup(struct brd_device *brd)
+{
+	struct dax_device *dax_dev;
+
+	if (!prd_dax_enabled || brd->brd_type == BRD_NORMAL)
+		return;
+
+	dax_dev = brd->brd_dax;
+	if (!dax_dev)
+		return;
+
+	dax_remove_host(brd->brd_disk);
+	kill_dax(dax_dev);
+	put_dax(dax_dev);
+}
+
+/*
+ * Zero @nr_pages pages of the DAX device starting at page offset @pgoff.
+ *
+ * dax_direct_access() returns how many pages are actually mapped at the
+ * returned address, which may be fewer than requested (the direct_access
+ * implementation in this file reports one page per call). The range is
+ * therefore zeroed chunk by chunk, never writing past what was mapped.
+ *
+ * Returns 0 on success or a negative errno from dax_direct_access().
+ */
+static int brd_dax_zero_page_range(struct dax_device *dax_dev,
+				   pgoff_t pgoff, size_t nr_pages)
+{
+	while (nr_pages) {
+		void *kaddr;
+		long avail;
+
+		avail = dax_direct_access(dax_dev, pgoff, nr_pages, DAX_ACCESS,
+					  &kaddr, NULL);
+		if (avail < 0)
+			return avail;
+		/* Defensive: no progress would mean looping forever. */
+		if (!avail)
+			return -EIO;
+		if ((size_t)avail > nr_pages)
+			avail = nr_pages;
+
+		memset(kaddr, 0, (size_t)avail << PAGE_SHIFT);
+		pgoff += avail;
+		nr_pages -= avail;
+	}
+	return 0;
+}
+
+/*
+ * Resolve the page backing DAX page offset @pgoff of @brd. The page is
+ * looked up after brd_insert_page() has been called for its sector.
+ *
+ * @brd:      ramdisk to resolve the page in; NULL yields -ENODEV
+ * @pgoff:    page offset into the device
+ * @nr_pages: number of pages the caller asked for; not used here, a
+ *            single page is resolved per call
+ * @kaddr:    optional; receives page_address() of the page
+ * @pfn:      optional; receives the pfn_t of the page
+ *
+ * Returns 1 (one page available at @pgoff) on success, the error returned
+ * by brd_insert_page(), or -ENOSPC if the page is not found afterwards.
+ */
+static long __brd_direct_access(struct brd_device *brd, pgoff_t pgoff,
+		long nr_pages, void **kaddr, pfn_t *pfn)
+{
+	struct page *page;
+	sector_t sector = (sector_t) pgoff << PAGE_SECTORS_SHIFT;
+	int ret;
+
+	if (!brd)
+		return -ENODEV;
+
+	ret = brd_insert_page(brd, sector, GFP_NOWAIT);
+	if (ret)
+		return ret;
+
+	page = brd_lookup_page(brd, sector);
+	if (!page)
+		return -ENOSPC;
+
+	/* Both output pointers are optional; callers may pass NULL for either. */
+	if (kaddr)
+		*kaddr = page_address(page);
+	if (pfn)
+		*pfn = page_to_pfn_t(page);
+
+	return 1;
+}
+
+/*
+ * dax_operations.direct_access callback: fetch the brd stored as the DAX
+ * device's private data and delegate to __brd_direct_access(). The @mode
+ * argument is not consulted.
+ */
+static long brd_dax_direct_access(struct dax_device *dax_dev,
+		pgoff_t pgoff, long nr_pages, enum dax_access_mode mode,
+		void **kaddr, pfn_t *pfn)
+{
+	return __brd_direct_access(dax_get_private(dax_dev), pgoff, nr_pages,
+				   kaddr, pfn);
+}
-- 
2.25.1


  parent reply	other threads:[~2023-10-16 23:33 UTC|newest]

Thread overview: 13+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
     [not found] <1b1bc25eb87355b91fcde1de7c2f93f38abb2bf9>
2023-10-16 23:32 ` [RFC PATCH v1 00/10] mm/prmem: Implement the Persistent-Across-Kexec memory feature (prmem) madvenka
2023-10-16 23:32   ` [RFC PATCH v1 01/10] mm/prmem: Allocate memory during boot for storing persistent data madvenka
2023-10-16 23:32   ` [RFC PATCH v1 02/10] mm/prmem: Reserve metadata and persistent regions in early boot after kexec madvenka
2023-10-16 23:32   ` [RFC PATCH v1 03/10] mm/prmem: Manage persistent memory with the gen pool allocator madvenka
2023-10-16 23:32   ` [RFC PATCH v1 04/10] mm/prmem: Implement a page allocator for persistent memory madvenka
2023-10-16 23:32   ` [RFC PATCH v1 05/10] mm/prmem: Implement a buffer " madvenka
2023-10-16 23:32   ` [RFC PATCH v1 06/10] mm/prmem: Implement persistent XArray (and Radix Tree) madvenka
2023-10-16 23:32   ` [RFC PATCH v1 07/10] mm/prmem: Implement named Persistent Instances madvenka
2023-10-16 23:32   ` [RFC PATCH v1 08/10] mm/prmem: Implement Persistent Ramdisk instances madvenka
2023-10-16 23:32   ` madvenka [this message]
2023-10-16 23:32   ` [RFC PATCH v1 10/10] mm/prmem: Implement dynamic expansion of prmem madvenka
2023-10-17  8:31   ` [RFC PATCH v1 00/10] mm/prmem: Implement the Persistent-Across-Kexec memory feature (prmem) Alexander Graf
2023-10-17 18:08     ` Madhavan T. Venkataraman

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20231016233215.13090-10-madvenka@linux.microsoft.com \
    --to=madvenka@linux.microsoft.com \
    --cc=anthony.yznaga@oracle.com \
    --cc=arnd@arndb.de \
    --cc=graf@amazon.de \
    --cc=gregkh@linuxfoundation.org \
    --cc=jamorris@linux.microsoft.com \
    --cc=jgowans@amazon.com \
    --cc=keescook@chromium.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=pbonzini@redhat.com \
    --cc=rppt@kernel.org \
    --cc=stanislav.kinsburskii@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox