public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: madvenka@linux.microsoft.com
To: gregkh@linuxfoundation.org, pbonzini@redhat.com, rppt@kernel.org,
	jgowans@amazon.com, graf@amazon.de, arnd@arndb.de,
	keescook@chromium.org, stanislav.kinsburskii@gmail.com,
	anthony.yznaga@oracle.com, linux-mm@kvack.org,
	linux-kernel@vger.kernel.org, madvenka@linux.microsoft.com,
	jamorris@linux.microsoft.com
Subject: [RFC PATCH v1 10/10] mm/prmem: Implement dynamic expansion of prmem.
Date: Mon, 16 Oct 2023 18:32:15 -0500	[thread overview]
Message-ID: <20231016233215.13090-11-madvenka@linux.microsoft.com> (raw)
In-Reply-To: <20231016233215.13090-1-madvenka@linux.microsoft.com>

From: "Madhavan T. Venkataraman" <madvenka@linux.microsoft.com>

For some use cases, it is hard to predict how much memory is actually
needed to store persistent data, because this depends on the workload.
Either we overcommit memory for persistent data up front, or we allow
dynamic expansion of prmem memory.

Implement dynamic expansion of prmem. When the allocator runs out of
memory, it calls alloc_pages() with order MAX_ORDER to allocate a max
order block of pages. It creates a region for that memory and adds it to
the list of regions. Then, the allocator can allocate from that region.

To allow this, extend the command line parameter:

	prmem=size[KMG][,max_size[KMG]]

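Size is allocated up front at boot, as in the earlier patches of this
series. Between size and max_size, prmem is expanded dynamically, one max
order block at a time, as described above. Expansion stops when adding
another block would exceed max_size. max_size must be page aligned and
greater than size.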

Choosing a max order page means that no fragmentation is created for
transparent huge pages and kmem slabs. But fragmentation may be created
for 1GB pages. This is not a problem for 1GB pages that are reserved
up front. This could be a problem for 1GB pages that are allocated at
run time dynamically.

If max_size is omitted from the command line parameter, it defaults to
size and no dynamic expansion will happen.

Signed-off-by: Madhavan T. Venkataraman <madvenka@linux.microsoft.com>
---
 include/linux/prmem.h          |  8 +++++++
 kernel/prmem/prmem_allocator.c | 38 ++++++++++++++++++++++++++++++++++
 kernel/prmem/prmem_init.c      |  1 +
 kernel/prmem/prmem_misc.c      |  3 ++-
 kernel/prmem/prmem_parse.c     | 20 +++++++++++++++++-
 kernel/prmem/prmem_region.c    |  1 +
 kernel/prmem/prmem_reserve.c   |  1 +
 7 files changed, 70 insertions(+), 2 deletions(-)

diff --git a/include/linux/prmem.h b/include/linux/prmem.h
index c7034690f7cb..bb552946cb5b 100644
--- a/include/linux/prmem.h
+++ b/include/linux/prmem.h
@@ -83,6 +83,9 @@ struct prmem_instance {
  * metadata	Physical address of the metadata page.
  * size		Size of initial memory allocated to prmem.
  *
+ * cur_size	Current amount of memory allocated to prmem.
+ * max_size	Maximum amount of memory that can be allocated to prmem.
+ *
  * regions	List of memory regions.
  *
  * instances	Persistent instances.
@@ -95,6 +98,10 @@ struct prmem {
 	unsigned long		metadata;
 	size_t			size;
 
+	/* Dynamic expansion. */
+	size_t			cur_size;
+	size_t			max_size;
+
 	/* Persistent Regions. */
 	struct list_head	regions;
 
@@ -109,6 +116,7 @@ extern struct prmem		*prmem;
 extern unsigned long		prmem_metadata;
 extern unsigned long		prmem_pa;
 extern size_t			prmem_size;
+extern size_t			prmem_max_size;
 extern bool			prmem_inited;
 extern spinlock_t		prmem_lock;
 
diff --git a/kernel/prmem/prmem_allocator.c b/kernel/prmem/prmem_allocator.c
index f12975bc6777..1cb3eae8a3e7 100644
--- a/kernel/prmem/prmem_allocator.c
+++ b/kernel/prmem/prmem_allocator.c
@@ -9,17 +9,72 @@
 
 /* Page Allocation functions. */
 
+/*
+ * Try to grow prmem by one max order block from the page allocator.
+ *
+ * Must be called with prmem_lock held. The lock is dropped around
+ * alloc_pages() and retaken afterwards, so cur_size is rechecked before
+ * the new region is added.
+ *
+ * Does nothing if the block would take cur_size beyond max_size. If the
+ * block cannot be added as a region, it is given back to the page allocator.
+ */
+static void prmem_expand(void)
+{
+	struct prmem_region	*region;
+	struct page		*pages;
+	unsigned int		order = MAX_ORDER;
+	size_t			size = (1UL << order) << PAGE_SHIFT;
+
+	if (prmem->cur_size + size > prmem->max_size)
+		return;
+
+	spin_unlock(&prmem_lock);
+	pages = alloc_pages(GFP_NOWAIT, order);
+	spin_lock(&prmem_lock);
+
+	if (!pages)
+		return;
+
+	/* cur_size may have changed while the lock was dropped. Recheck. */
+	if (prmem->cur_size + size > prmem->max_size)
+		goto free;
+
+	region = prmem_add_region(page_to_phys(pages), size);
+	if (!region)
+		goto free;
+
+	pr_warn("%s: prmem expanded by %zu\n", __func__, size);
+	return;
+free:
+	__free_pages(pages, order);
+}
+
+/*
+ * Allocate (PAGE_SIZE << order) bytes from the first region that can satisfy
+ * the request. If none can, try to expand prmem once and scan the regions
+ * again.
+ *
+ * Called with prmem_lock held. Returns NULL if the allocation fails.
+ */
 void *prmem_alloc_pages_locked(unsigned int order)
 {
 	struct prmem_region	*region;
 	void			*va;
 	size_t			size = (1UL << order) << PAGE_SHIFT;
+	bool			expand = true;
 
+retry:
 	list_for_each_entry(region, &prmem->regions, node) {
 		va = prmem_alloc_pool(region, size, size);
 		if (va)
 			return va;
 	}
+	if (expand) {
+		expand = false;
+		prmem_expand();
+		goto retry;
+	}
 	return NULL;
 }
 
diff --git a/kernel/prmem/prmem_init.c b/kernel/prmem/prmem_init.c
index 166fca688ab3..f4814cc88508 100644
--- a/kernel/prmem/prmem_init.c
+++ b/kernel/prmem/prmem_init.c
@@ -20,6 +20,7 @@ void __init prmem_init(void)
 		/* Cold boot. */
 		prmem->metadata = prmem_metadata;
 		prmem->size = prmem_size;
+		prmem->max_size = prmem_max_size;
 		INIT_LIST_HEAD(&prmem->regions);
 		INIT_LIST_HEAD(&prmem->instances);
 
diff --git a/kernel/prmem/prmem_misc.c b/kernel/prmem/prmem_misc.c
index 49b6a7232c1a..3100662d2cbe 100644
--- a/kernel/prmem/prmem_misc.c
+++ b/kernel/prmem/prmem_misc.c
@@ -68,7 +68,8 @@ bool __init prmem_validate(void)
 	unsigned long		checksum;
 
 	/* Sanity check the boot parameter. */
-	if (prmem_metadata != prmem->metadata || prmem_size != prmem->size) {
+	if (prmem_metadata != prmem->metadata || prmem_size != prmem->size ||
+	    prmem_max_size != prmem->max_size) {
 		pr_warn("%s: Boot parameter mismatch\n", __func__);
 		return false;
 	}
diff --git a/kernel/prmem/prmem_parse.c b/kernel/prmem/prmem_parse.c
index 6c1a23c6b84e..3a57b37fa191 100644
--- a/kernel/prmem/prmem_parse.c
+++ b/kernel/prmem/prmem_parse.c
@@ -8,9 +8,11 @@
 #include <linux/prmem.h>
 
 /*
- * Syntax: prmem=size[KMG]
+ * Syntax: prmem=size[KMG][,max_size[KMG]]
  *
  *	Specifies the size of the initial memory to be allocated to prmem.
+ *	Optionally, specifies the maximum amount of memory to be allocated to
+ *	prmem. prmem will expand dynamically between size and max_size.
  */
 static int __init prmem_size_parse(char *cmdline)
 {
@@ -28,6 +30,22 @@ static int __init prmem_size_parse(char *cmdline)
 	}
 
 	prmem_size = size;
+	prmem_max_size = size;
+
+	cur = tmp;
+	if (*cur++ == ',') {
+		/* Get max size. */
+		size = memparse(cur, &tmp);
+		if (cur == tmp || !size || size & (PAGE_SIZE - 1) ||
+		    size <= prmem_size) {
+			prmem_size = 0;
+			prmem_max_size = 0;
+			pr_warn("%s: Incorrect max size %lx\n", __func__, size);
+			return -EINVAL;
+		}
+		prmem_max_size = size;
+	}
+
 	return 0;
 }
 early_param("prmem", prmem_size_parse);
diff --git a/kernel/prmem/prmem_region.c b/kernel/prmem/prmem_region.c
index 6dc88c74d9c8..390329a34b74 100644
--- a/kernel/prmem/prmem_region.c
+++ b/kernel/prmem/prmem_region.c
@@ -82,5 +82,6 @@ struct prmem_region *prmem_add_region(unsigned long pa, size_t size)
 		return NULL;
 
 	list_add_tail(&region->node, &prmem->regions);
+	prmem->cur_size += size;
 	return region;
 }
diff --git a/kernel/prmem/prmem_reserve.c b/kernel/prmem/prmem_reserve.c
index 8000fff05402..c5ae5d7d8f0a 100644
--- a/kernel/prmem/prmem_reserve.c
+++ b/kernel/prmem/prmem_reserve.c
@@ -11,6 +11,7 @@ struct prmem		*prmem;
 unsigned long		prmem_metadata;
 unsigned long		prmem_pa;
 unsigned long		prmem_size;
+unsigned long		prmem_max_size;
 
 void __init prmem_reserve_early(void)
 {
-- 
2.25.1


  parent reply	other threads:[~2023-10-16 23:33 UTC|newest]

Thread overview: 13+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
     [not found] <1b1bc25eb87355b91fcde1de7c2f93f38abb2bf9>
2023-10-16 23:32 ` [RFC PATCH v1 00/10] mm/prmem: Implement the Persistent-Across-Kexec memory feature (prmem) madvenka
2023-10-16 23:32   ` [RFC PATCH v1 01/10] mm/prmem: Allocate memory during boot for storing persistent data madvenka
2023-10-16 23:32   ` [RFC PATCH v1 02/10] mm/prmem: Reserve metadata and persistent regions in early boot after kexec madvenka
2023-10-16 23:32   ` [RFC PATCH v1 03/10] mm/prmem: Manage persistent memory with the gen pool allocator madvenka
2023-10-16 23:32   ` [RFC PATCH v1 04/10] mm/prmem: Implement a page allocator for persistent memory madvenka
2023-10-16 23:32   ` [RFC PATCH v1 05/10] mm/prmem: Implement a buffer " madvenka
2023-10-16 23:32   ` [RFC PATCH v1 06/10] mm/prmem: Implement persistent XArray (and Radix Tree) madvenka
2023-10-16 23:32   ` [RFC PATCH v1 07/10] mm/prmem: Implement named Persistent Instances madvenka
2023-10-16 23:32   ` [RFC PATCH v1 08/10] mm/prmem: Implement Persistent Ramdisk instances madvenka
2023-10-16 23:32   ` [RFC PATCH v1 09/10] mm/prmem: Implement DAX support for Persistent Ramdisks madvenka
2023-10-16 23:32   ` madvenka [this message]
2023-10-17  8:31   ` [RFC PATCH v1 00/10] mm/prmem: Implement the Persistent-Across-Kexec memory feature (prmem) Alexander Graf
2023-10-17 18:08     ` Madhavan T. Venkataraman

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20231016233215.13090-11-madvenka@linux.microsoft.com \
    --to=madvenka@linux.microsoft.com \
    --cc=anthony.yznaga@oracle.com \
    --cc=arnd@arndb.de \
    --cc=graf@amazon.de \
    --cc=gregkh@linuxfoundation.org \
    --cc=jamorris@linux.microsoft.com \
    --cc=jgowans@amazon.com \
    --cc=keescook@chromium.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=pbonzini@redhat.com \
    --cc=rppt@kernel.org \
    --cc=stanislav.kinsburskii@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox