All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.ibm.com>
To: linux-mm@kvack.org, akpm@linux-foundation.org,
	mpe@ellerman.id.au, linuxppc-dev@lists.ozlabs.org,
	npiggin@gmail.com, christophe.leroy@csgroup.eu
Cc: Michal Hocko <mhocko@suse.com>, Dave Jiang <dave.jiang@intel.com>,
	"Rafael J. Wysocki" <rafael@kernel.org>,
	Vishal Verma <vishal.l.verma@intel.com>,
	David Hildenbrand <david@redhat.com>,
	Dave Hansen <dave.hansen@linux.intel.com>,
	"Aneesh Kumar K . V" <aneesh.kumar@linux.ibm.com>,
	Huang Ying <ying.huang@intel.com>,
	Dan Williams <dan.j.williams@intel.com>,
	Oscar Salvador <osalvador@suse.de>, Len Brown <lenb@kernel.org>
Subject: [PATCH v3 6/7] dax/kmem: Always enroll hotplugged memory for memmap_on_memory
Date: Tue, 11 Jul 2023 10:18:32 +0530	[thread overview]
Message-ID: <20230711044834.72809-7-aneesh.kumar@linux.ibm.com> (raw)
In-Reply-To: <20230711044834.72809-1-aneesh.kumar@linux.ibm.com>

From: Vishal Verma <vishal.l.verma@intel.com>

With DAX memory regions originating from CXL memory expanders or
NVDIMMs, the kmem driver may be hot-adding huge amounts of system memory
on a system without enough 'regular' main memory to support the memmap
for it. To avoid this, ensure that all kmem managed hotplugged memory is
added with the MHP_MEMMAP_ON_MEMORY flag to place the memmap on the
new memory region being hot added.

To do this, call add_memory_driver_managed() in chunks of
memory_block_size_bytes(), as that is a requirement for memmap_on_memory.

Cc: "Rafael J. Wysocki" <rafael@kernel.org>
Cc: Len Brown <lenb@kernel.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: David Hildenbrand <david@redhat.com>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Dave Jiang <dave.jiang@intel.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Huang Ying <ying.huang@intel.com>
Signed-off-by: Vishal Verma <vishal.l.verma@intel.com>
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
---
 drivers/dax/kmem.c | 81 +++++++++++++++++++++++++++++++++-------------
 1 file changed, 59 insertions(+), 22 deletions(-)

diff --git a/drivers/dax/kmem.c b/drivers/dax/kmem.c
index 898ca9505754..840bf7b40a44 100644
--- a/drivers/dax/kmem.c
+++ b/drivers/dax/kmem.c
@@ -12,6 +12,7 @@
 #include <linux/mm.h>
 #include <linux/mman.h>
 #include <linux/memory-tiers.h>
+#include <linux/memory_hotplug.h>
 #include "dax-private.h"
 #include "bus.h"
 
@@ -105,6 +106,7 @@ static int dev_dax_kmem_probe(struct dev_dax *dev_dax)
 	data->mgid = rc;
 
 	for (i = 0; i < dev_dax->nr_range; i++) {
+		u64 cur_start, cur_len, remaining;
 		struct resource *res;
 		struct range range;
 
@@ -137,21 +139,42 @@ static int dev_dax_kmem_probe(struct dev_dax *dev_dax)
 		res->flags = IORESOURCE_SYSTEM_RAM;
 
 		/*
-		 * Ensure that future kexec'd kernels will not treat
-		 * this as RAM automatically.
+		 * Add memory in chunks of memory_block_size_bytes() so that
+		 * it is considered for MHP_MEMMAP_ON_MEMORY
+		 * @range has already been aligned to memory_block_size_bytes(),
+		 * so the following loop will always break it down cleanly.
 		 */
-		rc = add_memory_driver_managed(data->mgid, range.start,
-				range_len(&range), kmem_name, MHP_NID_IS_MGID);
-
-		if (rc) {
-			dev_warn(dev, "mapping%d: %#llx-%#llx memory add failed\n",
-					i, range.start, range.end);
-			remove_resource(res);
-			kfree(res);
-			data->res[i] = NULL;
-			if (mapped)
-				continue;
-			goto err_request_mem;
+		cur_start = range.start;
+		cur_len = memory_block_size_bytes();
+		remaining = range_len(&range);
+		while (remaining) {
+			/*
+			 * If alignment rules are not satisfied we will
+			 * fall back to normal memmap allocation.
+			 */
+			mhp_t mhp_flags = MHP_NID_IS_MGID | MHP_MEMMAP_ON_MEMORY;
+			/*
+			 * Ensure that future kexec'd kernels will not treat
+			 * this as RAM automatically.
+			 */
+			rc = add_memory_driver_managed(data->mgid, cur_start,
+						       cur_len, kmem_name,
+						       mhp_flags);
+
+			if (rc) {
+				dev_warn(dev,
+					 "mapping%d: %#llx-%#llx memory add failed\n",
+					 i, cur_start, cur_start + cur_len - 1);
+				remove_resource(res);
+				kfree(res);
+				data->res[i] = NULL;
+				if (mapped)
+					continue;
+				goto err_request_mem;
+			}
+
+			cur_start += cur_len;
+			remaining -= cur_len;
 		}
 		mapped++;
 	}
@@ -186,25 +209,39 @@ static void dev_dax_kmem_remove(struct dev_dax *dev_dax)
 	 * unbind will succeed even if we return failure.
 	 */
 	for (i = 0; i < dev_dax->nr_range; i++) {
+
+		u64 cur_start, cur_len, remaining;
 		struct range range;
+		bool resource_remove;
 		int rc;
 
 		rc = dax_kmem_range(dev_dax, i, &range);
 		if (rc)
 			continue;
 
-		rc = remove_memory(range.start, range_len(&range));
-		if (rc == 0) {
+		resource_remove = true;
+		cur_start = range.start;
+		cur_len = memory_block_size_bytes();
+		remaining = range_len(&range);
+		while (remaining) {
+
+			rc = remove_memory(cur_start, cur_len);
+			if (rc) {
+				resource_remove = false;
+				dev_err(dev,
+					"mapping%d: %#llx-%#llx cannot be hotremoved until the next reboot\n",
+					i, cur_start, cur_len);
+			}
+			cur_start += cur_len;
+			remaining -= cur_len;
+		}
+		if (resource_remove) {
 			remove_resource(data->res[i]);
 			kfree(data->res[i]);
 			data->res[i] = NULL;
 			success++;
-			continue;
-		}
-		any_hotremove_failed = true;
-		dev_err(dev,
-			"mapping%d: %#llx-%#llx cannot be hotremoved until the next reboot\n",
-				i, range.start, range.end);
+		} else
+			any_hotremove_failed = true;
 	}
 
 	if (success >= dev_dax->nr_range) {
-- 
2.41.0


WARNING: multiple messages have this Message-ID (diff)
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.ibm.com>
To: linux-mm@kvack.org, akpm@linux-foundation.org,
	mpe@ellerman.id.au, linuxppc-dev@lists.ozlabs.org,
	npiggin@gmail.com, christophe.leroy@csgroup.eu
Cc: Oscar Salvador <osalvador@suse.de>,
	David Hildenbrand <david@redhat.com>,
	Michal Hocko <mhocko@suse.com>,
	Vishal Verma <vishal.l.verma@intel.com>,
	"Rafael J. Wysocki" <rafael@kernel.org>,
	Len Brown <lenb@kernel.org>,
	Dan Williams <dan.j.williams@intel.com>,
	Dave Jiang <dave.jiang@intel.com>,
	Dave Hansen <dave.hansen@linux.intel.com>,
	Huang Ying <ying.huang@intel.com>,
	"Aneesh Kumar K . V" <aneesh.kumar@linux.ibm.com>
Subject: [PATCH v3 6/7] dax/kmem: Always enroll hotplugged memory for memmap_on_memory
Date: Tue, 11 Jul 2023 10:18:32 +0530	[thread overview]
Message-ID: <20230711044834.72809-7-aneesh.kumar@linux.ibm.com> (raw)
In-Reply-To: <20230711044834.72809-1-aneesh.kumar@linux.ibm.com>

From: Vishal Verma <vishal.l.verma@intel.com>

With DAX memory regions originating from CXL memory expanders or
NVDIMMs, the kmem driver may be hot-adding huge amounts of system memory
on a system without enough 'regular' main memory to support the memmap
for it. To avoid this, ensure that all kmem managed hotplugged memory is
added with the MHP_MEMMAP_ON_MEMORY flag to place the memmap on the
new memory region being hot added.

To do this, call add_memory_driver_managed() in chunks of
memory_block_size_bytes(), as that is a requirement for memmap_on_memory.

Cc: "Rafael J. Wysocki" <rafael@kernel.org>
Cc: Len Brown <lenb@kernel.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: David Hildenbrand <david@redhat.com>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Dave Jiang <dave.jiang@intel.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Huang Ying <ying.huang@intel.com>
Signed-off-by: Vishal Verma <vishal.l.verma@intel.com>
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
---
 drivers/dax/kmem.c | 81 +++++++++++++++++++++++++++++++++-------------
 1 file changed, 59 insertions(+), 22 deletions(-)

diff --git a/drivers/dax/kmem.c b/drivers/dax/kmem.c
index 898ca9505754..840bf7b40a44 100644
--- a/drivers/dax/kmem.c
+++ b/drivers/dax/kmem.c
@@ -12,6 +12,7 @@
 #include <linux/mm.h>
 #include <linux/mman.h>
 #include <linux/memory-tiers.h>
+#include <linux/memory_hotplug.h>
 #include "dax-private.h"
 #include "bus.h"
 
@@ -105,6 +106,7 @@ static int dev_dax_kmem_probe(struct dev_dax *dev_dax)
 	data->mgid = rc;
 
 	for (i = 0; i < dev_dax->nr_range; i++) {
+		u64 cur_start, cur_len, remaining;
 		struct resource *res;
 		struct range range;
 
@@ -137,21 +139,42 @@ static int dev_dax_kmem_probe(struct dev_dax *dev_dax)
 		res->flags = IORESOURCE_SYSTEM_RAM;
 
 		/*
-		 * Ensure that future kexec'd kernels will not treat
-		 * this as RAM automatically.
+		 * Add memory in chunks of memory_block_size_bytes() so that
+		 * it is considered for MHP_MEMMAP_ON_MEMORY
+		 * @range has already been aligned to memory_block_size_bytes(),
+		 * so the following loop will always break it down cleanly.
 		 */
-		rc = add_memory_driver_managed(data->mgid, range.start,
-				range_len(&range), kmem_name, MHP_NID_IS_MGID);
-
-		if (rc) {
-			dev_warn(dev, "mapping%d: %#llx-%#llx memory add failed\n",
-					i, range.start, range.end);
-			remove_resource(res);
-			kfree(res);
-			data->res[i] = NULL;
-			if (mapped)
-				continue;
-			goto err_request_mem;
+		cur_start = range.start;
+		cur_len = memory_block_size_bytes();
+		remaining = range_len(&range);
+		while (remaining) {
+			/*
+			 * If alignment rules are not satisfied we will
+			 * fall back to normal memmap allocation.
+			 */
+			mhp_t mhp_flags = MHP_NID_IS_MGID | MHP_MEMMAP_ON_MEMORY;
+			/*
+			 * Ensure that future kexec'd kernels will not treat
+			 * this as RAM automatically.
+			 */
+			rc = add_memory_driver_managed(data->mgid, cur_start,
+						       cur_len, kmem_name,
+						       mhp_flags);
+
+			if (rc) {
+				dev_warn(dev,
+					 "mapping%d: %#llx-%#llx memory add failed\n",
+					 i, cur_start, cur_start + cur_len - 1);
+				remove_resource(res);
+				kfree(res);
+				data->res[i] = NULL;
+				if (mapped)
+					continue;
+				goto err_request_mem;
+			}
+
+			cur_start += cur_len;
+			remaining -= cur_len;
 		}
 		mapped++;
 	}
@@ -186,25 +209,39 @@ static void dev_dax_kmem_remove(struct dev_dax *dev_dax)
 	 * unbind will succeed even if we return failure.
 	 */
 	for (i = 0; i < dev_dax->nr_range; i++) {
+
+		u64 cur_start, cur_len, remaining;
 		struct range range;
+		bool resource_remove;
 		int rc;
 
 		rc = dax_kmem_range(dev_dax, i, &range);
 		if (rc)
 			continue;
 
-		rc = remove_memory(range.start, range_len(&range));
-		if (rc == 0) {
+		resource_remove = true;
+		cur_start = range.start;
+		cur_len = memory_block_size_bytes();
+		remaining = range_len(&range);
+		while (remaining) {
+
+			rc = remove_memory(cur_start, cur_len);
+			if (rc) {
+				resource_remove = false;
+				dev_err(dev,
+					"mapping%d: %#llx-%#llx cannot be hotremoved until the next reboot\n",
+					i, cur_start, cur_len);
+			}
+			cur_start += cur_len;
+			remaining -= cur_len;
+		}
+		if (resource_remove) {
 			remove_resource(data->res[i]);
 			kfree(data->res[i]);
 			data->res[i] = NULL;
 			success++;
-			continue;
-		}
-		any_hotremove_failed = true;
-		dev_err(dev,
-			"mapping%d: %#llx-%#llx cannot be hotremoved until the next reboot\n",
-				i, range.start, range.end);
+		} else
+			any_hotremove_failed = true;
 	}
 
 	if (success >= dev_dax->nr_range) {
-- 
2.41.0



  parent reply	other threads:[~2023-07-11  4:56 UTC|newest]

Thread overview: 56+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-07-11  4:48 [PATCH v3 0/7] Add support for memmap on memory feature on ppc64 Aneesh Kumar K.V
2023-07-11  4:48 ` Aneesh Kumar K.V
2023-07-11  4:48 ` [PATCH v3 1/7] mm/hotplug: Simplify ARCH_MHP_MEMMAP_ON_MEMORY_ENABLE kconfig Aneesh Kumar K.V
2023-07-11  4:48   ` Aneesh Kumar K.V
2023-07-11  4:48 ` [PATCH v3 2/7] mm/hotplug: Allow memmap on memory hotplug request to fallback Aneesh Kumar K.V
2023-07-11  4:48   ` Aneesh Kumar K.V
2023-07-11 10:23   ` David Hildenbrand
2023-07-11 10:23     ` David Hildenbrand
2023-07-11 15:58     ` Aneesh Kumar K V
2023-07-11 15:58       ` Aneesh Kumar K V
2023-07-11  4:48 ` [PATCH v3 3/7] mm/hotplug: Allow architecture to override memmap on memory support check Aneesh Kumar K.V
2023-07-11  4:48   ` Aneesh Kumar K.V
2023-07-11 10:36   ` David Hildenbrand
2023-07-11 10:36     ` David Hildenbrand
2023-07-11 16:07     ` Aneesh Kumar K V
2023-07-11 16:07       ` Aneesh Kumar K V
2023-07-11 16:09       ` David Hildenbrand
2023-07-11 16:09         ` David Hildenbrand
2023-07-12 20:07         ` John Hubbard
2023-07-12 20:07           ` John Hubbard
2023-07-13  9:08           ` David Hildenbrand
2023-07-13  9:08             ` David Hildenbrand
2023-07-14 23:14             ` John Hubbard
2023-07-14 23:14               ` John Hubbard
2023-07-11  4:48 ` [PATCH v3 4/7] mm/hotplug: Allow pageblock alignment via altmap reservation Aneesh Kumar K.V
2023-07-11  4:48   ` Aneesh Kumar K.V
2023-07-11  6:21   ` Huang, Ying
2023-07-11  6:21     ` Huang, Ying
2023-07-11  8:20     ` Aneesh Kumar K V
2023-07-11  8:20       ` Aneesh Kumar K V
2023-07-11 17:19   ` David Hildenbrand
2023-07-11 17:19     ` David Hildenbrand
2023-07-12  3:16     ` Aneesh Kumar K V
2023-07-12  3:16       ` Aneesh Kumar K V
2023-07-12  7:22       ` David Hildenbrand
2023-07-12  7:22         ` David Hildenbrand
2023-07-12 13:50         ` Aneesh Kumar K.V
2023-07-12 13:50           ` Aneesh Kumar K.V
2023-07-12 19:06           ` David Hildenbrand
2023-07-12 19:06             ` David Hildenbrand
2023-07-11  4:48 ` [PATCH v3 5/7] powerpc/book3s64/memhotplug: Enable memmap on memory for radix Aneesh Kumar K.V
2023-07-11  4:48   ` Aneesh Kumar K.V
2023-07-11 15:26   ` David Hildenbrand
2023-07-11 15:26     ` David Hildenbrand
2023-07-11 15:40     ` Aneesh Kumar K V
2023-07-11 15:40       ` Aneesh Kumar K V
2023-07-11 15:44       ` David Hildenbrand
2023-07-11 15:44         ` David Hildenbrand
2023-07-11 15:46         ` Aneesh Kumar K V
2023-07-11 15:46           ` Aneesh Kumar K V
2023-07-11  4:48 ` Aneesh Kumar K.V [this message]
2023-07-11  4:48   ` [PATCH v3 6/7] dax/kmem: Always enroll hotplugged memory for memmap_on_memory Aneesh Kumar K.V
2023-07-11 10:21   ` David Hildenbrand
2023-07-11 10:21     ` David Hildenbrand
2023-07-11  4:48 ` [PATCH v3 7/7] mm/hotplug: Embed vmem_altmap details in memory block Aneesh Kumar K.V
2023-07-11  4:48   ` Aneesh Kumar K.V

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20230711044834.72809-7-aneesh.kumar@linux.ibm.com \
    --to=aneesh.kumar@linux.ibm.com \
    --cc=akpm@linux-foundation.org \
    --cc=christophe.leroy@csgroup.eu \
    --cc=dan.j.williams@intel.com \
    --cc=dave.hansen@linux.intel.com \
    --cc=dave.jiang@intel.com \
    --cc=david@redhat.com \
    --cc=lenb@kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=linuxppc-dev@lists.ozlabs.org \
    --cc=mhocko@suse.com \
    --cc=mpe@ellerman.id.au \
    --cc=npiggin@gmail.com \
    --cc=osalvador@suse.de \
    --cc=rafael@kernel.org \
    --cc=vishal.l.verma@intel.com \
    --cc=ying.huang@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.