linuxppc-dev.lists.ozlabs.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v3 1/2] powerpc/mm: Cleanup memory block size probing
@ 2023-07-28 10:35 Aneesh Kumar K.V
  2023-07-28 10:35 ` [PATCH v3 2/2] powerpc/mm: Add memory_block_size as a kernel parameter Aneesh Kumar K.V
                   ` (2 more replies)
  0 siblings, 3 replies; 6+ messages in thread
From: Aneesh Kumar K.V @ 2023-07-28 10:35 UTC (permalink / raw)
  To: linuxppc-dev, mpe, npiggin, christophe.leroy
  Cc: Aneesh Kumar K.V, foraker1, Reza Arbab

Parse the device tree in early init to find the memory block size to be
used by the kernel. Consolidate the memory block size device tree parsing
to one helper and use that on both powernv and pseries. We still want to
use machine-specific callback because on all machine types other than
powernv and pseries we continue to return MIN_MEMORY_BLOCK_SIZE.

pseries_memory_block_size used to look for the second memory
block (memory@x) to determine the memory_block_size value. This patch
changed that to look at all memory blocks and make sure we can map them all
correctly using the computed memory block size value.

Add workaround to force 256MB memory block size if device driver managed
memory such as GPU memory is present. This helps to add GPU memory
that is not aligned to 1G.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
---
Changes from v2:
* Add workaround for forcing 256MB memory blocksize with GPU

 arch/powerpc/include/asm/book3s/64/mmu.h      |   5 +-
 arch/powerpc/mm/book3s64/radix_pgtable.c      |  65 +---------
 arch/powerpc/mm/init_64.c                     | 115 ++++++++++++++++++
 arch/powerpc/platforms/powernv/setup.c        |  10 +-
 .../platforms/pseries/hotplug-memory.c        |  60 +--------
 arch/powerpc/platforms/pseries/pseries.h      |   2 -
 arch/powerpc/platforms/pseries/setup.c        |   7 ++
 7 files changed, 129 insertions(+), 135 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h b/arch/powerpc/include/asm/book3s/64/mmu.h
index 570a4960cf17..28033fd5403c 100644
--- a/arch/powerpc/include/asm/book3s/64/mmu.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu.h
@@ -71,10 +71,7 @@ extern unsigned int mmu_pid_bits;
 /* Base PID to allocate from */
 extern unsigned int mmu_base_pid;
 
-/*
- * memory block size used with radix translation.
- */
-extern unsigned long __ro_after_init radix_mem_block_size;
+extern unsigned long __ro_after_init memory_block_size;
 
 #define PRTB_SIZE_SHIFT	(mmu_pid_bits + 4)
 #define PRTB_ENTRIES	(1ul << mmu_pid_bits)
diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c
index e7ea492ac510..b5102491b50f 100644
--- a/arch/powerpc/mm/book3s64/radix_pgtable.c
+++ b/arch/powerpc/mm/book3s64/radix_pgtable.c
@@ -37,7 +37,6 @@
 #include <mm/mmu_decl.h>
 
 unsigned int mmu_base_pid;
-unsigned long radix_mem_block_size __ro_after_init;
 
 static __ref void *early_alloc_pgtable(unsigned long size, int nid,
 			unsigned long region_start, unsigned long region_end)
@@ -300,7 +299,7 @@ static int __meminit create_physical_mapping(unsigned long start,
 	bool prev_exec, exec = false;
 	pgprot_t prot;
 	int psize;
-	unsigned long max_mapping_size = radix_mem_block_size;
+	unsigned long max_mapping_size = memory_block_size;
 
 	if (debug_pagealloc_enabled_or_kfence())
 		max_mapping_size = PAGE_SIZE;
@@ -502,58 +501,6 @@ static int __init radix_dt_scan_page_sizes(unsigned long node,
 	return 1;
 }
 
-#ifdef CONFIG_MEMORY_HOTPLUG
-static int __init probe_memory_block_size(unsigned long node, const char *uname, int
-					  depth, void *data)
-{
-	unsigned long *mem_block_size = (unsigned long *)data;
-	const __be32 *prop;
-	int len;
-
-	if (depth != 1)
-		return 0;
-
-	if (strcmp(uname, "ibm,dynamic-reconfiguration-memory"))
-		return 0;
-
-	prop = of_get_flat_dt_prop(node, "ibm,lmb-size", &len);
-
-	if (!prop || len < dt_root_size_cells * sizeof(__be32))
-		/*
-		 * Nothing in the device tree
-		 */
-		*mem_block_size = MIN_MEMORY_BLOCK_SIZE;
-	else
-		*mem_block_size = of_read_number(prop, dt_root_size_cells);
-	return 1;
-}
-
-static unsigned long __init radix_memory_block_size(void)
-{
-	unsigned long mem_block_size = MIN_MEMORY_BLOCK_SIZE;
-
-	/*
-	 * OPAL firmware feature is set by now. Hence we are ok
-	 * to test OPAL feature.
-	 */
-	if (firmware_has_feature(FW_FEATURE_OPAL))
-		mem_block_size = 1UL * 1024 * 1024 * 1024;
-	else
-		of_scan_flat_dt(probe_memory_block_size, &mem_block_size);
-
-	return mem_block_size;
-}
-
-#else   /* CONFIG_MEMORY_HOTPLUG */
-
-static unsigned long __init radix_memory_block_size(void)
-{
-	return 1UL * 1024 * 1024 * 1024;
-}
-
-#endif /* CONFIG_MEMORY_HOTPLUG */
-
-
 void __init radix__early_init_devtree(void)
 {
 	int rc;
@@ -577,16 +524,6 @@ void __init radix__early_init_devtree(void)
 		mmu_psize_defs[MMU_PAGE_64K].h_rpt_pgsize =
 			psize_to_rpti_pgsize(MMU_PAGE_64K);
 	}
-
-	/*
-	 * Max mapping size used when mapping pages. We don't use
-	 * ppc_md.memory_block_size() here because this get called
-	 * early and we don't have machine probe called yet. Also
-	 * the pseries implementation only check for ibm,lmb-size.
-	 * All hypervisor supporting radix do expose that device
-	 * tree node.
-	 */
-	radix_mem_block_size = radix_memory_block_size();
 	return;
 }
 
diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
index fe1b83020e0d..29178b3aafe6 100644
--- a/arch/powerpc/mm/init_64.c
+++ b/arch/powerpc/mm/init_64.c
@@ -40,6 +40,7 @@
 #include <linux/of_fdt.h>
 #include <linux/libfdt.h>
 #include <linux/memremap.h>
+#include <linux/memory.h>
 
 #include <asm/pgalloc.h>
 #include <asm/page.h>
@@ -471,6 +472,118 @@ static int __init dt_scan_mmu_pid_width(unsigned long node,
 	return 1;
 }
 
+static void update_memory_block_size(unsigned long *block_size, unsigned long mem_size)
+{
+	unsigned long section_size = 1UL << SECTION_SIZE_BITS;
+
+	for (; *block_size > section_size; *block_size >>= 2) {
+
+		if ((mem_size & *block_size) == 0)
+			break;
+	}
+}
+
+static int __init probe_memory_block_size(unsigned long node, const char *uname, int
+					  depth, void *data)
+{
+	const char *type;
+	const char *compatible;
+	unsigned long *block_size = (unsigned long *)data;
+	const __be32 *reg, *endp;
+	int l;
+
+	if (depth != 1)
+		return 0;
+	/*
+	 * If we have dynamic-reconfiguration-memory node, use the
+	 * lmb value.
+	 */
+	if (strcmp(uname, "ibm,dynamic-reconfiguration-memory") == 0) {
+
+		const __be32 *prop;
+
+		prop = of_get_flat_dt_prop(node, "ibm,lmb-size", &l);
+
+		if (!prop || l < dt_root_size_cells * sizeof(__be32))
+			/*
+			 * Nothing in the device tree
+			 */
+			*block_size = MIN_MEMORY_BLOCK_SIZE;
+		else
+			*block_size = of_read_number(prop, dt_root_size_cells);
+		/*
+		 * We have found the final value. Don't probe further.
+		 */
+		return 1;
+	}
+	/*
+	 * Find all the device tree nodes of memory type and make sure
+	 * the area can be mapped using the memory block size value
+	 * we end up using. We start with 1G value and keep reducing
+	 * it such that we can map the entire area using memory_block_size.
+	 * This will be used on powernv and older pseries that don't
+	 * have ibm,lmb-size node.
+	 * For ex: with P5 we can end up with
+	 * memory@0 -> 128MB
+	 * memory@128M -> 64M
+	 * This will end up using 64MB  memory block size value.
+	 */
+	type = of_get_flat_dt_prop(node, "device_type", NULL);
+	if (type == NULL || strcmp(type, "memory") != 0)
+		return 0;
+
+	/*
+	 * "ibm,coherent-device-memory with linux,usable-memory = 0
+	 * Force 256MiB block size. Work around for GPUs on P9 PowerNV
+	 * linux,usable-memory == 0 implies driver managed memory and
+	 * we can't use large memory block size due to hotplug/unplug
+	 * limitations.
+	 */
+	compatible = of_get_flat_dt_prop(node, "compatible", NULL);
+	if (compatible && !strcmp(compatible, "ibm,coherent-device-memory")) {
+		int len = 0;
+		const __be32 *usm;
+
+		usm = of_get_flat_dt_prop(node, "linux,drconf-usable-memory", &len);
+		if (usm && !len) {
+			*block_size = SZ_256M;
+			return 1;
+		}
+	}
+
+	reg = of_get_flat_dt_prop(node, "reg", &l);
+	endp = reg + (l / sizeof(__be32));
+
+	while ((endp - reg) >= (dt_root_addr_cells + dt_root_size_cells)) {
+		u64 base, size;
+
+		base = dt_mem_next_cell(dt_root_addr_cells, &reg);
+		size = dt_mem_next_cell(dt_root_size_cells, &reg);
+
+		if (size == 0)
+			continue;
+
+		update_memory_block_size(block_size, size);
+	}
+	/* continue looking for other memory device types */
+	return 0;
+}
+
+/*
+ * start with 1G memory block size. Early init will
+ * fix this with correct value.
+ */
+unsigned long memory_block_size __ro_after_init = 1UL << 30;
+static void __init early_init_memory_block_size(void)
+{
+	/*
+	 * We need to do memory_block_size probe early so that
+	 * radix__early_init_mmu() can use this as limit for
+	 * mapping page size.
+	 */
+	of_scan_flat_dt(probe_memory_block_size, &memory_block_size);
+}
+
 void __init mmu_early_init_devtree(void)
 {
 	bool hvmode = !!(mfmsr() & MSR_HV);
@@ -504,6 +617,8 @@ void __init mmu_early_init_devtree(void)
 	if (!hvmode)
 		early_check_vec5();
 
+	early_init_memory_block_size();
+
 	if (early_radix_enabled()) {
 		radix__early_init_devtree();
 
diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c
index 5e9c6b55809f..4dbb47ddbdcc 100644
--- a/arch/powerpc/platforms/powernv/setup.c
+++ b/arch/powerpc/platforms/powernv/setup.c
@@ -482,15 +482,7 @@ static void pnv_kexec_cpu_down(int crash_shutdown, int secondary)
 #ifdef CONFIG_MEMORY_HOTPLUG
 static unsigned long pnv_memory_block_size(void)
 {
-	/*
-	 * We map the kernel linear region with 1GB large pages on radix. For
-	 * memory hot unplug to work our memory block size must be at least
-	 * this size.
-	 */
-	if (radix_enabled())
-		return radix_mem_block_size;
-	else
-		return 256UL * 1024 * 1024;
+	return memory_block_size;
 }
 #endif
 
diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c
index 9c62c2c3b3d0..1333d9ab7621 100644
--- a/arch/powerpc/platforms/pseries/hotplug-memory.c
+++ b/arch/powerpc/platforms/pseries/hotplug-memory.c
@@ -21,54 +21,6 @@
 #include <asm/drmem.h>
 #include "pseries.h"
 
-unsigned long pseries_memory_block_size(void)
-{
-	struct device_node *np;
-	u64 memblock_size = MIN_MEMORY_BLOCK_SIZE;
-	struct resource r;
-
-	np = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
-	if (np) {
-		int len;
-		int size_cells;
-		const __be32 *prop;
-
-		size_cells = of_n_size_cells(np);
-
-		prop = of_get_property(np, "ibm,lmb-size", &len);
-		if (prop && len >= size_cells * sizeof(__be32))
-			memblock_size = of_read_number(prop, size_cells);
-		of_node_put(np);
-
-	} else  if (machine_is(pseries)) {
-		/* This fallback really only applies to pseries */
-		unsigned int memzero_size = 0;
-
-		np = of_find_node_by_path("/memory@0");
-		if (np) {
-			if (!of_address_to_resource(np, 0, &r))
-				memzero_size = resource_size(&r);
-			of_node_put(np);
-		}
-
-		if (memzero_size) {
-			/* We now know the size of memory@0, use this to find
-			 * the first memoryblock and get its size.
-			 */
-			char buf[64];
-
-			sprintf(buf, "/memory@%x", memzero_size);
-			np = of_find_node_by_path(buf);
-			if (np) {
-				if (!of_address_to_resource(np, 0, &r))
-					memblock_size = resource_size(&r);
-				of_node_put(np);
-			}
-		}
-	}
-	return memblock_size;
-}
-
 static void dlpar_free_property(struct property *prop)
 {
 	kfree(prop->name);
@@ -283,7 +235,7 @@ static int dlpar_offline_lmb(struct drmem_lmb *lmb)
 
 static int pseries_remove_memblock(unsigned long base, unsigned long memblock_size)
 {
-	unsigned long block_sz, start_pfn;
+	unsigned long start_pfn;
 	int sections_per_block;
 	int i;
 
@@ -294,8 +246,7 @@ static int pseries_remove_memblock(unsigned long base, unsigned long memblock_si
 	if (!pfn_valid(start_pfn))
 		goto out;
 
-	block_sz = pseries_memory_block_size();
-	sections_per_block = block_sz / MIN_MEMORY_BLOCK_SIZE;
+	sections_per_block = memory_block_size / MIN_MEMORY_BLOCK_SIZE;
 
 	for (i = 0; i < sections_per_block; i++) {
 		__remove_memory(base, MIN_MEMORY_BLOCK_SIZE);
@@ -354,7 +305,6 @@ static int dlpar_add_lmb(struct drmem_lmb *);
 static int dlpar_remove_lmb(struct drmem_lmb *lmb)
 {
 	struct memory_block *mem_block;
-	unsigned long block_sz;
 	int rc;
 
 	if (!lmb_is_removable(lmb))
@@ -370,13 +320,11 @@ static int dlpar_remove_lmb(struct drmem_lmb *lmb)
 		return rc;
 	}
 
-	block_sz = pseries_memory_block_size();
-
-	__remove_memory(lmb->base_addr, block_sz);
+	__remove_memory(lmb->base_addr, memory_block_size);
 	put_device(&mem_block->dev);
 
 	/* Update memory regions for memory remove */
-	memblock_remove(lmb->base_addr, block_sz);
+	memblock_remove(lmb->base_addr, memory_block_size);
 
 	invalidate_lmb_associativity_index(lmb);
 	lmb->flags &= ~DRCONF_MEM_ASSIGNED;
diff --git a/arch/powerpc/platforms/pseries/pseries.h b/arch/powerpc/platforms/pseries/pseries.h
index f8bce40ebd0c..02c012976b19 100644
--- a/arch/powerpc/platforms/pseries/pseries.h
+++ b/arch/powerpc/platforms/pseries/pseries.h
@@ -90,8 +90,6 @@ extern struct pci_controller_ops pseries_pci_controller_ops;
 int pseries_msi_allocate_domains(struct pci_controller *phb);
 void pseries_msi_free_domains(struct pci_controller *phb);
 
-unsigned long pseries_memory_block_size(void);
-
 extern int CMO_PrPSP;
 extern int CMO_SecPSP;
 extern unsigned long CMO_PageSize;
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index e2a57cfa6c83..6683debe283e 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -1116,6 +1116,13 @@ static int pSeries_pci_probe_mode(struct pci_bus *bus)
 	return PCI_PROBE_NORMAL;
 }
 
+#ifdef CONFIG_MEMORY_HOTPLUG
+static unsigned long pseries_memory_block_size(void)
+{
+	return memory_block_size;
+}
+#endif
+
 struct pci_controller_ops pseries_pci_controller_ops = {
 	.probe_mode		= pSeries_pci_probe_mode,
 #ifdef CONFIG_SPAPR_TCE_IOMMU
-- 
2.41.0


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [PATCH v3 2/2] powerpc/mm: Add memory_block_size as a kernel parameter
  2023-07-28 10:35 [PATCH v3 1/2] powerpc/mm: Cleanup memory block size probing Aneesh Kumar K.V
@ 2023-07-28 10:35 ` Aneesh Kumar K.V
  2023-07-28 21:55 ` [PATCH v3 1/2] powerpc/mm: Cleanup memory block size probing Reza Arbab
  2023-07-29  9:55 ` kernel test robot
  2 siblings, 0 replies; 6+ messages in thread
From: Aneesh Kumar K.V @ 2023-07-28 10:35 UTC (permalink / raw)
  To: linuxppc-dev, mpe, npiggin, christophe.leroy
  Cc: Aneesh Kumar K.V, foraker1, Reza Arbab

Certain devices can possess non-standard memory capacities, not constrained
to multiples of 1GB. Provide a kernel parameter so that we can map the
device memory completely on memory hotplug.

Restrict memory_block_size value to a power of 2 value similar to LMB size.
The memory block size should also be more than the section size.

Reviewed-by: Reza Arbab <arbab@linux.ibm.com>
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
---
 .../admin-guide/kernel-parameters.txt         |  3 +++
 arch/powerpc/kernel/setup_64.c                | 23 +++++++++++++++++++
 arch/powerpc/mm/init_64.c                     | 17 ++++++++++----
 3 files changed, 38 insertions(+), 5 deletions(-)

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index a1457995fd41..4e49696e0976 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -3201,6 +3201,9 @@
 			Note that even when enabled, there are a few cases where
 			the feature is not effective.
 
+	memory_block_size=size [PPC]
+			 Use this parameter to configure the memory block size value.
+
 	memtest=	[KNL,X86,ARM,M68K,PPC,RISCV] Enable memtest
 			Format: <integer>
 			default : 0 <disable>
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index 246201d0d879..cbdb924462c7 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -892,6 +892,29 @@ unsigned long memory_block_size_bytes(void)
 
 	return MIN_MEMORY_BLOCK_SIZE;
 }
+
+/*
+ * Restrict to a power of 2 value for memblock which is larger than
+ * section size
+ */
+static int __init parse_mem_block_size(char *ptr)
+{
+	unsigned int order;
+	unsigned long size = memparse(ptr, NULL);
+
+	order = fls64(size);
+	if (!order)
+		return 0;
+
+	order--;
+	if (order < SECTION_SIZE_BITS)
+		return 0;
+
+	memory_block_size = 1UL << order;
+
+	return 0;
+}
+early_param("memory_block_size", parse_mem_block_size);
 #endif
 
 #if defined(CONFIG_PPC_INDIRECT_PIO) || defined(CONFIG_PPC_INDIRECT_MMIO)
diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
index 29178b3aafe6..dbed37d6cffb 100644
--- a/arch/powerpc/mm/init_64.c
+++ b/arch/powerpc/mm/init_64.c
@@ -569,13 +569,20 @@ static int __init probe_memory_block_size(unsigned long node, const char *uname,
 	return 0;
 }
 
-/*
- * start with 1G memory block size. Early init will
- * fix this with correct value.
- */
-unsigned long memory_block_size __ro_after_init = 1UL << 30;
+unsigned long memory_block_size __ro_after_init;
 static void __init early_init_memory_block_size(void)
 {
+	/*
+	 * if it is set via early param just return.
+	 */
+	if (memory_block_size)
+		return;
+
+	/*
+	 * start with 1G memory block size. update_memory_block_size()
+	 * will derive the right value based on device tree details.
+	 */
+	memory_block_size = SZ_1G;
 	/*
 	 * We need to do memory_block_size probe early so that
 	 * radix__early_init_mmu() can use this as limit for
-- 
2.41.0


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH v3 1/2] powerpc/mm: Cleanup memory block size probing
  2023-07-28 10:35 [PATCH v3 1/2] powerpc/mm: Cleanup memory block size probing Aneesh Kumar K.V
  2023-07-28 10:35 ` [PATCH v3 2/2] powerpc/mm: Add memory_block_size as a kernel parameter Aneesh Kumar K.V
@ 2023-07-28 21:55 ` Reza Arbab
  2023-07-29 15:28   ` Aneesh Kumar K V
  2023-07-29  9:55 ` kernel test robot
  2 siblings, 1 reply; 6+ messages in thread
From: Reza Arbab @ 2023-07-28 21:55 UTC (permalink / raw)
  To: Aneesh Kumar K.V; +Cc: foraker1, linuxppc-dev, npiggin

On Fri, Jul 28, 2023 at 04:05:55PM +0530, Aneesh Kumar K.V wrote:
>--- a/arch/powerpc/mm/init_64.c
>+++ b/arch/powerpc/mm/init_64.c
[snip]
>+	/*
>+	 * "ibm,coherent-device-memory with linux,usable-memory = 0
>+	 * Force 256MiB block size. Work around for GPUs on P9 PowerNV
>+	 * linux,usable-memory == 0 implies driver managed memory and
>+	 * we can't use large memory block size due to hotplug/unplug
>+	 * limitations.
>+	 */
>+	compatible = of_get_flat_dt_prop(node, "compatible", NULL);
>+	if (compatible && !strcmp(compatible, "ibm,coherent-device-memory")) {
>+		int len = 0;
>+		const __be32 *usm;
>+
>+		usm = of_get_flat_dt_prop(node, "linux,drconf-usable-memory", &len);

I think this should be "linux,usable-memory".

>+		if (usm && !len) {
>+			*block_size = SZ_256M;
>+			return 1;
>+		}

This isn't quite right. The criteria is not that the property itself has 
no registers, it's that the base/size combo has size zero.

If you fold in the patch appended to the end of this mail, things worked 
for me.

>+	}
>+
>+	reg = of_get_flat_dt_prop(node, "reg", &l);
>+	endp = reg + (l / sizeof(__be32));
>+
>+	while ((endp - reg) >= (dt_root_addr_cells + dt_root_size_cells)) {
>+		u64 base, size;
>+
>+		base = dt_mem_next_cell(dt_root_addr_cells, &reg);
>+		size = dt_mem_next_cell(dt_root_size_cells, &reg);
>+
>+		if (size == 0)
>+			continue;
>+
>+		update_memory_block_size(block_size, size);
>+	}
>+	/* continue looking for other memory device types */
>+	return 0;
>+}
>+
>+/*
>+ * start with 1G memory block size. Early init will
>+ * fix this with correct value.
>+ */
>+unsigned long memory_block_size __ro_after_init = 1UL << 30;

Could use SZ_1G here.

With the following fixup, I got 256MiB blocks on a system with
"ibm,coherent-device-memory" nodes.

diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
index dbed37d6cffb..1ac58e72a885 100644
--- a/arch/powerpc/mm/init_64.c
+++ b/arch/powerpc/mm/init_64.c
@@ -487,7 +487,6 @@ static int __init probe_memory_block_size(unsigned long node, const char *uname,
  					  depth, void *data)
  {
  	const char *type;
-	const char *compatible;
  	unsigned long *block_size = (unsigned long *)data;
  	const __be32 *reg, *endp;
  	int l;
@@ -532,38 +531,38 @@ static int __init probe_memory_block_size(unsigned long node, const char *uname,
  	if (type == NULL || strcmp(type, "memory") != 0)
  		return 0;
  
-	/*
-	 * "ibm,coherent-device-memory with linux,usable-memory = 0
-	 * Force 256MiB block size. Work around for GPUs on P9 PowerNV
-	 * linux,usable-memory == 0 implies driver managed memory and
-	 * we can't use large memory block size due to hotplug/unplug
-	 * limitations.
-	 */
-	compatible = of_get_flat_dt_prop(node, "compatible", NULL);
-	if (compatible && !strcmp(compatible, "ibm,coherent-device-memory")) {
-		int len = 0;
-		const __be32 *usm;
-
-		usm = of_get_flat_dt_prop(node, "linux,drconf-usable-memory", &len);
-		if (usm && !len) {
-			*block_size = SZ_256M;
-			return 1;
-		}
-	}
+	reg = of_get_flat_dt_prop(node, "linux,usable-memory", &l);
+	if (!reg)
+		reg = of_get_flat_dt_prop(node, "reg", &l);
+	if (!reg)
+		return 0;
  
-	reg = of_get_flat_dt_prop(node, "reg", &l);
  	endp = reg + (l / sizeof(__be32));
  
  	while ((endp - reg) >= (dt_root_addr_cells + dt_root_size_cells)) {
+		const char *compatible;
  		u64 base, size;
  
  		base = dt_mem_next_cell(dt_root_addr_cells, &reg);
  		size = dt_mem_next_cell(dt_root_size_cells, &reg);
  
-		if (size == 0)
+		if (size) {
+			update_memory_block_size(block_size, size);
  			continue;
+		}
  
-		update_memory_block_size(block_size, size);
+		/*
+		 * ibm,coherent-device-memory with linux,usable-memory = 0
+		 * Force 256MiB block size. Work around for GPUs on P9 PowerNV
+		 * linux,usable-memory == 0 implies driver managed memory and
+		 * we can't use large memory block size due to hotplug/unplug
+		 * limitations.
+		 */
+		compatible = of_get_flat_dt_prop(node, "compatible", NULL);
+		if (compatible && !strcmp(compatible, "ibm,coherent-device-memory")) {
+			*block_size = SZ_256M;
+			return 1;
+		}
  	}
  	/* continue looking for other memory device types */
  	return 0;
-- 
Reza Arbab

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH v3 1/2] powerpc/mm: Cleanup memory block size probing
  2023-07-28 10:35 [PATCH v3 1/2] powerpc/mm: Cleanup memory block size probing Aneesh Kumar K.V
  2023-07-28 10:35 ` [PATCH v3 2/2] powerpc/mm: Add memory_block_size as a kernel parameter Aneesh Kumar K.V
  2023-07-28 21:55 ` [PATCH v3 1/2] powerpc/mm: Cleanup memory block size probing Reza Arbab
@ 2023-07-29  9:55 ` kernel test robot
  2 siblings, 0 replies; 6+ messages in thread
From: kernel test robot @ 2023-07-29  9:55 UTC (permalink / raw)
  To: Aneesh Kumar K.V, linuxppc-dev, mpe, npiggin, christophe.leroy
  Cc: Aneesh Kumar K.V, foraker1, llvm, Reza Arbab, oe-kbuild-all

Hi Aneesh,

kernel test robot noticed the following build errors:

[auto build test ERROR on powerpc/next]
[also build test ERROR on powerpc/fixes linus/master v6.5-rc3 next-20230728]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Aneesh-Kumar-K-V/powerpc-mm-Add-memory_block_size-as-a-kernel-parameter/20230728-184256
base:   https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git next
patch link:    https://lore.kernel.org/r/20230728103556.745681-1-aneesh.kumar%40linux.ibm.com
patch subject: [PATCH v3 1/2] powerpc/mm: Cleanup memory block size probing
config: powerpc-powernv_defconfig (https://download.01.org/0day-ci/archive/20230729/202307291753.opRf3JsB-lkp@intel.com/config)
compiler: clang version 17.0.0 (https://github.com/llvm/llvm-project.git 4a5ac14ee968ff0ad5d2cc1ffa0299048db4c88a)
reproduce: (https://download.01.org/0day-ci/archive/20230729/202307291753.opRf3JsB-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202307291753.opRf3JsB-lkp@intel.com/

All errors (new ones prefixed by >>):

>> arch/powerpc/mm/init_64.c:558:7: error: variable 'base' set but not used [-Werror,-Wunused-but-set-variable]
     558 |                 u64 base, size;
         |                     ^
   1 error generated.


vim +/base +558 arch/powerpc/mm/init_64.c

   485	
   486	static int __init probe_memory_block_size(unsigned long node, const char *uname, int
   487						  depth, void *data)
   488	{
   489		const char *type;
   490		const char *compatible;
   491		unsigned long *block_size = (unsigned long *)data;
   492		const __be32 *reg, *endp;
   493		int l;
   494	
   495		if (depth != 1)
   496			return 0;
   497		/*
   498		 * If we have dynamic-reconfiguration-memory node, use the
   499		 * lmb value.
   500		 */
   501		if (strcmp(uname, "ibm,dynamic-reconfiguration-memory") == 0) {
   502	
   503			const __be32 *prop;
   504	
   505			prop = of_get_flat_dt_prop(node, "ibm,lmb-size", &l);
   506	
   507			if (!prop || l < dt_root_size_cells * sizeof(__be32))
   508				/*
   509				 * Nothing in the device tree
   510				 */
   511				*block_size = MIN_MEMORY_BLOCK_SIZE;
   512			else
   513				*block_size = of_read_number(prop, dt_root_size_cells);
   514			/*
   515			 * We have found the final value. Don't probe further.
   516			 */
   517			return 1;
   518		}
   519		/*
   520		 * Find all the device tree nodes of memory type and make sure
   521		 * the area can be mapped using the memory block size value
   522		 * we end up using. We start with 1G value and keep reducing
   523		 * it such that we can map the entire area using memory_block_size.
   524		 * This will be used on powernv and older pseries that don't
   525		 * have ibm,lmb-size node.
   526		 * For ex: with P5 we can end up with
   527		 * memory@0 -> 128MB
   528		 * memory@128M -> 64M
   529		 * This will end up using 64MB  memory block size value.
   530		 */
   531		type = of_get_flat_dt_prop(node, "device_type", NULL);
   532		if (type == NULL || strcmp(type, "memory") != 0)
   533			return 0;
   534	
   535		/*
   536		 * "ibm,coherent-device-memory with linux,usable-memory = 0
   537		 * Force 256MiB block size. Work around for GPUs on P9 PowerNV
   538		 * linux,usable-memory == 0 implies driver managed memory and
   539		 * we can't use large memory block size due to hotplug/unplug
   540		 * limitations.
   541		 */
   542		compatible = of_get_flat_dt_prop(node, "compatible", NULL);
   543		if (compatible && !strcmp(compatible, "ibm,coherent-device-memory")) {
   544			int len = 0;
   545			const __be32 *usm;
   546	
   547			usm = of_get_flat_dt_prop(node, "linux,drconf-usable-memory", &len);
   548			if (usm && !len) {
   549				*block_size = SZ_256M;
   550				return 1;
   551			}
   552		}
   553	
   554		reg = of_get_flat_dt_prop(node, "reg", &l);
   555		endp = reg + (l / sizeof(__be32));
   556	
   557		while ((endp - reg) >= (dt_root_addr_cells + dt_root_size_cells)) {
 > 558			u64 base, size;
   559	
   560			base = dt_mem_next_cell(dt_root_addr_cells, &reg);
   561			size = dt_mem_next_cell(dt_root_size_cells, &reg);
   562	
   563			if (size == 0)
   564				continue;
   565	
   566			update_memory_block_size(block_size, size);
   567		}
   568		/* continue looking for other memory device types */
   569		return 0;
   570	}
   571	

-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH v3 1/2] powerpc/mm: Cleanup memory block size probing
  2023-07-28 21:55 ` [PATCH v3 1/2] powerpc/mm: Cleanup memory block size probing Reza Arbab
@ 2023-07-29 15:28   ` Aneesh Kumar K V
  2023-07-31 13:15     ` Reza Arbab
  0 siblings, 1 reply; 6+ messages in thread
From: Aneesh Kumar K V @ 2023-07-29 15:28 UTC (permalink / raw)
  To: Reza Arbab; +Cc: foraker1, linuxppc-dev, npiggin

On 7/29/23 3:25 AM, Reza Arbab wrote:
> On Fri, Jul 28, 2023 at 04:05:55PM +0530, Aneesh Kumar K.V wrote:
>> --- a/arch/powerpc/mm/init_64.c
>> +++ b/arch/powerpc/mm/init_64.c
> [snip]
>> +    /*
>> +     * "ibm,coherent-device-memory with linux,usable-memory = 0
>> +     * Force 256MiB block size. Work around for GPUs on P9 PowerNV
>> +     * linux,usable-memory == 0 implies driver managed memory and
>> +     * we can't use large memory block size due to hotplug/unplug
>> +     * limitations.
>> +     */
>> +    compatible = of_get_flat_dt_prop(node, "compatible", NULL);
>> +    if (compatible && !strcmp(compatible, "ibm,coherent-device-memory")) {
>> +        int len = 0;
>> +        const __be32 *usm;
>> +
>> +        usm = of_get_flat_dt_prop(node, "linux,drconf-usable-memory", &len);
> 
> I think this should be "linux,usable-memory".
> 
>> +        if (usm && !len) {
>> +            *block_size = SZ_256M;
>> +            return 1;
>> +        }
> 
> This isn't quite right. The criteria is not that the property itself has no registers, it's that the base/size combo has size zero.
> 
> If you fold in the patch appended to the end of this mail, things worked for me.
> 
>> +    }
>> +
>> +    reg = of_get_flat_dt_prop(node, "reg", &l);
>> +    endp = reg + (l / sizeof(__be32));
>> +
>> +    while ((endp - reg) >= (dt_root_addr_cells + dt_root_size_cells)) {
>> +        u64 base, size;
>> +
>> +        base = dt_mem_next_cell(dt_root_addr_cells, &reg);
>> +        size = dt_mem_next_cell(dt_root_size_cells, &reg);
>> +
>> +        if (size == 0)
>> +            continue;
>> +
>> +        update_memory_block_size(block_size, size);
>> +    }
>> +    /* continue looking for other memory device types */
>> +    return 0;
>> +}
>> +
>> +/*
>> + * start with 1G memory block size. Early init will
>> + * fix this with correct value.
>> + */
>> +unsigned long memory_block_size __ro_after_init = 1UL << 30;
> 
> Could use SZ_1G here.
> 
> With the following fixup, I got 256MiB blocks on a system with
> "ibm,coherent-device-memory" nodes.
> 
> diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
> index dbed37d6cffb..1ac58e72a885 100644
> --- a/arch/powerpc/mm/init_64.c
> +++ b/arch/powerpc/mm/init_64.c
> @@ -487,7 +487,6 @@ static int __init probe_memory_block_size(unsigned long node, const char *uname,
>                        depth, void *data)
>  {
>      const char *type;
> -    const char *compatible;
>      unsigned long *block_size = (unsigned long *)data;
>      const __be32 *reg, *endp;
>      int l;
> @@ -532,38 +531,38 @@ static int __init probe_memory_block_size(unsigned long node, const char *uname,
>      if (type == NULL || strcmp(type, "memory") != 0)
>          return 0;
>  
> -    /*
> -     * "ibm,coherent-device-memory with linux,usable-memory = 0
> -     * Force 256MiB block size. Work around for GPUs on P9 PowerNV
> -     * linux,usable-memory == 0 implies driver managed memory and
> -     * we can't use large memory block size due to hotplug/unplug
> -     * limitations.
> -     */
> -    compatible = of_get_flat_dt_prop(node, "compatible", NULL);
> -    if (compatible && !strcmp(compatible, "ibm,coherent-device-memory")) {
> -        int len = 0;
> -        const __be32 *usm;
> -
> -        usm = of_get_flat_dt_prop(node, "linux,drconf-usable-memory", &len);
> -        if (usm && !len) {
> -            *block_size = SZ_256M;
> -            return 1;
> -        }
> -    }
> +    reg = of_get_flat_dt_prop(node, "linux,usable-memory", &l);
> +    if (!reg)
> +        reg = of_get_flat_dt_prop(node, "reg", &l);
> +    if (!reg)
> +        return 0;
>  
> -    reg = of_get_flat_dt_prop(node, "reg", &l);
>      endp = reg + (l / sizeof(__be32));
>  
>      while ((endp - reg) >= (dt_root_addr_cells + dt_root_size_cells)) {
> +        const char *compatible;
>          u64 base, size;
>  
>          base = dt_mem_next_cell(dt_root_addr_cells, &reg);
>          size = dt_mem_next_cell(dt_root_size_cells, &reg);
>  
> -        if (size == 0)
> +        if (size) {
> +            update_memory_block_size(block_size, size);
>              continue;
> +        }
>  
> -        update_memory_block_size(block_size, size);
> +        /*
> +         * ibm,coherent-device-memory with linux,usable-memory = 0
> +         * Force 256MiB block size. Work around for GPUs on P9 PowerNV
> +         * linux,usable-memory == 0 implies driver managed memory and
> +         * we can't use large memory block size due to hotplug/unplug
> +         * limitations.
> +         */
> +        compatible = of_get_flat_dt_prop(node, "compatible", NULL);
> +        if (compatible && !strcmp(compatible, "ibm,coherent-device-memory")) {
> +            *block_size = SZ_256M;
> +            return 1;
> +        }
>      }
>      /* continue looking for other memory device types */
>      return 0;

Thanks for correcting the right device tree node and testing the changes. Can I add

Co-authored-by: Reza Arbab <arbab@linux.ibm.com>

-aneesh

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH v3 1/2] powerpc/mm: Cleanup memory block size probing
  2023-07-29 15:28   ` Aneesh Kumar K V
@ 2023-07-31 13:15     ` Reza Arbab
  0 siblings, 0 replies; 6+ messages in thread
From: Reza Arbab @ 2023-07-31 13:15 UTC (permalink / raw)
  To: Aneesh Kumar K V; +Cc: foraker1, linuxppc-dev, npiggin

On Sat, Jul 29, 2023 at 08:58:57PM +0530, Aneesh Kumar K V wrote:
>Thanks for correcting the right device tree node and testing the 
>changes. Can I add
>
>Co-authored-by: Reza Arbab <arbab@linux.ibm.com>

Sure, that's fine.

Signed-off-by: Reza Arbab <arbab@linux.ibm.com>

-- 
Reza Arbab

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2023-07-31 13:16 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2023-07-28 10:35 [PATCH v3 1/2] powerpc/mm: Cleanup memory block size probing Aneesh Kumar K.V
2023-07-28 10:35 ` [PATCH v3 2/2] powerpc/mm: Add memory_block_size as a kernel parameter Aneesh Kumar K.V
2023-07-28 21:55 ` [PATCH v3 1/2] powerpc/mm: Cleanup memory block size probing Reza Arbab
2023-07-29 15:28   ` Aneesh Kumar K V
2023-07-31 13:15     ` Reza Arbab
2023-07-29  9:55 ` kernel test robot

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).