LinuxPPC-Dev Archive on lore.kernel.org
 help / color / mirror / Atom feed
* Re: [v1 PATCH 1/2] Refactoring carrying over IMA measuremnet logs over Kexec.
From: kernel test robot @ 2020-06-08  2:29 UTC (permalink / raw)
  To: Prakhar Srivastava, linux-arm-kernel, linux-kernel, linuxppc-dev,
	devicetree, linux-integrity, linux-security-module
  Cc: kstewart, mark.rutland, bhsharma, kbuild-all, catalin.marinas
In-Reply-To: <20200607233323.22375-2-prsriva@linux.microsoft.com>

[-- Attachment #1: Type: text/plain, Size: 5700 bytes --]

Hi Prakhar,

Thank you for the patch! Perhaps something to improve:

[auto build test WARNING on arm64/for-next/core]
[also build test WARNING on powerpc/next soc/for-next v5.7 next-20200605]
[if your patch is applied to the wrong git tree, please drop us a note to help
improve the system. BTW, we also suggest to use '--base' option to specify the
base tree in git format-patch, please see https://stackoverflow.com/a/37406982]

url:    https://github.com/0day-ci/linux/commits/Prakhar-Srivastava/Adding-support-to-carry-IMA-measurement-logs/20200608-073805
base:   https://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux.git for-next/core
config: arm64-allyesconfig (attached as .config)
compiler: aarch64-linux-gcc (GCC) 9.3.0
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # save the attached .config to linux build tree
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-9.3.0 make.cross ARCH=arm64 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All warnings (new ones prefixed by >>, old ones prefixed by <<):

>> security/integrity/ima/ima_kexec.c:59:5: warning: no previous prototype for 'ima_get_kexec_buffer' [-Wmissing-prototypes]
59 | int ima_get_kexec_buffer(void **addr, size_t *size)
|     ^~~~~~~~~~~~~~~~~~~~
>> security/integrity/ima/ima_kexec.c:85:5: warning: no previous prototype for 'delete_fdt_mem_rsv' [-Wmissing-prototypes]
85 | int delete_fdt_mem_rsv(void *fdt, unsigned long start, unsigned long size)
|     ^~~~~~~~~~~~~~~~~~
>> security/integrity/ima/ima_kexec.c:115:5: warning: no previous prototype for 'ima_free_kexec_buffer' [-Wmissing-prototypes]
115 | int ima_free_kexec_buffer(void)
|     ^~~~~~~~~~~~~~~~~~~~~
>> security/integrity/ima/ima_kexec.c:144:6: warning: no previous prototype for 'remove_ima_buffer' [-Wmissing-prototypes]
144 | void remove_ima_buffer(void *fdt, int chosen_node)
|      ^~~~~~~~~~~~~~~~~
security/integrity/ima/ima_kexec.c:231:6: warning: no previous prototype for 'ima_add_kexec_buffer' [-Wmissing-prototypes]
231 | void ima_add_kexec_buffer(struct kimage *image)
|      ^~~~~~~~~~~~~~~~~~~~

vim +/ima_get_kexec_buffer +59 security/integrity/ima/ima_kexec.c

    51	
    52	/**
    53	 * ima_get_kexec_buffer - get IMA buffer from the previous kernel
    54	 * @addr:	On successful return, set to point to the buffer contents.
    55	 * @size:	On successful return, set to the buffer size.
    56	 *
    57	 * Return: 0 on success, negative errno on error.
    58	 */
  > 59	int ima_get_kexec_buffer(void **addr, size_t *size)
    60	{
    61		int ret, len;
    62		unsigned long tmp_addr;
    63		size_t tmp_size;
    64		const void *prop;
    65	
    66		prop = of_get_property(of_chosen, "linux,ima-kexec-buffer", &len);
    67		if (!prop)
    68			return -ENOENT;
    69	
    70		ret = do_get_kexec_buffer(prop, len, &tmp_addr, &tmp_size);
    71		if (ret)
    72			return ret;
    73	
    74		*addr = __va(tmp_addr);
    75		*size = tmp_size;
    76	
    77		return 0;
    78	}
    79	
    80	/**
    81	 * delete_fdt_mem_rsv - delete memory reservation with given address and size
    82	 *
    83	 * Return: 0 on success, or negative errno on error.
    84	 */
  > 85	int delete_fdt_mem_rsv(void *fdt, unsigned long start, unsigned long size)
    86	{
    87		int i, ret, num_rsvs = fdt_num_mem_rsv(fdt);
    88	
    89		for (i = 0; i < num_rsvs; i++) {
    90			uint64_t rsv_start, rsv_size;
    91	
    92			ret = fdt_get_mem_rsv(fdt, i, &rsv_start, &rsv_size);
    93			if (ret) {
    94				pr_err("Malformed device tree.\n");
    95				return -EINVAL;
    96			}
    97	
    98			if (rsv_start == start && rsv_size == size) {
    99				ret = fdt_del_mem_rsv(fdt, i);
   100				if (ret) {
   101					pr_err("Error deleting device tree reservation.\n");
   102					return -EINVAL;
   103				}
   104	
   105				return 0;
   106			}
   107		}
   108	
   109		return -ENOENT;
   110	}
   111	
   112	/**
   113	 * ima_free_kexec_buffer - free memory used by the IMA buffer
   114	 */
 > 115	int ima_free_kexec_buffer(void)
   116	{
   117		int ret;
   118		unsigned long addr;
   119		size_t size;
   120		struct property *prop;
   121	
   122		prop = of_find_property(of_chosen, "linux,ima-kexec-buffer", NULL);
   123		if (!prop)
   124			return -ENOENT;
   125	
   126		ret = do_get_kexec_buffer(prop->value, prop->length, &addr, &size);
   127		if (ret)
   128			return ret;
   129	
   130		ret = of_remove_property(of_chosen, prop);
   131		if (ret)
   132			return ret;
   133	
   134		return memblock_free(addr, size);
   135	
   136	}
   137	
   138	/**
   139	 * remove_ima_buffer - remove the IMA buffer property and reservation from @fdt
   140	 *
   141	 * The IMA measurement buffer is of no use to a subsequent kernel, so we always
   142	 * remove it from the device tree.
   143	 */
 > 144	void remove_ima_buffer(void *fdt, int chosen_node)
   145	{
   146		int ret, len;
   147		unsigned long addr;
   148		size_t size;
   149		const void *prop;
   150	
   151		prop = fdt_getprop(fdt, chosen_node, "linux,ima-kexec-buffer", &len);
   152		if (!prop)
   153			return;
   154	
   155		ret = do_get_kexec_buffer(prop, len, &addr, &size);
   156		fdt_delprop(fdt, chosen_node, "linux,ima-kexec-buffer");
   157		if (ret)
   158			return;
   159	
   160		ret = delete_fdt_mem_rsv(fdt, addr, size);
   161		if (!ret)
   162			pr_debug("Removed old IMA buffer reservation.\n");
   163	}
   164	

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org

[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 71849 bytes --]

^ permalink raw reply

* [PATCH] mm/debug_vm_pgtable: Fix kernel crash with page table validate
From: Aneesh Kumar K.V @ 2020-06-08  6:27 UTC (permalink / raw)
  To: linux-mm; +Cc: akpm, anshuman.khandual, linuxppc-dev, Aneesh Kumar K.V

Architectures can have CONFIG_TRANSPARENT_HUGEPAGE enabled but
no THP support enabled based on platforms. For ex: with 4K
PAGE_SIZE ppc64 supports THP only with radix translation.

This results in below crash when running with hash translation and
4K PAGE_SIZE.

kernel BUG at arch/powerpc/include/asm/book3s/64/hash-4k.h:140!
cpu 0x61: Vector: 700 (Program Check) at [c000000ff948f860]
    pc: c0000000018810f8: debug_vm_pgtable+0x480/0x8b0
    lr: c0000000018810ec: debug_vm_pgtable+0x474/0x8b0
...
[c000000ff948faf0] c000000001880fec debug_vm_pgtable+0x374/0x8b0 (unreliable)
[c000000ff948fbf0] c000000000011648 do_one_initcall+0x98/0x4f0
[c000000ff948fcd0] c000000001843928 kernel_init_freeable+0x330/0x3fc
[c000000ff948fdb0] c0000000000122ac kernel_init+0x24/0x148
[c000000ff948fe20] c00000000000cc44 ret_from_kernel_thread+0x5c/0x78

Check for THP support correctly

Cc: anshuman.khandual@arm.com
Fixes: 399145f9eb6c ("mm/debug: add tests validating architecture page table helpers")
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
---
 mm/debug_vm_pgtable.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/mm/debug_vm_pgtable.c b/mm/debug_vm_pgtable.c
index 188c18908964..e60151c5e997 100644
--- a/mm/debug_vm_pgtable.c
+++ b/mm/debug_vm_pgtable.c
@@ -61,6 +61,9 @@ static void __init pmd_basic_tests(unsigned long pfn, pgprot_t prot)
 {
 	pmd_t pmd = pfn_pmd(pfn, prot);
 
+	if (!has_transparent_hugepage())
+		return;
+
 	WARN_ON(!pmd_same(pmd, pmd));
 	WARN_ON(!pmd_young(pmd_mkyoung(pmd_mkold(pmd))));
 	WARN_ON(!pmd_dirty(pmd_mkdirty(pmd_mkclean(pmd))));
-- 
2.26.2


^ permalink raw reply related

* Re: [PATCH] tty: serial: cpm_uart: Fix behaviour for non existing GPIOs
From: Johan Hovold @ 2020-06-08  6:41 UTC (permalink / raw)
  To: Christophe Leroy
  Cc: Greg Kroah-Hartman, Linus Walleij, linux-kernel, linux-serial,
	Jiri Slaby, linuxppc-dev
In-Reply-To: <bafd8df9e743c433196c727293c5015620fae2b8.1591428452.git.christophe.leroy@csgroup.eu>

On Sat, Jun 06, 2020 at 07:30:21AM +0000, Christophe Leroy wrote:
> devm_gpiod_get_index() doesn't return NULL but -ENOENT when the
> requested GPIO doesn't exist,  leading to the following messages:
> 
> [    2.742468] gpiod_direction_input: invalid GPIO (errorpointer)
> [    2.748147] can't set direction for gpio #2: -2
> [    2.753081] gpiod_direction_input: invalid GPIO (errorpointer)
> [    2.758724] can't set direction for gpio #3: -2
> [    2.763666] gpiod_direction_output: invalid GPIO (errorpointer)
> [    2.769394] can't set direction for gpio #4: -2
> [    2.774341] gpiod_direction_input: invalid GPIO (errorpointer)
> [    2.779981] can't set direction for gpio #5: -2
> [    2.784545] ff000a20.serial: ttyCPM1 at MMIO 0xfff00a20 (irq = 39, base_baud = 8250000) is a CPM UART
> 
> Use IS_ERR_OR_NULL() to properly check gpiod validity.

Why check for NULL at all?

> Fixes: 97cbaf2c829b ("tty: serial: cpm_uart: Convert to use GPIO descriptors")
> Cc: stable@vger.kernel.org
> Cc: Linus Walleij <linus.walleij@linaro.org>
> Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
> ---
>  drivers/tty/serial/cpm_uart/cpm_uart_core.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/drivers/tty/serial/cpm_uart/cpm_uart_core.c b/drivers/tty/serial/cpm_uart/cpm_uart_core.c
> index a04f74d2e854..3cbe24802296 100644
> --- a/drivers/tty/serial/cpm_uart/cpm_uart_core.c
> +++ b/drivers/tty/serial/cpm_uart/cpm_uart_core.c
> @@ -1217,7 +1217,7 @@ static int cpm_uart_init_port(struct device_node *np,
>  
>  		gpiod = devm_gpiod_get_index(dev, NULL, i, GPIOD_ASIS);
>  
> -		if (gpiod) {
> +		if (!IS_ERR_OR_NULL(gpiod)) {
>  			if (i == GPIO_RTS || i == GPIO_DTR)
>  				ret = gpiod_direction_output(gpiod, 0);
>  			else

Johan

^ permalink raw reply

* [PATCH v2 2/4] powerpc/64/mm: implement page mapping percpu first chunk allocator
From: Aneesh Kumar K.V @ 2020-06-08  7:09 UTC (permalink / raw)
  To: linuxppc-dev, mpe; +Cc: cam, Aneesh Kumar K.V
In-Reply-To: <20200608070904.387440-1-aneesh.kumar@linux.ibm.com>

Implement page mapping percpu first chunk allocator as a fallback to
the embedding allocator. With 4K hash translation we limit our page
table range to 64TB and commit: 0034d395f89d ("powerpc/mm/hash64: Map all the
kernel regions in the same 0xc range") moved all kernel mapping to
that 64TB range. In-order to support sparse memory layout we need
to increase our linear mapping space and reduce other mappings.

With such a layout percpu embedded first chunk allocator will fail
because of small vmalloc range. Add a fallback to page mapping
percpu first chunk allocator for such failures.

The below dmesg output can be observed in such case.

 percpu: max_distance=0x1ffffef00000 too large for vmalloc space 0x10000000000
 PERCPU: auto allocator failed (-22), falling back to page size
 percpu: 40 4K pages/cpu s148816 r0 d15024

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
---
 arch/powerpc/Kconfig           |  5 ++-
 arch/powerpc/kernel/setup_64.c | 62 ++++++++++++++++++++++++++++++++--
 2 files changed, 63 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 9fa23eb320ff..820365c42065 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -59,7 +59,10 @@ config HAVE_SETUP_PER_CPU_AREA
 	def_bool PPC64
 
 config NEED_PER_CPU_EMBED_FIRST_CHUNK
-	def_bool PPC64
+	def_bool y if PPC64
+
+config NEED_PER_CPU_PAGE_FIRST_CHUNK
+	def_bool y if PPC64
 
 config NR_IRQS
 	int "Number of virtual interrupt numbers"
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index eaddd53a0e13..6090d8290561 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -66,6 +66,7 @@
 #include <asm/feature-fixups.h>
 #include <asm/kup.h>
 #include <asm/early_ioremap.h>
+#include <asm/pgalloc.h>
 
 #include "setup.h"
 
@@ -808,13 +809,58 @@ static int pcpu_cpu_distance(unsigned int from, unsigned int to)
 unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
 EXPORT_SYMBOL(__per_cpu_offset);
 
+static void __init pcpu_populate_pte(unsigned long addr)
+{
+	pgd_t *pgd = pgd_offset_k(addr);
+	p4d_t *p4d;
+	pud_t *pud;
+	pmd_t *pmd;
+
+	p4d = p4d_offset(pgd, addr);
+	if (p4d_none(*p4d)) {
+		pud_t *new;
+
+		new = memblock_alloc(PUD_TABLE_SIZE, PUD_TABLE_SIZE);
+		if (!new)
+			goto err_alloc;
+		p4d_populate(&init_mm, p4d, new);
+	}
+
+	pud = pud_offset(p4d, addr);
+	if (pud_none(*pud)) {
+		pmd_t *new;
+
+		new = memblock_alloc(PMD_TABLE_SIZE, PMD_TABLE_SIZE);
+		if (!new)
+			goto err_alloc;
+		pud_populate(&init_mm, pud, new);
+	}
+
+	pmd = pmd_offset(pud, addr);
+	if (!pmd_present(*pmd)) {
+		pte_t *new;
+
+		new = memblock_alloc(PTE_TABLE_SIZE, PTE_TABLE_SIZE);
+		if (!new)
+			goto err_alloc;
+		pmd_populate_kernel(&init_mm, pmd, new);
+	}
+
+	return;
+
+err_alloc:
+	panic("%s: Failed to allocate %lu bytes align=%lx from=%lx\n",
+	      __func__, PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
+}
+
+
 void __init setup_per_cpu_areas(void)
 {
 	const size_t dyn_size = PERCPU_MODULE_RESERVE + PERCPU_DYNAMIC_RESERVE;
 	size_t atom_size;
 	unsigned long delta;
 	unsigned int cpu;
-	int rc;
+	int rc = -EINVAL;
 
 	/*
 	 * Linear mapping is one of 4K, 1M and 16M.  For 4K, no need
@@ -826,8 +872,18 @@ void __init setup_per_cpu_areas(void)
 	else
 		atom_size = 1 << 20;
 
-	rc = pcpu_embed_first_chunk(0, dyn_size, atom_size, pcpu_cpu_distance,
-				    pcpu_alloc_bootmem, pcpu_free_bootmem);
+	if (pcpu_chosen_fc != PCPU_FC_PAGE) {
+		rc = pcpu_embed_first_chunk(0, dyn_size, atom_size, pcpu_cpu_distance,
+					    pcpu_alloc_bootmem, pcpu_free_bootmem);
+		if (rc)
+			pr_warn("PERCPU: %s allocator failed (%d), "
+				"falling back to page size\n",
+				pcpu_fc_names[pcpu_chosen_fc], rc);
+	}
+
+	if (rc < 0)
+		rc = pcpu_page_first_chunk(0, pcpu_alloc_bootmem, pcpu_free_bootmem,
+					   pcpu_populate_pte);
 	if (rc < 0)
 		panic("cannot initialize percpu area (err=%d)", rc);
 
-- 
2.26.2


^ permalink raw reply related

* [PATCH v2 1/4] powerpc/percpu: Update percpu bootmem allocator
From: Aneesh Kumar K.V @ 2020-06-08  7:09 UTC (permalink / raw)
  To: linuxppc-dev, mpe; +Cc: cam, Aneesh Kumar K.V

This update the ppc64 version to be closer to x86/sparc.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
---
 arch/powerpc/kernel/setup_64.c | 45 ++++++++++++++++++++++++++++------
 1 file changed, 37 insertions(+), 8 deletions(-)

diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index bb47555d48a2..eaddd53a0e13 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -753,17 +753,46 @@ void __init emergency_stack_init(void)
 }
 
 #ifdef CONFIG_SMP
-#define PCPU_DYN_SIZE		()
-
-static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, size_t align)
+/**
+ * pcpu_alloc_bootmem - NUMA friendly alloc_bootmem wrapper for percpu
+ * @cpu: cpu to allocate for
+ * @size: size allocation in bytes
+ * @align: alignment
+ *
+ * Allocate @size bytes aligned at @align for cpu @cpu.  This wrapper
+ * does the right thing for NUMA regardless of the current
+ * configuration.
+ *
+ * RETURNS:
+ * Pointer to the allocated area on success, NULL on failure.
+ */
+static void * __init pcpu_alloc_bootmem(unsigned int cpu, size_t size,
+					size_t align)
 {
-	return memblock_alloc_try_nid(size, align, __pa(MAX_DMA_ADDRESS),
-				      MEMBLOCK_ALLOC_ACCESSIBLE,
-				      early_cpu_to_node(cpu));
+	const unsigned long goal = __pa(MAX_DMA_ADDRESS);
+#ifdef CONFIG_NEED_MULTIPLE_NODES
+	int node = early_cpu_to_node(cpu);
+	void *ptr;
 
+	if (!node_online(node) || !NODE_DATA(node)) {
+		ptr = memblock_alloc_from(size, align, goal);
+		pr_info("cpu %d has no node %d or node-local memory\n",
+			cpu, node);
+		pr_debug("per cpu data for cpu%d %lu bytes at %016lx\n",
+			 cpu, size, __pa(ptr));
+	} else {
+		ptr = memblock_alloc_try_nid(size, align, goal,
+					     MEMBLOCK_ALLOC_ACCESSIBLE, node);
+		pr_debug("per cpu data for cpu%d %lu bytes on node%d at "
+			 "%016lx\n", cpu, size, node, __pa(ptr));
+	}
+	return ptr;
+#else
+	return memblock_alloc_from(size, align, goal);
+#endif
 }
 
-static void __init pcpu_fc_free(void *ptr, size_t size)
+static void __init pcpu_free_bootmem(void *ptr, size_t size)
 {
 	memblock_free(__pa(ptr), size);
 }
@@ -798,7 +827,7 @@ void __init setup_per_cpu_areas(void)
 		atom_size = 1 << 20;
 
 	rc = pcpu_embed_first_chunk(0, dyn_size, atom_size, pcpu_cpu_distance,
-				    pcpu_fc_alloc, pcpu_fc_free);
+				    pcpu_alloc_bootmem, pcpu_free_bootmem);
 	if (rc < 0)
 		panic("cannot initialize percpu area (err=%d)", rc);
 
-- 
2.26.2


^ permalink raw reply related

* [PATCH v2 4/4] powerpc/mm/book3s: Split radix and hash MAX_PHYSMEM limit
From: Aneesh Kumar K.V @ 2020-06-08  7:09 UTC (permalink / raw)
  To: linuxppc-dev, mpe; +Cc: cam, Aneesh Kumar K.V
In-Reply-To: <20200608070904.387440-1-aneesh.kumar@linux.ibm.com>

MAX_PHYSMEM #define is used along with sparsemem to determine the SECTION_SHIFT
value. Powerpc also uses the same value to limit the max memory enabled on the
system. With 4K PAGE_SIZE and hash translation mode, we want to limit the max
memory enabled to 64TB due to page table size restrictions. However, with
radix translation, we don't have these restrictions. Hence split the radix
and hash MA_PHYSMEM limit and use different limit for each of them.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
---
 arch/powerpc/include/asm/book3s/64/hash-4k.h  |  5 +++++
 arch/powerpc/include/asm/book3s/64/hash-64k.h | 13 +++++++++++++
 arch/powerpc/include/asm/book3s/64/mmu-hash.h |  4 ++--
 arch/powerpc/include/asm/book3s/64/mmu.h      | 15 ---------------
 arch/powerpc/include/asm/book3s/64/pgtable.h  |  7 +++++++
 arch/powerpc/include/asm/book3s/64/radix.h    | 16 ++++++++++++++++
 arch/powerpc/kernel/prom.c                    |  5 +++++
 arch/powerpc/mm/book3s64/slb.c                |  4 ++--
 8 files changed, 50 insertions(+), 19 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/hash-4k.h b/arch/powerpc/include/asm/book3s/64/hash-4k.h
index 80c953414882..2e86d9b35766 100644
--- a/arch/powerpc/include/asm/book3s/64/hash-4k.h
+++ b/arch/powerpc/include/asm/book3s/64/hash-4k.h
@@ -22,6 +22,11 @@
 #define REGION_SHIFT		(40)
 #define H_KERN_MAP_SIZE		(ASM_CONST(1) << REGION_SHIFT)
 
+/*
+ * Limits the linear mapping range
+ */
+#define H_MAX_PHYSMEM_BITS	46
+
 /*
  * Define the address range of the kernel non-linear virtual area (61TB)
  */
diff --git a/arch/powerpc/include/asm/book3s/64/hash-64k.h b/arch/powerpc/include/asm/book3s/64/hash-64k.h
index 0729c034e56f..16c040d19a2b 100644
--- a/arch/powerpc/include/asm/book3s/64/hash-64k.h
+++ b/arch/powerpc/include/asm/book3s/64/hash-64k.h
@@ -7,6 +7,19 @@
 #define H_PUD_INDEX_SIZE  10  // size: 8B << 10 = 8KB, maps 2^10 x 16GB = 16TB
 #define H_PGD_INDEX_SIZE   8  // size: 8B <<  8 = 2KB, maps 2^8  x 16TB =  4PB
 
+/*
+ * If we store section details in page->flags we can't increase the MAX_PHYSMEM_BITS
+ * if we increase SECTIONS_WIDTH we will not store node details in page->flags and
+ * page_to_nid does a page->section->node lookup
+ * Hence only increase for VMEMMAP. Further depending on SPARSEMEM_EXTREME reduce
+ * memory requirements with large number of sections.
+ * 51 bits is the max physical real address on POWER9
+ */
+#if defined(CONFIG_SPARSEMEM_VMEMMAP) && defined(CONFIG_SPARSEMEM_EXTREME)
+#define H_MAX_PHYSMEM_BITS	51
+#else
+#define H_MAX_PHYSMEM_BITS	46
+#endif
 
 /*
  * Each context is 512TB size. SLB miss for first context/default context
diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
index 3fa1b962dc27..84817466a3dd 100644
--- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
@@ -577,8 +577,8 @@ extern void slb_set_size(u16 size);
  * For vmalloc and memmap, we use just one context with 512TB. With 64 byte
  * struct page size, we need ony 32 TB in memmap for 2PB (51 bits (MAX_PHYSMEM_BITS)).
  */
-#if (MAX_PHYSMEM_BITS > MAX_EA_BITS_PER_CONTEXT)
-#define MAX_KERNEL_CTX_CNT	(1UL << (MAX_PHYSMEM_BITS - MAX_EA_BITS_PER_CONTEXT))
+#if (H_MAX_PHYSMEM_BITS > MAX_EA_BITS_PER_CONTEXT)
+#define MAX_KERNEL_CTX_CNT	(1UL << (H_MAX_PHYSMEM_BITS - MAX_EA_BITS_PER_CONTEXT))
 #else
 #define MAX_KERNEL_CTX_CNT	1
 #endif
diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h b/arch/powerpc/include/asm/book3s/64/mmu.h
index 5393a535240c..6202649e3e11 100644
--- a/arch/powerpc/include/asm/book3s/64/mmu.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu.h
@@ -27,21 +27,6 @@ struct mmu_psize_def {
 extern struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT];
 #endif /* __ASSEMBLY__ */
 
-/*
- * If we store section details in page->flags we can't increase the MAX_PHYSMEM_BITS
- * if we increase SECTIONS_WIDTH we will not store node details in page->flags and
- * page_to_nid does a page->section->node lookup
- * Hence only increase for VMEMMAP. Further depending on SPARSEMEM_EXTREME reduce
- * memory requirements with large number of sections.
- * 51 bits is the max physical real address on POWER9
- */
-#if defined(CONFIG_SPARSEMEM_VMEMMAP) && defined(CONFIG_SPARSEMEM_EXTREME) &&  \
-	defined(CONFIG_PPC_64K_PAGES)
-#define MAX_PHYSMEM_BITS 51
-#else
-#define MAX_PHYSMEM_BITS 46
-#endif
-
 /* 64-bit classic hash table MMU */
 #include <asm/book3s/64/mmu-hash.h>
 
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index f17442c3a092..4a6a0c51a733 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -295,6 +295,13 @@ extern unsigned long pci_io_base;
 #include <asm/book3s/64/hash.h>
 #include <asm/book3s/64/radix.h>
 
+#if H_MAX_PHYSMEM_BITS > R_MAX_PHYSMEM_BITS
+#define  MAX_PHYSMEM_BITS	H_MAX_PHYSMEM_BITS
+#else
+#define  MAX_PHYSMEM_BITS	R_MAX_PHYSMEM_BITS
+#endif
+
+
 #ifdef CONFIG_PPC_64K_PAGES
 #include <asm/book3s/64/pgtable-64k.h>
 #else
diff --git a/arch/powerpc/include/asm/book3s/64/radix.h b/arch/powerpc/include/asm/book3s/64/radix.h
index 0cba794c4fb8..c7813dc628fc 100644
--- a/arch/powerpc/include/asm/book3s/64/radix.h
+++ b/arch/powerpc/include/asm/book3s/64/radix.h
@@ -91,6 +91,22 @@
  * +------------------------------+  Kernel linear (0xc.....)
  */
 
+
+/*
+ * If we store section details in page->flags we can't increase the MAX_PHYSMEM_BITS
+ * if we increase SECTIONS_WIDTH we will not store node details in page->flags and
+ * page_to_nid does a page->section->node lookup
+ * Hence only increase for VMEMMAP. Further depending on SPARSEMEM_EXTREME reduce
+ * memory requirements with large number of sections.
+ * 51 bits is the max physical real address on POWER9
+ */
+
+#if defined(CONFIG_SPARSEMEM_VMEMMAP) && defined(CONFIG_SPARSEMEM_EXTREME)
+#define R_MAX_PHYSMEM_BITS	51
+#else
+#define R_MAX_PHYSMEM_BITS	46
+#endif
+
 #define RADIX_KERN_VIRT_START	ASM_CONST(0xc008000000000000)
 /*
  * 49 =  MAX_EA_BITS_PER_CONTEXT (hash specific). To make sure we pick
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index 6a3bac357e24..238349dd101a 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -771,6 +771,11 @@ void __init early_init_devtree(void *params)
 	limit = ALIGN(memory_limit ?: memblock_phys_mem_size(), PAGE_SIZE);
 	memblock_enforce_memory_limit(limit);
 
+#if defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_PPC_4K_PAGES)
+	if (!early_radix_enabled())
+		memblock_cap_memory_range(0, 1UL << (H_MAX_PHYSMEM_BITS));
+#endif
+
 	memblock_allow_resize();
 	memblock_dump_all();
 
diff --git a/arch/powerpc/mm/book3s64/slb.c b/arch/powerpc/mm/book3s64/slb.c
index 8141e8b40ee5..11d65de400e1 100644
--- a/arch/powerpc/mm/book3s64/slb.c
+++ b/arch/powerpc/mm/book3s64/slb.c
@@ -765,8 +765,8 @@ static long slb_allocate_kernel(unsigned long ea, unsigned long id)
 
 	if (id == LINEAR_MAP_REGION_ID) {
 
-		/* We only support upto MAX_PHYSMEM_BITS */
-		if ((ea & EA_MASK) > (1UL << MAX_PHYSMEM_BITS))
+		/* We only support upto H_MAX_PHYSMEM_BITS */
+		if ((ea & EA_MASK) > (1UL << H_MAX_PHYSMEM_BITS))
 			return -EFAULT;
 
 		flags = SLB_VSID_KERNEL | mmu_psize_defs[mmu_linear_psize].sllp;
-- 
2.26.2


^ permalink raw reply related

* [PATCH v2 3/4] powerpc/book3s64/hash/4k: Support large linear mapping range with 4K
From: Aneesh Kumar K.V @ 2020-06-08  7:09 UTC (permalink / raw)
  To: linuxppc-dev, mpe; +Cc: cam, Aneesh Kumar K.V
In-Reply-To: <20200608070904.387440-1-aneesh.kumar@linux.ibm.com>

With commit: 0034d395f89d ("powerpc/mm/hash64: Map all the kernel
regions in the same 0xc range"), we now split the 64TB address range
into 4 contexts each of 16TB. That implies we can do only 16TB linear
mapping.

On some systems, eg. Power9, memory attached to nodes > 0 will appear
above 16TB in the linear mapping. This resulted in kernel crash when
we boot such systems in hash translation mode with 4K PAGE_SIZE.

This patch updates the kernel mapping such that we now start supporting upto
61TB of memory with 4K. The kernel mapping now looks like below 4K PAGE_SIZE
and hash translation.

    vmalloc start     = 0xc0003d0000000000
    IO start          = 0xc0003e0000000000
    vmemmap start     = 0xc0003f0000000000

Our MAX_PHYSMEM_BITS for 4K is still 64TB even though we can only map 61TB.
We prevent bolt mapping anything outside 61TB range by checking against
H_VMALLOC_START.

Fixes: 0034d395f89d ("powerpc/mm/hash64: Map all the kernel regions in the same 0xc range")
Reported-by: Cameron Berkenpas <cam@neo-zeon.de>
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
---
 arch/powerpc/include/asm/book3s/64/hash-4k.h | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/hash-4k.h b/arch/powerpc/include/asm/book3s/64/hash-4k.h
index 3f9ae3585ab9..80c953414882 100644
--- a/arch/powerpc/include/asm/book3s/64/hash-4k.h
+++ b/arch/powerpc/include/asm/book3s/64/hash-4k.h
@@ -13,20 +13,19 @@
  */
 #define MAX_EA_BITS_PER_CONTEXT		46
 
-#define REGION_SHIFT		(MAX_EA_BITS_PER_CONTEXT - 2)
 
 /*
- * Our page table limit us to 64TB. Hence for the kernel mapping,
- * each MAP area is limited to 16 TB.
- * The four map areas are:  linear mapping, vmap, IO and vmemmap
+ * Our page table limit us to 64TB. For 64TB physical memory, we only need 64GB
+ * of vmemmap space. To better support sparse memory layout, we use 61TB
+ * linear map range, 1TB of vmalloc, 1TB of I/O and 1TB of vmememmap.
  */
+#define REGION_SHIFT		(40)
 #define H_KERN_MAP_SIZE		(ASM_CONST(1) << REGION_SHIFT)
 
 /*
- * Define the address range of the kernel non-linear virtual area
- * 16TB
+ * Define the address range of the kernel non-linear virtual area (61TB)
  */
-#define H_KERN_VIRT_START	ASM_CONST(0xc000100000000000)
+#define H_KERN_VIRT_START	ASM_CONST(0xc0003d0000000000)
 
 #ifndef __ASSEMBLY__
 #define H_PTE_TABLE_SIZE	(sizeof(pte_t) << H_PTE_INDEX_SIZE)
-- 
2.26.2


^ permalink raw reply related

* Re: [PATCH v2 2/4] powerpc/64/mm: implement page mapping percpu first chunk allocator
From: Aneesh Kumar K.V @ 2020-06-08  7:12 UTC (permalink / raw)
  To: linuxppc-dev, mpe; +Cc: cam
In-Reply-To: <20200608070904.387440-2-aneesh.kumar@linux.ibm.com>

"Aneesh Kumar K.V" <aneesh.kumar@linux.ibm.com> writes:

> Implement page mapping percpu first chunk allocator as a fallback to
> the embedding allocator. With 4K hash translation we limit our page
> table range to 64TB and commit: 0034d395f89d ("powerpc/mm/hash64: Map all the
> kernel regions in the same 0xc range") moved all kernel mapping to
> that 64TB range. In-order to support sparse memory layout we need
> to increase our linear mapping space and reduce other mappings.
>
> With such a layout percpu embedded first chunk allocator will fail
> because of small vmalloc range. Add a fallback to page mapping
> percpu first chunk allocator for such failures.
>
> The below dmesg output can be observed in such case.
>
>  percpu: max_distance=0x1ffffef00000 too large for vmalloc space 0x10000000000
>  PERCPU: auto allocator failed (-22), falling back to page size
>  percpu: 40 4K pages/cpu s148816 r0 d15024
>

This patch requires powersave=off kernel command line to boot. We are
working to make sure we don't access per cpu variables in real mode.
Additionally, you can also try the below workaround patch

modified   arch/powerpc/kernel/mce.c
@@ -711,7 +711,7 @@ long hmi_exception_realmode(struct pt_regs *regs)
 {	
 	int ret;
 
-	__this_cpu_inc(irq_stat.hmi_exceptions);
+//	__this_cpu_inc(irq_stat.hmi_exceptions);
 
 	ret = hmi_handle_debugtrig(regs);
 	if (ret >= 0)


-aneesh

^ permalink raw reply

* [RFC PATCH] ASoC: fsl_asrc_dma: Fix warning "Cannot create DMA dma:tx symlink"
From: Shengjiu Wang @ 2020-06-08  7:07 UTC (permalink / raw)
  To: lars, perex, tiwai, lgirdwood, broonie, timur, nicoleotsuka,
	Xiubo.Lee, festevam, alsa-devel, linux-kernel, linuxppc-dev

The issue log is:

[   48.021506] CPU: 0 PID: 664 Comm: aplay Not tainted 5.7.0-rc1-13120-g12b434cbbea0 #343
[   48.031063] Hardware name: Freescale i.MX6 Quad/DualLite (Device Tree)
[   48.037638] [<c0110dd8>] (unwind_backtrace) from [<c010b8ec>] (show_stack+0x10/0x14)
[   48.045413] [<c010b8ec>] (show_stack) from [<c0557fc0>] (dump_stack+0xe4/0x118)
[   48.052757] [<c0557fc0>] (dump_stack) from [<c032aeb4>] (sysfs_warn_dup+0x50/0x64)
[   48.060357] [<c032aeb4>] (sysfs_warn_dup) from [<c032b1a8>] (sysfs_do_create_link_sd+0xc8/0xd4)
[   48.069086] [<c032b1a8>] (sysfs_do_create_link_sd) from [<c05dc668>] (dma_request_chan+0xb0/0x210)
[   48.078068] [<c05dc668>] (dma_request_chan) from [<c05dc7d0>] (dma_request_slave_channel+0x8/0x14)
[   48.087060] [<c05dc7d0>] (dma_request_slave_channel) from [<c09d5cd4>] (fsl_asrc_dma_hw_params+0x1dc/0x434)
[   48.096831] [<c09d5cd4>] (fsl_asrc_dma_hw_params) from [<c09c143c>] (soc_pcm_hw_params+0x4b0/0x650)
[   48.105903] [<c09c143c>] (soc_pcm_hw_params) from [<c09c36a8>] (dpcm_fe_dai_hw_params+0x70/0xe4)
[   48.114715] [<c09c36a8>] (dpcm_fe_dai_hw_params) from [<c099b274>] (snd_pcm_hw_params+0x158/0x418)
[   48.123701] [<c099b274>] (snd_pcm_hw_params) from [<c099c5a0>] (snd_pcm_ioctl+0x734/0x183c)
[   48.132079] [<c099c5a0>] (snd_pcm_ioctl) from [<c029ff94>] (ksys_ioctl+0x2ac/0xb98)
[   48.139765] [<c029ff94>] (ksys_ioctl) from [<c0100080>] (ret_fast_syscall+0x0/0x28)
[   48.147440] Exception stack(0xed3c5fa8 to 0xed3c5ff0)
[   48.152515] 5fa0:                   bec28670 00e92870 00000004 c25c4111 bec28670 0002000f
[   48.160716] 5fc0: bec28670 00e92870 00e92820 00000036 0000bb80 00000000 0002c2f8 00000003
[   48.168913] 5fe0: b6f4d3fc bec2853c b6ee7655 b6e22cf8
[   48.174236] fsl-esai-dai 2024000.esai: Cannot create DMA dma:tx symlink

The dma channel is already requested by Back-End cpu dai driver,
if fsl_asrc_dma requests dma chan with same dma:tx symlink, then
this warning comes out.

The warning is added by
commit 71723a96b8b1 ("dmaengine: Create symlinks between DMA channels and slaves")
commit bad83565eafe ("dmaengine: Cleanups for the slave <-> channel symlink support")

As the dma channel is requested by Back-End, we need to reuse the channel
and avoid to request a new one, then the issue can be fixed.

In order to get the dma channel which is already requested in Back-End.
we export two functions (snd_soc_lookup_component_nolocked and
soc_component_to_pcm), if we can get the dma channel, then reuse it.
if can't, then request a new one.

Signed-off-by: Shengjiu Wang <shengjiu.wang@nxp.com>
---
 include/sound/dmaengine_pcm.h         | 11 ++++++
 include/sound/soc.h                   |  2 ++
 sound/soc/fsl/fsl_asrc_common.h       |  2 ++
 sound/soc/fsl/fsl_asrc_dma.c          | 49 +++++++++++++++++++++------
 sound/soc/soc-core.c                  |  3 +-
 sound/soc/soc-generic-dmaengine-pcm.c | 12 -------
 6 files changed, 55 insertions(+), 24 deletions(-)

diff --git a/include/sound/dmaengine_pcm.h b/include/sound/dmaengine_pcm.h
index b65220685920..8c5e38180fb0 100644
--- a/include/sound/dmaengine_pcm.h
+++ b/include/sound/dmaengine_pcm.h
@@ -161,4 +161,15 @@ int snd_dmaengine_pcm_prepare_slave_config(struct snd_pcm_substream *substream,
 
 #define SND_DMAENGINE_PCM_DRV_NAME "snd_dmaengine_pcm"
 
+struct dmaengine_pcm {
+	struct dma_chan *chan[SNDRV_PCM_STREAM_LAST + 1];
+	const struct snd_dmaengine_pcm_config *config;
+	struct snd_soc_component component;
+	unsigned int flags;
+};
+
+static inline struct dmaengine_pcm *soc_component_to_pcm(struct snd_soc_component *p)
+{
+	return container_of(p, struct dmaengine_pcm, component);
+}
 #endif
diff --git a/include/sound/soc.h b/include/sound/soc.h
index 74868436ac79..565612a8d690 100644
--- a/include/sound/soc.h
+++ b/include/sound/soc.h
@@ -444,6 +444,8 @@ int devm_snd_soc_register_component(struct device *dev,
 			 const struct snd_soc_component_driver *component_driver,
 			 struct snd_soc_dai_driver *dai_drv, int num_dai);
 void snd_soc_unregister_component(struct device *dev);
+struct snd_soc_component *snd_soc_lookup_component_nolocked(struct device *dev,
+							    const char *driver_name);
 struct snd_soc_component *snd_soc_lookup_component(struct device *dev,
 						   const char *driver_name);
 
diff --git a/sound/soc/fsl/fsl_asrc_common.h b/sound/soc/fsl/fsl_asrc_common.h
index 77665b15c8db..09512bc79b80 100644
--- a/sound/soc/fsl/fsl_asrc_common.h
+++ b/sound/soc/fsl/fsl_asrc_common.h
@@ -32,6 +32,7 @@ enum asrc_pair_index {
  * @dma_chan: inputer and output DMA channels
  * @dma_data: private dma data
  * @pos: hardware pointer position
+ * @req_dma_chan_dev_to_dev: flag for release dev_to_dev chan
  * @private: pair private area
  */
 struct fsl_asrc_pair {
@@ -45,6 +46,7 @@ struct fsl_asrc_pair {
 	struct dma_chan *dma_chan[2];
 	struct imx_dma_data dma_data;
 	unsigned int pos;
+	bool req_dma_chan_dev_to_dev;
 
 	void *private;
 };
diff --git a/sound/soc/fsl/fsl_asrc_dma.c b/sound/soc/fsl/fsl_asrc_dma.c
index d6a3fc5f87e5..3ad862225326 100644
--- a/sound/soc/fsl/fsl_asrc_dma.c
+++ b/sound/soc/fsl/fsl_asrc_dma.c
@@ -135,6 +135,7 @@ static int fsl_asrc_dma_hw_params(struct snd_soc_component *component,
 	struct snd_dmaengine_dai_dma_data *dma_params_be = NULL;
 	struct snd_pcm_runtime *runtime = substream->runtime;
 	struct fsl_asrc_pair *pair = runtime->private_data;
+	struct snd_soc_component *component_be = NULL;
 	struct fsl_asrc *asrc = pair->asrc;
 	struct dma_slave_config config_fe, config_be;
 	enum asrc_pair_index index = pair->index;
@@ -142,7 +143,7 @@ static int fsl_asrc_dma_hw_params(struct snd_soc_component *component,
 	int stream = substream->stream;
 	struct imx_dma_data *tmp_data;
 	struct snd_soc_dpcm *dpcm;
-	struct dma_chan *tmp_chan;
+	struct dma_chan *tmp_chan = NULL, *tmp_chan_new = NULL;
 	struct device *dev_be;
 	u8 dir = tx ? OUT : IN;
 	dma_cap_mask_t mask;
@@ -160,6 +161,7 @@ static int fsl_asrc_dma_hw_params(struct snd_soc_component *component,
 		substream_be = snd_soc_dpcm_get_substream(be, stream);
 		dma_params_be = snd_soc_dai_get_dma_data(dai, substream_be);
 		dev_be = dai->dev;
+		component_be = snd_soc_lookup_component_nolocked(dev_be, SND_DMAENGINE_PCM_DRV_NAME);
 		break;
 	}
 
@@ -205,10 +207,16 @@ static int fsl_asrc_dma_hw_params(struct snd_soc_component *component,
 	 */
 	if (!asrc->use_edma) {
 		/* Get DMA request of Back-End */
-		tmp_chan = dma_request_slave_channel(dev_be, tx ? "tx" : "rx");
+		if (component_be)
+			tmp_chan = soc_component_to_pcm(component_be)->chan[substream->stream];
+		if (!tmp_chan) {
+			tmp_chan_new = dma_request_slave_channel(dev_be, tx ? "tx" : "rx");
+			tmp_chan = tmp_chan_new;
+		}
 		tmp_data = tmp_chan->private;
 		pair->dma_data.dma_request = tmp_data->dma_request;
-		dma_release_channel(tmp_chan);
+		if (tmp_chan_new)
+			dma_release_channel(tmp_chan_new);
 
 		/* Get DMA request of Front-End */
 		tmp_chan = asrc->get_dma_channel(pair, dir);
@@ -220,9 +228,26 @@ static int fsl_asrc_dma_hw_params(struct snd_soc_component *component,
 
 		pair->dma_chan[dir] =
 			dma_request_channel(mask, filter, &pair->dma_data);
+		pair->req_dma_chan_dev_to_dev = true;
 	} else {
-		pair->dma_chan[dir] =
-			asrc->get_dma_channel(pair, dir);
+		/* With EDMA, there is two dma channels can be used for p2p,
+		 * one is from ASRC, one is from another peripheral
+		 * (ESAI or SAI) previously we select the dma channel of ASRC,
+		 * but find an issue for ideal ratio case, there is no control
+		 * for data copy speed, the speed is faster than sample
+		 * frequency.
+		 *
+		 * So we switch to dma channel of peripheral (ESAI or SAI),
+		 * that copy speed of DMA is controlled by data consumption
+		 * speed in the peripheral FIFO.
+		 */
+		pair->req_dma_chan_dev_to_dev = false;
+		if (component_be)
+			pair->dma_chan[dir] = soc_component_to_pcm(component_be)->chan[substream->stream];;
+		if (!pair->dma_chan[dir]) {
+			pair->dma_chan[dir] = dma_request_slave_channel(dev_be, tx ? "tx" : "rx");
+			pair->req_dma_chan_dev_to_dev = true;
+		}
 	}
 
 	if (!pair->dma_chan[dir]) {
@@ -273,19 +298,21 @@ static int fsl_asrc_dma_hw_params(struct snd_soc_component *component,
 static int fsl_asrc_dma_hw_free(struct snd_soc_component *component,
 				struct snd_pcm_substream *substream)
 {
+	bool tx = substream->stream == SNDRV_PCM_STREAM_PLAYBACK;
 	struct snd_pcm_runtime *runtime = substream->runtime;
 	struct fsl_asrc_pair *pair = runtime->private_data;
+	u8 dir = tx ? OUT : IN;
 
 	snd_pcm_set_runtime_buffer(substream, NULL);
 
-	if (pair->dma_chan[IN])
-		dma_release_channel(pair->dma_chan[IN]);
+	if (pair->dma_chan[!dir])
+		dma_release_channel(pair->dma_chan[!dir]);
 
-	if (pair->dma_chan[OUT])
-		dma_release_channel(pair->dma_chan[OUT]);
+	if (pair->dma_chan[dir] && pair->req_dma_chan_dev_to_dev)
+		dma_release_channel(pair->dma_chan[dir]);
 
-	pair->dma_chan[IN] = NULL;
-	pair->dma_chan[OUT] = NULL;
+	pair->dma_chan[!dir] = NULL;
+	pair->dma_chan[dir] = NULL;
 
 	return 0;
 }
diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c
index b07eca2c6ccc..d4c73e86d058 100644
--- a/sound/soc/soc-core.c
+++ b/sound/soc/soc-core.c
@@ -310,7 +310,7 @@ struct snd_soc_component *snd_soc_rtdcom_lookup(struct snd_soc_pcm_runtime *rtd,
 }
 EXPORT_SYMBOL_GPL(snd_soc_rtdcom_lookup);
 
-static struct snd_soc_component
+struct snd_soc_component
 *snd_soc_lookup_component_nolocked(struct device *dev, const char *driver_name)
 {
 	struct snd_soc_component *component;
@@ -329,6 +329,7 @@ static struct snd_soc_component
 
 	return found_component;
 }
+EXPORT_SYMBOL_GPL(snd_soc_lookup_component_nolocked);
 
 struct snd_soc_component *snd_soc_lookup_component(struct device *dev,
 						   const char *driver_name)
diff --git a/sound/soc/soc-generic-dmaengine-pcm.c b/sound/soc/soc-generic-dmaengine-pcm.c
index f728309a0833..80a4e71f2d95 100644
--- a/sound/soc/soc-generic-dmaengine-pcm.c
+++ b/sound/soc/soc-generic-dmaengine-pcm.c
@@ -21,18 +21,6 @@
  */
 #define SND_DMAENGINE_PCM_FLAG_NO_RESIDUE BIT(31)
 
-struct dmaengine_pcm {
-	struct dma_chan *chan[SNDRV_PCM_STREAM_LAST + 1];
-	const struct snd_dmaengine_pcm_config *config;
-	struct snd_soc_component component;
-	unsigned int flags;
-};
-
-static struct dmaengine_pcm *soc_component_to_pcm(struct snd_soc_component *p)
-{
-	return container_of(p, struct dmaengine_pcm, component);
-}
-
 static struct device *dmaengine_dma_dev(struct dmaengine_pcm *pcm,
 	struct snd_pcm_substream *substream)
 {
-- 
2.21.0


^ permalink raw reply related

* Re: [RFC PATCH 1/2] libnvdimm: Add prctl control for disabling synchronous fault support.
From: Aneesh Kumar K.V @ 2020-06-08  7:42 UTC (permalink / raw)
  To: Jan Kara, Williams, Dan J
  Cc: linux-nvdimm, jack@suse.de, Jeff Moyer, Michal Suchánek,
	linuxppc-dev
In-Reply-To: <f8113a5b-be3b-3627-7535-ed2c9e0293f9@linux.ibm.com>



"Aneesh Kumar K.V" <aneesh.kumar@linux.ibm.com> writes:

> On 6/3/20 1:56 PM, Jan Kara wrote:
>> On Tue 02-06-20 17:59:08, Williams, Dan J wrote:
>>> [ forgive formatting, a series of unfortunate events has me using Outlook for the moment ]
>>>
>>>> From: Jan Kara <jack@suse.cz>
>>>>>>> These flags are device properties that affect the kernel and
>>>>>>> userspace's handling of persistence.
>>>>>>>
>>>>>>
>>>>>> That will not handle the scenario with multiple applications using
>>>>>> the same fsdax mount point where one is updated to use the new
>>>>>> instruction and the other is not.
>>>>>
>>>>> Right, it needs to be a global setting / flag day to switch from one
>>>>> regime to another. Per-process control is a recipe for disaster.
>>>>
>>>> First I'd like to mention that hopefully the concern is mostly theoretical since
>>>> as Aneesh wrote above, real persistent memory never shipped for PPC and
>>>> so there are very few apps (if any) using the old way to ensure cache
>>>> flushing.
>>>>
>>>> But I'd like to understand why do you think per-process control is a recipe for
>>>> disaster? Because from my POV the sysfs interface you propose is actually
>>>> difficult to use in practice. As a distributor, you have hard time picking the
>>>> default because you have a choice between picking safe option which is
>>>> going to confuse users because of failing MAP_SYNC and unsafe option
>>>> where everyone will be happy until someone looses data because of some
>>>> ancient application using wrong instructions to persist data. Poor experience
>>>> for users in either way. And when distro defaults to "safe option", then the
>>>> burden is on the sysadmin to toggle the switch but how is he supposed to
>>>> decide when that is safe? First he has to understand what the problem
>>>> actually is, then he has to audit all the applications using pmem whether they
>>>> use the new instruction - which is IMO a lot of effort if you have a couple of
>>>> applications and practically infeasible if you have more of them.
>>>> So IMO the burden should be *on the application* to declare that it is aware
>>>> of the new instructions to flush pmem on the platform and only to such
>>>> application the kernel should give the trust to use MAP_SYNC mappings.
>>>
>>> The "disaster" in my mind is this need to globally change the ABI for
>>> persistence semantics for all of Linux because one CPU wants a do over.
>>> What does a generic "MAP_SYNC_ENABLE" knob even mean to the existing
>>> deployed base of persistent memory applications? Yes, sysfs is awkward,
>>> but it's trying to provide some relief without imposing unexplainable
>>> semantics on everyone else. I think a comprehensive (overengineered)
>>> solution would involve not introducing another "I know what I'm doing"
>>> flag to the interface, but maybe requiring applications to call a pmem
>>> sync API in something like a vsyscall. Or, also overengineered, some
>>> binary translation / interpretation to actively detect and kill
>>> applications that deploy the old instructions. Something horrid like on
>>> first write fault to a MAP_SYNC try to look ahead in the binary for the
>>> correct sync sequence and kill the application otherwise. That would at
>>> least provide some enforcement and safety without requiring other
>>> architectures to consider what MAP_SYNC_ENABLE means to them.
>> 
>> Thanks for explanation. So I absolutely agree that other architectures (and
>> even older versions of POWER architecture) must not be influenced by the new
>> tunable. That's why I wrote in my reply to Aneesh that I'd be for checking
>> during mmap(2) with MAP_SYNC, whether we are in a situation where new PPC
>> flush instructions are required and *only in that case* decide based on the
>> prctl value whether MAP_SYNC should be allowed or not.
>> 
>
> v2 version of the patch series does that
>
> https://lore.kernel.org/linuxppc-dev/20200602074909.36738-1-aneesh.kumar@linux.ibm.com/
>
>> Whether this solution is overengineering or not depends on how you think
>> it's likely there will be applications trying to use old flush instructions
>> with MAP_SYNC on POWER10 platforms...
>> 
>
> Now considering that with ppc64 we never had a real persistent memory 
> device available for the end-user to try and the new instructions are 
> only needed on newer hardware, can we assume we have enough time to get 
> the userspace to use new instructions?
>
> As a safety net, we can keep the dax device-specific sysfs control. But 
> in reality, by the time newer hardware gets released, we can get the 
> distributions updated to flip the CONFIG_ARCH_MAP_SYNC_DISABLE=n?
>
> With this:
> 1) vPMEM continues to work and since it is a volatile region. That 
> doesn't need any flush instructions.
>
> 2) We get pmdk and other user applications updated to use new 
> instructions and make sure updated packages are made available to all 
> distributions
>
> 3) On newer hardware, the device will appear with a new compat string. 
> Hence older distributions won't initialize pmem on newer hardware.
>
> 4) If we have a newer kernel with an older distro, we use the per 
> namespace sysfs knob that prevents the usage of MAP_SYNC.
>
> 5) After a year or so we mark the CONFIG_ARCH_MAP_SYNC_DISABLE=n
> on ppc64 when we are confident that everybody is using the new flush 
> instruction.
>

Is this approach ok for distributions? If so I can repost the series
dropping the prctl change.

-aneesh

^ permalink raw reply

* [RFC PATCH v0 0/4] Off-load TLB invalidations to host for !GTSE
From: Bharata B Rao @ 2020-06-08 10:49 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: aneesh.kumar, npiggin, Bharata B Rao

Hypervisor may choose not to enable Guest Translation Shootdown Enable
(GTSE) option for the guest. When GTSE isn't ON, the guest OS isn't
permitted to use instructions like tblie and tlbsync directly, but is
expected to make hypervisor calls to get the TLB flushed.

This series enables the TLB flush routines in the radix code to
off-load TLB flushing to hypervisor via the newly proposed hcall
H_RPT_INVALIDATE. The specification of this hcall is still evolving
while the patchset is posted here for any early comments.

To easily check the availability of GTSE, it is made an MMU feature.
(TODO: Check if this can be a static key instead of MMU feature)

The OV5 handling and H_REGISTER_PROC_TBL hcall are changed to
handle GTSE as an optionally available feature and to not assume GTSE
when radix support is available.

H_RPT_INVALIDATE
================
Syntax:
int64   /* H_Success: Return code on successful completion */
        /* H_Busy - repeat the call with the same */
        /* H_P2, H_P3, H_P4, H_Parameter: Invalid parameters */
        hcall(const uint64 H_RPT_INVALIDATE, /* Invalidate process scoped RPT lookaside information */
              uint64 pid,       /* PID to invalidate */
              uint64 target,    /* Invalidation target */
              uint64 what,      /* What type of lookaside information */
              uint64 pages,     /* Page sizes */
              uint64 start,     /* Start of Effective Address (EA) range */
              uint64 end)       /* End of EA range */

Invalidation targets (target)
-----------------------------
Core MMU        0x01 /* All virtual processors in the partition */
Core local MMU  0x02 /* Current virtual processor */
Nest MMU        0x04 /* All nest/accelerator agents in use by the partition */
A combination of the above can be specified, except core and core local.

What to invalidate (what)
-------------------------
Reserved        0x0001  /* Reserved */
TLB             0x0002  /* Invalidate TLB */
PWC             0x0004  /* Invalidate Page Walk Cache */
PRS             0x0008  /* Invalidate Process Table Entries */
A combination of the above can be specified.

Page size mask (pages)
----------------------
4K              0x01
64K             0x02
2M              0x04
1G              0x08
All sizes       (-1UL)
A combination of the above can be specified.
All page sizes can be selected with -1.

Semantics: Invalidate radix tree lookaside information
           matching the parameters given.
* Return H_P2, H_P3 or H_P4 if target, what or pages parameters are
  different from the defined values.
* Return H_PARAMETER if (start, end) doesn't form a valid range.
* May invalidate more translation information than was specified.
* If start = 0 and end = -1, set the range to cover all valid addresses.
  Else start and end should be aligned to 4kB (lower 11 bits clear).
* If pid = 0 then valid addresses are quadrant 3 and quadrant 0 spaces,
  Else valid addresses are quadrant 0.
* Pages which are fully covered by the range are to be invalidated.
  Those which are partially covered are considered outside invalidation
  range, which allows a call to optimally invalidate ranges that may
  contain mixed page sizes.
* Return H_SUCCESS on success.

Bharata B Rao (3):
  powerpc/mm: Make GTSE as MMU FTR
  powerpc/prom_init: Ask for Radix GTSE only if supported.
  powerpc/pseries: H_REGISTER_PROC_TBL should ask for GTSE only if
    enabled

Nicholas Piggin (1):
  powerpc/mm/book3s64/radix: Off-load TLB invalidations to host when
    !GTSE

 arch/powerpc/include/asm/hvcall.h         |   1 +
 arch/powerpc/include/asm/mmu.h            |   4 +
 arch/powerpc/include/asm/plpar_wrappers.h |  14 +++
 arch/powerpc/kernel/dt_cpu_ftrs.c         |   2 +
 arch/powerpc/kernel/prom_init.c           |  13 +--
 arch/powerpc/mm/book3s64/radix_tlb.c      | 105 ++++++++++++++++++++--
 arch/powerpc/mm/init_64.c                 |   6 +-
 arch/powerpc/platforms/pseries/lpar.c     |   8 +-
 8 files changed, 137 insertions(+), 16 deletions(-)

-- 
2.21.3


^ permalink raw reply

* [RFC PATCH v0 2/4] powerpc/prom_init: Ask for Radix GTSE only if supported.
From: Bharata B Rao @ 2020-06-08 10:49 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: aneesh.kumar, npiggin, Bharata B Rao
In-Reply-To: <20200608104909.14350-1-bharata@linux.ibm.com>

In the case of radix, don't ask for GTSE by default but ask
only if GTSE is enabled.

Signed-off-by: Bharata B Rao <bharata@linux.ibm.com>
---
 arch/powerpc/kernel/prom_init.c | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
index 5f15b10eb007..16dd14f58ba6 100644
--- a/arch/powerpc/kernel/prom_init.c
+++ b/arch/powerpc/kernel/prom_init.c
@@ -1336,12 +1336,15 @@ static void __init prom_check_platform_support(void)
 		}
 	}
 
-	if (supported.radix_mmu && supported.radix_gtse &&
-	    IS_ENABLED(CONFIG_PPC_RADIX_MMU)) {
-		/* Radix preferred - but we require GTSE for now */
-		prom_debug("Asking for radix with GTSE\n");
+	if (supported.radix_mmu && IS_ENABLED(CONFIG_PPC_RADIX_MMU)) {
+		/* Radix preferred - Check if GTSE is also supported */
+		prom_debug("Asking for radix\n");
 		ibm_architecture_vec.vec5.mmu = OV5_FEAT(OV5_MMU_RADIX);
-		ibm_architecture_vec.vec5.radix_ext = OV5_FEAT(OV5_RADIX_GTSE);
+		if (supported.radix_gtse)
+			ibm_architecture_vec.vec5.radix_ext =
+					OV5_FEAT(OV5_RADIX_GTSE);
+		else
+			prom_debug("Radix GTSE isn't supported\n");
 	} else if (supported.hash_mmu) {
 		/* Default to hash mmu (if we can) */
 		prom_debug("Asking for hash\n");
-- 
2.21.3


^ permalink raw reply related

* [RFC PATCH v0 1/4] powerpc/mm: Make GTSE as MMU FTR
From: Bharata B Rao @ 2020-06-08 10:49 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: aneesh.kumar, npiggin, Bharata B Rao
In-Reply-To: <20200608104909.14350-1-bharata@linux.ibm.com>

Make GTSE as an MMU feature and enable it by default for radix.
However for guest, conditionally enable it if hypervisor supports it
via OV5 vector.

Making GTSE as a MMU feature will make it easy to enable radix
without GTSE.

Signed-off-by: Bharata B Rao <bharata@linux.ibm.com>
---
 arch/powerpc/include/asm/mmu.h    | 4 ++++
 arch/powerpc/kernel/dt_cpu_ftrs.c | 2 ++
 arch/powerpc/mm/init_64.c         | 6 +++++-
 3 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h
index f4ac25d4df05..884d51995934 100644
--- a/arch/powerpc/include/asm/mmu.h
+++ b/arch/powerpc/include/asm/mmu.h
@@ -28,6 +28,9 @@
  * Individual features below.
  */
 
+/* Guest Translation Shootdown Enable */
+#define MMU_FTR_GTSE			ASM_CONST(0x00001000)
+
 /*
  * Support for 68 bit VA space. We added that from ISA 2.05
  */
@@ -173,6 +176,7 @@ enum {
 #endif
 #ifdef CONFIG_PPC_RADIX_MMU
 		MMU_FTR_TYPE_RADIX |
+		MMU_FTR_GTSE |
 #ifdef CONFIG_PPC_KUAP
 		MMU_FTR_RADIX_KUAP |
 #endif /* CONFIG_PPC_KUAP */
diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c b/arch/powerpc/kernel/dt_cpu_ftrs.c
index 3a409517c031..571aa39e35d5 100644
--- a/arch/powerpc/kernel/dt_cpu_ftrs.c
+++ b/arch/powerpc/kernel/dt_cpu_ftrs.c
@@ -337,6 +337,8 @@ static int __init feat_enable_mmu_radix(struct dt_cpu_feature *f)
 #ifdef CONFIG_PPC_RADIX_MMU
 	cur_cpu_spec->mmu_features |= MMU_FTR_TYPE_RADIX;
 	cur_cpu_spec->mmu_features |= MMU_FTRS_HASH_BASE;
+	/* TODO: Does this need a separate cpu dt feature? */
+	cur_cpu_spec->mmu_features |= MMU_FTR_GTSE;
 	cur_cpu_spec->cpu_user_features |= PPC_FEATURE_HAS_MMU;
 
 	return 1;
diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
index c7ce4ec5060e..feb9bed9177c 100644
--- a/arch/powerpc/mm/init_64.c
+++ b/arch/powerpc/mm/init_64.c
@@ -408,13 +408,17 @@ static void __init early_check_vec5(void)
 		if (!(vec5[OV5_INDX(OV5_RADIX_GTSE)] &
 						OV5_FEAT(OV5_RADIX_GTSE))) {
 			pr_warn("WARNING: Hypervisor doesn't support RADIX with GTSE\n");
-		}
+			cur_cpu_spec->mmu_features &= ~MMU_FTR_GTSE;
+		} else
+			cur_cpu_spec->mmu_features |= MMU_FTR_GTSE;
 		/* Do radix anyway - the hypervisor said we had to */
 		cur_cpu_spec->mmu_features |= MMU_FTR_TYPE_RADIX;
 	} else if (mmu_supported == OV5_FEAT(OV5_MMU_HASH)) {
 		/* Hypervisor only supports hash - disable radix */
 		cur_cpu_spec->mmu_features &= ~MMU_FTR_TYPE_RADIX;
+		cur_cpu_spec->mmu_features &= ~MMU_FTR_GTSE;
 	}
+
 }
 
 void __init mmu_early_init_devtree(void)
-- 
2.21.3


^ permalink raw reply related

* [RFC PATCH v0 4/4] powerpc/mm/book3s64/radix: Off-load TLB invalidations to host when !GTSE
From: Bharata B Rao @ 2020-06-08 10:49 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: aneesh.kumar, npiggin, Bharata B Rao
In-Reply-To: <20200608104909.14350-1-bharata@linux.ibm.com>

From: Nicholas Piggin <npiggin@gmail.com>

When platform doesn't support GTSE, let TLB invalidation requests
for radix guests be off-loaded to the host using H_RPT_INVALIDATE
hcall

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Bharata B Rao <bharata@linux.ibm.com>
---
 arch/powerpc/include/asm/hvcall.h         |   1 +
 arch/powerpc/include/asm/plpar_wrappers.h |  14 +++
 arch/powerpc/mm/book3s64/radix_tlb.c      | 105 ++++++++++++++++++++--
 3 files changed, 113 insertions(+), 7 deletions(-)

diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h
index e90c073e437e..08917147415b 100644
--- a/arch/powerpc/include/asm/hvcall.h
+++ b/arch/powerpc/include/asm/hvcall.h
@@ -335,6 +335,7 @@
 #define H_GET_24X7_CATALOG_PAGE	0xF078
 #define H_GET_24X7_DATA		0xF07C
 #define H_GET_PERF_COUNTER_INFO	0xF080
+#define H_RPT_INVALIDATE	0xF084
 
 /* Platform-specific hcalls used for nested HV KVM */
 #define H_SET_PARTITION_TABLE	0xF800
diff --git a/arch/powerpc/include/asm/plpar_wrappers.h b/arch/powerpc/include/asm/plpar_wrappers.h
index 4497c8afb573..e952139b0e47 100644
--- a/arch/powerpc/include/asm/plpar_wrappers.h
+++ b/arch/powerpc/include/asm/plpar_wrappers.h
@@ -334,6 +334,13 @@ static inline long plpar_get_cpu_characteristics(struct h_cpu_char_result *p)
 	return rc;
 }
 
+static inline long pseries_rpt_invalidate(u32 pid, u64 target, u64 what,
+					  u64 pages, u64 start, u64 end)
+{
+	return plpar_hcall_norets(H_RPT_INVALIDATE, pid, target, what,
+				  pages, start, end);
+}
+
 #else /* !CONFIG_PPC_PSERIES */
 
 static inline long plpar_set_ciabr(unsigned long ciabr)
@@ -346,6 +353,13 @@ static inline long plpar_pte_read_4(unsigned long flags, unsigned long ptex,
 {
 	return 0;
 }
+
+static inline long pseries_rpt_invalidate(u32 pid, u64 target, u64 what,
+					  u64 pages, u64 start, u64 end)
+{
+	return 0;
+}
+
 #endif /* CONFIG_PPC_PSERIES */
 
 #endif /* _ASM_POWERPC_PLPAR_WRAPPERS_H */
diff --git a/arch/powerpc/mm/book3s64/radix_tlb.c b/arch/powerpc/mm/book3s64/radix_tlb.c
index b5cc9b23cf02..4dd1d3c75562 100644
--- a/arch/powerpc/mm/book3s64/radix_tlb.c
+++ b/arch/powerpc/mm/book3s64/radix_tlb.c
@@ -16,11 +16,39 @@
 #include <asm/tlbflush.h>
 #include <asm/trace.h>
 #include <asm/cputhreads.h>
+#include <asm/plpar_wrappers.h>
 
 #define RIC_FLUSH_TLB 0
 #define RIC_FLUSH_PWC 1
 #define RIC_FLUSH_ALL 2
 
+#define H_TLBI_TLB	0x0001
+#define H_TLBI_PWC	0x0002
+#define H_TLBI_PRS	0x0004
+
+#define H_TLBI_TARGET_CMMU	0x01
+#define H_TLBI_TARGET_CMMU_LOCAL 0x02
+#define H_TLBI_TARGET_NMMU	0x04
+
+#define H_TLBI_PAGE_ALL (-1UL)
+#define H_TLBI_PAGE_4K	0x01
+#define H_TLBI_PAGE_64K	0x02
+#define H_TLBI_PAGE_2M	0x04
+#define H_TLBI_PAGE_1G	0x08
+
+static inline u64 psize_to_h_tlbi(unsigned long psize)
+{
+	if (psize == MMU_PAGE_4K)
+		return H_TLBI_PAGE_4K;
+	if (psize == MMU_PAGE_64K)
+		return H_TLBI_PAGE_64K;
+	if (psize == MMU_PAGE_2M)
+		return H_TLBI_PAGE_2M;
+	if (psize == MMU_PAGE_1G)
+		return H_TLBI_PAGE_1G;
+	return H_TLBI_PAGE_ALL;
+}
+
 /*
  * tlbiel instruction for radix, set invalidation
  * i.e., r=1 and is=01 or is=10 or is=11
@@ -694,7 +722,14 @@ void radix__flush_tlb_mm(struct mm_struct *mm)
 			goto local;
 		}
 
-		if (cputlb_use_tlbie()) {
+		if (!mmu_has_feature(MMU_FTR_GTSE)) {
+			unsigned long targ = H_TLBI_TARGET_CMMU;
+
+			if (atomic_read(&mm->context.copros) > 0)
+				targ |= H_TLBI_TARGET_NMMU;
+			pseries_rpt_invalidate(pid, targ, H_TLBI_TLB,
+					       H_TLBI_PAGE_ALL, 0, -1UL);
+		} else if (cputlb_use_tlbie()) {
 			if (mm_needs_flush_escalation(mm))
 				_tlbie_pid(pid, RIC_FLUSH_ALL);
 			else
@@ -727,7 +762,16 @@ static void __flush_all_mm(struct mm_struct *mm, bool fullmm)
 				goto local;
 			}
 		}
-		if (cputlb_use_tlbie())
+		if (!mmu_has_feature(MMU_FTR_GTSE)) {
+			unsigned long targ = H_TLBI_TARGET_CMMU;
+			unsigned long what = H_TLBI_TLB | H_TLBI_PWC |
+					     H_TLBI_PRS;
+
+			if (atomic_read(&mm->context.copros) > 0)
+				targ |= H_TLBI_TARGET_NMMU;
+			pseries_rpt_invalidate(pid, targ, what,
+					       H_TLBI_PAGE_ALL, 0, -1UL);
+		} else if (cputlb_use_tlbie())
 			_tlbie_pid(pid, RIC_FLUSH_ALL);
 		else
 			_tlbiel_pid_multicast(mm, pid, RIC_FLUSH_ALL);
@@ -760,7 +804,17 @@ void radix__flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr,
 			exit_flush_lazy_tlbs(mm);
 			goto local;
 		}
-		if (cputlb_use_tlbie())
+		if (!mmu_has_feature(MMU_FTR_GTSE)) {
+			unsigned long targ = H_TLBI_TARGET_CMMU;
+			unsigned long pages = psize_to_h_tlbi(psize);
+			unsigned long page_size =
+					1UL << mmu_psize_to_shift(psize);
+
+			if (atomic_read(&mm->context.copros) > 0)
+				targ |= H_TLBI_TARGET_NMMU;
+			pseries_rpt_invalidate(pid, targ, H_TLBI_TLB, pages,
+					       vmaddr, vmaddr + page_size);
+		} else if (cputlb_use_tlbie())
 			_tlbie_va(vmaddr, pid, psize, RIC_FLUSH_TLB);
 		else
 			_tlbiel_va_multicast(mm, vmaddr, pid, psize, RIC_FLUSH_TLB);
@@ -810,7 +864,13 @@ static inline void _tlbiel_kernel_broadcast(void)
  */
 void radix__flush_tlb_kernel_range(unsigned long start, unsigned long end)
 {
-	if (cputlb_use_tlbie())
+	if (!mmu_has_feature(MMU_FTR_GTSE)) {
+		unsigned long targ = H_TLBI_TARGET_CMMU | H_TLBI_TARGET_NMMU;
+		unsigned long what = H_TLBI_TLB | H_TLBI_PWC | H_TLBI_PRS;
+
+		pseries_rpt_invalidate(0, targ, what, H_TLBI_PAGE_ALL,
+				       start, end);
+	} else if (cputlb_use_tlbie())
 		_tlbie_pid(0, RIC_FLUSH_ALL);
 	else
 		_tlbiel_kernel_broadcast();
@@ -864,7 +924,17 @@ static inline void __radix__flush_tlb_range(struct mm_struct *mm,
 				nr_pages > tlb_local_single_page_flush_ceiling);
 	}
 
-	if (full) {
+	if (!mmu_has_feature(MMU_FTR_GTSE) && !local) {
+		unsigned long targ = H_TLBI_TARGET_CMMU;
+		unsigned long pages = psize_to_h_tlbi(mmu_virtual_psize);
+
+		if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE))
+			pages |= psize_to_h_tlbi(MMU_PAGE_2M);
+		if (atomic_read(&mm->context.copros) > 0)
+			targ |= H_TLBI_TARGET_NMMU;
+		pseries_rpt_invalidate(pid, targ, H_TLBI_TLB, pages,
+				       start, end);
+	} else if (full) {
 		if (local) {
 			_tlbiel_pid(pid, RIC_FLUSH_TLB);
 		} else {
@@ -1046,7 +1116,17 @@ static __always_inline void __radix__flush_tlb_range_psize(struct mm_struct *mm,
 				nr_pages > tlb_local_single_page_flush_ceiling);
 	}
 
-	if (full) {
+	if (!mmu_has_feature(MMU_FTR_GTSE) && !local) {
+		unsigned long targ = H_TLBI_TARGET_CMMU;
+		unsigned long what = H_TLBI_TLB;
+		unsigned long pages = psize_to_h_tlbi(psize);
+
+		if (also_pwc)
+			what |= H_TLBI_PWC;
+		if (atomic_read(&mm->context.copros) > 0)
+			targ |= H_TLBI_TARGET_NMMU;
+		pseries_rpt_invalidate(pid, targ, what, pages, start, end);
+	} else if (full) {
 		if (local) {
 			_tlbiel_pid(pid, also_pwc ? RIC_FLUSH_ALL : RIC_FLUSH_TLB);
 		} else {
@@ -1111,7 +1191,18 @@ void radix__flush_tlb_collapsed_pmd(struct mm_struct *mm, unsigned long addr)
 			exit_flush_lazy_tlbs(mm);
 			goto local;
 		}
-		if (cputlb_use_tlbie())
+		if (!mmu_has_feature(MMU_FTR_GTSE)) {
+			unsigned long targ = H_TLBI_TARGET_CMMU;
+			unsigned long what = H_TLBI_TLB | H_TLBI_PWC |
+					     H_TLBI_PRS;
+			unsigned long pages =
+					psize_to_h_tlbi(mmu_virtual_psize);
+
+			if (atomic_read(&mm->context.copros) > 0)
+				targ |= H_TLBI_TARGET_NMMU;
+			pseries_rpt_invalidate(pid, targ, what, pages,
+					       addr, end);
+		} else if (cputlb_use_tlbie())
 			_tlbie_va_range(addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true);
 		else
 			_tlbiel_va_range_multicast(mm,
-- 
2.21.3


^ permalink raw reply related

* [RFC PATCH v0 3/4] powerpc/pseries: H_REGISTER_PROC_TBL should ask for GTSE only if enabled
From: Bharata B Rao @ 2020-06-08 10:49 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: aneesh.kumar, npiggin, Bharata B Rao
In-Reply-To: <20200608104909.14350-1-bharata@linux.ibm.com>

H_REGISTER_PROC_TBL asks for GTSE by default. GTSE flag bit should
be set only when GTSE is supported.

Signed-off-by: Bharata B Rao <bharata@linux.ibm.com>
---
 arch/powerpc/platforms/pseries/lpar.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
index e4ed5317f117..58ba76bc1964 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -1680,9 +1680,11 @@ static int pseries_lpar_register_process_table(unsigned long base,
 
 	if (table_size)
 		flags |= PROC_TABLE_NEW;
-	if (radix_enabled())
-		flags |= PROC_TABLE_RADIX | PROC_TABLE_GTSE;
-	else
+	if (radix_enabled()) {
+		flags |= PROC_TABLE_RADIX;
+		if (mmu_has_feature(MMU_FTR_GTSE))
+			flags |= PROC_TABLE_GTSE;
+	} else
 		flags |= PROC_TABLE_HPT_SLB;
 	for (;;) {
 		rc = plpar_hcall_norets(H_REGISTER_PROC_TBL, flags, base,
-- 
2.21.3


^ permalink raw reply related

* Re: [PATCH] mm/debug_vm_pgtable: Fix kernel crash with page table validate
From: Anshuman Khandual @ 2020-06-08 11:01 UTC (permalink / raw)
  To: Aneesh Kumar K.V, linux-mm; +Cc: akpm, linuxppc-dev
In-Reply-To: <20200608062739.378902-1-aneesh.kumar@linux.ibm.com>

Hi Aneesh,

On 06/08/2020 11:57 AM, Aneesh Kumar K.V wrote:
> Architectures can have CONFIG_TRANSPARENT_HUGEPAGE enabled but
> no THP support enabled based on platforms. For ex: with 4K
> PAGE_SIZE ppc64 supports THP only with radix translation.

Good catch, never hit this before.

> 
> This results in below crash when running with hash translation and
> 4K PAGE_SIZE.
> 
> kernel BUG at arch/powerpc/include/asm/book3s/64/hash-4k.h:140!
> cpu 0x61: Vector: 700 (Program Check) at [c000000ff948f860]
>     pc: c0000000018810f8: debug_vm_pgtable+0x480/0x8b0
>     lr: c0000000018810ec: debug_vm_pgtable+0x474/0x8b0
> ...
> [c000000ff948faf0] c000000001880fec debug_vm_pgtable+0x374/0x8b0 (unreliable)
> [c000000ff948fbf0] c000000000011648 do_one_initcall+0x98/0x4f0
> [c000000ff948fcd0] c000000001843928 kernel_init_freeable+0x330/0x3fc
> [c000000ff948fdb0] c0000000000122ac kernel_init+0x24/0x148
> [c000000ff948fe20] c00000000000cc44 ret_from_kernel_thread+0x5c/0x78
> 
> Check for THP support correctly

Makes sense, is this the only configuration which hit the problem ?

> 
> Cc: anshuman.khandual@arm.com
> Fixes: 399145f9eb6c ("mm/debug: add tests validating architecture page table helpers")
> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
> ---
>  mm/debug_vm_pgtable.c | 3 +++
>  1 file changed, 3 insertions(+)
> 
> diff --git a/mm/debug_vm_pgtable.c b/mm/debug_vm_pgtable.c
> index 188c18908964..e60151c5e997 100644
> --- a/mm/debug_vm_pgtable.c
> +++ b/mm/debug_vm_pgtable.c
> @@ -61,6 +61,9 @@ static void __init pmd_basic_tests(unsigned long pfn, pgprot_t prot)
>  {
>  	pmd_t pmd = pfn_pmd(pfn, prot);
>  
> +	if (!has_transparent_hugepage())
> +		return;
> +

We should also add this check to pud_basic_tests() as well.

>  	WARN_ON(!pmd_same(pmd, pmd));
>  	WARN_ON(!pmd_young(pmd_mkyoung(pmd_mkold(pmd))));
>  	WARN_ON(!pmd_dirty(pmd_mkdirty(pmd_mkclean(pmd))));
> 

The subject line here should mention about correct THP support
detection which fixes the problem. Probably something like this
or similar ("Fix kernel crash with correct THP support check").

- Anshuman

^ permalink raw reply

* Re: [PATCH] mm/debug_vm_pgtable: Fix kernel crash with page table validate
From: Aneesh Kumar K.V @ 2020-06-08 11:16 UTC (permalink / raw)
  To: Anshuman Khandual, linux-mm; +Cc: akpm, linuxppc-dev
In-Reply-To: <2497c7b7-b5cf-8df4-dc82-efefe2fb6f5a@arm.com>

On 6/8/20 4:31 PM, Anshuman Khandual wrote:
> Hi Aneesh,
> 
> On 06/08/2020 11:57 AM, Aneesh Kumar K.V wrote:
>> Architectures can have CONFIG_TRANSPARENT_HUGEPAGE enabled but
>> no THP support enabled based on platforms. For ex: with 4K
>> PAGE_SIZE ppc64 supports THP only with radix translation.
> 
> Good catch, never hit this before.
> 
>>
>> This results in below crash when running with hash translation and
>> 4K PAGE_SIZE.
>>
>> kernel BUG at arch/powerpc/include/asm/book3s/64/hash-4k.h:140!
>> cpu 0x61: Vector: 700 (Program Check) at [c000000ff948f860]
>>      pc: c0000000018810f8: debug_vm_pgtable+0x480/0x8b0
>>      lr: c0000000018810ec: debug_vm_pgtable+0x474/0x8b0
>> ...
>> [c000000ff948faf0] c000000001880fec debug_vm_pgtable+0x374/0x8b0 (unreliable)
>> [c000000ff948fbf0] c000000000011648 do_one_initcall+0x98/0x4f0
>> [c000000ff948fcd0] c000000001843928 kernel_init_freeable+0x330/0x3fc
>> [c000000ff948fdb0] c0000000000122ac kernel_init+0x24/0x148
>> [c000000ff948fe20] c00000000000cc44 ret_from_kernel_thread+0x5c/0x78
>>
>> Check for THP support correctly
> 
> Makes sense, is this the only configuration which hit the problem ?

4K hash ppc64 is the only config i guess.

> 
>>
>> Cc: anshuman.khandual@arm.com
>> Fixes: 399145f9eb6c ("mm/debug: add tests validating architecture page table helpers")
>> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
>> ---
>>   mm/debug_vm_pgtable.c | 3 +++
>>   1 file changed, 3 insertions(+)
>>
>> diff --git a/mm/debug_vm_pgtable.c b/mm/debug_vm_pgtable.c
>> index 188c18908964..e60151c5e997 100644
>> --- a/mm/debug_vm_pgtable.c
>> +++ b/mm/debug_vm_pgtable.c
>> @@ -61,6 +61,9 @@ static void __init pmd_basic_tests(unsigned long pfn, pgprot_t prot)
>>   {
>>   	pmd_t pmd = pfn_pmd(pfn, prot);
>>   
>> +	if (!has_transparent_hugepage())
>> +		return;
>> +
> 
> We should also add this check to pud_basic_tests() as well.


Do we have a function that check for runtime support for pud level THP? 
ppc64 don't do pud level THP yet. So  we have 
CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD=n

are you suggesting we do the same check for pud level THP too?


> 
>>   	WARN_ON(!pmd_same(pmd, pmd));
>>   	WARN_ON(!pmd_young(pmd_mkyoung(pmd_mkold(pmd))));
>>   	WARN_ON(!pmd_dirty(pmd_mkdirty(pmd_mkclean(pmd))));
>>
> 
> The subject line here should mention about correct THP support
> detection which fixes the problem. Probably something like this
> or similar ("Fix kernel crash with correct THP support check").


Not sure about that. This fix a kernel crash with page table validate code.


-aneesh

^ permalink raw reply

* Re: [v1 PATCH 1/2] Refactoring carrying over IMA measuremnet logs over Kexec.
From: Mimi Zohar @ 2020-06-08 12:02 UTC (permalink / raw)
  To: Prakhar Srivastava, linux-arm-kernel, linux-kernel, linuxppc-dev,
	devicetree, linux-integrity, linux-security-module
  Cc: kstewart, mark.rutland, catalin.marinas, bhsharma, tao.li, paulus,
	vincenzo.frascino, frowand.list, nramas, masahiroy, jmorris,
	takahiro.akashi, serge, pasha.tatashin, will, robh+dt, hsinyi,
	tusharsu, tglx, allison, christophe.leroy, mbrugger, balajib,
	dmitry.kasatkin, james.morse, gregkh
In-Reply-To: <20200607233323.22375-2-prsriva@linux.microsoft.com>

Hi Prakhar,

On Sun, 2020-06-07 at 16:33 -0700, Prakhar Srivastava wrote:
> This patch moves the non-architecture specific code out of powerpc and
>  adds to security/ima. 
> Update the arm64 and powerpc kexec file load paths to carry the IMA measurement
> logs.

From your patch description, this patch should be broken up.  Moving
the non-architecture specific code out of powerpc should be one patch.
 Additional support should be in another patch.  After each patch, the
code should work properly.

Before posting patches, please review them, making sure
unnecessary/unwanted changes haven't crept in - commenting out code,
moving code without removing the original code.

thanks,

Mimi

^ permalink raw reply

* Re: [PATCH] mm/debug_vm_pgtable: Fix kernel crash with page table validate
From: Anshuman Khandual @ 2020-06-08 12:15 UTC (permalink / raw)
  To: Aneesh Kumar K.V, linux-mm; +Cc: akpm, linuxppc-dev
In-Reply-To: <c5c48970-714b-271f-1242-b789be801d9b@linux.ibm.com>



On 06/08/2020 04:46 PM, Aneesh Kumar K.V wrote:
> On 6/8/20 4:31 PM, Anshuman Khandual wrote:
>> Hi Aneesh,
>>
>> On 06/08/2020 11:57 AM, Aneesh Kumar K.V wrote:
>>> Architectures can have CONFIG_TRANSPARENT_HUGEPAGE enabled but
>>> no THP support enabled based on platforms. For ex: with 4K
>>> PAGE_SIZE ppc64 supports THP only with radix translation.
>>
>> Good catch, never hit this before.
>>
>>>
>>> This results in below crash when running with hash translation and
>>> 4K PAGE_SIZE.
>>>
>>> kernel BUG at arch/powerpc/include/asm/book3s/64/hash-4k.h:140!
>>> cpu 0x61: Vector: 700 (Program Check) at [c000000ff948f860]
>>>      pc: c0000000018810f8: debug_vm_pgtable+0x480/0x8b0
>>>      lr: c0000000018810ec: debug_vm_pgtable+0x474/0x8b0
>>> ...
>>> [c000000ff948faf0] c000000001880fec debug_vm_pgtable+0x374/0x8b0 (unreliable)
>>> [c000000ff948fbf0] c000000000011648 do_one_initcall+0x98/0x4f0
>>> [c000000ff948fcd0] c000000001843928 kernel_init_freeable+0x330/0x3fc
>>> [c000000ff948fdb0] c0000000000122ac kernel_init+0x24/0x148
>>> [c000000ff948fe20] c00000000000cc44 ret_from_kernel_thread+0x5c/0x78
>>>
>>> Check for THP support correctly
>>
>> Makes sense, is this the only configuration which hit the problem ?
> 
> 4K hash ppc64 is the only config i guess.

Okay.

> 
>>
>>>
>>> Cc: anshuman.khandual@arm.com
>>> Fixes: 399145f9eb6c ("mm/debug: add tests validating architecture page table helpers")
>>> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
>>> ---
>>>   mm/debug_vm_pgtable.c | 3 +++
>>>   1 file changed, 3 insertions(+)
>>>
>>> diff --git a/mm/debug_vm_pgtable.c b/mm/debug_vm_pgtable.c
>>> index 188c18908964..e60151c5e997 100644
>>> --- a/mm/debug_vm_pgtable.c
>>> +++ b/mm/debug_vm_pgtable.c
>>> @@ -61,6 +61,9 @@ static void __init pmd_basic_tests(unsigned long pfn, pgprot_t prot)
>>>   {
>>>       pmd_t pmd = pfn_pmd(pfn, prot);
>>>   +    if (!has_transparent_hugepage())
>>> +        return;
>>> +
>>
>> We should also add this check to pud_basic_tests() as well.
> 
> 
> Do we have a function that check for runtime support for pud level THP? ppc64 don't do pud level THP yet. So  we have 
> CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD=n

I believe, we dont have such a generic function. Please correct me, if I am
missing something here.

> 
> are you suggesting we do the same check for pud level THP too?

Yes. Because regardless CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD, could there
be any THP at PUD level when has_transparent_hugepage() returns negative ? The
current dependency between THP and PUD THP configs seems some what confusing
but having this check at PUD level should protect against similar problems. A
quick test (after adding this check to PUD level) on x86 does not indicate any
problem on the normal path.

> 
> 
>>
>>>       WARN_ON(!pmd_same(pmd, pmd));
>>>       WARN_ON(!pmd_young(pmd_mkyoung(pmd_mkold(pmd))));
>>>       WARN_ON(!pmd_dirty(pmd_mkdirty(pmd_mkclean(pmd))));
>>>
>>
>> The subject line here should mention about correct THP support
>> detection which fixes the problem. Probably something like this
>> or similar ("Fix kernel crash with correct THP support check").
> 
> 
> Not sure about that. This fix a kernel crash with page table validate code.

What this fixes is very clear from the prefix itself - "mm/debug_vm_pgtable:",
making "page table validate" some what bit redundant. Instead, it could just
accommodate method of the fix i.e "via correct THP support check". Nonetheless,
it is just a small nit.

^ permalink raw reply

* [PATCH] KVM: PPC: Book3S HV: increase KVMPPC_NR_LPIDS on POWER8 and POWER9
From: Cédric Le Goater @ 2020-06-08 11:57 UTC (permalink / raw)
  To: Michael Ellerman
  Cc: kvm, Nicholas Piggin, kvm-ppc, Paul Mackerras,
	Cédric Le Goater, linuxppc-dev

POWER8 and POWER9 have 12-bit LPIDs. Change LPID_RSVD to support up to
(4096 - 2) guests on these processors. POWER7 is kept the same with a
limitation of (1024 - 2), but it might be time to drop KVM support for
POWER7.

Tested with 2048 guests * 4 vCPUs on a witherspoon system with 512G
RAM and a bit of swap.

Signed-off-by: Cédric Le Goater <clg@kaod.org>
---
 arch/powerpc/include/asm/reg.h      | 3 ++-
 arch/powerpc/kvm/book3s_64_mmu_hv.c | 8 ++++++--
 2 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index 88e6c78100d9..b70bbfb0ea3c 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -473,7 +473,8 @@
 #ifndef SPRN_LPID
 #define SPRN_LPID	0x13F	/* Logical Partition Identifier */
 #endif
-#define   LPID_RSVD	0x3ff		/* Reserved LPID for partn switching */
+#define   LPID_RSVD_POWER7	0x3ff	/* Reserved LPID for partn switching */
+#define   LPID_RSVD		0xfff	/* Reserved LPID for partn switching */
 #define	SPRN_HMER	0x150	/* Hypervisor maintenance exception reg */
 #define   HMER_DEBUG_TRIG	(1ul << (63 - 17)) /* Debug trigger */
 #define	SPRN_HMEER	0x151	/* Hyp maintenance exception enable reg */
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index 18aed9775a3c..23035ab2ec50 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -260,11 +260,15 @@ int kvmppc_mmu_hv_init(void)
 	if (!mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE))
 		return -EINVAL;
 
-	/* POWER7 has 10-bit LPIDs (12-bit in POWER8) */
 	host_lpid = 0;
 	if (cpu_has_feature(CPU_FTR_HVMODE))
 		host_lpid = mfspr(SPRN_LPID);
-	rsvd_lpid = LPID_RSVD;
+
+	/* POWER8 and above have 12-bit LPIDs (10-bit in POWER7) */
+	if (cpu_has_feature(CPU_FTR_ARCH_207S))
+		rsvd_lpid = LPID_RSVD;
+	else
+		rsvd_lpid = LPID_RSVD_POWER7;
 
 	kvmppc_init_lpid(rsvd_lpid + 1);
 
-- 
2.25.4


^ permalink raw reply related

* [PATCH v2] mm/debug_vm_pgtable: Fix kernel crash by checking for THP support
From: Aneesh Kumar K.V @ 2020-06-08 12:52 UTC (permalink / raw)
  To: linux-mm, akpm; +Cc: anshuman.khandual, linuxppc-dev, Aneesh Kumar K.V

Architectures can have CONFIG_TRANSPARENT_HUGEPAGE enabled but
no THP support enabled based on platforms. For ex: with 4K
PAGE_SIZE ppc64 supports THP only with radix translation.

This results in below crash when running with hash translation and
4K PAGE_SIZE.

kernel BUG at arch/powerpc/include/asm/book3s/64/hash-4k.h:140!
cpu 0x61: Vector: 700 (Program Check) at [c000000ff948f860]
    pc: c0000000018810f8: debug_vm_pgtable+0x480/0x8b0
    lr: c0000000018810ec: debug_vm_pgtable+0x474/0x8b0
...
[c000000ff948faf0] c000000001880fec debug_vm_pgtable+0x374/0x8b0 (unreliable)
[c000000ff948fbf0] c000000000011648 do_one_initcall+0x98/0x4f0
[c000000ff948fcd0] c000000001843928 kernel_init_freeable+0x330/0x3fc
[c000000ff948fdb0] c0000000000122ac kernel_init+0x24/0x148
[c000000ff948fe20] c00000000000cc44 ret_from_kernel_thread+0x5c/0x78

Check for THP support correctly

Cc: anshuman.khandual@arm.com
Fixes: 399145f9eb6c ("mm/debug: add tests validating architecture page table helpers")
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
---
 mm/debug_vm_pgtable.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/mm/debug_vm_pgtable.c b/mm/debug_vm_pgtable.c
index 188c18908964..df3a3a08f4f8 100644
--- a/mm/debug_vm_pgtable.c
+++ b/mm/debug_vm_pgtable.c
@@ -61,6 +61,9 @@ static void __init pmd_basic_tests(unsigned long pfn, pgprot_t prot)
 {
 	pmd_t pmd = pfn_pmd(pfn, prot);
 
+	if (!has_transparent_hugepage())
+		return;
+
 	WARN_ON(!pmd_same(pmd, pmd));
 	WARN_ON(!pmd_young(pmd_mkyoung(pmd_mkold(pmd))));
 	WARN_ON(!pmd_dirty(pmd_mkdirty(pmd_mkclean(pmd))));
@@ -80,6 +83,9 @@ static void __init pud_basic_tests(unsigned long pfn, pgprot_t prot)
 {
 	pud_t pud = pfn_pud(pfn, prot);
 
+	if (!has_transparent_hugepage())
+		return;
+
 	WARN_ON(!pud_same(pud, pud));
 	WARN_ON(!pud_young(pud_mkyoung(pud_mkold(pud))));
 	WARN_ON(!pud_write(pud_mkwrite(pud_wrprotect(pud))));
-- 
2.26.2


^ permalink raw reply related

* [PATCH] selftests: powerpc: Fix online CPU selection
From: Sandipan Das @ 2020-06-08 14:42 UTC (permalink / raw)
  To: mpe; +Cc: shiganta, nasastry, linuxppc-dev, srikar, kamalesh

The size of the cpu set must be large enough for systems
with a very large number of CPUs. Otherwise, tests which
try to determine the first online CPU by calling
sched_getaffinity() will fail. This makes sure that the
size of the allocated cpu set is dependent on the number
of CPUs as reported by get_nprocs().

Fixes: 3752e453f6ba ("selftests/powerpc: Add tests of PMU EBBs")
Reported-by: Shirisha Ganta <shiganta@in.ibm.com>
Signed-off-by: Sandipan Das <sandipan@linux.ibm.com>
---
 tools/testing/selftests/powerpc/utils.c | 33 ++++++++++++++++---------
 1 file changed, 21 insertions(+), 12 deletions(-)

diff --git a/tools/testing/selftests/powerpc/utils.c b/tools/testing/selftests/powerpc/utils.c
index 933678f1ed0a..bb8e402752c0 100644
--- a/tools/testing/selftests/powerpc/utils.c
+++ b/tools/testing/selftests/powerpc/utils.c
@@ -16,6 +16,7 @@
 #include <string.h>
 #include <sys/ioctl.h>
 #include <sys/stat.h>
+#include <sys/sysinfo.h>
 #include <sys/types.h>
 #include <sys/utsname.h>
 #include <unistd.h>
@@ -88,28 +89,36 @@ void *get_auxv_entry(int type)
 
 int pick_online_cpu(void)
 {
-	cpu_set_t mask;
-	int cpu;
+	int ncpus, cpu = -1;
+	cpu_set_t *mask;
+	size_t size;
 
-	CPU_ZERO(&mask);
+	ncpus = get_nprocs();
+	size = CPU_ALLOC_SIZE(ncpus);
+	mask = CPU_ALLOC(ncpus);
 
-	if (sched_getaffinity(0, sizeof(mask), &mask)) {
+	CPU_ZERO_S(size, mask);
+
+	if (sched_getaffinity(0, size, mask)) {
 		perror("sched_getaffinity");
-		return -1;
+		goto done;
 	}
 
 	/* We prefer a primary thread, but skip 0 */
-	for (cpu = 8; cpu < CPU_SETSIZE; cpu += 8)
-		if (CPU_ISSET(cpu, &mask))
-			return cpu;
+	for (cpu = 8; cpu < ncpus; cpu += 8)
+		if (CPU_ISSET_S(cpu, size, mask))
+			goto done;
 
 	/* Search for anything, but in reverse */
-	for (cpu = CPU_SETSIZE - 1; cpu >= 0; cpu--)
-		if (CPU_ISSET(cpu, &mask))
-			return cpu;
+	for (cpu = ncpus - 1; cpu >= 0; cpu--)
+		if (CPU_ISSET_S(cpu, size, mask))
+			goto done;
 
 	printf("No cpus in affinity mask?!\n");
-	return -1;
+
+done:
+	CPU_FREE(mask);
+	return cpu;
 }
 
 bool is_ppc64le(void)
-- 
2.25.1


^ permalink raw reply related

* Re: [PATCH] selftests: powerpc: Fix online CPU selection
From: Kamalesh Babulal @ 2020-06-08 15:05 UTC (permalink / raw)
  To: Sandipan Das; +Cc: shiganta, linuxppc-dev, srikar, nasastry
In-Reply-To: <20200608144212.985144-1-sandipan@linux.ibm.com>

On 6/8/20 8:12 PM, Sandipan Das wrote:
> The size of the cpu set must be large enough for systems
> with a very large number of CPUs. Otherwise, tests which
> try to determine the first online CPU by calling
> sched_getaffinity() will fail. This makes sure that the
> size of the allocated cpu set is dependent on the number
> of CPUs as reported by get_nprocs().
> 
> Fixes: 3752e453f6ba ("selftests/powerpc: Add tests of PMU EBBs")
> Reported-by: Shirisha Ganta <shiganta@in.ibm.com>
> Signed-off-by: Sandipan Das <sandipan@linux.ibm.com>

LGTM,

Reviewed-by: Kamalesh Babulal <kamalesh@linux.vnet.ibm.com>

-- 
Kamalesh

^ permalink raw reply

* Re: [PATCH v11 4/6] powerpc/papr_scm: Improve error logging and handling papr_scm_ndctl()
From: Ira Weiny @ 2020-06-08 16:07 UTC (permalink / raw)
  To: Vaibhav Jain
  Cc: Santosh Sivaraj, linux-nvdimm, linux-kernel, Steven Rostedt,
	Oliver O'Halloran, Aneesh Kumar K . V, Dan Williams,
	linuxppc-dev
In-Reply-To: <20200607131339.476036-5-vaibhav@linux.ibm.com>

On Sun, Jun 07, 2020 at 06:43:37PM +0530, Vaibhav Jain wrote:
> Since papr_scm_ndctl() can be called from outside papr_scm, its
> exposed to the possibility of receiving NULL as value of 'cmd_rc'
> argument. This patch updates papr_scm_ndctl() to protect against such
> possibility by assigning it pointer to a local variable in case cmd_rc
> == NULL.
> 
> Finally the patch also updates the 'default' add a debug log unknown
> 'cmd' values.
> 
> Cc: "Aneesh Kumar K . V" <aneesh.kumar@linux.ibm.com>
> Cc: Dan Williams <dan.j.williams@intel.com>
> Cc: Michael Ellerman <mpe@ellerman.id.au>
> Cc: Ira Weiny <ira.weiny@intel.com>

Reviewed-by: Ira Weiny <ira.weiny@intel.com>

> Signed-off-by: Vaibhav Jain <vaibhav@linux.ibm.com>
> ---
> Changelog:
> 
> v10..v11:
> * Instead of returning *cmd_rd just return '0' in case nd_cmd is
>   handled. In case of unknown nd-cmd return -EINVAL
>   [ Ira and Dan Williams ]
> * Updated patch description.
> 
> v9..v10
> * New patch in the series
> ---
>  arch/powerpc/platforms/pseries/papr_scm.c | 6 ++++++
>  1 file changed, 6 insertions(+)
> 
> diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c
> index 0c091622b15e..692ad3d79826 100644
> --- a/arch/powerpc/platforms/pseries/papr_scm.c
> +++ b/arch/powerpc/platforms/pseries/papr_scm.c
> @@ -355,11 +355,16 @@ static int papr_scm_ndctl(struct nvdimm_bus_descriptor *nd_desc,
>  {
>  	struct nd_cmd_get_config_size *get_size_hdr;
>  	struct papr_scm_priv *p;
> +	int rc;
>  
>  	/* Only dimm-specific calls are supported atm */
>  	if (!nvdimm)
>  		return -EINVAL;
>  
> +	/* Use a local variable in case cmd_rc pointer is NULL */
> +	if (!cmd_rc)
> +		cmd_rc = &rc;
> +
>  	p = nvdimm_provider_data(nvdimm);
>  
>  	switch (cmd) {
> @@ -381,6 +386,7 @@ static int papr_scm_ndctl(struct nvdimm_bus_descriptor *nd_desc,
>  		break;
>  
>  	default:
> +		dev_dbg(&p->pdev->dev, "Unknown command = %d\n", cmd);
>  		return -EINVAL;
>  	}
>  
> -- 
> 2.26.2
> 

^ permalink raw reply

* Re: [PATCH v11 5/6] ndctl/papr_scm, uapi: Add support for PAPR nvdimm specific methods
From: Ira Weiny @ 2020-06-08 16:24 UTC (permalink / raw)
  To: Vaibhav Jain
  Cc: Santosh Sivaraj, linux-nvdimm, linux-kernel, Steven Rostedt,
	Oliver O'Halloran, Aneesh Kumar K . V, Dan Williams,
	linuxppc-dev
In-Reply-To: <20200607131339.476036-6-vaibhav@linux.ibm.com>

On Sun, Jun 07, 2020 at 06:43:38PM +0530, Vaibhav Jain wrote:
> Introduce support for PAPR NVDIMM Specific Methods (PDSM) in papr_scm
> module and add the command family NVDIMM_FAMILY_PAPR to the white list
> of NVDIMM command sets. Also advertise support for ND_CMD_CALL for the
> nvdimm command mask and implement necessary scaffolding in the module
> to handle ND_CMD_CALL ioctl and PDSM requests that we receive.
> 
> The layout of the PDSM request as we expect from libnvdimm/libndctl is
> described in newly introduced uapi header 'papr_pdsm.h' which
> defines a new 'struct nd_pdsm_cmd_pkg' header. This header is used
> to communicate the PDSM request via member
> 'nd_cmd_pkg.nd_command' and size of payload that need to be
> sent/received for servicing the PDSM.
> 
> A new function is_cmd_valid() is implemented that reads the args to
> papr_scm_ndctl() and performs sanity tests on them. A new function
> papr_scm_service_pdsm() is introduced and is called from
> papr_scm_ndctl() in case of a PDSM request is received via ND_CMD_CALL
> command from libnvdimm.
> 
> Cc: "Aneesh Kumar K . V" <aneesh.kumar@linux.ibm.com>
> Cc: Dan Williams <dan.j.williams@intel.com>
> Cc: Michael Ellerman <mpe@ellerman.id.au>
> Cc: Ira Weiny <ira.weiny@intel.com>
> Signed-off-by: Vaibhav Jain <vaibhav@linux.ibm.com>
> ---
> Changelog:
> 
> v10..v11:
> * Moved in-lines 'nd_pdsm_cmd_pkg()' and 'pdsm_cmd_to_payload()' from
>   'papr_pdsm.h' header to 'papr_scm.c'. The avoids a potential license
>   incompatibility issue with non-GPL-2.0 user-space code trying to
>   include the header in its code. [ Ira ]
> * Verified papr_pdsm.h with UAPI_HEADER_TEST config.
> * Moved the is_cmd_valid() check in papr_scm_ndctl() before check for
>   cmd_rc == NULL. This prevents cmd_rc to be updated in case the
>   nd-cmd is invalid or unknown.
> 
> v9..v10:
> * Simplified 'struct nd_pdsm_cmd_pkg' by removing the
>   'payload_version' field.
> * Removed the corrosponding documentation on versioning and backward
>   compatibility from 'papr_pdsm.h'
> * Reduced the size of reserved fields to 4-bytes making 'struct
>   nd_pdsm_cmd_pkg' 64 + 8 bytes long.
> * Updated is_cmd_valid() to enforce validation checks on pdsm
>   commands. [ Dan Williams ]
> * Added check for reserved fields being set to '0' in is_cmd_valid()
>   [ Ira ]
> * Moved changes for checking cmd_rc == NULL and logging improvements
>   to a separate prelim patch [ Ira ].
> * Moved  pdsm package validation checks from papr_scm_service_pdsm()
>   to is_cmd_valid().
> * Marked papr_scm_service_pdsm() return type as 'void' since errors
>   are reported in nd_pdsm_cmd_pkg.cmd_status field.
> 
> Resend:
> * Added ack from Aneesh.
> 
> v8..v9:
> * Reduced the usage of term SCM replacing it with appropriate
>   replacement [ Dan Williams, Aneesh ]
> * Renamed 'papr_scm_pdsm.h' to 'papr_pdsm.h'
> * s/PAPR_SCM_PDSM_*/PAPR_PDSM_*/g
> * s/NVDIMM_FAMILY_PAPR_SCM/NVDIMM_FAMILY_PAPR/g
> * Minor updates to 'papr_psdm.h' to replace usage of term 'SCM'.
> * Minor update to patch description.
> 
> v7..v8:
> * Removed the 'payload_offset' field from 'struct
>   nd_pdsm_cmd_pkg'. Instead command payload is always assumed to start
>   at 'nd_pdsm_cmd_pkg.payload'. [ Aneesh ]
> * To enable introducing new fields to 'struct nd_pdsm_cmd_pkg',
>   'reserved' field of 10-bytes is introduced. [ Aneesh ]
> * Fixed a typo in "Backward Compatibility" section of papr_scm_pdsm.h
>   [ Ira ]
> 
> Resend:
> * None
> 
> v6..v7 :
> * Removed the re-definitions of __packed macro from papr_scm_pdsm.h
>   [Mpe].
> * Removed the usage of __KERNEL__ macros in papr_scm_pdsm.h [Mpe].
> * Removed macros that were unused in papr_scm.c from papr_scm_pdsm.h
>   [Mpe].
> * Made functions defined in papr_scm_pdsm.h as static inline. [Mpe]
> 
> v5..v6 :
> * Changed the usage of the term DSM to PDSM to distinguish it from the
>   ACPI term [ Dan Williams ]
> * Renamed papr_scm_dsm.h to papr_scm_pdsm.h and updated various struct
>   to reflect the new terminology.
> * Updated the patch description and title to reflect the new terminology.
> * Squashed patch to introduce new command family in 'ndctl.h' with
>   this patch [ Dan Williams ]
> * Updated the papr_scm_pdsm method starting index from 0x10000 to 0x0
>   [ Dan Williams ]
> * Removed redundant license text from the papr_scm_psdm.h file.
>   [ Dan Williams ]
> * s/envelop/envelope/ at various places [ Dan Williams ]
> * Added '__packed' attribute to command package header to gaurd
>   against different compiler adding paddings between the fields.
>   [ Dan Williams]
> * Converted various pr_debug to dev_debug [ Dan Williams ]
> 
> v4..v5 :
> * None
> 
> v3..v4 :
> * None
> 
> v2..v3 :
> * Updated the patch prefix to 'ndctl/uapi' [Aneesh]
> 
> v1..v2 :
> * None
> ---
>  arch/powerpc/include/uapi/asm/papr_pdsm.h |  84 +++++++++++++++
>  arch/powerpc/platforms/pseries/papr_scm.c | 126 +++++++++++++++++++++-
>  include/uapi/linux/ndctl.h                |   1 +
>  3 files changed, 207 insertions(+), 4 deletions(-)
>  create mode 100644 arch/powerpc/include/uapi/asm/papr_pdsm.h
> 
> diff --git a/arch/powerpc/include/uapi/asm/papr_pdsm.h b/arch/powerpc/include/uapi/asm/papr_pdsm.h
> new file mode 100644
> index 000000000000..df2447455cfe
> --- /dev/null
> +++ b/arch/powerpc/include/uapi/asm/papr_pdsm.h
> @@ -0,0 +1,84 @@
> +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
> +/*
> + * PAPR nvDimm Specific Methods (PDSM) and structs for libndctl
> + *
> + * (C) Copyright IBM 2020
> + *
> + * Author: Vaibhav Jain <vaibhav at linux.ibm.com>
> + */
> +
> +#ifndef _UAPI_ASM_POWERPC_PAPR_PDSM_H_
> +#define _UAPI_ASM_POWERPC_PAPR_PDSM_H_
> +
> +#include <linux/types.h>
> +#include <linux/ndctl.h>
> +
> +/*
> + * PDSM Envelope:
> + *
> + * The ioctl ND_CMD_CALL transfers data between user-space and kernel via
> + * envelope which consists of a header and user-defined payload sections.
> + * The header is described by 'struct nd_pdsm_cmd_pkg' which expects a
> + * payload following it and accessible via 'nd_pdsm_cmd_pkg.payload' field.
> + * There is reserved field that can used to introduce new fields to the
> + * structure in future. It also tries to ensure that 'nd_pdsm_cmd_pkg.payload'
> + * lies at a 8-byte boundary.
> + *
> + *  +-------------+---------------------+---------------------------+
> + *  |   64-Bytes  |       8-Bytes       |       Max 184-Bytes       |
> + *  +-------------+---------------------+---------------------------+
> + *  |               nd_pdsm_cmd_pkg     |                           |
> + *  |-------------+                     |                           |
> + *  |  nd_cmd_pkg |                     |                           |
> + *  +-------------+---------------------+---------------------------+
> + *  | nd_family   |                     |                           |
> + *  | nd_size_out | cmd_status          |                           |
> + *  | nd_size_in  | reserved            |     payload               |
> + *  | nd_command  |                     |                           |
> + *  | nd_fw_size  |                     |                           |
> + *  +-------------+---------------------+---------------------------+
> + *
> + * PDSM Header:
> + *
> + * The header is defined as 'struct nd_pdsm_cmd_pkg' which embeds a
> + * 'struct nd_cmd_pkg' instance. The PDSM command is assigned to member
> + * 'nd_cmd_pkg.nd_command'. Apart from size information of the envelope which is
> + * contained in 'struct nd_cmd_pkg', the header also has members following
> + * members:
> + *
> + * 'cmd_status'		: (Out) Errors if any encountered while servicing PDSM.
> + * 'reserved'		: Not used, reserved for future and should be set to 0.
> + *
> + * PDSM Payload:
> + *
> + * The layout of the PDSM Payload is defined by various structs shared between
> + * papr_scm and libndctl so that contents of payload can be interpreted. During
> + * servicing of a PDSM the papr_scm module will read input args from the payload
> + * field by casting its contents to an appropriate struct pointer based on the
> + * PDSM command. Similarly the output of servicing the PDSM command will be
> + * copied to the payload field using the same struct.
> + *
> + * 'libnvdimm' enforces a hard limit of 256 bytes on the envelope size, which
> + * leaves around 184 bytes for the envelope payload

'around'?

> (ignoring any padding that
> + * the compiler may silently introduce).

When building user interfaces like this you have to be more exact.  I think the
code is fine but you can't have the compiler silently moving things around or
have different compilers move things differently between the user app and the
kernel.  So these statements are not correct.

Ira

> + *
> + */
> +
> +/* PDSM-header + payload expected with ND_CMD_CALL ioctl from libnvdimm */
> +struct nd_pdsm_cmd_pkg {
> +	struct nd_cmd_pkg hdr;	/* Package header containing sub-cmd */
> +	__s32 cmd_status;	/* Out: Sub-cmd status returned back */
> +	__u16 reserved[2];	/* Ignored and to be used in future */
> +	__u8 payload[];		/* In/Out: Sub-cmd data buffer */
> +} __packed;
> +
> +/*
> + * Methods to be embedded in ND_CMD_CALL request. These are sent to the kernel
> + * via 'nd_pdsm_cmd_pkg.hdr.nd_command' member of the ioctl struct
> + */
> +enum papr_pdsm {
> +	PAPR_PDSM_MIN = 0x0,
> +	PAPR_PDSM_MAX,
> +};
> +
> +#endif /* _UAPI_ASM_POWERPC_PAPR_PDSM_H_ */
> diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c
> index 692ad3d79826..167fcf0e249d 100644
> --- a/arch/powerpc/platforms/pseries/papr_scm.c
> +++ b/arch/powerpc/platforms/pseries/papr_scm.c
> @@ -15,13 +15,15 @@
>  #include <linux/seq_buf.h>
>  
>  #include <asm/plpar_wrappers.h>
> +#include <asm/papr_pdsm.h>
>  
>  #define BIND_ANY_ADDR (~0ul)
>  
>  #define PAPR_SCM_DIMM_CMD_MASK \
>  	((1ul << ND_CMD_GET_CONFIG_SIZE) | \
>  	 (1ul << ND_CMD_GET_CONFIG_DATA) | \
> -	 (1ul << ND_CMD_SET_CONFIG_DATA))
> +	 (1ul << ND_CMD_SET_CONFIG_DATA) | \
> +	 (1ul << ND_CMD_CALL))
>  
>  /* DIMM health bitmap bitmap indicators */
>  /* SCM device is unable to persist memory contents */
> @@ -89,6 +91,21 @@ struct papr_scm_priv {
>  	u64 health_bitmap;
>  };
>  
> +/* Convert a libnvdimm nd_cmd_pkg to pdsm specific pkg */
> +static inline struct nd_pdsm_cmd_pkg *nd_to_pdsm_cmd_pkg(struct nd_cmd_pkg *cmd)
> +{
> +	return (struct nd_pdsm_cmd_pkg *)cmd;
> +}
> +
> +/* Return the payload pointer for a given pcmd */
> +static inline void *pdsm_cmd_to_payload(struct nd_pdsm_cmd_pkg *pcmd)
> +{
> +	if (pcmd->hdr.nd_size_in == 0 && pcmd->hdr.nd_size_out == 0)
> +		return NULL;
> +	else
> +		return (void *)(pcmd->payload);
> +}
> +
>  static int drc_pmem_bind(struct papr_scm_priv *p)
>  {
>  	unsigned long ret[PLPAR_HCALL_BUFSIZE];
> @@ -349,17 +366,113 @@ static int papr_scm_meta_set(struct papr_scm_priv *p,
>  	return 0;
>  }
>  
> +/*
> + * Do a sanity checks on the inputs args to dimm-control function and return
> + * '0' if valid. This also does validation on ND_CMD_CALL sub-command packages.
> + */
> +static int is_cmd_valid(struct nvdimm *nvdimm, unsigned int cmd, void *buf,
> +			unsigned int buf_len)
> +{
> +	unsigned long cmd_mask = PAPR_SCM_DIMM_CMD_MASK;
> +	struct nd_pdsm_cmd_pkg *pkg;
> +	struct nd_cmd_pkg *nd_cmd;
> +	struct papr_scm_priv *p;
> +	enum papr_pdsm pdsm;
> +
> +	/* Only dimm-specific calls are supported atm */
> +	if (!nvdimm)
> +		return -EINVAL;
> +
> +	/* get the provider data from struct nvdimm */
> +	p = nvdimm_provider_data(nvdimm);
> +
> +	if (!test_bit(cmd, &cmd_mask)) {
> +		dev_dbg(&p->pdev->dev, "Unsupported cmd=%u\n", cmd);
> +		return -EINVAL;
> +	}
> +
> +	/* For CMD_CALL verify pdsm request */
> +	if (cmd == ND_CMD_CALL) {
> +		/* Verify the envelope package */
> +		if (!buf || buf_len < sizeof(struct nd_pdsm_cmd_pkg)) {
> +			dev_dbg(&p->pdev->dev, "Invalid pkg size=%u\n",
> +				buf_len);
> +			return -EINVAL;
> +		}
> +
> +		/* Verify that the nd_cmd_pkg.nd_family is correct */
> +		nd_cmd = (struct nd_cmd_pkg *)buf;
> +		if (nd_cmd->nd_family != NVDIMM_FAMILY_PAPR) {
> +			dev_dbg(&p->pdev->dev, "Invalid pkg family=0x%llx\n",
> +				nd_cmd->nd_family);
> +			return -EINVAL;
> +		}
> +
> +		/* Get the pdsm request package and the command */
> +		pkg = nd_to_pdsm_cmd_pkg(nd_cmd);
> +		pdsm = pkg->hdr.nd_command;
> +
> +		/* Verify if the psdm command is valid */
> +		if (pdsm <= PAPR_PDSM_MIN || pdsm >= PAPR_PDSM_MAX) {
> +			dev_dbg(&p->pdev->dev, "PDSM[0x%x]: Invalid PDSM\n", pdsm);
> +			return -EINVAL;
> +		}
> +
> +		/* We except a payload with all PDSM commands */
> +		if (!pdsm_cmd_to_payload(pkg)) {
> +			dev_dbg(&p->pdev->dev, "PDSM[0x%x]: Empty payload\n", pdsm);
> +			return -EINVAL;
> +		}
> +
> +		/* Ensure reserved fields of the pdsm header are set to 0 */
> +		if (pkg->reserved[0] || pkg->reserved[1]) {
> +			dev_dbg(&p->pdev->dev,
> +				"PDSM[0x%x]: Invalid reserved field usage\n", pdsm);
> +			return -EINVAL;
> +		}
> +	}
> +
> +	/* Let the command be further processed */
> +	return 0;
> +}
> +
> +/*
> + * For a given pdsm request call an appropriate service function.
> + * Returns errors if any while handling the pdsm command package.
> + */
> +static int papr_scm_service_pdsm(struct papr_scm_priv *p,
> +				 struct nd_pdsm_cmd_pkg *pkg)
> +{
> +	const enum papr_pdsm pdsm = pkg->hdr.nd_command;
> +
> +	dev_dbg(&p->pdev->dev, "PDSM[0x%x]: Servicing..\n", pdsm);
> +
> +	/* Call pdsm service function */
> +	switch (pdsm) {
> +	default:
> +		dev_dbg(&p->pdev->dev, "PDSM[0x%x]: Unsupported PDSM request\n",
> +			pdsm);
> +		pkg->cmd_status = -ENOENT;
> +		break;
> +	}
> +
> +	return pkg->cmd_status;
> +}
> +
>  static int papr_scm_ndctl(struct nvdimm_bus_descriptor *nd_desc,
>  			  struct nvdimm *nvdimm, unsigned int cmd, void *buf,
>  			  unsigned int buf_len, int *cmd_rc)
>  {
>  	struct nd_cmd_get_config_size *get_size_hdr;
> +	struct nd_pdsm_cmd_pkg *call_pkg = NULL;
>  	struct papr_scm_priv *p;
>  	int rc;
>  
> -	/* Only dimm-specific calls are supported atm */
> -	if (!nvdimm)
> -		return -EINVAL;
> +	rc = is_cmd_valid(nvdimm, cmd, buf, buf_len);
> +	if (rc) {
> +		pr_debug("Invalid cmd=0x%x. Err=%d\n", cmd, rc);
> +		return rc;
> +	}
>  
>  	/* Use a local variable in case cmd_rc pointer is NULL */
>  	if (!cmd_rc)
> @@ -385,6 +498,11 @@ static int papr_scm_ndctl(struct nvdimm_bus_descriptor *nd_desc,
>  		*cmd_rc = papr_scm_meta_set(p, buf);
>  		break;
>  
> +	case ND_CMD_CALL:
> +		call_pkg = nd_to_pdsm_cmd_pkg(buf);
> +		*cmd_rc = papr_scm_service_pdsm(p, call_pkg);
> +		break;
> +
>  	default:
>  		dev_dbg(&p->pdev->dev, "Unknown command = %d\n", cmd);
>  		return -EINVAL;
> diff --git a/include/uapi/linux/ndctl.h b/include/uapi/linux/ndctl.h
> index de5d90212409..0e09dc5cec19 100644
> --- a/include/uapi/linux/ndctl.h
> +++ b/include/uapi/linux/ndctl.h
> @@ -244,6 +244,7 @@ struct nd_cmd_pkg {
>  #define NVDIMM_FAMILY_HPE2 2
>  #define NVDIMM_FAMILY_MSFT 3
>  #define NVDIMM_FAMILY_HYPERV 4
> +#define NVDIMM_FAMILY_PAPR 5
>  
>  #define ND_IOCTL_CALL			_IOWR(ND_IOCTL, ND_CMD_CALL,\
>  					struct nd_cmd_pkg)
> -- 
> 2.26.2
> 

^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox