LinuxPPC-Dev Archive on lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/2] powerpc/64s: remove PROT_SAO support
From: Nicholas Piggin @ 2020-06-07 12:02 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Nicholas Piggin

ISA v3.1 does not support the SAO storage control attribute required to
implement PROT_SAO. PROT_SAO was used by specialised system software
(Lx86) that has been discontinued for about 7 years, and is not thought
to be used elsewhere, so removal should not cause problems.

We rather remove it than keep support for older processors, because
live migrating guest partitions to newer processors may not be possible
if SAO is in use.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 arch/powerpc/include/asm/book3s/64/pgtable.h  |  8 ++--
 arch/powerpc/include/asm/cputable.h           |  9 ++--
 arch/powerpc/include/asm/kvm_book3s_64.h      |  3 +-
 arch/powerpc/include/asm/mman.h               | 24 +++--------
 arch/powerpc/include/asm/nohash/64/pgtable.h  |  2 -
 arch/powerpc/kernel/dt_cpu_ftrs.c             |  2 +-
 arch/powerpc/mm/book3s64/hash_utils.c         |  2 -
 include/linux/mm.h                            |  2 -
 include/trace/events/mmflags.h                |  2 -
 mm/ksm.c                                      |  4 --
 tools/testing/selftests/powerpc/mm/.gitignore |  1 -
 tools/testing/selftests/powerpc/mm/Makefile   |  4 +-
 tools/testing/selftests/powerpc/mm/prot_sao.c | 42 -------------------
 13 files changed, 18 insertions(+), 87 deletions(-)
 delete mode 100644 tools/testing/selftests/powerpc/mm/prot_sao.c

diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index f17442c3a092..d9e92586f8dc 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -20,9 +20,13 @@
 #define _PAGE_RW		(_PAGE_READ | _PAGE_WRITE)
 #define _PAGE_RWX		(_PAGE_READ | _PAGE_WRITE | _PAGE_EXEC)
 #define _PAGE_PRIVILEGED	0x00008 /* kernel access only */
-#define _PAGE_SAO		0x00010 /* Strong access order */
+
+#define _PAGE_CACHE_CTL		0x00030 /* Bits for the folowing cache modes */
+			/*	No bits set is normal cacheable memory */
+			/*	0x00010 unused, is SAO bit on radix POWER9 */
 #define _PAGE_NON_IDEMPOTENT	0x00020 /* non idempotent memory */
 #define _PAGE_TOLERANT		0x00030 /* tolerant memory, cache inhibited */
+
 #define _PAGE_DIRTY		0x00080 /* C: page changed */
 #define _PAGE_ACCESSED		0x00100 /* R: page referenced */
 /*
@@ -825,8 +829,6 @@ static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr,
 	return hash__set_pte_at(mm, addr, ptep, pte, percpu);
 }
 
-#define _PAGE_CACHE_CTL	(_PAGE_SAO | _PAGE_NON_IDEMPOTENT | _PAGE_TOLERANT)
-
 #define pgprot_noncached pgprot_noncached
 static inline pgprot_t pgprot_noncached(pgprot_t prot)
 {
diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h
index bac2252c839e..c7e923b0000a 100644
--- a/arch/powerpc/include/asm/cputable.h
+++ b/arch/powerpc/include/asm/cputable.h
@@ -191,7 +191,6 @@ static inline void cpu_feature_keys_init(void) { }
 #define CPU_FTR_SPURR			LONG_ASM_CONST(0x0000000001000000)
 #define CPU_FTR_DSCR			LONG_ASM_CONST(0x0000000002000000)
 #define CPU_FTR_VSX			LONG_ASM_CONST(0x0000000004000000)
-#define CPU_FTR_SAO			LONG_ASM_CONST(0x0000000008000000)
 #define CPU_FTR_CP_USE_DCBTZ		LONG_ASM_CONST(0x0000000010000000)
 #define CPU_FTR_UNALIGNED_LD_STD	LONG_ASM_CONST(0x0000000020000000)
 #define CPU_FTR_ASYM_SMT		LONG_ASM_CONST(0x0000000040000000)
@@ -435,7 +434,7 @@ static inline void cpu_feature_keys_init(void) { }
 	    CPU_FTR_MMCRA | CPU_FTR_SMT | \
 	    CPU_FTR_COHERENT_ICACHE | \
 	    CPU_FTR_PURR | CPU_FTR_SPURR | CPU_FTR_REAL_LE | \
-	    CPU_FTR_DSCR | CPU_FTR_SAO  | CPU_FTR_ASYM_SMT | \
+	    CPU_FTR_DSCR | CPU_FTR_ASYM_SMT | \
 	    CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \
 	    CPU_FTR_CFAR | CPU_FTR_HVMODE | \
 	    CPU_FTR_VMX_COPY | CPU_FTR_HAS_PPR | CPU_FTR_DABRX | CPU_FTR_PKEY)
@@ -444,7 +443,7 @@ static inline void cpu_feature_keys_init(void) { }
 	    CPU_FTR_MMCRA | CPU_FTR_SMT | \
 	    CPU_FTR_COHERENT_ICACHE | \
 	    CPU_FTR_PURR | CPU_FTR_SPURR | CPU_FTR_REAL_LE | \
-	    CPU_FTR_DSCR | CPU_FTR_SAO  | \
+	    CPU_FTR_DSCR | \
 	    CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \
 	    CPU_FTR_CFAR | CPU_FTR_HVMODE | CPU_FTR_VMX_COPY | \
 	    CPU_FTR_DBELL | CPU_FTR_HAS_PPR | CPU_FTR_DAWR | \
@@ -455,7 +454,7 @@ static inline void cpu_feature_keys_init(void) { }
 	    CPU_FTR_MMCRA | CPU_FTR_SMT | \
 	    CPU_FTR_COHERENT_ICACHE | \
 	    CPU_FTR_PURR | CPU_FTR_SPURR | CPU_FTR_REAL_LE | \
-	    CPU_FTR_DSCR | CPU_FTR_SAO  | \
+	    CPU_FTR_DSCR | \
 	    CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \
 	    CPU_FTR_CFAR | CPU_FTR_HVMODE | CPU_FTR_VMX_COPY | \
 	    CPU_FTR_DBELL | CPU_FTR_HAS_PPR | CPU_FTR_ARCH_207S | \
@@ -473,7 +472,7 @@ static inline void cpu_feature_keys_init(void) { }
 	    CPU_FTR_MMCRA | CPU_FTR_SMT | \
 	    CPU_FTR_COHERENT_ICACHE | \
 	    CPU_FTR_PURR | CPU_FTR_SPURR | CPU_FTR_REAL_LE | \
-	    CPU_FTR_DSCR | CPU_FTR_SAO  | \
+	    CPU_FTR_DSCR | \
 	    CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \
 	    CPU_FTR_CFAR | CPU_FTR_HVMODE | CPU_FTR_VMX_COPY | \
 	    CPU_FTR_DBELL | CPU_FTR_HAS_PPR | CPU_FTR_ARCH_207S | \
diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h
index 9bb9bb370b53..579c9229124b 100644
--- a/arch/powerpc/include/asm/kvm_book3s_64.h
+++ b/arch/powerpc/include/asm/kvm_book3s_64.h
@@ -400,7 +400,8 @@ static inline bool hpte_cache_flags_ok(unsigned long hptel, bool is_ci)
 
 	/* Handle SAO */
 	if (wimg == (HPTE_R_W | HPTE_R_I | HPTE_R_M) &&
-	    cpu_has_feature(CPU_FTR_ARCH_206))
+	    cpu_has_feature(CPU_FTR_ARCH_206) &&
+	    !cpu_has_feature(CPU_FTR_ARCH_31))
 		wimg = HPTE_R_M;
 
 	if (!is_ci)
diff --git a/arch/powerpc/include/asm/mman.h b/arch/powerpc/include/asm/mman.h
index d610c2e07b28..43a62f3e21a0 100644
--- a/arch/powerpc/include/asm/mman.h
+++ b/arch/powerpc/include/asm/mman.h
@@ -13,38 +13,24 @@
 #include <linux/pkeys.h>
 #include <asm/cpu_has_feature.h>
 
-/*
- * This file is included by linux/mman.h, so we can't use cacl_vm_prot_bits()
- * here.  How important is the optimization?
- */
+#ifdef CONFIG_PPC_MEM_KEYS
 static inline unsigned long arch_calc_vm_prot_bits(unsigned long prot,
 		unsigned long pkey)
 {
-#ifdef CONFIG_PPC_MEM_KEYS
-	return (((prot & PROT_SAO) ? VM_SAO : 0) | pkey_to_vmflag_bits(pkey));
-#else
-	return ((prot & PROT_SAO) ? VM_SAO : 0);
-#endif
+	return pkey_to_vmflag_bits(pkey);
 }
 #define arch_calc_vm_prot_bits(prot, pkey) arch_calc_vm_prot_bits(prot, pkey)
 
 static inline pgprot_t arch_vm_get_page_prot(unsigned long vm_flags)
 {
-#ifdef CONFIG_PPC_MEM_KEYS
-	return (vm_flags & VM_SAO) ?
-		__pgprot(_PAGE_SAO | vmflag_to_pte_pkey_bits(vm_flags)) :
-		__pgprot(0 | vmflag_to_pte_pkey_bits(vm_flags));
-#else
-	return (vm_flags & VM_SAO) ? __pgprot(_PAGE_SAO) : __pgprot(0);
-#endif
+	return __pgprot(vmflag_to_pte_pkey_bits(vm_flags));
 }
 #define arch_vm_get_page_prot(vm_flags) arch_vm_get_page_prot(vm_flags)
+#endif
 
 static inline bool arch_validate_prot(unsigned long prot, unsigned long addr)
 {
-	if (prot & ~(PROT_READ | PROT_WRITE | PROT_EXEC | PROT_SEM | PROT_SAO))
-		return false;
-	if ((prot & PROT_SAO) && !cpu_has_feature(CPU_FTR_SAO))
+	if (prot & ~(PROT_READ | PROT_WRITE | PROT_EXEC | PROT_SEM))
 		return false;
 	return true;
 }
diff --git a/arch/powerpc/include/asm/nohash/64/pgtable.h b/arch/powerpc/include/asm/nohash/64/pgtable.h
index 3424381b81da..2fd528ef48e0 100644
--- a/arch/powerpc/include/asm/nohash/64/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/64/pgtable.h
@@ -82,8 +82,6 @@
  */
 #include <asm/nohash/pte-book3e.h>
 
-#define _PAGE_SAO	0
-
 #define PTE_RPN_MASK	(~((1UL << PTE_RPN_SHIFT) - 1))
 
 /*
diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c b/arch/powerpc/kernel/dt_cpu_ftrs.c
index 3a409517c031..8d2e4043702f 100644
--- a/arch/powerpc/kernel/dt_cpu_ftrs.c
+++ b/arch/powerpc/kernel/dt_cpu_ftrs.c
@@ -622,7 +622,7 @@ static struct dt_cpu_feature_match __initdata
 	{"processor-control-facility-v3", feat_enable_dbell, CPU_FTR_DBELL},
 	{"processor-utilization-of-resources-register", feat_enable_purr, 0},
 	{"no-execute", feat_enable, 0},
-	{"strong-access-ordering", feat_enable, CPU_FTR_SAO},
+	{"strong-access-ordering", feat_enable, 0},
 	{"cache-inhibited-large-page", feat_enable_large_ci, 0},
 	{"coprocessor-icswx", feat_enable, 0},
 	{"hypervisor-virtualization-interrupt", feat_enable_hvi, 0},
diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c
index 0124003e60d0..14b6abdc3bd8 100644
--- a/arch/powerpc/mm/book3s64/hash_utils.c
+++ b/arch/powerpc/mm/book3s64/hash_utils.c
@@ -232,8 +232,6 @@ unsigned long htab_convert_pte_flags(unsigned long pteflags)
 		rflags |= HPTE_R_I;
 	else if ((pteflags & _PAGE_CACHE_CTL) == _PAGE_NON_IDEMPOTENT)
 		rflags |= (HPTE_R_I | HPTE_R_G);
-	else if ((pteflags & _PAGE_CACHE_CTL) == _PAGE_SAO)
-		rflags |= (HPTE_R_W | HPTE_R_I | HPTE_R_M);
 	else
 		/*
 		 * Add memory coherence if cache inhibited is not set
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 86adc71a972f..bdcaae914120 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -316,8 +316,6 @@ extern unsigned int kobjsize(const void *objp);
 
 #if defined(CONFIG_X86)
 # define VM_PAT		VM_ARCH_1	/* PAT reserves whole VMA at once (x86) */
-#elif defined(CONFIG_PPC)
-# define VM_SAO		VM_ARCH_1	/* Strong Access Ordering (powerpc) */
 #elif defined(CONFIG_PARISC)
 # define VM_GROWSUP	VM_ARCH_1
 #elif defined(CONFIG_IA64)
diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h
index 5fb752034386..939092dbcb8b 100644
--- a/include/trace/events/mmflags.h
+++ b/include/trace/events/mmflags.h
@@ -114,8 +114,6 @@ IF_HAVE_PG_IDLE(PG_idle,		"idle"		)
 
 #if defined(CONFIG_X86)
 #define __VM_ARCH_SPECIFIC_1 {VM_PAT,     "pat"           }
-#elif defined(CONFIG_PPC)
-#define __VM_ARCH_SPECIFIC_1 {VM_SAO,     "sao"           }
 #elif defined(CONFIG_PARISC) || defined(CONFIG_IA64)
 #define __VM_ARCH_SPECIFIC_1 {VM_GROWSUP,	"growsup"	}
 #elif !defined(CONFIG_MMU)
diff --git a/mm/ksm.c b/mm/ksm.c
index 18c5d005bd01..b225b0e16111 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -2452,10 +2452,6 @@ int ksm_madvise(struct vm_area_struct *vma, unsigned long start,
 		if (vma_is_dax(vma))
 			return 0;
 
-#ifdef VM_SAO
-		if (*vm_flags & VM_SAO)
-			return 0;
-#endif
 #ifdef VM_SPARC_ADI
 		if (*vm_flags & VM_SPARC_ADI)
 			return 0;
diff --git a/tools/testing/selftests/powerpc/mm/.gitignore b/tools/testing/selftests/powerpc/mm/.gitignore
index 2ca523255b1b..ff296c94f627 100644
--- a/tools/testing/selftests/powerpc/mm/.gitignore
+++ b/tools/testing/selftests/powerpc/mm/.gitignore
@@ -2,7 +2,6 @@
 hugetlb_vs_thp_test
 subpage_prot
 tempfile
-prot_sao
 segv_errors
 wild_bctr
 large_vm_fork_separation
diff --git a/tools/testing/selftests/powerpc/mm/Makefile b/tools/testing/selftests/powerpc/mm/Makefile
index b9103c4bb414..9b8a7b3069c5 100644
--- a/tools/testing/selftests/powerpc/mm/Makefile
+++ b/tools/testing/selftests/powerpc/mm/Makefile
@@ -2,7 +2,7 @@
 noarg:
 	$(MAKE) -C ../
 
-TEST_GEN_PROGS := hugetlb_vs_thp_test subpage_prot prot_sao segv_errors wild_bctr \
+TEST_GEN_PROGS := hugetlb_vs_thp_test subpage_prot segv_errors wild_bctr \
 		  large_vm_fork_separation bad_accesses
 TEST_GEN_PROGS_EXTENDED := tlbie_test
 TEST_GEN_FILES := tempfile
@@ -12,8 +12,6 @@ include ../../lib.mk
 
 $(TEST_GEN_PROGS): ../harness.c
 
-$(OUTPUT)/prot_sao: ../utils.c
-
 $(OUTPUT)/wild_bctr: CFLAGS += -m64
 $(OUTPUT)/large_vm_fork_separation: CFLAGS += -m64
 $(OUTPUT)/bad_accesses: CFLAGS += -m64
diff --git a/tools/testing/selftests/powerpc/mm/prot_sao.c b/tools/testing/selftests/powerpc/mm/prot_sao.c
deleted file mode 100644
index e2eed65b7735..000000000000
--- a/tools/testing/selftests/powerpc/mm/prot_sao.c
+++ /dev/null
@@ -1,42 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright 2016, Michael Ellerman, IBM Corp.
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/mman.h>
-
-#include <asm/cputable.h>
-
-#include "utils.h"
-
-#define SIZE (64 * 1024)
-
-int test_prot_sao(void)
-{
-	char *p;
-
-	/* 2.06 or later should support SAO */
-	SKIP_IF(!have_hwcap(PPC_FEATURE_ARCH_2_06));
-
-	/*
-	 * Ensure we can ask for PROT_SAO.
-	 * We can't really verify that it does the right thing, but at least we
-	 * confirm the kernel will accept it.
-	 */
-	p = mmap(NULL, SIZE, PROT_READ | PROT_WRITE | PROT_SAO,
-		 MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
-	FAIL_IF(p == MAP_FAILED);
-
-	/* Write to the mapping, to at least cause a fault */
-	memset(p, 0xaa, SIZE);
-
-	return 0;
-}
-
-int main(void)
-{
-	return test_harness(test_prot_sao, "prot-sao");
-}
-- 
2.23.0


^ permalink raw reply related

* [PATCH 2/2] powerpc/64s/hash: disable subpage_prot syscall by default
From: Nicholas Piggin @ 2020-06-07 12:02 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Nicholas Piggin
In-Reply-To: <20200607120209.463501-1-npiggin@gmail.com>

The subpage_prot syscall was added for specialised system software
(Lx86) that has been discontinued for about 7 years, and is not thought
to be used elsewhere, so disable it by default.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 arch/powerpc/Kconfig                   | 1 +
 arch/powerpc/configs/powernv_defconfig | 1 -
 arch/powerpc/configs/pseries_defconfig | 1 -
 3 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 9fa23eb320ff..1701646f845d 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -834,6 +834,7 @@ config FORCE_MAX_ZONEORDER
 
 config PPC_SUBPAGE_PROT
 	bool "Support setting protections for 4k subpages"
+	default n
 	depends on PPC_BOOK3S_64 && PPC_64K_PAGES
 	help
 	  This option adds support for a system call to allow user programs
diff --git a/arch/powerpc/configs/powernv_defconfig b/arch/powerpc/configs/powernv_defconfig
index 2de9aadf0f50..afc0dd73a1e6 100644
--- a/arch/powerpc/configs/powernv_defconfig
+++ b/arch/powerpc/configs/powernv_defconfig
@@ -64,7 +64,6 @@ CONFIG_HWPOISON_INJECT=m
 CONFIG_TRANSPARENT_HUGEPAGE=y
 CONFIG_DEFERRED_STRUCT_PAGE_INIT=y
 CONFIG_PPC_64K_PAGES=y
-CONFIG_PPC_SUBPAGE_PROT=y
 CONFIG_SCHED_SMT=y
 CONFIG_PM=y
 CONFIG_HOTPLUG_PCI=y
diff --git a/arch/powerpc/configs/pseries_defconfig b/arch/powerpc/configs/pseries_defconfig
index dfa4a726333b..894e8d85fb48 100644
--- a/arch/powerpc/configs/pseries_defconfig
+++ b/arch/powerpc/configs/pseries_defconfig
@@ -57,7 +57,6 @@ CONFIG_MEMORY_HOTREMOVE=y
 CONFIG_KSM=y
 CONFIG_TRANSPARENT_HUGEPAGE=y
 CONFIG_PPC_64K_PAGES=y
-CONFIG_PPC_SUBPAGE_PROT=y
 CONFIG_SCHED_SMT=y
 CONFIG_HOTPLUG_PCI=y
 CONFIG_HOTPLUG_PCI_RPA=m
-- 
2.23.0


^ permalink raw reply related

* [PATCH v5 4/4] riscv: Check relocations at compile time
From: Alexandre Ghiti @ 2020-06-07  7:59 UTC (permalink / raw)
  To: Michael Ellerman, Benjamin Herrenschmidt, Paul Mackerras,
	Paul Walmsley, Palmer Dabbelt, Albert Ou, Anup Patel, Atish Patra,
	Zong Li, linux-kernel, linuxppc-dev, linux-riscv
  Cc: Anup Patel, Alexandre Ghiti
In-Reply-To: <20200607075949.665-1-alex@ghiti.fr>

Relocating kernel at runtime is done very early in the boot process, so
it is not convenient to check for relocations there and react in case a
relocation was not expected.

There exists a script in scripts/ that extracts the relocations from
vmlinux that is then used at postlink to check the relocations.

Signed-off-by: Alexandre Ghiti <alex@ghiti.fr>
Reviewed-by: Anup Patel <anup@brainfault.org>
---
 arch/riscv/Makefile.postlink     | 36 ++++++++++++++++++++++++++++++++
 arch/riscv/tools/relocs_check.sh | 26 +++++++++++++++++++++++
 2 files changed, 62 insertions(+)
 create mode 100644 arch/riscv/Makefile.postlink
 create mode 100755 arch/riscv/tools/relocs_check.sh

diff --git a/arch/riscv/Makefile.postlink b/arch/riscv/Makefile.postlink
new file mode 100644
index 000000000000..bf2b2bca1845
--- /dev/null
+++ b/arch/riscv/Makefile.postlink
@@ -0,0 +1,36 @@
+# SPDX-License-Identifier: GPL-2.0
+# ===========================================================================
+# Post-link riscv pass
+# ===========================================================================
+#
+# Check that vmlinux relocations look sane
+
+PHONY := __archpost
+__archpost:
+
+-include include/config/auto.conf
+include scripts/Kbuild.include
+
+quiet_cmd_relocs_check = CHKREL  $@
+cmd_relocs_check = 							\
+	$(CONFIG_SHELL) $(srctree)/arch/riscv/tools/relocs_check.sh "$(OBJDUMP)" "$(NM)" "$@"
+
+# `@true` prevents complaint when there is nothing to be done
+
+vmlinux: FORCE
+	@true
+ifdef CONFIG_RELOCATABLE
+	$(call if_changed,relocs_check)
+endif
+
+%.ko: FORCE
+	@true
+
+clean:
+	@true
+
+PHONY += FORCE clean
+
+FORCE:
+
+.PHONY: $(PHONY)
diff --git a/arch/riscv/tools/relocs_check.sh b/arch/riscv/tools/relocs_check.sh
new file mode 100755
index 000000000000..baeb2e7b2290
--- /dev/null
+++ b/arch/riscv/tools/relocs_check.sh
@@ -0,0 +1,26 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0-or-later
+# Based on powerpc relocs_check.sh
+
+# This script checks the relocations of a vmlinux for "suspicious"
+# relocations.
+
+if [ $# -lt 3 ]; then
+        echo "$0 [path to objdump] [path to nm] [path to vmlinux]" 1>&2
+        exit 1
+fi
+
+bad_relocs=$(
+${srctree}/scripts/relocs_check.sh "$@" |
+	# These relocations are okay
+	#	R_RISCV_RELATIVE
+	grep -F -w -v 'R_RISCV_RELATIVE'
+)
+
+if [ -z "$bad_relocs" ]; then
+	exit 0
+fi
+
+num_bad=$(echo "$bad_relocs" | wc -l)
+echo "WARNING: $num_bad bad relocations"
+echo "$bad_relocs"
-- 
2.20.1


^ permalink raw reply related

* [PATCH v5 3/4] powerpc: Move script to check relocations at compile time in scripts/
From: Alexandre Ghiti @ 2020-06-07  7:59 UTC (permalink / raw)
  To: Michael Ellerman, Benjamin Herrenschmidt, Paul Mackerras,
	Paul Walmsley, Palmer Dabbelt, Albert Ou, Anup Patel, Atish Patra,
	Zong Li, linux-kernel, linuxppc-dev, linux-riscv
  Cc: Anup Patel, Alexandre Ghiti
In-Reply-To: <20200607075949.665-1-alex@ghiti.fr>

Relocating kernel at runtime is done very early in the boot process, so
it is not convenient to check for relocations there and react in case a
relocation was not expected.

Powerpc architecture has a script that allows to check at compile time
for such unexpected relocations: extract the common logic to scripts/
so that other architectures can take advantage of it.

Signed-off-by: Alexandre Ghiti <alex@ghiti.fr>
Reviewed-by: Anup Patel <anup@brainfault.org>
---
 arch/powerpc/tools/relocs_check.sh | 18 ++----------------
 scripts/relocs_check.sh            | 20 ++++++++++++++++++++
 2 files changed, 22 insertions(+), 16 deletions(-)
 create mode 100755 scripts/relocs_check.sh

diff --git a/arch/powerpc/tools/relocs_check.sh b/arch/powerpc/tools/relocs_check.sh
index 014e00e74d2b..e367895941ae 100755
--- a/arch/powerpc/tools/relocs_check.sh
+++ b/arch/powerpc/tools/relocs_check.sh
@@ -15,21 +15,8 @@ if [ $# -lt 3 ]; then
 	exit 1
 fi
 
-# Have Kbuild supply the path to objdump and nm so we handle cross compilation.
-objdump="$1"
-nm="$2"
-vmlinux="$3"
-
-# Remove from the bad relocations those that match an undefined weak symbol
-# which will result in an absolute relocation to 0.
-# Weak unresolved symbols are of that form in nm output:
-# "                  w _binary__btf_vmlinux_bin_end"
-undef_weak_symbols=$($nm "$vmlinux" | awk '$1 ~ /w/ { print $2 }')
-
 bad_relocs=$(
-$objdump -R "$vmlinux" |
-	# Only look at relocation lines.
-	grep -E '\<R_' |
+${srctree}/scripts/relocs_check.sh "$@" |
 	# These relocations are okay
 	# On PPC64:
 	#	R_PPC64_RELATIVE, R_PPC64_NONE
@@ -43,8 +30,7 @@ R_PPC_ADDR16_LO
 R_PPC_ADDR16_HI
 R_PPC_ADDR16_HA
 R_PPC_RELATIVE
-R_PPC_NONE' |
-	([ "$undef_weak_symbols" ] && grep -F -w -v "$undef_weak_symbols" || cat)
+R_PPC_NONE'
 )
 
 if [ -z "$bad_relocs" ]; then
diff --git a/scripts/relocs_check.sh b/scripts/relocs_check.sh
new file mode 100755
index 000000000000..137c660499f3
--- /dev/null
+++ b/scripts/relocs_check.sh
@@ -0,0 +1,20 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0-or-later
+
+# Get a list of all the relocations, remove from it the relocations
+# that are known to be legitimate and return this list to arch specific
+# script that will look for suspicious relocations.
+
+objdump="$1"
+nm="$2"
+vmlinux="$3"
+
+# Remove from the possible bad relocations those that match an undefined
+# weak symbol which will result in an absolute relocation to 0.
+# Weak unresolved symbols are of that form in nm output:
+# "                  w _binary__btf_vmlinux_bin_end"
+undef_weak_symbols=$($nm "$vmlinux" | awk '$1 ~ /w/ { print $2 }')
+
+$objdump -R "$vmlinux" |
+	grep -E '\<R_' |
+	([ "$undef_weak_symbols" ] && grep -F -w -v "$undef_weak_symbols" || cat)
-- 
2.20.1


^ permalink raw reply related

* [PATCH v5 2/4] riscv: Introduce CONFIG_RELOCATABLE
From: Alexandre Ghiti @ 2020-06-07  7:59 UTC (permalink / raw)
  To: Michael Ellerman, Benjamin Herrenschmidt, Paul Mackerras,
	Paul Walmsley, Palmer Dabbelt, Albert Ou, Anup Patel, Atish Patra,
	Zong Li, linux-kernel, linuxppc-dev, linux-riscv
  Cc: Anup Patel, Alexandre Ghiti
In-Reply-To: <20200607075949.665-1-alex@ghiti.fr>

This config allows to compile the kernel as PIE and to relocate it at
any virtual address at runtime: this paves the way to KASLR and to 4-level
page table folding at runtime. Runtime relocation is possible since
relocation metadata are embedded into the kernel.

Note that relocating at runtime introduces an overhead even if the
kernel is loaded at the same address it was linked at and that the compiler
options are those used in arm64 which uses the same RELA relocation
format.

Signed-off-by: Alexandre Ghiti <alex@ghiti.fr>
Reviewed-by: Zong Li <zong.li@sifive.com>
Reviewed-by: Anup Patel <anup@brainfault.org>
---
 arch/riscv/Kconfig              | 12 +++++++
 arch/riscv/Makefile             |  5 ++-
 arch/riscv/kernel/vmlinux.lds.S |  6 ++--
 arch/riscv/mm/Makefile          |  4 +++
 arch/riscv/mm/init.c            | 63 +++++++++++++++++++++++++++++++++
 5 files changed, 87 insertions(+), 3 deletions(-)

diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index a31e1a41913a..93127d5913fe 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -170,6 +170,18 @@ config PGTABLE_LEVELS
 	default 3 if 64BIT
 	default 2
 
+config RELOCATABLE
+	bool
+	depends on MMU
+	help
+          This builds a kernel as a Position Independent Executable (PIE),
+          which retains all relocation metadata required to relocate the
+          kernel binary at runtime to a different virtual address than the
+          address it was linked at.
+          Since RISCV uses the RELA relocation format, this requires a
+          relocation pass at runtime even if the kernel is loaded at the
+          same address it was linked at.
+
 source "arch/riscv/Kconfig.socs"
 
 menu "Platform type"
diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile
index fb6e37db836d..1406416ea743 100644
--- a/arch/riscv/Makefile
+++ b/arch/riscv/Makefile
@@ -9,7 +9,10 @@
 #
 
 OBJCOPYFLAGS    := -O binary
-LDFLAGS_vmlinux :=
+ifeq ($(CONFIG_RELOCATABLE),y)
+LDFLAGS_vmlinux := -shared -Bsymbolic -z notext -z norelro
+KBUILD_CFLAGS += -fPIE
+endif
 ifeq ($(CONFIG_DYNAMIC_FTRACE),y)
 	LDFLAGS_vmlinux := --no-relax
 endif
diff --git a/arch/riscv/kernel/vmlinux.lds.S b/arch/riscv/kernel/vmlinux.lds.S
index a9abde62909f..e8ffba8c2044 100644
--- a/arch/riscv/kernel/vmlinux.lds.S
+++ b/arch/riscv/kernel/vmlinux.lds.S
@@ -85,8 +85,10 @@ SECTIONS
 
 	BSS_SECTION(PAGE_SIZE, PAGE_SIZE, 0)
 
-	.rel.dyn : {
-		*(.rel.dyn*)
+	.rela.dyn : ALIGN(8) {
+		__rela_dyn_start = .;
+		*(.rela .rela*)
+		__rela_dyn_end = .;
 	}
 
 	_end = .;
diff --git a/arch/riscv/mm/Makefile b/arch/riscv/mm/Makefile
index 363ef01c30b1..dc5cdaa80bc1 100644
--- a/arch/riscv/mm/Makefile
+++ b/arch/riscv/mm/Makefile
@@ -1,6 +1,10 @@
 # SPDX-License-Identifier: GPL-2.0-only
 
 CFLAGS_init.o := -mcmodel=medany
+ifdef CONFIG_RELOCATABLE
+CFLAGS_init.o += -fno-pie
+endif
+
 ifdef CONFIG_FTRACE
 CFLAGS_REMOVE_init.o = -pg
 endif
diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
index 71da78914645..29b33289a12f 100644
--- a/arch/riscv/mm/init.c
+++ b/arch/riscv/mm/init.c
@@ -13,6 +13,9 @@
 #include <linux/of_fdt.h>
 #include <linux/libfdt.h>
 #include <linux/set_memory.h>
+#ifdef CONFIG_RELOCATABLE
+#include <linux/elf.h>
+#endif
 
 #include <asm/fixmap.h>
 #include <asm/tlbflush.h>
@@ -379,6 +382,53 @@ static uintptr_t __init best_map_size(phys_addr_t base, phys_addr_t size)
 #error "setup_vm() is called from head.S before relocate so it should not use absolute addressing."
 #endif
 
+#ifdef CONFIG_RELOCATABLE
+extern unsigned long __rela_dyn_start, __rela_dyn_end;
+
+#ifdef CONFIG_64BIT
+#define Elf_Rela Elf64_Rela
+#define Elf_Addr Elf64_Addr
+#else
+#define Elf_Rela Elf32_Rela
+#define Elf_Addr Elf32_Addr
+#endif
+
+void __init relocate_kernel(uintptr_t load_pa)
+{
+	Elf_Rela *rela = (Elf_Rela *)&__rela_dyn_start;
+	/*
+	 * This holds the offset between the linked virtual address and the
+	 * relocated virtual address.
+	 */
+	uintptr_t reloc_offset = kernel_virt_addr - KERNEL_LINK_ADDR;
+	/*
+	 * This holds the offset between kernel linked virtual address and
+	 * physical address.
+	 */
+	uintptr_t va_kernel_link_pa_offset = KERNEL_LINK_ADDR - load_pa;
+
+	for ( ; rela < (Elf_Rela *)&__rela_dyn_end; rela++) {
+		Elf_Addr addr = (rela->r_offset - va_kernel_link_pa_offset);
+		Elf_Addr relocated_addr = rela->r_addend;
+
+		if (rela->r_info != R_RISCV_RELATIVE)
+			continue;
+
+		/*
+		 * Make sure to not relocate vdso symbols like rt_sigreturn
+		 * which are linked from the address 0 in vmlinux since
+		 * vdso symbol addresses are actually used as an offset from
+		 * mm->context.vdso in VDSO_OFFSET macro.
+		 */
+		if (relocated_addr >= KERNEL_LINK_ADDR)
+			relocated_addr += reloc_offset;
+
+		*(Elf_Addr *)addr = relocated_addr;
+	}
+}
+
+#endif
+
 static uintptr_t load_pa, load_sz;
 
 static void __init create_kernel_page_table(pgd_t *pgdir, uintptr_t map_size)
@@ -405,6 +455,19 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa)
 
 	pfn_base = PFN_DOWN(load_pa);
 
+#ifdef CONFIG_RELOCATABLE
+#ifdef CONFIG_64BIT
+	/*
+	 * Early page table uses only one PGDIR, which makes it possible
+	 * to map PGDIR_SIZE aligned on PGDIR_SIZE: if the relocation offset
+	 * makes the kernel cross over a PGDIR_SIZE boundary, raise a bug
+	 * since a part of the kernel would not get mapped.
+	 * This cannot happen on rv32 as we use the entire page directory level.
+	 */
+	BUG_ON(PGDIR_SIZE - (kernel_virt_addr & (PGDIR_SIZE - 1)) < load_sz);
+#endif
+	relocate_kernel(load_pa);
+#endif
 	/*
 	 * Enforce boot alignment requirements of RV32 and
 	 * RV64 by only allowing PMD or PGD mappings.
-- 
2.20.1


^ permalink raw reply related

* [PATCH v5 1/4] riscv: Move kernel mapping to vmalloc zone
From: Alexandre Ghiti @ 2020-06-07  7:59 UTC (permalink / raw)
  To: Michael Ellerman, Benjamin Herrenschmidt, Paul Mackerras,
	Paul Walmsley, Palmer Dabbelt, Albert Ou, Anup Patel, Atish Patra,
	Zong Li, linux-kernel, linuxppc-dev, linux-riscv
  Cc: Alexandre Ghiti
In-Reply-To: <20200607075949.665-1-alex@ghiti.fr>

This is a preparatory patch for relocatable kernel.

The kernel used to be linked at PAGE_OFFSET address and used to be loaded
physically at the beginning of the main memory. Therefore, we could use
the linear mapping for the kernel mapping.

But the relocated kernel base address will be different from PAGE_OFFSET
and since in the linear mapping, two different virtual addresses cannot
point to the same physical address, the kernel mapping needs to lie outside
the linear mapping.

In addition, because modules and BPF must be close to the kernel (inside
+-2GB window), the kernel is placed at the end of the vmalloc zone minus
2GB, which leaves room for modules and BPF. The kernel could not be
placed at the beginning of the vmalloc zone since other vmalloc
allocations from the kernel could get all the +-2GB window around the
kernel which would prevent new modules and BPF programs to be loaded.

Signed-off-by: Alexandre Ghiti <alex@ghiti.fr>
Reviewed-by: Zong Li <zong.li@sifive.com>
---
 arch/riscv/boot/loader.lds.S     |  3 +-
 arch/riscv/include/asm/page.h    | 10 +++++-
 arch/riscv/include/asm/pgtable.h | 38 ++++++++++++++-------
 arch/riscv/kernel/head.S         |  3 +-
 arch/riscv/kernel/module.c       |  4 +--
 arch/riscv/kernel/vmlinux.lds.S  |  3 +-
 arch/riscv/mm/init.c             | 58 +++++++++++++++++++++++++-------
 arch/riscv/mm/physaddr.c         |  2 +-
 8 files changed, 88 insertions(+), 33 deletions(-)

diff --git a/arch/riscv/boot/loader.lds.S b/arch/riscv/boot/loader.lds.S
index 47a5003c2e28..62d94696a19c 100644
--- a/arch/riscv/boot/loader.lds.S
+++ b/arch/riscv/boot/loader.lds.S
@@ -1,13 +1,14 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 
 #include <asm/page.h>
+#include <asm/pgtable.h>
 
 OUTPUT_ARCH(riscv)
 ENTRY(_start)
 
 SECTIONS
 {
-	. = PAGE_OFFSET;
+	. = KERNEL_LINK_ADDR;
 
 	.payload : {
 		*(.payload)
diff --git a/arch/riscv/include/asm/page.h b/arch/riscv/include/asm/page.h
index 2d50f76efe48..48bb09b6a9b7 100644
--- a/arch/riscv/include/asm/page.h
+++ b/arch/riscv/include/asm/page.h
@@ -90,18 +90,26 @@ typedef struct page *pgtable_t;
 
 #ifdef CONFIG_MMU
 extern unsigned long va_pa_offset;
+extern unsigned long va_kernel_pa_offset;
 extern unsigned long pfn_base;
 #define ARCH_PFN_OFFSET		(pfn_base)
 #else
 #define va_pa_offset		0
+#define va_kernel_pa_offset	0
 #define ARCH_PFN_OFFSET		(PAGE_OFFSET >> PAGE_SHIFT)
 #endif /* CONFIG_MMU */
 
 extern unsigned long max_low_pfn;
 extern unsigned long min_low_pfn;
+extern unsigned long kernel_virt_addr;
 
 #define __pa_to_va_nodebug(x)	((void *)((unsigned long) (x) + va_pa_offset))
-#define __va_to_pa_nodebug(x)	((unsigned long)(x) - va_pa_offset)
+#define linear_mapping_va_to_pa(x)	((unsigned long)(x) - va_pa_offset)
+#define kernel_mapping_va_to_pa(x)	\
+	((unsigned long)(x) - va_kernel_pa_offset)
+#define __va_to_pa_nodebug(x)		\
+	(((x) >= PAGE_OFFSET) ?		\
+		linear_mapping_va_to_pa(x) : kernel_mapping_va_to_pa(x))
 
 #ifdef CONFIG_DEBUG_VIRTUAL
 extern phys_addr_t __virt_to_phys(unsigned long x);
diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
index 35b60035b6b0..94ef3b49dfb6 100644
--- a/arch/riscv/include/asm/pgtable.h
+++ b/arch/riscv/include/asm/pgtable.h
@@ -11,23 +11,29 @@
 
 #include <asm/pgtable-bits.h>
 
-#ifndef __ASSEMBLY__
-
-/* Page Upper Directory not used in RISC-V */
-#include <asm-generic/pgtable-nopud.h>
-#include <asm/page.h>
-#include <asm/tlbflush.h>
-#include <linux/mm_types.h>
-
-#ifdef CONFIG_MMU
+#ifndef CONFIG_MMU
+#define KERNEL_VIRT_ADDR	PAGE_OFFSET
+#define KERNEL_LINK_ADDR	PAGE_OFFSET
+#else
+/*
+ * Leave 2GB for modules and BPF that must lie within a 2GB range around
+ * the kernel.
+ */
+#define KERNEL_VIRT_ADDR	(VMALLOC_END - SZ_2G + 1)
+#define KERNEL_LINK_ADDR	KERNEL_VIRT_ADDR
 
 #define VMALLOC_SIZE     (KERN_VIRT_SIZE >> 1)
 #define VMALLOC_END      (PAGE_OFFSET - 1)
 #define VMALLOC_START    (PAGE_OFFSET - VMALLOC_SIZE)
 
 #define BPF_JIT_REGION_SIZE	(SZ_128M)
-#define BPF_JIT_REGION_START	(PAGE_OFFSET - BPF_JIT_REGION_SIZE)
-#define BPF_JIT_REGION_END	(VMALLOC_END)
+#define BPF_JIT_REGION_START	PFN_ALIGN((unsigned long)&_end)
+#define BPF_JIT_REGION_END	(BPF_JIT_REGION_START + BPF_JIT_REGION_SIZE)
+
+#ifdef CONFIG_64BIT
+#define VMALLOC_MODULE_START	BPF_JIT_REGION_END
+#define VMALLOC_MODULE_END	(((unsigned long)&_start & PAGE_MASK) + SZ_2G)
+#endif
 
 /*
  * Roughly size the vmemmap space to be large enough to fit enough
@@ -57,9 +63,16 @@
 #define FIXADDR_SIZE     PGDIR_SIZE
 #endif
 #define FIXADDR_START    (FIXADDR_TOP - FIXADDR_SIZE)
-
 #endif
 
+#ifndef __ASSEMBLY__
+
+/* Page Upper Directory not used in RISC-V */
+#include <asm-generic/pgtable-nopud.h>
+#include <asm/page.h>
+#include <asm/tlbflush.h>
+#include <linux/mm_types.h>
+
 #ifdef CONFIG_64BIT
 #include <asm/pgtable-64.h>
 #else
@@ -483,6 +496,7 @@ static inline void __kernel_map_pages(struct page *page, int numpages, int enabl
 
 #define kern_addr_valid(addr)   (1) /* FIXME */
 
+extern char _start[];
 extern void *dtb_early_va;
 void setup_bootmem(void);
 void paging_init(void);
diff --git a/arch/riscv/kernel/head.S b/arch/riscv/kernel/head.S
index 98a406474e7d..8f5bb7731327 100644
--- a/arch/riscv/kernel/head.S
+++ b/arch/riscv/kernel/head.S
@@ -49,7 +49,8 @@ ENTRY(_start)
 #ifdef CONFIG_MMU
 relocate:
 	/* Relocate return address */
-	li a1, PAGE_OFFSET
+	la a1, kernel_virt_addr
+	REG_L a1, 0(a1)
 	la a2, _start
 	sub a1, a1, a2
 	add ra, ra, a1
diff --git a/arch/riscv/kernel/module.c b/arch/riscv/kernel/module.c
index 8bbe5dbe1341..1a8fbe05accf 100644
--- a/arch/riscv/kernel/module.c
+++ b/arch/riscv/kernel/module.c
@@ -392,12 +392,10 @@ int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab,
 }
 
 #if defined(CONFIG_MMU) && defined(CONFIG_64BIT)
-#define VMALLOC_MODULE_START \
-	 max(PFN_ALIGN((unsigned long)&_end - SZ_2G), VMALLOC_START)
 void *module_alloc(unsigned long size)
 {
 	return __vmalloc_node_range(size, 1, VMALLOC_MODULE_START,
-				    VMALLOC_END, GFP_KERNEL,
+				    VMALLOC_MODULE_END, GFP_KERNEL,
 				    PAGE_KERNEL_EXEC, 0, NUMA_NO_NODE,
 				    __builtin_return_address(0));
 }
diff --git a/arch/riscv/kernel/vmlinux.lds.S b/arch/riscv/kernel/vmlinux.lds.S
index 0339b6bbe11a..a9abde62909f 100644
--- a/arch/riscv/kernel/vmlinux.lds.S
+++ b/arch/riscv/kernel/vmlinux.lds.S
@@ -4,7 +4,8 @@
  * Copyright (C) 2017 SiFive
  */
 
-#define LOAD_OFFSET PAGE_OFFSET
+#include <asm/pgtable.h>
+#define LOAD_OFFSET KERNEL_LINK_ADDR
 #include <asm/vmlinux.lds.h>
 #include <asm/page.h>
 #include <asm/cache.h>
diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
index 736de6c8739f..71da78914645 100644
--- a/arch/riscv/mm/init.c
+++ b/arch/riscv/mm/init.c
@@ -22,6 +22,9 @@
 
 #include "../kernel/head.h"
 
+unsigned long kernel_virt_addr = KERNEL_VIRT_ADDR;
+EXPORT_SYMBOL(kernel_virt_addr);
+
 unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]
 							__page_aligned_bss;
 EXPORT_SYMBOL(empty_zero_page);
@@ -178,8 +181,12 @@ void __init setup_bootmem(void)
 }
 
 #ifdef CONFIG_MMU
+/* Offset between linear mapping virtual address and kernel load address */
 unsigned long va_pa_offset;
 EXPORT_SYMBOL(va_pa_offset);
+/* Offset between kernel mapping virtual address and kernel load address */
+unsigned long va_kernel_pa_offset;
+EXPORT_SYMBOL(va_kernel_pa_offset);
 unsigned long pfn_base;
 EXPORT_SYMBOL(pfn_base);
 
@@ -271,7 +278,7 @@ static phys_addr_t __init alloc_pmd(uintptr_t va)
 	if (mmu_enabled)
 		return memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE);
 
-	pmd_num = (va - PAGE_OFFSET) >> PGDIR_SHIFT;
+	pmd_num = (va - kernel_virt_addr) >> PGDIR_SHIFT;
 	BUG_ON(pmd_num >= NUM_EARLY_PMDS);
 	return (uintptr_t)&early_pmd[pmd_num * PTRS_PER_PMD];
 }
@@ -372,14 +379,30 @@ static uintptr_t __init best_map_size(phys_addr_t base, phys_addr_t size)
 #error "setup_vm() is called from head.S before relocate so it should not use absolute addressing."
 #endif
 
+static uintptr_t load_pa, load_sz;
+
+static void __init create_kernel_page_table(pgd_t *pgdir, uintptr_t map_size)
+{
+	uintptr_t va, end_va;
+
+	end_va = kernel_virt_addr + load_sz;
+	for (va = kernel_virt_addr; va < end_va; va += map_size)
+		create_pgd_mapping(pgdir, va,
+				   load_pa + (va - kernel_virt_addr),
+				   map_size, PAGE_KERNEL_EXEC);
+}
+
 asmlinkage void __init setup_vm(uintptr_t dtb_pa)
 {
 	uintptr_t va, end_va;
-	uintptr_t load_pa = (uintptr_t)(&_start);
-	uintptr_t load_sz = (uintptr_t)(&_end) - load_pa;
 	uintptr_t map_size = best_map_size(load_pa, MAX_EARLY_MAPPING_SIZE);
 
+	load_pa = (uintptr_t)(&_start);
+	load_sz = (uintptr_t)(&_end) - load_pa;
+
 	va_pa_offset = PAGE_OFFSET - load_pa;
+	va_kernel_pa_offset = kernel_virt_addr - load_pa;
+
 	pfn_base = PFN_DOWN(load_pa);
 
 	/*
@@ -402,26 +425,22 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa)
 	create_pmd_mapping(fixmap_pmd, FIXADDR_START,
 			   (uintptr_t)fixmap_pte, PMD_SIZE, PAGE_TABLE);
 	/* Setup trampoline PGD and PMD */
-	create_pgd_mapping(trampoline_pg_dir, PAGE_OFFSET,
+	create_pgd_mapping(trampoline_pg_dir, kernel_virt_addr,
 			   (uintptr_t)trampoline_pmd, PGDIR_SIZE, PAGE_TABLE);
-	create_pmd_mapping(trampoline_pmd, PAGE_OFFSET,
+	create_pmd_mapping(trampoline_pmd, kernel_virt_addr,
 			   load_pa, PMD_SIZE, PAGE_KERNEL_EXEC);
 #else
 	/* Setup trampoline PGD */
-	create_pgd_mapping(trampoline_pg_dir, PAGE_OFFSET,
+	create_pgd_mapping(trampoline_pg_dir, kernel_virt_addr,
 			   load_pa, PGDIR_SIZE, PAGE_KERNEL_EXEC);
 #endif
 
 	/*
-	 * Setup early PGD covering entire kernel which will allows
+	 * Setup early PGD covering entire kernel which will allow
 	 * us to reach paging_init(). We map all memory banks later
 	 * in setup_vm_final() below.
 	 */
-	end_va = PAGE_OFFSET + load_sz;
-	for (va = PAGE_OFFSET; va < end_va; va += map_size)
-		create_pgd_mapping(early_pg_dir, va,
-				   load_pa + (va - PAGE_OFFSET),
-				   map_size, PAGE_KERNEL_EXEC);
+	create_kernel_page_table(early_pg_dir, map_size);
 
 	/* Create fixed mapping for early FDT parsing */
 	end_va = __fix_to_virt(FIX_FDT) + FIX_FDT_SIZE;
@@ -441,6 +460,7 @@ static void __init setup_vm_final(void)
 	uintptr_t va, map_size;
 	phys_addr_t pa, start, end;
 	struct memblock_region *reg;
+	static struct vm_struct vm_kernel = { 0 };
 
 	/* Set mmu_enabled flag */
 	mmu_enabled = true;
@@ -467,10 +487,22 @@ static void __init setup_vm_final(void)
 		for (pa = start; pa < end; pa += map_size) {
 			va = (uintptr_t)__va(pa);
 			create_pgd_mapping(swapper_pg_dir, va, pa,
-					   map_size, PAGE_KERNEL_EXEC);
+					   map_size, PAGE_KERNEL);
 		}
 	}
 
+	/* Map the kernel */
+	create_kernel_page_table(swapper_pg_dir, PMD_SIZE);
+
+	/* Reserve the vmalloc area occupied by the kernel */
+	vm_kernel.addr = (void *)kernel_virt_addr;
+	vm_kernel.phys_addr = load_pa;
+	vm_kernel.size = (load_sz + PMD_SIZE - 1) & ~(PMD_SIZE - 1);
+	vm_kernel.flags = VM_MAP | VM_NO_GUARD;
+	vm_kernel.caller = __builtin_return_address(0);
+
+	vm_area_add_early(&vm_kernel);
+
 	/* Clear fixmap PTE and PMD mappings */
 	clear_fixmap(FIX_PTE);
 	clear_fixmap(FIX_PMD);
diff --git a/arch/riscv/mm/physaddr.c b/arch/riscv/mm/physaddr.c
index e8e4dcd39fed..35703d5ef5fd 100644
--- a/arch/riscv/mm/physaddr.c
+++ b/arch/riscv/mm/physaddr.c
@@ -23,7 +23,7 @@ EXPORT_SYMBOL(__virt_to_phys);
 
 phys_addr_t __phys_addr_symbol(unsigned long x)
 {
-	unsigned long kernel_start = (unsigned long)PAGE_OFFSET;
+	unsigned long kernel_start = (unsigned long)kernel_virt_addr;
 	unsigned long kernel_end = (unsigned long)_end;
 
 	/*
-- 
2.20.1


^ permalink raw reply related

* [PATCH v5 0/4] vmalloc kernel mapping and relocatable kernel
From: Alexandre Ghiti @ 2020-06-07  7:59 UTC (permalink / raw)
  To: Michael Ellerman, Benjamin Herrenschmidt, Paul Mackerras,
	Paul Walmsley, Palmer Dabbelt, Albert Ou, Anup Patel, Atish Patra,
	Zong Li, linux-kernel, linuxppc-dev, linux-riscv
  Cc: Alexandre Ghiti

This patchset originally implemented relocatable kernel support but now          
also moves the kernel mapping into the vmalloc zone.                             
                                                                                 
The first patch explains why we need to move the kernel into vmalloc             
zone (instead of memcpying it around). That patch should ease KASLR              
implementation a lot.                                                            
                                                                                 
The second patch allows to build relocatable kernels but is not selected         
by default.                                                                      
                                                                                 
The third and fourth patches take advantage of an already existing powerpc       
script that checks relocations at compile-time, and uses it for riscv.           
                                                                                 
Changes in v5:
  * Add "static __init" to create_kernel_page_table function as reported by
    Kbuild test robot
  * Add reviewed-by from Zong
  * Rebase onto v5.7

Changes in v4:                                                                   
  * Fix BPF region that overlapped with kernel's as suggested by Zong            
  * Fix end of module region that could be larger than 2GB as suggested by Zong  
  * Fix the size of the vm area reserved for the kernel as we could lose         
    PMD_SIZE if the size was already aligned on PMD_SIZE                         
  * Split compile time relocations check patch into 2 patches as suggested by Anup
  * Applied Reviewed-by from Zong and Anup                                       
                                                                                 
Changes in v3:                                                                   
  * Move kernel mapping to vmalloc                                               
                                                                                 
Changes in v2:                                                                   
  * Make RELOCATABLE depend on MMU as suggested by Anup                          
  * Rename kernel_load_addr into kernel_virt_addr as suggested by Anup           
  * Use __pa_symbol instead of __pa, as suggested by Zong                        
  * Rebased on top of v5.6-rc3                                                   
  * Tested with sv48 patchset                                                    
  * Add Reviewed/Tested-by from Zong and Anup 

Alexandre Ghiti (4):
  riscv: Move kernel mapping to vmalloc zone
  riscv: Introduce CONFIG_RELOCATABLE
  powerpc: Move script to check relocations at compile time in scripts/
  riscv: Check relocations at compile time

 arch/powerpc/tools/relocs_check.sh |  18 +----
 arch/riscv/Kconfig                 |  12 +++
 arch/riscv/Makefile                |   5 +-
 arch/riscv/Makefile.postlink       |  36 +++++++++
 arch/riscv/boot/loader.lds.S       |   3 +-
 arch/riscv/include/asm/page.h      |  10 ++-
 arch/riscv/include/asm/pgtable.h   |  38 ++++++---
 arch/riscv/kernel/head.S           |   3 +-
 arch/riscv/kernel/module.c         |   4 +-
 arch/riscv/kernel/vmlinux.lds.S    |   9 ++-
 arch/riscv/mm/Makefile             |   4 +
 arch/riscv/mm/init.c               | 121 +++++++++++++++++++++++++----
 arch/riscv/mm/physaddr.c           |   2 +-
 arch/riscv/tools/relocs_check.sh   |  26 +++++++
 scripts/relocs_check.sh            |  20 +++++
 15 files changed, 259 insertions(+), 52 deletions(-)
 create mode 100644 arch/riscv/Makefile.postlink
 create mode 100755 arch/riscv/tools/relocs_check.sh
 create mode 100755 scripts/relocs_check.sh

-- 
2.20.1


^ permalink raw reply

* Re: [PATCH v10 6/6] powerpc/papr_scm: Implement support for PAPR_PDSM_HEALTH
From: Segher Boessenkool @ 2020-06-06 18:09 UTC (permalink / raw)
  To: Vaibhav Jain
  Cc: Santosh Sivaraj, linux-nvdimm, Aneesh Kumar K . V, linuxppc-dev,
	linux-kernel, Steven Rostedt, Oliver O'Halloran, Dan Williams,
	Ira Weiny
In-Reply-To: <87wo4kfk58.fsf@linux.ibm.com>

On Sat, Jun 06, 2020 at 06:04:11PM +0530, Vaibhav Jain wrote:
> >> +	/* update health struct with various flags derived from health bitmap */
> >> +	health = (struct nd_papr_pdsm_health) {
> >> +		.dimm_unarmed = p->health_bitmap & PAPR_PMEM_UNARMED_MASK,
> >> +		.dimm_bad_shutdown = p->health_bitmap & PAPR_PMEM_BAD_SHUTDOWN_MASK,
> >> +		.dimm_bad_restore = p->health_bitmap & PAPR_PMEM_BAD_RESTORE_MASK,
> >> +		.dimm_encrypted = p->health_bitmap & PAPR_PMEM_ENCRYPTED,
> >> +		.dimm_locked = p->health_bitmap & PAPR_PMEM_SCRUBBED_AND_LOCKED,
> >> +		.dimm_scrubbed = p->health_bitmap & PAPR_PMEM_SCRUBBED_AND_LOCKED,
> >
> > Are you sure these work?  These are not assignments to a bool so I don't think
> > gcc will do what you want here.
> Yeah, somehow this slipped by and didnt show up in my tests. I checked
> the assembly dump and seems GCC was silently skipping initializing these
> fields without making any noise.

It's not "skipping" that, it initialises the field to 0, just like your
code said it should :-)

If you think GCC should warn for this, please open a PR?  It is *normal*
for bit-fields to be truncated from what is assigned to it, but maybe we
could warn for it in the 1-bit case (we currently don't seem to, even
when the bit-field type is _Bool).

Thanks,


Segher

^ permalink raw reply

* Re: [PATCH v10 6/6] powerpc/papr_scm: Implement support for PAPR_PDSM_HEALTH
From: Vaibhav Jain @ 2020-06-06 12:34 UTC (permalink / raw)
  To: Ira Weiny
  Cc: Santosh Sivaraj, linux-nvdimm, Aneesh Kumar K . V, linux-kernel,
	Steven Rostedt, Oliver O'Halloran, Dan Williams, linuxppc-dev
In-Reply-To: <20200605183655.GP1505637@iweiny-DESK2.sc.intel.com>

Ira Weiny <ira.weiny@intel.com> writes:

> On Fri, Jun 05, 2020 at 05:11:36AM +0530, Vaibhav Jain wrote:
>> This patch implements support for PDSM request 'PAPR_PDSM_HEALTH'
>> that returns a newly introduced 'struct nd_papr_pdsm_health' instance
>> containing dimm health information back to user space in response to
>> ND_CMD_CALL. This functionality is implemented in newly introduced
>> papr_pdsm_health() that queries the nvdimm health information and
>> then copies this information to the package payload whose layout is
>> defined by 'struct nd_papr_pdsm_health'.
>> 
>> Cc: "Aneesh Kumar K . V" <aneesh.kumar@linux.ibm.com>
>> Cc: Dan Williams <dan.j.williams@intel.com>
>> Cc: Michael Ellerman <mpe@ellerman.id.au>
>> Cc: Ira Weiny <ira.weiny@intel.com>
>> Signed-off-by: Vaibhav Jain <vaibhav@linux.ibm.com>
>> ---
>> Changelog:
>> 
>> v9..v10:
>> * Removed code in papr_pdsm_health that performed validation on pdsm
>>   payload version and corrosponding struct and defines used for
>>   validation of payload version.
>> * Dropped usage of struct papr_pdsm_health in 'struct
>>   papr_scm_priv'. Instead papr_psdm_health() now uses
>>   'papr_scm_priv.health_bitmap' to populate the pdsm payload.
>> * Above change also fixes the problem where this patch was removing
>>   the code that was previously introduced in this patch-series.
>>   [ Ira ]
>> * Introduced a new def ND_PDSM_ENVELOPE_HDR_SIZE that indicates the
>>   space allocated to 'struct nd_pdsm_cmd_pkg' fields except 'struct
>>   nd_cmd_pkg'. This def is useful in validating payload sizes.
>> * Reworked papr_pdsm_health() to enforce a specific payload size for
>>   'PAPR_PDSM_HEALTH' pdsm request.
>> 
>> Resend:
>> * Added ack from Aneesh.
>> 
>> v8..v9:
>> * s/PAPR_SCM_PDSM_HEALTH/PAPR_PDSM_HEALTH/g  [ Dan , Aneesh ]
>> * s/PAPR_SCM_PSDM_DIMM_*/PAPR_PDSM_DIMM_*/g
>> * Renamed papr_scm_get_health() to papr_psdm_health()
>> * Updated patch description to replace papr-scm dimm with nvdimm.
>> 
>> v7..v8:
>> * None
>> 
>> Resend:
>> * None
>> 
>> v6..v7:
>> * Updated flags_show() to use seq_buf_printf(). [Mpe]
>> * Updated papr_scm_get_health() to use newly introduced
>>   __drc_pmem_query_health() bypassing the cache [Mpe].
>> 
>> v5..v6:
>> * Added attribute '__packed' to 'struct nd_papr_pdsm_health_v1' to
>>   gaurd against possibility of different compilers adding different
>>   paddings to the struct [ Dan Williams ]
>> 
>> * Updated 'struct nd_papr_pdsm_health_v1' to use __u8 instead of
>>   'bool' and also updated drc_pmem_query_health() to take this into
>>   account. [ Dan Williams ]
>> 
>> v4..v5:
>> * None
>> 
>> v3..v4:
>> * Call the DSM_PAPR_SCM_HEALTH service function from
>>   papr_scm_service_dsm() instead of papr_scm_ndctl(). [Aneesh]
>> 
>> v2..v3:
>> * Updated struct nd_papr_scm_dimm_health_stat_v1 to use '__xx' types
>>   as its exported to the userspace [Aneesh]
>> * Changed the constants DSM_PAPR_SCM_DIMM_XX indicating dimm health
>>   from enum to #defines [Aneesh]
>> 
>> v1..v2:
>> * New patch in the series
>> ---
>>  arch/powerpc/include/uapi/asm/papr_pdsm.h | 33 +++++++++++
>>  arch/powerpc/platforms/pseries/papr_scm.c | 70 +++++++++++++++++++++++
>>  2 files changed, 103 insertions(+)
>> 
>> diff --git a/arch/powerpc/include/uapi/asm/papr_pdsm.h b/arch/powerpc/include/uapi/asm/papr_pdsm.h
>> index 8b1a4f8fa316..c4c990ede5d4 100644
>> --- a/arch/powerpc/include/uapi/asm/papr_pdsm.h
>> +++ b/arch/powerpc/include/uapi/asm/papr_pdsm.h
>> @@ -71,12 +71,17 @@ struct nd_pdsm_cmd_pkg {
>>  	__u8 payload[];		/* In/Out: Sub-cmd data buffer */
>>  } __packed;
>>  
>> +/* Calculate size used by the pdsm header fields minus 'struct nd_cmd_pkg' */
>> +#define ND_PDSM_ENVELOPE_HDR_SIZE \
>> +	(sizeof(struct nd_pdsm_cmd_pkg) - sizeof(struct nd_cmd_pkg))
>> +
[ ]
>
> This is kind of a weird name for this.
>
> Isn't this just the ND PDSM header size?  What is 'envelope' mean
> here?
Was referring to 'nd_cmd_pkg' envelop here. But yes removing 'ENVELOPE'
should make it more concise. Have already done this in v11 of this patch.

>
>>  /*
>>   * Methods to be embedded in ND_CMD_CALL request. These are sent to the kernel
>>   * via 'nd_pdsm_cmd_pkg.hdr.nd_command' member of the ioctl struct
>>   */
>>  enum papr_pdsm {
>>  	PAPR_PDSM_MIN = 0x0,
>> +	PAPR_PDSM_HEALTH,
>>  	PAPR_PDSM_MAX,
>>  };
>>  
>> @@ -95,4 +100,32 @@ static inline void *pdsm_cmd_to_payload(struct nd_pdsm_cmd_pkg *pcmd)
>>  		return (void *)(pcmd->payload);
>>  }
>>  
>> +/* Various nvdimm health indicators */
>> +#define PAPR_PDSM_DIMM_HEALTHY       0
>> +#define PAPR_PDSM_DIMM_UNHEALTHY     1
>> +#define PAPR_PDSM_DIMM_CRITICAL      2
>> +#define PAPR_PDSM_DIMM_FATAL         3
>> +
>> +/*
>> + * Struct exchanged between kernel & ndctl in for PAPR_PDSM_HEALTH
>> + * Various flags indicate the health status of the dimm.
>> + *
>> + * dimm_unarmed		: Dimm not armed. So contents wont persist.
>> + * dimm_bad_shutdown	: Previous shutdown did not persist contents.
>> + * dimm_bad_restore	: Contents from previous shutdown werent restored.
>> + * dimm_scrubbed	: Contents of the dimm have been scrubbed.
>> + * dimm_locked		: Contents of the dimm cant be modified until CEC reboot
>> + * dimm_encrypted	: Contents of dimm are encrypted.
>> + * dimm_health		: Dimm health indicator. One of PAPR_PDSM_DIMM_XXXX
>> + */
>> +struct nd_papr_pdsm_health {
>> +	__u8 dimm_unarmed;
>> +	__u8 dimm_bad_shutdown;
>> +	__u8 dimm_bad_restore;
>> +	__u8 dimm_scrubbed;
>> +	__u8 dimm_locked;
>> +	__u8 dimm_encrypted;
>> +	__u16 dimm_health;
>> +} __packed;
>> +
>>  #endif /* _UAPI_ASM_POWERPC_PAPR_PDSM_H_ */
>> diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c
>> index 05eb56ecab5e..984942be24c1 100644
>> --- a/arch/powerpc/platforms/pseries/papr_scm.c
>> +++ b/arch/powerpc/platforms/pseries/papr_scm.c
>> @@ -421,6 +421,72 @@ static int is_cmd_valid(struct nvdimm *nvdimm, unsigned int cmd, void *buf,
>>  	return 0;
>>  }
>>  
>> +/* Fetch the DIMM health info and populate it in provided package. */
>> +static int papr_pdsm_health(struct papr_scm_priv *p,
>> +			    struct nd_pdsm_cmd_pkg *pkg)
>> +{
>> +	int rc;
>> +	struct nd_papr_pdsm_health health = { 0 };
>> +	u16 copysize = sizeof(struct nd_papr_pdsm_health);
>> +	u16 payload_size = pkg->hdr.nd_size_out - ND_PDSM_ENVELOPE_HDR_SIZE;
>> +
>> +	/* Ensure correct payload size that can hold struct nd_papr_pdsm_health */
>> +	if (payload_size != copysize) {
>> +		dev_dbg(&p->pdev->dev,
>> +			"Unexpected payload-size (%u). Expected (%u)",
>> +			pkg->hdr.nd_size_out, copysize);
>> +		rc = -ENOSPC;
>> +		goto out;
>> +	}
>> +
>> +	/* Ensure dimm health mutex is taken preventing concurrent access */
>> +	rc = mutex_lock_interruptible(&p->health_mutex);
>> +	if (rc)
>> +		goto out;
>> +
>> +	/* Always fetch upto date dimm health data ignoring cached values */
>> +	rc = __drc_pmem_query_health(p);
>> +	if (rc) {
>> +		mutex_unlock(&p->health_mutex);
>> +		goto out;
>> +	}
>> +
>> +	/* update health struct with various flags derived from health bitmap */
>> +	health = (struct nd_papr_pdsm_health) {
>> +		.dimm_unarmed = p->health_bitmap & PAPR_PMEM_UNARMED_MASK,
>> +		.dimm_bad_shutdown = p->health_bitmap & PAPR_PMEM_BAD_SHUTDOWN_MASK,
>> +		.dimm_bad_restore = p->health_bitmap & PAPR_PMEM_BAD_RESTORE_MASK,
>> +		.dimm_encrypted = p->health_bitmap & PAPR_PMEM_ENCRYPTED,
>> +		.dimm_locked = p->health_bitmap & PAPR_PMEM_SCRUBBED_AND_LOCKED,
>> +		.dimm_scrubbed = p->health_bitmap & PAPR_PMEM_SCRUBBED_AND_LOCKED,
>
> Are you sure these work?  These are not assignments to a bool so I don't think
> gcc will do what you want here.
Yeah, somehow this slipped by and didnt show up in my tests. I checked
the assembly dump and seems GCC was silently skipping initializing these
fields without making any noise. Have rectified this in v11 by changing
these designated initilizers to

           .dimm_locked = !!(p->health_bitmap & PAPR_PMEM_SCRUBBED_AND_LOCKED)

Thanks for catching this.

~ Vaibhav
>
> Ira
>
>> +		.dimm_health = PAPR_PDSM_DIMM_HEALTHY,
>> +	};
>> +
>> +	/* Update field dimm_health based on health_bitmap flags */
>> +	if (p->health_bitmap & PAPR_PMEM_HEALTH_FATAL)
>> +		health.dimm_health = PAPR_PDSM_DIMM_FATAL;
>> +	else if (p->health_bitmap & PAPR_PMEM_HEALTH_CRITICAL)
>> +		health.dimm_health = PAPR_PDSM_DIMM_CRITICAL;
>> +	else if (p->health_bitmap & PAPR_PMEM_HEALTH_UNHEALTHY)
>> +		health.dimm_health = PAPR_PDSM_DIMM_UNHEALTHY;
>> +
>> +	/* struct populated hence can release the mutex now */
>> +	mutex_unlock(&p->health_mutex);
>> +
>> +	dev_dbg(&p->pdev->dev, "Copying payload size=%u\n", copysize);
>> +
>> +	/* Copy the health struct to the payload */
>> +	memcpy(pdsm_cmd_to_payload(pkg), &health, copysize);
>> +
>> +	/* Update fw size including size of struct nd_pdsm_cmd_pkg fields */
>> +	pkg->hdr.nd_fw_size = copysize + ND_PDSM_ENVELOPE_HDR_SIZE;
>> +
>> +out:
>> +	dev_dbg(&p->pdev->dev, "completion code = %d\n", rc);
>> +
>> +	return rc;
>> +}
>> +
>>  /*
>>   * For a given pdsm request call an appropriate service function.
>>   * Note: Use 'nd_pdsm_cmd_pkg.cmd_status to report psdm servicing errors. Hence
>> @@ -435,6 +501,10 @@ static void papr_scm_service_pdsm(struct papr_scm_priv *p,
>>  
>>  	/* Call pdsm service function */
>>  	switch (pdsm) {
>> +	case PAPR_PDSM_HEALTH:
>> +		pkg->cmd_status = papr_pdsm_health(p, pkg);
>> +		break;
>> +
>>  	default:
>>  		dev_dbg(&p->pdev->dev, "PDSM[0x%x]: Unsupported PDSM request\n",
>>  			pdsm);
>> -- 
>> 2.26.2
>> 

-- 
Cheers
~ Vaibhav

^ permalink raw reply

* Re: [PATCH v10 4/6] powerpc/papr_scm: Improve error logging and handling papr_scm_ndctl()
From: Vaibhav Jain @ 2020-06-06 11:21 UTC (permalink / raw)
  To: Dan Williams, Ira Weiny
  Cc: Santosh Sivaraj, linux-nvdimm, Aneesh Kumar K . V,
	Linux Kernel Mailing List, Steven Rostedt, Oliver O'Halloran,
	linuxppc-dev
In-Reply-To: <CAPcyv4g2x7LV3ARRj-RBS1K84WNayr9oDcupzPQ1gtK1A_e+aQ@mail.gmail.com>

Hi Ira and Dan,

Thanks for reviewing this patch. Have updated the patch based on your
feedback to upadate cmd_rc only when the nd_cmd was handled and return
'0' in that case.

Other errors in case the nd_cmd was unrecognized or invalid result in
error returned from this functions as you suggested.

~ Vaibhav

Dan Williams <dan.j.williams@intel.com> writes:

> On Fri, Jun 5, 2020 at 10:13 AM Ira Weiny <ira.weiny@intel.com> wrote:
>>
>> On Fri, Jun 05, 2020 at 05:11:34AM +0530, Vaibhav Jain wrote:
>> > Since papr_scm_ndctl() can be called from outside papr_scm, its
>> > exposed to the possibility of receiving NULL as value of 'cmd_rc'
>> > argument. This patch updates papr_scm_ndctl() to protect against such
>> > possibility by assigning it pointer to a local variable in case cmd_rc
>> > == NULL.
>> >
>> > Finally the patch also updates the 'default' clause of the switch-case
>> > block removing a 'return' statement thereby ensuring that value of
>> > 'cmd_rc' is always logged when papr_scm_ndctl() returns.
>> >
>> > Cc: "Aneesh Kumar K . V" <aneesh.kumar@linux.ibm.com>
>> > Cc: Dan Williams <dan.j.williams@intel.com>
>> > Cc: Michael Ellerman <mpe@ellerman.id.au>
>> > Cc: Ira Weiny <ira.weiny@intel.com>
>> > Signed-off-by: Vaibhav Jain <vaibhav@linux.ibm.com>
>> > ---
>> > Changelog:
>> >
>> > v9..v10
>> > * New patch in the series
>>
>> Thanks for making this a separate patch it is easier to see what is going on
>> here.
>>
>> > ---
>> >  arch/powerpc/platforms/pseries/papr_scm.c | 10 ++++++++--
>> >  1 file changed, 8 insertions(+), 2 deletions(-)
>> >
>> > diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c
>> > index 0c091622b15e..6512fe6a2874 100644
>> > --- a/arch/powerpc/platforms/pseries/papr_scm.c
>> > +++ b/arch/powerpc/platforms/pseries/papr_scm.c
>> > @@ -355,11 +355,16 @@ static int papr_scm_ndctl(struct nvdimm_bus_descriptor *nd_desc,
>> >  {
>> >       struct nd_cmd_get_config_size *get_size_hdr;
>> >       struct papr_scm_priv *p;
>> > +     int rc;
>> >
>> >       /* Only dimm-specific calls are supported atm */
>> >       if (!nvdimm)
>> >               return -EINVAL;
>> >
>> > +     /* Use a local variable in case cmd_rc pointer is NULL */
>> > +     if (!cmd_rc)
>> > +             cmd_rc = &rc;
>> > +
>>
>> This protects you from the NULL.  However...
>>
>> >       p = nvdimm_provider_data(nvdimm);
>> >
>> >       switch (cmd) {
>> > @@ -381,12 +386,13 @@ static int papr_scm_ndctl(struct nvdimm_bus_descriptor *nd_desc,
>> >               break;
>> >
>> >       default:
>> > -             return -EINVAL;
>> > +             dev_dbg(&p->pdev->dev, "Unknown command = %d\n", cmd);
>> > +             *cmd_rc = -EINVAL;
>>
>> ... I think you are conflating rc and cmd_rc...
>>
>> >       }
>> >
>> >       dev_dbg(&p->pdev->dev, "returned with cmd_rc = %d\n", *cmd_rc);
>> >
>> > -     return 0;
>> > +     return *cmd_rc;
>>
>> ... this changes the behavior of the current commands.  Now if the underlying
>> papr_scm_meta_[get|set]() fails you return that failure as rc rather than 0.
>>
>> Is that ok?
>
> The expectation is that rc is "did the command get sent to the device,
> or did it fail for 'transport' reasons". The role of cmd_rc is to
> translate the specific status response of the command into a common
> error code. The expectations are:

>
> rc < 0: Error code, Linux terminated the ioctl before talking to hardware
>
> rc == 0: Linux successfully submitted the command to hardware, cmd_rc
> is valid for command specific response
>
> rc > 0: Linux successfully submitted the command, but detected that
> only a subset of the data was accepted for "write"-style commands, or
> that only subset of data was returned for "read"-style commands. I.e.
> short-write / short-read semantics. cmd_rc is valid in this case and
> its up to userspace to determine if a short transfer is an error or
> not.
>
>> Also 'logging cmd_rc' in the invalid cmd case does not seem quite right unless
>> you really want rc to be cmd_rc.
>>
>> The architecture is designed to separate errors which occur in the kernel vs
>> errors in the firmware/dimm.  Are they always the same?  The current code
>> differentiates them.
>
> Yeah, they're distinct, transport vs end-point / command-specific
> status returns.


-- 
Cheers
~ Vaibhav

^ permalink raw reply

* [PATCH] tty: serial: cpm_uart: Fix behaviour for non existing GPIOs
From: Christophe Leroy @ 2020-06-06  7:30 UTC (permalink / raw)
  To: Greg Kroah-Hartman, Jiri Slaby, Linus Walleij
  Cc: linuxppc-dev, linux-kernel, linux-serial

devm_gpiod_get_index() doesn't return NULL but -ENOENT when the
requested GPIO doesn't exist,  leading to the following messages:

[    2.742468] gpiod_direction_input: invalid GPIO (errorpointer)
[    2.748147] can't set direction for gpio #2: -2
[    2.753081] gpiod_direction_input: invalid GPIO (errorpointer)
[    2.758724] can't set direction for gpio #3: -2
[    2.763666] gpiod_direction_output: invalid GPIO (errorpointer)
[    2.769394] can't set direction for gpio #4: -2
[    2.774341] gpiod_direction_input: invalid GPIO (errorpointer)
[    2.779981] can't set direction for gpio #5: -2
[    2.784545] ff000a20.serial: ttyCPM1 at MMIO 0xfff00a20 (irq = 39, base_baud = 8250000) is a CPM UART

Use IS_ERR_OR_NULL() to properly check gpiod validity.

Fixes: 97cbaf2c829b ("tty: serial: cpm_uart: Convert to use GPIO descriptors")
Cc: stable@vger.kernel.org
Cc: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
---
 drivers/tty/serial/cpm_uart/cpm_uart_core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/tty/serial/cpm_uart/cpm_uart_core.c b/drivers/tty/serial/cpm_uart/cpm_uart_core.c
index a04f74d2e854..3cbe24802296 100644
--- a/drivers/tty/serial/cpm_uart/cpm_uart_core.c
+++ b/drivers/tty/serial/cpm_uart/cpm_uart_core.c
@@ -1217,7 +1217,7 @@ static int cpm_uart_init_port(struct device_node *np,
 
 		gpiod = devm_gpiod_get_index(dev, NULL, i, GPIOD_ASIS);
 
-		if (gpiod) {
+		if (!IS_ERR_OR_NULL(gpiod)) {
 			if (i == GPIO_RTS || i == GPIO_DTR)
 				ret = gpiod_direction_output(gpiod, 0);
 			else
-- 
2.25.0


^ permalink raw reply related

* [PATCH] powerpc/fadump: update kernel logs before fadump crash begins
From: Sourabh Jain @ 2020-06-06  4:45 UTC (permalink / raw)
  To: mpe; +Cc: mahesh, linux-kernel, hbathini, linuxppc-dev

When we hit the fadump crash via the panic path the pstore update is
missing. This is observed when commit 8341f2f222d7 ("sysrq: Use panic()
to force a crash") changed the sysrq-trigger to take panic path instead
of die path.

The PPC panic event handler addresses the system panic in two different
ways based on the system configuration. It first allows the FADump (if
configured) to handle the kernel panic else forwards the call to platform
specific panic function. Now pstore update is missing only if FADump
handles the kernel panic, the platform-specific panic function do update
the pstore by calling panic_flush_kmsg_end function.

The simplest approach to handle this issue is to add pstore update in PPC
panic handler before FADump handles the panic. But this leads to multiple
pstore updates in case FADump is not configured and platform-specific
panic function serves the kernel panic.

Hence the function panic_flush_kmsg_end (used by the platform-specific
panic function to update the kernel logs) is split into two functions, one
will update the pstore (called in ppc panic event handler) and others will
flush the kmsg on the console (called in platform specific panic function).

Signed-off-by: Sourabh Jain <sourabhjain@linux.ibm.com>
---
 arch/powerpc/include/asm/bug.h         |  2 ++
 arch/powerpc/kernel/setup-common.c     |  1 +
 arch/powerpc/kernel/traps.c            | 12 +++++++++++-
 arch/powerpc/platforms/ps3/setup.c     |  2 +-
 arch/powerpc/platforms/pseries/setup.c |  2 +-
 5 files changed, 16 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/include/asm/bug.h b/arch/powerpc/include/asm/bug.h
index 338f36cd9934..9268551a69bc 100644
--- a/arch/powerpc/include/asm/bug.h
+++ b/arch/powerpc/include/asm/bug.h
@@ -118,6 +118,8 @@ extern void _exception_pkey(struct pt_regs *, unsigned long, int);
 extern void die(const char *, struct pt_regs *, long);
 extern bool die_will_crash(void);
 extern void panic_flush_kmsg_start(void);
+extern void panic_flush_kmsg_dump(void);
+extern void panic_flush_kmsg_console(void);
 extern void panic_flush_kmsg_end(void);
 #endif /* !__ASSEMBLY__ */
 
diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
index 7f8c890360fe..2d546a9e8bb1 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -699,6 +699,7 @@ static int ppc_panic_event(struct notifier_block *this,
 	 * want interrupts to be hard disabled.
 	 */
 	hard_irq_disable();
+	panic_flush_kmsg_dump();
 
 	/*
 	 * If firmware-assisted dump has been registered then trigger
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index 82a3438300fd..bb6bc19992b3 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -169,15 +169,25 @@ extern void panic_flush_kmsg_start(void)
 	bust_spinlocks(1);
 }
 
-extern void panic_flush_kmsg_end(void)
+extern void panic_flush_kmsg_dump(void)
 {
 	printk_safe_flush_on_panic();
 	kmsg_dump(KMSG_DUMP_PANIC);
+}
+
+extern void panic_flush_kmsg_console(void)
+{
 	bust_spinlocks(0);
 	debug_locks_off();
 	console_flush_on_panic(CONSOLE_FLUSH_PENDING);
 }
 
+extern void panic_flush_kmsg_end(void)
+{
+	panic_flush_kmsg_dump();
+	panic_flush_kmsg_console();
+}
+
 static unsigned long oops_begin(struct pt_regs *regs)
 {
 	int cpu;
diff --git a/arch/powerpc/platforms/ps3/setup.c b/arch/powerpc/platforms/ps3/setup.c
index b29368931c56..f96ba34284a1 100644
--- a/arch/powerpc/platforms/ps3/setup.c
+++ b/arch/powerpc/platforms/ps3/setup.c
@@ -101,7 +101,7 @@ static void ps3_panic(char *str)
 	printk("   System does not reboot automatically.\n");
 	printk("   Please press POWER button.\n");
 	printk("\n");
-	panic_flush_kmsg_end();
+	panic_flush_kmsg_console();
 
 	while(1)
 		lv1_pause(1);
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index 0c8421dd01ab..66ecb88c4b8e 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -788,7 +788,7 @@ static void __init pSeries_setup_arch(void)
 
 static void pseries_panic(char *str)
 {
-	panic_flush_kmsg_end();
+	panic_flush_kmsg_console();
 	rtas_os_term(str);
 }
 
-- 
2.25.4


^ permalink raw reply related

* [powerpc:merge] BUILD SUCCESS ec7b8eb9bc7a519047485c95f7292b48f5b73fe6
From: kernel test robot @ 2020-06-06  3:48 UTC (permalink / raw)
  To: Michael Ellerman; +Cc: linuxppc-dev

tree/branch: https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git  merge
branch HEAD: ec7b8eb9bc7a519047485c95f7292b48f5b73fe6  module: break nested ARCH_HAS_STRICT_MODULE_RWX and STRICT_MODULE_RWX #ifdefs

elapsed time: 747m

configs tested: 134
configs skipped: 4

The following configs have been built successfully.
More configs may be tested in the coming days.

arm                                 defconfig
arm                              allyesconfig
arm                              allmodconfig
arm                               allnoconfig
arm64                            allyesconfig
arm64                               defconfig
arm64                            allmodconfig
arm64                             allnoconfig
powerpc                          allyesconfig
m68k                       m5475evb_defconfig
nios2                         10m50_defconfig
mips                      malta_kvm_defconfig
m68k                        mvme147_defconfig
arm                          imote2_defconfig
arm                         lpc18xx_defconfig
sh                 kfr2r09-romimage_defconfig
arm                           spitz_defconfig
c6x                              alldefconfig
arm                         orion5x_defconfig
powerpc                     mpc5200_defconfig
arc                        vdk_hs38_defconfig
sh                           se7724_defconfig
powerpc                     mpc512x_defconfig
sh                  sh7785lcr_32bit_defconfig
mips                           ip28_defconfig
arm                         s5pv210_defconfig
mips                           ip32_defconfig
mips                         tb0287_defconfig
ia64                        generic_defconfig
arc                             nps_defconfig
s390                             alldefconfig
h8300                               defconfig
i386                              allnoconfig
powerpc                      mgcoge_defconfig
mips                      bmips_stb_defconfig
arm                              alldefconfig
powerpc                      pasemi_defconfig
arc                    vdk_hs38_smp_defconfig
s390                              allnoconfig
arm                         lpc32xx_defconfig
riscv                             allnoconfig
mips                          ath79_defconfig
mips                     loongson1c_defconfig
arm                        magician_defconfig
arm                          pxa910_defconfig
arc                     nsimosci_hs_defconfig
alpha                            alldefconfig
arm                        mvebu_v5_defconfig
mips                        qi_lb60_defconfig
i386                             allyesconfig
i386                                defconfig
i386                              debian-10.3
ia64                             allmodconfig
ia64                                defconfig
ia64                              allnoconfig
ia64                             allyesconfig
m68k                             allmodconfig
m68k                              allnoconfig
m68k                           sun3_defconfig
m68k                                defconfig
m68k                             allyesconfig
nios2                               defconfig
nios2                            allyesconfig
openrisc                            defconfig
c6x                              allyesconfig
c6x                               allnoconfig
openrisc                         allyesconfig
nds32                               defconfig
nds32                             allnoconfig
csky                             allyesconfig
csky                                defconfig
alpha                               defconfig
alpha                            allyesconfig
xtensa                           allyesconfig
xtensa                              defconfig
h8300                            allmodconfig
h8300                            allyesconfig
arc                                 defconfig
arc                              allyesconfig
sh                               allmodconfig
sh                                allnoconfig
microblaze                        allnoconfig
mips                             allyesconfig
mips                              allnoconfig
mips                             allmodconfig
parisc                            allnoconfig
parisc                              defconfig
parisc                           allyesconfig
parisc                           allmodconfig
powerpc                          rhel-kconfig
powerpc                          allmodconfig
powerpc                           allnoconfig
powerpc                             defconfig
i386                 randconfig-a001-20200605
i386                 randconfig-a006-20200605
i386                 randconfig-a002-20200605
i386                 randconfig-a005-20200605
i386                 randconfig-a004-20200605
i386                 randconfig-a003-20200605
x86_64               randconfig-a002-20200605
x86_64               randconfig-a001-20200605
x86_64               randconfig-a003-20200605
x86_64               randconfig-a004-20200605
x86_64               randconfig-a005-20200605
i386                 randconfig-a014-20200605
i386                 randconfig-a015-20200605
i386                 randconfig-a011-20200605
i386                 randconfig-a016-20200605
i386                 randconfig-a012-20200605
i386                 randconfig-a013-20200605
x86_64               randconfig-a006-20200605
riscv                            allyesconfig
riscv                               defconfig
riscv                            allmodconfig
s390                             allyesconfig
s390                             allmodconfig
s390                                defconfig
sparc                            allyesconfig
sparc                               defconfig
sparc64                             defconfig
sparc64                           allnoconfig
sparc64                          allyesconfig
sparc64                          allmodconfig
um                                allnoconfig
um                                  defconfig
um                               allmodconfig
um                               allyesconfig
x86_64                                   rhel
x86_64                               rhel-7.6
x86_64                    rhel-7.6-kselftests
x86_64                         rhel-7.2-clear
x86_64                                    lkp
x86_64                              fedora-25
x86_64                                  kexec

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org

^ permalink raw reply

* Re: [musl] Re: ppc64le and 32-bit LE userland compatibility
From: Daniel Kolesa @ 2020-06-06  2:13 UTC (permalink / raw)
  To: Segher Boessenkool, musl
  Cc: Rich Felker, libc-alpha, eery, Will Springer,
	Palmer Dabbelt via binutils, via libc-dev, Michal Suchánek,
	linuxppc-dev, Joseph Myers
In-Reply-To: <20200606001210.GV31009@gate.crashing.org>

On Sat, Jun 6, 2020, at 02:12, Segher Boessenkool wrote:
> On Fri, Jun 05, 2020 at 11:59:32PM +0200, Daniel Kolesa wrote:
> > On Fri, Jun 5, 2020, at 19:27, Segher Boessenkool wrote:
> > > > Third party precompiled stuff doesn't really need to concern us, since none really exists.
> > > 
> > > ... Yet.  And if you claim you support ELFv2, not mentioning the ways
> > > your implementation deviates from it, users will be unhappy.
> > 
> > Will they?
> 
> Yes; not only *your* users, but also users of the "actual" ELFv2 (for
> BE), if anything starts using that: the ABI is defined, and at least at
> some point it worked, it is not unreasonable to think someone might
> want to start using it).

I guess you mean like, if third party software starts shipping builds using ELFv2 and people suddenly find these things don't work because $reasons. The ABI itself wouldn't cause issues here as at binary interface level we're still compatible, but problems may actually arise in case glibc has a modern HW targeting ELFv2 port with IEEE quad float at some point. (not to mention that would be 100% incompatible at binary level because symvers, so i want to resolve this until then either way)

> 
> Just don't pretend X and Y are the same if they are not, people do not
> like to be misled.  Just be clear upfront what the differences are, and
> everyone will be happy.  Confusing people and wasting their time won't
> make you very popular though ;-)
> 
> > The system explicitly mentions that the minimum target in the binary packages is 970. Users can't be expecting features that the hardware we support doesn't support :)
> 
> But you are not the only user of ELFv2.

Sure, the point was more like "ELFv2 + 970 minimum", both of which are explicitly mentioned, cannot really mean anything else than what it already does.

> 
> > Also, we're not the only ones that do this - there's musl of course, but there's also the BSDs. FreeBSD 13 uses ELFv2, and supports all the old hardware including processors without VMX, let alone VSX. OpenBSD is likely to take the same path. But I'm not opposed to making this explicit, if that's what it takes to make other people happy with it.
> 
> Yeah, please do :-)
> 
> > > > It's also still an upgrade over ELFv1 regardless (I mean, the same things apply there).
> > > 
> > > Yeah, in mostly minor ways, but it all adds up for sure.
> > 
> > It's made my life simpler on numerous occasions,
> 
> Great to hear that!
> 
> > and allowed us to bring in software that'd otherwise take significant patching (no software is complete until it has its own assembly implementation of coroutines or something like that :P),
> 
> .. but does it have a mail client?

Probably some ad-hoc, informally specified, bug-ridden implementations of half of Common Lisp as well...

> 
> > > That depends on what you call the average case.  Code that is control
> > > and memory-bound will not benefit much from *anything* :-)
> > 
> > Average case is, quite literally, an average case - i.e. the average of all the software packages shipped in a distro :)
> 
> Ah, mostly boring stuff :-)
> 
> > > Yeah, but it helps quite a bit if your system (shared) libraries get all
> > > improvements they can as well.
> > 
> > Well, glibc will still benefit automatically to a degree even if not built for a modern baseline, since it has runtime checks in place already; as for other things... well, at least for Void, I already mentioned before we're as much of a source distro as a binary one - people can easily rebuild things that bottleneck them, with modern CFLAGS, and still have things be interoperable.
> 
> Yeah, good point there.
> 
> > > I'm not trying to dissuade you from not requiring VSX and 2.07 -- this
> > > sounds like your best option, given the constraints.  I'm just saying
> > > the cost is not trivial (even ignoring the ABI divergence).
> > 
> > Of course the cost is there - it's just not something I can do anything about. I generally recommend that people who can run LE should run LE. We're a bi-endian distribution, so there is a complete, fully functional, ISA-2.07-baseline ppc64le variant (in fact, it's our best-supported port, with greatest repo coverage and testing), as well as the 970-targeting ppc64 variant.
> 
> Ah, so your BE target is mostly for legacy hardware?  That took a while
> to sink in, sorry!  :-)

That's what the average user on BE is expected to be. I'm not opinionated enough to force BE on an average modern hardware user, there's just too many drawbacks nowadays, some of them inherent, like limited graphics hardware support (the choice of HW is limited, modern GPUs don't work at all, older GPUs are limited to OpenGL 3.2 even if they support newer API, and even if newer GPUs *were* working, they'd be crippled performance-wise as modern GPUs are always LE these days - as a gamedev person I need my graphics stuff working, and I expect my RX 5700 XT to be more than a paperweight), nevertheless, if someone is opinionated enough to use BE despite everything when they don't have to, they're welcome and so are their patches.

> 
> > I know about the biarch case as well, and there is also multilib, as an even more elaborate form of that. That's not directly related to what I originally said, though
> 
> Biarch is why -m32 and -m64 can work at all (for building user binaries).
> My point is that this does *not* work for most finer ABI (or OS) -related
> points -- you really do need a toolchain built specifically for that
> config.

Indeed. But target triples don't give you the necessary distinctions. This is why I find that the "we're an everything-compiler" approach of Clang that's supposedly meant to simplify things to the user actually complicates them; with gcc, you can configure your toolchain for the target env when building it, with clang you are not given this option, all you can do is patch its source code, as the ideas of targeting everything and of configuring the compiler details for a target are inherently in conflict. And when cross-building the real complexity for the user lies in setting up the sysroot anyway, at least for complex things with actual dependencies.

> 
> 
> Segher
>

Daniel

^ permalink raw reply

* Re: [GIT PULL] Please pull powerpc/linux.git powerpc-5.8-1 tag
From: Michael Ellerman @ 2020-06-06  0:50 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: ego, emmanuel.nicolet, chenzhou10, jniethe5, linuxram, kernelfans,
	Linux Kernel Mailing List, st5pub, Oliver O'Halloran, huhai,
	Markus Elfring, rzinsly, leobras.c, mikey, Herbert Xu,
	Aneesh Kumar K.V, haren, michal.simek, mahesh, Takashi Iwai,
	kjain, leonardo, Naveen N. Rao, Ravi Bangoria, ajd, Arnd Bergmann,
	Stephen Rothwell, alistair, Nick Piggin, wangxiongfeng2, Qian Cai,
	clg, Nathan Chancellor, hbathini, Christophe Leroy, geoff,
	Dmitry Torokhov, Gustavo A. R. Silva, wsa, sbobroff, fbarrat,
	Christophe JAILLET, Andrew Morton, linuxppc-dev
In-Reply-To: <CAHk-=wg33hLZHjqcMhvWzmgNrE6Gv+xpEcFXUx7iUHS2t5QtdQ@mail.gmail.com>

Linus Torvalds <torvalds@linux-foundation.org> writes:
> On Fri, Jun 5, 2020 at 12:01 PM Linus Torvalds
> <torvalds@linux-foundation.org> wrote:
>>
>> ..and then when I actually compared whether I otherwise got the same
>> result as you, I realized that this all depends on the module tree.
>>
>> I'll go merge that first, and then re-do this all. Oh well.
>
> Ok, redone and pushed out. Can you check that the end result makes sense?

Yep, looks good to me. Has passed all my basic build & boot tests.

cheers

^ permalink raw reply

* Re: [GIT PULL] Please pull powerpc/linux.git powerpc-5.8-1 tag
From: Michael Ellerman @ 2020-06-06  0:45 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: ego, emmanuel.nicolet, chenzhou10, jniethe5, linuxram, kernelfans,
	Linux Kernel Mailing List, st5pub, Oliver O'Halloran, huhai,
	Markus Elfring, rzinsly, leobras.c, mikey, Herbert Xu,
	Aneesh Kumar K.V, haren, michal.simek, mahesh, Takashi Iwai,
	kjain, leonardo, Naveen N. Rao, Ravi Bangoria, ajd, Arnd Bergmann,
	Stephen Rothwell, alistair, Nick Piggin, wangxiongfeng2, Qian Cai,
	clg, Nathan Chancellor, hbathini, Christophe Leroy, geoff,
	Dmitry Torokhov, Gustavo A. R. Silva, wsa, sbobroff, fbarrat,
	Christophe JAILLET, Andrew Morton, linuxppc-dev
In-Reply-To: <CAHk-=wh5vSwYqF=YiKOOGBHE=hCwnes_ndbP4QOyjPK_Xocz7w@mail.gmail.com>

Linus Torvalds <torvalds@linux-foundation.org> writes:
> On Fri, Jun 5, 2020 at 9:38 AM Michael Ellerman <mpe@ellerman.id.au> wrote:
>>
>> I've pushed the result of my resolution of the conflicts to the powerpc/merge
>> branch, if you want to look at that, though I've also tried to describe it in
>> full below.
>
> I ended up doing the machine_check_exception() differently, because I
> felt the code itself was done wrong and I wanted to add a note about
> that.
>
> Having the same function have completely different semantics depending
> on a platform issue is just fundamentally wrong, and makes not just
> for fragile code, but also means that you can't do single image
> kernels.

Yeah I agree it's not that nicely structured.

In this case CONFIG_PPC_BOOK3S_64 is one of our top-level compile time
switches anyway, so the single image thing at least isn't a concern.

> It should be two different functions, possibly just
>
>    non_nmi_fn() { ... }
>
>    nmi_fn() { nmi_enter(); non_nmi_fn(); nmi_exit(); }
>
> and now you don't have odd rules for the same function that depends on
> how the platform happens to call it.

Agreed.

> I didn't do the above. I did something that looked like the old code,
> but had a comment. Oh well.

Thanks, we can restructure it later.

> But thanks for describing the merge, I'd have missed the place where
> there was a new use of pgd_oiffset().

Yeah I missed it to begin with :}

> ..and then when I actually compared whether I otherwise got the same
> result as you, I realized that this all depends on the module tree.
>
> I'll go merge that first, and then re-do this all. Oh well.

Darn it. I figured you'd have merged that by the time you saw my pull,
will mention anything similar in future.

cheers

^ permalink raw reply

* Re: [musl] Re: ppc64le and 32-bit LE userland compatibility
From: Segher Boessenkool @ 2020-06-06  0:12 UTC (permalink / raw)
  To: Daniel Kolesa
  Cc: Rich Felker, libc-alpha, eery, musl, Will Springer,
	Palmer Dabbelt via binutils, via libc-dev, Michal Suchánek,
	linuxppc-dev, Joseph Myers
In-Reply-To: <6b987f87-1eee-4772-9ecc-f4d9ce9233b6@www.fastmail.com>

On Fri, Jun 05, 2020 at 11:59:32PM +0200, Daniel Kolesa wrote:
> On Fri, Jun 5, 2020, at 19:27, Segher Boessenkool wrote:
> > > Third party precompiled stuff doesn't really need to concern us, since none really exists.
> > 
> > ... Yet.  And if you claim you support ELFv2, not mentioning the ways
> > your implementation deviates from it, users will be unhappy.
> 
> Will they?

Yes; not only *your* users, but also users of the "actual" ELFv2 (for
BE), if anything starts using that: the ABI is defined, and at least at
some point it worked, it is not unreasonable to think someone might
want to start using it).

Just don't pretend X and Y are the same if they are not, people do not
like to be misled.  Just be clear upfront what the differences are, and
everyone will be happy.  Confusing people and wasting their time won't
make you very popular though ;-)

> The system explicitly mentions that the minimum target in the binary packages is 970. Users can't be expecting features that the hardware we support doesn't support :)

But you are not the only user of ELFv2.

> Also, we're not the only ones that do this - there's musl of course, but there's also the BSDs. FreeBSD 13 uses ELFv2, and supports all the old hardware including processors without VMX, let alone VSX. OpenBSD is likely to take the same path. But I'm not opposed to making this explicit, if that's what it takes to make other people happy with it.

Yeah, please do :-)

> > > It's also still an upgrade over ELFv1 regardless (I mean, the same things apply there).
> > 
> > Yeah, in mostly minor ways, but it all adds up for sure.
> 
> It's made my life simpler on numerous occasions,

Great to hear that!

> and allowed us to bring in software that'd otherwise take significant patching (no software is complete until it has its own assembly implementation of coroutines or something like that :P),

.. but does it have a mail client?

> > That depends on what you call the average case.  Code that is control
> > and memory-bound will not benefit much from *anything* :-)
> 
> Average case is, quite literally, an average case - i.e. the average of all the software packages shipped in a distro :)

Ah, mostly boring stuff :-)

> > Yeah, but it helps quite a bit if your system (shared) libraries get all
> > improvements they can as well.
> 
> Well, glibc will still benefit automatically to a degree even if not built for a modern baseline, since it has runtime checks in place already; as for other things... well, at least for Void, I already mentioned before we're as much of a source distro as a binary one - people can easily rebuild things that bottleneck them, with modern CFLAGS, and still have things be interoperable.

Yeah, good point there.

> > I'm not trying to dissuade you from not requiring VSX and 2.07 -- this
> > sounds like your best option, given the constraints.  I'm just saying
> > the cost is not trivial (even ignoring the ABI divergence).
> 
> Of course the cost is there - it's just not something I can do anything about. I generally recommend that people who can run LE should run LE. We're a bi-endian distribution, so there is a complete, fully functional, ISA-2.07-baseline ppc64le variant (in fact, it's our best-supported port, with greatest repo coverage and testing), as well as the 970-targeting ppc64 variant.

Ah, so your BE target is mostly for legacy hardware?  That took a while
to sink in, sorry!  :-)

> I know about the biarch case as well, and there is also multilib, as an even more elaborate form of that. That's not directly related to what I originally said, though

Biarch is why -m32 and -m64 can work at all (for building user binaries).
My point is that this does *not* work for most finer ABI (or OS) -related
points -- you really do need a toolchain built specifically for that
config.


Segher

^ permalink raw reply

* Re: ppc64le and 32-bit LE userland compatibility
From: Will Springer @ 2020-06-05 23:54 UTC (permalink / raw)
  To: linuxppc-dev, Christophe Leroy
  Cc: libc-alpha, eery, daniel, musl, binutils, libc-dev
In-Reply-To: <2498690.q0ZmV6gNhb@sheen>

On Saturday, May 30, 2020 3:17:24 PM PDT Will Springer wrote:
> On Saturday, May 30, 2020 8:37:43 AM PDT Christophe Leroy wrote:
> > There is a series at
> > https://patchwork.ozlabs.org/project/linuxppc-dev/list/?series=173231
> > to switch powerpc to the Generic C VDSO.
> > 
> > Can you try and see whether it fixes your issue ?
> > 
> > Christophe
> 
> Sure thing, I spotted that after making the initial post. Will report
> back with results.
> 
> Will [she/her]

Sorry for the wait, I just sat down to work on this again yesterday.

Tested this series on top of stable/linux-5.7.y (5.7.0 at the time of 
writing), plus the one-line signal handler patch. Had to rewind to the 
state of powerpc/merge at the time of the mail before the patch would 
apply, then cherry-picked to 5.6 until I realized the patchset used some 
functionality that didn't land until 5.7, so I moved it there.

Good news is that `date` now works correctly with the vdso call in 32-bit 
LE. Bad news is it seems to have broken things on the 64-bit side—in my 
testing, Void kicks off runit but hangs after starting eudev, and in a 
Debian Stretch system, systemd doesn't get to the point of printing 
anything whatsoever. (I had to `init=/bin/sh` to confirm the date worked 
in ppcle, although in ppc64le running `date` also hung the system when it 
made the vdso call...) Not sure how to approach debugging that, so I'd 
appreciate any pointers.

Will [she/her]




^ permalink raw reply

* Re: [musl] Re: ppc64le and 32-bit LE userland compatibility
From: Segher Boessenkool @ 2020-06-05 23:45 UTC (permalink / raw)
  To: Rich Felker
  Cc: libc-alpha, eery, Daniel Kolesa, musl, Will Springer,
	Palmer Dabbelt via binutils, via libc-dev, Michal Suchánek,
	linuxppc-dev, Joseph Myers
In-Reply-To: <20200605175045.GW1079@brightrain.aerifal.cx>

Hi!

On Fri, Jun 05, 2020 at 01:50:46PM -0400, Rich Felker wrote:
> On Fri, Jun 05, 2020 at 12:27:02PM -0500, Segher Boessenkool wrote:
> > > I'm also not really all that convinced that vectors make a huge difference in non-specialized code (autovectorization still has a way to go)
> > 
> > They do make a huge difference, depending on the application of course.
> > But VSX is not just vectors even: it also gives you twice as many
> > floating point scalars (64 now), and in newer versions of the ISA it can
> > be beneficially used for integer scalars even.
> 
> Vectorization is useful for a lot of things, and I'm sure there are
> specialized workloads that benefit from 64 scalars, but I've never
> encountered a place where having more than 16 registers made a
> practical difference.

20 years ago 32 FP registers was already often a limitation, making FFT
and similar kernels almost twice slower than they could otherwise be.
Things are only *worse* with short vectors, not better.  In general with
floating point data you need more registers (because you have more state
to look at concurrently) than with integer data.

*Of course* having 64 floating point registers does not matter if your
whole program only ever uses three floating point values, total, let
alone concurrently.

> The fact that there are specialized areas where this stuff matters
> does not imply there aren't huge domains where it's completely
> irrelevant.

There are very few domains where ISA 2.07 does not have significant
advantages over ISA 2.01.  That is Power8 vs. Power4.

> > No, that is exactly the point of requiring ISA 2.07.  Anything can use
> > ISA 2.07 (incl. VSX) without checking first, and without having a
> > fallback to some other implementation.  Going from ISA 2.01 to 2.07 is
> > more than a decade of improvements, it is not trivial at all.
> 
> This only affects code that's non-portable and PPC-specific, which a

No, it does not.  It is not only about vector registers, either.

> I think a lot of the unnecessary fighting on this topic is arising
> from differences of opinion over what an ABI entails. I would call
> what you're talking about a "platform" and more of a platform-specific
> *API* than an ABI -- it's about guarantees of interfaces available to
> the programmer, not implementation details of linkage.

No, this is very much about the ABI.  The B stands for Binary.  Which
is what this is about.


Segher

^ permalink raw reply

* Re: [musl] Re: ppc64le and 32-bit LE userland compatibility
From: Daniel Kolesa @ 2020-06-05 21:59 UTC (permalink / raw)
  To: Segher Boessenkool, musl
  Cc: Rich Felker, libc-alpha, eery, Will Springer,
	Palmer Dabbelt via binutils, via libc-dev, Michal Suchánek,
	linuxppc-dev, Joseph Myers
In-Reply-To: <20200605172702.GP31009@gate.crashing.org>

On Fri, Jun 5, 2020, at 19:27, Segher Boessenkool wrote:
> On Fri, Jun 05, 2020 at 04:18:18AM +0200, Daniel Kolesa wrote:
> > On Fri, Jun 5, 2020, at 01:35, Segher Boessenkool wrote:
> > > > The thing is, I've yet to see in which way the ELFv2 ABI *actually* requires VSX - I don't think compiling for 970 introduces any actual differences. There will be omissions, yes - but then the more accurate thing would be to say that a subset of ELFv2 is used, rather than it being a different ABI per se.
> > > 
> > > Two big things are that binaries that someone else made are supposed to
> > > work for you as well -- including binaries using VSX registers, or any
> > > instructions that require ISA 2.07 (or some older ISA after 970).  This
> > > includes DSOs (shared libraries).  So for a distribution this means that
> > > they will not use VSX *anywhere*, or only in very specialised things.
> > > That is a many-years setback, for people/situations where it could be
> > > used.
> > 
> > Third party precompiled stuff doesn't really need to concern us, since none really exists.
> 
> ... Yet.  And if you claim you support ELFv2, not mentioning the ways
> your implementation deviates from it, users will be unhappy.

Will they? The system explicitly mentions that the minimum target in the binary packages is 970. Users can't be expecting features that the hardware we support doesn't support :) Also, we're not the only ones that do this - there's musl of course, but there's also the BSDs. FreeBSD 13 uses ELFv2, and supports all the old hardware including processors without VMX, let alone VSX. OpenBSD is likely to take the same path. But I'm not opposed to making this explicit, if that's what it takes to make other people happy with it.

> 
> > It's also still an upgrade over ELFv1 regardless (I mean, the same things apply there).
> 
> Yeah, in mostly minor ways, but it all adds up for sure.

It's made my life simpler on numerous occasions, and allowed us to bring in software that'd otherwise take significant patching (no software is complete until it has its own assembly implementation of coroutines or something like that :P), or would be outright impossible without a lot of work (e.g. the lld linker).

> 
> > I'm also not really all that convinced that vectors make a huge difference in non-specialized code (autovectorization still has a way to go)
> 
> They do make a huge difference, depending on the application of course.

I've yet to see non-specialized things without explicit VSX impls where the difference is more than 5%, usually it's less.

> But VSX is not just vectors even: it also gives you twice as many
> floating point scalars (64 now), and in newer versions of the ISA it can
> be beneficially used for integer scalars even.
> 
> > and code written to use vector instructions should probably check auxval and take those paths at runtime.
> 
> No, that is exactly the point of requiring ISA 2.07.  Anything can use
> ISA 2.07 (incl. VSX) without checking first, and without having a
> fallback to some other implementation.  Going from ISA 2.01 to 2.07 is
> more than a decade of improvements, it is not trivial at all.

Portable code will need runtime checks anyway if they target big endian systems, especially existing ones, since modern vector instructions can be used even in legacy ABI, where you get no such guarantee. Sure, it's nice to not have to worry about it, but once everything is considered, going with a more modern ABI is still a big net gain for us, even if the guarantees are not all there :)

> 
> 
> > As for other instructions, fair enough, but from my rough testing, it doesn't make such a massive difference for average case
> 
> That depends on what you call the average case.  Code that is control
> and memory-bound will not benefit much from *anything* :-)

Average case is, quite literally, an average case - i.e. the average of all the software packages shipped in a distro :)

> 
> > (and where it does, one can always rebuild their thing with CFLAGS=-mcpu=power9)
> 
> Yeah, but it helps quite a bit if your system (shared) libraries get all
> improvements they can as well.

Well, glibc will still benefit automatically to a degree even if not built for a modern baseline, since it has runtime checks in place already; as for other things... well, at least for Void, I already mentioned before we're as much of a source distro as a binary one - people can easily rebuild things that bottleneck them, with modern CFLAGS, and still have things be interoperable.

> 
> 
> I'm not trying to dissuade you from not requiring VSX and 2.07 -- this
> sounds like your best option, given the constraints.  I'm just saying
> the cost is not trivial (even ignoring the ABI divergence).

Of course the cost is there - it's just not something I can do anything about. I generally recommend that people who can run LE should run LE. We're a bi-endian distribution, so there is a complete, fully functional, ISA-2.07-baseline ppc64le variant (in fact, it's our best-supported port, with greatest repo coverage and testing), as well as the 970-targeting ppc64 variant.

The 970-targeting ppc64 variant is still capable of running on modern hardware, as there are people who wish to run big endian even on their modern machines, and I don't want to take that choice away from them (and since kernel 4.20, you can even have one generic kernel capable of booting POWER4-POWER9, previously separate kernels were needed for <= POWER6 and >= POWER7). Those people can either deal with their computers running a bit slower, or rebuild the packages that specifically bottleneck them with modern CFLAGS.

> 
> 
> > > The target name allows to make such distinctions: this could for example
> > > be  powerpc64-*-linux-void  (maybe I put the distinction in the wrong
> > > part of the name here?  The glibc people will know better, and "void" is
> > > probably not a great name anyway).
> > 
> > Hm, I'm not a huge fan of putting ABI specifics in the triplet, it feels wrong - there is no precedent for it with POWER (ARM did it with EABI though),
> 
> Maybe look at what the various BSDs use?  We do have things like this.

Briefly, the FreeBSD 13 powerpc64 target triple had an -elfv2 suffix, this was done in clang, but I noticed it was reverted in the end and replaced with checks not based on triples. I believe there is no ABI in BSD powerpc triples right now.

> 
> > the last part should remain 'gnu' as it's still glibc; besides, gcc is compiled for exactly one target triplet, and traditionally with ppc compilers it's always been possible to target everything with just one compiler (endian, 32bit, 64bit, abi...).
> 
> This isn't completely true.
> 
> Yes, the compiler allows you to change word size, endianness, ABI, some
> more things.  That does not mean you can actually build working binaries
> for all resulting combinations.  As a trivial example, it will still
> pick up the same libraries from the same library paths usually, and
> those will spectacularly fail to work.

I know, I meant mostly from purely compiler perspective. It's good enough for bare-metal (in that way it matters for stuff like GRUB etc. where you still need to build a 32-bit big-endian ELF, etc)

> 
> We are biarch for some targets, which means that both powerpc-linux
> targets and powerpc64-linux targets can actually handle both of those,
> with just -m32 or -m64 needed to switch which configuration is used.
> But you cannot magically transparently switch to many other
> configurations: for those, you just build a separate toolchain for that
> specfic (variant) configuration, in the general case.

I know about the biarch case as well, and there is also multilib, as an even more elaborate form of that. That's not directly related to what I originally said, though

> 
> > The best way would probably be adding a new -mabi, e.g. -mabi=elfv2-novsx (just an example), which would behave exactly like -mabi=elfv2, except it'd emit some extra detection macro
> 
> Yeah, that sounds like a good idea.  Patches welcome :-)
> 
> (A separate target name is still needed, but this will make development
> simpler for sure).

I don't think a separate target is strictly necessary - it can be done, sure, but with an ABI switch it's more just informative than anything else.

> 
> 
> Segher
>

Daniel

^ permalink raw reply

* Re: [PATCH v10 5/6] ndctl/papr_scm, uapi: Add support for PAPR nvdimm specific methods
From: Dan Williams @ 2020-06-05 20:58 UTC (permalink / raw)
  To: Ira Weiny
  Cc: Santosh Sivaraj, linux-nvdimm, Linux Kernel Mailing List,
	Steven Rostedt, Oliver O'Halloran, Aneesh Kumar K . V,
	Vaibhav Jain, linuxppc-dev
In-Reply-To: <20200605194952.GS1505637@iweiny-DESK2.sc.intel.com>

On Fri, Jun 5, 2020 at 12:50 PM Ira Weiny <ira.weiny@intel.com> wrote:
>
> On Fri, Jun 05, 2020 at 05:11:35AM +0530, Vaibhav Jain wrote:
> > Introduce support for PAPR NVDIMM Specific Methods (PDSM) in papr_scm
> > module and add the command family NVDIMM_FAMILY_PAPR to the white list
> > of NVDIMM command sets. Also advertise support for ND_CMD_CALL for the
> > nvdimm command mask and implement necessary scaffolding in the module
> > to handle ND_CMD_CALL ioctl and PDSM requests that we receive.
> >
> > The layout of the PDSM request as we expect from libnvdimm/libndctl is
> > described in newly introduced uapi header 'papr_pdsm.h' which
> > defines a new 'struct nd_pdsm_cmd_pkg' header. This header is used
> > to communicate the PDSM request via member
> > 'nd_cmd_pkg.nd_command' and size of payload that need to be
> > sent/received for servicing the PDSM.
> >
> > A new function is_cmd_valid() is implemented that reads the args to
> > papr_scm_ndctl() and performs sanity tests on them. A new function
> > papr_scm_service_pdsm() is introduced and is called from
> > papr_scm_ndctl() in case of a PDSM request is received via ND_CMD_CALL
> > command from libnvdimm.
> >
> > Cc: "Aneesh Kumar K . V" <aneesh.kumar@linux.ibm.com>
> > Cc: Dan Williams <dan.j.williams@intel.com>
> > Cc: Michael Ellerman <mpe@ellerman.id.au>
> > Cc: Ira Weiny <ira.weiny@intel.com>
> > Signed-off-by: Vaibhav Jain <vaibhav@linux.ibm.com>
> > ---
> > Changelog:
> >
> > v9..v10:
> > * Simplified 'struct nd_pdsm_cmd_pkg' by removing the
> >   'payload_version' field.
> > * Removed the corrosponding documentation on versioning and backward
> >   compatibility from 'papr_pdsm.h'
> > * Reduced the size of reserved fields to 4-bytes making 'struct
> >   nd_pdsm_cmd_pkg' 64 + 8 bytes long.
> > * Updated is_cmd_valid() to enforce validation checks on pdsm
> >   commands. [ Dan Williams ]
> > * Added check for reserved fields being set to '0' in is_cmd_valid()
> >   [ Ira ]
> > * Moved changes for checking cmd_rc == NULL and logging improvements
> >   to a separate prelim patch [ Ira ].
> > * Moved  pdsm package validation checks from papr_scm_service_pdsm()
> >   to is_cmd_valid().
> > * Marked papr_scm_service_pdsm() return type as 'void' since errors
> >   are reported in nd_pdsm_cmd_pkg.cmd_status field.
> >
> > Resend:
> > * Added ack from Aneesh.
> >
> > v8..v9:
> > * Reduced the usage of term SCM replacing it with appropriate
> >   replacement [ Dan Williams, Aneesh ]
> > * Renamed 'papr_scm_pdsm.h' to 'papr_pdsm.h'
> > * s/PAPR_SCM_PDSM_*/PAPR_PDSM_*/g
> > * s/NVDIMM_FAMILY_PAPR_SCM/NVDIMM_FAMILY_PAPR/g
> > * Minor updates to 'papr_psdm.h' to replace usage of term 'SCM'.
> > * Minor update to patch description.
> >
> > v7..v8:
> > * Removed the 'payload_offset' field from 'struct
> >   nd_pdsm_cmd_pkg'. Instead command payload is always assumed to start
> >   at 'nd_pdsm_cmd_pkg.payload'. [ Aneesh ]
> > * To enable introducing new fields to 'struct nd_pdsm_cmd_pkg',
> >   'reserved' field of 10-bytes is introduced. [ Aneesh ]
> > * Fixed a typo in "Backward Compatibility" section of papr_scm_pdsm.h
> >   [ Ira ]
> >
> > Resend:
> > * None
> >
> > v6..v7 :
> > * Removed the re-definitions of __packed macro from papr_scm_pdsm.h
> >   [Mpe].
> > * Removed the usage of __KERNEL__ macros in papr_scm_pdsm.h [Mpe].
> > * Removed macros that were unused in papr_scm.c from papr_scm_pdsm.h
> >   [Mpe].
> > * Made functions defined in papr_scm_pdsm.h as static inline. [Mpe]
> >
> > v5..v6 :
> > * Changed the usage of the term DSM to PDSM to distinguish it from the
> >   ACPI term [ Dan Williams ]
> > * Renamed papr_scm_dsm.h to papr_scm_pdsm.h and updated various struct
> >   to reflect the new terminology.
> > * Updated the patch description and title to reflect the new terminology.
> > * Squashed patch to introduce new command family in 'ndctl.h' with
> >   this patch [ Dan Williams ]
> > * Updated the papr_scm_pdsm method starting index from 0x10000 to 0x0
> >   [ Dan Williams ]
> > * Removed redundant license text from the papr_scm_psdm.h file.
> >   [ Dan Williams ]
> > * s/envelop/envelope/ at various places [ Dan Williams ]
> > * Added '__packed' attribute to command package header to gaurd
> >   against different compiler adding paddings between the fields.
> >   [ Dan Williams]
> > * Converted various pr_debug to dev_debug [ Dan Williams ]
> >
> > v4..v5 :
> > * None
> >
> > v3..v4 :
> > * None
> >
> > v2..v3 :
> > * Updated the patch prefix to 'ndctl/uapi' [Aneesh]
> >
> > v1..v2 :
> > * None
> > ---
> >  arch/powerpc/include/uapi/asm/papr_pdsm.h |  98 +++++++++++++++++++
> >  arch/powerpc/platforms/pseries/papr_scm.c | 113 +++++++++++++++++++++-
> >  include/uapi/linux/ndctl.h                |   1 +
> >  3 files changed, 207 insertions(+), 5 deletions(-)
> >  create mode 100644 arch/powerpc/include/uapi/asm/papr_pdsm.h
> >
> > diff --git a/arch/powerpc/include/uapi/asm/papr_pdsm.h b/arch/powerpc/include/uapi/asm/papr_pdsm.h
> > new file mode 100644
> > index 000000000000..8b1a4f8fa316
> > --- /dev/null
> > +++ b/arch/powerpc/include/uapi/asm/papr_pdsm.h
> > @@ -0,0 +1,98 @@
> > +/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
> > +/*
> > + * PAPR nvDimm Specific Methods (PDSM) and structs for libndctl
> > + *
> > + * (C) Copyright IBM 2020
> > + *
> > + * Author: Vaibhav Jain <vaibhav at linux.ibm.com>
> > + */
> > +
> > +#ifndef _UAPI_ASM_POWERPC_PAPR_PDSM_H_
> > +#define _UAPI_ASM_POWERPC_PAPR_PDSM_H_
> > +
> > +#include <linux/types.h>
> > +
> > +/*
> > + * PDSM Envelope:
> > + *
> > + * The ioctl ND_CMD_CALL transfers data between user-space and kernel via
> > + * envelope which consists of a header and user-defined payload sections.
> > + * The header is described by 'struct nd_pdsm_cmd_pkg' which expects a
> > + * payload following it and accessible via 'nd_pdsm_cmd_pkg.payload' field.
> > + * There is reserved field that can used to introduce new fields to the
> > + * structure in future. It also tries to ensure that 'nd_pdsm_cmd_pkg.payload'
> > + * lies at a 8-byte boundary.
> > + *
> > + *  +-------------+---------------------+---------------------------+
> > + *  |   64-Bytes  |       8-Bytes       |       Max 184-Bytes       |
> > + *  +-------------+---------------------+---------------------------+
> > + *  |               nd_pdsm_cmd_pkg     |                           |
> > + *  |-------------+                     |                           |
> > + *  |  nd_cmd_pkg |                     |                           |
> > + *  +-------------+---------------------+---------------------------+
> > + *  | nd_family   |                     |                           |
> > + *  | nd_size_out | cmd_status          |                           |
> > + *  | nd_size_in  | reserved            |     payload               |
> > + *  | nd_command  |                     |                           |
> > + *  | nd_fw_size  |                     |                           |
> > + *  +-------------+---------------------+---------------------------+
> > + *
> > + * PDSM Header:
> > + *
> > + * The header is defined as 'struct nd_pdsm_cmd_pkg' which embeds a
> > + * 'struct nd_cmd_pkg' instance. The PDSM command is assigned to member
> > + * 'nd_cmd_pkg.nd_command'. Apart from size information of the envelope which is
> > + * contained in 'struct nd_cmd_pkg', the header also has members following
> > + * members:
> > + *
> > + * 'cmd_status'              : (Out) Errors if any encountered while servicing PDSM.
> > + * 'reserved'                : Not used, reserved for future and should be set to 0.
> > + *
> > + * PDSM Payload:
> > + *
> > + * The layout of the PDSM Payload is defined by various structs shared between
> > + * papr_scm and libndctl so that contents of payload can be interpreted. During
> > + * servicing of a PDSM the papr_scm module will read input args from the payload
> > + * field by casting its contents to an appropriate struct pointer based on the
> > + * PDSM command. Similarly the output of servicing the PDSM command will be
> > + * copied to the payload field using the same struct.
> > + *
> > + * 'libnvdimm' enforces a hard limit of 256 bytes on the envelope size, which
> > + * leaves around 184 bytes for the envelope payload (ignoring any padding that
> > + * the compiler may silently introduce).
> > + *
> > + */
> > +
> > +/* PDSM-header + payload expected with ND_CMD_CALL ioctl from libnvdimm */
> > +struct nd_pdsm_cmd_pkg {
> > +     struct nd_cmd_pkg hdr;  /* Package header containing sub-cmd */
> > +     __s32 cmd_status;       /* Out: Sub-cmd status returned back */
> > +     __u16 reserved[2];      /* Ignored and to be used in future */
> > +     __u8 payload[];         /* In/Out: Sub-cmd data buffer */
> > +} __packed;
> > +
> > +/*
> > + * Methods to be embedded in ND_CMD_CALL request. These are sent to the kernel
> > + * via 'nd_pdsm_cmd_pkg.hdr.nd_command' member of the ioctl struct
> > + */
> > +enum papr_pdsm {
> > +     PAPR_PDSM_MIN = 0x0,
> > +     PAPR_PDSM_MAX,
> > +};
> > +
> > +/* Convert a libnvdimm nd_cmd_pkg to pdsm specific pkg */
> > +static inline struct nd_pdsm_cmd_pkg *nd_to_pdsm_cmd_pkg(struct nd_cmd_pkg *cmd)
> > +{
> > +     return (struct nd_pdsm_cmd_pkg *)cmd;
> > +}
> > +
> > +/* Return the payload pointer for a given pcmd */
> > +static inline void *pdsm_cmd_to_payload(struct nd_pdsm_cmd_pkg *pcmd)
> > +{
> > +     if (pcmd->hdr.nd_size_in == 0 && pcmd->hdr.nd_size_out == 0)
> > +             return NULL;
> > +     else
> > +             return (void *)(pcmd->payload);
> > +}
>
> I just realized these were in the uapi header.  You don't want to do this as
> this code gets built into any user space program that uses it and there may be
> license issues if the user space code is not a compatible license.

Yes, include/uapi/linux/ndctl.h is specifically LGPL for this reason.

^ permalink raw reply

* Re: [GIT PULL] Please pull powerpc/linux.git powerpc-5.8-1 tag
From: Linus Torvalds @ 2020-06-05 20:39 UTC (permalink / raw)
  To: Michael Ellerman
  Cc: ego, emmanuel.nicolet, chenzhou10, jniethe5, linuxram, kernelfans,
	Linux Kernel Mailing List, st5pub, Oliver O'Halloran, huhai,
	Markus Elfring, rzinsly, leobras.c, mikey, Herbert Xu,
	Aneesh Kumar K.V, haren, michal.simek, mahesh, Takashi Iwai,
	kjain, leonardo, Naveen N. Rao, Ravi Bangoria, ajd, Arnd Bergmann,
	Stephen Rothwell, alistair, Nick Piggin, wangxiongfeng2, Qian Cai,
	clg, Nathan Chancellor, hbathini, Christophe Leroy, geoff,
	Dmitry Torokhov, Gustavo A. R. Silva, wsa, sbobroff, fbarrat,
	Christophe JAILLET, Andrew Morton, linuxppc-dev
In-Reply-To: <CAHk-=wh5vSwYqF=YiKOOGBHE=hCwnes_ndbP4QOyjPK_Xocz7w@mail.gmail.com>

On Fri, Jun 5, 2020 at 12:01 PM Linus Torvalds
<torvalds@linux-foundation.org> wrote:
>
> ..and then when I actually compared whether I otherwise got the same
> result as you, I realized that this all depends on the module tree.
>
> I'll go merge that first, and then re-do this all. Oh well.

Ok, redone and pushed out. Can you check that the end result makes sense?

              Linus

^ permalink raw reply

* Re: [GIT PULL] Please pull powerpc/linux.git powerpc-5.8-1 tag
From: pr-tracker-bot @ 2020-06-05 20:40 UTC (permalink / raw)
  To: Michael Ellerman
  Cc: ego, emmanuel.nicolet, chenzhou10, jniethe5, linuxram, kernelfans,
	linux-kernel, st5pub, oohall, huhai, elfring, rzinsly, leobras.c,
	mikey, herbert, aneesh.kumar, haren, michal.simek, mahesh, tiwai,
	kjain, leonardo, naveen.n.rao, ravi.bangoria, ajd, arnd, sfr,
	alistair, npiggin, wangxiongfeng2, cai, clg, natechancellor,
	hbathini, christophe.leroy, geoff, linuxppc-dev, dmitry.torokhov,
	gustavoars, wsa, sbobroff, fbarrat, christophe.jaillet, akpm,
	Linus Torvalds
In-Reply-To: <87eeqth3hi.fsf@mpe.ellerman.id.au>

The pull request you sent on Sat, 06 Jun 2020 02:38:49 +1000:

> https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git tags/powerpc-5.8-1

has been merged into torvalds/linux.git:
https://git.kernel.org/torvalds/c/7ae77150d94d3b535c7b85e6b3647113095e79bf

Thank you!

-- 
Deet-doot-dot, I am a bot.
https://korg.wiki.kernel.org/userdoc/prtracker

^ permalink raw reply

* Re: [PATCH v10 5/6] ndctl/papr_scm, uapi: Add support for PAPR nvdimm specific methods
From: Ira Weiny @ 2020-06-05 19:49 UTC (permalink / raw)
  To: Vaibhav Jain
  Cc: Santosh Sivaraj, linux-nvdimm, linux-kernel, Steven Rostedt,
	Oliver O'Halloran, Aneesh Kumar K . V, Dan Williams,
	linuxppc-dev
In-Reply-To: <20200604234136.253703-6-vaibhav@linux.ibm.com>

On Fri, Jun 05, 2020 at 05:11:35AM +0530, Vaibhav Jain wrote:
> Introduce support for PAPR NVDIMM Specific Methods (PDSM) in papr_scm
> module and add the command family NVDIMM_FAMILY_PAPR to the white list
> of NVDIMM command sets. Also advertise support for ND_CMD_CALL for the
> nvdimm command mask and implement necessary scaffolding in the module
> to handle ND_CMD_CALL ioctl and PDSM requests that we receive.
> 
> The layout of the PDSM request as we expect from libnvdimm/libndctl is
> described in newly introduced uapi header 'papr_pdsm.h' which
> defines a new 'struct nd_pdsm_cmd_pkg' header. This header is used
> to communicate the PDSM request via member
> 'nd_cmd_pkg.nd_command' and size of payload that need to be
> sent/received for servicing the PDSM.
> 
> A new function is_cmd_valid() is implemented that reads the args to
> papr_scm_ndctl() and performs sanity tests on them. A new function
> papr_scm_service_pdsm() is introduced and is called from
> papr_scm_ndctl() in case of a PDSM request is received via ND_CMD_CALL
> command from libnvdimm.
> 
> Cc: "Aneesh Kumar K . V" <aneesh.kumar@linux.ibm.com>
> Cc: Dan Williams <dan.j.williams@intel.com>
> Cc: Michael Ellerman <mpe@ellerman.id.au>
> Cc: Ira Weiny <ira.weiny@intel.com>
> Signed-off-by: Vaibhav Jain <vaibhav@linux.ibm.com>
> ---
> Changelog:
> 
> v9..v10:
> * Simplified 'struct nd_pdsm_cmd_pkg' by removing the
>   'payload_version' field.
> * Removed the corrosponding documentation on versioning and backward
>   compatibility from 'papr_pdsm.h'
> * Reduced the size of reserved fields to 4-bytes making 'struct
>   nd_pdsm_cmd_pkg' 64 + 8 bytes long.
> * Updated is_cmd_valid() to enforce validation checks on pdsm
>   commands. [ Dan Williams ]
> * Added check for reserved fields being set to '0' in is_cmd_valid()
>   [ Ira ]
> * Moved changes for checking cmd_rc == NULL and logging improvements
>   to a separate prelim patch [ Ira ].
> * Moved  pdsm package validation checks from papr_scm_service_pdsm()
>   to is_cmd_valid().
> * Marked papr_scm_service_pdsm() return type as 'void' since errors
>   are reported in nd_pdsm_cmd_pkg.cmd_status field.
> 
> Resend:
> * Added ack from Aneesh.
> 
> v8..v9:
> * Reduced the usage of term SCM replacing it with appropriate
>   replacement [ Dan Williams, Aneesh ]
> * Renamed 'papr_scm_pdsm.h' to 'papr_pdsm.h'
> * s/PAPR_SCM_PDSM_*/PAPR_PDSM_*/g
> * s/NVDIMM_FAMILY_PAPR_SCM/NVDIMM_FAMILY_PAPR/g
> * Minor updates to 'papr_psdm.h' to replace usage of term 'SCM'.
> * Minor update to patch description.
> 
> v7..v8:
> * Removed the 'payload_offset' field from 'struct
>   nd_pdsm_cmd_pkg'. Instead command payload is always assumed to start
>   at 'nd_pdsm_cmd_pkg.payload'. [ Aneesh ]
> * To enable introducing new fields to 'struct nd_pdsm_cmd_pkg',
>   'reserved' field of 10-bytes is introduced. [ Aneesh ]
> * Fixed a typo in "Backward Compatibility" section of papr_scm_pdsm.h
>   [ Ira ]
> 
> Resend:
> * None
> 
> v6..v7 :
> * Removed the re-definitions of __packed macro from papr_scm_pdsm.h
>   [Mpe].
> * Removed the usage of __KERNEL__ macros in papr_scm_pdsm.h [Mpe].
> * Removed macros that were unused in papr_scm.c from papr_scm_pdsm.h
>   [Mpe].
> * Made functions defined in papr_scm_pdsm.h as static inline. [Mpe]
> 
> v5..v6 :
> * Changed the usage of the term DSM to PDSM to distinguish it from the
>   ACPI term [ Dan Williams ]
> * Renamed papr_scm_dsm.h to papr_scm_pdsm.h and updated various struct
>   to reflect the new terminology.
> * Updated the patch description and title to reflect the new terminology.
> * Squashed patch to introduce new command family in 'ndctl.h' with
>   this patch [ Dan Williams ]
> * Updated the papr_scm_pdsm method starting index from 0x10000 to 0x0
>   [ Dan Williams ]
> * Removed redundant license text from the papr_scm_psdm.h file.
>   [ Dan Williams ]
> * s/envelop/envelope/ at various places [ Dan Williams ]
> * Added '__packed' attribute to command package header to gaurd
>   against different compiler adding paddings between the fields.
>   [ Dan Williams]
> * Converted various pr_debug to dev_debug [ Dan Williams ]
> 
> v4..v5 :
> * None
> 
> v3..v4 :
> * None
> 
> v2..v3 :
> * Updated the patch prefix to 'ndctl/uapi' [Aneesh]
> 
> v1..v2 :
> * None
> ---
>  arch/powerpc/include/uapi/asm/papr_pdsm.h |  98 +++++++++++++++++++
>  arch/powerpc/platforms/pseries/papr_scm.c | 113 +++++++++++++++++++++-
>  include/uapi/linux/ndctl.h                |   1 +
>  3 files changed, 207 insertions(+), 5 deletions(-)
>  create mode 100644 arch/powerpc/include/uapi/asm/papr_pdsm.h
> 
> diff --git a/arch/powerpc/include/uapi/asm/papr_pdsm.h b/arch/powerpc/include/uapi/asm/papr_pdsm.h
> new file mode 100644
> index 000000000000..8b1a4f8fa316
> --- /dev/null
> +++ b/arch/powerpc/include/uapi/asm/papr_pdsm.h
> @@ -0,0 +1,98 @@
> +/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
> +/*
> + * PAPR nvDimm Specific Methods (PDSM) and structs for libndctl
> + *
> + * (C) Copyright IBM 2020
> + *
> + * Author: Vaibhav Jain <vaibhav at linux.ibm.com>
> + */
> +
> +#ifndef _UAPI_ASM_POWERPC_PAPR_PDSM_H_
> +#define _UAPI_ASM_POWERPC_PAPR_PDSM_H_
> +
> +#include <linux/types.h>
> +
> +/*
> + * PDSM Envelope:
> + *
> + * The ioctl ND_CMD_CALL transfers data between user-space and kernel via
> + * envelope which consists of a header and user-defined payload sections.
> + * The header is described by 'struct nd_pdsm_cmd_pkg' which expects a
> + * payload following it and accessible via 'nd_pdsm_cmd_pkg.payload' field.
> + * There is reserved field that can used to introduce new fields to the
> + * structure in future. It also tries to ensure that 'nd_pdsm_cmd_pkg.payload'
> + * lies at a 8-byte boundary.
> + *
> + *  +-------------+---------------------+---------------------------+
> + *  |   64-Bytes  |       8-Bytes       |       Max 184-Bytes       |
> + *  +-------------+---------------------+---------------------------+
> + *  |               nd_pdsm_cmd_pkg     |                           |
> + *  |-------------+                     |                           |
> + *  |  nd_cmd_pkg |                     |                           |
> + *  +-------------+---------------------+---------------------------+
> + *  | nd_family   |                     |                           |
> + *  | nd_size_out | cmd_status          |                           |
> + *  | nd_size_in  | reserved            |     payload               |
> + *  | nd_command  |                     |                           |
> + *  | nd_fw_size  |                     |                           |
> + *  +-------------+---------------------+---------------------------+
> + *
> + * PDSM Header:
> + *
> + * The header is defined as 'struct nd_pdsm_cmd_pkg' which embeds a
> + * 'struct nd_cmd_pkg' instance. The PDSM command is assigned to member
> + * 'nd_cmd_pkg.nd_command'. Apart from size information of the envelope which is
> + * contained in 'struct nd_cmd_pkg', the header also has members following
> + * members:
> + *
> + * 'cmd_status'		: (Out) Errors if any encountered while servicing PDSM.
> + * 'reserved'		: Not used, reserved for future and should be set to 0.
> + *
> + * PDSM Payload:
> + *
> + * The layout of the PDSM Payload is defined by various structs shared between
> + * papr_scm and libndctl so that contents of payload can be interpreted. During
> + * servicing of a PDSM the papr_scm module will read input args from the payload
> + * field by casting its contents to an appropriate struct pointer based on the
> + * PDSM command. Similarly the output of servicing the PDSM command will be
> + * copied to the payload field using the same struct.
> + *
> + * 'libnvdimm' enforces a hard limit of 256 bytes on the envelope size, which
> + * leaves around 184 bytes for the envelope payload (ignoring any padding that
> + * the compiler may silently introduce).
> + *
> + */
> +
> +/* PDSM-header + payload expected with ND_CMD_CALL ioctl from libnvdimm */
> +struct nd_pdsm_cmd_pkg {
> +	struct nd_cmd_pkg hdr;	/* Package header containing sub-cmd */
> +	__s32 cmd_status;	/* Out: Sub-cmd status returned back */
> +	__u16 reserved[2];	/* Ignored and to be used in future */
> +	__u8 payload[];		/* In/Out: Sub-cmd data buffer */
> +} __packed;
> +
> +/*
> + * Methods to be embedded in ND_CMD_CALL request. These are sent to the kernel
> + * via 'nd_pdsm_cmd_pkg.hdr.nd_command' member of the ioctl struct
> + */
> +enum papr_pdsm {
> +	PAPR_PDSM_MIN = 0x0,
> +	PAPR_PDSM_MAX,
> +};
> +
> +/* Convert a libnvdimm nd_cmd_pkg to pdsm specific pkg */
> +static inline struct nd_pdsm_cmd_pkg *nd_to_pdsm_cmd_pkg(struct nd_cmd_pkg *cmd)
> +{
> +	return (struct nd_pdsm_cmd_pkg *)cmd;
> +}
> +
> +/* Return the payload pointer for a given pcmd */
> +static inline void *pdsm_cmd_to_payload(struct nd_pdsm_cmd_pkg *pcmd)
> +{
> +	if (pcmd->hdr.nd_size_in == 0 && pcmd->hdr.nd_size_out == 0)
> +		return NULL;
> +	else
> +		return (void *)(pcmd->payload);
> +}

I just realized these were in the uapi header.  You don't want to do this as
this code gets built into any user space program that uses it and there may be
license issues if the user space code is not a compatible license.

Ira

> +
> +#endif /* _UAPI_ASM_POWERPC_PAPR_PDSM_H_ */
> diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c
> index 6512fe6a2874..05eb56ecab5e 100644
> --- a/arch/powerpc/platforms/pseries/papr_scm.c
> +++ b/arch/powerpc/platforms/pseries/papr_scm.c
> @@ -15,13 +15,15 @@
>  #include <linux/seq_buf.h>
>  
>  #include <asm/plpar_wrappers.h>
> +#include <asm/papr_pdsm.h>
>  
>  #define BIND_ANY_ADDR (~0ul)
>  
>  #define PAPR_SCM_DIMM_CMD_MASK \
>  	((1ul << ND_CMD_GET_CONFIG_SIZE) | \
>  	 (1ul << ND_CMD_GET_CONFIG_DATA) | \
> -	 (1ul << ND_CMD_SET_CONFIG_DATA))
> +	 (1ul << ND_CMD_SET_CONFIG_DATA) | \
> +	 (1ul << ND_CMD_CALL))
>  
>  /* DIMM health bitmap bitmap indicators */
>  /* SCM device is unable to persist memory contents */
> @@ -349,22 +351,117 @@ static int papr_scm_meta_set(struct papr_scm_priv *p,
>  	return 0;
>  }
>  
> +/*
> + * Do a sanity checks on the inputs args to dimm-control function and return
> + * '0' if valid. This also does validation on ND_CMD_CALL sub-command packages.
> + */
> +static int is_cmd_valid(struct nvdimm *nvdimm, unsigned int cmd, void *buf,
> +			unsigned int buf_len)
> +{
> +	unsigned long cmd_mask = PAPR_SCM_DIMM_CMD_MASK;
> +	struct nd_pdsm_cmd_pkg *pkg;
> +	struct nd_cmd_pkg *nd_cmd;
> +	struct papr_scm_priv *p;
> +	enum papr_pdsm pdsm;
> +
> +	/* Only dimm-specific calls are supported atm */
> +	if (!nvdimm)
> +		return -EINVAL;
> +
> +	/* get the provider data from struct nvdimm */
> +	p = nvdimm_provider_data(nvdimm);
> +
> +	if (!test_bit(cmd, &cmd_mask)) {
> +		dev_dbg(&p->pdev->dev, "Unsupported cmd=%u\n", cmd);
> +		return -EINVAL;
> +	}
> +
> +	/* For CMD_CALL verify pdsm request */
> +	if (cmd == ND_CMD_CALL) {
> +		/* Verify the envelope package */
> +		if (!buf || buf_len < sizeof(struct nd_pdsm_cmd_pkg)) {
> +			dev_dbg(&p->pdev->dev, "Invalid pkg size=%u\n",
> +				buf_len);
> +			return -EINVAL;
> +		}
> +
> +		/* Verify that the nd_cmd_pkg.nd_family is correct */
> +		nd_cmd = (struct nd_cmd_pkg *)buf;
> +		if (nd_cmd->nd_family != NVDIMM_FAMILY_PAPR) {
> +			dev_dbg(&p->pdev->dev, "Invalid pkg family=0x%llx\n",
> +				nd_cmd->nd_family);
> +			return -EINVAL;
> +		}
> +
> +		/* Get the pdsm request package and the command */
> +		pkg = nd_to_pdsm_cmd_pkg(nd_cmd);
> +		pdsm = pkg->hdr.nd_command;
> +
> +		/* Verify if the psdm command is valid */
> +		if (pdsm <= PAPR_PDSM_MIN || pdsm >= PAPR_PDSM_MAX) {
> +			dev_dbg(&p->pdev->dev, "PDSM[0x%x]: Invalid PDSM\n", pdsm);
> +			return -EINVAL;
> +		}
> +
> +		/* We except a payload with all PDSM commands */
> +		if (!pdsm_cmd_to_payload(pkg)) {
> +			dev_dbg(&p->pdev->dev, "PDSM[0x%x]: Empty payload\n", pdsm);
> +			return -EINVAL;
> +		}
> +
> +		/* Ensure reserved fields of the pdsm header are set to 0 */
> +		if (pkg->reserved[0] || pkg->reserved[1]) {
> +			dev_dbg(&p->pdev->dev,
> +				"PDSM[0x%x]: Invalid reserved field usage\n", pdsm);
> +			return -EINVAL;
> +		}
> +	}
> +
> +	/* Let the command be further processed */
> +	return 0;
> +}
> +
> +/*
> + * For a given pdsm request call an appropriate service function.
> + * Note: Use 'nd_pdsm_cmd_pkg.cmd_status to report psdm servicing errors. Hence
> + * function marked as void.
> + */
> +static void papr_scm_service_pdsm(struct papr_scm_priv *p,
> +				  struct nd_pdsm_cmd_pkg *pkg)
> +{
> +	const enum papr_pdsm pdsm = pkg->hdr.nd_command;
> +
> +	dev_dbg(&p->pdev->dev, "PDSM[0x%x]: Servicing..\n", pdsm);
> +
> +	/* Call pdsm service function */
> +	switch (pdsm) {
> +	default:
> +		dev_dbg(&p->pdev->dev, "PDSM[0x%x]: Unsupported PDSM request\n",
> +			pdsm);
> +		pkg->cmd_status = -ENOENT;
> +		break;
> +	}
> +}
> +
>  static int papr_scm_ndctl(struct nvdimm_bus_descriptor *nd_desc,
>  			  struct nvdimm *nvdimm, unsigned int cmd, void *buf,
>  			  unsigned int buf_len, int *cmd_rc)
>  {
>  	struct nd_cmd_get_config_size *get_size_hdr;
> +	struct nd_pdsm_cmd_pkg *call_pkg = NULL;
>  	struct papr_scm_priv *p;
>  	int rc;
>  
> -	/* Only dimm-specific calls are supported atm */
> -	if (!nvdimm)
> -		return -EINVAL;
> -
>  	/* Use a local variable in case cmd_rc pointer is NULL */
>  	if (!cmd_rc)
>  		cmd_rc = &rc;
>  
> +	*cmd_rc = is_cmd_valid(nvdimm, cmd, buf, buf_len);
> +	if (*cmd_rc) {
> +		pr_debug("Invalid cmd=0x%x. Err=%d\n", cmd, *cmd_rc);
> +		return *cmd_rc;
> +	}
> +
>  	p = nvdimm_provider_data(nvdimm);
>  
>  	switch (cmd) {
> @@ -385,6 +482,12 @@ static int papr_scm_ndctl(struct nvdimm_bus_descriptor *nd_desc,
>  		*cmd_rc = papr_scm_meta_set(p, buf);
>  		break;
>  
> +	case ND_CMD_CALL:
> +		call_pkg = nd_to_pdsm_cmd_pkg(buf);
> +		papr_scm_service_pdsm(p, call_pkg);
> +		*cmd_rc = 0;
> +		break;
> +
>  	default:
>  		dev_dbg(&p->pdev->dev, "Unknown command = %d\n", cmd);
>  		*cmd_rc = -EINVAL;
> diff --git a/include/uapi/linux/ndctl.h b/include/uapi/linux/ndctl.h
> index de5d90212409..0e09dc5cec19 100644
> --- a/include/uapi/linux/ndctl.h
> +++ b/include/uapi/linux/ndctl.h
> @@ -244,6 +244,7 @@ struct nd_cmd_pkg {
>  #define NVDIMM_FAMILY_HPE2 2
>  #define NVDIMM_FAMILY_MSFT 3
>  #define NVDIMM_FAMILY_HYPERV 4
> +#define NVDIMM_FAMILY_PAPR 5
>  
>  #define ND_IOCTL_CALL			_IOWR(ND_IOCTL, ND_CMD_CALL,\
>  					struct nd_cmd_pkg)
> -- 
> 2.26.2
> 

^ permalink raw reply

* Re: [PATCH v10 4/6] powerpc/papr_scm: Improve error logging and handling papr_scm_ndctl()
From: Dan Williams @ 2020-06-05 19:49 UTC (permalink / raw)
  To: Ira Weiny
  Cc: Santosh Sivaraj, linux-nvdimm, Linux Kernel Mailing List,
	Steven Rostedt, Oliver O'Halloran, Aneesh Kumar K . V,
	Vaibhav Jain, linuxppc-dev
In-Reply-To: <20200605171313.GO1505637@iweiny-DESK2.sc.intel.com>

On Fri, Jun 5, 2020 at 10:13 AM Ira Weiny <ira.weiny@intel.com> wrote:
>
> On Fri, Jun 05, 2020 at 05:11:34AM +0530, Vaibhav Jain wrote:
> > Since papr_scm_ndctl() can be called from outside papr_scm, its
> > exposed to the possibility of receiving NULL as value of 'cmd_rc'
> > argument. This patch updates papr_scm_ndctl() to protect against such
> > possibility by assigning it pointer to a local variable in case cmd_rc
> > == NULL.
> >
> > Finally the patch also updates the 'default' clause of the switch-case
> > block removing a 'return' statement thereby ensuring that value of
> > 'cmd_rc' is always logged when papr_scm_ndctl() returns.
> >
> > Cc: "Aneesh Kumar K . V" <aneesh.kumar@linux.ibm.com>
> > Cc: Dan Williams <dan.j.williams@intel.com>
> > Cc: Michael Ellerman <mpe@ellerman.id.au>
> > Cc: Ira Weiny <ira.weiny@intel.com>
> > Signed-off-by: Vaibhav Jain <vaibhav@linux.ibm.com>
> > ---
> > Changelog:
> >
> > v9..v10
> > * New patch in the series
>
> Thanks for making this a separate patch it is easier to see what is going on
> here.
>
> > ---
> >  arch/powerpc/platforms/pseries/papr_scm.c | 10 ++++++++--
> >  1 file changed, 8 insertions(+), 2 deletions(-)
> >
> > diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c
> > index 0c091622b15e..6512fe6a2874 100644
> > --- a/arch/powerpc/platforms/pseries/papr_scm.c
> > +++ b/arch/powerpc/platforms/pseries/papr_scm.c
> > @@ -355,11 +355,16 @@ static int papr_scm_ndctl(struct nvdimm_bus_descriptor *nd_desc,
> >  {
> >       struct nd_cmd_get_config_size *get_size_hdr;
> >       struct papr_scm_priv *p;
> > +     int rc;
> >
> >       /* Only dimm-specific calls are supported atm */
> >       if (!nvdimm)
> >               return -EINVAL;
> >
> > +     /* Use a local variable in case cmd_rc pointer is NULL */
> > +     if (!cmd_rc)
> > +             cmd_rc = &rc;
> > +
>
> This protects you from the NULL.  However...
>
> >       p = nvdimm_provider_data(nvdimm);
> >
> >       switch (cmd) {
> > @@ -381,12 +386,13 @@ static int papr_scm_ndctl(struct nvdimm_bus_descriptor *nd_desc,
> >               break;
> >
> >       default:
> > -             return -EINVAL;
> > +             dev_dbg(&p->pdev->dev, "Unknown command = %d\n", cmd);
> > +             *cmd_rc = -EINVAL;
>
> ... I think you are conflating rc and cmd_rc...
>
> >       }
> >
> >       dev_dbg(&p->pdev->dev, "returned with cmd_rc = %d\n", *cmd_rc);
> >
> > -     return 0;
> > +     return *cmd_rc;
>
> ... this changes the behavior of the current commands.  Now if the underlying
> papr_scm_meta_[get|set]() fails you return that failure as rc rather than 0.
>
> Is that ok?

The expectation is that rc is "did the command get sent to the device,
or did it fail for 'transport' reasons". The role of cmd_rc is to
translate the specific status response of the command into a common
error code. The expectations are:

rc < 0: Error code, Linux terminated the ioctl before talking to hardware

rc == 0: Linux successfully submitted the command to hardware, cmd_rc
is valid for command specific response

rc > 0: Linux successfully submitted the command, but detected that
only a subset of the data was accepted for "write"-style commands, or
that only subset of data was returned for "read"-style commands. I.e.
short-write / short-read semantics. cmd_rc is valid in this case and
its up to userspace to determine if a short transfer is an error or
not.

> Also 'logging cmd_rc' in the invalid cmd case does not seem quite right unless
> you really want rc to be cmd_rc.
>
> The architecture is designed to separate errors which occur in the kernel vs
> errors in the firmware/dimm.  Are they always the same?  The current code
> differentiates them.

Yeah, they're distinct, transport vs end-point / command-specific
status returns.

^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox