LinuxPPC-Dev Archive on lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/2] selftests/powerpc: Allow choice of CI memory location in alignment_handler test
From: Jordan Niethe @ 2020-05-20  2:11 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: alistair, Jordan Niethe

The alignment handler selftest needs cache-inhibited memory and
currently /dev/fb0 is relied on to provided this. This prevents running
the test on systems without /dev/fb0 (e.g., mambo). Read the commandline
arguments for an optional path to be used instead, as well as an
optional offset to be for mmaping this path.

Signed-off-by: Jordan Niethe <jniethe5@gmail.com>
---
 .../powerpc/alignment/alignment_handler.c     | 63 ++++++++++++-------
 1 file changed, 42 insertions(+), 21 deletions(-)

diff --git a/tools/testing/selftests/powerpc/alignment/alignment_handler.c b/tools/testing/selftests/powerpc/alignment/alignment_handler.c
index 0453c50c949c..eb6aba323f8b 100644
--- a/tools/testing/selftests/powerpc/alignment/alignment_handler.c
+++ b/tools/testing/selftests/powerpc/alignment/alignment_handler.c
@@ -9,7 +9,17 @@
  * This selftest exercises the powerpc alignment fault handler.
  *
  * We create two sets of source and destination buffers, one in regular memory,
- * the other cache-inhibited (we use /dev/fb0 for this).
+ * the other cache-inhibited (by default we use /dev/fb0 for this, but an
+ * alterative path for cache-inhibited memory may be provided).
+ *
+ * One way to get cache-inhibited memory is to use the "mem" kernel parameter
+ * to limit the kernel to less memory than actually exists.  Addresses above
+ * the limit may still be accessed but will be treated as cache-inhibited. For
+ * example, if there is actually 4GB of memory and the parameter "mem=3GB" is
+ * used, memory from address 0xC0000000 onwards is treated as cache-inhibited.
+ * To access this region /dev/mem is used. The kernel should be configured
+ * without CONFIG_STRICT_DEVMEM. In this case use:
+ *         ./alignment_handler /dev/mem 0xc0000000
  *
  * We initialise the source buffers, then use whichever set of load/store
  * instructions is under test to copy bytes from the source buffers to the
@@ -53,6 +63,8 @@ int bufsize;
 int debug;
 int testing;
 volatile int gotsig;
+char *cipath = "/dev/fb0";
+long cioffset;
 
 void sighandler(int sig, siginfo_t *info, void *ctx)
 {
@@ -195,17 +207,18 @@ int do_test(char *test_name, void (*test_func)(char *, char *))
 
 	printf("\tDoing %s:\t", test_name);
 
-	fd = open("/dev/fb0", O_RDWR);
+	fd = open(cipath, O_RDWR);
 	if (fd < 0) {
 		printf("\n");
-		perror("Can't open /dev/fb0 now?");
+		perror("Can't open ci file now?");
 		return 1;
 	}
 
-	ci0 = mmap(NULL, bufsize, PROT_WRITE, MAP_SHARED,
-		   fd, 0x0);
-	ci1 = mmap(NULL, bufsize, PROT_WRITE, MAP_SHARED,
-		   fd, bufsize);
+	ci0 = mmap(NULL, bufsize, PROT_WRITE | PROT_READ, MAP_SHARED,
+		   fd, cioffset);
+	ci1 = mmap(NULL, bufsize, PROT_WRITE | PROT_READ, MAP_SHARED,
+		   fd, cioffset + bufsize);
+
 	if ((ci0 == MAP_FAILED) || (ci1 == MAP_FAILED)) {
 		printf("\n");
 		perror("mmap failed");
@@ -270,11 +283,11 @@ int do_test(char *test_name, void (*test_func)(char *, char *))
 	return rc;
 }
 
-static bool can_open_fb0(void)
+static bool can_open_cifile(void)
 {
 	int fd;
 
-	fd = open("/dev/fb0", O_RDWR);
+	fd = open(cipath, O_RDWR);
 	if (fd < 0)
 		return false;
 
@@ -286,7 +299,7 @@ int test_alignment_handler_vsx_206(void)
 {
 	int rc = 0;
 
-	SKIP_IF(!can_open_fb0());
+	SKIP_IF(!can_open_cifile());
 	SKIP_IF(!have_hwcap(PPC_FEATURE_ARCH_2_06));
 
 	printf("VSX: 2.06B\n");
@@ -304,7 +317,7 @@ int test_alignment_handler_vsx_207(void)
 {
 	int rc = 0;
 
-	SKIP_IF(!can_open_fb0());
+	SKIP_IF(!can_open_cifile());
 	SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_2_07));
 
 	printf("VSX: 2.07B\n");
@@ -320,7 +333,7 @@ int test_alignment_handler_vsx_300(void)
 {
 	int rc = 0;
 
-	SKIP_IF(!can_open_fb0());
+	SKIP_IF(!can_open_cifile());
 
 	SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_3_00));
 	printf("VSX: 3.00B\n");
@@ -352,7 +365,7 @@ int test_alignment_handler_integer(void)
 {
 	int rc = 0;
 
-	SKIP_IF(!can_open_fb0());
+	SKIP_IF(!can_open_cifile());
 
 	printf("Integer\n");
 	LOAD_DFORM_TEST(lbz);
@@ -408,7 +421,7 @@ int test_alignment_handler_integer_206(void)
 {
 	int rc = 0;
 
-	SKIP_IF(!can_open_fb0());
+	SKIP_IF(!can_open_cifile());
 	SKIP_IF(!have_hwcap(PPC_FEATURE_ARCH_2_06));
 
 	printf("Integer: 2.06\n");
@@ -423,7 +436,7 @@ int test_alignment_handler_vmx(void)
 {
 	int rc = 0;
 
-	SKIP_IF(!can_open_fb0());
+	SKIP_IF(!can_open_cifile());
 	SKIP_IF(!have_hwcap(PPC_FEATURE_HAS_ALTIVEC));
 
 	printf("VMX\n");
@@ -451,7 +464,7 @@ int test_alignment_handler_fp(void)
 {
 	int rc = 0;
 
-	SKIP_IF(!can_open_fb0());
+	SKIP_IF(!can_open_cifile());
 
 	printf("Floating point\n");
 	LOAD_FLOAT_DFORM_TEST(lfd);
@@ -479,7 +492,7 @@ int test_alignment_handler_fp_205(void)
 {
 	int rc = 0;
 
-	SKIP_IF(!can_open_fb0());
+	SKIP_IF(!can_open_cifile());
 	SKIP_IF(!have_hwcap(PPC_FEATURE_ARCH_2_05));
 
 	printf("Floating point: 2.05\n");
@@ -497,7 +510,7 @@ int test_alignment_handler_fp_206(void)
 {
 	int rc = 0;
 
-	SKIP_IF(!can_open_fb0());
+	SKIP_IF(!can_open_cifile());
 	SKIP_IF(!have_hwcap(PPC_FEATURE_ARCH_2_06));
 
 	printf("Floating point: 2.06\n");
@@ -509,11 +522,12 @@ int test_alignment_handler_fp_206(void)
 
 void usage(char *prog)
 {
-	printf("Usage: %s [options]\n", prog);
+	printf("Usage: %s [options] [path [offset]]\n", prog);
 	printf("  -d	Enable debug error output\n");
 	printf("\n");
-	printf("This test requires a POWER8 or POWER9 CPU and a usable ");
-	printf("framebuffer at /dev/fb0.\n");
+	printf("This test requires a POWER8 or POWER9 CPU and either a ");
+	printf("usable framebuffer at /dev/fb0 or the path to usable ");
+	printf("cache-inhibited memory and optional offset to be provided\n");
 }
 
 int main(int argc, char *argv[])
@@ -533,6 +547,13 @@ int main(int argc, char *argv[])
 			exit(1);
 		}
 	}
+	argc -= optind;
+	argv += optind;
+
+	if (argc > 0)
+		cipath = argv[0];
+	if (argc > 1)
+		cioffset = strtol(argv[1], 0, 0x10);
 
 	bufsize = getpagesize();
 
-- 
2.17.1


^ permalink raw reply related

* Re: [PATCH V2] powerpc/perf: Add support for outputting extended regs in perf intr_regs
From: Arnaldo Carvalho de Melo @ 2020-05-20  2:01 UTC (permalink / raw)
  To: Athira Rajeev
  Cc: ravi.bangoria, maddy, linux-kernel, anju, jolsa, linuxppc-dev
In-Reply-To: <1589868937-1537-1-git-send-email-atrajeev@linux.vnet.ibm.com>

Em Tue, May 19, 2020 at 02:15:37AM -0400, Athira Rajeev escreveu:
> From: Anju T Sudhakar <anju@linux.vnet.ibm.com>
> 
> Add support for perf extended register capability in powerpc.
> The capability flag PERF_PMU_CAP_EXTENDED_REGS, is used to indicate the
> PMU which support extended registers. The generic code define the mask
> of extended registers as 0 for non supported architectures.
> 
> Patch adds extended regs support for power9 platform by
> exposing MMCR0, MMCR1 and MMCR2 registers.
> 
> REG_RESERVED mask needs update to include extended regs.
> `PERF_REG_EXTENDED_MASK`, contains mask value of the supported registers,
> is defined at runtime in the kernel based on platform since the supported
> registers may differ from one processor version to another and hence the
> MASK value.
> 
> Perf tools side uses extended mask to display the platform
> supported register names (with -I? option) to the user and also
> send this mask to the kernel to capture the extended registers
> in each sample. Hence decide the mask value based on the processor
> version.
> 
> with patch
> ----------
> 
> available registers: r0 r1 r2 r3 r4 r5 r6 r7 r8 r9 r10 r11
> r12 r13 r14 r15 r16 r17 r18 r19 r20 r21 r22 r23 r24 r25 r26
> r27 r28 r29 r30 r31 nip msr orig_r3 ctr link xer ccr softe
> trap dar dsisr sier mmcra mmcr0 mmcr1 mmcr2
> 
> PERF_RECORD_SAMPLE(IP, 0x1): 4784/4784: 0 period: 1 addr: 0
> ... intr regs: mask 0xffffffffffff ABI 64-bit
> .... r0    0xc00000000012b77c
> .... r1    0xc000003fe5e03930
> .... r2    0xc000000001b0e000
> .... r3    0xc000003fdcddf800
> .... r4    0xc000003fc7880000
> .... r5    0x9c422724be
> .... r6    0xc000003fe5e03908
> .... r7    0xffffff63bddc8706
> .... r8    0x9e4
> .... r9    0x0
> .... r10   0x1
> .... r11   0x0
> .... r12   0xc0000000001299c0
> .... r13   0xc000003ffffc4800
> .... r14   0x0
> .... r15   0x7fffdd8b8b00
> .... r16   0x0
> .... r17   0x7fffdd8be6b8
> .... r18   0x7e7076607730
> .... r19   0x2f
> .... r20   0xc00000001fc26c68
> .... r21   0xc0002041e4227e00
> .... r22   0xc00000002018fb60
> .... r23   0x1
> .... r24   0xc000003ffec4d900
> .... r25   0x80000000
> .... r26   0x0
> .... r27   0x1
> .... r28   0x1
> .... r29   0xc000000001be1260
> .... r30   0x6008010
> .... r31   0xc000003ffebb7218
> .... nip   0xc00000000012b910
> .... msr   0x9000000000009033
> .... orig_r3 0xc00000000012b86c
> .... ctr   0xc0000000001299c0
> .... link  0xc00000000012b77c
> .... xer   0x0
> .... ccr   0x28002222
> .... softe 0x1
> .... trap  0xf00
> .... dar   0x0
> .... dsisr 0x80000000000
> .... sier  0x0
> .... mmcra 0x80000000000
> .... mmcr0 0x82008090
> .... mmcr1 0x1e000000
> .... mmcr2 0x0
>  ... thread: perf:4784
> 
> Signed-off-by: Anju T Sudhakar <anju@linux.vnet.ibm.com>
> [Defined PERF_REG_EXTENDED_MASK at run time to add support for different platforms ]
> Signed-off-by: Athira Rajeev <atrajeev@linux.vnet.ibm.com>
> ---
> Changes from v1 -> v2
> 
> - PERF_REG_EXTENDED_MASK` is defined at runtime in the kernel
> based on platform. This will give flexibility in using extended
> regs for all processor versions where the supported registers may differ.
> - removed PERF_REG_EXTENDED_MASK from the perf tools side. Based on the
> processor version(from PVR value), tool side will return the appropriate
> extended mask
> - Since tool changes can handle without a "PERF_REG_EXTENDED_MASK" macro,
> dropped patch to set NO_AUXTRACE.
> - Addressed review comments from Ravi Bangoria for V1
> 
> ---
> 
>  arch/powerpc/include/asm/perf_event_server.h    |  8 ++++
>  arch/powerpc/include/uapi/asm/perf_regs.h       | 14 ++++++-
>  arch/powerpc/perf/core-book3s.c                 |  1 +
>  arch/powerpc/perf/perf_regs.c                   | 34 ++++++++++++++--
>  arch/powerpc/perf/power9-pmu.c                  |  6 +++

Can you please split this patch so that the kernel bits are separate
from the tooling bits?

Thanks,

- Arnaldo

>  tools/arch/powerpc/include/uapi/asm/perf_regs.h | 14 ++++++-
>  tools/perf/arch/powerpc/include/perf_regs.h     |  5 ++-
>  tools/perf/arch/powerpc/util/perf_regs.c        | 54 +++++++++++++++++++++++++
>  8 files changed, 130 insertions(+), 6 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/perf_event_server.h b/arch/powerpc/include/asm/perf_event_server.h
> index 3e9703f..1458e1a 100644
> --- a/arch/powerpc/include/asm/perf_event_server.h
> +++ b/arch/powerpc/include/asm/perf_event_server.h
> @@ -15,6 +15,9 @@
>  #define MAX_EVENT_ALTERNATIVES	8
>  #define MAX_LIMITED_HWCOUNTERS	2
>  
> +extern u64 mask_var;
> +#define PERF_REG_EXTENDED_MASK          mask_var
> +
>  struct perf_event;
>  
>  /*
> @@ -55,6 +58,11 @@ struct power_pmu {
>  	int 		*blacklist_ev;
>  	/* BHRB entries in the PMU */
>  	int		bhrb_nr;
> +	/*
> +	 * set this flag with `PERF_PMU_CAP_EXTENDED_REGS` if
> +	 * the pmu supports extended perf regs capability
> +	 */
> +	int		capabilities;
>  };
>  
>  /*
> diff --git a/arch/powerpc/include/uapi/asm/perf_regs.h b/arch/powerpc/include/uapi/asm/perf_regs.h
> index f599064..485b1d5 100644
> --- a/arch/powerpc/include/uapi/asm/perf_regs.h
> +++ b/arch/powerpc/include/uapi/asm/perf_regs.h
> @@ -48,6 +48,18 @@ enum perf_event_powerpc_regs {
>  	PERF_REG_POWERPC_DSISR,
>  	PERF_REG_POWERPC_SIER,
>  	PERF_REG_POWERPC_MMCRA,
> -	PERF_REG_POWERPC_MAX,
> +	/* Extended registers */
> +	PERF_REG_POWERPC_MMCR0,
> +	PERF_REG_POWERPC_MMCR1,
> +	PERF_REG_POWERPC_MMCR2,
> +	/* Max regs without the extended regs */
> +	PERF_REG_POWERPC_MAX = PERF_REG_POWERPC_MMCRA + 1,
>  };
> +
> +#define PERF_REG_PMU_MASK	((1ULL << PERF_REG_POWERPC_MAX) - 1)
> +
> +/* PERF_REG_EXTENDED_MASK value for CPU_FTR_ARCH_300 */
> +#define PERF_REG_PMU_MASK_300   (((1ULL << (PERF_REG_POWERPC_MMCR2 + 1)) - 1) \
> +				- PERF_REG_PMU_MASK)
> +
>  #endif /* _UAPI_ASM_POWERPC_PERF_REGS_H */
> diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
> index 3dcfecf..f56b778 100644
> --- a/arch/powerpc/perf/core-book3s.c
> +++ b/arch/powerpc/perf/core-book3s.c
> @@ -2276,6 +2276,7 @@ int register_power_pmu(struct power_pmu *pmu)
>  
>  	power_pmu.attr_groups = ppmu->attr_groups;
>  
> +	power_pmu.capabilities |= (ppmu->capabilities & PERF_PMU_CAP_EXTENDED_REGS);
>  #ifdef MSR_HV
>  	/*
>  	 * Use FCHV to ignore kernel events if MSR.HV is set.
> diff --git a/arch/powerpc/perf/perf_regs.c b/arch/powerpc/perf/perf_regs.c
> index a213a0a..f1dbbc5 100644
> --- a/arch/powerpc/perf/perf_regs.c
> +++ b/arch/powerpc/perf/perf_regs.c
> @@ -13,9 +13,11 @@
>  #include <asm/ptrace.h>
>  #include <asm/perf_regs.h>
>  
> +u64 mask_var;
> +
>  #define PT_REGS_OFFSET(id, r) [id] = offsetof(struct pt_regs, r)
>  
> -#define REG_RESERVED (~((1ULL << PERF_REG_POWERPC_MAX) - 1))
> +#define REG_RESERVED (~(PERF_REG_EXTENDED_MASK | PERF_REG_PMU_MASK))
>  
>  static unsigned int pt_regs_offset[PERF_REG_POWERPC_MAX] = {
>  	PT_REGS_OFFSET(PERF_REG_POWERPC_R0,  gpr[0]),
> @@ -69,10 +71,26 @@
>  	PT_REGS_OFFSET(PERF_REG_POWERPC_MMCRA, dsisr),
>  };
>  
> +/* Function to return the extended register values */
> +static u64 get_ext_regs_value(int idx)
> +{
> +	switch (idx) {
> +	case PERF_REG_POWERPC_MMCR0:
> +			return mfspr(SPRN_MMCR0);
> +	case PERF_REG_POWERPC_MMCR1:
> +			return mfspr(SPRN_MMCR1);
> +	case PERF_REG_POWERPC_MMCR2:
> +			return mfspr(SPRN_MMCR2);
> +	default: return 0;
> +	}
> +}
> +
>  u64 perf_reg_value(struct pt_regs *regs, int idx)
>  {
> -	if (WARN_ON_ONCE(idx >= PERF_REG_POWERPC_MAX))
> -		return 0;
> +	u64 PERF_REG_EXTENDED_MAX;
> +
> +	if (cpu_has_feature(CPU_FTR_ARCH_300))
> +		PERF_REG_EXTENDED_MAX = PERF_REG_POWERPC_MMCR2 + 1;
>  
>  	if (idx == PERF_REG_POWERPC_SIER &&
>  	   (IS_ENABLED(CONFIG_FSL_EMB_PERF_EVENT) ||
> @@ -85,6 +103,16 @@ u64 perf_reg_value(struct pt_regs *regs, int idx)
>  	    IS_ENABLED(CONFIG_PPC32)))
>  		return 0;
>  
> +	if (idx >= PERF_REG_POWERPC_MAX && idx < PERF_REG_EXTENDED_MAX)
> +		return get_ext_regs_value(idx);
> +
> +	/*
> +	 * If the idx is referring to value beyond the
> +	 * supported registers, return 0 with a warning
> +	 */
> +	if (WARN_ON_ONCE(idx >= PERF_REG_EXTENDED_MAX))
> +		return 0;
> +
>  	return regs_get_register(regs, pt_regs_offset[idx]);
>  }
>  
> diff --git a/arch/powerpc/perf/power9-pmu.c b/arch/powerpc/perf/power9-pmu.c
> index 08c3ef7..4525090 100644
> --- a/arch/powerpc/perf/power9-pmu.c
> +++ b/arch/powerpc/perf/power9-pmu.c
> @@ -90,6 +90,8 @@ enum {
>  #define POWER9_MMCRA_IFM3		0x00000000C0000000UL
>  #define POWER9_MMCRA_BHRB_MASK		0x00000000C0000000UL
>  
> +extern u64 mask_var;
> +
>  /* Nasty Power9 specific hack */
>  #define PVR_POWER9_CUMULUS		0x00002000
>  
> @@ -434,6 +436,7 @@ static void power9_config_bhrb(u64 pmu_bhrb_filter)
>  	.cache_events		= &power9_cache_events,
>  	.attr_groups		= power9_pmu_attr_groups,
>  	.bhrb_nr		= 32,
> +	.capabilities           = PERF_PMU_CAP_EXTENDED_REGS,
>  };
>  
>  int init_power9_pmu(void)
> @@ -457,6 +460,9 @@ int init_power9_pmu(void)
>  		}
>  	}
>  
> +	/* Set the PERF_REG_EXTENDED_MASK here */
> +	mask_var = PERF_REG_PMU_MASK_300;
> +
>  	rc = register_power_pmu(&power9_pmu);
>  	if (rc)
>  		return rc;
> diff --git a/tools/arch/powerpc/include/uapi/asm/perf_regs.h b/tools/arch/powerpc/include/uapi/asm/perf_regs.h
> index f599064..485b1d5 100644
> --- a/tools/arch/powerpc/include/uapi/asm/perf_regs.h
> +++ b/tools/arch/powerpc/include/uapi/asm/perf_regs.h
> @@ -48,6 +48,18 @@ enum perf_event_powerpc_regs {
>  	PERF_REG_POWERPC_DSISR,
>  	PERF_REG_POWERPC_SIER,
>  	PERF_REG_POWERPC_MMCRA,
> -	PERF_REG_POWERPC_MAX,
> +	/* Extended registers */
> +	PERF_REG_POWERPC_MMCR0,
> +	PERF_REG_POWERPC_MMCR1,
> +	PERF_REG_POWERPC_MMCR2,
> +	/* Max regs without the extended regs */
> +	PERF_REG_POWERPC_MAX = PERF_REG_POWERPC_MMCRA + 1,
>  };
> +
> +#define PERF_REG_PMU_MASK	((1ULL << PERF_REG_POWERPC_MAX) - 1)
> +
> +/* PERF_REG_EXTENDED_MASK value for CPU_FTR_ARCH_300 */
> +#define PERF_REG_PMU_MASK_300   (((1ULL << (PERF_REG_POWERPC_MMCR2 + 1)) - 1) \
> +				- PERF_REG_PMU_MASK)
> +
>  #endif /* _UAPI_ASM_POWERPC_PERF_REGS_H */
> diff --git a/tools/perf/arch/powerpc/include/perf_regs.h b/tools/perf/arch/powerpc/include/perf_regs.h
> index e18a355..46ed00d 100644
> --- a/tools/perf/arch/powerpc/include/perf_regs.h
> +++ b/tools/perf/arch/powerpc/include/perf_regs.h
> @@ -64,7 +64,10 @@
>  	[PERF_REG_POWERPC_DAR] = "dar",
>  	[PERF_REG_POWERPC_DSISR] = "dsisr",
>  	[PERF_REG_POWERPC_SIER] = "sier",
> -	[PERF_REG_POWERPC_MMCRA] = "mmcra"
> +	[PERF_REG_POWERPC_MMCRA] = "mmcra",
> +	[PERF_REG_POWERPC_MMCR0] = "mmcr0",
> +	[PERF_REG_POWERPC_MMCR1] = "mmcr1",
> +	[PERF_REG_POWERPC_MMCR2] = "mmcr2",
>  };
>  
>  static inline const char *perf_reg_name(int id)
> diff --git a/tools/perf/arch/powerpc/util/perf_regs.c b/tools/perf/arch/powerpc/util/perf_regs.c
> index 0a52429..b78f81f 100644
> --- a/tools/perf/arch/powerpc/util/perf_regs.c
> +++ b/tools/perf/arch/powerpc/util/perf_regs.c
> @@ -6,9 +6,15 @@
>  
>  #include "../../../util/perf_regs.h"
>  #include "../../../util/debug.h"
> +#include "../../../util/event.h"
> +#include "../../../util/header.h"
> +#include "../../../perf-sys.h"
>  
> +#include <api/fs/fs.h>
>  #include <linux/kernel.h>
>  
> +#define PVR_POWER9		0x004E
> +
>  const struct sample_reg sample_reg_masks[] = {
>  	SMPL_REG(r0, PERF_REG_POWERPC_R0),
>  	SMPL_REG(r1, PERF_REG_POWERPC_R1),
> @@ -55,6 +61,9 @@
>  	SMPL_REG(dsisr, PERF_REG_POWERPC_DSISR),
>  	SMPL_REG(sier, PERF_REG_POWERPC_SIER),
>  	SMPL_REG(mmcra, PERF_REG_POWERPC_MMCRA),
> +	SMPL_REG(mmcr0, PERF_REG_POWERPC_MMCR0),
> +	SMPL_REG(mmcr1, PERF_REG_POWERPC_MMCR1),
> +	SMPL_REG(mmcr2, PERF_REG_POWERPC_MMCR2),
>  	SMPL_REG_END
>  };
>  
> @@ -163,3 +172,48 @@ int arch_sdt_arg_parse_op(char *old_op, char **new_op)
>  
>  	return SDT_ARG_VALID;
>  }
> +
> +uint64_t arch__intr_reg_mask(void)
> +{
> +	struct perf_event_attr attr = {
> +		.type                   = PERF_TYPE_HARDWARE,
> +		.config                 = PERF_COUNT_HW_CPU_CYCLES,
> +		.sample_type            = PERF_SAMPLE_REGS_INTR,
> +		.precise_ip             = 1,
> +		.disabled               = 1,
> +		.exclude_kernel         = 1,
> +	};
> +	int fd, ret;
> +	char buffer[64];
> +	u32 version;
> +	u64 extended_mask = 0;
> +
> +	/* Get the PVR value to set the extended
> +	 * mask specific to platform
> +	 */
> +	get_cpuid(buffer, sizeof(buffer));
> +	ret = sscanf(buffer, "%u,", &version);
> +
> +	if (ret != 1) {
> +		pr_debug("Failed to get the processor version, unable to output extended registers\n");
> +		return PERF_REGS_MASK;
> +	}
> +
> +	if (version == PVR_POWER9)
> +		extended_mask = PERF_REG_PMU_MASK_300;
> +
> +	attr.sample_regs_intr = extended_mask;
> +	attr.sample_period = 1;
> +	event_attr_init(&attr);
> +
> +	/*
> +	 * check if the pmu supports perf extended regs, before
> +	 * returning the register mask to sample.
> +	 */
> +	fd = sys_perf_event_open(&attr, 0, -1, -1, 0);
> +	if (fd != -1) {
> +		close(fd);
> +		return (extended_mask | PERF_REGS_MASK);
> +	}
> +	return PERF_REGS_MASK;
> +}
> -- 
> 1.8.3.1
> 

-- 

- Arnaldo

^ permalink raw reply

* Re: Fwd: [CRON] Broken: ClangBuiltLinux/continuous-integration#1432 (master - 0aceafc)
From: Nathan Chancellor @ 2020-05-20  1:01 UTC (permalink / raw)
  To: Nick Desaulniers; +Cc: linuxppc-dev, clang-built-linux
In-Reply-To: <CAKwvOdn0Spc15v3WUE_rdrb5UvaTXmOvjEJOsD7ahktQOwQk+A@mail.gmail.com>

On Tue, May 19, 2020 at 05:56:32PM -0700, 'Nick Desaulniers' via Clang Built Linux wrote:
> Looks like our CI is still red from this:
> 
> https://travis-ci.com/github/ClangBuiltLinux/continuous-integration/builds/166854584
> 
> Filing a bug to follow up on:
> https://github.com/ClangBuiltLinux/linux/issues/1031
> 
> On Thu, May 7, 2020 at 8:29 PM Michael Ellerman <mpe@ellerman.id.au> wrote:
> >
> > Nick Desaulniers <ndesaulniers@google.com> writes:
> > > Looks like ppc64le powernv_defconfig is suddenly failing the locking
> > > torture tests, then locks up?
> > > https://travis-ci.com/github/ClangBuiltLinux/continuous-integration/jobs/329211572#L3111-L3167
> > > Any recent changes related here in -next?  I believe this is the first
> > > failure, so I'll report back if we see this again.
> >
> > Thanks for the report.
> >
> > There's nothing newly in next-20200507 that seems related.
> >
> > Odd that it just showed up.
> >
> > cheers
> >
> >
> > > ---------- Forwarded message ---------
> > > From: Travis CI <builds@travis-ci.com>
> > > Date: Thu, May 7, 2020 at 9:40 AM
> > > Subject: [CRON] Broken: ClangBuiltLinux/continuous-integration#1432 (master
> > > - 0aceafc)
> > > To: <ndesaulniers@google.com>, <natechancellor@gmail.com>
> > >
> > >
> > > ClangBuiltLinux
> > >
> > > /
> > >
> > > continuous-integration
> > > <https://travis-ci.com/github/ClangBuiltLinux/continuous-integration?utm_medium=notification&utm_source=email>
> > >
> > > [image: branch icon]master
> > > <https://github.com/ClangBuiltLinux/continuous-integration/tree/master>
> > > [image: build has failed]
> > > Build #1432 was broken
> > > <https://travis-ci.com/github/ClangBuiltLinux/continuous-integration/builds/164415390?utm_medium=notification&utm_source=email>
> > > [image: arrow to build time]
> > > [image: clock icon]7 hrs, 0 mins, and 54 secs
> > >
> > > [image: Nick Desaulniers avatar]Nick Desaulniers
> > > 0aceafc CHANGESET →
> > > <https://github.com/ClangBuiltLinux/continuous-integration/compare/877d002bdcfe6bc5cb0255c3c39192e8175e2c19...0aceafcfcca7c4a095957efae0939a612d755077>
> > >
> > > Merge pull request #182 from ClangBuiltLinux/i386
> > >
> > > i386
> > >
> > > Want to know about upcoming build environment updates?
> > >
> > > Would you like to stay up-to-date with the upcoming Travis CI build
> > > environment updates? We set up a mailing list for you!
> > > SIGN UP HERE <http://eepurl.com/9OCsP>
> > >
> > > [image: book icon]
> > >
> > > Documentation <https://docs.travis-ci.com/> about Travis CI
> > > Have any questions? We're here to help. <support@travis-ci.com>
> > > Unsubscribe
> > > <https://travis-ci.com/account/preferences/unsubscribe?repository=6718752&utm_medium=notification&utm_source=email>
> > > from build emails from the ClangBuiltLinux/continuous-integration
> > > repository.
> > > To unsubscribe from *all* build emails, please update your settings
> > > <https://travis-ci.com/account/preferences/unsubscribe?utm_medium=notification&utm_source=email>.
> > >
> > > [image: black and white travis ci logo] <https://travis-ci.com>
> > >
> > > Travis CI GmbH, Rigaer Str. 8, 10427 Berlin, Germany | GF/CEO: Randy Jacops
> > > | Contact: contact@travis-ci.com | Amtsgericht Charlottenburg, Berlin, HRB
> > > 140133 B | Umsatzsteuer-ID gemäß §27 a Umsatzsteuergesetz: DE282002648
> > >
> > >
> > > --
> > > Thanks,
> > > ~Nick Desaulniers
> 
> 
> 
> -- 
> Thanks,
> ~Nick Desaulniers
> 

This is probably still a manifestation of
https://github.com/ClangBuiltLinux/continuous-integration/issues/262
because rekicking the tests usually fixes it.

We should probably just disable the torture tests like we do for x86_64
for CI because we do not have access to QEMU 5.0.0 where this should be
fixed. I believe it is slated for 4.2.1 as well but we still have to
wait for that to be updated and packaged in Ubuntu.

Relevant threads:

https://lore.kernel.org/linuxppc-dev/20200410205932.GA880@ubuntu-s3-xlarge-x86/

https://lore.kernel.org/qemu-devel/20200414111131.465560-1-npiggin@gmail.com/

Cheers,
Nathan

^ permalink raw reply

* Re: Fwd: [CRON] Broken: ClangBuiltLinux/continuous-integration#1432 (master - 0aceafc)
From: Nick Desaulniers @ 2020-05-20  0:56 UTC (permalink / raw)
  To: Michael Ellerman; +Cc: clang-built-linux, linuxppc-dev
In-Reply-To: <87d07fcdee.fsf@mpe.ellerman.id.au>

Looks like our CI is still red from this:

https://travis-ci.com/github/ClangBuiltLinux/continuous-integration/builds/166854584

Filing a bug to follow up on:
https://github.com/ClangBuiltLinux/linux/issues/1031

On Thu, May 7, 2020 at 8:29 PM Michael Ellerman <mpe@ellerman.id.au> wrote:
>
> Nick Desaulniers <ndesaulniers@google.com> writes:
> > Looks like ppc64le powernv_defconfig is suddenly failing the locking
> > torture tests, then locks up?
> > https://travis-ci.com/github/ClangBuiltLinux/continuous-integration/jobs/329211572#L3111-L3167
> > Any recent changes related here in -next?  I believe this is the first
> > failure, so I'll report back if we see this again.
>
> Thanks for the report.
>
> There's nothing newly in next-20200507 that seems related.
>
> Odd that it just showed up.
>
> cheers
>
>
> > ---------- Forwarded message ---------
> > From: Travis CI <builds@travis-ci.com>
> > Date: Thu, May 7, 2020 at 9:40 AM
> > Subject: [CRON] Broken: ClangBuiltLinux/continuous-integration#1432 (master
> > - 0aceafc)
> > To: <ndesaulniers@google.com>, <natechancellor@gmail.com>
> >
> >
> > ClangBuiltLinux
> >
> > /
> >
> > continuous-integration
> > <https://travis-ci.com/github/ClangBuiltLinux/continuous-integration?utm_medium=notification&utm_source=email>
> >
> > [image: branch icon]master
> > <https://github.com/ClangBuiltLinux/continuous-integration/tree/master>
> > [image: build has failed]
> > Build #1432 was broken
> > <https://travis-ci.com/github/ClangBuiltLinux/continuous-integration/builds/164415390?utm_medium=notification&utm_source=email>
> > [image: arrow to build time]
> > [image: clock icon]7 hrs, 0 mins, and 54 secs
> >
> > [image: Nick Desaulniers avatar]Nick Desaulniers
> > 0aceafc CHANGESET →
> > <https://github.com/ClangBuiltLinux/continuous-integration/compare/877d002bdcfe6bc5cb0255c3c39192e8175e2c19...0aceafcfcca7c4a095957efae0939a612d755077>
> >
> > Merge pull request #182 from ClangBuiltLinux/i386
> >
> > i386
> >
> > Want to know about upcoming build environment updates?
> >
> > Would you like to stay up-to-date with the upcoming Travis CI build
> > environment updates? We set up a mailing list for you!
> > SIGN UP HERE <http://eepurl.com/9OCsP>
> >
> > [image: book icon]
> >
> > Documentation <https://docs.travis-ci.com/> about Travis CI
> > Have any questions? We're here to help. <support@travis-ci.com>
> > Unsubscribe
> > <https://travis-ci.com/account/preferences/unsubscribe?repository=6718752&utm_medium=notification&utm_source=email>
> > from build emails from the ClangBuiltLinux/continuous-integration
> > repository.
> > To unsubscribe from *all* build emails, please update your settings
> > <https://travis-ci.com/account/preferences/unsubscribe?utm_medium=notification&utm_source=email>.
> >
> > [image: black and white travis ci logo] <https://travis-ci.com>
> >
> > Travis CI GmbH, Rigaer Str. 8, 10427 Berlin, Germany | GF/CEO: Randy Jacops
> > | Contact: contact@travis-ci.com | Amtsgericht Charlottenburg, Berlin, HRB
> > 140133 B | Umsatzsteuer-ID gemäß §27 a Umsatzsteuergesetz: DE282002648
> >
> >
> > --
> > Thanks,
> > ~Nick Desaulniers



-- 
Thanks,
~Nick Desaulniers

^ permalink raw reply

* Re: [PATCH v2 6/7] powerpc/dt_cpu_ftrs: Add MMA feature
From: Alistair Popple @ 2020-05-19 23:56 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: aneesh.kumar, mikey, Paul A. Clarke, npiggin
In-Reply-To: <20200519175153.GE24922@oc3272150783.ibm.com>

Thanks, not sure where I got that name from but it's probably wrong in a few 
places. Will wait a bit in case there are any more comments and then respin 
the series to update the name.

- Alistair

On Wednesday, 20 May 2020 3:51:53 AM AEST Paul A. Clarke wrote:
> On Tue, May 19, 2020 at 10:31:56AM +1000, Alistair Popple wrote:
> > Matrix multiple accumulate (MMA) is a new feature added to ISAv3.1 and
> 
> Conclusion is that this should be "Matrix-Multiply Assist", but then there
> are a couple more below...
> 
> > POWER10. Support on powernv can be selected via a firmware CPU device
> > tree feature which enables it via a PCR bit.
> > 
> > Signed-off-by: Alistair Popple <alistair@popple.id.au>
> > ---
> > 
> >  arch/powerpc/include/asm/reg.h    |  3 ++-
> >  arch/powerpc/kernel/dt_cpu_ftrs.c | 17 ++++++++++++++++-
> >  2 files changed, 18 insertions(+), 2 deletions(-)
> > 
> > diff --git a/arch/powerpc/include/asm/reg.h
> > b/arch/powerpc/include/asm/reg.h index 1931b1142599..c446863a40cf 100644
> > --- a/arch/powerpc/include/asm/reg.h
> > +++ b/arch/powerpc/include/asm/reg.h
> > @@ -479,7 +479,8 @@
> > 
> >  #define   PCR_VEC_DIS	(__MASK(63-0))	/* Vec. disable (bit NA since
> >  POWER8) */ #define   PCR_VSX_DIS	(__MASK(63-1))	/* VSX disable (bit NA
> >  since POWER8) */ #define   PCR_TM_DIS	(__MASK(63-2))	/* Trans. memory
> >  disable (POWER8) */> 
> > -#define   PCR_HIGH_BITS	(PCR_VEC_DIS | PCR_VSX_DIS | PCR_TM_DIS)
> > +#define   PCR_MMA_DIS	(__MASK(63-3)) /* Matrix-Multiply Accelerator */
> 
> also here.
> 
> > +#define   PCR_HIGH_BITS	(PCR_MMA_DIS | PCR_VEC_DIS | PCR_VSX_DIS |
> > PCR_TM_DIS)> 
> >  /*
> >  
> >   * These bits are used in the function kvmppc_set_arch_compat() to
> >   specify and * determine both the compatibility level which we want to
> >   emulate and the> 
> > diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c
> > b/arch/powerpc/kernel/dt_cpu_ftrs.c index 93c340906aad..e7540ee5cad8
> > 100644
> > --- a/arch/powerpc/kernel/dt_cpu_ftrs.c
> > +++ b/arch/powerpc/kernel/dt_cpu_ftrs.c
> > @@ -75,6 +75,7 @@ static struct {
> > 
> >  	u64	lpcr_clear;
> >  	u64	hfscr;
> >  	u64	fscr;
> > 
> > +	u64	pcr;
> > 
> >  } system_registers;
> >  
> >  static void (*init_pmu_registers)(void);
> > 
> > @@ -102,7 +103,7 @@ static void __restore_cpu_cpufeatures(void)
> > 
> >  	if (hv_mode) {
> >  	
> >  		mtspr(SPRN_LPID, 0);
> >  		mtspr(SPRN_HFSCR, system_registers.hfscr);
> > 
> > -		mtspr(SPRN_PCR, PCR_MASK);
> > +		mtspr(SPRN_PCR, system_registers.pcr);
> > 
> >  	}
> >  	mtspr(SPRN_FSCR, system_registers.fscr);
> > 
> > @@ -555,6 +556,18 @@ static int __init feat_enable_large_ci(struct
> > dt_cpu_feature *f)> 
> >  	return 1;
> >  
> >  }
> > 
> > +static int __init feat_enable_mma(struct dt_cpu_feature *f)
> > +{
> > +	u64 pcr;
> > +
> > +	feat_enable(f);
> > +	pcr = mfspr(SPRN_PCR);
> > +	pcr &= ~PCR_MMA_DIS;
> > +	mtspr(SPRN_PCR, pcr);
> > +
> > +	return 1;
> > +}
> > +
> > 
> >  struct dt_cpu_feature_match {
> >  
> >  	const char *name;
> >  	int (*enable)(struct dt_cpu_feature *f);
> > 
> > @@ -629,6 +642,7 @@ static struct dt_cpu_feature_match __initdata
> > 
> >  	{"vector-binary16", feat_enable, 0},
> >  	{"wait-v3", feat_enable, 0},
> >  	{"prefix-instructions", feat_enable, 0},
> > 
> > +	{"matrix-multiply-accumulate", feat_enable_mma, 0},
> 
> and presumably here as well.
> 
> >  };
> 
> PC





^ permalink raw reply

* Re: [PATCH] arch/{mips,sparc,microblaze,powerpc}: Don't enable pagefault/preempt twice
From: Guenter Roeck @ 2020-05-19 19:42 UTC (permalink / raw)
  To: Ira Weiny
  Cc: Peter Zijlstra, Dave Hansen, dri-devel, linux-mips,
	James E.J. Bottomley, Max Filippov, Paul Mackerras,
	H. Peter Anvin, sparclinux, Dan Williams, Helge Deller, x86,
	linux-csky, Christoph Hellwig, Ingo Molnar, linux-snps-arc,
	linux-xtensa, Borislav Petkov, Al Viro, Andy Lutomirski,
	Thomas Gleixner, linux-arm-kernel, Chris Zankel,
	Thomas Bogendoerfer, linux-parisc, linux-kernel, Christian Koenig,
	Andrew Morton, linuxppc-dev, David S. Miller
In-Reply-To: <20200519184031.GB3356843@iweiny-DESK2.sc.intel.com>

On Tue, May 19, 2020 at 11:40:32AM -0700, Ira Weiny wrote:
> On Tue, May 19, 2020 at 09:54:22AM -0700, Guenter Roeck wrote:
> > On Mon, May 18, 2020 at 11:48:43AM -0700, ira.weiny@intel.com wrote:
> > > From: Ira Weiny <ira.weiny@intel.com>
> > > 
> > > The kunmap_atomic clean up failed to remove one set of pagefault/preempt
> > > enables when vaddr is not in the fixmap.
> > > 
> > > Fixes: bee2128a09e6 ("arch/kunmap_atomic: consolidate duplicate code")
> > > Signed-off-by: Ira Weiny <ira.weiny@intel.com>
> > 
> > microblazeel works with this patch,
> 
> Awesome...  Andrew in my rush yesterday I should have put a reported by on the
> patch for Guenter as well.
> 
> Sorry about that Guenter,

No worries.

> Ira
> 
> > as do the nosmp sparc32 boot tests,
> > but sparc32 boot tests with SMP enabled still fail with lots of messages
> > such as:
> > 
> > BUG: Bad page state in process swapper/0  pfn:006a1
> > page:f0933420 refcount:0 mapcount:1 mapping:(ptrval) index:0x1
> > flags: 0x0()
> > raw: 00000000 00000100 00000122 00000000 00000001 00000000 00000000 00000000
> > page dumped because: nonzero mapcount
> > Modules linked in:
> > CPU: 0 PID: 1 Comm: swapper/0 Tainted: G    B             5.7.0-rc6-next-20200518-00002-gb178d2d56f29 #1
> > [f00e7ab8 :
> > bad_page+0xa8/0x108 ]
> > [f00e8b54 :
> > free_pcppages_bulk+0x154/0x52c ]
> > [f00ea024 :
> > free_unref_page+0x54/0x6c ]
> > [f00ed864 :
> > free_reserved_area+0x58/0xec ]
> > [f0527104 :
> > kernel_init+0x14/0x110 ]
> > [f000b77c :
> > ret_from_kernel_thread+0xc/0x38 ]
> > [00000000 :
> > 0x0 ]
> > 
> > Code path leading to that message is different but always the same
> > from free_unref_page().
> > 
> > Still testing ppc images.
> > 

ppc image tests are passing with this patch.

Guenter

^ permalink raw reply

* [RESEND PATCH v7 5/5] powerpc/papr_scm: Implement support for PAPR_SCM_PDSM_HEALTH
From: Vaibhav Jain @ 2020-05-19 19:00 UTC (permalink / raw)
  To: linuxppc-dev, linux-nvdimm, linux-kernel
  Cc: Santosh Sivaraj, Steven Rostedt, Oliver O'Halloran,
	Aneesh Kumar K . V, Vaibhav Jain, Dan Williams
In-Reply-To: <20200519190058.257981-1-vaibhav@linux.ibm.com>

This patch implements support for PDSM request 'PAPR_SCM_PDSM_HEALTH'
that returns a newly introduced 'struct nd_papr_pdsm_health' instance
containing dimm health information back to user space in response to
ND_CMD_CALL. This functionality is implemented in newly introduced
papr_scm_get_health() that queries the scm-dimm health information and
then copies this information to the package payload whose layout is
defined by 'struct nd_papr_pdsm_health'.

The patch also introduces a new member 'struct papr_scm_priv.health'
thats an instance of 'struct nd_papr_pdsm_health' to cache the health
information of a nvdimm. As a result functions drc_pmem_query_health()
and flags_show() are updated to populate and use this new struct
instead of a u64 integer that was earlier used.

Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: "Aneesh Kumar K . V" <aneesh.kumar@linux.ibm.com>
Signed-off-by: Vaibhav Jain <vaibhav@linux.ibm.com>
---
Changelog:

Resend:
* None

v6..v7:
* Updated flags_show() to use seq_buf_printf(). [Mpe]
* Updated papr_scm_get_health() to use newly introduced
  __drc_pmem_query_health() bypassing the cache [Mpe].

v5..v6:
* Added attribute '__packed' to 'struct nd_papr_pdsm_health_v1' to
  gaurd against possibility of different compilers adding different
  paddings to the struct [ Dan Williams ]

* Updated 'struct nd_papr_pdsm_health_v1' to use __u8 instead of
  'bool' and also updated drc_pmem_query_health() to take this into
  account. [ Dan Williams ]

v4..v5:
* None

v3..v4:
* Call the DSM_PAPR_SCM_HEALTH service function from
  papr_scm_service_dsm() instead of papr_scm_ndctl(). [Aneesh]

v2..v3:
* Updated struct nd_papr_scm_dimm_health_stat_v1 to use '__xx' types
  as its exported to the userspace [Aneesh]
* Changed the constants DSM_PAPR_SCM_DIMM_XX indicating dimm health
  from enum to #defines [Aneesh]

v1..v2:
* New patch in the series
---
 arch/powerpc/include/uapi/asm/papr_scm_pdsm.h |  39 ++++++
 arch/powerpc/platforms/pseries/papr_scm.c     | 125 +++++++++++++++---
 2 files changed, 147 insertions(+), 17 deletions(-)

diff --git a/arch/powerpc/include/uapi/asm/papr_scm_pdsm.h b/arch/powerpc/include/uapi/asm/papr_scm_pdsm.h
index 671693439c1c..db0cf550dabe 100644
--- a/arch/powerpc/include/uapi/asm/papr_scm_pdsm.h
+++ b/arch/powerpc/include/uapi/asm/papr_scm_pdsm.h
@@ -113,6 +113,7 @@ struct nd_pdsm_cmd_pkg {
  */
 enum papr_scm_pdsm {
 	PAPR_SCM_PDSM_MIN = 0x0,
+	PAPR_SCM_PDSM_HEALTH,
 	PAPR_SCM_PDSM_MAX,
 };
 
@@ -131,4 +132,42 @@ static inline void *pdsm_cmd_to_payload(struct nd_pdsm_cmd_pkg *pcmd)
 		return (void *)((__u8 *) pcmd + pcmd->payload_offset);
 }
 
+/* Various scm-dimm health indicators */
+#define PAPR_PDSM_DIMM_HEALTHY       0
+#define PAPR_PDSM_DIMM_UNHEALTHY     1
+#define PAPR_PDSM_DIMM_CRITICAL      2
+#define PAPR_PDSM_DIMM_FATAL         3
+
+/*
+ * Struct exchanged between kernel & ndctl in for PAPR_SCM_PDSM_HEALTH
+ * Various flags indicate the health status of the dimm.
+ *
+ * dimm_unarmed		: Dimm not armed. So contents wont persist.
+ * dimm_bad_shutdown	: Previous shutdown did not persist contents.
+ * dimm_bad_restore	: Contents from previous shutdown werent restored.
+ * dimm_scrubbed	: Contents of the dimm have been scrubbed.
+ * dimm_locked		: Contents of the dimm cant be modified until CEC reboot
+ * dimm_encrypted	: Contents of dimm are encrypted.
+ * dimm_health		: Dimm health indicator. One of PAPR_PDSM_DIMM_XXXX
+ */
+struct nd_papr_pdsm_health_v1 {
+	__u8 dimm_unarmed;
+	__u8 dimm_bad_shutdown;
+	__u8 dimm_bad_restore;
+	__u8 dimm_scrubbed;
+	__u8 dimm_locked;
+	__u8 dimm_encrypted;
+	__u16 dimm_health;
+} __packed;
+
+/*
+ * Typedef the current struct for dimm_health so that any application
+ * or kernel recompiled after introducing a new version automatically
+ * supports the new version.
+ */
+#define nd_papr_pdsm_health nd_papr_pdsm_health_v1
+
+/* Current version number for the dimm health struct */
+#define ND_PAPR_PDSM_HEALTH_VERSION 1
+
 #endif /* _UAPI_ASM_POWERPC_PAPR_SCM_PDSM_H_ */
diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c
index ed4b49a6f1e1..c59bf17ad054 100644
--- a/arch/powerpc/platforms/pseries/papr_scm.c
+++ b/arch/powerpc/platforms/pseries/papr_scm.c
@@ -88,7 +88,7 @@ struct papr_scm_priv {
 	unsigned long lasthealth_jiffies;
 
 	/* Health information for the dimm */
-	u64 health_bitmap;
+	struct nd_papr_pdsm_health health;
 };
 
 static int drc_pmem_bind(struct papr_scm_priv *p)
@@ -201,6 +201,7 @@ static int drc_pmem_query_n_bind(struct papr_scm_priv *p)
 static int __drc_pmem_query_health(struct papr_scm_priv *p)
 {
 	unsigned long ret[PLPAR_HCALL_BUFSIZE];
+	u64 health;
 	s64 rc;
 
 	/* issue the hcall */
@@ -208,18 +209,46 @@ static int __drc_pmem_query_health(struct papr_scm_priv *p)
 	if (rc != H_SUCCESS) {
 		dev_err(&p->pdev->dev,
 			 "Failed to query health information, Err:%lld\n", rc);
-		rc = -ENXIO;
-		goto out;
+		return -ENXIO;
 	}
 
 	p->lasthealth_jiffies = jiffies;
-	p->health_bitmap = ret[0] & ret[1];
+	health = ret[0] & ret[1];
 
 	dev_dbg(&p->pdev->dev,
 		"Queried dimm health info. Bitmap:0x%016lx Mask:0x%016lx\n",
 		ret[0], ret[1]);
-out:
-	return rc;
+
+	memset(&p->health, 0, sizeof(p->health));
+
+	/* Check for various masks in bitmap and set the buffer */
+	if (health & PAPR_SCM_DIMM_UNARMED_MASK)
+		p->health.dimm_unarmed = 1;
+
+	if (health & PAPR_SCM_DIMM_BAD_SHUTDOWN_MASK)
+		p->health.dimm_bad_shutdown = 1;
+
+	if (health & PAPR_SCM_DIMM_BAD_RESTORE_MASK)
+		p->health.dimm_bad_restore = 1;
+
+	if (health & PAPR_SCM_DIMM_ENCRYPTED)
+		p->health.dimm_encrypted = 1;
+
+	if (health & PAPR_SCM_DIMM_SCRUBBED_AND_LOCKED) {
+		p->health.dimm_locked = 1;
+		p->health.dimm_scrubbed = 1;
+	}
+
+	if (health & PAPR_SCM_DIMM_HEALTH_UNHEALTHY)
+		p->health.dimm_health = PAPR_PDSM_DIMM_UNHEALTHY;
+
+	if (health & PAPR_SCM_DIMM_HEALTH_CRITICAL)
+		p->health.dimm_health = PAPR_PDSM_DIMM_CRITICAL;
+
+	if (health & PAPR_SCM_DIMM_HEALTH_FATAL)
+		p->health.dimm_health = PAPR_PDSM_DIMM_FATAL;
+
+	return 0;
 }
 
 /* Min interval in seconds for assuming stable dimm health */
@@ -403,6 +432,58 @@ static int is_cmd_valid(struct nvdimm *nvdimm, unsigned int cmd, void *buf,
 	return 0;
 }
 
+/* Fetch the DIMM health info and populate it in provided package. */
+static int papr_scm_get_health(struct papr_scm_priv *p,
+			       struct nd_pdsm_cmd_pkg *pkg)
+{
+	int rc;
+	size_t copysize = sizeof(p->health);
+
+	/* Ensure dimm health mutex is taken preventing concurrent access */
+	rc = mutex_lock_interruptible(&p->health_mutex);
+	if (rc)
+		goto out;
+
+	/* Always fetch upto date dimm health data ignoring cached values */
+	rc = __drc_pmem_query_health(p);
+	if (rc)
+		goto out_unlock;
+	/*
+	 * If the requested payload version is greater than one we know
+	 * about, return the payload version we know about and let
+	 * caller/userspace handle.
+	 */
+	if (pkg->payload_version > ND_PAPR_PDSM_HEALTH_VERSION)
+		pkg->payload_version = ND_PAPR_PDSM_HEALTH_VERSION;
+
+	if (pkg->hdr.nd_size_out < copysize) {
+		dev_dbg(&p->pdev->dev, "Truncated payload (%u). Expected (%lu)",
+			pkg->hdr.nd_size_out, copysize);
+		rc = -ENOSPC;
+		goto out_unlock;
+	}
+
+	dev_dbg(&p->pdev->dev, "Copying payload size=%lu version=0x%x\n",
+		copysize, pkg->payload_version);
+
+	/* Copy the health struct to the payload */
+	memcpy(pdsm_cmd_to_payload(pkg), &p->health, copysize);
+	pkg->hdr.nd_fw_size = copysize;
+
+out_unlock:
+	mutex_unlock(&p->health_mutex);
+
+out:
+	/*
+	 * Put the error in out package and return success from function
+	 * so that errors if any are propogated back to userspace.
+	 */
+	pkg->cmd_status = rc;
+	dev_dbg(&p->pdev->dev, "completion code = %d\n", rc);
+
+	return 0;
+}
+
 static int papr_scm_service_pdsm(struct papr_scm_priv *p,
 				struct nd_pdsm_cmd_pkg *call_pkg)
 {
@@ -417,6 +498,9 @@ static int papr_scm_service_pdsm(struct papr_scm_priv *p,
 
 	/* Depending on the DSM command call appropriate service routine */
 	switch (call_pkg->hdr.nd_command) {
+	case PAPR_SCM_PDSM_HEALTH:
+		return papr_scm_get_health(p, call_pkg);
+
 	default:
 		dev_dbg(&p->pdev->dev, "Unsupported PDSM request 0x%llx\n",
 			call_pkg->hdr.nd_command);
@@ -485,34 +569,41 @@ static ssize_t flags_show(struct device *dev,
 	struct nvdimm *dimm = to_nvdimm(dev);
 	struct papr_scm_priv *p = nvdimm_provider_data(dimm);
 	struct seq_buf s;
-	u64 health;
 	int rc;
 
 	rc = drc_pmem_query_health(p);
 	if (rc)
 		return rc;
 
-	/* Copy health_bitmap locally, check masks & update out buffer */
-	health = READ_ONCE(p->health_bitmap);
-
 	seq_buf_init(&s, buf, PAGE_SIZE);
-	if (health & PAPR_SCM_DIMM_UNARMED_MASK)
+
+	/* Protect concurrent modifications to papr_scm_priv */
+	rc = mutex_lock_interruptible(&p->health_mutex);
+	if (rc)
+		return rc;
+
+	if (p->health.dimm_unarmed)
 		seq_buf_printf(&s, "not_armed ");
 
-	if (health & PAPR_SCM_DIMM_BAD_SHUTDOWN_MASK)
+	if (p->health.dimm_bad_shutdown)
 		seq_buf_printf(&s, "flush_fail ");
 
-	if (health & PAPR_SCM_DIMM_BAD_RESTORE_MASK)
+	if (p->health.dimm_bad_restore)
 		seq_buf_printf(&s, "restore_fail ");
 
-	if (health & PAPR_SCM_DIMM_ENCRYPTED)
+	if (p->health.dimm_encrypted)
 		seq_buf_printf(&s, "encrypted ");
 
-	if (health & PAPR_SCM_DIMM_SMART_EVENT_MASK)
+	if (p->health.dimm_health)
 		seq_buf_printf(&s, "smart_notify ");
 
-	if (health & PAPR_SCM_DIMM_SCRUBBED_AND_LOCKED)
-		seq_buf_printf(&s, "scrubbed locked ");
+	if (p->health.dimm_scrubbed)
+		seq_buf_printf(&s, "scrubbed ");
+
+	if (p->health.dimm_locked)
+		seq_buf_printf(&s, "locked ");
+
+	mutex_unlock(&p->health_mutex);
 
 	if (seq_buf_used(&s))
 		seq_buf_printf(&s, "\n");
-- 
2.26.2


^ permalink raw reply related

* [RESEND PATCH v7 2/5] seq_buf: Export seq_buf_printf() to external modules
From: Vaibhav Jain @ 2020-05-19 19:00 UTC (permalink / raw)
  To: linuxppc-dev, linux-nvdimm, linux-kernel
  Cc: Santosh Sivaraj, Cezary Rojewski, Piotr Maziarz, Steven Rostedt,
	Oliver O'Halloran, Aneesh Kumar K . V, Borislav Petkov,
	Vaibhav Jain, Dan Williams
In-Reply-To: <20200519190058.257981-1-vaibhav@linux.ibm.com>

'seq_buf' provides a very useful abstraction for writing to a string
buffer without needing to worry about it over-flowing. However even
though the API has been stable for couple of years now its stills not
exported to external modules limiting its usage.

Hence this patch proposes update to 'seq_buf.c' to mark
seq_buf_printf() which is part of the seq_buf API to be exported to
external GPL modules. This symbol will be used in later parts of this
patchset to simplify content creation for a sysfs attribute.

Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Piotr Maziarz <piotrx.maziarz@linux.intel.com>
Cc: Cezary Rojewski <cezary.rojewski@intel.com>
Cc: Borislav Petkov <bp@alien8.de>
Acked-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Signed-off-by: Vaibhav Jain <vaibhav@linux.ibm.com>
---
Changelog:

Resend:
* Added ack from Steven Rostedt

v6..v7:
* New patch in the series
---
 lib/seq_buf.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/lib/seq_buf.c b/lib/seq_buf.c
index 4e865d42ab03..707453f5d58e 100644
--- a/lib/seq_buf.c
+++ b/lib/seq_buf.c
@@ -91,6 +91,7 @@ int seq_buf_printf(struct seq_buf *s, const char *fmt, ...)
 
 	return ret;
 }
+EXPORT_SYMBOL_GPL(seq_buf_printf);
 
 #ifdef CONFIG_BINARY_PRINTF
 /**
-- 
2.26.2


^ permalink raw reply related

* [RESEND PATCH v7 4/5] ndctl/papr_scm, uapi: Add support for PAPR nvdimm specific methods
From: Vaibhav Jain @ 2020-05-19 19:00 UTC (permalink / raw)
  To: linuxppc-dev, linux-nvdimm, linux-kernel
  Cc: Santosh Sivaraj, Steven Rostedt, Oliver O'Halloran,
	Aneesh Kumar K . V, Vaibhav Jain, Dan Williams
In-Reply-To: <20200519190058.257981-1-vaibhav@linux.ibm.com>

Introduce support for Papr nvDimm Specific Methods (PDSM) in papr_scm
modules and add the command family to the white list of NVDIMM command
sets. Also advertise support for ND_CMD_CALL for the dimm
command mask and implement necessary scaffolding in the module to
handle ND_CMD_CALL ioctl and PDSM requests that we receive.

The layout of the PDSM request as we expect from libnvdimm/libndctl is
described in newly introduced uapi header 'papr_scm_pdsm.h' which
defines a new 'struct nd_pdsm_cmd_pkg' header. This header is used
to communicate the PDSM request via member
'nd_pkg_papr_scm->nd_command' and size of payload that need to be
sent/received for servicing the PDSM.

A new function is_cmd_valid() is implemented that reads the args to
papr_scm_ndctl() and performs sanity tests on them. A new function
papr_scm_service_pdsm() is introduced and is called from
papr_scm_ndctl() in case of a PDSM request is received via ND_CMD_CALL
command from libnvdimm.

Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: "Aneesh Kumar K . V" <aneesh.kumar@linux.ibm.com>
Signed-off-by: Vaibhav Jain <vaibhav@linux.ibm.com>
---
Changelog:

Resend:
* None

v6..v7 :
* Removed the re-definitions of __packed macro from papr_scm_pdsm.h
  [Mpe].
* Removed the usage of __KERNEL__ macros in papr_scm_pdsm.h [Mpe].
* Removed macros that were unused in papr_scm.c from papr_scm_pdsm.h
  [Mpe].
* Made functions defined in papr_scm_pdsm.h as static inline. [Mpe]

v5..v6 :
* Changed the usage of the term DSM to PDSM to distinguish it from the
  ACPI term [ Dan Williams ]
* Renamed papr_scm_dsm.h to papr_scm_pdsm.h and updated various struct
  to reflect the new terminology.
* Updated the patch description and title to reflect the new terminology.
* Squashed patch to introduce new command family in 'ndctl.h' with
  this patch [ Dan Williams ]
* Updated the papr_scm_pdsm method starting index from 0x10000 to 0x0
  [ Dan Williams ]
* Removed redundant license text from the papr_scm_psdm.h file.
  [ Dan Williams ]
* s/envelop/envelope/ at various places [ Dan Williams ]
* Added '__packed' attribute to command package header to gaurd
  against different compiler adding paddings between the fields.
  [ Dan Williams]
* Converted various pr_debug to dev_debug [ Dan Williams ]

v4..v5 :
* None

v3..v4 :
* None

v2..v3 :
* Updated the patch prefix to 'ndctl/uapi' [Aneesh]

v1..v2 :
* None
---
 arch/powerpc/include/uapi/asm/papr_scm_pdsm.h | 134 ++++++++++++++++++
 arch/powerpc/platforms/pseries/papr_scm.c     | 101 ++++++++++++-
 include/uapi/linux/ndctl.h                    |   1 +
 3 files changed, 230 insertions(+), 6 deletions(-)
 create mode 100644 arch/powerpc/include/uapi/asm/papr_scm_pdsm.h

diff --git a/arch/powerpc/include/uapi/asm/papr_scm_pdsm.h b/arch/powerpc/include/uapi/asm/papr_scm_pdsm.h
new file mode 100644
index 000000000000..671693439c1c
--- /dev/null
+++ b/arch/powerpc/include/uapi/asm/papr_scm_pdsm.h
@@ -0,0 +1,134 @@
+/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
+/*
+ * PAPR-SCM Dimm specific methods (PDSM) and structs for libndctl
+ *
+ * (C) Copyright IBM 2020
+ *
+ * Author: Vaibhav Jain <vaibhav at linux.ibm.com>
+ */
+
+#ifndef _UAPI_ASM_POWERPC_PAPR_SCM_PDSM_H_
+#define _UAPI_ASM_POWERPC_PAPR_SCM_PDSM_H_
+
+#include <linux/types.h>
+
+/*
+ * PDSM Envelope:
+ *
+ * The ioctl ND_CMD_CALL transfers data between user-space and kernel via
+ * 'envelopes' which consists of a header and user-defined payload sections.
+ * The header is described by 'struct nd_pdsm_cmd_pkg' which expects a
+ * payload following it and offset of which relative to the struct is provided
+ * by 'nd_pdsm_cmd_pkg.payload_offset'. *
+ *
+ *  +-------------+---------------------+---------------------------+
+ *  |   64-Bytes  |       8-Bytes       |       Max 184-Bytes       |
+ *  +-------------+---------------------+---------------------------+
+ *  |               nd_pdsm_cmd_pkg |                           |
+ *  |-------------+                     |                           |
+ *  |  nd_cmd_pkg |                     |                           |
+ *  +-------------+---------------------+---------------------------+
+ *  | nd_family   |			|			    |
+ *  | nd_size_out | cmd_status          |			    |
+ *  | nd_size_in  | payload_version     |      PAYLOAD		    |
+ *  | nd_command  | payload_offset ----->			    |
+ *  | nd_fw_size  |                     |			    |
+ *  +-------------+---------------------+---------------------------+
+ *
+ * PDSM Header:
+ *
+ * The header is defined as 'struct nd_pdsm_cmd_pkg' which embeds a
+ * 'struct nd_cmd_pkg' instance. The PDSM command is assigned to member
+ * 'nd_cmd_pkg.nd_command'. Apart from size information of the envelope which is
+ * contained in 'struct nd_cmd_pkg', the header also has members following
+ * members:
+ *
+ * 'cmd_status'		: (Out) Errors if any encountered while servicing PDSM.
+ * 'payload_version'	: (In/Out) Version number associated with the payload.
+ * 'payload_offset'	: (In)Relative offset of payload from start of envelope.
+ *
+ * PDSM Payload:
+ *
+ * The layout of the PDSM Payload is defined by various structs shared between
+ * papr_scm and libndctl so that contents of payload can be interpreted. During
+ * servicing of a PDSM the papr_scm module will read input args from the payload
+ * field by casting its contents to an appropriate struct pointer based on the
+ * PDSM command. Similarly the output of servicing the PDSM command will be
+ * copied to the payload field using the same struct.
+ *
+ * 'libnvdimm' enforces a hard limit of 256 bytes on the envelope size, which
+ * leaves around 184 bytes for the envelope payload (ignoring any padding that
+ * the compiler may silently introduce).
+ *
+ * Payload Version:
+ *
+ * A 'payload_version' field is present in PDSM header that indicates a specific
+ * version of the structure present in PDSM Payload for a given PDSM command.
+ * This provides backward compatibility in case the PDSM Payload structure
+ * evolves and different structures are supported by 'papr_scm' and 'libndctl'.
+ *
+ * When sending a PDSM Payload to 'papr_scm', 'libndctl' should send the version
+ * of the payload struct it supports via 'payload_version' field. The 'papr_scm'
+ * module when servicing the PDSM envelope checks the 'payload_version' and then
+ * uses 'payload struct version' == MIN('payload_version field',
+ * 'max payload-struct-version supported by papr_scm') to service the PDSM.
+ * After servicing the PDSM, 'papr_scm' put the negotiated version of payload
+ * struct in returned 'payload_version' field.
+ *
+ * Libndctl on receiving the envelope back from papr_scm again checks the
+ * 'payload_version' field and based on it use the appropriate version dsm
+ * struct to parse the results.
+ *
+ * Backward Compatibility:
+ *
+ * Above scheme of exchanging different versioned PDSM struct between libndctl
+ * and papr_scm should provide backward compatibility until following two
+ * assumptions/conditions when defining new PDSM structs hold:
+ *
+ * Let T(X) = { set of attributes in PDSM struct 'T' versioned X }
+ *
+ * 1. T(X) is a proper subset of T(Y) if X > Y.
+ *    i.e Each new version of PDSM struct should retain existing struct
+ *    attributes from previous version
+ *
+ * 2. If an entity (libndctl or papr_scm) supports a PDSM struct T(X) then
+ *    it should also support T(1), T(2)...T(X - 1).
+ *    i.e When adding support for new version of a PDSM struct, libndctl
+ *    and papr_scm should retain support of the existing PDSM struct
+ *    version they support.
+ */
+
+/* Papr-scm-header + payload expected with ND_CMD_CALL ioctl from libnvdimm */
+struct nd_pdsm_cmd_pkg {
+	struct nd_cmd_pkg hdr;	/* Package header containing sub-cmd */
+	__s32 cmd_status;	/* Out: Sub-cmd status returned back */
+	__u16 payload_offset;	/* In: offset from start of struct */
+	__u16 payload_version;	/* In/Out: version of the payload */
+	__u8 payload[];		/* In/Out: Sub-cmd data buffer */
+} __packed;
+
+/*
+ * Methods to be embedded in ND_CMD_CALL request. These are sent to the kernel
+ * via 'nd_pdsm_cmd_pkg.hdr.nd_command' member of the ioctl struct
+ */
+enum papr_scm_pdsm {
+	PAPR_SCM_PDSM_MIN = 0x0,
+	PAPR_SCM_PDSM_MAX,
+};
+
+/* Convert a libnvdimm nd_cmd_pkg to pdsm specific pkg */
+static inline struct nd_pdsm_cmd_pkg *nd_to_pdsm_cmd_pkg(struct nd_cmd_pkg *cmd)
+{
+	return (struct nd_pdsm_cmd_pkg *) cmd;
+}
+
+/* Return the payload pointer for a given pcmd */
+static inline void *pdsm_cmd_to_payload(struct nd_pdsm_cmd_pkg *pcmd)
+{
+	if (pcmd->hdr.nd_size_in == 0 && pcmd->hdr.nd_size_out == 0)
+		return NULL;
+	else
+		return (void *)((__u8 *) pcmd + pcmd->payload_offset);
+}
+
+#endif /* _UAPI_ASM_POWERPC_PAPR_SCM_PDSM_H_ */
diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c
index 142636e1a59f..ed4b49a6f1e1 100644
--- a/arch/powerpc/platforms/pseries/papr_scm.c
+++ b/arch/powerpc/platforms/pseries/papr_scm.c
@@ -15,13 +15,15 @@
 #include <linux/seq_buf.h>
 
 #include <asm/plpar_wrappers.h>
+#include <asm/papr_scm_pdsm.h>
 
 #define BIND_ANY_ADDR (~0ul)
 
 #define PAPR_SCM_DIMM_CMD_MASK \
 	((1ul << ND_CMD_GET_CONFIG_SIZE) | \
 	 (1ul << ND_CMD_GET_CONFIG_DATA) | \
-	 (1ul << ND_CMD_SET_CONFIG_DATA))
+	 (1ul << ND_CMD_SET_CONFIG_DATA) | \
+	 (1ul << ND_CMD_CALL))
 
 /* DIMM health bitmap bitmap indicators */
 /* SCM device is unable to persist memory contents */
@@ -350,16 +352,97 @@ static int papr_scm_meta_set(struct papr_scm_priv *p,
 	return 0;
 }
 
+/*
+ * Validate the inputs args to dimm-control function and return '0' if valid.
+ * This also does initial sanity validation to ND_CMD_CALL sub-command packages.
+ */
+static int is_cmd_valid(struct nvdimm *nvdimm, unsigned int cmd, void *buf,
+		       unsigned int buf_len)
+{
+	unsigned long cmd_mask = PAPR_SCM_DIMM_CMD_MASK;
+	struct nd_pdsm_cmd_pkg *pkg = nd_to_pdsm_cmd_pkg(buf);
+	struct papr_scm_priv *p;
+
+	/* Only dimm-specific calls are supported atm */
+	if (!nvdimm)
+		return -EINVAL;
+
+	/* get the provider date from struct nvdimm */
+	p = nvdimm_provider_data(nvdimm);
+
+	if (!test_bit(cmd, &cmd_mask)) {
+		dev_dbg(&p->pdev->dev, "Unsupported cmd=%u\n", cmd);
+		return -EINVAL;
+	} else if (cmd == ND_CMD_CALL) {
+
+		/* Verify the envelope package */
+		if (!buf || buf_len < sizeof(struct nd_pdsm_cmd_pkg)) {
+			dev_dbg(&p->pdev->dev, "Invalid pkg size=%u\n",
+				buf_len);
+			return -EINVAL;
+		}
+
+		/* Verify that the PDSM family is valid */
+		if (pkg->hdr.nd_family != NVDIMM_FAMILY_PAPR_SCM) {
+			dev_dbg(&p->pdev->dev, "Invalid pkg family=0x%llx\n",
+				pkg->hdr.nd_family);
+			return -EINVAL;
+
+		}
+
+		/* We except a payload with all PDSM commands */
+		if (pdsm_cmd_to_payload(pkg) == NULL) {
+			dev_dbg(&p->pdev->dev,
+				"Empty payload for sub-command=0x%llx\n",
+				pkg->hdr.nd_command);
+			return -EINVAL;
+		}
+	}
+
+	/* Command looks valid */
+	return 0;
+}
+
+static int papr_scm_service_pdsm(struct papr_scm_priv *p,
+				struct nd_pdsm_cmd_pkg *call_pkg)
+{
+	/* unknown subcommands return error in packages */
+	if (call_pkg->hdr.nd_command <= PAPR_SCM_PDSM_MIN ||
+	    call_pkg->hdr.nd_command >= PAPR_SCM_PDSM_MAX) {
+		dev_dbg(&p->pdev->dev, "Invalid PDSM request 0x%llx\n",
+			call_pkg->hdr.nd_command);
+		call_pkg->cmd_status = -EINVAL;
+		return 0;
+	}
+
+	/* Depending on the DSM command call appropriate service routine */
+	switch (call_pkg->hdr.nd_command) {
+	default:
+		dev_dbg(&p->pdev->dev, "Unsupported PDSM request 0x%llx\n",
+			call_pkg->hdr.nd_command);
+		call_pkg->cmd_status = -ENOENT;
+		return 0;
+	}
+}
+
 static int papr_scm_ndctl(struct nvdimm_bus_descriptor *nd_desc,
 			  struct nvdimm *nvdimm, unsigned int cmd, void *buf,
 			  unsigned int buf_len, int *cmd_rc)
 {
 	struct nd_cmd_get_config_size *get_size_hdr;
 	struct papr_scm_priv *p;
+	struct nd_pdsm_cmd_pkg *call_pkg = NULL;
+	int rc;
 
-	/* Only dimm-specific calls are supported atm */
-	if (!nvdimm)
-		return -EINVAL;
+	/* Use a local variable in case cmd_rc pointer is NULL */
+	if (cmd_rc == NULL)
+		cmd_rc = &rc;
+
+	*cmd_rc = is_cmd_valid(nvdimm, cmd, buf, buf_len);
+	if (*cmd_rc) {
+		pr_debug("Invalid cmd=0x%x. Err=%d\n", cmd, *cmd_rc);
+		return *cmd_rc;
+	}
 
 	p = nvdimm_provider_data(nvdimm);
 
@@ -381,13 +464,19 @@ static int papr_scm_ndctl(struct nvdimm_bus_descriptor *nd_desc,
 		*cmd_rc = papr_scm_meta_set(p, buf);
 		break;
 
+	case ND_CMD_CALL:
+		call_pkg = nd_to_pdsm_cmd_pkg(buf);
+		*cmd_rc = papr_scm_service_pdsm(p, call_pkg);
+		break;
+
 	default:
-		return -EINVAL;
+		dev_dbg(&p->pdev->dev, "Unknown command = %d\n", cmd);
+		*cmd_rc = -EINVAL;
 	}
 
 	dev_dbg(&p->pdev->dev, "returned with cmd_rc = %d\n", *cmd_rc);
 
-	return 0;
+	return *cmd_rc;
 }
 
 static ssize_t flags_show(struct device *dev,
diff --git a/include/uapi/linux/ndctl.h b/include/uapi/linux/ndctl.h
index de5d90212409..99fb60600ef8 100644
--- a/include/uapi/linux/ndctl.h
+++ b/include/uapi/linux/ndctl.h
@@ -244,6 +244,7 @@ struct nd_cmd_pkg {
 #define NVDIMM_FAMILY_HPE2 2
 #define NVDIMM_FAMILY_MSFT 3
 #define NVDIMM_FAMILY_HYPERV 4
+#define NVDIMM_FAMILY_PAPR_SCM 5
 
 #define ND_IOCTL_CALL			_IOWR(ND_IOCTL, ND_CMD_CALL,\
 					struct nd_cmd_pkg)
-- 
2.26.2


^ permalink raw reply related

* [RESEND PATCH v7 3/5] powerpc/papr_scm: Fetch nvdimm health information from PHYP
From: Vaibhav Jain @ 2020-05-19 19:00 UTC (permalink / raw)
  To: linuxppc-dev, linux-nvdimm, linux-kernel
  Cc: Santosh Sivaraj, Steven Rostedt, Oliver O'Halloran,
	Aneesh Kumar K . V, Vaibhav Jain, Dan Williams
In-Reply-To: <20200519190058.257981-1-vaibhav@linux.ibm.com>

Implement support for fetching nvdimm health information via
H_SCM_HEALTH hcall as documented in Ref[1]. The hcall returns a pair
of 64-bit big-endian integers, bitwise-and of which is then stored in
'struct papr_scm_priv' and subsequently partially exposed to
user-space via newly introduced dimm specific attribute
'papr/flags'. Since the hcall is costly, the health information is
cached and only re-queried, 60s after the previous successful hcall.

The patch also adds a  documentation text describing flags reported by
the the new sysfs attribute 'papr/flags' is also introduced at
Documentation/ABI/testing/sysfs-bus-papr-scm.

[1] commit 58b278f568f0 ("powerpc: Provide initial documentation for
PAPR hcalls")

Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: "Aneesh Kumar K . V" <aneesh.kumar@linux.ibm.com>
Signed-off-by: Vaibhav Jain <vaibhav@linux.ibm.com>
---
Changelog:

Resend:
* None

v6..v7 :
* Used the exported buf_seq_printf() function to generate content for
  'papr/flags'
* Moved the PAPR_SCM_DIMM_* bit-flags macro definitions to papr_scm.c
  and removed the papr_scm.h file [Mpe]
* Some minor consistency issued in sysfs-bus-papr-scm
  documentation. [Mpe]
* s/dimm_mutex/health_mutex/g [Mpe]
* Split drc_pmem_query_health() into two function one of which takes
  care of caching and locking. [Mpe]
* Fixed a local copy creation of dimm health information using
  READ_ONCE(). [Mpe]

v5..v6 :
* Change the flags sysfs attribute from 'papr_flags' to 'papr/flags'
  [Dan Williams]
* Include documentation for 'papr/flags' attr [Dan Williams]
* Change flag 'save_fail' to 'flush_fail' [Dan Williams]
* Caching of health bitmap to reduce expensive hcalls [Dan Williams]
* Removed usage of PPC_BIT from 'papr-scm.h' header [Mpe]
* Replaced two __be64 integers from papr_scm_priv to a single u64
  integer [Mpe]
* Updated patch description to reflect the changes made in this
  version.
* Removed avoidable usage of 'papr_scm_priv.dimm_mutex' from
  flags_show() [Dan Williams]

v4..v5 :
* None

v3..v4 :
* None

v2..v3 :
* Removed PAPR_SCM_DIMM_HEALTH_NON_CRITICAL as a condition for
       	 NVDIMM unarmed [Aneesh]

v1..v2 :
* New patch in the series.
---
 Documentation/ABI/testing/sysfs-bus-papr-scm |  27 +++
 arch/powerpc/platforms/pseries/papr_scm.c    | 169 ++++++++++++++++++-
 2 files changed, 194 insertions(+), 2 deletions(-)
 create mode 100644 Documentation/ABI/testing/sysfs-bus-papr-scm

diff --git a/Documentation/ABI/testing/sysfs-bus-papr-scm b/Documentation/ABI/testing/sysfs-bus-papr-scm
new file mode 100644
index 000000000000..6143d06072f1
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-papr-scm
@@ -0,0 +1,27 @@
+What:		/sys/bus/nd/devices/nmemX/papr/flags
+Date:		Apr, 2020
+KernelVersion:	v5.8
+Contact:	linuxppc-dev <linuxppc-dev@lists.ozlabs.org>, linux-nvdimm@lists.01.org,
+Description:
+		(RO) Report flags indicating various states of a
+		papr-scm NVDIMM device. Each flag maps to a one or
+		more bits set in the dimm-health-bitmap retrieved in
+		response to H_SCM_HEALTH hcall. The details of the bit
+		flags returned in response to this hcall is available
+		at 'Documentation/powerpc/papr_hcalls.rst' . Below are
+		the flags reported in this sysfs file:
+
+		* "not_armed"	: Indicates that NVDIMM contents will not
+				  survive a power cycle.
+		* "flush_fail"	: Indicates that NVDIMM contents
+				  couldn't be flushed during last
+				  shut-down event.
+		* "restore_fail": Indicates that NVDIMM contents
+				  couldn't be restored during NVDIMM
+				  initialization.
+		* "encrypted"	: NVDIMM contents are encrypted.
+		* "smart_notify": There is health event for the NVDIMM.
+		* "scrubbed"	: Indicating that contents of the
+				  NVDIMM have been scrubbed.
+		* "locked"	: Indicating that NVDIMM contents cant
+				  be modified until next power cycle.
diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c
index f35592423380..142636e1a59f 100644
--- a/arch/powerpc/platforms/pseries/papr_scm.c
+++ b/arch/powerpc/platforms/pseries/papr_scm.c
@@ -12,6 +12,7 @@
 #include <linux/libnvdimm.h>
 #include <linux/platform_device.h>
 #include <linux/delay.h>
+#include <linux/seq_buf.h>
 
 #include <asm/plpar_wrappers.h>
 
@@ -22,6 +23,44 @@
 	 (1ul << ND_CMD_GET_CONFIG_DATA) | \
 	 (1ul << ND_CMD_SET_CONFIG_DATA))
 
+/* DIMM health bitmap bitmap indicators */
+/* SCM device is unable to persist memory contents */
+#define PAPR_SCM_DIMM_UNARMED                   (1ULL << (63 - 0))
+/* SCM device failed to persist memory contents */
+#define PAPR_SCM_DIMM_SHUTDOWN_DIRTY            (1ULL << (63 - 1))
+/* SCM device contents are persisted from previous IPL */
+#define PAPR_SCM_DIMM_SHUTDOWN_CLEAN            (1ULL << (63 - 2))
+/* SCM device contents are not persisted from previous IPL */
+#define PAPR_SCM_DIMM_EMPTY                     (1ULL << (63 - 3))
+/* SCM device memory life remaining is critically low */
+#define PAPR_SCM_DIMM_HEALTH_CRITICAL           (1ULL << (63 - 4))
+/* SCM device will be garded off next IPL due to failure */
+#define PAPR_SCM_DIMM_HEALTH_FATAL              (1ULL << (63 - 5))
+/* SCM contents cannot persist due to current platform health status */
+#define PAPR_SCM_DIMM_HEALTH_UNHEALTHY          (1ULL << (63 - 6))
+/* SCM device is unable to persist memory contents in certain conditions */
+#define PAPR_SCM_DIMM_HEALTH_NON_CRITICAL       (1ULL << (63 - 7))
+/* SCM device is encrypted */
+#define PAPR_SCM_DIMM_ENCRYPTED                 (1ULL << (63 - 8))
+/* SCM device has been scrubbed and locked */
+#define PAPR_SCM_DIMM_SCRUBBED_AND_LOCKED       (1ULL << (63 - 9))
+
+/* Bits status indicators for health bitmap indicating unarmed dimm */
+#define PAPR_SCM_DIMM_UNARMED_MASK (PAPR_SCM_DIMM_UNARMED |		\
+				    PAPR_SCM_DIMM_HEALTH_UNHEALTHY)
+
+/* Bits status indicators for health bitmap indicating unflushed dimm */
+#define PAPR_SCM_DIMM_BAD_SHUTDOWN_MASK (PAPR_SCM_DIMM_SHUTDOWN_DIRTY)
+
+/* Bits status indicators for health bitmap indicating unrestored dimm */
+#define PAPR_SCM_DIMM_BAD_RESTORE_MASK  (PAPR_SCM_DIMM_EMPTY)
+
+/* Bit status indicators for smart event notification */
+#define PAPR_SCM_DIMM_SMART_EVENT_MASK (PAPR_SCM_DIMM_HEALTH_CRITICAL | \
+					PAPR_SCM_DIMM_HEALTH_FATAL |	\
+					PAPR_SCM_DIMM_HEALTH_UNHEALTHY)
+
+/* private struct associated with each region */
 struct papr_scm_priv {
 	struct platform_device *pdev;
 	struct device_node *dn;
@@ -39,6 +78,15 @@ struct papr_scm_priv {
 	struct resource res;
 	struct nd_region *region;
 	struct nd_interleave_set nd_set;
+
+	/* Protect dimm health data from concurrent read/writes */
+	struct mutex health_mutex;
+
+	/* Last time the health information of the dimm was updated */
+	unsigned long lasthealth_jiffies;
+
+	/* Health information for the dimm */
+	u64 health_bitmap;
 };
 
 static int drc_pmem_bind(struct papr_scm_priv *p)
@@ -144,6 +192,62 @@ static int drc_pmem_query_n_bind(struct papr_scm_priv *p)
 	return drc_pmem_bind(p);
 }
 
+/*
+ * Issue hcall to retrieve dimm health info and populate papr_scm_priv with the
+ * health information.
+ */
+static int __drc_pmem_query_health(struct papr_scm_priv *p)
+{
+	unsigned long ret[PLPAR_HCALL_BUFSIZE];
+	s64 rc;
+
+	/* issue the hcall */
+	rc = plpar_hcall(H_SCM_HEALTH, ret, p->drc_index);
+	if (rc != H_SUCCESS) {
+		dev_err(&p->pdev->dev,
+			 "Failed to query health information, Err:%lld\n", rc);
+		rc = -ENXIO;
+		goto out;
+	}
+
+	p->lasthealth_jiffies = jiffies;
+	p->health_bitmap = ret[0] & ret[1];
+
+	dev_dbg(&p->pdev->dev,
+		"Queried dimm health info. Bitmap:0x%016lx Mask:0x%016lx\n",
+		ret[0], ret[1]);
+out:
+	return rc;
+}
+
+/* Min interval in seconds for assuming stable dimm health */
+#define MIN_HEALTH_QUERY_INTERVAL 60
+
+/* Query cached health info and if needed call drc_pmem_query_health */
+static int drc_pmem_query_health(struct papr_scm_priv *p)
+{
+	unsigned long cache_timeout;
+	s64 rc;
+
+	/* Protect concurrent modifications to papr_scm_priv */
+	rc = mutex_lock_interruptible(&p->health_mutex);
+	if (rc)
+		return rc;
+
+	/* Jiffies offset for which the health data is assumed to be same */
+	cache_timeout = p->lasthealth_jiffies +
+		msecs_to_jiffies(MIN_HEALTH_QUERY_INTERVAL * 1000);
+
+	/* Fetch new health info is its older than MIN_HEALTH_QUERY_INTERVAL */
+	if (time_after(jiffies, cache_timeout))
+		rc = __drc_pmem_query_health(p);
+	else
+		/* Assume cached health data is valid */
+		rc = 0;
+
+	mutex_unlock(&p->health_mutex);
+	return rc;
+}
 
 static int papr_scm_meta_get(struct papr_scm_priv *p,
 			     struct nd_cmd_get_config_data_hdr *hdr)
@@ -286,6 +390,64 @@ static int papr_scm_ndctl(struct nvdimm_bus_descriptor *nd_desc,
 	return 0;
 }
 
+static ssize_t flags_show(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	struct nvdimm *dimm = to_nvdimm(dev);
+	struct papr_scm_priv *p = nvdimm_provider_data(dimm);
+	struct seq_buf s;
+	u64 health;
+	int rc;
+
+	rc = drc_pmem_query_health(p);
+	if (rc)
+		return rc;
+
+	/* Copy health_bitmap locally, check masks & update out buffer */
+	health = READ_ONCE(p->health_bitmap);
+
+	seq_buf_init(&s, buf, PAGE_SIZE);
+	if (health & PAPR_SCM_DIMM_UNARMED_MASK)
+		seq_buf_printf(&s, "not_armed ");
+
+	if (health & PAPR_SCM_DIMM_BAD_SHUTDOWN_MASK)
+		seq_buf_printf(&s, "flush_fail ");
+
+	if (health & PAPR_SCM_DIMM_BAD_RESTORE_MASK)
+		seq_buf_printf(&s, "restore_fail ");
+
+	if (health & PAPR_SCM_DIMM_ENCRYPTED)
+		seq_buf_printf(&s, "encrypted ");
+
+	if (health & PAPR_SCM_DIMM_SMART_EVENT_MASK)
+		seq_buf_printf(&s, "smart_notify ");
+
+	if (health & PAPR_SCM_DIMM_SCRUBBED_AND_LOCKED)
+		seq_buf_printf(&s, "scrubbed locked ");
+
+	if (seq_buf_used(&s))
+		seq_buf_printf(&s, "\n");
+
+	return seq_buf_used(&s);
+}
+DEVICE_ATTR_RO(flags);
+
+/* papr_scm specific dimm attributes */
+static struct attribute *papr_scm_nd_attributes[] = {
+	&dev_attr_flags.attr,
+	NULL,
+};
+
+static struct attribute_group papr_scm_nd_attribute_group = {
+	.name = "papr",
+	.attrs = papr_scm_nd_attributes,
+};
+
+static const struct attribute_group *papr_scm_dimm_attr_groups[] = {
+	&papr_scm_nd_attribute_group,
+	NULL,
+};
+
 static int papr_scm_nvdimm_init(struct papr_scm_priv *p)
 {
 	struct device *dev = &p->pdev->dev;
@@ -312,8 +474,8 @@ static int papr_scm_nvdimm_init(struct papr_scm_priv *p)
 	dimm_flags = 0;
 	set_bit(NDD_LABELING, &dimm_flags);
 
-	p->nvdimm = nvdimm_create(p->bus, p, NULL, dimm_flags,
-				  PAPR_SCM_DIMM_CMD_MASK, 0, NULL);
+	p->nvdimm = nvdimm_create(p->bus, p, papr_scm_dimm_attr_groups,
+				  dimm_flags, PAPR_SCM_DIMM_CMD_MASK, 0, NULL);
 	if (!p->nvdimm) {
 		dev_err(dev, "Error creating DIMM object for %pOF\n", p->dn);
 		goto err;
@@ -399,6 +561,9 @@ static int papr_scm_probe(struct platform_device *pdev)
 	if (!p)
 		return -ENOMEM;
 
+	/* Initialize the dimm mutex */
+	mutex_init(&p->health_mutex);
+
 	/* optional DT properties */
 	of_property_read_u32(dn, "ibm,metadata-size", &metadata_size);
 
-- 
2.26.2


^ permalink raw reply related

* [RESEND PATCH v7 1/5] powerpc: Document details on H_SCM_HEALTH hcall
From: Vaibhav Jain @ 2020-05-19 19:00 UTC (permalink / raw)
  To: linuxppc-dev, linux-nvdimm, linux-kernel
  Cc: Santosh Sivaraj, Steven Rostedt, Oliver O'Halloran,
	Aneesh Kumar K . V, Vaibhav Jain, Dan Williams
In-Reply-To: <20200519190058.257981-1-vaibhav@linux.ibm.com>

Add documentation to 'papr_hcalls.rst' describing the bitmap flags
that are returned from H_SCM_HEALTH hcall as per the PAPR-SCM
specification.

Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: "Aneesh Kumar K . V" <aneesh.kumar@linux.ibm.com>
Signed-off-by: Vaibhav Jain <vaibhav@linux.ibm.com>
---
Changelog:

Resend:
* None

v6..v7:
* None

v5..v6
* New patch in the series
---
 Documentation/powerpc/papr_hcalls.rst | 43 ++++++++++++++++++++++++---
 1 file changed, 39 insertions(+), 4 deletions(-)

diff --git a/Documentation/powerpc/papr_hcalls.rst b/Documentation/powerpc/papr_hcalls.rst
index 3493631a60f8..9a5ba5eaf323 100644
--- a/Documentation/powerpc/papr_hcalls.rst
+++ b/Documentation/powerpc/papr_hcalls.rst
@@ -220,13 +220,48 @@ from the LPAR memory.
 **H_SCM_HEALTH**
 
 | Input: drcIndex
-| Out: *health-bitmap, health-bit-valid-bitmap*
+| Out: *health-bitmap (r4), health-bit-valid-bitmap (r5)*
 | Return Value: *H_Success, H_Parameter, H_Hardware*
 
 Given a DRC Index return the info on predictive failure and overall health of
-the NVDIMM. The asserted bits in the health-bitmap indicate a single predictive
-failure and health-bit-valid-bitmap indicate which bits in health-bitmap are
-valid.
+the NVDIMM. The asserted bits in the health-bitmap indicate one or more states
+(described in table below) of the NVDIMM and health-bit-valid-bitmap indicate
+which bits in health-bitmap are valid.
+
+Health Bitmap Flags:
+
++------+-----------------------------------------------------------------------+
+|  Bit |               Definition                                              |
++======+=======================================================================+
+|  00  | SCM device is unable to persist memory contents.                      |
+|      | If the system is powered down, nothing will be saved.                 |
++------+-----------------------------------------------------------------------+
+|  01  | SCM device failed to persist memory contents. Either contents were not|
+|      | saved successfully on power down or were not restored properly on     |
+|      | power up.                                                             |
++------+-----------------------------------------------------------------------+
+|  02  | SCM device contents are persisted from previous IPL. The data from    |
+|      | the last boot were successfully restored.                             |
++------+-----------------------------------------------------------------------+
+|  03  | SCM device contents are not persisted from previous IPL. There was no |
+|      | data to restore from the last boot.                                   |
++------+-----------------------------------------------------------------------+
+|  04  | SCM device memory life remaining is critically low                    |
++------+-----------------------------------------------------------------------+
+|  05  | SCM device will be garded off next IPL due to failure                 |
++------+-----------------------------------------------------------------------+
+|  06  | SCM contents cannot persist due to current platform health status. A  |
+|      | hardware failure may prevent data from being saved or restored.       |
++------+-----------------------------------------------------------------------+
+|  07  | SCM device is unable to persist memory contents in certain conditions |
++------+-----------------------------------------------------------------------+
+|  08  | SCM device is encrypted                                               |
++------+-----------------------------------------------------------------------+
+|  09  | SCM device has successfully completed a requested erase or secure     |
+|      | erase procedure.                                                      |
++------+-----------------------------------------------------------------------+
+|10:63 | Reserved / Unused                                                     |
++------+-----------------------------------------------------------------------+
 
 **H_SCM_PERFORMANCE_STATS**
 
-- 
2.26.2


^ permalink raw reply related

* [RESEND PATCH v7 0/5] powerpc/papr_scm: Add support for reporting nvdimm health
From: Vaibhav Jain @ 2020-05-19 19:00 UTC (permalink / raw)
  To: linuxppc-dev, linux-nvdimm, linux-kernel
  Cc: Santosh Sivaraj, Steven Rostedt, Oliver O'Halloran,
	Aneesh Kumar K . V, Vaibhav Jain, Dan Williams

The PAPR standard[1][3] provides mechanisms to query the health and
performance stats of an NVDIMM via various hcalls as described in
Ref[2].  Until now these stats were never available nor exposed to the
user-space tools like 'ndctl'. This is partly due to PAPR platform not
having support for ACPI and NFIT. Hence 'ndctl' is unable to query and
report the dimm health status and a user had no way to determine the
current health status of a NDVIMM.

To overcome this limitation, this patch-set updates papr_scm kernel
module to query and fetch NVDIMM health stats using hcalls described
in Ref[2].  This health and performance stats are then exposed to
userspace via sysfs and PAPR-NVDIMM-Specific-Methods(PDSM) issued by
libndctl.

These changes coupled with proposed ndtcl changes located at Ref[4]
should provide a way for the user to retrieve NVDIMM health status
using ndtcl.

Below is a sample output using proposed kernel + ndctl for PAPR NVDIMM
in a emulation environment:

 # ndctl list -DH
[
  {
    "dev":"nmem0",
    "health":{
      "health_state":"fatal",
      "shutdown_state":"dirty"
    }
  }
]

Dimm health report output on a pseries guest lpar with vPMEM or HMS
based NVDIMMs that are in perfectly healthy conditions:

 # ndctl list -d nmem0 -H
[
  {
    "dev":"nmem0",
    "health":{
      "health_state":"ok",
      "shutdown_state":"clean"
    }
  }
]

PAPR NVDIMM-Specific-Methods(PDSM)
==================================

PDSM requests are issued by vendor specific code in libndctl to
execute certain operations or fetch information from NVDIMMS. PDSMs
requests can be sent to papr_scm module via libndctl(userspace) and
libnvdimm (kernel) using the ND_CMD_CALL ioctl command which can be
handled in the dimm control function papr_scm_ndctl(). Current
patchset proposes a single PDSM to retrieve NVDIMM health, defined in
the newly introduced uapi header named 'papr_scm_pdsm.h'. Support for
more PDSMs will be added in future.

Structure of the patch-set
==========================

The patch-set starts with a doc patch documenting details of hcall
H_SCM_HEALTH. Second patch exports kernel symbol seq_buf_printf()
thats used in subsequent patches to generate sysfs attribute content.

Third patch implements support for fetching NVDIMM health information
from PHYP and partially exposing it to user-space via a NVDIMM sysfs
flag.

Fourth patches deal with implementing support for servicing PDSM
commands in papr_scm module.

Finally the last patch implements support for servicing PDSM
'PAPR_SCM_PDSM_HEALTH' that returns the NVDIMM health information to
libndctl.

Changelog:
==========

Resend:
* Added ack from Steven Rostedt on Patch-2 that exports kernel symbol
  seq_buf_printf()

v6..v7:

* Incorporate various review comments from Mpe.  Removed papr_scm.h
* Added a patch to export seq_buf_printf() [Mpe, Steven Rostedt]
* header file and moved its contents to papr_scm.c.
* Split function drc_pmem_query_health() into two functions, one that takes
  care of caching and concurrency and other one that doesn't.
* Fixed a possible incorrect way to make local copy of nvdimm health data.
* Some variable renames changed as suggested in previous review.
* Removed unused macros/defines from papr_scm_pdsm.h
* Updated papr_scm_pdsm.h to remove usage of __KERNEL__ define.
* Updated papr_scm_pdsm.h to remove redefinition of __packed macro.

v5..v6:

* Incorporate review comments from Mpe and Dan Williams.
* Changed the usage of term DSM to PDSM as former conflicted with
  usage in ACPI context.
* UAPI updates to remove usage of bool and marking the structs 
  defined as 'packed'.
* Simplified the health-bitmap handling in papr_scm to use u64
  instead of __be64 integers.
* Caching of the health information so reading the dimm-flag file
  doesn't result in costly hcalls everytime.
* Changed dimm-flag 'save_fail' to 'flush_fail'
* Moved the dimm flag file from 'papr_flags' to 'papr/flags'.
* Added a patch to document H_SCM_HEALTH hcall return values.
* Added sysfs ABI documentation for newly introduce dimm-flag
  sysfs file 'papr/flags'

v4..v5:

* Fixed a bug in new implementation of papr_scm_ndctl() that was triggering
  a false error condition.

v3..v4:

* Restructured papr_scm_ndctl() to dispatch ND_CMD_CALL commands to a new
  function named papr_scm_service_dsm() to serivice DSM requests. [Aneesh]

v2..v3:

* Updated the papr_scm_dsm.h header to be more confimant general kernel
  guidelines for UAPI headers. [Aneesh]

* Changed the definition of macro PAPR_SCM_DIMM_UNARMED_MASK to not
  include case when the NVDIMM is unarmed because its a vPMEM
  NVDIMM. [Aneesh]

v1..v2:

* Restructured the patch-set based on review comments on V1 patch-set to
simplify the patch review. Multiple small patches have been combined into
single patches to reduce cross referencing that was needed in earlier
patch-set. Hence most of the patches in this patch-set as now new. [Aneesh]

* Removed the initial work done for fetch NVDIMM performance statistics.
These changes will be re-proposed in a separate patch-set. [Aneesh]

* Simplified handling of versioning of 'struct
nd_papr_scm_dimm_health_stat_v1' as only one version of the structure is
currently in existence.

References:
[1] "Power Architecture Platform Reference"
      https://en.wikipedia.org/wiki/Power_Architecture_Platform_Reference
[2] commit 58b278f568f0
     ("powerpc: Provide initial documentation for PAPR hcalls")
[3] "Linux on Power Architecture Platform Reference"
     https://members.openpowerfoundation.org/document/dl/469
[4] https://github.com/vaibhav92/ndctl/tree/papr_scm_health_v7

Vaibhav Jain (5):
  powerpc: Document details on H_SCM_HEALTH hcall
  seq_buf: Export seq_buf_printf() to external modules
  powerpc/papr_scm: Fetch nvdimm health information from PHYP
  ndctl/papr_scm,uapi: Add support for PAPR nvdimm specific methods
  powerpc/papr_scm: Implement support for PAPR_SCM_PDSM_HEALTH

 Documentation/ABI/testing/sysfs-bus-papr-scm  |  27 ++
 Documentation/powerpc/papr_hcalls.rst         |  43 ++-
 arch/powerpc/include/uapi/asm/papr_scm_pdsm.h | 173 +++++++++
 arch/powerpc/platforms/pseries/papr_scm.c     | 363 +++++++++++++++++-
 include/uapi/linux/ndctl.h                    |   1 +
 lib/seq_buf.c                                 |   1 +
 6 files changed, 595 insertions(+), 13 deletions(-)
 create mode 100644 Documentation/ABI/testing/sysfs-bus-papr-scm
 create mode 100644 arch/powerpc/include/uapi/asm/papr_scm_pdsm.h

-- 
2.26.2


^ permalink raw reply

* Re: [PATCH v2 3/5] libnvdimm/nvdimm/flush: Allow architecture to override the flush barrier
From: Dan Williams @ 2020-05-19 18:59 UTC (permalink / raw)
  To: Aneesh Kumar K.V; +Cc: alistair, linuxppc-dev, linux-nvdimm
In-Reply-To: <87d070f2vs.fsf@linux.ibm.com>

On Tue, May 19, 2020 at 6:53 AM Aneesh Kumar K.V
<aneesh.kumar@linux.ibm.com> wrote:
>
> Dan Williams <dan.j.williams@intel.com> writes:
>
> > On Mon, May 18, 2020 at 10:30 PM Aneesh Kumar K.V
> > <aneesh.kumar@linux.ibm.com> wrote:
>
> ...
>
> >> Applications using new instructions will behave as expected when running
> >> on P8 and P9. Only future hardware will differentiate between 'dcbf' and
> >> 'dcbfps'
> >
> > Right, this is the problem. Applications using new instructions behave
> > as expected, the kernel has been shipping of_pmem and papr_scm for
> > several cycles now, you're saying that the DAX applications written
> > against those platforms are going to be broken on P8 and P9?
>
> The expecation is that both kernel and userspace would get upgraded to
> use the new instruction before actual persistent memory devices are
> made available.
>
> >
> >> > I'm thinking the kernel
> >> > should go as far as to disable DAX operation by default on new
> >> > hardware until userspace asserts that it is prepared to switch to the
> >> > new implementation. Is there any other way to ensure the forward
> >> > compatibility of deployed ppc64 DAX applications?
> >>
> >> AFAIU there is no released persistent memory hardware on ppc64 platform
> >> and we need to make sure before applications get enabled to use these
> >> persistent memory devices, they should switch to use the new
> >> instruction?
> >
> > Right, I want the kernel to offer some level of safety here because
> > everything you are describing sounds like a flag day conversion. Am I
> > misreading? Is there some other gate that prevents existing users of
> > of_pmem and papr_scm from having their expectations violated when
> > running on P8 / P9 hardware? Maybe there's tighter ecosystem control
> > that I'm just not familiar with, I'm only going off the fact that the
> > kernel has shipped a non-zero number of NVDIMM drivers that build with
> > ARCH=ppc64 for several cycles.
>
> If we are looking at adding changes to kernel that will prevent a kernel
> from running on newer hardware in a specific case, we could as well take
> the changes to get the kernel use the newer instructions right?

Oh, no, I'm not talking about stopping the kernel from running. I'm
simply recommending that support for MAP_SYNC mappings (userspace
managed flushing) be disabled by default on PPC with either a
compile-time or run-time default to assert that userspace has been
audited for legacy applications or that the platform owner is
otherwise willing to take the risk.

> But I agree with your concern that if we have older kernel/applications
> that continue to use `dcbf` on future hardware we will end up
> having issues w.r.t powerfail consistency. The plan is what you outlined
> above as tighter ecosystem control. Considering we don't have a pmem
> device generally available, we get both kernel and userspace upgraded
> to use these new instructions before such a device is made available.

Ok, I think a compile time kernel option with a runtime override
satisfies my concern. Does that work for you?

^ permalink raw reply

* Re: [PATCH] arch/{mips,sparc,microblaze,powerpc}: Don't enable pagefault/preempt twice
From: Ira Weiny @ 2020-05-19 18:40 UTC (permalink / raw)
  To: Guenter Roeck
  Cc: Peter Zijlstra, Dave Hansen, dri-devel, linux-mips,
	James E.J. Bottomley, Max Filippov, Paul Mackerras,
	H. Peter Anvin, sparclinux, Dan Williams, Helge Deller, x86,
	linux-csky, Christoph Hellwig, Ingo Molnar, linux-snps-arc,
	linux-xtensa, Borislav Petkov, Al Viro, Andy Lutomirski,
	Thomas Gleixner, linux-arm-kernel, Chris Zankel,
	Thomas Bogendoerfer, linux-parisc, linux-kernel, Christian Koenig,
	Andrew Morton, linuxppc-dev, David S. Miller
In-Reply-To: <20200519165422.GA5838@roeck-us.net>

On Tue, May 19, 2020 at 09:54:22AM -0700, Guenter Roeck wrote:
> On Mon, May 18, 2020 at 11:48:43AM -0700, ira.weiny@intel.com wrote:
> > From: Ira Weiny <ira.weiny@intel.com>
> > 
> > The kunmap_atomic clean up failed to remove one set of pagefault/preempt
> > enables when vaddr is not in the fixmap.
> > 
> > Fixes: bee2128a09e6 ("arch/kunmap_atomic: consolidate duplicate code")
> > Signed-off-by: Ira Weiny <ira.weiny@intel.com>
> 
> microblazeel works with this patch,

Awesome...  Andrew in my rush yesterday I should have put a reported by on the
patch for Guenter as well.

Sorry about that Guenter,
Ira

> as do the nosmp sparc32 boot tests,
> but sparc32 boot tests with SMP enabled still fail with lots of messages
> such as:
> 
> BUG: Bad page state in process swapper/0  pfn:006a1
> page:f0933420 refcount:0 mapcount:1 mapping:(ptrval) index:0x1
> flags: 0x0()
> raw: 00000000 00000100 00000122 00000000 00000001 00000000 00000000 00000000
> page dumped because: nonzero mapcount
> Modules linked in:
> CPU: 0 PID: 1 Comm: swapper/0 Tainted: G    B             5.7.0-rc6-next-20200518-00002-gb178d2d56f29 #1
> [f00e7ab8 :
> bad_page+0xa8/0x108 ]
> [f00e8b54 :
> free_pcppages_bulk+0x154/0x52c ]
> [f00ea024 :
> free_unref_page+0x54/0x6c ]
> [f00ed864 :
> free_reserved_area+0x58/0xec ]
> [f0527104 :
> kernel_init+0x14/0x110 ]
> [f000b77c :
> ret_from_kernel_thread+0xc/0x38 ]
> [00000000 :
> 0x0 ]
> 
> Code path leading to that message is different but always the same
> from free_unref_page().
> 
> Still testing ppc images.
> 
> Guenter

^ permalink raw reply

* Re: [PATCH v2 6/7] powerpc/dt_cpu_ftrs: Add MMA feature
From: Paul A. Clarke @ 2020-05-19 17:51 UTC (permalink / raw)
  To: Alistair Popple; +Cc: aneesh.kumar, mikey, linuxppc-dev, npiggin
In-Reply-To: <20200519003157.31946-7-alistair@popple.id.au>

On Tue, May 19, 2020 at 10:31:56AM +1000, Alistair Popple wrote:
> Matrix multiple accumulate (MMA) is a new feature added to ISAv3.1 and

Conclusion is that this should be "Matrix-Multiply Assist", but then there
are a couple more below...

> POWER10. Support on powernv can be selected via a firmware CPU device
> tree feature which enables it via a PCR bit.
> 
> Signed-off-by: Alistair Popple <alistair@popple.id.au>
> ---
>  arch/powerpc/include/asm/reg.h    |  3 ++-
>  arch/powerpc/kernel/dt_cpu_ftrs.c | 17 ++++++++++++++++-
>  2 files changed, 18 insertions(+), 2 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
> index 1931b1142599..c446863a40cf 100644
> --- a/arch/powerpc/include/asm/reg.h
> +++ b/arch/powerpc/include/asm/reg.h
> @@ -479,7 +479,8 @@
>  #define   PCR_VEC_DIS	(__MASK(63-0))	/* Vec. disable (bit NA since POWER8) */
>  #define   PCR_VSX_DIS	(__MASK(63-1))	/* VSX disable (bit NA since POWER8) */
>  #define   PCR_TM_DIS	(__MASK(63-2))	/* Trans. memory disable (POWER8) */
> -#define   PCR_HIGH_BITS	(PCR_VEC_DIS | PCR_VSX_DIS | PCR_TM_DIS)
> +#define   PCR_MMA_DIS	(__MASK(63-3)) /* Matrix-Multiply Accelerator */

also here.

> +#define   PCR_HIGH_BITS	(PCR_MMA_DIS | PCR_VEC_DIS | PCR_VSX_DIS | PCR_TM_DIS)
>  /*
>   * These bits are used in the function kvmppc_set_arch_compat() to specify and
>   * determine both the compatibility level which we want to emulate and the
> diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c b/arch/powerpc/kernel/dt_cpu_ftrs.c
> index 93c340906aad..e7540ee5cad8 100644
> --- a/arch/powerpc/kernel/dt_cpu_ftrs.c
> +++ b/arch/powerpc/kernel/dt_cpu_ftrs.c
> @@ -75,6 +75,7 @@ static struct {
>  	u64	lpcr_clear;
>  	u64	hfscr;
>  	u64	fscr;
> +	u64	pcr;
>  } system_registers;
> 
>  static void (*init_pmu_registers)(void);
> @@ -102,7 +103,7 @@ static void __restore_cpu_cpufeatures(void)
>  	if (hv_mode) {
>  		mtspr(SPRN_LPID, 0);
>  		mtspr(SPRN_HFSCR, system_registers.hfscr);
> -		mtspr(SPRN_PCR, PCR_MASK);
> +		mtspr(SPRN_PCR, system_registers.pcr);
>  	}
>  	mtspr(SPRN_FSCR, system_registers.fscr);
> 
> @@ -555,6 +556,18 @@ static int __init feat_enable_large_ci(struct dt_cpu_feature *f)
>  	return 1;
>  }
> 
> +static int __init feat_enable_mma(struct dt_cpu_feature *f)
> +{
> +	u64 pcr;
> +
> +	feat_enable(f);
> +	pcr = mfspr(SPRN_PCR);
> +	pcr &= ~PCR_MMA_DIS;
> +	mtspr(SPRN_PCR, pcr);
> +
> +	return 1;
> +}
> +
>  struct dt_cpu_feature_match {
>  	const char *name;
>  	int (*enable)(struct dt_cpu_feature *f);
> @@ -629,6 +642,7 @@ static struct dt_cpu_feature_match __initdata
>  	{"vector-binary16", feat_enable, 0},
>  	{"wait-v3", feat_enable, 0},
>  	{"prefix-instructions", feat_enable, 0},
> +	{"matrix-multiply-accumulate", feat_enable_mma, 0},

and presumably here as well.

>  };

PC

^ permalink raw reply

* Re: [PATCH v2 1/7] powerpc: Add new HWCAP bits
From: Paul A. Clarke @ 2020-05-19 17:48 UTC (permalink / raw)
  To: Alistair Popple; +Cc: aneesh.kumar, mikey, linuxppc-dev, npiggin
In-Reply-To: <20200519003157.31946-2-alistair@popple.id.au>

On Tue, May 19, 2020 at 10:31:51AM +1000, Alistair Popple wrote:
> POWER10 introduces two new architectural features - ISAv3.1 and matrix
> multiply accumulate (MMA) instructions. Userspace detects the presence
> of these features via two HWCAP bits introduced in this patch. These
> bits have been agreed to by the compiler and binutils team.
> 
> Signed-off-by: Alistair Popple <alistair@popple.id.au>
> ---
>  arch/powerpc/include/uapi/asm/cputable.h | 2 ++
>  1 file changed, 2 insertions(+)
> 
> diff --git a/arch/powerpc/include/uapi/asm/cputable.h b/arch/powerpc/include/uapi/asm/cputable.h
> index 540592034740..2692a56bf20b 100644
> --- a/arch/powerpc/include/uapi/asm/cputable.h
> +++ b/arch/powerpc/include/uapi/asm/cputable.h
> @@ -50,6 +50,8 @@
>  #define PPC_FEATURE2_DARN		0x00200000 /* darn random number insn */
>  #define PPC_FEATURE2_SCV		0x00100000 /* scv syscall */
>  #define PPC_FEATURE2_HTM_NO_SUSPEND	0x00080000 /* TM w/out suspended state */
> +#define PPC_FEATURE2_ARCH_3_1		0x00040000 /* ISA 3.1 */
> +#define PPC_FEATURE2_MMA		0x00020000 /* Matrix Multiply Accumulate */

Carrying the conclusion of the recent discussion further, this should also be
"Matrix-Multiply Assist".

PC

^ permalink raw reply

* Re: [PATCH] arch/{mips,sparc,microblaze,powerpc}: Don't enable pagefault/preempt twice
From: Guenter Roeck @ 2020-05-19 16:54 UTC (permalink / raw)
  To: ira.weiny
  Cc: Peter Zijlstra, Dave Hansen, dri-devel, linux-mips,
	James E.J. Bottomley, Max Filippov, Paul Mackerras,
	H. Peter Anvin, sparclinux, Dan Williams, Helge Deller, x86,
	linux-csky, Christoph Hellwig, Ingo Molnar, linux-snps-arc,
	linux-xtensa, Borislav Petkov, Al Viro, Andy Lutomirski,
	Thomas Gleixner, linux-arm-kernel, Chris Zankel,
	Thomas Bogendoerfer, linux-parisc, linux-kernel, Christian Koenig,
	Andrew Morton, linuxppc-dev, David S. Miller
In-Reply-To: <20200518184843.3029640-1-ira.weiny@intel.com>

On Mon, May 18, 2020 at 11:48:43AM -0700, ira.weiny@intel.com wrote:
> From: Ira Weiny <ira.weiny@intel.com>
> 
> The kunmap_atomic clean up failed to remove one set of pagefault/preempt
> enables when vaddr is not in the fixmap.
> 
> Fixes: bee2128a09e6 ("arch/kunmap_atomic: consolidate duplicate code")
> Signed-off-by: Ira Weiny <ira.weiny@intel.com>

microblazeel works with this patch, as do the nosmp sparc32 boot tests,
but sparc32 boot tests with SMP enabled still fail with lots of messages
such as:

BUG: Bad page state in process swapper/0  pfn:006a1
page:f0933420 refcount:0 mapcount:1 mapping:(ptrval) index:0x1
flags: 0x0()
raw: 00000000 00000100 00000122 00000000 00000001 00000000 00000000 00000000
page dumped because: nonzero mapcount
Modules linked in:
CPU: 0 PID: 1 Comm: swapper/0 Tainted: G    B             5.7.0-rc6-next-20200518-00002-gb178d2d56f29 #1
[f00e7ab8 :
bad_page+0xa8/0x108 ]
[f00e8b54 :
free_pcppages_bulk+0x154/0x52c ]
[f00ea024 :
free_unref_page+0x54/0x6c ]
[f00ed864 :
free_reserved_area+0x58/0xec ]
[f0527104 :
kernel_init+0x14/0x110 ]
[f000b77c :
ret_from_kernel_thread+0xc/0x38 ]
[00000000 :
0x0 ]

Code path leading to that message is different but always the same
from free_unref_page().

Still testing ppc images.

Guenter

^ permalink raw reply

* Re: [PATCH V3 07/15] arch/kunmap_atomic: Consolidate duplicate code
From: Ira Weiny @ 2020-05-19 16:29 UTC (permalink / raw)
  To: Guenter Roeck
  Cc: Peter Zijlstra, Dave Hansen, dri-devel, linux-mips,
	James E.J. Bottomley, Max Filippov, Paul Mackerras,
	H. Peter Anvin, sparclinux, Dan Williams, Helge Deller, x86,
	linux-csky, Christoph Hellwig, Ingo Molnar, linux-snps-arc,
	linux-xtensa, Borislav Petkov, Al Viro, Andy Lutomirski,
	Thomas Gleixner, linux-arm-kernel, Chris Zankel,
	Thomas Bogendoerfer, linux-parisc, linux-kernel, Christian Koenig,
	Andrew Morton, linuxppc-dev, David S. Miller
In-Reply-To: <47757f51-15f2-3abe-9035-abdb3ba5816e@roeck-us.net>

On Mon, May 18, 2020 at 07:50:36PM -0700, Guenter Roeck wrote:
> Hi Ira,
> 
> On 5/18/20 5:03 PM, Ira Weiny wrote:
> > On Sun, May 17, 2020 at 09:29:32PM -0700, Guenter Roeck wrote:
> >> On Sun, May 17, 2020 at 08:49:39PM -0700, Ira Weiny wrote:
> >>> On Sat, May 16, 2020 at 03:33:06PM -0700, Guenter Roeck wrote:
> >>>> On Thu, May 07, 2020 at 07:59:55AM -0700, ira.weiny@intel.com wrote:
> >>>>> From: Ira Weiny <ira.weiny@intel.com>
> >>>>>
> >>>

Sorry for the delay I missed this email last night...  I blame outlook...  ;-)

...

> >>> Do you have a kernel config?  Specifically is CONFIG_HIGHMEM set?
> >>>
> >> See below. Yes, CONFIG_HIGHMEM is set.
> >>
> >> The scripts used to build and boot the image are at:
> >>
> >> https://github.com/groeck/linux-build-test/tree/master/rootfs/microblazeel
> > 
> > Despite finding the obvious error earlier today I've still been trying to get
> > this to work.
> > 
> > I had to make some slight modifications to use the 0-day cross compile build
> > and my local qemu build.  But those were pretty minor modifications.  I'm
> > running on x86_64 host.
> > 
> > With those slight mods to the scripts I get the following error even without my
> > patch set on 5.7-rc4.  I have 1 cpu pegged at 100% while it is running...  Is
> > there anything I can do to get more debug output?  Perhaps I just need to let
> > it run longer?
> > 
> 
> I don't think so. Try running it with "-d" parameter (run-qemu-microblazeel.sh
> -d petalogix-s3adsp1800); that gives you the qemu command line. Once it says
> "running", abort the script and execute qemu directly.

FYI Minor nit...  a simple copy/paste failed...  that print of the cmd line
did not include quotes around the -append text:

09:06:03 > /home/iweiny/dev/qemu/microblazeel-softmmu/qemu-system-microblazeel
   -M petalogix-s3adsp1800 -m 256 -kernel arch/microblaze/boot/linux.bin
   -no-reboot -initrd /tmp/buildbot-cache/microblazeel/rootfs.cpio -append
   panic=-1 slub_debug=FZPUA rdinit=/sbin/init console=ttyUL0,115200 -monitor
   none -serial stdio -nographic

qemu-system-microblazeel: slub_debug=FZPUA: Could not open 'slub_debug=FZPUA': No such file or directory

> Oh, and please update
> the repository; turns out I didn't push for a while and made a number of
> changes.

Cool beans...  I've updated.

> 
> My compiler was compiled with buildroot (a long time ago). I don't recall if
> it needed something special in the configuration, unfortunately.

AFAICT the compile is working...  It is running from the command line now...  I
expected it to be slow so I have also increased the timeouts last night.  So
far it still fails.  I did notice that there is a new 'R' in the wait output.

<quote>
.........................R......................... failed (silent)
------------
qemu log:
qemu-system-microblazeel: terminating on signal 15 from pid 3357146 (/bin/bash)
</quote>

I was hoping that meant it found qemu 'running' but looks like that was just a
retry...  :-(

Last night I increased some of the timeouts I could find.

<quote>
 LOOPTIME=5     # Wait time before checking status
 -MAXTIME=150    # Maximum wait time for qemu session to complete
 -MAXSTIME=60    # Maximum wait time for qemu session to generate output
 +#MAXTIME=150   # Maximum wait time for qemu session to complete
 +#MAXSTIME=60   # Maximum wait time for qemu session to generate output
 +MAXTIME=300    # Maximum wait time for qemu session to complete
 +MAXSTIME=120   # Maximum wait time for qemu session to generate output
</quote>

But thanks to the qemu command line hint I can see these were not nearly
enough...  (It has been running for > 20 minutes...  and I'm not getting
output...)  Or I've done something really wrong.  Shouldn't qemu be at least
showing something on the terminal by now?  I normally run qemu with different
display options (and my qemu foo is weak) so I'm not sure what I should be
seeing with this command line.

09:06:28 > /home/iweiny/dev/qemu/microblazeel-softmmu/qemu-system-microblazeel
  -M petalogix-s3adsp1800 -m 256 -kernel arch/microblaze/boot/linux.bin
  -no-reboot -initrd /tmp/buildbot-cache/microblazeel/rootfs.cpio -append
  "panic=-1 slub_debug=FZPUA rdinit=/sbin/init console=ttyUL0,115200" -monitor
  none -serial stdio -nographic

Maybe I just have too slow of a machine...  :-/

My qemu was built back in March.  I'm updating that now...

Sorry for being so dense...
Ira

^ permalink raw reply

* Re: [PATCH v2 6/7] powerpc/dt_cpu_ftrs: Add MMA feature
From: Paul A. Clarke @ 2020-05-19 15:49 UTC (permalink / raw)
  To: Segher Boessenkool
  Cc: Alistair Popple, mikey, linuxppc-dev, npiggin, aneesh.kumar
In-Reply-To: <20200519152855.GN31009@gate.crashing.org>

On Tue, May 19, 2020 at 10:28:55AM -0500, Segher Boessenkool wrote:
> On Tue, May 19, 2020 at 10:22:40AM -0500, Paul A. Clarke wrote:
> > On Tue, May 19, 2020 at 10:05:56AM -0500, Segher Boessenkool wrote:
> > > On Tue, May 19, 2020 at 09:49:22AM -0500, Paul A. Clarke wrote:
> > > > On Tue, May 19, 2020 at 10:31:56AM +1000, Alistair Popple wrote:
> > > > > Matrix multiple accumulate (MMA) is a new feature added to ISAv3.1 and
> > > > 
> > > > nit: "Matrix-Multiply Accelerator".
> > > 
> > > "Matrix-Multiply Assist" in fact :-)
> > 
> > Not according to the ISA (p. 1129).
> 
> There is one mistake in the 3.1 ABI yes, in the description for the PCR
> bit of this same name.  Everything else calls this "assist", like all
> similar things are called as well (whereas an "accelerator" is an
> external device).

I stand corrected. The PCR bit description is in error, and that's what I
was going by.  Section 7.6.1.12 "VSX Matrix-Multiply Assist (MMA)
Instructions" seems to be more definitive.  Thanks, Segher, for correcting
my correction.

Should be "Matrix-Multiply Assist".

:-)

PC

^ permalink raw reply

* Re: [PATCH v2 6/7] powerpc/dt_cpu_ftrs: Add MMA feature
From: Segher Boessenkool @ 2020-05-19 15:28 UTC (permalink / raw)
  To: Paul A. Clarke
  Cc: Alistair Popple, mikey, linuxppc-dev, npiggin, aneesh.kumar
In-Reply-To: <20200519152240.GB24922@oc3272150783.ibm.com>

On Tue, May 19, 2020 at 10:22:40AM -0500, Paul A. Clarke wrote:
> On Tue, May 19, 2020 at 10:05:56AM -0500, Segher Boessenkool wrote:
> > On Tue, May 19, 2020 at 09:49:22AM -0500, Paul A. Clarke wrote:
> > > On Tue, May 19, 2020 at 10:31:56AM +1000, Alistair Popple wrote:
> > > > Matrix multiple accumulate (MMA) is a new feature added to ISAv3.1 and
> > > 
> > > nit: "Matrix-Multiply Accelerator".
> > 
> > "Matrix-Multiply Assist" in fact :-)
> 
> Not according to the ISA (p. 1129).

There is one mistake in the 3.1 ABI yes, in the description for the PCR
bit of this same name.  Everything else calls this "assist", like all
similar things are called as well (whereas an "accelerator" is an
external device).


Segher

^ permalink raw reply

* Re: [PATCH v2 6/7] powerpc/dt_cpu_ftrs: Add MMA feature
From: Paul A. Clarke @ 2020-05-19 15:22 UTC (permalink / raw)
  To: Segher Boessenkool
  Cc: Alistair Popple, mikey, linuxppc-dev, npiggin, aneesh.kumar
In-Reply-To: <20200519150556.GM31009@gate.crashing.org>

On Tue, May 19, 2020 at 10:05:56AM -0500, Segher Boessenkool wrote:
> On Tue, May 19, 2020 at 09:49:22AM -0500, Paul A. Clarke wrote:
> > On Tue, May 19, 2020 at 10:31:56AM +1000, Alistair Popple wrote:
> > > Matrix multiple accumulate (MMA) is a new feature added to ISAv3.1 and
> > 
> > nit: "Matrix-Multiply Accelerator".
> 
> "Matrix-Multiply Assist" in fact :-)

Not according to the ISA (p. 1129).

PC

^ permalink raw reply

* Re: [PATCH v2 6/7] powerpc/dt_cpu_ftrs: Add MMA feature
From: Segher Boessenkool @ 2020-05-19 15:05 UTC (permalink / raw)
  To: Paul A. Clarke
  Cc: Alistair Popple, mikey, linuxppc-dev, npiggin, aneesh.kumar
In-Reply-To: <20200519144922.GA24922@oc3272150783.ibm.com>

On Tue, May 19, 2020 at 09:49:22AM -0500, Paul A. Clarke wrote:
> On Tue, May 19, 2020 at 10:31:56AM +1000, Alistair Popple wrote:
> > Matrix multiple accumulate (MMA) is a new feature added to ISAv3.1 and
> 
> nit: "Matrix-Multiply Accelerator".

"Matrix-Multiply Assist" in fact :-)


Segher

^ permalink raw reply

* Re: [PATCH v2 6/7] powerpc/dt_cpu_ftrs: Add MMA feature
From: Paul A. Clarke @ 2020-05-19 14:49 UTC (permalink / raw)
  To: Alistair Popple; +Cc: aneesh.kumar, mikey, linuxppc-dev, npiggin
In-Reply-To: <20200519003157.31946-7-alistair@popple.id.au>

On Tue, May 19, 2020 at 10:31:56AM +1000, Alistair Popple wrote:
> Matrix multiple accumulate (MMA) is a new feature added to ISAv3.1 and

nit: "Matrix-Multiply Accelerator".

PC

^ permalink raw reply

* [powerpc:next 108/110] kernel/events/hw_breakpoint.c:216:12: warning: no previous prototype for function 'arch_reserve_bp_slot'
From: kbuild test robot @ 2020-05-19 14:47 UTC (permalink / raw)
  To: Ravi Bangoria
  Cc: clang-built-linux, Michael Neuling, kbuild-all, linuxppc-dev

[-- Attachment #1: Type: text/plain, Size: 3032 bytes --]

tree:   https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git next
head:   30df74d67d48949da87e3a5b57c381763e8fd526
commit: 29da4f91c0c1fbda12b8a31be0d564930208c92e [108/110] powerpc/watchpoint: Don't allow concurrent perf and ptrace events
config: x86_64-allyesconfig (attached as .config)
compiler: clang version 11.0.0 (https://github.com/llvm/llvm-project 135b877874fae96b4372c8a3fbfaa8ff44ff86e3)
reproduce:
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # install x86_64 cross compiling tool for clang build
        # apt-get install binutils-x86-64-linux-gnu
        git checkout 29da4f91c0c1fbda12b8a31be0d564930208c92e
        # save the attached .config to linux build tree
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross ARCH=x86_64 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kbuild test robot <lkp@intel.com>

All warnings (new ones prefixed by >>, old ones prefixed by <<):

kernel/events/hw_breakpoint.c:71:12: warning: no previous prototype for function 'hw_breakpoint_weight' [-Wmissing-prototypes]
__weak int hw_breakpoint_weight(struct perf_event *bp)
^
kernel/events/hw_breakpoint.c:71:8: note: declare 'static' if the function is not intended to be used outside of this translation unit
__weak int hw_breakpoint_weight(struct perf_event *bp)
^
static
>> kernel/events/hw_breakpoint.c:216:12: warning: no previous prototype for function 'arch_reserve_bp_slot' [-Wmissing-prototypes]
__weak int arch_reserve_bp_slot(struct perf_event *bp)
^
kernel/events/hw_breakpoint.c:216:8: note: declare 'static' if the function is not intended to be used outside of this translation unit
__weak int arch_reserve_bp_slot(struct perf_event *bp)
^
static
>> kernel/events/hw_breakpoint.c:221:13: warning: no previous prototype for function 'arch_release_bp_slot' [-Wmissing-prototypes]
__weak void arch_release_bp_slot(struct perf_event *bp)
^
kernel/events/hw_breakpoint.c:221:8: note: declare 'static' if the function is not intended to be used outside of this translation unit
__weak void arch_release_bp_slot(struct perf_event *bp)
^
static
kernel/events/hw_breakpoint.c:228:13: warning: no previous prototype for function 'arch_unregister_hw_breakpoint' [-Wmissing-prototypes]
__weak void arch_unregister_hw_breakpoint(struct perf_event *bp)
^
kernel/events/hw_breakpoint.c:228:8: note: declare 'static' if the function is not intended to be used outside of this translation unit
__weak void arch_unregister_hw_breakpoint(struct perf_event *bp)
^
static
4 warnings generated.

vim +/arch_reserve_bp_slot +216 kernel/events/hw_breakpoint.c

   215	
 > 216	__weak int arch_reserve_bp_slot(struct perf_event *bp)
   217	{
   218		return 0;
   219	}
   220	
 > 221	__weak void arch_release_bp_slot(struct perf_event *bp)
   222	{
   223	}
   224	

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org

[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 73420 bytes --]

^ permalink raw reply

* Re: [PATCH v2 3/5] libnvdimm/nvdimm/flush: Allow architecture to override the flush barrier
From: Aneesh Kumar K.V @ 2020-05-19 13:52 UTC (permalink / raw)
  To: Dan Williams; +Cc: alistair, linuxppc-dev, linux-nvdimm
In-Reply-To: <CAPcyv4g+oE305Q5bYWkNBKFifB9c0TZo6+hqFQnqiFqU5QFrhQ@mail.gmail.com>

Dan Williams <dan.j.williams@intel.com> writes:

> On Mon, May 18, 2020 at 10:30 PM Aneesh Kumar K.V
> <aneesh.kumar@linux.ibm.com> wrote:

...

>> Applications using new instructions will behave as expected when running
>> on P8 and P9. Only future hardware will differentiate between 'dcbf' and
>> 'dcbfps'
>
> Right, this is the problem. Applications using new instructions behave
> as expected, the kernel has been shipping of_pmem and papr_scm for
> several cycles now, you're saying that the DAX applications written
> against those platforms are going to be broken on P8 and P9?

The expecation is that both kernel and userspace would get upgraded to
use the new instruction before actual persistent memory devices are
made available.

>
>> > I'm thinking the kernel
>> > should go as far as to disable DAX operation by default on new
>> > hardware until userspace asserts that it is prepared to switch to the
>> > new implementation. Is there any other way to ensure the forward
>> > compatibility of deployed ppc64 DAX applications?
>>
>> AFAIU there is no released persistent memory hardware on ppc64 platform
>> and we need to make sure before applications get enabled to use these
>> persistent memory devices, they should switch to use the new
>> instruction?
>
> Right, I want the kernel to offer some level of safety here because
> everything you are describing sounds like a flag day conversion. Am I
> misreading? Is there some other gate that prevents existing users of
> of_pmem and papr_scm from having their expectations violated when
> running on P8 / P9 hardware? Maybe there's tighter ecosystem control
> that I'm just not familiar with, I'm only going off the fact that the
> kernel has shipped a non-zero number of NVDIMM drivers that build with
> ARCH=ppc64 for several cycles.

If we are looking at adding changes to kernel that will prevent a kernel
from running on newer hardware in a specific case, we could as well take
the changes to get the kernel use the newer instructions right?

But I agree with your concern that if we have older kernel/applications
that continue to use `dcbf` on future hardware we will end up
having issues w.r.t powerfail consistency. The plan is what you outlined
above as tighter ecosystem control. Considering we don't have a pmem
device generally available, we get both kernel and userspace upgraded
to use these new instructions before such a device is made available.

-aneesh

^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox