LinuxPPC-Dev Archive on lore.kernel.org

LinuxPPC-Dev Archive on lore.kernel.org
 help / color / mirror / Atom feed

* [PATCH v8 22.5/30] powerpc/optprobes: Add register argument to patch_imm64_load_insns()
From: Michael Ellerman @ 2020-05-16 11:54 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: christophe.leroy, jniethe5
In-Reply-To: <20200506034050.24806-24-jniethe5@gmail.com>

From: Jordan Niethe <jniethe5@gmail.com>

Currently patch_imm32_load_insns() is used to load an instruction to
r4 to be emulated by emulate_step(). For prefixed instructions we
would like to be able to load a 64bit immediate to r4. To prepare for
this make patch_imm64_load_insns() take an argument that decides which
register to load an immediate to - rather than hardcoding r3.

Signed-off-by: Jordan Niethe <jniethe5@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/kernel/optprobes.c | 34 ++++++++++++++++-----------------
 1 file changed, 17 insertions(+), 17 deletions(-)

v8: Split out of patch 23.

diff --git a/arch/powerpc/kernel/optprobes.c b/arch/powerpc/kernel/optprobes.c
index 52c1ab3f85aa..8eea8dbb93fa 100644
--- a/arch/powerpc/kernel/optprobes.c
+++ b/arch/powerpc/kernel/optprobes.c
@@ -162,38 +162,38 @@ void patch_imm32_load_insns(unsigned int val, kprobe_opcode_t *addr)
 
 /*
  * Generate instructions to load provided immediate 64-bit value
- * to register 'r3' and patch these instructions at 'addr'.
+ * to register 'reg' and patch these instructions at 'addr'.
  */
-void patch_imm64_load_insns(unsigned long val, kprobe_opcode_t *addr)
+void patch_imm64_load_insns(unsigned long val, int reg, kprobe_opcode_t *addr)
 {
-	/* lis r3,(op)@highest */
+	/* lis reg,(op)@highest */
 	patch_instruction((struct ppc_inst *)addr,
-			  ppc_inst(PPC_INST_ADDIS | ___PPC_RT(3) |
+			  ppc_inst(PPC_INST_ADDIS | ___PPC_RT(reg) |
 				   ((val >> 48) & 0xffff)));
 	addr++;
 
-	/* ori r3,r3,(op)@higher */
+	/* ori reg,reg,(op)@higher */
 	patch_instruction((struct ppc_inst *)addr,
-			  ppc_inst(PPC_INST_ORI | ___PPC_RA(3) |
-				   ___PPC_RS(3) | ((val >> 32) & 0xffff)));
+			  ppc_inst(PPC_INST_ORI | ___PPC_RA(reg) |
+				   ___PPC_RS(reg) | ((val >> 32) & 0xffff)));
 	addr++;
 
-	/* rldicr r3,r3,32,31 */
+	/* rldicr reg,reg,32,31 */
 	patch_instruction((struct ppc_inst *)addr,
-			  ppc_inst(PPC_INST_RLDICR | ___PPC_RA(3) |
-				   ___PPC_RS(3) | __PPC_SH64(32) | __PPC_ME64(31)));
+			  ppc_inst(PPC_INST_RLDICR | ___PPC_RA(reg) |
+				   ___PPC_RS(reg) | __PPC_SH64(32) | __PPC_ME64(31)));
 	addr++;
 
-	/* oris r3,r3,(op)@h */
+	/* oris reg,reg,(op)@h */
 	patch_instruction((struct ppc_inst *)addr,
-			  ppc_inst(PPC_INST_ORIS | ___PPC_RA(3) |
-				   ___PPC_RS(3) | ((val >> 16) & 0xffff)));
+			  ppc_inst(PPC_INST_ORIS | ___PPC_RA(reg) |
+				   ___PPC_RS(reg) | ((val >> 16) & 0xffff)));
 	addr++;
 
-	/* ori r3,r3,(op)@l */
+	/* ori reg,reg,(op)@l */
 	patch_instruction((struct ppc_inst *)addr,
-			  ppc_inst(PPC_INST_ORI | ___PPC_RA(3) |
-				   ___PPC_RS(3) | (val & 0xffff)));
+			  ppc_inst(PPC_INST_ORI | ___PPC_RA(reg) |
+				   ___PPC_RS(reg) | (val & 0xffff)));
 }
 
 int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *p)
@@ -249,7 +249,7 @@ int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *p)
 	 * Fixup the template with instructions to:
 	 * 1. load the address of the actual probepoint
 	 */
-	patch_imm64_load_insns((unsigned long)op, buff + TMPL_OP_IDX);
+	patch_imm64_load_insns((unsigned long)op, 3, buff + TMPL_OP_IDX);
 
 	/*
 	 * 2. branch to optimized_callback() and emulate_step()
-- 
2.25.1


^ permalink raw reply related

* Re: [PATCH v8 08/30] powerpc: Use a function for getting the instruction op code
From: Michael Ellerman @ 2020-05-16 11:08 UTC (permalink / raw)
  To: Jordan Niethe, linuxppc-dev
  Cc: Christophe Leroy, Alistair Popple, Nicholas Piggin, Balamuruhan S,
	naveen.n.rao, Daniel Axtens
In-Reply-To: <CACzsE9o0DNZ+fwO4Zh-oUp8B+zMukXAr_bicCi0V5PYcnJO7_A@mail.gmail.com>

Jordan Niethe <jniethe5@gmail.com> writes:
> mpe, as suggested by Christophe could you please add this.

I did that and ...

> diff --git a/arch/powerpc/include/asm/inst.h b/arch/powerpc/include/asm/inst.h
> --- a/arch/powerpc/include/asm/inst.h
> +++ b/arch/powerpc/include/asm/inst.h
> @@ -2,6 +2,8 @@
>  #ifndef _ASM_INST_H
>  #define _ASM_INST_H
>
> +#include <asm/disassemble.h>

.. this eventually breaks the build in some driver, because get_ra() is
redefined.

So I've backed out this change for now.

If we want to use the macros in disassemble.h we'll need to namespace
them better, eg. make them ppc_get_ra() and so on.

cheers

>  /*
>   * Instruction data type for POWER
>   */
> @@ -15,7 +17,7 @@ static inline u32 ppc_inst_val(u32 x)
>
>  static inline int ppc_inst_primary_opcode(u32 x)
>  {
> -    return ppc_inst_val(x) >> 26;
> +    return get_op(ppc_inst_val(x));
>  }
>
>  #endif /* _ASM_INST_H */
> -- 
> 2.17.1

^ permalink raw reply

* Re: [PATCH v5 2/2] powerpc/rtas: Implement reentrant rtas call
From: Nicholas Piggin @ 2020-05-16  7:36 UTC (permalink / raw)
  To: Allison Randal, Thiago Jung Bauermann, Benjamin Herrenschmidt,
	Daniel Axtens, Gautham R. Shenoy, Greg Kroah-Hartman,
	Anshuman Khandual, Leonardo Bras, Michael Ellerman, Nadav Amit,
	Nathan Lynch, Paul Mackerras, Thomas Gleixner
  Cc: linuxppc-dev, linux-kernel
In-Reply-To: <20200516052137.175881-3-leobras.c@gmail.com>

Excerpts from Leonardo Bras's message of May 16, 2020 3:21 pm:
> Implement rtas_call_reentrant() for reentrant rtas-calls:
> "ibm,int-on", "ibm,int-off",ibm,get-xive" and  "ibm,set-xive".
> 
> On LoPAPR Version 1.1 (March 24, 2016), from 7.3.10.1 to 7.3.10.4,
> items 2 and 3 say:
> 
> 2 - For the PowerPC External Interrupt option: The * call must be
> reentrant to the number of processors on the platform.
> 3 - For the PowerPC External Interrupt option: The * argument call
> buffer for each simultaneous call must be physically unique.
> 
> So, these rtas-calls can be called in a lockless way, if using
> a different buffer for each cpu doing such rtas call.
> 
> For this, it was suggested to add the buffer (struct rtas_args)
> in the PACA struct, so each cpu can have it's own buffer.
> The PACA struct received a pointer to rtas buffer, which is
> allocated in the memory range available to rtas 32-bit.
> 
> Reentrant rtas calls are useful to avoid deadlocks in crashing,
> where rtas-calls are needed, but some other thread crashed holding
> the rtas.lock.
> 
> This is a backtrace of a deadlock from a kdump testing environment:
> 
>   #0 arch_spin_lock
>   #1  lock_rtas ()
>   #2  rtas_call (token=8204, nargs=1, nret=1, outputs=0x0)
>   #3  ics_rtas_mask_real_irq (hw_irq=4100)
>   #4  machine_kexec_mask_interrupts
>   #5  default_machine_crash_shutdown
>   #6  machine_crash_shutdown
>   #7  __crash_kexec
>   #8  crash_kexec
>   #9  oops_end
> 
> Signed-off-by: Leonardo Bras <leobras.c@gmail.com>
> ---
>  arch/powerpc/include/asm/paca.h     |  2 ++
>  arch/powerpc/include/asm/rtas.h     |  1 +
>  arch/powerpc/kernel/paca.c          | 20 +++++++++++
>  arch/powerpc/kernel/rtas.c          | 52 +++++++++++++++++++++++++++++
>  arch/powerpc/sysdev/xics/ics-rtas.c | 22 ++++++------
>  5 files changed, 86 insertions(+), 11 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h
> index e3cc9eb9204d..87cd9c2220cc 100644
> --- a/arch/powerpc/include/asm/paca.h
> +++ b/arch/powerpc/include/asm/paca.h
> @@ -29,6 +29,7 @@
>  #include <asm/hmi.h>
>  #include <asm/cpuidle.h>
>  #include <asm/atomic.h>
> +#include <asm/rtas-types.h>
>  
>  #include <asm-generic/mmiowb_types.h>
>  
> @@ -270,6 +271,7 @@ struct paca_struct {
>  #ifdef CONFIG_MMIOWB
>  	struct mmiowb_state mmiowb_state;
>  #endif
> +	struct rtas_args *reentrant_args;
>  } ____cacheline_aligned;
>  
>  extern void copy_mm_to_paca(struct mm_struct *mm);
> diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h
> index c35c5350b7e4..fa7509c85881 100644
> --- a/arch/powerpc/include/asm/rtas.h
> +++ b/arch/powerpc/include/asm/rtas.h
> @@ -236,6 +236,7 @@ extern struct rtas_t rtas;
>  extern int rtas_token(const char *service);
>  extern int rtas_service_present(const char *service);
>  extern int rtas_call(int token, int, int, int *, ...);
> +int rtas_call_reentrant(int token, int nargs, int nret, int *outputs, ...);
>  void rtas_call_unlocked(struct rtas_args *args, int token, int nargs,
>  			int nret, ...);
>  extern void __noreturn rtas_restart(char *cmd);
> diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c
> index 3f91ccaa9c74..88c9b61489fc 100644
> --- a/arch/powerpc/kernel/paca.c
> +++ b/arch/powerpc/kernel/paca.c
> @@ -16,6 +16,7 @@
>  #include <asm/kexec.h>
>  #include <asm/svm.h>
>  #include <asm/ultravisor.h>
> +#include <asm/rtas.h>
>  
>  #include "setup.h"
>  
> @@ -164,6 +165,23 @@ static struct slb_shadow * __init new_slb_shadow(int cpu, unsigned long limit)
>  
>  #endif /* CONFIG_PPC_BOOK3S_64 */
>  
> +/**
> + * new_rtas_args() - Allocates rtas args
> + * @cpu:	CPU number
> + * @limit:	Memory limit for this allocation
> + *
> + * Allocates a struct rtas_args and return it's pointer.
> + *
> + * Return:	Pointer to allocated rtas_args
> + */
> +static struct rtas_args * __init new_rtas_args(int cpu, unsigned long limit)
> +{
> +	limit = min_t(unsigned long, limit, RTAS_INSTANTIATE_MAX);
> +
> +	return alloc_paca_data(sizeof(struct rtas_args), L1_CACHE_BYTES,
> +			       limit, cpu);
> +}
> +
>  /* The Paca is an array with one entry per processor.  Each contains an
>   * lppaca, which contains the information shared between the
>   * hypervisor and Linux.
> @@ -202,6 +220,7 @@ void __init __nostackprotector initialise_paca(struct paca_struct *new_paca, int
>  	/* For now -- if we have threads this will be adjusted later */
>  	new_paca->tcd_ptr = &new_paca->tcd;
>  #endif
> +	new_paca->reentrant_args = NULL;
>  }
>  
>  /* Put the paca pointer into r13 and SPRG_PACA */
> @@ -274,6 +293,7 @@ void __init allocate_paca(int cpu)
>  #ifdef CONFIG_PPC_BOOK3S_64
>  	paca->slb_shadow_ptr = new_slb_shadow(cpu, limit);
>  #endif
> +	paca->reentrant_args = new_rtas_args(cpu, limit);

Good, I think tihs should work as you want now. Can you allocate it like 
lppacas? Put it under PSERIES (and in the paca) and check for !HV?

Thanks,
Nick

^ permalink raw reply

* [PATCH v5 2/2] powerpc/rtas: Implement reentrant rtas call
From: Leonardo Bras @ 2020-05-16  5:21 UTC (permalink / raw)
  To: Michael Ellerman, Benjamin Herrenschmidt, Paul Mackerras,
	Nicholas Piggin, Thomas Gleixner, Leonardo Bras, Allison Randal,
	Greg Kroah-Hartman, Thiago Jung Bauermann, Anshuman Khandual,
	Daniel Axtens, Nathan Lynch, Gautham R. Shenoy, Nadav Amit
  Cc: linuxppc-dev, linux-kernel
In-Reply-To: <20200516052137.175881-1-leobras.c@gmail.com>

Implement rtas_call_reentrant() for reentrant rtas-calls:
"ibm,int-on", "ibm,int-off",ibm,get-xive" and  "ibm,set-xive".

On LoPAPR Version 1.1 (March 24, 2016), from 7.3.10.1 to 7.3.10.4,
items 2 and 3 say:

2 - For the PowerPC External Interrupt option: The * call must be
reentrant to the number of processors on the platform.
3 - For the PowerPC External Interrupt option: The * argument call
buffer for each simultaneous call must be physically unique.

So, these rtas-calls can be called in a lockless way, if using
a different buffer for each cpu doing such rtas call.

For this, it was suggested to add the buffer (struct rtas_args)
in the PACA struct, so each cpu can have it's own buffer.
The PACA struct received a pointer to rtas buffer, which is
allocated in the memory range available to rtas 32-bit.

Reentrant rtas calls are useful to avoid deadlocks in crashing,
where rtas-calls are needed, but some other thread crashed holding
the rtas.lock.

This is a backtrace of a deadlock from a kdump testing environment:

  #0 arch_spin_lock
  #1  lock_rtas ()
  #2  rtas_call (token=8204, nargs=1, nret=1, outputs=0x0)
  #3  ics_rtas_mask_real_irq (hw_irq=4100)
  #4  machine_kexec_mask_interrupts
  #5  default_machine_crash_shutdown
  #6  machine_crash_shutdown
  #7  __crash_kexec
  #8  crash_kexec
  #9  oops_end

Signed-off-by: Leonardo Bras <leobras.c@gmail.com>
---
 arch/powerpc/include/asm/paca.h     |  2 ++
 arch/powerpc/include/asm/rtas.h     |  1 +
 arch/powerpc/kernel/paca.c          | 20 +++++++++++
 arch/powerpc/kernel/rtas.c          | 52 +++++++++++++++++++++++++++++
 arch/powerpc/sysdev/xics/ics-rtas.c | 22 ++++++------
 5 files changed, 86 insertions(+), 11 deletions(-)

diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h
index e3cc9eb9204d..87cd9c2220cc 100644
--- a/arch/powerpc/include/asm/paca.h
+++ b/arch/powerpc/include/asm/paca.h
@@ -29,6 +29,7 @@
 #include <asm/hmi.h>
 #include <asm/cpuidle.h>
 #include <asm/atomic.h>
+#include <asm/rtas-types.h>
 
 #include <asm-generic/mmiowb_types.h>
 
@@ -270,6 +271,7 @@ struct paca_struct {
 #ifdef CONFIG_MMIOWB
 	struct mmiowb_state mmiowb_state;
 #endif
+	struct rtas_args *reentrant_args;
 } ____cacheline_aligned;
 
 extern void copy_mm_to_paca(struct mm_struct *mm);
diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h
index c35c5350b7e4..fa7509c85881 100644
--- a/arch/powerpc/include/asm/rtas.h
+++ b/arch/powerpc/include/asm/rtas.h
@@ -236,6 +236,7 @@ extern struct rtas_t rtas;
 extern int rtas_token(const char *service);
 extern int rtas_service_present(const char *service);
 extern int rtas_call(int token, int, int, int *, ...);
+int rtas_call_reentrant(int token, int nargs, int nret, int *outputs, ...);
 void rtas_call_unlocked(struct rtas_args *args, int token, int nargs,
 			int nret, ...);
 extern void __noreturn rtas_restart(char *cmd);
diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c
index 3f91ccaa9c74..88c9b61489fc 100644
--- a/arch/powerpc/kernel/paca.c
+++ b/arch/powerpc/kernel/paca.c
@@ -16,6 +16,7 @@
 #include <asm/kexec.h>
 #include <asm/svm.h>
 #include <asm/ultravisor.h>
+#include <asm/rtas.h>
 
 #include "setup.h"
 
@@ -164,6 +165,23 @@ static struct slb_shadow * __init new_slb_shadow(int cpu, unsigned long limit)
 
 #endif /* CONFIG_PPC_BOOK3S_64 */
 
+/**
+ * new_rtas_args() - Allocates rtas args
+ * @cpu:	CPU number
+ * @limit:	Memory limit for this allocation
+ *
+ * Allocates a struct rtas_args and return it's pointer.
+ *
+ * Return:	Pointer to allocated rtas_args
+ */
+static struct rtas_args * __init new_rtas_args(int cpu, unsigned long limit)
+{
+	limit = min_t(unsigned long, limit, RTAS_INSTANTIATE_MAX);
+
+	return alloc_paca_data(sizeof(struct rtas_args), L1_CACHE_BYTES,
+			       limit, cpu);
+}
+
 /* The Paca is an array with one entry per processor.  Each contains an
  * lppaca, which contains the information shared between the
  * hypervisor and Linux.
@@ -202,6 +220,7 @@ void __init __nostackprotector initialise_paca(struct paca_struct *new_paca, int
 	/* For now -- if we have threads this will be adjusted later */
 	new_paca->tcd_ptr = &new_paca->tcd;
 #endif
+	new_paca->reentrant_args = NULL;
 }
 
 /* Put the paca pointer into r13 and SPRG_PACA */
@@ -274,6 +293,7 @@ void __init allocate_paca(int cpu)
 #ifdef CONFIG_PPC_BOOK3S_64
 	paca->slb_shadow_ptr = new_slb_shadow(cpu, limit);
 #endif
+	paca->reentrant_args = new_rtas_args(cpu, limit);
 	paca_struct_size += sizeof(struct paca_struct);
 }
 
diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
index c5fa251b8950..6e22eb4fc0e7 100644
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -41,6 +41,7 @@
 #include <asm/time.h>
 #include <asm/mmu.h>
 #include <asm/topology.h>
+#include <asm/paca.h>
 
 /* This is here deliberately so it's only used in this file */
 void enter_rtas(unsigned long);
@@ -483,6 +484,57 @@ int rtas_call(int token, int nargs, int nret, int *outputs, ...)
 }
 EXPORT_SYMBOL(rtas_call);
 
+/**
+ * rtas_call_reentrant() - Used for reentrant rtas calls
+ * @token:	Token for desired reentrant RTAS call
+ * @nargs:	Number of Input Parameters
+ * @nret:	Number of Output Parameters
+ * @outputs:	Array of outputs
+ * @...:	Inputs for desired RTAS call
+ *
+ * According to LoPAR documentation, only "ibm,int-on", "ibm,int-off",
+ * "ibm,get-xive" and "ibm,set-xive" are currently reentrant.
+ * Reentrant calls need their own rtas_args buffer, so not using rtas.args, but
+ * PACA one instead.
+ *
+ * Return:	-1 on error,
+ *		First output value of RTAS call if (nret > 0),
+ *		0 otherwise,
+ */
+
+int rtas_call_reentrant(int token, int nargs, int nret, int *outputs, ...)
+{
+	va_list list;
+	struct rtas_args *args;
+	unsigned long flags;
+	int i, ret = 0;
+
+	if (!rtas.entry || token == RTAS_UNKNOWN_SERVICE)
+		return -1;
+
+	local_irq_save(flags);
+	preempt_disable();
+
+	/* We use the per-cpu (PACA) rtas args buffer */
+	args = local_paca->reentrant_args;
+
+	va_start(list, outputs);
+	va_rtas_call_unlocked(args, token, nargs, nret, list);
+	va_end(list);
+
+	if (nret > 1 && outputs)
+		for (i = 0; i < nret - 1; ++i)
+			outputs[i] = be32_to_cpu(args->rets[i + 1]);
+
+	if (nret > 0)
+		ret = be32_to_cpu(args->rets[0]);
+
+	local_irq_restore(flags);
+	preempt_enable();
+
+	return ret;
+}
+
 /* For RTAS_BUSY (-2), delay for 1 millisecond.  For an extended busy status
  * code of 990n, perform the hinted delay of 10^n (last digit) milliseconds.
  */
diff --git a/arch/powerpc/sysdev/xics/ics-rtas.c b/arch/powerpc/sysdev/xics/ics-rtas.c
index 6aabc74688a6..4cf18000f07c 100644
--- a/arch/powerpc/sysdev/xics/ics-rtas.c
+++ b/arch/powerpc/sysdev/xics/ics-rtas.c
@@ -50,8 +50,8 @@ static void ics_rtas_unmask_irq(struct irq_data *d)
 
 	server = xics_get_irq_server(d->irq, irq_data_get_affinity_mask(d), 0);
 
-	call_status = rtas_call(ibm_set_xive, 3, 1, NULL, hw_irq, server,
-				DEFAULT_PRIORITY);
+	call_status = rtas_call_reentrant(ibm_set_xive, 3, 1, NULL, hw_irq,
+					  server, DEFAULT_PRIORITY);
 	if (call_status != 0) {
 		printk(KERN_ERR
 			"%s: ibm_set_xive irq %u server %x returned %d\n",
@@ -60,7 +60,7 @@ static void ics_rtas_unmask_irq(struct irq_data *d)
 	}
 
 	/* Now unmask the interrupt (often a no-op) */
-	call_status = rtas_call(ibm_int_on, 1, 1, NULL, hw_irq);
+	call_status = rtas_call_reentrant(ibm_int_on, 1, 1, NULL, hw_irq);
 	if (call_status != 0) {
 		printk(KERN_ERR "%s: ibm_int_on irq=%u returned %d\n",
 			__func__, hw_irq, call_status);
@@ -91,7 +91,7 @@ static void ics_rtas_mask_real_irq(unsigned int hw_irq)
 	if (hw_irq == XICS_IPI)
 		return;
 
-	call_status = rtas_call(ibm_int_off, 1, 1, NULL, hw_irq);
+	call_status = rtas_call_reentrant(ibm_int_off, 1, 1, NULL, hw_irq);
 	if (call_status != 0) {
 		printk(KERN_ERR "%s: ibm_int_off irq=%u returned %d\n",
 			__func__, hw_irq, call_status);
@@ -99,8 +99,8 @@ static void ics_rtas_mask_real_irq(unsigned int hw_irq)
 	}
 
 	/* Have to set XIVE to 0xff to be able to remove a slot */
-	call_status = rtas_call(ibm_set_xive, 3, 1, NULL, hw_irq,
-				xics_default_server, 0xff);
+	call_status = rtas_call_reentrant(ibm_set_xive, 3, 1, NULL, hw_irq,
+					  xics_default_server, 0xff);
 	if (call_status != 0) {
 		printk(KERN_ERR "%s: ibm_set_xive(0xff) irq=%u returned %d\n",
 			__func__, hw_irq, call_status);
@@ -131,7 +131,7 @@ static int ics_rtas_set_affinity(struct irq_data *d,
 	if (hw_irq == XICS_IPI || hw_irq == XICS_IRQ_SPURIOUS)
 		return -1;
 
-	status = rtas_call(ibm_get_xive, 1, 3, xics_status, hw_irq);
+	status = rtas_call_reentrant(ibm_get_xive, 1, 3, xics_status, hw_irq);
 
 	if (status) {
 		printk(KERN_ERR "%s: ibm,get-xive irq=%u returns %d\n",
@@ -146,8 +146,8 @@ static int ics_rtas_set_affinity(struct irq_data *d,
 		return -1;
 	}
 
-	status = rtas_call(ibm_set_xive, 3, 1, NULL,
-			   hw_irq, irq_server, xics_status[1]);
+	status = rtas_call_reentrant(ibm_set_xive, 3, 1, NULL,
+				     hw_irq, irq_server, xics_status[1]);
 
 	if (status) {
 		printk(KERN_ERR "%s: ibm,set-xive irq=%u returns %d\n",
@@ -179,7 +179,7 @@ static int ics_rtas_map(struct ics *ics, unsigned int virq)
 		return -EINVAL;
 
 	/* Check if RTAS knows about this interrupt */
-	rc = rtas_call(ibm_get_xive, 1, 3, status, hw_irq);
+	rc = rtas_call_reentrant(ibm_get_xive, 1, 3, status, hw_irq);
 	if (rc)
 		return -ENXIO;
 
@@ -198,7 +198,7 @@ static long ics_rtas_get_server(struct ics *ics, unsigned long vec)
 {
 	int rc, status[2];
 
-	rc = rtas_call(ibm_get_xive, 1, 3, status, vec);
+	rc = rtas_call_reentrant(ibm_get_xive, 1, 3, status, vec);
 	if (rc)
 		return -1;
 	return status[0];
-- 
2.25.4


^ permalink raw reply related

* [PATCH v5 1/2] powerpc/rtas: Move type/struct definitions from rtas.h into rtas-types.h
From: Leonardo Bras @ 2020-05-16  5:21 UTC (permalink / raw)
  To: Michael Ellerman, Benjamin Herrenschmidt, Paul Mackerras,
	Nicholas Piggin, Thomas Gleixner, Leonardo Bras, Allison Randal,
	Greg Kroah-Hartman, Thiago Jung Bauermann, Anshuman Khandual,
	Daniel Axtens, Nathan Lynch, Gautham R. Shenoy, Nadav Amit
  Cc: linuxppc-dev, linux-kernel
In-Reply-To: <20200516052137.175881-1-leobras.c@gmail.com>

In order to get any rtas* struct into other headers, including rtas.h
may cause a lot of errors, regarding include dependency needed for
inline functions.

Create rtas-types.h and move there all type/struct definitions
from rtas.h, then include rtas-types.h into rtas.h.

Also, as suggested by checkpath.pl, replace uint8_t for u8, and keep
the same type pattern for the whole file, as they are the same
according to powerpc/boot/types.h.

Signed-off-by: Leonardo Bras <leobras.c@gmail.com>
---
 arch/powerpc/include/asm/rtas-types.h | 126 ++++++++++++++++++++++++++
 arch/powerpc/include/asm/rtas.h       | 118 +-----------------------
 2 files changed, 127 insertions(+), 117 deletions(-)
 create mode 100644 arch/powerpc/include/asm/rtas-types.h

diff --git a/arch/powerpc/include/asm/rtas-types.h b/arch/powerpc/include/asm/rtas-types.h
new file mode 100644
index 000000000000..87354e28f160
--- /dev/null
+++ b/arch/powerpc/include/asm/rtas-types.h
@@ -0,0 +1,126 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _POWERPC_RTAS_TYPES_H
+#define _POWERPC_RTAS_TYPES_H
+#ifdef __KERNEL__
+
+#include <linux/spinlock_types.h>
+
+typedef __be32 rtas_arg_t;
+
+struct rtas_args {
+	__be32 token;
+	__be32 nargs;
+	__be32 nret;
+	rtas_arg_t args[16];
+	rtas_arg_t *rets;     /* Pointer to return values in args[]. */
+};
+
+struct rtas_t {
+	unsigned long entry;		/* physical address pointer */
+	unsigned long base;		/* physical address pointer */
+	unsigned long size;
+	arch_spinlock_t lock;
+	struct rtas_args args;
+	struct device_node *dev;	/* virtual address pointer */
+};
+
+struct rtas_suspend_me_data {
+	atomic_t working; /* number of cpus accessing this struct */
+	atomic_t done;
+	int token; /* ibm,suspend-me */
+	atomic_t error;
+	struct completion *complete; /* wait on this until working == 0 */
+};
+
+struct rtas_error_log {
+	/* Byte 0 */
+	u8		byte0;			/* Architectural version */
+
+	/* Byte 1 */
+	u8		byte1;
+	/* XXXXXXXX
+	 * XXX		3: Severity level of error
+	 *    XX	2: Degree of recovery
+	 *      X	1: Extended log present?
+	 *       XX	2: Reserved
+	 */
+
+	/* Byte 2 */
+	u8		byte2;
+	/* XXXXXXXX
+	 * XXXX		4: Initiator of event
+	 *     XXXX	4: Target of failed operation
+	 */
+	u8		byte3;			/* General event or error*/
+	__be32		extended_log_length;	/* length in bytes */
+	unsigned char	buffer[1];		/* Start of extended log */
+						/* Variable length.      */
+};
+
+/* RTAS general extended event log, Version 6. The extended log starts
+ * from "buffer" field of struct rtas_error_log defined above.
+ */
+struct rtas_ext_event_log_v6 {
+	/* Byte 0 */
+	u8 byte0;
+	/* XXXXXXXX
+	 * X		1: Log valid
+	 *  X		1: Unrecoverable error
+	 *   X		1: Recoverable (correctable or successfully retried)
+	 *    X		1: Bypassed unrecoverable error (degraded operation)
+	 *     X	1: Predictive error
+	 *      X	1: "New" log (always 1 for data returned from RTAS)
+	 *       X	1: Big Endian
+	 *        X	1: Reserved
+	 */
+
+	/* Byte 1 */
+	u8 byte1;			/* reserved */
+
+	/* Byte 2 */
+	u8 byte2;
+	/* XXXXXXXX
+	 * X		1: Set to 1 (indicating log is in PowerPC format)
+	 *  XXX		3: Reserved
+	 *     XXXX	4: Log format used for bytes 12-2047
+	 */
+
+	/* Byte 3 */
+	u8 byte3;			/* reserved */
+	/* Byte 4-11 */
+	u8 reserved[8];			/* reserved */
+	/* Byte 12-15 */
+	__be32  company_id;		/* Company ID of the company	*/
+					/* that defines the format for	*/
+					/* the vendor specific log type	*/
+	/* Byte 16-end of log */
+	u8 vendor_log[1];		/* Start of vendor specific log	*/
+					/* Variable length.		*/
+};
+
+/* Vendor specific Platform Event Log Format, Version 6, section header */
+struct pseries_errorlog {
+	__be16 id;			/* 0x00 2-byte ASCII section ID	*/
+	__be16 length;			/* 0x02 Section length in bytes	*/
+	u8 version;			/* 0x04 Section version		*/
+	u8 subtype;			/* 0x05 Section subtype		*/
+	__be16 creator_component;	/* 0x06 Creator component ID	*/
+	u8 data[];			/* 0x08 Start of section data	*/
+};
+
+/* RTAS pseries hotplug errorlog section */
+struct pseries_hp_errorlog {
+	u8	resource;
+	u8	action;
+	u8	id_type;
+	u8	reserved;
+	union {
+		__be32	drc_index;
+		__be32	drc_count;
+		struct { __be32 count, index; } ic;
+		char	drc_name[1];
+	} _drc_u;
+};
+
+#endif /* __KERNEL__ */
+#endif /* _POWERPC_RTAS_TYPES_H */
diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h
index 3c1887351c71..c35c5350b7e4 100644
--- a/arch/powerpc/include/asm/rtas.h
+++ b/arch/powerpc/include/asm/rtas.h
@@ -5,6 +5,7 @@
 
 #include <linux/spinlock.h>
 #include <asm/page.h>
+#include <asm/rtas-types.h>
 #include <linux/time.h>
 #include <linux/cpumask.h>
 
@@ -42,33 +43,6 @@
  *
  */
 
-typedef __be32 rtas_arg_t;
-
-struct rtas_args {
-	__be32 token;
-	__be32 nargs;
-	__be32 nret; 
-	rtas_arg_t args[16];
-	rtas_arg_t *rets;     /* Pointer to return values in args[]. */
-};  
-
-struct rtas_t {
-	unsigned long entry;		/* physical address pointer */
-	unsigned long base;		/* physical address pointer */
-	unsigned long size;
-	arch_spinlock_t lock;
-	struct rtas_args args;
-	struct device_node *dev;	/* virtual address pointer */
-};
-
-struct rtas_suspend_me_data {
-	atomic_t working; /* number of cpus accessing this struct */
-	atomic_t done;
-	int token; /* ibm,suspend-me */
-	atomic_t error;
-	struct completion *complete; /* wait on this until working == 0 */
-};
-
 /* RTAS event classes */
 #define RTAS_INTERNAL_ERROR		0x80000000 /* set bit 0 */
 #define RTAS_EPOW_WARNING		0x40000000 /* set bit 1 */
@@ -148,31 +122,6 @@ struct rtas_suspend_me_data {
 /* RTAS check-exception vector offset */
 #define RTAS_VECTOR_EXTERNAL_INTERRUPT	0x500
 
-struct rtas_error_log {
-	/* Byte 0 */
-	uint8_t		byte0;			/* Architectural version */
-
-	/* Byte 1 */
-	uint8_t		byte1;
-	/* XXXXXXXX
-	 * XXX		3: Severity level of error
-	 *    XX	2: Degree of recovery
-	 *      X	1: Extended log present?
-	 *       XX	2: Reserved
-	 */
-
-	/* Byte 2 */
-	uint8_t		byte2;
-	/* XXXXXXXX
-	 * XXXX		4: Initiator of event
-	 *     XXXX	4: Target of failed operation
-	 */
-	uint8_t		byte3;			/* General event or error*/
-	__be32		extended_log_length;	/* length in bytes */
-	unsigned char	buffer[1];		/* Start of extended log */
-						/* Variable length.      */
-};
-
 static inline uint8_t rtas_error_severity(const struct rtas_error_log *elog)
 {
 	return (elog->byte1 & 0xE0) >> 5;
@@ -212,47 +161,6 @@ uint32_t rtas_error_extended_log_length(const struct rtas_error_log *elog)
 
 #define RTAS_V6EXT_COMPANY_ID_IBM	(('I' << 24) | ('B' << 16) | ('M' << 8))
 
-/* RTAS general extended event log, Version 6. The extended log starts
- * from "buffer" field of struct rtas_error_log defined above.
- */
-struct rtas_ext_event_log_v6 {
-	/* Byte 0 */
-	uint8_t byte0;
-	/* XXXXXXXX
-	 * X		1: Log valid
-	 *  X		1: Unrecoverable error
-	 *   X		1: Recoverable (correctable or successfully retried)
-	 *    X		1: Bypassed unrecoverable error (degraded operation)
-	 *     X	1: Predictive error
-	 *      X	1: "New" log (always 1 for data returned from RTAS)
-	 *       X	1: Big Endian
-	 *        X	1: Reserved
-	 */
-
-	/* Byte 1 */
-	uint8_t byte1;			/* reserved */
-
-	/* Byte 2 */
-	uint8_t byte2;
-	/* XXXXXXXX
-	 * X		1: Set to 1 (indicating log is in PowerPC format)
-	 *  XXX		3: Reserved
-	 *     XXXX	4: Log format used for bytes 12-2047
-	 */
-
-	/* Byte 3 */
-	uint8_t byte3;			/* reserved */
-	/* Byte 4-11 */
-	uint8_t reserved[8];		/* reserved */
-	/* Byte 12-15 */
-	__be32  company_id;		/* Company ID of the company	*/
-					/* that defines the format for	*/
-					/* the vendor specific log type	*/
-	/* Byte 16-end of log */
-	uint8_t vendor_log[1];		/* Start of vendor specific log	*/
-					/* Variable length.		*/
-};
-
 static
 inline uint8_t rtas_ext_event_log_format(struct rtas_ext_event_log_v6 *ext_log)
 {
@@ -287,16 +195,6 @@ inline uint32_t rtas_ext_event_company_id(struct rtas_ext_event_log_v6 *ext_log)
 #define PSERIES_ELOG_SECT_ID_HOTPLUG		(('H' << 8) | 'P')
 #define PSERIES_ELOG_SECT_ID_MCE		(('M' << 8) | 'C')
 
-/* Vendor specific Platform Event Log Format, Version 6, section header */
-struct pseries_errorlog {
-	__be16 id;			/* 0x00 2-byte ASCII section ID	*/
-	__be16 length;			/* 0x02 Section length in bytes	*/
-	uint8_t version;		/* 0x04 Section version		*/
-	uint8_t subtype;		/* 0x05 Section subtype		*/
-	__be16 creator_component;	/* 0x06 Creator component ID	*/
-	uint8_t data[];			/* 0x08 Start of section data	*/
-};
-
 static
 inline uint16_t pseries_errorlog_id(struct pseries_errorlog *sect)
 {
@@ -309,20 +207,6 @@ inline uint16_t pseries_errorlog_length(struct pseries_errorlog *sect)
 	return be16_to_cpu(sect->length);
 }
 
-/* RTAS pseries hotplug errorlog section */
-struct pseries_hp_errorlog {
-	u8	resource;
-	u8	action;
-	u8	id_type;
-	u8	reserved;
-	union {
-		__be32	drc_index;
-		__be32	drc_count;
-		struct { __be32 count, index; } ic;
-		char	drc_name[1];
-	} _drc_u;
-};
-
 #define PSERIES_HP_ELOG_RESOURCE_CPU	1
 #define PSERIES_HP_ELOG_RESOURCE_MEM	2
 #define PSERIES_HP_ELOG_RESOURCE_SLOT	3
-- 
2.25.4


^ permalink raw reply related

* [PATCH v5 0/2] Implement reentrant rtas call
From: Leonardo Bras @ 2020-05-16  5:21 UTC (permalink / raw)
  To: Michael Ellerman, Benjamin Herrenschmidt, Paul Mackerras,
	Allison Randal, Greg Kroah-Hartman, Thomas Gleixner,
	Nicholas Piggin, Leonardo Bras, Nathan Lynch, Gautham R. Shenoy,
	Nadav Amit
  Cc: linuxppc-dev, linux-kernel

Patch 2 implement rtas_call_reentrant() for reentrant rtas-calls:
"ibm,int-on", "ibm,int-off",ibm,get-xive" and  "ibm,set-xive",
according to LoPAPR Version 1.1 (March 24, 2016).

For that, it's necessary that every call uses a different
rtas buffer (rtas_args). Paul Mackerras suggested using the PACA
structure for creating a per-cpu buffer for these calls.

Patch 1 was necessary to make PACA have a 'struct rtas_args' member.

Reentrant rtas calls can be useful to avoid deadlocks in crashing,
where rtas-calls are needed, but some other thread crashed holding
the rtas.lock.

This is a backtrace of a deadlock from a kdump testing environment:

  #0 arch_spin_lock
  #1  lock_rtas () 
  #2  rtas_call (token=8204, nargs=1, nret=1, outputs=0x0)
  #3  ics_rtas_mask_real_irq (hw_irq=4100) 
  #4  machine_kexec_mask_interrupts
  #5  default_machine_crash_shutdown
  #6  machine_crash_shutdown 
  #7  __crash_kexec
  #8  crash_kexec
  #9  oops_end

Signed-off-by: Leonardo Bras <leobras.c@gmail.com>

---
Changes since v4:
- Insted of having the full buffer on PACA, adds only a pointer and
  allocate it during allocate_paca(), making sure it's in a memory
  range available for RTAS (32-bit). (Thanks Nick Piggin!)

Changes since v3:
- Adds protection from preemption and interruption

Changes since v2:
- Fixed build failure from ppc64e, by including spinlock_types.h on 
  rtas-types.h
- Improved commit messages

Changes since v1:
- Moved buffer from stack to PACA (as suggested by Paul Mackerras)
- Added missing output bits
- Improve documentation following kernel-doc format (as suggested by
  Nathan Lynch)


Leonardo Bras (2):
  powerpc/rtas: Move type/struct definitions from rtas.h into
    rtas-types.h
  powerpc/rtas: Implement reentrant rtas call

 arch/powerpc/include/asm/paca.h       |   2 +
 arch/powerpc/include/asm/rtas-types.h | 126 ++++++++++++++++++++++++++
 arch/powerpc/include/asm/rtas.h       | 119 +-----------------------
 arch/powerpc/kernel/rtas.c            |  42 +++++++++
 arch/powerpc/sysdev/xics/ics-rtas.c   |  22 ++---
 5 files changed, 183 insertions(+), 128 deletions(-)
 create mode 100644 arch/powerpc/include/asm/rtas-types.h

-- 
2.25.4


^ permalink raw reply

* Re: [PATCH v4 2/2] powerpc/rtas: Implement reentrant rtas call
From: Leonardo Bras @ 2020-05-16  4:08 UTC (permalink / raw)
  To: Nicholas Piggin, Allison Randal, Benjamin Herrenschmidt,
	Gautham R. Shenoy, Greg Kroah-Hartman, Michael Ellerman,
	Nadav Amit, Nathan Lynch, Paul Mackerras, Thomas Gleixner
  Cc: linuxppc-dev, linux-kernel
In-Reply-To: <1589525800.2asfsw2zlu.astroid@bobo.none>

Hello Nick,

On Fri, 2020-05-15 at 17:30 +1000, Nicholas Piggin wrote:
> Excerpts from Leonardo Bras's message of May 15, 2020 9:51 am:
> > Implement rtas_call_reentrant() for reentrant rtas-calls:
> > "ibm,int-on", "ibm,int-off",ibm,get-xive" and  "ibm,set-xive".
> > 
> > On LoPAPR Version 1.1 (March 24, 2016), from 7.3.10.1 to 7.3.10.4,
> > items 2 and 3 say:
> > 
> > 2 - For the PowerPC External Interrupt option: The * call must be
> > reentrant to the number of processors on the platform.
> > 3 - For the PowerPC External Interrupt option: The * argument call
> > buffer for each simultaneous call must be physically unique.
> > 
> > So, these rtas-calls can be called in a lockless way, if using
> > a different buffer for each cpu doing such rtas call.
> 
> What about rtas_call_unlocked? Do the callers need to take the rtas 
> lock?
> 
> Machine checks must call ibm,nmi-interlock too, which we really don't 
> want to take a lock for either. Hopefully that's in a class of its own
> and we can essentially ignore with respect to other rtas calls.
> 
> The spec is pretty vague too :(
> 
> "The ibm,get-xive call must be reentrant to the number of processors on 
> the platform."
> 
> This suggests ibm,get-xive can be called concurrently by multiple
> processors. It doesn't say anything about being re-entrant against any 
> of the other re-entrant calls. Maybe that could be reasonably assumed,
> but I don't know if it's reasonable to assume it can be called 
> concurrently with a *non-reentrant* call, is it?

This was discussed on a previous version of the patchset:

https://lore.kernel.org/linuxppc-dev/875zcy2v8o.fsf@linux.ibm.com/

He checked with partition firmware development and these calls can be
used concurrently with arbitrary other RTAS calls.

> 
> > For this, it was suggested to add the buffer (struct rtas_args)
> > in the PACA struct, so each cpu can have it's own buffer.
> 
> You can't do this, paca is not limited to RTAS_INSTANTIATE_MAX.
> Which is good, because I didn't want you to add another 88 bytes to the 
> paca :) Can you make it a pointer and allocate it separately? Check
> the slb_shadow allocation, you could use a similar pattern.

Sure, I will send the next version with this change.

> 
> The other option would be to have just one more rtas args, and have the 
> crashing CPU always that. That would skirt the re-entrancy issue -- the
> concurrency is only ever a last resort. Would be a bit tricker though.

It seems a good idea, but I would like to try the previous alternative
first.

> Thanks,
> Nick

Thank you Nick! 


^ permalink raw reply

* Re: powerpc/pci: [PATCH 1/1 V2] PCIE PHB reset
From: Gustavo Romero @ 2020-05-15 22:02 UTC (permalink / raw)
  To: wenxiong, linuxppc-dev; +Cc: brking, oohall, wenxiong
In-Reply-To: <1589573097-12892-1-git-send-email-wenxiong@linux.vnet.ibm.com>

Hi Xiong,

On 5/15/20 5:04 PM, wenxiong@linux.vnet.ibm.com wrote:
> From: Wen Xiong <wenxiong@linux.vnet.ibm.com>
> 
> Several device drivers hit EEH(Extended Error handling) when triggering
> kdump on Pseries PowerVM. This patch implemented a reset of the PHBs
> in pci general code. PHB reset stop all PCI transactions from previous
> kernel. We have tested the patch in several enviroments:

What do you mean exactly by "previous kernel" in here? Is there a way to
enhance that comment a bit further?


Thanks,
Gustavo

^ permalink raw reply

* powerpc/pci: [PATCH 1/1 V2] PCIE PHB reset
From: wenxiong @ 2020-05-15 20:04 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: brking, Wen Xiong, oohall, wenxiong

From: Wen Xiong <wenxiong@linux.vnet.ibm.com>

Several device drivers hit EEH(Extended Error handling) when triggering
kdump on Pseries PowerVM. This patch implemented a reset of the PHBs
in pci general code. PHB reset stop all PCI transactions from previous
kernel. We have tested the patch in several enviroments:
- direct slot adapters
- adapters under the switch
- a VF adapter in PowerVM
- a VF adapter/adapter in KVM guest.

Signed-off-by: Wen Xiong <wenxiong@linux.vnet.ibm.com>

---
 arch/powerpc/platforms/pseries/pci.c | 152 +++++++++++++++++++++++++++
 1 file changed, 152 insertions(+)

diff --git a/arch/powerpc/platforms/pseries/pci.c b/arch/powerpc/platforms/pseries/pci.c
index 911534b89c85..cb7e4276cf04 100644
--- a/arch/powerpc/platforms/pseries/pci.c
+++ b/arch/powerpc/platforms/pseries/pci.c
@@ -11,6 +11,8 @@
 #include <linux/kernel.h>
 #include <linux/pci.h>
 #include <linux/string.h>
+#include <linux/crash_dump.h>
+#include <linux/delay.h>
 
 #include <asm/eeh.h>
 #include <asm/pci-bridge.h>
@@ -354,3 +356,153 @@ int pseries_root_bridge_prepare(struct pci_host_bridge *bridge)
 
 	return 0;
 }
+
+/**
+ * pseries_get_pdn_addr - Retrieve PHB address
+ * @pe: EEH PE
+ *
+ * Retrieve the assocated PHB address. Actually, there're 2 RTAS
+ * function calls dedicated for the purpose. We need implement
+ * it through the new function and then the old one. Besides,
+ * you should make sure the config address is figured out from
+ * FDT node before calling the function.
+ *
+ */
+static int pseries_get_pdn_addr(struct pci_controller *phb)
+{
+	int ret = -1;
+	int rets[3];
+	int ibm_get_config_addr_info;
+	int ibm_get_config_addr_info2;
+	int config_addr = 0;
+	struct pci_dn *root_pdn, *pdn;
+
+	ibm_get_config_addr_info2   = rtas_token("ibm,get-config-addr-info2");
+	ibm_get_config_addr_info    = rtas_token("ibm,get-config-addr-info");
+
+	root_pdn = PCI_DN(phb->dn);
+	pdn = list_first_entry(&root_pdn->child_list, struct pci_dn, list);
+	config_addr = (pdn->busno << 16) | (pdn->devfn << 8);
+
+	if (ibm_get_config_addr_info2 != RTAS_UNKNOWN_SERVICE) {
+		/*
+		 * First of all, we need to make sure there has one PE
+		 * associated with the device. If option is 1, it
+		 * queries if config address is supported in a PE or not.
+		 * If option is 0, it returns PE config address or config
+		 * address for the PE primary bus.
+		 */
+		ret = rtas_call(ibm_get_config_addr_info2, 4, 2, rets,
+			config_addr, BUID_HI(pdn->phb->buid),
+			BUID_LO(pdn->phb->buid), 1);
+		if (ret || (rets[0] == 0)) {
+			pr_warn("%s: Failed to get address for PHB#%x-PE# option=%d config_addr=%x\n",
+				__func__, pdn->phb->global_number, 1, rets[0]);
+			return -1;
+		}
+
+		/* Retrieve the associated PE config address */
+		ret = rtas_call(ibm_get_config_addr_info2, 4, 2, rets,
+			config_addr, BUID_HI(pdn->phb->buid),
+			BUID_LO(pdn->phb->buid), 0);
+		if (ret) {
+			pr_warn("%s: Failed to get address for PHB#%x-PE# option=%d config_addr=%x\n",
+				__func__, pdn->phb->global_number, 0, rets[0]);
+			return -1;
+		}
+		return rets[0];
+	}
+
+	if (ibm_get_config_addr_info != RTAS_UNKNOWN_SERVICE) {
+		ret = rtas_call(ibm_get_config_addr_info, 4, 2, rets,
+			config_addr, BUID_HI(pdn->phb->buid),
+			BUID_LO(pdn->phb->buid), 0);
+		if (ret || rets[0]) {
+			pr_warn("%s: Failed to get address for PHB#%x-PE# config_addr=%x\n",
+				__func__, pdn->phb->global_number, rets[0]);
+			return -1;
+		}
+		return rets[0];
+	}
+
+	return ret;
+}
+
+static int __init pseries_phb_reset(void)
+{
+	struct pci_controller *phb;
+	int config_addr;
+	int ibm_set_slot_reset;
+	int ibm_configure_pe;
+	int ret;
+
+	if (is_kdump_kernel() || reset_devices) {
+		pr_info("Issue PHB reset ...\n");
+		ibm_set_slot_reset = rtas_token("ibm,set-slot-reset");
+		ibm_configure_pe = rtas_token("ibm,configure-pe");
+
+		if (ibm_set_slot_reset == RTAS_UNKNOWN_SERVICE ||
+				ibm_configure_pe == RTAS_UNKNOWN_SERVICE) {
+			pr_info("%s: EEH functionality not supported\n",
+				__func__);
+		}
+
+		list_for_each_entry(phb, &hose_list, list_node) {
+			config_addr = pseries_get_pdn_addr(phb);
+			if (config_addr == -1)
+				continue;
+
+			ret = rtas_call(ibm_set_slot_reset, 4, 1, NULL,
+				config_addr, BUID_HI(phb->buid),
+				BUID_LO(phb->buid), EEH_RESET_FUNDAMENTAL);
+
+			/* If fundamental-reset not supported, try hot-reset */
+			if (ret == -8)
+				ret = rtas_call(ibm_set_slot_reset, 4, 1, NULL,
+					config_addr, BUID_HI(phb->buid),
+					BUID_LO(phb->buid), EEH_RESET_HOT);
+
+			if (ret) {
+				pr_err("%s: PHB#%x-PE# failed with rtas_call activate reset=%d\n",
+					__func__, phb->global_number, ret);
+				continue;
+			}
+		}
+		msleep(EEH_PE_RST_SETTLE_TIME);
+
+		list_for_each_entry(phb, &hose_list, list_node) {
+			config_addr = pseries_get_pdn_addr(phb);
+			if (config_addr == -1)
+				continue;
+
+			ret = rtas_call(ibm_set_slot_reset, 4, 1, NULL,
+				config_addr, BUID_HI(phb->buid),
+				BUID_LO(phb->buid), EEH_RESET_DEACTIVATE);
+			if (ret) {
+				pr_err("%s: PHB#%x-PE# failed with rtas_call deactive reset=%d\n",
+					__func__, phb->global_number, ret);
+				continue;
+			}
+		}
+		msleep(EEH_PE_RST_SETTLE_TIME);
+
+		list_for_each_entry(phb, &hose_list, list_node) {
+			config_addr = pseries_get_pdn_addr(phb);
+			if (config_addr == -1)
+				continue;
+
+			ret = rtas_call(ibm_configure_pe, 3, 1, NULL,
+				config_addr, BUID_HI(phb->buid),
+				BUID_LO(phb->buid));
+			if (ret) {
+				pr_err("%s: PHB#%x-PE# failed with rtas_call configure_pe =%d\n",
+					__func__, phb->global_number, ret);
+				continue;
+			}
+		}
+	}
+
+	return 0;
+}
+machine_postcore_initcall(pseries, pseries_phb_reset);
+
-- 
2.18.1


^ permalink raw reply related

* Re: [PATCH v2 00/12] mm: consolidate definitions of page table accessors
From: Andrew Morton @ 2020-05-15 21:12 UTC (permalink / raw)
  To: Mike Rapoport
  Cc: linux-m68k, Rich Felker, linux-ia64, linux-sh, Catalin Marinas,
	Heiko Carstens, linux-mips, Max Filippov, Guo Ren, Matthew Wilcox,
	sparclinux, linux-hexagon, linux-riscv, Vincent Chen, Will Deacon,
	Greg Ungerer, linux-arch, linux-s390, linux-c6x-dev, Brian Cain,
	Helge Deller, x86, Russell King, Ley Foon Tan, Mike Rapoport,
	Ingo Molnar, Geert Uytterhoeven, linux-parisc, Mark Salter,
	Matt Turner, linux-snps-arc, linux-xtensa, Arnd Bergmann,
	linux-alpha, linux-um, Tony Luck, Borislav Petkov, Greentime Hu,
	Paul Walmsley, Stafford Horne, linux-csky, Guan Xuetao,
	linux-arm-kernel, Chris Zankel, Michal Simek, Thomas Bogendoerfer,
	Yoshinori Sato, Nick Hu, linux-mm, Vineet Gupta, linux-kernel,
	openrisc, Thomas Gleixner, Richard Weinberger, linuxppc-dev,
	David S. Miller
In-Reply-To: <20200514170327.31389-1-rppt@kernel.org>

On Thu, 14 May 2020 20:03:15 +0300 Mike Rapoport <rppt@kernel.org> wrote:

> The low level page table accessors (pXY_index(), pXY_offset()) are
> duplicated across all architectures and sometimes more than once. For
> instance, we have 31 definition of pgd_offset() for 25 supported
> architectures.
> 
> Most of these definitions are actually identical and typically it boils
> down to, e.g. 
> 
> static inline unsigned long pmd_index(unsigned long address)
> {
>         return (address >> PMD_SHIFT) & (PTRS_PER_PMD - 1);
> }
> 
> static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address)
> {
>         return (pmd_t *)pud_page_vaddr(*pud) + pmd_index(address);
> }
> 
> These definitions can be shared among 90% of the arches provided XYZ_SHIFT,
> PTRS_PER_XYZ and xyz_page_vaddr() are defined.
> 
> For architectures that really need a custom version there is always
> possibility to override the generic version with the usual ifdefs magic.
> 
> These patches introduce include/linux/pgtable.h that replaces
> include/asm-generic/pgtable.h and add the definitions of the page table
> accessors to the new header.

hm,

>  712 files changed, 684 insertions(+), 2021 deletions(-)

big!

There's a lot of stuff going on at present (I suspect everyone is
sitting at home coding up a storm).  However this all merged up fairly
cleanly, haven't tried compiling it yet.

^ permalink raw reply

* Re: [PATCH v4 5/6] pstore/ram: Introduce max_reason and convert dump_oops
From: Kees Cook @ 2020-05-15 19:53 UTC (permalink / raw)
  To: Pavel Tatashin
  Cc: Petr Mladek, Tony Luck, Jonathan Corbet, Anton Vorontsov,
	Linux Doc Mailing List, LKML, Steven Rostedt, Sergey Senozhatsky,
	devicetree, Rob Herring, Paul Mackerras, Colin Cross,
	Enric Balletbo i Serra, linuxppc-dev, Benson Leung
In-Reply-To: <CA+CK2bCbAb1EN6xa9a-DRfan6Cv3YgZgPJ1buwUaej7jBRv=Kg@mail.gmail.com>

On Fri, May 15, 2020 at 03:40:14PM -0400, Pavel Tatashin wrote:
>  pdata.dump_oops = dump_oops;
> > +       /* If "max_reason" is set, its value has priority over "dump_oops". */
> > +       if (ramoops_max_reason != -1)
> > +               pdata.max_reason = ramoops_max_reason;
> 
>  (ramoops_max_reason >= 0) might make more sense here, we do not want
> negative max_reason even if it was provided by the user.

Yeah, that's a good point. I'll tweak that. Thanks!

-- 
Kees Cook

^ permalink raw reply

* Re: [PATCH v4 5/6] pstore/ram: Introduce max_reason and convert dump_oops
From: Kees Cook @ 2020-05-15 19:48 UTC (permalink / raw)
  To: Pavel Tatashin
  Cc: Petr Mladek, Tony Luck, Jonathan Corbet, Anton Vorontsov,
	Linux Doc Mailing List, LKML, Steven Rostedt, Sergey Senozhatsky,
	devicetree, Rob Herring, Paul Mackerras, Colin Cross,
	Enric Balletbo i Serra, linuxppc-dev, Benson Leung
In-Reply-To: <CA+CK2bAvTo1=oLH32-Wdz07F3OP=T+gA6GgzkBH1Q9W8upHkNg@mail.gmail.com>

On Fri, May 15, 2020 at 03:30:27PM -0400, Pavel Tatashin wrote:
> >  #define parse_u32(name, field, default_value) {                                \
> >                 ret = ramoops_parse_dt_u32(pdev, name, default_value,   \
> 
> The series seems to be missing the patch where ramoops_parse_dt_size
> -> ramoops_parse_dt_u32 get renamed, and updated to handle default
> value.

Oops! Sorry, I cut the line in the wrong place for sending out the delta
on top of the pstore tree. :)

It's unchanged from:
https://lore.kernel.org/lkml/20200506211523.15077-4-keescook@chromium.org/

-- 
Kees Cook

^ permalink raw reply

* Re: [PATCH v4 5/6] pstore/ram: Introduce max_reason and convert dump_oops
From: Pavel Tatashin @ 2020-05-15 19:40 UTC (permalink / raw)
  To: Kees Cook
  Cc: Petr Mladek, Tony Luck, Jonathan Corbet, Anton Vorontsov,
	Linux Doc Mailing List, LKML, Steven Rostedt, Sergey Senozhatsky,
	devicetree, Rob Herring, Paul Mackerras, Colin Cross,
	Enric Balletbo i Serra, linuxppc-dev, Benson Leung
In-Reply-To: <20200515184434.8470-6-keescook@chromium.org>

 pdata.dump_oops = dump_oops;
> +       /* If "max_reason" is set, its value has priority over "dump_oops". */
> +       if (ramoops_max_reason != -1)
> +               pdata.max_reason = ramoops_max_reason;

 (ramoops_max_reason >= 0) might make more sense here, we do not want
negative max_reason even if it was provided by the user.

Otherwise the series looks good.

Thank you,
Pasha

^ permalink raw reply

* Re: [PATCH v4 5/6] pstore/ram: Introduce max_reason and convert dump_oops
From: Pavel Tatashin @ 2020-05-15 19:30 UTC (permalink / raw)
  To: Kees Cook
  Cc: Petr Mladek, Tony Luck, Jonathan Corbet, Anton Vorontsov,
	Linux Doc Mailing List, LKML, Steven Rostedt, Sergey Senozhatsky,
	devicetree, Rob Herring, Paul Mackerras, Colin Cross,
	Enric Balletbo i Serra, linuxppc-dev, Benson Leung
In-Reply-To: <20200515184434.8470-6-keescook@chromium.org>

>  #define parse_u32(name, field, default_value) {                                \
>                 ret = ramoops_parse_dt_u32(pdev, name, default_value,   \

The series seems to be missing the patch where ramoops_parse_dt_size
-> ramoops_parse_dt_u32 get renamed, and updated to handle default
value.

^ permalink raw reply

* Re: [PATCH v4 3/6] printk: Introduce kmsg_dump_reason_str()
From: Pavel Tatashin @ 2020-05-15 19:23 UTC (permalink / raw)
  To: Kees Cook
  Cc: Petr Mladek, Tony Luck, Jonathan Corbet, Anton Vorontsov,
	Linux Doc Mailing List, LKML, Steven Rostedt, Sergey Senozhatsky,
	devicetree, Rob Herring, Paul Mackerras, Colin Cross,
	Enric Balletbo i Serra, linuxppc-dev, Benson Leung
In-Reply-To: <20200515184434.8470-4-keescook@chromium.org>

On Fri, May 15, 2020 at 2:44 PM Kees Cook <keescook@chromium.org> wrote:
>
> The pstore subsystem already had a private version of this function.
> With the coming addition of the pstore/zone driver, this needs to be
> shared. As it really should live with printk, move it there instead.
>
> Link: https://lore.kernel.org/lkml/20200510202436.63222-8-keescook@chromium.org/
> Acked-by: Petr Mladek <pmladek@suse.com>
> Acked-by: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
> Signed-off-by: Kees Cook <keescook@chromium.org>

Reviewed-by: Pavel Tatashin <pasha.tatashin@soleen.com>

^ permalink raw reply

* Re: [PATCH] tty: hvc: Fix data abort due to race in hvc_open
From: rananta @ 2020-05-15 19:21 UTC (permalink / raw)
  To: Greg KH; +Cc: linuxppc-dev, andrew, Jiri Slaby, linux-kernel
In-Reply-To: <20200515073041.GB1361563@kroah.com>

On 2020-05-15 00:30, Greg KH wrote:
> On Thu, May 14, 2020 at 04:22:10PM -0700, rananta@codeaurora.org wrote:
>> On 2020-05-13 00:04, Greg KH wrote:
>> > On Tue, May 12, 2020 at 02:39:50PM -0700, rananta@codeaurora.org wrote:
>> > > On 2020-05-12 01:25, Greg KH wrote:
>> > > > On Tue, May 12, 2020 at 09:22:15AM +0200, Jiri Slaby wrote:
>> > > > > On 11. 05. 20, 9:39, Greg KH wrote:
>> > > > > > On Mon, May 11, 2020 at 12:23:58AM -0700, rananta@codeaurora.org wrote:
>> > > > > >> On 2020-05-09 23:48, Greg KH wrote:
>> > > > > >>> On Sat, May 09, 2020 at 06:30:56PM -0700, rananta@codeaurora.org wrote:
>> > > > > >>>> On 2020-05-06 02:48, Greg KH wrote:
>> > > > > >>>>> On Mon, Apr 27, 2020 at 08:26:01PM -0700, Raghavendra Rao Ananta wrote:
>> > > > > >>>>>> Potentially, hvc_open() can be called in parallel when two tasks calls
>> > > > > >>>>>> open() on /dev/hvcX. In such a scenario, if the
>> > > > > >>>>>> hp->ops->notifier_add()
>> > > > > >>>>>> callback in the function fails, where it sets the tty->driver_data to
>> > > > > >>>>>> NULL, the parallel hvc_open() can see this NULL and cause a memory
>> > > > > >>>>>> abort.
>> > > > > >>>>>> Hence, serialize hvc_open and check if tty->private_data is NULL
>> > > > > >>>>>> before
>> > > > > >>>>>> proceeding ahead.
>> > > > > >>>>>>
>> > > > > >>>>>> The issue can be easily reproduced by launching two tasks
>> > > > > >>>>>> simultaneously
>> > > > > >>>>>> that does nothing but open() and close() on /dev/hvcX.
>> > > > > >>>>>> For example:
>> > > > > >>>>>> $ ./simple_open_close /dev/hvc0 & ./simple_open_close /dev/hvc0 &
>> > > > > >>>>>>
>> > > > > >>>>>> Signed-off-by: Raghavendra Rao Ananta <rananta@codeaurora.org>
>> > > > > >>>>>> ---
>> > > > > >>>>>>  drivers/tty/hvc/hvc_console.c | 16 ++++++++++++++--
>> > > > > >>>>>>  1 file changed, 14 insertions(+), 2 deletions(-)
>> > > > > >>>>>>
>> > > > > >>>>>> diff --git a/drivers/tty/hvc/hvc_console.c
>> > > > > >>>>>> b/drivers/tty/hvc/hvc_console.c
>> > > > > >>>>>> index 436cc51c92c3..ebe26fe5ac09 100644
>> > > > > >>>>>> --- a/drivers/tty/hvc/hvc_console.c
>> > > > > >>>>>> +++ b/drivers/tty/hvc/hvc_console.c
>> > > > > >>>>>> @@ -75,6 +75,8 @@ static LIST_HEAD(hvc_structs);
>> > > > > >>>>>>   */
>> > > > > >>>>>>  static DEFINE_MUTEX(hvc_structs_mutex);
>> > > > > >>>>>>
>> > > > > >>>>>> +/* Mutex to serialize hvc_open */
>> > > > > >>>>>> +static DEFINE_MUTEX(hvc_open_mutex);
>> > > > > >>>>>>  /*
>> > > > > >>>>>>   * This value is used to assign a tty->index value to a hvc_struct
>> > > > > >>>>>> based
>> > > > > >>>>>>   * upon order of exposure via hvc_probe(), when we can not match it
>> > > > > >>>>>> to
>> > > > > >>>>>> @@ -346,16 +348,24 @@ static int hvc_install(struct tty_driver
>> > > > > >>>>>> *driver, struct tty_struct *tty)
>> > > > > >>>>>>   */
>> > > > > >>>>>>  static int hvc_open(struct tty_struct *tty, struct file * filp)
>> > > > > >>>>>>  {
>> > > > > >>>>>> -	struct hvc_struct *hp = tty->driver_data;
>> > > > > >>>>>> +	struct hvc_struct *hp;
>> > > > > >>>>>>  	unsigned long flags;
>> > > > > >>>>>>  	int rc = 0;
>> > > > > >>>>>>
>> > > > > >>>>>> +	mutex_lock(&hvc_open_mutex);
>> > > > > >>>>>> +
>> > > > > >>>>>> +	hp = tty->driver_data;
>> > > > > >>>>>> +	if (!hp) {
>> > > > > >>>>>> +		rc = -EIO;
>> > > > > >>>>>> +		goto out;
>> > > > > >>>>>> +	}
>> > > > > >>>>>> +
>> > > > > >>>>>>  	spin_lock_irqsave(&hp->port.lock, flags);
>> > > > > >>>>>>  	/* Check and then increment for fast path open. */
>> > > > > >>>>>>  	if (hp->port.count++ > 0) {
>> > > > > >>>>>>  		spin_unlock_irqrestore(&hp->port.lock, flags);
>> > > > > >>>>>>  		hvc_kick();
>> > > > > >>>>>> -		return 0;
>> > > > > >>>>>> +		goto out;
>> > > > > >>>>>>  	} /* else count == 0 */
>> > > > > >>>>>>  	spin_unlock_irqrestore(&hp->port.lock, flags);
>> > > > > >>>>>
>> > > > > >>>>> Wait, why isn't this driver just calling tty_port_open() instead of
>> > > > > >>>>> trying to open-code all of this?
>> > > > > >>>>>
>> > > > > >>>>> Keeping a single mutext for open will not protect it from close, it will
>> > > > > >>>>> just slow things down a bit.  There should already be a tty lock held by
>> > > > > >>>>> the tty core for open() to keep it from racing things, right?
>> > > > > >>>> The tty lock should have been held, but not likely across
>> > > > > >>>> ->install() and
>> > > > > >>>> ->open() callbacks, thus resulting in a race between hvc_install() and
>> > > > > >>>> hvc_open(),
>> > > > > >>>
>> > > > > >>> How?  The tty lock is held in install, and should not conflict with
>> > > > > >>> open(), otherwise, we would be seeing this happen in all tty drivers,
>> > > > > >>> right?
>> > > > > >>>
>> > > > > >> Well, I was expecting the same, but IIRC, I see that the open() was being
>> > > > > >> called in parallel for the same device node.
>> > > > > >
>> > > > > > So open and install are happening at the same time?  And the tty_lock()
>> > > > > > does not protect the needed fields from being protected properly?  If
>> > > > > > not, what fields are being touched without the lock?
>> > > > > >
>> > > > > >> Is it expected that the tty core would allow only one thread to
>> > > > > >> access the dev-node, while blocking the other, or is it the client
>> > > > > >> driver's responsibility to handle the exclusiveness?
>> > > > > >
>> > > > > > The tty core should handle this correctly, for things that can mess
>> > > > > > stuff up (like install and open at the same time).  A driver should not
>> > > > > > have to worry about that.
>> > > > > >
>> > > > > >>>> where hvc_install() sets a data and the hvc_open() clears it.
>> > > > > >>>> hvc_open()
>> > > > > >>>> doesn't
>> > > > > >>>> check if the data was set to NULL and proceeds.
>> > > > > >>>
>> > > > > >>> What data is being set that hvc_open is checking?
>> > > > > >> hvc_install sets tty->private_data to hp, while hvc_open sets it to NULL (in
>> > > > > >> one of the paths).
>> > > > > >
>> > > > > > I see no use of private_data in drivers/tty/hvc/ so what exactly are you
>> > > > > > referring to?
>> > > > >
>> > > > > He likely means tty->driver_data. And there exactly lays the issue.
>> > > > >
>> > > > > commit bdb498c20040616e94b05c31a0ceb3e134b7e829
>> > > > > Author: Jiri Slaby <jslaby@suse.cz>
>> > > > > Date:   Tue Aug 7 21:48:04 2012 +0200
>> > > > >
>> > > > >     TTY: hvc_console, add tty install
>> > > > >
>> > > > > added hvc_install but did not move 'tty->driver_data = NULL;' from
>> > > > > hvc_open's fail path to hvc_cleanup.
>> > > > >
>> > > > > IOW hvc_open now NULLs tty->driver_data even for another task which
>> > > > > opened the tty earlier. The same holds for
>> > > > > "tty_port_tty_set(&hp->port,
>> > > > > NULL);" there. And actually "tty_port_put(&hp->port);" is also
>> > > > > incorrect
>> > > > > for the 2nd task opening the tty.
>> > > > >
>> > > > > So, a mutex with tty->driver_data check in open is not definitely the
>> > > > > way to go. This mess needs to be sorted out properly. Sure, a good
>> > > > > start
>> > > > > would be a conversion to tty_port_open. Right after dropping "tty:
>> > > > > hvc:
>> > > > > Fix data abort due to race in hvc_open" from tty/tty-next :).
>> > > >
>> > > > I've now reverted this commit so we can start from a "clean" place.
>> > > >
>> > > > > What I *don't* understand is why hp->ops->notifier_add fails, given
>> > > > > the
>> > > > > open does not allow multiple opens anyway?
>> > > >
>> > > > I don't understand that either.  Raghavendra, can you show a real trace
>> > > > for this issue that shows this?
>> > > >
>> > > Let me know if this helps:
>> > >
>> > > [  265.332900] Unable to handle kernel NULL pointer dereference at
>> > > virtual
>> > > address 00000000000000a8
>> > > [  265.332920] Mem abort info:
>> > > [  265.332934]   ESR = 0x96000006
>> > > [  265.332950]   EC = 0x25: DABT (current EL), IL = 32 bits
>> > > [  265.332963]   SET = 0, FnV = 0
>> > > [  265.332975]   EA = 0, S1PTW = 0
>> > > [  265.332985] Data abort info:
>> > > [  265.332997]   ISV = 0, ISS = 0x00000006
>> > > [  265.333008]   CM = 0, WnR = 0
>> > > [  265.333025] user pgtable: 4k pages, 39-bit VAs,
>> > > pgdp=00000001620f3000
>> > > [  265.333038] [00000000000000a8] pgd=00000001620f2003,
>> > > pud=00000001620f2003, pmd=0000000000000000
>> > > [  265.333071] Internal error: Oops: 96000006 [#1] PREEMPT SMP
>> > > [  265.333424] CPU: 1 PID: 5653 Comm: stress-ng-dev Tainted: G S
>> > > W  O
>> > > 5.4.12-g04866e0 #1
>> > > [  265.333458] pstate: 80400085 (Nzcv daIf +PAN -UAO)
>> > > [  265.333499] pc : _raw_spin_lock_irqsave+0x40/0x7c
>> > > [  265.333517] lr : _raw_spin_lock_irqsave+0x38/0x7c
>> > > [  265.333530] sp : ffffffc02436ba40
>> > > [  265.333542] x29: ffffffc02436ba40 x28: 0000000000020800
>> > > [  265.333562] x27: ffffffdfb4046490 x26: ffffff8101b83400
>> > > [  265.333580] x25: ffffff80e163ad00 x24: ffffffdfb45c7798
>> > > [  265.333598] x23: ffffff8101b83668 x22: ffffffdfb4974000
>> > > [  265.333617] x21: 0000000000000001 x20: 00000000000000a8
>> > > [  265.333634] x19: 0000000000000000 x18: ffffff80e0b0d460
>> > > [  265.333652] x17: 0000000000000000 x16: 0000000001000000
>> > > [  265.333670] x15: 0000000001000000 x14: 00000000f8000000
>> > > [  265.333688] x13: 0000000000000000 x12: 0000000000000001
>> > > [  265.333706] x11: 17f5f16765f64600 x10: 17f5f16765f64600
>> > > [  265.333724] x9 : ffffffdfb3444244 x8 : 0000000000000000
>> > > [  265.333741] x7 : 0000000000000000 x6 : 0000000000000000
>> > > [  265.333759] x5 : 0000000000000000 x4 : 0000000000000002
>> > > [  265.333776] x3 : ffffffc02436b9c0 x2 : ffffffdfb40456e0
>> > > [  265.333794] x1 : ffffffc02436b9c0 x0 : ffffffdfb3444244
>> > > [  265.333812] Call trace:
>> > > [  265.333831]  _raw_spin_lock_irqsave+0x40/0x7c
>> > > [  265.333859]  28$61deaf328f140fd7df47c115ec866fa5+0x28/0x174
>> > > [  265.333882]  tty_open$86bd494905ebe22944bf63b711173de3+0x3d0/0x584
>> > > [  265.333921]
>> > > chrdev_open$4083aaa799bca8e0e1e0c8dc1947aa96+0x1c4/0x248
>> > > [  265.333940]  do_dentry_open+0x258/0x3b0
>> > > [  265.333956]  vfs_open+0x2c/0x38
>> > > [  265.333975]  path_openat+0x898/0xedc
>> > > [  265.333991]  do_filp_open+0x78/0x124
>> > > [  265.334006]  do_sys_open+0x13c/0x298
>> > > [  265.334022]  __arm64_sys_openat+0x28/0x34
>> > > [  265.334044]  el0_svc_common+0xb8/0x1b4
>> > > [  265.334059]  el0_svc_handler+0x6c/0x88
>> > > [  265.334079]  el0_svc+0x8/0xc
>> > > [  265.334110] Code: 52800035 97b9fec7 aa1f03e8 f9800291 (885ffe81)
>> > > [  265.334130] ---[ end trace ac90e3099a98e99f ]---
>> > > [  265.334146] Kernel panic - not syncing: Fatal exception
>> >
>> > Hm, do you have a strace showing the close happening at the same time?
>> > What about install()?
>> 
>> Yes, I do see the close happening, at which point the issue is not 
>> seen.
>> It's only seen if the second task came in before this close was 
>> called. For
>> this task, as the file was already opened, it doesn't go through
>> hvc_install.
>> 
>> (I figured adding some logs in the drivers would be helpful than 
>> straces to
>> also include hvc_install)
>> 
>> These are the traces you get when the issue happens:
>> [  154.212291] hvc_install called for pid: 666
>> [  154.216705] hvc_open called for pid: 666
>> [  154.233657] hvc_open: request_irq failed with rc -22.
>> [  154.238934] hvc_open called for pid: 678
>> [  154.243012] Unable to handle kernel NULL pointer dereference at 
>> virtual
>> address 00000000000000c4
>> # hvc_install isn't called for pid: 678 as the file wasn't closed yet.
>> 
>> And these are the traces we get when things are normal:
>> [   76.318861] hvc_install called for pid: 537
>> [   76.323240] hvc_open called for pid: 537
>> [   76.340177] hvc_open: request_irq failed with rc -22.
>> [   76.345384] hvc_close called for pid: 537
>> [   76.349555] hvc_install called for pid: 538
>> [   76.353921] hvc_open called for pid: 538
>> # hvc_open for the second task (pid: 538) seems to be fine here as the 
>> file
>> was closed prior to the second task tried to open the file.
>> 
>> >
>> > And what line in hvc_open() does that offset correspond to?
>> It's the point where it tries to acquire the spinlock for the first 
>> time:
>> spin_lock_irqsave(&hp->port.lock, flags);
> 
> Ah, is this a console?  Maybe this is the same issue that other console
> drivers have been having recently, look at:
> 	https://lore.kernel.org/r/20200428184050.6501-1-john.stultz@linaro.org
> and
> 	https://lore.kernel.org/r/1589019852-21505-2-git-send-email-sagar.kadam@sifive.com
> 
> Is that what you need here too?
> 
No. The spinlock is initialized here it's just that the data-structure 
that holds the lock (hp) is NULL because of the race.
> thanks,
> 
> greg k-h

Thank you.
Raghavendra

^ permalink raw reply

* Re: [PATCH v4 1/6] printk: Collapse shutdown types into a single dump reason
From: Pavel Tatashin @ 2020-05-15 19:17 UTC (permalink / raw)
  To: Kees Cook
  Cc: Petr Mladek, Tony Luck, Jonathan Corbet, Anton Vorontsov,
	Linux Doc Mailing List, LKML, Steven Rostedt, Sergey Senozhatsky,
	devicetree, Rob Herring, Paul Mackerras, Colin Cross,
	Enric Balletbo i Serra, linuxppc-dev, Benson Leung
In-Reply-To: <20200515184434.8470-2-keescook@chromium.org>

On Fri, May 15, 2020 at 2:44 PM Kees Cook <keescook@chromium.org> wrote:
>
> To turn the KMSG_DUMP_* reasons into a more ordered list, collapse
> the redundant KMSG_DUMP_(RESTART|HALT|POWEROFF) reasons into
> KMSG_DUMP_SHUTDOWN. The current users already don't meaningfully
> distinguish between them, so there's no need to, as discussed here:
> https://lore.kernel.org/lkml/CA+CK2bAPv5u1ih5y9t5FUnTyximtFCtDYXJCpuyjOyHNOkRdqw@mail.gmail.com/
>
> Signed-off-by: Kees Cook <keescook@chromium.org>

Maybe it makes sense to mention in the commit log that for all three
merged cases there is a pr_emerg() message logged right before the
kmsg_dump(), thus the reason is distinguishable from the dmesg log
itself.

Reviewed-by: Pavel Tatashin <pasha.tatashin@soleen.com>

^ permalink raw reply

* Re: [PATCH v4 0/6] allow ramoops to collect all kmesg_dump events
From: Pavel Tatashin @ 2020-05-15 19:13 UTC (permalink / raw)
  To: Kees Cook
  Cc: Petr Mladek, Tony Luck, Jonathan Corbet, Anton Vorontsov,
	Linux Doc Mailing List, LKML, Steven Rostedt, Sergey Senozhatsky,
	devicetree, Rob Herring, Paul Mackerras, Colin Cross,
	Enric Balletbo i Serra, linuxppc-dev, Benson Leung
In-Reply-To: <20200515184434.8470-1-keescook@chromium.org>

On Fri, May 15, 2020 at 2:44 PM Kees Cook <keescook@chromium.org> wrote:
>
> Hello!
>
> I wanted to get the pstore tree nailed down, so here's the v4 of
> Pavel's series, tweaked for the feedback during v3 review.

Hi Kees,

Thank you, I was planning to send a new version of this series later
today. Let me quickly review it.

Pasha

>
> -Kees
>
> v4:
> - rebase on pstore tree
> - collapse shutdown types into a single dump reason
>   https://lore.kernel.org/lkml/CA+CK2bAPv5u1ih5y9t5FUnTyximtFCtDYXJCpuyjOyHNOkRdqw@mail.gmail.com/
> - fix dump_oops vs max_reason module params
>   https://lore.kernel.org/lkml/20200512233504.GA118720@sequoia/
> - typos
>   https://lore.kernel.org/lkml/4cdeaa2af2fe0d6cc2ca8ce3a37608340799df8a.camel@perches.com/
> - rename DT parsing routines ..._size -> ..._u32
>   https://lore.kernel.org/lkml/CA+CK2bCu8eFomiU+NeBjVn-o2dbuECxwRfssNjB3ys3caCbXeA@mail.gmail.com/
> v3: https://lore.kernel.org/lkml/20200506211523.15077-1-keescook@chromium.org/
> v2: https://lore.kernel.org/lkml/20200505154510.93506-1-pasha.tatashin@soleen.com
> v1: https://lore.kernel.org/lkml/20200502143555.543636-1-pasha.tatashin@soleen.com
>
> Kees Cook (3):
>   printk: Collapse shutdown types into a single dump reason
>   printk: Introduce kmsg_dump_reason_str()
>   pstore/ram: Introduce max_reason and convert dump_oops
>
> Pavel Tatashin (3):
>   printk: honor the max_reason field in kmsg_dumper
>   pstore/platform: Pass max_reason to kmesg dump
>   ramoops: Add max_reason optional field to ramoops DT node
>
>  Documentation/admin-guide/ramoops.rst         | 14 +++--
>  .../bindings/reserved-memory/ramoops.txt      | 13 ++++-
>  arch/powerpc/kernel/nvram_64.c                |  4 +-
>  drivers/platform/chrome/chromeos_pstore.c     |  2 +-
>  fs/pstore/platform.c                          | 26 ++-------
>  fs/pstore/ram.c                               | 58 +++++++++++++------
>  include/linux/kmsg_dump.h                     | 12 +++-
>  include/linux/pstore.h                        |  7 +++
>  include/linux/pstore_ram.h                    |  2 +-
>  kernel/printk/printk.c                        | 32 ++++++++--
>  kernel/reboot.c                               |  6 +-
>  11 files changed, 114 insertions(+), 62 deletions(-)
>
> --
> 2.20.1
>

^ permalink raw reply

* [PATCH v4 6/6] ramoops: Add max_reason optional field to ramoops DT node
From: Kees Cook @ 2020-05-15 18:44 UTC (permalink / raw)
  To: Pavel Tatashin
  Cc: Petr Mladek, Tony Luck, Kees Cook, Jonathan Corbet,
	Anton Vorontsov, linux-doc, linux-kernel, Steven Rostedt,
	Sergey Senozhatsky, devicetree, Rob Herring, Paul Mackerras,
	Colin Cross, Enric Balletbo i Serra, linuxppc-dev, Benson Leung
In-Reply-To: <20200515184434.8470-1-keescook@chromium.org>

From: Pavel Tatashin <pasha.tatashin@soleen.com>

Currently, it is possible to dump kmsges for panic, or oops.
With max_reason it is possible to dump messages for other
kmesg_dump events, for example reboot, halt, shutdown, kexec.

Signed-off-by: Pavel Tatashin <pasha.tatashin@soleen.com>
Link: https://lore.kernel.org/lkml/20200506211523.15077-6-keescook@chromium.org/
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 .../devicetree/bindings/reserved-memory/ramoops.txt | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/Documentation/devicetree/bindings/reserved-memory/ramoops.txt b/Documentation/devicetree/bindings/reserved-memory/ramoops.txt
index 0eba562fe5c6..b7886fea368c 100644
--- a/Documentation/devicetree/bindings/reserved-memory/ramoops.txt
+++ b/Documentation/devicetree/bindings/reserved-memory/ramoops.txt
@@ -30,7 +30,7 @@ Optional properties:
 - ecc-size: enables ECC support and specifies ECC buffer size in bytes
   (defaults to 0: no ECC)
 
-- record-size: maximum size in bytes of each dump done on oops/panic
+- record-size: maximum size in bytes of each kmsg dump.
   (defaults to 0: disabled)
 
 - console-size: size in bytes of log buffer reserved for kernel messages
@@ -45,7 +45,16 @@ Optional properties:
 - unbuffered: if present, use unbuffered mappings to map the reserved region
   (defaults to buffered mappings)
 
-- no-dump-oops: if present, only dump panics (defaults to panics and oops)
+- max-reason: if present, sets maximum type of kmsg dump reasons to store
+  (defaults to 2: log Oopses and Panics). This can be set to INT_MAX to
+  store all kmsg dumps. See include/linux/kmsg_dump.h KMSG_DUMP_* for other
+  kmsg dump reason values. Setting this to 0 (KMSG_DUMP_UNDEF), means the
+  reason filtering will be controlled by the printk.always_kmsg_dump boot
+  param: if unset, it will be KMSG_DUMP_OOPS, otherwise KMSG_DUMP_MAX.
+
+- no-dump-oops: deprecated, use max_reason instead. If present, and
+  max_reason is not specified, it is equivalent to max_reason = 1
+  (KMSG_DUMP_PANIC).
 
 - flags: if present, pass ramoops behavioral flags (defaults to 0,
   see include/linux/pstore_ram.h RAMOOPS_FLAG_* for flag values).
-- 
2.20.1


^ permalink raw reply related

* [PATCH v4 0/6] allow ramoops to collect all kmesg_dump events
From: Kees Cook @ 2020-05-15 18:44 UTC (permalink / raw)
  To: Pavel Tatashin
  Cc: Petr Mladek, Tony Luck, Kees Cook, Jonathan Corbet,
	Anton Vorontsov, linux-doc, linux-kernel, Steven Rostedt,
	Sergey Senozhatsky, devicetree, Rob Herring, Paul Mackerras,
	Colin Cross, Enric Balletbo i Serra, linuxppc-dev, Benson Leung

Hello!

I wanted to get the pstore tree nailed down, so here's the v4 of
Pavel's series, tweaked for the feedback during v3 review.

-Kees

v4:
- rebase on pstore tree
- collapse shutdown types into a single dump reason
  https://lore.kernel.org/lkml/CA+CK2bAPv5u1ih5y9t5FUnTyximtFCtDYXJCpuyjOyHNOkRdqw@mail.gmail.com/
- fix dump_oops vs max_reason module params
  https://lore.kernel.org/lkml/20200512233504.GA118720@sequoia/
- typos
  https://lore.kernel.org/lkml/4cdeaa2af2fe0d6cc2ca8ce3a37608340799df8a.camel@perches.com/
- rename DT parsing routines ..._size -> ..._u32
  https://lore.kernel.org/lkml/CA+CK2bCu8eFomiU+NeBjVn-o2dbuECxwRfssNjB3ys3caCbXeA@mail.gmail.com/
v3: https://lore.kernel.org/lkml/20200506211523.15077-1-keescook@chromium.org/
v2: https://lore.kernel.org/lkml/20200505154510.93506-1-pasha.tatashin@soleen.com
v1: https://lore.kernel.org/lkml/20200502143555.543636-1-pasha.tatashin@soleen.com

Kees Cook (3):
  printk: Collapse shutdown types into a single dump reason
  printk: Introduce kmsg_dump_reason_str()
  pstore/ram: Introduce max_reason and convert dump_oops

Pavel Tatashin (3):
  printk: honor the max_reason field in kmsg_dumper
  pstore/platform: Pass max_reason to kmesg dump
  ramoops: Add max_reason optional field to ramoops DT node

 Documentation/admin-guide/ramoops.rst         | 14 +++--
 .../bindings/reserved-memory/ramoops.txt      | 13 ++++-
 arch/powerpc/kernel/nvram_64.c                |  4 +-
 drivers/platform/chrome/chromeos_pstore.c     |  2 +-
 fs/pstore/platform.c                          | 26 ++-------
 fs/pstore/ram.c                               | 58 +++++++++++++------
 include/linux/kmsg_dump.h                     | 12 +++-
 include/linux/pstore.h                        |  7 +++
 include/linux/pstore_ram.h                    |  2 +-
 kernel/printk/printk.c                        | 32 ++++++++--
 kernel/reboot.c                               |  6 +-
 11 files changed, 114 insertions(+), 62 deletions(-)

-- 
2.20.1


^ permalink raw reply

* [PATCH v4 2/6] printk: honor the max_reason field in kmsg_dumper
From: Kees Cook @ 2020-05-15 18:44 UTC (permalink / raw)
  To: Pavel Tatashin
  Cc: Petr Mladek, Tony Luck, Kees Cook, Jonathan Corbet,
	Anton Vorontsov, linux-doc, linux-kernel, Steven Rostedt,
	Sergey Senozhatsky, devicetree, Rob Herring, Paul Mackerras,
	Colin Cross, Enric Balletbo i Serra, linuxppc-dev, Benson Leung
In-Reply-To: <20200515184434.8470-1-keescook@chromium.org>

From: Pavel Tatashin <pasha.tatashin@soleen.com>

kmsg_dump() allows to dump kmesg buffer for various system events: oops,
panic, reboot, etc. It provides an interface to register a callback call
for clients, and in that callback interface there is a field "max_reason"
which gets ignored unless always_kmsg_dump is passed as kernel parameter.

Allow clients to decide max_reason, and keep the current behavior when
max_reason is not set.

Signed-off-by: Pavel Tatashin <pasha.tatashin@soleen.com>
Link: https://lore.kernel.org/lkml/20200506211523.15077-2-keescook@chromium.org/
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 include/linux/kmsg_dump.h |  1 +
 kernel/printk/printk.c    | 15 +++++++++++----
 2 files changed, 12 insertions(+), 4 deletions(-)

diff --git a/include/linux/kmsg_dump.h b/include/linux/kmsg_dump.h
index 3f82b5cb2d82..9826014771ab 100644
--- a/include/linux/kmsg_dump.h
+++ b/include/linux/kmsg_dump.h
@@ -26,6 +26,7 @@ enum kmsg_dump_reason {
 	KMSG_DUMP_OOPS,
 	KMSG_DUMP_EMERG,
 	KMSG_DUMP_SHUTDOWN,
+	KMSG_DUMP_MAX
 };
 
 /**
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index 9a9b6156270b..a121c2255737 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -3157,12 +3157,19 @@ void kmsg_dump(enum kmsg_dump_reason reason)
 	struct kmsg_dumper *dumper;
 	unsigned long flags;
 
-	if ((reason > KMSG_DUMP_OOPS) && !always_kmsg_dump)
-		return;
-
 	rcu_read_lock();
 	list_for_each_entry_rcu(dumper, &dump_list, list) {
-		if (dumper->max_reason && reason > dumper->max_reason)
+		enum kmsg_dump_reason max_reason = dumper->max_reason;
+
+		/*
+		 * If client has not provided a specific max_reason, default
+		 * to KMSG_DUMP_OOPS, unless always_kmsg_dump was set.
+		 */
+		if (max_reason == KMSG_DUMP_UNDEF) {
+			max_reason = always_kmsg_dump ? KMSG_DUMP_MAX :
+							KMSG_DUMP_OOPS;
+		}
+		if (reason > max_reason)
 			continue;
 
 		/* initialize iterator with data about the stored records */
-- 
2.20.1


^ permalink raw reply related

* [PATCH v4 1/6] printk: Collapse shutdown types into a single dump reason
From: Kees Cook @ 2020-05-15 18:44 UTC (permalink / raw)
  To: Pavel Tatashin
  Cc: Petr Mladek, Tony Luck, Kees Cook, Jonathan Corbet,
	Anton Vorontsov, linux-doc, linux-kernel, Steven Rostedt,
	Sergey Senozhatsky, devicetree, Rob Herring, Paul Mackerras,
	Colin Cross, Enric Balletbo i Serra, linuxppc-dev, Benson Leung
In-Reply-To: <20200515184434.8470-1-keescook@chromium.org>

To turn the KMSG_DUMP_* reasons into a more ordered list, collapse
the redundant KMSG_DUMP_(RESTART|HALT|POWEROFF) reasons into
KMSG_DUMP_SHUTDOWN. The current users already don't meaningfully
distinguish between them, so there's no need to, as discussed here:
https://lore.kernel.org/lkml/CA+CK2bAPv5u1ih5y9t5FUnTyximtFCtDYXJCpuyjOyHNOkRdqw@mail.gmail.com/

Signed-off-by: Kees Cook <keescook@chromium.org>
---
 arch/powerpc/kernel/nvram_64.c | 4 +---
 fs/pstore/platform.c           | 8 ++------
 include/linux/kmsg_dump.h      | 4 +---
 kernel/reboot.c                | 6 +++---
 4 files changed, 7 insertions(+), 15 deletions(-)

diff --git a/arch/powerpc/kernel/nvram_64.c b/arch/powerpc/kernel/nvram_64.c
index fb4f61096613..0cd1c88bfc8b 100644
--- a/arch/powerpc/kernel/nvram_64.c
+++ b/arch/powerpc/kernel/nvram_64.c
@@ -655,9 +655,7 @@ static void oops_to_nvram(struct kmsg_dumper *dumper,
 	int rc = -1;
 
 	switch (reason) {
-	case KMSG_DUMP_RESTART:
-	case KMSG_DUMP_HALT:
-	case KMSG_DUMP_POWEROFF:
+	case KMSG_DUMP_SHUTDOWN:
 		/* These are almost always orderly shutdowns. */
 		return;
 	case KMSG_DUMP_OOPS:
diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c
index 072440457c08..90d74ebaa70a 100644
--- a/fs/pstore/platform.c
+++ b/fs/pstore/platform.c
@@ -144,12 +144,8 @@ static const char *get_reason_str(enum kmsg_dump_reason reason)
 		return "Oops";
 	case KMSG_DUMP_EMERG:
 		return "Emergency";
-	case KMSG_DUMP_RESTART:
-		return "Restart";
-	case KMSG_DUMP_HALT:
-		return "Halt";
-	case KMSG_DUMP_POWEROFF:
-		return "Poweroff";
+	case KMSG_DUMP_SHUTDOWN:
+		return "Shutdown";
 	default:
 		return "Unknown";
 	}
diff --git a/include/linux/kmsg_dump.h b/include/linux/kmsg_dump.h
index 2e7a1e032c71..3f82b5cb2d82 100644
--- a/include/linux/kmsg_dump.h
+++ b/include/linux/kmsg_dump.h
@@ -25,9 +25,7 @@ enum kmsg_dump_reason {
 	KMSG_DUMP_PANIC,
 	KMSG_DUMP_OOPS,
 	KMSG_DUMP_EMERG,
-	KMSG_DUMP_RESTART,
-	KMSG_DUMP_HALT,
-	KMSG_DUMP_POWEROFF,
+	KMSG_DUMP_SHUTDOWN,
 };
 
 /**
diff --git a/kernel/reboot.c b/kernel/reboot.c
index c4d472b7f1b4..491f1347bf43 100644
--- a/kernel/reboot.c
+++ b/kernel/reboot.c
@@ -250,7 +250,7 @@ void kernel_restart(char *cmd)
 		pr_emerg("Restarting system\n");
 	else
 		pr_emerg("Restarting system with command '%s'\n", cmd);
-	kmsg_dump(KMSG_DUMP_RESTART);
+	kmsg_dump(KMSG_DUMP_SHUTDOWN);
 	machine_restart(cmd);
 }
 EXPORT_SYMBOL_GPL(kernel_restart);
@@ -274,7 +274,7 @@ void kernel_halt(void)
 	migrate_to_reboot_cpu();
 	syscore_shutdown();
 	pr_emerg("System halted\n");
-	kmsg_dump(KMSG_DUMP_HALT);
+	kmsg_dump(KMSG_DUMP_SHUTDOWN);
 	machine_halt();
 }
 EXPORT_SYMBOL_GPL(kernel_halt);
@@ -292,7 +292,7 @@ void kernel_power_off(void)
 	migrate_to_reboot_cpu();
 	syscore_shutdown();
 	pr_emerg("Power down\n");
-	kmsg_dump(KMSG_DUMP_POWEROFF);
+	kmsg_dump(KMSG_DUMP_SHUTDOWN);
 	machine_power_off();
 }
 EXPORT_SYMBOL_GPL(kernel_power_off);
-- 
2.20.1


^ permalink raw reply related

* [PATCH v4 4/6] pstore/platform: Pass max_reason to kmesg dump
From: Kees Cook @ 2020-05-15 18:44 UTC (permalink / raw)
  To: Pavel Tatashin
  Cc: Petr Mladek, Tony Luck, Kees Cook, Jonathan Corbet,
	Anton Vorontsov, linux-doc, linux-kernel, Steven Rostedt,
	Sergey Senozhatsky, devicetree, Rob Herring, Paul Mackerras,
	Colin Cross, Enric Balletbo i Serra, linuxppc-dev, Benson Leung
In-Reply-To: <20200515184434.8470-1-keescook@chromium.org>

From: Pavel Tatashin <pasha.tatashin@soleen.com>

Add a new member to struct pstore_info for passing information about
kmesg dump maximum reason. This allows a finer control of what kmesg
dumps are sent to pstore storage backends.

Those backends that do not explicitly set this field (keeping it equal to
0), get the default behavior: store only Oopses and Panics, or everything
if the printk.always_kmsg_dump boot param is set.

Signed-off-by: Pavel Tatashin <pasha.tatashin@soleen.com>
Link: https://lore.kernel.org/lkml/20200506211523.15077-3-keescook@chromium.org/
Co-developed-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 fs/pstore/platform.c   | 4 +++-
 include/linux/pstore.h | 7 +++++++
 2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c
index 5e6c6022deb9..a9e297eefdff 100644
--- a/fs/pstore/platform.c
+++ b/fs/pstore/platform.c
@@ -595,8 +595,10 @@ int pstore_register(struct pstore_info *psi)
 
 	pstore_get_records(0);
 
-	if (psi->flags & PSTORE_FLAGS_DMESG)
+	if (psi->flags & PSTORE_FLAGS_DMESG) {
+		pstore_dumper.max_reason = psinfo->max_reason;
 		pstore_register_kmsg();
+	}
 	if (psi->flags & PSTORE_FLAGS_CONSOLE)
 		pstore_register_console();
 	if (psi->flags & PSTORE_FLAGS_FTRACE)
diff --git a/include/linux/pstore.h b/include/linux/pstore.h
index f6f22b13e04f..eb93a54cff31 100644
--- a/include/linux/pstore.h
+++ b/include/linux/pstore.h
@@ -96,6 +96,12 @@ struct pstore_record {
  *
  * @read_mutex:	serializes @open, @read, @close, and @erase callbacks
  * @flags:	bitfield of frontends the backend can accept writes for
+ * @max_reason:	Used when PSTORE_FLAGS_DMESG is set. Contains the
+ *		kmsg_dump_reason enum value. KMSG_DUMP_UNDEF means
+ *		"use existing kmsg_dump() filtering, based on the
+ *		printk.always_kmsg_dump boot param" (which is either
+ *		KMSG_DUMP_OOPS when false, or KMSG_DUMP_MAX when
+ *		true); see printk.always_kmsg_dump for more details.
  * @data:	backend-private pointer passed back during callbacks
  *
  * Callbacks:
@@ -179,6 +185,7 @@ struct pstore_info {
 	struct mutex	read_mutex;
 
 	int		flags;
+	int		max_reason;
 	void		*data;
 
 	int		(*open)(struct pstore_info *psi);
-- 
2.20.1


^ permalink raw reply related

* [PATCH v4 5/6] pstore/ram: Introduce max_reason and convert dump_oops
From: Kees Cook @ 2020-05-15 18:44 UTC (permalink / raw)
  To: Pavel Tatashin
  Cc: Petr Mladek, Tony Luck, Kees Cook, Jonathan Corbet,
	Anton Vorontsov, linux-doc, linux-kernel, Steven Rostedt,
	Sergey Senozhatsky, devicetree, Rob Herring, Paul Mackerras,
	Colin Cross, Enric Balletbo i Serra, linuxppc-dev, Benson Leung
In-Reply-To: <20200515184434.8470-1-keescook@chromium.org>

Now that pstore_register() can correctly pass max_reason to the kmesg
dump facility, introduce a new "max_reason" module parameter and
"max-reason" Device Tree field.

The "dump_oops" module parameter and "dump-oops" Device
Tree field are now considered deprecated, but are now automatically
converted to their corresponding max_reason values when present, though
the new max_reason setting has precedence.

For struct ramoops_platform_data, the "dump_oops" member is entirely
replaced by a new "max_reason" member, with the only existing user
updated in place.

Additionally remove the "reason" filter logic from ramoops_pstore_write(),
as that is not specifically needed anymore, though technically
this is a change in behavior for any ramoops users also setting the
printk.always_kmsg_dump boot param, which will cause ramoops to behave as
if max_reason was set to KMSG_DUMP_MAX.

Co-developed-by: Pavel Tatashin <pasha.tatashin@soleen.com>
Signed-off-by: Pavel Tatashin <pasha.tatashin@soleen.com>
Link: https://lore.kernel.org/lkml/20200506211523.15077-5-keescook@chromium.org/
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 Documentation/admin-guide/ramoops.rst     | 14 ++++--
 drivers/platform/chrome/chromeos_pstore.c |  2 +-
 fs/pstore/ram.c                           | 58 +++++++++++++++--------
 include/linux/pstore_ram.h                |  2 +-
 4 files changed, 51 insertions(+), 25 deletions(-)

diff --git a/Documentation/admin-guide/ramoops.rst b/Documentation/admin-guide/ramoops.rst
index 6dbcc5481000..a60a96218ba9 100644
--- a/Documentation/admin-guide/ramoops.rst
+++ b/Documentation/admin-guide/ramoops.rst
@@ -32,11 +32,17 @@ memory to be mapped strongly ordered, and atomic operations on strongly ordered
 memory are implementation defined, and won't work on many ARMs such as omaps.
 
 The memory area is divided into ``record_size`` chunks (also rounded down to
-power of two) and each oops/panic writes a ``record_size`` chunk of
+power of two) and each kmesg dump writes a ``record_size`` chunk of
 information.
 
-Dumping both oopses and panics can be done by setting 1 in the ``dump_oops``
-variable while setting 0 in that variable dumps only the panics.
+Limiting which kinds of kmsg dumps are stored can be controlled via
+the ``max_reason`` value, as defined in include/linux/kmsg_dump.h's
+``enum kmsg_dump_reason``. For example, to store both Oopses and Panics,
+``max_reason`` should be set to 2 (KMSG_DUMP_OOPS), to store only Panics
+``max_reason`` should be set to 1 (KMSG_DUMP_PANIC). Setting this to 0
+(KMSG_DUMP_UNDEF), means the reason filtering will be controlled by the
+``printk.always_kmsg_dump`` boot param: if unset, it'll be KMSG_DUMP_OOPS,
+otherwise KMSG_DUMP_MAX.
 
 The module uses a counter to record multiple dumps but the counter gets reset
 on restart (i.e. new dumps after the restart will overwrite old ones).
@@ -90,7 +96,7 @@ Setting the ramoops parameters can be done in several different manners:
         .mem_address            = <...>,
         .mem_type               = <...>,
         .record_size            = <...>,
-        .dump_oops              = <...>,
+        .max_reason             = <...>,
         .ecc                    = <...>,
   };
 
diff --git a/drivers/platform/chrome/chromeos_pstore.c b/drivers/platform/chrome/chromeos_pstore.c
index d13770785fb5..fa51153688b4 100644
--- a/drivers/platform/chrome/chromeos_pstore.c
+++ b/drivers/platform/chrome/chromeos_pstore.c
@@ -57,7 +57,7 @@ static struct ramoops_platform_data chromeos_ramoops_data = {
 	.record_size	= 0x40000,
 	.console_size	= 0x20000,
 	.ftrace_size	= 0x20000,
-	.dump_oops	= 1,
+	.max_reason	= KMSG_DUMP_OOPS,
 };
 
 static struct platform_device chromeos_ramoops = {
diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c
index 31f277633beb..f6eace1dbf7e 100644
--- a/fs/pstore/ram.c
+++ b/fs/pstore/ram.c
@@ -58,10 +58,10 @@ module_param(mem_type, uint, 0400);
 MODULE_PARM_DESC(mem_type,
 		"set to 1 to try to use unbuffered memory (default 0)");
 
-static int dump_oops = 1;
-module_param(dump_oops, int, 0400);
-MODULE_PARM_DESC(dump_oops,
-		"set to 1 to dump oopses, 0 to only dump panics (default 1)");
+static int ramoops_max_reason = -1;
+module_param_named(max_reason, ramoops_max_reason, int, 0400);
+MODULE_PARM_DESC(max_reason,
+		 "maximum reason for kmsg dump (default 2: Oops and Panic) ");
 
 static int ramoops_ecc;
 module_param_named(ecc, ramoops_ecc, int, 0400);
@@ -70,6 +70,11 @@ MODULE_PARM_DESC(ramoops_ecc,
 		"ECC buffer size in bytes (1 is a special value, means 16 "
 		"bytes ECC)");
 
+static int ramoops_dump_oops = -1;
+module_param_named(dump_oops, ramoops_dump_oops, int, 0400);
+MODULE_PARM_DESC(dump_oops,
+		 "(deprecated: use max_reason instead) set to 1 to dump oopses & panics, 0 to only dump panics");
+
 struct ramoops_context {
 	struct persistent_ram_zone **dprzs;	/* Oops dump zones */
 	struct persistent_ram_zone *cprz;	/* Console zone */
@@ -82,7 +87,6 @@ struct ramoops_context {
 	size_t console_size;
 	size_t ftrace_size;
 	size_t pmsg_size;
-	int dump_oops;
 	u32 flags;
 	struct persistent_ram_ecc_info ecc_info;
 	unsigned int max_dump_cnt;
@@ -336,16 +340,14 @@ static int notrace ramoops_pstore_write(struct pstore_record *record)
 		return -EINVAL;
 
 	/*
-	 * Out of the various dmesg dump types, ramoops is currently designed
-	 * to only store crash logs, rather than storing general kernel logs.
+	 * We could filter on record->reason here if we wanted to (which
+	 * would duplicate what happened before the "max_reason" setting
+	 * was added), but that would defeat the purpose of a system
+	 * changing printk.always_kmsg_dump, so instead log everything that
+	 * the kmsg dumper sends us, since it should be doing the filtering
+	 * based on the combination of printk.always_kmsg_dump and our
+	 * requested "max_reason".
 	 */
-	if (record->reason != KMSG_DUMP_OOPS &&
-	    record->reason != KMSG_DUMP_PANIC)
-		return -EINVAL;
-
-	/* Skip Oopes when configured to do so. */
-	if (record->reason == KMSG_DUMP_OOPS && !cxt->dump_oops)
-		return -EINVAL;
 
 	/*
 	 * Explicitly only take the first part of any new crash.
@@ -647,7 +649,14 @@ static int ramoops_parse_dt(struct platform_device *pdev,
 	pdata->mem_size = resource_size(res);
 	pdata->mem_address = res->start;
 	pdata->mem_type = of_property_read_bool(of_node, "unbuffered");
-	pdata->dump_oops = !of_property_read_bool(of_node, "no-dump-oops");
+	/*
+	 * Setting "no-dump-oops" is deprecated and will be ignored if
+	 * "max_reason" is also specified.
+	 */
+	if (of_property_read_bool(of_node, "no-dump-oops"))
+		pdata->max_reason = KMSG_DUMP_PANIC;
+	else
+		pdata->max_reason = KMSG_DUMP_OOPS;
 
 #define parse_u32(name, field, default_value) {				\
 		ret = ramoops_parse_dt_u32(pdev, name, default_value,	\
@@ -663,6 +672,7 @@ static int ramoops_parse_dt(struct platform_device *pdev,
 	parse_u32("pmsg-size", pdata->pmsg_size, 0);
 	parse_u32("ecc-size", pdata->ecc_info.ecc_size, 0);
 	parse_u32("flags", pdata->flags, 0);
+	parse_u32("max-reason", pdata->max_reason, pdata->max_reason);
 
 #undef parse_size
 
@@ -746,7 +756,6 @@ static int ramoops_probe(struct platform_device *pdev)
 	cxt->console_size = pdata->console_size;
 	cxt->ftrace_size = pdata->ftrace_size;
 	cxt->pmsg_size = pdata->pmsg_size;
-	cxt->dump_oops = pdata->dump_oops;
 	cxt->flags = pdata->flags;
 	cxt->ecc_info = pdata->ecc_info;
 
@@ -789,8 +798,10 @@ static int ramoops_probe(struct platform_device *pdev)
 	 * the single region size is how to check.
 	 */
 	cxt->pstore.flags = 0;
-	if (cxt->max_dump_cnt)
+	if (cxt->max_dump_cnt) {
 		cxt->pstore.flags |= PSTORE_FLAGS_DMESG;
+		cxt->pstore.max_reason = pdata->max_reason;
+	}
 	if (cxt->console_size)
 		cxt->pstore.flags |= PSTORE_FLAGS_CONSOLE;
 	if (cxt->max_ftrace_cnt)
@@ -826,7 +837,7 @@ static int ramoops_probe(struct platform_device *pdev)
 	mem_size = pdata->mem_size;
 	mem_address = pdata->mem_address;
 	record_size = pdata->record_size;
-	dump_oops = pdata->dump_oops;
+	ramoops_max_reason = pdata->max_reason;
 	ramoops_console_size = pdata->console_size;
 	ramoops_pmsg_size = pdata->pmsg_size;
 	ramoops_ftrace_size = pdata->ftrace_size;
@@ -909,7 +920,16 @@ static void __init ramoops_register_dummy(void)
 	pdata.console_size = ramoops_console_size;
 	pdata.ftrace_size = ramoops_ftrace_size;
 	pdata.pmsg_size = ramoops_pmsg_size;
-	pdata.dump_oops = dump_oops;
+	/* If "max_reason" is set, its value has priority over "dump_oops". */
+	if (ramoops_max_reason != -1)
+		pdata.max_reason = ramoops_max_reason;
+	/* Otherwise, if "dump_oops" is set, parse it into "max_reason". */
+	else if (ramoops_dump_oops != -1)
+		pdata.max_reason = ramoops_dump_oops ? KMSG_DUMP_OOPS
+						     : KMSG_DUMP_PANIC;
+	/* And if neither are explicitly set, use the default. */
+	else
+		pdata.max_reason = KMSG_DUMP_OOPS;
 	pdata.flags = RAMOOPS_FLAG_FTRACE_PER_CPU;
 
 	/*
diff --git a/include/linux/pstore_ram.h b/include/linux/pstore_ram.h
index 9cb9b9067298..9f16afec7290 100644
--- a/include/linux/pstore_ram.h
+++ b/include/linux/pstore_ram.h
@@ -133,7 +133,7 @@ struct ramoops_platform_data {
 	unsigned long	console_size;
 	unsigned long	ftrace_size;
 	unsigned long	pmsg_size;
-	int		dump_oops;
+	int		max_reason;
 	u32		flags;
 	struct persistent_ram_ecc_info ecc_info;
 };
-- 
2.20.1


^ permalink raw reply related

* [PATCH v4 3/6] printk: Introduce kmsg_dump_reason_str()
From: Kees Cook @ 2020-05-15 18:44 UTC (permalink / raw)
  To: Pavel Tatashin
  Cc: Petr Mladek, Tony Luck, Kees Cook, Jonathan Corbet,
	Anton Vorontsov, linux-doc, linux-kernel, Steven Rostedt,
	Sergey Senozhatsky, devicetree, Rob Herring, Paul Mackerras,
	Colin Cross, Enric Balletbo i Serra, linuxppc-dev, Benson Leung
In-Reply-To: <20200515184434.8470-1-keescook@chromium.org>

The pstore subsystem already had a private version of this function.
With the coming addition of the pstore/zone driver, this needs to be
shared. As it really should live with printk, move it there instead.

Link: https://lore.kernel.org/lkml/20200510202436.63222-8-keescook@chromium.org/
Acked-by: Petr Mladek <pmladek@suse.com>
Acked-by: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 fs/pstore/platform.c      | 18 +-----------------
 include/linux/kmsg_dump.h |  7 +++++++
 kernel/printk/printk.c    | 17 +++++++++++++++++
 3 files changed, 25 insertions(+), 17 deletions(-)

diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c
index 90d74ebaa70a..5e6c6022deb9 100644
--- a/fs/pstore/platform.c
+++ b/fs/pstore/platform.c
@@ -135,22 +135,6 @@ enum pstore_type_id pstore_name_to_type(const char *name)
 }
 EXPORT_SYMBOL_GPL(pstore_name_to_type);
 
-static const char *get_reason_str(enum kmsg_dump_reason reason)
-{
-	switch (reason) {
-	case KMSG_DUMP_PANIC:
-		return "Panic";
-	case KMSG_DUMP_OOPS:
-		return "Oops";
-	case KMSG_DUMP_EMERG:
-		return "Emergency";
-	case KMSG_DUMP_SHUTDOWN:
-		return "Shutdown";
-	default:
-		return "Unknown";
-	}
-}
-
 static void pstore_timer_kick(void)
 {
 	if (pstore_update_ms < 0)
@@ -403,7 +387,7 @@ static void pstore_dump(struct kmsg_dumper *dumper,
 	unsigned int	part = 1;
 	int		ret;
 
-	why = get_reason_str(reason);
+	why = kmsg_dump_reason_str(reason);
 
 	if (down_trylock(&psinfo->buf_lock)) {
 		/* Failed to acquire lock: give up if we cannot wait. */
diff --git a/include/linux/kmsg_dump.h b/include/linux/kmsg_dump.h
index 9826014771ab..3378bcbe585e 100644
--- a/include/linux/kmsg_dump.h
+++ b/include/linux/kmsg_dump.h
@@ -70,6 +70,8 @@ void kmsg_dump_rewind(struct kmsg_dumper *dumper);
 int kmsg_dump_register(struct kmsg_dumper *dumper);
 
 int kmsg_dump_unregister(struct kmsg_dumper *dumper);
+
+const char *kmsg_dump_reason_str(enum kmsg_dump_reason reason);
 #else
 static inline void kmsg_dump(enum kmsg_dump_reason reason)
 {
@@ -111,6 +113,11 @@ static inline int kmsg_dump_unregister(struct kmsg_dumper *dumper)
 {
 	return -EINVAL;
 }
+
+static inline const char *kmsg_dump_reason_str(enum kmsg_dump_reason reason)
+{
+	return "Disabled";
+}
 #endif
 
 #endif /* _LINUX_KMSG_DUMP_H */
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index a121c2255737..14ca4d05d902 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -3144,6 +3144,23 @@ EXPORT_SYMBOL_GPL(kmsg_dump_unregister);
 static bool always_kmsg_dump;
 module_param_named(always_kmsg_dump, always_kmsg_dump, bool, S_IRUGO | S_IWUSR);
 
+const char *kmsg_dump_reason_str(enum kmsg_dump_reason reason)
+{
+	switch (reason) {
+	case KMSG_DUMP_PANIC:
+		return "Panic";
+	case KMSG_DUMP_OOPS:
+		return "Oops";
+	case KMSG_DUMP_EMERG:
+		return "Emergency";
+	case KMSG_DUMP_SHUTDOWN:
+		return "Shutdown";
+	default:
+		return "Unknown";
+	}
+}
+EXPORT_SYMBOL_GPL(kmsg_dump_reason_str);
+
 /**
  * kmsg_dump - dump kernel log to kernel message dumpers.
  * @reason: the reason (oops, panic etc) for dumping
-- 
2.20.1


^ permalink raw reply related

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox