* [PATCH v3 8/9] powerpc/watchpoint: Return available watchpoints dynamically
From: Ravi Bangoria @ 2020-07-08 4:50 UTC (permalink / raw)
To: mpe, mikey
Cc: christophe.leroy, ravi.bangoria, apopple, peterz, fweisbec,
miltonm, oleg, npiggin, linux-kernel, paulus, jolsa, pedromfc,
naveen.n.rao, linuxppc-dev, mingo
In-Reply-To: <20200708045046.135702-1-ravi.bangoria@linux.ibm.com>
So far Book3S Powerpc supported only one watchpoint. Power10 is
introducing 2nd DAWR. Enable 2nd DAWR support for Power10.
Availability of 2nd DAWR will depend on CPU_FTR_DAWR1.
Signed-off-by: Ravi Bangoria <ravi.bangoria@linux.ibm.com>
---
arch/powerpc/include/asm/cputable.h | 4 +++-
arch/powerpc/include/asm/hw_breakpoint.h | 5 +++--
2 files changed, 6 insertions(+), 3 deletions(-)
diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h
index 3445c86e1f6f..36a0851a7a9b 100644
--- a/arch/powerpc/include/asm/cputable.h
+++ b/arch/powerpc/include/asm/cputable.h
@@ -633,7 +633,9 @@ enum {
* Maximum number of hw breakpoint supported on powerpc. Number of
* breakpoints supported by actual hw might be less than this.
*/
-#define HBP_NUM_MAX 1
+#define HBP_NUM_MAX 2
+#define HBP_NUM_ONE 1
+#define HBP_NUM_TWO 2
#endif /* !__ASSEMBLY__ */
diff --git a/arch/powerpc/include/asm/hw_breakpoint.h b/arch/powerpc/include/asm/hw_breakpoint.h
index cb424799da0d..d4eab1694bcd 100644
--- a/arch/powerpc/include/asm/hw_breakpoint.h
+++ b/arch/powerpc/include/asm/hw_breakpoint.h
@@ -5,10 +5,11 @@
* Copyright 2010, IBM Corporation.
* Author: K.Prasad <prasad@linux.vnet.ibm.com>
*/
-
#ifndef _PPC_BOOK3S_64_HW_BREAKPOINT_H
#define _PPC_BOOK3S_64_HW_BREAKPOINT_H
+#include <asm/cpu_has_feature.h>
+
#ifdef __KERNEL__
struct arch_hw_breakpoint {
unsigned long address;
@@ -46,7 +47,7 @@ struct arch_hw_breakpoint {
static inline int nr_wp_slots(void)
{
- return HBP_NUM_MAX;
+ return cpu_has_feature(CPU_FTR_DAWR1) ? HBP_NUM_TWO : HBP_NUM_ONE;
}
#ifdef CONFIG_HAVE_HW_BREAKPOINT
--
2.26.2
^ permalink raw reply related
* [PATCH v3 7/9] powerpc/watchpoint: Guest support for 2nd DAWR hcall
From: Ravi Bangoria @ 2020-07-08 4:50 UTC (permalink / raw)
To: mpe, mikey
Cc: christophe.leroy, ravi.bangoria, apopple, peterz, fweisbec,
miltonm, oleg, npiggin, linux-kernel, paulus, jolsa, pedromfc,
naveen.n.rao, linuxppc-dev, mingo
In-Reply-To: <20200708045046.135702-1-ravi.bangoria@linux.ibm.com>
2nd DAWR can be set/unset using H_SET_MODE hcall with resource value 5.
Enable powervm guest support with that. This has no effect on kvm guest
because kvm will return error if guest does hcall with resource value 5.
Signed-off-by: Ravi Bangoria <ravi.bangoria@linux.ibm.com>
---
arch/powerpc/include/asm/hvcall.h | 1 +
arch/powerpc/include/asm/machdep.h | 2 +-
arch/powerpc/include/asm/plpar_wrappers.h | 5 +++++
arch/powerpc/kernel/dawr.c | 2 +-
arch/powerpc/platforms/pseries/setup.c | 7 +++++--
5 files changed, 13 insertions(+), 4 deletions(-)
diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h
index a7f6f1aeda6b..3f170b9496a1 100644
--- a/arch/powerpc/include/asm/hvcall.h
+++ b/arch/powerpc/include/asm/hvcall.h
@@ -357,6 +357,7 @@
#define H_SET_MODE_RESOURCE_SET_DAWR0 2
#define H_SET_MODE_RESOURCE_ADDR_TRANS_MODE 3
#define H_SET_MODE_RESOURCE_LE 4
+#define H_SET_MODE_RESOURCE_SET_DAWR1 5
/* Values for argument to H_SIGNAL_SYS_RESET */
#define H_SIGNAL_SYS_RESET_ALL -1
diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h
index 7bcb64444a39..a90b892f0bfe 100644
--- a/arch/powerpc/include/asm/machdep.h
+++ b/arch/powerpc/include/asm/machdep.h
@@ -131,7 +131,7 @@ struct machdep_calls {
unsigned long dabrx);
/* Set DAWR for this platform, leave empty for default implementation */
- int (*set_dawr)(unsigned long dawr,
+ int (*set_dawr)(int nr, unsigned long dawr,
unsigned long dawrx);
#ifdef CONFIG_PPC32 /* XXX for now */
diff --git a/arch/powerpc/include/asm/plpar_wrappers.h b/arch/powerpc/include/asm/plpar_wrappers.h
index 93eb133d572c..d7a1acc83593 100644
--- a/arch/powerpc/include/asm/plpar_wrappers.h
+++ b/arch/powerpc/include/asm/plpar_wrappers.h
@@ -315,6 +315,11 @@ static inline long plpar_set_watchpoint0(unsigned long dawr0, unsigned long dawr
return plpar_set_mode(0, H_SET_MODE_RESOURCE_SET_DAWR0, dawr0, dawrx0);
}
+static inline long plpar_set_watchpoint1(unsigned long dawr1, unsigned long dawrx1)
+{
+ return plpar_set_mode(0, H_SET_MODE_RESOURCE_SET_DAWR1, dawr1, dawrx1);
+}
+
static inline long plpar_signal_sys_reset(long cpu)
{
return plpar_hcall_norets(H_SIGNAL_SYS_RESET, cpu);
diff --git a/arch/powerpc/kernel/dawr.c b/arch/powerpc/kernel/dawr.c
index 500f52fa4711..cdc2dccb987d 100644
--- a/arch/powerpc/kernel/dawr.c
+++ b/arch/powerpc/kernel/dawr.c
@@ -37,7 +37,7 @@ int set_dawr(int nr, struct arch_hw_breakpoint *brk)
dawrx |= (mrd & 0x3f) << (63 - 53);
if (ppc_md.set_dawr)
- return ppc_md.set_dawr(dawr, dawrx);
+ return ppc_md.set_dawr(nr, dawr, dawrx);
if (nr == 0) {
mtspr(SPRN_DAWR0, dawr);
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index 2db8469e475f..d516ee8eb7fc 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -831,12 +831,15 @@ static int pseries_set_xdabr(unsigned long dabr, unsigned long dabrx)
return plpar_hcall_norets(H_SET_XDABR, dabr, dabrx);
}
-static int pseries_set_dawr(unsigned long dawr, unsigned long dawrx)
+static int pseries_set_dawr(int nr, unsigned long dawr, unsigned long dawrx)
{
/* PAPR says we can't set HYP */
dawrx &= ~DAWRX_HYP;
- return plpar_set_watchpoint0(dawr, dawrx);
+ if (nr == 0)
+ return plpar_set_watchpoint0(dawr, dawrx);
+ else
+ return plpar_set_watchpoint1(dawr, dawrx);
}
#define CMO_CHARACTERISTICS_TOKEN 44
--
2.26.2
^ permalink raw reply related
* [PATCH v3 6/9] powerpc/watchpoint: Rename current H_SET_MODE DAWR macro
From: Ravi Bangoria @ 2020-07-08 4:50 UTC (permalink / raw)
To: mpe, mikey
Cc: christophe.leroy, ravi.bangoria, apopple, peterz, fweisbec,
miltonm, oleg, npiggin, linux-kernel, paulus, jolsa, pedromfc,
naveen.n.rao, linuxppc-dev, mingo
In-Reply-To: <20200708045046.135702-1-ravi.bangoria@linux.ibm.com>
Current H_SET_MODE hcall macro name for setting/resetting DAWR0 is
H_SET_MODE_RESOURCE_SET_DAWR. Add suffix 0 to macro name as well.
Signed-off-by: Ravi Bangoria <ravi.bangoria@linux.ibm.com>
---
arch/powerpc/include/asm/hvcall.h | 2 +-
arch/powerpc/include/asm/plpar_wrappers.h | 2 +-
arch/powerpc/kvm/book3s_hv.c | 2 +-
3 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h
index e90c073e437e..a7f6f1aeda6b 100644
--- a/arch/powerpc/include/asm/hvcall.h
+++ b/arch/powerpc/include/asm/hvcall.h
@@ -354,7 +354,7 @@
/* Values for 2nd argument to H_SET_MODE */
#define H_SET_MODE_RESOURCE_SET_CIABR 1
-#define H_SET_MODE_RESOURCE_SET_DAWR 2
+#define H_SET_MODE_RESOURCE_SET_DAWR0 2
#define H_SET_MODE_RESOURCE_ADDR_TRANS_MODE 3
#define H_SET_MODE_RESOURCE_LE 4
diff --git a/arch/powerpc/include/asm/plpar_wrappers.h b/arch/powerpc/include/asm/plpar_wrappers.h
index 4497c8afb573..93eb133d572c 100644
--- a/arch/powerpc/include/asm/plpar_wrappers.h
+++ b/arch/powerpc/include/asm/plpar_wrappers.h
@@ -312,7 +312,7 @@ static inline long plpar_set_ciabr(unsigned long ciabr)
static inline long plpar_set_watchpoint0(unsigned long dawr0, unsigned long dawrx0)
{
- return plpar_set_mode(0, H_SET_MODE_RESOURCE_SET_DAWR, dawr0, dawrx0);
+ return plpar_set_mode(0, H_SET_MODE_RESOURCE_SET_DAWR0, dawr0, dawrx0);
}
static inline long plpar_signal_sys_reset(long cpu)
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 6bf66649ab92..7ad692c2d7c7 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -764,7 +764,7 @@ static int kvmppc_h_set_mode(struct kvm_vcpu *vcpu, unsigned long mflags,
return H_P3;
vcpu->arch.ciabr = value1;
return H_SUCCESS;
- case H_SET_MODE_RESOURCE_SET_DAWR:
+ case H_SET_MODE_RESOURCE_SET_DAWR0:
if (!kvmppc_power8_compatible(vcpu))
return H_P2;
if (!ppc_breakpoint_available())
--
2.26.2
^ permalink raw reply related
* [PATCH v3 5/9] powerpc/watchpoint: Set CPU_FTR_DAWR1 based on pa-features bit
From: Ravi Bangoria @ 2020-07-08 4:50 UTC (permalink / raw)
To: mpe, mikey
Cc: christophe.leroy, ravi.bangoria, apopple, peterz, fweisbec,
miltonm, oleg, npiggin, linux-kernel, paulus, jolsa, pedromfc,
naveen.n.rao, linuxppc-dev, mingo
In-Reply-To: <20200708045046.135702-1-ravi.bangoria@linux.ibm.com>
As per the PAPR, bit 0 of byte 64 in pa-features property indicates
availability of 2nd DAWR registers. i.e. If this bit is set, 2nd
DAWR is present, otherwise not. Host generally uses "cpu-features",
which masks "pa-features". But "cpu-features" are still not used for
guests and thus this change is mostly applicable for guests only.
Signed-off-by: Ravi Bangoria <ravi.bangoria@linux.ibm.com>
---
arch/powerpc/kernel/prom.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index 9cc49f265c86..c76c09b97bc8 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -175,6 +175,8 @@ static struct ibm_pa_feature {
*/
{ .pabyte = 22, .pabit = 0, .cpu_features = CPU_FTR_TM_COMP,
.cpu_user_ftrs2 = PPC_FEATURE2_HTM_COMP | PPC_FEATURE2_HTM_NOSC_COMP },
+
+ { .pabyte = 64, .pabit = 0, .cpu_features = CPU_FTR_DAWR1 },
};
static void __init scan_features(unsigned long node, const unsigned char *ftrs,
--
2.26.2
^ permalink raw reply related
* [PATCH v3 4/9] powerpc/dt_cpu_ftrs: Add feature for 2nd DAWR
From: Ravi Bangoria @ 2020-07-08 4:50 UTC (permalink / raw)
To: mpe, mikey
Cc: christophe.leroy, ravi.bangoria, apopple, peterz, fweisbec,
miltonm, oleg, npiggin, linux-kernel, paulus, jolsa, pedromfc,
naveen.n.rao, linuxppc-dev, mingo
In-Reply-To: <20200708045046.135702-1-ravi.bangoria@linux.ibm.com>
Add new device-tree feature for 2nd DAWR. If this feature is present,
2nd DAWR is supported, otherwise not.
Signed-off-by: Ravi Bangoria <ravi.bangoria@linux.ibm.com>
---
arch/powerpc/include/asm/cputable.h | 7 +++++--
arch/powerpc/kernel/dt_cpu_ftrs.c | 7 +++++++
2 files changed, 12 insertions(+), 2 deletions(-)
diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h
index e506d429b1af..3445c86e1f6f 100644
--- a/arch/powerpc/include/asm/cputable.h
+++ b/arch/powerpc/include/asm/cputable.h
@@ -214,6 +214,7 @@ static inline void cpu_feature_keys_init(void) { }
#define CPU_FTR_P9_TLBIE_ERAT_BUG LONG_ASM_CONST(0x0001000000000000)
#define CPU_FTR_P9_RADIX_PREFETCH_BUG LONG_ASM_CONST(0x0002000000000000)
#define CPU_FTR_ARCH_31 LONG_ASM_CONST(0x0004000000000000)
+#define CPU_FTR_DAWR1 LONG_ASM_CONST(0x0008000000000000)
#ifndef __ASSEMBLY__
@@ -497,14 +498,16 @@ static inline void cpu_feature_keys_init(void) { }
#define CPU_FTRS_POSSIBLE \
(CPU_FTRS_POWER7 | CPU_FTRS_POWER8E | CPU_FTRS_POWER8 | \
CPU_FTR_ALTIVEC_COMP | CPU_FTR_VSX_COMP | CPU_FTRS_POWER9 | \
- CPU_FTRS_POWER9_DD2_1 | CPU_FTRS_POWER9_DD2_2 | CPU_FTRS_POWER10)
+ CPU_FTRS_POWER9_DD2_1 | CPU_FTRS_POWER9_DD2_2 | CPU_FTRS_POWER10 | \
+ CPU_FTR_DAWR1)
#else
#define CPU_FTRS_POSSIBLE \
(CPU_FTRS_PPC970 | CPU_FTRS_POWER5 | \
CPU_FTRS_POWER6 | CPU_FTRS_POWER7 | CPU_FTRS_POWER8E | \
CPU_FTRS_POWER8 | CPU_FTRS_CELL | CPU_FTRS_PA6T | \
CPU_FTR_VSX_COMP | CPU_FTR_ALTIVEC_COMP | CPU_FTRS_POWER9 | \
- CPU_FTRS_POWER9_DD2_1 | CPU_FTRS_POWER9_DD2_2 | CPU_FTRS_POWER10)
+ CPU_FTRS_POWER9_DD2_1 | CPU_FTRS_POWER9_DD2_2 | CPU_FTRS_POWER10 | \
+ CPU_FTR_DAWR1)
#endif /* CONFIG_CPU_LITTLE_ENDIAN */
#endif
#else
diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c b/arch/powerpc/kernel/dt_cpu_ftrs.c
index a0edeb391e3e..be694567cebd 100644
--- a/arch/powerpc/kernel/dt_cpu_ftrs.c
+++ b/arch/powerpc/kernel/dt_cpu_ftrs.c
@@ -573,6 +573,12 @@ static int __init feat_enable_mma(struct dt_cpu_feature *f)
return 1;
}
+static int __init feat_enable_debug_facilities_v31(struct dt_cpu_feature *f)
+{
+ cur_cpu_spec->cpu_features |= CPU_FTR_DAWR1;
+ return 1;
+}
+
struct dt_cpu_feature_match {
const char *name;
int (*enable)(struct dt_cpu_feature *f);
@@ -648,6 +654,7 @@ static struct dt_cpu_feature_match __initdata
{"wait-v3", feat_enable, 0},
{"prefix-instructions", feat_enable, 0},
{"matrix-multiply-assist", feat_enable_mma, 0},
+ {"debug-facilities-v31", feat_enable_debug_facilities_v31, 0},
};
static bool __initdata using_dt_cpu_ftrs;
--
2.26.2
^ permalink raw reply related
* [PATCH v3 3/9] powerpc/watchpoint: Enable watchpoint functionality on power10 guest
From: Ravi Bangoria @ 2020-07-08 4:50 UTC (permalink / raw)
To: mpe, mikey
Cc: christophe.leroy, ravi.bangoria, apopple, peterz, fweisbec,
miltonm, oleg, npiggin, linux-kernel, paulus, jolsa, pedromfc,
naveen.n.rao, linuxppc-dev, mingo
In-Reply-To: <20200708045046.135702-1-ravi.bangoria@linux.ibm.com>
CPU_FTR_DAWR is by default enabled for host via CPU_FTRS_DT_CPU_BASE
(controlled by CONFIG_PPC_DT_CPU_FTRS). But cpu-features device-tree
node is not PAPR compatible and thus not yet used by kvm or pHyp
guests. Enable watchpoint functionality on power10 guest (both kvm
and powervm) by adding CPU_FTR_DAWR to CPU_FTRS_POWER10. Note that
this change does not enable 2nd DAWR support.
Signed-off-by: Ravi Bangoria <ravi.bangoria@linux.ibm.com>
---
arch/powerpc/include/asm/cputable.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h
index bac2252c839e..e506d429b1af 100644
--- a/arch/powerpc/include/asm/cputable.h
+++ b/arch/powerpc/include/asm/cputable.h
@@ -478,7 +478,7 @@ static inline void cpu_feature_keys_init(void) { }
CPU_FTR_CFAR | CPU_FTR_HVMODE | CPU_FTR_VMX_COPY | \
CPU_FTR_DBELL | CPU_FTR_HAS_PPR | CPU_FTR_ARCH_207S | \
CPU_FTR_TM_COMP | CPU_FTR_ARCH_300 | CPU_FTR_PKEY | \
- CPU_FTR_ARCH_31)
+ CPU_FTR_ARCH_31 | CPU_FTR_DAWR)
#define CPU_FTRS_CELL (CPU_FTR_LWSYNC | \
CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \
CPU_FTR_ALTIVEC_COMP | CPU_FTR_MMCRA | CPU_FTR_SMT | \
--
2.26.2
^ permalink raw reply related
* [PATCH v3 2/9] powerpc/watchpoint: Fix DAWR exception constraint
From: Ravi Bangoria @ 2020-07-08 4:50 UTC (permalink / raw)
To: mpe, mikey
Cc: christophe.leroy, ravi.bangoria, apopple, peterz, fweisbec,
miltonm, oleg, npiggin, linux-kernel, paulus, jolsa, pedromfc,
naveen.n.rao, linuxppc-dev, mingo
In-Reply-To: <20200708045046.135702-1-ravi.bangoria@linux.ibm.com>
Pedro Miraglia Franco de Carvalho noticed that on p8, DAR value is
inconsistent with different type of load/store. Like for byte,word
etc. load/stores, DAR is set to the address of the first byte of
overlap between watch range and real access. But for quadword load/
store it's set to the address of the first byte of real access. This
issue has been fixed in p10. In p10(ISA 3.1), DAR is always set to
the address of the first byte of overlap. Commit 27985b2a640e
("powerpc/watchpoint: Don't ignore extraneous exceptions blindly")
wrongly assumes that DAR is set to the address of the first byte of
overlap for all load/stores on p8 as well. Fix that. With the fix,
we now rely on 'ea' provided by analyse_instr(). If analyse_instr()
fails, generate event unconditionally on p8, and on p10 generate
event only if DAR is within a DAWR range.
Note: 8xx is not affected.
Fixes: 27985b2a640e ("powerpc/watchpoint: Don't ignore extraneous exceptions blindly")
Fixes: 74c6881019b7 ("powerpc/watchpoint: Prepare handler to handle more than one watchpoint")
Reported-by: Pedro Miraglia Franco de Carvalho <pedromfc@br.ibm.com>
Signed-off-by: Ravi Bangoria <ravi.bangoria@linux.ibm.com>
---
arch/powerpc/kernel/hw_breakpoint.c | 93 +++++++++++++++++++----------
1 file changed, 63 insertions(+), 30 deletions(-)
diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c
index 031e6defc08e..7a66c370a105 100644
--- a/arch/powerpc/kernel/hw_breakpoint.c
+++ b/arch/powerpc/kernel/hw_breakpoint.c
@@ -498,11 +498,11 @@ static bool dar_in_user_range(unsigned long dar, struct arch_hw_breakpoint *info
return ((info->address <= dar) && (dar - info->address < info->len));
}
-static bool dar_user_range_overlaps(unsigned long dar, int size,
- struct arch_hw_breakpoint *info)
+static bool ea_user_range_overlaps(unsigned long ea, int size,
+ struct arch_hw_breakpoint *info)
{
- return ((dar < info->address + info->len) &&
- (dar + size > info->address));
+ return ((ea < info->address + info->len) &&
+ (ea + size > info->address));
}
static bool dar_in_hw_range(unsigned long dar, struct arch_hw_breakpoint *info)
@@ -515,20 +515,22 @@ static bool dar_in_hw_range(unsigned long dar, struct arch_hw_breakpoint *info)
return ((hw_start_addr <= dar) && (hw_end_addr > dar));
}
-static bool dar_hw_range_overlaps(unsigned long dar, int size,
- struct arch_hw_breakpoint *info)
+static bool ea_hw_range_overlaps(unsigned long ea, int size,
+ struct arch_hw_breakpoint *info)
{
unsigned long hw_start_addr, hw_end_addr;
hw_start_addr = ALIGN_DOWN(info->address, HW_BREAKPOINT_SIZE);
hw_end_addr = ALIGN(info->address + info->len, HW_BREAKPOINT_SIZE);
- return ((dar < hw_end_addr) && (dar + size > hw_start_addr));
+ return ((ea < hw_end_addr) && (ea + size > hw_start_addr));
}
/*
* If hw has multiple DAWR registers, we also need to check all
* dawrx constraint bits to confirm this is _really_ a valid event.
+ * If type is UNKNOWN, but privilege level matches, consider it as
+ * a positive match.
*/
static bool check_dawrx_constraints(struct pt_regs *regs, int type,
struct arch_hw_breakpoint *info)
@@ -536,7 +538,12 @@ static bool check_dawrx_constraints(struct pt_regs *regs, int type,
if (OP_IS_LOAD(type) && !(info->type & HW_BRK_TYPE_READ))
return false;
- if (OP_IS_STORE(type) && !(info->type & HW_BRK_TYPE_WRITE))
+ /*
+ * The Cache Management instructions other than dcbz never
+ * cause a match. i.e. if type is CACHEOP, the instruction
+ * is dcbz, and dcbz is treated as Store.
+ */
+ if ((OP_IS_STORE(type) || type == CACHEOP) && !(info->type & HW_BRK_TYPE_WRITE))
return false;
if (is_kernel_addr(regs->nip) && !(info->type & HW_BRK_TYPE_KERNEL))
@@ -553,7 +560,8 @@ static bool check_dawrx_constraints(struct pt_regs *regs, int type,
* including extraneous exception. Otherwise return false.
*/
static bool check_constraints(struct pt_regs *regs, struct ppc_inst instr,
- int type, int size, struct arch_hw_breakpoint *info)
+ unsigned long ea, int type, int size,
+ struct arch_hw_breakpoint *info)
{
bool in_user_range = dar_in_user_range(regs->dar, info);
bool dawrx_constraints;
@@ -569,11 +577,10 @@ static bool check_constraints(struct pt_regs *regs, struct ppc_inst instr,
}
if (unlikely(ppc_inst_equal(instr, ppc_inst(0)))) {
- if (in_user_range)
- return true;
-
- if (dar_in_hw_range(regs->dar, info)) {
- info->type |= HW_BRK_TYPE_EXTRANEOUS_IRQ;
+ if (cpu_has_feature(CPU_FTR_ARCH_31)) {
+ if (dar_in_hw_range(regs->dar, info))
+ return true;
+ } else {
return true;
}
return false;
@@ -581,10 +588,20 @@ static bool check_constraints(struct pt_regs *regs, struct ppc_inst instr,
dawrx_constraints = check_dawrx_constraints(regs, type, info);
- if (dar_user_range_overlaps(regs->dar, size, info))
+ if (type == UNKNOWN) {
+ if (cpu_has_feature(CPU_FTR_ARCH_31)) {
+ if (dar_in_hw_range(regs->dar, info))
+ return dawrx_constraints;
+ } else {
+ return dawrx_constraints;
+ }
+ return false;
+ }
+
+ if (ea_user_range_overlaps(ea, size, info))
return dawrx_constraints;
- if (dar_hw_range_overlaps(regs->dar, size, info)) {
+ if (ea_hw_range_overlaps(ea, size, info)) {
if (dawrx_constraints) {
info->type |= HW_BRK_TYPE_EXTRANEOUS_IRQ;
return true;
@@ -593,8 +610,17 @@ static bool check_constraints(struct pt_regs *regs, struct ppc_inst instr,
return false;
}
+static int cache_op_size(void)
+{
+#ifdef __powerpc64__
+ return ppc64_caches.l1d.block_size;
+#else
+ return L1_CACHE_BYTES;
+#endif
+}
+
static void get_instr_detail(struct pt_regs *regs, struct ppc_inst *instr,
- int *type, int *size, bool *larx_stcx)
+ int *type, int *size, unsigned long *ea)
{
struct instruction_op op;
@@ -602,16 +628,23 @@ static void get_instr_detail(struct pt_regs *regs, struct ppc_inst *instr,
return;
analyse_instr(&op, regs, *instr);
-
- /*
- * Set size = 8 if analyse_instr() fails. If it's a userspace
- * watchpoint(valid or extraneous), we can notify user about it.
- * If it's a kernel watchpoint, instruction emulation will fail
- * in stepping_handler() and watchpoint will be disabled.
- */
*type = GETTYPE(op.type);
- *size = !(*type == UNKNOWN) ? GETSIZE(op.type) : 8;
- *larx_stcx = (*type == LARX || *type == STCX);
+ *ea = op.ea;
+#ifdef __powerpc64__
+ if (!(regs->msr & MSR_64BIT))
+ *ea &= 0xffffffffUL;
+#endif
+
+ *size = GETSIZE(op.type);
+ if (*type == CACHEOP) {
+ *size = cache_op_size();
+ *ea &= ~(*size - 1);
+ }
+}
+
+static bool is_larx_stcx_instr(int type)
+{
+ return type == LARX || type == STCX;
}
/*
@@ -678,7 +711,7 @@ int hw_breakpoint_handler(struct die_args *args)
struct ppc_inst instr = ppc_inst(0);
int type = 0;
int size = 0;
- bool larx_stcx = false;
+ unsigned long ea;
/* Disable breakpoints during exception handling */
hw_breakpoint_disable();
@@ -692,7 +725,7 @@ int hw_breakpoint_handler(struct die_args *args)
rcu_read_lock();
if (!IS_ENABLED(CONFIG_PPC_8xx))
- get_instr_detail(regs, &instr, &type, &size, &larx_stcx);
+ get_instr_detail(regs, &instr, &type, &size, &ea);
for (i = 0; i < nr_wp_slots(); i++) {
bp[i] = __this_cpu_read(bp_per_reg[i]);
@@ -702,7 +735,7 @@ int hw_breakpoint_handler(struct die_args *args)
info[i] = counter_arch_bp(bp[i]);
info[i]->type &= ~HW_BRK_TYPE_EXTRANEOUS_IRQ;
- if (check_constraints(regs, instr, type, size, info[i])) {
+ if (check_constraints(regs, instr, ea, type, size, info[i])) {
if (!IS_ENABLED(CONFIG_PPC_8xx) &&
ppc_inst_equal(instr, ppc_inst(0))) {
handler_error(bp[i], info[i]);
@@ -744,7 +777,7 @@ int hw_breakpoint_handler(struct die_args *args)
}
if (!IS_ENABLED(CONFIG_PPC_8xx)) {
- if (larx_stcx) {
+ if (is_larx_stcx_instr(type)) {
for (i = 0; i < nr_wp_slots(); i++) {
if (!hit[i])
continue;
--
2.26.2
^ permalink raw reply related
* [PATCH v3 1/9] powerpc/watchpoint: Fix 512 byte boundary limit
From: Ravi Bangoria @ 2020-07-08 4:50 UTC (permalink / raw)
To: mpe, mikey
Cc: christophe.leroy, ravi.bangoria, apopple, peterz, fweisbec,
miltonm, oleg, npiggin, linux-kernel, paulus, jolsa, pedromfc,
naveen.n.rao, linuxppc-dev, mingo
In-Reply-To: <20200708045046.135702-1-ravi.bangoria@linux.ibm.com>
Milton Miller reported that we are aligning start and end address to
wrong size SZ_512M. It should be SZ_512. Fix that.
While doing this change I also found a case where ALIGN() comparison
fails. Within a given aligned range, ALIGN() of two addresses does not
match when start address is pointing to the first byte and end address
is pointing to any other byte except the first one. But that's not true
for ALIGN_DOWN(). ALIGN_DOWN() of any two addresses within that range
will always point to the first byte. So use ALIGN_DOWN() instead of
ALIGN().
Fixes: e68ef121c1f4 ("powerpc/watchpoint: Use builtin ALIGN*() macros")
Reported-by: Milton Miller <miltonm@us.ibm.com>
Signed-off-by: Ravi Bangoria <ravi.bangoria@linux.ibm.com>
---
arch/powerpc/kernel/hw_breakpoint.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c
index 0000daf0e1da..031e6defc08e 100644
--- a/arch/powerpc/kernel/hw_breakpoint.c
+++ b/arch/powerpc/kernel/hw_breakpoint.c
@@ -419,7 +419,7 @@ static int hw_breakpoint_validate_len(struct arch_hw_breakpoint *hw)
if (dawr_enabled()) {
max_len = DAWR_MAX_LEN;
/* DAWR region can't cross 512 bytes boundary */
- if (ALIGN(start_addr, SZ_512M) != ALIGN(end_addr - 1, SZ_512M))
+ if (ALIGN_DOWN(start_addr, SZ_512) != ALIGN_DOWN(end_addr - 1, SZ_512))
return -EINVAL;
} else if (IS_ENABLED(CONFIG_PPC_8xx)) {
/* 8xx can setup a range without limitation */
--
2.26.2
^ permalink raw reply related
* [PATCH v3 0/9] powerpc/watchpoint: Enable 2nd DAWR on baremetal and powervm
From: Ravi Bangoria @ 2020-07-08 4:50 UTC (permalink / raw)
To: mpe, mikey
Cc: christophe.leroy, ravi.bangoria, apopple, peterz, fweisbec,
miltonm, oleg, npiggin, linux-kernel, paulus, jolsa, pedromfc,
naveen.n.rao, linuxppc-dev, mingo
Last series[1] was to add basic infrastructure support for more than
one watchpoint on Book3S powerpc. This series actually enables the 2nd
DAWR for baremetal and powervm. Kvm guest is still not supported.
v2: https://lore.kernel.org/linuxppc-dev/20200604033443.70591-1-ravi.bangoria@linux.ibm.com/
v2->v3:
- patch #2 is new. It fixes an issue with DAWR exception constraint
- Rename dawr1 to debug-facilities-v31 in dt cpu feature, suggested
by Nick Piggin.
- Rebased to powerpc/next
[1]: https://lore.kernel.org/linuxppc-dev/20200514111741.97993-1-ravi.bangoria@linux.ibm.com/
Ravi Bangoria (9):
powerpc/watchpoint: Fix 512 byte boundary limit
powerpc/watchpoint: Fix DAWR exception constraint
powerpc/watchpoint: Enable watchpoint functionality on power10 guest
powerpc/dt_cpu_ftrs: Add feature for 2nd DAWR
powerpc/watchpoint: Set CPU_FTR_DAWR1 based on pa-features bit
powerpc/watchpoint: Rename current H_SET_MODE DAWR macro
powerpc/watchpoint: Guest support for 2nd DAWR hcall
powerpc/watchpoint: Return available watchpoints dynamically
powerpc/watchpoint: Remove 512 byte boundary
arch/powerpc/include/asm/cputable.h | 13 ++-
arch/powerpc/include/asm/hvcall.h | 3 +-
arch/powerpc/include/asm/hw_breakpoint.h | 5 +-
arch/powerpc/include/asm/machdep.h | 2 +-
arch/powerpc/include/asm/plpar_wrappers.h | 7 +-
arch/powerpc/kernel/dawr.c | 2 +-
arch/powerpc/kernel/dt_cpu_ftrs.c | 7 ++
arch/powerpc/kernel/hw_breakpoint.c | 98 +++++++++++++++--------
arch/powerpc/kernel/prom.c | 2 +
arch/powerpc/kvm/book3s_hv.c | 2 +-
arch/powerpc/platforms/pseries/setup.c | 7 +-
11 files changed, 103 insertions(+), 45 deletions(-)
--
2.26.2
^ permalink raw reply
* Re: [PATCH v2] powerpc/uaccess: Use flexible addressing with __put_user()/__get_user()
From: Christophe Leroy @ 2020-07-08 4:49 UTC (permalink / raw)
To: Michael Ellerman, Benjamin Herrenschmidt, Paul Mackerras, npiggin,
segher
Cc: linuxppc-dev, linux-kernel
In-Reply-To: <faa6759a-8188-104b-a9f9-a5ff3b060cfa@csgroup.eu>
Le 07/07/2020 à 21:02, Christophe Leroy a écrit :
>
>
> Le 07/07/2020 à 14:44, Christophe Leroy a écrit :
>>
>>
>> Le 30/06/2020 à 03:19, Michael Ellerman a écrit :
>>> Michael Ellerman <mpe@ellerman.id.au> writes:
>>>> Christophe Leroy <christophe.leroy@csgroup.eu> writes:
>>>>> Hi Michael,
>>>>>
>>>>> I see this patch is marked as "defered" in patchwork, but I can't see
>>>>> any related discussion. Is it normal ?
>>>>
>>>> Because it uses the "m<>" constraint which didn't work on GCC 4.6.
>>>>
>>>> https://github.com/linuxppc/issues/issues/297
>>>>
>>>> So we should be able to pick it up for v5.9 hopefully.
>>>
>>> It seems to break the build with the kernel.org 4.9.4 compiler and
>>> corenet64_smp_defconfig:
>>
>> Most likely a GCC bug ?
>>
>> It seems the problem vanishes with patch
>> https://patchwork.ozlabs.org/project/linuxppc-dev/patch/173de3b659fa3a5f126a0eb170522cccd909950f.1594125164.git.christophe.leroy@csgroup.eu/
>>
>
> Same kind of issue in signal_64.c now.
>
> The following patch fixes it:
> https://patchwork.ozlabs.org/project/linuxppc-dev/patch/810bd8840ef990a200f58c9dea9abe767ca02a3a.1594146723.git.christophe.leroy@csgroup.eu/
>
>
This time I confirm, with the two above mentioned patches, it builds OK
with 4.9, see
http://kisskb.ellerman.id.au/kisskb/head/810bd8840ef990a200f58c9dea9abe767ca02a3a/
Christophe
^ permalink raw reply
* Re: [PATCH v2 4/4] powerpc/mm/radix: Create separate mappings for hot-plugged memory
From: Michael Ellerman @ 2020-07-08 4:44 UTC (permalink / raw)
To: Aneesh Kumar K.V, linuxppc-dev; +Cc: Aneesh Kumar K.V, Bharata B Rao
In-Reply-To: <20200625064547.228448-5-aneesh.kumar@linux.ibm.com>
"Aneesh Kumar K.V" <aneesh.kumar@linux.ibm.com> writes:
> To enable memory unplug without splitting kernel page table
> mapping, we force the max mapping size to the LMB size. LMB
> size is the unit in which hypervisor will do memory add/remove
> operation.
>
> This implies on pseries system, we now end up mapping
Please expand on why it "implies" that for pseries.
> memory with 2M page size instead of 1G. To improve
> that we want hypervisor to hint the kernel about the hotplug
> memory range. This was added that as part of
That
>
> commit b6eca183e23e ("powerpc/kernel: Enables memory
> hot-remove after reboot on pseries guests")
>
> But we still don't do that on PowerVM. Once we get PowerVM
I think you mean PowerVM doesn't provide that hint yet?
Realistically it won't until P10. So this means we'll always use 2MB on
Power9 PowerVM doesn't it?
What about KVM?
Have you done any benchmarking on the impact of switching the linear
mapping to 2MB pages?
> updated, we can then force the 2M mapping only to hot-pluggable
> memory region using memblock_is_hotpluggable(). Till then
> let's depend on LMB size for finding the mapping page size
> for linear range.
>
> Signed-off-by: Bharata B Rao <bharata@linux.ibm.com>
> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
> ---
> arch/powerpc/mm/book3s64/radix_pgtable.c | 83 ++++++++++++++++++++----
> arch/powerpc/platforms/powernv/setup.c | 10 ++-
> 2 files changed, 81 insertions(+), 12 deletions(-)
>
> diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c
> index 78ad11812e0d..a4179e4da49d 100644
> --- a/arch/powerpc/mm/book3s64/radix_pgtable.c
> +++ b/arch/powerpc/mm/book3s64/radix_pgtable.c
> @@ -15,6 +15,7 @@
> #include <linux/mm.h>
> #include <linux/hugetlb.h>
> #include <linux/string_helpers.h>
> +#include <linux/memory.h>
>
> #include <asm/pgtable.h>
> #include <asm/pgalloc.h>
> @@ -34,6 +35,7 @@
>
> unsigned int mmu_pid_bits;
> unsigned int mmu_base_pid;
> +unsigned int radix_mem_block_size;
static? __ro_after_init?
> @@ -266,6 +268,7 @@ static unsigned long next_boundary(unsigned long addr, unsigned long end)
>
> static int __meminit create_physical_mapping(unsigned long start,
> unsigned long end,
> + unsigned long max_mapping_size,
> int nid, pgprot_t _prot)
> {
> unsigned long vaddr, addr, mapping_size = 0;
> @@ -279,6 +282,8 @@ static int __meminit create_physical_mapping(unsigned long start,
> int rc;
>
> gap = next_boundary(addr, end) - addr;
> + if (gap > max_mapping_size)
> + gap = max_mapping_size;
> previous_size = mapping_size;
> prev_exec = exec;
>
> @@ -329,8 +334,9 @@ static void __init radix_init_pgtable(void)
>
> /* We don't support slb for radix */
> mmu_slb_size = 0;
> +
> /*
> - * Create the linear mapping, using standard page size for now
> + * Create the linear mapping
> */
> for_each_memblock(memory, reg) {
> /*
> @@ -346,6 +352,7 @@ static void __init radix_init_pgtable(void)
>
> WARN_ON(create_physical_mapping(reg->base,
> reg->base + reg->size,
> + radix_mem_block_size,
> -1, PAGE_KERNEL));
> }
>
> @@ -486,6 +493,49 @@ static int __init radix_dt_scan_page_sizes(unsigned long node,
> return 1;
> }
>
> +static int __init probe_memory_block_size(unsigned long node, const char *uname, int
> + depth, void *data)
> +{
> + const __be32 *block_size;
> + int len;
> +
> + if (depth != 1)
> + return 0;
> +
> + if (!strcmp(uname, "ibm,dynamic-reconfiguration-memory")) {
if (strcmp(uname, "ibm,dynamic-reconfiguration-memory") != 0)
return 0;
Then you can de-indent the rest of the body.
> + block_size = of_get_flat_dt_prop(node, "ibm,lmb-size", &len);
> + if (!block_size || len < dt_root_size_cells * sizeof(__be32))
This will give us a sparse warning, please do the endian conversion
before looking at the value.
> + /*
> + * Nothing in the device tree
> + */
> + return MIN_MEMORY_BLOCK_SIZE;
> +
> + return dt_mem_next_cell(dt_root_size_cells, &block_size);
Just of_read_number() ?
This is misusing the return value, as I explained on one of your other
recent patches. You should return !0 to indicate that scanning should
stop, and the actual value can go via the data pointer, or better just
set the global.
> + }
> +
> + return 0;
> +}
> +
> +static unsigned long radix_memory_block_size(void)
> +{
> + unsigned long mem_block_size = MIN_MEMORY_BLOCK_SIZE;
> +
> + if (firmware_has_feature(FW_FEATURE_OPAL)) {
> +
> + mem_block_size = 1UL * 1024 * 1024 * 1024;
We have 1GB hardcoded here and also in pnv_memory_block_size().
Shouldn't pnv_memory_block_size() at least be using radix_mem_block_size?
> + } else if (firmware_has_feature(FW_FEATURE_LPAR)) {
> + mem_block_size = of_scan_flat_dt(probe_memory_block_size, NULL);
> + if (!mem_block_size)
> + mem_block_size = MIN_MEMORY_BLOCK_SIZE;
> + }
> +
> + return mem_block_size;
> +}
It would probably be simpler if that was just inlined below.
> +
> +
> void __init radix__early_init_devtree(void)
> {
> int rc;
> @@ -494,17 +544,27 @@ void __init radix__early_init_devtree(void)
> * Try to find the available page sizes in the device-tree
> */
> rc = of_scan_flat_dt(radix_dt_scan_page_sizes, NULL);
> - if (rc != 0) /* Found */
> - goto found;
> + if (rc == 0) {
> + /*
> + * no page size details found in device tree
> + * let's assume we have page 4k and 64k support
Capitals and punctuation please?
> + */
> + mmu_psize_defs[MMU_PAGE_4K].shift = 12;
> + mmu_psize_defs[MMU_PAGE_4K].ap = 0x0;
> +
> + mmu_psize_defs[MMU_PAGE_64K].shift = 16;
> + mmu_psize_defs[MMU_PAGE_64K].ap = 0x5;
> + }
Moving that seems like an unrelated change. It's a reasonable change but
I'd rather you did it in a standalone patch.
> /*
> - * let's assume we have page 4k and 64k support
> + * Max mapping size used when mapping pages. We don't use
> + * ppc_md.memory_block_size() here because this get called
> + * early and we don't have machine probe called yet. Also
> + * the pseries implementation only check for ibm,lmb-size.
> + * All hypervisor supporting radix do expose that device
> + * tree node.
> */
> - mmu_psize_defs[MMU_PAGE_4K].shift = 12;
> - mmu_psize_defs[MMU_PAGE_4K].ap = 0x0;
> -
> - mmu_psize_defs[MMU_PAGE_64K].shift = 16;
> - mmu_psize_defs[MMU_PAGE_64K].ap = 0x5;
> -found:
> + radix_mem_block_size = radix_memory_block_size();
If you did that earlier in the function, before
radix_dt_scan_page_sizes(), the logic would be simpler.
> return;
> }
>
> @@ -856,7 +916,8 @@ int __meminit radix__create_section_mapping(unsigned long start,
> return -1;
> }
>
> - return create_physical_mapping(__pa(start), __pa(end), nid, prot);
> + return create_physical_mapping(__pa(start), __pa(end),
> + radix_mem_block_size, nid, prot);
> }
>
> int __meminit radix__remove_section_mapping(unsigned long start, unsigned long end)
> diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c
> index 3bc188da82ba..6e2f614590a3 100644
> --- a/arch/powerpc/platforms/powernv/setup.c
> +++ b/arch/powerpc/platforms/powernv/setup.c
> @@ -399,7 +399,15 @@ static void pnv_kexec_cpu_down(int crash_shutdown, int secondary)
> #ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
> static unsigned long pnv_memory_block_size(void)
> {
> - return 256UL * 1024 * 1024;
> + /*
> + * We map the kernel linear region with 1GB large pages on radix. For
> + * memory hot unplug to work our memory block size must be at least
> + * this size.
> + */
> + if (radix_enabled())
> + return 1UL * 1024 * 1024 * 1024;
> + else
> + return 256UL * 1024 * 1024;
> }
> #endif
>
> --
> 2.26.2
cheers
^ permalink raw reply
* Re: [PATCH v5 08/12] init: main: add KUnit to kernel init
From: Luis Chamberlain @ 2020-07-08 4:38 UTC (permalink / raw)
To: Brendan Higgins
Cc: linux-doc, catalin.marinas, jcmvbkbc, will, paulus,
linux-kselftest, frowand.list, anton.ivanov, linux-arch, richard,
rppt, yzaikin, linux-xtensa, keescook, arnd, jdike, linux-um,
linuxppc-dev, davidgow, skhan, linux-arm-kernel, kunit-dev, chris,
monstr, sboyd, gregkh, linux-kernel, logang, akpm, alan.maguire
In-Reply-To: <20200626210917.358969-9-brendanhiggins@google.com>
On Fri, Jun 26, 2020 at 02:09:13PM -0700, Brendan Higgins wrote:
> Remove KUnit from init calls entirely, instead call directly from
> kernel_init().
The commit log does not explain *why*.
> Co-developed-by: Alan Maguire <alan.maguire@oracle.com>
> Signed-off-by: Alan Maguire <alan.maguire@oracle.com>
> Signed-off-by: Brendan Higgins <brendanhiggins@google.com>
> Reviewed-by: Stephen Boyd <sboyd@kernel.org>
Other than that:
Reviewed-by: Luis Chamberlain <mcgrof@kernel.org>
Luis
^ permalink raw reply
* Re: [PATCH v5 01/12] vmlinux.lds.h: add linker section for KUnit test suites
From: Luis Chamberlain @ 2020-07-08 4:31 UTC (permalink / raw)
To: Brendan Higgins
Cc: open list:DOCUMENTATION, catalin.marinas, jcmvbkbc, will, paulus,
open list:KERNEL SELFTEST FRAMEWORK, Frank Rowand, Anton Ivanov,
linux-arch, Richard Weinberger, rppt, Iurii Zaikin, linux-xtensa,
Kees Cook, Arnd Bergmann, Jeff Dike, linux-um, linuxppc-dev,
David Gow, Shuah Khan, Linux ARM, KUnit Development, chris,
monstr, Stephen Boyd, Greg KH, Linux Kernel Mailing List,
Logan Gunthorpe, Andrew Morton, Alan Maguire
In-Reply-To: <CAFd5g47vu5vmrXnS0sLu+hdC2HmYz7GY82sE8rhcHfNkuC1NRw@mail.gmail.com>
On Fri, Jun 26, 2020 at 02:22:11PM -0700, Brendan Higgins wrote:
> On Fri, Jun 26, 2020 at 2:20 PM Kees Cook <keescook@chromium.org> wrote:
> >
> > On Fri, Jun 26, 2020 at 02:09:06PM -0700, Brendan Higgins wrote:
> > > Add a linker section where KUnit can put references to its test suites.
> > > This patch is the first step in transitioning to dispatching all KUnit
> > > tests from a centralized executor rather than having each as its own
> > > separate late_initcall.
> > >
> > > Co-developed-by: Iurii Zaikin <yzaikin@google.com>
> > > Signed-off-by: Iurii Zaikin <yzaikin@google.com>
> > > Signed-off-by: Brendan Higgins <brendanhiggins@google.com>
> > > Reviewed-by: Stephen Boyd <sboyd@kernel.org>
> > > ---
> > > include/asm-generic/vmlinux.lds.h | 8 ++++++++
> > > 1 file changed, 8 insertions(+)
> > >
> > > diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
> > > index db600ef218d7d..4f9b036fc9616 100644
> > > --- a/include/asm-generic/vmlinux.lds.h
> > > +++ b/include/asm-generic/vmlinux.lds.h
> > > @@ -881,6 +881,13 @@
> > > KEEP(*(.con_initcall.init)) \
> > > __con_initcall_end = .;
> > >
> > > +/* Alignment must be consistent with (kunit_suite *) in include/kunit/test.h */
> >
> > Nit on naming:
> >
> > > +#define KUNIT_TEST_SUITES \
> >
> > I would call this KUNIT_TABLE to maintain the same names as other things
> > of this nature.
> >
> > > + . = ALIGN(8); \
> > > + __kunit_suites_start = .; \
> > > + KEEP(*(.kunit_test_suites)) \
> > > + __kunit_suites_end = .;
> > > +
> > > #ifdef CONFIG_BLK_DEV_INITRD
> > > #define INIT_RAM_FS \
> > > . = ALIGN(4); \
> > > @@ -1056,6 +1063,7 @@
> > > INIT_CALLS \
> > > CON_INITCALL \
> > > INIT_RAM_FS \
> > > + KUNIT_TEST_SUITES \
> > > }
> >
> > Nack: this must be in INIT_DATA, not in INIT_DATA_SECTION. Not all
> > architectures use the INIT_DATA_SECTION macro (e.g. arm64), but everything
> > uses INIT_DATA.
>
> Oh, maybe that would eliminate the need for the other linkerscript
> patches? That would be nice.
Curious, did changing it as Kees suggest fix it for m68k?
Luis
^ permalink raw reply
* Re: [PATCH v5 0/4] vmalloc kernel mapping and relocatable kernel
From: Alex Ghiti @ 2020-07-08 4:21 UTC (permalink / raw)
To: Michael Ellerman, Benjamin Herrenschmidt, Paul Mackerras,
Paul Walmsley, Palmer Dabbelt, Albert Ou, Anup Patel, Atish Patra,
Zong Li, linux-kernel, linuxppc-dev, linux-riscv
In-Reply-To: <20200607075949.665-1-alex@ghiti.fr>
Hi Palmer,
Le 6/7/20 à 3:59 AM, Alexandre Ghiti a écrit :
> This patchset originally implemented relocatable kernel support but now
> also moves the kernel mapping into the vmalloc zone.
>
> The first patch explains why we need to move the kernel into vmalloc
> zone (instead of memcpying it around). That patch should ease KASLR
> implementation a lot.
>
> The second patch allows to build relocatable kernels but is not selected
> by default.
>
> The third and fourth patches take advantage of an already existing powerpc
> script that checks relocations at compile-time, and uses it for riscv.
>
> Changes in v5:
> * Add "static __init" to create_kernel_page_table function as reported by
> Kbuild test robot
> * Add reviewed-by from Zong
> * Rebase onto v5.7
>
> Changes in v4:
> * Fix BPF region that overlapped with kernel's as suggested by Zong
> * Fix end of module region that could be larger than 2GB as suggested by Zong
> * Fix the size of the vm area reserved for the kernel as we could lose
> PMD_SIZE if the size was already aligned on PMD_SIZE
> * Split compile time relocations check patch into 2 patches as suggested by Anup
> * Applied Reviewed-by from Zong and Anup
>
> Changes in v3:
> * Move kernel mapping to vmalloc
>
> Changes in v2:
> * Make RELOCATABLE depend on MMU as suggested by Anup
> * Rename kernel_load_addr into kernel_virt_addr as suggested by Anup
> * Use __pa_symbol instead of __pa, as suggested by Zong
> * Rebased on top of v5.6-rc3
> * Tested with sv48 patchset
> * Add Reviewed/Tested-by from Zong and Anup
>
> Alexandre Ghiti (4):
> riscv: Move kernel mapping to vmalloc zone
> riscv: Introduce CONFIG_RELOCATABLE
> powerpc: Move script to check relocations at compile time in scripts/
> riscv: Check relocations at compile time
>
> arch/powerpc/tools/relocs_check.sh | 18 +----
> arch/riscv/Kconfig | 12 +++
> arch/riscv/Makefile | 5 +-
> arch/riscv/Makefile.postlink | 36 +++++++++
> arch/riscv/boot/loader.lds.S | 3 +-
> arch/riscv/include/asm/page.h | 10 ++-
> arch/riscv/include/asm/pgtable.h | 38 ++++++---
> arch/riscv/kernel/head.S | 3 +-
> arch/riscv/kernel/module.c | 4 +-
> arch/riscv/kernel/vmlinux.lds.S | 9 ++-
> arch/riscv/mm/Makefile | 4 +
> arch/riscv/mm/init.c | 121 +++++++++++++++++++++++++----
> arch/riscv/mm/physaddr.c | 2 +-
> arch/riscv/tools/relocs_check.sh | 26 +++++++
> scripts/relocs_check.sh | 20 +++++
> 15 files changed, 259 insertions(+), 52 deletions(-)
> create mode 100644 arch/riscv/Makefile.postlink
> create mode 100755 arch/riscv/tools/relocs_check.sh
> create mode 100755 scripts/relocs_check.sh
>
Do you have any remark regarding this series ?
Thanks,
Alex
^ permalink raw reply
* [Bug 208197] OF: /pci@f2000000/mac-io@17/gpio@50/...: could not find phandle
From: bugzilla-daemon @ 2020-07-08 4:05 UTC (permalink / raw)
To: linuxppc-dev
In-Reply-To: <bug-208197-206035@https.bugzilla.kernel.org/>
https://bugzilla.kernel.org/show_bug.cgi?id=208197
--- Comment #7 from Michael Ellerman (michael@ellerman.id.au) ---
I couldn't really make sense of your bisect log, it doesn't have any good/bad
commits in it.
And I don't see how reverting a merge of v5.7-rc7 can be helping, because you
said v5.7 is good, and that contains v5.7-rc7.
Can you attach the output of "git bisect log".
--
You are receiving this mail because:
You are watching the assignee of the bug.
^ permalink raw reply
* Re: [PATCH v3 0/6] powerpc: queued spinlocks and rwlocks
From: Waiman Long @ 2020-07-08 3:33 UTC (permalink / raw)
To: Nicholas Piggin, linuxppc-dev
Cc: linux-arch, Peter Zijlstra, Boqun Feng, linux-kernel, kvm-ppc,
virtualization, Ingo Molnar, Will Deacon
In-Reply-To: <1594101082.hfq9x5yact.astroid@bobo.none>
[-- Attachment #1: Type: text/plain, Size: 1488 bytes --]
On 7/7/20 1:57 AM, Nicholas Piggin wrote:
> Yes, powerpc could certainly get more performance out of the slow
> paths, and then there are a few parameters to tune.
>
> We don't have a good alternate patching for function calls yet, but
> that would be something to do for native vs pv.
>
> And then there seem to be one or two tunable parameters we could
> experiment with.
>
> The paravirt locks may need a bit more tuning. Some simple testing
> under KVM shows we might be a bit slower in some cases. Whether this
> is fairness or something else I'm not sure. The current simple pv
> spinlock code can do a directed yield to the lock holder CPU, whereas
> the pv qspl here just does a general yield. I think we might actually
> be able to change that to also support directed yield. Though I'm
> not sure if this is actually the cause of the slowdown yet.
Regarding the paravirt lock, I have taken a further look into the
current PPC spinlock code. There is an equivalent of pv_wait() but no
pv_kick(). Maybe PPC doesn't really need that. Attached are two
additional qspinlock patches that adds a CONFIG_PARAVIRT_QSPINLOCKS_LITE
option to not require pv_kick(). There is also a fixup patch to be
applied after your patchset.
I don't have access to a PPC LPAR with shared processor at the moment,
so I can't test the performance of the paravirt code. Would you mind
adding my patches and do some performance test on your end to see if it
gives better result?
Thanks,
Longman
[-- Attachment #2: 0001-locking-pvqspinlock-Code-relocation-and-extraction.patch --]
[-- Type: text/x-patch, Size: 11938 bytes --]
From 161e545523a7eb4c42c145c04e9a5a15903ba3d9 Mon Sep 17 00:00:00 2001
From: Waiman Long <longman@redhat.com>
Date: Tue, 7 Jul 2020 20:46:51 -0400
Subject: [PATCH 1/9] locking/pvqspinlock: Code relocation and extraction
Move pv_kick_node() and the unlock functions up and extract out the hash
and lock code from pv_wait_head_or_lock() into pv_hash_lock(). There
is no functional change.
Signed-off-by: Waiman Long <longman@redhat.com>
---
kernel/locking/qspinlock_paravirt.h | 302 ++++++++++++++--------------
1 file changed, 156 insertions(+), 146 deletions(-)
diff --git a/kernel/locking/qspinlock_paravirt.h b/kernel/locking/qspinlock_paravirt.h
index e84d21aa0722..8eec58320b85 100644
--- a/kernel/locking/qspinlock_paravirt.h
+++ b/kernel/locking/qspinlock_paravirt.h
@@ -55,6 +55,7 @@ struct pv_node {
/*
* Hybrid PV queued/unfair lock
+ * ----------------------------
*
* By replacing the regular queued_spin_trylock() with the function below,
* it will be called once when a lock waiter enter the PV slowpath before
@@ -259,6 +260,156 @@ static struct pv_node *pv_unhash(struct qspinlock *lock)
BUG();
}
+/*
+ * Insert lock into hash and set _Q_SLOW_VAL.
+ * Return true if lock acquired.
+ */
+static inline bool pv_hash_lock(struct qspinlock *lock, struct pv_node *node)
+{
+ struct qspinlock **lp = pv_hash(lock, node);
+
+ /*
+ * We must hash before setting _Q_SLOW_VAL, such that
+ * when we observe _Q_SLOW_VAL in __pv_queued_spin_unlock()
+ * we'll be sure to be able to observe our hash entry.
+ *
+ * [S] <hash> [Rmw] l->locked == _Q_SLOW_VAL
+ * MB RMB
+ * [RmW] l->locked = _Q_SLOW_VAL [L] <unhash>
+ *
+ * Matches the smp_rmb() in __pv_queued_spin_unlock().
+ */
+ if (xchg(&lock->locked, _Q_SLOW_VAL) == 0) {
+ /*
+ * The lock was free and now we own the lock.
+ * Change the lock value back to _Q_LOCKED_VAL
+ * and unhash the table.
+ */
+ WRITE_ONCE(lock->locked, _Q_LOCKED_VAL);
+ WRITE_ONCE(*lp, NULL);
+ return true;
+ }
+ return false;
+}
+
+/*
+ * Called after setting next->locked = 1 when we're the lock owner.
+ *
+ * Instead of waking the waiters stuck in pv_wait_node() advance their state
+ * such that they're waiting in pv_wait_head_or_lock(), this avoids a
+ * wake/sleep cycle.
+ */
+static void pv_kick_node(struct qspinlock *lock, struct mcs_spinlock *node)
+{
+ struct pv_node *pn = (struct pv_node *)node;
+
+ /*
+ * If the vCPU is indeed halted, advance its state to match that of
+ * pv_wait_node(). If OTOH this fails, the vCPU was running and will
+ * observe its next->locked value and advance itself.
+ *
+ * Matches with smp_store_mb() and cmpxchg() in pv_wait_node()
+ *
+ * The write to next->locked in arch_mcs_spin_unlock_contended()
+ * must be ordered before the read of pn->state in the cmpxchg()
+ * below for the code to work correctly. To guarantee full ordering
+ * irrespective of the success or failure of the cmpxchg(),
+ * a relaxed version with explicit barrier is used. The control
+ * dependency will order the reading of pn->state before any
+ * subsequent writes.
+ */
+ smp_mb__before_atomic();
+ if (cmpxchg_relaxed(&pn->state, vcpu_halted, vcpu_hashed)
+ != vcpu_halted)
+ return;
+
+ /*
+ * Put the lock into the hash table and set the _Q_SLOW_VAL.
+ *
+ * As this is the same vCPU that will check the _Q_SLOW_VAL value and
+ * the hash table later on at unlock time, no atomic instruction is
+ * needed.
+ */
+ WRITE_ONCE(lock->locked, _Q_SLOW_VAL);
+ (void)pv_hash(lock, pn);
+}
+
+/*
+ * PV versions of the unlock fastpath and slowpath functions to be used
+ * instead of queued_spin_unlock().
+ */
+__visible void
+__pv_queued_spin_unlock_slowpath(struct qspinlock *lock, u8 locked)
+{
+ struct pv_node *node;
+
+ if (unlikely(locked != _Q_SLOW_VAL)) {
+ WARN(!debug_locks_silent,
+ "pvqspinlock: lock 0x%lx has corrupted value 0x%x!\n",
+ (unsigned long)lock, atomic_read(&lock->val));
+ return;
+ }
+
+ /*
+ * A failed cmpxchg doesn't provide any memory-ordering guarantees,
+ * so we need a barrier to order the read of the node data in
+ * pv_unhash *after* we've read the lock being _Q_SLOW_VAL.
+ *
+ * Matches the cmpxchg() in pv_wait_head_or_lock() setting _Q_SLOW_VAL.
+ */
+ smp_rmb();
+
+ /*
+ * Since the above failed to release, this must be the SLOW path.
+ * Therefore start by looking up the blocked node and unhashing it.
+ */
+ node = pv_unhash(lock);
+
+ /*
+ * Now that we have a reference to the (likely) blocked pv_node,
+ * release the lock.
+ */
+ smp_store_release(&lock->locked, 0);
+
+ /*
+ * At this point the memory pointed at by lock can be freed/reused,
+ * however we can still use the pv_node to kick the CPU.
+ * The other vCPU may not really be halted, but kicking an active
+ * vCPU is harmless other than the additional latency in completing
+ * the unlock.
+ */
+ lockevent_inc(pv_kick_unlock);
+ pv_kick(node->cpu);
+}
+
+/*
+ * Include the architecture specific callee-save thunk of the
+ * __pv_queued_spin_unlock(). This thunk is put together with
+ * __pv_queued_spin_unlock() to make the callee-save thunk and the real unlock
+ * function close to each other sharing consecutive instruction cachelines.
+ * Alternatively, architecture specific version of __pv_queued_spin_unlock()
+ * can be defined.
+ */
+#include <asm/qspinlock_paravirt.h>
+
+#ifndef __pv_queued_spin_unlock
+__visible void __pv_queued_spin_unlock(struct qspinlock *lock)
+{
+ u8 locked;
+
+ /*
+ * We must not unlock if SLOW, because in that case we must first
+ * unhash. Otherwise it would be possible to have multiple @lock
+ * entries, which would be BAD.
+ */
+ locked = cmpxchg_release(&lock->locked, _Q_LOCKED_VAL, 0);
+ if (likely(locked == _Q_LOCKED_VAL))
+ return;
+
+ __pv_queued_spin_unlock_slowpath(lock, locked);
+}
+#endif /* __pv_queued_spin_unlock */
+
/*
* Return true if when it is time to check the previous node which is not
* in a running state.
@@ -350,48 +501,6 @@ static void pv_wait_node(struct mcs_spinlock *node, struct mcs_spinlock *prev)
*/
}
-/*
- * Called after setting next->locked = 1 when we're the lock owner.
- *
- * Instead of waking the waiters stuck in pv_wait_node() advance their state
- * such that they're waiting in pv_wait_head_or_lock(), this avoids a
- * wake/sleep cycle.
- */
-static void pv_kick_node(struct qspinlock *lock, struct mcs_spinlock *node)
-{
- struct pv_node *pn = (struct pv_node *)node;
-
- /*
- * If the vCPU is indeed halted, advance its state to match that of
- * pv_wait_node(). If OTOH this fails, the vCPU was running and will
- * observe its next->locked value and advance itself.
- *
- * Matches with smp_store_mb() and cmpxchg() in pv_wait_node()
- *
- * The write to next->locked in arch_mcs_spin_unlock_contended()
- * must be ordered before the read of pn->state in the cmpxchg()
- * below for the code to work correctly. To guarantee full ordering
- * irrespective of the success or failure of the cmpxchg(),
- * a relaxed version with explicit barrier is used. The control
- * dependency will order the reading of pn->state before any
- * subsequent writes.
- */
- smp_mb__before_atomic();
- if (cmpxchg_relaxed(&pn->state, vcpu_halted, vcpu_hashed)
- != vcpu_halted)
- return;
-
- /*
- * Put the lock into the hash table and set the _Q_SLOW_VAL.
- *
- * As this is the same vCPU that will check the _Q_SLOW_VAL value and
- * the hash table later on at unlock time, no atomic instruction is
- * needed.
- */
- WRITE_ONCE(lock->locked, _Q_SLOW_VAL);
- (void)pv_hash(lock, pn);
-}
-
/*
* Wait for l->locked to become clear and acquire the lock;
* halt the vcpu after a short spin.
@@ -403,16 +512,13 @@ static u32
pv_wait_head_or_lock(struct qspinlock *lock, struct mcs_spinlock *node)
{
struct pv_node *pn = (struct pv_node *)node;
- struct qspinlock **lp = NULL;
int waitcnt = 0;
int loop;
-
/*
- * If pv_kick_node() already advanced our state, we don't need to
+ * If pv_kick_node() had already advanced our state, we don't need to
* insert ourselves into the hash table anymore.
*/
- if (READ_ONCE(pn->state) == vcpu_hashed)
- lp = (struct qspinlock **)1;
+ bool hashed = (READ_ONCE(pn->state) == vcpu_hashed);
/*
* Tracking # of slowpath locking operations
@@ -439,30 +545,10 @@ pv_wait_head_or_lock(struct qspinlock *lock, struct mcs_spinlock *node)
clear_pending(lock);
- if (!lp) { /* ONCE */
- lp = pv_hash(lock, pn);
-
- /*
- * We must hash before setting _Q_SLOW_VAL, such that
- * when we observe _Q_SLOW_VAL in __pv_queued_spin_unlock()
- * we'll be sure to be able to observe our hash entry.
- *
- * [S] <hash> [Rmw] l->locked == _Q_SLOW_VAL
- * MB RMB
- * [RmW] l->locked = _Q_SLOW_VAL [L] <unhash>
- *
- * Matches the smp_rmb() in __pv_queued_spin_unlock().
- */
- if (xchg(&lock->locked, _Q_SLOW_VAL) == 0) {
- /*
- * The lock was free and now we own the lock.
- * Change the lock value back to _Q_LOCKED_VAL
- * and unhash the table.
- */
- WRITE_ONCE(lock->locked, _Q_LOCKED_VAL);
- WRITE_ONCE(*lp, NULL);
+ if (!hashed) { /* ONCE */
+ if (pv_hash_lock(lock, pn))
goto gotlock;
- }
+ hashed = true;
}
WRITE_ONCE(pn->state, vcpu_hashed);
lockevent_inc(pv_wait_head);
@@ -484,79 +570,3 @@ pv_wait_head_or_lock(struct qspinlock *lock, struct mcs_spinlock *node)
gotlock:
return (u32)(atomic_read(&lock->val) | _Q_LOCKED_VAL);
}
-
-/*
- * PV versions of the unlock fastpath and slowpath functions to be used
- * instead of queued_spin_unlock().
- */
-__visible void
-__pv_queued_spin_unlock_slowpath(struct qspinlock *lock, u8 locked)
-{
- struct pv_node *node;
-
- if (unlikely(locked != _Q_SLOW_VAL)) {
- WARN(!debug_locks_silent,
- "pvqspinlock: lock 0x%lx has corrupted value 0x%x!\n",
- (unsigned long)lock, atomic_read(&lock->val));
- return;
- }
-
- /*
- * A failed cmpxchg doesn't provide any memory-ordering guarantees,
- * so we need a barrier to order the read of the node data in
- * pv_unhash *after* we've read the lock being _Q_SLOW_VAL.
- *
- * Matches the cmpxchg() in pv_wait_head_or_lock() setting _Q_SLOW_VAL.
- */
- smp_rmb();
-
- /*
- * Since the above failed to release, this must be the SLOW path.
- * Therefore start by looking up the blocked node and unhashing it.
- */
- node = pv_unhash(lock);
-
- /*
- * Now that we have a reference to the (likely) blocked pv_node,
- * release the lock.
- */
- smp_store_release(&lock->locked, 0);
-
- /*
- * At this point the memory pointed at by lock can be freed/reused,
- * however we can still use the pv_node to kick the CPU.
- * The other vCPU may not really be halted, but kicking an active
- * vCPU is harmless other than the additional latency in completing
- * the unlock.
- */
- lockevent_inc(pv_kick_unlock);
- pv_kick(node->cpu);
-}
-
-/*
- * Include the architecture specific callee-save thunk of the
- * __pv_queued_spin_unlock(). This thunk is put together with
- * __pv_queued_spin_unlock() to make the callee-save thunk and the real unlock
- * function close to each other sharing consecutive instruction cachelines.
- * Alternatively, architecture specific version of __pv_queued_spin_unlock()
- * can be defined.
- */
-#include <asm/qspinlock_paravirt.h>
-
-#ifndef __pv_queued_spin_unlock
-__visible void __pv_queued_spin_unlock(struct qspinlock *lock)
-{
- u8 locked;
-
- /*
- * We must not unlock if SLOW, because in that case we must first
- * unhash. Otherwise it would be possible to have multiple @lock
- * entries, which would be BAD.
- */
- locked = cmpxchg_release(&lock->locked, _Q_LOCKED_VAL, 0);
- if (likely(locked == _Q_LOCKED_VAL))
- return;
-
- __pv_queued_spin_unlock_slowpath(lock, locked);
-}
-#endif /* __pv_queued_spin_unlock */
--
2.18.1
[-- Attachment #3: 0002-locking-pvqspinlock-Introduce-CONFIG_PARAVIRT_QSPINL.patch --]
[-- Type: text/x-patch, Size: 6166 bytes --]
From 5d7941a498935fb225b2c7a3108cbf590114c3db Mon Sep 17 00:00:00 2001
From: Waiman Long <longman@redhat.com>
Date: Tue, 7 Jul 2020 22:29:16 -0400
Subject: [PATCH 2/9] locking/pvqspinlock: Introduce
CONFIG_PARAVIRT_QSPINLOCKS_LITE
Add a new PARAVIRT_QSPINLOCKS_LITE config option that allows
architectures to use the PV qspinlock code without the need to use or
implement a pv_kick() function, thus eliminating the atomic unlock
overhead. The non-atomic queued_spin_unlock() can be used instead.
The pv_wait() function will still be needed, but it can be a dummy
function.
With that option set, the hybrid PV queued/unfair locking code should
still be able to make it performant enough in a paravirtualized
environment.
Signed-off-by: Waiman Long <longman@redhat.com>
---
kernel/Kconfig.locks | 4 +++
kernel/locking/lock_events_list.h | 3 ++
kernel/locking/qspinlock_paravirt.h | 49 ++++++++++++++++++++++++-----
kernel/locking/qspinlock_stat.h | 5 +--
4 files changed, 52 insertions(+), 9 deletions(-)
diff --git a/kernel/Kconfig.locks b/kernel/Kconfig.locks
index 3de8fd11873b..1824ba8c44a9 100644
--- a/kernel/Kconfig.locks
+++ b/kernel/Kconfig.locks
@@ -243,6 +243,10 @@ config QUEUED_SPINLOCKS
def_bool y if ARCH_USE_QUEUED_SPINLOCKS
depends on SMP
+config PARAVIRT_QSPINLOCKS_LITE
+ bool
+ depends on QUEUED_SPINLOCKS && PARAVIRT_SPINLOCKS
+
config BPF_ARCH_SPINLOCK
bool
diff --git a/kernel/locking/lock_events_list.h b/kernel/locking/lock_events_list.h
index 239039d0ce21..9ae07a7148e8 100644
--- a/kernel/locking/lock_events_list.h
+++ b/kernel/locking/lock_events_list.h
@@ -22,11 +22,14 @@
/*
* Locking events for PV qspinlock.
*/
+#ifndef CONFIG_PARAVIRT_QSPINLOCKS_LITE
LOCK_EVENT(pv_hash_hops) /* Average # of hops per hashing operation */
LOCK_EVENT(pv_kick_unlock) /* # of vCPU kicks issued at unlock time */
LOCK_EVENT(pv_kick_wake) /* # of vCPU kicks for pv_latency_wake */
LOCK_EVENT(pv_latency_kick) /* Average latency (ns) of vCPU kick */
LOCK_EVENT(pv_latency_wake) /* Average latency (ns) of kick-to-wakeup */
+#endif
+
LOCK_EVENT(pv_lock_stealing) /* # of lock stealing operations */
LOCK_EVENT(pv_spurious_wakeup) /* # of spurious wakeups in non-head vCPUs */
LOCK_EVENT(pv_wait_again) /* # of wait's after queue head vCPU kick */
diff --git a/kernel/locking/qspinlock_paravirt.h b/kernel/locking/qspinlock_paravirt.h
index 8eec58320b85..2d24563aa9b9 100644
--- a/kernel/locking/qspinlock_paravirt.h
+++ b/kernel/locking/qspinlock_paravirt.h
@@ -77,6 +77,23 @@ struct pv_node {
* This hybrid PV queued/unfair lock combines the best attributes of a
* queued lock (no lock starvation) and an unfair lock (good performance
* on not heavily contended locks).
+ *
+ * PV lock lite
+ * ------------
+ *
+ * By default, the PV lock uses two hypervisor specific functions pv_wait()
+ * and pv_kick() to release the vcpu back to the hypervisor and request the
+ * hypervisor to put the given vcpu online again respectively.
+ *
+ * The pv_kick() function is called at unlock time and requires the use of
+ * an atomic instruction to prevent missed wakeup. The unlock overhead of
+ * the PV lock is a major reason why the PV lock is slightly slower than
+ * the native lock. Not all the hypervisors need to really use both
+ * pv_wait() and pv_kick(). The PARAVIRT_QSPINLOCKS_LITE config option
+ * enables a lighter version of PV lock that relies mainly on the hybrid
+ * queued/unfair lock. The pv_wait() function will be used if provided.
+ * The pv_kick() function isn't used to eliminate the unlock overhead and
+ * the non-atomic queued_spin_unlock() can be used.
*/
#define queued_spin_trylock(l) pv_hybrid_queued_unfair_trylock(l)
static inline bool pv_hybrid_queued_unfair_trylock(struct qspinlock *lock)
@@ -153,6 +170,7 @@ static __always_inline int trylock_clear_pending(struct qspinlock *lock)
}
#endif /* _Q_PENDING_BITS == 8 */
+#ifndef CONFIG_PARAVIRT_QSPINLOCKS_LITE
/*
* Lock and MCS node addresses hash table for fast lookup
*
@@ -410,6 +428,29 @@ __visible void __pv_queued_spin_unlock(struct qspinlock *lock)
}
#endif /* __pv_queued_spin_unlock */
+static inline void set_pv_node_running(struct pv_node *pn)
+{
+ /*
+ * If pv_kick_node() changed us to vcpu_hashed, retain that value so
+ * that pv_wait_head_or_lock() will not try to hash this lock.
+ */
+ cmpxchg(&pn->state, vcpu_halted, vcpu_running);
+}
+#else
+static inline bool pv_hash_lock(struct qspinlock *lock, struct pv_node *node)
+{
+ return false;
+}
+
+static inline void pv_kick_node(struct qspinlock *lock,
+ struct mcs_spinlock *node) { }
+
+static inline void set_pv_node_running(struct pv_node *pn)
+{
+ pn->state = vcpu_running;
+}
+#endif /* CONFIG_PARAVIRT_QSPINLOCKS_LITE */
+
/*
* Return true if when it is time to check the previous node which is not
* in a running state.
@@ -475,13 +516,7 @@ static void pv_wait_node(struct mcs_spinlock *node, struct mcs_spinlock *prev)
lockevent_cond_inc(pv_wait_early, wait_early);
pv_wait(&pn->state, vcpu_halted);
}
-
- /*
- * If pv_kick_node() changed us to vcpu_hashed, retain that
- * value so that pv_wait_head_or_lock() knows to not also try
- * to hash this lock.
- */
- cmpxchg(&pn->state, vcpu_halted, vcpu_running);
+ set_pv_node_running(pn);
/*
* If the locked flag is still not set after wakeup, it is a
diff --git a/kernel/locking/qspinlock_stat.h b/kernel/locking/qspinlock_stat.h
index e625bb410aa2..e9f63240785b 100644
--- a/kernel/locking/qspinlock_stat.h
+++ b/kernel/locking/qspinlock_stat.h
@@ -7,7 +7,8 @@
#include "lock_events.h"
#ifdef CONFIG_LOCK_EVENT_COUNTS
-#ifdef CONFIG_PARAVIRT_SPINLOCKS
+#if defined(CONFIG_PARAVIRT_SPINLOCKS) && \
+ !defined(CONFIG_PARAVIRT_QSPINLOCKS_LITE)
/*
* Collect pvqspinlock locking event counts
*/
@@ -133,7 +134,7 @@ static inline void __pv_wait(u8 *ptr, u8 val)
#define pv_kick(c) __pv_kick(c)
#define pv_wait(p, v) __pv_wait(p, v)
-#endif /* CONFIG_PARAVIRT_SPINLOCKS */
+#endif /* CONFIG_PARAVIRT_SPINLOCKS && !CONFIG_PARAVIRT_QSPINLOCKS_LITE */
#else /* CONFIG_LOCK_EVENT_COUNTS */
--
2.18.1
[-- Attachment #4: 0009-powerpc-pseries-Fixup.patch --]
[-- Type: text/x-patch, Size: 3086 bytes --]
From 655d3a5d48a494a5674cf57454bf3e1f36b6eb83 Mon Sep 17 00:00:00 2001
From: Waiman Long <longman@redhat.com>
Date: Tue, 7 Jul 2020 22:29:20 -0400
Subject: [PATCH 9/9] powerpc/pseries: Fixup
Signed-off-by: Waiman Long <longman@redhat.com>
---
arch/powerpc/include/asm/qspinlock.h | 22 ----------------------
arch/powerpc/platforms/pseries/Kconfig | 1 +
arch/powerpc/platforms/pseries/setup.c | 6 +-----
3 files changed, 2 insertions(+), 27 deletions(-)
diff --git a/arch/powerpc/include/asm/qspinlock.h b/arch/powerpc/include/asm/qspinlock.h
index b752d34517b3..1fa724d27a2d 100644
--- a/arch/powerpc/include/asm/qspinlock.h
+++ b/arch/powerpc/include/asm/qspinlock.h
@@ -10,7 +10,6 @@
#ifdef CONFIG_PARAVIRT_SPINLOCKS
extern void native_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val);
extern void __pv_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val);
-extern void __pv_queued_spin_unlock(struct qspinlock *lock);
static __always_inline void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
{
@@ -20,15 +19,6 @@ static __always_inline void queued_spin_lock_slowpath(struct qspinlock *lock, u3
__pv_queued_spin_lock_slowpath(lock, val);
}
-#define queued_spin_unlock queued_spin_unlock
-static inline void queued_spin_unlock(struct qspinlock *lock)
-{
- if (!is_shared_processor())
- smp_store_release(&lock->locked, 0);
- else
- __pv_queued_spin_unlock(lock);
-}
-
#else
extern void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val);
#endif
@@ -72,18 +62,6 @@ static __always_inline void pv_wait(u8 *ptr, u8 val)
*/
}
-static __always_inline void pv_kick(int cpu)
-{
- prod_cpu(cpu);
-}
-
-extern void __pv_init_lock_hash(void);
-
-static inline void pv_spinlocks_init(void)
-{
- __pv_init_lock_hash();
-}
-
#endif
#include <asm-generic/qspinlock.h>
diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/pseries/Kconfig
index 756e727b383f..1e3bbe27d664 100644
--- a/arch/powerpc/platforms/pseries/Kconfig
+++ b/arch/powerpc/platforms/pseries/Kconfig
@@ -33,6 +33,7 @@ config PPC_SPLPAR
depends on PPC_PSERIES
bool "Support for shared-processor logical partitions"
select PARAVIRT_SPINLOCKS if PPC_QUEUED_SPINLOCKS
+ select PARAVIRT_QSPINLOCKS_LITE if PPC_QUEUED_SPINLOCKS
help
Enabling this option will make the kernel run more efficiently
on logically-partitioned pSeries systems which use shared
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index 747a203d9453..2db8469e475f 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -771,12 +771,8 @@ static void __init pSeries_setup_arch(void)
if (firmware_has_feature(FW_FEATURE_LPAR)) {
vpa_init(boot_cpuid);
- if (lppaca_shared_proc(get_lppaca())) {
+ if (lppaca_shared_proc(get_lppaca()))
static_branch_enable(&shared_processor);
-#ifdef CONFIG_PARAVIRT_SPINLOCKS
- pv_spinlocks_init();
-#endif
- }
ppc_md.power_save = pseries_lpar_idle;
ppc_md.enable_pmcs = pseries_lpar_enable_pmcs;
--
2.18.1
^ permalink raw reply related
* Re: [PATCH v2 2/4] powerpc/mm/radix: Free PUD table when freeing pagetable
From: Aneesh Kumar K.V @ 2020-07-08 2:48 UTC (permalink / raw)
To: Michael Ellerman, linuxppc-dev; +Cc: Bharata B Rao
In-Reply-To: <87a70a4uw2.fsf@mpe.ellerman.id.au>
On 7/8/20 7:42 AM, Michael Ellerman wrote:
> "Aneesh Kumar K.V" <aneesh.kumar@linux.ibm.com> writes:
>> From: Bharata B Rao <bharata@linux.ibm.com>
>>
>> remove_pagetable() isn't freeing PUD table. This causes memory
>> leak during memory unplug. Fix this.
>
Fixes: 4b5d62ca17a1 ("powerpc/mm: add radix__remove_section_mapping()")
> Fixes: ??
>
> cheers
>
>> Signed-off-by: Bharata B Rao <bharata@linux.ibm.com>
>> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
>> ---
>> arch/powerpc/mm/book3s64/radix_pgtable.c | 16 ++++++++++++++++
>> 1 file changed, 16 insertions(+)
>>
>> diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c
>> index 80be96d3018f..af57604f295f 100644
>> --- a/arch/powerpc/mm/book3s64/radix_pgtable.c
>> +++ b/arch/powerpc/mm/book3s64/radix_pgtable.c
>> @@ -708,6 +708,21 @@ static void free_pmd_table(pmd_t *pmd_start, pud_t *pud)
>> pud_clear(pud);
>> }
>>
>> +static void free_pud_table(pud_t *pud_start, p4d_t *p4d)
>> +{
>> + pud_t *pud;
>> + int i;
>> +
>> + for (i = 0; i < PTRS_PER_PUD; i++) {
>> + pud = pud_start + i;
>> + if (!pud_none(*pud))
>> + return;
>> + }
>> +
>> + pud_free(&init_mm, pud_start);
>> + p4d_clear(p4d);
>> +}
>> +
>> struct change_mapping_params {
>> pte_t *pte;
>> unsigned long start;
>> @@ -882,6 +897,7 @@ static void __meminit remove_pagetable(unsigned long start, unsigned long end)
>>
>> pud_base = (pud_t *)p4d_page_vaddr(*p4d);
>> remove_pud_table(pud_base, addr, next);
>> + free_pud_table(pud_base, p4d);
>> }
>>
>> spin_unlock(&init_mm.page_table_lock);
>> --
>> 2.26.2
^ permalink raw reply
* Re: [PATCH v2 04/10] powerpc/perf: Add power10_feat to dt_cpu_ftrs
From: Athira Rajeev @ 2020-07-08 2:13 UTC (permalink / raw)
To: Michael Neuling; +Cc: maddy, linuxppc-dev
In-Reply-To: <fbc244b88f9291e83b2eabedd73ec9672b1fa12d.camel@neuling.org>
> On 07-Jul-2020, at 11:52 AM, Michael Neuling <mikey@neuling.org> wrote:
>
> On Wed, 2020-07-01 at 05:20 -0400, Athira Rajeev wrote:
>> From: Madhavan Srinivasan <maddy@linux.ibm.com>
>>
>> Add power10 feature function to dt_cpu_ftrs.c along
>> with a power10 specific init() to initialize pmu sprs.
>
> Can you say why you're doing this?
>
> Can you add some text about what you're doing to the BHRB in this patch?
Sure, I will include these information for commit message in the next version
Thanks
Athira
>
> Mikey
>
>>
>> Signed-off-by: Madhavan Srinivasan <maddy@linux.ibm.com>
>> ---
>> arch/powerpc/include/asm/reg.h | 3 +++
>> arch/powerpc/kernel/cpu_setup_power.S | 7 +++++++
>> arch/powerpc/kernel/dt_cpu_ftrs.c | 26 ++++++++++++++++++++++++++
>> 3 files changed, 36 insertions(+)
>>
>> diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
>> index 21a1b2d..900ada1 100644
>> --- a/arch/powerpc/include/asm/reg.h
>> +++ b/arch/powerpc/include/asm/reg.h
>> @@ -1068,6 +1068,9 @@
>> #define MMCR0_PMC2_LOADMISSTIME 0x5
>> #endif
>>
>> +/* BHRB disable bit for PowerISA v3.10 */
>> +#define MMCRA_BHRB_DISABLE 0x0000002000000000
>> +
>> /*
>> * SPRG usage:
>> *
>> diff --git a/arch/powerpc/kernel/cpu_setup_power.S
>> b/arch/powerpc/kernel/cpu_setup_power.S
>> index efdcfa7..e8b3370c 100644
>> --- a/arch/powerpc/kernel/cpu_setup_power.S
>> +++ b/arch/powerpc/kernel/cpu_setup_power.S
>> @@ -233,3 +233,10 @@ __init_PMU_ISA207:
>> li r5,0
>> mtspr SPRN_MMCRS,r5
>> blr
>> +
>> +__init_PMU_ISA31:
>> + li r5,0
>> + mtspr SPRN_MMCR3,r5
>> + LOAD_REG_IMMEDIATE(r5, MMCRA_BHRB_DISABLE)
>> + mtspr SPRN_MMCRA,r5
>> + blr
>> diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c
>> b/arch/powerpc/kernel/dt_cpu_ftrs.c
>> index a0edeb3..14a513f 100644
>> --- a/arch/powerpc/kernel/dt_cpu_ftrs.c
>> +++ b/arch/powerpc/kernel/dt_cpu_ftrs.c
>> @@ -449,6 +449,31 @@ static int __init feat_enable_pmu_power9(struct
>> dt_cpu_feature *f)
>> return 1;
>> }
>>
>> +static void init_pmu_power10(void)
>> +{
>> + init_pmu_power9();
>> +
>> + mtspr(SPRN_MMCR3, 0);
>> + mtspr(SPRN_MMCRA, MMCRA_BHRB_DISABLE);
>> +}
>> +
>> +static int __init feat_enable_pmu_power10(struct dt_cpu_feature *f)
>> +{
>> + hfscr_pmu_enable();
>> +
>> + init_pmu_power10();
>> + init_pmu_registers = init_pmu_power10;
>> +
>> + cur_cpu_spec->cpu_features |= CPU_FTR_MMCRA;
>> + cur_cpu_spec->cpu_user_features |= PPC_FEATURE_PSERIES_PERFMON_COMPAT;
>> +
>> + cur_cpu_spec->num_pmcs = 6;
>> + cur_cpu_spec->pmc_type = PPC_PMC_IBM;
>> + cur_cpu_spec->oprofile_cpu_type = "ppc64/power10";
>> +
>> + return 1;
>> +}
>> +
>> static int __init feat_enable_tm(struct dt_cpu_feature *f)
>> {
>> #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
>> @@ -638,6 +663,7 @@ struct dt_cpu_feature_match {
>> {"pc-relative-addressing", feat_enable, 0},
>> {"machine-check-power9", feat_enable_mce_power9, 0},
>> {"performance-monitor-power9", feat_enable_pmu_power9, 0},
>> + {"performance-monitor-power10", feat_enable_pmu_power10, 0},
>> {"event-based-branch-v3", feat_enable, 0},
>> {"random-number-generator", feat_enable, 0},
>> {"system-call-vectored", feat_disable, 0},
>
^ permalink raw reply
* Re: [PATCH v2 2/4] powerpc/mm/radix: Free PUD table when freeing pagetable
From: Michael Ellerman @ 2020-07-08 2:12 UTC (permalink / raw)
To: Aneesh Kumar K.V, linuxppc-dev; +Cc: Aneesh Kumar K . V, Bharata B Rao
In-Reply-To: <20200625064547.228448-3-aneesh.kumar@linux.ibm.com>
"Aneesh Kumar K.V" <aneesh.kumar@linux.ibm.com> writes:
> From: Bharata B Rao <bharata@linux.ibm.com>
>
> remove_pagetable() isn't freeing PUD table. This causes memory
> leak during memory unplug. Fix this.
Fixes: ??
cheers
> Signed-off-by: Bharata B Rao <bharata@linux.ibm.com>
> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
> ---
> arch/powerpc/mm/book3s64/radix_pgtable.c | 16 ++++++++++++++++
> 1 file changed, 16 insertions(+)
>
> diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c
> index 80be96d3018f..af57604f295f 100644
> --- a/arch/powerpc/mm/book3s64/radix_pgtable.c
> +++ b/arch/powerpc/mm/book3s64/radix_pgtable.c
> @@ -708,6 +708,21 @@ static void free_pmd_table(pmd_t *pmd_start, pud_t *pud)
> pud_clear(pud);
> }
>
> +static void free_pud_table(pud_t *pud_start, p4d_t *p4d)
> +{
> + pud_t *pud;
> + int i;
> +
> + for (i = 0; i < PTRS_PER_PUD; i++) {
> + pud = pud_start + i;
> + if (!pud_none(*pud))
> + return;
> + }
> +
> + pud_free(&init_mm, pud_start);
> + p4d_clear(p4d);
> +}
> +
> struct change_mapping_params {
> pte_t *pte;
> unsigned long start;
> @@ -882,6 +897,7 @@ static void __meminit remove_pagetable(unsigned long start, unsigned long end)
>
> pud_base = (pud_t *)p4d_page_vaddr(*p4d);
> remove_pud_table(pud_base, addr, next);
> + free_pud_table(pud_base, p4d);
> }
>
> spin_unlock(&init_mm.page_table_lock);
> --
> 2.26.2
^ permalink raw reply
* Re: [PATCH v2 10/10] powerpc/perf: Add extended regs support for power10 platform
From: Athira Rajeev @ 2020-07-08 1:53 UTC (permalink / raw)
To: kernel test robot; +Cc: mikey, maddy, linuxppc-dev, kbuild-all
In-Reply-To: <202007021706.LsuCoheg%lkp@intel.com>
> On 02-Jul-2020, at 3:10 PM, kernel test robot <lkp@intel.com> wrote:
>
> Hi Athira,
>
> Thank you for the patch! Yet something to improve:
>
> [auto build test ERROR on powerpc/next]
> [also build test ERROR on tip/perf/core v5.8-rc3 next-20200702]
> [If your patch is applied to the wrong git tree, kindly drop us a note.
> And when submitting patch, we suggest to use as documented in
> https://git-scm.com/docs/git-format-patch]
>
> url: https://github.com/0day-ci/linux/commits/Athira-Rajeev/powerpc-perf-Add-support-for-power10-PMU-Hardware/20200701-181147
> base: https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git next
> config: powerpc-pmac32_defconfig (attached as .config)
> compiler: powerpc-linux-gcc (GCC) 9.3.0
> reproduce (this is a W=1 build):
> wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
> chmod +x ~/bin/make.cross
> # save the attached .config to linux build tree
> COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-9.3.0 make.cross ARCH=powerpc
>
> If you fix the issue, kindly add following tag as appropriate
> Reported-by: kernel test robot <lkp@intel.com>
>
> All errors (new ones prefixed by >>):
>
> {standard input}: Assembler messages:
>>> {standard input}:84: Error: unsupported relocation against SPRN_SIER2
>>> {standard input}:91: Error: unsupported relocation against SPRN_SIER3
>>> {standard input}:119: Error: unsupported relocation against SPRN_MMCR3
These regs are not valid for ppc32 platform. Will fix this by including usage of these regs under conditional check for
“CONFIG_PPC64” in next version
Thanks
Athira Rajeev
>
> ---
> 0-DAY CI Kernel Test Service, Intel Corporation
> https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
> <.config.gz>
^ permalink raw reply
* Re: Failure to build librseq on ppc
From: Segher Boessenkool @ 2020-07-08 0:59 UTC (permalink / raw)
To: Mathieu Desnoyers; +Cc: Boqun Feng, linuxppc-dev, Michael Jeanson
In-Reply-To: <972420887.755.1594149430308.JavaMail.zimbra@efficios.com>
Hi!
On Tue, Jul 07, 2020 at 03:17:10PM -0400, Mathieu Desnoyers wrote:
> I'm trying to build librseq at:
>
> https://git.kernel.org/pub/scm/libs/librseq/librseq.git
>
> on powerpc, and I get these errors when building the rseq basic
> test mirrored from the kernel selftests code:
>
> /tmp/ccieEWxU.s: Assembler messages:
> /tmp/ccieEWxU.s:118: Error: syntax error; found `,', expected `('
> /tmp/ccieEWxU.s:118: Error: junk at end of line: `,8'
> /tmp/ccieEWxU.s:121: Error: syntax error; found `,', expected `('
> /tmp/ccieEWxU.s:121: Error: junk at end of line: `,8'
> /tmp/ccieEWxU.s:626: Error: syntax error; found `,', expected `('
> /tmp/ccieEWxU.s:626: Error: junk at end of line: `,8'
> /tmp/ccieEWxU.s:629: Error: syntax error; found `,', expected `('
> /tmp/ccieEWxU.s:629: Error: junk at end of line: `,8'
> /tmp/ccieEWxU.s:735: Error: syntax error; found `,', expected `('
> /tmp/ccieEWxU.s:735: Error: junk at end of line: `,8'
> /tmp/ccieEWxU.s:738: Error: syntax error; found `,', expected `('
> /tmp/ccieEWxU.s:738: Error: junk at end of line: `,8'
> /tmp/ccieEWxU.s:741: Error: syntax error; found `,', expected `('
> /tmp/ccieEWxU.s:741: Error: junk at end of line: `,8'
> Makefile:581: recipe for target 'basic_percpu_ops_test.o' failed
You'll have to show the actual failing machine code, and with enough
context that we can relate this to the source code.
-save-temps helps, or use -S instead of -c, etc.
> I am using this compiler:
>
> gcc version 5.4.0 20160609 (Ubuntu 5.4.0-6ubuntu1~16.04.12)
> Target: powerpc-linux-gnu
>
> So far, I got things to build by changing "m" operands to "Q" operands.
> Based on https://gcc.gnu.org/onlinedocs/gcc/Machine-Constraints.html#Machine-Constraints
> it seems that "Q" means "A memory operand addressed by just a base register."
Yup.
> I suspect that lwz and stw don't expect some kind of immediate offset which
> can be kept with "m", and "Q" fixes this. Is that the right fix ?
>
> And should we change all operands passed to lwz and stw to a "Q" operand ?
No, lwz and stw exactly *do* take an immediate offset.
It sounds like the compiler passed memory addressed by indexed
addressing, instead. Which is fine for "m", and also fine for those
insns... well, you need lwzx and stwx.
So perhaps you have code like
int *p;
int x;
...
asm ("lwz %0,%1" : "=r"(x) : "m"(*p));
where that last line should actually read
asm ("lwz%X1 %0,%1" : "=r"(x) : "m"(*p));
?
Segher
^ permalink raw reply
* [PATCH 2/2] PCI/AER: Log correctable errors as warning, not error
From: Bjorn Helgaas @ 2020-07-08 0:14 UTC (permalink / raw)
To: Matt Jolly
Cc: Sam Bobroff, linux-pci, linux-kernel, Oliver O'Halloran,
Bjorn Helgaas, linuxppc-dev
In-Reply-To: <20200708001401.405749-1-helgaas@kernel.org>
From: Matt Jolly <Kangie@footclan.ninja>
PCIe correctable errors are recovered by hardware with no need for software
intervention (PCIe r5.0, sec 6.2.2.1).
Reduce the log level of correctable errors from KERN_ERR to KERN_WARNING.
The bug reports below are for correctable error logging. This doesn't fix
the cause of those reports, but it may make the messages less alarming.
[bhelgaas: commit log, use pci_printk() to avoid code duplication]
Link: https://bugzilla.kernel.org/show_bug.cgi?id=201517
Link: https://bugzilla.kernel.org/show_bug.cgi?id=196183
Link: https://lore.kernel.org/r/20200618155511.16009-1-Kangie@footclan.ninja
Signed-off-by: Matt Jolly <Kangie@footclan.ninja>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
---
drivers/pci/pcie/aer.c | 25 +++++++++++++++----------
1 file changed, 15 insertions(+), 10 deletions(-)
diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c
index 9176c8a968b9..ca886bf91fd9 100644
--- a/drivers/pci/pcie/aer.c
+++ b/drivers/pci/pcie/aer.c
@@ -673,20 +673,23 @@ static void __aer_print_error(struct pci_dev *dev,
{
const char **strings;
unsigned long status = info->status & ~info->mask;
- const char *errmsg;
+ const char *level, *errmsg;
int i;
- if (info->severity == AER_CORRECTABLE)
+ if (info->severity == AER_CORRECTABLE) {
strings = aer_correctable_error_string;
- else
+ level = KERN_WARNING;
+ } else {
strings = aer_uncorrectable_error_string;
+ level = KERN_ERR;
+ }
for_each_set_bit(i, &status, 32) {
errmsg = strings[i];
if (!errmsg)
errmsg = "Unknown Error Bit";
- pci_err(dev, " [%2d] %-22s%s\n", i, errmsg,
+ pci_printk(level, dev, " [%2d] %-22s%s\n", i, errmsg,
info->first_error == i ? " (First)" : "");
}
pci_dev_aer_stats_incr(dev, info);
@@ -696,6 +699,7 @@ void aer_print_error(struct pci_dev *dev, struct aer_err_info *info)
{
int layer, agent;
int id = ((dev->bus->number << 8) | dev->devfn);
+ const char *level;
if (!info->status) {
pci_err(dev, "PCIe Bus Error: severity=%s, type=Inaccessible, (Unregistered Agent ID)\n",
@@ -706,13 +710,14 @@ void aer_print_error(struct pci_dev *dev, struct aer_err_info *info)
layer = AER_GET_LAYER_ERROR(info->severity, info->status);
agent = AER_GET_AGENT(info->severity, info->status);
- pci_err(dev, "PCIe Bus Error: severity=%s, type=%s, (%s)\n",
- aer_error_severity_string[info->severity],
- aer_error_layer[layer], aer_agent_string[agent]);
+ level = (info->severity == AER_CORRECTABLE) ? KERN_WARNING : KERN_ERR;
+
+ pci_printk(level, dev, "PCIe Bus Error: severity=%s, type=%s, (%s)\n",
+ aer_error_severity_string[info->severity],
+ aer_error_layer[layer], aer_agent_string[agent]);
- pci_err(dev, " device [%04x:%04x] error status/mask=%08x/%08x\n",
- dev->vendor, dev->device,
- info->status, info->mask);
+ pci_printk(level, dev, " device [%04x:%04x] error status/mask=%08x/%08x\n",
+ dev->vendor, dev->device, info->status, info->mask);
__aer_print_error(dev, info);
--
2.25.1
^ permalink raw reply related
* [PATCH 1/2] PCI/AER: Simplify __aer_print_error()
From: Bjorn Helgaas @ 2020-07-08 0:14 UTC (permalink / raw)
To: Matt Jolly
Cc: Sam Bobroff, linux-pci, linux-kernel, Oliver O'Halloran,
Bjorn Helgaas, linuxppc-dev
In-Reply-To: <20200618155511.16009-1-Kangie@footclan.ninja>
From: Bjorn Helgaas <bhelgaas@google.com>
aer_correctable_error_string[] and aer_uncorrectable_error_string[] have
descriptions of AER error status bits. Add NULL entries to these tables so
all entries for bits 0-31 are defined. Then we don't have to check for
ARRAY_SIZE() when decoding a status word, which simplifies
__aer_print_error().
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
---
drivers/pci/pcie/aer.c | 48 ++++++++++++++++++++++++++++++------------
1 file changed, 34 insertions(+), 14 deletions(-)
diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c
index 3acf56683915..9176c8a968b9 100644
--- a/drivers/pci/pcie/aer.c
+++ b/drivers/pci/pcie/aer.c
@@ -447,7 +447,7 @@ static const char *aer_error_layer[] = {
"Transaction Layer"
};
-static const char *aer_correctable_error_string[AER_MAX_TYPEOF_COR_ERRS] = {
+static const char *aer_correctable_error_string[] = {
"RxErr", /* Bit Position 0 */
NULL,
NULL,
@@ -464,9 +464,25 @@ static const char *aer_correctable_error_string[AER_MAX_TYPEOF_COR_ERRS] = {
"NonFatalErr", /* Bit Position 13 */
"CorrIntErr", /* Bit Position 14 */
"HeaderOF", /* Bit Position 15 */
+ NULL, /* Bit Position 16 */
+ NULL, /* Bit Position 17 */
+ NULL, /* Bit Position 18 */
+ NULL, /* Bit Position 19 */
+ NULL, /* Bit Position 20 */
+ NULL, /* Bit Position 21 */
+ NULL, /* Bit Position 22 */
+ NULL, /* Bit Position 23 */
+ NULL, /* Bit Position 24 */
+ NULL, /* Bit Position 25 */
+ NULL, /* Bit Position 26 */
+ NULL, /* Bit Position 27 */
+ NULL, /* Bit Position 28 */
+ NULL, /* Bit Position 29 */
+ NULL, /* Bit Position 30 */
+ NULL, /* Bit Position 31 */
};
-static const char *aer_uncorrectable_error_string[AER_MAX_TYPEOF_UNCOR_ERRS] = {
+static const char *aer_uncorrectable_error_string[] = {
"Undefined", /* Bit Position 0 */
NULL,
NULL,
@@ -494,6 +510,11 @@ static const char *aer_uncorrectable_error_string[AER_MAX_TYPEOF_UNCOR_ERRS] = {
"AtomicOpBlocked", /* Bit Position 24 */
"TLPBlockedErr", /* Bit Position 25 */
"PoisonTLPBlocked", /* Bit Position 26 */
+ NULL, /* Bit Position 27 */
+ NULL, /* Bit Position 28 */
+ NULL, /* Bit Position 29 */
+ NULL, /* Bit Position 30 */
+ NULL, /* Bit Position 31 */
};
static const char *aer_agent_string[] = {
@@ -650,24 +671,23 @@ static void __print_tlp_header(struct pci_dev *dev,
static void __aer_print_error(struct pci_dev *dev,
struct aer_err_info *info)
{
+ const char **strings;
unsigned long status = info->status & ~info->mask;
- const char *errmsg = NULL;
+ const char *errmsg;
int i;
+ if (info->severity == AER_CORRECTABLE)
+ strings = aer_correctable_error_string;
+ else
+ strings = aer_uncorrectable_error_string;
+
for_each_set_bit(i, &status, 32) {
- if (info->severity == AER_CORRECTABLE)
- errmsg = i < ARRAY_SIZE(aer_correctable_error_string) ?
- aer_correctable_error_string[i] : NULL;
- else
- errmsg = i < ARRAY_SIZE(aer_uncorrectable_error_string) ?
- aer_uncorrectable_error_string[i] : NULL;
+ errmsg = strings[i];
+ if (!errmsg)
+ errmsg = "Unknown Error Bit";
- if (errmsg)
- pci_err(dev, " [%2d] %-22s%s\n", i, errmsg,
+ pci_err(dev, " [%2d] %-22s%s\n", i, errmsg,
info->first_error == i ? " (First)" : "");
- else
- pci_err(dev, " [%2d] Unknown Error Bit%s\n",
- i, info->first_error == i ? " (First)" : "");
}
pci_dev_aer_stats_incr(dev, info);
}
--
2.25.1
^ permalink raw reply related
* Re: [PATCH] pci: pcie: AER: Fix logging of Correctable errors
From: Bjorn Helgaas @ 2020-07-08 0:10 UTC (permalink / raw)
To: Matt Jolly
Cc: Sam Bobroff, linux-pci, linux-kernel, Oliver O'Halloran,
Bjorn Helgaas, linuxppc-dev
In-Reply-To: <20200618155511.16009-1-Kangie@footclan.ninja>
On Fri, Jun 19, 2020 at 01:55:11AM +1000, Matt Jolly wrote:
> The AER documentation indicates that correctable (severity=Corrected)
> errors should be output as a warning so that users can filter these
> errors if they choose to; This functionality does not appear to have been implemented.
>
> This patch modifies the functions aer_print_error and __aer_print_error
> to send correctable errors as a warning (pci_warn), rather than as an error (pci_err). It
> partially addresses several bugs in relation to kernel message buffer
> spam for misbehaving devices - the root cause (possibly device firmware?) isn't
> addressed, but the dmesg output is less alarming for end users, and can
> be filtered separately from uncorrectable errors. This should hopefully
> reduce the need for users to disable AER to suppress corrected errors.
>
> Link: https://bugzilla.kernel.org/show_bug.cgi?id=201517
> Link: https://bugzilla.kernel.org/show_bug.cgi?id=196183
>
> Signed-off-by: Matt Jolly <Kangie@footclan.ninja>
> ---
> drivers/pci/pcie/aer.c | 36 ++++++++++++++++++++++++++----------
> 1 file changed, 26 insertions(+), 10 deletions(-)
>
> diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c
> index 3acf56683915..131ecc0df2cb 100644
> --- a/drivers/pci/pcie/aer.c
> +++ b/drivers/pci/pcie/aer.c
> @@ -662,12 +662,18 @@ static void __aer_print_error(struct pci_dev *dev,
> errmsg = i < ARRAY_SIZE(aer_uncorrectable_error_string) ?
> aer_uncorrectable_error_string[i] : NULL;
>
> - if (errmsg)
> - pci_err(dev, " [%2d] %-22s%s\n", i, errmsg,
> - info->first_error == i ? " (First)" : "");
> - else
> + if (errmsg) {
> + if (info->severity == AER_CORRECTABLE) {
> + pci_warn(dev, " [%2d] %-22s%s\n", i, errmsg,
I think we can use pci_printk() here to reduce the code duplication.
And I think we can also simplify the aer_correctable_error_string/
aer_uncorrectable_error_string stuff above, which would make this even
simpler.
I'll respond to this with my proposal.
> + info->first_error == i ? " (First)" : "");
> + } else {
> + pci_err(dev, " [%2d] %-22s%s\n", i, errmsg,
> + info->first_error == i ? " (First)" : "");
> + }
> + } else {
> pci_err(dev, " [%2d] Unknown Error Bit%s\n",
> i, info->first_error == i ? " (First)" : "");
> + }
> }
> pci_dev_aer_stats_incr(dev, info);
> }
> @@ -686,13 +692,23 @@ void aer_print_error(struct pci_dev *dev, struct aer_err_info *info)
> layer = AER_GET_LAYER_ERROR(info->severity, info->status);
> agent = AER_GET_AGENT(info->severity, info->status);
>
> - pci_err(dev, "PCIe Bus Error: severity=%s, type=%s, (%s)\n",
> - aer_error_severity_string[info->severity],
> - aer_error_layer[layer], aer_agent_string[agent]);
> + if (info->severity == AER_CORRECTABLE) {
> + pci_warn(dev, "PCIe Bus Error: severity=%s, type=%s, (%s)\n",
> + aer_error_severity_string[info->severity],
> + aer_error_layer[layer], aer_agent_string[agent]);
>
> - pci_err(dev, " device [%04x:%04x] error status/mask=%08x/%08x\n",
> - dev->vendor, dev->device,
> - info->status, info->mask);
> + pci_warn(dev, " device [%04x:%04x] error status/mask=%08x/%08x\n",
> + dev->vendor, dev->device,
> + info->status, info->mask);
> + } else {
> + pci_err(dev, "PCIe Bus Error: severity=%s, type=%s, (%s)\n",
> + aer_error_severity_string[info->severity],
> + aer_error_layer[layer], aer_agent_string[agent]);
> +
> + pci_err(dev, " device [%04x:%04x] error status/mask=%08x/%08x\n",
> + dev->vendor, dev->device,
> + info->status, info->mask);
> + }
>
> __aer_print_error(dev, info);
>
> --
> 2.26.2
>
^ permalink raw reply
* Re: [PATCH 13/20] Documentation: mips/ingenic-tcu: eliminate duplicated word
From: Paul Cercueil @ 2020-07-07 18:26 UTC (permalink / raw)
To: Randy Dunlap
Cc: kvm, linux-doc, David Airlie, kgdb-bugreport, linux-fpga,
Liviu Dudau, dri-devel, Douglas Anderson, keyrings,
Paul Mackerras, linux-i2c, Pavel Machek, Srinivas Pandruvada,
Mihail Atanassov, linux-leds, linux-s390, Daniel Thompson,
linux-scsi, Jonathan Corbet, Masahiro Yamada, Matthew Wilcox,
Halil Pasic, Jarkko Sakkinen, James Wang, linux-input,
Mali DP Maintainers, Derek Kiernan, linux-mips, Dragan Cvetic,
Wu Hao, Tony Krowiak, linux-kbuild, James E.J. Bottomley,
Jiri Kosina, Hannes Reinecke, linux-block, Thomas Bogendoerfer,
Jacek Anaszewski, linux-mm, Dan Williams, Andrew Morton,
Mimi Zohar, Jens Axboe, Michal Marek, Martin K. Petersen,
Pierre Morel, linux-kernel, Wolfram Sang, Daniel Vetter,
Jason Wessel, Paolo Bonzini, linux-integrity, linuxppc-dev,
Mike Rapoport, Dan Murphy
In-Reply-To: <20200707180414.10467-14-rdunlap@infradead.org>
Hi,
Le mar. 7 juil. 2020 à 11:04, Randy Dunlap <rdunlap@infradead.org> a
écrit :
> Drop the doubled word "to".
>
> Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
> Cc: Jonathan Corbet <corbet@lwn.net>
> Cc: linux-doc@vger.kernel.org
> Cc: Paul Cercueil <paul@crapouillou.net>
> Cc: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
> Cc: linux-mips@vger.kernel.org
Reviewed-by: Paul Cercueil <paul@crapouillou.net>
Cheers,
-Paul
> ---
> Documentation/mips/ingenic-tcu.rst | 2 +-
> 1 file changed, 1 insertion(+), 1 deletion(-)
>
> --- linux-next-20200701.orig/Documentation/mips/ingenic-tcu.rst
> +++ linux-next-20200701/Documentation/mips/ingenic-tcu.rst
> @@ -5,7 +5,7 @@ Ingenic JZ47xx SoCs Timer/Counter Unit h
> ===============================================
>
> The Timer/Counter Unit (TCU) in Ingenic JZ47xx SoCs is a
> multi-function
> -hardware block. It features up to to eight channels, that can be
> used as
> +hardware block. It features up to eight channels, that can be used as
> counters, timers, or PWM.
>
> - JZ4725B, JZ4750, JZ4755 only have six TCU channels. The other SoCs
> all
^ permalink raw reply
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox