* Re: [PATCH] ftrace: Cleanup ftrace_dyn_arch_init()
From: Weizhao Ouyang @ 2021-09-04 11:59 UTC (permalink / raw)
To: rostedt, mingo
Cc: dalias, linux-ia64, linux-sh, linux-mips, James.Bottomley, guoren,
hpa, sparclinux, linux-riscv, deanbo422, will, linux-s390, ysato,
deller, x86, linux, linux-csky, borntraeger, catalin.marinas, aou,
gor, hca, bp, green.hu, paul.walmsley, tglx, linux-arm-kernel,
monstr, tsbogend, linux-parisc, nickhu, linux-kernel, palmer,
paulus, linuxppc-dev, davem
In-Reply-To: <20210903071817.1162938-1-o451686892@gmail.com>
On 2021/9/3 15:18, Weizhao Ouyang wrote:
> Most ARCHs use an empty ftrace_dyn_arch_init(); introduce a weak common
> ftrace_dyn_arch_init() to clean them up.
>
> Signed-off-by: Weizhao Ouyang <o451686892@gmail.com>
> ---
> arch/arm/kernel/ftrace.c | 5 -----
> arch/arm64/kernel/ftrace.c | 5 -----
> arch/csky/kernel/ftrace.c | 5 -----
> arch/ia64/kernel/ftrace.c | 6 ------
> arch/microblaze/kernel/ftrace.c | 5 -----
> arch/mips/include/asm/ftrace.h | 2 ++
> arch/nds32/kernel/ftrace.c | 5 -----
> arch/parisc/kernel/ftrace.c | 5 -----
> arch/powerpc/include/asm/ftrace.h | 4 ++++
> arch/riscv/kernel/ftrace.c | 5 -----
> arch/s390/kernel/ftrace.c | 5 -----
> arch/sh/kernel/ftrace.c | 5 -----
> arch/sparc/kernel/ftrace.c | 5 -----
> arch/x86/kernel/ftrace.c | 5 -----
> include/linux/ftrace.h | 1 -
> kernel/trace/ftrace.c | 5 +++++
> 16 files changed, 11 insertions(+), 62 deletions(-)
>
> diff --git a/arch/arm/kernel/ftrace.c b/arch/arm/kernel/ftrace.c
> index 3c83b5d29697..a006585e1c09 100644
> --- a/arch/arm/kernel/ftrace.c
> +++ b/arch/arm/kernel/ftrace.c
> @@ -193,11 +193,6 @@ int ftrace_make_nop(struct module *mod,
>
> return ret;
> }
> -
> -int __init ftrace_dyn_arch_init(void)
> -{
> - return 0;
> -}
> #endif /* CONFIG_DYNAMIC_FTRACE */
>
> #ifdef CONFIG_FUNCTION_GRAPH_TRACER
> diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c
> index 7f467bd9db7a..fc62dfe73f93 100644
> --- a/arch/arm64/kernel/ftrace.c
> +++ b/arch/arm64/kernel/ftrace.c
> @@ -236,11 +236,6 @@ void arch_ftrace_update_code(int command)
> command |= FTRACE_MAY_SLEEP;
> ftrace_modify_all_code(command);
> }
> -
> -int __init ftrace_dyn_arch_init(void)
> -{
> - return 0;
> -}
> #endif /* CONFIG_DYNAMIC_FTRACE */
>
> #ifdef CONFIG_FUNCTION_GRAPH_TRACER
> diff --git a/arch/csky/kernel/ftrace.c b/arch/csky/kernel/ftrace.c
> index b4a7ec1517ff..50bfcf129078 100644
> --- a/arch/csky/kernel/ftrace.c
> +++ b/arch/csky/kernel/ftrace.c
> @@ -133,11 +133,6 @@ int ftrace_update_ftrace_func(ftrace_func_t func)
> (unsigned long)func, true, true);
> return ret;
> }
> -
> -int __init ftrace_dyn_arch_init(void)
> -{
> - return 0;
> -}
> #endif /* CONFIG_DYNAMIC_FTRACE */
>
> #ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
> diff --git a/arch/ia64/kernel/ftrace.c b/arch/ia64/kernel/ftrace.c
> index b2ab2d58fb30..d6360fd404ab 100644
> --- a/arch/ia64/kernel/ftrace.c
> +++ b/arch/ia64/kernel/ftrace.c
> @@ -194,9 +194,3 @@ int ftrace_update_ftrace_func(ftrace_func_t func)
> flush_icache_range(addr, addr + 16);
> return 0;
> }
> -
> -/* run from kstop_machine */
> -int __init ftrace_dyn_arch_init(void)
> -{
> - return 0;
> -}
> diff --git a/arch/microblaze/kernel/ftrace.c b/arch/microblaze/kernel/ftrace.c
> index 224eea40e1ee..188749d62709 100644
> --- a/arch/microblaze/kernel/ftrace.c
> +++ b/arch/microblaze/kernel/ftrace.c
> @@ -163,11 +163,6 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
> return ret;
> }
>
> -int __init ftrace_dyn_arch_init(void)
> -{
> - return 0;
> -}
> -
> int ftrace_update_ftrace_func(ftrace_func_t func)
> {
> unsigned long ip = (unsigned long)(&ftrace_call);
> diff --git a/arch/mips/include/asm/ftrace.h b/arch/mips/include/asm/ftrace.h
> index b463f2aa5a61..ed013e767390 100644
> --- a/arch/mips/include/asm/ftrace.h
> +++ b/arch/mips/include/asm/ftrace.h
> @@ -76,6 +76,8 @@ do { \
>
>
> #ifdef CONFIG_DYNAMIC_FTRACE
> +int __init ftrace_dyn_arch_init(void);
> +
> static inline unsigned long ftrace_call_adjust(unsigned long addr)
> {
> return addr;
> diff --git a/arch/nds32/kernel/ftrace.c b/arch/nds32/kernel/ftrace.c
> index 0e23e3a8df6b..f0ef4842d191 100644
> --- a/arch/nds32/kernel/ftrace.c
> +++ b/arch/nds32/kernel/ftrace.c
> @@ -84,11 +84,6 @@ void _ftrace_caller(unsigned long parent_ip)
> /* restore all state needed by the compiler epilogue */
> }
>
> -int __init ftrace_dyn_arch_init(void)
> -{
> - return 0;
> -}
> -
> static unsigned long gen_sethi_insn(unsigned long addr)
> {
> unsigned long opcode = 0x46000000;
> diff --git a/arch/parisc/kernel/ftrace.c b/arch/parisc/kernel/ftrace.c
> index 0a1e75af5382..01581f715737 100644
> --- a/arch/parisc/kernel/ftrace.c
> +++ b/arch/parisc/kernel/ftrace.c
> @@ -94,11 +94,6 @@ int ftrace_disable_ftrace_graph_caller(void)
> #endif
>
> #ifdef CONFIG_DYNAMIC_FTRACE
> -
> -int __init ftrace_dyn_arch_init(void)
> -{
> - return 0;
> -}
> int ftrace_update_ftrace_func(ftrace_func_t func)
> {
> return 0;
> diff --git a/arch/powerpc/include/asm/ftrace.h b/arch/powerpc/include/asm/ftrace.h
> index debe8c4f7062..4db83cf4283f 100644
> --- a/arch/powerpc/include/asm/ftrace.h
> +++ b/arch/powerpc/include/asm/ftrace.h
> @@ -61,6 +61,10 @@ struct dyn_arch_ftrace {
> };
> #endif /* __ASSEMBLY__ */
>
> +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
> +int __init ftrace_dyn_arch_init(void);
> +#endif
> +
Sorry, the CONFIG option used in the #ifdef just above is a mistake; I will send a v2 patch later.
> #ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
> #define ARCH_SUPPORTS_FTRACE_OPS 1
> #endif
> diff --git a/arch/riscv/kernel/ftrace.c b/arch/riscv/kernel/ftrace.c
> index 7f1e5203de88..4716f4cdc038 100644
> --- a/arch/riscv/kernel/ftrace.c
> +++ b/arch/riscv/kernel/ftrace.c
> @@ -154,11 +154,6 @@ int ftrace_update_ftrace_func(ftrace_func_t func)
>
> return ret;
> }
> -
> -int __init ftrace_dyn_arch_init(void)
> -{
> - return 0;
> -}
> #endif
>
> #ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
> diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c
> index 0a464d328467..3fd80397ff52 100644
> --- a/arch/s390/kernel/ftrace.c
> +++ b/arch/s390/kernel/ftrace.c
> @@ -262,11 +262,6 @@ int ftrace_update_ftrace_func(ftrace_func_t func)
> return 0;
> }
>
> -int __init ftrace_dyn_arch_init(void)
> -{
> - return 0;
> -}
> -
> void arch_ftrace_update_code(int command)
> {
> if (ftrace_shared_hotpatch_trampoline(NULL))
> diff --git a/arch/sh/kernel/ftrace.c b/arch/sh/kernel/ftrace.c
> index 295c43315bbe..930001bb8c6a 100644
> --- a/arch/sh/kernel/ftrace.c
> +++ b/arch/sh/kernel/ftrace.c
> @@ -252,11 +252,6 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
>
> return ftrace_modify_code(rec->ip, old, new);
> }
> -
> -int __init ftrace_dyn_arch_init(void)
> -{
> - return 0;
> -}
> #endif /* CONFIG_DYNAMIC_FTRACE */
>
> #ifdef CONFIG_FUNCTION_GRAPH_TRACER
> diff --git a/arch/sparc/kernel/ftrace.c b/arch/sparc/kernel/ftrace.c
> index 684b84ce397f..eaead3da8e03 100644
> --- a/arch/sparc/kernel/ftrace.c
> +++ b/arch/sparc/kernel/ftrace.c
> @@ -82,11 +82,6 @@ int ftrace_update_ftrace_func(ftrace_func_t func)
> new = ftrace_call_replace(ip, (unsigned long)func);
> return ftrace_modify_code(ip, old, new);
> }
> -
> -int __init ftrace_dyn_arch_init(void)
> -{
> - return 0;
> -}
> #endif
>
> #ifdef CONFIG_FUNCTION_GRAPH_TRACER
> diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
> index 1b3ce3b4a2a2..23d221a9a3cd 100644
> --- a/arch/x86/kernel/ftrace.c
> +++ b/arch/x86/kernel/ftrace.c
> @@ -252,11 +252,6 @@ void arch_ftrace_update_code(int command)
> ftrace_modify_all_code(command);
> }
>
> -int __init ftrace_dyn_arch_init(void)
> -{
> - return 0;
> -}
> -
> /* Currently only x86_64 supports dynamic trampolines */
> #ifdef CONFIG_X86_64
>
> diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
> index 832e65f06754..f1eca123d89d 100644
> --- a/include/linux/ftrace.h
> +++ b/include/linux/ftrace.h
> @@ -573,7 +573,6 @@ ftrace_set_early_filter(struct ftrace_ops *ops, char *buf, int enable);
>
> /* defined in arch */
> extern int ftrace_ip_converted(unsigned long ip);
> -extern int ftrace_dyn_arch_init(void);
> extern void ftrace_replace_code(int enable);
> extern int ftrace_update_ftrace_func(ftrace_func_t func);
> extern void ftrace_caller(void);
> diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
> index 7efbc8aaf7f6..4c090323198d 100644
> --- a/kernel/trace/ftrace.c
> +++ b/kernel/trace/ftrace.c
> @@ -6846,6 +6846,11 @@ void __init ftrace_free_init_mem(void)
> ftrace_free_mem(NULL, start, end);
> }
>
> +int __init __weak ftrace_dyn_arch_init(void)
> +{
> + return 0;
> +}
> +
> void __init ftrace_init(void)
> {
> extern unsigned long __start_mcount_loc[];
Thanks.
^ permalink raw reply
* [PATCH kernel v2] KVM: PPC: Merge powerpc's debugfs entry content into generic entry
From: Alexey Kardashevskiy @ 2021-09-04 13:35 UTC (permalink / raw)
To: linuxppc-dev
Cc: kvm, Fabiano Rosas, Alexey Kardashevskiy, kvm-ppc, Paolo Bonzini
At the moment the generic KVM code creates an "%pid-%fd" entry per KVM
instance, and the PPC HV KVM creates its own at "vm%pid". The Book3E KVM
creates its own entry for timings.
The problems with the PPC entries are:
1. they do not allow multiple VMs in the same process (which is an extremely
rare case, mostly used by the syzkaller fuzzer);
2. they are prone to race bugs like the one the generic KVM code fixed in
commit 85cd39af14f4 ("KVM: Do not leak memory for duplicate debugfs
directories").
This defines kvm_arch_create_vm_debugfs(), similar to the one for vcpus.
This defines 2 hooks in kvmppc_ops for allowing specific KVM
implementations to add necessary entries. This defines handlers
for HV KVM and defines the Book3E debugfs vcpu helper as a handler.
This makes use of the already existing kvm_arch_create_vcpu_debugfs
on PPC.
This removes the no longer used debugfs_dir pointers from PPC kvm_arch structs.
Suggested-by: Fabiano Rosas <farosas@linux.ibm.com>
Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
---
Changes:
v2:
* handled powerpc-booke
* s/kvm/vm/ in arch hooks
---
arch/powerpc/include/asm/kvm_host.h | 7 +++---
arch/powerpc/include/asm/kvm_ppc.h | 2 ++
arch/powerpc/kvm/timing.h | 7 +++---
include/linux/kvm_host.h | 3 +++
arch/powerpc/kvm/book3s_64_mmu_hv.c | 2 +-
arch/powerpc/kvm/book3s_64_mmu_radix.c | 2 +-
arch/powerpc/kvm/book3s_hv.c | 30 +++++++++-----------------
arch/powerpc/kvm/e500.c | 1 +
arch/powerpc/kvm/e500mc.c | 1 +
arch/powerpc/kvm/powerpc.c | 15 ++++++++++---
arch/powerpc/kvm/timing.c | 20 ++++-------------
virt/kvm/kvm_main.c | 3 +++
12 files changed, 44 insertions(+), 49 deletions(-)
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 2bcac6da0a4b..f29b66cc2163 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -296,7 +296,6 @@ struct kvm_arch {
bool dawr1_enabled;
pgd_t *pgtable;
u64 process_table;
- struct dentry *debugfs_dir;
struct kvm_resize_hpt *resize_hpt; /* protected by kvm->lock */
#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
@@ -672,7 +671,6 @@ struct kvm_vcpu_arch {
u64 timing_min_duration[__NUMBER_OF_KVM_EXIT_TYPES];
u64 timing_max_duration[__NUMBER_OF_KVM_EXIT_TYPES];
u64 timing_last_exit;
- struct dentry *debugfs_exit_timing;
#endif
#ifdef CONFIG_PPC_BOOK3S
@@ -828,8 +826,6 @@ struct kvm_vcpu_arch {
struct kvmhv_tb_accumulator rm_exit; /* real-mode exit code */
struct kvmhv_tb_accumulator guest_time; /* guest execution */
struct kvmhv_tb_accumulator cede_time; /* time napping inside guest */
-
- struct dentry *debugfs_dir;
#endif /* CONFIG_KVM_BOOK3S_HV_EXIT_TIMING */
};
@@ -868,4 +864,7 @@ static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {}
static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {}
static inline void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu) {}
+#define __KVM_HAVE_ARCH_VCPU_DEBUGFS
+#define __KVM_HAVE_ARCH_KVM_DEBUGFS
+
#endif /* __POWERPC_KVM_HOST_H__ */
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index 6355a6980ccf..fd841e844b90 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -316,6 +316,8 @@ struct kvmppc_ops {
int (*svm_off)(struct kvm *kvm);
int (*enable_dawr1)(struct kvm *kvm);
bool (*hash_v3_possible)(void);
+ void (*create_vm_debugfs)(struct kvm *kvm);
+ void (*create_vcpu_debugfs)(struct kvm_vcpu *vcpu, struct dentry *debugfs_dentry);
};
extern struct kvmppc_ops *kvmppc_hv_ops;
diff --git a/arch/powerpc/kvm/timing.h b/arch/powerpc/kvm/timing.h
index feef7885ba82..36f7c201c6f1 100644
--- a/arch/powerpc/kvm/timing.h
+++ b/arch/powerpc/kvm/timing.h
@@ -14,8 +14,8 @@
#ifdef CONFIG_KVM_EXIT_TIMING
void kvmppc_init_timing_stats(struct kvm_vcpu *vcpu);
void kvmppc_update_timing_stats(struct kvm_vcpu *vcpu);
-void kvmppc_create_vcpu_debugfs(struct kvm_vcpu *vcpu, unsigned int id);
-void kvmppc_remove_vcpu_debugfs(struct kvm_vcpu *vcpu);
+void kvmppc_create_vcpu_debugfs(struct kvm_vcpu *vcpu,
+ struct dentry *debugfs_dentry);
static inline void kvmppc_set_exit_type(struct kvm_vcpu *vcpu, int type)
{
@@ -27,8 +27,7 @@ static inline void kvmppc_set_exit_type(struct kvm_vcpu *vcpu, int type)
static inline void kvmppc_init_timing_stats(struct kvm_vcpu *vcpu) {}
static inline void kvmppc_update_timing_stats(struct kvm_vcpu *vcpu) {}
static inline void kvmppc_create_vcpu_debugfs(struct kvm_vcpu *vcpu,
- unsigned int id) {}
-static inline void kvmppc_remove_vcpu_debugfs(struct kvm_vcpu *vcpu) {}
+ struct dentry *debugfs_dentry) {}
static inline void kvmppc_set_exit_type(struct kvm_vcpu *vcpu, int type) {}
#endif /* CONFIG_KVM_EXIT_TIMING */
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index ae7735b490b4..4f22b1201a0d 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1021,6 +1021,9 @@ int kvm_arch_pm_notifier(struct kvm *kvm, unsigned long state);
#ifdef __KVM_HAVE_ARCH_VCPU_DEBUGFS
void kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu, struct dentry *debugfs_dentry);
#endif
+#ifdef __KVM_HAVE_ARCH_KVM_DEBUGFS
+void kvm_arch_create_vm_debugfs(struct kvm *kvm);
+#endif
int kvm_arch_hardware_enable(void);
void kvm_arch_hardware_disable(void);
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index c63e263312a4..33dae253a0ac 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -2112,7 +2112,7 @@ static const struct file_operations debugfs_htab_fops = {
void kvmppc_mmu_debugfs_init(struct kvm *kvm)
{
- debugfs_create_file("htab", 0400, kvm->arch.debugfs_dir, kvm,
+ debugfs_create_file("htab", 0400, kvm->debugfs_dentry, kvm,
&debugfs_htab_fops);
}
diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c
index c5508744e14c..f4e083c20872 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_radix.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c
@@ -1452,7 +1452,7 @@ static const struct file_operations debugfs_radix_fops = {
void kvmhv_radix_debugfs_init(struct kvm *kvm)
{
- debugfs_create_file("radix", 0400, kvm->arch.debugfs_dir, kvm,
+ debugfs_create_file("radix", 0400, kvm->debugfs_dentry, kvm,
&debugfs_radix_fops);
}
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index c8f12b056968..046df9e0d462 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -2771,19 +2771,14 @@ static const struct file_operations debugfs_timings_ops = {
};
/* Create a debugfs directory for the vcpu */
-static void debugfs_vcpu_init(struct kvm_vcpu *vcpu, unsigned int id)
+static void kvmppc_arch_create_vcpu_debugfs_hv(struct kvm_vcpu *vcpu, struct dentry *debugfs_dentry)
{
- char buf[16];
- struct kvm *kvm = vcpu->kvm;
-
- snprintf(buf, sizeof(buf), "vcpu%u", id);
- vcpu->arch.debugfs_dir = debugfs_create_dir(buf, kvm->arch.debugfs_dir);
- debugfs_create_file("timings", 0444, vcpu->arch.debugfs_dir, vcpu,
+ debugfs_create_file("timings", 0444, debugfs_dentry, vcpu,
&debugfs_timings_ops);
}
#else /* CONFIG_KVM_BOOK3S_HV_EXIT_TIMING */
-static void debugfs_vcpu_init(struct kvm_vcpu *vcpu, unsigned int id)
+static void kvmppc_arch_create_vcpu_debugfs_hv(struct kvm_vcpu *vcpu, struct dentry *debugfs_dentry)
{
}
#endif /* CONFIG_KVM_BOOK3S_HV_EXIT_TIMING */
@@ -2907,8 +2902,6 @@ static int kvmppc_core_vcpu_create_hv(struct kvm_vcpu *vcpu)
vcpu->arch.cpu_type = KVM_CPU_3S_64;
kvmppc_sanity_check(vcpu);
- debugfs_vcpu_init(vcpu, id);
-
return 0;
}
@@ -5186,7 +5179,6 @@ void kvmppc_free_host_rm_ops(void)
static int kvmppc_core_init_vm_hv(struct kvm *kvm)
{
unsigned long lpcr, lpid;
- char buf[32];
int ret;
mutex_init(&kvm->arch.uvmem_lock);
@@ -5319,16 +5311,14 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm)
kvm->arch.smt_mode = 1;
kvm->arch.emul_smt_mode = 1;
- /*
- * Create a debugfs directory for the VM
- */
- snprintf(buf, sizeof(buf), "vm%d", current->pid);
- kvm->arch.debugfs_dir = debugfs_create_dir(buf, kvm_debugfs_dir);
+ return 0;
+}
+
+static void kvmppc_arch_create_vm_debugfs_hv(struct kvm *kvm)
+{
kvmppc_mmu_debugfs_init(kvm);
if (radix_enabled())
kvmhv_radix_debugfs_init(kvm);
-
- return 0;
}
static void kvmppc_free_vcores(struct kvm *kvm)
@@ -5342,8 +5332,6 @@ static void kvmppc_free_vcores(struct kvm *kvm)
static void kvmppc_core_destroy_vm_hv(struct kvm *kvm)
{
- debugfs_remove_recursive(kvm->arch.debugfs_dir);
-
if (!cpu_has_feature(CPU_FTR_ARCH_300))
kvm_hv_vm_deactivated();
@@ -5996,6 +5984,8 @@ static struct kvmppc_ops kvm_ops_hv = {
.svm_off = kvmhv_svm_off,
.enable_dawr1 = kvmhv_enable_dawr1,
.hash_v3_possible = kvmppc_hash_v3_possible,
+ .create_vcpu_debugfs = kvmppc_arch_create_vcpu_debugfs_hv,
+ .create_vm_debugfs = kvmppc_arch_create_vm_debugfs_hv,
};
static int kvm_init_subcore_bitmap(void)
diff --git a/arch/powerpc/kvm/e500.c b/arch/powerpc/kvm/e500.c
index 7e8b69015d20..d82e70c3e0a9 100644
--- a/arch/powerpc/kvm/e500.c
+++ b/arch/powerpc/kvm/e500.c
@@ -495,6 +495,7 @@ static struct kvmppc_ops kvm_ops_e500 = {
.emulate_op = kvmppc_core_emulate_op_e500,
.emulate_mtspr = kvmppc_core_emulate_mtspr_e500,
.emulate_mfspr = kvmppc_core_emulate_mfspr_e500,
+ .create_vcpu_debugfs = kvmppc_create_vcpu_debugfs,
};
static int __init kvmppc_e500_init(void)
diff --git a/arch/powerpc/kvm/e500mc.c b/arch/powerpc/kvm/e500mc.c
index 1c189b5aadcc..45eacd949f4b 100644
--- a/arch/powerpc/kvm/e500mc.c
+++ b/arch/powerpc/kvm/e500mc.c
@@ -381,6 +381,7 @@ static struct kvmppc_ops kvm_ops_e500mc = {
.emulate_op = kvmppc_core_emulate_op_e500,
.emulate_mtspr = kvmppc_core_emulate_mtspr_e500,
.emulate_mfspr = kvmppc_core_emulate_mfspr_e500,
+ .create_vcpu_debugfs = kvmppc_create_vcpu_debugfs,
};
static int __init kvmppc_e500mc_init(void)
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index c248d6d8b9e3..c895521ac6e9 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -763,7 +763,6 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
goto out_vcpu_uninit;
vcpu->arch.waitp = &vcpu->wait;
- kvmppc_create_vcpu_debugfs(vcpu, vcpu->vcpu_id);
return 0;
out_vcpu_uninit:
@@ -780,8 +779,6 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
/* Make sure we're not using the vcpu anymore */
hrtimer_cancel(&vcpu->arch.dec_timer);
- kvmppc_remove_vcpu_debugfs(vcpu);
-
switch (vcpu->arch.irq_type) {
case KVMPPC_IRQ_MPIC:
kvmppc_mpic_disconnect_vcpu(vcpu->arch.mpic, vcpu);
@@ -2505,3 +2502,15 @@ int kvm_arch_init(void *opaque)
}
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_ppc_instr);
+
+void kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu, struct dentry *debugfs_dentry)
+{
+ if (vcpu->kvm->arch.kvm_ops->create_vcpu_debugfs)
+ vcpu->kvm->arch.kvm_ops->create_vcpu_debugfs(vcpu, debugfs_dentry);
+}
+
+void kvm_arch_create_vm_debugfs(struct kvm *kvm)
+{
+ if (kvm->arch.kvm_ops->create_vm_debugfs)
+ kvm->arch.kvm_ops->create_vm_debugfs(kvm);
+}
diff --git a/arch/powerpc/kvm/timing.c b/arch/powerpc/kvm/timing.c
index ba56a5cbba97..e1c17afc714d 100644
--- a/arch/powerpc/kvm/timing.c
+++ b/arch/powerpc/kvm/timing.c
@@ -204,21 +204,9 @@ static const struct file_operations kvmppc_exit_timing_fops = {
.release = single_release,
};
-void kvmppc_create_vcpu_debugfs(struct kvm_vcpu *vcpu, unsigned int id)
+void kvmppc_create_vcpu_debugfs(struct kvm_vcpu *vcpu,
+ struct dentry *debugfs_dentry)
{
- static char dbg_fname[50];
- struct dentry *debugfs_file;
-
- snprintf(dbg_fname, sizeof(dbg_fname), "vm%u_vcpu%u_timing",
- current->pid, id);
- debugfs_file = debugfs_create_file(dbg_fname, 0666, kvm_debugfs_dir,
- vcpu, &kvmppc_exit_timing_fops);
-
- vcpu->arch.debugfs_exit_timing = debugfs_file;
-}
-
-void kvmppc_remove_vcpu_debugfs(struct kvm_vcpu *vcpu)
-{
- debugfs_remove(vcpu->arch.debugfs_exit_timing);
- vcpu->arch.debugfs_exit_timing = NULL;
+ debugfs_create_file("timing", 0666, debugfs_dentry,
+ vcpu, &kvmppc_exit_timing_fops);
}
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index b50dbe269f4b..85b2550e18e7 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -954,6 +954,9 @@ static int kvm_create_vm_debugfs(struct kvm *kvm, int fd)
kvm->debugfs_dentry, stat_data,
&stat_fops_per_vm);
}
+#ifdef __KVM_HAVE_ARCH_KVM_DEBUGFS
+ kvm_arch_create_vm_debugfs(kvm);
+#endif
return 0;
}
--
2.30.2
^ permalink raw reply related
* Re: [PATCH kernel v2] KVM: PPC: Merge powerpc's debugfs entry content into generic entry
From: Alexey Kardashevskiy @ 2021-09-05 2:27 UTC (permalink / raw)
To: linuxppc-dev; +Cc: Paolo Bonzini, Fabiano Rosas, kvm-ppc, kvm
In-Reply-To: <20210904133532.2871562-1-aik@ozlabs.ru>
Please ignore this one, v3 is coming.
After I posted this, I suddenly realized that the vcpu debugfs entries
remain for as long as the VM exists, so this does not handle vcpu
hotunplug+hotplug (the ppc book3e code did handle this). Thanks,
On 04/09/2021 23:35, Alexey Kardashevskiy wrote:
> At the moment the generic KVM code creates an "%pid-%fd" entry per KVM
> instance, and the PPC HV KVM creates its own at "vm%pid". The Book3E KVM
> creates its own entry for timings.
>
> The problems with the PPC entries are:
> 1. they do not allow multiple VMs in the same process (which is an extremely
> rare case, mostly used by the syzkaller fuzzer);
> 2. they are prone to race bugs like the one the generic KVM code fixed in
> commit 85cd39af14f4 ("KVM: Do not leak memory for duplicate debugfs
> directories").
>
> This defines kvm_arch_create_vm_debugfs(), similar to the one for vcpus.
>
> This defines 2 hooks in kvmppc_ops for allowing specific KVM
> implementations to add necessary entries. This defines handlers
> for HV KVM and defines the Book3E debugfs vcpu helper as a handler.
>
> This makes use of the already existing kvm_arch_create_vcpu_debugfs
> on PPC.
>
> This removes the no longer used debugfs_dir pointers from PPC kvm_arch structs.
>
> Suggested-by: Fabiano Rosas <farosas@linux.ibm.com>
> Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
> ---
> Changes:
> v2:
> * handled powerpc-booke
> * s/kvm/vm/ in arch hooks
> ---
> arch/powerpc/include/asm/kvm_host.h | 7 +++---
> arch/powerpc/include/asm/kvm_ppc.h | 2 ++
> arch/powerpc/kvm/timing.h | 7 +++---
> include/linux/kvm_host.h | 3 +++
> arch/powerpc/kvm/book3s_64_mmu_hv.c | 2 +-
> arch/powerpc/kvm/book3s_64_mmu_radix.c | 2 +-
> arch/powerpc/kvm/book3s_hv.c | 30 +++++++++-----------------
> arch/powerpc/kvm/e500.c | 1 +
> arch/powerpc/kvm/e500mc.c | 1 +
> arch/powerpc/kvm/powerpc.c | 15 ++++++++++---
> arch/powerpc/kvm/timing.c | 20 ++++-------------
> virt/kvm/kvm_main.c | 3 +++
> 12 files changed, 44 insertions(+), 49 deletions(-)
>
> diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
> index 2bcac6da0a4b..f29b66cc2163 100644
> --- a/arch/powerpc/include/asm/kvm_host.h
> +++ b/arch/powerpc/include/asm/kvm_host.h
> @@ -296,7 +296,6 @@ struct kvm_arch {
> bool dawr1_enabled;
> pgd_t *pgtable;
> u64 process_table;
> - struct dentry *debugfs_dir;
> struct kvm_resize_hpt *resize_hpt; /* protected by kvm->lock */
> #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
> #ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
> @@ -672,7 +671,6 @@ struct kvm_vcpu_arch {
> u64 timing_min_duration[__NUMBER_OF_KVM_EXIT_TYPES];
> u64 timing_max_duration[__NUMBER_OF_KVM_EXIT_TYPES];
> u64 timing_last_exit;
> - struct dentry *debugfs_exit_timing;
> #endif
>
> #ifdef CONFIG_PPC_BOOK3S
> @@ -828,8 +826,6 @@ struct kvm_vcpu_arch {
> struct kvmhv_tb_accumulator rm_exit; /* real-mode exit code */
> struct kvmhv_tb_accumulator guest_time; /* guest execution */
> struct kvmhv_tb_accumulator cede_time; /* time napping inside guest */
> -
> - struct dentry *debugfs_dir;
> #endif /* CONFIG_KVM_BOOK3S_HV_EXIT_TIMING */
> };
>
> @@ -868,4 +864,7 @@ static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {}
> static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {}
> static inline void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu) {}
>
> +#define __KVM_HAVE_ARCH_VCPU_DEBUGFS
> +#define __KVM_HAVE_ARCH_KVM_DEBUGFS
> +
> #endif /* __POWERPC_KVM_HOST_H__ */
> diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
> index 6355a6980ccf..fd841e844b90 100644
> --- a/arch/powerpc/include/asm/kvm_ppc.h
> +++ b/arch/powerpc/include/asm/kvm_ppc.h
> @@ -316,6 +316,8 @@ struct kvmppc_ops {
> int (*svm_off)(struct kvm *kvm);
> int (*enable_dawr1)(struct kvm *kvm);
> bool (*hash_v3_possible)(void);
> + void (*create_vm_debugfs)(struct kvm *kvm);
> + void (*create_vcpu_debugfs)(struct kvm_vcpu *vcpu, struct dentry *debugfs_dentry);
> };
>
> extern struct kvmppc_ops *kvmppc_hv_ops;
> diff --git a/arch/powerpc/kvm/timing.h b/arch/powerpc/kvm/timing.h
> index feef7885ba82..36f7c201c6f1 100644
> --- a/arch/powerpc/kvm/timing.h
> +++ b/arch/powerpc/kvm/timing.h
> @@ -14,8 +14,8 @@
> #ifdef CONFIG_KVM_EXIT_TIMING
> void kvmppc_init_timing_stats(struct kvm_vcpu *vcpu);
> void kvmppc_update_timing_stats(struct kvm_vcpu *vcpu);
> -void kvmppc_create_vcpu_debugfs(struct kvm_vcpu *vcpu, unsigned int id);
> -void kvmppc_remove_vcpu_debugfs(struct kvm_vcpu *vcpu);
> +void kvmppc_create_vcpu_debugfs(struct kvm_vcpu *vcpu,
> + struct dentry *debugfs_dentry);
>
> static inline void kvmppc_set_exit_type(struct kvm_vcpu *vcpu, int type)
> {
> @@ -27,8 +27,7 @@ static inline void kvmppc_set_exit_type(struct kvm_vcpu *vcpu, int type)
> static inline void kvmppc_init_timing_stats(struct kvm_vcpu *vcpu) {}
> static inline void kvmppc_update_timing_stats(struct kvm_vcpu *vcpu) {}
> static inline void kvmppc_create_vcpu_debugfs(struct kvm_vcpu *vcpu,
> - unsigned int id) {}
> -static inline void kvmppc_remove_vcpu_debugfs(struct kvm_vcpu *vcpu) {}
> + struct dentry *debugfs_dentry) {}
> static inline void kvmppc_set_exit_type(struct kvm_vcpu *vcpu, int type) {}
> #endif /* CONFIG_KVM_EXIT_TIMING */
>
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index ae7735b490b4..4f22b1201a0d 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -1021,6 +1021,9 @@ int kvm_arch_pm_notifier(struct kvm *kvm, unsigned long state);
> #ifdef __KVM_HAVE_ARCH_VCPU_DEBUGFS
> void kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu, struct dentry *debugfs_dentry);
> #endif
> +#ifdef __KVM_HAVE_ARCH_KVM_DEBUGFS
> +void kvm_arch_create_vm_debugfs(struct kvm *kvm);
> +#endif
>
> int kvm_arch_hardware_enable(void);
> void kvm_arch_hardware_disable(void);
> diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
> index c63e263312a4..33dae253a0ac 100644
> --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
> +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
> @@ -2112,7 +2112,7 @@ static const struct file_operations debugfs_htab_fops = {
>
> void kvmppc_mmu_debugfs_init(struct kvm *kvm)
> {
> - debugfs_create_file("htab", 0400, kvm->arch.debugfs_dir, kvm,
> + debugfs_create_file("htab", 0400, kvm->debugfs_dentry, kvm,
> &debugfs_htab_fops);
> }
>
> diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c
> index c5508744e14c..f4e083c20872 100644
> --- a/arch/powerpc/kvm/book3s_64_mmu_radix.c
> +++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c
> @@ -1452,7 +1452,7 @@ static const struct file_operations debugfs_radix_fops = {
>
> void kvmhv_radix_debugfs_init(struct kvm *kvm)
> {
> - debugfs_create_file("radix", 0400, kvm->arch.debugfs_dir, kvm,
> + debugfs_create_file("radix", 0400, kvm->debugfs_dentry, kvm,
> &debugfs_radix_fops);
> }
>
> diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
> index c8f12b056968..046df9e0d462 100644
> --- a/arch/powerpc/kvm/book3s_hv.c
> +++ b/arch/powerpc/kvm/book3s_hv.c
> @@ -2771,19 +2771,14 @@ static const struct file_operations debugfs_timings_ops = {
> };
>
> /* Create a debugfs directory for the vcpu */
> -static void debugfs_vcpu_init(struct kvm_vcpu *vcpu, unsigned int id)
> +static void kvmppc_arch_create_vcpu_debugfs_hv(struct kvm_vcpu *vcpu, struct dentry *debugfs_dentry)
> {
> - char buf[16];
> - struct kvm *kvm = vcpu->kvm;
> -
> - snprintf(buf, sizeof(buf), "vcpu%u", id);
> - vcpu->arch.debugfs_dir = debugfs_create_dir(buf, kvm->arch.debugfs_dir);
> - debugfs_create_file("timings", 0444, vcpu->arch.debugfs_dir, vcpu,
> + debugfs_create_file("timings", 0444, debugfs_dentry, vcpu,
> &debugfs_timings_ops);
> }
>
> #else /* CONFIG_KVM_BOOK3S_HV_EXIT_TIMING */
> -static void debugfs_vcpu_init(struct kvm_vcpu *vcpu, unsigned int id)
> +static void kvmppc_arch_create_vcpu_debugfs_hv(struct kvm_vcpu *vcpu, struct dentry *debugfs_dentry)
> {
> }
> #endif /* CONFIG_KVM_BOOK3S_HV_EXIT_TIMING */
> @@ -2907,8 +2902,6 @@ static int kvmppc_core_vcpu_create_hv(struct kvm_vcpu *vcpu)
> vcpu->arch.cpu_type = KVM_CPU_3S_64;
> kvmppc_sanity_check(vcpu);
>
> - debugfs_vcpu_init(vcpu, id);
> -
> return 0;
> }
>
> @@ -5186,7 +5179,6 @@ void kvmppc_free_host_rm_ops(void)
> static int kvmppc_core_init_vm_hv(struct kvm *kvm)
> {
> unsigned long lpcr, lpid;
> - char buf[32];
> int ret;
>
> mutex_init(&kvm->arch.uvmem_lock);
> @@ -5319,16 +5311,14 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm)
> kvm->arch.smt_mode = 1;
> kvm->arch.emul_smt_mode = 1;
>
> - /*
> - * Create a debugfs directory for the VM
> - */
> - snprintf(buf, sizeof(buf), "vm%d", current->pid);
> - kvm->arch.debugfs_dir = debugfs_create_dir(buf, kvm_debugfs_dir);
> + return 0;
> +}
> +
> +static void kvmppc_arch_create_vm_debugfs_hv(struct kvm *kvm)
> +{
> kvmppc_mmu_debugfs_init(kvm);
> if (radix_enabled())
> kvmhv_radix_debugfs_init(kvm);
> -
> - return 0;
> }
>
> static void kvmppc_free_vcores(struct kvm *kvm)
> @@ -5342,8 +5332,6 @@ static void kvmppc_free_vcores(struct kvm *kvm)
>
> static void kvmppc_core_destroy_vm_hv(struct kvm *kvm)
> {
> - debugfs_remove_recursive(kvm->arch.debugfs_dir);
> -
> if (!cpu_has_feature(CPU_FTR_ARCH_300))
> kvm_hv_vm_deactivated();
>
> @@ -5996,6 +5984,8 @@ static struct kvmppc_ops kvm_ops_hv = {
> .svm_off = kvmhv_svm_off,
> .enable_dawr1 = kvmhv_enable_dawr1,
> .hash_v3_possible = kvmppc_hash_v3_possible,
> + .create_vcpu_debugfs = kvmppc_arch_create_vcpu_debugfs_hv,
> + .create_vm_debugfs = kvmppc_arch_create_vm_debugfs_hv,
> };
>
> static int kvm_init_subcore_bitmap(void)
> diff --git a/arch/powerpc/kvm/e500.c b/arch/powerpc/kvm/e500.c
> index 7e8b69015d20..d82e70c3e0a9 100644
> --- a/arch/powerpc/kvm/e500.c
> +++ b/arch/powerpc/kvm/e500.c
> @@ -495,6 +495,7 @@ static struct kvmppc_ops kvm_ops_e500 = {
> .emulate_op = kvmppc_core_emulate_op_e500,
> .emulate_mtspr = kvmppc_core_emulate_mtspr_e500,
> .emulate_mfspr = kvmppc_core_emulate_mfspr_e500,
> + .create_vcpu_debugfs = kvmppc_create_vcpu_debugfs,
> };
>
> static int __init kvmppc_e500_init(void)
> diff --git a/arch/powerpc/kvm/e500mc.c b/arch/powerpc/kvm/e500mc.c
> index 1c189b5aadcc..45eacd949f4b 100644
> --- a/arch/powerpc/kvm/e500mc.c
> +++ b/arch/powerpc/kvm/e500mc.c
> @@ -381,6 +381,7 @@ static struct kvmppc_ops kvm_ops_e500mc = {
> .emulate_op = kvmppc_core_emulate_op_e500,
> .emulate_mtspr = kvmppc_core_emulate_mtspr_e500,
> .emulate_mfspr = kvmppc_core_emulate_mfspr_e500,
> + .create_vcpu_debugfs = kvmppc_create_vcpu_debugfs,
> };
>
> static int __init kvmppc_e500mc_init(void)
> diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
> index c248d6d8b9e3..c895521ac6e9 100644
> --- a/arch/powerpc/kvm/powerpc.c
> +++ b/arch/powerpc/kvm/powerpc.c
> @@ -763,7 +763,6 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
> goto out_vcpu_uninit;
>
> vcpu->arch.waitp = &vcpu->wait;
> - kvmppc_create_vcpu_debugfs(vcpu, vcpu->vcpu_id);
> return 0;
>
> out_vcpu_uninit:
> @@ -780,8 +779,6 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
> /* Make sure we're not using the vcpu anymore */
> hrtimer_cancel(&vcpu->arch.dec_timer);
>
> - kvmppc_remove_vcpu_debugfs(vcpu);
> -
> switch (vcpu->arch.irq_type) {
> case KVMPPC_IRQ_MPIC:
> kvmppc_mpic_disconnect_vcpu(vcpu->arch.mpic, vcpu);
> @@ -2505,3 +2502,15 @@ int kvm_arch_init(void *opaque)
> }
>
> EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_ppc_instr);
> +
> +void kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu, struct dentry *debugfs_dentry)
> +{
> + if (vcpu->kvm->arch.kvm_ops->create_vcpu_debugfs)
> + vcpu->kvm->arch.kvm_ops->create_vcpu_debugfs(vcpu, debugfs_dentry);
> +}
> +
> +void kvm_arch_create_vm_debugfs(struct kvm *kvm)
> +{
> + if (kvm->arch.kvm_ops->create_vm_debugfs)
> + kvm->arch.kvm_ops->create_vm_debugfs(kvm);
> +}
> diff --git a/arch/powerpc/kvm/timing.c b/arch/powerpc/kvm/timing.c
> index ba56a5cbba97..e1c17afc714d 100644
> --- a/arch/powerpc/kvm/timing.c
> +++ b/arch/powerpc/kvm/timing.c
> @@ -204,21 +204,9 @@ static const struct file_operations kvmppc_exit_timing_fops = {
> .release = single_release,
> };
>
> -void kvmppc_create_vcpu_debugfs(struct kvm_vcpu *vcpu, unsigned int id)
> +void kvmppc_create_vcpu_debugfs(struct kvm_vcpu *vcpu,
> + struct dentry *debugfs_dentry)
> {
> - static char dbg_fname[50];
> - struct dentry *debugfs_file;
> -
> - snprintf(dbg_fname, sizeof(dbg_fname), "vm%u_vcpu%u_timing",
> - current->pid, id);
> - debugfs_file = debugfs_create_file(dbg_fname, 0666, kvm_debugfs_dir,
> - vcpu, &kvmppc_exit_timing_fops);
> -
> - vcpu->arch.debugfs_exit_timing = debugfs_file;
> -}
> -
> -void kvmppc_remove_vcpu_debugfs(struct kvm_vcpu *vcpu)
> -{
> - debugfs_remove(vcpu->arch.debugfs_exit_timing);
> - vcpu->arch.debugfs_exit_timing = NULL;
> + debugfs_create_file("timing", 0666, debugfs_dentry,
> + vcpu, &kvmppc_exit_timing_fops);
> }
> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> index b50dbe269f4b..85b2550e18e7 100644
> --- a/virt/kvm/kvm_main.c
> +++ b/virt/kvm/kvm_main.c
> @@ -954,6 +954,9 @@ static int kvm_create_vm_debugfs(struct kvm *kvm, int fd)
> kvm->debugfs_dentry, stat_data,
> &stat_fops_per_vm);
> }
> +#ifdef __KVM_HAVE_ARCH_KVM_DEBUGFS
> + kvm_arch_create_vm_debugfs(kvm);
> +#endif
> return 0;
> }
>
>
--
Alexey
^ permalink raw reply
* Re: [PATCH kernel v2] KVM: PPC: Merge powerpc's debugfs entry content into generic entry
From: Alexey Kardashevskiy @ 2021-09-05 4:30 UTC (permalink / raw)
To: linuxppc-dev; +Cc: Paolo Bonzini, Fabiano Rosas, kvm-ppc, kvm
In-Reply-To: <872d75a4-08e2-f597-0bee-6be9fdce0ac1@ozlabs.ru>
Huh, not sure anymore after reading d56f5136b0102 "KVM: let
kvm_destroy_vm_debugfs clean up vCPU debugfs directories" which removes
debugfs_dentry from vcpu. Paolo?
On 05/09/2021 12:27, Alexey Kardashevskiy wrote:
> Please ignore this one, v3 is coming.
>
> After I posted this, I suddenly realized that the vcpu debugfs entries
> remain for as long as the VM exists and this does not handle vcpu
> hotunplug+hotplug (the ppc book3e did handle this). Thanks,
>
>
> On 04/09/2021 23:35, Alexey Kardashevskiy wrote:
>> At the moment the generic KVM code creates an "%pid-%fd" entry per a KVM
>> instance; and the PPC HV KVM creates its own at "vm%pid". The Book3E KVM
>> creates its own entry for timings.
>>
>> The problems with the PPC entries are:
>> 1. they do not allow multiple VMs in the same process (which is extremely
>> rare case mostly used by syzkaller fuzzer);
>> 2. prone to race bugs like the generic KVM code had fixed in
>> commit 85cd39af14f4 ("KVM: Do not leak memory for duplicate debugfs
>> directories").
>>
>> This defines kvm_arch_create_vm_debugfs() similar to the one for vcpus.
>>
>> This defines 2 hooks in kvmppc_ops for allowing specific KVM
>> implementations to add necessary entries. This defines handlers
>> for HV KVM and defines the Book3E debugfs vcpu helper as a handler.
>>
>> This makes use of already existing kvm_arch_create_vcpu_debugfs
>> on PPC.
>>
>> This removes the no-longer-used debugfs_dir pointers from PPC kvm_arch structs.
>>
>> Suggested-by: Fabiano Rosas <farosas@linux.ibm.com>
>> Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
>> ---
>> Changes:
>> v2:
>> * handled powerpc-booke
>> * s/kvm/vm/ in arch hooks
>> ---
>> arch/powerpc/include/asm/kvm_host.h | 7 +++---
>> arch/powerpc/include/asm/kvm_ppc.h | 2 ++
>> arch/powerpc/kvm/timing.h | 7 +++---
>> include/linux/kvm_host.h | 3 +++
>> arch/powerpc/kvm/book3s_64_mmu_hv.c | 2 +-
>> arch/powerpc/kvm/book3s_64_mmu_radix.c | 2 +-
>> arch/powerpc/kvm/book3s_hv.c | 30 +++++++++-----------------
>> arch/powerpc/kvm/e500.c | 1 +
>> arch/powerpc/kvm/e500mc.c | 1 +
>> arch/powerpc/kvm/powerpc.c | 15 ++++++++++---
>> arch/powerpc/kvm/timing.c | 20 ++++-------------
>> virt/kvm/kvm_main.c | 3 +++
>> 12 files changed, 44 insertions(+), 49 deletions(-)
>>
>> diff --git a/arch/powerpc/include/asm/kvm_host.h
>> b/arch/powerpc/include/asm/kvm_host.h
>> index 2bcac6da0a4b..f29b66cc2163 100644
>> --- a/arch/powerpc/include/asm/kvm_host.h
>> +++ b/arch/powerpc/include/asm/kvm_host.h
>> @@ -296,7 +296,6 @@ struct kvm_arch {
>> bool dawr1_enabled;
>> pgd_t *pgtable;
>> u64 process_table;
>> - struct dentry *debugfs_dir;
>> struct kvm_resize_hpt *resize_hpt; /* protected by kvm->lock */
>> #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
>> #ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
>> @@ -672,7 +671,6 @@ struct kvm_vcpu_arch {
>> u64 timing_min_duration[__NUMBER_OF_KVM_EXIT_TYPES];
>> u64 timing_max_duration[__NUMBER_OF_KVM_EXIT_TYPES];
>> u64 timing_last_exit;
>> - struct dentry *debugfs_exit_timing;
>> #endif
>> #ifdef CONFIG_PPC_BOOK3S
>> @@ -828,8 +826,6 @@ struct kvm_vcpu_arch {
>> struct kvmhv_tb_accumulator rm_exit; /* real-mode exit code */
>> struct kvmhv_tb_accumulator guest_time; /* guest execution */
>> struct kvmhv_tb_accumulator cede_time; /* time napping inside
>> guest */
>> -
>> - struct dentry *debugfs_dir;
>> #endif /* CONFIG_KVM_BOOK3S_HV_EXIT_TIMING */
>> };
>> @@ -868,4 +864,7 @@ static inline void kvm_arch_vcpu_blocking(struct
>> kvm_vcpu *vcpu) {}
>> static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {}
>> static inline void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu) {}
>> +#define __KVM_HAVE_ARCH_VCPU_DEBUGFS
>> +#define __KVM_HAVE_ARCH_KVM_DEBUGFS
>> +
>> #endif /* __POWERPC_KVM_HOST_H__ */
>> diff --git a/arch/powerpc/include/asm/kvm_ppc.h
>> b/arch/powerpc/include/asm/kvm_ppc.h
>> index 6355a6980ccf..fd841e844b90 100644
>> --- a/arch/powerpc/include/asm/kvm_ppc.h
>> +++ b/arch/powerpc/include/asm/kvm_ppc.h
>> @@ -316,6 +316,8 @@ struct kvmppc_ops {
>> int (*svm_off)(struct kvm *kvm);
>> int (*enable_dawr1)(struct kvm *kvm);
>> bool (*hash_v3_possible)(void);
>> + void (*create_vm_debugfs)(struct kvm *kvm);
>> + void (*create_vcpu_debugfs)(struct kvm_vcpu *vcpu, struct dentry
>> *debugfs_dentry);
>> };
>> extern struct kvmppc_ops *kvmppc_hv_ops;
>> diff --git a/arch/powerpc/kvm/timing.h b/arch/powerpc/kvm/timing.h
>> index feef7885ba82..36f7c201c6f1 100644
>> --- a/arch/powerpc/kvm/timing.h
>> +++ b/arch/powerpc/kvm/timing.h
>> @@ -14,8 +14,8 @@
>> #ifdef CONFIG_KVM_EXIT_TIMING
>> void kvmppc_init_timing_stats(struct kvm_vcpu *vcpu);
>> void kvmppc_update_timing_stats(struct kvm_vcpu *vcpu);
>> -void kvmppc_create_vcpu_debugfs(struct kvm_vcpu *vcpu, unsigned int id);
>> -void kvmppc_remove_vcpu_debugfs(struct kvm_vcpu *vcpu);
>> +void kvmppc_create_vcpu_debugfs(struct kvm_vcpu *vcpu,
>> + struct dentry *debugfs_dentry);
>> static inline void kvmppc_set_exit_type(struct kvm_vcpu *vcpu, int
>> type)
>> {
>> @@ -27,8 +27,7 @@ static inline void kvmppc_set_exit_type(struct
>> kvm_vcpu *vcpu, int type)
>> static inline void kvmppc_init_timing_stats(struct kvm_vcpu *vcpu) {}
>> static inline void kvmppc_update_timing_stats(struct kvm_vcpu *vcpu) {}
>> static inline void kvmppc_create_vcpu_debugfs(struct kvm_vcpu *vcpu,
>> - unsigned int id) {}
>> -static inline void kvmppc_remove_vcpu_debugfs(struct kvm_vcpu *vcpu) {}
>> + struct dentry *debugfs_dentry) {}
>> static inline void kvmppc_set_exit_type(struct kvm_vcpu *vcpu, int
>> type) {}
>> #endif /* CONFIG_KVM_EXIT_TIMING */
>> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
>> index ae7735b490b4..4f22b1201a0d 100644
>> --- a/include/linux/kvm_host.h
>> +++ b/include/linux/kvm_host.h
>> @@ -1021,6 +1021,9 @@ int kvm_arch_pm_notifier(struct kvm *kvm,
>> unsigned long state);
>> #ifdef __KVM_HAVE_ARCH_VCPU_DEBUGFS
>> void kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu, struct
>> dentry *debugfs_dentry);
>> #endif
>> +#ifdef __KVM_HAVE_ARCH_KVM_DEBUGFS
>> +void kvm_arch_create_vm_debugfs(struct kvm *kvm);
>> +#endif
>> int kvm_arch_hardware_enable(void);
>> void kvm_arch_hardware_disable(void);
>> diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c
>> b/arch/powerpc/kvm/book3s_64_mmu_hv.c
>> index c63e263312a4..33dae253a0ac 100644
>> --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
>> +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
>> @@ -2112,7 +2112,7 @@ static const struct file_operations
>> debugfs_htab_fops = {
>> void kvmppc_mmu_debugfs_init(struct kvm *kvm)
>> {
>> - debugfs_create_file("htab", 0400, kvm->arch.debugfs_dir, kvm,
>> + debugfs_create_file("htab", 0400, kvm->debugfs_dentry, kvm,
>> &debugfs_htab_fops);
>> }
>> diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c
>> b/arch/powerpc/kvm/book3s_64_mmu_radix.c
>> index c5508744e14c..f4e083c20872 100644
>> --- a/arch/powerpc/kvm/book3s_64_mmu_radix.c
>> +++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c
>> @@ -1452,7 +1452,7 @@ static const struct file_operations
>> debugfs_radix_fops = {
>> void kvmhv_radix_debugfs_init(struct kvm *kvm)
>> {
>> - debugfs_create_file("radix", 0400, kvm->arch.debugfs_dir, kvm,
>> + debugfs_create_file("radix", 0400, kvm->debugfs_dentry, kvm,
>> &debugfs_radix_fops);
>> }
>> diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
>> index c8f12b056968..046df9e0d462 100644
>> --- a/arch/powerpc/kvm/book3s_hv.c
>> +++ b/arch/powerpc/kvm/book3s_hv.c
>> @@ -2771,19 +2771,14 @@ static const struct file_operations
>> debugfs_timings_ops = {
>> };
>> /* Create a debugfs directory for the vcpu */
>> -static void debugfs_vcpu_init(struct kvm_vcpu *vcpu, unsigned int id)
>> +static void kvmppc_arch_create_vcpu_debugfs_hv(struct kvm_vcpu *vcpu,
>> struct dentry *debugfs_dentry)
>> {
>> - char buf[16];
>> - struct kvm *kvm = vcpu->kvm;
>> -
>> - snprintf(buf, sizeof(buf), "vcpu%u", id);
>> - vcpu->arch.debugfs_dir = debugfs_create_dir(buf,
>> kvm->arch.debugfs_dir);
>> - debugfs_create_file("timings", 0444, vcpu->arch.debugfs_dir, vcpu,
>> + debugfs_create_file("timings", 0444, debugfs_dentry, vcpu,
>> &debugfs_timings_ops);
>> }
>> #else /* CONFIG_KVM_BOOK3S_HV_EXIT_TIMING */
>> -static void debugfs_vcpu_init(struct kvm_vcpu *vcpu, unsigned int id)
>> +static void kvmppc_arch_create_vcpu_debugfs_hv(struct kvm_vcpu *vcpu,
>> struct dentry *debugfs_dentry)
>> {
>> }
>> #endif /* CONFIG_KVM_BOOK3S_HV_EXIT_TIMING */
>> @@ -2907,8 +2902,6 @@ static int kvmppc_core_vcpu_create_hv(struct
>> kvm_vcpu *vcpu)
>> vcpu->arch.cpu_type = KVM_CPU_3S_64;
>> kvmppc_sanity_check(vcpu);
>> - debugfs_vcpu_init(vcpu, id);
>> -
>> return 0;
>> }
>> @@ -5186,7 +5179,6 @@ void kvmppc_free_host_rm_ops(void)
>> static int kvmppc_core_init_vm_hv(struct kvm *kvm)
>> {
>> unsigned long lpcr, lpid;
>> - char buf[32];
>> int ret;
>> mutex_init(&kvm->arch.uvmem_lock);
>> @@ -5319,16 +5311,14 @@ static int kvmppc_core_init_vm_hv(struct kvm
>> *kvm)
>> kvm->arch.smt_mode = 1;
>> kvm->arch.emul_smt_mode = 1;
>> - /*
>> - * Create a debugfs directory for the VM
>> - */
>> - snprintf(buf, sizeof(buf), "vm%d", current->pid);
>> - kvm->arch.debugfs_dir = debugfs_create_dir(buf, kvm_debugfs_dir);
>> + return 0;
>> +}
>> +
>> +static void kvmppc_arch_create_vm_debugfs_hv(struct kvm *kvm)
>> +{
>> kvmppc_mmu_debugfs_init(kvm);
>> if (radix_enabled())
>> kvmhv_radix_debugfs_init(kvm);
>> -
>> - return 0;
>> }
>> static void kvmppc_free_vcores(struct kvm *kvm)
>> @@ -5342,8 +5332,6 @@ static void kvmppc_free_vcores(struct kvm *kvm)
>> static void kvmppc_core_destroy_vm_hv(struct kvm *kvm)
>> {
>> - debugfs_remove_recursive(kvm->arch.debugfs_dir);
>> -
>> if (!cpu_has_feature(CPU_FTR_ARCH_300))
>> kvm_hv_vm_deactivated();
>> @@ -5996,6 +5984,8 @@ static struct kvmppc_ops kvm_ops_hv = {
>> .svm_off = kvmhv_svm_off,
>> .enable_dawr1 = kvmhv_enable_dawr1,
>> .hash_v3_possible = kvmppc_hash_v3_possible,
>> + .create_vcpu_debugfs = kvmppc_arch_create_vcpu_debugfs_hv,
>> + .create_vm_debugfs = kvmppc_arch_create_vm_debugfs_hv,
>> };
>> static int kvm_init_subcore_bitmap(void)
>> diff --git a/arch/powerpc/kvm/e500.c b/arch/powerpc/kvm/e500.c
>> index 7e8b69015d20..d82e70c3e0a9 100644
>> --- a/arch/powerpc/kvm/e500.c
>> +++ b/arch/powerpc/kvm/e500.c
>> @@ -495,6 +495,7 @@ static struct kvmppc_ops kvm_ops_e500 = {
>> .emulate_op = kvmppc_core_emulate_op_e500,
>> .emulate_mtspr = kvmppc_core_emulate_mtspr_e500,
>> .emulate_mfspr = kvmppc_core_emulate_mfspr_e500,
>> + .create_vcpu_debugfs = kvmppc_create_vcpu_debugfs,
>> };
>> static int __init kvmppc_e500_init(void)
>> diff --git a/arch/powerpc/kvm/e500mc.c b/arch/powerpc/kvm/e500mc.c
>> index 1c189b5aadcc..45eacd949f4b 100644
>> --- a/arch/powerpc/kvm/e500mc.c
>> +++ b/arch/powerpc/kvm/e500mc.c
>> @@ -381,6 +381,7 @@ static struct kvmppc_ops kvm_ops_e500mc = {
>> .emulate_op = kvmppc_core_emulate_op_e500,
>> .emulate_mtspr = kvmppc_core_emulate_mtspr_e500,
>> .emulate_mfspr = kvmppc_core_emulate_mfspr_e500,
>> + .create_vcpu_debugfs = kvmppc_create_vcpu_debugfs,
>> };
>> static int __init kvmppc_e500mc_init(void)
>> diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
>> index c248d6d8b9e3..c895521ac6e9 100644
>> --- a/arch/powerpc/kvm/powerpc.c
>> +++ b/arch/powerpc/kvm/powerpc.c
>> @@ -763,7 +763,6 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
>> goto out_vcpu_uninit;
>> vcpu->arch.waitp = &vcpu->wait;
>> - kvmppc_create_vcpu_debugfs(vcpu, vcpu->vcpu_id);
>> return 0;
>> out_vcpu_uninit:
>> @@ -780,8 +779,6 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
>> /* Make sure we're not using the vcpu anymore */
>> hrtimer_cancel(&vcpu->arch.dec_timer);
>> - kvmppc_remove_vcpu_debugfs(vcpu);
>> -
>> switch (vcpu->arch.irq_type) {
>> case KVMPPC_IRQ_MPIC:
>> kvmppc_mpic_disconnect_vcpu(vcpu->arch.mpic, vcpu);
>> @@ -2505,3 +2502,15 @@ int kvm_arch_init(void *opaque)
>> }
>> EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_ppc_instr);
>> +
>> +void kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu, struct
>> dentry *debugfs_dentry)
>> +{
>> + if (vcpu->kvm->arch.kvm_ops->create_vcpu_debugfs)
>> + vcpu->kvm->arch.kvm_ops->create_vcpu_debugfs(vcpu,
>> debugfs_dentry);
>> +}
>> +
>> +void kvm_arch_create_vm_debugfs(struct kvm *kvm)
>> +{
>> + if (kvm->arch.kvm_ops->create_vm_debugfs)
>> + kvm->arch.kvm_ops->create_vm_debugfs(kvm);
>> +}
>> diff --git a/arch/powerpc/kvm/timing.c b/arch/powerpc/kvm/timing.c
>> index ba56a5cbba97..e1c17afc714d 100644
>> --- a/arch/powerpc/kvm/timing.c
>> +++ b/arch/powerpc/kvm/timing.c
>> @@ -204,21 +204,9 @@ static const struct file_operations
>> kvmppc_exit_timing_fops = {
>> .release = single_release,
>> };
>> -void kvmppc_create_vcpu_debugfs(struct kvm_vcpu *vcpu, unsigned int id)
>> +void kvmppc_create_vcpu_debugfs(struct kvm_vcpu *vcpu,
>> + struct dentry *debugfs_dentry)
>> {
>> - static char dbg_fname[50];
>> - struct dentry *debugfs_file;
>> -
>> - snprintf(dbg_fname, sizeof(dbg_fname), "vm%u_vcpu%u_timing",
>> - current->pid, id);
>> - debugfs_file = debugfs_create_file(dbg_fname, 0666, kvm_debugfs_dir,
>> - vcpu, &kvmppc_exit_timing_fops);
>> -
>> - vcpu->arch.debugfs_exit_timing = debugfs_file;
>> -}
>> -
>> -void kvmppc_remove_vcpu_debugfs(struct kvm_vcpu *vcpu)
>> -{
>> - debugfs_remove(vcpu->arch.debugfs_exit_timing);
>> - vcpu->arch.debugfs_exit_timing = NULL;
>> + debugfs_create_file("timing", 0666, debugfs_dentry,
>> + vcpu, &kvmppc_exit_timing_fops);
>> }
>> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
>> index b50dbe269f4b..85b2550e18e7 100644
>> --- a/virt/kvm/kvm_main.c
>> +++ b/virt/kvm/kvm_main.c
>> @@ -954,6 +954,9 @@ static int kvm_create_vm_debugfs(struct kvm *kvm,
>> int fd)
>> kvm->debugfs_dentry, stat_data,
>> &stat_fops_per_vm);
>> }
>> +#ifdef __KVM_HAVE_ARCH_KVM_DEBUGFS
>> + kvm_arch_create_vm_debugfs(kvm);
>> +#endif
>> return 0;
>> }
>>
>
--
Alexey
^ permalink raw reply
* [Bug 213837] "Kernel panic - not syncing: corrupted stack end detected inside scheduler" at building via distcc on a G5
From: bugzilla-daemon @ 2021-09-05 14:11 UTC (permalink / raw)
To: linuxppc-dev
In-Reply-To: <bug-213837-206035@https.bugzilla.kernel.org/>
https://bugzilla.kernel.org/show_bug.cgi?id=213837
Erhard F. (erhard_f@mailbox.org) changed:
What |Removed |Added
----------------------------------------------------------------------------
See Also|https://bugzilla.kernel.org |
|/show_bug.cgi?id=213079 |
--- Comment #4 from Erhard F. (erhard_f@mailbox.org) ---
Checked whether this really has something to do with bug #213079 by
copying this root partition to a regular HDD and using that one instead. As the
issue still happens, it seems these are two separate bugs.
[...]
Kernel panic - not syncing: corrupted stack end detected inside scheduler
CPU: 1 PID: 1509 Comm: powerpc64-unkno Tainted: G W
5.14.1-PowerMacG5+ #2
Call Trace:
[c0000000386434c0] [c00000000054cd64] .dump_stack_lvl+0x98/0xe0 (unreliable)
[c000000038643550] [c000000000068ab8] .panic+0x160/0x40c
[c000000038643600] [c00000000081202c] .__schedule+0x7c/0x840
[c0000000386436d0] [c00000000081293c] .preempt_schedule_common+0x28/0x48
[c000000038643750] [c00000000081298c] .__cond_resched+0x30/0x4c
[c0000000386437d0] [c0000000004edf18] .copy_page_to_iter+0xbc/0x32c
[c0000000386438a0] [c0000000001c99d8] .filemap_read+0x574/0x618
[c000000038643a60] [c00000000033182c] .ext4_file_read_iter+0xb8/0x11c
[c000000038643b00] [c000000000272f1c] .new_sync_read+0x94/0xe0
[c000000038643c00] [c0000000002746c0] .vfs_read+0x128/0x12c
[c000000038643ca0] [c000000000274a58] .ksys_read+0x78/0xc4
[c000000038643d60] [c000000000022808] .system_call_exception+0x1a4/0x1dc
[c000000038643e10] [c00000000000b4cc] system_call_common+0xec/0x250
--- interrupt: c00 at 0x3fffbc477cd0
NIP: 00003fffbc477cd0 LR: 000000011c413660 CTR: 0000000000000000
REGS: c000000038643e80 TRAP: 0c00 Tainted: G W
(5.14.1-PowerMacG5+)
MSR: 900000000200f032 <SF,HV,VEC,EE,PR,FP,ME,IR,DR,RI> CR: 24000422 XER:
00000000
IRQMASK: 0
GPR00: 0000000000000003 00003fffd3c43d70 00003fffbc4a4f00 0000000000000004
GPR04: 00003fffbbfac010 00000000001e7697 00003fffbc458320 0000000000000000
GPR08: 00003fffbc4582e0 0000000000000000 0000000000000000 0000000000000000
GPR12: 0000000000000000 00003fffbc54ec20 00000001470b79c0 0000000157c21760
GPR16: 000000011c41ec38 00003fffd3c44258 000000011c41eb28 00003fffd3c440a8
GPR20: 00003fffd3c44460 ffffffffffffffff 00000001470b6dd0 0000000000000000
GPR24: 00000001470b77f0 00000001470b7d30 0000000000000005 00003fffd3c43fc0
GPR28: 000000011c4668e8 0000000000000004 00003fffbbfac010 00000000001e7697
NIP [00003fffbc477cd0] 0x3fffbc477cd0
LR [000000011c413660] 0x11c413660
--- interrupt: c00
Rebooting in 40 seconds..
--
You may reply to this email to add a comment.
You are receiving this mail because:
You are watching someone on the CC list of the bug.
^ permalink raw reply
* [Bug 213837] "Kernel panic - not syncing: corrupted stack end detected inside scheduler" at building via distcc on a G5
From: bugzilla-daemon @ 2021-09-05 14:15 UTC (permalink / raw)
To: linuxppc-dev
In-Reply-To: <bug-213837-206035@https.bugzilla.kernel.org/>
https://bugzilla.kernel.org/show_bug.cgi?id=213837
Erhard F. (erhard_f@mailbox.org) changed:
What |Removed |Added
----------------------------------------------------------------------------
Attachment #298395|0 |1
is obsolete| |
--- Comment #5 from Erhard F. (erhard_f@mailbox.org) ---
Created attachment 298671
--> https://bugzilla.kernel.org/attachment.cgi?id=298671&action=edit
kernel .config (5.14.1, PowerMac G5 11,2)
--
You may reply to this email to add a comment.
You are receiving this mail because:
You are watching someone on the CC list of the bug.
^ permalink raw reply
* Re: [PATCH 0/2] powerpc/perf: Add instruction and data address registers to extended regs
From: Athira Rajeev @ 2021-09-06 2:43 UTC (permalink / raw)
To: kajoljain, Arnaldo Carvalho de Melo
Cc: Madhavan Srinivasan, linuxppc-dev, Jiri Olsa, rnsastry
In-Reply-To: <bd5a9388-483d-91ca-b371-ab92ae4c08bc@linux.ibm.com>
> On 02-Sep-2021, at 1:04 PM, kajoljain <kjain@linux.ibm.com> wrote:
>
>
>
> On 6/20/21 8:15 PM, Athira Rajeev wrote:
>> Patch set adds PMU registers namely Sampled Instruction Address Register
>> (SIAR) and Sampled Data Address Register (SDAR) as part of extended regs
>> in PowerPC. These registers provide the instruction/data address, and
>> adding them to extended regs helps with debugging.
>>
>> Patch 1/2 adds SIAR and SDAR as part of the extended regs mask.
>> Patch 2/2 includes perf tools side changes to add the SPRs to
>> sample_reg_mask to use with -I? option.
>>
>> Athira Rajeev (2):
>> powerpc/perf: Expose instruction and data address registers as part of
>> extended regs
>> tools/perf: Add perf tools support to expose instruction and data
>> address registers as part of extended regs
>>
>
> Patchset looks good to me.
>
> Reviewed-By: kajol Jain<kjain@linux.ibm.com>
Hi Arnaldo,
Requesting for your review on this patchset.
Thanks
Athira
>
> Thanks,
> Kajol Jain
>
>> arch/powerpc/include/uapi/asm/perf_regs.h | 12 +++++++-----
>> arch/powerpc/perf/perf_regs.c | 4 ++++
>> tools/arch/powerpc/include/uapi/asm/perf_regs.h | 12 +++++++-----
>> tools/perf/arch/powerpc/include/perf_regs.h | 2 ++
>> tools/perf/arch/powerpc/util/perf_regs.c | 2 ++
>> 5 files changed, 22 insertions(+), 10 deletions(-)
^ permalink raw reply
* [PATCH] powerpc/mce: Fix access error in mce handler
From: Ganesh Goudar @ 2021-09-06 8:18 UTC (permalink / raw)
To: linuxppc-dev, mpe; +Cc: Ganesh Goudar, mahesh, npiggin
We queue an irq work for deferred processing of the mce event
in the realmode mce handler, where translation is disabled.
Queuing of the work may result in accessing memory outside the
RMO region; such access needs translation to be enabled
for an LPAR running with hash mmu, else the kernel crashes.
So enable translation before queuing the work.
Without this change, the following trace is seen on injecting a machine
check error in an LPAR running with hash mmu.
Oops: Kernel access of bad area, sig: 11 [#1]
LE PAGE_SIZE=64K MMU=Hash SMP NR_CPUS=2048 NUMA pSeries
CPU: 5 PID: 1883 Comm: insmod Tainted: G OE 5.14.0-mce+ #137
NIP: c000000000735d60 LR: c000000000318640 CTR: 0000000000000000
REGS: c00000001ebff9a0 TRAP: 0300 Tainted: G OE (5.14.0-mce+)
MSR: 8000000000001003 <SF,ME,RI,LE> CR: 28008228 XER: 00000001
CFAR: c00000000031863c DAR: c00000027fa8fe08 DSISR: 40000000 IRQMASK: 0
GPR00: c0000000003186d0 c00000001ebffc40 c000000001b0df00 c0000000016337e8
GPR04: c0000000016337e8 c00000027fa8fe08 0000000000000023 c0000000016337f0
GPR08: 0000000000000023 c0000000012ffe08 0000000000000000 c008000001460240
GPR12: 0000000000000000 c00000001ec9a900 c00000002ac4bd00 0000000000000000
GPR16: 00000000000005a0 c0080000006b0000 c0080000006b05a0 c000000000ff3068
GPR20: c00000002ac4bbc0 0000000000000001 c00000002ac4bbc0 c008000001490298
GPR24: c008000001490108 c000000001636198 c008000001470090 c008000001470058
GPR28: 0000000000000510 c008000001000000 c008000008000019 0000000000000019
NIP [c000000000735d60] llist_add_batch+0x0/0x40
LR [c000000000318640] __irq_work_queue_local+0x70/0xc0
Call Trace:
[c00000001ebffc40] [c00000001ebffc0c] 0xc00000001ebffc0c (unreliable)
[c00000001ebffc60] [c0000000003186d0] irq_work_queue+0x40/0x70
[c00000001ebffc80] [c00000000004425c] machine_check_queue_event+0xbc/0xd0
[c00000001ebffcf0] [c00000000000838c] machine_check_early_common+0x16c/0x1f4
Fixes: 74c3354bc1d89 ("powerpc/pseries/mce: restore msr before returning from handler")
Signed-off-by: Ganesh Goudar <ganeshgr@linux.ibm.com>
---
arch/powerpc/kernel/mce.c | 16 ++++++++++++++--
1 file changed, 14 insertions(+), 2 deletions(-)
diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c
index 47a683cd00d2..9d1e39d42e3e 100644
--- a/arch/powerpc/kernel/mce.c
+++ b/arch/powerpc/kernel/mce.c
@@ -249,6 +249,7 @@ void machine_check_queue_event(void)
{
int index;
struct machine_check_event evt;
+ unsigned long msr;
if (!get_mce_event(&evt, MCE_EVENT_RELEASE))
return;
@@ -262,8 +263,19 @@ void machine_check_queue_event(void)
memcpy(&local_paca->mce_info->mce_event_queue[index],
&evt, sizeof(evt));
- /* Queue irq work to process this event later. */
- irq_work_queue(&mce_event_process_work);
+ /* Queue irq work to process this event later. Before
+ * queuing the work enable translation for non radix LPAR,
+ * as irq_work_queue may try to access memory outside RMO
+ * region.
+ */
+ if (!radix_enabled() && firmware_has_feature(FW_FEATURE_LPAR)) {
+ msr = mfmsr();
+ mtmsr(msr | MSR_IR | MSR_DR);
+ irq_work_queue(&mce_event_process_work);
+ mtmsr(msr);
+ } else {
+ irq_work_queue(&mce_event_process_work);
+ }
}
void mce_common_process_ue(struct pt_regs *regs,
--
2.31.1
^ permalink raw reply related
* [PATCH v3 1/3] powerpc/pseries: Parse control memory access error
From: Ganesh Goudar @ 2021-09-06 8:43 UTC (permalink / raw)
To: linuxppc-dev, mpe; +Cc: Ganesh Goudar, mahesh, npiggin
Add support to parse and log control memory access
error for pseries. These changes are made according to
PAPR v2.11 10.3.2.2.12.
Signed-off-by: Ganesh Goudar <ganeshgr@linux.ibm.com>
---
v3: Modify the commit log to mention the document according
to which changes are made.
Define and use a macro to check if the effective address
is provided.
v2: No changes.
---
arch/powerpc/platforms/pseries/ras.c | 36 ++++++++++++++++++++++++----
1 file changed, 32 insertions(+), 4 deletions(-)
diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c
index 56092dccfdb8..e62a0ca2611a 100644
--- a/arch/powerpc/platforms/pseries/ras.c
+++ b/arch/powerpc/platforms/pseries/ras.c
@@ -60,11 +60,17 @@ struct pseries_mc_errorlog {
* XX 2: Reserved.
* XXX 3: Type of UE error.
*
- * For error_type != MC_ERROR_TYPE_UE
+ * For error_type == MC_ERROR_TYPE_SLB/ERAT/TLB
* XXXXXXXX
* X 1: Effective address provided.
* XXXXX 5: Reserved.
* XX 2: Type of SLB/ERAT/TLB error.
+ *
+ * For error_type == MC_ERROR_TYPE_CTRL_MEM_ACCESS
+ * XXXXXXXX
+ * X 1: Error causing address provided.
+ * XXX 3: Type of error.
+ * XXXX 4: Reserved.
*/
u8 sub_err_type;
u8 reserved_1[6];
@@ -80,6 +86,7 @@ struct pseries_mc_errorlog {
#define MC_ERROR_TYPE_TLB 0x04
#define MC_ERROR_TYPE_D_CACHE 0x05
#define MC_ERROR_TYPE_I_CACHE 0x07
+#define MC_ERROR_TYPE_CTRL_MEM_ACCESS 0x08
/* RTAS pseries MCE error sub types */
#define MC_ERROR_UE_INDETERMINATE 0
@@ -90,6 +97,7 @@ struct pseries_mc_errorlog {
#define UE_EFFECTIVE_ADDR_PROVIDED 0x40
#define UE_LOGICAL_ADDR_PROVIDED 0x20
+#define MC_EFFECTIVE_ADDR_PROVIDED 0x80
#define MC_ERROR_SLB_PARITY 0
#define MC_ERROR_SLB_MULTIHIT 1
@@ -103,6 +111,9 @@ struct pseries_mc_errorlog {
#define MC_ERROR_TLB_MULTIHIT 2
#define MC_ERROR_TLB_INDETERMINATE 3
+#define MC_ERROR_CTRL_MEM_ACCESS_PTABLE_WALK 0
+#define MC_ERROR_CTRL_MEM_ACCESS_OP_ACCESS 1
+
static inline u8 rtas_mc_error_sub_type(const struct pseries_mc_errorlog *mlog)
{
switch (mlog->error_type) {
@@ -112,6 +123,8 @@ static inline u8 rtas_mc_error_sub_type(const struct pseries_mc_errorlog *mlog)
case MC_ERROR_TYPE_ERAT:
case MC_ERROR_TYPE_TLB:
return (mlog->sub_err_type & 0x03);
+ case MC_ERROR_TYPE_CTRL_MEM_ACCESS:
+ return (mlog->sub_err_type & 0x70) >> 4;
default:
return 0;
}
@@ -656,7 +669,7 @@ static int mce_handle_err_virtmode(struct pt_regs *regs,
mce_err.u.slb_error_type = MCE_SLB_ERROR_INDETERMINATE;
break;
}
- if (mce_log->sub_err_type & 0x80)
+ if (mce_log->sub_err_type & MC_EFFECTIVE_ADDR_PROVIDED)
eaddr = be64_to_cpu(mce_log->effective_address);
break;
case MC_ERROR_TYPE_ERAT:
@@ -673,7 +686,7 @@ static int mce_handle_err_virtmode(struct pt_regs *regs,
mce_err.u.erat_error_type = MCE_ERAT_ERROR_INDETERMINATE;
break;
}
- if (mce_log->sub_err_type & 0x80)
+ if (mce_log->sub_err_type & MC_EFFECTIVE_ADDR_PROVIDED)
eaddr = be64_to_cpu(mce_log->effective_address);
break;
case MC_ERROR_TYPE_TLB:
@@ -690,7 +703,7 @@ static int mce_handle_err_virtmode(struct pt_regs *regs,
mce_err.u.tlb_error_type = MCE_TLB_ERROR_INDETERMINATE;
break;
}
- if (mce_log->sub_err_type & 0x80)
+ if (mce_log->sub_err_type & MC_EFFECTIVE_ADDR_PROVIDED)
eaddr = be64_to_cpu(mce_log->effective_address);
break;
case MC_ERROR_TYPE_D_CACHE:
@@ -699,6 +712,21 @@ static int mce_handle_err_virtmode(struct pt_regs *regs,
case MC_ERROR_TYPE_I_CACHE:
mce_err.error_type = MCE_ERROR_TYPE_ICACHE;
break;
+ case MC_ERROR_TYPE_CTRL_MEM_ACCESS:
+ mce_err.error_type = MCE_ERROR_TYPE_RA;
+ switch (err_sub_type) {
+ case MC_ERROR_CTRL_MEM_ACCESS_PTABLE_WALK:
+ mce_err.u.ra_error_type =
+ MCE_RA_ERROR_PAGE_TABLE_WALK_LOAD_STORE_FOREIGN;
+ break;
+ case MC_ERROR_CTRL_MEM_ACCESS_OP_ACCESS:
+ mce_err.u.ra_error_type =
+ MCE_RA_ERROR_LOAD_STORE_FOREIGN;
+ break;
+ }
+ if (mce_log->sub_err_type & MC_EFFECTIVE_ADDR_PROVIDED)
+ eaddr = be64_to_cpu(mce_log->effective_address);
+ break;
case MC_ERROR_TYPE_UNKNOWN:
default:
mce_err.error_type = MCE_ERROR_TYPE_UNKNOWN;
--
2.31.1
^ permalink raw reply related
* [PATCH v3 2/3] selftests/powerpc: Add test for real address error handling
From: Ganesh Goudar @ 2021-09-06 8:43 UTC (permalink / raw)
To: linuxppc-dev, mpe; +Cc: Ganesh Goudar, mahesh, npiggin
In-Reply-To: <20210906084303.183921-1-ganeshgr@linux.ibm.com>
Add a test for real address or control memory address access
error handling, using the NX-GZIP engine.
The error is injected by accessing the control memory address
using an illegal instruction. On successful handling, the process
attempting to access the control memory address using an illegal
instruction receives SIGBUS.
Signed-off-by: Ganesh Goudar <ganeshgr@linux.ibm.com>
---
v3: Avoid using shell script to inject error.
v2: Fix build error.
---
tools/testing/selftests/powerpc/Makefile | 3 +-
tools/testing/selftests/powerpc/mce/Makefile | 7 ++
.../selftests/powerpc/mce/inject-ra-err.c | 65 +++++++++++++++++++
tools/testing/selftests/powerpc/mce/vas-api.h | 1 +
4 files changed, 75 insertions(+), 1 deletion(-)
create mode 100644 tools/testing/selftests/powerpc/mce/Makefile
create mode 100644 tools/testing/selftests/powerpc/mce/inject-ra-err.c
create mode 120000 tools/testing/selftests/powerpc/mce/vas-api.h
diff --git a/tools/testing/selftests/powerpc/Makefile b/tools/testing/selftests/powerpc/Makefile
index 0830e63818c1..4830372d7416 100644
--- a/tools/testing/selftests/powerpc/Makefile
+++ b/tools/testing/selftests/powerpc/Makefile
@@ -31,7 +31,8 @@ SUB_DIRS = alignment \
vphn \
math \
ptrace \
- security
+ security \
+ mce
endif
diff --git a/tools/testing/selftests/powerpc/mce/Makefile b/tools/testing/selftests/powerpc/mce/Makefile
new file mode 100644
index 000000000000..2424513982d9
--- /dev/null
+++ b/tools/testing/selftests/powerpc/mce/Makefile
@@ -0,0 +1,7 @@
+#SPDX-License-Identifier: GPL-2.0-or-later
+
+TEST_GEN_PROGS := inject-ra-err
+
+include ../../lib.mk
+
+$(TEST_GEN_PROGS): ../harness.c
diff --git a/tools/testing/selftests/powerpc/mce/inject-ra-err.c b/tools/testing/selftests/powerpc/mce/inject-ra-err.c
new file mode 100644
index 000000000000..94323c34d9a6
--- /dev/null
+++ b/tools/testing/selftests/powerpc/mce/inject-ra-err.c
@@ -0,0 +1,65 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <errno.h>
+#include <fcntl.h>
+#include <signal.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "vas-api.h"
+#include "utils.h"
+
+static bool faulted;
+
+static void sigbus_handler(int n, siginfo_t *info, void *ctxt_v)
+{
+ ucontext_t *ctxt = (ucontext_t *)ctxt_v;
+ struct pt_regs *regs = ctxt->uc_mcontext.regs;
+
+ faulted = true;
+ regs->nip += 4;
+}
+
+static int test_ra_error(void)
+{
+ struct vas_tx_win_open_attr attr;
+ int fd, *paste_addr;
+ char *devname = "/dev/crypto/nx-gzip";
+ struct sigaction act = {
+ .sa_sigaction = sigbus_handler,
+ .sa_flags = SA_SIGINFO,
+ };
+
+ memset(&attr, 0, sizeof(attr));
+ attr.version = 1;
+ attr.vas_id = 0;
+
+ SKIP_IF(access(devname, F_OK));
+
+ fd = open(devname, O_RDWR);
+ FAIL_IF(fd < 0);
+ FAIL_IF(ioctl(fd, VAS_TX_WIN_OPEN, &attr) < 0);
+ FAIL_IF(sigaction(SIGBUS, &act, NULL) != 0);
+
+ paste_addr = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0ULL);
+
+ /* The following assignment triggers exception */
+ mb();
+ *paste_addr = 1;
+ mb();
+
+ FAIL_IF(!faulted);
+
+ return 0;
+}
+
+int main(void)
+{
+ return test_harness(test_ra_error, "inject-ra-err");
+}
+
diff --git a/tools/testing/selftests/powerpc/mce/vas-api.h b/tools/testing/selftests/powerpc/mce/vas-api.h
new file mode 120000
index 000000000000..1455c1bcd351
--- /dev/null
+++ b/tools/testing/selftests/powerpc/mce/vas-api.h
@@ -0,0 +1 @@
+../../../../../arch/powerpc/include/uapi/asm/vas-api.h
\ No newline at end of file
--
2.31.1
^ permalink raw reply related
* [PATCH v3 3/3] powerpc/mce: Modify the real address error logging messages
From: Ganesh Goudar @ 2021-09-06 8:43 UTC (permalink / raw)
To: linuxppc-dev, mpe; +Cc: Ganesh Goudar, mahesh, npiggin
In-Reply-To: <20210906084303.183921-1-ganeshgr@linux.ibm.com>
To avoid ambiguity, modify the strings in real address error
logging messages to "foreign/control memory" from "foreign",
since the error descriptions in the P9 user manual and the P10 user
manual are different for the same type of errors.
P9 User Manual for MCE:
DSISR:59 Host real address to foreign space during translation.
DSISR:60 Host real address to foreign space on a load or store
access.
P10 User Manual for MCE:
DSISR:59 D-side tablewalk used a host real address in the
control memory address range.
DSISR:60 D-side operand access to control memory address space.
Signed-off-by: Ganesh Goudar <ganeshgr@linux.ibm.com>
---
v3: No changes.
v2: No changes.
---
arch/powerpc/kernel/mce.c | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c
index 9d1e39d42e3e..5baf69503349 100644
--- a/arch/powerpc/kernel/mce.c
+++ b/arch/powerpc/kernel/mce.c
@@ -400,14 +400,14 @@ void machine_check_print_event_info(struct machine_check_event *evt,
static const char *mc_ra_types[] = {
"Indeterminate",
"Instruction fetch (bad)",
- "Instruction fetch (foreign)",
+ "Instruction fetch (foreign/control memory)",
"Page table walk ifetch (bad)",
- "Page table walk ifetch (foreign)",
+ "Page table walk ifetch (foreign/control memory)",
"Load (bad)",
"Store (bad)",
"Page table walk Load/Store (bad)",
- "Page table walk Load/Store (foreign)",
- "Load/Store (foreign)",
+ "Page table walk Load/Store (foreign/control memory)",
+ "Load/Store (foreign/control memory)",
};
static const char *mc_link_types[] = {
"Indeterminate",
--
2.31.1
^ permalink raw reply related
* [PATCH v2] powerpc/papr_scm: Implement initial support for injecting smart errors
From: Shivaprasad G Bhat @ 2021-09-06 9:06 UTC (permalink / raw)
To: nvdimm
Cc: sbhat, aneesh.kumar, vaibhav, dan.j.williams, linuxppc-dev,
ira.weiny
From: Vaibhav Jain <vaibhav@linux.ibm.com>
Presently PAPR doesn't support injecting smart errors on an
NVDIMM. This makes testing the NVDIMM health reporting functionality
difficult, as simulating NVDIMM health related events needs a hacked up
qemu version.
To solve this problem this patch proposes simulating a certain set of
NVDIMM health related events in papr_scm, specifically the 'fatal' health
state and the 'dirty' shutdown state. These errors can be injected via the
user-space 'ndctl-inject-smart(1)' command. With the proposed patch and
the corresponding ndctl patches, the following command flow is expected:
$ sudo ndctl list -DH -d nmem0
...
"health_state":"ok",
"shutdown_state":"clean",
...
# inject unsafe shutdown and fatal health error
$ sudo ndctl inject-smart nmem0 -Uf
...
"health_state":"fatal",
"shutdown_state":"dirty",
...
# uninject all errors
$ sudo ndctl inject-smart nmem0 -N
...
"health_state":"ok",
"shutdown_state":"clean",
...
The patch adds two members 'health_bitmap_mask' and
'health_bitmap_override' inside struct papr_scm_priv which are then
bit blt'ed to the health bitmaps fetched from the hypervisor. In case
we are not able to fetch health information from the hypervisor we
service the health bitmap from these two members. These members are
accessible from sysfs at nmemX/papr/health_bitmap_override.
A new PDSM named 'SMART_INJECT' is proposed that accepts the newly
introduced 'struct nd_papr_pdsm_smart_inject' as payload that's
exchanged between libndctl and papr_scm to indicate the requested
smart-error states.
When processing the PDSM 'SMART_INJECT', papr_pdsm_smart_inject()
constructs a pair of 'mask' and 'override' bitmaps from the payload
and bit-blts them to the 'health_bitmap_{mask, override}' members. This
ensures that, after being fetched from the hypervisor, the health_bitmap
reflects the requested smart-error states.
Signed-off-by: Vaibhav Jain <vaibhav@linux.ibm.com>
Signed-off-by: Shivaprasad G Bhat <sbhat@linux.ibm.com>
---
Changelog:
Since v1:
Link: https://patchwork.kernel.org/project/linux-nvdimm/list/?series=513881
* Updated the patch description.
* Removed dependency of a header movement patch.
* Removed '__packed' attribute for 'struct nd_papr_pdsm_smart_inject' [Aneesh]
arch/powerpc/include/uapi/asm/papr_pdsm.h | 18 ++++++
arch/powerpc/platforms/pseries/papr_scm.c | 94 ++++++++++++++++++++++++++++-
2 files changed, 109 insertions(+), 3 deletions(-)
diff --git a/arch/powerpc/include/uapi/asm/papr_pdsm.h b/arch/powerpc/include/uapi/asm/papr_pdsm.h
index 82488b1e7276..17439925045c 100644
--- a/arch/powerpc/include/uapi/asm/papr_pdsm.h
+++ b/arch/powerpc/include/uapi/asm/papr_pdsm.h
@@ -116,6 +116,22 @@ struct nd_papr_pdsm_health {
};
};
+/* Flags for injecting specific smart errors */
+#define PDSM_SMART_INJECT_HEALTH_FATAL (1 << 0)
+#define PDSM_SMART_INJECT_BAD_SHUTDOWN (1 << 1)
+
+struct nd_papr_pdsm_smart_inject {
+ union {
+ struct {
+ /* One or more of PDSM_SMART_INJECT_ */
+ __u32 flags;
+ __u8 fatal_enable;
+ __u8 unsafe_shutdown_enable;
+ };
+ __u8 buf[ND_PDSM_PAYLOAD_MAX_SIZE];
+ };
+};
+
/*
* Methods to be embedded in ND_CMD_CALL request. These are sent to the kernel
* via 'nd_cmd_pkg.nd_command' member of the ioctl struct
@@ -123,12 +139,14 @@ struct nd_papr_pdsm_health {
enum papr_pdsm {
PAPR_PDSM_MIN = 0x0,
PAPR_PDSM_HEALTH,
+ PAPR_PDSM_SMART_INJECT,
PAPR_PDSM_MAX,
};
/* Maximal union that can hold all possible payload types */
union nd_pdsm_payload {
struct nd_papr_pdsm_health health;
+ struct nd_papr_pdsm_smart_inject smart_inject;
__u8 buf[ND_PDSM_PAYLOAD_MAX_SIZE];
} __packed;
diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c
index f48e87ac89c9..de4cf329cfb3 100644
--- a/arch/powerpc/platforms/pseries/papr_scm.c
+++ b/arch/powerpc/platforms/pseries/papr_scm.c
@@ -68,6 +68,10 @@
#define PAPR_SCM_PERF_STATS_EYECATCHER __stringify(SCMSTATS)
#define PAPR_SCM_PERF_STATS_VERSION 0x1
+/* Use bitblt method to override specific bits in the '_bitmap_' */
+#define BITBLT_BITMAP(_bitmap_, _mask_, _override_) \
+ (((_bitmap_) & ~(_mask_)) | ((_mask_) & (_override_)))
+
/* Struct holding a single performance metric */
struct papr_scm_perf_stat {
u8 stat_id[8];
@@ -120,6 +124,12 @@ struct papr_scm_priv {
/* length of the stat buffer as expected by phyp */
size_t stat_buffer_len;
+
+ /* The bits which needs to be overridden */
+ u64 health_bitmap_mask;
+
+ /* The overridden values for the bits having the masks set */
+ u64 health_bitmap_override;
};
static int papr_scm_pmem_flush(struct nd_region *nd_region,
@@ -347,19 +357,28 @@ static ssize_t drc_pmem_query_stats(struct papr_scm_priv *p,
static int __drc_pmem_query_health(struct papr_scm_priv *p)
{
unsigned long ret[PLPAR_HCALL_BUFSIZE];
+ u64 bitmap = 0;
long rc;
/* issue the hcall */
rc = plpar_hcall(H_SCM_HEALTH, ret, p->drc_index);
- if (rc != H_SUCCESS) {
+ if (rc == H_SUCCESS)
+ bitmap = ret[0] & ret[1];
+ else if (rc == H_FUNCTION)
+ dev_info_once(&p->pdev->dev,
+ "Hcall H_SCM_HEALTH not implemented, assuming empty health bitmap");
+ else {
+
dev_err(&p->pdev->dev,
"Failed to query health information, Err:%ld\n", rc);
return -ENXIO;
}
p->lasthealth_jiffies = jiffies;
- p->health_bitmap = ret[0] & ret[1];
-
+ /* Allow overriding specific health bits via bit blt. */
+ bitmap = BITBLT_BITMAP(bitmap, p->health_bitmap_mask,
+ p->health_bitmap_override);
+ WRITE_ONCE(p->health_bitmap, bitmap);
dev_dbg(&p->pdev->dev,
"Queried dimm health info. Bitmap:0x%016lx Mask:0x%016lx\n",
ret[0], ret[1]);
@@ -669,6 +688,54 @@ static int papr_pdsm_health(struct papr_scm_priv *p,
return rc;
}
+/* Inject a smart error Add the dirty-shutdown-counter value to the pdsm */
+static int papr_pdsm_smart_inject(struct papr_scm_priv *p,
+ union nd_pdsm_payload *payload)
+{
+ int rc;
+ u32 supported_flags = 0;
+ u64 mask = 0, override = 0;
+
+ /* Check for individual smart error flags and update mask and override */
+ if (payload->smart_inject.flags & PDSM_SMART_INJECT_HEALTH_FATAL) {
+ supported_flags |= PDSM_SMART_INJECT_HEALTH_FATAL;
+ mask |= PAPR_PMEM_HEALTH_FATAL;
+ override |= payload->smart_inject.fatal_enable ?
+ PAPR_PMEM_HEALTH_FATAL : 0;
+ }
+
+ if (payload->smart_inject.flags & PDSM_SMART_INJECT_BAD_SHUTDOWN) {
+ supported_flags |= PDSM_SMART_INJECT_BAD_SHUTDOWN;
+ mask |= PAPR_PMEM_SHUTDOWN_DIRTY;
+ override |= payload->smart_inject.unsafe_shutdown_enable ?
+ PAPR_PMEM_SHUTDOWN_DIRTY : 0;
+ }
+
+ dev_dbg(&p->pdev->dev, "[Smart-inject] Mask=%#llx override=%#llx\n",
+ mask, override);
+
+ /* Prevent concurrent access to dimm health bitmap related members */
+ rc = mutex_lock_interruptible(&p->health_mutex);
+ if (rc)
+ return rc;
+
+ /* Bitblt mask/override to corrosponding health_bitmap couterparts */
+ p->health_bitmap_mask = BITBLT_BITMAP(p->health_bitmap_mask,
+ mask, override);
+ p->health_bitmap_override = BITBLT_BITMAP(p->health_bitmap_override,
+ mask, override);
+
+ /* Invalidate cached health bitmap */
+ p->lasthealth_jiffies = 0;
+
+ mutex_unlock(&p->health_mutex);
+
+ /* Return the supported flags back to userspace */
+ payload->smart_inject.flags = supported_flags;
+
+ return sizeof(struct nd_papr_pdsm_health);
+}
+
/*
* 'struct pdsm_cmd_desc'
* Identifies supported PDSMs' expected length of in/out payloads
@@ -702,6 +769,12 @@ static const struct pdsm_cmd_desc __pdsm_cmd_descriptors[] = {
.size_out = sizeof(struct nd_papr_pdsm_health),
.service = papr_pdsm_health,
},
+
+ [PAPR_PDSM_SMART_INJECT] = {
+ .size_in = sizeof(struct nd_papr_pdsm_smart_inject),
+ .size_out = sizeof(struct nd_papr_pdsm_smart_inject),
+ .service = papr_pdsm_smart_inject,
+ },
/* Empty */
[PAPR_PDSM_MAX] = {
.size_in = 0,
@@ -838,6 +911,20 @@ static int papr_scm_ndctl(struct nvdimm_bus_descriptor *nd_desc,
return 0;
}
+static ssize_t health_bitmap_override_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct nvdimm *dimm = to_nvdimm(dev);
+ struct papr_scm_priv *p = nvdimm_provider_data(dimm);
+
+ return sprintf(buf, "mask=%#llx override=%#llx\n",
+ READ_ONCE(p->health_bitmap_mask),
+ READ_ONCE(p->health_bitmap_override));
+}
+
+static DEVICE_ATTR_ADMIN_RO(health_bitmap_override);
+
static ssize_t perf_stats_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
@@ -952,6 +1039,7 @@ static struct attribute *papr_nd_attributes[] = {
&dev_attr_flags.attr,
&dev_attr_perf_stats.attr,
&dev_attr_dirty_shutdown.attr,
+ &dev_attr_health_bitmap_override.attr,
NULL,
};
^ permalink raw reply related
* [PATCH v2] tests/nvdimm/ndtest: Simulate nvdimm health, DSC and smart-inject
From: Shivaprasad G Bhat @ 2021-09-06 9:15 UTC (permalink / raw)
To: nvdimm
Cc: sbhat, aneesh.kumar, vaibhav, dan.j.williams, linuxppc-dev,
ira.weiny
The 'papr_scm' module and 'papr' implementation in libndctl support
PDSMs for reporting PAPR NVDIMM health, dirty-shutdown-count and
injecting smart-errors. This patch adds support for those PDSMs in
the ndtest module so that PDSM specific paths in libndctl can be exercised.
Signed-off-by: Shivaprasad G Bhat <sbhat@linux.ibm.com>
Signed-off-by: Vaibhav Jain <vaibhav@linux.ibm.com>
---
Changelog:
Since v1:
Link: https://patchwork.kernel.org/project/linux-nvdimm/list/?series=521767
* Removed the dependency on a header movement patch
tools/testing/nvdimm/test/ndtest.c | 148 ++++++++++++++++++++++++++++++++++++
tools/testing/nvdimm/test/ndtest.h | 96 +++++++++++++++++++++++
2 files changed, 244 insertions(+)
diff --git a/tools/testing/nvdimm/test/ndtest.c b/tools/testing/nvdimm/test/ndtest.c
index 6862915f1fb0..45d42cd25e82 100644
--- a/tools/testing/nvdimm/test/ndtest.c
+++ b/tools/testing/nvdimm/test/ndtest.c
@@ -48,6 +48,10 @@ static struct ndtest_dimm dimm_group1[] = {
.uuid_str = "1e5c75d2-b618-11ea-9aa3-507b9ddc0f72",
.physical_id = 0,
.num_formats = 2,
+ .flags = PAPR_PMEM_HEALTH_NON_CRITICAL,
+ .extension_flags = PDSM_DIMM_DSC_VALID | PDSM_DIMM_HEALTH_RUN_GAUGE_VALID,
+ .dimm_fuel_gauge = 95,
+ .dimm_dsc = 42,
},
{
.size = DIMM_SIZE,
@@ -55,6 +59,10 @@ static struct ndtest_dimm dimm_group1[] = {
.uuid_str = "1c4d43ac-b618-11ea-be80-507b9ddc0f72",
.physical_id = 1,
.num_formats = 2,
+ .flags = PAPR_PMEM_HEALTH_NON_CRITICAL,
+ .extension_flags = PDSM_DIMM_DSC_VALID | PDSM_DIMM_HEALTH_RUN_GAUGE_VALID,
+ .dimm_fuel_gauge = 95,
+ .dimm_dsc = 42,
},
{
.size = DIMM_SIZE,
@@ -62,6 +70,10 @@ static struct ndtest_dimm dimm_group1[] = {
.uuid_str = "a9f17ffc-b618-11ea-b36d-507b9ddc0f72",
.physical_id = 2,
.num_formats = 2,
+ .flags = PAPR_PMEM_HEALTH_NON_CRITICAL,
+ .extension_flags = PDSM_DIMM_DSC_VALID | PDSM_DIMM_HEALTH_RUN_GAUGE_VALID,
+ .dimm_fuel_gauge = 95,
+ .dimm_dsc = 42,
},
{
.size = DIMM_SIZE,
@@ -69,6 +81,10 @@ static struct ndtest_dimm dimm_group1[] = {
.uuid_str = "b6b83b22-b618-11ea-8aae-507b9ddc0f72",
.physical_id = 3,
.num_formats = 2,
+ .flags = PAPR_PMEM_HEALTH_NON_CRITICAL,
+ .extension_flags = PDSM_DIMM_DSC_VALID | PDSM_DIMM_HEALTH_RUN_GAUGE_VALID,
+ .dimm_fuel_gauge = 95,
+ .dimm_dsc = 42,
},
{
.size = DIMM_SIZE,
@@ -296,6 +312,103 @@ static int ndtest_get_config_size(struct ndtest_dimm *dimm, unsigned int buf_len
return 0;
}
+static int ndtest_pdsm_health(struct ndtest_dimm *dimm,
+ union nd_pdsm_payload *payload,
+ unsigned int buf_len)
+{
+ struct nd_papr_pdsm_health *health = &payload->health;
+
+ if (buf_len < sizeof(health))
+ return -EINVAL;
+
+ health->extension_flags = 0;
+ health->dimm_unarmed = !!(dimm->flags & PAPR_PMEM_UNARMED_MASK);
+ health->dimm_bad_shutdown = !!(dimm->flags & PAPR_PMEM_BAD_SHUTDOWN_MASK);
+ health->dimm_bad_restore = !!(dimm->flags & PAPR_PMEM_BAD_RESTORE_MASK);
+ health->dimm_health = PAPR_PDSM_DIMM_HEALTHY;
+
+ if (dimm->flags & PAPR_PMEM_HEALTH_FATAL)
+ health->dimm_health = PAPR_PDSM_DIMM_FATAL;
+ else if (dimm->flags & PAPR_PMEM_HEALTH_CRITICAL)
+ health->dimm_health = PAPR_PDSM_DIMM_CRITICAL;
+ else if (dimm->flags & PAPR_PMEM_HEALTH_UNHEALTHY ||
+ dimm->flags & PAPR_PMEM_HEALTH_NON_CRITICAL)
+ health->dimm_health = PAPR_PDSM_DIMM_UNHEALTHY;
+
+ health->extension_flags = 0;
+ if (dimm->extension_flags & PDSM_DIMM_HEALTH_RUN_GAUGE_VALID) {
+ health->dimm_fuel_gauge = dimm->dimm_fuel_gauge;
+ health->extension_flags |= PDSM_DIMM_HEALTH_RUN_GAUGE_VALID;
+ }
+ if (dimm->extension_flags & PDSM_DIMM_DSC_VALID) {
+ health->dimm_dsc = dimm->dimm_dsc;
+ health->extension_flags |= PDSM_DIMM_DSC_VALID;
+ }
+
+ return 0;
+}
+
+static void smart_notify(struct ndtest_dimm *dimm)
+{
+ struct device *bus = dimm->dev->parent;
+
+ if (!(dimm->flags & PAPR_PMEM_HEALTH_NON_CRITICAL) ||
+ (dimm->flags & PAPR_PMEM_BAD_SHUTDOWN_MASK)) {
+ device_lock(bus);
+ /* send smart notification */
+ if (dimm->notify_handle)
+ sysfs_notify_dirent(dimm->notify_handle);
+ device_unlock(bus);
+ }
+}
+
+static int ndtest_pdsm_smart_inject(struct ndtest_dimm *dimm,
+ union nd_pdsm_payload *payload,
+ unsigned int buf_len)
+{
+ struct nd_papr_pdsm_smart_inject *inj = &payload->smart_inject;
+
+ if (buf_len < sizeof(inj))
+ return -EINVAL;
+
+ if (inj->flags & PDSM_SMART_INJECT_HEALTH_FATAL) {
+ if (inj->fatal_enable)
+ dimm->flags |= PAPR_PMEM_HEALTH_FATAL;
+ else
+ dimm->flags &= ~PAPR_PMEM_HEALTH_FATAL;
+ }
+ if (inj->flags & PDSM_SMART_INJECT_BAD_SHUTDOWN) {
+ if (inj->unsafe_shutdown_enable)
+ dimm->flags |= PAPR_PMEM_SHUTDOWN_DIRTY;
+ else
+ dimm->flags &= ~PAPR_PMEM_SHUTDOWN_DIRTY;
+ }
+ smart_notify(dimm);
+
+ return 0;
+}
+
+static int ndtest_dimm_cmd_call(struct ndtest_dimm *dimm, unsigned int buf_len,
+ void *buf)
+{
+ struct nd_cmd_pkg *call_pkg = buf;
+ unsigned int len = call_pkg->nd_size_in + call_pkg->nd_size_out;
+ struct nd_pkg_pdsm *pdsm = (struct nd_pkg_pdsm *) call_pkg->nd_payload;
+ union nd_pdsm_payload *payload = &(pdsm->payload);
+ unsigned int func = call_pkg->nd_command;
+
+ switch (func) {
+ case PAPR_PDSM_HEALTH:
+ return ndtest_pdsm_health(dimm, payload, len);
+ case PAPR_PDSM_SMART_INJECT:
+ return ndtest_pdsm_smart_inject(dimm, payload, len);
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
static int ndtest_ctl(struct nvdimm_bus_descriptor *nd_desc,
struct nvdimm *nvdimm, unsigned int cmd, void *buf,
unsigned int buf_len, int *cmd_rc)
@@ -325,6 +438,9 @@ static int ndtest_ctl(struct nvdimm_bus_descriptor *nd_desc,
case ND_CMD_SET_CONFIG_DATA:
*cmd_rc = ndtest_config_set(dimm, buf_len, buf);
break;
+ case ND_CMD_CALL:
+ *cmd_rc = ndtest_dimm_cmd_call(dimm, buf_len, buf);
+ break;
default:
return -EINVAL;
}
@@ -614,6 +730,8 @@ static void put_dimms(void *data)
for (i = 0; i < p->config->dimm_count; i++)
if (p->config->dimms[i].dev) {
+ if (p->config->dimms[i].notify_handle)
+ sysfs_put(p->config->dimms[i].notify_handle);
device_unregister(p->config->dimms[i].dev);
p->config->dimms[i].dev = NULL;
}
@@ -826,6 +944,18 @@ static ssize_t flags_show(struct device *dev,
}
static DEVICE_ATTR_RO(flags);
+#define PAPR_PMEM_DIMM_CMD_MASK \
+ ((1U << PAPR_PDSM_HEALTH) \
+ | (1U << PAPR_PDSM_SMART_INJECT))
+
+static ssize_t dsm_mask_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ return sprintf(buf, "%#x\n", PAPR_PMEM_DIMM_CMD_MASK);
+}
+
+static DEVICE_ATTR_RO(dsm_mask);
+
static struct attribute *ndtest_nvdimm_attributes[] = {
&dev_attr_nvdimm_show_handle.attr,
&dev_attr_vendor.attr,
@@ -837,6 +967,7 @@ static struct attribute *ndtest_nvdimm_attributes[] = {
&dev_attr_format.attr,
&dev_attr_format1.attr,
&dev_attr_flags.attr,
+ &dev_attr_dsm_mask.attr,
NULL,
};
@@ -856,6 +987,7 @@ static int ndtest_dimm_register(struct ndtest_priv *priv,
{
struct device *dev = &priv->pdev.dev;
unsigned long dimm_flags = dimm->flags;
+ struct kernfs_node *papr_kernfs;
if (dimm->num_formats > 1) {
set_bit(NDD_ALIASING, &dimm_flags);
@@ -882,6 +1014,20 @@ static int ndtest_dimm_register(struct ndtest_priv *priv,
return -ENOMEM;
}
+ nd_synchronize();
+
+ papr_kernfs = sysfs_get_dirent(nvdimm_kobj(dimm->nvdimm)->sd, "papr");
+ if (!papr_kernfs) {
+ pr_err("Could not initialize the notifier handle\n");
+ return 0;
+ }
+
+ dimm->notify_handle = sysfs_get_dirent(papr_kernfs, "flags");
+ sysfs_put(papr_kernfs);
+ if (!dimm->notify_handle) {
+ pr_err("Could not initialize the notifier handle\n");
+ return 0;
+ }
return 0;
}
@@ -953,6 +1099,8 @@ static int ndtest_bus_register(struct ndtest_priv *p)
p->bus_desc.provider_name = NULL;
p->bus_desc.attr_groups = ndtest_attribute_groups;
+ set_bit(NVDIMM_FAMILY_PAPR, &p->bus_desc.dimm_family_mask);
+
p->bus = nvdimm_bus_register(&p->pdev.dev, &p->bus_desc);
if (!p->bus) {
dev_err(&p->pdev.dev, "Error creating nvdimm bus %pOF\n", p->dn);
diff --git a/tools/testing/nvdimm/test/ndtest.h b/tools/testing/nvdimm/test/ndtest.h
index 2c54c9cbb90c..b9b381021313 100644
--- a/tools/testing/nvdimm/test/ndtest.h
+++ b/tools/testing/nvdimm/test/ndtest.h
@@ -16,6 +16,8 @@
#define PAPR_PMEM_HEALTH_FATAL (1ULL << (63 - 5))
/* SCM contents cannot persist due to current platform health status */
#define PAPR_PMEM_HEALTH_UNHEALTHY (1ULL << (63 - 6))
+/* SCM device is unable to persist memory contents in certain conditions */
+#define PAPR_PMEM_HEALTH_NON_CRITICAL (1ULL << (63 - 7))
/* Bits status indicators for health bitmap indicating unarmed dimm */
#define PAPR_PMEM_UNARMED_MASK (PAPR_PMEM_UNARMED | \
@@ -80,6 +82,13 @@ struct ndtest_dimm {
int id;
int fail_cmd_code;
u8 no_alias;
+
+ struct kernfs_node *notify_handle;
+
+ /* SMART Health information */
+ u32 extension_flags;
+ u16 dimm_fuel_gauge;
+ u64 dimm_dsc;
};
struct ndtest_mapping {
@@ -98,6 +107,93 @@ struct ndtest_region {
u8 range_index;
};
+#define ND_PDSM_PAYLOAD_MAX_SIZE 184
+/*
+ * Methods to be embedded in ND_CMD_CALL request. These are sent to the kernel
+ * via 'nd_cmd_pkg.nd_command' member of the ioctl struct
+ */
+enum papr_pdsm {
+ PAPR_PDSM_MIN = 0x0,
+ PAPR_PDSM_HEALTH,
+ PAPR_PDSM_SMART_INJECT,
+ PAPR_PDSM_MAX,
+};
+
+/* Various nvdimm health indicators */
+#define PAPR_PDSM_DIMM_HEALTHY 0
+#define PAPR_PDSM_DIMM_UNHEALTHY 1
+#define PAPR_PDSM_DIMM_CRITICAL 2
+#define PAPR_PDSM_DIMM_FATAL 3
+
+/* struct nd_papr_pdsm_health.extension_flags field flags */
+
+/* Indicate that the 'dimm_fuel_gauge' field is valid */
+#define PDSM_DIMM_HEALTH_RUN_GAUGE_VALID 1
+
+/* Indicate that the 'dimm_dsc' field is valid */
+#define PDSM_DIMM_DSC_VALID 2
+
+/*
+ * Struct exchanged between kernel & ndctl in for PAPR_PDSM_HEALTH
+ * Various flags indicate the health status of the dimm.
+ */
+struct nd_papr_pdsm_health {
+ union {
+ struct {
+ __u32 extension_flags;
+ __u8 dimm_unarmed;
+ __u8 dimm_bad_shutdown;
+ __u8 dimm_bad_restore;
+ __u8 dimm_scrubbed;
+ __u8 dimm_locked;
+ __u8 dimm_encrypted;
+ __u16 dimm_health;
+
+ /* Extension flag PDSM_DIMM_HEALTH_RUN_GAUGE_VALID */
+ __u16 dimm_fuel_gauge;
+
+ /* Extension flag PDSM_DIMM_DSC_VALID */
+ __u64 dimm_dsc;
+ };
+ __u8 buf[ND_PDSM_PAYLOAD_MAX_SIZE];
+ };
+};
+
+/* Flags for injecting specific smart errors */
+#define PDSM_SMART_INJECT_HEALTH_FATAL (1 << 0)
+#define PDSM_SMART_INJECT_BAD_SHUTDOWN (1 << 1)
+
+struct nd_papr_pdsm_smart_inject {
+ union {
+ struct {
+ /* One or more of PDSM_SMART_INJECT_ */
+ __u32 flags;
+ __u8 fatal_enable;
+ __u8 unsafe_shutdown_enable;
+ };
+ __u8 buf[ND_PDSM_PAYLOAD_MAX_SIZE];
+ };
+};
+
+/* Maximal union that can hold all possible payload types */
+union nd_pdsm_payload {
+ struct nd_papr_pdsm_health health;
+ struct nd_papr_pdsm_smart_inject smart_inject;
+ __u8 buf[ND_PDSM_PAYLOAD_MAX_SIZE];
+} __packed;
+
+/*
+ * PDSM-header + payload expected with ND_CMD_CALL ioctl from libnvdimm
+ * Valid member of union 'payload' is identified via 'nd_cmd_pkg.nd_command'
+ * that should always precede this struct when sent to papr_scm via CMD_CALL
+ * interface.
+ */
+struct nd_pkg_pdsm {
+ __s32 cmd_status; /* Out: Sub-cmd status returned back */
+ __u16 reserved[2]; /* Ignored and to be set as '0' */
+ union nd_pdsm_payload payload;
+} __packed;
+
struct ndtest_config {
struct ndtest_dimm *dimms;
struct ndtest_region *regions;
^ permalink raw reply related
* [RFC PATCH v2] powerpc/papr_scm: Move duplicate definitions to common header files
From: Shivaprasad G Bhat @ 2021-09-06 9:27 UTC (permalink / raw)
To: linuxppc-dev, linux-kernel, mpe
Cc: nvdimm, dan.j.williams, vaibhav, sbhat, aneesh.kumar
papr_scm and ndtest share common PDSM payload structs like
nd_papr_pdsm_health. Presently these structs are duplicated across the papr_pdsm.h
and ndtest.h header files. Since 'ndtest' is essentially arch independent and can
run on platforms other than PPC64, a way needs to be devised to avoid redundancy
and duplication of PDSM structs in future.
So the patch proposes moving the PDSM header from arch/powerpc/include/uapi/ to
the generic include/uapi/linux directory. Also, there are some #defines common
between papr_scm and ndtest which are not exported to the user space. So, move
them to a header file which can be shared across ndtest and papr_scm via newly
introduced include/linux/papr_scm.h.
Signed-off-by: Shivaprasad G Bhat <sbhat@linux.ibm.com>
Signed-off-by: Vaibhav Jain <vaibhav@linux.ibm.com>
Suggested-by: "Aneesh Kumar K.V" <aneesh.kumar@linux.ibm.com>
---
Changelog:
Since v1:
Link: https://patchwork.kernel.org/project/linux-nvdimm/patch/162505488483.72147.12741153746322191381.stgit@56e104a48989/
* Removed dependency on this patch for the other patches
MAINTAINERS | 2
arch/powerpc/include/uapi/asm/papr_pdsm.h | 165 -----------------------------
arch/powerpc/platforms/pseries/papr_scm.c | 43 --------
include/linux/papr_scm.h | 48 ++++++++
include/uapi/linux/papr_pdsm.h | 165 +++++++++++++++++++++++++++++
tools/testing/nvdimm/test/ndtest.c | 2
tools/testing/nvdimm/test/ndtest.h | 120 ---------------------
7 files changed, 219 insertions(+), 326 deletions(-)
delete mode 100644 arch/powerpc/include/uapi/asm/papr_pdsm.h
create mode 100644 include/linux/papr_scm.h
create mode 100644 include/uapi/linux/papr_pdsm.h
diff --git a/MAINTAINERS b/MAINTAINERS
index 6c8be735cc91..03fe0c77cefa 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -10661,6 +10661,8 @@ F: drivers/rtc/rtc-opal.c
F: drivers/scsi/ibmvscsi/
F: drivers/tty/hvc/hvc_opal.c
F: drivers/watchdog/wdrtas.c
+F: include/linux/papr_scm.h
+F: include/uapi/linux/papr_pdsm.h
F: tools/testing/selftests/powerpc
N: /pmac
N: powermac
diff --git a/arch/powerpc/include/uapi/asm/papr_pdsm.h b/arch/powerpc/include/uapi/asm/papr_pdsm.h
deleted file mode 100644
index 17439925045c..000000000000
--- a/arch/powerpc/include/uapi/asm/papr_pdsm.h
+++ /dev/null
@@ -1,165 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-/*
- * PAPR nvDimm Specific Methods (PDSM) and structs for libndctl
- *
- * (C) Copyright IBM 2020
- *
- * Author: Vaibhav Jain <vaibhav at linux.ibm.com>
- */
-
-#ifndef _UAPI_ASM_POWERPC_PAPR_PDSM_H_
-#define _UAPI_ASM_POWERPC_PAPR_PDSM_H_
-
-#include <linux/types.h>
-#include <linux/ndctl.h>
-
-/*
- * PDSM Envelope:
- *
- * The ioctl ND_CMD_CALL exchange data between user-space and kernel via
- * envelope which consists of 2 headers sections and payload sections as
- * illustrated below:
- * +-----------------+---------------+---------------------------+
- * | 64-Bytes | 8-Bytes | Max 184-Bytes |
- * +-----------------+---------------+---------------------------+
- * | ND-HEADER | PDSM-HEADER | PDSM-PAYLOAD |
- * +-----------------+---------------+---------------------------+
- * | nd_family | | |
- * | nd_size_out | cmd_status | |
- * | nd_size_in | reserved | nd_pdsm_payload |
- * | nd_command | payload --> | |
- * | nd_fw_size | | |
- * | nd_payload ---> | | |
- * +---------------+-----------------+---------------------------+
- *
- * ND Header:
- * This is the generic libnvdimm header described as 'struct nd_cmd_pkg'
- * which is interpreted by libnvdimm before passed on to papr_scm. Important
- * member fields used are:
- * 'nd_family' : (In) NVDIMM_FAMILY_PAPR_SCM
- * 'nd_size_in' : (In) PDSM-HEADER + PDSM-IN-PAYLOAD (usually 0)
- * 'nd_size_out' : (In) PDSM-HEADER + PDSM-RETURN-PAYLOAD
- * 'nd_command' : (In) One of PAPR_PDSM_XXX
- * 'nd_fw_size' : (Out) PDSM-HEADER + size of actual payload returned
- *
- * PDSM Header:
- * This is papr-scm specific header that precedes the payload. This is defined
- * as nd_cmd_pdsm_pkg. Following fields aare available in this header:
- *
- * 'cmd_status' : (Out) Errors if any encountered while servicing PDSM.
- * 'reserved' : Not used, reserved for future and should be set to 0.
- * 'payload' : A union of all the possible payload structs
- *
- * PDSM Payload:
- *
- * The layout of the PDSM Payload is defined by various structs shared between
- * papr_scm and libndctl so that contents of payload can be interpreted. As such
- * its defined as a union of all possible payload structs as
- * 'union nd_pdsm_payload'. Based on the value of 'nd_cmd_pkg.nd_command'
- * appropriate member of the union is accessed.
- */
-
-/* Max payload size that we can handle */
-#define ND_PDSM_PAYLOAD_MAX_SIZE 184
-
-/* Max payload size that we can handle */
-#define ND_PDSM_HDR_SIZE \
- (sizeof(struct nd_pkg_pdsm) - ND_PDSM_PAYLOAD_MAX_SIZE)
-
-/* Various nvdimm health indicators */
-#define PAPR_PDSM_DIMM_HEALTHY 0
-#define PAPR_PDSM_DIMM_UNHEALTHY 1
-#define PAPR_PDSM_DIMM_CRITICAL 2
-#define PAPR_PDSM_DIMM_FATAL 3
-
-/* struct nd_papr_pdsm_health.extension_flags field flags */
-
-/* Indicate that the 'dimm_fuel_gauge' field is valid */
-#define PDSM_DIMM_HEALTH_RUN_GAUGE_VALID 1
-
-/* Indicate that the 'dimm_dsc' field is valid */
-#define PDSM_DIMM_DSC_VALID 2
-
-/*
- * Struct exchanged between kernel & ndctl in for PAPR_PDSM_HEALTH
- * Various flags indicate the health status of the dimm.
- *
- * extension_flags : Any extension fields present in the struct.
- * dimm_unarmed : Dimm not armed. So contents wont persist.
- * dimm_bad_shutdown : Previous shutdown did not persist contents.
- * dimm_bad_restore : Contents from previous shutdown werent restored.
- * dimm_scrubbed : Contents of the dimm have been scrubbed.
- * dimm_locked : Contents of the dimm cant be modified until CEC reboot
- * dimm_encrypted : Contents of dimm are encrypted.
- * dimm_health : Dimm health indicator. One of PAPR_PDSM_DIMM_XXXX
- * dimm_fuel_gauge : Life remaining of DIMM as a percentage from 0-100
- */
-struct nd_papr_pdsm_health {
- union {
- struct {
- __u32 extension_flags;
- __u8 dimm_unarmed;
- __u8 dimm_bad_shutdown;
- __u8 dimm_bad_restore;
- __u8 dimm_scrubbed;
- __u8 dimm_locked;
- __u8 dimm_encrypted;
- __u16 dimm_health;
-
- /* Extension flag PDSM_DIMM_HEALTH_RUN_GAUGE_VALID */
- __u16 dimm_fuel_gauge;
-
- /* Extension flag PDSM_DIMM_DSC_VALID */
- __u64 dimm_dsc;
- };
- __u8 buf[ND_PDSM_PAYLOAD_MAX_SIZE];
- };
-};
-
-/* Flags for injecting specific smart errors */
-#define PDSM_SMART_INJECT_HEALTH_FATAL (1 << 0)
-#define PDSM_SMART_INJECT_BAD_SHUTDOWN (1 << 1)
-
-struct nd_papr_pdsm_smart_inject {
- union {
- struct {
- /* One or more of PDSM_SMART_INJECT_ */
- __u32 flags;
- __u8 fatal_enable;
- __u8 unsafe_shutdown_enable;
- };
- __u8 buf[ND_PDSM_PAYLOAD_MAX_SIZE];
- };
-};
-
-/*
- * Methods to be embedded in ND_CMD_CALL request. These are sent to the kernel
- * via 'nd_cmd_pkg.nd_command' member of the ioctl struct
- */
-enum papr_pdsm {
- PAPR_PDSM_MIN = 0x0,
- PAPR_PDSM_HEALTH,
- PAPR_PDSM_SMART_INJECT,
- PAPR_PDSM_MAX,
-};
-
-/* Maximal union that can hold all possible payload types */
-union nd_pdsm_payload {
- struct nd_papr_pdsm_health health;
- struct nd_papr_pdsm_smart_inject smart_inject;
- __u8 buf[ND_PDSM_PAYLOAD_MAX_SIZE];
-} __packed;
-
-/*
- * PDSM-header + payload expected with ND_CMD_CALL ioctl from libnvdimm
- * Valid member of union 'payload' is identified via 'nd_cmd_pkg.nd_command'
- * that should always precede this struct when sent to papr_scm via CMD_CALL
- * interface.
- */
-struct nd_pkg_pdsm {
- __s32 cmd_status; /* Out: Sub-cmd status returned back */
- __u16 reserved[2]; /* Ignored and to be set as '0' */
- union nd_pdsm_payload payload;
-} __packed;
-
-#endif /* _UAPI_ASM_POWERPC_PAPR_PDSM_H_ */
diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c
index de4cf329cfb3..b7437c61a270 100644
--- a/arch/powerpc/platforms/pseries/papr_scm.c
+++ b/arch/powerpc/platforms/pseries/papr_scm.c
@@ -16,7 +16,8 @@
#include <linux/nd.h>
#include <asm/plpar_wrappers.h>
-#include <asm/papr_pdsm.h>
+#include <uapi/linux/papr_pdsm.h>
+#include <linux/papr_scm.h>
#include <asm/mce.h>
#include <asm/unaligned.h>
@@ -28,46 +29,6 @@
(1ul << ND_CMD_SET_CONFIG_DATA) | \
(1ul << ND_CMD_CALL))
-/* DIMM health bitmap bitmap indicators */
-/* SCM device is unable to persist memory contents */
-#define PAPR_PMEM_UNARMED (1ULL << (63 - 0))
-/* SCM device failed to persist memory contents */
-#define PAPR_PMEM_SHUTDOWN_DIRTY (1ULL << (63 - 1))
-/* SCM device contents are persisted from previous IPL */
-#define PAPR_PMEM_SHUTDOWN_CLEAN (1ULL << (63 - 2))
-/* SCM device contents are not persisted from previous IPL */
-#define PAPR_PMEM_EMPTY (1ULL << (63 - 3))
-/* SCM device memory life remaining is critically low */
-#define PAPR_PMEM_HEALTH_CRITICAL (1ULL << (63 - 4))
-/* SCM device will be garded off next IPL due to failure */
-#define PAPR_PMEM_HEALTH_FATAL (1ULL << (63 - 5))
-/* SCM contents cannot persist due to current platform health status */
-#define PAPR_PMEM_HEALTH_UNHEALTHY (1ULL << (63 - 6))
-/* SCM device is unable to persist memory contents in certain conditions */
-#define PAPR_PMEM_HEALTH_NON_CRITICAL (1ULL << (63 - 7))
-/* SCM device is encrypted */
-#define PAPR_PMEM_ENCRYPTED (1ULL << (63 - 8))
-/* SCM device has been scrubbed and locked */
-#define PAPR_PMEM_SCRUBBED_AND_LOCKED (1ULL << (63 - 9))
-
-/* Bits status indicators for health bitmap indicating unarmed dimm */
-#define PAPR_PMEM_UNARMED_MASK (PAPR_PMEM_UNARMED | \
- PAPR_PMEM_HEALTH_UNHEALTHY)
-
-/* Bits status indicators for health bitmap indicating unflushed dimm */
-#define PAPR_PMEM_BAD_SHUTDOWN_MASK (PAPR_PMEM_SHUTDOWN_DIRTY)
-
-/* Bits status indicators for health bitmap indicating unrestored dimm */
-#define PAPR_PMEM_BAD_RESTORE_MASK (PAPR_PMEM_EMPTY)
-
-/* Bit status indicators for smart event notification */
-#define PAPR_PMEM_SMART_EVENT_MASK (PAPR_PMEM_HEALTH_CRITICAL | \
- PAPR_PMEM_HEALTH_FATAL | \
- PAPR_PMEM_HEALTH_UNHEALTHY)
-
-#define PAPR_SCM_PERF_STATS_EYECATCHER __stringify(SCMSTATS)
-#define PAPR_SCM_PERF_STATS_VERSION 0x1
-
/* Use bitblt method to override specific bits in the '_bitmap_' */
#define BITBLT_BITMAP(_bitmap_, _mask_, _override_) \
(((_bitmap_) & ~(_mask_)) | ((_mask_) & (_override_)))
diff --git a/include/linux/papr_scm.h b/include/linux/papr_scm.h
new file mode 100644
index 000000000000..f116e5ffef36
--- /dev/null
+++ b/include/linux/papr_scm.h
@@ -0,0 +1,48 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef __LINUX_PAPR_SCM_H
+#define __LINUX_PAPR_SCM_H
+
+/* DIMM health bitmap indicators */
+/* SCM device is unable to persist memory contents */
+#define PAPR_PMEM_UNARMED (1ULL << (63 - 0))
+/* SCM device failed to persist memory contents */
+#define PAPR_PMEM_SHUTDOWN_DIRTY (1ULL << (63 - 1))
+/* SCM device contents are persisted from previous IPL */
+#define PAPR_PMEM_SHUTDOWN_CLEAN (1ULL << (63 - 2))
+/* SCM device contents are not persisted from previous IPL */
+#define PAPR_PMEM_EMPTY (1ULL << (63 - 3))
+/* SCM device memory life remaining is critically low */
+#define PAPR_PMEM_HEALTH_CRITICAL (1ULL << (63 - 4))
+/* SCM device will be garded off next IPL due to failure */
+#define PAPR_PMEM_HEALTH_FATAL (1ULL << (63 - 5))
+/* SCM contents cannot persist due to current platform health status */
+#define PAPR_PMEM_HEALTH_UNHEALTHY (1ULL << (63 - 6))
+/* SCM device is unable to persist memory contents in certain conditions */
+#define PAPR_PMEM_HEALTH_NON_CRITICAL (1ULL << (63 - 7))
+/* SCM device is encrypted */
+#define PAPR_PMEM_ENCRYPTED (1ULL << (63 - 8))
+/* SCM device has been scrubbed and locked */
+#define PAPR_PMEM_SCRUBBED_AND_LOCKED (1ULL << (63 - 9))
+
+#define PAPR_PMEM_SAVE_FAILED (1ULL << (63 - 10))
+
+/* Bits status indicators for health bitmap indicating unarmed dimm */
+#define PAPR_PMEM_UNARMED_MASK (PAPR_PMEM_UNARMED | PAPR_PMEM_HEALTH_UNHEALTHY)
+
+/* Bits status indicators for health bitmap indicating unflushed dimm */
+#define PAPR_PMEM_BAD_SHUTDOWN_MASK (PAPR_PMEM_SHUTDOWN_DIRTY)
+
+/* Bits status indicators for health bitmap indicating unrestored dimm */
+#define PAPR_PMEM_BAD_RESTORE_MASK (PAPR_PMEM_EMPTY)
+
+/* Bit status indicators for smart event notification */
+#define PAPR_PMEM_SMART_EVENT_MASK (PAPR_PMEM_HEALTH_CRITICAL | \
+ PAPR_PMEM_HEALTH_FATAL | \
+ PAPR_PMEM_HEALTH_UNHEALTHY)
+
+#define PAPR_PMEM_SAVE_MASK (PAPR_PMEM_SAVE_FAILED)
+
+#define PAPR_SCM_PERF_STATS_EYECATCHER __stringify(SCMSTATS)
+#define PAPR_SCM_PERF_STATS_VERSION 0x1
+
+#endif /* __LINUX_PAPR_SCM_H */
diff --git a/include/uapi/linux/papr_pdsm.h b/include/uapi/linux/papr_pdsm.h
new file mode 100644
index 000000000000..1be9906f4540
--- /dev/null
+++ b/include/uapi/linux/papr_pdsm.h
@@ -0,0 +1,165 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * PAPR nvDimm Specific Methods (PDSM) and structs for libndctl
+ *
+ * (C) Copyright IBM 2020-2021
+ *
+ * Author: Vaibhav Jain <vaibhav at linux.ibm.com>
+ */
+
+#ifndef _UAPI_LINUX_PAPR_PDSM_H_
+#define _UAPI_LINUX_PAPR_PDSM_H_
+
+#include <linux/types.h>
+#include <linux/ndctl.h>
+
+/*
+ * PDSM Envelope:
+ *
+ * The ioctl ND_CMD_CALL exchange data between user-space and kernel via
+ * envelope which consists of 2 headers sections and payload sections as
+ * illustrated below:
+ * +-----------------+---------------+---------------------------+
+ * | 64-Bytes | 8-Bytes | Max 184-Bytes |
+ * +-----------------+---------------+---------------------------+
+ * | ND-HEADER | PDSM-HEADER | PDSM-PAYLOAD |
+ * +-----------------+---------------+---------------------------+
+ * | nd_family | | |
+ * | nd_size_out | cmd_status | |
+ * | nd_size_in | reserved | nd_pdsm_payload |
+ * | nd_command | payload --> | |
+ * | nd_fw_size | | |
+ * | nd_payload ---> | | |
+ * +-----------------+---------------+---------------------------+
+ *
+ * ND Header:
+ * This is the generic libnvdimm header described as 'struct nd_cmd_pkg'
+ * which is interpreted by libnvdimm before passed on to papr_scm. Important
+ * member fields used are:
+ * 'nd_family' : (In) NVDIMM_FAMILY_PAPR_SCM
+ * 'nd_size_in' : (In) PDSM-HEADER + PDSM-IN-PAYLOAD (usually 0)
+ * 'nd_size_out' : (In) PDSM-HEADER + PDSM-RETURN-PAYLOAD
+ * 'nd_command' : (In) One of PAPR_PDSM_XXX
+ * 'nd_fw_size' : (Out) PDSM-HEADER + size of actual payload returned
+ *
+ * PDSM Header:
+ * This is papr-scm specific header that precedes the payload. This is defined
+ * as 'struct nd_pkg_pdsm'. Following fields are available in this header:
+ *
+ * 'cmd_status' : (Out) Errors if any encountered while servicing PDSM.
+ * 'reserved' : Not used, reserved for future and should be set to 0.
+ * 'payload' : A union of all the possible payload structs
+ *
+ * PDSM Payload:
+ *
+ * The layout of the PDSM Payload is defined by various structs shared between
+ * papr_scm and libndctl so that contents of payload can be interpreted. As such
+ * its defined as a union of all possible payload structs as
+ * 'union nd_pdsm_payload'. Based on the value of 'nd_cmd_pkg.nd_command'
+ * appropriate member of the union is accessed.
+ */
+
+/* Max payload size that we can handle */
+#define ND_PDSM_PAYLOAD_MAX_SIZE 184
+
+/* Size of the PDSM header that precedes the payload */
+#define ND_PDSM_HDR_SIZE \
+ (sizeof(struct nd_pkg_pdsm) - ND_PDSM_PAYLOAD_MAX_SIZE)
+
+/* Various nvdimm health indicators */
+#define PAPR_PDSM_DIMM_HEALTHY 0
+#define PAPR_PDSM_DIMM_UNHEALTHY 1
+#define PAPR_PDSM_DIMM_CRITICAL 2
+#define PAPR_PDSM_DIMM_FATAL 3
+
+/* struct nd_papr_pdsm_health.extension_flags field flags */
+
+/* Indicate that the 'dimm_fuel_gauge' field is valid */
+#define PDSM_DIMM_HEALTH_RUN_GAUGE_VALID 1
+
+/* Indicate that the 'dimm_dsc' field is valid */
+#define PDSM_DIMM_DSC_VALID 2
+
+/*
+ * Struct exchanged between kernel & ndctl for PAPR_PDSM_HEALTH
+ * Various flags indicate the health status of the dimm.
+ *
+ * extension_flags : Any extension fields present in the struct.
+ * dimm_unarmed : Dimm not armed. So contents won't persist.
+ * dimm_bad_shutdown : Previous shutdown did not persist contents.
+ * dimm_bad_restore : Contents from previous shutdown weren't restored.
+ * dimm_scrubbed : Contents of the dimm have been scrubbed.
+ * dimm_locked : Contents of the dimm can't be modified until CEC reboot
+ * dimm_encrypted : Contents of dimm are encrypted.
+ * dimm_health : Dimm health indicator. One of PAPR_PDSM_DIMM_XXXX
+ * dimm_fuel_gauge : Life remaining of DIMM as a percentage from 0-100
+ */
+struct nd_papr_pdsm_health {
+ union {
+ struct {
+ __u32 extension_flags;
+ __u8 dimm_unarmed;
+ __u8 dimm_bad_shutdown;
+ __u8 dimm_bad_restore;
+ __u8 dimm_scrubbed;
+ __u8 dimm_locked;
+ __u8 dimm_encrypted;
+ __u16 dimm_health;
+
+ /* Extension flag PDSM_DIMM_HEALTH_RUN_GAUGE_VALID */
+ __u16 dimm_fuel_gauge;
+
+ /* Extension flag PDSM_DIMM_DSC_VALID */
+ __u64 dimm_dsc;
+ };
+ __u8 buf[ND_PDSM_PAYLOAD_MAX_SIZE];
+ };
+};
+
+/* Flags for injecting specific smart errors */
+#define PDSM_SMART_INJECT_HEALTH_FATAL (1 << 0)
+#define PDSM_SMART_INJECT_BAD_SHUTDOWN (1 << 1)
+
+struct nd_papr_pdsm_smart_inject {
+ union {
+ struct {
+ /* One or more of PDSM_SMART_INJECT_ */
+ __u32 flags;
+ __u8 fatal_enable;
+ __u8 unsafe_shutdown_enable;
+ };
+ __u8 buf[ND_PDSM_PAYLOAD_MAX_SIZE];
+ };
+};
+
+/*
+ * Methods to be embedded in ND_CMD_CALL request. These are sent to the kernel
+ * via 'nd_cmd_pkg.nd_command' member of the ioctl struct
+ */
+enum papr_pdsm {
+ PAPR_PDSM_MIN = 0x0,
+ PAPR_PDSM_HEALTH,
+ PAPR_PDSM_SMART_INJECT,
+ PAPR_PDSM_MAX,
+};
+
+/* Maximal union that can hold all possible payload types */
+union nd_pdsm_payload {
+ struct nd_papr_pdsm_health health;
+ struct nd_papr_pdsm_smart_inject smart_inject;
+ __u8 buf[ND_PDSM_PAYLOAD_MAX_SIZE];
+} __packed;
+
+/*
+ * PDSM-header + payload expected with ND_CMD_CALL ioctl from libnvdimm
+ * Valid member of union 'payload' is identified via 'nd_cmd_pkg.nd_command'
+ * that should always precede this struct when sent to papr_scm via CMD_CALL
+ * interface.
+ */
+struct nd_pkg_pdsm {
+ __s32 cmd_status; /* Out: Sub-cmd status returned back */
+ __u16 reserved[2]; /* Ignored and to be set as '0' */
+ union nd_pdsm_payload payload;
+} __packed;
+
+#endif /* _UAPI_LINUX_PAPR_PDSM_H_ */
diff --git a/tools/testing/nvdimm/test/ndtest.c b/tools/testing/nvdimm/test/ndtest.c
index 45d42cd25e82..6622e8adbd11 100644
--- a/tools/testing/nvdimm/test/ndtest.c
+++ b/tools/testing/nvdimm/test/ndtest.c
@@ -13,6 +13,8 @@
#include <nd-core.h>
#include <linux/printk.h>
#include <linux/seq_buf.h>
+#include <linux/papr_scm.h>
+#include <uapi/linux/papr_pdsm.h>
#include "../watermark.h"
#include "nfit_test.h"
diff --git a/tools/testing/nvdimm/test/ndtest.h b/tools/testing/nvdimm/test/ndtest.h
index b9b381021313..e18b3b006fa2 100644
--- a/tools/testing/nvdimm/test/ndtest.h
+++ b/tools/testing/nvdimm/test/ndtest.h
@@ -5,39 +5,6 @@
#include <linux/platform_device.h>
#include <linux/libnvdimm.h>
-/* SCM device is unable to persist memory contents */
-#define PAPR_PMEM_UNARMED (1ULL << (63 - 0))
-/* SCM device failed to persist memory contents */
-#define PAPR_PMEM_SHUTDOWN_DIRTY (1ULL << (63 - 1))
-/* SCM device contents are not persisted from previous IPL */
-#define PAPR_PMEM_EMPTY (1ULL << (63 - 3))
-#define PAPR_PMEM_HEALTH_CRITICAL (1ULL << (63 - 4))
-/* SCM device will be garded off next IPL due to failure */
-#define PAPR_PMEM_HEALTH_FATAL (1ULL << (63 - 5))
-/* SCM contents cannot persist due to current platform health status */
-#define PAPR_PMEM_HEALTH_UNHEALTHY (1ULL << (63 - 6))
-/* SCM device is unable to persist memory contents in certain conditions */
-#define PAPR_PMEM_HEALTH_NON_CRITICAL (1ULL << (63 - 7))
-
-/* Bits status indicators for health bitmap indicating unarmed dimm */
-#define PAPR_PMEM_UNARMED_MASK (PAPR_PMEM_UNARMED | \
- PAPR_PMEM_HEALTH_UNHEALTHY)
-
-#define PAPR_PMEM_SAVE_FAILED (1ULL << (63 - 10))
-
-/* Bits status indicators for health bitmap indicating unflushed dimm */
-#define PAPR_PMEM_BAD_SHUTDOWN_MASK (PAPR_PMEM_SHUTDOWN_DIRTY)
-
-/* Bits status indicators for health bitmap indicating unrestored dimm */
-#define PAPR_PMEM_BAD_RESTORE_MASK (PAPR_PMEM_EMPTY)
-
-/* Bit status indicators for smart event notification */
-#define PAPR_PMEM_SMART_EVENT_MASK (PAPR_PMEM_HEALTH_CRITICAL | \
- PAPR_PMEM_HEALTH_FATAL | \
- PAPR_PMEM_HEALTH_UNHEALTHY)
-
-#define PAPR_PMEM_SAVE_MASK (PAPR_PMEM_SAVE_FAILED)
-
struct ndtest_config;
struct ndtest_priv {
@@ -107,93 +74,6 @@ struct ndtest_region {
u8 range_index;
};
-#define ND_PDSM_PAYLOAD_MAX_SIZE 184
-/*
- * Methods to be embedded in ND_CMD_CALL request. These are sent to the kernel
- * via 'nd_cmd_pkg.nd_command' member of the ioctl struct
- */
-enum papr_pdsm {
- PAPR_PDSM_MIN = 0x0,
- PAPR_PDSM_HEALTH,
- PAPR_PDSM_SMART_INJECT,
- PAPR_PDSM_MAX,
-};
-
-/* Various nvdimm health indicators */
-#define PAPR_PDSM_DIMM_HEALTHY 0
-#define PAPR_PDSM_DIMM_UNHEALTHY 1
-#define PAPR_PDSM_DIMM_CRITICAL 2
-#define PAPR_PDSM_DIMM_FATAL 3
-
-/* struct nd_papr_pdsm_health.extension_flags field flags */
-
-/* Indicate that the 'dimm_fuel_gauge' field is valid */
-#define PDSM_DIMM_HEALTH_RUN_GAUGE_VALID 1
-
-/* Indicate that the 'dimm_dsc' field is valid */
-#define PDSM_DIMM_DSC_VALID 2
-
-/*
- * Struct exchanged between kernel & ndctl in for PAPR_PDSM_HEALTH
- * Various flags indicate the health status of the dimm.
- */
-struct nd_papr_pdsm_health {
- union {
- struct {
- __u32 extension_flags;
- __u8 dimm_unarmed;
- __u8 dimm_bad_shutdown;
- __u8 dimm_bad_restore;
- __u8 dimm_scrubbed;
- __u8 dimm_locked;
- __u8 dimm_encrypted;
- __u16 dimm_health;
-
- /* Extension flag PDSM_DIMM_HEALTH_RUN_GAUGE_VALID */
- __u16 dimm_fuel_gauge;
-
- /* Extension flag PDSM_DIMM_DSC_VALID */
- __u64 dimm_dsc;
- };
- __u8 buf[ND_PDSM_PAYLOAD_MAX_SIZE];
- };
-};
-
-/* Flags for injecting specific smart errors */
-#define PDSM_SMART_INJECT_HEALTH_FATAL (1 << 0)
-#define PDSM_SMART_INJECT_BAD_SHUTDOWN (1 << 1)
-
-struct nd_papr_pdsm_smart_inject {
- union {
- struct {
- /* One or more of PDSM_SMART_INJECT_ */
- __u32 flags;
- __u8 fatal_enable;
- __u8 unsafe_shutdown_enable;
- };
- __u8 buf[ND_PDSM_PAYLOAD_MAX_SIZE];
- };
-};
-
-/* Maximal union that can hold all possible payload types */
-union nd_pdsm_payload {
- struct nd_papr_pdsm_health health;
- struct nd_papr_pdsm_smart_inject smart_inject;
- __u8 buf[ND_PDSM_PAYLOAD_MAX_SIZE];
-} __packed;
-
-/*
- * PDSM-header + payload expected with ND_CMD_CALL ioctl from libnvdimm
- * Valid member of union 'payload' is identified via 'nd_cmd_pkg.nd_command'
- * that should always precede this struct when sent to papr_scm via CMD_CALL
- * interface.
- */
-struct nd_pkg_pdsm {
- __s32 cmd_status; /* Out: Sub-cmd status returned back */
- __u16 reserved[2]; /* Ignored and to be set as '0' */
- union nd_pdsm_payload payload;
-} __packed;
-
struct ndtest_config {
struct ndtest_dimm *dimms;
struct ndtest_region *regions;
^ permalink raw reply related
* [PATCH 1/5] s390/pci: refresh function handle in iomap
From: Niklas Schnelle @ 2021-09-06 9:49 UTC (permalink / raw)
To: Bjorn Helgaas
Cc: linux-s390, Pierre Morel, Matthew Rosato, linux-kernel,
Oliver O'Halloran, Linas Vepstas, linuxppc-dev
In-Reply-To: <20210906094927.524106-1-schnelle@linux.ibm.com>
The function handle of a PCI function is updated when disabling or
enabling it as well as when the function's availability changes or it
enters the error state.
Until now this only occurred either while there is no struct pci_dev
associated with the function yet or the function became unavailable.
This meant that leaving a stale function handle in the iomap either
didn't happen because there was no iomap yet or it led to errors on PCI
access but so would the correct disabled function handle.
In the future a CLP Set PCI Function Disable/Enable cycle during PCI
device recovery may be done while the device is bound to a driver. In
this case we must update the iomap associated with the now-stale
function handle to ensure that the resulting zPCI instruction references
an accurate function handle.
Since the function handle is accessed by the PCI accessor helpers
without locking use READ_ONCE()/WRITE_ONCE() to mark this access and
prevent compiler optimizations that would move the load/store.
With that infrastructure in place let's also properly update the
function handle in the existing cases. This makes sure that in the
future debugging of a zPCI function access through the handle will
show an up to date handle reducing the chance of confusion. Also it
makes sure we have one single place where a zPCI function handle is
updated after initialization.
Signed-off-by: Niklas Schnelle <schnelle@linux.ibm.com>
---
arch/s390/include/asm/pci.h | 1 +
arch/s390/pci/pci.c | 36 ++++++++++++++++++++++++++++++++----
arch/s390/pci/pci_event.c | 6 +++---
arch/s390/pci/pci_insn.c | 4 ++--
4 files changed, 38 insertions(+), 9 deletions(-)
diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h
index e4803ec51110..5e6cba22a801 100644
--- a/arch/s390/include/asm/pci.h
+++ b/arch/s390/include/asm/pci.h
@@ -211,6 +211,7 @@ int zpci_deconfigure_device(struct zpci_dev *zdev);
int zpci_register_ioat(struct zpci_dev *, u8, u64, u64, u64);
int zpci_unregister_ioat(struct zpci_dev *, u8);
void zpci_remove_reserved_devices(void);
+void zpci_update_fh(struct zpci_dev *zdev, u32 fh);
/* CLP */
int clp_setup_writeback_mio(void);
diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c
index e7e6788d75a8..af22778551c1 100644
--- a/arch/s390/pci/pci.c
+++ b/arch/s390/pci/pci.c
@@ -481,6 +481,34 @@ static void zpci_free_iomap(struct zpci_dev *zdev, int entry)
spin_unlock(&zpci_iomap_lock);
}
+static void zpci_do_update_iomap_fh(struct zpci_dev *zdev, u32 fh)
+{
+ int bar, idx;
+
+ spin_lock(&zpci_iomap_lock);
+ for (bar = 0; bar < PCI_STD_NUM_BARS; bar++) {
+ if (!zdev->bars[bar].size)
+ continue;
+ idx = zdev->bars[bar].map_idx;
+ if (!zpci_iomap_start[idx].count)
+ continue;
+ WRITE_ONCE(zpci_iomap_start[idx].fh, zdev->fh);
+ }
+ spin_unlock(&zpci_iomap_lock);
+}
+
+void zpci_update_fh(struct zpci_dev *zdev, u32 fh)
+{
+ if (!fh || zdev->fh == fh)
+ return;
+
+ zdev->fh = fh;
+ if (zpci_use_mio(zdev))
+ return;
+ if (zdev->has_resources && zdev_enabled(zdev))
+ zpci_do_update_iomap_fh(zdev, fh);
+}
+
static struct resource *__alloc_res(struct zpci_dev *zdev, unsigned long start,
unsigned long size, unsigned long flags)
{
@@ -668,7 +696,7 @@ int zpci_enable_device(struct zpci_dev *zdev)
if (clp_enable_fh(zdev, &fh, ZPCI_NR_DMA_SPACES))
rc = -EIO;
else
- zdev->fh = fh;
+ zpci_update_fh(zdev, fh);
return rc;
}
@@ -679,14 +707,14 @@ int zpci_disable_device(struct zpci_dev *zdev)
cc = clp_disable_fh(zdev, &fh);
if (!cc) {
- zdev->fh = fh;
+ zpci_update_fh(zdev, fh);
} else if (cc == CLP_RC_SETPCIFN_ALRDY) {
pr_info("Disabling PCI function %08x had no effect as it was already disabled\n",
zdev->fid);
/* Function is already disabled - update handle */
rc = clp_refresh_fh(zdev->fid, &fh);
if (!rc) {
- zdev->fh = fh;
+ zpci_update_fh(zdev, fh);
rc = -EINVAL;
}
} else {
@@ -768,7 +796,7 @@ int zpci_scan_configured_device(struct zpci_dev *zdev, u32 fh)
{
int rc;
- zdev->fh = fh;
+ zpci_update_fh(zdev, fh);
/* the PCI function will be scanned once function 0 appears */
if (!zdev->zbus->bus)
return 0;
diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c
index c856f80cb21b..e868d996ec5b 100644
--- a/arch/s390/pci/pci_event.c
+++ b/arch/s390/pci/pci_event.c
@@ -76,7 +76,7 @@ void zpci_event_error(void *data)
static void zpci_event_hard_deconfigured(struct zpci_dev *zdev, u32 fh)
{
- zdev->fh = fh;
+ zpci_update_fh(zdev, fh);
/* Give the driver a hint that the function is
* already unusable.
*/
@@ -117,7 +117,7 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf)
if (!zdev)
zpci_create_device(ccdf->fid, ccdf->fh, ZPCI_FN_STATE_STANDBY);
else
- zdev->fh = ccdf->fh;
+ zpci_update_fh(zdev, ccdf->fh);
break;
case 0x0303: /* Deconfiguration requested */
if (zdev) {
@@ -126,7 +126,7 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf)
*/
if (zdev->state != ZPCI_FN_STATE_CONFIGURED)
break;
- zdev->fh = ccdf->fh;
+ zpci_update_fh(zdev, ccdf->fh);
zpci_deconfigure_device(zdev);
}
break;
diff --git a/arch/s390/pci/pci_insn.c b/arch/s390/pci/pci_insn.c
index 2e43996159f0..28d863aaafea 100644
--- a/arch/s390/pci/pci_insn.c
+++ b/arch/s390/pci/pci_insn.c
@@ -163,7 +163,7 @@ static inline int zpci_load_fh(u64 *data, const volatile void __iomem *addr,
unsigned long len)
{
struct zpci_iomap_entry *entry = &zpci_iomap_start[ZPCI_IDX(addr)];
- u64 req = ZPCI_CREATE_REQ(entry->fh, entry->bar, len);
+ u64 req = ZPCI_CREATE_REQ(READ_ONCE(entry->fh), entry->bar, len);
return __zpci_load(data, req, ZPCI_OFFSET(addr));
}
@@ -244,7 +244,7 @@ static inline int zpci_store_fh(const volatile void __iomem *addr, u64 data,
unsigned long len)
{
struct zpci_iomap_entry *entry = &zpci_iomap_start[ZPCI_IDX(addr)];
- u64 req = ZPCI_CREATE_REQ(entry->fh, entry->bar, len);
+ u64 req = ZPCI_CREATE_REQ(READ_ONCE(entry->fh), entry->bar, len);
return __zpci_store(data, req, ZPCI_OFFSET(addr));
}
--
2.25.1
^ permalink raw reply related
* [PATCH 0/5] s390/pci: automatic error recovery
From: Niklas Schnelle @ 2021-09-06 9:49 UTC (permalink / raw)
To: Bjorn Helgaas
Cc: linux-s390, Pierre Morel, Matthew Rosato, linux-kernel,
Oliver O'Halloran, Linas Vepstas, linuxppc-dev
Hello,
This series implements automatic error recovery for PCI devices on s390
following the scheme outlined at Documentation/PCI/pci-error-recovery.rst.
It applies on top of current master.
The patches are almost completely s390 specific except for two patches
exporting existing functionality for use by arch/s390/pci/ code. Nevertheless
I would also appreciate any feedback, especially on the last patch, concerning
the implementation of the error recovery flow. I believe we might be the first
implementation of PCI device recovery in a virtualized setting requiring us to
coordinate the device reset with the hypervisor platform by issuing a disable
and re-enable to the platform as well as starting the recovery following
a platform event.
The outline of the patches is as follows:
Patch 1 and 2 add s390 specific code implementing a reset mechanism that
takes the PCI function out of the platform specific error state.
Patches 3 and 4 export existing common code functions for use by the s390
specific recovery code.
Patch 3 I already sent separately resulting in the discussion below but without
a final conclusion.
https://lore.kernel.org/lkml/20210720150145.640727-1-schnelle@linux.ibm.com/
I believe even though there were some doubts about the use of
pci_dev_is_added() by arch code the existing uses as well as the use in the
final patch of this series warrant this export.
Patch 4 "PCI: Export pci_dev_lock()" is basically an extension to commit
e3a9b1212b9d ("PCI: Export pci_dev_trylock() and pci_dev_unlock()") which
already exported pci_dev_trylock(). In the final patch we make use of
pci_dev_lock() to wait for any other exclusive uses of the pdev to be finished
before starting recovery.
Finally Patch 5 implements the recovery flow as part of the existing s390
specific PCI availability and error event mechanism. Where previously the error
case only set an error indication requiring manual intervention to make the
device usable again. Now we handle the case where firmware has already reset
a PCI function after an error was encountered informing the OS that it should
be ready to be used again. Note that the same event is also issued by the
hypervisor if the function was manually taken into a service mode for example
for firmware upgrade via the hypervisor and is now ready to be used again.
Thanks,
Niklas Schnelle
Niklas Schnelle (5):
s390/pci: refresh function handle in iomap
s390/pci: implement reset_slot for hotplug slot
PCI: Move pci_dev_is/assign_added() to pci.h
PCI: Export pci_dev_lock()
s390/pci: implement minimal PCI error recovery
arch/powerpc/platforms/powernv/pci-sriov.c | 3 -
arch/powerpc/platforms/pseries/setup.c | 1 -
arch/s390/include/asm/pci.h | 6 +-
arch/s390/pci/pci.c | 143 ++++++++++++++-
arch/s390/pci/pci_event.c | 196 ++++++++++++++++++++-
arch/s390/pci/pci_insn.c | 4 +-
arch/s390/pci/pci_irq.c | 9 +
arch/s390/pci/pci_sysfs.c | 2 -
drivers/pci/hotplug/acpiphp_glue.c | 1 -
drivers/pci/hotplug/s390_pci_hpc.c | 24 +++
drivers/pci/pci.c | 3 +-
drivers/pci/pci.h | 15 --
include/linux/pci.h | 16 ++
13 files changed, 389 insertions(+), 34 deletions(-)
--
2.25.1
^ permalink raw reply
* [PATCH 2/5] s390/pci: implement reset_slot for hotplug slot
From: Niklas Schnelle @ 2021-09-06 9:49 UTC (permalink / raw)
To: Bjorn Helgaas
Cc: linux-s390, Pierre Morel, Matthew Rosato, linux-kernel,
Oliver O'Halloran, Linas Vepstas, linuxppc-dev
In-Reply-To: <20210906094927.524106-1-schnelle@linux.ibm.com>
This is done by adding a zpci_hot_reset_device() call which does a low
level reset of the PCI function without changing its higher level
function state. This way it can be used while the zPCI function is bound
to a driver and with DMA tables being controlled either through the
IOMMU or DMA APIs which is prohibited when using zpci_disable_device()
as that drops existing DMA translations.
As this reset, unlike a normal FLR, also calls zpci_clear_irq() we need
to implement arch_restore_msi_irqs() and make sure we re-enable IRQs for
the PCI function if they were previously disabled.
Signed-off-by: Niklas Schnelle <schnelle@linux.ibm.com>
---
arch/s390/include/asm/pci.h | 1 +
arch/s390/pci/pci.c | 58 ++++++++++++++++++++++++++++++
arch/s390/pci/pci_irq.c | 9 +++++
drivers/pci/hotplug/s390_pci_hpc.c | 24 +++++++++++++
4 files changed, 92 insertions(+)
diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h
index 5e6cba22a801..2a2ed165a270 100644
--- a/arch/s390/include/asm/pci.h
+++ b/arch/s390/include/asm/pci.h
@@ -208,6 +208,7 @@ int zpci_disable_device(struct zpci_dev *);
int zpci_scan_configured_device(struct zpci_dev *zdev, u32 fh);
int zpci_deconfigure_device(struct zpci_dev *zdev);
+int zpci_hot_reset_device(struct zpci_dev *zdev);
int zpci_register_ioat(struct zpci_dev *, u8, u64, u64, u64);
int zpci_unregister_ioat(struct zpci_dev *, u8);
void zpci_remove_reserved_devices(void);
diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c
index af22778551c1..a6322f45b5bd 100644
--- a/arch/s390/pci/pci.c
+++ b/arch/s390/pci/pci.c
@@ -723,6 +723,64 @@ int zpci_disable_device(struct zpci_dev *zdev)
return rc;
}
+/**
+ * zpci_hot_reset_device - perform a reset of the given zPCI function
+ * @zdev: the slot which should be reset
+ *
+ * Performs a low level reset of the zPCI function. The reset is low level in
+ * the sense that the zPCI function can be reset without detaching it from the
+ * common PCI subsystem. The reset may be performed while under control of
+ * either DMA or IOMMU APIs in which case the existing DMA/IOMMU translation
+ * table is reinstated at the end of the reset.
+ *
+ * After the reset the function's internal state is reset to an initial state
+ * equivalent to its state during boot when first probing a driver.
+ * Consequently after reset the PCI function requires re-initialization via the
+ * common PCI code including re-enabling IRQs via pci_alloc_irq_vectors()
+ * and enabling the function via e.g. pci_enable_device_flags(). The caller
+ * must guard against concurrent reset attempts.
+ *
+ * In most cases this function should not be called directly but through
+ * pci_reset_function() or pci_reset_bus() which handle the save/restore and
+ * locking.
+ *
+ * Return: 0 on success and an error value otherwise
+ */
+int zpci_hot_reset_device(struct zpci_dev *zdev)
+{
+ int rc;
+
+ zpci_dbg(3, "reset fid:%x\n", zdev->fid);
+ if (zdev_enabled(zdev)) {
+ /* Disables device access, DMAs and IRQs (reset state) */
+ rc = zpci_disable_device(zdev);
+ /*
+ * Due to a z/VM vs LPAR inconsistency in the error state the
+ * FH may indicate an enabled device but disable says the
+ * device is already disabled. Don't treat it as an error here.
+ */
+ if (rc == -EINVAL)
+ rc = 0;
+ if (rc)
+ return rc;
+ }
+
+ rc = zpci_enable_device(zdev);
+ if (rc)
+ return rc;
+
+ if (zdev->dma_table) {
+ rc = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma,
+ (u64)zdev->dma_table);
+ if (rc)
+ return rc;
+ } else {
+ zpci_dma_init_device(zdev);
+ }
+
+ return 0;
+}
+
/**
* zpci_create_device() - Create a new zpci_dev and add it to the zbus
* @fid: Function ID of the device to be created
diff --git a/arch/s390/pci/pci_irq.c b/arch/s390/pci/pci_irq.c
index 9c7de9089939..ab98e7f5b79b 100644
--- a/arch/s390/pci/pci_irq.c
+++ b/arch/s390/pci/pci_irq.c
@@ -391,6 +391,15 @@ void arch_teardown_msi_irqs(struct pci_dev *pdev)
airq_iv_free(zpci_ibv[0], zdev->msi_first_bit, zdev->msi_nr_irqs);
}
+void arch_restore_msi_irqs(struct pci_dev *pdev)
+{
+ struct zpci_dev *zdev = to_zpci(pdev);
+
+ if (!zdev->irqs_registered)
+ zpci_set_irq(zdev);
+ default_restore_msi_irqs(pdev);
+}
+
static struct airq_struct zpci_airq = {
.handler = zpci_floating_irq_handler,
.isc = PCI_ISC,
diff --git a/drivers/pci/hotplug/s390_pci_hpc.c b/drivers/pci/hotplug/s390_pci_hpc.c
index 014868752cd4..07f28db0eed5 100644
--- a/drivers/pci/hotplug/s390_pci_hpc.c
+++ b/drivers/pci/hotplug/s390_pci_hpc.c
@@ -57,6 +57,29 @@ static int disable_slot(struct hotplug_slot *hotplug_slot)
return zpci_deconfigure_device(zdev);
}
+static int reset_slot(struct hotplug_slot *hotplug_slot, int probe)
+{
+ struct zpci_dev *zdev = container_of(hotplug_slot, struct zpci_dev,
+ hotplug_slot);
+
+ if (zdev->state != ZPCI_FN_STATE_CONFIGURED)
+ return -EIO;
+ /*
+ * We can't take the zdev->lock as reset_slot may be called during
+ * probing and/or device removal which already happens under the
+ * zdev->lock. Instead the user should use the higher level
+ * pci_reset_function() or pci_reset_bus() which hold the PCI device
+ * lock preventing concurrent removal. If not using these functions
+ * holding the PCI device lock is required.
+ */
+
+ /* As long as the function is configured we can reset */
+ if (probe)
+ return 0;
+
+ return zpci_hot_reset_device(zdev);
+}
+
static int get_power_status(struct hotplug_slot *hotplug_slot, u8 *value)
{
struct zpci_dev *zdev = container_of(hotplug_slot, struct zpci_dev,
@@ -83,6 +106,7 @@ static int get_adapter_status(struct hotplug_slot *hotplug_slot, u8 *value)
static const struct hotplug_slot_ops s390_hotplug_slot_ops = {
.enable_slot = enable_slot,
.disable_slot = disable_slot,
+ .reset_slot = reset_slot,
.get_power_status = get_power_status,
.get_adapter_status = get_adapter_status,
};
--
2.25.1
^ permalink raw reply related
* [PATCH 3/5] PCI: Move pci_dev_is/assign_added() to pci.h
From: Niklas Schnelle @ 2021-09-06 9:49 UTC (permalink / raw)
To: Bjorn Helgaas
Cc: linux-s390, Pierre Morel, Matthew Rosato, linux-kernel,
Oliver O'Halloran, Linas Vepstas, linuxppc-dev
In-Reply-To: <20210906094927.524106-1-schnelle@linux.ibm.com>
The helper function pci_dev_is_added() from drivers/pci/pci.h is used in
PCI arch code of both s390 and powerpc leading to awkward relative
includes. Move it to the global include/linux/pci.h and get rid of these
includes just for that one function.
Signed-off-by: Niklas Schnelle <schnelle@linux.ibm.com>
---
arch/powerpc/platforms/powernv/pci-sriov.c | 3 ---
arch/powerpc/platforms/pseries/setup.c | 1 -
arch/s390/pci/pci_sysfs.c | 2 --
drivers/pci/hotplug/acpiphp_glue.c | 1 -
drivers/pci/pci.h | 15 ---------------
include/linux/pci.h | 15 +++++++++++++++
6 files changed, 15 insertions(+), 22 deletions(-)
diff --git a/arch/powerpc/platforms/powernv/pci-sriov.c b/arch/powerpc/platforms/powernv/pci-sriov.c
index 28aac933a439..2e0ca5451e85 100644
--- a/arch/powerpc/platforms/powernv/pci-sriov.c
+++ b/arch/powerpc/platforms/powernv/pci-sriov.c
@@ -9,9 +9,6 @@
#include "pci.h"
-/* for pci_dev_is_added() */
-#include "../../../../drivers/pci/pci.h"
-
/*
* The majority of the complexity in supporting SR-IOV on PowerNV comes from
* the need to put the MMIO space for each VF into a separate PE. Internally
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index 0dfaa6ab44cc..08e846ae1853 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -74,7 +74,6 @@
#include <asm/hvconsole.h>
#include "pseries.h"
-#include "../../../../drivers/pci/pci.h"
DEFINE_STATIC_KEY_FALSE(shared_processor);
EXPORT_SYMBOL(shared_processor);
diff --git a/arch/s390/pci/pci_sysfs.c b/arch/s390/pci/pci_sysfs.c
index 335c281811c7..40733b93a086 100644
--- a/arch/s390/pci/pci_sysfs.c
+++ b/arch/s390/pci/pci_sysfs.c
@@ -13,8 +13,6 @@
#include <linux/stat.h>
#include <linux/pci.h>
-#include "../../../drivers/pci/pci.h"
-
#include <asm/sclp.h>
#define zpci_attr(name, fmt, member) \
diff --git a/drivers/pci/hotplug/acpiphp_glue.c b/drivers/pci/hotplug/acpiphp_glue.c
index f031302ad401..4cb963f88183 100644
--- a/drivers/pci/hotplug/acpiphp_glue.c
+++ b/drivers/pci/hotplug/acpiphp_glue.c
@@ -38,7 +38,6 @@
#include <linux/slab.h>
#include <linux/acpi.h>
-#include "../pci.h"
#include "acpiphp.h"
static LIST_HEAD(bridge_list);
diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
index 93dcdd431072..a159cd0f6f05 100644
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -383,21 +383,6 @@ static inline bool pci_dev_is_disconnected(const struct pci_dev *dev)
return dev->error_state == pci_channel_io_perm_failure;
}
-/* pci_dev priv_flags */
-#define PCI_DEV_ADDED 0
-#define PCI_DPC_RECOVERED 1
-#define PCI_DPC_RECOVERING 2
-
-static inline void pci_dev_assign_added(struct pci_dev *dev, bool added)
-{
- assign_bit(PCI_DEV_ADDED, &dev->priv_flags, added);
-}
-
-static inline bool pci_dev_is_added(const struct pci_dev *dev)
-{
- return test_bit(PCI_DEV_ADDED, &dev->priv_flags);
-}
-
#ifdef CONFIG_PCIEAER
#include <linux/aer.h>
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 540b377ca8f6..ea0e23dbc8ec 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -507,6 +507,21 @@ struct pci_dev {
unsigned long priv_flags; /* Private flags for the PCI driver */
};
+/* pci_dev priv_flags */
+#define PCI_DEV_ADDED 0
+#define PCI_DPC_RECOVERED 1
+#define PCI_DPC_RECOVERING 2
+
+static inline void pci_dev_assign_added(struct pci_dev *dev, bool added)
+{
+ assign_bit(PCI_DEV_ADDED, &dev->priv_flags, added);
+}
+
+static inline bool pci_dev_is_added(const struct pci_dev *dev)
+{
+ return test_bit(PCI_DEV_ADDED, &dev->priv_flags);
+}
+
static inline struct pci_dev *pci_physfn(struct pci_dev *dev)
{
#ifdef CONFIG_PCI_IOV
--
2.25.1
^ permalink raw reply related
* [PATCH 5/5] s390/pci: implement minimal PCI error recovery
From: Niklas Schnelle @ 2021-09-06 9:49 UTC (permalink / raw)
To: Bjorn Helgaas
Cc: linux-s390, Pierre Morel, Matthew Rosato, linux-kernel,
Oliver O'Halloran, Linas Vepstas, linuxppc-dev
In-Reply-To: <20210906094927.524106-1-schnelle@linux.ibm.com>
When the platform detects an error on a PCI function, or a service action
has been performed, the function is put in the error state and an error
event notification is provided to the OS.
Currently we treat all error event notifications the same and simply set
pdev->error_state = pci_channel_io_perm_failure requiring user
intervention such as use of the recover attribute to get the device
usable again. Besides requiring a manual step, this also has the
disadvantage that the device is completely torn down and recreated
resulting in higher level devices such as a block or network device
being recreated. In case of a block device this also means that it may
need to be removed and added to a software raid even if that could
otherwise survive with a temporary degradation.
This is of course not ideal, more so since an error notification with PEC
0x3A indicates that the platform already performed error recovery
successfully or that the error state was caused by a service action that
is now finished.
At least in this case we can assume that the error state can be reset
and the function made usable again. So as not to have the disadvantage
of a full tear down and recreation we need to coordinate this recovery
with the driver. Thankfully there is already a well defined recovery
flow for this described in Documentation/PCI/pci-error-recovery.rst.
The implementation of this is somewhat straightforward and simplified
by the fact that our recovery flow is defined per PCI function. As
a reset we use the newly introduced zpci_hot_reset_device() which also
takes the PCI function out of the error state.
Signed-off-by: Niklas Schnelle <schnelle@linux.ibm.com>
---
arch/s390/include/asm/pci.h | 4 +-
arch/s390/pci/pci.c | 49 ++++++++++
arch/s390/pci/pci_event.c | 190 +++++++++++++++++++++++++++++++++++-
3 files changed, 241 insertions(+), 2 deletions(-)
diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h
index 2a2ed165a270..558877aff2e5 100644
--- a/arch/s390/include/asm/pci.h
+++ b/arch/s390/include/asm/pci.h
@@ -294,8 +294,10 @@ void zpci_debug_exit(void);
void zpci_debug_init_device(struct zpci_dev *, const char *);
void zpci_debug_exit_device(struct zpci_dev *);
-/* Error reporting */
+/* Error handling */
int zpci_report_error(struct pci_dev *, struct zpci_report_error_header *);
+int zpci_clear_error_state(struct zpci_dev *zdev);
+int zpci_reset_load_store_blocked(struct zpci_dev *zdev);
#ifdef CONFIG_NUMA
diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c
index a6322f45b5bd..77a3e85d43fb 100644
--- a/arch/s390/pci/pci.c
+++ b/arch/s390/pci/pci.c
@@ -954,6 +954,55 @@ int zpci_report_error(struct pci_dev *pdev,
}
EXPORT_SYMBOL(zpci_report_error);
+/**
+ * zpci_clear_error_state() - Clears the zPCI error state of the device
+ * @zdev: The zdev for which the zPCI error state should be reset
+ *
+ * Clear the zPCI error state of the device. If clearing the zPCI error state
+ * fails the device is left in the error state. In this case it may make sense
+ * to call zpci_io_perm_failure() on the associated pdev if it exists.
+ *
+ * Returns: 0 on success, -EIO otherwise
+ */
+int zpci_clear_error_state(struct zpci_dev *zdev)
+{
+ u64 req = ZPCI_CREATE_REQ(zdev->fh, 0, ZPCI_MOD_FC_RESET_ERROR);
+ struct zpci_fib fib = {0};
+ u8 status;
+ int rc;
+
+ rc = zpci_mod_fc(req, &fib, &status);
+ if (rc)
+ return -EIO;
+
+ return 0;
+}
+
+/**
+ * zpci_reset_load_store_blocked() - Re-enables L/S from error state
+ * @zdev: The zdev for which to unblock load/store access
+ *
+ * Re-enables load/store access for a PCI function in the error state while
+ * keeping DMA blocked. In this state drivers can poke MMIO space to determine
+ * if error recovery is possible while catching any rogue DMA access from the
+ * device.
+ *
+ * Returns: 0 on success, -EIO otherwise
+ */
+int zpci_reset_load_store_blocked(struct zpci_dev *zdev)
+{
+ u64 req = ZPCI_CREATE_REQ(zdev->fh, 0, ZPCI_MOD_FC_RESET_BLOCK);
+ struct zpci_fib fib = {0};
+ u8 status;
+ int rc;
+
+ rc = zpci_mod_fc(req, &fib, &status);
+ if (rc)
+ return -EIO;
+
+ return 0;
+}
+
static int zpci_mem_init(void)
{
BUILD_BUG_ON(!is_power_of_2(__alignof__(struct zpci_fmb)) ||
diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c
index e868d996ec5b..ac9ed1572d39 100644
--- a/arch/s390/pci/pci_event.c
+++ b/arch/s390/pci/pci_event.c
@@ -47,15 +47,190 @@ struct zpci_ccdf_avail {
u16 pec; /* PCI event code */
} __packed;
+static inline bool ers_result_indicates_abort(pci_ers_result_t ers_res)
+{
+ switch (ers_res) {
+ case PCI_ERS_RESULT_CAN_RECOVER:
+ case PCI_ERS_RESULT_RECOVERED:
+ case PCI_ERS_RESULT_NEED_RESET:
+ return false;
+ default:
+ return true;
+ }
+}
+
+static pci_ers_result_t zpci_event_notify_error_detected(struct pci_dev *pdev)
+{
+ pci_ers_result_t ers_res = PCI_ERS_RESULT_DISCONNECT;
+ struct pci_driver *driver = pdev->driver;
+
+ pr_debug("%s: calling error_detected() callback\n", pci_name(pdev));
+ ers_res = driver->err_handler->error_detected(pdev, pdev->error_state);
+ if (ers_result_indicates_abort(ers_res))
+ pr_info("%s: driver can't recover\n", pci_name(pdev));
+ else if (ers_res == PCI_ERS_RESULT_NEED_RESET)
+ pr_debug("%s: driver needs reset to recover\n", pci_name(pdev));
+
+ return ers_res;
+}
+
+static pci_ers_result_t zpci_event_do_error_state_clear(struct pci_dev *pdev)
+{
+ pci_ers_result_t ers_res = PCI_ERS_RESULT_DISCONNECT;
+ struct pci_driver *driver = pdev->driver;
+ struct zpci_dev *zdev = to_zpci(pdev);
+ int rc;
+
+ pr_debug("%s: reset load/store blocked\n", pci_name(pdev));
+ rc = zpci_reset_load_store_blocked(zdev);
+ if (rc) {
+ pr_err("%s: reset load/store blocked failed\n", pci_name(pdev));
+ /* Let's try a full reset instead */
+ return PCI_ERS_RESULT_NEED_RESET;
+ }
+
+ if (driver->err_handler->mmio_enabled) {
+ pr_debug("%s: calling mmio_enabled() callback\n", pci_name(pdev));
+ ers_res = driver->err_handler->mmio_enabled(pdev);
+ if (ers_result_indicates_abort(ers_res)) {
+ pr_info("%s: driver can't recover after enabling MMIO\n", pci_name(pdev));
+ return ers_res;
+ } else if (ers_res == PCI_ERS_RESULT_NEED_RESET) {
+ pr_debug("%s: driver needs reset to recover\n", pci_name(pdev));
+ return ers_res;
+ }
+ }
+
+ pr_debug("%s: clearing error state\n", pci_name(pdev));
+ rc = zpci_clear_error_state(zdev);
+ if (!rc) {
+ pdev->error_state = pci_channel_io_normal;
+ } else {
+ pr_err("%s: resetting error state failed\n", pci_name(pdev));
+ /* Let's try a full reset instead */
+ return PCI_ERS_RESULT_NEED_RESET;
+ }
+
+ return ers_res;
+}
+
+static pci_ers_result_t zpci_event_do_reset(struct pci_dev *pdev)
+{
+ pci_ers_result_t ers_res = PCI_ERS_RESULT_DISCONNECT;
+ struct pci_driver *driver = pdev->driver;
+
+ pr_info("%s: initiating reset\n", pci_name(pdev));
+ if (zpci_hot_reset_device(to_zpci(pdev))) {
+ pr_err("%s: resetting function failed\n", pci_name(pdev));
+ return ers_res;
+ }
+ pdev->error_state = pci_channel_io_normal;
+ if (driver->err_handler->slot_reset) {
+ ers_res = driver->err_handler->slot_reset(pdev);
+ if (ers_result_indicates_abort(ers_res)) {
+ pr_info("%s: driver can't recover after slot reset\n", pci_name(pdev));
+ return ers_res;
+ }
+ }
+
+ return ers_res;
+}
+
+/* zpci_event_attempt_error_recovery - Try to recover the given PCI function
+ * @pdev: PCI function to recover currently in the error state
+ *
+ * We follow the scheme outlined in Documentation/PCI/pci-error-recovery.rst.
+ * With the simplification that recovery always happens per function
+ * and the platform determines which functions are affected for
+ * multi-function devices.
+ */
+static pci_ers_result_t zpci_event_attempt_error_recovery(struct pci_dev *pdev)
+{
+ pci_ers_result_t ers_res = PCI_ERS_RESULT_DISCONNECT;
+ struct pci_driver *driver;
+
+ /*
+ * Ensure that the PCI function is not removed concurrently, no driver
+ * is unbound or probed and that userspace can't access its
+ * configuration space while we perform recovery.
+ */
+ pci_dev_lock(pdev);
+ /*
+ * Between getting the pdev and locking it the PCI device may have been
+ * removed e.g. by a concurrent call to recover_store().
+ */
+ if (!pci_dev_is_added(pdev))
+ goto out_unlock;
+ if (pdev->error_state == pci_channel_io_perm_failure) {
+ ers_res = PCI_ERS_RESULT_DISCONNECT;
+ goto out_unlock;
+ }
+ pdev->error_state = pci_channel_io_frozen;
+
+ driver = pdev->driver;
+ if (!driver || !driver->err_handler || !driver->err_handler->error_detected) {
+ pr_info("%s: driver does not support error recovery\n", pci_name(pdev));
+ goto out_unlock;
+ }
+
+ ers_res = zpci_event_notify_error_detected(pdev);
+ if (ers_result_indicates_abort(ers_res))
+ goto out_unlock;
+
+ if (ers_res == PCI_ERS_RESULT_CAN_RECOVER) {
+ ers_res = zpci_event_do_error_state_clear(pdev);
+ if (ers_result_indicates_abort(ers_res))
+ goto out_unlock;
+ }
+
+ if (ers_res == PCI_ERS_RESULT_NEED_RESET)
+ ers_res = zpci_event_do_reset(pdev);
+
+ if (ers_res != PCI_ERS_RESULT_RECOVERED) {
+ pr_err("%s: recovery failed\n", pci_name(pdev));
+ goto out_unlock;
+ }
+
+ pr_info("%s: resuming operations\n", pci_name(pdev));
+ if (driver->err_handler->resume)
+ driver->err_handler->resume(pdev);
+out_unlock:
+ pci_dev_unlock(pdev);
+
+ return ers_res;
+}
+
+/* zpci_event_io_failure - Report IO failure state es to driver
+ * @pdev: PCI function to report as failed
+ */
+static void zpci_event_io_failure(struct pci_dev *pdev, pci_channel_state_t es)
+{
+ struct pci_driver *driver;
+
+ pci_dev_lock(pdev);
+ if (!pci_dev_is_added(pdev))
+ goto out_unlock;
+ pdev->error_state = es;
+ driver = pdev->driver;
+ if (driver && driver->err_handler && driver->err_handler->error_detected)
+ driver->err_handler->error_detected(pdev, pdev->error_state);
+out_unlock:
+ pci_dev_unlock(pdev);
+}
+
static void __zpci_event_error(struct zpci_ccdf_err *ccdf)
{
struct zpci_dev *zdev = get_zdev_by_fid(ccdf->fid);
struct pci_dev *pdev = NULL;
+ pci_ers_result_t ers_res;
zpci_err("error CCDF:\n");
zpci_err_hex(ccdf, sizeof(*ccdf));
if (zdev)
+ zpci_update_fh(zdev, ccdf->fh);
+
+ if (zdev->zbus->bus)
pdev = pci_get_slot(zdev->zbus->bus, zdev->devfn);
pr_err("%s: Event 0x%x reports an error for PCI function 0x%x\n",
@@ -64,7 +239,20 @@ static void __zpci_event_error(struct zpci_ccdf_err *ccdf)
if (!pdev)
return;
- pdev->error_state = pci_channel_io_perm_failure;
+ switch (ccdf->pec) {
+ case 0x003a: /* Service Action or Error Recovery Successful */
+ ers_res = zpci_event_attempt_error_recovery(pdev);
+ if (ers_res != PCI_ERS_RESULT_RECOVERED)
+ zpci_event_io_failure(pdev, pci_channel_io_perm_failure);
+ break;
+ default:
+ /*
+ * Mark as frozen not permanently failed because the device
+ * could be subsequently recovered by the platform.
+ */
+ zpci_event_io_failure(pdev, pci_channel_io_frozen);
+ break;
+ }
pci_dev_put(pdev);
}
--
2.25.1
^ permalink raw reply related
* [PATCH 4/5] PCI: Export pci_dev_lock()
From: Niklas Schnelle @ 2021-09-06 9:49 UTC (permalink / raw)
To: Bjorn Helgaas
Cc: linux-s390, Pierre Morel, Matthew Rosato, linux-kernel,
Oliver O'Halloran, Linas Vepstas, linuxppc-dev
In-Reply-To: <20210906094927.524106-1-schnelle@linux.ibm.com>
Commit e3a9b1212b9d ("PCI: Export pci_dev_trylock() and pci_dev_unlock()")
already exported pci_dev_trylock()/pci_dev_unlock(). However, in some
circumstances, such as during error recovery, it makes sense to block
waiting to get full access to the device, so also export pci_dev_lock().
Signed-off-by: Niklas Schnelle <schnelle@linux.ibm.com>
---
drivers/pci/pci.c | 3 ++-
include/linux/pci.h | 1 +
2 files changed, 3 insertions(+), 1 deletion(-)
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index aacf575c15cf..3f416c6d3b0b 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -5040,12 +5040,13 @@ static int pci_reset_bus_function(struct pci_dev *dev, int probe)
return pci_parent_bus_reset(dev, probe);
}
-static void pci_dev_lock(struct pci_dev *dev)
+void pci_dev_lock(struct pci_dev *dev)
{
pci_cfg_access_lock(dev);
/* block PM suspend, driver probe, etc. */
device_lock(&dev->dev);
}
+EXPORT_SYMBOL_GPL(pci_dev_lock);
/* Return 1 on successful lock, 0 on contention */
int pci_dev_trylock(struct pci_dev *dev)
diff --git a/include/linux/pci.h b/include/linux/pci.h
index ea0e23dbc8ec..efc78b8d4696 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1639,6 +1639,7 @@ void pci_cfg_access_lock(struct pci_dev *dev);
bool pci_cfg_access_trylock(struct pci_dev *dev);
void pci_cfg_access_unlock(struct pci_dev *dev);
+void pci_dev_lock(struct pci_dev *dev);
int pci_dev_trylock(struct pci_dev *dev);
void pci_dev_unlock(struct pci_dev *dev);
--
2.25.1
^ permalink raw reply related
* Re: [PATCH 1/5] KVM: rseq: Update rseq when processing NOTIFY_RESUME on xfer to KVM guest
From: Paolo Bonzini @ 2021-09-06 10:28 UTC (permalink / raw)
To: Mathieu Desnoyers, Sean Christopherson
Cc: KVM list, Peter Zijlstra, Oleg Nesterov, Will Deacon, Guo Ren,
linux-kselftest, Ben Gardon, shuah, linux-s390, gor,
Russell King, ARM Linux, linux-csky, Christian Borntraeger,
Ingo Molnar, Catalin Marinas, linux-mips, Boqun Feng, paulmck,
Heiko Carstens, rostedt, Shakeel Butt, Andy Lutomirski,
Thomas Gleixner, Peter Foley, linux-arm-kernel,
Thomas Bogendoerfer, linux-kernel, Paul Mackerras, linuxppc-dev
In-Reply-To: <1872633041.20290.1629485463253.JavaMail.zimbra@efficios.com>
On 20/08/21 20:51, Mathieu Desnoyers wrote:
>> Ah, or is it the case that rseq_cs is non-NULL if and only if userspace is in an
>> rseq critical section, and because syscalls in critical sections are illegal, by
>> definition clearing rseq_cs is a nop unless userspace is misbehaving.
> Not quite, as I described above. But we want it to stay set so the CONFIG_DEBUG_RSEQ
> code executed when returning from ioctl to userspace will be able to validate that
> it is not nested within a rseq critical section.
>
>> If that's true, what about explicitly checking that at NOTIFY_RESUME? Or is it
>> not worth the extra code to detect an error that will likely be caught anyways?
> The error will indeed already be caught on return from ioctl to userspace, so I
> don't see any added value in duplicating this check.
Sean, can you send a v2 (even for this patch only would be okay)?
Thanks,
Paolo
^ permalink raw reply
* [PATCH v2] ftrace: Cleanup ftrace_dyn_arch_init()
From: Weizhao Ouyang @ 2021-09-06 11:16 UTC (permalink / raw)
To: Steven Rostedt, Ingo Molnar
Cc: Rich Felker, linux-ia64, linux-sh, linux-mips,
James E.J. Bottomley, Guo Ren, H. Peter Anvin, sparclinux,
linux-riscv, Vincent Chen, Will Deacon, linux-s390,
Yoshinori Sato, Helge Deller, x86, Russell King, linux-csky,
Christian Borntraeger, Catalin Marinas, Albert Ou, Weizhao Ouyang,
Vasily Gorbik, Heiko Carstens, Borislav Petkov, Greentime Hu,
Paul Walmsley, Thomas Gleixner, linux-arm-kernel, Michal Simek,
Thomas Bogendoerfer, linux-parisc, Nick Hu, linux-kernel,
Palmer Dabbelt, Paul Mackerras, linuxppc-dev, David S. Miller
Most ARCHs use an empty ftrace_dyn_arch_init(); introduce a weak common
ftrace_dyn_arch_init() to clean them up.
Signed-off-by: Weizhao Ouyang <o451686892@gmail.com>
Acked-by: Heiko Carstens <hca@linux.ibm.com> (s390)
---
Changes in v2:
-- correct CONFIG_DYNAMIC_FTRACE on PowerPC
-- add Acked-by tag
---
arch/arm/kernel/ftrace.c | 5 -----
arch/arm64/kernel/ftrace.c | 5 -----
arch/csky/kernel/ftrace.c | 5 -----
arch/ia64/kernel/ftrace.c | 6 ------
arch/microblaze/kernel/ftrace.c | 5 -----
arch/mips/include/asm/ftrace.h | 2 ++
arch/nds32/kernel/ftrace.c | 5 -----
arch/parisc/kernel/ftrace.c | 5 -----
arch/powerpc/include/asm/ftrace.h | 4 ++++
arch/riscv/kernel/ftrace.c | 5 -----
arch/s390/kernel/ftrace.c | 5 -----
arch/sh/kernel/ftrace.c | 5 -----
arch/sparc/kernel/ftrace.c | 5 -----
arch/x86/kernel/ftrace.c | 5 -----
include/linux/ftrace.h | 1 -
kernel/trace/ftrace.c | 5 +++++
16 files changed, 11 insertions(+), 62 deletions(-)
diff --git a/arch/arm/kernel/ftrace.c b/arch/arm/kernel/ftrace.c
index 3c83b5d29697..a006585e1c09 100644
--- a/arch/arm/kernel/ftrace.c
+++ b/arch/arm/kernel/ftrace.c
@@ -193,11 +193,6 @@ int ftrace_make_nop(struct module *mod,
return ret;
}
-
-int __init ftrace_dyn_arch_init(void)
-{
- return 0;
-}
#endif /* CONFIG_DYNAMIC_FTRACE */
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c
index 7f467bd9db7a..fc62dfe73f93 100644
--- a/arch/arm64/kernel/ftrace.c
+++ b/arch/arm64/kernel/ftrace.c
@@ -236,11 +236,6 @@ void arch_ftrace_update_code(int command)
command |= FTRACE_MAY_SLEEP;
ftrace_modify_all_code(command);
}
-
-int __init ftrace_dyn_arch_init(void)
-{
- return 0;
-}
#endif /* CONFIG_DYNAMIC_FTRACE */
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
diff --git a/arch/csky/kernel/ftrace.c b/arch/csky/kernel/ftrace.c
index b4a7ec1517ff..50bfcf129078 100644
--- a/arch/csky/kernel/ftrace.c
+++ b/arch/csky/kernel/ftrace.c
@@ -133,11 +133,6 @@ int ftrace_update_ftrace_func(ftrace_func_t func)
(unsigned long)func, true, true);
return ret;
}
-
-int __init ftrace_dyn_arch_init(void)
-{
- return 0;
-}
#endif /* CONFIG_DYNAMIC_FTRACE */
#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
diff --git a/arch/ia64/kernel/ftrace.c b/arch/ia64/kernel/ftrace.c
index b2ab2d58fb30..d6360fd404ab 100644
--- a/arch/ia64/kernel/ftrace.c
+++ b/arch/ia64/kernel/ftrace.c
@@ -194,9 +194,3 @@ int ftrace_update_ftrace_func(ftrace_func_t func)
flush_icache_range(addr, addr + 16);
return 0;
}
-
-/* run from kstop_machine */
-int __init ftrace_dyn_arch_init(void)
-{
- return 0;
-}
diff --git a/arch/microblaze/kernel/ftrace.c b/arch/microblaze/kernel/ftrace.c
index 224eea40e1ee..188749d62709 100644
--- a/arch/microblaze/kernel/ftrace.c
+++ b/arch/microblaze/kernel/ftrace.c
@@ -163,11 +163,6 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
return ret;
}
-int __init ftrace_dyn_arch_init(void)
-{
- return 0;
-}
-
int ftrace_update_ftrace_func(ftrace_func_t func)
{
unsigned long ip = (unsigned long)(&ftrace_call);
diff --git a/arch/mips/include/asm/ftrace.h b/arch/mips/include/asm/ftrace.h
index b463f2aa5a61..ed013e767390 100644
--- a/arch/mips/include/asm/ftrace.h
+++ b/arch/mips/include/asm/ftrace.h
@@ -76,6 +76,8 @@ do { \
#ifdef CONFIG_DYNAMIC_FTRACE
+int __init ftrace_dyn_arch_init(void);
+
static inline unsigned long ftrace_call_adjust(unsigned long addr)
{
return addr;
diff --git a/arch/nds32/kernel/ftrace.c b/arch/nds32/kernel/ftrace.c
index 0e23e3a8df6b..f0ef4842d191 100644
--- a/arch/nds32/kernel/ftrace.c
+++ b/arch/nds32/kernel/ftrace.c
@@ -84,11 +84,6 @@ void _ftrace_caller(unsigned long parent_ip)
/* restore all state needed by the compiler epilogue */
}
-int __init ftrace_dyn_arch_init(void)
-{
- return 0;
-}
-
static unsigned long gen_sethi_insn(unsigned long addr)
{
unsigned long opcode = 0x46000000;
diff --git a/arch/parisc/kernel/ftrace.c b/arch/parisc/kernel/ftrace.c
index 0a1e75af5382..01581f715737 100644
--- a/arch/parisc/kernel/ftrace.c
+++ b/arch/parisc/kernel/ftrace.c
@@ -94,11 +94,6 @@ int ftrace_disable_ftrace_graph_caller(void)
#endif
#ifdef CONFIG_DYNAMIC_FTRACE
-
-int __init ftrace_dyn_arch_init(void)
-{
- return 0;
-}
int ftrace_update_ftrace_func(ftrace_func_t func)
{
return 0;
diff --git a/arch/powerpc/include/asm/ftrace.h b/arch/powerpc/include/asm/ftrace.h
index debe8c4f7062..d59f67c0225f 100644
--- a/arch/powerpc/include/asm/ftrace.h
+++ b/arch/powerpc/include/asm/ftrace.h
@@ -61,6 +61,10 @@ struct dyn_arch_ftrace {
};
#endif /* __ASSEMBLY__ */
+#ifdef CONFIG_DYNAMIC_FTRACE
+int __init ftrace_dyn_arch_init(void);
+#endif /* CONFIG_DYNAMIC_FTRACE */
+
#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
#define ARCH_SUPPORTS_FTRACE_OPS 1
#endif
diff --git a/arch/riscv/kernel/ftrace.c b/arch/riscv/kernel/ftrace.c
index 7f1e5203de88..4716f4cdc038 100644
--- a/arch/riscv/kernel/ftrace.c
+++ b/arch/riscv/kernel/ftrace.c
@@ -154,11 +154,6 @@ int ftrace_update_ftrace_func(ftrace_func_t func)
return ret;
}
-
-int __init ftrace_dyn_arch_init(void)
-{
- return 0;
-}
#endif
#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c
index 0a464d328467..3fd80397ff52 100644
--- a/arch/s390/kernel/ftrace.c
+++ b/arch/s390/kernel/ftrace.c
@@ -262,11 +262,6 @@ int ftrace_update_ftrace_func(ftrace_func_t func)
return 0;
}
-int __init ftrace_dyn_arch_init(void)
-{
- return 0;
-}
-
void arch_ftrace_update_code(int command)
{
if (ftrace_shared_hotpatch_trampoline(NULL))
diff --git a/arch/sh/kernel/ftrace.c b/arch/sh/kernel/ftrace.c
index 295c43315bbe..930001bb8c6a 100644
--- a/arch/sh/kernel/ftrace.c
+++ b/arch/sh/kernel/ftrace.c
@@ -252,11 +252,6 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
return ftrace_modify_code(rec->ip, old, new);
}
-
-int __init ftrace_dyn_arch_init(void)
-{
- return 0;
-}
#endif /* CONFIG_DYNAMIC_FTRACE */
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
diff --git a/arch/sparc/kernel/ftrace.c b/arch/sparc/kernel/ftrace.c
index 684b84ce397f..eaead3da8e03 100644
--- a/arch/sparc/kernel/ftrace.c
+++ b/arch/sparc/kernel/ftrace.c
@@ -82,11 +82,6 @@ int ftrace_update_ftrace_func(ftrace_func_t func)
new = ftrace_call_replace(ip, (unsigned long)func);
return ftrace_modify_code(ip, old, new);
}
-
-int __init ftrace_dyn_arch_init(void)
-{
- return 0;
-}
#endif
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 1b3ce3b4a2a2..23d221a9a3cd 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -252,11 +252,6 @@ void arch_ftrace_update_code(int command)
ftrace_modify_all_code(command);
}
-int __init ftrace_dyn_arch_init(void)
-{
- return 0;
-}
-
/* Currently only x86_64 supports dynamic trampolines */
#ifdef CONFIG_X86_64
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 832e65f06754..f1eca123d89d 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -573,7 +573,6 @@ ftrace_set_early_filter(struct ftrace_ops *ops, char *buf, int enable);
/* defined in arch */
extern int ftrace_ip_converted(unsigned long ip);
-extern int ftrace_dyn_arch_init(void);
extern void ftrace_replace_code(int enable);
extern int ftrace_update_ftrace_func(ftrace_func_t func);
extern void ftrace_caller(void);
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 7efbc8aaf7f6..4c090323198d 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -6846,6 +6846,11 @@ void __init ftrace_free_init_mem(void)
ftrace_free_mem(NULL, start, end);
}
+int __init __weak ftrace_dyn_arch_init(void)
+{
+ return 0;
+}
+
void __init ftrace_init(void)
{
extern unsigned long __start_mcount_loc[];
--
2.30.2
^ permalink raw reply related
* Re: [PATCH for-5.15 0/5] ASoC: fsl: register platform component before registering cpu dai
From: Mark Brown @ 2021-09-06 11:47 UTC (permalink / raw)
To: Shengjiu Wang
Cc: alsa-devel, timur, Xiubo.Lee, linuxppc-dev, tiwai, perex,
nicoleotsuka, festevam, linux-kernel
In-Reply-To: <1630665006-31437-1-git-send-email-shengjiu.wang@nxp.com>
[-- Attachment #1: Type: text/plain, Size: 428 bytes --]
On Fri, Sep 03, 2021 at 06:30:01PM +0800, Shengjiu Wang wrote:
> There is no deferred probe when adding the platform component to
> snd_soc_pcm_runtime(rtd); the code is in snd_soc_add_pcm_runtime()
...
> So if the platform component is not ready at that time, the
> sound card is still registered successfully, but the platform component
> is empty, so the sound card can't be used.
This sounds like a bug which should be fixed there?
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 488 bytes --]
^ permalink raw reply
* Re: [PATCH] powerpc/mce: Fix access error in mce handler
From: Michael Ellerman @ 2021-09-06 12:33 UTC (permalink / raw)
To: Ganesh Goudar, linuxppc-dev; +Cc: Ganesh Goudar, mahesh, npiggin
In-Reply-To: <20210906081823.181509-1-ganeshgr@linux.ibm.com>
Ganesh Goudar <ganeshgr@linux.ibm.com> writes:
> We queue an irq work for deferred processing of the mce event
> in the realmode mce handler, where translation is disabled.
> Queuing the work may result in accessing memory outside the
> RMO region; such an access needs translation to be enabled
> for an LPAR running with hash mmu, else the kernel crashes.
>
> So enable the translation before queuing the work.
>
> Without this change, the following trace is seen on injecting a machine
> check error in an LPAR running with hash mmu.
What type of error are you injecting?
> Oops: Kernel access of bad area, sig: 11 [#1]
> LE PAGE_SIZE=64K MMU=Hash SMP NR_CPUS=2048 NUMA pSeries
> CPU: 5 PID: 1883 Comm: insmod Tainted: G OE 5.14.0-mce+ #137
> NIP: c000000000735d60 LR: c000000000318640 CTR: 0000000000000000
> REGS: c00000001ebff9a0 TRAP: 0300 Tainted: G OE (5.14.0-mce+)
> MSR: 8000000000001003 <SF,ME,RI,LE> CR: 28008228 XER: 00000001
> CFAR: c00000000031863c DAR: c00000027fa8fe08 DSISR: 40000000 IRQMASK: 0
> GPR00: c0000000003186d0 c00000001ebffc40 c000000001b0df00 c0000000016337e8
> GPR04: c0000000016337e8 c00000027fa8fe08 0000000000000023 c0000000016337f0
> GPR08: 0000000000000023 c0000000012ffe08 0000000000000000 c008000001460240
> GPR12: 0000000000000000 c00000001ec9a900 c00000002ac4bd00 0000000000000000
> GPR16: 00000000000005a0 c0080000006b0000 c0080000006b05a0 c000000000ff3068
> GPR20: c00000002ac4bbc0 0000000000000001 c00000002ac4bbc0 c008000001490298
> GPR24: c008000001490108 c000000001636198 c008000001470090 c008000001470058
> GPR28: 0000000000000510 c008000001000000 c008000008000019 0000000000000019
> NIP [c000000000735d60] llist_add_batch+0x0/0x40
> LR [c000000000318640] __irq_work_queue_local+0x70/0xc0
> Call Trace:
> [c00000001ebffc40] [c00000001ebffc0c] 0xc00000001ebffc0c (unreliable)
> [c00000001ebffc60] [c0000000003186d0] irq_work_queue+0x40/0x70
> [c00000001ebffc80] [c00000000004425c] machine_check_queue_event+0xbc/0xd0
> [c00000001ebffcf0] [c00000000000838c] machine_check_early_common+0x16c/0x1f4
>
> Fixes: 74c3354bc1d89 ("powerpc/pseries/mce: restore msr before returning from handler")
Please explain in more detail why that commit caused this breakage.
> diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c
> index 47a683cd00d2..9d1e39d42e3e 100644
> --- a/arch/powerpc/kernel/mce.c
> +++ b/arch/powerpc/kernel/mce.c
> @@ -249,6 +249,7 @@ void machine_check_queue_event(void)
> {
> int index;
> struct machine_check_event evt;
> + unsigned long msr;
>
> if (!get_mce_event(&evt, MCE_EVENT_RELEASE))
> return;
> @@ -262,8 +263,19 @@ void machine_check_queue_event(void)
> memcpy(&local_paca->mce_info->mce_event_queue[index],
> &evt, sizeof(evt));
>
> - /* Queue irq work to process this event later. */
> - irq_work_queue(&mce_event_process_work);
> + /* Queue irq work to process this event later. Before
> + * queuing the work enable translation for non radix LPAR,
> + * as irq_work_queue may try to access memory outside RMO
> + * region.
> + */
> + if (!radix_enabled() && firmware_has_feature(FW_FEATURE_LPAR)) {
> + msr = mfmsr();
> + mtmsr(msr | MSR_IR | MSR_DR);
> + irq_work_queue(&mce_event_process_work);
> + mtmsr(msr);
> + } else {
> + irq_work_queue(&mce_event_process_work);
> + }
> }
We already went to virtual mode and queued (different) irq work in
arch/powerpc/platforms/pseries/ras.c:mce_handle_error().
We also called save_mce_event() which also might have queued irq work,
via machine_check_ue_event().
So it really feels like something about the design is wrong if we have
to go to virtual mode again and queue more irq work here.
I guess we can probably merge this as a backportable fix, doing anything
else would be a bigger change.
Looking at ras.c there's the comment:
* Enable translation as we will be accessing per-cpu variables
* in save_mce_event() which may fall outside RMO region, also
But AFAICS it's only irq_work_queue() that touches anything percpu?
So maybe we should just not be using irq_work_queue(). It's a pretty
thin wrapper around set_dec(1), perhaps we just need to hand-roll some
real-mode friendly way of doing that.
cheers
^ permalink raw reply
* Re: [PATCH v2] ftrace: Cleanup ftrace_dyn_arch_init()
From: Michael Ellerman @ 2021-09-06 14:22 UTC (permalink / raw)
To: Weizhao Ouyang, Steven Rostedt, Ingo Molnar
Cc: Rich Felker, linux-ia64, linux-sh, Catalin Marinas, linux-mips,
James E.J. Bottomley, Guo Ren, H. Peter Anvin, sparclinux,
linux-riscv, Vincent Chen, Will Deacon, linux-s390,
Yoshinori Sato, Helge Deller, x86, Russell King, linux-csky,
Christian Borntraeger, Albert Ou, Weizhao Ouyang, Vasily Gorbik,
Heiko Carstens, Borislav Petkov, Greentime Hu, Paul Walmsley,
Thomas Gleixner, linux-arm-kernel, Michal Simek,
Thomas Bogendoerfer, linux-parisc, Nick Hu, linux-kernel,
Palmer Dabbelt, Paul Mackerras, linuxppc-dev, David S. Miller
In-Reply-To: <20210906111626.1259867-1-o451686892@gmail.com>
Weizhao Ouyang <o451686892@gmail.com> writes:
> Most ARCHs use an empty ftrace_dyn_arch_init(); introduce a weak common
> ftrace_dyn_arch_init() to clean them up.
>
> Signed-off-by: Weizhao Ouyang <o451686892@gmail.com>
> Acked-by: Heiko Carstens <hca@linux.ibm.com> (s390)
>
> ---
>
> Changes in v2:
> -- correct CONFIG_DYNAMIC_FTRACE on PowerPC
> -- add Acked-by tag
> diff --git a/arch/powerpc/include/asm/ftrace.h b/arch/powerpc/include/asm/ftrace.h
> index debe8c4f7062..d59f67c0225f 100644
> --- a/arch/powerpc/include/asm/ftrace.h
> +++ b/arch/powerpc/include/asm/ftrace.h
> @@ -61,6 +61,10 @@ struct dyn_arch_ftrace {
> };
> #endif /* __ASSEMBLY__ */
>
> +#ifdef CONFIG_DYNAMIC_FTRACE
> +int __init ftrace_dyn_arch_init(void);
> +#endif /* CONFIG_DYNAMIC_FTRACE */
> +
> #ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
> #define ARCH_SUPPORTS_FTRACE_OPS 1
> #endif
That breaks the build for powerpc:
/linux/arch/powerpc/include/asm/ftrace.h: Assembler messages:
/linux/arch/powerpc/include/asm/ftrace.h:65: Error: unrecognized opcode: `int'
make[4]: *** [/linux/scripts/Makefile.build:352: arch/powerpc/kernel/trace/ftrace_64.o] Error 1
make[3]: *** [/linux/scripts/Makefile.build:514: arch/powerpc/kernel/trace] Error 2
make[2]: *** [/linux/scripts/Makefile.build:514: arch/powerpc/kernel] Error 2
make[1]: *** [/linux/Makefile:1861: arch/powerpc] Error 2
make[1]: *** Waiting for unfinished jobs....
It needs to be inside an #ifndef __ASSEMBLY__ section.
cheers
^ permalink raw reply
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox