LinuxPPC-Dev Archive on lore.kernel.org
 help / color / mirror / Atom feed
* [powerpc:merge] BUILD SUCCESS 8d4e8a236506000f4802d83e507c2e61cfabc69a
From: kernel test robot @ 2021-09-04  3:38 UTC (permalink / raw)
  To: Michael Ellerman; +Cc: linuxppc-dev

tree/branch: https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git merge
branch HEAD: 8d4e8a236506000f4802d83e507c2e61cfabc69a  powerpc/ci: Disable corenet32_smp_defconfig on clang / GCC 4.9

elapsed time: 927m

configs tested: 104
configs skipped: 3

The following configs have been built successfully.
More configs may be tested in the coming days.

gcc tested configs:
arm                                 defconfig
arm64                            allyesconfig
arm64                               defconfig
arm                              allyesconfig
arm                              allmodconfig
powerpc                     tqm5200_defconfig
sh                   sh7770_generic_defconfig
arm                  colibri_pxa270_defconfig
arm                          simpad_defconfig
sh                          urquell_defconfig
microblaze                      mmu_defconfig
xtensa                  nommu_kc705_defconfig
powerpc                  mpc866_ads_defconfig
powerpc                 mpc8313_rdb_defconfig
arm                      pxa255-idp_defconfig
ia64                         bigsur_defconfig
powerpc                     akebono_defconfig
powerpc                      pasemi_defconfig
sh                         microdev_defconfig
mips                      maltaaprp_defconfig
powerpc                 mpc85xx_cds_defconfig
x86_64                            allnoconfig
ia64                             allmodconfig
ia64                                defconfig
ia64                             allyesconfig
m68k                             allmodconfig
m68k                                defconfig
m68k                             allyesconfig
nios2                               defconfig
arc                              allyesconfig
nds32                             allnoconfig
nds32                               defconfig
nios2                            allyesconfig
csky                                defconfig
alpha                               defconfig
alpha                            allyesconfig
h8300                            allyesconfig
arc                                 defconfig
sh                               allmodconfig
xtensa                           allyesconfig
parisc                              defconfig
s390                             allyesconfig
s390                             allmodconfig
parisc                           allyesconfig
s390                                defconfig
i386                             allyesconfig
sparc                            allyesconfig
sparc                               defconfig
i386                                defconfig
mips                             allyesconfig
mips                             allmodconfig
powerpc                          allyesconfig
powerpc                          allmodconfig
powerpc                           allnoconfig
x86_64               randconfig-a006-20210904
x86_64               randconfig-a004-20210904
x86_64               randconfig-a003-20210904
x86_64               randconfig-a005-20210904
x86_64               randconfig-a001-20210904
x86_64               randconfig-a002-20210904
x86_64               randconfig-a016-20210903
x86_64               randconfig-a011-20210903
x86_64               randconfig-a012-20210903
x86_64               randconfig-a015-20210903
x86_64               randconfig-a014-20210903
x86_64               randconfig-a013-20210903
arc                  randconfig-r043-20210904
riscv                randconfig-r042-20210903
s390                 randconfig-r044-20210903
arc                  randconfig-r043-20210903
riscv                    nommu_k210_defconfig
riscv                            allyesconfig
riscv                    nommu_virt_defconfig
riscv                             allnoconfig
riscv                               defconfig
riscv                          rv32_defconfig
riscv                            allmodconfig
um                           x86_64_defconfig
um                             i386_defconfig
x86_64                              defconfig
x86_64                               rhel-8.3
x86_64                                  kexec
x86_64                           allyesconfig
x86_64                    rhel-8.3-kselftests

clang tested configs:
i386                 randconfig-a005-20210903
i386                 randconfig-a004-20210903
i386                 randconfig-a006-20210903
i386                 randconfig-a002-20210903
i386                 randconfig-a001-20210903
i386                 randconfig-a003-20210903
x86_64               randconfig-a011-20210904
x86_64               randconfig-a016-20210904
x86_64               randconfig-a012-20210904
x86_64               randconfig-a015-20210904
x86_64               randconfig-a014-20210904
x86_64               randconfig-a013-20210904
i386                 randconfig-a012-20210904
i386                 randconfig-a015-20210904
i386                 randconfig-a011-20210904
i386                 randconfig-a013-20210904
i386                 randconfig-a014-20210904
i386                 randconfig-a016-20210904
hexagon              randconfig-r045-20210903
hexagon              randconfig-r041-20210903

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org

^ permalink raw reply

* Re: [RESEND PATCH v4 2/4] drivers/nvdimm: Add perf interface to expose nvdimm performance stats
From: kajoljain @ 2021-09-04  6:38 UTC (permalink / raw)
  To: kernel test robot, mpe, linuxppc-dev, nvdimm, linux-kernel,
	peterz, dan.j.williams, ira.weiny, vishal.l.verma
  Cc: santosh, maddy, kbuild-all
In-Reply-To: <202109032341.mgqAHURT-lkp@intel.com>



On 9/3/21 8:49 PM, kernel test robot wrote:
> Hi Kajol,
> 
> Thank you for the patch! Perhaps something to improve:
> 
> [auto build test WARNING on linux-nvdimm/libnvdimm-for-next]
> [also build test WARNING on powerpc/next linus/master v5.14 next-20210903]
> [If your patch is applied to the wrong git tree, kindly drop us a note.
> And when submitting patch, we suggest to use '--base' as documented in
> https://git-scm.com/docs/git-format-patch]
> 
> url:    https://github.com/0day-ci/linux/commits/Kajol-Jain/Add-perf-interface-to-expose-nvdimm/20210903-131212
> base:   https://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm.git libnvdimm-for-next
> config: x86_64-randconfig-s021-20210903 (attached as .config)
> compiler: gcc-9 (Debian 9.3.0-22) 9.3.0
> reproduce:
>         # apt-get install sparse
>         # sparse version: v0.6.4-rc1-dirty
>         # https://github.com/0day-ci/linux/commit/f841601cc058e6033761bd2157b886a30190fc3a
>         git remote add linux-review https://github.com/0day-ci/linux
>         git fetch --no-tags linux-review Kajol-Jain/Add-perf-interface-to-expose-nvdimm/20210903-131212
>         git checkout f841601cc058e6033761bd2157b886a30190fc3a
>         # save the attached .config to linux build tree
>         make W=1 C=1 CF='-fdiagnostic-prefix -D__CHECK_ENDIAN__' O=build_dir ARCH=x86_64 SHELL=/bin/bash drivers/nvdimm/
> 
> If you fix the issue, kindly add following tag as appropriate
> Reported-by: kernel test robot <lkp@intel.com>
> 
> 
> sparse warnings: (new ones prefixed by >>)
>>> drivers/nvdimm/nd_perf.c:159:6: sparse: sparse: symbol 'nvdimm_pmu_free_hotplug_memory' was not declared. Should it be static?
> 
> Please review and possibly fold the followup patch.

Hi,
  Sure I will correct it and send follow-up patchset.

Thanks,
Kajol Jain

> 
> ---
> 0-DAY CI Kernel Test Service, Intel Corporation
> https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
> 

^ permalink raw reply

* Re: [RFC PATCH] drivers/nvdimm: nvdimm_pmu_free_hotplug_memory() can be static
From: kajoljain @ 2021-09-04  6:39 UTC (permalink / raw)
  To: kernel test robot, mpe, linuxppc-dev, nvdimm, linux-kernel,
	peterz, dan.j.williams, ira.weiny, vishal.l.verma
  Cc: santosh, maddy, kbuild-all
In-Reply-To: <20210903151941.GA23182@a0af9ae1a611>



On 9/3/21 8:49 PM, kernel test robot wrote:
> drivers/nvdimm/nd_perf.c:159:6: warning: symbol 'nvdimm_pmu_free_hotplug_memory' was not declared. Should it be static?
> 
> Reported-by: kernel test robot <lkp@intel.com>
> Signed-off-by: kernel test robot <lkp@intel.com>
> ---
>  nd_perf.c |    2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/drivers/nvdimm/nd_perf.c b/drivers/nvdimm/nd_perf.c
> index 4c49d1bc2a3c6..b129e5e702d59 100644
> --- a/drivers/nvdimm/nd_perf.c
> +++ b/drivers/nvdimm/nd_perf.c
> @@ -156,7 +156,7 @@ static int nvdimm_pmu_cpu_hotplug_init(struct nvdimm_pmu *nd_pmu)
>  	return 0;
>  }
>  
> -void nvdimm_pmu_free_hotplug_memory(struct nvdimm_pmu *nd_pmu)
> +static void nvdimm_pmu_free_hotplug_memory(struct nvdimm_pmu *nd_pmu)
>  {
>  	cpuhp_state_remove_instance_nocalls(nd_pmu->cpuhp_state, &nd_pmu->node);
>  	cpuhp_remove_multi_state(nd_pmu->cpuhp_state);
> 

Hi,
   Thanks for reporting this issue, I will merge it in my followup patchset.

Thanks,
Kajol Jain

^ permalink raw reply

* [PATCH 1/3] perf: Add macros to specify onchip L2/L3 accesses
From: Kajol Jain @ 2021-09-04  6:49 UTC (permalink / raw)
  To: mpe, linuxppc-dev, linux-kernel, peterz, mingo, acme, jolsa,
	namhyung, linux-perf-users, ak
  Cc: mark.rutland, songliubraving, atrajeev, daniel, rnsastry,
	alexander.shishkin, kjain, ast, yao.jin, maddy, paulus, kan.liang

Add a couple of new macros to represent onchip L2 and onchip L3 accesses.

Signed-off-by: Kajol Jain <kjain@linux.ibm.com>
---
 include/uapi/linux/perf_event.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index f92880a15645..030b3e990ac3 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -1265,7 +1265,9 @@ union perf_mem_data_src {
 #define PERF_MEM_LVLNUM_L2	0x02 /* L2 */
 #define PERF_MEM_LVLNUM_L3	0x03 /* L3 */
 #define PERF_MEM_LVLNUM_L4	0x04 /* L4 */
-/* 5-0xa available */
+#define PERF_MEM_LVLNUM_OC_L2	0x05 /* On Chip L2 */
+#define PERF_MEM_LVLNUM_OC_L3	0x06 /* On Chip L3 */
+/* 7-0xa available */
 #define PERF_MEM_LVLNUM_ANY_CACHE 0x0b /* Any cache */
 #define PERF_MEM_LVLNUM_LFB	0x0c /* LFB */
 #define PERF_MEM_LVLNUM_RAM	0x0d /* RAM */
-- 
2.26.2


^ permalink raw reply related

* [PATCH 2/3] perf: Add macros to specify onchip L2/L3 accesses
From: Kajol Jain @ 2021-09-04  6:49 UTC (permalink / raw)
  To: mpe, linuxppc-dev, linux-kernel, peterz, mingo, acme, jolsa,
	namhyung, linux-perf-users, ak
  Cc: mark.rutland, songliubraving, atrajeev, daniel, rnsastry,
	alexander.shishkin, kjain, ast, yao.jin, maddy, paulus, kan.liang
In-Reply-To: <20210904064932.307610-1-kjain@linux.ibm.com>

Add a couple of new macros to represent onchip L2 and onchip L3 accesses.
Patch also adds the decoding strings in the mem_lvlnum data structure.

Signed-off-by: Kajol Jain <kjain@linux.ibm.com>
---
 tools/include/uapi/linux/perf_event.h | 4 +++-
 tools/perf/util/mem-events.c          | 2 ++
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h
index f92880a15645..030b3e990ac3 100644
--- a/tools/include/uapi/linux/perf_event.h
+++ b/tools/include/uapi/linux/perf_event.h
@@ -1265,7 +1265,9 @@ union perf_mem_data_src {
 #define PERF_MEM_LVLNUM_L2	0x02 /* L2 */
 #define PERF_MEM_LVLNUM_L3	0x03 /* L3 */
 #define PERF_MEM_LVLNUM_L4	0x04 /* L4 */
-/* 5-0xa available */
+#define PERF_MEM_LVLNUM_OC_L2	0x05 /* On Chip L2 */
+#define PERF_MEM_LVLNUM_OC_L3	0x06 /* On Chip L3 */
+/* 7-0xa available */
 #define PERF_MEM_LVLNUM_ANY_CACHE 0x0b /* Any cache */
 #define PERF_MEM_LVLNUM_LFB	0x0c /* LFB */
 #define PERF_MEM_LVLNUM_RAM	0x0d /* RAM */
diff --git a/tools/perf/util/mem-events.c b/tools/perf/util/mem-events.c
index f0e75df72b80..f846a91220c2 100644
--- a/tools/perf/util/mem-events.c
+++ b/tools/perf/util/mem-events.c
@@ -294,6 +294,8 @@ static const char * const mem_lvl[] = {
 };
 
 static const char * const mem_lvlnum[] = {
+	[PERF_MEM_LVLNUM_OC_L2] = "OnChip L2",
+	[PERF_MEM_LVLNUM_OC_L3] = "OnChip L3",
 	[PERF_MEM_LVLNUM_ANY_CACHE] = "Any cache",
 	[PERF_MEM_LVLNUM_LFB] = "LFB",
 	[PERF_MEM_LVLNUM_RAM] = "RAM",
-- 
2.26.2


^ permalink raw reply related

* [PATCH 3/3] powerpc/perf: Fix data source encodings for power10
From: Kajol Jain @ 2021-09-04  6:49 UTC (permalink / raw)
  To: mpe, linuxppc-dev, linux-kernel, peterz, mingo, acme, jolsa,
	namhyung, linux-perf-users, ak
  Cc: mark.rutland, songliubraving, atrajeev, daniel, rnsastry,
	alexander.shishkin, kjain, ast, yao.jin, maddy, paulus, kan.liang
In-Reply-To: <20210904064932.307610-1-kjain@linux.ibm.com>

Fix the data source encodings for power10 to represent
onchip L2/L3 accesses properly.

Fixes: 79e96f8f930d ("powerpc/perf: Export memory hierarchy info to user space")
Signed-off-by: Kajol Jain <kjain@linux.ibm.com>
---
 arch/powerpc/perf/isa207-common.c | 21 ++++++++++++++++-----
 1 file changed, 16 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/perf/isa207-common.c b/arch/powerpc/perf/isa207-common.c
index f92bf5f6b74f..9630a17c5da4 100644
--- a/arch/powerpc/perf/isa207-common.c
+++ b/arch/powerpc/perf/isa207-common.c
@@ -238,11 +238,22 @@ static inline u64 isa207_find_source(u64 idx, u32 sub_idx)
 		ret |= P(SNOOP, HIT);
 		break;
 	case 5:
-		ret = PH(LVL, REM_CCE1);
-		if ((sub_idx == 0) || (sub_idx == 2) || (sub_idx == 4))
-			ret |= P(SNOOP, HIT);
-		else if ((sub_idx == 1) || (sub_idx == 3) || (sub_idx == 5))
-			ret |= P(SNOOP, HITM);
+		if (cpu_has_feature(CPU_FTR_ARCH_31)) {
+			if (sub_idx == 0 || sub_idx == 4)
+				ret = PH(LVLNUM, OC_L2) | P(SNOOP, HIT);
+			else if (sub_idx == 1 || sub_idx == 5)
+				ret = PH(LVLNUM, OC_L2) | P(SNOOP, HITM);
+			else if (sub_idx == 2 || sub_idx == 6)
+				ret = PH(LVLNUM, OC_L3) | P(SNOOP, HIT);
+			else if (sub_idx == 3 || sub_idx == 7)
+				ret = PH(LVLNUM, OC_L3) | P(SNOOP, HITM);
+		} else {
+			ret = PH(LVL, REM_CCE1);
+			if ((sub_idx == 0) || (sub_idx == 2) || (sub_idx == 4))
+				ret |= P(SNOOP, HIT);
+			else if ((sub_idx == 1) || (sub_idx == 3) || (sub_idx == 5))
+				ret |= P(SNOOP, HITM);
+		}
 		break;
 	case 6:
 		ret = PH(LVL, REM_CCE2);
-- 
2.26.2


^ permalink raw reply related

* Re: [PATCH -next] powerpc/mm: check base flags in ioremap_prot
From: Nanyong Sun @ 2021-09-04  6:38 UTC (permalink / raw)
  To: Christophe Leroy, mpe, benh, paulus, akpm, npiggin,
	christophe.leroy
  Cc: wangkefeng 00584194, linuxppc-dev, linux-kernel
In-Reply-To: <90aa2b67-24c8-4a5f-d91a-b562054d5c5d@csgroup.eu>

[-- Attachment #1: Type: text/plain, Size: 4962 bytes --]


On 2021/9/3 17:16, Christophe Leroy wrote:
>
>
> Le 03/09/2021 à 11:03, Nanyong Sun a écrit :
>> Some drivers who call ioremap_prot without setting base flags like
>> ioremap_prot(addr, len, 0) may work well before
>> commit 56f3c1413f5c ("powerpc/mm: properly set PAGE_KERNEL flags in
>> ioremap()"), but now they will get a virtual address "successfully"
>> from ioremap_prot and badly fault on memory access later because that
>> patch also dropped the hack adding of base flags for ioremap_prot.
>>
>> So return NULL and throw a warning if the caller of ioremap_prot did
>> not set base flags properly. Why not just hack adding PAGE_KERNEL flags
>> in the ioremap_prot, because most scenarios can be covered by high level
>> functions like ioremap(), ioremap_coherent(), ioremap_cache()...
>> so it is better to keep max flexibility for this low level api.
>
> As far as I can see, there is no user of this function that sets flags 
> to 0 in the kernel tree.
>
> Did you find any ? If you did, I think it is better to fix the caller.
>
> Christophe
>
I see some vendors' drivers which are not upstream have used 
ioremap_prot like

ioremap_prot(addr,len, _PAGE_NO_CACHE | _PAGE_GUARDED) or

ioremap_prot(addr,len, 0), and they worked well on old kernel versions 
before commit

56f3c1413f5c ("powerpc/mm: properly set PAGE_KERNEL flags in ioremap()").

Actually, in the commit( git show 56f3c1413f5c ), you can see that in old

kernel versions, the implementations of ioremap_xxx just set flags as 
_PAGE_xxx or 0,

Code examples of the commit:

In arch/powerpc/mm/pgtable_32.c

ioremap(phys_addr_t addr, unsigned long size)
  {
-       return __ioremap_caller(addr, size, _PAGE_NO_CACHE | _PAGE_GUARDED,
-                               __builtin_return_address(0));
+       unsigned long flags = pgprot_val(pgprot_noncached(PAGE_KERNEL));
+
+       return __ioremap_caller(addr, size, flags, 
__builtin_return_address(0));
  }

In arch/powerpc/mm/pgtable_64.c

void __iomem * ioremap(phys_addr_t addr, unsigned long size)
  {
-       unsigned long flags = pgprot_val(pgprot_noncached(__pgprot(0)));
+       unsigned long flags = pgprot_val(pgprot_noncached(PAGE_KERNEL));
         void *caller = __builtin_return_address(0);


They rely on the low level functions to add base flags.

So, these driver codes like 'ioremap_prot(addr,len, _PAGE_NO_CACHE) '

in old kernel versions are not really improper.

Of course, when porting to new kernel versions, they need to change because the

API implementation has changed, but it's difficult for driver developers 
to find out what

happened and how to change, because they still get a virtual address 
"successfully"

from ioremap_prot without base flags and then page fault on memory 
access later.

So, it is necessary to check and report base flags missing in 
ioremap_prot() timely.

Secondly, the commit 56f3c1413f5c ("powerpc/mm: properly set PAGE_KERNEL

flags in ioremap()") deleted the hack adding of PAGE_KERNEL flags in the low 
level

implementation and added flags properly for all ioremap_xx() APIs except 
ioremap_prot;

for ioremap_prot, it not only lost the hack adding, but also lost the 
basic flags check

which is necessary.

So we need to add this basic check for this API.

Nanyong

>>
>> Signed-off-by: Nanyong Sun <sunnanyong@huawei.com>
>> ---
>>   arch/powerpc/mm/ioremap.c | 4 ++++
>>   1 file changed, 4 insertions(+)
>>
>> diff --git a/arch/powerpc/mm/ioremap.c b/arch/powerpc/mm/ioremap.c
>> index 57342154d2b0..b7eda0f0d04d 100644
>> --- a/arch/powerpc/mm/ioremap.c
>> +++ b/arch/powerpc/mm/ioremap.c
>> @@ -46,6 +46,10 @@ void __iomem *ioremap_prot(phys_addr_t addr, 
>> unsigned long size, unsigned long f
>>       pte_t pte = __pte(flags);
>>       void *caller = __builtin_return_address(0);
>>   +    /* The caller should set base page flags properly */
>> +    if (WARN_ON((flags & _PAGE_PRESENT) == 0))
>
> This probably doesn't work for some platforms like book3s/64. You 
> should use helpers like pte_present().
>
> See the comment at 
> https://elixir.bootlin.com/linux/v5.14/source/arch/powerpc/include/asm/book3s/64/pgtable.h#L591
>
I'm afraid that pte_present() is not ok for book3s/64, because it also 
checks _PAGE_PTE which will be set in the bottom

half of ioremap, so it would always return fail because the caller of 
ioremap_prot wouldn't set _PAGE_PTE. I think it's ok that

not check _PAGE_INVALID here because we intend to create a new valid PTE 
here.

And I think check _PAGE_PRESENT is ok  because in kernel version before 
commit 56f3c1413f5c , the function __ioremap_at()

and __ioremap_caller() used _PAGE_PRESENT to check base flags, book3s/64 
was also present by then.

Nanyong

>> +        return NULL;
>> +
>>       /* writeable implies dirty for kernel addresses */
>>       if (pte_write(pte))
>>           pte = pte_mkdirty(pte);
>>
> .

[-- Attachment #2: Type: text/html, Size: 7409 bytes --]

^ permalink raw reply

* Re: [PATCH kernel] KVM: PPC: Book3S: Merge powerpc's debugfs entry content into generic entry
From: Alexey Kardashevskiy @ 2021-09-04  7:57 UTC (permalink / raw)
  To: Fabiano Rosas, linuxppc-dev; +Cc: Paolo Bonzini, kvm-ppc, kvm
In-Reply-To: <87v93hens6.fsf@linux.ibm.com>



On 04/09/2021 00:28, Fabiano Rosas wrote:
> Alexey Kardashevskiy <aik@ozlabs.ru> writes:
> 
>> At the moment the generic KVM code creates an "%pid-%fd" entry per a KVM
>> instance; and the PPC HV KVM creates its own at "vm%pid".
>>
> >> The problems with the PPC entries are:
>> 1. they do not allow multiple VMs in the same process (which is extremely
>> rare case mostly used by syzkaller fuzzer);
>> 2. prone to race bugs like the generic KVM code had fixed in
>> commit 85cd39af14f4 ("KVM: Do not leak memory for duplicate debugfs
>> directories").
>>
>> This defines kvm_arch_create_kvm_debugfs() similar to one for vcpus.
> 
> I think kvm_arch_create_vm_debugfs is a bit more accurate?


ah yes, it is better.

>                          ^
>> This defines 2 hooks in kvmppc_ops for allowing specific KVM
>> implementations to add necessary entries.
>>
>> This makes use of already existing kvm_arch_create_vcpu_debugfs.
>>
>> This removes no more used debugfs_dir pointers from PPC kvm_arch structs.
>>
>> Suggested-by: Fabiano Rosas <farosas@linux.ibm.com>
>> Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
> 
> ...
> 
>> diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
>> index c8f12b056968..325b388c725a 100644
>> --- a/arch/powerpc/kvm/book3s_hv.c
>> +++ b/arch/powerpc/kvm/book3s_hv.c
>> @@ -2771,19 +2771,14 @@ static const struct file_operations debugfs_timings_ops = {
>>   };
>>   
>>   /* Create a debugfs directory for the vcpu */
>> -static void debugfs_vcpu_init(struct kvm_vcpu *vcpu, unsigned int id)
>> +static void kvmppc_arch_create_vcpu_debugfs_hv(struct kvm_vcpu *vcpu, struct dentry *debugfs_dentry)
> 
> This could lose the 'arch' since it is already inside our code and
> accessed only via ops. I see that we already have a
> kvmppc_create_vcpu_debugfs that's used for some BookE processor, this

Ouch, missed kvmppc_create_vcpu_debugfs(). Good eye :)


> would make:
> 
> kvmppc_create_vcpu_debugfs
> kvmppc_create_vcpu_debugfs_hv
> kvmppc_create_vcpu_debugfs_pr (possibly)
> 
> which perhaps is more consistent.


Or  kvm_arch_vm_ioctl_hv(). I really like having "arch" in the name, 
tells right away what it is about. "kvmppc" might be excessive. Thanks,



>>   {
>> -	char buf[16];
>> -	struct kvm *kvm = vcpu->kvm;
>> -
>> -	snprintf(buf, sizeof(buf), "vcpu%u", id);
>> -	vcpu->arch.debugfs_dir = debugfs_create_dir(buf, kvm->arch.debugfs_dir);
>> -	debugfs_create_file("timings", 0444, vcpu->arch.debugfs_dir, vcpu,
>> +	debugfs_create_file("timings", 0444, debugfs_dentry, vcpu,
>>   			    &debugfs_timings_ops);
>>   }
>>   
>>   #else /* CONFIG_KVM_BOOK3S_HV_EXIT_TIMING */
>> -static void debugfs_vcpu_init(struct kvm_vcpu *vcpu, unsigned int id)
>> +static void kvmppc_arch_create_vcpu_debugfs_hv(struct kvm_vcpu *vcpu, struct dentry *debugfs_dentry)
>>   {
>>   }
>>   #endif /* CONFIG_KVM_BOOK3S_HV_EXIT_TIMING */

-- 
Alexey

^ permalink raw reply

* Re: [PATCH] ftrace: Cleanup ftrace_dyn_arch_init()
From: Weizhao Ouyang @ 2021-09-04 10:48 UTC (permalink / raw)
  To: rostedt, mingo
  Cc: dalias, linux-ia64, linux-sh, linux-mips, James.Bottomley, guoren,
	hpa, sparclinux, linux-riscv, deanbo422, will, linux-s390, ysato,
	deller, x86, linux, linux-csky, borntraeger, catalin.marinas, aou,
	gor, hca, bp, green.hu, paul.walmsley, tglx, linux-arm-kernel,
	Michal Simek, tsbogend, linux-parisc, nickhu, linux-kernel,
	Palmer Dabbelt, paulus, linuxppc-dev, davem
In-Reply-To: <20210903071817.1162938-1-o451686892@gmail.com>

[-- Attachment #1: Type: text/plain, Size: 8681 bytes --]

On Fri, Sep 3, 2021 at 3:18 PM Weizhao Ouyang <o451686892@gmail.com> wrote:

> Most ARCHs use empty ftrace_dyn_arch_init(), introduce a weak common
> ftrace_dyn_arch_init() to cleanup them.
>
> Signed-off-by: Weizhao Ouyang <o451686892@gmail.com>
> ---
>  arch/arm/kernel/ftrace.c          | 5 -----
>  arch/arm64/kernel/ftrace.c        | 5 -----
>  arch/csky/kernel/ftrace.c         | 5 -----
>  arch/ia64/kernel/ftrace.c         | 6 ------
>  arch/microblaze/kernel/ftrace.c   | 5 -----
>  arch/mips/include/asm/ftrace.h    | 2 ++
>  arch/nds32/kernel/ftrace.c        | 5 -----
>  arch/parisc/kernel/ftrace.c       | 5 -----
>  arch/powerpc/include/asm/ftrace.h | 4 ++++
>  arch/riscv/kernel/ftrace.c        | 5 -----
>  arch/s390/kernel/ftrace.c         | 5 -----
>  arch/sh/kernel/ftrace.c           | 5 -----
>  arch/sparc/kernel/ftrace.c        | 5 -----
>  arch/x86/kernel/ftrace.c          | 5 -----
>  include/linux/ftrace.h            | 1 -
>  kernel/trace/ftrace.c             | 5 +++++
>  16 files changed, 11 insertions(+), 62 deletions(-)
>
> diff --git a/arch/arm/kernel/ftrace.c b/arch/arm/kernel/ftrace.c
> index 3c83b5d29697..a006585e1c09 100644
> --- a/arch/arm/kernel/ftrace.c
> +++ b/arch/arm/kernel/ftrace.c
> @@ -193,11 +193,6 @@ int ftrace_make_nop(struct module *mod,
>
>         return ret;
>  }
> -
> -int __init ftrace_dyn_arch_init(void)
> -{
> -       return 0;
> -}
>  #endif /* CONFIG_DYNAMIC_FTRACE */
>
>  #ifdef CONFIG_FUNCTION_GRAPH_TRACER
> diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c
> index 7f467bd9db7a..fc62dfe73f93 100644
> --- a/arch/arm64/kernel/ftrace.c
> +++ b/arch/arm64/kernel/ftrace.c
> @@ -236,11 +236,6 @@ void arch_ftrace_update_code(int command)
>         command |= FTRACE_MAY_SLEEP;
>         ftrace_modify_all_code(command);
>  }
> -
> -int __init ftrace_dyn_arch_init(void)
> -{
> -       return 0;
> -}
>  #endif /* CONFIG_DYNAMIC_FTRACE */
>
>  #ifdef CONFIG_FUNCTION_GRAPH_TRACER
> diff --git a/arch/csky/kernel/ftrace.c b/arch/csky/kernel/ftrace.c
> index b4a7ec1517ff..50bfcf129078 100644
> --- a/arch/csky/kernel/ftrace.c
> +++ b/arch/csky/kernel/ftrace.c
> @@ -133,11 +133,6 @@ int ftrace_update_ftrace_func(ftrace_func_t func)
>                                 (unsigned long)func, true, true);
>         return ret;
>  }
> -
> -int __init ftrace_dyn_arch_init(void)
> -{
> -       return 0;
> -}
>  #endif /* CONFIG_DYNAMIC_FTRACE */
>
>  #ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
> diff --git a/arch/ia64/kernel/ftrace.c b/arch/ia64/kernel/ftrace.c
> index b2ab2d58fb30..d6360fd404ab 100644
> --- a/arch/ia64/kernel/ftrace.c
> +++ b/arch/ia64/kernel/ftrace.c
> @@ -194,9 +194,3 @@ int ftrace_update_ftrace_func(ftrace_func_t func)
>         flush_icache_range(addr, addr + 16);
>         return 0;
>  }
> -
> -/* run from kstop_machine */
> -int __init ftrace_dyn_arch_init(void)
> -{
> -       return 0;
> -}
> diff --git a/arch/microblaze/kernel/ftrace.c
> b/arch/microblaze/kernel/ftrace.c
> index 224eea40e1ee..188749d62709 100644
> --- a/arch/microblaze/kernel/ftrace.c
> +++ b/arch/microblaze/kernel/ftrace.c
> @@ -163,11 +163,6 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned
> long addr)
>         return ret;
>  }
>
> -int __init ftrace_dyn_arch_init(void)
> -{
> -       return 0;
> -}
> -
>  int ftrace_update_ftrace_func(ftrace_func_t func)
>  {
>         unsigned long ip = (unsigned long)(&ftrace_call);
> diff --git a/arch/mips/include/asm/ftrace.h
> b/arch/mips/include/asm/ftrace.h
> index b463f2aa5a61..ed013e767390 100644
> --- a/arch/mips/include/asm/ftrace.h
> +++ b/arch/mips/include/asm/ftrace.h
> @@ -76,6 +76,8 @@ do {                                          \
>
>
>  #ifdef CONFIG_DYNAMIC_FTRACE
> +int __init ftrace_dyn_arch_init(void);
> +
>  static inline unsigned long ftrace_call_adjust(unsigned long addr)
>  {
>         return addr;
> diff --git a/arch/nds32/kernel/ftrace.c b/arch/nds32/kernel/ftrace.c
> index 0e23e3a8df6b..f0ef4842d191 100644
> --- a/arch/nds32/kernel/ftrace.c
> +++ b/arch/nds32/kernel/ftrace.c
> @@ -84,11 +84,6 @@ void _ftrace_caller(unsigned long parent_ip)
>         /* restore all state needed by the compiler epilogue */
>  }
>
> -int __init ftrace_dyn_arch_init(void)
> -{
> -       return 0;
> -}
> -
>  static unsigned long gen_sethi_insn(unsigned long addr)
>  {
>         unsigned long opcode = 0x46000000;
> diff --git a/arch/parisc/kernel/ftrace.c b/arch/parisc/kernel/ftrace.c
> index 0a1e75af5382..01581f715737 100644
> --- a/arch/parisc/kernel/ftrace.c
> +++ b/arch/parisc/kernel/ftrace.c
> @@ -94,11 +94,6 @@ int ftrace_disable_ftrace_graph_caller(void)
>  #endif
>
>  #ifdef CONFIG_DYNAMIC_FTRACE
> -
> -int __init ftrace_dyn_arch_init(void)
> -{
> -       return 0;
> -}
>  int ftrace_update_ftrace_func(ftrace_func_t func)
>  {
>         return 0;
> diff --git a/arch/powerpc/include/asm/ftrace.h
> b/arch/powerpc/include/asm/ftrace.h
> index debe8c4f7062..4db83cf4283f 100644
> --- a/arch/powerpc/include/asm/ftrace.h
> +++ b/arch/powerpc/include/asm/ftrace.h
> @@ -61,6 +61,10 @@ struct dyn_arch_ftrace {
>  };
>  #endif /* __ASSEMBLY__ */
>
> +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
> +int __init ftrace_dyn_arch_init(void);
> +#endif
> +
>

Sorry, there is a mistake in this CONFIG; I will send a v2 patch later.


>  #ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
>  #define ARCH_SUPPORTS_FTRACE_OPS 1
>  #endif
> diff --git a/arch/riscv/kernel/ftrace.c b/arch/riscv/kernel/ftrace.c
> index 7f1e5203de88..4716f4cdc038 100644
> --- a/arch/riscv/kernel/ftrace.c
> +++ b/arch/riscv/kernel/ftrace.c
> @@ -154,11 +154,6 @@ int ftrace_update_ftrace_func(ftrace_func_t func)
>
>         return ret;
>  }
> -
> -int __init ftrace_dyn_arch_init(void)
> -{
> -       return 0;
> -}
>  #endif
>
>  #ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
> diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c
> index 0a464d328467..3fd80397ff52 100644
> --- a/arch/s390/kernel/ftrace.c
> +++ b/arch/s390/kernel/ftrace.c
> @@ -262,11 +262,6 @@ int ftrace_update_ftrace_func(ftrace_func_t func)
>         return 0;
>  }
>
> -int __init ftrace_dyn_arch_init(void)
> -{
> -       return 0;
> -}
> -
>  void arch_ftrace_update_code(int command)
>  {
>         if (ftrace_shared_hotpatch_trampoline(NULL))
> diff --git a/arch/sh/kernel/ftrace.c b/arch/sh/kernel/ftrace.c
> index 295c43315bbe..930001bb8c6a 100644
> --- a/arch/sh/kernel/ftrace.c
> +++ b/arch/sh/kernel/ftrace.c
> @@ -252,11 +252,6 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned
> long addr)
>
>         return ftrace_modify_code(rec->ip, old, new);
>  }
> -
> -int __init ftrace_dyn_arch_init(void)
> -{
> -       return 0;
> -}
>  #endif /* CONFIG_DYNAMIC_FTRACE */
>
>  #ifdef CONFIG_FUNCTION_GRAPH_TRACER
> diff --git a/arch/sparc/kernel/ftrace.c b/arch/sparc/kernel/ftrace.c
> index 684b84ce397f..eaead3da8e03 100644
> --- a/arch/sparc/kernel/ftrace.c
> +++ b/arch/sparc/kernel/ftrace.c
> @@ -82,11 +82,6 @@ int ftrace_update_ftrace_func(ftrace_func_t func)
>         new = ftrace_call_replace(ip, (unsigned long)func);
>         return ftrace_modify_code(ip, old, new);
>  }
> -
> -int __init ftrace_dyn_arch_init(void)
> -{
> -       return 0;
> -}
>  #endif
>
>  #ifdef CONFIG_FUNCTION_GRAPH_TRACER
> diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
> index 1b3ce3b4a2a2..23d221a9a3cd 100644
> --- a/arch/x86/kernel/ftrace.c
> +++ b/arch/x86/kernel/ftrace.c
> @@ -252,11 +252,6 @@ void arch_ftrace_update_code(int command)
>         ftrace_modify_all_code(command);
>  }
>
> -int __init ftrace_dyn_arch_init(void)
> -{
> -       return 0;
> -}
> -
>  /* Currently only x86_64 supports dynamic trampolines */
>  #ifdef CONFIG_X86_64
>
> diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
> index 832e65f06754..f1eca123d89d 100644
> --- a/include/linux/ftrace.h
> +++ b/include/linux/ftrace.h
> @@ -573,7 +573,6 @@ ftrace_set_early_filter(struct ftrace_ops *ops, char
> *buf, int enable);
>
>  /* defined in arch */
>  extern int ftrace_ip_converted(unsigned long ip);
> -extern int ftrace_dyn_arch_init(void);
>  extern void ftrace_replace_code(int enable);
>  extern int ftrace_update_ftrace_func(ftrace_func_t func);
>  extern void ftrace_caller(void);
> diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
> index 7efbc8aaf7f6..4c090323198d 100644
> --- a/kernel/trace/ftrace.c
> +++ b/kernel/trace/ftrace.c
> @@ -6846,6 +6846,11 @@ void __init ftrace_free_init_mem(void)
>         ftrace_free_mem(NULL, start, end);
>  }
>
> +int __init __weak ftrace_dyn_arch_init(void)
> +{
> +       return 0;
> +}
> +
>  void __init ftrace_init(void)
>  {
>         extern unsigned long __start_mcount_loc[];
> --
> 2.30.2
>
>
Thanks.

[-- Attachment #2: Type: text/html, Size: 10327 bytes --]

^ permalink raw reply

* Re: [PATCH -next] powerpc/mm: check base flags in ioremap_prot
From: Michael Ellerman @ 2021-09-04 11:20 UTC (permalink / raw)
  To: Nanyong Sun, Christophe Leroy, benh, paulus, akpm, npiggin,
	christophe.leroy
  Cc: wangkefeng 00584194, linuxppc-dev, linux-kernel
In-Reply-To: <e27f8786-7d43-4191-9b65-5a55a64cf158@huawei.com>

Nanyong Sun <sunnanyong@huawei.com> writes:
> On 2021/9/3 17:16, Christophe Leroy wrote:
>> Le 03/09/2021 à 11:03, Nanyong Sun a écrit :
>>> Some drivers that call ioremap_prot without setting base flags, like
>>> ioremap_prot(addr, len, 0), may have worked well before
>>> commit 56f3c1413f5c ("powerpc/mm: properly set PAGE_KERNEL flags in
>>> ioremap()"), but now they will get a virtual address "successfully"
>>> from ioremap_prot and badly fault on memory access later because that
>>> patch also dropped the hack that added base flags in ioremap_prot.
>>>
>>> So return NULL and throw a warning if the caller of ioremap_prot did
>>> not set base flags properly. Why not just add the PAGE_KERNEL flags
>>> in ioremap_prot as a hack? Because most scenarios are covered by high
>>> level functions like ioremap(), ioremap_coherent(), ioremap_cache()...
>>> so it is better to keep maximum flexibility for this low level API.
>>
>> As far as I can see, there is no user of this function that sets flags
>> to 0 in the kernel tree.
>>
>> Did you find any ? If you did, I think it is better to fix the caller.
>>
>> Christophe
>>
> I see some vendor's drivers which are not on upstream ...

Sorry, but we don't carry extraneous checks in upstream for the sake of
out-of-tree drivers.

cheers

^ permalink raw reply

* Re: [PATCH] ftrace: Cleanup ftrace_dyn_arch_init()
From: Weizhao Ouyang @ 2021-09-04 11:59 UTC (permalink / raw)
  To: rostedt, mingo
  Cc: dalias, linux-ia64, linux-sh, linux-mips, James.Bottomley, guoren,
	hpa, sparclinux, linux-riscv, deanbo422, will, linux-s390, ysato,
	deller, x86, linux, linux-csky, borntraeger, catalin.marinas, aou,
	gor, hca, bp, green.hu, paul.walmsley, tglx, linux-arm-kernel,
	monstr, tsbogend, linux-parisc, nickhu, linux-kernel, palmer,
	paulus, linuxppc-dev, davem
In-Reply-To: <20210903071817.1162938-1-o451686892@gmail.com>


On 2021/9/3 15:18, Weizhao Ouyang wrote:
> Most architectures use an empty ftrace_dyn_arch_init(); introduce a weak
> common ftrace_dyn_arch_init() to clean them up.
>
> Signed-off-by: Weizhao Ouyang <o451686892@gmail.com>
> ---
>  arch/arm/kernel/ftrace.c          | 5 -----
>  arch/arm64/kernel/ftrace.c        | 5 -----
>  arch/csky/kernel/ftrace.c         | 5 -----
>  arch/ia64/kernel/ftrace.c         | 6 ------
>  arch/microblaze/kernel/ftrace.c   | 5 -----
>  arch/mips/include/asm/ftrace.h    | 2 ++
>  arch/nds32/kernel/ftrace.c        | 5 -----
>  arch/parisc/kernel/ftrace.c       | 5 -----
>  arch/powerpc/include/asm/ftrace.h | 4 ++++
>  arch/riscv/kernel/ftrace.c        | 5 -----
>  arch/s390/kernel/ftrace.c         | 5 -----
>  arch/sh/kernel/ftrace.c           | 5 -----
>  arch/sparc/kernel/ftrace.c        | 5 -----
>  arch/x86/kernel/ftrace.c          | 5 -----
>  include/linux/ftrace.h            | 1 -
>  kernel/trace/ftrace.c             | 5 +++++
>  16 files changed, 11 insertions(+), 62 deletions(-)
>
> diff --git a/arch/arm/kernel/ftrace.c b/arch/arm/kernel/ftrace.c
> index 3c83b5d29697..a006585e1c09 100644
> --- a/arch/arm/kernel/ftrace.c
> +++ b/arch/arm/kernel/ftrace.c
> @@ -193,11 +193,6 @@ int ftrace_make_nop(struct module *mod,
>  
>  	return ret;
>  }
> -
> -int __init ftrace_dyn_arch_init(void)
> -{
> -	return 0;
> -}
>  #endif /* CONFIG_DYNAMIC_FTRACE */
>  
>  #ifdef CONFIG_FUNCTION_GRAPH_TRACER
> diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c
> index 7f467bd9db7a..fc62dfe73f93 100644
> --- a/arch/arm64/kernel/ftrace.c
> +++ b/arch/arm64/kernel/ftrace.c
> @@ -236,11 +236,6 @@ void arch_ftrace_update_code(int command)
>  	command |= FTRACE_MAY_SLEEP;
>  	ftrace_modify_all_code(command);
>  }
> -
> -int __init ftrace_dyn_arch_init(void)
> -{
> -	return 0;
> -}
>  #endif /* CONFIG_DYNAMIC_FTRACE */
>  
>  #ifdef CONFIG_FUNCTION_GRAPH_TRACER
> diff --git a/arch/csky/kernel/ftrace.c b/arch/csky/kernel/ftrace.c
> index b4a7ec1517ff..50bfcf129078 100644
> --- a/arch/csky/kernel/ftrace.c
> +++ b/arch/csky/kernel/ftrace.c
> @@ -133,11 +133,6 @@ int ftrace_update_ftrace_func(ftrace_func_t func)
>  				(unsigned long)func, true, true);
>  	return ret;
>  }
> -
> -int __init ftrace_dyn_arch_init(void)
> -{
> -	return 0;
> -}
>  #endif /* CONFIG_DYNAMIC_FTRACE */
>  
>  #ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
> diff --git a/arch/ia64/kernel/ftrace.c b/arch/ia64/kernel/ftrace.c
> index b2ab2d58fb30..d6360fd404ab 100644
> --- a/arch/ia64/kernel/ftrace.c
> +++ b/arch/ia64/kernel/ftrace.c
> @@ -194,9 +194,3 @@ int ftrace_update_ftrace_func(ftrace_func_t func)
>  	flush_icache_range(addr, addr + 16);
>  	return 0;
>  }
> -
> -/* run from kstop_machine */
> -int __init ftrace_dyn_arch_init(void)
> -{
> -	return 0;
> -}
> diff --git a/arch/microblaze/kernel/ftrace.c b/arch/microblaze/kernel/ftrace.c
> index 224eea40e1ee..188749d62709 100644
> --- a/arch/microblaze/kernel/ftrace.c
> +++ b/arch/microblaze/kernel/ftrace.c
> @@ -163,11 +163,6 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
>  	return ret;
>  }
>  
> -int __init ftrace_dyn_arch_init(void)
> -{
> -	return 0;
> -}
> -
>  int ftrace_update_ftrace_func(ftrace_func_t func)
>  {
>  	unsigned long ip = (unsigned long)(&ftrace_call);
> diff --git a/arch/mips/include/asm/ftrace.h b/arch/mips/include/asm/ftrace.h
> index b463f2aa5a61..ed013e767390 100644
> --- a/arch/mips/include/asm/ftrace.h
> +++ b/arch/mips/include/asm/ftrace.h
> @@ -76,6 +76,8 @@ do {						\
>  
>  
>  #ifdef CONFIG_DYNAMIC_FTRACE
> +int __init ftrace_dyn_arch_init(void);
> +
>  static inline unsigned long ftrace_call_adjust(unsigned long addr)
>  {
>  	return addr;
> diff --git a/arch/nds32/kernel/ftrace.c b/arch/nds32/kernel/ftrace.c
> index 0e23e3a8df6b..f0ef4842d191 100644
> --- a/arch/nds32/kernel/ftrace.c
> +++ b/arch/nds32/kernel/ftrace.c
> @@ -84,11 +84,6 @@ void _ftrace_caller(unsigned long parent_ip)
>  	/* restore all state needed by the compiler epilogue */
>  }
>  
> -int __init ftrace_dyn_arch_init(void)
> -{
> -	return 0;
> -}
> -
>  static unsigned long gen_sethi_insn(unsigned long addr)
>  {
>  	unsigned long opcode = 0x46000000;
> diff --git a/arch/parisc/kernel/ftrace.c b/arch/parisc/kernel/ftrace.c
> index 0a1e75af5382..01581f715737 100644
> --- a/arch/parisc/kernel/ftrace.c
> +++ b/arch/parisc/kernel/ftrace.c
> @@ -94,11 +94,6 @@ int ftrace_disable_ftrace_graph_caller(void)
>  #endif
>  
>  #ifdef CONFIG_DYNAMIC_FTRACE
> -
> -int __init ftrace_dyn_arch_init(void)
> -{
> -	return 0;
> -}
>  int ftrace_update_ftrace_func(ftrace_func_t func)
>  {
>  	return 0;
> diff --git a/arch/powerpc/include/asm/ftrace.h b/arch/powerpc/include/asm/ftrace.h
> index debe8c4f7062..4db83cf4283f 100644
> --- a/arch/powerpc/include/asm/ftrace.h
> +++ b/arch/powerpc/include/asm/ftrace.h
> @@ -61,6 +61,10 @@ struct dyn_arch_ftrace {
>  };
>  #endif /* __ASSEMBLY__ */
>  
> +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
> +int __init ftrace_dyn_arch_init(void);
> +#endif
> +

Sorry, the CONFIG option used above is a mistake; I will send a v2 patch later.

>  #ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
>  #define ARCH_SUPPORTS_FTRACE_OPS 1
>  #endif
> diff --git a/arch/riscv/kernel/ftrace.c b/arch/riscv/kernel/ftrace.c
> index 7f1e5203de88..4716f4cdc038 100644
> --- a/arch/riscv/kernel/ftrace.c
> +++ b/arch/riscv/kernel/ftrace.c
> @@ -154,11 +154,6 @@ int ftrace_update_ftrace_func(ftrace_func_t func)
>  
>  	return ret;
>  }
> -
> -int __init ftrace_dyn_arch_init(void)
> -{
> -	return 0;
> -}
>  #endif
>  
>  #ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
> diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c
> index 0a464d328467..3fd80397ff52 100644
> --- a/arch/s390/kernel/ftrace.c
> +++ b/arch/s390/kernel/ftrace.c
> @@ -262,11 +262,6 @@ int ftrace_update_ftrace_func(ftrace_func_t func)
>  	return 0;
>  }
>  
> -int __init ftrace_dyn_arch_init(void)
> -{
> -	return 0;
> -}
> -
>  void arch_ftrace_update_code(int command)
>  {
>  	if (ftrace_shared_hotpatch_trampoline(NULL))
> diff --git a/arch/sh/kernel/ftrace.c b/arch/sh/kernel/ftrace.c
> index 295c43315bbe..930001bb8c6a 100644
> --- a/arch/sh/kernel/ftrace.c
> +++ b/arch/sh/kernel/ftrace.c
> @@ -252,11 +252,6 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
>  
>  	return ftrace_modify_code(rec->ip, old, new);
>  }
> -
> -int __init ftrace_dyn_arch_init(void)
> -{
> -	return 0;
> -}
>  #endif /* CONFIG_DYNAMIC_FTRACE */
>  
>  #ifdef CONFIG_FUNCTION_GRAPH_TRACER
> diff --git a/arch/sparc/kernel/ftrace.c b/arch/sparc/kernel/ftrace.c
> index 684b84ce397f..eaead3da8e03 100644
> --- a/arch/sparc/kernel/ftrace.c
> +++ b/arch/sparc/kernel/ftrace.c
> @@ -82,11 +82,6 @@ int ftrace_update_ftrace_func(ftrace_func_t func)
>  	new = ftrace_call_replace(ip, (unsigned long)func);
>  	return ftrace_modify_code(ip, old, new);
>  }
> -
> -int __init ftrace_dyn_arch_init(void)
> -{
> -	return 0;
> -}
>  #endif
>  
>  #ifdef CONFIG_FUNCTION_GRAPH_TRACER
> diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
> index 1b3ce3b4a2a2..23d221a9a3cd 100644
> --- a/arch/x86/kernel/ftrace.c
> +++ b/arch/x86/kernel/ftrace.c
> @@ -252,11 +252,6 @@ void arch_ftrace_update_code(int command)
>  	ftrace_modify_all_code(command);
>  }
>  
> -int __init ftrace_dyn_arch_init(void)
> -{
> -	return 0;
> -}
> -
>  /* Currently only x86_64 supports dynamic trampolines */
>  #ifdef CONFIG_X86_64
>  
> diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
> index 832e65f06754..f1eca123d89d 100644
> --- a/include/linux/ftrace.h
> +++ b/include/linux/ftrace.h
> @@ -573,7 +573,6 @@ ftrace_set_early_filter(struct ftrace_ops *ops, char *buf, int enable);
>  
>  /* defined in arch */
>  extern int ftrace_ip_converted(unsigned long ip);
> -extern int ftrace_dyn_arch_init(void);
>  extern void ftrace_replace_code(int enable);
>  extern int ftrace_update_ftrace_func(ftrace_func_t func);
>  extern void ftrace_caller(void);
> diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
> index 7efbc8aaf7f6..4c090323198d 100644
> --- a/kernel/trace/ftrace.c
> +++ b/kernel/trace/ftrace.c
> @@ -6846,6 +6846,11 @@ void __init ftrace_free_init_mem(void)
>  	ftrace_free_mem(NULL, start, end);
>  }
>  
> +int __init __weak ftrace_dyn_arch_init(void)
> +{
> +	return 0;
> +}
> +
>  void __init ftrace_init(void)
>  {
>  	extern unsigned long __start_mcount_loc[];

Thanks.

^ permalink raw reply

* [PATCH kernel v2] KVM: PPC: Merge powerpc's debugfs entry content into generic entry
From: Alexey Kardashevskiy @ 2021-09-04 13:35 UTC (permalink / raw)
  To: linuxppc-dev
  Cc: kvm, Fabiano Rosas, Alexey Kardashevskiy, kvm-ppc, Paolo Bonzini

At the moment the generic KVM code creates a "%pid-%fd" entry per KVM
instance, and the PPC HV KVM creates its own at "vm%pid". The Book3E KVM
creates its own entry for timings.

The problems with the PPC entries are:
1. they do not allow multiple VMs in the same process (which is an extremely
rare case, mostly used by the syzkaller fuzzer);
2. they are prone to race bugs like the one the generic KVM code fixed in
commit 85cd39af14f4 ("KVM: Do not leak memory for duplicate debugfs
directories").

This defines kvm_arch_create_vm_debugfs() similar to the one for vcpus.

This defines 2 hooks in kvmppc_ops for allowing specific KVM
implementations to add necessary entries. This defines handlers
for HV KVM and defines the Book3E debugfs vcpu helper as a handler.

This makes use of already existing kvm_arch_create_vcpu_debugfs
on PPC.

This removes the no longer used debugfs_dir pointers from PPC kvm_arch structs.

Suggested-by: Fabiano Rosas <farosas@linux.ibm.com>
Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
---
Changes:
v2:
* handled powerpc-booke
* s/kvm/vm/ in arch hooks
---
 arch/powerpc/include/asm/kvm_host.h    |  7 +++---
 arch/powerpc/include/asm/kvm_ppc.h     |  2 ++
 arch/powerpc/kvm/timing.h              |  7 +++---
 include/linux/kvm_host.h               |  3 +++
 arch/powerpc/kvm/book3s_64_mmu_hv.c    |  2 +-
 arch/powerpc/kvm/book3s_64_mmu_radix.c |  2 +-
 arch/powerpc/kvm/book3s_hv.c           | 30 +++++++++-----------------
 arch/powerpc/kvm/e500.c                |  1 +
 arch/powerpc/kvm/e500mc.c              |  1 +
 arch/powerpc/kvm/powerpc.c             | 15 ++++++++++---
 arch/powerpc/kvm/timing.c              | 20 ++++-------------
 virt/kvm/kvm_main.c                    |  3 +++
 12 files changed, 44 insertions(+), 49 deletions(-)

diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 2bcac6da0a4b..f29b66cc2163 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -296,7 +296,6 @@ struct kvm_arch {
 	bool dawr1_enabled;
 	pgd_t *pgtable;
 	u64 process_table;
-	struct dentry *debugfs_dir;
 	struct kvm_resize_hpt *resize_hpt; /* protected by kvm->lock */
 #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
 #ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
@@ -672,7 +671,6 @@ struct kvm_vcpu_arch {
 	u64 timing_min_duration[__NUMBER_OF_KVM_EXIT_TYPES];
 	u64 timing_max_duration[__NUMBER_OF_KVM_EXIT_TYPES];
 	u64 timing_last_exit;
-	struct dentry *debugfs_exit_timing;
 #endif
 
 #ifdef CONFIG_PPC_BOOK3S
@@ -828,8 +826,6 @@ struct kvm_vcpu_arch {
 	struct kvmhv_tb_accumulator rm_exit;	/* real-mode exit code */
 	struct kvmhv_tb_accumulator guest_time;	/* guest execution */
 	struct kvmhv_tb_accumulator cede_time;	/* time napping inside guest */
-
-	struct dentry *debugfs_dir;
 #endif /* CONFIG_KVM_BOOK3S_HV_EXIT_TIMING */
 };
 
@@ -868,4 +864,7 @@ static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {}
 static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {}
 static inline void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu) {}
 
+#define __KVM_HAVE_ARCH_VCPU_DEBUGFS
+#define __KVM_HAVE_ARCH_KVM_DEBUGFS
+
 #endif /* __POWERPC_KVM_HOST_H__ */
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index 6355a6980ccf..fd841e844b90 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -316,6 +316,8 @@ struct kvmppc_ops {
 	int (*svm_off)(struct kvm *kvm);
 	int (*enable_dawr1)(struct kvm *kvm);
 	bool (*hash_v3_possible)(void);
+	void (*create_vm_debugfs)(struct kvm *kvm);
+	void (*create_vcpu_debugfs)(struct kvm_vcpu *vcpu, struct dentry *debugfs_dentry);
 };
 
 extern struct kvmppc_ops *kvmppc_hv_ops;
diff --git a/arch/powerpc/kvm/timing.h b/arch/powerpc/kvm/timing.h
index feef7885ba82..36f7c201c6f1 100644
--- a/arch/powerpc/kvm/timing.h
+++ b/arch/powerpc/kvm/timing.h
@@ -14,8 +14,8 @@
 #ifdef CONFIG_KVM_EXIT_TIMING
 void kvmppc_init_timing_stats(struct kvm_vcpu *vcpu);
 void kvmppc_update_timing_stats(struct kvm_vcpu *vcpu);
-void kvmppc_create_vcpu_debugfs(struct kvm_vcpu *vcpu, unsigned int id);
-void kvmppc_remove_vcpu_debugfs(struct kvm_vcpu *vcpu);
+void kvmppc_create_vcpu_debugfs(struct kvm_vcpu *vcpu,
+				struct dentry *debugfs_dentry);
 
 static inline void kvmppc_set_exit_type(struct kvm_vcpu *vcpu, int type)
 {
@@ -27,8 +27,7 @@ static inline void kvmppc_set_exit_type(struct kvm_vcpu *vcpu, int type)
 static inline void kvmppc_init_timing_stats(struct kvm_vcpu *vcpu) {}
 static inline void kvmppc_update_timing_stats(struct kvm_vcpu *vcpu) {}
 static inline void kvmppc_create_vcpu_debugfs(struct kvm_vcpu *vcpu,
-						unsigned int id) {}
-static inline void kvmppc_remove_vcpu_debugfs(struct kvm_vcpu *vcpu) {}
+					      struct dentry *debugfs_dentry) {}
 static inline void kvmppc_set_exit_type(struct kvm_vcpu *vcpu, int type) {}
 #endif /* CONFIG_KVM_EXIT_TIMING */
 
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index ae7735b490b4..4f22b1201a0d 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1021,6 +1021,9 @@ int kvm_arch_pm_notifier(struct kvm *kvm, unsigned long state);
 #ifdef __KVM_HAVE_ARCH_VCPU_DEBUGFS
 void kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu, struct dentry *debugfs_dentry);
 #endif
+#ifdef __KVM_HAVE_ARCH_KVM_DEBUGFS
+void kvm_arch_create_vm_debugfs(struct kvm *kvm);
+#endif
 
 int kvm_arch_hardware_enable(void);
 void kvm_arch_hardware_disable(void);
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index c63e263312a4..33dae253a0ac 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -2112,7 +2112,7 @@ static const struct file_operations debugfs_htab_fops = {
 
 void kvmppc_mmu_debugfs_init(struct kvm *kvm)
 {
-	debugfs_create_file("htab", 0400, kvm->arch.debugfs_dir, kvm,
+	debugfs_create_file("htab", 0400, kvm->debugfs_dentry, kvm,
 			    &debugfs_htab_fops);
 }
 
diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c
index c5508744e14c..f4e083c20872 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_radix.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c
@@ -1452,7 +1452,7 @@ static const struct file_operations debugfs_radix_fops = {
 
 void kvmhv_radix_debugfs_init(struct kvm *kvm)
 {
-	debugfs_create_file("radix", 0400, kvm->arch.debugfs_dir, kvm,
+	debugfs_create_file("radix", 0400, kvm->debugfs_dentry, kvm,
 			    &debugfs_radix_fops);
 }
 
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index c8f12b056968..046df9e0d462 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -2771,19 +2771,14 @@ static const struct file_operations debugfs_timings_ops = {
 };
 
 /* Create a debugfs directory for the vcpu */
-static void debugfs_vcpu_init(struct kvm_vcpu *vcpu, unsigned int id)
+static void kvmppc_arch_create_vcpu_debugfs_hv(struct kvm_vcpu *vcpu, struct dentry *debugfs_dentry)
 {
-	char buf[16];
-	struct kvm *kvm = vcpu->kvm;
-
-	snprintf(buf, sizeof(buf), "vcpu%u", id);
-	vcpu->arch.debugfs_dir = debugfs_create_dir(buf, kvm->arch.debugfs_dir);
-	debugfs_create_file("timings", 0444, vcpu->arch.debugfs_dir, vcpu,
+	debugfs_create_file("timings", 0444, debugfs_dentry, vcpu,
 			    &debugfs_timings_ops);
 }
 
 #else /* CONFIG_KVM_BOOK3S_HV_EXIT_TIMING */
-static void debugfs_vcpu_init(struct kvm_vcpu *vcpu, unsigned int id)
+static void kvmppc_arch_create_vcpu_debugfs_hv(struct kvm_vcpu *vcpu, struct dentry *debugfs_dentry)
 {
 }
 #endif /* CONFIG_KVM_BOOK3S_HV_EXIT_TIMING */
@@ -2907,8 +2902,6 @@ static int kvmppc_core_vcpu_create_hv(struct kvm_vcpu *vcpu)
 	vcpu->arch.cpu_type = KVM_CPU_3S_64;
 	kvmppc_sanity_check(vcpu);
 
-	debugfs_vcpu_init(vcpu, id);
-
 	return 0;
 }
 
@@ -5186,7 +5179,6 @@ void kvmppc_free_host_rm_ops(void)
 static int kvmppc_core_init_vm_hv(struct kvm *kvm)
 {
 	unsigned long lpcr, lpid;
-	char buf[32];
 	int ret;
 
 	mutex_init(&kvm->arch.uvmem_lock);
@@ -5319,16 +5311,14 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm)
 		kvm->arch.smt_mode = 1;
 	kvm->arch.emul_smt_mode = 1;
 
-	/*
-	 * Create a debugfs directory for the VM
-	 */
-	snprintf(buf, sizeof(buf), "vm%d", current->pid);
-	kvm->arch.debugfs_dir = debugfs_create_dir(buf, kvm_debugfs_dir);
+	return 0;
+}
+
+static void kvmppc_arch_create_vm_debugfs_hv(struct kvm *kvm)
+{
 	kvmppc_mmu_debugfs_init(kvm);
 	if (radix_enabled())
 		kvmhv_radix_debugfs_init(kvm);
-
-	return 0;
 }
 
 static void kvmppc_free_vcores(struct kvm *kvm)
@@ -5342,8 +5332,6 @@ static void kvmppc_free_vcores(struct kvm *kvm)
 
 static void kvmppc_core_destroy_vm_hv(struct kvm *kvm)
 {
-	debugfs_remove_recursive(kvm->arch.debugfs_dir);
-
 	if (!cpu_has_feature(CPU_FTR_ARCH_300))
 		kvm_hv_vm_deactivated();
 
@@ -5996,6 +5984,8 @@ static struct kvmppc_ops kvm_ops_hv = {
 	.svm_off = kvmhv_svm_off,
 	.enable_dawr1 = kvmhv_enable_dawr1,
 	.hash_v3_possible = kvmppc_hash_v3_possible,
+	.create_vcpu_debugfs = kvmppc_arch_create_vcpu_debugfs_hv,
+	.create_vm_debugfs = kvmppc_arch_create_vm_debugfs_hv,
 };
 
 static int kvm_init_subcore_bitmap(void)
diff --git a/arch/powerpc/kvm/e500.c b/arch/powerpc/kvm/e500.c
index 7e8b69015d20..d82e70c3e0a9 100644
--- a/arch/powerpc/kvm/e500.c
+++ b/arch/powerpc/kvm/e500.c
@@ -495,6 +495,7 @@ static struct kvmppc_ops kvm_ops_e500 = {
 	.emulate_op = kvmppc_core_emulate_op_e500,
 	.emulate_mtspr = kvmppc_core_emulate_mtspr_e500,
 	.emulate_mfspr = kvmppc_core_emulate_mfspr_e500,
+	.create_vcpu_debugfs = kvmppc_create_vcpu_debugfs,
 };
 
 static int __init kvmppc_e500_init(void)
diff --git a/arch/powerpc/kvm/e500mc.c b/arch/powerpc/kvm/e500mc.c
index 1c189b5aadcc..45eacd949f4b 100644
--- a/arch/powerpc/kvm/e500mc.c
+++ b/arch/powerpc/kvm/e500mc.c
@@ -381,6 +381,7 @@ static struct kvmppc_ops kvm_ops_e500mc = {
 	.emulate_op = kvmppc_core_emulate_op_e500,
 	.emulate_mtspr = kvmppc_core_emulate_mtspr_e500,
 	.emulate_mfspr = kvmppc_core_emulate_mfspr_e500,
+	.create_vcpu_debugfs = kvmppc_create_vcpu_debugfs,
 };
 
 static int __init kvmppc_e500mc_init(void)
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index c248d6d8b9e3..c895521ac6e9 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -763,7 +763,6 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
 		goto out_vcpu_uninit;
 
 	vcpu->arch.waitp = &vcpu->wait;
-	kvmppc_create_vcpu_debugfs(vcpu, vcpu->vcpu_id);
 	return 0;
 
 out_vcpu_uninit:
@@ -780,8 +779,6 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
 	/* Make sure we're not using the vcpu anymore */
 	hrtimer_cancel(&vcpu->arch.dec_timer);
 
-	kvmppc_remove_vcpu_debugfs(vcpu);
-
 	switch (vcpu->arch.irq_type) {
 	case KVMPPC_IRQ_MPIC:
 		kvmppc_mpic_disconnect_vcpu(vcpu->arch.mpic, vcpu);
@@ -2505,3 +2502,15 @@ int kvm_arch_init(void *opaque)
 }
 
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_ppc_instr);
+
+void kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu, struct dentry *debugfs_dentry)
+{
+	if (vcpu->kvm->arch.kvm_ops->create_vcpu_debugfs)
+		vcpu->kvm->arch.kvm_ops->create_vcpu_debugfs(vcpu, debugfs_dentry);
+}
+
+void kvm_arch_create_vm_debugfs(struct kvm *kvm)
+{
+	if (kvm->arch.kvm_ops->create_vm_debugfs)
+		kvm->arch.kvm_ops->create_vm_debugfs(kvm);
+}
diff --git a/arch/powerpc/kvm/timing.c b/arch/powerpc/kvm/timing.c
index ba56a5cbba97..e1c17afc714d 100644
--- a/arch/powerpc/kvm/timing.c
+++ b/arch/powerpc/kvm/timing.c
@@ -204,21 +204,9 @@ static const struct file_operations kvmppc_exit_timing_fops = {
 	.release = single_release,
 };
 
-void kvmppc_create_vcpu_debugfs(struct kvm_vcpu *vcpu, unsigned int id)
+void kvmppc_create_vcpu_debugfs(struct kvm_vcpu *vcpu,
+				struct dentry *debugfs_dentry)
 {
-	static char dbg_fname[50];
-	struct dentry *debugfs_file;
-
-	snprintf(dbg_fname, sizeof(dbg_fname), "vm%u_vcpu%u_timing",
-		 current->pid, id);
-	debugfs_file = debugfs_create_file(dbg_fname, 0666, kvm_debugfs_dir,
-						vcpu, &kvmppc_exit_timing_fops);
-
-	vcpu->arch.debugfs_exit_timing = debugfs_file;
-}
-
-void kvmppc_remove_vcpu_debugfs(struct kvm_vcpu *vcpu)
-{
-	debugfs_remove(vcpu->arch.debugfs_exit_timing);
-	vcpu->arch.debugfs_exit_timing = NULL;
+	debugfs_create_file("timing", 0666, debugfs_dentry,
+			    vcpu, &kvmppc_exit_timing_fops);
 }
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index b50dbe269f4b..85b2550e18e7 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -954,6 +954,9 @@ static int kvm_create_vm_debugfs(struct kvm *kvm, int fd)
 				    kvm->debugfs_dentry, stat_data,
 				    &stat_fops_per_vm);
 	}
+#ifdef __KVM_HAVE_ARCH_KVM_DEBUGFS
+	kvm_arch_create_vm_debugfs(kvm);
+#endif
 	return 0;
 }
 
-- 
2.30.2


^ permalink raw reply related

* Re: [PATCH kernel v2] KVM: PPC: Merge powerpc's debugfs entry content into generic entry
From: Alexey Kardashevskiy @ 2021-09-05  2:27 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Paolo Bonzini, Fabiano Rosas, kvm-ppc, kvm
In-Reply-To: <20210904133532.2871562-1-aik@ozlabs.ru>

Please ignore this one, v3 is coming.

After I posted this, I suddenly realized that the vcpu debugfs entries
remain for as long as the VM exists, so this does not handle vcpu
hotunplug+hotplug (the ppc book3e code did handle this). Thanks,


On 04/09/2021 23:35, Alexey Kardashevskiy wrote:
> At the moment the generic KVM code creates an "%pid-%fd" entry per a KVM
> instance; and the PPC HV KVM creates its own at "vm%pid". The Book3E KVM
> creates its own entry for timings.
> 
> The problems with the PPC entries are:
> 1. they do not allow multiple VMs in the same process (which is extremely
> rare case mostly used by syzkaller fuzzer);
> 2. prone to race bugs like the generic KVM code had fixed in
> commit 85cd39af14f4 ("KVM: Do not leak memory for duplicate debugfs
> directories").
> 
> This defines kvm_arch_create_vm_debugfs() similar to the one for vcpus.
> 
> This defines 2 hooks in kvmppc_ops for allowing specific KVM
> implementations to add necessary entries. This defines handlers
> for HV KVM and defines the Book3E debugfs vcpu helper as a handler.
> 
> This makes use of already existing kvm_arch_create_vcpu_debugfs
> on PPC.
> 
> This removes no more used debugfs_dir pointers from PPC kvm_arch structs.
> 
> Suggested-by: Fabiano Rosas <farosas@linux.ibm.com>
> Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
> ---
> Changes:
> v2:
> * handled powerpc-booke
> * s/kvm/vm/ in arch hooks
> ---
>   arch/powerpc/include/asm/kvm_host.h    |  7 +++---
>   arch/powerpc/include/asm/kvm_ppc.h     |  2 ++
>   arch/powerpc/kvm/timing.h              |  7 +++---
>   include/linux/kvm_host.h               |  3 +++
>   arch/powerpc/kvm/book3s_64_mmu_hv.c    |  2 +-
>   arch/powerpc/kvm/book3s_64_mmu_radix.c |  2 +-
>   arch/powerpc/kvm/book3s_hv.c           | 30 +++++++++-----------------
>   arch/powerpc/kvm/e500.c                |  1 +
>   arch/powerpc/kvm/e500mc.c              |  1 +
>   arch/powerpc/kvm/powerpc.c             | 15 ++++++++++---
>   arch/powerpc/kvm/timing.c              | 20 ++++-------------
>   virt/kvm/kvm_main.c                    |  3 +++
>   12 files changed, 44 insertions(+), 49 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
> index 2bcac6da0a4b..f29b66cc2163 100644
> --- a/arch/powerpc/include/asm/kvm_host.h
> +++ b/arch/powerpc/include/asm/kvm_host.h
> @@ -296,7 +296,6 @@ struct kvm_arch {
>   	bool dawr1_enabled;
>   	pgd_t *pgtable;
>   	u64 process_table;
> -	struct dentry *debugfs_dir;
>   	struct kvm_resize_hpt *resize_hpt; /* protected by kvm->lock */
>   #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
>   #ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
> @@ -672,7 +671,6 @@ struct kvm_vcpu_arch {
>   	u64 timing_min_duration[__NUMBER_OF_KVM_EXIT_TYPES];
>   	u64 timing_max_duration[__NUMBER_OF_KVM_EXIT_TYPES];
>   	u64 timing_last_exit;
> -	struct dentry *debugfs_exit_timing;
>   #endif
>   
>   #ifdef CONFIG_PPC_BOOK3S
> @@ -828,8 +826,6 @@ struct kvm_vcpu_arch {
>   	struct kvmhv_tb_accumulator rm_exit;	/* real-mode exit code */
>   	struct kvmhv_tb_accumulator guest_time;	/* guest execution */
>   	struct kvmhv_tb_accumulator cede_time;	/* time napping inside guest */
> -
> -	struct dentry *debugfs_dir;
>   #endif /* CONFIG_KVM_BOOK3S_HV_EXIT_TIMING */
>   };
>   
> @@ -868,4 +864,7 @@ static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {}
>   static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {}
>   static inline void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu) {}
>   
> +#define __KVM_HAVE_ARCH_VCPU_DEBUGFS
> +#define __KVM_HAVE_ARCH_KVM_DEBUGFS
> +
>   #endif /* __POWERPC_KVM_HOST_H__ */
> diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
> index 6355a6980ccf..fd841e844b90 100644
> --- a/arch/powerpc/include/asm/kvm_ppc.h
> +++ b/arch/powerpc/include/asm/kvm_ppc.h
> @@ -316,6 +316,8 @@ struct kvmppc_ops {
>   	int (*svm_off)(struct kvm *kvm);
>   	int (*enable_dawr1)(struct kvm *kvm);
>   	bool (*hash_v3_possible)(void);
> +	void (*create_vm_debugfs)(struct kvm *kvm);
> +	void (*create_vcpu_debugfs)(struct kvm_vcpu *vcpu, struct dentry *debugfs_dentry);
>   };
>   
>   extern struct kvmppc_ops *kvmppc_hv_ops;
> diff --git a/arch/powerpc/kvm/timing.h b/arch/powerpc/kvm/timing.h
> index feef7885ba82..36f7c201c6f1 100644
> --- a/arch/powerpc/kvm/timing.h
> +++ b/arch/powerpc/kvm/timing.h
> @@ -14,8 +14,8 @@
>   #ifdef CONFIG_KVM_EXIT_TIMING
>   void kvmppc_init_timing_stats(struct kvm_vcpu *vcpu);
>   void kvmppc_update_timing_stats(struct kvm_vcpu *vcpu);
> -void kvmppc_create_vcpu_debugfs(struct kvm_vcpu *vcpu, unsigned int id);
> -void kvmppc_remove_vcpu_debugfs(struct kvm_vcpu *vcpu);
> +void kvmppc_create_vcpu_debugfs(struct kvm_vcpu *vcpu,
> +				struct dentry *debugfs_dentry);
>   
>   static inline void kvmppc_set_exit_type(struct kvm_vcpu *vcpu, int type)
>   {
> @@ -27,8 +27,7 @@ static inline void kvmppc_set_exit_type(struct kvm_vcpu *vcpu, int type)
>   static inline void kvmppc_init_timing_stats(struct kvm_vcpu *vcpu) {}
>   static inline void kvmppc_update_timing_stats(struct kvm_vcpu *vcpu) {}
>   static inline void kvmppc_create_vcpu_debugfs(struct kvm_vcpu *vcpu,
> -						unsigned int id) {}
> -static inline void kvmppc_remove_vcpu_debugfs(struct kvm_vcpu *vcpu) {}
> +					      struct dentry *debugfs_dentry) {}
>   static inline void kvmppc_set_exit_type(struct kvm_vcpu *vcpu, int type) {}
>   #endif /* CONFIG_KVM_EXIT_TIMING */
>   
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index ae7735b490b4..4f22b1201a0d 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -1021,6 +1021,9 @@ int kvm_arch_pm_notifier(struct kvm *kvm, unsigned long state);
>   #ifdef __KVM_HAVE_ARCH_VCPU_DEBUGFS
>   void kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu, struct dentry *debugfs_dentry);
>   #endif
> +#ifdef __KVM_HAVE_ARCH_KVM_DEBUGFS
> +void kvm_arch_create_vm_debugfs(struct kvm *kvm);
> +#endif
>   
>   int kvm_arch_hardware_enable(void);
>   void kvm_arch_hardware_disable(void);
> diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
> index c63e263312a4..33dae253a0ac 100644
> --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
> +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
> @@ -2112,7 +2112,7 @@ static const struct file_operations debugfs_htab_fops = {
>   
>   void kvmppc_mmu_debugfs_init(struct kvm *kvm)
>   {
> -	debugfs_create_file("htab", 0400, kvm->arch.debugfs_dir, kvm,
> +	debugfs_create_file("htab", 0400, kvm->debugfs_dentry, kvm,
>   			    &debugfs_htab_fops);
>   }
>   
> diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c
> index c5508744e14c..f4e083c20872 100644
> --- a/arch/powerpc/kvm/book3s_64_mmu_radix.c
> +++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c
> @@ -1452,7 +1452,7 @@ static const struct file_operations debugfs_radix_fops = {
>   
>   void kvmhv_radix_debugfs_init(struct kvm *kvm)
>   {
> -	debugfs_create_file("radix", 0400, kvm->arch.debugfs_dir, kvm,
> +	debugfs_create_file("radix", 0400, kvm->debugfs_dentry, kvm,
>   			    &debugfs_radix_fops);
>   }
>   
> diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
> index c8f12b056968..046df9e0d462 100644
> --- a/arch/powerpc/kvm/book3s_hv.c
> +++ b/arch/powerpc/kvm/book3s_hv.c
> @@ -2771,19 +2771,14 @@ static const struct file_operations debugfs_timings_ops = {
>   };
>   
>   /* Create a debugfs directory for the vcpu */
> -static void debugfs_vcpu_init(struct kvm_vcpu *vcpu, unsigned int id)
> +static void kvmppc_arch_create_vcpu_debugfs_hv(struct kvm_vcpu *vcpu, struct dentry *debugfs_dentry)
>   {
> -	char buf[16];
> -	struct kvm *kvm = vcpu->kvm;
> -
> -	snprintf(buf, sizeof(buf), "vcpu%u", id);
> -	vcpu->arch.debugfs_dir = debugfs_create_dir(buf, kvm->arch.debugfs_dir);
> -	debugfs_create_file("timings", 0444, vcpu->arch.debugfs_dir, vcpu,
> +	debugfs_create_file("timings", 0444, debugfs_dentry, vcpu,
>   			    &debugfs_timings_ops);
>   }
>   
>   #else /* CONFIG_KVM_BOOK3S_HV_EXIT_TIMING */
> -static void debugfs_vcpu_init(struct kvm_vcpu *vcpu, unsigned int id)
> +static void kvmppc_arch_create_vcpu_debugfs_hv(struct kvm_vcpu *vcpu, struct dentry *debugfs_dentry)
>   {
>   }
>   #endif /* CONFIG_KVM_BOOK3S_HV_EXIT_TIMING */
> @@ -2907,8 +2902,6 @@ static int kvmppc_core_vcpu_create_hv(struct kvm_vcpu *vcpu)
>   	vcpu->arch.cpu_type = KVM_CPU_3S_64;
>   	kvmppc_sanity_check(vcpu);
>   
> -	debugfs_vcpu_init(vcpu, id);
> -
>   	return 0;
>   }
>   
> @@ -5186,7 +5179,6 @@ void kvmppc_free_host_rm_ops(void)
>   static int kvmppc_core_init_vm_hv(struct kvm *kvm)
>   {
>   	unsigned long lpcr, lpid;
> -	char buf[32];
>   	int ret;
>   
>   	mutex_init(&kvm->arch.uvmem_lock);
> @@ -5319,16 +5311,14 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm)
>   		kvm->arch.smt_mode = 1;
>   	kvm->arch.emul_smt_mode = 1;
>   
> -	/*
> -	 * Create a debugfs directory for the VM
> -	 */
> -	snprintf(buf, sizeof(buf), "vm%d", current->pid);
> -	kvm->arch.debugfs_dir = debugfs_create_dir(buf, kvm_debugfs_dir);
> +	return 0;
> +}
> +
> +static void kvmppc_arch_create_vm_debugfs_hv(struct kvm *kvm)
> +{
>   	kvmppc_mmu_debugfs_init(kvm);
>   	if (radix_enabled())
>   		kvmhv_radix_debugfs_init(kvm);
> -
> -	return 0;
>   }
>   
>   static void kvmppc_free_vcores(struct kvm *kvm)
> @@ -5342,8 +5332,6 @@ static void kvmppc_free_vcores(struct kvm *kvm)
>   
>   static void kvmppc_core_destroy_vm_hv(struct kvm *kvm)
>   {
> -	debugfs_remove_recursive(kvm->arch.debugfs_dir);
> -
>   	if (!cpu_has_feature(CPU_FTR_ARCH_300))
>   		kvm_hv_vm_deactivated();
>   
> @@ -5996,6 +5984,8 @@ static struct kvmppc_ops kvm_ops_hv = {
>   	.svm_off = kvmhv_svm_off,
>   	.enable_dawr1 = kvmhv_enable_dawr1,
>   	.hash_v3_possible = kvmppc_hash_v3_possible,
> +	.create_vcpu_debugfs = kvmppc_arch_create_vcpu_debugfs_hv,
> +	.create_vm_debugfs = kvmppc_arch_create_vm_debugfs_hv,
>   };
>   
>   static int kvm_init_subcore_bitmap(void)
> diff --git a/arch/powerpc/kvm/e500.c b/arch/powerpc/kvm/e500.c
> index 7e8b69015d20..d82e70c3e0a9 100644
> --- a/arch/powerpc/kvm/e500.c
> +++ b/arch/powerpc/kvm/e500.c
> @@ -495,6 +495,7 @@ static struct kvmppc_ops kvm_ops_e500 = {
>   	.emulate_op = kvmppc_core_emulate_op_e500,
>   	.emulate_mtspr = kvmppc_core_emulate_mtspr_e500,
>   	.emulate_mfspr = kvmppc_core_emulate_mfspr_e500,
> +	.create_vcpu_debugfs = kvmppc_create_vcpu_debugfs,
>   };
>   
>   static int __init kvmppc_e500_init(void)
> diff --git a/arch/powerpc/kvm/e500mc.c b/arch/powerpc/kvm/e500mc.c
> index 1c189b5aadcc..45eacd949f4b 100644
> --- a/arch/powerpc/kvm/e500mc.c
> +++ b/arch/powerpc/kvm/e500mc.c
> @@ -381,6 +381,7 @@ static struct kvmppc_ops kvm_ops_e500mc = {
>   	.emulate_op = kvmppc_core_emulate_op_e500,
>   	.emulate_mtspr = kvmppc_core_emulate_mtspr_e500,
>   	.emulate_mfspr = kvmppc_core_emulate_mfspr_e500,
> +	.create_vcpu_debugfs = kvmppc_create_vcpu_debugfs,
>   };
>   
>   static int __init kvmppc_e500mc_init(void)
> diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
> index c248d6d8b9e3..c895521ac6e9 100644
> --- a/arch/powerpc/kvm/powerpc.c
> +++ b/arch/powerpc/kvm/powerpc.c
> @@ -763,7 +763,6 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
>   		goto out_vcpu_uninit;
>   
>   	vcpu->arch.waitp = &vcpu->wait;
> -	kvmppc_create_vcpu_debugfs(vcpu, vcpu->vcpu_id);
>   	return 0;
>   
>   out_vcpu_uninit:
> @@ -780,8 +779,6 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
>   	/* Make sure we're not using the vcpu anymore */
>   	hrtimer_cancel(&vcpu->arch.dec_timer);
>   
> -	kvmppc_remove_vcpu_debugfs(vcpu);
> -
>   	switch (vcpu->arch.irq_type) {
>   	case KVMPPC_IRQ_MPIC:
>   		kvmppc_mpic_disconnect_vcpu(vcpu->arch.mpic, vcpu);
> @@ -2505,3 +2502,15 @@ int kvm_arch_init(void *opaque)
>   }
>   
>   EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_ppc_instr);
> +
> +void kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu, struct dentry *debugfs_dentry)
> +{
> +	if (vcpu->kvm->arch.kvm_ops->create_vcpu_debugfs)
> +		vcpu->kvm->arch.kvm_ops->create_vcpu_debugfs(vcpu, debugfs_dentry);
> +}
> +
> +void kvm_arch_create_vm_debugfs(struct kvm *kvm)
> +{
> +	if (kvm->arch.kvm_ops->create_vm_debugfs)
> +		kvm->arch.kvm_ops->create_vm_debugfs(kvm);
> +}
> diff --git a/arch/powerpc/kvm/timing.c b/arch/powerpc/kvm/timing.c
> index ba56a5cbba97..e1c17afc714d 100644
> --- a/arch/powerpc/kvm/timing.c
> +++ b/arch/powerpc/kvm/timing.c
> @@ -204,21 +204,9 @@ static const struct file_operations kvmppc_exit_timing_fops = {
>   	.release = single_release,
>   };
>   
> -void kvmppc_create_vcpu_debugfs(struct kvm_vcpu *vcpu, unsigned int id)
> +void kvmppc_create_vcpu_debugfs(struct kvm_vcpu *vcpu,
> +				struct dentry *debugfs_dentry)
>   {
> -	static char dbg_fname[50];
> -	struct dentry *debugfs_file;
> -
> -	snprintf(dbg_fname, sizeof(dbg_fname), "vm%u_vcpu%u_timing",
> -		 current->pid, id);
> -	debugfs_file = debugfs_create_file(dbg_fname, 0666, kvm_debugfs_dir,
> -						vcpu, &kvmppc_exit_timing_fops);
> -
> -	vcpu->arch.debugfs_exit_timing = debugfs_file;
> -}
> -
> -void kvmppc_remove_vcpu_debugfs(struct kvm_vcpu *vcpu)
> -{
> -	debugfs_remove(vcpu->arch.debugfs_exit_timing);
> -	vcpu->arch.debugfs_exit_timing = NULL;
> +	debugfs_create_file("timing", 0666, debugfs_dentry,
> +			    vcpu, &kvmppc_exit_timing_fops);
>   }
> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> index b50dbe269f4b..85b2550e18e7 100644
> --- a/virt/kvm/kvm_main.c
> +++ b/virt/kvm/kvm_main.c
> @@ -954,6 +954,9 @@ static int kvm_create_vm_debugfs(struct kvm *kvm, int fd)
>   				    kvm->debugfs_dentry, stat_data,
>   				    &stat_fops_per_vm);
>   	}
> +#ifdef __KVM_HAVE_ARCH_KVM_DEBUGFS
> +	kvm_arch_create_vm_debugfs(kvm);
> +#endif
>   	return 0;
>   }
>   
> 

-- 
Alexey

^ permalink raw reply

* Re: [PATCH kernel v2] KVM: PPC: Merge powerpc's debugfs entry content into generic entry
From: Alexey Kardashevskiy @ 2021-09-05  4:30 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Paolo Bonzini, Fabiano Rosas, kvm-ppc, kvm
In-Reply-To: <872d75a4-08e2-f597-0bee-6be9fdce0ac1@ozlabs.ru>

Huh, not sure anymore after reading d56f5136b0102 "KVM: let 
kvm_destroy_vm_debugfs clean up vCPU debugfs directories" which removes 
debugfs_dentry from vcpu. Paolo?



On 05/09/2021 12:27, Alexey Kardashevskiy wrote:
> Please ignore this one, v3 is coming.
> 
> After I posted this, I suddenly realized that the vcpu debugfs entries 
> remain for as long as the VM exists and this does not handle vcpu 
> hotunplug+hotplug (the ppc book3e did handle this). Thanks,
> 
> 
> On 04/09/2021 23:35, Alexey Kardashevskiy wrote:
>> At the moment the generic KVM code creates an "%pid-%fd" entry per a KVM
>> instance; and the PPC HV KVM creates its own at "vm%pid". The Book3E KVM
>> creates its own entry for timings.
>>
>> The problems with the PPC entries are:
>> 1. they do not allow multiple VMs in the same process (which is an
>> extremely rare case, mostly used by the syzkaller fuzzer);
>> 2. they are prone to race bugs like the one the generic KVM code fixed in
>> commit 85cd39af14f4 ("KVM: Do not leak memory for duplicate debugfs
>> directories").
>>
>> This defines kvm_arch_create_vm_debugfs() similar to the one for vcpus.
>>
>> This defines 2 hooks in kvmppc_ops for allowing specific KVM
>> implementations to add necessary entries. This defines handlers
>> for HV KVM and defines the Book3E debugfs vcpu helper as a handler.
>>
>> This makes use of already existing kvm_arch_create_vcpu_debugfs
>> on PPC.
>>
>> This removes the no longer used debugfs_dir pointers from PPC kvm_arch structs.
>>
>> Suggested-by: Fabiano Rosas <farosas@linux.ibm.com>
>> Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
>> ---
>> Changes:
>> v2:
>> * handled powerpc-booke
>> * s/kvm/vm/ in arch hooks
>> ---
>>   arch/powerpc/include/asm/kvm_host.h    |  7 +++---
>>   arch/powerpc/include/asm/kvm_ppc.h     |  2 ++
>>   arch/powerpc/kvm/timing.h              |  7 +++---
>>   include/linux/kvm_host.h               |  3 +++
>>   arch/powerpc/kvm/book3s_64_mmu_hv.c    |  2 +-
>>   arch/powerpc/kvm/book3s_64_mmu_radix.c |  2 +-
>>   arch/powerpc/kvm/book3s_hv.c           | 30 +++++++++-----------------
>>   arch/powerpc/kvm/e500.c                |  1 +
>>   arch/powerpc/kvm/e500mc.c              |  1 +
>>   arch/powerpc/kvm/powerpc.c             | 15 ++++++++++---
>>   arch/powerpc/kvm/timing.c              | 20 ++++-------------
>>   virt/kvm/kvm_main.c                    |  3 +++
>>   12 files changed, 44 insertions(+), 49 deletions(-)
>>
>> diff --git a/arch/powerpc/include/asm/kvm_host.h 
>> b/arch/powerpc/include/asm/kvm_host.h
>> index 2bcac6da0a4b..f29b66cc2163 100644
>> --- a/arch/powerpc/include/asm/kvm_host.h
>> +++ b/arch/powerpc/include/asm/kvm_host.h
>> @@ -296,7 +296,6 @@ struct kvm_arch {
>>       bool dawr1_enabled;
>>       pgd_t *pgtable;
>>       u64 process_table;
>> -    struct dentry *debugfs_dir;
>>       struct kvm_resize_hpt *resize_hpt; /* protected by kvm->lock */
>>   #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
>>   #ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
>> @@ -672,7 +671,6 @@ struct kvm_vcpu_arch {
>>       u64 timing_min_duration[__NUMBER_OF_KVM_EXIT_TYPES];
>>       u64 timing_max_duration[__NUMBER_OF_KVM_EXIT_TYPES];
>>       u64 timing_last_exit;
>> -    struct dentry *debugfs_exit_timing;
>>   #endif
>>   #ifdef CONFIG_PPC_BOOK3S
>> @@ -828,8 +826,6 @@ struct kvm_vcpu_arch {
>>       struct kvmhv_tb_accumulator rm_exit;    /* real-mode exit code */
>>       struct kvmhv_tb_accumulator guest_time;    /* guest execution */
>>       struct kvmhv_tb_accumulator cede_time;    /* time napping inside 
>> guest */
>> -
>> -    struct dentry *debugfs_dir;
>>   #endif /* CONFIG_KVM_BOOK3S_HV_EXIT_TIMING */
>>   };
>> @@ -868,4 +864,7 @@ static inline void kvm_arch_vcpu_blocking(struct 
>> kvm_vcpu *vcpu) {}
>>   static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {}
>>   static inline void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu) {}
>> +#define __KVM_HAVE_ARCH_VCPU_DEBUGFS
>> +#define __KVM_HAVE_ARCH_KVM_DEBUGFS
>> +
>>   #endif /* __POWERPC_KVM_HOST_H__ */
>> diff --git a/arch/powerpc/include/asm/kvm_ppc.h 
>> b/arch/powerpc/include/asm/kvm_ppc.h
>> index 6355a6980ccf..fd841e844b90 100644
>> --- a/arch/powerpc/include/asm/kvm_ppc.h
>> +++ b/arch/powerpc/include/asm/kvm_ppc.h
>> @@ -316,6 +316,8 @@ struct kvmppc_ops {
>>       int (*svm_off)(struct kvm *kvm);
>>       int (*enable_dawr1)(struct kvm *kvm);
>>       bool (*hash_v3_possible)(void);
>> +    void (*create_vm_debugfs)(struct kvm *kvm);
>> +    void (*create_vcpu_debugfs)(struct kvm_vcpu *vcpu, struct dentry 
>> *debugfs_dentry);
>>   };
>>   extern struct kvmppc_ops *kvmppc_hv_ops;
>> diff --git a/arch/powerpc/kvm/timing.h b/arch/powerpc/kvm/timing.h
>> index feef7885ba82..36f7c201c6f1 100644
>> --- a/arch/powerpc/kvm/timing.h
>> +++ b/arch/powerpc/kvm/timing.h
>> @@ -14,8 +14,8 @@
>>   #ifdef CONFIG_KVM_EXIT_TIMING
>>   void kvmppc_init_timing_stats(struct kvm_vcpu *vcpu);
>>   void kvmppc_update_timing_stats(struct kvm_vcpu *vcpu);
>> -void kvmppc_create_vcpu_debugfs(struct kvm_vcpu *vcpu, unsigned int id);
>> -void kvmppc_remove_vcpu_debugfs(struct kvm_vcpu *vcpu);
>> +void kvmppc_create_vcpu_debugfs(struct kvm_vcpu *vcpu,
>> +                struct dentry *debugfs_dentry);
>>   static inline void kvmppc_set_exit_type(struct kvm_vcpu *vcpu, int 
>> type)
>>   {
>> @@ -27,8 +27,7 @@ static inline void kvmppc_set_exit_type(struct 
>> kvm_vcpu *vcpu, int type)
>>   static inline void kvmppc_init_timing_stats(struct kvm_vcpu *vcpu) {}
>>   static inline void kvmppc_update_timing_stats(struct kvm_vcpu *vcpu) {}
>>   static inline void kvmppc_create_vcpu_debugfs(struct kvm_vcpu *vcpu,
>> -                        unsigned int id) {}
>> -static inline void kvmppc_remove_vcpu_debugfs(struct kvm_vcpu *vcpu) {}
>> +                          struct dentry *debugfs_dentry) {}
>>   static inline void kvmppc_set_exit_type(struct kvm_vcpu *vcpu, int 
>> type) {}
>>   #endif /* CONFIG_KVM_EXIT_TIMING */
>> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
>> index ae7735b490b4..4f22b1201a0d 100644
>> --- a/include/linux/kvm_host.h
>> +++ b/include/linux/kvm_host.h
>> @@ -1021,6 +1021,9 @@ int kvm_arch_pm_notifier(struct kvm *kvm, 
>> unsigned long state);
>>   #ifdef __KVM_HAVE_ARCH_VCPU_DEBUGFS
>>   void kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu, struct 
>> dentry *debugfs_dentry);
>>   #endif
>> +#ifdef __KVM_HAVE_ARCH_KVM_DEBUGFS
>> +void kvm_arch_create_vm_debugfs(struct kvm *kvm);
>> +#endif
>>   int kvm_arch_hardware_enable(void);
>>   void kvm_arch_hardware_disable(void);
>> diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c 
>> b/arch/powerpc/kvm/book3s_64_mmu_hv.c
>> index c63e263312a4..33dae253a0ac 100644
>> --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
>> +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
>> @@ -2112,7 +2112,7 @@ static const struct file_operations 
>> debugfs_htab_fops = {
>>   void kvmppc_mmu_debugfs_init(struct kvm *kvm)
>>   {
>> -    debugfs_create_file("htab", 0400, kvm->arch.debugfs_dir, kvm,
>> +    debugfs_create_file("htab", 0400, kvm->debugfs_dentry, kvm,
>>                   &debugfs_htab_fops);
>>   }
>> diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c 
>> b/arch/powerpc/kvm/book3s_64_mmu_radix.c
>> index c5508744e14c..f4e083c20872 100644
>> --- a/arch/powerpc/kvm/book3s_64_mmu_radix.c
>> +++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c
>> @@ -1452,7 +1452,7 @@ static const struct file_operations 
>> debugfs_radix_fops = {
>>   void kvmhv_radix_debugfs_init(struct kvm *kvm)
>>   {
>> -    debugfs_create_file("radix", 0400, kvm->arch.debugfs_dir, kvm,
>> +    debugfs_create_file("radix", 0400, kvm->debugfs_dentry, kvm,
>>                   &debugfs_radix_fops);
>>   }
>> diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
>> index c8f12b056968..046df9e0d462 100644
>> --- a/arch/powerpc/kvm/book3s_hv.c
>> +++ b/arch/powerpc/kvm/book3s_hv.c
>> @@ -2771,19 +2771,14 @@ static const struct file_operations 
>> debugfs_timings_ops = {
>>   };
>>   /* Create a debugfs directory for the vcpu */
>> -static void debugfs_vcpu_init(struct kvm_vcpu *vcpu, unsigned int id)
>> +static void kvmppc_arch_create_vcpu_debugfs_hv(struct kvm_vcpu *vcpu, 
>> struct dentry *debugfs_dentry)
>>   {
>> -    char buf[16];
>> -    struct kvm *kvm = vcpu->kvm;
>> -
>> -    snprintf(buf, sizeof(buf), "vcpu%u", id);
>> -    vcpu->arch.debugfs_dir = debugfs_create_dir(buf, 
>> kvm->arch.debugfs_dir);
>> -    debugfs_create_file("timings", 0444, vcpu->arch.debugfs_dir, vcpu,
>> +    debugfs_create_file("timings", 0444, debugfs_dentry, vcpu,
>>                   &debugfs_timings_ops);
>>   }
>>   #else /* CONFIG_KVM_BOOK3S_HV_EXIT_TIMING */
>> -static void debugfs_vcpu_init(struct kvm_vcpu *vcpu, unsigned int id)
>> +static void kvmppc_arch_create_vcpu_debugfs_hv(struct kvm_vcpu *vcpu, 
>> struct dentry *debugfs_dentry)
>>   {
>>   }
>>   #endif /* CONFIG_KVM_BOOK3S_HV_EXIT_TIMING */
>> @@ -2907,8 +2902,6 @@ static int kvmppc_core_vcpu_create_hv(struct 
>> kvm_vcpu *vcpu)
>>       vcpu->arch.cpu_type = KVM_CPU_3S_64;
>>       kvmppc_sanity_check(vcpu);
>> -    debugfs_vcpu_init(vcpu, id);
>> -
>>       return 0;
>>   }
>> @@ -5186,7 +5179,6 @@ void kvmppc_free_host_rm_ops(void)
>>   static int kvmppc_core_init_vm_hv(struct kvm *kvm)
>>   {
>>       unsigned long lpcr, lpid;
>> -    char buf[32];
>>       int ret;
>>       mutex_init(&kvm->arch.uvmem_lock);
>> @@ -5319,16 +5311,14 @@ static int kvmppc_core_init_vm_hv(struct kvm 
>> *kvm)
>>           kvm->arch.smt_mode = 1;
>>       kvm->arch.emul_smt_mode = 1;
>> -    /*
>> -     * Create a debugfs directory for the VM
>> -     */
>> -    snprintf(buf, sizeof(buf), "vm%d", current->pid);
>> -    kvm->arch.debugfs_dir = debugfs_create_dir(buf, kvm_debugfs_dir);
>> +    return 0;
>> +}
>> +
>> +static void kvmppc_arch_create_vm_debugfs_hv(struct kvm *kvm)
>> +{
>>       kvmppc_mmu_debugfs_init(kvm);
>>       if (radix_enabled())
>>           kvmhv_radix_debugfs_init(kvm);
>> -
>> -    return 0;
>>   }
>>   static void kvmppc_free_vcores(struct kvm *kvm)
>> @@ -5342,8 +5332,6 @@ static void kvmppc_free_vcores(struct kvm *kvm)
>>   static void kvmppc_core_destroy_vm_hv(struct kvm *kvm)
>>   {
>> -    debugfs_remove_recursive(kvm->arch.debugfs_dir);
>> -
>>       if (!cpu_has_feature(CPU_FTR_ARCH_300))
>>           kvm_hv_vm_deactivated();
>> @@ -5996,6 +5984,8 @@ static struct kvmppc_ops kvm_ops_hv = {
>>       .svm_off = kvmhv_svm_off,
>>       .enable_dawr1 = kvmhv_enable_dawr1,
>>       .hash_v3_possible = kvmppc_hash_v3_possible,
>> +    .create_vcpu_debugfs = kvmppc_arch_create_vcpu_debugfs_hv,
>> +    .create_vm_debugfs = kvmppc_arch_create_vm_debugfs_hv,
>>   };
>>   static int kvm_init_subcore_bitmap(void)
>> diff --git a/arch/powerpc/kvm/e500.c b/arch/powerpc/kvm/e500.c
>> index 7e8b69015d20..d82e70c3e0a9 100644
>> --- a/arch/powerpc/kvm/e500.c
>> +++ b/arch/powerpc/kvm/e500.c
>> @@ -495,6 +495,7 @@ static struct kvmppc_ops kvm_ops_e500 = {
>>       .emulate_op = kvmppc_core_emulate_op_e500,
>>       .emulate_mtspr = kvmppc_core_emulate_mtspr_e500,
>>       .emulate_mfspr = kvmppc_core_emulate_mfspr_e500,
>> +    .create_vcpu_debugfs = kvmppc_create_vcpu_debugfs,
>>   };
>>   static int __init kvmppc_e500_init(void)
>> diff --git a/arch/powerpc/kvm/e500mc.c b/arch/powerpc/kvm/e500mc.c
>> index 1c189b5aadcc..45eacd949f4b 100644
>> --- a/arch/powerpc/kvm/e500mc.c
>> +++ b/arch/powerpc/kvm/e500mc.c
>> @@ -381,6 +381,7 @@ static struct kvmppc_ops kvm_ops_e500mc = {
>>       .emulate_op = kvmppc_core_emulate_op_e500,
>>       .emulate_mtspr = kvmppc_core_emulate_mtspr_e500,
>>       .emulate_mfspr = kvmppc_core_emulate_mfspr_e500,
>> +    .create_vcpu_debugfs = kvmppc_create_vcpu_debugfs,
>>   };
>>   static int __init kvmppc_e500mc_init(void)
>> diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
>> index c248d6d8b9e3..c895521ac6e9 100644
>> --- a/arch/powerpc/kvm/powerpc.c
>> +++ b/arch/powerpc/kvm/powerpc.c
>> @@ -763,7 +763,6 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
>>           goto out_vcpu_uninit;
>>       vcpu->arch.waitp = &vcpu->wait;
>> -    kvmppc_create_vcpu_debugfs(vcpu, vcpu->vcpu_id);
>>       return 0;
>>   out_vcpu_uninit:
>> @@ -780,8 +779,6 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
>>       /* Make sure we're not using the vcpu anymore */
>>       hrtimer_cancel(&vcpu->arch.dec_timer);
>> -    kvmppc_remove_vcpu_debugfs(vcpu);
>> -
>>       switch (vcpu->arch.irq_type) {
>>       case KVMPPC_IRQ_MPIC:
>>           kvmppc_mpic_disconnect_vcpu(vcpu->arch.mpic, vcpu);
>> @@ -2505,3 +2502,15 @@ int kvm_arch_init(void *opaque)
>>   }
>>   EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_ppc_instr);
>> +
>> +void kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu, struct 
>> dentry *debugfs_dentry)
>> +{
>> +    if (vcpu->kvm->arch.kvm_ops->create_vcpu_debugfs)
>> +        vcpu->kvm->arch.kvm_ops->create_vcpu_debugfs(vcpu, 
>> debugfs_dentry);
>> +}
>> +
>> +void kvm_arch_create_vm_debugfs(struct kvm *kvm)
>> +{
>> +    if (kvm->arch.kvm_ops->create_vm_debugfs)
>> +        kvm->arch.kvm_ops->create_vm_debugfs(kvm);
>> +}
>> diff --git a/arch/powerpc/kvm/timing.c b/arch/powerpc/kvm/timing.c
>> index ba56a5cbba97..e1c17afc714d 100644
>> --- a/arch/powerpc/kvm/timing.c
>> +++ b/arch/powerpc/kvm/timing.c
>> @@ -204,21 +204,9 @@ static const struct file_operations 
>> kvmppc_exit_timing_fops = {
>>       .release = single_release,
>>   };
>> -void kvmppc_create_vcpu_debugfs(struct kvm_vcpu *vcpu, unsigned int id)
>> +void kvmppc_create_vcpu_debugfs(struct kvm_vcpu *vcpu,
>> +                struct dentry *debugfs_dentry)
>>   {
>> -    static char dbg_fname[50];
>> -    struct dentry *debugfs_file;
>> -
>> -    snprintf(dbg_fname, sizeof(dbg_fname), "vm%u_vcpu%u_timing",
>> -         current->pid, id);
>> -    debugfs_file = debugfs_create_file(dbg_fname, 0666, kvm_debugfs_dir,
>> -                        vcpu, &kvmppc_exit_timing_fops);
>> -
>> -    vcpu->arch.debugfs_exit_timing = debugfs_file;
>> -}
>> -
>> -void kvmppc_remove_vcpu_debugfs(struct kvm_vcpu *vcpu)
>> -{
>> -    debugfs_remove(vcpu->arch.debugfs_exit_timing);
>> -    vcpu->arch.debugfs_exit_timing = NULL;
>> +    debugfs_create_file("timing", 0666, debugfs_dentry,
>> +                vcpu, &kvmppc_exit_timing_fops);
>>   }
>> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
>> index b50dbe269f4b..85b2550e18e7 100644
>> --- a/virt/kvm/kvm_main.c
>> +++ b/virt/kvm/kvm_main.c
>> @@ -954,6 +954,9 @@ static int kvm_create_vm_debugfs(struct kvm *kvm, 
>> int fd)
>>                       kvm->debugfs_dentry, stat_data,
>>                       &stat_fops_per_vm);
>>       }
>> +#ifdef __KVM_HAVE_ARCH_KVM_DEBUGFS
>> +    kvm_arch_create_vm_debugfs(kvm);
>> +#endif
>>       return 0;
>>   }
>>
> 

-- 
Alexey

^ permalink raw reply

* [Bug 213837] "Kernel panic - not syncing: corrupted stack end detected inside scheduler" at building via distcc on a G5
From: bugzilla-daemon @ 2021-09-05 14:11 UTC (permalink / raw)
  To: linuxppc-dev
In-Reply-To: <bug-213837-206035@https.bugzilla.kernel.org/>

https://bugzilla.kernel.org/show_bug.cgi?id=213837

Erhard F. (erhard_f@mailbox.org) changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
           See Also|https://bugzilla.kernel.org |
                   |/show_bug.cgi?id=213079     |

--- Comment #4 from Erhard F. (erhard_f@mailbox.org) ---
Checked whether this really has something to do with bug #213079 or not by
copying this root partition to a regular HDD and using that one instead. As the
issue still happens it seems these are two separate bugs.

[...]
Kernel panic - not syncing: corrupted stack end detected inside scheduler
CPU: 1 PID: 1509 Comm: powerpc64-unkno Tainted: G        W        
5.14.1-PowerMacG5+ #2
Call Trace:
[c0000000386434c0] [c00000000054cd64] .dump_stack_lvl+0x98/0xe0 (unreliable)
[c000000038643550] [c000000000068ab8] .panic+0x160/0x40c
[c000000038643600] [c00000000081202c] .__schedule+0x7c/0x840
[c0000000386436d0] [c00000000081293c] .preempt_schedule_common+0x28/0x48
[c000000038643750] [c00000000081298c] .__cond_resched+0x30/0x4c
[c0000000386437d0] [c0000000004edf18] .copy_page_to_iter+0xbc/0x32c
[c0000000386438a0] [c0000000001c99d8] .filemap_read+0x574/0x618
[c000000038643a60] [c00000000033182c] .ext4_file_read_iter+0xb8/0x11c
[c000000038643b00] [c000000000272f1c] .new_sync_read+0x94/0xe0
[c000000038643c00] [c0000000002746c0] .vfs_read+0x128/0x12c
[c000000038643ca0] [c000000000274a58] .ksys_read+0x78/0xc4
[c000000038643d60] [c000000000022808] .system_call_exception+0x1a4/0x1dc
[c000000038643e10] [c00000000000b4cc] system_call_common+0xec/0x250
--- interrupt: c00 at 0x3fffbc477cd0
NIP:  00003fffbc477cd0 LR: 000000011c413660 CTR: 0000000000000000
REGS: c000000038643e80 TRAP: 0c00   Tainted: G        W         
(5.14.1-PowerMacG5+)
MSR:  900000000200f032 <SF,HV,VEC,EE,PR,FP,ME,IR,DR,RI>  CR: 24000422  XER:
00000000
IRQMASK: 0 
GPR00: 0000000000000003 00003fffd3c43d70 00003fffbc4a4f00 0000000000000004 
GPR04: 00003fffbbfac010 00000000001e7697 00003fffbc458320 0000000000000000 
GPR08: 00003fffbc4582e0 0000000000000000 0000000000000000 0000000000000000 
GPR12: 0000000000000000 00003fffbc54ec20 00000001470b79c0 0000000157c21760 
GPR16: 000000011c41ec38 00003fffd3c44258 000000011c41eb28 00003fffd3c440a8 
GPR20: 00003fffd3c44460 ffffffffffffffff 00000001470b6dd0 0000000000000000 
GPR24: 00000001470b77f0 00000001470b7d30 0000000000000005 00003fffd3c43fc0 
GPR28: 000000011c4668e8 0000000000000004 00003fffbbfac010 00000000001e7697 
NIP [00003fffbc477cd0] 0x3fffbc477cd0
LR [000000011c413660] 0x11c413660
--- interrupt: c00
Rebooting in 40 seconds..

-- 
You may reply to this email to add a comment.

You are receiving this mail because:
You are watching someone on the CC list of the bug.

^ permalink raw reply

* [Bug 213837] "Kernel panic - not syncing: corrupted stack end detected inside scheduler" at building via distcc on a G5
From: bugzilla-daemon @ 2021-09-05 14:15 UTC (permalink / raw)
  To: linuxppc-dev
In-Reply-To: <bug-213837-206035@https.bugzilla.kernel.org/>

https://bugzilla.kernel.org/show_bug.cgi?id=213837

Erhard F. (erhard_f@mailbox.org) changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
 Attachment #298395|0                           |1
        is obsolete|                            |

--- Comment #5 from Erhard F. (erhard_f@mailbox.org) ---
Created attachment 298671
  --> https://bugzilla.kernel.org/attachment.cgi?id=298671&action=edit
kernel .config (5.14.1, PowerMac G5 11,2)

-- 
You may reply to this email to add a comment.

You are receiving this mail because:
You are watching someone on the CC list of the bug.

^ permalink raw reply

* Re: [PATCH 0/2] powerpc/perf: Add instruction and data address registers to extended regs
From: Athira Rajeev @ 2021-09-06  2:43 UTC (permalink / raw)
  To: kajoljain, Arnaldo Carvalho de Melo
  Cc: Madhavan Srinivasan, linuxppc-dev, Jiri Olsa, rnsastry
In-Reply-To: <bd5a9388-483d-91ca-b371-ab92ae4c08bc@linux.ibm.com>



> On 02-Sep-2021, at 1:04 PM, kajoljain <kjain@linux.ibm.com> wrote:
> 
> 
> 
> On 6/20/21 8:15 PM, Athira Rajeev wrote:
>> Patch set adds PMU registers namely Sampled Instruction Address Register
>> (SIAR) and Sampled Data Address Register (SDAR) as part of extended regs
>> in PowerPC. These registers provide the instruction/data addresses, and
>> adding them to extended regs helps with debugging.
>> 
>> Patch 1/2 adds SIAR and SDAR as part of the extended regs mask.
>> Patch 2/2 includes perf tools side changes to add the SPRs to
>> sample_reg_mask to use with -I? option.
>> 
>> Athira Rajeev (2):
>>  powerpc/perf: Expose instruction and data address registers as part of
>>    extended regs
>>  tools/perf: Add perf tools support to expose instruction and data
>>    address registers as part of extended regs
>> 
> 
> Patchset looks good to me.
> 
> Reviewed-By: kajol Jain<kjain@linux.ibm.com>

Hi Arnaldo,

Requesting your review of this patchset.

Thanks
Athira
> 
> Thanks,
> Kajol Jain
> 
>> arch/powerpc/include/uapi/asm/perf_regs.h       | 12 +++++++-----
>> arch/powerpc/perf/perf_regs.c                   |  4 ++++
>> tools/arch/powerpc/include/uapi/asm/perf_regs.h | 12 +++++++-----
>> tools/perf/arch/powerpc/include/perf_regs.h     |  2 ++
>> tools/perf/arch/powerpc/util/perf_regs.c        |  2 ++
>> 5 files changed, 22 insertions(+), 10 deletions(-)


^ permalink raw reply

* [PATCH] powerpc/mce: Fix access error in mce handler
From: Ganesh Goudar @ 2021-09-06  8:18 UTC (permalink / raw)
  To: linuxppc-dev, mpe; +Cc: Ganesh Goudar, mahesh, npiggin

We queue an irq work for deferred processing of the mce event
in the realmode mce handler, where translation is disabled.
Queuing the work may result in accessing memory outside the
RMO region. Such access needs translation to be enabled for
an LPAR running with the hash mmu, else the kernel crashes.

So enable the translation before queuing the work.

Without this change, the following trace is seen on injecting a machine
check error in an LPAR running with the hash mmu.

Oops: Kernel access of bad area, sig: 11 [#1]
LE PAGE_SIZE=64K MMU=Hash SMP NR_CPUS=2048 NUMA pSeries
CPU: 5 PID: 1883 Comm: insmod Tainted: G        OE     5.14.0-mce+ #137
NIP:  c000000000735d60 LR: c000000000318640 CTR: 0000000000000000
REGS: c00000001ebff9a0 TRAP: 0300   Tainted: G       OE      (5.14.0-mce+)
MSR:  8000000000001003 <SF,ME,RI,LE>  CR: 28008228  XER: 00000001
CFAR: c00000000031863c DAR: c00000027fa8fe08 DSISR: 40000000 IRQMASK: 0
GPR00: c0000000003186d0 c00000001ebffc40 c000000001b0df00 c0000000016337e8
GPR04: c0000000016337e8 c00000027fa8fe08 0000000000000023 c0000000016337f0
GPR08: 0000000000000023 c0000000012ffe08 0000000000000000 c008000001460240
GPR12: 0000000000000000 c00000001ec9a900 c00000002ac4bd00 0000000000000000
GPR16: 00000000000005a0 c0080000006b0000 c0080000006b05a0 c000000000ff3068
GPR20: c00000002ac4bbc0 0000000000000001 c00000002ac4bbc0 c008000001490298
GPR24: c008000001490108 c000000001636198 c008000001470090 c008000001470058
GPR28: 0000000000000510 c008000001000000 c008000008000019 0000000000000019
NIP [c000000000735d60] llist_add_batch+0x0/0x40
LR [c000000000318640] __irq_work_queue_local+0x70/0xc0
Call Trace:
[c00000001ebffc40] [c00000001ebffc0c] 0xc00000001ebffc0c (unreliable)
[c00000001ebffc60] [c0000000003186d0] irq_work_queue+0x40/0x70
[c00000001ebffc80] [c00000000004425c] machine_check_queue_event+0xbc/0xd0
[c00000001ebffcf0] [c00000000000838c] machine_check_early_common+0x16c/0x1f4

Fixes: 74c3354bc1d89 ("powerpc/pseries/mce: restore msr before returning from handler")
Signed-off-by: Ganesh Goudar <ganeshgr@linux.ibm.com>
---
 arch/powerpc/kernel/mce.c | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c
index 47a683cd00d2..9d1e39d42e3e 100644
--- a/arch/powerpc/kernel/mce.c
+++ b/arch/powerpc/kernel/mce.c
@@ -249,6 +249,7 @@ void machine_check_queue_event(void)
 {
 	int index;
 	struct machine_check_event evt;
+	unsigned long msr;
 
 	if (!get_mce_event(&evt, MCE_EVENT_RELEASE))
 		return;
@@ -262,8 +263,19 @@ void machine_check_queue_event(void)
 	memcpy(&local_paca->mce_info->mce_event_queue[index],
 	       &evt, sizeof(evt));
 
-	/* Queue irq work to process this event later. */
-	irq_work_queue(&mce_event_process_work);
+	/* Queue irq work to process this event later. Before
+	 * queuing the work enable translation for non radix LPAR,
+	 * as irq_work_queue may try to access memory outside RMO
+	 * region.
+	 */
+	if (!radix_enabled() && firmware_has_feature(FW_FEATURE_LPAR)) {
+		msr = mfmsr();
+		mtmsr(msr | MSR_IR | MSR_DR);
+		irq_work_queue(&mce_event_process_work);
+		mtmsr(msr);
+	} else {
+		irq_work_queue(&mce_event_process_work);
+	}
 }
 
 void mce_common_process_ue(struct pt_regs *regs,
-- 
2.31.1


^ permalink raw reply related

* [PATCH v3 1/3] powerpc/pseries: Parse control memory access error
From: Ganesh Goudar @ 2021-09-06  8:43 UTC (permalink / raw)
  To: linuxppc-dev, mpe; +Cc: Ganesh Goudar, mahesh, npiggin

Add support to parse and log control memory access
error for pseries. These changes are made according to
PAPR v2.11 10.3.2.2.12.

Signed-off-by: Ganesh Goudar <ganeshgr@linux.ibm.com>
---
v3: Modify the commit log to mention the document according
    to which changes are made.
    Define and use a macro to check if the effective address
    is provided.

v2: No changes.
---
 arch/powerpc/platforms/pseries/ras.c | 36 ++++++++++++++++++++++++----
 1 file changed, 32 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c
index 56092dccfdb8..e62a0ca2611a 100644
--- a/arch/powerpc/platforms/pseries/ras.c
+++ b/arch/powerpc/platforms/pseries/ras.c
@@ -60,11 +60,17 @@ struct pseries_mc_errorlog {
 	 *      XX	2: Reserved.
 	 *        XXX	3: Type of UE error.
 	 *
-	 * For error_type != MC_ERROR_TYPE_UE
+	 * For error_type == MC_ERROR_TYPE_SLB/ERAT/TLB
 	 *   XXXXXXXX
 	 *   X		1: Effective address provided.
 	 *    XXXXX	5: Reserved.
 	 *         XX	2: Type of SLB/ERAT/TLB error.
+	 *
+	 * For error_type == MC_ERROR_TYPE_CTRL_MEM_ACCESS
+	 *   XXXXXXXX
+	 *   X		1: Error causing address provided.
+	 *    XXX	3: Type of error.
+	 *       XXXX	4: Reserved.
 	 */
 	u8	sub_err_type;
 	u8	reserved_1[6];
@@ -80,6 +86,7 @@ struct pseries_mc_errorlog {
 #define MC_ERROR_TYPE_TLB		0x04
 #define MC_ERROR_TYPE_D_CACHE		0x05
 #define MC_ERROR_TYPE_I_CACHE		0x07
+#define MC_ERROR_TYPE_CTRL_MEM_ACCESS	0x08
 
 /* RTAS pseries MCE error sub types */
 #define MC_ERROR_UE_INDETERMINATE		0
@@ -90,6 +97,7 @@ struct pseries_mc_errorlog {
 
 #define UE_EFFECTIVE_ADDR_PROVIDED		0x40
 #define UE_LOGICAL_ADDR_PROVIDED		0x20
+#define MC_EFFECTIVE_ADDR_PROVIDED		0x80
 
 #define MC_ERROR_SLB_PARITY		0
 #define MC_ERROR_SLB_MULTIHIT		1
@@ -103,6 +111,9 @@ struct pseries_mc_errorlog {
 #define MC_ERROR_TLB_MULTIHIT		2
 #define MC_ERROR_TLB_INDETERMINATE	3
 
+#define MC_ERROR_CTRL_MEM_ACCESS_PTABLE_WALK	0
+#define MC_ERROR_CTRL_MEM_ACCESS_OP_ACCESS	1
+
 static inline u8 rtas_mc_error_sub_type(const struct pseries_mc_errorlog *mlog)
 {
 	switch (mlog->error_type) {
@@ -112,6 +123,8 @@ static inline u8 rtas_mc_error_sub_type(const struct pseries_mc_errorlog *mlog)
 	case	MC_ERROR_TYPE_ERAT:
 	case	MC_ERROR_TYPE_TLB:
 		return (mlog->sub_err_type & 0x03);
+	case	MC_ERROR_TYPE_CTRL_MEM_ACCESS:
+		return (mlog->sub_err_type & 0x70) >> 4;
 	default:
 		return 0;
 	}
@@ -656,7 +669,7 @@ static int mce_handle_err_virtmode(struct pt_regs *regs,
 			mce_err.u.slb_error_type = MCE_SLB_ERROR_INDETERMINATE;
 			break;
 		}
-		if (mce_log->sub_err_type & 0x80)
+		if (mce_log->sub_err_type & MC_EFFECTIVE_ADDR_PROVIDED)
 			eaddr = be64_to_cpu(mce_log->effective_address);
 		break;
 	case MC_ERROR_TYPE_ERAT:
@@ -673,7 +686,7 @@ static int mce_handle_err_virtmode(struct pt_regs *regs,
 			mce_err.u.erat_error_type = MCE_ERAT_ERROR_INDETERMINATE;
 			break;
 		}
-		if (mce_log->sub_err_type & 0x80)
+		if (mce_log->sub_err_type & MC_EFFECTIVE_ADDR_PROVIDED)
 			eaddr = be64_to_cpu(mce_log->effective_address);
 		break;
 	case MC_ERROR_TYPE_TLB:
@@ -690,7 +703,7 @@ static int mce_handle_err_virtmode(struct pt_regs *regs,
 			mce_err.u.tlb_error_type = MCE_TLB_ERROR_INDETERMINATE;
 			break;
 		}
-		if (mce_log->sub_err_type & 0x80)
+		if (mce_log->sub_err_type & MC_EFFECTIVE_ADDR_PROVIDED)
 			eaddr = be64_to_cpu(mce_log->effective_address);
 		break;
 	case MC_ERROR_TYPE_D_CACHE:
@@ -699,6 +712,21 @@ static int mce_handle_err_virtmode(struct pt_regs *regs,
 	case MC_ERROR_TYPE_I_CACHE:
 		mce_err.error_type = MCE_ERROR_TYPE_ICACHE;
 		break;
+	case MC_ERROR_TYPE_CTRL_MEM_ACCESS:
+		mce_err.error_type = MCE_ERROR_TYPE_RA;
+		switch (err_sub_type) {
+		case MC_ERROR_CTRL_MEM_ACCESS_PTABLE_WALK:
+			mce_err.u.ra_error_type =
+				MCE_RA_ERROR_PAGE_TABLE_WALK_LOAD_STORE_FOREIGN;
+			break;
+		case MC_ERROR_CTRL_MEM_ACCESS_OP_ACCESS:
+			mce_err.u.ra_error_type =
+				MCE_RA_ERROR_LOAD_STORE_FOREIGN;
+			break;
+		}
+		if (mce_log->sub_err_type & MC_EFFECTIVE_ADDR_PROVIDED)
+			eaddr = be64_to_cpu(mce_log->effective_address);
+		break;
 	case MC_ERROR_TYPE_UNKNOWN:
 	default:
 		mce_err.error_type = MCE_ERROR_TYPE_UNKNOWN;
-- 
2.31.1


^ permalink raw reply related

* [PATCH v3 2/3] selftests/powerpc: Add test for real address error handling
From: Ganesh Goudar @ 2021-09-06  8:43 UTC (permalink / raw)
  To: linuxppc-dev, mpe; +Cc: Ganesh Goudar, mahesh, npiggin
In-Reply-To: <20210906084303.183921-1-ganeshgr@linux.ibm.com>

Add test for real address or control memory address access
error handling, using NX-GZIP engine.

The error is injected by accessing the control memory address
using an illegal instruction. On successful handling, the process
attempting to access the control memory address using the illegal
instruction receives SIGBUS.

Signed-off-by: Ganesh Goudar <ganeshgr@linux.ibm.com>
---
v3: Avoid using shell script to inject error.

v2: Fix build error.
---
 tools/testing/selftests/powerpc/Makefile      |  3 +-
 tools/testing/selftests/powerpc/mce/Makefile  |  7 ++
 .../selftests/powerpc/mce/inject-ra-err.c     | 65 +++++++++++++++++++
 tools/testing/selftests/powerpc/mce/vas-api.h |  1 +
 4 files changed, 75 insertions(+), 1 deletion(-)
 create mode 100644 tools/testing/selftests/powerpc/mce/Makefile
 create mode 100644 tools/testing/selftests/powerpc/mce/inject-ra-err.c
 create mode 120000 tools/testing/selftests/powerpc/mce/vas-api.h

diff --git a/tools/testing/selftests/powerpc/Makefile b/tools/testing/selftests/powerpc/Makefile
index 0830e63818c1..4830372d7416 100644
--- a/tools/testing/selftests/powerpc/Makefile
+++ b/tools/testing/selftests/powerpc/Makefile
@@ -31,7 +31,8 @@ SUB_DIRS = alignment		\
 	   vphn         \
 	   math		\
 	   ptrace	\
-	   security
+	   security	\
+	   mce
 
 endif
 
diff --git a/tools/testing/selftests/powerpc/mce/Makefile b/tools/testing/selftests/powerpc/mce/Makefile
new file mode 100644
index 000000000000..2424513982d9
--- /dev/null
+++ b/tools/testing/selftests/powerpc/mce/Makefile
@@ -0,0 +1,7 @@
+#SPDX-License-Identifier: GPL-2.0-or-later
+
+TEST_GEN_PROGS := inject-ra-err
+
+include ../../lib.mk
+
+$(TEST_GEN_PROGS): ../harness.c
diff --git a/tools/testing/selftests/powerpc/mce/inject-ra-err.c b/tools/testing/selftests/powerpc/mce/inject-ra-err.c
new file mode 100644
index 000000000000..94323c34d9a6
--- /dev/null
+++ b/tools/testing/selftests/powerpc/mce/inject-ra-err.c
@@ -0,0 +1,65 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <errno.h>
+#include <fcntl.h>
+#include <signal.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "vas-api.h"
+#include "utils.h"
+
+static bool faulted;
+
+static void sigbus_handler(int n, siginfo_t *info, void *ctxt_v)
+{
+	ucontext_t *ctxt = (ucontext_t *)ctxt_v;
+	struct pt_regs *regs = ctxt->uc_mcontext.regs;
+
+	faulted = true;
+	regs->nip += 4;
+}
+
+static int test_ra_error(void)
+{
+	struct vas_tx_win_open_attr attr;
+	int fd, *paste_addr;
+	char *devname = "/dev/crypto/nx-gzip";
+	struct sigaction act = {
+		.sa_sigaction = sigbus_handler,
+		.sa_flags = SA_SIGINFO,
+	};
+
+	memset(&attr, 0, sizeof(attr));
+	attr.version = 1;
+	attr.vas_id = 0;
+
+	SKIP_IF(access(devname, F_OK));
+
+	fd = open(devname, O_RDWR);
+	FAIL_IF(fd < 0);
+	FAIL_IF(ioctl(fd, VAS_TX_WIN_OPEN, &attr) < 0);
+	FAIL_IF(sigaction(SIGBUS, &act, NULL) != 0);
+
+	paste_addr = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0ULL);
+
+	/* The following assignment triggers exception */
+	mb();
+	*paste_addr = 1;
+	mb();
+
+	FAIL_IF(!faulted);
+
+	return 0;
+}
+
+int main(void)
+{
+	return test_harness(test_ra_error, "inject-ra-err");
+}
+
diff --git a/tools/testing/selftests/powerpc/mce/vas-api.h b/tools/testing/selftests/powerpc/mce/vas-api.h
new file mode 120000
index 000000000000..1455c1bcd351
--- /dev/null
+++ b/tools/testing/selftests/powerpc/mce/vas-api.h
@@ -0,0 +1 @@
+../../../../../arch/powerpc/include/uapi/asm/vas-api.h
\ No newline at end of file
-- 
2.31.1


^ permalink raw reply related

* [PATCH v3 3/3] powerpc/mce: Modify the real address error logging messages
From: Ganesh Goudar @ 2021-09-06  8:43 UTC (permalink / raw)
  To: linuxppc-dev, mpe; +Cc: Ganesh Goudar, mahesh, npiggin
In-Reply-To: <20210906084303.183921-1-ganeshgr@linux.ibm.com>

To avoid ambiguity, modify the strings in real address error
logging messages to "foreign/control memory" from "foreign",
since the error descriptions in the P9 user manual and the P10
user manual are different for the same type of errors.

P9 User Manual for MCE:
DSISR:59 Host real address to foreign space during translation.
DSISR:60 Host real address to foreign space on a load or store
	 access.

P10 User Manual for MCE:
DSISR:59 D-side tablewalk used a host real address in the
	 control memory address range.
DSISR:60 D-side operand access to control memory address space.

Signed-off-by: Ganesh Goudar <ganeshgr@linux.ibm.com>
---
v3: No changes.

v2: No changes.
---
 arch/powerpc/kernel/mce.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c
index 9d1e39d42e3e..5baf69503349 100644
--- a/arch/powerpc/kernel/mce.c
+++ b/arch/powerpc/kernel/mce.c
@@ -400,14 +400,14 @@ void machine_check_print_event_info(struct machine_check_event *evt,
 	static const char *mc_ra_types[] = {
 		"Indeterminate",
 		"Instruction fetch (bad)",
-		"Instruction fetch (foreign)",
+		"Instruction fetch (foreign/control memory)",
 		"Page table walk ifetch (bad)",
-		"Page table walk ifetch (foreign)",
+		"Page table walk ifetch (foreign/control memory)",
 		"Load (bad)",
 		"Store (bad)",
 		"Page table walk Load/Store (bad)",
-		"Page table walk Load/Store (foreign)",
-		"Load/Store (foreign)",
+		"Page table walk Load/Store (foreign/control memory)",
+		"Load/Store (foreign/control memory)",
 	};
 	static const char *mc_link_types[] = {
 		"Indeterminate",
-- 
2.31.1


^ permalink raw reply related

* [PATCH v2] powerpc/papr_scm: Implement initial support for injecting smart errors
From: Shivaprasad G Bhat @ 2021-09-06  9:06 UTC (permalink / raw)
  To: nvdimm
  Cc: sbhat, aneesh.kumar, vaibhav, dan.j.williams, linuxppc-dev,
	ira.weiny

From: Vaibhav Jain <vaibhav@linux.ibm.com>

Presently PAPR doesn't support injecting smart errors on an
NVDIMM. This makes testing the NVDIMM health reporting functionality
difficult, as simulating NVDIMM health related events needs a hacked up
qemu version.

To solve this problem this patch proposes simulating a certain set of
NVDIMM health related events in papr_scm, specifically the 'fatal' health
state and the 'dirty' shutdown state. These errors can be injected via the
user-space 'ndctl-inject-smart(1)' command. With the proposed patch and
the corresponding ndctl patches, the following command flow is expected:

$ sudo ndctl list -DH -d nmem0
...
      "health_state":"ok",
      "shutdown_state":"clean",
...
 # inject unsafe shutdown and fatal health error
$ sudo ndctl inject-smart nmem0 -Uf
...
      "health_state":"fatal",
      "shutdown_state":"dirty",
...
 # uninject all errors
$ sudo ndctl inject-smart nmem0 -N
...
      "health_state":"ok",
      "shutdown_state":"clean",
...

The patch adds two members 'health_bitmap_mask' and
'health_bitmap_override' inside struct papr_scm_priv which are then
bit blt'ed to the health bitmaps fetched from the hypervisor. In case
we are not able to fetch health information from the hypervisor we
service the health bitmap from these two members. These members are
accessible from sysfs at nmemX/papr/health_bitmap_override

A new PDSM named 'SMART_INJECT' is proposed that accepts the newly
introduced 'struct nd_papr_pdsm_smart_inject' as payload that is
exchanged between libndctl and papr_scm to indicate the requested
smart-error states.

When processing the PDSM 'SMART_INJECT', papr_pdsm_smart_inject()
constructs a pair of 'mask' and 'override' bitmaps from the payload
and bit-blts them to the 'health_bitmap_{mask, override}' members. This
ensures that, after being fetched from the hypervisor, the health_bitmap
reflects the requested smart-error states.

Signed-off-by: Vaibhav Jain <vaibhav@linux.ibm.com>
Signed-off-by: Shivaprasad G Bhat <sbhat@linux.ibm.com>
---
Changelog:

Since v1:
Link: https://patchwork.kernel.org/project/linux-nvdimm/list/?series=513881
* Updated the patch description.
* Removed dependency of a header movement patch.
* Removed '__packed' attribute for 'struct nd_papr_pdsm_smart_inject' [Aneesh]

 arch/powerpc/include/uapi/asm/papr_pdsm.h |   18 ++++++
 arch/powerpc/platforms/pseries/papr_scm.c |   94 ++++++++++++++++++++++++++++-
 2 files changed, 109 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/include/uapi/asm/papr_pdsm.h b/arch/powerpc/include/uapi/asm/papr_pdsm.h
index 82488b1e7276..17439925045c 100644
--- a/arch/powerpc/include/uapi/asm/papr_pdsm.h
+++ b/arch/powerpc/include/uapi/asm/papr_pdsm.h
@@ -116,6 +116,22 @@ struct nd_papr_pdsm_health {
 	};
 };
 
+/* Flags for injecting specific smart errors */
+#define PDSM_SMART_INJECT_HEALTH_FATAL		(1 << 0)
+#define PDSM_SMART_INJECT_BAD_SHUTDOWN		(1 << 1)
+
+struct nd_papr_pdsm_smart_inject {
+	union {
+		struct {
+			/* One or more of PDSM_SMART_INJECT_ */
+			__u32 flags;
+			__u8 fatal_enable;
+			__u8 unsafe_shutdown_enable;
+		};
+		__u8 buf[ND_PDSM_PAYLOAD_MAX_SIZE];
+	};
+};
+
 /*
  * Methods to be embedded in ND_CMD_CALL request. These are sent to the kernel
  * via 'nd_cmd_pkg.nd_command' member of the ioctl struct
@@ -123,12 +139,14 @@ struct nd_papr_pdsm_health {
 enum papr_pdsm {
 	PAPR_PDSM_MIN = 0x0,
 	PAPR_PDSM_HEALTH,
+	PAPR_PDSM_SMART_INJECT,
 	PAPR_PDSM_MAX,
 };
 
 /* Maximal union that can hold all possible payload types */
 union nd_pdsm_payload {
 	struct nd_papr_pdsm_health health;
+	struct nd_papr_pdsm_smart_inject smart_inject;
 	__u8 buf[ND_PDSM_PAYLOAD_MAX_SIZE];
 } __packed;
 
diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c
index f48e87ac89c9..de4cf329cfb3 100644
--- a/arch/powerpc/platforms/pseries/papr_scm.c
+++ b/arch/powerpc/platforms/pseries/papr_scm.c
@@ -68,6 +68,10 @@
 #define PAPR_SCM_PERF_STATS_EYECATCHER __stringify(SCMSTATS)
 #define PAPR_SCM_PERF_STATS_VERSION 0x1
 
+/* Use bitblt method to override specific bits in the '_bitmap_' */
+#define BITBLT_BITMAP(_bitmap_, _mask_, _override_)		\
+	(((_bitmap_) & ~(_mask_)) | ((_mask_) & (_override_)))
+
 /* Struct holding a single performance metric */
 struct papr_scm_perf_stat {
 	u8 stat_id[8];
@@ -120,6 +124,12 @@ struct papr_scm_priv {
 
 	/* length of the stat buffer as expected by phyp */
 	size_t stat_buffer_len;
+
+	/* The bits which needs to be overridden */
+	u64 health_bitmap_mask;
+
+	/* The overridden values for the bits having the masks set */
+	u64 health_bitmap_override;
 };
 
 static int papr_scm_pmem_flush(struct nd_region *nd_region,
@@ -347,19 +357,28 @@ static ssize_t drc_pmem_query_stats(struct papr_scm_priv *p,
 static int __drc_pmem_query_health(struct papr_scm_priv *p)
 {
 	unsigned long ret[PLPAR_HCALL_BUFSIZE];
+	u64 bitmap = 0;
 	long rc;
 
 	/* issue the hcall */
 	rc = plpar_hcall(H_SCM_HEALTH, ret, p->drc_index);
-	if (rc != H_SUCCESS) {
+	if (rc == H_SUCCESS)
+		bitmap = ret[0] & ret[1];
+	else if (rc == H_FUNCTION)
+		dev_info_once(&p->pdev->dev,
+			      "Hcall H_SCM_HEALTH not implemented, assuming empty health bitmap");
+	else {
+
 		dev_err(&p->pdev->dev,
 			"Failed to query health information, Err:%ld\n", rc);
 		return -ENXIO;
 	}
 
 	p->lasthealth_jiffies = jiffies;
-	p->health_bitmap = ret[0] & ret[1];
-
+	/* Allow overriding specific health bits via bit blt. */
+	bitmap = BITBLT_BITMAP(bitmap, p->health_bitmap_mask,
+			       p->health_bitmap_override);
+	WRITE_ONCE(p->health_bitmap, bitmap);
 	dev_dbg(&p->pdev->dev,
 		"Queried dimm health info. Bitmap:0x%016lx Mask:0x%016lx\n",
 		ret[0], ret[1]);
@@ -669,6 +688,54 @@ static int papr_pdsm_health(struct papr_scm_priv *p,
 	return rc;
 }
 
+/* Inject a smart error by updating the health bitmap mask and override */
+static int papr_pdsm_smart_inject(struct papr_scm_priv *p,
+				  union nd_pdsm_payload *payload)
+{
+	int rc;
+	u32 supported_flags = 0;
+	u64 mask = 0, override = 0;
+
+	/* Check for individual smart error flags and update mask and override */
+	if (payload->smart_inject.flags & PDSM_SMART_INJECT_HEALTH_FATAL) {
+		supported_flags |= PDSM_SMART_INJECT_HEALTH_FATAL;
+		mask |= PAPR_PMEM_HEALTH_FATAL;
+		override |= payload->smart_inject.fatal_enable ?
+			PAPR_PMEM_HEALTH_FATAL : 0;
+	}
+
+	if (payload->smart_inject.flags & PDSM_SMART_INJECT_BAD_SHUTDOWN) {
+		supported_flags |= PDSM_SMART_INJECT_BAD_SHUTDOWN;
+		mask |= PAPR_PMEM_SHUTDOWN_DIRTY;
+		override |= payload->smart_inject.unsafe_shutdown_enable ?
+			PAPR_PMEM_SHUTDOWN_DIRTY : 0;
+	}
+
+	dev_dbg(&p->pdev->dev, "[Smart-inject] Mask=%#llx override=%#llx\n",
+		mask, override);
+
+	/* Prevent concurrent access to dimm health bitmap related members */
+	rc = mutex_lock_interruptible(&p->health_mutex);
+	if (rc)
+		return rc;
+
+	/* Bitblt mask/override to corresponding health_bitmap counterparts */
+	p->health_bitmap_mask = BITBLT_BITMAP(p->health_bitmap_mask,
+					      mask, override);
+	p->health_bitmap_override = BITBLT_BITMAP(p->health_bitmap_override,
+						  mask, override);
+
+	/* Invalidate cached health bitmap */
+	p->lasthealth_jiffies = 0;
+
+	mutex_unlock(&p->health_mutex);
+
+	/* Return the supported flags back to userspace */
+	payload->smart_inject.flags = supported_flags;
+
+	return sizeof(struct nd_papr_pdsm_health);
+}
+
 /*
  * 'struct pdsm_cmd_desc'
  * Identifies supported PDSMs' expected length of in/out payloads
@@ -702,6 +769,12 @@ static const struct pdsm_cmd_desc __pdsm_cmd_descriptors[] = {
 		.size_out = sizeof(struct nd_papr_pdsm_health),
 		.service = papr_pdsm_health,
 	},
+
+	[PAPR_PDSM_SMART_INJECT] = {
+		.size_in = sizeof(struct nd_papr_pdsm_smart_inject),
+		.size_out = sizeof(struct nd_papr_pdsm_smart_inject),
+		.service = papr_pdsm_smart_inject,
+	},
 	/* Empty */
 	[PAPR_PDSM_MAX] = {
 		.size_in = 0,
@@ -838,6 +911,20 @@ static int papr_scm_ndctl(struct nvdimm_bus_descriptor *nd_desc,
 	return 0;
 }
 
+static ssize_t health_bitmap_override_show(struct device *dev,
+					   struct device_attribute *attr,
+					   char *buf)
+{
+	struct nvdimm *dimm = to_nvdimm(dev);
+	struct papr_scm_priv *p = nvdimm_provider_data(dimm);
+
+	return sprintf(buf, "mask=%#llx override=%#llx\n",
+		       READ_ONCE(p->health_bitmap_mask),
+		       READ_ONCE(p->health_bitmap_override));
+}
+
+static DEVICE_ATTR_ADMIN_RO(health_bitmap_override);
+
 static ssize_t perf_stats_show(struct device *dev,
 			       struct device_attribute *attr, char *buf)
 {
@@ -952,6 +1039,7 @@ static struct attribute *papr_nd_attributes[] = {
 	&dev_attr_flags.attr,
 	&dev_attr_perf_stats.attr,
 	&dev_attr_dirty_shutdown.attr,
+	&dev_attr_health_bitmap_override.attr,
 	NULL,
 };
 



^ permalink raw reply related

* [PATCH v2] tests/nvdimm/ndtest: Simulate nvdimm health, DSC and smart-inject
From: Shivaprasad G Bhat @ 2021-09-06  9:15 UTC (permalink / raw)
  To: nvdimm
  Cc: sbhat, aneesh.kumar, vaibhav, dan.j.williams, linuxppc-dev,
	ira.weiny

The 'papr_scm' module and the 'papr' implementation in libndctl support
PDSMs for reporting PAPR NVDIMM health, dirty-shutdown-count and
injecting smart-errors. This patch adds support for those PDSMs in the
ndtest module so that PDSM specific paths in libndctl can be exercised.

Signed-off-by: Shivaprasad G Bhat <sbhat@linux.ibm.com>
Signed-off-by: Vaibhav Jain <vaibhav@linux.ibm.com>
---
Changelog:

Since v1:
Link: https://patchwork.kernel.org/project/linux-nvdimm/list/?series=521767
* Removed the dependency on a header movement patch

 tools/testing/nvdimm/test/ndtest.c |  148 ++++++++++++++++++++++++++++++++++++
 tools/testing/nvdimm/test/ndtest.h |   96 +++++++++++++++++++++++
 2 files changed, 244 insertions(+)

diff --git a/tools/testing/nvdimm/test/ndtest.c b/tools/testing/nvdimm/test/ndtest.c
index 6862915f1fb0..45d42cd25e82 100644
--- a/tools/testing/nvdimm/test/ndtest.c
+++ b/tools/testing/nvdimm/test/ndtest.c
@@ -48,6 +48,10 @@ static struct ndtest_dimm dimm_group1[] = {
 		.uuid_str = "1e5c75d2-b618-11ea-9aa3-507b9ddc0f72",
 		.physical_id = 0,
 		.num_formats = 2,
+		.flags = PAPR_PMEM_HEALTH_NON_CRITICAL,
+		.extension_flags = PDSM_DIMM_DSC_VALID | PDSM_DIMM_HEALTH_RUN_GAUGE_VALID,
+		.dimm_fuel_gauge = 95,
+		.dimm_dsc = 42,
 	},
 	{
 		.size = DIMM_SIZE,
@@ -55,6 +59,10 @@ static struct ndtest_dimm dimm_group1[] = {
 		.uuid_str = "1c4d43ac-b618-11ea-be80-507b9ddc0f72",
 		.physical_id = 1,
 		.num_formats = 2,
+		.flags = PAPR_PMEM_HEALTH_NON_CRITICAL,
+		.extension_flags = PDSM_DIMM_DSC_VALID | PDSM_DIMM_HEALTH_RUN_GAUGE_VALID,
+		.dimm_fuel_gauge = 95,
+		.dimm_dsc = 42,
 	},
 	{
 		.size = DIMM_SIZE,
@@ -62,6 +70,10 @@ static struct ndtest_dimm dimm_group1[] = {
 		.uuid_str = "a9f17ffc-b618-11ea-b36d-507b9ddc0f72",
 		.physical_id = 2,
 		.num_formats = 2,
+		.flags = PAPR_PMEM_HEALTH_NON_CRITICAL,
+		.extension_flags = PDSM_DIMM_DSC_VALID | PDSM_DIMM_HEALTH_RUN_GAUGE_VALID,
+		.dimm_fuel_gauge = 95,
+		.dimm_dsc = 42,
 	},
 	{
 		.size = DIMM_SIZE,
@@ -69,6 +81,10 @@ static struct ndtest_dimm dimm_group1[] = {
 		.uuid_str = "b6b83b22-b618-11ea-8aae-507b9ddc0f72",
 		.physical_id = 3,
 		.num_formats = 2,
+		.flags = PAPR_PMEM_HEALTH_NON_CRITICAL,
+		.extension_flags = PDSM_DIMM_DSC_VALID | PDSM_DIMM_HEALTH_RUN_GAUGE_VALID,
+		.dimm_fuel_gauge = 95,
+		.dimm_dsc = 42,
 	},
 	{
 		.size = DIMM_SIZE,
@@ -296,6 +312,103 @@ static int ndtest_get_config_size(struct ndtest_dimm *dimm, unsigned int buf_len
 	return 0;
 }
 
+static int ndtest_pdsm_health(struct ndtest_dimm *dimm,
+			union nd_pdsm_payload *payload,
+			unsigned int buf_len)
+{
+	struct nd_papr_pdsm_health *health = &payload->health;
+
+	if (buf_len < sizeof(health))
+		return -EINVAL;
+
+	health->extension_flags = 0;
+	health->dimm_unarmed = !!(dimm->flags & PAPR_PMEM_UNARMED_MASK);
+	health->dimm_bad_shutdown = !!(dimm->flags & PAPR_PMEM_BAD_SHUTDOWN_MASK);
+	health->dimm_bad_restore = !!(dimm->flags & PAPR_PMEM_BAD_RESTORE_MASK);
+	health->dimm_health = PAPR_PDSM_DIMM_HEALTHY;
+
+	if (dimm->flags & PAPR_PMEM_HEALTH_FATAL)
+		health->dimm_health = PAPR_PDSM_DIMM_FATAL;
+	else if (dimm->flags & PAPR_PMEM_HEALTH_CRITICAL)
+		health->dimm_health = PAPR_PDSM_DIMM_CRITICAL;
+	else if (dimm->flags & PAPR_PMEM_HEALTH_UNHEALTHY ||
+		 dimm->flags & PAPR_PMEM_HEALTH_NON_CRITICAL)
+		health->dimm_health = PAPR_PDSM_DIMM_UNHEALTHY;
+
+	health->extension_flags = 0;
+	if (dimm->extension_flags & PDSM_DIMM_HEALTH_RUN_GAUGE_VALID) {
+		health->dimm_fuel_gauge = dimm->dimm_fuel_gauge;
+		health->extension_flags |= PDSM_DIMM_HEALTH_RUN_GAUGE_VALID;
+	}
+	if (dimm->extension_flags & PDSM_DIMM_DSC_VALID) {
+		health->dimm_dsc = dimm->dimm_dsc;
+		health->extension_flags |= PDSM_DIMM_DSC_VALID;
+	}
+
+	return 0;
+}
+
+static void smart_notify(struct ndtest_dimm *dimm)
+{
+	struct device *bus = dimm->dev->parent;
+
+	if (!(dimm->flags & PAPR_PMEM_HEALTH_NON_CRITICAL) ||
+	    (dimm->flags & PAPR_PMEM_BAD_SHUTDOWN_MASK)) {
+		device_lock(bus);
+		/* send smart notification */
+		if (dimm->notify_handle)
+			sysfs_notify_dirent(dimm->notify_handle);
+		device_unlock(bus);
+	}
+}
+
+static int ndtest_pdsm_smart_inject(struct ndtest_dimm *dimm,
+				union nd_pdsm_payload *payload,
+				unsigned int buf_len)
+{
+	struct nd_papr_pdsm_smart_inject *inj = &payload->smart_inject;
+
+	if (buf_len < sizeof(inj))
+		return -EINVAL;
+
+	if (inj->flags & PDSM_SMART_INJECT_HEALTH_FATAL) {
+		if (inj->fatal_enable)
+			dimm->flags |= PAPR_PMEM_HEALTH_FATAL;
+		else
+			dimm->flags &= ~PAPR_PMEM_HEALTH_FATAL;
+	}
+	if (inj->flags & PDSM_SMART_INJECT_BAD_SHUTDOWN) {
+		if (inj->unsafe_shutdown_enable)
+			dimm->flags |= PAPR_PMEM_SHUTDOWN_DIRTY;
+		else
+			dimm->flags &= ~PAPR_PMEM_SHUTDOWN_DIRTY;
+	}
+	smart_notify(dimm);
+
+	return 0;
+}
+
+static int ndtest_dimm_cmd_call(struct ndtest_dimm *dimm, unsigned int buf_len,
+			   void *buf)
+{
+	struct nd_cmd_pkg *call_pkg = buf;
+	unsigned int len = call_pkg->nd_size_in + call_pkg->nd_size_out;
+	struct nd_pkg_pdsm *pdsm = (struct nd_pkg_pdsm *) call_pkg->nd_payload;
+	union nd_pdsm_payload *payload = &(pdsm->payload);
+	unsigned int func = call_pkg->nd_command;
+
+	switch (func) {
+	case PAPR_PDSM_HEALTH:
+		return ndtest_pdsm_health(dimm, payload, len);
+	case PAPR_PDSM_SMART_INJECT:
+		return ndtest_pdsm_smart_inject(dimm, payload, len);
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
 static int ndtest_ctl(struct nvdimm_bus_descriptor *nd_desc,
 		      struct nvdimm *nvdimm, unsigned int cmd, void *buf,
 		      unsigned int buf_len, int *cmd_rc)
@@ -325,6 +438,9 @@ static int ndtest_ctl(struct nvdimm_bus_descriptor *nd_desc,
 	case ND_CMD_SET_CONFIG_DATA:
 		*cmd_rc = ndtest_config_set(dimm, buf_len, buf);
 		break;
+	case ND_CMD_CALL:
+		*cmd_rc = ndtest_dimm_cmd_call(dimm, buf_len, buf);
+		break;
 	default:
 		return -EINVAL;
 	}
@@ -614,6 +730,8 @@ static void put_dimms(void *data)
 
 	for (i = 0; i < p->config->dimm_count; i++)
 		if (p->config->dimms[i].dev) {
+			if (p->config->dimms[i].notify_handle)
+				sysfs_put(p->config->dimms[i].notify_handle);
 			device_unregister(p->config->dimms[i].dev);
 			p->config->dimms[i].dev = NULL;
 		}
@@ -826,6 +944,18 @@ static ssize_t flags_show(struct device *dev,
 }
 static DEVICE_ATTR_RO(flags);
 
+#define PAPR_PMEM_DIMM_CMD_MASK				\
+	 ((1U << PAPR_PDSM_HEALTH)			\
+	 | (1U << PAPR_PDSM_SMART_INJECT))
+
+static ssize_t dsm_mask_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%#x\n", PAPR_PMEM_DIMM_CMD_MASK);
+}
+
+static DEVICE_ATTR_RO(dsm_mask);
+
 static struct attribute *ndtest_nvdimm_attributes[] = {
 	&dev_attr_nvdimm_show_handle.attr,
 	&dev_attr_vendor.attr,
@@ -837,6 +967,7 @@ static struct attribute *ndtest_nvdimm_attributes[] = {
 	&dev_attr_format.attr,
 	&dev_attr_format1.attr,
 	&dev_attr_flags.attr,
+	&dev_attr_dsm_mask.attr,
 	NULL,
 };
 
@@ -856,6 +987,7 @@ static int ndtest_dimm_register(struct ndtest_priv *priv,
 {
 	struct device *dev = &priv->pdev.dev;
 	unsigned long dimm_flags = dimm->flags;
+	struct kernfs_node *papr_kernfs;
 
 	if (dimm->num_formats > 1) {
 		set_bit(NDD_ALIASING, &dimm_flags);
@@ -882,6 +1014,20 @@ static int ndtest_dimm_register(struct ndtest_priv *priv,
 		return -ENOMEM;
 	}
 
+	nd_synchronize();
+
+	papr_kernfs = sysfs_get_dirent(nvdimm_kobj(dimm->nvdimm)->sd, "papr");
+	if (!papr_kernfs) {
+		pr_err("Could not initialize the notifier handle\n");
+		return 0;
+	}
+
+	dimm->notify_handle = sysfs_get_dirent(papr_kernfs, "flags");
+	sysfs_put(papr_kernfs);
+	if (!dimm->notify_handle) {
+		pr_err("Could not initialize the notifier handle\n");
+		return 0;
+	}
 	return 0;
 }
 
@@ -953,6 +1099,8 @@ static int ndtest_bus_register(struct ndtest_priv *p)
 	p->bus_desc.provider_name = NULL;
 	p->bus_desc.attr_groups = ndtest_attribute_groups;
 
+	set_bit(NVDIMM_FAMILY_PAPR, &p->bus_desc.dimm_family_mask);
+
 	p->bus = nvdimm_bus_register(&p->pdev.dev, &p->bus_desc);
 	if (!p->bus) {
 		dev_err(&p->pdev.dev, "Error creating nvdimm bus %pOF\n", p->dn);
diff --git a/tools/testing/nvdimm/test/ndtest.h b/tools/testing/nvdimm/test/ndtest.h
index 2c54c9cbb90c..b9b381021313 100644
--- a/tools/testing/nvdimm/test/ndtest.h
+++ b/tools/testing/nvdimm/test/ndtest.h
@@ -16,6 +16,8 @@
 #define PAPR_PMEM_HEALTH_FATAL              (1ULL << (63 - 5))
 /* SCM contents cannot persist due to current platform health status */
 #define PAPR_PMEM_HEALTH_UNHEALTHY          (1ULL << (63 - 6))
+/* SCM device is unable to persist memory contents in certain conditions */
+#define PAPR_PMEM_HEALTH_NON_CRITICAL       (1ULL << (63 - 7))
 
 /* Bits status indicators for health bitmap indicating unarmed dimm */
 #define PAPR_PMEM_UNARMED_MASK (PAPR_PMEM_UNARMED |		\
@@ -80,6 +82,13 @@ struct ndtest_dimm {
 	int id;
 	int fail_cmd_code;
 	u8 no_alias;
+
+	struct kernfs_node *notify_handle;
+
+	/* SMART Health information */
+	u32 extension_flags;
+	u16 dimm_fuel_gauge;
+	u64 dimm_dsc;
 };
 
 struct ndtest_mapping {
@@ -98,6 +107,93 @@ struct ndtest_region {
 	u8 range_index;
 };
 
+#define ND_PDSM_PAYLOAD_MAX_SIZE 184
+/*
+ * Methods to be embedded in ND_CMD_CALL request. These are sent to the kernel
+ * via 'nd_cmd_pkg.nd_command' member of the ioctl struct
+ */
+enum papr_pdsm {
+	PAPR_PDSM_MIN = 0x0,
+	PAPR_PDSM_HEALTH,
+	PAPR_PDSM_SMART_INJECT,
+	PAPR_PDSM_MAX,
+};
+
+/* Various nvdimm health indicators */
+#define PAPR_PDSM_DIMM_HEALTHY       0
+#define PAPR_PDSM_DIMM_UNHEALTHY     1
+#define PAPR_PDSM_DIMM_CRITICAL      2
+#define PAPR_PDSM_DIMM_FATAL         3
+
+/* struct nd_papr_pdsm_health.extension_flags field flags */
+
+/* Indicate that the 'dimm_fuel_gauge' field is valid */
+#define PDSM_DIMM_HEALTH_RUN_GAUGE_VALID 1
+
+/* Indicate that the 'dimm_dsc' field is valid */
+#define PDSM_DIMM_DSC_VALID 2
+
+/*
+ * Struct exchanged between kernel & ndctl in for PAPR_PDSM_HEALTH
+ * Various flags indicate the health status of the dimm.
+ */
+struct nd_papr_pdsm_health {
+	union {
+		struct {
+			__u32 extension_flags;
+			__u8 dimm_unarmed;
+			__u8 dimm_bad_shutdown;
+			__u8 dimm_bad_restore;
+			__u8 dimm_scrubbed;
+			__u8 dimm_locked;
+			__u8 dimm_encrypted;
+			__u16 dimm_health;
+
+			/* Extension flag PDSM_DIMM_HEALTH_RUN_GAUGE_VALID */
+			__u16 dimm_fuel_gauge;
+
+			/* Extension flag PDSM_DIMM_DSC_VALID */
+			__u64 dimm_dsc;
+		};
+		__u8 buf[ND_PDSM_PAYLOAD_MAX_SIZE];
+	};
+};
+
+/* Flags for injecting specific smart errors */
+#define PDSM_SMART_INJECT_HEALTH_FATAL		(1 << 0)
+#define PDSM_SMART_INJECT_BAD_SHUTDOWN		(1 << 1)
+
+struct nd_papr_pdsm_smart_inject {
+	union {
+		struct {
+			/* One or more of PDSM_SMART_INJECT_ */
+			__u32 flags;
+			__u8 fatal_enable;
+			__u8 unsafe_shutdown_enable;
+		};
+		__u8 buf[ND_PDSM_PAYLOAD_MAX_SIZE];
+	};
+};
+
+/* Maximal union that can hold all possible payload types */
+union nd_pdsm_payload {
+	struct nd_papr_pdsm_health health;
+	struct nd_papr_pdsm_smart_inject smart_inject;
+	__u8 buf[ND_PDSM_PAYLOAD_MAX_SIZE];
+} __packed;
+
+/*
+ * PDSM-header + payload expected with ND_CMD_CALL ioctl from libnvdimm
+ * Valid member of union 'payload' is identified via 'nd_cmd_pkg.nd_command'
+ * that should always precede this struct when sent to papr_scm via CMD_CALL
+ * interface.
+ */
+struct nd_pkg_pdsm {
+	__s32 cmd_status;	/* Out: Sub-cmd status returned back */
+	__u16 reserved[2];	/* Ignored and to be set as '0' */
+	union nd_pdsm_payload payload;
+} __packed;
+
 struct ndtest_config {
 	struct ndtest_dimm *dimms;
 	struct ndtest_region *regions;



^ permalink raw reply related

* [RFC PATCH v2] powerpc/papr_scm: Move duplicate definitions to common header files
From: Shivaprasad G Bhat @ 2021-09-06  9:27 UTC (permalink / raw)
  To: linuxppc-dev, linux-kernel, mpe
  Cc: nvdimm, dan.j.williams, vaibhav, sbhat, aneesh.kumar

papr_scm and ndtest share common PDSM payload structs like
nd_papr_pdsm_health. Presently these structs are duplicated across the papr_pdsm.h
and ndtest.h header files. Since 'ndtest' is essentially arch independent and can
run on platforms other than PPC64, a way needs to be devised to avoid redundancy
and duplication of PDSM structs in the future.

So the patch proposes moving the PDSM header from arch/powerpc/include/uapi/ to
the generic include/uapi/linux directory. Also, there are some #defines common
between papr_scm and ndtest which are not exported to the user space. So, move
them to a header file which can be shared across ndtest and papr_scm via newly
introduced include/linux/papr_scm.h.

Signed-off-by: Shivaprasad G Bhat <sbhat@linux.ibm.com>
Signed-off-by: Vaibhav Jain <vaibhav@linux.ibm.com>
Suggested-by: "Aneesh Kumar K.V" <aneesh.kumar@linux.ibm.com>
---
Changelog:

Since v1:
Link: https://patchwork.kernel.org/project/linux-nvdimm/patch/162505488483.72147.12741153746322191381.stgit@56e104a48989/
* Removed dependency on this patch for the other patches

 MAINTAINERS                               |    2 
 arch/powerpc/include/uapi/asm/papr_pdsm.h |  165 -----------------------------
 arch/powerpc/platforms/pseries/papr_scm.c |   43 --------
 include/linux/papr_scm.h                  |   48 ++++++++
 include/uapi/linux/papr_pdsm.h            |  165 +++++++++++++++++++++++++++++
 tools/testing/nvdimm/test/ndtest.c        |    2 
 tools/testing/nvdimm/test/ndtest.h        |  120 ---------------------
 7 files changed, 219 insertions(+), 326 deletions(-)
 delete mode 100644 arch/powerpc/include/uapi/asm/papr_pdsm.h
 create mode 100644 include/linux/papr_scm.h
 create mode 100644 include/uapi/linux/papr_pdsm.h

diff --git a/MAINTAINERS b/MAINTAINERS
index 6c8be735cc91..03fe0c77cefa 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -10661,6 +10661,8 @@ F:	drivers/rtc/rtc-opal.c
 F:	drivers/scsi/ibmvscsi/
 F:	drivers/tty/hvc/hvc_opal.c
 F:	drivers/watchdog/wdrtas.c
+F:	include/linux/papr_scm.h
+F:	include/uapi/linux/papr_pdsm.h
 F:	tools/testing/selftests/powerpc
 N:	/pmac
 N:	powermac
diff --git a/arch/powerpc/include/uapi/asm/papr_pdsm.h b/arch/powerpc/include/uapi/asm/papr_pdsm.h
deleted file mode 100644
index 17439925045c..000000000000
--- a/arch/powerpc/include/uapi/asm/papr_pdsm.h
+++ /dev/null
@@ -1,165 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-/*
- * PAPR nvDimm Specific Methods (PDSM) and structs for libndctl
- *
- * (C) Copyright IBM 2020
- *
- * Author: Vaibhav Jain <vaibhav at linux.ibm.com>
- */
-
-#ifndef _UAPI_ASM_POWERPC_PAPR_PDSM_H_
-#define _UAPI_ASM_POWERPC_PAPR_PDSM_H_
-
-#include <linux/types.h>
-#include <linux/ndctl.h>
-
-/*
- * PDSM Envelope:
- *
- * The ioctl ND_CMD_CALL exchange data between user-space and kernel via
- * envelope which consists of 2 headers sections and payload sections as
- * illustrated below:
- *  +-----------------+---------------+---------------------------+
- *  |   64-Bytes      |   8-Bytes     |       Max 184-Bytes       |
- *  +-----------------+---------------+---------------------------+
- *  | ND-HEADER       |  PDSM-HEADER  |      PDSM-PAYLOAD         |
- *  +-----------------+---------------+---------------------------+
- *  | nd_family       |               |                           |
- *  | nd_size_out     | cmd_status    |                           |
- *  | nd_size_in      | reserved      |     nd_pdsm_payload       |
- *  | nd_command      | payload   --> |                           |
- *  | nd_fw_size      |               |                           |
- *  | nd_payload ---> |               |                           |
- *  +---------------+-----------------+---------------------------+
- *
- * ND Header:
- * This is the generic libnvdimm header described as 'struct nd_cmd_pkg'
- * which is interpreted by libnvdimm before passed on to papr_scm. Important
- * member fields used are:
- * 'nd_family'		: (In) NVDIMM_FAMILY_PAPR_SCM
- * 'nd_size_in'		: (In) PDSM-HEADER + PDSM-IN-PAYLOAD (usually 0)
- * 'nd_size_out'        : (In) PDSM-HEADER + PDSM-RETURN-PAYLOAD
- * 'nd_command'         : (In) One of PAPR_PDSM_XXX
- * 'nd_fw_size'         : (Out) PDSM-HEADER + size of actual payload returned
- *
- * PDSM Header:
- * This is papr-scm specific header that precedes the payload. This is defined
- * as nd_cmd_pdsm_pkg.  Following fields aare available in this header:
- *
- * 'cmd_status'		: (Out) Errors if any encountered while servicing PDSM.
- * 'reserved'		: Not used, reserved for future and should be set to 0.
- * 'payload'            : A union of all the possible payload structs
- *
- * PDSM Payload:
- *
- * The layout of the PDSM Payload is defined by various structs shared between
- * papr_scm and libndctl so that contents of payload can be interpreted. As such
- * its defined as a union of all possible payload structs as
- * 'union nd_pdsm_payload'. Based on the value of 'nd_cmd_pkg.nd_command'
- * appropriate member of the union is accessed.
- */
-
-/* Max payload size that we can handle */
-#define ND_PDSM_PAYLOAD_MAX_SIZE 184
-
-/* Max payload size that we can handle */
-#define ND_PDSM_HDR_SIZE \
-	(sizeof(struct nd_pkg_pdsm) - ND_PDSM_PAYLOAD_MAX_SIZE)
-
-/* Various nvdimm health indicators */
-#define PAPR_PDSM_DIMM_HEALTHY       0
-#define PAPR_PDSM_DIMM_UNHEALTHY     1
-#define PAPR_PDSM_DIMM_CRITICAL      2
-#define PAPR_PDSM_DIMM_FATAL         3
-
-/* struct nd_papr_pdsm_health.extension_flags field flags */
-
-/* Indicate that the 'dimm_fuel_gauge' field is valid */
-#define PDSM_DIMM_HEALTH_RUN_GAUGE_VALID 1
-
-/* Indicate that the 'dimm_dsc' field is valid */
-#define PDSM_DIMM_DSC_VALID 2
-
-/*
- * Struct exchanged between kernel & ndctl in for PAPR_PDSM_HEALTH
- * Various flags indicate the health status of the dimm.
- *
- * extension_flags	: Any extension fields present in the struct.
- * dimm_unarmed		: Dimm not armed. So contents wont persist.
- * dimm_bad_shutdown	: Previous shutdown did not persist contents.
- * dimm_bad_restore	: Contents from previous shutdown werent restored.
- * dimm_scrubbed	: Contents of the dimm have been scrubbed.
- * dimm_locked		: Contents of the dimm cant be modified until CEC reboot
- * dimm_encrypted	: Contents of dimm are encrypted.
- * dimm_health		: Dimm health indicator. One of PAPR_PDSM_DIMM_XXXX
- * dimm_fuel_gauge	: Life remaining of DIMM as a percentage from 0-100
- */
-struct nd_papr_pdsm_health {
-	union {
-		struct {
-			__u32 extension_flags;
-			__u8 dimm_unarmed;
-			__u8 dimm_bad_shutdown;
-			__u8 dimm_bad_restore;
-			__u8 dimm_scrubbed;
-			__u8 dimm_locked;
-			__u8 dimm_encrypted;
-			__u16 dimm_health;
-
-			/* Extension flag PDSM_DIMM_HEALTH_RUN_GAUGE_VALID */
-			__u16 dimm_fuel_gauge;
-
-			/* Extension flag PDSM_DIMM_DSC_VALID */
-			__u64 dimm_dsc;
-		};
-		__u8 buf[ND_PDSM_PAYLOAD_MAX_SIZE];
-	};
-};
-
-/* Flags for injecting specific smart errors */
-#define PDSM_SMART_INJECT_HEALTH_FATAL		(1 << 0)
-#define PDSM_SMART_INJECT_BAD_SHUTDOWN		(1 << 1)
-
-struct nd_papr_pdsm_smart_inject {
-	union {
-		struct {
-			/* One or more of PDSM_SMART_INJECT_ */
-			__u32 flags;
-			__u8 fatal_enable;
-			__u8 unsafe_shutdown_enable;
-		};
-		__u8 buf[ND_PDSM_PAYLOAD_MAX_SIZE];
-	};
-};
-
-/*
- * Methods to be embedded in ND_CMD_CALL request. These are sent to the kernel
- * via 'nd_cmd_pkg.nd_command' member of the ioctl struct
- */
-enum papr_pdsm {
-	PAPR_PDSM_MIN = 0x0,
-	PAPR_PDSM_HEALTH,
-	PAPR_PDSM_SMART_INJECT,
-	PAPR_PDSM_MAX,
-};
-
-/* Maximal union that can hold all possible payload types */
-union nd_pdsm_payload {
-	struct nd_papr_pdsm_health health;
-	struct nd_papr_pdsm_smart_inject smart_inject;
-	__u8 buf[ND_PDSM_PAYLOAD_MAX_SIZE];
-} __packed;
-
-/*
- * PDSM-header + payload expected with ND_CMD_CALL ioctl from libnvdimm
- * Valid member of union 'payload' is identified via 'nd_cmd_pkg.nd_command'
- * that should always precede this struct when sent to papr_scm via CMD_CALL
- * interface.
- */
-struct nd_pkg_pdsm {
-	__s32 cmd_status;	/* Out: Sub-cmd status returned back */
-	__u16 reserved[2];	/* Ignored and to be set as '0' */
-	union nd_pdsm_payload payload;
-} __packed;
-
-#endif /* _UAPI_ASM_POWERPC_PAPR_PDSM_H_ */
diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c
index de4cf329cfb3..b7437c61a270 100644
--- a/arch/powerpc/platforms/pseries/papr_scm.c
+++ b/arch/powerpc/platforms/pseries/papr_scm.c
@@ -16,7 +16,8 @@
 #include <linux/nd.h>
 
 #include <asm/plpar_wrappers.h>
-#include <asm/papr_pdsm.h>
+#include <uapi/linux/papr_pdsm.h>
+#include <linux/papr_scm.h>
 #include <asm/mce.h>
 #include <asm/unaligned.h>
 
@@ -28,46 +29,6 @@
 	 (1ul << ND_CMD_SET_CONFIG_DATA) | \
 	 (1ul << ND_CMD_CALL))
 
-/* DIMM health bitmap bitmap indicators */
-/* SCM device is unable to persist memory contents */
-#define PAPR_PMEM_UNARMED                   (1ULL << (63 - 0))
-/* SCM device failed to persist memory contents */
-#define PAPR_PMEM_SHUTDOWN_DIRTY            (1ULL << (63 - 1))
-/* SCM device contents are persisted from previous IPL */
-#define PAPR_PMEM_SHUTDOWN_CLEAN            (1ULL << (63 - 2))
-/* SCM device contents are not persisted from previous IPL */
-#define PAPR_PMEM_EMPTY                     (1ULL << (63 - 3))
-/* SCM device memory life remaining is critically low */
-#define PAPR_PMEM_HEALTH_CRITICAL           (1ULL << (63 - 4))
-/* SCM device will be garded off next IPL due to failure */
-#define PAPR_PMEM_HEALTH_FATAL              (1ULL << (63 - 5))
-/* SCM contents cannot persist due to current platform health status */
-#define PAPR_PMEM_HEALTH_UNHEALTHY          (1ULL << (63 - 6))
-/* SCM device is unable to persist memory contents in certain conditions */
-#define PAPR_PMEM_HEALTH_NON_CRITICAL       (1ULL << (63 - 7))
-/* SCM device is encrypted */
-#define PAPR_PMEM_ENCRYPTED                 (1ULL << (63 - 8))
-/* SCM device has been scrubbed and locked */
-#define PAPR_PMEM_SCRUBBED_AND_LOCKED       (1ULL << (63 - 9))
-
-/* Bits status indicators for health bitmap indicating unarmed dimm */
-#define PAPR_PMEM_UNARMED_MASK (PAPR_PMEM_UNARMED |		\
-				PAPR_PMEM_HEALTH_UNHEALTHY)
-
-/* Bits status indicators for health bitmap indicating unflushed dimm */
-#define PAPR_PMEM_BAD_SHUTDOWN_MASK (PAPR_PMEM_SHUTDOWN_DIRTY)
-
-/* Bits status indicators for health bitmap indicating unrestored dimm */
-#define PAPR_PMEM_BAD_RESTORE_MASK  (PAPR_PMEM_EMPTY)
-
-/* Bit status indicators for smart event notification */
-#define PAPR_PMEM_SMART_EVENT_MASK (PAPR_PMEM_HEALTH_CRITICAL | \
-				    PAPR_PMEM_HEALTH_FATAL |	\
-				    PAPR_PMEM_HEALTH_UNHEALTHY)
-
-#define PAPR_SCM_PERF_STATS_EYECATCHER __stringify(SCMSTATS)
-#define PAPR_SCM_PERF_STATS_VERSION 0x1
-
 /* Use bitblt method to override specific bits in the '_bitmap_' */
 #define BITBLT_BITMAP(_bitmap_, _mask_, _override_)		\
 	(((_bitmap_) & ~(_mask_)) | ((_mask_) & (_override_)))
diff --git a/include/linux/papr_scm.h b/include/linux/papr_scm.h
new file mode 100644
index 000000000000..f116e5ffef36
--- /dev/null
+++ b/include/linux/papr_scm.h
@@ -0,0 +1,48 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef __LINUX_PAPR_SCM_H
+#define __LINUX_PAPR_SCM_H
+
+/* DIMM health bitmap bitmap indicators */
+/* SCM device is unable to persist memory contents */
+#define PAPR_PMEM_UNARMED                   (1ULL << (63 - 0))
+/* SCM device failed to persist memory contents */
+#define PAPR_PMEM_SHUTDOWN_DIRTY            (1ULL << (63 - 1))
+/* SCM device contents are persisted from previous IPL */
+#define PAPR_PMEM_SHUTDOWN_CLEAN            (1ULL << (63 - 2))
+/* SCM device contents are not persisted from previous IPL */
+#define PAPR_PMEM_EMPTY                     (1ULL << (63 - 3))
+/* SCM device memory life remaining is critically low */
+#define PAPR_PMEM_HEALTH_CRITICAL           (1ULL << (63 - 4))
+/* SCM device will be garded off next IPL due to failure */
+#define PAPR_PMEM_HEALTH_FATAL              (1ULL << (63 - 5))
+/* SCM contents cannot persist due to current platform health status */
+#define PAPR_PMEM_HEALTH_UNHEALTHY          (1ULL << (63 - 6))
+/* SCM device is unable to persist memory contents in certain conditions */
+#define PAPR_PMEM_HEALTH_NON_CRITICAL       (1ULL << (63 - 7))
+/* SCM device is encrypted */
+#define PAPR_PMEM_ENCRYPTED                 (1ULL << (63 - 8))
+/* SCM device has been scrubbed and locked */
+#define PAPR_PMEM_SCRUBBED_AND_LOCKED       (1ULL << (63 - 9))
+
+#define PAPR_PMEM_SAVE_FAILED                (1ULL << (63 - 10))
+
+/* Bits status indicators for health bitmap indicating unarmed dimm */
+#define PAPR_PMEM_UNARMED_MASK (PAPR_PMEM_UNARMED | PAPR_PMEM_HEALTH_UNHEALTHY)
+
+/* Bits status indicators for health bitmap indicating unflushed dimm */
+#define PAPR_PMEM_BAD_SHUTDOWN_MASK (PAPR_PMEM_SHUTDOWN_DIRTY)
+
+/* Bits status indicators for health bitmap indicating unrestored dimm */
+#define PAPR_PMEM_BAD_RESTORE_MASK  (PAPR_PMEM_EMPTY)
+
+/* Bit status indicators for smart event notification */
+#define PAPR_PMEM_SMART_EVENT_MASK (PAPR_PMEM_HEALTH_CRITICAL | \
+				    PAPR_PMEM_HEALTH_FATAL    | \
+				    PAPR_PMEM_HEALTH_UNHEALTHY)
+
+#define PAPR_PMEM_SAVE_MASK                (PAPR_PMEM_SAVE_FAILED)
+
+#define PAPR_SCM_PERF_STATS_EYECATCHER __stringify(SCMSTATS)
+#define PAPR_SCM_PERF_STATS_VERSION 0x1
+
+#endif /* __LINUX_PAPR_SCM_H */
diff --git a/include/uapi/linux/papr_pdsm.h b/include/uapi/linux/papr_pdsm.h
new file mode 100644
index 000000000000..1be9906f4540
--- /dev/null
+++ b/include/uapi/linux/papr_pdsm.h
@@ -0,0 +1,165 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * PAPR nvDimm Specific Methods (PDSM) and structs for libndctl
+ *
+ * (C) Copyright IBM 2020-2021
+ *
+ * Author: Vaibhav Jain <vaibhav at linux.ibm.com>
+ */
+
+#ifndef _UAPI_LINUX_PAPR_PDSM_H_
+#define _UAPI_LINUX_PAPR_PDSM_H_
+
+#include <linux/types.h>
+#include <linux/ndctl.h>
+
+/*
+ * PDSM Envelope:
+ *
+ * The ioctl ND_CMD_CALL exchange data between user-space and kernel via
+ * envelope which consists of 2 headers sections and payload sections as
+ * illustrated below:
+ *  +-----------------+---------------+---------------------------+
+ *  |   64-Bytes      |   8-Bytes     |       Max 184-Bytes       |
+ *  +-----------------+---------------+---------------------------+
+ *  | ND-HEADER       |  PDSM-HEADER  |      PDSM-PAYLOAD         |
+ *  +-----------------+---------------+---------------------------+
+ *  | nd_family       |               |                           |
+ *  | nd_size_out     | cmd_status    |                           |
+ *  | nd_size_in      | reserved      |     nd_pdsm_payload       |
+ *  | nd_command      | payload   --> |                           |
+ *  | nd_fw_size      |               |                           |
+ *  | nd_payload ---> |               |                           |
+ *  +---------------+-----------------+---------------------------+
+ *
+ * ND Header:
+ * This is the generic libnvdimm header described as 'struct nd_cmd_pkg'
+ * which is interpreted by libnvdimm before passed on to papr_scm. Important
+ * member fields used are:
+ * 'nd_family'		: (In) NVDIMM_FAMILY_PAPR_SCM
+ * 'nd_size_in'		: (In) PDSM-HEADER + PDSM-IN-PAYLOAD (usually 0)
+ * 'nd_size_out'        : (In) PDSM-HEADER + PDSM-RETURN-PAYLOAD
+ * 'nd_command'         : (In) One of PAPR_PDSM_XXX
+ * 'nd_fw_size'         : (Out) PDSM-HEADER + size of actual payload returned
+ *
+ * PDSM Header:
+ * This is papr-scm specific header that precedes the payload. This is defined
+ * as nd_cmd_pdsm_pkg.  Following fields aare available in this header:
+ *
+ * 'cmd_status'		: (Out) Errors if any encountered while servicing PDSM.
+ * 'reserved'		: Not used, reserved for future and should be set to 0.
+ * 'payload'            : A union of all the possible payload structs
+ *
+ * PDSM Payload:
+ *
+ * The layout of the PDSM Payload is defined by various structs shared between
+ * papr_scm and libndctl so that contents of payload can be interpreted. As such
+ * its defined as a union of all possible payload structs as
+ * 'union nd_pdsm_payload'. Based on the value of 'nd_cmd_pkg.nd_command'
+ * appropriate member of the union is accessed.
+ */
+
+/* Max payload size that we can handle */
+#define ND_PDSM_PAYLOAD_MAX_SIZE 184
+
+/* Max payload size that we can handle */
+#define ND_PDSM_HDR_SIZE \
+	(sizeof(struct nd_pkg_pdsm) - ND_PDSM_PAYLOAD_MAX_SIZE)
+
+/* Various nvdimm health indicators */
+#define PAPR_PDSM_DIMM_HEALTHY       0
+#define PAPR_PDSM_DIMM_UNHEALTHY     1
+#define PAPR_PDSM_DIMM_CRITICAL      2
+#define PAPR_PDSM_DIMM_FATAL         3
+
+/* struct nd_papr_pdsm_health.extension_flags field flags */
+
+/* Indicate that the 'dimm_fuel_gauge' field is valid */
+#define PDSM_DIMM_HEALTH_RUN_GAUGE_VALID 1
+
+/* Indicate that the 'dimm_dsc' field is valid */
+#define PDSM_DIMM_DSC_VALID 2
+
+/*
+ * Struct exchanged between kernel & ndctl in for PAPR_PDSM_HEALTH
+ * Various flags indicate the health status of the dimm.
+ *
+ * extension_flags	: Any extension fields present in the struct.
+ * dimm_unarmed		: Dimm not armed. So contents wont persist.
+ * dimm_bad_shutdown	: Previous shutdown did not persist contents.
+ * dimm_bad_restore	: Contents from previous shutdown werent restored.
+ * dimm_scrubbed	: Contents of the dimm have been scrubbed.
+ * dimm_locked		: Contents of the dimm cant be modified until CEC reboot
+ * dimm_encrypted	: Contents of dimm are encrypted.
+ * dimm_health		: Dimm health indicator. One of PAPR_PDSM_DIMM_XXXX
+ * dimm_fuel_gauge	: Life remaining of DIMM as a percentage from 0-100
+ */
+struct nd_papr_pdsm_health {
+	union {
+		struct {
+			__u32 extension_flags;
+			__u8 dimm_unarmed;
+			__u8 dimm_bad_shutdown;
+			__u8 dimm_bad_restore;
+			__u8 dimm_scrubbed;
+			__u8 dimm_locked;
+			__u8 dimm_encrypted;
+			__u16 dimm_health;
+
+			/* Extension flag PDSM_DIMM_HEALTH_RUN_GAUGE_VALID */
+			__u16 dimm_fuel_gauge;
+
+			/* Extension flag PDSM_DIMM_DSC_VALID */
+			__u64 dimm_dsc;
+		};
+		__u8 buf[ND_PDSM_PAYLOAD_MAX_SIZE];
+	};
+};
+
+/* Flags for injecting specific smart errors */
+#define PDSM_SMART_INJECT_HEALTH_FATAL		(1 << 0)
+#define PDSM_SMART_INJECT_BAD_SHUTDOWN		(1 << 1)
+
+struct nd_papr_pdsm_smart_inject {
+	union {
+		struct {
+			/* One or more of PDSM_SMART_INJECT_ */
+			__u32 flags;
+			__u8 fatal_enable;
+			__u8 unsafe_shutdown_enable;
+		};
+		__u8 buf[ND_PDSM_PAYLOAD_MAX_SIZE];
+	};
+};
+
+/*
+ * Methods to be embedded in ND_CMD_CALL request. These are sent to the kernel
+ * via 'nd_cmd_pkg.nd_command' member of the ioctl struct
+ */
+enum papr_pdsm {
+	PAPR_PDSM_MIN = 0x0,
+	PAPR_PDSM_HEALTH,
+	PAPR_PDSM_SMART_INJECT,
+	PAPR_PDSM_MAX,
+};
+
+/* Maximal union that can hold all possible payload types */
+union nd_pdsm_payload {
+	struct nd_papr_pdsm_health health;
+	struct nd_papr_pdsm_smart_inject smart_inject;
+	__u8 buf[ND_PDSM_PAYLOAD_MAX_SIZE];
+} __packed;
+
+/*
+ * PDSM-header + payload expected with ND_CMD_CALL ioctl from libnvdimm
+ * Valid member of union 'payload' is identified via 'nd_cmd_pkg.nd_command'
+ * that should always precede this struct when sent to papr_scm via CMD_CALL
+ * interface.
+ */
+struct nd_pkg_pdsm {
+	__s32 cmd_status;	/* Out: Sub-cmd status returned back */
+	__u16 reserved[2];	/* Ignored and to be set as '0' */
+	union nd_pdsm_payload payload;
+} __packed;
+
+#endif /* _UAPI_LINUX_PAPR_PDSM_H_ */
diff --git a/tools/testing/nvdimm/test/ndtest.c b/tools/testing/nvdimm/test/ndtest.c
index 45d42cd25e82..6622e8adbd11 100644
--- a/tools/testing/nvdimm/test/ndtest.c
+++ b/tools/testing/nvdimm/test/ndtest.c
@@ -13,6 +13,8 @@
 #include <nd-core.h>
 #include <linux/printk.h>
 #include <linux/seq_buf.h>
+#include <linux/papr_scm.h>
+#include <uapi/linux/papr_pdsm.h>
 
 #include "../watermark.h"
 #include "nfit_test.h"
diff --git a/tools/testing/nvdimm/test/ndtest.h b/tools/testing/nvdimm/test/ndtest.h
index b9b381021313..e18b3b006fa2 100644
--- a/tools/testing/nvdimm/test/ndtest.h
+++ b/tools/testing/nvdimm/test/ndtest.h
@@ -5,39 +5,6 @@
 #include <linux/platform_device.h>
 #include <linux/libnvdimm.h>
 
-/* SCM device is unable to persist memory contents */
-#define PAPR_PMEM_UNARMED                   (1ULL << (63 - 0))
-/* SCM device failed to persist memory contents */
-#define PAPR_PMEM_SHUTDOWN_DIRTY            (1ULL << (63 - 1))
-/* SCM device contents are not persisted from previous IPL */
-#define PAPR_PMEM_EMPTY                     (1ULL << (63 - 3))
-#define PAPR_PMEM_HEALTH_CRITICAL           (1ULL << (63 - 4))
-/* SCM device will be garded off next IPL due to failure */
-#define PAPR_PMEM_HEALTH_FATAL              (1ULL << (63 - 5))
-/* SCM contents cannot persist due to current platform health status */
-#define PAPR_PMEM_HEALTH_UNHEALTHY          (1ULL << (63 - 6))
-/* SCM device is unable to persist memory contents in certain conditions */
-#define PAPR_PMEM_HEALTH_NON_CRITICAL       (1ULL << (63 - 7))
-
-/* Bits status indicators for health bitmap indicating unarmed dimm */
-#define PAPR_PMEM_UNARMED_MASK (PAPR_PMEM_UNARMED |		\
-				PAPR_PMEM_HEALTH_UNHEALTHY)
-
-#define PAPR_PMEM_SAVE_FAILED                (1ULL << (63 - 10))
-
-/* Bits status indicators for health bitmap indicating unflushed dimm */
-#define PAPR_PMEM_BAD_SHUTDOWN_MASK (PAPR_PMEM_SHUTDOWN_DIRTY)
-
-/* Bits status indicators for health bitmap indicating unrestored dimm */
-#define PAPR_PMEM_BAD_RESTORE_MASK  (PAPR_PMEM_EMPTY)
-
-/* Bit status indicators for smart event notification */
-#define PAPR_PMEM_SMART_EVENT_MASK (PAPR_PMEM_HEALTH_CRITICAL | \
-				    PAPR_PMEM_HEALTH_FATAL |	\
-				    PAPR_PMEM_HEALTH_UNHEALTHY)
-
-#define PAPR_PMEM_SAVE_MASK                (PAPR_PMEM_SAVE_FAILED)
-
 struct ndtest_config;
 
 struct ndtest_priv {
@@ -107,93 +74,6 @@ struct ndtest_region {
 	u8 range_index;
 };
 
-#define ND_PDSM_PAYLOAD_MAX_SIZE 184
-/*
- * Methods to be embedded in ND_CMD_CALL request. These are sent to the kernel
- * via 'nd_cmd_pkg.nd_command' member of the ioctl struct
- */
-enum papr_pdsm {
-	PAPR_PDSM_MIN = 0x0,
-	PAPR_PDSM_HEALTH,
-	PAPR_PDSM_SMART_INJECT,
-	PAPR_PDSM_MAX,
-};
-
-/* Various nvdimm health indicators */
-#define PAPR_PDSM_DIMM_HEALTHY       0
-#define PAPR_PDSM_DIMM_UNHEALTHY     1
-#define PAPR_PDSM_DIMM_CRITICAL      2
-#define PAPR_PDSM_DIMM_FATAL         3
-
-/* struct nd_papr_pdsm_health.extension_flags field flags */
-
-/* Indicate that the 'dimm_fuel_gauge' field is valid */
-#define PDSM_DIMM_HEALTH_RUN_GAUGE_VALID 1
-
-/* Indicate that the 'dimm_dsc' field is valid */
-#define PDSM_DIMM_DSC_VALID 2
-
-/*
- * Struct exchanged between kernel & ndctl in for PAPR_PDSM_HEALTH
- * Various flags indicate the health status of the dimm.
- */
-struct nd_papr_pdsm_health {
-	union {
-		struct {
-			__u32 extension_flags;
-			__u8 dimm_unarmed;
-			__u8 dimm_bad_shutdown;
-			__u8 dimm_bad_restore;
-			__u8 dimm_scrubbed;
-			__u8 dimm_locked;
-			__u8 dimm_encrypted;
-			__u16 dimm_health;
-
-			/* Extension flag PDSM_DIMM_HEALTH_RUN_GAUGE_VALID */
-			__u16 dimm_fuel_gauge;
-
-			/* Extension flag PDSM_DIMM_DSC_VALID */
-			__u64 dimm_dsc;
-		};
-		__u8 buf[ND_PDSM_PAYLOAD_MAX_SIZE];
-	};
-};
-
-/* Flags for injecting specific smart errors */
-#define PDSM_SMART_INJECT_HEALTH_FATAL		(1 << 0)
-#define PDSM_SMART_INJECT_BAD_SHUTDOWN		(1 << 1)
-
-struct nd_papr_pdsm_smart_inject {
-	union {
-		struct {
-			/* One or more of PDSM_SMART_INJECT_ */
-			__u32 flags;
-			__u8 fatal_enable;
-			__u8 unsafe_shutdown_enable;
-		};
-		__u8 buf[ND_PDSM_PAYLOAD_MAX_SIZE];
-	};
-};
-
-/* Maximal union that can hold all possible payload types */
-union nd_pdsm_payload {
-	struct nd_papr_pdsm_health health;
-	struct nd_papr_pdsm_smart_inject smart_inject;
-	__u8 buf[ND_PDSM_PAYLOAD_MAX_SIZE];
-} __packed;
-
-/*
- * PDSM-header + payload expected with ND_CMD_CALL ioctl from libnvdimm
- * Valid member of union 'payload' is identified via 'nd_cmd_pkg.nd_command'
- * that should always precede this struct when sent to papr_scm via CMD_CALL
- * interface.
- */
-struct nd_pkg_pdsm {
-	__s32 cmd_status;	/* Out: Sub-cmd status returned back */
-	__u16 reserved[2];	/* Ignored and to be set as '0' */
-	union nd_pdsm_payload payload;
-} __packed;
-
 struct ndtest_config {
 	struct ndtest_dimm *dimms;
 	struct ndtest_region *regions;



^ permalink raw reply related

* [PATCH 1/5] s390/pci: refresh function handle in iomap
From: Niklas Schnelle @ 2021-09-06  9:49 UTC (permalink / raw)
  To: Bjorn Helgaas
  Cc: linux-s390, Pierre Morel, Matthew Rosato, linux-kernel,
	Oliver O'Halloran, Linas Vepstas, linuxppc-dev
In-Reply-To: <20210906094927.524106-1-schnelle@linux.ibm.com>

The function handle of a PCI function is updated when disabling or
enabling it as well as when the function's availability changes or it
enters the error state.

Until now this only occurred either while there was no struct pci_dev
associated with the function yet or when the function became unavailable.
This meant that leaving a stale function handle in the iomap either
didn't happen because there was no iomap yet, or it led to errors on PCI
access, but so would the correct disabled function handle.

In the future a CLP Set PCI Function Disable/Enable cycle during PCI
device recovery may be done while the device is bound to a driver.  In
this case we must update the iomap associated with the now-stale
function handle to ensure that the resulting zPCI instruction references
an accurate function handle.

Since the function handle is accessed by the PCI accessor helpers
without locking, use READ_ONCE()/WRITE_ONCE() to mark this access and
prevent compiler optimizations that would move the load/store.

With that infrastructure in place, let's also properly update the
function handle in the existing cases. This makes sure that, in the
future, debugging of a zPCI function access through the handle will
show an up-to-date handle, reducing the chance of confusion. It also
makes sure we have one single place where a zPCI function handle is
updated after initialization.

Signed-off-by: Niklas Schnelle <schnelle@linux.ibm.com>
---
 arch/s390/include/asm/pci.h |  1 +
 arch/s390/pci/pci.c         | 36 ++++++++++++++++++++++++++++++++----
 arch/s390/pci/pci_event.c   |  6 +++---
 arch/s390/pci/pci_insn.c    |  4 ++--
 4 files changed, 38 insertions(+), 9 deletions(-)

diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h
index e4803ec51110..5e6cba22a801 100644
--- a/arch/s390/include/asm/pci.h
+++ b/arch/s390/include/asm/pci.h
@@ -211,6 +211,7 @@ int zpci_deconfigure_device(struct zpci_dev *zdev);
 int zpci_register_ioat(struct zpci_dev *, u8, u64, u64, u64);
 int zpci_unregister_ioat(struct zpci_dev *, u8);
 void zpci_remove_reserved_devices(void);
+void zpci_update_fh(struct zpci_dev *zdev, u32 fh);
 
 /* CLP */
 int clp_setup_writeback_mio(void);
diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c
index e7e6788d75a8..af22778551c1 100644
--- a/arch/s390/pci/pci.c
+++ b/arch/s390/pci/pci.c
@@ -481,6 +481,34 @@ static void zpci_free_iomap(struct zpci_dev *zdev, int entry)
 	spin_unlock(&zpci_iomap_lock);
 }
 
+static void zpci_do_update_iomap_fh(struct zpci_dev *zdev, u32 fh)
+{
+	int bar, idx;
+
+	spin_lock(&zpci_iomap_lock);
+	for (bar = 0; bar < PCI_STD_NUM_BARS; bar++) {
+		if (!zdev->bars[bar].size)
+			continue;
+		idx = zdev->bars[bar].map_idx;
+		if (!zpci_iomap_start[idx].count)
+			continue;
+		WRITE_ONCE(zpci_iomap_start[idx].fh, zdev->fh);
+	}
+	spin_unlock(&zpci_iomap_lock);
+}
+
+void zpci_update_fh(struct zpci_dev *zdev, u32 fh)
+{
+	if (!fh || zdev->fh == fh)
+		return;
+
+	zdev->fh = fh;
+	if (zpci_use_mio(zdev))
+		return;
+	if (zdev->has_resources && zdev_enabled(zdev))
+		zpci_do_update_iomap_fh(zdev, fh);
+}
+
 static struct resource *__alloc_res(struct zpci_dev *zdev, unsigned long start,
 				    unsigned long size, unsigned long flags)
 {
@@ -668,7 +696,7 @@ int zpci_enable_device(struct zpci_dev *zdev)
 	if (clp_enable_fh(zdev, &fh, ZPCI_NR_DMA_SPACES))
 		rc = -EIO;
 	else
-		zdev->fh = fh;
+		zpci_update_fh(zdev, fh);
 	return rc;
 }
 
@@ -679,14 +707,14 @@ int zpci_disable_device(struct zpci_dev *zdev)
 
 	cc = clp_disable_fh(zdev, &fh);
 	if (!cc) {
-		zdev->fh = fh;
+		zpci_update_fh(zdev, fh);
 	} else if (cc == CLP_RC_SETPCIFN_ALRDY) {
 		pr_info("Disabling PCI function %08x had no effect as it was already disabled\n",
 			zdev->fid);
 		/* Function is already disabled - update handle */
 		rc = clp_refresh_fh(zdev->fid, &fh);
 		if (!rc) {
-			zdev->fh = fh;
+			zpci_update_fh(zdev, fh);
 			rc = -EINVAL;
 		}
 	} else {
@@ -768,7 +796,7 @@ int zpci_scan_configured_device(struct zpci_dev *zdev, u32 fh)
 {
 	int rc;
 
-	zdev->fh = fh;
+	zpci_update_fh(zdev, fh);
 	/* the PCI function will be scanned once function 0 appears */
 	if (!zdev->zbus->bus)
 		return 0;
diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c
index c856f80cb21b..e868d996ec5b 100644
--- a/arch/s390/pci/pci_event.c
+++ b/arch/s390/pci/pci_event.c
@@ -76,7 +76,7 @@ void zpci_event_error(void *data)
 
 static void zpci_event_hard_deconfigured(struct zpci_dev *zdev, u32 fh)
 {
-	zdev->fh = fh;
+	zpci_update_fh(zdev, fh);
 	/* Give the driver a hint that the function is
 	 * already unusable.
 	 */
@@ -117,7 +117,7 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf)
 		if (!zdev)
 			zpci_create_device(ccdf->fid, ccdf->fh, ZPCI_FN_STATE_STANDBY);
 		else
-			zdev->fh = ccdf->fh;
+			zpci_update_fh(zdev, ccdf->fh);
 		break;
 	case 0x0303: /* Deconfiguration requested */
 		if (zdev) {
@@ -126,7 +126,7 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf)
 			 */
 			if (zdev->state != ZPCI_FN_STATE_CONFIGURED)
 				break;
-			zdev->fh = ccdf->fh;
+			zpci_update_fh(zdev, ccdf->fh);
 			zpci_deconfigure_device(zdev);
 		}
 		break;
diff --git a/arch/s390/pci/pci_insn.c b/arch/s390/pci/pci_insn.c
index 2e43996159f0..28d863aaafea 100644
--- a/arch/s390/pci/pci_insn.c
+++ b/arch/s390/pci/pci_insn.c
@@ -163,7 +163,7 @@ static inline int zpci_load_fh(u64 *data, const volatile void __iomem *addr,
 			       unsigned long len)
 {
 	struct zpci_iomap_entry *entry = &zpci_iomap_start[ZPCI_IDX(addr)];
-	u64 req = ZPCI_CREATE_REQ(entry->fh, entry->bar, len);
+	u64 req = ZPCI_CREATE_REQ(READ_ONCE(entry->fh), entry->bar, len);
 
 	return __zpci_load(data, req, ZPCI_OFFSET(addr));
 }
@@ -244,7 +244,7 @@ static inline int zpci_store_fh(const volatile void __iomem *addr, u64 data,
 				unsigned long len)
 {
 	struct zpci_iomap_entry *entry = &zpci_iomap_start[ZPCI_IDX(addr)];
-	u64 req = ZPCI_CREATE_REQ(entry->fh, entry->bar, len);
+	u64 req = ZPCI_CREATE_REQ(READ_ONCE(entry->fh), entry->bar, len);
 
 	return __zpci_store(data, req, ZPCI_OFFSET(addr));
 }
-- 
2.25.1


^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox