LinuxPPC-Dev Archive on lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 15/16] debug_vm_pgtable/savedwrite: Use savedwrite test with protnone ptes
From: Aneesh Kumar K.V @ 2020-08-12  6:33 UTC (permalink / raw)
  To: linux-mm, akpm; +Cc: linuxppc-dev, Aneesh Kumar K.V, Anshuman Khandual
In-Reply-To: <20200812063358.369514-1-aneesh.kumar@linux.ibm.com>

Saved write support was added to track the write bit of a pte after marking the
pte protnone. This was done so that AUTONUMA can convert a write pte to protnone
and still track the old write bit. When converting it back we set the pte write
bit correctly thereby avoiding a write fault again.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
---
 mm/debug_vm_pgtable.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/mm/debug_vm_pgtable.c b/mm/debug_vm_pgtable.c
index 3e112d0ba1b2..eea62d5e503b 100644
--- a/mm/debug_vm_pgtable.c
+++ b/mm/debug_vm_pgtable.c
@@ -1006,8 +1006,8 @@ static int __init debug_vm_pgtable(void)
 	pud_leaf_tests(pud_aligned, prot);
 
 #ifdef CONFIG_NUMA_BALANCING
-	pte_savedwrite_tests(pte_aligned, prot);
-	pmd_savedwrite_tests(pmd_aligned, prot);
+	pte_savedwrite_tests(pte_aligned, protnone);
+	pmd_savedwrite_tests(pmd_aligned, protnone);
 #endif
 	pte_special_tests(pte_aligned, prot);
 	pte_protnone_tests(pte_aligned, protnone);
-- 
2.26.2


^ permalink raw reply related

* [PATCH 16/16] debug_vm_pgtable/ppc64: Add a variant of pfn_pte/pmd
From: Aneesh Kumar K.V @ 2020-08-12  6:33 UTC (permalink / raw)
  To: linux-mm, akpm; +Cc: linuxppc-dev, Aneesh Kumar K.V, Anshuman Khandual
In-Reply-To: <20200812063358.369514-1-aneesh.kumar@linux.ibm.com>

The tests do expect _PAGE_PTE bit set by different page table accessors.
This is not true for the kernel. Within the kernel, _PAGE_PTE bits are
usually set by set_pte_at(). To make the below tests work correctly add test
specific pfn_pte/pmd helpers that set _PAGE_PTE bit.

pte_t pte = pfn_pte(pfn, prot);
WARN_ON(!pte_devmap(pte_mkdevmap(pte)));
WARN_ON(!pte_savedwrite(pte_mk_savedwrite(pte_clear_savedwrite(pte))));

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
---
 mm/debug_vm_pgtable.c | 65 +++++++++++++++++++++++++++----------------
 1 file changed, 41 insertions(+), 24 deletions(-)

diff --git a/mm/debug_vm_pgtable.c b/mm/debug_vm_pgtable.c
index eea62d5e503b..153c925b5273 100644
--- a/mm/debug_vm_pgtable.c
+++ b/mm/debug_vm_pgtable.c
@@ -31,6 +31,23 @@
 #include <asm/pgalloc.h>
 #include <asm/tlbflush.h>
 
+#ifdef CONFIG_PPC_BOOK3S_64
+static inline pte_t debug_vm_pfn_pte(unsigned long pfn, pgprot_t pgprot)
+{
+	pte_t pte = pfn_pte(pfn, pgprot);
+	return __pte(pte_val(pte) | _PAGE_PTE);
+
+}
+static inline pmd_t debug_vm_pfn_pmd(unsigned long pfn, pgprot_t pgprot)
+{
+	pmd_t pmd = pfn_pmd(pfn, pgprot);
+	return __pmd(pmd_val(pmd) | _PAGE_PTE);
+}
+#else
+#define debug_vm_pfn_pte(pfn, pgprot) pfn_pte(pfn, pgprot)
+#define debug_vm_pfn_pmd(pfn, pgprot) pfn_pmd(pfn, pgprot)
+#endif
+
 /*
  * Please refer Documentation/vm/arch_pgtable_helpers.rst for the semantics
  * expectations that are being validated here. All future changes in here
@@ -55,7 +72,7 @@
 
 static void __init pte_basic_tests(unsigned long pfn, pgprot_t prot)
 {
-	pte_t pte = pfn_pte(pfn, prot);
+	pte_t pte = debug_vm_pfn_pte(pfn, prot);
 
 	pr_debug("Validating PTE basic\n");
 	WARN_ON(!pte_same(pte, pte));
@@ -72,10 +89,10 @@ static void __init pte_advanced_tests(struct mm_struct *mm,
 				      unsigned long pfn, unsigned long vaddr,
 				      pgprot_t prot)
 {
-	pte_t pte = pfn_pte(pfn, prot);
+	pte_t pte = debug_vm_pfn_pte(pfn, prot);
 
 	pr_debug("Validating PTE advanced\n");
-	pte = pfn_pte(pfn, prot);
+	pte = debug_vm_pfn_pte(pfn, prot);
 	set_pte_at(mm, vaddr, ptep, pte);
 	ptep_set_wrprotect(mm, vaddr, ptep);
 	pte = ptep_get(ptep);
@@ -85,7 +102,7 @@ static void __init pte_advanced_tests(struct mm_struct *mm,
 	pte = ptep_get(ptep);
 	WARN_ON(!pte_none(pte));
 
-	pte = pfn_pte(pfn, prot);
+	pte = debug_vm_pfn_pte(pfn, prot);
 	pte = pte_wrprotect(pte);
 	pte = pte_mkclean(pte);
 	set_pte_at(mm, vaddr, ptep, pte);
@@ -113,7 +130,7 @@ static void __init pte_advanced_tests(struct mm_struct *mm,
 #ifdef CONFIG_NUMA_BALANCING
 static void __init pte_savedwrite_tests(unsigned long pfn, pgprot_t prot)
 {
-	pte_t pte = pfn_pte(pfn, prot);
+	pte_t pte = debug_vm_pfn_pte(pfn, prot);
 
 	pr_debug("Validating PTE saved write\n");
 	WARN_ON(!pte_savedwrite(pte_mk_savedwrite(pte_clear_savedwrite(pte))));
@@ -124,7 +141,7 @@ static void __init pte_savedwrite_tests(unsigned long pfn, pgprot_t prot)
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 static void __init pmd_basic_tests(unsigned long pfn, pgprot_t prot)
 {
-	pmd_t pmd = pfn_pmd(pfn, prot);
+	pmd_t pmd = debug_vm_pfn_pmd(pfn, prot);
 
 	if (!has_transparent_hugepage())
 		return;
@@ -160,7 +177,7 @@ static void __init pmd_advanced_tests(struct mm_struct *mm,
 
 	pgtable_trans_huge_deposit(mm, pmdp, pgtable);
 
-	pmd = pmd_mkhuge(pfn_pmd(pfn, prot));
+	pmd = pmd_mkhuge(debug_vm_pfn_pmd(pfn, prot));
 	set_pmd_at(mm, vaddr, pmdp, pmd);
 	pmdp_set_wrprotect(mm, vaddr, pmdp);
 	pmd = READ_ONCE(*pmdp);
@@ -170,7 +187,7 @@ static void __init pmd_advanced_tests(struct mm_struct *mm,
 	pmd = READ_ONCE(*pmdp);
 	WARN_ON(!pmd_none(pmd));
 
-	pmd = pmd_mkhuge(pfn_pmd(pfn, prot));
+	pmd = pmd_mkhuge(debug_vm_pfn_pmd(pfn, prot));
 	pmd = pmd_wrprotect(pmd);
 	pmd = pmd_mkclean(pmd);
 	set_pmd_at(mm, vaddr, pmdp, pmd);
@@ -184,7 +201,7 @@ static void __init pmd_advanced_tests(struct mm_struct *mm,
 	pmd = READ_ONCE(*pmdp);
 	WARN_ON(!pmd_none(pmd));
 
-	pmd = pmd_mkhuge(pfn_pmd(pfn, prot));
+	pmd = pmd_mkhuge(debug_vm_pfn_pmd(pfn, prot));
 	pmd = pmd_mkyoung(pmd);
 	set_pmd_at(mm, vaddr, pmdp, pmd);
 	pmdp_test_and_clear_young(vma, vaddr, pmdp);
@@ -198,7 +215,7 @@ static void __init pmd_advanced_tests(struct mm_struct *mm,
 
 static void __init pmd_leaf_tests(unsigned long pfn, pgprot_t prot)
 {
-	pmd_t pmd = pfn_pmd(pfn, prot);
+	pmd_t pmd = debug_vm_pfn_pmd(pfn, prot);
 
 	pr_debug("Validating PMD leaf\n");
 	/*
@@ -230,7 +247,7 @@ static void __init pmd_huge_tests(pmd_t *pmdp, unsigned long pfn, pgprot_t prot)
 #ifdef CONFIG_NUMA_BALANCING
 static void __init pmd_savedwrite_tests(unsigned long pfn, pgprot_t prot)
 {
-	pmd_t pmd = pfn_pmd(pfn, prot);
+	pmd_t pmd = debug_vm_pfn_pmd(pfn, prot);
 
 	pr_debug("Validating PMD saved write\n");
 	WARN_ON(!pmd_savedwrite(pmd_mk_savedwrite(pmd_clear_savedwrite(pmd))));
@@ -573,7 +590,7 @@ static void __init pmd_populate_tests(struct mm_struct *mm, pmd_t *pmdp,
 
 static void __init pte_special_tests(unsigned long pfn, pgprot_t prot)
 {
-	pte_t pte = pfn_pte(pfn, prot);
+	pte_t pte = debug_vm_pfn_pte(pfn, prot);
 
 	if (!IS_ENABLED(CONFIG_ARCH_HAS_PTE_SPECIAL))
 		return;
@@ -584,7 +601,7 @@ static void __init pte_special_tests(unsigned long pfn, pgprot_t prot)
 
 static void __init pte_protnone_tests(unsigned long pfn, pgprot_t prot)
 {
-	pte_t pte = pfn_pte(pfn, prot);
+	pte_t pte = debug_vm_pfn_pte(pfn, prot);
 
 	if (!IS_ENABLED(CONFIG_NUMA_BALANCING))
 		return;
@@ -597,7 +614,7 @@ static void __init pte_protnone_tests(unsigned long pfn, pgprot_t prot)
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 static void __init pmd_protnone_tests(unsigned long pfn, pgprot_t prot)
 {
-	pmd_t pmd = pmd_mkhuge(pfn_pmd(pfn, prot));
+	pmd_t pmd = pmd_mkhuge(debug_vm_pfn_pmd(pfn, prot));
 
 	if (!IS_ENABLED(CONFIG_NUMA_BALANCING))
 		return;
@@ -613,7 +630,7 @@ static void __init pmd_protnone_tests(unsigned long pfn, pgprot_t prot) { }
 #ifdef CONFIG_ARCH_HAS_PTE_DEVMAP
 static void __init pte_devmap_tests(unsigned long pfn, pgprot_t prot)
 {
-	pte_t pte = pfn_pte(pfn, prot);
+	pte_t pte = debug_vm_pfn_pte(pfn, prot);
 
 	pr_debug("Validating PTE devmap\n");
 	WARN_ON(!pte_devmap(pte_mkdevmap(pte)));
@@ -622,7 +639,7 @@ static void __init pte_devmap_tests(unsigned long pfn, pgprot_t prot)
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 static void __init pmd_devmap_tests(unsigned long pfn, pgprot_t prot)
 {
-	pmd_t pmd = pfn_pmd(pfn, prot);
+	pmd_t pmd = debug_vm_pfn_pmd(pfn, prot);
 
 	pr_debug("Validating PMD devmap\n");
 	WARN_ON(!pmd_devmap(pmd_mkdevmap(pmd)));
@@ -651,7 +668,7 @@ static void __init pud_devmap_tests(unsigned long pfn, pgprot_t prot) { }
 
 static void __init pte_soft_dirty_tests(unsigned long pfn, pgprot_t prot)
 {
-	pte_t pte = pfn_pte(pfn, prot);
+	pte_t pte = debug_vm_pfn_pte(pfn, prot);
 
 	if (!IS_ENABLED(CONFIG_MEM_SOFT_DIRTY))
 		return;
@@ -663,7 +680,7 @@ static void __init pte_soft_dirty_tests(unsigned long pfn, pgprot_t prot)
 
 static void __init pte_swap_soft_dirty_tests(unsigned long pfn, pgprot_t prot)
 {
-	pte_t pte = pfn_pte(pfn, prot);
+	pte_t pte = debug_vm_pfn_pte(pfn, prot);
 
 	if (!IS_ENABLED(CONFIG_MEM_SOFT_DIRTY))
 		return;
@@ -676,7 +693,7 @@ static void __init pte_swap_soft_dirty_tests(unsigned long pfn, pgprot_t prot)
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 static void __init pmd_soft_dirty_tests(unsigned long pfn, pgprot_t prot)
 {
-	pmd_t pmd = pfn_pmd(pfn, prot);
+	pmd_t pmd = debug_vm_pfn_pmd(pfn, prot);
 
 	if (!IS_ENABLED(CONFIG_MEM_SOFT_DIRTY))
 		return;
@@ -688,7 +705,7 @@ static void __init pmd_soft_dirty_tests(unsigned long pfn, pgprot_t prot)
 
 static void __init pmd_swap_soft_dirty_tests(unsigned long pfn, pgprot_t prot)
 {
-	pmd_t pmd = pfn_pmd(pfn, prot);
+	pmd_t pmd = debug_vm_pfn_pmd(pfn, prot);
 
 	if (!IS_ENABLED(CONFIG_MEM_SOFT_DIRTY) ||
 		!IS_ENABLED(CONFIG_ARCH_ENABLE_THP_MIGRATION))
@@ -711,7 +728,7 @@ static void __init pte_swap_tests(unsigned long pfn, pgprot_t prot)
 	pte_t pte;
 
 	pr_debug("Validating PTE swap\n");
-	pte = pfn_pte(pfn, prot);
+	pte = debug_vm_pfn_pte(pfn, prot);
 	swp = __pte_to_swp_entry(pte);
 	pte = __swp_entry_to_pte(swp);
 	WARN_ON(pfn != pte_pfn(pte));
@@ -724,7 +741,7 @@ static void __init pmd_swap_tests(unsigned long pfn, pgprot_t prot)
 	pmd_t pmd;
 
 	pr_debug("Validating PMD swap\n");
-	pmd = pfn_pmd(pfn, prot);
+	pmd = debug_vm_pfn_pmd(pfn, prot);
 	swp = __pmd_to_swp_entry(pmd);
 	pmd = __swp_entry_to_pmd(swp);
 	WARN_ON(pfn != pmd_pfn(pmd));
@@ -794,7 +811,7 @@ static void __init hugetlb_basic_tests(unsigned long pfn, pgprot_t prot)
 	WARN_ON(huge_pte_write(huge_pte_wrprotect(huge_pte_mkwrite(pte))));
 
 #ifdef CONFIG_ARCH_WANT_GENERAL_HUGETLB
-	pte = pfn_pte(pfn, prot);
+	pte = debug_vm_pfn_pte(pfn, prot);
 
 	WARN_ON(!pte_huge(pte_mkhuge(pte)));
 #endif /* CONFIG_ARCH_WANT_GENERAL_HUGETLB */
@@ -874,7 +891,7 @@ static void __init pmd_thp_tests(unsigned long pfn, pgprot_t prot)
 	 * needs to return true. pmd_present() should be true whenever
 	 * pmd_trans_huge() returns true.
 	 */
-	pmd = pfn_pmd(pfn, prot);
+	pmd = debug_vm_pfn_pmd(pfn, prot);
 	WARN_ON(!pmd_trans_huge(pmd_mkhuge(pmd)));
 
 #ifndef __HAVE_ARCH_PMDP_INVALIDATE
-- 
2.26.2


^ permalink raw reply related

* Re: [PATCH 02/16] debug_vm_pgtable/ppc64: Avoid setting top bits in radom value
From: Christophe Leroy @ 2020-08-12  6:40 UTC (permalink / raw)
  To: Aneesh Kumar K.V, linux-mm, akpm; +Cc: linuxppc-dev, Anshuman Khandual
In-Reply-To: <20200812063358.369514-2-aneesh.kumar@linux.ibm.com>



Le 12/08/2020 à 08:33, Aneesh Kumar K.V a écrit :
> ppc64 use bit 62 to indicate a pte entry (_PAGE_PTE). Avoid setting that bit in
> random value.
> 
> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
> ---
>   mm/debug_vm_pgtable.c | 5 ++++-
>   1 file changed, 4 insertions(+), 1 deletion(-)
> 
> diff --git a/mm/debug_vm_pgtable.c b/mm/debug_vm_pgtable.c
> index 086309fb9b6f..4c32063a8acf 100644
> --- a/mm/debug_vm_pgtable.c
> +++ b/mm/debug_vm_pgtable.c
> @@ -45,9 +45,12 @@
>    * pxx_clear() because of how dynamic page table folding works on s390. So
>    * while loading up the entries do not change the lower 4 bits. It does not
>    * have affect any other platform.
> + *
> + * Also avoid the 62nd bit on ppc64 that is used to mark a pte entry.
>    */
>   #define S390_MASK_BITS	4
> -#define RANDOM_ORVALUE	GENMASK(BITS_PER_LONG - 1, S390_MASK_BITS)
> +#define PPC_MASK_BITS	2
> +#define RANDOM_ORVALUE	GENMASK(BITS_PER_LONG - 1 - PPC_MASK_BITS, S390_MASK_BITS)

Do you mean:

#define RANDOM_ORVALUE	GENMASK(BITS_PER_LONG - 1, PPC_MASK_BITS | 
S390_MASK_BITS)

Christophe

>   #define RANDOM_NZVALUE	GENMASK(7, 0)
>   
>   static void __init pte_basic_tests(unsigned long pfn, pgprot_t prot)
> 

^ permalink raw reply

* Re: [PATCH 02/16] debug_vm_pgtable/ppc64: Avoid setting top bits in radom value
From: Aneesh Kumar K.V @ 2020-08-12  6:43 UTC (permalink / raw)
  To: Christophe Leroy, linux-mm, akpm; +Cc: linuxppc-dev, Anshuman Khandual
In-Reply-To: <24b1e523-e87e-161b-3dc9-60bd11c8f461@csgroup.eu>

On 8/12/20 12:10 PM, Christophe Leroy wrote:
> 
> 
> Le 12/08/2020 à 08:33, Aneesh Kumar K.V a écrit :
>> ppc64 use bit 62 to indicate a pte entry (_PAGE_PTE). Avoid setting 
>> that bit in
>> random value.
>>
>> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
>> ---
>>   mm/debug_vm_pgtable.c | 5 ++++-
>>   1 file changed, 4 insertions(+), 1 deletion(-)
>>
>> diff --git a/mm/debug_vm_pgtable.c b/mm/debug_vm_pgtable.c
>> index 086309fb9b6f..4c32063a8acf 100644
>> --- a/mm/debug_vm_pgtable.c
>> +++ b/mm/debug_vm_pgtable.c
>> @@ -45,9 +45,12 @@
>>    * pxx_clear() because of how dynamic page table folding works on 
>> s390. So
>>    * while loading up the entries do not change the lower 4 bits. It 
>> does not
>>    * have affect any other platform.
>> + *
>> + * Also avoid the 62nd bit on ppc64 that is used to mark a pte entry.
>>    */
>>   #define S390_MASK_BITS    4
>> -#define RANDOM_ORVALUE    GENMASK(BITS_PER_LONG - 1, S390_MASK_BITS)
>> +#define PPC_MASK_BITS    2
>> +#define RANDOM_ORVALUE    GENMASK(BITS_PER_LONG - 1 - PPC_MASK_BITS, 
>> S390_MASK_BITS)
> 
> Do you mean:
> 
> #define RANDOM_ORVALUE    GENMASK(BITS_PER_LONG - 1, PPC_MASK_BITS | 
> S390_MASK_BITS)


IIUC GENMASK(hi, low) generate a mask from hi to low bits. Since i want 
to avoid bit 62, I am forcing it to generate bits from (61, 4)


-aneesh

^ permalink raw reply

* Re: [PATCH] powerpc/papr_scm: Make access mode of 'perf_stats' attribute file to '0400'
From: Vaibhav Jain @ 2020-08-12  7:36 UTC (permalink / raw)
  To: Michael Ellerman, linuxppc-dev, linux-nvdimm
  Cc: Aneesh Kumar K . V, Santosh Sivaraj, Oliver O'Halloran,
	Dan Williams, Ira Weiny
In-Reply-To: <87wo26abmf.fsf@mpe.ellerman.id.au>

Hi Mpe,

Thanks for reviewing this patch. My responses below:

Michael Ellerman <mpe@ellerman.id.au> writes:

> Vaibhav Jain <vaibhav@linux.ibm.com> writes:
>> The newly introduced 'perf_stats' attribute uses the default access
>> mode of 0444 letting non-root users access performance stats of an
>> nvdimm and potentially force the kernel into issuing large number of
>> expensive HCALLs. Since the information exposed by this attribute
>> cannot be cached hence its better to ward of access to this attribute
>> from non-root users.
>>
>> Hence this patch updates the access-mode of 'perf_stats' sysfs
>> attribute file to 0400 to make it only readable to root-users.
>
> Or should we ratelimit it?
Ideal consumers of this data will be users with CAP_PERFMON or
CAP_SYS_ADMIN. Also they need up-to-date values for these performance stats
as these values can be time sensitive.

So rate limiting may not be a complete solution since a user running
'perf' might be throttled by another user who is simply reading the
sysfs file contents.

So instead of setting attribute mode to 0400, will add a check for
'perfmon_capable()' in perf_stats_show() denying read access to users
without CAP_PERFMON or CAP_SYS_ADMIN.


> Fixes: ??
Right. I will add this in v2.

>
>> Reported-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
>> Signed-off-by: Vaibhav Jain <vaibhav@linux.ibm.com>
>
> cheers
>

-- 
Cheers
~ Vaibhav

^ permalink raw reply

* Re: [PATCH 01/16] powerpc/mm: Add DEBUG_VM WARN for pmd_clear
From: Anshuman Khandual @ 2020-08-12  7:46 UTC (permalink / raw)
  To: Aneesh Kumar K.V, linux-mm, akpm; +Cc: linuxppc-dev
In-Reply-To: <20200812063358.369514-1-aneesh.kumar@linux.ibm.com>

On 08/12/2020 12:03 PM, Aneesh Kumar K.V wrote:
> With the hash page table, the kernel should not use pmd_clear for clearing
> huge pte entries. Add a DEBUG_VM WARN to catch the wrong usage.
> 
> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>

This particular change is very much powerpc specific. Hence please drop it from
the series which otherwise changes the page table test. Also, this series which
is not a RFC, still lacks a proper cover letter with diff stats, tree/tag on
which this applies, summary about the proposal etc. All those information will
be helpful in reviewing this series better. For now, assuming that this applies
cleanly on current master branch. But again, please do include a cover letter
in the next version.

> ---
>  arch/powerpc/include/asm/book3s/64/pgtable.h | 14 ++++++++++++++
>  1 file changed, 14 insertions(+)
> 
> diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
> index 6de56c3b33c4..079211968987 100644
> --- a/arch/powerpc/include/asm/book3s/64/pgtable.h
> +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
> @@ -868,6 +868,13 @@ static inline bool pte_ci(pte_t pte)
>  
>  static inline void pmd_clear(pmd_t *pmdp)
>  {
> +	if (IS_ENABLED(CONFIG_DEBUG_VM) && !radix_enabled()) {
> +		/*
> +		 * Don't use this if we can possibly have a hash page table
> +		 * entry mapping this.
> +		 */
> +		WARN_ON((pmd_val(*pmdp) & (H_PAGE_HASHPTE | _PAGE_PTE)) == (H_PAGE_HASHPTE | _PAGE_PTE));
> +	}
>  	*pmdp = __pmd(0);
>  }
>  
> @@ -916,6 +923,13 @@ static inline int pmd_bad(pmd_t pmd)
>  
>  static inline void pud_clear(pud_t *pudp)
>  {
> +	if (IS_ENABLED(CONFIG_DEBUG_VM) && !radix_enabled()) {
> +		/*
> +		 * Don't use this if we can possibly have a hash page table
> +		 * entry mapping this.
> +		 */
> +		WARN_ON((pud_val(*pudp) & (H_PAGE_HASHPTE | _PAGE_PTE)) == (H_PAGE_HASHPTE | _PAGE_PTE));
> +	}
>  	*pudp = __pud(0);
>  }
>  
> 

^ permalink raw reply

* [PATCH 0/2] powerpc: unrel_branch_check.sh: enable llvm-objdump
From: Stephen Rothwell @ 2020-08-12  8:10 UTC (permalink / raw)
  To: Michael Ellerman; +Cc: Linux PowerPC List, Bill Wendling, Nicholas Piggin

These 2 patches enable this script to work properly when llvm-objtool
is being used.

They depend on my previos series that make this script suck less.

Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: Bill Wendling <morbo@google.com>


^ permalink raw reply

* [PATCH 1/2] powerpc: unrel_branch_check.sh: use nm to find symbol value
From: Stephen Rothwell @ 2020-08-12  8:10 UTC (permalink / raw)
  To: Michael Ellerman; +Cc: Linux PowerPC List, Bill Wendling, Nicholas Piggin
In-Reply-To: <20200812081036.7969-1-sfr@canb.auug.org.au>

This is considerably faster then parsing the objdump asm output.  It will
also make the enabling of llvm-objdump a little easier.

Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: Bill Wendling <morbo@google.com>
Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
---
 arch/powerpc/Makefile.postlink           |  2 +-
 arch/powerpc/tools/unrel_branch_check.sh | 13 +++++--------
 2 files changed, 6 insertions(+), 9 deletions(-)

diff --git a/arch/powerpc/Makefile.postlink b/arch/powerpc/Makefile.postlink
index 2268396ff4bb..a6c77f4d32b2 100644
--- a/arch/powerpc/Makefile.postlink
+++ b/arch/powerpc/Makefile.postlink
@@ -18,7 +18,7 @@ quiet_cmd_relocs_check = CHKREL  $@
 ifdef CONFIG_PPC_BOOK3S_64
       cmd_relocs_check =						\
 	$(CONFIG_SHELL) $(srctree)/arch/powerpc/tools/relocs_check.sh "$(OBJDUMP)" "$(NM)" "$@" ; \
-	$(BASH) $(srctree)/arch/powerpc/tools/unrel_branch_check.sh "$(OBJDUMP)" "$@"
+	$(BASH) $(srctree)/arch/powerpc/tools/unrel_branch_check.sh "$(OBJDUMP)" "$(NM)" "$@"
 else
       cmd_relocs_check =						\
 	$(CONFIG_SHELL) $(srctree)/arch/powerpc/tools/relocs_check.sh "$(OBJDUMP)" "$(NM)" "$@"
diff --git a/arch/powerpc/tools/unrel_branch_check.sh b/arch/powerpc/tools/unrel_branch_check.sh
index 70da90270c78..0369eb2e7e4b 100755
--- a/arch/powerpc/tools/unrel_branch_check.sh
+++ b/arch/powerpc/tools/unrel_branch_check.sh
@@ -5,18 +5,15 @@
 # This script checks the unrelocated code of a vmlinux for "suspicious"
 # branches to relocated code (head_64.S code).
 
-# Have Kbuild supply the path to objdump so we handle cross compilation.
+# Have Kbuild supply the path to objdump and nm so we handle cross compilation.
 objdump="$1"
-vmlinux="$2"
+nm="$2"
+vmlinux="$3"
 
-#__end_interrupts should be located within the first 64K
 kstart=0xc000000000000000
-printf -v kend '0x%x' $(( kstart + 0x10000 ))
 
-end_intr=0x$(
-$objdump -R -d --start-address="$kstart" --stop-address="$kend" "$vmlinux" 2>/dev/null |
-awk '$2 == "<__end_interrupts>:" { print $1 }'
-)
+end_intr=0x$($nm -p "$vmlinux" |
+	sed -E -n '/\s+[[:alpha:]]\s+__end_interrupts\s*$/{s///p;q}')
 if [ "$end_intr" = "0x" ]; then
 	exit 0
 fi
-- 
2.28.0


^ permalink raw reply related

* [PATCH 2/2] powerpc: unrel_branch_check.sh: enable the use of llvm-objdump v9, 10 or 11
From: Stephen Rothwell @ 2020-08-12  8:10 UTC (permalink / raw)
  To: Michael Ellerman; +Cc: Linux PowerPC List, Bill Wendling, Nicholas Piggin
In-Reply-To: <20200812081036.7969-1-sfr@canb.auug.org.au>

Currently, using llvm-objtool, this script just silently succeeds without
actually do the intended checking.  So this updates it to work properly.

Firstly, llvm-objdump does not add target symbol names to the end
of branches in its asm output, so we have to drop the branch to
__start_initialization_multiplatform using its address.

Secondly, v9 and 10 specify branch targets as .+<offset>, so we convert
those to actual addresses.

Thirdly, v10 and 11 error out on a vmlinux if given the -R option
complaining that it is "not a dynamic object".  The -R does not make
any difference to the asm output, so remove it.

Lastly, v11 produces asm that is very similar to Gnu objtool (at least
as far as branches are concerned), so no further changes are necessary
to make it work.

Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: Bill Wendling <morbo@google.com>
Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
---
 arch/powerpc/tools/unrel_branch_check.sh | 34 ++++++++++++++++++++----
 1 file changed, 29 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/tools/unrel_branch_check.sh b/arch/powerpc/tools/unrel_branch_check.sh
index 0369eb2e7e4b..8301efee1e6c 100755
--- a/arch/powerpc/tools/unrel_branch_check.sh
+++ b/arch/powerpc/tools/unrel_branch_check.sh
@@ -18,12 +18,16 @@ if [ "$end_intr" = "0x" ]; then
 	exit 0
 fi
 
-$objdump -R -D --no-show-raw-insn --start-address="$kstart" --stop-address="$end_intr" "$vmlinux" |
+# we know that there is a correct branch to
+# __start_initialization_multiplatform, so find its address
+# so we can exclude it.
+sim=0x$($nm -p "$vmlinux" |
+	sed -E -n '/\s+[[:alpha:]]\s+__start_initialization_multiplatform\s*$/{s///p;q}')
+
+$objdump -D --no-show-raw-insn --start-address="$kstart" --stop-address="$end_intr" "$vmlinux" |
 sed -E -n '
 # match lines that start with a kernel address
 /^c[0-9a-f]*:\s*b/ {
-	# drop a target that we do not care about
-	/\<__start_initialization_multiplatform>/d
 	# drop branches via ctr or lr
 	/\<b.?.?(ct|l)r/d
 	# cope with some differences between Clang and GNU objdumps
@@ -33,14 +37,34 @@ sed -E -n '
 	s/\s0x/ /
 	s/://
 	# format for the loop below
-	s/^(\S+)\s+(\S+)\s+(\S+)\s*(\S*).*$/\1:\2:0x\3:\4/
+	s/^(\S+)\s+(\S+)\s+(\S+)\s*(\S*).*$/\1:\2:\3:\4/
 	# strip out condition registers
-	s/:0xcr[0-7],/:0x/
+	s/:cr[0-7],/:/
 	p
 }' | {
 
 all_good=true
 while IFS=: read -r from branch to sym; do
+	case "$to" in
+	c*)	to="0x$to"
+		;;
+	.+*)
+		to=${to#.+}
+		if [ "$branch" = 'b' ]; then
+			if (( to >= 0x2000000 )); then
+				to=$(( to - 0x4000000 ))
+			fi
+		elif (( to >= 0x8000 )); then
+			to=$(( to - 0x10000 ))
+		fi
+		printf -v to '0x%x' $(( "0x$from" + to ))
+		;;
+	*)	printf 'Unkown branch format\n'
+		;;
+	esac
+	if [ "$to" = "$sim" ]; then
+		continue
+	fi
 	if (( to > end_intr )); then
 		if $all_good; then
 			printf '%s\n' 'WARNING: Unrelocated relative branches'
-- 
2.28.0


^ permalink raw reply related

* Re: [PATCH v3 0/8] huge vmalloc mappings
From: Nicholas Piggin @ 2020-08-12  8:11 UTC (permalink / raw)
  To: Jonathan Cameron, Zefan Li
  Cc: linux-arch, Will Deacon, Catalin,  Marinas, x86, linux-kernel,
	linux-mm, Ingo Molnar, Borislav Petkov, H. Peter Anvin,
	Thomas Gleixner, linuxppc-dev, linux-arm-kernel
In-Reply-To: <d457aabc-9f58-f47e-f5fa-9539618b2759@huawei.com>

Excerpts from Zefan Li's message of August 12, 2020 11:07 am:
> On 2020/8/12 0:32, Jonathan Cameron wrote:
>> On Mon, 10 Aug 2020 12:27:24 +1000
>> Nicholas Piggin <npiggin@gmail.com> wrote:
>> 
>>> Not tested on x86 or arm64, would appreciate a quick test there so I can
>>> ask Andrew to put it in -mm. Other option is I can disable huge vmallocs
>>> for them for the time being.
>> 
>> Hi Nicholas,
>> 
>> For arm64 testing with a Kunpeng920.
>> 
>> I ran a quick sanity test with this series on top of mainline (yes mid merge window
>> so who knows what state is...).  Could I be missing some dependency?
>> 
>> Without them it boots, with them it doesn't.  Any immediate guesses?
>> 
> 
> I've already reported this bug in v2, and yeah I also tested it on arm64
> (not Kunpeng though), so looks like it still hasn't been fixed.

Huh, I thought I did fix it but seems not. vmap stacks shouldn't be 
big enough to use huge pages though, so I don't know what's going on
there. I'll dig around a bit more.

> 
> ...
>>>
>>> Since v2:
>>> - Rebased on vmalloc cleanups, split series into simpler pieces.
>>> - Fixed several compile errors and warnings
>>> - Keep the page array and accounting in small page units because
>>>   struct vm_struct is an interface (this should fix x86 vmap stack debug
>>>   assert). [Thanks Zefan]
> 
> though the changelog says it's fixed for x86.

Yes, my mistake that was supposed to say arm64.

Thanks,
Nick


^ permalink raw reply

* Re: [PATCH 02/16] debug_vm_pgtable/ppc64: Avoid setting top bits in radom value
From: Anshuman Khandual @ 2020-08-12  8:12 UTC (permalink / raw)
  To: Aneesh Kumar K.V, linux-mm, akpm; +Cc: linuxppc-dev
In-Reply-To: <20200812063358.369514-2-aneesh.kumar@linux.ibm.com>



On 08/12/2020 12:03 PM, Aneesh Kumar K.V wrote:
> ppc64 use bit 62 to indicate a pte entry (_PAGE_PTE). Avoid setting that bit in
> random value.
> 
> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
> ---
>  mm/debug_vm_pgtable.c | 5 ++++-
>  1 file changed, 4 insertions(+), 1 deletion(-)
> 
> diff --git a/mm/debug_vm_pgtable.c b/mm/debug_vm_pgtable.c
> index 086309fb9b6f..4c32063a8acf 100644
> --- a/mm/debug_vm_pgtable.c
> +++ b/mm/debug_vm_pgtable.c
> @@ -45,9 +45,12 @@
>   * pxx_clear() because of how dynamic page table folding works on s390. So
>   * while loading up the entries do not change the lower 4 bits. It does not
>   * have affect any other platform.
> + *
> + * Also avoid the 62nd bit on ppc64 that is used to mark a pte entry.
>   */

Please move and fold the above line with the existing paragraph.

>  #define S390_MASK_BITS	4
> -#define RANDOM_ORVALUE	GENMASK(BITS_PER_LONG - 1, S390_MASK_BITS)
> +#define PPC_MASK_BITS	2

s/PPC/PPC64/

> +#define RANDOM_ORVALUE	GENMASK(BITS_PER_LONG - 1 - PPC_MASK_BITS, S390_MASK_BITS)
>  #define RANDOM_NZVALUE	GENMASK(7, 0)
>  
>  static void __init pte_basic_tests(unsigned long pfn, pgprot_t prot)
> 

With this change, RANDOM_ORVALUE will be (0x3ffffffffffffff0) which discards
both bit 63 and 62. If only bit 62 is to be avoided for ppc64 the mask should
be (0xbffffffffffffff0) instead. The following change on this patch should do
the trick.

diff --git a/mm/debug_vm_pgtable.c b/mm/debug_vm_pgtable.c
index eb059fef89c2..1499181fb0e9 100644
--- a/mm/debug_vm_pgtable.c
+++ b/mm/debug_vm_pgtable.c
@@ -45,13 +45,13 @@
  * entry type. But these bits might affect the ability to clear entries with
  * pxx_clear() because of how dynamic page table folding works on s390. So
  * while loading up the entries do not change the lower 4 bits. It does not
- * have affect any other platform.
- *
- * Also avoid the 62nd bit on ppc64 that is used to mark a pte entry.
+ * have affect any other platform. Also avoid the 62nd bit on ppc64 that is
+ * used to mark a pte entry.
  */
-#define S390_MASK_BITS 4
-#define PPC_MASK_BITS  2
-#define RANDOM_ORVALUE GENMASK(BITS_PER_LONG - 1 - PPC_MASK_BITS, S390_MASK_BITS)
+#define S390_SKIP_MASK GENMASK(3, 0)
+#define PPC64_SKIP_MASK        GENMASK(62, 62)
+#define ARCH_SKIP_MASK (S390_SKIP_MASK | PPC64_SKIP_MASK)
+#define RANDOM_ORVALUE (GENMASK(BITS_PER_LONG - 1, 0) & ~ARCH_SKIP_MASK)
 #define RANDOM_NZVALUE GENMASK(7, 0)
 
 static void __init pte_basic_tests(unsigned long pfn, pgprot_t prot)

^ permalink raw reply related

* Re: [PATCH 1/2] lockdep: improve current->(hard|soft)irqs_enabled synchronisation with actual irq state
From: Nicholas Piggin @ 2020-08-12  8:18 UTC (permalink / raw)
  To: peterz
  Cc: linux-arch, Alexey Kardashevskiy, Will Deacon, linux-kernel,
	Ingo Molnar, linuxppc-dev
In-Reply-To: <20200807111126.GI2674@hirez.programming.kicks-ass.net>

Excerpts from peterz@infradead.org's message of August 7, 2020 9:11 pm:
> 
> What's wrong with something like this?
> 
> AFAICT there's no reason to actually try and add IRQ tracing here, it's
> just a hand full of instructions at the most.

Because we may want to use that in other places as well, so it would
be nice to have tracing.

Hmm... also, I thought NMI context was free to call local_irq_save/restore
anyway so the bug would still be there in those cases?

Thanks,
Nick

> 
> ---
> 
> diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h
> index 3a0db7b0b46e..6be22c1838e2 100644
> --- a/arch/powerpc/include/asm/hw_irq.h
> +++ b/arch/powerpc/include/asm/hw_irq.h
> @@ -196,33 +196,6 @@ static inline bool arch_irqs_disabled(void)
>  		arch_local_irq_restore(flags);				\
>  	} while(0)
>  
> -#ifdef CONFIG_TRACE_IRQFLAGS
> -#define powerpc_local_irq_pmu_save(flags)			\
> -	 do {							\
> -		raw_local_irq_pmu_save(flags);			\
> -		trace_hardirqs_off();				\
> -	} while(0)
> -#define powerpc_local_irq_pmu_restore(flags)			\
> -	do {							\
> -		if (raw_irqs_disabled_flags(flags)) {		\
> -			raw_local_irq_pmu_restore(flags);	\
> -			trace_hardirqs_off();			\
> -		} else {					\
> -			trace_hardirqs_on();			\
> -			raw_local_irq_pmu_restore(flags);	\
> -		}						\
> -	} while(0)
> -#else
> -#define powerpc_local_irq_pmu_save(flags)			\
> -	do {							\
> -		raw_local_irq_pmu_save(flags);			\
> -	} while(0)
> -#define powerpc_local_irq_pmu_restore(flags)			\
> -	do {							\
> -		raw_local_irq_pmu_restore(flags);		\
> -	} while (0)
> -#endif  /* CONFIG_TRACE_IRQFLAGS */
> -
>  #endif /* CONFIG_PPC_BOOK3S */
>  
>  #ifdef CONFIG_PPC_BOOK3E
> diff --git a/arch/powerpc/include/asm/local.h b/arch/powerpc/include/asm/local.h
> index bc4bd19b7fc2..b357a35672b1 100644
> --- a/arch/powerpc/include/asm/local.h
> +++ b/arch/powerpc/include/asm/local.h
> @@ -32,9 +32,9 @@ static __inline__ void local_##op(long i, local_t *l)			\
>  {									\
>  	unsigned long flags;						\
>  									\
> -	powerpc_local_irq_pmu_save(flags);				\
> +	raw_powerpc_local_irq_pmu_save(flags);				\
>  	l->v c_op i;						\
> -	powerpc_local_irq_pmu_restore(flags);				\
> +	raw_powerpc_local_irq_pmu_restore(flags);				\
>  }
>  
>  #define LOCAL_OP_RETURN(op, c_op)					\
> @@ -43,9 +43,9 @@ static __inline__ long local_##op##_return(long a, local_t *l)		\
>  	long t;								\
>  	unsigned long flags;						\
>  									\
> -	powerpc_local_irq_pmu_save(flags);				\
> +	raw_powerpc_local_irq_pmu_save(flags);				\
>  	t = (l->v c_op a);						\
> -	powerpc_local_irq_pmu_restore(flags);				\
> +	raw_powerpc_local_irq_pmu_restore(flags);				\
>  									\
>  	return t;							\
>  }
> @@ -81,11 +81,11 @@ static __inline__ long local_cmpxchg(local_t *l, long o, long n)
>  	long t;
>  	unsigned long flags;
>  
> -	powerpc_local_irq_pmu_save(flags);
> +	raw_powerpc_local_irq_pmu_save(flags);
>  	t = l->v;
>  	if (t == o)
>  		l->v = n;
> -	powerpc_local_irq_pmu_restore(flags);
> +	raw_powerpc_local_irq_pmu_restore(flags);
>  
>  	return t;
>  }
> @@ -95,10 +95,10 @@ static __inline__ long local_xchg(local_t *l, long n)
>  	long t;
>  	unsigned long flags;
>  
> -	powerpc_local_irq_pmu_save(flags);
> +	raw_powerpc_local_irq_pmu_save(flags);
>  	t = l->v;
>  	l->v = n;
> -	powerpc_local_irq_pmu_restore(flags);
> +	raw_powerpc_local_irq_pmu_restore(flags);
>  
>  	return t;
>  }
> @@ -117,12 +117,12 @@ static __inline__ int local_add_unless(local_t *l, long a, long u)
>  	unsigned long flags;
>  	int ret = 0;
>  
> -	powerpc_local_irq_pmu_save(flags);
> +	raw_powerpc_local_irq_pmu_save(flags);
>  	if (l->v != u) {
>  		l->v += a;
>  		ret = 1;
>  	}
> -	powerpc_local_irq_pmu_restore(flags);
> +	raw_powerpc_local_irq_pmu_restore(flags);
>  
>  	return ret;
>  }
> 

^ permalink raw reply

* Re: [PATCH 02/16] debug_vm_pgtable/ppc64: Avoid setting top bits in radom value
From: Aneesh Kumar K.V @ 2020-08-12  8:25 UTC (permalink / raw)
  To: Anshuman Khandual, linux-mm, akpm; +Cc: linuxppc-dev
In-Reply-To: <fcb44f1d-4131-9135-512e-11a5c00abcd9@arm.com>

On 8/12/20 1:42 PM, Anshuman Khandual wrote:
> 
> 
> On 08/12/2020 12:03 PM, Aneesh Kumar K.V wrote:
>> ppc64 use bit 62 to indicate a pte entry (_PAGE_PTE). Avoid setting that bit in
>> random value.
>>
>> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
>> ---
>>   mm/debug_vm_pgtable.c | 5 ++++-
>>   1 file changed, 4 insertions(+), 1 deletion(-)
>>
>> diff --git a/mm/debug_vm_pgtable.c b/mm/debug_vm_pgtable.c
>> index 086309fb9b6f..4c32063a8acf 100644
>> --- a/mm/debug_vm_pgtable.c
>> +++ b/mm/debug_vm_pgtable.c
>> @@ -45,9 +45,12 @@
>>    * pxx_clear() because of how dynamic page table folding works on s390. So
>>    * while loading up the entries do not change the lower 4 bits. It does not
>>    * have affect any other platform.
>> + *
>> + * Also avoid the 62nd bit on ppc64 that is used to mark a pte entry.
>>    */
> 
> Please move and fold the above line with the existing paragraph.
> 
>>   #define S390_MASK_BITS	4
>> -#define RANDOM_ORVALUE	GENMASK(BITS_PER_LONG - 1, S390_MASK_BITS)
>> +#define PPC_MASK_BITS	2
> 
> s/PPC/PPC64/
> 
>> +#define RANDOM_ORVALUE	GENMASK(BITS_PER_LONG - 1 - PPC_MASK_BITS, S390_MASK_BITS)
>>   #define RANDOM_NZVALUE	GENMASK(7, 0)
>>   
>>   static void __init pte_basic_tests(unsigned long pfn, pgprot_t prot)
>>
> 
> With this change, RANDOM_ORVALUE will be (0x3ffffffffffffff0) which discards
> both bit 63 and 62. If only bit 62 is to be avoided for ppc64 the mask should
> be (0xbffffffffffffff0) instead. The following change on this patch should do
> the trick.
> 
> diff --git a/mm/debug_vm_pgtable.c b/mm/debug_vm_pgtable.c
> index eb059fef89c2..1499181fb0e9 100644
> --- a/mm/debug_vm_pgtable.c
> +++ b/mm/debug_vm_pgtable.c
> @@ -45,13 +45,13 @@
>    * entry type. But these bits might affect the ability to clear entries with
>    * pxx_clear() because of how dynamic page table folding works on s390. So
>    * while loading up the entries do not change the lower 4 bits. It does not
> - * have affect any other platform.
> - *
> - * Also avoid the 62nd bit on ppc64 that is used to mark a pte entry.
> + * have affect any other platform. Also avoid the 62nd bit on ppc64 that is
> + * used to mark a pte entry.
>    */
> -#define S390_MASK_BITS 4
> -#define PPC_MASK_BITS  2
> -#define RANDOM_ORVALUE GENMASK(BITS_PER_LONG - 1 - PPC_MASK_BITS, S390_MASK_BITS)
> +#define S390_SKIP_MASK GENMASK(3, 0)
> +#define PPC64_SKIP_MASK        GENMASK(62, 62)
> +#define ARCH_SKIP_MASK (S390_SKIP_MASK | PPC64_SKIP_MASK)
> +#define RANDOM_ORVALUE (GENMASK(BITS_PER_LONG - 1, 0) & ~ARCH_SKIP_MASK)
>   #define RANDOM_NZVALUE GENMASK(7, 0)
>   
>   static void __init pte_basic_tests(unsigned long pfn, pgprot_t prot)
> 

I will switch to this.

-aneesh

^ permalink raw reply

* Re: [PATCH 01/16] powerpc/mm: Add DEBUG_VM WARN for pmd_clear
From: Aneesh Kumar K.V @ 2020-08-12  8:27 UTC (permalink / raw)
  To: Anshuman Khandual, linux-mm, akpm; +Cc: linuxppc-dev
In-Reply-To: <8b951ede-d779-d18f-b2b8-d09f94af6822@arm.com>

On 8/12/20 1:16 PM, Anshuman Khandual wrote:
> On 08/12/2020 12:03 PM, Aneesh Kumar K.V wrote:
>> With the hash page table, the kernel should not use pmd_clear for clearing
>> huge pte entries. Add a DEBUG_VM WARN to catch the wrong usage.
>>
>> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
> 
> This particular change is very much powerpc specific. Hence please drop it from
> the series which otherwise changes the page table test. Also, this series which
> is not a RFC, still lacks a proper cover letter with diff stats, tree/tag on
> which this applies, summary about the proposal etc. All those information will
> be helpful in reviewing this series better. For now, assuming that this applies
> cleanly on current master branch. But again, please do include a cover letter
> in the next version.


The patch series include all sort of fixes. There is no special theme 
for the series. So all that the cover letter would have is "fixes to 
make debug_vm_pgtable work on ppc64"

I tried to keep each patch simpler explaining why the current code is 
wrong.


> 
>> ---
>>   arch/powerpc/include/asm/book3s/64/pgtable.h | 14 ++++++++++++++
>>   1 file changed, 14 insertions(+)
>>
>> diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
>> index 6de56c3b33c4..079211968987 100644
>> --- a/arch/powerpc/include/asm/book3s/64/pgtable.h
>> +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
>> @@ -868,6 +868,13 @@ static inline bool pte_ci(pte_t pte)
>>   
>>   static inline void pmd_clear(pmd_t *pmdp)
>>   {
>> +	if (IS_ENABLED(CONFIG_DEBUG_VM) && !radix_enabled()) {
>> +		/*
>> +		 * Don't use this if we can possibly have a hash page table
>> +		 * entry mapping this.
>> +		 */
>> +		WARN_ON((pmd_val(*pmdp) & (H_PAGE_HASHPTE | _PAGE_PTE)) == (H_PAGE_HASHPTE | _PAGE_PTE));
>> +	}
>>   	*pmdp = __pmd(0);
>>   }
>>   
>> @@ -916,6 +923,13 @@ static inline int pmd_bad(pmd_t pmd)
>>   
>>   static inline void pud_clear(pud_t *pudp)
>>   {
>> +	if (IS_ENABLED(CONFIG_DEBUG_VM) && !radix_enabled()) {
>> +		/*
>> +		 * Don't use this if we can possibly have a hash page table
>> +		 * entry mapping this.
>> +		 */
>> +		WARN_ON((pud_val(*pudp) & (H_PAGE_HASHPTE | _PAGE_PTE)) == (H_PAGE_HASHPTE | _PAGE_PTE));
>> +	}
>>   	*pudp = __pud(0);
>>   }
>>   
>>

-aneesh


^ permalink raw reply

* Re: [PATCH 03/16] debug_vm_pgtable/set_pte: Don't use set_pte_at to update an existing pte entry
From: Anshuman Khandual @ 2020-08-12  9:12 UTC (permalink / raw)
  To: Aneesh Kumar K.V, linux-mm, akpm; +Cc: linuxppc-dev
In-Reply-To: <20200812063358.369514-3-aneesh.kumar@linux.ibm.com>



On 08/12/2020 12:03 PM, Aneesh Kumar K.V wrote:
> set_pte_at() should not be used to set a pte entry at locations that
> already holds a valid pte entry. Architectures like ppc64 don't do TLB
> invalidate in set_pte_at() and hence expect it to be used to set locations
> that are not a valid PTE.

Even though set_pte_at() is not really a arch page table helper and
very much arch specific, I just wonder why this deviation on ppc64
as compared to other platforms. Detecting such semantics variation
across platforms is an objective of this test.

As small nit.

Please follow the existing subject format for all patches in here.
It will improve readability and also help understand these changes
better, later on.

mm/debug_vm_pgtable: <Specify changes to an individual test>

> 
> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
> ---
>  mm/debug_vm_pgtable.c | 8 ++++----
>  1 file changed, 4 insertions(+), 4 deletions(-)
> 
> diff --git a/mm/debug_vm_pgtable.c b/mm/debug_vm_pgtable.c
> index 4c32063a8acf..02a7c20aa4a2 100644
> --- a/mm/debug_vm_pgtable.c
> +++ b/mm/debug_vm_pgtable.c
> @@ -81,8 +81,6 @@ static void __init pte_advanced_tests(struct mm_struct *mm,
>  	pte = ptep_get(ptep);
>  	WARN_ON(pte_write(pte));
>  
> -	pte = pfn_pte(pfn, prot);
> -	set_pte_at(mm, vaddr, ptep, pte);
>  	ptep_get_and_clear(mm, vaddr, ptep);
>  	pte = ptep_get(ptep);
>  	WARN_ON(!pte_none(pte));

This makes sense. But could you please fold this code stanza with
the previous one in order to imply that 'ptep' did have some valid
entry before being cleared and checked against pte_none().

> @@ -97,12 +95,14 @@ static void __init pte_advanced_tests(struct mm_struct *mm,
>  	pte = ptep_get(ptep);
>  	WARN_ON(!(pte_write(pte) && pte_dirty(pte)));
>  
> -	pte = pfn_pte(pfn, prot);
> -	set_pte_at(mm, vaddr, ptep, pte);
>  	ptep_get_and_clear_full(mm, vaddr, ptep, 1);
>  	pte = ptep_get(ptep);
>  	WARN_ON(!pte_none(pte));

Same, please fold back.

>  
> +	/*
> +	 * We should clear pte before we do set_pte_at
> +	 */
> +	pte = ptep_get_and_clear(mm, vaddr, ptep);
>  	pte = pte_mkyoung(pte);
>  	set_pte_at(mm, vaddr, ptep, pte);
>  	ptep_test_and_clear_young(vma, vaddr, ptep);
>

The comment above should also explain details that are mentioned
in the commit message i.e how platforms such as ppc64 expects a
clear pte entry for set_pte_at() to work.

^ permalink raw reply

* Re: [PATCH 03/16] debug_vm_pgtable/set_pte: Don't use set_pte_at to update an existing pte entry
From: Aneesh Kumar K.V @ 2020-08-12  9:22 UTC (permalink / raw)
  To: Anshuman Khandual, linux-mm, akpm; +Cc: linuxppc-dev
In-Reply-To: <94ed519a-2cf2-af50-82be-2a559dee7d86@arm.com>

On 8/12/20 2:42 PM, Anshuman Khandual wrote:
> 
> 
> On 08/12/2020 12:03 PM, Aneesh Kumar K.V wrote:
>> set_pte_at() should not be used to set a pte entry at locations that
>> already holds a valid pte entry. Architectures like ppc64 don't do TLB
>> invalidate in set_pte_at() and hence expect it to be used to set locations
>> that are not a valid PTE.
> 
> Even though set_pte_at() is not really a arch page table helper and
> very much arch specific, I just wonder why this deviation on ppc64
> as compared to other platforms. Detecting such semantics variation
> across platforms is an objective of this test.

Not sure what you mean by set_pte_at is not a page table helper. Generic 
kernel use that helper to set a pte entry.

Now w.r.t ppc64 behavior this was discussed multiple times. I guess 
Linux kernel always used set_pte_at on a none pte entry. We had some 
exceptions in the recent past. But all fixed when noticed.


383321ab8578dfe3bbcc0bc5604c0f8ae08a5c98
mm/hugetlb/migration: use set_huge_pte_at instead of set_pte_at

cee216a696b2004017a5ecb583366093d90b1568
mm/autonuma: don't use set_pte_at when updating protnone ptes

56eecdb912b536a4fa97fb5bfe5a940a54d79be6
mm: Use ptep/pmdp_set_numa() for updating _PAGE_NUMA bit

Yes. Having a testcase like this help

> 
> As small nit.
> 
> Please follow the existing subject format for all patches in here.
> It will improve readability and also help understand these changes
> better, later on.
> 
> mm/debug_vm_pgtable: <Specify changes to an individual test>
> 
>>
>> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
>> ---
>>   mm/debug_vm_pgtable.c | 8 ++++----
>>   1 file changed, 4 insertions(+), 4 deletions(-)
>>
>> diff --git a/mm/debug_vm_pgtable.c b/mm/debug_vm_pgtable.c
>> index 4c32063a8acf..02a7c20aa4a2 100644
>> --- a/mm/debug_vm_pgtable.c
>> +++ b/mm/debug_vm_pgtable.c
>> @@ -81,8 +81,6 @@ static void __init pte_advanced_tests(struct mm_struct *mm,
>>   	pte = ptep_get(ptep);
>>   	WARN_ON(pte_write(pte));
>>   
>> -	pte = pfn_pte(pfn, prot);
>> -	set_pte_at(mm, vaddr, ptep, pte);
>>   	ptep_get_and_clear(mm, vaddr, ptep);
>>   	pte = ptep_get(ptep);
>>   	WARN_ON(!pte_none(pte));
> 
> This makes sense. But could you please fold this code stanza with
> the previous one in order to imply that 'ptep' did have some valid
> entry before being cleared and checked against pte_none().
> 

will do that

>> @@ -97,12 +95,14 @@ static void __init pte_advanced_tests(struct mm_struct *mm,
>>   	pte = ptep_get(ptep);
>>   	WARN_ON(!(pte_write(pte) && pte_dirty(pte)));
>>   
>> -	pte = pfn_pte(pfn, prot);
>> -	set_pte_at(mm, vaddr, ptep, pte);
>>   	ptep_get_and_clear_full(mm, vaddr, ptep, 1);
>>   	pte = ptep_get(ptep);
>>   	WARN_ON(!pte_none(pte));
> 
> Same, please fold back.
> 

ok


>> +	/*
>> +	 * We should clear pte before we do set_pte_at
>> +	 */
>> +	pte = ptep_get_and_clear(mm, vaddr, ptep);
>>   	pte = pte_mkyoung(pte);
>>   	set_pte_at(mm, vaddr, ptep, pte);
>>   	ptep_test_and_clear_young(vma, vaddr, ptep);
>>
> 
> The comment above should also explain details that are mentioned
> in the commit message i.e how platforms such as ppc64 expects a
> clear pte entry for set_pte_at() to work.
> 

I don't think it is specific to ppc64. There is nothing specific to 
ppc64 architecture in there. It is an optimization used in kernel to 
help architecture avoid TLB flush. I will update the comment as below


/* We should clear pte before we do set_pte_at so that set_pte_at don't 
find a valid pte at ptep *?

is that good?

-aneesh


^ permalink raw reply

* Re: [PATCH 04/16] debug_vm_pgtables/hugevmap: Use the arch helper to identify huge vmap support.
From: Anshuman Khandual @ 2020-08-12 10:30 UTC (permalink / raw)
  To: Aneesh Kumar K.V, linux-mm, akpm; +Cc: linuxppc-dev
In-Reply-To: <20200812063358.369514-4-aneesh.kumar@linux.ibm.com>


On 08/12/2020 12:03 PM, Aneesh Kumar K.V wrote:
> ppc64 supports huge vmap only with radix translation. Hence use arch helper
> to determine the huge vmap support.
> 
> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
> ---
>  mm/debug_vm_pgtable.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/mm/debug_vm_pgtable.c b/mm/debug_vm_pgtable.c
> index 02a7c20aa4a2..679bb3d289a3 100644
> --- a/mm/debug_vm_pgtable.c
> +++ b/mm/debug_vm_pgtable.c
> @@ -206,7 +206,7 @@ static void __init pmd_huge_tests(pmd_t *pmdp, unsigned long pfn, pgprot_t prot)
>  {
>  	pmd_t pmd;
>  
> -	if (!IS_ENABLED(CONFIG_HAVE_ARCH_HUGE_VMAP))
> +	if (!arch_ioremap_pmd_supported())
>  		return;
>  
>  	pr_debug("Validating PMD huge\n");
> 

Problem is arch_ioremap_pmd_supported() symbol which should also be
explicitly included via <linux/io.h>, is not available without the
config CONFIG_HAVE_ARCH_HUGE_VMAP. ioremap_pmd_enabled() should have
been better here and has a fallback for !CONFIG_HAVE_ARCH_HUGE_VMAP.
But then the symbol is local to that file. Unless we would like to
make ioremap_pxx_enabled generally available, the remaining option
would be to wrap pxx_huge_tests() with CONFIG_HAVE_ARCH_HUGE_VMAP.
Similar changes should also be done for pud_huge_tests() as well.

^ permalink raw reply

* Re: [PATCH 1/2] lockdep: improve current->(hard|soft)irqs_enabled synchronisation with actual irq state
From: peterz @ 2020-08-12 10:35 UTC (permalink / raw)
  To: Nicholas Piggin
  Cc: linux-arch, Alexey Kardashevskiy, Will Deacon, linux-kernel,
	Ingo Molnar, linuxppc-dev
In-Reply-To: <1597220073.mbvcty6ghk.astroid@bobo.none>

On Wed, Aug 12, 2020 at 06:18:28PM +1000, Nicholas Piggin wrote:
> Excerpts from peterz@infradead.org's message of August 7, 2020 9:11 pm:
> > 
> > What's wrong with something like this?
> > 
> > AFAICT there's no reason to actually try and add IRQ tracing here, it's
> > just a hand full of instructions at the most.
> 
> Because we may want to use that in other places as well, so it would
> be nice to have tracing.
> 
> Hmm... also, I thought NMI context was free to call local_irq_save/restore
> anyway so the bug would still be there in those cases?

NMI code has in_nmi() true, in which case the IRQ tracing is disabled
(except for x86 which has CONFIG_TRACE_IRQFLAGS_NMI).

^ permalink raw reply

* Re: [PATCH 05/16] debug_vm_pgtable/savedwrite: Enable savedwrite test with CONFIG_NUMA_BALANCING
From: Anshuman Khandual @ 2020-08-12 11:24 UTC (permalink / raw)
  To: Aneesh Kumar K.V, linux-mm, akpm; +Cc: linuxppc-dev
In-Reply-To: <20200812063358.369514-5-aneesh.kumar@linux.ibm.com>



On 08/12/2020 12:03 PM, Aneesh Kumar K.V wrote:
> Saved write support was added to track the write bit of a pte after marking the
> pte protnone. This was done so that AUTONUMA can convert a write pte to protnone
> and still track the old write bit. When converting it back we set the pte write
> bit correctly thereby avoiding a write fault again. Hence enable the test only
> when CONFIG_NUMA_BALANCING is enabled.

This has not been a problem on architectures that do not define pxx_savedwrite()
as they fallback on standard pxx_write() helpers. But ppc64 defines a fall back
for pte_savedwrite() when !CONFIG_NUMA_BALANCING, which always returns false.
Agreed, it makes sense to add CONFIG_NUMA_BALANCING check. But a simple runtime
check will do as the functions are always visible/defined.

if (!IS_ENABLED(CONFIG_NUMA_BALANCING))
	return;

> 
> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
> ---
>  mm/debug_vm_pgtable.c | 7 +++++++
>  1 file changed, 7 insertions(+)
> 
> diff --git a/mm/debug_vm_pgtable.c b/mm/debug_vm_pgtable.c
> index 679bb3d289a3..de8a62d0a931 100644
> --- a/mm/debug_vm_pgtable.c
> +++ b/mm/debug_vm_pgtable.c
> @@ -110,6 +110,7 @@ static void __init pte_advanced_tests(struct mm_struct *mm,
>  	WARN_ON(pte_young(pte));
>  }
>  
> +#ifdef CONFIG_NUMA_BALANCING
>  static void __init pte_savedwrite_tests(unsigned long pfn, pgprot_t prot)
>  {
>  	pte_t pte = pfn_pte(pfn, prot);
> @@ -118,6 +119,8 @@ static void __init pte_savedwrite_tests(unsigned long pfn, pgprot_t prot)
>  	WARN_ON(!pte_savedwrite(pte_mk_savedwrite(pte_clear_savedwrite(pte))));
>  	WARN_ON(pte_savedwrite(pte_clear_savedwrite(pte_mk_savedwrite(pte))));
>  }
> +#endif
> +
>  #ifdef CONFIG_TRANSPARENT_HUGEPAGE
>  static void __init pmd_basic_tests(unsigned long pfn, pgprot_t prot)
>  {
> @@ -221,6 +224,7 @@ static void __init pmd_huge_tests(pmd_t *pmdp, unsigned long pfn, pgprot_t prot)
>  	WARN_ON(!pmd_none(pmd));
>  }
>  
> +#ifdef CONFIG_NUMA_BALANCING
>  static void __init pmd_savedwrite_tests(unsigned long pfn, pgprot_t prot)
>  {
>  	pmd_t pmd = pfn_pmd(pfn, prot);
> @@ -229,6 +233,7 @@ static void __init pmd_savedwrite_tests(unsigned long pfn, pgprot_t prot)
>  	WARN_ON(!pmd_savedwrite(pmd_mk_savedwrite(pmd_clear_savedwrite(pmd))));
>  	WARN_ON(pmd_savedwrite(pmd_clear_savedwrite(pmd_mk_savedwrite(pmd))));
>  }
> +#endif
>  
>  #ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
>  static void __init pud_basic_tests(unsigned long pfn, pgprot_t prot)
> @@ -1005,8 +1010,10 @@ static int __init debug_vm_pgtable(void)
>  	pmd_huge_tests(pmdp, pmd_aligned, prot);
>  	pud_huge_tests(pudp, pud_aligned, prot);
>  
> +#ifdef CONFIG_NUMA_BALANCING
>  	pte_savedwrite_tests(pte_aligned, prot);
>  	pmd_savedwrite_tests(pmd_aligned, prot);
> +#endif

BTW, config  wrappers should be avoided at the call site. If and when needed
a stub is defined for !CONFIG_XXX_XXX.

>  
>  	pte_unmap_unlock(ptep, ptl);
>  
> 

^ permalink raw reply

* Re: [PATCH 07/16] debug_vm_pgtable/THP: Mark the pte entry huge before using set_pud_at
From: Anshuman Khandual @ 2020-08-12 11:49 UTC (permalink / raw)
  To: Aneesh Kumar K.V, linux-mm, akpm; +Cc: linuxppc-dev
In-Reply-To: <20200812063358.369514-7-aneesh.kumar@linux.ibm.com>



On 08/12/2020 12:03 PM, Aneesh Kumar K.V wrote:
> kernel expect entries to be marked huge before we use set_pud_at().
> 
> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
> ---
>  mm/debug_vm_pgtable.c | 11 +++++++----
>  1 file changed, 7 insertions(+), 4 deletions(-)
> 
> diff --git a/mm/debug_vm_pgtable.c b/mm/debug_vm_pgtable.c
> index b6aca2526e01..cd609a212dd4 100644
> --- a/mm/debug_vm_pgtable.c
> +++ b/mm/debug_vm_pgtable.c
> @@ -265,7 +265,7 @@ static void __init pud_advanced_tests(struct mm_struct *mm,
>  				      unsigned long pfn, unsigned long vaddr,
>  				      pgprot_t prot)
>  {
> -	pud_t pud = pfn_pud(pfn, prot);
> +	pud_t pud;
>  
>  	if (!has_transparent_hugepage())
>  		return;
> @@ -274,25 +274,28 @@ static void __init pud_advanced_tests(struct mm_struct *mm,
>  	/* Align the address wrt HPAGE_PUD_SIZE */
>  	vaddr = (vaddr & HPAGE_PUD_MASK) + HPAGE_PUD_SIZE;
>  
> +	pud = pud_mkhuge(pfn_pud(pfn, prot));
>  	set_pud_at(mm, vaddr, pudp, pud);
>  	pudp_set_wrprotect(mm, vaddr, pudp);
>  	pud = READ_ONCE(*pudp);
>  	WARN_ON(pud_write(pud));
>  
>  #ifndef __PAGETABLE_PMD_FOLDED
> -	pud = pfn_pud(pfn, prot);
> +
> +	pud = pud_mkhuge(pfn_pud(pfn, prot));
>  	set_pud_at(mm, vaddr, pudp, pud);
>  	pudp_huge_get_and_clear(mm, vaddr, pudp);
>  	pud = READ_ONCE(*pudp);
>  	WARN_ON(!pud_none(pud));
>  
> -	pud = pfn_pud(pfn, prot);
> +	pud = pud_mkhuge(pfn_pud(pfn, prot));
>  	set_pud_at(mm, vaddr, pudp, pud);
>  	pudp_huge_get_and_clear_full(mm, vaddr, pudp, 1);
>  	pud = READ_ONCE(*pudp);
>  	WARN_ON(!pud_none(pud));
>  #endif /* __PAGETABLE_PMD_FOLDED */
> -	pud = pfn_pud(pfn, prot);
> +
> +	pud = pud_mkhuge(pfn_pud(pfn, prot));
>  	pud = pud_wrprotect(pud);
>  	pud = pud_mkclean(pud);
>  	set_pud_at(mm, vaddr, pudp, pud);
> 

Looks very similar to the previous patch, hence please fold it back.
Seems fair enough that pxx_set_at() expects given entry to be huge.
But would need to run them across enabled platforms to be sure.

^ permalink raw reply

* [RFC PATCH v1 03/19] powerpc/ptrace: Consolidate reg index calculation
From: Christophe Leroy @ 2020-08-12 12:05 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman, ldv,
	viro
  Cc: linuxppc-dev, linux-kernel
In-Reply-To: <cover.1597233555.git.christophe.leroy@csgroup.eu>

Today we have:

	#ifdef CONFIG_PPC32
		index = addr >> 2;
		if ((addr & 3) || child->thread.regs == NULL)
	#else
		index = addr >> 3;
		if ((addr & 7))
	#endif

sizeof(long) has value 4 for PPC32 and value 8 for PPC64.

Dividing by 4 is equivalent to >> 2 and dividing by 8 is equivalent
to >> 3.

And 3 and 7 are respectively (sizeof(long) - 1).

Use sizeof(long) to get rid of the #ifdef CONFIG_PPC32 and consolidate
the calculation and checking.

thread.regs have to be not NULL on both PPC32 and PPC64 so adding
that test on PPC64 is harmless.

Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
---
 arch/powerpc/kernel/ptrace/ptrace.c | 20 ++++----------------
 1 file changed, 4 insertions(+), 16 deletions(-)

diff --git a/arch/powerpc/kernel/ptrace/ptrace.c b/arch/powerpc/kernel/ptrace/ptrace.c
index f6e51be47c6e..0b4645a7a1b4 100644
--- a/arch/powerpc/kernel/ptrace/ptrace.c
+++ b/arch/powerpc/kernel/ptrace/ptrace.c
@@ -55,14 +55,8 @@ long arch_ptrace(struct task_struct *child, long request,
 
 		ret = -EIO;
 		/* convert to index and check */
-#ifdef CONFIG_PPC32
-		index = addr >> 2;
-		if ((addr & 3) || (index > PT_FPSCR)
-		    || (child->thread.regs == NULL))
-#else
-		index = addr >> 3;
-		if ((addr & 7) || (index > PT_FPSCR))
-#endif
+		index = addr / sizeof(long);
+		if ((addr & (sizeof(long) - 1)) || (index > PT_FPSCR) || !child->thread.regs)
 			break;
 
 		CHECK_FULL_REGS(child->thread.regs);
@@ -90,14 +84,8 @@ long arch_ptrace(struct task_struct *child, long request,
 
 		ret = -EIO;
 		/* convert to index and check */
-#ifdef CONFIG_PPC32
-		index = addr >> 2;
-		if ((addr & 3) || (index > PT_FPSCR)
-		    || (child->thread.regs == NULL))
-#else
-		index = addr >> 3;
-		if ((addr & 7) || (index > PT_FPSCR))
-#endif
+		index = addr / sizeof(long);
+		if ((addr & (sizeof(long) - 1)) || (index > PT_FPSCR) || !child->thread.regs)
 			break;
 
 		CHECK_FULL_REGS(child->thread.regs);
-- 
2.25.0


^ permalink raw reply related

* [RFC PATCH v1 04/19] powerpc/ptrace: Create ptrace_get_fpr() and ptrace_put_fpr()
From: Christophe Leroy @ 2020-08-12 12:05 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman, ldv,
	viro
  Cc: linuxppc-dev, linux-kernel
In-Reply-To: <cover.1597233555.git.christophe.leroy@csgroup.eu>

On the same model as ptrace_get_reg() and ptrace_put_reg(),
create ptrace_get_fpr() and ptrace_put_fpr() to get/set
the floating points registers.

We move the boundary checkings in them.

Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
---
 arch/powerpc/kernel/ptrace/Makefile      |  1 +
 arch/powerpc/kernel/ptrace/ptrace-decl.h |  4 +++
 arch/powerpc/kernel/ptrace/ptrace-fpu.c  | 40 ++++++++++++++++++++++++
 arch/powerpc/kernel/ptrace/ptrace.c      | 38 +++++++---------------
 4 files changed, 56 insertions(+), 27 deletions(-)
 create mode 100644 arch/powerpc/kernel/ptrace/ptrace-fpu.c

diff --git a/arch/powerpc/kernel/ptrace/Makefile b/arch/powerpc/kernel/ptrace/Makefile
index c2f2402ebc8c..77abd1a5a508 100644
--- a/arch/powerpc/kernel/ptrace/Makefile
+++ b/arch/powerpc/kernel/ptrace/Makefile
@@ -6,6 +6,7 @@
 CFLAGS_ptrace-view.o		+= -DUTS_MACHINE='"$(UTS_MACHINE)"'
 
 obj-y				+= ptrace.o ptrace-view.o
+obj-y				+= ptrace-fpu.o
 obj-$(CONFIG_COMPAT)		+= ptrace32.o
 obj-$(CONFIG_VSX)		+= ptrace-vsx.o
 ifneq ($(CONFIG_VSX),y)
diff --git a/arch/powerpc/kernel/ptrace/ptrace-decl.h b/arch/powerpc/kernel/ptrace/ptrace-decl.h
index 2ddc68412fa8..eafe5f0f6289 100644
--- a/arch/powerpc/kernel/ptrace/ptrace-decl.h
+++ b/arch/powerpc/kernel/ptrace/ptrace-decl.h
@@ -164,6 +164,10 @@ int ptrace_put_reg(struct task_struct *task, int regno, unsigned long data);
 
 extern const struct user_regset_view user_ppc_native_view;
 
+/* ptrace-fpu */
+int ptrace_get_fpr(struct task_struct *child, int index, unsigned long *data);
+int ptrace_put_fpr(struct task_struct *child, int index, unsigned long data);
+
 /* ptrace-(no)adv */
 void ppc_gethwdinfo(struct ppc_debug_info *dbginfo);
 int ptrace_get_debugreg(struct task_struct *child, unsigned long addr,
diff --git a/arch/powerpc/kernel/ptrace/ptrace-fpu.c b/arch/powerpc/kernel/ptrace/ptrace-fpu.c
new file mode 100644
index 000000000000..8301cb52dd99
--- /dev/null
+++ b/arch/powerpc/kernel/ptrace/ptrace-fpu.c
@@ -0,0 +1,40 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <linux/regset.h>
+
+#include <asm/switch_to.h>
+
+#include "ptrace-decl.h"
+
+int ptrace_get_fpr(struct task_struct *child, int index, unsigned long *data)
+{
+	unsigned int fpidx = index - PT_FPR0;
+
+	if (index > PT_FPSCR)
+		return -EIO;
+
+	flush_fp_to_thread(child);
+	if (fpidx < (PT_FPSCR - PT_FPR0))
+		memcpy(data, &child->thread.TS_FPR(fpidx), sizeof(long));
+	else
+		*data = child->thread.fp_state.fpscr;
+
+	return 0;
+}
+
+int ptrace_put_fpr(struct task_struct *child, int index, unsigned long data)
+{
+	unsigned int fpidx = index - PT_FPR0;
+
+	if (index > PT_FPSCR)
+		return -EIO;
+
+	flush_fp_to_thread(child);
+	if (fpidx < (PT_FPSCR - PT_FPR0))
+		memcpy(&child->thread.TS_FPR(fpidx), &data, sizeof(long));
+	else
+		child->thread.fp_state.fpscr = data;
+
+	return 0;
+}
+
diff --git a/arch/powerpc/kernel/ptrace/ptrace.c b/arch/powerpc/kernel/ptrace/ptrace.c
index 0b4645a7a1b4..3d44b73adb83 100644
--- a/arch/powerpc/kernel/ptrace/ptrace.c
+++ b/arch/powerpc/kernel/ptrace/ptrace.c
@@ -56,24 +56,17 @@ long arch_ptrace(struct task_struct *child, long request,
 		ret = -EIO;
 		/* convert to index and check */
 		index = addr / sizeof(long);
-		if ((addr & (sizeof(long) - 1)) || (index > PT_FPSCR) || !child->thread.regs)
+		if ((addr & (sizeof(long) - 1)) || !child->thread.regs)
 			break;
 
 		CHECK_FULL_REGS(child->thread.regs);
-		if (index < PT_FPR0) {
+		if (index < PT_FPR0)
 			ret = ptrace_get_reg(child, (int) index, &tmp);
-			if (ret)
-				break;
-		} else {
-			unsigned int fpidx = index - PT_FPR0;
-
-			flush_fp_to_thread(child);
-			if (fpidx < (PT_FPSCR - PT_FPR0))
-				memcpy(&tmp, &child->thread.TS_FPR(fpidx),
-				       sizeof(long));
-			else
-				tmp = child->thread.fp_state.fpscr;
-		}
+		else
+			ret = ptrace_get_fpr(child, index, &tmp);
+
+		if (ret)
+			break;
 		ret = put_user(tmp, datalp);
 		break;
 	}
@@ -85,23 +78,14 @@ long arch_ptrace(struct task_struct *child, long request,
 		ret = -EIO;
 		/* convert to index and check */
 		index = addr / sizeof(long);
-		if ((addr & (sizeof(long) - 1)) || (index > PT_FPSCR) || !child->thread.regs)
+		if ((addr & (sizeof(long) - 1)) || !child->thread.regs)
 			break;
 
 		CHECK_FULL_REGS(child->thread.regs);
-		if (index < PT_FPR0) {
+		if (index < PT_FPR0)
 			ret = ptrace_put_reg(child, index, data);
-		} else {
-			unsigned int fpidx = index - PT_FPR0;
-
-			flush_fp_to_thread(child);
-			if (fpidx < (PT_FPSCR - PT_FPR0))
-				memcpy(&child->thread.TS_FPR(fpidx), &data,
-				       sizeof(long));
-			else
-				child->thread.fp_state.fpscr = data;
-			ret = 0;
-		}
+		else
+			ret = ptrace_put_fpr(child, index, data);
 		break;
 	}
 
-- 
2.25.0


^ permalink raw reply related

* [RFC PATCH v1 00/19] powerpc: Switch signal 32 to using user_access_begin() and friends
From: Christophe Leroy @ 2020-08-12 12:05 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman, ldv,
	viro
  Cc: linuxppc-dev, linux-kernel

This series replaces copies to users by unsafe_put_user() and friends
with user_write_access_begin() dance in signal32.

The advantages are:
- No KUAP unlock/lock at every copy
- More readable code.
- Better generated code.

Copying Al Viro who did it on x86 and may have suggestions,
and Dmitry V. Levin who introduced put_compat_sigset()

Christophe Leroy (19):
  powerpc/signal: Move inline functions in signal.h
  powerpc/ptrace: Move declaration of ptrace_get_reg() and
    ptrace_set_reg()
  powerpc/ptrace: Consolidate reg index calculation
  powerpc/ptrace: Create ptrace_get_fpr() and ptrace_put_fpr()
  powerpc/signal: Don't manage floating point regs when no FPU
  powerpc/32s: Allow deselecting CONFIG_PPC_FPU on mpc832x
  powerpc/signal: Move access_ok() out of get_sigframe()
  powerpc/signal: Remove get_clean_sp()
  powerpc/signal: Call get_tm_stackpointer() from get_sigframe()
  powerpc/signal: Refactor bad frame logging
  powerpc/signal32: Simplify logging in handle_rt_signal32()
  powerpc/signal32: Regroup copies in save_user_regs() and
    save_tm_user_regs()
  powerpc/signal32: Create 'unsafe' versions of
    copy_[ck][fpr/vsx]_to_user()
  powerpc/signal32: Switch save_user_regs() and save_tm_user_regs() to
    user_access_begin() logic
  powerpc/signal32: Switch handle_signal32() to user_access_begin()
    logic
  powerpc/signal32: Switch handle_rt_signal32() to user_access_begin()
    logic
  signal: Add unsafe_put_compat_sigset()
  powerpc/signal32: Add and use unsafe_put_sigset_t()
  powerpc/signal32: Switch swap_context() to user_access_begin() logic

 arch/powerpc/Kconfig                     |   1 +
 arch/powerpc/include/asm/processor.h     |  16 +-
 arch/powerpc/include/asm/ptrace.h        |   6 -
 arch/powerpc/kernel/asm-offsets.c        |   2 +
 arch/powerpc/kernel/process.c            |   4 +
 arch/powerpc/kernel/ptrace/Makefile      |   3 +-
 arch/powerpc/kernel/ptrace/ptrace-decl.h |  21 ++
 arch/powerpc/kernel/ptrace/ptrace-fpu.c  |  40 +++
 arch/powerpc/kernel/ptrace/ptrace-view.c |   2 +
 arch/powerpc/kernel/ptrace/ptrace.c      |  54 +---
 arch/powerpc/kernel/signal.c             |  59 ++--
 arch/powerpc/kernel/signal.h             | 109 ++++++-
 arch/powerpc/kernel/signal_32.c          | 386 ++++++++++++-----------
 arch/powerpc/kernel/signal_64.c          |  19 +-
 arch/powerpc/kernel/traps.c              |   2 +
 arch/powerpc/platforms/Kconfig.cputype   |  15 +-
 include/linux/compat.h                   |  32 ++
 17 files changed, 462 insertions(+), 309 deletions(-)
 create mode 100644 arch/powerpc/kernel/ptrace/ptrace-fpu.c

-- 
2.25.0


^ permalink raw reply

* [RFC PATCH v1 02/19] powerpc/ptrace: Move declaration of ptrace_get_reg() and ptrace_set_reg()
From: Christophe Leroy @ 2020-08-12 12:05 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman, ldv,
	viro
  Cc: linuxppc-dev, linux-kernel
In-Reply-To: <cover.1597233555.git.christophe.leroy@csgroup.eu>

ptrace_get_reg() and ptrace_set_reg() are only used internally by
ptrace.

Move them in arch/powerpc/kernel/ptrace/ptrace-decl.h

Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
---
 arch/powerpc/include/asm/ptrace.h        | 6 ------
 arch/powerpc/kernel/ptrace/ptrace-decl.h | 3 +++
 2 files changed, 3 insertions(+), 6 deletions(-)

diff --git a/arch/powerpc/include/asm/ptrace.h b/arch/powerpc/include/asm/ptrace.h
index 155a197c0aa1..3c3cf537c3bf 100644
--- a/arch/powerpc/include/asm/ptrace.h
+++ b/arch/powerpc/include/asm/ptrace.h
@@ -171,12 +171,6 @@ static inline void regs_set_return_value(struct pt_regs *regs, unsigned long rc)
 		set_thread_flag(TIF_NOERROR); \
 	} while(0)
 
-struct task_struct;
-extern int ptrace_get_reg(struct task_struct *task, int regno,
-			  unsigned long *data);
-extern int ptrace_put_reg(struct task_struct *task, int regno,
-			  unsigned long data);
-
 #define current_pt_regs() \
 	((struct pt_regs *)((unsigned long)task_stack_page(current) + THREAD_SIZE) - 1)
 
diff --git a/arch/powerpc/kernel/ptrace/ptrace-decl.h b/arch/powerpc/kernel/ptrace/ptrace-decl.h
index 67447a6197eb..2ddc68412fa8 100644
--- a/arch/powerpc/kernel/ptrace/ptrace-decl.h
+++ b/arch/powerpc/kernel/ptrace/ptrace-decl.h
@@ -159,6 +159,9 @@ int tm_cgpr32_set(struct task_struct *target, const struct user_regset *regset,
 
 /* ptrace-view */
 
+int ptrace_get_reg(struct task_struct *task, int regno, unsigned long *data);
+int ptrace_put_reg(struct task_struct *task, int regno, unsigned long data);
+
 extern const struct user_regset_view user_ppc_native_view;
 
 /* ptrace-(no)adv */
-- 
2.25.0


^ permalink raw reply related

* [RFC PATCH v1 01/19] powerpc/signal: Move inline functions in signal.h
From: Christophe Leroy @ 2020-08-12 12:05 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman, ldv,
	viro
  Cc: linuxppc-dev, linux-kernel
In-Reply-To: <cover.1597233555.git.christophe.leroy@csgroup.eu>

To really be inlined, the functions needs to be defined in the
same C file as the caller, or in an included header.

Move functions from signal .c defined inline in signal.h

Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Fixes: 3dd4eb83a9c0 ("powerpc: move common register copy functions from signal_32.c to signal.c")
---
 arch/powerpc/kernel/signal.c | 30 --------------------------
 arch/powerpc/kernel/signal.h | 41 +++++++++++++++++++++++++++++-------
 2 files changed, 33 insertions(+), 38 deletions(-)

diff --git a/arch/powerpc/kernel/signal.c b/arch/powerpc/kernel/signal.c
index d15a98c758b8..3b56db02b762 100644
--- a/arch/powerpc/kernel/signal.c
+++ b/arch/powerpc/kernel/signal.c
@@ -133,36 +133,6 @@ unsigned long copy_ckvsx_from_user(struct task_struct *task,
 	return 0;
 }
 #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
-#else
-inline unsigned long copy_fpr_to_user(void __user *to,
-				      struct task_struct *task)
-{
-	return __copy_to_user(to, task->thread.fp_state.fpr,
-			      ELF_NFPREG * sizeof(double));
-}
-
-inline unsigned long copy_fpr_from_user(struct task_struct *task,
-					void __user *from)
-{
-	return __copy_from_user(task->thread.fp_state.fpr, from,
-			      ELF_NFPREG * sizeof(double));
-}
-
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
-inline unsigned long copy_ckfpr_to_user(void __user *to,
-					 struct task_struct *task)
-{
-	return __copy_to_user(to, task->thread.ckfp_state.fpr,
-			      ELF_NFPREG * sizeof(double));
-}
-
-inline unsigned long copy_ckfpr_from_user(struct task_struct *task,
-						 void __user *from)
-{
-	return __copy_from_user(task->thread.ckfp_state.fpr, from,
-				ELF_NFPREG * sizeof(double));
-}
-#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
 #endif
 
 /* Log an error when sending an unhandled signal to a process. Controlled
diff --git a/arch/powerpc/kernel/signal.h b/arch/powerpc/kernel/signal.h
index d396efca4068..4626d39cc0f0 100644
--- a/arch/powerpc/kernel/signal.h
+++ b/arch/powerpc/kernel/signal.h
@@ -19,14 +19,6 @@ extern int handle_signal32(struct ksignal *ksig, sigset_t *oldset,
 extern int handle_rt_signal32(struct ksignal *ksig, sigset_t *oldset,
 			      struct task_struct *tsk);
 
-extern unsigned long copy_fpr_to_user(void __user *to,
-				      struct task_struct *task);
-extern unsigned long copy_ckfpr_to_user(void __user *to,
-					       struct task_struct *task);
-extern unsigned long copy_fpr_from_user(struct task_struct *task,
-					void __user *from);
-extern unsigned long copy_ckfpr_from_user(struct task_struct *task,
-						 void __user *from);
 extern unsigned long get_tm_stackpointer(struct task_struct *tsk);
 
 #ifdef CONFIG_VSX
@@ -38,6 +30,39 @@ extern unsigned long copy_vsx_from_user(struct task_struct *task,
 					void __user *from);
 extern unsigned long copy_ckvsx_from_user(struct task_struct *task,
 						 void __user *from);
+unsigned long copy_fpr_to_user(void __user *to, struct task_struct *task);
+unsigned long copy_ckfpr_to_user(void __user *to, struct task_struct *task);
+unsigned long copy_fpr_from_user(struct task_struct *task, void __user *from);
+unsigned long copy_ckfpr_from_user(struct task_struct *task, void __user *from);
+#else
+static inline unsigned long
+copy_fpr_to_user(void __user *to, struct task_struct *task)
+{
+	return __copy_to_user(to, task->thread.fp_state.fpr,
+			      ELF_NFPREG * sizeof(double));
+}
+
+static inline unsigned long
+copy_fpr_from_user(struct task_struct *task, void __user *from)
+{
+	return __copy_from_user(task->thread.fp_state.fpr, from,
+			      ELF_NFPREG * sizeof(double));
+}
+
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+inline unsigned long copy_ckfpr_to_user(void __user *to, struct task_struct *task)
+{
+	return __copy_to_user(to, task->thread.ckfp_state.fpr,
+			      ELF_NFPREG * sizeof(double));
+}
+
+static inline unsigned long
+copy_ckfpr_from_user(struct task_struct *task, void __user *from)
+{
+	return __copy_from_user(task->thread.ckfp_state.fpr, from,
+				ELF_NFPREG * sizeof(double));
+}
+#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
 #endif
 
 #ifdef CONFIG_PPC64
-- 
2.25.0


^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox