LinuxPPC-Dev Archive on lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 05/16] debug_vm_pgtable/savedwrite: Enable savedwrite test with CONFIG_NUMA_BALANCING
From: Aneesh Kumar K.V @ 2020-08-12  6:33 UTC (permalink / raw)
  To: linux-mm, akpm; +Cc: linuxppc-dev, Aneesh Kumar K.V, Anshuman Khandual
In-Reply-To: <20200812063358.369514-1-aneesh.kumar@linux.ibm.com>

Saved write support was added to track the write bit of a pte after marking the
pte protnone. This was done so that AUTONUMA can convert a write pte to protnone
and still track the old write bit. When converting it back we set the pte write
bit correctly thereby avoiding a write fault again. Hence enable the test only
when CONFIG_NUMA_BALANCING is enabled.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
---
 mm/debug_vm_pgtable.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/mm/debug_vm_pgtable.c b/mm/debug_vm_pgtable.c
index 679bb3d289a3..de8a62d0a931 100644
--- a/mm/debug_vm_pgtable.c
+++ b/mm/debug_vm_pgtable.c
@@ -110,6 +110,7 @@ static void __init pte_advanced_tests(struct mm_struct *mm,
 	WARN_ON(pte_young(pte));
 }
 
+#ifdef CONFIG_NUMA_BALANCING
 static void __init pte_savedwrite_tests(unsigned long pfn, pgprot_t prot)
 {
 	pte_t pte = pfn_pte(pfn, prot);
@@ -118,6 +119,8 @@ static void __init pte_savedwrite_tests(unsigned long pfn, pgprot_t prot)
 	WARN_ON(!pte_savedwrite(pte_mk_savedwrite(pte_clear_savedwrite(pte))));
 	WARN_ON(pte_savedwrite(pte_clear_savedwrite(pte_mk_savedwrite(pte))));
 }
+#endif
+
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 static void __init pmd_basic_tests(unsigned long pfn, pgprot_t prot)
 {
@@ -221,6 +224,7 @@ static void __init pmd_huge_tests(pmd_t *pmdp, unsigned long pfn, pgprot_t prot)
 	WARN_ON(!pmd_none(pmd));
 }
 
+#ifdef CONFIG_NUMA_BALANCING
 static void __init pmd_savedwrite_tests(unsigned long pfn, pgprot_t prot)
 {
 	pmd_t pmd = pfn_pmd(pfn, prot);
@@ -229,6 +233,7 @@ static void __init pmd_savedwrite_tests(unsigned long pfn, pgprot_t prot)
 	WARN_ON(!pmd_savedwrite(pmd_mk_savedwrite(pmd_clear_savedwrite(pmd))));
 	WARN_ON(pmd_savedwrite(pmd_clear_savedwrite(pmd_mk_savedwrite(pmd))));
 }
+#endif
 
 #ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
 static void __init pud_basic_tests(unsigned long pfn, pgprot_t prot)
@@ -1005,8 +1010,10 @@ static int __init debug_vm_pgtable(void)
 	pmd_huge_tests(pmdp, pmd_aligned, prot);
 	pud_huge_tests(pudp, pud_aligned, prot);
 
+#ifdef CONFIG_NUMA_BALANCING
 	pte_savedwrite_tests(pte_aligned, prot);
 	pmd_savedwrite_tests(pmd_aligned, prot);
+#endif
 
 	pte_unmap_unlock(ptep, ptl);
 
-- 
2.26.2


^ permalink raw reply related

* [PATCH 06/16] debug_vm_pgtable/THP: Mark the pte entry huge before using set_pmd_at
From: Aneesh Kumar K.V @ 2020-08-12  6:33 UTC (permalink / raw)
  To: linux-mm, akpm; +Cc: linuxppc-dev, Aneesh Kumar K.V, Anshuman Khandual
In-Reply-To: <20200812063358.369514-1-aneesh.kumar@linux.ibm.com>

The kernel expects entries to be marked huge before we use set_pmd_at().

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
---
 mm/debug_vm_pgtable.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/mm/debug_vm_pgtable.c b/mm/debug_vm_pgtable.c
index de8a62d0a931..b6aca2526e01 100644
--- a/mm/debug_vm_pgtable.c
+++ b/mm/debug_vm_pgtable.c
@@ -149,7 +149,7 @@ static void __init pmd_advanced_tests(struct mm_struct *mm,
 				      unsigned long pfn, unsigned long vaddr,
 				      pgprot_t prot)
 {
-	pmd_t pmd = pfn_pmd(pfn, prot);
+	pmd_t pmd;
 
 	if (!has_transparent_hugepage())
 		return;
@@ -158,19 +158,19 @@ static void __init pmd_advanced_tests(struct mm_struct *mm,
 	/* Align the address wrt HPAGE_PMD_SIZE */
 	vaddr = (vaddr & HPAGE_PMD_MASK) + HPAGE_PMD_SIZE;
 
-	pmd = pfn_pmd(pfn, prot);
+	pmd = pmd_mkhuge(pfn_pmd(pfn, prot));
 	set_pmd_at(mm, vaddr, pmdp, pmd);
 	pmdp_set_wrprotect(mm, vaddr, pmdp);
 	pmd = READ_ONCE(*pmdp);
 	WARN_ON(pmd_write(pmd));
 
-	pmd = pfn_pmd(pfn, prot);
+	pmd = pmd_mkhuge(pfn_pmd(pfn, prot));
 	set_pmd_at(mm, vaddr, pmdp, pmd);
 	pmdp_huge_get_and_clear(mm, vaddr, pmdp);
 	pmd = READ_ONCE(*pmdp);
 	WARN_ON(!pmd_none(pmd));
 
-	pmd = pfn_pmd(pfn, prot);
+	pmd = pmd_mkhuge(pfn_pmd(pfn, prot));
 	pmd = pmd_wrprotect(pmd);
 	pmd = pmd_mkclean(pmd);
 	set_pmd_at(mm, vaddr, pmdp, pmd);
-- 
2.26.2


^ permalink raw reply related

* [PATCH 07/16] debug_vm_pgtable/THP: Mark the pte entry huge before using set_pud_at
From: Aneesh Kumar K.V @ 2020-08-12  6:33 UTC (permalink / raw)
  To: linux-mm, akpm; +Cc: linuxppc-dev, Aneesh Kumar K.V, Anshuman Khandual
In-Reply-To: <20200812063358.369514-1-aneesh.kumar@linux.ibm.com>

The kernel expects entries to be marked huge before we use set_pud_at().

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
---
 mm/debug_vm_pgtable.c | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/mm/debug_vm_pgtable.c b/mm/debug_vm_pgtable.c
index b6aca2526e01..cd609a212dd4 100644
--- a/mm/debug_vm_pgtable.c
+++ b/mm/debug_vm_pgtable.c
@@ -265,7 +265,7 @@ static void __init pud_advanced_tests(struct mm_struct *mm,
 				      unsigned long pfn, unsigned long vaddr,
 				      pgprot_t prot)
 {
-	pud_t pud = pfn_pud(pfn, prot);
+	pud_t pud;
 
 	if (!has_transparent_hugepage())
 		return;
@@ -274,25 +274,28 @@ static void __init pud_advanced_tests(struct mm_struct *mm,
 	/* Align the address wrt HPAGE_PUD_SIZE */
 	vaddr = (vaddr & HPAGE_PUD_MASK) + HPAGE_PUD_SIZE;
 
+	pud = pud_mkhuge(pfn_pud(pfn, prot));
 	set_pud_at(mm, vaddr, pudp, pud);
 	pudp_set_wrprotect(mm, vaddr, pudp);
 	pud = READ_ONCE(*pudp);
 	WARN_ON(pud_write(pud));
 
 #ifndef __PAGETABLE_PMD_FOLDED
-	pud = pfn_pud(pfn, prot);
+
+	pud = pud_mkhuge(pfn_pud(pfn, prot));
 	set_pud_at(mm, vaddr, pudp, pud);
 	pudp_huge_get_and_clear(mm, vaddr, pudp);
 	pud = READ_ONCE(*pudp);
 	WARN_ON(!pud_none(pud));
 
-	pud = pfn_pud(pfn, prot);
+	pud = pud_mkhuge(pfn_pud(pfn, prot));
 	set_pud_at(mm, vaddr, pudp, pud);
 	pudp_huge_get_and_clear_full(mm, vaddr, pudp, 1);
 	pud = READ_ONCE(*pudp);
 	WARN_ON(!pud_none(pud));
 #endif /* __PAGETABLE_PMD_FOLDED */
-	pud = pfn_pud(pfn, prot);
+
+	pud = pud_mkhuge(pfn_pud(pfn, prot));
 	pud = pud_wrprotect(pud);
 	pud = pud_mkclean(pud);
 	set_pud_at(mm, vaddr, pudp, pud);
-- 
2.26.2


^ permalink raw reply related

* [PATCH 08/16] debug_vm_pgtable/set_pmd: Don't use set_pmd_at to update an existing pmd entry
From: Aneesh Kumar K.V @ 2020-08-12  6:33 UTC (permalink / raw)
  To: linux-mm, akpm; +Cc: linuxppc-dev, Aneesh Kumar K.V, Anshuman Khandual
In-Reply-To: <20200812063358.369514-1-aneesh.kumar@linux.ibm.com>

set_pmd_at() should not be used to set a pte entry at locations that
already hold a valid pte entry. Architectures like ppc64 don't do a TLB
invalidate in set_pmd_at() and hence expect it to be used only on locations
that do not hold a valid PTE.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
---
 mm/debug_vm_pgtable.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/mm/debug_vm_pgtable.c b/mm/debug_vm_pgtable.c
index cd609a212dd4..60bf876081b8 100644
--- a/mm/debug_vm_pgtable.c
+++ b/mm/debug_vm_pgtable.c
@@ -164,8 +164,6 @@ static void __init pmd_advanced_tests(struct mm_struct *mm,
 	pmd = READ_ONCE(*pmdp);
 	WARN_ON(pmd_write(pmd));
 
-	pmd = pmd_mkhuge(pfn_pmd(pfn, prot));
-	set_pmd_at(mm, vaddr, pmdp, pmd);
 	pmdp_huge_get_and_clear(mm, vaddr, pmdp);
 	pmd = READ_ONCE(*pmdp);
 	WARN_ON(!pmd_none(pmd));
@@ -180,12 +178,11 @@ static void __init pmd_advanced_tests(struct mm_struct *mm,
 	pmd = READ_ONCE(*pmdp);
 	WARN_ON(!(pmd_write(pmd) && pmd_dirty(pmd)));
 
-	pmd = pmd_mkhuge(pfn_pmd(pfn, prot));
-	set_pmd_at(mm, vaddr, pmdp, pmd);
 	pmdp_huge_get_and_clear_full(vma, vaddr, pmdp, 1);
 	pmd = READ_ONCE(*pmdp);
 	WARN_ON(!pmd_none(pmd));
 
+	pmd = pmd_mkhuge(pfn_pmd(pfn, prot));
 	pmd = pmd_mkyoung(pmd);
 	set_pmd_at(mm, vaddr, pmdp, pmd);
 	pmdp_test_and_clear_young(vma, vaddr, pmdp);
-- 
2.26.2


^ permalink raw reply related

* [PATCH 09/16] debug_vm_pgtable/set_pud: Don't use set_pud_at to update an existing pud entry
From: Aneesh Kumar K.V @ 2020-08-12  6:33 UTC (permalink / raw)
  To: linux-mm, akpm; +Cc: linuxppc-dev, Aneesh Kumar K.V, Anshuman Khandual
In-Reply-To: <20200812063358.369514-1-aneesh.kumar@linux.ibm.com>

set_pud_at() should not be used to set a pte entry at locations that
already hold a valid pte entry. Architectures like ppc64 don't do a TLB
invalidate in set_pud_at() and hence expect it to be used only on locations
that do not hold a valid PTE.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
---
 mm/debug_vm_pgtable.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/mm/debug_vm_pgtable.c b/mm/debug_vm_pgtable.c
index 60bf876081b8..644d28861ce9 100644
--- a/mm/debug_vm_pgtable.c
+++ b/mm/debug_vm_pgtable.c
@@ -278,9 +278,6 @@ static void __init pud_advanced_tests(struct mm_struct *mm,
 	WARN_ON(pud_write(pud));
 
 #ifndef __PAGETABLE_PMD_FOLDED
-
-	pud = pud_mkhuge(pfn_pud(pfn, prot));
-	set_pud_at(mm, vaddr, pudp, pud);
 	pudp_huge_get_and_clear(mm, vaddr, pudp);
 	pud = READ_ONCE(*pudp);
 	WARN_ON(!pud_none(pud));
@@ -302,6 +299,11 @@ static void __init pud_advanced_tests(struct mm_struct *mm,
 	pud = READ_ONCE(*pudp);
 	WARN_ON(!(pud_write(pud) && pud_dirty(pud)));
 
+	pudp_huge_get_and_clear_full(vma, vaddr, pudp, 1);
+	pud = READ_ONCE(*pudp);
+	WARN_ON(!pud_none(pud));
+
+	pud = pud_mkhuge(pfn_pud(pfn, prot));
 	pud = pud_mkyoung(pud);
 	set_pud_at(mm, vaddr, pudp, pud);
 	pudp_test_and_clear_young(vma, vaddr, pudp);
-- 
2.26.2


^ permalink raw reply related

* [PATCH 10/16] debug_vm_pgtable/thp: Use page table deposit/withdraw with THP
From: Aneesh Kumar K.V @ 2020-08-12  6:33 UTC (permalink / raw)
  To: linux-mm, akpm; +Cc: linuxppc-dev, Aneesh Kumar K.V, Anshuman Khandual
In-Reply-To: <20200812063358.369514-1-aneesh.kumar@linux.ibm.com>

Architectures like ppc64 use deposited page table while updating the huge pte
entries.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
---
 mm/debug_vm_pgtable.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/mm/debug_vm_pgtable.c b/mm/debug_vm_pgtable.c
index 644d28861ce9..48475d288df1 100644
--- a/mm/debug_vm_pgtable.c
+++ b/mm/debug_vm_pgtable.c
@@ -147,7 +147,7 @@ static void __init pmd_basic_tests(unsigned long pfn, pgprot_t prot)
 static void __init pmd_advanced_tests(struct mm_struct *mm,
 				      struct vm_area_struct *vma, pmd_t *pmdp,
 				      unsigned long pfn, unsigned long vaddr,
-				      pgprot_t prot)
+				      pgprot_t prot, pgtable_t pgtable)
 {
 	pmd_t pmd;
 
@@ -158,6 +158,8 @@ static void __init pmd_advanced_tests(struct mm_struct *mm,
 	/* Align the address wrt HPAGE_PMD_SIZE */
 	vaddr = (vaddr & HPAGE_PMD_MASK) + HPAGE_PMD_SIZE;
 
+	pgtable_trans_huge_deposit(mm, pmdp, pgtable);
+
 	pmd = pmd_mkhuge(pfn_pmd(pfn, prot));
 	set_pmd_at(mm, vaddr, pmdp, pmd);
 	pmdp_set_wrprotect(mm, vaddr, pmdp);
@@ -188,6 +190,8 @@ static void __init pmd_advanced_tests(struct mm_struct *mm,
 	pmdp_test_and_clear_young(vma, vaddr, pmdp);
 	pmd = READ_ONCE(*pmdp);
 	WARN_ON(pmd_young(pmd));
+
+	pgtable = pgtable_trans_huge_withdraw(mm, pmdp);
 }
 
 static void __init pmd_leaf_tests(unsigned long pfn, pgprot_t prot)
@@ -1002,7 +1006,7 @@ static int __init debug_vm_pgtable(void)
 	pgd_clear_tests(mm, pgdp);
 
 	pte_advanced_tests(mm, vma, ptep, pte_aligned, vaddr, prot);
-	pmd_advanced_tests(mm, vma, pmdp, pmd_aligned, vaddr, prot);
+	pmd_advanced_tests(mm, vma, pmdp, pmd_aligned, vaddr, prot, saved_ptep);
 	pud_advanced_tests(mm, vma, pudp, pud_aligned, vaddr, prot);
 	hugetlb_advanced_tests(mm, vma, ptep, pte_aligned, vaddr, prot);
 
-- 
2.26.2


^ permalink raw reply related

* [PATCH 11/16] debug_vm_pgtable/locks: Move non page table modifying test together
From: Aneesh Kumar K.V @ 2020-08-12  6:33 UTC (permalink / raw)
  To: linux-mm, akpm; +Cc: linuxppc-dev, Aneesh Kumar K.V, Anshuman Khandual
In-Reply-To: <20200812063358.369514-1-aneesh.kumar@linux.ibm.com>

This will help in adding proper locks in a later patch.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
---
 mm/debug_vm_pgtable.c | 53 +++++++++++++++++++++++--------------------
 1 file changed, 29 insertions(+), 24 deletions(-)

diff --git a/mm/debug_vm_pgtable.c b/mm/debug_vm_pgtable.c
index 48475d288df1..90e9c2d3a092 100644
--- a/mm/debug_vm_pgtable.c
+++ b/mm/debug_vm_pgtable.c
@@ -979,7 +979,7 @@ static int __init debug_vm_pgtable(void)
 	p4dp = p4d_alloc(mm, pgdp, vaddr);
 	pudp = pud_alloc(mm, p4dp, vaddr);
 	pmdp = pmd_alloc(mm, pudp, vaddr);
-	ptep = pte_alloc_map_lock(mm, pmdp, vaddr, &ptl);
+	ptep = pte_alloc_map(mm, pmdp, vaddr);
 
 	/*
 	 * Save all the page table page addresses as the page table
@@ -999,35 +999,13 @@ static int __init debug_vm_pgtable(void)
 	p4d_basic_tests(p4d_aligned, prot);
 	pgd_basic_tests(pgd_aligned, prot);
 
-	pte_clear_tests(mm, ptep, vaddr);
-	pmd_clear_tests(mm, pmdp);
-	pud_clear_tests(mm, pudp);
-	p4d_clear_tests(mm, p4dp);
-	pgd_clear_tests(mm, pgdp);
-
-	pte_advanced_tests(mm, vma, ptep, pte_aligned, vaddr, prot);
-	pmd_advanced_tests(mm, vma, pmdp, pmd_aligned, vaddr, prot, saved_ptep);
-	pud_advanced_tests(mm, vma, pudp, pud_aligned, vaddr, prot);
-	hugetlb_advanced_tests(mm, vma, ptep, pte_aligned, vaddr, prot);
-
 	pmd_leaf_tests(pmd_aligned, prot);
 	pud_leaf_tests(pud_aligned, prot);
 
-	pmd_huge_tests(pmdp, pmd_aligned, prot);
-	pud_huge_tests(pudp, pud_aligned, prot);
-
 #ifdef CONFIG_NUMA_BALANCING
 	pte_savedwrite_tests(pte_aligned, prot);
 	pmd_savedwrite_tests(pmd_aligned, prot);
 #endif
-
-	pte_unmap_unlock(ptep, ptl);
-
-	pmd_populate_tests(mm, pmdp, saved_ptep);
-	pud_populate_tests(mm, pudp, saved_pmdp);
-	p4d_populate_tests(mm, p4dp, saved_pudp);
-	pgd_populate_tests(mm, pgdp, saved_p4dp);
-
 	pte_special_tests(pte_aligned, prot);
 	pte_protnone_tests(pte_aligned, protnone);
 	pmd_protnone_tests(pmd_aligned, protnone);
@@ -1045,11 +1023,38 @@ static int __init debug_vm_pgtable(void)
 	pmd_swap_tests(pmd_aligned, prot);
 
 	swap_migration_tests();
-	hugetlb_basic_tests(pte_aligned, prot);
 
 	pmd_thp_tests(pmd_aligned, prot);
 	pud_thp_tests(pud_aligned, prot);
 
+	/*
+	 * Page table modifying tests
+	 */
+	pte_clear_tests(mm, ptep, vaddr);
+	pmd_clear_tests(mm, pmdp);
+	pud_clear_tests(mm, pudp);
+	p4d_clear_tests(mm, p4dp);
+	pgd_clear_tests(mm, pgdp);
+
+	ptep = pte_alloc_map_lock(mm, pmdp, vaddr, &ptl);
+	pte_advanced_tests(mm, vma, ptep, pte_aligned, vaddr, prot);
+	pmd_advanced_tests(mm, vma, pmdp, pmd_aligned, vaddr, prot, saved_ptep);
+	pud_advanced_tests(mm, vma, pudp, pud_aligned, vaddr, prot);
+	hugetlb_advanced_tests(mm, vma, ptep, pte_aligned, vaddr, prot);
+
+
+	pmd_huge_tests(pmdp, pmd_aligned, prot);
+	pud_huge_tests(pudp, pud_aligned, prot);
+
+	pte_unmap_unlock(ptep, ptl);
+
+	pmd_populate_tests(mm, pmdp, saved_ptep);
+	pud_populate_tests(mm, pudp, saved_pmdp);
+	p4d_populate_tests(mm, p4dp, saved_pudp);
+	pgd_populate_tests(mm, pgdp, saved_p4dp);
+
+	hugetlb_basic_tests(pte_aligned, prot);
+
 	p4d_free(mm, saved_p4dp);
 	pud_free(mm, saved_pudp);
 	pmd_free(mm, saved_pmdp);
-- 
2.26.2


^ permalink raw reply related

* [PATCH 12/16] debug_vm_pgtable/locks: Take correct page table lock
From: Aneesh Kumar K.V @ 2020-08-12  6:33 UTC (permalink / raw)
  To: linux-mm, akpm; +Cc: linuxppc-dev, Aneesh Kumar K.V, Anshuman Khandual
In-Reply-To: <20200812063358.369514-1-aneesh.kumar@linux.ibm.com>

Make sure we call pte accessors with correct lock held.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
---
 mm/debug_vm_pgtable.c | 34 ++++++++++++++++++++--------------
 1 file changed, 20 insertions(+), 14 deletions(-)

diff --git a/mm/debug_vm_pgtable.c b/mm/debug_vm_pgtable.c
index 90e9c2d3a092..061c19bba7f0 100644
--- a/mm/debug_vm_pgtable.c
+++ b/mm/debug_vm_pgtable.c
@@ -1027,33 +1027,39 @@ static int __init debug_vm_pgtable(void)
 	pmd_thp_tests(pmd_aligned, prot);
 	pud_thp_tests(pud_aligned, prot);
 
+	hugetlb_basic_tests(pte_aligned, prot);
+
 	/*
 	 * Page table modifying tests
 	 */
-	pte_clear_tests(mm, ptep, vaddr);
-	pmd_clear_tests(mm, pmdp);
-	pud_clear_tests(mm, pudp);
-	p4d_clear_tests(mm, p4dp);
-	pgd_clear_tests(mm, pgdp);
 
 	ptep = pte_alloc_map_lock(mm, pmdp, vaddr, &ptl);
+	pte_clear_tests(mm, ptep, vaddr);
 	pte_advanced_tests(mm, vma, ptep, pte_aligned, vaddr, prot);
-	pmd_advanced_tests(mm, vma, pmdp, pmd_aligned, vaddr, prot, saved_ptep);
-	pud_advanced_tests(mm, vma, pudp, pud_aligned, vaddr, prot);
-	hugetlb_advanced_tests(mm, vma, ptep, pte_aligned, vaddr, prot);
-
+	pte_unmap_unlock(ptep, ptl);
 
+	ptl = pmd_lock(mm, pmdp);
+	pmd_clear_tests(mm, pmdp);
+	pmd_advanced_tests(mm, vma, pmdp, pmd_aligned, vaddr, prot, saved_ptep);
 	pmd_huge_tests(pmdp, pmd_aligned, prot);
+	pmd_populate_tests(mm, pmdp, saved_ptep);
+	spin_unlock(ptl);
+
+	ptl = pud_lock(mm, pudp);
+	pud_clear_tests(mm, pudp);
+	pud_advanced_tests(mm, vma, pudp, pud_aligned, vaddr, prot);
 	pud_huge_tests(pudp, pud_aligned, prot);
+	pud_populate_tests(mm, pudp, saved_pmdp);
+	spin_unlock(ptl);
 
-	pte_unmap_unlock(ptep, ptl);
+	//hugetlb_advanced_tests(mm, vma, ptep, pte_aligned, vaddr, prot);
 
-	pmd_populate_tests(mm, pmdp, saved_ptep);
-	pud_populate_tests(mm, pudp, saved_pmdp);
+	spin_lock(&mm->page_table_lock);
+	p4d_clear_tests(mm, p4dp);
+	pgd_clear_tests(mm, pgdp);
 	p4d_populate_tests(mm, p4dp, saved_pudp);
 	pgd_populate_tests(mm, pgdp, saved_p4dp);
-
-	hugetlb_basic_tests(pte_aligned, prot);
+	spin_unlock(&mm->page_table_lock);
 
 	p4d_free(mm, saved_p4dp);
 	pud_free(mm, saved_pudp);
-- 
2.26.2


^ permalink raw reply related

* [PATCH 13/16] debug_vm_pgtable/pmd_clear: Don't use pmd/pud_clear on pte entries
From: Aneesh Kumar K.V @ 2020-08-12  6:33 UTC (permalink / raw)
  To: linux-mm, akpm; +Cc: linuxppc-dev, Aneesh Kumar K.V, Anshuman Khandual
In-Reply-To: <20200812063358.369514-1-aneesh.kumar@linux.ibm.com>

pmd_clear() should not be used to clear pmd level pte entries.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
---
 mm/debug_vm_pgtable.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/mm/debug_vm_pgtable.c b/mm/debug_vm_pgtable.c
index 061c19bba7f0..529892b9be2f 100644
--- a/mm/debug_vm_pgtable.c
+++ b/mm/debug_vm_pgtable.c
@@ -191,6 +191,8 @@ static void __init pmd_advanced_tests(struct mm_struct *mm,
 	pmd = READ_ONCE(*pmdp);
 	WARN_ON(pmd_young(pmd));
 
+	/*  Clear the pte entries  */
+	pmdp_huge_get_and_clear(mm, vaddr, pmdp);
 	pgtable = pgtable_trans_huge_withdraw(mm, pmdp);
 }
 
@@ -313,6 +315,8 @@ static void __init pud_advanced_tests(struct mm_struct *mm,
 	pudp_test_and_clear_young(vma, vaddr, pudp);
 	pud = READ_ONCE(*pudp);
 	WARN_ON(pud_young(pud));
+
+	pudp_huge_get_and_clear(mm, vaddr, pudp);
 }
 
 static void __init pud_leaf_tests(unsigned long pfn, pgprot_t prot)
@@ -431,8 +435,6 @@ static void __init pud_populate_tests(struct mm_struct *mm, pud_t *pudp,
 	 * This entry points to next level page table page.
 	 * Hence this must not qualify as pud_bad().
 	 */
-	pmd_clear(pmdp);
-	pud_clear(pudp);
 	pud_populate(mm, pudp, pmdp);
 	pud = READ_ONCE(*pudp);
 	WARN_ON(pud_bad(pud));
@@ -564,7 +566,6 @@ static void __init pmd_populate_tests(struct mm_struct *mm, pmd_t *pmdp,
 	 * This entry points to next level page table page.
 	 * Hence this must not qualify as pmd_bad().
 	 */
-	pmd_clear(pmdp);
 	pmd_populate(mm, pmdp, pgtable);
 	pmd = READ_ONCE(*pmdp);
 	WARN_ON(pmd_bad(pmd));
-- 
2.26.2


^ permalink raw reply related

* [PATCH 14/16] debug_vm_pgtable/hugetlb: Disable hugetlb test on ppc64
From: Aneesh Kumar K.V @ 2020-08-12  6:33 UTC (permalink / raw)
  To: linux-mm, akpm; +Cc: linuxppc-dev, Aneesh Kumar K.V, Anshuman Khandual
In-Reply-To: <20200812063358.369514-1-aneesh.kumar@linux.ibm.com>

The test seems to be missing quite a lot of details w.r.t. allocating
the correct pgtable_t page (huge_pte_alloc()), holding the right
lock (huge_pte_lock()), etc. The vma used is also not a hugetlb VMA.

ppc64 does have runtime checks within CONFIG_DEBUG_VM for most of these.
Hence disable the test on ppc64.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
---
 mm/debug_vm_pgtable.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/mm/debug_vm_pgtable.c b/mm/debug_vm_pgtable.c
index 529892b9be2f..3e112d0ba1b2 100644
--- a/mm/debug_vm_pgtable.c
+++ b/mm/debug_vm_pgtable.c
@@ -800,6 +800,7 @@ static void __init hugetlb_basic_tests(unsigned long pfn, pgprot_t prot)
 #endif /* CONFIG_ARCH_WANT_GENERAL_HUGETLB */
 }
 
+#ifndef CONFIG_PPC_BOOK3S_64
 static void __init hugetlb_advanced_tests(struct mm_struct *mm,
 					  struct vm_area_struct *vma,
 					  pte_t *ptep, unsigned long pfn,
@@ -842,6 +843,7 @@ static void __init hugetlb_advanced_tests(struct mm_struct *mm,
 	pte = huge_ptep_get(ptep);
 	WARN_ON(!(huge_pte_write(pte) && huge_pte_dirty(pte)));
 }
+#endif
 #else  /* !CONFIG_HUGETLB_PAGE */
 static void __init hugetlb_basic_tests(unsigned long pfn, pgprot_t prot) { }
 static void __init hugetlb_advanced_tests(struct mm_struct *mm,
@@ -1053,7 +1055,9 @@ static int __init debug_vm_pgtable(void)
 	pud_populate_tests(mm, pudp, saved_pmdp);
 	spin_unlock(ptl);
 
-	//hugetlb_advanced_tests(mm, vma, ptep, pte_aligned, vaddr, prot);
+#ifndef CONFIG_PPC_BOOK3S_64
+	hugetlb_advanced_tests(mm, vma, ptep, pte_aligned, vaddr, prot);
+#endif
 
 	spin_lock(&mm->page_table_lock);
 	p4d_clear_tests(mm, p4dp);
-- 
2.26.2


^ permalink raw reply related

* [PATCH 15/16] debug_vm_pgtable/savedwrite: Use savedwrite test with protnone ptes
From: Aneesh Kumar K.V @ 2020-08-12  6:33 UTC (permalink / raw)
  To: linux-mm, akpm; +Cc: linuxppc-dev, Aneesh Kumar K.V, Anshuman Khandual
In-Reply-To: <20200812063358.369514-1-aneesh.kumar@linux.ibm.com>

Saved write support was added to track the write bit of a pte after marking the
pte protnone. This was done so that AUTONUMA can convert a write pte to protnone
and still track the old write bit. When converting it back we set the pte write
bit correctly thereby avoiding a write fault again.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
---
 mm/debug_vm_pgtable.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/mm/debug_vm_pgtable.c b/mm/debug_vm_pgtable.c
index 3e112d0ba1b2..eea62d5e503b 100644
--- a/mm/debug_vm_pgtable.c
+++ b/mm/debug_vm_pgtable.c
@@ -1006,8 +1006,8 @@ static int __init debug_vm_pgtable(void)
 	pud_leaf_tests(pud_aligned, prot);
 
 #ifdef CONFIG_NUMA_BALANCING
-	pte_savedwrite_tests(pte_aligned, prot);
-	pmd_savedwrite_tests(pmd_aligned, prot);
+	pte_savedwrite_tests(pte_aligned, protnone);
+	pmd_savedwrite_tests(pmd_aligned, protnone);
 #endif
 	pte_special_tests(pte_aligned, prot);
 	pte_protnone_tests(pte_aligned, protnone);
-- 
2.26.2


^ permalink raw reply related

* [PATCH 16/16] debug_vm_pgtable/ppc64: Add a variant of pfn_pte/pmd
From: Aneesh Kumar K.V @ 2020-08-12  6:33 UTC (permalink / raw)
  To: linux-mm, akpm; +Cc: linuxppc-dev, Aneesh Kumar K.V, Anshuman Khandual
In-Reply-To: <20200812063358.369514-1-aneesh.kumar@linux.ibm.com>

The tests expect the _PAGE_PTE bit to be set by the different page table
accessors. This is not true for the kernel. Within the kernel, the _PAGE_PTE
bit is usually set by set_pte_at(). To make the tests below work correctly, add
test-specific pfn_pte/pmd helpers that set the _PAGE_PTE bit.

pte_t pte = pfn_pte(pfn, prot);
WARN_ON(!pte_devmap(pte_mkdevmap(pte)));
WARN_ON(!pte_savedwrite(pte_mk_savedwrite(pte_clear_savedwrite(pte))));

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
---
 mm/debug_vm_pgtable.c | 65 +++++++++++++++++++++++++++----------------
 1 file changed, 41 insertions(+), 24 deletions(-)

diff --git a/mm/debug_vm_pgtable.c b/mm/debug_vm_pgtable.c
index eea62d5e503b..153c925b5273 100644
--- a/mm/debug_vm_pgtable.c
+++ b/mm/debug_vm_pgtable.c
@@ -31,6 +31,23 @@
 #include <asm/pgalloc.h>
 #include <asm/tlbflush.h>
 
+#ifdef CONFIG_PPC_BOOK3S_64
+static inline pte_t debug_vm_pfn_pte(unsigned long pfn, pgprot_t pgprot)
+{
+	pte_t pte = pfn_pte(pfn, pgprot);
+	return __pte(pte_val(pte) | _PAGE_PTE);
+
+}
+static inline pmd_t debug_vm_pfn_pmd(unsigned long pfn, pgprot_t pgprot)
+{
+	pmd_t pmd = pfn_pmd(pfn, pgprot);
+	return __pmd(pmd_val(pmd) | _PAGE_PTE);
+}
+#else
+#define debug_vm_pfn_pte(pfn, pgprot) pfn_pte(pfn, pgprot)
+#define debug_vm_pfn_pmd(pfn, pgprot) pfn_pmd(pfn, pgprot)
+#endif
+
 /*
  * Please refer Documentation/vm/arch_pgtable_helpers.rst for the semantics
  * expectations that are being validated here. All future changes in here
@@ -55,7 +72,7 @@
 
 static void __init pte_basic_tests(unsigned long pfn, pgprot_t prot)
 {
-	pte_t pte = pfn_pte(pfn, prot);
+	pte_t pte = debug_vm_pfn_pte(pfn, prot);
 
 	pr_debug("Validating PTE basic\n");
 	WARN_ON(!pte_same(pte, pte));
@@ -72,10 +89,10 @@ static void __init pte_advanced_tests(struct mm_struct *mm,
 				      unsigned long pfn, unsigned long vaddr,
 				      pgprot_t prot)
 {
-	pte_t pte = pfn_pte(pfn, prot);
+	pte_t pte = debug_vm_pfn_pte(pfn, prot);
 
 	pr_debug("Validating PTE advanced\n");
-	pte = pfn_pte(pfn, prot);
+	pte = debug_vm_pfn_pte(pfn, prot);
 	set_pte_at(mm, vaddr, ptep, pte);
 	ptep_set_wrprotect(mm, vaddr, ptep);
 	pte = ptep_get(ptep);
@@ -85,7 +102,7 @@ static void __init pte_advanced_tests(struct mm_struct *mm,
 	pte = ptep_get(ptep);
 	WARN_ON(!pte_none(pte));
 
-	pte = pfn_pte(pfn, prot);
+	pte = debug_vm_pfn_pte(pfn, prot);
 	pte = pte_wrprotect(pte);
 	pte = pte_mkclean(pte);
 	set_pte_at(mm, vaddr, ptep, pte);
@@ -113,7 +130,7 @@ static void __init pte_advanced_tests(struct mm_struct *mm,
 #ifdef CONFIG_NUMA_BALANCING
 static void __init pte_savedwrite_tests(unsigned long pfn, pgprot_t prot)
 {
-	pte_t pte = pfn_pte(pfn, prot);
+	pte_t pte = debug_vm_pfn_pte(pfn, prot);
 
 	pr_debug("Validating PTE saved write\n");
 	WARN_ON(!pte_savedwrite(pte_mk_savedwrite(pte_clear_savedwrite(pte))));
@@ -124,7 +141,7 @@ static void __init pte_savedwrite_tests(unsigned long pfn, pgprot_t prot)
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 static void __init pmd_basic_tests(unsigned long pfn, pgprot_t prot)
 {
-	pmd_t pmd = pfn_pmd(pfn, prot);
+	pmd_t pmd = debug_vm_pfn_pmd(pfn, prot);
 
 	if (!has_transparent_hugepage())
 		return;
@@ -160,7 +177,7 @@ static void __init pmd_advanced_tests(struct mm_struct *mm,
 
 	pgtable_trans_huge_deposit(mm, pmdp, pgtable);
 
-	pmd = pmd_mkhuge(pfn_pmd(pfn, prot));
+	pmd = pmd_mkhuge(debug_vm_pfn_pmd(pfn, prot));
 	set_pmd_at(mm, vaddr, pmdp, pmd);
 	pmdp_set_wrprotect(mm, vaddr, pmdp);
 	pmd = READ_ONCE(*pmdp);
@@ -170,7 +187,7 @@ static void __init pmd_advanced_tests(struct mm_struct *mm,
 	pmd = READ_ONCE(*pmdp);
 	WARN_ON(!pmd_none(pmd));
 
-	pmd = pmd_mkhuge(pfn_pmd(pfn, prot));
+	pmd = pmd_mkhuge(debug_vm_pfn_pmd(pfn, prot));
 	pmd = pmd_wrprotect(pmd);
 	pmd = pmd_mkclean(pmd);
 	set_pmd_at(mm, vaddr, pmdp, pmd);
@@ -184,7 +201,7 @@ static void __init pmd_advanced_tests(struct mm_struct *mm,
 	pmd = READ_ONCE(*pmdp);
 	WARN_ON(!pmd_none(pmd));
 
-	pmd = pmd_mkhuge(pfn_pmd(pfn, prot));
+	pmd = pmd_mkhuge(debug_vm_pfn_pmd(pfn, prot));
 	pmd = pmd_mkyoung(pmd);
 	set_pmd_at(mm, vaddr, pmdp, pmd);
 	pmdp_test_and_clear_young(vma, vaddr, pmdp);
@@ -198,7 +215,7 @@ static void __init pmd_advanced_tests(struct mm_struct *mm,
 
 static void __init pmd_leaf_tests(unsigned long pfn, pgprot_t prot)
 {
-	pmd_t pmd = pfn_pmd(pfn, prot);
+	pmd_t pmd = debug_vm_pfn_pmd(pfn, prot);
 
 	pr_debug("Validating PMD leaf\n");
 	/*
@@ -230,7 +247,7 @@ static void __init pmd_huge_tests(pmd_t *pmdp, unsigned long pfn, pgprot_t prot)
 #ifdef CONFIG_NUMA_BALANCING
 static void __init pmd_savedwrite_tests(unsigned long pfn, pgprot_t prot)
 {
-	pmd_t pmd = pfn_pmd(pfn, prot);
+	pmd_t pmd = debug_vm_pfn_pmd(pfn, prot);
 
 	pr_debug("Validating PMD saved write\n");
 	WARN_ON(!pmd_savedwrite(pmd_mk_savedwrite(pmd_clear_savedwrite(pmd))));
@@ -573,7 +590,7 @@ static void __init pmd_populate_tests(struct mm_struct *mm, pmd_t *pmdp,
 
 static void __init pte_special_tests(unsigned long pfn, pgprot_t prot)
 {
-	pte_t pte = pfn_pte(pfn, prot);
+	pte_t pte = debug_vm_pfn_pte(pfn, prot);
 
 	if (!IS_ENABLED(CONFIG_ARCH_HAS_PTE_SPECIAL))
 		return;
@@ -584,7 +601,7 @@ static void __init pte_special_tests(unsigned long pfn, pgprot_t prot)
 
 static void __init pte_protnone_tests(unsigned long pfn, pgprot_t prot)
 {
-	pte_t pte = pfn_pte(pfn, prot);
+	pte_t pte = debug_vm_pfn_pte(pfn, prot);
 
 	if (!IS_ENABLED(CONFIG_NUMA_BALANCING))
 		return;
@@ -597,7 +614,7 @@ static void __init pte_protnone_tests(unsigned long pfn, pgprot_t prot)
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 static void __init pmd_protnone_tests(unsigned long pfn, pgprot_t prot)
 {
-	pmd_t pmd = pmd_mkhuge(pfn_pmd(pfn, prot));
+	pmd_t pmd = pmd_mkhuge(debug_vm_pfn_pmd(pfn, prot));
 
 	if (!IS_ENABLED(CONFIG_NUMA_BALANCING))
 		return;
@@ -613,7 +630,7 @@ static void __init pmd_protnone_tests(unsigned long pfn, pgprot_t prot) { }
 #ifdef CONFIG_ARCH_HAS_PTE_DEVMAP
 static void __init pte_devmap_tests(unsigned long pfn, pgprot_t prot)
 {
-	pte_t pte = pfn_pte(pfn, prot);
+	pte_t pte = debug_vm_pfn_pte(pfn, prot);
 
 	pr_debug("Validating PTE devmap\n");
 	WARN_ON(!pte_devmap(pte_mkdevmap(pte)));
@@ -622,7 +639,7 @@ static void __init pte_devmap_tests(unsigned long pfn, pgprot_t prot)
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 static void __init pmd_devmap_tests(unsigned long pfn, pgprot_t prot)
 {
-	pmd_t pmd = pfn_pmd(pfn, prot);
+	pmd_t pmd = debug_vm_pfn_pmd(pfn, prot);
 
 	pr_debug("Validating PMD devmap\n");
 	WARN_ON(!pmd_devmap(pmd_mkdevmap(pmd)));
@@ -651,7 +668,7 @@ static void __init pud_devmap_tests(unsigned long pfn, pgprot_t prot) { }
 
 static void __init pte_soft_dirty_tests(unsigned long pfn, pgprot_t prot)
 {
-	pte_t pte = pfn_pte(pfn, prot);
+	pte_t pte = debug_vm_pfn_pte(pfn, prot);
 
 	if (!IS_ENABLED(CONFIG_MEM_SOFT_DIRTY))
 		return;
@@ -663,7 +680,7 @@ static void __init pte_soft_dirty_tests(unsigned long pfn, pgprot_t prot)
 
 static void __init pte_swap_soft_dirty_tests(unsigned long pfn, pgprot_t prot)
 {
-	pte_t pte = pfn_pte(pfn, prot);
+	pte_t pte = debug_vm_pfn_pte(pfn, prot);
 
 	if (!IS_ENABLED(CONFIG_MEM_SOFT_DIRTY))
 		return;
@@ -676,7 +693,7 @@ static void __init pte_swap_soft_dirty_tests(unsigned long pfn, pgprot_t prot)
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 static void __init pmd_soft_dirty_tests(unsigned long pfn, pgprot_t prot)
 {
-	pmd_t pmd = pfn_pmd(pfn, prot);
+	pmd_t pmd = debug_vm_pfn_pmd(pfn, prot);
 
 	if (!IS_ENABLED(CONFIG_MEM_SOFT_DIRTY))
 		return;
@@ -688,7 +705,7 @@ static void __init pmd_soft_dirty_tests(unsigned long pfn, pgprot_t prot)
 
 static void __init pmd_swap_soft_dirty_tests(unsigned long pfn, pgprot_t prot)
 {
-	pmd_t pmd = pfn_pmd(pfn, prot);
+	pmd_t pmd = debug_vm_pfn_pmd(pfn, prot);
 
 	if (!IS_ENABLED(CONFIG_MEM_SOFT_DIRTY) ||
 		!IS_ENABLED(CONFIG_ARCH_ENABLE_THP_MIGRATION))
@@ -711,7 +728,7 @@ static void __init pte_swap_tests(unsigned long pfn, pgprot_t prot)
 	pte_t pte;
 
 	pr_debug("Validating PTE swap\n");
-	pte = pfn_pte(pfn, prot);
+	pte = debug_vm_pfn_pte(pfn, prot);
 	swp = __pte_to_swp_entry(pte);
 	pte = __swp_entry_to_pte(swp);
 	WARN_ON(pfn != pte_pfn(pte));
@@ -724,7 +741,7 @@ static void __init pmd_swap_tests(unsigned long pfn, pgprot_t prot)
 	pmd_t pmd;
 
 	pr_debug("Validating PMD swap\n");
-	pmd = pfn_pmd(pfn, prot);
+	pmd = debug_vm_pfn_pmd(pfn, prot);
 	swp = __pmd_to_swp_entry(pmd);
 	pmd = __swp_entry_to_pmd(swp);
 	WARN_ON(pfn != pmd_pfn(pmd));
@@ -794,7 +811,7 @@ static void __init hugetlb_basic_tests(unsigned long pfn, pgprot_t prot)
 	WARN_ON(huge_pte_write(huge_pte_wrprotect(huge_pte_mkwrite(pte))));
 
 #ifdef CONFIG_ARCH_WANT_GENERAL_HUGETLB
-	pte = pfn_pte(pfn, prot);
+	pte = debug_vm_pfn_pte(pfn, prot);
 
 	WARN_ON(!pte_huge(pte_mkhuge(pte)));
 #endif /* CONFIG_ARCH_WANT_GENERAL_HUGETLB */
@@ -874,7 +891,7 @@ static void __init pmd_thp_tests(unsigned long pfn, pgprot_t prot)
 	 * needs to return true. pmd_present() should be true whenever
 	 * pmd_trans_huge() returns true.
 	 */
-	pmd = pfn_pmd(pfn, prot);
+	pmd = debug_vm_pfn_pmd(pfn, prot);
 	WARN_ON(!pmd_trans_huge(pmd_mkhuge(pmd)));
 
 #ifndef __HAVE_ARCH_PMDP_INVALIDATE
-- 
2.26.2


^ permalink raw reply related

* Re: [PATCH 02/16] debug_vm_pgtable/ppc64: Avoid setting top bits in radom value
From: Christophe Leroy @ 2020-08-12  6:40 UTC (permalink / raw)
  To: Aneesh Kumar K.V, linux-mm, akpm; +Cc: linuxppc-dev, Anshuman Khandual
In-Reply-To: <20200812063358.369514-2-aneesh.kumar@linux.ibm.com>



Le 12/08/2020 à 08:33, Aneesh Kumar K.V a écrit :
> ppc64 use bit 62 to indicate a pte entry (_PAGE_PTE). Avoid setting that bit in
> random value.
> 
> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
> ---
>   mm/debug_vm_pgtable.c | 5 ++++-
>   1 file changed, 4 insertions(+), 1 deletion(-)
> 
> diff --git a/mm/debug_vm_pgtable.c b/mm/debug_vm_pgtable.c
> index 086309fb9b6f..4c32063a8acf 100644
> --- a/mm/debug_vm_pgtable.c
> +++ b/mm/debug_vm_pgtable.c
> @@ -45,9 +45,12 @@
>    * pxx_clear() because of how dynamic page table folding works on s390. So
>    * while loading up the entries do not change the lower 4 bits. It does not
>    * have affect any other platform.
> + *
> + * Also avoid the 62nd bit on ppc64 that is used to mark a pte entry.
>    */
>   #define S390_MASK_BITS	4
> -#define RANDOM_ORVALUE	GENMASK(BITS_PER_LONG - 1, S390_MASK_BITS)
> +#define PPC_MASK_BITS	2
> +#define RANDOM_ORVALUE	GENMASK(BITS_PER_LONG - 1 - PPC_MASK_BITS, S390_MASK_BITS)

Do you mean:

#define RANDOM_ORVALUE	GENMASK(BITS_PER_LONG - 1, PPC_MASK_BITS | 
S390_MASK_BITS)

Christophe

>   #define RANDOM_NZVALUE	GENMASK(7, 0)
>   
>   static void __init pte_basic_tests(unsigned long pfn, pgprot_t prot)
> 

^ permalink raw reply

* Re: [PATCH 02/16] debug_vm_pgtable/ppc64: Avoid setting top bits in radom value
From: Aneesh Kumar K.V @ 2020-08-12  6:43 UTC (permalink / raw)
  To: Christophe Leroy, linux-mm, akpm; +Cc: linuxppc-dev, Anshuman Khandual
In-Reply-To: <24b1e523-e87e-161b-3dc9-60bd11c8f461@csgroup.eu>

On 8/12/20 12:10 PM, Christophe Leroy wrote:
> 
> 
> Le 12/08/2020 à 08:33, Aneesh Kumar K.V a écrit :
>> ppc64 use bit 62 to indicate a pte entry (_PAGE_PTE). Avoid setting 
>> that bit in
>> random value.
>>
>> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
>> ---
>>   mm/debug_vm_pgtable.c | 5 ++++-
>>   1 file changed, 4 insertions(+), 1 deletion(-)
>>
>> diff --git a/mm/debug_vm_pgtable.c b/mm/debug_vm_pgtable.c
>> index 086309fb9b6f..4c32063a8acf 100644
>> --- a/mm/debug_vm_pgtable.c
>> +++ b/mm/debug_vm_pgtable.c
>> @@ -45,9 +45,12 @@
>>    * pxx_clear() because of how dynamic page table folding works on 
>> s390. So
>>    * while loading up the entries do not change the lower 4 bits. It 
>> does not
>>    * have affect any other platform.
>> + *
>> + * Also avoid the 62nd bit on ppc64 that is used to mark a pte entry.
>>    */
>>   #define S390_MASK_BITS    4
>> -#define RANDOM_ORVALUE    GENMASK(BITS_PER_LONG - 1, S390_MASK_BITS)
>> +#define PPC_MASK_BITS    2
>> +#define RANDOM_ORVALUE    GENMASK(BITS_PER_LONG - 1 - PPC_MASK_BITS, 
>> S390_MASK_BITS)
> 
> Do you mean:
> 
> #define RANDOM_ORVALUE    GENMASK(BITS_PER_LONG - 1, PPC_MASK_BITS | 
> S390_MASK_BITS)


IIUC GENMASK(hi, low) generates a mask from hi to low bits. Since I want 
to avoid bit 62, I am forcing it to generate bits from (61, 4)


-aneesh

^ permalink raw reply

* Re: [PATCH] powerpc/papr_scm: Make access mode of 'perf_stats' attribute file to '0400'
From: Vaibhav Jain @ 2020-08-12  7:36 UTC (permalink / raw)
  To: Michael Ellerman, linuxppc-dev, linux-nvdimm
  Cc: Aneesh Kumar K . V, Santosh Sivaraj, Oliver O'Halloran,
	Dan Williams, Ira Weiny
In-Reply-To: <87wo26abmf.fsf@mpe.ellerman.id.au>

Hi Mpe,

Thanks for reviewing this patch. My responses below:

Michael Ellerman <mpe@ellerman.id.au> writes:

> Vaibhav Jain <vaibhav@linux.ibm.com> writes:
>> The newly introduced 'perf_stats' attribute uses the default access
>> mode of 0444 letting non-root users access performance stats of an
>> nvdimm and potentially force the kernel into issuing large number of
>> expensive HCALLs. Since the information exposed by this attribute
>> cannot be cached hence it's better to ward off access to this attribute
>> from non-root users.
>>
>> Hence this patch updates the access-mode of 'perf_stats' sysfs
>> attribute file to 0400 to make it only readable to root-users.
>
> Or should we ratelimit it?
Ideal consumers of this data will be users with CAP_PERFMON or
CAP_SYS_ADMIN. Also they need up-to-date values for these performance stats
as these values can be time sensitive.

So rate limiting may not be a complete solution since a user running
'perf' might be throttled by another user who is simply reading the
sysfs file contents.

So instead of setting attribute mode to 0400, will add a check for
'perfmon_capable()' in perf_stats_show() denying read access to users
without CAP_PERFMON or CAP_SYS_ADMIN.


> Fixes: ??
Right. I will add this in v2.

>
>> Reported-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
>> Signed-off-by: Vaibhav Jain <vaibhav@linux.ibm.com>
>
> cheers
>

-- 
Cheers
~ Vaibhav

^ permalink raw reply

* Re: [PATCH 01/16] powerpc/mm: Add DEBUG_VM WARN for pmd_clear
From: Anshuman Khandual @ 2020-08-12  7:46 UTC (permalink / raw)
  To: Aneesh Kumar K.V, linux-mm, akpm; +Cc: linuxppc-dev
In-Reply-To: <20200812063358.369514-1-aneesh.kumar@linux.ibm.com>

On 08/12/2020 12:03 PM, Aneesh Kumar K.V wrote:
> With the hash page table, the kernel should not use pmd_clear for clearing
> huge pte entries. Add a DEBUG_VM WARN to catch the wrong usage.
> 
> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>

This particular change is very much powerpc specific. Hence please drop it from
the series which otherwise changes the page table test. Also, this series which
is not a RFC, still lacks a proper cover letter with diff stats, tree/tag on
which this applies, summary about the proposal etc. All that information will
be helpful in reviewing this series better. For now, assuming that this applies
cleanly on current master branch. But again, please do include a cover letter
in the next version.

> ---
>  arch/powerpc/include/asm/book3s/64/pgtable.h | 14 ++++++++++++++
>  1 file changed, 14 insertions(+)
> 
> diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
> index 6de56c3b33c4..079211968987 100644
> --- a/arch/powerpc/include/asm/book3s/64/pgtable.h
> +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
> @@ -868,6 +868,13 @@ static inline bool pte_ci(pte_t pte)
>  
>  static inline void pmd_clear(pmd_t *pmdp)
>  {
> +	if (IS_ENABLED(CONFIG_DEBUG_VM) && !radix_enabled()) {
> +		/*
> +		 * Don't use this if we can possibly have a hash page table
> +		 * entry mapping this.
> +		 */
> +		WARN_ON((pmd_val(*pmdp) & (H_PAGE_HASHPTE | _PAGE_PTE)) == (H_PAGE_HASHPTE | _PAGE_PTE));
> +	}
>  	*pmdp = __pmd(0);
>  }
>  
> @@ -916,6 +923,13 @@ static inline int pmd_bad(pmd_t pmd)
>  
>  static inline void pud_clear(pud_t *pudp)
>  {
> +	if (IS_ENABLED(CONFIG_DEBUG_VM) && !radix_enabled()) {
> +		/*
> +		 * Don't use this if we can possibly have a hash page table
> +		 * entry mapping this.
> +		 */
> +		WARN_ON((pud_val(*pudp) & (H_PAGE_HASHPTE | _PAGE_PTE)) == (H_PAGE_HASHPTE | _PAGE_PTE));
> +	}
>  	*pudp = __pud(0);
>  }
>  
> 

^ permalink raw reply

* [PATCH 0/2] powerpc: unrel_branch_check.sh: enable llvm-objdump
From: Stephen Rothwell @ 2020-08-12  8:10 UTC (permalink / raw)
  To: Michael Ellerman; +Cc: Linux PowerPC List, Bill Wendling, Nicholas Piggin

These 2 patches enable this script to work properly when llvm-objdump
is being used.

They depend on my previous series that makes this script suck less.

Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: Bill Wendling <morbo@google.com>


^ permalink raw reply

* [PATCH 1/2] powerpc: unrel_branch_check.sh: use nm to find symbol value
From: Stephen Rothwell @ 2020-08-12  8:10 UTC (permalink / raw)
  To: Michael Ellerman; +Cc: Linux PowerPC List, Bill Wendling, Nicholas Piggin
In-Reply-To: <20200812081036.7969-1-sfr@canb.auug.org.au>

This is considerably faster than parsing the objdump asm output.  It will
also make the enabling of llvm-objdump a little easier.

Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: Bill Wendling <morbo@google.com>
Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
---
 arch/powerpc/Makefile.postlink           |  2 +-
 arch/powerpc/tools/unrel_branch_check.sh | 13 +++++--------
 2 files changed, 6 insertions(+), 9 deletions(-)

diff --git a/arch/powerpc/Makefile.postlink b/arch/powerpc/Makefile.postlink
index 2268396ff4bb..a6c77f4d32b2 100644
--- a/arch/powerpc/Makefile.postlink
+++ b/arch/powerpc/Makefile.postlink
@@ -18,7 +18,7 @@ quiet_cmd_relocs_check = CHKREL  $@
 ifdef CONFIG_PPC_BOOK3S_64
       cmd_relocs_check =						\
 	$(CONFIG_SHELL) $(srctree)/arch/powerpc/tools/relocs_check.sh "$(OBJDUMP)" "$(NM)" "$@" ; \
-	$(BASH) $(srctree)/arch/powerpc/tools/unrel_branch_check.sh "$(OBJDUMP)" "$@"
+	$(BASH) $(srctree)/arch/powerpc/tools/unrel_branch_check.sh "$(OBJDUMP)" "$(NM)" "$@"
 else
       cmd_relocs_check =						\
 	$(CONFIG_SHELL) $(srctree)/arch/powerpc/tools/relocs_check.sh "$(OBJDUMP)" "$(NM)" "$@"
diff --git a/arch/powerpc/tools/unrel_branch_check.sh b/arch/powerpc/tools/unrel_branch_check.sh
index 70da90270c78..0369eb2e7e4b 100755
--- a/arch/powerpc/tools/unrel_branch_check.sh
+++ b/arch/powerpc/tools/unrel_branch_check.sh
@@ -5,18 +5,15 @@
 # This script checks the unrelocated code of a vmlinux for "suspicious"
 # branches to relocated code (head_64.S code).
 
-# Have Kbuild supply the path to objdump so we handle cross compilation.
+# Have Kbuild supply the path to objdump and nm so we handle cross compilation.
 objdump="$1"
-vmlinux="$2"
+nm="$2"
+vmlinux="$3"
 
-#__end_interrupts should be located within the first 64K
 kstart=0xc000000000000000
-printf -v kend '0x%x' $(( kstart + 0x10000 ))
 
-end_intr=0x$(
-$objdump -R -d --start-address="$kstart" --stop-address="$kend" "$vmlinux" 2>/dev/null |
-awk '$2 == "<__end_interrupts>:" { print $1 }'
-)
+end_intr=0x$($nm -p "$vmlinux" |
+	sed -E -n '/\s+[[:alpha:]]\s+__end_interrupts\s*$/{s///p;q}')
 if [ "$end_intr" = "0x" ]; then
 	exit 0
 fi
-- 
2.28.0


^ permalink raw reply related

* [PATCH 2/2] powerpc: unrel_branch_check.sh: enable the use of llvm-objdump v9, 10 or 11
From: Stephen Rothwell @ 2020-08-12  8:10 UTC (permalink / raw)
  To: Michael Ellerman; +Cc: Linux PowerPC List, Bill Wendling, Nicholas Piggin
In-Reply-To: <20200812081036.7969-1-sfr@canb.auug.org.au>

Currently, using llvm-objdump, this script just silently succeeds without
actually doing the intended checking.  So this updates it to work properly.

Firstly, llvm-objdump does not add target symbol names to the end
of branches in its asm output, so we have to drop the branch to
__start_initialization_multiplatform using its address.

Secondly, v9 and 10 specify branch targets as .+<offset>, so we convert
those to actual addresses.

Thirdly, v10 and 11 error out on a vmlinux if given the -R option
complaining that it is "not a dynamic object".  The -R does not make
any difference to the asm output, so remove it.

Lastly, v11 produces asm that is very similar to GNU objdump (at least
as far as branches are concerned), so no further changes are necessary
to make it work.

Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: Bill Wendling <morbo@google.com>
Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
---
 arch/powerpc/tools/unrel_branch_check.sh | 34 ++++++++++++++++++++----
 1 file changed, 29 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/tools/unrel_branch_check.sh b/arch/powerpc/tools/unrel_branch_check.sh
index 0369eb2e7e4b..8301efee1e6c 100755
--- a/arch/powerpc/tools/unrel_branch_check.sh
+++ b/arch/powerpc/tools/unrel_branch_check.sh
@@ -18,12 +18,16 @@ if [ "$end_intr" = "0x" ]; then
 	exit 0
 fi
 
-$objdump -R -D --no-show-raw-insn --start-address="$kstart" --stop-address="$end_intr" "$vmlinux" |
+# we know that there is a correct branch to
+# __start_initialization_multiplatform, so find its address
+# so we can exclude it.
+sim=0x$($nm -p "$vmlinux" |
+	sed -E -n '/\s+[[:alpha:]]\s+__start_initialization_multiplatform\s*$/{s///p;q}')
+
+$objdump -D --no-show-raw-insn --start-address="$kstart" --stop-address="$end_intr" "$vmlinux" |
 sed -E -n '
 # match lines that start with a kernel address
 /^c[0-9a-f]*:\s*b/ {
-	# drop a target that we do not care about
-	/\<__start_initialization_multiplatform>/d
 	# drop branches via ctr or lr
 	/\<b.?.?(ct|l)r/d
 	# cope with some differences between Clang and GNU objdumps
@@ -33,14 +37,34 @@ sed -E -n '
 	s/\s0x/ /
 	s/://
 	# format for the loop below
-	s/^(\S+)\s+(\S+)\s+(\S+)\s*(\S*).*$/\1:\2:0x\3:\4/
+	s/^(\S+)\s+(\S+)\s+(\S+)\s*(\S*).*$/\1:\2:\3:\4/
 	# strip out condition registers
-	s/:0xcr[0-7],/:0x/
+	s/:cr[0-7],/:/
 	p
 }' | {
 
 all_good=true
 while IFS=: read -r from branch to sym; do
+	case "$to" in
+	c*)	to="0x$to"
+		;;
+	.+*)
+		to=${to#.+}
+		if [ "$branch" = 'b' ]; then
+			if (( to >= 0x2000000 )); then
+				to=$(( to - 0x4000000 ))
+			fi
+		elif (( to >= 0x8000 )); then
+			to=$(( to - 0x10000 ))
+		fi
+		printf -v to '0x%x' $(( "0x$from" + to ))
+		;;
+	*)	printf 'Unkown branch format\n'
+		;;
+	esac
+	if [ "$to" = "$sim" ]; then
+		continue
+	fi
 	if (( to > end_intr )); then
 		if $all_good; then
 			printf '%s\n' 'WARNING: Unrelocated relative branches'
-- 
2.28.0


^ permalink raw reply related

* Re: [PATCH v3 0/8] huge vmalloc mappings
From: Nicholas Piggin @ 2020-08-12  8:11 UTC (permalink / raw)
  To: Jonathan Cameron, Zefan Li
  Cc: linux-arch, Will Deacon, Catalin,  Marinas, x86, linux-kernel,
	linux-mm, Ingo Molnar, Borislav Petkov, H. Peter Anvin,
	Thomas Gleixner, linuxppc-dev, linux-arm-kernel
In-Reply-To: <d457aabc-9f58-f47e-f5fa-9539618b2759@huawei.com>

Excerpts from Zefan Li's message of August 12, 2020 11:07 am:
> On 2020/8/12 0:32, Jonathan Cameron wrote:
>> On Mon, 10 Aug 2020 12:27:24 +1000
>> Nicholas Piggin <npiggin@gmail.com> wrote:
>> 
>>> Not tested on x86 or arm64, would appreciate a quick test there so I can
>>> ask Andrew to put it in -mm. Other option is I can disable huge vmallocs
>>> for them for the time being.
>> 
>> Hi Nicholas,
>> 
>> For arm64 testing with a Kunpeng920.
>> 
>> I ran a quick sanity test with this series on top of mainline (yes mid merge window
>> so who knows what state is...).  Could I be missing some dependency?
>> 
>> Without them it boots, with them it doesn't.  Any immediate guesses?
>> 
> 
> I've already reported this bug in v2, and yeah I also tested it on arm64
> (not Kunpeng though), so looks like it still hasn't been fixed.

Huh, I thought I did fix it but seems not. vmap stacks shouldn't be 
big enough to use huge pages though, so I don't know what's going on
there. I'll dig around a bit more.

> 
> ...
>>>
>>> Since v2:
>>> - Rebased on vmalloc cleanups, split series into simpler pieces.
>>> - Fixed several compile errors and warnings
>>> - Keep the page array and accounting in small page units because
>>>   struct vm_struct is an interface (this should fix x86 vmap stack debug
>>>   assert). [Thanks Zefan]
> 
> though the changelog says it's fixed for x86.

Yes, my mistake that was supposed to say arm64.

Thanks,
Nick


^ permalink raw reply

* Re: [PATCH 02/16] debug_vm_pgtable/ppc64: Avoid setting top bits in radom value
From: Anshuman Khandual @ 2020-08-12  8:12 UTC (permalink / raw)
  To: Aneesh Kumar K.V, linux-mm, akpm; +Cc: linuxppc-dev
In-Reply-To: <20200812063358.369514-2-aneesh.kumar@linux.ibm.com>



On 08/12/2020 12:03 PM, Aneesh Kumar K.V wrote:
> ppc64 use bit 62 to indicate a pte entry (_PAGE_PTE). Avoid setting that bit in
> random value.
> 
> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
> ---
>  mm/debug_vm_pgtable.c | 5 ++++-
>  1 file changed, 4 insertions(+), 1 deletion(-)
> 
> diff --git a/mm/debug_vm_pgtable.c b/mm/debug_vm_pgtable.c
> index 086309fb9b6f..4c32063a8acf 100644
> --- a/mm/debug_vm_pgtable.c
> +++ b/mm/debug_vm_pgtable.c
> @@ -45,9 +45,12 @@
>   * pxx_clear() because of how dynamic page table folding works on s390. So
>   * while loading up the entries do not change the lower 4 bits. It does not
>   * have affect any other platform.
> + *
> + * Also avoid the 62nd bit on ppc64 that is used to mark a pte entry.
>   */

Please move and fold the above line with the existing paragraph.

>  #define S390_MASK_BITS	4
> -#define RANDOM_ORVALUE	GENMASK(BITS_PER_LONG - 1, S390_MASK_BITS)
> +#define PPC_MASK_BITS	2

s/PPC/PPC64/

> +#define RANDOM_ORVALUE	GENMASK(BITS_PER_LONG - 1 - PPC_MASK_BITS, S390_MASK_BITS)
>  #define RANDOM_NZVALUE	GENMASK(7, 0)
>  
>  static void __init pte_basic_tests(unsigned long pfn, pgprot_t prot)
> 

With this change, RANDOM_ORVALUE will be (0x3ffffffffffffff0) which discards
both bit 63 and 62. If only bit 62 is to be avoided for ppc64 the mask should
be (0xbffffffffffffff0) instead. The following change on this patch should do
the trick.

diff --git a/mm/debug_vm_pgtable.c b/mm/debug_vm_pgtable.c
index eb059fef89c2..1499181fb0e9 100644
--- a/mm/debug_vm_pgtable.c
+++ b/mm/debug_vm_pgtable.c
@@ -45,13 +45,13 @@
  * entry type. But these bits might affect the ability to clear entries with
  * pxx_clear() because of how dynamic page table folding works on s390. So
  * while loading up the entries do not change the lower 4 bits. It does not
- * have affect any other platform.
- *
- * Also avoid the 62nd bit on ppc64 that is used to mark a pte entry.
+ * have affect any other platform. Also avoid the 62nd bit on ppc64 that is
+ * used to mark a pte entry.
  */
-#define S390_MASK_BITS 4
-#define PPC_MASK_BITS  2
-#define RANDOM_ORVALUE GENMASK(BITS_PER_LONG - 1 - PPC_MASK_BITS, S390_MASK_BITS)
+#define S390_SKIP_MASK GENMASK(3, 0)
+#define PPC64_SKIP_MASK        GENMASK(62, 62)
+#define ARCH_SKIP_MASK (S390_SKIP_MASK | PPC64_SKIP_MASK)
+#define RANDOM_ORVALUE (GENMASK(BITS_PER_LONG - 1, 0) & ~ARCH_SKIP_MASK)
 #define RANDOM_NZVALUE GENMASK(7, 0)
 
 static void __init pte_basic_tests(unsigned long pfn, pgprot_t prot)

^ permalink raw reply related

* Re: [PATCH 1/2] lockdep: improve current->(hard|soft)irqs_enabled synchronisation with actual irq state
From: Nicholas Piggin @ 2020-08-12  8:18 UTC (permalink / raw)
  To: peterz
  Cc: linux-arch, Alexey Kardashevskiy, Will Deacon, linux-kernel,
	Ingo Molnar, linuxppc-dev
In-Reply-To: <20200807111126.GI2674@hirez.programming.kicks-ass.net>

Excerpts from peterz@infradead.org's message of August 7, 2020 9:11 pm:
> 
> What's wrong with something like this?
> 
> AFAICT there's no reason to actually try and add IRQ tracing here, it's
> just a hand full of instructions at the most.

Because we may want to use that in other places as well, so it would
be nice to have tracing.

Hmm... also, I thought NMI context was free to call local_irq_save/restore
anyway so the bug would still be there in those cases?

Thanks,
Nick

> 
> ---
> 
> diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h
> index 3a0db7b0b46e..6be22c1838e2 100644
> --- a/arch/powerpc/include/asm/hw_irq.h
> +++ b/arch/powerpc/include/asm/hw_irq.h
> @@ -196,33 +196,6 @@ static inline bool arch_irqs_disabled(void)
>  		arch_local_irq_restore(flags);				\
>  	} while(0)
>  
> -#ifdef CONFIG_TRACE_IRQFLAGS
> -#define powerpc_local_irq_pmu_save(flags)			\
> -	 do {							\
> -		raw_local_irq_pmu_save(flags);			\
> -		trace_hardirqs_off();				\
> -	} while(0)
> -#define powerpc_local_irq_pmu_restore(flags)			\
> -	do {							\
> -		if (raw_irqs_disabled_flags(flags)) {		\
> -			raw_local_irq_pmu_restore(flags);	\
> -			trace_hardirqs_off();			\
> -		} else {					\
> -			trace_hardirqs_on();			\
> -			raw_local_irq_pmu_restore(flags);	\
> -		}						\
> -	} while(0)
> -#else
> -#define powerpc_local_irq_pmu_save(flags)			\
> -	do {							\
> -		raw_local_irq_pmu_save(flags);			\
> -	} while(0)
> -#define powerpc_local_irq_pmu_restore(flags)			\
> -	do {							\
> -		raw_local_irq_pmu_restore(flags);		\
> -	} while (0)
> -#endif  /* CONFIG_TRACE_IRQFLAGS */
> -
>  #endif /* CONFIG_PPC_BOOK3S */
>  
>  #ifdef CONFIG_PPC_BOOK3E
> diff --git a/arch/powerpc/include/asm/local.h b/arch/powerpc/include/asm/local.h
> index bc4bd19b7fc2..b357a35672b1 100644
> --- a/arch/powerpc/include/asm/local.h
> +++ b/arch/powerpc/include/asm/local.h
> @@ -32,9 +32,9 @@ static __inline__ void local_##op(long i, local_t *l)			\
>  {									\
>  	unsigned long flags;						\
>  									\
> -	powerpc_local_irq_pmu_save(flags);				\
> +	raw_powerpc_local_irq_pmu_save(flags);				\
>  	l->v c_op i;						\
> -	powerpc_local_irq_pmu_restore(flags);				\
> +	raw_powerpc_local_irq_pmu_restore(flags);				\
>  }
>  
>  #define LOCAL_OP_RETURN(op, c_op)					\
> @@ -43,9 +43,9 @@ static __inline__ long local_##op##_return(long a, local_t *l)		\
>  	long t;								\
>  	unsigned long flags;						\
>  									\
> -	powerpc_local_irq_pmu_save(flags);				\
> +	raw_powerpc_local_irq_pmu_save(flags);				\
>  	t = (l->v c_op a);						\
> -	powerpc_local_irq_pmu_restore(flags);				\
> +	raw_powerpc_local_irq_pmu_restore(flags);				\
>  									\
>  	return t;							\
>  }
> @@ -81,11 +81,11 @@ static __inline__ long local_cmpxchg(local_t *l, long o, long n)
>  	long t;
>  	unsigned long flags;
>  
> -	powerpc_local_irq_pmu_save(flags);
> +	raw_powerpc_local_irq_pmu_save(flags);
>  	t = l->v;
>  	if (t == o)
>  		l->v = n;
> -	powerpc_local_irq_pmu_restore(flags);
> +	raw_powerpc_local_irq_pmu_restore(flags);
>  
>  	return t;
>  }
> @@ -95,10 +95,10 @@ static __inline__ long local_xchg(local_t *l, long n)
>  	long t;
>  	unsigned long flags;
>  
> -	powerpc_local_irq_pmu_save(flags);
> +	raw_powerpc_local_irq_pmu_save(flags);
>  	t = l->v;
>  	l->v = n;
> -	powerpc_local_irq_pmu_restore(flags);
> +	raw_powerpc_local_irq_pmu_restore(flags);
>  
>  	return t;
>  }
> @@ -117,12 +117,12 @@ static __inline__ int local_add_unless(local_t *l, long a, long u)
>  	unsigned long flags;
>  	int ret = 0;
>  
> -	powerpc_local_irq_pmu_save(flags);
> +	raw_powerpc_local_irq_pmu_save(flags);
>  	if (l->v != u) {
>  		l->v += a;
>  		ret = 1;
>  	}
> -	powerpc_local_irq_pmu_restore(flags);
> +	raw_powerpc_local_irq_pmu_restore(flags);
>  
>  	return ret;
>  }
> 

^ permalink raw reply

* Re: [PATCH 02/16] debug_vm_pgtable/ppc64: Avoid setting top bits in radom value
From: Aneesh Kumar K.V @ 2020-08-12  8:25 UTC (permalink / raw)
  To: Anshuman Khandual, linux-mm, akpm; +Cc: linuxppc-dev
In-Reply-To: <fcb44f1d-4131-9135-512e-11a5c00abcd9@arm.com>

On 8/12/20 1:42 PM, Anshuman Khandual wrote:
> 
> 
> On 08/12/2020 12:03 PM, Aneesh Kumar K.V wrote:
>> ppc64 use bit 62 to indicate a pte entry (_PAGE_PTE). Avoid setting that bit in
>> random value.
>>
>> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
>> ---
>>   mm/debug_vm_pgtable.c | 5 ++++-
>>   1 file changed, 4 insertions(+), 1 deletion(-)
>>
>> diff --git a/mm/debug_vm_pgtable.c b/mm/debug_vm_pgtable.c
>> index 086309fb9b6f..4c32063a8acf 100644
>> --- a/mm/debug_vm_pgtable.c
>> +++ b/mm/debug_vm_pgtable.c
>> @@ -45,9 +45,12 @@
>>    * pxx_clear() because of how dynamic page table folding works on s390. So
>>    * while loading up the entries do not change the lower 4 bits. It does not
>>    * have affect any other platform.
>> + *
>> + * Also avoid the 62nd bit on ppc64 that is used to mark a pte entry.
>>    */
> 
> Please move and fold the above line with the existing paragraph.
> 
>>   #define S390_MASK_BITS	4
>> -#define RANDOM_ORVALUE	GENMASK(BITS_PER_LONG - 1, S390_MASK_BITS)
>> +#define PPC_MASK_BITS	2
> 
> s/PPC/PPC64/
> 
>> +#define RANDOM_ORVALUE	GENMASK(BITS_PER_LONG - 1 - PPC_MASK_BITS, S390_MASK_BITS)
>>   #define RANDOM_NZVALUE	GENMASK(7, 0)
>>   
>>   static void __init pte_basic_tests(unsigned long pfn, pgprot_t prot)
>>
> 
> With this change, RANDOM_ORVALUE will be (0x3ffffffffffffff0) which discards
> both bit 63 and 62. If only bit 62 is to be avoided for ppc64 the mask should
> be (0xbffffffffffffff0) instead. The following change on this patch should do
> the trick.
> 
> diff --git a/mm/debug_vm_pgtable.c b/mm/debug_vm_pgtable.c
> index eb059fef89c2..1499181fb0e9 100644
> --- a/mm/debug_vm_pgtable.c
> +++ b/mm/debug_vm_pgtable.c
> @@ -45,13 +45,13 @@
>    * entry type. But these bits might affect the ability to clear entries with
>    * pxx_clear() because of how dynamic page table folding works on s390. So
>    * while loading up the entries do not change the lower 4 bits. It does not
> - * have affect any other platform.
> - *
> - * Also avoid the 62nd bit on ppc64 that is used to mark a pte entry.
> + * have affect any other platform. Also avoid the 62nd bit on ppc64 that is
> + * used to mark a pte entry.
>    */
> -#define S390_MASK_BITS 4
> -#define PPC_MASK_BITS  2
> -#define RANDOM_ORVALUE GENMASK(BITS_PER_LONG - 1 - PPC_MASK_BITS, S390_MASK_BITS)
> +#define S390_SKIP_MASK GENMASK(3, 0)
> +#define PPC64_SKIP_MASK        GENMASK(62, 62)
> +#define ARCH_SKIP_MASK (S390_SKIP_MASK | PPC64_SKIP_MASK)
> +#define RANDOM_ORVALUE (GENMASK(BITS_PER_LONG - 1, 0) & ~ARCH_SKIP_MASK)
>   #define RANDOM_NZVALUE GENMASK(7, 0)
>   
>   static void __init pte_basic_tests(unsigned long pfn, pgprot_t prot)
> 

I will switch to this.

-aneesh

^ permalink raw reply

* Re: [PATCH 01/16] powerpc/mm: Add DEBUG_VM WARN for pmd_clear
From: Aneesh Kumar K.V @ 2020-08-12  8:27 UTC (permalink / raw)
  To: Anshuman Khandual, linux-mm, akpm; +Cc: linuxppc-dev
In-Reply-To: <8b951ede-d779-d18f-b2b8-d09f94af6822@arm.com>

On 8/12/20 1:16 PM, Anshuman Khandual wrote:
> On 08/12/2020 12:03 PM, Aneesh Kumar K.V wrote:
>> With the hash page table, the kernel should not use pmd_clear for clearing
>> huge pte entries. Add a DEBUG_VM WARN to catch the wrong usage.
>>
>> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
> 
> This particular change is very much powerpc specific. Hence please drop it from
> the series which otherwise changes the page table test. Also, this series which
> is not a RFC, still lacks a proper cover letter with diff stats, tree/tag on
> which this applies, summary about the proposal etc. All that information will
> be helpful in reviewing this series better. For now, assuming that this applies
> cleanly on current master branch. But again, please do include a cover letter
> in the next version.


The patch series includes all sorts of fixes. There is no special theme 
for the series. So all that the cover letter would have is "fixes to 
make debug_vm_pgtable work on ppc64"

I tried to keep each patch simpler explaining why the current code is 
wrong.


> 
>> ---
>>   arch/powerpc/include/asm/book3s/64/pgtable.h | 14 ++++++++++++++
>>   1 file changed, 14 insertions(+)
>>
>> diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
>> index 6de56c3b33c4..079211968987 100644
>> --- a/arch/powerpc/include/asm/book3s/64/pgtable.h
>> +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
>> @@ -868,6 +868,13 @@ static inline bool pte_ci(pte_t pte)
>>   
>>   static inline void pmd_clear(pmd_t *pmdp)
>>   {
>> +	if (IS_ENABLED(CONFIG_DEBUG_VM) && !radix_enabled()) {
>> +		/*
>> +		 * Don't use this if we can possibly have a hash page table
>> +		 * entry mapping this.
>> +		 */
>> +		WARN_ON((pmd_val(*pmdp) & (H_PAGE_HASHPTE | _PAGE_PTE)) == (H_PAGE_HASHPTE | _PAGE_PTE));
>> +	}
>>   	*pmdp = __pmd(0);
>>   }
>>   
>> @@ -916,6 +923,13 @@ static inline int pmd_bad(pmd_t pmd)
>>   
>>   static inline void pud_clear(pud_t *pudp)
>>   {
>> +	if (IS_ENABLED(CONFIG_DEBUG_VM) && !radix_enabled()) {
>> +		/*
>> +		 * Don't use this if we can possibly have a hash page table
>> +		 * entry mapping this.
>> +		 */
>> +		WARN_ON((pud_val(*pudp) & (H_PAGE_HASHPTE | _PAGE_PTE)) == (H_PAGE_HASHPTE | _PAGE_PTE));
>> +	}
>>   	*pudp = __pud(0);
>>   }
>>   
>>

-aneesh


^ permalink raw reply

* Re: [PATCH 03/16] debug_vm_pgtable/set_pte: Don't use set_pte_at to update an existing pte entry
From: Anshuman Khandual @ 2020-08-12  9:12 UTC (permalink / raw)
  To: Aneesh Kumar K.V, linux-mm, akpm; +Cc: linuxppc-dev
In-Reply-To: <20200812063358.369514-3-aneesh.kumar@linux.ibm.com>



On 08/12/2020 12:03 PM, Aneesh Kumar K.V wrote:
> set_pte_at() should not be used to set a pte entry at locations that
> already holds a valid pte entry. Architectures like ppc64 don't do TLB
> invalidate in set_pte_at() and hence expect it to be used to set locations
> that are not a valid PTE.

Even though set_pte_at() is not really an arch page table helper and
very much arch specific, I just wonder why this deviation on ppc64
as compared to other platforms. Detecting such semantics variation
across platforms is an objective of this test.

A small nit.

Please follow the existing subject format for all patches in here.
It will improve readability and also help understand these changes
better, later on.

mm/debug_vm_pgtable: <Specify changes to an individual test>

> 
> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
> ---
>  mm/debug_vm_pgtable.c | 8 ++++----
>  1 file changed, 4 insertions(+), 4 deletions(-)
> 
> diff --git a/mm/debug_vm_pgtable.c b/mm/debug_vm_pgtable.c
> index 4c32063a8acf..02a7c20aa4a2 100644
> --- a/mm/debug_vm_pgtable.c
> +++ b/mm/debug_vm_pgtable.c
> @@ -81,8 +81,6 @@ static void __init pte_advanced_tests(struct mm_struct *mm,
>  	pte = ptep_get(ptep);
>  	WARN_ON(pte_write(pte));
>  
> -	pte = pfn_pte(pfn, prot);
> -	set_pte_at(mm, vaddr, ptep, pte);
>  	ptep_get_and_clear(mm, vaddr, ptep);
>  	pte = ptep_get(ptep);
>  	WARN_ON(!pte_none(pte));

This makes sense. But could you please fold this code stanza with
the previous one in order to imply that 'ptep' did have some valid
entry before being cleared and checked against pte_none().

> @@ -97,12 +95,14 @@ static void __init pte_advanced_tests(struct mm_struct *mm,
>  	pte = ptep_get(ptep);
>  	WARN_ON(!(pte_write(pte) && pte_dirty(pte)));
>  
> -	pte = pfn_pte(pfn, prot);
> -	set_pte_at(mm, vaddr, ptep, pte);
>  	ptep_get_and_clear_full(mm, vaddr, ptep, 1);
>  	pte = ptep_get(ptep);
>  	WARN_ON(!pte_none(pte));

Same, please fold back.

>  
> +	/*
> +	 * We should clear pte before we do set_pte_at
> +	 */
> +	pte = ptep_get_and_clear(mm, vaddr, ptep);
>  	pte = pte_mkyoung(pte);
>  	set_pte_at(mm, vaddr, ptep, pte);
>  	ptep_test_and_clear_young(vma, vaddr, ptep);
>

The comment above should also explain details that are mentioned
in the commit message i.e how platforms such as ppc64 expects a
clear pte entry for set_pte_at() to work.

^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox