LinuxPPC-Dev Archive on lore.kernel.org
 help / color / mirror / Atom feed
* Re: [PATCH V3 2/4] mm/debug_vm_pgtable: Add tests validating advanced arch page table helpers
From: Christophe Leroy @ 2020-06-27  7:26 UTC (permalink / raw)
  To: Anshuman Khandual, linux-mm
  Cc: Heiko Carstens, Paul Mackerras, H. Peter Anvin, linux-riscv,
	Will Deacon, linux-arch, linux-s390, x86, Mike Rapoport,
	Christian Borntraeger, Ingo Molnar, gerald.schaefer, ziy,
	Catalin Marinas, linux-snps-arc, Vasily Gorbik, Borislav Petkov,
	Paul Walmsley, Kirill A . Shutemov, Thomas Gleixner,
	linux-arm-kernel, Vineet Gupta, linux-kernel, Palmer Dabbelt,
	Andrew Morton, linuxppc-dev
In-Reply-To: <1592192277-8421-3-git-send-email-anshuman.khandual@arm.com>



Le 15/06/2020 à 05:37, Anshuman Khandual a écrit :
> This adds new tests validating for these following arch advanced page table
> helpers. These tests create and test specific mapping types at various page
> table levels.
> 
> 1. pxxp_set_wrprotect()
> 2. pxxp_get_and_clear()
> 3. pxxp_set_access_flags()
> 4. pxxp_get_and_clear_full()
> 5. pxxp_test_and_clear_young()
> 6. pxx_leaf()
> 7. pxx_set_huge()
> 8. pxx_(clear|mk)_savedwrite()
> 9. huge_pxxp_xxx()
> 
> Cc: Andrew Morton <akpm@linux-foundation.org>
> Cc: Mike Rapoport <rppt@linux.ibm.com>
> Cc: Vineet Gupta <vgupta@synopsys.com>
> Cc: Catalin Marinas <catalin.marinas@arm.com>
> Cc: Will Deacon <will@kernel.org>
> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
> Cc: Paul Mackerras <paulus@samba.org>
> Cc: Michael Ellerman <mpe@ellerman.id.au>
> Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
> Cc: Vasily Gorbik <gor@linux.ibm.com>
> Cc: Christian Borntraeger <borntraeger@de.ibm.com>
> Cc: Thomas Gleixner <tglx@linutronix.de>
> Cc: Ingo Molnar <mingo@redhat.com>
> Cc: Borislav Petkov <bp@alien8.de>
> Cc: "H. Peter Anvin" <hpa@zytor.com>
> Cc: Kirill A. Shutemov <kirill@shutemov.name>
> Cc: Paul Walmsley <paul.walmsley@sifive.com>
> Cc: Palmer Dabbelt <palmer@dabbelt.com>
> Cc: linux-snps-arc@lists.infradead.org
> Cc: linux-arm-kernel@lists.infradead.org
> Cc: linuxppc-dev@lists.ozlabs.org
> Cc: linux-s390@vger.kernel.org
> Cc: linux-riscv@lists.infradead.org
> Cc: x86@kernel.org
> Cc: linux-mm@kvack.org
> Cc: linux-arch@vger.kernel.org
> Cc: linux-kernel@vger.kernel.org
> Suggested-by: Catalin Marinas <catalin.marinas@arm.com>
> Signed-off-by: Anshuman Khandual <anshuman.khandual@arm.com>
> ---
>   mm/debug_vm_pgtable.c | 306 ++++++++++++++++++++++++++++++++++++++++++
>   1 file changed, 306 insertions(+)
> 
> diff --git a/mm/debug_vm_pgtable.c b/mm/debug_vm_pgtable.c
> index ffa163d4c63c..e3f9f8317a98 100644
> --- a/mm/debug_vm_pgtable.c
> +++ b/mm/debug_vm_pgtable.c
> @@ -21,6 +21,7 @@
>   #include <linux/module.h>
>   #include <linux/pfn_t.h>
>   #include <linux/printk.h>
> +#include <linux/pgtable.h>
>   #include <linux/random.h>
>   #include <linux/spinlock.h>
>   #include <linux/swap.h>
> @@ -28,6 +29,7 @@
>   #include <linux/start_kernel.h>
>   #include <linux/sched/mm.h>
>   #include <asm/pgalloc.h>
> +#include <asm/tlbflush.h>
>   
>   #define VMFLAGS	(VM_READ|VM_WRITE|VM_EXEC)
>   
> @@ -55,6 +57,54 @@ static void __init pte_basic_tests(unsigned long pfn, pgprot_t prot)
>   	WARN_ON(pte_write(pte_wrprotect(pte_mkwrite(pte))));
>   }
>   
> +static void __init pte_advanced_tests(struct mm_struct *mm,
> +			struct vm_area_struct *vma, pte_t *ptep,
> +			unsigned long pfn, unsigned long vaddr, pgprot_t prot)

Align args properly.

> +{
> +	pte_t pte = pfn_pte(pfn, prot);
> +
> +	pte = pfn_pte(pfn, prot);
> +	set_pte_at(mm, vaddr, ptep, pte);
> +	ptep_set_wrprotect(mm, vaddr, ptep);
> +	pte = READ_ONCE(*ptep);
> +	WARN_ON(pte_write(pte));
> +
> +	pte = pfn_pte(pfn, prot);
> +	set_pte_at(mm, vaddr, ptep, pte);
> +	ptep_get_and_clear(mm, vaddr, ptep);
> +	pte = READ_ONCE(*ptep);
> +	WARN_ON(!pte_none(pte));
> +
> +	pte = pfn_pte(pfn, prot);
> +	pte = pte_wrprotect(pte);
> +	pte = pte_mkclean(pte);
> +	set_pte_at(mm, vaddr, ptep, pte);
> +	pte = pte_mkwrite(pte);
> +	pte = pte_mkdirty(pte);
> +	ptep_set_access_flags(vma, vaddr, ptep, pte, 1);
> +	pte = READ_ONCE(*ptep);
> +	WARN_ON(!(pte_write(pte) && pte_dirty(pte)));
> +
> +	pte = pfn_pte(pfn, prot);
> +	set_pte_at(mm, vaddr, ptep, pte);
> +	ptep_get_and_clear_full(mm, vaddr, ptep, 1);
> +	pte = READ_ONCE(*ptep);
> +	WARN_ON(!pte_none(pte));
> +
> +	pte = pte_mkyoung(pte);
> +	set_pte_at(mm, vaddr, ptep, pte);
> +	ptep_test_and_clear_young(vma, vaddr, ptep);
> +	pte = READ_ONCE(*ptep);
> +	WARN_ON(pte_young(pte));
> +}
> +
> +static void __init pte_savedwrite_tests(unsigned long pfn, pgprot_t prot)
> +{
> +	pte_t pte = pfn_pte(pfn, prot);
> +
> +	WARN_ON(!pte_savedwrite(pte_mk_savedwrite(pte_clear_savedwrite(pte))));
> +	WARN_ON(pte_savedwrite(pte_clear_savedwrite(pte_mk_savedwrite(pte))));
> +}
>   #ifdef CONFIG_TRANSPARENT_HUGEPAGE
>   static void __init pmd_basic_tests(unsigned long pfn, pgprot_t prot)
>   {
> @@ -77,6 +127,89 @@ static void __init pmd_basic_tests(unsigned long pfn, pgprot_t prot)
>   	WARN_ON(!pmd_bad(pmd_mkhuge(pmd)));
>   }
>   
> +static void __init pmd_advanced_tests(struct mm_struct *mm,
> +		struct vm_area_struct *vma, pmd_t *pmdp,
> +		unsigned long pfn, unsigned long vaddr, pgprot_t prot)

Align args properly

> +{
> +	pmd_t pmd = pfn_pmd(pfn, prot);
> +
> +	if (!has_transparent_hugepage())
> +		return;
> +
> +	/* Align the address wrt HPAGE_PMD_SIZE */
> +	vaddr = (vaddr & HPAGE_PMD_MASK) + HPAGE_PMD_SIZE;
> +
> +	pmd = pfn_pmd(pfn, prot);
> +	set_pmd_at(mm, vaddr, pmdp, pmd);
> +	pmdp_set_wrprotect(mm, vaddr, pmdp);
> +	pmd = READ_ONCE(*pmdp);
> +	WARN_ON(pmd_write(pmd));
> +
> +	pmd = pfn_pmd(pfn, prot);
> +	set_pmd_at(mm, vaddr, pmdp, pmd);
> +	pmdp_huge_get_and_clear(mm, vaddr, pmdp);
> +	pmd = READ_ONCE(*pmdp);
> +	WARN_ON(!pmd_none(pmd));
> +
> +	pmd = pfn_pmd(pfn, prot);
> +	pmd = pmd_wrprotect(pmd);
> +	pmd = pmd_mkclean(pmd);
> +	set_pmd_at(mm, vaddr, pmdp, pmd);
> +	pmd = pmd_mkwrite(pmd);
> +	pmd = pmd_mkdirty(pmd);
> +	pmdp_set_access_flags(vma, vaddr, pmdp, pmd, 1);
> +	pmd = READ_ONCE(*pmdp);
> +	WARN_ON(!(pmd_write(pmd) && pmd_dirty(pmd)));
> +
> +	pmd = pmd_mkhuge(pfn_pmd(pfn, prot));
> +	set_pmd_at(mm, vaddr, pmdp, pmd);
> +	pmdp_huge_get_and_clear_full(vma, vaddr, pmdp, 1);
> +	pmd = READ_ONCE(*pmdp);
> +	WARN_ON(!pmd_none(pmd));
> +
> +	pmd = pmd_mkyoung(pmd);
> +	set_pmd_at(mm, vaddr, pmdp, pmd);
> +	pmdp_test_and_clear_young(vma, vaddr, pmdp);
> +	pmd = READ_ONCE(*pmdp);
> +	WARN_ON(pmd_young(pmd));
> +}
> +
> +static void __init pmd_leaf_tests(unsigned long pfn, pgprot_t prot)
> +{
> +	pmd_t pmd = pfn_pmd(pfn, prot);
> +
> +	/*
> +	 * PMD based THP is a leaf entry.
> +	 */
> +	pmd = pmd_mkhuge(pmd);
> +	WARN_ON(!pmd_leaf(pmd));
> +}
> +
> +static void __init pmd_huge_tests(pmd_t *pmdp, unsigned long pfn, pgprot_t prot)
> +{
> +	pmd_t pmd;
> +
> +	if (!IS_ENABLED(CONFIG_HAVE_ARCH_HUGE_VMAP))
> +		return;
> +	/*
> +	 * X86 defined pmd_set_huge() verifies that the given
> +	 * PMD is not a populated non-leaf entry.
> +	 */
> +	WRITE_ONCE(*pmdp, __pmd(0));
> +	WARN_ON(!pmd_set_huge(pmdp, __pfn_to_phys(pfn), prot));
> +	WARN_ON(!pmd_clear_huge(pmdp));
> +	pmd = READ_ONCE(*pmdp);
> +	WARN_ON(!pmd_none(pmd));
> +}
> +
> +static void __init pmd_savedwrite_tests(unsigned long pfn, pgprot_t prot)
> +{
> +	pmd_t pmd = pfn_pmd(pfn, prot);
> +
> +	WARN_ON(!pmd_savedwrite(pmd_mk_savedwrite(pmd_clear_savedwrite(pmd))));
> +	WARN_ON(pmd_savedwrite(pmd_clear_savedwrite(pmd_mk_savedwrite(pmd))));
> +}
> +
>   #ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
>   static void __init pud_basic_tests(unsigned long pfn, pgprot_t prot)
>   {
> @@ -100,12 +233,115 @@ static void __init pud_basic_tests(unsigned long pfn, pgprot_t prot)
>   	 */
>   	WARN_ON(!pud_bad(pud_mkhuge(pud)));
>   }
> +
> +static void pud_advanced_tests(struct mm_struct *mm,
> +		struct vm_area_struct *vma, pud_t *pudp,
> +		unsigned long pfn, unsigned long vaddr, pgprot_t prot)

Align args properly

> +{
> +	pud_t pud = pfn_pud(pfn, prot);
> +
> +	if (!has_transparent_hugepage())
> +		return;
> +
> +	/* Align the address wrt HPAGE_PUD_SIZE */
> +	vaddr = (vaddr & HPAGE_PUD_MASK) + HPAGE_PUD_SIZE;
> +
> +	set_pud_at(mm, vaddr, pudp, pud);
> +	pudp_set_wrprotect(mm, vaddr, pudp);
> +	pud = READ_ONCE(*pudp);
> +	WARN_ON(pud_write(pud));
> +
> +#ifndef __PAGETABLE_PMD_FOLDED
> +	pud = pfn_pud(pfn, prot);
> +	set_pud_at(mm, vaddr, pudp, pud);
> +	pudp_huge_get_and_clear(mm, vaddr, pudp);
> +	pud = READ_ONCE(*pudp);
> +	WARN_ON(!pud_none(pud));
> +
> +	pud = pfn_pud(pfn, prot);
> +	set_pud_at(mm, vaddr, pudp, pud);
> +	pudp_huge_get_and_clear_full(mm, vaddr, pudp, 1);
> +	pud = READ_ONCE(*pudp);
> +	WARN_ON(!pud_none(pud));
> +#endif /* __PAGETABLE_PMD_FOLDED */
> +	pud = pfn_pud(pfn, prot);
> +	pud = pud_wrprotect(pud);
> +	pud = pud_mkclean(pud);
> +	set_pud_at(mm, vaddr, pudp, pud);
> +	pud = pud_mkwrite(pud);
> +	pud = pud_mkdirty(pud);
> +	pudp_set_access_flags(vma, vaddr, pudp, pud, 1);
> +	pud = READ_ONCE(*pudp);
> +	WARN_ON(!(pud_write(pud) && pud_dirty(pud)));
> +
> +	pud = pud_mkyoung(pud);
> +	set_pud_at(mm, vaddr, pudp, pud);
> +	pudp_test_and_clear_young(vma, vaddr, pudp);
> +	pud = READ_ONCE(*pudp);
> +	WARN_ON(pud_young(pud));
> +}
> +
> +static void __init pud_leaf_tests(unsigned long pfn, pgprot_t prot)
> +{
> +	pud_t pud = pfn_pud(pfn, prot);
> +
> +	/*
> +	 * PUD based THP is a leaf entry.
> +	 */
> +	pud = pud_mkhuge(pud);
> +	WARN_ON(!pud_leaf(pud));
> +}
> +
> +static void __init pud_huge_tests(pud_t *pudp, unsigned long pfn, pgprot_t prot)
> +{
> +	pud_t pud;
> +
> +	if (!IS_ENABLED(CONFIG_HAVE_ARCH_HUGE_VMAP))
> +		return;
> +	/*
> +	 * X86 defined pud_set_huge() verifies that the given
> +	 * PUD is not a populated non-leaf entry.
> +	 */
> +	WRITE_ONCE(*pudp, __pud(0));
> +	WARN_ON(!pud_set_huge(pudp, __pfn_to_phys(pfn), prot));
> +	WARN_ON(!pud_clear_huge(pudp));
> +	pud = READ_ONCE(*pudp);
> +	WARN_ON(!pud_none(pud));
> +}
>   #else  /* !CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */
>   static void __init pud_basic_tests(unsigned long pfn, pgprot_t prot) { }
> +static void pud_advanced_tests(struct mm_struct *mm,
> +		struct vm_area_struct *vma, pud_t *pudp,
> +		unsigned long pfn, unsigned long vaddr, pgprot_t prot)

Align args properly

> +{
> +}
> +static void __init pud_leaf_tests(unsigned long pfn, pgprot_t prot) { }
> +static void __init pud_huge_tests(pud_t *pudp, unsigned long pfn, pgprot_t prot)
> +{
> +}
>   #endif /* CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */
>   #else  /* !CONFIG_TRANSPARENT_HUGEPAGE */
>   static void __init pmd_basic_tests(unsigned long pfn, pgprot_t prot) { }
>   static void __init pud_basic_tests(unsigned long pfn, pgprot_t prot) { }
> +static void __init pmd_advanced_tests(struct mm_struct *mm,
> +		struct vm_area_struct *vma, pmd_t *pmdp,
> +		unsigned long pfn, unsigned long vaddr, pgprot_t prot)

Align args properly

> +{
> +}
> +static void __init pud_advanced_tests(struct mm_struct *mm,
> +		struct vm_area_struct *vma, pud_t *pudp,
> +		unsigned long pfn, unsigned long vaddr, pgprot_t prot)

Align args properly

> +{
> +}
> +static void __init pmd_leaf_tests(unsigned long pfn, pgprot_t prot) { }
> +static void __init pud_leaf_tests(unsigned long pfn, pgprot_t prot) { }
> +static void __init pmd_huge_tests(pmd_t *pmdp, unsigned long pfn, pgprot_t prot)
> +{
> +}
> +static void __init pud_huge_tests(pud_t *pudp, unsigned long pfn, pgprot_t prot)
> +{
> +}
> +static void __init pmd_savedwrite_tests(unsigned long pfn, pgprot_t prot) { }
>   #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
>   
>   static void __init p4d_basic_tests(unsigned long pfn, pgprot_t prot)
> @@ -495,8 +731,56 @@ static void __init hugetlb_basic_tests(unsigned long pfn, pgprot_t prot)
>   	WARN_ON(!pte_huge(pte_mkhuge(pte)));
>   #endif /* CONFIG_ARCH_WANT_GENERAL_HUGETLB */
>   }
> +
> +static void __init hugetlb_advanced_tests(struct mm_struct *mm,
> +					  struct vm_area_struct *vma,
> +					  pte_t *ptep, unsigned long pfn,
> +					  unsigned long vaddr, pgprot_t prot)
> +{
> +	struct page *page = pfn_to_page(pfn);
> +	pte_t pte = READ_ONCE(*ptep);
> +	unsigned long paddr = (__pfn_to_phys(pfn) | RANDOM_ORVALUE) & PMD_MASK;
> +
> +	pte = pte_mkhuge(mk_pte(pfn_to_page(PHYS_PFN(paddr)), prot));
> +	set_huge_pte_at(mm, vaddr, ptep, pte);
> +	barrier();
> +	WARN_ON(!pte_same(pte, huge_ptep_get(ptep)));
> +	huge_pte_clear(mm, vaddr, ptep, PMD_SIZE);
> +	pte = huge_ptep_get(ptep);
> +	WARN_ON(!huge_pte_none(pte));
> +
> +	pte = mk_huge_pte(page, prot);
> +	set_huge_pte_at(mm, vaddr, ptep, pte);
> +	barrier();
> +	huge_ptep_set_wrprotect(mm, vaddr, ptep);
> +	pte = huge_ptep_get(ptep);
> +	WARN_ON(huge_pte_write(pte));
> +
> +	pte = mk_huge_pte(page, prot);
> +	set_huge_pte_at(mm, vaddr, ptep, pte);
> +	barrier();
> +	huge_ptep_get_and_clear(mm, vaddr, ptep);
> +	pte = huge_ptep_get(ptep);
> +	WARN_ON(!huge_pte_none(pte));
> +
> +	pte = mk_huge_pte(page, prot);
> +	pte = huge_pte_wrprotect(pte);
> +	set_huge_pte_at(mm, vaddr, ptep, pte);
> +	barrier();
> +	pte = huge_pte_mkwrite(pte);
> +	pte = huge_pte_mkdirty(pte);
> +	huge_ptep_set_access_flags(vma, vaddr, ptep, pte, 1);
> +	pte = huge_ptep_get(ptep);
> +	WARN_ON(!(huge_pte_write(pte) && huge_pte_dirty(pte)));
> +}
>   #else  /* !CONFIG_HUGETLB_PAGE */
>   static void __init hugetlb_basic_tests(unsigned long pfn, pgprot_t prot) { }
> +static void __init hugetlb_advanced_tests(struct mm_struct *mm,
> +					  struct vm_area_struct *vma,
> +					  pte_t *ptep, unsigned long pfn,
> +					  unsigned long vaddr, pgprot_t prot)
> +{
> +}
>   #endif /* CONFIG_HUGETLB_PAGE */
>   
>   #ifdef CONFIG_TRANSPARENT_HUGEPAGE
> @@ -568,6 +852,7 @@ static unsigned long __init get_random_vaddr(void)
>   
>   static int __init debug_vm_pgtable(void)
>   {
> +	struct vm_area_struct *vma;
>   	struct mm_struct *mm;
>   	pgd_t *pgdp;
>   	p4d_t *p4dp, *saved_p4dp;
> @@ -596,6 +881,12 @@ static int __init debug_vm_pgtable(void)
>   	 */
>   	protnone = __P000;
>   
> +	vma = vm_area_alloc(mm);
> +	if (!vma) {
> +		pr_err("vma allocation failed\n");
> +		return 1;
> +	}
> +
>   	/*
>   	 * PFN for mapping at PTE level is determined from a standard kernel
>   	 * text symbol. But pfns for higher page table levels are derived by
> @@ -644,6 +935,20 @@ static int __init debug_vm_pgtable(void)
>   	p4d_clear_tests(mm, p4dp);
>   	pgd_clear_tests(mm, pgdp);
>   
> +	pte_advanced_tests(mm, vma, ptep, pte_aligned, vaddr, prot);
> +	pmd_advanced_tests(mm, vma, pmdp, pmd_aligned, vaddr, prot);
> +	pud_advanced_tests(mm, vma, pudp, pud_aligned, vaddr, prot);
> +	hugetlb_advanced_tests(mm, vma, ptep, pte_aligned, vaddr, prot);
> +
> +	pmd_leaf_tests(pmd_aligned, prot);
> +	pud_leaf_tests(pud_aligned, prot);
> +
> +	pmd_huge_tests(pmdp, pmd_aligned, prot);
> +	pud_huge_tests(pudp, pud_aligned, prot);
> +
> +	pte_savedwrite_tests(pte_aligned, prot);
> +	pmd_savedwrite_tests(pmd_aligned, prot);
> +
>   	pte_unmap_unlock(ptep, ptl);
>   
>   	pmd_populate_tests(mm, pmdp, saved_ptep);
> @@ -678,6 +983,7 @@ static int __init debug_vm_pgtable(void)
>   	pmd_free(mm, saved_pmdp);
>   	pte_free(mm, saved_ptep);
>   
> +	vm_area_free(vma);
>   	mm_dec_nr_puds(mm);
>   	mm_dec_nr_pmds(mm);
>   	mm_dec_nr_ptes(mm);
> 

Christophe

^ permalink raw reply

* properly support exec with kernel pointers v3
From: Christoph Hellwig @ 2020-06-27  7:26 UTC (permalink / raw)
  To: Al Viro
  Cc: linux-arch, linux-s390, linux-parisc, Arnd Bergmann, Brian Gerst,
	x86, linux-mips, linux-kernel, linux-fsdevel, Luis Chamberlain,
	sparclinux, linuxppc-dev, linux-arm-kernel

Hi all,

this series first cleans up the exec code and then adds proper
kernel_execveat and kernel_wait callers instead of relying on the fact
that the early init code and kernel threads implicitly run with
the address limit set to KERNEL_DS.

Note that the cleanup removes the compat execve(at) handlers entirely, as
we can handle the compat difference very nicely in a unified codebase.
x32 needs two hacky #defines for that for now, although those can go
away if the x32 syscall rework from Brian gets merged.

I think this is ready to get picked up.  What would the best tree be?
Most important a git tree would be good, as I have other work building
on top of it.


Changes since v2:
 - drop the kernel_wait addition, as this interacts with a series
   from Luis and should be merged together with that one

Changes since v1:
 - remove a pointless ifdef from get_user_arg_ptr
 - remove the need for a compat syscall handler for x32


Diffstat:
 arch/arm64/include/asm/unistd32.h                  |    4 
 arch/mips/kernel/syscalls/syscall_n32.tbl          |    4 
 arch/mips/kernel/syscalls/syscall_o32.tbl          |    4 
 arch/parisc/kernel/syscalls/syscall.tbl            |    4 
 arch/powerpc/kernel/syscalls/syscall.tbl           |    4 
 arch/s390/kernel/syscalls/syscall.tbl              |    4 
 arch/sparc/kernel/syscalls.S                       |    4 
 arch/x86/entry/syscall_x32.c                       |    7 
 arch/x86/entry/syscalls/syscall_32.tbl             |    4 
 arch/x86/entry/syscalls/syscall_64.tbl             |    4 
 fs/exec.c                                          |  248 ++++++++-------------
 include/linux/binfmts.h                            |   10 
 include/linux/compat.h                             |    7 
 include/uapi/asm-generic/unistd.h                  |    4 
 init/main.c                                        |    5 
 kernel/umh.c                                       |   14 -
 tools/include/uapi/asm-generic/unistd.h            |    4 
 tools/perf/arch/powerpc/entry/syscalls/syscall.tbl |    4 
 tools/perf/arch/s390/entry/syscalls/syscall.tbl    |    4 
 tools/perf/arch/x86/entry/syscalls/syscall_64.tbl  |    4 
 20 files changed, 149 insertions(+), 198 deletions(-)

^ permalink raw reply

* [PATCH 2/5] exec: simplify the compat syscall handling
From: Christoph Hellwig @ 2020-06-27  7:27 UTC (permalink / raw)
  To: Al Viro
  Cc: linux-arch, linux-s390, linux-parisc, Arnd Bergmann, Brian Gerst,
	x86, linux-mips, linux-kernel, linux-fsdevel, Luis Chamberlain,
	sparclinux, linuxppc-dev, linux-arm-kernel
In-Reply-To: <20200627072704.2447163-1-hch@lst.de>

The only differenence betweeen the compat exec* syscalls and their
native versions is that compat_ptr sign extension, and the fact that
the pointer arithmetics for the two dimensional arrays needs to use
the compat pointer size.  Instead of the compat wrappers and the
struct user_arg_ptr machinery just use in_compat_syscall() to do the
right thing for the compat case deep inside get_user_arg_ptr().

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 arch/arm64/include/asm/unistd32.h             |   4 +-
 arch/mips/kernel/syscalls/syscall_n32.tbl     |   4 +-
 arch/mips/kernel/syscalls/syscall_o32.tbl     |   4 +-
 arch/parisc/kernel/syscalls/syscall.tbl       |   4 +-
 arch/powerpc/kernel/syscalls/syscall.tbl      |   4 +-
 arch/s390/kernel/syscalls/syscall.tbl         |   4 +-
 arch/sparc/kernel/syscalls.S                  |   4 +-
 arch/x86/entry/syscall_x32.c                  |   7 ++
 arch/x86/entry/syscalls/syscall_32.tbl        |   4 +-
 arch/x86/entry/syscalls/syscall_64.tbl        |   4 +-
 fs/exec.c                                     | 103 ++++--------------
 include/linux/compat.h                        |   7 --
 include/uapi/asm-generic/unistd.h             |   4 +-
 tools/include/uapi/asm-generic/unistd.h       |   4 +-
 .../arch/powerpc/entry/syscalls/syscall.tbl   |   4 +-
 .../perf/arch/s390/entry/syscalls/syscall.tbl |   4 +-
 .../arch/x86/entry/syscalls/syscall_64.tbl    |   4 +-
 17 files changed, 56 insertions(+), 117 deletions(-)

diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h
index 6d95d0c8bf2f47..141f5d2ff1c34f 100644
--- a/arch/arm64/include/asm/unistd32.h
+++ b/arch/arm64/include/asm/unistd32.h
@@ -33,7 +33,7 @@ __SYSCALL(__NR_link, sys_link)
 #define __NR_unlink 10
 __SYSCALL(__NR_unlink, sys_unlink)
 #define __NR_execve 11
-__SYSCALL(__NR_execve, compat_sys_execve)
+__SYSCALL(__NR_execve, sys_execve)
 #define __NR_chdir 12
 __SYSCALL(__NR_chdir, sys_chdir)
 			/* 13 was sys_time */
@@ -785,7 +785,7 @@ __SYSCALL(__NR_memfd_create, sys_memfd_create)
 #define __NR_bpf 386
 __SYSCALL(__NR_bpf, sys_bpf)
 #define __NR_execveat 387
-__SYSCALL(__NR_execveat, compat_sys_execveat)
+__SYSCALL(__NR_execveat, sys_execveat)
 #define __NR_userfaultfd 388
 __SYSCALL(__NR_userfaultfd, sys_userfaultfd)
 #define __NR_membarrier 389
diff --git a/arch/mips/kernel/syscalls/syscall_n32.tbl b/arch/mips/kernel/syscalls/syscall_n32.tbl
index f777141f52568f..e861b5ab7179c9 100644
--- a/arch/mips/kernel/syscalls/syscall_n32.tbl
+++ b/arch/mips/kernel/syscalls/syscall_n32.tbl
@@ -64,7 +64,7 @@
 54	n32	getsockopt			compat_sys_getsockopt
 55	n32	clone				__sys_clone
 56	n32	fork				__sys_fork
-57	n32	execve				compat_sys_execve
+57	n32	execve				sys_execve
 58	n32	exit				sys_exit
 59	n32	wait4				compat_sys_wait4
 60	n32	kill				sys_kill
@@ -328,7 +328,7 @@
 317	n32	getrandom			sys_getrandom
 318	n32	memfd_create			sys_memfd_create
 319	n32	bpf				sys_bpf
-320	n32	execveat			compat_sys_execveat
+320	n32	execveat			sys_execveat
 321	n32	userfaultfd			sys_userfaultfd
 322	n32	membarrier			sys_membarrier
 323	n32	mlock2				sys_mlock2
diff --git a/arch/mips/kernel/syscalls/syscall_o32.tbl b/arch/mips/kernel/syscalls/syscall_o32.tbl
index 13280625d312e9..bba80f74e9968e 100644
--- a/arch/mips/kernel/syscalls/syscall_o32.tbl
+++ b/arch/mips/kernel/syscalls/syscall_o32.tbl
@@ -18,7 +18,7 @@
 8	o32	creat				sys_creat
 9	o32	link				sys_link
 10	o32	unlink				sys_unlink
-11	o32	execve				sys_execve			compat_sys_execve
+11	o32	execve				sys_execve
 12	o32	chdir				sys_chdir
 13	o32	time				sys_time32
 14	o32	mknod				sys_mknod
@@ -367,7 +367,7 @@
 353	o32	getrandom			sys_getrandom
 354	o32	memfd_create			sys_memfd_create
 355	o32	bpf				sys_bpf
-356	o32	execveat			sys_execveat			compat_sys_execveat
+356	o32	execveat			sys_execveat
 357	o32	userfaultfd			sys_userfaultfd
 358	o32	membarrier			sys_membarrier
 359	o32	mlock2				sys_mlock2
diff --git a/arch/parisc/kernel/syscalls/syscall.tbl b/arch/parisc/kernel/syscalls/syscall.tbl
index 5a758fa6ec5242..23fa0d0edf3384 100644
--- a/arch/parisc/kernel/syscalls/syscall.tbl
+++ b/arch/parisc/kernel/syscalls/syscall.tbl
@@ -18,7 +18,7 @@
 8	common	creat			sys_creat
 9	common	link			sys_link
 10	common	unlink			sys_unlink
-11	common	execve			sys_execve			compat_sys_execve
+11	common	execve			sys_execve
 12	common	chdir			sys_chdir
 13	32	time			sys_time32
 13	64	time			sys_time
@@ -385,7 +385,7 @@
 339	common	getrandom		sys_getrandom
 340	common	memfd_create		sys_memfd_create
 341	common	bpf			sys_bpf
-342	common	execveat		sys_execveat			compat_sys_execveat
+342	common	execveat		sys_execveat
 343	common	membarrier		sys_membarrier
 344	common	userfaultfd		sys_userfaultfd
 345	common	mlock2			sys_mlock2
diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl
index f833a319082247..c52cdab89dc0ae 100644
--- a/arch/powerpc/kernel/syscalls/syscall.tbl
+++ b/arch/powerpc/kernel/syscalls/syscall.tbl
@@ -20,7 +20,7 @@
 8	common	creat				sys_creat
 9	common	link				sys_link
 10	common	unlink				sys_unlink
-11	nospu	execve				sys_execve			compat_sys_execve
+11	nospu	execve				sys_execve
 12	common	chdir				sys_chdir
 13	32	time				sys_time32
 13	64	time				sys_time
@@ -460,7 +460,7 @@
 359	common	getrandom			sys_getrandom
 360	common	memfd_create			sys_memfd_create
 361	common	bpf				sys_bpf
-362	nospu	execveat			sys_execveat			compat_sys_execveat
+362	nospu	execveat			sys_execveat
 363	32	switch_endian			sys_ni_syscall
 363	64	switch_endian			sys_switch_endian
 363	spu	switch_endian			sys_ni_syscall
diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl
index bfdcb763395735..bd2275db2026ea 100644
--- a/arch/s390/kernel/syscalls/syscall.tbl
+++ b/arch/s390/kernel/syscalls/syscall.tbl
@@ -18,7 +18,7 @@
 8    common	creat			sys_creat			sys_creat
 9    common	link			sys_link			sys_link
 10   common	unlink			sys_unlink			sys_unlink
-11   common	execve			sys_execve			compat_sys_execve
+11   common	execve			sys_execve			sys_execve
 12   common	chdir			sys_chdir			sys_chdir
 13   32		time			-				sys_time32
 14   common	mknod			sys_mknod			sys_mknod
@@ -361,7 +361,7 @@
 351  common	bpf			sys_bpf				sys_bpf
 352  common	s390_pci_mmio_write	sys_s390_pci_mmio_write		sys_s390_pci_mmio_write
 353  common	s390_pci_mmio_read	sys_s390_pci_mmio_read		sys_s390_pci_mmio_read
-354  common	execveat		sys_execveat			compat_sys_execveat
+354  common	execveat		sys_execveat			sys_execveat
 355  common	userfaultfd		sys_userfaultfd			sys_userfaultfd
 356  common	membarrier		sys_membarrier			sys_membarrier
 357  common	recvmmsg		sys_recvmmsg			compat_sys_recvmmsg_time32
diff --git a/arch/sparc/kernel/syscalls.S b/arch/sparc/kernel/syscalls.S
index db42b4fb370844..70463972152a92 100644
--- a/arch/sparc/kernel/syscalls.S
+++ b/arch/sparc/kernel/syscalls.S
@@ -16,12 +16,12 @@ sys64_execveat:
 sunos_execv:
 	mov	%g0, %o2
 sys32_execve:
-	set	compat_sys_execve, %g1
+	set	sys_execve, %g1
 	jmpl	%g1, %g0
 	 flushw
 
 sys32_execveat:
-	set	compat_sys_execveat, %g1
+	set	sys_execveat, %g1
 	jmpl	%g1, %g0
 	 flushw
 #endif
diff --git a/arch/x86/entry/syscall_x32.c b/arch/x86/entry/syscall_x32.c
index 3d8d70d3896c87..c9f39900a93b96 100644
--- a/arch/x86/entry/syscall_x32.c
+++ b/arch/x86/entry/syscall_x32.c
@@ -8,6 +8,13 @@
 #include <asm/unistd.h>
 #include <asm/syscall.h>
 
+/*
+ * Reuse the 64-bit entry points for the x32 versions that occupy different
+ * slots in the syscall table.
+ */
+#define __x32_sys_execve	__x64_sys_execve
+#define __x32_sys_execveat	__x64_sys_execveat
+
 #define __SYSCALL_64(nr, sym)
 
 #define __SYSCALL_X32(nr, sym) extern long __x32_##sym(const struct pt_regs *);
diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl
index d8f8a1a69ed11f..2b1eccd3f8f697 100644
--- a/arch/x86/entry/syscalls/syscall_32.tbl
+++ b/arch/x86/entry/syscalls/syscall_32.tbl
@@ -22,7 +22,7 @@
 8	i386	creat			sys_creat
 9	i386	link			sys_link
 10	i386	unlink			sys_unlink
-11	i386	execve			sys_execve			compat_sys_execve
+11	i386	execve			sys_execve
 12	i386	chdir			sys_chdir
 13	i386	time			sys_time32
 14	i386	mknod			sys_mknod
@@ -369,7 +369,7 @@
 355	i386	getrandom		sys_getrandom
 356	i386	memfd_create		sys_memfd_create
 357	i386	bpf			sys_bpf
-358	i386	execveat		sys_execveat			compat_sys_execveat
+358	i386	execveat		sys_execveat
 359	i386	socket			sys_socket
 360	i386	socketpair		sys_socketpair
 361	i386	bind			sys_bind
diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl
index 78847b32e1370f..cb3fce6ed63ebf 100644
--- a/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/arch/x86/entry/syscalls/syscall_64.tbl
@@ -375,7 +375,7 @@
 517	x32	recvfrom		compat_sys_recvfrom
 518	x32	sendmsg			compat_sys_sendmsg
 519	x32	recvmsg			compat_sys_recvmsg
-520	x32	execve			compat_sys_execve
+520	x32	execve			sys_execve
 521	x32	ptrace			compat_sys_ptrace
 522	x32	rt_sigpending		compat_sys_rt_sigpending
 523	x32	rt_sigtimedwait		compat_sys_rt_sigtimedwait_time64
@@ -400,6 +400,6 @@
 542	x32	getsockopt		compat_sys_getsockopt
 543	x32	io_setup		compat_sys_io_setup
 544	x32	io_submit		compat_sys_io_submit
-545	x32	execveat		compat_sys_execveat
+545	x32	execveat		sys_execveat
 546	x32	preadv2			compat_sys_preadv64v2
 547	x32	pwritev2		compat_sys_pwritev64v2
diff --git a/fs/exec.c b/fs/exec.c
index 354fdaa536ae7d..4e5db0e35797a5 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -386,47 +386,34 @@ static int bprm_mm_init(struct linux_binprm *bprm)
 	return err;
 }
 
-struct user_arg_ptr {
-#ifdef CONFIG_COMPAT
-	bool is_compat;
-#endif
-	union {
-		const char __user *const __user *native;
-#ifdef CONFIG_COMPAT
-		const compat_uptr_t __user *compat;
-#endif
-	} ptr;
-};
-
-static const char __user *get_user_arg_ptr(struct user_arg_ptr argv, int nr)
+static const char __user *
+get_user_arg_ptr(const char __user *const __user *argv, int nr)
 {
-	const char __user *native;
-
-#ifdef CONFIG_COMPAT
-	if (unlikely(argv.is_compat)) {
+	if (in_compat_syscall()) {
+		const compat_uptr_t __user *compat_argv =
+			compat_ptr((unsigned long)argv);
 		compat_uptr_t compat;
 
-		if (get_user(compat, argv.ptr.compat + nr))
+		if (get_user(compat, compat_argv + nr))
 			return ERR_PTR(-EFAULT);
-
 		return compat_ptr(compat);
-	}
-#endif
-
-	if (get_user(native, argv.ptr.native + nr))
-		return ERR_PTR(-EFAULT);
+	} else {
+		const char __user *native;
 
-	return native;
+		if (get_user(native, argv + nr))
+			return ERR_PTR(-EFAULT);
+		return native;
+	}
 }
 
 /*
  * count() counts the number of strings in array ARGV.
  */
-static int count(struct user_arg_ptr argv, int max)
+static int count(const char __user *const __user *argv, int max)
 {
 	int i = 0;
 
-	if (argv.ptr.native != NULL) {
+	if (argv) {
 		for (;;) {
 			const char __user *p = get_user_arg_ptr(argv, i);
 
@@ -449,7 +436,8 @@ static int count(struct user_arg_ptr argv, int max)
 }
 
 static int prepare_arg_pages(struct linux_binprm *bprm,
-			struct user_arg_ptr argv, struct user_arg_ptr envp)
+		const char __user *const __user *argv,
+		const char __user *const __user *envp)
 {
 	unsigned long limit, ptr_size;
 
@@ -497,7 +485,7 @@ static int prepare_arg_pages(struct linux_binprm *bprm,
  * processes's memory to the new process's stack.  The call to get_user_pages()
  * ensures the destination page is created and not swapped out.
  */
-static int copy_strings(int argc, struct user_arg_ptr argv,
+static int copy_strings(int argc, const char __user *const __user *argv,
 			struct linux_binprm *bprm)
 {
 	struct page *kmapped_page = NULL;
@@ -1815,10 +1803,10 @@ static int exec_binprm(struct linux_binprm *bprm)
 	return 0;
 }
 
-static int __do_execveat(int fd, struct filename *filename,
-			    struct user_arg_ptr argv,
-			    struct user_arg_ptr envp,
-			    int flags, struct file *file)
+int do_execveat(int fd, struct filename *filename,
+		const char __user *const __user *argv,
+		const char __user *const __user *envp,
+		int flags, struct file *file)
 {
 	char *pathbuf = NULL;
 	struct linux_binprm *bprm;
@@ -1969,17 +1957,6 @@ static int __do_execveat(int fd, struct filename *filename,
 	return retval;
 }
 
-int do_execveat(int fd, struct filename *filename,
-		const char __user *const __user *__argv,
-		const char __user *const __user *__envp,
-		int flags, struct file *file)
-{
-	struct user_arg_ptr argv = { .ptr.native = __argv };
-	struct user_arg_ptr envp = { .ptr.native = __envp };
-
-	return __do_execveat(fd, filename, argv, envp, flags, file);
-}
-
 void set_binfmt(struct linux_binfmt *new)
 {
 	struct mm_struct *mm = current->mm;
@@ -2023,41 +2000,3 @@ SYSCALL_DEFINE5(execveat,
 
 	return do_execveat(fd, name, argv, envp, flags, NULL);
 }
-
-#ifdef CONFIG_COMPAT
-static int do_compat_execve(int fd, struct filename *filename,
-		const compat_uptr_t __user *__argv,
-		const compat_uptr_t __user *__envp,
-		int flags)
-{
-	struct user_arg_ptr argv = {
-		.is_compat = true,
-		.ptr.compat = __argv,
-	};
-	struct user_arg_ptr envp = {
-		.is_compat = true,
-		.ptr.compat = __envp,
-	};
-
-	return __do_execveat(fd, filename, argv, envp, flags, NULL);
-}
-
-COMPAT_SYSCALL_DEFINE3(execve, const char __user *, filename,
-	const compat_uptr_t __user *, argv,
-	const compat_uptr_t __user *, envp)
-{
-	return do_compat_execve(AT_FDCWD, getname(filename), argv, envp, 0);
-}
-
-COMPAT_SYSCALL_DEFINE5(execveat, int, fd,
-		       const char __user *, filename,
-		       const compat_uptr_t __user *, argv,
-		       const compat_uptr_t __user *, envp,
-		       int,  flags)
-{
-	int lookup_flags = (flags & AT_EMPTY_PATH) ? LOOKUP_EMPTY : 0;
-	struct filename *name = getname_flags(filename, lookup_flags, NULL);
-
-	return do_compat_execve(fd, name, argv, envp, flags);
-}
-#endif
diff --git a/include/linux/compat.h b/include/linux/compat.h
index e90100c0de72e4..5e8f6a588e0d43 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -752,10 +752,6 @@ asmlinkage long compat_sys_recvmsg(int fd, struct compat_msghdr __user *msg,
 asmlinkage long compat_sys_keyctl(u32 option,
 			      u32 arg2, u32 arg3, u32 arg4, u32 arg5);
 
-/* arch/example/kernel/sys_example.c */
-asmlinkage long compat_sys_execve(const char __user *filename, const compat_uptr_t __user *argv,
-		     const compat_uptr_t __user *envp);
-
 /* mm/fadvise.c: No generic prototype for fadvise64_64 */
 
 /* mm/, CONFIG_MMU only */
@@ -806,9 +802,6 @@ asmlinkage ssize_t compat_sys_process_vm_writev(compat_pid_t pid,
 		const struct compat_iovec __user *lvec,
 		compat_ulong_t liovcnt, const struct compat_iovec __user *rvec,
 		compat_ulong_t riovcnt, compat_ulong_t flags);
-asmlinkage long compat_sys_execveat(int dfd, const char __user *filename,
-		     const compat_uptr_t __user *argv,
-		     const compat_uptr_t __user *envp, int flags);
 asmlinkage ssize_t compat_sys_preadv2(compat_ulong_t fd,
 		const struct compat_iovec __user *vec,
 		compat_ulong_t vlen, u32 pos_low, u32 pos_high, rwf_t flags);
diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h
index f4a01305d9a65c..c639d04a094b8b 100644
--- a/include/uapi/asm-generic/unistd.h
+++ b/include/uapi/asm-generic/unistd.h
@@ -640,7 +640,7 @@ __SC_COMP(__NR_keyctl, sys_keyctl, compat_sys_keyctl)
 #define __NR_clone 220
 __SYSCALL(__NR_clone, sys_clone)
 #define __NR_execve 221
-__SC_COMP(__NR_execve, sys_execve, compat_sys_execve)
+__SYSCALL(__NR_execve, sys_execve)
 
 #define __NR3264_mmap 222
 __SC_3264(__NR3264_mmap, sys_mmap2, sys_mmap)
@@ -751,7 +751,7 @@ __SYSCALL(__NR_memfd_create, sys_memfd_create)
 #define __NR_bpf 280
 __SYSCALL(__NR_bpf, sys_bpf)
 #define __NR_execveat 281
-__SC_COMP(__NR_execveat, sys_execveat, compat_sys_execveat)
+__SYSCALL(__NR_execveat, sys_execveat)
 #define __NR_userfaultfd 282
 __SYSCALL(__NR_userfaultfd, sys_userfaultfd)
 #define __NR_membarrier 283
diff --git a/tools/include/uapi/asm-generic/unistd.h b/tools/include/uapi/asm-generic/unistd.h
index f4a01305d9a65c..c639d04a094b8b 100644
--- a/tools/include/uapi/asm-generic/unistd.h
+++ b/tools/include/uapi/asm-generic/unistd.h
@@ -640,7 +640,7 @@ __SC_COMP(__NR_keyctl, sys_keyctl, compat_sys_keyctl)
 #define __NR_clone 220
 __SYSCALL(__NR_clone, sys_clone)
 #define __NR_execve 221
-__SC_COMP(__NR_execve, sys_execve, compat_sys_execve)
+__SYSCALL(__NR_execve, sys_execve)
 
 #define __NR3264_mmap 222
 __SC_3264(__NR3264_mmap, sys_mmap2, sys_mmap)
@@ -751,7 +751,7 @@ __SYSCALL(__NR_memfd_create, sys_memfd_create)
 #define __NR_bpf 280
 __SYSCALL(__NR_bpf, sys_bpf)
 #define __NR_execveat 281
-__SC_COMP(__NR_execveat, sys_execveat, compat_sys_execveat)
+__SYSCALL(__NR_execveat, sys_execveat)
 #define __NR_userfaultfd 282
 __SYSCALL(__NR_userfaultfd, sys_userfaultfd)
 #define __NR_membarrier 283
diff --git a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
index 35b61bfc1b1ae9..42bf8b461a0ed6 100644
--- a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
+++ b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
@@ -18,7 +18,7 @@
 8	common	creat				sys_creat
 9	common	link				sys_link
 10	common	unlink				sys_unlink
-11	nospu	execve				sys_execve			compat_sys_execve
+11	nospu	execve				sys_execve			sys_execve
 12	common	chdir				sys_chdir
 13	32	time				sys_time32
 13	64	time				sys_time
@@ -454,7 +454,7 @@
 359	common	getrandom			sys_getrandom
 360	common	memfd_create			sys_memfd_create
 361	common	bpf				sys_bpf
-362	nospu	execveat			sys_execveat			compat_sys_execveat
+362	nospu	execveat			sys_execveat			sys_execveat
 363	32	switch_endian			sys_ni_syscall
 363	64	switch_endian			ppc_switch_endian
 363	spu	switch_endian			sys_ni_syscall
diff --git a/tools/perf/arch/s390/entry/syscalls/syscall.tbl b/tools/perf/arch/s390/entry/syscalls/syscall.tbl
index b38d48464368dc..f3c16f2d9746ac 100644
--- a/tools/perf/arch/s390/entry/syscalls/syscall.tbl
+++ b/tools/perf/arch/s390/entry/syscalls/syscall.tbl
@@ -18,7 +18,7 @@
 8    common	creat			sys_creat			compat_sys_creat
 9    common	link			sys_link			compat_sys_link
 10   common	unlink			sys_unlink			compat_sys_unlink
-11   common	execve			sys_execve			compat_sys_execve
+11   common	execve			sys_execve			sys_execve
 12   common	chdir			sys_chdir			compat_sys_chdir
 13   32		time			-				compat_sys_time
 14   common	mknod			sys_mknod			compat_sys_mknod
@@ -361,7 +361,7 @@
 351  common	bpf			sys_bpf				compat_sys_bpf
 352  common	s390_pci_mmio_write	sys_s390_pci_mmio_write		compat_sys_s390_pci_mmio_write
 353  common	s390_pci_mmio_read	sys_s390_pci_mmio_read		compat_sys_s390_pci_mmio_read
-354  common	execveat		sys_execveat			compat_sys_execveat
+354  common	execveat		sys_execveat			sys_execveat
 355  common	userfaultfd		sys_userfaultfd			sys_userfaultfd
 356  common	membarrier		sys_membarrier			sys_membarrier
 357  common	recvmmsg		sys_recvmmsg			compat_sys_recvmmsg
diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
index 78847b32e1370f..cb3fce6ed63ebf 100644
--- a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
@@ -375,7 +375,7 @@
 517	x32	recvfrom		compat_sys_recvfrom
 518	x32	sendmsg			compat_sys_sendmsg
 519	x32	recvmsg			compat_sys_recvmsg
-520	x32	execve			compat_sys_execve
+520	x32	execve			sys_execve
 521	x32	ptrace			compat_sys_ptrace
 522	x32	rt_sigpending		compat_sys_rt_sigpending
 523	x32	rt_sigtimedwait		compat_sys_rt_sigtimedwait_time64
@@ -400,6 +400,6 @@
 542	x32	getsockopt		compat_sys_getsockopt
 543	x32	io_setup		compat_sys_io_setup
 544	x32	io_submit		compat_sys_io_submit
-545	x32	execveat		compat_sys_execveat
+545	x32	execveat		sys_execveat
 546	x32	preadv2			compat_sys_preadv64v2
 547	x32	pwritev2		compat_sys_pwritev64v2
-- 
2.26.2


^ permalink raw reply related

* [PATCH 1/5] exec: cleanup the execve wrappers
From: Christoph Hellwig @ 2020-06-27  7:27 UTC (permalink / raw)
  To: Al Viro
  Cc: linux-arch, linux-s390, linux-parisc, Arnd Bergmann, Brian Gerst,
	x86, linux-mips, linux-kernel, linux-fsdevel, Luis Chamberlain,
	sparclinux, linuxppc-dev, linux-arm-kernel
In-Reply-To: <20200627072704.2447163-1-hch@lst.de>

Remove a whole bunch of wrappers that eventually all call
__do_execve_file, and consolidate the execvce helpers to:

  (1) __do_execveat, which is the lowest level helper implementing the
      actual functionality
  (2) do_execvat, which is used by all callers that want native
      pointers
  (3) do_compat_execve, which is used by all compat syscalls

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/exec.c               | 98 +++++++++++------------------------------
 include/linux/binfmts.h | 12 ++---
 init/main.c             |  7 +--
 kernel/umh.c            | 16 +++----
 4 files changed, 41 insertions(+), 92 deletions(-)

diff --git a/fs/exec.c b/fs/exec.c
index e6e8a9a7032784..354fdaa536ae7d 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1815,10 +1815,7 @@ static int exec_binprm(struct linux_binprm *bprm)
 	return 0;
 }
 
-/*
- * sys_execve() executes a new program.
- */
-static int __do_execve_file(int fd, struct filename *filename,
+static int __do_execveat(int fd, struct filename *filename,
 			    struct user_arg_ptr argv,
 			    struct user_arg_ptr envp,
 			    int flags, struct file *file)
@@ -1972,74 +1969,16 @@ static int __do_execve_file(int fd, struct filename *filename,
 	return retval;
 }
 
-static int do_execveat_common(int fd, struct filename *filename,
-			      struct user_arg_ptr argv,
-			      struct user_arg_ptr envp,
-			      int flags)
-{
-	return __do_execve_file(fd, filename, argv, envp, flags, NULL);
-}
-
-int do_execve_file(struct file *file, void *__argv, void *__envp)
-{
-	struct user_arg_ptr argv = { .ptr.native = __argv };
-	struct user_arg_ptr envp = { .ptr.native = __envp };
-
-	return __do_execve_file(AT_FDCWD, NULL, argv, envp, 0, file);
-}
-
-int do_execve(struct filename *filename,
-	const char __user *const __user *__argv,
-	const char __user *const __user *__envp)
-{
-	struct user_arg_ptr argv = { .ptr.native = __argv };
-	struct user_arg_ptr envp = { .ptr.native = __envp };
-	return do_execveat_common(AT_FDCWD, filename, argv, envp, 0);
-}
-
 int do_execveat(int fd, struct filename *filename,
 		const char __user *const __user *__argv,
 		const char __user *const __user *__envp,
-		int flags)
+		int flags, struct file *file)
 {
 	struct user_arg_ptr argv = { .ptr.native = __argv };
 	struct user_arg_ptr envp = { .ptr.native = __envp };
 
-	return do_execveat_common(fd, filename, argv, envp, flags);
-}
-
-#ifdef CONFIG_COMPAT
-static int compat_do_execve(struct filename *filename,
-	const compat_uptr_t __user *__argv,
-	const compat_uptr_t __user *__envp)
-{
-	struct user_arg_ptr argv = {
-		.is_compat = true,
-		.ptr.compat = __argv,
-	};
-	struct user_arg_ptr envp = {
-		.is_compat = true,
-		.ptr.compat = __envp,
-	};
-	return do_execveat_common(AT_FDCWD, filename, argv, envp, 0);
-}
-
-static int compat_do_execveat(int fd, struct filename *filename,
-			      const compat_uptr_t __user *__argv,
-			      const compat_uptr_t __user *__envp,
-			      int flags)
-{
-	struct user_arg_ptr argv = {
-		.is_compat = true,
-		.ptr.compat = __argv,
-	};
-	struct user_arg_ptr envp = {
-		.is_compat = true,
-		.ptr.compat = __envp,
-	};
-	return do_execveat_common(fd, filename, argv, envp, flags);
+	return __do_execveat(fd, filename, argv, envp, flags, file);
 }
-#endif
 
 void set_binfmt(struct linux_binfmt *new)
 {
@@ -2070,7 +2009,7 @@ SYSCALL_DEFINE3(execve,
 		const char __user *const __user *, argv,
 		const char __user *const __user *, envp)
 {
-	return do_execve(getname(filename), argv, envp);
+	return do_execveat(AT_FDCWD, getname(filename), argv, envp, 0, NULL);
 }
 
 SYSCALL_DEFINE5(execveat,
@@ -2080,18 +2019,34 @@ SYSCALL_DEFINE5(execveat,
 		int, flags)
 {
 	int lookup_flags = (flags & AT_EMPTY_PATH) ? LOOKUP_EMPTY : 0;
+	struct filename *name = getname_flags(filename, lookup_flags, NULL);
 
-	return do_execveat(fd,
-			   getname_flags(filename, lookup_flags, NULL),
-			   argv, envp, flags);
+	return do_execveat(fd, name, argv, envp, flags, NULL);
 }
 
 #ifdef CONFIG_COMPAT
+static int do_compat_execve(int fd, struct filename *filename,
+		const compat_uptr_t __user *__argv,
+		const compat_uptr_t __user *__envp,
+		int flags)
+{
+	struct user_arg_ptr argv = {
+		.is_compat = true,
+		.ptr.compat = __argv,
+	};
+	struct user_arg_ptr envp = {
+		.is_compat = true,
+		.ptr.compat = __envp,
+	};
+
+	return __do_execveat(fd, filename, argv, envp, flags, NULL);
+}
+
 COMPAT_SYSCALL_DEFINE3(execve, const char __user *, filename,
 	const compat_uptr_t __user *, argv,
 	const compat_uptr_t __user *, envp)
 {
-	return compat_do_execve(getname(filename), argv, envp);
+	return do_compat_execve(AT_FDCWD, getname(filename), argv, envp, 0);
 }
 
 COMPAT_SYSCALL_DEFINE5(execveat, int, fd,
@@ -2101,9 +2056,8 @@ COMPAT_SYSCALL_DEFINE5(execveat, int, fd,
 		       int,  flags)
 {
 	int lookup_flags = (flags & AT_EMPTY_PATH) ? LOOKUP_EMPTY : 0;
+	struct filename *name = getname_flags(filename, lookup_flags, NULL);
 
-	return compat_do_execveat(fd,
-				  getname_flags(filename, lookup_flags, NULL),
-				  argv, envp, flags);
+	return do_compat_execve(fd, name, argv, envp, flags);
 }
 #endif
diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h
index 4a20b7517dd036..bed702e4b1fbd9 100644
--- a/include/linux/binfmts.h
+++ b/include/linux/binfmts.h
@@ -134,13 +134,9 @@ int copy_string_kernel(const char *arg, struct linux_binprm *bprm);
 extern void set_binfmt(struct linux_binfmt *new);
 extern ssize_t read_code(struct file *, unsigned long, loff_t, size_t);
 
-extern int do_execve(struct filename *,
-		     const char __user * const __user *,
-		     const char __user * const __user *);
-extern int do_execveat(int, struct filename *,
-		       const char __user * const __user *,
-		       const char __user * const __user *,
-		       int);
-int do_execve_file(struct file *file, void *__argv, void *__envp);
+int do_execveat(int fd, struct filename *filename,
+		const char __user *const __user *__argv,
+		const char __user *const __user *__envp,
+		int flags, struct file *file);
 
 #endif /* _LINUX_BINFMTS_H */
diff --git a/init/main.c b/init/main.c
index 0ead83e86b5aa2..838950ea7bca22 100644
--- a/init/main.c
+++ b/init/main.c
@@ -1329,9 +1329,10 @@ static int run_init_process(const char *init_filename)
 	pr_debug("  with environment:\n");
 	for (p = envp_init; *p; p++)
 		pr_debug("    %s\n", *p);
-	return do_execve(getname_kernel(init_filename),
-		(const char __user *const __user *)argv_init,
-		(const char __user *const __user *)envp_init);
+	return do_execveat(AT_FDCWD, getname_kernel(init_filename),
+			(const char __user *const __user *)argv_init,
+			(const char __user *const __user *)envp_init,
+			0, NULL);
 }
 
 static int try_to_run_init_process(const char *init_filename)
diff --git a/kernel/umh.c b/kernel/umh.c
index 79f139a7ca03c6..7aa9a5817582ca 100644
--- a/kernel/umh.c
+++ b/kernel/umh.c
@@ -103,15 +103,13 @@ static int call_usermodehelper_exec_async(void *data)
 	commit_creds(new);
 
 	sub_info->pid = task_pid_nr(current);
-	if (sub_info->file) {
-		retval = do_execve_file(sub_info->file,
-					sub_info->argv, sub_info->envp);
-		if (!retval)
-			current->flags |= PF_UMH;
-	} else
-		retval = do_execve(getname_kernel(sub_info->path),
-				   (const char __user *const __user *)sub_info->argv,
-				   (const char __user *const __user *)sub_info->envp);
+	retval = do_execveat(AT_FDCWD,
+			sub_info->path ? getname_kernel(sub_info->path) : NULL,
+			(const char __user *const __user *)sub_info->argv,
+			(const char __user *const __user *)sub_info->envp,
+			0, sub_info->file);
+	if (sub_info->file && !retval)
+		current->flags |= PF_UMH;
 out:
 	sub_info->retval = retval;
 	/*
-- 
2.26.2


^ permalink raw reply related

* [PATCH 3/5] exec: cleanup the count() function
From: Christoph Hellwig @ 2020-06-27  7:27 UTC (permalink / raw)
  To: Al Viro
  Cc: linux-arch, linux-s390, linux-parisc, Arnd Bergmann, Brian Gerst,
	x86, linux-mips, linux-kernel, linux-fsdevel, Luis Chamberlain,
	sparclinux, linuxppc-dev, linux-arm-kernel
In-Reply-To: <20200627072704.2447163-1-hch@lst.de>

Remove the max argument as it is hard wired to MAX_ARG_STRINGS, and
give the function a slightly less generic name.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/exec.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/fs/exec.c b/fs/exec.c
index 4e5db0e35797a5..a5d91f8b1341d5 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -407,9 +407,9 @@ get_user_arg_ptr(const char __user *const __user *argv, int nr)
 }
 
 /*
- * count() counts the number of strings in array ARGV.
+ * count_strings() counts the number of strings in array ARGV.
  */
-static int count(const char __user *const __user *argv, int max)
+static int count_strings(const char __user *const __user *argv)
 {
 	int i = 0;
 
@@ -423,7 +423,7 @@ static int count(const char __user *const __user *argv, int max)
 			if (IS_ERR(p))
 				return -EFAULT;
 
-			if (i >= max)
+			if (i >= MAX_ARG_STRINGS)
 				return -E2BIG;
 			++i;
 
@@ -441,11 +441,11 @@ static int prepare_arg_pages(struct linux_binprm *bprm,
 {
 	unsigned long limit, ptr_size;
 
-	bprm->argc = count(argv, MAX_ARG_STRINGS);
+	bprm->argc = count_strings(argv);
 	if (bprm->argc < 0)
 		return bprm->argc;
 
-	bprm->envc = count(envp, MAX_ARG_STRINGS);
+	bprm->envc = count_strings(envp);
 	if (bprm->envc < 0)
 		return bprm->envc;
 
-- 
2.26.2


^ permalink raw reply related

* [PATCH 5/5] exec: add a kernel_execveat helper
From: Christoph Hellwig @ 2020-06-27  7:27 UTC (permalink / raw)
  To: Al Viro
  Cc: linux-arch, linux-s390, linux-parisc, Arnd Bergmann, Brian Gerst,
	x86, linux-mips, linux-kernel, linux-fsdevel, Luis Chamberlain,
	sparclinux, linuxppc-dev, linux-arm-kernel
In-Reply-To: <20200627072704.2447163-1-hch@lst.de>

Add a kernel_execveat helper to execute a binary with kernel space argv
and envp pointers.  Switch executing init and user mode helpers to this
new helper instead of relying on the implicit set_fs(KERNEL_DS) for early
init code and kernel threads, and move the getname call into the
do_execve helper.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/exec.c               | 109 ++++++++++++++++++++++++++++++++--------
 include/linux/binfmts.h |   6 +--
 init/main.c             |   6 +--
 kernel/umh.c            |   8 ++-
 4 files changed, 95 insertions(+), 34 deletions(-)

diff --git a/fs/exec.c b/fs/exec.c
index 34781db6bf6889..7923b8334ae600 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -435,6 +435,21 @@ static int count_strings(const char __user *const __user *argv)
 	return i;
 }
 
+static int count_kernel_strings(const char *const *argv)
+{
+	int i;
+
+	if (!argv)
+		return 0;
+
+	for (i = 0; argv[i]; i++) {
+		if (i >= MAX_ARG_STRINGS)
+			return -E2BIG;
+	}
+
+	return i;
+}
+
 static int check_arg_limit(struct linux_binprm *bprm)
 {
 	unsigned long limit, ptr_size;
@@ -611,6 +626,19 @@ int copy_string_kernel(const char *arg, struct linux_binprm *bprm)
 }
 EXPORT_SYMBOL(copy_string_kernel);
 
+static int copy_strings_kernel(int argc, const char *const *argv,
+		struct linux_binprm *bprm)
+{
+	int ret;
+
+	while (argc-- > 0) {
+		ret = copy_string_kernel(argv[argc], bprm);
+		if (ret)
+			break;
+	}
+	return ret;
+}
+
 #ifdef CONFIG_MMU
 
 /*
@@ -1793,9 +1821,11 @@ static int exec_binprm(struct linux_binprm *bprm)
 	return 0;
 }
 
-int do_execveat(int fd, struct filename *filename,
+static int __do_execveat(int fd, struct filename *filename,
 		const char __user *const __user *argv,
 		const char __user *const __user *envp,
+		const char *const *kernel_argv,
+		const char *const *kernel_envp,
 		int flags, struct file *file)
 {
 	char *pathbuf = NULL;
@@ -1876,16 +1906,30 @@ int do_execveat(int fd, struct filename *filename,
 	if (retval)
 		goto out_unmark;
 
-	bprm->argc = count_strings(argv);
-	if (bprm->argc < 0) {
-		retval = bprm->argc;
-		goto out;
-	}
+	if (unlikely(kernel_argv)) {
+		bprm->argc = count_kernel_strings(kernel_argv);
+		if (bprm->argc < 0) {
+			retval = bprm->argc;
+			goto out;
+		}
 
-	bprm->envc = count_strings(envp);
-	if (bprm->envc < 0) {
-		retval = bprm->envc;
-		goto out;
+		bprm->envc = count_kernel_strings(kernel_envp);
+		if (bprm->envc < 0) {
+			retval = bprm->envc;
+			goto out;
+		}
+	} else {
+		bprm->argc = count_strings(argv);
+		if (bprm->argc < 0) {
+			retval = bprm->argc;
+			goto out;
+		}
+
+		bprm->envc = count_strings(envp);
+		if (bprm->envc < 0) {
+			retval = bprm->envc;
+			goto out;
+		}
 	}
 
 	retval = check_arg_limit(bprm);
@@ -1902,13 +1946,22 @@ int do_execveat(int fd, struct filename *filename,
 		goto out;
 
 	bprm->exec = bprm->p;
-	retval = copy_strings(bprm->envc, envp, bprm);
-	if (retval < 0)
-		goto out;
 
-	retval = copy_strings(bprm->argc, argv, bprm);
-	if (retval < 0)
-		goto out;
+	if (unlikely(kernel_argv)) {
+		retval = copy_strings_kernel(bprm->envc, kernel_envp, bprm);
+		if (retval < 0)
+			goto out;
+		retval = copy_strings_kernel(bprm->argc, kernel_argv, bprm);
+		if (retval < 0)
+			goto out;
+	} else {
+		retval = copy_strings(bprm->envc, envp, bprm);
+		if (retval < 0)
+			goto out;
+		retval = copy_strings(bprm->argc, argv, bprm);
+		if (retval < 0)
+			goto out;
+	}
 
 	retval = exec_binprm(bprm);
 	if (retval < 0)
@@ -1959,6 +2012,23 @@ int do_execveat(int fd, struct filename *filename,
 	return retval;
 }
 
+static int do_execveat(int fd, const char *filename,
+		       const char __user *const __user *argv,
+		       const char __user *const __user *envp, int flags)
+{
+	int lookup_flags = (flags & AT_EMPTY_PATH) ? LOOKUP_EMPTY : 0;
+	struct filename *name = getname_flags(filename, lookup_flags, NULL);
+
+	return __do_execveat(fd, name, argv, envp, NULL, NULL, flags, NULL);
+}
+
+int kernel_execveat(int fd, const char *filename, const char *const *argv,
+		const char *const *envp, int flags, struct file *file)
+{
+	return __do_execveat(fd, getname_kernel(filename), NULL, NULL, argv,
+			     envp, flags, file);
+}
+
 void set_binfmt(struct linux_binfmt *new)
 {
 	struct mm_struct *mm = current->mm;
@@ -1988,7 +2058,7 @@ SYSCALL_DEFINE3(execve,
 		const char __user *const __user *, argv,
 		const char __user *const __user *, envp)
 {
-	return do_execveat(AT_FDCWD, getname(filename), argv, envp, 0, NULL);
+	return do_execveat(AT_FDCWD, filename, argv, envp, 0);
 }
 
 SYSCALL_DEFINE5(execveat,
@@ -1997,8 +2067,5 @@ SYSCALL_DEFINE5(execveat,
 		const char __user *const __user *, envp,
 		int, flags)
 {
-	int lookup_flags = (flags & AT_EMPTY_PATH) ? LOOKUP_EMPTY : 0;
-	struct filename *name = getname_flags(filename, lookup_flags, NULL);
-
-	return do_execveat(fd, name, argv, envp, flags, NULL);
+	return do_execveat(fd, filename, argv, envp, flags);
 }
diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h
index bed702e4b1fbd9..1e61c980c16354 100644
--- a/include/linux/binfmts.h
+++ b/include/linux/binfmts.h
@@ -134,9 +134,7 @@ int copy_string_kernel(const char *arg, struct linux_binprm *bprm);
 extern void set_binfmt(struct linux_binfmt *new);
 extern ssize_t read_code(struct file *, unsigned long, loff_t, size_t);
 
-int do_execveat(int fd, struct filename *filename,
-		const char __user *const __user *__argv,
-		const char __user *const __user *__envp,
-		int flags, struct file *file);
+int kernel_execveat(int fd, const char *filename, const char *const *argv,
+		const char *const *envp, int flags, struct file *file);
 
 #endif /* _LINUX_BINFMTS_H */
diff --git a/init/main.c b/init/main.c
index 838950ea7bca22..33de235dc2aa00 100644
--- a/init/main.c
+++ b/init/main.c
@@ -1329,10 +1329,8 @@ static int run_init_process(const char *init_filename)
 	pr_debug("  with environment:\n");
 	for (p = envp_init; *p; p++)
 		pr_debug("    %s\n", *p);
-	return do_execveat(AT_FDCWD, getname_kernel(init_filename),
-			(const char __user *const __user *)argv_init,
-			(const char __user *const __user *)envp_init,
-			0, NULL);
+	return kernel_execveat(AT_FDCWD, init_filename, argv_init, envp_init, 0,
+			       NULL);
 }
 
 static int try_to_run_init_process(const char *init_filename)
diff --git a/kernel/umh.c b/kernel/umh.c
index 7aa9a5817582ca..1284823dbad338 100644
--- a/kernel/umh.c
+++ b/kernel/umh.c
@@ -103,11 +103,9 @@ static int call_usermodehelper_exec_async(void *data)
 	commit_creds(new);
 
 	sub_info->pid = task_pid_nr(current);
-	retval = do_execveat(AT_FDCWD,
-			sub_info->path ? getname_kernel(sub_info->path) : NULL,
-			(const char __user *const __user *)sub_info->argv,
-			(const char __user *const __user *)sub_info->envp,
-			0, sub_info->file);
+	retval = kernel_execveat(AT_FDCWD, sub_info->path,
+			(const char *const *)sub_info->argv,
+			(const char *const *)sub_info->envp, 0, sub_info->file);
 	if (sub_info->file && !retval)
 		current->flags |= PF_UMH;
 out:
-- 
2.26.2


^ permalink raw reply related

* [PATCH 4/5] exec: split prepare_arg_pages
From: Christoph Hellwig @ 2020-06-27  7:27 UTC (permalink / raw)
  To: Al Viro
  Cc: linux-arch, linux-s390, linux-parisc, Arnd Bergmann, Brian Gerst,
	x86, linux-mips, linux-kernel, linux-fsdevel, Luis Chamberlain,
	sparclinux, linuxppc-dev, linux-arm-kernel
In-Reply-To: <20200627072704.2447163-1-hch@lst.de>

Move counting the arguments and enviroment variables out of
prepare_arg_pages and rename the rest of the function to check_arg_limit.
This prepares for a version of do_execvat that takes kernel pointers.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/exec.c | 26 ++++++++++++++------------
 1 file changed, 14 insertions(+), 12 deletions(-)

diff --git a/fs/exec.c b/fs/exec.c
index a5d91f8b1341d5..34781db6bf6889 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -435,20 +435,10 @@ static int count_strings(const char __user *const __user *argv)
 	return i;
 }
 
-static int prepare_arg_pages(struct linux_binprm *bprm,
-		const char __user *const __user *argv,
-		const char __user *const __user *envp)
+static int check_arg_limit(struct linux_binprm *bprm)
 {
 	unsigned long limit, ptr_size;
 
-	bprm->argc = count_strings(argv);
-	if (bprm->argc < 0)
-		return bprm->argc;
-
-	bprm->envc = count_strings(envp);
-	if (bprm->envc < 0)
-		return bprm->envc;
-
 	/*
 	 * Limit to 1/4 of the max stack size or 3/4 of _STK_LIM
 	 * (whichever is smaller) for the argv+env strings.
@@ -1886,7 +1876,19 @@ int do_execveat(int fd, struct filename *filename,
 	if (retval)
 		goto out_unmark;
 
-	retval = prepare_arg_pages(bprm, argv, envp);
+	bprm->argc = count_strings(argv);
+	if (bprm->argc < 0) {
+		retval = bprm->argc;
+		goto out;
+	}
+
+	bprm->envc = count_strings(envp);
+	if (bprm->envc < 0) {
+		retval = bprm->envc;
+		goto out;
+	}
+
+	retval = check_arg_limit(bprm);
 	if (retval < 0)
 		goto out;
 
-- 
2.26.2


^ permalink raw reply related

* Re: [PATCH V3 0/4] mm/debug_vm_pgtable: Add some more tests
From: Christophe Leroy @ 2020-06-27  8:17 UTC (permalink / raw)
  To: Anshuman Khandual, linux-mm
  Cc: linux-doc, Heiko Carstens, Paul Mackerras, H. Peter Anvin,
	linux-riscv, Will Deacon, linux-arch, linux-s390, Jonathan Corbet,
	x86, Mike Rapoport, Christian Borntraeger, Ingo Molnar,
	linux-arm-kernel, ziy, Catalin Marinas, linux-snps-arc,
	Vasily Gorbik, Borislav Petkov, Paul Walmsley,
	Kirill A . Shutemov, Thomas Gleixner, gerald.schaefer,
	christophe.leroy, Vineet Gupta, linux-kernel, Palmer Dabbelt,
	Andrew Morton, linuxppc-dev
In-Reply-To: <70ddc7dd-b688-b73e-642a-6363178c8cdd@arm.com>



Le 24/06/2020 à 05:13, Anshuman Khandual a écrit :
> 
> 
> On 06/15/2020 09:07 AM, Anshuman Khandual wrote:
>> This series adds some more arch page table helper validation tests which
>> are related to core and advanced memory functions. This also creates a
>> documentation, enlisting expected semantics for all page table helpers as
>> suggested by Mike Rapoport previously (https://lkml.org/lkml/2020/1/30/40).
>>
>> There are many TRANSPARENT_HUGEPAGE and ARCH_HAS_TRANSPARENT_HUGEPAGE_PUD
>> ifdefs scattered across the test. But consolidating all the fallback stubs
>> is not very straight forward because ARCH_HAS_TRANSPARENT_HUGEPAGE_PUD is
>> not explicitly dependent on ARCH_HAS_TRANSPARENT_HUGEPAGE.
>>
>> Tested on arm64, x86 platforms but only build tested on all other enabled
>> platforms through ARCH_HAS_DEBUG_VM_PGTABLE i.e powerpc, arc, s390. The
>> following failure on arm64 still exists which was mentioned previously. It
>> will be fixed with the upcoming THP migration on arm64 enablement series.
>>
>> WARNING .... mm/debug_vm_pgtable.c:860 debug_vm_pgtable+0x940/0xa54
>> WARN_ON(!pmd_present(pmd_mkinvalid(pmd_mkhuge(pmd))))
>>
>> This series is based on v5.8-rc1.
>>
>> Changes in V3:
>>
>> - Replaced HAVE_ARCH_SOFT_DIRTY with MEM_SOFT_DIRTY
>> - Added HAVE_ARCH_HUGE_VMAP checks in pxx_huge_tests() per Gerald
>> - Updated documentation for pmd_thp_tests() per Zi Yan
>> - Replaced READ_ONCE() with huge_ptep_get() per Gerald
>> - Added pte_mkhuge() and masking with PMD_MASK per Gerald
>> - Replaced pte_same() with holding pfn check in pxx_swap_tests()
>> - Added documentation for all (#ifdef #else #endif) per Gerald
>> - Updated pmd_protnone_tests() per Gerald
>> - Updated HugeTLB PTE creation in hugetlb_advanced_tests() per Gerald
>> - Replaced [pmd|pud]_mknotpresent() with [pmd|pud]_mkinvalid()
>> - Added has_transparent_hugepage() check for PMD and PUD tests
>> - Added a patch which debug prints all individual tests being executed
>> - Updated documentation for renamed [pmd|pud]_mkinvalid() helpers
> 
> Hello Gerald/Christophe/Vineet,
> 
> It would be really great if you could give this series a quick test
> on s390/ppc/arc platforms respectively. Thank you.
> 

Running ok on powerpc 8xx after fixing build failures.

Christophe

^ permalink raw reply

* [GIT PULL] Please pull powerpc/linux.git powerpc-5.8-4 tag
From: Michael Ellerman @ 2020-06-27 12:06 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: aneesh.kumar, linux-kernel, oss, asolokha, harish, linuxppc-dev

-----BEGIN PGP SIGNED MESSAGE-----
Hash: SHA256

Hi Linus,

Please pull some more powerpc fixes for 5.8:

The following changes since commit 48778464bb7d346b47157d21ffde2af6b2d39110:

  Linux 5.8-rc2 (2020-06-21 15:45:29 -0700)

are available in the git repository at:

  https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git tags/powerpc-5.8-4

for you to fetch changes up to 896066aa0685af3434637998b76218c2045142a8:

  selftests/powerpc: Fix build failure in ebb tests (2020-06-26 12:53:09 +1000)

- ------------------------------------------------------------------
powerpc fixes for 5.8 #4

A fix for a crash in nested KVM when CONFIG_DEBUG_VIRTUAL=y.

Two minor build fixes.

Thanks to:
  Aneesh Kumar K.V, Arseny Solokha, Harish.

- ------------------------------------------------------------------
Aneesh Kumar K.V (1):
      powerpc/kvm/book3s64: Fix kernel crash with nested kvm & DEBUG_VIRTUAL

Arseny Solokha (1):
      powerpc/fsl_booke/32: Fix build with CONFIG_RANDOMIZE_BASE

Harish (1):
      selftests/powerpc: Fix build failure in ebb tests


 arch/powerpc/kvm/book3s_64_mmu_radix.c           | 3 ++-
 arch/powerpc/mm/nohash/kaslr_booke.c             | 1 +
 tools/testing/selftests/powerpc/pmu/ebb/Makefile | 2 +-
 3 files changed, 4 insertions(+), 2 deletions(-)
-----BEGIN PGP SIGNATURE-----

iQIzBAEBCAAdFiEEJFGtCPCthwEv2Y/bUevqPMjhpYAFAl73NgYACgkQUevqPMjh
pYDCnBAAnKkykVKK2ylVV1wCLp8VmgfXdi3l9Ub+wUKLARGGHXfF0q07UrNJ81QM
4t0c9XaDcuQ33GMqmhY5vP3/egomplFGmPvVMjRFinMgAzuhnDLvqDrnoiXNfOqe
dxPYlX8W9baj1ByJrYD7Z4UR9E2y8aMRcw1ZClJNw3y6WTHoUjMEbVRyhv3OpPrR
e4wwvs+GHhZpDhsC3xqsKYktzOVR0r0OGoJBPVK102bZqutURJmAkXAt5NFzPopi
aYjbtCWqbalbczj+RCob/e+G2nnqnlvtmJ3N2a1hed3kxwjDkQ8rbGbTKLasiD+d
UgUu8mccl7jvU8OylBFgysg/huhMNzT3HExYpCuK8JsHVp87ihR2+1gEYBsRK3Wj
FtH7VtnABslAjxZGg05ZuSIFF1WUVWsMd35qQHn7TpWlTzgpNqyBdtyF8IOpaSFo
yDqzuzR79Bhp6ivZHbe7JMnP2po47Ne6n+v6qK5cV3Myb+elIiLAfEorbyf6OMx2
qotpvMlzD+StI8SE/7ImSVc7Mu3QU7fN6SRqf2rd1XlqSoQwA7DvwsWnbgKz3G8X
mxhme8aw6Hc6bz3NKz7gAWocVURTpS9eE5yCUzqWuv006jDqsGv0WnQIFkoPDOCN
dKHUkqvvKeQLbv7kJzczEyz0AWNAp3aLMtn4Miszr6Oji9ZmBt0=
=Z6l0
-----END PGP SIGNATURE-----

^ permalink raw reply

* Re: [PATCH v2] powerpc: Drop CONFIG_MTD_M25P80 in 85xx-hw.config
From: Bin Meng @ 2020-06-27 12:48 UTC (permalink / raw)
  To: linuxppc-dev, linux-kernel; +Cc: Bin Meng
In-Reply-To: <1588394694-517-1-git-send-email-bmeng.cn@gmail.com>

On Sat, May 2, 2020 at 12:45 PM Bin Meng <bmeng.cn@gmail.com> wrote:
>
> From: Bin Meng <bin.meng@windriver.com>
>
> Drop CONFIG_MTD_M25P80 that was removed in
> commit b35b9a10362d ("mtd: spi-nor: Move m25p80 code in spi-nor.c")
>
> Signed-off-by: Bin Meng <bin.meng@windriver.com>
>
> ---
>
> Changes in v2:
> - correct the typo (5xx => 85xx) in the commit title
>
>  arch/powerpc/configs/85xx-hw.config | 1 -
>  1 file changed, 1 deletion(-)
>

It seems this patch isn't applied anywhere. Ping?

^ permalink raw reply

* [PATCH 0/8] mm: cleanup usage of <asm/pgalloc.h>
From: Mike Rapoport @ 2020-06-27 14:34 UTC (permalink / raw)
  To: linux-kernel
  Cc: linux-ia64, linux-sh, Peter Zijlstra, Max Filippov,
	Satheesh Rajendran, linux-csky, sparclinux, linux-riscv,
	linux-arch, Stephen Rothwell, linux-hexagon, Joerg Roedel,
	Mike Rapoport, Abdul Haleem, linux-snps-arc, linux-xtensa,
	Arnd Bergmann, linux-s390, linux-um, Steven Rostedt, linux-m68k,
	openrisc, Andy Lutomirski, Stafford Horne, linux-arm-kernel,
	linux-parisc, linux-mm, linux-mips, linux-alpha, Andrew Morton,
	linuxppc-dev, Mike Rapoport

From: Mike Rapoport <rppt@linux.ibm.com>

Hi,

Most architectures have very similar versions of pXd_alloc_one() and
pXd_free_one() for intermediate levels of page table. 
These patches add generic versions of these functions in
<asm-generic/pgalloc.h> and enable use of the generic functions where
appropriate.

In addition, functions declared and defined in <asm/pgalloc.h> headers
are used mostly by core mm and early mm initialization in arch and there is
no actual reason to have the <asm/pgalloc.h> included all over the place.
The first patch in this series removes unneeded includes of <asm/pgalloc.h>

In the end it didn't work out as neatly as I hoped and moving
pXd_alloc_track() definitions to <asm-generic/pgalloc.h> would require
unnecessary changes to arches that have custom page table allocations, so
I've decided to move lib/ioremap.c to mm/ and make pgalloc-track.h local to
mm/.

Joerg Roedel (1):
  mm: move p?d_alloc_track to separate header file

Mike Rapoport (7):
  mm: remove unneeded includes of <asm/pgalloc.h>
  opeinrisc: switch to generic version of pte allocation
  xtensa: switch to generic version of pte allocation
  asm-generic: pgalloc: provide generic pmd_alloc_one() and pmd_free_one()
  asm-generic: pgalloc: provide generic pud_alloc_one() and pud_free_one()
  asm-generic: pgalloc: provide generic pgd_free()
  mm: move lib/ioremap.c to mm/

 arch/alpha/include/asm/pgalloc.h             | 21 +----
 arch/alpha/include/asm/tlbflush.h            |  1 -
 arch/alpha/kernel/core_irongate.c            |  1 -
 arch/alpha/kernel/core_marvel.c              |  1 -
 arch/alpha/kernel/core_titan.c               |  1 -
 arch/alpha/kernel/machvec_impl.h             |  2 -
 arch/alpha/kernel/smp.c                      |  1 -
 arch/alpha/mm/numa.c                         |  1 -
 arch/arc/mm/fault.c                          |  1 -
 arch/arc/mm/init.c                           |  1 -
 arch/arm/include/asm/pgalloc.h               | 12 +--
 arch/arm/include/asm/tlb.h                   |  1 -
 arch/arm/kernel/machine_kexec.c              |  1 -
 arch/arm/kernel/smp.c                        |  1 -
 arch/arm/kernel/suspend.c                    |  1 -
 arch/arm/mach-omap2/omap-mpuss-lowpower.c    |  1 -
 arch/arm/mm/hugetlbpage.c                    |  1 -
 arch/arm/mm/mmu.c                            |  1 +
 arch/arm64/include/asm/pgalloc.h             | 39 +---------
 arch/arm64/kernel/smp.c                      |  1 -
 arch/arm64/mm/hugetlbpage.c                  |  1 -
 arch/arm64/mm/ioremap.c                      |  1 -
 arch/arm64/mm/mmu.c                          |  1 +
 arch/csky/include/asm/pgalloc.h              |  7 +-
 arch/csky/kernel/smp.c                       |  1 -
 arch/hexagon/include/asm/pgalloc.h           |  7 +-
 arch/ia64/include/asm/pgalloc.h              | 24 ------
 arch/ia64/include/asm/tlb.h                  |  1 -
 arch/ia64/kernel/process.c                   |  1 -
 arch/ia64/kernel/smp.c                       |  1 -
 arch/ia64/kernel/smpboot.c                   |  1 -
 arch/ia64/mm/contig.c                        |  1 -
 arch/ia64/mm/discontig.c                     |  1 -
 arch/ia64/mm/hugetlbpage.c                   |  1 -
 arch/ia64/mm/tlb.c                           |  1 -
 arch/m68k/include/asm/mmu_context.h          |  2 +-
 arch/m68k/include/asm/sun3_pgalloc.h         |  7 +-
 arch/m68k/kernel/dma.c                       |  2 +-
 arch/m68k/kernel/traps.c                     |  3 +-
 arch/m68k/mm/cache.c                         |  2 +-
 arch/m68k/mm/fault.c                         |  1 -
 arch/m68k/mm/kmap.c                          |  2 +-
 arch/m68k/mm/mcfmmu.c                        |  1 +
 arch/m68k/mm/memory.c                        |  1 -
 arch/m68k/sun3x/dvma.c                       |  2 +-
 arch/microblaze/include/asm/pgalloc.h        |  6 --
 arch/microblaze/include/asm/tlbflush.h       |  1 -
 arch/microblaze/kernel/process.c             |  1 -
 arch/microblaze/kernel/signal.c              |  1 -
 arch/mips/include/asm/pgalloc.h              | 19 +----
 arch/mips/sgi-ip32/ip32-memory.c             |  1 -
 arch/nds32/mm/mm-nds32.c                     |  2 +
 arch/nios2/include/asm/pgalloc.h             |  7 +-
 arch/openrisc/include/asm/pgalloc.h          | 33 +-------
 arch/openrisc/include/asm/tlbflush.h         |  1 -
 arch/openrisc/kernel/or32_ksyms.c            |  1 -
 arch/parisc/include/asm/mmu_context.h        |  1 -
 arch/parisc/include/asm/pgalloc.h            | 12 +--
 arch/parisc/kernel/cache.c                   |  1 -
 arch/parisc/kernel/pci-dma.c                 |  1 -
 arch/parisc/kernel/process.c                 |  1 -
 arch/parisc/kernel/signal.c                  |  1 -
 arch/parisc/kernel/smp.c                     |  1 -
 arch/parisc/mm/hugetlbpage.c                 |  1 -
 arch/parisc/mm/ioremap.c                     |  2 +-
 arch/powerpc/include/asm/tlb.h               |  1 -
 arch/powerpc/mm/book3s64/hash_hugetlbpage.c  |  1 -
 arch/powerpc/mm/book3s64/hash_pgtable.c      |  1 -
 arch/powerpc/mm/book3s64/hash_tlb.c          |  1 -
 arch/powerpc/mm/book3s64/radix_hugetlbpage.c |  1 -
 arch/powerpc/mm/init_32.c                    |  1 -
 arch/powerpc/mm/kasan/8xx.c                  |  1 -
 arch/powerpc/mm/kasan/book3s_32.c            |  1 -
 arch/powerpc/mm/mem.c                        |  1 -
 arch/powerpc/mm/nohash/40x.c                 |  1 -
 arch/powerpc/mm/nohash/8xx.c                 |  1 -
 arch/powerpc/mm/nohash/fsl_booke.c           |  1 -
 arch/powerpc/mm/nohash/kaslr_booke.c         |  1 -
 arch/powerpc/mm/pgtable.c                    |  1 -
 arch/powerpc/mm/pgtable_64.c                 |  1 -
 arch/powerpc/mm/ptdump/hashpagetable.c       |  2 +-
 arch/powerpc/mm/ptdump/ptdump.c              |  1 -
 arch/powerpc/platforms/pseries/cmm.c         |  1 -
 arch/riscv/include/asm/pgalloc.h             | 18 +----
 arch/riscv/mm/fault.c                        |  1 -
 arch/s390/include/asm/tlb.h                  |  1 -
 arch/s390/include/asm/tlbflush.h             |  1 -
 arch/s390/kernel/machine_kexec.c             |  1 -
 arch/s390/kernel/ptrace.c                    |  1 -
 arch/s390/kvm/diag.c                         |  1 -
 arch/s390/kvm/priv.c                         |  1 -
 arch/s390/kvm/pv.c                           |  1 -
 arch/s390/mm/cmm.c                           |  1 -
 arch/s390/mm/mmap.c                          |  1 -
 arch/s390/mm/pgtable.c                       |  1 -
 arch/sh/include/asm/pgalloc.h                |  4 +
 arch/sh/kernel/idle.c                        |  1 -
 arch/sh/kernel/machine_kexec.c               |  1 -
 arch/sh/mm/cache-sh3.c                       |  1 -
 arch/sh/mm/cache-sh7705.c                    |  1 -
 arch/sh/mm/hugetlbpage.c                     |  1 -
 arch/sh/mm/init.c                            |  1 +
 arch/sh/mm/ioremap_fixed.c                   |  1 -
 arch/sh/mm/tlb-sh3.c                         |  1 -
 arch/sparc/include/asm/ide.h                 |  1 -
 arch/sparc/include/asm/tlb_64.h              |  1 -
 arch/sparc/kernel/leon_smp.c                 |  1 -
 arch/sparc/kernel/process_32.c               |  1 -
 arch/sparc/kernel/signal_32.c                |  1 -
 arch/sparc/kernel/smp_32.c                   |  1 -
 arch/sparc/kernel/smp_64.c                   |  1 +
 arch/sparc/kernel/sun4m_irq.c                |  1 -
 arch/sparc/mm/highmem.c                      |  1 -
 arch/sparc/mm/io-unit.c                      |  1 -
 arch/sparc/mm/iommu.c                        |  1 -
 arch/sparc/mm/tlb.c                          |  1 -
 arch/um/include/asm/pgalloc.h                |  9 +--
 arch/um/include/asm/pgtable-3level.h         |  3 -
 arch/um/kernel/mem.c                         | 17 -----
 arch/x86/ia32/ia32_aout.c                    |  1 -
 arch/x86/include/asm/mmu_context.h           |  1 -
 arch/x86/include/asm/pgalloc.h               | 42 +---------
 arch/x86/kernel/alternative.c                |  1 +
 arch/x86/kernel/apic/apic.c                  |  1 -
 arch/x86/kernel/mpparse.c                    |  1 -
 arch/x86/kernel/traps.c                      |  1 -
 arch/x86/mm/fault.c                          |  1 -
 arch/x86/mm/hugetlbpage.c                    |  1 -
 arch/x86/mm/kaslr.c                          |  1 -
 arch/x86/mm/pgtable_32.c                     |  1 -
 arch/x86/mm/pti.c                            |  1 -
 arch/x86/platform/uv/bios_uv.c               |  1 +
 arch/xtensa/include/asm/pgalloc.h            | 40 ++++------
 arch/xtensa/kernel/xtensa_ksyms.c            |  1 -
 arch/xtensa/mm/cache.c                       |  1 -
 arch/xtensa/mm/fault.c                       |  1 -
 drivers/block/xen-blkback/common.h           |  1 -
 drivers/iommu/ipmmu-vmsa.c                   |  1 -
 drivers/xen/balloon.c                        |  1 -
 drivers/xen/privcmd.c                        |  1 -
 fs/binfmt_elf_fdpic.c                        |  1 -
 include/asm-generic/pgalloc.h                | 80 ++++++++++++++++++++
 include/asm-generic/tlb.h                    |  1 -
 include/linux/mm.h                           | 45 -----------
 lib/Makefile                                 |  1 -
 mm/Makefile                                  |  2 +-
 mm/hugetlb.c                                 |  1 +
 {lib => mm}/ioremap.c                        |  2 +
 mm/pgalloc-track.h                           | 51 +++++++++++++
 mm/sparse.c                                  |  1 -
 mm/vmalloc.c                                 |  1 +
 151 files changed, 194 insertions(+), 451 deletions(-)
 rename {lib => mm}/ioremap.c (99%)
 create mode 100644 mm/pgalloc-track.h

-- 
2.26.2


^ permalink raw reply

* [PATCH 1/8] mm: remove unneeded includes of <asm/pgalloc.h>
From: Mike Rapoport @ 2020-06-27 14:34 UTC (permalink / raw)
  To: linux-kernel
  Cc: linux-ia64, linux-sh, Peter Zijlstra, Max Filippov,
	Satheesh Rajendran, linux-csky, sparclinux, linux-riscv,
	linux-arch, Stephen Rothwell, linux-hexagon, Joerg Roedel,
	Mike Rapoport, Abdul Haleem, linux-snps-arc, linux-xtensa,
	Arnd Bergmann, linux-s390, linux-um, Steven Rostedt, linux-m68k,
	openrisc, Andy Lutomirski, Stafford Horne, linux-arm-kernel,
	linux-parisc, linux-mm, linux-mips, linux-alpha, Andrew Morton,
	linuxppc-dev, Mike Rapoport
In-Reply-To: <20200627143453.31835-1-rppt@kernel.org>

From: Mike Rapoport <rppt@linux.ibm.com>

In the most cases <asm/pgalloc.h> header is required only for allocations
of page table memory. Most of the .c files that include that header do not
use symbols declared in <asm/pgalloc.h> and do not require that header.

As for the other header files that used to include <asm/pgalloc.h>, it is
possible to move that include into the .c file that actually uses symbols
from <asm/pgalloc.h> and drop the include from the header file.

The process was somewhat automated using

	sed -i -E '/[<"]asm\/pgalloc\.h/d' \
                $(grep -L -w -f /tmp/xx \
                        $(git grep -E -l '[<"]asm/pgalloc\.h'))

where /tmp/xx contains all the symbols defined in
arch/*/include/asm/pgalloc.h.

Signed-off-by: Mike Rapoport <rppt@linux.ibm.com>
---
 arch/alpha/include/asm/tlbflush.h            | 1 -
 arch/alpha/kernel/core_irongate.c            | 1 -
 arch/alpha/kernel/core_marvel.c              | 1 -
 arch/alpha/kernel/core_titan.c               | 1 -
 arch/alpha/kernel/machvec_impl.h             | 2 --
 arch/alpha/kernel/smp.c                      | 1 -
 arch/alpha/mm/numa.c                         | 1 -
 arch/arc/mm/fault.c                          | 1 -
 arch/arc/mm/init.c                           | 1 -
 arch/arm/include/asm/tlb.h                   | 1 -
 arch/arm/kernel/machine_kexec.c              | 1 -
 arch/arm/kernel/smp.c                        | 1 -
 arch/arm/kernel/suspend.c                    | 1 -
 arch/arm/mach-omap2/omap-mpuss-lowpower.c    | 1 -
 arch/arm/mm/hugetlbpage.c                    | 1 -
 arch/arm/mm/mmu.c                            | 1 +
 arch/arm64/kernel/smp.c                      | 1 -
 arch/arm64/mm/hugetlbpage.c                  | 1 -
 arch/arm64/mm/ioremap.c                      | 1 -
 arch/arm64/mm/mmu.c                          | 1 +
 arch/csky/kernel/smp.c                       | 1 -
 arch/ia64/include/asm/tlb.h                  | 1 -
 arch/ia64/kernel/process.c                   | 1 -
 arch/ia64/kernel/smp.c                       | 1 -
 arch/ia64/kernel/smpboot.c                   | 1 -
 arch/ia64/mm/contig.c                        | 1 -
 arch/ia64/mm/discontig.c                     | 1 -
 arch/ia64/mm/hugetlbpage.c                   | 1 -
 arch/ia64/mm/tlb.c                           | 1 -
 arch/m68k/include/asm/mmu_context.h          | 2 +-
 arch/m68k/kernel/dma.c                       | 2 +-
 arch/m68k/kernel/traps.c                     | 3 +--
 arch/m68k/mm/cache.c                         | 2 +-
 arch/m68k/mm/fault.c                         | 1 -
 arch/m68k/mm/kmap.c                          | 2 +-
 arch/m68k/mm/mcfmmu.c                        | 1 +
 arch/m68k/mm/memory.c                        | 1 -
 arch/m68k/sun3x/dvma.c                       | 2 +-
 arch/microblaze/include/asm/tlbflush.h       | 1 -
 arch/microblaze/kernel/process.c             | 1 -
 arch/microblaze/kernel/signal.c              | 1 -
 arch/mips/sgi-ip32/ip32-memory.c             | 1 -
 arch/openrisc/include/asm/tlbflush.h         | 1 -
 arch/openrisc/kernel/or32_ksyms.c            | 1 -
 arch/parisc/include/asm/mmu_context.h        | 1 -
 arch/parisc/kernel/cache.c                   | 1 -
 arch/parisc/kernel/pci-dma.c                 | 1 -
 arch/parisc/kernel/process.c                 | 1 -
 arch/parisc/kernel/signal.c                  | 1 -
 arch/parisc/kernel/smp.c                     | 1 -
 arch/parisc/mm/hugetlbpage.c                 | 1 -
 arch/parisc/mm/ioremap.c                     | 2 +-
 arch/powerpc/include/asm/tlb.h               | 1 -
 arch/powerpc/mm/book3s64/hash_hugetlbpage.c  | 1 -
 arch/powerpc/mm/book3s64/hash_pgtable.c      | 1 -
 arch/powerpc/mm/book3s64/hash_tlb.c          | 1 -
 arch/powerpc/mm/book3s64/radix_hugetlbpage.c | 1 -
 arch/powerpc/mm/init_32.c                    | 1 -
 arch/powerpc/mm/kasan/8xx.c                  | 1 -
 arch/powerpc/mm/kasan/book3s_32.c            | 1 -
 arch/powerpc/mm/mem.c                        | 1 -
 arch/powerpc/mm/nohash/40x.c                 | 1 -
 arch/powerpc/mm/nohash/8xx.c                 | 1 -
 arch/powerpc/mm/nohash/fsl_booke.c           | 1 -
 arch/powerpc/mm/nohash/kaslr_booke.c         | 1 -
 arch/powerpc/mm/pgtable.c                    | 1 -
 arch/powerpc/mm/pgtable_64.c                 | 1 -
 arch/powerpc/mm/ptdump/hashpagetable.c       | 2 +-
 arch/powerpc/mm/ptdump/ptdump.c              | 1 -
 arch/powerpc/platforms/pseries/cmm.c         | 1 -
 arch/riscv/mm/fault.c                        | 1 -
 arch/s390/include/asm/tlb.h                  | 1 -
 arch/s390/include/asm/tlbflush.h             | 1 -
 arch/s390/kernel/machine_kexec.c             | 1 -
 arch/s390/kernel/ptrace.c                    | 1 -
 arch/s390/kvm/diag.c                         | 1 -
 arch/s390/kvm/priv.c                         | 1 -
 arch/s390/kvm/pv.c                           | 1 -
 arch/s390/mm/cmm.c                           | 1 -
 arch/s390/mm/mmap.c                          | 1 -
 arch/s390/mm/pgtable.c                       | 1 -
 arch/sh/kernel/idle.c                        | 1 -
 arch/sh/kernel/machine_kexec.c               | 1 -
 arch/sh/mm/cache-sh3.c                       | 1 -
 arch/sh/mm/cache-sh7705.c                    | 1 -
 arch/sh/mm/hugetlbpage.c                     | 1 -
 arch/sh/mm/init.c                            | 1 +
 arch/sh/mm/ioremap_fixed.c                   | 1 -
 arch/sh/mm/tlb-sh3.c                         | 1 -
 arch/sparc/include/asm/ide.h                 | 1 -
 arch/sparc/include/asm/tlb_64.h              | 1 -
 arch/sparc/kernel/leon_smp.c                 | 1 -
 arch/sparc/kernel/process_32.c               | 1 -
 arch/sparc/kernel/signal_32.c                | 1 -
 arch/sparc/kernel/smp_32.c                   | 1 -
 arch/sparc/kernel/smp_64.c                   | 1 +
 arch/sparc/kernel/sun4m_irq.c                | 1 -
 arch/sparc/mm/highmem.c                      | 1 -
 arch/sparc/mm/io-unit.c                      | 1 -
 arch/sparc/mm/iommu.c                        | 1 -
 arch/sparc/mm/tlb.c                          | 1 -
 arch/x86/ia32/ia32_aout.c                    | 1 -
 arch/x86/include/asm/mmu_context.h           | 1 -
 arch/x86/kernel/alternative.c                | 1 +
 arch/x86/kernel/apic/apic.c                  | 1 -
 arch/x86/kernel/mpparse.c                    | 1 -
 arch/x86/kernel/traps.c                      | 1 -
 arch/x86/mm/fault.c                          | 1 -
 arch/x86/mm/hugetlbpage.c                    | 1 -
 arch/x86/mm/kaslr.c                          | 1 -
 arch/x86/mm/pgtable_32.c                     | 1 -
 arch/x86/mm/pti.c                            | 1 -
 arch/x86/platform/uv/bios_uv.c               | 1 +
 arch/xtensa/kernel/xtensa_ksyms.c            | 1 -
 arch/xtensa/mm/cache.c                       | 1 -
 arch/xtensa/mm/fault.c                       | 1 -
 drivers/block/xen-blkback/common.h           | 1 -
 drivers/iommu/ipmmu-vmsa.c                   | 1 -
 drivers/xen/balloon.c                        | 1 -
 drivers/xen/privcmd.c                        | 1 -
 fs/binfmt_elf_fdpic.c                        | 1 -
 include/asm-generic/tlb.h                    | 1 -
 mm/hugetlb.c                                 | 1 +
 mm/sparse.c                                  | 1 -
 124 files changed, 16 insertions(+), 118 deletions(-)

diff --git a/arch/alpha/include/asm/tlbflush.h b/arch/alpha/include/asm/tlbflush.h
index f8b492408f51..94dc37cf873a 100644
--- a/arch/alpha/include/asm/tlbflush.h
+++ b/arch/alpha/include/asm/tlbflush.h
@@ -5,7 +5,6 @@
 #include <linux/mm.h>
 #include <linux/sched.h>
 #include <asm/compiler.h>
-#include <asm/pgalloc.h>
 
 #ifndef __EXTERN_INLINE
 #define __EXTERN_INLINE extern inline
diff --git a/arch/alpha/kernel/core_irongate.c b/arch/alpha/kernel/core_irongate.c
index a9fd133a7fb2..72af1e72d833 100644
--- a/arch/alpha/kernel/core_irongate.c
+++ b/arch/alpha/kernel/core_irongate.c
@@ -302,7 +302,6 @@ irongate_init_arch(void)
 #include <linux/agp_backend.h>
 #include <linux/agpgart.h>
 #include <linux/export.h>
-#include <asm/pgalloc.h>
 
 #define GET_PAGE_DIR_OFF(addr) (addr >> 22)
 #define GET_PAGE_DIR_IDX(addr) (GET_PAGE_DIR_OFF(addr))
diff --git a/arch/alpha/kernel/core_marvel.c b/arch/alpha/kernel/core_marvel.c
index 1db9d0eb2922..4c80d992a659 100644
--- a/arch/alpha/kernel/core_marvel.c
+++ b/arch/alpha/kernel/core_marvel.c
@@ -23,7 +23,6 @@
 #include <asm/ptrace.h>
 #include <asm/smp.h>
 #include <asm/gct.h>
-#include <asm/pgalloc.h>
 #include <asm/tlbflush.h>
 #include <asm/vga.h>
 
diff --git a/arch/alpha/kernel/core_titan.c b/arch/alpha/kernel/core_titan.c
index 2a2820fb1be6..77f5d68ed04b 100644
--- a/arch/alpha/kernel/core_titan.c
+++ b/arch/alpha/kernel/core_titan.c
@@ -20,7 +20,6 @@
 
 #include <asm/ptrace.h>
 #include <asm/smp.h>
-#include <asm/pgalloc.h>
 #include <asm/tlbflush.h>
 #include <asm/vga.h>
 
diff --git a/arch/alpha/kernel/machvec_impl.h b/arch/alpha/kernel/machvec_impl.h
index 38f045ec5cd2..393d5d6ca5d2 100644
--- a/arch/alpha/kernel/machvec_impl.h
+++ b/arch/alpha/kernel/machvec_impl.h
@@ -7,8 +7,6 @@
  * This file has goodies to help simplify instantiation of machine vectors.
  */
 
-#include <asm/pgalloc.h>
-
 /* Whee.  These systems don't have an HAE:
        IRONGATE, MARVEL, POLARIS, TSUNAMI, TITAN, WILDFIRE
    Fix things up for the GENERIC kernel by defining the HAE address
diff --git a/arch/alpha/kernel/smp.c b/arch/alpha/kernel/smp.c
index 631cc17410d1..f4dd9f3f3001 100644
--- a/arch/alpha/kernel/smp.c
+++ b/arch/alpha/kernel/smp.c
@@ -36,7 +36,6 @@
 
 #include <asm/io.h>
 #include <asm/irq.h>
-#include <asm/pgalloc.h>
 #include <asm/mmu_context.h>
 #include <asm/tlbflush.h>
 
diff --git a/arch/alpha/mm/numa.c b/arch/alpha/mm/numa.c
index 5ad6087de1d6..0636e254a22f 100644
--- a/arch/alpha/mm/numa.c
+++ b/arch/alpha/mm/numa.c
@@ -17,7 +17,6 @@
 #include <linux/module.h>
 
 #include <asm/hwrpb.h>
-#include <asm/pgalloc.h>
 #include <asm/sections.h>
 
 pg_data_t node_data[MAX_NUMNODES];
diff --git a/arch/arc/mm/fault.c b/arch/arc/mm/fault.c
index 72f5405a7ec5..7287c793d1c9 100644
--- a/arch/arc/mm/fault.c
+++ b/arch/arc/mm/fault.c
@@ -13,7 +13,6 @@
 #include <linux/kdebug.h>
 #include <linux/perf_event.h>
 #include <linux/mm_types.h>
-#include <asm/pgalloc.h>
 #include <asm/mmu.h>
 
 /*
diff --git a/arch/arc/mm/init.c b/arch/arc/mm/init.c
index e7bdc2ac1c87..f886ac69d8ad 100644
--- a/arch/arc/mm/init.c
+++ b/arch/arc/mm/init.c
@@ -14,7 +14,6 @@
 #include <linux/module.h>
 #include <linux/highmem.h>
 #include <asm/page.h>
-#include <asm/pgalloc.h>
 #include <asm/sections.h>
 #include <asm/arcregs.h>
 
diff --git a/arch/arm/include/asm/tlb.h b/arch/arm/include/asm/tlb.h
index 4d4e7b6aabff..9415222b49ad 100644
--- a/arch/arm/include/asm/tlb.h
+++ b/arch/arm/include/asm/tlb.h
@@ -27,7 +27,6 @@
 #else /* !CONFIG_MMU */
 
 #include <linux/swap.h>
-#include <asm/pgalloc.h>
 #include <asm/tlbflush.h>
 
 static inline void __tlb_remove_table(void *_table)
diff --git a/arch/arm/kernel/machine_kexec.c b/arch/arm/kernel/machine_kexec.c
index 974b6c64d3e6..5d84ad333f05 100644
--- a/arch/arm/kernel/machine_kexec.c
+++ b/arch/arm/kernel/machine_kexec.c
@@ -11,7 +11,6 @@
 #include <linux/irq.h>
 #include <linux/memblock.h>
 #include <linux/of_fdt.h>
-#include <asm/pgalloc.h>
 #include <asm/mmu_context.h>
 #include <asm/cacheflush.h>
 #include <asm/fncpy.h>
diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c
index 9a6432557871..5d9da61eff62 100644
--- a/arch/arm/kernel/smp.c
+++ b/arch/arm/kernel/smp.c
@@ -37,7 +37,6 @@
 #include <asm/idmap.h>
 #include <asm/topology.h>
 #include <asm/mmu_context.h>
-#include <asm/pgalloc.h>
 #include <asm/procinfo.h>
 #include <asm/processor.h>
 #include <asm/sections.h>
diff --git a/arch/arm/kernel/suspend.c b/arch/arm/kernel/suspend.c
index d2c9338d74e8..24bd20564be7 100644
--- a/arch/arm/kernel/suspend.c
+++ b/arch/arm/kernel/suspend.c
@@ -7,7 +7,6 @@
 #include <asm/bugs.h>
 #include <asm/cacheflush.h>
 #include <asm/idmap.h>
-#include <asm/pgalloc.h>
 #include <asm/memory.h>
 #include <asm/smp_plat.h>
 #include <asm/suspend.h>
diff --git a/arch/arm/mach-omap2/omap-mpuss-lowpower.c b/arch/arm/mach-omap2/omap-mpuss-lowpower.c
index 67fa28532a3a..9fba98c2313a 100644
--- a/arch/arm/mach-omap2/omap-mpuss-lowpower.c
+++ b/arch/arm/mach-omap2/omap-mpuss-lowpower.c
@@ -42,7 +42,6 @@
 #include <asm/cacheflush.h>
 #include <asm/tlbflush.h>
 #include <asm/smp_scu.h>
-#include <asm/pgalloc.h>
 #include <asm/suspend.h>
 #include <asm/virt.h>
 #include <asm/hardware/cache-l2x0.h>
diff --git a/arch/arm/mm/hugetlbpage.c b/arch/arm/mm/hugetlbpage.c
index a1e5aace897a..dd7a0277c5c0 100644
--- a/arch/arm/mm/hugetlbpage.c
+++ b/arch/arm/mm/hugetlbpage.c
@@ -17,7 +17,6 @@
 #include <asm/mman.h>
 #include <asm/tlb.h>
 #include <asm/tlbflush.h>
-#include <asm/pgalloc.h>
 
 /*
  * On ARM, huge pages are backed by pmd's rather than pte's, so we do a lot
diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
index 628028bfbb92..cecf5a5e0f6f 100644
--- a/arch/arm/mm/mmu.c
+++ b/arch/arm/mm/mmu.c
@@ -29,6 +29,7 @@
 #include <asm/traps.h>
 #include <asm/procinfo.h>
 #include <asm/memory.h>
+#include <asm/pgalloc.h>
 
 #include <asm/mach/arch.h>
 #include <asm/mach/map.h>
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index e43a8ff19f0f..8059d50bc8cb 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -43,7 +43,6 @@
 #include <asm/kvm_mmu.h>
 #include <asm/mmu_context.h>
 #include <asm/numa.h>
-#include <asm/pgalloc.h>
 #include <asm/processor.h>
 #include <asm/smp_plat.h>
 #include <asm/sections.h>
diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c
index 0a52ce46f020..e8a9842157f5 100644
--- a/arch/arm64/mm/hugetlbpage.c
+++ b/arch/arm64/mm/hugetlbpage.c
@@ -17,7 +17,6 @@
 #include <asm/mman.h>
 #include <asm/tlb.h>
 #include <asm/tlbflush.h>
-#include <asm/pgalloc.h>
 
 #ifdef CONFIG_ARCH_ENABLE_HUGEPAGE_MIGRATION
 bool arch_hugetlb_migration_supported(struct hstate *h)
diff --git a/arch/arm64/mm/ioremap.c b/arch/arm64/mm/ioremap.c
index 9be71bee902c..b5e83c46b23e 100644
--- a/arch/arm64/mm/ioremap.c
+++ b/arch/arm64/mm/ioremap.c
@@ -16,7 +16,6 @@
 
 #include <asm/fixmap.h>
 #include <asm/tlbflush.h>
-#include <asm/pgalloc.h>
 
 static void __iomem *__ioremap_caller(phys_addr_t phys_addr, size_t size,
 				      pgprot_t prot, void *caller)
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 1df25f26571d..cafefb147a5e 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -35,6 +35,7 @@
 #include <asm/mmu_context.h>
 #include <asm/ptdump.h>
 #include <asm/tlbflush.h>
+#include <asm/pgalloc.h>
 
 #define NO_BLOCK_MAPPINGS	BIT(0)
 #define NO_CONT_MAPPINGS	BIT(1)
diff --git a/arch/csky/kernel/smp.c b/arch/csky/kernel/smp.c
index b5c5bc3afeb5..938910280ddc 100644
--- a/arch/csky/kernel/smp.c
+++ b/arch/csky/kernel/smp.c
@@ -21,7 +21,6 @@
 #include <asm/traps.h>
 #include <asm/sections.h>
 #include <asm/mmu_context.h>
-#include <asm/pgalloc.h>
 #ifdef CONFIG_CPU_HAS_FPU
 #include <abi/fpu.h>
 #endif
diff --git a/arch/ia64/include/asm/tlb.h b/arch/ia64/include/asm/tlb.h
index f1f257d632b3..8d9da6f08a62 100644
--- a/arch/ia64/include/asm/tlb.h
+++ b/arch/ia64/include/asm/tlb.h
@@ -42,7 +42,6 @@
 #include <linux/pagemap.h>
 #include <linux/swap.h>
 
-#include <asm/pgalloc.h>
 #include <asm/processor.h>
 #include <asm/tlbflush.h>
 
diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c
index 96dfb9e4b16f..77e0f6242bf9 100644
--- a/arch/ia64/kernel/process.c
+++ b/arch/ia64/kernel/process.c
@@ -40,7 +40,6 @@
 #include <asm/elf.h>
 #include <asm/irq.h>
 #include <asm/kexec.h>
-#include <asm/pgalloc.h>
 #include <asm/processor.h>
 #include <asm/sal.h>
 #include <asm/switch_to.h>
diff --git a/arch/ia64/kernel/smp.c b/arch/ia64/kernel/smp.c
index bbfd421e6deb..0e2742003121 100644
--- a/arch/ia64/kernel/smp.c
+++ b/arch/ia64/kernel/smp.c
@@ -39,7 +39,6 @@
 #include <asm/io.h>
 #include <asm/irq.h>
 #include <asm/page.h>
-#include <asm/pgalloc.h>
 #include <asm/processor.h>
 #include <asm/ptrace.h>
 #include <asm/sal.h>
diff --git a/arch/ia64/kernel/smpboot.c b/arch/ia64/kernel/smpboot.c
index 016683b743c2..c29c600d7967 100644
--- a/arch/ia64/kernel/smpboot.c
+++ b/arch/ia64/kernel/smpboot.c
@@ -49,7 +49,6 @@
 #include <asm/irq.h>
 #include <asm/mca.h>
 #include <asm/page.h>
-#include <asm/pgalloc.h>
 #include <asm/processor.h>
 #include <asm/ptrace.h>
 #include <asm/sal.h>
diff --git a/arch/ia64/mm/contig.c b/arch/ia64/mm/contig.c
index d7d31c718d2d..e30e360beef8 100644
--- a/arch/ia64/mm/contig.c
+++ b/arch/ia64/mm/contig.c
@@ -21,7 +21,6 @@
 #include <linux/swap.h>
 
 #include <asm/meminit.h>
-#include <asm/pgalloc.h>
 #include <asm/sections.h>
 #include <asm/mca.h>
 
diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c
index dd8284bcbf16..51f3de976da7 100644
--- a/arch/ia64/mm/discontig.c
+++ b/arch/ia64/mm/discontig.c
@@ -24,7 +24,6 @@
 #include <linux/efi.h>
 #include <linux/nodemask.h>
 #include <linux/slab.h>
-#include <asm/pgalloc.h>
 #include <asm/tlb.h>
 #include <asm/meminit.h>
 #include <asm/numa.h>
diff --git a/arch/ia64/mm/hugetlbpage.c b/arch/ia64/mm/hugetlbpage.c
index 32352a73df0c..b331f94d20ac 100644
--- a/arch/ia64/mm/hugetlbpage.c
+++ b/arch/ia64/mm/hugetlbpage.c
@@ -18,7 +18,6 @@
 #include <linux/sysctl.h>
 #include <linux/log2.h>
 #include <asm/mman.h>
-#include <asm/pgalloc.h>
 #include <asm/tlb.h>
 #include <asm/tlbflush.h>
 
diff --git a/arch/ia64/mm/tlb.c b/arch/ia64/mm/tlb.c
index 72cc568bc841..fdc0b6f75dd5 100644
--- a/arch/ia64/mm/tlb.c
+++ b/arch/ia64/mm/tlb.c
@@ -27,7 +27,6 @@
 
 #include <asm/delay.h>
 #include <asm/mmu_context.h>
-#include <asm/pgalloc.h>
 #include <asm/pal.h>
 #include <asm/tlbflush.h>
 #include <asm/dma.h>
diff --git a/arch/m68k/include/asm/mmu_context.h b/arch/m68k/include/asm/mmu_context.h
index cac9f289d1f6..993fd7e37069 100644
--- a/arch/m68k/include/asm/mmu_context.h
+++ b/arch/m68k/include/asm/mmu_context.h
@@ -222,7 +222,7 @@ static inline void activate_mm(struct mm_struct *prev_mm,
 
 #include <asm/setup.h>
 #include <asm/page.h>
-#include <asm/pgalloc.h>
+#include <asm/cacheflush.h>
 
 static inline int init_new_context(struct task_struct *tsk,
 				   struct mm_struct *mm)
diff --git a/arch/m68k/kernel/dma.c b/arch/m68k/kernel/dma.c
index 871a0e11da34..b1ca3522eccc 100644
--- a/arch/m68k/kernel/dma.c
+++ b/arch/m68k/kernel/dma.c
@@ -15,7 +15,7 @@
 #include <linux/vmalloc.h>
 #include <linux/export.h>
 
-#include <asm/pgalloc.h>
+#include <asm/cacheflush.h>
 
 #if defined(CONFIG_MMU) && !defined(CONFIG_COLDFIRE)
 void arch_dma_prep_coherent(struct page *page, size_t size)
diff --git a/arch/m68k/kernel/traps.c b/arch/m68k/kernel/traps.c
index df6fc782754f..546e81935fe8 100644
--- a/arch/m68k/kernel/traps.c
+++ b/arch/m68k/kernel/traps.c
@@ -35,10 +35,9 @@
 #include <asm/fpu.h>
 #include <linux/uaccess.h>
 #include <asm/traps.h>
-#include <asm/pgalloc.h>
 #include <asm/machdep.h>
 #include <asm/siginfo.h>
-
+#include <asm/tlbflush.h>
 
 static const char *vec_names[] = {
 	[VEC_RESETSP]	= "RESET SP",
diff --git a/arch/m68k/mm/cache.c b/arch/m68k/mm/cache.c
index 5ecb3310e874..b486c0889eec 100644
--- a/arch/m68k/mm/cache.c
+++ b/arch/m68k/mm/cache.c
@@ -8,7 +8,7 @@
  */
 
 #include <linux/module.h>
-#include <asm/pgalloc.h>
+#include <asm/cacheflush.h>
 #include <asm/traps.h>
 
 
diff --git a/arch/m68k/mm/fault.c b/arch/m68k/mm/fault.c
index a94a814ad6ad..508abb63da67 100644
--- a/arch/m68k/mm/fault.c
+++ b/arch/m68k/mm/fault.c
@@ -15,7 +15,6 @@
 
 #include <asm/setup.h>
 #include <asm/traps.h>
-#include <asm/pgalloc.h>
 
 extern void die_if_kernel(char *, struct pt_regs *, long);
 
diff --git a/arch/m68k/mm/kmap.c b/arch/m68k/mm/kmap.c
index 14d31d216cef..1269d513b221 100644
--- a/arch/m68k/mm/kmap.c
+++ b/arch/m68k/mm/kmap.c
@@ -19,8 +19,8 @@
 #include <asm/setup.h>
 #include <asm/segment.h>
 #include <asm/page.h>
-#include <asm/pgalloc.h>
 #include <asm/io.h>
+#include <asm/tlbflush.h>
 
 #undef DEBUG
 
diff --git a/arch/m68k/mm/mcfmmu.c b/arch/m68k/mm/mcfmmu.c
index 29f47923aa46..bd01b693f225 100644
--- a/arch/m68k/mm/mcfmmu.c
+++ b/arch/m68k/mm/mcfmmu.c
@@ -20,6 +20,7 @@
 #include <asm/mmu_context.h>
 #include <asm/mcf_pgalloc.h>
 #include <asm/tlbflush.h>
+#include <asm/pgalloc.h>
 
 #define KMAPAREA(x)	((x >= VMALLOC_START) && (x < KMAP_END))
 
diff --git a/arch/m68k/mm/memory.c b/arch/m68k/mm/memory.c
index 65e0c4071912..fe75aecfb238 100644
--- a/arch/m68k/mm/memory.c
+++ b/arch/m68k/mm/memory.c
@@ -17,7 +17,6 @@
 #include <asm/setup.h>
 #include <asm/segment.h>
 #include <asm/page.h>
-#include <asm/pgalloc.h>
 #include <asm/traps.h>
 #include <asm/machdep.h>
 
diff --git a/arch/m68k/sun3x/dvma.c b/arch/m68k/sun3x/dvma.c
index fef52d222d46..08bb92113026 100644
--- a/arch/m68k/sun3x/dvma.c
+++ b/arch/m68k/sun3x/dvma.c
@@ -22,7 +22,7 @@
 #include <asm/dvma.h>
 #include <asm/io.h>
 #include <asm/page.h>
-#include <asm/pgalloc.h>
+#include <asm/tlbflush.h>
 
 /* IOMMU support */
 
diff --git a/arch/microblaze/include/asm/tlbflush.h b/arch/microblaze/include/asm/tlbflush.h
index 6f8f5c77a050..1200e2bf14bb 100644
--- a/arch/microblaze/include/asm/tlbflush.h
+++ b/arch/microblaze/include/asm/tlbflush.h
@@ -15,7 +15,6 @@
 #include <asm/processor.h>	/* For TASK_SIZE */
 #include <asm/mmu.h>
 #include <asm/page.h>
-#include <asm/pgalloc.h>
 
 extern void _tlbie(unsigned long address);
 extern void _tlbia(void);
diff --git a/arch/microblaze/kernel/process.c b/arch/microblaze/kernel/process.c
index 6527ec22f158..1f7662185d93 100644
--- a/arch/microblaze/kernel/process.c
+++ b/arch/microblaze/kernel/process.c
@@ -18,7 +18,6 @@
 #include <linux/tick.h>
 #include <linux/bitops.h>
 #include <linux/ptrace.h>
-#include <asm/pgalloc.h>
 #include <linux/uaccess.h> /* for USER_DS macros */
 #include <asm/cacheflush.h>
 
diff --git a/arch/microblaze/kernel/signal.c b/arch/microblaze/kernel/signal.c
index bdd6d0c86e16..65bf5fd8d473 100644
--- a/arch/microblaze/kernel/signal.c
+++ b/arch/microblaze/kernel/signal.c
@@ -35,7 +35,6 @@
 #include <asm/entry.h>
 #include <asm/ucontext.h>
 #include <linux/uaccess.h>
-#include <asm/pgalloc.h>
 #include <linux/syscalls.h>
 #include <asm/cacheflush.h>
 #include <asm/syscalls.h>
diff --git a/arch/mips/sgi-ip32/ip32-memory.c b/arch/mips/sgi-ip32/ip32-memory.c
index be1b2cfc4c3e..62b956cc2d1d 100644
--- a/arch/mips/sgi-ip32/ip32-memory.c
+++ b/arch/mips/sgi-ip32/ip32-memory.c
@@ -14,7 +14,6 @@
 #include <asm/ip32/crime.h>
 #include <asm/bootinfo.h>
 #include <asm/page.h>
-#include <asm/pgalloc.h>
 
 extern void crime_init(void);
 
diff --git a/arch/openrisc/include/asm/tlbflush.h b/arch/openrisc/include/asm/tlbflush.h
index 4a4639c65cbb..185dcd3731ed 100644
--- a/arch/openrisc/include/asm/tlbflush.h
+++ b/arch/openrisc/include/asm/tlbflush.h
@@ -17,7 +17,6 @@
 
 #include <linux/mm.h>
 #include <asm/processor.h>
-#include <asm/pgalloc.h>
 #include <asm/current.h>
 #include <linux/sched.h>
 
diff --git a/arch/openrisc/kernel/or32_ksyms.c b/arch/openrisc/kernel/or32_ksyms.c
index 277ac7a55752..212e5f85004c 100644
--- a/arch/openrisc/kernel/or32_ksyms.c
+++ b/arch/openrisc/kernel/or32_ksyms.c
@@ -26,7 +26,6 @@
 #include <asm/io.h>
 #include <asm/hardirq.h>
 #include <asm/delay.h>
-#include <asm/pgalloc.h>
 
 #define DECLARE_EXPORT(name) extern void name(void); EXPORT_SYMBOL(name)
 
diff --git a/arch/parisc/include/asm/mmu_context.h b/arch/parisc/include/asm/mmu_context.h
index 07b89c74abeb..cb5f2f730421 100644
--- a/arch/parisc/include/asm/mmu_context.h
+++ b/arch/parisc/include/asm/mmu_context.h
@@ -5,7 +5,6 @@
 #include <linux/mm.h>
 #include <linux/sched.h>
 #include <linux/atomic.h>
-#include <asm/pgalloc.h>
 #include <asm-generic/mm_hooks.h>
 
 static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
diff --git a/arch/parisc/kernel/cache.c b/arch/parisc/kernel/cache.c
index 1eedfecc5137..b5e1d9f1b440 100644
--- a/arch/parisc/kernel/cache.c
+++ b/arch/parisc/kernel/cache.c
@@ -24,7 +24,6 @@
 #include <asm/cacheflush.h>
 #include <asm/tlbflush.h>
 #include <asm/page.h>
-#include <asm/pgalloc.h>
 #include <asm/processor.h>
 #include <asm/sections.h>
 #include <asm/shmparam.h>
diff --git a/arch/parisc/kernel/pci-dma.c b/arch/parisc/kernel/pci-dma.c
index 70cd24bdcfec..642b3d9c8a88 100644
--- a/arch/parisc/kernel/pci-dma.c
+++ b/arch/parisc/kernel/pci-dma.c
@@ -32,7 +32,6 @@
 #include <asm/dma.h>    /* for DMA_CHUNK_SIZE */
 #include <asm/io.h>
 #include <asm/page.h>	/* get_order */
-#include <asm/pgalloc.h>
 #include <linux/uaccess.h>
 #include <asm/tlbflush.h>	/* for purge_tlb_*() macros */
 
diff --git a/arch/parisc/kernel/process.c b/arch/parisc/kernel/process.c
index b7abb12edd3a..c74008821075 100644
--- a/arch/parisc/kernel/process.c
+++ b/arch/parisc/kernel/process.c
@@ -47,7 +47,6 @@
 #include <asm/assembly.h>
 #include <asm/pdc.h>
 #include <asm/pdc_chassis.h>
-#include <asm/pgalloc.h>
 #include <asm/unwind.h>
 #include <asm/sections.h>
 
diff --git a/arch/parisc/kernel/signal.c b/arch/parisc/kernel/signal.c
index 02895a8f2c55..5df5d4cd5d4c 100644
--- a/arch/parisc/kernel/signal.c
+++ b/arch/parisc/kernel/signal.c
@@ -30,7 +30,6 @@
 #include <asm/ucontext.h>
 #include <asm/rt_sigframe.h>
 #include <linux/uaccess.h>
-#include <asm/pgalloc.h>
 #include <asm/cacheflush.h>
 #include <asm/asm-offsets.h>
 
diff --git a/arch/parisc/kernel/smp.c b/arch/parisc/kernel/smp.c
index f8a842ddd82d..6271139d2213 100644
--- a/arch/parisc/kernel/smp.c
+++ b/arch/parisc/kernel/smp.c
@@ -39,7 +39,6 @@
 #include <asm/irq.h>		/* for CPU_IRQ_REGION and friends */
 #include <asm/mmu_context.h>
 #include <asm/page.h>
-#include <asm/pgalloc.h>
 #include <asm/processor.h>
 #include <asm/ptrace.h>
 #include <asm/unistd.h>
diff --git a/arch/parisc/mm/hugetlbpage.c b/arch/parisc/mm/hugetlbpage.c
index 0e1e212f1c96..d7ba014a7fbb 100644
--- a/arch/parisc/mm/hugetlbpage.c
+++ b/arch/parisc/mm/hugetlbpage.c
@@ -15,7 +15,6 @@
 #include <linux/sysctl.h>
 
 #include <asm/mman.h>
-#include <asm/pgalloc.h>
 #include <asm/tlb.h>
 #include <asm/tlbflush.h>
 #include <asm/cacheflush.h>
diff --git a/arch/parisc/mm/ioremap.c b/arch/parisc/mm/ioremap.c
index 6e7c005aa09b..345ff0b66499 100644
--- a/arch/parisc/mm/ioremap.c
+++ b/arch/parisc/mm/ioremap.c
@@ -11,7 +11,7 @@
 #include <linux/errno.h>
 #include <linux/module.h>
 #include <linux/io.h>
-#include <asm/pgalloc.h>
+#include <linux/mm.h>
 
 /*
  * Generic mapping function (not visible outside):
diff --git a/arch/powerpc/include/asm/tlb.h b/arch/powerpc/include/asm/tlb.h
index 862985cf5180..fbc6f3002f23 100644
--- a/arch/powerpc/include/asm/tlb.h
+++ b/arch/powerpc/include/asm/tlb.h
@@ -12,7 +12,6 @@
 #ifndef __powerpc64__
 #include <linux/pgtable.h>
 #endif
-#include <asm/pgalloc.h>
 #ifndef __powerpc64__
 #include <asm/page.h>
 #include <asm/mmu.h>
diff --git a/arch/powerpc/mm/book3s64/hash_hugetlbpage.c b/arch/powerpc/mm/book3s64/hash_hugetlbpage.c
index 25acb9c5ee1b..964467b3a776 100644
--- a/arch/powerpc/mm/book3s64/hash_hugetlbpage.c
+++ b/arch/powerpc/mm/book3s64/hash_hugetlbpage.c
@@ -10,7 +10,6 @@
 
 #include <linux/mm.h>
 #include <linux/hugetlb.h>
-#include <asm/pgalloc.h>
 #include <asm/cacheflush.h>
 #include <asm/machdep.h>
 
diff --git a/arch/powerpc/mm/book3s64/hash_pgtable.c b/arch/powerpc/mm/book3s64/hash_pgtable.c
index 2a99167afbaf..fd9c7f91b092 100644
--- a/arch/powerpc/mm/book3s64/hash_pgtable.c
+++ b/arch/powerpc/mm/book3s64/hash_pgtable.c
@@ -9,7 +9,6 @@
 #include <linux/mm_types.h>
 #include <linux/mm.h>
 
-#include <asm/pgalloc.h>
 #include <asm/sections.h>
 #include <asm/mmu.h>
 #include <asm/tlb.h>
diff --git a/arch/powerpc/mm/book3s64/hash_tlb.c b/arch/powerpc/mm/book3s64/hash_tlb.c
index 0fbf3dc9f2c2..eb0bccaf221e 100644
--- a/arch/powerpc/mm/book3s64/hash_tlb.c
+++ b/arch/powerpc/mm/book3s64/hash_tlb.c
@@ -21,7 +21,6 @@
 #include <linux/mm.h>
 #include <linux/percpu.h>
 #include <linux/hardirq.h>
-#include <asm/pgalloc.h>
 #include <asm/tlbflush.h>
 #include <asm/tlb.h>
 #include <asm/bug.h>
diff --git a/arch/powerpc/mm/book3s64/radix_hugetlbpage.c b/arch/powerpc/mm/book3s64/radix_hugetlbpage.c
index c812b401b66c..cb91071eef52 100644
--- a/arch/powerpc/mm/book3s64/radix_hugetlbpage.c
+++ b/arch/powerpc/mm/book3s64/radix_hugetlbpage.c
@@ -2,7 +2,6 @@
 #include <linux/mm.h>
 #include <linux/hugetlb.h>
 #include <linux/security.h>
-#include <asm/pgalloc.h>
 #include <asm/cacheflush.h>
 #include <asm/machdep.h>
 #include <asm/mman.h>
diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c
index 5a5469eb3174..7ea19dc4883b 100644
--- a/arch/powerpc/mm/init_32.c
+++ b/arch/powerpc/mm/init_32.c
@@ -29,7 +29,6 @@
 #include <linux/slab.h>
 #include <linux/hugetlb.h>
 
-#include <asm/pgalloc.h>
 #include <asm/prom.h>
 #include <asm/io.h>
 #include <asm/mmu.h>
diff --git a/arch/powerpc/mm/kasan/8xx.c b/arch/powerpc/mm/kasan/8xx.c
index 569d98a41881..2784224054f8 100644
--- a/arch/powerpc/mm/kasan/8xx.c
+++ b/arch/powerpc/mm/kasan/8xx.c
@@ -5,7 +5,6 @@
 #include <linux/kasan.h>
 #include <linux/memblock.h>
 #include <linux/hugetlb.h>
-#include <asm/pgalloc.h>
 
 static int __init
 kasan_init_shadow_8M(unsigned long k_start, unsigned long k_end, void *block)
diff --git a/arch/powerpc/mm/kasan/book3s_32.c b/arch/powerpc/mm/kasan/book3s_32.c
index a32b4640b9de..202bd260a009 100644
--- a/arch/powerpc/mm/kasan/book3s_32.c
+++ b/arch/powerpc/mm/kasan/book3s_32.c
@@ -4,7 +4,6 @@
 
 #include <linux/kasan.h>
 #include <linux/memblock.h>
-#include <asm/pgalloc.h>
 #include <mm/mmu_decl.h>
 
 int __init kasan_init_region(void *start, size_t size)
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index c2c11eb8dcfc..ab12916ec1a7 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -34,7 +34,6 @@
 #include <linux/dma-direct.h>
 #include <linux/kprobes.h>
 
-#include <asm/pgalloc.h>
 #include <asm/prom.h>
 #include <asm/io.h>
 #include <asm/mmu_context.h>
diff --git a/arch/powerpc/mm/nohash/40x.c b/arch/powerpc/mm/nohash/40x.c
index 13e74bc39ba5..95751c322f6c 100644
--- a/arch/powerpc/mm/nohash/40x.c
+++ b/arch/powerpc/mm/nohash/40x.c
@@ -32,7 +32,6 @@
 #include <linux/highmem.h>
 #include <linux/memblock.h>
 
-#include <asm/pgalloc.h>
 #include <asm/prom.h>
 #include <asm/io.h>
 #include <asm/mmu_context.h>
diff --git a/arch/powerpc/mm/nohash/8xx.c b/arch/powerpc/mm/nohash/8xx.c
index 92e8929cbe3e..d2b37146ae6c 100644
--- a/arch/powerpc/mm/nohash/8xx.c
+++ b/arch/powerpc/mm/nohash/8xx.c
@@ -13,7 +13,6 @@
 #include <asm/fixmap.h>
 #include <asm/code-patching.h>
 #include <asm/inst.h>
-#include <asm/pgalloc.h>
 
 #include <mm/mmu_decl.h>
 
diff --git a/arch/powerpc/mm/nohash/fsl_booke.c b/arch/powerpc/mm/nohash/fsl_booke.c
index c06dfbb771f4..0c294827d6e5 100644
--- a/arch/powerpc/mm/nohash/fsl_booke.c
+++ b/arch/powerpc/mm/nohash/fsl_booke.c
@@ -37,7 +37,6 @@
 #include <linux/highmem.h>
 #include <linux/memblock.h>
 
-#include <asm/pgalloc.h>
 #include <asm/prom.h>
 #include <asm/io.h>
 #include <asm/mmu_context.h>
diff --git a/arch/powerpc/mm/nohash/kaslr_booke.c b/arch/powerpc/mm/nohash/kaslr_booke.c
index 4a75f2d9bf0e..c6ee6e50a7d5 100644
--- a/arch/powerpc/mm/nohash/kaslr_booke.c
+++ b/arch/powerpc/mm/nohash/kaslr_booke.c
@@ -14,7 +14,6 @@
 #include <linux/memblock.h>
 #include <linux/libfdt.h>
 #include <linux/crash_core.h>
-#include <asm/pgalloc.h>
 #include <asm/prom.h>
 #include <asm/kdump.h>
 #include <mm/mmu_decl.h>
diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c
index 1136257c3a99..9c0547d77af3 100644
--- a/arch/powerpc/mm/pgtable.c
+++ b/arch/powerpc/mm/pgtable.c
@@ -23,7 +23,6 @@
 #include <linux/percpu.h>
 #include <linux/hardirq.h>
 #include <linux/hugetlb.h>
-#include <asm/pgalloc.h>
 #include <asm/tlbflush.h>
 #include <asm/tlb.h>
 #include <asm/hugetlb.h>
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index bb43a8c04bee..cc6e2f94517f 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -31,7 +31,6 @@
 #include <linux/slab.h>
 #include <linux/hugetlb.h>
 
-#include <asm/pgalloc.h>
 #include <asm/page.h>
 #include <asm/prom.h>
 #include <asm/mmu_context.h>
diff --git a/arch/powerpc/mm/ptdump/hashpagetable.c b/arch/powerpc/mm/ptdump/hashpagetable.c
index a2c33efc7ce8..ff4b05a9e7f0 100644
--- a/arch/powerpc/mm/ptdump/hashpagetable.c
+++ b/arch/powerpc/mm/ptdump/hashpagetable.c
@@ -17,10 +17,10 @@
 #include <linux/seq_file.h>
 #include <linux/const.h>
 #include <asm/page.h>
-#include <asm/pgalloc.h>
 #include <asm/plpar_wrappers.h>
 #include <linux/memblock.h>
 #include <asm/firmware.h>
+#include <asm/pgalloc.h>
 
 struct pg_state {
 	struct seq_file *seq;
diff --git a/arch/powerpc/mm/ptdump/ptdump.c b/arch/powerpc/mm/ptdump/ptdump.c
index de6e05ef871c..f7ba13c41d13 100644
--- a/arch/powerpc/mm/ptdump/ptdump.c
+++ b/arch/powerpc/mm/ptdump/ptdump.c
@@ -21,7 +21,6 @@
 #include <asm/fixmap.h>
 #include <linux/const.h>
 #include <asm/page.h>
-#include <asm/pgalloc.h>
 #include <asm/hugetlb.h>
 
 #include <mm/mmu_decl.h>
diff --git a/arch/powerpc/platforms/pseries/cmm.c b/arch/powerpc/platforms/pseries/cmm.c
index 9dba7e880885..45a3a3022a85 100644
--- a/arch/powerpc/platforms/pseries/cmm.c
+++ b/arch/powerpc/platforms/pseries/cmm.c
@@ -26,7 +26,6 @@
 #include <asm/firmware.h>
 #include <asm/hvcall.h>
 #include <asm/mmu.h>
-#include <asm/pgalloc.h>
 #include <linux/uaccess.h>
 #include <linux/memory.h>
 #include <asm/plpar_wrappers.h>
diff --git a/arch/riscv/mm/fault.c b/arch/riscv/mm/fault.c
index ae7b7fe24658..5873835a3e6b 100644
--- a/arch/riscv/mm/fault.c
+++ b/arch/riscv/mm/fault.c
@@ -14,7 +14,6 @@
 #include <linux/signal.h>
 #include <linux/uaccess.h>
 
-#include <asm/pgalloc.h>
 #include <asm/ptrace.h>
 #include <asm/tlbflush.h>
 
diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h
index aa406c05a350..954fa8ca6cbd 100644
--- a/arch/s390/include/asm/tlb.h
+++ b/arch/s390/include/asm/tlb.h
@@ -36,7 +36,6 @@ static inline bool __tlb_remove_page_size(struct mmu_gather *tlb,
 #define p4d_free_tlb p4d_free_tlb
 #define pud_free_tlb pud_free_tlb
 
-#include <asm/pgalloc.h>
 #include <asm/tlbflush.h>
 #include <asm-generic/tlb.h>
 
diff --git a/arch/s390/include/asm/tlbflush.h b/arch/s390/include/asm/tlbflush.h
index 2204704840ea..acce6a08a1fa 100644
--- a/arch/s390/include/asm/tlbflush.h
+++ b/arch/s390/include/asm/tlbflush.h
@@ -5,7 +5,6 @@
 #include <linux/mm.h>
 #include <linux/sched.h>
 #include <asm/processor.h>
-#include <asm/pgalloc.h>
 
 /*
  * Flush all TLB entries on the local CPU.
diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c
index 93c6b8932fbd..d91989c7bd6a 100644
--- a/arch/s390/kernel/machine_kexec.c
+++ b/arch/s390/kernel/machine_kexec.c
@@ -16,7 +16,6 @@
 #include <linux/debug_locks.h>
 #include <asm/cio.h>
 #include <asm/setup.h>
-#include <asm/pgalloc.h>
 #include <asm/smp.h>
 #include <asm/ipl.h>
 #include <asm/diag.h>
diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c
index 3cc15c066298..3c72a3b77253 100644
--- a/arch/s390/kernel/ptrace.c
+++ b/arch/s390/kernel/ptrace.c
@@ -25,7 +25,6 @@
 #include <linux/compat.h>
 #include <trace/syscall.h>
 #include <asm/page.h>
-#include <asm/pgalloc.h>
 #include <linux/uaccess.h>
 #include <asm/unistd.h>
 #include <asm/switch_to.h>
diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c
index 563429dece03..5b8ec1c447e1 100644
--- a/arch/s390/kvm/diag.c
+++ b/arch/s390/kvm/diag.c
@@ -10,7 +10,6 @@
 
 #include <linux/kvm.h>
 #include <linux/kvm_host.h>
-#include <asm/pgalloc.h>
 #include <asm/gmap.h>
 #include <asm/virtio-ccw.h>
 #include "kvm-s390.h"
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index 96ae368aa0a2..2f721a923b54 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -22,7 +22,6 @@
 #include <asm/ebcdic.h>
 #include <asm/sysinfo.h>
 #include <asm/page-states.h>
-#include <asm/pgalloc.h>
 #include <asm/gmap.h>
 #include <asm/io.h>
 #include <asm/ptrace.h>
diff --git a/arch/s390/kvm/pv.c b/arch/s390/kvm/pv.c
index 63e330109b63..eb99e2f95ebe 100644
--- a/arch/s390/kvm/pv.c
+++ b/arch/s390/kvm/pv.c
@@ -9,7 +9,6 @@
 #include <linux/kvm_host.h>
 #include <linux/pagemap.h>
 #include <linux/sched/signal.h>
-#include <asm/pgalloc.h>
 #include <asm/gmap.h>
 #include <asm/uv.h>
 #include <asm/mman.h>
diff --git a/arch/s390/mm/cmm.c b/arch/s390/mm/cmm.c
index 36bce727897b..04cb0956da8a 100644
--- a/arch/s390/mm/cmm.c
+++ b/arch/s390/mm/cmm.c
@@ -21,7 +21,6 @@
 #include <linux/oom.h>
 #include <linux/uaccess.h>
 
-#include <asm/pgalloc.h>
 #include <asm/diag.h>
 
 #ifdef CONFIG_CMM_IUCV
diff --git a/arch/s390/mm/mmap.c b/arch/s390/mm/mmap.c
index 1b78f630a9ca..e54f928503c5 100644
--- a/arch/s390/mm/mmap.c
+++ b/arch/s390/mm/mmap.c
@@ -17,7 +17,6 @@
 #include <linux/random.h>
 #include <linux/compat.h>
 #include <linux/security.h>
-#include <asm/pgalloc.h>
 #include <asm/elf.h>
 
 static unsigned long stack_maxrandom_size(void)
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index 2e0cc19f4cd7..0d25f743b270 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -19,7 +19,6 @@
 #include <linux/ksm.h>
 #include <linux/mman.h>
 
-#include <asm/pgalloc.h>
 #include <asm/tlb.h>
 #include <asm/tlbflush.h>
 #include <asm/mmu_context.h>
diff --git a/arch/sh/kernel/idle.c b/arch/sh/kernel/idle.c
index c20fc5487e05..0dc0f52f9bb8 100644
--- a/arch/sh/kernel/idle.c
+++ b/arch/sh/kernel/idle.c
@@ -14,7 +14,6 @@
 #include <linux/irqflags.h>
 #include <linux/smp.h>
 #include <linux/atomic.h>
-#include <asm/pgalloc.h>
 #include <asm/smp.h>
 #include <asm/bl_bit.h>
 
diff --git a/arch/sh/kernel/machine_kexec.c b/arch/sh/kernel/machine_kexec.c
index 4a98980b8a07..223c14f44af7 100644
--- a/arch/sh/kernel/machine_kexec.c
+++ b/arch/sh/kernel/machine_kexec.c
@@ -14,7 +14,6 @@
 #include <linux/ftrace.h>
 #include <linux/suspend.h>
 #include <linux/memblock.h>
-#include <asm/pgalloc.h>
 #include <asm/mmu_context.h>
 #include <asm/io.h>
 #include <asm/cacheflush.h>
diff --git a/arch/sh/mm/cache-sh3.c b/arch/sh/mm/cache-sh3.c
index 26f3bd43e850..bc595982d396 100644
--- a/arch/sh/mm/cache-sh3.c
+++ b/arch/sh/mm/cache-sh3.c
@@ -16,7 +16,6 @@
 #include <asm/cache.h>
 #include <asm/io.h>
 #include <linux/uaccess.h>
-#include <asm/pgalloc.h>
 #include <asm/mmu_context.h>
 #include <asm/cacheflush.h>
 
diff --git a/arch/sh/mm/cache-sh7705.c b/arch/sh/mm/cache-sh7705.c
index 48978293226c..4c67b3d88775 100644
--- a/arch/sh/mm/cache-sh7705.c
+++ b/arch/sh/mm/cache-sh7705.c
@@ -20,7 +20,6 @@
 #include <asm/cache.h>
 #include <asm/io.h>
 #include <linux/uaccess.h>
-#include <asm/pgalloc.h>
 #include <asm/mmu_context.h>
 #include <asm/cacheflush.h>
 
diff --git a/arch/sh/mm/hugetlbpage.c b/arch/sh/mm/hugetlbpage.c
index acd5652a0de3..220d7bc43d2b 100644
--- a/arch/sh/mm/hugetlbpage.c
+++ b/arch/sh/mm/hugetlbpage.c
@@ -17,7 +17,6 @@
 #include <linux/sysctl.h>
 
 #include <asm/mman.h>
-#include <asm/pgalloc.h>
 #include <asm/tlb.h>
 #include <asm/tlbflush.h>
 #include <asm/cacheflush.h>
diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c
index a70ba0fdd0b3..a86ce13f392c 100644
--- a/arch/sh/mm/init.c
+++ b/arch/sh/mm/init.c
@@ -27,6 +27,7 @@
 #include <asm/sections.h>
 #include <asm/setup.h>
 #include <asm/cache.h>
+#include <asm/pgalloc.h>
 #include <linux/sizes.h>
 
 pgd_t swapper_pg_dir[PTRS_PER_PGD];
diff --git a/arch/sh/mm/ioremap_fixed.c b/arch/sh/mm/ioremap_fixed.c
index 07e744d75fa0..aab3f82856bb 100644
--- a/arch/sh/mm/ioremap_fixed.c
+++ b/arch/sh/mm/ioremap_fixed.c
@@ -18,7 +18,6 @@
 #include <linux/proc_fs.h>
 #include <asm/fixmap.h>
 #include <asm/page.h>
-#include <asm/pgalloc.h>
 #include <asm/addrspace.h>
 #include <asm/cacheflush.h>
 #include <asm/tlbflush.h>
diff --git a/arch/sh/mm/tlb-sh3.c b/arch/sh/mm/tlb-sh3.c
index 869243518bb3..fb400afc2a49 100644
--- a/arch/sh/mm/tlb-sh3.c
+++ b/arch/sh/mm/tlb-sh3.c
@@ -21,7 +21,6 @@
 
 #include <asm/io.h>
 #include <linux/uaccess.h>
-#include <asm/pgalloc.h>
 #include <asm/mmu_context.h>
 #include <asm/cacheflush.h>
 
diff --git a/arch/sparc/include/asm/ide.h b/arch/sparc/include/asm/ide.h
index 499aa2e6e276..904cc6cbc155 100644
--- a/arch/sparc/include/asm/ide.h
+++ b/arch/sparc/include/asm/ide.h
@@ -13,7 +13,6 @@
 
 #include <asm/io.h>
 #ifdef CONFIG_SPARC64
-#include <asm/pgalloc.h>
 #include <asm/spitfire.h>
 #include <asm/cacheflush.h>
 #include <asm/page.h>
diff --git a/arch/sparc/include/asm/tlb_64.h b/arch/sparc/include/asm/tlb_64.h
index 6820d357581c..e841cae544c2 100644
--- a/arch/sparc/include/asm/tlb_64.h
+++ b/arch/sparc/include/asm/tlb_64.h
@@ -4,7 +4,6 @@
 
 #include <linux/swap.h>
 #include <linux/pagemap.h>
-#include <asm/pgalloc.h>
 #include <asm/tlbflush.h>
 #include <asm/mmu_context.h>
 
diff --git a/arch/sparc/kernel/leon_smp.c b/arch/sparc/kernel/leon_smp.c
index 41829c024f92..1eed26d423fb 100644
--- a/arch/sparc/kernel/leon_smp.c
+++ b/arch/sparc/kernel/leon_smp.c
@@ -38,7 +38,6 @@
 #include <asm/delay.h>
 #include <asm/irq.h>
 #include <asm/page.h>
-#include <asm/pgalloc.h>
 #include <asm/oplib.h>
 #include <asm/cpudata.h>
 #include <asm/asi.h>
diff --git a/arch/sparc/kernel/process_32.c b/arch/sparc/kernel/process_32.c
index 13cb5638fab8..18e5b31a30ea 100644
--- a/arch/sparc/kernel/process_32.c
+++ b/arch/sparc/kernel/process_32.c
@@ -34,7 +34,6 @@
 #include <asm/oplib.h>
 #include <linux/uaccess.h>
 #include <asm/page.h>
-#include <asm/pgalloc.h>
 #include <asm/delay.h>
 #include <asm/processor.h>
 #include <asm/psr.h>
diff --git a/arch/sparc/kernel/signal_32.c b/arch/sparc/kernel/signal_32.c
index 3b005b6c3e0f..f1f8c8ebe641 100644
--- a/arch/sparc/kernel/signal_32.c
+++ b/arch/sparc/kernel/signal_32.c
@@ -23,7 +23,6 @@
 
 #include <linux/uaccess.h>
 #include <asm/ptrace.h>
-#include <asm/pgalloc.h>
 #include <asm/cacheflush.h>	/* flush_sig_insns */
 #include <asm/switch_to.h>
 
diff --git a/arch/sparc/kernel/smp_32.c b/arch/sparc/kernel/smp_32.c
index 76ce290c67cf..50c127ab46d5 100644
--- a/arch/sparc/kernel/smp_32.c
+++ b/arch/sparc/kernel/smp_32.c
@@ -29,7 +29,6 @@
 
 #include <asm/irq.h>
 #include <asm/page.h>
-#include <asm/pgalloc.h>
 #include <asm/oplib.h>
 #include <asm/cacheflush.h>
 #include <asm/tlbflush.h>
diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c
index 0085e28bf019..e286e2badc8a 100644
--- a/arch/sparc/kernel/smp_64.c
+++ b/arch/sparc/kernel/smp_64.c
@@ -47,6 +47,7 @@
 #include <linux/uaccess.h>
 #include <asm/starfire.h>
 #include <asm/tlb.h>
+#include <asm/pgalloc.h>
 #include <asm/sections.h>
 #include <asm/prom.h>
 #include <asm/mdesc.h>
diff --git a/arch/sparc/kernel/sun4m_irq.c b/arch/sparc/kernel/sun4m_irq.c
index 91b61f012d19..1079638986b5 100644
--- a/arch/sparc/kernel/sun4m_irq.c
+++ b/arch/sparc/kernel/sun4m_irq.c
@@ -16,7 +16,6 @@
 
 #include <asm/timer.h>
 #include <asm/traps.h>
-#include <asm/pgalloc.h>
 #include <asm/irq.h>
 #include <asm/io.h>
 #include <asm/cacheflush.h>
diff --git a/arch/sparc/mm/highmem.c b/arch/sparc/mm/highmem.c
index d1fc9a7b7d78..8f2a2afb048a 100644
--- a/arch/sparc/mm/highmem.c
+++ b/arch/sparc/mm/highmem.c
@@ -29,7 +29,6 @@
 
 #include <asm/cacheflush.h>
 #include <asm/tlbflush.h>
-#include <asm/pgalloc.h>
 #include <asm/vaddrs.h>
 
 static pte_t *kmap_pte;
diff --git a/arch/sparc/mm/io-unit.c b/arch/sparc/mm/io-unit.c
index bfcc04bfce54..430a47a1b6ae 100644
--- a/arch/sparc/mm/io-unit.c
+++ b/arch/sparc/mm/io-unit.c
@@ -15,7 +15,6 @@
 #include <linux/of.h>
 #include <linux/of_device.h>
 
-#include <asm/pgalloc.h>
 #include <asm/io.h>
 #include <asm/io-unit.h>
 #include <asm/mxcc.h>
diff --git a/arch/sparc/mm/iommu.c b/arch/sparc/mm/iommu.c
index 35b002eb312e..3a388b1c5d4b 100644
--- a/arch/sparc/mm/iommu.c
+++ b/arch/sparc/mm/iommu.c
@@ -16,7 +16,6 @@
 #include <linux/of.h>
 #include <linux/of_device.h>
 
-#include <asm/pgalloc.h>
 #include <asm/io.h>
 #include <asm/mxcc.h>
 #include <asm/mbus.h>
diff --git a/arch/sparc/mm/tlb.c b/arch/sparc/mm/tlb.c
index a32a16c18617..20ee14739333 100644
--- a/arch/sparc/mm/tlb.c
+++ b/arch/sparc/mm/tlb.c
@@ -10,7 +10,6 @@
 #include <linux/swap.h>
 #include <linux/preempt.h>
 
-#include <asm/pgalloc.h>
 #include <asm/tlbflush.h>
 #include <asm/cacheflush.h>
 #include <asm/mmu_context.h>
diff --git a/arch/x86/ia32/ia32_aout.c b/arch/x86/ia32/ia32_aout.c
index 385d3d172ee1..ca8a657edf59 100644
--- a/arch/x86/ia32/ia32_aout.c
+++ b/arch/x86/ia32/ia32_aout.c
@@ -30,7 +30,6 @@
 #include <linux/sched/task_stack.h>
 
 #include <linux/uaccess.h>
-#include <asm/pgalloc.h>
 #include <asm/cacheflush.h>
 #include <asm/user32.h>
 #include <asm/ia32.h>
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index 47562147e70b..d98016b83755 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -9,7 +9,6 @@
 
 #include <trace/events/tlb.h>
 
-#include <asm/pgalloc.h>
 #include <asm/tlbflush.h>
 #include <asm/paravirt.h>
 #include <asm/debugreg.h>
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 8fd39ff74a49..ef28efb48c85 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -6,6 +6,7 @@
 #include <linux/mutex.h>
 #include <linux/list.h>
 #include <linux/stringify.h>
+#include <linux/highmem.h>
 #include <linux/mm.h>
 #include <linux/vmalloc.h>
 #include <linux/memory.h>
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index e0e2f020ec02..ccf726cc87b7 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -40,7 +40,6 @@
 #include <asm/irq_remapping.h>
 #include <asm/perf_event.h>
 #include <asm/x86_init.h>
-#include <asm/pgalloc.h>
 #include <linux/atomic.h>
 #include <asm/mpspec.h>
 #include <asm/i8259.h>
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index afac7ccce72f..c27b82b62c8b 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -22,7 +22,6 @@
 #include <asm/irqdomain.h>
 #include <asm/mtrr.h>
 #include <asm/mpspec.h>
-#include <asm/pgalloc.h>
 #include <asm/io_apic.h>
 #include <asm/proto.h>
 #include <asm/bios_ebda.h>
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index f9727b96961f..8002b8a7b4cb 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -62,7 +62,6 @@
 
 #ifdef CONFIG_X86_64
 #include <asm/x86_init.h>
-#include <asm/pgalloc.h>
 #include <asm/proto.h>
 #else
 #include <asm/processor-flags.h>
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 1ead568c0101..02536b04d9f3 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -21,7 +21,6 @@
 
 #include <asm/cpufeature.h>		/* boot_cpu_has, ...		*/
 #include <asm/traps.h>			/* dotraplinkage, ...		*/
-#include <asm/pgalloc.h>		/* pgd_*(), ...			*/
 #include <asm/fixmap.h>			/* VSYSCALL_ADDR		*/
 #include <asm/vsyscall.h>		/* emulate_vsyscall		*/
 #include <asm/vm86.h>			/* struct vm86			*/
diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c
index cf5781142716..a0d023cb4292 100644
--- a/arch/x86/mm/hugetlbpage.c
+++ b/arch/x86/mm/hugetlbpage.c
@@ -17,7 +17,6 @@
 #include <asm/mman.h>
 #include <asm/tlb.h>
 #include <asm/tlbflush.h>
-#include <asm/pgalloc.h>
 #include <asm/elf.h>
 
 #if 0	/* This is just for testing */
diff --git a/arch/x86/mm/kaslr.c b/arch/x86/mm/kaslr.c
index fb620fd9dae9..6e6b39710e5f 100644
--- a/arch/x86/mm/kaslr.c
+++ b/arch/x86/mm/kaslr.c
@@ -26,7 +26,6 @@
 #include <linux/memblock.h>
 #include <linux/pgtable.h>
 
-#include <asm/pgalloc.h>
 #include <asm/setup.h>
 #include <asm/kaslr.h>
 
diff --git a/arch/x86/mm/pgtable_32.c b/arch/x86/mm/pgtable_32.c
index 1953685c2ddf..c234634e26ba 100644
--- a/arch/x86/mm/pgtable_32.c
+++ b/arch/x86/mm/pgtable_32.c
@@ -11,7 +11,6 @@
 #include <linux/spinlock.h>
 
 #include <asm/cpu_entry_area.h>
-#include <asm/pgalloc.h>
 #include <asm/fixmap.h>
 #include <asm/e820/api.h>
 #include <asm/tlb.h>
diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c
index a8a924b3c335..1aab92930569 100644
--- a/arch/x86/mm/pti.c
+++ b/arch/x86/mm/pti.c
@@ -34,7 +34,6 @@
 #include <asm/vsyscall.h>
 #include <asm/cmdline.h>
 #include <asm/pti.h>
-#include <asm/pgalloc.h>
 #include <asm/tlbflush.h>
 #include <asm/desc.h>
 #include <asm/sections.h>
diff --git a/arch/x86/platform/uv/bios_uv.c b/arch/x86/platform/uv/bios_uv.c
index 4494589a288a..c84f83ed0749 100644
--- a/arch/x86/platform/uv/bios_uv.c
+++ b/arch/x86/platform/uv/bios_uv.c
@@ -11,6 +11,7 @@
 #include <linux/slab.h>
 #include <asm/efi.h>
 #include <linux/io.h>
+#include <asm/pgalloc.h>
 #include <asm/uv/bios.h>
 #include <asm/uv/uv_hub.h>
 
diff --git a/arch/xtensa/kernel/xtensa_ksyms.c b/arch/xtensa/kernel/xtensa_ksyms.c
index 4092555828b1..2975f559272e 100644
--- a/arch/xtensa/kernel/xtensa_ksyms.c
+++ b/arch/xtensa/kernel/xtensa_ksyms.c
@@ -25,7 +25,6 @@
 #include <asm/dma.h>
 #include <asm/io.h>
 #include <asm/page.h>
-#include <asm/pgalloc.h>
 #include <asm/ftrace.h>
 #ifdef CONFIG_BLK_DEV_FD
 #include <asm/floppy.h>
diff --git a/arch/xtensa/mm/cache.c b/arch/xtensa/mm/cache.c
index 2369433b734a..5835406b3cec 100644
--- a/arch/xtensa/mm/cache.c
+++ b/arch/xtensa/mm/cache.c
@@ -31,7 +31,6 @@
 #include <asm/tlb.h>
 #include <asm/tlbflush.h>
 #include <asm/page.h>
-#include <asm/pgalloc.h>
 
 /* 
  * Note:
diff --git a/arch/xtensa/mm/fault.c b/arch/xtensa/mm/fault.c
index c4decc73fd86..c128dcc7c85b 100644
--- a/arch/xtensa/mm/fault.c
+++ b/arch/xtensa/mm/fault.c
@@ -20,7 +20,6 @@
 #include <asm/mmu_context.h>
 #include <asm/cacheflush.h>
 #include <asm/hardirq.h>
-#include <asm/pgalloc.h>
 
 DEFINE_PER_CPU(unsigned long, asid_cache) = ASID_USER_FIRST;
 void bad_page_fault(struct pt_regs*, unsigned long, int);
diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h
index a3eeccf3ac5f..c6ea5d38c509 100644
--- a/drivers/block/xen-blkback/common.h
+++ b/drivers/block/xen-blkback/common.h
@@ -36,7 +36,6 @@
 #include <linux/io.h>
 #include <linux/rbtree.h>
 #include <asm/setup.h>
-#include <asm/pgalloc.h>
 #include <asm/hypervisor.h>
 #include <xen/grant_table.h>
 #include <xen/page.h>
diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c
index 4c2972f3153b..6de86e73dfc3 100644
--- a/drivers/iommu/ipmmu-vmsa.c
+++ b/drivers/iommu/ipmmu-vmsa.c
@@ -28,7 +28,6 @@
 
 #if defined(CONFIG_ARM) && !defined(CONFIG_IOMMU_DMA)
 #include <asm/dma-iommu.h>
-#include <asm/pgalloc.h>
 #else
 #define arm_iommu_create_mapping(...)	NULL
 #define arm_iommu_attach_device(...)	-ENODEV
diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c
index 77c57568e5d7..f5c838a92b01 100644
--- a/drivers/xen/balloon.c
+++ b/drivers/xen/balloon.c
@@ -58,7 +58,6 @@
 #include <linux/sysctl.h>
 
 #include <asm/page.h>
-#include <asm/pgalloc.h>
 #include <asm/tlb.h>
 
 #include <asm/xen/hypervisor.h>
diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c
index a250d118144a..5dfc59fd9f16 100644
--- a/drivers/xen/privcmd.c
+++ b/drivers/xen/privcmd.c
@@ -25,7 +25,6 @@
 #include <linux/miscdevice.h>
 #include <linux/moduleparam.h>
 
-#include <asm/pgalloc.h>
 #include <asm/xen/hypervisor.h>
 #include <asm/xen/hypercall.h>
 
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index 0f45521b237c..cf306e0798fd 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -38,7 +38,6 @@
 
 #include <linux/uaccess.h>
 #include <asm/param.h>
-#include <asm/pgalloc.h>
 
 typedef char *elf_caddr_t;
 
diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h
index 3f1649a8cf55..36ac65f4744f 100644
--- a/include/asm-generic/tlb.h
+++ b/include/asm-generic/tlb.h
@@ -14,7 +14,6 @@
 #include <linux/mmu_notifier.h>
 #include <linux/swap.h>
 #include <linux/hugetlb_inline.h>
-#include <asm/pgalloc.h>
 #include <asm/tlbflush.h>
 #include <asm/cacheflush.h>
 
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 57ece74e3aae..3566d369eaee 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -31,6 +31,7 @@
 #include <linux/cma.h>
 
 #include <asm/page.h>
+#include <asm/pgalloc.h>
 #include <asm/tlb.h>
 
 #include <linux/io.h>
diff --git a/mm/sparse.c b/mm/sparse.c
index b2b9a3e34696..97179d27801a 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -16,7 +16,6 @@
 
 #include "internal.h"
 #include <asm/dma.h>
-#include <asm/pgalloc.h>
 
 /*
  * Permanent SPARSEMEM data:
-- 
2.26.2


^ permalink raw reply related

* [PATCH 2/8] opeinrisc: switch to generic version of pte allocation
From: Mike Rapoport @ 2020-06-27 14:34 UTC (permalink / raw)
  To: linux-kernel
  Cc: linux-ia64, linux-sh, Peter Zijlstra, Max Filippov,
	Satheesh Rajendran, linux-csky, sparclinux, linux-riscv,
	linux-arch, Stephen Rothwell, linux-hexagon, Joerg Roedel,
	Mike Rapoport, Abdul Haleem, linux-snps-arc, linux-xtensa,
	Arnd Bergmann, linux-s390, linux-um, Steven Rostedt, linux-m68k,
	openrisc, Andy Lutomirski, Stafford Horne, linux-arm-kernel,
	linux-parisc, linux-mm, linux-mips, linux-alpha, Andrew Morton,
	linuxppc-dev, Mike Rapoport
In-Reply-To: <20200627143453.31835-1-rppt@kernel.org>

From: Mike Rapoport <rppt@linux.ibm.com>

Replace pte_alloc_one(), pte_free() and pte_free_kernel() with the generic
implementation. The only actual functional change is the addition of
__GFP_ACCOUT for the allocation of the user page tables.

The pte_alloc_one_kernel() is kept back because its implementation on
openrisc is different than the generic one.

Signed-off-by: Mike Rapoport <rppt@linux.ibm.com>
---
 arch/openrisc/include/asm/pgalloc.h | 33 +++--------------------------
 1 file changed, 3 insertions(+), 30 deletions(-)

diff --git a/arch/openrisc/include/asm/pgalloc.h b/arch/openrisc/include/asm/pgalloc.h
index da12a4c38c4b..88820299ecc4 100644
--- a/arch/openrisc/include/asm/pgalloc.h
+++ b/arch/openrisc/include/asm/pgalloc.h
@@ -20,6 +20,9 @@
 #include <linux/mm.h>
 #include <linux/memblock.h>
 
+#define __HAVE_ARCH_PTE_ALLOC_ONE_KERNEL
+#include <asm-generic/pgalloc.h>
+
 extern int mem_init_done;
 
 #define pmd_populate_kernel(mm, pmd, pte) \
@@ -61,38 +64,8 @@ extern inline pgd_t *pgd_alloc(struct mm_struct *mm)
 }
 #endif
 
-static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
-{
-	free_page((unsigned long)pgd);
-}
-
 extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm);
 
-static inline struct page *pte_alloc_one(struct mm_struct *mm)
-{
-	struct page *pte;
-	pte = alloc_pages(GFP_KERNEL, 0);
-	if (!pte)
-		return NULL;
-	clear_page(page_address(pte));
-	if (!pgtable_pte_page_ctor(pte)) {
-		__free_page(pte);
-		return NULL;
-	}
-	return pte;
-}
-
-static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
-{
-	free_page((unsigned long)pte);
-}
-
-static inline void pte_free(struct mm_struct *mm, struct page *pte)
-{
-	pgtable_pte_page_dtor(pte);
-	__free_page(pte);
-}
-
 #define __pte_free_tlb(tlb, pte, addr)	\
 do {					\
 	pgtable_pte_page_dtor(pte);	\
-- 
2.26.2


^ permalink raw reply related

* [PATCH 3/8] xtensa: switch to generic version of pte allocation
From: Mike Rapoport @ 2020-06-27 14:34 UTC (permalink / raw)
  To: linux-kernel
  Cc: linux-ia64, linux-sh, Peter Zijlstra, Max Filippov,
	Satheesh Rajendran, linux-csky, sparclinux, linux-riscv,
	linux-arch, Stephen Rothwell, linux-hexagon, Joerg Roedel,
	Mike Rapoport, Abdul Haleem, linux-snps-arc, linux-xtensa,
	Arnd Bergmann, linux-s390, linux-um, Steven Rostedt, linux-m68k,
	openrisc, Andy Lutomirski, Stafford Horne, linux-arm-kernel,
	linux-parisc, linux-mm, linux-mips, linux-alpha, Andrew Morton,
	linuxppc-dev, Mike Rapoport
In-Reply-To: <20200627143453.31835-1-rppt@kernel.org>

From: Mike Rapoport <rppt@linux.ibm.com>

xtensa clears PTEs during allocation of the page tables and pte_clear()
sets the PTE to a non-zero value. Splitting ptes_clear() helper out of
pte_alloc_one() and pte_alloc_one_kernel() allows reuse of base generic
allocation methods (__pte_alloc_one() and __pte_alloc_one_kernel()) and the
common GFP mask for page table allocations.

The pte_free() and pte_free_kernel() implementations on xtensa are
identical to the generic ones and can be dropped.

Signed-off-by: Mike Rapoport <rppt@linux.ibm.com>
---
 arch/xtensa/include/asm/pgalloc.h | 41 ++++++++++++++-----------------
 1 file changed, 19 insertions(+), 22 deletions(-)

diff --git a/arch/xtensa/include/asm/pgalloc.h b/arch/xtensa/include/asm/pgalloc.h
index 1d38f0e755ba..60ee94b42850 100644
--- a/arch/xtensa/include/asm/pgalloc.h
+++ b/arch/xtensa/include/asm/pgalloc.h
@@ -8,9 +8,14 @@
 #ifndef _XTENSA_PGALLOC_H
 #define _XTENSA_PGALLOC_H
 
+#ifdef CONFIG_MMU
 #include <linux/highmem.h>
 #include <linux/slab.h>
 
+#define __HAVE_ARCH_PTE_ALLOC_ONE_KERNEL
+#define __HAVE_ARCH_PTE_ALLOC_ONE
+#include <asm-generic/pgalloc.h>
+
 /*
  * Allocating and freeing a pmd is trivial: the 1-entry pmd is
  * inside the pgd, so has no extra memory associated with it.
@@ -33,45 +38,37 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
 	free_page((unsigned long)pgd);
 }
 
+static inline void ptes_clear(pte_t *ptep)
+{
+	int i;
+
+	for (i = 0; i < PTRS_PER_PTE; i++)
+		pte_clear(NULL, 0, ptep + i);
+}
+
 static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
 {
 	pte_t *ptep;
-	int i;
 
-	ptep = (pte_t *)__get_free_page(GFP_KERNEL);
+	ptep = (pte_t *)__pte_alloc_one_kernel(mm);
 	if (!ptep)
 		return NULL;
-	for (i = 0; i < 1024; i++)
-		pte_clear(NULL, 0, ptep + i);
+	ptes_clear(ptep);
 	return ptep;
 }
 
 static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
 {
-	pte_t *pte;
 	struct page *page;
 
-	pte = pte_alloc_one_kernel(mm);
-	if (!pte)
-		return NULL;
-	page = virt_to_page(pte);
-	if (!pgtable_pte_page_ctor(page)) {
-		__free_page(page);
+	page = __pte_alloc_one(mm, GFP_PGTABLE_USER);
+	if (!page)
 		return NULL;
-	}
+	ptes_clear(page_address(page));
 	return page;
 }
 
-static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
-{
-	free_page((unsigned long)pte);
-}
-
-static inline void pte_free(struct mm_struct *mm, pgtable_t pte)
-{
-	pgtable_pte_page_dtor(pte);
-	__free_page(pte);
-}
 #define pmd_pgtable(pmd) pmd_page(pmd)
+#endif CONFIG_MMU
 
 #endif /* _XTENSA_PGALLOC_H */
-- 
2.26.2


^ permalink raw reply related

* [PATCH 4/8] asm-generic: pgalloc: provide generic pmd_alloc_one() and pmd_free_one()
From: Mike Rapoport @ 2020-06-27 14:34 UTC (permalink / raw)
  To: linux-kernel
  Cc: linux-ia64, linux-sh, Peter Zijlstra, Max Filippov,
	Satheesh Rajendran, linux-csky, sparclinux, linux-riscv,
	linux-arch, Stephen Rothwell, linux-hexagon, Joerg Roedel,
	Mike Rapoport, Abdul Haleem, linux-snps-arc, linux-xtensa,
	Arnd Bergmann, linux-s390, linux-um, Steven Rostedt, linux-m68k,
	openrisc, Andy Lutomirski, Stafford Horne, linux-arm-kernel,
	linux-parisc, linux-mm, linux-mips, linux-alpha, Andrew Morton,
	linuxppc-dev, Mike Rapoport
In-Reply-To: <20200627143453.31835-1-rppt@kernel.org>

From: Mike Rapoport <rppt@linux.ibm.com>

For most architectures that support >2 levels of page tables,
pmd_alloc_one() is a wrapper for __get_free_pages(), sometimes with
__GFP_ZERO and sometimes followed by memset(0) instead.

More elaborate versions on arm64 and x86 account memory for the user page
tables and call to pgtable_pmd_page_ctor() as the part of PMD page
initialization.

Move the arm64 version to include/asm-generic/pgalloc.h and use the generic
version on several architectures.

The pgtable_pmd_page_ctor() is a NOP when ARCH_ENABLE_SPLIT_PMD_PTLOCK is
not enabled, so there is no functional change for most architectures except
of the addition of __GFP_ACCOUNT for allocation of user page tables.

The pmd_free() is a wrapper for free_page() in all the cases, so no
functional change here.

Signed-off-by: Mike Rapoport <rppt@linux.ibm.com>
---
 arch/alpha/include/asm/pgalloc.h     | 15 +---------
 arch/arm/include/asm/pgalloc.h       | 11 -------
 arch/arm64/include/asm/pgalloc.h     | 27 +----------------
 arch/ia64/include/asm/pgalloc.h      | 10 -------
 arch/mips/include/asm/pgalloc.h      |  8 ++----
 arch/parisc/include/asm/pgalloc.h    | 11 ++-----
 arch/riscv/include/asm/pgalloc.h     | 13 +--------
 arch/sh/include/asm/pgalloc.h        |  3 ++
 arch/um/include/asm/pgalloc.h        |  8 +-----
 arch/um/include/asm/pgtable-3level.h |  3 --
 arch/um/kernel/mem.c                 | 12 --------
 arch/x86/include/asm/pgalloc.h       | 26 +----------------
 include/asm-generic/pgalloc.h        | 43 ++++++++++++++++++++++++++++
 13 files changed, 55 insertions(+), 135 deletions(-)

diff --git a/arch/alpha/include/asm/pgalloc.h b/arch/alpha/include/asm/pgalloc.h
index a1a29f60934c..4834cd52e9d0 100644
--- a/arch/alpha/include/asm/pgalloc.h
+++ b/arch/alpha/include/asm/pgalloc.h
@@ -5,7 +5,7 @@
 #include <linux/mm.h>
 #include <linux/mmzone.h>
 
-#include <asm-generic/pgalloc.h>	/* for pte_{alloc,free}_one */
+#include <asm-generic/pgalloc.h>
 
 /*      
  * Allocate and free page tables. The xxx_kernel() versions are
@@ -40,17 +40,4 @@ pgd_free(struct mm_struct *mm, pgd_t *pgd)
 	free_page((unsigned long)pgd);
 }
 
-static inline pmd_t *
-pmd_alloc_one(struct mm_struct *mm, unsigned long address)
-{
-	pmd_t *ret = (pmd_t *)__get_free_page(GFP_PGTABLE_USER);
-	return ret;
-}
-
-static inline void
-pmd_free(struct mm_struct *mm, pmd_t *pmd)
-{
-	free_page((unsigned long)pmd);
-}
-
 #endif /* _ALPHA_PGALLOC_H */
diff --git a/arch/arm/include/asm/pgalloc.h b/arch/arm/include/asm/pgalloc.h
index 069da393110c..c5bdfd404ea5 100644
--- a/arch/arm/include/asm/pgalloc.h
+++ b/arch/arm/include/asm/pgalloc.h
@@ -22,17 +22,6 @@
 
 #ifdef CONFIG_ARM_LPAE
 
-static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
-{
-	return (pmd_t *)get_zeroed_page(GFP_KERNEL);
-}
-
-static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
-{
-	BUG_ON((unsigned long)pmd & (PAGE_SIZE-1));
-	free_page((unsigned long)pmd);
-}
-
 static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
 {
 	set_pud(pud, __pud(__pa(pmd) | PMD_TYPE_TABLE));
diff --git a/arch/arm64/include/asm/pgalloc.h b/arch/arm64/include/asm/pgalloc.h
index 58e93583ddb6..7246d0a662e1 100644
--- a/arch/arm64/include/asm/pgalloc.h
+++ b/arch/arm64/include/asm/pgalloc.h
@@ -13,37 +13,12 @@
 #include <asm/cacheflush.h>
 #include <asm/tlbflush.h>
 
-#include <asm-generic/pgalloc.h>	/* for pte_{alloc,free}_one */
+#include <asm-generic/pgalloc.h>
 
 #define PGD_SIZE	(PTRS_PER_PGD * sizeof(pgd_t))
 
 #if CONFIG_PGTABLE_LEVELS > 2
 
-static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
-{
-	gfp_t gfp = GFP_PGTABLE_USER;
-	struct page *page;
-
-	if (mm == &init_mm)
-		gfp = GFP_PGTABLE_KERNEL;
-
-	page = alloc_page(gfp);
-	if (!page)
-		return NULL;
-	if (!pgtable_pmd_page_ctor(page)) {
-		__free_page(page);
-		return NULL;
-	}
-	return page_address(page);
-}
-
-static inline void pmd_free(struct mm_struct *mm, pmd_t *pmdp)
-{
-	BUG_ON((unsigned long)pmdp & (PAGE_SIZE-1));
-	pgtable_pmd_page_dtor(virt_to_page(pmdp));
-	free_page((unsigned long)pmdp);
-}
-
 static inline void __pud_populate(pud_t *pudp, phys_addr_t pmdp, pudval_t prot)
 {
 	set_pud(pudp, __pud(__phys_to_pud_val(pmdp) | prot));
diff --git a/arch/ia64/include/asm/pgalloc.h b/arch/ia64/include/asm/pgalloc.h
index 2a3050345099..5da1fc76477b 100644
--- a/arch/ia64/include/asm/pgalloc.h
+++ b/arch/ia64/include/asm/pgalloc.h
@@ -59,16 +59,6 @@ pud_populate(struct mm_struct *mm, pud_t * pud_entry, pmd_t * pmd)
 	pud_val(*pud_entry) = __pa(pmd);
 }
 
-static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
-{
-	return (pmd_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
-}
-
-static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
-{
-	free_page((unsigned long)pmd);
-}
-
 #define __pmd_free_tlb(tlb, pmd, address)	pmd_free((tlb)->mm, pmd)
 
 static inline void
diff --git a/arch/mips/include/asm/pgalloc.h b/arch/mips/include/asm/pgalloc.h
index fa77cb71f303..eed1b3e8c642 100644
--- a/arch/mips/include/asm/pgalloc.h
+++ b/arch/mips/include/asm/pgalloc.h
@@ -13,7 +13,8 @@
 #include <linux/mm.h>
 #include <linux/sched.h>
 
-#include <asm-generic/pgalloc.h>	/* for pte_{alloc,free}_one */
+#define __HAVE_ARCH_PMD_ALLOC_ONE
+#include <asm-generic/pgalloc.h>
 
 static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd,
 	pte_t *pte)
@@ -70,11 +71,6 @@ static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address)
 	return pmd;
 }
 
-static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
-{
-	free_pages((unsigned long)pmd, PMD_ORDER);
-}
-
 #define __pmd_free_tlb(tlb, x, addr)	pmd_free((tlb)->mm, x)
 
 #endif
diff --git a/arch/parisc/include/asm/pgalloc.h b/arch/parisc/include/asm/pgalloc.h
index 9ac74da256b8..689766b914ed 100644
--- a/arch/parisc/include/asm/pgalloc.h
+++ b/arch/parisc/include/asm/pgalloc.h
@@ -10,7 +10,8 @@
 
 #include <asm/cache.h>
 
-#include <asm-generic/pgalloc.h>	/* for pte_{alloc,free}_one */
+#define __HAVE_ARCH_PMD_FREE
+#include <asm-generic/pgalloc.h>
 
 /* Allocate the top level pgd (page directory)
  *
@@ -65,14 +66,6 @@ static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
 			(__u32)(__pa((unsigned long)pmd) >> PxD_VALUE_SHIFT)));
 }
 
-static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address)
-{
-	pmd_t *pmd = (pmd_t *)__get_free_pages(GFP_KERNEL, PMD_ORDER);
-	if (pmd)
-		memset(pmd, 0, PAGE_SIZE<<PMD_ORDER);
-	return pmd;
-}
-
 static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
 {
 	if (pmd_flag(*pmd) & PxD_FLAG_ATTACHED) {
diff --git a/arch/riscv/include/asm/pgalloc.h b/arch/riscv/include/asm/pgalloc.h
index 3f601ee8233f..8d3135f05b8e 100644
--- a/arch/riscv/include/asm/pgalloc.h
+++ b/arch/riscv/include/asm/pgalloc.h
@@ -11,7 +11,7 @@
 #include <asm/tlb.h>
 
 #ifdef CONFIG_MMU
-#include <asm-generic/pgalloc.h>	/* for pte_{alloc,free}_one */
+#include <asm-generic/pgalloc.h>
 
 static inline void pmd_populate_kernel(struct mm_struct *mm,
 	pmd_t *pmd, pte_t *pte)
@@ -62,17 +62,6 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
 
 #ifndef __PAGETABLE_PMD_FOLDED
 
-static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
-{
-	return (pmd_t *)__get_free_page(
-		GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_ZERO);
-}
-
-static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
-{
-	free_page((unsigned long)pmd);
-}
-
 #define __pmd_free_tlb(tlb, pmd, addr)  pmd_free((tlb)->mm, pmd)
 
 #endif /* __PAGETABLE_PMD_FOLDED */
diff --git a/arch/sh/include/asm/pgalloc.h b/arch/sh/include/asm/pgalloc.h
index 22d968bfe9bb..59263df76f51 100644
--- a/arch/sh/include/asm/pgalloc.h
+++ b/arch/sh/include/asm/pgalloc.h
@@ -3,6 +3,9 @@
 #define __ASM_SH_PGALLOC_H
 
 #include <asm/page.h>
+
+#define __HAVE_ARCH_PMD_ALLOC_ONE
+#define __HAVE_ARCH_PMD_FREE
 #include <asm-generic/pgalloc.h>
 
 extern pgd_t *pgd_alloc(struct mm_struct *);
diff --git a/arch/um/include/asm/pgalloc.h b/arch/um/include/asm/pgalloc.h
index 881e76da1938..bdde433dbdec 100644
--- a/arch/um/include/asm/pgalloc.h
+++ b/arch/um/include/asm/pgalloc.h
@@ -10,7 +10,7 @@
 
 #include <linux/mm.h>
 
-#include <asm-generic/pgalloc.h>	/* for pte_{alloc,free}_one */
+#include <asm-generic/pgalloc.h>
 
 #define pmd_populate_kernel(mm, pmd, pte) \
 	set_pmd(pmd, __pmd(_PAGE_TABLE + (unsigned long) __pa(pte)))
@@ -34,12 +34,6 @@ do {							\
 } while (0)
 
 #ifdef CONFIG_3_LEVEL_PGTABLES
-
-static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
-{
-	free_page((unsigned long)pmd);
-}
-
 #define __pmd_free_tlb(tlb,x, address)   tlb_remove_page((tlb),virt_to_page(x))
 #endif
 
diff --git a/arch/um/include/asm/pgtable-3level.h b/arch/um/include/asm/pgtable-3level.h
index 36f452957cef..7e6a4180db9d 100644
--- a/arch/um/include/asm/pgtable-3level.h
+++ b/arch/um/include/asm/pgtable-3level.h
@@ -78,9 +78,6 @@ static inline void pgd_mkuptodate(pgd_t pgd) { pgd_val(pgd) &= ~_PAGE_NEWPAGE; }
 #define set_pmd(pmdptr, pmdval) (*(pmdptr) = (pmdval))
 #endif
 
-struct mm_struct;
-extern pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address);
-
 static inline void pud_clear (pud_t *pud)
 {
 	set_pud(pud, __pud(_PAGE_NEWPAGE));
diff --git a/arch/um/kernel/mem.c b/arch/um/kernel/mem.c
index c2ff76c8981e..a4accb14cbd5 100644
--- a/arch/um/kernel/mem.c
+++ b/arch/um/kernel/mem.c
@@ -201,18 +201,6 @@ void pgd_free(struct mm_struct *mm, pgd_t *pgd)
 	free_page((unsigned long) pgd);
 }
 
-#ifdef CONFIG_3_LEVEL_PGTABLES
-pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address)
-{
-	pmd_t *pmd = (pmd_t *) __get_free_page(GFP_KERNEL);
-
-	if (pmd)
-		memset(pmd, 0, PAGE_SIZE);
-
-	return pmd;
-}
-#endif
-
 void *uml_kmalloc(int size, int flags)
 {
 	return kmalloc(size, flags);
diff --git a/arch/x86/include/asm/pgalloc.h b/arch/x86/include/asm/pgalloc.h
index 29aa7859bdee..25feaa117c40 100644
--- a/arch/x86/include/asm/pgalloc.h
+++ b/arch/x86/include/asm/pgalloc.h
@@ -7,7 +7,7 @@
 #include <linux/pagemap.h>
 
 #define __HAVE_ARCH_PTE_ALLOC_ONE
-#include <asm-generic/pgalloc.h>	/* for pte_{alloc,free}_one */
+#include <asm-generic/pgalloc.h>
 
 static inline int  __paravirt_pgd_alloc(struct mm_struct *mm) { return 0; }
 
@@ -86,30 +86,6 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd,
 #define pmd_pgtable(pmd) pmd_page(pmd)
 
 #if CONFIG_PGTABLE_LEVELS > 2
-static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
-{
-	struct page *page;
-	gfp_t gfp = GFP_KERNEL_ACCOUNT | __GFP_ZERO;
-
-	if (mm == &init_mm)
-		gfp &= ~__GFP_ACCOUNT;
-	page = alloc_pages(gfp, 0);
-	if (!page)
-		return NULL;
-	if (!pgtable_pmd_page_ctor(page)) {
-		__free_pages(page, 0);
-		return NULL;
-	}
-	return (pmd_t *)page_address(page);
-}
-
-static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
-{
-	BUG_ON((unsigned long)pmd & (PAGE_SIZE-1));
-	pgtable_pmd_page_dtor(virt_to_page(pmd));
-	free_page((unsigned long)pmd);
-}
-
 extern void ___pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd);
 
 static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd,
diff --git a/include/asm-generic/pgalloc.h b/include/asm-generic/pgalloc.h
index 73f7421413cb..1bc027891a00 100644
--- a/include/asm-generic/pgalloc.h
+++ b/include/asm-generic/pgalloc.h
@@ -102,6 +102,49 @@ static inline void pte_free(struct mm_struct *mm, struct page *pte_page)
 	__free_page(pte_page);
 }
 
+
+#if CONFIG_PGTABLE_LEVELS > 2
+
+#ifndef __HAVE_ARCH_PMD_ALLOC_ONE
+/**
+ * pmd_alloc_one - allocate a page for PMD-level page table
+ * @mm: the mm_struct of the current context
+ *
+ * Allocates a page and runs the pgtable_pmd_page_ctor().
+ * Allocations use %GFP_PGTABLE_USER in user context and
+ * %GFP_PGTABLE_KERNEL in kernel context.
+ *
+ * Return: pointer to the allocated memory or %NULL on error
+ */
+static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
+{
+	struct page *page;
+	gfp_t gfp = GFP_PGTABLE_USER;
+
+	if (mm == &init_mm)
+		gfp = GFP_PGTABLE_KERNEL;
+	page = alloc_pages(gfp, 0);
+	if (!page)
+		return NULL;
+	if (!pgtable_pmd_page_ctor(page)) {
+		__free_pages(page, 0);
+		return NULL;
+	}
+	return (pmd_t *)page_address(page);
+}
+#endif
+
+#ifndef __HAVE_ARCH_PMD_FREE
+static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
+{
+	BUG_ON((unsigned long)pmd & (PAGE_SIZE-1));
+	pgtable_pmd_page_dtor(virt_to_page(pmd));
+	free_page((unsigned long)pmd);
+}
+#endif
+
+#endif /* CONFIG_PGTABLE_LEVELS > 2 */
+
 #endif /* CONFIG_MMU */
 
 #endif /* __ASM_GENERIC_PGALLOC_H */
-- 
2.26.2


^ permalink raw reply related

* [PATCH 5/8] asm-generic: pgalloc: provide generic pud_alloc_one() and pud_free_one()
From: Mike Rapoport @ 2020-06-27 14:34 UTC (permalink / raw)
  To: linux-kernel
  Cc: linux-ia64, linux-sh, Peter Zijlstra, Max Filippov,
	Satheesh Rajendran, linux-csky, sparclinux, linux-riscv,
	linux-arch, Stephen Rothwell, linux-hexagon, Joerg Roedel,
	Mike Rapoport, Abdul Haleem, linux-snps-arc, linux-xtensa,
	Arnd Bergmann, linux-s390, linux-um, Steven Rostedt, linux-m68k,
	openrisc, Andy Lutomirski, Stafford Horne, linux-arm-kernel,
	linux-parisc, linux-mm, linux-mips, linux-alpha, Andrew Morton,
	linuxppc-dev, Mike Rapoport
In-Reply-To: <20200627143453.31835-1-rppt@kernel.org>

From: Mike Rapoport <rppt@linux.ibm.com>

Several architectures define pud_alloc_one() as a wrapper for
__get_free_page() and pud_free() as a wrapper for free_page().

Provide a generic implementation in asm-generic/pgalloc.h and use it where
appropriate.

Signed-off-by: Mike Rapoport <rppt@linux.ibm.com>
---
 arch/arm64/include/asm/pgalloc.h | 11 -----------
 arch/ia64/include/asm/pgalloc.h  |  9 ---------
 arch/mips/include/asm/pgalloc.h  |  6 +-----
 arch/x86/include/asm/pgalloc.h   | 15 ---------------
 include/asm-generic/pgalloc.h    | 30 ++++++++++++++++++++++++++++++
 5 files changed, 31 insertions(+), 40 deletions(-)

diff --git a/arch/arm64/include/asm/pgalloc.h b/arch/arm64/include/asm/pgalloc.h
index 7246d0a662e1..0965945b595d 100644
--- a/arch/arm64/include/asm/pgalloc.h
+++ b/arch/arm64/include/asm/pgalloc.h
@@ -37,17 +37,6 @@ static inline void __pud_populate(pud_t *pudp, phys_addr_t pmdp, pudval_t prot)
 
 #if CONFIG_PGTABLE_LEVELS > 3
 
-static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
-{
-	return (pud_t *)__get_free_page(GFP_PGTABLE_USER);
-}
-
-static inline void pud_free(struct mm_struct *mm, pud_t *pudp)
-{
-	BUG_ON((unsigned long)pudp & (PAGE_SIZE-1));
-	free_page((unsigned long)pudp);
-}
-
 static inline void __p4d_populate(p4d_t *p4dp, phys_addr_t pudp, p4dval_t prot)
 {
 	set_p4d(p4dp, __p4d(__phys_to_p4d_val(pudp) | prot));
diff --git a/arch/ia64/include/asm/pgalloc.h b/arch/ia64/include/asm/pgalloc.h
index 5da1fc76477b..06f80358e20f 100644
--- a/arch/ia64/include/asm/pgalloc.h
+++ b/arch/ia64/include/asm/pgalloc.h
@@ -41,15 +41,6 @@ p4d_populate(struct mm_struct *mm, p4d_t * p4d_entry, pud_t * pud)
 	p4d_val(*p4d_entry) = __pa(pud);
 }
 
-static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
-{
-	return (pud_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
-}
-
-static inline void pud_free(struct mm_struct *mm, pud_t *pud)
-{
-	free_page((unsigned long)pud);
-}
 #define __pud_free_tlb(tlb, pud, address)	pud_free((tlb)->mm, pud)
 #endif /* CONFIG_PGTABLE_LEVELS == 4 */
 
diff --git a/arch/mips/include/asm/pgalloc.h b/arch/mips/include/asm/pgalloc.h
index eed1b3e8c642..e5a840910ce0 100644
--- a/arch/mips/include/asm/pgalloc.h
+++ b/arch/mips/include/asm/pgalloc.h
@@ -14,6 +14,7 @@
 #include <linux/sched.h>
 
 #define __HAVE_ARCH_PMD_ALLOC_ONE
+#define __HAVE_ARCH_PUD_ALLOC_ONE
 #include <asm-generic/pgalloc.h>
 
 static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd,
@@ -87,11 +88,6 @@ static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long address)
 	return pud;
 }
 
-static inline void pud_free(struct mm_struct *mm, pud_t *pud)
-{
-	free_pages((unsigned long)pud, PUD_ORDER);
-}
-
 static inline void p4d_populate(struct mm_struct *mm, p4d_t *p4d, pud_t *pud)
 {
 	set_p4d(p4d, __p4d((unsigned long)pud));
diff --git a/arch/x86/include/asm/pgalloc.h b/arch/x86/include/asm/pgalloc.h
index 25feaa117c40..3d1085a14347 100644
--- a/arch/x86/include/asm/pgalloc.h
+++ b/arch/x86/include/asm/pgalloc.h
@@ -123,21 +123,6 @@ static inline void p4d_populate_safe(struct mm_struct *mm, p4d_t *p4d, pud_t *pu
 	set_p4d_safe(p4d, __p4d(_PAGE_TABLE | __pa(pud)));
 }
 
-static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
-{
-	gfp_t gfp = GFP_KERNEL_ACCOUNT;
-
-	if (mm == &init_mm)
-		gfp &= ~__GFP_ACCOUNT;
-	return (pud_t *)get_zeroed_page(gfp);
-}
-
-static inline void pud_free(struct mm_struct *mm, pud_t *pud)
-{
-	BUG_ON((unsigned long)pud & (PAGE_SIZE-1));
-	free_page((unsigned long)pud);
-}
-
 extern void ___pud_free_tlb(struct mmu_gather *tlb, pud_t *pud);
 
 static inline void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pud,
diff --git a/include/asm-generic/pgalloc.h b/include/asm-generic/pgalloc.h
index 1bc027891a00..d361574aaadf 100644
--- a/include/asm-generic/pgalloc.h
+++ b/include/asm-generic/pgalloc.h
@@ -145,6 +145,36 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
 
 #endif /* CONFIG_PGTABLE_LEVELS > 2 */
 
+#if CONFIG_PGTABLE_LEVELS > 3
+
+#ifndef __HAVE_ARCH_PUD_FREE
+/**
+ * pud_alloc_one - allocate a page for PUD-level page table
+ * @mm: the mm_struct of the current context
+ *
+ * Allocates a page using %GFP_PGTABLE_USER for user context and
+ * %GFP_PGTABLE_KERNEL for kernel context.
+ *
+ * Return: pointer to the allocated memory or %NULL on error
+ */
+static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
+{
+	gfp_t gfp = GFP_PGTABLE_USER;
+
+	if (mm == &init_mm)
+		gfp = GFP_PGTABLE_KERNEL;
+	return (pud_t *)get_zeroed_page(gfp);
+}
+#endif
+
+static inline void pud_free(struct mm_struct *mm, pud_t *pud)
+{
+	BUG_ON((unsigned long)pud & (PAGE_SIZE-1));
+	free_page((unsigned long)pud);
+}
+
+#endif /* CONFIG_PGTABLE_LEVELS > 3 */
+
 #endif /* CONFIG_MMU */
 
 #endif /* __ASM_GENERIC_PGALLOC_H */
-- 
2.26.2


^ permalink raw reply related

* [PATCH 6/8] asm-generic: pgalloc: provide generic pgd_free()
From: Mike Rapoport @ 2020-06-27 14:34 UTC (permalink / raw)
  To: linux-kernel
  Cc: linux-ia64, linux-sh, Peter Zijlstra, Max Filippov,
	Satheesh Rajendran, linux-csky, sparclinux, linux-riscv,
	linux-arch, Stephen Rothwell, linux-hexagon, Joerg Roedel,
	Mike Rapoport, Abdul Haleem, linux-snps-arc, linux-xtensa,
	Arnd Bergmann, linux-s390, linux-um, Steven Rostedt, linux-m68k,
	openrisc, Andy Lutomirski, Stafford Horne, linux-arm-kernel,
	linux-parisc, linux-mm, linux-mips, linux-alpha, Andrew Morton,
	linuxppc-dev, Mike Rapoport
In-Reply-To: <20200627143453.31835-1-rppt@kernel.org>

From: Mike Rapoport <rppt@linux.ibm.com>

Most architectures define pgd_free() as a wrapper for free_page().

Provide a generic version in asm-generic/pgalloc.h and enable its use for
most architectures.

Signed-off-by: Mike Rapoport <rppt@linux.ibm.com>
---
 arch/alpha/include/asm/pgalloc.h      | 6 ------
 arch/arm/include/asm/pgalloc.h        | 1 +
 arch/arm64/include/asm/pgalloc.h      | 1 +
 arch/csky/include/asm/pgalloc.h       | 7 +------
 arch/hexagon/include/asm/pgalloc.h    | 7 +------
 arch/ia64/include/asm/pgalloc.h       | 5 -----
 arch/m68k/include/asm/sun3_pgalloc.h  | 7 +------
 arch/microblaze/include/asm/pgalloc.h | 6 ------
 arch/mips/include/asm/pgalloc.h       | 5 -----
 arch/nds32/mm/mm-nds32.c              | 2 ++
 arch/nios2/include/asm/pgalloc.h      | 7 +------
 arch/parisc/include/asm/pgalloc.h     | 1 +
 arch/riscv/include/asm/pgalloc.h      | 5 -----
 arch/sh/include/asm/pgalloc.h         | 1 +
 arch/um/include/asm/pgalloc.h         | 1 -
 arch/um/kernel/mem.c                  | 5 -----
 arch/x86/include/asm/pgalloc.h        | 1 +
 arch/xtensa/include/asm/pgalloc.h     | 5 -----
 include/asm-generic/pgalloc.h         | 7 +++++++
 19 files changed, 18 insertions(+), 62 deletions(-)

diff --git a/arch/alpha/include/asm/pgalloc.h b/arch/alpha/include/asm/pgalloc.h
index 4834cd52e9d0..9c6a24fe493d 100644
--- a/arch/alpha/include/asm/pgalloc.h
+++ b/arch/alpha/include/asm/pgalloc.h
@@ -34,10 +34,4 @@ pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
 
 extern pgd_t *pgd_alloc(struct mm_struct *mm);
 
-static inline void
-pgd_free(struct mm_struct *mm, pgd_t *pgd)
-{
-	free_page((unsigned long)pgd);
-}
-
 #endif /* _ALPHA_PGALLOC_H */
diff --git a/arch/arm/include/asm/pgalloc.h b/arch/arm/include/asm/pgalloc.h
index c5bdfd404ea5..15f4674715f8 100644
--- a/arch/arm/include/asm/pgalloc.h
+++ b/arch/arm/include/asm/pgalloc.h
@@ -65,6 +65,7 @@ static inline void clean_pte_table(pte_t *pte)
 
 #define __HAVE_ARCH_PTE_ALLOC_ONE_KERNEL
 #define __HAVE_ARCH_PTE_ALLOC_ONE
+#define __HAVE_ARCH_PGD_FREE
 #include <asm-generic/pgalloc.h>
 
 static inline pte_t *
diff --git a/arch/arm64/include/asm/pgalloc.h b/arch/arm64/include/asm/pgalloc.h
index 0965945b595d..3c6a7f5988b1 100644
--- a/arch/arm64/include/asm/pgalloc.h
+++ b/arch/arm64/include/asm/pgalloc.h
@@ -13,6 +13,7 @@
 #include <asm/cacheflush.h>
 #include <asm/tlbflush.h>
 
+#define __HAVE_ARCH_PGD_FREE
 #include <asm-generic/pgalloc.h>
 
 #define PGD_SIZE	(PTRS_PER_PGD * sizeof(pgd_t))
diff --git a/arch/csky/include/asm/pgalloc.h b/arch/csky/include/asm/pgalloc.h
index c7c1ed27e348..d58d8146b729 100644
--- a/arch/csky/include/asm/pgalloc.h
+++ b/arch/csky/include/asm/pgalloc.h
@@ -9,7 +9,7 @@
 #include <linux/sched.h>
 
 #define __HAVE_ARCH_PTE_ALLOC_ONE_KERNEL
-#include <asm-generic/pgalloc.h>	/* for pte_{alloc,free}_one */
+#include <asm-generic/pgalloc.h>
 
 static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd,
 					pte_t *pte)
@@ -42,11 +42,6 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
 	return pte;
 }
 
-static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
-{
-	free_pages((unsigned long)pgd, PGD_ORDER);
-}
-
 static inline pgd_t *pgd_alloc(struct mm_struct *mm)
 {
 	pgd_t *ret;
diff --git a/arch/hexagon/include/asm/pgalloc.h b/arch/hexagon/include/asm/pgalloc.h
index cc9be514a676..f0c47e6a7427 100644
--- a/arch/hexagon/include/asm/pgalloc.h
+++ b/arch/hexagon/include/asm/pgalloc.h
@@ -11,7 +11,7 @@
 #include <asm/mem-layout.h>
 #include <asm/atomic.h>
 
-#include <asm-generic/pgalloc.h>	/* for pte_{alloc,free}_one */
+#include <asm-generic/pgalloc.h>
 
 extern unsigned long long kmap_generation;
 
@@ -41,11 +41,6 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm)
 	return pgd;
 }
 
-static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
-{
-	free_page((unsigned long) pgd);
-}
-
 static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd,
 				pgtable_t pte)
 {
diff --git a/arch/ia64/include/asm/pgalloc.h b/arch/ia64/include/asm/pgalloc.h
index 06f80358e20f..9601cfe83c94 100644
--- a/arch/ia64/include/asm/pgalloc.h
+++ b/arch/ia64/include/asm/pgalloc.h
@@ -29,11 +29,6 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm)
 	return (pgd_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
 }
 
-static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
-{
-	free_page((unsigned long)pgd);
-}
-
 #if CONFIG_PGTABLE_LEVELS == 4
 static inline void
 p4d_populate(struct mm_struct *mm, p4d_t * p4d_entry, pud_t * pud)
diff --git a/arch/m68k/include/asm/sun3_pgalloc.h b/arch/m68k/include/asm/sun3_pgalloc.h
index 11b95dadf7c0..000f64869b91 100644
--- a/arch/m68k/include/asm/sun3_pgalloc.h
+++ b/arch/m68k/include/asm/sun3_pgalloc.h
@@ -13,7 +13,7 @@
 
 #include <asm/tlb.h>
 
-#include <asm-generic/pgalloc.h>	/* for pte_{alloc,free}_one */
+#include <asm-generic/pgalloc.h>
 
 extern const char bad_pmd_string[];
 
@@ -40,11 +40,6 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, pgtable_t page
  */
 #define pmd_free(mm, x)			do { } while (0)
 
-static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
-{
-        free_page((unsigned long) pgd);
-}
-
 static inline pgd_t * pgd_alloc(struct mm_struct *mm)
 {
      pgd_t *new_pgd;
diff --git a/arch/microblaze/include/asm/pgalloc.h b/arch/microblaze/include/asm/pgalloc.h
index ebb6b7939bb8..8839ce00ea05 100644
--- a/arch/microblaze/include/asm/pgalloc.h
+++ b/arch/microblaze/include/asm/pgalloc.h
@@ -28,12 +28,6 @@ static inline pgd_t *get_pgd(void)
 	return (pgd_t *)__get_free_pages(GFP_KERNEL|__GFP_ZERO, 0);
 }
 
-static inline void free_pgd(pgd_t *pgd)
-{
-	free_page((unsigned long)pgd);
-}
-
-#define pgd_free(mm, pgd)	free_pgd(pgd)
 #define pgd_alloc(mm)		get_pgd()
 
 #define pmd_pgtable(pmd)	pmd_page(pmd)
diff --git a/arch/mips/include/asm/pgalloc.h b/arch/mips/include/asm/pgalloc.h
index e5a840910ce0..8b18424b3120 100644
--- a/arch/mips/include/asm/pgalloc.h
+++ b/arch/mips/include/asm/pgalloc.h
@@ -49,11 +49,6 @@ static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
 extern void pgd_init(unsigned long page);
 extern pgd_t *pgd_alloc(struct mm_struct *mm);
 
-static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
-{
-	free_pages((unsigned long)pgd, PGD_ORDER);
-}
-
 #define __pte_free_tlb(tlb,pte,address)			\
 do {							\
 	pgtable_pte_page_dtor(pte);			\
diff --git a/arch/nds32/mm/mm-nds32.c b/arch/nds32/mm/mm-nds32.c
index 8503bee882d1..55bec50ccc03 100644
--- a/arch/nds32/mm/mm-nds32.c
+++ b/arch/nds32/mm/mm-nds32.c
@@ -2,6 +2,8 @@
 // Copyright (C) 2005-2017 Andes Technology Corporation
 
 #include <linux/init_task.h>
+
+#define __HAVE_ARCH_PGD_FREE
 #include <asm/pgalloc.h>
 
 #define FIRST_KERNEL_PGD_NR	(USER_PTRS_PER_PGD)
diff --git a/arch/nios2/include/asm/pgalloc.h b/arch/nios2/include/asm/pgalloc.h
index 0b146d773c85..e6600d2a5ae0 100644
--- a/arch/nios2/include/asm/pgalloc.h
+++ b/arch/nios2/include/asm/pgalloc.h
@@ -12,7 +12,7 @@
 
 #include <linux/mm.h>
 
-#include <asm-generic/pgalloc.h>	/* for pte_{alloc,free}_one */
+#include <asm-generic/pgalloc.h>
 
 static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd,
 	pte_t *pte)
@@ -34,11 +34,6 @@ extern void pmd_init(unsigned long page, unsigned long pagetable);
 
 extern pgd_t *pgd_alloc(struct mm_struct *mm);
 
-static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
-{
-	free_pages((unsigned long)pgd, PGD_ORDER);
-}
-
 #define __pte_free_tlb(tlb, pte, addr)				\
 	do {							\
 		pgtable_pte_page_dtor(pte);			\
diff --git a/arch/parisc/include/asm/pgalloc.h b/arch/parisc/include/asm/pgalloc.h
index 689766b914ed..cc7ecc2ef55d 100644
--- a/arch/parisc/include/asm/pgalloc.h
+++ b/arch/parisc/include/asm/pgalloc.h
@@ -11,6 +11,7 @@
 #include <asm/cache.h>
 
 #define __HAVE_ARCH_PMD_FREE
+#define __HAVE_ARCH_PGD_FREE
 #include <asm-generic/pgalloc.h>
 
 /* Allocate the top level pgd (page directory)
diff --git a/arch/riscv/include/asm/pgalloc.h b/arch/riscv/include/asm/pgalloc.h
index 8d3135f05b8e..23b1544e0ca5 100644
--- a/arch/riscv/include/asm/pgalloc.h
+++ b/arch/riscv/include/asm/pgalloc.h
@@ -55,11 +55,6 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm)
 	return pgd;
 }
 
-static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
-{
-	free_page((unsigned long)pgd);
-}
-
 #ifndef __PAGETABLE_PMD_FOLDED
 
 #define __pmd_free_tlb(tlb, pmd, addr)  pmd_free((tlb)->mm, pmd)
diff --git a/arch/sh/include/asm/pgalloc.h b/arch/sh/include/asm/pgalloc.h
index 59263df76f51..3d832472cbab 100644
--- a/arch/sh/include/asm/pgalloc.h
+++ b/arch/sh/include/asm/pgalloc.h
@@ -6,6 +6,7 @@
 
 #define __HAVE_ARCH_PMD_ALLOC_ONE
 #define __HAVE_ARCH_PMD_FREE
+#define __HAVE_ARCH_PGD_FREE
 #include <asm-generic/pgalloc.h>
 
 extern pgd_t *pgd_alloc(struct mm_struct *);
diff --git a/arch/um/include/asm/pgalloc.h b/arch/um/include/asm/pgalloc.h
index bdde433dbdec..5393e13e07e0 100644
--- a/arch/um/include/asm/pgalloc.h
+++ b/arch/um/include/asm/pgalloc.h
@@ -25,7 +25,6 @@
  * Allocate and free page tables.
  */
 extern pgd_t *pgd_alloc(struct mm_struct *);
-extern void pgd_free(struct mm_struct *mm, pgd_t *pgd);
 
 #define __pte_free_tlb(tlb,pte, address)		\
 do {							\
diff --git a/arch/um/kernel/mem.c b/arch/um/kernel/mem.c
index a4accb14cbd5..9242dc91d751 100644
--- a/arch/um/kernel/mem.c
+++ b/arch/um/kernel/mem.c
@@ -196,11 +196,6 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
 	return pgd;
 }
 
-void pgd_free(struct mm_struct *mm, pgd_t *pgd)
-{
-	free_page((unsigned long) pgd);
-}
-
 void *uml_kmalloc(int size, int flags)
 {
 	return kmalloc(size, flags);
diff --git a/arch/x86/include/asm/pgalloc.h b/arch/x86/include/asm/pgalloc.h
index 3d1085a14347..62ad61d6fefc 100644
--- a/arch/x86/include/asm/pgalloc.h
+++ b/arch/x86/include/asm/pgalloc.h
@@ -7,6 +7,7 @@
 #include <linux/pagemap.h>
 
 #define __HAVE_ARCH_PTE_ALLOC_ONE
+#define __HAVE_ARCH_PGD_FREE
 #include <asm-generic/pgalloc.h>
 
 static inline int  __paravirt_pgd_alloc(struct mm_struct *mm) { return 0; }
diff --git a/arch/xtensa/include/asm/pgalloc.h b/arch/xtensa/include/asm/pgalloc.h
index 60ee94b42850..699a8fdf9005 100644
--- a/arch/xtensa/include/asm/pgalloc.h
+++ b/arch/xtensa/include/asm/pgalloc.h
@@ -33,11 +33,6 @@ pgd_alloc(struct mm_struct *mm)
 	return (pgd_t*) __get_free_pages(GFP_KERNEL | __GFP_ZERO, PGD_ORDER);
 }
 
-static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
-{
-	free_page((unsigned long)pgd);
-}
-
 static inline void ptes_clear(pte_t *ptep)
 {
 	int i;
diff --git a/include/asm-generic/pgalloc.h b/include/asm-generic/pgalloc.h
index d361574aaadf..6f44810921aa 100644
--- a/include/asm-generic/pgalloc.h
+++ b/include/asm-generic/pgalloc.h
@@ -175,6 +175,13 @@ static inline void pud_free(struct mm_struct *mm, pud_t *pud)
 
 #endif /* CONFIG_PGTABLE_LEVELS > 3 */
 
+#ifndef __HAVE_ARCH_PGD_FREE
+static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
+{
+	free_page((unsigned long)pgd);
+}
+#endif
+
 #endif /* CONFIG_MMU */
 
 #endif /* __ASM_GENERIC_PGALLOC_H */
-- 
2.26.2


^ permalink raw reply related

* [PATCH 7/8] mm: move lib/ioremap.c to mm/
From: Mike Rapoport @ 2020-06-27 14:34 UTC (permalink / raw)
  To: linux-kernel
  Cc: linux-ia64, linux-sh, Peter Zijlstra, Max Filippov,
	Satheesh Rajendran, linux-csky, sparclinux, linux-riscv,
	linux-arch, Stephen Rothwell, linux-hexagon, Joerg Roedel,
	Mike Rapoport, Abdul Haleem, linux-snps-arc, linux-xtensa,
	Arnd Bergmann, linux-s390, linux-um, Steven Rostedt, linux-m68k,
	openrisc, Andy Lutomirski, Stafford Horne, linux-arm-kernel,
	linux-parisc, linux-mm, linux-mips, linux-alpha, Andrew Morton,
	linuxppc-dev, Mike Rapoport
In-Reply-To: <20200627143453.31835-1-rppt@kernel.org>

From: Mike Rapoport <rppt@linux.ibm.com>

The functionality in lib/ioremap.c deals with pagetables, vmalloc and
caches, so it naturally belongs to mm/
Moving it there will also allow declaring p?d_alloc_track functions in an
header file inside mm/ rather than having those declarations in
include/linux/mm.h

Suggested-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Mike Rapoport <rppt@linux.ibm.com>
---
 lib/Makefile          | 1 -
 mm/Makefile           | 2 +-
 {lib => mm}/ioremap.c | 0
 3 files changed, 1 insertion(+), 2 deletions(-)
 rename {lib => mm}/ioremap.c (100%)

diff --git a/lib/Makefile b/lib/Makefile
index b1c42c10073b..5f9384adde9c 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -37,7 +37,6 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \
 	 nmi_backtrace.o nodemask.o win_minmax.o memcat_p.o
 
 lib-$(CONFIG_PRINTK) += dump_stack.o
-lib-$(CONFIG_MMU) += ioremap.o
 lib-$(CONFIG_SMP) += cpumask.o
 
 lib-y	+= kobject.o klist.o
diff --git a/mm/Makefile b/mm/Makefile
index 6e9d46b2efc9..d5649f1c12c0 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -38,7 +38,7 @@ mmu-y			:= nommu.o
 mmu-$(CONFIG_MMU)	:= highmem.o memory.o mincore.o \
 			   mlock.o mmap.o mmu_gather.o mprotect.o mremap.o \
 			   msync.o page_vma_mapped.o pagewalk.o \
-			   pgtable-generic.o rmap.o vmalloc.o
+			   pgtable-generic.o rmap.o vmalloc.o ioremap.o
 
 
 ifdef CONFIG_CROSS_MEMORY_ATTACH
diff --git a/lib/ioremap.c b/mm/ioremap.c
similarity index 100%
rename from lib/ioremap.c
rename to mm/ioremap.c
-- 
2.26.2


^ permalink raw reply related

* [PATCH 8/8] mm: move p?d_alloc_track to separate header file
From: Mike Rapoport @ 2020-06-27 14:34 UTC (permalink / raw)
  To: linux-kernel
  Cc: linux-ia64, linux-sh, Peter Zijlstra, Max Filippov,
	Satheesh Rajendran, linux-csky, sparclinux, linux-riscv,
	linux-arch, Stephen Rothwell, linux-hexagon, Joerg Roedel,
	Mike Rapoport, Abdul Haleem, linux-snps-arc, linux-xtensa,
	Joerg Roedel, Arnd Bergmann, linux-s390, linux-um, Steven Rostedt,
	linux-m68k, openrisc, Andy Lutomirski, Stafford Horne,
	linux-arm-kernel, linux-parisc, linux-mm, linux-mips, linux-alpha,
	Andrew Morton, linuxppc-dev, Mike Rapoport
In-Reply-To: <20200627143453.31835-1-rppt@kernel.org>

From: Joerg Roedel <jroedel@suse.de>

The functions are only used in two source files, both residing in mm/
subdirectory, so there is no need for them to be in the global <linux/mm.h>
header.  Move them to the new mm/pgalloc-track.h header and include it only
where needed.

[rppt: mv include/linux/pgalloc-track.h mm/]

Link: http://lkml.kernel.org/r/20200609120533.25867-1-joro@8bytes.org
Signed-off-by: Joerg Roedel <jroedel@suse.de>
Cc: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Abdul Haleem <abdhalee@linux.vnet.ibm.com>
Cc: Satheesh Rajendran <sathnaga@linux.vnet.ibm.com>
Cc: Stephen Rothwell <sfr@canb.auug.org.au>
Cc: Steven Rostedt (VMware) <rostedt@goodmis.org>
Cc: Mike Rapoport <rppt@linux.ibm.com>
Cc: Christophe Leroy <christophe.leroy@csgroup.eu>
Signed-off-by: Mike Rapoport <rppt@linux.ibm.com>
---
 include/linux/mm.h | 45 ----------------------------------------
 mm/ioremap.c       |  2 ++
 mm/pgalloc-track.h | 51 ++++++++++++++++++++++++++++++++++++++++++++++
 mm/vmalloc.c       |  1 +
 4 files changed, 54 insertions(+), 45 deletions(-)
 create mode 100644 mm/pgalloc-track.h

diff --git a/include/linux/mm.h b/include/linux/mm.h
index dc7b87310c10..5e878a3c7c57 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2093,51 +2093,11 @@ static inline pud_t *pud_alloc(struct mm_struct *mm, p4d_t *p4d,
 		NULL : pud_offset(p4d, address);
 }
 
-static inline p4d_t *p4d_alloc_track(struct mm_struct *mm, pgd_t *pgd,
-				     unsigned long address,
-				     pgtbl_mod_mask *mod_mask)
-
-{
-	if (unlikely(pgd_none(*pgd))) {
-		if (__p4d_alloc(mm, pgd, address))
-			return NULL;
-		*mod_mask |= PGTBL_PGD_MODIFIED;
-	}
-
-	return p4d_offset(pgd, address);
-}
-
-static inline pud_t *pud_alloc_track(struct mm_struct *mm, p4d_t *p4d,
-				     unsigned long address,
-				     pgtbl_mod_mask *mod_mask)
-{
-	if (unlikely(p4d_none(*p4d))) {
-		if (__pud_alloc(mm, p4d, address))
-			return NULL;
-		*mod_mask |= PGTBL_P4D_MODIFIED;
-	}
-
-	return pud_offset(p4d, address);
-}
-
 static inline pmd_t *pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address)
 {
 	return (unlikely(pud_none(*pud)) && __pmd_alloc(mm, pud, address))?
 		NULL: pmd_offset(pud, address);
 }
-
-static inline pmd_t *pmd_alloc_track(struct mm_struct *mm, pud_t *pud,
-				     unsigned long address,
-				     pgtbl_mod_mask *mod_mask)
-{
-	if (unlikely(pud_none(*pud))) {
-		if (__pmd_alloc(mm, pud, address))
-			return NULL;
-		*mod_mask |= PGTBL_PUD_MODIFIED;
-	}
-
-	return pmd_offset(pud, address);
-}
 #endif /* CONFIG_MMU */
 
 #if USE_SPLIT_PTE_PTLOCKS
@@ -2253,11 +2213,6 @@ static inline void pgtable_pte_page_dtor(struct page *page)
 	((unlikely(pmd_none(*(pmd))) && __pte_alloc_kernel(pmd))? \
 		NULL: pte_offset_kernel(pmd, address))
 
-#define pte_alloc_kernel_track(pmd, address, mask)			\
-	((unlikely(pmd_none(*(pmd))) &&					\
-	  (__pte_alloc_kernel(pmd) || ({*(mask)|=PGTBL_PMD_MODIFIED;0;})))?\
-		NULL: pte_offset_kernel(pmd, address))
-
 #if USE_SPLIT_PMD_PTLOCKS
 
 static struct page *pmd_to_page(pmd_t *pmd)
diff --git a/mm/ioremap.c b/mm/ioremap.c
index 5ee3526f71b8..5fa1ab41d152 100644
--- a/mm/ioremap.c
+++ b/mm/ioremap.c
@@ -13,6 +13,8 @@
 #include <linux/export.h>
 #include <asm/cacheflush.h>
 
+#include "pgalloc-track.h"
+
 #ifdef CONFIG_HAVE_ARCH_HUGE_VMAP
 static int __read_mostly ioremap_p4d_capable;
 static int __read_mostly ioremap_pud_capable;
diff --git a/mm/pgalloc-track.h b/mm/pgalloc-track.h
new file mode 100644
index 000000000000..1dcc865029a2
--- /dev/null
+++ b/mm/pgalloc-track.h
@@ -0,0 +1,51 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_PGALLLC_TRACK_H
+#define _LINUX_PGALLLC_TRACK_H
+
+#if defined(CONFIG_MMU)
+static inline p4d_t *p4d_alloc_track(struct mm_struct *mm, pgd_t *pgd,
+				     unsigned long address,
+				     pgtbl_mod_mask *mod_mask)
+{
+	if (unlikely(pgd_none(*pgd))) {
+		if (__p4d_alloc(mm, pgd, address))
+			return NULL;
+		*mod_mask |= PGTBL_PGD_MODIFIED;
+	}
+
+	return p4d_offset(pgd, address);
+}
+
+static inline pud_t *pud_alloc_track(struct mm_struct *mm, p4d_t *p4d,
+				     unsigned long address,
+				     pgtbl_mod_mask *mod_mask)
+{
+	if (unlikely(p4d_none(*p4d))) {
+		if (__pud_alloc(mm, p4d, address))
+			return NULL;
+		*mod_mask |= PGTBL_P4D_MODIFIED;
+	}
+
+	return pud_offset(p4d, address);
+}
+
+static inline pmd_t *pmd_alloc_track(struct mm_struct *mm, pud_t *pud,
+				     unsigned long address,
+				     pgtbl_mod_mask *mod_mask)
+{
+	if (unlikely(pud_none(*pud))) {
+		if (__pmd_alloc(mm, pud, address))
+			return NULL;
+		*mod_mask |= PGTBL_PUD_MODIFIED;
+	}
+
+	return pmd_offset(pud, address);
+}
+#endif /* CONFIG_MMU */
+
+#define pte_alloc_kernel_track(pmd, address, mask)			\
+	((unlikely(pmd_none(*(pmd))) &&					\
+	  (__pte_alloc_kernel(pmd) || ({*(mask)|=PGTBL_PMD_MODIFIED;0;})))?\
+		NULL: pte_offset_kernel(pmd, address))
+
+#endif /* _LINUX_PGALLLC_TRACK_H */
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 5a2b55c8dd9a..5be3cf3b59de 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -41,6 +41,7 @@
 #include <asm/shmparam.h>
 
 #include "internal.h"
+#include "pgalloc-track.h"
 
 bool is_vmalloc_addr(const void *x)
 {
-- 
2.26.2


^ permalink raw reply related

* Re: [PATCH] powerpc/pseries: Use doorbells even if XIVE is available
From: Nicholas Piggin @ 2020-06-27 14:59 UTC (permalink / raw)
  To: Cédric Le Goater, linuxppc-dev, Michael Ellerman
  Cc: Anton Blanchard, kvm-ppc, David Gibson
In-Reply-To: <af42c250-cf4b-0815-c91c-9363445383e7@kaod.org>

Excerpts from Cédric Le Goater's message of June 26, 2020 5:17 pm:
> Adding David, 
> 
> On 6/25/20 3:11 AM, Michael Ellerman wrote:
>> Nicholas Piggin <npiggin@gmail.com> writes:
>>> KVM supports msgsndp in guests by trapping and emulating the
>>> instruction, so it was decided to always use XIVE for IPIs if it is
>>> available. However on PowerVM systems, msgsndp can be used and gives
>>> better performance. On large systems, high XIVE interrupt rates can
>>> have sub-linear scaling, and using msgsndp can reduce the load on
>>> the interrupt controller.
>>>
>>> So switch to using core local doorbells even if XIVE is available.
>>> This reduces performance for KVM guests with an SMT topology by
>>> about 50% for ping-pong context switching between SMT vCPUs.
>> 
>> You have to take explicit steps to configure KVM in that way with qemu.
>> eg. "qemu .. -smp 8" will give you 8 SMT1 CPUs by default.
>> 
>>> An option vector (or dt-cpu-ftrs) could be defined to disable msgsndp
>>> to get KVM performance back.
> 
> An option vector would require a PAPR change. Unless the architecture 
> reserves some bits for the implementation, but I don't think so. Same
> for CAS.
> 
>> Qemu/KVM populates /proc/device-tree/hypervisor, so we *could* look at
>> that. Though adding PowerVM/KVM specific hacks is obviously a very
>> slippery slope.
> 
> QEMU could advertise a property "emulated-msgsndp", or something similar, 
> which would be interpreted by Linux as a CPU feature and taken into account 
> when doing the IPIs.

What I'm going to do is detect KVM here (we already have a KVM detection
test using that dt property). The IPI setup code already has KVM hacks 
in it, so I don't really see the problem with puting them behind a KVM
test.

I think doing cpu ftrs or some specific entry for msgsndp in particular
is the right way to go, but in the interests of making existing KVM work
I'll do this.

Thanks,
Nick

^ permalink raw reply

* [PATCH 0/3] powerpc/pseries: IPI doorbell improvements
From: Nicholas Piggin @ 2020-06-27 15:04 UTC (permalink / raw)
  To: linuxppc-dev
  Cc: Nicholas Piggin, kvm-ppc, Anton Blanchard, Cédric Le Goater,
	David Gibson

Thanks for the review, I think I incorporated all your comments, I
also did add KVM detection which avoids introducing a performance
regression.

Thanks,
Nick

Nicholas Piggin (3):
  powerpc: inline doorbell sending functions
  powerpc/pseries: Use doorbells even if XIVE is available
  powerpc/pseries: Add KVM guest doorbell restrictions

 arch/powerpc/include/asm/dbell.h          | 59 +++++++++++++++++++--
 arch/powerpc/include/asm/firmware.h       |  2 +
 arch/powerpc/include/asm/kvm_para.h       | 26 ++--------
 arch/powerpc/kernel/dbell.c               | 55 --------------------
 arch/powerpc/platforms/pseries/firmware.c | 14 +++++
 arch/powerpc/platforms/pseries/smp.c      | 62 ++++++++++++++++-------
 6 files changed, 119 insertions(+), 99 deletions(-)

-- 
2.23.0


^ permalink raw reply

* [PATCH 1/3] powerpc: inline doorbell sending functions
From: Nicholas Piggin @ 2020-06-27 15:04 UTC (permalink / raw)
  To: linuxppc-dev
  Cc: Nicholas Piggin, kvm-ppc, Anton Blanchard, Cédric Le Goater,
	David Gibson
In-Reply-To: <20200627150428.2525192-1-npiggin@gmail.com>

These are only called in one place for a given platform, so inline them
for performance.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 arch/powerpc/include/asm/dbell.h | 59 ++++++++++++++++++++++++++++++--
 arch/powerpc/kernel/dbell.c      | 55 -----------------------------
 2 files changed, 56 insertions(+), 58 deletions(-)

diff --git a/arch/powerpc/include/asm/dbell.h b/arch/powerpc/include/asm/dbell.h
index 4ce6808deed3..5c9625e5070b 100644
--- a/arch/powerpc/include/asm/dbell.h
+++ b/arch/powerpc/include/asm/dbell.h
@@ -13,6 +13,7 @@
 
 #include <asm/ppc-opcode.h>
 #include <asm/feature-fixups.h>
+#include <asm/kvm_ppc.h>
 
 #define PPC_DBELL_MSG_BRDCAST	(0x04000000)
 #define PPC_DBELL_TYPE(x)	(((x) & 0xf) << (63-36))
@@ -87,9 +88,6 @@ static inline void ppc_msgsync(void)
 
 #endif /* CONFIG_PPC_BOOK3S */
 
-extern void doorbell_global_ipi(int cpu);
-extern void doorbell_core_ipi(int cpu);
-extern int doorbell_try_core_ipi(int cpu);
 extern void doorbell_exception(struct pt_regs *regs);
 
 static inline void ppc_msgsnd(enum ppc_dbell type, u32 flags, u32 tag)
@@ -100,4 +98,59 @@ static inline void ppc_msgsnd(enum ppc_dbell type, u32 flags, u32 tag)
 	_ppc_msgsnd(msg);
 }
 
+/*
+ * Doorbells must only be used if CPU_FTR_DBELL is available.
+ * msgsnd is used in HV, and msgsndp is used in !HV.
+ *
+ * These should be used by platform code that is aware of restrictions.
+ * Other arch code should use ->cause_ipi.
+ *
+ * doorbell_global_ipi() sends a dbell to any target CPU.
+ * Must be used only by architectures that address msgsnd target
+ * by PIR/get_hard_smp_processor_id.
+ */
+static inline void doorbell_global_ipi(int cpu)
+{
+	u32 tag = get_hard_smp_processor_id(cpu);
+
+	kvmppc_set_host_ipi(cpu);
+	/* Order previous accesses vs. msgsnd, which is treated as a store */
+	ppc_msgsnd_sync();
+	ppc_msgsnd(PPC_DBELL_MSGTYPE, 0, tag);
+}
+
+/*
+ * doorbell_core_ipi() sends a dbell to a target CPU in the same core.
+ * Must be used only by architectures that address msgsnd target
+ * by TIR/cpu_thread_in_core.
+ */
+static inline void doorbell_core_ipi(int cpu)
+{
+	u32 tag = cpu_thread_in_core(cpu);
+
+	kvmppc_set_host_ipi(cpu);
+	/* Order previous accesses vs. msgsnd, which is treated as a store */
+	ppc_msgsnd_sync();
+	ppc_msgsnd(PPC_DBELL_MSGTYPE, 0, tag);
+}
+
+/*
+ * Attempt to cause a core doorbell if destination is on the same core.
+ * Returns 1 on success, 0 on failure.
+ */
+static inline int doorbell_try_core_ipi(int cpu)
+{
+	int this_cpu = get_cpu();
+	int ret = 0;
+
+	if (cpumask_test_cpu(cpu, cpu_sibling_mask(this_cpu))) {
+		doorbell_core_ipi(cpu);
+		ret = 1;
+	}
+
+	put_cpu();
+
+	return ret;
+}
+
 #endif /* _ASM_POWERPC_DBELL_H */
diff --git a/arch/powerpc/kernel/dbell.c b/arch/powerpc/kernel/dbell.c
index f17ff1200eaa..52680cf07c9d 100644
--- a/arch/powerpc/kernel/dbell.c
+++ b/arch/powerpc/kernel/dbell.c
@@ -18,61 +18,6 @@
 
 #ifdef CONFIG_SMP
 
-/*
- * Doorbells must only be used if CPU_FTR_DBELL is available.
- * msgsnd is used in HV, and msgsndp is used in !HV.
- *
- * These should be used by platform code that is aware of restrictions.
- * Other arch code should use ->cause_ipi.
- *
- * doorbell_global_ipi() sends a dbell to any target CPU.
- * Must be used only by architectures that address msgsnd target
- * by PIR/get_hard_smp_processor_id.
- */
-void doorbell_global_ipi(int cpu)
-{
-	u32 tag = get_hard_smp_processor_id(cpu);
-
-	kvmppc_set_host_ipi(cpu);
-	/* Order previous accesses vs. msgsnd, which is treated as a store */
-	ppc_msgsnd_sync();
-	ppc_msgsnd(PPC_DBELL_MSGTYPE, 0, tag);
-}
-
-/*
- * doorbell_core_ipi() sends a dbell to a target CPU in the same core.
- * Must be used only by architectures that address msgsnd target
- * by TIR/cpu_thread_in_core.
- */
-void doorbell_core_ipi(int cpu)
-{
-	u32 tag = cpu_thread_in_core(cpu);
-
-	kvmppc_set_host_ipi(cpu);
-	/* Order previous accesses vs. msgsnd, which is treated as a store */
-	ppc_msgsnd_sync();
-	ppc_msgsnd(PPC_DBELL_MSGTYPE, 0, tag);
-}
-
-/*
- * Attempt to cause a core doorbell if destination is on the same core.
- * Returns 1 on success, 0 on failure.
- */
-int doorbell_try_core_ipi(int cpu)
-{
-	int this_cpu = get_cpu();
-	int ret = 0;
-
-	if (cpumask_test_cpu(cpu, cpu_sibling_mask(this_cpu))) {
-		doorbell_core_ipi(cpu);
-		ret = 1;
-	}
-
-	put_cpu();
-
-	return ret;
-}
-
 void doorbell_exception(struct pt_regs *regs)
 {
 	struct pt_regs *old_regs = set_irq_regs(regs);
-- 
2.23.0


^ permalink raw reply related

* [PATCH 2/3] powerpc/pseries: Use doorbells even if XIVE is available
From: Nicholas Piggin @ 2020-06-27 15:04 UTC (permalink / raw)
  To: linuxppc-dev
  Cc: Nicholas Piggin, kvm-ppc, Anton Blanchard, Cédric Le Goater,
	David Gibson
In-Reply-To: <20200627150428.2525192-1-npiggin@gmail.com>

KVM supports msgsndp in guests by trapping and emulating the
instruction, so it was decided to always use XIVE for IPIs if it is
available. However on PowerVM systems, msgsndp can be used and gives
better performance. On large systems, high XIVE interrupt rates can
have sub-linear scaling, and using msgsndp can reduce the load on
the interrupt controller.

So switch to using core local doorbells even if XIVE is available.
This reduces performance for KVM guests with an SMT topology by
about 50% for ping-pong context switching between SMT vCPUs. An
option vector (or dt-cpu-ftrs) could be defined to disable msgsndp
to get KVM performance back.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 arch/powerpc/platforms/pseries/smp.c | 54 ++++++++++++++++++----------
 1 file changed, 36 insertions(+), 18 deletions(-)

diff --git a/arch/powerpc/platforms/pseries/smp.c b/arch/powerpc/platforms/pseries/smp.c
index 6891710833be..67e6ad5076ce 100644
--- a/arch/powerpc/platforms/pseries/smp.c
+++ b/arch/powerpc/platforms/pseries/smp.c
@@ -188,13 +188,16 @@ static int pseries_smp_prepare_cpu(int cpu)
 	return 0;
 }
 
-static void smp_pseries_cause_ipi(int cpu)
+/* Cause IPI as setup by the interrupt controller (xics or xive) */
+static void (*ic_cause_ipi)(int cpu) __ro_after_init;
+
+/* Use msgsndp doorbells target is a sibling, else use interrupt controller */
+static void dbell_or_ic_cause_ipi(int cpu)
 {
-	/* POWER9 should not use this handler */
 	if (doorbell_try_core_ipi(cpu))
 		return;
 
-	icp_ops->cause_ipi(cpu);
+	ic_cause_ipi(cpu);
 }
 
 static int pseries_cause_nmi_ipi(int cpu)
@@ -218,26 +221,41 @@ static int pseries_cause_nmi_ipi(int cpu)
 	return 0;
 }
 
-static __init void pSeries_smp_probe_xics(void)
-{
-	xics_smp_probe();
-
-	if (cpu_has_feature(CPU_FTR_DBELL) && !is_secure_guest())
-		smp_ops->cause_ipi = smp_pseries_cause_ipi;
-	else
-		smp_ops->cause_ipi = icp_ops->cause_ipi;
-}
-
 static __init void pSeries_smp_probe(void)
 {
 	if (xive_enabled())
-		/*
-		 * Don't use P9 doorbells when XIVE is enabled. IPIs
-		 * using MMIOs should be faster
-		 */
 		xive_smp_probe();
 	else
-		pSeries_smp_probe_xics();
+		xics_smp_probe();
+
+	/* No doorbell facility, must use the interrupt controller for IPIs */
+	if (!cpu_has_feature(CPU_FTR_DBELL))
+		return;
+
+	/* Doorbells can only be used for IPIs between SMT siblings */
+	if (!cpu_has_feature(CPU_FTR_SMT))
+		return;
+
+	/*
+	 * KVM emulates doorbells by disabling FSCR[MSGP] so msgsndp faults
+	 * to the hypervisor which then reads the instruction from guest
+	 * memory. This can't be done if the guest is secure, so don't use
+	 * doorbells in secure guests.
+	 *
+	 * Under PowerVM, FSCR[MSGP] is enabled so doorbells could be used
+	 * by secure guests if we distinguished this from KVM.
+	 */
+	if (is_secure_guest())
+		return;
+
+	/*
+	 * The guest can use doobells for SMT sibling IPIs, which stay in
+	 * the core rather than going to the interrupt controller. This
+	 * tends to be slower under KVM where doorbells are emulated, but
+	 * faster for PowerVM where they're enabled.
+	 */
+	ic_cause_ipi = smp_ops->cause_ipi;
+	smp_ops->cause_ipi = dbell_or_ic_cause_ipi;
 }
 
 static struct smp_ops_t pseries_smp_ops = {
-- 
2.23.0


^ permalink raw reply related

* [PATCH 3/3] powerpc/pseries: Add KVM guest doorbell restrictions
From: Nicholas Piggin @ 2020-06-27 15:04 UTC (permalink / raw)
  To: linuxppc-dev
  Cc: Nicholas Piggin, kvm-ppc, Anton Blanchard, Cédric Le Goater,
	David Gibson
In-Reply-To: <20200627150428.2525192-1-npiggin@gmail.com>

KVM guests have certain restrictions and performance quirks when
using doorbells. This patch tests for KVM environment in doorbell
setup, and optimises IPI performance:

 - PowerVM guests may now use doorbells even if they are secure.

 - KVM guests no longer use doorbells if XIVE is available.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 arch/powerpc/include/asm/firmware.h       |  2 ++
 arch/powerpc/include/asm/kvm_para.h       | 26 ++--------------
 arch/powerpc/platforms/pseries/firmware.c | 14 +++++++++
 arch/powerpc/platforms/pseries/smp.c      | 38 ++++++++++++++---------
 4 files changed, 42 insertions(+), 38 deletions(-)

diff --git a/arch/powerpc/include/asm/firmware.h b/arch/powerpc/include/asm/firmware.h
index 6003c2e533a0..4dadb84ff2b2 100644
--- a/arch/powerpc/include/asm/firmware.h
+++ b/arch/powerpc/include/asm/firmware.h
@@ -134,7 +134,9 @@ extern unsigned int __start___fw_ftr_fixup, __stop___fw_ftr_fixup;
 
 #ifdef CONFIG_PPC_PSERIES
 void pseries_probe_fw_features(void);
+bool is_kvm_guest(void);
 #else
+static inline bool is_kvm_guest(void) { return false; }
 static inline void pseries_probe_fw_features(void) { };
 #endif
 
diff --git a/arch/powerpc/include/asm/kvm_para.h b/arch/powerpc/include/asm/kvm_para.h
index 9c1f6b4b9bbf..744612054c94 100644
--- a/arch/powerpc/include/asm/kvm_para.h
+++ b/arch/powerpc/include/asm/kvm_para.h
@@ -8,35 +8,15 @@
 #ifndef __POWERPC_KVM_PARA_H__
 #define __POWERPC_KVM_PARA_H__
 
-#include <uapi/asm/kvm_para.h>
-
-#ifdef CONFIG_KVM_GUEST
-
-#include <linux/of.h>
-
-static inline int kvm_para_available(void)
-{
-	struct device_node *hyper_node;
-
-	hyper_node = of_find_node_by_path("/hypervisor");
-	if (!hyper_node)
-		return 0;
+#include <asm/firmware.h>
 
-	if (!of_device_is_compatible(hyper_node, "linux,kvm"))
-		return 0;
-
-	return 1;
-}
-
-#else
+#include <uapi/asm/kvm_para.h>
 
 static inline int kvm_para_available(void)
 {
-	return 0;
+	return IS_ENABLED(CONFIG_KVM_GUEST) && is_kvm_guest();
 }
 
-#endif
-
 static inline unsigned int kvm_arch_para_features(void)
 {
 	unsigned long r;
diff --git a/arch/powerpc/platforms/pseries/firmware.c b/arch/powerpc/platforms/pseries/firmware.c
index 3e49cc23a97a..f58eb10011dd 100644
--- a/arch/powerpc/platforms/pseries/firmware.c
+++ b/arch/powerpc/platforms/pseries/firmware.c
@@ -184,3 +184,17 @@ void __init pseries_probe_fw_features(void)
 {
 	of_scan_flat_dt(probe_fw_features, NULL);
 }
+
+bool is_kvm_guest(void)
+{
+	struct device_node *hyper_node;
+
+	hyper_node = of_find_node_by_path("/hypervisor");
+	if (!hyper_node)
+		return 0;
+
+	if (!of_device_is_compatible(hyper_node, "linux,kvm"))
+		return 0;
+
+	return 1;
+}
diff --git a/arch/powerpc/platforms/pseries/smp.c b/arch/powerpc/platforms/pseries/smp.c
index 67e6ad5076ce..7af0003b40b6 100644
--- a/arch/powerpc/platforms/pseries/smp.c
+++ b/arch/powerpc/platforms/pseries/smp.c
@@ -236,24 +236,32 @@ static __init void pSeries_smp_probe(void)
 	if (!cpu_has_feature(CPU_FTR_SMT))
 		return;
 
-	/*
-	 * KVM emulates doorbells by disabling FSCR[MSGP] so msgsndp faults
-	 * to the hypervisor which then reads the instruction from guest
-	 * memory. This can't be done if the guest is secure, so don't use
-	 * doorbells in secure guests.
-	 *
-	 * Under PowerVM, FSCR[MSGP] is enabled so doorbells could be used
-	 * by secure guests if we distinguished this from KVM.
-	 */
-	if (is_secure_guest())
-		return;
+	if (is_kvm_guest()) {
+		/*
+		 * KVM emulates doorbells by disabling FSCR[MSGP] so msgsndp
+		 * faults to the hypervisor which then reads the instruction
+		 * from guest memory, which tends to be slower than using XIVE.
+		 */
+		if (xive_enabled())
+			return;
+
+		/*
+		 * XICS hcalls aren't as fast, so we can use msgsndp (which
+		 * also helps exercise KVM emulation), however KVM can't
+		 * emulate secure guests because it can't read the instruction
+		 * out of their memory.
+		 */
+		if (is_secure_guest())
+			return;
+	}
 
 	/*
-	 * The guest can use doobells for SMT sibling IPIs, which stay in
-	 * the core rather than going to the interrupt controller. This
-	 * tends to be slower under KVM where doorbells are emulated, but
-	 * faster for PowerVM where they're enabled.
+	 * Under PowerVM, FSCR[MSGP] is enabled as guest vCPU siblings are
+	 * gang scheduled on the same physical core, so doorbells are always
+	 * faster than the interrupt controller, and they can be used by
+	 * secure guests.
 	 */
+
 	ic_cause_ipi = smp_ops->cause_ipi;
 	smp_ops->cause_ipi = dbell_or_ic_cause_ipi;
 }
-- 
2.23.0


^ permalink raw reply related

* Re: [GIT PULL] Please pull powerpc/linux.git powerpc-5.8-4 tag
From: pr-tracker-bot @ 2020-06-27 16:00 UTC (permalink / raw)
  To: Michael Ellerman
  Cc: aneesh.kumar, linux-kernel, oss, Linus Torvalds, asolokha, harish,
	linuxppc-dev
In-Reply-To: <87h7uwbtn3.fsf@mpe.ellerman.id.au>

The pull request you sent on Sat, 27 Jun 2020 22:06:08 +1000:

> https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git tags/powerpc-5.8-4

has been merged into torvalds/linux.git:
https://git.kernel.org/torvalds/c/21d2f6850c09fdec730c11d35406da1dc541432d

Thank you!

-- 
Deet-doot-dot, I am a bot.
https://korg.wiki.kernel.org/userdoc/prtracker

^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox