linuxppc-dev.lists.ozlabs.org archive mirror
 help / color / mirror / Atom feed
From: "Liang, Kan" <kan.liang@linux.intel.com>
To: Peter Zijlstra <peterz@infradead.org>,
	mingo@kernel.org, acme@kernel.org,  mark.rutland@arm.com,
	alexander.shishkin@linux.intel.com, jolsa@redhat.com,
	eranian@google.com
Cc: linux-arch@vger.kernel.org, ak@linux.intel.com,
	catalin.marinas@arm.com, linuxppc-dev@lists.ozlabs.org,
	willy@infradead.org, linux-kernel@vger.kernel.org,
	dave.hansen@intel.com, npiggin@gmail.com,
	aneesh.kumar@linux.ibm.com, sparclinux@vger.kernel.org,
	will@kernel.org, davem@davemloft.net,
	kirill.shutemov@linux.intel.com
Subject: Re: [PATCH 3/5] perf/core: Fix arch_perf_get_page_size()
Date: Mon, 16 Nov 2020 08:11:44 -0500	[thread overview]
Message-ID: <bbef8555-53d0-d4ce-c20d-ee64dfb9d90b@linux.intel.com> (raw)
In-Reply-To: <20201113113426.526012343@infradead.org>



On 11/13/2020 6:19 AM, Peter Zijlstra wrote:
> The (new) page-table walker in arch_perf_get_page_size() is broken in
> various ways. Specifically while it is used in a locless manner, it
> doesn't depend on CONFIG_HAVE_FAST_GUP nor uses the proper _lockless
> offset methods, nor is careful to only read each entry only once.
> 
> Also the hugetlb support is broken due to calling pte_page() without
> first checking pte_special().
> 
> Rewrite the whole thing to be a proper lockless page-table walker and
> employ the new pXX_leaf_size() pgtable functions to determine the TLB
> size without looking at the page-frames.
> 
> Fixes: 51b646b2d9f8 ("perf,mm: Handle non-page-table-aligned hugetlbfs")
> Fixes: 8d97e71811aa ("perf/core: Add PERF_SAMPLE_DATA_PAGE_SIZE")

The issue 
(https://lkml.kernel.org/r/8e88ba79-7c40-ea32-a7ed-bdc4fc04b2af@linux.intel.com) 
has been fixed by this patch set.

Tested-by: Kan Liang <kan.liang@linux.intel.com>


> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
> ---
>   arch/arm64/include/asm/pgtable.h    |    3 +
>   arch/sparc/include/asm/pgtable_64.h |   13 ++++
>   arch/sparc/mm/hugetlbpage.c         |   19 ++++--
>   include/linux/pgtable.h             |   16 +++++
>   kernel/events/core.c                |  102 +++++++++++++-----------------------
>   5 files changed, 82 insertions(+), 71 deletions(-)
> 
> --- a/kernel/events/core.c
> +++ b/kernel/events/core.c
> @@ -7001,90 +7001,62 @@ static u64 perf_virt_to_phys(u64 virt)
>   	return phys_addr;
>   }
>   
> -#ifdef CONFIG_MMU
> -
>   /*
> - * Return the MMU page size of a given virtual address.
> - *
> - * This generic implementation handles page-table aligned huge pages, as well
> - * as non-page-table aligned hugetlbfs compound pages.
> - *
> - * If an architecture supports and uses non-page-table aligned pages in their
> - * kernel mapping it will need to provide it's own implementation of this
> - * function.
> + * Return the MMU/TLB page size of a given virtual address.
>    */
> -__weak u64 arch_perf_get_page_size(struct mm_struct *mm, unsigned long addr)
> +static u64 perf_get_tlb_page_size(struct mm_struct *mm, unsigned long addr)
>   {
> -	struct page *page;
> -	pgd_t *pgd;
> -	p4d_t *p4d;
> -	pud_t *pud;
> -	pmd_t *pmd;
> -	pte_t *pte;
> +	u64 size = 0;
>   
> -	pgd = pgd_offset(mm, addr);
> -	if (pgd_none(*pgd))
> -		return 0;
> +#ifdef CONFIG_HAVE_FAST_GUP
> +	pgd_t *pgdp, pgd;
> +	p4d_t *p4dp, p4d;
> +	pud_t *pudp, pud;
> +	pmd_t *pmdp, pmd;
> +	pte_t *ptep, pte;
>   
> -	p4d = p4d_offset(pgd, addr);
> -	if (!p4d_present(*p4d))
> +	pgdp = pgd_offset(mm, addr);
> +	pgd = READ_ONCE(*pgdp);
> +	if (pgd_none(pgd))
>   		return 0;
>   
> -	if (p4d_leaf(*p4d))
> -		return 1ULL << P4D_SHIFT;
> +	if (pgd_leaf(pgd))
> +		return pgd_leaf_size(pgd);
>   
> -	pud = pud_offset(p4d, addr);
> -	if (!pud_present(*pud))
> +	p4dp = p4d_offset_lockless(pgdp, pgd, addr);
> +	p4d = READ_ONCE(*p4dp);
> +	if (!p4d_present(p4d))
>   		return 0;
>   
> -	if (pud_leaf(*pud)) {
> -#ifdef pud_page
> -		page = pud_page(*pud);
> -		if (PageHuge(page))
> -			return page_size(compound_head(page));
> -#endif
> -		return 1ULL << PUD_SHIFT;
> -	}
> +	if (p4d_leaf(p4d))
> +		return p4d_leaf_size(p4d);
>   
> -	pmd = pmd_offset(pud, addr);
> -	if (!pmd_present(*pmd))
> +	pudp = pud_offset_lockless(p4dp, p4d, addr);
> +	pud = READ_ONCE(*pudp);
> +	if (!pud_present(pud))
>   		return 0;
>   
> -	if (pmd_leaf(*pmd)) {
> -#ifdef pmd_page
> -		page = pmd_page(*pmd);
> -		if (PageHuge(page))
> -			return page_size(compound_head(page));
> -#endif
> -		return 1ULL << PMD_SHIFT;
> -	}
> +	if (pud_leaf(pud))
> +		return pud_leaf_size(pud);
>   
> -	pte = pte_offset_map(pmd, addr);
> -	if (!pte_present(*pte)) {
> -		pte_unmap(pte);
> +	pmdp = pmd_offset_lockless(pudp, pud, addr);
> +	pmd = READ_ONCE(*pmdp);
> +	if (!pmd_present(pmd))
>   		return 0;
> -	}
>   
> -	page = pte_page(*pte);
> -	if (PageHuge(page)) {
> -		u64 size = page_size(compound_head(page));
> -		pte_unmap(pte);
> -		return size;
> -	}
> -
> -	pte_unmap(pte);
> -	return PAGE_SIZE;
> -}
> +	if (pmd_leaf(pmd))
> +		return pmd_leaf_size(pmd);
>   
> -#else
> +	ptep = pte_offset_map(&pmd, addr);
> +	pte = ptep_get_lockless(ptep);
> +	if (pte_present(pte))
> +		size = pte_leaf_size(pte);
> +	pte_unmap(ptep);
> +#endif /* CONFIG_HAVE_FAST_GUP */
>   
> -static u64 arch_perf_get_page_size(struct mm_struct *mm, unsigned long addr)
> -{
> -	return 0;
> +	return size;
>   }
>   
> -#endif
> -
>   static u64 perf_get_page_size(unsigned long addr)
>   {
>   	struct mm_struct *mm;
> @@ -7109,7 +7081,7 @@ static u64 perf_get_page_size(unsigned l
>   		mm = &init_mm;
>   	}
>   
> -	size = arch_perf_get_page_size(mm, addr);
> +	size = perf_get_tlb_page_size(mm, addr);
>   
>   	local_irq_restore(flags);
>   
> 
> 

  reply	other threads:[~2020-11-16 19:51 UTC|newest]

Thread overview: 19+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-11-13 11:19 [PATCH 0/5] perf/mm: Fix PERF_SAMPLE_*_PAGE_SIZE Peter Zijlstra
2020-11-13 11:19 ` [PATCH 1/5] mm/gup: Provide gup_get_pte() more generic Peter Zijlstra
2020-11-13 11:19 ` [PATCH 2/5] mm: Introduce pXX_leaf_size() Peter Zijlstra
2020-11-13 11:45   ` Peter Zijlstra
2020-11-13 11:19 ` [PATCH 3/5] perf/core: Fix arch_perf_get_page_size() Peter Zijlstra
2020-11-16 13:11   ` Liang, Kan [this message]
2020-11-13 11:19 ` [PATCH 4/5] arm64/mm: Implement pXX_leaf_size() support Peter Zijlstra
2020-11-13 11:19 ` [PATCH 5/5] sparc64/mm: " Peter Zijlstra
2020-11-13 13:44 ` [PATCH 0/5] perf/mm: Fix PERF_SAMPLE_*_PAGE_SIZE Christophe Leroy
2020-11-20 11:18   ` Christophe Leroy
2020-11-20 12:20     ` Peter Zijlstra
2020-11-26 10:46       ` Peter Zijlstra
2020-11-16 15:43 ` Kirill A. Shutemov
2020-11-16 15:54   ` Matthew Wilcox
2020-11-16 16:28     ` Dave Hansen
2020-11-16 16:32       ` Matthew Wilcox
2020-11-16 16:36         ` Dave Hansen
2020-11-16 16:57           ` Peter Zijlstra
2020-11-16 16:55       ` Peter Zijlstra

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=bbef8555-53d0-d4ce-c20d-ee64dfb9d90b@linux.intel.com \
    --to=kan.liang@linux.intel.com \
    --cc=acme@kernel.org \
    --cc=ak@linux.intel.com \
    --cc=alexander.shishkin@linux.intel.com \
    --cc=aneesh.kumar@linux.ibm.com \
    --cc=catalin.marinas@arm.com \
    --cc=dave.hansen@intel.com \
    --cc=davem@davemloft.net \
    --cc=eranian@google.com \
    --cc=jolsa@redhat.com \
    --cc=kirill.shutemov@linux.intel.com \
    --cc=linux-arch@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linuxppc-dev@lists.ozlabs.org \
    --cc=mark.rutland@arm.com \
    --cc=mingo@kernel.org \
    --cc=npiggin@gmail.com \
    --cc=peterz@infradead.org \
    --cc=sparclinux@vger.kernel.org \
    --cc=will@kernel.org \
    --cc=willy@infradead.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).