Linux Documentation
 help / color / mirror / Atom feed
From: Anshuman Khandual <anshuman.khandual@arm.com>
To: "David Hildenbrand (Arm)" <david@kernel.org>,
	Hugh Dickins <hughd@google.com>
Cc: linux-mm@kvack.org,
	Andy Shevchenko <andriy.shevchenko@linux.intel.com>,
	Rasmus Villemoes <linux@rasmusvillemoes.dk>,
	Sergey Senozhatsky <senozhatsky@chromium.org>,
	Petr Mladek <pmladek@suse.com>,
	Steven Rostedt <rostedt@goodmis.org>,
	Jonathan Corbet <corbet@lwn.net>,
	Andrew Morton <akpm@linux-foundation.org>,
	linux-kernel@vger.kernel.org, linux-doc@vger.kernel.org,
	Lorenzo Stoakes <ljs@kernel.org>
Subject: Re: [RFC V2 3/3] mm: Replace pgtable entry prints with new format
Date: Thu, 2 Jul 2026 09:59:49 +0530	[thread overview]
Message-ID: <5a8e82f3-ed21-48a8-af3c-36a08fd2b0ec@arm.com> (raw)
In-Reply-To: <90b5cd31-87ed-4ef7-86cc-458b9e06b02d@kernel.org>



On 30/06/26 7:06 PM, David Hildenbrand (Arm) wrote:
> On 6/16/26 08:19, Hugh Dickins wrote:
>> On Mon, 15 Jun 2026, David Hildenbrand (Arm) wrote:
>>> On 6/12/26 23:26, Hugh Dickins wrote:
>>>> ...
>>>>
>>>> The page table entry is BUGgily Bad: we want to see what it looks like
>>>> (sometimes, a sequence of bad page map entries may even show up as ASCII).
>>>
>>> But is printing raw page table entries really what we want? I guess to detect
>>> "random corruption" it might help sometimes.
>>
>> Yes, that's what it's for. What we really want is to understand what went
>> wrong: that's too much to ask of a printk, but it can give us a good clue.
>>
>>>
>>> And do we really need information about the full page table walk, or is the last
>>> level good enough?
>>
>> Page table entry and pmd entry are good enough: higher levels got
>> added at some stage, but they are unlikely to be useful here.
> 
> Yes, I added them when we're processing PUD entries we'd also want
> P4D entry + PUD entry.
> 
> This is one approach of having the printing be done mostly
> manually, supporting 32, 64 and 128bit pte_val(). As raised by Ryan,
> using local bufs to store the data to not involve printk.
> 
> 
> I played with printing the byte stream manually, but didn't really like it.
> 
> Gave it a quick test and it seems to do its trick. I have the feeling that
> this can be beautified a bit more.
> 
> 
> From 05af7317b126991a61b0a3d01c2863ce5a578d1b Mon Sep 17 00:00:00 2001
> From: "David Hildenbrand (Arm)" <david@kernel.org>
> Date: Tue, 30 Jun 2026 15:23:02 +0200
> Subject: [PATCH] tmp
> 
> Signed-off-by: David Hildenbrand (Arm) <david@kernel.org>
> ---
>  mm/memory.c | 110 +++++++++++++++++++++++++++++++++++++++++-----------
>  1 file changed, 87 insertions(+), 23 deletions(-)
> 
> diff --git a/mm/memory.c b/mm/memory.c
> index ff338c2abe923..ad39cafe110f9 100644
> --- a/mm/memory.c
> +++ b/mm/memory.c
> @@ -519,9 +519,57 @@ static bool is_bad_page_map_ratelimited(void)
>  	return false;
>  }
>  
> +#define PTVAL_STR_MAX	(sizeof(u64) * 4 + 1)
> +
> +static void ptval_bytes_to_str(char *buf, size_t buf_size,
> +		const void *entry, size_t entry_size)
> +{
> +	if (WARN_ON_ONCE(buf_size < entry_size * 2 + 1)) {
> +		snprintf(buf, buf_size, "overflow");
> +		return;
> +	}
> +
> +	switch (entry_size) {
> +	case sizeof(u32):
> +		snprintf(buf, buf_size, "%08x", *(const u32 *)entry);
> +		break;
> +	case sizeof(u64):
> +		snprintf(buf, buf_size, "%016llx",
> +			 (unsigned long long)*(const u64 *)entry);
> +		break;
> +	case sizeof(u64) * 2: {

Could this be made sizeof(u128) instead ? But overall this
approach looks good.
> +		const u64 *val = entry;
> +
> +		if (IS_ENABLED(CONFIG_CPU_BIG_ENDIAN))
> +			snprintf(buf, buf_size, "%016llx%016llx",
> +				 (unsigned long long)val[0],
> +				 (unsigned long long)val[1]);
> +		else
> +			snprintf(buf, buf_size, "%016llx%016llx",
> +				 (unsigned long long)val[1],
> +				 (unsigned long long)val[0]);
> +		break;
> +	}
> +	default:
> +		snprintf(buf, buf_size, "unsupported");
> +		break;
> +	}
> +}
> +
> +#define ptval_to_str(buf, val)						\
> +	do {								\
> +		typeof(val) __val = (val);				\
> +									\
> +		ptval_bytes_to_str((buf), sizeof(buf), &__val,		\
> +				   sizeof(__val));			\
> +	} while (0)
> +
>  static void __print_bad_page_map_pgtable(struct mm_struct *mm, unsigned long addr)
>  {
> -	unsigned long long pgdv, p4dv, pudv, pmdv;
> +	char pgd_str[PTVAL_STR_MAX];
> +	char p4d_str[PTVAL_STR_MAX];
> +	char pud_str[PTVAL_STR_MAX];
> +	char pmd_str[PTVAL_STR_MAX];
>  	p4d_t p4d, *p4dp;
>  	pud_t pud, *pudp;
>  	pmd_t pmd, *pmdp;
> @@ -532,34 +580,34 @@ static void __print_bad_page_map_pgtable(struct mm_struct *mm, unsigned long add
>  	 * see locking requirements for print_bad_page_map().
>  	 */
>  	pgdp = pgd_offset(mm, addr);
> -	pgdv = pgd_val(*pgdp);
> +	ptval_to_str(pgd_str, pgd_val(*pgdp));
>  
>  	if (!pgd_present(*pgdp) || pgd_leaf(*pgdp)) {
> -		pr_alert("pgd:%08llx\n", pgdv);
> +		pr_alert("pgd: %s\n", pgd_str);
>  		return;
>  	}
>  
>  	p4dp = p4d_offset(pgdp, addr);
>  	p4d = p4dp_get(p4dp);
> -	p4dv = p4d_val(p4d);
> +	ptval_to_str(p4d_str, p4d_val(p4d));
>  
>  	if (!p4d_present(p4d) || p4d_leaf(p4d)) {
> -		pr_alert("pgd:%08llx p4d:%08llx\n", pgdv, p4dv);
> +		pr_alert("pgd: %s p4d: %s\n", pgd_str, p4d_str);
>  		return;
>  	}
>  
>  	pudp = pud_offset(p4dp, addr);
>  	pud = pudp_get(pudp);
> -	pudv = pud_val(pud);
> +	ptval_to_str(pud_str, pud_val(pud));
>  
>  	if (!pud_present(pud) || pud_leaf(pud)) {
> -		pr_alert("pgd:%08llx p4d:%08llx pud:%08llx\n", pgdv, p4dv, pudv);
> +		pr_alert("pgd: %s p4d: %s pud: %s\n", pgd_str, p4d_str, pud_str);
>  		return;
>  	}
>  
>  	pmdp = pmd_offset(pudp, addr);
>  	pmd = pmdp_get(pmdp);
> -	pmdv = pmd_val(pmd);
> +	ptval_to_str(pmd_str, pmd_val(pmd));
>  
>  	/*
>  	 * Dumping the PTE would be nice, but it's tricky with CONFIG_HIGHPTE,
> @@ -567,8 +615,8 @@ static void __print_bad_page_map_pgtable(struct mm_struct *mm, unsigned long add
>  	 * doing another map would be bad. print_bad_page_map() should
>  	 * already take care of printing the PTE.
>  	 */
> -	pr_alert("pgd:%08llx p4d:%08llx pud:%08llx pmd:%08llx\n", pgdv,
> -		 p4dv, pudv, pmdv);
> +	pr_alert("pgd: %s p4d: %s pud: %s pmd: %s\n", pgd_str, p4d_str, pud_str,
> +		 pmd_str);
>  }
>  
>  /*
> @@ -584,10 +632,11 @@ static void __print_bad_page_map_pgtable(struct mm_struct *mm, unsigned long add
>   * page table lock.
>   */
>  static void print_bad_page_map(struct vm_area_struct *vma,
> -		unsigned long addr, unsigned long long entry, struct page *page,
> -		enum pgtable_level level)
> +		unsigned long addr, const void *entry, size_t entry_size,
> +		struct page *page, enum pgtable_level level)
>  {
>  	struct address_space *mapping;
> +	char entry_str[PTVAL_STR_MAX];
>  	pgoff_t index;
>  
>  	if (is_bad_page_map_ratelimited())
> @@ -596,8 +645,9 @@ static void print_bad_page_map(struct vm_area_struct *vma,
>  	mapping = vma->vm_file ? vma->vm_file->f_mapping : NULL;
>  	index = linear_page_index(vma, addr);
>  
> -	pr_alert("BUG: Bad page map in process %s  %s:%08llx", current->comm,
> -		 pgtable_level_to_str(level), entry);
> +	ptval_bytes_to_str(entry_str, sizeof(entry_str), entry, entry_size);
> +	pr_alert("BUG: Bad page map in process %s  %s: %s", current->comm,
> +		 pgtable_level_to_str(level), entry_str);
>  	__print_bad_page_map_pgtable(vma->vm_mm, addr);
>  	if (page)
>  		dump_page(page, "bad page map");
> @@ -627,8 +677,14 @@ static inline bool pgtable_level_has_pxx_special(enum pgtable_level level)
>  	}
>  }
>  
> -#define print_bad_pte(vma, addr, pte, page) \
> -	print_bad_page_map(vma, addr, pte_val(pte), page, PGTABLE_LEVEL_PTE)
> +static void print_bad_pte(struct vm_area_struct *vma, unsigned long addr,
> +			  pte_t pte, struct page *page)
> +{
> +	typeof(pte_val(pte)) entry = pte_val(pte);
> +
> +	print_bad_page_map(vma, addr, &entry, sizeof(entry), page,
> +			   PGTABLE_LEVEL_PTE);
> +}
>  
>  /**
>   * __vm_normal_page() - Get the "struct page" associated with a page table entry.
> @@ -636,8 +692,9 @@ static inline bool pgtable_level_has_pxx_special(enum pgtable_level level)
>   * @addr: The address where the page table entry is mapped.
>   * @pfn: The PFN stored in the page table entry.
>   * @special: Whether the page table entry is marked "special".
> - * @level: The page table level for error reporting purposes only.
>   * @entry: The page table entry value for error reporting purposes only.
> + * @entry_size: The size of @entry.
> + * @level: The page table level for error reporting purposes only.
>   *
>   * "Special" mappings do not wish to be associated with a "struct page" (either
>   * it doesn't exist, or it exists but they don't want to touch it). In this
> @@ -697,7 +754,7 @@ static inline bool pgtable_level_has_pxx_special(enum pgtable_level level)
>   */
>  static inline struct page *__vm_normal_page(struct vm_area_struct *vma,
>  		unsigned long addr, unsigned long pfn, bool special,
> -		unsigned long long entry, enum pgtable_level level)
> +		const void *entry, size_t entry_size, enum pgtable_level level)
>  {
>  	if (pgtable_level_has_pxx_special(level)) {
>  		if (unlikely(special)) {
> @@ -710,7 +767,8 @@ static inline struct page *__vm_normal_page(struct vm_area_struct *vma,
>  			if (is_zero_pfn(pfn) || is_huge_zero_pfn(pfn))
>  				return NULL;
>  
> -			print_bad_page_map(vma, addr, entry, NULL, level);
> +			print_bad_page_map(vma, addr, entry, entry_size, NULL,
> +					   level);
>  			return NULL;
>  		}
>  		/*
> @@ -741,7 +799,7 @@ static inline struct page *__vm_normal_page(struct vm_area_struct *vma,
>  
>  	if (unlikely(pfn > highest_memmap_pfn)) {
>  		/* Corrupted page table entry. */
> -		print_bad_page_map(vma, addr, entry, NULL, level);
> +		print_bad_page_map(vma, addr, entry, entry_size, NULL, level);
>  		return NULL;
>  	}
>  	/*
> @@ -767,8 +825,10 @@ static inline struct page *__vm_normal_page(struct vm_area_struct *vma,
>  struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr,
>  			    pte_t pte)
>  {
> +	typeof(pte_val(pte)) entry = pte_val(pte);
> +
>  	return __vm_normal_page(vma, addr, pte_pfn(pte), pte_special(pte),
> -				pte_val(pte), PGTABLE_LEVEL_PTE);
> +				&entry, sizeof(entry), PGTABLE_LEVEL_PTE);
>  }
>  
>  /**
> @@ -809,8 +869,10 @@ struct folio *vm_normal_folio(struct vm_area_struct *vma, unsigned long addr,
>  struct page *vm_normal_page_pmd(struct vm_area_struct *vma, unsigned long addr,
>  				pmd_t pmd)
>  {
> +	typeof(pmd_val(pmd)) entry = pmd_val(pmd);
> +
>  	return __vm_normal_page(vma, addr, pmd_pfn(pmd), pmd_special(pmd),
> -				pmd_val(pmd), PGTABLE_LEVEL_PMD);
> +				&entry, sizeof(entry), PGTABLE_LEVEL_PMD);
>  }
>  
>  /**
> @@ -850,8 +912,10 @@ struct folio *vm_normal_folio_pmd(struct vm_area_struct *vma,
>  struct page *vm_normal_page_pud(struct vm_area_struct *vma,
>  		unsigned long addr, pud_t pud)
>  {
> +	typeof(pud_val(pud)) entry = pud_val(pud);
> +
>  	return __vm_normal_page(vma, addr, pud_pfn(pud), pud_special(pud),
> -				pud_val(pud), PGTABLE_LEVEL_PUD);
> +				&entry, sizeof(entry), PGTABLE_LEVEL_PUD);
>  }
>  #endif
>  


  parent reply	other threads:[~2026-07-02  4:29 UTC|newest]

Thread overview: 21+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-06-10  4:35 [RFC V2 0/3] lib/vsprintf: Add support for pgtable entries Anshuman Khandual
2026-06-10  4:35 ` [RFC V2 1/3] " Anshuman Khandual
2026-06-10 11:13   ` Usama Arif
2026-06-11  5:15     ` Anshuman Khandual
2026-06-11  7:17       ` Andy Shevchenko
2026-06-11  9:50         ` Anshuman Khandual
2026-06-11 18:59           ` Andy Shevchenko
2026-06-10  4:35 ` [RFC V2 2/3] kunit: printf: Add test " Anshuman Khandual
2026-06-10  4:35 ` [RFC V2 3/3] mm: Replace pgtable entry prints with new format Anshuman Khandual
2026-06-11 11:32   ` David Hildenbrand (Arm)
2026-06-12 11:08   ` David Hildenbrand (Arm)
2026-06-12 21:26     ` Hugh Dickins
2026-06-15 16:01       ` David Hildenbrand (Arm)
2026-06-16  6:19         ` Hugh Dickins
2026-06-30 13:36           ` David Hildenbrand (Arm)
2026-07-01  9:00             ` Andy Shevchenko
2026-07-02  4:29             ` Anshuman Khandual [this message]
2026-06-11 19:15 ` [RFC V2 0/3] lib/vsprintf: Add support for pgtable entries Matthew Wilcox
2026-06-11 19:33   ` Andy Shevchenko
2026-06-11 19:37     ` Matthew Wilcox
2026-06-12 11:14       ` David Hildenbrand (Arm)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=5a8e82f3-ed21-48a8-af3c-36a08fd2b0ec@arm.com \
    --to=anshuman.khandual@arm.com \
    --cc=akpm@linux-foundation.org \
    --cc=andriy.shevchenko@linux.intel.com \
    --cc=corbet@lwn.net \
    --cc=david@kernel.org \
    --cc=hughd@google.com \
    --cc=linux-doc@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=linux@rasmusvillemoes.dk \
    --cc=ljs@kernel.org \
    --cc=pmladek@suse.com \
    --cc=rostedt@goodmis.org \
    --cc=senozhatsky@chromium.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox