* [PATCH] arm64: mm: show direct mapping use in /proc/meminfo
@ 2025-10-13 23:51 Yang Shi
2025-10-14 8:43 ` Ryan Roberts
` (2 more replies)
0 siblings, 3 replies; 7+ messages in thread
From: Yang Shi @ 2025-10-13 23:51 UTC (permalink / raw)
To: ryan.roberts, cl, catalin.marinas, will
Cc: yang, linux-arm-kernel, linux-kernel
Since commit a166563e7ec3 ("arm64: mm: support large block mapping when
rodata=full"), the direct mapping may be split on some machines instead
keeping static since boot. It makes more sense to show the direct mapping
use in /proc/meminfo than before.
This patch will make /proc/meminfo show the direct mapping use like the
below (4K base page size):
DirectMap4K: 94792 kB
DirectMap64K: 134208 kB
DirectMap2M: 1173504 kB
DirectMap32M: 5636096 kB
DirectMap1G: 529530880 kB
Although just the machines which support BBML2_NOABORT can split the
direct mapping, show it on all machines regardless of BBML2_NOABORT so
that the users have consistent view in order to avoid confusion.
Signed-off-by: Yang Shi <yang@os.amperecomputing.com>
---
arch/arm64/mm/mmu.c | 93 ++++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 91 insertions(+), 2 deletions(-)
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index b8d37eb037fc..e5da0f521e58 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -29,6 +29,7 @@
#include <linux/mm_inline.h>
#include <linux/pagewalk.h>
#include <linux/stop_machine.h>
+#include <linux/proc_fs.h>
#include <asm/barrier.h>
#include <asm/cputype.h>
@@ -51,6 +52,17 @@
DEFINE_STATIC_KEY_FALSE(arm64_ptdump_lock_key);
+enum direct_map_type {
+ PTE,
+ CONT_PTE,
+ PMD,
+ CONT_PMD,
+ PUD,
+ NR_DIRECT_MAP_TYPE,
+};
+
+unsigned long direct_map_cnt[NR_DIRECT_MAP_TYPE];
+
u64 kimage_voffset __ro_after_init;
EXPORT_SYMBOL(kimage_voffset);
@@ -171,6 +183,60 @@ static void init_clear_pgtable(void *table)
dsb(ishst);
}
+void arch_report_meminfo(struct seq_file *m)
+{
+ char *size[NR_DIRECT_MAP_TYPE];
+ unsigned int shift[NR_DIRECT_MAP_TYPE];
+
+#if defined(CONFIG_ARM64_4K_PAGES)
+ size[PTE] = "4K";
+ size[CONT_PTE] = "64K";
+ size[PMD] = "2M";
+ size[CONT_PMD] = "32M";
+ size[PUD] = "1G";
+
+ shift[PTE] = 2;
+ shift[CONT_PTE] = 6;
+ shift[PMD] = 11;
+ shift[CONT_PMD] = 15;
+ shift[PUD] = 20;
+#elif defined(CONFIG_ARM64_16K_PAGES)
+ size[PTE] = "16K";
+ size[CONT_PTE] = "2M";
+ size[PMD] = "32M";
+ size[CONT_PMD] = "1G";
+
+ shift[PTE] = 4;
+ shift[CONT_PTE] = 11;
+ shift[PMD] = 15;
+ shift[CONT_PMD] = 20;
+#elif defined(CONFIG_ARM64_64K_PAGES)
+ size[PTE] = "64K";
+ size[CONT_PTE] = "2M";
+ size[PMD] = "512M";
+ size[CONT_PMD] = "16G";
+
+ shift[PTE] = 6;
+ shift[CONT_PTE] = 11;
+ shift[PMD] = 19;
+ shift[CONT_PMD] = 24;
+#endif
+
+ seq_printf(m, "DirectMap%s: %8lu kB\n",
+ size[PTE], direct_map_cnt[PTE] << shift[PTE]);
+ seq_printf(m, "DirectMap%s: %8lu kB\n",
+ size[CONT_PTE],
+ direct_map_cnt[CONT_PTE] << shift[CONT_PTE]);
+ seq_printf(m, "DirectMap%s: %8lu kB\n",
+ size[PMD], direct_map_cnt[PMD] << shift[PMD]);
+ seq_printf(m, "DirectMap%s: %8lu kB\n",
+ size[CONT_PMD],
+ direct_map_cnt[CONT_PMD] << shift[CONT_PMD]);
+ if (pud_sect_supported())
+ seq_printf(m, "DirectMap%s: %8lu kB\n",
+ size[PUD], direct_map_cnt[PUD] << shift[PUD]);
+}
+
static void init_pte(pte_t *ptep, unsigned long addr, unsigned long end,
phys_addr_t phys, pgprot_t prot)
{
@@ -183,6 +249,9 @@ static void init_pte(pte_t *ptep, unsigned long addr, unsigned long end,
*/
__set_pte_nosync(ptep, pfn_pte(__phys_to_pfn(phys), prot));
+ if (!(pgprot_val(prot) & PTE_CONT))
+ direct_map_cnt[PTE]++;
+
/*
* After the PTE entry has been populated once, we
* only allow updates to the permission attributes.
@@ -229,8 +298,10 @@ static void alloc_init_cont_pte(pmd_t *pmdp, unsigned long addr,
/* use a contiguous mapping if the range is suitably aligned */
if ((((addr | next | phys) & ~CONT_PTE_MASK) == 0) &&
- (flags & NO_CONT_MAPPINGS) == 0)
+ (flags & NO_CONT_MAPPINGS) == 0) {
__prot = __pgprot(pgprot_val(prot) | PTE_CONT);
+ direct_map_cnt[CONT_PTE]++;
+ }
init_pte(ptep, addr, next, phys, __prot);
@@ -262,6 +333,9 @@ static void init_pmd(pmd_t *pmdp, unsigned long addr, unsigned long end,
(flags & NO_BLOCK_MAPPINGS) == 0) {
pmd_set_huge(pmdp, phys, prot);
+ if (!(pgprot_val(prot) & PTE_CONT))
+ direct_map_cnt[PMD]++;
+
/*
* After the PMD entry has been populated once, we
* only allow updates to the permission attributes.
@@ -317,8 +391,10 @@ static void alloc_init_cont_pmd(pud_t *pudp, unsigned long addr,
/* use a contiguous mapping if the range is suitably aligned */
if ((((addr | next | phys) & ~CONT_PMD_MASK) == 0) &&
- (flags & NO_CONT_MAPPINGS) == 0)
+ (flags & NO_CONT_MAPPINGS) == 0) {
__prot = __pgprot(pgprot_val(prot) | PTE_CONT);
+ direct_map_cnt[CONT_PMD]++;
+ }
init_pmd(pmdp, addr, next, phys, __prot, pgtable_alloc, flags);
@@ -368,6 +444,7 @@ static void alloc_init_pud(p4d_t *p4dp, unsigned long addr, unsigned long end,
(flags & NO_BLOCK_MAPPINGS) == 0) {
pud_set_huge(pudp, phys, prot);
+ direct_map_cnt[PUD]++;
/*
* After the PUD entry has been populated once, we
* only allow updates to the permission attributes.
@@ -532,9 +609,13 @@ static void split_contpte(pte_t *ptep)
{
int i;
+ direct_map_cnt[CONT_PTE]--;
+
ptep = PTR_ALIGN_DOWN(ptep, sizeof(*ptep) * CONT_PTES);
for (i = 0; i < CONT_PTES; i++, ptep++)
__set_pte(ptep, pte_mknoncont(__ptep_get(ptep)));
+
+ direct_map_cnt[PTE] += CONT_PTES;
}
static int split_pmd(pmd_t *pmdp, pmd_t pmd, gfp_t gfp, bool to_cont)
@@ -559,8 +640,10 @@ static int split_pmd(pmd_t *pmdp, pmd_t pmd, gfp_t gfp, bool to_cont)
if (to_cont)
prot = __pgprot(pgprot_val(prot) | PTE_CONT);
+ direct_map_cnt[PMD]--;
for (i = 0; i < PTRS_PER_PTE; i++, ptep++, pfn++)
__set_pte(ptep, pfn_pte(pfn, prot));
+ direct_map_cnt[CONT_PTE] += PTRS_PER_PTE / CONT_PTES;
/*
* Ensure the pte entries are visible to the table walker by the time
@@ -576,9 +659,13 @@ static void split_contpmd(pmd_t *pmdp)
{
int i;
+ direct_map_cnt[CONT_PMD]--;
+
pmdp = PTR_ALIGN_DOWN(pmdp, sizeof(*pmdp) * CONT_PMDS);
for (i = 0; i < CONT_PMDS; i++, pmdp++)
set_pmd(pmdp, pmd_mknoncont(pmdp_get(pmdp)));
+
+ direct_map_cnt[PMD] += CONT_PMDS;
}
static int split_pud(pud_t *pudp, pud_t pud, gfp_t gfp, bool to_cont)
@@ -604,8 +691,10 @@ static int split_pud(pud_t *pudp, pud_t pud, gfp_t gfp, bool to_cont)
if (to_cont)
prot = __pgprot(pgprot_val(prot) | PTE_CONT);
+ direct_map_cnt[PUD]--;
for (i = 0; i < PTRS_PER_PMD; i++, pmdp++, pfn += step)
set_pmd(pmdp, pfn_pmd(pfn, prot));
+ direct_map_cnt[CONT_PMD] += PTRS_PER_PMD/CONT_PMDS;
/*
* Ensure the pmd entries are visible to the table walker by the time
--
2.47.0
^ permalink raw reply related [flat|nested] 7+ messages in thread* Re: [PATCH] arm64: mm: show direct mapping use in /proc/meminfo 2025-10-13 23:51 [PATCH] arm64: mm: show direct mapping use in /proc/meminfo Yang Shi @ 2025-10-14 8:43 ` Ryan Roberts 2025-10-14 20:49 ` Yang Shi 2025-10-15 12:59 ` kernel test robot 2025-10-16 7:26 ` kernel test robot 2 siblings, 1 reply; 7+ messages in thread From: Ryan Roberts @ 2025-10-14 8:43 UTC (permalink / raw) To: Yang Shi, cl, catalin.marinas, will; +Cc: linux-arm-kernel, linux-kernel On 14/10/2025 00:51, Yang Shi wrote: > Since commit a166563e7ec3 ("arm64: mm: support large block mapping when > rodata=full"), the direct mapping may be split on some machines instead > keeping static since boot. It makes more sense to show the direct mapping > use in /proc/meminfo than before. It's possible to divine this information from /sys/kernel/debug/kernel_page_tables. Dev even has a script to post-process that to provide summary stats on how much memory is mapped by each block size. Admittedly, this file is in debugfs and is not usually enabled for production builds, but Dev's patch Commit fa93b45fd397 ("arm64: Enable vmalloc-huge with ptdump"), merged for v6.18-rc1 gives us a path to enabling by default I think? But I can see the benefits of having this easily available, and I notice that other arches are already doing this. So I guess it makes sense to be consistent. > This patch will make /proc/meminfo show the direct mapping use like the > below (4K base page size): > DirectMap4K: 94792 kB > DirectMap64K: 134208 kB > DirectMap2M: 1173504 kB > DirectMap32M: 5636096 kB > DirectMap1G: 529530880 kB nit: x86, s390 and powerpc use a lower case "k" for DirectMap4k; perhaps we should follow suit? (despite being ugly). Although I think that's usually used to denote base-10 (i.e. 1000 bytes)? That's not the case here, we want base-2. > > Although just the machines which support BBML2_NOABORT can split the > direct mapping, show it on all machines regardless of BBML2_NOABORT so > that the users have consistent view in order to avoid confusion. > > Signed-off-by: Yang Shi <yang@os.amperecomputing.com> > --- > arch/arm64/mm/mmu.c | 93 ++++++++++++++++++++++++++++++++++++++++++++- > 1 file changed, 91 insertions(+), 2 deletions(-) > > diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c > index b8d37eb037fc..e5da0f521e58 100644 > --- a/arch/arm64/mm/mmu.c > +++ b/arch/arm64/mm/mmu.c > @@ -29,6 +29,7 @@ > #include <linux/mm_inline.h> > #include <linux/pagewalk.h> > #include <linux/stop_machine.h> > +#include <linux/proc_fs.h> > > #include <asm/barrier.h> > #include <asm/cputype.h> > @@ -51,6 +52,17 @@ > > DEFINE_STATIC_KEY_FALSE(arm64_ptdump_lock_key); > > +enum direct_map_type { > + PTE, > + CONT_PTE, > + PMD, > + CONT_PMD, > + PUD, > + NR_DIRECT_MAP_TYPE, > +}; > + > +unsigned long direct_map_cnt[NR_DIRECT_MAP_TYPE]; I wonder if it would be cleaner to store this in bytes rather than blocks? Then in the code you can just add PAGE_SIZE, CONT_PTE_SIZE, PMD_SIZE, etc as appropriate, then the reporting function becomes simpler; everything is just shifted by 10 to get kB; no need for the shift array. > + > u64 kimage_voffset __ro_after_init; > EXPORT_SYMBOL(kimage_voffset); > > @@ -171,6 +183,60 @@ static void init_clear_pgtable(void *table) > dsb(ishst); > } > > +void arch_report_meminfo(struct seq_file *m) > +{ > + char *size[NR_DIRECT_MAP_TYPE]; > + unsigned int shift[NR_DIRECT_MAP_TYPE]; > + > +#if defined(CONFIG_ARM64_4K_PAGES) > + size[PTE] = "4K"; > + size[CONT_PTE] = "64K"; > + size[PMD] = "2M"; > + size[CONT_PMD] = "32M"; > + size[PUD] = "1G"; > + > + shift[PTE] = 2; > + shift[CONT_PTE] = 6; > + shift[PMD] = 11; > + shift[CONT_PMD] = 15; > + shift[PUD] = 20; > +#elif defined(CONFIG_ARM64_16K_PAGES) > + size[PTE] = "16K"; > + size[CONT_PTE] = "2M"; > + size[PMD] = "32M"; > + size[CONT_PMD] = "1G"; > + > + shift[PTE] = 4; > + shift[CONT_PTE] = 11; > + shift[PMD] = 15; > + shift[CONT_PMD] = 20; > +#elif defined(CONFIG_ARM64_64K_PAGES) > + size[PTE] = "64K"; > + size[CONT_PTE] = "2M"; > + size[PMD] = "512M"; > + size[CONT_PMD] = "16G"; > + > + shift[PTE] = 6; > + shift[CONT_PTE] = 11; > + shift[PMD] = 19; > + shift[CONT_PMD] = 24; > +#endif The ifdeffery is quite ugly. I think we can get rid of the shift array as per above. I was hoping there might be a kernel function that we could pass PAGE_SIZE, PMD_SIZE, etc to and it would give us an appropriate string but I can't find anything. I guess keping the ifdef for size[] is the most pragmatic. > + > + seq_printf(m, "DirectMap%s: %8lu kB\n", > + size[PTE], direct_map_cnt[PTE] << shift[PTE]); > + seq_printf(m, "DirectMap%s: %8lu kB\n", > + size[CONT_PTE], > + direct_map_cnt[CONT_PTE] << shift[CONT_PTE]); > + seq_printf(m, "DirectMap%s: %8lu kB\n", > + size[PMD], direct_map_cnt[PMD] << shift[PMD]); > + seq_printf(m, "DirectMap%s: %8lu kB\n", > + size[CONT_PMD], > + direct_map_cnt[CONT_PMD] << shift[CONT_PMD]); > + if (pud_sect_supported()) > + seq_printf(m, "DirectMap%s: %8lu kB\n", > + size[PUD], direct_map_cnt[PUD] << shift[PUD]); > +} > + > static void init_pte(pte_t *ptep, unsigned long addr, unsigned long end, > phys_addr_t phys, pgprot_t prot) > { > @@ -183,6 +249,9 @@ static void init_pte(pte_t *ptep, unsigned long addr, unsigned long end, > */ > __set_pte_nosync(ptep, pfn_pte(__phys_to_pfn(phys), prot)); > > + if (!(pgprot_val(prot) & PTE_CONT)) > + direct_map_cnt[PTE]++; If adding size in bytes, you could just always add PAGE_SIZE here, but select the bucket based on PTE_CONT? > + > /* > * After the PTE entry has been populated once, we > * only allow updates to the permission attributes. > @@ -229,8 +298,10 @@ static void alloc_init_cont_pte(pmd_t *pmdp, unsigned long addr, > > /* use a contiguous mapping if the range is suitably aligned */ > if ((((addr | next | phys) & ~CONT_PTE_MASK) == 0) && > - (flags & NO_CONT_MAPPINGS) == 0) > + (flags & NO_CONT_MAPPINGS) == 0) { > __prot = __pgprot(pgprot_val(prot) | PTE_CONT); > + direct_map_cnt[CONT_PTE]++; Then you don't need this. > + } > > init_pte(ptep, addr, next, phys, __prot); > > @@ -262,6 +333,9 @@ static void init_pmd(pmd_t *pmdp, unsigned long addr, unsigned long end, > (flags & NO_BLOCK_MAPPINGS) == 0) { > pmd_set_huge(pmdp, phys, prot); > > + if (!(pgprot_val(prot) & PTE_CONT)) > + direct_map_cnt[PMD]++; Same here... > + > /* > * After the PMD entry has been populated once, we > * only allow updates to the permission attributes. > @@ -317,8 +391,10 @@ static void alloc_init_cont_pmd(pud_t *pudp, unsigned long addr, > > /* use a contiguous mapping if the range is suitably aligned */ > if ((((addr | next | phys) & ~CONT_PMD_MASK) == 0) && > - (flags & NO_CONT_MAPPINGS) == 0) > + (flags & NO_CONT_MAPPINGS) == 0) { > __prot = __pgprot(pgprot_val(prot) | PTE_CONT); > + direct_map_cnt[CONT_PMD]++; Then this can go too. > + } > > init_pmd(pmdp, addr, next, phys, __prot, pgtable_alloc, flags); > > @@ -368,6 +444,7 @@ static void alloc_init_pud(p4d_t *p4dp, unsigned long addr, unsigned long end, > (flags & NO_BLOCK_MAPPINGS) == 0) { > pud_set_huge(pudp, phys, prot); > > + direct_map_cnt[PUD]++; > /* > * After the PUD entry has been populated once, we > * only allow updates to the permission attributes. > @@ -532,9 +609,13 @@ static void split_contpte(pte_t *ptep) > { > int i; > > + direct_map_cnt[CONT_PTE]--; > + > ptep = PTR_ALIGN_DOWN(ptep, sizeof(*ptep) * CONT_PTES); > for (i = 0; i < CONT_PTES; i++, ptep++) > __set_pte(ptep, pte_mknoncont(__ptep_get(ptep))); > + > + direct_map_cnt[PTE] += CONT_PTES; > } > > static int split_pmd(pmd_t *pmdp, pmd_t pmd, gfp_t gfp, bool to_cont) > @@ -559,8 +640,10 @@ static int split_pmd(pmd_t *pmdp, pmd_t pmd, gfp_t gfp, bool to_cont) > if (to_cont) > prot = __pgprot(pgprot_val(prot) | PTE_CONT); > > + direct_map_cnt[PMD]--; > for (i = 0; i < PTRS_PER_PTE; i++, ptep++, pfn++) > __set_pte(ptep, pfn_pte(pfn, prot)); > + direct_map_cnt[CONT_PTE] += PTRS_PER_PTE / CONT_PTES; > > /* > * Ensure the pte entries are visible to the table walker by the time > @@ -576,9 +659,13 @@ static void split_contpmd(pmd_t *pmdp) > { > int i; > > + direct_map_cnt[CONT_PMD]--; > + > pmdp = PTR_ALIGN_DOWN(pmdp, sizeof(*pmdp) * CONT_PMDS); > for (i = 0; i < CONT_PMDS; i++, pmdp++) > set_pmd(pmdp, pmd_mknoncont(pmdp_get(pmdp))); > + > + direct_map_cnt[PMD] += CONT_PMDS; > } > > static int split_pud(pud_t *pudp, pud_t pud, gfp_t gfp, bool to_cont) > @@ -604,8 +691,10 @@ static int split_pud(pud_t *pudp, pud_t pud, gfp_t gfp, bool to_cont) > if (to_cont) > prot = __pgprot(pgprot_val(prot) | PTE_CONT); > > + direct_map_cnt[PUD]--; > for (i = 0; i < PTRS_PER_PMD; i++, pmdp++, pfn += step) > set_pmd(pmdp, pfn_pmd(pfn, prot)); > + direct_map_cnt[CONT_PMD] += PTRS_PER_PMD/CONT_PMDS; > > /* > * Ensure the pmd entries are visible to the table walker by the time ^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH] arm64: mm: show direct mapping use in /proc/meminfo 2025-10-14 8:43 ` Ryan Roberts @ 2025-10-14 20:49 ` Yang Shi 2025-10-15 15:37 ` Ryan Roberts 0 siblings, 1 reply; 7+ messages in thread From: Yang Shi @ 2025-10-14 20:49 UTC (permalink / raw) To: Ryan Roberts, cl, catalin.marinas, will; +Cc: linux-arm-kernel, linux-kernel On 10/14/25 1:43 AM, Ryan Roberts wrote: > On 14/10/2025 00:51, Yang Shi wrote: >> Since commit a166563e7ec3 ("arm64: mm: support large block mapping when >> rodata=full"), the direct mapping may be split on some machines instead >> keeping static since boot. It makes more sense to show the direct mapping >> use in /proc/meminfo than before. > It's possible to divine this information from > /sys/kernel/debug/kernel_page_tables. Dev even has a script to post-process that > to provide summary stats on how much memory is mapped by each block size. > > Admittedly, this file is in debugfs and is not usually enabled for production > builds, but Dev's patch Commit fa93b45fd397 ("arm64: Enable vmalloc-huge with > ptdump"), merged for v6.18-rc1 gives us a path to enabling by default I think? First of all, it is in debugfs, not all distros actually enable debugfs. IIRC, Android has debugfs disabled. Secondly, dumping kernel page table is quite time consuming and costly. If the users just want know the direct mapping use, it sounds too overkilling. > > But I can see the benefits of having this easily available, and I notice that > other arches are already doing this. So I guess it makes sense to be consistent. Yeah, some other architectures show this too. > >> This patch will make /proc/meminfo show the direct mapping use like the >> below (4K base page size): >> DirectMap4K: 94792 kB >> DirectMap64K: 134208 kB >> DirectMap2M: 1173504 kB >> DirectMap32M: 5636096 kB >> DirectMap1G: 529530880 kB > nit: x86, s390 and powerpc use a lower case "k" for DirectMap4k; perhaps we > should follow suit? (despite being ugly). Although I think that's usually used > to denote base-10 (i.e. 1000 bytes)? That's not the case here, we want base-2. We should follow the lower case "k" so that the script used on them can work on arm64 too. Do you mean kB should be base-2? > >> Although just the machines which support BBML2_NOABORT can split the >> direct mapping, show it on all machines regardless of BBML2_NOABORT so >> that the users have consistent view in order to avoid confusion. >> >> Signed-off-by: Yang Shi <yang@os.amperecomputing.com> >> --- >> arch/arm64/mm/mmu.c | 93 ++++++++++++++++++++++++++++++++++++++++++++- >> 1 file changed, 91 insertions(+), 2 deletions(-) >> >> diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c >> index b8d37eb037fc..e5da0f521e58 100644 >> --- a/arch/arm64/mm/mmu.c >> +++ b/arch/arm64/mm/mmu.c >> @@ -29,6 +29,7 @@ >> #include <linux/mm_inline.h> >> #include <linux/pagewalk.h> >> #include <linux/stop_machine.h> >> +#include <linux/proc_fs.h> >> >> #include <asm/barrier.h> >> #include <asm/cputype.h> >> @@ -51,6 +52,17 @@ >> >> DEFINE_STATIC_KEY_FALSE(arm64_ptdump_lock_key); >> >> +enum direct_map_type { >> + PTE, >> + CONT_PTE, >> + PMD, >> + CONT_PMD, >> + PUD, >> + NR_DIRECT_MAP_TYPE, >> +}; >> + >> +unsigned long direct_map_cnt[NR_DIRECT_MAP_TYPE]; > I wonder if it would be cleaner to store this in bytes rather than blocks? Then > in the code you can just add PAGE_SIZE, CONT_PTE_SIZE, PMD_SIZE, etc as > appropriate, then the reporting function becomes simpler; everything is just > shifted by 10 to get kB; no need for the shift array. Yeah, good idea. > >> + >> u64 kimage_voffset __ro_after_init; >> EXPORT_SYMBOL(kimage_voffset); >> >> @@ -171,6 +183,60 @@ static void init_clear_pgtable(void *table) >> dsb(ishst); >> } >> >> +void arch_report_meminfo(struct seq_file *m) >> +{ >> + char *size[NR_DIRECT_MAP_TYPE]; >> + unsigned int shift[NR_DIRECT_MAP_TYPE]; >> + >> +#if defined(CONFIG_ARM64_4K_PAGES) >> + size[PTE] = "4K"; >> + size[CONT_PTE] = "64K"; >> + size[PMD] = "2M"; >> + size[CONT_PMD] = "32M"; >> + size[PUD] = "1G"; >> + >> + shift[PTE] = 2; >> + shift[CONT_PTE] = 6; >> + shift[PMD] = 11; >> + shift[CONT_PMD] = 15; >> + shift[PUD] = 20; >> +#elif defined(CONFIG_ARM64_16K_PAGES) >> + size[PTE] = "16K"; >> + size[CONT_PTE] = "2M"; >> + size[PMD] = "32M"; >> + size[CONT_PMD] = "1G"; >> + >> + shift[PTE] = 4; >> + shift[CONT_PTE] = 11; >> + shift[PMD] = 15; >> + shift[CONT_PMD] = 20; >> +#elif defined(CONFIG_ARM64_64K_PAGES) >> + size[PTE] = "64K"; >> + size[CONT_PTE] = "2M"; >> + size[PMD] = "512M"; >> + size[CONT_PMD] = "16G"; >> + >> + shift[PTE] = 6; >> + shift[CONT_PTE] = 11; >> + shift[PMD] = 19; >> + shift[CONT_PMD] = 24; >> +#endif > The ifdeffery is quite ugly. I think we can get rid of the shift array as per > above. I was hoping there might be a kernel function that we could pass > PAGE_SIZE, PMD_SIZE, etc to and it would give us an appropriate string but I > can't find anything. I guess keping the ifdef for size[] is the most pragmatic. Yes, I agree this is the most pragmatic way. > >> + >> + seq_printf(m, "DirectMap%s: %8lu kB\n", >> + size[PTE], direct_map_cnt[PTE] << shift[PTE]); >> + seq_printf(m, "DirectMap%s: %8lu kB\n", >> + size[CONT_PTE], >> + direct_map_cnt[CONT_PTE] << shift[CONT_PTE]); >> + seq_printf(m, "DirectMap%s: %8lu kB\n", >> + size[PMD], direct_map_cnt[PMD] << shift[PMD]); >> + seq_printf(m, "DirectMap%s: %8lu kB\n", >> + size[CONT_PMD], >> + direct_map_cnt[CONT_PMD] << shift[CONT_PMD]); >> + if (pud_sect_supported()) >> + seq_printf(m, "DirectMap%s: %8lu kB\n", >> + size[PUD], direct_map_cnt[PUD] << shift[PUD]); >> +} >> + >> static void init_pte(pte_t *ptep, unsigned long addr, unsigned long end, >> phys_addr_t phys, pgprot_t prot) >> { >> @@ -183,6 +249,9 @@ static void init_pte(pte_t *ptep, unsigned long addr, unsigned long end, >> */ >> __set_pte_nosync(ptep, pfn_pte(__phys_to_pfn(phys), prot)); >> >> + if (!(pgprot_val(prot) & PTE_CONT)) >> + direct_map_cnt[PTE]++; > If adding size in bytes, you could just always add PAGE_SIZE here, but select > the bucket based on PTE_CONT? You mean: if (pgprot_val(prot) & PTE_CONT) direct_map_cnt[PTE_CONT] += PAGE_SIZE; else direct_map_cnt[PTE] += PAGE_SIZE; I don't think this is efficient for PTE_CONT, because I can just do "direct_map_cnt[PTE_CONT] += CONT_PTE_SIZE" once in alloc_init_cont_pte() instead of adding PAGE_SIZE CONT_PTES times, right? This applies to the below comments for PMD/CONT_PMD too. Thanks, Yang > >> + >> /* >> * After the PTE entry has been populated once, we >> * only allow updates to the permission attributes. >> @@ -229,8 +298,10 @@ static void alloc_init_cont_pte(pmd_t *pmdp, unsigned long addr, >> >> /* use a contiguous mapping if the range is suitably aligned */ >> if ((((addr | next | phys) & ~CONT_PTE_MASK) == 0) && >> - (flags & NO_CONT_MAPPINGS) == 0) >> + (flags & NO_CONT_MAPPINGS) == 0) { >> __prot = __pgprot(pgprot_val(prot) | PTE_CONT); >> + direct_map_cnt[CONT_PTE]++; > Then you don't need this. > >> + } >> >> init_pte(ptep, addr, next, phys, __prot); >> >> @@ -262,6 +333,9 @@ static void init_pmd(pmd_t *pmdp, unsigned long addr, unsigned long end, >> (flags & NO_BLOCK_MAPPINGS) == 0) { >> pmd_set_huge(pmdp, phys, prot); >> >> + if (!(pgprot_val(prot) & PTE_CONT)) >> + direct_map_cnt[PMD]++; > Same here... > >> + >> /* >> * After the PMD entry has been populated once, we >> * only allow updates to the permission attributes. >> @@ -317,8 +391,10 @@ static void alloc_init_cont_pmd(pud_t *pudp, unsigned long addr, >> >> /* use a contiguous mapping if the range is suitably aligned */ >> if ((((addr | next | phys) & ~CONT_PMD_MASK) == 0) && >> - (flags & NO_CONT_MAPPINGS) == 0) >> + (flags & NO_CONT_MAPPINGS) == 0) { >> __prot = __pgprot(pgprot_val(prot) | PTE_CONT); >> + direct_map_cnt[CONT_PMD]++; > Then this can go too. > >> + } >> >> init_pmd(pmdp, addr, next, phys, __prot, pgtable_alloc, flags); >> >> @@ -368,6 +444,7 @@ static void alloc_init_pud(p4d_t *p4dp, unsigned long addr, unsigned long end, >> (flags & NO_BLOCK_MAPPINGS) == 0) { >> pud_set_huge(pudp, phys, prot); >> >> + direct_map_cnt[PUD]++; >> /* >> * After the PUD entry has been populated once, we >> * only allow updates to the permission attributes. >> @@ -532,9 +609,13 @@ static void split_contpte(pte_t *ptep) >> { >> int i; >> >> + direct_map_cnt[CONT_PTE]--; >> + >> ptep = PTR_ALIGN_DOWN(ptep, sizeof(*ptep) * CONT_PTES); >> for (i = 0; i < CONT_PTES; i++, ptep++) >> __set_pte(ptep, pte_mknoncont(__ptep_get(ptep))); >> + >> + direct_map_cnt[PTE] += CONT_PTES; >> } >> >> static int split_pmd(pmd_t *pmdp, pmd_t pmd, gfp_t gfp, bool to_cont) >> @@ -559,8 +640,10 @@ static int split_pmd(pmd_t *pmdp, pmd_t pmd, gfp_t gfp, bool to_cont) >> if (to_cont) >> prot = __pgprot(pgprot_val(prot) | PTE_CONT); >> >> + direct_map_cnt[PMD]--; >> for (i = 0; i < PTRS_PER_PTE; i++, ptep++, pfn++) >> __set_pte(ptep, pfn_pte(pfn, prot)); >> + direct_map_cnt[CONT_PTE] += PTRS_PER_PTE / CONT_PTES; >> >> /* >> * Ensure the pte entries are visible to the table walker by the time >> @@ -576,9 +659,13 @@ static void split_contpmd(pmd_t *pmdp) >> { >> int i; >> >> + direct_map_cnt[CONT_PMD]--; >> + >> pmdp = PTR_ALIGN_DOWN(pmdp, sizeof(*pmdp) * CONT_PMDS); >> for (i = 0; i < CONT_PMDS; i++, pmdp++) >> set_pmd(pmdp, pmd_mknoncont(pmdp_get(pmdp))); >> + >> + direct_map_cnt[PMD] += CONT_PMDS; >> } >> >> static int split_pud(pud_t *pudp, pud_t pud, gfp_t gfp, bool to_cont) >> @@ -604,8 +691,10 @@ static int split_pud(pud_t *pudp, pud_t pud, gfp_t gfp, bool to_cont) >> if (to_cont) >> prot = __pgprot(pgprot_val(prot) | PTE_CONT); >> >> + direct_map_cnt[PUD]--; >> for (i = 0; i < PTRS_PER_PMD; i++, pmdp++, pfn += step) >> set_pmd(pmdp, pfn_pmd(pfn, prot)); >> + direct_map_cnt[CONT_PMD] += PTRS_PER_PMD/CONT_PMDS; >> >> /* >> * Ensure the pmd entries are visible to the table walker by the time ^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH] arm64: mm: show direct mapping use in /proc/meminfo 2025-10-14 20:49 ` Yang Shi @ 2025-10-15 15:37 ` Ryan Roberts 2025-10-16 18:59 ` Yang Shi 0 siblings, 1 reply; 7+ messages in thread From: Ryan Roberts @ 2025-10-15 15:37 UTC (permalink / raw) To: Yang Shi, cl, catalin.marinas, will; +Cc: linux-arm-kernel, linux-kernel On 14/10/2025 21:49, Yang Shi wrote: > > > On 10/14/25 1:43 AM, Ryan Roberts wrote: >> On 14/10/2025 00:51, Yang Shi wrote: >>> Since commit a166563e7ec3 ("arm64: mm: support large block mapping when >>> rodata=full"), the direct mapping may be split on some machines instead >>> keeping static since boot. It makes more sense to show the direct mapping >>> use in /proc/meminfo than before. >> It's possible to divine this information from >> /sys/kernel/debug/kernel_page_tables. Dev even has a script to post-process that >> to provide summary stats on how much memory is mapped by each block size. >> >> Admittedly, this file is in debugfs and is not usually enabled for production >> builds, but Dev's patch Commit fa93b45fd397 ("arm64: Enable vmalloc-huge with >> ptdump"), merged for v6.18-rc1 gives us a path to enabling by default I think? > > First of all, it is in debugfs, not all distros actually enable debugfs. IIRC, > Android has debugfs disabled. > > Secondly, dumping kernel page table is quite time consuming and costly. If the > users just want know the direct mapping use, it sounds too overkilling. > >> >> But I can see the benefits of having this easily available, and I notice that >> other arches are already doing this. So I guess it makes sense to be consistent. > > Yeah, some other architectures show this too. > >> >>> This patch will make /proc/meminfo show the direct mapping use like the >>> below (4K base page size): >>> DirectMap4K: 94792 kB >>> DirectMap64K: 134208 kB >>> DirectMap2M: 1173504 kB >>> DirectMap32M: 5636096 kB >>> DirectMap1G: 529530880 kB >> nit: x86, s390 and powerpc use a lower case "k" for DirectMap4k; perhaps we >> should follow suit? (despite being ugly). Although I think that's usually used >> to denote base-10 (i.e. 1000 bytes)? That's not the case here, we want base-2. > > We should follow the lower case "k" so that the script used on them can work on > arm64 too. Thinking about this some more, I really don't love the idea of encoding the page/block sizes in the label name. This is yet another source of potential compat issue when running SW with 4K/16K/64K page size. Could we consider using PTE/CONTPTE/PMD/CONTPMD/PUD instead? > > Do you mean kB should be base-2? No I meant that kB (lower case k) often means 1000x and KiB (upper case K) often means 1024x. So having a lower case k here is technically wrong since we are talking about 4x1024 bytes, not 4x1000 bytes. But its a moot point - if we go for these label names, we should use the lower case k to be compatible. > >> >>> Although just the machines which support BBML2_NOABORT can split the >>> direct mapping, show it on all machines regardless of BBML2_NOABORT so >>> that the users have consistent view in order to avoid confusion. >>> >>> Signed-off-by: Yang Shi <yang@os.amperecomputing.com> >>> --- >>> arch/arm64/mm/mmu.c | 93 ++++++++++++++++++++++++++++++++++++++++++++- >>> 1 file changed, 91 insertions(+), 2 deletions(-) >>> >>> diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c >>> index b8d37eb037fc..e5da0f521e58 100644 >>> --- a/arch/arm64/mm/mmu.c >>> +++ b/arch/arm64/mm/mmu.c >>> @@ -29,6 +29,7 @@ >>> #include <linux/mm_inline.h> >>> #include <linux/pagewalk.h> >>> #include <linux/stop_machine.h> >>> +#include <linux/proc_fs.h> >>> #include <asm/barrier.h> >>> #include <asm/cputype.h> >>> @@ -51,6 +52,17 @@ >>> DEFINE_STATIC_KEY_FALSE(arm64_ptdump_lock_key); >>> +enum direct_map_type { >>> + PTE, >>> + CONT_PTE, >>> + PMD, >>> + CONT_PMD, >>> + PUD, >>> + NR_DIRECT_MAP_TYPE, >>> +}; >>> + >>> +unsigned long direct_map_cnt[NR_DIRECT_MAP_TYPE]; >> I wonder if it would be cleaner to store this in bytes rather than blocks? Then >> in the code you can just add PAGE_SIZE, CONT_PTE_SIZE, PMD_SIZE, etc as >> appropriate, then the reporting function becomes simpler; everything is just >> shifted by 10 to get kB; no need for the shift array. > > Yeah, good idea. > >> >>> + >>> u64 kimage_voffset __ro_after_init; >>> EXPORT_SYMBOL(kimage_voffset); >>> @@ -171,6 +183,60 @@ static void init_clear_pgtable(void *table) >>> dsb(ishst); >>> } >>> +void arch_report_meminfo(struct seq_file *m) >>> +{ >>> + char *size[NR_DIRECT_MAP_TYPE]; >>> + unsigned int shift[NR_DIRECT_MAP_TYPE]; >>> + >>> +#if defined(CONFIG_ARM64_4K_PAGES) >>> + size[PTE] = "4K"; >>> + size[CONT_PTE] = "64K"; >>> + size[PMD] = "2M"; >>> + size[CONT_PMD] = "32M"; >>> + size[PUD] = "1G"; >>> + >>> + shift[PTE] = 2; >>> + shift[CONT_PTE] = 6; >>> + shift[PMD] = 11; >>> + shift[CONT_PMD] = 15; >>> + shift[PUD] = 20; >>> +#elif defined(CONFIG_ARM64_16K_PAGES) >>> + size[PTE] = "16K"; >>> + size[CONT_PTE] = "2M"; >>> + size[PMD] = "32M"; >>> + size[CONT_PMD] = "1G"; >>> + >>> + shift[PTE] = 4; >>> + shift[CONT_PTE] = 11; >>> + shift[PMD] = 15; >>> + shift[CONT_PMD] = 20; >>> +#elif defined(CONFIG_ARM64_64K_PAGES) >>> + size[PTE] = "64K"; >>> + size[CONT_PTE] = "2M"; >>> + size[PMD] = "512M"; >>> + size[CONT_PMD] = "16G"; >>> + >>> + shift[PTE] = 6; >>> + shift[CONT_PTE] = 11; >>> + shift[PMD] = 19; >>> + shift[CONT_PMD] = 24; >>> +#endif >> The ifdeffery is quite ugly. I think we can get rid of the shift array as per >> above. I was hoping there might be a kernel function that we could pass >> PAGE_SIZE, PMD_SIZE, etc to and it would give us an appropriate string but I >> can't find anything. I guess keping the ifdef for size[] is the most pragmatic. > > Yes, I agree this is the most pragmatic way. > >> >>> + >>> + seq_printf(m, "DirectMap%s: %8lu kB\n", >>> + size[PTE], direct_map_cnt[PTE] << shift[PTE]); >>> + seq_printf(m, "DirectMap%s: %8lu kB\n", >>> + size[CONT_PTE], >>> + direct_map_cnt[CONT_PTE] << shift[CONT_PTE]); >>> + seq_printf(m, "DirectMap%s: %8lu kB\n", >>> + size[PMD], direct_map_cnt[PMD] << shift[PMD]); >>> + seq_printf(m, "DirectMap%s: %8lu kB\n", >>> + size[CONT_PMD], >>> + direct_map_cnt[CONT_PMD] << shift[CONT_PMD]); >>> + if (pud_sect_supported()) >>> + seq_printf(m, "DirectMap%s: %8lu kB\n", >>> + size[PUD], direct_map_cnt[PUD] << shift[PUD]); >>> +} >>> + >>> static void init_pte(pte_t *ptep, unsigned long addr, unsigned long end, >>> phys_addr_t phys, pgprot_t prot) >>> { >>> @@ -183,6 +249,9 @@ static void init_pte(pte_t *ptep, unsigned long addr, >>> unsigned long end, >>> */ >>> __set_pte_nosync(ptep, pfn_pte(__phys_to_pfn(phys), prot)); >>> + if (!(pgprot_val(prot) & PTE_CONT)) >>> + direct_map_cnt[PTE]++; >> If adding size in bytes, you could just always add PAGE_SIZE here, but select >> the bucket based on PTE_CONT? > > You mean: > > if (pgprot_val(prot) & PTE_CONT) > direct_map_cnt[PTE_CONT] += PAGE_SIZE; > else > direct_map_cnt[PTE] += PAGE_SIZE; > > I don't think this is efficient for PTE_CONT, because I can just do > "direct_map_cnt[PTE_CONT] += CONT_PTE_SIZE" once in alloc_init_cont_pte() > instead of adding PAGE_SIZE CONT_PTES times, right? Well you could make the same argument for PTEs. you just need to calculate the size. Then you don't need any code per-pte at all. Thanks, Ryan > > This applies to the below comments for PMD/CONT_PMD too. > > Thanks, > Yang > >> >>> + >>> /* >>> * After the PTE entry has been populated once, we >>> * only allow updates to the permission attributes. >>> @@ -229,8 +298,10 @@ static void alloc_init_cont_pte(pmd_t *pmdp, unsigned >>> long addr, >>> /* use a contiguous mapping if the range is suitably aligned */ >>> if ((((addr | next | phys) & ~CONT_PTE_MASK) == 0) && >>> - (flags & NO_CONT_MAPPINGS) == 0) >>> + (flags & NO_CONT_MAPPINGS) == 0) { >>> __prot = __pgprot(pgprot_val(prot) | PTE_CONT); >>> + direct_map_cnt[CONT_PTE]++; >> Then you don't need this. >> >>> + } >>> init_pte(ptep, addr, next, phys, __prot); >>> @@ -262,6 +333,9 @@ static void init_pmd(pmd_t *pmdp, unsigned long addr, >>> unsigned long end, >>> (flags & NO_BLOCK_MAPPINGS) == 0) { >>> pmd_set_huge(pmdp, phys, prot); >>> + if (!(pgprot_val(prot) & PTE_CONT)) >>> + direct_map_cnt[PMD]++; >> Same here... >> >>> + >>> /* >>> * After the PMD entry has been populated once, we >>> * only allow updates to the permission attributes. >>> @@ -317,8 +391,10 @@ static void alloc_init_cont_pmd(pud_t *pudp, unsigned >>> long addr, >>> /* use a contiguous mapping if the range is suitably aligned */ >>> if ((((addr | next | phys) & ~CONT_PMD_MASK) == 0) && >>> - (flags & NO_CONT_MAPPINGS) == 0) >>> + (flags & NO_CONT_MAPPINGS) == 0) { >>> __prot = __pgprot(pgprot_val(prot) | PTE_CONT); >>> + direct_map_cnt[CONT_PMD]++; >> Then this can go too. >> >>> + } >>> init_pmd(pmdp, addr, next, phys, __prot, pgtable_alloc, flags); >>> @@ -368,6 +444,7 @@ static void alloc_init_pud(p4d_t *p4dp, unsigned long >>> addr, unsigned long end, >>> (flags & NO_BLOCK_MAPPINGS) == 0) { >>> pud_set_huge(pudp, phys, prot); >>> + direct_map_cnt[PUD]++; >>> /* >>> * After the PUD entry has been populated once, we >>> * only allow updates to the permission attributes. >>> @@ -532,9 +609,13 @@ static void split_contpte(pte_t *ptep) >>> { >>> int i; >>> + direct_map_cnt[CONT_PTE]--; >>> + >>> ptep = PTR_ALIGN_DOWN(ptep, sizeof(*ptep) * CONT_PTES); >>> for (i = 0; i < CONT_PTES; i++, ptep++) >>> __set_pte(ptep, pte_mknoncont(__ptep_get(ptep))); >>> + >>> + direct_map_cnt[PTE] += CONT_PTES; >>> } >>> static int split_pmd(pmd_t *pmdp, pmd_t pmd, gfp_t gfp, bool to_cont) >>> @@ -559,8 +640,10 @@ static int split_pmd(pmd_t *pmdp, pmd_t pmd, gfp_t gfp, >>> bool to_cont) >>> if (to_cont) >>> prot = __pgprot(pgprot_val(prot) | PTE_CONT); >>> + direct_map_cnt[PMD]--; >>> for (i = 0; i < PTRS_PER_PTE; i++, ptep++, pfn++) >>> __set_pte(ptep, pfn_pte(pfn, prot)); >>> + direct_map_cnt[CONT_PTE] += PTRS_PER_PTE / CONT_PTES; >>> /* >>> * Ensure the pte entries are visible to the table walker by the time >>> @@ -576,9 +659,13 @@ static void split_contpmd(pmd_t *pmdp) >>> { >>> int i; >>> + direct_map_cnt[CONT_PMD]--; >>> + >>> pmdp = PTR_ALIGN_DOWN(pmdp, sizeof(*pmdp) * CONT_PMDS); >>> for (i = 0; i < CONT_PMDS; i++, pmdp++) >>> set_pmd(pmdp, pmd_mknoncont(pmdp_get(pmdp))); >>> + >>> + direct_map_cnt[PMD] += CONT_PMDS; >>> } >>> static int split_pud(pud_t *pudp, pud_t pud, gfp_t gfp, bool to_cont) >>> @@ -604,8 +691,10 @@ static int split_pud(pud_t *pudp, pud_t pud, gfp_t gfp, >>> bool to_cont) >>> if (to_cont) >>> prot = __pgprot(pgprot_val(prot) | PTE_CONT); >>> + direct_map_cnt[PUD]--; >>> for (i = 0; i < PTRS_PER_PMD; i++, pmdp++, pfn += step) >>> set_pmd(pmdp, pfn_pmd(pfn, prot)); >>> + direct_map_cnt[CONT_PMD] += PTRS_PER_PMD/CONT_PMDS; >>> /* >>> * Ensure the pmd entries are visible to the table walker by the time > ^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH] arm64: mm: show direct mapping use in /proc/meminfo 2025-10-15 15:37 ` Ryan Roberts @ 2025-10-16 18:59 ` Yang Shi 0 siblings, 0 replies; 7+ messages in thread From: Yang Shi @ 2025-10-16 18:59 UTC (permalink / raw) To: Ryan Roberts, cl, catalin.marinas, will; +Cc: linux-arm-kernel, linux-kernel On 10/15/25 8:37 AM, Ryan Roberts wrote: > On 14/10/2025 21:49, Yang Shi wrote: >> >> On 10/14/25 1:43 AM, Ryan Roberts wrote: >>> On 14/10/2025 00:51, Yang Shi wrote: >>>> Since commit a166563e7ec3 ("arm64: mm: support large block mapping when >>>> rodata=full"), the direct mapping may be split on some machines instead >>>> keeping static since boot. It makes more sense to show the direct mapping >>>> use in /proc/meminfo than before. >>> It's possible to divine this information from >>> /sys/kernel/debug/kernel_page_tables. Dev even has a script to post-process that >>> to provide summary stats on how much memory is mapped by each block size. >>> >>> Admittedly, this file is in debugfs and is not usually enabled for production >>> builds, but Dev's patch Commit fa93b45fd397 ("arm64: Enable vmalloc-huge with >>> ptdump"), merged for v6.18-rc1 gives us a path to enabling by default I think? >> First of all, it is in debugfs, not all distros actually enable debugfs. IIRC, >> Android has debugfs disabled. >> >> Secondly, dumping kernel page table is quite time consuming and costly. If the >> users just want know the direct mapping use, it sounds too overkilling. >> >>> But I can see the benefits of having this easily available, and I notice that >>> other arches are already doing this. So I guess it makes sense to be consistent. >> Yeah, some other architectures show this too. >> >>>> This patch will make /proc/meminfo show the direct mapping use like the >>>> below (4K base page size): >>>> DirectMap4K: 94792 kB >>>> DirectMap64K: 134208 kB >>>> DirectMap2M: 1173504 kB >>>> DirectMap32M: 5636096 kB >>>> DirectMap1G: 529530880 kB >>> nit: x86, s390 and powerpc use a lower case "k" for DirectMap4k; perhaps we >>> should follow suit? (despite being ugly). Although I think that's usually used >>> to denote base-10 (i.e. 1000 bytes)? That's not the case here, we want base-2. >> We should follow the lower case "k" so that the script used on them can work on >> arm64 too. > Thinking about this some more, I really don't love the idea of encoding the > page/block sizes in the label name. This is yet another source of potential > compat issue when running SW with 4K/16K/64K page size. Could we consider using > PTE/CONTPTE/PMD/CONTPMD/PUD instead? I'd love to it. We can remove all the ifdef churn for the size name. I used the page/block size in order to have a consistent view with other architectures. If we don't care to keep the consistency, using PTE/CONTPTE... definitely makes our life easier. > >> Do you mean kB should be base-2? > No I meant that kB (lower case k) often means 1000x and KiB (upper case K) often > means 1024x. So having a lower case k here is technically wrong since we are > talking about 4x1024 bytes, not 4x1000 bytes. > > But its a moot point - if we go for these label names, we should use the lower > case k to be compatible. It looks like /proc/meminfo uses "kB" for all items. IMHO we'd better follow it. > >>>> Although just the machines which support BBML2_NOABORT can split the >>>> direct mapping, show it on all machines regardless of BBML2_NOABORT so >>>> that the users have consistent view in order to avoid confusion. >>>> >>>> Signed-off-by: Yang Shi <yang@os.amperecomputing.com> >>>> --- >>>> arch/arm64/mm/mmu.c | 93 ++++++++++++++++++++++++++++++++++++++++++++- >>>> 1 file changed, 91 insertions(+), 2 deletions(-) >>>> >>>> diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c >>>> index b8d37eb037fc..e5da0f521e58 100644 >>>> --- a/arch/arm64/mm/mmu.c >>>> +++ b/arch/arm64/mm/mmu.c >>>> @@ -29,6 +29,7 @@ >>>> #include <linux/mm_inline.h> >>>> #include <linux/pagewalk.h> >>>> #include <linux/stop_machine.h> >>>> +#include <linux/proc_fs.h> >>>> #include <asm/barrier.h> >>>> #include <asm/cputype.h> >>>> @@ -51,6 +52,17 @@ >>>> DEFINE_STATIC_KEY_FALSE(arm64_ptdump_lock_key); >>>> +enum direct_map_type { >>>> + PTE, >>>> + CONT_PTE, >>>> + PMD, >>>> + CONT_PMD, >>>> + PUD, >>>> + NR_DIRECT_MAP_TYPE, >>>> +}; >>>> + >>>> +unsigned long direct_map_cnt[NR_DIRECT_MAP_TYPE]; >>> I wonder if it would be cleaner to store this in bytes rather than blocks? Then >>> in the code you can just add PAGE_SIZE, CONT_PTE_SIZE, PMD_SIZE, etc as >>> appropriate, then the reporting function becomes simpler; everything is just >>> shifted by 10 to get kB; no need for the shift array. >> Yeah, good idea. >> >>>> + >>>> u64 kimage_voffset __ro_after_init; >>>> EXPORT_SYMBOL(kimage_voffset); >>>> @@ -171,6 +183,60 @@ static void init_clear_pgtable(void *table) >>>> dsb(ishst); >>>> } >>>> +void arch_report_meminfo(struct seq_file *m) >>>> +{ >>>> + char *size[NR_DIRECT_MAP_TYPE]; >>>> + unsigned int shift[NR_DIRECT_MAP_TYPE]; >>>> + >>>> +#if defined(CONFIG_ARM64_4K_PAGES) >>>> + size[PTE] = "4K"; >>>> + size[CONT_PTE] = "64K"; >>>> + size[PMD] = "2M"; >>>> + size[CONT_PMD] = "32M"; >>>> + size[PUD] = "1G"; >>>> + >>>> + shift[PTE] = 2; >>>> + shift[CONT_PTE] = 6; >>>> + shift[PMD] = 11; >>>> + shift[CONT_PMD] = 15; >>>> + shift[PUD] = 20; >>>> +#elif defined(CONFIG_ARM64_16K_PAGES) >>>> + size[PTE] = "16K"; >>>> + size[CONT_PTE] = "2M"; >>>> + size[PMD] = "32M"; >>>> + size[CONT_PMD] = "1G"; >>>> + >>>> + shift[PTE] = 4; >>>> + shift[CONT_PTE] = 11; >>>> + shift[PMD] = 15; >>>> + shift[CONT_PMD] = 20; >>>> +#elif defined(CONFIG_ARM64_64K_PAGES) >>>> + size[PTE] = "64K"; >>>> + size[CONT_PTE] = "2M"; >>>> + size[PMD] = "512M"; >>>> + size[CONT_PMD] = "16G"; >>>> + >>>> + shift[PTE] = 6; >>>> + shift[CONT_PTE] = 11; >>>> + shift[PMD] = 19; >>>> + shift[CONT_PMD] = 24; >>>> +#endif >>> The ifdeffery is quite ugly. I think we can get rid of the shift array as per >>> above. I was hoping there might be a kernel function that we could pass >>> PAGE_SIZE, PMD_SIZE, etc to and it would give us an appropriate string but I >>> can't find anything. I guess keping the ifdef for size[] is the most pragmatic. >> Yes, I agree this is the most pragmatic way. >> >>>> + >>>> + seq_printf(m, "DirectMap%s: %8lu kB\n", >>>> + size[PTE], direct_map_cnt[PTE] << shift[PTE]); >>>> + seq_printf(m, "DirectMap%s: %8lu kB\n", >>>> + size[CONT_PTE], >>>> + direct_map_cnt[CONT_PTE] << shift[CONT_PTE]); >>>> + seq_printf(m, "DirectMap%s: %8lu kB\n", >>>> + size[PMD], direct_map_cnt[PMD] << shift[PMD]); >>>> + seq_printf(m, "DirectMap%s: %8lu kB\n", >>>> + size[CONT_PMD], >>>> + direct_map_cnt[CONT_PMD] << shift[CONT_PMD]); >>>> + if (pud_sect_supported()) >>>> + seq_printf(m, "DirectMap%s: %8lu kB\n", >>>> + size[PUD], direct_map_cnt[PUD] << shift[PUD]); >>>> +} >>>> + >>>> static void init_pte(pte_t *ptep, unsigned long addr, unsigned long end, >>>> phys_addr_t phys, pgprot_t prot) >>>> { >>>> @@ -183,6 +249,9 @@ static void init_pte(pte_t *ptep, unsigned long addr, >>>> unsigned long end, >>>> */ >>>> __set_pte_nosync(ptep, pfn_pte(__phys_to_pfn(phys), prot)); >>>> + if (!(pgprot_val(prot) & PTE_CONT)) >>>> + direct_map_cnt[PTE]++; >>> If adding size in bytes, you could just always add PAGE_SIZE here, but select >>> the bucket based on PTE_CONT? >> You mean: >> >> if (pgprot_val(prot) & PTE_CONT) >> direct_map_cnt[PTE_CONT] += PAGE_SIZE; >> else >> direct_map_cnt[PTE] += PAGE_SIZE; >> >> I don't think this is efficient for PTE_CONT, because I can just do >> "direct_map_cnt[PTE_CONT] += CONT_PTE_SIZE" once in alloc_init_cont_pte() >> instead of adding PAGE_SIZE CONT_PTES times, right? > Well you could make the same argument for PTEs. you just need to calculate the > size. Then you don't need any code per-pte at all. Yeah, good point. Thanks, Yang > > Thanks, > Ryan > >> This applies to the below comments for PMD/CONT_PMD too. >> >> Thanks, >> Yang >> >>>> + >>>> /* >>>> * After the PTE entry has been populated once, we >>>> * only allow updates to the permission attributes. >>>> @@ -229,8 +298,10 @@ static void alloc_init_cont_pte(pmd_t *pmdp, unsigned >>>> long addr, >>>> /* use a contiguous mapping if the range is suitably aligned */ >>>> if ((((addr | next | phys) & ~CONT_PTE_MASK) == 0) && >>>> - (flags & NO_CONT_MAPPINGS) == 0) >>>> + (flags & NO_CONT_MAPPINGS) == 0) { >>>> __prot = __pgprot(pgprot_val(prot) | PTE_CONT); >>>> + direct_map_cnt[CONT_PTE]++; >>> Then you don't need this. >>> >>>> + } >>>> init_pte(ptep, addr, next, phys, __prot); >>>> @@ -262,6 +333,9 @@ static void init_pmd(pmd_t *pmdp, unsigned long addr, >>>> unsigned long end, >>>> (flags & NO_BLOCK_MAPPINGS) == 0) { >>>> pmd_set_huge(pmdp, phys, prot); >>>> + if (!(pgprot_val(prot) & PTE_CONT)) >>>> + direct_map_cnt[PMD]++; >>> Same here... >>> >>>> + >>>> /* >>>> * After the PMD entry has been populated once, we >>>> * only allow updates to the permission attributes. >>>> @@ -317,8 +391,10 @@ static void alloc_init_cont_pmd(pud_t *pudp, unsigned >>>> long addr, >>>> /* use a contiguous mapping if the range is suitably aligned */ >>>> if ((((addr | next | phys) & ~CONT_PMD_MASK) == 0) && >>>> - (flags & NO_CONT_MAPPINGS) == 0) >>>> + (flags & NO_CONT_MAPPINGS) == 0) { >>>> __prot = __pgprot(pgprot_val(prot) | PTE_CONT); >>>> + direct_map_cnt[CONT_PMD]++; >>> Then this can go too. >>> >>>> + } >>>> init_pmd(pmdp, addr, next, phys, __prot, pgtable_alloc, flags); >>>> @@ -368,6 +444,7 @@ static void alloc_init_pud(p4d_t *p4dp, unsigned long >>>> addr, unsigned long end, >>>> (flags & NO_BLOCK_MAPPINGS) == 0) { >>>> pud_set_huge(pudp, phys, prot); >>>> + direct_map_cnt[PUD]++; >>>> /* >>>> * After the PUD entry has been populated once, we >>>> * only allow updates to the permission attributes. >>>> @@ -532,9 +609,13 @@ static void split_contpte(pte_t *ptep) >>>> { >>>> int i; >>>> + direct_map_cnt[CONT_PTE]--; >>>> + >>>> ptep = PTR_ALIGN_DOWN(ptep, sizeof(*ptep) * CONT_PTES); >>>> for (i = 0; i < CONT_PTES; i++, ptep++) >>>> __set_pte(ptep, pte_mknoncont(__ptep_get(ptep))); >>>> + >>>> + direct_map_cnt[PTE] += CONT_PTES; >>>> } >>>> static int split_pmd(pmd_t *pmdp, pmd_t pmd, gfp_t gfp, bool to_cont) >>>> @@ -559,8 +640,10 @@ static int split_pmd(pmd_t *pmdp, pmd_t pmd, gfp_t gfp, >>>> bool to_cont) >>>> if (to_cont) >>>> prot = __pgprot(pgprot_val(prot) | PTE_CONT); >>>> + direct_map_cnt[PMD]--; >>>> for (i = 0; i < PTRS_PER_PTE; i++, ptep++, pfn++) >>>> __set_pte(ptep, pfn_pte(pfn, prot)); >>>> + direct_map_cnt[CONT_PTE] += PTRS_PER_PTE / CONT_PTES; >>>> /* >>>> * Ensure the pte entries are visible to the table walker by the time >>>> @@ -576,9 +659,13 @@ static void split_contpmd(pmd_t *pmdp) >>>> { >>>> int i; >>>> + direct_map_cnt[CONT_PMD]--; >>>> + >>>> pmdp = PTR_ALIGN_DOWN(pmdp, sizeof(*pmdp) * CONT_PMDS); >>>> for (i = 0; i < CONT_PMDS; i++, pmdp++) >>>> set_pmd(pmdp, pmd_mknoncont(pmdp_get(pmdp))); >>>> + >>>> + direct_map_cnt[PMD] += CONT_PMDS; >>>> } >>>> static int split_pud(pud_t *pudp, pud_t pud, gfp_t gfp, bool to_cont) >>>> @@ -604,8 +691,10 @@ static int split_pud(pud_t *pudp, pud_t pud, gfp_t gfp, >>>> bool to_cont) >>>> if (to_cont) >>>> prot = __pgprot(pgprot_val(prot) | PTE_CONT); >>>> + direct_map_cnt[PUD]--; >>>> for (i = 0; i < PTRS_PER_PMD; i++, pmdp++, pfn += step) >>>> set_pmd(pmdp, pfn_pmd(pfn, prot)); >>>> + direct_map_cnt[CONT_PMD] += PTRS_PER_PMD/CONT_PMDS; >>>> /* >>>> * Ensure the pmd entries are visible to the table walker by the time ^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH] arm64: mm: show direct mapping use in /proc/meminfo 2025-10-13 23:51 [PATCH] arm64: mm: show direct mapping use in /proc/meminfo Yang Shi 2025-10-14 8:43 ` Ryan Roberts @ 2025-10-15 12:59 ` kernel test robot 2025-10-16 7:26 ` kernel test robot 2 siblings, 0 replies; 7+ messages in thread From: kernel test robot @ 2025-10-15 12:59 UTC (permalink / raw) To: Yang Shi, ryan.roberts, cl, catalin.marinas, will Cc: oe-kbuild-all, yang, linux-arm-kernel, linux-kernel Hi Yang, kernel test robot noticed the following build warnings: [auto build test WARNING on arm64/for-next/core] [also build test WARNING on linus/master v6.18-rc1 next-20251014] [If your patch is applied to the wrong git tree, kindly drop us a note. And when submitting patch, we suggest to use '--base' as documented in https://git-scm.com/docs/git-format-patch#_base_tree_information] url: https://github.com/intel-lab-lkp/linux/commits/Yang-Shi/arm64-mm-show-direct-mapping-use-in-proc-meminfo/20251014-075409 base: https://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux.git for-next/core patch link: https://lore.kernel.org/r/20251013235118.3072941-1-yang%40os.amperecomputing.com patch subject: [PATCH] arm64: mm: show direct mapping use in /proc/meminfo config: arm64-randconfig-002-20251015 (https://download.01.org/0day-ci/archive/20251015/202510152040.SlpMogzu-lkp@intel.com/config) compiler: aarch64-linux-gcc (GCC) 13.4.0 reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20251015/202510152040.SlpMogzu-lkp@intel.com/reproduce) If you fix the issue in a separate patch/commit (i.e. not just a new version of the same patch/commit), kindly add following tags | Reported-by: kernel test robot <lkp@intel.com> | Closes: https://lore.kernel.org/oe-kbuild-all/202510152040.SlpMogzu-lkp@intel.com/ All warnings (new ones prefixed by >>): >> arch/arm64/mm/mmu.c:186:6: warning: no previous prototype for 'arch_report_meminfo' [-Wmissing-prototypes] 186 | void arch_report_meminfo(struct seq_file *m) | ^~~~~~~~~~~~~~~~~~~ vim +/arch_report_meminfo +186 arch/arm64/mm/mmu.c 185 > 186 void arch_report_meminfo(struct seq_file *m) 187 { 188 char *size[NR_DIRECT_MAP_TYPE]; 189 unsigned int shift[NR_DIRECT_MAP_TYPE]; 190 -- 0-DAY CI Kernel Test Service https://github.com/intel/lkp-tests/wiki ^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH] arm64: mm: show direct mapping use in /proc/meminfo 2025-10-13 23:51 [PATCH] arm64: mm: show direct mapping use in /proc/meminfo Yang Shi 2025-10-14 8:43 ` Ryan Roberts 2025-10-15 12:59 ` kernel test robot @ 2025-10-16 7:26 ` kernel test robot 2 siblings, 0 replies; 7+ messages in thread From: kernel test robot @ 2025-10-16 7:26 UTC (permalink / raw) To: Yang Shi, ryan.roberts, cl, catalin.marinas, will Cc: oe-kbuild-all, yang, linux-arm-kernel, linux-kernel Hi Yang, kernel test robot noticed the following build warnings: [auto build test WARNING on arm64/for-next/core] [also build test WARNING on linus/master v6.18-rc1 next-20251015] [If your patch is applied to the wrong git tree, kindly drop us a note. And when submitting patch, we suggest to use '--base' as documented in https://git-scm.com/docs/git-format-patch#_base_tree_information] url: https://github.com/intel-lab-lkp/linux/commits/Yang-Shi/arm64-mm-show-direct-mapping-use-in-proc-meminfo/20251014-075409 base: https://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux.git for-next/core patch link: https://lore.kernel.org/r/20251013235118.3072941-1-yang%40os.amperecomputing.com patch subject: [PATCH] arm64: mm: show direct mapping use in /proc/meminfo config: arm64-randconfig-r121-20251016 (https://download.01.org/0day-ci/archive/20251016/202510161508.mbPZHa8r-lkp@intel.com/config) compiler: aarch64-linux-gcc (GCC) 8.5.0 reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20251016/202510161508.mbPZHa8r-lkp@intel.com/reproduce) If you fix the issue in a separate patch/commit (i.e. not just a new version of the same patch/commit), kindly add following tags | Reported-by: kernel test robot <lkp@intel.com> | Closes: https://lore.kernel.org/oe-kbuild-all/202510161508.mbPZHa8r-lkp@intel.com/ sparse warnings: (new ones prefixed by >>) >> arch/arm64/mm/mmu.c:64:15: sparse: sparse: symbol 'direct_map_cnt' was not declared. Should it be static? vim +/direct_map_cnt +64 arch/arm64/mm/mmu.c 63 > 64 unsigned long direct_map_cnt[NR_DIRECT_MAP_TYPE]; 65 -- 0-DAY CI Kernel Test Service https://github.com/intel/lkp-tests/wiki ^ permalink raw reply [flat|nested] 7+ messages in thread
end of thread, other threads:[~2025-10-16 19:00 UTC | newest] Thread overview: 7+ messages (download: mbox.gz follow: Atom feed -- links below jump to the message on this page -- 2025-10-13 23:51 [PATCH] arm64: mm: show direct mapping use in /proc/meminfo Yang Shi 2025-10-14 8:43 ` Ryan Roberts 2025-10-14 20:49 ` Yang Shi 2025-10-15 15:37 ` Ryan Roberts 2025-10-16 18:59 ` Yang Shi 2025-10-15 12:59 ` kernel test robot 2025-10-16 7:26 ` kernel test robot
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox; as well as URLs for NNTP newsgroup(s).