* [PATCH v3 17/19] mm/sparse-vmemmap: Remove sparse_vmemmap_init_nid_late()
From: Muchun Song @ 2026-06-02 10:10 UTC (permalink / raw)
To: Oscar Salvador, David Hildenbrand, Andrew Morton,
Madhavan Srinivasan, Michael Ellerman
Cc: Muchun Song, Mike Rapoport, Lorenzo Stoakes, Liam R. Howlett,
Vlastimil Babka, linux-mm, linux-kernel, Nicholas Piggin,
Christophe Leroy (CS GROUP), Ritesh Harjani (IBM),
Aneesh Kumar K.V, linuxppc-dev, Mike Kravetz, Muchun Song
In-Reply-To: <20260602101039.1867613-1-songmuchun@bytedance.com>
hugetlb_vmemmap_init_late() no longer has any users, so the remaining
late-init path in sparse_vmemmap_init_nid_late() is dead code.
Remove sparse_vmemmap_init_nid_late() and its declarations.
Signed-off-by: Muchun Song <songmuchun@bytedance.com>
---
include/linux/mmzone.h | 7 -------
mm/hugetlb_vmemmap.c | 4 ----
mm/hugetlb_vmemmap.h | 5 -----
mm/sparse-vmemmap.c | 11 -----------
mm/sparse.c | 1 -
5 files changed, 28 deletions(-)
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 1331a7b93f33..72883df17c72 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -2170,8 +2170,6 @@ static inline int preinited_vmemmap_section(const struct mem_section *section)
}
void sparse_vmemmap_init_nid_early(int nid);
-void sparse_vmemmap_init_nid_late(int nid);
-
#else
static inline int preinited_vmemmap_section(const struct mem_section *section)
{
@@ -2180,10 +2178,6 @@ static inline int preinited_vmemmap_section(const struct mem_section *section)
static inline void sparse_vmemmap_init_nid_early(int nid)
{
}
-
-static inline void sparse_vmemmap_init_nid_late(int nid)
-{
-}
#endif
static inline int online_section_nr(unsigned long nr)
@@ -2388,7 +2382,6 @@ static inline unsigned long next_present_section_nr(unsigned long section_nr)
#else
#define sparse_vmemmap_init_nid_early(_nid) do {} while (0)
-#define sparse_vmemmap_init_nid_late(_nid) do {} while (0)
#define pfn_in_present_section pfn_valid
#endif /* CONFIG_SPARSEMEM */
diff --git a/mm/hugetlb_vmemmap.c b/mm/hugetlb_vmemmap.c
index 464578ee246e..cde6f3aba87b 100644
--- a/mm/hugetlb_vmemmap.c
+++ b/mm/hugetlb_vmemmap.c
@@ -810,10 +810,6 @@ static struct zone *pfn_to_zone(unsigned nid, unsigned long pfn)
return NULL;
}
-
-void __init hugetlb_vmemmap_init_late(int nid)
-{
-}
#endif
static const struct ctl_table hugetlb_vmemmap_sysctls[] = {
diff --git a/mm/hugetlb_vmemmap.h b/mm/hugetlb_vmemmap.h
index 18b490825215..7ac49c52457d 100644
--- a/mm/hugetlb_vmemmap.h
+++ b/mm/hugetlb_vmemmap.h
@@ -29,7 +29,6 @@ void hugetlb_vmemmap_optimize_folios(struct hstate *h, struct list_head *folio_l
void hugetlb_vmemmap_optimize_bootmem_folios(struct hstate *h, struct list_head *folio_list);
#ifdef CONFIG_SPARSEMEM_VMEMMAP_PREINIT
void hugetlb_vmemmap_init_early(int nid);
-void hugetlb_vmemmap_init_late(int nid);
#endif
@@ -81,10 +80,6 @@ static inline void hugetlb_vmemmap_init_early(int nid)
{
}
-static inline void hugetlb_vmemmap_init_late(int nid)
-{
-}
-
static inline unsigned int hugetlb_vmemmap_optimizable_size(const struct hstate *h)
{
return 0;
diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c
index 3b036251a2f4..077686af394b 100644
--- a/mm/sparse-vmemmap.c
+++ b/mm/sparse-vmemmap.c
@@ -574,17 +574,6 @@ void __init sparse_vmemmap_init_nid_early(int nid)
{
hugetlb_vmemmap_init_early(nid);
}
-
-/*
- * This is called just before the initialization of page structures
- * through memmap_init. Zones are now initialized, so any work that
- * needs to be done that needs zone information can be done from
- * here.
- */
-void __init sparse_vmemmap_init_nid_late(int nid)
-{
- hugetlb_vmemmap_init_late(nid);
-}
#endif
static void subsection_mask_set(unsigned long *map, unsigned long pfn,
diff --git a/mm/sparse.c b/mm/sparse.c
index 3917a47153d8..324213d8bdcb 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -320,7 +320,6 @@ static void __init sparse_init_nid(int nid, unsigned long pnum_begin,
}
}
sparse_usage_fini();
- sparse_vmemmap_init_nid_late(nid);
}
/*
--
2.54.0
^ permalink raw reply related
* [PATCH v3 18/19] mm/hugetlb: Remove unused bootmem cma field
From: Muchun Song @ 2026-06-02 10:10 UTC (permalink / raw)
To: Oscar Salvador, David Hildenbrand, Andrew Morton,
Madhavan Srinivasan, Michael Ellerman
Cc: Muchun Song, Mike Rapoport, Lorenzo Stoakes, Liam R. Howlett,
Vlastimil Babka, linux-mm, linux-kernel, Nicholas Piggin,
Christophe Leroy (CS GROUP), Ritesh Harjani (IBM),
Aneesh Kumar K.V, linuxppc-dev, Mike Kravetz, Muchun Song
In-Reply-To: <20260602101039.1867613-1-songmuchun@bytedance.com>
struct huge_bootmem_page no longer needs to keep the CMA pointer. The
bootmem path only needs to remember whether a huge page came from CMA,
which is already encoded in the flags field.
Set HUGE_BOOTMEM_CMA when the page is allocated and drop the unused cma
field together with the redundant assignments.
Signed-off-by: Muchun Song <songmuchun@bytedance.com>
---
mm/hugetlb.c | 5 +----
mm/hugetlb_cma.c | 29 +++++++++++------------------
mm/internal.h | 2 --
3 files changed, 12 insertions(+), 24 deletions(-)
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 47c3d6d11c58..fb7ad2a4a26b 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -3076,10 +3076,7 @@ static bool __init alloc_bootmem_huge_page(struct hstate *h, int nid)
*/
INIT_LIST_HEAD(&m->list);
m->hstate = h;
- if (!hugetlb_early_cma(h)) {
- m->cma = NULL;
- m->flags = 0;
- }
+ m->flags = hugetlb_early_cma(h) ? HUGE_BOOTMEM_CMA : 0;
/* CMA pages: zone-crossing is validated in hugetlb_cma_reserve(). */
if (!hugetlb_early_cma(h) &&
diff --git a/mm/hugetlb_cma.c b/mm/hugetlb_cma.c
index e487d0ffffc0..4dfce68b354a 100644
--- a/mm/hugetlb_cma.c
+++ b/mm/hugetlb_cma.c
@@ -59,31 +59,24 @@ struct folio *hugetlb_cma_alloc_frozen_folio(int order, gfp_t gfp_mask,
void * __init hugetlb_cma_alloc_bootmem(struct hstate *h, int nid, bool node_exact)
{
struct cma *cma;
- struct huge_bootmem_page *m;
+ void *m;
int node;
cma = hugetlb_cma[nid];
m = cma_reserve_early(cma, huge_page_size(h));
- if (!m) {
- if (node_exact)
- return NULL;
+ if (m || node_exact)
+ return m;
- for_each_node_mask(node, hugetlb_bootmem_nodes) {
- cma = hugetlb_cma[node];
- if (!cma || node == nid)
- continue;
- m = cma_reserve_early(cma, huge_page_size(h));
- if (m)
- break;
- }
- }
-
- if (m) {
- m->flags = HUGE_BOOTMEM_CMA;
- m->cma = cma;
+ for_each_node_mask(node, hugetlb_bootmem_nodes) {
+ cma = hugetlb_cma[node];
+ if (!cma || node == nid)
+ continue;
+ m = cma_reserve_early(cma, huge_page_size(h));
+ if (m)
+ return m;
}
- return m;
+ return NULL;
}
static int __init cmdline_parse_hugetlb_cma(char *p)
diff --git a/mm/internal.h b/mm/internal.h
index 6b9802460a7c..8497673d0ac3 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -24,13 +24,11 @@
struct folio_batch;
struct hstate;
-struct cma;
struct huge_bootmem_page {
struct list_head list;
struct hstate *hstate;
unsigned long flags;
- struct cma *cma;
};
/*
--
2.54.0
^ permalink raw reply related
* [PATCH v3 19/19] mm/mm_init: Fold __init_page_from_nid() into __init_deferred_page()
From: Muchun Song @ 2026-06-02 10:10 UTC (permalink / raw)
To: Oscar Salvador, David Hildenbrand, Andrew Morton,
Madhavan Srinivasan, Michael Ellerman
Cc: Muchun Song, Mike Rapoport, Lorenzo Stoakes, Liam R. Howlett,
Vlastimil Babka, linux-mm, linux-kernel, Nicholas Piggin,
Christophe Leroy (CS GROUP), Ritesh Harjani (IBM),
Aneesh Kumar K.V, linuxppc-dev, Mike Kravetz, Muchun Song
In-Reply-To: <20260602101039.1867613-1-songmuchun@bytedance.com>
__init_page_from_nid() no longer has external users and is only used
locally in mm/mm_init.c under CONFIG_DEFERRED_STRUCT_PAGE_INIT.
Fold it into its sole caller __init_deferred_page() and remove the
separate helper declaration.
Signed-off-by: Muchun Song <songmuchun@bytedance.com>
---
v2->v3:
- fold __init_page_from_nid() into __init_deferred_page() since it
only has a single caller (suggested by Mike Rapoport)
---
mm/internal.h | 1 -
mm/mm_init.c | 44 ++++++++++++++++++--------------------------
2 files changed, 18 insertions(+), 27 deletions(-)
diff --git a/mm/internal.h b/mm/internal.h
index 8497673d0ac3..b33fc87e4555 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -1760,7 +1760,6 @@ static inline bool pte_needs_soft_dirty_wp(struct vm_area_struct *vma, pte_t pte
void __meminit __init_single_page(struct page *page, unsigned long pfn,
unsigned long zone, int nid);
-void __meminit __init_page_from_nid(unsigned long pfn, int nid);
/* shrinker related functions */
unsigned long shrink_slab(gfp_t gfp_mask, int nid, struct mem_cgroup *memcg,
diff --git a/mm/mm_init.c b/mm/mm_init.c
index 41b83dd18c01..f1bbf3b9a321 100644
--- a/mm/mm_init.c
+++ b/mm/mm_init.c
@@ -688,31 +688,6 @@ static __meminit void pageblock_migratetype_init_range(unsigned long pfn,
}
#endif
-/*
- * Initialize a reserved page unconditionally, finding its zone first.
- */
-void __meminit __init_page_from_nid(unsigned long pfn, int nid)
-{
- pg_data_t *pgdat;
- int zid;
-
- pgdat = NODE_DATA(nid);
-
- for (zid = 0; zid < MAX_NR_ZONES; zid++) {
- struct zone *zone = &pgdat->node_zones[zid];
-
- if (zone_spans_pfn(zone, pfn))
- break;
- }
- __init_single_page(pfn_to_page(pfn), pfn, zid, nid);
-
- if (pageblock_aligned(pfn)) {
- enum migratetype mt =
- kho_scratch_migratetype(pfn, MIGRATE_MOVABLE);
- init_pageblock_migratetype(pfn_to_page(pfn), mt, false);
- }
-}
-
#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
static inline void pgdat_set_deferred_range(pg_data_t *pgdat)
{
@@ -771,10 +746,27 @@ defer_init(int nid, unsigned long pfn, unsigned long end_pfn)
static void __meminit __init_deferred_page(unsigned long pfn, int nid)
{
+ pg_data_t *pgdat;
+ int zid;
+
if (early_page_initialised(pfn, nid))
return;
- __init_page_from_nid(pfn, nid);
+ pgdat = NODE_DATA(nid);
+
+ for (zid = 0; zid < MAX_NR_ZONES; zid++) {
+ struct zone *zone = &pgdat->node_zones[zid];
+
+ if (zone_spans_pfn(zone, pfn))
+ break;
+ }
+ __init_single_page(pfn_to_page(pfn), pfn, zid, nid);
+
+ if (pageblock_aligned(pfn)) {
+ enum migratetype mt =
+ kho_scratch_migratetype(pfn, MIGRATE_MOVABLE);
+ init_pageblock_migratetype(pfn_to_page(pfn), mt, false);
+ }
}
#else
static inline void pgdat_set_deferred_range(pg_data_t *pgdat) {}
--
2.54.0
^ permalink raw reply related
* Re: [PATCH v3 00/19] mm: Refactor bootmem gigantic hugepage allocation
From: Oscar Salvador (SUSE) @ 2026-06-02 10:34 UTC (permalink / raw)
To: Muchun Song
Cc: Oscar Salvador, David Hildenbrand, Andrew Morton,
Madhavan Srinivasan, Michael Ellerman, Muchun Song, Mike Rapoport,
Lorenzo Stoakes, Liam R. Howlett, Vlastimil Babka, linux-mm,
linux-kernel, Nicholas Piggin, Christophe Leroy (CS GROUP),
Ritesh Harjani (IBM), Aneesh Kumar K.V, linuxppc-dev,
Mike Kravetz
In-Reply-To: <20260602101039.1867613-1-songmuchun@bytedance.com>
On Tue, Jun 02, 2026 at 06:10:20PM +0800, Muchun Song wrote:
> This series is split out from the earlier larger series "mm: Generalize
> HVO for HugeTLB and device DAX" [1]. It collects the first 19 patches of
> that series as a standalone set of fixes and preparatory cleanups around
> bootmem HugeTLB handling, sparse initialization ordering, and related
> vmemmap setup.
Thanks Muchun, this split-out really helps ease the review.
I think not so many patches from this series escaped review, but I shall
get back to it later this week.
--
Oscar Salvador
SUSE Labs
^ permalink raw reply
* Re: [PATCH 01/11] params: bound array element output to the caller's page buffer
From: Andy Shevchenko @ 2026-06-02 11:26 UTC (permalink / raw)
To: Kees Cook
Cc: Luis Chamberlain, Pengpeng Hou, stable, Petr Pavlu,
Richard Weinberger, Anton Ivanov, Johannes Berg,
Rafael J. Wysocki, Len Brown, Corey Minyard, Gabriel Somlo,
Michael S. Tsirkin, Jani Nikula, Joonas Lahtinen, Rodrigo Vivi,
Tvrtko Ursulin, David Airlie, Simona Vetter, Bart Van Assche,
Jason Gunthorpe, Leon Romanovsky, Laurent Pinchart, Hans de Goede,
Mauro Carvalho Chehab, Bjorn Helgaas, Hannes Reinecke,
James E.J. Bottomley, Martin K. Petersen, Daniel Lezcano,
Zhang Rui, Lukasz Luba, Greg Kroah-Hartman, Jiri Slaby,
Alan Stern, Jason Wang, Xuan Zhuo, Eugenio Pérez,
Jason Baron, Jim Cromie, Tiwei Bie, Benjamin Berg,
Ilpo Järvinen, David E. Box, Maciej W. Rozycki,
Srinivas Pandruvada, Peter Zijlstra, Heiko Carstens,
Vasily Gorbik, Sean Christopherson, Paolo Bonzini,
Thomas Gleixner, Ingo Molnar, Borislav Petkov, Dave Hansen, x86,
H. Peter Anvin, Vinod Koul, Frank Li, Daniel Gomez, Sami Tolvanen,
Aaron Tomlin, Alexander Potapenko, Marco Elver, Dmitry Vyukov,
Andrew Morton, John Johansen, Paul Moore, James Morris,
Serge E. Hallyn, Georgia Garcia, kvm, dmaengine, linux-modules,
kasan-dev, linux-mm, apparmor, linux-security-module, linux-um,
linux-acpi, openipmi-developer, qemu-devel, intel-gfx, dri-devel,
linux-rdma, linux-media, linux-pci, linux-scsi, linux-pm,
linuxppc-dev, linux-serial, linux-usb, usb-storage,
virtualization, linux-kernel, linux-arch, netdev, linux-fsdevel,
linux-hardening
In-Reply-To: <20260521133326.2465264-1-kees@kernel.org>
On Thu, May 21, 2026 at 06:33:14AM -0700, Kees Cook wrote:
>
> param_array_get() appends each element's string representation into the
> shared sysfs page buffer by passing buffer + off to the element getter.
>
> That works for getters that only write a small bounded string, but
> param_get_charp() and similar helpers format against PAGE_SIZE from the
> pointer they receive. Once off is non-zero, an element getter can
> therefore write past the end of the original sysfs page buffer.
>
> Collect each element into a temporary PAGE_SIZE buffer first and then
> copy only the remaining space into the caller's page buffer.
...
> + elem_buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
get_free_page() (or how it is called)?
> + if (!elem_buf)
> + return -ENOMEM;
> +
> for (i = off = 0; i < (arr->num ? *arr->num : arr->max); i++) {
> - /* Replace \n with comma */
> - if (i)
> - buffer[off - 1] = ',';
> p.arg = arr->elem + arr->elemsize * i;
> check_kparam_locked(p.mod);
> - ret = arr->ops->get(buffer + off, &p);
> + ret = arr->ops->get(elem_buf, &p);
> if (ret < 0)
> - return ret;
> + goto out;
> + ret = min(ret, (int)(PAGE_SIZE - 1 - off));
It's usually discouraged to use casts in min/max/clamp. Can we make ret long
or do something different here?
> + if (!ret)
> + break;
> + /* Replace the previous element's trailing newline with a comma. */
> + if (i)
> + buffer[off - 1] = ',';
Can't we do this after with help of strreplace()?
> + memcpy(buffer + off, elem_buf, ret);
> off += ret;
> + if (off == PAGE_SIZE - 1)
> + break;
> }
> buffer[off] = '\0';
> - return off;
> + ret = off;
> +out:
> + kfree(elem_buf);
> + return ret;
--
With Best Regards,
Andy Shevchenko
^ permalink raw reply
* [PATCH v2 2/2] perf script powerpc: Fix a typo in the name of H_DISABLE_AND_GET
From: Gautam Menghani @ 2026-06-02 11:56 UTC (permalink / raw)
To: peterz, mingo, acme, namhyung, mark.rutland, alexander.shishkin,
jolsa, irogers, adrian.hunter, james.clark, atrajeev
Cc: Gautam Menghani, linux-perf-users, linux-kernel, linuxppc-dev
In-Reply-To: <20260602115644.8133-1-Gautam.Menghani@ibm.com>
From: Gautam Menghani <gautam@linux.ibm.com>
The name of the hcall for opcode 432 is "H_DISABLE_AND_GET". This typo
was fixed in the main file [1] in the commit 0f10228c6ff6 ("KVM: PPC: Fix
typo on H_DISABLE_AND_GET hcall").
[1]: arch/powerpc/include/asm/hvcall.h
Signed-off-by: Gautam Menghani <gautam@linux.ibm.com>
---
v2:
1. New patch. Separated out from the original patch
tools/perf/scripts/python/powerpc-hcalls.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/tools/perf/scripts/python/powerpc-hcalls.py b/tools/perf/scripts/python/powerpc-hcalls.py
index f897d6a5e051..4c14bd05da46 100644
--- a/tools/perf/scripts/python/powerpc-hcalls.py
+++ b/tools/perf/scripts/python/powerpc-hcalls.py
@@ -112,7 +112,7 @@ hcall_table = {
420: 'H_CREATE_RPT',
424: 'H_REMOVE_RPT',
428: 'H_REGISTER_RPAGES',
- 432: 'H_DISABLE_AND_GETC',
+ 432: 'H_DISABLE_AND_GET',
436: 'H_ERROR_DATA',
440: 'H_GET_HCA_INFO',
444: 'H_GET_PERF_COUNT',
--
2.53.0
^ permalink raw reply related
* [PATCH v2 1/2] perf script powerpc: Update the hcall list with new hcalls
From: Gautam Menghani @ 2026-06-02 11:56 UTC (permalink / raw)
To: peterz, mingo, acme, namhyung, mark.rutland, alexander.shishkin,
jolsa, irogers, adrian.hunter, james.clark, atrajeev
Cc: Gautam Menghani, linux-perf-users, linux-kernel, linuxppc-dev
From: Gautam Menghani <gautam@linux.ibm.com>
Update the hcall list with the newer hcalls in PPC.
[1]: github.com/torvalds/linux/blob/master/arch/powerpc/include/asm/hvcall.h
Signed-off-by: Gautam Menghani <gautam@linux.ibm.com>
---
v1 -> v2:
1. Add some of the missing hcalls. (Tejas)
2. Separate out the typo fix for H_DISABLE_AND_GET in another patch.
tools/perf/scripts/python/powerpc-hcalls.py | 89 +++++++++++++++++++++
1 file changed, 89 insertions(+)
diff --git a/tools/perf/scripts/python/powerpc-hcalls.py b/tools/perf/scripts/python/powerpc-hcalls.py
index 8b78dc790adb..f897d6a5e051 100644
--- a/tools/perf/scripts/python/powerpc-hcalls.py
+++ b/tools/perf/scripts/python/powerpc-hcalls.py
@@ -117,10 +117,12 @@ hcall_table = {
440: 'H_GET_HCA_INFO',
444: 'H_GET_PERF_COUNT',
448: 'H_MANAGE_TRACE',
+ 456: 'H_GET_CPU_CHARACTERISTICS',
468: 'H_FREE_LOGICAL_LAN_BUFFER',
472: 'H_POLL_PENDING',
484: 'H_QUERY_INT_STATE',
580: 'H_ILLAN_ATTRIBUTES',
+ 584: 'H_ADD_LOGICAL_LAN_BUFFERS',
592: 'H_MODIFY_HEA_QP',
596: 'H_QUERY_HEA_QP',
600: 'H_QUERY_HEA',
@@ -135,11 +137,17 @@ hcall_table = {
644: 'H_ADD_CONN',
648: 'H_DEL_CONN',
664: 'H_JOIN',
+ 672: 'H_VASI_SIGNAL',
676: 'H_VASI_STATE',
+ 680: 'H_VIOCTL',
688: 'H_ENABLE_CRQ',
696: 'H_GET_EM_PARMS',
720: 'H_SET_MPP',
724: 'H_GET_MPP',
+ 732: 'H_REG_SUB_CRQ',
+ 736: 'H_FREE_SUB_CRQ',
+ 740: 'H_SEND_SUB_CRQ',
+ 744: 'H_SEND_SUB_CRQ_INDIRECT',
748: 'H_HOME_NODE_ASSOCIATIVITY',
756: 'H_BEST_ENERGY',
764: 'H_XIRR_X',
@@ -147,7 +155,88 @@ hcall_table = {
772: 'H_COP',
788: 'H_GET_MPP_X',
796: 'H_SET_MODE',
+ 808: 'H_BLOCK_REMOVE',
+ 856: 'H_CLEAR_HPT',
+ 864: 'H_REQUEST_VMC',
+ 876: 'H_RESIZE_HPT_PREPARE',
+ 880: 'H_RESIZE_HPT_COMMIT',
+ 892: 'H_REGISTER_PROC_TBL',
+ 896: 'H_SIGNAL_SYS_RESET',
+ 904: 'H_ALLOCATE_VAS_WINDOW',
+ 908: 'H_MODIFY_VAS_WINDOW',
+ 912: 'H_DEALLOCATE_VAS_WINDOW',
+ 916: 'H_QUERY_VAS_WINDOW',
+ 920: 'H_QUERY_VAS_CAPABILITIES',
+ 924: 'H_QUERY_NX_CAPABILITIES',
+ 928: 'H_GET_NX_FAULT',
+ 936: 'H_INT_GET_SOURCE_INFO',
+ 940: 'H_INT_SET_SOURCE_CONFIG',
+ 944: 'H_INT_GET_SOURCE_CONFIG',
+ 948: 'H_INT_GET_QUEUE_INFO',
+ 952: 'H_INT_SET_QUEUE_CONFIG',
+ 956: 'H_INT_GET_QUEUE_CONFIG',
+ 960: 'H_INT_SET_OS_REPORTING_LINE',
+ 964: 'H_INT_GET_OS_REPORTING_LINE',
+ 968: 'H_INT_ESB',
+ 972: 'H_INT_SYNC',
+ 976: 'H_INT_RESET',
+ 996: 'H_SCM_READ_METADATA',
+ 1000: 'H_SCM_WRITE_METADATA',
+ 1004: 'H_SCM_BIND_MEM',
+ 1008: 'H_SCM_UNBIND_MEM',
+ 1012: 'H_SCM_QUERY_BLOCK_MEM_BINDING',
+ 1016: 'H_SCM_QUERY_LOGICAL_MEM_BINDING',
+ 1020: 'H_SCM_UNBIND_ALL',
+ 1024: 'H_SCM_HEALTH',
+ 1048: 'H_SCM_PERFORMANCE_STATS',
+ 1052: 'H_PKS_GET_CONFIG',
+ 1056: 'H_PKS_SET_PASSWORD',
+ 1060: 'H_PKS_GEN_PASSWORD',
+ 1068: 'H_PKS_WRITE_OBJECT',
+ 1072: 'H_PKS_GEN_KEY',
+ 1076: 'H_PKS_READ_OBJECT',
+ 1080: 'H_PKS_REMOVE_OBJECT',
+ 1084: 'H_PKS_CONFIRM_OBJECT_FLUSHED',
+ 1096: 'H_RPT_INVALIDATE',
+ 1100: 'H_SCM_FLUSH',
+ 1104: 'H_GET_ENERGY_SCALE_INFO',
+ 1108: 'H_PKS_SIGNED_UPDATE',
+ 1112: 'H_HTM',
+ 1116: 'H_WATCHDOG',
+ # Platform specific hcalls used by KVM on PowerVM
+ 1120: 'H_GUEST_GET_CAPABILITIES',
+ 1124: 'H_GUEST_SET_CAPABILITIES',
+ 1136: 'H_GUEST_CREATE',
+ 1140: 'H_GUEST_CREATE_VCPU',
+ 1144: 'H_GUEST_GET_STATE',
+ 1148: 'H_GUEST_SET_STATE',
+ 1152: 'H_GUEST_RUN_VCPU',
+ 1156: 'H_GUEST_COPY_MEMORY',
+ 1160: 'H_GUEST_DELETE',
+ # Key wrapping hcalls
+ 1168: 'H_PKS_WRAP_OBJECT',
+ 1172: 'H_PKS_UNWRAP_OBJECT',
+ # Platform-specific hcalls used by the Ultravisor
+ 61184: 'H_SVM_PAGE_IN',
+ 61188: 'H_SVM_PAGE_OUT',
+ 61192: 'H_SVM_INIT_START',
+ 61196: 'H_SVM_INIT_DONE',
+ 61204: 'H_SVM_INIT_ABORT',
+ # Platform specific hcalls used by KVM
61440: 'H_RTAS',
+ # Platform specific hcalls used by QEMU/SLOF
+ 61441: 'H_LOGICAL_MEMOP',
+ 61442: 'H_CAS',
+ 61443: 'H_UPDATE_DT',
+ # Platform specific hcalls provided by PHYP
+ 61560: 'H_GET_24X7_CATALOG_PAGE',
+ 61564: 'H_GET_24X7_DATA',
+ 61568: 'H_GET_PERF_COUNTER_INFO',
+ # Platform-specific hcalls used for nested HV KVM
+ 63488: 'H_SET_PARTITION_TABLE',
+ 63492: 'H_ENTER_NESTED',
+ 63496: 'H_TLB_INVALIDATE',
+ 63500: 'H_COPY_TOFROM_GUEST',
}
def hcall_table_lookup(opcode):
--
2.53.0
^ permalink raw reply related
* Re: [PATCH v3 00/19] mm: Refactor bootmem gigantic hugepage allocation
From: Muchun Song @ 2026-06-02 12:01 UTC (permalink / raw)
To: Oscar Salvador (SUSE)
Cc: Muchun Song, Oscar Salvador, David Hildenbrand, Andrew Morton,
Madhavan Srinivasan, Michael Ellerman, Mike Rapoport,
Lorenzo Stoakes, Liam R. Howlett, Vlastimil Babka, linux-mm,
linux-kernel, Nicholas Piggin, Christophe Leroy (CS GROUP),
Ritesh Harjani (IBM), Aneesh Kumar K.V, linuxppc-dev,
Mike Kravetz
In-Reply-To: <ah6xzZBh8IysNXpI@localhost.localdomain>
> On Jun 2, 2026, at 18:34, Oscar Salvador (SUSE) <osalvador@kernel.org> wrote:
>
> On Tue, Jun 02, 2026 at 06:10:20PM +0800, Muchun Song wrote:
>> This series is split out from the earlier larger series "mm: Generalize
>> HVO for HugeTLB and device DAX" [1]. It collects the first 19 patches of
>> that series as a standalone set of fixes and preparatory cleanups around
>> bootmem HugeTLB handling, sparse initialization ordering, and related
>> vmemmap setup.
Hi Oscar,
>
> Thanks Muchun, this split-out really helps ease the review.
> I think not so many patches from this series escaped review, but I shall
> get back to it later this week.
Sounds good! Thanks for taking the time to review. Looking forward to your
feedback later this week.
Best,
Muchun
>
>
>
> --
> Oscar Salvador
> SUSE Labs
^ permalink raw reply
* Re: [PATCH] perf script powerpc: Update the hcall list with new hcalls
From: Gautam Menghani @ 2026-06-02 12:06 UTC (permalink / raw)
To: Arnaldo Carvalho de Melo
Cc: Tejas Manhas, Gautam Menghani, peterz, mingo, namhyung,
mark.rutland, alexander.shishkin, jolsa, irogers, adrian.hunter,
james.clark, atrajeev, linux-perf-users, linuxppc-dev,
linux-kernel
In-Reply-To: <ahoo5dI-6fVlFvvR@x1>
On Fri, May 29, 2026 at 09:01:41PM -0300, Arnaldo Carvalho de Melo wrote:
> On Wed, May 20, 2026 at 09:25:08AM +0530, Tejas Manhas wrote:
> > From: Gautam Menghani [1]<gautam@linux.ibm.com>
> >
> > Update the hcall list with the hcalls in bcc's ppchcalls script [1].
> >
> > [1] : [2]https://github.com/iovisor/bcc/blob/master/tools/ppchcalls.py
> >
> >
> > Hi Gautam,
> >
> > This patch doesn't seem to resolve the issue where numbers are seen instead of h
> > calls.
> > I have applied to the latest upstream but the numbers are still visible in perf
> > script.
> > Also the command doesn't recognise python-audit already present in the system.
> >
> > Please find logs below for better understanding.
>
> Any progress on this?
Yeah, sent a v2 - https://lore.kernel.org/all/20260602115644.8133-1-Gautam.Menghani@ibm.com/
Thanks,
Gautam
^ permalink raw reply
* [PATCH v4] powerpc/pseries/Kconfig: Enable CONFIG_VPA_PMU to be used with KVM
From: Gautam Menghani @ 2026-06-02 12:17 UTC (permalink / raw)
To: maddy, mpe, npiggin, chleroy
Cc: Gautam Menghani, linuxppc-dev, linux-kernel, harshpb, stable,
Sean Christopherson
Currently, CONFIG_VPA_PMU is not enabled by default, and consequently
cannot be used for KVM guests at all, unless explicitly enabled on
host kernel.
Mark CONFIG_VPA_PMU as "default m" to ensure it is available when KVM is
being used.
Fixes: 176cda0619b6c ("powerpc/perf: Add perf interface to expose vpa counters")
Cc: stable@vger.kernel.org # v6.13+
Suggested-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Gautam Menghani <gautam@linux.ibm.com>
---
v3 -> v4:
1. Reword the patch description (Harsh)
v2 -> v3:
1. Make CONFIG_VPA_PMU default m so that it can be separately disabled
(Sean)
v1 -> v2:
1. Rebased on latest master
arch/powerpc/platforms/pseries/Kconfig | 1 +
1 file changed, 1 insertion(+)
diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/pseries/Kconfig
index f7052b131a4c..74910ce3a541 100644
--- a/arch/powerpc/platforms/pseries/Kconfig
+++ b/arch/powerpc/platforms/pseries/Kconfig
@@ -154,6 +154,7 @@ config HV_PERF_CTRS
config VPA_PMU
tristate "VPA PMU events"
depends on KVM_BOOK3S_64_HV && HV_PERF_CTRS
+ default m
help
Enable access to the VPA PMU counters via perf. This enables
code that support measurement for KVM on PowerVM(KoP) feature.
--
2.53.0
^ permalink raw reply related
* Re: [PATCH v2] perf kvm stat: Add missing mappings for PPC kvm exit reasons
From: Gautam Menghani @ 2026-06-02 12:21 UTC (permalink / raw)
To: peterz, mingo, acme, namhyung, mark.rutland, alexander.shishkin,
jolsa, irogers, adrian.hunter, james.clark, atrajeev
Cc: linuxppc-dev, linux-perf-users, linux-kernel
In-Reply-To: <20260518125041.39286-1-gautam@linux.ibm.com>
On Mon, May 18, 2026 at 06:20:38PM +0530, Gautam Menghani wrote:
> The macro kvm_trace_symbol_exit is used for providing the mappings
> for the exit trap vectors and their names. Add mappings for H_FAC_UNAVAIL
> and H_VIRT so that exit reasons are displayed as string instead of
> vector numbers when using perf kvm stat.
>
> Signed-off-by: Gautam Menghani <gautam@linux.ibm.com>
> ---
> v1 -> v2:
> 1. Update the patch title and description to remove dependency on
> another file trace_book3s.h
>
> tools/perf/util/kvm-stat-arch/book3s_hv_exits.h | 4 +++-
> 1 file changed, 3 insertions(+), 1 deletion(-)
>
> diff --git a/tools/perf/util/kvm-stat-arch/book3s_hv_exits.h b/tools/perf/util/kvm-stat-arch/book3s_hv_exits.h
> index 2011376c7ab5..2688ca7d0399 100644
> --- a/tools/perf/util/kvm-stat-arch/book3s_hv_exits.h
> +++ b/tools/perf/util/kvm-stat-arch/book3s_hv_exits.h
> @@ -26,8 +26,10 @@
> {0xe00, "H_DATA_STORAGE"}, \
> {0xe20, "H_INST_STORAGE"}, \
> {0xe40, "H_EMUL_ASSIST"}, \
> + {0xea0, "H_VIRT"}, \
> {0xf00, "PERFMON"}, \
> {0xf20, "ALTIVEC"}, \
> - {0xf40, "VSX"}
> + {0xf40, "VSX"}, \
> + {0xf80, "H_FAC_UNAVAIL"}
>
> #endif
> --
Hi,
Please review this and let me know if any changes are needed.
Thanks
^ permalink raw reply
* Re: [PATCH 01/11] params: bound array element output to the caller's page buffer
From: Jason Gunthorpe @ 2026-06-02 12:33 UTC (permalink / raw)
To: Andy Shevchenko
Cc: Kees Cook, Luis Chamberlain, Pengpeng Hou, stable, Petr Pavlu,
Richard Weinberger, Anton Ivanov, Johannes Berg,
Rafael J. Wysocki, Len Brown, Corey Minyard, Gabriel Somlo,
Michael S. Tsirkin, Jani Nikula, Joonas Lahtinen, Rodrigo Vivi,
Tvrtko Ursulin, David Airlie, Simona Vetter, Bart Van Assche,
Leon Romanovsky, Laurent Pinchart, Hans de Goede,
Mauro Carvalho Chehab, Bjorn Helgaas, Hannes Reinecke,
James E.J. Bottomley, Martin K. Petersen, Daniel Lezcano,
Zhang Rui, Lukasz Luba, Greg Kroah-Hartman, Jiri Slaby,
Alan Stern, Jason Wang, Xuan Zhuo, Eugenio Pérez,
Jason Baron, Jim Cromie, Tiwei Bie, Benjamin Berg,
Ilpo Järvinen, David E. Box, Maciej W. Rozycki,
Srinivas Pandruvada, Peter Zijlstra, Heiko Carstens,
Vasily Gorbik, Sean Christopherson, Paolo Bonzini,
Thomas Gleixner, Ingo Molnar, Borislav Petkov, Dave Hansen, x86,
H. Peter Anvin, Vinod Koul, Frank Li, Daniel Gomez, Sami Tolvanen,
Aaron Tomlin, Alexander Potapenko, Marco Elver, Dmitry Vyukov,
Andrew Morton, John Johansen, Paul Moore, James Morris,
Serge E. Hallyn, Georgia Garcia, kvm, dmaengine, linux-modules,
kasan-dev, linux-mm, apparmor, linux-security-module, linux-um,
linux-acpi, openipmi-developer, qemu-devel, intel-gfx, dri-devel,
linux-rdma, linux-media, linux-pci, linux-scsi, linux-pm,
linuxppc-dev, linux-serial, linux-usb, usb-storage,
virtualization, linux-kernel, linux-arch, netdev, linux-fsdevel,
linux-hardening
In-Reply-To: <ah699hwLxIIOZ0-7@ashevche-desk.local>
On Tue, Jun 02, 2026 at 02:26:46PM +0300, Andy Shevchenko wrote:
> On Thu, May 21, 2026 at 06:33:14AM -0700, Kees Cook wrote:
> >
> > param_array_get() appends each element's string representation into the
> > shared sysfs page buffer by passing buffer + off to the element getter.
> >
> > That works for getters that only write a small bounded string, but
> > param_get_charp() and similar helpers format against PAGE_SIZE from the
> > pointer they receive. Once off is non-zero, an element getter can
> > therefore write past the end of the original sysfs page buffer.
> >
> > Collect each element into a temporary PAGE_SIZE buffer first and then
> > copy only the remaining space into the caller's page buffer.
>
> ...
>
> > + elem_buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
>
> get_free_page() (or how it is called)?
I thought modern mm guidance was to use kmalloc whenever possible and
not use get_free_page() unless you intend to use the struct page bits?
Jason
^ permalink raw reply
* Re: [PATCH 01/11] params: bound array element output to the caller's page buffer
From: David Laight @ 2026-06-02 13:04 UTC (permalink / raw)
To: Andy Shevchenko
Cc: Kees Cook, Luis Chamberlain, Pengpeng Hou, stable, Petr Pavlu,
Richard Weinberger, Anton Ivanov, Johannes Berg,
Rafael J. Wysocki, Len Brown, Corey Minyard, Gabriel Somlo,
Michael S. Tsirkin, Jani Nikula, Joonas Lahtinen, Rodrigo Vivi,
Tvrtko Ursulin, David Airlie, Simona Vetter, Bart Van Assche,
Jason Gunthorpe, Leon Romanovsky, Laurent Pinchart, Hans de Goede,
Mauro Carvalho Chehab, Bjorn Helgaas, Hannes Reinecke,
James E.J. Bottomley, Martin K. Petersen, Daniel Lezcano,
Zhang Rui, Lukasz Luba, Greg Kroah-Hartman, Jiri Slaby,
Alan Stern, Jason Wang, Xuan Zhuo, Eugenio Pérez,
Jason Baron, Jim Cromie, Tiwei Bie, Benjamin Berg,
Ilpo Järvinen, David E. Box, Maciej W. Rozycki,
Srinivas Pandruvada, Peter Zijlstra, Heiko Carstens,
Vasily Gorbik, Sean Christopherson, Paolo Bonzini,
Thomas Gleixner, Ingo Molnar, Borislav Petkov, Dave Hansen, x86,
H. Peter Anvin, Vinod Koul, Frank Li, Daniel Gomez, Sami Tolvanen,
Aaron Tomlin, Alexander Potapenko, Marco Elver, Dmitry Vyukov,
Andrew Morton, John Johansen, Paul Moore, James Morris,
Serge E. Hallyn, Georgia Garcia, kvm, dmaengine, linux-modules,
kasan-dev, linux-mm, apparmor, linux-security-module, linux-um,
linux-acpi, openipmi-developer, qemu-devel, intel-gfx, dri-devel,
linux-rdma, linux-media, linux-pci, linux-scsi, linux-pm,
linuxppc-dev, linux-serial, linux-usb, usb-storage,
virtualization, linux-kernel, linux-arch, netdev, linux-fsdevel,
linux-hardening
In-Reply-To: <ah699hwLxIIOZ0-7@ashevche-desk.local>
On Tue, 2 Jun 2026 14:26:46 +0300
Andy Shevchenko <andriy.shevchenko@linux.intel.com> wrote:
> On Thu, May 21, 2026 at 06:33:14AM -0700, Kees Cook wrote:
> >
> > param_array_get() appends each element's string representation into the
> > shared sysfs page buffer by passing buffer + off to the element getter.
> >
> > That works for getters that only write a small bounded string, but
> > param_get_charp() and similar helpers format against PAGE_SIZE from the
> > pointer they receive. Once off is non-zero, an element getter can
> > therefore write past the end of the original sysfs page buffer.
> >
> > Collect each element into a temporary PAGE_SIZE buffer first and then
> > copy only the remaining space into the caller's page buffer.
>
> ...
>
> > + elem_buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
>
> get_free_page() (or how it is called)?
The kmalloc() should be faster and I think has to be aligned.
There is another patch set to replace get_free_pages() with kmalloc().
Although all these 'show' functions should really head to using a safer
interface.
Although, at the moment, it is really difficult to find the ones that
are guaranteed to be passed a page aligned buffer.
-- David
>
> > + if (!elem_buf)
> > + return -ENOMEM;
> > +
> > for (i = off = 0; i < (arr->num ? *arr->num : arr->max); i++) {
> > - /* Replace \n with comma */
> > - if (i)
> > - buffer[off - 1] = ',';
> > p.arg = arr->elem + arr->elemsize * i;
> > check_kparam_locked(p.mod);
> > - ret = arr->ops->get(buffer + off, &p);
> > + ret = arr->ops->get(elem_buf, &p);
> > if (ret < 0)
> > - return ret;
> > + goto out;
> > + ret = min(ret, (int)(PAGE_SIZE - 1 - off));
>
> It's usually discouraged to use castings in min/max/clamp. Can we make ret long
> or do something different here?
>
> > + if (!ret)
> > + break;
>
> > + /* Replace the previous element's trailing newline with a comma. */
> > + if (i)
> > + buffer[off - 1] = ',';
>
> Can't we do this after with help of strreplace()?
>
> > + memcpy(buffer + off, elem_buf, ret);
> > off += ret;
> > + if (off == PAGE_SIZE - 1)
> > + break;
> > }
> > buffer[off] = '\0';
> > - return off;
> > + ret = off;
> > +out:
> > + kfree(elem_buf);
> > + return ret;
>
^ permalink raw reply
* RE: [PATCH v5 05/20] dma-pool: track decrypted atomic pools and select them via attrs
From: Michael Kelley @ 2026-06-02 14:24 UTC (permalink / raw)
To: Aneesh Kumar K.V, iommu@lists.linux.dev,
linux-arm-kernel@lists.infradead.org,
linux-kernel@vger.kernel.org, linux-coco@lists.linux.dev
Cc: Robin Murphy, Marek Szyprowski, Will Deacon, Marc Zyngier,
Steven Price, Suzuki K Poulose, Catalin Marinas, Jiri Pirko,
Jason Gunthorpe, Mostafa Saleh, Petr Tesarik,
Alexey Kardashevskiy, Dan Williams, Xu Yilun,
linuxppc-dev@lists.ozlabs.org, linux-s390@vger.kernel.org,
Madhavan Srinivasan, Michael Ellerman, Nicholas Piggin,
Christophe Leroy (CS GROUP), Alexander Gordeev, Gerald Schaefer,
Heiko Carstens, Vasily Gorbik, Christian Borntraeger,
Sven Schnelle, x86@kernel.org, Jiri Pirko
In-Reply-To: <yq5afr35sciu.fsf@kernel.org>
From: Aneesh Kumar K.V <aneesh.kumar@kernel.org> Sent: Monday, June 1, 2026 11:05 PM
>
> Michael Kelley <mhklinux@outlook.com> writes:
>
> > From: Aneesh Kumar K.V (Arm) <aneesh.kumar@kernel.org> Sent: Thursday, May 21, 2026 9:28 PM
> >>
> >> Teach the atomic DMA pool code to distinguish between encrypted and
> >> unencrypted pools, and make pool allocation select the matching pool based
> >> on DMA attributes.
> >>
> >> Introduce a dma_gen_pool wrapper that records whether a pool is
> >> unencrypted, initialize that state when the atomic pools are created, and
> >> use it when expanding and resizing the pools. Update dma_alloc_from_pool()
> >> to take attrs and skip pools whose encrypted state does not match
> >> DMA_ATTR_CC_SHARED. Update dma_free_from_pool() accordingly.
> >>
> >> Also pass DMA_ATTR_CC_SHARED from the swiotlb atomic allocation path so
> >> decrypted swiotlb allocations are taken from the correct atomic pool.
> >>
> >> Tested-by: Jiri Pirko <jiri@nvidia.com>
> >> Reviewed-by: Mostafa Saleh <smostafa@google.com>
> >> Signed-off-by: Aneesh Kumar K.V (Arm) <aneesh.kumar@kernel.org>
> >> ---
> >> drivers/iommu/dma-iommu.c | 2 +-
> >> include/linux/dma-map-ops.h | 2 +-
> >> kernel/dma/direct.c | 11 ++-
> >> kernel/dma/pool.c | 167 +++++++++++++++++++++++-------------
> >> kernel/dma/swiotlb.c | 7 +-
> >> 5 files changed, 123 insertions(+), 66 deletions(-)
> >>
> >
> > [snip]
> >
> >> +static __init struct dma_gen_pool *__dma_atomic_pool_init(struct dma_gen_pool *dma_pool,
> >> + size_t pool_size, gfp_t gfp)
> >> {
> >> - struct gen_pool *pool;
> >> int ret;
> >>
> >> - pool = gen_pool_create(PAGE_SHIFT, NUMA_NO_NODE);
> >> - if (!pool)
> >> + dma_pool->pool = gen_pool_create(PAGE_SHIFT, NUMA_NO_NODE);
> >> + if (!dma_pool->pool)
> >> return NULL;
> >>
> >> - gen_pool_set_algo(pool, gen_pool_first_fit_order_align, NULL);
> >> + gen_pool_set_algo(dma_pool->pool, gen_pool_first_fit_order_align, NULL);
> >> +
> >> + /* if platform is using memory encryption atomic pools are by default decrypted. */
> >> + if (cc_platform_has(CC_ATTR_MEM_ENCRYPT))
> >> + dma_pool->unencrypted = true;
> >> + else
> >> + dma_pool->unencrypted = false;
> >
> > I'm curious about the name of the "unencrypted" field in struct dma_gen_pool,
> > and similarly in Patch 7 of the series for the swiotlb struct io_tlb_pool and
> > struct io_tlb_mem. Up through v3 of this series, you used "decrypted", but
> > starting in v4 switched to "unencrypted".
> >
> > To me, the above "if" statement has some cognitive dissonance in that if
> > CC_ATTR_MEM_ENCRYPT is false (i.e., a normal VM), "unencrypted" is set
> > to false. But I think of memory in a normal VM as "unencrypted" since it
> > was never encrypted. A similar "if" statement occurs in your swiotlb changes.
> >
> > Two related concepts are captured by the field:
> > 1) Is some action needed to put the memory into the unencrypted state,
> > and to remove it from that state? This applies when assigning memory to the
> > pool, or freeing the memory in the pool.
> > 2) Is the memory currently in the unencrypted state? This applies when
> > allocating memory from the pool to a caller.
> >
> > It's hard to capture all that in a short field name. But I think I prefer "decrypted"
> > over "unencrypted". The former implies that some action was taken. It's a
> > little easier to think of a normal VM as *not* having decrypted memory. The
> > memory was never encrypted in the first place, so no decryption action was taken.
> >
> > Throughout the kernel, "decrypted" occurs much more frequently than
> > "unencrypted". We have set_memory_encrypted() and set_memory_decrypted()
> > that are "take action" names. But we also have force_dma_unencrypted(),
> > phys_to_dma_unencrypted(), and dma_addr_unencrypted(). So it's a bit
> > of a mess.
> >
> >
> > But maybe there's more background here that led to the change
> > between your v3 and v4.
> >
> > Michael
>
> The current APIs, phys_to_dma_unencrypted() and dma_addr_unencrypted(),
> are the reason I changed the pool attribute name from decrypted to
> unencrypted. The rationale was that nobody actually decrypted the
> memory; the memory was already in an unencrypted state.
>
> In other words, the DMA pool did not contain encrypted content that was
> later decrypted. Rather, the DMA pool itself was in an unencrypted
> state.
>
> IMHO, set_memory_decrypted()/set_memory_encrypted() is the right naming
> because those APIs describe an operation that transitions memory between
> states. In contrast, the pool attribute describes the state of the
> memory itself, which is why I used unencrypted rather than decrypted.
>
Except that in a normal VM, the "unencrypted" pool attribute does *not*
describe the state of the memory itself. In a normal VM, the memory is
unencrypted, but the "unencrypted" pool attribute is false. That
contradiction is the essence of my concern.
Michael
^ permalink raw reply
* Re: [PATCH v3 19/19] mm/mm_init: Fold __init_page_from_nid() into __init_deferred_page()
From: Mike Rapoport @ 2026-06-02 14:46 UTC (permalink / raw)
To: Muchun Song
Cc: Oscar Salvador, David Hildenbrand, Andrew Morton,
Madhavan Srinivasan, Michael Ellerman, Muchun Song,
Lorenzo Stoakes, Liam R. Howlett, Vlastimil Babka, linux-mm,
linux-kernel, Nicholas Piggin, Christophe Leroy (CS GROUP),
Ritesh Harjani (IBM), Aneesh Kumar K.V, linuxppc-dev,
Mike Kravetz
In-Reply-To: <20260602101039.1867613-20-songmuchun@bytedance.com>
On Tue, Jun 02, 2026 at 06:10:39PM +0800, Muchun Song wrote:
> __init_page_from_nid() no longer has external users and is only used
> locally in mm/mm_init.c under CONFIG_DEFERRED_STRUCT_PAGE_INIT.
>
> Fold it into its sole caller __init_deferred_page() and remove the
> separate helper declaration.
>
> Signed-off-by: Muchun Song <songmuchun@bytedance.com>
> ---
> v2->v3:
> - fold __init_page_from_nid() into __init_deferred_page() since it
> only has a single caller (suggested by Mike Rapoport)
> ---
> mm/internal.h | 1 -
> mm/mm_init.c | 44 ++++++++++++++++++--------------------------
> 2 files changed, 18 insertions(+), 27 deletions(-)
>
> diff --git a/mm/internal.h b/mm/internal.h
> index 8497673d0ac3..b33fc87e4555 100644
> --- a/mm/internal.h
> +++ b/mm/internal.h
> @@ -1760,7 +1760,6 @@ static inline bool pte_needs_soft_dirty_wp(struct vm_area_struct *vma, pte_t pte
>
> void __meminit __init_single_page(struct page *page, unsigned long pfn,
> unsigned long zone, int nid);
> -void __meminit __init_page_from_nid(unsigned long pfn, int nid);
>
> /* shrinker related functions */
> unsigned long shrink_slab(gfp_t gfp_mask, int nid, struct mem_cgroup *memcg,
> diff --git a/mm/mm_init.c b/mm/mm_init.c
> index 41b83dd18c01..f1bbf3b9a321 100644
> --- a/mm/mm_init.c
> +++ b/mm/mm_init.c
> @@ -688,31 +688,6 @@ static __meminit void pageblock_migratetype_init_range(unsigned long pfn,
> }
> #endif
>
> -/*
> - * Initialize a reserved page unconditionally, finding its zone first.
> - */
> -void __meminit __init_page_from_nid(unsigned long pfn, int nid)
> -{
> - pg_data_t *pgdat;
> - int zid;
> -
> - pgdat = NODE_DATA(nid);
> -
> - for (zid = 0; zid < MAX_NR_ZONES; zid++) {
> - struct zone *zone = &pgdat->node_zones[zid];
> -
> - if (zone_spans_pfn(zone, pfn))
> - break;
> - }
> - __init_single_page(pfn_to_page(pfn), pfn, zid, nid);
> -
> - if (pageblock_aligned(pfn)) {
> - enum migratetype mt =
> - kho_scratch_migratetype(pfn, MIGRATE_MOVABLE);
> - init_pageblock_migratetype(pfn_to_page(pfn), mt, false);
> - }
> -}
> -
> #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
> static inline void pgdat_set_deferred_range(pg_data_t *pgdat)
> {
> @@ -771,10 +746,27 @@ defer_init(int nid, unsigned long pfn, unsigned long end_pfn)
>
> static void __meminit __init_deferred_page(unsigned long pfn, int nid)
> {
> + pg_data_t *pgdat;
> + int zid;
> +
> if (early_page_initialised(pfn, nid))
> return;
>
> - __init_page_from_nid(pfn, nid);
> + pgdat = NODE_DATA(nid);
Nit: we can initialize pgdat on its declaration line; other than that
Reviewed-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
> +
> + for (zid = 0; zid < MAX_NR_ZONES; zid++) {
> + struct zone *zone = &pgdat->node_zones[zid];
> +
> + if (zone_spans_pfn(zone, pfn))
> + break;
> + }
> + __init_single_page(pfn_to_page(pfn), pfn, zid, nid);
> +
> + if (pageblock_aligned(pfn)) {
> + enum migratetype mt =
> + kho_scratch_migratetype(pfn, MIGRATE_MOVABLE);
> + init_pageblock_migratetype(pfn_to_page(pfn), mt, false);
> + }
> }
> #else
> static inline void pgdat_set_deferred_range(pg_data_t *pgdat) {}
> --
> 2.54.0
>
>
--
Sincerely yours,
Mike.
^ permalink raw reply
* Re: [next20260529]powerpc/pseries: multiple WARNs: RCU not watching for tracepoint and lockdep_hardirq_context() during boot and cpuidle (Power11)
From: Shrikanth Hegde @ 2026-06-02 15:24 UTC (permalink / raw)
To: Venkat Rao Bagalkote, Mukesh Kumar Chaurasiya (IBM),
Madhavan Srinivasan, Peter Zijlstra
Cc: LKML, linuxppc-dev, Paul E. McKenney, Srikar Dronamraju,
Ingo Molnar
In-Reply-To: <b44aefc5-e066-478b-8d34-50d2d0deab6b@linux.ibm.com>
Hi Venkat, Thanks for the report.
On 6/1/26 6:57 PM, Venkat Rao Bagalkote wrote:
> Greetings!!!
>
>
> I am observing multiple reproducible WARN_ONs related to RCU and lockdep
> IRQ state tracking on a Power11 pSeries system when running on latest
> linux-next kernel.
>
>
> Environment:
> Architecture: ppc64le (Power11, pSeries LPAR)
> Kernel: 7.1.0-rc5-next-20260529
> Config: PREEMPT(lazy)
> CONFIG_LOCKDEP=y
> CONFIG_PROVE_LOCKING=y
>
>
> Warning1:
>
> [ 0.008277] ------------[ cut here ]------------
> [ 0.008285] RCU not watching for tracepoint
> [ 0.008294] WARNING: ./include/trace/events/preemptirq.h:36 at
> trace_hardirqs_off+0x16c/0x1a0, CPU#1: swapper/1/0
> [ 0.008306] Modules linked in:
> [ 0.008316] CPU: 1 UID: 0 PID: 0 Comm: swapper/1 Not tainted 7.1.0-
> rc5-next-20260529 #1 PREEMPT(lazy)
> [ 0.008322] Hardware name: IBM,9080-HEX Power11 (architected)
> 0x820200 0xf000007 of:IBM,FW1110.01 (NH1110_069) hv:phyp pSeries
> [ 0.008327] NIP: c0000000004bb2a8 LR: c0000000004bb2a4 CTR:
> 0000000000000000
> [ 0.008331] REGS: c0000000049cb690 TRAP: 0700 Not tainted (7.1.0-
> rc5-next-20260529)
> [ 0.008336] MSR: 8000000000021033 <SF,ME,IR,DR,RI,LE> CR: 44000208
> XER: 00000005
> [ 0.008348] CFAR: c00000000022e9d4 IRQMASK: 3
> [ 0.008348] GPR00: c0000000004bb2a4 c0000000049cb950 c000000001ccf100
> 000000000000001f
> [ 0.008348] GPR04: 3fffffffffff7fff c0000000049cb740 c0000000049cb738
> 0000000000000000
> [ 0.008348] GPR08: c0000000029d1230 0000000000000001 c0000000049e8000
> 0000000000000003
> [ 0.008348] GPR12: c000000002d514e0 c000000effffeb00 0000000000000000
> 0000000000000000
> [ 0.008348] GPR16: 0000000000000000 0000000000000000 0000000000000000
> 0000000000000000
> [ 0.008348] GPR20: 0000000000000000 0000000000000000 0000000000000000
> 0000000000000000
> [ 0.008348] GPR24: 0000000000000000 0000000000000000 0000000000000000
> 0000000000000000
> [ 0.008348] GPR28: 0000000000000000 0000000000000001 c000000002414988
> c00000000005be40
> [ 0.008403] NIP [c0000000004bb2a8] trace_hardirqs_off+0x16c/0x1a0
> [ 0.008408] LR [c0000000004bb2a4] trace_hardirqs_off+0x168/0x1a0
> [ 0.008413] Call Trace:
> [ 0.008416] [c0000000049cb950] [c0000000004bb2a4]
> trace_hardirqs_off+0x168/0x1a0 (unreliable)
> [ 0.008423] [c0000000049cb9d0] [c00000000005be40]
> arch_interrupt_enter_prepare+0xa0/0x19c
> [ 0.008430] [c0000000049cba00] [c00000000005bf78]
> doorbell_exception+0x3c/0x4c4
> [ 0.008436] [c0000000049cbaa0] [c00000000000a2fc]
> doorbell_super_common_virt+0x28c/0x290
> [ 0.008443] ---- interrupt: a00 at plpar_hcall_norets_notrace+0x18/0x2c
> [ 0.008449] NIP: c0000000001b4fc8 LR: c0000000001bcea0 CTR:
> 0000000000000000
> [ 0.008453] REGS: c0000000049cbad0 TRAP: 0a00 Not tainted (7.1.0-
> rc5-next-20260529)
> [ 0.008457] MSR: 8000000000009033 <SF,EE,ME,IR,DR,RI,LE> CR:
> 24000008 XER: 00000000
> [ 0.008469] CFAR: 0000000000000000 IRQMASK: 0
> [ 0.008469] GPR00: 0000000000000000 c0000000049cbd90 c000000001ccf100
> 0000000000000000
> [ 0.008469] GPR04: 0000000000000000 8004000038407c10 0000000000000000
> 0000000000000003
> [ 0.008469] GPR08: 0000000000000001 0000000000000000 0000000000000090
> 0000000000000001
> [ 0.008469] GPR12: 8004000038407c00 c000000effffeb00 0000000000000000
> 000000002ef01820
> [ 0.008469] GPR16: 0000000000000000 0000000000000000 0000000000000000
> 0000000000000000
> [ 0.008469] GPR20: 0000000000000000 0000000000000000 0000000000000000
> 0000000000000001
> [ 0.008469] GPR24: 0000000000000001 000000000000dedc c000000003086150
> 0000000000000001
> [ 0.008469] GPR28: c0000000049e8000 c000000002241548 c000000002241550
> c000000002241548
> [ 0.008523] NIP [c0000000001b4fc8] plpar_hcall_norets_notrace+0x18/0x2c
> [ 0.008528] LR [c0000000001bcea0] pseries_lpar_idle.part.0+0x74/0x160
> [ 0.008533] ---- interrupt: a00
> [ 0.008536] [c0000000049cbd90] [c0000000049cbe30] 0xc0000000049cbe30
> (unreliable)
> [ 0.008544] [c0000000049cbe10] [c000000000022c5c]
> arch_cpu_idle+0x4c/0x120
> [ 0.008551] [c0000000049cbe30] [c0000000015afe70]
> default_idle_call+0x154/0x454
> [ 0.008558] [c0000000049cbec0] [c0000000002d3dfc]
> cpuidle_idle_call+0x2dc/0x2e0
> [ 0.008565] [c0000000049cbf10] [c0000000002d3f48] do_idle+0x148/0x1f0
> [ 0.008571] [c0000000049cbf60] [c0000000002d43c8]
> cpu_startup_entry+0x4c/0x50
> [ 0.008578] [c0000000049cbf90] [c00000000006371c]
> start_secondary+0x27c/0x28c
> [ 0.008585] [c0000000049cbfe0] [c00000000000e258]
> start_secondary_prolog+0x10/0x14
> [ 0.008590] Code: 4bfffcc4 60000000 3d220132 8929db46 2c090000
> 4082ff94 3c62ffd6 3d220132 3863d398 9ba9db46 4bd73655 60000000
> <0fe00000> 60000000 4bffff74 60000000
> [ 0.008611] irq event stamp: 20
> [ 0.008614] hardirqs last enabled at (19): [<c0000000002d3dfc>]
> cpuidle_idle_call+0x2dc/0x2e0
> [ 0.008620] hardirqs last disabled at (20): [<c00000000005be40>]
> arch_interrupt_enter_prepare+0xa0/0x19c
> [ 0.008625] softirqs last enabled at (0): [<c00000000022b6ac>]
> copy_process+0xb24/0x1dec
> [ 0.008632] softirqs last disabled at (0): [<0000000000000000>] 0x0
> [ 0.008637] ---[ end trace 0000000000000000 ]---
>
The equivalent of trace_hardirqs_off() is already done by irqentry_enter(),
so the call is not needed in arch_interrupt_enter_prepare().
irqentry_enter
lockdep_hardirqs_off(CALLER_ADDR0);
trace_hardirqs_off_finish();
For reference:
void trace_hardirqs_off(void)
{
lockdep_hardirqs_off(CALLER_ADDR0);
if (!this_cpu_read(tracing_irq_cpu)) {
this_cpu_write(tracing_irq_cpu, 1);
tracer_hardirqs_off(CALLER_ADDR0, CALLER_ADDR1);
trace(irq_disable, TP_ARGS(CALLER_ADDR0, CALLER_ADDR1));
}
}
void trace_hardirqs_off_finish(void)
{
if (!this_cpu_read(tracing_irq_cpu)) {
this_cpu_write(tracing_irq_cpu, 1);
tracer_hardirqs_off(CALLER_ADDR0, CALLER_ADDR1);
trace(irq_disable, TP_ARGS(CALLER_ADDR0, CALLER_ADDR1));
}
}
>
> Warning2:
>
> [ 0.010098] ------------[ cut here ]------------
> [ 0.010103] DEBUG_LOCKS_WARN_ON(lockdep_hardirq_context())
> [ 0.010107] WARNING: kernel/locking/lockdep.c:4406 at
> lockdep_hardirqs_on_prepare+0x22c/0x2d4, CPU#0: swapper/0/1
> [ 0.010116] Modules linked in:
> [ 0.010120] CPU: 0 UID: 0 PID: 1 Comm: swapper/0 Tainted: G W
> 7.1.0-rc5-next-20260529 #1 PREEMPT(lazy)
> [ 0.010125] Tainted: [W]=WARN
> [ 0.010127] Hardware name: IBM,9080-HEX Power11 (architected)
> 0x820200 0xf000007 of:IBM,FW1110.01 (NH1110_069) hv:phyp pSeries
> [ 0.010131] NIP: c00000000031549c LR: c000000000315498 CTR:
> 0000000000000000
> [ 0.010135] REGS: c0000000045bf100 TRAP: 0700 Tainted: G W
> (7.1.0-rc5-next-20260529)
> [ 0.010139] MSR: 8000000002021033 <SF,VEC,ME,IR,DR,RI,LE> CR:
> 44044228 XER: 00000005
> [ 0.010147] CFAR: c00000000022e9d4 IRQMASK: 3
> [ 0.010147] GPR00: c000000000315498 c0000000045bf3c0 c000000001ccf100
> 000000000000002e
> [ 0.010147] GPR04: 3fffffffffff7fff c0000000045bf1b0 c0000000045bf1a8
> 0000000000000000
> [ 0.010147] GPR08: c0000000029d1230 0000000000010002 c0000000048b2b00
> 0000000000000003
> [ 0.010147] GPR12: c000000002d514e0 c000000003ea1000 c000000000011ae4
> 0000000000000000
> [ 0.010147] GPR16: 0000000000000000 0000000000000000 0000000000000000
> 0000000000000000
> [ 0.010147] GPR20: 0000000000000000 0000000000000004 c000000000272684
> c0000000029bb0c0
> [ 0.010147] GPR24: 0000000ebc171000 c000000ebeb63850 c000000003084d00
> c00000000308b2d0
> [ 0.010147] GPR28: c00000000002a488 0000000000000001 0000000000000000
> c000000002246e08
> [ 0.010188] NIP [c00000000031549c]
> lockdep_hardirqs_on_prepare+0x22c/0x2d4
> [ 0.010192] LR [c000000000315498]
> lockdep_hardirqs_on_prepare+0x228/0x2d4
> [ 0.010196] Call Trace:
> [ 0.010198] [c0000000045bf3c0] [c000000000315498]
> lockdep_hardirqs_on_prepare+0x228/0x2d4 (unreliable)
> [ 0.010204] [c0000000045bf430] [c0000000004bb778]
> trace_hardirqs_on+0xec/0x1b0
> [ 0.010209] [c0000000045bf4b0] [c0000000015ad574]
> irqentry_exit+0x58c/0xe1c
> [ 0.010213] [c0000000045bf540] [c00000000002a488]
> timer_interrupt+0x210/0x564
> [ 0.010219] [c0000000045bf5f0] [c00000000003b960]
> __replay_soft_interrupts+0x14c/0x374
> [ 0.010224] [c0000000045bf7d0] [c00000000003bd74]
> arch_local_irq_restore.part.0+0x1ec/0x224
> [ 0.010230] [c0000000045bf810] [c0000000015c17d4]
> _raw_spin_unlock_irqrestore+0x54/0xac
> [ 0.010235] [c0000000045bf840] [c0000000002cdd54]
> set_user_nice+0x110/0x220
> [ 0.010240] [c0000000045bf8e0] [c000000000266a94]
> create_worker+0x13c/0x310
> [ 0.010245] [c0000000045bf9a0] [c0000000002726f4]
> workqueue_prepare_cpu+0x70/0xe4
> [ 0.010251] [c0000000045bf9e0] [c000000000232604]
> cpuhp_invoke_callback+0x1e8/0x3c0
> [ 0.010256] [c0000000045bfa50] [c000000000232924]
> __cpuhp_invoke_callback_range+0x148/0x230
> [ 0.010261] [c0000000045bfaf0] [c000000000234f68] _cpu_up+0x19c/0x3cc
> [ 0.010265] [c0000000045bfbb0] [c00000000023533c] cpu_up+0x1a4/0x1f4
> [ 0.010269] [c0000000045bfc40] [c00000000203d1f4]
> bringup_nonboot_cpus+0xbc/0x128
> [ 0.010275] [c0000000045bfca0] [c00000000204b98c] smp_init+0x44/0xd0
> [ 0.010279] [c0000000045bfd00] [c000000002006d4c]
> kernel_init_freeable+0x23c/0x3b0
> [ 0.010284] [c0000000045bfdc0] [c000000000011b0c] kernel_init+0x30/0x274
> [ 0.010288] [c0000000045bfe30] [c00000000000debc]
> ret_from_kernel_user_thread+0x14/0x1c
> [ 0.010292] ---- interrupt: 0 at 0x0
> [ 0.010296] Code: 4182ff74 3d22013c 3929c1d4 81290000 2c090000
> 4082ff60 3c82ffda 3c62ffd9 3884f998 38634590 4bf19461 60000000
> <0fe00000> 4bffff40 60000000 60000000
> [ 0.010310] irq event stamp: 7440
> [ 0.010312] hardirqs last enabled at (7439): [<c0000000015c1824>]
> _raw_spin_unlock_irqrestore+0xa4/0xac
> [ 0.010317] hardirqs last disabled at (7440): [<c00000000003bc30>]
> arch_local_irq_restore.part.0+0xa8/0x224
> [ 0.010323] softirqs last enabled at (0): [<c00000000022b6ac>]
> copy_process+0xb24/0x1dec
> [ 0.010328] softirqs last disabled at (0): [<0000000000000000>] 0x0
> [ 0.010331] ---[ end trace 0000000000000000 ]---
>
>
This should be fixed by the diff below, which disables interrupts
before calling irqentry_exit().
>
> Warning3:
>
> [ 1.718239] ------------[ cut here ]------------
> [ 1.718247] RCU not watching for tracepoint
> [ 1.718255] WARNING: ./include/trace/events/preemptirq.h:40 at
> trace_hardirqs_on+0x180/0x1b0, CPU#19: swapper/19/0
> [ 1.718266] Modules linked in: ibmvscsi ibmveth scsi_transport_srp
> pseries_wdt
> [ 1.718275] CPU: 19 UID: 0 PID: 0 Comm: swapper/19 Tainted: G W
> 7.1.0-rc5-next-20260529 #1 PREEMPT(lazy)
> [ 1.718280] Tainted: [W]=WARN
> [ 1.718283] Hardware name: IBM,9080-HEX Power11 (architected)
> 0x820200 0xf000007 of:IBM,FW1110.01 (NH1110_069) hv:phyp pSeries
> [ 1.718287] NIP: c0000000004bb80c LR: c0000000004bb808 CTR:
> 0000000000000000
> [ 1.718290] REGS: c000000004a4b9e0 TRAP: 0700 Tainted: G W
> (7.1.0-rc5-next-20260529)
> [ 1.718294] MSR: 8000000002823033 <SF,VEC,VSX,FP,ME,IR,DR,RI,LE> CR:
> 44000208 XER: 00000005
> [ 1.718305] CFAR: c00000000022e9d4 IRQMASK: 3
> [ 1.718305] GPR00: c0000000004bb808 c000000004a4bca0 c000000001ccf100
> 000000000000001f
> [ 1.718305] GPR04: 3fffffffffff7fff c000000004a4ba90 c000000004a4ba88
> 0000000ebe5e2000
> [ 1.718305] GPR08: 0000000000000027 0000000000000002 c000000004a62b00
> 0000000000000003
> [ 1.718305] GPR12: c000000002d514e0 c000000effff1300 0000000000000000
> 000000002ef01a60
> [ 1.718305] GPR16: 0000000000000000 0000000000000000 0000000000000000
> 0000000000000000
> [ 1.718305] GPR20: 0000000000000000 0000000000000000 0000000000000000
> 00000000666a3c88
> [ 1.718305] GPR24: c00000000105088c 000000000000dedc c000000003084d00
> 0000000000000000
> [ 1.718305] GPR28: c000000ec09fe440 0000000000000001 c000000002414988
> c00000000003bca8
> [ 1.718347] NIP [c0000000004bb80c] trace_hardirqs_on+0x180/0x1b0
> [ 1.718351] LR [c0000000004bb808] trace_hardirqs_on+0x17c/0x1b0
> [ 1.718355] Call Trace:
> [ 1.718357] [c000000004a4bca0] [c0000000004bb808]
> trace_hardirqs_on+0x17c/0x1b0 (unreliable)
> [ 1.718362] [c000000004a4bd20] [c00000000003bca8]
> arch_local_irq_restore.part.0+0x120/0x224
> [ 1.718369] [c000000004a4bd60] [c0000000015b065c] snooze_loop+0xa0/0x270
> [ 1.718374] [c000000004a4bda0] [c0000000015af06c]
> cpuidle_enter_state+0x110/0x8fc
> [ 1.718379] [c000000004a4be60] [c00000000105088c]
> cpuidle_enter+0x50/0x74
> [ 1.718384] [c000000004a4bea0] [c0000000002ca85c] call_cpuidle+0x48/0xa0
> [ 1.718389] [c000000004a4bec0] [c0000000002d3c80]
> cpuidle_idle_call+0x160/0x2e0
> [ 1.718395] [c000000004a4bf10] [c0000000002d3f48] do_idle+0x148/0x1f0
> [ 1.718400] [c000000004a4bf60] [c0000000002d43c8]
> cpu_startup_entry+0x4c/0x50
> [ 1.718405] [c000000004a4bf90] [c00000000006371c]
> start_secondary+0x27c/0x28c
> [ 1.718411] [c000000004a4bfe0] [c00000000000e258]
> start_secondary_prolog+0x10/0x14
> [ 1.718415] Code: 60000000 3d220132 8929db48 2c090000 4082ff64
> 3c62ffd6 39200001 3d420132 3863d398 992adb48 4bd730f1 60000000
> <0fe00000> 60000000 4bffff40 60000000
> [ 1.718430] irq event stamp: 0
> [ 1.718432] hardirqs last enabled at (0): [<0000000000000000>] 0x0
> [ 1.718436] hardirqs last disabled at (0): [<c00000000022b6ac>]
> copy_process+0xb24/0x1dec
> [ 1.718442] softirqs last enabled at (0): [<c00000000022b6ac>]
> copy_process+0xb24/0x1dec
> [ 1.718447] softirqs last disabled at (0): [<0000000000000000>] 0x0
> [ 1.718450] ---[ end trace 0000000000000000 ]---
>
>
The idle loop is expected to use the variants without tracing.
The diff below switches it to the raw_* variants.
>
> I am reporting all three warnings in one report, as they all flag
> inconsistencies around IRQ enable/disable transitions.
>
>
> If you happen to fix this, please add below tag.
>
>
> Reported-by: Venkat Rao Bagalkote <venkat88@linux.ibm.com>
>
>
>
> Regards,
>
> Venkat.
>
>
Can you try the diff below and check whether all three get solved?
diff --git a/arch/powerpc/include/asm/entry-common.h b/arch/powerpc/include/asm/entry-common.h
index de5601282755..a7f30875d14a 100644
--- a/arch/powerpc/include/asm/entry-common.h
+++ b/arch/powerpc/include/asm/entry-common.h
@@ -210,9 +210,6 @@ static inline void arch_interrupt_enter_prepare(struct pt_regs *regs)
/* Enable MSR[RI] early, to support kernel SLB and hash faults */
#endif
- if (!regs_irqs_disabled(regs))
- trace_hardirqs_off();
-
if (user_mode(regs)) {
kuap_lock();
account_cpu_user_entry();
@@ -253,16 +250,17 @@ static inline void arch_interrupt_enter_prepare(struct pt_regs *regs)
static inline void arch_interrupt_exit_prepare(struct pt_regs *regs)
{
if (user_mode(regs)) {
- BUG_ON(regs_is_unrecoverable(regs));
- BUG_ON(regs_irqs_disabled(regs));
+ WARN_ON(regs_is_unrecoverable(regs));
+ WARN_ON(regs_irqs_disabled(regs));
/*
* We don't need to restore AMR on the way back to userspace for KUAP.
* AMR can only have been unlocked if we interrupted the kernel.
*/
kuap_assert_locked();
-
- local_irq_disable();
}
+
+ /* irqentry_exit expects to be called with interrupts disabled */
+ local_irq_disable();
}
static inline void arch_interrupt_async_enter_prepare(struct pt_regs *regs)
diff --git a/drivers/cpuidle/cpuidle-powernv.c b/drivers/cpuidle/cpuidle-powernv.c
index b89e7111e7b8..a0d18deb7119 100644
--- a/drivers/cpuidle/cpuidle-powernv.c
+++ b/drivers/cpuidle/cpuidle-powernv.c
@@ -73,7 +73,7 @@ static int snooze_loop(struct cpuidle_device *dev,
set_thread_flag(TIF_POLLING_NRFLAG);
- local_irq_enable();
+ raw_local_irq_enable();
snooze_exit_time = get_tb() + get_snooze_timeout(dev, drv, index);
dev->poll_time_limit = false;
@@ -100,7 +100,7 @@ static int snooze_loop(struct cpuidle_device *dev,
if (!dev->poll_time_limit)
clear_thread_flag(TIF_POLLING_NRFLAG);
- local_irq_disable();
+ raw_local_irq_disable();
return index;
}
^ permalink raw reply related
* Re: [PATCH v3 14/19] mm/hugetlb: Free cross-zone bootmem gigantic pages after allocation
From: Mike Rapoport @ 2026-06-02 15:41 UTC (permalink / raw)
To: Muchun Song
Cc: Oscar Salvador, David Hildenbrand, Andrew Morton,
Madhavan Srinivasan, Michael Ellerman, Muchun Song, Mike Rapoport,
Lorenzo Stoakes, Liam R. Howlett, Vlastimil Babka, linux-mm,
linux-kernel, Nicholas Piggin, Christophe Leroy (CS GROUP),
Ritesh Harjani (IBM), Aneesh Kumar K.V, linuxppc-dev,
Mike Kravetz
In-Reply-To: <20260602101039.1867613-15-songmuchun@bytedance.com>
On Tue, 02 Jun 2026 18:10:34 +0800, Muchun Song <songmuchun@bytedance.com> wrote:
Hi Muchun,
>
> diff --git a/mm/hugetlb.c b/mm/hugetlb.c
> index 5e557c05d80a..218fb1ca45f4 100644
> --- a/mm/hugetlb.c
> +++ b/mm/hugetlb.c
> @@ -3073,22 +3076,38 @@ static bool __init alloc_bootmem_huge_page(struct hstate *h, int nid)
> [ ... skip 26 lines ... ]
> + * pages belonging to the requested node.
> + */
> + if (WARN_ON_ONCE(nid_request != NUMA_NO_NODE && nid != nid_request))
> + list_add(&m->list, &huge_boot_pages[nid_request]);
> + else
> + list_add(&m->list, &huge_boot_pages[nid]);
Can we just memblock_free() the page that intersects zones here?
Rather than making alloc_bootmem_huge_page() bool (sorry my bad :)) we
can make it return -ENOMEM when memblock_alloc() fails, 0 if the page is
not usable and 1 (i.e. number of allocated gigantic pages) if everything
is fine.
The callers would need a bit of massaging, but it still seems simpler to
me than adding such pages to the list and then walking that list.
--
Sincerely yours,
Mike.
^ permalink raw reply
* Re: [PATCH v3 15/19] mm/hugetlb_vmemmap: Move bootmem HVO setup to early init
From: Mike Rapoport @ 2026-06-02 15:41 UTC (permalink / raw)
To: Muchun Song
Cc: Oscar Salvador, David Hildenbrand, Andrew Morton,
Madhavan Srinivasan, Michael Ellerman, Muchun Song, Mike Rapoport,
Lorenzo Stoakes, Liam R. Howlett, Vlastimil Babka, linux-mm,
linux-kernel, Nicholas Piggin, Christophe Leroy (CS GROUP),
Ritesh Harjani (IBM), Aneesh Kumar K.V, linuxppc-dev,
Mike Kravetz
In-Reply-To: <20260602101039.1867613-16-songmuchun@bytedance.com>
On Tue, 02 Jun 2026 18:10:35 +0800, Muchun Song <songmuchun@bytedance.com> wrote:
> diff --git a/mm/hugetlb_vmemmap.c b/mm/hugetlb_vmemmap.c
> index ea6af85bfec1..464578ee246e 100644
> --- a/mm/hugetlb_vmemmap.c
> +++ b/mm/hugetlb_vmemmap.c
> @@ -745,6 +745,8 @@ static bool vmemmap_should_optimize_bootmem_page(struct huge_bootmem_page *m)
> return true;
> }
>
> +static struct zone *pfn_to_zone(unsigned nid, unsigned long pfn);
> +
Can we please move the entire function rather than add a forward
declaration?
Other than that
Acked-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
--
Sincerely yours,
Mike.
^ permalink raw reply
* Re: [PATCH v3 16/19] mm/hugetlb: Remove obsolete bootmem cross-zone checks
From: Mike Rapoport @ 2026-06-02 15:41 UTC (permalink / raw)
To: Muchun Song
Cc: Oscar Salvador, David Hildenbrand, Andrew Morton,
Madhavan Srinivasan, Michael Ellerman, Muchun Song, Mike Rapoport,
Lorenzo Stoakes, Liam R. Howlett, Vlastimil Babka, linux-mm,
linux-kernel, Nicholas Piggin, Christophe Leroy (CS GROUP),
Ritesh Harjani (IBM), Aneesh Kumar K.V, linuxppc-dev,
Mike Kravetz
In-Reply-To: <20260602101039.1867613-17-songmuchun@bytedance.com>
On Tue, 02 Jun 2026 18:10:36 +0800, Muchun Song <songmuchun@bytedance.com> wrote:
> Bootmem gigantic HugeTLB pages used to be validated again during
> gather_bootmem_prealloc_node() and any cross-zone pages were discarded
> there.
>
> That validation is no longer needed. Cross-zone bootmem gigantic pages
> are now detected during allocation and freed before they reach the later
> bootmem gathering path, so the remaining pages are already zone-valid.
>
> [...]
Acked-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
--
Sincerely yours,
Mike.
^ permalink raw reply
* Re: [PATCH v3 17/19] mm/sparse-vmemmap: Remove sparse_vmemmap_init_nid_late()
From: Mike Rapoport @ 2026-06-02 15:41 UTC (permalink / raw)
To: Muchun Song
Cc: Oscar Salvador, David Hildenbrand, Andrew Morton,
Madhavan Srinivasan, Michael Ellerman, Muchun Song, Mike Rapoport,
Lorenzo Stoakes, Liam R. Howlett, Vlastimil Babka, linux-mm,
linux-kernel, Nicholas Piggin, Christophe Leroy (CS GROUP),
Ritesh Harjani (IBM), Aneesh Kumar K.V, linuxppc-dev,
Mike Kravetz
In-Reply-To: <20260602101039.1867613-18-songmuchun@bytedance.com>
On Tue, 02 Jun 2026 18:10:37 +0800, Muchun Song <songmuchun@bytedance.com> wrote:
> hugetlb_vmemmap_init_late() no longer has any users, so the remaining
> late-init path in sparse_vmemmap_init_nid_late() is dead code.
>
> Remove sparse_vmemmap_init_nid_late() and its declarations.
Acked-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
--
Sincerely yours,
Mike.
^ permalink raw reply
* Re: [PATCH v3 18/19] mm/hugetlb: Remove unused bootmem cma field
From: Mike Rapoport @ 2026-06-02 15:41 UTC (permalink / raw)
To: Muchun Song
Cc: Oscar Salvador, David Hildenbrand, Andrew Morton,
Madhavan Srinivasan, Michael Ellerman, Muchun Song, Mike Rapoport,
Lorenzo Stoakes, Liam R. Howlett, Vlastimil Babka, linux-mm,
linux-kernel, Nicholas Piggin, Christophe Leroy (CS GROUP),
Ritesh Harjani (IBM), Aneesh Kumar K.V, linuxppc-dev,
Mike Kravetz
In-Reply-To: <20260602101039.1867613-19-songmuchun@bytedance.com>
On Tue, 02 Jun 2026 18:10:38 +0800, Muchun Song <songmuchun@bytedance.com> wrote:
> struct huge_bootmem_page no longer needs to keep the CMA pointer. The
> bootmem path only needs to remember whether a huge page came from CMA,
> which is already encoded in the flags field.
>
> Set HUGE_BOOTMEM_CMA when the page is allocated and drop the unused cma
> field together with the redundant assignments.
It looks like the commit does more refactoring than the changelog
describes; please mention it in the changelog.
--
Sincerely yours,
Mike.
^ permalink raw reply
* Re: [PATCH v3 19/19] mm/mm_init: Fold __init_page_from_nid() into __init_deferred_page()
From: Mike Rapoport @ 2026-06-02 15:41 UTC (permalink / raw)
To: Muchun Song
Cc: Oscar Salvador, David Hildenbrand, Andrew Morton,
Madhavan Srinivasan, Michael Ellerman, Muchun Song, Mike Rapoport,
Lorenzo Stoakes, Liam R. Howlett, Vlastimil Babka, linux-mm,
linux-kernel, Nicholas Piggin, Christophe Leroy (CS GROUP),
Ritesh Harjani (IBM), Aneesh Kumar K.V, linuxppc-dev,
Mike Kravetz
In-Reply-To: <20260602101039.1867613-20-songmuchun@bytedance.com>
On Tue, 02 Jun 2026 18:10:39 +0800, Muchun Song <songmuchun@bytedance.com> wrote:
> diff --git a/mm/mm_init.c b/mm/mm_init.c
> index 41b83dd18c01..f1bbf3b9a321 100644
> --- a/mm/mm_init.c
> +++ b/mm/mm_init.c
> @@ -771,10 +746,27 @@ defer_init(int nid, unsigned long pfn, unsigned long end_pfn)
>
> static void __meminit __init_deferred_page(unsigned long pfn, int nid)
> {
> + pg_data_t *pgdat;
> + int zid;
> +
> if (early_page_initialised(pfn, nid))
> return;
>
> - __init_page_from_nid(pfn, nid);
> + pgdat = NODE_DATA(nid);
Nit: we can initialize pgdat on its declaration line. Other than that,
Reviewed-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
--
Sincerely yours,
Mike.
^ permalink raw reply
* Re: [next20260529]powerpc/pseries: multiple WARNs: RCU not watching for tracepoint and lockdep_hardirq_context() during boot and cpuidle (Power11)
From: Mukesh Kumar Chaurasiya @ 2026-06-02 15:53 UTC (permalink / raw)
To: Shrikanth Hegde
Cc: Venkat Rao Bagalkote, Madhavan Srinivasan, Peter Zijlstra, LKML,
linuxppc-dev, Paul E. McKenney, Srikar Dronamraju, Ingo Molnar
In-Reply-To: <87715824-b1b8-48ed-a58b-aceefb14aede@linux.ibm.com>
On Tue, Jun 02, 2026 at 08:54:54PM +0530, Shrikanth Hegde wrote:
> Hi Venkat, Thanks for the report.
>
> On 6/1/26 6:57 PM, Venkat Rao Bagalkote wrote:
> > Greetings!!!
> >
> >
> > I am observing multiple reproducible WARN_ONs related to RCU and lockdep
> > IRQ state tracking on a Power11 pSeries system when running on latest
> > linux-next kernel.
> >
> >
> > Environment:
> > Architecture: ppc64le (Power11, pSeries LPAR)
> > Kernel: 7.1.0-rc5-next-20260529
> > Config: PREEMPT(lazy)
> > CONFIG_LOCKDEP=y
> > CONFIG_PROVE_LOCKING=y
> >
> >
> > Warning1:
> >
> > [ 0.008277] ------------[ cut here ]------------
> > [ 0.008285] RCU not watching for tracepoint
> > [ 0.008294] WARNING: ./include/trace/events/preemptirq.h:36 at
> > trace_hardirqs_off+0x16c/0x1a0, CPU#1: swapper/1/0
> > [ 0.008306] Modules linked in:
> > [ 0.008316] CPU: 1 UID: 0 PID: 0 Comm: swapper/1 Not tainted 7.1.0-
> > rc5-next-20260529 #1 PREEMPT(lazy)
> > [ 0.008322] Hardware name: IBM,9080-HEX Power11 (architected)
> > 0x820200 0xf000007 of:IBM,FW1110.01 (NH1110_069) hv:phyp pSeries
> > [ 0.008327] NIP: c0000000004bb2a8 LR: c0000000004bb2a4 CTR:
> > 0000000000000000
> > [ 0.008331] REGS: c0000000049cb690 TRAP: 0700 Not tainted (7.1.0-
> > rc5-next-20260529)
> > [ 0.008336] MSR: 8000000000021033 <SF,ME,IR,DR,RI,LE> CR: 44000208
> > XER: 00000005
> > [ 0.008348] CFAR: c00000000022e9d4 IRQMASK: 3
> > [ 0.008348] GPR00: c0000000004bb2a4 c0000000049cb950 c000000001ccf100
> > 000000000000001f
> > [ 0.008348] GPR04: 3fffffffffff7fff c0000000049cb740 c0000000049cb738
> > 0000000000000000
> > [ 0.008348] GPR08: c0000000029d1230 0000000000000001 c0000000049e8000
> > 0000000000000003
> > [ 0.008348] GPR12: c000000002d514e0 c000000effffeb00 0000000000000000
> > 0000000000000000
> > [ 0.008348] GPR16: 0000000000000000 0000000000000000 0000000000000000
> > 0000000000000000
> > [ 0.008348] GPR20: 0000000000000000 0000000000000000 0000000000000000
> > 0000000000000000
> > [ 0.008348] GPR24: 0000000000000000 0000000000000000 0000000000000000
> > 0000000000000000
> > [ 0.008348] GPR28: 0000000000000000 0000000000000001 c000000002414988
> > c00000000005be40
> > [ 0.008403] NIP [c0000000004bb2a8] trace_hardirqs_off+0x16c/0x1a0
> > [ 0.008408] LR [c0000000004bb2a4] trace_hardirqs_off+0x168/0x1a0
> > [ 0.008413] Call Trace:
> > [ 0.008416] [c0000000049cb950] [c0000000004bb2a4]
> > trace_hardirqs_off+0x168/0x1a0 (unreliable)
> > [ 0.008423] [c0000000049cb9d0] [c00000000005be40]
> > arch_interrupt_enter_prepare+0xa0/0x19c
> > [ 0.008430] [c0000000049cba00] [c00000000005bf78]
> > doorbell_exception+0x3c/0x4c4
> > [ 0.008436] [c0000000049cbaa0] [c00000000000a2fc]
> > doorbell_super_common_virt+0x28c/0x290
> > [ 0.008443] ---- interrupt: a00 at plpar_hcall_norets_notrace+0x18/0x2c
> > [ 0.008449] NIP: c0000000001b4fc8 LR: c0000000001bcea0 CTR:
> > 0000000000000000
> > [ 0.008453] REGS: c0000000049cbad0 TRAP: 0a00 Not tainted (7.1.0-
> > rc5-next-20260529)
> > [ 0.008457] MSR: 8000000000009033 <SF,EE,ME,IR,DR,RI,LE> CR:
> > 24000008 XER: 00000000
> > [ 0.008469] CFAR: 0000000000000000 IRQMASK: 0
> > [ 0.008469] GPR00: 0000000000000000 c0000000049cbd90 c000000001ccf100
> > 0000000000000000
> > [ 0.008469] GPR04: 0000000000000000 8004000038407c10 0000000000000000
> > 0000000000000003
> > [ 0.008469] GPR08: 0000000000000001 0000000000000000 0000000000000090
> > 0000000000000001
> > [ 0.008469] GPR12: 8004000038407c00 c000000effffeb00 0000000000000000
> > 000000002ef01820
> > [ 0.008469] GPR16: 0000000000000000 0000000000000000 0000000000000000
> > 0000000000000000
> > [ 0.008469] GPR20: 0000000000000000 0000000000000000 0000000000000000
> > 0000000000000001
> > [ 0.008469] GPR24: 0000000000000001 000000000000dedc c000000003086150
> > 0000000000000001
> > [ 0.008469] GPR28: c0000000049e8000 c000000002241548 c000000002241550
> > c000000002241548
> > [ 0.008523] NIP [c0000000001b4fc8] plpar_hcall_norets_notrace+0x18/0x2c
> > [ 0.008528] LR [c0000000001bcea0] pseries_lpar_idle.part.0+0x74/0x160
> > [ 0.008533] ---- interrupt: a00
> > [ 0.008536] [c0000000049cbd90] [c0000000049cbe30] 0xc0000000049cbe30
> > (unreliable)
> > [ 0.008544] [c0000000049cbe10] [c000000000022c5c]
> > arch_cpu_idle+0x4c/0x120
> > [ 0.008551] [c0000000049cbe30] [c0000000015afe70]
> > default_idle_call+0x154/0x454
> > [ 0.008558] [c0000000049cbec0] [c0000000002d3dfc]
> > cpuidle_idle_call+0x2dc/0x2e0
> > [ 0.008565] [c0000000049cbf10] [c0000000002d3f48] do_idle+0x148/0x1f0
> > [ 0.008571] [c0000000049cbf60] [c0000000002d43c8]
> > cpu_startup_entry+0x4c/0x50
> > [ 0.008578] [c0000000049cbf90] [c00000000006371c]
> > start_secondary+0x27c/0x28c
> > [ 0.008585] [c0000000049cbfe0] [c00000000000e258]
> > start_secondary_prolog+0x10/0x14
> > [ 0.008590] Code: 4bfffcc4 60000000 3d220132 8929db46 2c090000
> > 4082ff94 3c62ffd6 3d220132 3863d398 9ba9db46 4bd73655 60000000
> > <0fe00000> 60000000 4bffff74 60000000
> > [ 0.008611] irq event stamp: 20
> > [ 0.008614] hardirqs last enabled at (19): [<c0000000002d3dfc>]
> > cpuidle_idle_call+0x2dc/0x2e0
> > [ 0.008620] hardirqs last disabled at (20): [<c00000000005be40>]
> > arch_interrupt_enter_prepare+0xa0/0x19c
> > [ 0.008625] softirqs last enabled at (0): [<c00000000022b6ac>]
> > copy_process+0xb24/0x1dec
> > [ 0.008632] softirqs last disabled at (0): [<0000000000000000>] 0x0
> > [ 0.008637] ---[ end trace 0000000000000000 ]---
> >
>
> Equivalent to trace_hardirqs_off is done by irqentry_enter.
> So it is not essential in arch_interrupt_enter_prepare.
>
> irqentry_enter
> lockdep_hardirqs_off(CALLER_ADDR0);
> trace_hardirqs_off_finish();
>
>
> For reference:
> void trace_hardirqs_off(void)
> {
> lockdep_hardirqs_off(CALLER_ADDR0);
>
> if (!this_cpu_read(tracing_irq_cpu)) {
> this_cpu_write(tracing_irq_cpu, 1);
> tracer_hardirqs_off(CALLER_ADDR0, CALLER_ADDR1);
> trace(irq_disable, TP_ARGS(CALLER_ADDR0, CALLER_ADDR1));
> }
> }
>
> void trace_hardirqs_off_finish(void)
> {
> if (!this_cpu_read(tracing_irq_cpu)) {
> this_cpu_write(tracing_irq_cpu, 1);
> tracer_hardirqs_off(CALLER_ADDR0, CALLER_ADDR1);
> trace(irq_disable, TP_ARGS(CALLER_ADDR0, CALLER_ADDR1));
> }
>
> }
>
> >
> > Warning2:
> >
> > [ 0.010098] ------------[ cut here ]------------
> > [ 0.010103] DEBUG_LOCKS_WARN_ON(lockdep_hardirq_context())
> > [ 0.010107] WARNING: kernel/locking/lockdep.c:4406 at
> > lockdep_hardirqs_on_prepare+0x22c/0x2d4, CPU#0: swapper/0/1
> > [ 0.010116] Modules linked in:
> > [ 0.010120] CPU: 0 UID: 0 PID: 1 Comm: swapper/0 Tainted: G W
> > 7.1.0-rc5-next-20260529 #1 PREEMPT(lazy)
> > [ 0.010125] Tainted: [W]=WARN
> > [ 0.010127] Hardware name: IBM,9080-HEX Power11 (architected)
> > 0x820200 0xf000007 of:IBM,FW1110.01 (NH1110_069) hv:phyp pSeries
> > [ 0.010131] NIP: c00000000031549c LR: c000000000315498 CTR:
> > 0000000000000000
> > [ 0.010135] REGS: c0000000045bf100 TRAP: 0700 Tainted: G W
> > (7.1.0-rc5-next-20260529)
> > [ 0.010139] MSR: 8000000002021033 <SF,VEC,ME,IR,DR,RI,LE> CR:
> > 44044228 XER: 00000005
> > [ 0.010147] CFAR: c00000000022e9d4 IRQMASK: 3
> > [ 0.010147] GPR00: c000000000315498 c0000000045bf3c0 c000000001ccf100
> > 000000000000002e
> > [ 0.010147] GPR04: 3fffffffffff7fff c0000000045bf1b0 c0000000045bf1a8
> > 0000000000000000
> > [ 0.010147] GPR08: c0000000029d1230 0000000000010002 c0000000048b2b00
> > 0000000000000003
> > [ 0.010147] GPR12: c000000002d514e0 c000000003ea1000 c000000000011ae4
> > 0000000000000000
> > [ 0.010147] GPR16: 0000000000000000 0000000000000000 0000000000000000
> > 0000000000000000
> > [ 0.010147] GPR20: 0000000000000000 0000000000000004 c000000000272684
> > c0000000029bb0c0
> > [ 0.010147] GPR24: 0000000ebc171000 c000000ebeb63850 c000000003084d00
> > c00000000308b2d0
> > [ 0.010147] GPR28: c00000000002a488 0000000000000001 0000000000000000
> > c000000002246e08
> > [ 0.010188] NIP [c00000000031549c]
> > lockdep_hardirqs_on_prepare+0x22c/0x2d4
> > [ 0.010192] LR [c000000000315498]
> > lockdep_hardirqs_on_prepare+0x228/0x2d4
> > [ 0.010196] Call Trace:
> > [ 0.010198] [c0000000045bf3c0] [c000000000315498]
> > lockdep_hardirqs_on_prepare+0x228/0x2d4 (unreliable)
> > [ 0.010204] [c0000000045bf430] [c0000000004bb778]
> > trace_hardirqs_on+0xec/0x1b0
> > [ 0.010209] [c0000000045bf4b0] [c0000000015ad574]
> > irqentry_exit+0x58c/0xe1c
> > [ 0.010213] [c0000000045bf540] [c00000000002a488]
> > timer_interrupt+0x210/0x564
> > [ 0.010219] [c0000000045bf5f0] [c00000000003b960]
> > __replay_soft_interrupts+0x14c/0x374
> > [ 0.010224] [c0000000045bf7d0] [c00000000003bd74]
> > arch_local_irq_restore.part.0+0x1ec/0x224
> > [ 0.010230] [c0000000045bf810] [c0000000015c17d4]
> > _raw_spin_unlock_irqrestore+0x54/0xac
> > [ 0.010235] [c0000000045bf840] [c0000000002cdd54]
> > set_user_nice+0x110/0x220
> > [ 0.010240] [c0000000045bf8e0] [c000000000266a94]
> > create_worker+0x13c/0x310
> > [ 0.010245] [c0000000045bf9a0] [c0000000002726f4]
> > workqueue_prepare_cpu+0x70/0xe4
> > [ 0.010251] [c0000000045bf9e0] [c000000000232604]
> > cpuhp_invoke_callback+0x1e8/0x3c0
> > [ 0.010256] [c0000000045bfa50] [c000000000232924]
> > __cpuhp_invoke_callback_range+0x148/0x230
> > [ 0.010261] [c0000000045bfaf0] [c000000000234f68] _cpu_up+0x19c/0x3cc
> > [ 0.010265] [c0000000045bfbb0] [c00000000023533c] cpu_up+0x1a4/0x1f4
> > [ 0.010269] [c0000000045bfc40] [c00000000203d1f4]
> > bringup_nonboot_cpus+0xbc/0x128
> > [ 0.010275] [c0000000045bfca0] [c00000000204b98c] smp_init+0x44/0xd0
> > [ 0.010279] [c0000000045bfd00] [c000000002006d4c]
> > kernel_init_freeable+0x23c/0x3b0
> > [ 0.010284] [c0000000045bfdc0] [c000000000011b0c] kernel_init+0x30/0x274
> > [ 0.010288] [c0000000045bfe30] [c00000000000debc]
> > ret_from_kernel_user_thread+0x14/0x1c
> > [ 0.010292] ---- interrupt: 0 at 0x0
> > [ 0.010296] Code: 4182ff74 3d22013c 3929c1d4 81290000 2c090000
> > 4082ff60 3c82ffda 3c62ffd9 3884f998 38634590 4bf19461 60000000
> > <0fe00000> 4bffff40 60000000 60000000
> > [ 0.010310] irq event stamp: 7440
> > [ 0.010312] hardirqs last enabled at (7439): [<c0000000015c1824>]
> > _raw_spin_unlock_irqrestore+0xa4/0xac
> > [ 0.010317] hardirqs last disabled at (7440): [<c00000000003bc30>]
> > arch_local_irq_restore.part.0+0xa8/0x224
> > [ 0.010323] softirqs last enabled at (0): [<c00000000022b6ac>]
> > copy_process+0xb24/0x1dec
> > [ 0.010328] softirqs last disabled at (0): [<0000000000000000>] 0x0
> > [ 0.010331] ---[ end trace 0000000000000000 ]---
> >
> >
>
> This should be fixed by the diff where interrupts are disabled
> when going to irqentry_exit.
>
> >
> > Warning3:
> >
> > [ 1.718239] ------------[ cut here ]------------
> > [ 1.718247] RCU not watching for tracepoint
> > [ 1.718255] WARNING: ./include/trace/events/preemptirq.h:40 at
> > trace_hardirqs_on+0x180/0x1b0, CPU#19: swapper/19/0
> > [ 1.718266] Modules linked in: ibmvscsi ibmveth scsi_transport_srp
> > pseries_wdt
> > [ 1.718275] CPU: 19 UID: 0 PID: 0 Comm: swapper/19 Tainted: G W
> > 7.1.0-rc5-next-20260529 #1 PREEMPT(lazy)
> > [ 1.718280] Tainted: [W]=WARN
> > [ 1.718283] Hardware name: IBM,9080-HEX Power11 (architected)
> > 0x820200 0xf000007 of:IBM,FW1110.01 (NH1110_069) hv:phyp pSeries
> > [ 1.718287] NIP: c0000000004bb80c LR: c0000000004bb808 CTR:
> > 0000000000000000
> > [ 1.718290] REGS: c000000004a4b9e0 TRAP: 0700 Tainted: G W
> > (7.1.0-rc5-next-20260529)
> > [ 1.718294] MSR: 8000000002823033 <SF,VEC,VSX,FP,ME,IR,DR,RI,LE> CR:
> > 44000208 XER: 00000005
> > [ 1.718305] CFAR: c00000000022e9d4 IRQMASK: 3
> > [ 1.718305] GPR00: c0000000004bb808 c000000004a4bca0 c000000001ccf100
> > 000000000000001f
> > [ 1.718305] GPR04: 3fffffffffff7fff c000000004a4ba90 c000000004a4ba88
> > 0000000ebe5e2000
> > [ 1.718305] GPR08: 0000000000000027 0000000000000002 c000000004a62b00
> > 0000000000000003
> > [ 1.718305] GPR12: c000000002d514e0 c000000effff1300 0000000000000000
> > 000000002ef01a60
> > [ 1.718305] GPR16: 0000000000000000 0000000000000000 0000000000000000
> > 0000000000000000
> > [ 1.718305] GPR20: 0000000000000000 0000000000000000 0000000000000000
> > 00000000666a3c88
> > [ 1.718305] GPR24: c00000000105088c 000000000000dedc c000000003084d00
> > 0000000000000000
> > [ 1.718305] GPR28: c000000ec09fe440 0000000000000001 c000000002414988
> > c00000000003bca8
> > [ 1.718347] NIP [c0000000004bb80c] trace_hardirqs_on+0x180/0x1b0
> > [ 1.718351] LR [c0000000004bb808] trace_hardirqs_on+0x17c/0x1b0
> > [ 1.718355] Call Trace:
> > [ 1.718357] [c000000004a4bca0] [c0000000004bb808]
> > trace_hardirqs_on+0x17c/0x1b0 (unreliable)
> > [ 1.718362] [c000000004a4bd20] [c00000000003bca8]
> > arch_local_irq_restore.part.0+0x120/0x224
> > [ 1.718369] [c000000004a4bd60] [c0000000015b065c] snooze_loop+0xa0/0x270
> > [ 1.718374] [c000000004a4bda0] [c0000000015af06c]
> > cpuidle_enter_state+0x110/0x8fc
> > [ 1.718379] [c000000004a4be60] [c00000000105088c]
> > cpuidle_enter+0x50/0x74
> > [ 1.718384] [c000000004a4bea0] [c0000000002ca85c] call_cpuidle+0x48/0xa0
> > [ 1.718389] [c000000004a4bec0] [c0000000002d3c80]
> > cpuidle_idle_call+0x160/0x2e0
> > [ 1.718395] [c000000004a4bf10] [c0000000002d3f48] do_idle+0x148/0x1f0
> > [ 1.718400] [c000000004a4bf60] [c0000000002d43c8]
> > cpu_startup_entry+0x4c/0x50
> > [ 1.718405] [c000000004a4bf90] [c00000000006371c]
> > start_secondary+0x27c/0x28c
> > [ 1.718411] [c000000004a4bfe0] [c00000000000e258]
> > start_secondary_prolog+0x10/0x14
> > [ 1.718415] Code: 60000000 3d220132 8929db48 2c090000 4082ff64
> > 3c62ffd6 39200001 3d420132 3863d398 992adb48 4bd730f1 60000000
> > <0fe00000> 60000000 4bffff40 60000000
> > [ 1.718430] irq event stamp: 0
> > [ 1.718432] hardirqs last enabled at (0): [<0000000000000000>] 0x0
> > [ 1.718436] hardirqs last disabled at (0): [<c00000000022b6ac>]
> > copy_process+0xb24/0x1dec
> > [ 1.718442] softirqs last enabled at (0): [<c00000000022b6ac>]
> > copy_process+0xb24/0x1dec
> > [ 1.718447] softirqs last disabled at (0): [<0000000000000000>] 0x0
> > [ 1.718450] ---[ end trace 0000000000000000 ]---
> >
> >
>
> Idle loop is expected to use the variant without the traces.
> raw_* is being used in the diff.
>
> >
> > I am reporting all three warnings in one report, as its flagging
> > inconsistencies around IRQ enable/disable transitions.
> >
> >
> > If you happen to fix this, please add below tag.
> >
> >
> > Reported-by: Venkat Rao Bagalkote <venkat88@linux.ibm.com>
> >
> >
> >
> > Regards,
> >
> > Venkat.
> >
> >
>
>
> Can you try the below diff and check if all three gets solved?
>
>
> diff --git a/arch/powerpc/include/asm/entry-common.h b/arch/powerpc/include/asm/entry-common.h
> index de5601282755..a7f30875d14a 100644
> --- a/arch/powerpc/include/asm/entry-common.h
> +++ b/arch/powerpc/include/asm/entry-common.h
> @@ -210,9 +210,6 @@ static inline void arch_interrupt_enter_prepare(struct pt_regs *regs)
> /* Enable MSR[RI] early, to support kernel SLB and hash faults */
> #endif
> - if (!regs_irqs_disabled(regs))
> - trace_hardirqs_off();
> -
> if (user_mode(regs)) {
> kuap_lock();
> account_cpu_user_entry();
> @@ -253,16 +250,17 @@ static inline void arch_interrupt_enter_prepare(struct pt_regs *regs)
> static inline void arch_interrupt_exit_prepare(struct pt_regs *regs)
> {
> if (user_mode(regs)) {
> - BUG_ON(regs_is_unrecoverable(regs));
> - BUG_ON(regs_irqs_disabled(regs));
> + WARN_ON(regs_is_unrecoverable(regs));
> + WARN_ON(regs_irqs_disabled(regs));
> /*
> * We don't need to restore AMR on the way back to userspace for KUAP.
> * AMR can only have been unlocked if we interrupted the kernel.
> */
> kuap_assert_locked();
> -
> - local_irq_disable();
> }
> +
> + /* irqentry_exit expects to be called with interrupts disabled */
> + local_irq_disable();
> }
Hey,
I tried this, but it didn't resolve the issue. I guess the cause is something else.
> static inline void arch_interrupt_async_enter_prepare(struct pt_regs *regs)
> diff --git a/drivers/cpuidle/cpuidle-powernv.c b/drivers/cpuidle/cpuidle-powernv.c
> index b89e7111e7b8..a0d18deb7119 100644
> --- a/drivers/cpuidle/cpuidle-powernv.c
> +++ b/drivers/cpuidle/cpuidle-powernv.c
> @@ -73,7 +73,7 @@ static int snooze_loop(struct cpuidle_device *dev,
> set_thread_flag(TIF_POLLING_NRFLAG);
> - local_irq_enable();
> + raw_local_irq_enable();
> snooze_exit_time = get_tb() + get_snooze_timeout(dev, drv, index);
> dev->poll_time_limit = false;
> @@ -100,7 +100,7 @@ static int snooze_loop(struct cpuidle_device *dev,
> if (!dev->poll_time_limit)
> clear_thread_flag(TIF_POLLING_NRFLAG);
> - local_irq_disable();
> + raw_local_irq_disable();
> return index;
> }
>
>
I tried this diff. It doesn't seem to solve the issue.
[ 0.010278] ------------[ cut here ]------------
[ 0.010285] DEBUG_LOCKS_WARN_ON(lockdep_hardirq_context())
[ 0.010300] WARNING: kernel/locking/lockdep.c:4406 at lockdep_hardirqs_on_prepare+0x22c/0x2e0, CPU#184: swapper/184/0
[ 0.010316] Modules linked in:
[ 0.010332] CPU: 184 UID: 0 PID: 0 Comm: swapper/184 Not tainted 7.1.0-rc6-next-20260601-dirty #2 PREEMPT(lazy)
[ 0.010340] Hardware name: IBM,9043-MRU Power11 (architected) 0x820200 0xf000007 of:IBM,FW1110.11 (RF1110_119) hv:phyp pSeries
[ 0.010347] NIP: c00000000036acac LR: c00000000036aca8 CTR: 0000000000000000
[ 0.010353] REGS: c00000000417b4a0 TRAP: 0700 Not tainted (7.1.0-rc6-next-20260601-dirty)
[ 0.010359] MSR: 8000000002021033 <SF,VEC,ME,IR,DR,RI,LE> CR: 44004228 XER: 00000005
[ 0.010377] CFAR: c000000000261518 IRQMASK: 1
GPR00: c00000000036aca8 c00000000417b760 c00000000251a700 000000000000002e
GPR04: 3fffffffffff7fff 0000000000000001 c00000000417b558 0000000000000000
GPR08: c000000003d2cce8 0000000000010002 c000000003c91700 c00000000417b560
GPR12: c000007fd383ff90 c000000004f80000 0000000000000000 0000007ff9e90000
GPR16: 0000007ff9e70000 0000000000000000 0000007ff9e90114 0000000000000000
GPR20: 0000000000000000 0000000000000000 0000000000000000 0000000000000000
GPR24: 0000000048000054 c0000000032c0680 c000000004fe0000 c00000000002a4c4
GPR28: c0000000041e3d80 c0000000041ea340 c0000000032c0580 c0000000032c6e08
[ 0.010447] NIP [c00000000036acac] lockdep_hardirqs_on_prepare+0x22c/0x2e0
[ 0.010453] LR [c00000000036aca8] lockdep_hardirqs_on_prepare+0x228/0x2e0
[ 0.010464] Call Trace:
[ 0.010470] [c00000000417b760] [c00000000036aca8] lockdep_hardirqs_on_prepare+0x228/0x2e0 (unreliable)
[ 0.010482] [c00000000417b7e0] [c0000000005269e0] trace_hardirqs_on+0xf0/0x1b0
[ 0.010495] [c00000000417b860] [c000000001c5f474] irqentry_exit+0x544/0x1010
[ 0.010506] [c00000000417b8f0] [c00000000002a4c4] timer_interrupt+0x224/0x580
[ 0.010514] [c00000000417b9a0] [c00000000003d840] __replay_soft_interrupts+0x150/0x3a0
[ 0.010522] [c00000000417bb80] [c00000000003dc24] arch_local_irq_restore.part.0+0x194/0x200
[ 0.010529] [c00000000417bbc0] [c000000000187520] patch_mem+0x360/0x520
[ 0.010541] [c00000000417bc50] [c000000000060ba4] arch_jump_label_transform+0x64/0x78
[ 0.010551] [c00000000417bc70] [c0000000007043f0] __jump_label_update+0x70/0x140
[ 0.010558] [c00000000417bcf0] [c000000000704d70] static_key_enable_cpuslocked+0xd0/0x120
[ 0.010566] [c00000000417bd60] [c000000000704dec] static_key_enable+0x2c/0x50
[ 0.010573] [c00000000417bd90] [c0000000030764a8] security_add_hooks+0x74/0xb8
[ 0.010586] [c00000000417be10] [c00000000307542c] capability_init+0x30/0x48
[ 0.010598] [c00000000417be30] [c000000003075b24] lsm_init_single+0x74/0xb4
[ 0.010607] [c00000000417bea0] [c0000000030769fc] security_init+0x478/0x4a8
[ 0.010614] [c00000000417bf30] [c00000000300e838] start_kernel+0x5f0/0x670
[ 0.010623] [c00000000417bfe0] [c00000000000e998] start_here_common+0x1c/0x20
[ 0.010631] Code: 4182ff80 3d2201cd 3929fc44 81290000 2c090000 4082ff6c 3c82ffd3 3c62ffd2 3884cc60 38631590 4bef6775 60000000 <0fe00000> 4bffff4c 60000000 60000000
[ 0.010664] irq event stamp: 38250
[ 0.010672] hardirqs last enabled at (38249): [<c000000000187514>] patch_mem+0x354/0x520
[ 0.010682] hardirqs last disabled at (38250): [<c00000000003db34>] arch_local_irq_restore.part.0+0xa4/0x200
[ 0.010691] softirqs last enabled at (0): [<0000000000000000>] 0x0
[ 0.010700] softirqs last disabled at (0): [<0000000000000000>] 0x0
[ 0.010705] ---[ end trace 0000000000000000 ]---
[...]
[ 1.002143] ------------[ cut here ]------------
[ 1.002153] RCU not watching for tracepoint
[ 1.002162] SED: plpks not available
[ 1.002165] WARNING: ./include/trace/events/preemptirq.h:36 at trace_hardirqs_off+0x168/0x190, CPU#177: swapper/177/0
[ 1.002177] Modules linked in: aes_gcm_p10_crypto crypto_simd cryptd
[ 1.002190] CPU: 177 UID: 0 PID: 0 Comm: swapper/177 Tainted: G W 7.1.0-rc6-next-20260601-dirty #2 PREEMPT(lazy)
[ 1.002202] Tainted: [W]=WARN
[ 1.002206] Hardware name: IBM,9043-MRU Power11 (architected) 0x820200 0xf000007 of:IBM,FW1110.11 (RF1110_119) hv:phyp pSeries
[ 1.002215] NIP: c000000000526f98 LR: c000000000526f94 CTR: 0000000000000000
[ 1.002224] REGS: c00000001249f9b0 TRAP: 0700 Tainted: G W (7.1.0-rc6-next-20260601-dirty)
[ 1.002234] MSR: 8000000002021033 <SF,VEC,ME,IR,DR,RI,LE> CR: 44000288 XER: 00000005
[ 1.002244] CFAR: c000000000261518 IRQMASK: 3
GPR00: c000000000526f94 c00000001249fc70 c00000000251a700 000000000000001f
GPR04: 3fffffffffff7fff 0000000000000001 c00000001249fa68 0000000000000001
GPR08: c000000003d2cce8 0000000000000002 c00000001241dc00 c00000001249fa70
GPR12: c000007fd383ff90 c000007ffbda3f00 0000000000000000 0000000000000000
GPR16: 0000000000000000 0000000000000000 0000000000000000 0000000000000000
GPR20: 0000000000000000 0000000000000000 000000003bbb7c02 c00000000162d534
GPR24: 00000000000000b1 0000000000000000 c0000000041e3d80 0000000000000000
GPR28: c0000000023de0a8 c000007fd5efc740 c0000000032c1ad0 c00000000003db34
[ 1.002318] NIP [c000000000526f98] trace_hardirqs_off+0x168/0x190
[ 1.002325] LR [c000000000526f94] trace_hardirqs_off+0x164/0x190
[ 1.002331] Call Trace:
[ 1.002335] [c00000001249fc70] [c000000000526f94] trace_hardirqs_off+0x164/0x190 (unreliable)
[ 1.002342] [c00000001249fcf0] [c00000000003db34] arch_local_irq_restore.part.0+0xa4/0x200
[ 1.002347] [c00000001249fd30] [c000000001c62c24] snooze_loop+0xb4/0x28c
[ 1.002353] [c00000001249fd80] [c000000001c612c8] cpuidle_enter_state+0x118/0xae0
[ 1.002358] [c00000001249fe50] [c00000000162d534] cpuidle_enter+0x44/0x60
[ 1.002363] [c00000001249fe90] [c000000000315cf8] call_cpuidle+0x48/0xa0
[ 1.002369] [c00000001249feb0] [c00000000031ffc0] cpuidle_idle_call+0x160/0x2d0
[ 1.002375] [c00000001249ff00] [c000000000320288] do_idle+0x158/0x200
[ 1.002384] [c00000001249ff50] [c000000000320714] cpu_startup_entry+0x44/0x50
[ 1.002394] [c00000001249ff80] [c00000000006a0b0] start_secondary+0x290/0x300
[ 1.002401] [c00000001249ffe0] [c00000000000e258] start_secondary_prolog+0x10/0x14
[ 1.002411] Code: 60000000 60420000 3d4201c1 894a3c68 2c0a0000 4082ff9c 3d4201c1 3c62ffcf 386386c8 992a3c68 4bd3a489 60000000 <0fe00000> 4bffff7c 49739cf9 60000000
[ 1.002426] irq event stamp: 0
[ 1.002429] hardirqs last enabled at (0): [<0000000000000000>] 0x0
[ 1.002434] hardirqs last disabled at (0): [<c00000000025d72c>] copy_process+0xecc/0x2910
[ 1.002440] softirqs last enabled at (0): [<c00000000025d72c>] copy_process+0xecc/0x2910
[ 1.002445] softirqs last disabled at (0): [<0000000000000000>] 0x0
[ 1.002448] ---[ end trace 0000000000000000 ]---
[ 1.002454] ------------[ cut here ]------------
[ 1.002457] RCU not watching for tracepoint
[ 1.002460] WARNING: ./include/trace/events/preemptirq.h:40 at trace_hardirqs_on+0x17c/0x1b0, CPU#177: swapper/177/0
[ 1.002468] Modules linked in: aes_gcm_p10_crypto crypto_simd cryptd
[ 1.002473] CPU: 177 UID: 0 PID: 0 Comm: swapper/177 Tainted: G W 7.1.0-rc6-next-20260601-dirty #2 PREEMPT(lazy)
[ 1.002478] Tainted: [W]=WARN
[ 1.002481] Hardware name: IBM,9043-MRU Power11 (architected) 0x820200 0xf000007 of:IBM,FW1110.11 (RF1110_119) hv:phyp pSeries
[ 1.002493] NIP: c000000000526a6c LR: c000000000526a68 CTR: 0000000000000000
[ 1.002502] REGS: c00000001249f9b0 TRAP: 0700 Tainted: G W (7.1.0-rc6-next-20260601-dirty)
[ 1.002512] MSR: 8000000002021033 <SF,VEC,ME,IR,DR,RI,LE> CR: 44000288 XER: 00000005
[ 1.002524] CFAR: c000000000261518 IRQMASK: 1
GPR00: c000000000526a68 c00000001249fc70 c00000000251a700 000000000000001f
GPR04: 3fffffffffff7fff 0000000000000001 c00000001249fa68 0000000000000001
GPR08: c000000003d2cce8 0000000000000002 c00000001241dc00 c00000001249fa70
GPR12: c000007fd383ff90 c000007ffbda3f00 0000000000000000 0000000000000000
GPR16: 0000000000000000 0000000000000000 0000000000000000 0000000000000000
GPR20: 0000000000000000 0000000000000000 000000003bbb7c02 c00000000162d534
GPR24: 00000000000000b1 0000000000000000 c0000000041e3d80 0000000000000000
GPR28: c0000000023de0a8 c000007fd5efc740 c0000000032c1ad0 c00000000003dbc4
[ 1.002573] NIP [c000000000526a6c] trace_hardirqs_on+0x17c/0x1b0
[ 1.002580] LR [c000000000526a68] trace_hardirqs_on+0x178/0x1b0
[ 1.002586] Call Trace:
[ 1.002590] [c00000001249fc70] [c000000000526a68] trace_hardirqs_on+0x178/0x1b0 (unreliable)
[ 1.002600] [c00000001249fcf0] [c00000000003dbc4] arch_local_irq_restore.part.0+0x134/0x200
[ 1.002607] [c00000001249fd30] [c000000001c62c24] snooze_loop+0xb4/0x28c
[ 1.002614] [c00000001249fd80] [c000000001c612c8] cpuidle_enter_state+0x118/0xae0
[ 1.002620] [c00000001249fe50] [c00000000162d534] cpuidle_enter+0x44/0x60
[ 1.002625] [c00000001249fe90] [c000000000315cf8] call_cpuidle+0x48/0xa0
[ 1.002636] [c00000001249feb0] [c00000000031ffc0] cpuidle_idle_call+0x160/0x2d0
[ 1.002645] [c00000001249ff00] [c000000000320288] do_idle+0x158/0x200
[ 1.002651] [c00000001249ff50] [c000000000320714] cpu_startup_entry+0x44/0x50
[ 1.002655] [c00000001249ff80] [c00000000006a0b0] start_secondary+0x290/0x300
[ 1.002660] [c00000001249ffe0] [c00000000000e258] start_secondary_prolog+0x10/0x14
[ 1.002664] Code: 4bffff80 3d2201c1 89293c66 2c090000 4082ff6c 39200001 3d4201c1 3c62ffcf 386386c8 992a3c66 4bd3a9b5 60000000 <0fe00000> e9010060 4bffff44 60000000
[ 1.002678] irq event stamp: 0
[ 1.002680] hardirqs last enabled at (0): [<0000000000000000>] 0x0
[ 1.002688] hardirqs last disabled at (0): [<c00000000025d72c>] copy_process+0xecc/0x2910
[ 1.002696] softirqs last enabled at (0): [<c00000000025d72c>] copy_process+0xecc/0x2910
[ 1.002706] softirqs last disabled at (0): [<0000000000000000>] 0x0
[ 1.002712] ---[ end trace 0000000000000000 ]---
[...]
This is most probably due to generic entry/exit.
I am trying to find the root cause for this.
Regards,
Mukesh
^ permalink raw reply
* Re: [kvm-unit-tests RFC PATCH 2/6] configure: Make arch_libdir a first-class entity
From: Andrew Jones @ 2026-06-02 15:54 UTC (permalink / raw)
To: Chinmay Rath; +Cc: thuth, npiggin, harshpb, lvivier, linuxppc-dev, kvm, sbhat
In-Reply-To: <20260602064806.3101025-3-rathc@linux.ibm.com>
On Tue, Jun 02, 2026 at 12:18:02PM +0530, Chinmay Rath wrote:
> From: Nicholas Piggin <npiggin@gmail.com>
>
> arch_libdir was brought in to improve the heuristic determination of
> the lib/ directory based on arch and testdir names, but it did not
> entirely clean that mess up.
>
> Remove the arch_libdir->arch->testdir heuristic and just require
> that everybody sets arch_libdir correctly. Fail if the lib/arch or
> lib/arch/asm directories cannot be found.
>
> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
> Signed-off-by: Chinmay Rath <rathc@linux.ibm.com>
> ---
> Makefile | 2 +-
> configure | 20 ++++++++++++--------
> 2 files changed, 13 insertions(+), 9 deletions(-)
>
Reviewed-by: Andrew Jones <andrew.jones@linux.dev>
^ permalink raw reply
* Re: [PATCH v4] powerpc/pseries/Kconfig: Enable CONFIG_VPA_PMU to be used with KVM
From: Harsh Prateek Bora @ 2026-06-02 17:11 UTC (permalink / raw)
To: Gautam Menghani, maddy, mpe, npiggin, chleroy
Cc: linuxppc-dev, linux-kernel, stable, Sean Christopherson
In-Reply-To: <20260602121706.8423-1-gautam@linux.ibm.com>
On 02/06/26 5:47 pm, Gautam Menghani wrote:
> Currently, CONFIG_VPA_PMU is not enabled by default, and consequently
> cannot be used for KVM guests at all, unless explicitly enabled on
> the host kernel.
>
> Mark CONFIG_VPA_PMU as "default m" to ensure it is available when KVM is
> being used.
>
> Fixes: 176cda0619b6c ("powerpc/perf: Add perf interface to expose vpa counters")
> Cc: stable@vger.kernel.org # v6.13+
> Suggested-by: Sean Christopherson <seanjc@google.com>
> Signed-off-by: Gautam Menghani <gautam@linux.ibm.com>
> ---
> v3 -> v4:
> 1. Reword the patch description (Harsh)
>
Reviewed-by: Harsh Prateek Bora <harshpb@linux.ibm.com>
> v2 -> v3:
> 1. Mark CONFIG_VPA_PMU as default m so that it can be separately disabled
> (Sean)
>
> v1 -> v2:
> 1. Rebased on latest master
>
> arch/powerpc/platforms/pseries/Kconfig | 1 +
> 1 file changed, 1 insertion(+)
>
> diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/pseries/Kconfig
> index f7052b131a4c..74910ce3a541 100644
> --- a/arch/powerpc/platforms/pseries/Kconfig
> +++ b/arch/powerpc/platforms/pseries/Kconfig
> @@ -154,6 +154,7 @@ config HV_PERF_CTRS
> config VPA_PMU
> tristate "VPA PMU events"
> depends on KVM_BOOK3S_64_HV && HV_PERF_CTRS
> + default m
> help
> Enable access to the VPA PMU counters via perf. This enables
> code that support measurement for KVM on PowerVM(KoP) feature.
^ permalink raw reply
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox