From: ilya.gladyshev@linux.dev
To: ilya.gladyshev@linux.dev
Cc: ivgorbunov@me.com, Liam.Howlett@oracle.com,
akpm@linux-foundation.org, apopple@nvidia.com,
artem.kuzin@huawei.com, baolin.wang@linux.alibaba.com,
david@kernel.org, foxido@foxido.dev, harry.yoo@oracle.com,
linux-kernel@vger.kernel.org, linux-mm@kvack.org,
lorenzo.stoakes@oracle.com, mhocko@suse.com,
muchun.song@linux.dev, rppt@kernel.org, surenb@google.com,
torvalds@linuxfoundation.org, vbabka@suse.cz,
willy@infradead.org, yuzhao@google.com, ziy@nvidia.com,
pfalcato@suse.de, kirill@shutemov.name
Subject: [PATCH v3 1/2] mm: drop page refcount zero state semantics
Date: Thu, 04 Jun 2026 10:15:12 +0000 [thread overview]
Message-ID: <7c28d766b007345f5f31ba9a086a3bffe95a013d@linux.dev> (raw)
In-Reply-To: <5dabf3a748fee0c7b142c74367e7586f5db1ed1e@linux.dev>
From: Gorbunov Ivan <ivgorbunov@me.com>
Some call sites manipulate the page refcount directly via
set_page_count() instead of using a more specific API such as
set_page_count_as_frozen() / init_page_count().
This conflicts with the next patch, which will stop treating a zeroed
refcount as the indicator of a frozen page. To prepare for that change,
this patch:
- "Deprecates" the internal assumption that a frozen page has refcount=0
(and vice versa). Callers of page_ref_count() still see 0 for frozen
pages.
- Inserts VM_BUG_ON() checks in every refcount API function to prevent
the following erroneous behaviour:
page = alloc_frozen_page() // page is frozen
page_ref_inc(page) // BUG: increment on a frozen page instead of init
- Renames the page_ref/folio_ref _unless_zero() helpers to _unless_frozen()
Reviewed-by: Artem Kuzin <artem.kuzin@huawei.com>
Co-developed-by: Gladyshev Ilya <ilya.gladyshev@linux.dev>
Signed-off-by: Gladyshev Ilya <ilya.gladyshev@linux.dev>
Signed-off-by: Gorbunov Ivan <ivgorbunov@me.com>
Acked-by: Bjorn Helgaas <bhelgaas@google.com> # p2pdma.c
---
drivers/pci/p2pdma.c | 4 ++--
include/linux/mm.h | 2 +-
include/linux/page_ref.h | 31 +++++++++++++++++++++++-------
kernel/liveupdate/kexec_handover.c | 6 +++---
lib/test_hmm.c | 4 ++--
mm/hugetlb.c | 2 +-
mm/internal.h | 2 +-
mm/memremap.c | 4 ++--
mm/mm_init.c | 6 +++---
mm/page_alloc.c | 4 ++--
10 files changed, 41 insertions(+), 24 deletions(-)
diff --git a/drivers/pci/p2pdma.c b/drivers/pci/p2pdma.c
index 7c898542af8d..7aca5852dccc 100644
--- a/drivers/pci/p2pdma.c
+++ b/drivers/pci/p2pdma.c
@@ -148,7 +148,7 @@ static int p2pmem_alloc_mmap(struct file *filp, struct kobject *kobj,
* using it.
*/
VM_WARN_ON_ONCE_PAGE(page_ref_count(page), page);
- set_page_count(page, 1);
+ init_page_count(page);
ret = vm_insert_page(vma, vaddr, page);
if (ret) {
gen_pool_free(p2pdma->pool, (uintptr_t)kaddr, len);
@@ -158,7 +158,7 @@ static int p2pmem_alloc_mmap(struct file *filp, struct kobject *kobj,
* because we don't want to trigger the
* p2pdma_folio_free() path.
*/
- set_page_count(page, 0);
+ set_page_count_as_frozen(page);
percpu_ref_put(ref);
return ret;
}
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 06bbe9eba636..34f83c5c2d24 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1780,7 +1780,7 @@ static inline int folio_put_testzero(struct folio *folio)
*/
static inline bool get_page_unless_zero(struct page *page)
{
- return page_ref_add_unless_zero(page, 1);
+ return page_ref_add_unless_frozen(page, 1);
}
static inline struct folio *folio_get_nontail_page(struct page *page)
diff --git a/include/linux/page_ref.h b/include/linux/page_ref.h
index 94d3f0e71c06..24b09c8fbb68 100644
--- a/include/linux/page_ref.h
+++ b/include/linux/page_ref.h
@@ -62,6 +62,11 @@ static inline void __page_ref_unfreeze(struct page *page, int v)
#endif
+static inline bool __page_count_is_frozen(int count)
+{
+ return count == 0;
+}
+
static inline int page_ref_count(const struct page *page)
{
return atomic_read(&page->_refcount);
@@ -101,9 +106,9 @@ static inline void set_page_count(struct page *page, int v)
__page_ref_set(page, v);
}
-static inline void folio_set_count(struct folio *folio, int v)
+static inline void folio_init_count(struct folio *folio)
{
- set_page_count(&folio->page, v);
+ set_page_count(&folio->page, 1);
}
/*
@@ -115,8 +120,14 @@ static inline void init_page_count(struct page *page)
set_page_count(page, 1);
}
+static inline void set_page_count_as_frozen(struct page *page)
+{
+ set_page_count(page, 0);
+}
+
static inline void page_ref_add(struct page *page, int nr)
{
+ VM_BUG_ON(__page_count_is_frozen(page_count(page)));
atomic_add(nr, &page->_refcount);
if (page_ref_tracepoint_active(page_ref_mod))
__page_ref_mod(page, nr);
@@ -129,6 +140,7 @@ static inline void folio_ref_add(struct folio *folio, int nr)
static inline void page_ref_sub(struct page *page, int nr)
{
+ VM_BUG_ON(__page_count_is_frozen(page_count(page)));
atomic_sub(nr, &page->_refcount);
if (page_ref_tracepoint_active(page_ref_mod))
__page_ref_mod(page, -nr);
@@ -142,6 +154,7 @@ static inline void folio_ref_sub(struct folio *folio, int nr)
static inline int folio_ref_sub_return(struct folio *folio, int nr)
{
int ret = atomic_sub_return(nr, &folio->_refcount);
+ VM_BUG_ON(__page_count_is_frozen(ret + nr));
if (page_ref_tracepoint_active(page_ref_mod_and_return))
__page_ref_mod_and_return(&folio->page, -nr, ret);
@@ -150,6 +163,7 @@ static inline int folio_ref_sub_return(struct folio *folio, int nr)
static inline void page_ref_inc(struct page *page)
{
+ VM_BUG_ON(__page_count_is_frozen(page_count(page)));
atomic_inc(&page->_refcount);
if (page_ref_tracepoint_active(page_ref_mod))
__page_ref_mod(page, 1);
@@ -162,6 +176,7 @@ static inline void folio_ref_inc(struct folio *folio)
static inline void page_ref_dec(struct page *page)
{
+ VM_BUG_ON(__page_count_is_frozen(page_count(page)));
atomic_dec(&page->_refcount);
if (page_ref_tracepoint_active(page_ref_mod))
__page_ref_mod(page, -1);
@@ -189,6 +204,7 @@ static inline int folio_ref_sub_and_test(struct folio *folio, int nr)
static inline int page_ref_inc_return(struct page *page)
{
int ret = atomic_inc_return(&page->_refcount);
+ VM_BUG_ON(__page_count_is_frozen(ret - 1));
if (page_ref_tracepoint_active(page_ref_mod_and_return))
__page_ref_mod_and_return(page, 1, ret);
@@ -217,6 +233,7 @@ static inline int folio_ref_dec_and_test(struct folio *folio)
static inline int page_ref_dec_return(struct page *page)
{
int ret = atomic_dec_return(&page->_refcount);
+ VM_BUG_ON(__page_count_is_frozen(ret + 1));
if (page_ref_tracepoint_active(page_ref_mod_and_return))
__page_ref_mod_and_return(page, -1, ret);
@@ -228,7 +245,7 @@ static inline int folio_ref_dec_return(struct folio *folio)
return page_ref_dec_return(&folio->page);
}
-static inline bool page_ref_add_unless_zero(struct page *page, int nr)
+static inline bool page_ref_add_unless_frozen(struct page *page, int nr)
{
bool ret = atomic_add_unless(&page->_refcount, nr, 0);
@@ -237,9 +254,9 @@ static inline bool page_ref_add_unless_zero(struct page *page, int nr)
return ret;
}
-static inline bool folio_ref_add_unless_zero(struct folio *folio, int nr)
+static inline bool folio_ref_add_unless_frozen(struct folio *folio, int nr)
{
- return page_ref_add_unless_zero(&folio->page, nr);
+ return page_ref_add_unless_frozen(&folio->page, nr);
}
/**
@@ -255,12 +272,12 @@ static inline bool folio_ref_add_unless_zero(struct folio *folio, int nr)
*/
static inline bool folio_try_get(struct folio *folio)
{
- return folio_ref_add_unless_zero(folio, 1);
+ return folio_ref_add_unless_frozen(folio, 1);
}
static inline bool folio_ref_try_add(struct folio *folio, int count)
{
- return folio_ref_add_unless_zero(folio, count);
+ return folio_ref_add_unless_frozen(folio, count);
}
static inline int page_ref_freeze(struct page *page, int count)
diff --git a/kernel/liveupdate/kexec_handover.c b/kernel/liveupdate/kexec_handover.c
index 1b592d86dc48..b397bdb30461 100644
--- a/kernel/liveupdate/kexec_handover.c
+++ b/kernel/liveupdate/kexec_handover.c
@@ -361,7 +361,7 @@ EXPORT_SYMBOL_GPL(kho_radix_walk_tree);
static void kho_init_pages(struct page *page, unsigned long nr_pages)
{
for (unsigned long i = 0; i < nr_pages; i++) {
- set_page_count(page + i, 1);
+ init_page_count(page + i);
/* Clear each page's codetag to avoid accounting mismatch. */
clear_page_tag_ref(page + i);
}
@@ -372,13 +372,13 @@ static void kho_init_folio(struct page *page, unsigned int order)
unsigned long nr_pages = (1 << order);
/* Head page gets refcount of 1. */
- set_page_count(page, 1);
+ init_page_count(page);
/* Clear head page's codetag to avoid accounting mismatch. */
clear_page_tag_ref(page);
/* For higher order folios, tail pages get a page count of zero. */
for (unsigned long i = 1; i < nr_pages; i++)
- set_page_count(page + i, 0);
+ set_page_count_as_frozen(page + i);
if (order > 0)
prep_compound_page(page, order);
diff --git a/lib/test_hmm.c b/lib/test_hmm.c
index 213504915737..0cbcf9da4911 100644
--- a/lib/test_hmm.c
+++ b/lib/test_hmm.c
@@ -1715,7 +1715,7 @@ static void dmirror_devmem_folio_split(struct folio *head, struct folio *tail)
if (tail == NULL) {
folio_reset_order(rfolio);
rfolio->mapping = NULL;
- folio_set_count(rfolio, 1);
+ folio_init_count(rfolio);
return;
}
@@ -1729,7 +1729,7 @@ static void dmirror_devmem_folio_split(struct folio *head, struct folio *tail)
folio_page(tail, 0)->mapping = folio_page(head, 0)->mapping;
tail->pgmap = head->pgmap;
- folio_set_count(page_folio(rpage_tail), 1);
+ folio_init_count(page_folio(rpage_tail));
}
static const struct dev_pagemap_ops dmirror_devmem_ops = {
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 4b80b167cc9c..9a5ecdc71c44 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -3130,7 +3130,7 @@ static void __init hugetlb_folio_init_tail_vmemmap(struct folio *folio,
for (pfn = head_pfn + start_page_number; pfn < end_pfn; page++, pfn++) {
__init_single_page(page, pfn, zone, nid);
prep_compound_tail(page, &folio->page, order);
- set_page_count(page, 0);
+ set_page_count_as_frozen(page);
}
}
diff --git a/mm/internal.h b/mm/internal.h
index 5a2ddcf68e0b..3f2a91de8a80 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -595,7 +595,7 @@ static inline void set_page_refcounted(struct page *page)
{
VM_BUG_ON_PAGE(PageTail(page), page);
VM_BUG_ON_PAGE(page_ref_count(page), page);
- set_page_count(page, 1);
+ init_page_count(page);
}
static inline void set_pages_refcounted(struct page *page, unsigned long nr_pages)
diff --git a/mm/memremap.c b/mm/memremap.c
index 053842d45cb1..8025cc27b408 100644
--- a/mm/memremap.c
+++ b/mm/memremap.c
@@ -462,7 +462,7 @@ void free_zone_device_folio(struct folio *folio)
* Reset the refcount to 1 to prepare for handing out the page
* again.
*/
- folio_set_count(folio, 1);
+ folio_init_count(folio);
break;
case MEMORY_DEVICE_FS_DAX:
@@ -519,7 +519,7 @@ void zone_device_page_init(struct page *page, struct dev_pagemap *pgmap,
* memunmap_pages().
*/
WARN_ON_ONCE(!percpu_ref_tryget_many(&page_pgmap(page)->ref, 1 << order));
- set_page_count(page, 1);
+ init_page_count(page);
lock_page(page);
if (order)
diff --git a/mm/mm_init.c b/mm/mm_init.c
index f9f8e1af921c..cb40f63084bc 100644
--- a/mm/mm_init.c
+++ b/mm/mm_init.c
@@ -1040,7 +1040,7 @@ static void __ref __init_zone_device_page(struct page *page, unsigned long pfn,
case MEMORY_DEVICE_PRIVATE:
case MEMORY_DEVICE_COHERENT:
case MEMORY_DEVICE_PCI_P2PDMA:
- set_page_count(page, 0);
+ set_page_count_as_frozen(page);
break;
case MEMORY_DEVICE_GENERIC:
@@ -1086,7 +1086,7 @@ static void __ref memmap_init_compound(struct page *head,
__init_zone_device_page(page, pfn, zone_idx, nid, pgmap);
prep_compound_tail(page, head, order);
- set_page_count(page, 0);
+ set_page_count_as_frozen(page);
}
prep_compound_head(head, order);
}
@@ -2224,7 +2224,7 @@ void __init init_cma_reserved_pageblock(struct page *page)
do {
__ClearPageReserved(p);
- set_page_count(p, 0);
+ set_page_count_as_frozen(p);
} while (++p, --i);
init_pageblock_migratetype(page, MIGRATE_CMA, false);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index d49c254174da..617937e42b2e 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1599,14 +1599,14 @@ void __meminit __free_pages_core(struct page *page, unsigned int order,
for (loop = 0; loop < nr_pages; loop++, p++) {
VM_WARN_ON_ONCE(PageReserved(p));
__ClearPageOffline(p);
- set_page_count(p, 0);
+ set_page_count_as_frozen(p);
}
adjust_managed_page_count(page, nr_pages);
} else {
for (loop = 0; loop < nr_pages; loop++, p++) {
__ClearPageReserved(p);
- set_page_count(p, 0);
+ set_page_count_as_frozen(p);
}
/* memblock adjusts totalram_pages() manually. */
--
2.43.0
next prev parent reply other threads:[~2026-06-04 10:15 UTC|newest]
Thread overview: 5+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-06-04 10:13 [PATCH v3 0/2] mm: improve folio refcount scalability ilya.gladyshev
2026-06-04 10:15 ` ilya.gladyshev [this message]
2026-06-04 11:04 ` [PATCH v3 1/2] mm: drop page refcount zero state semantics Kiryl Shutsemau
2026-06-04 12:47 ` ilya.gladyshev
2026-06-04 10:15 ` [PATCH v3 2/2] mm: implement page refcount locking via dedicated bit ilya.gladyshev
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=7c28d766b007345f5f31ba9a086a3bffe95a013d@linux.dev \
--to=ilya.gladyshev@linux.dev \
--cc=Liam.Howlett@oracle.com \
--cc=akpm@linux-foundation.org \
--cc=apopple@nvidia.com \
--cc=artem.kuzin@huawei.com \
--cc=baolin.wang@linux.alibaba.com \
--cc=david@kernel.org \
--cc=foxido@foxido.dev \
--cc=harry.yoo@oracle.com \
--cc=ivgorbunov@me.com \
--cc=kirill@shutemov.name \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=lorenzo.stoakes@oracle.com \
--cc=mhocko@suse.com \
--cc=muchun.song@linux.dev \
--cc=pfalcato@suse.de \
--cc=rppt@kernel.org \
--cc=surenb@google.com \
--cc=torvalds@linuxfoundation.org \
--cc=vbabka@suse.cz \
--cc=willy@infradead.org \
--cc=yuzhao@google.com \
--cc=ziy@nvidia.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox