Linux-mm Archive on lore.kernel.org
 help / color / mirror / Atom feed
From: ilya.gladyshev@linux.dev
To: ilya.gladyshev@linux.dev
Cc: ivgorbunov@me.com, Liam.Howlett@oracle.com,
	akpm@linux-foundation.org, apopple@nvidia.com,
	artem.kuzin@huawei.com, baolin.wang@linux.alibaba.com,
	david@kernel.org, foxido@foxido.dev, harry.yoo@oracle.com,
	linux-kernel@vger.kernel.org, linux-mm@kvack.org,
	lorenzo.stoakes@oracle.com, mhocko@suse.com,
	muchun.song@linux.dev, rppt@kernel.org, surenb@google.com,
	torvalds@linuxfoundation.org, vbabka@suse.cz,
	willy@infradead.org, yuzhao@google.com, ziy@nvidia.com,
	pfalcato@suse.de, kirill@shutemov.name
Subject: [PATCH v3 1/2] mm: drop page refcount zero state semantics
Date: Thu, 04 Jun 2026 10:15:12 +0000	[thread overview]
Message-ID: <7c28d766b007345f5f31ba9a086a3bffe95a013d@linux.dev> (raw)
In-Reply-To: <5dabf3a748fee0c7b142c74367e7586f5db1ed1e@linux.dev>

From: Gorbunov Ivan <ivgorbunov@me.com>

Some call sites manipulate the page refcount directly via
set_page_count() instead of using a more specific API such as
set_page_count_as_frozen() / init_page_count().

This conflicts with the next patch, which will stop treating zeroed
refcount as the indicator of a frozen page. To prepare for that change,
this patch:

- "Deprecates" the internal assumption that a frozen page has refcount=0
(and vice versa). Callers of page_ref_count() still see 0 for frozen
pages.

- Inserts VM_BUG_ON() checks in every refcount API function to prevent
  the following erroneous behaviour:

page = alloc_frozen_page() // page is frozen
page_ref_inc(page) // BUG: increment on a frozen page instead of init

- Renames the page_ref/folio_ref _add_unless_zero() functions to
  _add_unless_frozen()

Reviewed-by: Artem Kuzin <artem.kuzin@huawei.com>
Co-developed-by: Gladyshev Ilya <ilya.gladyshev@linux.dev>
Signed-off-by: Gladyshev Ilya <ilya.gladyshev@linux.dev>
Signed-off-by: Gorbunov Ivan <ivgorbunov@me.com>
Acked-by: Bjorn Helgaas <bhelgaas@google.com> # p2pdma.c
---
 drivers/pci/p2pdma.c               |  4 ++--
 include/linux/mm.h                 |  2 +-
 include/linux/page_ref.h           | 31 +++++++++++++++++++++++-------
 kernel/liveupdate/kexec_handover.c |  6 +++---
 lib/test_hmm.c                     |  4 ++--
 mm/hugetlb.c                       |  2 +-
 mm/internal.h                      |  2 +-
 mm/memremap.c                      |  4 ++--
 mm/mm_init.c                       |  6 +++---
 mm/page_alloc.c                    |  4 ++--
 10 files changed, 41 insertions(+), 24 deletions(-)

diff --git a/drivers/pci/p2pdma.c b/drivers/pci/p2pdma.c
index 7c898542af8d..7aca5852dccc 100644
--- a/drivers/pci/p2pdma.c
+++ b/drivers/pci/p2pdma.c
@@ -148,7 +148,7 @@ static int p2pmem_alloc_mmap(struct file *filp, struct kobject *kobj,
 		 * using it.
 		 */
 		VM_WARN_ON_ONCE_PAGE(page_ref_count(page), page);
-		set_page_count(page, 1);
+		init_page_count(page);
 		ret = vm_insert_page(vma, vaddr, page);
 		if (ret) {
 			gen_pool_free(p2pdma->pool, (uintptr_t)kaddr, len);
@@ -158,7 +158,7 @@ static int p2pmem_alloc_mmap(struct file *filp, struct kobject *kobj,
 			 * because we don't want to trigger the
 			 * p2pdma_folio_free() path.
 			 */
-			set_page_count(page, 0);
+			set_page_count_as_frozen(page);
 			percpu_ref_put(ref);
 			return ret;
 		}
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 06bbe9eba636..34f83c5c2d24 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1780,7 +1780,7 @@ static inline int folio_put_testzero(struct folio *folio)
  */
 static inline bool get_page_unless_zero(struct page *page)
 {
-	return page_ref_add_unless_zero(page, 1);
+	return page_ref_add_unless_frozen(page, 1);
 }
 
 static inline struct folio *folio_get_nontail_page(struct page *page)
diff --git a/include/linux/page_ref.h b/include/linux/page_ref.h
index 94d3f0e71c06..24b09c8fbb68 100644
--- a/include/linux/page_ref.h
+++ b/include/linux/page_ref.h
@@ -62,6 +62,11 @@ static inline void __page_ref_unfreeze(struct page *page, int v)
 
 #endif
 
+static inline bool __page_count_is_frozen(int count)
+{
+	return count == 0;
+}
+
 static inline int page_ref_count(const struct page *page)
 {
 	return atomic_read(&page->_refcount);
@@ -101,9 +106,9 @@ static inline void set_page_count(struct page *page, int v)
 		__page_ref_set(page, v);
 }
 
-static inline void folio_set_count(struct folio *folio, int v)
+static inline void folio_init_count(struct folio *folio)
 {
-	set_page_count(&folio->page, v);
+	set_page_count(&folio->page, 1);
 }
 
 /*
@@ -115,8 +120,14 @@ static inline void init_page_count(struct page *page)
 	set_page_count(page, 1);
 }
 
+static inline void set_page_count_as_frozen(struct page *page)
+{
+	set_page_count(page, 0);
+}
+
 static inline void page_ref_add(struct page *page, int nr)
 {
+	VM_BUG_ON(__page_count_is_frozen(page_count(page)));
 	atomic_add(nr, &page->_refcount);
 	if (page_ref_tracepoint_active(page_ref_mod))
 		__page_ref_mod(page, nr);
@@ -129,6 +140,7 @@ static inline void folio_ref_add(struct folio *folio, int nr)
 
 static inline void page_ref_sub(struct page *page, int nr)
 {
+	VM_BUG_ON(__page_count_is_frozen(page_count(page)));
 	atomic_sub(nr, &page->_refcount);
 	if (page_ref_tracepoint_active(page_ref_mod))
 		__page_ref_mod(page, -nr);
@@ -142,6 +154,7 @@ static inline void folio_ref_sub(struct folio *folio, int nr)
 static inline int folio_ref_sub_return(struct folio *folio, int nr)
 {
 	int ret = atomic_sub_return(nr, &folio->_refcount);
+	VM_BUG_ON(__page_count_is_frozen(ret + nr));
 
 	if (page_ref_tracepoint_active(page_ref_mod_and_return))
 		__page_ref_mod_and_return(&folio->page, -nr, ret);
@@ -150,6 +163,7 @@ static inline int folio_ref_sub_return(struct folio *folio, int nr)
 
 static inline void page_ref_inc(struct page *page)
 {
+	VM_BUG_ON(__page_count_is_frozen(page_count(page)));
 	atomic_inc(&page->_refcount);
 	if (page_ref_tracepoint_active(page_ref_mod))
 		__page_ref_mod(page, 1);
@@ -162,6 +176,7 @@ static inline void folio_ref_inc(struct folio *folio)
 
 static inline void page_ref_dec(struct page *page)
 {
+	VM_BUG_ON(__page_count_is_frozen(page_count(page)));
 	atomic_dec(&page->_refcount);
 	if (page_ref_tracepoint_active(page_ref_mod))
 		__page_ref_mod(page, -1);
@@ -189,6 +204,7 @@ static inline int folio_ref_sub_and_test(struct folio *folio, int nr)
 static inline int page_ref_inc_return(struct page *page)
 {
 	int ret = atomic_inc_return(&page->_refcount);
+	VM_BUG_ON(__page_count_is_frozen(ret - 1));
 
 	if (page_ref_tracepoint_active(page_ref_mod_and_return))
 		__page_ref_mod_and_return(page, 1, ret);
@@ -217,6 +233,7 @@ static inline int folio_ref_dec_and_test(struct folio *folio)
 static inline int page_ref_dec_return(struct page *page)
 {
 	int ret = atomic_dec_return(&page->_refcount);
+	VM_BUG_ON(__page_count_is_frozen(ret + 1));
 
 	if (page_ref_tracepoint_active(page_ref_mod_and_return))
 		__page_ref_mod_and_return(page, -1, ret);
@@ -228,7 +245,7 @@ static inline int folio_ref_dec_return(struct folio *folio)
 	return page_ref_dec_return(&folio->page);
 }
 
-static inline bool page_ref_add_unless_zero(struct page *page, int nr)
+static inline bool page_ref_add_unless_frozen(struct page *page, int nr)
 {
 	bool ret = atomic_add_unless(&page->_refcount, nr, 0);
 
@@ -237,9 +254,9 @@ static inline bool page_ref_add_unless_zero(struct page *page, int nr)
 	return ret;
 }
 
-static inline bool folio_ref_add_unless_zero(struct folio *folio, int nr)
+static inline bool folio_ref_add_unless_frozen(struct folio *folio, int nr)
 {
-	return page_ref_add_unless_zero(&folio->page, nr);
+	return page_ref_add_unless_frozen(&folio->page, nr);
 }
 
 /**
@@ -255,12 +272,12 @@ static inline bool folio_ref_add_unless_zero(struct folio *folio, int nr)
  */
 static inline bool folio_try_get(struct folio *folio)
 {
-	return folio_ref_add_unless_zero(folio, 1);
+	return folio_ref_add_unless_frozen(folio, 1);
 }
 
 static inline bool folio_ref_try_add(struct folio *folio, int count)
 {
-	return folio_ref_add_unless_zero(folio, count);
+	return folio_ref_add_unless_frozen(folio, count);
 }
 
 static inline int page_ref_freeze(struct page *page, int count)
diff --git a/kernel/liveupdate/kexec_handover.c b/kernel/liveupdate/kexec_handover.c
index 1b592d86dc48..b397bdb30461 100644
--- a/kernel/liveupdate/kexec_handover.c
+++ b/kernel/liveupdate/kexec_handover.c
@@ -361,7 +361,7 @@ EXPORT_SYMBOL_GPL(kho_radix_walk_tree);
 static void kho_init_pages(struct page *page, unsigned long nr_pages)
 {
 	for (unsigned long i = 0; i < nr_pages; i++) {
-		set_page_count(page + i, 1);
+		init_page_count(page + i);
 		/* Clear each page's codetag to avoid accounting mismatch. */
 		clear_page_tag_ref(page + i);
 	}
@@ -372,13 +372,13 @@ static void kho_init_folio(struct page *page, unsigned int order)
 	unsigned long nr_pages = (1 << order);
 
 	/* Head page gets refcount of 1. */
-	set_page_count(page, 1);
+	init_page_count(page);
 	/* Clear head page's codetag to avoid accounting mismatch. */
 	clear_page_tag_ref(page);
 
 	/* For higher order folios, tail pages get a page count of zero. */
 	for (unsigned long i = 1; i < nr_pages; i++)
-		set_page_count(page + i, 0);
+		set_page_count_as_frozen(page + i);
 
 	if (order > 0)
 		prep_compound_page(page, order);
diff --git a/lib/test_hmm.c b/lib/test_hmm.c
index 213504915737..0cbcf9da4911 100644
--- a/lib/test_hmm.c
+++ b/lib/test_hmm.c
@@ -1715,7 +1715,7 @@ static void dmirror_devmem_folio_split(struct folio *head, struct folio *tail)
 	if (tail == NULL) {
 		folio_reset_order(rfolio);
 		rfolio->mapping = NULL;
-		folio_set_count(rfolio, 1);
+		folio_init_count(rfolio);
 		return;
 	}
 
@@ -1729,7 +1729,7 @@ static void dmirror_devmem_folio_split(struct folio *head, struct folio *tail)
 
 	folio_page(tail, 0)->mapping = folio_page(head, 0)->mapping;
 	tail->pgmap = head->pgmap;
-	folio_set_count(page_folio(rpage_tail), 1);
+	folio_init_count(page_folio(rpage_tail));
 }
 
 static const struct dev_pagemap_ops dmirror_devmem_ops = {
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 4b80b167cc9c..9a5ecdc71c44 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -3130,7 +3130,7 @@ static void __init hugetlb_folio_init_tail_vmemmap(struct folio *folio,
 	for (pfn = head_pfn + start_page_number; pfn < end_pfn; page++, pfn++) {
 		__init_single_page(page, pfn, zone, nid);
 		prep_compound_tail(page, &folio->page, order);
-		set_page_count(page, 0);
+		set_page_count_as_frozen(page);
 	}
 }
 
diff --git a/mm/internal.h b/mm/internal.h
index 5a2ddcf68e0b..3f2a91de8a80 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -595,7 +595,7 @@ static inline void set_page_refcounted(struct page *page)
 {
 	VM_BUG_ON_PAGE(PageTail(page), page);
 	VM_BUG_ON_PAGE(page_ref_count(page), page);
-	set_page_count(page, 1);
+	init_page_count(page);
 }
 
 static inline void set_pages_refcounted(struct page *page, unsigned long nr_pages)
diff --git a/mm/memremap.c b/mm/memremap.c
index 053842d45cb1..8025cc27b408 100644
--- a/mm/memremap.c
+++ b/mm/memremap.c
@@ -462,7 +462,7 @@ void free_zone_device_folio(struct folio *folio)
 		 * Reset the refcount to 1 to prepare for handing out the page
 		 * again.
 		 */
-		folio_set_count(folio, 1);
+		folio_init_count(folio);
 		break;
 
 	case MEMORY_DEVICE_FS_DAX:
@@ -519,7 +519,7 @@ void zone_device_page_init(struct page *page, struct dev_pagemap *pgmap,
 	 * memunmap_pages().
 	 */
 	WARN_ON_ONCE(!percpu_ref_tryget_many(&page_pgmap(page)->ref, 1 << order));
-	set_page_count(page, 1);
+	init_page_count(page);
 	lock_page(page);
 
 	if (order)
diff --git a/mm/mm_init.c b/mm/mm_init.c
index f9f8e1af921c..cb40f63084bc 100644
--- a/mm/mm_init.c
+++ b/mm/mm_init.c
@@ -1040,7 +1040,7 @@ static void __ref __init_zone_device_page(struct page *page, unsigned long pfn,
 	case MEMORY_DEVICE_PRIVATE:
 	case MEMORY_DEVICE_COHERENT:
 	case MEMORY_DEVICE_PCI_P2PDMA:
-		set_page_count(page, 0);
+		set_page_count_as_frozen(page);
 		break;
 
 	case MEMORY_DEVICE_GENERIC:
@@ -1086,7 +1086,7 @@ static void __ref memmap_init_compound(struct page *head,
 
 		__init_zone_device_page(page, pfn, zone_idx, nid, pgmap);
 		prep_compound_tail(page, head, order);
-		set_page_count(page, 0);
+		set_page_count_as_frozen(page);
 	}
 	prep_compound_head(head, order);
 }
@@ -2224,7 +2224,7 @@ void __init init_cma_reserved_pageblock(struct page *page)
 
 	do {
 		__ClearPageReserved(p);
-		set_page_count(p, 0);
+		set_page_count_as_frozen(p);
 	} while (++p, --i);
 
 	init_pageblock_migratetype(page, MIGRATE_CMA, false);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index d49c254174da..617937e42b2e 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1599,14 +1599,14 @@ void __meminit __free_pages_core(struct page *page, unsigned int order,
 		for (loop = 0; loop < nr_pages; loop++, p++) {
 			VM_WARN_ON_ONCE(PageReserved(p));
 			__ClearPageOffline(p);
-			set_page_count(p, 0);
+			set_page_count_as_frozen(p);
 		}
 
 		adjust_managed_page_count(page, nr_pages);
 	} else {
 		for (loop = 0; loop < nr_pages; loop++, p++) {
 			__ClearPageReserved(p);
-			set_page_count(p, 0);
+			set_page_count_as_frozen(p);
 		}
 
 		/* memblock adjusts totalram_pages() manually. */
-- 
2.43.0


  reply	other threads:[~2026-06-04 10:15 UTC|newest]

Thread overview: 5+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-06-04 10:13 [PATCH v3 0/2] mm: improve folio refcount scalability ilya.gladyshev
2026-06-04 10:15 ` ilya.gladyshev [this message]
2026-06-04 11:04   ` [PATCH v3 1/2] mm: drop page refcount zero state semantics Kiryl Shutsemau
2026-06-04 12:47     ` ilya.gladyshev
2026-06-04 10:15 ` [PATCH v3 2/2] mm: implement page refcount locking via dedicated bit ilya.gladyshev

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=7c28d766b007345f5f31ba9a086a3bffe95a013d@linux.dev \
    --to=ilya.gladyshev@linux.dev \
    --cc=Liam.Howlett@oracle.com \
    --cc=akpm@linux-foundation.org \
    --cc=apopple@nvidia.com \
    --cc=artem.kuzin@huawei.com \
    --cc=baolin.wang@linux.alibaba.com \
    --cc=david@kernel.org \
    --cc=foxido@foxido.dev \
    --cc=harry.yoo@oracle.com \
    --cc=ivgorbunov@me.com \
    --cc=kirill@shutemov.name \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=lorenzo.stoakes@oracle.com \
    --cc=mhocko@suse.com \
    --cc=muchun.song@linux.dev \
    --cc=pfalcato@suse.de \
    --cc=rppt@kernel.org \
    --cc=surenb@google.com \
    --cc=torvalds@linuxfoundation.org \
    --cc=vbabka@suse.cz \
    --cc=willy@infradead.org \
    --cc=yuzhao@google.com \
    --cc=ziy@nvidia.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox