All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Gladyshev Ilya" <ilya.gladyshev@linux.dev>
To: "Gladyshev Ilya" <ilya.gladyshev@linux.dev>
Cc: "Linus Torvalds" <torvalds@linuxfoundation.org>,
	"Andrew Morton" <akpm@linux-foundation.org>,
	ivgorbunov@me.com, Liam.Howlett@oracle.com, apopple@nvidia.com,
	artem.kuzin@huawei.com, baolin.wang@linux.alibaba.com,
	foxido@foxido.dev, harry.yoo@oracle.com,
	linux-kernel@vger.kernel.org, linux-mm@kvack.org,
	lorenzo.stoakes@oracle.com, mhocko@suse.com,
	muchun.song@linux.dev, rppt@kernel.org, surenb@google.com,
	vbabka@suse.cz, yuzhao@google.com, ziy@nvidia.com,
	pfalcato@suse.de, kirill@shutemov.name
Subject: [PATCH v5 1/2] mm: drop page refcount zero state semantics
Date: Fri, 26 Jun 2026 18:46:02 +0000	[thread overview]
Message-ID: <3fbebdd9b59c2fa39cc7c86a2af13e96b95ff0c8@linux.dev> (raw)
In-Reply-To: <bb1cb750157733f897df2e261cad3d5c42acc172@linux.dev>

From: Gorbunov Ivan <ivgorbunov@me.com>

Some call sites manipulate the page refcount directly via
set_page_count() instead of using a more specific helper such as
init_page_count() or set_page_count_frozen() (the latter is added by
this patch).

This conflicts with the next patch, which will stop treating zeroed
refcount as the indicator of a frozen page. To prepare for that change,
this patch:

- "Deprecates" the internal assumption that a frozen page has refcount=0
(and vice versa). Callers of page_ref_count() still see 0 for frozen
pages.

- Inserts VM_WARN_ON_ONCE_PAGE() / VM_WARN_ON_ONCE_FOLIO() checks in the
  refcount API functions that modify the count, to catch the following
  erroneous behaviour:

page = alloc_frozen_page() // page is frozen
page_ref_inc(page) // BUG: Increment on frozen page instead of init

- Renames _unless_zero() functions into _unless_frozen()

Reviewed-by: Artem Kuzin <artem.kuzin@huawei.com>
Co-developed-by: Gladyshev Ilya <ilya.gladyshev@linux.dev>
Signed-off-by: Gladyshev Ilya <ilya.gladyshev@linux.dev>
Signed-off-by: Gorbunov Ivan <ivgorbunov@me.com>
Acked-by: Bjorn Helgaas <bhelgaas@google.com> # p2pdma.c
---
 drivers/pci/p2pdma.c               |  4 ++--
 drivers/virtio/virtio_mem.c        |  2 +-
 include/linux/mm.h                 |  2 +-
 include/linux/page_ref.h           | 38 ++++++++++++++++++++++++------
 kernel/liveupdate/kexec_handover.c |  6 ++---
 lib/test_hmm.c                     |  4 ++--
 mm/hugetlb.c                       |  2 +-
 mm/internal.h                      |  2 +-
 mm/memremap.c                      |  4 ++--
 mm/mm_init.c                       |  6 ++---
 mm/page_alloc.c                    |  4 ++--
 11 files changed, 49 insertions(+), 25 deletions(-)

diff --git a/drivers/pci/p2pdma.c b/drivers/pci/p2pdma.c
index b2d5266f8653..fca9a7a7759e 100644
--- a/drivers/pci/p2pdma.c
+++ b/drivers/pci/p2pdma.c
@@ -148,7 +148,7 @@ static int p2pmem_alloc_mmap(struct file *filp, struct kobject *kobj,
 		 * using it.
 		 */
 		VM_WARN_ON_ONCE_PAGE(page_ref_count(page), page);
-		set_page_count(page, 1);
+		init_page_count(page);
 		ret = vm_insert_page(vma, vaddr, page);
 		if (ret) {
 			gen_pool_free(p2pdma->pool, (uintptr_t)kaddr, len);
@@ -158,7 +158,7 @@ static int p2pmem_alloc_mmap(struct file *filp, struct kobject *kobj,
 			 * because we don't want to trigger the
 			 * p2pdma_folio_free() path.
 			 */
-			set_page_count(page, 0);
+			set_page_count_frozen(page);
 			percpu_ref_put(ref);
 			return ret;
 		}
diff --git a/drivers/virtio/virtio_mem.c b/drivers/virtio/virtio_mem.c
index 11c441501582..6abaf984915a 100644
--- a/drivers/virtio/virtio_mem.c
+++ b/drivers/virtio/virtio_mem.c
@@ -1293,7 +1293,7 @@ static void virtio_mem_fake_offline_cancel_offline(unsigned long pfn,
 	 * when going offline.
 	 */
 	for (i = 0; i < nr_pages; i++)
-		page_ref_inc(pfn_to_page(pfn + i));
+		init_page_count(pfn_to_page(pfn + i));
 }
 
 static void virtio_mem_online_page(struct virtio_mem *vm,
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 485df9c2dbdd..9e87095fd032 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1824,7 +1824,7 @@ static inline int folio_put_testzero(struct folio *folio)
  */
 static inline bool get_page_unless_zero(struct page *page)
 {
-	return page_ref_add_unless_zero(page, 1);
+	return page_ref_add_unless_frozen(page, 1);
 }
 
 static inline struct folio *folio_get_nontail_page(struct page *page)
diff --git a/include/linux/page_ref.h b/include/linux/page_ref.h
index 9f5c75d06f76..61a2c9b9b73c 100644
--- a/include/linux/page_ref.h
+++ b/include/linux/page_ref.h
@@ -62,6 +62,16 @@ static inline void __page_ref_unfreeze(struct page *page, int v)
 
 #endif
 
+static inline bool __page_count_is_frozen(int count)
+{
+	return count == 0;
+}
+
+static inline bool __page_is_frozen(const struct page *page)
+{
+	return __page_count_is_frozen(atomic_read(&page->_refcount));
+}
+
 static inline int page_ref_count(const struct page *page)
 {
 	return atomic_read(&page->_refcount);
@@ -119,9 +129,9 @@ static inline void set_page_count(struct page *page, int v)
 		__page_ref_set(page, v);
 }
 
-static inline void folio_set_count(struct folio *folio, int v)
+static inline void folio_init_count(struct folio *folio)
 {
-	set_page_count(&folio->page, v);
+	set_page_count(&folio->page, 1);
 }
 
 /*
@@ -133,8 +143,14 @@ static inline void init_page_count(struct page *page)
 	set_page_count(page, 1);
 }
 
+static inline void set_page_count_frozen(struct page *page)
+{
+	set_page_count(page, 0);
+}
+
 static inline void page_ref_add(struct page *page, int nr)
 {
+	VM_WARN_ON_ONCE_PAGE(__page_is_frozen(page), page);
 	atomic_add(nr, &page->_refcount);
 	if (page_ref_tracepoint_active(page_ref_mod))
 		__page_ref_mod(page, nr);
@@ -147,6 +163,7 @@ static inline void folio_ref_add(struct folio *folio, int nr)
 
 static inline void page_ref_sub(struct page *page, int nr)
 {
+	VM_WARN_ON_ONCE_PAGE(__page_is_frozen(page), page);
 	atomic_sub(nr, &page->_refcount);
 	if (page_ref_tracepoint_active(page_ref_mod))
 		__page_ref_mod(page, -nr);
@@ -160,6 +177,7 @@ static inline void folio_ref_sub(struct folio *folio, int nr)
 static inline int folio_ref_sub_return(struct folio *folio, int nr)
 {
 	int ret = atomic_sub_return(nr, &folio->_refcount);
+	VM_WARN_ON_ONCE_FOLIO(__page_count_is_frozen(ret + nr), folio);
 
 	if (page_ref_tracepoint_active(page_ref_mod_and_return))
 		__page_ref_mod_and_return(&folio->page, -nr, ret);
@@ -168,6 +186,7 @@ static inline int folio_ref_sub_return(struct folio *folio, int nr)
 
 static inline void page_ref_inc(struct page *page)
 {
+	VM_WARN_ON_ONCE_PAGE(__page_is_frozen(page), page);
 	atomic_inc(&page->_refcount);
 	if (page_ref_tracepoint_active(page_ref_mod))
 		__page_ref_mod(page, 1);
@@ -180,6 +199,7 @@ static inline void folio_ref_inc(struct folio *folio)
 
 static inline void page_ref_dec(struct page *page)
 {
+	VM_WARN_ON_ONCE_PAGE(__page_is_frozen(page), page);
 	atomic_dec(&page->_refcount);
 	if (page_ref_tracepoint_active(page_ref_mod))
 		__page_ref_mod(page, -1);
@@ -193,6 +213,7 @@ static inline void folio_ref_dec(struct folio *folio)
 static inline int page_ref_sub_and_test(struct page *page, int nr)
 {
 	int ret = atomic_sub_and_test(nr, &page->_refcount);
+	VM_WARN_ON_ONCE_PAGE(__page_count_is_frozen(ret + nr), page);
 
 	if (page_ref_tracepoint_active(page_ref_mod_and_test))
 		__page_ref_mod_and_test(page, -nr, ret);
@@ -207,6 +228,7 @@ static inline int folio_ref_sub_and_test(struct folio *folio, int nr)
 static inline int page_ref_inc_return(struct page *page)
 {
 	int ret = atomic_inc_return(&page->_refcount);
+	VM_WARN_ON_ONCE_PAGE(__page_count_is_frozen(ret - 1), page);
 
 	if (page_ref_tracepoint_active(page_ref_mod_and_return))
 		__page_ref_mod_and_return(page, 1, ret);
@@ -221,6 +243,7 @@ static inline int folio_ref_inc_return(struct folio *folio)
 static inline int page_ref_dec_and_test(struct page *page)
 {
 	int ret = atomic_dec_and_test(&page->_refcount);
+	VM_WARN_ON_ONCE_PAGE(__page_count_is_frozen(ret + 1), page);
 
 	if (page_ref_tracepoint_active(page_ref_mod_and_test))
 		__page_ref_mod_and_test(page, -1, ret);
@@ -235,6 +258,7 @@ static inline int folio_ref_dec_and_test(struct folio *folio)
 static inline int page_ref_dec_return(struct page *page)
 {
 	int ret = atomic_dec_return(&page->_refcount);
+	VM_WARN_ON_ONCE_PAGE(__page_count_is_frozen(ret + 1), page);
 
 	if (page_ref_tracepoint_active(page_ref_mod_and_return))
 		__page_ref_mod_and_return(page, -1, ret);
@@ -246,7 +270,7 @@ static inline int folio_ref_dec_return(struct folio *folio)
 	return page_ref_dec_return(&folio->page);
 }
 
-static inline bool page_ref_add_unless_zero(struct page *page, int nr)
+static inline bool page_ref_add_unless_frozen(struct page *page, int nr)
 {
 	bool ret = atomic_add_unless(&page->_refcount, nr, 0);
 
@@ -255,9 +279,9 @@ static inline bool page_ref_add_unless_zero(struct page *page, int nr)
 	return ret;
 }
 
-static inline bool folio_ref_add_unless_zero(struct folio *folio, int nr)
+static inline bool folio_ref_add_unless_frozen(struct folio *folio, int nr)
 {
-	return page_ref_add_unless_zero(&folio->page, nr);
+	return page_ref_add_unless_frozen(&folio->page, nr);
 }
 
 /**
@@ -273,12 +297,12 @@ static inline bool folio_ref_add_unless_zero(struct folio *folio, int nr)
  */
 static inline bool folio_try_get(struct folio *folio)
 {
-	return folio_ref_add_unless_zero(folio, 1);
+	return folio_ref_add_unless_frozen(folio, 1);
 }
 
 static inline bool folio_ref_try_add(struct folio *folio, int count)
 {
-	return folio_ref_add_unless_zero(folio, count);
+	return folio_ref_add_unless_frozen(folio, count);
 }
 
 static inline int page_ref_freeze(struct page *page, int count)
diff --git a/kernel/liveupdate/kexec_handover.c b/kernel/liveupdate/kexec_handover.c
index 4834a809985a..1f2ed4837d60 100644
--- a/kernel/liveupdate/kexec_handover.c
+++ b/kernel/liveupdate/kexec_handover.c
@@ -361,7 +361,7 @@ EXPORT_SYMBOL_GPL(kho_radix_walk_tree);
 static void kho_init_pages(struct page *page, unsigned long nr_pages)
 {
 	for (unsigned long i = 0; i < nr_pages; i++) {
-		set_page_count(page + i, 1);
+		init_page_count(page + i);
 		/* Clear each page's codetag to avoid accounting mismatch. */
 		clear_page_tag_ref(page + i);
 	}
@@ -372,13 +372,13 @@ static void kho_init_folio(struct page *page, unsigned int order)
 	unsigned long nr_pages = (1 << order);
 
 	/* Head page gets refcount of 1. */
-	set_page_count(page, 1);
+	init_page_count(page);
 	/* Clear head page's codetag to avoid accounting mismatch. */
 	clear_page_tag_ref(page);
 
 	/* For higher order folios, tail pages get a page count of zero. */
 	for (unsigned long i = 1; i < nr_pages; i++)
-		set_page_count(page + i, 0);
+		set_page_count_frozen(page + i);
 
 	if (order > 0)
 		prep_compound_page(page, order);
diff --git a/lib/test_hmm.c b/lib/test_hmm.c
index 9c59d1ceb5b5..a033bf73d6be 100644
--- a/lib/test_hmm.c
+++ b/lib/test_hmm.c
@@ -1746,7 +1746,7 @@ static void dmirror_devmem_folio_split(struct folio *head, struct folio *tail)
 	if (tail == NULL) {
 		folio_reset_order(rfolio);
 		rfolio->mapping = NULL;
-		folio_set_count(rfolio, 1);
+		folio_init_count(rfolio);
 		return;
 	}
 
@@ -1760,7 +1760,7 @@ static void dmirror_devmem_folio_split(struct folio *head, struct folio *tail)
 
 	folio_page(tail, 0)->mapping = folio_page(head, 0)->mapping;
 	tail->pgmap = head->pgmap;
-	folio_set_count(page_folio(rpage_tail), 1);
+	folio_init_count(page_folio(rpage_tail));
 }
 
 static const struct dev_pagemap_ops dmirror_devmem_ops = {
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 571212b80835..622e5fed3d17 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -3134,7 +3134,7 @@ static void __init hugetlb_folio_init_tail_vmemmap(struct folio *folio,
 	for (pfn = head_pfn + start_page_number; pfn < end_pfn; page++, pfn++) {
 		__init_single_page(page, pfn, zone, nid);
 		prep_compound_tail(page, &folio->page, order);
-		set_page_count(page, 0);
+		set_page_count_frozen(page);
 	}
 }
 
diff --git a/mm/internal.h b/mm/internal.h
index 181e79f1d6a2..6895c6500805 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -586,7 +586,7 @@ static inline void set_page_refcounted(struct page *page)
 {
 	VM_BUG_ON_PAGE(PageTail(page), page);
 	VM_BUG_ON_PAGE(page_ref_count(page), page);
-	set_page_count(page, 1);
+	init_page_count(page);
 }
 
 static inline void set_pages_refcounted(struct page *page, unsigned long nr_pages)
diff --git a/mm/memremap.c b/mm/memremap.c
index 81766d822400..9ffb006bc307 100644
--- a/mm/memremap.c
+++ b/mm/memremap.c
@@ -462,7 +462,7 @@ void free_zone_device_folio(struct folio *folio)
 		 * Reset the refcount to 1 to prepare for handing out the page
 		 * again.
 		 */
-		folio_set_count(folio, 1);
+		folio_init_count(folio);
 		break;
 
 	case MEMORY_DEVICE_FS_DAX:
@@ -519,7 +519,7 @@ void zone_device_page_init(struct page *page, struct dev_pagemap *pgmap,
 	 * memunmap_pages().
 	 */
 	WARN_ON_ONCE(!percpu_ref_tryget_many(&page_pgmap(page)->ref, 1 << order));
-	set_page_count(page, 1);
+	init_page_count(page);
 	lock_page(page);
 
 	if (order)
diff --git a/mm/mm_init.c b/mm/mm_init.c
index 0f64909e8d20..5b6da9d21dfa 100644
--- a/mm/mm_init.c
+++ b/mm/mm_init.c
@@ -1043,7 +1043,7 @@ static void __ref __init_zone_device_page(struct page *page, unsigned long pfn,
 	case MEMORY_DEVICE_PRIVATE:
 	case MEMORY_DEVICE_COHERENT:
 	case MEMORY_DEVICE_PCI_P2PDMA:
-		set_page_count(page, 0);
+		set_page_count_frozen(page);
 		break;
 
 	case MEMORY_DEVICE_GENERIC:
@@ -1096,7 +1096,7 @@ static void __ref memmap_init_compound(struct page *head,
 
 		__init_zone_device_page(page, pfn, zone_idx, nid, pgmap);
 		prep_compound_tail(page, head, order);
-		set_page_count(page, 0);
+		set_page_count_frozen(page);
 	}
 	prep_compound_head(head, order);
 }
@@ -2227,7 +2227,7 @@ void __init init_cma_reserved_pageblock(struct page *page)
 
 	do {
 		__ClearPageReserved(p);
-		set_page_count(p, 0);
+		set_page_count_frozen(p);
 	} while (++p, --i);
 
 	init_pageblock_migratetype(page, MIGRATE_CMA, false);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index ee902a468c2f..5db92c51a0d5 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1605,14 +1605,14 @@ void __meminit __free_pages_core(struct page *page, unsigned int order,
 		for (loop = 0; loop < nr_pages; loop++, p++) {
 			VM_WARN_ON_ONCE(PageReserved(p));
 			__ClearPageOffline(p);
-			set_page_count(p, 0);
+			set_page_count_frozen(p);
 		}
 
 		adjust_managed_page_count(page, nr_pages);
 	} else {
 		for (loop = 0; loop < nr_pages; loop++, p++) {
 			__ClearPageReserved(p);
-			set_page_count(p, 0);
+			set_page_count_frozen(p);
 		}
 
 		/* memblock adjusts totalram_pages() manually. */
-- 
2.54.0


  reply	other threads:[~2026-06-26 18:46 UTC|newest]

Thread overview: 4+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-06-26 18:44 [PATCH v5 0/2] mm: improve folio refcount scalability Gladyshev Ilya
2026-06-26 18:46 ` Gladyshev Ilya [this message]
2026-06-26 18:46 ` [PATCH v5 2/2] mm: implement page refcount locking via dedicated bit Gladyshev Ilya
2026-06-27  1:15 ` [PATCH v5 0/2] mm: improve folio refcount scalability Andrew Morton

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=3fbebdd9b59c2fa39cc7c86a2af13e96b95ff0c8@linux.dev \
    --to=ilya.gladyshev@linux.dev \
    --cc=Liam.Howlett@oracle.com \
    --cc=akpm@linux-foundation.org \
    --cc=apopple@nvidia.com \
    --cc=artem.kuzin@huawei.com \
    --cc=baolin.wang@linux.alibaba.com \
    --cc=foxido@foxido.dev \
    --cc=harry.yoo@oracle.com \
    --cc=ivgorbunov@me.com \
    --cc=kirill@shutemov.name \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=lorenzo.stoakes@oracle.com \
    --cc=mhocko@suse.com \
    --cc=muchun.song@linux.dev \
    --cc=pfalcato@suse.de \
    --cc=rppt@kernel.org \
    --cc=surenb@google.com \
    --cc=torvalds@linuxfoundation.org \
    --cc=vbabka@suse.cz \
    --cc=yuzhao@google.com \
    --cc=ziy@nvidia.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.