All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] mm/huge_memory.c: introduce folio_split_unmapped
@ 2025-11-14  1:22 Balbir Singh
  2025-11-14  2:11 ` Zi Yan
                   ` (3 more replies)
  0 siblings, 4 replies; 17+ messages in thread
From: Balbir Singh @ 2025-11-14  1:22 UTC (permalink / raw)
  To: linux-kernel, linux-mm, dri-devel
  Cc: Balbir Singh, Andrew Morton, David Hildenbrand, Zi Yan,
	Joshua Hahn, Rakie Kim, Byungchul Park, Gregory Price, Ying Huang,
	Alistair Popple, Oscar Salvador, Lorenzo Stoakes, Baolin Wang,
	Liam R. Howlett, Nico Pache, Ryan Roberts, Dev Jain, Barry Song,
	Lyude Paul, Danilo Krummrich, David Airlie, Simona Vetter,
	Ralph Campbell, Mika Penttilä, Matthew Brost,
	Francois Dugast

Unmapped was added as a parameter to __folio_split() and related
call sites to support splitting of folios already in the midst
of a migration. This special case arose for device private folio
migration since during migration there could be a disconnect between
source and destination on the folio size.

Introduce folio_split_unmapped() to handle this special case. Also
refactor code and add __folio_freeze_and_split_unmapped() helper that
is common to both __folio_split() and folio_split_unmapped().

This in turn removes the special casing introduced by the unmapped
parameter in __folio_split().

Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: David Hildenbrand <david@redhat.com>
Cc: Zi Yan <ziy@nvidia.com>
Cc: Joshua Hahn <joshua.hahnjy@gmail.com>
Cc: Rakie Kim <rakie.kim@sk.com>
Cc: Byungchul Park <byungchul@sk.com>
Cc: Gregory Price <gourry@gourry.net>
Cc: Ying Huang <ying.huang@linux.alibaba.com>
Cc: Alistair Popple <apopple@nvidia.com>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Cc: Baolin Wang <baolin.wang@linux.alibaba.com>
Cc: "Liam R. Howlett" <Liam.Howlett@oracle.com>
Cc: Nico Pache <npache@redhat.com>
Cc: Ryan Roberts <ryan.roberts@arm.com>
Cc: Dev Jain <dev.jain@arm.com>
Cc: Barry Song <baohua@kernel.org>
Cc: Lyude Paul <lyude@redhat.com>
Cc: Danilo Krummrich <dakr@kernel.org>
Cc: David Airlie <airlied@gmail.com>
Cc: Simona Vetter <simona@ffwll.ch>
Cc: Ralph Campbell <rcampbell@nvidia.com>
Cc: Mika Penttilä <mpenttil@redhat.com>
Cc: Matthew Brost <matthew.brost@intel.com>
Cc: Francois Dugast <francois.dugast@intel.com>

Suggested-by: Zi Yan <ziy@nvidia.com>
Signed-off-by: Balbir Singh <balbirs@nvidia.com>
---
 include/linux/huge_mm.h |   5 +-
 mm/huge_memory.c        | 332 ++++++++++++++++++++++------------------
 mm/migrate_device.c     |   3 +-
 3 files changed, 191 insertions(+), 149 deletions(-)

diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index e2e91aa1a042..1d439de1ca2c 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -371,7 +371,8 @@ enum split_type {
 
 bool can_split_folio(struct folio *folio, int caller_pins, int *pextra_pins);
 int __split_huge_page_to_list_to_order(struct page *page, struct list_head *list,
-		unsigned int new_order, bool unmapped);
+		unsigned int new_order);
+int folio_split_unmapped(struct folio *folio, unsigned int new_order);
 int min_order_for_split(struct folio *folio);
 int split_folio_to_list(struct folio *folio, struct list_head *list);
 bool folio_split_supported(struct folio *folio, unsigned int new_order,
@@ -382,7 +383,7 @@ int folio_split(struct folio *folio, unsigned int new_order, struct page *page,
 static inline int split_huge_page_to_list_to_order(struct page *page, struct list_head *list,
 		unsigned int new_order)
 {
-	return __split_huge_page_to_list_to_order(page, list, new_order, false);
+	return __split_huge_page_to_list_to_order(page, list, new_order);
 }
 static inline int split_huge_page_to_order(struct page *page, unsigned int new_order)
 {
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index e4dadcb9e90b..afc10079724c 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -3739,6 +3739,151 @@ bool folio_split_supported(struct folio *folio, unsigned int new_order,
 	return true;
 }
 
+static int __folio_freeze_and_split_unmapped(struct folio *folio, unsigned int new_order,
+					     struct page *split_at, struct xa_state *xas,
+					     struct address_space *mapping, bool do_lru,
+					     struct list_head *list, enum split_type split_type,
+					     pgoff_t end, int extra_pins)
+{
+	struct folio *end_folio = folio_next(folio);
+	struct folio *new_folio, *next;
+	int old_order = folio_order(folio);
+	int nr_shmem_dropped = 0;
+	int ret = 0;
+	struct deferred_split *ds_queue;
+
+	/* Prevent deferred_split_scan() touching ->_refcount */
+	ds_queue = folio_split_queue_lock(folio);
+	if (folio_ref_freeze(folio, 1 + extra_pins)) {
+		struct swap_cluster_info *ci = NULL;
+		struct lruvec *lruvec;
+		int expected_refs;
+
+		if (old_order > 1) {
+			if (!list_empty(&folio->_deferred_list)) {
+				ds_queue->split_queue_len--;
+				/*
+				 * Reinitialize page_deferred_list after removing the
+				 * page from the split_queue, otherwise a subsequent
+				 * split will see list corruption when checking the
+				 * page_deferred_list.
+				 */
+				list_del_init(&folio->_deferred_list);
+			}
+			if (folio_test_partially_mapped(folio)) {
+				folio_clear_partially_mapped(folio);
+				mod_mthp_stat(old_order,
+					MTHP_STAT_NR_ANON_PARTIALLY_MAPPED, -1);
+			}
+		}
+		split_queue_unlock(ds_queue);
+		if (mapping) {
+			int nr = folio_nr_pages(folio);
+
+			if (folio_test_pmd_mappable(folio) &&
+			    new_order < HPAGE_PMD_ORDER) {
+				if (folio_test_swapbacked(folio)) {
+					__lruvec_stat_mod_folio(folio,
+							NR_SHMEM_THPS, -nr);
+				} else {
+					__lruvec_stat_mod_folio(folio,
+							NR_FILE_THPS, -nr);
+					filemap_nr_thps_dec(mapping);
+				}
+			}
+		}
+
+		if (folio_test_swapcache(folio)) {
+			if (mapping) {
+				VM_WARN_ON_ONCE_FOLIO(mapping, folio);
+				return -EINVAL;
+			}
+
+			ci = swap_cluster_get_and_lock(folio);
+		}
+
+		/* lock lru list/PageCompound, ref frozen by page_ref_freeze */
+		if (do_lru)
+			lruvec = folio_lruvec_lock(folio);
+
+		ret = __split_unmapped_folio(folio, new_order, split_at, xas,
+					     mapping, split_type);
+
+		/*
+		 * Unfreeze after-split folios and put them back to the right
+		 * list. @folio should be kept frozon until page cache
+		 * entries are updated with all the other after-split folios
+		 * to prevent others seeing stale page cache entries.
+		 * As a result, new_folio starts from the next folio of
+		 * @folio.
+		 */
+		for (new_folio = folio_next(folio); new_folio != end_folio;
+		     new_folio = next) {
+			unsigned long nr_pages = folio_nr_pages(new_folio);
+
+			next = folio_next(new_folio);
+
+			zone_device_private_split_cb(folio, new_folio);
+
+			expected_refs = folio_expected_ref_count(new_folio) + 1;
+			folio_ref_unfreeze(new_folio, expected_refs);
+
+			if (do_lru)
+				lru_add_split_folio(folio, new_folio, lruvec, list);
+
+			/*
+			 * Anonymous folio with swap cache.
+			 * NOTE: shmem in swap cache is not supported yet.
+			 */
+			if (ci) {
+				__swap_cache_replace_folio(ci, folio, new_folio);
+				continue;
+			}
+
+			/* Anonymous folio without swap cache */
+			if (!mapping)
+				continue;
+
+			/* Add the new folio to the page cache. */
+			if (new_folio->index < end) {
+				__xa_store(&mapping->i_pages, new_folio->index,
+					   new_folio, 0);
+				continue;
+			}
+
+			/* Drop folio beyond EOF: ->index >= end */
+			if (shmem_mapping(mapping))
+				nr_shmem_dropped += nr_pages;
+			else if (folio_test_clear_dirty(new_folio))
+				folio_account_cleaned(
+					new_folio, inode_to_wb(mapping->host));
+			__filemap_remove_folio(new_folio, NULL);
+			folio_put_refs(new_folio, nr_pages);
+		}
+
+		zone_device_private_split_cb(folio, NULL);
+		/*
+		 * Unfreeze @folio only after all page cache entries, which
+		 * used to point to it, have been updated with new folios.
+		 * Otherwise, a parallel folio_try_get() can grab @folio
+		 * and its caller can see stale page cache entries.
+		 */
+		expected_refs = folio_expected_ref_count(folio) + 1;
+		folio_ref_unfreeze(folio, expected_refs);
+
+		if (do_lru)
+			unlock_page_lruvec(lruvec);
+
+		if (ci)
+			swap_cluster_unlock(ci);
+	} else {
+		split_queue_unlock(ds_queue);
+		return -EAGAIN;
+	}
+
+	return ret;
+}
+
 /**
  * __folio_split() - split a folio at @split_at to a @new_order folio
  * @folio: folio to split
@@ -3747,7 +3892,6 @@ bool folio_split_supported(struct folio *folio, unsigned int new_order,
  * @lock_at: a page within @folio to be left locked to caller
  * @list: after-split folios will be put on it if non NULL
  * @split_type: perform uniform split or not (non-uniform split)
- * @unmapped: The pages are already unmapped, they are migration entries.
  *
  * It calls __split_unmapped_folio() to perform uniform and non-uniform split.
  * It is in charge of checking whether the split is supported or not and
@@ -3763,9 +3907,8 @@ bool folio_split_supported(struct folio *folio, unsigned int new_order,
  */
 static int __folio_split(struct folio *folio, unsigned int new_order,
 		struct page *split_at, struct page *lock_at,
-		struct list_head *list, enum split_type split_type, bool unmapped)
+		struct list_head *list, enum split_type split_type)
 {
-	struct deferred_split *ds_queue;
 	XA_STATE(xas, &folio->mapping->i_pages, folio->index);
 	struct folio *end_folio = folio_next(folio);
 	bool is_anon = folio_test_anon(folio);
@@ -3809,14 +3952,12 @@ static int __folio_split(struct folio *folio, unsigned int new_order,
 		 * is taken to serialise against parallel split or collapse
 		 * operations.
 		 */
-		if (!unmapped) {
-			anon_vma = folio_get_anon_vma(folio);
-			if (!anon_vma) {
-				ret = -EBUSY;
-				goto out;
-			}
-			anon_vma_lock_write(anon_vma);
+		anon_vma = folio_get_anon_vma(folio);
+		if (!anon_vma) {
+			ret = -EBUSY;
+			goto out;
 		}
+		anon_vma_lock_write(anon_vma);
 		mapping = NULL;
 	} else {
 		unsigned int min_order;
@@ -3882,8 +4023,7 @@ static int __folio_split(struct folio *folio, unsigned int new_order,
 		goto out_unlock;
 	}
 
-	if (!unmapped)
-		unmap_folio(folio);
+	unmap_folio(folio);
 
 	/* block interrupt reentry in xa_lock and spinlock */
 	local_irq_disable();
@@ -3900,142 +4040,14 @@ static int __folio_split(struct folio *folio, unsigned int new_order,
 		}
 	}
 
-	/* Prevent deferred_split_scan() touching ->_refcount */
-	ds_queue = folio_split_queue_lock(folio);
-	if (folio_ref_freeze(folio, 1 + extra_pins)) {
-		struct swap_cluster_info *ci = NULL;
-		struct lruvec *lruvec;
-		int expected_refs;
-
-		if (old_order > 1) {
-			if (!list_empty(&folio->_deferred_list)) {
-				ds_queue->split_queue_len--;
-				/*
-				 * Reinitialize page_deferred_list after removing the
-				 * page from the split_queue, otherwise a subsequent
-				 * split will see list corruption when checking the
-				 * page_deferred_list.
-				 */
-				list_del_init(&folio->_deferred_list);
-			}
-			if (folio_test_partially_mapped(folio)) {
-				folio_clear_partially_mapped(folio);
-				mod_mthp_stat(old_order,
-					MTHP_STAT_NR_ANON_PARTIALLY_MAPPED, -1);
-			}
-		}
-		split_queue_unlock(ds_queue);
-		if (mapping) {
-			int nr = folio_nr_pages(folio);
-
-			if (folio_test_pmd_mappable(folio) &&
-			    new_order < HPAGE_PMD_ORDER) {
-				if (folio_test_swapbacked(folio)) {
-					__lruvec_stat_mod_folio(folio,
-							NR_SHMEM_THPS, -nr);
-				} else {
-					__lruvec_stat_mod_folio(folio,
-							NR_FILE_THPS, -nr);
-					filemap_nr_thps_dec(mapping);
-				}
-			}
-		}
-
-		if (folio_test_swapcache(folio)) {
-			if (mapping) {
-				VM_WARN_ON_ONCE_FOLIO(mapping, folio);
-				ret = -EINVAL;
-				goto fail;
-			}
-
-			ci = swap_cluster_get_and_lock(folio);
-		}
-
-		/* lock lru list/PageCompound, ref frozen by page_ref_freeze */
-		lruvec = folio_lruvec_lock(folio);
-
-		ret = __split_unmapped_folio(folio, new_order, split_at, &xas,
-					     mapping, split_type);
-
-		/*
-		 * Unfreeze after-split folios and put them back to the right
-		 * list. @folio should be kept frozon until page cache
-		 * entries are updated with all the other after-split folios
-		 * to prevent others seeing stale page cache entries.
-		 * As a result, new_folio starts from the next folio of
-		 * @folio.
-		 */
-		for (new_folio = folio_next(folio); new_folio != end_folio;
-		     new_folio = next) {
-			unsigned long nr_pages = folio_nr_pages(new_folio);
-
-			next = folio_next(new_folio);
-
-			zone_device_private_split_cb(folio, new_folio);
-
-			expected_refs = folio_expected_ref_count(new_folio) + 1;
-			folio_ref_unfreeze(new_folio, expected_refs);
-
-			if (!unmapped)
-				lru_add_split_folio(folio, new_folio, lruvec, list);
-
-			/*
-			 * Anonymous folio with swap cache.
-			 * NOTE: shmem in swap cache is not supported yet.
-			 */
-			if (ci) {
-				__swap_cache_replace_folio(ci, folio, new_folio);
-				continue;
-			}
-
-			/* Anonymous folio without swap cache */
-			if (!mapping)
-				continue;
-
-			/* Add the new folio to the page cache. */
-			if (new_folio->index < end) {
-				__xa_store(&mapping->i_pages, new_folio->index,
-					   new_folio, 0);
-				continue;
-			}
-
-			/* Drop folio beyond EOF: ->index >= end */
-			if (shmem_mapping(mapping))
-				nr_shmem_dropped += nr_pages;
-			else if (folio_test_clear_dirty(new_folio))
-				folio_account_cleaned(
-					new_folio, inode_to_wb(mapping->host));
-			__filemap_remove_folio(new_folio, NULL);
-			folio_put_refs(new_folio, nr_pages);
-		}
-
-		zone_device_private_split_cb(folio, NULL);
-		/*
-		 * Unfreeze @folio only after all page cache entries, which
-		 * used to point to it, have been updated with new folios.
-		 * Otherwise, a parallel folio_try_get() can grab @folio
-		 * and its caller can see stale page cache entries.
-		 */
-		expected_refs = folio_expected_ref_count(folio) + 1;
-		folio_ref_unfreeze(folio, expected_refs);
-
-		unlock_page_lruvec(lruvec);
-
-		if (ci)
-			swap_cluster_unlock(ci);
-	} else {
-		split_queue_unlock(ds_queue);
-		ret = -EAGAIN;
-	}
+	ret = __folio_freeze_and_split_unmapped(folio, new_order, split_at, &xas, mapping,
+						true, list, split_type, end, extra_pins);
 fail:
 	if (mapping)
 		xas_unlock(&xas);
 
 	local_irq_enable();
 
-	if (unmapped)
-		return ret;
-
 	if (nr_shmem_dropped)
 		shmem_uncharge(mapping->host, nr_shmem_dropped);
 
@@ -4079,6 +4091,36 @@ static int __folio_split(struct folio *folio, unsigned int new_order,
 	return ret;
 }
 
+/*
+ * This function is a helper for splitting folios that have already been unmapped.
+ * The use case is that the device or the CPU can refuse to migrate THP pages in
+ * the middle of migration, due to allocation issues on either side
+ *
+ * The high level code is copied from __folio_split, since the pages are anonymous
+ * and are already isolated from the LRU, the code has been simplified to not
+ * burden __folio_split with unmapped sprinkled into the code.
+ *
+ * None of the split folios are unlocked
+ */
+int folio_split_unmapped(struct folio *folio, unsigned int new_order)
+{
+	int extra_pins, ret = 0;
+
+	VM_WARN_ON_FOLIO(folio_mapped(folio), folio);
+	VM_WARN_ON_ONCE_FOLIO(!folio_test_locked(folio), folio);
+	VM_WARN_ON_ONCE_FOLIO(!folio_test_large(folio), folio);
+
+	if (!can_split_folio(folio, 1, &extra_pins))
+		return -EAGAIN;
+
+	local_irq_disable();
+	ret = __folio_freeze_and_split_unmapped(folio, new_order, &folio->page, NULL,
+						NULL, false, NULL, SPLIT_TYPE_UNIFORM,
+						0, extra_pins);
+	local_irq_enable();
+	return ret;
+}
+
 /*
  * This function splits a large folio into smaller folios of order @new_order.
  * @page can point to any page of the large folio to split. The split operation
@@ -4127,12 +4169,12 @@ static int __folio_split(struct folio *folio, unsigned int new_order,
  * with the folio. Splitting to order 0 is compatible with all folios.
  */
 int __split_huge_page_to_list_to_order(struct page *page, struct list_head *list,
-				     unsigned int new_order, bool unmapped)
+				     unsigned int new_order)
 {
 	struct folio *folio = page_folio(page);
 
 	return __folio_split(folio, new_order, &folio->page, page, list,
-			     SPLIT_TYPE_UNIFORM, unmapped);
+			     SPLIT_TYPE_UNIFORM);
 }
 
 /**
@@ -4163,7 +4205,7 @@ int folio_split(struct folio *folio, unsigned int new_order,
 		struct page *split_at, struct list_head *list)
 {
 	return __folio_split(folio, new_order, split_at, &folio->page, list,
-			     SPLIT_TYPE_NON_UNIFORM, false);
+			     SPLIT_TYPE_NON_UNIFORM);
 }
 
 int min_order_for_split(struct folio *folio)
diff --git a/mm/migrate_device.c b/mm/migrate_device.c
index c50abbd32f21..723ef42550dc 100644
--- a/mm/migrate_device.c
+++ b/mm/migrate_device.c
@@ -918,8 +918,7 @@ static int migrate_vma_split_unmapped_folio(struct migrate_vma *migrate,
 
 	folio_get(folio);
 	split_huge_pmd_address(migrate->vma, addr, true);
-	ret = __split_huge_page_to_list_to_order(folio_page(folio, 0), NULL,
-							0, true);
+	ret = folio_split_unmapped(folio, 0);
 	if (ret)
 		return ret;
 	migrate->src[idx] &= ~MIGRATE_PFN_COMPOUND;
-- 
2.51.1



^ permalink raw reply related	[flat|nested] 17+ messages in thread
* Re: [PATCH] mm/huge_memory.c: introduce folio_split_unmapped
  2025-11-14  1:22 [PATCH] mm/huge_memory.c: introduce folio_split_unmapped Balbir Singh
@ 2025-11-19 12:32 ` Dan Carpenter
  2025-11-14  3:21 ` Wei Yang
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 17+ messages in thread
From: kernel test robot @ 2025-11-15  5:09 UTC (permalink / raw)
  To: oe-kbuild; +Cc: lkp, Dan Carpenter

BCC: lkp@intel.com
CC: oe-kbuild-all@lists.linux.dev
In-Reply-To: <20251114012228.2634882-1-balbirs@nvidia.com>
References: <20251114012228.2634882-1-balbirs@nvidia.com>
TO: Balbir Singh <balbirs@nvidia.com>
TO: linux-kernel@vger.kernel.org
TO: linux-mm@kvack.org
TO: dri-devel@lists.freedesktop.org
CC: Balbir Singh <balbirs@nvidia.com>
CC: Andrew Morton <akpm@linux-foundation.org>
CC: Linux Memory Management List <linux-mm@kvack.org>
CC: David Hildenbrand <david@redhat.com>
CC: Zi Yan <ziy@nvidia.com>
CC: Joshua Hahn <joshua.hahnjy@gmail.com>
CC: Rakie Kim <rakie.kim@sk.com>
CC: Byungchul Park <byungchul@sk.com>
CC: Gregory Price <gourry@gourry.net>
CC: Ying Huang <ying.huang@linux.alibaba.com>
CC: Alistair Popple <apopple@nvidia.com>
CC: Oscar Salvador <osalvador@suse.de>
CC: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
CC: Baolin Wang <baolin.wang@linux.alibaba.com>
CC: "Liam R. Howlett" <Liam.Howlett@oracle.com>
CC: Nico Pache <npache@redhat.com>
CC: Ryan Roberts <ryan.roberts@arm.com>
CC: Dev Jain <dev.jain@arm.com>
CC: Barry Song <baohua@kernel.org>
CC: Lyude Paul <lyude@redhat.com>
CC: Danilo Krummrich <dakr@kernel.org>
CC: David Airlie <airlied@gmail.com>
CC: Simona Vetter <simona@ffwll.ch>
CC: Ralph Campbell <rcampbell@nvidia.com>
CC: "Mika Penttilä" <mpenttil@redhat.com>
CC: Matthew Brost <matthew.brost@intel.com>
CC: Francois Dugast <francois.dugast@intel.com>

Hi Balbir,

kernel test robot noticed the following build warnings:

[auto build test WARNING on akpm-mm/mm-everything]

url:    https://github.com/intel-lab-lkp/linux/commits/Balbir-Singh/mm-huge_memory-c-introduce-folio_split_unmapped/20251114-093541
base:   https://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm.git mm-everything
patch link:    https://lore.kernel.org/r/20251114012228.2634882-1-balbirs%40nvidia.com
patch subject: [PATCH] mm/huge_memory.c: introduce folio_split_unmapped
:::::: branch date: 27 hours ago
:::::: commit date: 27 hours ago
config: i386-randconfig-141-20251115 (https://download.01.org/0day-ci/archive/20251115/202511151216.rhK2ItOb-lkp@intel.com/config)
compiler: gcc-12 (Debian 12.4.0-5) 12.4.0

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Reported-by: Dan Carpenter <error27@gmail.com>
| Closes: https://lore.kernel.org/r/202511151216.rhK2ItOb-lkp@intel.com/

smatch warnings:
mm/huge_memory.c:4044 __folio_split() error: uninitialized symbol 'end'.
mm/huge_memory.c:4052 __folio_split() error: we previously assumed 'mapping' could be null (see line 4046)

vim +/end +4044 mm/huge_memory.c

f6b1f167ffe29f Balbir Singh            2025-11-14  3886  
aeb1f4fbd83085 Zi Yan                  2025-10-31  3887  /**
aeb1f4fbd83085 Zi Yan                  2025-10-31  3888   * __folio_split() - split a folio at @split_at to a @new_order folio
58729c04cf1092 Zi Yan                  2025-03-07  3889   * @folio: folio to split
58729c04cf1092 Zi Yan                  2025-03-07  3890   * @new_order: the order of the new folio
58729c04cf1092 Zi Yan                  2025-03-07  3891   * @split_at: a page within the new folio
58729c04cf1092 Zi Yan                  2025-03-07  3892   * @lock_at: a page within @folio to be left locked to caller
58729c04cf1092 Zi Yan                  2025-03-07  3893   * @list: after-split folios will be put on it if non NULL
3c844d850e4486 Wei Yang                2025-11-06  3894   * @split_type: perform uniform split or not (non-uniform split)
58729c04cf1092 Zi Yan                  2025-03-07  3895   *
58729c04cf1092 Zi Yan                  2025-03-07  3896   * It calls __split_unmapped_folio() to perform uniform and non-uniform split.
58729c04cf1092 Zi Yan                  2025-03-07  3897   * It is in charge of checking whether the split is supported or not and
58729c04cf1092 Zi Yan                  2025-03-07  3898   * preparing @folio for __split_unmapped_folio().
58729c04cf1092 Zi Yan                  2025-03-07  3899   *
6c7de9c83be68b Zi Yan                  2025-07-18  3900   * After splitting, the after-split folio containing @lock_at remains locked
6c7de9c83be68b Zi Yan                  2025-07-18  3901   * and others are unlocked:
6c7de9c83be68b Zi Yan                  2025-07-18  3902   * 1. for uniform split, @lock_at points to one of @folio's subpages;
6c7de9c83be68b Zi Yan                  2025-07-18  3903   * 2. for buddy allocator like (non-uniform) split, @lock_at points to @folio.
6c7de9c83be68b Zi Yan                  2025-07-18  3904   *
aeb1f4fbd83085 Zi Yan                  2025-10-31  3905   * Return: 0 - successful, <0 - failed (if -ENOMEM is returned, @folio might be
58729c04cf1092 Zi Yan                  2025-03-07  3906   * split but not to @new_order, the caller needs to check)
58729c04cf1092 Zi Yan                  2025-03-07  3907   */
6384dd1d18de7b Zi Yan                  2025-03-07  3908  static int __folio_split(struct folio *folio, unsigned int new_order,
58729c04cf1092 Zi Yan                  2025-03-07  3909  		struct page *split_at, struct page *lock_at,
f6b1f167ffe29f Balbir Singh            2025-11-14  3910  		struct list_head *list, enum split_type split_type)
e9b61f19858a5d Kirill A. Shutemov      2016-01-15  3911  {
58729c04cf1092 Zi Yan                  2025-03-07  3912  	XA_STATE(xas, &folio->mapping->i_pages, folio->index);
6c7de9c83be68b Zi Yan                  2025-07-18  3913  	struct folio *end_folio = folio_next(folio);
5d65c8d758f259 Barry Song              2024-08-24  3914  	bool is_anon = folio_test_anon(folio);
baa355fd331424 Kirill A. Shutemov      2016-07-26  3915  	struct address_space *mapping = NULL;
5d65c8d758f259 Barry Song              2024-08-24  3916  	struct anon_vma *anon_vma = NULL;
8ec26327c18e1d Wei Yang                2025-10-10  3917  	int old_order = folio_order(folio);
6c7de9c83be68b Zi Yan                  2025-07-18  3918  	struct folio *new_folio, *next;
391dc7f40590d7 Zi Yan                  2025-07-18  3919  	int nr_shmem_dropped = 0;
391dc7f40590d7 Zi Yan                  2025-07-18  3920  	int remap_flags = 0;
504e070dc08f75 Yang Shi                2021-06-15  3921  	int extra_pins, ret;
006d3ff27e884f Hugh Dickins            2018-11-30  3922  	pgoff_t end;
478d134e9506c7 Xu Yu                   2022-04-28  3923  	bool is_hzp;
e9b61f19858a5d Kirill A. Shutemov      2016-01-15  3924  
714b056c832106 Zi Yan                  2025-07-17  3925  	VM_WARN_ON_ONCE_FOLIO(!folio_test_locked(folio), folio);
714b056c832106 Zi Yan                  2025-07-17  3926  	VM_WARN_ON_ONCE_FOLIO(!folio_test_large(folio), folio);
e9b61f19858a5d Kirill A. Shutemov      2016-01-15  3927  
58729c04cf1092 Zi Yan                  2025-03-07  3928  	if (folio != page_folio(split_at) || folio != page_folio(lock_at))
1412ecb3d256e5 Zi Yan                  2024-03-07  3929  		return -EINVAL;
1412ecb3d256e5 Zi Yan                  2024-03-07  3930  
8ec26327c18e1d Wei Yang                2025-10-10  3931  	if (new_order >= old_order)
c010d47f107f60 Zi Yan                  2024-02-26  3932  		return -EINVAL;
58729c04cf1092 Zi Yan                  2025-03-07  3933  
aa27253af32c74 Wei Yang                2025-11-06  3934  	if (!folio_split_supported(folio, new_order, split_type, /* warn = */ true))
6a50c9b512f773 Ran Xiaokai             2024-06-07  3935  		return -EINVAL;
c010d47f107f60 Zi Yan                  2024-02-26  3936  
5beaee54a324ba Matthew Wilcox (Oracle  2024-03-26  3937) 	is_hzp = is_huge_zero_folio(folio);
4737edbbdd4958 Naoya Horiguchi         2023-04-06  3938  	if (is_hzp) {
4737edbbdd4958 Naoya Horiguchi         2023-04-06  3939  		pr_warn_ratelimited("Called split_huge_page for huge zero page\n");
478d134e9506c7 Xu Yu                   2022-04-28  3940  		return -EBUSY;
4737edbbdd4958 Naoya Horiguchi         2023-04-06  3941  	}
478d134e9506c7 Xu Yu                   2022-04-28  3942  
3e9a13daa61253 Matthew Wilcox (Oracle  2022-09-02  3943) 	if (folio_test_writeback(folio))
59807685a7e77e Ying Huang              2017-09-06  3944  		return -EBUSY;
59807685a7e77e Ying Huang              2017-09-06  3945  
5d65c8d758f259 Barry Song              2024-08-24  3946  	if (is_anon) {
e9b61f19858a5d Kirill A. Shutemov      2016-01-15  3947  		/*
c1e8d7c6a7a682 Michel Lespinasse       2020-06-08  3948  		 * The caller does not necessarily hold an mmap_lock that would
baa355fd331424 Kirill A. Shutemov      2016-07-26  3949  		 * prevent the anon_vma disappearing so we first we take a
baa355fd331424 Kirill A. Shutemov      2016-07-26  3950  		 * reference to it and then lock the anon_vma for write. This
2f031c6f042cb8 Matthew Wilcox (Oracle  2022-01-29  3951) 		 * is similar to folio_lock_anon_vma_read except the write lock
baa355fd331424 Kirill A. Shutemov      2016-07-26  3952  		 * is taken to serialise against parallel split or collapse
baa355fd331424 Kirill A. Shutemov      2016-07-26  3953  		 * operations.
e9b61f19858a5d Kirill A. Shutemov      2016-01-15  3954  		 */
29eea9b5a9c9ec Matthew Wilcox (Oracle  2022-09-02  3955) 		anon_vma = folio_get_anon_vma(folio);
e9b61f19858a5d Kirill A. Shutemov      2016-01-15  3956  		if (!anon_vma) {
e9b61f19858a5d Kirill A. Shutemov      2016-01-15  3957  			ret = -EBUSY;
e9b61f19858a5d Kirill A. Shutemov      2016-01-15  3958  			goto out;
e9b61f19858a5d Kirill A. Shutemov      2016-01-15  3959  		}
e9b61f19858a5d Kirill A. Shutemov      2016-01-15  3960  		anon_vma_lock_write(anon_vma);
3d4c0d98eb8572 Balbir Singh            2025-10-01  3961  		mapping = NULL;
baa355fd331424 Kirill A. Shutemov      2016-07-26  3962  	} else {
e220917fa50774 Luis Chamberlain        2024-08-22  3963  		unsigned int min_order;
6a3edd29395631 Yin Fengwei             2022-08-10  3964  		gfp_t gfp;
6a3edd29395631 Yin Fengwei             2022-08-10  3965  
3e9a13daa61253 Matthew Wilcox (Oracle  2022-09-02  3966) 		mapping = folio->mapping;
baa355fd331424 Kirill A. Shutemov      2016-07-26  3967  
baa355fd331424 Kirill A. Shutemov      2016-07-26  3968  		/* Truncated ? */
6384dd1d18de7b Zi Yan                  2025-03-07  3969  		/*
6384dd1d18de7b Zi Yan                  2025-03-07  3970  		 * TODO: add support for large shmem folio in swap cache.
6384dd1d18de7b Zi Yan                  2025-03-07  3971  		 * When shmem is in swap cache, mapping is NULL and
6384dd1d18de7b Zi Yan                  2025-03-07  3972  		 * folio_test_swapcache() is true.
6384dd1d18de7b Zi Yan                  2025-03-07  3973  		 */
baa355fd331424 Kirill A. Shutemov      2016-07-26  3974  		if (!mapping) {
baa355fd331424 Kirill A. Shutemov      2016-07-26  3975  			ret = -EBUSY;
baa355fd331424 Kirill A. Shutemov      2016-07-26  3976  			goto out;
baa355fd331424 Kirill A. Shutemov      2016-07-26  3977  		}
baa355fd331424 Kirill A. Shutemov      2016-07-26  3978  
e220917fa50774 Luis Chamberlain        2024-08-22  3979  		min_order = mapping_min_folio_order(folio->mapping);
e220917fa50774 Luis Chamberlain        2024-08-22  3980  		if (new_order < min_order) {
e220917fa50774 Luis Chamberlain        2024-08-22  3981  			ret = -EINVAL;
e220917fa50774 Luis Chamberlain        2024-08-22  3982  			goto out;
e220917fa50774 Luis Chamberlain        2024-08-22  3983  		}
e220917fa50774 Luis Chamberlain        2024-08-22  3984  
6a3edd29395631 Yin Fengwei             2022-08-10  3985  		gfp = current_gfp_context(mapping_gfp_mask(mapping) &
6a3edd29395631 Yin Fengwei             2022-08-10  3986  							GFP_RECLAIM_MASK);
6a3edd29395631 Yin Fengwei             2022-08-10  3987  
0201ebf274a306 David Howells           2023-06-28  3988  		if (!filemap_release_folio(folio, gfp)) {
6a3edd29395631 Yin Fengwei             2022-08-10  3989  			ret = -EBUSY;
6a3edd29395631 Yin Fengwei             2022-08-10  3990  			goto out;
6a3edd29395631 Yin Fengwei             2022-08-10  3991  		}
6a3edd29395631 Yin Fengwei             2022-08-10  3992  
3c844d850e4486 Wei Yang                2025-11-06  3993  		if (split_type == SPLIT_TYPE_UNIFORM) {
58729c04cf1092 Zi Yan                  2025-03-07  3994  			xas_set_order(&xas, folio->index, new_order);
8ec26327c18e1d Wei Yang                2025-10-10  3995  			xas_split_alloc(&xas, folio, old_order, gfp);
6b24ca4a1a8d4e Matthew Wilcox (Oracle  2020-06-27  3996) 			if (xas_error(&xas)) {
6b24ca4a1a8d4e Matthew Wilcox (Oracle  2020-06-27  3997) 				ret = xas_error(&xas);
6b24ca4a1a8d4e Matthew Wilcox (Oracle  2020-06-27  3998) 				goto out;
6b24ca4a1a8d4e Matthew Wilcox (Oracle  2020-06-27  3999) 			}
58729c04cf1092 Zi Yan                  2025-03-07  4000  		}
6b24ca4a1a8d4e Matthew Wilcox (Oracle  2020-06-27  4001) 
baa355fd331424 Kirill A. Shutemov      2016-07-26  4002  		anon_vma = NULL;
baa355fd331424 Kirill A. Shutemov      2016-07-26  4003  		i_mmap_lock_read(mapping);
006d3ff27e884f Hugh Dickins            2018-11-30  4004  
006d3ff27e884f Hugh Dickins            2018-11-30  4005  		/*
58729c04cf1092 Zi Yan                  2025-03-07  4006  		 *__split_unmapped_folio() may need to trim off pages beyond
58729c04cf1092 Zi Yan                  2025-03-07  4007  		 * EOF: but on 32-bit, i_size_read() takes an irq-unsafe
58729c04cf1092 Zi Yan                  2025-03-07  4008  		 * seqlock, which cannot be nested inside the page tree lock.
58729c04cf1092 Zi Yan                  2025-03-07  4009  		 * So note end now: i_size itself may be changed at any moment,
58729c04cf1092 Zi Yan                  2025-03-07  4010  		 * but folio lock is good enough to serialize the trimming.
006d3ff27e884f Hugh Dickins            2018-11-30  4011  		 */
006d3ff27e884f Hugh Dickins            2018-11-30  4012  		end = DIV_ROUND_UP(i_size_read(mapping->host), PAGE_SIZE);
d144bf6205342a Hugh Dickins            2021-09-02  4013  		if (shmem_mapping(mapping))
d144bf6205342a Hugh Dickins            2021-09-02  4014  			end = shmem_fallocend(mapping->host, end);
baa355fd331424 Kirill A. Shutemov      2016-07-26  4015  	}
e9b61f19858a5d Kirill A. Shutemov      2016-01-15  4016  
e9b61f19858a5d Kirill A. Shutemov      2016-01-15  4017  	/*
684555aacc90d7 Matthew Wilcox (Oracle  2022-09-02  4018) 	 * Racy check if we can split the page, before unmap_folio() will
e9b61f19858a5d Kirill A. Shutemov      2016-01-15  4019  	 * split PMDs
e9b61f19858a5d Kirill A. Shutemov      2016-01-15  4020  	 */
8710f6ed34e7bc David Hildenbrand       2024-08-02  4021  	if (!can_split_folio(folio, 1, &extra_pins)) {
fd4a7ac32918d3 Baolin Wang             2022-10-24  4022  		ret = -EAGAIN;
e9b61f19858a5d Kirill A. Shutemov      2016-01-15  4023  		goto out_unlock;
e9b61f19858a5d Kirill A. Shutemov      2016-01-15  4024  	}
e9b61f19858a5d Kirill A. Shutemov      2016-01-15  4025  
684555aacc90d7 Matthew Wilcox (Oracle  2022-09-02  4026) 	unmap_folio(folio);
e9b61f19858a5d Kirill A. Shutemov      2016-01-15  4027  
b6769834aac1d4 Alex Shi                2020-12-15  4028  	/* block interrupt reentry in xa_lock and spinlock */
b6769834aac1d4 Alex Shi                2020-12-15  4029  	local_irq_disable();
baa355fd331424 Kirill A. Shutemov      2016-07-26  4030  	if (mapping) {
baa355fd331424 Kirill A. Shutemov      2016-07-26  4031  		/*
3e9a13daa61253 Matthew Wilcox (Oracle  2022-09-02  4032) 		 * Check if the folio is present in page cache.
3e9a13daa61253 Matthew Wilcox (Oracle  2022-09-02  4033) 		 * We assume all tail are present too, if folio is there.
baa355fd331424 Kirill A. Shutemov      2016-07-26  4034  		 */
6b24ca4a1a8d4e Matthew Wilcox (Oracle  2020-06-27  4035) 		xas_lock(&xas);
6b24ca4a1a8d4e Matthew Wilcox (Oracle  2020-06-27  4036) 		xas_reset(&xas);
391dc7f40590d7 Zi Yan                  2025-07-18  4037  		if (xas_load(&xas) != folio) {
391dc7f40590d7 Zi Yan                  2025-07-18  4038  			ret = -EAGAIN;
baa355fd331424 Kirill A. Shutemov      2016-07-26  4039  			goto fail;
baa355fd331424 Kirill A. Shutemov      2016-07-26  4040  		}
391dc7f40590d7 Zi Yan                  2025-07-18  4041  	}
baa355fd331424 Kirill A. Shutemov      2016-07-26  4042  
f6b1f167ffe29f Balbir Singh            2025-11-14  4043  	ret = __folio_freeze_and_split_unmapped(folio, new_order, split_at, &xas, mapping,
f6b1f167ffe29f Balbir Singh            2025-11-14 @4044  						true, list, split_type, end, extra_pins);
391dc7f40590d7 Zi Yan                  2025-07-18  4045  fail:
6c7de9c83be68b Zi Yan                  2025-07-18 @4046  	if (mapping)
6c7de9c83be68b Zi Yan                  2025-07-18  4047  		xas_unlock(&xas);
6c7de9c83be68b Zi Yan                  2025-07-18  4048  
6c7de9c83be68b Zi Yan                  2025-07-18  4049  	local_irq_enable();
6c7de9c83be68b Zi Yan                  2025-07-18  4050  
391dc7f40590d7 Zi Yan                  2025-07-18  4051  	if (nr_shmem_dropped)
391dc7f40590d7 Zi Yan                  2025-07-18 @4052  		shmem_uncharge(mapping->host, nr_shmem_dropped);
6c7de9c83be68b Zi Yan                  2025-07-18  4053  
958fea4c1e2eb6 Balbir Singh            2025-10-01  4054  	if (!ret && is_anon && !folio_is_device_private(folio))
391dc7f40590d7 Zi Yan                  2025-07-18  4055  		remap_flags = RMP_USE_SHARED_ZEROPAGE;
958fea4c1e2eb6 Balbir Singh            2025-10-01  4056  
8ec26327c18e1d Wei Yang                2025-10-10  4057  	remap_page(folio, 1 << old_order, remap_flags);
6c7de9c83be68b Zi Yan                  2025-07-18  4058  
6c7de9c83be68b Zi Yan                  2025-07-18  4059  	/*
6c7de9c83be68b Zi Yan                  2025-07-18  4060  	 * Unlock all after-split folios except the one containing
6c7de9c83be68b Zi Yan                  2025-07-18  4061  	 * @lock_at page. If @folio is not split, it will be kept locked.
6c7de9c83be68b Zi Yan                  2025-07-18  4062  	 */
391dc7f40590d7 Zi Yan                  2025-07-18  4063  	for (new_folio = folio; new_folio != end_folio; new_folio = next) {
6c7de9c83be68b Zi Yan                  2025-07-18  4064  		next = folio_next(new_folio);
6c7de9c83be68b Zi Yan                  2025-07-18  4065  		if (new_folio == page_folio(lock_at))
6c7de9c83be68b Zi Yan                  2025-07-18  4066  			continue;
6c7de9c83be68b Zi Yan                  2025-07-18  4067  
6c7de9c83be68b Zi Yan                  2025-07-18  4068  		folio_unlock(new_folio);
6c7de9c83be68b Zi Yan                  2025-07-18  4069  		/*
6c7de9c83be68b Zi Yan                  2025-07-18  4070  		 * Subpages may be freed if there wasn't any mapping
6c7de9c83be68b Zi Yan                  2025-07-18  4071  		 * like if add_to_swap() is running on a lru page that
6c7de9c83be68b Zi Yan                  2025-07-18  4072  		 * had its mapping zapped. And freeing these pages
6c7de9c83be68b Zi Yan                  2025-07-18  4073  		 * requires taking the lru_lock so we do the put_page
6c7de9c83be68b Zi Yan                  2025-07-18  4074  		 * of the tail pages after the split is complete.
6c7de9c83be68b Zi Yan                  2025-07-18  4075  		 */
6c7de9c83be68b Zi Yan                  2025-07-18  4076  		free_folio_and_swap_cache(new_folio);
6c7de9c83be68b Zi Yan                  2025-07-18  4077  	}
e9b61f19858a5d Kirill A. Shutemov      2016-01-15  4078  
e9b61f19858a5d Kirill A. Shutemov      2016-01-15  4079  out_unlock:
baa355fd331424 Kirill A. Shutemov      2016-07-26  4080  	if (anon_vma) {
e9b61f19858a5d Kirill A. Shutemov      2016-01-15  4081  		anon_vma_unlock_write(anon_vma);
e9b61f19858a5d Kirill A. Shutemov      2016-01-15  4082  		put_anon_vma(anon_vma);
baa355fd331424 Kirill A. Shutemov      2016-07-26  4083  	}
baa355fd331424 Kirill A. Shutemov      2016-07-26  4084  	if (mapping)
baa355fd331424 Kirill A. Shutemov      2016-07-26  4085  		i_mmap_unlock_read(mapping);
e9b61f19858a5d Kirill A. Shutemov      2016-01-15  4086  out:
69a37a8ba1b408 Matthew Wilcox (Oracle  2022-06-08  4087) 	xas_destroy(&xas);
8ec26327c18e1d Wei Yang                2025-10-10  4088  	if (old_order == HPAGE_PMD_ORDER)
e9b61f19858a5d Kirill A. Shutemov      2016-01-15  4089  		count_vm_event(!ret ? THP_SPLIT_PAGE : THP_SPLIT_PAGE_FAILED);
8ec26327c18e1d Wei Yang                2025-10-10  4090  	count_mthp_stat(old_order, !ret ? MTHP_STAT_SPLIT : MTHP_STAT_SPLIT_FAILED);
e9b61f19858a5d Kirill A. Shutemov      2016-01-15  4091  	return ret;
e9b61f19858a5d Kirill A. Shutemov      2016-01-15  4092  }
9a982250f773cc Kirill A. Shutemov      2016-01-15  4093  

-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki

^ permalink raw reply	[flat|nested] 17+ messages in thread

end of thread, other threads:[~2025-11-20  5:26 UTC | newest]

Thread overview: 17+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2025-11-14  1:22 [PATCH] mm/huge_memory.c: introduce folio_split_unmapped Balbir Singh
2025-11-14  2:11 ` Zi Yan
2025-11-14  3:21 ` Wei Yang
2025-11-14  3:25   ` Wei Yang
2025-11-14  3:30   ` Balbir Singh
2025-11-14  8:02     ` Wei Yang
2025-11-14  8:36 ` David Hildenbrand (Red Hat)
2025-11-14  9:10   ` Balbir Singh
2025-11-18 20:18     ` David Hildenbrand (Red Hat)
2025-11-15  2:15 ` kernel test robot
2025-11-15  2:33   ` Balbir Singh
2025-11-15  2:36     ` Zi Yan
  -- strict thread matches above, loose matches on Subject: below --
2025-11-15  5:09 kernel test robot
2025-11-19 12:32 ` Dan Carpenter
2025-11-19 23:58 ` Balbir Singh
2025-11-20  0:29   ` Zi Yan
2025-11-20  5:26   ` Dan Carpenter

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.