From: Luis Chamberlain <mcgrof@kernel.org>
To: akpm@linux-foundation.org, willy@infradead.org,
djwong@kernel.org, brauner@kernel.org, david@fromorbit.com,
chandan.babu@oracle.com
Cc: hare@suse.de, ritesh.list@gmail.com, john.g.garry@oracle.com,
ziy@nvidia.com, linux-fsdevel@vger.kernel.org,
linux-xfs@vger.kernel.org, linux-mm@kvack.org,
linux-block@vger.kernel.org, gost.dev@samsung.com,
p.raghav@samsung.com, kernel@pankajraghav.com, mcgrof@kernel.org
Subject: [PATCH v5 05/11] mm: split a folio in minimum folio order chunks
Date: Fri, 3 May 2024 02:53:47 -0700
Message-ID: <20240503095353.3798063-6-mcgrof@kernel.org>
In-Reply-To: <20240503095353.3798063-1-mcgrof@kernel.org>

split_folio() and split_folio_to_list() assume order 0. To support a
minimum folio order, expand them to look up the minimum order of the
folio's mapping and use that.

Set new_order to at least the minimum folio order when splitting
through split_folio_to_list(), so that the minimum folio order
requirement of the page cache is maintained;
split_huge_page_to_list_to_order() now warns and returns -EINVAL if a
caller asks for an order below the mapping's minimum.

Update the debugfs write files used for testing so that the minimum
order is respected as well: the min order is simply enforced whenever a
file mapping is used.

Signed-off-by: Pankaj Raghav <p.raghav@samsung.com>
Signed-off-by: Luis Chamberlain <mcgrof@kernel.org>
---
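Illustrative note (not part of the diff below): a minimal sketch of the
clamping rule that the new split_folio_to_list() enforces. It only
assumes mapping_min_folio_order() from earlier in this series;
example_split_min_order() is a hypothetical name used for this sketch,
not a function added by the patch.

	/*
	 * Sketch only: a file-backed folio must never be split below its
	 * mapping's minimum folio order, so the requested order is clamped
	 * up to that minimum before the real split runs.
	 */
	static int example_split_min_order(struct folio *folio,
					   unsigned int new_order)
	{
		unsigned int min_order = 0;

		if (!folio_test_anon(folio)) {
			if (!folio->mapping)	/* truncated under us */
				return -EBUSY;
			min_order = mapping_min_folio_order(folio->mapping);
		}

		/* never go below the mapping's minimum order */
		if (new_order < min_order)
			new_order = min_order;

		return split_huge_page_to_list_to_order(&folio->page, NULL,
							new_order);
	}

Anonymous folios keep the order-0 default, so existing behaviour is
unchanged; only mappings that declare a minimum folio order (block size
larger than the page size) are affected.
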
include/linux/huge_mm.h | 12 ++++++----
mm/huge_memory.c | 50 ++++++++++++++++++++++++++++++++++++++---
2 files changed, 55 insertions(+), 7 deletions(-)
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index de0c89105076..06748a8fa43b 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -87,6 +87,8 @@ extern struct kobj_attribute shmem_enabled_attr;
#define thp_vma_allowable_order(vma, vm_flags, smaps, in_pf, enforce_sysfs, order) \
(!!thp_vma_allowable_orders(vma, vm_flags, smaps, in_pf, enforce_sysfs, BIT(order)))
+#define split_folio(f) split_folio_to_list(f, NULL)
+
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
#define HPAGE_PMD_SHIFT PMD_SHIFT
#define HPAGE_PMD_SIZE ((1UL) << HPAGE_PMD_SHIFT)
@@ -267,9 +269,10 @@ void folio_prep_large_rmappable(struct folio *folio);
bool can_split_folio(struct folio *folio, int *pextra_pins);
int split_huge_page_to_list_to_order(struct page *page, struct list_head *list,
unsigned int new_order);
+int split_folio_to_list(struct folio *folio, struct list_head *list);
static inline int split_huge_page(struct page *page)
{
- return split_huge_page_to_list_to_order(page, NULL, 0);
+ return split_folio(page_folio(page));
}
void deferred_split_folio(struct folio *folio);
@@ -432,6 +435,10 @@ static inline int split_huge_page(struct page *page)
{
return 0;
}
+static inline int split_folio_to_list(struct folio *folio, struct list_head *list)
+{
+ return 0;
+}
static inline void deferred_split_folio(struct folio *folio) {}
#define split_huge_pmd(__vma, __pmd, __address) \
do { } while (0)
@@ -532,9 +539,6 @@ static inline int split_folio_to_order(struct folio *folio, int new_order)
return split_folio_to_list_to_order(folio, NULL, new_order);
}
-#define split_folio_to_list(f, l) split_folio_to_list_to_order(f, l, 0)
-#define split_folio(f) split_folio_to_order(f, 0)
-
/*
* archs that select ARCH_WANTS_THP_SWAP but don't support THP_SWP due to
* limitations in the implementation like arm64 MTE can override this to
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 89f58c7603b2..c0cc8f32fe42 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -3035,6 +3035,9 @@ bool can_split_folio(struct folio *folio, int *pextra_pins)
* Returns 0 if the hugepage is split successfully.
* Returns -EBUSY if the page is pinned or if anon_vma disappeared from under
* us.
+ *
+ * Callers should ensure that the order respects the address space mapping
+ * min-order if one is set.
*/
int split_huge_page_to_list_to_order(struct page *page, struct list_head *list,
unsigned int new_order)
@@ -3107,6 +3110,7 @@ int split_huge_page_to_list_to_order(struct page *page, struct list_head *list,
mapping = NULL;
anon_vma_lock_write(anon_vma);
} else {
+ unsigned int min_order;
gfp_t gfp;
mapping = folio->mapping;
@@ -3117,6 +3121,14 @@ int split_huge_page_to_list_to_order(struct page *page, struct list_head *list,
goto out;
}
+ min_order = mapping_min_folio_order(folio->mapping);
+ if (new_order < min_order) {
+ VM_WARN_ONCE(1, "Cannot split mapped folio below min-order: %u",
+ min_order);
+ ret = -EINVAL;
+ goto out;
+ }
+
gfp = current_gfp_context(mapping_gfp_mask(mapping) &
GFP_RECLAIM_MASK);
@@ -3227,6 +3239,21 @@ int split_huge_page_to_list_to_order(struct page *page, struct list_head *list,
return ret;
}
+int split_folio_to_list(struct folio *folio, struct list_head *list)
+{
+ unsigned int min_order = 0;
+
+ if (!folio_test_anon(folio)) {
+ if (!folio->mapping) {
+ count_vm_event(THP_SPLIT_PAGE_FAILED);
+ return -EBUSY;
+ }
+ min_order = mapping_min_folio_order(folio->mapping);
+ }
+
+ return split_huge_page_to_list_to_order(&folio->page, list, min_order);
+}
+
void folio_undo_large_rmappable(struct folio *folio)
{
struct deferred_split *ds_queue;
@@ -3466,6 +3493,7 @@ static int split_huge_pages_pid(int pid, unsigned long vaddr_start,
struct vm_area_struct *vma = vma_lookup(mm, addr);
struct page *page;
struct folio *folio;
+ unsigned int target_order = new_order;
if (!vma)
break;
@@ -3502,7 +3530,18 @@ static int split_huge_pages_pid(int pid, unsigned long vaddr_start,
if (!folio_trylock(folio))
goto next;
- if (!split_folio_to_order(folio, new_order))
+ if (!folio_test_anon(folio)) {
+ unsigned int min_order;
+
+ if (!folio->mapping)
+ goto next;
+
+ min_order = mapping_min_folio_order(folio->mapping);
+ if (new_order < min_order)
+ target_order = min_order;
+ }
+
+ if (!split_folio_to_order(folio, target_order))
split++;
folio_unlock(folio);
@@ -3545,14 +3584,19 @@ static int split_huge_pages_in_file(const char *file_path, pgoff_t off_start,
for (index = off_start; index < off_end; index += nr_pages) {
struct folio *folio = filemap_get_folio(mapping, index);
+ unsigned int min_order, target_order = new_order;
nr_pages = 1;
if (IS_ERR(folio))
continue;
- if (!folio_test_large(folio))
+ if (!folio->mapping || !folio_test_large(folio))
goto next;
+ min_order = mapping_min_folio_order(mapping);
+ if (new_order < min_order)
+ target_order = min_order;
+
total++;
nr_pages = folio_nr_pages(folio);
@@ -3562,7 +3606,7 @@ static int split_huge_pages_in_file(const char *file_path, pgoff_t off_start,
if (!folio_trylock(folio))
goto next;
- if (!split_folio_to_order(folio, new_order))
+ if (!split_folio_to_order(folio, target_order))
split++;
folio_unlock(folio);
--
2.43.0