* [PATCH] mm/gup: avoid an unnecessary allocation call for FOLL_LONGTERM cases
@ 2024-11-17 21:32 John Hubbard
2024-11-17 23:57 ` Sasha Levin
0 siblings, 1 reply; 2+ messages in thread
From: John Hubbard @ 2024-11-17 21:32 UTC (permalink / raw)
To: Greg Kroah-Hartman
Cc: LKML, John Hubbard, David Hildenbrand, Oscar Salvador,
Vivek Kasireddy, Dave Airlie, Gerd Hoffmann, Matthew Wilcox,
Christoph Hellwig, Jason Gunthorpe, Peter Xu, Arnd Bergmann,
Daniel Vetter, Dongwon Kim, Hugh Dickins, Junxiao Chang, stable,
Andrew Morton
commit 53ba78de064b ("mm/gup: introduce
check_and_migrate_movable_folios()") created a new constraint on the
pin_user_pages*() API family: a potentially large internal allocation must
now occur, for FOLL_LONGTERM cases.
A user-visible consequence has now appeared: user space can no longer pin
more than 2GB of memory anymore on x86_64. That's because, on a 4KB
PAGE_SIZE system, when user space tries to (indirectly, via a device
driver that calls pin_user_pages()) pin 2GB, this requires an allocation
of a folio pointers array of MAX_PAGE_ORDER size, which is the limit for
kmalloc().
In addition to the directly visible effect described above, there is also
the problem of adding an unnecessary allocation. The **pages array
argument has already been allocated, and there is no need for a redundant
**folios array allocation in this case.
Fix this by avoiding the new allocation entirely. This is done by
referring to either the original page[i] within **pages, or to the
associated folio. Thanks to David Hildenbrand for suggesting this
approach and for providing the initial implementation (which I've tested
and adjusted slightly) as well.
[jhubbard@nvidia.com]: tweaked the patch to apply to linux-stable/6.11.y
[jhubbard@nvidia.com: whitespace tweak, per David]
Link: https://lkml.kernel.org/r/131cf9c8-ebc0-4cbb-b722-22fa8527bf3c@nvidia.com
[jhubbard@nvidia.com: bypass pofs_get_folio(), per Oscar]
Link: https://lkml.kernel.org/r/c1587c7f-9155-45be-bd62-1e36c0dd6923@nvidia.com
Link: https://lkml.kernel.org/r/20241105032944.141488-2-jhubbard@nvidia.com
Fixes: 53ba78de064b ("mm/gup: introduce check_and_migrate_movable_folios()")
Signed-off-by: John Hubbard <jhubbard@nvidia.com>
Suggested-by: David Hildenbrand <david@redhat.com>
Acked-by: David Hildenbrand <david@redhat.com>
Reviewed-by: Oscar Salvador <osalvador@suse.de>
Cc: Vivek Kasireddy <vivek.kasireddy@intel.com>
Cc: Dave Airlie <airlied@redhat.com>
Cc: Gerd Hoffmann <kraxel@redhat.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: Jason Gunthorpe <jgg@nvidia.com>
Cc: Peter Xu <peterx@redhat.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Cc: Dongwon Kim <dongwon.kim@intel.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Junxiao Chang <junxiao.chang@intel.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
mm/gup.c | 114 +++++++++++++++++++++++++++++++++++++------------------
1 file changed, 77 insertions(+), 37 deletions(-)
diff --git a/mm/gup.c b/mm/gup.c
index 947881ff5e8f..fd3d7900c24b 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -2282,20 +2282,57 @@ struct page *get_dump_page(unsigned long addr)
#endif /* CONFIG_ELF_CORE */
#ifdef CONFIG_MIGRATION
+
+/*
+ * An array of either pages or folios ("pofs"). Although it may seem tempting to
+ * avoid this complication, by simply interpreting a list of folios as a list of
+ * pages, that approach won't work in the longer term, because eventually the
+ * layouts of struct page and struct folio will become completely different.
+ * Furthermore, this pof approach avoids excessive page_folio() calls.
+ */
+struct pages_or_folios {
+ union {
+ struct page **pages;
+ struct folio **folios;
+ void **entries;
+ };
+ bool has_folios;
+ long nr_entries;
+};
+
+static struct folio *pofs_get_folio(struct pages_or_folios *pofs, long i)
+{
+ if (pofs->has_folios)
+ return pofs->folios[i];
+ return page_folio(pofs->pages[i]);
+}
+
+static void pofs_clear_entry(struct pages_or_folios *pofs, long i)
+{
+ pofs->entries[i] = NULL;
+}
+
+static void pofs_unpin(struct pages_or_folios *pofs)
+{
+ if (pofs->has_folios)
+ unpin_folios(pofs->folios, pofs->nr_entries);
+ else
+ unpin_user_pages(pofs->pages, pofs->nr_entries);
+}
+
/*
* Returns the number of collected folios. Return value is always >= 0.
*/
static unsigned long collect_longterm_unpinnable_folios(
- struct list_head *movable_folio_list,
- unsigned long nr_folios,
- struct folio **folios)
+ struct list_head *movable_folio_list,
+ struct pages_or_folios *pofs)
{
unsigned long i, collected = 0;
struct folio *prev_folio = NULL;
bool drain_allow = true;
- for (i = 0; i < nr_folios; i++) {
- struct folio *folio = folios[i];
+ for (i = 0; i < pofs->nr_entries; i++) {
+ struct folio *folio = pofs_get_folio(pofs, i);
if (folio == prev_folio)
continue;
@@ -2336,16 +2373,15 @@ static unsigned long collect_longterm_unpinnable_folios(
* Returns -EAGAIN if all folios were successfully migrated or -errno for
* failure (or partial success).
*/
-static int migrate_longterm_unpinnable_folios(
- struct list_head *movable_folio_list,
- unsigned long nr_folios,
- struct folio **folios)
+static int
+migrate_longterm_unpinnable_folios(struct list_head *movable_folio_list,
+ struct pages_or_folios *pofs)
{
int ret;
unsigned long i;
- for (i = 0; i < nr_folios; i++) {
- struct folio *folio = folios[i];
+ for (i = 0; i < pofs->nr_entries; i++) {
+ struct folio *folio = pofs_get_folio(pofs, i);
if (folio_is_device_coherent(folio)) {
/*
@@ -2353,7 +2389,7 @@ static int migrate_longterm_unpinnable_folios(
* convert the pin on the source folio to a normal
* reference.
*/
- folios[i] = NULL;
+ pofs_clear_entry(pofs, i);
folio_get(folio);
gup_put_folio(folio, 1, FOLL_PIN);
@@ -2372,8 +2408,8 @@ static int migrate_longterm_unpinnable_folios(
* calling folio_isolate_lru() which takes a reference so the
* folio won't be freed if it's migrating.
*/
- unpin_folio(folios[i]);
- folios[i] = NULL;
+ unpin_folio(folio);
+ pofs_clear_entry(pofs, i);
}
if (!list_empty(movable_folio_list)) {
@@ -2396,12 +2432,26 @@ static int migrate_longterm_unpinnable_folios(
return -EAGAIN;
err:
- unpin_folios(folios, nr_folios);
+ pofs_unpin(pofs);
putback_movable_pages(movable_folio_list);
return ret;
}
+static long
+check_and_migrate_movable_pages_or_folios(struct pages_or_folios *pofs)
+{
+ LIST_HEAD(movable_folio_list);
+ unsigned long collected;
+
+ collected = collect_longterm_unpinnable_folios(&movable_folio_list,
+ pofs);
+ if (!collected)
+ return 0;
+
+ return migrate_longterm_unpinnable_folios(&movable_folio_list, pofs);
+}
+
/*
* Check whether all folios are *allowed* to be pinned indefinitely (longterm).
* Rather confusingly, all folios in the range are required to be pinned via
@@ -2421,16 +2471,13 @@ static int migrate_longterm_unpinnable_folios(
static long check_and_migrate_movable_folios(unsigned long nr_folios,
struct folio **folios)
{
- unsigned long collected;
- LIST_HEAD(movable_folio_list);
+ struct pages_or_folios pofs = {
+ .folios = folios,
+ .has_folios = true,
+ .nr_entries = nr_folios,
+ };
- collected = collect_longterm_unpinnable_folios(&movable_folio_list,
- nr_folios, folios);
- if (!collected)
- return 0;
-
- return migrate_longterm_unpinnable_folios(&movable_folio_list,
- nr_folios, folios);
+ return check_and_migrate_movable_pages_or_folios(&pofs);
}
/*
@@ -2442,20 +2489,13 @@ static long check_and_migrate_movable_folios(unsigned long nr_folios,
static long check_and_migrate_movable_pages(unsigned long nr_pages,
struct page **pages)
{
- struct folio **folios;
- long i, ret;
+ struct pages_or_folios pofs = {
+ .pages = pages,
+ .has_folios = false,
+ .nr_entries = nr_pages,
+ };
- folios = kmalloc_array(nr_pages, sizeof(*folios), GFP_KERNEL);
- if (!folios)
- return -ENOMEM;
-
- for (i = 0; i < nr_pages; i++)
- folios[i] = page_folio(pages[i]);
-
- ret = check_and_migrate_movable_folios(nr_pages, folios);
-
- kfree(folios);
- return ret;
+ return check_and_migrate_movable_pages_or_folios(&pofs);
}
#else
static long check_and_migrate_movable_pages(unsigned long nr_pages,
--
2.47.0
^ permalink raw reply related [flat|nested] 2+ messages in thread
* Re: [PATCH] mm/gup: avoid an unnecessary allocation call for FOLL_LONGTERM cases
2024-11-17 21:32 [PATCH] mm/gup: avoid an unnecessary allocation call for FOLL_LONGTERM cases John Hubbard
@ 2024-11-17 23:57 ` Sasha Levin
0 siblings, 0 replies; 2+ messages in thread
From: Sasha Levin @ 2024-11-17 23:57 UTC (permalink / raw)
To: stable; +Cc: John Hubbard, Sasha Levin
[ Sasha's backport helper bot ]
Hi,
Found matching upstream commit: 94efde1d15399f5c88e576923db9bcd422d217f2
Note: The patch differs from the upstream commit:
---
--- - 2024-11-17 18:44:18.356569875 -0500
+++ /tmp/tmp.ThMcI6kBWx 2024-11-17 18:44:18.344880435 -0500
@@ -1,8 +1,3 @@
-commit 53ba78de064b ("mm/gup: introduce
-check_and_migrate_movable_folios()") created a new constraint on the
-pin_user_pages*() API family: a potentially large internal allocation must
-now occur, for FOLL_LONGTERM cases.
-
A user-visible consequence has now appeared: user space can no longer pin
more than 2GB of memory anymore on x86_64. That's because, on a 4KB
PAGE_SIZE system, when user space tries to (indirectly, via a device
@@ -21,6 +16,7 @@
approach and for providing the initial implementation (which I've tested
and adjusted slightly) as well.
+[jhubbard@nvidia.com]: tweaked the patch to apply to linux-stable/6.11.y
[jhubbard@nvidia.com: whitespace tweak, per David]
Link: https://lkml.kernel.org/r/131cf9c8-ebc0-4cbb-b722-22fa8527bf3c@nvidia.com
[jhubbard@nvidia.com: bypass pofs_get_folio(), per Oscar]
@@ -46,14 +42,14 @@
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
- mm/gup.c | 116 ++++++++++++++++++++++++++++++++++++-------------------
- 1 file changed, 77 insertions(+), 39 deletions(-)
+ mm/gup.c | 114 +++++++++++++++++++++++++++++++++++++------------------
+ 1 file changed, 77 insertions(+), 37 deletions(-)
diff --git a/mm/gup.c b/mm/gup.c
-index 4637dab7b54f1..ad0c8922dac3c 100644
+index 947881ff5e8f..fd3d7900c24b 100644
--- a/mm/gup.c
+++ b/mm/gup.c
-@@ -2273,20 +2273,57 @@ struct page *get_dump_page(unsigned long addr)
+@@ -2282,20 +2282,57 @@ struct page *get_dump_page(unsigned long addr)
#endif /* CONFIG_ELF_CORE */
#ifdef CONFIG_MIGRATION
@@ -116,7 +112,7 @@
if (folio == prev_folio)
continue;
-@@ -2327,16 +2364,15 @@ static unsigned long collect_longterm_unpinnable_folios(
+@@ -2336,16 +2373,15 @@ static unsigned long collect_longterm_unpinnable_folios(
* Returns -EAGAIN if all folios were successfully migrated or -errno for
* failure (or partial success).
*/
@@ -138,7 +134,7 @@
if (folio_is_device_coherent(folio)) {
/*
-@@ -2344,7 +2380,7 @@ static int migrate_longterm_unpinnable_folios(
+@@ -2353,7 +2389,7 @@ static int migrate_longterm_unpinnable_folios(
* convert the pin on the source folio to a normal
* reference.
*/
@@ -147,7 +143,7 @@
folio_get(folio);
gup_put_folio(folio, 1, FOLL_PIN);
-@@ -2363,8 +2399,8 @@ static int migrate_longterm_unpinnable_folios(
+@@ -2372,8 +2408,8 @@ static int migrate_longterm_unpinnable_folios(
* calling folio_isolate_lru() which takes a reference so the
* folio won't be freed if it's migrating.
*/
@@ -158,7 +154,7 @@
}
if (!list_empty(movable_folio_list)) {
-@@ -2387,12 +2423,26 @@ static int migrate_longterm_unpinnable_folios(
+@@ -2396,12 +2432,26 @@ static int migrate_longterm_unpinnable_folios(
return -EAGAIN;
err:
@@ -184,9 +180,9 @@
+}
+
/*
- * Check whether all folios are *allowed* to be pinned indefinitely (long term).
+ * Check whether all folios are *allowed* to be pinned indefinitely (longterm).
* Rather confusingly, all folios in the range are required to be pinned via
-@@ -2417,16 +2467,13 @@ static int migrate_longterm_unpinnable_folios(
+@@ -2421,16 +2471,13 @@ static int migrate_longterm_unpinnable_folios(
static long check_and_migrate_movable_folios(unsigned long nr_folios,
struct folio **folios)
{
@@ -209,7 +205,7 @@
}
/*
-@@ -2436,22 +2483,13 @@ static long check_and_migrate_movable_folios(unsigned long nr_folios,
+@@ -2442,20 +2489,13 @@ static long check_and_migrate_movable_folios(unsigned long nr_folios,
static long check_and_migrate_movable_pages(unsigned long nr_pages,
struct page **pages)
{
@@ -222,10 +218,8 @@
+ };
- folios = kmalloc_array(nr_pages, sizeof(*folios), GFP_KERNEL);
-- if (!folios) {
-- unpin_user_pages(pages, nr_pages);
+- if (!folios)
- return -ENOMEM;
-- }
-
- for (i = 0; i < nr_pages; i++)
- folios[i] = page_folio(pages[i]);
@@ -238,3 +232,6 @@
}
#else
static long check_and_migrate_movable_pages(unsigned long nr_pages,
+--
+2.47.0
+
---
Results of testing on various branches:
| Branch | Patch Apply | Build Test |
|---------------------------|-------------|------------|
| stable/linux-6.11.y | Success | Success |
| stable/linux-6.6.y | Failed | N/A |
| stable/linux-6.1.y | Failed | N/A |
| stable/linux-5.15.y | Failed | N/A |
| stable/linux-5.10.y | Failed | N/A |
| stable/linux-5.4.y | Failed | N/A |
| stable/linux-4.19.y | Failed | N/A |
^ permalink raw reply [flat|nested] 2+ messages in thread
end of thread, other threads:[~2024-11-17 23:57 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2024-11-17 21:32 [PATCH] mm/gup: avoid an unnecessary allocation call for FOLL_LONGTERM cases John Hubbard
2024-11-17 23:57 ` Sasha Levin
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox