* [PATCH v5 3/6] mm: make alloc_contig_range work at pageblock granularity
@ 2022-02-11 16:41 ` Zi Yan
0 siblings, 0 replies; 42+ messages in thread
From: Zi Yan @ 2022-02-11 16:41 UTC (permalink / raw)
To: David Hildenbrand, linux-mm
Cc: linux-kernel, Michael Ellerman, Christoph Hellwig,
Marek Szyprowski, Robin Murphy, linuxppc-dev, virtualization,
iommu, Vlastimil Babka, Mel Gorman, Eric Ren, Mike Rapoport,
Oscar Salvador, Zi Yan
From: Zi Yan <ziy@nvidia.com>
alloc_contig_range() worked at MAX_ORDER-1 granularity to avoid merging
pageblocks with different migratetypes. It might unnecessarily convert
extra pageblocks at the beginning and at the end of the range. Change
alloc_contig_range() to work at pageblock granularity.
Special handling is needed for free pages and in-use pages across the
boundaries of the range specified alloc_contig_range(). Because these
partially isolated pages causes free page accounting issues. The free
pages will be split and freed into separate migratetype lists; the
in-use pages will be migrated then the freed pages will be handled.
Signed-off-by: Zi Yan <ziy@nvidia.com>
---
include/linux/page-isolation.h | 2 +-
mm/internal.h | 3 +
mm/memory_hotplug.c | 3 +-
mm/page_alloc.c | 235 +++++++++++++++++++++++++--------
mm/page_isolation.c | 33 ++++-
5 files changed, 211 insertions(+), 65 deletions(-)
diff --git a/include/linux/page-isolation.h b/include/linux/page-isolation.h
index 4ef7be6def83..78ff940cc169 100644
--- a/include/linux/page-isolation.h
+++ b/include/linux/page-isolation.h
@@ -54,7 +54,7 @@ int move_freepages_block(struct zone *zone, struct page *page,
*/
int
start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
- unsigned migratetype, int flags);
+ unsigned migratetype, int flags, gfp_t gfp_flags);
/*
* Changes MIGRATE_ISOLATE to MIGRATE_MOVABLE.
diff --git a/mm/internal.h b/mm/internal.h
index 0d240e876831..509cbdc25992 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -319,6 +319,9 @@ isolate_freepages_range(struct compact_control *cc,
int
isolate_migratepages_range(struct compact_control *cc,
unsigned long low_pfn, unsigned long end_pfn);
+
+int
+isolate_single_pageblock(unsigned long boundary_pfn, gfp_t gfp_flags, int isolate_before_boundary);
#endif
int find_suitable_fallback(struct free_area *area, unsigned int order,
int migratetype, bool only_stealable, bool *can_steal);
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index ce68098832aa..82406d2f3e46 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -1863,7 +1863,8 @@ int __ref offline_pages(unsigned long start_pfn, unsigned long nr_pages,
/* set above range as isolated */
ret = start_isolate_page_range(start_pfn, end_pfn,
MIGRATE_MOVABLE,
- MEMORY_OFFLINE | REPORT_FAILURE);
+ MEMORY_OFFLINE | REPORT_FAILURE,
+ GFP_USER | __GFP_MOVABLE | __GFP_RETRY_MAYFAIL);
if (ret) {
reason = "failure to isolate range";
goto failed_removal_pcplists_disabled;
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 62ef78f3d771..7a4fa21aea5c 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -8985,7 +8985,7 @@ static inline void alloc_contig_dump_pages(struct list_head *page_list)
#endif
/* [start, end) must belong to a single zone. */
-static int __alloc_contig_migrate_range(struct compact_control *cc,
+int __alloc_contig_migrate_range(struct compact_control *cc,
unsigned long start, unsigned long end)
{
/* This function is based on compact_zone() from compaction.c. */
@@ -9043,6 +9043,167 @@ static int __alloc_contig_migrate_range(struct compact_control *cc,
return 0;
}
+/**
+ * split_free_page() -- split a free page at split_pfn_offset
+ * @free_page: the original free page
+ * @order: the order of the page
+ * @split_pfn_offset: split offset within the page
+ *
+ * It is used when the free page crosses two pageblocks with different migratetypes
+ * at split_pfn_offset within the page. The split free page will be put into
+ * separate migratetype lists afterwards. Otherwise, the function achieves
+ * nothing.
+ */
+static inline void split_free_page(struct page *free_page,
+ int order, unsigned long split_pfn_offset)
+{
+ struct zone *zone = page_zone(free_page);
+ unsigned long free_page_pfn = page_to_pfn(free_page);
+ unsigned long pfn;
+ unsigned long flags;
+ int free_page_order;
+
+ spin_lock_irqsave(&zone->lock, flags);
+ del_page_from_free_list(free_page, zone, order);
+ for (pfn = free_page_pfn;
+ pfn < free_page_pfn + (1UL << order);) {
+ int mt = get_pfnblock_migratetype(pfn_to_page(pfn), pfn);
+
+ free_page_order = order_base_2(split_pfn_offset);
+ __free_one_page(pfn_to_page(pfn), pfn, zone, free_page_order,
+ mt, FPI_NONE);
+ pfn += 1UL << free_page_order;
+ split_pfn_offset -= (1UL << free_page_order);
+ /* we have done the first part, now switch to second part */
+ if (split_pfn_offset == 0)
+ split_pfn_offset = (1UL << order) - (pfn - free_page_pfn);
+ }
+ spin_unlock_irqrestore(&zone->lock, flags);
+}
+
+/**
+ * isolate_single_pageblock() -- tries to isolate a pageblock that might be
+ * within a free or in-use page.
+ * @boundary_pfn: pageblock-aligned pfn that a page might cross
+ * @gfp_flags: GFP flags used for migrating pages
+ * @isolate_before_boundary: isolate the pageblock before (1) or after (0)
+ * the boundary_pfn
+ *
+ * Free and in-use pages can be as big as MAX_ORDER-1 and contain more than one
+ * pageblock. When not all pageblocks within a page are isolated at the same
+ * time, free page accounting can go wrong. For example, in the case of
+ * MAX_ORDER-1 = pageblock_order + 1, a MAX_ORDER-1 page has two pagelbocks.
+ * [ MAX_ORDER-1 ]
+ * [ pageblock0 | pageblock1 ]
+ * When either pageblock is isolated, if it is a free page, the page is not
+ * split into separate migratetype lists, which is supposed to; if it is an
+ * in-use page and freed later, __free_one_page() does not split the free page
+ * either. The function handles this by splitting the free page or migrating
+ * the in-use page then splitting the free page.
+ */
+int isolate_single_pageblock(unsigned long boundary_pfn, gfp_t gfp_flags,
+ int isolate_before_boundary)
+{
+ unsigned char saved_mt;
+ /*
+ * scan at max(MAX_ORDER_NR_PAGES, pageblock_nr_pages) aligned range to
+ * avoid isolate pageblocks belonging to a bigger free or in-use page
+ */
+ unsigned long start_pfn = pfn_max_align_down(boundary_pfn);
+ unsigned long isolated_pageblock_pfn;
+ unsigned long pfn;
+
+ VM_BUG_ON(!IS_ALIGNED(boundary_pfn, pageblock_nr_pages));
+
+ if (isolate_before_boundary)
+ isolated_pageblock_pfn = boundary_pfn - pageblock_nr_pages;
+ else
+ isolated_pageblock_pfn = boundary_pfn;
+
+ saved_mt = get_pageblock_migratetype(pfn_to_page(isolated_pageblock_pfn));
+ set_pageblock_migratetype(pfn_to_page(isolated_pageblock_pfn), MIGRATE_ISOLATE);
+
+ for (pfn = start_pfn; pfn < boundary_pfn;) {
+ struct page *page = pfn_to_page(pfn);
+
+ /*
+ * start_pfn is max(MAX_ORDER_NR_PAGES, pageblock_nr_pages)
+ * aligned, if there is any free pages in [start_pfn, boundary_pfn),
+ * its head page will always be in the range.
+ */
+ if (PageBuddy(page)) {
+ int order = buddy_order(page);
+
+ if (pfn + (1UL << order) > boundary_pfn)
+ split_free_page(page, order, boundary_pfn - pfn);
+ pfn += (1UL << order);
+ continue;
+ }
+ /*
+ * migrate compound pages then let the free page handling code
+ * above do the rest
+ */
+ if (PageHuge(page) || PageTransCompound(page)) {
+ unsigned long nr_pages = compound_nr(page);
+ int order = compound_order(page);
+ struct page *head = compound_head(page);
+ unsigned long head_pfn = page_to_pfn(head);
+
+ if (head_pfn + nr_pages >= boundary_pfn) {
+ int ret;
+ struct compact_control cc = {
+ .nr_migratepages = 0,
+ .order = -1,
+ .zone = page_zone(pfn_to_page(head_pfn)),
+ .mode = MIGRATE_SYNC,
+ .ignore_skip_hint = true,
+ .no_set_skip_hint = true,
+ .gfp_mask = current_gfp_context(gfp_flags),
+ .alloc_contig = true,
+ };
+
+ INIT_LIST_HEAD(&cc.migratepages);
+
+ ret = __alloc_contig_migrate_range(&cc, head_pfn,
+ head_pfn + nr_pages);
+
+ if (ret) {
+ /* restore the original migratetype */
+ set_pageblock_migratetype(
+ pfn_to_page(isolated_pageblock_pfn),
+ saved_mt);
+ return -EBUSY;
+ }
+ /*
+ * reset pfn, let the free page handling code
+ * above split the free page to the right
+ * migratetype list.
+ *
+ * head_pfn is not used here as a hugetlb page
+ * order can be bigger than MAX_ORDER-1, but
+ * after it is freed, the free page order is not.
+ * Use pfn within the range to find the head of
+ * the free page and reset order to 0 if a hugetlb
+ * page with >MAX_ORDER-1 order is encountered.
+ */
+ if (order > MAX_ORDER-1)
+ order = 0;
+ while (!PageBuddy(pfn_to_page(pfn))) {
+ order++;
+ pfn &= ~0UL << order;
+ }
+ continue;
+ }
+ pfn += nr_pages;
+ continue;
+ }
+
+ pfn++;
+ }
+ return 0;
+}
+
+
/**
* alloc_contig_range() -- tries to allocate given range of pages
* @start: start PFN to allocate
@@ -9067,8 +9228,9 @@ static int __alloc_contig_migrate_range(struct compact_control *cc,
int alloc_contig_range(unsigned long start, unsigned long end,
unsigned migratetype, gfp_t gfp_mask)
{
- unsigned long outer_start, outer_end;
- unsigned int order;
+ unsigned long outer_end;
+ unsigned long alloc_start = ALIGN_DOWN(start, pageblock_nr_pages);
+ unsigned long alloc_end = ALIGN(end, pageblock_nr_pages);
int ret = 0;
struct compact_control cc = {
@@ -9087,14 +9249,11 @@ int alloc_contig_range(unsigned long start, unsigned long end,
* What we do here is we mark all pageblocks in range as
* MIGRATE_ISOLATE. Because pageblock and max order pages may
* have different sizes, and due to the way page allocator
- * work, we align the range to biggest of the two pages so
- * that page allocator won't try to merge buddies from
- * different pageblocks and change MIGRATE_ISOLATE to some
- * other migration type.
+ * work, start_isolate_page_range() has special handlings for this.
*
* Once the pageblocks are marked as MIGRATE_ISOLATE, we
* migrate the pages from an unaligned range (ie. pages that
- * we are interested in). This will put all the pages in
+ * we are interested in). This will put all the pages in
* range back to page allocator as MIGRATE_ISOLATE.
*
* When this is done, we take the pages in range from page
@@ -9107,9 +9266,9 @@ int alloc_contig_range(unsigned long start, unsigned long end,
* put back to page allocator so that buddy can use them.
*/
- ret = start_isolate_page_range(start, end, migratetype, 0);
+ ret = start_isolate_page_range(start, end, migratetype, 0, gfp_mask);
if (ret)
- return ret;
+ goto done;
drain_all_pages(cc.zone);
@@ -9128,68 +9287,28 @@ int alloc_contig_range(unsigned long start, unsigned long end,
goto done;
ret = 0;
- /*
- * Pages from [start, end) are within a MAX_ORDER_NR_PAGES
- * aligned blocks that are marked as MIGRATE_ISOLATE. What's
- * more, all pages in [start, end) are free in page allocator.
- * What we are going to do is to allocate all pages from
- * [start, end) (that is remove them from page allocator).
- *
- * The only problem is that pages at the beginning and at the
- * end of interesting range may be not aligned with pages that
- * page allocator holds, ie. they can be part of higher order
- * pages. Because of this, we reserve the bigger range and
- * once this is done free the pages we are not interested in.
- *
- * We don't have to hold zone->lock here because the pages are
- * isolated thus they won't get removed from buddy.
- */
-
- order = 0;
- outer_start = start;
- while (!PageBuddy(pfn_to_page(outer_start))) {
- if (++order >= MAX_ORDER) {
- outer_start = start;
- break;
- }
- outer_start &= ~0UL << order;
- }
-
- if (outer_start != start) {
- order = buddy_order(pfn_to_page(outer_start));
-
- /*
- * outer_start page could be small order buddy page and
- * it doesn't include start page. Adjust outer_start
- * in this case to report failed page properly
- * on tracepoint in test_pages_isolated()
- */
- if (outer_start + (1UL << order) <= start)
- outer_start = start;
- }
-
/* Make sure the range is really isolated. */
- if (test_pages_isolated(outer_start, end, 0)) {
+ if (test_pages_isolated(alloc_start, alloc_end, 0)) {
ret = -EBUSY;
goto done;
}
/* Grab isolated pages from freelists. */
- outer_end = isolate_freepages_range(&cc, outer_start, end);
+ outer_end = isolate_freepages_range(&cc, alloc_start, alloc_end);
if (!outer_end) {
ret = -EBUSY;
goto done;
}
/* Free head and tail (if any) */
- if (start != outer_start)
- free_contig_range(outer_start, start - outer_start);
- if (end != outer_end)
- free_contig_range(end, outer_end - end);
+ if (start != alloc_start)
+ free_contig_range(alloc_start, start - alloc_start);
+ if (end != alloc_end)
+ free_contig_range(end, alloc_end - end);
done:
- undo_isolate_page_range(pfn_max_align_down(start),
- pfn_max_align_up(end), migratetype);
+ undo_isolate_page_range(alloc_start,
+ alloc_end, migratetype);
return ret;
}
EXPORT_SYMBOL(alloc_contig_range);
diff --git a/mm/page_isolation.c b/mm/page_isolation.c
index 64d093ab83ec..0256d5e1032c 100644
--- a/mm/page_isolation.c
+++ b/mm/page_isolation.c
@@ -285,6 +285,8 @@ __first_valid_page(unsigned long pfn, unsigned long nr_pages)
* and PageOffline() pages.
* REPORT_FAILURE - report details about the failure to
* isolate the range
+ * @gfp_flags: GFP flags used for migrating pages that sit across the
+ * range boundaries.
*
* Making page-allocation-type to be MIGRATE_ISOLATE means free pages in
* the range will never be allocated. Any free pages and pages freed in the
@@ -293,6 +295,10 @@ __first_valid_page(unsigned long pfn, unsigned long nr_pages)
* pages in the range finally, the caller have to free all pages in the range.
* test_page_isolated() can be used for test it.
*
+ * The function first tries to isolate the pageblocks at the beginning and end
+ * of the range, since there might be pages across the range boundaries.
+ * Afterwards, it isolates the rest of the range.
+ *
* There is no high level synchronization mechanism that prevents two threads
* from trying to isolate overlapping ranges. If this happens, one thread
* will notice pageblocks in the overlapping range already set to isolate.
@@ -313,21 +319,38 @@ __first_valid_page(unsigned long pfn, unsigned long nr_pages)
* Return: 0 on success and -EBUSY if any part of range cannot be isolated.
*/
int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
- unsigned migratetype, int flags)
+ unsigned migratetype, int flags, gfp_t gfp_flags)
{
unsigned long pfn;
struct page *page;
+ /* isolation is done at page block granularity */
+ unsigned long isolate_start = ALIGN_DOWN(start_pfn, pageblock_nr_pages);
+ unsigned long isolate_end = ALIGN(end_pfn, pageblock_nr_pages);
+ int ret;
- unsigned long isolate_start = pfn_max_align_down(start_pfn);
- unsigned long isolate_end = pfn_max_align_up(end_pfn);
+ /* isolate [isolate_start, isolate_start + pageblock_nr_pages] pageblock */
+ ret = isolate_single_pageblock(isolate_start, gfp_flags, 0);
+ if (ret)
+ return ret;
+
+ /* isolate [isolate_end - pageblock_nr_pages, isolate_end] pageblock */
+ ret = isolate_single_pageblock(isolate_end, gfp_flags, 1);
+ if (ret) {
+ unset_migratetype_isolate(pfn_to_page(isolate_start), migratetype);
+ return ret;
+ }
- for (pfn = isolate_start;
- pfn < isolate_end;
+ /* skip isolated pageblocks at the beginning and end */
+ for (pfn = isolate_start + pageblock_nr_pages;
+ pfn < isolate_end - pageblock_nr_pages;
pfn += pageblock_nr_pages) {
page = __first_valid_page(pfn, pageblock_nr_pages);
if (page && set_migratetype_isolate(page, migratetype, flags,
start_pfn, end_pfn)) {
undo_isolate_page_range(isolate_start, pfn, migratetype);
+ unset_migratetype_isolate(
+ pfn_to_page(isolate_end - pageblock_nr_pages),
+ migratetype);
return -EBUSY;
}
}
--
2.34.1
^ permalink raw reply related [flat|nested] 42+ messages in thread* [PATCH v5 3/6] mm: make alloc_contig_range work at pageblock granularity
@ 2022-02-11 16:41 ` Zi Yan
0 siblings, 0 replies; 42+ messages in thread
From: Zi Yan @ 2022-02-11 16:41 UTC (permalink / raw)
To: David Hildenbrand, linux-mm
Cc: Mel Gorman, Zi Yan, Oscar Salvador, Robin Murphy, linux-kernel,
iommu, Mike Rapoport, Eric Ren, virtualization, linuxppc-dev,
Christoph Hellwig, Vlastimil Babka, Marek Szyprowski
From: Zi Yan <ziy@nvidia.com>
alloc_contig_range() worked at MAX_ORDER-1 granularity to avoid merging
pageblocks with different migratetypes. It might unnecessarily convert
extra pageblocks at the beginning and at the end of the range. Change
alloc_contig_range() to work at pageblock granularity.
Special handling is needed for free pages and in-use pages across the
boundaries of the range specified alloc_contig_range(). Because these
partially isolated pages causes free page accounting issues. The free
pages will be split and freed into separate migratetype lists; the
in-use pages will be migrated then the freed pages will be handled.
Signed-off-by: Zi Yan <ziy@nvidia.com>
---
include/linux/page-isolation.h | 2 +-
mm/internal.h | 3 +
mm/memory_hotplug.c | 3 +-
mm/page_alloc.c | 235 +++++++++++++++++++++++++--------
mm/page_isolation.c | 33 ++++-
5 files changed, 211 insertions(+), 65 deletions(-)
diff --git a/include/linux/page-isolation.h b/include/linux/page-isolation.h
index 4ef7be6def83..78ff940cc169 100644
--- a/include/linux/page-isolation.h
+++ b/include/linux/page-isolation.h
@@ -54,7 +54,7 @@ int move_freepages_block(struct zone *zone, struct page *page,
*/
int
start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
- unsigned migratetype, int flags);
+ unsigned migratetype, int flags, gfp_t gfp_flags);
/*
* Changes MIGRATE_ISOLATE to MIGRATE_MOVABLE.
diff --git a/mm/internal.h b/mm/internal.h
index 0d240e876831..509cbdc25992 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -319,6 +319,9 @@ isolate_freepages_range(struct compact_control *cc,
int
isolate_migratepages_range(struct compact_control *cc,
unsigned long low_pfn, unsigned long end_pfn);
+
+int
+isolate_single_pageblock(unsigned long boundary_pfn, gfp_t gfp_flags, int isolate_before_boundary);
#endif
int find_suitable_fallback(struct free_area *area, unsigned int order,
int migratetype, bool only_stealable, bool *can_steal);
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index ce68098832aa..82406d2f3e46 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -1863,7 +1863,8 @@ int __ref offline_pages(unsigned long start_pfn, unsigned long nr_pages,
/* set above range as isolated */
ret = start_isolate_page_range(start_pfn, end_pfn,
MIGRATE_MOVABLE,
- MEMORY_OFFLINE | REPORT_FAILURE);
+ MEMORY_OFFLINE | REPORT_FAILURE,
+ GFP_USER | __GFP_MOVABLE | __GFP_RETRY_MAYFAIL);
if (ret) {
reason = "failure to isolate range";
goto failed_removal_pcplists_disabled;
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 62ef78f3d771..7a4fa21aea5c 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -8985,7 +8985,7 @@ static inline void alloc_contig_dump_pages(struct list_head *page_list)
#endif
/* [start, end) must belong to a single zone. */
-static int __alloc_contig_migrate_range(struct compact_control *cc,
+int __alloc_contig_migrate_range(struct compact_control *cc,
unsigned long start, unsigned long end)
{
/* This function is based on compact_zone() from compaction.c. */
@@ -9043,6 +9043,167 @@ static int __alloc_contig_migrate_range(struct compact_control *cc,
return 0;
}
+/**
+ * split_free_page() -- split a free page at split_pfn_offset
+ * @free_page: the original free page
+ * @order: the order of the page
+ * @split_pfn_offset: split offset within the page
+ *
+ * It is used when the free page crosses two pageblocks with different migratetypes
+ * at split_pfn_offset within the page. The split free page will be put into
+ * separate migratetype lists afterwards. Otherwise, the function achieves
+ * nothing.
+ */
+static inline void split_free_page(struct page *free_page,
+ int order, unsigned long split_pfn_offset)
+{
+ struct zone *zone = page_zone(free_page);
+ unsigned long free_page_pfn = page_to_pfn(free_page);
+ unsigned long pfn;
+ unsigned long flags;
+ int free_page_order;
+
+ spin_lock_irqsave(&zone->lock, flags);
+ del_page_from_free_list(free_page, zone, order);
+ for (pfn = free_page_pfn;
+ pfn < free_page_pfn + (1UL << order);) {
+ int mt = get_pfnblock_migratetype(pfn_to_page(pfn), pfn);
+
+ free_page_order = order_base_2(split_pfn_offset);
+ __free_one_page(pfn_to_page(pfn), pfn, zone, free_page_order,
+ mt, FPI_NONE);
+ pfn += 1UL << free_page_order;
+ split_pfn_offset -= (1UL << free_page_order);
+ /* we have done the first part, now switch to second part */
+ if (split_pfn_offset == 0)
+ split_pfn_offset = (1UL << order) - (pfn - free_page_pfn);
+ }
+ spin_unlock_irqrestore(&zone->lock, flags);
+}
+
+/**
+ * isolate_single_pageblock() -- tries to isolate a pageblock that might be
+ * within a free or in-use page.
+ * @boundary_pfn: pageblock-aligned pfn that a page might cross
+ * @gfp_flags: GFP flags used for migrating pages
+ * @isolate_before_boundary: isolate the pageblock before (1) or after (0)
+ * the boundary_pfn
+ *
+ * Free and in-use pages can be as big as MAX_ORDER-1 and contain more than one
+ * pageblock. When not all pageblocks within a page are isolated at the same
+ * time, free page accounting can go wrong. For example, in the case of
+ * MAX_ORDER-1 = pageblock_order + 1, a MAX_ORDER-1 page has two pagelbocks.
+ * [ MAX_ORDER-1 ]
+ * [ pageblock0 | pageblock1 ]
+ * When either pageblock is isolated, if it is a free page, the page is not
+ * split into separate migratetype lists, which is supposed to; if it is an
+ * in-use page and freed later, __free_one_page() does not split the free page
+ * either. The function handles this by splitting the free page or migrating
+ * the in-use page then splitting the free page.
+ */
+int isolate_single_pageblock(unsigned long boundary_pfn, gfp_t gfp_flags,
+ int isolate_before_boundary)
+{
+ unsigned char saved_mt;
+ /*
+ * scan at max(MAX_ORDER_NR_PAGES, pageblock_nr_pages) aligned range to
+ * avoid isolate pageblocks belonging to a bigger free or in-use page
+ */
+ unsigned long start_pfn = pfn_max_align_down(boundary_pfn);
+ unsigned long isolated_pageblock_pfn;
+ unsigned long pfn;
+
+ VM_BUG_ON(!IS_ALIGNED(boundary_pfn, pageblock_nr_pages));
+
+ if (isolate_before_boundary)
+ isolated_pageblock_pfn = boundary_pfn - pageblock_nr_pages;
+ else
+ isolated_pageblock_pfn = boundary_pfn;
+
+ saved_mt = get_pageblock_migratetype(pfn_to_page(isolated_pageblock_pfn));
+ set_pageblock_migratetype(pfn_to_page(isolated_pageblock_pfn), MIGRATE_ISOLATE);
+
+ for (pfn = start_pfn; pfn < boundary_pfn;) {
+ struct page *page = pfn_to_page(pfn);
+
+ /*
+ * start_pfn is max(MAX_ORDER_NR_PAGES, pageblock_nr_pages)
+ * aligned, if there is any free pages in [start_pfn, boundary_pfn),
+ * its head page will always be in the range.
+ */
+ if (PageBuddy(page)) {
+ int order = buddy_order(page);
+
+ if (pfn + (1UL << order) > boundary_pfn)
+ split_free_page(page, order, boundary_pfn - pfn);
+ pfn += (1UL << order);
+ continue;
+ }
+ /*
+ * migrate compound pages then let the free page handling code
+ * above do the rest
+ */
+ if (PageHuge(page) || PageTransCompound(page)) {
+ unsigned long nr_pages = compound_nr(page);
+ int order = compound_order(page);
+ struct page *head = compound_head(page);
+ unsigned long head_pfn = page_to_pfn(head);
+
+ if (head_pfn + nr_pages >= boundary_pfn) {
+ int ret;
+ struct compact_control cc = {
+ .nr_migratepages = 0,
+ .order = -1,
+ .zone = page_zone(pfn_to_page(head_pfn)),
+ .mode = MIGRATE_SYNC,
+ .ignore_skip_hint = true,
+ .no_set_skip_hint = true,
+ .gfp_mask = current_gfp_context(gfp_flags),
+ .alloc_contig = true,
+ };
+
+ INIT_LIST_HEAD(&cc.migratepages);
+
+ ret = __alloc_contig_migrate_range(&cc, head_pfn,
+ head_pfn + nr_pages);
+
+ if (ret) {
+ /* restore the original migratetype */
+ set_pageblock_migratetype(
+ pfn_to_page(isolated_pageblock_pfn),
+ saved_mt);
+ return -EBUSY;
+ }
+ /*
+ * reset pfn, let the free page handling code
+ * above split the free page to the right
+ * migratetype list.
+ *
+ * head_pfn is not used here as a hugetlb page
+ * order can be bigger than MAX_ORDER-1, but
+ * after it is freed, the free page order is not.
+ * Use pfn within the range to find the head of
+ * the free page and reset order to 0 if a hugetlb
+ * page with >MAX_ORDER-1 order is encountered.
+ */
+ if (order > MAX_ORDER-1)
+ order = 0;
+ while (!PageBuddy(pfn_to_page(pfn))) {
+ order++;
+ pfn &= ~0UL << order;
+ }
+ continue;
+ }
+ pfn += nr_pages;
+ continue;
+ }
+
+ pfn++;
+ }
+ return 0;
+}
+
+
/**
* alloc_contig_range() -- tries to allocate given range of pages
* @start: start PFN to allocate
@@ -9067,8 +9228,9 @@ static int __alloc_contig_migrate_range(struct compact_control *cc,
int alloc_contig_range(unsigned long start, unsigned long end,
unsigned migratetype, gfp_t gfp_mask)
{
- unsigned long outer_start, outer_end;
- unsigned int order;
+ unsigned long outer_end;
+ unsigned long alloc_start = ALIGN_DOWN(start, pageblock_nr_pages);
+ unsigned long alloc_end = ALIGN(end, pageblock_nr_pages);
int ret = 0;
struct compact_control cc = {
@@ -9087,14 +9249,11 @@ int alloc_contig_range(unsigned long start, unsigned long end,
* What we do here is we mark all pageblocks in range as
* MIGRATE_ISOLATE. Because pageblock and max order pages may
* have different sizes, and due to the way page allocator
- * work, we align the range to biggest of the two pages so
- * that page allocator won't try to merge buddies from
- * different pageblocks and change MIGRATE_ISOLATE to some
- * other migration type.
+ * work, start_isolate_page_range() has special handlings for this.
*
* Once the pageblocks are marked as MIGRATE_ISOLATE, we
* migrate the pages from an unaligned range (ie. pages that
- * we are interested in). This will put all the pages in
+ * we are interested in). This will put all the pages in
* range back to page allocator as MIGRATE_ISOLATE.
*
* When this is done, we take the pages in range from page
@@ -9107,9 +9266,9 @@ int alloc_contig_range(unsigned long start, unsigned long end,
* put back to page allocator so that buddy can use them.
*/
- ret = start_isolate_page_range(start, end, migratetype, 0);
+ ret = start_isolate_page_range(start, end, migratetype, 0, gfp_mask);
if (ret)
- return ret;
+ goto done;
drain_all_pages(cc.zone);
@@ -9128,68 +9287,28 @@ int alloc_contig_range(unsigned long start, unsigned long end,
goto done;
ret = 0;
- /*
- * Pages from [start, end) are within a MAX_ORDER_NR_PAGES
- * aligned blocks that are marked as MIGRATE_ISOLATE. What's
- * more, all pages in [start, end) are free in page allocator.
- * What we are going to do is to allocate all pages from
- * [start, end) (that is remove them from page allocator).
- *
- * The only problem is that pages at the beginning and at the
- * end of interesting range may be not aligned with pages that
- * page allocator holds, ie. they can be part of higher order
- * pages. Because of this, we reserve the bigger range and
- * once this is done free the pages we are not interested in.
- *
- * We don't have to hold zone->lock here because the pages are
- * isolated thus they won't get removed from buddy.
- */
-
- order = 0;
- outer_start = start;
- while (!PageBuddy(pfn_to_page(outer_start))) {
- if (++order >= MAX_ORDER) {
- outer_start = start;
- break;
- }
- outer_start &= ~0UL << order;
- }
-
- if (outer_start != start) {
- order = buddy_order(pfn_to_page(outer_start));
-
- /*
- * outer_start page could be small order buddy page and
- * it doesn't include start page. Adjust outer_start
- * in this case to report failed page properly
- * on tracepoint in test_pages_isolated()
- */
- if (outer_start + (1UL << order) <= start)
- outer_start = start;
- }
-
/* Make sure the range is really isolated. */
- if (test_pages_isolated(outer_start, end, 0)) {
+ if (test_pages_isolated(alloc_start, alloc_end, 0)) {
ret = -EBUSY;
goto done;
}
/* Grab isolated pages from freelists. */
- outer_end = isolate_freepages_range(&cc, outer_start, end);
+ outer_end = isolate_freepages_range(&cc, alloc_start, alloc_end);
if (!outer_end) {
ret = -EBUSY;
goto done;
}
/* Free head and tail (if any) */
- if (start != outer_start)
- free_contig_range(outer_start, start - outer_start);
- if (end != outer_end)
- free_contig_range(end, outer_end - end);
+ if (start != alloc_start)
+ free_contig_range(alloc_start, start - alloc_start);
+ if (end != alloc_end)
+ free_contig_range(end, alloc_end - end);
done:
- undo_isolate_page_range(pfn_max_align_down(start),
- pfn_max_align_up(end), migratetype);
+ undo_isolate_page_range(alloc_start,
+ alloc_end, migratetype);
return ret;
}
EXPORT_SYMBOL(alloc_contig_range);
diff --git a/mm/page_isolation.c b/mm/page_isolation.c
index 64d093ab83ec..0256d5e1032c 100644
--- a/mm/page_isolation.c
+++ b/mm/page_isolation.c
@@ -285,6 +285,8 @@ __first_valid_page(unsigned long pfn, unsigned long nr_pages)
* and PageOffline() pages.
* REPORT_FAILURE - report details about the failure to
* isolate the range
+ * @gfp_flags: GFP flags used for migrating pages that sit across the
+ * range boundaries.
*
* Making page-allocation-type to be MIGRATE_ISOLATE means free pages in
* the range will never be allocated. Any free pages and pages freed in the
@@ -293,6 +295,10 @@ __first_valid_page(unsigned long pfn, unsigned long nr_pages)
* pages in the range finally, the caller have to free all pages in the range.
* test_page_isolated() can be used for test it.
*
+ * The function first tries to isolate the pageblocks at the beginning and end
+ * of the range, since there might be pages across the range boundaries.
+ * Afterwards, it isolates the rest of the range.
+ *
* There is no high level synchronization mechanism that prevents two threads
* from trying to isolate overlapping ranges. If this happens, one thread
* will notice pageblocks in the overlapping range already set to isolate.
@@ -313,21 +319,38 @@ __first_valid_page(unsigned long pfn, unsigned long nr_pages)
* Return: 0 on success and -EBUSY if any part of range cannot be isolated.
*/
int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
- unsigned migratetype, int flags)
+ unsigned migratetype, int flags, gfp_t gfp_flags)
{
unsigned long pfn;
struct page *page;
+ /* isolation is done at page block granularity */
+ unsigned long isolate_start = ALIGN_DOWN(start_pfn, pageblock_nr_pages);
+ unsigned long isolate_end = ALIGN(end_pfn, pageblock_nr_pages);
+ int ret;
- unsigned long isolate_start = pfn_max_align_down(start_pfn);
- unsigned long isolate_end = pfn_max_align_up(end_pfn);
+ /* isolate [isolate_start, isolate_start + pageblock_nr_pages] pageblock */
+ ret = isolate_single_pageblock(isolate_start, gfp_flags, 0);
+ if (ret)
+ return ret;
+
+ /* isolate [isolate_end - pageblock_nr_pages, isolate_end] pageblock */
+ ret = isolate_single_pageblock(isolate_end, gfp_flags, 1);
+ if (ret) {
+ unset_migratetype_isolate(pfn_to_page(isolate_start), migratetype);
+ return ret;
+ }
- for (pfn = isolate_start;
- pfn < isolate_end;
+ /* skip isolated pageblocks at the beginning and end */
+ for (pfn = isolate_start + pageblock_nr_pages;
+ pfn < isolate_end - pageblock_nr_pages;
pfn += pageblock_nr_pages) {
page = __first_valid_page(pfn, pageblock_nr_pages);
if (page && set_migratetype_isolate(page, migratetype, flags,
start_pfn, end_pfn)) {
undo_isolate_page_range(isolate_start, pfn, migratetype);
+ unset_migratetype_isolate(
+ pfn_to_page(isolate_end - pageblock_nr_pages),
+ migratetype);
return -EBUSY;
}
}
--
2.34.1
^ permalink raw reply related [flat|nested] 42+ messages in thread* Re: [PATCH v5 3/6] mm: make alloc_contig_range work at pageblock granularity
2022-02-11 16:41 ` Zi Yan
@ 2022-02-11 23:06 ` kernel test robot
-1 siblings, 0 replies; 42+ messages in thread
From: kernel test robot @ 2022-02-11 23:06 UTC (permalink / raw)
To: Zi Yan; +Cc: llvm, kbuild-all
Hi Zi,
Thank you for the patch! Perhaps something to improve:
[auto build test WARNING on hnaz-mm/master]
[also build test WARNING on powerpc/next linux/master linus/master v5.17-rc3 next-20220211]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]
url: https://github.com/0day-ci/linux/commits/Zi-Yan/Use-pageblock_order-for-cma-and-alloc_contig_range-alignment/20220212-004358
base: https://github.com/hnaz/linux-mm master
config: hexagon-randconfig-r045-20220211 (https://download.01.org/0day-ci/archive/20220212/202202120607.4dI5nqMH-lkp@intel.com/config)
compiler: clang version 15.0.0 (https://github.com/llvm/llvm-project f6685f774697c85d6a352dcea013f46a99f9fe31)
reproduce (this is a W=1 build):
wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
chmod +x ~/bin/make.cross
# https://github.com/0day-ci/linux/commit/5aacb9dfc8abb1a0610b70226606408a96d0e997
git remote add linux-review https://github.com/0day-ci/linux
git fetch --no-tags linux-review Zi-Yan/Use-pageblock_order-for-cma-and-alloc_contig_range-alignment/20220212-004358
git checkout 5aacb9dfc8abb1a0610b70226606408a96d0e997
# save the config file to linux build tree
mkdir build_dir
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 O=build_dir ARCH=hexagon SHELL=/bin/bash
If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>
All warnings (new ones prefixed by >>):
mm/page_alloc.c:3869:15: warning: no previous prototype for function 'should_fail_alloc_page' [-Wmissing-prototypes]
noinline bool should_fail_alloc_page(gfp_t gfp_mask, unsigned int order)
^
mm/page_alloc.c:3869:10: note: declare 'static' if the function is not intended to be used outside of this translation unit
noinline bool should_fail_alloc_page(gfp_t gfp_mask, unsigned int order)
^
static
>> mm/page_alloc.c:8988:5: warning: no previous prototype for function '__alloc_contig_migrate_range' [-Wmissing-prototypes]
int __alloc_contig_migrate_range(struct compact_control *cc,
^
mm/page_alloc.c:8988:1: note: declare 'static' if the function is not intended to be used outside of this translation unit
int __alloc_contig_migrate_range(struct compact_control *cc,
^
static
2 warnings generated.
Kconfig warnings: (for reference only)
WARNING: unmet direct dependencies detected for OMAP_GPMC
Depends on MEMORY && OF_ADDRESS
Selected by
- MTD_NAND_OMAP2 && MTD && MTD_RAW_NAND && (ARCH_OMAP2PLUS || ARCH_KEYSTONE || ARCH_K3 || COMPILE_TEST && HAS_IOMEM
vim +/__alloc_contig_migrate_range +8988 mm/page_alloc.c
8986
8987 /* [start, end) must belong to a single zone. */
> 8988 int __alloc_contig_migrate_range(struct compact_control *cc,
8989 unsigned long start, unsigned long end)
8990 {
8991 /* This function is based on compact_zone() from compaction.c. */
8992 unsigned int nr_reclaimed;
8993 unsigned long pfn = start;
8994 unsigned int tries = 0;
8995 int ret = 0;
8996 struct migration_target_control mtc = {
8997 .nid = zone_to_nid(cc->zone),
8998 .gfp_mask = GFP_USER | __GFP_MOVABLE | __GFP_RETRY_MAYFAIL,
8999 };
9000
9001 lru_cache_disable();
9002
9003 while (pfn < end || !list_empty(&cc->migratepages)) {
9004 if (fatal_signal_pending(current)) {
9005 ret = -EINTR;
9006 break;
9007 }
9008
9009 if (list_empty(&cc->migratepages)) {
9010 cc->nr_migratepages = 0;
9011 ret = isolate_migratepages_range(cc, pfn, end);
9012 if (ret && ret != -EAGAIN)
9013 break;
9014 pfn = cc->migrate_pfn;
9015 tries = 0;
9016 } else if (++tries == 5) {
9017 ret = -EBUSY;
9018 break;
9019 }
9020
9021 nr_reclaimed = reclaim_clean_pages_from_list(cc->zone,
9022 &cc->migratepages);
9023 cc->nr_migratepages -= nr_reclaimed;
9024
9025 ret = migrate_pages(&cc->migratepages, alloc_migration_target,
9026 NULL, (unsigned long)&mtc, cc->mode, MR_CONTIG_RANGE, NULL);
9027
9028 /*
9029 * On -ENOMEM, migrate_pages() bails out right away. It is pointless
9030 * to retry again over this error, so do the same here.
9031 */
9032 if (ret == -ENOMEM)
9033 break;
9034 }
9035
9036 lru_cache_enable();
9037 if (ret < 0) {
9038 if (ret == -EBUSY)
9039 alloc_contig_dump_pages(&cc->migratepages);
9040 putback_movable_pages(&cc->migratepages);
9041 return ret;
9042 }
9043 return 0;
9044 }
9045
---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
^ permalink raw reply [flat|nested] 42+ messages in thread* Re: [PATCH v5 3/6] mm: make alloc_contig_range work at pageblock granularity
@ 2022-02-11 23:06 ` kernel test robot
0 siblings, 0 replies; 42+ messages in thread
From: kernel test robot @ 2022-02-11 23:06 UTC (permalink / raw)
To: kbuild-all
[-- Attachment #1: Type: text/plain, Size: 5018 bytes --]
Hi Zi,
Thank you for the patch! Perhaps something to improve:
[auto build test WARNING on hnaz-mm/master]
[also build test WARNING on powerpc/next linux/master linus/master v5.17-rc3 next-20220211]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]
url: https://github.com/0day-ci/linux/commits/Zi-Yan/Use-pageblock_order-for-cma-and-alloc_contig_range-alignment/20220212-004358
base: https://github.com/hnaz/linux-mm master
config: hexagon-randconfig-r045-20220211 (https://download.01.org/0day-ci/archive/20220212/202202120607.4dI5nqMH-lkp(a)intel.com/config)
compiler: clang version 15.0.0 (https://github.com/llvm/llvm-project f6685f774697c85d6a352dcea013f46a99f9fe31)
reproduce (this is a W=1 build):
wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
chmod +x ~/bin/make.cross
# https://github.com/0day-ci/linux/commit/5aacb9dfc8abb1a0610b70226606408a96d0e997
git remote add linux-review https://github.com/0day-ci/linux
git fetch --no-tags linux-review Zi-Yan/Use-pageblock_order-for-cma-and-alloc_contig_range-alignment/20220212-004358
git checkout 5aacb9dfc8abb1a0610b70226606408a96d0e997
# save the config file to linux build tree
mkdir build_dir
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 O=build_dir ARCH=hexagon SHELL=/bin/bash
If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>
All warnings (new ones prefixed by >>):
mm/page_alloc.c:3869:15: warning: no previous prototype for function 'should_fail_alloc_page' [-Wmissing-prototypes]
noinline bool should_fail_alloc_page(gfp_t gfp_mask, unsigned int order)
^
mm/page_alloc.c:3869:10: note: declare 'static' if the function is not intended to be used outside of this translation unit
noinline bool should_fail_alloc_page(gfp_t gfp_mask, unsigned int order)
^
static
>> mm/page_alloc.c:8988:5: warning: no previous prototype for function '__alloc_contig_migrate_range' [-Wmissing-prototypes]
int __alloc_contig_migrate_range(struct compact_control *cc,
^
mm/page_alloc.c:8988:1: note: declare 'static' if the function is not intended to be used outside of this translation unit
int __alloc_contig_migrate_range(struct compact_control *cc,
^
static
2 warnings generated.
Kconfig warnings: (for reference only)
WARNING: unmet direct dependencies detected for OMAP_GPMC
Depends on MEMORY && OF_ADDRESS
Selected by
- MTD_NAND_OMAP2 && MTD && MTD_RAW_NAND && (ARCH_OMAP2PLUS || ARCH_KEYSTONE || ARCH_K3 || COMPILE_TEST && HAS_IOMEM
vim +/__alloc_contig_migrate_range +8988 mm/page_alloc.c
8986
8987 /* [start, end) must belong to a single zone. */
> 8988 int __alloc_contig_migrate_range(struct compact_control *cc,
8989 unsigned long start, unsigned long end)
8990 {
8991 /* This function is based on compact_zone() from compaction.c. */
8992 unsigned int nr_reclaimed;
8993 unsigned long pfn = start;
8994 unsigned int tries = 0;
8995 int ret = 0;
8996 struct migration_target_control mtc = {
8997 .nid = zone_to_nid(cc->zone),
8998 .gfp_mask = GFP_USER | __GFP_MOVABLE | __GFP_RETRY_MAYFAIL,
8999 };
9000
9001 lru_cache_disable();
9002
9003 while (pfn < end || !list_empty(&cc->migratepages)) {
9004 if (fatal_signal_pending(current)) {
9005 ret = -EINTR;
9006 break;
9007 }
9008
9009 if (list_empty(&cc->migratepages)) {
9010 cc->nr_migratepages = 0;
9011 ret = isolate_migratepages_range(cc, pfn, end);
9012 if (ret && ret != -EAGAIN)
9013 break;
9014 pfn = cc->migrate_pfn;
9015 tries = 0;
9016 } else if (++tries == 5) {
9017 ret = -EBUSY;
9018 break;
9019 }
9020
9021 nr_reclaimed = reclaim_clean_pages_from_list(cc->zone,
9022 &cc->migratepages);
9023 cc->nr_migratepages -= nr_reclaimed;
9024
9025 ret = migrate_pages(&cc->migratepages, alloc_migration_target,
9026 NULL, (unsigned long)&mtc, cc->mode, MR_CONTIG_RANGE, NULL);
9027
9028 /*
9029 * On -ENOMEM, migrate_pages() bails out right away. It is pointless
9030 * to retry again over this error, so do the same here.
9031 */
9032 if (ret == -ENOMEM)
9033 break;
9034 }
9035
9036 lru_cache_enable();
9037 if (ret < 0) {
9038 if (ret == -EBUSY)
9039 alloc_contig_dump_pages(&cc->migratepages);
9040 putback_movable_pages(&cc->migratepages);
9041 return ret;
9042 }
9043 return 0;
9044 }
9045
---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all(a)lists.01.org
^ permalink raw reply [flat|nested] 42+ messages in thread
* Re: [PATCH v5 3/6] mm: make alloc_contig_range work at pageblock granularity
2022-02-11 16:41 ` Zi Yan
` (2 preceding siblings ...)
(?)
@ 2022-02-11 23:06 ` kernel test robot
-1 siblings, 0 replies; 42+ messages in thread
From: kernel test robot @ 2022-02-11 23:06 UTC (permalink / raw)
To: kbuild-all
[-- Attachment #1: Type: text/plain, Size: 4049 bytes --]
Hi Zi,
Thank you for the patch! Perhaps something to improve:
[auto build test WARNING on hnaz-mm/master]
[also build test WARNING on powerpc/next linux/master linus/master v5.17-rc3 next-20220211]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]
url: https://github.com/0day-ci/linux/commits/Zi-Yan/Use-pageblock_order-for-cma-and-alloc_contig_range-alignment/20220212-004358
base: https://github.com/hnaz/linux-mm master
config: x86_64-randconfig-a011 (https://download.01.org/0day-ci/archive/20220212/202202120616.vgw2VpUz-lkp(a)intel.com/config)
compiler: gcc-9 (Debian 9.3.0-22) 9.3.0
reproduce (this is a W=1 build):
# https://github.com/0day-ci/linux/commit/5aacb9dfc8abb1a0610b70226606408a96d0e997
git remote add linux-review https://github.com/0day-ci/linux
git fetch --no-tags linux-review Zi-Yan/Use-pageblock_order-for-cma-and-alloc_contig_range-alignment/20220212-004358
git checkout 5aacb9dfc8abb1a0610b70226606408a96d0e997
# save the config file to linux build tree
mkdir build_dir
make W=1 O=build_dir ARCH=x86_64 SHELL=/bin/bash
If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>
All warnings (new ones prefixed by >>):
mm/page_alloc.c:3869:15: warning: no previous prototype for 'should_fail_alloc_page' [-Wmissing-prototypes]
3869 | noinline bool should_fail_alloc_page(gfp_t gfp_mask, unsigned int order)
| ^~~~~~~~~~~~~~~~~~~~~~
>> mm/page_alloc.c:8988:5: warning: no previous prototype for '__alloc_contig_migrate_range' [-Wmissing-prototypes]
8988 | int __alloc_contig_migrate_range(struct compact_control *cc,
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~
vim +/__alloc_contig_migrate_range +8988 mm/page_alloc.c
8986
8987 /* [start, end) must belong to a single zone. */
> 8988 int __alloc_contig_migrate_range(struct compact_control *cc,
8989 unsigned long start, unsigned long end)
8990 {
8991 /* This function is based on compact_zone() from compaction.c. */
8992 unsigned int nr_reclaimed;
8993 unsigned long pfn = start;
8994 unsigned int tries = 0;
8995 int ret = 0;
8996 struct migration_target_control mtc = {
8997 .nid = zone_to_nid(cc->zone),
8998 .gfp_mask = GFP_USER | __GFP_MOVABLE | __GFP_RETRY_MAYFAIL,
8999 };
9000
9001 lru_cache_disable();
9002
9003 while (pfn < end || !list_empty(&cc->migratepages)) {
9004 if (fatal_signal_pending(current)) {
9005 ret = -EINTR;
9006 break;
9007 }
9008
9009 if (list_empty(&cc->migratepages)) {
9010 cc->nr_migratepages = 0;
9011 ret = isolate_migratepages_range(cc, pfn, end);
9012 if (ret && ret != -EAGAIN)
9013 break;
9014 pfn = cc->migrate_pfn;
9015 tries = 0;
9016 } else if (++tries == 5) {
9017 ret = -EBUSY;
9018 break;
9019 }
9020
9021 nr_reclaimed = reclaim_clean_pages_from_list(cc->zone,
9022 &cc->migratepages);
9023 cc->nr_migratepages -= nr_reclaimed;
9024
9025 ret = migrate_pages(&cc->migratepages, alloc_migration_target,
9026 NULL, (unsigned long)&mtc, cc->mode, MR_CONTIG_RANGE, NULL);
9027
9028 /*
9029 * On -ENOMEM, migrate_pages() bails out right away. It is pointless
9030 * to retry again over this error, so do the same here.
9031 */
9032 if (ret == -ENOMEM)
9033 break;
9034 }
9035
9036 lru_cache_enable();
9037 if (ret < 0) {
9038 if (ret == -EBUSY)
9039 alloc_contig_dump_pages(&cc->migratepages);
9040 putback_movable_pages(&cc->migratepages);
9041 return ret;
9042 }
9043 return 0;
9044 }
9045
---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all(a)lists.01.org
^ permalink raw reply [flat|nested] 42+ messages in thread* Re: [PATCH v5 3/6] mm: make alloc_contig_range work at pageblock granularity
2022-02-11 16:41 ` Zi Yan
@ 2022-02-11 23:17 ` kernel test robot
-1 siblings, 0 replies; 42+ messages in thread
From: kernel test robot @ 2022-02-11 23:17 UTC (permalink / raw)
To: Zi Yan; +Cc: llvm, kbuild-all
Hi Zi,
Thank you for the patch! Yet something to improve:
[auto build test ERROR on hnaz-mm/master]
[also build test ERROR on powerpc/next linux/master linus/master v5.17-rc3 next-20220211]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]
url: https://github.com/0day-ci/linux/commits/Zi-Yan/Use-pageblock_order-for-cma-and-alloc_contig_range-alignment/20220212-004358
base: https://github.com/hnaz/linux-mm master
config: x86_64-randconfig-a012 (https://download.01.org/0day-ci/archive/20220212/202202120720.MfxEFq7T-lkp@intel.com/config)
compiler: clang version 15.0.0 (https://github.com/llvm/llvm-project f6685f774697c85d6a352dcea013f46a99f9fe31)
reproduce (this is a W=1 build):
wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
chmod +x ~/bin/make.cross
# https://github.com/0day-ci/linux/commit/5aacb9dfc8abb1a0610b70226606408a96d0e997
git remote add linux-review https://github.com/0day-ci/linux
git fetch --no-tags linux-review Zi-Yan/Use-pageblock_order-for-cma-and-alloc_contig_range-alignment/20220212-004358
git checkout 5aacb9dfc8abb1a0610b70226606408a96d0e997
# save the config file to linux build tree
mkdir build_dir
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 O=build_dir ARCH=x86_64 SHELL=/bin/bash
If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>
All errors (new ones prefixed by >>):
>> mm/page_isolation.c:332:8: error: implicit declaration of function 'isolate_single_pageblock' [-Werror,-Wimplicit-function-declaration]
ret = isolate_single_pageblock(isolate_start, gfp_flags, 0);
^
1 error generated.
vim +/isolate_single_pageblock +332 mm/page_isolation.c
274
275 /**
276 * start_isolate_page_range() - make page-allocation-type of range of pages to
277 * be MIGRATE_ISOLATE.
278 * @start_pfn: The lower PFN of the range to be isolated.
279 * @end_pfn: The upper PFN of the range to be isolated.
280 * @migratetype: Migrate type to set in error recovery.
281 * @flags: The following flags are allowed (they can be combined in
282 * a bit mask)
283 * MEMORY_OFFLINE - isolate to offline (!allocate) memory
284 * e.g., skip over PageHWPoison() pages
285 * and PageOffline() pages.
286 * REPORT_FAILURE - report details about the failure to
287 * isolate the range
288 * @gfp_flags: GFP flags used for migrating pages that sit across the
289 * range boundaries.
290 *
291 * Making page-allocation-type to be MIGRATE_ISOLATE means free pages in
292 * the range will never be allocated. Any free pages and pages freed in the
293 * future will not be allocated again. If specified range includes migrate types
294 * other than MOVABLE or CMA, this will fail with -EBUSY. For isolating all
295 * pages in the range finally, the caller have to free all pages in the range.
296 * test_page_isolated() can be used for test it.
297 *
298 * The function first tries to isolate the pageblocks at the beginning and end
299 * of the range, since there might be pages across the range boundaries.
300 * Afterwards, it isolates the rest of the range.
301 *
302 * There is no high level synchronization mechanism that prevents two threads
303 * from trying to isolate overlapping ranges. If this happens, one thread
304 * will notice pageblocks in the overlapping range already set to isolate.
305 * This happens in set_migratetype_isolate, and set_migratetype_isolate
306 * returns an error. We then clean up by restoring the migration type on
307 * pageblocks we may have modified and return -EBUSY to caller. This
308 * prevents two threads from simultaneously working on overlapping ranges.
309 *
310 * Please note that there is no strong synchronization with the page allocator
311 * either. Pages might be freed while their page blocks are marked ISOLATED.
312 * A call to drain_all_pages() after isolation can flush most of them. However
313 * in some cases pages might still end up on pcp lists and that would allow
314 * for their allocation even when they are in fact isolated already. Depending
315 * on how strong of a guarantee the caller needs, zone_pcp_disable/enable()
316 * might be used to flush and disable pcplist before isolation and enable after
317 * unisolation.
318 *
319 * Return: 0 on success and -EBUSY if any part of range cannot be isolated.
320 */
321 int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
322 unsigned migratetype, int flags, gfp_t gfp_flags)
323 {
324 unsigned long pfn;
325 struct page *page;
326 /* isolation is done at page block granularity */
327 unsigned long isolate_start = ALIGN_DOWN(start_pfn, pageblock_nr_pages);
328 unsigned long isolate_end = ALIGN(end_pfn, pageblock_nr_pages);
329 int ret;
330
331 /* isolate [isolate_start, isolate_start + pageblock_nr_pages] pageblock */
> 332 ret = isolate_single_pageblock(isolate_start, gfp_flags, 0);
333 if (ret)
334 return ret;
335
336 /* isolate [isolate_end - pageblock_nr_pages, isolate_end] pageblock */
337 ret = isolate_single_pageblock(isolate_end, gfp_flags, 1);
338 if (ret) {
339 unset_migratetype_isolate(pfn_to_page(isolate_start), migratetype);
340 return ret;
341 }
342
343 /* skip isolated pageblocks at the beginning and end */
344 for (pfn = isolate_start + pageblock_nr_pages;
345 pfn < isolate_end - pageblock_nr_pages;
346 pfn += pageblock_nr_pages) {
347 page = __first_valid_page(pfn, pageblock_nr_pages);
348 if (page && set_migratetype_isolate(page, migratetype, flags,
349 start_pfn, end_pfn)) {
350 undo_isolate_page_range(isolate_start, pfn, migratetype);
351 unset_migratetype_isolate(
352 pfn_to_page(isolate_end - pageblock_nr_pages),
353 migratetype);
354 return -EBUSY;
355 }
356 }
357 return 0;
358 }
359
---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
^ permalink raw reply [flat|nested] 42+ messages in thread* Re: [PATCH v5 3/6] mm: make alloc_contig_range work at pageblock granularity
@ 2022-02-11 23:17 ` kernel test robot
0 siblings, 0 replies; 42+ messages in thread
From: kernel test robot @ 2022-02-11 23:17 UTC (permalink / raw)
To: kbuild-all
[-- Attachment #1: Type: text/plain, Size: 6600 bytes --]
Hi Zi,
Thank you for the patch! Yet something to improve:
[auto build test ERROR on hnaz-mm/master]
[also build test ERROR on powerpc/next linux/master linus/master v5.17-rc3 next-20220211]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]
url: https://github.com/0day-ci/linux/commits/Zi-Yan/Use-pageblock_order-for-cma-and-alloc_contig_range-alignment/20220212-004358
base: https://github.com/hnaz/linux-mm master
config: x86_64-randconfig-a012 (https://download.01.org/0day-ci/archive/20220212/202202120720.MfxEFq7T-lkp(a)intel.com/config)
compiler: clang version 15.0.0 (https://github.com/llvm/llvm-project f6685f774697c85d6a352dcea013f46a99f9fe31)
reproduce (this is a W=1 build):
wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
chmod +x ~/bin/make.cross
# https://github.com/0day-ci/linux/commit/5aacb9dfc8abb1a0610b70226606408a96d0e997
git remote add linux-review https://github.com/0day-ci/linux
git fetch --no-tags linux-review Zi-Yan/Use-pageblock_order-for-cma-and-alloc_contig_range-alignment/20220212-004358
git checkout 5aacb9dfc8abb1a0610b70226606408a96d0e997
# save the config file to linux build tree
mkdir build_dir
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 O=build_dir ARCH=x86_64 SHELL=/bin/bash
If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>
All errors (new ones prefixed by >>):
>> mm/page_isolation.c:332:8: error: implicit declaration of function 'isolate_single_pageblock' [-Werror,-Wimplicit-function-declaration]
ret = isolate_single_pageblock(isolate_start, gfp_flags, 0);
^
1 error generated.
vim +/isolate_single_pageblock +332 mm/page_isolation.c
274
275 /**
276 * start_isolate_page_range() - make page-allocation-type of range of pages to
277 * be MIGRATE_ISOLATE.
278 * @start_pfn: The lower PFN of the range to be isolated.
279 * @end_pfn: The upper PFN of the range to be isolated.
280 * @migratetype: Migrate type to set in error recovery.
281 * @flags: The following flags are allowed (they can be combined in
282 * a bit mask)
283 * MEMORY_OFFLINE - isolate to offline (!allocate) memory
284 * e.g., skip over PageHWPoison() pages
285 * and PageOffline() pages.
286 * REPORT_FAILURE - report details about the failure to
287 * isolate the range
288 * @gfp_flags: GFP flags used for migrating pages that sit across the
289 * range boundaries.
290 *
291 * Making page-allocation-type to be MIGRATE_ISOLATE means free pages in
292 * the range will never be allocated. Any free pages and pages freed in the
293 * future will not be allocated again. If specified range includes migrate types
294 * other than MOVABLE or CMA, this will fail with -EBUSY. For isolating all
295 * pages in the range finally, the caller have to free all pages in the range.
296 * test_page_isolated() can be used for test it.
297 *
298 * The function first tries to isolate the pageblocks at the beginning and end
299 * of the range, since there might be pages across the range boundaries.
300 * Afterwards, it isolates the rest of the range.
301 *
302 * There is no high level synchronization mechanism that prevents two threads
303 * from trying to isolate overlapping ranges. If this happens, one thread
304 * will notice pageblocks in the overlapping range already set to isolate.
305 * This happens in set_migratetype_isolate, and set_migratetype_isolate
306 * returns an error. We then clean up by restoring the migration type on
307 * pageblocks we may have modified and return -EBUSY to caller. This
308 * prevents two threads from simultaneously working on overlapping ranges.
309 *
310 * Please note that there is no strong synchronization with the page allocator
311 * either. Pages might be freed while their page blocks are marked ISOLATED.
312 * A call to drain_all_pages() after isolation can flush most of them. However
313 * in some cases pages might still end up on pcp lists and that would allow
314 * for their allocation even when they are in fact isolated already. Depending
315 * on how strong of a guarantee the caller needs, zone_pcp_disable/enable()
316 * might be used to flush and disable pcplist before isolation and enable after
317 * unisolation.
318 *
319 * Return: 0 on success and -EBUSY if any part of range cannot be isolated.
320 */
321 int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
322 unsigned migratetype, int flags, gfp_t gfp_flags)
323 {
324 unsigned long pfn;
325 struct page *page;
326 /* isolation is done at page block granularity */
327 unsigned long isolate_start = ALIGN_DOWN(start_pfn, pageblock_nr_pages);
328 unsigned long isolate_end = ALIGN(end_pfn, pageblock_nr_pages);
329 int ret;
330
331 /* isolate [isolate_start, isolate_start + pageblock_nr_pages] pageblock */
> 332 ret = isolate_single_pageblock(isolate_start, gfp_flags, 0);
333 if (ret)
334 return ret;
335
336 /* isolate [isolate_end - pageblock_nr_pages, isolate_end] pageblock */
337 ret = isolate_single_pageblock(isolate_end, gfp_flags, 1);
338 if (ret) {
339 unset_migratetype_isolate(pfn_to_page(isolate_start), migratetype);
340 return ret;
341 }
342
343 /* skip isolated pageblocks at the beginning and end */
344 for (pfn = isolate_start + pageblock_nr_pages;
345 pfn < isolate_end - pageblock_nr_pages;
346 pfn += pageblock_nr_pages) {
347 page = __first_valid_page(pfn, pageblock_nr_pages);
348 if (page && set_migratetype_isolate(page, migratetype, flags,
349 start_pfn, end_pfn)) {
350 undo_isolate_page_range(isolate_start, pfn, migratetype);
351 unset_migratetype_isolate(
352 pfn_to_page(isolate_end - pageblock_nr_pages),
353 migratetype);
354 return -EBUSY;
355 }
356 }
357 return 0;
358 }
359
---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
^ permalink raw reply [flat|nested] 42+ messages in thread
* Re: [PATCH v5 3/6] mm: make alloc_contig_range work at pageblock granularity
2022-02-11 16:41 ` Zi Yan
(?)
(?)
@ 2022-02-14 7:26 ` Christoph Hellwig
-1 siblings, 0 replies; 42+ messages in thread
From: Christoph Hellwig @ 2022-02-14 7:26 UTC (permalink / raw)
To: Zi Yan
Cc: Mel Gorman, David Hildenbrand, Michael Ellerman, linuxppc-dev,
linux-kernel, virtualization, linux-mm, iommu, Mike Rapoport,
Eric Ren, Oscar Salvador, Robin Murphy, Christoph Hellwig,
Vlastimil Babka
> +int
> +isolate_single_pageblock(unsigned long boundary_pfn, gfp_t gfp_flags, int isolate_before_boundary);
Please avoid the completely unreadably long line. i.e.
int isolate_single_pageblock(unsigned long boundary_pfn, gfp_t gfp_flags,
int isolate_before_boundary);
Same in various other spots.
_______________________________________________
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu
^ permalink raw reply [flat|nested] 42+ messages in thread
* Re: [PATCH v5 3/6] mm: make alloc_contig_range work at pageblock granularity
@ 2022-02-14 7:26 ` Christoph Hellwig
0 siblings, 0 replies; 42+ messages in thread
From: Christoph Hellwig @ 2022-02-14 7:26 UTC (permalink / raw)
To: Zi Yan
Cc: David Hildenbrand, linux-mm, linux-kernel, Michael Ellerman,
Christoph Hellwig, Marek Szyprowski, Robin Murphy, linuxppc-dev,
virtualization, iommu, Vlastimil Babka, Mel Gorman, Eric Ren,
Mike Rapoport, Oscar Salvador
> +int
> +isolate_single_pageblock(unsigned long boundary_pfn, gfp_t gfp_flags, int isolate_before_boundary);
Please avoid the completely unreadably long line. i.e.
int isolate_single_pageblock(unsigned long boundary_pfn, gfp_t gfp_flags,
int isolate_before_boundary);
Same in various other spots.
^ permalink raw reply [flat|nested] 42+ messages in thread
* Re: [PATCH v5 3/6] mm: make alloc_contig_range work at pageblock granularity
@ 2022-02-14 7:26 ` Christoph Hellwig
0 siblings, 0 replies; 42+ messages in thread
From: Christoph Hellwig @ 2022-02-14 7:26 UTC (permalink / raw)
To: Zi Yan
Cc: Mel Gorman, Michael Ellerman, linuxppc-dev, linux-kernel,
virtualization, linux-mm, iommu, Mike Rapoport, Eric Ren,
Oscar Salvador, Robin Murphy, Christoph Hellwig, Vlastimil Babka,
Marek Szyprowski
> +int
> +isolate_single_pageblock(unsigned long boundary_pfn, gfp_t gfp_flags, int isolate_before_boundary);
Please avoid the completely unreadably long line. i.e.
int isolate_single_pageblock(unsigned long boundary_pfn, gfp_t gfp_flags,
int isolate_before_boundary);
Same in various other spots.
_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization
^ permalink raw reply [flat|nested] 42+ messages in thread
* Re: [PATCH v5 3/6] mm: make alloc_contig_range work at pageblock granularity
@ 2022-02-14 7:26 ` Christoph Hellwig
0 siblings, 0 replies; 42+ messages in thread
From: Christoph Hellwig @ 2022-02-14 7:26 UTC (permalink / raw)
To: Zi Yan
Cc: Mel Gorman, David Hildenbrand, linuxppc-dev, linux-kernel,
virtualization, linux-mm, iommu, Mike Rapoport, Eric Ren,
Oscar Salvador, Robin Murphy, Christoph Hellwig, Vlastimil Babka,
Marek Szyprowski
> +int
> +isolate_single_pageblock(unsigned long boundary_pfn, gfp_t gfp_flags, int isolate_before_boundary);
Please avoid the completely unreadably long line. i.e.
int isolate_single_pageblock(unsigned long boundary_pfn, gfp_t gfp_flags,
int isolate_before_boundary);
Same in various other spots.
^ permalink raw reply [flat|nested] 42+ messages in thread
* Re: [PATCH v5 3/6] mm: make alloc_contig_range work at pageblock granularity
2022-02-14 7:26 ` Christoph Hellwig
(?)
@ 2022-02-14 15:46 ` Zi Yan
-1 siblings, 0 replies; 42+ messages in thread
From: Zi Yan via iommu @ 2022-02-14 15:46 UTC (permalink / raw)
To: Christoph Hellwig
Cc: Mel Gorman, David Hildenbrand, Michael Ellerman, linuxppc-dev,
linux-kernel, virtualization, linux-mm, iommu, Mike Rapoport,
Eric Ren, Oscar Salvador, Robin Murphy, Vlastimil Babka
[-- Attachment #1.1: Type: text/plain, Size: 586 bytes --]
On 14 Feb 2022, at 2:26, Christoph Hellwig wrote:
>> +int
>> +isolate_single_pageblock(unsigned long boundary_pfn, gfp_t gfp_flags, int isolate_before_boundary);
>
> Please avoid the completely unreadably long line. i.e.
>
> int isolate_single_pageblock(unsigned long boundary_pfn, gfp_t gfp_flags,
> int isolate_before_boundary);
>
> Same in various other spots.
OK. Thanks for pointing it out. checkpatch.pl did not report any
warning about this. It seems that the column limit has been relaxed
to 100. Anyway, I will make it shorter.
--
Best Regards,
Yan, Zi
[-- Attachment #1.2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 854 bytes --]
[-- Attachment #2: Type: text/plain, Size: 156 bytes --]
_______________________________________________
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu
^ permalink raw reply [flat|nested] 42+ messages in thread
* Re: [PATCH v5 3/6] mm: make alloc_contig_range work at pageblock granularity
@ 2022-02-14 15:46 ` Zi Yan
0 siblings, 0 replies; 42+ messages in thread
From: Zi Yan @ 2022-02-14 15:46 UTC (permalink / raw)
To: Christoph Hellwig
Cc: David Hildenbrand, linux-mm, linux-kernel, Michael Ellerman,
Marek Szyprowski, Robin Murphy, linuxppc-dev, virtualization,
iommu, Vlastimil Babka, Mel Gorman, Eric Ren, Mike Rapoport,
Oscar Salvador
[-- Attachment #1: Type: text/plain, Size: 586 bytes --]
On 14 Feb 2022, at 2:26, Christoph Hellwig wrote:
>> +int
>> +isolate_single_pageblock(unsigned long boundary_pfn, gfp_t gfp_flags, int isolate_before_boundary);
>
> Please avoid the completely unreadably long line. i.e.
>
> int isolate_single_pageblock(unsigned long boundary_pfn, gfp_t gfp_flags,
> int isolate_before_boundary);
>
> Same in various other spots.
OK. Thanks for pointing it out. checkpatch.pl did not report any
warning about this. It seems that the column limit has been relaxed
to 100. Anyway, I will make it shorter.
--
Best Regards,
Yan, Zi
[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 854 bytes --]
^ permalink raw reply [flat|nested] 42+ messages in thread
* Re: [PATCH v5 3/6] mm: make alloc_contig_range work at pageblock granularity
@ 2022-02-14 15:46 ` Zi Yan
0 siblings, 0 replies; 42+ messages in thread
From: Zi Yan @ 2022-02-14 15:46 UTC (permalink / raw)
To: Christoph Hellwig
Cc: Mel Gorman, David Hildenbrand, linuxppc-dev, linux-kernel,
virtualization, linux-mm, iommu, Mike Rapoport, Eric Ren,
Oscar Salvador, Robin Murphy, Vlastimil Babka, Marek Szyprowski
[-- Attachment #1: Type: text/plain, Size: 586 bytes --]
On 14 Feb 2022, at 2:26, Christoph Hellwig wrote:
>> +int
>> +isolate_single_pageblock(unsigned long boundary_pfn, gfp_t gfp_flags, int isolate_before_boundary);
>
> Please avoid the completely unreadably long line. i.e.
>
> int isolate_single_pageblock(unsigned long boundary_pfn, gfp_t gfp_flags,
> int isolate_before_boundary);
>
> Same in various other spots.
OK. Thanks for pointing it out. checkpatch.pl did not report any
warning about this. It seems that the column limit has been relaxed
to 100. Anyway, I will make it shorter.
--
Best Regards,
Yan, Zi
[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 854 bytes --]
^ permalink raw reply [flat|nested] 42+ messages in thread
* Re: [PATCH v5 3/6] mm: make alloc_contig_range work at pageblock granularity
2022-02-11 16:41 ` Zi Yan
(?)
@ 2022-02-14 7:59 ` Christophe Leroy
-1 siblings, 0 replies; 42+ messages in thread
From: Christophe Leroy @ 2022-02-14 7:59 UTC (permalink / raw)
To: Zi Yan, David Hildenbrand, linux-mm@kvack.org
Cc: Mel Gorman, linuxppc-dev@lists.ozlabs.org,
linux-kernel@vger.kernel.org,
virtualization@lists.linux-foundation.org,
iommu@lists.linux-foundation.org, Vlastimil Babka, Eric Ren,
Robin Murphy, Christoph Hellwig, Mike Rapoport, Oscar Salvador
Le 11/02/2022 à 17:41, Zi Yan a écrit :
> From: Zi Yan <ziy@nvidia.com>
>
> alloc_contig_range() worked at MAX_ORDER-1 granularity to avoid merging
> pageblocks with different migratetypes. It might unnecessarily convert
> extra pageblocks at the beginning and at the end of the range. Change
> alloc_contig_range() to work at pageblock granularity.
>
> Special handling is needed for free pages and in-use pages across the
> boundaries of the range specified alloc_contig_range(). Because these
> partially isolated pages causes free page accounting issues. The free
> pages will be split and freed into separate migratetype lists; the
> in-use pages will be migrated then the freed pages will be handled.
>
> Signed-off-by: Zi Yan <ziy@nvidia.com>
> ---
> include/linux/page-isolation.h | 2 +-
> mm/internal.h | 3 +
> mm/memory_hotplug.c | 3 +-
> mm/page_alloc.c | 235 +++++++++++++++++++++++++--------
> mm/page_isolation.c | 33 ++++-
> 5 files changed, 211 insertions(+), 65 deletions(-)
>
> diff --git a/include/linux/page-isolation.h b/include/linux/page-isolation.h
> index 4ef7be6def83..78ff940cc169 100644
> --- a/include/linux/page-isolation.h
> +++ b/include/linux/page-isolation.h
> @@ -54,7 +54,7 @@ int move_freepages_block(struct zone *zone, struct page *page,
> */
> int
> start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
> - unsigned migratetype, int flags);
> + unsigned migratetype, int flags, gfp_t gfp_flags);
>
> /*
> * Changes MIGRATE_ISOLATE to MIGRATE_MOVABLE.
> diff --git a/mm/internal.h b/mm/internal.h
> index 0d240e876831..509cbdc25992 100644
> --- a/mm/internal.h
> +++ b/mm/internal.h
> @@ -319,6 +319,9 @@ isolate_freepages_range(struct compact_control *cc,
> int
> isolate_migratepages_range(struct compact_control *cc,
> unsigned long low_pfn, unsigned long end_pfn);
> +
> +int
> +isolate_single_pageblock(unsigned long boundary_pfn, gfp_t gfp_flags, int isolate_before_boundary);
> #endif
> int find_suitable_fallback(struct free_area *area, unsigned int order,
> int migratetype, bool only_stealable, bool *can_steal);
> diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
> index ce68098832aa..82406d2f3e46 100644
> --- a/mm/memory_hotplug.c
> +++ b/mm/memory_hotplug.c
> @@ -1863,7 +1863,8 @@ int __ref offline_pages(unsigned long start_pfn, unsigned long nr_pages,
> /* set above range as isolated */
> ret = start_isolate_page_range(start_pfn, end_pfn,
> MIGRATE_MOVABLE,
> - MEMORY_OFFLINE | REPORT_FAILURE);
> + MEMORY_OFFLINE | REPORT_FAILURE,
> + GFP_USER | __GFP_MOVABLE | __GFP_RETRY_MAYFAIL);
> if (ret) {
> reason = "failure to isolate range";
> goto failed_removal_pcplists_disabled;
> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> index 62ef78f3d771..7a4fa21aea5c 100644
> --- a/mm/page_alloc.c
> +++ b/mm/page_alloc.c
> @@ -8985,7 +8985,7 @@ static inline void alloc_contig_dump_pages(struct list_head *page_list)
> #endif
>
> /* [start, end) must belong to a single zone. */
> -static int __alloc_contig_migrate_range(struct compact_control *cc,
> +int __alloc_contig_migrate_range(struct compact_control *cc,
> unsigned long start, unsigned long end)
> {
> /* This function is based on compact_zone() from compaction.c. */
> @@ -9043,6 +9043,167 @@ static int __alloc_contig_migrate_range(struct compact_control *cc,
> return 0;
> }
>
> +/**
> + * split_free_page() -- split a free page at split_pfn_offset
> + * @free_page: the original free page
> + * @order: the order of the page
> + * @split_pfn_offset: split offset within the page
> + *
> + * It is used when the free page crosses two pageblocks with different migratetypes
> + * at split_pfn_offset within the page. The split free page will be put into
> + * separate migratetype lists afterwards. Otherwise, the function achieves
> + * nothing.
> + */
> +static inline void split_free_page(struct page *free_page,
> + int order, unsigned long split_pfn_offset)
> +{
> + struct zone *zone = page_zone(free_page);
> + unsigned long free_page_pfn = page_to_pfn(free_page);
> + unsigned long pfn;
> + unsigned long flags;
> + int free_page_order;
> +
> + spin_lock_irqsave(&zone->lock, flags);
> + del_page_from_free_list(free_page, zone, order);
> + for (pfn = free_page_pfn;
> + pfn < free_page_pfn + (1UL << order);) {
> + int mt = get_pfnblock_migratetype(pfn_to_page(pfn), pfn);
> +
> + free_page_order = order_base_2(split_pfn_offset);
> + __free_one_page(pfn_to_page(pfn), pfn, zone, free_page_order,
> + mt, FPI_NONE);
> + pfn += 1UL << free_page_order;
> + split_pfn_offset -= (1UL << free_page_order);
> + /* we have done the first part, now switch to second part */
> + if (split_pfn_offset == 0)
> + split_pfn_offset = (1UL << order) - (pfn - free_page_pfn);
> + }
> + spin_unlock_irqrestore(&zone->lock, flags);
> +}
> +
> +/**
> + * isolate_single_pageblock() -- tries to isolate a pageblock that might be
> + * within a free or in-use page.
> + * @boundary_pfn: pageblock-aligned pfn that a page might cross
> + * @gfp_flags: GFP flags used for migrating pages
> + * @isolate_before_boundary: isolate the pageblock before (1) or after (0)
> + * the boundary_pfn
> + *
> + * Free and in-use pages can be as big as MAX_ORDER-1 and contain more than one
> + * pageblock. When not all pageblocks within a page are isolated at the same
> + * time, free page accounting can go wrong. For example, in the case of
> + * MAX_ORDER-1 = pageblock_order + 1, a MAX_ORDER-1 page has two pagelbocks.
> + * [ MAX_ORDER-1 ]
> + * [ pageblock0 | pageblock1 ]
> + * When either pageblock is isolated, if it is a free page, the page is not
> + * split into separate migratetype lists, which is supposed to; if it is an
> + * in-use page and freed later, __free_one_page() does not split the free page
> + * either. The function handles this by splitting the free page or migrating
> + * the in-use page then splitting the free page.
> + */
> +int isolate_single_pageblock(unsigned long boundary_pfn, gfp_t gfp_flags,
> + int isolate_before_boundary)
Do you need such big param name ?
See
https://www.kernel.org/doc/html/latest/process/coding-style.html?highlight=style#naming
isolate_before_boundary could probably be shorter.
And should be a bool by the way.
> +{
> + unsigned char saved_mt;
> + /*
> + * scan at max(MAX_ORDER_NR_PAGES, pageblock_nr_pages) aligned range to
> + * avoid isolate pageblocks belonging to a bigger free or in-use page
> + */
> + unsigned long start_pfn = pfn_max_align_down(boundary_pfn);
> + unsigned long isolated_pageblock_pfn;
Variable name too long.
> + unsigned long pfn;
> +
> + VM_BUG_ON(!IS_ALIGNED(boundary_pfn, pageblock_nr_pages));
> +
> + if (isolate_before_boundary)
> + isolated_pageblock_pfn = boundary_pfn - pageblock_nr_pages;
> + else
> + isolated_pageblock_pfn = boundary_pfn;
> +
> + saved_mt = get_pageblock_migratetype(pfn_to_page(isolated_pageblock_pfn));
> + set_pageblock_migratetype(pfn_to_page(isolated_pageblock_pfn), MIGRATE_ISOLATE);
> +
> + for (pfn = start_pfn; pfn < boundary_pfn;) {
This loop is a bit long a deep. Isn't there a way to put what's in "if
(PageHuge(page) || PageTransCompound(page))" into a sub-function ?
See
https://www.kernel.org/doc/html/latest/process/coding-style.html?highlight=style#functions
> + struct page *page = pfn_to_page(pfn);
> +
> + /*
> + * start_pfn is max(MAX_ORDER_NR_PAGES, pageblock_nr_pages)
> + * aligned, if there is any free pages in [start_pfn, boundary_pfn),
> + * its head page will always be in the range.
> + */
> + if (PageBuddy(page)) {
> + int order = buddy_order(page);
> +
> + if (pfn + (1UL << order) > boundary_pfn)
> + split_free_page(page, order, boundary_pfn - pfn);
> + pfn += (1UL << order);
> + continue;
> + }
> + /*
> + * migrate compound pages then let the free page handling code
> + * above do the rest
> + */
> + if (PageHuge(page) || PageTransCompound(page)) {
> + unsigned long nr_pages = compound_nr(page);
> + int order = compound_order(page);
> + struct page *head = compound_head(page);
> + unsigned long head_pfn = page_to_pfn(head);
> +
> + if (head_pfn + nr_pages >= boundary_pfn) {
> + int ret;
> + struct compact_control cc = {
> + .nr_migratepages = 0,
> + .order = -1,
> + .zone = page_zone(pfn_to_page(head_pfn)),
> + .mode = MIGRATE_SYNC,
> + .ignore_skip_hint = true,
> + .no_set_skip_hint = true,
> + .gfp_mask = current_gfp_context(gfp_flags),
> + .alloc_contig = true,
> + };
> +
> + INIT_LIST_HEAD(&cc.migratepages);
> +
> + ret = __alloc_contig_migrate_range(&cc, head_pfn,
> + head_pfn + nr_pages);
> +
> + if (ret) {
> + /* restore the original migratetype */
> + set_pageblock_migratetype(
> + pfn_to_page(isolated_pageblock_pfn),
> + saved_mt);
> + return -EBUSY;
> + }
> + /*
> + * reset pfn, let the free page handling code
> + * above split the free page to the right
> + * migratetype list.
> + *
> + * head_pfn is not used here as a hugetlb page
> + * order can be bigger than MAX_ORDER-1, but
> + * after it is freed, the free page order is not.
> + * Use pfn within the range to find the head of
> + * the free page and reset order to 0 if a hugetlb
> + * page with >MAX_ORDER-1 order is encountered.
> + */
> + if (order > MAX_ORDER-1)
> + order = 0;
> + while (!PageBuddy(pfn_to_page(pfn))) {
> + order++;
> + pfn &= ~0UL << order;
> + }
> + continue;
> + }
> + pfn += nr_pages;
> + continue;
> + }
> +
> + pfn++;
> + }
> + return 0;
> +}
> +
> +
> /**
> * alloc_contig_range() -- tries to allocate given range of pages
> * @start: start PFN to allocate
> @@ -9067,8 +9228,9 @@ static int __alloc_contig_migrate_range(struct compact_control *cc,
> int alloc_contig_range(unsigned long start, unsigned long end,
> unsigned migratetype, gfp_t gfp_mask)
> {
> - unsigned long outer_start, outer_end;
> - unsigned int order;
> + unsigned long outer_end;
> + unsigned long alloc_start = ALIGN_DOWN(start, pageblock_nr_pages);
> + unsigned long alloc_end = ALIGN(end, pageblock_nr_pages);
> int ret = 0;
>
> struct compact_control cc = {
> @@ -9087,14 +9249,11 @@ int alloc_contig_range(unsigned long start, unsigned long end,
> * What we do here is we mark all pageblocks in range as
> * MIGRATE_ISOLATE. Because pageblock and max order pages may
> * have different sizes, and due to the way page allocator
> - * work, we align the range to biggest of the two pages so
> - * that page allocator won't try to merge buddies from
> - * different pageblocks and change MIGRATE_ISOLATE to some
> - * other migration type.
> + * work, start_isolate_page_range() has special handlings for this.
> *
> * Once the pageblocks are marked as MIGRATE_ISOLATE, we
> * migrate the pages from an unaligned range (ie. pages that
> - * we are interested in). This will put all the pages in
> + * we are interested in). This will put all the pages in
> * range back to page allocator as MIGRATE_ISOLATE.
> *
> * When this is done, we take the pages in range from page
> @@ -9107,9 +9266,9 @@ int alloc_contig_range(unsigned long start, unsigned long end,
> * put back to page allocator so that buddy can use them.
> */
>
> - ret = start_isolate_page_range(start, end, migratetype, 0);
> + ret = start_isolate_page_range(start, end, migratetype, 0, gfp_mask);
> if (ret)
> - return ret;
> + goto done;
>
> drain_all_pages(cc.zone);
>
> @@ -9128,68 +9287,28 @@ int alloc_contig_range(unsigned long start, unsigned long end,
> goto done;
> ret = 0;
>
> - /*
> - * Pages from [start, end) are within a MAX_ORDER_NR_PAGES
> - * aligned blocks that are marked as MIGRATE_ISOLATE. What's
> - * more, all pages in [start, end) are free in page allocator.
> - * What we are going to do is to allocate all pages from
> - * [start, end) (that is remove them from page allocator).
> - *
> - * The only problem is that pages at the beginning and at the
> - * end of interesting range may be not aligned with pages that
> - * page allocator holds, ie. they can be part of higher order
> - * pages. Because of this, we reserve the bigger range and
> - * once this is done free the pages we are not interested in.
> - *
> - * We don't have to hold zone->lock here because the pages are
> - * isolated thus they won't get removed from buddy.
> - */
> -
> - order = 0;
> - outer_start = start;
> - while (!PageBuddy(pfn_to_page(outer_start))) {
> - if (++order >= MAX_ORDER) {
> - outer_start = start;
> - break;
> - }
> - outer_start &= ~0UL << order;
> - }
> -
> - if (outer_start != start) {
> - order = buddy_order(pfn_to_page(outer_start));
> -
> - /*
> - * outer_start page could be small order buddy page and
> - * it doesn't include start page. Adjust outer_start
> - * in this case to report failed page properly
> - * on tracepoint in test_pages_isolated()
> - */
> - if (outer_start + (1UL << order) <= start)
> - outer_start = start;
> - }
> -
> /* Make sure the range is really isolated. */
> - if (test_pages_isolated(outer_start, end, 0)) {
> + if (test_pages_isolated(alloc_start, alloc_end, 0)) {
> ret = -EBUSY;
> goto done;
> }
>
> /* Grab isolated pages from freelists. */
> - outer_end = isolate_freepages_range(&cc, outer_start, end);
> + outer_end = isolate_freepages_range(&cc, alloc_start, alloc_end);
> if (!outer_end) {
> ret = -EBUSY;
> goto done;
> }
>
> /* Free head and tail (if any) */
> - if (start != outer_start)
> - free_contig_range(outer_start, start - outer_start);
> - if (end != outer_end)
> - free_contig_range(end, outer_end - end);
> + if (start != alloc_start)
> + free_contig_range(alloc_start, start - alloc_start);
> + if (end != alloc_end)
> + free_contig_range(end, alloc_end - end);
>
> done:
> - undo_isolate_page_range(pfn_max_align_down(start),
> - pfn_max_align_up(end), migratetype);
> + undo_isolate_page_range(alloc_start,
> + alloc_end, migratetype);
> return ret;
> }
> EXPORT_SYMBOL(alloc_contig_range);
> diff --git a/mm/page_isolation.c b/mm/page_isolation.c
> index 64d093ab83ec..0256d5e1032c 100644
> --- a/mm/page_isolation.c
> +++ b/mm/page_isolation.c
> @@ -285,6 +285,8 @@ __first_valid_page(unsigned long pfn, unsigned long nr_pages)
> * and PageOffline() pages.
> * REPORT_FAILURE - report details about the failure to
> * isolate the range
> + * @gfp_flags: GFP flags used for migrating pages that sit across the
> + * range boundaries.
> *
> * Making page-allocation-type to be MIGRATE_ISOLATE means free pages in
> * the range will never be allocated. Any free pages and pages freed in the
> @@ -293,6 +295,10 @@ __first_valid_page(unsigned long pfn, unsigned long nr_pages)
> * pages in the range finally, the caller have to free all pages in the range.
> * test_page_isolated() can be used for test it.
> *
> + * The function first tries to isolate the pageblocks at the beginning and end
> + * of the range, since there might be pages across the range boundaries.
> + * Afterwards, it isolates the rest of the range.
> + *
> * There is no high level synchronization mechanism that prevents two threads
> * from trying to isolate overlapping ranges. If this happens, one thread
> * will notice pageblocks in the overlapping range already set to isolate.
> @@ -313,21 +319,38 @@ __first_valid_page(unsigned long pfn, unsigned long nr_pages)
> * Return: 0 on success and -EBUSY if any part of range cannot be isolated.
> */
> int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
> - unsigned migratetype, int flags)
> + unsigned migratetype, int flags, gfp_t gfp_flags)
> {
> unsigned long pfn;
> struct page *page;
> + /* isolation is done at page block granularity */
> + unsigned long isolate_start = ALIGN_DOWN(start_pfn, pageblock_nr_pages);
> + unsigned long isolate_end = ALIGN(end_pfn, pageblock_nr_pages);
> + int ret;
>
> - unsigned long isolate_start = pfn_max_align_down(start_pfn);
> - unsigned long isolate_end = pfn_max_align_up(end_pfn);
> + /* isolate [isolate_start, isolate_start + pageblock_nr_pages] pageblock */
> + ret = isolate_single_pageblock(isolate_start, gfp_flags, 0);
> + if (ret)
> + return ret;
> +
> + /* isolate [isolate_end - pageblock_nr_pages, isolate_end] pageblock */
> + ret = isolate_single_pageblock(isolate_end, gfp_flags, 1);
> + if (ret) {
> + unset_migratetype_isolate(pfn_to_page(isolate_start), migratetype);
> + return ret;
> + }
>
> - for (pfn = isolate_start;
> - pfn < isolate_end;
> + /* skip isolated pageblocks at the beginning and end */
> + for (pfn = isolate_start + pageblock_nr_pages;
> + pfn < isolate_end - pageblock_nr_pages;
> pfn += pageblock_nr_pages) {
> page = __first_valid_page(pfn, pageblock_nr_pages);
> if (page && set_migratetype_isolate(page, migratetype, flags,
> start_pfn, end_pfn)) {
> undo_isolate_page_range(isolate_start, pfn, migratetype);
> + unset_migratetype_isolate(
> + pfn_to_page(isolate_end - pageblock_nr_pages),
> + migratetype);
> return -EBUSY;
> }
> }
_______________________________________________
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu
^ permalink raw reply [flat|nested] 42+ messages in thread* Re: [PATCH v5 3/6] mm: make alloc_contig_range work at pageblock granularity
@ 2022-02-14 7:59 ` Christophe Leroy
0 siblings, 0 replies; 42+ messages in thread
From: Christophe Leroy @ 2022-02-14 7:59 UTC (permalink / raw)
To: Zi Yan, David Hildenbrand, linux-mm@kvack.org
Cc: Mel Gorman, Oscar Salvador, Robin Murphy,
linux-kernel@vger.kernel.org, iommu@lists.linux-foundation.org,
Mike Rapoport, Eric Ren,
virtualization@lists.linux-foundation.org,
linuxppc-dev@lists.ozlabs.org, Christoph Hellwig, Vlastimil Babka,
Marek Szyprowski
Le 11/02/2022 à 17:41, Zi Yan a écrit :
> From: Zi Yan <ziy@nvidia.com>
>
> alloc_contig_range() worked at MAX_ORDER-1 granularity to avoid merging
> pageblocks with different migratetypes. It might unnecessarily convert
> extra pageblocks at the beginning and at the end of the range. Change
> alloc_contig_range() to work at pageblock granularity.
>
> Special handling is needed for free pages and in-use pages across the
> boundaries of the range specified alloc_contig_range(). Because these
> partially isolated pages causes free page accounting issues. The free
> pages will be split and freed into separate migratetype lists; the
> in-use pages will be migrated then the freed pages will be handled.
>
> Signed-off-by: Zi Yan <ziy@nvidia.com>
> ---
> include/linux/page-isolation.h | 2 +-
> mm/internal.h | 3 +
> mm/memory_hotplug.c | 3 +-
> mm/page_alloc.c | 235 +++++++++++++++++++++++++--------
> mm/page_isolation.c | 33 ++++-
> 5 files changed, 211 insertions(+), 65 deletions(-)
>
> diff --git a/include/linux/page-isolation.h b/include/linux/page-isolation.h
> index 4ef7be6def83..78ff940cc169 100644
> --- a/include/linux/page-isolation.h
> +++ b/include/linux/page-isolation.h
> @@ -54,7 +54,7 @@ int move_freepages_block(struct zone *zone, struct page *page,
> */
> int
> start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
> - unsigned migratetype, int flags);
> + unsigned migratetype, int flags, gfp_t gfp_flags);
>
> /*
> * Changes MIGRATE_ISOLATE to MIGRATE_MOVABLE.
> diff --git a/mm/internal.h b/mm/internal.h
> index 0d240e876831..509cbdc25992 100644
> --- a/mm/internal.h
> +++ b/mm/internal.h
> @@ -319,6 +319,9 @@ isolate_freepages_range(struct compact_control *cc,
> int
> isolate_migratepages_range(struct compact_control *cc,
> unsigned long low_pfn, unsigned long end_pfn);
> +
> +int
> +isolate_single_pageblock(unsigned long boundary_pfn, gfp_t gfp_flags, int isolate_before_boundary);
> #endif
> int find_suitable_fallback(struct free_area *area, unsigned int order,
> int migratetype, bool only_stealable, bool *can_steal);
> diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
> index ce68098832aa..82406d2f3e46 100644
> --- a/mm/memory_hotplug.c
> +++ b/mm/memory_hotplug.c
> @@ -1863,7 +1863,8 @@ int __ref offline_pages(unsigned long start_pfn, unsigned long nr_pages,
> /* set above range as isolated */
> ret = start_isolate_page_range(start_pfn, end_pfn,
> MIGRATE_MOVABLE,
> - MEMORY_OFFLINE | REPORT_FAILURE);
> + MEMORY_OFFLINE | REPORT_FAILURE,
> + GFP_USER | __GFP_MOVABLE | __GFP_RETRY_MAYFAIL);
> if (ret) {
> reason = "failure to isolate range";
> goto failed_removal_pcplists_disabled;
> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> index 62ef78f3d771..7a4fa21aea5c 100644
> --- a/mm/page_alloc.c
> +++ b/mm/page_alloc.c
> @@ -8985,7 +8985,7 @@ static inline void alloc_contig_dump_pages(struct list_head *page_list)
> #endif
>
> /* [start, end) must belong to a single zone. */
> -static int __alloc_contig_migrate_range(struct compact_control *cc,
> +int __alloc_contig_migrate_range(struct compact_control *cc,
> unsigned long start, unsigned long end)
> {
> /* This function is based on compact_zone() from compaction.c. */
> @@ -9043,6 +9043,167 @@ static int __alloc_contig_migrate_range(struct compact_control *cc,
> return 0;
> }
>
> +/**
> + * split_free_page() -- split a free page at split_pfn_offset
> + * @free_page: the original free page
> + * @order: the order of the page
> + * @split_pfn_offset: split offset within the page
> + *
> + * It is used when the free page crosses two pageblocks with different migratetypes
> + * at split_pfn_offset within the page. The split free page will be put into
> + * separate migratetype lists afterwards. Otherwise, the function achieves
> + * nothing.
> + */
> +static inline void split_free_page(struct page *free_page,
> + int order, unsigned long split_pfn_offset)
> +{
> + struct zone *zone = page_zone(free_page);
> + unsigned long free_page_pfn = page_to_pfn(free_page);
> + unsigned long pfn;
> + unsigned long flags;
> + int free_page_order;
> +
> + spin_lock_irqsave(&zone->lock, flags);
> + del_page_from_free_list(free_page, zone, order);
> + for (pfn = free_page_pfn;
> + pfn < free_page_pfn + (1UL << order);) {
> + int mt = get_pfnblock_migratetype(pfn_to_page(pfn), pfn);
> +
> + free_page_order = order_base_2(split_pfn_offset);
> + __free_one_page(pfn_to_page(pfn), pfn, zone, free_page_order,
> + mt, FPI_NONE);
> + pfn += 1UL << free_page_order;
> + split_pfn_offset -= (1UL << free_page_order);
> + /* we have done the first part, now switch to second part */
> + if (split_pfn_offset == 0)
> + split_pfn_offset = (1UL << order) - (pfn - free_page_pfn);
> + }
> + spin_unlock_irqrestore(&zone->lock, flags);
> +}
> +
> +/**
> + * isolate_single_pageblock() -- tries to isolate a pageblock that might be
> + * within a free or in-use page.
> + * @boundary_pfn: pageblock-aligned pfn that a page might cross
> + * @gfp_flags: GFP flags used for migrating pages
> + * @isolate_before_boundary: isolate the pageblock before (1) or after (0)
> + * the boundary_pfn
> + *
> + * Free and in-use pages can be as big as MAX_ORDER-1 and contain more than one
> + * pageblock. When not all pageblocks within a page are isolated at the same
> + * time, free page accounting can go wrong. For example, in the case of
> + * MAX_ORDER-1 = pageblock_order + 1, a MAX_ORDER-1 page has two pagelbocks.
> + * [ MAX_ORDER-1 ]
> + * [ pageblock0 | pageblock1 ]
> + * When either pageblock is isolated, if it is a free page, the page is not
> + * split into separate migratetype lists, which is supposed to; if it is an
> + * in-use page and freed later, __free_one_page() does not split the free page
> + * either. The function handles this by splitting the free page or migrating
> + * the in-use page then splitting the free page.
> + */
> +int isolate_single_pageblock(unsigned long boundary_pfn, gfp_t gfp_flags,
> + int isolate_before_boundary)
Do you need such big param name ?
See
https://www.kernel.org/doc/html/latest/process/coding-style.html?highlight=style#naming
isolate_before_boundary could probably be shorter.
And should be a bool by the way.
> +{
> + unsigned char saved_mt;
> + /*
> + * scan at max(MAX_ORDER_NR_PAGES, pageblock_nr_pages) aligned range to
> + * avoid isolate pageblocks belonging to a bigger free or in-use page
> + */
> + unsigned long start_pfn = pfn_max_align_down(boundary_pfn);
> + unsigned long isolated_pageblock_pfn;
Variable name too long.
> + unsigned long pfn;
> +
> + VM_BUG_ON(!IS_ALIGNED(boundary_pfn, pageblock_nr_pages));
> +
> + if (isolate_before_boundary)
> + isolated_pageblock_pfn = boundary_pfn - pageblock_nr_pages;
> + else
> + isolated_pageblock_pfn = boundary_pfn;
> +
> + saved_mt = get_pageblock_migratetype(pfn_to_page(isolated_pageblock_pfn));
> + set_pageblock_migratetype(pfn_to_page(isolated_pageblock_pfn), MIGRATE_ISOLATE);
> +
> + for (pfn = start_pfn; pfn < boundary_pfn;) {
This loop is a bit long a deep. Isn't there a way to put what's in "if
(PageHuge(page) || PageTransCompound(page))" into a sub-function ?
See
https://www.kernel.org/doc/html/latest/process/coding-style.html?highlight=style#functions
> + struct page *page = pfn_to_page(pfn);
> +
> + /*
> + * start_pfn is max(MAX_ORDER_NR_PAGES, pageblock_nr_pages)
> + * aligned, if there is any free pages in [start_pfn, boundary_pfn),
> + * its head page will always be in the range.
> + */
> + if (PageBuddy(page)) {
> + int order = buddy_order(page);
> +
> + if (pfn + (1UL << order) > boundary_pfn)
> + split_free_page(page, order, boundary_pfn - pfn);
> + pfn += (1UL << order);
> + continue;
> + }
> + /*
> + * migrate compound pages then let the free page handling code
> + * above do the rest
> + */
> + if (PageHuge(page) || PageTransCompound(page)) {
> + unsigned long nr_pages = compound_nr(page);
> + int order = compound_order(page);
> + struct page *head = compound_head(page);
> + unsigned long head_pfn = page_to_pfn(head);
> +
> + if (head_pfn + nr_pages >= boundary_pfn) {
> + int ret;
> + struct compact_control cc = {
> + .nr_migratepages = 0,
> + .order = -1,
> + .zone = page_zone(pfn_to_page(head_pfn)),
> + .mode = MIGRATE_SYNC,
> + .ignore_skip_hint = true,
> + .no_set_skip_hint = true,
> + .gfp_mask = current_gfp_context(gfp_flags),
> + .alloc_contig = true,
> + };
> +
> + INIT_LIST_HEAD(&cc.migratepages);
> +
> + ret = __alloc_contig_migrate_range(&cc, head_pfn,
> + head_pfn + nr_pages);
> +
> + if (ret) {
> + /* restore the original migratetype */
> + set_pageblock_migratetype(
> + pfn_to_page(isolated_pageblock_pfn),
> + saved_mt);
> + return -EBUSY;
> + }
> + /*
> + * reset pfn, let the free page handling code
> + * above split the free page to the right
> + * migratetype list.
> + *
> + * head_pfn is not used here as a hugetlb page
> + * order can be bigger than MAX_ORDER-1, but
> + * after it is freed, the free page order is not.
> + * Use pfn within the range to find the head of
> + * the free page and reset order to 0 if a hugetlb
> + * page with >MAX_ORDER-1 order is encountered.
> + */
> + if (order > MAX_ORDER-1)
> + order = 0;
> + while (!PageBuddy(pfn_to_page(pfn))) {
> + order++;
> + pfn &= ~0UL << order;
> + }
> + continue;
> + }
> + pfn += nr_pages;
> + continue;
> + }
> +
> + pfn++;
> + }
> + return 0;
> +}
> +
> +
> /**
> * alloc_contig_range() -- tries to allocate given range of pages
> * @start: start PFN to allocate
> @@ -9067,8 +9228,9 @@ static int __alloc_contig_migrate_range(struct compact_control *cc,
> int alloc_contig_range(unsigned long start, unsigned long end,
> unsigned migratetype, gfp_t gfp_mask)
> {
> - unsigned long outer_start, outer_end;
> - unsigned int order;
> + unsigned long outer_end;
> + unsigned long alloc_start = ALIGN_DOWN(start, pageblock_nr_pages);
> + unsigned long alloc_end = ALIGN(end, pageblock_nr_pages);
> int ret = 0;
>
> struct compact_control cc = {
> @@ -9087,14 +9249,11 @@ int alloc_contig_range(unsigned long start, unsigned long end,
> * What we do here is we mark all pageblocks in range as
> * MIGRATE_ISOLATE. Because pageblock and max order pages may
> * have different sizes, and due to the way page allocator
> - * work, we align the range to biggest of the two pages so
> - * that page allocator won't try to merge buddies from
> - * different pageblocks and change MIGRATE_ISOLATE to some
> - * other migration type.
> + * work, start_isolate_page_range() has special handlings for this.
> *
> * Once the pageblocks are marked as MIGRATE_ISOLATE, we
> * migrate the pages from an unaligned range (ie. pages that
> - * we are interested in). This will put all the pages in
> + * we are interested in). This will put all the pages in
> * range back to page allocator as MIGRATE_ISOLATE.
> *
> * When this is done, we take the pages in range from page
> @@ -9107,9 +9266,9 @@ int alloc_contig_range(unsigned long start, unsigned long end,
> * put back to page allocator so that buddy can use them.
> */
>
> - ret = start_isolate_page_range(start, end, migratetype, 0);
> + ret = start_isolate_page_range(start, end, migratetype, 0, gfp_mask);
> if (ret)
> - return ret;
> + goto done;
>
> drain_all_pages(cc.zone);
>
> @@ -9128,68 +9287,28 @@ int alloc_contig_range(unsigned long start, unsigned long end,
> goto done;
> ret = 0;
>
> - /*
> - * Pages from [start, end) are within a MAX_ORDER_NR_PAGES
> - * aligned blocks that are marked as MIGRATE_ISOLATE. What's
> - * more, all pages in [start, end) are free in page allocator.
> - * What we are going to do is to allocate all pages from
> - * [start, end) (that is remove them from page allocator).
> - *
> - * The only problem is that pages at the beginning and at the
> - * end of interesting range may be not aligned with pages that
> - * page allocator holds, ie. they can be part of higher order
> - * pages. Because of this, we reserve the bigger range and
> - * once this is done free the pages we are not interested in.
> - *
> - * We don't have to hold zone->lock here because the pages are
> - * isolated thus they won't get removed from buddy.
> - */
> -
> - order = 0;
> - outer_start = start;
> - while (!PageBuddy(pfn_to_page(outer_start))) {
> - if (++order >= MAX_ORDER) {
> - outer_start = start;
> - break;
> - }
> - outer_start &= ~0UL << order;
> - }
> -
> - if (outer_start != start) {
> - order = buddy_order(pfn_to_page(outer_start));
> -
> - /*
> - * outer_start page could be small order buddy page and
> - * it doesn't include start page. Adjust outer_start
> - * in this case to report failed page properly
> - * on tracepoint in test_pages_isolated()
> - */
> - if (outer_start + (1UL << order) <= start)
> - outer_start = start;
> - }
> -
> /* Make sure the range is really isolated. */
> - if (test_pages_isolated(outer_start, end, 0)) {
> + if (test_pages_isolated(alloc_start, alloc_end, 0)) {
> ret = -EBUSY;
> goto done;
> }
>
> /* Grab isolated pages from freelists. */
> - outer_end = isolate_freepages_range(&cc, outer_start, end);
> + outer_end = isolate_freepages_range(&cc, alloc_start, alloc_end);
> if (!outer_end) {
> ret = -EBUSY;
> goto done;
> }
>
> /* Free head and tail (if any) */
> - if (start != outer_start)
> - free_contig_range(outer_start, start - outer_start);
> - if (end != outer_end)
> - free_contig_range(end, outer_end - end);
> + if (start != alloc_start)
> + free_contig_range(alloc_start, start - alloc_start);
> + if (end != alloc_end)
> + free_contig_range(end, alloc_end - end);
>
> done:
> - undo_isolate_page_range(pfn_max_align_down(start),
> - pfn_max_align_up(end), migratetype);
> + undo_isolate_page_range(alloc_start,
> + alloc_end, migratetype);
> return ret;
> }
> EXPORT_SYMBOL(alloc_contig_range);
> diff --git a/mm/page_isolation.c b/mm/page_isolation.c
> index 64d093ab83ec..0256d5e1032c 100644
> --- a/mm/page_isolation.c
> +++ b/mm/page_isolation.c
> @@ -285,6 +285,8 @@ __first_valid_page(unsigned long pfn, unsigned long nr_pages)
> * and PageOffline() pages.
> * REPORT_FAILURE - report details about the failure to
> * isolate the range
> + * @gfp_flags: GFP flags used for migrating pages that sit across the
> + * range boundaries.
> *
> * Making page-allocation-type to be MIGRATE_ISOLATE means free pages in
> * the range will never be allocated. Any free pages and pages freed in the
> @@ -293,6 +295,10 @@ __first_valid_page(unsigned long pfn, unsigned long nr_pages)
> * pages in the range finally, the caller have to free all pages in the range.
> * test_page_isolated() can be used for test it.
> *
> + * The function first tries to isolate the pageblocks at the beginning and end
> + * of the range, since there might be pages across the range boundaries.
> + * Afterwards, it isolates the rest of the range.
> + *
> * There is no high level synchronization mechanism that prevents two threads
> * from trying to isolate overlapping ranges. If this happens, one thread
> * will notice pageblocks in the overlapping range already set to isolate.
> @@ -313,21 +319,38 @@ __first_valid_page(unsigned long pfn, unsigned long nr_pages)
> * Return: 0 on success and -EBUSY if any part of range cannot be isolated.
> */
> int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
> - unsigned migratetype, int flags)
> + unsigned migratetype, int flags, gfp_t gfp_flags)
> {
> unsigned long pfn;
> struct page *page;
> + /* isolation is done at page block granularity */
> + unsigned long isolate_start = ALIGN_DOWN(start_pfn, pageblock_nr_pages);
> + unsigned long isolate_end = ALIGN(end_pfn, pageblock_nr_pages);
> + int ret;
>
> - unsigned long isolate_start = pfn_max_align_down(start_pfn);
> - unsigned long isolate_end = pfn_max_align_up(end_pfn);
> + /* isolate [isolate_start, isolate_start + pageblock_nr_pages] pageblock */
> + ret = isolate_single_pageblock(isolate_start, gfp_flags, 0);
> + if (ret)
> + return ret;
> +
> + /* isolate [isolate_end - pageblock_nr_pages, isolate_end] pageblock */
> + ret = isolate_single_pageblock(isolate_end, gfp_flags, 1);
> + if (ret) {
> + unset_migratetype_isolate(pfn_to_page(isolate_start), migratetype);
> + return ret;
> + }
>
> - for (pfn = isolate_start;
> - pfn < isolate_end;
> + /* skip isolated pageblocks at the beginning and end */
> + for (pfn = isolate_start + pageblock_nr_pages;
> + pfn < isolate_end - pageblock_nr_pages;
> pfn += pageblock_nr_pages) {
> page = __first_valid_page(pfn, pageblock_nr_pages);
> if (page && set_migratetype_isolate(page, migratetype, flags,
> start_pfn, end_pfn)) {
> undo_isolate_page_range(isolate_start, pfn, migratetype);
> + unset_migratetype_isolate(
> + pfn_to_page(isolate_end - pageblock_nr_pages),
> + migratetype);
> return -EBUSY;
> }
> }
^ permalink raw reply [flat|nested] 42+ messages in thread* Re: [PATCH v5 3/6] mm: make alloc_contig_range work at pageblock granularity
@ 2022-02-14 7:59 ` Christophe Leroy
0 siblings, 0 replies; 42+ messages in thread
From: Christophe Leroy @ 2022-02-14 7:59 UTC (permalink / raw)
To: Zi Yan, David Hildenbrand, linux-mm@kvack.org
Cc: Mel Gorman, linuxppc-dev@lists.ozlabs.org,
linux-kernel@vger.kernel.org,
virtualization@lists.linux-foundation.org,
iommu@lists.linux-foundation.org, Vlastimil Babka, Eric Ren,
Marek Szyprowski, Robin Murphy, Christoph Hellwig, Mike Rapoport,
Oscar Salvador
Le 11/02/2022 à 17:41, Zi Yan a écrit :
> From: Zi Yan <ziy@nvidia.com>
>
> alloc_contig_range() worked at MAX_ORDER-1 granularity to avoid merging
> pageblocks with different migratetypes. It might unnecessarily convert
> extra pageblocks at the beginning and at the end of the range. Change
> alloc_contig_range() to work at pageblock granularity.
>
> Special handling is needed for free pages and in-use pages across the
> boundaries of the range specified alloc_contig_range(). Because these
> partially isolated pages causes free page accounting issues. The free
> pages will be split and freed into separate migratetype lists; the
> in-use pages will be migrated then the freed pages will be handled.
>
> Signed-off-by: Zi Yan <ziy@nvidia.com>
> ---
> include/linux/page-isolation.h | 2 +-
> mm/internal.h | 3 +
> mm/memory_hotplug.c | 3 +-
> mm/page_alloc.c | 235 +++++++++++++++++++++++++--------
> mm/page_isolation.c | 33 ++++-
> 5 files changed, 211 insertions(+), 65 deletions(-)
>
> diff --git a/include/linux/page-isolation.h b/include/linux/page-isolation.h
> index 4ef7be6def83..78ff940cc169 100644
> --- a/include/linux/page-isolation.h
> +++ b/include/linux/page-isolation.h
> @@ -54,7 +54,7 @@ int move_freepages_block(struct zone *zone, struct page *page,
> */
> int
> start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
> - unsigned migratetype, int flags);
> + unsigned migratetype, int flags, gfp_t gfp_flags);
>
> /*
> * Changes MIGRATE_ISOLATE to MIGRATE_MOVABLE.
> diff --git a/mm/internal.h b/mm/internal.h
> index 0d240e876831..509cbdc25992 100644
> --- a/mm/internal.h
> +++ b/mm/internal.h
> @@ -319,6 +319,9 @@ isolate_freepages_range(struct compact_control *cc,
> int
> isolate_migratepages_range(struct compact_control *cc,
> unsigned long low_pfn, unsigned long end_pfn);
> +
> +int
> +isolate_single_pageblock(unsigned long boundary_pfn, gfp_t gfp_flags, int isolate_before_boundary);
> #endif
> int find_suitable_fallback(struct free_area *area, unsigned int order,
> int migratetype, bool only_stealable, bool *can_steal);
> diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
> index ce68098832aa..82406d2f3e46 100644
> --- a/mm/memory_hotplug.c
> +++ b/mm/memory_hotplug.c
> @@ -1863,7 +1863,8 @@ int __ref offline_pages(unsigned long start_pfn, unsigned long nr_pages,
> /* set above range as isolated */
> ret = start_isolate_page_range(start_pfn, end_pfn,
> MIGRATE_MOVABLE,
> - MEMORY_OFFLINE | REPORT_FAILURE);
> + MEMORY_OFFLINE | REPORT_FAILURE,
> + GFP_USER | __GFP_MOVABLE | __GFP_RETRY_MAYFAIL);
> if (ret) {
> reason = "failure to isolate range";
> goto failed_removal_pcplists_disabled;
> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> index 62ef78f3d771..7a4fa21aea5c 100644
> --- a/mm/page_alloc.c
> +++ b/mm/page_alloc.c
> @@ -8985,7 +8985,7 @@ static inline void alloc_contig_dump_pages(struct list_head *page_list)
> #endif
>
> /* [start, end) must belong to a single zone. */
> -static int __alloc_contig_migrate_range(struct compact_control *cc,
> +int __alloc_contig_migrate_range(struct compact_control *cc,
> unsigned long start, unsigned long end)
> {
> /* This function is based on compact_zone() from compaction.c. */
> @@ -9043,6 +9043,167 @@ static int __alloc_contig_migrate_range(struct compact_control *cc,
> return 0;
> }
>
> +/**
> + * split_free_page() -- split a free page at split_pfn_offset
> + * @free_page: the original free page
> + * @order: the order of the page
> + * @split_pfn_offset: split offset within the page
> + *
> + * It is used when the free page crosses two pageblocks with different migratetypes
> + * at split_pfn_offset within the page. The split free page will be put into
> + * separate migratetype lists afterwards. Otherwise, the function achieves
> + * nothing.
> + */
> +static inline void split_free_page(struct page *free_page,
> + int order, unsigned long split_pfn_offset)
> +{
> + struct zone *zone = page_zone(free_page);
> + unsigned long free_page_pfn = page_to_pfn(free_page);
> + unsigned long pfn;
> + unsigned long flags;
> + int free_page_order;
> +
> + spin_lock_irqsave(&zone->lock, flags);
> + del_page_from_free_list(free_page, zone, order);
> + for (pfn = free_page_pfn;
> + pfn < free_page_pfn + (1UL << order);) {
> + int mt = get_pfnblock_migratetype(pfn_to_page(pfn), pfn);
> +
> + free_page_order = order_base_2(split_pfn_offset);
> + __free_one_page(pfn_to_page(pfn), pfn, zone, free_page_order,
> + mt, FPI_NONE);
> + pfn += 1UL << free_page_order;
> + split_pfn_offset -= (1UL << free_page_order);
> + /* we have done the first part, now switch to second part */
> + if (split_pfn_offset == 0)
> + split_pfn_offset = (1UL << order) - (pfn - free_page_pfn);
> + }
> + spin_unlock_irqrestore(&zone->lock, flags);
> +}
> +
> +/**
> + * isolate_single_pageblock() -- tries to isolate a pageblock that might be
> + * within a free or in-use page.
> + * @boundary_pfn: pageblock-aligned pfn that a page might cross
> + * @gfp_flags: GFP flags used for migrating pages
> + * @isolate_before_boundary: isolate the pageblock before (1) or after (0)
> + * the boundary_pfn
> + *
> + * Free and in-use pages can be as big as MAX_ORDER-1 and contain more than one
> + * pageblock. When not all pageblocks within a page are isolated at the same
> + * time, free page accounting can go wrong. For example, in the case of
> + * MAX_ORDER-1 = pageblock_order + 1, a MAX_ORDER-1 page has two pagelbocks.
> + * [ MAX_ORDER-1 ]
> + * [ pageblock0 | pageblock1 ]
> + * When either pageblock is isolated, if it is a free page, the page is not
> + * split into separate migratetype lists, which is supposed to; if it is an
> + * in-use page and freed later, __free_one_page() does not split the free page
> + * either. The function handles this by splitting the free page or migrating
> + * the in-use page then splitting the free page.
> + */
> +int isolate_single_pageblock(unsigned long boundary_pfn, gfp_t gfp_flags,
> + int isolate_before_boundary)
Do you need such big param name ?
See
https://www.kernel.org/doc/html/latest/process/coding-style.html?highlight=style#naming
isolate_before_boundary could probably be shorter.
And should be a bool by the way.
> +{
> + unsigned char saved_mt;
> + /*
> + * scan at max(MAX_ORDER_NR_PAGES, pageblock_nr_pages) aligned range to
> + * avoid isolate pageblocks belonging to a bigger free or in-use page
> + */
> + unsigned long start_pfn = pfn_max_align_down(boundary_pfn);
> + unsigned long isolated_pageblock_pfn;
Variable name too long.
> + unsigned long pfn;
> +
> + VM_BUG_ON(!IS_ALIGNED(boundary_pfn, pageblock_nr_pages));
> +
> + if (isolate_before_boundary)
> + isolated_pageblock_pfn = boundary_pfn - pageblock_nr_pages;
> + else
> + isolated_pageblock_pfn = boundary_pfn;
> +
> + saved_mt = get_pageblock_migratetype(pfn_to_page(isolated_pageblock_pfn));
> + set_pageblock_migratetype(pfn_to_page(isolated_pageblock_pfn), MIGRATE_ISOLATE);
> +
> + for (pfn = start_pfn; pfn < boundary_pfn;) {
This loop is a bit long a deep. Isn't there a way to put what's in "if
(PageHuge(page) || PageTransCompound(page))" into a sub-function ?
See
https://www.kernel.org/doc/html/latest/process/coding-style.html?highlight=style#functions
> + struct page *page = pfn_to_page(pfn);
> +
> + /*
> + * start_pfn is max(MAX_ORDER_NR_PAGES, pageblock_nr_pages)
> + * aligned, if there is any free pages in [start_pfn, boundary_pfn),
> + * its head page will always be in the range.
> + */
> + if (PageBuddy(page)) {
> + int order = buddy_order(page);
> +
> + if (pfn + (1UL << order) > boundary_pfn)
> + split_free_page(page, order, boundary_pfn - pfn);
> + pfn += (1UL << order);
> + continue;
> + }
> + /*
> + * migrate compound pages then let the free page handling code
> + * above do the rest
> + */
> + if (PageHuge(page) || PageTransCompound(page)) {
> + unsigned long nr_pages = compound_nr(page);
> + int order = compound_order(page);
> + struct page *head = compound_head(page);
> + unsigned long head_pfn = page_to_pfn(head);
> +
> + if (head_pfn + nr_pages >= boundary_pfn) {
> + int ret;
> + struct compact_control cc = {
> + .nr_migratepages = 0,
> + .order = -1,
> + .zone = page_zone(pfn_to_page(head_pfn)),
> + .mode = MIGRATE_SYNC,
> + .ignore_skip_hint = true,
> + .no_set_skip_hint = true,
> + .gfp_mask = current_gfp_context(gfp_flags),
> + .alloc_contig = true,
> + };
> +
> + INIT_LIST_HEAD(&cc.migratepages);
> +
> + ret = __alloc_contig_migrate_range(&cc, head_pfn,
> + head_pfn + nr_pages);
> +
> + if (ret) {
> + /* restore the original migratetype */
> + set_pageblock_migratetype(
> + pfn_to_page(isolated_pageblock_pfn),
> + saved_mt);
> + return -EBUSY;
> + }
> + /*
> + * reset pfn, let the free page handling code
> + * above split the free page to the right
> + * migratetype list.
> + *
> + * head_pfn is not used here as a hugetlb page
> + * order can be bigger than MAX_ORDER-1, but
> + * after it is freed, the free page order is not.
> + * Use pfn within the range to find the head of
> + * the free page and reset order to 0 if a hugetlb
> + * page with >MAX_ORDER-1 order is encountered.
> + */
> + if (order > MAX_ORDER-1)
> + order = 0;
> + while (!PageBuddy(pfn_to_page(pfn))) {
> + order++;
> + pfn &= ~0UL << order;
> + }
> + continue;
> + }
> + pfn += nr_pages;
> + continue;
> + }
> +
> + pfn++;
> + }
> + return 0;
> +}
> +
> +
> /**
> * alloc_contig_range() -- tries to allocate given range of pages
> * @start: start PFN to allocate
> @@ -9067,8 +9228,9 @@ static int __alloc_contig_migrate_range(struct compact_control *cc,
> int alloc_contig_range(unsigned long start, unsigned long end,
> unsigned migratetype, gfp_t gfp_mask)
> {
> - unsigned long outer_start, outer_end;
> - unsigned int order;
> + unsigned long outer_end;
> + unsigned long alloc_start = ALIGN_DOWN(start, pageblock_nr_pages);
> + unsigned long alloc_end = ALIGN(end, pageblock_nr_pages);
> int ret = 0;
>
> struct compact_control cc = {
> @@ -9087,14 +9249,11 @@ int alloc_contig_range(unsigned long start, unsigned long end,
> * What we do here is we mark all pageblocks in range as
> * MIGRATE_ISOLATE. Because pageblock and max order pages may
> * have different sizes, and due to the way page allocator
> - * work, we align the range to biggest of the two pages so
> - * that page allocator won't try to merge buddies from
> - * different pageblocks and change MIGRATE_ISOLATE to some
> - * other migration type.
> + * work, start_isolate_page_range() has special handlings for this.
> *
> * Once the pageblocks are marked as MIGRATE_ISOLATE, we
> * migrate the pages from an unaligned range (ie. pages that
> - * we are interested in). This will put all the pages in
> + * we are interested in). This will put all the pages in
> * range back to page allocator as MIGRATE_ISOLATE.
> *
> * When this is done, we take the pages in range from page
> @@ -9107,9 +9266,9 @@ int alloc_contig_range(unsigned long start, unsigned long end,
> * put back to page allocator so that buddy can use them.
> */
>
> - ret = start_isolate_page_range(start, end, migratetype, 0);
> + ret = start_isolate_page_range(start, end, migratetype, 0, gfp_mask);
> if (ret)
> - return ret;
> + goto done;
>
> drain_all_pages(cc.zone);
>
> @@ -9128,68 +9287,28 @@ int alloc_contig_range(unsigned long start, unsigned long end,
> goto done;
> ret = 0;
>
> - /*
> - * Pages from [start, end) are within a MAX_ORDER_NR_PAGES
> - * aligned blocks that are marked as MIGRATE_ISOLATE. What's
> - * more, all pages in [start, end) are free in page allocator.
> - * What we are going to do is to allocate all pages from
> - * [start, end) (that is remove them from page allocator).
> - *
> - * The only problem is that pages at the beginning and at the
> - * end of interesting range may be not aligned with pages that
> - * page allocator holds, ie. they can be part of higher order
> - * pages. Because of this, we reserve the bigger range and
> - * once this is done free the pages we are not interested in.
> - *
> - * We don't have to hold zone->lock here because the pages are
> - * isolated thus they won't get removed from buddy.
> - */
> -
> - order = 0;
> - outer_start = start;
> - while (!PageBuddy(pfn_to_page(outer_start))) {
> - if (++order >= MAX_ORDER) {
> - outer_start = start;
> - break;
> - }
> - outer_start &= ~0UL << order;
> - }
> -
> - if (outer_start != start) {
> - order = buddy_order(pfn_to_page(outer_start));
> -
> - /*
> - * outer_start page could be small order buddy page and
> - * it doesn't include start page. Adjust outer_start
> - * in this case to report failed page properly
> - * on tracepoint in test_pages_isolated()
> - */
> - if (outer_start + (1UL << order) <= start)
> - outer_start = start;
> - }
> -
> /* Make sure the range is really isolated. */
> - if (test_pages_isolated(outer_start, end, 0)) {
> + if (test_pages_isolated(alloc_start, alloc_end, 0)) {
> ret = -EBUSY;
> goto done;
> }
>
> /* Grab isolated pages from freelists. */
> - outer_end = isolate_freepages_range(&cc, outer_start, end);
> + outer_end = isolate_freepages_range(&cc, alloc_start, alloc_end);
> if (!outer_end) {
> ret = -EBUSY;
> goto done;
> }
>
> /* Free head and tail (if any) */
> - if (start != outer_start)
> - free_contig_range(outer_start, start - outer_start);
> - if (end != outer_end)
> - free_contig_range(end, outer_end - end);
> + if (start != alloc_start)
> + free_contig_range(alloc_start, start - alloc_start);
> + if (end != alloc_end)
> + free_contig_range(end, alloc_end - end);
>
> done:
> - undo_isolate_page_range(pfn_max_align_down(start),
> - pfn_max_align_up(end), migratetype);
> + undo_isolate_page_range(alloc_start,
> + alloc_end, migratetype);
> return ret;
> }
> EXPORT_SYMBOL(alloc_contig_range);
> diff --git a/mm/page_isolation.c b/mm/page_isolation.c
> index 64d093ab83ec..0256d5e1032c 100644
> --- a/mm/page_isolation.c
> +++ b/mm/page_isolation.c
> @@ -285,6 +285,8 @@ __first_valid_page(unsigned long pfn, unsigned long nr_pages)
> * and PageOffline() pages.
> * REPORT_FAILURE - report details about the failure to
> * isolate the range
> + * @gfp_flags: GFP flags used for migrating pages that sit across the
> + * range boundaries.
> *
> * Making page-allocation-type to be MIGRATE_ISOLATE means free pages in
> * the range will never be allocated. Any free pages and pages freed in the
> @@ -293,6 +295,10 @@ __first_valid_page(unsigned long pfn, unsigned long nr_pages)
> * pages in the range finally, the caller have to free all pages in the range.
> * test_page_isolated() can be used for test it.
> *
> + * The function first tries to isolate the pageblocks at the beginning and end
> + * of the range, since there might be pages across the range boundaries.
> + * Afterwards, it isolates the rest of the range.
> + *
> * There is no high level synchronization mechanism that prevents two threads
> * from trying to isolate overlapping ranges. If this happens, one thread
> * will notice pageblocks in the overlapping range already set to isolate.
> @@ -313,21 +319,38 @@ __first_valid_page(unsigned long pfn, unsigned long nr_pages)
> * Return: 0 on success and -EBUSY if any part of range cannot be isolated.
> */
> int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
> - unsigned migratetype, int flags)
> + unsigned migratetype, int flags, gfp_t gfp_flags)
> {
> unsigned long pfn;
> struct page *page;
> + /* isolation is done at page block granularity */
> + unsigned long isolate_start = ALIGN_DOWN(start_pfn, pageblock_nr_pages);
> + unsigned long isolate_end = ALIGN(end_pfn, pageblock_nr_pages);
> + int ret;
>
> - unsigned long isolate_start = pfn_max_align_down(start_pfn);
> - unsigned long isolate_end = pfn_max_align_up(end_pfn);
> + /* isolate [isolate_start, isolate_start + pageblock_nr_pages] pageblock */
> + ret = isolate_single_pageblock(isolate_start, gfp_flags, 0);
> + if (ret)
> + return ret;
> +
> + /* isolate [isolate_end - pageblock_nr_pages, isolate_end] pageblock */
> + ret = isolate_single_pageblock(isolate_end, gfp_flags, 1);
> + if (ret) {
> + unset_migratetype_isolate(pfn_to_page(isolate_start), migratetype);
> + return ret;
> + }
>
> - for (pfn = isolate_start;
> - pfn < isolate_end;
> + /* skip isolated pageblocks at the beginning and end */
> + for (pfn = isolate_start + pageblock_nr_pages;
> + pfn < isolate_end - pageblock_nr_pages;
> pfn += pageblock_nr_pages) {
> page = __first_valid_page(pfn, pageblock_nr_pages);
> if (page && set_migratetype_isolate(page, migratetype, flags,
> start_pfn, end_pfn)) {
> undo_isolate_page_range(isolate_start, pfn, migratetype);
> + unset_migratetype_isolate(
> + pfn_to_page(isolate_end - pageblock_nr_pages),
> + migratetype);
> return -EBUSY;
> }
> }
^ permalink raw reply [flat|nested] 42+ messages in thread* Re: [PATCH v5 3/6] mm: make alloc_contig_range work at pageblock granularity
2022-02-14 7:59 ` Christophe Leroy
(?)
@ 2022-02-14 16:03 ` Zi Yan
-1 siblings, 0 replies; 42+ messages in thread
From: Zi Yan via iommu @ 2022-02-14 16:03 UTC (permalink / raw)
To: Christophe Leroy
Cc: David Hildenbrand, Robin Murphy, linuxppc-dev, linux-kernel,
virtualization, linux-mm, iommu, Vlastimil Babka, Eric Ren,
Mel Gorman, Christoph Hellwig, Mike Rapoport, Oscar Salvador
[-- Attachment #1.1: Type: text/plain, Size: 18627 bytes --]
On 14 Feb 2022, at 2:59, Christophe Leroy wrote:
> Le 11/02/2022 à 17:41, Zi Yan a écrit :
>> From: Zi Yan <ziy@nvidia.com>
>>
>> alloc_contig_range() worked at MAX_ORDER-1 granularity to avoid merging
>> pageblocks with different migratetypes. It might unnecessarily convert
>> extra pageblocks at the beginning and at the end of the range. Change
>> alloc_contig_range() to work at pageblock granularity.
>>
>> Special handling is needed for free pages and in-use pages across the
>> boundaries of the range specified alloc_contig_range(). Because these
>> partially isolated pages causes free page accounting issues. The free
>> pages will be split and freed into separate migratetype lists; the
>> in-use pages will be migrated then the freed pages will be handled.
>>
>> Signed-off-by: Zi Yan <ziy@nvidia.com>
>> ---
>> include/linux/page-isolation.h | 2 +-
>> mm/internal.h | 3 +
>> mm/memory_hotplug.c | 3 +-
>> mm/page_alloc.c | 235 +++++++++++++++++++++++++--------
>> mm/page_isolation.c | 33 ++++-
>> 5 files changed, 211 insertions(+), 65 deletions(-)
>>
>> diff --git a/include/linux/page-isolation.h b/include/linux/page-isolation.h
>> index 4ef7be6def83..78ff940cc169 100644
>> --- a/include/linux/page-isolation.h
>> +++ b/include/linux/page-isolation.h
>> @@ -54,7 +54,7 @@ int move_freepages_block(struct zone *zone, struct page *page,
>> */
>> int
>> start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
>> - unsigned migratetype, int flags);
>> + unsigned migratetype, int flags, gfp_t gfp_flags);
>>
>> /*
>> * Changes MIGRATE_ISOLATE to MIGRATE_MOVABLE.
>> diff --git a/mm/internal.h b/mm/internal.h
>> index 0d240e876831..509cbdc25992 100644
>> --- a/mm/internal.h
>> +++ b/mm/internal.h
>> @@ -319,6 +319,9 @@ isolate_freepages_range(struct compact_control *cc,
>> int
>> isolate_migratepages_range(struct compact_control *cc,
>> unsigned long low_pfn, unsigned long end_pfn);
>> +
>> +int
>> +isolate_single_pageblock(unsigned long boundary_pfn, gfp_t gfp_flags, int isolate_before_boundary);
>> #endif
>> int find_suitable_fallback(struct free_area *area, unsigned int order,
>> int migratetype, bool only_stealable, bool *can_steal);
>> diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
>> index ce68098832aa..82406d2f3e46 100644
>> --- a/mm/memory_hotplug.c
>> +++ b/mm/memory_hotplug.c
>> @@ -1863,7 +1863,8 @@ int __ref offline_pages(unsigned long start_pfn, unsigned long nr_pages,
>> /* set above range as isolated */
>> ret = start_isolate_page_range(start_pfn, end_pfn,
>> MIGRATE_MOVABLE,
>> - MEMORY_OFFLINE | REPORT_FAILURE);
>> + MEMORY_OFFLINE | REPORT_FAILURE,
>> + GFP_USER | __GFP_MOVABLE | __GFP_RETRY_MAYFAIL);
>> if (ret) {
>> reason = "failure to isolate range";
>> goto failed_removal_pcplists_disabled;
>> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
>> index 62ef78f3d771..7a4fa21aea5c 100644
>> --- a/mm/page_alloc.c
>> +++ b/mm/page_alloc.c
>> @@ -8985,7 +8985,7 @@ static inline void alloc_contig_dump_pages(struct list_head *page_list)
>> #endif
>>
>> /* [start, end) must belong to a single zone. */
>> -static int __alloc_contig_migrate_range(struct compact_control *cc,
>> +int __alloc_contig_migrate_range(struct compact_control *cc,
>> unsigned long start, unsigned long end)
>> {
>> /* This function is based on compact_zone() from compaction.c. */
>> @@ -9043,6 +9043,167 @@ static int __alloc_contig_migrate_range(struct compact_control *cc,
>> return 0;
>> }
>>
>> +/**
>> + * split_free_page() -- split a free page at split_pfn_offset
>> + * @free_page: the original free page
>> + * @order: the order of the page
>> + * @split_pfn_offset: split offset within the page
>> + *
>> + * It is used when the free page crosses two pageblocks with different migratetypes
>> + * at split_pfn_offset within the page. The split free page will be put into
>> + * separate migratetype lists afterwards. Otherwise, the function achieves
>> + * nothing.
>> + */
>> +static inline void split_free_page(struct page *free_page,
>> + int order, unsigned long split_pfn_offset)
>> +{
>> + struct zone *zone = page_zone(free_page);
>> + unsigned long free_page_pfn = page_to_pfn(free_page);
>> + unsigned long pfn;
>> + unsigned long flags;
>> + int free_page_order;
>> +
>> + spin_lock_irqsave(&zone->lock, flags);
>> + del_page_from_free_list(free_page, zone, order);
>> + for (pfn = free_page_pfn;
>> + pfn < free_page_pfn + (1UL << order);) {
>> + int mt = get_pfnblock_migratetype(pfn_to_page(pfn), pfn);
>> +
>> + free_page_order = order_base_2(split_pfn_offset);
>> + __free_one_page(pfn_to_page(pfn), pfn, zone, free_page_order,
>> + mt, FPI_NONE);
>> + pfn += 1UL << free_page_order;
>> + split_pfn_offset -= (1UL << free_page_order);
>> + /* we have done the first part, now switch to second part */
>> + if (split_pfn_offset == 0)
>> + split_pfn_offset = (1UL << order) - (pfn - free_page_pfn);
>> + }
>> + spin_unlock_irqrestore(&zone->lock, flags);
>> +}
>> +
>> +/**
>> + * isolate_single_pageblock() -- tries to isolate a pageblock that might be
>> + * within a free or in-use page.
>> + * @boundary_pfn: pageblock-aligned pfn that a page might cross
>> + * @gfp_flags: GFP flags used for migrating pages
>> + * @isolate_before_boundary: isolate the pageblock before (1) or after (0)
>> + * the boundary_pfn
>> + *
>> + * Free and in-use pages can be as big as MAX_ORDER-1 and contain more than one
>> + * pageblock. When not all pageblocks within a page are isolated at the same
>> + * time, free page accounting can go wrong. For example, in the case of
>> + * MAX_ORDER-1 = pageblock_order + 1, a MAX_ORDER-1 page has two pagelbocks.
>> + * [ MAX_ORDER-1 ]
>> + * [ pageblock0 | pageblock1 ]
>> + * When either pageblock is isolated, if it is a free page, the page is not
>> + * split into separate migratetype lists, which is supposed to; if it is an
>> + * in-use page and freed later, __free_one_page() does not split the free page
>> + * either. The function handles this by splitting the free page or migrating
>> + * the in-use page then splitting the free page.
>> + */
>> +int isolate_single_pageblock(unsigned long boundary_pfn, gfp_t gfp_flags,
>> + int isolate_before_boundary)
>
> Do you need such big param name ?
I am happy to take any suggestion.
>
> See
> https://www.kernel.org/doc/html/latest/process/coding-style.html?highlight=style#naming
>
> isolate_before_boundary could probably be shorter.
isolate_before instead?
>
> And should be a bool by the way.
Sure.
>
>> +{
>> + unsigned char saved_mt;
>> + /*
>> + * scan at max(MAX_ORDER_NR_PAGES, pageblock_nr_pages) aligned range to
>> + * avoid isolate pageblocks belonging to a bigger free or in-use page
>> + */
>> + unsigned long start_pfn = pfn_max_align_down(boundary_pfn);
>> + unsigned long isolated_pageblock_pfn;
>
> Variable name too long.
>
>> + unsigned long pfn;
>> +
>> + VM_BUG_ON(!IS_ALIGNED(boundary_pfn, pageblock_nr_pages));
>> +
>> + if (isolate_before_boundary)
>> + isolated_pageblock_pfn = boundary_pfn - pageblock_nr_pages;
>> + else
>> + isolated_pageblock_pfn = boundary_pfn;
>> +
>> + saved_mt = get_pageblock_migratetype(pfn_to_page(isolated_pageblock_pfn));
>> + set_pageblock_migratetype(pfn_to_page(isolated_pageblock_pfn), MIGRATE_ISOLATE);
>> +
>> + for (pfn = start_pfn; pfn < boundary_pfn;) {
>
> This loop is a bit long a deep. Isn't there a way to put what's in "if
> (PageHuge(page) || PageTransCompound(page))" into a sub-function ?
>
Let me give it a try.
> See
> https://www.kernel.org/doc/html/latest/process/coding-style.html?highlight=style#functions
>
Thanks for the review.
>> + struct page *page = pfn_to_page(pfn);
>> +
>> + /*
>> + * start_pfn is max(MAX_ORDER_NR_PAGES, pageblock_nr_pages)
>> + * aligned, if there is any free pages in [start_pfn, boundary_pfn),
>> + * its head page will always be in the range.
>> + */
>> + if (PageBuddy(page)) {
>> + int order = buddy_order(page);
>> +
>> + if (pfn + (1UL << order) > boundary_pfn)
>> + split_free_page(page, order, boundary_pfn - pfn);
>> + pfn += (1UL << order);
>> + continue;
>> + }
>> + /*
>> + * migrate compound pages then let the free page handling code
>> + * above do the rest
>> + */
>> + if (PageHuge(page) || PageTransCompound(page)) {
>> + unsigned long nr_pages = compound_nr(page);
>> + int order = compound_order(page);
>> + struct page *head = compound_head(page);
>> + unsigned long head_pfn = page_to_pfn(head);
>> +
>> + if (head_pfn + nr_pages >= boundary_pfn) {
>> + int ret;
>> + struct compact_control cc = {
>> + .nr_migratepages = 0,
>> + .order = -1,
>> + .zone = page_zone(pfn_to_page(head_pfn)),
>> + .mode = MIGRATE_SYNC,
>> + .ignore_skip_hint = true,
>> + .no_set_skip_hint = true,
>> + .gfp_mask = current_gfp_context(gfp_flags),
>> + .alloc_contig = true,
>> + };
>> +
>> + INIT_LIST_HEAD(&cc.migratepages);
>> +
>> + ret = __alloc_contig_migrate_range(&cc, head_pfn,
>> + head_pfn + nr_pages);
>> +
>> + if (ret) {
>> + /* restore the original migratetype */
>> + set_pageblock_migratetype(
>> + pfn_to_page(isolated_pageblock_pfn),
>> + saved_mt);
>> + return -EBUSY;
>> + }
>> + /*
>> + * reset pfn, let the free page handling code
>> + * above split the free page to the right
>> + * migratetype list.
>> + *
>> + * head_pfn is not used here as a hugetlb page
>> + * order can be bigger than MAX_ORDER-1, but
>> + * after it is freed, the free page order is not.
>> + * Use pfn within the range to find the head of
>> + * the free page and reset order to 0 if a hugetlb
>> + * page with >MAX_ORDER-1 order is encountered.
>> + */
>> + if (order > MAX_ORDER-1)
>> + order = 0;
>> + while (!PageBuddy(pfn_to_page(pfn))) {
>> + order++;
>> + pfn &= ~0UL << order;
>> + }
>> + continue;
>> + }
>> + pfn += nr_pages;
>> + continue;
>> + }
>> +
>> + pfn++;
>> + }
>> + return 0;
>> +}
>> +
>> +
>> /**
>> * alloc_contig_range() -- tries to allocate given range of pages
>> * @start: start PFN to allocate
>> @@ -9067,8 +9228,9 @@ static int __alloc_contig_migrate_range(struct compact_control *cc,
>> int alloc_contig_range(unsigned long start, unsigned long end,
>> unsigned migratetype, gfp_t gfp_mask)
>> {
>> - unsigned long outer_start, outer_end;
>> - unsigned int order;
>> + unsigned long outer_end;
>> + unsigned long alloc_start = ALIGN_DOWN(start, pageblock_nr_pages);
>> + unsigned long alloc_end = ALIGN(end, pageblock_nr_pages);
>> int ret = 0;
>>
>> struct compact_control cc = {
>> @@ -9087,14 +9249,11 @@ int alloc_contig_range(unsigned long start, unsigned long end,
>> * What we do here is we mark all pageblocks in range as
>> * MIGRATE_ISOLATE. Because pageblock and max order pages may
>> * have different sizes, and due to the way page allocator
>> - * work, we align the range to biggest of the two pages so
>> - * that page allocator won't try to merge buddies from
>> - * different pageblocks and change MIGRATE_ISOLATE to some
>> - * other migration type.
>> + * work, start_isolate_page_range() has special handlings for this.
>> *
>> * Once the pageblocks are marked as MIGRATE_ISOLATE, we
>> * migrate the pages from an unaligned range (ie. pages that
>> - * we are interested in). This will put all the pages in
>> + * we are interested in). This will put all the pages in
>> * range back to page allocator as MIGRATE_ISOLATE.
>> *
>> * When this is done, we take the pages in range from page
>> @@ -9107,9 +9266,9 @@ int alloc_contig_range(unsigned long start, unsigned long end,
>> * put back to page allocator so that buddy can use them.
>> */
>>
>> - ret = start_isolate_page_range(start, end, migratetype, 0);
>> + ret = start_isolate_page_range(start, end, migratetype, 0, gfp_mask);
>> if (ret)
>> - return ret;
>> + goto done;
>>
>> drain_all_pages(cc.zone);
>>
>> @@ -9128,68 +9287,28 @@ int alloc_contig_range(unsigned long start, unsigned long end,
>> goto done;
>> ret = 0;
>>
>> - /*
>> - * Pages from [start, end) are within a MAX_ORDER_NR_PAGES
>> - * aligned blocks that are marked as MIGRATE_ISOLATE. What's
>> - * more, all pages in [start, end) are free in page allocator.
>> - * What we are going to do is to allocate all pages from
>> - * [start, end) (that is remove them from page allocator).
>> - *
>> - * The only problem is that pages at the beginning and at the
>> - * end of interesting range may be not aligned with pages that
>> - * page allocator holds, ie. they can be part of higher order
>> - * pages. Because of this, we reserve the bigger range and
>> - * once this is done free the pages we are not interested in.
>> - *
>> - * We don't have to hold zone->lock here because the pages are
>> - * isolated thus they won't get removed from buddy.
>> - */
>> -
>> - order = 0;
>> - outer_start = start;
>> - while (!PageBuddy(pfn_to_page(outer_start))) {
>> - if (++order >= MAX_ORDER) {
>> - outer_start = start;
>> - break;
>> - }
>> - outer_start &= ~0UL << order;
>> - }
>> -
>> - if (outer_start != start) {
>> - order = buddy_order(pfn_to_page(outer_start));
>> -
>> - /*
>> - * outer_start page could be small order buddy page and
>> - * it doesn't include start page. Adjust outer_start
>> - * in this case to report failed page properly
>> - * on tracepoint in test_pages_isolated()
>> - */
>> - if (outer_start + (1UL << order) <= start)
>> - outer_start = start;
>> - }
>> -
>> /* Make sure the range is really isolated. */
>> - if (test_pages_isolated(outer_start, end, 0)) {
>> + if (test_pages_isolated(alloc_start, alloc_end, 0)) {
>> ret = -EBUSY;
>> goto done;
>> }
>>
>> /* Grab isolated pages from freelists. */
>> - outer_end = isolate_freepages_range(&cc, outer_start, end);
>> + outer_end = isolate_freepages_range(&cc, alloc_start, alloc_end);
>> if (!outer_end) {
>> ret = -EBUSY;
>> goto done;
>> }
>>
>> /* Free head and tail (if any) */
>> - if (start != outer_start)
>> - free_contig_range(outer_start, start - outer_start);
>> - if (end != outer_end)
>> - free_contig_range(end, outer_end - end);
>> + if (start != alloc_start)
>> + free_contig_range(alloc_start, start - alloc_start);
>> + if (end != alloc_end)
>> + free_contig_range(end, alloc_end - end);
>>
>> done:
>> - undo_isolate_page_range(pfn_max_align_down(start),
>> - pfn_max_align_up(end), migratetype);
>> + undo_isolate_page_range(alloc_start,
>> + alloc_end, migratetype);
>> return ret;
>> }
>> EXPORT_SYMBOL(alloc_contig_range);
>> diff --git a/mm/page_isolation.c b/mm/page_isolation.c
>> index 64d093ab83ec..0256d5e1032c 100644
>> --- a/mm/page_isolation.c
>> +++ b/mm/page_isolation.c
>> @@ -285,6 +285,8 @@ __first_valid_page(unsigned long pfn, unsigned long nr_pages)
>> * and PageOffline() pages.
>> * REPORT_FAILURE - report details about the failure to
>> * isolate the range
>> + * @gfp_flags: GFP flags used for migrating pages that sit across the
>> + * range boundaries.
>> *
>> * Making page-allocation-type to be MIGRATE_ISOLATE means free pages in
>> * the range will never be allocated. Any free pages and pages freed in the
>> @@ -293,6 +295,10 @@ __first_valid_page(unsigned long pfn, unsigned long nr_pages)
>> * pages in the range finally, the caller have to free all pages in the range.
>> * test_page_isolated() can be used for test it.
>> *
>> + * The function first tries to isolate the pageblocks at the beginning and end
>> + * of the range, since there might be pages across the range boundaries.
>> + * Afterwards, it isolates the rest of the range.
>> + *
>> * There is no high level synchronization mechanism that prevents two threads
>> * from trying to isolate overlapping ranges. If this happens, one thread
>> * will notice pageblocks in the overlapping range already set to isolate.
>> @@ -313,21 +319,38 @@ __first_valid_page(unsigned long pfn, unsigned long nr_pages)
>> * Return: 0 on success and -EBUSY if any part of range cannot be isolated.
>> */
>> int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
>> - unsigned migratetype, int flags)
>> + unsigned migratetype, int flags, gfp_t gfp_flags)
>> {
>> unsigned long pfn;
>> struct page *page;
>> + /* isolation is done at page block granularity */
>> + unsigned long isolate_start = ALIGN_DOWN(start_pfn, pageblock_nr_pages);
>> + unsigned long isolate_end = ALIGN(end_pfn, pageblock_nr_pages);
>> + int ret;
>>
>> - unsigned long isolate_start = pfn_max_align_down(start_pfn);
>> - unsigned long isolate_end = pfn_max_align_up(end_pfn);
>> + /* isolate [isolate_start, isolate_start + pageblock_nr_pages] pageblock */
>> + ret = isolate_single_pageblock(isolate_start, gfp_flags, 0);
>> + if (ret)
>> + return ret;
>> +
>> + /* isolate [isolate_end - pageblock_nr_pages, isolate_end] pageblock */
>> + ret = isolate_single_pageblock(isolate_end, gfp_flags, 1);
>> + if (ret) {
>> + unset_migratetype_isolate(pfn_to_page(isolate_start), migratetype);
>> + return ret;
>> + }
>>
>> - for (pfn = isolate_start;
>> - pfn < isolate_end;
>> + /* skip isolated pageblocks at the beginning and end */
>> + for (pfn = isolate_start + pageblock_nr_pages;
>> + pfn < isolate_end - pageblock_nr_pages;
>> pfn += pageblock_nr_pages) {
>> page = __first_valid_page(pfn, pageblock_nr_pages);
>> if (page && set_migratetype_isolate(page, migratetype, flags,
>> start_pfn, end_pfn)) {
>> undo_isolate_page_range(isolate_start, pfn, migratetype);
>> + unset_migratetype_isolate(
>> + pfn_to_page(isolate_end - pageblock_nr_pages),
>> + migratetype);
>> return -EBUSY;
>> }
>> }
--
Best Regards,
Yan, Zi
[-- Attachment #1.2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 854 bytes --]
[-- Attachment #2: Type: text/plain, Size: 156 bytes --]
_______________________________________________
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu
^ permalink raw reply [flat|nested] 42+ messages in thread* Re: [PATCH v5 3/6] mm: make alloc_contig_range work at pageblock granularity
@ 2022-02-14 16:03 ` Zi Yan
0 siblings, 0 replies; 42+ messages in thread
From: Zi Yan @ 2022-02-14 16:03 UTC (permalink / raw)
To: Christophe Leroy
Cc: David Hildenbrand, linux-mm, Mel Gorman, Oscar Salvador,
Robin Murphy, linux-kernel, iommu, Mike Rapoport, Eric Ren,
virtualization, linuxppc-dev, Christoph Hellwig, Vlastimil Babka,
Marek Szyprowski
[-- Attachment #1: Type: text/plain, Size: 18627 bytes --]
On 14 Feb 2022, at 2:59, Christophe Leroy wrote:
> Le 11/02/2022 à 17:41, Zi Yan a écrit :
>> From: Zi Yan <ziy@nvidia.com>
>>
>> alloc_contig_range() worked at MAX_ORDER-1 granularity to avoid merging
>> pageblocks with different migratetypes. It might unnecessarily convert
>> extra pageblocks at the beginning and at the end of the range. Change
>> alloc_contig_range() to work at pageblock granularity.
>>
>> Special handling is needed for free pages and in-use pages across the
>> boundaries of the range specified alloc_contig_range(). Because these
>> partially isolated pages causes free page accounting issues. The free
>> pages will be split and freed into separate migratetype lists; the
>> in-use pages will be migrated then the freed pages will be handled.
>>
>> Signed-off-by: Zi Yan <ziy@nvidia.com>
>> ---
>> include/linux/page-isolation.h | 2 +-
>> mm/internal.h | 3 +
>> mm/memory_hotplug.c | 3 +-
>> mm/page_alloc.c | 235 +++++++++++++++++++++++++--------
>> mm/page_isolation.c | 33 ++++-
>> 5 files changed, 211 insertions(+), 65 deletions(-)
>>
>> diff --git a/include/linux/page-isolation.h b/include/linux/page-isolation.h
>> index 4ef7be6def83..78ff940cc169 100644
>> --- a/include/linux/page-isolation.h
>> +++ b/include/linux/page-isolation.h
>> @@ -54,7 +54,7 @@ int move_freepages_block(struct zone *zone, struct page *page,
>> */
>> int
>> start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
>> - unsigned migratetype, int flags);
>> + unsigned migratetype, int flags, gfp_t gfp_flags);
>>
>> /*
>> * Changes MIGRATE_ISOLATE to MIGRATE_MOVABLE.
>> diff --git a/mm/internal.h b/mm/internal.h
>> index 0d240e876831..509cbdc25992 100644
>> --- a/mm/internal.h
>> +++ b/mm/internal.h
>> @@ -319,6 +319,9 @@ isolate_freepages_range(struct compact_control *cc,
>> int
>> isolate_migratepages_range(struct compact_control *cc,
>> unsigned long low_pfn, unsigned long end_pfn);
>> +
>> +int
>> +isolate_single_pageblock(unsigned long boundary_pfn, gfp_t gfp_flags, int isolate_before_boundary);
>> #endif
>> int find_suitable_fallback(struct free_area *area, unsigned int order,
>> int migratetype, bool only_stealable, bool *can_steal);
>> diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
>> index ce68098832aa..82406d2f3e46 100644
>> --- a/mm/memory_hotplug.c
>> +++ b/mm/memory_hotplug.c
>> @@ -1863,7 +1863,8 @@ int __ref offline_pages(unsigned long start_pfn, unsigned long nr_pages,
>> /* set above range as isolated */
>> ret = start_isolate_page_range(start_pfn, end_pfn,
>> MIGRATE_MOVABLE,
>> - MEMORY_OFFLINE | REPORT_FAILURE);
>> + MEMORY_OFFLINE | REPORT_FAILURE,
>> + GFP_USER | __GFP_MOVABLE | __GFP_RETRY_MAYFAIL);
>> if (ret) {
>> reason = "failure to isolate range";
>> goto failed_removal_pcplists_disabled;
>> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
>> index 62ef78f3d771..7a4fa21aea5c 100644
>> --- a/mm/page_alloc.c
>> +++ b/mm/page_alloc.c
>> @@ -8985,7 +8985,7 @@ static inline void alloc_contig_dump_pages(struct list_head *page_list)
>> #endif
>>
>> /* [start, end) must belong to a single zone. */
>> -static int __alloc_contig_migrate_range(struct compact_control *cc,
>> +int __alloc_contig_migrate_range(struct compact_control *cc,
>> unsigned long start, unsigned long end)
>> {
>> /* This function is based on compact_zone() from compaction.c. */
>> @@ -9043,6 +9043,167 @@ static int __alloc_contig_migrate_range(struct compact_control *cc,
>> return 0;
>> }
>>
>> +/**
>> + * split_free_page() -- split a free page at split_pfn_offset
>> + * @free_page: the original free page
>> + * @order: the order of the page
>> + * @split_pfn_offset: split offset within the page
>> + *
>> + * It is used when the free page crosses two pageblocks with different migratetypes
>> + * at split_pfn_offset within the page. The split free page will be put into
>> + * separate migratetype lists afterwards. Otherwise, the function achieves
>> + * nothing.
>> + */
>> +static inline void split_free_page(struct page *free_page,
>> + int order, unsigned long split_pfn_offset)
>> +{
>> + struct zone *zone = page_zone(free_page);
>> + unsigned long free_page_pfn = page_to_pfn(free_page);
>> + unsigned long pfn;
>> + unsigned long flags;
>> + int free_page_order;
>> +
>> + spin_lock_irqsave(&zone->lock, flags);
>> + del_page_from_free_list(free_page, zone, order);
>> + for (pfn = free_page_pfn;
>> + pfn < free_page_pfn + (1UL << order);) {
>> + int mt = get_pfnblock_migratetype(pfn_to_page(pfn), pfn);
>> +
>> + free_page_order = order_base_2(split_pfn_offset);
>> + __free_one_page(pfn_to_page(pfn), pfn, zone, free_page_order,
>> + mt, FPI_NONE);
>> + pfn += 1UL << free_page_order;
>> + split_pfn_offset -= (1UL << free_page_order);
>> + /* we have done the first part, now switch to second part */
>> + if (split_pfn_offset == 0)
>> + split_pfn_offset = (1UL << order) - (pfn - free_page_pfn);
>> + }
>> + spin_unlock_irqrestore(&zone->lock, flags);
>> +}
>> +
>> +/**
>> + * isolate_single_pageblock() -- tries to isolate a pageblock that might be
>> + * within a free or in-use page.
>> + * @boundary_pfn: pageblock-aligned pfn that a page might cross
>> + * @gfp_flags: GFP flags used for migrating pages
>> + * @isolate_before_boundary: isolate the pageblock before (1) or after (0)
>> + * the boundary_pfn
>> + *
>> + * Free and in-use pages can be as big as MAX_ORDER-1 and contain more than one
>> + * pageblock. When not all pageblocks within a page are isolated at the same
>> + * time, free page accounting can go wrong. For example, in the case of
>> + * MAX_ORDER-1 = pageblock_order + 1, a MAX_ORDER-1 page has two pagelbocks.
>> + * [ MAX_ORDER-1 ]
>> + * [ pageblock0 | pageblock1 ]
>> + * When either pageblock is isolated, if it is a free page, the page is not
>> + * split into separate migratetype lists, which is supposed to; if it is an
>> + * in-use page and freed later, __free_one_page() does not split the free page
>> + * either. The function handles this by splitting the free page or migrating
>> + * the in-use page then splitting the free page.
>> + */
>> +int isolate_single_pageblock(unsigned long boundary_pfn, gfp_t gfp_flags,
>> + int isolate_before_boundary)
>
> Do you need such big param name ?
I am happy to take any suggestion.
>
> See
> https://www.kernel.org/doc/html/latest/process/coding-style.html?highlight=style#naming
>
> isolate_before_boundary could probably be shorter.
isolate_before instead?
>
> And should be a bool by the way.
Sure.
>
>> +{
>> + unsigned char saved_mt;
>> + /*
>> + * scan at max(MAX_ORDER_NR_PAGES, pageblock_nr_pages) aligned range to
>> + * avoid isolate pageblocks belonging to a bigger free or in-use page
>> + */
>> + unsigned long start_pfn = pfn_max_align_down(boundary_pfn);
>> + unsigned long isolated_pageblock_pfn;
>
> Variable name too long.
>
>> + unsigned long pfn;
>> +
>> + VM_BUG_ON(!IS_ALIGNED(boundary_pfn, pageblock_nr_pages));
>> +
>> + if (isolate_before_boundary)
>> + isolated_pageblock_pfn = boundary_pfn - pageblock_nr_pages;
>> + else
>> + isolated_pageblock_pfn = boundary_pfn;
>> +
>> + saved_mt = get_pageblock_migratetype(pfn_to_page(isolated_pageblock_pfn));
>> + set_pageblock_migratetype(pfn_to_page(isolated_pageblock_pfn), MIGRATE_ISOLATE);
>> +
>> + for (pfn = start_pfn; pfn < boundary_pfn;) {
>
> This loop is a bit long a deep. Isn't there a way to put what's in "if
> (PageHuge(page) || PageTransCompound(page))" into a sub-function ?
>
Let me give it a try.
> See
> https://www.kernel.org/doc/html/latest/process/coding-style.html?highlight=style#functions
>
Thanks for the review.
>> + struct page *page = pfn_to_page(pfn);
>> +
>> + /*
>> + * start_pfn is max(MAX_ORDER_NR_PAGES, pageblock_nr_pages)
>> + * aligned, if there is any free pages in [start_pfn, boundary_pfn),
>> + * its head page will always be in the range.
>> + */
>> + if (PageBuddy(page)) {
>> + int order = buddy_order(page);
>> +
>> + if (pfn + (1UL << order) > boundary_pfn)
>> + split_free_page(page, order, boundary_pfn - pfn);
>> + pfn += (1UL << order);
>> + continue;
>> + }
>> + /*
>> + * migrate compound pages then let the free page handling code
>> + * above do the rest
>> + */
>> + if (PageHuge(page) || PageTransCompound(page)) {
>> + unsigned long nr_pages = compound_nr(page);
>> + int order = compound_order(page);
>> + struct page *head = compound_head(page);
>> + unsigned long head_pfn = page_to_pfn(head);
>> +
>> + if (head_pfn + nr_pages >= boundary_pfn) {
>> + int ret;
>> + struct compact_control cc = {
>> + .nr_migratepages = 0,
>> + .order = -1,
>> + .zone = page_zone(pfn_to_page(head_pfn)),
>> + .mode = MIGRATE_SYNC,
>> + .ignore_skip_hint = true,
>> + .no_set_skip_hint = true,
>> + .gfp_mask = current_gfp_context(gfp_flags),
>> + .alloc_contig = true,
>> + };
>> +
>> + INIT_LIST_HEAD(&cc.migratepages);
>> +
>> + ret = __alloc_contig_migrate_range(&cc, head_pfn,
>> + head_pfn + nr_pages);
>> +
>> + if (ret) {
>> + /* restore the original migratetype */
>> + set_pageblock_migratetype(
>> + pfn_to_page(isolated_pageblock_pfn),
>> + saved_mt);
>> + return -EBUSY;
>> + }
>> + /*
>> + * reset pfn, let the free page handling code
>> + * above split the free page to the right
>> + * migratetype list.
>> + *
>> + * head_pfn is not used here as a hugetlb page
>> + * order can be bigger than MAX_ORDER-1, but
>> + * after it is freed, the free page order is not.
>> + * Use pfn within the range to find the head of
>> + * the free page and reset order to 0 if a hugetlb
>> + * page with >MAX_ORDER-1 order is encountered.
>> + */
>> + if (order > MAX_ORDER-1)
>> + order = 0;
>> + while (!PageBuddy(pfn_to_page(pfn))) {
>> + order++;
>> + pfn &= ~0UL << order;
>> + }
>> + continue;
>> + }
>> + pfn += nr_pages;
>> + continue;
>> + }
>> +
>> + pfn++;
>> + }
>> + return 0;
>> +}
>> +
>> +
>> /**
>> * alloc_contig_range() -- tries to allocate given range of pages
>> * @start: start PFN to allocate
>> @@ -9067,8 +9228,9 @@ static int __alloc_contig_migrate_range(struct compact_control *cc,
>> int alloc_contig_range(unsigned long start, unsigned long end,
>> unsigned migratetype, gfp_t gfp_mask)
>> {
>> - unsigned long outer_start, outer_end;
>> - unsigned int order;
>> + unsigned long outer_end;
>> + unsigned long alloc_start = ALIGN_DOWN(start, pageblock_nr_pages);
>> + unsigned long alloc_end = ALIGN(end, pageblock_nr_pages);
>> int ret = 0;
>>
>> struct compact_control cc = {
>> @@ -9087,14 +9249,11 @@ int alloc_contig_range(unsigned long start, unsigned long end,
>> * What we do here is we mark all pageblocks in range as
>> * MIGRATE_ISOLATE. Because pageblock and max order pages may
>> * have different sizes, and due to the way page allocator
>> - * work, we align the range to biggest of the two pages so
>> - * that page allocator won't try to merge buddies from
>> - * different pageblocks and change MIGRATE_ISOLATE to some
>> - * other migration type.
>> + * work, start_isolate_page_range() has special handlings for this.
>> *
>> * Once the pageblocks are marked as MIGRATE_ISOLATE, we
>> * migrate the pages from an unaligned range (ie. pages that
>> - * we are interested in). This will put all the pages in
>> + * we are interested in). This will put all the pages in
>> * range back to page allocator as MIGRATE_ISOLATE.
>> *
>> * When this is done, we take the pages in range from page
>> @@ -9107,9 +9266,9 @@ int alloc_contig_range(unsigned long start, unsigned long end,
>> * put back to page allocator so that buddy can use them.
>> */
>>
>> - ret = start_isolate_page_range(start, end, migratetype, 0);
>> + ret = start_isolate_page_range(start, end, migratetype, 0, gfp_mask);
>> if (ret)
>> - return ret;
>> + goto done;
>>
>> drain_all_pages(cc.zone);
>>
>> @@ -9128,68 +9287,28 @@ int alloc_contig_range(unsigned long start, unsigned long end,
>> goto done;
>> ret = 0;
>>
>> - /*
>> - * Pages from [start, end) are within a MAX_ORDER_NR_PAGES
>> - * aligned blocks that are marked as MIGRATE_ISOLATE. What's
>> - * more, all pages in [start, end) are free in page allocator.
>> - * What we are going to do is to allocate all pages from
>> - * [start, end) (that is remove them from page allocator).
>> - *
>> - * The only problem is that pages at the beginning and at the
>> - * end of interesting range may be not aligned with pages that
>> - * page allocator holds, ie. they can be part of higher order
>> - * pages. Because of this, we reserve the bigger range and
>> - * once this is done free the pages we are not interested in.
>> - *
>> - * We don't have to hold zone->lock here because the pages are
>> - * isolated thus they won't get removed from buddy.
>> - */
>> -
>> - order = 0;
>> - outer_start = start;
>> - while (!PageBuddy(pfn_to_page(outer_start))) {
>> - if (++order >= MAX_ORDER) {
>> - outer_start = start;
>> - break;
>> - }
>> - outer_start &= ~0UL << order;
>> - }
>> -
>> - if (outer_start != start) {
>> - order = buddy_order(pfn_to_page(outer_start));
>> -
>> - /*
>> - * outer_start page could be small order buddy page and
>> - * it doesn't include start page. Adjust outer_start
>> - * in this case to report failed page properly
>> - * on tracepoint in test_pages_isolated()
>> - */
>> - if (outer_start + (1UL << order) <= start)
>> - outer_start = start;
>> - }
>> -
>> /* Make sure the range is really isolated. */
>> - if (test_pages_isolated(outer_start, end, 0)) {
>> + if (test_pages_isolated(alloc_start, alloc_end, 0)) {
>> ret = -EBUSY;
>> goto done;
>> }
>>
>> /* Grab isolated pages from freelists. */
>> - outer_end = isolate_freepages_range(&cc, outer_start, end);
>> + outer_end = isolate_freepages_range(&cc, alloc_start, alloc_end);
>> if (!outer_end) {
>> ret = -EBUSY;
>> goto done;
>> }
>>
>> /* Free head and tail (if any) */
>> - if (start != outer_start)
>> - free_contig_range(outer_start, start - outer_start);
>> - if (end != outer_end)
>> - free_contig_range(end, outer_end - end);
>> + if (start != alloc_start)
>> + free_contig_range(alloc_start, start - alloc_start);
>> + if (end != alloc_end)
>> + free_contig_range(end, alloc_end - end);
>>
>> done:
>> - undo_isolate_page_range(pfn_max_align_down(start),
>> - pfn_max_align_up(end), migratetype);
>> + undo_isolate_page_range(alloc_start,
>> + alloc_end, migratetype);
>> return ret;
>> }
>> EXPORT_SYMBOL(alloc_contig_range);
>> diff --git a/mm/page_isolation.c b/mm/page_isolation.c
>> index 64d093ab83ec..0256d5e1032c 100644
>> --- a/mm/page_isolation.c
>> +++ b/mm/page_isolation.c
>> @@ -285,6 +285,8 @@ __first_valid_page(unsigned long pfn, unsigned long nr_pages)
>> * and PageOffline() pages.
>> * REPORT_FAILURE - report details about the failure to
>> * isolate the range
>> + * @gfp_flags: GFP flags used for migrating pages that sit across the
>> + * range boundaries.
>> *
>> * Making page-allocation-type to be MIGRATE_ISOLATE means free pages in
>> * the range will never be allocated. Any free pages and pages freed in the
>> @@ -293,6 +295,10 @@ __first_valid_page(unsigned long pfn, unsigned long nr_pages)
>> * pages in the range finally, the caller have to free all pages in the range.
>> * test_page_isolated() can be used for test it.
>> *
>> + * The function first tries to isolate the pageblocks at the beginning and end
>> + * of the range, since there might be pages across the range boundaries.
>> + * Afterwards, it isolates the rest of the range.
>> + *
>> * There is no high level synchronization mechanism that prevents two threads
>> * from trying to isolate overlapping ranges. If this happens, one thread
>> * will notice pageblocks in the overlapping range already set to isolate.
>> @@ -313,21 +319,38 @@ __first_valid_page(unsigned long pfn, unsigned long nr_pages)
>> * Return: 0 on success and -EBUSY if any part of range cannot be isolated.
>> */
>> int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
>> - unsigned migratetype, int flags)
>> + unsigned migratetype, int flags, gfp_t gfp_flags)
>> {
>> unsigned long pfn;
>> struct page *page;
>> + /* isolation is done at page block granularity */
>> + unsigned long isolate_start = ALIGN_DOWN(start_pfn, pageblock_nr_pages);
>> + unsigned long isolate_end = ALIGN(end_pfn, pageblock_nr_pages);
>> + int ret;
>>
>> - unsigned long isolate_start = pfn_max_align_down(start_pfn);
>> - unsigned long isolate_end = pfn_max_align_up(end_pfn);
>> + /* isolate [isolate_start, isolate_start + pageblock_nr_pages] pageblock */
>> + ret = isolate_single_pageblock(isolate_start, gfp_flags, 0);
>> + if (ret)
>> + return ret;
>> +
>> + /* isolate [isolate_end - pageblock_nr_pages, isolate_end] pageblock */
>> + ret = isolate_single_pageblock(isolate_end, gfp_flags, 1);
>> + if (ret) {
>> + unset_migratetype_isolate(pfn_to_page(isolate_start), migratetype);
>> + return ret;
>> + }
>>
>> - for (pfn = isolate_start;
>> - pfn < isolate_end;
>> + /* skip isolated pageblocks at the beginning and end */
>> + for (pfn = isolate_start + pageblock_nr_pages;
>> + pfn < isolate_end - pageblock_nr_pages;
>> pfn += pageblock_nr_pages) {
>> page = __first_valid_page(pfn, pageblock_nr_pages);
>> if (page && set_migratetype_isolate(page, migratetype, flags,
>> start_pfn, end_pfn)) {
>> undo_isolate_page_range(isolate_start, pfn, migratetype);
>> + unset_migratetype_isolate(
>> + pfn_to_page(isolate_end - pageblock_nr_pages),
>> + migratetype);
>> return -EBUSY;
>> }
>> }
--
Best Regards,
Yan, Zi
[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 854 bytes --]
^ permalink raw reply [flat|nested] 42+ messages in thread* Re: [PATCH v5 3/6] mm: make alloc_contig_range work at pageblock granularity
@ 2022-02-14 16:03 ` Zi Yan
0 siblings, 0 replies; 42+ messages in thread
From: Zi Yan @ 2022-02-14 16:03 UTC (permalink / raw)
To: Christophe Leroy
Cc: David Hildenbrand, Robin Murphy, linuxppc-dev, linux-kernel,
virtualization, linux-mm, iommu, Vlastimil Babka, Eric Ren,
Marek Szyprowski, Mel Gorman, Christoph Hellwig, Mike Rapoport,
Oscar Salvador
[-- Attachment #1: Type: text/plain, Size: 18627 bytes --]
On 14 Feb 2022, at 2:59, Christophe Leroy wrote:
> Le 11/02/2022 à 17:41, Zi Yan a écrit :
>> From: Zi Yan <ziy@nvidia.com>
>>
>> alloc_contig_range() worked at MAX_ORDER-1 granularity to avoid merging
>> pageblocks with different migratetypes. It might unnecessarily convert
>> extra pageblocks at the beginning and at the end of the range. Change
>> alloc_contig_range() to work at pageblock granularity.
>>
>> Special handling is needed for free pages and in-use pages across the
>> boundaries of the range specified alloc_contig_range(). Because these
>> partially isolated pages causes free page accounting issues. The free
>> pages will be split and freed into separate migratetype lists; the
>> in-use pages will be migrated then the freed pages will be handled.
>>
>> Signed-off-by: Zi Yan <ziy@nvidia.com>
>> ---
>> include/linux/page-isolation.h | 2 +-
>> mm/internal.h | 3 +
>> mm/memory_hotplug.c | 3 +-
>> mm/page_alloc.c | 235 +++++++++++++++++++++++++--------
>> mm/page_isolation.c | 33 ++++-
>> 5 files changed, 211 insertions(+), 65 deletions(-)
>>
>> diff --git a/include/linux/page-isolation.h b/include/linux/page-isolation.h
>> index 4ef7be6def83..78ff940cc169 100644
>> --- a/include/linux/page-isolation.h
>> +++ b/include/linux/page-isolation.h
>> @@ -54,7 +54,7 @@ int move_freepages_block(struct zone *zone, struct page *page,
>> */
>> int
>> start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
>> - unsigned migratetype, int flags);
>> + unsigned migratetype, int flags, gfp_t gfp_flags);
>>
>> /*
>> * Changes MIGRATE_ISOLATE to MIGRATE_MOVABLE.
>> diff --git a/mm/internal.h b/mm/internal.h
>> index 0d240e876831..509cbdc25992 100644
>> --- a/mm/internal.h
>> +++ b/mm/internal.h
>> @@ -319,6 +319,9 @@ isolate_freepages_range(struct compact_control *cc,
>> int
>> isolate_migratepages_range(struct compact_control *cc,
>> unsigned long low_pfn, unsigned long end_pfn);
>> +
>> +int
>> +isolate_single_pageblock(unsigned long boundary_pfn, gfp_t gfp_flags, int isolate_before_boundary);
>> #endif
>> int find_suitable_fallback(struct free_area *area, unsigned int order,
>> int migratetype, bool only_stealable, bool *can_steal);
>> diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
>> index ce68098832aa..82406d2f3e46 100644
>> --- a/mm/memory_hotplug.c
>> +++ b/mm/memory_hotplug.c
>> @@ -1863,7 +1863,8 @@ int __ref offline_pages(unsigned long start_pfn, unsigned long nr_pages,
>> /* set above range as isolated */
>> ret = start_isolate_page_range(start_pfn, end_pfn,
>> MIGRATE_MOVABLE,
>> - MEMORY_OFFLINE | REPORT_FAILURE);
>> + MEMORY_OFFLINE | REPORT_FAILURE,
>> + GFP_USER | __GFP_MOVABLE | __GFP_RETRY_MAYFAIL);
>> if (ret) {
>> reason = "failure to isolate range";
>> goto failed_removal_pcplists_disabled;
>> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
>> index 62ef78f3d771..7a4fa21aea5c 100644
>> --- a/mm/page_alloc.c
>> +++ b/mm/page_alloc.c
>> @@ -8985,7 +8985,7 @@ static inline void alloc_contig_dump_pages(struct list_head *page_list)
>> #endif
>>
>> /* [start, end) must belong to a single zone. */
>> -static int __alloc_contig_migrate_range(struct compact_control *cc,
>> +int __alloc_contig_migrate_range(struct compact_control *cc,
>> unsigned long start, unsigned long end)
>> {
>> /* This function is based on compact_zone() from compaction.c. */
>> @@ -9043,6 +9043,167 @@ static int __alloc_contig_migrate_range(struct compact_control *cc,
>> return 0;
>> }
>>
>> +/**
>> + * split_free_page() -- split a free page at split_pfn_offset
>> + * @free_page: the original free page
>> + * @order: the order of the page
>> + * @split_pfn_offset: split offset within the page
>> + *
>> + * It is used when the free page crosses two pageblocks with different migratetypes
>> + * at split_pfn_offset within the page. The split free page will be put into
>> + * separate migratetype lists afterwards. Otherwise, the function achieves
>> + * nothing.
>> + */
>> +static inline void split_free_page(struct page *free_page,
>> + int order, unsigned long split_pfn_offset)
>> +{
>> + struct zone *zone = page_zone(free_page);
>> + unsigned long free_page_pfn = page_to_pfn(free_page);
>> + unsigned long pfn;
>> + unsigned long flags;
>> + int free_page_order;
>> +
>> + spin_lock_irqsave(&zone->lock, flags);
>> + del_page_from_free_list(free_page, zone, order);
>> + for (pfn = free_page_pfn;
>> + pfn < free_page_pfn + (1UL << order);) {
>> + int mt = get_pfnblock_migratetype(pfn_to_page(pfn), pfn);
>> +
>> + free_page_order = order_base_2(split_pfn_offset);
>> + __free_one_page(pfn_to_page(pfn), pfn, zone, free_page_order,
>> + mt, FPI_NONE);
>> + pfn += 1UL << free_page_order;
>> + split_pfn_offset -= (1UL << free_page_order);
>> + /* we have done the first part, now switch to second part */
>> + if (split_pfn_offset == 0)
>> + split_pfn_offset = (1UL << order) - (pfn - free_page_pfn);
>> + }
>> + spin_unlock_irqrestore(&zone->lock, flags);
>> +}
>> +
>> +/**
>> + * isolate_single_pageblock() -- tries to isolate a pageblock that might be
>> + * within a free or in-use page.
>> + * @boundary_pfn: pageblock-aligned pfn that a page might cross
>> + * @gfp_flags: GFP flags used for migrating pages
>> + * @isolate_before_boundary: isolate the pageblock before (1) or after (0)
>> + * the boundary_pfn
>> + *
>> + * Free and in-use pages can be as big as MAX_ORDER-1 and contain more than one
>> + * pageblock. When not all pageblocks within a page are isolated at the same
>> + * time, free page accounting can go wrong. For example, in the case of
>> + * MAX_ORDER-1 = pageblock_order + 1, a MAX_ORDER-1 page has two pagelbocks.
>> + * [ MAX_ORDER-1 ]
>> + * [ pageblock0 | pageblock1 ]
>> + * When either pageblock is isolated, if it is a free page, the page is not
>> + * split into separate migratetype lists, which is supposed to; if it is an
>> + * in-use page and freed later, __free_one_page() does not split the free page
>> + * either. The function handles this by splitting the free page or migrating
>> + * the in-use page then splitting the free page.
>> + */
>> +int isolate_single_pageblock(unsigned long boundary_pfn, gfp_t gfp_flags,
>> + int isolate_before_boundary)
>
> Do you need such big param name ?
I am happy to take any suggestion.
>
> See
> https://www.kernel.org/doc/html/latest/process/coding-style.html?highlight=style#naming
>
> isolate_before_boundary could probably be shorter.
isolate_before instead?
>
> And should be a bool by the way.
Sure.
>
>> +{
>> + unsigned char saved_mt;
>> + /*
>> + * scan at max(MAX_ORDER_NR_PAGES, pageblock_nr_pages) aligned range to
>> + * avoid isolate pageblocks belonging to a bigger free or in-use page
>> + */
>> + unsigned long start_pfn = pfn_max_align_down(boundary_pfn);
>> + unsigned long isolated_pageblock_pfn;
>
> Variable name too long.
>
>> + unsigned long pfn;
>> +
>> + VM_BUG_ON(!IS_ALIGNED(boundary_pfn, pageblock_nr_pages));
>> +
>> + if (isolate_before_boundary)
>> + isolated_pageblock_pfn = boundary_pfn - pageblock_nr_pages;
>> + else
>> + isolated_pageblock_pfn = boundary_pfn;
>> +
>> + saved_mt = get_pageblock_migratetype(pfn_to_page(isolated_pageblock_pfn));
>> + set_pageblock_migratetype(pfn_to_page(isolated_pageblock_pfn), MIGRATE_ISOLATE);
>> +
>> + for (pfn = start_pfn; pfn < boundary_pfn;) {
>
> This loop is a bit long a deep. Isn't there a way to put what's in "if
> (PageHuge(page) || PageTransCompound(page))" into a sub-function ?
>
Let me give it a try.
> See
> https://www.kernel.org/doc/html/latest/process/coding-style.html?highlight=style#functions
>
Thanks for the review.
>> + struct page *page = pfn_to_page(pfn);
>> +
>> + /*
>> + * start_pfn is max(MAX_ORDER_NR_PAGES, pageblock_nr_pages)
>> + * aligned, if there is any free pages in [start_pfn, boundary_pfn),
>> + * its head page will always be in the range.
>> + */
>> + if (PageBuddy(page)) {
>> + int order = buddy_order(page);
>> +
>> + if (pfn + (1UL << order) > boundary_pfn)
>> + split_free_page(page, order, boundary_pfn - pfn);
>> + pfn += (1UL << order);
>> + continue;
>> + }
>> + /*
>> + * migrate compound pages then let the free page handling code
>> + * above do the rest
>> + */
>> + if (PageHuge(page) || PageTransCompound(page)) {
>> + unsigned long nr_pages = compound_nr(page);
>> + int order = compound_order(page);
>> + struct page *head = compound_head(page);
>> + unsigned long head_pfn = page_to_pfn(head);
>> +
>> + if (head_pfn + nr_pages >= boundary_pfn) {
>> + int ret;
>> + struct compact_control cc = {
>> + .nr_migratepages = 0,
>> + .order = -1,
>> + .zone = page_zone(pfn_to_page(head_pfn)),
>> + .mode = MIGRATE_SYNC,
>> + .ignore_skip_hint = true,
>> + .no_set_skip_hint = true,
>> + .gfp_mask = current_gfp_context(gfp_flags),
>> + .alloc_contig = true,
>> + };
>> +
>> + INIT_LIST_HEAD(&cc.migratepages);
>> +
>> + ret = __alloc_contig_migrate_range(&cc, head_pfn,
>> + head_pfn + nr_pages);
>> +
>> + if (ret) {
>> + /* restore the original migratetype */
>> + set_pageblock_migratetype(
>> + pfn_to_page(isolated_pageblock_pfn),
>> + saved_mt);
>> + return -EBUSY;
>> + }
>> + /*
>> + * reset pfn, let the free page handling code
>> + * above split the free page to the right
>> + * migratetype list.
>> + *
>> + * head_pfn is not used here as a hugetlb page
>> + * order can be bigger than MAX_ORDER-1, but
>> + * after it is freed, the free page order is not.
>> + * Use pfn within the range to find the head of
>> + * the free page and reset order to 0 if a hugetlb
>> + * page with >MAX_ORDER-1 order is encountered.
>> + */
>> + if (order > MAX_ORDER-1)
>> + order = 0;
>> + while (!PageBuddy(pfn_to_page(pfn))) {
>> + order++;
>> + pfn &= ~0UL << order;
>> + }
>> + continue;
>> + }
>> + pfn += nr_pages;
>> + continue;
>> + }
>> +
>> + pfn++;
>> + }
>> + return 0;
>> +}
>> +
>> +
>> /**
>> * alloc_contig_range() -- tries to allocate given range of pages
>> * @start: start PFN to allocate
>> @@ -9067,8 +9228,9 @@ static int __alloc_contig_migrate_range(struct compact_control *cc,
>> int alloc_contig_range(unsigned long start, unsigned long end,
>> unsigned migratetype, gfp_t gfp_mask)
>> {
>> - unsigned long outer_start, outer_end;
>> - unsigned int order;
>> + unsigned long outer_end;
>> + unsigned long alloc_start = ALIGN_DOWN(start, pageblock_nr_pages);
>> + unsigned long alloc_end = ALIGN(end, pageblock_nr_pages);
>> int ret = 0;
>>
>> struct compact_control cc = {
>> @@ -9087,14 +9249,11 @@ int alloc_contig_range(unsigned long start, unsigned long end,
>> * What we do here is we mark all pageblocks in range as
>> * MIGRATE_ISOLATE. Because pageblock and max order pages may
>> * have different sizes, and due to the way page allocator
>> - * work, we align the range to biggest of the two pages so
>> - * that page allocator won't try to merge buddies from
>> - * different pageblocks and change MIGRATE_ISOLATE to some
>> - * other migration type.
>> + * work, start_isolate_page_range() has special handlings for this.
>> *
>> * Once the pageblocks are marked as MIGRATE_ISOLATE, we
>> * migrate the pages from an unaligned range (ie. pages that
>> - * we are interested in). This will put all the pages in
>> + * we are interested in). This will put all the pages in
>> * range back to page allocator as MIGRATE_ISOLATE.
>> *
>> * When this is done, we take the pages in range from page
>> @@ -9107,9 +9266,9 @@ int alloc_contig_range(unsigned long start, unsigned long end,
>> * put back to page allocator so that buddy can use them.
>> */
>>
>> - ret = start_isolate_page_range(start, end, migratetype, 0);
>> + ret = start_isolate_page_range(start, end, migratetype, 0, gfp_mask);
>> if (ret)
>> - return ret;
>> + goto done;
>>
>> drain_all_pages(cc.zone);
>>
>> @@ -9128,68 +9287,28 @@ int alloc_contig_range(unsigned long start, unsigned long end,
>> goto done;
>> ret = 0;
>>
>> - /*
>> - * Pages from [start, end) are within a MAX_ORDER_NR_PAGES
>> - * aligned blocks that are marked as MIGRATE_ISOLATE. What's
>> - * more, all pages in [start, end) are free in page allocator.
>> - * What we are going to do is to allocate all pages from
>> - * [start, end) (that is remove them from page allocator).
>> - *
>> - * The only problem is that pages at the beginning and at the
>> - * end of interesting range may be not aligned with pages that
>> - * page allocator holds, ie. they can be part of higher order
>> - * pages. Because of this, we reserve the bigger range and
>> - * once this is done free the pages we are not interested in.
>> - *
>> - * We don't have to hold zone->lock here because the pages are
>> - * isolated thus they won't get removed from buddy.
>> - */
>> -
>> - order = 0;
>> - outer_start = start;
>> - while (!PageBuddy(pfn_to_page(outer_start))) {
>> - if (++order >= MAX_ORDER) {
>> - outer_start = start;
>> - break;
>> - }
>> - outer_start &= ~0UL << order;
>> - }
>> -
>> - if (outer_start != start) {
>> - order = buddy_order(pfn_to_page(outer_start));
>> -
>> - /*
>> - * outer_start page could be small order buddy page and
>> - * it doesn't include start page. Adjust outer_start
>> - * in this case to report failed page properly
>> - * on tracepoint in test_pages_isolated()
>> - */
>> - if (outer_start + (1UL << order) <= start)
>> - outer_start = start;
>> - }
>> -
>> /* Make sure the range is really isolated. */
>> - if (test_pages_isolated(outer_start, end, 0)) {
>> + if (test_pages_isolated(alloc_start, alloc_end, 0)) {
>> ret = -EBUSY;
>> goto done;
>> }
>>
>> /* Grab isolated pages from freelists. */
>> - outer_end = isolate_freepages_range(&cc, outer_start, end);
>> + outer_end = isolate_freepages_range(&cc, alloc_start, alloc_end);
>> if (!outer_end) {
>> ret = -EBUSY;
>> goto done;
>> }
>>
>> /* Free head and tail (if any) */
>> - if (start != outer_start)
>> - free_contig_range(outer_start, start - outer_start);
>> - if (end != outer_end)
>> - free_contig_range(end, outer_end - end);
>> + if (start != alloc_start)
>> + free_contig_range(alloc_start, start - alloc_start);
>> + if (end != alloc_end)
>> + free_contig_range(end, alloc_end - end);
>>
>> done:
>> - undo_isolate_page_range(pfn_max_align_down(start),
>> - pfn_max_align_up(end), migratetype);
>> + undo_isolate_page_range(alloc_start,
>> + alloc_end, migratetype);
>> return ret;
>> }
>> EXPORT_SYMBOL(alloc_contig_range);
>> diff --git a/mm/page_isolation.c b/mm/page_isolation.c
>> index 64d093ab83ec..0256d5e1032c 100644
>> --- a/mm/page_isolation.c
>> +++ b/mm/page_isolation.c
>> @@ -285,6 +285,8 @@ __first_valid_page(unsigned long pfn, unsigned long nr_pages)
>> * and PageOffline() pages.
>> * REPORT_FAILURE - report details about the failure to
>> * isolate the range
>> + * @gfp_flags: GFP flags used for migrating pages that sit across the
>> + * range boundaries.
>> *
>> * Making page-allocation-type to be MIGRATE_ISOLATE means free pages in
>> * the range will never be allocated. Any free pages and pages freed in the
>> @@ -293,6 +295,10 @@ __first_valid_page(unsigned long pfn, unsigned long nr_pages)
>> * pages in the range finally, the caller have to free all pages in the range.
>> * test_page_isolated() can be used for test it.
>> *
>> + * The function first tries to isolate the pageblocks at the beginning and end
>> + * of the range, since there might be pages across the range boundaries.
>> + * Afterwards, it isolates the rest of the range.
>> + *
>> * There is no high level synchronization mechanism that prevents two threads
>> * from trying to isolate overlapping ranges. If this happens, one thread
>> * will notice pageblocks in the overlapping range already set to isolate.
>> @@ -313,21 +319,38 @@ __first_valid_page(unsigned long pfn, unsigned long nr_pages)
>> * Return: 0 on success and -EBUSY if any part of range cannot be isolated.
>> */
>> int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
>> - unsigned migratetype, int flags)
>> + unsigned migratetype, int flags, gfp_t gfp_flags)
>> {
>> unsigned long pfn;
>> struct page *page;
>> + /* isolation is done at page block granularity */
>> + unsigned long isolate_start = ALIGN_DOWN(start_pfn, pageblock_nr_pages);
>> + unsigned long isolate_end = ALIGN(end_pfn, pageblock_nr_pages);
>> + int ret;
>>
>> - unsigned long isolate_start = pfn_max_align_down(start_pfn);
>> - unsigned long isolate_end = pfn_max_align_up(end_pfn);
>> + /* isolate [isolate_start, isolate_start + pageblock_nr_pages] pageblock */
>> + ret = isolate_single_pageblock(isolate_start, gfp_flags, 0);
>> + if (ret)
>> + return ret;
>> +
>> + /* isolate [isolate_end - pageblock_nr_pages, isolate_end] pageblock */
>> + ret = isolate_single_pageblock(isolate_end, gfp_flags, 1);
>> + if (ret) {
>> + unset_migratetype_isolate(pfn_to_page(isolate_start), migratetype);
>> + return ret;
>> + }
>>
>> - for (pfn = isolate_start;
>> - pfn < isolate_end;
>> + /* skip isolated pageblocks at the beginning and end */
>> + for (pfn = isolate_start + pageblock_nr_pages;
>> + pfn < isolate_end - pageblock_nr_pages;
>> pfn += pageblock_nr_pages) {
>> page = __first_valid_page(pfn, pageblock_nr_pages);
>> if (page && set_migratetype_isolate(page, migratetype, flags,
>> start_pfn, end_pfn)) {
>> undo_isolate_page_range(isolate_start, pfn, migratetype);
>> + unset_migratetype_isolate(
>> + pfn_to_page(isolate_end - pageblock_nr_pages),
>> + migratetype);
>> return -EBUSY;
>> }
>> }
--
Best Regards,
Yan, Zi
[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 854 bytes --]
^ permalink raw reply [flat|nested] 42+ messages in thread