From: Johannes Weiner <hannes@cmpxchg.org>
To: kernel test robot <oliver.sang@intel.com>
Cc: oe-lkp@lists.linux.dev, lkp@intel.com,
Andrew Morton <akpm@linux-foundation.org>,
Vlastimil Babka <vbabka@suse.cz>,
Brendan Jackman <jackmanb@google.com>,
linux-mm@kvack.org
Subject: Re: [linux-next:master] [mm] c2f6ea38fc: vm-scalability.throughput 56.4% regression
Date: Wed, 2 Apr 2025 15:50:42 -0400 [thread overview]
Message-ID: <20250402195042.GC198651@cmpxchg.org> (raw)
In-Reply-To: <202503271547.fc08b188-lkp@intel.com>
Hello,
On Thu, Mar 27, 2025 at 04:20:41PM +0800, kernel test robot wrote:
> kernel test robot noticed a 56.4% regression of vm-scalability.throughput on:
>
> commit: c2f6ea38fc1b640aa7a2e155cc1c0410ff91afa2 ("mm: page_alloc: don't steal single pages from biggest buddy")
> https://git.kernel.org/cgit/linux/kernel/git/next/linux-next.git master
>
> testcase: vm-scalability
> config: x86_64-rhel-9.4
> compiler: gcc-12
> test machine: 224 threads 4 sockets Intel(R) Xeon(R) Platinum 8380H CPU @ 2.90GHz (Cooper Lake) with 192G memory
> parameters:
>
> runtime: 300s
> test: lru-file-mmap-read
> cpufreq_governor: performance
Thanks for the report.
Would you be able to re-test with the below patch applied?
There are more details in the thread here:
https://lore.kernel.org/all/20250402194425.GB198651@cmpxchg.org/
It's on top of the following upstream commit:
commit acc4d5ff0b61eb1715c498b6536c38c1feb7f3c1 (origin/master, origin/HEAD)
Merge: 3491aa04787f f278b6d5bb46
Author: Linus Torvalds <torvalds@linux-foundation.org>
Date: Tue Apr 1 20:00:51 2025 -0700
Merge tag 'net-6.15-rc0' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net
Thanks!
---
From 13433454403e0c6f99ccc3b76c609034fe47e41c Mon Sep 17 00:00:00 2001
From: Johannes Weiner <hannes@cmpxchg.org>
Date: Wed, 2 Apr 2025 14:23:53 -0400
Subject: [PATCH] mm: page_alloc: speed up fallbacks in rmqueue_bulk()
Not-yet-signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
---
mm/page_alloc.c | 100 +++++++++++++++++++++++++++++++++++-------------
1 file changed, 74 insertions(+), 26 deletions(-)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index f51aa6051a99..03b0d45ed45a 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2194,11 +2194,11 @@ try_to_claim_block(struct zone *zone, struct page *page,
* The use of signed ints for order and current_order is a deliberate
* deviation from the rest of this file, to make the for loop
* condition simpler.
- *
- * Return the stolen page, or NULL if none can be found.
*/
+
+/* Try to claim a whole foreign block, take a page, expand the remainder */
static __always_inline struct page *
-__rmqueue_fallback(struct zone *zone, int order, int start_migratetype,
+__rmqueue_claim(struct zone *zone, int order, int start_migratetype,
unsigned int alloc_flags)
{
struct free_area *area;
@@ -2236,14 +2236,26 @@ __rmqueue_fallback(struct zone *zone, int order, int start_migratetype,
page = try_to_claim_block(zone, page, current_order, order,
start_migratetype, fallback_mt,
alloc_flags);
- if (page)
- goto got_one;
+ if (page) {
+ trace_mm_page_alloc_extfrag(page, order, current_order,
+ start_migratetype, fallback_mt);
+ return page;
+ }
}
- if (alloc_flags & ALLOC_NOFRAGMENT)
- return NULL;
+ return NULL;
+}
+
+/* Try to steal a single page from a foreign block */
+static __always_inline struct page *
+__rmqueue_steal(struct zone *zone, int order, int start_migratetype)
+{
+ struct free_area *area;
+ int current_order;
+ struct page *page;
+ int fallback_mt;
+ bool claim_block;
- /* No luck claiming pageblock. Find the smallest fallback page */
for (current_order = order; current_order < NR_PAGE_ORDERS; current_order++) {
area = &(zone->free_area[current_order]);
fallback_mt = find_suitable_fallback(area, current_order,
@@ -2253,25 +2265,28 @@ __rmqueue_fallback(struct zone *zone, int order, int start_migratetype,
page = get_page_from_free_area(area, fallback_mt);
page_del_and_expand(zone, page, order, current_order, fallback_mt);
- goto got_one;
+ trace_mm_page_alloc_extfrag(page, order, current_order,
+ start_migratetype, fallback_mt);
+ return page;
}
return NULL;
-
-got_one:
- trace_mm_page_alloc_extfrag(page, order, current_order,
- start_migratetype, fallback_mt);
-
- return page;
}
+enum rmqueue_mode {
+ RMQUEUE_NORMAL,
+ RMQUEUE_CMA,
+ RMQUEUE_CLAIM,
+ RMQUEUE_STEAL,
+};
+
/*
* Do the hard work of removing an element from the buddy allocator.
* Call me with the zone->lock already held.
*/
static __always_inline struct page *
__rmqueue(struct zone *zone, unsigned int order, int migratetype,
- unsigned int alloc_flags)
+ unsigned int alloc_flags, enum rmqueue_mode *mode)
{
struct page *page;
@@ -2290,16 +2305,47 @@ __rmqueue(struct zone *zone, unsigned int order, int migratetype,
}
}
- page = __rmqueue_smallest(zone, order, migratetype);
- if (unlikely(!page)) {
- if (alloc_flags & ALLOC_CMA)
+ /*
+ * Try the different freelists, native then foreign.
+ *
+ * The fallback logic is expensive and rmqueue_bulk() calls in
+ * a loop with the zone->lock held, meaning the freelists are
+ * not subject to any outside changes. Remember in *mode where
+ * we found pay dirt, to save us the search on the next call.
+ */
+ switch (*mode) {
+ case RMQUEUE_NORMAL:
+ page = __rmqueue_smallest(zone, order, migratetype);
+ if (page)
+ return page;
+ fallthrough;
+ case RMQUEUE_CMA:
+ if (alloc_flags & ALLOC_CMA) {
page = __rmqueue_cma_fallback(zone, order);
-
- if (!page)
- page = __rmqueue_fallback(zone, order, migratetype,
- alloc_flags);
+ if (page) {
+ *mode = RMQUEUE_CMA;
+ return page;
+ }
+ }
+ fallthrough;
+ case RMQUEUE_CLAIM:
+ page = __rmqueue_claim(zone, order, migratetype, alloc_flags);
+ if (page) {
+ /* Replenished native freelist, back to normal mode */
+ *mode = RMQUEUE_NORMAL;
+ return page;
+ }
+ fallthrough;
+ case RMQUEUE_STEAL:
+ if (!(alloc_flags & ALLOC_NOFRAGMENT)) {
+ page = __rmqueue_steal(zone, order, migratetype);
+ if (page) {
+ *mode = RMQUEUE_STEAL;
+ return page;
+ }
+ }
}
- return page;
+ return NULL;
}
/*
@@ -2311,6 +2357,7 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order,
unsigned long count, struct list_head *list,
int migratetype, unsigned int alloc_flags)
{
+ enum rmqueue_mode rmqm = RMQUEUE_NORMAL;
unsigned long flags;
int i;
@@ -2321,7 +2368,7 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order,
}
for (i = 0; i < count; ++i) {
struct page *page = __rmqueue(zone, order, migratetype,
- alloc_flags);
+ alloc_flags, &rmqm);
if (unlikely(page == NULL))
break;
@@ -2934,6 +2981,7 @@ struct page *rmqueue_buddy(struct zone *preferred_zone, struct zone *zone,
{
struct page *page;
unsigned long flags;
+ enum rmqueue_mode rmqm = RMQUEUE_NORMAL;
do {
page = NULL;
@@ -2945,7 +2993,7 @@ struct page *rmqueue_buddy(struct zone *preferred_zone, struct zone *zone,
if (alloc_flags & ALLOC_HIGHATOMIC)
page = __rmqueue_smallest(zone, order, MIGRATE_HIGHATOMIC);
if (!page) {
- page = __rmqueue(zone, order, migratetype, alloc_flags);
+ page = __rmqueue(zone, order, migratetype, alloc_flags, &rmqm);
/*
* If the allocation fails, allow OOM handling and
--
2.49.0
next prev parent reply other threads:[~2025-04-02 19:50 UTC|newest]
Thread overview: 3+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-03-27 8:20 [linux-next:master] [mm] c2f6ea38fc: vm-scalability.throughput 56.4% regression kernel test robot
2025-04-02 19:50 ` Johannes Weiner [this message]
2025-04-03 8:48 ` Oliver Sang
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20250402195042.GC198651@cmpxchg.org \
--to=hannes@cmpxchg.org \
--cc=akpm@linux-foundation.org \
--cc=jackmanb@google.com \
--cc=linux-mm@kvack.org \
--cc=lkp@intel.com \
--cc=oe-lkp@lists.linux.dev \
--cc=oliver.sang@intel.com \
--cc=vbabka@suse.cz \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.