Linux-mm Archive on lore.kernel.org
 help / color / mirror / Atom feed
From: JP Kobryn <jp.kobryn@linux.dev>
To: akpm@linux-foundation.org, david@kernel.org, ljs@kernel.org,
	liam@infradead.org, vbabka@kernel.org, rppt@kernel.org,
	surenb@google.com, mhocko@suse.com, jackmanb@google.com,
	hannes@cmpxchg.org, ziy@nvidia.com, fvdl@google.com,
	linux-mm@kvack.org
Cc: shakeel.butt@linux.dev, usama.arif@linux.dev,
	linux-kernel@vger.kernel.org
Subject: [PATCH v2] mm/page_alloc: use existing highatomic reserves on the buddy fastpath
Date: Wed, 17 Jun 2026 16:49:58 -0700	[thread overview]
Message-ID: <20260617234958.150339-1-jp.kobryn@linux.dev> (raw)

ALLOC_HIGHATOMIC currently provides both access to MIGRATE_HIGHATOMIC free
pages and permission to create new highatomic pageblock reserves. Because
the two are coupled, the flag is unsuitable for the fastpath, which should
only use existing reserves and not grow them.

However, the fastpath can reach rmqueue_buddy() while MIGRATE_HIGHATOMIC
reserves have free pages available. In this situation, the allocation can
fall back to other migratetypes without trying those reserves first.

Allow high-priority non-blocking allocations above order-0 and up to the
costly order to use existing MIGRATE_HIGHATOMIC reserves on the buddy
fastpath. Change the semantics of ALLOC_HIGHATOMIC so that it only allows
access to the reserves without permission to grow them. Add a new flag
ALLOC_HIGHATOMIC_RESERVE that specifically allows growing the reserves.

A UDP receive workload was run with free MIGRATE_HIGHATOMIC pageblocks
available in the target zone. Before this patch, the workload did not
consume these blocks. With this patch, eligible order-1 allocations
reaching the buddy path consumed existing MIGRATE_HIGHATOMIC pageblocks,
with no highatomic misses observed. The workload did not grow highatomic
reserves, and NAPI page-frag allocations remained healthy, with no failures
or order-0 fallbacks.

Signed-off-by: JP Kobryn <jp.kobryn@linux.dev>
Reviewed-by: Vlastimil Babka (SUSE) <vbabka@kernel.org>
---
v2:
  - decouple use semantics from ALLOC_HIGHATOMIC_RESERVE
  - update changelog to reflect above change and reword test paragraph
  - adjust comment in PCP path
  - rebase onto Linus' tree ~v7.2-rc1

v1: https://lore.kernel.org/linux-mm/20260616191420.52556-1-jp.kobryn@linux.dev/

 mm/internal.h   |  1 +
 mm/page_alloc.c | 30 +++++++++++++++++++++++++-----
 2 files changed, 26 insertions(+), 5 deletions(-)

diff --git a/mm/internal.h b/mm/internal.h
index 5a2ddcf68e0b..6700659615e8 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -1478,6 +1478,7 @@ unsigned int reclaim_clean_pages_from_list(struct zone *zone,
 #define ALLOC_HIGHATOMIC	0x200 /* Allows access to MIGRATE_HIGHATOMIC */
 #define ALLOC_TRYLOCK		0x400 /* Only use spin_trylock in allocation path */
 #define ALLOC_KSWAPD		0x800 /* allow waking of kswapd, __GFP_KSWAPD_RECLAIM set */
+#define ALLOC_HIGHATOMIC_RESERVE	0x1000 /* Allows growing MIGRATE_HIGHATOMIC reserves */
 
 /* Flags that allow allocations below the min watermark. */
 #define ALLOC_RESERVES (ALLOC_NON_BLOCK|ALLOC_MIN_RESERVE|ALLOC_HIGHATOMIC|ALLOC_OOM)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index d49c254174da..ed919e2ac99a 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -3238,7 +3238,7 @@ struct page *rmqueue_buddy(struct zone *preferred_zone, struct zone *zone,
 	 * If this is a high-order atomic allocation then check
 	 * if the pageblock should be reserved for the future
 	 */
-	if (unlikely(alloc_flags & ALLOC_HIGHATOMIC))
+	if (unlikely(alloc_flags & ALLOC_HIGHATOMIC_RESERVE))
 		reserve_highatomic_pageblock(page, order, zone);
 
 	__count_zid_vm_events(PGALLOC, page_zonenum(page), 1 << order);
@@ -3320,8 +3320,9 @@ struct page *__rmqueue_pcplist(struct zone *zone, unsigned int order,
 			 *
 			 * Instead, direct it towards the reserves by
 			 * returning NULL, which will make the caller fall
-			 * back to rmqueue_buddy. This will try to use the
-			 * reserves first and grow them if needed.
+			 * back to rmqueue_buddy. There it will try to use
+			 * the reserves first and grow them if needed and
+			 * permitted by the ALLOC_HIGHATOMIC_RESERVE flag.
 			 */
 			if (alloc_flags & ALLOC_HIGHATOMIC)
 				return NULL;
@@ -3768,6 +3769,24 @@ alloc_flags_nofragment(struct zone *zone, gfp_t gfp_mask)
 	return alloc_flags;
 }
 
+/*
+ * Let high-priority non-blocking allocations above order-0 and up
+ * to the costly order try to use existing MIGRATE_HIGHATOMIC
+ * reserves on the fastpath.
+ */
+static inline unsigned int
+alloc_flags_highatomic_fastpath(gfp_t gfp_mask, unsigned int order)
+{
+	if (!order || order > PAGE_ALLOC_COSTLY_ORDER)
+		return 0;
+	if (!(gfp_mask & __GFP_HIGH))
+		return 0;
+	if (gfp_mask & (__GFP_DIRECT_RECLAIM | __GFP_NOMEMALLOC))
+		return 0;
+
+	return ALLOC_HIGHATOMIC;
+}
+
 /* Must be called after current_gfp_context() which can change gfp_mask */
 static inline unsigned int gfp_to_alloc_flags_cma(gfp_t gfp_mask,
 						  unsigned int alloc_flags)
@@ -4495,7 +4514,7 @@ gfp_to_alloc_flags(gfp_t gfp_mask, unsigned int order)
 			alloc_flags |= ALLOC_NON_BLOCK;
 
 			if (order > 0 && (alloc_flags & ALLOC_MIN_RESERVE))
-				alloc_flags |= ALLOC_HIGHATOMIC;
+				alloc_flags |= (ALLOC_HIGHATOMIC | ALLOC_HIGHATOMIC_RESERVE);
 		}
 
 		/*
@@ -5215,7 +5234,8 @@ struct page *__alloc_frozen_pages_noprof(gfp_t gfp, unsigned int order,
 	 * Forbid the first pass from falling back to types that fragment
 	 * memory until all local zones are considered.
 	 */
-	alloc_flags |= alloc_flags_nofragment(zonelist_zone(ac.preferred_zoneref), gfp);
+	alloc_flags |= alloc_flags_nofragment(zonelist_zone(ac.preferred_zoneref), gfp) |
+			alloc_flags_highatomic_fastpath(alloc_gfp, order);
 
 	/* First allocation attempt */
 	page = get_page_from_freelist(alloc_gfp, order, alloc_flags, &ac);
-- 
2.54.0



                 reply	other threads:[~2026-06-17 23:50 UTC|newest]

Thread overview: [no followups] expand[flat|nested]  mbox.gz  Atom feed

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260617234958.150339-1-jp.kobryn@linux.dev \
    --to=jp.kobryn@linux.dev \
    --cc=akpm@linux-foundation.org \
    --cc=david@kernel.org \
    --cc=fvdl@google.com \
    --cc=hannes@cmpxchg.org \
    --cc=jackmanb@google.com \
    --cc=liam@infradead.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=ljs@kernel.org \
    --cc=mhocko@suse.com \
    --cc=rppt@kernel.org \
    --cc=shakeel.butt@linux.dev \
    --cc=surenb@google.com \
    --cc=usama.arif@linux.dev \
    --cc=vbabka@kernel.org \
    --cc=ziy@nvidia.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox