[PATCH] mm/page_alloc: use existing highatomic reserves on the buddy fastpath

All of lore.kernel.org
 help / color / mirror / Atom feed

* [PATCH] mm/page_alloc: use existing highatomic reserves on the buddy fastpath
@ 2026-06-16 19:14 JP Kobryn
  2026-06-17 13:02 ` Vlastimil Babka (SUSE)
  0 siblings, 1 reply; 2+ messages in thread
From: JP Kobryn @ 2026-06-16 19:14 UTC (permalink / raw)
  To: akpm, david, ljs, liam, vbabka, rppt, surenb, mhocko, jackmanb,
	hannes, ziy, fvdl, linux-mm
  Cc: shakeel.butt, usama.arif, linux-kernel

ALLOC_HIGHATOMIC currently provides both access to MIGRATE_HIGHATOMIC free
pages and permission to create new highatomic pageblock reserves. This
makes it unsuitable for the fastpath.

However, the fastpath can reach rmqueue_buddy() while MIGRATE_HIGHATOMIC
reserves have free pages available. In this situation, the allocation can
fall back to other migratetypes without trying those reserves first.

Allow high-priority non-blocking allocations above order-0 and up to the
costly order to use existing MIGRATE_HIGHATOMIC reserves on the buddy
fastpath without granting permission to grow these reserves. Add
ALLOC_HIGHATOMIC_RESERVE for allocations that may both access
MIGRATE_HIGHATOMIC and grow the reserves. Change the semantics of
ALLOC_HIGHATOMIC so that it may only access the reserves.

A UDP receive workload was run with free MIGRATE_HIGHATOMIC pageblocks
available in the target zone. Before this patch, the workload did not
consume these blocks. With this patch, comparable runs consumed available
blocks for 96-100% of eligible order-1 atomic allocations reaching the
buddy path, with no highatomic misses observed. The workload did not grow
highatomic reserves and NAPI page-frag allocations remained healthy with no
failures or order-0 fallbacks.

Signed-off-by: JP Kobryn <jp.kobryn@linux.dev>
---
 mm/internal.h   |  4 +++-
 mm/page_alloc.c | 34 +++++++++++++++++++++++++++-------
 2 files changed, 30 insertions(+), 8 deletions(-)

diff --git a/mm/internal.h b/mm/internal.h
index 181e79f1d6a2..a7693a9fdd29 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -1477,9 +1477,11 @@ unsigned int reclaim_clean_pages_from_list(struct zone *zone,
 #define ALLOC_HIGHATOMIC	0x200 /* Allows access to MIGRATE_HIGHATOMIC */
 #define ALLOC_TRYLOCK		0x400 /* Only use spin_trylock in allocation path */
 #define ALLOC_KSWAPD		0x800 /* allow waking of kswapd, __GFP_KSWAPD_RECLAIM set */
+#define ALLOC_HIGHATOMIC_RESERVE	0x1000 /* Allows growing MIGRATE_HIGHATOMIC reserves */
 
 /* Flags that allow allocations below the min watermark. */
-#define ALLOC_RESERVES (ALLOC_NON_BLOCK|ALLOC_MIN_RESERVE|ALLOC_HIGHATOMIC|ALLOC_OOM)
+#define ALLOC_RESERVES (ALLOC_NON_BLOCK | ALLOC_MIN_RESERVE | \
+	ALLOC_HIGHATOMIC | ALLOC_OOM | ALLOC_HIGHATOMIC_RESERVE)
 
 enum ttu_flags;
 struct tlbflush_unmap_batch;
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index ee902a468c2f..e1c28bc0ba3f 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -3222,7 +3222,7 @@ struct page *rmqueue_buddy(struct zone *preferred_zone, struct zone *zone,
 		} else {
 			spin_lock_irqsave(&zone->lock, flags);
 		}
-		if (alloc_flags & ALLOC_HIGHATOMIC)
+		if (alloc_flags & (ALLOC_HIGHATOMIC | ALLOC_HIGHATOMIC_RESERVE))
 			page = __rmqueue_smallest(zone, order, MIGRATE_HIGHATOMIC);
 		if (!page) {
 			enum rmqueue_mode rmqm = RMQUEUE_NORMAL;
@@ -3250,7 +3250,7 @@ struct page *rmqueue_buddy(struct zone *preferred_zone, struct zone *zone,
 	 * If this is a high-order atomic allocation then check
 	 * if the pageblock should be reserved for the future
 	 */
-	if (unlikely(alloc_flags & ALLOC_HIGHATOMIC))
+	if (unlikely(alloc_flags & ALLOC_HIGHATOMIC_RESERVE))
 		reserve_highatomic_pageblock(page, order, zone);
 
 	__count_zid_vm_events(PGALLOC, page_zonenum(page), 1 << order);
@@ -3333,9 +3333,10 @@ struct page *__rmqueue_pcplist(struct zone *zone, unsigned int order,
 			 * Instead, direct it towards the reserves by
 			 * returning NULL, which will make the caller fall
 			 * back to rmqueue_buddy. This will try to use the
-			 * reserves first and grow them if needed.
+			 * reserves first and grow them if permitted by
+			 * the ALLOC_HIGHATOMIC_RESERVE flag.
 			 */
-			if (alloc_flags & ALLOC_HIGHATOMIC)
+			if (alloc_flags & (ALLOC_HIGHATOMIC | ALLOC_HIGHATOMIC_RESERVE))
 				return NULL;
 
 			alloced = rmqueue_bulk(zone, order,
@@ -3653,7 +3654,7 @@ bool __zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark,
 			return true;
 		}
 #endif
-		if ((alloc_flags & (ALLOC_HIGHATOMIC|ALLOC_OOM)) &&
+		if ((alloc_flags & (ALLOC_HIGHATOMIC | ALLOC_HIGHATOMIC_RESERVE | ALLOC_OOM)) &&
 		    !free_area_empty(area, MIGRATE_HIGHATOMIC)) {
 			return true;
 		}
@@ -3773,6 +3774,24 @@ alloc_flags_nofragment(struct zone *zone, gfp_t gfp_mask)
 	return alloc_flags;
 }
 
+/*
+ * Let high-priority non-blocking allocations above order-0 and up
+ * to the costly order try to use existing MIGRATE_HIGHATOMIC
+ * reserves on the fastpath.
+ */
+static inline unsigned int
+alloc_flags_highatomic_fastpath(gfp_t gfp_mask, unsigned int order)
+{
+	if (!order || order > PAGE_ALLOC_COSTLY_ORDER)
+		return 0;
+	if (!(gfp_mask & __GFP_HIGH))
+		return 0;
+	if (gfp_mask & (__GFP_DIRECT_RECLAIM | __GFP_NOMEMALLOC))
+		return 0;
+
+	return ALLOC_HIGHATOMIC;
+}
+
 /* Must be called after current_gfp_context() which can change gfp_mask */
 static inline unsigned int gfp_to_alloc_flags_cma(gfp_t gfp_mask,
 						  unsigned int alloc_flags)
@@ -4504,7 +4523,7 @@ gfp_to_alloc_flags(gfp_t gfp_mask, unsigned int order)
 			alloc_flags |= ALLOC_NON_BLOCK;
 
 			if (order > 0 && (alloc_flags & ALLOC_MIN_RESERVE))
-				alloc_flags |= ALLOC_HIGHATOMIC;
+				alloc_flags |= ALLOC_HIGHATOMIC_RESERVE;
 		}
 
 		/*
@@ -5298,7 +5317,8 @@ struct page *__alloc_frozen_pages_noprof(gfp_t gfp, unsigned int order,
 	 * Forbid the first pass from falling back to types that fragment
 	 * memory until all local zones are considered.
 	 */
-	alloc_flags |= alloc_flags_nofragment(zonelist_zone(ac.preferred_zoneref), gfp);
+	alloc_flags |= alloc_flags_nofragment(zonelist_zone(ac.preferred_zoneref), gfp) |
+			alloc_flags_highatomic_fastpath(alloc_gfp, order);
 
 	/* First allocation attempt */
 	page = get_page_from_freelist(alloc_gfp, order, alloc_flags, &ac);
-- 
2.54.0


^ permalink raw reply related	[flat|nested] 2+ messages in thread

* Re: [PATCH] mm/page_alloc: use existing highatomic reserves on the buddy fastpath
  2026-06-16 19:14 [PATCH] mm/page_alloc: use existing highatomic reserves on the buddy fastpath JP Kobryn
@ 2026-06-17 13:02 ` Vlastimil Babka (SUSE)
  0 siblings, 0 replies; 2+ messages in thread
From: Vlastimil Babka (SUSE) @ 2026-06-17 13:02 UTC (permalink / raw)
  To: JP Kobryn, akpm, david, ljs, liam, rppt, surenb, mhocko, jackmanb,
	hannes, ziy, fvdl, linux-mm
  Cc: shakeel.butt, usama.arif, linux-kernel

On 6/16/26 21:14, JP Kobryn wrote:
> ALLOC_HIGHATOMIC currently provides both access to MIGRATE_HIGHATOMIC free
> pages and permission to create new highatomic pageblock reserves. This
> makes it unsuitable for the fastpath.
> 
> However, the fastpath can reach rmqueue_buddy() while MIGRATE_HIGHATOMIC
> reserves have free pages available. In this situation, the allocation can
> fall back to other migratetypes without trying those reserves first.
> 
> Allow high-priority non-blocking allocations above order-0 and up to the
> costly order to use existing MIGRATE_HIGHATOMIC reserves on the buddy
> fastpath without granting permission to grow these reserves. Add
> ALLOC_HIGHATOMIC_RESERVE for allocations that may both access
> MIGRATE_HIGHATOMIC and grow the reserves. Change the semantics of
> ALLOC_HIGHATOMIC so that it may only access the reserves.
> 
> A UDP receive workload was run with free MIGRATE_HIGHATOMIC pageblocks
> available in the target zone. Before this patch, the workload did not
> consume these blocks. With this patch, comparable runs consumed available
> blocks for 96-100% of eligible order-1 atomic allocations reaching the
> buddy path, with no highatomic misses observed. The workload did not grow
> highatomic reserves and NAPI page-frag allocations remained healthy with no
> failures or order-0 fallbacks.

Great.

> Signed-off-by: JP Kobryn <jp.kobryn@linux.dev>

LGTM, I have just one style suggestion. If you agree and apply it, feel free
to add:

Reviewed-by: Vlastimil Babka (SUSE) <vbabka@kernel.org>

... and (unless other reviews raise something) send v2 rebased to 7.2-rc1
once it's released. Thanks!

> ---
>  mm/internal.h   |  4 +++-
>  mm/page_alloc.c | 34 +++++++++++++++++++++++++++-------
>  2 files changed, 30 insertions(+), 8 deletions(-)
> 
> diff --git a/mm/internal.h b/mm/internal.h
> index 181e79f1d6a2..a7693a9fdd29 100644
> --- a/mm/internal.h
> +++ b/mm/internal.h
> @@ -1477,9 +1477,11 @@ unsigned int reclaim_clean_pages_from_list(struct zone *zone,
>  #define ALLOC_HIGHATOMIC	0x200 /* Allows access to MIGRATE_HIGHATOMIC */
>  #define ALLOC_TRYLOCK		0x400 /* Only use spin_trylock in allocation path */
>  #define ALLOC_KSWAPD		0x800 /* allow waking of kswapd, __GFP_KSWAPD_RECLAIM set */
> +#define ALLOC_HIGHATOMIC_RESERVE	0x1000 /* Allows growing MIGRATE_HIGHATOMIC reserves */
>  
>  /* Flags that allow allocations below the min watermark. */
> -#define ALLOC_RESERVES (ALLOC_NON_BLOCK|ALLOC_MIN_RESERVE|ALLOC_HIGHATOMIC|ALLOC_OOM)
> +#define ALLOC_RESERVES (ALLOC_NON_BLOCK | ALLOC_MIN_RESERVE | \
> +	ALLOC_HIGHATOMIC | ALLOC_OOM | ALLOC_HIGHATOMIC_RESERVE)
>  
>  enum ttu_flags;
>  struct tlbflush_unmap_batch;
> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> index ee902a468c2f..e1c28bc0ba3f 100644
> --- a/mm/page_alloc.c
> +++ b/mm/page_alloc.c
> @@ -3222,7 +3222,7 @@ struct page *rmqueue_buddy(struct zone *preferred_zone, struct zone *zone,
>  		} else {
>  			spin_lock_irqsave(&zone->lock, flags);
>  		}
> -		if (alloc_flags & ALLOC_HIGHATOMIC)
> +		if (alloc_flags & (ALLOC_HIGHATOMIC | ALLOC_HIGHATOMIC_RESERVE))

I'd keep checking only ALLOC_HIGHATOMIC ...

>  			page = __rmqueue_smallest(zone, order, MIGRATE_HIGHATOMIC);
>  		if (!page) {
>  			enum rmqueue_mode rmqm = RMQUEUE_NORMAL;
> @@ -3250,7 +3250,7 @@ struct page *rmqueue_buddy(struct zone *preferred_zone, struct zone *zone,
>  	 * If this is a high-order atomic allocation then check
>  	 * if the pageblock should be reserved for the future
>  	 */
> -	if (unlikely(alloc_flags & ALLOC_HIGHATOMIC))
> +	if (unlikely(alloc_flags & ALLOC_HIGHATOMIC_RESERVE))
>  		reserve_highatomic_pageblock(page, order, zone);
>  
>  	__count_zid_vm_events(PGALLOC, page_zonenum(page), 1 << order);
> @@ -3333,9 +3333,10 @@ struct page *__rmqueue_pcplist(struct zone *zone, unsigned int order,
>  			 * Instead, direct it towards the reserves by
>  			 * returning NULL, which will make the caller fall
>  			 * back to rmqueue_buddy. This will try to use the
> -			 * reserves first and grow them if needed.
> +			 * reserves first and grow them if permitted by
> +			 * the ALLOC_HIGHATOMIC_RESERVE flag.
>  			 */
> -			if (alloc_flags & ALLOC_HIGHATOMIC)
> +			if (alloc_flags & (ALLOC_HIGHATOMIC | ALLOC_HIGHATOMIC_RESERVE))

Here too ...

>  				return NULL;
>  
>  			alloced = rmqueue_bulk(zone, order,
> @@ -3653,7 +3654,7 @@ bool __zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark,
>  			return true;
>  		}
>  #endif
> -		if ((alloc_flags & (ALLOC_HIGHATOMIC|ALLOC_OOM)) &&
> +		if ((alloc_flags & (ALLOC_HIGHATOMIC | ALLOC_HIGHATOMIC_RESERVE | ALLOC_OOM)) &&

... ditto ...

>  		    !free_area_empty(area, MIGRATE_HIGHATOMIC)) {
>  			return true;
>  		}
> @@ -3773,6 +3774,24 @@ alloc_flags_nofragment(struct zone *zone, gfp_t gfp_mask)
>  	return alloc_flags;
>  }
>  
> +/*
> + * Let high-priority non-blocking allocations above order-0 and up
> + * to the costly order try to use existing MIGRATE_HIGHATOMIC
> + * reserves on the fastpath.
> + */
> +static inline unsigned int
> +alloc_flags_highatomic_fastpath(gfp_t gfp_mask, unsigned int order)
> +{
> +	if (!order || order > PAGE_ALLOC_COSTLY_ORDER)
> +		return 0;
> +	if (!(gfp_mask & __GFP_HIGH))
> +		return 0;
> +	if (gfp_mask & (__GFP_DIRECT_RECLAIM | __GFP_NOMEMALLOC))
> +		return 0;
> +
> +	return ALLOC_HIGHATOMIC;
> +}
> +
>  /* Must be called after current_gfp_context() which can change gfp_mask */
>  static inline unsigned int gfp_to_alloc_flags_cma(gfp_t gfp_mask,
>  						  unsigned int alloc_flags)
> @@ -4504,7 +4523,7 @@ gfp_to_alloc_flags(gfp_t gfp_mask, unsigned int order)
>  			alloc_flags |= ALLOC_NON_BLOCK;
>  
>  			if (order > 0 && (alloc_flags & ALLOC_MIN_RESERVE))
> -				alloc_flags |= ALLOC_HIGHATOMIC;
> +				alloc_flags |= ALLOC_HIGHATOMIC_RESERVE;

And only here add both ALLOC_HIGHATOMIC and ALLOC_HIGHATOMIC_RESERVE.
I.e. ALLOC_HIGHATOMIC_RESERVE would not be a superset of ALLOC_HIGHATOMIC;
instead, access to the reserves and the ability to grow them would be
decoupled. The comments on the flags actually suggest that's the case.

>  		}
>  
>  		/*
> @@ -5298,7 +5317,8 @@ struct page *__alloc_frozen_pages_noprof(gfp_t gfp, unsigned int order,
>  	 * Forbid the first pass from falling back to types that fragment
>  	 * memory until all local zones are considered.
>  	 */
> -	alloc_flags |= alloc_flags_nofragment(zonelist_zone(ac.preferred_zoneref), gfp);
> +	alloc_flags |= alloc_flags_nofragment(zonelist_zone(ac.preferred_zoneref), gfp) |
> +			alloc_flags_highatomic_fastpath(alloc_gfp, order);
>  
>  	/* First allocation attempt */
>  	page = get_page_from_freelist(alloc_gfp, order, alloc_flags, &ac);



^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2026-06-17 13:02 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz, follow: Atom feed)
-- links below jump to the message on this page --
2026-06-16 19:14 [PATCH] mm/page_alloc: use existing highatomic reserves on the buddy fastpath JP Kobryn
2026-06-17 13:02 ` Vlastimil Babka (SUSE)

This is an external index of several public inboxes;
see the mirroring instructions for how to clone and mirror
all data and code used by this external index.