All of lore.kernel.org
 help / color / mirror / Atom feed
From: Baoquan He <bhe@redhat.com>
To: "Uladzislau Rezki (Sony)" <urezki@gmail.com>
Cc: linux-mm@kvack.org, Andrew Morton <akpm@linux-foundation.org>,
	LKML <linux-kernel@vger.kernel.org>,
	stable@vger.kernel.org, lirongqing <lirongqing@baidu.com>
Subject: Re: [PATCH v3] mm/vmalloc: Use dedicated unbound workqueues for vmap drain
Date: Thu, 2 Apr 2026 08:23:10 +0800	[thread overview]
Message-ID: <ac227uLhjH8pETb5@fedora> (raw)
In-Reply-To: <20260331202352.879718-1-urezki@gmail.com>

On 03/31/26 at 10:23pm, Uladzislau Rezki (Sony) wrote:
> The drain_vmap_area_work() function can take >10ms to complete
> when there are many accumulated vmap areas in a system with
> high CPU count, causing workqueue watchdog warnings when run
> via schedule_work():
> 
>   workqueue: drain_vmap_area_work hogged CPU for >10000us
> 
> Move the top-level drain work to a dedicated WQ_UNBOUND
> workqueue so the scheduler can run this background work
> on any available CPU, improving responsiveness. Use the
> WQ_MEM_RECLAIM flag to ensure forward progress under memory
> pressure.
> 
> Move purge helpers to a separate WQ_UNBOUND | WQ_MEM_RECLAIM
> workqueue. This allows drain_vmap_work to wait for helper
> completion without creating a dependency on the same rescuer
> thread, avoiding a potential parent/child deadlock.
> 
> Simplify purge helper scheduling by replacing cpumask-based
> iteration with direct iteration over vmap nodes, checking
> the work_queued state.
> 
> Cc: stable@vger.kernel.org
> Cc: lirongqing <lirongqing@baidu.com>
> Fixes: 72210662c5a2 ("mm: vmalloc: offload free_vmap_area_lock lock")
> Link: https://lore.kernel.org/all/20260319074307.2325-1-lirongqing@baidu.com/
> Signed-off-by: Uladzislau Rezki (Sony) <urezki@gmail.com>
> ---
>  mm/vmalloc.c | 79 ++++++++++++++++++++++++++++++++++------------------
>  1 file changed, 52 insertions(+), 27 deletions(-)

LGTM,

Reviewed-by: Baoquan He <bhe@redhat.com>

> 
> diff --git a/mm/vmalloc.c b/mm/vmalloc.c
> index 61caa55a4402..0fa1208a910b 100644
> --- a/mm/vmalloc.c
> +++ b/mm/vmalloc.c
> @@ -949,6 +949,7 @@ static struct vmap_node {
>  	struct list_head purge_list;
>  	struct work_struct purge_work;
>  	unsigned long nr_purged;
> +	bool work_queued;
>  } single;
>  
>  /*
> @@ -1067,6 +1068,8 @@ static void reclaim_and_purge_vmap_areas(void);
>  static BLOCKING_NOTIFIER_HEAD(vmap_notify_list);
>  static void drain_vmap_area_work(struct work_struct *work);
>  static DECLARE_WORK(drain_vmap_work, drain_vmap_area_work);
> +static struct workqueue_struct *drain_vmap_helpers_wq;
> +static struct workqueue_struct *drain_vmap_wq;
>  
>  static __cacheline_aligned_in_smp atomic_long_t nr_vmalloc_pages;
>  static __cacheline_aligned_in_smp atomic_long_t vmap_lazy_nr;
> @@ -2335,6 +2338,16 @@ static void purge_vmap_node(struct work_struct *work)
>  	reclaim_list_global(&local_list);
>  }
>  
> +static bool
> +schedule_drain_vmap_work(struct workqueue_struct *wq,
> +		struct work_struct *work)
> +{
> +	if (wq)
> +		return queue_work(wq, work);
> +
> +	return false;
> +}
> +
>  /*
>   * Purges all lazily-freed vmap areas.
>   */
> @@ -2342,19 +2355,12 @@ static bool __purge_vmap_area_lazy(unsigned long start, unsigned long end,
>  		bool full_pool_decay)
>  {
>  	unsigned long nr_purged_areas = 0;
> +	unsigned int nr_purge_nodes = 0;
>  	unsigned int nr_purge_helpers;
> -	static cpumask_t purge_nodes;
> -	unsigned int nr_purge_nodes;
>  	struct vmap_node *vn;
> -	int i;
>  
>  	lockdep_assert_held(&vmap_purge_lock);
>  
> -	/*
> -	 * Use cpumask to mark which node has to be processed.
> -	 */
> -	purge_nodes = CPU_MASK_NONE;
> -
>  	for_each_vmap_node(vn) {
>  		INIT_LIST_HEAD(&vn->purge_list);
>  		vn->skip_populate = full_pool_decay;
> @@ -2374,10 +2380,9 @@ static bool __purge_vmap_area_lazy(unsigned long start, unsigned long end,
>  		end = max(end, list_last_entry(&vn->purge_list,
>  			struct vmap_area, list)->va_end);
>  
> -		cpumask_set_cpu(node_to_id(vn), &purge_nodes);
> +		nr_purge_nodes++;
>  	}
>  
> -	nr_purge_nodes = cpumask_weight(&purge_nodes);
>  	if (nr_purge_nodes > 0) {
>  		flush_tlb_kernel_range(start, end);
>  
> @@ -2385,29 +2390,31 @@ static bool __purge_vmap_area_lazy(unsigned long start, unsigned long end,
>  		nr_purge_helpers = atomic_long_read(&vmap_lazy_nr) / lazy_max_pages();
>  		nr_purge_helpers = clamp(nr_purge_helpers, 1U, nr_purge_nodes) - 1;
>  
> -		for_each_cpu(i, &purge_nodes) {
> -			vn = &vmap_nodes[i];
> +		for_each_vmap_node(vn) {
> +			vn->work_queued = false;
> +
> +			if (list_empty(&vn->purge_list))
> +				continue;
>  
>  			if (nr_purge_helpers > 0) {
>  				INIT_WORK(&vn->purge_work, purge_vmap_node);
> +				vn->work_queued = schedule_drain_vmap_work(
> +					READ_ONCE(drain_vmap_helpers_wq), &vn->purge_work);
>  
> -				if (cpumask_test_cpu(i, cpu_online_mask))
> -					schedule_work_on(i, &vn->purge_work);
> -				else
> -					schedule_work(&vn->purge_work);
> -
> -				nr_purge_helpers--;
> -			} else {
> -				vn->purge_work.func = NULL;
> -				purge_vmap_node(&vn->purge_work);
> -				nr_purged_areas += vn->nr_purged;
> +				if (vn->work_queued) {
> +					nr_purge_helpers--;
> +					continue;
> +				}
>  			}
> -		}
>  
> -		for_each_cpu(i, &purge_nodes) {
> -			vn = &vmap_nodes[i];
> +			/* Sync path. Process locally. */
> +			purge_vmap_node(&vn->purge_work);
> +			nr_purged_areas += vn->nr_purged;
> +		}
>  
> -			if (vn->purge_work.func) {
> +		/* Wait for completion if queued any. */
> +		for_each_vmap_node(vn) {
> +			if (vn->work_queued) {
>  				flush_work(&vn->purge_work);
>  				nr_purged_areas += vn->nr_purged;
>  			}
> @@ -2471,7 +2478,8 @@ static void free_vmap_area_noflush(struct vmap_area *va)
>  
>  	/* After this point, we may free va at any time */
>  	if (unlikely(nr_lazy > nr_lazy_max))
> -		schedule_work(&drain_vmap_work);
> +		schedule_drain_vmap_work(READ_ONCE(drain_vmap_wq),
> +			&drain_vmap_work);
>  }
>  
>  /*
> @@ -5483,3 +5491,20 @@ void __init vmalloc_init(void)
>  	vmap_node_shrinker->scan_objects = vmap_node_shrink_scan;
>  	shrinker_register(vmap_node_shrinker);
>  }
> +
> +static int __init vmalloc_init_workqueue(void)
> +{
> +	struct workqueue_struct *drain_wq, *helpers_wq;
> +	unsigned int flags = WQ_UNBOUND | WQ_MEM_RECLAIM;
> +
> +	drain_wq = alloc_workqueue("vmap_drain", flags, 0);
> +	WARN_ON_ONCE(drain_wq == NULL);
> +	WRITE_ONCE(drain_vmap_wq, drain_wq);
> +
> +	helpers_wq = alloc_workqueue("vmap_drain_helpers", flags, 0);
> +	WARN_ON_ONCE(helpers_wq == NULL);
> +	WRITE_ONCE(drain_vmap_helpers_wq, helpers_wq);
> +
> +	return 0;
> +}
> +early_initcall(vmalloc_init_workqueue);
> -- 
> 2.47.3
> 



  parent reply	other threads:[~2026-04-02  0:23 UTC|newest]

Thread overview: 7+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-03-31 20:23 [PATCH v3] mm/vmalloc: Use dedicated unbound workqueues for vmap drain Uladzislau Rezki (Sony)
2026-03-31 22:40 ` Andrew Morton
2026-04-01  9:47 ` Baoquan He
2026-04-02  0:22   ` Baoquan He
2026-04-02 16:05     ` Uladzislau Rezki
2026-04-02  0:23 ` Baoquan He [this message]
2026-04-02 16:06   ` Uladzislau Rezki

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=ac227uLhjH8pETb5@fedora \
    --to=bhe@redhat.com \
    --cc=akpm@linux-foundation.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=lirongqing@baidu.com \
    --cc=stable@vger.kernel.org \
    --cc=urezki@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.