From: Uladzislau Rezki <urezki@gmail.com>
To: "Li,Rongqing(ACG CCN)" <lirongqing@baidu.com>
Cc: Uladzislau Rezki <urezki@gmail.com>,
Andrew Morton <akpm@linux-foundation.org>,
"linux-mm@kvack.org" <linux-mm@kvack.org>,
"linux-kernel@vger.kernel.org" <linux-kernel@vger.kernel.org>
Subject: Re: 答复: [????] Re: [PATCH v2] mm/vmalloc: use dedicated unbound workqueue for vmap area draining
Date: Thu, 19 Mar 2026 14:23:37 +0100 [thread overview]
Message-ID: <abv42QtTuwJCv1St@milan> (raw)
In-Reply-To: <73a0ae8d2a334777a199a1555d6fdaaa@baidu.com>
On Thu, Mar 19, 2026 at 10:05:42AM +0000, Li,Rongqing(ACG CCN) wrote:
>
>
> > On Thu, Mar 19, 2026 at 03:43:07AM -0400, lirongqing wrote:
> > > From: Li RongQing <lirongqing@baidu.com>
> > >
> > > The drain_vmap_area_work() function can take >10ms to complete when
> > > there are many accumulated vmap areas in a system with a high CPU
> > > count, causing workqueue watchdog warnings when run via
> > > schedule_work():
> > >
> > > [ 2069.796205] workqueue: drain_vmap_area_work hogged CPU
> > for >10000us
> > > 4 times, consider switching to WQ_UNBOUND [ 2192.823225] workqueue:
> > > drain_vmap_area_work hogged CPU for >10000us 5 times, consider
> > > switching to WQ_UNBOUND
> > >
> > > Switch to a dedicated WQ_UNBOUND workqueue to allow the scheduler to
> > > run this background task on any available CPU, improving responsiveness.
> > > Use WQ_MEM_RECLAIM to ensure forward progress under memory
> > pressure.
> > >
> > > Create vmap_drain_wq in vmalloc_init_late() which is called after
> > > workqueue_init_early() in start_kernel() to avoid boot-time crashes.
> > >
> > > Suggested-by: Uladzislau Rezki <urezki@gmail.com>
> > > Signed-off-by: Li RongQing <lirongqing@baidu.com>
> > > ---
> > > Diff with v1: create dedicated unbound workqueue
> > >
> > > include/linux/vmalloc.h | 2 ++
> > > init/main.c | 1 +
> > > mm/vmalloc.c | 14 +++++++++++++-
> > > 3 files changed, 16 insertions(+), 1 deletion(-)
> > >
> > > diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index
> > > e8e94f9..c028603 100644
> > > --- a/include/linux/vmalloc.h
> > > +++ b/include/linux/vmalloc.h
> > > @@ -301,11 +301,13 @@ static inline void set_vm_flush_reset_perms(void
> > *addr)
> > > if (vm)
> > > vm->flags |= VM_FLUSH_RESET_PERMS;
> > > }
> > > +void __init vmalloc_init_late(void);
> > > #else /* !CONFIG_MMU */
> > > #define VMALLOC_TOTAL 0UL
> > >
> > > static inline unsigned long vmalloc_nr_pages(void) { return 0; }
> > > static inline void set_vm_flush_reset_perms(void *addr) {}
> > > +static inline void __init vmalloc_init_late(void) {}
> > > #endif /* CONFIG_MMU */
> > >
> > > #if defined(CONFIG_MMU) && defined(CONFIG_SMP) diff --git
> > > a/init/main.c b/init/main.c index 1cb395d..50b497f 100644
> > > --- a/init/main.c
> > > +++ b/init/main.c
> > > @@ -1099,6 +1099,7 @@ void start_kernel(void)
> > > * workqueue_init().
> > > */
> > > workqueue_init_early();
> > > + vmalloc_init_late();
> > >
> > No, no. We should not patch main.c for such purpose :)
> >
> > > rcu_init();
> > > kvfree_rcu_init();
> > > diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 61caa55..a52ccd4 100644
> > > --- a/mm/vmalloc.c
> > > +++ b/mm/vmalloc.c
> > > @@ -1067,6 +1067,7 @@ static void
> > reclaim_and_purge_vmap_areas(void);
> > > static BLOCKING_NOTIFIER_HEAD(vmap_notify_list);
> > > static void drain_vmap_area_work(struct work_struct *work); static
> > > DECLARE_WORK(drain_vmap_work, drain_vmap_area_work);
> > > +static struct workqueue_struct *vmap_drain_wq;
> > >
> > > static __cacheline_aligned_in_smp atomic_long_t nr_vmalloc_pages;
> > > static __cacheline_aligned_in_smp atomic_long_t vmap_lazy_nr; @@
> > > -2471,7 +2472,7 @@ static void free_vmap_area_noflush(struct vmap_area
> > > *va)
> > >
> > > /* After this point, we may free va at any time */
> > > if (unlikely(nr_lazy > nr_lazy_max))
> > > - schedule_work(&drain_vmap_work);
> > > + queue_work(vmap_drain_wq, &drain_vmap_work);
> > > }
> > >
> > > /*
> > > @@ -5422,6 +5423,17 @@ vmap_node_shrink_scan(struct shrinker
> > *shrink, struct shrink_control *sc)
> > > return SHRINK_STOP;
> > > }
> > >
> > > +void __init vmalloc_init_late(void)
> > > +{
> > > + vmap_drain_wq = alloc_workqueue("vmap_drain",
> > > + WQ_UNBOUND | WQ_MEM_RECLAIM, 0);
> > > + if (!vmap_drain_wq) {
> > > + pr_warn("vmap_drain_wq creation failed, using
> > system_unbound_wq\n");
> > > + vmap_drain_wq = system_unbound_wq;
> > > + }
> > > +
> > > +}
> > > +
> > > void __init vmalloc_init(void)
> > > {
> > > struct shrinker *vmap_node_shrinker;
> > > --
> > > 2.9.4
> > >
> > Why can't you add this into the vmalloc_init()?
> >
>
> If alloc_workqueue() is added into vmalloc_init(), the system will crash and fail to boot, since allocating a workqueue depends on workqueue_init_early()
>
> Maybe commit 3347fa092821 ("workqueue: make workqueue available early during boot") shows the reason
>
That is true.
<snip>
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 61caa55a4402..81e1e74346d5 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -1067,6 +1067,7 @@ static void reclaim_and_purge_vmap_areas(void);
static BLOCKING_NOTIFIER_HEAD(vmap_notify_list);
static void drain_vmap_area_work(struct work_struct *work);
static DECLARE_WORK(drain_vmap_work, drain_vmap_area_work);
+static struct workqueue_struct *drain_vmap_wq;
static __cacheline_aligned_in_smp atomic_long_t nr_vmalloc_pages;
static __cacheline_aligned_in_smp atomic_long_t vmap_lazy_nr;
@@ -2437,6 +2438,17 @@ static void drain_vmap_area_work(struct work_struct *work)
mutex_unlock(&vmap_purge_lock);
}
+static void
+schedule_drain_vmap_work(unsigned long nr_lazy, unsigned long nr_lazy_max)
+{
+ if (unlikely(nr_lazy > nr_lazy_max)) {
+ struct workqueue_struct *wq = READ_ONCE(drain_vmap_wq);
+
+ if (wq)
+ queue_work(wq, &drain_vmap_work);
+ }
+}
+
/*
* Free a vmap area, caller ensuring that the area has been unmapped,
* unlinked and flush_cache_vunmap had been called for the correct
@@ -2470,8 +2482,7 @@ static void free_vmap_area_noflush(struct vmap_area *va)
trace_free_vmap_area_noflush(va_start, nr_lazy, nr_lazy_max);
/* After this point, we may free va at any time */
- if (unlikely(nr_lazy > nr_lazy_max))
- schedule_work(&drain_vmap_work);
+ schedule_drain_vmap_work(nr_lazy, nr_lazy_max);
}
/*
@@ -5483,3 +5494,15 @@ void __init vmalloc_init(void)
vmap_node_shrinker->scan_objects = vmap_node_shrink_scan;
shrinker_register(vmap_node_shrinker);
}
+
+static int __init vmalloc_init_workqueue(void)
+{
+ struct workqueue_struct *wq;
+
+ wq = alloc_workqueue("vmap_drain", WQ_UNBOUND | WQ_MEM_RECLAIM, 0);
+ WARN_ON(wq == NULL);
+ WRITE_ONCE(drain_vmap_wq, wq);
+
+ return 0;
+}
+early_initcall(vmalloc_init_workqueue);
<snip>
--
Uladzislau Rezki
next prev parent reply other threads:[~2026-03-19 13:23 UTC|newest]
Thread overview: 8+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-03-19 7:43 [PATCH v2] mm/vmalloc: use dedicated unbound workqueue for vmap area draining lirongqing
2026-03-19 9:39 ` Uladzislau Rezki
2026-03-19 10:05 ` 答复: [????] " Li,Rongqing(ACG CCN)
2026-03-19 13:23 ` Uladzislau Rezki [this message]
2026-03-20 5:48 ` 答复: [????] Re: ??: " Li,Rongqing(ACG CCN)
2026-03-20 3:16 ` Andrew Morton
2026-03-20 9:51 ` [syzbot ci] " syzbot ci
2026-03-24 13:32 ` [PATCH v2] " kernel test robot
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=abv42QtTuwJCv1St@milan \
--to=urezki@gmail.com \
--cc=akpm@linux-foundation.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=lirongqing@baidu.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.