[PATCH v2] mm/vmalloc: use dedicated unbound workqueue for vmap area draining

From: lirongqing

Date: Thu Mar 19 2026 - 03:45:54 EST


From: Li RongQing <lirongqing@xxxxxxxxx>

drain_vmap_area_work() can take more than 10ms to complete when many
vmap areas have accumulated on a system with a high CPU count. Because
it is queued with schedule_work(), i.e. on a per-CPU workqueue, this
triggers the workqueue CPU-hog warning:

[ 2069.796205] workqueue: drain_vmap_area_work hogged CPU for >10000us 4 times, consider switching to WQ_UNBOUND
[ 2192.823225] workqueue: drain_vmap_area_work hogged CPU for >10000us 5 times, consider switching to WQ_UNBOUND

Switch to a dedicated WQ_UNBOUND workqueue so that the scheduler can
run this background task on any available CPU instead of tying it to
the CPU that queued it. Also set WQ_MEM_RECLAIM to ensure forward
progress under memory pressure.

Create vmap_drain_wq in a new helper, vmalloc_init_late(), which
start_kernel() calls right after workqueue_init_early(); allocating
the workqueue before that point would crash at boot.

Suggested-by: Uladzislau Rezki <urezki@xxxxxxxxx>
Signed-off-by: Li RongQing <lirongqing@xxxxxxxxx>
---
Changes since v1: create a dedicated unbound workqueue

include/linux/vmalloc.h | 2 ++
init/main.c | 1 +
mm/vmalloc.c | 14 +++++++++++++-
3 files changed, 16 insertions(+), 1 deletion(-)

diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
index e8e94f9..c028603 100644
--- a/include/linux/vmalloc.h
+++ b/include/linux/vmalloc.h
@@ -301,11 +301,13 @@ static inline void set_vm_flush_reset_perms(void *addr)
if (vm)
vm->flags |= VM_FLUSH_RESET_PERMS;
}
+void __init vmalloc_init_late(void);
#else /* !CONFIG_MMU */
#define VMALLOC_TOTAL 0UL

static inline unsigned long vmalloc_nr_pages(void) { return 0; }
static inline void set_vm_flush_reset_perms(void *addr) {}
+static inline void __init vmalloc_init_late(void) {}
#endif /* CONFIG_MMU */

#if defined(CONFIG_MMU) && defined(CONFIG_SMP)
diff --git a/init/main.c b/init/main.c
index 1cb395d..50b497f 100644
--- a/init/main.c
+++ b/init/main.c
@@ -1099,6 +1099,7 @@ void start_kernel(void)
* workqueue_init().
*/
workqueue_init_early();
+ vmalloc_init_late();

rcu_init();
kvfree_rcu_init();
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 61caa55..a52ccd4 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -1067,6 +1067,7 @@ static void reclaim_and_purge_vmap_areas(void);
static BLOCKING_NOTIFIER_HEAD(vmap_notify_list);
static void drain_vmap_area_work(struct work_struct *work);
static DECLARE_WORK(drain_vmap_work, drain_vmap_area_work);
+static struct workqueue_struct *vmap_drain_wq;

static __cacheline_aligned_in_smp atomic_long_t nr_vmalloc_pages;
static __cacheline_aligned_in_smp atomic_long_t vmap_lazy_nr;
@@ -2471,7 +2472,7 @@ static void free_vmap_area_noflush(struct vmap_area *va)

/* After this point, we may free va at any time */
if (unlikely(nr_lazy > nr_lazy_max))
- schedule_work(&drain_vmap_work);
+ queue_work(vmap_drain_wq, &drain_vmap_work);
}

/*
@@ -5422,6 +5423,17 @@ vmap_node_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
return SHRINK_STOP;
}

+void __init vmalloc_init_late(void)
+{
+ vmap_drain_wq = alloc_workqueue("vmap_drain",
+ WQ_UNBOUND | WQ_MEM_RECLAIM, 0);
+ if (!vmap_drain_wq) {
+ pr_warn("vmap_drain_wq creation failed, using system_unbound_wq\n");
+ vmap_drain_wq = system_unbound_wq;
+ }
+
+}
+
void __init vmalloc_init(void)
{
struct shrinker *vmap_node_shrinker;
--
2.9.4