[PATCH-cgroup 1/4] workqueue: Add workqueue_unbound_exclude_cpumask() to exclude CPUs from wq_unbound_cpumask

From: Waiman Long
Date: Fri Oct 13 2023 - 14:12:47 EST


When the "isolcpus" boot command line option is used to add a set
of isolated CPUs, those CPUs will be excluded automatically from
wq_unbound_cpumask to avoid running work functions from unbound
workqueues.

Recently cpuset has been extended to allow the creation of partitions
of isolated CPUs dynamically. To make it closer to the "isolcpus"
in functionality, the CPUs in those isolated cpuset partitions should be
excluded from wq_unbound_cpumask as well. This can be done currently by
explicitly writing to the workqueue's cpumask sysfs file after creating
the isolated partitions. However, this process can be error prone.
Ideally, the cpuset code should be allowed to request the workqueue code
to exclude those isolated CPUs from wq_unbound_cpumask so that this
operation can be done automatically and the isolated CPUs will be returned
back to wq_unbound_cpumask after the destructions of the isolated
cpuset partitions.

This patch adds a new workqueue_unbound_exclude_cpumask() to enable
that. This new function will exclude the specified isolated CPUs
from wq_unbound_cpumask. To be able to restore those isolated CPUs
back after the destruction of isolated cpuset partitions, a new
wq_user_unbound_cpumask is added to store the user provided unbound
cpumask either from the boot command line options or from writing to
the cpumask sysfs file. This new cpumask provides the basis for CPU
exclusion.

Signed-off-by: Waiman Long <longman@xxxxxxxxxx>
---
include/linux/workqueue.h | 2 +-
kernel/workqueue.c | 42 ++++++++++++++++++++++++++++++++++++++-
2 files changed, 42 insertions(+), 2 deletions(-)

diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index 1c1d06804d45..a936460ccc7e 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -483,7 +483,7 @@ struct workqueue_attrs *alloc_workqueue_attrs(void);
void free_workqueue_attrs(struct workqueue_attrs *attrs);
int apply_workqueue_attrs(struct workqueue_struct *wq,
const struct workqueue_attrs *attrs);
-int workqueue_set_unbound_cpumask(cpumask_var_t cpumask);
+extern int workqueue_unbound_exclude_cpumask(cpumask_var_t cpumask);

extern bool queue_work_on(int cpu, struct workqueue_struct *wq,
struct work_struct *work);
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 010b674b02a7..19d403aa41b0 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -381,6 +381,9 @@ static bool workqueue_freezing; /* PL: have wqs started freezing? */
/* PL&A: allowable cpus for unbound wqs and work items */
static cpumask_var_t wq_unbound_cpumask;

+/* PL: user-provided unbound cpumask via sysfs */
+static cpumask_var_t wq_user_unbound_cpumask;
+
/* for further constrain wq_unbound_cpumask by cmdline parameter*/
static struct cpumask wq_cmdline_cpumask __initdata;

@@ -5825,7 +5828,7 @@ static int workqueue_apply_unbound_cpumask(const cpumask_var_t unbound_cpumask)
* -EINVAL - Invalid @cpumask
* -ENOMEM - Failed to allocate memory for attrs or pwqs.
*/
-int workqueue_set_unbound_cpumask(cpumask_var_t cpumask)
+static int workqueue_set_unbound_cpumask(cpumask_var_t cpumask)
{
int ret = -EINVAL;

@@ -5836,6 +5839,7 @@ int workqueue_set_unbound_cpumask(cpumask_var_t cpumask)
cpumask_and(cpumask, cpumask, cpu_possible_mask);
if (!cpumask_empty(cpumask)) {
apply_wqattrs_lock();
+ cpumask_copy(wq_user_unbound_cpumask, cpumask);
if (cpumask_equal(cpumask, wq_unbound_cpumask)) {
ret = 0;
goto out_unlock;
@@ -5850,6 +5854,40 @@ int workqueue_set_unbound_cpumask(cpumask_var_t cpumask)
return ret;
}

+/**
+ * workqueue_unbound_exclude_cpumask - Exclude given CPUs from unbound cpumask
+ * @exclude_cpumask: the cpumask to be excluded from wq_unbound_cpumask
+ *
+ * This function can be called from cpuset code to provide a set of isolated
+ * CPUs that should be excluded from wq_unbound_cpumask.
+ */
+int workqueue_unbound_exclude_cpumask(cpumask_var_t exclude_cpumask)
+{
+ cpumask_var_t cpumask;
+ int ret = 0;
+
+ if (!zalloc_cpumask_var(&cpumask, GFP_KERNEL))
+ return -ENOMEM;
+
+ /*
+ * The caller of this function may have called cpus_read_lock(),
+ * use cpus_read_trylock() to avoid potential deadlock.
+ */
+ if (!cpus_read_trylock())
+ return -EBUSY;
+ mutex_lock(&wq_pool_mutex);
+
+ if (!cpumask_andnot(cpumask, wq_user_unbound_cpumask, exclude_cpumask))
+ ret = -EINVAL; /* The new cpumask can't be empty */
+ else if (!cpumask_equal(cpumask, wq_unbound_cpumask))
+ ret = workqueue_apply_unbound_cpumask(cpumask);
+
+ mutex_unlock(&wq_pool_mutex);
+ cpus_read_unlock();
+ free_cpumask_var(cpumask);
+ return ret;
+}
+
static int parse_affn_scope(const char *val)
{
int i;
@@ -6520,11 +6558,13 @@ void __init workqueue_init_early(void)
BUILD_BUG_ON(__alignof__(struct pool_workqueue) < __alignof__(long long));

BUG_ON(!alloc_cpumask_var(&wq_unbound_cpumask, GFP_KERNEL));
+ BUG_ON(!alloc_cpumask_var(&wq_user_unbound_cpumask, GFP_KERNEL));
cpumask_copy(wq_unbound_cpumask, housekeeping_cpumask(HK_TYPE_WQ));
cpumask_and(wq_unbound_cpumask, wq_unbound_cpumask, housekeeping_cpumask(HK_TYPE_DOMAIN));

if (!cpumask_empty(&wq_cmdline_cpumask))
cpumask_and(wq_unbound_cpumask, wq_unbound_cpumask, &wq_cmdline_cpumask);
+ cpumask_copy(wq_user_unbound_cpumask, wq_unbound_cpumask);

pwq_cache = KMEM_CACHE(pool_workqueue, SLAB_PANIC);

--
2.39.3