[PATCH] workqueue: avoid nohz_full cores

From: cmetcalf
Date: Tue Mar 31 2015 - 15:26:20 EST


From: Chris Metcalf <cmetcalf@xxxxxxxxxx>

When queuing work with WORK_CPU_UNBOUND, we should avoid queuing it
on the local cpu if that cpu is nohz_full: running the work item
there means a later interrupt of the nohz_full process, which
presumably would prefer to keep 100% of the core without
interruptions.

Likewise, remove the nohz_full cores from the cpumasks of unbound
workqueues. If all the cores are nohz_full, we leave them in.

Signed-off-by: Chris Metcalf <cmetcalf@xxxxxxxxxx>
---
Note that this patch depends on my earlier commit, not yet pulled
into Linus' tree, that added the tick_nohz_full_clear_cpus() API:
https://lkml.org/lkml/2015/3/24/956

include/linux/tick.h | 9 +++++++++
kernel/workqueue.c | 10 +++++++++-
2 files changed, 18 insertions(+), 1 deletion(-)

diff --git a/include/linux/tick.h b/include/linux/tick.h
index 29456c443970..119ed00c96d5 100644
--- a/include/linux/tick.h
+++ b/include/linux/tick.h
@@ -224,6 +224,15 @@ static inline bool is_housekeeping_cpu(int cpu)
return true;
}

+static inline int prefer_housekeeping_cpu(int cpu)
+{
+#ifdef CONFIG_NO_HZ_FULL
+ if (tick_nohz_full_cpu(cpu))
+ return cpumask_next(-1, housekeeping_mask);
+#endif
+ return cpu;
+}
+
static inline void housekeeping_affine(struct task_struct *t)
{
#ifdef CONFIG_NO_HZ_FULL
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index f28849394791..ebe5ce3ae42d 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -48,6 +48,7 @@
#include <linux/nodemask.h>
#include <linux/moduleparam.h>
#include <linux/uaccess.h>
+#include <linux/tick.h>

#include "workqueue_internal.h"

@@ -1303,7 +1304,7 @@ static void __queue_work(int cpu, struct workqueue_struct *wq,
return;
retry:
if (req_cpu == WORK_CPU_UNBOUND)
- cpu = raw_smp_processor_id();
+ cpu = prefer_housekeeping_cpu(raw_smp_processor_id());

/* pwq which will be used unless @work is executing elsewhere */
if (!(wq->flags & WQ_UNBOUND))
@@ -3782,6 +3783,7 @@ int apply_workqueue_attrs(struct workqueue_struct *wq,
/* make a copy of @attrs and sanitize it */
copy_workqueue_attrs(new_attrs, attrs);
cpumask_and(new_attrs->cpumask, new_attrs->cpumask, cpu_possible_mask);
+ tick_nohz_full_clear_cpus(new_attrs->cpumask);

/*
* We may create multiple pwqs with differing cpumasks. Make a
@@ -3810,6 +3812,12 @@ int apply_workqueue_attrs(struct workqueue_struct *wq,

for_each_node(node) {
if (wq_calc_node_cpumask(attrs, node, -1, tmp_attrs->cpumask)) {
+#ifdef CONFIG_NO_HZ_FULL
+ tick_nohz_full_clear_cpus(tmp_attrs->cpumask);
+ if (cpumask_empty(tmp_attrs->cpumask))
+ cpumask_copy(tmp_attrs->cpumask,
+ new_attrs->cpumask);
+#endif
pwq_tbl[node] = alloc_unbound_pwq(wq, tmp_attrs);
if (!pwq_tbl[node])
goto enomem_pwq;
--
2.1.2

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/