[PATCH 07/32] cpuset: Set up interface for nohz flag

From: Frederic Weisbecker
Date: Wed Mar 21 2012 - 09:59:21 EST


Prepare the interface to implement the nohz cpuset flag.
This flag, once set, will tell the system to try to
shut down the periodic timer tick when possible.

We use a per-CPU refcounter here. As long as a CPU
is contained in at least one cpuset that has the
nohz flag set, it is part of the set of CPUs that
run in adaptive nohz mode.

[ include build fix from Zen Lin ]

Signed-off-by: Frederic Weisbecker <fweisbec@xxxxxxxxx>
Cc: Alessio Igor Bogani <abogani@xxxxxxxxxx>
Cc: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx>
Cc: Avi Kivity <avi@xxxxxxxxxx>
Cc: Chris Metcalf <cmetcalf@xxxxxxxxxx>
Cc: Christoph Lameter <cl@xxxxxxxxx>
Cc: Daniel Lezcano <daniel.lezcano@xxxxxxxxxx>
Cc: Geoff Levand <geoff@xxxxxxxxxxxxx>
Cc: Gilad Ben Yossef <gilad@xxxxxxxxxxxxx>
Cc: Ingo Molnar <mingo@xxxxxxxxxx>
Cc: Max Krasnyansky <maxk@xxxxxxxxxxxx>
Cc: Paul E. McKenney <paulmck@xxxxxxxxxxxxxxxxxx>
Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
Cc: Stephen Hemminger <shemminger@xxxxxxxxxx>
Cc: Steven Rostedt <rostedt@xxxxxxxxxxx>
Cc: Sven-Thorsten Dietrich <thebigcorporation@xxxxxxxxx>
Cc: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
Cc: Zen Lin <zen@xxxxxxxxxxxxxx>
---
arch/Kconfig | 3 ++
include/linux/cpuset.h | 25 +++++++++++++++++++++++
init/Kconfig | 8 +++++++
kernel/cpuset.c | 52 ++++++++++++++++++++++++++++++++++++++++++++++++
4 files changed, 88 insertions(+), 0 deletions(-)

diff --git a/arch/Kconfig b/arch/Kconfig
index 4f55c73..a0710f6 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -177,6 +177,9 @@ config HAVE_ARCH_JUMP_LABEL
bool

config HAVE_ARCH_MUTEX_CPU_RELAX
+ bool
+
+config HAVE_CPUSETS_NO_HZ
bool

config HAVE_RCU_TABLE_FREE
diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h
index e9eaec5..5510708 100644
--- a/include/linux/cpuset.h
+++ b/include/linux/cpuset.h
@@ -244,4 +244,29 @@ static inline void put_mems_allowed(void)

#endif /* !CONFIG_CPUSETS */

+#ifdef CONFIG_CPUSETS_NO_HZ
+
+DECLARE_PER_CPU(int, cpu_adaptive_nohz_ref);
+
+/*
+ * True when @cpu holds at least one adaptive nohz reference.
+ */
+static inline bool cpuset_cpu_adaptive_nohz(int cpu)
+{
+ return per_cpu(cpu_adaptive_nohz_ref, cpu) > 0;
+}
+
+/*
+ * True when the CPU running this code holds an adaptive nohz reference.
+ */
+static inline bool cpuset_adaptive_nohz(void)
+{
+ return __get_cpu_var(cpu_adaptive_nohz_ref) > 0;
+}
+#else
+static inline bool cpuset_cpu_adaptive_nohz(int cpu) { return false; }
+static inline bool cpuset_adaptive_nohz(void) { return false; }
+
+#endif /* CONFIG_CPUSETS_NO_HZ */
+
#endif /* _LINUX_CPUSET_H */
diff --git a/init/Kconfig b/init/Kconfig
index 3f42cd6..43f7687 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -638,6 +638,14 @@ config PROC_PID_CPUSET
depends on CPUSETS
default y

+config CPUSETS_NO_HZ
+ bool "Tickless cpusets"
+ depends on CPUSETS && HAVE_CPUSETS_NO_HZ
+ help
+ This option lets you apply a nohz property to a cpuset so
+ that the periodic timer tick is avoided whenever possible on
+ the CPUs of that cpuset.
+
config CGROUP_CPUACCT
bool "Simple CPU accounting cgroup subsystem"
help
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index a09ac2b..5a28cf8 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -145,6 +145,7 @@ typedef enum {
CS_SCHED_LOAD_BALANCE,
CS_SPREAD_PAGE,
CS_SPREAD_SLAB,
+ CS_ADAPTIVE_NOHZ,
} cpuset_flagbits_t;

/* convenient tests for these bits */
@@ -183,6 +184,11 @@ static inline int is_spread_slab(const struct cpuset *cs)
return test_bit(CS_SPREAD_SLAB, &cs->flags);
}

+static inline int is_adaptive_nohz(const struct cpuset *cs)
+{
+ return test_bit(CS_ADAPTIVE_NOHZ, &cs->flags);
+}
+
static struct cpuset top_cpuset = {
.flags = ((1 << CS_CPU_EXCLUSIVE) | (1 << CS_MEM_EXCLUSIVE)),
};
@@ -1211,6 +1217,31 @@ static void cpuset_change_flag(struct task_struct *tsk,
cpuset_update_task_spread_flag(cgroup_cs(scan->cg), tsk);
}

+#ifdef CONFIG_CPUSETS_NO_HZ
+
+DEFINE_PER_CPU(int, cpu_adaptive_nohz_ref);
+
+/* Take or drop one nohz reference on each CPU of @cs when its flag flips. */
+static void update_nohz_cpus(struct cpuset *old_cs, struct cpuset *cs)
+{
+ int cpu;
+ int delta;
+
+ if (is_adaptive_nohz(old_cs) == is_adaptive_nohz(cs))
+ return;
+
+ /* The flag is loop-invariant: +1 when being set, -1 when being cleared. */
+ delta = is_adaptive_nohz(cs) ? 1 : -1;
+ for_each_cpu(cpu, cs->cpus_allowed) {
+ per_cpu(cpu_adaptive_nohz_ref, cpu) += delta;
+ }
+}
+#else
+static inline void update_nohz_cpus(struct cpuset *old_cs, struct cpuset *cs)
+{
+}
+#endif
+
/*
* update_tasks_flags - update the spread flags of tasks in the cpuset.
* @cs: the cpuset in which each task's spread flags needs to be changed
@@ -1276,6 +1307,8 @@ static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs,
spread_flag_changed = ((is_spread_slab(cs) != is_spread_slab(trialcs))
|| (is_spread_page(cs) != is_spread_page(trialcs)));

+ update_nohz_cpus(cs, trialcs);
+
mutex_lock(&callback_mutex);
cs->flags = trialcs->flags;
mutex_unlock(&callback_mutex);
@@ -1488,6 +1521,7 @@ typedef enum {
FILE_MEMORY_PRESSURE,
FILE_SPREAD_PAGE,
FILE_SPREAD_SLAB,
+ FILE_ADAPTIVE_NOHZ,
} cpuset_filetype_t;

static int cpuset_write_u64(struct cgroup *cgrp, struct cftype *cft, u64 val)
@@ -1527,6 +1561,11 @@ static int cpuset_write_u64(struct cgroup *cgrp, struct cftype *cft, u64 val)
case FILE_SPREAD_SLAB:
retval = update_flag(CS_SPREAD_SLAB, cs, val);
break;
+#ifdef CONFIG_CPUSETS_NO_HZ
+ case FILE_ADAPTIVE_NOHZ:
+ retval = update_flag(CS_ADAPTIVE_NOHZ, cs, val);
+ break;
+#endif
default:
retval = -EINVAL;
break;
@@ -1686,6 +1725,10 @@ static u64 cpuset_read_u64(struct cgroup *cont, struct cftype *cft)
return is_spread_page(cs);
case FILE_SPREAD_SLAB:
return is_spread_slab(cs);
+#ifdef CONFIG_CPUSETS_NO_HZ
+ case FILE_ADAPTIVE_NOHZ:
+ return is_adaptive_nohz(cs);
+#endif
default:
BUG();
}
@@ -1794,6 +1837,15 @@ static struct cftype files[] = {
.write_u64 = cpuset_write_u64,
.private = FILE_SPREAD_SLAB,
},
+
+#ifdef CONFIG_CPUSETS_NO_HZ
+ {
+ .name = "adaptive_nohz",
+ .read_u64 = cpuset_read_u64,
+ .write_u64 = cpuset_write_u64,
+ .private = FILE_ADAPTIVE_NOHZ,
+ },
+#endif
};

static struct cftype cft_memory_pressure_enabled = {
--
1.7.5.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/