[PATCH 7/7] smp/hotplug: Hotplug state fail injection

From: Peter Zijlstra
Date: Wed Sep 20 2017 - 13:07:16 EST


Add a per-CPU sysfs file that makes a specific hotplug state fail once.
This can be used to test the state rollback code paths.

Something like this (hotplug-up.sh):

#!/bin/bash

# Write 0 to sched_debug and enable the cpuhp trace events.
echo 0 > /debug/sched_debug
echo 1 > /debug/tracing/events/cpuhp/enable

# Default to the first ':'-separated field of every line in the states
# file; a list of states can be passed as $1 instead.
ALL_STATES=`cat /sys/devices/system/cpu/hotplug/states | cut -d':' -f1`
STATES=${1:-$ALL_STATES}

for state in $STATES
do
# Start each iteration with cpu1 offline and a cleared trace buffer.
echo 0 > /sys/devices/system/cpu/cpu1/online
echo 0 > /debug/tracing/trace
echo Fail state: $state
# Request the failure, read the file back, then try to online cpu1.
echo $state > /sys/devices/system/cpu/cpu1/hotplug/fail
cat /sys/devices/system/cpu/cpu1/hotplug/fail
echo 1 > /sys/devices/system/cpu/cpu1/online

# Keep one trace file per injected state.
cat /debug/tracing/trace > hotfail-${state}.trace

sleep 1
done

This can be used to test all possible rollback scenarios (barring
multi-instance) on CPU-up; CPU-down is a trivial modification of the above.

Signed-off-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx>
---
include/linux/cpuhotplug.h | 3 +-
kernel/cpu.c | 60 ++++++++++++++++++++++++++++++++++++++++++++-
2 files changed, 61 insertions(+), 2 deletions(-)

--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -22,7 +22,8 @@
*/

enum cpuhp_state {
- CPUHP_OFFLINE,
+ CPUHP_INVALID = -1,
+ CPUHP_OFFLINE = 0,
CPUHP_CREATE_THREADS,
CPUHP_PERF_PREPARE,
CPUHP_PERF_X86_PREPARE,
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -52,6 +52,7 @@
struct cpuhp_cpu_state {
enum cpuhp_state state;
enum cpuhp_state target;
+ enum cpuhp_state fail;
#ifdef CONFIG_SMP
struct task_struct *thread;
bool should_run;
@@ -67,7 +68,9 @@ struct cpuhp_cpu_state {
#endif
};

-static DEFINE_PER_CPU(struct cpuhp_cpu_state, cpuhp_state);
+static DEFINE_PER_CPU(struct cpuhp_cpu_state, cpuhp_state) = {
+ .fail = CPUHP_INVALID,
+};

#if defined(CONFIG_LOCKDEP) && defined(CONFIG_SMP)
static struct lockdep_map cpuhp_state_up_map =
@@ -180,6 +183,15 @@ static int cpuhp_invoke_callback(unsigne
int (*cb)(unsigned int cpu);
int ret, cnt;

+ if (st->fail == state) {
+ st->fail = CPUHP_INVALID;
+
+ if (!(bringup ? step->startup.single : step->teardown.single))
+ return 0;
+
+ return -EAGAIN;
+ }
+
if (!step->multi_instance) {
WARN_ON_ONCE(lastp && *lastp);
cb = bringup ? step->startup.single : step->teardown.single;
@@ -1806,9 +1818,55 @@ static ssize_t show_cpuhp_target(struct
}
static DEVICE_ATTR(target, 0644, show_cpuhp_target, write_cpuhp_target);

+
+/* sysfs store: arm a one-time failure of the hotplug state given in @buf. */
+static ssize_t write_cpuhp_fail(struct device *dev,
+				struct device_attribute *attr,
+				const char *buf, size_t count)
+{
+	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id);
+	struct cpuhp_step *sp;
+	int fail, ret;
+
+	ret = kstrtoint(buf, 10, &fail);
+	if (ret)
+		return ret;
+
+	/* Untrusted input: reject out-of-range states before the step lookup. */
+	if (fail < CPUHP_OFFLINE || fail > CPUHP_ONLINE)
+		return -EINVAL;
+
+	/* Cannot fail STARTING/DYING callbacks. */
+	if (cpuhp_is_atomic_state(fail))
+		return -EINVAL;
+
+	/* Cannot fail anything that doesn't have callbacks. */
+	mutex_lock(&cpuhp_state_mutex);
+	sp = cpuhp_get_step(fail);
+	if (!sp->startup.single && !sp->teardown.single)
+		ret = -EINVAL;
+	mutex_unlock(&cpuhp_state_mutex);
+	if (ret)
+		return ret;
+
+	st->fail = fail;
+	return count;
+}
+
+/* sysfs show: print the fail state currently stored for this CPU. */
+static ssize_t show_cpuhp_fail(struct device *dev,
+			       struct device_attribute *attr, char *buf)
+{
+	/* dev->id selects which CPU's per-CPU hotplug state is reported. */
+	return sprintf(buf, "%d\n", per_cpu_ptr(&cpuhp_state, dev->id)->fail);
+}
+
+static DEVICE_ATTR(fail, 0644, show_cpuhp_fail, write_cpuhp_fail);
+
static struct attribute *cpuhp_cpu_attrs[] = {
&dev_attr_state.attr,
&dev_attr_target.attr,
+ &dev_attr_fail.attr,
NULL
};