[PATCH] kcsan: Add option to allow watcher interruptions

From: Marco Elver
Date: Thu Feb 20 2020 - 09:16:05 EST


Add option to allow interrupts while a watchpoint is set up. This can be
enabled either via CONFIG_KCSAN_INTERRUPT_WATCHER or via the boot
parameter 'kcsan.interrupt_watcher=1'.

Note that, currently not all safe per-CPU access primitives and patterns
are accounted for, which could result in false positives. For example,
asm-generic/percpu.h uses plain operations, which by default are
instrumented. On interrupts and subsequent accesses to the same
variable, KCSAN would currently report a data race with this option.

Therefore, this option should currently remain disabled by default, but
may be enabled for specific test scenarios.

Signed-off-by: Marco Elver <elver@xxxxxxxxxx>
---

As an example, the first data race that this found:

write to 0xffff88806b3324b8 of 4 bytes by interrupt on cpu 0:
rcu_preempt_read_enter kernel/rcu/tree_plugin.h:353 [inline]
__rcu_read_lock+0x3c/0x50 kernel/rcu/tree_plugin.h:373
rcu_read_lock include/linux/rcupdate.h:599 [inline]
cpuacct_charge+0x36/0x80 kernel/sched/cpuacct.c:347
cgroup_account_cputime include/linux/cgroup.h:773 [inline]
update_curr+0xe2/0x1d0 kernel/sched/fair.c:860
enqueue_entity+0x130/0x5d0 kernel/sched/fair.c:4005
enqueue_task_fair+0xb0/0x420 kernel/sched/fair.c:5260
enqueue_task kernel/sched/core.c:1302 [inline]
activate_task+0x6d/0x110 kernel/sched/core.c:1324
ttwu_do_activate.isra.0+0x40/0x60 kernel/sched/core.c:2266
ttwu_queue kernel/sched/core.c:2411 [inline]
try_to_wake_up+0x3be/0x6c0 kernel/sched/core.c:2645
wake_up_process+0x10/0x20 kernel/sched/core.c:2669
hrtimer_wakeup+0x4c/0x60 kernel/time/hrtimer.c:1769
__run_hrtimer kernel/time/hrtimer.c:1517 [inline]
__hrtimer_run_queues+0x274/0x5f0 kernel/time/hrtimer.c:1579
hrtimer_interrupt+0x22d/0x490 kernel/time/hrtimer.c:1641
local_apic_timer_interrupt arch/x86/kernel/apic/apic.c:1119 [inline]
smp_apic_timer_interrupt+0xdc/0x280 arch/x86/kernel/apic/apic.c:1144
apic_timer_interrupt+0xf/0x20 arch/x86/entry/entry_64.S:829
delay_tsc+0x38/0xc0 arch/x86/lib/delay.c:68 <--- interrupt while delayed
__delay arch/x86/lib/delay.c:161 [inline]
__const_udelay+0x33/0x40 arch/x86/lib/delay.c:175
__udelay+0x10/0x20 arch/x86/lib/delay.c:181
kcsan_setup_watchpoint+0x17f/0x400 kernel/kcsan/core.c:428
check_access kernel/kcsan/core.c:550 [inline]
__tsan_read4+0xc6/0x100 kernel/kcsan/core.c:685 <--- Enter KCSAN runtime
rcu_preempt_read_enter kernel/rcu/tree_plugin.h:353 [inline] <---+
__rcu_read_lock+0x2a/0x50 kernel/rcu/tree_plugin.h:373 |
rcu_read_lock include/linux/rcupdate.h:599 [inline] |
lock_page_memcg+0x31/0x110 mm/memcontrol.c:1972 |
|
read to 0xffff88806b3324b8 of 4 bytes by task 6131 on cpu 0: |
rcu_preempt_read_enter kernel/rcu/tree_plugin.h:353 [inline] ----+
__rcu_read_lock+0x2a/0x50 kernel/rcu/tree_plugin.h:373
rcu_read_lock include/linux/rcupdate.h:599 [inline]
lock_page_memcg+0x31/0x110 mm/memcontrol.c:1972

The writer is doing 'current->rcu_read_lock_nesting++'. The read is as
vulnerable to compiler optimizations and would therefore conclude this
is a valid data race.
---
kernel/kcsan/core.c | 30 ++++++++----------------------
lib/Kconfig.kcsan | 11 +++++++++++
2 files changed, 19 insertions(+), 22 deletions(-)

diff --git a/kernel/kcsan/core.c b/kernel/kcsan/core.c
index 589b1e7f0f253..43eb5f850c68e 100644
--- a/kernel/kcsan/core.c
+++ b/kernel/kcsan/core.c
@@ -21,6 +21,7 @@ static bool kcsan_early_enable = IS_ENABLED(CONFIG_KCSAN_EARLY_ENABLE);
static unsigned int kcsan_udelay_task = CONFIG_KCSAN_UDELAY_TASK;
static unsigned int kcsan_udelay_interrupt = CONFIG_KCSAN_UDELAY_INTERRUPT;
static long kcsan_skip_watch = CONFIG_KCSAN_SKIP_WATCH;
+static bool kcsan_interrupt_watcher = IS_ENABLED(CONFIG_KCSAN_INTERRUPT_WATCHER);

#ifdef MODULE_PARAM_PREFIX
#undef MODULE_PARAM_PREFIX
@@ -30,6 +31,7 @@ module_param_named(early_enable, kcsan_early_enable, bool, 0);
module_param_named(udelay_task, kcsan_udelay_task, uint, 0644);
module_param_named(udelay_interrupt, kcsan_udelay_interrupt, uint, 0644);
module_param_named(skip_watch, kcsan_skip_watch, long, 0644);
+module_param_named(interrupt_watcher, kcsan_interrupt_watcher, bool, 0444);

bool kcsan_enabled;

@@ -354,7 +356,7 @@ kcsan_setup_watchpoint(const volatile void *ptr, size_t size, int type)
unsigned long access_mask;
enum kcsan_value_change value_change = KCSAN_VALUE_CHANGE_MAYBE;
unsigned long ua_flags = user_access_save();
- unsigned long irq_flags;
+ unsigned long irq_flags = 0;

/*
* Always reset kcsan_skip counter in slow-path to avoid underflow; see
@@ -370,26 +372,9 @@ kcsan_setup_watchpoint(const volatile void *ptr, size_t size, int type)
goto out;
}

- /*
- * Disable interrupts & preemptions to avoid another thread on the same
- * CPU accessing memory locations for the set up watchpoint; this is to
- * avoid reporting races to e.g. CPU-local data.
- *
- * An alternative would be adding the source CPU to the watchpoint
- * encoding, and checking that watchpoint-CPU != this-CPU. There are
- * several problems with this:
- * 1. we should avoid stealing more bits from the watchpoint encoding
- * as it would affect accuracy, as well as increase performance
- * overhead in the fast-path;
- * 2. if we are preempted, but there *is* a genuine data race, we
- * would *not* report it -- since this is the common case (vs.
- * CPU-local data accesses), it makes more sense (from a data race
- * detection point of view) to simply disable preemptions to ensure
- * as many tasks as possible run on other CPUs.
- *
- * Use raw versions, to avoid lockdep recursion via IRQ flags tracing.
- */
- raw_local_irq_save(irq_flags);
+ if (!kcsan_interrupt_watcher)
+ /* Use raw to avoid lockdep recursion via IRQ flags tracing. */
+ raw_local_irq_save(irq_flags);

watchpoint = insert_watchpoint((unsigned long)ptr, size, is_write);
if (watchpoint == NULL) {
@@ -524,7 +509,8 @@ kcsan_setup_watchpoint(const volatile void *ptr, size_t size, int type)

kcsan_counter_dec(KCSAN_COUNTER_USED_WATCHPOINTS);
out_unlock:
- raw_local_irq_restore(irq_flags);
+ if (!kcsan_interrupt_watcher)
+ raw_local_irq_restore(irq_flags);
out:
user_access_restore(ua_flags);
}
diff --git a/lib/Kconfig.kcsan b/lib/Kconfig.kcsan
index ba9268076cfbc..0f1447ff8f558 100644
--- a/lib/Kconfig.kcsan
+++ b/lib/Kconfig.kcsan
@@ -101,6 +101,17 @@ config KCSAN_SKIP_WATCH_RANDOMIZE
KCSAN_WATCH_SKIP. If false, the chosen value is always
KCSAN_WATCH_SKIP.

+config KCSAN_INTERRUPT_WATCHER
+ bool "Interruptible watchers"
+ help
+ If enabled, a task that set up a watchpoint may be interrupted while
+ delayed. This option will allow KCSAN to detect races between
+ interrupted tasks and other threads of execution on the same CPU.
+
+ Currently disabled by default, because not all safe per-CPU access
+ primitives and patterns may be accounted for, and therefore could
+ result in false positives.
+
config KCSAN_REPORT_ONCE_IN_MS
int "Duration in milliseconds, in which any given race is only reported once"
default 3000
--
2.25.0.265.gbab2e86ba0-goog