[PATCH v2] kcov: fix state corruption under CONFIG_PREEMPT_RT by eliminating per-cpu data
From: Tetsuo Handa
Date: Wed May 06 2026 - 07:56:19 EST
Problem:

In CONFIG_PREEMPT_RT=y kernels, KCOV hits logical errors and WARNINGs
(as reported by syzbot). The root cause is a twofold mismatch between
KCOV's design and the RT preemption model:

1. Reentrancy on the same CPU: KCOV uses per-CPU variables
(kcov_percpu_data) to save/restore state and to provide a temporary
irq_area. On RT kernels, local_lock_irqsave() does not disable
preemption, so a task executing KCOV code can be preempted by a
threaded softirq on the same CPU. If the softirq also triggers KCOV,
it overwrites the per-CPU data and corrupts the preempted task's state
(see the sketch after this list).

2. Context confusion: PREEMPT_RT often executes softirqs within the
task_struct context of the currently running task (e.g. a kworker or a
threaded IRQ handler). Since KCOV relies on in_task() to decide whether
to modify current->kcov_mode, it mistakenly modifies the kcov_mode of
an unrelated kworker when a softirq "borrows" that task's context.
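To make case 1 concrete, a condensed sketch of the race (pseudo-code;
the field names follow kcov_percpu_data, but the control flow is
simplified and is not verbatim kernel code):

	struct kcov_percpu_data *data = this_cpu_ptr(&kcov_percpu_data);
	struct task_struct *t = current;

	/* Context X runs kcov_remote_start() and stashes the
	 * interrupted task's live state in the per-CPU slot: */
	data->saved_mode = t->kcov_mode;
	data->saved_area = t->kcov_area;

	/* On PREEMPT_RT, local_lock_irqsave() left X preemptible, so a
	 * second context Y can run on this CPU before X finishes and
	 * reuse the very same slot: */
	data->saved_mode = t->kcov_mode;	/* clobbers X's saved state */
	data->saved_area = t->kcov_area;

	/* When X later reaches kcov_remote_stop(), it restores Y's
	 * values instead of its own. */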
Solution:

This patch eliminates the use of per-CPU data structures for KCOV when
CONFIG_PREEMPT_RT is enabled, moving the necessary state into
task_struct.

* Task-local storage: Add kcov_saved_* fields to task_struct under
CONFIG_PREEMPT_RT. Even if a task is preempted during remote coverage
collection, its saved state remains isolated and travels with the task
itself.

* No irq_area sharing: On RT kernels, kcov_remote_start() now always
uses kcov_remote_area_get() (the remote area pool) instead of the
shared per-CPU irq_area. This prevents buffer collisions between
preempting contexts on the same CPU.

* Consistent context checks: Replace the unreliable in_task() checks
with explicit in_hardirq() || in_nmi() guards in critical paths, so
that KCOV's state machine stays consistent regardless of whether a
softirq is threaded or "borrowing" a task context (contrasted in the
sketch after this list).

* Conditional lock removal: Since all data is now task-local in the RT
case, the local_lock is no longer needed for CONFIG_PREEMPT_RT=y,
removing unnecessary locking overhead.
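The context-check change from the third bullet, side by side (both
forms appear in the kcov_remote_start() hunk below):

	/* Before: accept only task context or a real softirq. */
	if (!in_task() && !in_softirq_really())
		return;

	/* After: reject only hard IRQ and NMI; task and softirq
	 * contexts (threaded or not) proceed uniformly. */
	if (in_hardirq() || in_nmi())
		return;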
This change makes KCOV fully reentrant and safe under the PREEMPT_RT
execution model without sacrificing the performance of non-RT kernels.
For callers, the usual remote coverage annotation pattern (shown below)
is unaffected.
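For reference, the caller-side pattern looks like this (a typical
remote coverage annotation in a background kernel thread; the handle
components are illustrative):

	/* Attribute this span of background work to whichever fuzzer
	 * instance registered the matching handle. */
	kcov_remote_start(kcov_remote_handle(KCOV_SUBSYSTEM_COMMON, id));
	/* ... work whose coverage is collected remotely ... */
	kcov_remote_stop();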
Link: https://syzkaller.appspot.com/bug?extid=e6686317bd9fe911591a
Analyzed-by: AI Mode in Google Search (no mail address)
Fixes: 5ff3b30ab57d ("kcov: collect coverage from interrupts")
Signed-off-by: Tetsuo Handa <penguin-kernel@xxxxxxxxxxxxxxxxxxx>
---
Only compile-tested. I don't have an environment to measure how
ignoring CONFIG_KCOV_IRQ_AREA_SIZE affects reliability / performance.
We might need to enforce a minimum area size for the
in_serving_softirq() case by rejecting ioctl(KCOV_REMOTE_ENABLE)
requests where remote_arg->area_size < CONFIG_KCOV_IRQ_AREA_SIZE; a
sketch of such a check follows. Please be sure to test this patch with
both CONFIG_PREEMPT_RT=y and CONFIG_PREEMPT_RT=n kernels in a local
syzkaller environment before sending upstream.
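A sketch of that enforcement (hypothetical, not part of this patch; it
would sit where ioctl(KCOV_REMOTE_ENABLE) validates
struct kcov_remote_arg):

	/* Hypothetical check, not included in this patch: ensure a
	 * softirq that now draws from the remote area pool still gets
	 * a buffer at least as large as the old per-CPU irq_area. */
	if (remote_arg->area_size < CONFIG_KCOV_IRQ_AREA_SIZE)
		return -EINVAL;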
include/linux/sched.h | 10 +++++
kernel/kcov.c | 87 ++++++++++++++++++++++++++++++++++---------
2 files changed, 79 insertions(+), 18 deletions(-)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 368c7b4d7cb5..2c963f4271d6 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1522,6 +1522,16 @@ struct task_struct {
/* Collect coverage from softirq context: */
unsigned int kcov_softirq;
+
+#ifdef CONFIG_PREEMPT_RT
+ /* Temporary storage for preempting remote coverage collection: */
+ unsigned int kcov_saved_mode;
+ unsigned int kcov_saved_size;
+ void *kcov_saved_area;
+ struct kcov *kcov_saved_kcov;
+ int kcov_saved_sequence;
+#endif
+
#endif
#ifdef CONFIG_MEMCG_V1
diff --git a/kernel/kcov.c b/kernel/kcov.c
index 0b369e88c7c9..3178a0e03c3b 100644
--- a/kernel/kcov.c
+++ b/kernel/kcov.c
@@ -88,6 +88,7 @@ static DEFINE_SPINLOCK(kcov_remote_lock);
static DEFINE_HASHTABLE(kcov_remote_map, 4);
static struct list_head kcov_remote_areas = LIST_HEAD_INIT(kcov_remote_areas);
+#ifndef CONFIG_PREEMPT_RT
struct kcov_percpu_data {
void *irq_area;
local_lock_t lock;
@@ -102,6 +103,7 @@ struct kcov_percpu_data {
static DEFINE_PER_CPU(struct kcov_percpu_data, kcov_percpu_data) = {
.lock = INIT_LOCAL_LOCK(lock),
};
+#endif
/* Must be called with kcov_remote_lock locked. */
static struct kcov_remote *kcov_remote_find(u64 handle)
@@ -823,6 +825,44 @@ static inline bool kcov_mode_enabled(unsigned int mode)
return (mode & ~KCOV_IN_CTXSW) != KCOV_MODE_DISABLED;
}
+#ifdef CONFIG_PREEMPT_RT
+#define kcov_local_lock_irqsave(flags) ((void)(flags))
+#define kcov_local_unlock_irqrestore(flags) ((void)(flags))
+
+static void kcov_remote_softirq_start(struct task_struct *t)
+{
+ unsigned int mode;
+
+ mode = READ_ONCE(t->kcov_mode);
+ barrier();
+ if (kcov_mode_enabled(mode)) {
+ t->kcov_saved_mode = mode;
+ t->kcov_saved_size = t->kcov_size;
+ t->kcov_saved_area = t->kcov_area;
+ t->kcov_saved_sequence = t->kcov_sequence;
+ t->kcov_saved_kcov = t->kcov;
+ kcov_stop(t);
+ }
+}
+
+static void kcov_remote_softirq_stop(struct task_struct *t)
+{
+ if (t->kcov_saved_kcov) {
+ kcov_start(t, t->kcov_saved_kcov, t->kcov_saved_size,
+ t->kcov_saved_area, t->kcov_saved_mode,
+ t->kcov_saved_sequence);
+ t->kcov_saved_mode = 0;
+ t->kcov_saved_size = 0;
+ t->kcov_saved_area = NULL;
+ t->kcov_saved_sequence = 0;
+ t->kcov_saved_kcov = NULL;
+ }
+}
+
+#else
+#define kcov_local_lock_irqsave(flags) local_lock_irqsave(&kcov_percpu_data.lock, flags)
+#define kcov_local_unlock_irqrestore(flags) local_unlock_irqrestore(&kcov_percpu_data.lock, flags)
+
static void kcov_remote_softirq_start(struct task_struct *t)
__must_hold(&kcov_percpu_data.lock)
{
@@ -858,6 +898,8 @@ static void kcov_remote_softirq_stop(struct task_struct *t)
}
}
+#endif
+
void kcov_remote_start(u64 handle)
{
struct task_struct *t = current;
@@ -869,12 +911,13 @@ void kcov_remote_start(u64 handle)
int sequence;
unsigned long flags;
- if (WARN_ON(!kcov_check_handle(handle, true, true, true)))
+ /* Don't use in_task() here: on RT kernels, softirqs may run in task context. */
+ if (in_hardirq() || in_nmi())
return;
- if (!in_task() && !in_softirq_really())
+ if (WARN_ON(!kcov_check_handle(handle, true, true, true)))
return;
- local_lock_irqsave(&kcov_percpu_data.lock, flags);
+ kcov_local_lock_irqsave(flags);
/*
* Check that kcov_remote_start() is not called twice in background
@@ -882,7 +925,7 @@ void kcov_remote_start(u64 handle)
*/
mode = READ_ONCE(t->kcov_mode);
if (WARN_ON(in_task() && kcov_mode_enabled(mode))) {
- local_unlock_irqrestore(&kcov_percpu_data.lock, flags);
+ kcov_local_unlock_irqrestore(flags);
return;
}
/*
@@ -891,7 +934,7 @@ void kcov_remote_start(u64 handle)
* happened while collecting coverage from a background thread.
*/
if (WARN_ON(in_serving_softirq() && t->kcov_softirq)) {
- local_unlock_irqrestore(&kcov_percpu_data.lock, flags);
+ kcov_local_unlock_irqrestore(flags);
return;
}
@@ -899,11 +942,11 @@ void kcov_remote_start(u64 handle)
remote = kcov_remote_find(handle);
if (!remote) {
spin_unlock(&kcov_remote_lock);
- local_unlock_irqrestore(&kcov_percpu_data.lock, flags);
+ kcov_local_unlock_irqrestore(flags);
return;
}
kcov_debug("handle = %llx, context: %s\n", handle,
- in_task() ? "task" : "softirq");
+ IS_ENABLED(CONFIG_PREEMPT_RT) || in_task() ? "task" : "softirq");
kcov = remote->kcov;
/* Put in kcov_remote_stop(). */
kcov_get(kcov);
@@ -915,6 +958,10 @@ void kcov_remote_start(u64 handle)
*/
mode = context_unsafe(kcov->mode);
sequence = kcov->sequence;
+#ifdef CONFIG_PREEMPT_RT
+ size = kcov->remote_size;
+ area = kcov_remote_area_get(size);
+#else
if (in_task()) {
size = kcov->remote_size;
area = kcov_remote_area_get(size);
@@ -922,17 +969,18 @@ void kcov_remote_start(u64 handle)
size = CONFIG_KCOV_IRQ_AREA_SIZE;
area = this_cpu_ptr(&kcov_percpu_data)->irq_area;
}
+#endif
spin_unlock(&kcov_remote_lock);
- /* Can only happen when in_task(). */
+ /* Can only happen when CONFIG_PREEMPT_RT=y or in_task(). */
if (!area) {
- local_unlock_irqrestore(&kcov_percpu_data.lock, flags);
+ kcov_local_unlock_irqrestore(flags);
area = vmalloc(size * sizeof(unsigned long));
if (!area) {
kcov_put(kcov);
return;
}
- local_lock_irqsave(&kcov_percpu_data.lock, flags);
+ kcov_local_lock_irqsave(flags);
}
/* Reset coverage size. */
@@ -944,7 +992,7 @@ void kcov_remote_start(u64 handle)
}
kcov_start(t, kcov, size, area, mode, sequence);
- local_unlock_irqrestore(&kcov_percpu_data.lock, flags);
+ kcov_local_unlock_irqrestore(flags);
}
EXPORT_SYMBOL(kcov_remote_start);
@@ -1024,15 +1072,16 @@ void kcov_remote_stop(void)
int sequence;
unsigned long flags;
- if (!in_task() && !in_softirq_really())
+ /* Don't use in_task() here: on RT kernels, softirqs may run in task context. */
+ if (in_hardirq() || in_nmi())
return;
- local_lock_irqsave(&kcov_percpu_data.lock, flags);
+ kcov_local_lock_irqsave(flags);
mode = READ_ONCE(t->kcov_mode);
barrier();
if (!kcov_mode_enabled(mode)) {
- local_unlock_irqrestore(&kcov_percpu_data.lock, flags);
+ kcov_local_unlock_irqrestore(flags);
return;
}
/*
@@ -1040,12 +1089,12 @@ void kcov_remote_stop(void)
* actually found the remote handle and started collecting coverage.
*/
if (in_serving_softirq() && !t->kcov_softirq) {
- local_unlock_irqrestore(&kcov_percpu_data.lock, flags);
+ kcov_local_unlock_irqrestore(flags);
return;
}
/* Make sure that kcov_softirq is only set when in softirq. */
if (WARN_ON(!in_serving_softirq() && t->kcov_softirq)) {
- local_unlock_irqrestore(&kcov_percpu_data.lock, flags);
+ kcov_local_unlock_irqrestore(flags);
return;
}
@@ -1069,13 +1118,13 @@ void kcov_remote_stop(void)
kcov_move_area(kcov->mode, kcov->area, kcov->size, area);
spin_unlock(&kcov->lock);
- if (in_task()) {
+ if (IS_ENABLED(CONFIG_PREEMPT_RT) || in_task()) {
spin_lock(&kcov_remote_lock);
kcov_remote_area_put(area, size);
spin_unlock(&kcov_remote_lock);
}
- local_unlock_irqrestore(&kcov_percpu_data.lock, flags);
+ kcov_local_unlock_irqrestore(flags);
/* Get in kcov_remote_start(). */
kcov_put(kcov);
@@ -1119,6 +1168,7 @@ static void __init selftest(void)
static int __init kcov_init(void)
{
+#ifndef CONFIG_PREEMPT_RT
int cpu;
for_each_possible_cpu(cpu) {
@@ -1128,6 +1178,7 @@ static int __init kcov_init(void)
return -ENOMEM;
per_cpu_ptr(&kcov_percpu_data, cpu)->irq_area = area;
}
+#endif
/*
* The kcov debugfs file won't ever get removed and thus,
--
2.47.3