Re: [PATCH v3] kcov: move kcov_remote_data to task_struct for RT and remove local_lock
From: Dmitry Vyukov
Date: Fri May 15 2026 - 03:12:44 EST
On Sat, 9 May 2026 at 10:09, Tetsuo Handa
<penguin-kernel@xxxxxxxxxxxxxxxxxxx> wrote:
>
> In CONFIG_PREEMPT_RT=y kernels, softirqs are executed in a per-CPU
> ksoftirqd thread or in the context of the task that raised the softirq.
> This means in_task() can return true even while serving softirqs. This
> behavior causes KCOV to incorrectly identify the context, leading to state
> corruption and various kcov-related warnings reported by syzbot.
>
> Furthermore, commit d5d2c51f1e5f ("kcov: replace local_irq_save() with a
> local_lock_t") introduced local_lock to protect per-CPU kcov_percpu_data.
> However, the need for this physical CPU-level locking is questionable
> because:
>
> 1. kcov_remote_start/stop() already bail out early if called from
> in_hardirq() or in_nmi().
> 2. The core tracing function check_kcov_mode() also skips hardirq/NMI
> contexts, meaning no KCOV state is accessed during hardirqs even if
> they interrupt a KCOV-enabled task/softirq.
> 3. In non-RT kernels, softirqs do not nest, so no concurrent access to
> per-CPU data occurs between softirqs.
> 4. In RT kernels, while softirqs can be preempted, this patch moves the
> KCOV state from per-CPU variables to task_struct (per-task), eliminating
> the contention on shared per-CPU resources.
>
> By moving kcov_remote_data to task_struct for RT kernels and replacing
> the in_task() check with !in_serving_softirq(), we ensure consistent
> context detection. Since the data is now isolated per-task and not accessed
> by hardirqs, the local_lock (and the original local_irq_save) is no longer
> necessary and is removed to reduce overhead.
>
> Changes:
>
> * Move remote coverage state from kcov_percpu_data to task_struct under
> CONFIG_PREEMPT_RT.
> * Replace in_task() with !in_serving_softirq() in kcov_remote_start/stop()
> for accurate context tracking.
> * Remove local_lock and IRQ disabling from kcov_remote_start/stop() as the
> state is now task-local and hardirqs are already excluded.
> * Ensure CONFIG_PREEMPT_RT=y uses kcov_remote_area_get() (the
> vmalloc-backed pool) instead of the single per-CPU irq_area.
>
> Analyzed-by: AI Mode in Google Search (no mail address)
> Signed-off-by: Tetsuo Handa <penguin-kernel@xxxxxxxxxxxxxxxxxxx>
> Fixes: 5ff3b30ab57d ("kcov: collect coverage from interrupts")
> ---
> Only compile tested. I don't have environment to measure how not
+Aleksandr Nogikh
I don't see this patch on syzbot CI:
https://ci.syzbot.org/?name=kcov%3A+move+kcov_remote_data+to+task_struct+for+RT+and+remove+local_lock
How can we make it at least tested on syzbot CI? Otherwise it risks
breaking syzbot when merged.
> preallocating CONFIG_KCOV_IRQ_AREA_SIZE bytes of buffers and
> add to the global pool in kcov_init() impacts reliability / performance.
> Please be sure to test this patch using CONFIG_PREEMPT_RT=y and
> CONFIG_PREEMPT_RT=n kernels in local syzkaller environment before
> sending upstream.
>
> This patch is expected to address various KCOV related reports.
> But I don't add Closes: lines because we can't tell whether this patch
> alone is sufficient for marking as Closes:. We will find the answer
> some time after being sent to upstream.
>
> https://syzkaller.appspot.com/bug?extid=90984d3713722683112e
> https://syzkaller.appspot.com/bug?extid=47cf95ca1f9dcca872c8
> https://syzkaller.appspot.com/bug?extid=8a173e13208949931dc7
> https://syzkaller.appspot.com/bug?extid=3f51ad7ac3ae57a6fdcc
> https://syzkaller.appspot.com/bug?extid=e6686317bd9fe911591a
>
> include/linux/sched.h | 10 +++++
> kernel/kcov.c | 94 +++++++++++++++++++++++++------------------
> lib/Kconfig.debug | 1 +
> 3 files changed, 65 insertions(+), 40 deletions(-)
>
> diff --git a/include/linux/sched.h b/include/linux/sched.h
> index 368c7b4d7cb5..2c963f4271d6 100644
> --- a/include/linux/sched.h
> +++ b/include/linux/sched.h
> @@ -1522,6 +1522,16 @@ struct task_struct {
>
> /* Collect coverage from softirq context: */
> unsigned int kcov_softirq;
> +
> +#ifdef CONFIG_PREEMPT_RT
Is it possible to unify the logic between RT and non-RT kernels (not
have any ifdef's)?
The logic is already rather complicated, having 2 work modes, and
limited testing will be problematic in the future.
> + /* Temporary storage for preempting remote coverage collection: */
> + unsigned int kcov_saved_mode;
> + unsigned int kcov_saved_size;
> + void *kcov_saved_area;
> + struct kcov *kcov_saved_kcov;
> + int kcov_saved_sequence;
> +#endif
> +
> #endif
>
> #ifdef CONFIG_MEMCG_V1
> diff --git a/kernel/kcov.c b/kernel/kcov.c
> index 0b369e88c7c9..965c11a75b36 100644
> --- a/kernel/kcov.c
> +++ b/kernel/kcov.c
> @@ -88,9 +88,9 @@ static DEFINE_SPINLOCK(kcov_remote_lock);
> static DEFINE_HASHTABLE(kcov_remote_map, 4);
> static struct list_head kcov_remote_areas = LIST_HEAD_INIT(kcov_remote_areas);
>
> +#ifndef CONFIG_PREEMPT_RT
> struct kcov_percpu_data {
> void *irq_area;
> - local_lock_t lock;
>
> unsigned int saved_mode;
> unsigned int saved_size;
> @@ -100,8 +100,8 @@ struct kcov_percpu_data {
> };
>
> static DEFINE_PER_CPU(struct kcov_percpu_data, kcov_percpu_data) = {
> - .lock = INIT_LOCAL_LOCK(lock),
> };
> +#endif
>
> /* Must be called with kcov_remote_lock locked. */
> static struct kcov_remote *kcov_remote_find(u64 handle)
> @@ -823,8 +823,38 @@ static inline bool kcov_mode_enabled(unsigned int mode)
> return (mode & ~KCOV_IN_CTXSW) != KCOV_MODE_DISABLED;
> }
>
> +#ifdef CONFIG_PREEMPT_RT
> +static void kcov_remote_softirq_start(struct task_struct *t)
> +{
> + unsigned int mode;
> +
> + mode = READ_ONCE(t->kcov_mode);
> + barrier();
> + if (kcov_mode_enabled(mode)) {
> + t->kcov_saved_mode = mode;
> + t->kcov_saved_size = t->kcov_size;
> + t->kcov_saved_area = t->kcov_area;
> + t->kcov_saved_sequence = t->kcov_sequence;
> + t->kcov_saved_kcov = t->kcov;
> + kcov_stop(t);
> + }
> +}
> +
> +static void kcov_remote_softirq_stop(struct task_struct *t)
> +{
> + if (t->kcov_saved_kcov) {
> + kcov_start(t, t->kcov_saved_kcov, t->kcov_saved_size,
> + t->kcov_saved_area, t->kcov_saved_mode,
> + t->kcov_saved_sequence);
> + t->kcov_saved_mode = 0;
> + t->kcov_saved_size = 0;
> + t->kcov_saved_area = NULL;
> + t->kcov_saved_sequence = 0;
> + t->kcov_saved_kcov = NULL;
> + }
> +}
> +#else
> static void kcov_remote_softirq_start(struct task_struct *t)
> - __must_hold(&kcov_percpu_data.lock)
> {
> struct kcov_percpu_data *data = this_cpu_ptr(&kcov_percpu_data);
> unsigned int mode;
> @@ -842,7 +872,6 @@ static void kcov_remote_softirq_start(struct task_struct *t)
> }
>
> static void kcov_remote_softirq_stop(struct task_struct *t)
> - __must_hold(&kcov_percpu_data.lock)
> {
> struct kcov_percpu_data *data = this_cpu_ptr(&kcov_percpu_data);
>
> @@ -857,6 +886,7 @@ static void kcov_remote_softirq_stop(struct task_struct *t)
> data->saved_kcov = NULL;
> }
> }
> +#endif
>
> void kcov_remote_start(u64 handle)
> {
> @@ -867,43 +897,35 @@ void kcov_remote_start(u64 handle)
> void *area;
> unsigned int size;
> int sequence;
> - unsigned long flags;
>
> - if (WARN_ON(!kcov_check_handle(handle, true, true, true)))
> + /* Don't use in_task() in order to allow consistent checks in RT kernels. */
> + if (in_hardirq() || in_nmi())
> return;
> - if (!in_task() && !in_softirq_really())
> + if (WARN_ON(!kcov_check_handle(handle, true, true, true)))
> return;
>
> - local_lock_irqsave(&kcov_percpu_data.lock, flags);
> -
> /*
> * Check that kcov_remote_start() is not called twice in background
> * threads nor called by user tasks (with enabled kcov).
> */
> - mode = READ_ONCE(t->kcov_mode);
> - if (WARN_ON(in_task() && kcov_mode_enabled(mode))) {
> - local_unlock_irqrestore(&kcov_percpu_data.lock, flags);
> + if (WARN_ON(!in_serving_softirq() && kcov_mode_enabled(READ_ONCE(t->kcov_mode))))
> return;
> - }
> /*
> * Check that kcov_remote_start() is not called twice in softirqs.
> * Note, that kcov_remote_start() can be called from a softirq that
> * happened while collecting coverage from a background thread.
> */
> - if (WARN_ON(in_serving_softirq() && t->kcov_softirq)) {
> - local_unlock_irqrestore(&kcov_percpu_data.lock, flags);
> + if (WARN_ON(in_serving_softirq() && t->kcov_softirq))
> return;
> - }
>
> spin_lock(&kcov_remote_lock);
> remote = kcov_remote_find(handle);
> if (!remote) {
> spin_unlock(&kcov_remote_lock);
> - local_unlock_irqrestore(&kcov_percpu_data.lock, flags);
> return;
> }
> kcov_debug("handle = %llx, context: %s\n", handle,
> - in_task() ? "task" : "softirq");
> + !in_serving_softirq() ? "task" : "softirq");
> kcov = remote->kcov;
> /* Put in kcov_remote_stop(). */
> kcov_get(kcov);
> @@ -915,6 +937,10 @@ void kcov_remote_start(u64 handle)
> */
> mode = context_unsafe(kcov->mode);
> sequence = kcov->sequence;
> +#ifdef CONFIG_PREEMPT_RT
> + size = kcov->remote_size;
> + area = kcov_remote_area_get(size);
> +#else
> if (in_task()) {
> size = kcov->remote_size;
> area = kcov_remote_area_get(size);
> @@ -922,17 +948,16 @@ void kcov_remote_start(u64 handle)
> size = CONFIG_KCOV_IRQ_AREA_SIZE;
> area = this_cpu_ptr(&kcov_percpu_data)->irq_area;
> }
> +#endif
> spin_unlock(&kcov_remote_lock);
>
> - /* Can only happen when in_task(). */
> + /* Can only happen when CONFIG_PREEMPT_RT=y or in_task(). */
> if (!area) {
> - local_unlock_irqrestore(&kcov_percpu_data.lock, flags);
> area = vmalloc(size * sizeof(unsigned long));
> if (!area) {
> kcov_put(kcov);
> return;
> }
> - local_lock_irqsave(&kcov_percpu_data.lock, flags);
> }
>
> /* Reset coverage size. */
> @@ -943,9 +968,6 @@ void kcov_remote_start(u64 handle)
> t->kcov_softirq = 1;
> }
> kcov_start(t, kcov, size, area, mode, sequence);
> -
> - local_unlock_irqrestore(&kcov_percpu_data.lock, flags);
> -
> }
> EXPORT_SYMBOL(kcov_remote_start);
>
> @@ -1022,32 +1044,24 @@ void kcov_remote_stop(void)
> void *area;
> unsigned int size;
> int sequence;
> - unsigned long flags;
>
> - if (!in_task() && !in_softirq_really())
> + /* Don't use in_task() in order to allow consistent checks in RT kernels. */
> + if (in_hardirq() || in_nmi())
> return;
>
> - local_lock_irqsave(&kcov_percpu_data.lock, flags);
> -
> mode = READ_ONCE(t->kcov_mode);
> barrier();
> - if (!kcov_mode_enabled(mode)) {
> - local_unlock_irqrestore(&kcov_percpu_data.lock, flags);
> + if (!kcov_mode_enabled(mode))
> return;
> - }
> /*
> * When in softirq, check if the corresponding kcov_remote_start()
> * actually found the remote handle and started collecting coverage.
> */
> - if (in_serving_softirq() && !t->kcov_softirq) {
> - local_unlock_irqrestore(&kcov_percpu_data.lock, flags);
> + if (in_serving_softirq() && !t->kcov_softirq)
> return;
> - }
> /* Make sure that kcov_softirq is only set when in softirq. */
> - if (WARN_ON(!in_serving_softirq() && t->kcov_softirq)) {
> - local_unlock_irqrestore(&kcov_percpu_data.lock, flags);
> + if (WARN_ON(!in_serving_softirq() && t->kcov_softirq))
> return;
> - }
>
> kcov = t->kcov;
> area = t->kcov_area;
> @@ -1069,14 +1083,12 @@ void kcov_remote_stop(void)
> kcov_move_area(kcov->mode, kcov->area, kcov->size, area);
> spin_unlock(&kcov->lock);
>
> - if (in_task()) {
> + if (IS_ENABLED(CONFIG_PREEMPT_RT) || in_task()) {
> spin_lock(&kcov_remote_lock);
> kcov_remote_area_put(area, size);
> spin_unlock(&kcov_remote_lock);
> }
>
> - local_unlock_irqrestore(&kcov_percpu_data.lock, flags);
> -
> /* Get in kcov_remote_start(). */
> kcov_put(kcov);
> }
> @@ -1119,6 +1131,7 @@ static void __init selftest(void)
>
> static int __init kcov_init(void)
> {
> +#ifndef CONFIG_PREEMPT_RT
> int cpu;
>
> for_each_possible_cpu(cpu) {
> @@ -1128,6 +1141,7 @@ static int __init kcov_init(void)
> return -ENOMEM;
> per_cpu_ptr(&kcov_percpu_data, cpu)->irq_area = area;
> }
> +#endif
>
> /*
> * The kcov debugfs file won't ever get removed and thus,
> diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
> index 13f3297aa823..493be2c73c9d 100644
> --- a/lib/Kconfig.debug
> +++ b/lib/Kconfig.debug
> @@ -2247,6 +2247,7 @@ config KCOV_INSTRUMENT_ALL
> config KCOV_IRQ_AREA_SIZE
> hex "Size of interrupt coverage collection area in words"
> depends on KCOV
> + depends on !PREEMPT_RT
> default 0x40000
> help
> KCOV uses preallocated per-cpu areas to collect coverage from
> --
> 2.47.3
>
>