[RFC PATCH v2 1/6] kcov: add per-task dataflow tracking for function arguments/return values
From: Yunseong Kim
Date: Wed Jun 03 2026 - 13:48:27 EST
Add a new KCOV subsystem that captures function arguments at entry and
return values at exit, with automatic struct field expansion using
compiler-generated DebugInfo metadata.
Key components:
- CONFIG_KCOV_DATAFLOW_ARGS: enables argument capture
- CONFIG_KCOV_DATAFLOW_RET: enables return value capture
- /sys/kernel/debug/kcov_dataflow: separate device from legacy kcov
- Ioctl namespace 'd' (KCOV_DF_INIT_TRACE, KCOV_DF_ENABLE, KCOV_DF_DISABLE)
- Per-task buffer: task->kcov_df_area with atomic xadd reservation
- Fault-tolerant: all reads via copy_from_kernel_nofault()
- Recursion-safe: notrace __no_sanitize_coverage noinline
- ERR_PTR aware: skips struct expansion for error pointers
The callbacks (__sanitizer_cov_trace_args/ret) are inserted by the
compiler when -fsanitize-coverage=dataflow-args,dataflow-ret is used.
The Kconfig options depend on cc-option to verify compiler support.
Buffer format (TLV records, all u64):
area[0]: atomic word count
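[pos+0]: type_and_seq (type nibble 0xE=entry / 0xF=return in bits 31:28, sequence number in bits 23:0)
[pos+1]: PC
[pos+2]: meta (arg_idx in bits 63:56 | arg_size in bits 55:48 | low 48 bits of the pointer)
[pos+3..N]: field values read via copy_from_kernel_nofault(), or a single scalar value when num_fields is 0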
This is completely independent from legacy /sys/kernel/debug/kcov.
Existing users (syzkaller, oss-fuzz) are unaffected.
Signed-off-by: Yunseong Kim <yunseong.kim@xxxxxxxx>
---
include/linux/sched.h | 8 ++
kernel/kcov.c | 291 ++++++++++++++++++++++++++++++++++++++++++++++++++
lib/Kconfig.debug | 22 ++++
3 files changed, 321 insertions(+)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index c4433c185ad8..03be4b495f70 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1533,6 +1533,14 @@ struct task_struct {
/* KCOV sequence number: */
int kcov_sequence;
+ /* KCOV dataflow per-task sequence counter for TLV records: */
+ u32 kcov_dataflow_seq;
+
+ /* KCOV dataflow: separate buffer for trace-args/trace-ret */
+ unsigned int kcov_df_size;
+ void *kcov_df_area;
+ bool kcov_df_enabled;
+
/* Collect coverage from softirq context: */
unsigned int kcov_softirq;
#endif
diff --git a/kernel/kcov.c b/kernel/kcov.c
index 1df373fb562b..d3c9c0efe961 100644
--- a/kernel/kcov.c
+++ b/kernel/kcov.c
@@ -353,6 +353,288 @@ void notrace __sanitizer_cov_trace_switch(kcov_u64 val, void *arg)
EXPORT_SYMBOL(__sanitizer_cov_trace_switch);
#endif /* ifdef CONFIG_KCOV_ENABLE_COMPARISONS */
+#if defined(CONFIG_KCOV_DATAFLOW_ARGS) || defined(CONFIG_KCOV_DATAFLOW_RET)
+/*
+ * KCOV Dataflow: /sys/kernel/debug/kcov_dataflow
+ *
+ * Completely separate from legacy /sys/kernel/debug/kcov.
+ * Own buffer, own ioctl, own mmap. No printk — buffer only.
+ *
+ * TLV record layout (all u64):
+ * area[0]: total u64 words written (atomic counter)
+ * [pos+0]: type_and_seq: KCOV_DF_TYPE_ENTRY or KCOV_DF_TYPE_RET (0xE / 0xF in
+ * bits 31:28) OR-ed with the per-task sequence number in bits 23:0
+ * [pos+1]: PC
+ * [pos+2]: meta: (arg_idx << 56) | (arg_size << 48) | low 48 bits of the raw
+ * pointer; return records put ret_size in the size field
+ * [pos+3..N]: field values (or scalar value if num_fields=0)
+ *
+ * KCOV_DF_MAGIC_BAD is the placeholder written into field slots that
+ * kcov_df_write() cannot fill (e.g. when the struct pointer is an error
+ * pointer). KCOV_DF_IS_ERR() is true for the top 4095 values of the address
+ * space.
+ */
+#define KCOV_DF_TYPE_ENTRY 0xE0000000ULL
+#define KCOV_DF_TYPE_RET 0xF0000000ULL
+#define KCOV_DF_MAGIC_BAD 0xBADADD85ULL
+#define KCOV_DF_IS_ERR(p) ((unsigned long)(p) >= (unsigned long)-4095UL)
+
+/* Ioctl commands for /sys/kernel/debug/kcov_dataflow */
+#define KCOV_DF_INIT_TRACE _IOR('d', 1, unsigned long)
+#define KCOV_DF_ENABLE _IO('d', 100)
+#define KCOV_DF_DISABLE _IO('d', 101)
+
+/* Per-open-file state for /sys/kernel/debug/kcov_dataflow. */
+struct kcov_dataflow {
+ refcount_t refcount; /* set to 1 in open(), dropped in release() */
+ spinlock_t lock; /* taken by ioctl/mmap/release around the fields below */
+ unsigned int size; /* in u64 words */
+ void *area; /* vmalloc_user() trace buffer, NULL until INIT_TRACE */
+ struct task_struct *t; /* task that enabled tracing, NULL when disabled */
+};
+
+/*
+ * Drop one reference to @df. Once the last reference is gone, release the
+ * trace buffer and then the descriptor itself.
+ */
+static void kcov_df_put(struct kcov_dataflow *df)
+{
+	if (!refcount_dec_and_test(&df->refcount))
+		return;
+
+	vfree(df->area);
+	kfree(df);
+}
+
+/*
+ * Core write function — no printk, no locks, just atomic buffer write.
+ * Called from __sanitizer_cov_trace_args/ret in instrumented code.
+ *
+ * @type_marker: KCOV_DF_TYPE_ENTRY or KCOV_DF_TYPE_RET
+ * @pc:          program counter supplied by the instrumentation
+ * @meta:        packed metadata word; bits 55:48 give the scalar size
+ * @ptr:         address the scalar value or the struct fields are read from
+ * @offsets:     @num_fields pairs of u64 { offset, size } describing fields
+ * @num_fields:  number of struct fields to record, 0 for a scalar
+ *
+ * Does nothing when the current task has no enabled dataflow buffer, and
+ * drops the record when it does not fit into the remaining space.
+ *
+ * Every value read goes through copy_from_kernel_nofault(); a read that
+ * faults is recorded as KCOV_DF_MAGIC_BAD instead of a zero or partially
+ * copied value, so consumers can tell "unreadable" from "zero".
+ *
+ * NOTE(review): xadd() is assumed to be available on every architecture that
+ * can select the dataflow options — confirm.
+ */
+static noinline notrace __no_sanitize_coverage void
+kcov_df_write(u64 type_marker, u64 pc, u64 meta, void *ptr,
+	      u64 *offsets, u32 num_fields)
+{
+	struct task_struct *t = current;
+	u64 *area;
+	unsigned long pos, max_pos;
+	u32 record_len, seq, i;
+
+	if (!t->kcov_df_enabled)
+		return;
+
+	area = (u64 *)t->kcov_df_area;
+	if (!area)
+		return;
+
+	max_pos = t->kcov_df_size;
+
+	/* Record: header(1) + pc(1) + meta(1) + one slot per field (scalar: 1) */
+	record_len = 3 + (num_fields > 0 ? num_fields : 1);
+
+	/* Atomic reservation; give the words back if the record does not fit. */
+	pos = 1 + xadd((unsigned long *)&area[0], record_len);
+	if (unlikely(pos + record_len > max_pos)) {
+		xadd((unsigned long *)&area[0], -(long)record_len);
+		return;
+	}
+
+	seq = ++t->kcov_dataflow_seq;
+	area[pos] = type_marker | (seq & 0x00FFFFFFULL);
+	area[pos + 1] = pc;
+	area[pos + 2] = meta;
+
+	if (num_fields == 0) {
+		/* Scalar: read value from ptr using size from meta */
+		u64 val = 0;
+		u32 sz = (meta >> 48) & 0xFF;
+
+		if (sz > sizeof(val))
+			sz = sizeof(val);
+		/* NULL and error pointers are not dereferenced; they record 0. */
+		if (ptr && !KCOV_DF_IS_ERR(ptr) &&
+		    copy_from_kernel_nofault(&val, ptr, sz))
+			val = KCOV_DF_MAGIC_BAD;
+		area[pos + 3] = val;
+		return;
+	}
+
+	/* Struct fields: an error pointer has nothing to expand. */
+	if (KCOV_DF_IS_ERR(ptr)) {
+		for (i = 0; i < num_fields; i++)
+			area[pos + 3 + i] = KCOV_DF_MAGIC_BAD;
+		return;
+	}
+
+	for (i = 0; i < num_fields; i++) {
+		u64 off, sz, val;
+		void *fa;
+
+		/* The descriptor table itself is read fault-tolerantly. */
+		if (copy_from_kernel_nofault(&off, &offsets[i * 2], sizeof(off)) ||
+		    copy_from_kernel_nofault(&sz, &offsets[i * 2 + 1], sizeof(sz))) {
+			area[pos + 3 + i] = KCOV_DF_MAGIC_BAD;
+			continue;
+		}
+
+		fa = (void *)((unsigned long)ptr + off);
+		if (sz > sizeof(val))
+			sz = sizeof(val);
+
+		/* Start from 0 so fields narrower than u64 are zero-extended. */
+		val = 0;
+		if (copy_from_kernel_nofault(&val, fa, sz))
+			val = KCOV_DF_MAGIC_BAD;
+		area[pos + 3 + i] = val;
+	}
+}
+
+#ifdef CONFIG_KCOV_DATAFLOW_ARGS
+noinline void notrace __no_sanitize_coverage
+__sanitizer_cov_trace_args(u64 pc, u32 arg_idx, u32 arg_size, void *arg_ptr,
+			   u64 *offsets, u32 num_fields);
+
+/*
+ * Function-entry callback inserted by the compiler for one argument.
+ *
+ * Packs @arg_idx, @arg_size and the low 48 bits of @arg_ptr into the meta
+ * word and hands everything to kcov_df_write() as an entry record.
+ *
+ * Both @arg_idx and @arg_size only have 8 bits in the meta word. @arg_size
+ * is saturated at 0xFF so that a large size cannot spill into the arg_idx
+ * byte; kcov_df_write() never reads more than 8 bytes for a scalar anyway.
+ */
+noinline void notrace __no_sanitize_coverage
+__sanitizer_cov_trace_args(u64 pc, u32 arg_idx, u32 arg_size, void *arg_ptr,
+			   u64 *offsets, u32 num_fields)
+{
+	/* meta: [arg_idx(8) | arg_size(8) | ptr(48)] */
+	u64 idx = arg_idx & 0xFF;
+	u64 size = arg_size > 0xFF ? 0xFF : arg_size;
+	u64 meta = (idx << 56) | (size << 48) |
+		   ((u64)(unsigned long)arg_ptr & 0xFFFFFFFFFFFFULL);
+
+	kcov_df_write(KCOV_DF_TYPE_ENTRY, pc, meta, arg_ptr,
+		      offsets, num_fields);
+}
+EXPORT_SYMBOL(__sanitizer_cov_trace_args);
+#endif
+
+#ifdef CONFIG_KCOV_DATAFLOW_RET
+noinline void notrace __no_sanitize_coverage
+__sanitizer_cov_trace_ret(u64 pc, u32 ret_size, void *ret_val,
+			  u64 *offsets, u32 num_fields);
+
+/*
+ * Function-exit callback inserted by the compiler for the return value.
+ *
+ * Packs @ret_size and the low 48 bits of @ret_val into the meta word and
+ * hands everything to kcov_df_write() as a return record.
+ *
+ * The size field is 8 bits wide; @ret_size is saturated at 0xFF so that a
+ * large size cannot leak into bits 63:56, which stay zero for return
+ * records.
+ */
+noinline void notrace __no_sanitize_coverage
+__sanitizer_cov_trace_ret(u64 pc, u32 ret_size, void *ret_val,
+			  u64 *offsets, u32 num_fields)
+{
+	/* meta: [zero(8) | ret_size(8) | ptr(48)] */
+	u64 size = ret_size > 0xFF ? 0xFF : ret_size;
+	u64 meta = (size << 48) |
+		   ((u64)(unsigned long)ret_val & 0xFFFFFFFFFFFFULL);
+
+	kcov_df_write(KCOV_DF_TYPE_RET, pc, meta, ret_val,
+		      offsets, num_fields);
+}
+EXPORT_SYMBOL(__sanitizer_cov_trace_ret);
+#endif
+
+/* --- /sys/kernel/debug/kcov_dataflow file operations --- */
+
+/*
+ * open() handler: allocate a zeroed descriptor holding a single reference
+ * and attach it to the file. The trace buffer itself is only created later
+ * by KCOV_DF_INIT_TRACE.
+ *
+ * Returns -ENOMEM if the descriptor cannot be allocated, otherwise the
+ * result of nonseekable_open().
+ */
+static int kcov_df_open(struct inode *inode, struct file *filep)
+{
+	struct kcov_dataflow *ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+
+	if (ctx == NULL)
+		return -ENOMEM;
+
+	refcount_set(&ctx->refcount, 1);
+	spin_lock_init(&ctx->lock);
+	filep->private_data = ctx;
+
+	return nonseekable_open(inode, filep);
+}
+
+/*
+ * release() handler: if the closing task is the one that enabled tracing on
+ * this descriptor, detach the buffer from it, then drop the reference taken
+ * in open().
+ *
+ * NOTE(review): when a different task is recorded in ->t, its kcov_df_area
+ * is left untouched while kcov_df_put() below may free the buffer — confirm
+ * how the buffer lifetime is protected in that case.
+ */
+static int kcov_df_close(struct inode *inode, struct file *filep)
+{
+	struct kcov_dataflow *ctx = filep->private_data;
+	struct task_struct *self = current;
+	unsigned long irqflags;
+
+	spin_lock_irqsave(&ctx->lock, irqflags);
+	if (ctx->t == self) {
+		/* Stop recording first, then forget the buffer. */
+		self->kcov_df_enabled = false;
+		self->kcov_df_area = NULL;
+		self->kcov_df_size = 0;
+		ctx->t = NULL;
+	}
+	spin_unlock_irqrestore(&ctx->lock, irqflags);
+
+	kcov_df_put(ctx);
+	return 0;
+}
+
+/*
+ * mmap() handler: map the whole trace buffer into the caller's address
+ * space, one page at a time.
+ *
+ * The mapping must start at page offset 0 and cover exactly size * 8 bytes
+ * of an already initialised buffer; anything else yields -EINVAL. An error
+ * from vm_insert_page() is passed straight back to the caller.
+ */
+static int kcov_df_mmap(struct file *filep, struct vm_area_struct *vma)
+{
+	struct kcov_dataflow *ctx = filep->private_data;
+	unsigned long nbytes, offset, irqflags;
+	void *buf;
+	bool ok;
+	int err;
+
+	/* Snapshot and validate the buffer geometry under the lock. */
+	spin_lock_irqsave(&ctx->lock, irqflags);
+	buf = ctx->area;
+	nbytes = ctx->size * sizeof(u64);
+	ok = buf && vma->vm_pgoff == 0 &&
+	     vma->vm_end - vma->vm_start == nbytes;
+	spin_unlock_irqrestore(&ctx->lock, irqflags);
+
+	if (!ok)
+		return -EINVAL;
+
+	vm_flags_set(vma, VM_DONTEXPAND);
+	for (offset = 0; offset < nbytes; offset += PAGE_SIZE) {
+		err = vm_insert_page(vma, vma->vm_start + offset,
+				     vmalloc_to_page(buf + offset));
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+/*
+ * ioctl() handler for /sys/kernel/debug/kcov_dataflow.
+ *
+ * KCOV_DF_INIT_TRACE: allocate the trace buffer; @arg is its size in u64
+ *                     words (at least 2, at most 128 MiB worth of words).
+ * KCOV_DF_ENABLE:     attach the buffer to the calling task and start
+ *                     recording.
+ * KCOV_DF_DISABLE:    stop recording and detach the buffer from the calling
+ *                     task.
+ *
+ * Returns 0 on success, -EBUSY if a buffer already exists, -EINVAL for a bad
+ * size or a command issued in the wrong state, -ENOMEM if the buffer cannot
+ * be allocated and -ENOTTY for unknown commands.
+ *
+ * The buffer is allocated outside the spinlock and only published after
+ * df->area has been re-checked under the lock, so two concurrent INIT_TRACE
+ * calls cannot both install a buffer and leak one of them.
+ */
+static long kcov_df_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
+{
+	struct kcov_dataflow *df = filep->private_data;
+	unsigned long flags;
+	unsigned long size;
+	void *area;
+	int res = 0;
+
+	if (cmd == KCOV_DF_INIT_TRACE) {
+		/* Cheap early rejection; repeated after the allocation. */
+		spin_lock_irqsave(&df->lock, flags);
+		if (df->area)
+			res = -EBUSY;
+		spin_unlock_irqrestore(&df->lock, flags);
+		if (res)
+			return res;
+
+		size = arg;
+		if (size < 2 || size > (128 << 20) / sizeof(u64))
+			return -EINVAL;
+
+		/* Allocate without holding the spinlock. */
+		area = vmalloc_user(size * sizeof(u64));
+		if (!area)
+			return -ENOMEM;
+
+		spin_lock_irqsave(&df->lock, flags);
+		if (df->area) {
+			/* Lost the race against a concurrent INIT_TRACE. */
+			res = -EBUSY;
+		} else {
+			df->size = size;
+			df->area = area;
+			area = NULL;
+		}
+		spin_unlock_irqrestore(&df->lock, flags);
+
+		/* Only non-NULL when the buffer was not installed. */
+		vfree(area);
+		return res;
+	}
+
+	spin_lock_irqsave(&df->lock, flags);
+	switch (cmd) {
+	case KCOV_DF_ENABLE:
+		if (!df->area || df->t) {
+			res = -EINVAL;
+			break;
+		}
+		df->t = current;
+		current->kcov_df_area = df->area;
+		current->kcov_df_size = df->size;
+		current->kcov_dataflow_seq = 0;
+		/* Barrier before enabling */
+		barrier();
+		current->kcov_df_enabled = true;
+		break;
+
+	case KCOV_DF_DISABLE:
+		if (df->t != current) {
+			res = -EINVAL;
+			break;
+		}
+		current->kcov_df_enabled = false;
+		/* Stop recording before the buffer pointer is cleared. */
+		barrier();
+		current->kcov_df_area = NULL;
+		current->kcov_df_size = 0;
+		df->t = NULL;
+		break;
+
+	default:
+		res = -ENOTTY;
+	}
+	spin_unlock_irqrestore(&df->lock, flags);
+	return res;
+}
+
+/* File operations backing the "kcov_dataflow" debugfs file. */
+static const struct file_operations kcov_df_fops = {
+ .open = kcov_df_open,
+ .unlocked_ioctl = kcov_df_ioctl,
+ .compat_ioctl = kcov_df_ioctl, /* same handler as the native ioctl */
+ .mmap = kcov_df_mmap,
+ .release = kcov_df_close,
+};
+#endif /* CONFIG_KCOV_DATAFLOW_ARGS || CONFIG_KCOV_DATAFLOW_RET */
+
static void kcov_start(struct task_struct *t, struct kcov *kcov,
unsigned int size, void *area, enum kcov_mode mode,
int sequence)
@@ -1146,6 +1428,15 @@ static int __init kcov_init(void)
*/
debugfs_create_file_unsafe("kcov", 0600, NULL, NULL, &kcov_fops);
+#if defined(CONFIG_KCOV_DATAFLOW_ARGS) || defined(CONFIG_KCOV_DATAFLOW_RET)
+ /*
+ * Dataflow tracing device, independent of the legacy "kcov" file above:
+ * /sys/kernel/debug/kcov_dataflow (open/ioctl/mmap via kcov_df_fops).
+ */
+ debugfs_create_file_unsafe("kcov_dataflow", 0600, NULL, NULL,
+ &kcov_df_fops);
+#endif
+
#ifdef CONFIG_KCOV_SELFTEST
selftest();
#endif
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index e2f976c3301b..abd1a94589aa 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -2261,6 +2261,28 @@ config KCOV_SELFTEST
On test failure, causes the kernel to panic. Recommended to be
enabled, ensuring critical functionality works as intended.
+
+config KCOV_DATAFLOW_ARGS
+ bool "Enable KCOV dataflow: function argument capture"
+ depends on KCOV
+ depends on $(cc-option,-fsanitize-coverage=dataflow-args)
+ help
+ Captures function arguments at entry and exposes them via
+ /sys/kernel/debug/kcov_dataflow.
+ Struct pointer arguments are auto-expanded using compiler DebugInfo
+ metadata, recording individual field values at runtime.
+ Enable per object file with: KCOV_DATAFLOW_file.o := y in the Makefile.
+ Requires clang with -fsanitize-coverage=dataflow-args support.
+
+config KCOV_DATAFLOW_RET
+ bool "Enable KCOV dataflow: return value capture"
+ depends on KCOV
+ depends on $(cc-option,-fsanitize-coverage=dataflow-ret)
+ help
+ Captures function return values at exit and exposes them via
+ /sys/kernel/debug/kcov_dataflow.
+ Struct pointer returns are auto-expanded using compiler DebugInfo
+ metadata, recording individual field values at runtime.
+ Enable per object file with: KCOV_DATAFLOW_file.o := y in the Makefile.
+ Requires clang with -fsanitize-coverage=dataflow-ret support.
config DEBUG_AID_FOR_SYZBOT
bool "Additional debug code for syzbot"
default n
--
2.43.0