[RFC PATCH v2 14/14] Documentation: add kcov-dataflow.rst

From: Yunseong Kim

Date: Thu Jun 11 2026 - 12:30:02 EST


Add documentation for the KCOV-Dataflow subsystem covering:
- Prerequisites and Kconfig options
- Per-module and per-directory instrumentation
- Data collection example with buffer parsing
- Ring buffer TLV record format
- Safety properties
- Ioctl interface reference
- Compatibility with legacy KCOV
- Rust module support via post-compilation pipeline
- Fork/child process tracing pattern

Signed-off-by: Yunseong Kim <yunseong.kim@xxxxxxxx>
---
Documentation/dev-tools/index.rst | 1 +
Documentation/dev-tools/kcov-dataflow.rst | 321 +++++++++++++++++++++++++
tools/testing/selftests/kcov_dataflow/Makefile | 2 +-
3 files changed, 323 insertions(+), 1 deletion(-)

diff --git a/Documentation/dev-tools/index.rst b/Documentation/dev-tools/index.rst
index 59cbb77b33ff..541c58cc65ea 100644
--- a/Documentation/dev-tools/index.rst
+++ b/Documentation/dev-tools/index.rst
@@ -24,6 +24,7 @@ Documentation/process/debugging/index.rst
context-analysis
sparse
kcov
+ kcov-dataflow
gcov
kasan
kmsan
diff --git a/Documentation/dev-tools/kcov-dataflow.rst b/Documentation/dev-tools/kcov-dataflow.rst
new file mode 100644
index 000000000000..603c83946d12
--- /dev/null
+++ b/Documentation/dev-tools/kcov-dataflow.rst
@@ -0,0 +1,321 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+KCOV-Dataflow: function argument and return value extraction
+=============================================================
+
+KCOV-Dataflow captures function arguments and return values, including
+automatic struct field decomposition, at instrumented kernel function
+boundaries. It provides per-task, lock-free ring buffers accessible via
+``mmap()``, enabling data-flow-aware fuzzing and post-mortem contract
+verification.
+
+Unlike KCOV's ``trace-pc`` which reports *which* code executed,
+KCOV-Dataflow reports *what values* were passed and returned. This is
+a completely separate device from ``/sys/kernel/debug/kcov``.
+
+Prerequisites
+-------------
+
+KCOV-Dataflow requires Clang/LLVM with the ``trace-args`` and
+``trace-ret`` SanitizerCoverage extensions. Standard (unpatched)
+compilers will not expose these Kconfig options.
+
+To enable KCOV-Dataflow, configure the kernel with::
+
+ CONFIG_KCOV=y
+ CONFIG_KCOV_DATAFLOW_ARGS=y
+ CONFIG_KCOV_DATAFLOW_RET=y
+
+Optional: instrument the entire kernel (significant overhead)::
+
+ CONFIG_KCOV_DATAFLOW_INSTRUMENT_ALL=y
+
+Coverage data becomes accessible once debugfs is mounted::
+
+ mount -t debugfs none /sys/kernel/debug
+
+Per-module instrumentation
+--------------------------
+
+To instrument a specific module, add to its Makefile::
+
+ KCOV_DATAFLOW_my_module.o := y
+
+For example, to instrument the Android binder driver::
+
+ # drivers/android/Makefile
+ KCOV_DATAFLOW_binder.o := y
+ KCOV_DATAFLOW_binder_alloc.o := y
+
+To instrument an entire directory, set the variable without a filename::
+
+ # fs/Makefile
+ KCOV_DATAFLOW := y
+
+The build system automatically adds the required compiler flags
+(``-fsanitize-coverage=trace-args,trace-ret``). Debug info is provided
+by ``CONFIG_DEBUG_INFO`` which is a Kconfig dependency.
+
+Data collection
+---------------
+
+The following program demonstrates how to collect function argument and
+return value data for a single syscall:
+
+.. code-block:: c
+
+ #include <stdio.h>
+ #include <stdint.h>
+ #include <stdlib.h>
+ #include <sys/types.h>
+ #include <sys/ioctl.h>
+ #include <sys/mman.h>
+ #include <unistd.h>
+ #include <fcntl.h>
+
+ #define KCOV_DF_INIT_TRACE _IOR('d', 1, unsigned long)
+ #define KCOV_DF_ENABLE _IO('d', 100)
+ #define KCOV_DF_DISABLE _IO('d', 101)
+ #define BUF_SIZE (1 << 20) /* 1M words = 8MB */
+
+ int main(void)
+ {
+ int fd;
+ uint64_t *buf, n, i;
+
+ fd = open("/sys/kernel/debug/kcov_dataflow", O_RDWR);
+ if (fd == -1)
+ perror("open"), exit(1);
+
+ /* Allocate buffer (size in u64 words). */
+ if (ioctl(fd, KCOV_DF_INIT_TRACE, BUF_SIZE))
+ perror("ioctl(INIT)"), exit(1);
+
+ /* Map the buffer into user space. */
+ buf = (uint64_t *)mmap(NULL, BUF_SIZE * sizeof(uint64_t),
+ PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+ if (buf == MAP_FAILED)
+ perror("mmap"), exit(1);
+
+ /* Enable data-flow collection for this task. */
+ if (ioctl(fd, KCOV_DF_ENABLE, 0))
+ perror("ioctl(ENABLE)"), exit(1);
+
+ /* Reset counter. */
+ __atomic_store_n(&buf[0], 0, __ATOMIC_RELAXED);
+
+ /* === Trigger syscall(s) here === */
+ read(-1, NULL, 0);
+
+ /* Read how many words were written. */
+ n = __atomic_load_n(&buf[0], __ATOMIC_RELAXED);
+
+ /* Parse TLV records. */
+ i = 1;
+ while (i + 3 < n) {
+ uint64_t type_seq = buf[i];
+ uint64_t pc = buf[i + 1];
+ uint64_t meta = buf[i + 2];
+ uint32_t type = (type_seq >> 28) & 0xF;
+ uint32_t num_vals = (type_seq >> 24) & 0xF;
+ uint32_t seq = type_seq & 0x00FFFFFF;
+ uint32_t arg_idx = (meta >> 56) & 0xFF;
+ uint32_t size = (meta >> 48) & 0xFF;
+
+ if (type_seq >> 32 || (type != 0xE && type != 0xF)) {
+ i++;
+ continue;
+ }
+ if (!num_vals)
+ num_vals = 1;
+
+ printf("[%s] seq=%u pc=0x%lx arg_idx=%u size=%u val=0x%lx\n",
+ type == 0xE ? "ENTRY" : "RET",
+ seq, pc, arg_idx, size, buf[i + 3]);
+ i += 3 + num_vals;
+ }
+
+ if (ioctl(fd, KCOV_DF_DISABLE, 0))
+ perror("ioctl(DISABLE)"), exit(1);
+
+ munmap(buf, BUF_SIZE * sizeof(uint64_t));
+ close(fd);
+ return 0;
+ }
+
+Ring buffer format
+------------------
+
+The buffer is an array of ``u64`` words::
+
+ buf[0]: atomic counter -- total words written
+
+Each record occupies 3 + N words:
+
+.. list-table::
+ :header-rows: 1
+
+ * - Offset
+ - Field
+ - Description
+ * - 0
+ - type_and_seq
+ - bits[31:28] = 0xE (entry) or 0xF (return), bits[27:24] = num_vals,
+ bits[23:0] = sequence number
+ * - 1
+ - pc
+ - Instrumented function address
+ * - 2
+ - meta
+ - bits[63:56] = arg_idx (0 for return), bits[55:48] = size in bytes,
+ bits[47:0] = raw pointer value
+ * - 3..N
+ - field_val[0..N]
+ - Struct field values or single scalar
+
+Magic values:
+
+- ``0xBADADD85``: field read failed (pointer was invalid/freed/poisoned)
+
+Safety
+------
+
+- Callbacks are ``notrace``, ``__no_sanitize_coverage``, ``noinline``
+ to prevent recursion.
+- All pointer reads use ``copy_from_kernel_nofault()`` -- survives
+ freed, poisoned, or unmapped memory.
+- An ``in_task()`` guard rejects calls from hardirq/softirq/NMI context,
+ preventing reentrant buffer corruption.
+- No ``printk`` or allocation in the data path.
+- When not enabled for a task, overhead is a single boolean check.
+
+Ioctl interface
+---------------
+
+.. list-table::
+ :header-rows: 1
+
+ * - Command
+ - Value
+ - Description
+ * - KCOV_DF_INIT_TRACK
+ - ``_IOR('d', 1, unsigned long)``
+ - Allocate buffer (size in u64 words)
+ * - KCOV_DF_ENABLE
+ - ``_IO('d', 100)``
+ - Start collection for current task
+ * - KCOV_DF_DISABLE
+ - ``_IO('d', 101)``
+ - Stop collection
+
+Compatibility
+-------------
+
+KCOV-Dataflow is completely independent from legacy KCOV:
+
+- Separate device: ``/sys/kernel/debug/kcov_dataflow``
+- Separate ioctl namespace (``'d'`` vs ``'c'``)
+- Separate per-task buffer
+- Both can be used simultaneously without interference
+- syzkaller and other KCOV users are unaffected
+
+Rust module support
+-------------------
+
+Rust kernel modules are supported via a post-compilation pipeline::
+
+ rustc --emit=llvm-ir -g module.rs
+ opt -passes=sancov-module \
+ -sanitizer-coverage-trace-args \
+ -sanitizer-coverage-trace-ret module.ll -S -o module_inst.ll
+ llc -filetype=obj module_inst.ll -o module.o
+
+Selftests
+---------
+
+Automated tests and visualization tools are in
+``tools/testing/selftests/kcov_dataflow/``::
+
+ # Automated ioctl interface test (TAP output):
+ make -C tools/testing/selftests/kcov_dataflow
+ vng --user root --exec \
+ tools/testing/selftests/kcov_dataflow/user_ioctl/user_ioctl
+
+ # Load a test module and view captured records:
+ make LLVM=1 CC=clang M=tools/testing/selftests/kcov_dataflow/eight_args_c modules
+ vng --user root --exec \
+ "python3 tools/testing/selftests/kcov_dataflow/trigger-view.py \
+ eight_args_c -C 8 --ko \
+ tools/testing/selftests/kcov_dataflow/eight_args_c/eight_args_c.ko"
+
+ # Binderfs ioctl capture test (requires CONFIG_ANDROID_BINDER_IPC):
+ make -C tools/testing/selftests/kcov_dataflow/binderfs
+ vng --user root --exec \
+ tools/testing/selftests/kcov_dataflow/binderfs/binderfs_test
+
+See ``tools/testing/selftests/kcov_dataflow/README.rst`` for details.
+
+Tracing child processes
+-----------------------
+
+KCOV-Dataflow is per-task: after ``fork()``, the child does not inherit
+the enabled state. To trace child processes, re-enable on the inherited
+file descriptor in the child before ``exec()``. The ``mmap``'d buffer is
+shared (``MAP_SHARED``), so both parent and child write to the same ring
+buffer atomically.
+
+.. code-block:: c
+
+ #include <stdio.h>
+ #include <stdint.h>
+ #include <stdlib.h>
+ #include <sys/ioctl.h>
+ #include <sys/mman.h>
+ #include <sys/wait.h>
+ #include <unistd.h>
+ #include <fcntl.h>
+
+ #define KCOV_DF_INIT_TRACE _IOR('d', 1, unsigned long)
+ #define KCOV_DF_ENABLE _IO('d', 100)
+ #define KCOV_DF_DISABLE _IO('d', 101)
+ #define BUF_SIZE (1 << 20)
+
+ int main(int argc, char **argv)
+ {
+ int fd = open("/sys/kernel/debug/kcov_dataflow", O_RDWR);
+ ioctl(fd, KCOV_DF_INIT_TRACE, BUF_SIZE);
+ uint64_t *buf = mmap(NULL, BUF_SIZE * 8,
+ PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+
+ /* Enable for parent task. */
+ ioctl(fd, KCOV_DF_ENABLE, 0);
+ __atomic_store_n(&buf[0], 0, __ATOMIC_RELAXED);
+
+ pid_t pid = fork();
+ if (pid == 0) {
+ /*
+ * Child: re-enable on inherited fd.
+ * The shared mmap buffer receives records from both tasks.
+ */
+ ioctl(fd, KCOV_DF_ENABLE, 0);
+ execvp(argv[1], &argv[1]);
+ _exit(1);
+ }
+
+ waitpid(pid, NULL, 0);
+ ioctl(fd, KCOV_DF_DISABLE, 0);
+
+ uint64_t n = __atomic_load_n(&buf[0], __ATOMIC_RELAXED);
+ printf("Captured %lu words from parent + child\n", n);
+
+ munmap(buf, BUF_SIZE * 8);
+ close(fd);
+ return 0;
+ }
+
+Note: the child's ``ioctl(fd, KCOV_DF_ENABLE)`` will fail if the parent
+has not yet called ``KCOV_DF_DISABLE``, because only one task can be
+associated with a descriptor at a time. For true multi-process tracing,
+open a separate ``kcov_dataflow`` fd per child, or disable in the parent
+before the child enables (as shown above -- the parent is blocked in
+``waitpid`` so it generates no records during that time anyway).
diff --git a/tools/testing/selftests/kcov_dataflow/Makefile b/tools/testing/selftests/kcov_dataflow/Makefile
index 6412c90edfa1..9691b41ffd3e 100644
--- a/tools/testing/selftests/kcov_dataflow/Makefile
+++ b/tools/testing/selftests/kcov_dataflow/Makefile
@@ -1,4 +1,4 @@
# SPDX-License-Identifier: GPL-2.0
TEST_GEN_PROGS := user_ioctl/user_ioctl
-TEST_PROGS := run_eight_args_c.sh run_rust_ffi_contract.sh
+TEST_PROGS := run_eight_args_c.sh run_eight_args_rust.sh run_rust_ffi_contract.sh
include ../lib.mk

--
2.43.0