[PATCH 1/4] ras-tools: add bpftrace verification and opcode decode helpers

From: Ruidong Tian

Date: Tue Jun 16 2026 - 21:53:16 EST


From: Ruidong Tian <ruidong.trd@xxxxxxxxxxxxxxxxx>

einj_mem_uc_verify.bt:
arm64-focused bpftrace script that observes key facts during MC-safe
fault injection tests:
- Fault PC and call stack from do_sea's pt_regs
- Extable entry type (UACCESS_ERR_ZERO, UACCESS_CPY, etc.)
- Fixup/recovery address from search_exception_tables

decode_opcode.py:
Post-processing filter for the bpftrace output that:
- Calculates KASLR offset from /proc/kallsyms vs vmlinux _text
- Disassembles fault PC instructions via capstone (arm64)
- Appends source file:line via addr2line when DWARF is available
- Supports --context N to show surrounding instructions

Together they form the runtime verification pipeline (both stages need root):
sudo bpftrace einj_mem_uc_verify.bt | sudo python3 decode_opcode.py

example
=========================================
input: sudo bpftrace einj_mem_uc_verify.bt -c './einj_mem_uc copyin -f' | sudo python3 decode_opcode.py

[do_sea ] Fault PC (faulting insn): <__arch_copy_from_user> ffffb45742beb300 linux/arch/arm64/lib/copy_template.S:165
[do_sea ] Call stack:
[do_sea ] <copy_folio_from_iter_atomic>
[do_sea ] <generic_perform_write>
[do_sea ] <shmem_file_write_iter>
[do_sea ] <new_sync_write>
[do_sea ] <vfs_write>
[do_sea ] <ksys_write>
[extable ] Fault type: UACCESS_ERR_ZERO
[extable ] Recovery addr (fixup): <__arch_copy_from_user> ffffb45742beb3a0 linux/arch/arm64/lib/copy_from_user.S:75
[do_sea ] Fault PC (faulting insn): <__arch_copy_from_user> ffffb45742beb3a8 linux/arch/arm64/lib/copy_from_user.S:78
[do_sea ] Call stack:
[do_sea ] <copy_folio_from_iter_atomic>
[do_sea ] <generic_perform_write>
[do_sea ] <shmem_file_write_iter>
[do_sea ] <new_sync_write>
[do_sea ] <vfs_write>
[do_sea ] <ksys_write>
[extable ] Fault type: UACCESS_ERR_ZERO
[extable ] Recovery addr (fixup): <__arch_copy_from_user> ffffb45742beb3b0 linux/arch/arm64/lib/copy_from_user.S:80

Signed-off-by: Ruidong Tian <ruidong.trd@xxxxxxxxxxxxxxxxx>
---
decode_opcode.py | 274 ++++++++++++++++++++++++++++++++++++++++++
einj_mem_uc_verify.bt | 108 +++++++++++++++++
2 files changed, 382 insertions(+)
create mode 100644 decode_opcode.py
create mode 100644 einj_mem_uc_verify.bt

diff --git a/decode_opcode.py b/decode_opcode.py
new file mode 100644
index 0000000..0b7d4ae
--- /dev/null
+++ b/decode_opcode.py
@@ -0,0 +1,274 @@
+#!/usr/bin/env python3
+"""
+decode_opcode.py - Parse bpftrace output from einj_mem_uc_verify.bt in real time,
+ disassemble instructions near the fault PC using KASLR offset.
+
+Usage:
+ sudo bpftrace einj_mem_uc_verify.bt -c './einj_mem_uc pwrite_uc' \
+ | sudo python3 decode_opcode.py [--vmlinux /path/to/vmlinux] [--context 5]
+
+Default vmlinux path: ../vmlinux relative to this script.
+
+Dependencies:
+ pip install capstone pyelftools
+"""
+
+import sys
+import re
+import os
+import argparse
+
+# ---------- dependency check ----------
+try:
+ from capstone import Cs, CS_ARCH_ARM64, CS_MODE_ARM
+ HAS_CAPSTONE = True
+except ImportError:
+ HAS_CAPSTONE = False
+
+try:
+ from elftools.elf.elffile import ELFFile
+ HAS_ELFTOOLS = True
+except ImportError:
+ HAS_ELFTOOLS = False
+
+import subprocess
+
+HAS_DWARF = False # lazy detection, checked on first use
+_dwarf_checked = False
+
+def check_dwarf(vmlinux_path):
+ global HAS_DWARF, _dwarf_checked
+ if _dwarf_checked:
+ return HAS_DWARF
+ _dwarf_checked = True
+ try:
+ with open(vmlinux_path, 'rb') as f:
+ elf = ELFFile(f)
+ HAS_DWARF = elf.get_section_by_name('.debug_info') is not None
+ except Exception:
+ HAS_DWARF = False
+ if HAS_DWARF:
+ pass # silently available
+ else:
+ print(f'[decode ] vmlinux has no DWARF (CONFIG_DEBUG_INFO not enabled), source location unavailable', flush=True)
+ return HAS_DWARF
+
+# ---------- command-line arguments ----------
+parser = argparse.ArgumentParser(description='decode bpftrace arm64 opcode with KASLR')
+parser.add_argument('--vmlinux',
+ default=os.path.join(os.path.dirname(__file__), '..', 'vmlinux'),
+ help='path to vmlinux (default: ../vmlinux)')
+parser.add_argument('--context', type=int, default=0,
+ help='number of context instructions to show (default: 0, target only)')
+args, _ = parser.parse_known_args()
+
+VMLINUX = os.path.realpath(args.vmlinux)
+CTX = args.context
+
+# ---------- compute KASLR offset ----------
+def get_kaslr_offset(vmlinux_path):
+ """Return (kaslr_offset, runtime_text, compiled_text). Requires root to read /proc/kallsyms."""
+ # runtime _text
+ runtime_text = None
+ try:
+ with open('/proc/kallsyms') as f:
+ for line in f:
+ parts = line.split()
+ if len(parts) >= 3 and parts[2] == '_text':
+ runtime_text = int(parts[0], 16)
+ break
+ except PermissionError:
+ print('[decode ] ERROR: cannot read /proc/kallsyms, run as root', flush=True)
+ return None, None, None
+
+ if runtime_text is None:
+ print('[decode ] ERROR: _text not found in /proc/kallsyms', flush=True)
+ return None, None, None
+
+ if runtime_text == 0:
+ print('[decode ] ERROR: /proc/kallsyms returns 0 for _text -- run as root', flush=True)
+ print('[decode ] hint: sudo python3 decode_opcode.py', flush=True)
+ return None, None, None
+
+ # compile-time _text (from vmlinux ELF symbol table)
+ compiled_text = None
+ if HAS_ELFTOOLS:
+ try:
+ with open(vmlinux_path, 'rb') as f:
+ elf = ELFFile(f)
+ for sec_name in ('.symtab', '.dynsym'):
+ sym_tab = elf.get_section_by_name(sec_name)
+ if sym_tab:
+ for sym in sym_tab.iter_symbols():
+ if sym.name == '_text':
+ compiled_text = sym['st_value']
+ break
+ if compiled_text is not None:
+ break
+ except Exception as e:
+ print(f'[decode ] WARNING: elftools error: {e}', flush=True)
+
+ if compiled_text is None:
+ # fallback: nm
+ import subprocess
+ try:
+ out = subprocess.check_output(['nm', '-n', vmlinux_path],
+ stderr=subprocess.DEVNULL, text=True)
+ for line in out.splitlines():
+ parts = line.split()
+ if len(parts) >= 3 and parts[2] == '_text':
+ compiled_text = int(parts[0], 16)
+ break
+ except Exception as e:
+ print(f'[decode ] WARNING: nm fallback failed: {e}', flush=True)
+
+ if compiled_text is None:
+ print('[decode ] ERROR: cannot determine compiled _text from vmlinux', flush=True)
+ return None, None, None
+
+ offset = runtime_text - compiled_text
+ print(f'[decode ] KASLR offset={offset:#x} '
+ f'_text(runtime={runtime_text:#x} compiled={compiled_text:#x})', flush=True)
+ return offset, runtime_text, compiled_text
+
+
+# ---------- read raw bytes from vmlinux (by compile-time virtual address) ----------
+def read_vmlinux_bytes(vmlinux_path, compiled_addr, n_bytes):
+ if not HAS_ELFTOOLS:
+ return None
+ try:
+ with open(vmlinux_path, 'rb') as f:
+ elf = ELFFile(f)
+ for seg in elf.iter_segments():
+ vaddr = seg['p_vaddr']
+ filesz = seg['p_filesz']
+ if vaddr <= compiled_addr < vaddr + filesz:
+ file_off = seg['p_offset'] + (compiled_addr - vaddr)
+ # do not exceed segment boundary
+ avail = vaddr + filesz - compiled_addr
+ f.seek(file_off)
+ return f.read(min(n_bytes, avail))
+ except Exception as e:
+ print(f'[decode ] read_vmlinux_bytes error: {e}', flush=True)
+ return None
+
+
+# ---------- disassemble a single instruction (for OPCODE and fixup) ----------
+def disasm_one(runtime_pc, kaslr_offset, vmlinux_path, label='>>>'):
+ """Disassemble one instruction; output format matches context display."""
+ if kaslr_offset is None or not HAS_CAPSTONE or not os.path.exists(vmlinux_path):
+ return
+ compiled_pc = runtime_pc - kaslr_offset
+ raw = read_vmlinux_bytes(vmlinux_path, compiled_pc, 4)
+ if not raw or len(raw) < 4:
+ print(f'[decode ] {label} {runtime_pc:#x}: <cannot read bytes>', flush=True)
+ return
+ cs = Cs(CS_ARCH_ARM64, CS_MODE_ARM)
+ insns = list(cs.disasm(raw, runtime_pc))
+ if insns:
+ ins = insns[0]
+ print(f'[decode ] {label} {ins.address:#x}: {ins.mnemonic:<8} {ins.op_str}', flush=True)
+ else:
+ print(f'[decode ] {label} {runtime_pc:#x}: <capstone failed to decode>', flush=True)
+
+
+# ---------- disassemble with surrounding context ----------
+def disasm_context(runtime_pc, kaslr_offset, vmlinux_path, ctx=0):
+ if kaslr_offset is None:
+ print('[decode ] cannot compute KASLR offset, skipping disassembly', flush=True)
+ return
+ if not HAS_CAPSTONE:
+ print('[decode ] capstone not installed, skipping disassembly (pip install capstone)', flush=True)
+ return
+ if not os.path.exists(vmlinux_path):
+ print(f'[decode ] vmlinux not found: {vmlinux_path}', flush=True)
+ return
+
+ compiled_pc = runtime_pc - kaslr_offset
+ start_compiled = compiled_pc - ctx * 4
+ total_bytes = (ctx * 2 + 1) * 4
+
+ raw = read_vmlinux_bytes(vmlinux_path, start_compiled, total_bytes)
+ if not raw or len(raw) < 4:
+ print(f'[decode ] cannot read bytes from vmlinux, skipping disassembly', flush=True)
+ return
+
+ cs = Cs(CS_ARCH_ARM64, CS_MODE_ARM)
+ # use runtime addresses so output matches bpftrace directly
+ start_runtime = start_compiled + kaslr_offset
+ insns = list(cs.disasm(raw, start_runtime))
+
+ print(f'[decode ] --- context (pc={runtime_pc:#x}, +/-{ctx} insns) ---', flush=True)
+ for ins in insns:
+ marker = '>>>' if ins.address == runtime_pc else ' '
+ print(f'[decode ] {marker} {ins.address:#x}: {ins.mnemonic:<8} {ins.op_str}',
+ flush=True)
+ print(f'[decode ] ---', flush=True)
+
+
+# ---------- source location (brief, returns file:line string) ----------
+def get_source_brief(runtime_pc, kaslr_offset, vmlinux_path):
+ """Return absolute 'path:line' string, or None."""
+ if kaslr_offset is None or not check_dwarf(vmlinux_path):
+ return None
+ compiled_pc = runtime_pc - kaslr_offset
+ try:
+ out = subprocess.check_output(
+ ['addr2line', '-e', vmlinux_path, '-f', '-C',
+ hex(compiled_pc)],
+ stderr=subprocess.DEVNULL, universal_newlines=True
+ ).strip()
+ if out and '??' not in out:
+ # addr2line -f outputs: function\nfile:line
+ for ln in out.splitlines():
+ if '/' in ln or ':' in ln:
+ return ln.strip()
+ return None
+ except Exception:
+ return None
+
+
+# ---------- initialization ----------
+if not HAS_CAPSTONE:
+ print('[decode ] WARNING: capstone not installed -> pip install capstone', flush=True)
+if not HAS_ELFTOOLS:
+ print('[decode ] WARNING: pyelftools not installed -> pip install pyelftools', flush=True)
+
+if not os.path.exists(VMLINUX):
+ print(f'[decode ] WARNING: vmlinux not found at {VMLINUX}', flush=True)
+ KASLR_OFFSET = None
+else:
+ KASLR_OFFSET, _, _ = get_kaslr_offset(VMLINUX)
+
+# ---------- main loop: pass through all lines, trigger disasm on key lines ----------
+# do_sea Fault PC (with <ksym>)
+DOSEA_PC_RE = re.compile(r'\[do_sea\s+\].*Fault PC \(faulting insn\):.*?([0-9a-f]{8,})\s*$')
+# Recovery addr (fixup)
+FIXUP_RE = re.compile(r'Recovery addr \(fixup\):.*?([0-9a-f]{8,})\s*$')
+
+for line in sys.stdin:
+ # ---- do_sea Fault PC: append source location at end of line ----
+ m = DOSEA_PC_RE.search(line)
+ if m:
+ pc = int(m.group(1), 16)
+ src = get_source_brief(pc, KASLR_OFFSET, VMLINUX)
+ if src:
+ line = line.rstrip('\n') + ' ' + src + '\n'
+ sys.stdout.write(line)
+ sys.stdout.flush()
+ continue
+
+ # ---- Recovery addr: append source location at end of line ----
+ mf = FIXUP_RE.search(line)
+ if mf:
+ fixup_pc = int(mf.group(1), 16)
+ src = get_source_brief(fixup_pc, KASLR_OFFSET, VMLINUX)
+ if src:
+ line = line.rstrip('\n') + ' ' + src + '\n'
+ sys.stdout.write(line)
+ sys.stdout.flush()
+ continue
+
+ # ---- pass through other lines unchanged ----
+ sys.stdout.write(line)
+ sys.stdout.flush()
diff --git a/einj_mem_uc_verify.bt b/einj_mem_uc_verify.bt
new file mode 100644
index 0000000..9a2bb70
--- /dev/null
+++ b/einj_mem_uc_verify.bt
@@ -0,0 +1,108 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * einj_mem_uc_verify.bt - arm64-focused bpftrace verification script
+ *
+ * Observes four key facts during an MC-safe fault injection:
+ * 1. Fault PC - from do_sea's pt_regs->pc
+ * 2. Extable type - e.g. UACCESS_ERR_ZERO, from exception_table_entry
+ * 3. Call context - pt_regs->lr at fault time (caller of faulting function)
+ * 4. Fixup target - recovery address from search_exception_tables retval
+ *
+ * Usage:
+ * bpftrace ras-tools/einj_mem_uc_verify.bt
+ * # in another terminal:
+ * sudo ras-tools/einj_mem_uc -f pwrite_uc
+ */
+
+/*
+ * Runs once when the script attaches: stores the process name that the
+ * fixup_exception_me probe's predicate compares against comm, and prints a
+ * short banner.
+ */
+BEGIN {
+ @target = "einj_mem_uc";
+ printf("Filter: comm == \"%s\"\n", @target);
+ printf("Press Ctrl-C to stop and print the hit-count summary.\n\n");
+}
+
+/* ----------------------------------------------------------------------
+ * 1. do_sea* - arm64 SEA entry
+ * Extracts fault PC and LR from pt_regs.
+ * ---------------------------------------------------------------------- */
+
+/*
+ * Fires on entry to every function matching do_sea*. The third probe
+ * argument (arg2) is treated as the pt_regs pointer; PC, LR and FP are read
+ * from it at fixed byte offsets, then five caller return addresses are
+ * collected by following the frame-pointer chain.
+ *
+ * NOTE(review): unlike kprobe:fixup_exception_me this probe carries no
+ * /comm == @target/ predicate, so it reports hits from any task even though
+ * the BEGIN banner announces a comm filter -- confirm this is intended.
+ */
+kprobe:do_sea*
+{
+ $regs_ptr = arg2;
+ $pc = *(uint64 *)($regs_ptr + 256); /* pt_regs->pc */
+ $lr = *(uint64 *)($regs_ptr + 240); /* pt_regs->regs[30] = lr */
+ $fp0 = *(uint64 *)($regs_ptr + 232); /* pt_regs->regs[29] = fp */
+
+ /* frame walk (arm64 standard frame record: [fp+0]=prev_fp, [fp+8]=saved_lr) */
+ /* the depth is fixed at five records and no fp value is checked before use */
+ $fp1 = *(uint64 *)($fp0 + 0); $ra1 = *(uint64 *)($fp0 + 8);
+ $fp2 = *(uint64 *)($fp1 + 0); $ra2 = *(uint64 *)($fp1 + 8);
+ $fp3 = *(uint64 *)($fp2 + 0); $ra3 = *(uint64 *)($fp2 + 8);
+ $fp4 = *(uint64 *)($fp3 + 0); $ra4 = *(uint64 *)($fp3 + 8);
+ $fp5 = *(uint64 *)($fp4 + 0); $ra5 = *(uint64 *)($fp4 + 8);
+
+ /* per-probe hit counter, printed by the END block */
+ @hits[probe] = count();
+ /* the Fault PC line ends with the raw hex address; decode_opcode.py keys on that */
+ printf("[do_sea ] Fault PC (faulting insn): <%s> %lx\n", ksym($pc), $pc);
+ printf("[do_sea ] Call stack:\n");
+ /* innermost caller first: lr, then the five saved return addresses */
+ printf("[do_sea ] <%s>\n", ksym($lr));
+ printf("[do_sea ] <%s>\n", ksym($ra1));
+ printf("[do_sea ] <%s>\n", ksym($ra2));
+ printf("[do_sea ] <%s>\n", ksym($ra3));
+ printf("[do_sea ] <%s>\n", ksym($ra4));
+ printf("[do_sea ] <%s>\n", ksym($ra5));
+}
+
+/* ----------------------------------------------------------------------
+ * 2. extable lookup: fixup_exception_me + search_exception_tables
+ *
+ * struct exception_table_entry (arm64):
+ * [0] int insn 4B
+ * [4] int fixup 4B (PC-relative offset)
+ * [8] short type 2B
+ * [10] short data 2B
+ *
+ * EX_TYPE constants:
+ * 0 NONE 1 BPF 2 UACCESS_ERR_ZERO 3 KACCESS_ERR_ZERO
+ * 4 UACCESS_CPY 5 LOAD_UNALIGNED_ZEROPAD 6 KACCESS_ERR_ZERO_MEM_ERR
+ * ---------------------------------------------------------------------- */
+
+/*
+ * On entry to fixup_exception_me in a task whose comm equals @target, read
+ * the PC from the first argument (treated as a pt_regs pointer, same +256
+ * offset as in the do_sea probe) and remember it per thread. A non-zero
+ * @fxme_pc[tid] is what arms the search_exception_tables kretprobe.
+ */
+kprobe:fixup_exception_me
+/comm == @target/
+{
+ $pc = *(uint64 *)(arg0 + 256);
+ @fxme_pc[tid] = $pc;
+}
+
+/*
+ * On return from search_exception_tables, but only for threads that have a
+ * fault PC recorded by the fixup_exception_me probe: decode the returned
+ * exception_table_entry and print its type and fixup address.
+ *
+ * All seven EX_TYPE values listed in the table above are named; previously
+ * 1 (BPF) and 5 (LOAD_UNALIGNED_ZEROPAD) were reported as unknown(N).
+ */
+kretprobe:search_exception_tables
+/@fxme_pc[tid]/
+{
+    if (retval != 0) {
+        $ex = retval;
+        $type = *(int16 *)($ex + 8);        /* entry->type */
+        $fixup_rel = *(int32 *)($ex + 4);   /* entry->fixup, a relative offset */
+        /* the offset is applied to the address of the fixup field itself (ex + 4) */
+        $fixup_abs = $ex + (uint64)4 + (uint64)(int64)$fixup_rel;
+
+        if ($type == 0) {
+            printf("[extable ] Fault type: NONE\n");
+        } else if ($type == 1) {
+            printf("[extable ] Fault type: BPF\n");
+        } else if ($type == 2) {
+            printf("[extable ] Fault type: UACCESS_ERR_ZERO\n");
+        } else if ($type == 3) {
+            printf("[extable ] Fault type: KACCESS_ERR_ZERO\n");
+        } else if ($type == 4) {
+            printf("[extable ] Fault type: UACCESS_CPY\n");
+        } else if ($type == 5) {
+            printf("[extable ] Fault type: LOAD_UNALIGNED_ZEROPAD\n");
+        } else if ($type == 6) {
+            printf("[extable ] Fault type: KACCESS_ERR_ZERO_MEM_ERR\n");
+        } else {
+            printf("[extable ] Fault type: unknown(%d)\n", $type);
+        }
+        /* this line ends with the raw hex address; decode_opcode.py keys on that */
+        printf("[extable ] Recovery addr (fixup): <%s> %lx\n", ksym($fixup_abs), $fixup_abs);
+    } else {
+        printf("[extable ] Fault PC %lx: NO extable entry found\n", @fxme_pc[tid]);
+    }
+    /* one report per recorded fault: disarm until the next fixup_exception_me */
+    delete(@fxme_pc[tid]);
+}
+
+/*
+ * Runs at exit: prints the per-probe hit counts explicitly, then clears
+ * @hits and @target.
+ */
+END {
+ printf("\n==== hit counts ====\n");
+ print(@hits);
+ clear(@hits);
+ clear(@target);
+}
--
2.39.3