Firstly, thanks a lot for working on the exception fixup handling!
Specific comments below, but first some generic input.
For the subject, please use "riscv, bpf" or "riscv: bpf:" instead of
"riscv/eBPF:", and target the bpf-next tree (details in
Documentation/bpf/bpf_devel_QA.rst).
Also, it would be really nice if the RISC-V 32b support got the same
fixup love! ;-)
I haven't taken the code for a spin/selftest run yet, but I'll try to
do it this week!
Subjective style input: Please use the full 100 chars lines, now that
we can! ;-)
On Fri, 15 Oct 2021 at 17:37, Tong Tiangen <tongtiangen@xxxxxxxxxx> wrote:
When a tracing BPF program attempts to read memory without using the
bpf_probe_read() helper, the verifier marks the load instruction with
the BPF_PROBE_MEM flag. Since the riscv JIT does not currently recognize
this flag it falls back to the interpreter.
Add support for BPF_PROBE_MEM, by appending an exception table to the
BPF program. If the load instruction causes a data abort, the fixup
infrastructure finds the exception table and fixes up the fault, by
clearing the destination register and jumping over the faulting
instruction.
A more generic solution would add a "handler" field to the table entry.
So, would it make sense to add a handler field, to be more consistent
with say, x86?
This code is heavily based on the ARM64 one. Would a "handler", make
the ARM64 code simpler as well?
The same issue in ARM64 is fixed in:
commit 800834285361 ("bpf, arm64: Add BPF exception tables"),
Signed-off-by: Tong Tiangen <tongtiangen@xxxxxxxxxx>
Tested-by: Pu Lehui <pulehui@xxxxxxxxxx>
---
arch/riscv/include/asm/Kbuild | 1 -
arch/riscv/include/asm/extable.h | 49 ++++++++
arch/riscv/include/asm/uaccess.h | 13 ---
arch/riscv/mm/extable.c | 13 ++-
arch/riscv/net/bpf_jit.h | 1 +
arch/riscv/net/bpf_jit_comp64.c | 187 +++++++++++++++++++++++++------
arch/riscv/net/bpf_jit_core.c | 18 ++-
7 files changed, 222 insertions(+), 60 deletions(-)
create mode 100644 arch/riscv/include/asm/extable.h
diff --git a/arch/riscv/include/asm/Kbuild b/arch/riscv/include/asm/Kbuild
index 445ccc97305a..57b86fd9916c 100644
--- a/arch/riscv/include/asm/Kbuild
+++ b/arch/riscv/include/asm/Kbuild
@@ -1,6 +1,5 @@
# SPDX-License-Identifier: GPL-2.0
generic-y += early_ioremap.h
-generic-y += extable.h
generic-y += flat.h
generic-y += kvm_para.h
generic-y += user.h
diff --git a/arch/riscv/include/asm/extable.h b/arch/riscv/include/asm/extable.h
new file mode 100644
index 000000000000..8a52cdd122de
--- /dev/null
+++ b/arch/riscv/include/asm/extable.h
@@ -0,0 +1,49 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_EXTABLE_H
+#define __ASM_EXTABLE_H
+
+/*
+ * The exception table consists of pairs of addresses: the first is the
+ * address of an instruction that is allowed to fault, and the second is
+ * the address at which the program should continue. No registers are
+ * modified, so it is entirely up to the continuation code to figure out
+ * what to do.
+ *
+ * All the routines below use bits of fixup code that are out of line
+ * with the main instruction path. This means when everything is well,
+ * we don't even have to jump over them. Further, they do not intrude
+ * on our cache or tlb entries.
+ */
+struct exception_table_entry {
+ unsigned long insn, fixup;
+};
+
RISC-V uses the generic exception_table_entry (linux/extable.h), so I
don't see why it should be redefined here, without any
changes.
I'd suggest that instead of creating a new extable.h, simply fold the
changes directly in the mm/except.c. More on that below.
Unless, the "handler field" route is taken. Then, the new
exception_table_entry should go here.
+#ifdef CONFIG_BPF_JIT
+static inline bool in_bpf_jit(struct pt_regs *regs)
+{
+ if (!IS_ENABLED(CONFIG_BPF_JIT))
+ return false;
+
+ return regs->epc >= BPF_JIT_REGION_START &&
+ regs->epc < BPF_JIT_REGION_END;
Nit/FYI: 100 char lines is OK nowadays! ;-)
+}
+
+int rv_bpf_fixup_exception(const struct exception_table_entry *ex,
+ struct pt_regs *regs);
+#else /* !CONFIG_BPF_JIT */
+static inline bool in_bpf_jit(struct pt_regs *regs)
+{
+ return 0;
+}
+
+static inline
+int rv_bpf_fixup_exception(const struct exception_table_entry *ex,
+ struct pt_regs *regs)
+{
+ return 0;
+}
+#endif /* !CONFIG_BPF_JIT */
+
+struct pt_regs;
+extern int fixup_exception(struct pt_regs *regs);
+#endif
diff --git a/arch/riscv/include/asm/uaccess.h b/arch/riscv/include/asm/uaccess.h
index f314ff44c48d..96ea91dc0e9c 100644
--- a/arch/riscv/include/asm/uaccess.h
+++ b/arch/riscv/include/asm/uaccess.h
@@ -56,19 +56,6 @@ static inline int __access_ok(unsigned long addr, unsigned long size)
return size <= TASK_SIZE && addr <= TASK_SIZE - size;
}
-/*
- * The exception table consists of pairs of addresses: the first is the
- * address of an instruction that is allowed to fault, and the second is
- * the address at which the program should continue. No registers are
- * modified, so it is entirely up to the continuation code to figure out
- * what to do.
- *
- * All the routines below use bits of fixup code that are out of line
- * with the main instruction path. This means when everything is well,
- * we don't even have to jump over them. Further, they do not intrude
- * on our cache or tlb entries.
- */
-
...and don't remove this...
#define __LSW 0
#define __MSW 1
diff --git a/arch/riscv/mm/extable.c b/arch/riscv/mm/extable.c
index 2fc729422151..f8055c6d0f32 100644
--- a/arch/riscv/mm/extable.c
+++ b/arch/riscv/mm/extable.c
@@ -16,9 +16,12 @@ int fixup_exception(struct pt_regs *regs)
const struct exception_table_entry *fixup;
fixup = search_exception_tables(regs->epc);
- if (fixup) {
- regs->epc = fixup->fixup;
- return 1;
- }
- return 0;
+ if (!fixup)
+ return 0;
+
+ if (in_bpf_jit(regs))
+ return rv_bpf_fixup_exception(fixup, regs);
+
...instead add the definition of in_bpf_jit() here, and the
CONFIG_BPF_JIT around the "if (in_bpf_jit()...)"
+ regs->epc = fixup->fixup;
+ return 1;
}
diff --git a/arch/riscv/net/bpf_jit.h b/arch/riscv/net/bpf_jit.h
index 75c1e9996867..82f717cc98f7 100644
--- a/arch/riscv/net/bpf_jit.h
+++ b/arch/riscv/net/bpf_jit.h
@@ -71,6 +71,7 @@ struct rv_jit_context {
int ninsns;
int epilogue_offset;
int *offset; /* BPF to RV */
+ int exentry_idx;
I'd prefer if this would be named "nexentries" or something, so it's
more consistent with "ninsns", clear that "this is number of extable
entries".
unsigned long flags;
int stack_size;
};
diff --git a/arch/riscv/net/bpf_jit_comp64.c b/arch/riscv/net/bpf_jit_comp64.c
index 3af4131c22c7..97411d43785e 100644
--- a/arch/riscv/net/bpf_jit_comp64.c
+++ b/arch/riscv/net/bpf_jit_comp64.c
@@ -5,6 +5,7 @@
*
*/
+#include <linux/bitfield.h>
#include <linux/bpf.h>
#include <linux/filter.h>
#include "bpf_jit.h"
@@ -27,6 +28,21 @@ static const int regmap[] = {
[BPF_REG_AX] = RV_REG_T0,
};
+static const int pt_regmap[] = {
+ [RV_REG_A5] = offsetof(struct pt_regs, a5),
+ [RV_REG_A0] = offsetof(struct pt_regs, a0),
+ [RV_REG_A1] = offsetof(struct pt_regs, a1),
+ [RV_REG_A2] = offsetof(struct pt_regs, a2),
+ [RV_REG_A3] = offsetof(struct pt_regs, a3),
+ [RV_REG_A4] = offsetof(struct pt_regs, a4),
+ [RV_REG_S1] = offsetof(struct pt_regs, s1),
+ [RV_REG_S2] = offsetof(struct pt_regs, s2),
+ [RV_REG_S3] = offsetof(struct pt_regs, s3),
+ [RV_REG_S4] = offsetof(struct pt_regs, s4),
+ [RV_REG_S5] = offsetof(struct pt_regs, s5),
+ [RV_REG_T0] = offsetof(struct pt_regs, t0),
+};
+
enum {
RV_CTX_F_SEEN_TAIL_CALL = 0,
RV_CTX_F_SEEN_CALL = RV_REG_RA,
@@ -440,6 +456,71 @@ static int emit_call(bool fixed, u64 addr, struct rv_jit_context *ctx)
return 0;
}
+#define BPF_FIXUP_OFFSET_MASK GENMASK(26, 0)
+#define BPF_FIXUP_REG_MASK GENMASK(31, 27)
+
+int rv_bpf_fixup_exception(const struct exception_table_entry *ex,
+ struct pt_regs *regs)
+{
+ off_t offset = FIELD_GET(BPF_FIXUP_OFFSET_MASK, ex->fixup);
+ int regs_offset = FIELD_GET(BPF_FIXUP_REG_MASK, ex->fixup);
+
+ *(unsigned long *)((unsigned char *)regs + pt_regmap[regs_offset]) = 0;
+ regs->epc = (unsigned long)&ex->fixup - offset;
+
+ return 1;
+}
+
+/* For accesses to BTF pointers, add an entry to the exception table */
+static int add_exception_handler(const struct bpf_insn *insn,
+ struct rv_jit_context *ctx,
+ int dst_reg, int insn_len)
+{
+ off_t offset;
+ unsigned long pc;
+ struct exception_table_entry *ex;
+
Nit: Please use reverse xmas tree sorting (longest lines first).
[...]
Cheers,
Björn
.