Re: [PATCH v2] riscv: Add tracepoints for SBI calls and returns

From: Palmer Dabbelt
Date: Wed Jul 10 2024 - 16:29:17 EST


On Mon, 25 Mar 2024 20:39:27 PDT (-0700), samuel.holland@xxxxxxxxxx wrote:
Hi Alex,

On 2024-03-22 4:27 AM, Alexandre Ghiti wrote:
On Fri, Mar 22, 2024 at 12:01 AM Samuel Holland
<samuel.holland@xxxxxxxxxx> wrote:

These are useful for measuring the latency of SBI calls. The SBI HSM
extension is excluded because those functions are called from contexts
such as cpuidle where instrumentation is not allowed.

Reviewed-by: Andrew Jones <ajones@xxxxxxxxxxxxxxxx>
Signed-off-by: Samuel Holland <samuel.holland@xxxxxxxxxx>
---

Changes in v2:
- Remove some blank lines

arch/riscv/include/asm/trace.h | 54 ++++++++++++++++++++++++++++++++++
arch/riscv/kernel/sbi.c | 7 +++++
2 files changed, 61 insertions(+)
create mode 100644 arch/riscv/include/asm/trace.h

diff --git a/arch/riscv/include/asm/trace.h b/arch/riscv/include/asm/trace.h
new file mode 100644
index 000000000000..6151cee5450c
--- /dev/null
+++ b/arch/riscv/include/asm/trace.h
@@ -0,0 +1,54 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM riscv
+
+#if !defined(_TRACE_RISCV_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_RISCV_H
+
+#include <linux/tracepoint.h>
+
+TRACE_EVENT_CONDITION(sbi_call,
+ TP_PROTO(int ext, int fid),
+ TP_ARGS(ext, fid),
+ TP_CONDITION(ext != SBI_EXT_HSM),
+
+ TP_STRUCT__entry(
+ __field(int, ext)
+ __field(int, fid)
+ ),
+
+ TP_fast_assign(
+ __entry->ext = ext;
+ __entry->fid = fid;
+ ),
+
+ TP_printk("ext=0x%x fid=%d", __entry->ext, __entry->fid)
+);
+
+TRACE_EVENT_CONDITION(sbi_return,
+ TP_PROTO(int ext, long error, long value),
+ TP_ARGS(ext, error, value),
+ TP_CONDITION(ext != SBI_EXT_HSM),
+
+ TP_STRUCT__entry(
+ __field(long, error)
+ __field(long, value)
+ ),
+
+ TP_fast_assign(
+ __entry->error = error;
+ __entry->value = value;
+ ),
+
+ TP_printk("error=%ld value=0x%lx", __entry->error, __entry->value)
+);
+
+#endif /* _TRACE_RISCV_H */
+
+#undef TRACE_INCLUDE_PATH
+#undef TRACE_INCLUDE_FILE
+
+#define TRACE_INCLUDE_PATH asm
+#define TRACE_INCLUDE_FILE trace
+
+#include <trace/define_trace.h>
diff --git a/arch/riscv/kernel/sbi.c b/arch/riscv/kernel/sbi.c
index e66e0999a800..a1d21d8f5293 100644
--- a/arch/riscv/kernel/sbi.c
+++ b/arch/riscv/kernel/sbi.c
@@ -14,6 +14,9 @@
#include <asm/smp.h>
#include <asm/tlbflush.h>

+#define CREATE_TRACE_POINTS
+#include <asm/trace.h>
+
/* default SBI version is 0.1 */
unsigned long sbi_spec_version __ro_after_init = SBI_SPEC_VERSION_DEFAULT;
EXPORT_SYMBOL(sbi_spec_version);
@@ -31,6 +34,8 @@ struct sbiret sbi_ecall(int ext, int fid, unsigned long arg0,
{
struct sbiret ret;

+ trace_sbi_call(ext, fid);
+
register uintptr_t a0 asm ("a0") = (uintptr_t)(arg0);
register uintptr_t a1 asm ("a1") = (uintptr_t)(arg1);
register uintptr_t a2 asm ("a2") = (uintptr_t)(arg2);
@@ -46,6 +51,8 @@ struct sbiret sbi_ecall(int ext, int fid, unsigned long arg0,
ret.error = a0;
ret.value = a1;

+ trace_sbi_return(ext, ret.error, ret.value);
+
return ret;
}
EXPORT_SYMBOL(sbi_ecall);
--
2.43.1


sbi_ecall() is used by flush_icache_all() so I was a bit scared we
would need to patch 2 instructions using a tracepoint and then to use
flush_icache_all() which could bug if only one of the patched
instruction was seen. But I took a look at the disassembled code, and
tracepoints happen to only need to patch 1 nop into an unconditional
jump, which is not a problem then.

Right, tracepoints use the static branch infrastructure, which uses a single
instruction.

But then when disassembling, I noticed that the addition of the
tracepoint comes with "quite" some overhead:

Before:

Dump of assembler code for function sbi_ecall:
0xffffffff800085e0 <+0>: add sp,sp,-32
0xffffffff800085e2 <+2>: sd s0,24(sp)
0xffffffff800085e4 <+4>: mv t1,a0
0xffffffff800085e6 <+6>: add s0,sp,32
0xffffffff800085e8 <+8>: mv t3,a1
0xffffffff800085ea <+10>: mv a0,a2
0xffffffff800085ec <+12>: mv a1,a3
0xffffffff800085ee <+14>: mv a2,a4
0xffffffff800085f0 <+16>: mv a3,a5
0xffffffff800085f2 <+18>: mv a4,a6
0xffffffff800085f4 <+20>: mv a5,a7
0xffffffff800085f6 <+22>: mv a6,t3
0xffffffff800085f8 <+24>: mv a7,t1
0xffffffff800085fa <+26>: ecall
0xffffffff800085fe <+30>: ld s0,24(sp)
0xffffffff80008600 <+32>: add sp,sp,32
0xffffffff80008602 <+34>: ret

After:

Dump of assembler code for function sbi_ecall:
0xffffffff8000bbf2 <+0>: add sp,sp,-112
0xffffffff8000bbf4 <+2>: sd s0,96(sp)
0xffffffff8000bbf6 <+4>: sd s1,88(sp)
0xffffffff8000bbf8 <+6>: sd s3,72(sp)
0xffffffff8000bbfa <+8>: sd s4,64(sp)
0xffffffff8000bbfc <+10>: sd s5,56(sp)
0xffffffff8000bbfe <+12>: sd s6,48(sp)
0xffffffff8000bc00 <+14>: sd s7,40(sp)
0xffffffff8000bc02 <+16>: sd s8,32(sp)
0xffffffff8000bc04 <+18>: sd s9,24(sp)
0xffffffff8000bc06 <+20>: sd ra,104(sp)
0xffffffff8000bc08 <+22>: sd s2,80(sp)
0xffffffff8000bc0a <+24>: add s0,sp,112
0xffffffff8000bc0c <+26>: mv s1,a0
0xffffffff8000bc0e <+28>: mv s3,a1
0xffffffff8000bc10 <+30>: mv s9,a2
0xffffffff8000bc12 <+32>: mv s8,a3
0xffffffff8000bc14 <+34>: mv s7,a4
0xffffffff8000bc16 <+36>: mv s6,a5
0xffffffff8000bc18 <+38>: mv s5,a6
0xffffffff8000bc1a <+40>: mv s4,a7
0xffffffff8000bc1c <+42>: nop
0xffffffff8000bc20 <+46>: mv a0,s9
0xffffffff8000bc22 <+48>: mv a1,s8
0xffffffff8000bc24 <+50>: mv a2,s7
0xffffffff8000bc26 <+52>: mv a3,s6
0xffffffff8000bc28 <+54>: mv a4,s5
0xffffffff8000bc2a <+56>: mv a5,s4
0xffffffff8000bc2c <+58>: mv a6,s3
0xffffffff8000bc2e <+60>: mv a7,s1
0xffffffff8000bc30 <+62>: ecall
0xffffffff8000bc34 <+66>: mv s4,a0
0xffffffff8000bc36 <+68>: mv s3,a1
0xffffffff8000bc38 <+70>: nop
0xffffffff8000bc3c <+74>: ld ra,104(sp)
0xffffffff8000bc3e <+76>: ld s0,96(sp)
0xffffffff8000bc40 <+78>: ld s1,88(sp)
0xffffffff8000bc42 <+80>: ld s2,80(sp)
0xffffffff8000bc44 <+82>: ld s5,56(sp)
0xffffffff8000bc46 <+84>: ld s6,48(sp)
0xffffffff8000bc48 <+86>: ld s7,40(sp)
0xffffffff8000bc4a <+88>: ld s8,32(sp)
0xffffffff8000bc4c <+90>: ld s9,24(sp)
0xffffffff8000bc4e <+92>: mv a0,s4
0xffffffff8000bc50 <+94>: mv a1,s3
0xffffffff8000bc52 <+96>: ld s4,64(sp)
0xffffffff8000bc54 <+98>: ld s3,72(sp)
0xffffffff8000bc56 <+100>: add sp,sp,112
0xffffffff8000bc58 <+102>: ret

Not sure this is really problematic though, let me know what you think.

The overhead is only incurred when tracepoints are enabled. But when they are
enabled, I do not think it is avoidable, unless we were to hide this tracepoint
behind some Kconfig option. I don't think the overhead enough to be a problem,
but I suppose it's up to maintainer judgement.

IIUC it's actually just the printk() inside the tracepoint that's adding the overhead, not the tracepoints themselves? We're shimming every SBI call through sbi_ecall(), which fills out every argument, so we end up saving/restoring them all around the printk(). We could convert these over to sbi_ecall_N like we do for syscalls in glibc, then we'd only need to save/restore the arguments that have something useful.

I'm going to pick this one up, though, as it's not directly a problem with the patch in question. It's staged and should show up on for-next eventually, stuff might be a bit chaotic until we figure out all the poop...


Regards,
Samuel