[PATCH v2 7/8] powerpc/ftrace: Add support for -mprofile-kernel ftrace ABI
From: Michael Ellerman
Date: Mon Feb 29 2016 - 04:26:58 EST
From: Torsten Duwe <duwe@xxxxxxx>
The gcc switch -mprofile-kernel defines a new ABI for calling _mcount()
very early in the function with minimal overhead.
Although mprofile-kernel has been available since GCC 3.4, there were
bugs which were only fixed recently. Currently it is known to work in
GCC 4.9, 5 and 6.
Additionally there are two possible code sequences generated by the
flag, the first uses mflr/std/bl and the second is optimised to omit the
std. Currently only gcc 6 has the optimised sequence. This patch
supports both sequences.
Initial work started by Vojtech Pavlik, used with permission.
Key changes:
- rework _mcount() to work for both the old and new ABIs.
- implement new versions of ftrace_caller() and ftrace_graph_caller()
which deal with the new ABI.
- updates to __ftrace_make_nop() to recognise the new mcount calling
sequence.
- updates to __ftrace_make_call() to recognise the nop'ed sequence.
- implement ftrace_modify_call().
- updates to the module loader to surpress the toc save in the module
stub when calling mcount with the new ABI.
Signed-off-by: Torsten Duwe <duwe@xxxxxxx>
Signed-off-by: Michael Ellerman <mpe@xxxxxxxxxxxxxx>
---
arch/powerpc/include/asm/code-patching.h | 21 ++++
arch/powerpc/include/asm/ftrace.h | 5 +
arch/powerpc/kernel/entry_64.S | 166 ++++++++++++++++++++++++++++++-
arch/powerpc/kernel/ftrace.c | 103 ++++++++++++++++---
arch/powerpc/kernel/module_64.c | 50 +++++++++-
5 files changed, 325 insertions(+), 20 deletions(-)
v2: Use a static inline for squash_toc_save_inst()
diff --git a/arch/powerpc/include/asm/code-patching.h b/arch/powerpc/include/asm/code-patching.h
index 840a5509b3f1..994c60a857ce 100644
--- a/arch/powerpc/include/asm/code-patching.h
+++ b/arch/powerpc/include/asm/code-patching.h
@@ -99,4 +99,25 @@ static inline unsigned long ppc_global_function_entry(void *func)
#endif
}
+#ifdef CONFIG_PPC64
+/*
+ * Some instruction encodings commonly used in dynamic ftracing
+ * and function live patching.
+ */
+
+/* This must match the definition of STK_GOT in <asm/ppc_asm.h> */
+#if defined(_CALL_ELF) && _CALL_ELF == 2
+#define R2_STACK_OFFSET 24
+#else
+#define R2_STACK_OFFSET 40
+#endif
+
+#define PPC_INST_LD_TOC (PPC_INST_LD | ___PPC_RT(__REG_R2) | \
+ ___PPC_RA(__REG_R1) | R2_STACK_OFFSET)
+
+/* usually preceded by a mflr r0 */
+#define PPC_INST_STD_LR (PPC_INST_STD | ___PPC_RS(__REG_R0) | \
+ ___PPC_RA(__REG_R1) | PPC_LR_STKOFF)
+#endif /* CONFIG_PPC64 */
+
#endif /* _ASM_POWERPC_CODE_PATCHING_H */
diff --git a/arch/powerpc/include/asm/ftrace.h b/arch/powerpc/include/asm/ftrace.h
index ef89b1465573..50ca7585abe2 100644
--- a/arch/powerpc/include/asm/ftrace.h
+++ b/arch/powerpc/include/asm/ftrace.h
@@ -46,6 +46,8 @@
extern void _mcount(void);
#ifdef CONFIG_DYNAMIC_FTRACE
+# define FTRACE_ADDR ((unsigned long)ftrace_caller)
+# define FTRACE_REGS_ADDR FTRACE_ADDR
static inline unsigned long ftrace_call_adjust(unsigned long addr)
{
/* reloction of mcount call site is the same as the address */
@@ -58,6 +60,9 @@ struct dyn_arch_ftrace {
#endif /* CONFIG_DYNAMIC_FTRACE */
#endif /* __ASSEMBLY__ */
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
+#define ARCH_SUPPORTS_FTRACE_OPS 1
+#endif
#endif
#if defined(CONFIG_FTRACE_SYSCALLS) && defined(CONFIG_PPC64) && !defined(__ASSEMBLY__)
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index 0d525ce3717f..ec7f8aada697 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -1143,8 +1143,12 @@ _GLOBAL(enter_prom)
#ifdef CONFIG_DYNAMIC_FTRACE
_GLOBAL(mcount)
_GLOBAL(_mcount)
- blr
+ mflr r12
+ mtctr r12
+ mtlr r0
+ bctr
+#ifndef CC_USING_MPROFILE_KERNEL
_GLOBAL_TOC(ftrace_caller)
/* Taken from output of objdump from lib64/glibc */
mflr r3
@@ -1166,6 +1170,115 @@ _GLOBAL(ftrace_graph_stub)
ld r0, 128(r1)
mtlr r0
addi r1, r1, 112
+
+#else /* CC_USING_MPROFILE_KERNEL */
+/*
+ *
+ * ftrace_caller() is the function that replaces _mcount() when ftrace is
+ * active.
+ *
+ * We arrive here after a function A calls function B, and we are the trace
+ * function for B. When we enter r1 points to A's stack frame, B has not yet
+ * had a chance to allocate one yet.
+ *
+ * Additionally r2 may point either to the TOC for A, or B, depending on
+ * whether B did a TOC setup sequence before calling us.
+ *
+ * On entry the LR points back to the _mcount() call site, and r0 holds the
+ * saved LR as it was on entry to B, ie. the original return address at the
+ * call site in A.
+ *
+ * Our job is to save the register state into a struct pt_regs (on the stack)
+ * and then arrange for the ftrace function to be called.
+ */
+_GLOBAL(ftrace_caller)
+ /* Save the original return address in A's stack frame */
+ std r0,LRSAVE(r1)
+
+ /* Create our stack frame + pt_regs */
+ stdu r1,-SWITCH_FRAME_SIZE(r1)
+
+ /* Save all gprs to pt_regs */
+ SAVE_8GPRS(0,r1)
+ SAVE_8GPRS(8,r1)
+ SAVE_8GPRS(16,r1)
+ SAVE_8GPRS(24,r1)
+
+ /* Load special regs for save below */
+ mfmsr r8
+ mfctr r9
+ mfxer r10
+ mfcr r11
+
+ /* Get the _mcount() call site out of LR */
+ mflr r7
+ /* Save it as pt_regs->nip & pt_regs->link */
+ std r7, _NIP(r1)
+ std r7, _LINK(r1)
+
+ /* Save callee's TOC in the ABI compliant location */
+ std r2, 24(r1)
+ ld r2,PACATOC(r13) /* get kernel TOC in r2 */
+
+ addis r3,r2,function_trace_op@toc@ha
+ addi r3,r3,function_trace_op@toc@l
+ ld r5,0(r3)
+
+ /* Calculate ip from nip-4 into r3 for call below */
+ subi r3, r7, MCOUNT_INSN_SIZE
+
+ /* Put the original return address in r4 as parent_ip */
+ mr r4, r0
+
+ /* Save special regs */
+ std r8, _MSR(r1)
+ std r9, _CTR(r1)
+ std r10, _XER(r1)
+ std r11, _CCR(r1)
+
+ /* Load &pt_regs in r6 for call below */
+ addi r6, r1 ,STACK_FRAME_OVERHEAD
+
+ /* ftrace_call(r3, r4, r5, r6) */
+.globl ftrace_call
+ftrace_call:
+ bl ftrace_stub
+ nop
+
+ /* Load ctr with the possibly modified NIP */
+ ld r3, _NIP(r1)
+ mtctr r3
+
+ /* Restore gprs */
+ REST_8GPRS(0,r1)
+ REST_8GPRS(8,r1)
+ REST_8GPRS(16,r1)
+ REST_8GPRS(24,r1)
+
+ /* Restore callee's TOC */
+ ld r2, 24(r1)
+
+ /* Pop our stack frame */
+ addi r1, r1, SWITCH_FRAME_SIZE
+
+ /* Restore original LR for return to B */
+ ld r0, LRSAVE(r1)
+ mtlr r0
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+ stdu r1, -112(r1)
+.globl ftrace_graph_call
+ftrace_graph_call:
+ b ftrace_graph_stub
+_GLOBAL(ftrace_graph_stub)
+ addi r1, r1, 112
+#endif
+
+ ld r0,LRSAVE(r1) /* restore callee's lr at _mcount site */
+ mtlr r0
+ bctr /* jump after _mcount site */
+#endif /* CC_USING_MPROFILE_KERNEL */
+
_GLOBAL(ftrace_stub)
blr
#else
@@ -1198,6 +1311,7 @@ _GLOBAL(ftrace_stub)
#endif /* CONFIG_DYNAMIC_FTRACE */
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+#ifndef CC_USING_MPROFILE_KERNEL
_GLOBAL(ftrace_graph_caller)
/* load r4 with local address */
ld r4, 128(r1)
@@ -1222,6 +1336,56 @@ _GLOBAL(ftrace_graph_caller)
addi r1, r1, 112
blr
+#else /* CC_USING_MPROFILE_KERNEL */
+_GLOBAL(ftrace_graph_caller)
+ /* with -mprofile-kernel, parameter regs are still alive at _mcount */
+ std r10, 104(r1)
+ std r9, 96(r1)
+ std r8, 88(r1)
+ std r7, 80(r1)
+ std r6, 72(r1)
+ std r5, 64(r1)
+ std r4, 56(r1)
+ std r3, 48(r1)
+
+ /* Save callee's TOC in the ABI compliant location */
+ std r2, 24(r1)
+ ld r2, PACATOC(r13) /* get kernel TOC in r2 */
+
+ mfctr r4 /* ftrace_caller has moved local addr here */
+ std r4, 40(r1)
+ mflr r3 /* ftrace_caller has restored LR from stack */
+ subi r4, r4, MCOUNT_INSN_SIZE
+
+ bl prepare_ftrace_return
+ nop
+
+ /*
+ * prepare_ftrace_return gives us the address we divert to.
+ * Change the LR to this.
+ */
+ mtlr r3
+
+ ld r0, 40(r1)
+ mtctr r0
+ ld r10, 104(r1)
+ ld r9, 96(r1)
+ ld r8, 88(r1)
+ ld r7, 80(r1)
+ ld r6, 72(r1)
+ ld r5, 64(r1)
+ ld r4, 56(r1)
+ ld r3, 48(r1)
+
+ /* Restore callee's TOC */
+ ld r2, 24(r1)
+
+ addi r1, r1, 112
+ mflr r0
+ std r0, LRSAVE(r1)
+ bctr
+#endif /* CC_USING_MPROFILE_KERNEL */
+
_GLOBAL(return_to_handler)
/* need to save return values */
std r4, -32(r1)
diff --git a/arch/powerpc/kernel/ftrace.c b/arch/powerpc/kernel/ftrace.c
index 62899fbae703..9dac18dabd03 100644
--- a/arch/powerpc/kernel/ftrace.c
+++ b/arch/powerpc/kernel/ftrace.c
@@ -61,8 +61,11 @@ ftrace_modify_code(unsigned long ip, unsigned int old, unsigned int new)
return -EFAULT;
/* Make sure it is what we expect it to be */
- if (replaced != old)
+ if (replaced != old) {
+ pr_err("%p: replaced (%#x) != old (%#x)",
+ (void *)ip, replaced, old);
return -EINVAL;
+ }
/* replace the text with the new text */
if (patch_instruction((unsigned int *)ip, new))
@@ -108,11 +111,13 @@ __ftrace_make_nop(struct module *mod,
{
unsigned long entry, ptr, tramp;
unsigned long ip = rec->ip;
- unsigned int op;
+ unsigned int op, pop;
/* read where this goes */
- if (probe_kernel_read(&op, (void *)ip, sizeof(int)))
+ if (probe_kernel_read(&op, (void *)ip, sizeof(int))) {
+ pr_err("Fetching opcode failed.\n");
return -EFAULT;
+ }
/* Make sure that that this is still a 24bit jump */
if (!is_bl_op(op)) {
@@ -152,10 +157,42 @@ __ftrace_make_nop(struct module *mod,
*
* Use a b +8 to jump over the load.
*/
- op = 0x48000008; /* b +8 */
- if (patch_instruction((unsigned int *)ip, op))
+ pop = PPC_INST_BRANCH | 8; /* b +8 */
+
+ /*
+ * Check what is in the next instruction. We can see ld r2,40(r1), but
+ * on first pass after boot we will see mflr r0.
+ */
+ if (probe_kernel_read(&op, (void *)(ip+4), MCOUNT_INSN_SIZE)) {
+ pr_err("Fetching op failed.\n");
+ return -EFAULT;
+ }
+
+ if (op != PPC_INST_LD_TOC) {
+ unsigned int inst;
+
+ if (probe_kernel_read(&inst, (void *)(ip - 4), 4)) {
+ pr_err("Fetching instruction at %lx failed.\n", ip - 4);
+ return -EFAULT;
+ }
+
+ /* We expect either a mlfr r0, or a std r0, LRSAVE(r1) */
+ if (inst != PPC_INST_MFLR && inst != PPC_INST_STD_LR) {
+ pr_err("Unexpected instructions around bl _mcount\n"
+ "when enabling dynamic ftrace!\t"
+ "(%08x,bl,%08x)\n", inst, op);
+ return -EINVAL;
+ }
+
+ /* When using -mkernel_profile there is no load to jump over */
+ pop = PPC_INST_NOP;
+ }
+
+ if (patch_instruction((unsigned int *)ip, pop)) {
+ pr_err("Patching NOP failed.\n");
return -EPERM;
+ }
return 0;
}
@@ -281,16 +318,15 @@ int ftrace_make_nop(struct module *mod,
#ifdef CONFIG_MODULES
#ifdef CONFIG_PPC64
+/*
+ * Examine the existing instructions for __ftrace_make_call.
+ * They should effectively be a NOP, and follow formal constraints,
+ * depending on the ABI. Return false if they don't.
+ */
+#ifndef CC_USING_MPROFILE_KERNEL
static int
-__ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
+expected_nop_sequence(void *ip, unsigned int op0, unsigned int op1)
{
- unsigned int op[2];
- void *ip = (void *)rec->ip;
-
- /* read where this goes */
- if (probe_kernel_read(op, ip, sizeof(op)))
- return -EFAULT;
-
/*
* We expect to see:
*
@@ -300,8 +336,34 @@ __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
* The load offset is different depending on the ABI. For simplicity
* just mask it out when doing the compare.
*/
- if ((op[0] != 0x48000008) || ((op[1] & 0xffff0000) != 0xe8410000)) {
- pr_err("Unexpected call sequence: %x %x\n", op[0], op[1]);
+ if ((op0 != 0x48000008) || ((op1 & 0xffff0000) != 0xe8410000))
+ return 0;
+ return 1;
+}
+#else
+static int
+expected_nop_sequence(void *ip, unsigned int op0, unsigned int op1)
+{
+ /* look for patched "NOP" on ppc64 with -mprofile-kernel */
+ if (op0 != PPC_INST_NOP)
+ return 0;
+ return 1;
+}
+#endif
+
+static int
+__ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
+{
+ unsigned int op[2];
+ void *ip = (void *)rec->ip;
+
+ /* read where this goes */
+ if (probe_kernel_read(op, ip, sizeof(op)))
+ return -EFAULT;
+
+ if (!expected_nop_sequence(ip, op[0], op[1])) {
+ pr_err("Unexpected call sequence at %p: %x %x\n",
+ ip, op[0], op[1]);
return -EINVAL;
}
@@ -324,7 +386,16 @@ __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
return 0;
}
-#else
+
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
+int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
+ unsigned long addr)
+{
+ return ftrace_make_call(rec, addr);
+}
+#endif
+
+#else /* !CONFIG_PPC64: */
static int
__ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
{
diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c
index 76c0963572f5..2e412c4d150b 100644
--- a/arch/powerpc/kernel/module_64.c
+++ b/arch/powerpc/kernel/module_64.c
@@ -42,7 +42,6 @@
--RR. */
#if defined(_CALL_ELF) && _CALL_ELF == 2
-#define R2_STACK_OFFSET 24
/* An address is simply the address of the function. */
typedef unsigned long func_desc_t;
@@ -74,7 +73,6 @@ static unsigned int local_entry_offset(const Elf64_Sym *sym)
return PPC64_LOCAL_ENTRY_OFFSET(sym->st_other);
}
#else
-#define R2_STACK_OFFSET 40
/* An address is address of the OPD entry, which contains address of fn. */
typedef struct ppc64_opd_entry func_desc_t;
@@ -451,20 +449,47 @@ static unsigned long stub_for_addr(const Elf64_Shdr *sechdrs,
return (unsigned long)&stubs[i];
}
+#ifdef CC_USING_MPROFILE_KERNEL
+static bool is_early_mcount_callsite(u32 *instruction)
+{
+ /*
+ * Check if this is one of the -mprofile-kernel sequences.
+ */
+ if (instruction[-1] == PPC_INST_STD_LR &&
+ instruction[-2] == PPC_INST_MFLR)
+ return true;
+
+ if (instruction[-1] == PPC_INST_MFLR)
+ return true;
+
+ return false;
+}
+#else
+/* without -mprofile-kernel, mcount calls are never early */
+static bool is_early_mcount_callsite(u32 *instruction)
+{
+ return false;
+}
+#endif
+
/* We expect a noop next: if it is, replace it with instruction to
restore r2. */
static int restore_r2(u32 *instruction, struct module *me)
{
if (*instruction != PPC_INST_NOP) {
+ if (is_early_mcount_callsite(instruction - 1))
+ return 1;
pr_err("%s: Expect noop after relocate, got %08x\n",
me->name, *instruction);
return 0;
}
/* ld r2,R2_STACK_OFFSET(r1) */
- *instruction = 0xe8410000 | R2_STACK_OFFSET;
+ *instruction = PPC_INST_LD_TOC;
return 1;
}
+static void squash_toc_save_inst(const char *name, unsigned long addr);
+
int apply_relocate_add(Elf64_Shdr *sechdrs,
const char *strtab,
unsigned int symindex,
@@ -586,6 +611,8 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
return -ENOENT;
if (!restore_r2((u32 *)location + 1, me))
return -ENOEXEC;
+
+ squash_toc_save_inst(strtab + sym->st_name, value);
} else
value += local_entry_offset(sym);
@@ -674,6 +701,21 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
#ifdef CONFIG_DYNAMIC_FTRACE
#ifdef CC_USING_MPROFILE_KERNEL
+/*
+ * In case of _mcount calls, do not save the current callee's TOC (in r2) into
+ * the original caller's stack frame. If we did we would clobber the saved TOC
+ * value of the original caller.
+ */
+static void squash_toc_save_inst(const char *name, unsigned long addr)
+{
+ struct ppc64_stub_entry *stub = (struct ppc64_stub_entry *)addr;
+
+ /* Only for calls to _mcount */
+ if (strcmp("_mcount", name) != 0)
+ return;
+
+ stub->jump[2] = PPC_INST_NOP;
+}
#define PACATOC offsetof(struct paca_struct, kernel_toc)
@@ -732,6 +774,8 @@ static unsigned long create_ftrace_stub(const Elf64_Shdr *sechdrs, struct module
return (unsigned long)entry;
}
#else
+static void squash_toc_save_inst(const char *name, unsigned long addr) { }
+
static unsigned long create_ftrace_stub(const Elf64_Shdr *sechdrs, struct module *me)
{
return stub_for_addr(sechdrs, (unsigned long)ftrace_caller, me);
--
2.5.0