Re: [PATCH v3 5/6] x86/ftrace: Use text_poke()

From: Peter Zijlstra
Date: Fri Oct 11 2019 - 06:46:25 EST


On Thu, Oct 10, 2019 at 01:48:30PM -0400, Steven Rostedt wrote:
> On Thu, 10 Oct 2019 19:28:19 +0200
> Peter Zijlstra <peterz@xxxxxxxxxxxxx> wrote:
>
> > > That is, I really hate the above "set_ro" hack. This is because you
> > > moved the ro setting to create_trampoline() and then forcing the
> > > text_poke() on text that has just been created. I prefer to just modify
> > > it and then setting it to ro before it gets executed. Otherwise we need
> > > to do all these dances.
> >
> > I thought create_trampoline() finished the whole thing; if it does not,
> > either make create_trampoline() do everything, or add a
> > finish_trampoline() callback to mark it complete.
>
> I'm good with a finish_trampoline(). I can make a patch that does that.

I found it easier to just make create_trampoline do it all. The below
patch seems to cure both issues for me.

---
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -1213,6 +1213,11 @@ void text_poke_queue(void *addr, const v
{
struct text_poke_loc *tp;

+ if (unlikely(system_state == SYSTEM_BOOTING)) {
+ text_poke_early(addr, opcode, len);
+ return;
+ }
+
text_poke_flush(addr);

tp = &tp_vec[tp_vec_nr++];
@@ -1230,10 +1235,15 @@ void text_poke_queue(void *addr, const v
* dynamically allocated memory. This function should be used when it is
* not possible to allocate memory.
*/
-void text_poke_bp(void *addr, const void *opcode, size_t len, const void *emulate)
+void __ref text_poke_bp(void *addr, const void *opcode, size_t len, const void *emulate)
{
struct text_poke_loc tp;

+ if (unlikely(system_state == SYSTEM_BOOTING)) {
+ text_poke_early(addr, opcode, len);
+ return;
+ }
+
text_poke_loc_init(&tp, addr, opcode, len, emulate);
text_poke_bp_batch(&tp, 1);
}
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -34,6 +34,8 @@

#ifdef CONFIG_DYNAMIC_FTRACE

+static int ftrace_poke_late = 0;
+
int ftrace_arch_code_modify_prepare(void)
__acquires(&text_mutex)
{
@@ -43,12 +45,15 @@ int ftrace_arch_code_modify_prepare(void
* ftrace has it set to "read/write".
*/
mutex_lock(&text_mutex);
+ ftrace_poke_late = 1;
return 0;
}

int ftrace_arch_code_modify_post_process(void)
__releases(&text_mutex)
{
+ text_poke_finish();
+ ftrace_poke_late = 0;
mutex_unlock(&text_mutex);
return 0;
}
@@ -116,7 +121,10 @@ ftrace_modify_code_direct(unsigned long
return ret;

/* replace the text with the new text */
- text_poke_early((void *)ip, new_code, MCOUNT_INSN_SIZE);
+ if (ftrace_poke_late)
+ text_poke_queue((void *)ip, new_code, MCOUNT_INSN_SIZE, NULL);
+ else
+ text_poke_early((void *)ip, new_code, MCOUNT_INSN_SIZE);
return 0;
}

@@ -308,11 +316,12 @@ union ftrace_op_code_union {
#define RET_SIZE 1

static unsigned long
-create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size)
+create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size, ftrace_func_t func)
{
unsigned long start_offset;
unsigned long end_offset;
unsigned long op_offset;
+ unsigned long call_offset;
unsigned long offset;
unsigned long npages;
unsigned long size;
@@ -329,10 +338,12 @@ create_trampoline(struct ftrace_ops *ops
start_offset = (unsigned long)ftrace_regs_caller;
end_offset = (unsigned long)ftrace_regs_caller_end;
op_offset = (unsigned long)ftrace_regs_caller_op_ptr;
+ call_offset = (unsigned long)ftrace_regs_call;
} else {
start_offset = (unsigned long)ftrace_caller;
end_offset = (unsigned long)ftrace_epilogue;
op_offset = (unsigned long)ftrace_caller_op_ptr;
+ call_offset = (unsigned long)ftrace_call;
}

size = end_offset - start_offset;
@@ -389,6 +400,14 @@ create_trampoline(struct ftrace_ops *ops
/* put in the new offset to the ftrace_ops */
memcpy(trampoline + op_offset, &op_ptr, OP_REF_SIZE);

+ /* put in the call to the function */
+ mutex_lock(&text_mutex);
+ call_offset -= start_offset;
+ memcpy(trampoline + call_offset,
+ text_gen_insn(CALL_INSN_OPCODE, trampoline + call_offset, func),
+ CALL_INSN_SIZE);
+ mutex_unlock(&text_mutex);
+
/* ALLOC_TRAMP flags lets us know we created it */
ops->flags |= FTRACE_OPS_FL_ALLOC_TRAMP;

@@ -426,23 +445,23 @@ void arch_ftrace_update_trampoline(struc
unsigned int size;
const char *new;

- if (ops->trampoline) {
- /*
- * The ftrace_ops caller may set up its own trampoline.
- * In such a case, this code must not modify it.
- */
- if (!(ops->flags & FTRACE_OPS_FL_ALLOC_TRAMP))
- return;
- } else {
- ops->trampoline = create_trampoline(ops, &size);
+ if (!ops->trampoline) {
+ ops->trampoline = create_trampoline(ops, &size, ftrace_ops_get_func(ops));
if (!ops->trampoline)
return;
ops->trampoline_size = size;
+ return;
}

+ /*
+ * The ftrace_ops caller may set up its own trampoline.
+ * In such a case, this code must not modify it.
+ */
+ if (!(ops->flags & FTRACE_OPS_FL_ALLOC_TRAMP))
+ return;
+
offset = calc_trampoline_call_offset(ops->flags & FTRACE_OPS_FL_SAVE_REGS);
ip = ops->trampoline + offset;
-
func = ftrace_ops_get_func(ops);

mutex_lock(&text_mutex);