[PATCH 5.15 80/83] x86/alternative: Fix race in try_get_desc()

From: Greg Kroah-Hartman
Date: Mon Oct 03 2022 - 03:31:21 EST


From: Nadav Amit <namit@xxxxxxxxxx>

commit efd608fa7403ba106412b437f873929e2c862e28 upstream.

I encountered some occasional crashes of poke_int3_handler() when
kprobes are set, while accessing desc->vec.

The text poke mechanism claims to have an RCU-like behavior, but it
does not appear that there is any quiescent state to ensure that
nobody holds reference to desc. As a result, the following race
appears to be possible, which can lead to memory corruption.

CPU0 CPU1
---- ----
text_poke_bp_batch()
-> smp_store_release(&bp_desc, &desc)

[ notice that desc is on
the stack ]

poke_int3_handler()

[ int3 might be kprobe's
so sync events are do not
help ]

-> try_get_desc(descp=&bp_desc)
desc = __READ_ONCE(bp_desc)

if (!desc) [false, success]
WRITE_ONCE(bp_desc, NULL);
atomic_dec_and_test(&desc.refs)

[ success, desc space on the stack
is being reused and might have
non-zero value. ]
arch_atomic_inc_not_zero(&desc->refs)

[ might succeed since desc points to
stack memory that was freed and might
be reused. ]

Fix this issue with small backportable patch. Instead of trying to
make RCU-like behavior for bp_desc, just eliminate the unnecessary
level of indirection of bp_desc, and hold the whole descriptor as a
global. Anyhow, there is only a single descriptor at any given
moment.

Fixes: 1f676247f36a4 ("x86/alternatives: Implement a better poke_int3_handler() completion scheme")
Signed-off-by: Nadav Amit <namit@xxxxxxxxxx>
Signed-off-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx>
Cc: stable@xxxxxxxxxx
Link: https://lkml.kernel.org/r/20220920224743.3089-1-namit@xxxxxxxxxx
Signed-off-by: Greg Kroah-Hartman <gregkh@xxxxxxxxxxxxxxxxxxx>
---
arch/x86/kernel/alternative.c | 45 +++++++++++++++++++++---------------------
1 file changed, 23 insertions(+), 22 deletions(-)

--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -1200,22 +1200,23 @@ struct bp_patching_desc {
atomic_t refs;
};

-static struct bp_patching_desc *bp_desc;
+static struct bp_patching_desc bp_desc;

static __always_inline
-struct bp_patching_desc *try_get_desc(struct bp_patching_desc **descp)
+struct bp_patching_desc *try_get_desc(void)
{
- /* rcu_dereference */
- struct bp_patching_desc *desc = __READ_ONCE(*descp);
+ struct bp_patching_desc *desc = &bp_desc;

- if (!desc || !arch_atomic_inc_not_zero(&desc->refs))
+ if (!arch_atomic_inc_not_zero(&desc->refs))
return NULL;

return desc;
}

-static __always_inline void put_desc(struct bp_patching_desc *desc)
+static __always_inline void put_desc(void)
{
+ struct bp_patching_desc *desc = &bp_desc;
+
smp_mb__before_atomic();
arch_atomic_dec(&desc->refs);
}
@@ -1248,15 +1249,15 @@ noinstr int poke_int3_handler(struct pt_

/*
* Having observed our INT3 instruction, we now must observe
- * bp_desc:
+ * bp_desc with non-zero refcount:
*
- * bp_desc = desc INT3
+ * bp_desc.refs = 1 INT3
* WMB RMB
- * write INT3 if (desc)
+ * write INT3 if (bp_desc.refs != 0)
*/
smp_rmb();

- desc = try_get_desc(&bp_desc);
+ desc = try_get_desc();
if (!desc)
return 0;

@@ -1310,7 +1311,7 @@ noinstr int poke_int3_handler(struct pt_
ret = 1;

out_put:
- put_desc(desc);
+ put_desc();
return ret;
}

@@ -1341,18 +1342,20 @@ static int tp_vec_nr;
*/
static void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries)
{
- struct bp_patching_desc desc = {
- .vec = tp,
- .nr_entries = nr_entries,
- .refs = ATOMIC_INIT(1),
- };
unsigned char int3 = INT3_INSN_OPCODE;
unsigned int i;
int do_sync;

lockdep_assert_held(&text_mutex);

- smp_store_release(&bp_desc, &desc); /* rcu_assign_pointer */
+ bp_desc.vec = tp;
+ bp_desc.nr_entries = nr_entries;
+
+ /*
+ * Corresponds to the implicit memory barrier in try_get_desc() to
+ * ensure reading a non-zero refcount provides up to date bp_desc data.
+ */
+ atomic_set_release(&bp_desc.refs, 1);

/*
* Corresponding read barrier in int3 notifier for making sure the
@@ -1440,12 +1443,10 @@ static void text_poke_bp_batch(struct te
text_poke_sync();

/*
- * Remove and synchronize_rcu(), except we have a very primitive
- * refcount based completion.
+ * Remove and wait for refs to be zero.
*/
- WRITE_ONCE(bp_desc, NULL); /* RCU_INIT_POINTER */
- if (!atomic_dec_and_test(&desc.refs))
- atomic_cond_read_acquire(&desc.refs, !VAL);
+ if (!atomic_dec_and_test(&bp_desc.refs))
+ atomic_cond_read_acquire(&bp_desc.refs, !VAL);
}

static void text_poke_loc_init(struct text_poke_loc *tp, void *addr,