[PATCH v3] sched: set TIF_NEED_RESCHED before calling __trace_set_need_resched()

From: Sechang Lim

Date: Tue Jun 30 2026 - 04:51:45 EST


set_tsk_need_resched() tests TIF_NEED_RESCHED, calls
__trace_set_need_resched() if the flag is clear, then sets it via
set_tsk_thread_flag(). A BPF raw_tp program attached to
sched_set_need_resched_tp executes synchronously inside __bpf_trace_run().
On return, __bpf_trace_run() drops the RCU lock with
rcu_read_unlock_migrate(), which on the preempt-or-BH-disabled path
calls set_need_resched_current() -> set_tsk_need_resched() again.

set_tsk_thread_flag() follows the tracepoint call, so every re-entrant
frame sees TIF_NEED_RESCHED still clear and calls
__trace_set_need_resched() again, recursing until the stack overflows:

BUG: TASK stack guard page was hit at ffffc9001224ff98
Oops: stack guard page: 0000 [#1] SMP KASAN PTI
RIP: 0010:__bpf_trace_sched_set_need_resched_tp+0x1c/0x190
Call Trace:
trace_sched_set_need_resched_tp+0x110/0x130
set_tsk_need_resched include/linux/sched.h:2076
set_need_resched_current include/linux/sched.h:2094
rcu_read_unlock_special+0x43a/0x440
__rcu_read_unlock+0x9e/0x120
rcu_read_unlock_migrate+0xa9/0x240
__bpf_trace_run+0x131/0x180
bpf_trace_run3+0x333/0x430
__bpf_trace_sched_set_need_resched_tp+0x13a/0x190
trace_sched_set_need_resched_tp+0x110/0x130
set_tsk_need_resched include/linux/sched.h:2076
...

__resched_curr() has the same ordering, firing the tracepoint before
setting the flag via set_ti_thread_flag() or set_nr_and_not_polling().
Fix it for consistency.

Replace the separate test_tsk_thread_flag() + set_tsk_thread_flag() pair
in set_tsk_need_resched() with test_and_set_tsk_thread_flag(). In
__resched_curr(), move the tracepoint call after the flag is set in
each path.

Fixes: adcc3bfa8806 ("sched: Adapt sched tracepoints for RV task model")
Signed-off-by: Sechang Lim <rhkrqnwk98@xxxxxxxxx>
---
v3:
- Reorder the need_ipi variable declaration. (K Prateek Nayak)

v2:
- https://lore.kernel.org/all/20260627081657.499781-1-rhkrqnwk98@xxxxxxxxx/

v1:
- https://lore.kernel.org/all/20260625065656.392182-1-rhkrqnwk98@xxxxxxxxx/

include/linux/sched.h | 5 ++---
kernel/sched/core.c | 7 +++++--
2 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index ee06cba5c6f5..c9efd08dae92 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2071,10 +2071,9 @@ static inline int test_tsk_thread_flag(struct task_struct *tsk, int flag)

static inline void set_tsk_need_resched(struct task_struct *tsk)
{
- if (tracepoint_enabled(sched_set_need_resched_tp) &&
- !test_tsk_thread_flag(tsk, TIF_NEED_RESCHED))
+ if (!test_and_set_tsk_thread_flag(tsk, TIF_NEED_RESCHED) &&
+ tracepoint_enabled(sched_set_need_resched_tp))
__trace_set_need_resched(tsk, TIF_NEED_RESCHED);
- set_tsk_thread_flag(tsk,TIF_NEED_RESCHED);
}

static inline void clear_tsk_need_resched(struct task_struct *tsk)
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index b8871449d3c6..19de28f0d85a 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1171,6 +1171,7 @@ static void __resched_curr(struct rq *rq, int tif)
{
struct task_struct *curr = rq->curr;
struct thread_info *cti = task_thread_info(curr);
+ bool need_ipi;
int cpu;

lockdep_assert_rq_held(rq);
@@ -1187,15 +1188,17 @@ static void __resched_curr(struct rq *rq, int tif)

cpu = cpu_of(rq);

- trace_sched_set_need_resched_tp(curr, cpu, tif);
if (cpu == smp_processor_id()) {
set_ti_thread_flag(cti, tif);
if (tif == TIF_NEED_RESCHED)
set_preempt_need_resched();
+ trace_sched_set_need_resched_tp(curr, cpu, tif);
return;
}

- if (set_nr_and_not_polling(cti, tif)) {
+ need_ipi = set_nr_and_not_polling(cti, tif);
+ trace_sched_set_need_resched_tp(curr, cpu, tif);
+ if (need_ipi) {
if (tif == TIF_NEED_RESCHED)
smp_send_reschedule(cpu);
} else {
--
2.43.0