Re: [PATCH v2] sched: set TIF_NEED_RESCHED before calling __trace_set_need_resched()

From: K Prateek Nayak

Date: Tue Jun 30 2026 - 12:18:19 EST


Hello Gabriele,

On 6/30/2026 2:28 PM, Gabriele Monaco wrote:
> On Mon, 2026-06-29 at 23:05 +0530, K Prateek Nayak wrote:
>>> That said, we should indeed add the tracepoint to that path and probably
>>> adapt the monitor if that's making it fail indirectly.
>>
>> For the npr use-case, I think the current scheme is fine since
>> only SM_PREEMPT counts as a "schedule_entry_preempt" transition
>> and only that can transition the state machine out of the
>> "any_thread_running" state.
>
> Right, the monitor can live without it, but I wonder if we need to put that
> tracepoint for correctness' sake. After all, however unlikely, that's a
> need_resched too.
>
> (then if the monitor really saw a need_resched after its sched_entry, it would
> stay erroneously in rescheduling).
>
> Anyway this all isn't related to the patch.

Ack! Would something like this work for completeness:

(Lightly tested; Based on current tip:sched/core)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 96226707c2f6..934f540d0d3f 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1049,9 +1049,16 @@ static inline void hrtick_schedule_exit(struct rq *rq) { }
* this avoids any races wrt polling state changes and thereby avoids
* spurious IPIs.
*/
-static inline bool set_nr_and_not_polling(struct thread_info *ti, int tif)
+static inline bool set_nr_and_not_polling(struct rq *rq, int tif)
{
- return !(fetch_or(&ti->flags, 1 << tif) & _TIF_POLLING_NRFLAG);
+ struct task_struct *curr = rq->curr;
+ struct thread_info *ti = task_thread_info(curr);
+ unsigned long old_flags = fetch_or(&ti->flags, 1 << tif);
+
+ if (trace_sched_set_need_resched_tp_enabled() && !(old_flags & (1 << tif)))
+ trace_call__sched_set_need_resched_tp(curr, cpu_of(rq), tif);
+
+ return !(old_flags & _TIF_POLLING_NRFLAG);
}

/*
@@ -1076,8 +1083,11 @@ static bool set_nr_if_polling(struct task_struct *p)
}

#else
-static inline bool set_nr_and_not_polling(struct thread_info *ti, int tif)
+static inline bool set_nr_and_not_polling(struct rq *rq, int tif)
{
+ struct task_struct *curr = rq->curr;
+ struct thread_info *ti = task_thread_info(curr);
+
set_ti_thread_flag(ti, tif);
return true;
}
@@ -1202,15 +1212,17 @@ static void __resched_curr(struct rq *rq, int tif)

cpu = cpu_of(rq);

- trace_sched_set_need_resched_tp(curr, cpu, tif);
if (cpu == smp_processor_id()) {
- set_ti_thread_flag(cti, tif);
+ int set = test_and_set_ti_thread_flag(cti, tif);
+
+ if (trace_sched_set_need_resched_tp_enabled() && !set)
+ trace_call__sched_set_need_resched_tp(curr, cpu, tif);
if (tif == TIF_NEED_RESCHED)
set_preempt_need_resched();
return;
}

- if (set_nr_and_not_polling(cti, tif)) {
+ if (set_nr_and_not_polling(rq, tif)) {
if (tif == TIF_NEED_RESCHED)
smp_send_reschedule(cpu);
} else {
@@ -1350,7 +1362,7 @@ static void wake_up_idle_cpu(int cpu)
* and testing of the above solutions didn't appear to report
* much benefits.
*/
- if (set_nr_and_not_polling(task_thread_info(rq->idle), TIF_NEED_RESCHED))
+ if (set_nr_and_not_polling(rq, TIF_NEED_RESCHED))
smp_send_reschedule(cpu);
else
trace_sched_wake_idle_without_ipi(cpu);
---

>
> Thanks,
> Gabriele
>

--
Thanks and Regards,
Prateek