Re: [PATCH v3 05/21] sched/cache: Assign preferred LLC ID to processes

From: Madadi Vineeth Reddy

Date: Sat Feb 14 2026 - 13:38:38 EST

On 11/02/26 03:48, Tim Chen wrote:
> With cache-aware scheduling enabled, each task is assigned a
> preferred LLC ID. This allows quick identification of the LLC domain
> where the task prefers to run, similar to numa_preferred_nid in
> NUMA balancing.
>
> Signed-off-by: Tim Chen <tim.c.chen@xxxxxxxxxxxxxxx>
> ---
>
> Notes:
> v2->v3:
> Add comments around code handling NUMA balance conflict with cache aware
> scheduling. (Peter Zijlstra)
>
> Check if NUMA balancing is disabled before checking numa_preferred_nid
> (Jianyong Wu)
>
> include/linux/sched.h | 1 +
> init/init_task.c | 3 +++
> kernel/sched/fair.c | 42 ++++++++++++++++++++++++++++++++++++++++++
> 3 files changed, 46 insertions(+)
>
> diff --git a/include/linux/sched.h b/include/linux/sched.h
> index 2817a21ee055..c98bd1c46088 100644
> --- a/include/linux/sched.h
> +++ b/include/linux/sched.h
> @@ -1411,6 +1411,7 @@ struct task_struct {
>
> #ifdef CONFIG_SCHED_CACHE
> struct callback_head cache_work;
> + int preferred_llc;
> #endif
>
> struct rseq_data rseq;
> diff --git a/init/init_task.c b/init/init_task.c
> index 49b13d7c3985..baa420de2644 100644
> --- a/init/init_task.c
> +++ b/init/init_task.c
> @@ -218,6 +218,9 @@ struct task_struct init_task __aligned(L1_CACHE_BYTES) = {
> .numa_group = NULL,
> .numa_faults = NULL,
> #endif
> +#ifdef CONFIG_SCHED_CACHE
> + .preferred_llc = -1,
> +#endif
> #if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)
> .kasan_depth = 1,
> #endif
> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> index bf5f39a01017..0b4ed0f2809d 100644
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -1273,11 +1273,43 @@ static unsigned long fraction_mm_sched(struct rq *rq,
> return div64_u64(NICE_0_LOAD * pcpu_sched->runtime, rq->cpu_runtime + 1);
> }
>
> +static int get_pref_llc(struct task_struct *p, struct mm_struct *mm)
> +{
> + int mm_sched_llc = -1;
> +
> + if (!mm)
> + return -1;
> +
> + if (mm->sc_stat.cpu != -1) {
> + mm_sched_llc = llc_id(mm->sc_stat.cpu);
> +
> +#ifdef CONFIG_NUMA_BALANCING
> + /*
> + * Don't assign preferred LLC if it
> + * conflicts with NUMA balancing.
> + * This can happen when sched_setnuma() gets
> + * called, however it is not much of an issue
> + * because we expect account_mm_sched() to get
> + * called fairly regularly -- at a higher rate
> + * than sched_setnuma() at least -- and thus the
> + * conflict only exists for a short period of time.
> + */
> + if (static_branch_likely(&sched_numa_balancing) &&
> + p->numa_preferred_nid >= 0 &&
> + cpu_to_node(mm->sc_stat.cpu) != p->numa_preferred_nid)
> + mm_sched_llc = -1;
> +#endif
> + }
> +
> + return mm_sched_llc;
> +}
> +
> static inline
> void account_mm_sched(struct rq *rq, struct task_struct *p, s64 delta_exec)
> {
> struct sched_cache_time *pcpu_sched;
> struct mm_struct *mm = p->mm;
> + int mm_sched_llc = -1;
> unsigned long epoch;
>
> if (!sched_cache_enabled())
> @@ -1311,6 +1343,11 @@ void account_mm_sched(struct rq *rq, struct task_struct *p, s64 delta_exec)
> if (mm->sc_stat.cpu != -1)
> mm->sc_stat.cpu = -1;
> }
> +
> + mm_sched_llc = get_pref_llc(p, mm);
> +
> + if (p->preferred_llc != mm_sched_llc)
> + p->preferred_llc = mm_sched_llc;

This writes to p->preferred_llc without using WRITE_ONCE(). If later patches read p->preferred_llc from
load balancing or migration paths on other CPUs, wouldn't this create a data race?

For example:
CPU 0: Task is running, account_mm_sched() writes p->preferred_llc
CPU 1: Load balancer reads p->preferred_llc to make migration decisions

Should this store use WRITE_ONCE(), with READ_ONCE() at the read sites, unless all accesses are
guaranteed to be under rq->lock?

Thanks,
Vineeth

> }
>
> static void task_tick_cache(struct rq *rq, struct task_struct *p)
> @@ -1440,6 +1477,11 @@ void init_sched_mm(struct task_struct *p) { }
>
> static void task_tick_cache(struct rq *rq, struct task_struct *p) { }
>
> +static inline int get_pref_llc(struct task_struct *p,
> + struct mm_struct *mm)
> +{
> + return -1;
> +}
> #endif
>
> /*