[PATCH 2/2] sched: Move task_mm_cid_work to RCU callback

From: Gabriele Monaco
Date: Mon Dec 02 2024 - 11:09:32 EST


Currently, the task_mm_cid_work function is called in a task work
triggered by a scheduler tick. This can delay the execution of the
task for the entire duration of the function.

This patch runs the task_mm_cid_work in the RCU callback thread rather
than in the task context before returning to userspace.

The main advantage of this change is that the function can be offloaded
to a different CPU and even preempted by RT tasks.

On a busy system, this may mean the function gets called less often, but
the current behaviour already doesn't provide guarantees.

Signed-off-by: Gabriele Monaco <gmonaco@xxxxxxxxxx>
---
include/linux/sched.h | 1 -
kernel/sched/core.c | 17 ++++++-----------
2 files changed, 6 insertions(+), 12 deletions(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index d380bffee2ef..5d141c310917 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1374,7 +1374,6 @@ struct task_struct {
int last_mm_cid; /* Most recent cid in mm */
int migrate_from_cpu;
int mm_cid_active; /* Whether cid bitmap is active */
- struct callback_head cid_work;
#endif

struct tlbflush_unmap_batch tlb_ubc;
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 57b50b5952fa..0fc1a972fd4f 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -10520,17 +10520,15 @@ static void sched_mm_cid_remote_clear_weight(struct mm_struct *mm, int cpu,
sched_mm_cid_remote_clear(mm, pcpu_cid, cpu);
}

-static void task_mm_cid_work(struct callback_head *work)
+static void task_mm_cid_work(struct rcu_head *rhp)
{
unsigned long now = jiffies, old_scan, next_scan;
- struct task_struct *t = current;
+ struct task_struct *t = container_of(rhp, struct task_struct, rcu);
struct cpumask *cidmask;
struct mm_struct *mm;
int weight, cpu;

- SCHED_WARN_ON(t != container_of(work, struct task_struct, cid_work));
-
- work->next = work; /* Prevent double-add */
+ rhp->next = rhp; /* Prevent double-add */
if (t->flags & PF_EXITING)
return;
mm = t->mm;
@@ -10574,23 +10572,20 @@ void init_sched_mm_cid(struct task_struct *t)
if (mm_users == 1)
mm->mm_cid_next_scan = jiffies + msecs_to_jiffies(MM_CID_SCAN_DELAY);
}
- t->cid_work.next = &t->cid_work; /* Protect against double add */
- init_task_work(&t->cid_work, task_mm_cid_work);
}

void task_tick_mm_cid(struct rq *rq, struct task_struct *curr)
{
- struct callback_head *work = &curr->cid_work;
+ struct rcu_head *rhp = &curr->rcu;
unsigned long now = jiffies;

if (!curr->mm || (curr->flags & (PF_EXITING | PF_KTHREAD)) ||
- work->next != work)
+ rhp->next != rhp)
return;
if (time_before(now, READ_ONCE(curr->mm->mm_cid_next_scan)))
return;

- /* No page allocation under rq lock */
- task_work_add(curr, work, TWA_RESUME | TWAF_NO_ALLOC);
+ call_rcu(rhp, task_mm_cid_work);
}

void sched_mm_cid_exit_signals(struct task_struct *t)
--
2.47.0