[PATCH 4/9] sched: Default core-sched policy

From: Peter Zijlstra
Date: Thu Apr 01 2021 - 14:17:47 EST


Implement default core scheduling policy.

- fork() / clone(): inherit cookie from parent
- exec(): if the task has a cookie, replace it with a new one

Did that exec() rule want to change the cookie only on suid exec instead,
just like perf_event_exit_task()?

Note that sched_core_fork() is called from under tasklist_lock, and
not from sched_fork() earlier. This avoids a few races later.
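
To make the default policy concrete, here is a minimal userspace sketch.
It assumes the PR_SCHED_CORE prctl() interface added later in this
series; the constants and the GET semantics below come from that patch,
not from this one.

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/prctl.h>
#include <sys/wait.h>

#ifndef PR_SCHED_CORE
#define PR_SCHED_CORE		62
#define PR_SCHED_CORE_GET	0
#define PR_SCHED_CORE_CREATE	1
#endif

int main(void)
{
	unsigned long cookie = 0;

	/* Tag the current task with a fresh core-sched cookie. */
	if (prctl(PR_SCHED_CORE, PR_SCHED_CORE_CREATE, 0, 0, 0))
		perror("PR_SCHED_CORE_CREATE");

	if (!fork()) {
		/* fork(): the child inherits the parent's cookie... */
		prctl(PR_SCHED_CORE, PR_SCHED_CORE_GET, 0, 0,
		      (unsigned long)&cookie);
		printf("child cookie: %lx\n", cookie);

		/* ...and exec() re-cookies the new image per the rule above. */
		execl("/bin/true", "true", (char *)NULL);
		exit(1);
	}
	wait(NULL);
	return 0;
}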

Signed-off-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx>
---
 fs/exec.c                  |    4 +++-
 include/linux/sched.h      |    4 ++++
 include/linux/sched/task.h |    4 ++--
 kernel/fork.c              |    3 +++
 kernel/sched/core.c        |   11 +++++++++--
 kernel/sched/core_sched.c  |   21 +++++++++++++++++++++
 6 files changed, 42 insertions(+), 5 deletions(-)

--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1807,7 +1807,9 @@ static int bprm_execve(struct linux_binp
 	if (IS_ERR(file))
 		goto out_unmark;
 
-	sched_exec();
+	retval = sched_exec();
+	if (retval)
+		goto out;
 
 	bprm->file = file;
 	/*
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2171,8 +2171,12 @@ const struct cpumask *sched_trace_rd_spa
 
 #ifdef CONFIG_SCHED_CORE
 extern void sched_core_free(struct task_struct *tsk);
+extern int sched_core_exec(void);
+extern void sched_core_fork(struct task_struct *p);
 #else
 static inline void sched_core_free(struct task_struct *tsk) { }
+static inline int sched_core_exec(void) { return 0; }
+static inline void sched_core_fork(struct task_struct *p) { }
 #endif
 
 #endif
--- a/include/linux/sched/task.h
+++ b/include/linux/sched/task.h
@@ -94,9 +94,9 @@ extern void free_task(struct task_struct
 
 /* sched_exec is called by processes performing an exec */
 #ifdef CONFIG_SMP
-extern void sched_exec(void);
+extern int sched_exec(void);
 #else
-#define sched_exec() {}
+static inline int sched_exec(void) { return 0; }
 #endif
 
 static inline struct task_struct *get_task_struct(struct task_struct *t)
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -2242,6 +2242,8 @@ static __latent_entropy struct task_stru
 
 	klp_copy_process(p);
 
+	sched_core_fork(p);
+
 	spin_lock(&current->sighand->siglock);
 
 	/*
@@ -2329,6 +2331,7 @@ static __latent_entropy struct task_stru
 	return p;
 
 bad_fork_cancel_cgroup:
+	sched_core_free(p);
 	spin_unlock(&current->sighand->siglock);
 	write_unlock_irq(&tasklist_lock);
 	cgroup_cancel_fork(p, args);
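
To place the two hunks above in context: the changelog note about
tasklist_lock refers to where these calls land in copy_process().
An abridged sketch, with the surrounding lines assumed from the tree
this patch is against:

	/* copy_process(), heavily abridged: */
	write_lock_irq(&tasklist_lock);
	...
	klp_copy_process(p);

	sched_core_fork(p);	/* new: inherit the cookie under tasklist_lock */

	spin_lock(&current->sighand->siglock);
	...
bad_fork_cancel_cgroup:
	sched_core_free(p);	/* new: drop the inherited cookie ref on failure */
	spin_unlock(&current->sighand->siglock);
	write_unlock_irq(&tasklist_lock);
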
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -4762,11 +4762,17 @@ unsigned long nr_iowait(void)
  * sched_exec - execve() is a valuable balancing opportunity, because at
  * this point the task has the smallest effective memory and cache footprint.
  */
-void sched_exec(void)
+int sched_exec(void)
 {
 	struct task_struct *p = current;
 	unsigned long flags;
 	int dest_cpu;
+	int ret;
+
+	/* this may change which tasks current can share a core with */
+	ret = sched_core_exec();
+	if (ret)
+		return ret;
 
 	raw_spin_lock_irqsave(&p->pi_lock, flags);
 	dest_cpu = p->sched_class->select_task_rq(p, task_cpu(p), WF_EXEC);
@@ -4778,10 +4784,11 @@ void sched_exec(void)
 
 		raw_spin_unlock_irqrestore(&p->pi_lock, flags);
 		stop_one_cpu(task_cpu(p), migration_cpu_stop, &arg);
-		return;
+		return 0;
 	}
 unlock:
 	raw_spin_unlock_irqrestore(&p->pi_lock, flags);
+	return 0;
 }
 
 #endif
--- a/kernel/sched/core_sched.c
+++ b/kernel/sched/core_sched.c
@@ -100,7 +100,28 @@ static unsigned long sched_core_clone_co
 	return cookie;
 }
 
+void sched_core_fork(struct task_struct *p)
+{
+	RB_CLEAR_NODE(&p->core_node);
+	p->core_cookie = sched_core_clone_cookie(current);
+}
+
 void sched_core_free(struct task_struct *p)
 {
 	sched_core_put_cookie(p->core_cookie);
 }
+
+int sched_core_exec(void)
+{
+	/* absent a policy mechanism, if the task has a cookie, give it a new one */
+	if (current->core_cookie) {
+		unsigned long cookie = sched_core_alloc_cookie();
+		if (!cookie)
+			return -ENOMEM;
+		cookie = sched_core_update_cookie(current, cookie);
+		sched_core_put_cookie(cookie);
+	}
+
+	return 0;
+}
+
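
A note on the helpers used above: sched_core_alloc_cookie(),
sched_core_clone_cookie(), sched_core_update_cookie() and
sched_core_put_cookie() come from the earlier cookie-management patch
in this series. Their contracts, as sketched from the usage here
(declarations with assumed semantics, not the actual implementation):

/* Allocate a fresh refcounted cookie; returns 0 on allocation failure. */
unsigned long sched_core_alloc_cookie(void);

/* Take a reference on @p's cookie so it can be installed on another task. */
unsigned long sched_core_clone_cookie(struct task_struct *p);

/*
 * Install @cookie on @p, requeueing @p in the core-sched rbtree as needed,
 * and return the old cookie; the caller must drop that reference with
 * sched_core_put_cookie().
 */
unsigned long sched_core_update_cookie(struct task_struct *p,
				       unsigned long cookie);

/* Drop a reference; the cookie is freed when the last reference goes away. */
void sched_core_put_cookie(unsigned long cookie);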