[tip:sched/numa] sched, mm: Rework sched_{fork,exec} node assignment

From: tip-bot for Peter Zijlstra
Date: Fri May 18 2012 - 06:34:42 EST


Commit-ID: 0ea86208345b9a41863c3929c69b7ccfbfa6275e
Gitweb: http://git.kernel.org/tip/0ea86208345b9a41863c3929c69b7ccfbfa6275e
Author: Peter Zijlstra <a.p.zijlstra@xxxxxxxxx>
AuthorDate: Sat, 3 Mar 2012 16:54:42 +0100
Committer: Ingo Molnar <mingo@xxxxxxxxxx>
CommitDate: Fri, 18 May 2012 08:16:22 +0200

sched, mm: Rework sched_{fork,exec} node assignment

Rework the scheduler's fork and exec hooks to allow home-node assignment.

In particular:

- Call sched_fork() after the mm is set up and the thread
group list is initialized (so that we can iterate the mm_owner
thread group).
- Call sched_exec() after we've got our fresh mm.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@xxxxxxxxx>
Cc: Suresh Siddha <suresh.b.siddha@xxxxxxxxx>
Cc: Paul Turner <pjt@xxxxxxxxxx>
Cc: Dan Smith <danms@xxxxxxxxxx>
Cc: Bharata B Rao <bharata.rao@xxxxxxxxx>
Cc: Lee Schermerhorn <Lee.Schermerhorn@xxxxxx>
Cc: Christoph Lameter <cl@xxxxxxxxx>
Cc: Rik van Riel <riel@xxxxxxxxxx>
Cc: Andrea Arcangeli <aarcange@xxxxxxxxxx>
Cc: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx>
Cc: Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx>
Link: http://lkml.kernel.org/n/tip-qg63apu64i9i4slg4bor4oww@xxxxxxxxxxxxxx
Signed-off-by: Ingo Molnar <mingo@xxxxxxxxxx>
---
fs/exec.c | 4 ++--
include/linux/sched.h | 4 ++--
kernel/fork.c | 9 +++++----
kernel/sched/core.c | 7 +++++--
kernel/sched/sched.h | 5 +++++
5 files changed, 19 insertions(+), 10 deletions(-)

diff --git a/fs/exec.c b/fs/exec.c
index b1fd202..8a12a7f 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1506,8 +1506,6 @@ static int do_execve_common(const char *filename,
if (IS_ERR(file))
goto out_unmark;

- sched_exec();
-
bprm->file = file;
bprm->filename = filename;
bprm->interp = filename;
@@ -1516,6 +1514,8 @@ static int do_execve_common(const char *filename,
if (retval)
goto out_file;

+ sched_exec(bprm->mm);
+
bprm->argc = count(argv, MAX_ARG_STRINGS);
if ((retval = bprm->argc) < 0)
goto out;
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 49378f0..8bb49f6 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1966,9 +1966,9 @@ task_sched_runtime(struct task_struct *task);

/* sched_exec is called by processes performing an exec */
#ifdef CONFIG_SMP
-extern void sched_exec(void);
+extern void sched_exec(struct mm_struct *mm);
#else
-#define sched_exec() {}
+#define sched_exec(mm) {}
#endif

extern void sched_clock_idle_sleep_event(void);
diff --git a/kernel/fork.c b/kernel/fork.c
index f01be7f..7acf6ae 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1302,9 +1302,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
p->memcg_batch.memcg = NULL;
#endif

- /* Perform scheduler related setup. Assign this task to a CPU. */
- sched_fork(p);
-
retval = perf_event_init_task(p);
if (retval)
goto bad_fork_cleanup_policy;
@@ -1357,6 +1354,11 @@ static struct task_struct *copy_process(unsigned long clone_flags,
* Clear TID on mm_release()?
*/
p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? child_tidptr : NULL;
+
+ INIT_LIST_HEAD(&p->thread_group);
+ /* Perform scheduler related setup. Assign this task to a CPU. */
+ sched_fork(p);
+
#ifdef CONFIG_BLOCK
p->plug = NULL;
#endif
@@ -1405,7 +1407,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
* We dont wake it up yet.
*/
p->group_leader = p;
- INIT_LIST_HEAD(&p->thread_group);

/* Now that the task is set up, run cgroup callbacks if
* necessary. We need to run them before the task is visible
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 8fd0325..9c9c0ee 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1798,8 +1798,9 @@ void sched_fork(struct task_struct *p)
#ifdef CONFIG_SMP
plist_node_init(&p->pushable_tasks, MAX_PRIO);
#endif
-
put_cpu();
+
+ select_task_node(p, p->mm, SD_BALANCE_FORK);
}

/*
@@ -2565,12 +2566,14 @@ static void update_cpu_load_active(struct rq *this_rq)
* sched_exec - execve() is a valuable balancing opportunity, because at
* this point the task has the smallest effective memory and cache footprint.
*/
-void sched_exec(void)
+void sched_exec(struct mm_struct *mm)
{
struct task_struct *p = current;
unsigned long flags;
int dest_cpu;

+ select_task_node(p, mm, SD_BALANCE_EXEC);
+
raw_spin_lock_irqsave(&p->pi_lock, flags);
dest_cpu = p->sched_class->select_task_rq(p, SD_BALANCE_EXEC, 0);
if (dest_cpu == smp_processor_id())
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index ba9dccf..8f93e91 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1156,3 +1156,8 @@ enum rq_nohz_flag_bits {

#define nohz_flags(cpu) (&cpu_rq(cpu)->nohz_flags)
#endif
+
+/*
+ * Macro to avoid argument evaluation
+ */
+#define select_task_node(p, mm, sd_flags) do { } while (0)
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/