[PATCH RFC] sched: Add a per-thread core scheduling interface
From: Joel Fernandes (Google)
Date: Wed May 20 2020 - 18:26:56 EST
Add a per-thread core scheduling interface which allows a thread to tag
itself and enable core scheduling. Based on discussion at OSPM with
maintainers, we propose a prctl(2) interface accepting values of 0 or 1.
1 - enable core scheduling for the task.
0 - disable core scheduling for the task.
Special cases:
(1)
The core-scheduling patchset contains a CGroup interface as well. In
order to respect users of that interface, we avoid overriding the tag
if a task was CGroup-tagged, because the task would otherwise become
inconsistent with its CGroup's tag. Instead, prctl(2) returns -EBUSY.
(2)
If a task is prctl-tagged, allow the CGroup interface to override
the task's tag.
ChromeOS will use core-scheduling to securely enable hyperthreading.
This cuts down the keypress latency in Google Docs from 150ms to 50ms
while improving the camera streaming frame rate by ~3%.
Signed-off-by: Joel Fernandes (Google) <joel@xxxxxxxxxxxxxxxxx>
---
include/linux/sched.h | 6 ++++
include/uapi/linux/prctl.h | 3 ++
kernel/sched/core.c | 57 ++++++++++++++++++++++++++++++++++++++
kernel/sys.c | 3 ++
4 files changed, 69 insertions(+)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index fe6ae59fcadbe..8a40a093aa2ca 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1986,6 +1986,12 @@ static inline void rseq_execve(struct task_struct *t)
#endif
+/*
+ * Per-task core scheduling control, reached from prctl(PR_SET_CORE_SCHED).
+ * @set is 1 to tag the task and 0 to untag it; a NULL @tsk means current.
+ */
+#ifdef CONFIG_SCHED_CORE
+int task_set_core_sched(int set, struct task_struct *tsk);
+#else
+/*
+ * The stub must be static inline: a plain definition in a header is emitted
+ * by every translation unit that includes it and breaks the link. It returns
+ * -EOPNOTSUPP rather than the kernel-internal -ENOTSUPP because the value is
+ * handed back to userspace by prctl(2).
+ */
+static inline int task_set_core_sched(int set, struct task_struct *tsk)
+{
+	return -EOPNOTSUPP;
+}
+#endif
+
void __exit_umh(struct task_struct *tsk);
static inline void exit_umh(struct task_struct *tsk)
diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h
index 07b4f8131e362..dba0c70f9cce6 100644
--- a/include/uapi/linux/prctl.h
+++ b/include/uapi/linux/prctl.h
@@ -238,4 +238,7 @@ struct prctl_mm_map {
#define PR_SET_IO_FLUSHER 57
#define PR_GET_IO_FLUSHER 58
+/* Core scheduling per-task interface */
+#define PR_SET_CORE_SCHED 59
+
#endif /* _LINUX_PRCTL_H */
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 684359ff357e7..780514d03da47 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -3320,6 +3320,13 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p)
#endif
#ifdef CONFIG_SCHED_CORE
RB_CLEAR_NODE(&p->core_node);
+
+ /*
+ * If task is using prctl(2) for tagging, do the prctl(2)-style tagging
+ * for the child as well.
+ */
+ if (current->core_cookie && ((unsigned long)current == current->core_cookie))
+ task_set_core_sched(1, p);
#endif
return 0;
}
@@ -7857,6 +7864,56 @@ void __cant_sleep(const char *file, int line, int preempt_offset)
EXPORT_SYMBOL_GPL(__cant_sleep);
#endif
+#ifdef CONFIG_SCHED_CORE
+
+/*
+ * Callback handed to stop_machine() by task_set_core_sched(). It ignores
+ * @data, does no work of its own and always returns 0; the purpose of
+ * running it is to ensure that all siblings have rescheduled once.
+ */
+static int task_set_core_sched_stopper(void *data)
+{
+ return 0;
+}
+
+/**
+ * task_set_core_sched - set or clear the prctl(2)-style core scheduling tag
+ * @set: 1 to tag @tsk for core scheduling, 0 to remove the tag
+ * @tsk: task to operate on; NULL means current
+ *
+ * A prctl(2)-style tag stores the task's own address in tsk->core_cookie.
+ * A tag equal to the address of tsk->sched_task_group is treated as having
+ * been set through the CGroup interface and is never overridden from here.
+ *
+ * Return:
+ * * 0        - the tag was changed, or @set is 0 and @tsk had no tag
+ * * -ERANGE  - @set is neither 0 nor 1
+ * * -EINVAL  - SMT is not present
+ * * -EBUSY   - @tsk is CGroup-tagged, or @set is 1 and @tsk is already
+ *              prctl(2)-tagged
+ */
+int task_set_core_sched(int set, struct task_struct *tsk)
+{
+	unsigned long cookie;
+
+	if (!tsk)
+		tsk = current;
+
+	/*
+	 * The prctl() caller passes an unsigned long that is narrowed to int,
+	 * so negative values are possible and must not be read as "enable".
+	 */
+	if (set < 0 || set > 1)
+		return -ERANGE;
+
+	if (!static_branch_likely(&sched_smt_present))
+		return -EINVAL;
+
+	cookie = tsk->core_cookie;
+
+	/*
+	 * The CGroup interface takes precedence: neither enabling nor
+	 * disabling via prctl() may replace a CGroup tag, otherwise the task
+	 * would become inconsistent with its group. (CGroup tagging done
+	 * after prctl() may still override the cookie.)
+	 */
+	if (cookie && cookie == (unsigned long)tsk->sched_task_group)
+		return -EBUSY;
+
+	/* Already tagged by prctl(): refuse to tag a second time. */
+	if (set && cookie == (unsigned long)tsk)
+		return -EBUSY;
+
+	/*
+	 * Nothing to clear. Return before sched_core_put() so that it is not
+	 * called without a matching sched_core_get().
+	 */
+	if (!set && !cookie)
+		return 0;
+
+	if (set)
+		sched_core_get();
+
+	/*
+	 * NOTE(review): the cookie is updated without any lock visible in this
+	 * function - confirm this is safe against a task that is currently
+	 * runnable.
+	 */
+	tsk->core_cookie = set ? (unsigned long)tsk : 0;
+
+	/* Run the no-op stopper so that all siblings reschedule once. */
+	stop_machine(task_set_core_sched_stopper, NULL, NULL);
+
+	if (!set)
+		sched_core_put();
+
+	return 0;
+}
+#endif
+
#ifdef CONFIG_MAGIC_SYSRQ
void normalize_rt_tasks(void)
{
diff --git a/kernel/sys.c b/kernel/sys.c
index d325f3ab624a9..5c3bcf40dcb34 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -2514,6 +2514,9 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
error = (current->flags & PR_IO_FLUSHER) == PR_IO_FLUSHER;
break;
+ case PR_SET_CORE_SCHED:
+ error = task_set_core_sched(arg2, NULL);
+ break;
default:
error = -EINVAL;
break;
--
2.26.2.761.g0e0b3e54be-goog