[PATCH v2 3/3] sched: Make uclamp changes depend on CAP_SYS_NICE
From: Quentin Perret
Date: Thu Jun 10 2021 - 11:13:38 EST
There is currently nothing preventing tasks from changing their per-task
clamp values in any way that they like. The rationale is probably that
system administrators are still able to limit those clamps thanks to the
cgroup interface. However, this causes pain in a system where both
per-task and per-cgroup clamp values are expected to be under the
control of core system components (as is the case for Android).
To fix this, let's require CAP_SYS_NICE to increase per-task clamp
values. This allows unprivileged tasks to lower their requests, but not
increase them, which is consistent with the existing behaviour for nice
values.
Signed-off-by: Quentin Perret <qperret@xxxxxxxxxx>
---
kernel/sched/core.c | 55 +++++++++++++++++++++++++++++++++++++++------
1 file changed, 48 insertions(+), 7 deletions(-)
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 1d4aedbbcf96..6e24daca8d53 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1430,6 +1430,11 @@ static int uclamp_validate(struct task_struct *p,
if (util_min != -1 && util_max != -1 && util_min > util_max)
return -EINVAL;
+ return 0;
+}
+
+static void uclamp_enable(void)
+{
/*
* We have valid uclamp attributes; make sure uclamp is enabled.
*
@@ -1438,8 +1443,32 @@ static int uclamp_validate(struct task_struct *p,
* scheduler locks.
*/
static_branch_enable(&sched_uclamp_used);
+}
- return 0;
+static bool uclamp_reduce(struct task_struct *p, const struct sched_attr *attr)
+{
+ if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP_MIN) {
+ int util_min = p->uclamp_req[UCLAMP_MIN].value;
+
+ if (attr->sched_util_min + 1 > util_min + 1)
+ return false;
+
+ if (rt_task(p) && attr->sched_util_min == -1 &&
+ util_min < sysctl_sched_uclamp_util_min_rt_default)
+ return false;
+ }
+
+ if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP_MAX) {
+ int util_max = p->uclamp_req[UCLAMP_MAX].value;
+
+ if (attr->sched_util_max + 1 > util_max + 1)
+ return false;
+
+ if (attr->sched_util_max == -1 && util_max < uclamp_none(UCLAMP_MAX))
+ return false;
+ }
+
+ return true;
}
static bool uclamp_reset(const struct sched_attr *attr,
@@ -1580,6 +1609,11 @@ static inline int uclamp_validate(struct task_struct *p,
{
return -EOPNOTSUPP;
}
+static inline void uclamp_enable(void) { }
+static bool uclamp_reduce(struct task_struct *p, const struct sched_attr *attr)
+{
+ return true;
+}
static void __setscheduler_uclamp(struct task_struct *p,
const struct sched_attr *attr) { }
static inline void uclamp_fork(struct task_struct *p) { }
@@ -6116,6 +6150,13 @@ static int __sched_setscheduler(struct task_struct *p,
(rt_policy(policy) != (attr->sched_priority != 0)))
return -EINVAL;
+ /* Update task specific "requested" clamps */
+ if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP) {
+ retval = uclamp_validate(p, attr);
+ if (retval)
+ return retval;
+ }
+
/*
* Allow unprivileged RT tasks to decrease priority:
*/
@@ -6165,6 +6206,10 @@ static int __sched_setscheduler(struct task_struct *p,
/* Normal users shall not reset the sched_reset_on_fork flag: */
if (p->sched_reset_on_fork && !reset_on_fork)
return -EPERM;
+
+ /* Can't increase util-clamps */
+ if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP && !uclamp_reduce(p, attr))
+ return -EPERM;
}
if (user) {
@@ -6176,12 +6221,8 @@ static int __sched_setscheduler(struct task_struct *p,
return retval;
}
- /* Update task specific "requested" clamps */
- if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP) {
- retval = uclamp_validate(p, attr);
- if (retval)
- return retval;
- }
+ if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP)
+ uclamp_enable();
if (pi)
cpuset_read_lock();
--
2.32.0.272.g935e593368-goog