[PATCH] sched: Make uclamp changes depend on CAP_SYS_NICE

From: Quentin Perret
Date: Wed Jun 09 2021 - 14:00:46 EST


There is currently nothing preventing tasks from changing their per-task
clamp values in anyway that they like. The rationale is probably that
systems administrator are still able to limit those clamps thanks to the
cgroup interface. However, this causes pain in a system where both
per-task and per-cgroup clamps values are expected to be under the
control of core system components (as is the case for Android).

To fix this, let's require CAP_SYS_NICE to increase per-task clamp
values. This allows unprivileged tasks to lower their requests, but not
increase them, which is consistent with the existing behaviour for nice
values.

Signed-off-by: Quentin Perret <qperret@xxxxxxxxxx>
---
kernel/sched/core.c | 48 ++++++++++++++++++++++++++++++++++++++-------
1 file changed, 41 insertions(+), 7 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 1d4aedbbcf96..1e5f9ae441a0 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1430,6 +1430,11 @@ static int uclamp_validate(struct task_struct *p,
if (util_min != -1 && util_max != -1 && util_min > util_max)
return -EINVAL;

+ return 0;
+}
+
+static void uclamp_enable(void)
+{
/*
* We have valid uclamp attributes; make sure uclamp is enabled.
*
@@ -1438,8 +1443,25 @@ static int uclamp_validate(struct task_struct *p,
* scheduler locks.
*/
static_branch_enable(&sched_uclamp_used);
+}

- return 0;
+static bool uclamp_reduce(struct task_struct *p, const struct sched_attr *attr)
+{
+ int util_min, util_max;
+
+ if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP_MIN) {
+ util_min = p->uclamp_req[UCLAMP_MIN].value;
+ if (attr->sched_util_min > util_min)
+ return false;
+ }
+
+ if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP_MAX) {
+ util_max = p->uclamp_req[UCLAMP_MAX].value;
+ if (attr->sched_util_max > util_max)
+ return false;
+ }
+
+ return true;
}

static bool uclamp_reset(const struct sched_attr *attr,
@@ -1580,6 +1602,11 @@ static inline int uclamp_validate(struct task_struct *p,
{
return -EOPNOTSUPP;
}
+static inline void uclamp_enable(void) { }
+static bool uclamp_reduce(struct task_struct *p, const struct sched_attr *attr)
+{
+ return true;
+}
static void __setscheduler_uclamp(struct task_struct *p,
const struct sched_attr *attr) { }
static inline void uclamp_fork(struct task_struct *p) { }
@@ -6116,6 +6143,13 @@ static int __sched_setscheduler(struct task_struct *p,
(rt_policy(policy) != (attr->sched_priority != 0)))
return -EINVAL;

+ /* Update task specific "requested" clamps */
+ if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP) {
+ retval = uclamp_validate(p, attr);
+ if (retval)
+ return retval;
+ }
+
/*
* Allow unprivileged RT tasks to decrease priority:
*/
@@ -6165,6 +6199,10 @@ static int __sched_setscheduler(struct task_struct *p,
/* Normal users shall not reset the sched_reset_on_fork flag: */
if (p->sched_reset_on_fork && !reset_on_fork)
return -EPERM;
+
+ /* Can't increase util-clamps */
+ if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP && !uclamp_reduce(p, attr))
+ return -EPERM;
}

if (user) {
@@ -6176,12 +6214,8 @@ static int __sched_setscheduler(struct task_struct *p,
return retval;
}

- /* Update task specific "requested" clamps */
- if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP) {
- retval = uclamp_validate(p, attr);
- if (retval)
- return retval;
- }
+ if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP)
+ uclamp_enable();

if (pi)
cpuset_read_lock();
--
2.32.0.272.g935e593368-goog