Re: [PATCH 4/6] sched/deadline: Create DL BW alloc, free & check overflow interface

From: Waiman Long
Date: Wed Mar 29 2023 - 10:38:17 EST



On 3/29/23 08:55, Juri Lelli wrote:
From: Dietmar Eggemann <dietmar.eggemann@xxxxxxx>

Rework the existing dl_cpu_busy() interface which offers DL BW overflow
checking and per-task DL BW allocation.

Add dl_bw_free() as an interface to be able to free DL BW.
It will be used to allow freeing of the DL BW request done during
cpuset_can_attach() in case multiple controllers are attached to the
cgroup next to the cpuset controller and one of the non-cpuset
can_attach() fails.

dl_bw_alloc() (and dl_bw_free()) now take a `u64 dl_bw` parameter
instead of `struct task_struct *p` used in dl_cpu_busy(). This allows
to allocate DL BW for a set of tasks too rater than only for a single
[Reviewer comment from Waiman Long:] Typo: "rater" => "rather"
task.

Signed-off-by: Dietmar Eggemann <dietmar.eggemann@xxxxxxx>
Signed-off-by: Juri Lelli <juri.lelli@xxxxxxxxxx>
---
include/linux/sched.h | 2 ++
kernel/sched/core.c | 4 ++--
kernel/sched/deadline.c | 53 +++++++++++++++++++++++++++++++----------
kernel/sched/sched.h | 2 +-
4 files changed, 45 insertions(+), 16 deletions(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 6d654eb4cabd..6f3d84e0ed08 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1848,6 +1848,8 @@ current_restore_flags(unsigned long orig_flags, unsigned long flags)
extern int cpuset_cpumask_can_shrink(const struct cpumask *cur, const struct cpumask *trial);
extern int task_can_attach(struct task_struct *p, const struct cpumask *cs_effective_cpus);
+extern int dl_bw_alloc(int cpu, u64 dl_bw);
+extern void dl_bw_free(int cpu, u64 dl_bw);
#ifdef CONFIG_SMP
extern void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask);
extern int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask);
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 179266ff653f..c83dae6b8586 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -9294,7 +9294,7 @@ int task_can_attach(struct task_struct *p,
if (unlikely(cpu >= nr_cpu_ids))
return -EINVAL;
- ret = dl_cpu_busy(cpu, p);
+ ret = dl_bw_alloc(cpu, p->dl.dl_bw);
}
out:
@@ -9579,7 +9579,7 @@ static void cpuset_cpu_active(void)
static int cpuset_cpu_inactive(unsigned int cpu)
{
if (!cpuhp_tasks_frozen) {
- int ret = dl_cpu_busy(cpu, NULL);
+ int ret = dl_bw_check_overflow(cpu);
if (ret)
return ret;
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index 8f92f0f87383..5b6965e0e537 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -3057,26 +3057,38 @@ int dl_cpuset_cpumask_can_shrink(const struct cpumask *cur,
return ret;
}
-int dl_cpu_busy(int cpu, struct task_struct *p)
+enum dl_bw_request {
+ dl_bw_req_check_overflow = 0,
+ dl_bw_req_alloc,
+ dl_bw_req_free
+};
+
+static int dl_bw_manage(enum dl_bw_request req, int cpu, u64 dl_bw)
{
- unsigned long flags, cap;
+ unsigned long flags;
struct dl_bw *dl_b;
- bool overflow;
+ bool overflow = 0;
rcu_read_lock_sched();
dl_b = dl_bw_of(cpu);
raw_spin_lock_irqsave(&dl_b->lock, flags);
- cap = dl_bw_capacity(cpu);
- overflow = __dl_overflow(dl_b, cap, 0, p ? p->dl.dl_bw : 0);
- if (!overflow && p) {
- /*
- * We reserve space for this task in the destination
- * root_domain, as we can't fail after this point.
- * We will free resources in the source root_domain
- * later on (see set_cpus_allowed_dl()).
- */
- __dl_add(dl_b, p->dl.dl_bw, dl_bw_cpus(cpu));
+ if (req == dl_bw_req_free) {
+ __dl_sub(dl_b, dl_bw, dl_bw_cpus(cpu));
+ } else {
+ unsigned long cap = dl_bw_capacity(cpu);
+
+ overflow = __dl_overflow(dl_b, cap, 0, dl_bw);
+
+ if (req == dl_bw_req_alloc && !overflow) {
+ /*
+ * We reserve space in the destination
+ * root_domain, as we can't fail after this point.
+ * We will free resources in the source root_domain
+ * later on (see set_cpus_allowed_dl()).
+ */
+ __dl_add(dl_b, dl_bw, dl_bw_cpus(cpu));
+ }
}
raw_spin_unlock_irqrestore(&dl_b->lock, flags);
@@ -3084,6 +3096,21 @@ int dl_cpu_busy(int cpu, struct task_struct *p)
return overflow ? -EBUSY : 0;
}
+
+int dl_bw_check_overflow(int cpu)
+{
+ return dl_bw_manage(dl_bw_req_check_overflow, cpu, 0);
+}
+
+int dl_bw_alloc(int cpu, u64 dl_bw)
+{
+ return dl_bw_manage(dl_bw_req_alloc, cpu, dl_bw);
+}
+
+void dl_bw_free(int cpu, u64 dl_bw)
+{
+ dl_bw_manage(dl_bw_req_free, cpu, dl_bw);
+}
#endif
#ifdef CONFIG_SCHED_DEBUG
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 060616944d7a..81ecfd1a1a48 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -330,7 +330,7 @@ extern void __getparam_dl(struct task_struct *p, struct sched_attr *attr);
extern bool __checkparam_dl(const struct sched_attr *attr);
extern bool dl_param_changed(struct task_struct *p, const struct sched_attr *attr);
extern int dl_cpuset_cpumask_can_shrink(const struct cpumask *cur, const struct cpumask *trial);
-extern int dl_cpu_busy(int cpu, struct task_struct *p);
+extern int dl_bw_check_overflow(int cpu);
#ifdef CONFIG_CGROUP_SCHED