Re: [PATCH rfc 2/6] bpf: sched: add convenient helpers to identify sched entities
From: Yafang Shao
Date: Thu Nov 25 2021 - 01:11:33 EST
> On Sep 17, 2021, at 12:24 AM, Roman Gushchin <guro@xxxxxx> wrote:
>
> This patch adds 3 helpers useful for dealing with sched entities:
> u64 bpf_sched_entity_to_tgidpid(struct sched_entity *se);
> u64 bpf_sched_entity_to_cgrpid(struct sched_entity *se);
> long bpf_sched_entity_belongs_to_cgrp(struct sched_entity *se, u64 cgrpid);
>
> Sched entity is a basic structure used by the scheduler to represent
> schedulable objects: tasks and cgroups (if CONFIG_FAIR_GROUP_SCHED
> is enabled). It will be passed as an argument to many bpf hooks, so
> scheduler bpf programs need a convenient way to deal with it.
>
> bpf_sched_entity_to_tgidpid() and bpf_sched_entity_to_cgrpid() are
> useful to identify a sched entity in userspace terms (pid, tgid and
> cgroup id). bpf_sched_entity_belongs_to_cgrp() allows to check whether
> a sched entity belongs to sub-tree of a cgroup. It allows to write
> cgroup-specific scheduler policies even without enabling the cgroup
> cpu controller.
>
> Signed-off-by: Roman Gushchin <guro@xxxxxx>
> ---
> include/uapi/linux/bpf.h | 23 +++++++++++
> kernel/sched/bpf_sched.c | 74 ++++++++++++++++++++++++++++++++++
> scripts/bpf_doc.py | 2 +
> tools/include/uapi/linux/bpf.h | 23 +++++++++++
> 4 files changed, 122 insertions(+)
>
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index 6dfbebb8fc8f..199e4a92820d 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -4900,6 +4900,26 @@ union bpf_attr {
> * **-EINVAL** if *flags* is not zero.
> *
> * **-ENOENT** if architecture does not support branch records.
> + *
> + * u64 bpf_sched_entity_to_tgidpid(struct sched_entity *se)
> + * Description
> + * Return task's encoded tgid and pid if the sched entity is a task.
> + * Return
> + * Tgid and pid encoded as tgid << 32 \| pid, if *se* is a task. (u64)-1 otherwise.
> + *
> + * u64 bpf_sched_entity_to_cgrpid(struct sched_entity *se)
> + * Description
> + * Return cgroup id if the given sched entity is a cgroup.
> + * Return
> + * Cgroup id, if *se* is a cgroup. (u64)-1 otherwise.
> + *
> + * long bpf_sched_entity_belongs_to_cgrp(struct sched_entity *se, u64 cgrpid)
> + * Description
> + * Checks whether the sched entity belongs to a cgroup or
> + * it's sub-tree. It doesn't require a cgroup CPU controller
> + * to be enabled.
> + * Return
> + * 1 if the sched entity belongs to a cgroup, 0 otherwise.
> */
> #define __BPF_FUNC_MAPPER(FN) \
> FN(unspec), \
> @@ -5079,6 +5099,9 @@ union bpf_attr {
> FN(get_attach_cookie), \
> FN(task_pt_regs), \
> FN(get_branch_snapshot), \
> + FN(sched_entity_to_tgidpid), \
> + FN(sched_entity_to_cgrpid), \
> + FN(sched_entity_belongs_to_cgrp), \
> /* */
>
> /* integer value in 'imm' field of BPF_CALL instruction selects which helper
> diff --git a/kernel/sched/bpf_sched.c b/kernel/sched/bpf_sched.c
> index 2f05c186cfd0..ead691dc6e85 100644
> --- a/kernel/sched/bpf_sched.c
> +++ b/kernel/sched/bpf_sched.c
> @@ -42,12 +42,86 @@ int bpf_sched_verify_prog(struct bpf_verifier_log *vlog,
> return 0;
> }
>
> +BPF_CALL_1(bpf_sched_entity_to_tgidpid, struct sched_entity *, se)
> +{
> + if (entity_is_task(se)) {
> + struct task_struct *task = task_of(se);
> +
> + return (u64) task->tgid << 32 | task->pid;
> + } else {
> + return (u64) -1;
> + }
> +}
> +
> +BPF_CALL_1(bpf_sched_entity_to_cgrpid, struct sched_entity *, se)
> +{
> +#ifdef CONFIG_FAIR_GROUP_SCHED
> + if (!entity_is_task(se))
> + return cgroup_id(se->cfs_rq->tg->css.cgroup);
> +#endif
> + return (u64) -1;
> +}
> +
> +BPF_CALL_2(bpf_sched_entity_belongs_to_cgrp, struct sched_entity *, se,
> + u64, cgrpid)
> +{
> +#ifdef CONFIG_CGROUPS
> + struct cgroup *cgrp;
> + int level;
> +
> + if (entity_is_task(se))
> + cgrp = task_dfl_cgroup(task_of(se));
> +#ifdef CONFIG_FAIR_GROUP_SCHED
> + else
> + cgrp = se->cfs_rq->tg->css.cgroup;
This is incorrect.
It should use se->my_q->tg->css.cgroup instead, possibly with a NULL check (for autogroup).
se->cfs_rq and se->my_q are different: se->my_q is the cfs_rq of this se itself, while se->cfs_rq may be the parent's.
> +#endif
> +
> + for (level = cgrp->level; level; level--)
> + if (cgrp->ancestor_ids[level] == cgrpid)
> + return 1;
> +#endif
> + return 0;
> +}
> +
> +BTF_ID_LIST_SINGLE(btf_sched_entity_ids, struct, sched_entity)
> +
> +static const struct bpf_func_proto bpf_sched_entity_to_tgidpid_proto = {
> + .func = bpf_sched_entity_to_tgidpid,
> + .gpl_only = false,
> + .ret_type = RET_INTEGER,
> + .arg1_type = ARG_PTR_TO_BTF_ID,
> + .arg1_btf_id = &btf_sched_entity_ids[0],
> +};
> +
> +static const struct bpf_func_proto bpf_sched_entity_to_cgrpid_proto = {
> + .func = bpf_sched_entity_to_cgrpid,
> + .gpl_only = false,
> + .ret_type = RET_INTEGER,
> + .arg1_type = ARG_PTR_TO_BTF_ID,
> + .arg1_btf_id = &btf_sched_entity_ids[0],
> +};
> +
> +static const struct bpf_func_proto bpf_sched_entity_belongs_to_cgrp_proto = {
> + .func = bpf_sched_entity_belongs_to_cgrp,
> + .gpl_only = false,
> + .ret_type = RET_INTEGER,
> + .arg1_type = ARG_PTR_TO_BTF_ID,
> + .arg1_btf_id = &btf_sched_entity_ids[0],
> + .arg2_type = ARG_ANYTHING,
> +};
> +
> static const struct bpf_func_proto *
> bpf_sched_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
> {
> switch (func_id) {
> case BPF_FUNC_trace_printk:
> return bpf_get_trace_printk_proto();
> + case BPF_FUNC_sched_entity_to_tgidpid:
> + return &bpf_sched_entity_to_tgidpid_proto;
> + case BPF_FUNC_sched_entity_to_cgrpid:
> + return &bpf_sched_entity_to_cgrpid_proto;
> + case BPF_FUNC_sched_entity_belongs_to_cgrp:
> + return &bpf_sched_entity_belongs_to_cgrp_proto;
> default:
> return NULL;
> }
> diff --git a/scripts/bpf_doc.py b/scripts/bpf_doc.py
> index 00ac7b79cddb..84019ba5b67b 100755
> --- a/scripts/bpf_doc.py
> +++ b/scripts/bpf_doc.py
> @@ -548,6 +548,7 @@ class PrinterHelpers(Printer):
> 'struct socket',
> 'struct file',
> 'struct bpf_timer',
> + 'struct sched_entity',
> ]
> known_types = {
> '...',
> @@ -596,6 +597,7 @@ class PrinterHelpers(Printer):
> 'struct socket',
> 'struct file',
> 'struct bpf_timer',
> + 'struct sched_entity',
> }
> mapped_types = {
> 'u8': '__u8',
> diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
> index 6dfbebb8fc8f..199e4a92820d 100644
> --- a/tools/include/uapi/linux/bpf.h
> +++ b/tools/include/uapi/linux/bpf.h
> @@ -4900,6 +4900,26 @@ union bpf_attr {
> * **-EINVAL** if *flags* is not zero.
> *
> * **-ENOENT** if architecture does not support branch records.
> + *
> + * u64 bpf_sched_entity_to_tgidpid(struct sched_entity *se)
> + * Description
> + * Return task's encoded tgid and pid if the sched entity is a task.
> + * Return
> + * Tgid and pid encoded as tgid << 32 \| pid, if *se* is a task. (u64)-1 otherwise.
> + *
> + * u64 bpf_sched_entity_to_cgrpid(struct sched_entity *se)
> + * Description
> + * Return cgroup id if the given sched entity is a cgroup.
> + * Return
> + * Cgroup id, if *se* is a cgroup. (u64)-1 otherwise.
> + *
> + * long bpf_sched_entity_belongs_to_cgrp(struct sched_entity *se, u64 cgrpid)
> + * Description
> + * Checks whether the sched entity belongs to a cgroup or
> + * it's sub-tree. It doesn't require a cgroup CPU controller
> + * to be enabled.
> + * Return
> + * 1 if the sched entity belongs to a cgroup, 0 otherwise.
> */
> #define __BPF_FUNC_MAPPER(FN) \
> FN(unspec), \
> @@ -5079,6 +5099,9 @@ union bpf_attr {
> FN(get_attach_cookie), \
> FN(task_pt_regs), \
> FN(get_branch_snapshot), \
> + FN(sched_entity_to_tgidpid), \
> + FN(sched_entity_to_cgrpid), \
> + FN(sched_entity_belongs_to_cgrp), \
> /* */
>
> /* integer value in 'imm' field of BPF_CALL instruction selects which helper
> --
> 2.31.1
>
>