Re: [RFC PATCH v2 4/5] pvsched: bpf support for pvsched

From: Vineeth Remanan Pillai
Date: Mon Apr 08 2024 - 10:02:45 EST


Adding sched_ext folks

On Wed, Apr 3, 2024 at 10:01 AM Vineeth Pillai (Google)
<vineeth@xxxxxxxxxxxxxxx> wrote:
>
> Add support for implementing bpf pvsched drivers. bpf programs can use
> the struct_ops to define the callbacks of pvsched drivers.
>
> This is only a skeleton of the bpf framework for pvsched. Some
> verification details are not implemented yet.
>
> Signed-off-by: Vineeth Pillai (Google) <vineeth@xxxxxxxxxxxxxxx>
> Signed-off-by: Joel Fernandes (Google) <joel@xxxxxxxxxxxxxxxxx>
> ---
> kernel/bpf/bpf_struct_ops_types.h | 4 +
> virt/pvsched/Makefile | 2 +-
> virt/pvsched/pvsched_bpf.c | 141 ++++++++++++++++++++++++++++++
> 3 files changed, 146 insertions(+), 1 deletion(-)
> create mode 100644 virt/pvsched/pvsched_bpf.c
>
> diff --git a/kernel/bpf/bpf_struct_ops_types.h b/kernel/bpf/bpf_struct_ops_types.h
> index 5678a9ddf817..9d5e4d1a331a 100644
> --- a/kernel/bpf/bpf_struct_ops_types.h
> +++ b/kernel/bpf/bpf_struct_ops_types.h
> @@ -9,4 +9,8 @@ BPF_STRUCT_OPS_TYPE(bpf_dummy_ops)
> #include <net/tcp.h>
> BPF_STRUCT_OPS_TYPE(tcp_congestion_ops)
> #endif
> +#ifdef CONFIG_PARAVIRT_SCHED_HOST
> +#include <linux/pvsched.h>
> +BPF_STRUCT_OPS_TYPE(pvsched_vcpu_ops)
> +#endif
> #endif
> diff --git a/virt/pvsched/Makefile b/virt/pvsched/Makefile
> index 4ca38e30479b..02bc072cd806 100644
> --- a/virt/pvsched/Makefile
> +++ b/virt/pvsched/Makefile
> @@ -1,2 +1,2 @@
>
> -obj-$(CONFIG_PARAVIRT_SCHED_HOST) += pvsched.o
> +obj-$(CONFIG_PARAVIRT_SCHED_HOST) += pvsched.o pvsched_bpf.o
> diff --git a/virt/pvsched/pvsched_bpf.c b/virt/pvsched/pvsched_bpf.c
> new file mode 100644
> index 000000000000..b125089abc3b
> --- /dev/null
> +++ b/virt/pvsched/pvsched_bpf.c
> @@ -0,0 +1,141 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/* Copyright (c) 2024 Google */
> +
> +#include <linux/types.h>
> +#include <linux/bpf_verifier.h>
> +#include <linux/bpf.h>
> +#include <linux/btf.h>
> +#include <linux/filter.h>
> +#include <linux/pvsched.h>
> +
> +
> +/* "extern" is to avoid sparse warning. It is only used in bpf_struct_ops.c. */
> +extern struct bpf_struct_ops bpf_pvsched_vcpu_ops;
> +
> +static int bpf_pvsched_vcpu_init(struct btf *btf)
> +{
> + return 0;
> +}
> +
> +static bool bpf_pvsched_vcpu_is_valid_access(int off, int size,
> + enum bpf_access_type type,
> + const struct bpf_prog *prog,
> + struct bpf_insn_access_aux *info)
> +{
> + if (off < 0 || off >= sizeof(__u64) * MAX_BPF_FUNC_ARGS)
> + return false;
> + if (type != BPF_READ)
> + return false;
> + if (off % size != 0)
> + return false;
> +
> + if (!btf_ctx_access(off, size, type, prog, info))
> + return false;
> +
> + return true;
> +}
> +
> +static int bpf_pvsched_vcpu_btf_struct_access(struct bpf_verifier_log *log,
> + const struct bpf_reg_state *reg,
> + int off, int size)
> +{
> + /*
> + * TODO: Enable write access to Guest shared mem.
> + */
> + return -EACCES;
> +}
> +
> +static const struct bpf_func_proto *
> +bpf_pvsched_vcpu_get_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
> +{
> + return bpf_base_func_proto(func_id);
> +}
> +
> +static const struct bpf_verifier_ops bpf_pvsched_vcpu_verifier_ops = {
> + .get_func_proto = bpf_pvsched_vcpu_get_func_proto,
> + .is_valid_access = bpf_pvsched_vcpu_is_valid_access,
> + .btf_struct_access = bpf_pvsched_vcpu_btf_struct_access,
> +};
> +
> +static int bpf_pvsched_vcpu_init_member(const struct btf_type *t,
> + const struct btf_member *member,
> + void *kdata, const void *udata)
> +{
> + const struct pvsched_vcpu_ops *uvm_ops;
> + struct pvsched_vcpu_ops *vm_ops;
> + u32 moff;
> +
> + uvm_ops = (const struct pvsched_vcpu_ops *)udata;
> + vm_ops = (struct pvsched_vcpu_ops *)kdata;
> +
> + moff = __btf_member_bit_offset(t, member) / 8;
> + switch (moff) {
> + case offsetof(struct pvsched_vcpu_ops, events):
> + vm_ops->events = *(u32 *)(udata + moff);
> + return 1;
> + case offsetof(struct pvsched_vcpu_ops, name):
> + if (bpf_obj_name_cpy(vm_ops->name, uvm_ops->name,
> + sizeof(vm_ops->name)) <= 0)
> + return -EINVAL;
> + return 1;
> + }
> +
> + return 0;
> +}
> +
> +static int bpf_pvsched_vcpu_check_member(const struct btf_type *t,
> + const struct btf_member *member,
> + const struct bpf_prog *prog)
> +{
> + return 0;
> +}
> +
> +static int bpf_pvsched_vcpu_reg(void *kdata)
> +{
> + return pvsched_register_vcpu_ops((struct pvsched_vcpu_ops *)kdata);
> +}
> +
> +static void bpf_pvsched_vcpu_unreg(void *kdata)
> +{
> + pvsched_unregister_vcpu_ops((struct pvsched_vcpu_ops *)kdata);
> +}
> +
> +static int bpf_pvsched_vcpu_validate(void *kdata)
> +{
> + return pvsched_validate_vcpu_ops((struct pvsched_vcpu_ops *)kdata);
> +}
> +
> +static int bpf_pvsched_vcpu_update(void *kdata, void *old_kdata)
> +{
> + return -EOPNOTSUPP;
> +}
> +
> +static int __pvsched_vcpu_register(struct pid *pid)
> +{
> + return 0;
> +}
> +static void __pvsched_vcpu_unregister(struct pid *pid)
> +{
> +}
> +static void __pvsched_notify_event(void *addr, struct pid *pid, u32 event)
> +{
> +}
> +
> +static struct pvsched_vcpu_ops __bpf_ops_pvsched_vcpu_ops = {
> + .pvsched_vcpu_register = __pvsched_vcpu_register,
> + .pvsched_vcpu_unregister = __pvsched_vcpu_unregister,
> + .pvsched_vcpu_notify_event = __pvsched_notify_event,
> +};
> +
> +struct bpf_struct_ops bpf_pvsched_vcpu_ops = {
> + .init = &bpf_pvsched_vcpu_init,
> + .validate = bpf_pvsched_vcpu_validate,
> + .update = bpf_pvsched_vcpu_update,
> + .verifier_ops = &bpf_pvsched_vcpu_verifier_ops,
> + .reg = bpf_pvsched_vcpu_reg,
> + .unreg = bpf_pvsched_vcpu_unreg,
> + .check_member = bpf_pvsched_vcpu_check_member,
> + .init_member = bpf_pvsched_vcpu_init_member,
> + .name = "pvsched_vcpu_ops",
> + .cfi_stubs = &__bpf_ops_pvsched_vcpu_ops,
> +};
> --
> 2.40.1
>