Re: [RFC PATCH v2 3/5] kvm: interface for managing pvsched driver for guest VMs

From: Vineeth Remanan Pillai
Date: Mon Apr 08 2024 - 10:14:04 EST


Adding sched_ext folks

On Wed, Apr 3, 2024 at 10:01 AM Vineeth Pillai (Google)
<vineeth@xxxxxxxxxxxxxxx> wrote:
>
> Implement ioctls for assigning and unassigning a pvsched driver for a
> guest. VMMs would need to adopt these ioctls to support the feature.
> Also add a temporary debugfs interface for managing this.
>
> Ideally, the hypervisor would be able to determine the pvsched driver
> based on the information received from the guest: guest VMs with the
> feature enabled would request that the hypervisor select a pvsched
> driver. The ioctl API is an override mechanism that gives the admin
> more control.
>
> Signed-off-by: Vineeth Pillai (Google) <vineeth@xxxxxxxxxxxxxxx>
> Signed-off-by: Joel Fernandes (Google) <joel@xxxxxxxxxxxxxxxxx>
> ---
>  include/uapi/linux/kvm.h |   6 ++
>  virt/kvm/kvm_main.c      | 117 +++++++++++++++++++++++++++++++++++++++
>  2 files changed, 123 insertions(+)
>
> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
> index c3308536482b..4b29bdad4188 100644
> --- a/include/uapi/linux/kvm.h
> +++ b/include/uapi/linux/kvm.h
> @@ -2227,4 +2227,10 @@ struct kvm_create_guest_memfd {
> __u64 reserved[6];
> };
>
> +struct kvm_pvsched_ops {
> +        __u8 ops_name[32]; /* PVSCHED_NAME_MAX */
> +};
> +
> +#define KVM_GET_PVSCHED_OPS _IOR(KVMIO, 0xe4, struct kvm_pvsched_ops)
> +#define KVM_REPLACE_PVSCHED_OPS _IOWR(KVMIO, 0xe5, struct kvm_pvsched_ops)
> #endif /* __LINUX_KVM_H */
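
For VMM folks looking at this: a minimal sketch of how userspace might
drive the two ioctls, assuming a VM fd obtained from KVM_CREATE_VM and an
updated uapi header; the helper names and the "pvsched_default" driver
name used later are only illustrative:

#include <string.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Query the pvsched driver currently assigned to the VM, if any. */
static int query_pvsched(int vm_fd, struct kvm_pvsched_ops *ops)
{
        memset(ops, 0, sizeof(*ops));
        return ioctl(vm_fd, KVM_GET_PVSCHED_OPS, ops);
}

/*
 * Assign a pvsched driver by name; on success the kernel hands back the
 * previously assigned driver name (empty string if none) in @ops.
 */
static int set_pvsched(int vm_fd, const char *name,
                       struct kvm_pvsched_ops *ops)
{
        memset(ops, 0, sizeof(*ops));
        strncpy((char *)ops->ops_name, name, sizeof(ops->ops_name) - 1);
        return ioctl(vm_fd, KVM_REPLACE_PVSCHED_OPS, ops);
}
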
> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> index 0546814e4db7..b3d9c362d2e3 100644
> --- a/virt/kvm/kvm_main.c
> +++ b/virt/kvm/kvm_main.c
> @@ -1223,6 +1223,79 @@ static void kvm_destroy_vm_debugfs(struct kvm *kvm)
> }
> }
>
> +#ifdef CONFIG_PARAVIRT_SCHED_KVM
> +static int pvsched_vcpu_ops_show(struct seq_file *m, void *data)
> +{
> +        char ops_name[PVSCHED_NAME_MAX] = "";
> +        struct pvsched_vcpu_ops *ops;
> +        struct kvm *kvm = (struct kvm *) m->private;
> +
> +        rcu_read_lock();
> +        ops = rcu_dereference(kvm->pvsched_ops);
> +        if (ops)
> +                strscpy(ops_name, ops->name, PVSCHED_NAME_MAX);
> +        rcu_read_unlock();
> +
> +        seq_printf(m, "%s\n", ops_name);
> +
> +        return 0;
> +}
> +
> +static ssize_t
> +pvsched_vcpu_ops_write(struct file *filp, const char __user *ubuf,
> +                       size_t cnt, loff_t *ppos)
> +{
> +        int ret;
> +        char *cmp;
> +        char buf[PVSCHED_NAME_MAX];
> +        struct inode *inode;
> +        struct kvm *kvm;
> +
> +        if (cnt >= PVSCHED_NAME_MAX)
> +                return -EINVAL;
> +
> +        if (copy_from_user(buf, ubuf, cnt))
> +                return -EFAULT;
> +        buf[cnt] = '\0';
> +        cmp = strstrip(buf);
> +
> +        inode = file_inode(filp);
> +        inode_lock(inode);
> +        kvm = (struct kvm *)inode->i_private;
> +        ret = kvm_replace_pvsched_ops(kvm, cmp);
> +        inode_unlock(inode);
> +
> +        if (ret)
> +                return ret;
> +
> +        *ppos += cnt;
> +        return cnt;
> +}
> +
> +static int pvsched_vcpu_ops_open(struct inode *inode, struct file *filp)
> +{
> +        return single_open(filp, pvsched_vcpu_ops_show, inode->i_private);
> +}
> +
> +static const struct file_operations pvsched_vcpu_ops_fops = {
> +        .open = pvsched_vcpu_ops_open,
> +        .write = pvsched_vcpu_ops_write,
> +        .read = seq_read,
> +        .llseek = seq_lseek,
> +        .release = single_release,
> +};
> +
> +static void kvm_create_vm_pvsched_debugfs(struct kvm *kvm)
> +{
> +        debugfs_create_file("pvsched_vcpu_ops", 0644, kvm->debugfs_dentry, kvm,
> +                            &pvsched_vcpu_ops_fops);
> +}
> +#else
> +static void kvm_create_vm_pvsched_debugfs(struct kvm *kvm)
> +{
> +}
> +#endif
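
To poke the temporary debugfs knob from the host, something like the
below should do, assuming debugfs is mounted at /sys/kernel/debug and the
per-VM directory follows the existing "<pid>-<vm fd>" naming used by
kvm_create_vm_debugfs() (the helper name and buffer size are
illustrative). Writing a driver name to the same file switches drivers
through kvm_replace_pvsched_ops(), mirroring the ioctl path:

#include <stdio.h>

/* Read back the pvsched driver currently attached to one VM. */
static int read_pvsched_debugfs(const char *vm_dir, char *buf, size_t len)
{
        char path[256];
        FILE *f;

        snprintf(path, sizeof(path),
                 "/sys/kernel/debug/kvm/%s/pvsched_vcpu_ops", vm_dir);
        f = fopen(path, "r");
        if (!f)
                return -1;
        if (!fgets(buf, (int)len, f))
                buf[0] = '\0';
        fclose(f);
        return 0;
}
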
> +
> static int kvm_create_vm_debugfs(struct kvm *kvm, const char *fdname)
> {
> static DEFINE_MUTEX(kvm_debugfs_lock);
> @@ -1288,6 +1361,8 @@ static int kvm_create_vm_debugfs(struct kvm *kvm, const char *fdname)
> &stat_fops_per_vm);
> }
>
> +        kvm_create_vm_pvsched_debugfs(kvm);
> +
> ret = kvm_arch_create_vm_debugfs(kvm);
> if (ret)
> goto out_err;
> @@ -5474,6 +5549,48 @@ static long kvm_vm_ioctl(struct file *filp,
> r = kvm_gmem_create(kvm, &guest_memfd);
> break;
> }
> +#endif
> +#ifdef CONFIG_PARAVIRT_SCHED_KVM
> +        case KVM_REPLACE_PVSCHED_OPS: {
> +                struct pvsched_vcpu_ops *ops;
> +                struct kvm_pvsched_ops in_ops, out_ops;
> +
> +                r = -EFAULT;
> +                if (copy_from_user(&in_ops, argp, sizeof(in_ops)))
> +                        goto out;
> +                in_ops.ops_name[sizeof(in_ops.ops_name) - 1] = '\0';
> +
> +                out_ops.ops_name[0] = '\0';
> +                rcu_read_lock();
> +                ops = rcu_dereference(kvm->pvsched_ops);
> +                if (ops)
> +                        strscpy(out_ops.ops_name, ops->name, PVSCHED_NAME_MAX);
> +                rcu_read_unlock();
> +                r = kvm_replace_pvsched_ops(kvm, (char *)in_ops.ops_name);
> +                if (r)
> +                        goto out;
> +                r = -EFAULT;
> +                if (copy_to_user(argp, &out_ops, sizeof(out_ops)))
> +                        goto out;
> +                r = 0;
> +                break;
> +        }
> +        case KVM_GET_PVSCHED_OPS: {
> +                struct pvsched_vcpu_ops *ops;
> +                struct kvm_pvsched_ops out_ops;
> +
> +                out_ops.ops_name[0] = '\0';
> +                rcu_read_lock();
> +                ops = rcu_dereference(kvm->pvsched_ops);
> +                if (ops)
> +                        strscpy(out_ops.ops_name, ops->name, PVSCHED_NAME_MAX);
> +                rcu_read_unlock();
> +                r = -EFAULT;
> +                if (copy_to_user(argp, &out_ops, sizeof(out_ops)))
> +                        goto out;
> +                r = 0;
> +                break;
> +        }
> #endif
> default:
> r = kvm_arch_vm_ioctl(filp, ioctl, arg);
> --
> 2.40.1
>
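
One nice property of KVM_REPLACE_PVSCHED_OPS handing back the previous
ops name is that a VMM can do a save/replace/restore dance without any
extra bookkeeping. A rough usage sketch, reusing the illustrative
set_pvsched() helper from earlier (and assuming, per the assign/unassign
wording in the changelog, that an empty name detaches the current
driver):

/* Run a workload under a given pvsched driver, then restore the old one. */
static int run_with_pvsched(int vm_fd, const char *name)
{
        struct kvm_pvsched_ops saved, prev;

        /* Attach the requested driver; the old name lands in 'saved'. */
        if (set_pvsched(vm_fd, name, &saved))
                return -1;

        /* ... run the guest workload here ... */

        /* Put back whatever was assigned before (possibly nothing). */
        return set_pvsched(vm_fd, (const char *)saved.ops_name, &prev);
}
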