Re: [PATCH v2 4/5] KVM: add KVM_USER_EXIT vcpu ioctl for userspace exit

From: Avi Kivity
Date: Sun Aug 16 2015 - 16:27:38 EST


On 08/05/2015 07:33 PM, Radim KrÄmÃÅ wrote:
The guest can use KVM_USER_EXIT instead of a signal-based exiting to
userspace. Availability depends on KVM_CAP_USER_EXIT.
Only x86 is implemented so far.

Signed-off-by: Radim KrÄmÃÅ <rkrcmar@xxxxxxxxxx>
---
v2:
* use vcpu ioctl instead of vm one [4/5]
* shrink kvm_user_exit from 64 to 32 bytes [4/5]

Documentation/virtual/kvm/api.txt | 30 ++++++++++++++++++++++++++++++
arch/x86/kvm/x86.c | 24 ++++++++++++++++++++++++
include/uapi/linux/kvm.h | 7 +++++++
virt/kvm/kvm_main.c | 5 +++--
4 files changed, 64 insertions(+), 2 deletions(-)

diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index 3c714d43a717..c5844f0b8e7c 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -3020,6 +3020,36 @@ Returns: 0 on success, -1 on error
Queues an SMI on the thread's vcpu.
+
+4.97 KVM_USER_EXIT
+
+Capability: KVM_CAP_USER_EXIT
+Architectures: x86
+Type: vcpu ioctl
+Parameters: struct kvm_user_exit (in)
+Returns: 0 on success,
+ -EFAULT if the parameter couldn't be read,
+ -EINVAL if 'reserved' is not zeroed,
+
+struct kvm_user_exit {
+ __u8 reserved[32];
+};
+
+The ioctl is asynchronous to VCPU execution and can be issued from all threads.
+format

This breaks an invariant of vcpu ioctls, and also forces a cacheline bounce when we fget() the vcpu fd.

We should really try to avoid this. One options is to have a KVM_VCPU_MAKE_EXITFD vcpu ioctl, which returns an eventfd that you then write into. You can make as many exitfds as you like, one for each waking thread, so they never cause cacheline conflicts.

Edit: I see the invariant was already broken. But the other comment stands.

+Make vcpu_id exit to userspace as soon as possible. If the VCPU is not running
+in kernel at the time, it will exit early on the next call to KVM_RUN.
+If the VCPU was going to exit because of other reasons when KVM_USER_EXIT was
+issued, it will keep the original exit reason and not exit early on next
+KVM_RUN.
+If VCPU exited because of KVM_USER_EXIT, the exit reason is KVM_EXIT_REQUEST.
+
+This ioctl has very similar effect (same sans some races on userspace exit) as
+sending a signal (that is blocked in userspace and set in KVM_SET_SIGNAL_MASK)
+to the VCPU thread.
+
+
+
5. The kvm_run structure
------------------------
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 3493457ad0a1..27d777eb34e4 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2466,6 +2466,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_ASSIGN_DEV_IRQ:
case KVM_CAP_PCI_2_3:
#endif
+ case KVM_CAP_USER_EXIT:
r = 1;
break;
case KVM_CAP_X86_SMM:
@@ -3077,6 +3078,20 @@ static int kvm_set_guest_paused(struct kvm_vcpu *vcpu)
return 0;
}
+int kvm_vcpu_ioctl_user_exit(struct kvm_vcpu *vcpu, struct kvm_user_exit *info)
+{
+ struct kvm_user_exit valid = {};
+ BUILD_BUG_ON(sizeof(valid) != 32);
+
+ if (memcmp(info, &valid, sizeof(valid)))
+ return -EINVAL;
+
+ kvm_make_request(KVM_REQ_EXIT, vcpu);
+ kvm_vcpu_kick(vcpu);
+
+ return 0;
+}
+
long kvm_arch_vcpu_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg)
{
@@ -3341,6 +3356,15 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
r = kvm_set_guest_paused(vcpu);
goto out;
}
+ case KVM_USER_EXIT: {
+ struct kvm_user_exit info;
+
+ r = -EFAULT;
+ if (copy_from_user(&info, argp, sizeof(info)))
+ goto out;
+ r = kvm_vcpu_ioctl_user_exit(vcpu, &info);
+ break;
+ }
default:
r = -EINVAL;
}
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index d996a7cdb4d2..bc5a1abe9626 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -826,6 +826,7 @@ struct kvm_ppc_smmu_info {
#define KVM_CAP_X86_SMM 117
#define KVM_CAP_MULTI_ADDRESS_SPACE 118
#define KVM_CAP_SPLIT_IRQCHIP 119
+#define KVM_CAP_USER_EXIT 120
#ifdef KVM_CAP_IRQ_ROUTING
@@ -1008,6 +1009,10 @@ struct kvm_device_attr {
__u64 addr; /* userspace address of attr data */
};
+struct kvm_user_exit {
+ __u8 reserved[32];
+};
+
#define KVM_DEV_VFIO_GROUP 1
#define KVM_DEV_VFIO_GROUP_ADD 1
#define KVM_DEV_VFIO_GROUP_DEL 2
@@ -1119,6 +1124,8 @@ struct kvm_s390_ucas_mapping {
#define KVM_ARM_SET_DEVICE_ADDR _IOW(KVMIO, 0xab, struct kvm_arm_device_addr)
/* Available with KVM_CAP_PPC_RTAS */
#define KVM_PPC_RTAS_DEFINE_TOKEN _IOW(KVMIO, 0xac, struct kvm_rtas_token_args)
+/* Available with KVM_CAP_USER_EXIT */
+#define KVM_USER_EXIT _IOW(KVMIO, 0xad, struct kvm_user_exit)
/* ioctl for vm fd */
#define KVM_CREATE_DEVICE _IOWR(KVMIO, 0xe0, struct kvm_create_device)
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index b34a328fdac1..d7ffe6090520 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -2248,15 +2248,16 @@ static long kvm_vcpu_ioctl(struct file *filp,
if (unlikely(_IOC_TYPE(ioctl) != KVMIO))
return -EINVAL;
-#if defined(CONFIG_S390) || defined(CONFIG_PPC) || defined(CONFIG_MIPS)
/*
* Special cases: vcpu ioctls that are asynchronous to vcpu execution,
* so vcpu_load() would break it.
*/
+#if defined(CONFIG_S390) || defined(CONFIG_PPC) || defined(CONFIG_MIPS)
if (ioctl == KVM_S390_INTERRUPT || ioctl == KVM_S390_IRQ || ioctl == KVM_INTERRUPT)
return kvm_arch_vcpu_ioctl(filp, ioctl, arg);
#endif
-
+ if (ioctl == KVM_USER_EXIT)
+ return kvm_arch_vcpu_ioctl(filp, ioctl, arg);
r = vcpu_load(vcpu);
if (r)

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/