Re: [PATCH v1 15/16] kvm: arm64: Allow configuring physical address space size
From: Christoffer Dall
Date: Thu Feb 08 2018 - 06:14:25 EST
On Tue, Jan 09, 2018 at 07:04:10PM +0000, Suzuki K Poulose wrote:
> Allow the guests to choose a larger physical address space size.
> The default and minimum size is 40bits. A guest can change this
> right after the VM creation, but before the stage2 entry page
> tables are allocated (i.e, before it registers a memory range
> or maps a device address). The size is restricted to the maximum
> supported by the host. Also, the guest can only increase the PA size,
> from the existing value, as reducing it could break the devices which
> may have verified their physical address for validity and may do a
> lazy mapping(e.g, VGIC).
>
> Cc: Marc Zyngier <marc.zyngier@xxxxxxx>
> Cc: Christoffer Dall <cdall@xxxxxxxxxx>
> Cc: Peter Maydell <peter.maydell@xxxxxxxxxx>
> Signed-off-by: Suzuki K Poulose <suzuki.poulose@xxxxxxx>
> ---
> Documentation/virtual/kvm/api.txt | 27 ++++++++++++++++++++++++++
> arch/arm/include/asm/kvm_host.h | 7 +++++++
> arch/arm64/include/asm/kvm_host.h | 1 +
> arch/arm64/include/asm/kvm_mmu.h | 41 ++++++++++++++++++++++++++++++---------
> arch/arm64/kvm/reset.c | 28 ++++++++++++++++++++++++++
> include/uapi/linux/kvm.h | 4 ++++
> virt/kvm/arm/arm.c | 2 +-
> 7 files changed, 100 insertions(+), 10 deletions(-)
>
> diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
> index 57d3ee9e4bde..a203faf768c4 100644
> --- a/Documentation/virtual/kvm/api.txt
> +++ b/Documentation/virtual/kvm/api.txt
> @@ -3403,6 +3403,33 @@ invalid, if invalid pages are written to (e.g. after the end of memory)
> or if no page table is present for the addresses (e.g. when using
> hugepages).
>
> +4.109 KVM_ARM_GET_PHYS_SHIFT
> +
> +Capability: KVM_CAP_ARM_CONFIG_PHYS_SHIFT
> +Architectures: arm64
> +Type: vm ioctl
> +Parameters: __u32 (out)
> +Returns: 0 on success, a negative value on error
> +
> +This ioctl is used to get the current maximum physical address size for
> +the VM. The value is Log2(Maximum_Physical_Address). This is neither the
> + amount of physical memory assigned to the VM nor the maximum physical address
> +that a real CPU on the host can handle. Rather, this is the upper limit of the
> +guest physical address that can be used by the VM.
What is the point of this? Presumably if userspace has set the size, it
already knows the size?
> +
> +4.109 KVM_ARM_SET_PHYS_SHIFT
> +
> +Capability: KVM_CAP_ARM_CONFIG_PHYS_SHIFT
> +Architectures: arm64
> +Type: vm ioctl
> +Parameters: __u32 (in)
> +Returns: 0 on success, a negative value on error
> +
> +This ioctl is used to set the maximum physical address size for
> +the VM. The value is Log2(Maximum_Physical_Address). The value can only
> +be increased from the existing setting. The value cannot be changed
> +after the stage-2 page tables are allocated and will return an error.
> +
Is there a way for userspace to discover what the underlying hardware
can actually support, beyond trial-and-error on this ioctl?
> 5. The kvm_run structure
> ------------------------
>
> diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
> index a9f7d3f47134..fa8e68a4f692 100644
> --- a/arch/arm/include/asm/kvm_host.h
> +++ b/arch/arm/include/asm/kvm_host.h
> @@ -268,6 +268,13 @@ static inline int kvm_arch_dev_ioctl_check_extension(struct kvm *kvm, long ext)
> return 0;
> }
>
> +static inline long kvm_arch_dev_vm_ioctl(struct kvm *kvm,
> + unsigned int ioctl,
> + unsigned long arg)
> +{
> + return -EINVAL;
> +}
> +
> int kvm_perf_init(void);
> int kvm_perf_teardown(void);
>
> diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
> index 1e66e5ab3dde..2895c2cda8fc 100644
> --- a/arch/arm64/include/asm/kvm_host.h
> +++ b/arch/arm64/include/asm/kvm_host.h
> @@ -50,6 +50,7 @@
> int __attribute_const__ kvm_target_cpu(void);
> int kvm_reset_vcpu(struct kvm_vcpu *vcpu);
> int kvm_arch_dev_ioctl_check_extension(struct kvm *kvm, long ext);
> +long kvm_arch_dev_vm_ioctl(struct kvm *kvm, unsigned int ioctl, unsigned long arg);
> void __extended_idmap_trampoline(phys_addr_t boot_pgd, phys_addr_t idmap_start);
>
> struct kvm_arch {
> diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
> index ab6a8b905065..ab7f50f20bcd 100644
> --- a/arch/arm64/include/asm/kvm_mmu.h
> +++ b/arch/arm64/include/asm/kvm_mmu.h
> @@ -347,21 +347,44 @@ static inline u64 kvm_vttbr_baddr_mask(struct kvm *kvm)
> return GENMASK_ULL(PHYS_MASK_SHIFT - 1, x);
> }
>
> +static inline int kvm_reconfig_stage2(struct kvm *kvm, u32 phys_shift)
> +{
> + int rc = 0;
> + unsigned int pa_max, parange;
> +
> + parange = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1) & 7;
> + pa_max = id_aa64mmfr0_parange_to_phys_shift(parange);
> + /* Raise it to 40bits for backward compatibility */
> + pa_max = (pa_max < 40) ? 40 : pa_max;
> + /* Make sure the size is supported/available */
> + if (phys_shift > PHYS_MASK_SHIFT || phys_shift > pa_max)
> + return -EINVAL;
> + /*
> + * The stage2 PGD is dependent on the settings we initialise here
> + * and should be allocated only after this step. We cannot allow
> + * down sizing the IPA size as there could be devices or memory
> + * regions, that depend on the previous size.
> + */
> + mutex_lock(&kvm->slots_lock);
> + if (kvm->arch.pgd || phys_shift < kvm->arch.phys_shift) {
> + rc = -EPERM;
> + } else if (phys_shift > kvm->arch.phys_shift) {
> + kvm->arch.phys_shift = phys_shift;
> + kvm->arch.s2_levels = stage2_pt_levels(kvm->arch.phys_shift);
> + kvm->arch.vtcr_private = VTCR_EL2_SL0(kvm->arch.s2_levels) |
> + TCR_T0SZ(kvm->arch.phys_shift);
> + }
I think you can rework the above to make it more obvious what's going on
in this way:
rc = -EPERM;
if (kvm->arch.pgd || phys_shift < kvm->arch.phys_shift)
goto out_unlock;
rc = 0;
if (phys_shift == kvm->arch.phys_shift)
goto out_unlock;
kvm->arch.phys_shift = phys_shift;
kvm->arch.s2_levels = stage2_pt_levels(kvm->arch.phys_shift);
kvm->arch.vtcr_private = VTCR_EL2_SL0(kvm->arch.s2_levels) |
TCR_T0SZ(kvm->arch.phys_shift);
out_unlock:
> + mutex_unlock(&kvm->slots_lock);
> + return rc;
> +}
> +
> /*
> * kvm_init_stage2_config: Initialise the VM specific stage2 page table
> * details to default IPA size.
> */
> static inline void kvm_init_stage2_config(struct kvm *kvm)
> {
> - /*
> - * The stage2 PGD is dependent on the settings we initialise here
> - * and should be allocated only after this step.
> - */
> - VM_BUG_ON(kvm->arch.pgd != NULL);
> - kvm->arch.phys_shift = KVM_PHYS_SHIFT_DEFAULT;
> - kvm->arch.s2_levels = stage2_pt_levels(kvm->arch.phys_shift);
> - kvm->arch.vtcr_private = VTCR_EL2_SL0(kvm->arch.s2_levels) |
> - TCR_T0SZ(kvm->arch.phys_shift);
> + kvm_reconfig_stage2(kvm, KVM_PHYS_SHIFT_DEFAULT);
> }
>
> #endif /* __ASSEMBLY__ */
> diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c
> index 3256b9228e75..90ceca823aca 100644
> --- a/arch/arm64/kvm/reset.c
> +++ b/arch/arm64/kvm/reset.c
> @@ -23,6 +23,7 @@
> #include <linux/kvm_host.h>
> #include <linux/kvm.h>
> #include <linux/hw_breakpoint.h>
> +#include <linux/uaccess.h>
>
> #include <kvm/arm_arch_timer.h>
>
> @@ -81,6 +82,9 @@ int kvm_arch_dev_ioctl_check_extension(struct kvm *kvm, long ext)
> case KVM_CAP_VCPU_ATTRIBUTES:
> r = 1;
> break;
> + case KVM_CAP_ARM_CONFIG_PHYS_SHIFT:
> + r = 1;
> + break;
> default:
> r = 0;
> }
> @@ -88,6 +92,30 @@ int kvm_arch_dev_ioctl_check_extension(struct kvm *kvm, long ext)
> return r;
> }
>
> +long kvm_arch_dev_vm_ioctl(struct kvm *kvm,
> + unsigned int ioctl, unsigned long arg)
> +{
> + void __user *argp = (void __user *)arg;
> + u32 phys_shift;
> + long r = -EFAULT;
> +
> + switch (ioctl) {
> + case KVM_ARM_GET_PHYS_SHIFT:
> + phys_shift = kvm_phys_shift(kvm);
> + if (!put_user(phys_shift, (u32 __user *)argp))
> + r = 0;
> + break;
> + case KVM_ARM_SET_PHYS_SHIFT:
> + if (!get_user(phys_shift, (u32 __user*)argp))
> + r = kvm_reconfig_stage2(kvm, phys_shift);
> + break;
> + default:
> + r = -EINVAL;
> + }
> + return r;
> +}
> +
> +
> /**
> * kvm_reset_vcpu - sets core registers and sys_regs to reset value
> * @vcpu: The VCPU pointer
> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
> index 496e59a2738b..66bfbe19b434 100644
> --- a/include/uapi/linux/kvm.h
> +++ b/include/uapi/linux/kvm.h
> @@ -932,6 +932,7 @@ struct kvm_ppc_resize_hpt {
> #define KVM_CAP_HYPERV_SYNIC2 148
> #define KVM_CAP_HYPERV_VP_INDEX 149
> #define KVM_CAP_S390_AIS_MIGRATION 150
> +#define KVM_CAP_ARM_CONFIG_PHYS_SHIFT 151
>
> #ifdef KVM_CAP_IRQ_ROUTING
>
> @@ -1261,6 +1262,9 @@ struct kvm_s390_ucas_mapping {
> #define KVM_PPC_CONFIGURE_V3_MMU _IOW(KVMIO, 0xaf, struct kvm_ppc_mmuv3_cfg)
> /* Available with KVM_CAP_PPC_RADIX_MMU */
> #define KVM_PPC_GET_RMMU_INFO _IOW(KVMIO, 0xb0, struct kvm_ppc_rmmu_info)
> +/* Available with KVM_CAP_ARM_CONFIG_PHYS_SHIFT */
> +#define KVM_ARM_GET_PHYS_SHIFT _IOR(KVMIO, 0xb1, __u32)
> +#define KVM_ARM_SET_PHYS_SHIFT _IOW(KVMIO, 0xb2, __u32)
>
> /* ioctl for vm fd */
> #define KVM_CREATE_DEVICE _IOWR(KVMIO, 0xe0, struct kvm_create_device)
> diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c
> index e0bf8d19fcfe..05fc49304722 100644
> --- a/virt/kvm/arm/arm.c
> +++ b/virt/kvm/arm/arm.c
> @@ -1136,7 +1136,7 @@ long kvm_arch_vm_ioctl(struct file *filp,
> return 0;
> }
> default:
> - return -EINVAL;
> + return kvm_arch_dev_vm_ioctl(kvm, ioctl, arg);
> }
> }
>
> --
> 2.13.6
>
Have you considered making this capability a generic capability and
encoding this in the 'type' argument to KVM_CREATE_VM? This would
significantly simplify all the above and would allow you to drop patch 8
and 9 I think.
Thanks,
-Christoffer