Re: [RFC PATCH 06/28] arm64: RME: ioctls to create and configure realms

From: Steven Price
Date: Fri Mar 10 2023 - 10:55:25 EST


On 06/03/2023 19:10, Zhi Wang wrote:
> On Fri, 27 Jan 2023 11:29:10 +0000
> Steven Price <steven.price@xxxxxxx> wrote:
>
>> Add the KVM_CAP_ARM_RME_CREATE_FD ioctl to create a realm. This involves
>> delegating pages to the RMM to hold the Realm Descriptor (RD) and for
>> the base level of the Realm Translation Tables (RTT). A VMID also need
>> to be picked, since the RMM has a separate VMID address space a
>> dedicated allocator is added for this purpose.
>>
>> KVM_CAP_ARM_RME_CONFIG_REALM is provided to allow configuring the realm
>> before it is created.
>>
>> Signed-off-by: Steven Price <steven.price@xxxxxxx>
>> ---
>> arch/arm64/include/asm/kvm_rme.h | 14 ++
>> arch/arm64/kvm/arm.c | 19 ++
>> arch/arm64/kvm/mmu.c | 6 +
>> arch/arm64/kvm/reset.c | 33 +++
>> arch/arm64/kvm/rme.c | 357 +++++++++++++++++++++++++++++++
>> 5 files changed, 429 insertions(+)
>>
>> diff --git a/arch/arm64/include/asm/kvm_rme.h b/arch/arm64/include/asm/kvm_rme.h
>> index c26bc2c6770d..055a22accc08 100644
>> --- a/arch/arm64/include/asm/kvm_rme.h
>> +++ b/arch/arm64/include/asm/kvm_rme.h
>> @@ -6,6 +6,8 @@
>> #ifndef __ASM_KVM_RME_H
>> #define __ASM_KVM_RME_H
>>
>> +#include <uapi/linux/kvm.h>
>> +
>> enum realm_state {
>> REALM_STATE_NONE,
>> REALM_STATE_NEW,
>> @@ -15,8 +17,20 @@ enum realm_state {
>>
>> struct realm {
>> enum realm_state state;
>> +
>> + void *rd;
>> + struct realm_params *params;
>> +
>> + unsigned long num_aux;
>> + unsigned int vmid;
>> + unsigned int ia_bits;
>> };
>>
>> int kvm_init_rme(void);
>> +u32 kvm_realm_ipa_limit(void);
>> +
>> +int kvm_realm_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap);
>> +int kvm_init_realm_vm(struct kvm *kvm);
>> +void kvm_destroy_realm(struct kvm *kvm);
>>
>> #endif
>> diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
>> index d97b39d042ab..50f54a63732a 100644
>> --- a/arch/arm64/kvm/arm.c
>> +++ b/arch/arm64/kvm/arm.c
>> @@ -103,6 +103,13 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
>> r = 0;
>> set_bit(KVM_ARCH_FLAG_SYSTEM_SUSPEND_ENABLED, &kvm->arch.flags);
>> break;
>> + case KVM_CAP_ARM_RME:
>> + if (!static_branch_unlikely(&kvm_rme_is_available))
>> + return -EINVAL;
>> + mutex_lock(&kvm->lock);
>> + r = kvm_realm_enable_cap(kvm, cap);
>> + mutex_unlock(&kvm->lock);
>> + break;
>> default:
>> r = -EINVAL;
>> break;
>> @@ -172,6 +179,13 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
>> */
>> kvm->arch.dfr0_pmuver.imp = kvm_arm_pmu_get_pmuver_limit();
>>
>> + /* Initialise the realm bits after the generic bits are enabled */
>> + if (kvm_is_realm(kvm)) {
>> + ret = kvm_init_realm_vm(kvm);
>> + if (ret)
>> + goto err_free_cpumask;
>> + }
>> +
>> return 0;
>>
>> err_free_cpumask:
>> @@ -204,6 +218,8 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
>> kvm_destroy_vcpus(kvm);
>>
>> kvm_unshare_hyp(kvm, kvm + 1);
>> +
>> + kvm_destroy_realm(kvm);
>> }
>>
>> int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
>> @@ -300,6 +316,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
>> case KVM_CAP_ARM_PTRAUTH_GENERIC:
>> r = system_has_full_ptr_auth();
>> break;
>> + case KVM_CAP_ARM_RME:
>> + r = static_key_enabled(&kvm_rme_is_available);
>> + break;
>> default:
>> r = 0;
>> }
>> diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
>> index 31d7fa4c7c14..d0f707767d05 100644
>> --- a/arch/arm64/kvm/mmu.c
>> +++ b/arch/arm64/kvm/mmu.c
>> @@ -840,6 +840,12 @@ void kvm_free_stage2_pgd(struct kvm_s2_mmu *mmu)
>> struct kvm_pgtable *pgt = NULL;
>>
>> write_lock(&kvm->mmu_lock);
>> + if (kvm_is_realm(kvm) &&
>> + kvm_realm_state(kvm) != REALM_STATE_DYING) {
>> + /* TODO: teardown rtts */
>> + write_unlock(&kvm->mmu_lock);
>> + return;
>> + }
>> pgt = mmu->pgt;
>> if (pgt) {
>> mmu->pgd_phys = 0;
>> diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c
>> index e0267f672b8a..c165df174737 100644
>> --- a/arch/arm64/kvm/reset.c
>> +++ b/arch/arm64/kvm/reset.c
>> @@ -395,3 +395,36 @@ int kvm_set_ipa_limit(void)
>>
>> return 0;
>> }
>> +
>
> The below function doesn't have an user in this patch. Also,
> it looks like a part of copy from kvm_init_stage2_mmu()
> in arch/arm64/kvm/mmu.c.

Good spot ;) Yes I discovered this, it should have been removed - it's
no longer used. I think this was an error when I was rebasing:
kvm_arm_setup-stage2() was removed in 315775ff7c6d ("KVM: arm64:
Consolidate stage-2 initialisation into a single function").

Steve

>> +int kvm_arm_setup_stage2(struct kvm *kvm, unsigned long type)
>> +{
>> + u64 mmfr0, mmfr1;
>> + u32 phys_shift;
>> + u32 ipa_limit = kvm_ipa_limit;
>> +
>> + if (kvm_is_realm(kvm))
>> + ipa_limit = kvm_realm_ipa_limit();
>> +
>> + if (type & ~KVM_VM_TYPE_ARM_IPA_SIZE_MASK)
>> + return -EINVAL;
>> +
>> + phys_shift = KVM_VM_TYPE_ARM_IPA_SIZE(type);
>> + if (phys_shift) {
>> + if (phys_shift > ipa_limit ||
>> + phys_shift < ARM64_MIN_PARANGE_BITS)
>> + return -EINVAL;
>> + } else {
>> + phys_shift = KVM_PHYS_SHIFT;
>> + if (phys_shift > ipa_limit) {
>> + pr_warn_once("%s using unsupported default IPA limit, upgrade your VMM\n",
>> + current->comm);
>> + return -EINVAL;
>> + }
>> + }
>> +
>> + mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1);
>> + mmfr1 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1);
>> + kvm->arch.vtcr = kvm_get_vtcr(mmfr0, mmfr1, phys_shift);
>> +
>> + return 0;
>> +}
>> diff --git a/arch/arm64/kvm/rme.c b/arch/arm64/kvm/rme.c
>> index f6b587bc116e..9f8c5a91b8fc 100644
>> --- a/arch/arm64/kvm/rme.c
>> +++ b/arch/arm64/kvm/rme.c
>> @@ -5,9 +5,49 @@
>>
>> #include <linux/kvm_host.h>
>>
>> +#include <asm/kvm_emulate.h>
>> +#include <asm/kvm_mmu.h>
>> #include <asm/rmi_cmds.h>
>> #include <asm/virt.h>
>>
>> +/************ FIXME: Copied from kvm/hyp/pgtable.c **********/
>> +#include <asm/kvm_pgtable.h>
>> +
>> +struct kvm_pgtable_walk_data {
>> + struct kvm_pgtable *pgt;
>> + struct kvm_pgtable_walker *walker;
>> +
>> + u64 addr;
>> + u64 end;
>> +};
>> +
>> +static u32 __kvm_pgd_page_idx(struct kvm_pgtable *pgt, u64 addr)
>> +{
>> + u64 shift = kvm_granule_shift(pgt->start_level - 1); /* May underflow */
>> + u64 mask = BIT(pgt->ia_bits) - 1;
>> +
>> + return (addr & mask) >> shift;
>> +}
>> +
>> +static u32 kvm_pgd_pages(u32 ia_bits, u32 start_level)
>> +{
>> + struct kvm_pgtable pgt = {
>> + .ia_bits = ia_bits,
>> + .start_level = start_level,
>> + };
>> +
>> + return __kvm_pgd_page_idx(&pgt, -1ULL) + 1;
>> +}
>> +
>> +/******************/
>> +
>> +static unsigned long rmm_feat_reg0;
>> +
>> +static bool rme_supports(unsigned long feature)
>> +{
>> + return !!u64_get_bits(rmm_feat_reg0, feature);
>> +}
>> +
>> static int rmi_check_version(void)
>> {
>> struct arm_smccc_res res;
>> @@ -33,8 +73,319 @@ static int rmi_check_version(void)
>> return 0;
>> }
>>
>> +static unsigned long create_realm_feat_reg0(struct kvm *kvm)
>> +{
>> + unsigned long ia_bits = VTCR_EL2_IPA(kvm->arch.vtcr);
>> + u64 feat_reg0 = 0;
>> +
>> + int num_bps = u64_get_bits(rmm_feat_reg0,
>> + RMI_FEATURE_REGISTER_0_NUM_BPS);
>> + int num_wps = u64_get_bits(rmm_feat_reg0,
>> + RMI_FEATURE_REGISTER_0_NUM_WPS);
>> +
>> + feat_reg0 |= u64_encode_bits(ia_bits, RMI_FEATURE_REGISTER_0_S2SZ);
>> + feat_reg0 |= u64_encode_bits(num_bps, RMI_FEATURE_REGISTER_0_NUM_BPS);
>> + feat_reg0 |= u64_encode_bits(num_wps, RMI_FEATURE_REGISTER_0_NUM_WPS);
>> +
>> + return feat_reg0;
>> +}
>> +
>> +u32 kvm_realm_ipa_limit(void)
>> +{
>> + return u64_get_bits(rmm_feat_reg0, RMI_FEATURE_REGISTER_0_S2SZ);
>> +}
>> +
>> +static u32 get_start_level(struct kvm *kvm)
>> +{
>> + long sl0 = FIELD_GET(VTCR_EL2_SL0_MASK, kvm->arch.vtcr);
>> +
>> + return VTCR_EL2_TGRAN_SL0_BASE - sl0;
>> +}
>> +
>> +static int realm_create_rd(struct kvm *kvm)
>> +{
>> + struct realm *realm = &kvm->arch.realm;
>> + struct realm_params *params = realm->params;
>> + void *rd = NULL;
>> + phys_addr_t rd_phys, params_phys;
>> + struct kvm_pgtable *pgt = kvm->arch.mmu.pgt;
>> + unsigned int pgd_sz;
>> + int i, r;
>> +
>> + if (WARN_ON(realm->rd) || WARN_ON(!realm->params))
>> + return -EEXIST;
>> +
>> + rd = (void *)__get_free_page(GFP_KERNEL);
>> + if (!rd)
>> + return -ENOMEM;
>> +
>> + rd_phys = virt_to_phys(rd);
>> + if (rmi_granule_delegate(rd_phys)) {
>> + r = -ENXIO;
>> + goto out;
>> + }
>> +
>> + pgd_sz = kvm_pgd_pages(pgt->ia_bits, pgt->start_level);
>> + for (i = 0; i < pgd_sz; i++) {
>> + phys_addr_t pgd_phys = kvm->arch.mmu.pgd_phys + i * PAGE_SIZE;
>> +
>> + if (rmi_granule_delegate(pgd_phys)) {
>> + r = -ENXIO;
>> + goto out_undelegate_tables;
>> + }
>> + }
>> +
>> + params->rtt_level_start = get_start_level(kvm);
>> + params->rtt_num_start = pgd_sz;
>> + params->rtt_base = kvm->arch.mmu.pgd_phys;
>> + params->vmid = realm->vmid;
>> +
>> + params_phys = virt_to_phys(params);
>> +
>> + if (rmi_realm_create(rd_phys, params_phys)) {
>> + r = -ENXIO;
>> + goto out_undelegate_tables;
>> + }
>> +
>> + realm->rd = rd;
>> + realm->ia_bits = VTCR_EL2_IPA(kvm->arch.vtcr);
>> +
>> + if (WARN_ON(rmi_rec_aux_count(rd_phys, &realm->num_aux))) {
>> + WARN_ON(rmi_realm_destroy(rd_phys));
>> + goto out_undelegate_tables;
>> + }
>> +
>> + return 0;
>> +
>> +out_undelegate_tables:
>> + while (--i >= 0) {
>> + phys_addr_t pgd_phys = kvm->arch.mmu.pgd_phys + i * PAGE_SIZE;
>> +
>> + WARN_ON(rmi_granule_undelegate(pgd_phys));
>> + }
>> + WARN_ON(rmi_granule_undelegate(rd_phys));
>> +out:
>> + free_page((unsigned long)rd);
>> + return r;
>> +}
>> +
>> +/* Protects access to rme_vmid_bitmap */
>> +static DEFINE_SPINLOCK(rme_vmid_lock);
>> +static unsigned long *rme_vmid_bitmap;
>> +
>> +static int rme_vmid_init(void)
>> +{
>> + unsigned int vmid_count = 1 << kvm_get_vmid_bits();
>> +
>> + rme_vmid_bitmap = bitmap_zalloc(vmid_count, GFP_KERNEL);
>> + if (!rme_vmid_bitmap) {
>> + kvm_err("%s: Couldn't allocate rme vmid bitmap\n", __func__);
>> + return -ENOMEM;
>> + }
>> +
>> + return 0;
>> +}
>> +
>> +static int rme_vmid_reserve(void)
>> +{
>> + int ret;
>> + unsigned int vmid_count = 1 << kvm_get_vmid_bits();
>> +
>> + spin_lock(&rme_vmid_lock);
>> + ret = bitmap_find_free_region(rme_vmid_bitmap, vmid_count, 0);
>> + spin_unlock(&rme_vmid_lock);
>> +
>> + return ret;
>> +}
>> +
>> +static void rme_vmid_release(unsigned int vmid)
>> +{
>> + spin_lock(&rme_vmid_lock);
>> + bitmap_release_region(rme_vmid_bitmap, vmid, 0);
>> + spin_unlock(&rme_vmid_lock);
>> +}
>> +
>> +static int kvm_create_realm(struct kvm *kvm)
>> +{
>> + struct realm *realm = &kvm->arch.realm;
>> + int ret;
>> +
>> + if (!kvm_is_realm(kvm) || kvm_realm_state(kvm) != REALM_STATE_NONE)
>> + return -EEXIST;
>> +
>> + ret = rme_vmid_reserve();
>> + if (ret < 0)
>> + return ret;
>> + realm->vmid = ret;
>> +
>> + ret = realm_create_rd(kvm);
>> + if (ret) {
>> + rme_vmid_release(realm->vmid);
>> + return ret;
>> + }
>> +
>> + WRITE_ONCE(realm->state, REALM_STATE_NEW);
>> +
>> + /* The realm is up, free the parameters. */
>> + free_page((unsigned long)realm->params);
>> + realm->params = NULL;
>> +
>> + return 0;
>> +}
>> +
>> +static int config_realm_hash_algo(struct realm *realm,
>> + struct kvm_cap_arm_rme_config_item *cfg)
>> +{
>> + switch (cfg->hash_algo) {
>> + case KVM_CAP_ARM_RME_MEASUREMENT_ALGO_SHA256:
>> + if (!rme_supports(RMI_FEATURE_REGISTER_0_HASH_SHA_256))
>> + return -EINVAL;
>> + break;
>> + case KVM_CAP_ARM_RME_MEASUREMENT_ALGO_SHA512:
>> + if (!rme_supports(RMI_FEATURE_REGISTER_0_HASH_SHA_512))
>> + return -EINVAL;
>> + break;
>> + default:
>> + return -EINVAL;
>> + }
>> + realm->params->measurement_algo = cfg->hash_algo;
>> + return 0;
>> +}
>> +
>> +static int config_realm_sve(struct realm *realm,
>> + struct kvm_cap_arm_rme_config_item *cfg)
>> +{
>> + u64 features_0 = realm->params->features_0;
>> + int max_sve_vq = u64_get_bits(rmm_feat_reg0,
>> + RMI_FEATURE_REGISTER_0_SVE_VL);
>> +
>> + if (!rme_supports(RMI_FEATURE_REGISTER_0_SVE_EN))
>> + return -EINVAL;
>> +
>> + if (cfg->sve_vq > max_sve_vq)
>> + return -EINVAL;
>> +
>> + features_0 &= ~(RMI_FEATURE_REGISTER_0_SVE_EN |
>> + RMI_FEATURE_REGISTER_0_SVE_VL);
>> + features_0 |= u64_encode_bits(1, RMI_FEATURE_REGISTER_0_SVE_EN);
>> + features_0 |= u64_encode_bits(cfg->sve_vq,
>> + RMI_FEATURE_REGISTER_0_SVE_VL);
>> +
>> + realm->params->features_0 = features_0;
>> + return 0;
>> +}
>> +
>> +static int kvm_rme_config_realm(struct kvm *kvm, struct kvm_enable_cap *cap)
>> +{
>> + struct kvm_cap_arm_rme_config_item cfg;
>> + struct realm *realm = &kvm->arch.realm;
>> + int r = 0;
>> +
>> + if (kvm_realm_state(kvm) != REALM_STATE_NONE)
>> + return -EBUSY;
>> +
>> + if (copy_from_user(&cfg, (void __user *)cap->args[1], sizeof(cfg)))
>> + return -EFAULT;
>> +
>> + switch (cfg.cfg) {
>> + case KVM_CAP_ARM_RME_CFG_RPV:
>> + memcpy(&realm->params->rpv, &cfg.rpv, sizeof(cfg.rpv));
>> + break;
>> + case KVM_CAP_ARM_RME_CFG_HASH_ALGO:
>> + r = config_realm_hash_algo(realm, &cfg);
>> + break;
>> + case KVM_CAP_ARM_RME_CFG_SVE:
>> + r = config_realm_sve(realm, &cfg);
>> + break;
>> + default:
>> + r = -EINVAL;
>> + }
>> +
>> + return r;
>> +}
>> +
>> +int kvm_realm_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
>> +{
>> + int r = 0;
>> +
>> + switch (cap->args[0]) {
>> + case KVM_CAP_ARM_RME_CONFIG_REALM:
>> + r = kvm_rme_config_realm(kvm, cap);
>> + break;
>> + case KVM_CAP_ARM_RME_CREATE_RD:
>> + if (kvm->created_vcpus) {
>> + r = -EBUSY;
>> + break;
>> + }
>> +
>> + r = kvm_create_realm(kvm);
>> + break;
>> + default:
>> + r = -EINVAL;
>> + break;
>> + }
>> +
>> + return r;
>> +}
>> +
>> +void kvm_destroy_realm(struct kvm *kvm)
>> +{
>> + struct realm *realm = &kvm->arch.realm;
>> + struct kvm_pgtable *pgt = kvm->arch.mmu.pgt;
>> + unsigned int pgd_sz;
>> + int i;
>> +
>> + if (realm->params) {
>> + free_page((unsigned long)realm->params);
>> + realm->params = NULL;
>> + }
>> +
>> + if (kvm_realm_state(kvm) == REALM_STATE_NONE)
>> + return;
>> +
>> + WRITE_ONCE(realm->state, REALM_STATE_DYING);
>> +
>> + rme_vmid_release(realm->vmid);
>> +
>> + if (realm->rd) {
>> + phys_addr_t rd_phys = virt_to_phys(realm->rd);
>> +
>> + if (WARN_ON(rmi_realm_destroy(rd_phys)))
>> + return;
>> + if (WARN_ON(rmi_granule_undelegate(rd_phys)))
>> + return;
>> + free_page((unsigned long)realm->rd);
>> + realm->rd = NULL;
>> + }
>> +
>> + pgd_sz = kvm_pgd_pages(pgt->ia_bits, pgt->start_level);
>> + for (i = 0; i < pgd_sz; i++) {
>> + phys_addr_t pgd_phys = kvm->arch.mmu.pgd_phys + i * PAGE_SIZE;
>> +
>> + if (WARN_ON(rmi_granule_undelegate(pgd_phys)))
>> + return;
>> + }
>> +
>> + kvm_free_stage2_pgd(&kvm->arch.mmu);
>> +}
>> +
>> +int kvm_init_realm_vm(struct kvm *kvm)
>> +{
>> + struct realm_params *params;
>> +
>> + params = (struct realm_params *)get_zeroed_page(GFP_KERNEL);
>> + if (!params)
>> + return -ENOMEM;
>> +
>> + params->features_0 = create_realm_feat_reg0(kvm);
>> + kvm->arch.realm.params = params;
>> + return 0;
>> +}
>> +
>> int kvm_init_rme(void)
>> {
>> + int ret;
>> +
>> if (PAGE_SIZE != SZ_4K)
>> /* Only 4k page size on the host is supported */
>> return 0;
>> @@ -43,6 +394,12 @@ int kvm_init_rme(void)
>> /* Continue without realm support */
>> return 0;
>>
>> + ret = rme_vmid_init();
>> + if (ret)
>> + return ret;
>> +
>> + WARN_ON(rmi_features(0, &rmm_feat_reg0));
>> +
>> /* Future patch will enable static branch kvm_rme_is_available */
>>
>> return 0;
>