Re: [PATCH 6/9] x86/kvm/mmu: make space for source data caching in struct kvm_mmu
From: Vitaly Kuznetsov
Date: Thu Sep 20 2018 - 04:13:00 EST
Sean Christopherson <sean.j.christopherson@xxxxxxxxx> writes:
> On Thu, 2018-08-02 at 12:01 +0200, Vitaly Kuznetsov wrote:
>> In preparation for MMU reconfiguration avoidance we need space to
>> cache source data. As this partially intersects with kvm_mmu_page_role,
>> create a 64-bit union kvm_mmu_role holding both base_role and the
>> extended data. No functional change.
>>
>> Signed-off-by: Vitaly Kuznetsov <vkuznets@xxxxxxxxxx>
>> ---
>>  arch/x86/include/asm/kvm_host.h | 14 +++++++++++++-
>>  arch/x86/kvm/mmu.c              | 19 ++++++++++++-------
>>  arch/x86/kvm/vmx.c              |  2 +-
>>  3 files changed, 26 insertions(+), 9 deletions(-)
>>
>> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
>> index c5f116f9783d..830166ab4d59 100644
>> --- a/arch/x86/include/asm/kvm_host.h
>> +++ b/arch/x86/include/asm/kvm_host.h
>> @@ -272,6 +272,18 @@ union kvm_mmu_page_role {
>>  	};
>>  };
>> 
>> +union kvm_mmu_scache {
>> +	unsigned int word;
>> +};
>> +
>> +union kvm_mmu_role {
>> +	unsigned long as_u64;
>
> No clue if it matters, but can't this be a u32 since both
> kvm_mmu_page_role and kvm_mmu_scache are capped at 16 bits?
kvm_mmu_page_role is 32 bits; even if we make 'smm' a 1-bit field and
throw away its padding we'll end up with 19 bits ...
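For reference, the current layout (paraphrasing
arch/x86/include/asm/kvm_host.h from memory, so double-check against
the tree):

	union kvm_mmu_page_role {
		unsigned word;
		struct {
			unsigned level:4;
			unsigned cr4_pae:1;
			unsigned quadrant:2;
			unsigned direct:1;
			unsigned access:3;
			unsigned invalid:1;
			unsigned nxe:1;
			unsigned cr0_wp:1;
			unsigned smep_andnot_wp:1;
			unsigned smap_andnot_wp:1;
			unsigned ad_disabled:1;
			unsigned guest_mode:1;
			unsigned :6;
			unsigned smm:8;
		};
	};

The named fields before 'smm' already take 18 bits, so a 1-bit 'smm'
with the padding dropped still leaves 19.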
(Generally speaking, I would prefer to leave some space for future
extension: we only have 3 'struct kvm_mmu' per vcpu so we're talking
about 12 bytes. We can, of course, change the types here in the future -
this is not a stable ABI or anything - but whoever does that will have
to check all users, and that is always error-prone.)
>
> Tangentially related, it seems like we should have build-time
> asserts on the size of the unions since we're (ab)using union
> behavior to dereference the entire value in a single shot,
> e.g. base_role.word and mmu_role->as_u64.
(I was told that union aliasing is UB; however, it's all over the
kernel, so it's unlikely that gcc's behavior is ever going to change.)
I like your idea, I'll add a couple of BUILD_BUG_ONs in v2. Thanks!
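Something along these lines, I imagine (just a sketch; where exactly
they land in v2 may differ):

	/*
	 * We rely on viewing the unions through their 'word'/'as_u64'
	 * members, so make sure nothing grows them unnoticed.
	 */
	BUILD_BUG_ON(sizeof(union kvm_mmu_page_role) != sizeof(u32));
	BUILD_BUG_ON(sizeof(union kvm_mmu_role) != sizeof(u64));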
>
>> +	struct {
>> +		union kvm_mmu_page_role base_role;
>> +		union kvm_mmu_scache scache;
>> +	};
>> +};
>> +
>>  struct kvm_rmap_head {
>>  	unsigned long val;
>>  };
>> @@ -359,7 +371,7 @@ struct kvm_mmu {
>>  	void (*update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
>>  			   u64 *spte, const void *pte);
>>  	hpa_t root_hpa;
>> -	union kvm_mmu_page_role base_role;
>> +	union kvm_mmu_role mmu_role;
>>  	u8 root_level;
>>  	u8 shadow_root_level;
>>  	u8 ept_ad;
>> diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
>> index 85ec027299d6..c538e47e471b 100644
>> --- a/arch/x86/kvm/mmu.c
>> +++ b/arch/x86/kvm/mmu.c
>> @@ -2331,7 +2331,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
>>  	int collisions = 0;
>>  	LIST_HEAD(invalid_list);
>> 
>> -	role = vcpu->arch.mmu->base_role;
>> +	role = vcpu->arch.mmu->mmu_role.base_role;
>>  	role.level = level;
>>  	role.direct = direct;
>>  	if (role.direct)
>> @@ -4377,7 +4377,8 @@ static void reset_rsvds_bits_mask_ept(struct kvm_vcpu *vcpu,
>>  void
>>  reset_shadow_zero_bits_mask(struct kvm_vcpu *vcpu, struct kvm_mmu *context)
>>  {
>> -	bool uses_nx = context->nx || context->base_role.smep_andnot_wp;
>> +	bool uses_nx = context->nx ||
>> +		context->mmu_role.base_role.smep_andnot_wp;
>>  	struct rsvd_bits_validate *shadow_zero_check;
>>  	int i;
>> 
>> @@ -4696,7 +4697,7 @@ static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
>>  {
>>  	struct kvm_mmu *context = vcpu->arch.mmu;
>> 
>> -	context->base_role.word = mmu_base_role_mask.word &
>> +	context->mmu_role.base_role.word = mmu_base_role_mask.word &
>>  		  kvm_calc_tdp_mmu_root_page_role(vcpu).word;
>>  	context->page_fault = tdp_page_fault;
>>  	context->sync_page = nonpaging_sync_page;
>> @@ -4777,7 +4778,7 @@ void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu)
>>  	else
>>  		paging32_init_context(vcpu, context);
>> 
>> -	context->base_role.word = mmu_base_role_mask.word &
>> +	context->mmu_role.base_role.word = mmu_base_role_mask.word &
>>  		  kvm_calc_shadow_mmu_root_page_role(vcpu).word;
>>  	reset_shadow_zero_bits_mask(vcpu, context);
>>  }
>> @@ -4786,7 +4787,7 @@ EXPORT_SYMBOL_GPL(kvm_init_shadow_mmu);
>>  static union kvm_mmu_page_role
>>  kvm_calc_shadow_ept_root_page_role(struct kvm_vcpu *vcpu, bool accessed_dirty)
>>  {
>> -	union kvm_mmu_page_role role = vcpu->arch.mmu->base_role;
>> +	union kvm_mmu_page_role role = vcpu->arch.mmu->mmu_role.base_role;
>> 
>>  	role.level = PT64_ROOT_4LEVEL;
>>  	role.direct = false;
>> @@ -4816,7 +4817,8 @@ void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly,
>>  	context->update_pte = ept_update_pte;
>>  	context->root_level = PT64_ROOT_4LEVEL;
>>  	context->direct_map = false;
>> -	context->base_role.word = root_page_role.word & mmu_base_role_mask.word;
>> +	context->mmu_role.base_role.word =
>> +		root_page_role.word & mmu_base_role_mask.word;
>>  	context->get_pdptr = kvm_pdptr_read;
>> 
>>  	update_permission_bitmask(vcpu, context, true);
>> @@ -5131,10 +5133,13 @@ static void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
>> 
>>  	local_flush = true;
>>  	while (npte--) {
>> +		unsigned int base_role =
>> +			vcpu->arch.mmu->mmu_role.base_role.word;
>> +
>>  		entry = *spte;
>>  		mmu_page_zap_pte(vcpu->kvm, sp, spte);
>>  		if (gentry &&
>> -		      !((sp->role.word ^ vcpu->arch.mmu->base_role.word)
>> +		      !((sp->role.word ^ base_role)
>>  		      & mmu_base_role_mask.word) && rmap_can_add(vcpu))
>>  			mmu_pte_write_new_pte(vcpu, sp, spte, &gentry);
>>  		if (need_remote_flush(entry, *spte))
>> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
>> index 494148818b8d..0d41116bef1f 100644
>> --- a/arch/x86/kvm/vmx.c
>> +++ b/arch/x86/kvm/vmx.c
>> @@ -9028,7 +9028,7 @@ static int nested_vmx_eptp_switching(struct kvm_vcpu *vcpu,
>> 
>>  	kvm_mmu_unload(vcpu);
>>  	mmu->ept_ad = accessed_dirty;
>> -	mmu->base_role.ad_disabled = !accessed_dirty;
>> +	mmu->mmu_role.base_role.ad_disabled = !accessed_dirty;
>>  	vmcs12->ept_pointer = address;
>>  	/*
>>  	 * TODO: Check what's the correct approach in case
--
Vitaly