Re: [PATCH Part2 v5 37/45] KVM: SVM: Add support to handle MSR based Page State Change VMGEXIT

From: Dr. David Alan Gilbert
Date: Tue Sep 28 2021 - 05:56:48 EST


* Brijesh Singh (brijesh.singh@xxxxxxx) wrote:
> SEV-SNP VMs can ask the hypervisor to change the page state in the RMP
> table to be private or shared using the Page State Change MSR protocol
> as defined in the GHCB specification.
>
> Before changing the page state in the RMP entry, look up the page in
> the NPT to make sure that there is a valid mapping for it. If the
> mapping exists, try to find a workable page level between the NPT and
> RMP for the page. If the page is not mapped in the NPT, then create a
> fault such that it gets mapped before we change the page state in the
> RMP entry.
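
Just to restate that flow for my own benefit - roughly:

	if (!kvm_mmu_get_tdp_walk(vcpu, gpa, &pfn, &npt_level))
		return 0;	/* unmapped: fault it in and retry the PSC */

	/* don't go above the level the NPT is actually backed at */
	level = min_t(size_t, level, npt_level);

	/* then update the RMP entry to shared/private at 'level' */

which matches my reading of the code further down.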
>
> Signed-off-by: Brijesh Singh <brijesh.singh@xxxxxxx>
> ---
> arch/x86/include/asm/sev-common.h | 9 ++
> arch/x86/kvm/svm/sev.c | 197 ++++++++++++++++++++++++++++++
> arch/x86/kvm/trace.h | 34 ++++++
> arch/x86/kvm/x86.c | 1 +
> 4 files changed, 241 insertions(+)
>
> diff --git a/arch/x86/include/asm/sev-common.h b/arch/x86/include/asm/sev-common.h
> index 91089967ab09..4980f77aa1d5 100644
> --- a/arch/x86/include/asm/sev-common.h
> +++ b/arch/x86/include/asm/sev-common.h
> @@ -89,6 +89,10 @@ enum psc_op {
> };
>
> #define GHCB_MSR_PSC_REQ 0x014
> +#define GHCB_MSR_PSC_GFN_POS 12
> +#define GHCB_MSR_PSC_GFN_MASK GENMASK_ULL(39, 0)
> +#define GHCB_MSR_PSC_OP_POS 52
> +#define GHCB_MSR_PSC_OP_MASK 0xf
> #define GHCB_MSR_PSC_REQ_GFN(gfn, op) \
> /* GHCBData[55:52] */ \
> (((u64)((op) & 0xf) << 52) | \
> @@ -98,6 +102,11 @@ enum psc_op {
> GHCB_MSR_PSC_REQ)
>
> #define GHCB_MSR_PSC_RESP 0x015
> +#define GHCB_MSR_PSC_ERROR_POS 32
> +#define GHCB_MSR_PSC_ERROR_MASK GENMASK_ULL(31, 0)
> +#define GHCB_MSR_PSC_ERROR GENMASK_ULL(31, 0)
> +#define GHCB_MSR_PSC_RSVD_POS 12
> +#define GHCB_MSR_PSC_RSVD_MASK GENMASK_ULL(19, 0)
> #define GHCB_MSR_PSC_RESP_VAL(val) \
> /* GHCBData[63:32] */ \
> (((u64)(val) & GENMASK_ULL(63, 32)) >> 32)
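
Presumably the decode side just mirrors this; something like (assuming
get_ghcb_msr_bits() is the usual shift-and-mask helper):

	u64 msr = svm->vmcb->control.ghcb_gpa;

	op  = (msr >> GHCB_MSR_PSC_OP_POS)  & GHCB_MSR_PSC_OP_MASK;
	gfn = (msr >> GHCB_MSR_PSC_GFN_POS) & GHCB_MSR_PSC_GFN_MASK;
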
> diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
> index 991b8c996fc1..6d9483ec91ab 100644
> --- a/arch/x86/kvm/svm/sev.c
> +++ b/arch/x86/kvm/svm/sev.c
> @@ -31,6 +31,7 @@
> #include "svm_ops.h"
> #include "cpuid.h"
> #include "trace.h"
> +#include "mmu.h"
>
> #define __ex(x) __kvm_handle_fault_on_reboot(x)
>
> @@ -2905,6 +2906,181 @@ static void set_ghcb_msr(struct vcpu_svm *svm, u64 value)
> svm->vmcb->control.ghcb_gpa = value;
> }
>
> +static int snp_rmptable_psmash(struct kvm *kvm, kvm_pfn_t pfn)
> +{
> + pfn = pfn & ~(KVM_PAGES_PER_HPAGE(PG_LEVEL_2M) - 1);
> +
> + return psmash(pfn);
> +}
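
So with KVM_PAGES_PER_HPAGE(PG_LEVEL_2M) == 512, this just rounds the
pfn down to the base of its 2M region before asking psmash to split the
RMP entry into 4K entries, i.e. effectively:

	pfn &= ~511ULL;		/* e.g. 0x12345 -> 0x12200 */
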
> +
> +static int snp_make_page_shared(struct kvm *kvm, gpa_t gpa, kvm_pfn_t pfn, int level)

....

> +
> + /*
> + * Mark the userspace range unmerable before adding the pages

                               ^^^^^^^^^ typo - "unmergeable"

> + * in the RMP table.
> + */
> + mmap_write_lock(kvm->mm);
> + rc = snp_mark_unmergable(kvm, hva, page_level_size(level));
> + mmap_write_unlock(kvm->mm);
> + if (rc)
> + return -EINVAL;
> + }
> +
> + write_lock(&kvm->mmu_lock);
> +
> + rc = kvm_mmu_get_tdp_walk(vcpu, gpa, &pfn, &npt_level);
> + if (!rc) {
> + /*
> + * This may happen if another vCPU unmapped the page
> + * before we acquire the lock. Retry the PSC.
> + */
> + write_unlock(&kvm->mmu_lock);
> + return 0;
> + }
> +
> + /*
> + * Adjust the level so that we don't go higher than the backing
> + * page level.
> + */
> + level = min_t(size_t, level, npt_level);
> +
> + trace_kvm_snp_psc(vcpu->vcpu_id, pfn, gpa, op, level);
> +
> + switch (op) {
> + case SNP_PAGE_STATE_SHARED:
> + rc = snp_make_page_shared(kvm, gpa, pfn, level);
> + break;
> + case SNP_PAGE_STATE_PRIVATE:
> + rc = rmp_make_private(pfn, gpa, level, sev->asid, false);

Minor nit: it seems a shame that snp_make_page_shared() and
rmp_make_private() both take gpa, pfn and level, just in different
orders.

Dave

> + break;
> + default:
> + rc = -EINVAL;
> + break;
> + }
> +
> + write_unlock(&kvm->mmu_lock);
> +
> + if (rc) {
> + pr_err_ratelimited("Error op %d gpa %llx pfn %llx level %d rc %d\n",
> + op, gpa, pfn, level, rc);
> + return rc;
> + }
> +
> + gpa = gpa + page_level_size(level);
> + }
> +
> + return 0;
> +}
> +
> static int sev_handle_vmgexit_msr_protocol(struct vcpu_svm *svm)
> {
> struct vmcb_control_area *control = &svm->vmcb->control;
> @@ -3005,6 +3181,27 @@ static int sev_handle_vmgexit_msr_protocol(struct vcpu_svm *svm)
> GHCB_MSR_INFO_POS);
> break;
> }
> + case GHCB_MSR_PSC_REQ: {
> + gfn_t gfn;
> + int ret;
> + enum psc_op op;
> +
> + gfn = get_ghcb_msr_bits(svm, GHCB_MSR_PSC_GFN_MASK, GHCB_MSR_PSC_GFN_POS);
> + op = get_ghcb_msr_bits(svm, GHCB_MSR_PSC_OP_MASK, GHCB_MSR_PSC_OP_POS);
> +
> + ret = __snp_handle_page_state_change(vcpu, op, gfn_to_gpa(gfn), PG_LEVEL_4K);
> +
> + if (ret)
> + set_ghcb_msr_bits(svm, GHCB_MSR_PSC_ERROR,
> + GHCB_MSR_PSC_ERROR_MASK, GHCB_MSR_PSC_ERROR_POS);
> + else
> + set_ghcb_msr_bits(svm, 0,
> + GHCB_MSR_PSC_ERROR_MASK, GHCB_MSR_PSC_ERROR_POS);
> +
> + set_ghcb_msr_bits(svm, 0, GHCB_MSR_PSC_RSVD_MASK, GHCB_MSR_PSC_RSVD_POS);
> + set_ghcb_msr_bits(svm, GHCB_MSR_PSC_RESP, GHCB_MSR_INFO_MASK, GHCB_MSR_INFO_POS);
> + break;
> + }
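
If I've read the masks right, the response ends up laid out as:

	bits [63:32]	error code (0 on success, all-ones on failure)
	bits [31:12]	reserved, cleared to zero
	bits [11:0]	GHCB_MSR_PSC_RESP (0x015)

which looks consistent with the RESP_VAL macro in sev-common.h.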
> case GHCB_MSR_TERM_REQ: {
> u64 reason_set, reason_code;
>
> diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
> index 1c360e07856f..35ca1cf8440a 100644
> --- a/arch/x86/kvm/trace.h
> +++ b/arch/x86/kvm/trace.h
> @@ -7,6 +7,7 @@
> #include <asm/svm.h>
> #include <asm/clocksource.h>
> #include <asm/pvclock-abi.h>
> +#include <asm/sev-common.h>
>
> #undef TRACE_SYSTEM
> #define TRACE_SYSTEM kvm
> @@ -1711,6 +1712,39 @@ TRACE_EVENT(kvm_vmgexit_msr_protocol_exit,
> __entry->vcpu_id, __entry->ghcb_gpa, __entry->result)
> );
>
> +/*
> + * Tracepoint for the SEV-SNP page state change processing
> + */
> +#define psc_operation \
> + {SNP_PAGE_STATE_PRIVATE, "private"}, \
> + {SNP_PAGE_STATE_SHARED, "shared"} \
> +
> +TRACE_EVENT(kvm_snp_psc,
> + TP_PROTO(unsigned int vcpu_id, u64 pfn, u64 gpa, u8 op, int level),
> + TP_ARGS(vcpu_id, pfn, gpa, op, level),
> +
> + TP_STRUCT__entry(
> + __field(int, vcpu_id)
> + __field(u64, pfn)
> + __field(u64, gpa)
> + __field(u8, op)
> + __field(int, level)
> + ),
> +
> + TP_fast_assign(
> + __entry->vcpu_id = vcpu_id;
> + __entry->pfn = pfn;
> + __entry->gpa = gpa;
> + __entry->op = op;
> + __entry->level = level;
> + ),
> +
> + TP_printk("vcpu %u, pfn %llx, gpa %llx, op %s, level %d",
> + __entry->vcpu_id, __entry->pfn, __entry->gpa,
> + __print_symbolic(__entry->op, psc_operation),
> + __entry->level)
> +);
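
FWIW the trace output from that should look something like (made up
numbers):

	kvm_snp_psc: vcpu 0, pfn 1a2b3, gpa 1a2b3000, op private, level 1
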
> +
> #endif /* _TRACE_KVM_H */
>
> #undef TRACE_INCLUDE_PATH
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index e5d5c5ed7dd4..afcdc75a99f2 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -12371,3 +12371,4 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_vmgexit_enter);
> EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_vmgexit_exit);
> EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_vmgexit_msr_protocol_enter);
> EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_vmgexit_msr_protocol_exit);
> +EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_snp_psc);
> --
> 2.17.1
>
>
--
Dr. David Alan Gilbert / dgilbert@xxxxxxxxxx / Manchester, UK