Re: [PATCH 09/13] KVM: x86: save/load state on SMM switch

From: Bandan Das
Date: Tue May 05 2015 - 16:44:47 EST


Paolo Bonzini <pbonzini@xxxxxxxxxx> writes:

> +static inline bool guest_cpuid_has_longmode(struct kvm_vcpu *vcpu)
> +{
> + struct kvm_cpuid_entry2 *best;
> +
> + best = kvm_find_cpuid_entry(vcpu, 0x80000001, 0);
> + return best && (best->edx & bit(X86_FEATURE_LM));
> +}
> +

We could combine all guest_cpuid_has* functions into a single
function guest_cpuid_has_feature(vcpu, feature) and avoid code duplication.
Not relevant to this change - just a note (to self).

> static inline bool guest_cpuid_has_osvw(struct kvm_vcpu *vcpu)
> {
> struct kvm_cpuid_entry2 *best;
> diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
> index f6b641207416..a49606871277 100644
> --- a/arch/x86/kvm/emulate.c
> +++ b/arch/x86/kvm/emulate.c
> @@ -2262,12 +2262,253 @@ static int em_lseg(struct x86_emulate_ctxt *ctxt)
> return rc;
> }
>
> +static int emulator_has_longmode(struct x86_emulate_ctxt *ctxt)
> +{
> + u32 eax, ebx, ecx, edx;
> +
> + eax = 0x80000001;
> + ecx = 0;
> + ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx);
> + return edx & bit(X86_FEATURE_LM);
> +}
> +
> +#define get_smstate(type, smbase, offset) \
> + ({ \
> + type __val; \
> + int r = ctxt->ops->read_std(ctxt, smbase + offset, &__val, \
> + sizeof(__val), NULL); \
> + if (r != X86EMUL_CONTINUE) \
> + return X86EMUL_UNHANDLEABLE; \
> + __val; \
> + })
> +
> +static void rsm_set_desc_flags(struct desc_struct *desc, u16 flags)
> +{
> + desc->g = (flags >> 15) & 1;
> + desc->d = (flags >> 14) & 1;
> + desc->l = (flags >> 13) & 1;
> + desc->avl = (flags >> 12) & 1;
> + desc->p = (flags >> 7) & 1;
> + desc->dpl = (flags >> 5) & 3;
> + desc->s = (flags >> 4) & 1;
> + desc->type = flags & 15;
> +}
> +
> +static int rsm_load_seg_32(struct x86_emulate_ctxt *ctxt, u64 smbase, int n)
> +{
> + struct desc_struct desc;
> + int offset;
> + u16 selector;
> +
> + selector = get_smstate(u32, smbase, 0x7fa8 + n * 4);
Probably a good idea to use #defines for all the offsets here
and elsewhere.

> +
> + if (n < 3)
> + offset = 0x7f84 + n * 12;
> + else
> + offset = 0x7f2c + (n - 3) * 12;
> +
> + set_desc_base(&desc, get_smstate(u32, smbase, offset + 8));
> + set_desc_limit(&desc, get_smstate(u32, smbase, offset + 4));
> + rsm_set_desc_flags(&desc, get_smstate(u32, smbase, offset));
> + ctxt->ops->set_segment(ctxt, selector, &desc, 0, n);
> + return X86EMUL_CONTINUE;
> +}
> +
> +static int rsm_load_seg_64(struct x86_emulate_ctxt *ctxt, u64 smbase, int n)
> +{
> + struct desc_struct desc;
> + int offset;
> + u16 selector;
> + u32 base3;
> +
> + offset = 0x7e00 + n * 16;
> +
> + selector = get_smstate(u16, smbase, offset);
> + rsm_set_desc_flags(&desc, get_smstate(u16, smbase, offset + 2));
> + set_desc_limit(&desc, get_smstate(u32, smbase, offset + 4));
> + set_desc_base(&desc, get_smstate(u32, smbase, offset + 8));
> + base3 = get_smstate(u32, smbase, offset + 12);
> +
> + ctxt->ops->set_segment(ctxt, selector, &desc, base3, n);
> + return X86EMUL_CONTINUE;
> +}
> +
> +static int rsm_enter_protected_mode(struct x86_emulate_ctxt *ctxt,
> + u64 cr0, u64 cr4)
> +{
> + int bad;
> +
> + /*
> + * First enable PAE, long mode needs it before CR0.PG = 1 is set.
> + * Then enable protected mode. However, PCID cannot be enabled
> + * if EFER.LMA=0, so set it separately.
> + */
> + bad = ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PCIDE);
> + if (bad)
> + return X86EMUL_UNHANDLEABLE;
> +
> + bad = ctxt->ops->set_cr(ctxt, 0, cr0);
> + if (bad)
> + return X86EMUL_UNHANDLEABLE;
> +
> + if (cr4 & X86_CR4_PCIDE) {
> + bad = ctxt->ops->set_cr(ctxt, 4, cr4);
> + if (bad)
> + return X86EMUL_UNHANDLEABLE;
> + }
> +
> + return X86EMUL_CONTINUE;
> +}
> +
> +static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt, u64 smbase)
> +{
> + struct desc_struct desc;
> + struct desc_ptr dt;
> + u16 selector;
> + u32 val, cr0, cr4;
> + int i;
> +
> + cr0 = get_smstate(u32, smbase, 0x7ffc);
> + ctxt->ops->set_cr(ctxt, 3, get_smstate(u32, smbase, 0x7ff8));
> + ctxt->eflags = get_smstate(u32, smbase, 0x7ff4) | X86_EFLAGS_FIXED;
> + ctxt->_eip = get_smstate(u32, smbase, 0x7ff0);
> +
> + for (i = 0; i < 8; i++)
> + *reg_write(ctxt, i) = get_smstate(u32, smbase, 0x7fd0 + i * 4);
> +
> + val = get_smstate(u32, smbase, 0x7fcc);
> + ctxt->ops->set_dr(ctxt, 6, (val & DR6_VOLATILE) | DR6_FIXED_1);
> + val = get_smstate(u32, smbase, 0x7fc8);
> + ctxt->ops->set_dr(ctxt, 7, (val & DR7_VOLATILE) | DR7_FIXED_1);
> +
> + selector = get_smstate(u32, smbase, 0x7fc4);
> + set_desc_base(&desc, get_smstate(u32, smbase, 0x7f64));
> + set_desc_limit(&desc, get_smstate(u32, smbase, 0x7f60));
> + rsm_set_desc_flags(&desc, get_smstate(u32, smbase, 0x7f5c));
> + ctxt->ops->set_segment(ctxt, selector, &desc, 0, VCPU_SREG_TR);
> +
> + selector = get_smstate(u32, smbase, 0x7fc0);
> + set_desc_base(&desc, get_smstate(u32, smbase, 0x7f80));
> + set_desc_limit(&desc, get_smstate(u32, smbase, 0x7f7c));
> + rsm_set_desc_flags(&desc, get_smstate(u32, smbase, 0x7f78));
> + ctxt->ops->set_segment(ctxt, selector, &desc, 0, VCPU_SREG_LDTR);
> +
> + dt.address = get_smstate(u32, smbase, 0x7f74);
> + dt.size = get_smstate(u32, smbase, 0x7f70);
> + ctxt->ops->set_gdt(ctxt, &dt);
> +
> + dt.address = get_smstate(u32, smbase, 0x7f58);
> + dt.size = get_smstate(u32, smbase, 0x7f54);
> + ctxt->ops->set_idt(ctxt, &dt);
> +
> + for (i = 0; i < 6; i++) {
> + int r = rsm_load_seg_32(ctxt, smbase, i);
> + if (r != X86EMUL_CONTINUE)
> + return r;
This return is redundant since rsm_load_seg_32 always returns
success. Same for rsm_load_seg_64()

> + }
> +
> + cr4 = get_smstate(u32, smbase, 0x7f14);
> +
> + ctxt->ops->set_smbase(ctxt, get_smstate(u32, smbase, 0x7ef8));
> +
> + return rsm_enter_protected_mode(ctxt, cr0, cr4);
> +}
> +
> +static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt, u64 smbase)
> +{
> + struct desc_struct desc;
> + struct desc_ptr dt;
> + u64 val, cr0, cr4;
> + u32 base3;
> + u16 selector;
> + int i;
> +
> + for (i = 0; i < 16; i++)
> + *reg_write(ctxt, i) = get_smstate(u64, smbase, 0x7ff8 - i * 8);
> +
> + ctxt->_eip = get_smstate(u64, smbase, 0x7f78);
> + ctxt->eflags = get_smstate(u32, smbase, 0x7f70) | X86_EFLAGS_FIXED;
> +
> + val = get_smstate(u32, smbase, 0x7f68);
> + ctxt->ops->set_dr(ctxt, 6, (val & DR6_VOLATILE) | DR6_FIXED_1);
> + val = get_smstate(u32, smbase, 0x7f60);
> + ctxt->ops->set_dr(ctxt, 7, (val & DR7_VOLATILE) | DR7_FIXED_1);
> +
> + cr0 = get_smstate(u64, smbase, 0x7f58);
> + ctxt->ops->set_cr(ctxt, 3, get_smstate(u64, smbase, 0x7f50));
> + cr4 = get_smstate(u64, smbase, 0x7f48);
> + ctxt->ops->set_smbase(ctxt, get_smstate(u32, smbase, 0x7f00));
> + val = get_smstate(u64, smbase, 0x7ed0);
> + ctxt->ops->set_msr(ctxt, MSR_EFER, val & ~EFER_LMA);
> +
> + selector = get_smstate(u32, smbase, 0x7e90);
> + rsm_set_desc_flags(&desc, get_smstate(u32, smbase, 0x7e92));
> + set_desc_limit(&desc, get_smstate(u32, smbase, 0x7e94));
> + set_desc_base(&desc, get_smstate(u32, smbase, 0x7e98));
> + base3 = get_smstate(u32, smbase, 0x7e9c);
> + ctxt->ops->set_segment(ctxt, selector, &desc, base3, VCPU_SREG_TR);
> +
> + dt.size = get_smstate(u32, smbase, 0x7e84);
> + dt.address = get_smstate(u64, smbase, 0x7e88);
> + ctxt->ops->set_idt(ctxt, &dt);
> +
> + selector = get_smstate(u32, smbase, 0x7e70);
> + rsm_set_desc_flags(&desc, get_smstate(u32, smbase, 0x7e72));
> + set_desc_limit(&desc, get_smstate(u32, smbase, 0x7e74));
> + set_desc_base(&desc, get_smstate(u32, smbase, 0x7e78));
> + base3 = get_smstate(u32, smbase, 0x7e7c);
> + ctxt->ops->set_segment(ctxt, selector, &desc, base3, VCPU_SREG_LDTR);
> +
> + dt.size = get_smstate(u32, smbase, 0x7e64);
> + dt.address = get_smstate(u64, smbase, 0x7e68);
> + ctxt->ops->set_gdt(ctxt, &dt);
> +
> + for (i = 0; i < 6; i++) {
> + int r = rsm_load_seg_64(ctxt, smbase, i);
> + if (r != X86EMUL_CONTINUE)
> + return r;
> + }
> +
> + return rsm_enter_protected_mode(ctxt, cr0, cr4);
> +}
> +
> static int em_rsm(struct x86_emulate_ctxt *ctxt)
> {
> + unsigned long cr0, cr4, efer;
> + u64 smbase;
> + int ret;
> +
> + printk("rsm\n");
> if ((ctxt->emul_flags & X86EMUL_SMM_MASK) == 0)
> return emulate_ud(ctxt);
>
> - return X86EMUL_UNHANDLEABLE;
> + /*
> + * Get back to real mode, to prepare a safe state in which
> + * to load CR0/CR3/CR4/EFER. Also this will ensure that
> + * addresses passed to read_std/write_std are not virtual.
> + */
I am trying to understand this. Aren't we are already in real mode
here since we are in smm or am I missing something..

> + cr0 = ctxt->ops->get_cr(ctxt, 0);
> + if (cr0 & X86_CR0_PE)
> + ctxt->ops->set_cr(ctxt, 0, cr0 & ~(X86_CR0_PG | X86_CR0_PE));
> + cr4 = ctxt->ops->get_cr(ctxt, 4);
> + if (cr0 & X86_CR4_PAE)
> + ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PAE);
> + efer = 0;
> + ctxt->ops->set_msr(ctxt, MSR_EFER, efer);
> +
> + ctxt->ops->get_msr(ctxt, MSR_IA32_SMBASE, &smbase);
> + if (emulator_has_longmode(ctxt))
> + ret = rsm_load_state_64(ctxt, smbase + 0x8000);
> + else
> + ret = rsm_load_state_32(ctxt, smbase + 0x8000);
> +
> + if (ret != X86EMUL_CONTINUE) {
> + /* FIXME: should triple fault */
> + return X86EMUL_UNHANDLEABLE;
> + }
> +
> + ctxt->emul_flags &= ~X86EMUL_SMM_MASK;
> + return X86EMUL_CONTINUE;
> }
>
> static void
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 4cd7a2a18e93..ab6a38617813 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -6232,12 +6232,226 @@ static void process_nmi(struct kvm_vcpu *vcpu)
> kvm_make_request(KVM_REQ_EVENT, vcpu);
> }
>
> +#define put_smstate(type, buf, offset, val) \
> + *(type *)((buf) + (offset) - 0x7e00) = val
> +
> +static u16 process_smi_get_segment_flags(struct kvm_segment *seg)
> +{
> + u16 flags = 0;
> + flags |= seg->g << 15;
> + flags |= seg->db << 14;
> + flags |= seg->l << 13;
> + flags |= seg->avl << 12;
> + flags |= seg->present << 7;
> + flags |= seg->dpl << 5;
> + flags |= seg->s << 4;
> + flags |= seg->type;
> + return flags;
> +}
> +
> +static void process_smi_save_seg_32(struct kvm_vcpu *vcpu, char *buf, int n)
> +{
> + struct kvm_segment seg;
> + int offset;
> +
> + kvm_get_segment(vcpu, &seg, n);
> + put_smstate(u32, buf, 0x7fa8 + n * 4, seg.selector);
> +
> + if (n < 3)
> + offset = 0x7f84 + n * 12;
> + else
> + offset = 0x7f2c + (n - 3) * 12;
> +
> + put_smstate(u32, buf, offset + 8, seg.base);
> + put_smstate(u32, buf, offset + 4, seg.limit);
> + put_smstate(u32, buf, offset, process_smi_get_segment_flags(&seg));
> +}
> +
> +static void process_smi_save_seg_64(struct kvm_vcpu *vcpu, char *buf, int n)
> +{
> + struct kvm_segment seg;
> + int offset;
> +
> + kvm_get_segment(vcpu, &seg, n);
> + offset = 0x7e00 + n * 16;
> +
> + put_smstate(u16, buf, offset, seg.selector);
> + put_smstate(u16, buf, offset + 2, process_smi_get_segment_flags(&seg));
> + put_smstate(u32, buf, offset + 4, seg.limit);
> + put_smstate(u64, buf, offset + 8, seg.base);
> +}
> +
> +static void process_smi_save_state_32(struct kvm_vcpu *vcpu, char *buf)
> +{
> + struct desc_ptr dt;
> + struct kvm_segment seg;
> + unsigned long val;
> + int i;
> +
> + put_smstate(u32, buf, 0x7ffc, kvm_read_cr0(vcpu));
> + put_smstate(u32, buf, 0x7ff8, kvm_read_cr3(vcpu));
> + put_smstate(u32, buf, 0x7ff4, kvm_get_rflags(vcpu));
> + put_smstate(u32, buf, 0x7ff0, kvm_rip_read(vcpu));
> +
> + for (i = 0; i < 8; i++)
> + put_smstate(u32, buf, 0x7fd0 + i * 4, kvm_register_read(vcpu, i));
> +
> + kvm_get_dr(vcpu, 6, &val);
> + put_smstate(u32, buf, 0x7fcc, (u32)val);
> + kvm_get_dr(vcpu, 7, &val);
> + put_smstate(u32, buf, 0x7fc8, (u32)val);
> +
> + kvm_get_segment(vcpu, &seg, VCPU_SREG_TR);
> + put_smstate(u32, buf, 0x7fc4, seg.selector);
> + put_smstate(u32, buf, 0x7f64, seg.base);
> + put_smstate(u32, buf, 0x7f60, seg.limit);
> + put_smstate(u32, buf, 0x7f5c, process_smi_get_segment_flags(&seg));
> +
> + kvm_get_segment(vcpu, &seg, VCPU_SREG_LDTR);
> + put_smstate(u32, buf, 0x7fc0, seg.selector);
> + put_smstate(u32, buf, 0x7f80, seg.base);
> + put_smstate(u32, buf, 0x7f7c, seg.limit);
> + put_smstate(u32, buf, 0x7f78, process_smi_get_segment_flags(&seg));
> +
> + kvm_x86_ops->get_gdt(vcpu, &dt);
> + put_smstate(u32, buf, 0x7f74, dt.address);
> + put_smstate(u32, buf, 0x7f70, dt.size);
> +
> + kvm_x86_ops->get_idt(vcpu, &dt);
> + put_smstate(u32, buf, 0x7f58, dt.address);
> + put_smstate(u32, buf, 0x7f54, dt.size);
> +
> + for (i = 0; i < 6; i++)
> + process_smi_save_seg_32(vcpu, buf, i);
> +
> + put_smstate(u32, buf, 0x7f14, kvm_read_cr4(vcpu));
> +
> + /* revision id */
> + put_smstate(u32, buf, 0x7efc, 0x00020000);
> + put_smstate(u32, buf, 0x7ef8, vcpu->arch.smbase);
> +}
> +
> +static void process_smi_save_state_64(struct kvm_vcpu *vcpu, char *buf)
> +{
> +#ifdef CONFIG_X86_64
> + struct desc_ptr dt;
> + struct kvm_segment seg;
> + unsigned long val;
> + int i;
> +
> + for (i = 0; i < 16; i++)
> + put_smstate(u64, buf, 0x7ff8 - i * 8, kvm_register_read(vcpu, i));
> +
> + put_smstate(u64, buf, 0x7f78, kvm_rip_read(vcpu));
> + put_smstate(u32, buf, 0x7f70, kvm_get_rflags(vcpu));
> +
> + kvm_get_dr(vcpu, 6, &val);
> + put_smstate(u64, buf, 0x7f68, val);
> + kvm_get_dr(vcpu, 7, &val);
> + put_smstate(u64, buf, 0x7f60, val);
> +
> + put_smstate(u64, buf, 0x7f58, kvm_read_cr0(vcpu));
> + put_smstate(u64, buf, 0x7f50, kvm_read_cr3(vcpu));
> + put_smstate(u64, buf, 0x7f48, kvm_read_cr4(vcpu));
> +
> + put_smstate(u32, buf, 0x7f00, vcpu->arch.smbase);
> +
> + /* revision id */
> + put_smstate(u32, buf, 0x7efc, 0x00020064);
Is the revision id (and 0x00020000 for process_smi*_32()) from the
spec ? I can't seem to find them.

Bandan
...
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/