[PATCH v2] KVM: SVM: Add support for AMD's OSVW feature in guests

From: Boris Ostrovsky
Date: Tue Jan 03 2012 - 23:45:12 EST


From: Boris Ostrovsky <boris.ostrovsky@xxxxxxx>

In some cases guests should not provide workarounds for errata even when the
physical processor is affected. For example, because of erratum 400 on family
10h processors a Linux guest will read an MSR (resulting in VMEXIT) before
going to idle in order to avoid getting stuck in a non-C0 state. This is not
necessary: HLT and IO instructions are intercepted and therefore there is no
reason for erratum 400 workaround in the guest.

This patch allows us to present a guest with certain errata as fixed,
regardless of the state of actual hardware.

Signed-off-by: Boris Ostrovsky <boris.ostrovsky@xxxxxxx>
---
arch/x86/include/asm/kvm_host.h | 6 ++++
arch/x86/kvm/svm.c | 55 +++++++++++++++++++++++++++++++++++++++
arch/x86/kvm/x86.c | 30 ++++++++++++++++++++-
3 files changed, 90 insertions(+), 1 deletions(-)

Second version of OSVW patch. svm_init_osvw() is still in svm.c but registers'
values are kept in virtual MSRs and should be available on reboot after
cross-vendor migration. The registers can also be set from userland so
that they are consistent across a cluster.


diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index b4973f4..9ef9215 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -452,6 +452,12 @@ struct kvm_vcpu_arch {
u32 id;
bool send_user_only;
} apf;
+
+ /* OSVW MSRs (AMD only) */
+ struct {
+ u64 length;
+ u64 status;
+ } osvw;
};

struct kvm_arch {
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index e32243e..b19769d 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -110,6 +110,13 @@ struct nested_state {
#define MSRPM_OFFSETS 16
static u32 msrpm_offsets[MSRPM_OFFSETS] __read_mostly;

+/*
+ * Set osvw_len to higher value when updated Revision Guides
+ * are published and we know what the new status bits are
+ */
+static uint64_t osvw_len = 4, osvw_status;
+static DEFINE_SPINLOCK(svm_lock);
+
struct vcpu_svm {
struct kvm_vcpu vcpu;
struct vmcb *vmcb;
@@ -556,6 +563,20 @@ static void svm_init_erratum_383(void)
erratum_383_found = true;
}

+static void svm_init_osvw(struct kvm_vcpu *vcpu)
+{
+ /*
+ * Guests should see errata 400 and 415 as fixed (assuming that
+ * HLT and IO instructions are intercepted).
+ */
+ vcpu->arch.osvw.length = (osvw_len >= 3) ? (osvw_len) : 3;
+ vcpu->arch.osvw.status = osvw_status & ~(6ULL);
+
+ /* Mark erratum 298 as present */
+ if (osvw_len == 0 && boot_cpu_data.x86 == 0x10)
+ vcpu->arch.osvw.status |= 1;
+}
+
static int has_svm(void)
{
const char *msg;
@@ -620,6 +641,38 @@ static int svm_hardware_enable(void *garbage)
__get_cpu_var(current_tsc_ratio) = TSC_RATIO_DEFAULT;
}

+
+ /*
+ * Get OSVW bits.
+ *
+ * Note that it is possible to have a system with mixed processor
+ * revisions and therefore different OSVW bits. If bits are not the same
+ * on different processors then choose the worst case (i.e. if erratum
+ * is present on one processor and not on another then assume that the
+ * erratum is present everywhere).
+ */
+ if (cpu_has(&boot_cpu_data, X86_FEATURE_OSVW)) {
+ uint64_t len, status;
+ int err;
+
+ len = native_read_msr_safe(MSR_AMD64_OSVW_ID_LENGTH, &err);
+ if (!err)
+ status = native_read_msr_safe(MSR_AMD64_OSVW_STATUS,
+ &err);
+
+ spin_lock(&svm_lock);
+ if (err)
+ osvw_status = osvw_len = 0;
+ else {
+ if (len < osvw_len)
+ osvw_len = len;
+ osvw_status |= status;
+ osvw_status &= (1ULL << osvw_len) - 1;
+ }
+ spin_unlock(&svm_lock);
+ } else
+ osvw_status = osvw_len = 0;
+
svm_init_erratum_383();

return 0;
@@ -1185,6 +1238,8 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
if (kvm_vcpu_is_bsp(&svm->vcpu))
svm->vcpu.arch.apic_base |= MSR_IA32_APICBASE_BSP;

+ svm_init_osvw(&svm->vcpu);
+
return &svm->vcpu;

free_page4:
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 4c938da..49a7b3c 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -598,6 +598,14 @@ static bool guest_cpuid_has_fsgsbase(struct kvm_vcpu *vcpu)
return best && (best->ebx & bit(X86_FEATURE_FSGSBASE));
}

+static bool guest_cpuid_has_osvw(struct kvm_vcpu *vcpu)
+{
+ struct kvm_cpuid_entry2 *best;
+
+ best = kvm_find_cpuid_entry(vcpu, 0x80000001, 0);
+ return best && (best->ecx & bit(X86_FEATURE_OSVW));
+}
+
static void update_cpuid(struct kvm_vcpu *vcpu)
{
struct kvm_cpuid_entry2 *best;
@@ -1700,6 +1708,16 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
*/
pr_unimpl(vcpu, "ignored wrmsr: 0x%x data %llx\n", msr, data);
break;
+ case MSR_AMD64_OSVW_ID_LENGTH:
+ if (!guest_cpuid_has_osvw(vcpu))
+ return 1;
+ vcpu->arch.osvw.length = data;
+ break;
+ case MSR_AMD64_OSVW_STATUS:
+ if (!guest_cpuid_has_osvw(vcpu))
+ return 1;
+ vcpu->arch.osvw.status = data;
+ break;
default:
if (msr && (msr == vcpu->kvm->arch.xen_hvm_config.msr))
return xen_hvm_config(vcpu, data);
@@ -1978,6 +1996,16 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
*/
data = 0xbe702111;
break;
+ case MSR_AMD64_OSVW_ID_LENGTH:
+ if (!guest_cpuid_has_osvw(vcpu))
+ return 1;
+ data = vcpu->arch.osvw.length;
+ break;
+ case MSR_AMD64_OSVW_STATUS:
+ if (!guest_cpuid_has_osvw(vcpu))
+ return 1;
+ data = vcpu->arch.osvw.status;
+ break;
default:
if (!ignore_msrs) {
pr_unimpl(vcpu, "unhandled rdmsr: 0x%x\n", msr);
@@ -2451,7 +2479,7 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
const u32 kvm_supported_word6_x86_features =
F(LAHF_LM) | F(CMP_LEGACY) | 0 /*SVM*/ | 0 /* ExtApicSpace */ |
F(CR8_LEGACY) | F(ABM) | F(SSE4A) | F(MISALIGNSSE) |
- F(3DNOWPREFETCH) | 0 /* OSVW */ | 0 /* IBS */ | F(XOP) |
+ F(3DNOWPREFETCH) | F(OSVW) | 0 /* IBS */ | F(XOP) |
0 /* SKINIT, WDT, LWP */ | F(FMA4) | F(TBM);

/* cpuid 0xC0000001.edx */
--
1.7.3.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/