Re: [PATCH v2 2/5] KVM: SVM: Always intercept RDMSR for TMCCT (current APIC timer count)
From: Sean Christopherson
Date: Thu May 07 2026 - 11:46:48 EST
On Thu, May 07, 2026, Naveen N Rao wrote:
> On Wed, May 06, 2026 at 11:47:43AM -0700, Sean Christopherson wrote:
> > Explicitly intercept RDMSR for TMMCT, a.k.a. the current APIC timer count,
> > when x2AVIC is enabled, as TMMCT reads aren't accelerated by hardware.
>
> s/TMMCT/TMCCT for the above two lines.
>
> > Disabling interception is suboptimal as the RDMSR generates an
> > AVIC_UNACCELERATED_ACCESS fault #VMEXIT, which forces KVM to decode the
> > instruction to figure out what the guest was trying to access.
> >
> > Note, the only reason this isn't a fatal bug is that the AVIC architecture
> > had the foresight to guard against buggy hypervisors. E.g. if hardware
> > simply read from the virtual APIC page, the guest would get garbage.
> >
> > Fixes: 4d1d7942e36a ("KVM: SVM: Introduce logic to (de)activate x2AVIC mode")
> > Cc: stable@xxxxxxxxxxxxxxx
> > Cc: Naveen N Rao (AMD) <naveen@xxxxxxxxxx>
> > Signed-off-by: Sean Christopherson <seanjc@xxxxxxxxxx>
> > ---
> > arch/x86/kvm/svm/avic.c | 3 +++
> > 1 file changed, 3 insertions(+)
> >
> > diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c
> > index 4f203e503e8e..d693c9ff9f18 100644
> > --- a/arch/x86/kvm/svm/avic.c
> > +++ b/arch/x86/kvm/svm/avic.c
> > @@ -172,6 +172,9 @@ static void avic_set_x2apic_msr_interception(struct vcpu_svm *svm,
> > svm_set_intercept_for_msr(vcpu, APIC_BASE_MSR + i,
> > MSR_TYPE_R, intercept);
> >
> > + if (!intercept)
> > + svm_enable_intercept_for_msr(vcpu, X2APIC_MSR(APIC_TMCCT), MSR_TYPE_R);
> > +
>
> Nit: I'm thinking it might be better to roll this into the previous
> loop. That way, all MSR_TYPE_R intercepts are setup in one place and we
> don't need to parse the if (!intercept) condition..
>
> Something like this?
>
> diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c
> index c5d46c0d2403..f292cba45e07 100644
> --- a/arch/x86/kvm/svm/avic.c
> +++ b/arch/x86/kvm/svm/avic.c
> @@ -136,11 +136,9 @@ static void avic_set_x2apic_msr_interception(struct vcpu_svm *svm,
>
> for_each_set_bit(i, (unsigned long *)&x2apic_readable_mask,
> BITS_PER_TYPE(x2apic_readable_mask))
> - svm_set_intercept_for_msr(vcpu, APIC_BASE_MSR + i,
> - MSR_TYPE_R, intercept);
> -
> - if (!intercept)
> - svm_enable_intercept_for_msr(vcpu, X2APIC_MSR(APIC_TMCCT), MSR_TYPE_R);
> + if (APIC_BASE_MSR + i != X2APIC_MSR(APIC_TMCCT))
> + svm_set_intercept_for_msr(vcpu, APIC_BASE_MSR + i,
> + MSR_TYPE_R, intercept);
Hmm, I don't love burying the check in the for-loop, as it makes it a bit hard to
see that KVM *never* toggles the intercept for TMCCT. And in the unlikely scenario
we need to exempt more MSRs, this will be annoying to maintain.
What about adjusting the mask straightaway?
diff --git arch/x86/kvm/lapic.h arch/x86/kvm/lapic.h
index 274885af4ebc..2842a7b70d74 100644
--- arch/x86/kvm/lapic.h
+++ arch/x86/kvm/lapic.h
@@ -156,6 +156,8 @@ int kvm_hv_vapic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data);
int kvm_lapic_set_pv_eoi(struct kvm_vcpu *vcpu, u64 data, unsigned long len);
void kvm_lapic_exit(void);
+#define APIC_REG_MASK(reg) (1ull << ((reg) >> 4))
+
u64 kvm_lapic_readable_reg_mask(struct kvm_lapic *apic);
static inline void kvm_lapic_set_irr(int vec, struct kvm_lapic *apic)
diff --git arch/x86/kvm/svm/avic.c arch/x86/kvm/svm/avic.c
index c5d46c0d2403..e51f468477a4 100644
--- arch/x86/kvm/svm/avic.c
+++ arch/x86/kvm/svm/avic.c
@@ -132,16 +132,14 @@ static void avic_set_x2apic_msr_interception(struct vcpu_svm *svm,
if (!x2avic_enabled)
return;
- x2apic_readable_mask = kvm_lapic_readable_reg_mask(vcpu->arch.apic);
+ x2apic_readable_mask = kvm_lapic_readable_reg_mask(vcpu->arch.apic) &
+ ~APIC_REG_MASK(APIC_TMCCT);
for_each_set_bit(i, (unsigned long *)&x2apic_readable_mask,
BITS_PER_TYPE(x2apic_readable_mask))
svm_set_intercept_for_msr(vcpu, APIC_BASE_MSR + i,
MSR_TYPE_R, intercept);
- if (!intercept)
- svm_enable_intercept_for_msr(vcpu, X2APIC_MSR(APIC_TMCCT), MSR_TYPE_R);
-
svm_set_intercept_for_msr(vcpu, X2APIC_MSR(APIC_TASKPRI), MSR_TYPE_W, intercept);
svm_set_intercept_for_msr(vcpu, X2APIC_MSR(APIC_EOI), MSR_TYPE_W, intercept);
svm_set_intercept_for_msr(vcpu, X2APIC_MSR(APIC_SELF_IPI), MSR_TYPE_W, intercept);
Oh! Actually, even better! This is a great opportunity to dedup Intel vs. AMD
(and we can/should do the same for writes).
diff --git arch/x86/kvm/lapic.c arch/x86/kvm/lapic.c
index 4078e624ca66..ac57d6dbe032 100644
--- arch/x86/kvm/lapic.c
+++ arch/x86/kvm/lapic.c
@@ -1730,7 +1730,7 @@ static inline struct kvm_lapic *to_lapic(struct kvm_io_device *dev)
#define APIC_REGS_MASK(first, count) \
(APIC_REG_MASK(first) * ((1ull << (count)) - 1))
-u64 kvm_lapic_readable_reg_mask(struct kvm_lapic *apic)
+static u64 kvm_lapic_readable_reg_mask(struct kvm_lapic *apic)
{
/* Leave bits '0' for reserved and write-only registers. */
u64 valid_reg_mask =
@@ -1766,7 +1766,22 @@ u64 kvm_lapic_readable_reg_mask(struct kvm_lapic *apic)
return valid_reg_mask;
}
-EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_lapic_readable_reg_mask);
+
+u64 kvm_x2apic_disable_intercept_reg_mask(struct kvm_vcpu *vcpu)
+{
+ if (WARN_ON_ONCE(!lapic_in_kernel(vcpu)))
+ return 0;
+
+ /*
+ * TMMCT, a.k.a. the current APIC timer count, reads aren't accelerated
+ * by hardware (Intel or AMD), and handling the fault-like APIC-access
+ * VM-Exit is more expensive than handling a WRMSR VM-Exit (because the
+ * APIC-access path requires slow emulation of the code stream).
+ */
+ return kvm_lapic_readable_reg_mask(vcpu->arch.apic) &
+ ~APIC_REG_MASK(APIC_TMCCT);
+}
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_x2apic_disable_intercept_reg_mask);
static int kvm_lapic_reg_read(struct kvm_lapic *apic, u32 offset, int len,
void *data)
diff --git arch/x86/kvm/lapic.h arch/x86/kvm/lapic.h
index 274885af4ebc..7b61167e1b5c 100644
--- arch/x86/kvm/lapic.h
+++ arch/x86/kvm/lapic.h
@@ -156,7 +156,7 @@ int kvm_hv_vapic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data);
int kvm_lapic_set_pv_eoi(struct kvm_vcpu *vcpu, u64 data, unsigned long len);
void kvm_lapic_exit(void);
-u64 kvm_lapic_readable_reg_mask(struct kvm_lapic *apic);
+u64 kvm_x2apic_disable_intercept_reg_mask(struct kvm_vcpu *vcpu);
static inline void kvm_lapic_set_irr(int vec, struct kvm_lapic *apic)
{
diff --git arch/x86/kvm/svm/avic.c arch/x86/kvm/svm/avic.c
index c5d46c0d2403..1532bfff5686 100644
--- arch/x86/kvm/svm/avic.c
+++ arch/x86/kvm/svm/avic.c
@@ -132,16 +132,13 @@ static void avic_set_x2apic_msr_interception(struct vcpu_svm *svm,
if (!x2avic_enabled)
return;
- x2apic_readable_mask = kvm_lapic_readable_reg_mask(vcpu->arch.apic);
+ x2apic_readable_mask = kvm_x2apic_disable_intercept_reg_mask(vcpu);
for_each_set_bit(i, (unsigned long *)&x2apic_readable_mask,
BITS_PER_TYPE(x2apic_readable_mask))
svm_set_intercept_for_msr(vcpu, APIC_BASE_MSR + i,
MSR_TYPE_R, intercept);
- if (!intercept)
- svm_enable_intercept_for_msr(vcpu, X2APIC_MSR(APIC_TMCCT), MSR_TYPE_R);
-
svm_set_intercept_for_msr(vcpu, X2APIC_MSR(APIC_TASKPRI), MSR_TYPE_W, intercept);
svm_set_intercept_for_msr(vcpu, X2APIC_MSR(APIC_EOI), MSR_TYPE_W, intercept);
svm_set_intercept_for_msr(vcpu, X2APIC_MSR(APIC_SELF_IPI), MSR_TYPE_W, intercept);
diff --git arch/x86/kvm/vmx/vmx.c arch/x86/kvm/vmx/vmx.c
index 859f4bc01445..40dff50e800a 100644
--- arch/x86/kvm/vmx/vmx.c
+++ arch/x86/kvm/vmx/vmx.c
@@ -4138,7 +4138,7 @@ static void vmx_update_msr_bitmap_x2apic(struct kvm_vcpu *vcpu)
* mode, only the current timer count needs on-demand emulation by KVM.
*/
if (mode & MSR_BITMAP_MODE_X2APIC_APICV)
- msr_bitmap[read_idx] = ~kvm_lapic_readable_reg_mask(vcpu->arch.apic);
+ msr_bitmap[read_idx] = ~kvm_x2apic_disable_intercept_reg_mask(vcpu);
else
msr_bitmap[read_idx] = ~0ull;
msr_bitmap[write_idx] = ~0ull;
@@ -4151,7 +4151,6 @@ static void vmx_update_msr_bitmap_x2apic(struct kvm_vcpu *vcpu)
!(mode & MSR_BITMAP_MODE_X2APIC));
if (mode & MSR_BITMAP_MODE_X2APIC_APICV) {
- vmx_enable_intercept_for_msr(vcpu, X2APIC_MSR(APIC_TMCCT), MSR_TYPE_RW);
vmx_disable_intercept_for_msr(vcpu, X2APIC_MSR(APIC_EOI), MSR_TYPE_W);
vmx_disable_intercept_for_msr(vcpu, X2APIC_MSR(APIC_SELF_IPI), MSR_TYPE_W);
if (enable_ipiv)