Re: [EXTERNAL] There is a null-ptr-deref bug in kvm_dirty_ring_get in virt/kvm/dirty_ring.c

From: David Woodhouse
Date: Tue Nov 16 2021 - 12:07:19 EST


On Tue, 2021-11-16 at 16:22 +0000, David Woodhouse wrote:
>
> I suppose we could make it a KVM_XEN_VCPU_SET_ATTR instead, and thus
> associate it with a particular CPU at least for the initial wallclock
> write?
>

We'd end up needing to do all this too, to plumb that 'vcpu' through to
the actual mark_page_dirty_in_slot(). And I might end up wanting to
kill kvm_write_guest() et al completely. If we're never supposed to be
writing without a vCPU associated with the write, then we should always
use kvm_vcpu_write_guest(), shouldn't we?

Paolo, what do you think? Want me to finish and test this and submit
it, along with changing the shinfo address to a per-vCPU thing?

diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index 326cdfec74a1..d8411ce4db4b 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -1143,7 +1143,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
/* Mark the page dirty only if the fault is handled successfully */
if (writable && !ret) {
kvm_set_pfn_dirty(pfn);
- mark_page_dirty_in_slot(kvm, memslot, gfn);
+ mark_page_dirty_in_slot(kvm, vcpu, memslot, gfn);
}

out_unlock:
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 33794379949e..4fd2ad5327b6 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -3090,7 +3090,7 @@ fast_pf_fix_direct_spte(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault,
return false;

if (is_writable_pte(new_spte) && !is_writable_pte(old_spte))
- mark_page_dirty_in_slot(vcpu->kvm, fault->slot, fault->gfn);
+ mark_page_dirty_in_slot(vcpu->kvm, vcpu, fault->slot, fault->gfn);

return true;
}
diff --git a/arch/x86/kvm/mmu/spte.c b/arch/x86/kvm/mmu/spte.c
index 0c76c45fdb68..0598515f3ae2 100644
--- a/arch/x86/kvm/mmu/spte.c
+++ b/arch/x86/kvm/mmu/spte.c
@@ -184,7 +184,7 @@ bool make_spte(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
if ((spte & PT_WRITABLE_MASK) && kvm_slot_dirty_track_enabled(slot)) {
/* Enforced by kvm_mmu_hugepage_adjust. */
WARN_ON(level > PG_LEVEL_4K);
- mark_page_dirty_in_slot(vcpu->kvm, slot, gfn);
+ mark_page_dirty_in_slot(vcpu->kvm, vcpu, slot, gfn);
}

*new_spte = spte;
diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index a54c3491af42..c5669c9918a4 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -247,7 +247,7 @@ static void handle_changed_spte_dirty_log(struct kvm *kvm, int as_id, gfn_t gfn,
if ((!is_writable_pte(old_spte) || pfn_changed) &&
is_writable_pte(new_spte)) {
slot = __gfn_to_memslot(__kvm_memslots(kvm, as_id), gfn);
- mark_page_dirty_in_slot(kvm, slot, gfn);
+ mark_page_dirty_in_slot(kvm, NULL, slot, gfn);
}
}

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index a879e4d08758..c14ce545fae9 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2118,7 +2118,7 @@ static s64 get_kvmclock_base_ns(void)
}
#endif

-void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock, int sec_hi_ofs)
+void kvm_write_wall_clock(struct kvm_vcpu *vcpu, gpa_t wall_clock, int sec_hi_ofs)
{
int version;
int r;
@@ -2129,7 +2129,7 @@ void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock, int sec_hi_ofs)
if (!wall_clock)
return;

- r = kvm_read_guest(kvm, wall_clock, &version, sizeof(version));
+ r = kvm_vcpu_read_guest(vcpu, wall_clock, &version, sizeof(version));
if (r)
return;

@@ -2138,7 +2138,7 @@ void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock, int sec_hi_ofs)

++version;

- if (kvm_write_guest(kvm, wall_clock, &version, sizeof(version)))
+ if (kvm_vcpu_write_guest(vcpu, wall_clock, &version, sizeof(version)))
return;

/*
@@ -2146,22 +2146,22 @@ void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock, int sec_hi_ofs)
* system time (updated by kvm_guest_time_update below) to the
* wall clock specified here. We do the reverse here.
*/
- wall_nsec = ktime_get_real_ns() - get_kvmclock_ns(kvm);
+ wall_nsec = ktime_get_real_ns() - get_kvmclock_ns(vcpu->kvm);

wc.nsec = do_div(wall_nsec, 1000000000);
wc.sec = (u32)wall_nsec; /* overflow in 2106 guest time */
wc.version = version;

- kvm_write_guest(kvm, wall_clock, &wc, sizeof(wc));
+ kvm_vcpu_write_guest(vcpu, wall_clock, &wc, sizeof(wc));

if (sec_hi_ofs) {
wc_sec_hi = wall_nsec >> 32;
- kvm_write_guest(kvm, wall_clock + sec_hi_ofs,
- &wc_sec_hi, sizeof(wc_sec_hi));
+ kvm_vcpu_write_guest(vcpu, wall_clock + sec_hi_ofs,
+ &wc_sec_hi, sizeof(wc_sec_hi));
}

version++;
- kvm_write_guest(kvm, wall_clock, &version, sizeof(version));
+ kvm_vcpu_write_guest(vcpu, wall_clock, &version, sizeof(version));
}

static void kvm_write_system_time(struct kvm_vcpu *vcpu, gpa_t system_time,
@@ -3353,7 +3353,7 @@ static void record_steal_time(struct kvm_vcpu *vcpu)
out:
user_access_end();
dirty:
- mark_page_dirty_in_slot(vcpu->kvm, ghc->memslot, gpa_to_gfn(ghc->gpa));
+ mark_page_dirty_in_slot(vcpu->kvm, vcpu, ghc->memslot, gpa_to_gfn(ghc->gpa));
}

int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
@@ -3494,14 +3494,14 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
return 1;

vcpu->kvm->arch.wall_clock = data;
- kvm_write_wall_clock(vcpu->kvm, data, 0);
+ kvm_write_wall_clock(vcpu, data, 0);
break;
case MSR_KVM_WALL_CLOCK:
if (!guest_pv_has(vcpu, KVM_FEATURE_CLOCKSOURCE))
return 1;

vcpu->kvm->arch.wall_clock = data;
- kvm_write_wall_clock(vcpu->kvm, data, 0);
+ kvm_write_wall_clock(vcpu, data, 0);
break;
case MSR_KVM_SYSTEM_TIME_NEW:
if (!guest_pv_has(vcpu, KVM_FEATURE_CLOCKSOURCE2))
@@ -4421,7 +4421,7 @@ static void kvm_steal_time_set_preempted(struct kvm_vcpu *vcpu)
if (!copy_to_user_nofault(&st->preempted, &preempted, sizeof(preempted)))
vcpu->arch.st.preempted = KVM_VCPU_PREEMPTED;

- mark_page_dirty_in_slot(vcpu->kvm, ghc->memslot, gpa_to_gfn(ghc->gpa));
+ mark_page_dirty_in_slot(vcpu->kvm, vcpu, ghc->memslot, gpa_to_gfn(ghc->gpa));
}

void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index ea264c4502e4..f1dab3413fc8 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -294,7 +294,7 @@ static inline bool kvm_vcpu_latch_init(struct kvm_vcpu *vcpu)
return is_smm(vcpu) || static_call(kvm_x86_apic_init_signal_blocked)(vcpu);
}

-void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock, int sec_hi_ofs);
+void kvm_write_wall_clock(struct kvm_vcpu *vcpu, gpa_t wall_clock, int sec_hi_ofs);
void kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip);

u64 get_kvmclock_ns(struct kvm *kvm);
diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c
index 7a58df25c9b2..e7b0c0af807d 100644
--- a/arch/x86/kvm/xen.c
+++ b/arch/x86/kvm/xen.c
@@ -37,7 +37,7 @@ static int kvm_xen_shared_info_init(struct kvm *kvm, gfn_t gfn)

ret = kvm_gfn_to_pfn_cache_init(kvm, gpc, NULL, true, gpa,
PAGE_SIZE, true);
- if (ret)
+ if (ret && !kvm->vcpus[0])
goto out;

/* Paranoia checks on the 32-bit struct layout */
@@ -60,7 +60,7 @@ static int kvm_xen_shared_info_init(struct kvm *kvm, gfn_t gfn)
}
#endif

- kvm_write_wall_clock(kvm, gpa + wc_ofs, sec_hi_ofs - wc_ofs);
+ kvm_write_wall_clock(kvm->vcpus[0], gpa + wc_ofs, sec_hi_ofs - wc_ofs);
kvm_make_all_cpus_request(kvm, KVM_REQ_MASTERCLOCK_UPDATE);

out:
@@ -316,7 +316,7 @@ int __kvm_xen_has_interrupt(struct kvm_vcpu *v)
err:
user_access_end();

- mark_page_dirty_in_slot(v->kvm, ghc->memslot, ghc->gpa >> PAGE_SHIFT);
+ mark_page_dirty_in_slot(v->kvm, v, ghc->memslot, ghc->gpa >> PAGE_SHIFT);
} else {
__get_user(rc, (u8 __user *)ghc->hva + offset);
}
diff --git a/include/linux/kvm_dirty_ring.h b/include/linux/kvm_dirty_ring.h
index 4da8d4a4140b..f3be974f9c5a 100644
--- a/include/linux/kvm_dirty_ring.h
+++ b/include/linux/kvm_dirty_ring.h
@@ -43,7 +43,8 @@ static inline int kvm_dirty_ring_alloc(struct kvm_dirty_ring *ring,
return 0;
}

-static inline struct kvm_dirty_ring *kvm_dirty_ring_get(struct kvm *kvm)
+static inline struct kvm_dirty_ring *kvm_dirty_ring_get(struct kvm *kvm,
+ struct kvm_vcpu *vcpu)
{
return NULL;
}
@@ -78,7 +79,8 @@ static inline bool kvm_dirty_ring_soft_full(struct kvm_dirty_ring *ring)

u32 kvm_dirty_ring_get_rsvd_entries(void);
int kvm_dirty_ring_alloc(struct kvm_dirty_ring *ring, int index, u32 size);
-struct kvm_dirty_ring *kvm_dirty_ring_get(struct kvm *kvm);
+struct kvm_dirty_ring *kvm_dirty_ring_get(struct kvm *kvm,
+ struct kvm_vcpu *vcpu);

/*
* called with kvm->slots_lock held, returns the number of
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index f145a58e37b0..59a92a82e3a3 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -954,7 +954,8 @@ struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn);
bool kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn);
bool kvm_vcpu_is_visible_gfn(struct kvm_vcpu *vcpu, gfn_t gfn);
unsigned long kvm_host_page_size(struct kvm_vcpu *vcpu, gfn_t gfn);
-void mark_page_dirty_in_slot(struct kvm *kvm, struct kvm_memory_slot *memslot, gfn_t gfn);
+void mark_page_dirty_in_slot(struct kvm *kvm, struct kvm_vcpu *vcpu,
+ struct kvm_memory_slot *memslot, gfn_t gfn);
void mark_page_dirty(struct kvm *kvm, gfn_t gfn);

struct kvm_memslots *kvm_vcpu_memslots(struct kvm_vcpu *vcpu);
diff --git a/virt/kvm/dirty_ring.c b/virt/kvm/dirty_ring.c
index 2b4474387895..879d454eef71 100644
--- a/virt/kvm/dirty_ring.c
+++ b/virt/kvm/dirty_ring.c
@@ -36,12 +36,16 @@ static bool kvm_dirty_ring_full(struct kvm_dirty_ring *ring)
return kvm_dirty_ring_used(ring) >= ring->size;
}

-struct kvm_dirty_ring *kvm_dirty_ring_get(struct kvm *kvm)
+struct kvm_dirty_ring *kvm_dirty_ring_get(struct kvm *kvm, struct kvm_vcpu *vcpu)
{
- struct kvm_vcpu *vcpu = kvm_get_running_vcpu();
+ struct kvm_vcpu *running_vcpu = kvm_get_running_vcpu();

+ WARN_ON_ONCE(vcpu && vcpu != running_vcpu);
WARN_ON_ONCE(vcpu->kvm != kvm);

+ if (!vcpu)
+ vcpu = running_vcpu;
+
return &vcpu->dirty_ring;
}

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 4065fd32271a..24f300e5fa96 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -2787,7 +2787,7 @@ int kvm_vcpu_read_guest_atomic(struct kvm_vcpu *vcpu, gpa_t gpa,
}
EXPORT_SYMBOL_GPL(kvm_vcpu_read_guest_atomic);

-static int __kvm_write_guest_page(struct kvm *kvm,
+static int __kvm_write_guest_page(struct kvm *kvm, struct kvm_vcpu *vcpu,
struct kvm_memory_slot *memslot, gfn_t gfn,
const void *data, int offset, int len)
{
@@ -2800,7 +2800,7 @@ static int __kvm_write_guest_page(struct kvm *kvm,
r = __copy_to_user((void __user *)addr + offset, data, len);
if (r)
return -EFAULT;
- mark_page_dirty_in_slot(kvm, memslot, gfn);
+ mark_page_dirty_in_slot(kvm, vcpu, memslot, gfn);
return 0;
}

@@ -2809,7 +2809,7 @@ int kvm_write_guest_page(struct kvm *kvm, gfn_t gfn,
{
struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn);

- return __kvm_write_guest_page(kvm, slot, gfn, data, offset, len);
+ return __kvm_write_guest_page(kvm, NULL, slot, gfn, data, offset, len);
}
EXPORT_SYMBOL_GPL(kvm_write_guest_page);

@@ -2818,7 +2818,7 @@ int kvm_vcpu_write_guest_page(struct kvm_vcpu *vcpu, gfn_t gfn,
{
struct kvm_memory_slot *slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn);

- return __kvm_write_guest_page(vcpu->kvm, slot, gfn, data, offset, len);
+ return __kvm_write_guest_page(vcpu->kvm, vcpu, slot, gfn, data, offset, len);
}
EXPORT_SYMBOL_GPL(kvm_vcpu_write_guest_page);

@@ -2937,7 +2937,7 @@ int kvm_write_guest_offset_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
r = __copy_to_user((void __user *)ghc->hva + offset, data, len);
if (r)
return -EFAULT;
- mark_page_dirty_in_slot(kvm, ghc->memslot, gpa >> PAGE_SHIFT);
+ mark_page_dirty_in_slot(kvm, NULL, ghc->memslot, gpa >> PAGE_SHIFT);

return 0;
}
@@ -3006,7 +3006,7 @@ int kvm_clear_guest(struct kvm *kvm, gpa_t gpa, unsigned long len)
}
EXPORT_SYMBOL_GPL(kvm_clear_guest);

-void mark_page_dirty_in_slot(struct kvm *kvm,
+void mark_page_dirty_in_slot(struct kvm *kvm, struct kvm_vcpu *vcpu,
struct kvm_memory_slot *memslot,
gfn_t gfn)
{
@@ -3015,7 +3015,7 @@ void mark_page_dirty_in_slot(struct kvm *kvm,
u32 slot = (memslot->as_id << 16) | memslot->id;

if (kvm->dirty_ring_size)
- kvm_dirty_ring_push(kvm_dirty_ring_get(kvm),
+ kvm_dirty_ring_push(kvm_dirty_ring_get(kvm, vcpu),
slot, rel_gfn);
else
set_bit_le(rel_gfn, memslot->dirty_bitmap);
@@ -3028,7 +3028,7 @@ void mark_page_dirty(struct kvm *kvm, gfn_t gfn)
struct kvm_memory_slot *memslot;

memslot = gfn_to_memslot(kvm, gfn);
- mark_page_dirty_in_slot(kvm, memslot, gfn);
+ mark_page_dirty_in_slot(kvm, NULL, memslot, gfn);
}
EXPORT_SYMBOL_GPL(mark_page_dirty);

@@ -3037,7 +3037,7 @@ void kvm_vcpu_mark_page_dirty(struct kvm_vcpu *vcpu, gfn_t gfn)
struct kvm_memory_slot *memslot;

memslot = kvm_vcpu_gfn_to_memslot(vcpu, gfn);
- mark_page_dirty_in_slot(vcpu->kvm, memslot, gfn);
+ mark_page_dirty_in_slot(vcpu->kvm, vcpu, memslot, gfn);
}
EXPORT_SYMBOL_GPL(kvm_vcpu_mark_page_dirty);


Attachment: smime.p7s
Description: S/MIME cryptographic signature