[PATCH] KVM: replace large kvmalloc allocation with vmalloc

From: Paolo Bonzini
Date: Fri Oct 15 2021 - 12:55:37 EST


KVM's paging data structures (especially the rmaps) can be made as
large as possible by userspace simply by creating large-enough memslots.
Since commit 7661809d493b ("mm: don't allow oversized kvmalloc() calls")
these huge allocations trigger a warning, on the assumption that they
could be the result of an integer overflow or underflow.

There are configurations in the wild creating multi-TiB memslots, and
such huge allocations are more likely than not to end up not using
kmalloc-ed memory anyway. For example, the dirty bitmap for a 64 GiB
memslot would cause a 4 MiB allocation, since each 32 KiB of guest
address space corresponds to 2 bytes in the dirty bitmap. Therefore,
just use vmalloc directly. Introduce a new helper vcalloc to check for
overflow for extra paranoia, even though it should not be a problem here
even on 32-bit systems.

Reported-by: syzbot+e0de2333cbf95ea473e8@xxxxxxxxxxxxxxxxxxxxxxxxx
Signed-off-by: Paolo Bonzini <pbonzini@xxxxxxxxxx>
---
arch/x86/kvm/mmu/page_track.c | 3 +--
arch/x86/kvm/x86.c | 4 ++--
include/linux/vmalloc.h | 10 ++++++++++
virt/kvm/kvm_main.c | 4 ++--
4 files changed, 15 insertions(+), 6 deletions(-)

diff --git a/arch/x86/kvm/mmu/page_track.c b/arch/x86/kvm/mmu/page_track.c
index 21427e84a82e..0d9842472288 100644
--- a/arch/x86/kvm/mmu/page_track.c
+++ b/arch/x86/kvm/mmu/page_track.c
@@ -36,8 +36,7 @@ int kvm_page_track_create_memslot(struct kvm_memory_slot *slot,

for (i = 0; i < KVM_PAGE_TRACK_MAX; i++) {
slot->arch.gfn_track[i] =
- kvcalloc(npages, sizeof(*slot->arch.gfn_track[i]),
- GFP_KERNEL_ACCOUNT);
+ vcalloc(npages, sizeof(*slot->arch.gfn_track[i]));
if (!slot->arch.gfn_track[i])
goto track_free;
}
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index aabd3a2ec1bc..07f5760ea30c 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -11394,7 +11394,7 @@ static int memslot_rmap_alloc(struct kvm_memory_slot *slot,

WARN_ON(slot->arch.rmap[i]);

- slot->arch.rmap[i] = kvcalloc(lpages, sz, GFP_KERNEL_ACCOUNT);
+ slot->arch.rmap[i] = vcalloc(lpages, sz);
if (!slot->arch.rmap[i]) {
memslot_rmap_free(slot);
return -ENOMEM;
@@ -11475,7 +11475,7 @@ static int kvm_alloc_memslot_metadata(struct kvm *kvm,

lpages = __kvm_mmu_slot_lpages(slot, npages, level);

- linfo = kvcalloc(lpages, sizeof(*linfo), GFP_KERNEL_ACCOUNT);
+ linfo = vcalloc(lpages, sizeof(*linfo));
if (!linfo)
goto out_free;

diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
index 671d402c3778..6d51c83c2b0e 100644
--- a/include/linux/vmalloc.h
+++ b/include/linux/vmalloc.h
@@ -167,6 +167,16 @@ extern int remap_vmalloc_range_partial(struct vm_area_struct *vma,
extern int remap_vmalloc_range(struct vm_area_struct *vma, void *addr,
unsigned long pgoff);

+static inline void *vcalloc(size_t n, size_t size)
+{
+ size_t bytes;
+
+ if (unlikely(check_mul_overflow(n, size, &bytes)))
+ return NULL;
+
+ return vzalloc(bytes);
+}
+
/*
* Architectures can set this mask to a combination of PGTBL_P?D_MODIFIED values
* and let generic vmalloc and ioremap code know when arch_sync_kernel_mappings()
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 7851f3a1b5f7..0295d89f5445 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1242,9 +1242,9 @@ static int kvm_vm_release(struct inode *inode, struct file *filp)
*/
static int kvm_alloc_dirty_bitmap(struct kvm_memory_slot *memslot)
{
- unsigned long dirty_bytes = 2 * kvm_dirty_bitmap_bytes(memslot);
+ unsigned long dirty_bytes = kvm_dirty_bitmap_bytes(memslot);

- memslot->dirty_bitmap = kvzalloc(dirty_bytes, GFP_KERNEL_ACCOUNT);
+ memslot->dirty_bitmap = vcalloc(2, dirty_bytes);
if (!memslot->dirty_bitmap)
return -ENOMEM;

--
2.27.0