[PATCH 2/2] KVM: MMU: try to fix up page faults before giving up
From: Paolo Bonzini
Date: Thu Jun 30 2016 - 09:02:41 EST
The vGPU folks would like to trap the first access to a BAR by setting
vm_ops on the VMAs produced by mmap-ing a VFIO device. The fault handler
then can use remap_pfn_range to place some non-reserved pages in the VMA.
This kind of non-linear VM_PFNMAP mapping is not handled by KVM, but
follow_pfn and fixup_user_fault together help support it. Because
these pages are not reserved, they are subject to reference counting,
but there is already a helper (kvm_get_pfn) that gets this right.
Cc: Xiao Guangrong <guangrong.xiao@xxxxxxxxxxxxxxx>
Cc: Andrea Arcangeli <aarcange@xxxxxxxxxx>
Cc: Radim Krčmář <rkrcmar@xxxxxxxxxx>
Reported-by: Kirti Wankhede <kwankhede@xxxxxxxxxx>
Signed-off-by: Paolo Bonzini <pbonzini@xxxxxxxxxx>
---
mm/gup.c | 1 +
virt/kvm/kvm_main.c | 41 ++++++++++++++++++++++++++++++++++++++---
2 files changed, 39 insertions(+), 3 deletions(-)
diff --git a/mm/gup.c b/mm/gup.c
index c057784c8444..e3ac22f90fa4 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -720,6 +720,7 @@ retry:
}
return 0;
}
+EXPORT_SYMBOL_GPL(fixup_user_fault);
static __always_inline long __get_user_pages_locked(struct task_struct *tsk,
struct mm_struct *mm,
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 5aae59e00bef..2927fb9ca062 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1446,9 +1446,41 @@ static int hva_to_pfn_remapped(struct vm_area_struct *vma,
unsigned long addr, bool *async,
bool write_fault, kvm_pfn_t *p_pfn)
{
- *p_pfn = ((addr - vma->vm_start) >> PAGE_SHIFT) +
- vma->vm_pgoff;
- BUG_ON(!kvm_is_reserved_pfn(*p_pfn));
+ unsigned long pfn;
+ int r;
+
+ r = follow_pfn(vma, addr, &pfn);
+ if (r) {
+ /*
+ * get_user_pages fails for VM_IO and VM_PFNMAP vmas and does
+ * not call the fault handler, so do it here.
+ */
+ bool unlocked = false;
+ r = fixup_user_fault(current, current->mm, addr,
+ (write_fault ? FAULT_FLAG_WRITE : 0),
+ &unlocked);
+ if (unlocked)
+ return -EAGAIN;
+ if (r)
+ return r;
+
+ r = follow_pfn(vma, addr, &pfn);
+ if (r)
+ return r;
+
+ }
+
+ /*
+ * For pages mapped under VM_PFNMAP we assume that whoever called
+ * remap_pfn_range will also call e.g. unmap_mapping_range before
+ * the underlying pfns are freed, so that our MMU notifier gets
+ * called. We still have to get a reference here to the page,
+ * because the callers of *hva_to_pfn* and *gfn_to_pfn* ultimately
+ * end up doing a kvm_release_pfn_clean on the returned pfn.
+ */
+ kvm_get_pfn(pfn);
+
+ *p_pfn = pfn;
return 0;
}
@@ -1493,12 +1525,15 @@ static kvm_pfn_t hva_to_pfn(unsigned long addr, bool atomic, bool *async,
goto exit;
}
+retry:
vma = find_vma_intersection(current->mm, addr, addr + 1);
if (vma == NULL)
pfn = KVM_PFN_ERR_FAULT;
else if (vma->vm_flags & (VM_IO | VM_PFNMAP)) {
r = hva_to_pfn_remapped(vma, addr, async, write_fault, &pfn);
+ if (r == -EAGAIN)
+ goto retry;
if (r < 0)
pfn = KVM_PFN_ERR_FAULT;
} else {
--
1.8.3.1