Re: [RFCv2 08/16] KVM: Use GUP instead of copy_from/to_user() to access guest memory

From: John Hubbard
Date: Tue Oct 20 2020 - 04:26:07 EST


On 10/19/20 11:18 PM, Kirill A. Shutemov wrote:
New helpers copy_from_guest()/copy_to_guest() to be used if the KVM memory
protection feature is enabled.

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@xxxxxxxxxxxxxxx>
---
include/linux/kvm_host.h |  4 ++
virt/kvm/kvm_main.c      | 90 +++++++++++++++++++++++++++++++---------
2 files changed, 75 insertions(+), 19 deletions(-)

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 05e3c2fb3ef7..380a64613880 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -504,6 +504,7 @@ struct kvm {
struct srcu_struct irq_srcu;
pid_t userspace_pid;
unsigned int max_halt_poll_ns;
+ bool mem_protected;
};
#define kvm_err(fmt, ...) \
@@ -728,6 +729,9 @@ void kvm_set_pfn_dirty(kvm_pfn_t pfn);
void kvm_set_pfn_accessed(kvm_pfn_t pfn);
void kvm_get_pfn(kvm_pfn_t pfn);
+int copy_from_guest(void *data, unsigned long hva, int len, bool protected);
+int copy_to_guest(unsigned long hva, const void *data, int len, bool protected);
+
void kvm_release_pfn(kvm_pfn_t pfn, bool dirty, struct gfn_to_pfn_cache *cache);
int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset,
int len);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index cf88233b819a..a9884cb8c867 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -2313,19 +2313,70 @@ static int next_segment(unsigned long len, int offset)
return len;
}
+int copy_from_guest(void *data, unsigned long hva, int len, bool protected)
+{
+ int offset = offset_in_page(hva);
+ struct page *page;
+ int npages, seg;
+
+ if (!protected)
+ return __copy_from_user(data, (void __user *)hva, len);
+
+ might_fault();
+ kasan_check_write(data, len);
+ check_object_size(data, len, false);
+
+ while ((seg = next_segment(len, offset)) != 0) {
+ npages = get_user_pages_unlocked(hva, 1, &page, 0);
+ if (npages != 1)
+ return -EFAULT;
+ memcpy(data, page_address(page) + offset, seg);

Hi Kirill!

OK, so copy_from_guest() is the read-only case for gup, which I think is safe
from a gup/pup + filesystem point of view (the page data is only read, never
written, so there is no writeback interaction to worry about), but see below
about copy_to_guest()...

+ put_page(page);
+ len -= seg;
+ hva += seg;
+ offset = 0;
+ }
+
+ return 0;
+}
+
+int copy_to_guest(unsigned long hva, const void *data, int len, bool protected)
+{
+ int offset = offset_in_page(hva);
+ struct page *page;
+ int npages, seg;
+
+ if (!protected)
+ return __copy_to_user((void __user *)hva, data, len);
+
+ might_fault();
+ kasan_check_read(data, len);
+ check_object_size(data, len, true);
+
+ while ((seg = next_segment(len, offset)) != 0) {
+ npages = get_user_pages_unlocked(hva, 1, &page, FOLL_WRITE);


Should copy_to_guest() use pin_user_pages_unlocked() instead of
get_user_pages_unlocked()? We wrote up "Case 5" in
Documentation/core-api/pin_user_pages.rst for just this situation, I think:


CASE 5: Pinning in order to write to the data within the page
-------------------------------------------------------------
Even though neither DMA nor Direct IO is involved, just a simple case of "pin,
write to a page's data, unpin" can cause a problem. Case 5 may be considered a
superset of Case 1, plus Case 2, plus anything that invokes that pattern. In
other words, if the code is neither Case 1 nor Case 2, it may still require
FOLL_PIN, for patterns like this:

Correct (uses FOLL_PIN calls):
pin_user_pages()
write to the data within the pages
unpin_user_pages()
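
i.e. something like this, perhaps (completely untested, just a sketch of what
the conversion would look like; pin_user_pages_unlocked() sets FOLL_PIN
internally, and unpin_user_pages_dirty_lock() replaces the put_page() call,
handling both the dirtying and the unpinning):

int copy_to_guest(unsigned long hva, const void *data, int len, bool protected)
{
        int offset = offset_in_page(hva);
        struct page *page;
        int npages, seg;

        if (!protected)
                return __copy_to_user((void __user *)hva, data, len);

        might_fault();
        kasan_check_read(data, len);
        check_object_size(data, len, true);

        while ((seg = next_segment(len, offset)) != 0) {
                /* FOLL_PIN is set internally by pin_user_pages_unlocked() */
                npages = pin_user_pages_unlocked(hva, 1, &page, FOLL_WRITE);
                if (npages != 1)
                        return -EFAULT;
                memcpy(page_address(page) + offset, data, seg);
                /* Marks the page dirty (set_page_dirty_lock) and unpins it */
                unpin_user_pages_dirty_lock(&page, 1, true);
                len -= seg;
                hva += seg;
                offset = 0;
        }

        return 0;
}

That should keep the pinned-page accounting correct for file-backed pages,
I believe, so that writeback and reclaim can tell these pages are pinned for
data access.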


thanks,
--
John Hubbard
NVIDIA

+ if (npages != 1)
+ return -EFAULT;
+ memcpy(page_address(page) + offset, data, seg);
+ put_page(page);
+ len -= seg;
+ hva += seg;
+ offset = 0;
+ }
+
+ return 0;
+}
+
static int __kvm_read_guest_page(struct kvm_memory_slot *slot, gfn_t gfn,
- void *data, int offset, int len)
+ void *data, int offset, int len,
+ bool protected)
{
- int r;
unsigned long addr;
addr = gfn_to_hva_memslot_prot(slot, gfn, NULL);
if (kvm_is_error_hva(addr))
return -EFAULT;
- r = __copy_from_user(data, (void __user *)addr + offset, len);
- if (r)
- return -EFAULT;
- return 0;
+ return copy_from_guest(data, addr + offset, len, protected);
}
int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset,
@@ -2333,7 +2384,8 @@ int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset,
{
struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn);
- return __kvm_read_guest_page(slot, gfn, data, offset, len);
+ return __kvm_read_guest_page(slot, gfn, data, offset, len,
+ kvm->mem_protected);
}
EXPORT_SYMBOL_GPL(kvm_read_guest_page);
@@ -2342,7 +2394,8 @@ int kvm_vcpu_read_guest_page(struct kvm_vcpu *vcpu, gfn_t gfn, void *data,
{
struct kvm_memory_slot *slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn);
- return __kvm_read_guest_page(slot, gfn, data, offset, len);
+ return __kvm_read_guest_page(slot, gfn, data, offset, len,
+ vcpu->kvm->mem_protected);
}
EXPORT_SYMBOL_GPL(kvm_vcpu_read_guest_page);
@@ -2415,7 +2468,8 @@ int kvm_vcpu_read_guest_atomic(struct kvm_vcpu *vcpu, gpa_t gpa,
EXPORT_SYMBOL_GPL(kvm_vcpu_read_guest_atomic);
static int __kvm_write_guest_page(struct kvm_memory_slot *memslot, gfn_t gfn,
- const void *data, int offset, int len)
+ const void *data, int offset, int len,
+ bool protected)
{
int r;
unsigned long addr;
@@ -2423,7 +2477,8 @@ static int __kvm_write_guest_page(struct kvm_memory_slot *memslot, gfn_t gfn,
addr = gfn_to_hva_memslot(memslot, gfn);
if (kvm_is_error_hva(addr))
return -EFAULT;
- r = __copy_to_user((void __user *)addr + offset, data, len);
+
+ r = copy_to_guest(addr + offset, data, len, protected);
if (r)
return -EFAULT;
mark_page_dirty_in_slot(memslot, gfn);
@@ -2435,7 +2490,8 @@ int kvm_write_guest_page(struct kvm *kvm, gfn_t gfn,
{
struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn);
- return __kvm_write_guest_page(slot, gfn, data, offset, len);
+ return __kvm_write_guest_page(slot, gfn, data, offset, len,
+ kvm->mem_protected);
}
EXPORT_SYMBOL_GPL(kvm_write_guest_page);
@@ -2444,7 +2500,8 @@ int kvm_vcpu_write_guest_page(struct kvm_vcpu *vcpu, gfn_t gfn,
{
struct kvm_memory_slot *slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn);
- return __kvm_write_guest_page(slot, gfn, data, offset, len);
+ return __kvm_write_guest_page(slot, gfn, data, offset, len,
+ vcpu->kvm->mem_protected);
}
EXPORT_SYMBOL_GPL(kvm_vcpu_write_guest_page);
@@ -2560,7 +2617,7 @@ int kvm_write_guest_offset_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
if (unlikely(!ghc->memslot))
return kvm_write_guest(kvm, gpa, data, len);
- r = __copy_to_user((void __user *)ghc->hva + offset, data, len);
+ r = copy_to_guest(ghc->hva + offset, data, len, kvm->mem_protected);
if (r)
return -EFAULT;
mark_page_dirty_in_slot(ghc->memslot, gpa >> PAGE_SHIFT);
@@ -2581,7 +2638,6 @@ int kvm_read_guest_offset_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
unsigned long len)
{
struct kvm_memslots *slots = kvm_memslots(kvm);
- int r;
gpa_t gpa = ghc->gpa + offset;
BUG_ON(len + offset > ghc->len);
@@ -2597,11 +2653,7 @@ int kvm_read_guest_offset_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
if (unlikely(!ghc->memslot))
return kvm_read_guest(kvm, gpa, data, len);
- r = __copy_from_user(data, (void __user *)ghc->hva + offset, len);
- if (r)
- return -EFAULT;
-
- return 0;
+ return copy_from_guest(data, ghc->hva + offset, len, kvm->mem_protected);
}
EXPORT_SYMBOL_GPL(kvm_read_guest_offset_cached);