If a guest accesses swapped-out memory, do not swap it in from the vcpu
thread context. Instead, schedule work to do the swapping and put the
vcpu into a halted state. Interrupts will still be delivered to the
guest, and if an interrupt causes a reschedule, the guest will continue
to run another task.
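
Conceptually, the change turns an unresolved async page fault into a
synthetic HLT: on each entry the vcpu loop first drains completed
async-pf work, and if the faulting page is still being paged in, the
vcpu stays halted instead of entering the guest. Below is a minimal
userspace model of that decision; the struct, field names, and counters
are illustrative stand-ins, not the kernel's types.

#include <stdbool.h>
#include <stdio.h>

/* Hypothetical stand-ins for KVM's mp_state and per-vcpu async-pf state. */
enum mp_state { MP_STATE_RUNNABLE, MP_STATE_HALTED };

struct vcpu_model {
    enum mp_state mp_state;
    int outstanding_apf;    /* faults still being paged in */
    int completed_apf;      /* work items that have finished */
};

/* Loosely models kvm_check_async_pf_completion() followed by the
 * synthetic-halt check in vcpu_enter_guest(): drain completions,
 * then decide whether to enter the guest or stay halted. */
static bool should_enter_guest(struct vcpu_model *v)
{
    while (v->completed_apf > 0) {
        v->completed_apf--;
        v->outstanding_apf--;
        if (!v->outstanding_apf)
            v->mp_state = MP_STATE_RUNNABLE;
    }
    /* Page still swapped out: remain in the synthetic halt. */
    return v->mp_state != MP_STATE_HALTED;
}

int main(void)
{
    struct vcpu_model v = { MP_STATE_HALTED, 1, 0 };

    printf("enter guest? %d\n", should_enter_guest(&v)); /* 0: halted */
    v.completed_apf = 1;                                 /* page arrived */
    printf("enter guest? %d\n", should_enter_guest(&v)); /* 1: runnable */
    return 0;
}

In the real patch the vcpu is still woken by interrupts while halted,
which is why kvm_arch_vcpu_runnable() below also checks the async-pf
done list and interrupt deliverability.
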
+
+static bool can_do_async_pf(struct kvm_vcpu *vcpu)
+{
+ if (unlikely(!irqchip_in_kernel(vcpu->kvm) ||
+ kvm_event_needs_reinjection(vcpu)))
+ return false;
+
+ return kvm_x86_ops->interrupt_allowed(vcpu);
+}
@@ -5112,6 +5122,13 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
if (unlikely(r))
goto out;
+ kvm_check_async_pf_completion(vcpu);
+ if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED) {
+ /* Page is swapped out. Do synthetic halt */
+ r = 1;
+ goto out;
+ }
+
if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) {
inject_pending_event(vcpu);
@@ -5781,6 +5798,9 @@ int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu)
kvm_make_request(KVM_REQ_EVENT, vcpu);
+ kvm_clear_async_pf_completion_queue(vcpu);
+	memset(vcpu->arch.apf.gfns, 0xff, sizeof(vcpu->arch.apf.gfns));
@@ -6040,6 +6064,7 @@ void kvm_arch_flush_shadow(struct kvm *kvm)
int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
{
return vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE
+ || !list_empty_careful(&vcpu->async_pf.done)
|| vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED
|| vcpu->arch.nmi_pending ||
		(kvm_arch_interrupt_allowed(vcpu) &&
		 kvm_cpu_has_interrupt(vcpu));
+static void kvm_add_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn)
+{
+ u32 key = kvm_async_pf_hash_fn(gfn);
+
+ while (vcpu->arch.apf.gfns[key] != -1)
+ key = kvm_async_pf_next_probe(key);
+
+	vcpu->arch.apf.gfns[key] = gfn;
+}
+
+void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
+ struct kvm_async_pf *work)
+{
+ vcpu->arch.mp_state = KVM_MP_STATE_HALTED;
+
+	if (work == kvm_double_apf) {
+		trace_kvm_async_pf_doublefault(kvm_rip_read(vcpu));
+	} else {
+		trace_kvm_async_pf_not_present(work->gva);
+
+		kvm_add_async_pf_gfn(vcpu, work->arch.gfn);
+	}
+}
+
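The vcpu->arch.apf.gfns[] array used above is a small open-addressed
hash table with linear probing: slots are memset to 0xff on reset, so
-1 marks an empty slot, kvm_async_pf_hash_fn() picks a starting slot,
and kvm_async_pf_next_probe() walks forward until a free one is found.
A self-contained userspace sketch of the same probing scheme follows;
the table size and hash function are made up for illustration and are
not the kernel's.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define APF_SLOTS 64                    /* illustrative; power of two */
static uint64_t gfns[APF_SLOTS];

static uint32_t hash_fn(uint64_t gfn)
{
    return (uint32_t)(gfn * 2654435761u) & (APF_SLOTS - 1); /* toy hash */
}

static uint32_t next_probe(uint32_t key)
{
    return (key + 1) & (APF_SLOTS - 1); /* linear probe, wraps around */
}

static void add_gfn(uint64_t gfn)
{
    uint32_t key = hash_fn(gfn);

    while (gfns[key] != (uint64_t)-1)   /* -1 == empty, as in the patch */
        key = next_probe(key);
    gfns[key] = gfn;
}

static int find_gfn(uint64_t gfn)
{
    uint32_t key = hash_fn(gfn);
    int i;

    /* Probe at most the whole table; stop early on an empty slot. */
    for (i = 0; i < APF_SLOTS && gfns[key] != (uint64_t)-1; i++) {
        if (gfns[key] == gfn)
            return 1;
        key = next_probe(key);
    }
    return 0;
}

int main(void)
{
    memset(gfns, 0xff, sizeof(gfns));   /* all slots empty, as on vcpu reset */
    add_gfn(0x1234);
    printf("0x1234 queued? %d\n", find_gfn(0x1234)); /* 1 */
    printf("0x5678 queued? %d\n", find_gfn(0x5678)); /* 0 */
    return 0;
}
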
+TRACE_EVENT(
+	kvm_async_pf_not_present,
+	TP_PROTO(u64 gva),
+	TP_ARGS(gva),
+
+	TP_STRUCT__entry(__field(u64, gva)),
+	TP_fast_assign(__entry->gva = gva;),
+	TP_printk("gva %#llx not present", __entry->gva)
+);
+
+TRACE_EVENT(
+ kvm_async_pf_completed,
+ TP_PROTO(unsigned long address, struct page *page, u64 gva),
+ TP_ARGS(address, page, gva),
+
+ TP_STRUCT__entry(
+ __field(unsigned long, address)
+ __field(struct page*, page)
+ __field(u64, gva)
+ ),
+
+ TP_fast_assign(
+ __entry->address = address;
+ __entry->page = page;
+ __entry->gva = gva;
+	),
+
+	TP_printk("gva %#llx address %#lx page %p", __entry->gva,
+		  __entry->address, __entry->page)
+);
+
+void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu)
+{
+ /* cancel outstanding work queue item */
+ while (!list_empty(&vcpu->async_pf.queue)) {
+ struct kvm_async_pf *work =
+ list_entry(vcpu->async_pf.queue.next,
+ typeof(*work), queue);
+ cancel_work_sync(&work->work);
+ list_del(&work->queue);
+ if (!work->page) /* work was canceled */
+ kmem_cache_free(async_pf_cache, work);
+	}
+}
+
+	/* do the allocation with GFP_NOWAIT: if we are going to sleep
+	   anyway, we may as well sleep faulting the page in synchronously */
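
The comment above captures the allocation policy: the async-pf work
item is allocated with GFP_NOWAIT, and if that fails the caller simply
falls back to the synchronous (sleeping) fault path, since blocking in
the allocator would defeat the point of going async. A hedged userspace
sketch of that try-fast-else-fall-back pattern; the function names,
the memory_tight flag, and the elided scheduling step are illustrative.

#include <stdio.h>
#include <stdlib.h>

struct async_pf_work {
    unsigned long gfn;
    /* ... queue linkage, vcpu backref, etc. elided ... */
};

/* Stand-in for kmem_cache_zalloc(async_pf_cache, GFP_NOWAIT):
 * returns NULL instead of sleeping when memory is tight. */
static struct async_pf_work *alloc_nowait(int memory_tight)
{
    if (memory_tight)
        return NULL;
    return calloc(1, sizeof(struct async_pf_work));
}

/* Returns 1 if the fault was queued asynchronously, 0 if the caller
 * should fall back to faulting the page in synchronously. */
static int setup_async_pf(unsigned long gfn, int memory_tight)
{
    struct async_pf_work *work = alloc_nowait(memory_tight);

    if (!work)
        return 0;   /* would sleep anyway; sleep in the fault instead */
    work->gfn = gfn;
    /* scheduling the work item would go here; freed in this model */
    free(work);
    return 1;
}

int main(void)
{
    printf("queued async: %d\n", setup_async_pf(0x1234, 0)); /* 1 */
    printf("queued async: %d\n", setup_async_pf(0x1234, 1)); /* 0: sync */
    return 0;
}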