[PATCH v2 15/22] powerpc/kvm/book3s: use find_kvm_host_pte in pute_tce functions

From: Aneesh Kumar K.V
Date: Wed Mar 18 2020 - 23:57:38 EST


Current code just hold rmap lock to ensure parallel page table update is
prevented. That is not sufficient. The kernel should also check whether
a mmu_notifer callback was running in parallel.

Cc: Alexey Kardashevskiy <aik@xxxxxxxxx>
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@xxxxxxxxxxxxx>
---
arch/powerpc/kvm/book3s_64_vio_hv.c | 30 +++++++++++++++++++++++------
1 file changed, 24 insertions(+), 6 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_64_vio_hv.c b/arch/powerpc/kvm/book3s_64_vio_hv.c
index ab6eeb8e753e..8a6bf12d2e88 100644
--- a/arch/powerpc/kvm/book3s_64_vio_hv.c
+++ b/arch/powerpc/kvm/book3s_64_vio_hv.c
@@ -438,8 +438,8 @@ long kvmppc_rm_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
return H_SUCCESS;
}

-static long kvmppc_rm_ua_to_hpa(struct kvm_vcpu *vcpu,
- unsigned long ua, unsigned long *phpa)
+static long kvmppc_rm_ua_to_hpa(struct kvm_vcpu *vcpu, unsigned long mmu_seq,
+ unsigned long ua, unsigned long *phpa)
{
pte_t *ptep, pte;
unsigned shift = 0;
@@ -453,10 +453,17 @@ static long kvmppc_rm_ua_to_hpa(struct kvm_vcpu *vcpu,
* to exit which will agains result in the below page table walk
* to finish.
*/
- ptep = __find_linux_pte(vcpu->arch.pgdir, ua, NULL, &shift);
- if (!ptep || !pte_present(*ptep))
+ /* an rmap lock won't make it safe. because that just ensure hash
+ * page table entries are removed with rmap lock held. After that
+ * mmu notifier returns and we go ahead and removing ptes from Qemu page table.
+ */
+ ptep = find_kvm_host_pte(vcpu->kvm, mmu_seq, ua, &shift);
+ if (!ptep)
+ return -ENXIO;
+
+ pte = READ_ONCE(*ptep);
+ if (!pte_present(pte))
return -ENXIO;
- pte = *ptep;

if (!shift)
shift = PAGE_SHIFT;
@@ -478,10 +485,12 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu,
unsigned long liobn, unsigned long ioba,
unsigned long tce_list, unsigned long npages)
{
+ struct kvm *kvm = vcpu->kvm;
struct kvmppc_spapr_tce_table *stt;
long i, ret = H_SUCCESS;
unsigned long tces, entry, ua = 0;
unsigned long *rmap = NULL;
+ unsigned long mmu_seq;
bool prereg = false;
struct kvmppc_spapr_tce_iommu_table *stit;

@@ -489,6 +498,12 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu,
if (kvm_is_radix(vcpu->kvm))
return H_TOO_HARD;

+ /*
+ * used to check for invalidations in progress
+ */
+ mmu_seq = kvm->mmu_notifier_seq;
+ smp_rmb();
+
stt = kvmppc_find_table(vcpu->kvm, liobn);
if (!stt)
return H_TOO_HARD;
@@ -548,7 +563,9 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu,
* real page.
*/
lock_rmap(rmap);
- if (kvmppc_rm_ua_to_hpa(vcpu, ua, &tces)) {
+
+ arch_spin_lock(&kvm->mmu_lock.rlock.raw_lock);
+ if (kvmppc_rm_ua_to_hpa(vcpu, mmu_seq, ua, &tces)) {
ret = H_TOO_HARD;
goto unlock_exit;
}
@@ -594,6 +611,7 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu,
if (rmap)
unlock_rmap(rmap);

+ arch_spin_unlock(&kvm->mmu_lock.rlock.raw_lock);
return ret;
}

--
2.24.1