[RFC 2/2] KVM: arm64: Add write-combine support for stage-2 entries

From: Shanker Donthineni
Date: Thu Apr 29 2021 - 12:29:58 EST


In the current implementation, device memory is always mapped as
DEVICE_nGnRE in stage-2. In the host kernel, device drivers have the
flexibility to choose between a Device memory type and write-combine
(Normal Non-cacheable), depending on the use case. The PCI specification
has a prefetchable BAR concept, where multiple writes can be combined
and reads have no side effects. Write-combining provides a huge
performance improvement and also allows unaligned accesses.

NVIDIA GPU PCIe devices have 3 BAR regions. Two regions are mapped to
video/compute memory and marked as prefetchable. The GPU driver takes
advantage of the write-combine feature for higher performance. The
same driver has no issues in the host kernel but crashes inside the
virtual machine because of unaligned accesses.

This patch looks up the PTE attributes of the device memory in the VMA.
It sets the stage-2 attribute to NORMAL_NC for WC regions and keeps
the default type, DEVICE_nGnRE, for non-WC regions.

Change-Id: Ibaea69c7a301df3c86609e871f6d066728391080
Signed-off-by: Shanker Donthineni <sdonthineni@xxxxxxxxxx>
---
arch/arm64/include/asm/kvm_mmu.h | 3 ++-
arch/arm64/include/asm/kvm_pgtable.h | 2 ++
arch/arm64/include/asm/memory.h | 4 +++-
arch/arm64/kvm/hyp/pgtable.c | 9 +++++++--
arch/arm64/kvm/mmu.c | 21 ++++++++++++++++++---
arch/arm64/kvm/vgic/vgic-v2.c | 2 +-
6 files changed, 33 insertions(+), 8 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index 90873851f677..dec498a6ba2f 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -160,7 +160,8 @@ void stage2_unmap_vm(struct kvm *kvm);
int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu);
void kvm_free_stage2_pgd(struct kvm_s2_mmu *mmu);
int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
- phys_addr_t pa, unsigned long size, bool writable);
+ phys_addr_t pa, unsigned long size, bool writable,
+ bool writecombine);

int kvm_handle_guest_abort(struct kvm_vcpu *vcpu);

diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h
index 8886d43cfb11..26f28220f6f3 100644
--- a/arch/arm64/include/asm/kvm_pgtable.h
+++ b/arch/arm64/include/asm/kvm_pgtable.h
@@ -35,6 +35,7 @@ struct kvm_pgtable {
* @KVM_PGTABLE_PROT_W: Write permission.
* @KVM_PGTABLE_PROT_R: Read permission.
* @KVM_PGTABLE_PROT_DEVICE: Device attributes.
+ * @KVM_PGTABLE_PROT_WC: Normal non-cacheable (WC).
*/
enum kvm_pgtable_prot {
KVM_PGTABLE_PROT_X = BIT(0),
@@ -42,6 +43,7 @@ enum kvm_pgtable_prot {
KVM_PGTABLE_PROT_R = BIT(2),

KVM_PGTABLE_PROT_DEVICE = BIT(3),
+ KVM_PGTABLE_PROT_WC = BIT(4),
};

#define PAGE_HYP (KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_W)
diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h
index 0aabc3be9a75..04a812b59437 100644
--- a/arch/arm64/include/asm/memory.h
+++ b/arch/arm64/include/asm/memory.h
@@ -144,13 +144,15 @@
* Memory types for Stage-2 translation
*/
#define MT_S2_NORMAL 0xf
+#define MT_S2_WRITE_COMBINE 5
#define MT_S2_DEVICE_nGnRE 0x1

/*
* Memory types for Stage-2 translation when ID_AA64MMFR2_EL1.FWB is 0001
- * Stage-2 enforces Normal-WB and Device-nGnRE
+ * Stage-2 enforces Normal-WB, Normal-NC and Device-nGnRE
*/
#define MT_S2_FWB_NORMAL 6
+#define MT_S2_FWB_WRITE_COMBINE 5
#define MT_S2_FWB_DEVICE_nGnRE 1

#ifdef CONFIG_ARM64_4K_PAGES
diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
index 926fc07074f5..bdfed559eae2 100644
--- a/arch/arm64/kvm/hyp/pgtable.c
+++ b/arch/arm64/kvm/hyp/pgtable.c
@@ -444,9 +444,14 @@ static int stage2_map_set_prot_attr(enum kvm_pgtable_prot prot,
struct stage2_map_data *data)
{
bool device = prot & KVM_PGTABLE_PROT_DEVICE;
- kvm_pte_t attr = device ? PAGE_S2_MEMATTR(DEVICE_nGnRE) :
- PAGE_S2_MEMATTR(NORMAL);
u32 sh = KVM_PTE_LEAF_ATTR_LO_S2_SH_IS;
+ kvm_pte_t attr = PAGE_S2_MEMATTR(NORMAL);
+
+ if (device) {
+ attr = (prot & KVM_PGTABLE_PROT_WC) ?
+ PAGE_S2_MEMATTR(WRITE_COMBINE) :
+ PAGE_S2_MEMATTR(DEVICE_nGnRE);
+ }

if (!(prot & KVM_PGTABLE_PROT_X))
attr |= KVM_PTE_LEAF_ATTR_HI_S2_XN;
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index 8711894db8c2..5b8ec1ab12e2 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -487,6 +487,16 @@ void kvm_free_stage2_pgd(struct kvm_s2_mmu *mmu)
}
}

+/**
+ * is_vma_write_combine - check whether a VMA is mapped write-combine
+ * @vma: the VMA whose vm_page_prot is inspected
+ * Return: true if the VMA's memory type is MT_NORMAL_NC, otherwise false
+ */
+static inline bool is_vma_write_combine(struct vm_area_struct *vma)
+{
+ return (pgprot_val(vma->vm_page_prot) & PTE_ATTRINDX_MASK) == PTE_ATTRINDX(MT_NORMAL_NC);
+}
+
/**
* kvm_phys_addr_ioremap - map a device range to guest IPA
*
@@ -495,9 +505,11 @@ void kvm_free_stage2_pgd(struct kvm_s2_mmu *mmu)
* @pa: The physical address of the device
* @size: The size of the mapping
* @writable: Whether or not to create a writable mapping
+ * @writecombine: Whether or not to create a writecombine mapping
*/
int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
- phys_addr_t pa, unsigned long size, bool writable)
+ phys_addr_t pa, unsigned long size, bool writable,
+ bool writecombine)
{
phys_addr_t addr;
int ret = 0;
@@ -505,6 +517,7 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
struct kvm_pgtable *pgt = kvm->arch.mmu.pgt;
enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_DEVICE |
KVM_PGTABLE_PROT_R |
+ (writecombine ? KVM_PGTABLE_PROT_WC : 0) |
(writable ? KVM_PGTABLE_PROT_W : 0);

size += offset_in_page(guest_ipa);
@@ -891,7 +904,8 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
}

if (device)
- prot |= KVM_PGTABLE_PROT_DEVICE;
+ prot |= KVM_PGTABLE_PROT_DEVICE |
+ (is_vma_write_combine(vma) ? KVM_PGTABLE_PROT_WC : 0);
else if (cpus_have_const_cap(ARM64_HAS_CACHE_DIC))
prot |= KVM_PGTABLE_PROT_X;

@@ -1357,7 +1371,8 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,

ret = kvm_phys_addr_ioremap(kvm, gpa, pa,
vm_end - vm_start,
- writable);
+ writable,
+ is_vma_write_combine(vma));
if (ret)
break;
}
diff --git a/arch/arm64/kvm/vgic/vgic-v2.c b/arch/arm64/kvm/vgic/vgic-v2.c
index 11934c2af2f4..6f921efea6c0 100644
--- a/arch/arm64/kvm/vgic/vgic-v2.c
+++ b/arch/arm64/kvm/vgic/vgic-v2.c
@@ -336,7 +336,7 @@ int vgic_v2_map_resources(struct kvm *kvm)
if (!static_branch_unlikely(&vgic_v2_cpuif_trap)) {
ret = kvm_phys_addr_ioremap(kvm, dist->vgic_cpu_base,
kvm_vgic_global_state.vcpu_base,
- KVM_VGIC_V2_CPU_SIZE, true);
+ KVM_VGIC_V2_CPU_SIZE, true, false);
if (ret) {
kvm_err("Unable to remap VGIC CPU to VCPU\n");
return ret;
--
2.17.1