[PATCH 3/3] KVM: arm64: Normalize cache configuration

From: Akihiko Odaki
Date: Sun Dec 11 2022 - 00:18:04 EST


Before this change, the cache configuration of the physical CPU was
exposed to vcpus. This is problematic because the cache configuration a
vcpu sees varies when it migrates between vcpus with different cache
configurations.

Fabricate cache configuration from arm64_ftr_reg_ctrel0.sys_val, which
holds the CTR_EL0 value the userspace sees regardless of which physical
CPU it resides on.

HCR_TID2 is now always set as it is troublesome to detect the difference
of cache configurations among physical CPUs.

CSSELR_EL1 is now held in the memory instead of the corresponding
phyisccal register as the fabricated cache configuration may have a
cache level which does not exist in the physical CPU, and setting the
physical CSSELR_EL1 for the level results in an UNKNOWN behavior.

CLIDR_EL1 and CCSIDR_EL1 are now writable from the userspace so that
the VMM can restore the values saved with the old kernel.

Suggested-by: Marc Zyngier <maz@xxxxxxxxxx>
Signed-off-by: Akihiko Odaki <akihiko.odaki@xxxxxxxxxx>
---
arch/arm64/include/asm/kvm_arm.h | 3 +-
arch/arm64/include/asm/kvm_emulate.h | 4 -
arch/arm64/include/asm/kvm_host.h | 6 +-
arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h | 2 -
arch/arm64/kvm/reset.c | 1 +
arch/arm64/kvm/sys_regs.c | 232 ++++++++++++---------
6 files changed, 142 insertions(+), 106 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
index 8aa8492dafc0..44be46c280c1 100644
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -81,11 +81,12 @@
* SWIO: Turn set/way invalidates into set/way clean+invalidate
* PTW: Take a stage2 fault if a stage1 walk steps in device memory
* TID3: Trap EL1 reads of group 3 ID registers
+ * TID2: Trap CTR_EL0, CCSIDR2_EL1, CLIDR_EL1, and CSSELR_EL1
*/
#define HCR_GUEST_FLAGS (HCR_TSC | HCR_TSW | HCR_TWE | HCR_TWI | HCR_VM | \
HCR_BSU_IS | HCR_FB | HCR_TACR | \
HCR_AMO | HCR_SWIO | HCR_TIDCP | HCR_RW | HCR_TLOR | \
- HCR_FMO | HCR_IMO | HCR_PTW | HCR_TID3 )
+ HCR_FMO | HCR_IMO | HCR_PTW | HCR_TID3 | HCR_TID2)
#define HCR_VIRT_EXCP_MASK (HCR_VSE | HCR_VI | HCR_VF)
#define HCR_HOST_NVHE_FLAGS (HCR_RW | HCR_API | HCR_APK | HCR_ATA)
#define HCR_HOST_NVHE_PROTECTED_FLAGS (HCR_HOST_NVHE_FLAGS | HCR_TSC)
diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
index 9bdba47f7e14..30c4598d643b 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -88,10 +88,6 @@ static inline void vcpu_reset_hcr(struct kvm_vcpu *vcpu)
if (vcpu_el1_is_32bit(vcpu))
vcpu->arch.hcr_el2 &= ~HCR_RW;

- if (cpus_have_const_cap(ARM64_MISMATCHED_CACHE_TYPE) ||
- vcpu_el1_is_32bit(vcpu))
- vcpu->arch.hcr_el2 |= HCR_TID2;
-
if (kvm_has_mte(vcpu->kvm))
vcpu->arch.hcr_el2 |= HCR_ATA;
}
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 45e2136322ba..27abf81c6910 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -178,6 +178,7 @@ struct kvm_vcpu_fault_info {
enum vcpu_sysreg {
__INVALID_SYSREG__, /* 0 is reserved as an invalid value */
MPIDR_EL1, /* MultiProcessor Affinity Register */
+ CLIDR_EL1, /* Cache Level ID Register */
CSSELR_EL1, /* Cache Size Selection Register */
SCTLR_EL1, /* System Control Register */
ACTLR_EL1, /* Auxiliary Control Register */
@@ -417,6 +418,9 @@ struct kvm_vcpu_arch {
u64 last_steal;
gpa_t base;
} steal;
+
+ /* Per-vcpu CCSIDR override or NULL */
+ u32 *ccsidr;
};

/*
@@ -621,7 +625,6 @@ static inline bool __vcpu_read_sys_reg_from_cpu(int reg, u64 *val)
return false;

switch (reg) {
- case CSSELR_EL1: *val = read_sysreg_s(SYS_CSSELR_EL1); break;
case SCTLR_EL1: *val = read_sysreg_s(SYS_SCTLR_EL12); break;
case CPACR_EL1: *val = read_sysreg_s(SYS_CPACR_EL12); break;
case TTBR0_EL1: *val = read_sysreg_s(SYS_TTBR0_EL12); break;
@@ -666,7 +669,6 @@ static inline bool __vcpu_write_sys_reg_to_cpu(u64 val, int reg)
return false;

switch (reg) {
- case CSSELR_EL1: write_sysreg_s(val, SYS_CSSELR_EL1); break;
case SCTLR_EL1: write_sysreg_s(val, SYS_SCTLR_EL12); break;
case CPACR_EL1: write_sysreg_s(val, SYS_CPACR_EL12); break;
case TTBR0_EL1: write_sysreg_s(val, SYS_TTBR0_EL12); break;
diff --git a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h
index baa5b9b3dde5..147cb4c846c6 100644
--- a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h
+++ b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h
@@ -39,7 +39,6 @@ static inline bool ctxt_has_mte(struct kvm_cpu_context *ctxt)

static inline void __sysreg_save_el1_state(struct kvm_cpu_context *ctxt)
{
- ctxt_sys_reg(ctxt, CSSELR_EL1) = read_sysreg(csselr_el1);
ctxt_sys_reg(ctxt, SCTLR_EL1) = read_sysreg_el1(SYS_SCTLR);
ctxt_sys_reg(ctxt, CPACR_EL1) = read_sysreg_el1(SYS_CPACR);
ctxt_sys_reg(ctxt, TTBR0_EL1) = read_sysreg_el1(SYS_TTBR0);
@@ -95,7 +94,6 @@ static inline void __sysreg_restore_user_state(struct kvm_cpu_context *ctxt)
static inline void __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt)
{
write_sysreg(ctxt_sys_reg(ctxt, MPIDR_EL1), vmpidr_el2);
- write_sysreg(ctxt_sys_reg(ctxt, CSSELR_EL1), csselr_el1);

if (has_vhe() ||
!cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) {
diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c
index 5ae18472205a..7980983dbad7 100644
--- a/arch/arm64/kvm/reset.c
+++ b/arch/arm64/kvm/reset.c
@@ -157,6 +157,7 @@ void kvm_arm_vcpu_destroy(struct kvm_vcpu *vcpu)
if (sve_state)
kvm_unshare_hyp(sve_state, sve_state + vcpu_sve_state_size(vcpu));
kfree(sve_state);
+ kfree(vcpu->arch.ccsidr);
}

static void kvm_vcpu_reset_sve(struct kvm_vcpu *vcpu)
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index f4a7c5abcbca..e7edd9ffadc3 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -11,6 +11,7 @@

#include <linux/bitfield.h>
#include <linux/bsearch.h>
+#include <linux/cacheinfo.h>
#include <linux/kvm_host.h>
#include <linux/mm.h>
#include <linux/printk.h>
@@ -81,25 +82,64 @@ void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg)
__vcpu_sys_reg(vcpu, reg) = val;
}

-/* 3 bits per cache level, as per CLIDR, but non-existent caches always 0 */
-static u32 cache_levels;
-
/* CSSELR values; used to index KVM_REG_ARM_DEMUX_ID_CCSIDR */
#define CSSELR_MAX 14

/* Which cache CCSIDR represents depends on CSSELR value. */
-static u32 get_ccsidr(u32 csselr)
+static u32 get_ccsidr(struct kvm_vcpu *vcpu, u32 csselr)
+{
+ u64 ctr_el0;
+ int field;
+
+ if (vcpu->arch.ccsidr)
+ return vcpu->arch.ccsidr[csselr];
+
+ ctr_el0 = arm64_ftr_reg_ctrel0.sys_val;
+ field = csselr & CSSELR_EL1_InD ? CTR_EL0_IminLine_SHIFT : CTR_EL0_DminLine_SHIFT;
+
+ /*
+ * Fabricate a CCSIDR value as the overriding value does not exist.
+ * The real CCSIDR value will not be used as it can vary by the
+ * physical CPU which the vcpu currently resides in.
+ *
+ * The line size is determined with arm64_ftr_reg_ctrel0.sys_val, which
+ * should be valid for all CPUs even if they have different cache
+ * configuration.
+ *
+ * The associativity bits are cleared, meaning the geometry of all data
+ * and unified caches (which are guaranteed to be PIPT and thus
+ * non-aliasing) are 1 set and 1 way.
+ * Guests should not be doing cache operations by set/way at all, and
+ * for this reason, we trap them and attempt to infer the intent, so
+ * that we can flush the entire guest's address space at the appropriate
+ * time. The exposed geometry minimizes the number of the traps.
+ * [If guests should attempt to infer aliasing properties from the
+ * geometry (which is not permitted by the architecture), they would
+ * only do so for virtually indexed caches.]
+ */
+ return cpuid_feature_extract_unsigned_field(ctr_el0, field) - 2;
+}
+
+static int set_ccsidr(struct kvm_vcpu *vcpu, u32 csselr, u32 val)
{
- u32 ccsidr;
+ u32 i;
+
+ if (!vcpu->arch.ccsidr) {
+ if (val == get_ccsidr(vcpu, csselr))
+ return 0;
+
+ vcpu->arch.ccsidr =
+ kmalloc_array(CSSELR_MAX, sizeof(u32), GFP_KERNEL);
+ if (!vcpu->arch.ccsidr)
+ return -ENOMEM;
+
+ for (i = 0; i < CSSELR_MAX; i++)
+ vcpu->arch.ccsidr[i] = get_ccsidr(vcpu, i);
+ }

- /* Make sure noone else changes CSSELR during this! */
- local_irq_disable();
- write_sysreg(csselr, csselr_el1);
- isb();
- ccsidr = read_sysreg(ccsidr_el1);
- local_irq_enable();
+ vcpu->arch.ccsidr[csselr] = val;

- return ccsidr;
+ return 0;
}

/*
@@ -1124,6 +1164,12 @@ static u64 read_id_reg(const struct kvm_vcpu *vcpu, struct sys_reg_desc const *r
ID_DFR0_PERFMON_SHIFT,
kvm_vcpu_has_pmu(vcpu) ? ID_DFR0_PERFMON_8_4 : 0);
break;
+ case SYS_ID_AA64MMFR2_EL1:
+ val &= ~ID_AA64MMFR2_EL1_CCIDX_MASK;
+ break;
+ case SYS_ID_MMFR4_EL1:
+ val &= ~ARM64_FEATURE_MASK(ID_MMFR4_CCIDX);
+ break;
}

return val;
@@ -1275,10 +1321,64 @@ static bool access_clidr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
if (p->is_write)
return write_to_read_only(vcpu, p, r);

- p->regval = read_sysreg(clidr_el1);
+ p->regval = __vcpu_sys_reg(vcpu, r->reg);
return true;
}

+/*
+ * Fabricate a CLIDR_EL1 value instead of using the real value, which can vary
+ * by the physical CPU which the vcpu currently resides in.
+ */
+static void reset_clidr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
+{
+ u64 dic = arm64_ftr_reg_ctrel0.sys_val & CTR_EL0_DIC;
+ u64 idc = arm64_ftr_reg_ctrel0.sys_val & CTR_EL0_IDC;
+ u64 clidr;
+
+ if (dic && idc) {
+ /*
+ * No cache maintenance required to ensure the PoU so have only
+ * one unified cache level for LoC.
+ */
+ clidr = 1 << CLIDR_LOC_SHIFT;
+ clidr |= CACHE_TYPE_UNIFIED << CLIDR_CTYPE_SHIFT(1);
+ } else {
+ /*
+ * Cache maintenance required to ensure the PoU. Let L1 be a
+ * separate cache to be invalidated or cleaned when ensuring
+ * the PoU.
+ */
+ clidr = (1 << CLIDR_LOUU_SHIFT) | (1 << CLIDR_LOUIS_SHIFT);
+
+ /*
+ * Instruction cache invalidation to the PoU is required so
+ * let L1 have an instruction cache.
+ */
+ if (!dic)
+ clidr |= CACHE_TYPE_INST << CLIDR_CTYPE_SHIFT(1);
+
+ if (idc) {
+ /*
+ * Data cache clean to the PoU is not required so
+ * L1 will not have data cache. Let L2 be a unified
+ * cache for LoC.
+ */
+ clidr |= 2 << CLIDR_LOC_SHIFT;
+ clidr |= CACHE_TYPE_UNIFIED << CLIDR_CTYPE_SHIFT(2);
+ } else {
+ /*
+ * Data cache clean to the PoU is required so let L1
+ * have a data cache. As L1 has a data cache, it can
+ * be marked as LoC too.
+ */
+ clidr |= 1 << CLIDR_LOC_SHIFT;
+ clidr |= CACHE_TYPE_DATA << CLIDR_CTYPE_SHIFT(1);
+ }
+ }
+
+ __vcpu_sys_reg(vcpu, r->reg) = clidr;
+}
+
static bool access_csselr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
const struct sys_reg_desc *r)
{
@@ -1300,25 +1400,19 @@ static bool access_ccsidr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
return write_to_read_only(vcpu, p, r);

csselr = vcpu_read_sys_reg(vcpu, CSSELR_EL1);
- p->regval = get_ccsidr(csselr);
+ if (csselr >= CSSELR_MAX)
+ return undef_access(vcpu, p, r);

- /*
- * Guests should not be doing cache operations by set/way at all, and
- * for this reason, we trap them and attempt to infer the intent, so
- * that we can flush the entire guest's address space at the appropriate
- * time.
- * To prevent this trapping from causing performance problems, let's
- * expose the geometry of all data and unified caches (which are
- * guaranteed to be PIPT and thus non-aliasing) as 1 set and 1 way.
- * [If guests should attempt to infer aliasing properties from the
- * geometry (which is not permitted by the architecture), they would
- * only do so for virtually indexed caches.]
- */
- if (!(csselr & 1)) // data or unified cache
- p->regval &= ~GENMASK(27, 3);
+ p->regval = get_ccsidr(vcpu, csselr);
return true;
}

+static void reset_ccsidr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
+{
+ kfree(vcpu->arch.ccsidr);
+ vcpu->arch.ccsidr = NULL;
+}
+
static unsigned int mte_visibility(const struct kvm_vcpu *vcpu,
const struct sys_reg_desc *rd)
{
@@ -1603,8 +1697,9 @@ static const struct sys_reg_desc sys_reg_descs[] = {

{ SYS_DESC(SYS_CNTKCTL_EL1), NULL, reset_val, CNTKCTL_EL1, 0},

- { SYS_DESC(SYS_CCSIDR_EL1), access_ccsidr },
- { SYS_DESC(SYS_CLIDR_EL1), access_clidr },
+ { SYS_DESC(SYS_CCSIDR_EL1), access_ccsidr, reset_ccsidr },
+ { SYS_DESC(SYS_CLIDR_EL1), access_clidr, reset_clidr, CLIDR_EL1 },
+ { SYS_DESC(SYS_CCSIDR2_EL1), undef_access },
{ SYS_DESC(SYS_SMIDR_EL1), undef_access },
{ SYS_DESC(SYS_CSSELR_EL1), access_csselr, reset_unknown, CSSELR_EL1 },
{ SYS_DESC(SYS_CTR_EL0), access_ctr },
@@ -2106,6 +2201,7 @@ static const struct sys_reg_desc cp15_regs[] = {

{ Op1(1), CRn( 0), CRm( 0), Op2(0), access_ccsidr },
{ Op1(1), CRn( 0), CRm( 0), Op2(1), access_clidr },
+ { Op1(1), CRn(0), CRm(0), Op2(2), undef_access },
{ Op1(2), CRn( 0), CRm( 0), Op2(0), access_csselr, NULL, CSSELR_EL1 },
};

@@ -2611,7 +2707,6 @@ id_to_sys_reg_desc(struct kvm_vcpu *vcpu, u64 id,

FUNCTION_INVARIANT(midr_el1)
FUNCTION_INVARIANT(revidr_el1)
-FUNCTION_INVARIANT(clidr_el1)
FUNCTION_INVARIANT(aidr_el1)

static void get_ctr_el0(struct kvm_vcpu *v, const struct sys_reg_desc *r)
@@ -2623,7 +2718,6 @@ static void get_ctr_el0(struct kvm_vcpu *v, const struct sys_reg_desc *r)
static struct sys_reg_desc invariant_sys_regs[] = {
{ SYS_DESC(SYS_MIDR_EL1), NULL, get_midr_el1 },
{ SYS_DESC(SYS_REVIDR_EL1), NULL, get_revidr_el1 },
- { SYS_DESC(SYS_CLIDR_EL1), NULL, get_clidr_el1 },
{ SYS_DESC(SYS_AIDR_EL1), NULL, get_aidr_el1 },
{ SYS_DESC(SYS_CTR_EL0), NULL, get_ctr_el0 },
};
@@ -2660,33 +2754,7 @@ static int set_invariant_sys_reg(u64 id, u64 __user *uaddr)
return 0;
}

-static bool is_valid_cache(u32 val)
-{
- u32 level, ctype;
-
- if (val >= CSSELR_MAX)
- return false;
-
- /* Bottom bit is Instruction or Data bit. Next 3 bits are level. */
- level = (val >> 1);
- ctype = (cache_levels >> (level * 3)) & 7;
-
- switch (ctype) {
- case 0: /* No cache */
- return false;
- case 1: /* Instruction cache only */
- return (val & 1);
- case 2: /* Data cache only */
- case 4: /* Unified cache */
- return !(val & 1);
- case 3: /* Separate instruction and data caches */
- return true;
- default: /* Reserved: we can't know instruction or data. */
- return false;
- }
-}
-
-static int demux_c15_get(u64 id, void __user *uaddr)
+static int demux_c15_get(struct kvm_vcpu *vcpu, u64 id, void __user *uaddr)
{
u32 val;
u32 __user *uval = uaddr;
@@ -2702,16 +2770,16 @@ static int demux_c15_get(u64 id, void __user *uaddr)
return -ENOENT;
val = (id & KVM_REG_ARM_DEMUX_VAL_MASK)
>> KVM_REG_ARM_DEMUX_VAL_SHIFT;
- if (!is_valid_cache(val))
+ if (val >= CSSELR_MAX)
return -ENOENT;

- return put_user(get_ccsidr(val), uval);
+ return put_user(get_ccsidr(vcpu, val), uval);
default:
return -ENOENT;
}
}

-static int demux_c15_set(u64 id, void __user *uaddr)
+static int demux_c15_set(struct kvm_vcpu *vcpu, u64 id, void __user *uaddr)
{
u32 val, newval;
u32 __user *uval = uaddr;
@@ -2727,16 +2795,13 @@ static int demux_c15_set(u64 id, void __user *uaddr)
return -ENOENT;
val = (id & KVM_REG_ARM_DEMUX_VAL_MASK)
>> KVM_REG_ARM_DEMUX_VAL_SHIFT;
- if (!is_valid_cache(val))
+ if (val >= CSSELR_MAX)
return -ENOENT;

if (get_user(newval, uval))
return -EFAULT;

- /* This is also invariant: you can't change it. */
- if (newval != get_ccsidr(val))
- return -EINVAL;
- return 0;
+ return set_ccsidr(vcpu, val, newval);
default:
return -ENOENT;
}
@@ -2773,7 +2838,7 @@ int kvm_arm_sys_reg_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg
int err;

if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_DEMUX)
- return demux_c15_get(reg->id, uaddr);
+ return demux_c15_get(vcpu, reg->id, uaddr);

err = get_invariant_sys_reg(reg->id, uaddr);
if (err != -ENOENT)
@@ -2817,7 +2882,7 @@ int kvm_arm_sys_reg_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg
int err;

if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_DEMUX)
- return demux_c15_set(reg->id, uaddr);
+ return demux_c15_set(vcpu, reg->id, uaddr);

err = set_invariant_sys_reg(reg->id, uaddr);
if (err != -ENOENT)
@@ -2829,13 +2894,7 @@ int kvm_arm_sys_reg_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg

static unsigned int num_demux_regs(void)
{
- unsigned int i, count = 0;
-
- for (i = 0; i < CSSELR_MAX; i++)
- if (is_valid_cache(i))
- count++;
-
- return count;
+ return CSSELR_MAX;
}

static int write_demux_regids(u64 __user *uindices)
@@ -2845,8 +2904,6 @@ static int write_demux_regids(u64 __user *uindices)

val |= KVM_REG_ARM_DEMUX_ID_CCSIDR;
for (i = 0; i < CSSELR_MAX; i++) {
- if (!is_valid_cache(i))
- continue;
if (put_user(val | i, uindices))
return -EFAULT;
uindices++;
@@ -2948,7 +3005,6 @@ int kvm_sys_reg_table_init(void)
{
bool valid = true;
unsigned int i;
- struct sys_reg_desc clidr;

/* Make sure tables are unique and in order. */
valid &= check_sysreg_table(sys_reg_descs, ARRAY_SIZE(sys_reg_descs), false);
@@ -2965,23 +3021,5 @@ int kvm_sys_reg_table_init(void)
for (i = 0; i < ARRAY_SIZE(invariant_sys_regs); i++)
invariant_sys_regs[i].reset(NULL, &invariant_sys_regs[i]);

- /*
- * CLIDR format is awkward, so clean it up. See ARM B4.1.20:
- *
- * If software reads the Cache Type fields from Ctype1
- * upwards, once it has seen a value of 0b000, no caches
- * exist at further-out levels of the hierarchy. So, for
- * example, if Ctype3 is the first Cache Type field with a
- * value of 0b000, the values of Ctype4 to Ctype7 must be
- * ignored.
- */
- get_clidr_el1(NULL, &clidr); /* Ugly... */
- cache_levels = clidr.val;
- for (i = 0; i < 7; i++)
- if (((cache_levels >> (i*3)) & 7) == 0)
- break;
- /* Clear all higher bits. */
- cache_levels &= (1 << (i*3))-1;
-
return 0;
}
--
2.38.1