[PATCH 08/26] mm: asi: Use separate PCIDs for restricted address spaces

From: Brendan Jackman
Date: Fri Jul 12 2024 - 13:03:41 EST


From: Junaid Shahid <junaids@xxxxxxxxxx>

Each restricted address space is assigned a separate PCID. Since there is
currently only one ASI instance per class for a given process, the PCID
is simply derived from the class index.

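This commit only sets the appropriate PCID when switching CR3; it does
not yet use the NOFLUSH bit, so each switch into a restricted address
space still flushes the TLB entries tagged with that PCID. Use of
NOFLUSH will be added by later patches.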

Signed-off-by: Junaid Shahid <junaids@xxxxxxxxxx>
Signed-off-by: Brendan Jackman <jackmanb@xxxxxxxxxx>
---
arch/x86/include/asm/asi.h | 10 +++++++++-
arch/x86/include/asm/tlbflush.h | 3 +++
arch/x86/mm/asi.c | 7 ++++---
arch/x86/mm/tlb.c | 44 +++++++++++++++++++++++++++++++++++++----
4 files changed, 56 insertions(+), 8 deletions(-)

diff --git a/arch/x86/include/asm/asi.h b/arch/x86/include/asm/asi.h
index df34a8c0560b..1a19a925300c 100644
--- a/arch/x86/include/asm/asi.h
+++ b/arch/x86/include/asm/asi.h
@@ -69,7 +69,14 @@
#define static_asi_enabled() cpu_feature_enabled(X86_FEATURE_ASI)

#define ASI_MAX_NUM_ORDER 2
-#define ASI_MAX_NUM (1 << ASI_MAX_NUM_ORDER)
+/*
+ * An ASI identifier is placed in the higher bits of the PCID so that each
+ * restricted address space gets a different PCID from the unrestricted
+ * one (see asi_pcid()). The identifier is asi_index + 1, since asi_index
+ * starts from 0; identifier 0 is thereby left to the unrestricted address
+ * space, which is why ASI_MAX_NUM is one less than 1 << ASI_MAX_NUM_ORDER.
+ */
+#define ASI_MAX_NUM ((1 << ASI_MAX_NUM_ORDER) - 1)

struct asi_hooks {
/*
@@ -101,6 +108,7 @@ struct asi {
struct asi_class *class;
struct mm_struct *mm;
int64_t ref_count;
+ u16 index;
};

DECLARE_PER_CPU_ALIGNED(struct asi *, curr_asi);
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index ed847567b25d..3605f6b99da7 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -392,6 +392,9 @@ static inline bool huge_pmd_needs_flush(pmd_t oldpmd, pmd_t newpmd)
#define huge_pmd_needs_flush huge_pmd_needs_flush

unsigned long build_cr3(pgd_t *pgd, u16 asid, unsigned long lam);
+unsigned long build_cr3_pcid(pgd_t *pgd, u16 pcid, unsigned long lam, bool noflush);
+
+u16 asi_pcid(struct asi *asi, u16 asid);

#ifdef CONFIG_ADDRESS_MASKING
static inline u64 tlbstate_lam_cr3_mask(void)
diff --git a/arch/x86/mm/asi.c b/arch/x86/mm/asi.c
index 2cd8e93a4415..0ba156f879d3 100644
--- a/arch/x86/mm/asi.c
+++ b/arch/x86/mm/asi.c
@@ -140,6 +140,7 @@ int asi_init(struct mm_struct *mm, int asi_index, struct asi **out_asi)

asi->class = &asi_class[asi_index];
asi->mm = mm;
+ asi->index = asi_index;

exit_unlock:
if (err)
@@ -174,6 +175,7 @@ EXPORT_SYMBOL_GPL(asi_destroy);
noinstr void __asi_enter(void)
{
u64 asi_cr3;
+ u16 pcid;
struct asi *target = asi_get_target(current);

/*
@@ -200,9 +202,8 @@ noinstr void __asi_enter(void)
*/
this_cpu_write(curr_asi, target);

- asi_cr3 = build_cr3(target->pgd,
- this_cpu_read(cpu_tlbstate.loaded_mm_asid),
- tlbstate_lam_cr3_mask());
+ pcid = asi_pcid(target, this_cpu_read(cpu_tlbstate.loaded_mm_asid));
+ asi_cr3 = build_cr3_pcid(target->pgd, pcid, tlbstate_lam_cr3_mask(), false);
write_cr3(asi_cr3);

if (target->class->ops.post_asi_enter)
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 9a5afeac9654..34d61b56d33f 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -98,7 +98,12 @@
# define PTI_CONSUMED_PCID_BITS 0
#endif

-#define CR3_AVAIL_PCID_BITS (X86_CR3_PCID_BITS - PTI_CONSUMED_PCID_BITS)
+#define ASI_CONSUMED_PCID_BITS ASI_MAX_NUM_ORDER
+#define ASI_PCID_BITS_SHIFT CR3_AVAIL_PCID_BITS
+#define CR3_AVAIL_PCID_BITS (X86_CR3_PCID_BITS - PTI_CONSUMED_PCID_BITS - \
+ ASI_CONSUMED_PCID_BITS)
+
+static_assert(BIT(CR3_AVAIL_PCID_BITS) > TLB_NR_DYN_ASIDS);

/*
* ASIDs are zero-based: 0->MAX_AVAIL_ASID are valid. -1 below to account
@@ -155,18 +160,23 @@ static inline u16 user_pcid(u16 asid)
return ret;
}

+static inline unsigned long __build_cr3(pgd_t *pgd, u16 pcid, unsigned long lam)
+{
+ return __sme_pa_nodebug(pgd) | pcid | lam;
+}
+
inline_or_noinstr unsigned long build_cr3(pgd_t *pgd, u16 asid, unsigned long lam)
{
- unsigned long cr3 = __sme_pa_nodebug(pgd) | lam;
+ u16 pcid = 0;

if (static_cpu_has(X86_FEATURE_PCID)) {
VM_WARN_ON_ONCE(asid > MAX_ASID_AVAILABLE);
- cr3 |= kern_pcid(asid);
+ pcid = kern_pcid(asid);
} else {
VM_WARN_ON_ONCE(asid != 0);
}

- return cr3;
+ return __build_cr3(pgd, pcid, lam);
}

static inline unsigned long build_cr3_noflush(pgd_t *pgd, u16 asid,
@@ -181,6 +191,19 @@ static inline unsigned long build_cr3_noflush(pgd_t *pgd, u16 asid,
return build_cr3(pgd, asid, lam) | CR3_NOFLUSH;
}

+inline_or_noinstr unsigned long build_cr3_pcid(pgd_t *pgd, u16 pcid,
+ unsigned long lam, bool noflush)
+{
+ u64 noflush_bit = 0;
+
+ if (!static_cpu_has(X86_FEATURE_PCID))
+ pcid = 0;
+ else if (noflush)
+ noflush_bit = CR3_NOFLUSH;
+
+ return __build_cr3(pgd, pcid, lam) | noflush_bit;
+}
+
/*
* We get here when we do something requiring a TLB invalidation
* but could not go invalidate all of the contexts. We do the
@@ -995,6 +1018,19 @@ static void put_flush_tlb_info(void)
#endif
}

+#ifdef CONFIG_MITIGATION_ADDRESS_SPACE_ISOLATION
+
+inline_or_noinstr u16 asi_pcid(struct asi *asi, u16 asid)
+{
+ return kern_pcid(asid) | ((asi->index + 1) << ASI_PCID_BITS_SHIFT);
+}
+
+#else /* CONFIG_MITIGATION_ADDRESS_SPACE_ISOLATION */
+
+u16 asi_pcid(struct asi *asi, u16 asid) { return kern_pcid(asid); }
+
+#endif /* CONFIG_MITIGATION_ADDRESS_SPACE_ISOLATION */
+
void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
unsigned long end, unsigned int stride_shift,
bool freed_tables)

--
2.45.2.993.g49e7a77208-goog