Re: [PATCH RFC 11/14] arm64: Move the ASID allocator code in a separate file

From: Palmer Dabbelt
Date: Wed Jun 05 2019 - 16:46:07 EST

Next message: Gustavo A. R. Silva: "[PATCH] ACPICA: utids: Use struct_size() helper"
Previous message: Christian Lamparter: "Re: [PATCH 5/6] arm64: dts: Add ipq6018 SoC and CP01 board support"
In reply to: Julien Grall: "Re: [PATCH RFC 11/14] arm64: Move the ASID allocator code in a separate file"
Messages sorted by: [ date ] [ thread ] [ subject ] [ author ]

On Wed, 05 Jun 2019 09:56:03 PDT (-0700), julien.grall@xxxxxxx wrote:

Hi,

I am CCing the RISC-V folks to see if there is any interest in sharing the code.

@RISC-V: I noticed you are discussing importing a version of the ASID
allocator into RISC-V. At first look, the code looks quite similar. Would the
library below help you?

Thanks! I didn't look that closely at the original patches because the
argument against them was just "we don't have any way to test this".
Unfortunately, we don't have the constraint that there are more ASIDs than CPUs
in the system. As a result I don't think we can use this ASID allocation
strategy.

Cheers,

On 21/03/2019 16:36, Julien Grall wrote:

We will want to re-use the ASID allocator in a separate context (e.g.
allocating VMIDs). So move the code into a new file.

The function asid_check_context has been moved into the header as a static
inline function because we want to avoid adding a branch when checking if the
ASID is still valid.

Signed-off-by: Julien Grall <julien.grall@xxxxxxx>

---

This code will be used in the virt code for allocating VMIDs. I am not
entirely sure where to place it. Lib could potentially be a good place but I
am not entirely convinced the algo as it is could be used by other
architectures.

Looking at x86, it seems that it will not be possible to re-use it because
the number of PCIDs (aka ASIDs) could be smaller than the number of CPUs.
See the commit message of 10af6235e0d327d42e1bad974385197817923dc1 ("x86/mm:
Implement PCID based optimization: try to preserve old TLB entries using
PCID").
---
arch/arm64/include/asm/asid.h | 77 ++++++++++++++
arch/arm64/lib/Makefile | 2 +
arch/arm64/lib/asid.c | 185 +++++++++++++++++++++++++++++++++
arch/arm64/mm/context.c | 235 +-----------------------------------------
4 files changed, 267 insertions(+), 232 deletions(-)
create mode 100644 arch/arm64/include/asm/asid.h
create mode 100644 arch/arm64/lib/asid.c

diff --git a/arch/arm64/include/asm/asid.h b/arch/arm64/include/asm/asid.h
new file mode 100644
index 000000000000..bb62b587f37f
--- /dev/null
+++ b/arch/arm64/include/asm/asid.h
@@ -0,0 +1,77 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_ASM_ASID_H
+#define __ASM_ASM_ASID_H
+
+#include <linux/atomic.h>
+#include <linux/compiler.h>
+#include <linux/cpumask.h>
+#include <linux/percpu.h>
+#include <linux/spinlock.h>
+
+struct asid_info
+{
+ atomic64_t generation; /* Current generation, held in the bits above 'bits' */
+ unsigned long *map; /* Bitmap of allocated ASID indices */
+ atomic64_t __percpu *active; /* Per-CPU active ASID; zeroed on rollover */
+ u64 __percpu *reserved; /* Per-CPU ASID preserved across a rollover */
+ u32 bits; /* Number of bits in an ASID */
+ /* Lock protecting the structure */
+ raw_spinlock_t lock;
+ /* CPUs that must flush their context on their next asid_new_context() call */
+ cpumask_t flush_pending;
+ /* log2 of the number of ASIDs allocated per context (shift value) */
+ unsigned int ctxt_shift;
+ /* Callback to locally flush the context. */
+ void (*flush_cpu_ctxt_cb)(void);
+};
+
+#define NUM_ASIDS(info) (1UL << ((info)->bits))
+#define NUM_CTXT_ASIDS(info) (NUM_ASIDS(info) >> (info)->ctxt_shift)
+
+#define active_asid(info, cpu) *per_cpu_ptr((info)->active, cpu)
+
+void asid_new_context(struct asid_info *info, atomic64_t *pasid,
+ unsigned int cpu);
+
+/*
+ * Check that the ASID is still valid for the context. If not, generate a new ASID.
+ *
+ * @pasid: Pointer to the current ASID batch
+ * @cpu: current CPU ID. Must have been acquired through get_cpu()
+ */
+static inline void asid_check_context(struct asid_info *info,
+ atomic64_t *pasid, unsigned int cpu)
+{
+ u64 asid, old_active_asid;
+
+ asid = atomic64_read(pasid);
+
+ /*
+ * The memory ordering here is subtle.
+ * If our active_asid is non-zero and the ASID matches the current
+ * generation, then we update the active_asid entry with a relaxed
+ * cmpxchg. Racing with a concurrent rollover means that either:
+ *
+ * - We get a zero back from the cmpxchg and end up waiting on the
+ * lock. Taking the lock synchronises with the rollover and so
+ * we are forced to see the updated generation.
+ *
+ * - We get a valid ASID back from the cmpxchg, which means the
+ * relaxed xchg in flush_context will treat us as reserved
+ * because atomic RmWs are totally ordered for a given location.
+ */
+ old_active_asid = atomic64_read(&active_asid(info, cpu));
+ if (old_active_asid &&
+ !((asid ^ atomic64_read(&info->generation)) >> info->bits) &&
+ atomic64_cmpxchg_relaxed(&active_asid(info, cpu),
+ old_active_asid, asid))
+ return;
+
+ asid_new_context(info, pasid, cpu);
+}
+
+int asid_allocator_init(struct asid_info *info,
+ u32 bits, unsigned int asid_per_ctxt,
+ void (*flush_cpu_ctxt_cb)(void));
+
+#endif
diff --git a/arch/arm64/lib/Makefile b/arch/arm64/lib/Makefile
index 5540a1638baf..720df5ee2aa2 100644
--- a/arch/arm64/lib/Makefile
+++ b/arch/arm64/lib/Makefile
@@ -5,6 +5,8 @@ lib-y := clear_user.o delay.o copy_from_user.o \
memcmp.o strcmp.o strncmp.o strlen.o strnlen.o \
strchr.o strrchr.o tishift.o

+lib-y += asid.o
+
ifeq ($(CONFIG_KERNEL_MODE_NEON), y)
obj-$(CONFIG_XOR_BLOCKS) += xor-neon.o
CFLAGS_REMOVE_xor-neon.o += -mgeneral-regs-only
diff --git a/arch/arm64/lib/asid.c b/arch/arm64/lib/asid.c
new file mode 100644
index 000000000000..72b71bfb32be
--- /dev/null
+++ b/arch/arm64/lib/asid.c
@@ -0,0 +1,185 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Generic ASID allocator.
+ *
+ * Based on arch/arm/mm/context.c
+ *
+ * Copyright (C) 2002-2003 Deep Blue Solutions Ltd, all rights reserved.
+ * Copyright (C) 2012 ARM Ltd.
+ */
+
+#include <linux/slab.h>
+
+#include <asm/asid.h>
+
+#define reserved_asid(info, cpu) *per_cpu_ptr((info)->reserved, cpu)
+
+#define ASID_MASK(info) (~GENMASK((info)->bits - 1, 0))
+#define ASID_FIRST_VERSION(info) (1UL << ((info)->bits))
+
+#define asid2idx(info, asid) (((asid) & ~ASID_MASK(info)) >> (info)->ctxt_shift)
+#define idx2asid(info, idx) (((idx) << (info)->ctxt_shift) & ~ASID_MASK(info))
+
+static void flush_context(struct asid_info *info)
+{
+ int i;
+ u64 asid;
+
+ /* Update the list of reserved ASIDs and the ASID bitmap. */
+ bitmap_clear(info->map, 0, NUM_CTXT_ASIDS(info));
+
+ for_each_possible_cpu(i) {
+ asid = atomic64_xchg_relaxed(&active_asid(info, i), 0);
+ /*
+ * If this CPU has already been through a
+ * rollover, but hasn't run another task in
+ * the meantime, we must preserve its reserved
+ * ASID, as this is the only trace we have of
+ * the process it is still running.
+ */
+ if (asid == 0)
+ asid = reserved_asid(info, i);
+ __set_bit(asid2idx(info, asid), info->map); /* re-mark it in the cleared map */
+ reserved_asid(info, i) = asid;
+ }
+
+ /*
+ * Queue a TLB invalidation for each CPU to perform on next
+ * context-switch
+ */
+ cpumask_setall(&info->flush_pending);
+}
+
+static bool check_update_reserved_asid(struct asid_info *info, u64 asid,
+ u64 newasid)
+{
+ int cpu;
+ bool hit = false; /* set once any CPU's reserved ASID matches 'asid' */
+
+ /*
+ * Iterate over the set of reserved ASIDs looking for a match.
+ * If we find one, then we can update our mm to use newasid
+ * (i.e. the same ASID in the current generation) but we can't
+ * exit the loop early, since we need to ensure that all copies
+ * of the old ASID are updated to reflect the mm. Failure to do
+ * so could result in us missing the reserved ASID in a future
+ * generation.
+ */
+ for_each_possible_cpu(cpu) {
+ if (reserved_asid(info, cpu) == asid) {
+ hit = true;
+ reserved_asid(info, cpu) = newasid;
+ }
+ }
+
+ return hit;
+}
+
+static u64 new_context(struct asid_info *info, atomic64_t *pasid)
+{
+ static u32 cur_idx = 1; /* NOTE(review): function-static, so shared by every asid_info instance */
+ u64 asid = atomic64_read(pasid);
+ u64 generation = atomic64_read(&info->generation);
+
+ if (asid != 0) {
+ u64 newasid = generation | (asid & ~ASID_MASK(info));
+
+ /*
+ * If our current ASID was active during a rollover, we
+ * can continue to use it and this was just a false alarm.
+ */
+ if (check_update_reserved_asid(info, asid, newasid))
+ return newasid;
+
+ /*
+ * We had a valid ASID in a previous life, so try to re-use
+ * it if possible.
+ */
+ if (!__test_and_set_bit(asid2idx(info, asid), info->map))
+ return newasid;
+ }
+
+ /*
+ * Allocate a free ASID. If we can't find one, take a note of the
+ * currently active ASIDs and mark the TLBs as requiring flushes. We
+ * always count from ASID #2 (index 1), as we use ASID #0 when setting
+ * a reserved TTBR0 for the init_mm and we allocate ASIDs in even/odd
+ * pairs.
+ */
+ asid = find_next_zero_bit(info->map, NUM_CTXT_ASIDS(info), cur_idx);
+ if (asid != NUM_CTXT_ASIDS(info))
+ goto set_asid;
+
+ /* We're out of ASIDs, so increment the global generation count */
+ generation = atomic64_add_return_relaxed(ASID_FIRST_VERSION(info),
+ &info->generation);
+ flush_context(info);
+
+ /* We have more ASIDs than CPUs, so this will always succeed */
+ asid = find_next_zero_bit(info->map, NUM_CTXT_ASIDS(info), 1);
+
+set_asid:
+ __set_bit(asid, info->map);
+ cur_idx = asid; /* the next search starts from this index */
+ return idx2asid(info, asid) | generation;
+}
+
+/*
+ * Generate a new ASID for the context.
+ *
+ * @pasid: Pointer to the current ASID batch allocated. It will be updated
+ * with the new ASID batch.
+ * @cpu: current CPU ID. Must have been acquired through get_cpu()
+ */
+void asid_new_context(struct asid_info *info, atomic64_t *pasid,
+ unsigned int cpu)
+{
+ unsigned long flags;
+ u64 asid;
+
+ raw_spin_lock_irqsave(&info->lock, flags);
+ /* Re-check, now under the lock, that our ASID belongs to the current generation. */
+ asid = atomic64_read(pasid);
+ if ((asid ^ atomic64_read(&info->generation)) >> info->bits) {
+ asid = new_context(info, pasid);
+ atomic64_set(pasid, asid);
+ }
+
+ if (cpumask_test_and_clear_cpu(cpu, &info->flush_pending))
+ info->flush_cpu_ctxt_cb(); /* flush queued for this CPU by flush_context() */
+
+ atomic64_set(&active_asid(info, cpu), asid);
+ raw_spin_unlock_irqrestore(&info->lock, flags);
+}
+
+/*
+ * Initialize the ASID allocator. Returns 0 on success, -ENOMEM otherwise.
+ * info->active and info->reserved are not set here; the caller must do so.
+ * @info: Pointer to the asid allocator structure
+ * @bits: Number of bits in an ASID (1 << bits ASIDs are available)
+ * @asid_per_ctxt: Number of ASIDs to allocate per-context (contiguously for a
+ * given context). This value should be a power of 2.
+ * @flush_cpu_ctxt_cb: Callback used to flush the context on the local CPU.
+ */
+int asid_allocator_init(struct asid_info *info,
+ u32 bits, unsigned int asid_per_ctxt,
+ void (*flush_cpu_ctxt_cb)(void))
+{
+ info->bits = bits;
+ info->ctxt_shift = ilog2(asid_per_ctxt);
+ info->flush_cpu_ctxt_cb = flush_cpu_ctxt_cb;
+ /*
+ * Expect allocation after rollover to fail if we don't have at least
+ * one more ASID than CPUs. ASID #0 is always reserved.
+ */
+ WARN_ON(NUM_CTXT_ASIDS(info) - 1 <= num_possible_cpus());
+ atomic64_set(&info->generation, ASID_FIRST_VERSION(info));
+ info->map = kcalloc(BITS_TO_LONGS(NUM_CTXT_ASIDS(info)),
+ sizeof(*info->map), GFP_KERNEL);
+ if (!info->map)
+ return -ENOMEM;
+
+ raw_spin_lock_init(&info->lock);
+
+ return 0;
+}
diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c
index 678a57b77c91..95ee7711a2ef 100644
--- a/arch/arm64/mm/context.c
+++ b/arch/arm64/mm/context.c
@@ -22,47 +22,22 @@
#include <linux/slab.h>
#include <linux/mm.h>

+#include <asm/asid.h>
#include <asm/cpufeature.h>
#include <asm/mmu_context.h>
#include <asm/smp.h>
#include <asm/tlbflush.h>

-struct asid_info
-{
- atomic64_t generation;
- unsigned long *map;
- atomic64_t __percpu *active;
- u64 __percpu *reserved;
- u32 bits;
- raw_spinlock_t lock;
- /* Which CPU requires context flush on next call */
- cpumask_t flush_pending;
- /* Number of ASID allocated by context (shift value) */
- unsigned int ctxt_shift;
- /* Callback to locally flush the context. */
- void (*flush_cpu_ctxt_cb)(void);
-} asid_info;
-
-#define active_asid(info, cpu) *per_cpu_ptr((info)->active, cpu)
-#define reserved_asid(info, cpu) *per_cpu_ptr((info)->reserved, cpu)
-
static DEFINE_PER_CPU(atomic64_t, active_asids);
static DEFINE_PER_CPU(u64, reserved_asids);

-#define ASID_MASK(info) (~GENMASK((info)->bits - 1, 0))
-#define NUM_ASIDS(info) (1UL << ((info)->bits))
-
-#define ASID_FIRST_VERSION(info) NUM_ASIDS(info)
-
#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
#define ASID_PER_CONTEXT 2
#else
#define ASID_PER_CONTEXT 1
#endif

-#define NUM_CTXT_ASIDS(info) (NUM_ASIDS(info) >> (info)->ctxt_shift)
-#define asid2idx(info, asid) (((asid) & ~ASID_MASK(info)) >> (info)->ctxt_shift)
-#define idx2asid(info, idx) (((idx) << (info)->ctxt_shift) & ~ASID_MASK(info))
+struct asid_info asid_info;

/* Get the ASIDBits supported by the current CPU */
static u32 get_cpu_asid_bits(void)
@@ -102,178 +77,6 @@ void verify_cpu_asid_bits(void)
}
}

-static void flush_context(struct asid_info *info)
-{
- int i;
- u64 asid;
-
- /* Update the list of reserved ASIDs and the ASID bitmap. */
- bitmap_clear(info->map, 0, NUM_CTXT_ASIDS(info));
-
- for_each_possible_cpu(i) {
- asid = atomic64_xchg_relaxed(&active_asid(info, i), 0);
- /*
- * If this CPU has already been through a
- * rollover, but hasn't run another task in
- * the meantime, we must preserve its reserved
- * ASID, as this is the only trace we have of
- * the process it is still running.
- */
- if (asid == 0)
- asid = reserved_asid(info, i);
- __set_bit(asid2idx(info, asid), info->map);
- reserved_asid(info, i) = asid;
- }
-
- /*
- * Queue a TLB invalidation for each CPU to perform on next
- * context-switch
- */
- cpumask_setall(&info->flush_pending);
-}
-
-static bool check_update_reserved_asid(struct asid_info *info, u64 asid,
- u64 newasid)
-{
- int cpu;
- bool hit = false;
-
- /*
- * Iterate over the set of reserved ASIDs looking for a match.
- * If we find one, then we can update our mm to use newasid
- * (i.e. the same ASID in the current generation) but we can't
- * exit the loop early, since we need to ensure that all copies
- * of the old ASID are updated to reflect the mm. Failure to do
- * so could result in us missing the reserved ASID in a future
- * generation.
- */
- for_each_possible_cpu(cpu) {
- if (reserved_asid(info, cpu) == asid) {
- hit = true;
- reserved_asid(info, cpu) = newasid;
- }
- }
-
- return hit;
-}
-
-static u64 new_context(struct asid_info *info, atomic64_t *pasid)
-{
- static u32 cur_idx = 1;
- u64 asid = atomic64_read(pasid);
- u64 generation = atomic64_read(&info->generation);
-
- if (asid != 0) {
- u64 newasid = generation | (asid & ~ASID_MASK(info));
-
- /*
- * If our current ASID was active during a rollover, we
- * can continue to use it and this was just a false alarm.
- */
- if (check_update_reserved_asid(info, asid, newasid))
- return newasid;
-
- /*
- * We had a valid ASID in a previous life, so try to re-use
- * it if possible.
- */
- if (!__test_and_set_bit(asid2idx(info, asid), info->map))
- return newasid;
- }
-
- /*
- * Allocate a free ASID. If we can't find one, take a note of the
- * currently active ASIDs and mark the TLBs as requiring flushes. We
- * always count from ASID #2 (index 1), as we use ASID #0 when setting
- * a reserved TTBR0 for the init_mm and we allocate ASIDs in even/odd
- * pairs.
- */
- asid = find_next_zero_bit(info->map, NUM_CTXT_ASIDS(info), cur_idx);
- if (asid != NUM_CTXT_ASIDS(info))
- goto set_asid;
-
- /* We're out of ASIDs, so increment the global generation count */
- generation = atomic64_add_return_relaxed(ASID_FIRST_VERSION(info),
- &info->generation);
- flush_context(info);
-
- /* We have more ASIDs than CPUs, so this will always succeed */
- asid = find_next_zero_bit(info->map, NUM_CTXT_ASIDS(info), 1);
-
-set_asid:
- __set_bit(asid, info->map);
- cur_idx = asid;
- return idx2asid(info, asid) | generation;
-}
-
-static void asid_new_context(struct asid_info *info, atomic64_t *pasid,
- unsigned int cpu);
-
-/*
- * Check the ASID is still valid for the context. If not generate a new ASID.
- *
- * @pasid: Pointer to the current ASID batch
- * @cpu: current CPU ID. Must have been acquired throught get_cpu()
- */
-static void asid_check_context(struct asid_info *info,
- atomic64_t *pasid, unsigned int cpu)
-{
- u64 asid, old_active_asid;
-
- asid = atomic64_read(pasid);
-
- /*
- * The memory ordering here is subtle.
- * If our active_asid is non-zero and the ASID matches the current
- * generation, then we update the active_asid entry with a relaxed
- * cmpxchg. Racing with a concurrent rollover means that either:
- *
- * - We get a zero back from the cmpxchg and end up waiting on the
- * lock. Taking the lock synchronises with the rollover and so
- * we are forced to see the updated generation.
- *
- * - We get a valid ASID back from the cmpxchg, which means the
- * relaxed xchg in flush_context will treat us as reserved
- * because atomic RmWs are totally ordered for a given location.
- */
- old_active_asid = atomic64_read(&active_asid(info, cpu));
- if (old_active_asid &&
- !((asid ^ atomic64_read(&info->generation)) >> info->bits) &&
- atomic64_cmpxchg_relaxed(&active_asid(info, cpu),
- old_active_asid, asid))
- return;
-
- asid_new_context(info, pasid, cpu);
-}
-
-/*
- * Generate a new ASID for the context.
- *
- * @pasid: Pointer to the current ASID batch allocated. It will be updated
- * with the new ASID batch.
- * @cpu: current CPU ID. Must have been acquired through get_cpu()
- */
-static void asid_new_context(struct asid_info *info, atomic64_t *pasid,
- unsigned int cpu)
-{
- unsigned long flags;
- u64 asid;
-
- raw_spin_lock_irqsave(&info->lock, flags);
- /* Check that our ASID belongs to the current generation. */
- asid = atomic64_read(pasid);
- if ((asid ^ atomic64_read(&info->generation)) >> info->bits) {
- asid = new_context(info, pasid);
- atomic64_set(pasid, asid);
- }
-
- if (cpumask_test_and_clear_cpu(cpu, &info->flush_pending))
- info->flush_cpu_ctxt_cb();
-
- atomic64_set(&active_asid(info, cpu), asid);
- raw_spin_unlock_irqrestore(&info->lock, flags);
-}
-
void check_and_switch_context(struct mm_struct *mm, unsigned int cpu)
{
if (system_supports_cnp())
@@ -305,38 +108,6 @@ static void asid_flush_cpu_ctxt(void)
local_flush_tlb_all();
}

-/*
- * Initialize the ASID allocator
- *
- * @info: Pointer to the asid allocator structure
- * @bits: Number of ASIDs available
- * @asid_per_ctxt: Number of ASIDs to allocate per-context. ASIDs are
- * allocated contiguously for a given context. This value should be a power of
- * 2.
- */
-static int asid_allocator_init(struct asid_info *info,
- u32 bits, unsigned int asid_per_ctxt,
- void (*flush_cpu_ctxt_cb)(void))
-{
- info->bits = bits;
- info->ctxt_shift = ilog2(asid_per_ctxt);
- info->flush_cpu_ctxt_cb = flush_cpu_ctxt_cb;
- /*
- * Expect allocation after rollover to fail if we don't have at least
- * one more ASID than CPUs. ASID #0 is always reserved.
- */
- WARN_ON(NUM_CTXT_ASIDS(info) - 1 <= num_possible_cpus());
- atomic64_set(&info->generation, ASID_FIRST_VERSION(info));
- info->map = kcalloc(BITS_TO_LONGS(NUM_CTXT_ASIDS(info)),
- sizeof(*info->map), GFP_KERNEL);
- if (!info->map)
- return -ENOMEM;
-
- raw_spin_lock_init(&info->lock);
-
- return 0;
-}
-
static int asids_init(void)
{
u32 bits = get_cpu_asid_bits();
@@ -344,7 +115,7 @@ static int asids_init(void)
if (!asid_allocator_init(&asid_info, bits, ASID_PER_CONTEXT,
asid_flush_cpu_ctxt))
panic("Unable to initialize ASID allocator for %lu ASIDs\n",
- 1UL << bits);
+ NUM_ASIDS(&asid_info));

asid_info.active = &active_asids;
asid_info.reserved = &reserved_asids;

Next message: Gustavo A. R. Silva: "[PATCH] ACPICA: utids: Use struct_size() helper"
Previous message: Christian Lamparter: "Re: [PATCH 5/6] arm64: dts: Add ipq6018 SoC and CP01 board support"
In reply to: Julien Grall: "Re: [PATCH RFC 11/14] arm64: Move the ASID allocator code in a separate file"
Messages sorted by: [ date ] [ thread ] [ subject ] [ author ]