[PATCH v2] kasan: speed up mte_set_mem_tag_range

From: Evgenii Stepanov
Date: Mon May 17 2021 - 19:40:40 EST


Use DC GVA / DC GZVA to speed up KASAN memory tagging in HW tags mode.

The first cacheline is always tagged using STG/STZG even if the address is
cacheline-aligned, as benchmarks show it is faster than a conditional
branch.

Co-developed-by: Peter Collingbourne <pcc@xxxxxxxxxx>
Signed-off-by: Peter Collingbourne <pcc@xxxxxxxxxx>
Signed-off-by: Evgenii Stepanov <eugenis@xxxxxxxxxx>
---
Changelog since v1:
- Added Co-developed-by.

arch/arm64/include/asm/mte-kasan.h | 40 +------------------
arch/arm64/lib/Makefile | 2 +
arch/arm64/lib/mte-kasan.S | 63 ++++++++++++++++++++++++++++++
3 files changed, 66 insertions(+), 39 deletions(-)
create mode 100644 arch/arm64/lib/mte-kasan.S

diff --git a/arch/arm64/include/asm/mte-kasan.h b/arch/arm64/include/asm/mte-kasan.h
index ddd4d17cf9a0..e29a0e2ab35c 100644
--- a/arch/arm64/include/asm/mte-kasan.h
+++ b/arch/arm64/include/asm/mte-kasan.h
@@ -48,45 +48,7 @@ static inline u8 mte_get_random_tag(void)
return mte_get_ptr_tag(addr);
}

-/*
- * Assign allocation tags for a region of memory based on the pointer tag.
- * Note: The address must be non-NULL and MTE_GRANULE_SIZE aligned and
- * size must be non-zero and MTE_GRANULE_SIZE aligned.
- */
-static inline void mte_set_mem_tag_range(void *addr, size_t size,
- u8 tag, bool init)
-{
- u64 curr, end;
-
- if (!size)
- return;
-
- curr = (u64)__tag_set(addr, tag);
- end = curr + size;
-
- /*
- * 'asm volatile' is required to prevent the compiler to move
- * the statement outside of the loop.
- */
- if (init) {
- do {
- asm volatile(__MTE_PREAMBLE "stzg %0, [%0]"
- :
- : "r" (curr)
- : "memory");
- curr += MTE_GRANULE_SIZE;
- } while (curr != end);
- } else {
- do {
- asm volatile(__MTE_PREAMBLE "stg %0, [%0]"
- :
- : "r" (curr)
- : "memory");
- curr += MTE_GRANULE_SIZE;
- } while (curr != end);
- }
-}
-
+void mte_set_mem_tag_range(void *addr, size_t size, u8 tag, bool init);
void mte_enable_kernel_sync(void);
void mte_enable_kernel_async(void);
void mte_init_tags(u64 max_tag);
diff --git a/arch/arm64/lib/Makefile b/arch/arm64/lib/Makefile
index d31e1169d9b8..c06ada79a437 100644
--- a/arch/arm64/lib/Makefile
+++ b/arch/arm64/lib/Makefile
@@ -18,3 +18,5 @@ obj-$(CONFIG_CRC32) += crc32.o
obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o

obj-$(CONFIG_ARM64_MTE) += mte.o
+
+obj-$(CONFIG_KASAN_HW_TAGS) += mte-kasan.o
diff --git a/arch/arm64/lib/mte-kasan.S b/arch/arm64/lib/mte-kasan.S
new file mode 100644
index 000000000000..9f6975e2af60
--- /dev/null
+++ b/arch/arm64/lib/mte-kasan.S
@@ -0,0 +1,63 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2021 Google Inc.
+ */
+#include <linux/const.h>
+#include <linux/linkage.h>
+
+#include <asm/mte-def.h>
+
+ .arch armv8.5-a+memtag
+
+ .macro __set_mem_tag_range, stg, gva, start, size, linesize, tmp1, tmp2, tmp3
+ add \tmp3, \start, \size
+ cmp \size, \linesize, lsl #1
+ b.lt .Lsmtr3_\@
+
+ sub \tmp1, \linesize, #1
+ bic \tmp2, \tmp3, \tmp1
+ orr \tmp1, \start, \tmp1
+
+.Lsmtr1_\@:
+ \stg \start, [\start], #MTE_GRANULE_SIZE
+ cmp \start, \tmp1
+ b.lt .Lsmtr1_\@
+
+.Lsmtr2_\@:
+ dc \gva, \start
+ add \start, \start, \linesize
+ cmp \start, \tmp2
+ b.lt .Lsmtr2_\@
+
+.Lsmtr3_\@:
+ cmp \start, \tmp3
+ b.ge .Lsmtr4_\@
+ \stg \start, [\start], #MTE_GRANULE_SIZE
+ b .Lsmtr3_\@
+.Lsmtr4_\@:
+ .endm
+
+/*
+ * Assign allocation tags for a region of memory based on the pointer tag.
+ * Note: The address must be non-NULL and MTE_GRANULE_SIZE aligned and
+ * size must be non-zero and MTE_GRANULE_SIZE aligned.
+ * x0 - start address
+ * x1 - region size
+ * x2 - tag
+ * x3 - bool init
+ */
+SYM_FUNC_START(mte_set_mem_tag_range)
+ mrs x4, dczid_el0
+ and w4, w4, #0xf
+ mov x5, #4
+ lsl x4, x5, x4
+
+ bfi x0, x2, #56, #8
+
+ cbz x3, .Lnoinit
+ __set_mem_tag_range stzg, gzva, x0, x1, x4, x2, x3, x5
+ ret
+.Lnoinit:
+ __set_mem_tag_range stg, gva, x0, x1, x4, x2, x3, x5
+ ret
+SYM_FUNC_END(mte_set_mem_tag_range)
--
2.31.1.751.gd2f1c929bd-goog