[PATCH] arm64: Add support for new control bits CTR_EL0.DIC and CTR_EL0.IDC

From: Shanker Donthineni
Date: Fri Feb 16 2018 - 19:58:05 EST


The two point-of-unification (PoU) cache maintenance operations 'DC CVAU'
and 'IC IVAU' are optional for implementors as per the ARMv8 specification.
This patch parses the updated CTR_EL0 register definition and adds
the required changes to skip PoU operations if the hardware reports
CTR_EL0.DIC and/or CTR_EL0.IDC.

CTR_EL0.DIC: Instruction cache invalidation requirements for
instruction to data coherence. Meaning of this bit (bit [29]):
0: Instruction cache invalidation to the point of unification
is required for instruction to data coherence.
1: Instruction cache invalidation to the point of unification is
not required for instruction to data coherence.

CTR_EL0.IDC: Data cache clean requirements for instruction to data
coherence. Meaning of this bit (bit [28]):
0: Data cache clean to the point of unification is required for
instruction to data coherence, unless CLIDR_EL1.LoC == 0b000
or (CLIDR_EL1.LoUIS == 0b000 && CLIDR_EL1.LoUU == 0b000).
1: Data cache clean to the point of unification is not required
for instruction to data coherence.

Signed-off-by: Philip Elcan <pelcan@xxxxxxxxxxxxxx>
Signed-off-by: Shanker Donthineni <shankerd@xxxxxxxxxxxxxx>
---
arch/arm64/include/asm/assembler.h | 48 ++++++++++++++++++++++++--------------
arch/arm64/include/asm/cache.h | 2 ++
arch/arm64/kernel/cpufeature.c | 2 ++
arch/arm64/mm/cache.S | 26 ++++++++++++++-------
4 files changed, 51 insertions(+), 27 deletions(-)

diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
index 3c78835..9eaa948 100644
--- a/arch/arm64/include/asm/assembler.h
+++ b/arch/arm64/include/asm/assembler.h
@@ -30,6 +30,7 @@
#include <asm/pgtable-hwdef.h>
#include <asm/ptrace.h>
#include <asm/thread_info.h>
+#include <asm/cache.h>

.macro save_and_disable_daif, flags
mrs \flags, daif
@@ -334,9 +335,9 @@
* raw_dcache_line_size - get the minimum D-cache line size on this CPU
* from the CTR register.
*/
- .macro raw_dcache_line_size, reg, tmp
- mrs \tmp, ctr_el0 // read CTR
- ubfm \tmp, \tmp, #16, #19 // cache line size encoding
+ .macro raw_dcache_line_size, reg, tmp, ctr
+ mrs \ctr, ctr_el0 // read CTR
+ ubfm \tmp, \ctr, #16, #19 // cache line size encoding
mov \reg, #4 // bytes per word
lsl \reg, \reg, \tmp // actual cache line size
.endm
@@ -344,9 +345,9 @@
/*
* dcache_line_size - get the safe D-cache line size across all CPUs
*/
- .macro dcache_line_size, reg, tmp
- read_ctr \tmp
- ubfm \tmp, \tmp, #16, #19 // cache line size encoding
+ .macro dcache_line_size, reg, tmp, ctr
+ read_ctr \ctr
+ ubfm \tmp, \ctr, #16, #19 // cache line size encoding
mov \reg, #4 // bytes per word
lsl \reg, \reg, \tmp // actual cache line size
.endm
@@ -355,9 +356,9 @@
* raw_icache_line_size - get the minimum I-cache line size on this CPU
* from the CTR register.
*/
- .macro raw_icache_line_size, reg, tmp
- mrs \tmp, ctr_el0 // read CTR
- and \tmp, \tmp, #0xf // cache line size encoding
+ .macro raw_icache_line_size, reg, tmp, ctr
+ mrs \ctr, ctr_el0 // read CTR
+ and \tmp, \ctr, #0xf // cache line size encoding
mov \reg, #4 // bytes per word
lsl \reg, \reg, \tmp // actual cache line size
.endm
@@ -365,9 +366,9 @@
/*
* icache_line_size - get the safe I-cache line size across all CPUs
*/
- .macro icache_line_size, reg, tmp
- read_ctr \tmp
- and \tmp, \tmp, #0xf // cache line size encoding
+ .macro icache_line_size, reg, tmp, ctr
+ read_ctr \ctr
+ and \tmp, \ctr, #0xf // cache line size encoding
mov \reg, #4 // bytes per word
lsl \reg, \reg, \tmp // actual cache line size
.endm
@@ -408,13 +409,21 @@
* size: size of the region
* Corrupts: kaddr, size, tmp1, tmp2
*/
- .macro dcache_by_line_op op, domain, kaddr, size, tmp1, tmp2
- dcache_line_size \tmp1, \tmp2
+ .macro dcache_by_line_op op, domain, kaddr, size, tmp1, tmp2, tmp3
+ dcache_line_size \tmp1, \tmp2, \tmp3
add \size, \kaddr, \size
sub \tmp2, \tmp1, #1
bic \kaddr, \kaddr, \tmp2
9998:
- .if (\op == cvau || \op == cvac)
+ .if (\op == cvau)
+alternative_if_not ARM64_WORKAROUND_CLEAN_CACHE
+ tbnz \tmp3, #CTR_IDC_SHIFT, 9997f
+ dc cvau, \kaddr
+alternative_else
+ dc civac, \kaddr
+ nop
+alternative_endif
+ .elseif (\op == cvac)
alternative_if_not ARM64_WORKAROUND_CLEAN_CACHE
dc \op, \kaddr
alternative_else
@@ -433,6 +442,7 @@
cmp \kaddr, \size
b.lo 9998b
dsb \domain
+9997:
.endm

/*
@@ -441,10 +451,11 @@
*
* start, end: virtual addresses describing the region
* label: A label to branch to on user fault.
- * Corrupts: tmp1, tmp2
+ * Corrupts: tmp1, tmp2, tmp3
*/
- .macro invalidate_icache_by_line start, end, tmp1, tmp2, label
- icache_line_size \tmp1, \tmp2
+ .macro invalidate_icache_by_line start, end, tmp1, tmp2, tmp3, label
+ icache_line_size \tmp1, \tmp2, \tmp3
+ tbnz \tmp3, #CTR_DIC_SHIFT, 9996f
sub \tmp2, \tmp1, #1
bic \tmp2, \start, \tmp2
9997:
@@ -454,6 +465,7 @@
b.lo 9997b
dsb ish
isb
+9996:
.endm

/*
diff --git a/arch/arm64/include/asm/cache.h b/arch/arm64/include/asm/cache.h
index ea9bb4e..aea533b 100644
--- a/arch/arm64/include/asm/cache.h
+++ b/arch/arm64/include/asm/cache.h
@@ -22,6 +22,8 @@
#define CTR_L1IP_MASK 3
#define CTR_CWG_SHIFT 24
#define CTR_CWG_MASK 15
+#define CTR_IDC_SHIFT 28
+#define CTR_DIC_SHIFT 29

#define CTR_L1IP(ctr) (((ctr) >> CTR_L1IP_SHIFT) & CTR_L1IP_MASK)

diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 29b1f87..f42bb5a 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -200,6 +200,8 @@ static int __init register_cpu_hwcaps_dumper(void)

static const struct arm64_ftr_bits ftr_ctr[] = {
ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, 31, 1, 1), /* RAO */
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, CTR_DIC_SHIFT, 1, 0), /* DIC */
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, CTR_IDC_SHIFT, 1, 0), /* IDC */
ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_HIGHER_SAFE, 24, 4, 0), /* CWG */
ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, 20, 4, 0), /* ERG */
ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, 16, 4, 1), /* DminLine */
diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S
index 758bde7..5764af8 100644
--- a/arch/arm64/mm/cache.S
+++ b/arch/arm64/mm/cache.S
@@ -24,6 +24,7 @@
#include <asm/cpufeature.h>
#include <asm/alternative.h>
#include <asm/asm-uaccess.h>
+#include <asm/cache.h>

/*
* flush_icache_range(start,end)
@@ -50,7 +51,12 @@ ENTRY(flush_icache_range)
*/
ENTRY(__flush_cache_user_range)
uaccess_ttbr0_enable x2, x3, x4
- dcache_line_size x2, x3
+ dcache_line_size x2, x3, x4
+alternative_if_not ARM64_WORKAROUND_CLEAN_CACHE
+ tbnz x4, #CTR_IDC_SHIFT, 8f
+alternative_else
+ nop
+alternative_endif
sub x3, x2, #1
bic x4, x0, x3
1:
@@ -60,7 +66,9 @@ user_alt 9f, "dc cvau, x4", "dc civac, x4", ARM64_WORKAROUND_CLEAN_CACHE
b.lo 1b
dsb ish

- invalidate_icache_by_line x0, x1, x2, x3, 9f
+8:
+ invalidate_icache_by_line x0, x1, x2, x3, x4, 9f
+
mov x0, #0
1:
uaccess_ttbr0_disable x1, x2
@@ -82,7 +90,7 @@ ENDPROC(__flush_cache_user_range)
ENTRY(invalidate_icache_range)
uaccess_ttbr0_enable x2, x3, x4

- invalidate_icache_by_line x0, x1, x2, x3, 2f
+ invalidate_icache_by_line x0, x1, x2, x3, x4, 2f
mov x0, xzr
1:
uaccess_ttbr0_disable x1, x2
@@ -102,7 +110,7 @@ ENDPROC(invalidate_icache_range)
* - size - size in question
*/
ENTRY(__flush_dcache_area)
- dcache_by_line_op civac, sy, x0, x1, x2, x3
+ dcache_by_line_op civac, sy, x0, x1, x2, x3, x4
ret
ENDPIPROC(__flush_dcache_area)

@@ -116,7 +124,7 @@ ENDPIPROC(__flush_dcache_area)
* - size - size in question
*/
ENTRY(__clean_dcache_area_pou)
- dcache_by_line_op cvau, ish, x0, x1, x2, x3
+ dcache_by_line_op cvau, ish, x0, x1, x2, x3, x4
ret
ENDPROC(__clean_dcache_area_pou)

@@ -140,7 +148,7 @@ ENTRY(__inval_dcache_area)
*/
__dma_inv_area:
add x1, x1, x0
- dcache_line_size x2, x3
+ dcache_line_size x2, x3, x4
sub x3, x2, #1
tst x1, x3 // end cache line aligned?
bic x1, x1, x3
@@ -178,7 +186,7 @@ ENTRY(__clean_dcache_area_poc)
* - size - size in question
*/
__dma_clean_area:
- dcache_by_line_op cvac, sy, x0, x1, x2, x3
+ dcache_by_line_op cvac, sy, x0, x1, x2, x3, x4
ret
ENDPIPROC(__clean_dcache_area_poc)
ENDPROC(__dma_clean_area)
@@ -193,7 +201,7 @@ ENDPROC(__dma_clean_area)
* - size - size in question
*/
ENTRY(__clean_dcache_area_pop)
- dcache_by_line_op cvap, sy, x0, x1, x2, x3
+ dcache_by_line_op cvap, sy, x0, x1, x2, x3, x4
ret
ENDPIPROC(__clean_dcache_area_pop)

@@ -206,7 +214,7 @@ ENDPIPROC(__clean_dcache_area_pop)
* - size - size in question
*/
ENTRY(__dma_flush_area)
- dcache_by_line_op civac, sy, x0, x1, x2, x3
+ dcache_by_line_op civac, sy, x0, x1, x2, x3, x4
ret
ENDPIPROC(__dma_flush_area)

--
Qualcomm Datacenter Technologies, Inc. on behalf of the Qualcomm Technologies, Inc.
Qualcomm Technologies, Inc. is a member of the Code Aurora Forum, a Linux Foundation Collaborative Project.