[PATCH 19/23] [AARCH64] delouse input arguments in system functions
From: Yury Norov
Date: Tue Jun 28 2016 - 12:44:58 EST
This patch introduces DELOUSE() macro which does nothing
for lp64, and clears top bits of registers holding inputs,
where needed.
AARCH64/ILP32 needs it because top bits condition is undefined
according to ABI.
'Delouse' term comes from Linux kernel where similar macro does
the same for compat syscall wrapper.
Signed-off-by: Yury Norov <ynorov@xxxxxxxxxxxxxxxxxx>
---
sysdeps/aarch64/__longjmp.S | 2 ++
sysdeps/aarch64/dl-tlsdesc.S | 8 ++++++++
sysdeps/aarch64/memcmp.S | 3 +++
sysdeps/aarch64/memcpy.S | 8 ++++++++
sysdeps/aarch64/memset.S | 3 +++
sysdeps/aarch64/setjmp.S | 1 +
sysdeps/aarch64/strchr.S | 1 +
sysdeps/aarch64/strchrnul.S | 1 +
sysdeps/aarch64/strcmp.S | 2 ++
sysdeps/aarch64/strcpy.S | 2 ++
sysdeps/aarch64/strlen.S | 2 ++
sysdeps/aarch64/strncmp.S | 3 +++
sysdeps/aarch64/strnlen.S | 3 +++
sysdeps/aarch64/strrchr.S | 1 +
sysdeps/aarch64/sysdep.h | 2 ++
sysdeps/unix/sysv/linux/aarch64/clone.S | 7 +++++++
sysdeps/unix/sysv/linux/aarch64/getcontext.S | 1 +
sysdeps/unix/sysv/linux/aarch64/setcontext.S | 1 +
sysdeps/unix/sysv/linux/aarch64/swapcontext.S | 1 +
19 files changed, 52 insertions(+)
diff --git a/sysdeps/aarch64/__longjmp.S b/sysdeps/aarch64/__longjmp.S
index 58332be..0377715 100644
--- a/sysdeps/aarch64/__longjmp.S
+++ b/sysdeps/aarch64/__longjmp.S
@@ -46,6 +46,8 @@ ENTRY (__longjmp)
cfi_offset(d14, JB_D14<<3)
cfi_offset(d15, JB_D15<<3)
+ DELOUSE(0)
+
ldp x19, x20, [x0, #JB_X19<<3]
ldp x21, x22, [x0, #JB_X21<<3]
ldp x23, x24, [x0, #JB_X23<<3]
diff --git a/sysdeps/aarch64/dl-tlsdesc.S b/sysdeps/aarch64/dl-tlsdesc.S
index 09cd158..24a110e 100644
--- a/sysdeps/aarch64/dl-tlsdesc.S
+++ b/sysdeps/aarch64/dl-tlsdesc.S
@@ -74,6 +74,7 @@
cfi_startproc
.align 2
_dl_tlsdesc_return:
+ DELOUSE(0)
ldr PTR_REG (0), [x0, #PTR_SIZE]
RET
cfi_endproc
@@ -96,6 +97,7 @@ _dl_tlsdesc_return_lazy:
td->entry) and thus it synchronizes with the release store to
td->entry in _dl_tlsdesc_resolve_rela_fixup ensuring that the load
from [x0,#PTR_SIZE] here happens after the initialization of td->arg. */
+ DELOUSE(0)
ldar PTR_REG (zr), [x0]
ldr PTR_REG (0), [x0, #PTR_SIZE]
RET
@@ -125,6 +127,7 @@ _dl_tlsdesc_undefweak:
td->entry) and thus it synchronizes with the release store to
td->entry in _dl_tlsdesc_resolve_rela_fixup ensuring that the load
from [x0,#8] here happens after the initialization of td->arg. */
+ DELOUSE(0)
ldar PTR_REG (zr), [x0]
ldr PTR_REG (0), [x0, #PTR_SIZE]
mrs x1, tpidr_el0
@@ -174,6 +177,7 @@ _dl_tlsdesc_dynamic:
stp x29, x30, [sp,#-(32+16*NSAVEXREGPAIRS)]!
cfi_adjust_cfa_offset (32+16*NSAVEXREGPAIRS)
mov x29, sp
+ DELOUSE(0)
/* Save just enough registers to support fast path, if we fall
into slow path we will save additional registers. */
@@ -188,6 +192,7 @@ _dl_tlsdesc_dynamic:
td->entry) and thus it synchronizes with the release store to
td->entry in _dl_tlsdesc_resolve_rela_fixup ensuring that the load
from [x0,#PTR_SIZE] here happens after the initialization of td->arg. */
+ DELOUSE(0)
ldar PTR_REG (zr), [x0]
ldr PTR_REG (1), [x0,#PTR_SIZE]
ldr PTR_REG (0), [x4]
@@ -279,12 +284,14 @@ _dl_tlsdesc_resolve_rela:
SAVE_Q_REGISTERS
+ DELOUSE(3)
ldr PTR_REG (1), [x3, #PTR_SIZE]
bl _dl_tlsdesc_resolve_rela_fixup
RESTORE_Q_REGISTERS
ldr x0, [sp, #32+16*8]
+ DELOUSE(0)
ldr PTR_REG (1), [x0]
blr x1
@@ -346,6 +353,7 @@ _dl_tlsdesc_resolve_hold:
RESTORE_Q_REGISTERS
ldr x0, [sp, #32+16*9]
+ DELOUSE(0)
ldr PTR_REG (1), [x0]
blr x1
diff --git a/sysdeps/aarch64/memcmp.S b/sysdeps/aarch64/memcmp.S
index ae2d997..982aa02 100644
--- a/sysdeps/aarch64/memcmp.S
+++ b/sysdeps/aarch64/memcmp.S
@@ -47,6 +47,9 @@
#define mask x13
ENTRY_ALIGN (memcmp, 6)
+ DELOUSE(0)
+ DELOUSE(1)
+ DELOUSE(2)
cbz limit, L(ret0)
eor tmp1, src1, src2
tst tmp1, #7
diff --git a/sysdeps/aarch64/memcpy.S b/sysdeps/aarch64/memcpy.S
index de73f0f..38a29d6 100644
--- a/sysdeps/aarch64/memcpy.S
+++ b/sysdeps/aarch64/memcpy.S
@@ -61,6 +61,10 @@
ENTRY_ALIGN (memmove, 6)
+ DELOUSE(0)
+ DELOUSE(1)
+ DELOUSE(2)
+
sub tmp1, dstin, src
cmp count, 96
ccmp tmp1, count, 2, hi
@@ -71,6 +75,10 @@ END (memmove)
libc_hidden_builtin_def (memmove)
ENTRY (memcpy)
+ DELOUSE(0)
+ DELOUSE(1)
+ DELOUSE(2)
+
prfm PLDL1KEEP, [src]
add srcend, src, count
add dstend, dstin, count
diff --git a/sysdeps/aarch64/memset.S b/sysdeps/aarch64/memset.S
index 4d222c5..1cad5c6 100644
--- a/sysdeps/aarch64/memset.S
+++ b/sysdeps/aarch64/memset.S
@@ -39,6 +39,9 @@
ENTRY_ALIGN (__memset, 6)
+ DELOUSE(0)
+ DELOUSE(2)
+
dup v0.16B, valw
add dstend, dstin, count
diff --git a/sysdeps/aarch64/setjmp.S b/sysdeps/aarch64/setjmp.S
index da83f19..d608660 100644
--- a/sysdeps/aarch64/setjmp.S
+++ b/sysdeps/aarch64/setjmp.S
@@ -33,6 +33,7 @@ END (_setjmp)
libc_hidden_def (_setjmp)
ENTRY (__sigsetjmp)
+ DELOUSE(0)
1:
stp x19, x20, [x0, #JB_X19<<3]
diff --git a/sysdeps/aarch64/strchr.S b/sysdeps/aarch64/strchr.S
index 5e3aecf..838384c 100644
--- a/sysdeps/aarch64/strchr.S
+++ b/sysdeps/aarch64/strchr.S
@@ -62,6 +62,7 @@
/* Locals and temporaries. */
ENTRY (strchr)
+ DELOUSE(0)
mov wtmp2, #0x0401
movk wtmp2, #0x4010, lsl #16
dup vrepchr.16b, chrin
diff --git a/sysdeps/aarch64/strchrnul.S b/sysdeps/aarch64/strchrnul.S
index a624c8d..b60df26 100644
--- a/sysdeps/aarch64/strchrnul.S
+++ b/sysdeps/aarch64/strchrnul.S
@@ -60,6 +60,7 @@
identify exactly which byte is causing the termination. */
ENTRY (__strchrnul)
+ DELOUSE(0)
/* Magic constant 0x40100401 to allow us to identify which lane
matches the termination condition. */
mov wtmp2, #0x0401
diff --git a/sysdeps/aarch64/strcmp.S b/sysdeps/aarch64/strcmp.S
index ba0ccb4..ccfe281 100644
--- a/sysdeps/aarch64/strcmp.S
+++ b/sysdeps/aarch64/strcmp.S
@@ -49,6 +49,8 @@
/* Start of performance-critical section -- one 64B cache line. */
ENTRY_ALIGN(strcmp, 6)
+ DELOUSE(0)
+ DELOUSE(1)
eor tmp1, src1, src2
mov zeroones, #REP8_01
tst tmp1, #7
diff --git a/sysdeps/aarch64/strcpy.S b/sysdeps/aarch64/strcpy.S
index 0694199..2a281b9 100644
--- a/sysdeps/aarch64/strcpy.S
+++ b/sysdeps/aarch64/strcpy.S
@@ -91,6 +91,8 @@
#define MIN_PAGE_SIZE (1 << MIN_PAGE_P2)
ENTRY_ALIGN (STRCPY, 6)
+ DELOUSE(0)
+ DELOUSE(1)
/* For moderately short strings, the fastest way to do the copy is to
calculate the length of the string in the same way as strlen, then
essentially do a memcpy of the result. This avoids the need for
diff --git a/sysdeps/aarch64/strlen.S b/sysdeps/aarch64/strlen.S
index a07834b..d1df0d1 100644
--- a/sysdeps/aarch64/strlen.S
+++ b/sysdeps/aarch64/strlen.S
@@ -85,6 +85,8 @@
boundary. */
ENTRY_ALIGN (__strlen, 6)
+ DELOUSE(0)
+ DELOUSE(1)
and tmp1, srcin, MIN_PAGE_SIZE - 1
mov zeroones, REP8_01
cmp tmp1, MIN_PAGE_SIZE - 16
diff --git a/sysdeps/aarch64/strncmp.S b/sysdeps/aarch64/strncmp.S
index f6a17fd..a372654 100644
--- a/sysdeps/aarch64/strncmp.S
+++ b/sysdeps/aarch64/strncmp.S
@@ -51,6 +51,9 @@
#define endloop x15
ENTRY_ALIGN_AND_PAD (strncmp, 6, 7)
+ DELOUSE(0)
+ DELOUSE(1)
+ DELOUSE(2)
cbz limit, L(ret0)
eor tmp1, src1, src2
mov zeroones, #REP8_01
diff --git a/sysdeps/aarch64/strnlen.S b/sysdeps/aarch64/strnlen.S
index 4cce45f..6f67221 100644
--- a/sysdeps/aarch64/strnlen.S
+++ b/sysdeps/aarch64/strnlen.S
@@ -50,6 +50,9 @@
#define REP8_80 0x8080808080808080
ENTRY_ALIGN_AND_PAD (__strnlen, 6, 9)
+ DELOUSE(0)
+ DELOUSE(1)
+ DELOUSE(2)
cbz limit, L(hit_limit)
mov zeroones, #REP8_01
bic src, srcin, #15
diff --git a/sysdeps/aarch64/strrchr.S b/sysdeps/aarch64/strrchr.S
index 44c1917..bb85a60 100644
--- a/sysdeps/aarch64/strrchr.S
+++ b/sysdeps/aarch64/strrchr.S
@@ -68,6 +68,7 @@
identify exactly which byte is causing the termination, and why. */
ENTRY(strrchr)
+ DELOUSE(0)
cbz x1, L(null_search)
/* Magic constant 0x40100401 to allow us to identify which lane
matches the requested byte. Magic constant 0x80200802 used
diff --git a/sysdeps/aarch64/sysdep.h b/sysdeps/aarch64/sysdep.h
index 4ce0945..c5de4de 100644
--- a/sysdeps/aarch64/sysdep.h
+++ b/sysdeps/aarch64/sysdep.h
@@ -25,10 +25,12 @@
# define AARCH64_R(NAME) R_AARCH64_ ## NAME
# define PTR_REG(n) x##n
# define PTR_LOG_SIZE 3
+# define DELOUSE(n)
#else
# define AARCH64_R(NAME) R_AARCH64_P32_ ## NAME
# define PTR_REG(n) w##n
# define PTR_LOG_SIZE 2
+# define DELOUSE(n) mov w##n, w##n
#endif
#define PTR_SIZE (1<<PTR_LOG_SIZE)
diff --git a/sysdeps/unix/sysv/linux/aarch64/clone.S b/sysdeps/unix/sysv/linux/aarch64/clone.S
index 76baa7a..eff6633 100644
--- a/sysdeps/unix/sysv/linux/aarch64/clone.S
+++ b/sysdeps/unix/sysv/linux/aarch64/clone.S
@@ -39,6 +39,13 @@
*/
.text
ENTRY(__clone)
+ DELOUSE(0)
+ DELOUSE(1)
+ DELOUSE(2)
+ DELOUSE(3)
+ DELOUSE(4)
+ DELOUSE(5)
+ DELOUSE(6)
/* Save args for the child. */
mov x10, x0
mov x11, x2
diff --git a/sysdeps/unix/sysv/linux/aarch64/getcontext.S b/sysdeps/unix/sysv/linux/aarch64/getcontext.S
index 71e526c..35ff326 100644
--- a/sysdeps/unix/sysv/linux/aarch64/getcontext.S
+++ b/sysdeps/unix/sysv/linux/aarch64/getcontext.S
@@ -30,6 +30,7 @@
.text
ENTRY(__getcontext)
+ DELOUSE(0)
/* The saved context will return to the getcontext() call point
with a return value of 0 */
str xzr, [x0, oX0 + 0 * SZREG]
diff --git a/sysdeps/unix/sysv/linux/aarch64/setcontext.S b/sysdeps/unix/sysv/linux/aarch64/setcontext.S
index d17f8c8..7d854bd 100644
--- a/sysdeps/unix/sysv/linux/aarch64/setcontext.S
+++ b/sysdeps/unix/sysv/linux/aarch64/setcontext.S
@@ -34,6 +34,7 @@
.text
ENTRY (__setcontext)
+ DELOUSE(0)
/* Save a copy of UCP. */
mov x9, x0
diff --git a/sysdeps/unix/sysv/linux/aarch64/swapcontext.S b/sysdeps/unix/sysv/linux/aarch64/swapcontext.S
index c1a16f3..764fedc 100644
--- a/sysdeps/unix/sysv/linux/aarch64/swapcontext.S
+++ b/sysdeps/unix/sysv/linux/aarch64/swapcontext.S
@@ -27,6 +27,7 @@
.text
ENTRY(__swapcontext)
+ DELOUSE(0)
/* Set the value returned when swapcontext() returns in this context. */
str xzr, [x0, oX0 + 0 * SZREG]
--
2.7.4