Re: [PATCH v2] Make iwmmxt.S support Clang's integrated assembler
From: Ard Biesheuvel
Date: Sat Nov 07 2020 - 03:29:38 EST
On Sat, 7 Nov 2020 at 01:11, Jian Cai <jiancai@xxxxxxxxxx> wrote:
>
> This patch uses macros to replace the 6 IWMMXT instructions in iwmmxt.S
> that Clang's integrated assembler does not support, while making sure
> the GNU assembler still emits the same instructions. This should be
> easier than providing full IWMMXT support in Clang.
>
> "Intel Wireless MMX Technology - Developer Guide - August, 2002" should
> be referenced for the encoding schemes of these extensions.
>
> Link: https://github.com/ClangBuiltLinux/linux/issues/975
>
> Suggested-by: Nick Desaulniers <ndesaulniers@xxxxxxxxxx>
> Suggested-by: Ard Biesheuvel <ardb@xxxxxxxxxx>
> Signed-off-by: Jian Cai <jiancai@xxxxxxxxxx>
Please make sure you test this carefully on BE32, as the instruction
byte order used by .inst is little-endian (LE), IIRC.
> ---
> arch/arm/kernel/iwmmxt.S | 89 ++++++++++++++++++++--------------------
> arch/arm/kernel/iwmmxt.h | 47 +++++++++++++++++++++
> 2 files changed, 92 insertions(+), 44 deletions(-)
> create mode 100644 arch/arm/kernel/iwmmxt.h
>
> diff --git a/arch/arm/kernel/iwmmxt.S b/arch/arm/kernel/iwmmxt.S
> index 0dcae787b004..d2b4ac06e4ed 100644
> --- a/arch/arm/kernel/iwmmxt.S
> +++ b/arch/arm/kernel/iwmmxt.S
> @@ -16,6 +16,7 @@
> #include <asm/thread_info.h>
> #include <asm/asm-offsets.h>
> #include <asm/assembler.h>
> +#include "iwmmxt.h"
>
> #if defined(CONFIG_CPU_PJ4) || defined(CONFIG_CPU_PJ4B)
> #define PJ4(code...) code
> @@ -113,33 +114,33 @@ concan_save:
>
> concan_dump:
>
> - wstrw wCSSF, [r1, #MMX_WCSSF]
> - wstrw wCASF, [r1, #MMX_WCASF]
> - wstrw wCGR0, [r1, #MMX_WCGR0]
> - wstrw wCGR1, [r1, #MMX_WCGR1]
> - wstrw wCGR2, [r1, #MMX_WCGR2]
> - wstrw wCGR3, [r1, #MMX_WCGR3]
> + wstrw wCSSF, r1, MMX_WCSSF
> + wstrw wCASF, r1, MMX_WCASF
> + wstrw wCGR0, r1, MMX_WCGR0
> + wstrw wCGR1, r1, MMX_WCGR1
> + wstrw wCGR2, r1, MMX_WCGR2
> + wstrw wCGR3, r1, MMX_WCGR3
>
> 1: @ MUP? wRn
> tst r2, #0x2
> beq 2f
>
> - wstrd wR0, [r1, #MMX_WR0]
> - wstrd wR1, [r1, #MMX_WR1]
> - wstrd wR2, [r1, #MMX_WR2]
> - wstrd wR3, [r1, #MMX_WR3]
> - wstrd wR4, [r1, #MMX_WR4]
> - wstrd wR5, [r1, #MMX_WR5]
> - wstrd wR6, [r1, #MMX_WR6]
> - wstrd wR7, [r1, #MMX_WR7]
> - wstrd wR8, [r1, #MMX_WR8]
> - wstrd wR9, [r1, #MMX_WR9]
> - wstrd wR10, [r1, #MMX_WR10]
> - wstrd wR11, [r1, #MMX_WR11]
> - wstrd wR12, [r1, #MMX_WR12]
> - wstrd wR13, [r1, #MMX_WR13]
> - wstrd wR14, [r1, #MMX_WR14]
> - wstrd wR15, [r1, #MMX_WR15]
> + wstrd wR0, r1, MMX_WR0
> + wstrd wR1, r1, MMX_WR1
> + wstrd wR2, r1, MMX_WR2
> + wstrd wR3, r1, MMX_WR3
> + wstrd wR4, r1, MMX_WR4
> + wstrd wR5, r1, MMX_WR5
> + wstrd wR6, r1, MMX_WR6
> + wstrd wR7, r1, MMX_WR7
> + wstrd wR8, r1, MMX_WR8
> + wstrd wR9, r1, MMX_WR9
> + wstrd wR10, r1, MMX_WR10
> + wstrd wR11, r1, MMX_WR11
> + wstrd wR12, r1, MMX_WR12
> + wstrd wR13, r1, MMX_WR13
> + wstrd wR14, r1, MMX_WR14
> + wstrd wR15, r1, MMX_WR15
>
> 2: teq r0, #0 @ anything to load?
> reteq lr @ if not, return
> @@ -147,30 +148,30 @@ concan_dump:
> concan_load:
>
> @ Load wRn
> - wldrd wR0, [r0, #MMX_WR0]
> - wldrd wR1, [r0, #MMX_WR1]
> - wldrd wR2, [r0, #MMX_WR2]
> - wldrd wR3, [r0, #MMX_WR3]
> - wldrd wR4, [r0, #MMX_WR4]
> - wldrd wR5, [r0, #MMX_WR5]
> - wldrd wR6, [r0, #MMX_WR6]
> - wldrd wR7, [r0, #MMX_WR7]
> - wldrd wR8, [r0, #MMX_WR8]
> - wldrd wR9, [r0, #MMX_WR9]
> - wldrd wR10, [r0, #MMX_WR10]
> - wldrd wR11, [r0, #MMX_WR11]
> - wldrd wR12, [r0, #MMX_WR12]
> - wldrd wR13, [r0, #MMX_WR13]
> - wldrd wR14, [r0, #MMX_WR14]
> - wldrd wR15, [r0, #MMX_WR15]
> + wldrd wR0, r0, MMX_WR0
> + wldrd wR1, r0, MMX_WR1
> + wldrd wR2, r0, MMX_WR2
> + wldrd wR3, r0, MMX_WR3
> + wldrd wR4, r0, MMX_WR4
> + wldrd wR5, r0, MMX_WR5
> + wldrd wR6, r0, MMX_WR6
> + wldrd wR7, r0, MMX_WR7
> + wldrd wR8, r0, MMX_WR8
> + wldrd wR9, r0, MMX_WR9
> + wldrd wR10, r0, MMX_WR10
> + wldrd wR11, r0, MMX_WR11
> + wldrd wR12, r0, MMX_WR12
> + wldrd wR13, r0, MMX_WR13
> + wldrd wR14, r0, MMX_WR14
> + wldrd wR15, r0, MMX_WR15
>
> @ Load wCx
> - wldrw wCSSF, [r0, #MMX_WCSSF]
> - wldrw wCASF, [r0, #MMX_WCASF]
> - wldrw wCGR0, [r0, #MMX_WCGR0]
> - wldrw wCGR1, [r0, #MMX_WCGR1]
> - wldrw wCGR2, [r0, #MMX_WCGR2]
> - wldrw wCGR3, [r0, #MMX_WCGR3]
> + wldrw wCSSF, r0, MMX_WCSSF
> + wldrw wCASF, r0, MMX_WCASF
> + wldrw wCGR0, r0, MMX_WCGR0
> + wldrw wCGR1, r0, MMX_WCGR1
> + wldrw wCGR2, r0, MMX_WCGR2
> + wldrw wCGR3, r0, MMX_WCGR3
>
> @ clear CUP/MUP (only if r1 != 0)
> teq r1, #0
> diff --git a/arch/arm/kernel/iwmmxt.h b/arch/arm/kernel/iwmmxt.h
> new file mode 100644
> index 000000000000..fb627286f5bb
> --- /dev/null
> +++ b/arch/arm/kernel/iwmmxt.h
> @@ -0,0 +1,47 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +
> +#ifndef __IWMMXT_H__
> +#define __IWMMXT_H__
> +
> +.irp b, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
> +.set .LwR\b, \b
> +.set .Lr\b, \b
> +.endr
> +
> +.set .LwCSSF, 0x2
> +.set .LwCASF, 0x3
> +.set .LwCGR0, 0x8
> +.set .LwCGR1, 0x9
> +.set .LwCGR2, 0xa
> +.set .LwCGR3, 0xb
> +
> +.macro wldrd, reg:req, base:req, offset:req
> +.inst 0xedd00100 | (.L\reg << 12) | (.L\base << 16) | (\offset >> 2)
> +.endm
> +
> +.macro wldrw, reg:req, base:req, offset:req
> +.inst 0xfd900100 | (.L\reg << 12) | (.L\base << 16) | (\offset >> 2)
> +.endm
> +
> +.macro wstrd, reg:req, base:req, offset:req
> +.inst 0xedc00100 | (.L\reg << 12) | (.L\base << 16) | (\offset >> 2)
> +.endm
> +
> +.macro wstrw, reg:req, base:req, offset:req
> +.inst 0xfd800100 | (.L\reg << 12) | (.L\base << 16) | (\offset >> 2)
> +.endm
> +
> +#ifdef __clang__
> +
> +#define wCon c1
> +
> +.macro tmrc, dest:req, control:req
> +mrc p1, 0, \dest, \control, c0, 0
> +.endm
> +
> +.macro tmcr, control:req, src:req
> +mcr p1, 0, \src, \control, c0, 0
> +.endm
> +#endif
> +
> +#endif
> --
> 2.29.1.341.ge80a0c044ae-goog
>