[RFC PATCH v3 2/4] objtool: x86 instruction decoder and big endian cross compiles

From: Vasily Gorbik
Date: Wed Sep 30 2020 - 18:17:54 EST


From: Martin Schwidefsky <schwidefsky@xxxxxxxxxx>

Currently objtool seems to be the only tool from build tools needed
which breaks x86 cross compilation on big endian systems. Make the x86
instruction decoder of the objtool usable on big endian machines.

Signed-off-by: Martin Schwidefsky <schwidefsky@xxxxxxxxxx>
Co-developed-by: Vasily Gorbik <gor@xxxxxxxxxxxxx>
Signed-off-by: Vasily Gorbik <gor@xxxxxxxxxxxxx>
---
arch/x86/include/asm/insn.h | 35 ++++++++++
arch/x86/lib/insn.c | 108 +++++++++++++++---------------
tools/arch/x86/include/asm/insn.h | 35 ++++++++++
tools/arch/x86/lib/insn.c | 108 +++++++++++++++---------------
4 files changed, 178 insertions(+), 108 deletions(-)

diff --git a/arch/x86/include/asm/insn.h b/arch/x86/include/asm/insn.h
index 5c1ae3eff9d4..bff79b22da54 100644
--- a/arch/x86/include/asm/insn.h
+++ b/arch/x86/include/asm/insn.h
@@ -7,9 +7,12 @@
* Copyright (C) IBM Corporation, 2009
*/

+#include <asm/byteorder.h>
/* insn_attr_t is defined in inat.h */
#include <asm/inat.h>

+#if defined(__BYTE_ORDER) ? __BYTE_ORDER == __LITTLE_ENDIAN : defined(__LITTLE_ENDIAN)
+
struct insn_field {
union {
insn_value_t value;
@@ -20,6 +23,38 @@ struct insn_field {
unsigned char nbytes;
};

+static inline void insn_field_set(struct insn_field *p, insn_value_t v,
+ unsigned char n)
+{
+ p->value = v;
+ p->nbytes = n;
+}
+
+#else
+
+struct insn_field {
+ insn_value_t value;
+ union {
+ insn_value_t little;
+ insn_byte_t bytes[4];
+ };
+ /* !0 if we've run insn_get_xxx() for this field */
+ unsigned char got;
+ unsigned char nbytes;
+};
+
+static inline void insn_field_set(struct insn_field *p, insn_value_t v,
+ unsigned char n)
+{
+ p->value = v;
+#ifndef __KERNEL__
+ p->little = __bswap_32(v);
+#endif
+ p->nbytes = n;
+}
+
+#endif
+
struct insn {
struct insn_field prefixes; /*
* Prefixes
diff --git a/arch/x86/lib/insn.c b/arch/x86/lib/insn.c
index 404279563891..bbd4a5f15d83 100644
--- a/arch/x86/lib/insn.c
+++ b/arch/x86/lib/insn.c
@@ -5,6 +5,7 @@
* Copyright (C) IBM Corporation, 2002, 2004, 2009
*/

+#include <linux/kernel.h>
#ifdef __KERNEL__
#include <linux/string.h>
#else
@@ -15,15 +16,35 @@

#include <asm/emulate_prefix.h>

+#define leXX_to_cpu(t, r) \
+({ \
+ __typeof__(t) v; \
+ switch (sizeof(t)) { \
+ case 4: \
+ v = le32_to_cpu(r); \
+ break; \
+ case 2: \
+ v = le16_to_cpu(r); \
+ break; \
+ case 1: \
+ v = r; \
+ break; \
+ default: \
+ BUILD_BUG(); \
+ break; \
+ } \
+ v; \
+})
+
/* Verify next sizeof(t) bytes can be on the same instruction */
#define validate_next(t, insn, n) \
((insn)->next_byte + sizeof(t) + n <= (insn)->end_kaddr)

#define __get_next(t, insn) \
- ({ t r = *(t*)insn->next_byte; insn->next_byte += sizeof(t); r; })
+ ({ t r = *(t*)insn->next_byte; insn->next_byte += sizeof(t); leXX_to_cpu(t, r); })

#define __peek_nbyte_next(t, insn, n) \
- ({ t r = *(t*)((insn)->next_byte + n); r; })
+ ({ t r = *(t*)((insn)->next_byte + n); leXX_to_cpu(t, r); })

#define get_next(t, insn) \
({ if (unlikely(!validate_next(t, insn, 0))) goto err_out; __get_next(t, insn); })
@@ -157,8 +178,7 @@ void insn_get_prefixes(struct insn *insn)
b = peek_next(insn_byte_t, insn);
attr = inat_get_opcode_attribute(b);
if (inat_is_rex_prefix(attr)) {
- insn->rex_prefix.value = b;
- insn->rex_prefix.nbytes = 1;
+ insn_field_set(&insn->rex_prefix, b, 1);
insn->next_byte++;
if (X86_REX_W(b))
/* REX.W overrides opnd_size */
@@ -295,8 +315,7 @@ void insn_get_modrm(struct insn *insn)

if (inat_has_modrm(insn->attr)) {
mod = get_next(insn_byte_t, insn);
- modrm->value = mod;
- modrm->nbytes = 1;
+ insn_field_set(modrm, mod, 1);
if (inat_is_group(insn->attr)) {
pfx_id = insn_last_prefix_id(insn);
insn->attr = inat_get_group_attribute(mod, pfx_id,
@@ -334,7 +353,7 @@ int insn_rip_relative(struct insn *insn)
* For rip-relative instructions, the mod field (top 2 bits)
* is zero and the r/m field (bottom 3 bits) is 0x5.
*/
- return (modrm->nbytes && (modrm->value & 0xc7) == 0x5);
+ return (modrm->nbytes && (modrm->bytes[0] & 0xc7) == 0x5);
}

/**
@@ -353,11 +372,11 @@ void insn_get_sib(struct insn *insn)
if (!insn->modrm.got)
insn_get_modrm(insn);
if (insn->modrm.nbytes) {
- modrm = (insn_byte_t)insn->modrm.value;
+ modrm = insn->modrm.bytes[0];
if (insn->addr_bytes != 2 &&
X86_MODRM_MOD(modrm) != 3 && X86_MODRM_RM(modrm) == 4) {
- insn->sib.value = get_next(insn_byte_t, insn);
- insn->sib.nbytes = 1;
+ insn_field_set(&insn->sib,
+ get_next(insn_byte_t, insn), 1);
}
}
insn->sib.got = 1;
@@ -407,19 +426,18 @@ void insn_get_displacement(struct insn *insn)
if (mod == 3)
goto out;
if (mod == 1) {
- insn->displacement.value = get_next(signed char, insn);
- insn->displacement.nbytes = 1;
+ insn_field_set(&insn->displacement,
+ get_next(signed char, insn), 1);
} else if (insn->addr_bytes == 2) {
if ((mod == 0 && rm == 6) || mod == 2) {
- insn->displacement.value =
- get_next(short, insn);
- insn->displacement.nbytes = 2;
+ insn_field_set(&insn->displacement,
+ get_next(short, insn), 2);
}
} else {
if ((mod == 0 && rm == 5) || mod == 2 ||
(mod == 0 && base == 5)) {
- insn->displacement.value = get_next(int, insn);
- insn->displacement.nbytes = 4;
+ insn_field_set(&insn->displacement,
+ get_next(int, insn), 4);
}
}
}
@@ -435,18 +453,14 @@ static int __get_moffset(struct insn *insn)
{
switch (insn->addr_bytes) {
case 2:
- insn->moffset1.value = get_next(short, insn);
- insn->moffset1.nbytes = 2;
+ insn_field_set(&insn->moffset1, get_next(short, insn), 2);
break;
case 4:
- insn->moffset1.value = get_next(int, insn);
- insn->moffset1.nbytes = 4;
+ insn_field_set(&insn->moffset1, get_next(int, insn), 4);
break;
case 8:
- insn->moffset1.value = get_next(int, insn);
- insn->moffset1.nbytes = 4;
- insn->moffset2.value = get_next(int, insn);
- insn->moffset2.nbytes = 4;
+ insn_field_set(&insn->moffset1, get_next(int, insn), 4);
+ insn_field_set(&insn->moffset2, get_next(int, insn), 4);
break;
default: /* opnd_bytes must be modified manually */
goto err_out;
@@ -464,13 +478,11 @@ static int __get_immv32(struct insn *insn)
{
switch (insn->opnd_bytes) {
case 2:
- insn->immediate.value = get_next(short, insn);
- insn->immediate.nbytes = 2;
+ insn_field_set(&insn->immediate, get_next(short, insn), 2);
break;
case 4:
case 8:
- insn->immediate.value = get_next(int, insn);
- insn->immediate.nbytes = 4;
+ insn_field_set(&insn->immediate, get_next(int, insn), 4);
break;
default: /* opnd_bytes must be modified manually */
goto err_out;
@@ -487,18 +499,15 @@ static int __get_immv(struct insn *insn)
{
switch (insn->opnd_bytes) {
case 2:
- insn->immediate1.value = get_next(short, insn);
- insn->immediate1.nbytes = 2;
+ insn_field_set(&insn->immediate1, get_next(short, insn), 2);
break;
case 4:
- insn->immediate1.value = get_next(int, insn);
+ insn_field_set(&insn->immediate1, get_next(int, insn), 4);
insn->immediate1.nbytes = 4;
break;
case 8:
- insn->immediate1.value = get_next(int, insn);
- insn->immediate1.nbytes = 4;
- insn->immediate2.value = get_next(int, insn);
- insn->immediate2.nbytes = 4;
+ insn_field_set(&insn->immediate1, get_next(int, insn), 4);
+ insn_field_set(&insn->immediate2, get_next(int, insn), 4);
break;
default: /* opnd_bytes must be modified manually */
goto err_out;
@@ -515,12 +524,10 @@ static int __get_immptr(struct insn *insn)
{
switch (insn->opnd_bytes) {
case 2:
- insn->immediate1.value = get_next(short, insn);
- insn->immediate1.nbytes = 2;
+ insn_field_set(&insn->immediate1, get_next(short, insn), 2);
break;
case 4:
- insn->immediate1.value = get_next(int, insn);
- insn->immediate1.nbytes = 4;
+ insn_field_set(&insn->immediate1, get_next(int, insn), 4);
break;
case 8:
/* ptr16:64 is not exist (no segment) */
@@ -528,8 +535,7 @@ static int __get_immptr(struct insn *insn)
default: /* opnd_bytes must be modified manually */
goto err_out;
}
- insn->immediate2.value = get_next(unsigned short, insn);
- insn->immediate2.nbytes = 2;
+ insn_field_set(&insn->immediate2, get_next(unsigned short, insn), 2);
insn->immediate1.got = insn->immediate2.got = 1;

return 1;
@@ -565,22 +571,17 @@ void insn_get_immediate(struct insn *insn)

switch (inat_immediate_size(insn->attr)) {
case INAT_IMM_BYTE:
- insn->immediate.value = get_next(signed char, insn);
- insn->immediate.nbytes = 1;
+ insn_field_set(&insn->immediate, get_next(signed char, insn), 1);
break;
case INAT_IMM_WORD:
- insn->immediate.value = get_next(short, insn);
- insn->immediate.nbytes = 2;
+ insn_field_set(&insn->immediate, get_next(short, insn), 2);
break;
case INAT_IMM_DWORD:
- insn->immediate.value = get_next(int, insn);
- insn->immediate.nbytes = 4;
+ insn_field_set(&insn->immediate, get_next(int, insn), 4);
break;
case INAT_IMM_QWORD:
- insn->immediate1.value = get_next(int, insn);
- insn->immediate1.nbytes = 4;
- insn->immediate2.value = get_next(int, insn);
- insn->immediate2.nbytes = 4;
+ insn_field_set(&insn->immediate1, get_next(int, insn), 4);
+ insn_field_set(&insn->immediate2, get_next(int, insn), 4);
break;
case INAT_IMM_PTR:
if (!__get_immptr(insn))
@@ -599,8 +600,7 @@ void insn_get_immediate(struct insn *insn)
goto err_out;
}
if (inat_has_second_immediate(insn->attr)) {
- insn->immediate2.value = get_next(signed char, insn);
- insn->immediate2.nbytes = 1;
+ insn_field_set(&insn->immediate2, get_next(signed char, insn), 1);
}
done:
insn->immediate.got = 1;
diff --git a/tools/arch/x86/include/asm/insn.h b/tools/arch/x86/include/asm/insn.h
index 568854b14d0a..a5d33602e9d7 100644
--- a/tools/arch/x86/include/asm/insn.h
+++ b/tools/arch/x86/include/asm/insn.h
@@ -7,9 +7,12 @@
* Copyright (C) IBM Corporation, 2009
*/

+#include <asm/byteorder.h>
/* insn_attr_t is defined in inat.h */
#include "inat.h"

+#if defined(__BYTE_ORDER) ? __BYTE_ORDER == __LITTLE_ENDIAN : defined(__LITTLE_ENDIAN)
+
struct insn_field {
union {
insn_value_t value;
@@ -20,6 +23,38 @@ struct insn_field {
unsigned char nbytes;
};

+static inline void insn_field_set(struct insn_field *p, insn_value_t v,
+ unsigned char n)
+{
+ p->value = v;
+ p->nbytes = n;
+}
+
+#else
+
+struct insn_field {
+ insn_value_t value;
+ union {
+ insn_value_t little;
+ insn_byte_t bytes[4];
+ };
+ /* !0 if we've run insn_get_xxx() for this field */
+ unsigned char got;
+ unsigned char nbytes;
+};
+
+static inline void insn_field_set(struct insn_field *p, insn_value_t v,
+ unsigned char n)
+{
+ p->value = v;
+#ifndef __KERNEL__
+ p->little = __bswap_32(v);
+#endif
+ p->nbytes = n;
+}
+
+#endif
+
struct insn {
struct insn_field prefixes; /*
* Prefixes
diff --git a/tools/arch/x86/lib/insn.c b/tools/arch/x86/lib/insn.c
index 0151dfc6da61..def6277d56c0 100644
--- a/tools/arch/x86/lib/insn.c
+++ b/tools/arch/x86/lib/insn.c
@@ -5,6 +5,7 @@
* Copyright (C) IBM Corporation, 2002, 2004, 2009
*/

+#include <linux/kernel.h>
#ifdef __KERNEL__
#include <linux/string.h>
#else
@@ -15,15 +16,35 @@

#include "../include/asm/emulate_prefix.h"

+#define leXX_to_cpu(t, r) \
+({ \
+ __typeof__(t) v; \
+ switch (sizeof(t)) { \
+ case 4: \
+ v = le32_to_cpu(r); \
+ break; \
+ case 2: \
+ v = le16_to_cpu(r); \
+ break; \
+ case 1: \
+ v = r; \
+ break; \
+ default: \
+ BUILD_BUG(); \
+ break; \
+ } \
+ v; \
+})
+
/* Verify next sizeof(t) bytes can be on the same instruction */
#define validate_next(t, insn, n) \
((insn)->next_byte + sizeof(t) + n <= (insn)->end_kaddr)

#define __get_next(t, insn) \
- ({ t r = *(t*)insn->next_byte; insn->next_byte += sizeof(t); r; })
+ ({ t r = *(t*)insn->next_byte; insn->next_byte += sizeof(t); leXX_to_cpu(t, r); })

#define __peek_nbyte_next(t, insn, n) \
- ({ t r = *(t*)((insn)->next_byte + n); r; })
+ ({ t r = *(t*)((insn)->next_byte + n); leXX_to_cpu(t, r); })

#define get_next(t, insn) \
({ if (unlikely(!validate_next(t, insn, 0))) goto err_out; __get_next(t, insn); })
@@ -157,8 +178,7 @@ void insn_get_prefixes(struct insn *insn)
b = peek_next(insn_byte_t, insn);
attr = inat_get_opcode_attribute(b);
if (inat_is_rex_prefix(attr)) {
- insn->rex_prefix.value = b;
- insn->rex_prefix.nbytes = 1;
+ insn_field_set(&insn->rex_prefix, b, 1);
insn->next_byte++;
if (X86_REX_W(b))
/* REX.W overrides opnd_size */
@@ -295,8 +315,7 @@ void insn_get_modrm(struct insn *insn)

if (inat_has_modrm(insn->attr)) {
mod = get_next(insn_byte_t, insn);
- modrm->value = mod;
- modrm->nbytes = 1;
+ insn_field_set(modrm, mod, 1);
if (inat_is_group(insn->attr)) {
pfx_id = insn_last_prefix_id(insn);
insn->attr = inat_get_group_attribute(mod, pfx_id,
@@ -334,7 +353,7 @@ int insn_rip_relative(struct insn *insn)
* For rip-relative instructions, the mod field (top 2 bits)
* is zero and the r/m field (bottom 3 bits) is 0x5.
*/
- return (modrm->nbytes && (modrm->value & 0xc7) == 0x5);
+ return (modrm->nbytes && (modrm->bytes[0] & 0xc7) == 0x5);
}

/**
@@ -353,11 +372,11 @@ void insn_get_sib(struct insn *insn)
if (!insn->modrm.got)
insn_get_modrm(insn);
if (insn->modrm.nbytes) {
- modrm = (insn_byte_t)insn->modrm.value;
+ modrm = insn->modrm.bytes[0];
if (insn->addr_bytes != 2 &&
X86_MODRM_MOD(modrm) != 3 && X86_MODRM_RM(modrm) == 4) {
- insn->sib.value = get_next(insn_byte_t, insn);
- insn->sib.nbytes = 1;
+ insn_field_set(&insn->sib,
+ get_next(insn_byte_t, insn), 1);
}
}
insn->sib.got = 1;
@@ -407,19 +426,18 @@ void insn_get_displacement(struct insn *insn)
if (mod == 3)
goto out;
if (mod == 1) {
- insn->displacement.value = get_next(signed char, insn);
- insn->displacement.nbytes = 1;
+ insn_field_set(&insn->displacement,
+ get_next(signed char, insn), 1);
} else if (insn->addr_bytes == 2) {
if ((mod == 0 && rm == 6) || mod == 2) {
- insn->displacement.value =
- get_next(short, insn);
- insn->displacement.nbytes = 2;
+ insn_field_set(&insn->displacement,
+ get_next(short, insn), 2);
}
} else {
if ((mod == 0 && rm == 5) || mod == 2 ||
(mod == 0 && base == 5)) {
- insn->displacement.value = get_next(int, insn);
- insn->displacement.nbytes = 4;
+ insn_field_set(&insn->displacement,
+ get_next(int, insn), 4);
}
}
}
@@ -435,18 +453,14 @@ static int __get_moffset(struct insn *insn)
{
switch (insn->addr_bytes) {
case 2:
- insn->moffset1.value = get_next(short, insn);
- insn->moffset1.nbytes = 2;
+ insn_field_set(&insn->moffset1, get_next(short, insn), 2);
break;
case 4:
- insn->moffset1.value = get_next(int, insn);
- insn->moffset1.nbytes = 4;
+ insn_field_set(&insn->moffset1, get_next(int, insn), 4);
break;
case 8:
- insn->moffset1.value = get_next(int, insn);
- insn->moffset1.nbytes = 4;
- insn->moffset2.value = get_next(int, insn);
- insn->moffset2.nbytes = 4;
+ insn_field_set(&insn->moffset1, get_next(int, insn), 4);
+ insn_field_set(&insn->moffset2, get_next(int, insn), 4);
break;
default: /* opnd_bytes must be modified manually */
goto err_out;
@@ -464,13 +478,11 @@ static int __get_immv32(struct insn *insn)
{
switch (insn->opnd_bytes) {
case 2:
- insn->immediate.value = get_next(short, insn);
- insn->immediate.nbytes = 2;
+ insn_field_set(&insn->immediate, get_next(short, insn), 2);
break;
case 4:
case 8:
- insn->immediate.value = get_next(int, insn);
- insn->immediate.nbytes = 4;
+ insn_field_set(&insn->immediate, get_next(int, insn), 4);
break;
default: /* opnd_bytes must be modified manually */
goto err_out;
@@ -487,18 +499,15 @@ static int __get_immv(struct insn *insn)
{
switch (insn->opnd_bytes) {
case 2:
- insn->immediate1.value = get_next(short, insn);
- insn->immediate1.nbytes = 2;
+ insn_field_set(&insn->immediate1, get_next(short, insn), 2);
break;
case 4:
- insn->immediate1.value = get_next(int, insn);
+ insn_field_set(&insn->immediate1, get_next(int, insn), 4);
insn->immediate1.nbytes = 4;
break;
case 8:
- insn->immediate1.value = get_next(int, insn);
- insn->immediate1.nbytes = 4;
- insn->immediate2.value = get_next(int, insn);
- insn->immediate2.nbytes = 4;
+ insn_field_set(&insn->immediate1, get_next(int, insn), 4);
+ insn_field_set(&insn->immediate2, get_next(int, insn), 4);
break;
default: /* opnd_bytes must be modified manually */
goto err_out;
@@ -515,12 +524,10 @@ static int __get_immptr(struct insn *insn)
{
switch (insn->opnd_bytes) {
case 2:
- insn->immediate1.value = get_next(short, insn);
- insn->immediate1.nbytes = 2;
+ insn_field_set(&insn->immediate1, get_next(short, insn), 2);
break;
case 4:
- insn->immediate1.value = get_next(int, insn);
- insn->immediate1.nbytes = 4;
+ insn_field_set(&insn->immediate1, get_next(int, insn), 4);
break;
case 8:
/* ptr16:64 is not exist (no segment) */
@@ -528,8 +535,7 @@ static int __get_immptr(struct insn *insn)
default: /* opnd_bytes must be modified manually */
goto err_out;
}
- insn->immediate2.value = get_next(unsigned short, insn);
- insn->immediate2.nbytes = 2;
+ insn_field_set(&insn->immediate2, get_next(unsigned short, insn), 2);
insn->immediate1.got = insn->immediate2.got = 1;

return 1;
@@ -565,22 +571,17 @@ void insn_get_immediate(struct insn *insn)

switch (inat_immediate_size(insn->attr)) {
case INAT_IMM_BYTE:
- insn->immediate.value = get_next(signed char, insn);
- insn->immediate.nbytes = 1;
+ insn_field_set(&insn->immediate, get_next(signed char, insn), 1);
break;
case INAT_IMM_WORD:
- insn->immediate.value = get_next(short, insn);
- insn->immediate.nbytes = 2;
+ insn_field_set(&insn->immediate, get_next(short, insn), 2);
break;
case INAT_IMM_DWORD:
- insn->immediate.value = get_next(int, insn);
- insn->immediate.nbytes = 4;
+ insn_field_set(&insn->immediate, get_next(int, insn), 4);
break;
case INAT_IMM_QWORD:
- insn->immediate1.value = get_next(int, insn);
- insn->immediate1.nbytes = 4;
- insn->immediate2.value = get_next(int, insn);
- insn->immediate2.nbytes = 4;
+ insn_field_set(&insn->immediate1, get_next(int, insn), 4);
+ insn_field_set(&insn->immediate2, get_next(int, insn), 4);
break;
case INAT_IMM_PTR:
if (!__get_immptr(insn))
@@ -599,8 +600,7 @@ void insn_get_immediate(struct insn *insn)
goto err_out;
}
if (inat_has_second_immediate(insn->attr)) {
- insn->immediate2.value = get_next(signed char, insn);
- insn->immediate2.nbytes = 1;
+ insn_field_set(&insn->immediate2, get_next(signed char, insn), 1);
}
done:
insn->immediate.got = 1;
--
⣿⣿⣿⣿⢋⡀⣀⠹⣿⣿⣿⣿
⣿⣿⣿⣿⠠⣶⡦⠀⣿⣿⣿⣿
⣿⣿⣿⠏⣴⣮⣴⣧⠈⢿⣿⣿
⣿⣿⡏⢰⣿⠖⣠⣿⡆⠈⣿⣿
⣿⢛⣵⣄⠙⣶⣶⡟⣅⣠⠹⣿
⣿⣜⣛⠻⢎⣉⣉⣀⠿⣫⣵⣿