Re: [PATCH 5.10 017/130] x86/insn: Add an insn_decode() API

From: Florian Fainelli
Date: Tue Jul 12 2022 - 19:04:44 EST


On 7/12/22 11:37, Greg Kroah-Hartman wrote:
From: Borislav Petkov <bp@xxxxxxx>

commit 93281c4a96572a34504244969b938e035204778d upstream.

Users of the instruction decoder should use this to decode instruction
bytes. For that, have insn*() helpers return an int value to denote
success/failure. When there's an error fetching the next insn byte and
the insn falls short, return -ENODATA to denote that.

While at it, make insn_get_opcode() more stricter as to whether what has
seen so far is a valid insn and if not.

Copy linux/kconfig.h for the tools-version of the decoder so that it can
use IS_ENABLED().

Also, cast the INSN_MODE_KERN dummy define value to (enum insn_mode)
for tools use of the decoder because perf tool builds with -Werror and
errors out with -Werror=sign-compare otherwise.

Signed-off-by: Borislav Petkov <bp@xxxxxxx>
Acked-by: Masami Hiramatsu <mhiramat@xxxxxxxxxx>
Link: https://lkml.kernel.org/r/20210304174237.31945-5-bp@xxxxxxxxx
Signed-off-by: Ben Hutchings <ben@xxxxxxxxxxxxxxx>
Signed-off-by: Greg Kroah-Hartman <gregkh@xxxxxxxxxxxxxxxxxxx>
---
arch/x86/include/asm/insn.h | 24 ++--
arch/x86/lib/insn.c | 216 +++++++++++++++++++++++++++++-------
tools/arch/x86/include/asm/insn.h | 24 ++--
tools/arch/x86/lib/insn.c | 222 +++++++++++++++++++++++++++++---------
tools/include/linux/kconfig.h | 73 ++++++++++++
5 files changed, 452 insertions(+), 107 deletions(-)
create mode 100644 tools/include/linux/kconfig.h

--- a/arch/x86/include/asm/insn.h
+++ b/arch/x86/include/asm/insn.h
@@ -87,13 +87,23 @@ struct insn {
#define X86_VEX_M_MAX 0x1f /* VEX3.M Maximum value */
extern void insn_init(struct insn *insn, const void *kaddr, int buf_len, int x86_64);
-extern void insn_get_prefixes(struct insn *insn);
-extern void insn_get_opcode(struct insn *insn);
-extern void insn_get_modrm(struct insn *insn);
-extern void insn_get_sib(struct insn *insn);
-extern void insn_get_displacement(struct insn *insn);
-extern void insn_get_immediate(struct insn *insn);
-extern void insn_get_length(struct insn *insn);
+extern int insn_get_prefixes(struct insn *insn);
+extern int insn_get_opcode(struct insn *insn);
+extern int insn_get_modrm(struct insn *insn);
+extern int insn_get_sib(struct insn *insn);
+extern int insn_get_displacement(struct insn *insn);
+extern int insn_get_immediate(struct insn *insn);
+extern int insn_get_length(struct insn *insn);
+
+enum insn_mode {
+ INSN_MODE_32,
+ INSN_MODE_64,
+ /* Mode is determined by the current kernel build. */
+ INSN_MODE_KERN,
+ INSN_NUM_MODES,
+};
+
+extern int insn_decode(struct insn *insn, const void *kaddr, int buf_len, enum insn_mode m);
/* Attribute will be determined after getting ModRM (for opcode groups) */
static inline void insn_get_attribute(struct insn *insn)
--- a/arch/x86/lib/insn.c
+++ b/arch/x86/lib/insn.c
@@ -13,6 +13,9 @@
#include <asm/inat.h> /*__ignore_sync_check__ */
#include <asm/insn.h> /* __ignore_sync_check__ */
+#include <linux/errno.h>
+#include <linux/kconfig.h>
+
#include <asm/emulate_prefix.h> /* __ignore_sync_check__ */
/* Verify next sizeof(t) bytes can be on the same instruction */
@@ -97,8 +100,12 @@ static void insn_get_emulate_prefix(stru
* Populates the @insn->prefixes bitmap, and updates @insn->next_byte
* to point to the (first) opcode. No effect if @insn->prefixes.got
* is already set.
+ *
+ * * Returns:
+ * 0: on success
+ * < 0: on error
*/
-void insn_get_prefixes(struct insn *insn)
+int insn_get_prefixes(struct insn *insn)
{
struct insn_field *prefixes = &insn->prefixes;
insn_attr_t attr;
@@ -106,7 +113,7 @@ void insn_get_prefixes(struct insn *insn
int i, nb;
if (prefixes->got)
- return;
+ return 0;
insn_get_emulate_prefix(insn);
@@ -217,8 +224,10 @@ vex_end:
prefixes->got = 1;
+ return 0;
+
err_out:
- return;
+ return -ENODATA;
}
/**
@@ -230,16 +239,25 @@ err_out:
* If necessary, first collects any preceding (prefix) bytes.
* Sets @insn->opcode.value = opcode1. No effect if @insn->opcode.got
* is already 1.
+ *
+ * Returns:
+ * 0: on success
+ * < 0: on error
*/
-void insn_get_opcode(struct insn *insn)
+int insn_get_opcode(struct insn *insn)
{
struct insn_field *opcode = &insn->opcode;
+ int pfx_id, ret;
insn_byte_t op;
- int pfx_id;
+
if (opcode->got)
- return;
- if (!insn->prefixes.got)
- insn_get_prefixes(insn);
+ return 0;
+
+ if (!insn->prefixes.got) {
+ ret = insn_get_prefixes(insn);
+ if (ret)
+ return ret;
+ }
/* Get first opcode */
op = get_next(insn_byte_t, insn);
@@ -254,9 +272,13 @@ void insn_get_opcode(struct insn *insn)
insn->attr = inat_get_avx_attribute(op, m, p);
if ((inat_must_evex(insn->attr) && !insn_is_evex(insn)) ||
(!inat_accept_vex(insn->attr) &&
- !inat_is_group(insn->attr)))
- insn->attr = 0; /* This instruction is bad */
- goto end; /* VEX has only 1 byte for opcode */
+ !inat_is_group(insn->attr))) {
+ /* This instruction is bad */
+ insn->attr = 0;
+ return -EINVAL;
+ }
+ /* VEX has only 1 byte for opcode */
+ goto end;
}
insn->attr = inat_get_opcode_attribute(op);
@@ -267,13 +289,18 @@ void insn_get_opcode(struct insn *insn)
pfx_id = insn_last_prefix_id(insn);
insn->attr = inat_get_escape_attribute(op, pfx_id, insn->attr);
}
- if (inat_must_vex(insn->attr))
- insn->attr = 0; /* This instruction is bad */
+
+ if (inat_must_vex(insn->attr)) {
+ /* This instruction is bad */
+ insn->attr = 0;
+ return -EINVAL;
+ }
end:
opcode->got = 1;
+ return 0;
err_out:
- return;
+ return -ENODATA;
}
/**
@@ -283,15 +310,25 @@ err_out:
* Populates @insn->modrm and updates @insn->next_byte to point past the
* ModRM byte, if any. If necessary, first collects the preceding bytes
* (prefixes and opcode(s)). No effect if @insn->modrm.got is already 1.
+ *
+ * Returns:
+ * 0: on success
+ * < 0: on error
*/
-void insn_get_modrm(struct insn *insn)
+int insn_get_modrm(struct insn *insn)
{
struct insn_field *modrm = &insn->modrm;
insn_byte_t pfx_id, mod;
+ int ret;
+
if (modrm->got)
- return;
- if (!insn->opcode.got)
- insn_get_opcode(insn);
+ return 0;
+
+ if (!insn->opcode.got) {
+ ret = insn_get_opcode(insn);
+ if (ret)
+ return ret;
+ }
if (inat_has_modrm(insn->attr)) {
mod = get_next(insn_byte_t, insn);
@@ -301,17 +338,22 @@ void insn_get_modrm(struct insn *insn)
pfx_id = insn_last_prefix_id(insn);
insn->attr = inat_get_group_attribute(mod, pfx_id,
insn->attr);
- if (insn_is_avx(insn) && !inat_accept_vex(insn->attr))
- insn->attr = 0; /* This is bad */
+ if (insn_is_avx(insn) && !inat_accept_vex(insn->attr)) {
+ /* Bad insn */
+ insn->attr = 0;
+ return -EINVAL;
+ }
}
}
if (insn->x86_64 && inat_is_force64(insn->attr))
insn->opnd_bytes = 8;
+
modrm->got = 1;
+ return 0;
err_out:
- return;
+ return -ENODATA;
}
@@ -325,11 +367,16 @@ err_out:
int insn_rip_relative(struct insn *insn)
{
struct insn_field *modrm = &insn->modrm;
+ int ret;
if (!insn->x86_64)
return 0;
- if (!modrm->got)
- insn_get_modrm(insn);
+
+ if (!modrm->got) {
+ ret = insn_get_modrm(insn);
+ if (ret)
+ return 0;
+ }
/*
* For rip-relative instructions, the mod field (top 2 bits)
* is zero and the r/m field (bottom 3 bits) is 0x5.
@@ -343,15 +390,25 @@ int insn_rip_relative(struct insn *insn)
*
* If necessary, first collects the instruction up to and including the
* ModRM byte.
+ *
+ * Returns:
+ * 0: if decoding succeeded
+ * < 0: otherwise.
*/
-void insn_get_sib(struct insn *insn)
+int insn_get_sib(struct insn *insn)
{
insn_byte_t modrm;
+ int ret;
if (insn->sib.got)
- return;
- if (!insn->modrm.got)
- insn_get_modrm(insn);
+ return 0;
+
+ if (!insn->modrm.got) {
+ ret = insn_get_modrm(insn);
+ if (ret)
+ return ret;
+ }
+
if (insn->modrm.nbytes) {
modrm = (insn_byte_t)insn->modrm.value;
if (insn->addr_bytes != 2 &&
@@ -362,8 +419,10 @@ void insn_get_sib(struct insn *insn)
}
insn->sib.got = 1;
+ return 0;
+
err_out:
- return;
+ return -ENODATA;
}
@@ -374,15 +433,25 @@ err_out:
* If necessary, first collects the instruction up to and including the
* SIB byte.
* Displacement value is sign-expanded.
+ *
+ * * Returns:
+ * 0: if decoding succeeded
+ * < 0: otherwise.
*/
-void insn_get_displacement(struct insn *insn)
+int insn_get_displacement(struct insn *insn)
{
insn_byte_t mod, rm, base;
+ int ret;
if (insn->displacement.got)
- return;
- if (!insn->sib.got)
- insn_get_sib(insn);
+ return 0;
+
+ if (!insn->sib.got) {
+ ret = insn_get_sib(insn);
+ if (ret)
+ return ret;
+ }
+
if (insn->modrm.nbytes) {
/*
* Interpreting the modrm byte:
@@ -425,9 +494,10 @@ void insn_get_displacement(struct insn *
}
out:
insn->displacement.got = 1;
+ return 0;
err_out:
- return;
+ return -ENODATA;
}
/* Decode moffset16/32/64. Return 0 if failed */
@@ -538,20 +608,30 @@ err_out:
}
/**
- * insn_get_immediate() - Get the immediates of instruction
+ * insn_get_immediate() - Get the immediate in an instruction
* @insn: &struct insn containing instruction
*
* If necessary, first collects the instruction up to and including the
* displacement bytes.
* Basically, most of immediates are sign-expanded. Unsigned-value can be
- * get by bit masking with ((1 << (nbytes * 8)) - 1)
+ * computed by bit masking with ((1 << (nbytes * 8)) - 1)
+ *
+ * Returns:
+ * 0: on success
+ * < 0: on error
*/
-void insn_get_immediate(struct insn *insn)
+int insn_get_immediate(struct insn *insn)
{
+ int ret;
+
if (insn->immediate.got)
- return;
- if (!insn->displacement.got)
- insn_get_displacement(insn);
+ return 0;
+
+ if (!insn->displacement.got) {
+ ret = insn_get_displacement(insn);
+ if (ret)
+ return ret;
+ }
if (inat_has_moffset(insn->attr)) {
if (!__get_moffset(insn))
@@ -604,9 +684,10 @@ void insn_get_immediate(struct insn *ins
}
done:
insn->immediate.got = 1;
+ return 0;
err_out:
- return;
+ return -ENODATA;
}
/**
@@ -615,13 +696,58 @@ err_out:
*
* If necessary, first collects the instruction up to and including the
* immediates bytes.
- */
-void insn_get_length(struct insn *insn)
+ *
+ * Returns:
+ * - 0 on success
+ * - < 0 on error
+*/
+int insn_get_length(struct insn *insn)
{
+ int ret;
+
if (insn->length)
- return;
- if (!insn->immediate.got)
- insn_get_immediate(insn);
+ return 0;
+
+ if (!insn->immediate.got) {
+ ret = insn_get_immediate(insn);
+ if (ret)
+ return ret;
+ }
+
insn->length = (unsigned char)((unsigned long)insn->next_byte
- (unsigned long)insn->kaddr);
+
+ return 0;
+}
+
+/**
+ * insn_decode() - Decode an x86 instruction
+ * @insn: &struct insn to be initialized
+ * @kaddr: address (in kernel memory) of instruction (or copy thereof)
+ * @buf_len: length of the insn buffer at @kaddr
+ * @m: insn mode, see enum insn_mode
+ *
+ * Returns:
+ * 0: if decoding succeeded
+ * < 0: otherwise.
+ */
+int insn_decode(struct insn *insn, const void *kaddr, int buf_len, enum insn_mode m)
+{
+ int ret;
+
+/* #define INSN_MODE_KERN -1 __ignore_sync_check__ mode is only valid in the kernel */
+
+ if (m == INSN_MODE_KERN)
+ insn_init(insn, kaddr, buf_len, IS_ENABLED(CONFIG_X86_64));
+ else
+ insn_init(insn, kaddr, buf_len, m == INSN_MODE_64);
+
+ ret = insn_get_length(insn);
+ if (ret)
+ return ret;
+
+ if (insn_complete(insn))
+ return 0;
+
+ return -EINVAL;
}
--- a/tools/arch/x86/include/asm/insn.h
+++ b/tools/arch/x86/include/asm/insn.h
@@ -87,13 +87,23 @@ struct insn {
#define X86_VEX_M_MAX 0x1f /* VEX3.M Maximum value */
extern void insn_init(struct insn *insn, const void *kaddr, int buf_len, int x86_64);
-extern void insn_get_prefixes(struct insn *insn);
-extern void insn_get_opcode(struct insn *insn);
-extern void insn_get_modrm(struct insn *insn);
-extern void insn_get_sib(struct insn *insn);
-extern void insn_get_displacement(struct insn *insn);
-extern void insn_get_immediate(struct insn *insn);
-extern void insn_get_length(struct insn *insn);
+extern int insn_get_prefixes(struct insn *insn);
+extern int insn_get_opcode(struct insn *insn);
+extern int insn_get_modrm(struct insn *insn);
+extern int insn_get_sib(struct insn *insn);
+extern int insn_get_displacement(struct insn *insn);
+extern int insn_get_immediate(struct insn *insn);
+extern int insn_get_length(struct insn *insn);
+
+enum insn_mode {
+ INSN_MODE_32,
+ INSN_MODE_64,
+ /* Mode is determined by the current kernel build. */
+ INSN_MODE_KERN,
+ INSN_NUM_MODES,
+};
+
+extern int insn_decode(struct insn *insn, const void *kaddr, int buf_len, enum insn_mode m);
/* Attribute will be determined after getting ModRM (for opcode groups) */
static inline void insn_get_attribute(struct insn *insn)
--- a/tools/arch/x86/lib/insn.c
+++ b/tools/arch/x86/lib/insn.c
@@ -10,10 +10,13 @@
#else
#include <string.h>
#endif
-#include "../include/asm/inat.h" /* __ignore_sync_check__ */
-#include "../include/asm/insn.h" /* __ignore_sync_check__ */
+#include <asm/inat.h> /* __ignore_sync_check__ */
+#include <asm/insn.h> /* __ignore_sync_check__ */

These includes breaks the build for me with:

CC /local/users/fainelli/buildroot/output/arm64/build/linux-custom/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.o
In file included from util/intel-pt-decoder/intel-pt-insn-decoder.c:15:
util/intel-pt-decoder/../../../arch/x86/lib/insn.c:13:10: fatal error: asm/inat.h: No such file or directory
#include <asm/inat.h> /* __ignore_sync_check__ */
^~~~~~~~~~~~
compilation terminated.
make[7]: *** [util/intel-pt-decoder/Build:14: /local/users/fainelli/buildroot/output/arm64/build/linux-custom/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.o] Error 1
make[6]: *** [/local/users/fainelli/buildroot/output/arm64/build/linux-custom/tools/build/Makefile.build:139: intel-pt-decoder] Error 2
make[5]: *** [/local/users/fainelli/buildroot/output/arm64/build/linux-custom/tools/build/Makefile.build:139: util] Error 2
make[4]: *** [Makefile.perf:643: /local/users/fainelli/buildroot/output/arm64/build/linux-custom/tools/perf/perf-in.o] Error 2
make[3]: *** [Makefile.perf:229: sub-make] Error 2
make[2]: *** [Makefile:70: all] Error 2
make[1]: *** [package/pkg-generic.mk:294: /local/users/fainelli/buildroot/output/arm64/build/linux-tools/.stamp_built] Error 2
make: *** [Makefile:27: _all] Error 2

It looks like you would also need to back port this:

https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=0705ef64d1ff52b817e278ca6e28095585ff31e1
--
Florian