[PATCH v9 13/29] x86/insn-eval: Add utility functions to get segment selector

From: Ricardo Neri
Date: Wed Oct 04 2017 - 00:00:13 EST


When computing a linear address and segmentation is used, we need to know
the base address of the segment involved in the computation. In most of
the cases, the segment base address will be zero as in USER_DS/USER32_DS.
However, it may be possible that a user space program defines its own
segments via a local descriptor table. In such a case, the segment base
address may not be zero. Thus, the segment base address is needed to
calculate correctly the linear address.

If running in protected mode, the segment selector to be used when
computing a linear address is determined by either any of segment override
prefixes in the instruction or inferred from the registers involved in the
computation of the effective address; in that order. Also, there are cases
when the segment override prefixes shall be ignored (i.e., code segments
are always selected by the CS segment register; string instructions always
use the ES segment register when using rDI register as operand). In long
mode, segment registers are ignored, except for FS and GS. In these two
cases, base addresses are obtained from the respective MSRs.

For clarity, this process can be split into four steps (and an equal
number of functions): determine if segment prefixes overrides can be used;
parse the segment override prefixes, and use them if found; if not found
or cannot be used, use the default segment registers associated with the
operand registers. Once the segment register to use has been identified,
read its value to obtain the segment selector.

The method to obtain the segment selector depends on several factors. In
32-bit builds, segment selectors are saved into a pt_regs structure
when switching to kernel mode. The same is also true for virtual-8086
mode. In 64-bit builds, segmentation is mostly ignored, except when
running a program in 32-bit legacy mode. In this case, CS and SS can be
obtained from pt_regs. DS, ES, FS and GS can be read directly from
the respective segment registers.

In order to identify the segment registers, a new set of #defines is
introduced. It also includes two special identifiers. One of them
indicates when the default segment register associated with instruction
operands shall be used. Another one indicates that the contents of the
segment register shall be ignored; this identifier is used when in long
mode.

Cc: Dave Hansen <dave.hansen@xxxxxxxxxxxxxxx>
Cc: Adam Buchbinder <adam.buchbinder@xxxxxxxxx>
Cc: Colin Ian King <colin.king@xxxxxxxxxxxxx>
Cc: Lorenzo Stoakes <lstoakes@xxxxxxxxx>
Cc: Qiaowei Ren <qiaowei.ren@xxxxxxxxx>
Cc: Arnaldo Carvalho de Melo <acme@xxxxxxxxxx>
Cc: Masami Hiramatsu <mhiramat@xxxxxxxxxx>
Cc: Adrian Hunter <adrian.hunter@xxxxxxxxx>
Cc: Kees Cook <keescook@xxxxxxxxxxxx>
Cc: Thomas Garnier <thgarnie@xxxxxxxxxx>
Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
Cc: Borislav Petkov <bp@xxxxxxx>
Cc: Dmitry Vyukov <dvyukov@xxxxxxxxxx>
Cc: Ravi V. Shankar <ravi.v.shankar@xxxxxxxxx>
Cc: x86@xxxxxxxxxx
Signed-off-by: Ricardo Neri <ricardo.neri-calderon@xxxxxxxxxxxxxxx>
---
arch/x86/include/asm/inat.h | 10 ++
arch/x86/lib/insn-eval.c | 321 ++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 331 insertions(+)

diff --git a/arch/x86/include/asm/inat.h b/arch/x86/include/asm/inat.h
index 02aff08..1c78580 100644
--- a/arch/x86/include/asm/inat.h
+++ b/arch/x86/include/asm/inat.h
@@ -97,6 +97,16 @@
#define INAT_MAKE_GROUP(grp) ((grp << INAT_GRP_OFFS) | INAT_MODRM)
#define INAT_MAKE_IMM(imm) (imm << INAT_IMM_OFFS)

+/* Identifiers for segment registers */
+#define INAT_SEG_REG_IGNORE 0
+#define INAT_SEG_REG_DEFAULT 1
+#define INAT_SEG_REG_CS 2
+#define INAT_SEG_REG_SS 3
+#define INAT_SEG_REG_DS 4
+#define INAT_SEG_REG_ES 5
+#define INAT_SEG_REG_FS 6
+#define INAT_SEG_REG_GS 7
+
/* Attribute search APIs */
extern insn_attr_t inat_get_opcode_attribute(insn_byte_t opcode);
extern int inat_get_last_prefix_id(insn_byte_t last_pfx);
diff --git a/arch/x86/lib/insn-eval.c b/arch/x86/lib/insn-eval.c
index ac7b87c..77b48f9 100644
--- a/arch/x86/lib/insn-eval.c
+++ b/arch/x86/lib/insn-eval.c
@@ -9,6 +9,7 @@
#include <asm/inat.h>
#include <asm/insn.h>
#include <asm/insn-eval.h>
+#include <asm/vm86.h>

#undef pr_fmt
#define pr_fmt(fmt) "insn: " fmt
@@ -47,6 +48,326 @@ static bool is_string_insn(struct insn *insn)
}
}

+/**
+ * get_overridden_seg_reg_idx() - obtain segment register override index
+ * @insn: Instruction with segment override prefixes
+ *
+ * Inspect the instruction prefixes and find segment overrides, if any.
+ *
+ * Returns:
+ *
+ * A constant identifying the segment register to use, among CS, SS, DS,
+ * ES, FS, or GS. INAT_SEG_REG_DEFAULT is returned if no segment override
+ * prefixes were found.
+ *
+ * -EINVAL in case of error.
+ */
+static int get_overridden_seg_reg_idx(struct insn *insn)
+{
+ int idx = INAT_SEG_REG_DEFAULT;
+ int sel_overrides = 0, i;
+
+ if (!insn)
+ return -EINVAL;
+
+ insn_get_prefixes(insn);
+
+ /* Look for any segment override prefixes. */
+ for (i = 0; i < insn->prefixes.nbytes; i++) {
+ insn_attr_t attr;
+
+ attr = inat_get_opcode_attribute(insn->prefixes.bytes[i]);
+ switch (attr) {
+ case INAT_MAKE_PREFIX(INAT_PFX_CS):
+ idx = INAT_SEG_REG_CS;
+ sel_overrides++;
+ break;
+ case INAT_MAKE_PREFIX(INAT_PFX_SS):
+ idx = INAT_SEG_REG_SS;
+ sel_overrides++;
+ break;
+ case INAT_MAKE_PREFIX(INAT_PFX_DS):
+ idx = INAT_SEG_REG_DS;
+ sel_overrides++;
+ break;
+ case INAT_MAKE_PREFIX(INAT_PFX_ES):
+ idx = INAT_SEG_REG_ES;
+ sel_overrides++;
+ break;
+ case INAT_MAKE_PREFIX(INAT_PFX_FS):
+ idx = INAT_SEG_REG_FS;
+ sel_overrides++;
+ break;
+ case INAT_MAKE_PREFIX(INAT_PFX_GS):
+ idx = INAT_SEG_REG_GS;
+ sel_overrides++;
+ break;
+ /* No default action needed. */
+ }
+ }
+
+ /* More than one segment override prefix leads to undefined behavior. */
+ if (sel_overrides > 1)
+ return -EINVAL;
+
+ return idx;
+}
+
+/**
+ * allow_seg_reg_overrides() - check if segment override prefixes are allowed
+ * @insn: Instruction with segment override prefixes
+ * @regoff: Operand offset, in pt_regs, for which the check is performed
+ *
+ * Determine if for a particular register used in register-indirect addressing
+ * segment override prefixes can be used. Specifically, no overrides are allowed
+ * for rIP as well as for rDI if used in a string instruction.
+ *
+ * Returns:
+ *
+ * 1 if segment override prefixes can be used with the register indicated
+ * in regoff. 0 if otherwise.
+ *
+ * -EINVAL in case of error.
+ */
+static int allow_seg_reg_overrides(struct insn *insn, int regoff)
+{
+ /*
+ * Segment override prefixes should not be used for rIP. It is not
+ * necessary to inspect the instruction structure.
+ */
+ if (regoff == offsetof(struct pt_regs, ip))
+ return 0;
+
+ /* Subsequent checks require a valid insn. */
+ if (!insn)
+ return -EINVAL;
+
+ if (regoff == offsetof(struct pt_regs, di) && is_string_insn(insn))
+ return 0;
+
+ return 1;
+}
+
+/**
+ * resolve_seg_reg() - obtain segment register index
+ * @insn: Instruction with operands
+ * @regs: Register values as seen when entering kernel mode
+ * @regoff: Operand offset, in pt_regs, used to deterimine segment register
+ *
+ * Determine the segment register associated with the operands and, if
+ * applicable, prefixes and the instruction pointed by insn.
+ *
+ * The segment register associated to an operand used in register-indirect
+ * addressing depends on:
+ *
+ * a) Whether running in long mode (in such a case segments are ignored, except
+ * if FS or GS are used).
+ *
+ * b) Whether segment override prefixes can be used. Certain instructions and
+ * registers do not allow override prefixes.
+ *
+ * c) If segment overrides prefixes are found in the instruction prefixes.
+ *
+ * d) The default segment register associated with the operand register.
+ *
+ * The function checks first if segment override prefixes can be used with the
+ * register indicated by regoff. If allowed, obtain such overridden segment.
+ * Lastly, if not prefixes were found, resolve the segment register index to use
+ * based on the defaults described in the Intel documentation. All segment
+ * register indexes will be ignored, except if overrides were found for FS or
+ * GS.
+ *
+ * The operand register, regoff, is represented as the offset from the base of
+ * pt_regs.
+ *
+ * Please note that this function does not return the value in the segment
+ * register (i.e., the segment selector) but our defined index. The segment
+ * selector needs to be obtained using get_segment_selector() and passing the
+ * segment register index resolved by this function.
+ *
+ * Returns:
+ *
+ * An index identifying the segment register to use, among CS, SS, DS,
+ * ES, FS, or GS. INAT_SEG_REG_IGNORE is returned if running in long mode.
+ *
+ * -EINVAL in case of error.
+ */
+static int resolve_seg_reg(struct insn *insn, struct pt_regs *regs, int regoff)
+{
+ int use_pfx_overrides, idx;
+
+ use_pfx_overrides = allow_seg_reg_overrides(insn, regoff);
+ if (use_pfx_overrides < 0)
+ return use_pfx_overrides;
+
+ if (use_pfx_overrides == 0)
+ goto resolve_default_idx;
+
+ if (!insn)
+ return -EINVAL;
+
+ idx = get_overridden_seg_reg_idx(insn);
+ if (idx < 0)
+ return idx;
+
+ if (idx == INAT_SEG_REG_DEFAULT)
+ goto resolve_default_idx;
+
+ /*
+ * In long mode, segment override prefixes are ignored, except for
+ * overrides for FS and GS.
+ */
+ if (user_64bit_mode(regs)) {
+ if (idx != INAT_SEG_REG_FS &&
+ idx != INAT_SEG_REG_GS)
+ idx = INAT_SEG_REG_IGNORE;
+ }
+
+ return idx;
+
+resolve_default_idx:
+
+ if (user_64bit_mode(regs))
+ return INAT_SEG_REG_IGNORE;
+ /*
+ * If we are here, we use the default segment register as described
+ * in the Intel documentation:
+ *
+ * + DS for all references involving r[ABCD]X, and rSI.
+ * + If used in a string instruction, ES for rDI. Otherwise, DS.
+ * + AX, CX and DX are not valid register operands in 16-bit addresses.
+ * encodings but are valid for 32-bit and 64-bit encodings.
+ * + -EDOM is reserved to identify for cases in which no register
+ * is used (i.e., displacement-only addressing). Use DS.
+ * + SS for (E)SP or (E)BP.
+ * + CS for (E)IP.
+ */
+
+ switch (regoff) {
+ case offsetof(struct pt_regs, ax):
+ case offsetof(struct pt_regs, cx):
+ case offsetof(struct pt_regs, dx):
+ /* Need insn to verify address size. */
+ if (insn->addr_bytes == 2)
+ return -EINVAL;
+
+ case -EDOM:
+ case offsetof(struct pt_regs, bx):
+ case offsetof(struct pt_regs, si):
+ return INAT_SEG_REG_DS;
+
+ case offsetof(struct pt_regs, di):
+ if (is_string_insn(insn))
+ return INAT_SEG_REG_ES;
+ return INAT_SEG_REG_DS;
+
+ case offsetof(struct pt_regs, bp):
+ case offsetof(struct pt_regs, sp):
+ return INAT_SEG_REG_SS;
+
+ case offsetof(struct pt_regs, ip):
+ return INAT_SEG_REG_CS;
+
+ default:
+ return -EINVAL;
+ }
+}
+
+/**
+ * get_segment_selector() - obtain segment selector
+ * @regs: Register values as seen when entering kernel mode
+ * @seg_reg_idx: Segment register index to use
+ *
+ * Obtain the segment selector from any of the CS, SS, DS, ES, FS, GS segment
+ * registers. In CONFIG_X86_32, the segment is obtained from either pt_regs or
+ * kernel_vm86_regs as applicable. In CONFIG_X86_64, CS and SS are obtained
+ * from pt_regs. DS, ES, FS and GS are obtained by reading the actual CPU
+ * registers. This done for only for completeness as in CONFIG_X86_64 segment
+ * registers are ignored.
+ *
+ * Returns:
+ *
+ * Value of the segment selector, including null when running in
+ * long mode.
+ *
+ * -EINVAL on error.
+ */
+static short get_segment_selector(struct pt_regs *regs, int seg_reg_idx)
+{
+#ifdef CONFIG_X86_64
+ unsigned short sel;
+
+ switch (seg_reg_idx) {
+ case INAT_SEG_REG_IGNORE:
+ return 0;
+ case INAT_SEG_REG_CS:
+ return (unsigned short)(regs->cs & 0xffff);
+ case INAT_SEG_REG_SS:
+ return (unsigned short)(regs->ss & 0xffff);
+ case INAT_SEG_REG_DS:
+ savesegment(ds, sel);
+ return sel;
+ case INAT_SEG_REG_ES:
+ savesegment(es, sel);
+ return sel;
+ case INAT_SEG_REG_FS:
+ savesegment(fs, sel);
+ return sel;
+ case INAT_SEG_REG_GS:
+ savesegment(gs, sel);
+ return sel;
+ default:
+ return -EINVAL;
+ }
+#else /* CONFIG_X86_32 */
+ struct kernel_vm86_regs *vm86regs = (struct kernel_vm86_regs *)regs;
+
+ if (v8086_mode(regs)) {
+ switch (seg_reg_idx) {
+ case INAT_SEG_REG_CS:
+ return (unsigned short)(regs->cs & 0xffff);
+ case INAT_SEG_REG_SS:
+ return (unsigned short)(regs->ss & 0xffff);
+ case INAT_SEG_REG_DS:
+ return vm86regs->ds;
+ case INAT_SEG_REG_ES:
+ return vm86regs->es;
+ case INAT_SEG_REG_FS:
+ return vm86regs->fs;
+ case INAT_SEG_REG_GS:
+ return vm86regs->gs;
+ case INAT_SEG_REG_IGNORE:
+ /* fall through */
+ default:
+ return -EINVAL;
+ }
+ }
+
+ switch (seg_reg_idx) {
+ case INAT_SEG_REG_CS:
+ return (unsigned short)(regs->cs & 0xffff);
+ case INAT_SEG_REG_SS:
+ return (unsigned short)(regs->ss & 0xffff);
+ case INAT_SEG_REG_DS:
+ return (unsigned short)(regs->ds & 0xffff);
+ case INAT_SEG_REG_ES:
+ return (unsigned short)(regs->es & 0xffff);
+ case INAT_SEG_REG_FS:
+ return (unsigned short)(regs->fs & 0xffff);
+ case INAT_SEG_REG_GS:
+ /*
+ * GS may or may not be in regs as per CONFIG_X86_32_LAZY_GS.
+ * The macro below takes care of both cases.
+ */
+ return get_user_gs(regs);
+ case INAT_SEG_REG_IGNORE:
+ /* fall through */
+ default:
+ return -EINVAL;
+ }
+#endif /* CONFIG_X86_64 */
+}
+
static int get_reg_offset(struct insn *insn, struct pt_regs *regs,
enum reg_type type)
{
--
2.7.4