[PATCH v9 22/29] x86/insn-eval: Add support to resolve 16-bit addressing encodings

From: Ricardo Neri
Date: Tue Oct 03 2017 - 23:59:52 EST


Tasks use 16-bit addressing form encodings, as described in the Intel 64
and IA-32 Architecture Software Developer's Manual Volume 2A Section 2.1.5,
Table 2-1, when they run in virtual-8086 mode, when they run in protected
mode with a code segment descriptor whose D bit specifies a 16-bit default
address size, or when an address-size override prefix selects 16-bit addressing.

16-bit addressing encodings differ in several ways from the 32-bit/64-bit
addressing form encodings: ModRM.rm points to different registers and, in
some cases, effective addresses are indicated by the addition of the value
of two registers. Also, there is no support for SIB bytes. Thus, a
separate function is needed to parse this form of addressing.

A couple of functions are introduced. get_reg_offset_16() obtains the
offset from the base of pt_regs of the registers indicated by the ModRM
byte of the address encoding. get_addr_ref_16() computes the linear
address indicated by the instructions using the value of the registers
given by ModRM and the base address of the applicable segment.

Cc: Dave Hansen <dave.hansen@xxxxxxxxxxxxxxx>
Cc: Adam Buchbinder <adam.buchbinder@xxxxxxxxx>
Cc: Colin Ian King <colin.king@xxxxxxxxxxxxx>
Cc: Lorenzo Stoakes <lstoakes@xxxxxxxxx>
Cc: Qiaowei Ren <qiaowei.ren@xxxxxxxxx>
Cc: Arnaldo Carvalho de Melo <acme@xxxxxxxxxx>
Cc: Masami Hiramatsu <mhiramat@xxxxxxxxxx>
Cc: Adrian Hunter <adrian.hunter@xxxxxxxxx>
Cc: Kees Cook <keescook@xxxxxxxxxxxx>
Cc: Thomas Garnier <thgarnie@xxxxxxxxxx>
Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
Cc: Borislav Petkov <bp@xxxxxxx>
Cc: Dmitry Vyukov <dvyukov@xxxxxxxxxx>
Cc: Ravi V. Shankar <ravi.v.shankar@xxxxxxxxx>
Cc: x86@xxxxxxxxxx
Signed-off-by: Ricardo Neri <ricardo.neri-calderon@xxxxxxxxxxxxxxx>
---
arch/x86/lib/insn-eval.c | 182 +++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 182 insertions(+)

diff --git a/arch/x86/lib/insn-eval.c b/arch/x86/lib/insn-eval.c
index d43808c..2f859a1 100644
--- a/arch/x86/lib/insn-eval.c
+++ b/arch/x86/lib/insn-eval.c
@@ -462,6 +462,80 @@ static int get_reg_offset(struct insn *insn, struct pt_regs *regs,
}

/**
+ * get_reg_offset_16() - Obtain offset of register indicated by instruction
+ * @insn: Instruction containing ModRM byte
+ * @regs: Register values as seen when entering kernel mode
+ * @offs1: Offset of the first operand register
+ * @offs2: Offset of the second operand register, if applicable
+ *
+ * Obtain the offset, in pt_regs, of the registers indicated by the ModRM byte
+ * within insn. This function is to be used with 16-bit address encodings. The
+ * offs1 and offs2 will be written with the offset of the two registers
+ * indicated by the instruction. In cases where any of the registers is not
+ * referenced by the instruction, the value will be set to -EDOM.
+ *
+ * Returns:
+ *
+ * 0 on success, -EINVAL if @offs1 or @offs2 is NULL.
+ */
+static int get_reg_offset_16(struct insn *insn, struct pt_regs *regs,
+ int *offs1, int *offs2)
+{
+ /*
+ * 16-bit addressing can use one or two registers. Both tables below are
+ * indexed by ModRM.rm and follow Table 2-1, "16-Bit Addressing Forms
+ * with the ModR/M Byte", of the Intel Software Development Manual.
+ */
+ static const int regoff1[] = {
+ offsetof(struct pt_regs, bx), /* rm 0: bx + si */
+ offsetof(struct pt_regs, bx), /* rm 1: bx + di */
+ offsetof(struct pt_regs, bp), /* rm 2: bp + si */
+ offsetof(struct pt_regs, bp), /* rm 3: bp + di */
+ offsetof(struct pt_regs, si), /* rm 4: si */
+ offsetof(struct pt_regs, di), /* rm 5: di */
+ offsetof(struct pt_regs, bp), /* rm 6: bp (unused if mod is 0) */
+ offsetof(struct pt_regs, bx), /* rm 7: bx */
+ };
+
+ static const int regoff2[] = {
+ offsetof(struct pt_regs, si),
+ offsetof(struct pt_regs, di),
+ offsetof(struct pt_regs, si),
+ offsetof(struct pt_regs, di),
+ -EDOM,
+ -EDOM,
+ -EDOM,
+ -EDOM,
+ };
+
+ if (!offs1 || !offs2)
+ return -EINVAL;
+
+ /* Operand is a register, use the generic function. */
+ if (X86_MODRM_MOD(insn->modrm.value) == 3) {
+ *offs1 = insn_get_modrm_rm_off(insn, regs);
+ *offs2 = -EDOM;
+ return 0;
+ }
+
+ *offs1 = regoff1[X86_MODRM_RM(insn->modrm.value)];
+ *offs2 = regoff2[X86_MODRM_RM(insn->modrm.value)];
+
+ /*
+ * If ModRM.mod is 0 and ModRM.rm is 110b, then we use displacement-
+ * only addressing. This means that no registers are involved in
+ * computing the effective address. Thus, ensure that the first
+ * register offset is invalid. The second register offset is already
+ * invalid under the aforementioned conditions.
+ */
+ if ((X86_MODRM_MOD(insn->modrm.value) == 0) &&
+ (X86_MODRM_RM(insn->modrm.value) == 6))
+ *offs1 = -EDOM;
+
+ return 0;
+}
+
+/**
* get_desc() - Obtain address of segment descriptor
* @sel: Segment selector
*
@@ -713,6 +787,112 @@ int insn_get_modrm_rm_off(struct insn *insn, struct pt_regs *regs)
}

/**
+ * get_addr_ref_16() - Obtain the 16-bit address referred to by instruction
+ * @insn: Instruction containing ModRM byte and displacement
+ * @regs: Register values as seen when entering kernel mode
+ *
+ * This function is to be used with 16-bit address encodings and fails if
+ * insn->addr_bytes is not 2. Obtain the memory address referred to by the
+ * instruction's ModRM and displacement bytes. The segment used as base is
+ * determined by either any segment override prefixes in insn or the default
+ * segment of the registers involved. In protected mode, limits are enforced.
+ *
+ * Returns:
+ *
+ * Linear address referenced by the instruction operands on success.
+ *
+ * -1L on error.
+ */
+static void __user *get_addr_ref_16(struct insn *insn, struct pt_regs *regs)
+{
+ unsigned long linear_addr = -1L, seg_base_addr, seg_limit;
+ int addr_offset1, addr_offset2, seg_reg_indx, ret;
+ short eff_addr, addr1 = 0, addr2 = 0;
+
+ insn_get_modrm(insn);
+ insn_get_displacement(insn);
+
+ if (insn->addr_bytes != 2)
+ goto out;
+
+ /*
+ * If operand is a register (ModRM.mod is 3), the layout is the same
+ * as in 32-bit and 64-bit addressing.
+ */
+ if (X86_MODRM_MOD(insn->modrm.value) == 3) {
+ addr_offset1 = get_reg_offset(insn, regs, REG_TYPE_RM);
+ if (addr_offset1 < 0)
+ goto out;
+
+ eff_addr = regs_get_register(regs, addr_offset1);
+
+ seg_reg_indx = resolve_seg_reg(insn, regs, addr_offset1);
+ if (seg_reg_indx < 0)
+ goto out;
+
+ seg_base_addr = insn_get_seg_base(regs, seg_reg_indx);
+ if (seg_base_addr == -1L)
+ goto out;
+
+ seg_limit = get_seg_limit(regs, seg_reg_indx);
+ } else {
+ ret = get_reg_offset_16(insn, regs, &addr_offset1,
+ &addr_offset2);
+ if (ret < 0)
+ goto out;
+
+ /*
+ * Don't fail on invalid offset values. They might be invalid
+ * because they cannot be used for this particular value of
+ * the ModRM. Instead, use them in the computation only if
+ * they contain a valid value (i.e., anything but -EDOM).
+ */
+ if (addr_offset1 != -EDOM)
+ addr1 = 0xffff & regs_get_register(regs, addr_offset1);
+ if (addr_offset2 != -EDOM)
+ addr2 = 0xffff & regs_get_register(regs, addr_offset2);
+
+ eff_addr = addr1 + addr2;
+
+ /*
+ * The first operand register could indicate the use of either
+ * SS or DS registers to obtain the segment selector. The second
+ * operand register can only indicate the use of DS. Thus, use
+ * the first operand to obtain the segment selector.
+ */
+ seg_reg_indx = resolve_seg_reg(insn, regs, addr_offset1);
+ if (seg_reg_indx < 0)
+ goto out;
+
+ seg_base_addr = insn_get_seg_base(regs, seg_reg_indx);
+ if (seg_base_addr == -1L)
+ goto out;
+
+ seg_limit = get_seg_limit(regs, seg_reg_indx);
+
+ eff_addr += (insn->displacement.value & 0xffff);
+ }
+
+ /*
+ * Before computing the linear address, make sure the effective address
+ * (taken modulo 64KB) is within the limits of the segment. In
+ * virtual-8086 mode, segment limits are not enforced. In such a case,
+ * the segment limit is -1L to reflect this fact.
+ */
+ if ((unsigned long)(eff_addr & 0xffff) > seg_limit)
+ goto out;
+
+ linear_addr = (unsigned long)(eff_addr & 0xffff) + seg_base_addr;
+
+ /* Limit linear address to 20 bits in virtual-8086 mode */
+ if (v8086_mode(regs))
+ linear_addr &= 0xfffff;
+
+out:
+ return (void __user *)linear_addr;
+}
+
+/**
* get_addr_ref_32() - Obtain a 32-bit linear address
* @insn: Instruction with ModRM, SIB bytes and displacement
* @regs: Register values as seen when entering kernel mode
@@ -1021,6 +1201,8 @@ static void __user *get_addr_ref_64(struct insn *insn, struct pt_regs *regs)
void __user *insn_get_addr_ref(struct insn *insn, struct pt_regs *regs)
{
switch (insn->addr_bytes) {
+ case 2:
+ return get_addr_ref_16(insn, regs);
case 4:
return get_addr_ref_32(insn, regs);
case 8:
--
2.7.4