[v6 PATCH 17/21] x86: Add emulation code for UMIP instructions
From: Ricardo Neri
Date: Tue Mar 07 2017 - 19:40:07 EST
The feature User-Mode Instruction Prevention present in recent Intel
processor prevents a group of instructions from being executed with
CPL > 0. Otherwise, a general protection fault is issued.
Rather than relaying this fault to the user space (in the form of a SIGSEGV
signal), the instructions protected by UMIP can be emulated to provide
dummy results. This allows to conserve the current kernel behavior and not
reveal the system resources that UMIP intends to protect (the global
descriptor and interrupt descriptor tables, the segment selectors of the
local descriptor table and the task state and the machine status word).
This emulation is needed because certain applications (e.g., WineHQ) rely
on this subset of instructions to function.
The instructions protected by UMIP can be split in two groups. Those who
return a kernel memory address (sgdt and sidt) and those who return a
value (sldt, str and smsw).
For the instructions that return a kernel memory address, applications
such as WineHQ rely on the result being located in the kernel memory space.
The result is emulated as a hard-coded value that, lies close to the top
of the kernel memory. The limit for the GDT and the IDT are set to zero.
The instructions sldt and str return a segment selector relative to the
base address of the global descriptor table. Since the actual address of
such table is not revealed, it makes sense to emulate the result as zero.
The instruction smsw is emulated to return the value that the register CR0
has at boot time as set in the head_32.
Care is taken to appropriately emulate the results when segmentation is
used. This is, rather than relying on USER_DS and USER_CS, the function
insn_get_addr_ref inspects the segment descriptor pointed by the registers
in pt_regs. This ensures that we correctly obtain the segment base address
and the address and operand sizes even if the user space application uses
local descriptor table.
Cc: Andy Lutomirski <luto@xxxxxxxxxx>
Cc: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx>
Cc: H. Peter Anvin <hpa@xxxxxxxxx>
Cc: Borislav Petkov <bp@xxxxxxx>
Cc: Brian Gerst <brgerst@xxxxxxxxx>
Cc: Chen Yucong <slaoub@xxxxxxxxx>
Cc: Chris Metcalf <cmetcalf@xxxxxxxxxxxx>
Cc: Dave Hansen <dave.hansen@xxxxxxxxxxxxxxx>
Cc: Fenghua Yu <fenghua.yu@xxxxxxxxx>
Cc: Huang Rui <ray.huang@xxxxxxx>
Cc: Jiri Slaby <jslaby@xxxxxxx>
Cc: Jonathan Corbet <corbet@xxxxxxx>
Cc: Michael S. Tsirkin <mst@xxxxxxxxxx>
Cc: Paul Gortmaker <paul.gortmaker@xxxxxxxxxxxxx>
Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
Cc: Ravi V. Shankar <ravi.v.shankar@xxxxxxxxx>
Cc: Shuah Khan <shuah@xxxxxxxxxx>
Cc: Vlastimil Babka <vbabka@xxxxxxx>
Cc: Tony Luck <tony.luck@xxxxxxxxx>
Cc: Paolo Bonzini <pbonzini@xxxxxxxxxx>
Cc: Liang Z. Li <liang.z.li@xxxxxxxxx>
Cc: Alexandre Julliard <julliard@xxxxxxxxxx>
Cc: Stas Sergeev <stsp@xxxxxxx>
Cc: x86@xxxxxxxxxx
Cc: linux-msdos@xxxxxxxxxxxxxxx
Signed-off-by: Ricardo Neri <ricardo.neri-calderon@xxxxxxxxxxxxxxx>
---
arch/x86/include/asm/umip.h | 15 +++
arch/x86/kernel/Makefile | 1 +
arch/x86/kernel/umip.c | 257 ++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 273 insertions(+)
create mode 100644 arch/x86/include/asm/umip.h
create mode 100644 arch/x86/kernel/umip.c
diff --git a/arch/x86/include/asm/umip.h b/arch/x86/include/asm/umip.h
new file mode 100644
index 0000000..077b236
--- /dev/null
+++ b/arch/x86/include/asm/umip.h
@@ -0,0 +1,15 @@
+#ifndef _ASM_X86_UMIP_H
+#define _ASM_X86_UMIP_H
+
+#include <linux/types.h>
+#include <asm/ptrace.h>
+
+#ifdef CONFIG_X86_INTEL_UMIP
+bool fixup_umip_exception(struct pt_regs *regs);
+#else
+static inline bool fixup_umip_exception(struct pt_regs *regs)
+{
+ return false;
+}
+#endif /* CONFIG_X86_INTEL_UMIP */
+#endif /* _ASM_X86_UMIP_H */
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 84c0059..0ded7b1 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -122,6 +122,7 @@ obj-$(CONFIG_EFI) += sysfb_efi.o
obj-$(CONFIG_PERF_EVENTS) += perf_regs.o
obj-$(CONFIG_TRACING) += tracepoint.o
obj-$(CONFIG_SCHED_MC_PRIO) += itmt.o
+obj-$(CONFIG_X86_INTEL_UMIP) += umip.o
ifdef CONFIG_FRAME_POINTER
obj-y += unwind_frame.o
diff --git a/arch/x86/kernel/umip.c b/arch/x86/kernel/umip.c
new file mode 100644
index 0000000..e64d8e5
--- /dev/null
+++ b/arch/x86/kernel/umip.c
@@ -0,0 +1,257 @@
+/*
+ * umip.c Emulation for instruction protected by the Intel User-Mode
+ * Instruction Prevention. The instructions are:
+ * sgdt
+ * sldt
+ * sidt
+ * str
+ * smsw
+ *
+ * Copyright (c) 2017, Intel Corporation.
+ * Ricardo Neri <ricardo.neri@xxxxxxxxxxxxxxx>
+ */
+
+#include <linux/uaccess.h>
+#include <asm/umip.h>
+#include <asm/traps.h>
+#include <asm/insn.h>
+#include <asm/insn-eval.h>
+#include <linux/ratelimit.h>
+
+/*
+ * == Base addresses of GDT and IDT
+ * Some applications to function rely finding the global descriptor table (GDT)
+ * and the interrupt descriptor table (IDT) in kernel memory.
+ * For x86_32, the selected values do not match any particular hole, but it
+ * suffices to provide a memory location within kernel memory.
+ *
+ * == CRO flags for SMSW
+ * Use the flags given when booting, as found in head_32.S
+ */
+
+#define CR0_STATE (X86_CR0_PE | X86_CR0_MP | X86_CR0_ET | X86_CR0_NE | \
+ X86_CR0_WP | X86_CR0_AM)
+#define UMIP_DUMMY_GDT_BASE 0xfffe0000
+#define UMIP_DUMMY_IDT_BASE 0xffff0000
+
+enum umip_insn {
+ UMIP_SGDT = 0, /* opcode 0f 01 ModR/M reg 0 */
+ UMIP_SIDT, /* opcode 0f 01 ModR/M reg 1 */
+ UMIP_SLDT, /* opcode 0f 00 ModR/M reg 0 */
+ UMIP_SMSW, /* opcode 0f 01 ModR/M reg 4 */
+ UMIP_STR, /* opcode 0f 00 ModR/M reg 1 */
+};
+
+/**
+ * __identify_insn - Identify a UMIP-protected instruction
+ * @insn: Instruction structure with opcode and ModRM byte.
+ *
+ * From the instruction opcode and the reg part of the ModRM byte, identify,
+ * if any, a UMIP-protected instruction.
+ *
+ * Return: an enumeration of a UMIP-protected instruction; -EINVAL on failure.
+ */
+static int __identify_insn(struct insn *insn)
+{
+ /* By getting modrm we also get the opcode. */
+ insn_get_modrm(insn);
+
+ /* All the instructions of interest start with 0x0f. */
+ if (insn->opcode.bytes[0] != 0xf)
+ return -EINVAL;
+
+ if (insn->opcode.bytes[1] == 0x1) {
+ switch (X86_MODRM_REG(insn->modrm.value)) {
+ case 0:
+ return UMIP_SGDT;
+ case 1:
+ return UMIP_SIDT;
+ case 4:
+ return UMIP_SMSW;
+ default:
+ return -EINVAL;
+ }
+ } else if (insn->opcode.bytes[1] == 0x0) {
+ if (X86_MODRM_REG(insn->modrm.value) == 0)
+ return UMIP_SLDT;
+ else if (X86_MODRM_REG(insn->modrm.value) == 1)
+ return UMIP_STR;
+ else
+ return -EINVAL;
+ } else {
+ return -EINVAL;
+ }
+}
+
+/**
+ * __emulate_umip_insn - Emulate UMIP instructions with dummy values
+ * @insn: Instruction structure with ModRM byte
+ * @umip_inst: Instruction to emulate
+ * @data: Buffer onto which the dummy values will be copied
+ * @data_size: Size of the emulated result
+ *
+ * Emulate an instruction protected by UMIP. The result of the emulation
+ * is saved in the provided buffer. The size of the results depends on both
+ * the instruction and type of operand (register vs memory address). Thus,
+ * the size of the result needs to be updated.
+ *
+ * Result: 0 if success, -EINVAL on failure to emulate
+ */
+static int __emulate_umip_insn(struct insn *insn, enum umip_insn umip_inst,
+ unsigned char *data, int *data_size)
+{
+ unsigned long dummy_base_addr;
+ unsigned short dummy_limit = 0;
+ unsigned int dummy_value = 0;
+
+ switch (umip_inst) {
+ /*
+ * These two instructions return the base address and limit of the
+ * global and interrupt descriptor table. The base address can be
+ * 24-bit, 32-bit or 64-bit. Limit is always 16-bit. If the operand
+ * size is 16-bit the returned value of the base address is supposed
+ * to be a zero-extended 24-byte number. However, it seems that a
+ * 32-byte number is always returned in legacy protected mode
+ * irrespective of the operand size.
+ */
+ case UMIP_SGDT:
+ /* fall through */
+ case UMIP_SIDT:
+ if (umip_inst == UMIP_SGDT)
+ dummy_base_addr = UMIP_DUMMY_GDT_BASE;
+ else
+ dummy_base_addr = UMIP_DUMMY_IDT_BASE;
+ if (X86_MODRM_MOD(insn->modrm.value) == 3) {
+ /* SGDT and SIDT do not take register as argument. */
+ return -EINVAL;
+ }
+
+ memcpy(data + 2, &dummy_base_addr, sizeof(dummy_base_addr));
+ memcpy(data, &dummy_limit, sizeof(dummy_limit));
+ *data_size = sizeof(dummy_base_addr) + sizeof(dummy_limit);
+ break;
+ case UMIP_SMSW:
+ /*
+ * Even though CR0_STATE contain 4 bytes, the number
+ * of bytes to be copied in the result buffer is determined
+ * by whether the operand is a register or a memory location.
+ */
+ dummy_value = CR0_STATE;
+ /*
+ * These two instructions return a 16-bit value. We return
+ * all zeros. This is equivalent to a null descriptor for
+ * str and sldt.
+ */
+ /* fall through */
+ case UMIP_SLDT:
+ /* fall through */
+ case UMIP_STR:
+ /* if operand is a register, it is zero-extended */
+ if (X86_MODRM_MOD(insn->modrm.value) == 3) {
+ memset(data, 0, insn->opnd_bytes);
+ *data_size = insn->opnd_bytes;
+ /* if not, only the two least significant bytes are copied */
+ } else {
+ *data_size = 2;
+ }
+ memcpy(data, &dummy_value, sizeof(dummy_value));
+ break;
+ default:
+ return -EINVAL;
+ }
+ return 0;
+}
+
+/**
+ * fixup_umip_exception - Fixup #GP faults caused by UMIP
+ * @regs: Registers as saved when entering the #GP trap
+ *
+ * The instructions sgdt, sidt, str, smsw, sldt cause a general protection
+ * fault if with CPL > 0 (i.e., from user space). This function can be
+ * used to emulate the results of the aforementioned instructions with
+ * dummy values. Results are copied to user-space memory as indicated by
+ * the instruction pointed by EIP using the registers indicated in the
+ * instruction operands. This function also takes care of determining
+ * the address to which the results must be copied.
+ */
+bool fixup_umip_exception(struct pt_regs *regs)
+{
+ struct insn insn;
+ unsigned char buf[MAX_INSN_SIZE];
+ /* 10 bytes is the maximum size of the result of UMIP instructions */
+ unsigned char dummy_data[10] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+ unsigned long seg_base;
+ int not_copied, nr_copied, reg_offset, dummy_data_size;
+ void __user *uaddr;
+ unsigned long *reg_addr;
+ enum umip_insn umip_inst;
+
+ /*
+ * Use the segment base in case user space used a different code
+ * segment, either in protected (e.g., from an LDT) or virtual-8086
+ * modes. In most of the cases seg_base will be zero as in USER_CS.
+ */
+ seg_base = insn_get_seg_base(regs, &insn, offsetof(struct pt_regs, ip),
+ true);
+ not_copied = copy_from_user(buf, (void __user *)(seg_base + regs->ip),
+ sizeof(buf));
+ nr_copied = sizeof(buf) - not_copied;
+ /*
+ * The copy_from_user above could have failed if user code is protected
+ * by a memory protection key. Give up on emulation in such a case.
+ * Should we issue a page fault?
+ */
+ if (!nr_copied)
+ return false;
+
+ insn_init(&insn, buf, nr_copied, 0);
+
+ /*
+ * Override the default operand and address sizes to what is specified
+ * in the code segment descriptor. The instruction decoder only sets
+ * the address size it to either 4 or 8 address bytes and does nothing
+ * for the operand bytes. This OK for most of the cases, but we could
+ * have special cases where, for instance, a 16-bit code segment
+ * descriptor is used.
+ * If there are overrides, the instruction decoder correctly updates
+ * these values, even for 16-bit defaults.
+ */
+ insn.addr_bytes = insn_get_seg_default_address_bytes(regs);
+ insn.opnd_bytes = insn_get_seg_default_operand_bytes(regs);
+
+ if (!insn.addr_bytes || !insn.opnd_bytes)
+ return false;
+
+#ifdef CONFIG_X86_64
+ if (user_64bit_mode(regs))
+ return false;
+#endif
+
+ insn_get_length(&insn);
+ if (nr_copied < insn.length)
+ return false;
+
+ umip_inst = __identify_insn(&insn);
+ /* Check if we found an instruction protected by UMIP */
+ if (umip_inst < 0)
+ return false;
+
+ if (__emulate_umip_insn(&insn, umip_inst, dummy_data, &dummy_data_size))
+ return false;
+
+ /* If operand is a register, write directly to it */
+ if (X86_MODRM_MOD(insn.modrm.value) == 3) {
+ reg_offset = insn_get_reg_offset_modrm_rm(&insn, regs);
+ reg_addr = (unsigned long *)((unsigned long)regs + reg_offset);
+ memcpy(reg_addr, dummy_data, dummy_data_size);
+ } else {
+ uaddr = insn_get_addr_ref(&insn, regs);
+ nr_copied = copy_to_user(uaddr, dummy_data, dummy_data_size);
+ if (nr_copied > 0)
+ return false;
+ }
+
+ /* increase IP to let the program keep going */
+ regs->ip += insn.length;
+ return true;
+}
--
2.9.3