[RFC patch 26/27] Immediate Values - Jump
From: Mathieu Desnoyers
Date: Wed Apr 16 2008 - 17:37:48 EST
Adds a new imv_cond() macro to declare a byte read that is meant to be embedded
in unlikely(imv_cond(var)), so the kernel can dynamically detect patterns such
as mov, test, jne or mov, test, je and patch them with nops and a jump.
Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@xxxxxxxxxx>
---
arch/x86/kernel/immediate.c | 381 ++++++++++++++++++++++++++++++++--------
include/asm-powerpc/immediate.h | 2
include/asm-x86/immediate.h | 34 +++
include/linux/immediate.h | 11 -
kernel/immediate.c | 6
5 files changed, 359 insertions(+), 75 deletions(-)
Index: linux-2.6-lttng/include/asm-x86/immediate.h
===================================================================
--- linux-2.6-lttng.orig/include/asm-x86/immediate.h 2008-04-16 14:04:47.000000000 -0400
+++ linux-2.6-lttng/include/asm-x86/immediate.h 2008-04-16 14:19:13.000000000 -0400
@@ -20,6 +20,7 @@ struct __imv {
* Pointer to the memory location of the
* immediate value within the instruction.
*/
+ int jmp_off; /* offset for jump target */
unsigned char size; /* Type size. */
unsigned char insn_size;/* Instruction size. */
} __attribute__ ((packed));
@@ -57,6 +58,7 @@ struct __imv {
".previous\n\t" \
".section __imv,\"aw\",@progbits\n\t" \
_ASM_PTR "%c1, (3f)-%c2\n\t" \
+ ".int 0\n\t" \
".byte %c2, (2b-1b)\n\t" \
".previous\n\t" \
"mov $0,%0\n\t" \
@@ -74,6 +76,7 @@ struct __imv {
".previous\n\t" \
".section __imv,\"aw\",@progbits\n\t" \
_ASM_PTR "%c1, (3f)-%c2\n\t" \
+ ".int 0\n\t" \
".byte %c2, (2b-1b)\n\t" \
".previous\n\t" \
".org . + ((-.-(2b-1b)) & (%c2-1)), 0x90\n\t" \
@@ -95,6 +98,7 @@ struct __imv {
".previous\n\t" \
".section __imv,\"aw\",@progbits\n\t" \
_ASM_PTR "%c1, (3f)-%c2\n\t" \
+ ".int 0\n\t" \
".byte %c2, (2b-1b)\n\t" \
".previous\n\t" \
".org . + ((-.-(2b-1b)) & (%c2-1)), 0x90\n\t" \
@@ -108,6 +112,34 @@ struct __imv {
value; \
})
-extern int arch_imv_update(const struct __imv *imv, int early);
+/*
+ * Result is returned in %al (the "=a" output constraint).
+ * The __imv entry is emitted with size 0 (the "i" (0) operand, %c2).
+ * Use in if (unlikely(imv_cond(var))).
+ * The variable must be one byte wide (checked by BUILD_BUG_ON).
+ */
+#define imv_cond(name) \
+ ({ \
+ __typeof__(name##__imv) value; \
+ BUILD_BUG_ON(sizeof(value) > 1); \
+ asm (".section __discard,\"\",@progbits\n\t" \
+ "1:\n\t" \
+ "mov $0,%0\n\t" \
+ "2:\n\t" \
+ ".previous\n\t" \
+ ".section __imv,\"aw\",@progbits\n\t" \
+ _ASM_PTR "%c1, (3f)-1\n\t" \
+ ".int 0\n\t" \
+ ".byte %c2, (2b-1b)\n\t" \
+ ".previous\n\t" \
+ "mov $0,%0\n\t" \
+ "3:\n\t" \
+ : "=a" (value) \
+ : "i" (&name##__imv), \
+ "i" (0)); \
+ value; \
+ })
+
+extern int arch_imv_update(struct __imv *imv, int early);
#endif /* _ASM_X86_IMMEDIATE_H */
Index: linux-2.6-lttng/arch/x86/kernel/immediate.c
===================================================================
--- linux-2.6-lttng.orig/arch/x86/kernel/immediate.c 2008-04-16 14:04:47.000000000 -0400
+++ linux-2.6-lttng/arch/x86/kernel/immediate.c 2008-04-16 14:06:17.000000000 -0400
@@ -80,13 +80,19 @@
#include <asm/cacheflush.h>
#define BREAKPOINT_INSTRUCTION 0xcc
+#define JMP_REL8 0xeb /* jmp rel8 */
+#define JMP_REL32 0xe9 /* jmp rel16/rel32 */
+#define INSN_NOP1 0x90 /* 1-byte nop */
+#define INSN_NOP2 0x66, 0x90 /* mov %esi,%esi is not a nop on x86_64 */
#define BREAKPOINT_INS_LEN 1
#define NR_NOPS 10
+/*#define DEBUG_IMMEDIATE 1*/
+
static unsigned long target_after_int3; /* EIP of the target after the int3 */
static unsigned long bypass_eip; /* EIP of the bypass. */
static unsigned long bypass_after_int3; /* EIP after the end-of-bypass int3 */
-static unsigned long after_imv; /*
+static unsigned long after_imv; /*
* EIP where to resume after the
* single-stepping.
*/
@@ -142,6 +148,25 @@ static int imv_notifier(struct notifier_
if (die_val == DIE_INT3) {
if (args->regs->ip == target_after_int3) {
+ /* deal with non-relocatable jmp instructions */
+ switch (*(uint8_t *)bypass_eip) {
+ case JMP_REL8: /* eb cb jmp rel8 */
+ args->regs->ip +=
+ *(signed char *)(bypass_eip + 1) + 1;
+ return NOTIFY_STOP;
+ case JMP_REL32: /* e9 cw jmp rel16 (valid on ia32) */
+ /* e9 cd jmp rel32 */
+ args->regs->ip +=
+ *(int *)(bypass_eip + 1) + 4;
+ return NOTIFY_STOP;
+ case INSN_NOP1:
+ /* deal with insertion of nop + jmp_rel32 */
+ if (*((uint8_t *)bypass_eip + 1) == JMP_REL32) {
+ args->regs->ip +=
+ *(int *)(bypass_eip + 2) + 5;
+ return NOTIFY_STOP;
+ }
+ }
preempt_disable();
args->regs->ip = bypass_eip;
return NOTIFY_STOP;
@@ -159,71 +184,107 @@ static struct notifier_block imv_notify
.priority = 0x7fffffff, /* we need to be notified first */
};
-/**
- * arch_imv_update - update one immediate value
- * @imv: pointer of type const struct __imv to update
- * @early: early boot (1) or normal (0)
- *
- * Update one immediate value. Must be called with imv_mutex held.
+/*
+ * Returns 0 and sets *opcode, *jmp_offset and *offset_len when addr holds a
+ * movb imm8,%al; test %al,%al; jne sequence. Returns -1 otherwise.
*/
-__kprobes int arch_imv_update(const struct __imv *imv, int early)
+static inline int detect_mov_test_jne(uint8_t *addr, uint8_t **opcode,
+ uint8_t **jmp_offset, int *offset_len)
{
- int ret;
- unsigned char opcode_size = imv->insn_size - imv->size;
- unsigned long insn = imv->imv - opcode_size;
- unsigned long len;
- char *vaddr;
- struct page *pages[1];
-
-#ifdef CONFIG_KPROBES
- /*
- * Fail if a kprobe has been set on this instruction.
- * (TODO: we could eventually do better and modify all the (possibly
- * nested) kprobes for this site if kprobes had an API for this.
- */
- if (unlikely(!early
- && *(unsigned char *)insn == BREAKPOINT_INSTRUCTION)) {
- printk(KERN_WARNING "Immediate value in conflict with kprobe. "
- "Variable at %p, "
- "instruction at %p, size %hu\n",
- (void *)imv->imv,
- (void *)imv->var, imv->size);
- return -EBUSY;
- }
-#endif
-
- /*
- * If the variable and the instruction have the same value, there is
- * nothing to do.
- */
- switch (imv->size) {
- case 1: if (*(uint8_t *)imv->imv
- == *(uint8_t *)imv->var)
- return 0;
- break;
- case 2: if (*(uint16_t *)imv->imv
- == *(uint16_t *)imv->var)
- return 0;
- break;
- case 4: if (*(uint32_t *)imv->imv
- == *(uint32_t *)imv->var)
+ printk(KERN_DEBUG "Trying at %p %hx %hx %hx %hx %hx %hx\n",
+ addr, addr[0], addr[1], addr[2], addr[3], addr[4], addr[5]);
+ /* b0 cb movb cb,%al */
+ if (addr[0] != 0xb0)
+ return -1;
+ /* 84 c0 test %al,%al */
+ if (addr[2] != 0x84 || addr[3] != 0xc0)
+ return -1;
+ printk(KERN_DEBUG "Found test %%al,%%al at %p\n", addr + 2);
+ switch (addr[4]) {
+ case 0x75: /* 75 cb jne rel8 */
+ printk(KERN_DEBUG "Found jne rel8 at %p\n", addr + 4);
+ *opcode = addr + 4;
+ *jmp_offset = addr + 5;
+ *offset_len = 1;
+ return 0;
+ case 0x0f:
+ switch (addr[5]) {
+ case 0x85: /* 0F 85 cw jne rel16 (valid on ia32) */
+ /* 0F 85 cd jne rel32 */
+ printk(KERN_DEBUG "Found jne rel16/32 at %p\n",
+ addr + 5);
+ *opcode = addr + 4;
+ *jmp_offset = addr + 6;
+ *offset_len = 4;
return 0;
+ default:
+ return -1;
+ }
break;
-#ifdef CONFIG_X86_64
- case 8: if (*(uint64_t *)imv->imv
- == *(uint64_t *)imv->var)
+ default: return -1;
+ }
+}
+
+/*
+ * Returns 0 and sets *opcode, *jmp_offset and *offset_len when addr holds a
+ * movb imm8,%al; test %al,%al; je sequence. Returns -1 otherwise.
+ */
+static inline int detect_mov_test_je(uint8_t *addr, uint8_t **opcode,
+ uint8_t **jmp_offset, int *offset_len)
+{
+ /* b0 cb movb cb,%al */
+ if (addr[0] != 0xb0)
+ return -1;
+ /* 84 c0 test %al,%al */
+ if (addr[2] != 0x84 || addr[3] != 0xc0)
+ return -1;
+ printk(KERN_DEBUG "Found test %%al,%%al at %p\n", addr + 2);
+ switch (addr[4]) {
+ case 0x74: /* 74 cb je rel8 */
+ printk(KERN_DEBUG "Found je rel8 at %p\n", addr + 4);
+ *opcode = addr + 4;
+ *jmp_offset = addr + 5;
+ *offset_len = 1;
+ return 0;
+ case 0x0f:
+ switch (addr[5]) {
+ case 0x84: /* 0F 84 cw je rel16 (valid on ia32) */
+ /* 0F 84 cd je rel32 */
+ printk(KERN_DEBUG "Found je rel16/32 at %p\n",
+ addr + 5);
+ *opcode = addr + 4;
+ *jmp_offset = addr + 6;
+ *offset_len = 4;
return 0;
+ default:
+ return -1;
+ }
break;
-#endif
- default:return -EINVAL;
+ default: return -1;
}
+}
+
+static int static_early;
- if (!early) {
- /* bypass is 10 bytes long for x86_64 long */
- WARN_ON(imv->insn_size > 10);
- _imv_bypass(&bypass_eip, &bypass_after_int3);
+/*
+ * Marked noinline because we prefer to have only one _imv_bypass. Not that it
+ * is required, but there is no need to edit two bypasses.
+ */
+static noinline int replace_instruction_safe(uint8_t *addr, uint8_t *newcode,
+ int size)
+{
+ char *vaddr;
+ struct page *pages[1];
+ int len;
+ int ret;
+
+ /* bypass is 10 bytes long for x86_64 long */
+ WARN_ON(size > 10);
+
+ _imv_bypass(&bypass_eip, &bypass_after_int3);
- after_imv = imv->imv + imv->size;
+ if (!static_early) {
+ after_imv = (unsigned long)addr + size;
/*
* Using the _early variants because nobody is executing the
@@ -238,22 +299,23 @@ __kprobes int arch_imv_update(const stru
vaddr = vmap(pages, 1, VM_MAP, PAGE_KERNEL);
BUG_ON(!vaddr);
text_poke_early(&vaddr[bypass_eip & ~PAGE_MASK],
- (void *)insn, imv->insn_size);
+ (void *)addr, size);
/*
* Fill the rest with nops.
*/
- len = NR_NOPS - imv->insn_size;
+ len = NR_NOPS - size;
add_nops((void *)
- &vaddr[(bypass_eip & ~PAGE_MASK) + imv->insn_size],
+ &vaddr[(bypass_eip & ~PAGE_MASK) + size],
len);
vunmap(vaddr);
- target_after_int3 = insn + BREAKPOINT_INS_LEN;
+ target_after_int3 = (unsigned long)addr + BREAKPOINT_INS_LEN;
/* register_die_notifier has memory barriers */
register_die_notifier(&imv_notify);
- /* The breakpoint will single-step the bypass */
- text_poke((void *)insn,
- ((unsigned char[]){BREAKPOINT_INSTRUCTION}), 1);
+ /* The breakpoint will execute the bypass */
+ text_poke((void *)addr,
+ ((unsigned char[]){BREAKPOINT_INSTRUCTION}),
+ BREAKPOINT_INS_LEN);
/*
* Make sure the breakpoint is set before we continue (visible
* to other CPUs and interrupts).
@@ -265,14 +327,18 @@ __kprobes int arch_imv_update(const stru
ret = on_each_cpu(imv_synchronize_core, NULL, 1, 1);
BUG_ON(ret != 0);
- text_poke((void *)(insn + opcode_size), (void *)imv->var,
- imv->size);
+ text_poke((void *)(addr + BREAKPOINT_INS_LEN),
+ &newcode[BREAKPOINT_INS_LEN],
+ size - BREAKPOINT_INS_LEN);
/*
* Make sure the value can be seen from other CPUs and
* interrupts.
*/
wmb();
- text_poke((void *)insn, (unsigned char *)bypass_eip, 1);
+#ifdef DEBUG_IMMEDIATE
+ mdelay(10); /* lets the breakpoint for a while */
+#endif
+ text_poke(addr, newcode, BREAKPOINT_INS_LEN);
/*
* Wait for all int3 handlers to end (interrupts are disabled in
* int3). This CPU is clearly not in a int3 handler, because
@@ -285,7 +351,184 @@ __kprobes int arch_imv_update(const stru
unregister_die_notifier(&imv_notify);
/* unregister_die_notifier has memory barriers */
} else
- text_poke_early((void *)imv->imv, (void *)imv->var,
- imv->size);
+ text_poke_early(addr, newcode, size);
+ return 0;
+}
+
+/*
+ * Patch an imv_cond() site: the first call turns mov/test/jcc into nops + jmp,
+ * later calls retarget that jmp. Returns 0, or -1 if the pattern is unknown.
+ */
+static int patch_jump_target(struct __imv *imv)
+{
+ uint8_t *opcode, *jmp_offset;
+ uint8_t val = *(uint8_t *)imv->var; /* imv->var is an address */
+ int offset_len, mov_test_j_found = 0;
+
+ if (!detect_mov_test_jne((uint8_t *)imv->imv - 1,
+ &opcode, &jmp_offset, &offset_len)) {
+ imv->insn_size = 1; /* positive logic: jump when val != 0 */
+ mov_test_j_found = 1;
+ } else if (!detect_mov_test_je((uint8_t *)imv->imv - 1,
+ &opcode, &jmp_offset, &offset_len)) {
+ imv->insn_size = 0; /* negative logic: jump when val == 0 */
+ mov_test_j_found = 1;
+ }
+
+ if (mov_test_j_found) {
+ int logicvar = imv->insn_size ? val : !val;
+ int newoff;
+
+ if (offset_len == 1) {
+ imv->jmp_off = *(signed char *)jmp_offset;
+ /* replace with JMP_REL8 opcode. */
+ replace_instruction_safe(opcode,
+ ((unsigned char[]){ JMP_REL8,
+ (logicvar ? (signed char)imv->jmp_off : 0) }),
+ 2);
+ } else {
+ /* replace with nop and JMP_REL16/32 opcode.
+ * It's ok to shrink an instruction, never ok to
+ * grow it afterward. */
+ imv->jmp_off = *(int *)jmp_offset;
+ newoff = logicvar ? (int)imv->jmp_off : 0;
+ replace_instruction_safe(opcode,
+ ((unsigned char[]){ INSN_NOP1, JMP_REL32,
+ ((unsigned char *)&newoff)[0],
+ ((unsigned char *)&newoff)[1],
+ ((unsigned char *)&newoff)[2],
+ ((unsigned char *)&newoff)[3] }),
+ 6);
+ }
+ /* now we can get rid of the movb */
+ replace_instruction_safe((uint8_t *)imv->imv - 1,
+ ((unsigned char[]){ INSN_NOP2 }),
+ 2);
+ /* now we can get rid of the testb */
+ replace_instruction_safe((uint8_t *)imv->imv + 1,
+ ((unsigned char[]){ INSN_NOP2 }),
+ 2);
+ /* point imv->imv at the jmp offset field (past the nop, if any) */
+ imv->imv = (unsigned long)opcode + (offset_len == 1 ? 1 : 2);
+ return 0;
+ }
+
+ if (*((uint8_t *)imv->imv - 1) == JMP_REL8) {
+ int logicvar = imv->insn_size ? val : !val;
+
+ printk(KERN_DEBUG "Found JMP_REL8 at %p\n",
+ ((uint8_t *)imv->imv - 1));
+ replace_instruction_safe((uint8_t *)imv->imv - 1,
+ ((unsigned char[]){ JMP_REL8,
+ (logicvar ? (signed char)imv->jmp_off : 0) }),
+ 2);
+ return 0;
+ }
+
+ if (*((uint8_t *)imv->imv - 1) == JMP_REL32) {
+ int logicvar = imv->insn_size ? val : !val;
+ int newoff = logicvar ? (int)imv->jmp_off : 0;
+
+ printk(KERN_DEBUG "Found JMP_REL32 at %p, update with %x\n",
+ ((uint8_t *)imv->imv - 1), newoff);
+ replace_instruction_safe((uint8_t *)imv->imv - 1,
+ ((unsigned char[]){ JMP_REL32,
+ ((unsigned char *)&newoff)[0],
+ ((unsigned char *)&newoff)[1],
+ ((unsigned char *)&newoff)[2],
+ ((unsigned char *)&newoff)[3] }),
+ 5);
+ return 0;
+ }
+
+ /* Nothing known found. */
+ return -1;
+}
+
+/**
+ * arch_imv_update - update one immediate value
+ * @imv: pointer of type struct __imv to update (may be modified)
+ * @early: early boot (1) or normal (0)
+ *
+ * Update one immediate value. Must be called with imv_mutex held.
+ */
+__kprobes int arch_imv_update(struct __imv *imv, int early)
+{
+ int ret;
+ uint8_t buf[10];
+ unsigned long insn, opcode_size;
+
+ static_early = early; /* read by replace_instruction_safe() */
+
+ /*
+ * If imv_cond is encountered, try to patch it with
+ * patch_jump_target. Continue with normal immediate values if the area
+ * surrounding the instruction is not as expected.
+ */
+ if (imv->size == 0) {
+ ret = patch_jump_target(imv);
+ if (ret) {
+#ifdef DEBUG_IMMEDIATE
+ static int nr_fail;
+ printk("Jump target fallback at %lX, nr fail %d\n",
+ imv->imv, ++nr_fail);
+#endif
+ imv->size = 1; /* fall back to a 1-byte immediate */
+ } else {
+#ifdef DEBUG_IMMEDIATE
+ static int nr_success;
+ printk("Jump target at %lX, nr success %d\n",
+ imv->imv, ++nr_success);
+#endif
+ return 0;
+ }
+ }
+
+ opcode_size = imv->insn_size - imv->size;
+ insn = imv->imv - opcode_size;
+
+#ifdef CONFIG_KPROBES
+ /*
+ * Fail if a kprobe has been set on this instruction.
+ * (TODO: we could eventually do better and modify all the (possibly
+ * nested) kprobes for this site if kprobes had an API for this.
+ */
+ if (unlikely(!early
+ && *(unsigned char *)insn == BREAKPOINT_INSTRUCTION)) {
+ printk(KERN_WARNING "Immediate value in conflict with kprobe. "
+ "Variable at %p, "
+ "instruction at %p, size %hu\n",
+ (void *)imv->var,
+ (void *)imv->imv, imv->size);
+ return -EBUSY;
+ }
+#endif
+
+ /*
+ * If the variable and the instruction have the same value, there is
+ * nothing to do.
+ */
+ switch (imv->size) {
+ case 1: if (*(uint8_t *)imv->imv == *(uint8_t *)imv->var)
+ return 0;
+ break;
+ case 2: if (*(uint16_t *)imv->imv == *(uint16_t *)imv->var)
+ return 0;
+ break;
+ case 4: if (*(uint32_t *)imv->imv == *(uint32_t *)imv->var)
+ return 0;
+ break;
+#ifdef CONFIG_X86_64
+ case 8: if (*(uint64_t *)imv->imv == *(uint64_t *)imv->var)
+ return 0;
+ break;
+#endif
+ default:return -EINVAL;
+ }
+
+ memcpy(buf, (uint8_t *)insn, opcode_size);
+ memcpy(&buf[opcode_size], (void *)imv->var, imv->size);
+ replace_instruction_safe((uint8_t *)insn, buf, imv->insn_size);
+
return 0;
}
Index: linux-2.6-lttng/include/linux/immediate.h
===================================================================
--- linux-2.6-lttng.orig/include/linux/immediate.h 2008-04-16 14:04:47.000000000 -0400
+++ linux-2.6-lttng/include/linux/immediate.h 2008-04-16 14:04:48.000000000 -0400
@@ -33,8 +33,7 @@
* Internal update functions.
*/
extern void core_imv_update(void);
-extern void imv_update_range(const struct __imv *begin,
- const struct __imv *end);
+extern void imv_update_range(struct __imv *begin, struct __imv *end);
extern void imv_unref_core_init(void);
extern void imv_unref(struct __imv *begin, struct __imv *end, void *start,
unsigned long size);
@@ -54,6 +53,14 @@ extern void imv_unref(struct __imv *begi
#define imv_read(name) _imv_read(name)
/**
+ * imv_cond - read immediate variable used as condition for if()
+ * @name: immediate value name
+ *
+ * Reads the value of @name; expands to _imv_read() exactly like imv_read().
+ */
+#define imv_cond(name) _imv_read(name)
+
+/**
* imv_set - set immediate variable (with locking)
* @name: immediate value name
* @i: required value
Index: linux-2.6-lttng/kernel/immediate.c
===================================================================
--- linux-2.6-lttng.orig/kernel/immediate.c 2008-04-16 14:04:47.000000000 -0400
+++ linux-2.6-lttng/kernel/immediate.c 2008-04-16 14:04:48.000000000 -0400
@@ -43,10 +43,10 @@ static DEFINE_MUTEX(imv_mutex);
*
* Updates a range of immediates.
*/
-void imv_update_range(const struct __imv *begin,
- const struct __imv *end)
+void imv_update_range(struct __imv *begin,
+ struct __imv *end)
{
- const struct __imv *iter;
+ struct __imv *iter;
int ret;
for (iter = begin; iter < end; iter++) {
mutex_lock(&imv_mutex);
Index: linux-2.6-lttng/include/asm-powerpc/immediate.h
===================================================================
--- linux-2.6-lttng.orig/include/asm-powerpc/immediate.h 2008-04-16 14:04:47.000000000 -0400
+++ linux-2.6-lttng/include/asm-powerpc/immediate.h 2008-04-16 14:04:48.000000000 -0400
@@ -68,6 +68,8 @@ struct __imv {
value; \
})
+#define imv_cond(name) imv_read(name) /* plain immediate read on powerpc */
+
extern int arch_imv_update(const struct __imv *imv, int early);
#endif /* _ASM_POWERPC_IMMEDIATE_H */
--
Mathieu Desnoyers
Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/