[PATCH v7 1/3] x86: Add classes to exception tables

From: Tony Luck
Date: Mon Jan 04 2016 - 19:12:49 EST


Starting with a patch from Andy Lutomirski <luto@xxxxxxxxxxxxxx>
that used linker relocation trickery to free up a couple of bits
in the "fixup" field of the exception table (and generalized the
uaccess_err hack to use one of the classes).

This patch allocates another one of the classes to provide
a mechanism that passes the fault number to the fixup code
in %rax.

Still one free class for the next brilliant idea. If more are
needed it should be possible to squeeze out another bit or three
by extending the same technique.

Originally-from: Andy Lutomirski <luto@xxxxxxxxxxxxxx>
Signed-off-by: Tony Luck <tony.luck@xxxxxxxxx>
---
arch/x86/include/asm/asm.h | 102 +++++++++++++++++++++++++++++++----------
arch/x86/include/asm/uaccess.h | 17 +++++--
arch/x86/kernel/kprobes/core.c | 2 +-
arch/x86/kernel/traps.c | 6 +--
arch/x86/mm/extable.c | 66 ++++++++++++++++++--------
arch/x86/mm/fault.c | 2 +-
6 files changed, 142 insertions(+), 53 deletions(-)

diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h
index 189679aba703..977273e36f87 100644
--- a/arch/x86/include/asm/asm.h
+++ b/arch/x86/include/asm/asm.h
@@ -43,19 +43,79 @@
#define _ASM_DI __ASM_REG(di)

/* Exception table entry */
-#ifdef __ASSEMBLY__
-# define _ASM_EXTABLE(from,to) \
- .pushsection "__ex_table","a" ; \
- .balign 8 ; \
- .long (from) - . ; \
- .long (to) - . ; \
- .popsection

-# define _ASM_EXTABLE_EX(from,to) \
- .pushsection "__ex_table","a" ; \
- .balign 8 ; \
- .long (from) - . ; \
- .long (to) - . + 0x7ffffff0 ; \
+#define _EXTABLE_BIAS 0x20000000
+/*
+ * An exception table entry is 64 bits. The first 32 bits are the offset
+ * from that entry to the potentially faulting instruction. sortextable
+ * relies on that exact encoding. The second 32 bits encode the fault
+ * handler address.
+ *
+ * We want to stick two extra bits of handler class into the fault handler
+ * address. All of these are generated by relocations, so we can only
+ * rely on addition.
+ *
+ * The offset to the fixup is signed, and we're trying to use the high
+ * bits for a different purpose. In C, we could just do:
+ *
+ * u32 class_and_offset = ((target - here) & 0x3fffffff) | (class << 30);
+ *
+ * Then, to decode it, we'd mask off the class and sign-extend to recover
+ * the offset.
+ *
+ * In asm, we can't do that, because this all gets laundered through the
+ * linker, and there's no relocation type that supports this chicanery.
+ * Instead we cheat a bit. We first add a large number to the offset
+ * (0x20000000). The result is still nominally signed, but now it's
+ * always positive, and the two high bits are always clear. We can then
+ * set high bits by ordinary addition or subtraction instead of using
+ * bitwise operations. As far as the linker is concerned, all we're
+ * doing is adding a large constant to the difference between here (".")
+ * and the target, and that's a valid relocation type.
+ *
+ * We therefore emit:
+ * (target - here) + (class) + 0x20000000
+ *
+ * This has the property that the two high bits are the class (see
+ * ex_class()).
+ *
+ * To get the offset back we just mask off the class bits and subtract
+ * 0x20000000. See ex_fixup_addr().
+ */
+
+/*
+ * There are two bits of extable entry class giving four classes
+ */
+#define EXTABLE_CLASS_DEFAULT 0 /* standard uaccess fixup */
+#define EXTABLE_CLASS_FAULT 1 /* provide fault number as well as fixup */
+#define EXTABLE_CLASS_EX 2 /* uaccess + set uaccess_err */
+#define EXTABLE_CLASS_UNUSED 3 /* available for something else */
+
+/*
+ * The biases are (class << 30) + _EXTABLE_BIAS, as signed 32-bit integers.
+ * This can't use ordinary arithmetic -- the assembler isn't that smart.
+ */
+#define _EXTABLE_BIAS_DEFAULT _EXTABLE_BIAS
+#define _EXTABLE_BIAS_FAULT _EXTABLE_BIAS + 0x40000000
+#define _EXTABLE_BIAS_EX _EXTABLE_BIAS - 0x80000000
+#define _EXTABLE_BIAS_UNUSED _EXTABLE_BIAS - 0x40000000
+
+#define _ASM_EXTABLE(from,to) \
+ _ASM_EXTABLE_CLASS(from, to, _EXTABLE_BIAS_DEFAULT)
+
+#define _ASM_EXTABLE_FAULT(from,to) \
+ _ASM_EXTABLE_CLASS(from, to, _EXTABLE_BIAS_FAULT)
+
+#define _ASM_EXTABLE_EX(from,to) \
+ _ASM_EXTABLE_CLASS(from, to, _EXTABLE_BIAS_EX)
+
+#ifdef __ASSEMBLY__
+# define _EXPAND_EXTABLE_BIAS(x) x
+# define _ASM_EXTABLE_CLASS(from,to,bias) \
+ .pushsection "__ex_table","a" ; \
+ .balign 8 ; \
+ .long (from) - . ; \
+ .long (to) - . + _EXPAND_EXTABLE_BIAS(bias) ; \
.popsection

# define _ASM_NOKPROBE(entry) \
@@ -89,18 +149,12 @@
.endm

#else
-# define _ASM_EXTABLE(from,to) \
- " .pushsection \"__ex_table\",\"a\"\n" \
- " .balign 8\n" \
- " .long (" #from ") - .\n" \
- " .long (" #to ") - .\n" \
- " .popsection\n"
-
-# define _ASM_EXTABLE_EX(from,to) \
- " .pushsection \"__ex_table\",\"a\"\n" \
- " .balign 8\n" \
- " .long (" #from ") - .\n" \
- " .long (" #to ") - . + 0x7ffffff0\n" \
+# define _EXPAND_EXTABLE_BIAS(x) #x
+# define _ASM_EXTABLE_CLASS(from,to,bias) \
+ " .pushsection \"__ex_table\",\"a\"\n" \
+ " .balign 8\n" \
+ " .long (" #from ") - .\n" \
+ " .long (" #to ") - . + " _EXPAND_EXTABLE_BIAS(bias) "\n" \
" .popsection\n"
/* For C file, we already have NOKPROBE_SYMBOL macro */
#endif
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index a8df874f3e88..b023300cd6f0 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -93,9 +93,12 @@ static inline bool __chk_range_not_ok(unsigned long addr, unsigned long size, un
* The exception table consists of pairs of addresses relative to the
* exception table enty itself: the first is the address of an
* instruction that is allowed to fault, and the second is the address
- * at which the program should continue. No registers are modified,
- * so it is entirely up to the continuation code to figure out what to
- * do.
+ * at which the program should continue. The exception table is linked
+ * soon after the fixup section, so we don't need a full 32-bit offset
+ * for the fixup. We steal the two upper bits so we can tag exception
+ * table entries with different classes. In the default class no registers
+ * are modified, so it is entirely up to the continuation code to figure
+ * out what to do.
*
* All the routines below use bits of fixup code that are out of line
* with the main instruction path. This means when everything is well,
@@ -110,7 +113,13 @@ struct exception_table_entry {
#define ARCH_HAS_SORT_EXTABLE
#define ARCH_HAS_SEARCH_EXTABLE

-extern int fixup_exception(struct pt_regs *regs);
+static inline unsigned int
+ex_class(const struct exception_table_entry *x)
+{
+ return (u32)x->fixup >> 30;
+}
+
+extern int fixup_exception(struct pt_regs *regs, int trapnr);
extern int early_fixup_exception(unsigned long *ip);

/*
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
index 1deffe6cc873..0f05deeff5ce 100644
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -988,7 +988,7 @@ int kprobe_fault_handler(struct pt_regs *regs, int trapnr)
* In case the user-specified fault handler returned
* zero, try to fix up.
*/
- if (fixup_exception(regs))
+ if (fixup_exception(regs, trapnr))
return 1;

/*
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 346eec73f7db..df25081e5970 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -199,7 +199,7 @@ do_trap_no_signal(struct task_struct *tsk, int trapnr, char *str,
}

if (!user_mode(regs)) {
- if (!fixup_exception(regs)) {
+ if (!fixup_exception(regs, trapnr)) {
tsk->thread.error_code = error_code;
tsk->thread.trap_nr = trapnr;
die(str, regs, error_code);
@@ -453,7 +453,7 @@ do_general_protection(struct pt_regs *regs, long error_code)

tsk = current;
if (!user_mode(regs)) {
- if (fixup_exception(regs))
+ if (fixup_exception(regs, X86_TRAP_GP))
return;

tsk->thread.error_code = error_code;
@@ -699,7 +699,7 @@ static void math_error(struct pt_regs *regs, int error_code, int trapnr)
conditional_sti(regs);

if (!user_mode(regs)) {
- if (!fixup_exception(regs)) {
+ if (!fixup_exception(regs, X86_TRAP_DE)) {
task->thread.error_code = error_code;
task->thread.trap_nr = trapnr;
die(str, regs, error_code);
diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c
index 903ec1e9c326..7a592ec193d5 100644
--- a/arch/x86/mm/extable.c
+++ b/arch/x86/mm/extable.c
@@ -11,13 +11,51 @@ ex_insn_addr(const struct exception_table_entry *x)
static inline unsigned long
ex_fixup_addr(const struct exception_table_entry *x)
{
- return (unsigned long)&x->fixup + x->fixup;
+ long offset = (long)((u32)x->fixup & 0x3fffffff) - (long)_EXTABLE_BIAS;
+ return (unsigned long)&x->fixup + offset;
}

-int fixup_exception(struct pt_regs *regs)
+/* Fixup functions for each exception class */
+static int fix_class_default(const struct exception_table_entry *fixup,
+ struct pt_regs *regs, int trapnr)
+{
+ regs->ip = ex_fixup_addr(fixup);
+ return 1;
+}
+static int fix_class_fault(const struct exception_table_entry *fixup,
+ struct pt_regs *regs, int trapnr)
+{
+ regs->ip = ex_fixup_addr(fixup);
+ regs->ax = trapnr;
+ return 1;
+}
+static int fix_class_ex(const struct exception_table_entry *fixup,
+ struct pt_regs *regs, int trapnr)
+{
+ /* Special hack for uaccess_err */
+ current_thread_info()->uaccess_err = 1;
+ regs->ip = ex_fixup_addr(fixup);
+ return 1;
+}
+static int fix_class_unused(const struct exception_table_entry *fixup,
+ struct pt_regs *regs, int trapnr)
+{
+ /* can't happen unless exception table was corrupted */
+ BUG_ON(1);
+ return 0;
+}
+
+static int (*allclasses[])(const struct exception_table_entry *,
+ struct pt_regs *, int) = {
+ [EXTABLE_CLASS_DEFAULT] = fix_class_default,
+ [EXTABLE_CLASS_FAULT] = fix_class_fault,
+ [EXTABLE_CLASS_EX] = fix_class_ex,
+ [EXTABLE_CLASS_UNUSED] = fix_class_unused
+};
+
+int fixup_exception(struct pt_regs *regs, int trapnr)
{
const struct exception_table_entry *fixup;
- unsigned long new_ip;

#ifdef CONFIG_PNPBIOS
if (unlikely(SEGMENT_IS_PNP_CODE(regs->cs))) {
@@ -34,17 +72,8 @@ int fixup_exception(struct pt_regs *regs)
#endif

fixup = search_exception_tables(regs->ip);
- if (fixup) {
- new_ip = ex_fixup_addr(fixup);
-
- if (fixup->fixup - fixup->insn >= 0x7ffffff0 - 4) {
- /* Special hack for uaccess_err */
- current_thread_info()->uaccess_err = 1;
- new_ip -= 0x7ffffff0;
- }
- regs->ip = new_ip;
- return 1;
- }
+ if (fixup)
+ return allclasses[ex_class(fixup)](fixup, regs, trapnr);

return 0;
}
@@ -53,18 +82,15 @@ int fixup_exception(struct pt_regs *regs)
int __init early_fixup_exception(unsigned long *ip)
{
const struct exception_table_entry *fixup;
- unsigned long new_ip;

fixup = search_exception_tables(*ip);
if (fixup) {
- new_ip = ex_fixup_addr(fixup);
-
- if (fixup->fixup - fixup->insn >= 0x7ffffff0 - 4) {
- /* uaccess handling not supported during early boot */
+ if (ex_class(fixup)) {
+ /* special handling not supported during early boot */
return 0;
}

- *ip = new_ip;
+ *ip = ex_fixup_addr(fixup);
return 1;
}

diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index eef44d9a3f77..495946c3f9dd 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -656,7 +656,7 @@ no_context(struct pt_regs *regs, unsigned long error_code,
int sig;

/* Are we prepared to handle this kernel fault? */
- if (fixup_exception(regs)) {
+ if (fixup_exception(regs, X86_TRAP_PF)) {
/*
* Any interrupt that takes a fault gets the fixup. This makes
* the below recursive fault logic only apply to a faults from
--
2.1.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/