[PATCH][DEBUG] x86/refcount: split up refcount saturation handling

From: Kees Cook
Date: Thu Aug 31 2017 - 15:27:36 EST


In support of debugging the problems Mike Galbraith has seen with
x86-refcount vs gcc vs network refcounts...

This minimizes the differences between unchecked-refcount and x86-refcount
by changing the refcount_dec() failure case to not saturate. The reporting
of negative values is reduced to pr_warn from WARN to avoid spamming dmesg
(which may impact race conditions). Ratelimiting is disabled just to be
sure no reports are being dropped.

Signed-off-by: Kees Cook <keescook@xxxxxxxxxxxx>
---
arch/x86/mm/extable.c | 51 ++++++++++++++++++++++++++++++++++-----------------
kernel/panic.c | 2 +-
2 files changed, 35 insertions(+), 18 deletions(-)

diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c
index c076f710de4c..dcb498668370 100644
--- a/arch/x86/mm/extable.c
+++ b/arch/x86/mm/extable.c
@@ -1,6 +1,7 @@
#include <linux/extable.h>
#include <linux/uaccess.h>
#include <linux/sched/debug.h>
+#include <linux/ratelimit.h>

#include <asm/traps.h>
#include <asm/kdebug.h>
@@ -43,19 +44,7 @@ EXPORT_SYMBOL_GPL(ex_handler_fault);
bool ex_handler_refcount(const struct exception_table_entry *fixup,
struct pt_regs *regs, int trapnr)
{
- /* First unconditionally saturate the refcount. */
- *(int *)regs->cx = INT_MIN / 2;
-
- /*
- * Strictly speaking, this reports the fixup destination, not
- * the fault location, and not the actually overflowing
- * instruction, which is the instruction before the "js", but
- * since that instruction could be a variety of lengths, just
- * report the location after the overflow, which should be close
- * enough for finding the overflow, as it's at least back in
- * the function, having returned from .text.unlikely.
- */
- regs->ip = ex_fixup_addr(fixup);
+ const char *msg;

/*
* This function has been called because either a negative refcount
@@ -68,12 +57,40 @@ bool ex_handler_refcount(const struct exception_table_entry *fixup,
* these cases we want a report, since it's a boundary condition.
*
*/
- if (regs->flags & (X86_EFLAGS_OF | X86_EFLAGS_ZF)) {
- bool zero = regs->flags & X86_EFLAGS_ZF;
-
- refcount_error_report(regs, zero ? "hit zero" : "overflow");
+ if (regs->flags & X86_EFLAGS_OF) {
+ /* Always saturate on overflow detection. */
+ *(int *)regs->cx = INT_MIN / 2;
+ msg = "saturated due to overflow";
+ } else if (regs->flags & X86_EFLAGS_SF) {
+ /* Do not generate traceback on re-saturation. */
+ *(int *)regs->cx = INT_MIN / 2;
+ regs->ip = ex_fixup_addr(fixup);
+ pr_warn("refcount_t saturated due to negative result at %pB in %s[%d]\n",
+ (void *)instruction_pointer(regs),
+ current->comm, task_pid_nr(current));
+ return true;
+ } else if (regs->flags & X86_EFLAGS_ZF) {
+ /* An unchecked dec-to-zero happened. WARN only. */
+ msg = "hit zero without test";
+ } else {
+ /* Impossible state. */
+ *(int *)regs->cx = INT_MIN / 2;
+ msg = "saturated due to unknown state";
}

+ /*
+ * Strictly speaking, this reports the fixup destination, not
+ * the fault location, and not the actually overflowing
+ * instruction, which is the instruction before the "js", but
+ * since that instruction could be a variety of lengths, just
+ * report the location after the overflow, which should be close
+ * enough for finding the overflow, as it's at least back in
+ * the function, having returned from .text.unlikely.
+ */
+ regs->ip = ex_fixup_addr(fixup);
+
+ refcount_error_report(regs, msg);
+
return true;
}
EXPORT_SYMBOL_GPL(ex_handler_refcount);
diff --git a/kernel/panic.c b/kernel/panic.c
index bdd18afa19a4..966ade491543 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -605,7 +605,7 @@ EXPORT_SYMBOL(__stack_chk_fail);
#ifdef CONFIG_ARCH_HAS_REFCOUNT
void refcount_error_report(struct pt_regs *regs, const char *err)
{
- WARN_RATELIMIT(1, "refcount_t %s at %pB in %s[%d], uid/euid: %u/%u\n",
+ WARN(1, "refcount_t %s at %pB in %s[%d], uid/euid: %u/%u\n",
err, (void *)instruction_pointer(regs),
current->comm, task_pid_nr(current),
from_kuid_munged(&init_user_ns, current_uid()),
--
2.7.4


--
Kees Cook
Pixel Security