Re: [patch] 2.3.18ac10 /proc fixes [Re: [patch] wchan in 2.3.18ac6]

Andrea Arcangeli (andrea@suse.de)
Mon, 4 Oct 1999 18:24:46 +0200 (CEST)


On Mon, 4 Oct 1999, Alan Cox wrote:

>> The right thing to do is to increase the double-page reference count in
>> the struct page to avoid /proc sniffing. So we'll be secure and _fast_.
>
>That sounds a lot saner.

BTW, note that 2.3.18ac10 can still be sniffed via /proc/, through
KSTK_EIP/KSTK_ESP and wchan(). So 2.3.18ac10 is not safe either (that was
another reason why I took the non-secure approach in 2.2.x).
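
For clarity, this is the pinning scheme in isolation, distilled from the
patch below (a minimal sketch, not a complete function; it uses only the
interfaces the patch itself uses):

	struct task_struct *tsk;

	read_lock(&tasklist_lock);
	tsk = find_task_by_pid(pid);
	if (tsk)
		/*
		 * The task struct and the kernel stack share one order-1
		 * (two-page) allocation; taking a reference on its first
		 * struct page keeps the whole thing valid even if the
		 * task exits under us.
		 */
		get_page(MAP_NR(tsk) + mem_map);
	read_unlock(&tasklist_lock);

	if (tsk) {
		/* ... safely read EIP/ESP/wchan from the pinned task ... */
		free_task_struct(tsk);	/* drops our reference */
	}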

I have now produced a new patch that looks OK to me.

It fixes the security problem everywhere (EIP/ESP/wchan() included), takes
lock_kernel() before reading tsk->mm, takes down(mmap_sem) in read_maps,
and does the other minor things that my previous patch was doing. The
downside is that the task-struct stack-cache can't be kept on UP anymore
(anyway, I am not convinced it was a good idea in the first place; it
looks like a workaround for GFP not freeing the right pages according to
the order of the GFP request).
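
In sketch form, the locking this gives get_stat() is the following (same
names as in the patch below; the key point is that tasklist_lock is
dropped before taking mmap_sem, since holding the spinlock across down()
can deadlock):

	struct task_struct *tsk;
	struct mm_struct *mm;
	struct vm_area_struct *vma;
	unsigned long vsize = 0;

	read_lock(&tasklist_lock);
	tsk = find_task_by_pid(pid);
	if (tsk)
		get_page(MAP_NR(tsk) + mem_map);  /* pin the task struct */
	read_unlock(&tasklist_lock);	/* drop it before we may sleep */
	if (!tsk)
		return 0;

	lock_kernel();		/* tsk->mm and tsk->tty are BKL-protected */
	mm = tsk->mm;
	if (mm)
		atomic_inc(&mm->mm_users);  /* pin the mm across the vma walk */
	unlock_kernel();

	if (mm) {
		down(&mm->mmap_sem);	/* no spinlock held: sleeping is fine */
		for (vma = mm->mmap; vma; vma = vma->vm_next)
			vsize += vma->vm_end - vma->vm_start;
		up(&mm->mmap_sem);
		mmput(mm);
	}
	free_task_struct(tsk);		/* unpin the task struct */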

against 2.3.18ac10:

diff -urN 2.3.18ac10/arch/i386/kernel/process.c proc/arch/i386/kernel/process.c
--- 2.3.18ac10/arch/i386/kernel/process.c Tue Sep 14 14:34:06 1999
+++ proc/arch/i386/kernel/process.c Mon Oct 4 16:32:26 1999
@@ -341,70 +341,6 @@
}

/*
- * Allocation and freeing of basic task resources.
- *
- * NOTE! The task struct and the stack go together
- *
- * The task structure is a two-page thing, and as such
- * not reliable to allocate using the basic page alloc
- * functions. We have a small cache of structures for
- * when the allocations fail..
- *
- * This extra buffer essentially acts to make for less
- * "jitter" in the allocations..
- *
- * On SMP we don't do this right now because:
- * - we aren't holding any locks when called, and we might
- * as well just depend on the generic memory management
- * to do proper locking for us instead of complicating it
- * here.
- * - if you use SMP you have a beefy enough machine that
- * this shouldn't matter..
- */
-#ifndef __SMP__
-#define EXTRA_TASK_STRUCT 16
-static struct task_struct * task_struct_stack[EXTRA_TASK_STRUCT];
-static int task_struct_stack_ptr = -1;
-#endif
-
-struct task_struct * alloc_task_struct(void)
-{
-#ifndef EXTRA_TASK_STRUCT
- return (struct task_struct *) __get_free_pages(GFP_KERNEL,1);
-#else
- int index;
- struct task_struct *ret;
-
- index = task_struct_stack_ptr;
- if (index >= EXTRA_TASK_STRUCT/2)
- goto use_cache;
- ret = (struct task_struct *) __get_free_pages(GFP_KERNEL,1);
- if (!ret) {
- index = task_struct_stack_ptr;
- if (index >= 0) {
-use_cache:
- ret = task_struct_stack[index];
- task_struct_stack_ptr = index-1;
- }
- }
- return ret;
-#endif
-}
-
-void free_task_struct(struct task_struct *p)
-{
-#ifdef EXTRA_TASK_STRUCT
- int index = task_struct_stack_ptr+1;
-
- if (index < EXTRA_TASK_STRUCT) {
- task_struct_stack[index] = p;
- task_struct_stack_ptr = index;
- } else
-#endif
- free_pages((unsigned long) p, 1);
-}
-
-/*
* No need to lock the MM as we are the last user
*/
void release_segments(struct mm_struct *mm)
diff -urN 2.3.18ac10/fs/proc/array.c proc/fs/proc/array.c
--- 2.3.18ac10/fs/proc/array.c Thu Sep 30 17:00:27 1999
+++ proc/fs/proc/array.c Mon Oct 4 18:17:17 1999
@@ -48,6 +48,8 @@
*
* Chuck Lever : safe handling of task_struct
* <cel@monkey.org>
+ *
+ * Andrea Arcangeli : SMP race/security fixes.
*/

#include <linux/types.h>
@@ -481,7 +483,9 @@
{
struct task_struct *p;
struct mm_struct *mm = NULL;
-
+
+	/* need the kernel lock to keep tsk->mm from going away under us */
+ lock_kernel();
read_lock(&tasklist_lock);
p = find_task_by_pid(pid);
if (p)
@@ -489,6 +493,7 @@
if (mm)
atomic_inc(&mm->mm_users);
read_unlock(&tasklist_lock);
+ unlock_kernel();
return mm;
}

@@ -917,7 +922,7 @@
struct mm_struct *mm = NULL;

/*
- * We lock the whole kernel here because p->files is still
+ * We lock the whole kernel here because p->files and p->mm are still
* protected by the global kernel lock.
*/
lock_kernel();
@@ -959,61 +964,55 @@

static int get_stat(int pid, char * buffer)
{
- struct task_struct *tsk, *p;
- struct mm_struct *mm = NULL;
+ struct task_struct *tsk;
+ struct mm_struct *mm;
unsigned long vsize, eip, esp, wchan;
long priority, nice;
pid_t ppid = 0;
sigset_t sigign, sigcatch;
char state;
int res = 0;
+ unsigned int tty_device;
+ int tty_pgrp;

- tsk = kmalloc(sizeof(struct task_struct), GFP_KERNEL);
- if (!tsk)
- goto out;
-
- /*
- * Hold the tasklist_lock and extract address information
- * we may need after the lock is released. We can't hold
- * the tasklist_lock and jiggle the mmap_sem -- that can
- * result in a deadlock.
- */
read_lock(&tasklist_lock);
- p = find_task_by_pid(pid);
- if (p) {
- memcpy(tsk, p, sizeof(struct task_struct));
-
- mm = p->mm;
- if (mm)
- atomic_inc(&mm->mm_users);
+ tsk = find_task_by_pid(pid);
+ if (!tsk)
+ goto out_unlock;
+	/* keep the task struct from going away under us (security) */
+ get_page(MAP_NR(tsk) + mem_map);
+ ppid = tsk->p_pptr->pid;
+ read_unlock(&tasklist_lock);

- ppid = p->p_pptr->pid;
+	/* we need the big kernel lock to keep tsk->mm and tsk->tty
+	   from changing under us */
+ lock_kernel();
+ mm = tsk->mm;
+ if (mm)
+ atomic_inc(&mm->mm_users);
+ tty_device = tsk->tty ? kdev_t_to_nr(tsk->tty->device) : 0;
+ tty_pgrp = tsk->tty ? tsk->tty->pgrp : -1;
+ unlock_kernel();

- spin_lock_irq(&p->sigmask_lock);
- collect_sigign_sigcatch(p, &sigign, &sigcatch);
- spin_unlock_irq(&p->sigmask_lock);
- }
- read_unlock(&tasklist_lock);
- if (!p)
- goto free_out;
+ spin_lock_irq(&tsk->sigmask_lock);
+ collect_sigign_sigcatch(tsk, &sigign, &sigcatch);
+ spin_unlock_irq(&tsk->sigmask_lock);
+
+ eip = KSTK_EIP(tsk);
+ esp = KSTK_ESP(tsk);
+ wchan = get_wchan(tsk);

state = *get_task_state(tsk);
-	vsize = eip = esp = 0;
+	vsize = 0;
- if (mm) {
+ if (mm)
+ {
struct vm_area_struct *vma;
down(&mm->mmap_sem);
- vma = mm->mmap;
- while (vma) {
+ for (vma = mm->mmap; vma; vma = vma->vm_next)
vsize += vma->vm_end - vma->vm_start;
- vma = vma->vm_next;
- }
- eip = KSTK_EIP(p);
- esp = KSTK_ESP(p);
up(&mm->mmap_sem);
}

- wchan = get_wchan(p);
-
/* scale priority and nice values from timeslices to -20..20 */
/* to make it look like a "normal" Unix priority/nice value */
priority = tsk->counter;
@@ -1030,8 +1029,8 @@
ppid,
tsk->pgrp,
tsk->session,
- tsk->tty ? kdev_t_to_nr(tsk->tty->device) : 0,
- tsk->tty ? tsk->tty->pgrp : -1,
+ tty_device,
+ tty_pgrp,
tsk->flags,
tsk->min_flt,
tsk->cmin_flt,
@@ -1068,12 +1067,15 @@
tsk->exit_signal,
tsk->processor);

-free_out:
- kfree(tsk);
-out:
if (mm)
mmput(mm);
+ free_task_struct(tsk);
return res;
+
+out_unlock:
+	read_unlock(&tasklist_lock);
+	return 0;
}

static inline void statm_pte_range(pmd_t * pmd, unsigned long address, unsigned long size,
@@ -1226,7 +1228,6 @@
char * destptr = buf, * buffer;
loff_t lineno;
ssize_t column, i;
- int volatile_task = 0;
long retval;

/*
@@ -1238,19 +1239,16 @@
goto out;

retval = -EINVAL;
+ lock_kernel();
read_lock(&tasklist_lock);
p = find_task_by_pid(pid);
if (p) {
mm = p->mm;
- if (mm) {
+ if (mm)
atomic_inc(&mm->mm_users);
-
- /* Check whether the mmaps could change if we sleep */
- volatile_task = (p != current ||
- atomic_read(&mm->mm_users) > 2);
- }
}
read_unlock(&tasklist_lock);
+ unlock_kernel();
if (!p)
goto freepage_out;

@@ -1262,6 +1260,7 @@
lineno = *ppos >> MAPS_LINE_SHIFT;
column = *ppos & (MAPS_LINE_LENGTH-1);

+ down(&mm->mmap_sem);
/* quickly go to line "lineno" */
for (map = mm->mmap, i = 0; map && (i < lineno); map = map->vm_next, i++)
continue;
@@ -1334,13 +1333,8 @@
/* done? */
if (count == 0)
break;
-
- /* By writing to user space, we might have slept.
- * Stop the loop, to avoid a race condition.
- */
- if (volatile_task)
- break;
}
+ up(&mm->mmap_sem);

/* encode f_pos */
*ppos = (lineno << MAPS_LINE_SHIFT) + column;
@@ -1367,6 +1361,9 @@
*/
read_lock(&tasklist_lock);
tsk = find_task_by_pid(pid);
+ if (tsk)
+ get_page(MAP_NR(tsk) + mem_map);
+ read_unlock(&tasklist_lock);
if (tsk) {
len = sprintf(buffer,
"cpu %lu %lu\n",
@@ -1378,8 +1375,8 @@
i,
tsk->per_cpu_utime[cpu_logical_map(i)],
tsk->per_cpu_stime[cpu_logical_map(i)]);
+ free_task_struct(tsk);
}
- read_unlock(&tasklist_lock);
return len;
}
#endif
diff -urN 2.3.18ac10/include/asm-i386/processor.h proc/include/asm-i386/processor.h
--- 2.3.18ac10/include/asm-i386/processor.h Mon Oct 4 03:46:12 1999
+++ proc/include/asm-i386/processor.h Mon Oct 4 17:47:16 1999
@@ -409,8 +409,21 @@
}

#define THREAD_SIZE (2*PAGE_SIZE)
-extern struct task_struct * alloc_task_struct(void);
-extern void free_task_struct(struct task_struct *);
+/*
+ * Allocation and freeing of basic task resources.
+ *
+ * NOTE! The task struct and the stack go together
+ *
+ * The task structure is a two-page thing, and as such
+ * not reliable to allocate using the basic page alloc
+ * functions.
+ */
+#define alloc_task_struct() \
+ ((struct task_struct *) __get_free_pages(GFP_KERNEL,1))
+
+#define free_task_struct(p) \
+	free_pages((unsigned long) (p), 1)

#define init_task (init_task_union.task)
#define init_stack (init_task_union.stack)

Andrea
