Re: task_lock in 2.3.99pre7-8

From: Manfred Spraul (manfreds@colorfullife.com)
Date: Tue May 09 2000 - 12:17:55 EST


Andi Kleen wrote:
>
> pre7-8 turns the task_lock into a spinlock and grabs it above
> __exit_files in do/exit.c. __exit_files can sleep and runs
> inside the spinlock.
>
> Is this a bug or I am missing something ?
>

Linus, please apply the second half of my patch. I've retested with
pre7-8, and I closed 3 memory leaks [2 in my new code, an old one in
exec_domain.c].

struct task_struct.file, .fs, .tty, .mm and .sig users are now
synchronized.

.flags is NOT YET synchronized. Linus, what would you prefer: atomic_t
or split it into "ptrace_flags" (protected by lock_kernel) and "flags"
(protected by "a thread never run on 2 cpus at the same time").

And I also found 2 unrelated bugs:

* ->sig: spinlock calls are missing in kernel/signal.c, the spinlock is
there [current->sig->siglock].

* SMP race somewhere in execve(): under load+ptrace, argv[] is lost.

--
	Manfred

// $Header$ // Kernel Version: // VERSION = 2 // PATCHLEVEL = 3 // SUBLEVEL = 99 // EXTRAVERSION = -pre7 --- 2.3/include/linux/sched.h Tue May 9 18:25:16 2000 +++ build-2.3/include/linux/sched.h Tue May 9 18:28:35 2000 @@ -345,7 +345,7 @@ /* Thread group tracking */ u32 parent_exec_id; u32 self_exec_id; -/* Protection of (de-)allocation: mm, files, fs */ +/* Protection of (de-)allocation: mm, files, fs, tty */ spinlock_t alloc_lock; }; --- 2.3/kernel/exit.c Tue May 9 18:25:17 2000 +++ build-2.3/kernel/exit.c Tue May 9 18:26:37 2000 @@ -182,24 +182,32 @@ extern kmem_cache_t *files_cachep; +void put_files_struct(struct files_struct *files) +{ + if (atomic_dec_and_test(&files->count)) { + close_files(files); + /* + * Free the fd and fdset arrays if we expanded them. + */ + if (files->fd != &files->fd_array[0]) + free_fd_array(files->fd, files->max_fds); + if (files->max_fdset > __FD_SETSIZE) { + free_fdset(files->open_fds, files->max_fdset); + free_fdset(files->close_on_exec, files->max_fdset); + } + kmem_cache_free(files_cachep, files); + } +} + static inline void __exit_files(struct task_struct *tsk) { - struct files_struct * files = xchg(&tsk->files, NULL); + struct files_struct * files = tsk->files; if (files) { - if (atomic_dec_and_test(&files->count)) { - close_files(files); - /* - * Free the fd and fdset arrays if we expanded them. - */ - if (files->fd != &files->fd_array[0]) - free_fd_array(files->fd, files->max_fds); - if (files->max_fdset > __FD_SETSIZE) { - free_fdset(files->open_fds, files->max_fdset); - free_fdset(files->close_on_exec, files->max_fdset); - } - kmem_cache_free(files_cachep, files); - } + task_lock(tsk); + tsk->files = NULL; + task_unlock(tsk); + put_files_struct(files); } } @@ -232,7 +240,9 @@ struct fs_struct * fs = tsk->fs; if (fs) { + task_lock(tsk); tsk->fs = NULL; + task_unlock(tsk); __put_fs_struct(fs); } } @@ -247,11 +257,9 @@ struct signal_struct * sig = tsk->sig; if (sig) { - unsigned long flags; - - spin_lock_irqsave(&tsk->sigmask_lock, flags); + spin_lock_irq(&tsk->sigmask_lock); tsk->sig = NULL; - spin_unlock_irqrestore(&tsk->sigmask_lock, flags); + spin_unlock_irq(&tsk->sigmask_lock); if (atomic_dec_and_test(&sig->count)) kfree(sig); } @@ -302,7 +310,10 @@ atomic_inc(&mm->mm_count); mm_release(); if (mm != tsk->active_mm) BUG(); + /* more a memory barrier than a real lock */ + task_lock(tsk); tsk->mm = NULL; + task_unlock(tsk); enter_lazy_tlb(mm, current, smp_processor_id()); mmput(mm); } @@ -429,7 +440,6 @@ #ifdef CONFIG_BSD_PROCESS_ACCT acct_process(code); #endif - task_lock(tsk); sem_exit(); __exit_mm(tsk); __exit_files(tsk); @@ -439,7 +449,6 @@ tsk->state = TASK_ZOMBIE; tsk->exit_code = code; exit_notify(); - task_unlock(tsk); put_exec_domain(tsk->exec_domain); if (tsk->binfmt && tsk->binfmt->module) __MOD_DEC_USE_COUNT(tsk->binfmt->module); --- 2.3/fs/proc/array.c Tue May 9 18:25:15 2000 +++ build-2.3/fs/proc/array.c Tue May 9 18:26:37 2000 @@ -306,6 +306,7 @@ char state; int res; pid_t ppid; + int tty_nr; struct mm_struct *mm; state = *get_task_state(task); @@ -332,10 +333,13 @@ collect_sigign_sigcatch(task, &sigign, &sigcatch); + task_lock(task); if (task->tty) tty_pgrp = task->tty->pgrp; else tty_pgrp = -1; + tty_nr = task->tty ? kdev_t_to_nr(task->tty->device) : 0; + task_unlock(task); /* scale priority and nice values from timeslices to -20..20 */ /* to make it look like a "normal" Unix priority/nice value */ @@ -356,7 +360,7 @@ ppid, task->pgrp, task->session, - task->tty ? kdev_t_to_nr(task->tty->device) : 0, + tty_nr, tty_pgrp, task->flags, task->min_flt, --- 2.3/fs/proc/base.c Tue May 9 18:25:15 2000 +++ build-2.3/fs/proc/base.c Tue May 9 18:59:22 2000 @@ -121,6 +121,7 @@ *mnt = mntget(fs->rootmnt); *dentry = dget(fs->root); result = 0; + put_fs_struct(fs); } return result; } @@ -258,18 +259,6 @@ if (!(page = __get_free_page(GFP_KERNEL))) return -ENOMEM; - /* FIXME: check that all proc_read function - * handle a dying task gracefully. - * The memory for the task structure - * won't be freed, we've called get_task_struct(). - */ -#if 0 - if (!task->p_pptr) { - free_page(page); - return -EIO; - } -#endif - length = inode->u.proc_i.op.proc_read(task, (char*)page); if (length < 0) { @@ -521,6 +510,7 @@ unsigned int fd, pid, ino; int retval; char buf[NUMBUF]; + struct files_struct * files; retval = 0; pid = p->pid; @@ -537,12 +527,19 @@ goto out; filp->f_pos++; default: + task_lock(p); + files = p->files; + if (files) + atomic_inc(&files->count); + task_unlock(p); + if (!files) + goto out; for (fd = filp->f_pos-2; - p->p_pptr && p->files && fd < p->files->max_fds; + fd < files->max_fds; fd++, filp->f_pos++) { unsigned int i,j; - if (!fcheck_task(p, fd)) + if (!fcheck_files(files, fd)) continue; j = NUMBUF; @@ -556,8 +553,8 @@ ino = fake_ino(pid, PROC_PID_FD_DIR + fd); if (filldir(dirent, buf+j, NUMBUF-j, fd+2, ino) < 0) break; - } + put_files_struct(files); } out: return retval; @@ -713,16 +710,20 @@ inode = proc_pid_make_inode(dir->i_sb, task, PROC_PID_FD_DIR+fd); if (!inode) goto out; - /* FIXME */ + task_lock(task); files = task->files; - if (!files) /* can we ever get here if that's the case? */ + if (files) + atomic_inc(&files->count); + task_unlock(task); + if (!files) goto out_unlock; read_lock(&files->file_lock); - file = inode->u.proc_i.file = fcheck_task(task, fd); + file = inode->u.proc_i.file = fcheck_files(files, fd); if (!file) goto out_unlock2; get_file(file); read_unlock(&files->file_lock); + put_files_struct(files); inode->i_op = &proc_pid_link_inode_operations; inode->i_size = 64; inode->i_mode = S_IFLNK; @@ -736,6 +737,7 @@ return NULL; out_unlock2: + put_files_struct(files); read_unlock(&files->file_lock); out_unlock: iput(inode); --- 2.3/fs/exec.c Tue May 9 18:25:14 2000 +++ build-2.3/fs/exec.c Tue May 9 18:26:37 2000 @@ -379,8 +379,10 @@ struct mm_struct *active_mm = current->active_mm; init_new_context(current, mm); + task_lock(current); current->mm = mm; current->active_mm = mm; + task_unlock(current); activate_mm(active_mm, mm); mm_release(); if (old_mm) { @@ -413,7 +415,9 @@ spin_lock_init(&newsig->siglock); atomic_set(&newsig->count, 1); memcpy(newsig->action, current->sig->action, sizeof(newsig->action)); + spin_lock_irq(&current->sigmask_lock); current->sig = newsig; + spin_unlock_irq(&current->sigmask_lock); return 0; } @@ -466,7 +470,6 @@ /* * Make sure we have a private signal table */ - task_lock(current); oldsig = current->sig; retval = make_private_signals(); if (retval) goto flush_failed; @@ -505,16 +508,16 @@ flush_signal_handlers(current); flush_old_files(current->files); - task_unlock(current); return 0; mmap_failed: +flush_failed: + spin_lock_irq(&current->sigmask_lock); if (current->sig != oldsig) kfree(current->sig); -flush_failed: current->sig = oldsig; - task_unlock(current); + spin_unlock_irq(&current->sigmask_lock); return retval; } --- 2.3/fs/binfmt_elf.c Thu Apr 27 11:27:11 2000 +++ build-2.3/fs/binfmt_elf.c Tue May 9 18:26:37 2000 @@ -261,12 +261,14 @@ if (interp_elf_ex->e_type == ET_EXEC || load_addr_set) elf_type |= MAP_FIXED; + down(&current->mm->mmap_sem); map_addr = do_mmap(interpreter, load_addr + ELF_PAGESTART(vaddr), eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr), elf_prot, elf_type, eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr)); + up(&current->mm->mmap_sem); if (map_addr > -1024UL) /* Real error */ goto out_close; @@ -612,11 +614,13 @@ elf_flags |= MAP_FIXED; } + down(&current->mm->mmap_sem); error = do_mmap(bprm->file, ELF_PAGESTART(load_bias + vaddr), (elf_ppnt->p_filesz + ELF_PAGEOFFSET(elf_ppnt->p_vaddr)), elf_prot, elf_flags, (elf_ppnt->p_offset - ELF_PAGEOFFSET(elf_ppnt->p_vaddr))); + up(&current->mm->mmap_sem); if (!load_addr_set) { load_addr_set = 1; @@ -726,8 +730,10 @@ Since we do not have the power to recompile these, we emulate the SVr4 behavior. Sigh. */ /* N.B. Shouldn't the size here be PAGE_SIZE?? */ + down(&current->mm->mmap_sem); error = do_mmap(NULL, 0, 4096, PROT_READ | PROT_EXEC, MAP_FIXED | MAP_PRIVATE, 0); + up(&current->mm->mmap_sem); } #ifdef ELF_PLAT_INIT --- 2.3/fs/binfmt_aout.c Thu Apr 27 11:27:11 2000 +++ build-2.3/fs/binfmt_aout.c Tue May 9 18:26:37 2000 @@ -363,20 +363,24 @@ goto beyond_if; } + down(&current->mm->mmap_sem); error = do_mmap(bprm->file, N_TXTADDR(ex), ex.a_text, PROT_READ | PROT_EXEC, MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE, fd_offset); + up(&current->mm->mmap_sem); if (error != N_TXTADDR(ex)) { send_sig(SIGKILL, current, 0); return error; } + down(&current->mm->mmap_sem); error = do_mmap(bprm->file, N_DATADDR(ex), ex.a_data, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE, fd_offset + ex.a_text); + up(&current->mm->mmap_sem); if (error != N_DATADDR(ex)) { send_sig(SIGKILL, current, 0); return error; --- 2.3/kernel/exec_domain.c Thu Apr 27 11:27:26 2000 +++ build-2.3/kernel/exec_domain.c Tue May 9 19:13:16 2000 @@ -110,9 +110,16 @@ if (it) { if (atomic_read(&current->fs->count) != 1) { struct fs_struct *new = copy_fs_struct(current->fs); - if (!new) + struct fs_struct *old; + if (!new) { + put_exec_domain(it); return; - put_fs_struct(xchg(&current->fs,new)); + } + task_lock(current); + old = current->fs; + current->fs = new; + task_unlock(current); + put_fs_struct(old); } /* * At that point we are guaranteed to be the sole owner of --- 2.3/include/linux/file.h Tue May 9 18:25:16 2000 +++ build-2.3/include/linux/file.h Tue May 9 18:26:37 2000 @@ -7,16 +7,12 @@ extern void _fput(struct file *); -/* - * Check whether the specified task has the fd open. Since the task - * may not have a files_struct, we must test for p->files != NULL. - */ -static inline struct file * fcheck_task(struct task_struct *p, unsigned int fd) +static inline struct file * fcheck_files(struct files_struct *files, unsigned int fd) { struct file * file = NULL; - if (fd < p->files->max_fds) - file = p->files->fd[fd]; + if (fd < files->max_fds) + file = files->fd[fd]; return file; } @@ -99,5 +95,7 @@ if (result) fput(result); } + +void put_files_struct(struct files_struct *fs); #endif /* __LINUX_FILE_H */ --- 2.3/drivers/char/tty_io.c Thu Apr 27 11:26:55 2000 +++ build-2.3/drivers/char/tty_io.c Tue May 9 18:26:37 2000 @@ -1384,7 +1384,9 @@ current->leader && !current->tty && tty->session == 0) { + task_lock(current); current->tty = tty; + task_unlock(current); current->tty_old_pgrp = 0; tty->session = current->session; tty->pgrp = current->pgrp; @@ -1594,7 +1596,9 @@ } else return -EPERM; } + task_lock(current); current->tty = tty; + task_unlock(current); current->tty_old_pgrp = 0; tty->session = current->session; tty->pgrp = current->pgrp; @@ -1761,7 +1765,9 @@ return -ENOTTY; if (current->leader) disassociate_ctty(0); + task_lock(current); current->tty = NULL; + task_unlock(current); return 0; case TIOCSCTTY: return tiocsctty(tty, arg); @@ -1858,8 +1864,9 @@ send_sig(SIGKILL, p, 1); else if (p->files) { read_lock(&p->files->file_lock); + /* FIXME: p->files could change */ for (i=0; i < p->files->max_fds; i++) { - filp = fcheck_task(p, i); + filp = fcheck_files(p->files, i); if (filp && (filp->f_op == &tty_fops) && (filp->private_data == tty)) { send_sig(SIGKILL, p, 1);

- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@vger.rutgers.edu Please read the FAQ at http://www.tux.org/lkml/



This archive was generated by hypermail 2b29 : Mon May 15 2000 - 21:00:14 EST