Re: [PATCH 01/23] tref: Implement task references.

From: Oleg Nesterov
Date: Thu Mar 02 2006 - 14:17:06 EST


Eric W. Biederman wrote:
>
> Holding a reference to a task_struct pins about 10K of low memory even after
> that task has exited. Which seems to be at 1 or 2 orders of mangnitude more
> memory than any other data structure in the kernel. Not holding a reference
> to a task_struct and you risk problems with pid wrap around.

I think there is another, much simpler solution. We can make a "reference" to the
pid itself to protect it against free_pidmap(), so that this pid can't be reused.

struct pid_ref
{
pid_t pid;
atomic_t count;
struct hlist_node chain;
};

// allocated in pidhash_init()
static struct hlist_head *ref_hash;

struct pid_ref *find_pid_ref(pid_t pid)
{
struct hlist_node *elem;
struct pid_ref *ref;

hlist_for_each_entry(ref, elem, &ref_hash[pid_hashfn(pid)], chain)
if (ref->pid == pid)
return ref;

return NULL;
}

// just s/free_pidmap/__free_pidmap/
static void __free_pidmap(int pid)
{
pidmap_t *map = pidmap_array + pid / BITS_PER_PAGE;
int offset = pid & BITS_PER_PAGE_MASK;

clear_bit(offset, map->page);
atomic_inc(&map->nr_free);
}

fastcall void free_pidmap(int pid)
{
if (!find_pid_ref(pid))
__free_pidmap(pid);
}


static int pid_inuse(pid_t pid)
{
int type;

for (type = 0; type < PIDTYPE_MAX; ++type)
if (find_pid(type, pid))
return 1;

return 0;
}

// simple, non-optimized version
struct pid_ref *mk_pid_ref(pid_t pid)
{
struct pid_ref *ref;

write_lock_irq(&tasklist_lock);
ref = find_pid_ref(pid);
if (ref)
atomic_inc(&ref->count);
else if (pid_inuse(pid)) {
ref = kmalloc(sizeof(*ref), GFP_ATOMIC);
if (ref) {
ref->pid = pid;
atomic_set(&ref->count, 1);
hlist_add_head(&ref->chain,
&ref_hash[pid_hashfn(pid)]);
}
}
write_unlock_irq(&tasklist_lock);

return ref;
}

void put_pid_ref(struct pid_ref *ref)
{
if (!ref || !atomic_dec_and_test(&ref->count))
return;

write_lock_irq(&tasklist_lock);
if (!atomic_read(&ref->count)) {
if (!pid_inuse(ref->pid))
__free_pidmap(ref->pid);
hlist_del(&ref->chain);
kfree(ref);
}
write_unlock_irq(&tasklist_lock);
}

That's all. The only modified function is free_pidmap(), and the change is
trivial. Example of usage:

struct fown_struct {
...
- int pid;
+ struct pid_ref *ref;
+ enum pid_type type;
...
}

void file_free(struct file *f)
{
+ put_pid_ref(f->f_owner->ref);
...
}

void f_modown(struct file *filp, int pid, uid_t uid, uid_t euid, int force)
{
struct pid_ref *old, *ref;
enum pid_type type = PIDTYPE_PID;

if (pid < 0) {
pid = -pid;
type = PIDTYPE_PGID;
}
ref = mk_pid_ref(pid);

write_lock_irq(&filp->f_owner.lock);
old = ref;
if (force || !filp->f_owner.ref) {
old = filp->f_owner.ref;
filp->f_owner.ref = ref;
filp->f_owner.type = type;
filp->f_owner.uid = uid;
filp->f_owner.euid = euid;
}
write_unlock_irq(&filp->f_owner.lock);

put_pid_ref(old);
}

void send_sigio(struct fown_struct *fown, int fd, int band)
{
struct task_struct *p;

read_lock(&fown->lock);
if (!fown->ref)
goto out_unlock_fown;

read_lock(&tasklist_lock);

do_each_task_pid(fown->ref->pid, fown->type, p)
send_sigio_to_task(p, fown, fd, band);
while_each_task_pid(fown->ref->pid, fown->type, p);

read_unlock(&tasklist_lock);
out_unlock_fown:
read_unlock(&fown->lock);
}

What do you think?

Oleg.
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/