Re: [PATCH RFC v2 1/4] cgroup: implement eventfd-based generic API for notifications

From: KAMEZAWA Hiroyuki
Date: Tue Dec 15 2009 - 04:38:49 EST


On Tue, 15 Dec 2009 11:11:16 +0200
"Kirill A. Shutemov" <kirill@xxxxxxxxxxxxx> wrote:

> Could anybody review the patch?
>
> Thank you.

some nitpicks.

>
> On Sat, Dec 12, 2009 at 12:59 AM, Kirill A. Shutemov
> <kirill@xxxxxxxxxxxxx> wrote:

> > +	/*
> > +	 * Unregister events and notify userspace.
> > +	 * FIXME: How to avoid race with cgroup_event_remove_work()
> > +	 *        which runs from workqueue?
> > +	 */
> > +	mutex_lock(&cgrp->event_list_mutex);
> > +	list_for_each_entry_safe(event, tmp, &cgrp->event_list, list) {
> > +		cgroup_event_remove(event);
> > +		eventfd_signal(event->eventfd, 1);
> > +	}
> > +	mutex_unlock(&cgrp->event_list_mutex);
> > +
> > +out:
> >  	return ret;
> >  }

How critical is this FIXME?
Hmm... can't we use RCU here?

> >
> > @@ -1136,6 +1187,8 @@ static void init_cgroup_housekeeping(struct cgroup *cgrp)
> >  	INIT_LIST_HEAD(&cgrp->release_list);
> >  	INIT_LIST_HEAD(&cgrp->pidlists);
> >  	mutex_init(&cgrp->pidlist_mutex);
> > +	INIT_LIST_HEAD(&cgrp->event_list);
> > +	mutex_init(&cgrp->event_list_mutex);
> >  }
> >
> >  static void init_cgroup_root(struct cgroupfs_root *root)
> > @@ -1935,6 +1988,16 @@ static const struct inode_operations cgroup_dir_inode_operations = {
> >  	.rename = cgroup_rename,
> >  };
> >
> > +/*
> > + * Check if a file is a control file
> > + */
> > +static inline struct cftype *__file_cft(struct file *file)
> > +{
> > +	if (file->f_dentry->d_inode->i_fop != &cgroup_file_operations)
> > +		return ERR_PTR(-EINVAL);
> > +	return __d_cft(file->f_dentry);
> > +}
> > +
> >  static int cgroup_create_file(struct dentry *dentry, mode_t mode,
> >  				struct super_block *sb)
> >  {
> > @@ -2789,6 +2852,151 @@ static int cgroup_write_notify_on_release(struct cgroup *cgrp,
> >  	return 0;
> >  }
> >
> > +static inline void cgroup_event_remove(struct cgroup_event *event)
> > +{
> > +	struct cgroup *cgrp = event->cgrp;
> > +
> > +	BUG_ON(event->cft->unregister_event(cgrp, event->cft, event->eventfd));

Hmm? BUG_ON()? If this really has to be a BUG, please add documentation or a comment explaining why.

> > +	eventfd_ctx_put(event->eventfd);
> > +	remove_wait_queue(event->wqh, &event->wait);
> > +	list_del(&event->list);

Please add a comment such as /* event_list_mutex must be held */

> > +	kfree(event);
> > +}
> > +
> > +static void cgroup_event_remove_work(struct work_struct *work)
> > +{
> > +	struct cgroup_event *event = container_of(work, struct cgroup_event,
> > +			remove);
> > +	struct cgroup *cgrp = event->cgrp;
> > +
> > +	mutex_lock(&cgrp->event_list_mutex);
> > +	cgroup_event_remove(event);
> > +	mutex_unlock(&cgrp->event_list_mutex);
> > +}
> > +
> > +static int cgroup_event_wake(wait_queue_t *wait, unsigned mode,
> > +		int sync, void *key)
> > +{
> > +	struct cgroup_event *event = container_of(wait,
> > +			struct cgroup_event, wait);
> > +	unsigned long flags = (unsigned long)key;
> > +
> > +	if (flags & POLLHUP)
> > +		/*
> > +		 * This function called with spinlock taken, but
> > +		 * cgroup_event_remove() may sleep, so we have
> > +		 * to run it in a workqueue.
> > +		 */
> > +		schedule_work(&event->remove);
> > +
> > +	return 0;
> > +}

> > +
> > +static void cgroup_event_ptable_queue_proc(struct file *file,
> > +		wait_queue_head_t *wqh, poll_table *pt)
> > +{
> > +	struct cgroup_event *event = container_of(pt,
> > +			struct cgroup_event, pt);
> > +
> > +	event->wqh = wqh;
> > +	add_wait_queue(wqh, &event->wait);
> > +}
> > +
> > +static int cgroup_write_event_control(struct cgroup *cont, struct cftype *cft,
> > +				      const char *buffer)
> > +{
> > +	struct cgroup_event *event = NULL;
> > +	unsigned int efd, cfd;
> > +	struct file *efile = NULL;
> > +	struct file *cfile = NULL;
> > +	char *endp;
> > +	int ret;
> > +
> > +	efd = simple_strtoul(buffer, &endp, 10);
> > +	if (*endp != ' ')
> > +		return -EINVAL;
> > +	buffer = endp + 1;
> > +
> > +	cfd = simple_strtoul(buffer, &endp, 10);
> > +	if ((*endp != ' ') && (*endp != '\0'))
> > +		return -EINVAL;
> > +	buffer = endp + 1;
> > +
> > +	event = kzalloc(sizeof(*event), GFP_KERNEL);
> > +	if (!event)
> > +		return -ENOMEM;
> > +	event->cgrp = cont;
> > +	INIT_LIST_HEAD(&event->list);
> > +	init_poll_funcptr(&event->pt, cgroup_event_ptable_queue_proc);
> > +	init_waitqueue_func_entry(&event->wait, cgroup_event_wake);
> > +	INIT_WORK(&event->remove, cgroup_event_remove_work);
> > +
> > +	efile = eventfd_fget(efd);
> > +	if (IS_ERR(efile)) {
> > +		ret = PTR_ERR(efile);
> > +		goto fail;
> > +	}
> > +
> > +	event->eventfd = eventfd_ctx_fileget(efile);
> > +	if (IS_ERR(event->eventfd)) {
> > +		ret = PTR_ERR(event->eventfd);
> > +		goto fail;
> > +	}
> > +
> > +	cfile = fget(cfd);
> > +	if (!cfile) {
> > +		ret = -EBADF;
> > +		goto fail;
> > +	}
> > +
> > +	/* the process need read permission on control file */
> > +	ret = file_permission(cfile, MAY_READ);
> > +	if (ret < 0)
> > +		goto fail;
> > +
> > +	event->cft = __file_cft(cfile);
> > +	if (IS_ERR(event->cft)) {
> > +		ret = PTR_ERR(event->cft);
> > +		goto fail;
> > +	}
> > +
> > +	if (!event->cft->register_event || !event->cft->unregister_event) {
> > +		ret = -EINVAL;
> > +		goto fail;
> > +	}
> > +
> > +	ret = event->cft->register_event(cont, event->cft,
> > +			event->eventfd, buffer);
> > +	if (ret)
> > +		goto fail;
> > +
> > +	efile->f_op->poll(efile, &event->pt);

Isn't it necessary to check the return value here?

Thanks,
-Kame
> > +
> > +	mutex_lock(&cont->event_list_mutex);
> > +	list_add(&event->list, &cont->event_list);
> > +	mutex_unlock(&cont->event_list_mutex);
> > +
> > +	fput(cfile);
> > +	fput(efile);
> > +
> > +	return 0;
> > +
> > +fail:
> > +	if (!IS_ERR(cfile))
> > +		fput(cfile);
> > +
> > +	if (event && event->eventfd && !IS_ERR(event->eventfd))
> > +		eventfd_ctx_put(event->eventfd);
> > +
> > +	if (!IS_ERR(efile))
> > +		fput(efile);
> > +
> > +	if (event)
> > +		kfree(event);
> > +
> > +	return ret;
> > +}
> > +
> >  /*
> >   * for the common functions, 'private' gives the type of file
> >   */
> > @@ -2814,6 +3022,11 @@ static struct cftype files[] = {
> >  		.read_u64 = cgroup_read_notify_on_release,
> >  		.write_u64 = cgroup_write_notify_on_release,
> >  	},
> > +	{
> > +		.name = CGROUP_FILE_GENERIC_PREFIX "event_control",
> > +		.write_string = cgroup_write_event_control,
> > +		.mode = S_IWUGO,
> > +	},
> >  };
> >
> >  static struct cftype cft_release_agent = {
> > --
> > 1.6.5.3
> >
> >
>
> --
> To unsubscribe, send a message with 'unsubscribe linux-mm' in
> the body to majordomo@xxxxxxxxxx For more info on Linux MM,
> see: http://www.linux-mm.org/ .
> Don't email: <a href=mailto:"dont@xxxxxxxxx";> email@xxxxxxxxx </a>
>

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/