Re: Race between "mount" uevent and /proc/mounts?

From: Kay Sievers
Date: Mon Oct 31 2005 - 19:28:52 EST


On Wed, Oct 26, 2005 at 08:28:59PM +0100, Al Viro wrote:
> On Wed, Oct 26, 2005 at 04:34:17PM +0200, Kay Sievers wrote:
> > > Semantics for events depends on which objects you are interested in.
> > > Existing ones do not match _any_ of the real objects and I have no
> > > idea what exactly had been intended for them. I've asked gregkh, but
> > > he didn't remember that either. Apparently they are used by different
> > > people as (bad) approximations to different things. Which doesn't work
> > > well. And until somebody cares to describe what exactly they are trying
> > > to watch, the situation obviously won't improve.
> >
> > They are actually events for claim/release of a block device. As uevents
> > are bound to kobjects, we needed to send these events from an existing device,
> > which is the blockdev itself.
> >
> > Sure, the event itself has nothing to do with a filesystem. The names are
> > like this for historical reasons and "CLAIM/RELEASE" may be less confusing.
> > The events are used as a trigger to rescan /proc/mounts instead of polling
> > it constantly.
>
> But that makes no sense. /proc/*/mounts changes when mount tree changes.
> Which is obviously not an event happening to block devices. Moreover,
> changes of mount tree may involve no changes in the set of active filesystems
> or be separated in time from such changes by arbitrary intervals.
>
> Looks like seriously wrong assumptions in userland code working with these
> events... _IF_ you want to keep track of /proc/*/mounts changes, the obvious
> solution would be to implement ->poll() for them.

Ok, makes sense. The attached patch seems to work for me. If we can get
something like this, we can remove the superblock claim/release events
completely and just read the events from the /proc/mounts file itself.

Thanks,
Kay


diff --git a/fs/namespace.c b/fs/namespace.c
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -26,6 +26,7 @@
#include <asm/unistd.h>

extern int __init init_rootfs(void);
+DECLARE_WAIT_QUEUE_HEAD(mounts_wait);

#ifdef CONFIG_SYSFS
extern int __init sysfs_init(void);
@@ -120,6 +121,8 @@ static void detach_mnt(struct vfsmount *
list_del_init(&mnt->mnt_child);
list_del_init(&mnt->mnt_hash);
old_nd->dentry->d_mounted--;
+ current->namespace->mounts_poll_pending = 1;
+ wake_up_interruptible(&mounts_wait);
}

static void attach_mnt(struct vfsmount *mnt, struct nameidata *nd)
@@ -129,6 +132,8 @@ static void attach_mnt(struct vfsmount *
list_add(&mnt->mnt_hash, mount_hashtable+hash(nd->mnt, nd->dentry));
list_add_tail(&mnt->mnt_child, &nd->mnt->mnt_mounts);
nd->dentry->d_mounted++;
+ current->namespace->mounts_poll_pending = 1;
+ wake_up_interruptible(&mounts_wait);
}

static struct vfsmount *next_mnt(struct vfsmount *p, struct vfsmount *root)
@@ -1395,6 +1400,7 @@ static void __init init_mount_tree(void)
list_add(&mnt->mnt_list, &namespace->list);
namespace->root = mnt;
mnt->mnt_namespace = namespace;
+ namespace->mounts_poll_pending = 0;

init_task.namespace = namespace;
read_lock(&tasklist_lock);
diff --git a/fs/proc/base.c b/fs/proc/base.c
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -57,6 +57,7 @@
#include <linux/init.h>
#include <linux/file.h>
#include <linux/string.h>
+#include <linux/poll.h>
#include <linux/seq_file.h>
#include <linux/namei.h>
#include <linux/namespace.h>
@@ -660,6 +661,8 @@ static struct file_operations proc_smaps
#endif

extern struct seq_operations mounts_op;
+extern wait_queue_head_t mounts_wait;
+
static int mounts_open(struct inode *inode, struct file *file)
{
struct task_struct *task = proc_task(inode);
@@ -692,10 +695,36 @@ static int mounts_release(struct inode *
return seq_release(inode, file);
}

+static unsigned int mounts_poll(struct file *file, poll_table *wait)
+{
+ struct task_struct *task = proc_task(file->f_dentry->d_inode);
+ struct namespace *namespace;
+ int ret = 0;
+
+ task_lock(task);
+ namespace = task->namespace;
+ if (namespace)
+ get_namespace(namespace);
+ task_unlock(task);
+
+ if (!namespace)
+ return -EINVAL;
+
+ poll_wait(file, &mounts_wait, wait);
+ if (namespace->mounts_poll_pending) {
+ namespace->mounts_poll_pending = 0;
+ ret = POLLIN | POLLRDNORM;
+ }
+ put_namespace(namespace);
+
+ return ret;
+}
+
static struct file_operations proc_mounts_operations = {
.open = mounts_open,
.read = seq_read,
.llseek = seq_lseek,
+ .poll = mounts_poll,
.release = mounts_release,
};

diff --git a/include/linux/namespace.h b/include/linux/namespace.h
--- a/include/linux/namespace.h
+++ b/include/linux/namespace.h
@@ -10,6 +10,7 @@ struct namespace {
struct vfsmount * root;
struct list_head list;
struct rw_semaphore sem;
+ int mounts_poll_pending;
};

extern int copy_namespace(int, struct task_struct *);

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/