[patch] epoll use a single inode ...

From: Davide Libenzi
Date: Mon Mar 05 2007 - 16:26:12 EST



Epoll does not keep any private data attached to its inode, so there'd be
no need to allocate one inode per fd. For epoll, the inode is just a
placeholder for the file operations and could be shared by all instances.
I'd like to use the same optimization even for the upcoming file-based
objects, so if you see problems let me know.
One problem that Al pointed out is that an fstat(2) over an epoll fd would
always show the same st_ino. IMO that should be fine, since calling fstat(2)
on an epoll fd and expecting meaningful results is not something you want
to do in any case.



Signed-off-by: Davide Libenzi <davidel@xxxxxxxxxxxxxxx>


- Davide



eventpoll.c | 36 ++++++++++++++++++++++++++++++++----
1 file changed, 32 insertions(+), 4 deletions(-)



Index: linux-2.6.20.ep2/fs/eventpoll.c
===================================================================
--- linux-2.6.20.ep2.orig/fs/eventpoll.c 2007-03-04 14:40:01.000000000 -0800
+++ linux-2.6.20.ep2/fs/eventpoll.c 2007-03-05 13:03:52.000000000 -0800
@@ -258,6 +258,7 @@
int maxevents, long timeout);
static int eventpollfs_delete_dentry(struct dentry *dentry);
static struct inode *ep_eventpoll_inode(void);
+static struct inode *ep_create_inode(void);
static int eventpollfs_get_sb(struct file_system_type *fs_type,
int flags, const char *dev_name,
void *data, struct vfsmount *mnt);
@@ -279,6 +280,9 @@
/* Virtual fs used to allocate inodes for eventpoll files */
static struct vfsmount *eventpoll_mnt __read_mostly;

+/* Placeholder inode for eventpoll fds */
+static struct inode *eventpoll_inode;
+
/* File callbacks that implement the eventpoll file behaviour */
static const struct file_operations eventpoll_fops = {
.release = ep_eventpoll_close,
@@ -763,15 +767,18 @@
* using the inode number.
*/
error = -ENOMEM;
- sprintf(name, "[%lu]", inode->i_ino);
+ sprintf(name, "[%p]", ep);
this.name = name;
this.len = strlen(name);
- this.hash = inode->i_ino;
+ this.hash = 0;
dentry = d_alloc(eventpoll_mnt->mnt_sb->s_root, &this);
if (!dentry)
goto eexit_4;
dentry->d_op = &eventpollfs_dentry_operations;
- d_add(dentry, inode);
+ /* Do not publish this dentry inside the global dentry hash table */
+ dentry->d_flags &= ~DCACHE_UNHASHED;
+ d_instantiate(dentry, inode);
+
file->f_path.mnt = mntget(eventpoll_mnt);
file->f_path.dentry = dentry;
file->f_mapping = inode->i_mapping;
@@ -1555,6 +1562,11 @@

static int eventpollfs_delete_dentry(struct dentry *dentry)
{
+ /*
+ * We faked vfs to believe the dentry was hashed when we created it.
+ * Now we restore the flag so that dput() will work correctly.
+ */
+ dentry->d_flags |= DCACHE_UNHASHED;

return 1;
}
@@ -1562,6 +1574,17 @@

static struct inode *ep_eventpoll_inode(void)
{
+
+ return igrab(eventpoll_inode);
+}
+
+/*
+ * A single inode exists for all eventpoll files. Unlike pipes, eventpoll
+ * inodes have no per-instance data associated with them, so we can avoid
+ * allocating more than one of them.
+ */
+static struct inode *ep_create_inode(void)
+{
int error = -ENOMEM;
struct inode *inode = new_inode(eventpoll_mnt->mnt_sb);

@@ -1626,10 +1649,14 @@

/* Mount the above commented virtual file system */
eventpoll_mnt = kern_mount(&eventpoll_fs_type);
- error = PTR_ERR(eventpoll_mnt);
if (IS_ERR(eventpoll_mnt))
goto epanic;

+ /* Create the single instance of inode for all eventpoll fds */
+ eventpoll_inode = ep_create_inode();
+ if (IS_ERR(eventpoll_inode))
+ goto epanic;
+
DNPRINTK(3, (KERN_INFO "[%p] eventpoll: successfully initialized.\n",
current));
return 0;
@@ -1642,6 +1669,7 @@
static void __exit eventpoll_exit(void)
{
/* Undo all operations done inside eventpoll_init() */
+ iput(eventpoll_inode);
unregister_filesystem(&eventpoll_fs_type);
mntput(eventpoll_mnt);
kmem_cache_destroy(pwq_cache);
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/