[patch 38/52] fs: icache RCU free inodes

From: npiggin
Date: Wed Jun 23 2010 - 23:17:59 EST


RCU free the struct inode. This will allow:

- sb_inode_list_lock to be moved inside i_lock because sb list walkers who want
to take i_lock no longer need to take sb_inode_list_lock to walk the list in
the first place. This will simplify and optimize locking.
- eventually, completely write-free RCU path walking. The inode must be
consulted for permissions when walking, so a write-free reference (ie.
RCU is helpful).
- can potentially simplify things a bit in VM land. May not need to take the
page lock to get back to the page->mapping.
- can remove some nested trylock loops in dcache code
- Could remove the 'wq' allocation from socket now that the entire thing is
rcu freed.

todo: convert all filesystems
---
fs/block_dev.c | 9 ++++++++-
fs/ext2/super.c | 9 ++++++++-
fs/ext3/super.c | 9 ++++++++-
fs/fat/inode.c | 9 ++++++++-
fs/hugetlbfs/inode.c | 9 ++++++++-
fs/inode.c | 9 ++++++++-
fs/nfs/inode.c | 9 ++++++++-
fs/proc/inode.c | 9 ++++++++-
include/linux/fs.h | 5 ++++-
ipc/mqueue.c | 9 ++++++++-
mm/shmem.c | 9 ++++++++-
net/socket.c | 9 ++++++++-
net/sunrpc/rpc_pipe.c | 10 +++++++++-
13 files changed, 101 insertions(+), 13 deletions(-)

Index: linux-2.6/fs/ext2/super.c
===================================================================
--- linux-2.6.orig/fs/ext2/super.c
+++ linux-2.6/fs/ext2/super.c
@@ -161,11 +161,18 @@ static struct inode *ext2_alloc_inode(st
return &ei->vfs_inode;
}

-static void ext2_destroy_inode(struct inode *inode)
+static void ext2_i_callback(struct rcu_head *head)
{
+ struct inode *inode = container_of(head, struct inode, i_rcu);
+ INIT_LIST_HEAD(&inode->i_dentry);
kmem_cache_free(ext2_inode_cachep, EXT2_I(inode));
}

+static void ext2_destroy_inode(struct inode *inode)
+{
+ call_rcu(&inode->i_rcu, ext2_i_callback);
+}
+
static void init_once(void *foo)
{
struct ext2_inode_info *ei = (struct ext2_inode_info *) foo;
Index: linux-2.6/fs/inode.c
===================================================================
--- linux-2.6.orig/fs/inode.c
+++ linux-2.6/fs/inode.c
@@ -277,13 +277,20 @@ void __destroy_inode(struct inode *inode
}
EXPORT_SYMBOL(__destroy_inode);

+static void i_callback(struct rcu_head *head)
+{
+ struct inode *inode = container_of(head, struct inode, i_rcu);
+ INIT_LIST_HEAD(&inode->i_dentry);
+ kmem_cache_free(inode_cachep, inode);
+}
+
void destroy_inode(struct inode *inode)
{
__destroy_inode(inode);
if (inode->i_sb->s_op->destroy_inode)
inode->i_sb->s_op->destroy_inode(inode);
else
- kmem_cache_free(inode_cachep, (inode));
+ call_rcu(&inode->i_rcu, i_callback);
}

/*
@@ -346,6 +353,7 @@ void clear_inode(struct inode *inode)
bd_forget(inode);
if (S_ISCHR(inode->i_mode) && inode->i_cdev)
cd_forget(inode);
+ /* don't need i_lock here */
inode->i_state = I_CLEAR;
}
EXPORT_SYMBOL(clear_inode);
@@ -661,7 +669,7 @@ __inode_add_to_lists(struct super_block
spin_unlock(&sb_inode_list_lock);
if (b) {
spin_lock_bucket(b);
- hlist_bl_add_head(&inode->i_hash, &b->head);
+ hlist_bl_add_head_rcu(&inode->i_hash, &b->head);
spin_unlock_bucket(b);
}
}
@@ -713,6 +721,7 @@ struct inode *new_inode(struct super_blo

inode = alloc_inode(sb);
if (inode) {
+ /* XXX: init as locked for speedup */
spin_lock(&sb_inode_list_lock);
spin_lock(&inode->i_lock);
inode->i_ino = atomic_inc_return(&last_ino);
@@ -870,6 +879,7 @@ static int test_inode_iunique(struct sup
spin_unlock_bucket(b);
return 0;
}
+ /* XXX: test for I_FREEING|I_CLEAR|etc? */
}
spin_unlock_bucket(b);
return 1;
@@ -1156,42 +1166,41 @@ int insert_inode_locked(struct inode *in
struct super_block *sb = inode->i_sb;
ino_t ino = inode->i_ino;
struct inode_hash_bucket *b = inode_hashtable + hash(sb, ino);
+ struct hlist_bl_node *node;
+ struct inode *old;

inode->i_state |= I_NEW;
- while (1) {
- struct hlist_bl_node *node;
- struct inode *old = NULL;

repeat:
- spin_lock_bucket(b);
- hlist_bl_for_each_entry(old, node, &b->head, i_hash) {
- if (old->i_ino != ino)
- continue;
- if (old->i_sb != sb)
- continue;
- if (old->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE))
- continue;
- if (!spin_trylock(&old->i_lock)) {
- spin_unlock_bucket(b);
- goto repeat;
- }
- break;
- }
- if (likely(!node)) {
- hlist_bl_add_head(&inode->i_hash, &b->head);
+ spin_lock_bucket(b);
+ hlist_bl_for_each_entry(old, node, &b->head, i_hash) {
+ if (old->i_ino != ino)
+ continue;
+ if (old->i_sb != sb)
+ continue;
+ if (old->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE))
+ continue;
+ if (!spin_trylock(&old->i_lock)) {
spin_unlock_bucket(b);
- return 0;
- }
- spin_unlock_bucket(b);
- __iget(old);
- spin_unlock(&old->i_lock);
- wait_on_inode(old);
- if (unlikely(!hlist_bl_unhashed(&old->i_hash))) {
- iput(old);
- return -EBUSY;
+ goto repeat;
}
+ goto found_old;
+ }
+ hlist_bl_add_head_rcu(&inode->i_hash, &b->head);
+ spin_unlock_bucket(b);
+ return 0;
+
+found_old:
+ spin_unlock_bucket(b);
+ __iget(old);
+ spin_unlock(&old->i_lock);
+ wait_on_inode(old);
+ if (unlikely(!hlist_bl_unhashed(&old->i_hash))) {
iput(old);
+ return -EBUSY;
}
+ iput(old);
+ goto repeat;
}
EXPORT_SYMBOL(insert_inode_locked);

@@ -1200,43 +1209,44 @@ int insert_inode_locked4(struct inode *i
{
struct super_block *sb = inode->i_sb;
struct inode_hash_bucket *b = inode_hashtable + hash(sb, hashval);
+ struct hlist_bl_node *node;
+ struct inode *old;

inode->i_state |= I_NEW;

- while (1) {
- struct hlist_bl_node *node;
- struct inode *old = NULL;
-
repeat:
- spin_lock_bucket(b);
- hlist_bl_for_each_entry(old, node, &b->head, i_hash) {
- if (old->i_sb != sb)
- continue;
- if (!test(old, data))
- continue;
- if (old->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE))
- continue;
- if (!spin_trylock(&old->i_lock)) {
- spin_unlock_bucket(b);
- goto repeat;
- }
- break;
- }
- if (likely(!node)) {
- hlist_bl_add_head(&inode->i_hash, &b->head);
+ spin_lock_bucket(b);
+ hlist_bl_for_each_entry(old, node, &b->head, i_hash) {
+ if (old->i_sb != sb)
+ continue;
+ /* XXX: audit put test outside i_lock? */
+ if (!test(old, data))
+ continue;
+ if (old->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE))
+ continue;
+ if (!spin_trylock(&old->i_lock)) {
spin_unlock_bucket(b);
- return 0;
- }
- spin_unlock_bucket(b);
- __iget(old);
- spin_unlock(&old->i_lock);
- wait_on_inode(old);
- if (unlikely(!hlist_bl_unhashed(&old->i_hash))) {
- iput(old);
- return -EBUSY;
+ cpu_relax();
+ cpu_relax();
+ goto repeat;
}
+ goto found_old;
+ }
+ hlist_bl_add_head_rcu(&inode->i_hash, &b->head);
+ spin_unlock_bucket(b);
+ return 0;
+
+found_old:
+ spin_unlock_bucket(b);
+ __iget(old);
+ spin_unlock(&old->i_lock);
+ wait_on_inode(old);
+ if (unlikely(!hlist_bl_unhashed(&old->i_hash))) {
iput(old);
+ return -EBUSY;
}
+ iput(old);
+ goto repeat;
}
EXPORT_SYMBOL(insert_inode_locked4);

@@ -1254,7 +1264,7 @@ void __insert_inode_hash(struct inode *i

spin_lock(&inode->i_lock);
spin_lock_bucket(b);
- hlist_bl_add_head(&inode->i_hash, &b->head);
+ hlist_bl_add_head_rcu(&inode->i_hash, &b->head);
spin_unlock_bucket(b);
spin_unlock(&inode->i_lock);
}
@@ -1271,7 +1281,7 @@ void __remove_inode_hash(struct inode *i
{
struct inode_hash_bucket *b = inode_hashtable + hash(inode->i_sb, inode->i_ino);
spin_lock_bucket(b);
- hlist_bl_del_init(&inode->i_hash);
+ hlist_bl_del_init_rcu(&inode->i_hash);
spin_unlock_bucket(b);
}

Index: linux-2.6/include/linux/fs.h
===================================================================
--- linux-2.6.orig/include/linux/fs.h
+++ linux-2.6/include/linux/fs.h
@@ -726,7 +726,10 @@ struct inode {
struct hlist_bl_node i_hash;
struct list_head i_list; /* backing dev IO list */
struct list_head i_sb_list;
- struct list_head i_dentry;
+ union {
+ struct list_head i_dentry;
+ struct rcu_head i_rcu;
+ };
unsigned long i_ino;
unsigned int i_count;
unsigned int i_nlink;
Index: linux-2.6/fs/block_dev.c
===================================================================
--- linux-2.6.orig/fs/block_dev.c
+++ linux-2.6/fs/block_dev.c
@@ -397,13 +397,20 @@ static struct inode *bdev_alloc_inode(st
return &ei->vfs_inode;
}

-static void bdev_destroy_inode(struct inode *inode)
+static void bdev_i_callback(struct rcu_head *head)
{
+ struct inode *inode = container_of(head, struct inode, i_rcu);
struct bdev_inode *bdi = BDEV_I(inode);

+ INIT_LIST_HEAD(&inode->i_dentry);
kmem_cache_free(bdev_cachep, bdi);
}

+static void bdev_destroy_inode(struct inode *inode)
+{
+ call_rcu(&inode->i_rcu, bdev_i_callback);
+}
+
static void init_once(void *foo)
{
struct bdev_inode *ei = (struct bdev_inode *) foo;
Index: linux-2.6/fs/ext3/super.c
===================================================================
--- linux-2.6.orig/fs/ext3/super.c
+++ linux-2.6/fs/ext3/super.c
@@ -485,6 +485,13 @@ static struct inode *ext3_alloc_inode(st
return &ei->vfs_inode;
}

+static void ext3_i_callback(struct rcu_head *head)
+{
+ struct inode *inode = container_of(head, struct inode, i_rcu);
+ INIT_LIST_HEAD(&inode->i_dentry);
+ kmem_cache_free(ext3_inode_cachep, EXT3_I(inode));
+}
+
static void ext3_destroy_inode(struct inode *inode)
{
if (!list_empty(&(EXT3_I(inode)->i_orphan))) {
@@ -495,7 +502,7 @@ static void ext3_destroy_inode(struct in
false);
dump_stack();
}
- kmem_cache_free(ext3_inode_cachep, EXT3_I(inode));
+ call_rcu(&inode->i_rcu, ext3_i_callback);
}

static void init_once(void *foo)
Index: linux-2.6/fs/hugetlbfs/inode.c
===================================================================
--- linux-2.6.orig/fs/hugetlbfs/inode.c
+++ linux-2.6/fs/hugetlbfs/inode.c
@@ -665,11 +665,18 @@ static struct inode *hugetlbfs_alloc_ino
return &p->vfs_inode;
}

+static void hugetlbfs_i_callback(struct rcu_head *head)
+{
+ struct inode *inode = container_of(head, struct inode, i_rcu);
+ INIT_LIST_HEAD(&inode->i_dentry);
+ kmem_cache_free(hugetlbfs_inode_cachep, HUGETLBFS_I(inode));
+}
+
static void hugetlbfs_destroy_inode(struct inode *inode)
{
hugetlbfs_inc_free_inodes(HUGETLBFS_SB(inode->i_sb));
mpol_free_shared_policy(&HUGETLBFS_I(inode)->policy);
- kmem_cache_free(hugetlbfs_inode_cachep, HUGETLBFS_I(inode));
+ call_rcu(&inode->i_rcu, hugetlbfs_i_callback);
}

static const struct address_space_operations hugetlbfs_aops = {
Index: linux-2.6/fs/proc/inode.c
===================================================================
--- linux-2.6.orig/fs/proc/inode.c
+++ linux-2.6/fs/proc/inode.c
@@ -66,11 +66,18 @@ static struct inode *proc_alloc_inode(st
return inode;
}

-static void proc_destroy_inode(struct inode *inode)
+static void proc_i_callback(struct rcu_head *head)
{
+ struct inode *inode = container_of(head, struct inode, i_rcu);
+ INIT_LIST_HEAD(&inode->i_dentry);
kmem_cache_free(proc_inode_cachep, PROC_I(inode));
}

+static void proc_destroy_inode(struct inode *inode)
+{
+ call_rcu(&inode->i_rcu, proc_i_callback);
+}
+
static void init_once(void *foo)
{
struct proc_inode *ei = (struct proc_inode *) foo;
Index: linux-2.6/ipc/mqueue.c
===================================================================
--- linux-2.6.orig/ipc/mqueue.c
+++ linux-2.6/ipc/mqueue.c
@@ -236,11 +236,18 @@ static struct inode *mqueue_alloc_inode(
return &ei->vfs_inode;
}

-static void mqueue_destroy_inode(struct inode *inode)
+static void mqueue_i_callback(struct rcu_head *head)
{
+ struct inode *inode = container_of(head, struct inode, i_rcu);
+ INIT_LIST_HEAD(&inode->i_dentry);
kmem_cache_free(mqueue_inode_cachep, MQUEUE_I(inode));
}

+static void mqueue_destroy_inode(struct inode *inode)
+{
+ call_rcu(&inode->i_rcu, mqueue_i_callback);
+}
+
static void mqueue_delete_inode(struct inode *inode)
{
struct mqueue_inode_info *info;
Index: linux-2.6/net/socket.c
===================================================================
--- linux-2.6.orig/net/socket.c
+++ linux-2.6/net/socket.c
@@ -271,20 +271,20 @@ static struct inode *sock_alloc_inode(st
}


-static void wq_free_rcu(struct rcu_head *head)
+static void sock_free_rcu(struct rcu_head *head)
{
- struct socket_wq *wq = container_of(head, struct socket_wq, rcu);
+ struct inode *inode = container_of(head, struct inode, i_rcu);
+ struct socket_alloc *ei = container_of(inode, struct socket_alloc,
+ vfs_inode);

- kfree(wq);
+ kfree(ei->socket.wq);
+ INIT_LIST_HEAD(&inode->i_dentry);
+ kmem_cache_free(sock_inode_cachep, ei);
}

static void sock_destroy_inode(struct inode *inode)
{
- struct socket_alloc *ei;
-
- ei = container_of(inode, struct socket_alloc, vfs_inode);
- call_rcu(&ei->socket.wq->rcu, wq_free_rcu);
- kmem_cache_free(sock_inode_cachep, ei);
+ call_rcu(&inode->i_rcu, sock_free_rcu);
}

static void init_once(void *foo)
Index: linux-2.6/fs/fat/inode.c
===================================================================
--- linux-2.6.orig/fs/fat/inode.c
+++ linux-2.6/fs/fat/inode.c
@@ -520,11 +520,18 @@ static struct inode *fat_alloc_inode(str
return &ei->vfs_inode;
}

-static void fat_destroy_inode(struct inode *inode)
+static void fat_i_callback(struct rcu_head *head)
{
+ struct inode *inode = container_of(head, struct inode, i_rcu);
+ INIT_LIST_HEAD(&inode->i_dentry);
kmem_cache_free(fat_inode_cachep, MSDOS_I(inode));
}

+static void fat_destroy_inode(struct inode *inode)
+{
+ call_rcu(&inode->i_rcu, fat_i_callback);
+}
+
static void init_once(void *foo)
{
struct msdos_inode_info *ei = (struct msdos_inode_info *)foo;
Index: linux-2.6/fs/nfs/inode.c
===================================================================
--- linux-2.6.orig/fs/nfs/inode.c
+++ linux-2.6/fs/nfs/inode.c
@@ -1365,11 +1365,18 @@ struct inode *nfs_alloc_inode(struct sup
return &nfsi->vfs_inode;
}

-void nfs_destroy_inode(struct inode *inode)
+static void nfs_i_callback(struct rcu_head *head)
{
+ struct inode *inode = container_of(head, struct inode, i_rcu);
+ INIT_LIST_HEAD(&inode->i_dentry);
kmem_cache_free(nfs_inode_cachep, NFS_I(inode));
}

+void nfs_destroy_inode(struct inode *inode)
+{
+ call_rcu(&inode->i_rcu, nfs_i_callback);
+}
+
static inline void nfs4_init_once(struct nfs_inode *nfsi)
{
#ifdef CONFIG_NFS_V4
Index: linux-2.6/mm/shmem.c
===================================================================
--- linux-2.6.orig/mm/shmem.c
+++ linux-2.6/mm/shmem.c
@@ -2389,13 +2389,20 @@ static struct inode *shmem_alloc_inode(s
return &p->vfs_inode;
}

+static void shmem_i_callback(struct rcu_head *head)
+{
+ struct inode *inode = container_of(head, struct inode, i_rcu);
+ INIT_LIST_HEAD(&inode->i_dentry);
+ kmem_cache_free(shmem_inode_cachep, SHMEM_I(inode));
+}
+
static void shmem_destroy_inode(struct inode *inode)
{
if ((inode->i_mode & S_IFMT) == S_IFREG) {
/* only struct inode is valid if it's an inline symlink */
mpol_free_shared_policy(&SHMEM_I(inode)->policy);
}
- kmem_cache_free(shmem_inode_cachep, SHMEM_I(inode));
+ call_rcu(&inode->i_rcu, shmem_i_callback);
}

static void init_once(void *foo)
Index: linux-2.6/net/sunrpc/rpc_pipe.c
===================================================================
--- linux-2.6.orig/net/sunrpc/rpc_pipe.c
+++ linux-2.6/net/sunrpc/rpc_pipe.c
@@ -163,11 +163,19 @@ rpc_alloc_inode(struct super_block *sb)
}

static void
-rpc_destroy_inode(struct inode *inode)
+rpc_i_callback(struct rcu_head *head)
{
+ struct inode *inode = container_of(head, struct inode, i_rcu);
+ INIT_LIST_HEAD(&inode->i_dentry);
kmem_cache_free(rpc_inode_cachep, RPC_I(inode));
}

+static void
+rpc_destroy_inode(struct inode *inode)
+{
+ call_rcu(&inode->i_rcu, rpc_i_callback);
+}
+
static int
rpc_pipe_open(struct inode *inode, struct file *filp)
{
Index: linux-2.6/include/linux/net.h
===================================================================
--- linux-2.6.orig/include/linux/net.h
+++ linux-2.6/include/linux/net.h
@@ -120,7 +120,6 @@ enum sock_shutdown_cmd {
struct socket_wq {
wait_queue_head_t wait;
struct fasync_struct *fasync_list;
- struct rcu_head rcu;
} ____cacheline_aligned_in_smp;

/**


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/