Re: [patch 23/35] fs: icache use per-CPU lists and locks for sb inode lists
From: Miklos Szeredi
Date: Tue Oct 19 2010 - 11:34:04 EST
On Tue, 19 Oct 2010, npiggin@xxxxxxxx wrote:
> Signed-off-by: Nick Piggin <npiggin@xxxxxxxxx>
>
> ---
> fs/drop_caches.c | 4 -
> fs/fs-writeback.c | 15 +++--
> fs/inode.c | 99 ++++++++++++++++++++++++++++-----------
> fs/notify/inode_mark.c | 6 +-
> fs/quota/dquot.c | 8 +--
> fs/super.c | 16 +++++-
> include/linux/fs.h | 58 ++++++++++++++++++++++
> include/linux/fsnotify_backend.h | 4 -
> include/linux/writeback.h | 1
> 9 files changed, 164 insertions(+), 47 deletions(-)
>
> Index: linux-2.6/fs/inode.c
> ===================================================================
> --- linux-2.6.orig/fs/inode.c 2010-10-19 14:18:59.000000000 +1100
> +++ linux-2.6/fs/inode.c 2010-10-19 14:19:23.000000000 +1100
[snip]
> @@ -718,13 +714,63 @@
> return tmp & I_HASHMASK;
> }
>
> +static inline int inode_list_cpu(struct inode *inode)
> +{
> +#ifdef CONFIG_SMP
> + return inode->i_sb_list_cpu;
> +#else
> + return smp_processor_id();
> +#endif
> +}
> +
> +/* helper for file_sb_list_add to reduce ifdefs */
> +static inline void __inode_sb_list_add(struct inode *inode, struct super_block *sb)
> +{
> + struct list_head *list;
> +#ifdef CONFIG_SMP
> + int cpu;
> + cpu = smp_processor_id();
> + inode->i_sb_list_cpu = cpu;
> + list = per_cpu_ptr(sb->s_inodes, cpu);
> +#else
> + list = &sb->s_inodes;
> +#endif
> + list_add_rcu(&inode->i_sb_list, list);
> +}
> +
> +/**
> + * inode_sb_list_add - add an inode to the sb's file list
> + * @inode: inode to add
> + * @sb: sb to add it to
> + *
> + * Use this function to associate an with the superblock it belongs to.
^^^inode
> + */
> +static void inode_sb_list_add(struct inode *inode, struct super_block *sb)
> +{
> + lg_local_lock(inode_list_lglock);
> + __inode_sb_list_add(inode, sb);
> + lg_local_unlock(inode_list_lglock);
> +}
> +
> +/**
> + * inode_sb_list_del - remove an inode from the sb's inode list
> + * @inode: inode to remove
> + * @sb: sb to remove it from
> + *
> + * Use this function to remove an inode from its superblock.
> + */
> +static void inode_sb_list_del(struct inode *inode)
> +{
> + lg_local_lock_cpu(inode_list_lglock, inode_list_cpu(inode));
> + list_del_rcu(&inode->i_sb_list);
> + lg_local_unlock_cpu(inode_list_lglock, inode_list_cpu(inode));
> +}
> +
> static inline void
> __inode_add_to_lists(struct super_block *sb, struct inode_hash_bucket *b,
> struct inode *inode)
> {
> - spin_lock(&sb_inode_list_lock);
> - list_add_rcu(&inode->i_sb_list, &sb->s_inodes);
> - spin_unlock(&sb_inode_list_lock);
> + inode_sb_list_add(inode, sb);
> if (b) {
> spin_lock_bucket(b);
> hlist_bl_add_head_rcu(&inode->i_hash, &b->head);
> @@ -1270,6 +1316,7 @@
> continue;
> if (!spin_trylock(&old->i_lock)) {
> spin_unlock_bucket(b);
> + cpu_relax();
Doesn't this logically belong to a previous patch?
> goto repeat;
> }
> goto found_old;
> @@ -1453,9 +1500,7 @@
> inodes_stat.nr_unused--;
> spin_unlock(&wb_inode_list_lock);
> }
> - spin_lock(&sb_inode_list_lock);
> - list_del_rcu(&inode->i_sb_list);
> - spin_unlock(&sb_inode_list_lock);
> + inode_sb_list_del(inode);
> WARN_ON(inode->i_state & I_NEW);
> inode->i_state |= I_FREEING;
> spin_unlock(&inode->i_lock);
> @@ -1732,6 +1777,8 @@
> init_once);
> register_shrinker(&icache_shrinker);
>
> + lg_lock_init(inode_list_lglock);
> +
> /* Hash may have been set up in inode_init_early */
> if (!hashdist)
> return;
> Index: linux-2.6/include/linux/fs.h
> ===================================================================
> --- linux-2.6.orig/include/linux/fs.h 2010-10-19 14:18:59.000000000 +1100
> +++ linux-2.6/include/linux/fs.h 2010-10-19 14:19:22.000000000 +1100
> @@ -374,6 +374,7 @@
> #include <linux/cache.h>
> #include <linux/kobject.h>
> #include <linux/list.h>
> +#include <linux/rculist.h>
> #include <linux/rculist_bl.h>
> #include <linux/radix-tree.h>
> #include <linux/prio_tree.h>
> @@ -733,6 +734,9 @@
> struct rcu_head i_rcu;
> };
> unsigned long i_ino;
> +#ifdef CONFIG_SMP
> + int i_sb_list_cpu;
> +#endif
> unsigned int i_count;
> unsigned int i_nlink;
> uid_t i_uid;
> @@ -1344,11 +1348,12 @@
> #endif
> const struct xattr_handler **s_xattr;
>
> - struct list_head s_inodes; /* all inodes */
> struct hlist_head s_anon; /* anonymous dentries for (nfs) exporting */
> #ifdef CONFIG_SMP
> + struct list_head __percpu *s_inodes;
> struct list_head __percpu *s_files;
> #else
> + struct list_head s_inodes; /* all inodes */
> struct list_head s_files;
> #endif
> /* s_dentry_lru and s_nr_dentry_unused are protected by dcache_lock */
> @@ -2202,6 +2207,57 @@
> __insert_inode_hash(inode, inode->i_ino);
> }
>
> +#ifdef CONFIG_SMP
> +/*
> + * These macros iterate all inodes on all CPUs for a given superblock.
> + * rcu_read_lock must be held.
> + */
> +#define do_inode_list_for_each_entry_rcu(__sb, __inode) \
> +{ \
> + int i; \
> + for_each_possible_cpu(i) { \
> + struct list_head *list; \
> + list = per_cpu_ptr((__sb)->s_inodes, i); \
> + list_for_each_entry_rcu((__inode), list, i_sb_list)
> +
> +#define while_inode_list_for_each_entry_rcu \
> + } \
> +}
> +
> +#define do_inode_list_for_each_entry_safe(__sb, __inode, __tmp) \
> +{ \
> + int i; \
> + for_each_possible_cpu(i) { \
> + struct list_head *list; \
> + list = per_cpu_ptr((__sb)->s_inodes, i); \
> + list_for_each_entry_safe((__inode), (__tmp), list, i_sb_list)
> +
> +#define while_inode_list_for_each_entry_safe \
> + } \
> +}
> +
> +#else
> +
> +#define do_inode_list_for_each_entry_rcu(__sb, __inode) \
> +{ \
> + struct list_head *list; \
> + list = &(sb)->s_inodes; \
> + list_for_each_entry_rcu((__inode), list, i_sb_list)
> +
> +#define while_inode_list_for_each_entry_rcu \
> +}
> +
> +#define do_inode_list_for_each_entry_safe(__sb, __inode, __tmp) \
> +{ \
> + struct list_head *list; \
> + list = &(sb)->s_inodes; \
> + list_for_each_entry_safe((__inode), (__tmp), list, i_sb_list)
> +
> +#define while_inode_list_for_each_entry_safe \
> +}
> +
> +#endif
> +
> #ifdef CONFIG_BLOCK
> extern void submit_bio(int, struct bio *);
> extern int bdev_read_only(struct block_device *);
> Index: linux-2.6/fs/super.c
> ===================================================================
> --- linux-2.6.orig/fs/super.c 2010-10-19 14:17:17.000000000 +1100
> +++ linux-2.6/fs/super.c 2010-10-19 14:18:59.000000000 +1100
> @@ -67,12 +67,25 @@
> for_each_possible_cpu(i)
> INIT_LIST_HEAD(per_cpu_ptr(s->s_files, i));
> }
> + s->s_inodes = alloc_percpu(struct list_head);
> + if (!s->s_inodes) {
> + free_percpu(s->s_files);
> + security_sb_free(s);
> + kfree(s);
> + s = NULL;
> + goto out;
Factor out error cleanups to separate out labels?
> + } else {
> + int i;
> +
> + for_each_possible_cpu(i)
> + INIT_LIST_HEAD(per_cpu_ptr(s->s_inodes, i));
> + }
> #else
> INIT_LIST_HEAD(&s->s_files);
> + INIT_LIST_HEAD(&s->s_inodes);
> #endif
> INIT_LIST_HEAD(&s->s_instances);
> INIT_HLIST_HEAD(&s->s_anon);
> - INIT_LIST_HEAD(&s->s_inodes);
> INIT_LIST_HEAD(&s->s_dentry_lru);
> init_rwsem(&s->s_umount);
> mutex_init(&s->s_lock);
> @@ -124,6 +137,7 @@
> static inline void destroy_super(struct super_block *s)
> {
> #ifdef CONFIG_SMP
> + free_percpu(s->s_inodes);
> free_percpu(s->s_files);
> #endif
> security_sb_free(s);
> Index: linux-2.6/fs/drop_caches.c
> ===================================================================
> --- linux-2.6.orig/fs/drop_caches.c 2010-10-19 14:18:59.000000000 +1100
> +++ linux-2.6/fs/drop_caches.c 2010-10-19 14:19:18.000000000 +1100
> @@ -17,7 +17,7 @@
> struct inode *inode, *toput_inode = NULL;
>
> rcu_read_lock();
> - list_for_each_entry_rcu(inode, &sb->s_inodes, i_sb_list) {
> + do_inode_list_for_each_entry_rcu(sb, inode) {
> spin_lock(&inode->i_lock);
> if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)
> || inode->i_mapping->nrpages == 0) {
> @@ -31,7 +31,7 @@
> iput(toput_inode);
> toput_inode = inode;
> rcu_read_lock();
> - }
> + } while_inode_list_for_each_entry_rcu
> rcu_read_unlock();
> iput(toput_inode);
> }
> Index: linux-2.6/fs/fs-writeback.c
> ===================================================================
> --- linux-2.6.orig/fs/fs-writeback.c 2010-10-19 14:18:59.000000000 +1100
> +++ linux-2.6/fs/fs-writeback.c 2010-10-19 14:19:22.000000000 +1100
> @@ -1074,7 +1074,7 @@
> * we still have to wait for that writeout.
> */
> rcu_read_lock();
> - list_for_each_entry_rcu(inode, &sb->s_inodes, i_sb_list) {
> + do_inode_list_for_each_entry_rcu(sb, inode) {
> struct address_space *mapping;
>
> spin_lock(&inode->i_lock);
> @@ -1093,11 +1093,12 @@
> spin_unlock(&inode->i_lock);
> rcu_read_unlock();
> /*
> - * We hold a reference to 'inode' so it couldn't have been
> - * removed from s_inodes list while we dropped the i_lock. We
> - * cannot iput the inode now as we can be holding the last
> - * reference and we cannot iput it under spinlock. So we keep
> - * the reference and iput it later.
> + * We hold a reference to 'inode' so it couldn't have
> + * been removed from s_inodes list while we dropped the
> + * i_lock. We cannot iput the inode now as we can be
> + * holding the last reference and we cannot iput it
> + * under spinlock. So we keep the reference and iput it
> + * later.
> */
> iput(old_inode);
> old_inode = inode;
> @@ -1107,7 +1108,7 @@
> cond_resched();
>
> rcu_read_lock();
> - }
> + } while_inode_list_for_each_entry_rcu
> rcu_read_unlock();
> iput(old_inode);
> }
> Index: linux-2.6/fs/notify/inode_mark.c
> ===================================================================
> --- linux-2.6.orig/fs/notify/inode_mark.c 2010-10-19 14:18:59.000000000 +1100
> +++ linux-2.6/fs/notify/inode_mark.c 2010-10-19 14:19:18.000000000 +1100
> @@ -236,11 +236,11 @@
> * and with the sb going away, no new inodes will appear or be referenced
> * from other paths.
> */
> -void fsnotify_unmount_inodes(struct list_head *list)
> +void fsnotify_unmount_inodes(struct super_block *sb)
> {
> struct inode *inode, *next_i, *need_iput = NULL;
>
> - list_for_each_entry_safe(inode, next_i, list, i_sb_list) {
> + do_inode_list_for_each_entry_safe(sb, inode, next_i) {
> struct inode *need_iput_tmp;
>
> spin_lock(&inode->i_lock);
> @@ -295,5 +295,5 @@
> fsnotify_inode_delete(inode);
>
> iput(inode);
> - }
> + } while_inode_list_for_each_entry_safe
> }
> Index: linux-2.6/fs/quota/dquot.c
> ===================================================================
> --- linux-2.6.orig/fs/quota/dquot.c 2010-10-19 14:18:59.000000000 +1100
> +++ linux-2.6/fs/quota/dquot.c 2010-10-19 14:19:18.000000000 +1100
> @@ -898,7 +898,7 @@
> #endif
>
> rcu_read_lock();
> - list_for_each_entry_rcu(inode, &sb->s_inodes, i_sb_list) {
> + do_inode_list_for_each_entry_rcu(sb, inode) {
> spin_lock(&inode->i_lock);
> if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) {
> spin_unlock(&inode->i_lock);
> @@ -930,7 +930,7 @@
> * lock. So we keep the reference and iput it later. */
> old_inode = inode;
> rcu_read_lock();
> - }
> + } while_inode_list_for_each_entry_rcu
> rcu_read_unlock();
> iput(old_inode);
>
> @@ -1013,7 +1013,7 @@
> int reserved = 0;
>
> rcu_read_lock();
> - list_for_each_entry_rcu(inode, &sb->s_inodes, i_sb_list) {
> + do_inode_list_for_each_entry_rcu(sb, inode) {
> /*
> * We have to scan also I_NEW inodes because they can already
> * have quota pointer initialized. Luckily, we need to touch
> @@ -1025,7 +1025,7 @@
> reserved = 1;
> remove_inode_dquot_ref(inode, type, tofree_head);
> }
> - }
> + } while_inode_list_for_each_entry_rcu
> rcu_read_unlock();
> #ifdef CONFIG_QUOTA_DEBUG
> if (reserved) {
> Index: linux-2.6/include/linux/fsnotify_backend.h
> ===================================================================
> --- linux-2.6.orig/include/linux/fsnotify_backend.h 2010-10-19 14:17:17.000000000 +1100
> +++ linux-2.6/include/linux/fsnotify_backend.h 2010-10-19 14:18:59.000000000 +1100
> @@ -402,7 +402,7 @@
> extern void fsnotify_clear_marks_by_group(struct fsnotify_group *group);
> extern void fsnotify_get_mark(struct fsnotify_mark *mark);
> extern void fsnotify_put_mark(struct fsnotify_mark *mark);
> -extern void fsnotify_unmount_inodes(struct list_head *list);
> +extern void fsnotify_unmount_inodes(struct super_block *sb);
>
> /* put here because inotify does some weird stuff when destroying watches */
> extern struct fsnotify_event *fsnotify_create_event(struct inode *to_tell, __u32 mask,
> @@ -443,7 +443,7 @@
> return 0;
> }
>
> -static inline void fsnotify_unmount_inodes(struct list_head *list)
> +static inline void fsnotify_unmount_inodes(struct super_block *sb)
> {}
>
> #endif /* CONFIG_FSNOTIFY */
> Index: linux-2.6/include/linux/writeback.h
> ===================================================================
> --- linux-2.6.orig/include/linux/writeback.h 2010-10-19 14:18:59.000000000 +1100
> +++ linux-2.6/include/linux/writeback.h 2010-10-19 14:19:21.000000000 +1100
> @@ -9,7 +9,6 @@
>
> struct backing_dev_info;
>
> -extern spinlock_t sb_inode_list_lock;
> extern spinlock_t wb_inode_list_lock;
> extern struct list_head inode_unused;
>
>
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
> the body of a message to majordomo@xxxxxxxxxxxxxxx
> More majordomo info at http://vger.kernel.org/majordomo-info.html
>
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/