[patch] fix for inode leakage in 2.2.13

Andrea Arcangeli (andrea@suse.de)
Mon, 1 Nov 1999 17:53:16 +0100 (CET)


I think I found two separate problems in the inode recycling code in the
latest 2.2.x kernels. Either problem could lead to an inode leak
(prevented since 2.2.14pre3 by the check I added that enforces
the hard inode-max limit).

o the dcache shrink is not properly controlled to release
the necessary number of inodes. I had to redesign
prune_dcache (core of the VFS) to fix this. I dropped
select_dcache, which was not doing its job well enough, and
I changed the prune_dcache interface to take two parameters.
The first parameter tells how many dentries we want to release
and the second parameter tells how many _inodes_ we want
to release.
o other tasks could eat inodes before the freeing
task had a chance to get a free inode. (I think
this was a minor problem but nevertheless it could
potentially happen).

This patch fixes both problems. It's against 2.2.13 and will apply
fine on top of 2.2.14pre3 and probably on top of 2.2.12 and
earlier too. I can't reproduce the leak anymore with it applied and
the dcache gets shrunk properly now. I am using inode-max set to 1000 (echo
1000 >/proc/sys/fs/inode-max) and I am running an rsync of /home (where
all my development stuff lives):

andrea@alpha:~ > df -i /home/
Filesystem Inodes IUsed IFree %IUsed Mounted on
/dev/sda2 1024000 70994 953006 7% /home
andrea@alpha:~ > df /home/
Filesystem 1024-blocks Used Available Capacity Mounted on
/dev/sda2 3967488 2232020 1735468 56% /home

Without the patch the inode-max message triggers after 20 seconds of rsync
load and rsync fails. Now the rsync completes without any message even with
inode-max set to 1000.

Note that this is 2.2.x stuff only, as in 2.3.x I made the icache
allocation dynamic and so in 2.3.x there's no major problem directly
related to icache growth.

diff -urN 2.2.13/fs/dcache.c 2.2.13-inodeleak/fs/dcache.c
--- 2.2.13/fs/dcache.c Tue Jul 13 00:33:04 1999
+++ 2.2.13-inodeleak/fs/dcache.c Mon Nov 1 16:36:55 1999
@@ -69,18 +69,23 @@
* Release the dentry's inode, using the fileystem
* d_iput() operation if defined.
*/
-static inline void dentry_iput(struct dentry * dentry)
+static inline int dentry_iput(struct dentry * dentry)
{
struct inode *inode = dentry->d_inode;
+ int ret = 0;
+
if (inode) {
dentry->d_inode = NULL;
list_del(&dentry->d_alias);
INIT_LIST_HEAD(&dentry->d_alias);
+ ret = inode->i_count == 1;
if (dentry->d_op && dentry->d_op->d_iput)
dentry->d_op->d_iput(dentry, inode);
else
iput(inode);
}
+
+ return ret;
}

/*
@@ -195,86 +200,23 @@
}

/*
- * Select less valuable dentries to be pruned when we need
- * inodes or memory. The selected dentries are moved to the
- * old end of the list where prune_dcache() can find them.
- *
- * Negative dentries are included in the selection so that
- * they don't accumulate at the end of the list. The count
- * returned is the total number of dentries selected, which
- * may be much larger than the requested number of inodes.
- */
-int select_dcache(int inode_count, int page_count)
-{
- struct list_head *next, *tail = &dentry_unused;
- int found = 0;
- int depth = dentry_stat.nr_unused >> 1;
- unsigned long max_value = 4;
-
- if (page_count)
- max_value = -1;
-
- next = tail->prev;
- while (next != &dentry_unused && depth--) {
- struct list_head *tmp = next;
- struct dentry *dentry = list_entry(tmp, struct dentry, d_lru);
- struct inode *inode = dentry->d_inode;
- unsigned long value = 0;
-
- next = tmp->prev;
- if (dentry->d_count) {
- dentry_stat.nr_unused--;
- list_del(tmp);
- INIT_LIST_HEAD(tmp);
- continue;
- }
-
- /*
- * Select dentries based on the page cache count ...
- * should factor in number of uses as well. We take
- * all negative dentries so that they don't accumulate.
- * (We skip inodes that aren't immediately available.)
- */
- if (inode) {
- value = inode->i_nrpages;
- if (value >= max_value)
- continue;
- if (inode->i_state || inode->i_count > 1)
- continue;
- }
-
- /*
- * Move the selected dentries behind the tail.
- */
- if (tmp != tail->prev) {
- list_del(tmp);
- list_add(tmp, tail->prev);
- }
- tail = tmp;
- found++;
- if (inode && --inode_count <= 0)
- break;
- if (page_count && (page_count -= value) <= 0)
- break;
- }
- return found;
-}
-
-/*
* Throw away a dentry - free the inode, dput the parent.
* This requires that the LRU list has already been
* removed.
*/
-static inline void prune_one_dentry(struct dentry * dentry)
+static inline int prune_one_dentry(struct dentry * dentry)
{
struct dentry * parent;
+ int ret;

list_del(&dentry->d_hash);
list_del(&dentry->d_child);
- dentry_iput(dentry);
+ ret = dentry_iput(dentry);
parent = dentry->d_parent;
d_free(dentry);
dput(parent);
+
+ return ret;
}

/*
@@ -282,9 +224,21 @@
* more memory, or simply when we need to unmount
* something (at which point we need to unuse
* all dentries).
+ *
+ * If you don't want a limit on the number of
+ * inodes that will be released then call
+ * with i_nr = -1.
+ *
+ * If you don't want a limit on the number of
+ * dentries that will be released then call
+ * with d_nr = 0.
+ *
+ * Returns the number of inodes released.
*/
-void prune_dcache(int count)
+int prune_dcache(int d_nr, int i_nr)
{
+ int __i_nr = i_nr;
+
for (;;) {
struct dentry *dentry;
struct list_head *tmp = dentry_unused.prev;
@@ -296,11 +250,15 @@
INIT_LIST_HEAD(tmp);
dentry = list_entry(tmp, struct dentry, d_lru);
if (!dentry->d_count) {
- prune_one_dentry(dentry);
- if (!--count)
+ i_nr -= prune_one_dentry(dentry);
+ if (!i_nr)
+ break;
+ if (!--d_nr)
break;
}
}
+
+ return __i_nr - i_nr;
}

/*
@@ -455,7 +413,7 @@
int found;

while ((found = select_parent(parent)) != 0)
- prune_dcache(found);
+ prune_dcache(found, -1);
}

/*
@@ -475,7 +433,7 @@
int count = 0;
if (priority)
count = dentry_stat.nr_unused / priority;
- prune_dcache(count);
+ prune_dcache(count, -1);
}
}

diff -urN 2.2.13/fs/dquot.c 2.2.13-inodeleak/fs/dquot.c
--- 2.2.13/fs/dquot.c Sun Oct 31 23:31:33 1999
+++ 2.2.13-inodeleak/fs/dquot.c Mon Nov 1 16:17:21 1999
@@ -572,10 +572,9 @@
/*
* Try pruning the dcache to free up some dquots ...
*/
- count = select_dcache(128, 0);
- if (count) {
- printk(KERN_DEBUG "get_empty_dquot: pruning %d\n", count);
- prune_dcache(count);
+ printk(KERN_DEBUG "get_empty_dquot: pruning %d\n", count);
+ if (prune_dcache(0, 128))
+ {
free_inode_memory(count);
goto repeat;
}
diff -urN 2.2.13/fs/inode.c 2.2.13-inodeleak/fs/inode.c
--- 2.2.13/fs/inode.c Thu Aug 12 02:32:33 1999
+++ 2.2.13-inodeleak/fs/inode.c Mon Nov 1 16:10:06 1999
@@ -340,12 +340,11 @@
(((inode)->i_count | (inode)->i_state) == 0)
#define INODE(entry) (list_entry(entry, struct inode, i_list))

-static int free_inodes(void)
+static int __free_inodes(struct list_head * freeable)
{
- struct list_head list, *entry, *freeable = &list;
+ struct list_head *entry;
int found = 0;

- INIT_LIST_HEAD(freeable);
entry = inode_in_use.next;
while (entry != &inode_in_use) {
struct list_head *tmp = entry;
@@ -358,15 +357,19 @@
INIT_LIST_HEAD(&INODE(tmp)->i_hash);
list_add(tmp, freeable);
list_entry(tmp, struct inode, i_list)->i_state = I_FREEING;
- found = 1;
+ found++;
}

- if (found)
- dispose_list(freeable);
-
return found;
}

+static void free_inodes(void)
+{
+ LIST_HEAD(throw_away);
+ if (__free_inodes(&throw_away))
+ dispose_list(&throw_away);
+}
+
/*
* Searches the inodes list for freeable inodes,
* shrinking the dcache before (and possible after,
@@ -374,6 +377,36 @@
*/
static void try_to_free_inodes(int goal)
{
+ static int block;
+ static struct wait_queue * wait_inode_freeing;
+ LIST_HEAD(throw_away);
+
+ /* We must make sure to not eat the inodes
+ while the blocker task sleeps otherwise
+ the blocker task may find none inode
+ available. */
+ if (block)
+ {
+ struct wait_queue __wait;
+
+ __wait.task = current;
+ add_wait_queue(&wait_inode_freeing, &__wait);
+ for (;;)
+ {
+ /* NOTE: we rely only on the inode_lock to be sure
+ to not miss the unblock event */
+ current->state = TASK_UNINTERRUPTIBLE;
+ spin_unlock(&inode_lock);
+ schedule();
+ spin_lock(&inode_lock);
+ if (!block)
+ break;
+ }
+ remove_wait_queue(&wait_inode_freeing, &__wait);
+ current->state = TASK_RUNNING;
+ }
+
+ block = 1;
/*
* First stry to just get rid of unused inodes.
*
@@ -381,16 +414,18 @@
* to try to shrink the dcache and sync existing
* inodes..
*/
- free_inodes();
- goal -= inodes_stat.nr_free_inodes;
+ goal -= __free_inodes(&throw_away);
if (goal > 0) {
spin_unlock(&inode_lock);
- select_dcache(goal, 0);
- prune_dcache(goal);
+ prune_dcache(0, goal);
spin_lock(&inode_lock);
sync_all_inodes();
- free_inodes();
+ __free_inodes(&throw_away);
}
+ if (!list_empty(&throw_away))
+ dispose_list(&throw_away);
+ block = 0;
+ wake_up(&wait_inode_freeing);
}

/*
@@ -465,7 +500,7 @@
* If the allocation failed, do an extensive pruning of
* the dcache and then try again to free some inodes.
*/
- prune_dcache(inodes_stat.nr_inodes >> 2);
+ prune_dcache(0, inodes_stat.nr_inodes >> 2);

spin_lock(&inode_lock);
free_inodes();
diff -urN 2.2.13/include/linux/dcache.h 2.2.13-inodeleak/include/linux/dcache.h
--- 2.2.13/include/linux/dcache.h Sun Oct 31 23:31:37 1999
+++ 2.2.13-inodeleak/include/linux/dcache.h Mon Nov 1 16:21:03 1999
@@ -133,15 +133,14 @@

/* allocate/de-allocate */
extern struct dentry * d_alloc(struct dentry * parent, const struct qstr *name);
-extern void prune_dcache(int);
+extern int prune_dcache(int, int);
extern void shrink_dcache_sb(struct super_block *);
extern void shrink_dcache_parent(struct dentry *);
extern int d_invalidate(struct dentry *);

-#define shrink_dcache() prune_dcache(0)
+#define shrink_dcache() prune_dcache(0, -1)

/* dcache memory management */
-extern int select_dcache(int, int);
extern void shrink_dcache_memory(int, unsigned int);
extern void check_dcache_memory(void);
extern void free_inode_memory(int); /* defined in fs/inode.c */

Andrea

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.rutgers.edu
Please read the FAQ at http://www.tux.org/lkml/