patch for 2.1.54 fs/inode.c -- please test!

Bill Hawes (whawes@star.net)
Tue, 09 Sep 1997 10:12:00 -0400


This is a multi-part message in MIME format.
--------------EF11AA4C1A00656F10245E88
Content-Type: text/plain; charset=us-ascii
Content-Transfer-Encoding: 7bit

I've attached a patch that provides consistent inode cleanup and
improves inode memory management, and would like to ask for people to
give it a test. It seems to be quite effective at balancing memory
committed to inodes (and indirectly to the dcache.)

The patch uses clear_inode to provide consistent cleanup of inodes going
to the unused list, and improves inode management by making
try_to_free_inodes more effective. It can now handle inodes with page
cache, and tries to reclaim than one inode at a time.

In the event try_to_free_inodes can't free any inodes, it performs a
_limited_ shrink of the dcache using a new prune_dcache() routine. This
is just a very simple extension to shrink_dcache that passes a count of
the number of dentries to shrink. Since dentries are released from an
age-sorted unused list, this should provide simple but effective control
of dentry growth.

The patch also performs a greater but still limited shrink of the dcache
after the number of inodes has reached half of the nominal limit. This
will greatly slow (but not limit) the amount of memory committed to
inodes.

I've left a few debugging prinks in the patch, but haven't enabled the
conditionals. Let me know how it works out.

Regards,
Bill
--------------EF11AA4C1A00656F10245E88
Content-Type: text/plain; charset=us-ascii; name="inode_54-patch"
Content-Transfer-Encoding: 7bit
Content-Disposition: inline; filename="inode_54-patch"

--- linux-2.1.54/fs/inode.c.old Sat Sep 6 16:03:33 1997
+++ linux-2.1.54/fs/inode.c Tue Sep 9 09:39:15 1997
@@ -113,33 +116,6 @@
sema_init(&inode->i_sem, 1);
}

-
-/*
- * Look out! This returns with the inode lock held if
- * it got an inode..
- */
-static struct inode * grow_inodes(void)
-{
- struct inode * inode = (struct inode *)__get_free_page(GFP_KERNEL);
-
- if (inode) {
- int size;
- struct inode * tmp;
-
- spin_lock(&inode_lock);
- size = PAGE_SIZE - 2*sizeof(struct inode);
- tmp = inode;
- do {
- tmp++;
- init_once(tmp);
- list_add(&tmp->i_list, &inode_unused);
- size -= sizeof(struct inode);
- } while (size >= 0);
- init_once(inode);
- }
- return inode;
-}
-
static inline void write_inode(struct inode *inode)
{
if (inode->i_sb && inode->i_sb->s_op && inode->i_sb->s_op->write_inode)
@@ -224,7 +200,8 @@
*/
void clear_inode(struct inode *inode)
{
- truncate_inode_pages(inode, 0);
+ if (inode->i_nrpages)
+ truncate_inode_pages(inode, 0);
wait_on_inode(inode);
if (IS_WRITABLE(inode) && inode->i_sb && inode->i_sb->dq_op)
inode->i_sb->dq_op->drop(inode);
@@ -239,6 +216,7 @@
static void dispose_list(struct list_head * head)
{
struct list_head *next;
+ int count = 0;

next = head->next;
for (;;) {
@@ -249,12 +227,14 @@
if (tmp == head)
break;
inode = list_entry(tmp, struct inode, i_list);
- truncate_inode_pages(inode, 0);
+ clear_inode(inode);
+ count++;
}

/* Add them all to the unused list in one fell swoop */
spin_lock(&inode_lock);
list_splice(head, &inode_unused);
+ inodes_stat.nr_free_inodes += count;
spin_unlock(&inode_lock);
}

@@ -311,26 +317,27 @@
return busy;
}

+extern void prune_dcache(int); /* N.B. move to include/linux/dcache.h */
/*
- * This is called with the inode lock held. It just looks at the last
- * inode on the in-use list, and if the inode is trivially freeable
- * we just move it to the unused list.
- *
- * Otherwise we just move the inode to be the first inode and expect to
- * get back to the problem later..
+ * This is called with the inode lock held. It searches
+ * the in-use for the specified number of freeable inodes.
+ * Freeable inodes are moved to a temporary list and then
+ * placed on the unused list by dispose_list.
+ *
+ * N.B. The spinlock is released to call dispose_list.
*/
#define CAN_UNUSE(inode) \
(((inode)->i_count == 0) && \
- ((inode)->i_nrpages == 0) && \
(!(inode)->i_state))

-static void try_to_free_inodes(void)
+static void try_to_free_inodes(int goal)
{
struct list_head * tmp;
struct list_head *head = &inode_in_use;
+ LIST_HEAD(freeable);
+ int found = 0, search = goal << 1;

- tmp = head->prev;
- if (tmp != head) {
+ while ((tmp = head->prev) != head && search--) {
struct inode * inode;

list_del(tmp);
@@ -338,13 +345,90 @@
if (CAN_UNUSE(inode)) {
list_del(&inode->i_hash);
INIT_LIST_HEAD(&inode->i_hash);
- head = &inode_unused;
+ list_add(tmp, &freeable);
+ if (++found < goal)
+ continue;
+ break;
}
list_add(tmp, head);
}
+ /*
+ * If we didn't free any inodes, do a limited
+ * pruning of the dcache to help the next time.
+ */
+ spin_unlock(&inode_lock);
+ if (found)
+ dispose_list(&freeable);
+ else
+ prune_dcache(goal);
+ spin_lock(&inode_lock);
}
-

+/*
+ * This is called with the spinlock held, but releases
+ * the lock when freeing or allocating inodes.
+ * Look out! This returns with the inode lock held if
+ * it got an inode..
+ */
+static struct inode * grow_inodes(void)
+{
+ struct inode * inode;
+
+ /*
+ * Check whether to shrink the dcache ... if we've
+ * allocated more than half of the nominal maximum,
+ * try shrinking before allocating more.
+ */
+ if (inodes_stat.nr_inodes >= (max_inodes >> 1)) {
+ struct list_head * tmp;
+
+#ifdef INODE_DEBUG
+ printk("grow_inodes: %d inodes, pruning dcache ...",
+ inodes_stat.nr_inodes);
+#endif
+ spin_unlock(&inode_lock);
+ prune_dcache(128);
+ spin_lock(&inode_lock);
+ try_to_free_inodes(128);
+ tmp = inode_unused.next;
+ if (tmp != &inode_unused) {
+ printk("got one\n");
+ inodes_stat.nr_free_inodes--;
+ list_del(tmp);
+ inode = list_entry(tmp, struct inode, i_list);
+ return inode;
+ }
+#ifdef INODE_DEBUG
+ else
+ printk("failed\n");
+#endif
+ }
+
+ spin_unlock(&inode_lock);
+ inode = (struct inode *)__get_free_page(GFP_KERNEL);
+ if (inode) {
+ int size;
+ struct inode * tmp;
+
+ spin_lock(&inode_lock);
+ size = PAGE_SIZE - 2*sizeof(struct inode);
+ tmp = inode;
+ do {
+ tmp++;
+ init_once(tmp);
+ list_add(&tmp->i_list, &inode_unused);
+ inodes_stat.nr_free_inodes++;
+ size -= sizeof(struct inode);
+ } while (size >= 0);
+ init_once(inode);
+ inodes_stat.nr_inodes += PAGE_SIZE / sizeof(struct inode);
+ }
+ return inode;
+}
+
+/*
+ * Called with the inode lock held.
+ */
static struct inode * find_inode(struct super_block * sb, unsigned long ino, struct list_head *head)
{
struct list_head *tmp;
@@ -403,28 +487,42 @@
struct list_head * tmp;

spin_lock(&inode_lock);
- try_to_free_inodes();
+ /*
+ * Check whether to restock the unused list.
+ */
+ if (inodes_stat.nr_free_inodes < 16)
+ try_to_free_inodes(8);
tmp = inode_unused.next;
if (tmp != &inode_unused) {
list_del(tmp);
+ inodes_stat.nr_free_inodes--;
inode = list_entry(tmp, struct inode, i_list);
add_new_inode:
+#ifdef INODE_PARANOIA
+if (inode->i_count)
+printk("get_empty_inode: unused count=%d\n", inode->i_count);
+#endif
+ list_add(&inode->i_list, &inode_in_use);
inode->i_sb = NULL;
inode->i_dev = 0;
inode->i_ino = ++last_ino;
inode->i_count = 1;
- list_add(&inode->i_list, &inode_in_use);
inode->i_state = 0;
spin_unlock(&inode_lock);
clean_inode(inode);
return inode;
}

+#ifdef INODE_PARANOIA
+if (inodes_stat.nr_free_inodes) {
+printk("get_empty_inode: bad free count %d\n", inodes_stat.nr_free_inodes);
+inodes_stat.nr_free_inodes = 0;
+}
+#endif
/*
* Warning: if this succeeded, we will now
* return with the inode lock.
*/
- spin_unlock(&inode_lock);
inode = grow_inodes();
if (inode)
goto add_new_inode;
@@ -442,8 +540,13 @@

if (tmp != &inode_unused) {
list_del(tmp);
+ inodes_stat.nr_free_inodes--;
inode = list_entry(tmp, struct inode, i_list);
add_new_inode:
+#ifdef INODE_PARANOIA
+if (inode->i_count)
+printk("get_new_inode: unused count=%d\n", inode->i_count);
+#endif
list_add(&inode->i_list, &inode_in_use);
list_add(&inode->i_hash, head);
inode->i_sb = sb;
@@ -458,12 +561,17 @@
return inode;
}

+#ifdef INODE_PARANOIA
+if (inodes_stat.nr_free_inodes) {
+printk("get_new_inode: bad free count %d\n", inodes_stat.nr_free_inodes);
+inodes_stat.nr_free_inodes = 0;
+}
+#endif
/*
- * Uhhuh.. We need to expand. Unlock for the allocation,
- * but note that "grow_inodes()" will return with the
- * lock held again if the allocation succeeded.
+ * Uhhuh.. We need to expand. Note that "grow_inodes()" will
+ * release the spinlock, but will return with the lock held
+ * again if the allocation succeeded.
*/
- spin_unlock(&inode_lock);
inode = grow_inodes();
if (inode) {
/* We released the lock, so.. */
@@ -471,6 +579,7 @@
if (!old)
goto add_new_inode;
list_add(&inode->i_list, &inode_unused);
+ inodes_stat.nr_free_inodes++;
spin_unlock(&inode_lock);
wait_on_inode(old);
return old;
@@ -491,14 +600,25 @@
struct inode * inode;

spin_lock(&inode_lock);
+ if (!inodes_stat.nr_free_inodes)
+ goto restock;
+search:
inode = find_inode(sb, ino, head);
if (!inode) {
- try_to_free_inodes();
return get_new_inode(sb, ino, head);
}
spin_unlock(&inode_lock);
wait_on_inode(inode);
return inode;
+
+ /*
+ * We restock the freelist before calling find,
+ * in order to avoid repeating the search.
+ * (The unused list usually won't be empty.)
+ */
+restock:
+ try_to_free_inodes(8);
+ goto search;
}

void insert_inode_hash(struct inode *inode)
@@ -530,11 +650,39 @@
delete(inode);
spin_lock(&inode_lock);
}
+#ifdef INODE_PARANOIA
+if (inode->i_nrpages)
+printk("iput: device %s inode %ld not cleared, pages=%ld\n",
+kdevname(inode->i_dev), inode->i_ino, inode->i_nrpages);
+if (!list_empty(&inode->i_hash))
+printk("iput: device %s inode %ld rehashed\n",
+kdevname(inode->i_dev), inode->i_ino);
+if (!list_empty(&inode->i_list))
+printk("iput: device %s inode %ld reinserted\n",
+kdevname(inode->i_dev), inode->i_ino);
+#endif
}
if (list_empty(&inode->i_hash)) {
list_del(&inode->i_list);
+ INIT_LIST_HEAD(&inode->i_list);
+ spin_unlock(&inode_lock);
+ clear_inode(inode);
+ spin_lock(&inode_lock);
list_add(&inode->i_list, &inode_unused);
+ inodes_stat.nr_free_inodes++;
}
+#ifdef INODE_PARANOIA
+if (inode->i_count)
+printk("iput: device %s inode %ld count changed, count=%d\n",
+kdevname(inode->i_dev), inode->i_ino, inode->i_count);
+if (atomic_read(&inode->i_sem.count) != 1)
+printk("iput: Aieee, semaphore in use device %s, count=%d\n",
+kdevname(inode->i_dev), atomic_read(&inode->i_sem.count));
+#endif
+ }
+ if (inode->i_count > (1<<15)) {
+ printk("iput: device %s inode %ld count wrapped\n",
+ kdevname(inode->i_dev), inode->i_ino);
}
spin_unlock(&inode_lock);
}
--- linux-2.1.54/fs/dcache.c.old Sat Sep 6 16:03:33 1997
+++ linux-2.1.54/fs/dcache.c Mon Sep 8 15:52:37 1997
@@ -119,8 +119,10 @@
* something (at which point we need to unuse
* all dentries).
*/
-void shrink_dcache(void)
+static void __shrink_dcache(int count)
{
+ int shrunk = 0;
+
for (;;) {
struct dentry *dentry;
struct list_head *tmp = dentry_unused.prev;
@@ -143,8 +145,27 @@
parent = dentry->d_parent;
d_free(dentry);
dput(parent);
+ if (count && ++shrunk >= count)
+ break;
}
}
+}
+
+/*
+ * Do a complete shrink of the dcache.
+ */
+void shrink_dcache(void)
+{
+ __shrink_dcache(0);
+}
+
+extern void prune_dcache(int);
+/*
+ * Shrink the dcache a little.
+ */
+void prune_dcache(int count)
+{
+ __shrink_dcache(count);
}

#define NAME_ALLOC_LEN(len) ((len+16) & ~15)

--------------EF11AA4C1A00656F10245E88--