patch for clear_inode race -- please test!

Bill Hawes (whawes@star.net)
Wed, 02 Jul 1997 15:57:37 -0400


This is a multi-part message in MIME format.
--------------4346DCB606D9F1DE0CAD94D9
Content-Type: text/plain; charset=us-ascii
Content-Transfer-Encoding: 7bit

The following patch against 2.0.30 closes a small race condition in
clear_inode that could result in messed up inode use counts and other
unpleasantness.

The problem arises from the possibility of blocking while clearing an
inode, which makes it possible for another task to find the inode and
start using it, only to have it cleared while in use. The patch works
by repeatedly testing the use count as each potential blocking operation
is performed; if the use count indicates the inode is back in service,
the code exits with a warning message.

Two other minor races are also fixed: while checking whether a device
can be mounted, or invalidating inodes for a device, it's possible that
blocking in clear_inode() will allow another inode for that device to be
put in service behind the inode being checked.

My previous attempts to eliminate the possibility of blocking in
clear_inode() (using a non-blocking truncate_inode_pages) hit a snag, as
there doesn't seem to be any way to avoid a potential block from the
dq_op->drop operation. Thus it seems better to just accept the block
and make clear_inode safe.

I've tested the patch with concurrent compiles in 6M of memory, but the
better test would be on heavily loaded systems with lots of file
activity (web servers, etc.) If you see the printk message, it means
that the patch avoided a problem :-)

-Bill
--------------4346DCB606D9F1DE0CAD94D9
Content-Type: text/plain; charset=us-ascii; name="inode_ci-patch"
Content-Transfer-Encoding: 7bit
Content-Disposition: inline; filename="inode_ci-patch"

--- fs/inode.c.old Sun Apr 27 15:54:43 1997
+++ fs/inode.c Wed Jul 2 14:19:51 1997
@@ -168,22 +168,49 @@
*
* The solution is the weird use of 'volatile'. Ho humm. Have to report
* it to the gcc lists, and hope we can do this more cleanly some day..
+ *
+ * WSH 07/02/97: closed a race condition resulting from blocking while
+ * calling truncate_inode_pages, wait_on_inode, or i_sb->dq_op->drop.
*/
void clear_inode(struct inode * inode)
{
struct wait_queue * wait;
+ int count;

- inode->i_count++;
- truncate_inode_pages(inode, 0);
- wait_on_inode(inode);
- if (IS_WRITABLE(inode)) {
- if (inode->i_sb && inode->i_sb->dq_op)
+ /*
+ * Valid use counts at entry are 0 or 1. After conditionally
+ * incrementing the use count should always be 1.
+ */
+ if (!(count = inode->i_count))
+ inode->i_count++;
+
+ while (inode->i_count == 1) {
+ if (inode->i_nrpages)
+ truncate_inode_pages(inode, 0);
+ else if (inode->i_lock)
+ __wait_on_inode(inode);
+ else if (IS_WRITABLE(inode) && inode->i_sb &&
+ inode->i_sb->dq_op) {
+ inode->i_lock = 1;
inode->i_sb->dq_op->drop(inode);
+ unlock_inode(inode);
+ }
+ else
+ break;
+ }
+ /*
+ * If we blocked above, it's possible that another process may
+ * have put the inode back in use.
+ */
+ if (--inode->i_count > 0) {
+printk("clear_inode: inode back in use, count=%d\n", inode->i_count);
+ return;
}
+
remove_inode_hash(inode);
remove_inode_free(inode);
wait = ((volatile struct inode *) inode)->i_wait;
- if (--inode->i_count)
+ if (count)
nr_free_inodes++;
memset(inode,0,sizeof(*inode));
((volatile struct inode *) inode)->i_wait = wait;
@@ -193,8 +220,10 @@
int fs_may_mount(kdev_t dev)
{
struct inode * inode, * next;
- int i;
+ int i, found;

+repeat:
+ found = 0;
next = first_inode;
for (i = nr_inodes ; i > 0 ; i--) {
inode = next;
@@ -204,7 +233,14 @@
if (inode->i_count || inode->i_dirt || inode->i_lock)
return 0;
clear_inode(inode);
+ found = 1;
}
+ /*
+ * If we cleared any inodes, we may have blocked, allowing inodes
+ * for this device to be put in service behind us in the list.
+ */
+ if (found)
+ goto repeat;
return 1;
}

@@ -383,8 +419,10 @@
void invalidate_inodes(kdev_t dev)
{
struct inode * inode, * next;
- int i;
+ int i, found;

+repeat:
+ found = 0;
next = first_inode;
for(i = nr_inodes ; i > 0 ; i--) {
inode = next;
@@ -397,7 +435,14 @@
continue;
}
clear_inode(inode);
+ found = 1;
}
+ /*
+ * If we cleared any inodes, we may have blocked, allowing inodes
+ * for this device to be put in service behind us in the list.
+ */
+ if (found)
+ goto repeat;
}

void sync_inodes(kdev_t dev)
@@ -519,6 +564,10 @@
}
if (best->i_dirt) {
write_inode(best);
+ goto repeat;
+ }
+ if (best->i_nrpages) {
+ truncate_inode_pages(inode, 0);
goto repeat;
}
if (best->i_count)

--------------4346DCB606D9F1DE0CAD94D9--