Re: ext2fs bug : files are disapeared, unable to delete, two files' contents are switched etc.

From: tytso@mit.edu
Date: Sun Mar 26 2000 - 07:44:53 EST


   From: "Soohoon Lee" <soohoon.lee@alpha-processor.com>
   Date: Tue, 21 Mar 2000 17:59:54 -0500

   With small free memory and frequent file create/delete operations,
   ext2 shows many problems. Sometimes files are disapeared, unable to
   delete, two files' contents are switched fsck finds inconsitencies in
   the properly unmounted filesystem etc. I'm not sure if this patch
   fixes all the problems but I cannot see the problems any more.

   tested platform is UP2000, 1 CPU(EV67 750MHz), 256M, 2.2.14
   While one process is creating a file, it can be switched to do disk I/O like
   following
   stack trace.

    schedule+0x1b0 (AR:0xfffffc0005d0bd48)
    sleep_on+0x5c (AR:0xfffffc0005d0bd68)
    wakeup_bdflush+0x50 (AR:0xfffffc0005d0bd98)
    refile_buffer+0x10c (AR:0xfffffc0005d0bda8) -> mark_buffer_dirty()
    ext2_add_entry+0x3ac (AR:0xfffffc0005d0bdc8)
    ext2_create+0xe4 (AR:0xfffffc0005d0be28)
    open_namei+0x1c4 (AR:0xfffffc0005d0be78)
    filp_open+0x80 (AR:0xfffffc0005d0beb8)
    sys_open+0x6c (AR:0xfffffc0005d0bee8)
    Exception frame:
    entSys+0xa8 (AR:0xfffffc0005d0bf18)

You're right, this is a problem. ext2_add_entry assumed
mark_buffer_dirty() couldn't block and it was wrong.

   Supposing that ext2 is MP safe, my fix for this problem is removing
   mark_buffer_dirty() in ext2_add_entry(), simple. Every caller of
   ext2_add_entry() calls mark_buffer_dirty(), So it's superflous. Then
   the first process would not sleep until the directory entry is marked
   with allocated inode.

This apparently solves the problem for Linux 2.2, but I'd prefer a
cleaner patch for Linux 2.3. Enclosed find the patch, which I will be
sending on to Linus. I haven't had a chance to backport this patch
to Linux 2.2 yet, but it shoudl be relatively simple.

What I've done in this patch is to move more of the processing into
ext2_add_entry and ext2_remove_entry, which also has the advantage of
tightening up the code and making it smaller.

                                                - Ted

Patch generated: on Sun Mar 26 07:11:16 EST 2000 by tytso@trampoline.thunk.org
against Linux version 2.3.99
 
===================================================================
RCS file: fs/ext2/RCS/namei.c,v
retrieving revision 1.1
diff -u -r1.1 fs/ext2/namei.c
--- fs/ext2/namei.c 2000/03/25 00:37:16 1.1
+++ fs/ext2/namei.c 2000/03/26 07:42:58
@@ -182,61 +182,71 @@
         return NULL;
 }
 
+static inline void ext2_set_de_type(struct super_block *sb,
+ struct ext2_dir_entry_2 *de,
+ umode_t mode) {
+ if (!EXT2_HAS_INCOMPAT_FEATURE(sb, EXT2_FEATURE_INCOMPAT_FILETYPE))
+ return;
+ if (S_ISREG(mode))
+ de->file_type = EXT2_FT_REG_FILE;
+ else if (S_ISDIR(mode))
+ de->file_type = EXT2_FT_DIR;
+ else if (S_ISLNK(mode))
+ de->file_type = EXT2_FT_SYMLINK;
+ else if (S_ISSOCK(mode))
+ de->file_type = EXT2_FT_SOCK;
+ else if (S_ISFIFO(mode))
+ de->file_type = EXT2_FT_FIFO;
+ else if (S_ISCHR(mode))
+ de->file_type = EXT2_FT_CHRDEV;
+ else if (S_ISBLK(mode))
+ de->file_type = EXT2_FT_BLKDEV;
+}
+
 /*
  * ext2_add_entry()
  *
- * adds a file entry to the specified directory, using the same
- * semantics as ext2_find_entry(). It returns NULL if it failed.
- *
- * NOTE!! The inode part of 'de' is left at 0 - which means you
- * may not sleep between calling this and putting something into
- * the entry, as someone else might have used it while you slept.
+ * adds a file entry to the specified directory.
  */
-static struct buffer_head * ext2_add_entry (struct inode * dir,
- const char * name, int namelen,
- struct ext2_dir_entry_2 ** res_dir,
- int *err)
+int ext2_add_entry (struct inode * dir, const char * name, int namelen,
+ struct inode *inode)
 {
         unsigned long offset;
         unsigned short rec_len;
         struct buffer_head * bh;
         struct ext2_dir_entry_2 * de, * de1;
         struct super_block * sb;
+ int retval;
 
- *err = -EINVAL;
- *res_dir = NULL;
         if (!dir || !dir->i_nlink)
- return NULL;
+ return -EINVAL;
         sb = dir->i_sb;
 
         if (!namelen)
- return NULL;
+ return -EINVAL;
         /*
          * Is this a busy deleted directory? Can't create new files if so
          */
         if (dir->i_size == 0)
         {
- *err = -ENOENT;
- return NULL;
+ return -ENOENT;
         }
- bh = ext2_bread (dir, 0, 0, err);
+ bh = ext2_bread (dir, 0, 0, &retval);
         if (!bh)
- return NULL;
+ return retval;
         rec_len = EXT2_DIR_REC_LEN(namelen);
         offset = 0;
         de = (struct ext2_dir_entry_2 *) bh->b_data;
- *err = -ENOSPC;
         while (1) {
                 if ((char *)de >= sb->s_blocksize + bh->b_data) {
                         brelse (bh);
                         bh = NULL;
- bh = ext2_bread (dir, offset >> EXT2_BLOCK_SIZE_BITS(sb), 1, err);
+ bh = ext2_bread (dir, offset >> EXT2_BLOCK_SIZE_BITS(sb), 1, &retval);
                         if (!bh)
- return NULL;
+ return retval;
                         if (dir->i_size <= offset) {
                                 if (dir->i_size == 0) {
- *err = -ENOENT;
- return NULL;
+ return -ENOENT;
                                 }
 
                                 ext2_debug ("creating next block\n");
@@ -256,14 +266,12 @@
                 }
                 if (!ext2_check_dir_entry ("ext2_add_entry", dir, de, bh,
                                            offset)) {
- *err = -ENOENT;
                         brelse (bh);
- return NULL;
+ return -ENOENT;
                 }
                 if (ext2_match (namelen, name, de)) {
- *err = -EEXIST;
                                 brelse (bh);
- return NULL;
+ return -EEXIST;
                 }
                 if ((le32_to_cpu(de->inode) == 0 && le16_to_cpu(de->rec_len) >= rec_len) ||
                     (le16_to_cpu(de->rec_len) >= EXT2_DIR_REC_LEN(de->name_len) + rec_len)) {
@@ -276,7 +284,11 @@
                                 de->rec_len = cpu_to_le16(EXT2_DIR_REC_LEN(de->name_len));
                                 de = de1;
                         }
- de->inode = 0;
+ if (inode) {
+ de->inode = cpu_to_le32(inode->i_ino);
+ ext2_set_de_type(dir->i_sb, de, inode->i_mode);
+ } else
+ de->inode = 0;
                         de->name_len = namelen;
                         de->file_type = 0;
                         memcpy (de->name, name, namelen);
@@ -296,22 +308,26 @@
                         mark_inode_dirty(dir);
                         dir->i_version = ++event;
                         mark_buffer_dirty(bh, 1);
- *res_dir = de;
- *err = 0;
- return bh;
+ if (IS_SYNC(dir)) {
+ ll_rw_block (WRITE, 1, &bh);
+ wait_on_buffer (bh);
+ }
+ brelse(bh);
+ return 0;
                 }
                 offset += le16_to_cpu(de->rec_len);
                 de = (struct ext2_dir_entry_2 *) ((char *) de + le16_to_cpu(de->rec_len));
         }
         brelse (bh);
- return NULL;
+ return -ENOSPC;
 }
 
 /*
  * ext2_delete_entry deletes a directory entry by merging it with the
  * previous entry
  */
-static int ext2_delete_entry (struct ext2_dir_entry_2 * dir,
+static int ext2_delete_entry (struct inode * dir,
+ struct ext2_dir_entry_2 * de_del,
                               struct buffer_head * bh)
 {
         struct ext2_dir_entry_2 * de, * pde;
@@ -324,13 +340,19 @@
                 if (!ext2_check_dir_entry ("ext2_delete_entry", NULL,
                                            de, bh, i))
                         return -EIO;
- if (de == dir) {
+ if (de == de_del) {
                         if (pde)
                                 pde->rec_len =
                                         cpu_to_le16(le16_to_cpu(pde->rec_len) +
- le16_to_cpu(dir->rec_len));
+ le16_to_cpu(de->rec_len));
                         else
- dir->inode = 0;
+ de->inode = 0;
+ dir->i_version = ++event;
+ mark_buffer_dirty(bh, 1);
+ if (IS_SYNC(dir)) {
+ ll_rw_block (WRITE, 1, &bh);
+ wait_on_buffer (bh);
+ }
                         return 0;
                 }
                 i += le16_to_cpu(de->rec_len);
@@ -340,27 +362,6 @@
         return -ENOENT;
 }
 
-static inline void ext2_set_de_type(struct super_block *sb,
- struct ext2_dir_entry_2 *de,
- umode_t mode) {
- if (!EXT2_HAS_INCOMPAT_FEATURE(sb, EXT2_FEATURE_INCOMPAT_FILETYPE))
- return;
- if (S_ISREG(mode))
- de->file_type = EXT2_FT_REG_FILE;
- else if (S_ISDIR(mode))
- de->file_type = EXT2_FT_DIR;
- else if (S_ISLNK(mode))
- de->file_type = EXT2_FT_SYMLINK;
- else if (S_ISSOCK(mode))
- de->file_type = EXT2_FT_SOCK;
- else if (S_ISFIFO(mode))
- de->file_type = EXT2_FT_FIFO;
- else if (S_ISCHR(mode))
- de->file_type = EXT2_FT_CHRDEV;
- else if (S_ISBLK(mode))
- de->file_type = EXT2_FT_BLKDEV;
-}
-
 /*
  * By the time this is called, we already have created
  * the directory cache entry for the new file, but it
@@ -372,38 +373,28 @@
 static int ext2_create (struct inode * dir, struct dentry * dentry, int mode)
 {
         struct inode * inode;
- struct buffer_head * bh;
- struct ext2_dir_entry_2 * de;
- int err = -EIO;
+ int err;
 
         /*
          * N.B. Several error exits in ext2_new_inode don't set err.
          */
         inode = ext2_new_inode (dir, mode, &err);
         if (!inode)
- return err;
+ return -EIO;
 
         inode->i_op = &ext2_file_inode_operations;
         inode->i_fop = &ext2_file_operations;
         inode->i_mapping->a_ops = &ext2_aops;
         inode->i_mode = mode;
         mark_inode_dirty(inode);
- bh = ext2_add_entry (dir, dentry->d_name.name, dentry->d_name.len, &de, &err);
- if (!bh) {
+ err = ext2_add_entry (dir, dentry->d_name.name, dentry->d_name.len,
+ inode);
+ if (err) {
                 inode->i_nlink--;
                 mark_inode_dirty(inode);
                 iput (inode);
                 return err;
         }
- de->inode = cpu_to_le32(inode->i_ino);
- ext2_set_de_type(dir->i_sb, de, S_IFREG);
- dir->i_version = ++event;
- mark_buffer_dirty(bh, 1);
- if (IS_SYNC(dir)) {
- ll_rw_block (WRITE, 1, &bh);
- wait_on_buffer (bh);
- }
- brelse (bh);
         d_instantiate(dentry, inode);
         return 0;
 }
@@ -411,56 +402,42 @@
 static int ext2_mknod (struct inode * dir, struct dentry *dentry, int mode, int rdev)
 {
         struct inode * inode;
- struct buffer_head * bh;
- struct ext2_dir_entry_2 * de;
- int err = -EIO;
+ int err;
 
         inode = ext2_new_inode (dir, mode, &err);
         if (!inode)
- goto out;
+ return -EIO;
 
         inode->i_uid = current->fsuid;
         init_special_inode(inode, mode, rdev);
- bh = ext2_add_entry (dir, dentry->d_name.name, dentry->d_name.len, &de, &err);
- if (!bh)
+ err = ext2_add_entry (dir, dentry->d_name.name, dentry->d_name.len,
+ inode);
+ if (err)
                 goto out_no_entry;
- de->inode = cpu_to_le32(inode->i_ino);
- dir->i_version = ++event;
- ext2_set_de_type(dir->i_sb, de, inode->i_mode);
         mark_inode_dirty(inode);
- mark_buffer_dirty(bh, 1);
- if (IS_SYNC(dir)) {
- ll_rw_block (WRITE, 1, &bh);
- wait_on_buffer (bh);
- }
         d_instantiate(dentry, inode);
- brelse(bh);
- err = 0;
-out:
- return err;
+ return 0;
 
 out_no_entry:
         inode->i_nlink--;
         mark_inode_dirty(inode);
         iput(inode);
- goto out;
+ return err;
 }
 
 static int ext2_mkdir(struct inode * dir, struct dentry * dentry, int mode)
 {
         struct inode * inode;
- struct buffer_head * bh, * dir_block;
+ struct buffer_head * dir_block;
         struct ext2_dir_entry_2 * de;
         int err;
 
- err = -EMLINK;
         if (dir->i_nlink >= EXT2_LINK_MAX)
- goto out;
+ return -EMLINK;
 
- err = -EIO;
         inode = ext2_new_inode (dir, S_IFDIR, &err);
         if (!inode)
- goto out;
+ return -EIO;
 
         inode->i_op = &ext2_dir_inode_operations;
         inode->i_fop = &ext2_dir_operations;
@@ -471,7 +448,7 @@
                 inode->i_nlink--; /* is this nlink == 0? */
                 mark_inode_dirty(inode);
                 iput (inode);
- return err;
+ return -EIO;
         }
         de = (struct ext2_dir_entry_2 *) dir_block->b_data;
         de->inode = cpu_to_le32(inode->i_ino);
@@ -492,31 +469,21 @@
         if (dir->i_mode & S_ISGID)
                 inode->i_mode |= S_ISGID;
         mark_inode_dirty(inode);
- bh = ext2_add_entry (dir, dentry->d_name.name, dentry->d_name.len, &de, &err);
- if (!bh)
+ err = ext2_add_entry (dir, dentry->d_name.name, dentry->d_name.len,
+ inode);
+ if (err)
                 goto out_no_entry;
- de->inode = cpu_to_le32(inode->i_ino);
- ext2_set_de_type(dir->i_sb, de, S_IFDIR);
- dir->i_version = ++event;
- mark_buffer_dirty(bh, 1);
- if (IS_SYNC(dir)) {
- ll_rw_block (WRITE, 1, &bh);
- wait_on_buffer (bh);
- }
         dir->i_nlink++;
         dir->u.ext2_i.i_flags &= ~EXT2_BTREE_FL;
         mark_inode_dirty(dir);
         d_instantiate(dentry, inode);
- brelse (bh);
- err = 0;
-out:
- return err;
+ return 0;
 
 out_no_entry:
         inode->i_nlink = 0;
         mark_inode_dirty(inode);
         iput (inode);
- goto out;
+ return err;
 }
 
 /*
@@ -604,15 +571,9 @@
         if (!empty_dir (inode))
                 goto end_rmdir;
 
- retval = ext2_delete_entry (de, bh);
- dir->i_version = ++event;
+ retval = ext2_delete_entry(dir, de, bh);
         if (retval)
                 goto end_rmdir;
- mark_buffer_dirty(bh, 1);
- if (IS_SYNC(dir)) {
- ll_rw_block (WRITE, 1, &bh);
- wait_on_buffer (bh);
- }
         if (inode->i_nlink != 2)
                 ext2_warning (inode->i_sb, "ext2_rmdir",
                               "empty directory has nlink!=2 (%d)",
@@ -657,15 +618,9 @@
                               inode->i_ino, inode->i_nlink);
                 inode->i_nlink = 1;
         }
- retval = ext2_delete_entry (de, bh);
+ retval = ext2_delete_entry(dir, de, bh);
         if (retval)
                 goto end_unlink;
- dir->i_version = ++event;
- mark_buffer_dirty(bh, 1);
- if (IS_SYNC(dir)) {
- ll_rw_block (WRITE, 1, &bh);
- wait_on_buffer (bh);
- }
         dir->i_ctime = dir->i_mtime = CURRENT_TIME;
         dir->u.ext2_i.i_flags &= ~EXT2_BTREE_FL;
         mark_inode_dirty(dir);
@@ -683,18 +638,14 @@
 static int ext2_symlink (struct inode * dir, struct dentry *dentry, const char * symname)
 {
         struct inode * inode;
- struct ext2_dir_entry_2 * de;
- struct buffer_head * bh = NULL;
         int l, err;
 
- err = -ENAMETOOLONG;
         l = strlen(symname)+1;
         if (l > dir->i_sb->s_blocksize)
- goto out;
+ return -ENAMETOOLONG;
 
- err = -EIO;
         if (!(inode = ext2_new_inode (dir, S_IFLNK, &err)))
- goto out;
+ return -EIO;
 
         inode->i_mode = S_IFLNK | S_IRWXUGO;
 
@@ -711,36 +662,24 @@
         }
         mark_inode_dirty(inode);
 
- bh = ext2_add_entry (dir, dentry->d_name.name, dentry->d_name.len, &de, &err);
- if (!bh)
+ err = ext2_add_entry (dir, dentry->d_name.name, dentry->d_name.len,
+ inode);
+ if (err)
                 goto out_no_entry;
- de->inode = cpu_to_le32(inode->i_ino);
- ext2_set_de_type(dir->i_sb, de, S_IFLNK);
- dir->i_version = ++event;
- mark_buffer_dirty(bh, 1);
- if (IS_SYNC(dir)) {
- ll_rw_block (WRITE, 1, &bh);
- wait_on_buffer (bh);
- }
- brelse (bh);
         d_instantiate(dentry, inode);
- err = 0;
-out:
- return err;
+ return 0;
 
 out_no_entry:
         inode->i_nlink--;
         mark_inode_dirty(inode);
         iput (inode);
- goto out;
+ return err;
 }
 
 static int ext2_link (struct dentry * old_dentry,
                 struct inode * dir, struct dentry *dentry)
 {
         struct inode *inode = old_dentry->d_inode;
- struct ext2_dir_entry_2 * de;
- struct buffer_head * bh;
         int err;
 
         if (S_ISDIR(inode->i_mode))
@@ -748,20 +687,12 @@
 
         if (inode->i_nlink >= EXT2_LINK_MAX)
                 return -EMLINK;
-
- bh = ext2_add_entry (dir, dentry->d_name.name, dentry->d_name.len, &de, &err);
- if (!bh)
+
+ err = ext2_add_entry (dir, dentry->d_name.name, dentry->d_name.len,
+ inode);
+ if (err)
                 return err;
 
- de->inode = cpu_to_le32(inode->i_ino);
- ext2_set_de_type(dir->i_sb, de, inode->i_mode);
- dir->i_version = ++event;
- mark_buffer_dirty(bh, 1);
- if (IS_SYNC(dir)) {
- ll_rw_block (WRITE, 1, &bh);
- wait_on_buffer (bh);
- }
- brelse (bh);
         inode->i_nlink++;
         inode->i_ctime = CURRENT_TIME;
         mark_inode_dirty(inode);
@@ -829,14 +760,26 @@
                         goto end_rename;
         }
         if (!new_bh) {
- new_bh = ext2_add_entry (new_dir, new_dentry->d_name.name,
- new_dentry->d_name.len, &new_de,
- &retval);
- if (!new_bh)
+ retval = ext2_add_entry (new_dir, new_dentry->d_name.name,
+ new_dentry->d_name.len,
+ old_inode);
+ if (retval)
                         goto end_rename;
+ } else {
+ new_de->inode = le32_to_cpu(old_inode->i_ino);
+ if (EXT2_HAS_INCOMPAT_FEATURE(new_dir->i_sb,
+ EXT2_FEATURE_INCOMPAT_FILETYPE))
+ new_de->file_type = old_de->file_type;
+ new_dir->i_version = ++event;
+ mark_buffer_dirty(new_bh, 1);
+ if (IS_SYNC(new_dir)) {
+ ll_rw_block (WRITE, 1, &new_bh);
+ wait_on_buffer (new_bh);
+ }
+ brelse(new_bh);
+ new_bh = NULL;
         }
- new_dir->i_version = ++event;
-
+
         /*
          * Like most other Unix systems, set the ctime for inodes on a
          * rename.
@@ -847,14 +790,8 @@
         /*
          * ok, that's it
          */
- new_de->inode = le32_to_cpu(old_inode->i_ino);
- if (EXT2_HAS_INCOMPAT_FEATURE(new_dir->i_sb,
- EXT2_FEATURE_INCOMPAT_FILETYPE))
- new_de->file_type = old_de->file_type;
-
- ext2_delete_entry (old_de, old_bh);
+ ext2_delete_entry(old_dir, old_de, old_bh);
 
- old_dir->i_version = ++event;
         if (new_inode) {
                 new_inode->i_nlink--;
                 new_inode->i_ctime = CURRENT_TIME;
@@ -876,16 +813,6 @@
                         new_dir->u.ext2_i.i_flags &= ~EXT2_BTREE_FL;
                         mark_inode_dirty(new_dir);
                 }
- }
- mark_buffer_dirty(old_bh, 1);
- if (IS_SYNC(old_dir)) {
- ll_rw_block (WRITE, 1, &old_bh);
- wait_on_buffer (old_bh);
- }
- mark_buffer_dirty(new_bh, 1);
- if (IS_SYNC(new_dir)) {
- ll_rw_block (WRITE, 1, &new_bh);
- wait_on_buffer (new_bh);
         }
 
         retval = 0;

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.rutgers.edu
Please read the FAQ at http://www.tux.org/lkml/



This archive was generated by hypermail 2b29 : Fri Mar 31 2000 - 21:00:17 EST