[PATCH] concurrent block allocation for ext2 against 2.5.64

From: Alex Tomas (bzzz@tmi.comex.ru)
Date: Thu Mar 13 2003 - 03:55:32 EST


Hi!

As Andrew said, concurrent balloc for ext3 is useless because of the BKL,
and I saw that in benchmarks. But it may be useful for ext2.

Results:
          9/100000   9/500000   16/100000  16/500000  32/100000  32/500000
ext2:     0m9.260s   0m46.160s  0m18.133s  1m33.553s  0m35.958s  3m4.164s
ext2-ca:  0m8.578s   0m42.712s  0m17.412s  1m28.637s  0m33.736s  2m53.824s

In these benchmarks, I ran 2 processes; each of them writes N blocks
(9, 16, 32), truncates the file, and repeats these steps M times (100000, 500000).

diff -uNr linux/fs/ext2/balloc.c edited/fs/ext2/balloc.c
--- linux/fs/ext2/balloc.c Thu Feb 20 16:18:53 2003
+++ edited/fs/ext2/balloc.c Thu Mar 13 10:54:50 2003
@@ -98,9 +98,13 @@
 {
         struct ext2_sb_info * sbi = EXT2_SB(sb);
         struct ext2_super_block * es = sbi->s_es;
- unsigned free_blocks = le32_to_cpu(es->s_free_blocks_count);
- unsigned root_blocks = le32_to_cpu(es->s_r_blocks_count);
+ unsigned free_blocks;
+ unsigned root_blocks;
 
+ spin_lock(&sbi->s_alloc_lock);
+
+ free_blocks = le32_to_cpu(es->s_free_blocks_count);
+ root_blocks = le32_to_cpu(es->s_r_blocks_count);
         if (free_blocks < count)
                 count = free_blocks;
 
@@ -113,11 +117,16 @@
                  */
                 if (free_blocks > root_blocks)
                         count = free_blocks - root_blocks;
- else
+ else {
+ spin_unlock(&sbi->s_alloc_lock);
                         return 0;
+ }
         }
 
         es->s_free_blocks_count = cpu_to_le32(free_blocks - count);
+
+ spin_unlock(&sbi->s_alloc_lock);
+
         mark_buffer_dirty(sbi->s_sbh);
         sb->s_dirt = 1;
         return count;
@@ -128,35 +137,54 @@
         if (count) {
                 struct ext2_sb_info * sbi = EXT2_SB(sb);
                 struct ext2_super_block * es = sbi->s_es;
- unsigned free_blocks = le32_to_cpu(es->s_free_blocks_count);
+ unsigned free_blocks;
+
+ spin_lock(&sbi->s_alloc_lock);
+ free_blocks = le32_to_cpu(es->s_free_blocks_count);
                 es->s_free_blocks_count = cpu_to_le32(free_blocks + count);
+ spin_unlock(&sbi->s_alloc_lock);
+
                 mark_buffer_dirty(sbi->s_sbh);
                 sb->s_dirt = 1;
         }
 }
 
-static inline int group_reserve_blocks(struct ext2_group_desc *desc,
+static inline int group_reserve_blocks(struct ext2_sb_info *sbi, struct ext2_group_desc *desc,
                                     struct buffer_head *bh, int count)
 {
         unsigned free_blocks;
 
- if (!desc->bg_free_blocks_count)
+ spin_lock(&sbi->s_alloc_lock);
+
+ if (!desc->bg_free_blocks_count) {
+ spin_unlock(&sbi->s_alloc_lock);
                 return 0;
+ }
 
         free_blocks = le16_to_cpu(desc->bg_free_blocks_count);
         if (free_blocks < count)
                 count = free_blocks;
         desc->bg_free_blocks_count = cpu_to_le16(free_blocks - count);
+
+ spin_unlock(&sbi->s_alloc_lock);
+
         mark_buffer_dirty(bh);
         return count;
 }
 
-static inline void group_release_blocks(struct ext2_group_desc *desc,
+static inline void group_release_blocks(struct ext2_sb_info *sbi, struct ext2_group_desc *desc,
                                     struct buffer_head *bh, int count)
 {
         if (count) {
- unsigned free_blocks = le16_to_cpu(desc->bg_free_blocks_count);
+ unsigned free_blocks;
+
+ spin_lock(&sbi->s_alloc_lock);
+
+ free_blocks = le16_to_cpu(desc->bg_free_blocks_count);
                 desc->bg_free_blocks_count = cpu_to_le16(free_blocks + count);
+
+ spin_unlock(&sbi->s_alloc_lock);
+
                 mark_buffer_dirty(bh);
         }
 }
@@ -176,7 +204,6 @@
         struct ext2_super_block * es;
         unsigned freed = 0, group_freed;
 
- lock_super (sb);
         es = EXT2_SB(sb)->s_es;
         if (block < le32_to_cpu(es->s_first_data_block) ||
             block + count < block ||
@@ -224,7 +251,7 @@
                             block, count);
 
         for (i = 0, group_freed = 0; i < count; i++) {
- if (!ext2_clear_bit(bit + i, bitmap_bh->b_data))
+ if (!test_and_clear_bit(bit + i, (void *) bitmap_bh->b_data))
                         ext2_error (sb, "ext2_free_blocks",
                                       "bit already cleared for block %lu",
                                       block + i);
@@ -236,7 +263,7 @@
         if (sb->s_flags & MS_SYNCHRONOUS)
                 sync_dirty_buffer(bitmap_bh);
 
- group_release_blocks(desc, bh2, group_freed);
+ group_release_blocks(EXT2_SB(sb), desc, bh2, group_freed);
         freed += group_freed;
 
         if (overflow) {
@@ -247,7 +274,6 @@
 error_return:
         brelse(bitmap_bh);
         release_blocks(sb, freed);
- unlock_super (sb);
         DQUOT_FREE_BLOCK(inode, freed);
 }
 
@@ -258,6 +284,8 @@
 
         if (!ext2_test_bit(goal, map))
                 goto got_it;
+
+repeat:
         if (goal) {
                 /*
                  * The goal was occupied; search forward for a free
@@ -297,7 +325,8 @@
         }
         return -1;
 got_it:
- ext2_set_bit(goal, map);
+ if (test_and_set_bit(goal, (void *) map))
+ goto repeat;
         return goal;
 }
 
@@ -342,8 +371,6 @@
 
         dq_alloc = prealloc_goal + 1;
 
- lock_super (sb);
-
         es_alloc = reserve_blocks(sb, dq_alloc);
         if (!es_alloc) {
                 *err = -ENOSPC;
@@ -360,7 +387,7 @@
         if (!desc)
                 goto io_error;
 
- group_alloc = group_reserve_blocks(desc, gdp_bh, es_alloc);
+ group_alloc = group_reserve_blocks(sbi, desc, gdp_bh, es_alloc);
         if (group_alloc) {
                 ret_block = ((goal - le32_to_cpu(es->s_first_data_block)) %
                                         group_size);
@@ -375,7 +402,7 @@
                                 group_size, ret_block);
                 if (ret_block >= 0)
                         goto got_block;
- group_release_blocks(desc, gdp_bh, group_alloc);
+ group_release_blocks(sbi, desc, gdp_bh, group_alloc);
                 group_alloc = 0;
         }
 
@@ -393,7 +420,7 @@
                 desc = ext2_get_group_desc(sb, group_no, &gdp_bh);
                 if (!desc)
                         goto io_error;
- group_alloc = group_reserve_blocks(desc, gdp_bh, es_alloc);
+ group_alloc = group_reserve_blocks(sbi, desc, gdp_bh, es_alloc);
         }
         if (bit >= sbi->s_groups_count) {
                 *err = -ENOSPC;
@@ -452,7 +479,7 @@
                 unsigned n;
 
                 for (n = 0; n < group_alloc && ++ret_block < group_size; n++) {
- if (ext2_set_bit(ret_block, bitmap_bh->b_data))
+ if (test_and_set_bit(ret_block, (void *) bitmap_bh->b_data))
                                  break;
                 }
                 *prealloc_block = block + 1;
@@ -471,10 +498,9 @@
 
         *err = 0;
 out_release:
- group_release_blocks(desc, gdp_bh, group_alloc);
+ group_release_blocks(sbi, desc, gdp_bh, group_alloc);
         release_blocks(sb, es_alloc);
 out_unlock:
- unlock_super (sb);
         DQUOT_FREE_BLOCK(inode, dq_alloc);
 out:
         brelse(bitmap_bh);
diff -uNr linux/fs/ext2/super.c edited/fs/ext2/super.c
--- linux/fs/ext2/super.c Thu Feb 20 16:18:53 2003
+++ edited/fs/ext2/super.c Wed Mar 12 23:29:53 2003
@@ -564,6 +564,7 @@
                 return -ENOMEM;
         sb->s_fs_info = sbi;
         memset(sbi, 0, sizeof(*sbi));
+ spin_lock_init(&sbi->s_alloc_lock);
 
         /*
          * See what the current blocksize for the device is, and
diff -uNr linux/include/linux/ext2_fs_sb.h edited/include/linux/ext2_fs_sb.h
--- linux/include/linux/ext2_fs_sb.h Mon Nov 11 06:28:30 2002
+++ edited/include/linux/ext2_fs_sb.h Wed Mar 12 22:57:30 2003
@@ -45,6 +45,7 @@
         u32 s_next_generation;
         unsigned long s_dir_count;
         u8 *s_debts;
+ spinlock_t s_alloc_lock;
 };
 
 #endif /* _LINUX_EXT2_FS_SB */

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/



This archive was generated by hypermail 2b29 : Sat Mar 15 2003 - 22:00:34 EST