Alexander, Ted,
I was taking a hard look at the proposed changes. In load_inode_bitmap()
we shouldn't be caching the I/O error case (this was in the old code too).
It means we don't have to check for NULL bh in the table cache each time
through, and for the (s_groups_count < MAX_GROUPS_LOADED) case there are 3 fewer
comparisons and 1 fewer assignment before we return (the other fast path
is still 4 comparisons)... Also, a NULL return for an I/O error is good
for 1 fewer comparison per call (need to change callers to test for NULL
instead of IS_ERR()) and the callers have no more work than before...
We should probably reconcile the changes here with load_block_bitmap(),
or for that matter, we should make a single function and pass in a
pointer to s_{block,inode}_bitmap and s_{block,inode}_bitmap_number
arrays and save the duplicate code maintenance (even if inline).
If we have a generic function like so:
struct buffer_head *load_bitmap(struct super_block *sb,
unsigned int block_group,
unsigned long *bitmap_number,
struct buffer_head *bitmap,
size_t gdt_offset)
And we call the load_bitmap() function like either:
bh = load_bitmap(sb, block_group,
sbi->s_inode_bitmap_number,
sbi->s_inode_bitmap,
offsetof(ext2_group_desc, bg_inode_bitmap));
or
bh = load_bitmap(sb, block_group,
sbi->s_block_bitmap_number,
sbi->s_block_bitmap,
offsetof(ext2_group_desc, bg_block_bitmap));
We use the gdt_offset when calling a generic read_bitmap() function,
which uses the gdt_offset (rather than have function pointers, which
are overkill here) to determine which bitmap to load (my pointer math
may be a bit off here, but the idea is valid):
bh = bread(sb->s_dev,
le32_to_cpu(*((__u32 *)((void *)gdp + gdt_offset))),
sb->s_blocksize);
Now that I look at load_block_bitmap(), I see we use block_group to
dereference bitmap arrays, but we don't check it until later, and it
is much more complex than the load_inode_bitmap() now is. We need to
audit the callers of load_{block,inode}_bitmap() (a quick check shows
it is impossible to call load_inode_bitmap() with a bad group number),
and then we can simply remove the block_group check from here entirely.
I think with Al's proposed changes this will be relatively easy, but
I would rather do it _after_ his changes have been accepted. Would
anyone be interested in a real patch that does this?
Comments/changes are enclosed below, with sh style # comments by me.
PS - discussion on this should probably move to ext2-devel or fsdevel.
*
* Return the buffer head of the bitmap, or NULL on error.
*/
static struct buffer_head *load_inode_bitmap (struct super_block * sb,
unsigned int block_group)
{
> + struct ext2_sb_info *sbi = EXT2_SB(sb);
struct buffer_head *bh;
> + int i, slot = 0;
> +
> + if (block_group >= sbi->s_groups_count)
> + ext2_panic(sb, __FUNCTION__,
> + "block_group >= groups_count - "
> + "block_group = %d, groups_count = %lu",
> + block_group, sbi->s_groups_count);
> +
/* Case A: we can fit all blocks in cache - keep in group order. */
> + if (sbi->s_groups_count <= EXT2_MAX_GROUP_LOADED) {
> + slot = block_group;
> + bh = sbi->s_inode_bitmap[slot];
> + if (!bh)
> + goto read_it;
> + if (sbi->s_inode_bitmap_number[slot] == slot)
> + goto found;
/* FIXME: when online shrinking we may go from case B to A */
> + ext2_panic (sb, "load_inode_bitmap",
> + "block_group != inode_bitmap_number");
# no need for return on panic
> + }
> +
> + bh = sbi->s_inode_bitmap[0];
/*
* We haven't loaded any bitmaps yet, or last one was an I/O error.
* No need to cache the I/O error case, so just overwrite it.
*/
if (!bh)
goto read_it;
/*
* Case B: we keep LRU cache of accessed bitmaps (0 = most recent).
* Fast path - we are using the same bitmap as last time.
*/
if (sbi->s_inode_bitmap_number[0] == block_group)
goto found;
> +
# can start at 1, because we already checked bitmap[0]
for (i = 1; i < sbi->s_loaded_inode_bitmaps &&
> + sbi->s_inode_bitmap_number[i] != block_group;
> + i++)
> + ;
> + if (i < sbi->s_loaded_inode_bitmaps) {
> + int j;
> + unsigned long nr = sbi->s_inode_bitmap_number[i];
> + bh = sbi->s_inode_bitmap[i];
> + for (j = i; j > 0; j--) {
> + sbi->s_inode_bitmap_number[j] =
> + sbi->s_inode_bitmap_number[j - 1];
> + sbi->s_inode_bitmap[j] =
> + sbi->s_inode_bitmap[j - 1];
> + }
> + sbi->s_inode_bitmap_number[0] = nr;
> + sbi->s_inode_bitmap[0] = bh;
/* Only when online growing (case A to B) can bh be NULL
if (bh) */
> + goto found;
> + } else {
> + int j;
> + if (sbi->s_loaded_inode_bitmaps < EXT2_MAX_GROUP_LOADED)
> + sbi->s_loaded_inode_bitmaps++;
> + else
> + brelse(sbi->s_inode_bitmap[EXT2_MAX_GROUP_LOADED - 1]);
> + for (j = sbi->s_loaded_inode_bitmaps - 1; j > 0; j--) {
> + sbi->s_inode_bitmap_number[j] =
> + sbi->s_inode_bitmap_number[j - 1];
> + sbi->s_inode_bitmap[j] =
> + sbi->s_inode_bitmap[j - 1];
> + }
> + }
> +
> +read_it:
> + bh = read_inode_bitmap(sb, block_group);
> + /*
* On I/O error, just leave a NULL in the cache's block pointer
* for this group. The IO will be retried next time.
> + */
> + sbi->s_inode_bitmap[slot] = bh;
> + sbi->s_inode_bitmap_number[slot] = block_group;
> +found:
> + return bh;
> +}
# Need to fix callers of load_inode_bitmap() to check for NULL return
> }
> block_group = (ino - 1) / EXT2_INODES_PER_GROUP(sb);
> bit = (ino - 1) % EXT2_INODES_PER_GROUP(sb);
> + bh = load_inode_bitmap (sb, block_group);
if (!bh)
goto error_return;
# In find_cg_other() we should probably also check for free blocks,
# to give inode a chance to be close to any blocks it will likely soon
# allocate. Don't need to byte swap to test non-zeroness... It would be
# nice to use "gdt" instead of "cg", to keep it consistent with the rest
# of the ext2 code... Maybe find_group_dir() and find_group_other()?
> + * Try to place the inode in its parent directory
> + */
i = dir_group;
gdt = ext2_get_group_desc(sb, i, &bh);
if (gdt && gdt->bg_free_inodes_count && gdt->bg_free_blocks_count)
goto found;
# Same here - only allocate an inode in a group with free blocks, and no
# byte swapping needed to check non-zeroness... We leave a few blocks
# free in each group for use by inodes that already exist there...
> + /*
> + * Use a quadratic hash to find a group with a
* free inode and some free blocks
> + */
> + for (j = 1; j < ngroups; j <<= 1) {
> + i += j;
> + if (i >= ngroups)
> + i -= ngroups;
gdt = ext2_get_group_desc(sb, i, &bh);
if (gdt && gdt->bg_free_inodes_count &&
gdt->bg_free_blocks_count >= 32)
goto found;
# Now we have to search all groups for a free inode in this next loop
# (2 more than with old code). This is only less efficient in the case
# we are creating lots of zero length files, or if we failed in the
# second stage because the groups are all (nearly) full...
# In either case, the inode may not be in a group with (m)any free blocks.
# I'd rather optimize for the normal case of files with blocks (more
# inode/block locality).
> + /*
> + * That failed: try linear search for a free inode
> + */
i = dir_group;
for (j = 0; j < ngroups; j++, i++) {
if (i >= ngroups)
> + i = 0;
gdt = ext2_get_group_desc(sb, i, &bh);
if (gdt && gdt->bg_free_inodes_count)
> + goto found;
> + }
# In ext2_new_inode() you removed the case where the bit was set between
# time we searched and the time we set it. This probably can't happen
# while we hold the superblock lock. It may be worth keeping for when we
# go to per-group locks.
> repeat:
> + if (S_ISDIR(mode))
> + i = find_cg_dir(sb, dir->u.ext2_i.i_block_group);
> else
> + i = find_cg_other(sb, dir->u.ext2_i.i_block_group);
>
> + err = -ENOSPC;
> + if (i == -1)
> + goto fail;
> +
> + err = -EIO;
> + bh = load_inode_bitmap (sb, i);
> + if (!bh)
> + goto fail2;
> +
> + j = ext2_find_first_zero_bit ((unsigned long *) bh->b_data,
> + EXT2_INODES_PER_GROUP(sb));
> + if (j >= EXT2_INODES_PER_GROUP(sb))
> + goto bad_count;
/* With the superblock lock we are safe here, but not once it's gone.
if (ext2_set_bit(j, bh->b_data)) {
ext2_warning(sb, __FUNCTION__,
"bit already set for inode %d", j);
goto repeat;
}
*/
# Need to check that we got a good gdp back here...
> +fail2:
> + gdp = ext2_get_group_desc (sb, i, &bh2);
if (!gdp)
goto fail;
> + gdp->bg_free_inodes_count =
> + cpu_to_le16(le16_to_cpu(gdp->bg_free_inodes_count) + 1);
> + if (S_ISDIR(mode))
> + gdp->bg_used_dirs_count =
> + cpu_to_le16(le16_to_cpu(gdp->bg_used_dirs_count) - 1);
> + mark_buffer_dirty(bh2);
> +fail:
> + unlock_super(sb);
> + iput(inode);
> + return ERR_PTR(err);
Cheers, Andreas
-- Andreas Dilger \ "If a man ate a pound of pasta and a pound of antipasto, \ would they cancel out, leaving him still hungry?" http://www-mddsp.enel.ucalgary.ca/People/adilger/ -- Dogbert - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@vger.kernel.org Please read the FAQ at http://www.tux.org/lkml/
This archive was generated by hypermail 2b29 : Thu Dec 07 2000 - 21:00:07 EST