[PATCH 5/5] ext4: write support for preallocated blocks/extents
From: Amit K. Arora
Date: Thu Apr 26 2007 - 14:17:00 EST
This patch adds write support for preallocated (using fallocate system
call) blocks/extents. The preallocated extents in ext4 are marked
"uninitialized", hence they need special handling especially while
writing to them. This patch takes care of that.
Signed-off-by: Amit Arora <aarora@xxxxxxxxxx>
---
fs/ext4/extents.c | 228 +++++++++++++++++++++++++++++++++++-----
include/linux/ext4_fs_extents.h | 1
2 files changed, 202 insertions(+), 27 deletions(-)
Index: linux-2.6.21/fs/ext4/extents.c
===================================================================
--- linux-2.6.21.orig/fs/ext4/extents.c
+++ linux-2.6.21/fs/ext4/extents.c
@@ -1141,6 +1141,51 @@ ext4_can_extents_be_merged(struct inode
}
/*
+ * ext4_ext_try_to_merge:
+ * tries to merge the "ex" extent to the next extent in the tree.
+ * It always tries to merge towards right. If you want to merge towards
+ * left, pass "ex - 1" as argument instead of "ex".
+ * Returns 0 if the extents (ex and ex+1) were _not_ merged and returns
+ * 1 if they got merged.
+ */
+int ext4_ext_try_to_merge(struct inode *inode,
+ struct ext4_ext_path *path,
+ struct ext4_extent *ex)
+{
+ struct ext4_extent_header *eh;
+ unsigned int depth, len;
+ int merge_done=0, uninitialized = 0;
+
+ depth = ext_depth(inode);
+ BUG_ON(path[depth].p_hdr == NULL);
+ eh = path[depth].p_hdr;
+
+ while (ex < EXT_LAST_EXTENT(eh)) {
+ if (!ext4_can_extents_be_merged(inode, ex, ex + 1))
+ break;
+ /* merge with next extent! */
+ if (ext4_ext_is_uninitialized(ex))
+ uninitialized = 1;
+ ex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex)
+ + ext4_ext_get_actual_len(ex + 1));
+ if (uninitialized)
+ ext4_ext_mark_uninitialized(ex);
+
+ if (ex + 1 < EXT_LAST_EXTENT(eh)) {
+ len = (EXT_LAST_EXTENT(eh) - ex - 1)
+ * sizeof(struct ext4_extent);
+ memmove(ex + 1, ex + 2, len);
+ }
+ eh->eh_entries = cpu_to_le16(le16_to_cpu(eh->eh_entries)-1);
+ merge_done = 1;
+ BUG_ON(eh->eh_entries == 0);
+ }
+
+ return merge_done;
+}
+
+
+/*
* ext4_ext_check_overlap:
* check if a portion of the "newext" extent overlaps with an
* existing extent.
@@ -1316,25 +1361,7 @@ has_space:
merge:
/* try to merge extents to the right */
- while (nearex < EXT_LAST_EXTENT(eh)) {
- if (!ext4_can_extents_be_merged(inode, nearex, nearex + 1))
- break;
- /* merge with next extent! */
- if (ext4_ext_is_uninitialized(nearex))
- uninitialized = 1;
- nearex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(nearex)
- + ext4_ext_get_actual_len(nearex + 1));
- if (uninitialized)
- ext4_ext_mark_uninitialized(nearex);
-
- if (nearex + 1 < EXT_LAST_EXTENT(eh)) {
- len = (EXT_LAST_EXTENT(eh) - nearex - 1)
- * sizeof(struct ext4_extent);
- memmove(nearex + 1, nearex + 2, len);
- }
- eh->eh_entries = cpu_to_le16(le16_to_cpu(eh->eh_entries)-1);
- BUG_ON(eh->eh_entries == 0);
- }
+ ext4_ext_try_to_merge(inode, path, nearex);
/* try to merge extents to the left */
@@ -1999,15 +2026,149 @@ void ext4_ext_release(struct super_block
#endif
}
+/*
+ * ext4_ext_convert_to_initialized:
+ * this function is called by ext4_ext_get_blocks() if someone tries to write
+ * to an uninitialized extent. It may result in splitting the uninitialized
+ * extent into multiple extents (upto three). Atleast one initialized extent
+ * and atmost two uninitialized extents can result.
+ * There are three possibilities:
+ * a> No split required: Entire extent should be initialized.
+ * b> Split into two extents: Only one end of the extent is being written to.
+ * c> Split into three extents: Somone is writing in middle of the extent.
+ */
+int ext4_ext_convert_to_initialized(handle_t *handle, struct inode *inode,
+ struct ext4_ext_path *path,
+ ext4_fsblk_t iblock,
+ unsigned long max_blocks)
+{
+ struct ext4_extent *ex, *ex1 = NULL, *ex2 = NULL, *ex3 = NULL, newex;
+ struct ext4_extent_header *eh;
+ unsigned int allocated, ee_block, ee_len, depth;
+ ext4_fsblk_t newblock;
+ int err = 0, ret = 0;
+
+ depth = ext_depth(inode);
+ eh = path[depth].p_hdr;
+ ex = path[depth].p_ext;
+ ee_block = le32_to_cpu(ex->ee_block);
+ ee_len = ext4_ext_get_actual_len(ex);
+ allocated = ee_len - (iblock - ee_block);
+ newblock = iblock - ee_block + ext_pblock(ex);
+ ex2 = ex;
+
+ /* ex1: ee_block to iblock - 1 : uninitialized */
+ if (iblock > ee_block) {
+ ex1 = ex;
+ ex1->ee_len = cpu_to_le16(iblock - ee_block);
+ ext4_ext_mark_uninitialized(ex1);
+ ex2 = &newex;
+ }
+ /* for sanity, update the length of the ex2 extent before
+ * we insert ex3, if ex1 is NULL. This is to avoid temporary
+ * overlap of blocks.
+ */
+ if (!ex1 && allocated > max_blocks)
+ ex2->ee_len = cpu_to_le16(max_blocks);
+ /* ex3: to ee_block + ee_len : uninitialised */
+ if (allocated > max_blocks) {
+ unsigned int newdepth;
+ ex3 = &newex;
+ ex3->ee_block = cpu_to_le32(iblock + max_blocks);
+ ext4_ext_store_pblock(ex3, newblock + max_blocks);
+ ex3->ee_len = cpu_to_le16(allocated - max_blocks);
+ ext4_ext_mark_uninitialized(ex3);
+ err = ext4_ext_insert_extent(handle, inode, path, ex3);
+ if (err)
+ goto out;
+ /* The depth, and hence eh & ex might change
+ * as part of the insert above.
+ */
+ newdepth = ext_depth(inode);
+ if (newdepth != depth)
+ {
+ depth=newdepth;
+ path = ext4_ext_find_extent(inode, iblock, NULL);
+ if (IS_ERR(path)) {
+ err = PTR_ERR(path);
+ path = NULL;
+ goto out;
+ }
+ eh = path[depth].p_hdr;
+ ex = path[depth].p_ext;
+ if (ex2 != &newex)
+ ex2 = ex;
+ }
+ allocated = max_blocks;
+ }
+ /* If there was a change of depth as part of the
+ * insertion of ex3 above, we need to update the length
+ * of the ex1 extent again here
+ */
+ if (ex1 && ex1 != ex) {
+ ex1 = ex;
+ ex1->ee_len = cpu_to_le16(iblock - ee_block);
+ ext4_ext_mark_uninitialized(ex1);
+ ex2 = &newex;
+ }
+ /* ex2: iblock to iblock + maxblocks-1 : initialised */
+ ex2->ee_block = cpu_to_le32(iblock);
+ ex2->ee_start = cpu_to_le32(newblock);
+ ext4_ext_store_pblock(ex2, newblock);
+ ex2->ee_len = cpu_to_le16(allocated);
+ if (ex2 != ex)
+ goto insert;
+ if ((err = ext4_ext_get_access(handle, inode, path + depth)))
+ goto out;
+ /* New (initialized) extent starts from the first block
+ * in the current extent. i.e., ex2 == ex
+ * We have to see if it can be merged with the extent
+ * on the left.
+ */
+ if (ex2 > EXT_FIRST_EXTENT(eh)) {
+ /* To merge left, pass "ex2 - 1" to try_to_merge(),
+ * since it merges towards right _only_.
+ */
+ ret = ext4_ext_try_to_merge(inode, path, ex2 - 1);
+ if (ret) {
+ err = ext4_ext_correct_indexes(handle, inode, path);
+ if (err)
+ goto out;
+ depth = ext_depth(inode);
+ ex2--;
+ }
+ }
+ /* Try to Merge towards right. This might be required
+ * only when the whole extent is being written to.
+ * i.e. ex2==ex and ex3==NULL.
+ */
+ if (!ex3) {
+ ret = ext4_ext_try_to_merge(inode, path, ex2);
+ if (ret) {
+ err = ext4_ext_correct_indexes(handle, inode, path);
+ if (err)
+ goto out;
+ }
+ }
+ /* Mark modified extent as dirty */
+ err = ext4_ext_dirty(handle, inode, path + depth);
+ goto out;
+insert:
+ err = ext4_ext_insert_extent(handle, inode, path, &newex);
+out:
+ return err ? err : allocated;
+}
+
int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
ext4_fsblk_t iblock,
unsigned long max_blocks, struct buffer_head *bh_result,
int create, int extend_disksize)
{
struct ext4_ext_path *path = NULL;
+ struct ext4_extent_header *eh;
struct ext4_extent newex, *ex;
ext4_fsblk_t goal, newblock;
- int err = 0, depth;
+ int err = 0, depth, ret;
unsigned long allocated = 0;
__clear_bit(BH_New, &bh_result->b_state);
@@ -2055,6 +2216,7 @@ int ext4_ext_get_blocks(handle_t *handle
* this is why assert can't be put in ext4_ext_find_extent()
*/
BUG_ON(path[depth].p_ext == NULL && depth != 0);
+ eh = path[depth].p_hdr;
ex = path[depth].p_ext;
if (ex) {
@@ -2063,13 +2225,9 @@ int ext4_ext_get_blocks(handle_t *handle
unsigned short ee_len;
/*
- * Allow future support for preallocated extents to be added
- * as an RO_COMPAT feature:
* Uninitialized extents are treated as holes, except that
- * we avoid (fail) allocating new blocks during a write.
+ * we split out initialized portions during a write.
*/
- if (le16_to_cpu(ex->ee_len) > EXT_MAX_LEN)
- goto out2;
ee_len = ext4_ext_get_actual_len(ex);
/* if found extent covers block, simply return it */
if (iblock >= ee_block && iblock < ee_block + ee_len) {
@@ -2078,12 +2236,27 @@ int ext4_ext_get_blocks(handle_t *handle
allocated = ee_len - (iblock - ee_block);
ext_debug("%d fit into %lu:%d -> %llu\n", (int) iblock,
ee_block, ee_len, newblock);
+
/* Do not put uninitialized extent in the cache */
- if (!ext4_ext_is_uninitialized(ex))
+ if (!ext4_ext_is_uninitialized(ex)) {
ext4_ext_put_in_cache(inode, ee_block,
ee_len, ee_start,
EXT4_EXT_CACHE_EXTENT);
- goto out;
+ goto out;
+ }
+ if (create == EXT4_CREATE_UNINITIALIZED_EXT)
+ goto out;
+ if (!create)
+ goto out2;
+
+ ret = ext4_ext_convert_to_initialized(handle, inode,
+ path, iblock,
+ max_blocks);
+ if (ret <= 0)
+ goto out2;
+ else
+ allocated = ret;
+ goto outnew;
}
}
@@ -2135,6 +2308,7 @@ int ext4_ext_get_blocks(handle_t *handle
/* previous routine could use block we allocated */
newblock = ext_pblock(&newex);
+outnew:
__set_bit(BH_New, &bh_result->b_state);
/* Cache only when it is _not_ an uninitialized extent */
Index: linux-2.6.21/include/linux/ext4_fs_extents.h
===================================================================
--- linux-2.6.21.orig/include/linux/ext4_fs_extents.h
+++ linux-2.6.21/include/linux/ext4_fs_extents.h
@@ -203,6 +203,7 @@ ext4_ext_invalidate_cache(struct inode *
extern int ext4_extent_tree_init(handle_t *, struct inode *);
extern int ext4_ext_calc_credits_for_insert(struct inode *, struct ext4_ext_path *);
+extern int ext4_ext_try_to_merge(struct inode *, struct ext4_ext_path *, struct ext4_extent *);
extern unsigned int ext4_ext_check_overlap(struct inode *, struct ext4_extent *, struct ext4_ext_path *);
extern int ext4_ext_insert_extent(handle_t *, struct inode *, struct ext4_ext_path *, struct ext4_extent *);
extern int ext4_ext_walk_space(struct inode *, unsigned long, unsigned long, ext_prepare_callback, void *);
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/