[PATCH 2/2] ext4: allocate the fast-commit range array lazily

From: Daejun Park

Date: Thu Jun 11 2026 - 00:49:36 EST


The multi-interval tracker added a fixed array of EXT4_FC_MAX_RANGES + 1
entries to every ext4_inode_info -- ~136 bytes that are wasted on inodes
that never use fast commit (read-only files, directories, ...).

Shrink it to the common case:

- Keep the first range inline in i_fc_range, so a single contiguous
dirty region (the common case) needs no allocation at all.

- Allocate the i_fc_ranges array only when a second disjoint range
appears, and free it when the in-core inode is freed
(ext4_free_in_core_inode()).

- The tracking path runs under i_fc_lock and therefore cannot sleep, so
the array is allocated with GFP_ATOMIC | __GFP_NOWARN. On failure, fall
back to coalescing the new range into the inline i_fc_range -- exactly
the original single coalesced-range behaviour -- so no full-commit
fallback or fast-commit ineligibility is needed.

The per-inode fast-commit footprint drops from ~140 bytes (the embedded
array plus the count) to 20 bytes (inline range + array pointer + count).
The array is allocated only once a second disjoint range is tracked, and
is then kept until the inode is freed.

No on-disk format change. Crash recovery (online replay + offline
e2fsck) and the fast-commit xfstests are unaffected.

While rewriting __track_range(), also skip degenerate ranges instead of
storing an empty range: a sub-block punch hole rounds the start up past
the end and passes end == start - 1, so no whole block changed. Also drop
the redundant per-transaction reset here -- ext4_fc_track_template()
already resets the range set under i_fc_lock before calling the tracker.

Signed-off-by: Daejun Park <pdaejun@xxxxxxxxx>
---
fs/ext4/ext4.h | 19 ++++++++----
fs/ext4/fast_commit.c | 70 +++++++++++++++++++++++++++++++++++++++----
fs/ext4/super.c | 1 +
3 files changed, 80 insertions(+), 10 deletions(-)

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 314a1c90075b..6c6ac19e86b6 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1081,14 +1081,23 @@ struct ext4_inode_info {
*/

/*
- * Disjoint lblk ranges modified in this fast commit. Tracking the
+ * Logical block ranges modified in this fast commit. Tracking the
* actual modified ranges (instead of one coalesced [min,max]) avoids
* re-logging the whole spanned extent map for scattered allocations.
- * Sorted by start, mutually disjoint. Bounded by EXT4_FC_MAX_RANGES;
- * the extra slot is transient room used while inserting before an
- * overflow merge. Protected by i_fc_lock.
+ *
+ * The first range is kept inline in i_fc_range, so the common case of a
+ * single contiguous dirty region needs no allocation. When a second
+ * disjoint range appears the inode is upgraded to the i_fc_ranges array
+ * (EXT4_FC_MAX_RANGES + 1 entries, sorted and mutually disjoint; the
+ * extra slot is transient room used while inserting before an overflow
+ * merge), allocated then and freed when the inode is evicted. If that
+ * allocation fails we fall back to coalescing into i_fc_range, i.e. the
+ * original single coalesced-range behaviour. i_fc_nr_ranges counts the
+ * valid ranges; while i_fc_ranges is NULL it is 0 or 1. Protected by
+ * i_fc_lock.
*/
- struct ext4_fc_lblk_range i_fc_ranges[EXT4_FC_MAX_RANGES + 1];
+ struct ext4_fc_lblk_range i_fc_range;
+ struct ext4_fc_lblk_range *i_fc_ranges;
unsigned int i_fc_nr_ranges;

spinlock_t i_raw_lock; /* protects updates to the raw inode */
diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c
index ab9ab50ad0b5..786b79a9c573 100644
--- a/fs/ext4/fast_commit.c
+++ b/fs/ext4/fast_commit.c
@@ -211,6 +211,7 @@ void ext4_fc_init_inode(struct inode *inode)
struct ext4_inode_info *ei = EXT4_I(inode);

ext4_fc_reset_inode(inode);
+ ei->i_fc_ranges = NULL;
ext4_clear_inode_state(inode, EXT4_STATE_FC_COMMITTING);
INIT_LIST_HEAD(&ei->i_fc_list);
INIT_LIST_HEAD(&ei->i_fc_dilist);
@@ -671,17 +672,73 @@ static int __track_range(handle_t *handle, struct inode *inode, void *arg,
struct ext4_inode_info *ei = EXT4_I(inode);
struct __track_range_args *__arg =
(struct __track_range_args *)arg;
+ ext4_lblk_t start = __arg->start, end = __arg->end;
+ ext4_lblk_t s0, e0;

if (inode->i_ino < EXT4_FIRST_INO(inode->i_sb)) {
ext4_debug("Special inode %ld being modified\n", inode->i_ino);
return -ECANCELED;
}

- /* A new transaction (update == false) starts a fresh range set. */
- if (!update)
- ei->i_fc_nr_ranges = 0;
+ /*
+ * A sub-block punch hole rounds up the start and down the end, passing
+ * end == start - 1: no whole block changed, so there is nothing to
+ * track. (ext4_fc_track_template has already reset the range set for a
+ * new transaction, so we need not do it here.)
+ */
+ if (end < start)
+ return 0;
+
+ /* Already upgraded to the heap array: full multi-interval tracking. */
+ if (ei->i_fc_ranges) {
+ ext4_fc_range_add(ei, start, end);
+ return 0;
+ }
+
+ /* First range of this commit stays inline, no allocation needed. */
+ if (ei->i_fc_nr_ranges == 0) {
+ ei->i_fc_range.start = start;
+ ei->i_fc_range.len = end - start + 1;
+ ei->i_fc_nr_ranges = 1;
+ return 0;
+ }
+
+ /* One inline range so far. */
+ s0 = ei->i_fc_range.start;
+ e0 = s0 + ei->i_fc_range.len - 1;

- ext4_fc_range_add(ei, __arg->start, __arg->end);
+ /* Disjoint from it: try to upgrade to the array for exact tracking. */
+ if (start > e0 + 1 || end + 1 < s0) {
+ struct ext4_fc_lblk_range *heap;
+
+ /*
+ * GFP_ATOMIC: we hold i_fc_lock. __GFP_NOWARN: failure is not
+ * fatal -- we fall back to the single coalesced range below --
+ * so it must not splat under memory pressure.
+ */
+ heap = kmalloc_array(EXT4_FC_MAX_RANGES + 1, sizeof(*heap),
+ GFP_ATOMIC | __GFP_NOWARN);
+ if (heap) {
+ heap[0] = ei->i_fc_range;
+ ei->i_fc_ranges = heap;
+ ext4_fc_range_add(ei, start, end);
+ return 0;
+ }
+ /*
+ * Out of memory: fall back to the original single coalesced
+ * range by absorbing the gap below. This over-logs the spanned
+ * extents but stays a valid fast commit (no full-commit
+ * fallback), so there is nothing to mark ineligible.
+ */
+ }
+
+ /* Overlapping/adjacent, or array allocation failed: coalesce inline. */
+ if (start < s0)
+ s0 = start;
+ if (end > e0)
+ e0 = end;
+ ei->i_fc_range.start = s0;
+ ei->i_fc_range.len = e0 - s0 + 1;

return 0;
}
@@ -1016,7 +1073,10 @@ static int ext4_fc_write_inode_data(struct inode *inode, u32 *crc)
spin_unlock(&ei->i_fc_lock);
return 0;
}
- memcpy(ranges, ei->i_fc_ranges, nr * sizeof(ranges[0]));
+ if (ei->i_fc_ranges)
+ memcpy(ranges, ei->i_fc_ranges, nr * sizeof(ranges[0]));
+ else
+ ranges[0] = ei->i_fc_range; /* inline single-range mode */
ei->i_fc_nr_ranges = 0;
spin_unlock(&ei->i_fc_lock);

diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 699c15db28a8..93d495cad0ba 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1433,6 +1433,7 @@ static void ext4_free_in_core_inode(struct inode *inode)
pr_warn("%s: inode %ld still in fc list",
__func__, inode->i_ino);
}
+ kfree(EXT4_I(inode)->i_fc_ranges);
kmem_cache_free(ext4_inode_cachep, EXT4_I(inode));
}

--
2.43.0