[RFC PATCH 4/4] ext4: fast_commit: replay DAX ByteLog records

From: Li Chen

Date: Thu Feb 26 2026 - 05:22:47 EST


Add replay support for EXT4_FC_TAG_DAX_BYTELOG_ANCHOR. The anchor TLV
describes a ByteLog window in the DAX-mapped fast commit area. Each
record in the window is validated (committed flag, contiguous sequence
numbers, and payload crc32c cross-checked against the anchor) and its
TLVs are then dispatched to the existing fast-commit replay handlers.

Signed-off-by: Li Chen <me@linux.beauty>
---
fs/ext4/fast_commit.c | 246 ++++++++++++++++++++++++++++++++++++++++++
fs/ext4/fast_commit.h | 9 ++
2 files changed, 255 insertions(+)

diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c
index 2f7b7ea29df2..6370505ecc86 100644
--- a/fs/ext4/fast_commit.c
+++ b/fs/ext4/fast_commit.c
@@ -12,6 +12,7 @@
#include "ext4_extents.h"
#include "mballoc.h"

+#include <linux/crc32c.h>
#include <linux/lockdep.h>
/*
* Ext4 Fast Commits
@@ -2172,10 +2173,228 @@ static bool ext4_fc_value_len_isvalid(struct ext4_sb_info *sbi,
return len >= sizeof(struct ext4_fc_tail);
case EXT4_FC_TAG_HEAD:
return len == sizeof(struct ext4_fc_head);
+ case EXT4_FC_TAG_DAX_BYTELOG_ANCHOR:
+ return len == sizeof(struct ext4_fc_bytelog_entry);
}
return false;
}

+/*
+ * Reset a ByteLog walk state to its pristine values: cursor and expected
+ * sequence number at zero, running CRC re-seeded with ~0U (the crc32c
+ * seed used by ext4_fc_bytelog_iterate()), and not yet anchored.
+ */
+static void ext4_fc_reset_bytelog_state(struct ext4_fc_bytelog_state *state)
+{
+ state->cursor = 0;
+ state->next_seq = 0;
+ state->ring_crc = ~0U;
+ state->initialized = false;
+}
+
+/* Per-TLV callback invoked for every tag/value pair found in the window. */
+typedef int (*ext4_fc_bytelog_cb_t)(struct super_block *sb,
+ struct ext4_fc_tl_mem *tl,
+ u8 *val, void *data);
+
+/*
+ * Walk the ByteLog window [anchor->tail, anchor->head) in the DAX-mapped
+ * fast commit area, validating each record header and dispatching every
+ * contained TLV to @fn (which may be NULL for a validate-only pass).
+ *
+ * Each record must be marked committed and carry the next expected
+ * sequence number (starting from 0 at anchor->tail). A record payload is
+ * either a batch of TLVs (EXT4_FC_BYTELOG_TAG_BATCH) or a single TLV
+ * whose tag must match the record header tag. On completion the walk
+ * must land exactly on anchor->head, the record count must equal
+ * anchor->seq, and the crc32c accumulated over all payloads (seeded with
+ * ~0U) must equal anchor->crc.
+ *
+ * Returns 0 on success, -EOPNOTSUPP if the log is not mapped,
+ * -EFSCORRUPTED/-EUCLEAN/-EFSBADCRC on validation failure, or the
+ * callback's error.
+ */
+static int ext4_fc_bytelog_iterate(struct super_block *sb,
+ struct ext4_fc_bytelog_state *iter,
+ const struct ext4_fc_bytelog_anchor *anchor,
+ ext4_fc_bytelog_cb_t fn, void *data)
+{
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+ struct ext4_fc_bytelog *log = &sbi->s_fc_bytelog;
+ u8 *base = log->kaddr;
+ u64 cursor, end;
+ int ret;
+
+ /* ByteLog replay only makes sense when the DAX area is mapped. */
+ if (!log->mapped || !base)
+ return -EOPNOTSUPP;
+ if (anchor->head > log->size_bytes)
+ return -EFSCORRUPTED;
+
+ /* Seed the walk state from the on-disk anchor. */
+ iter->cursor = anchor->tail;
+ iter->next_seq = 0;
+ iter->ring_crc = ~0U;
+ iter->initialized = true;
+ cursor = iter->cursor;
+ end = anchor->head;
+
+ /* The window must lie entirely inside the mapped log area. */
+ if (cursor < log->base_off)
+ return -EFSCORRUPTED;
+ if (cursor > end || cursor > log->size_bytes)
+ return -EFSCORRUPTED;
+
+ while (cursor < end) {
+ struct ext4_fc_bytelog_hdr *hdr;
+ size_t remaining;
+ u32 payload_len, record_len;
+ u16 record_tag;
+ u8 *payload;
+ struct ext4_fc_tl_mem tl;
+
+ /*
+ * Guard the u64 -> size_t narrowing below; only relevant
+ * where size_t is narrower than u64 (32-bit builds).
+ */
+ if (end - cursor > SIZE_MAX)
+ return -E2BIG;
+ remaining = end - cursor;
+ /* A full record header must fit before the end of the area. */
+ if (cursor > log->size_bytes - sizeof(*hdr))
+ return -EFSCORRUPTED;
+
+ hdr = (struct ext4_fc_bytelog_hdr *)(base + cursor);
+ payload = (u8 *)hdr + sizeof(*hdr);
+ ret = ext4_fc_bytelog_validate_hdr(hdr, remaining, payload);
+ if (ret)
+ return ret;
+ /* Only fully committed, in-order records are replayable. */
+ if (!ext4_fc_bytelog_record_committed(hdr))
+ return -EUCLEAN;
+ if (ext4_fc_bytelog_seq(hdr) != iter->next_seq)
+ return -EUCLEAN;
+
+ payload_len = ext4_fc_bytelog_payload_len(hdr);
+ if (payload_len < EXT4_FC_TAG_BASE_LEN)
+ return -EFSCORRUPTED;
+
+ record_tag = le16_to_cpu(hdr->tag);
+ if (record_tag == EXT4_FC_BYTELOG_TAG_BATCH) {
+ /* Batch record: payload is a sequence of TLVs. */
+ u32 pos = 0;
+
+ while (pos < payload_len) {
+ u32 value_len;
+
+ if (payload_len - pos < EXT4_FC_TAG_BASE_LEN)
+ return -EFSCORRUPTED;
+
+ ext4_fc_get_tl(&tl, payload + pos);
+ value_len = tl.fc_len;
+ /* TLV value must not run past the payload. */
+ if (value_len >
+ payload_len - pos - EXT4_FC_TAG_BASE_LEN)
+ return -EFSCORRUPTED;
+ if (!ext4_fc_value_len_isvalid(sbi, tl.fc_tag,
+ tl.fc_len))
+ return -EFSCORRUPTED;
+ if (fn) {
+ ret = fn(sb, &tl,
+ payload + pos +
+ EXT4_FC_TAG_BASE_LEN,
+ data);
+ if (ret)
+ return ret;
+ }
+ pos += EXT4_FC_TAG_BASE_LEN + value_len;
+ }
+ } else {
+ /*
+ * Single-TLV record: the TLV must exactly fill the
+ * payload and agree with the record header's tag.
+ */
+ u32 value_len;
+
+ ext4_fc_get_tl(&tl, payload);
+ value_len = payload_len - EXT4_FC_TAG_BASE_LEN;
+ if (tl.fc_len != value_len)
+ return -EFSCORRUPTED;
+ if (record_tag != tl.fc_tag)
+ return -EFSCORRUPTED;
+ if (!ext4_fc_value_len_isvalid(sbi, tl.fc_tag, tl.fc_len))
+ return -EFSCORRUPTED;
+ if (fn) {
+ ret = fn(sb, &tl,
+ payload + EXT4_FC_TAG_BASE_LEN,
+ data);
+ if (ret)
+ return ret;
+ }
+ }
+
+ /* CRC covers payload bytes only, not the record header. */
+ iter->ring_crc = crc32c(iter->ring_crc, payload, payload_len);
+ record_len = ext4_fc_bytelog_record_len(hdr);
+ cursor += record_len;
+ iter->next_seq++;
+ }
+
+ /* The walk must land exactly on the anchor's head, never overshoot. */
+ if (cursor != end)
+ return -EFSCORRUPTED;
+ iter->cursor = cursor;
+ /* Cross-check record count and payload CRC against the anchor. */
+ if (iter->next_seq != anchor->seq)
+ return -EUCLEAN;
+ if (iter->ring_crc != anchor->crc)
+ return -EFSBADCRC;
+ return 0;
+}
+
+/*
+ * Scan-phase TLV callback: record the extent region of each ADD_RANGE
+ * TLV via ext4_fc_record_regions() so the replay phase can consult it.
+ * Other tags known to the replay path need no scan-time work; unknown
+ * tags abort the scan with -EOPNOTSUPP.
+ */
+static int ext4_fc_bytelog_scan_cb(struct super_block *sb,
+ struct ext4_fc_tl_mem *tl, u8 *val,
+ void *data)
+{
+ struct ext4_fc_add_range ext;
+ struct ext4_extent *ex;
+
+ (void)data;
+ switch (tl->fc_tag) {
+ case EXT4_FC_TAG_ADD_RANGE:
+ /*
+ * Copy into an aligned local before touching fields —
+ * presumably the DAX payload may be unaligned (TODO confirm
+ * the on-media alignment guarantees).
+ */
+ memcpy(&ext, val, sizeof(ext));
+ ex = (struct ext4_extent *)&ext.fc_ex;
+ return ext4_fc_record_regions(sb, le32_to_cpu(ext.fc_ino),
+ le32_to_cpu(ex->ee_block),
+ ext4_ext_pblock(ex),
+ ext4_ext_get_actual_len(ex), 0);
+ case EXT4_FC_TAG_DEL_RANGE:
+ case EXT4_FC_TAG_LINK:
+ case EXT4_FC_TAG_UNLINK:
+ case EXT4_FC_TAG_CREAT:
+ case EXT4_FC_TAG_INODE:
+ /* Nothing to collect during scan for these tags. */
+ return 0;
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+/*
+ * Replay-phase TLV callback: dispatch each ByteLog TLV to the existing
+ * fast-commit replay handler for its tag. Unknown tags fail with
+ * -EOPNOTSUPP rather than being silently skipped.
+ */
+static int ext4_fc_bytelog_replay_cb(struct super_block *sb,
+ struct ext4_fc_tl_mem *tl, u8 *val,
+ void *data)
+{
+ (void)data;
+ switch (tl->fc_tag) {
+ case EXT4_FC_TAG_LINK:
+ return ext4_fc_replay_link(sb, tl, val);
+ case EXT4_FC_TAG_UNLINK:
+ return ext4_fc_replay_unlink(sb, tl, val);
+ case EXT4_FC_TAG_ADD_RANGE:
+ return ext4_fc_replay_add_range(sb, tl, val);
+ case EXT4_FC_TAG_CREAT:
+ return ext4_fc_replay_create(sb, tl, val);
+ case EXT4_FC_TAG_DEL_RANGE:
+ return ext4_fc_replay_del_range(sb, tl, val);
+ case EXT4_FC_TAG_INODE:
+ return ext4_fc_replay_inode(sb, tl, val);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+/*
+ * Scan an anchored ByteLog window, recording ADD_RANGE regions into the
+ * replay state. Returns JBD2_FC_REPLAY_CONTINUE on success so the jbd2
+ * scan loop keeps going, or a negative error on validation failure.
+ */
+static int ext4_fc_replay_scan_bytelog(struct super_block *sb,
+ struct ext4_fc_replay_state *state,
+ const struct ext4_fc_bytelog_anchor *anchor)
+{
+ int ret;
+
+ ret = ext4_fc_bytelog_iterate(sb, &state->fc_bytelog_scan, anchor,
+ ext4_fc_bytelog_scan_cb, state);
+ if (ret)
+ return ret;
+ return JBD2_FC_REPLAY_CONTINUE;
+}
+
+/*
+ * Replay an anchored ByteLog window, applying each TLV through the
+ * existing fast-commit replay handlers. Uses the replay-pass walk state
+ * (separate from the scan-pass state).
+ */
+static int ext4_fc_replay_apply_bytelog(struct super_block *sb,
+ struct ext4_fc_replay_state *state,
+ const struct ext4_fc_bytelog_anchor *anchor)
+{
+ return ext4_fc_bytelog_iterate(sb, &state->fc_bytelog_replay, anchor,
+ ext4_fc_bytelog_replay_cb, NULL);
+}
+
+/*
+ * Handle an EXT4_FC_TAG_DAX_BYTELOG_ANCHOR TLV during the replay pass:
+ * copy the on-disk anchor entry out of the TLV value (aligned local),
+ * convert it to host format, and replay the window it describes.
+ * @tl is unused; the value length was validated by the caller's
+ * ext4_fc_value_len_isvalid() check for this tag.
+ */
+static int ext4_fc_replay_bytelog_anchor(struct super_block *sb,
+ struct ext4_fc_replay_state *state,
+ struct ext4_fc_tl_mem *tl, u8 *val)
+{
+ struct ext4_fc_bytelog_entry entry;
+ struct ext4_fc_bytelog_anchor anchor;
+
+ (void)tl;
+ memcpy(&entry, val, sizeof(entry));
+ ext4_fc_bytelog_anchor_from_disk(&anchor, &entry);
+ return ext4_fc_replay_apply_bytelog(sb, state, &anchor);
+}
+
/*
* Recovery Scan phase handler
*
@@ -2206,6 +2425,8 @@ static int ext4_fc_replay_scan(journal_t *journal,
struct ext4_fc_tail tail;
__u8 *start, *end, *cur, *val;
struct ext4_fc_head head;
+ struct ext4_fc_bytelog_entry entry;
+ struct ext4_fc_bytelog_anchor anchor;
struct ext4_extent *ex;

state = &sbi->s_fc_replay_state;
@@ -2220,6 +2441,8 @@ static int ext4_fc_replay_scan(journal_t *journal,
state->fc_regions = NULL;
state->fc_regions_valid = state->fc_regions_used =
state->fc_regions_size = 0;
+ ext4_fc_reset_bytelog_state(&state->fc_bytelog_scan);
+ ext4_fc_reset_bytelog_state(&state->fc_bytelog_replay);
/* Check if we can stop early */
if (le16_to_cpu(((struct ext4_fc_tl *)start)->fc_tag)
!= EXT4_FC_TAG_HEAD)
@@ -2278,6 +2501,9 @@ static int ext4_fc_replay_scan(journal_t *journal,
state->fc_replay_num_tags = state->fc_cur_tag;
state->fc_regions_valid =
state->fc_regions_used;
+ if (ext4_fc_bytelog_active(sbi) ||
+ state->fc_bytelog_scan.initialized)
+ ret = JBD2_FC_REPLAY_STOP;
} else {
ret = state->fc_replay_num_tags ?
JBD2_FC_REPLAY_STOP : -EFSBADCRC;
@@ -2299,6 +2525,15 @@ static int ext4_fc_replay_scan(journal_t *journal,
state->fc_crc = ext4_chksum(state->fc_crc, cur,
EXT4_FC_TAG_BASE_LEN + tl.fc_len);
break;
+ case EXT4_FC_TAG_DAX_BYTELOG_ANCHOR:
+ state->fc_cur_tag++;
+ state->fc_crc = ext4_chksum(state->fc_crc, cur,
+ EXT4_FC_TAG_BASE_LEN +
+ tl.fc_len);
+ memcpy(&entry, val, sizeof(entry));
+ ext4_fc_bytelog_anchor_from_disk(&anchor, &entry);
+ ret = ext4_fc_replay_scan_bytelog(sb, state, &anchor);
+ break;
default:
ret = state->fc_replay_num_tags ?
JBD2_FC_REPLAY_STOP : -ECANCELED;
@@ -2335,6 +2570,8 @@ static int ext4_fc_replay(journal_t *journal, struct buffer_head *bh,
if (state->fc_current_pass != pass) {
state->fc_current_pass = pass;
sbi->s_mount_state |= EXT4_FC_REPLAY;
+ if (pass == PASS_REPLAY)
+ ext4_fc_reset_bytelog_state(&state->fc_bytelog_replay);
}
if (!sbi->s_fc_replay_state.fc_replay_num_tags) {
ext4_debug("Replay stops\n");
@@ -2393,9 +2630,18 @@ static int ext4_fc_replay(journal_t *journal, struct buffer_head *bh,
0, tl.fc_len, 0);
memcpy(&tail, val, sizeof(tail));
WARN_ON(le32_to_cpu(tail.fc_tid) != expected_tid);
+ if ((ext4_fc_bytelog_active(sbi) ||
+ state->fc_bytelog_scan.initialized) &&
+ state->fc_replay_num_tags == 0) {
+ ext4_fc_set_bitmaps_and_counters(sb);
+ return JBD2_FC_REPLAY_STOP;
+ }
break;
case EXT4_FC_TAG_HEAD:
break;
+ case EXT4_FC_TAG_DAX_BYTELOG_ANCHOR:
+ ret = ext4_fc_replay_bytelog_anchor(sb, state, &tl, val);
+ break;
default:
trace_ext4_fc_replay(sb, tl.fc_tag, 0, tl.fc_len, 0);
ret = -ECANCELED;
diff --git a/fs/ext4/fast_commit.h b/fs/ext4/fast_commit.h
index fb51e19b9778..224d718150c4 100644
--- a/fs/ext4/fast_commit.h
+++ b/fs/ext4/fast_commit.h
@@ -153,6 +153,13 @@ struct ext4_fc_alloc_region {
int ino, len;
};

+/*
+ * Per-pass ByteLog walk state, reset via ext4_fc_reset_bytelog_state():
+ * tracks the walk position, the next expected record sequence number,
+ * and the crc32c accumulated over record payloads so far.
+ */
+struct ext4_fc_bytelog_state {
+ u64 cursor; /* byte offset of the next record to examine */
+ u64 next_seq; /* sequence number the next record must carry */
+ u32 ring_crc; /* running crc32c over payloads, seeded with ~0U */
+ bool initialized; /* an anchor has seeded this state */
+};
+
/*
* Fast commit replay state.
*/
@@ -166,6 +173,8 @@ struct ext4_fc_replay_state {
int fc_regions_size, fc_regions_used, fc_regions_valid;
int *fc_modified_inodes;
int fc_modified_inodes_used, fc_modified_inodes_size;
+ struct ext4_fc_bytelog_state fc_bytelog_scan;
+ struct ext4_fc_bytelog_state fc_bytelog_replay;
};

#define region_last(__region) (((__region)->lblk) + ((__region)->len) - 1)
--
2.52.0