[RFC PATCH 3/5] f2fs: implement inline tail write & truncate

From: Wu Bo
Date: Tue Jul 09 2024 - 21:19:53 EST


Since f2fs inline data is a special case of inline tail, inline tail
can be implemented as an extension of the existing inline data code.

If the file tail can be inlined into the inode block, do not bother
pre-allocating a block at write begin. When the tail page is written
back, inline it if it fits; otherwise (the tail is oversized) write it
out normally. When the inode page is written back, update the inline
data from the tail page.

Signed-off-by: Wu Bo <bo.wu@xxxxxxxx>
---
fs/f2fs/data.c | 30 ++++++++++++++++++++++++++++++
fs/f2fs/f2fs.h | 16 ++++++++++++++++
fs/f2fs/file.c | 10 ++++++++++
fs/f2fs/inline.c | 38 +++++++++++++++++++++++++++++++-------
fs/f2fs/node.c | 6 +++++-
5 files changed, 92 insertions(+), 8 deletions(-)

diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index b9b0debc6b3d..2481da74c13d 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -2769,6 +2769,7 @@ int f2fs_write_single_data_page(struct page *page, int *submitted,
loff_t psize = (loff_t)(page->index + 1) << PAGE_SHIFT;
unsigned offset = 0;
bool need_balance_fs = false;
+ bool need_clear_tail = false;
bool quota_inode = IS_NOQUOTA(inode);
int err = 0;
struct f2fs_io_info fio = {
@@ -2860,6 +2861,16 @@ int f2fs_write_single_data_page(struct page *page, int *submitted,
goto out;
}

+ if (f2fs_has_inline_tail(inode) && page->index == end_index) {
+ if (support_tail_inline(inode, i_size)) {
+ err = f2fs_write_inline_data(inode, page);
+ if (!err)
+ goto out;
+ } else {
+ need_clear_tail = true;
+ }
+ }
+
if (err == -EAGAIN) {
err = f2fs_do_write_data_page(&fio);
if (err == -EAGAIN) {
@@ -2882,6 +2893,11 @@ int f2fs_write_single_data_page(struct page *page, int *submitted,
if (err && err != -ENOENT)
goto redirty_out;

+ if (need_clear_tail) {
+ err = f2fs_clear_inline_tail(inode, false);
+ if (err)
+ goto redirty_out;
+ }
out:
inode_dec_dirty_pages(inode);
if (err) {
@@ -3386,6 +3402,11 @@ static int prepare_write_begin(struct f2fs_sb_info *sbi,
flag = F2FS_GET_BLOCK_DEFAULT;
f2fs_map_lock(sbi, flag);
locked = true;
+ } else if (f2fs_has_inline_tail(inode)) {
+ if (!support_tail_inline(inode, pos + len)) {
+ f2fs_map_lock(sbi, flag);
+ locked = true;
+ }
} else if ((pos & PAGE_MASK) >= i_size_read(inode)) {
f2fs_map_lock(sbi, flag);
locked = true;
@@ -3414,6 +3435,15 @@ static int prepare_write_begin(struct f2fs_sb_info *sbi,
goto out;
}

+ if (f2fs_has_inline_tail(inode)) {
+ if (support_tail_inline(inode, pos + len)) {
+ f2fs_do_read_inline_data(page_folio(page), ipage);
+ if (inode->i_nlink)
+ set_page_private_inline(ipage);
+ goto out;
+ }
+ }
+
if (!f2fs_lookup_read_extent_cache_block(inode, index,
&dn.data_blkaddr)) {
if (locked) {
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 4595b6fa3714..ca43396df4df 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -3323,6 +3323,21 @@ static inline int f2fs_has_inline_dentry(struct inode *inode)
return is_inode_flag_set(inode, FI_INLINE_DENTRY);
}

+static inline bool support_tail_inline(struct inode *inode, loff_t i_size)
+{ /* Returns true only if a file of size i_size passes all three checks below. */
+ const pgoff_t tail_index = ((unsigned long long)i_size) >> PAGE_SHIFT; /* page index containing byte offset i_size */
+ unsigned long tail_len = i_size & (PAGE_SIZE - 1); /* bytes of i_size past the last page boundary */
+
+ if (tail_index > COMPACT_ADDRS_PER_INODE) /* NOTE(review): '>' here, '>=' in f2fs_clear_inline_tail() - confirm intended */
+ return false;
+ if (!tail_len) /* i_size is page-aligned, so there is no partial tail page */
+ return false;
+ if (tail_len > MAX_INLINE_DATA(inode)) /* tail is longer than MAX_INLINE_DATA() allows */
+ return false;
+
+ return true;
+}
+
static inline int is_file(struct inode *inode, int type)
{
return F2FS_I(inode)->i_advise & type;
@@ -4183,6 +4198,7 @@ void f2fs_truncate_inline_inode(struct inode *inode,
int f2fs_read_inline_data(struct inode *inode, struct folio *folio);
int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page);
int f2fs_convert_inline_inode(struct inode *inode);
+int f2fs_clear_inline_tail(struct inode *inode, bool force);
int f2fs_try_convert_inline_dir(struct inode *dir, struct dentry *dentry);
int f2fs_write_inline_data(struct inode *inode, struct page *page);
int f2fs_recover_inline_data(struct inode *inode, struct page *npage);
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 5c0b281a70f3..19d65ef0fd00 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -708,6 +708,9 @@ int f2fs_do_truncate_blocks(struct inode *inode, u64 from, bool lock)
goto out;
}

+ if (f2fs_has_inline_tail(inode) && f2fs_exist_data(inode))
+ f2fs_truncate_inline_inode(inode, ipage, 0);
+
set_new_dnode(&dn, inode, ipage, NULL, 0);
err = f2fs_get_dnode_of_data(&dn, free_from, LOOKUP_NODE_RA);
if (err) {
@@ -4648,6 +4651,13 @@ static int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *iter,
return ret;
}

+ if (f2fs_has_inline_tail(inode) &&
+ (pos + count > MAX_INLINE_TAIL(inode))) {
+ ret = f2fs_clear_inline_tail(inode, true);
+ if (ret)
+ return ret;
+ }
+
/* Do not preallocate blocks that will be written partially in 4KB. */
map.m_lblk = F2FS_BLK_ALIGN(pos);
map.m_len = F2FS_BYTES_TO_BLK(pos + count);
diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c
index 7638d0d7b7ee..8c1636c1ff2f 100644
--- a/fs/f2fs/inline.c
+++ b/fs/f2fs/inline.c
@@ -84,6 +84,7 @@ void f2fs_truncate_inline_inode(struct inode *inode,
{
void *addr;

+ from = from & (PAGE_SIZE - 1);
if (from >= MAX_INLINE_DATA(inode))
return;

@@ -138,11 +139,12 @@ int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page)
};
struct node_info ni;
int dirty, err;
+ pgoff_t index = page->index;

if (!f2fs_exist_data(dn->inode))
goto clear_out;

- err = f2fs_reserve_block(dn, 0);
+ err = f2fs_reserve_block(dn, index);
if (err)
return err;

@@ -158,8 +160,8 @@ int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page)
if (unlikely(dn->data_blkaddr != NEW_ADDR)) {
f2fs_put_dnode(dn);
set_sbi_flag(fio.sbi, SBI_NEED_FSCK);
- f2fs_warn(fio.sbi, "%s: corrupted inline inode ino=%lx, i_addr[0]:0x%x, run fsck to fix.",
- __func__, dn->inode->i_ino, dn->data_blkaddr);
+ f2fs_warn(fio.sbi, "%s: corrupted inline inode ino=%lx, i_addr[%lu]:0x%x, run fsck to fix.",
+ __func__, dn->inode->i_ino, index, dn->data_blkaddr);
f2fs_handle_error(fio.sbi, ERROR_INVALID_BLKADDR);
return -EFSCORRUPTED;
}
@@ -192,10 +194,33 @@ int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page)
clear_out:
stat_dec_inline_inode(dn->inode);
clear_inode_flag(dn->inode, FI_INLINE_DATA);
+ if (index >= COMPACT_ADDRS_PER_INODE)
+ clear_inode_flag(dn->inode, FI_INLINE_TAIL);
f2fs_put_dnode(dn);
return 0;
}

+int f2fs_clear_inline_tail(struct inode *inode, bool force)
+{ /* Truncates the inline area from offset 0 and conditionally clears FI_INLINE_TAIL; returns 0, or the node-page lookup error. */
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+ struct page *ipage;
+ loff_t i_size = i_size_read(inode);
+ pgoff_t end_index = i_size >> PAGE_SHIFT; /* page index containing byte offset i_size */
+
+ ipage = f2fs_get_node_page(sbi, inode->i_ino); /* node page looked up by the inode number */
+ if (IS_ERR(ipage))
+ return PTR_ERR(ipage);
+
+ f2fs_truncate_inline_inode(inode, ipage, 0); /* from == 0; presumably wipes the whole inline area - TODO confirm */
+ clear_page_private_inline(ipage);
+
+ if (force || end_index >= COMPACT_ADDRS_PER_INODE) /* flag stays set when !force and end_index is below the limit */
+ clear_inode_flag(inode, FI_INLINE_TAIL);
+
+ f2fs_put_page(ipage, 1); /* NOTE(review): second argument 1 presumably unlocks the page as well - confirm */
+ return 0;
+}
+
int f2fs_convert_inline_inode(struct inode *inode)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
@@ -243,20 +268,19 @@ int f2fs_convert_inline_inode(struct inode *inode)
int f2fs_write_inline_data(struct inode *inode, struct page *page)
{
struct dnode_of_data dn;
+ pgoff_t index = page->index;
int err;

set_new_dnode(&dn, inode, NULL, NULL, 0);
- err = f2fs_get_dnode_of_data(&dn, 0, LOOKUP_NODE);
+ err = f2fs_get_dnode_of_data(&dn, index, LOOKUP_NODE);
if (err)
return err;

- if (!f2fs_has_inline_data(inode)) {
+ if (!f2fs_has_inline_data(inode) && !f2fs_has_inline_tail(inode)) {
f2fs_put_dnode(&dn);
return -EAGAIN;
}

- f2fs_bug_on(F2FS_I_SB(inode), page->index);
-
f2fs_wait_on_page_writeback(dn.inode_page, NODE, true, true);
memcpy_from_page(inline_data_addr(inode, dn.inode_page),
page, 0, MAX_INLINE_DATA(inode));
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index b72ef96f7e33..5c45de7f60a1 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -1514,6 +1514,8 @@ static void flush_inline_data(struct f2fs_sb_info *sbi, nid_t ino)
{
struct inode *inode;
struct page *page;
+ loff_t i_size;
+ pgoff_t end_index;
int ret;

/* should flush inline_data before evict_inode */
@@ -1521,7 +1523,9 @@ static void flush_inline_data(struct f2fs_sb_info *sbi, nid_t ino)
if (!inode)
return;

- page = f2fs_pagecache_get_page(inode->i_mapping, 0,
+ i_size = i_size_read(inode);
+ end_index = i_size >> PAGE_SHIFT;
+ page = f2fs_pagecache_get_page(inode->i_mapping, end_index,
FGP_LOCK|FGP_NOWAIT, 0);
if (!page)
goto iput_out;
--
2.35.3