Re: [f2fs-dev] [PATCH v3] f2fs: zone: don't block IO if there is remained open zone

From: Chao Yu
Date: Tue Apr 16 2024 - 21:05:23 EST


On 2024/4/17 0:51, Jaegeuk Kim wrote:
On 04/16, Chao Yu wrote:
On 2024/4/15 22:01, Chao Yu wrote:
On 2024/4/15 11:26, Chao Yu wrote:
On 2024/4/14 23:19, Jaegeuk Kim wrote:
It seems this caused kernel hang. Chao, have you tested this patch enough?

Jaegeuk,

Oh, I've checked this patch w/ fsstress before submitting it, but missed
the SPO testcase... do you encounter kernel hang w/ SPO testcase?

I didn't see any hang issue w/ por_fsstress testcase, which testcase do you use?

Sorry, I mean I haven't reproduced it yet...

I'd prefer to check this patch later. Have you tested on Zoned device with
nullblk?

Yes, I enabled blkzoned feature w/ nullblk device, and set
/sys/kernel/config/nullb/nullb0/zone_max_open to six, so that it can
emulate ZUFS' configuration.

Thanks,



Thanks,


Thanks,


Anyway, let me test it more.

Thanks,


On 04/13, Chao Yu wrote:
On 2024/4/13 5:11, Jaegeuk Kim wrote:
On 04/07, Chao Yu wrote:
The max open zones of the device may be larger than the number of log
headers of f2fs. In such a case, there is no need to wait for the last
IO in the previous zone, so let's introduce the available_open_zones
semaphore: decrease it once we submit the first write IO in a zone, and
increase it after completion of the last IO in the zone.

Cc: Daeho Jeong <daeho43@xxxxxxxxx>
Signed-off-by: Chao Yu <chao@xxxxxxxxxx>
---
v3:
- avoid race condition between __submit_merged_bio()
and __allocate_new_segment().
   fs/f2fs/data.c    | 105 ++++++++++++++++++++++++++++++----------------
   fs/f2fs/f2fs.h    |  34 ++++++++++++---
   fs/f2fs/iostat.c  |   7 ++++
   fs/f2fs/iostat.h  |   2 +
   fs/f2fs/segment.c |  43 ++++++++++++++++---
   fs/f2fs/segment.h |  12 +++++-
   fs/f2fs/super.c   |   2 +
   7 files changed, 156 insertions(+), 49 deletions(-)

diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 0d88649c60a5..18a4ac0a06bc 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -373,11 +373,10 @@ static void f2fs_write_end_io(struct bio *bio)
   #ifdef CONFIG_BLK_DEV_ZONED
   static void f2fs_zone_write_end_io(struct bio *bio)
   {
-    struct f2fs_bio_info *io = (struct f2fs_bio_info *)bio->bi_private;
+    struct f2fs_sb_info *sbi = iostat_get_bio_private(bio);
-    bio->bi_private = io->bi_private;
-    complete(&io->zone_wait);
       f2fs_write_end_io(bio);
+    up(&sbi->available_open_zones);
   }
   #endif
@@ -531,6 +530,24 @@ static void __submit_merged_bio(struct f2fs_bio_info *io)
       if (!io->bio)
           return;
+#ifdef CONFIG_BLK_DEV_ZONED
+    if (io->open_zone) {
+        /*
+         * if there is no open zone, it will wait for last IO in
+         * previous zone before submitting new IO.
+         */
+        down(&fio->sbi->available_open_zones);
+        io->open_zone = false;
+        io->zone_openned = true;
+    }
+
+    if (io->close_zone) {
+        io->bio->bi_end_io = f2fs_zone_write_end_io;
+        io->zone_openned = false;
+        io->close_zone = false;
+    }
+#endif
+
       if (is_read_io(fio->op)) {
           trace_f2fs_prepare_read_bio(io->sbi->sb, fio->type, io->bio);
           f2fs_submit_read_bio(io->sbi, io->bio, fio->type);
@@ -601,9 +618,9 @@ int f2fs_init_write_merge_io(struct f2fs_sb_info *sbi)
               INIT_LIST_HEAD(&sbi->write_io[i][j].bio_list);
               init_f2fs_rwsem(&sbi->write_io[i][j].bio_list_lock);
   #ifdef CONFIG_BLK_DEV_ZONED
-            init_completion(&sbi->write_io[i][j].zone_wait);
-            sbi->write_io[i][j].zone_pending_bio = NULL;
-            sbi->write_io[i][j].bi_private = NULL;
+            sbi->write_io[i][j].open_zone = false;
+            sbi->write_io[i][j].zone_openned = false;
+            sbi->write_io[i][j].close_zone = false;
   #endif
           }
       }
@@ -634,6 +651,31 @@ static void __f2fs_submit_merged_write(struct f2fs_sb_info *sbi,
       f2fs_up_write(&io->io_rwsem);
   }
+void f2fs_blkzoned_submit_merged_write(struct f2fs_sb_info *sbi, int type)
+{
+#ifdef CONFIG_BLK_DEV_ZONED
+    struct f2fs_bio_info *io;
+
+    if (!f2fs_sb_has_blkzoned(sbi))
+        return;
+
+    io = sbi->write_io[PAGE_TYPE(type)] + type_to_temp(type);
+
+    f2fs_down_write(&io->io_rwsem);
+    if (io->zone_openned) {
+        if (io->bio) {
+            io->close_zone = true;
+            __submit_merged_bio(io);
+        } else if (io->zone_openned) {
+            up(&sbi->available_open_zones);
+            io->zone_openned = false;
+        }
+    }
+    f2fs_up_write(&io->io_rwsem);
+#endif
+
+}
+
   static void __submit_merged_write_cond(struct f2fs_sb_info *sbi,
                   struct inode *inode, struct page *page,
                   nid_t ino, enum page_type type, bool force)
@@ -918,22 +960,16 @@ int f2fs_merge_page_bio(struct f2fs_io_info *fio)
   }
   #ifdef CONFIG_BLK_DEV_ZONED
-static bool is_end_zone_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr)
+static bool is_blkaddr_zone_boundary(struct f2fs_sb_info *sbi,
+                    block_t blkaddr, bool start)
   {
-    int devi = 0;
+    if (!f2fs_blkaddr_in_seqzone(sbi, blkaddr))
+        return false;
+
+    if (start)
+        return (blkaddr % sbi->blocks_per_blkz) == 0;
+    return (blkaddr % sbi->blocks_per_blkz == sbi->blocks_per_blkz - 1);
-    if (f2fs_is_multi_device(sbi)) {
-        devi = f2fs_target_device_index(sbi, blkaddr);
-        if (blkaddr < FDEV(devi).start_blk ||
-            blkaddr > FDEV(devi).end_blk) {
-            f2fs_err(sbi, "Invalid block %x", blkaddr);
-            return false;
-        }
-        blkaddr -= FDEV(devi).start_blk;
-    }
-    return bdev_is_zoned(FDEV(devi).bdev) &&
-        f2fs_blkz_is_seq(sbi, devi, blkaddr) &&
-        (blkaddr % sbi->blocks_per_blkz == sbi->blocks_per_blkz - 1);
   }
   #endif
@@ -944,20 +980,14 @@ void f2fs_submit_page_write(struct f2fs_io_info *fio)
       struct f2fs_bio_info *io = sbi->write_io[btype] + fio->temp;
       struct page *bio_page;
       enum count_type type;
+#ifdef CONFIG_BLK_DEV_ZONED
+    bool blkzoned = f2fs_sb_has_blkzoned(sbi) && btype < META;
+#endif
       f2fs_bug_on(sbi, is_read_io(fio->op));
       f2fs_down_write(&io->io_rwsem);
   next:
-#ifdef CONFIG_BLK_DEV_ZONED
-    if (f2fs_sb_has_blkzoned(sbi) && btype < META && io->zone_pending_bio) {
-        wait_for_completion_io(&io->zone_wait);
-        bio_put(io->zone_pending_bio);
-        io->zone_pending_bio = NULL;
-        io->bi_private = NULL;
-    }
-#endif
-
       if (fio->in_list) {
           spin_lock(&io->io_lock);
           if (list_empty(&io->io_list)) {
@@ -985,6 +1015,11 @@ void f2fs_submit_page_write(struct f2fs_io_info *fio)
       type = WB_DATA_TYPE(bio_page, fio->compressed_page);
       inc_page_count(sbi, type);
+#ifdef CONFIG_BLK_DEV_ZONED
+    if (blkzoned && is_blkaddr_zone_boundary(sbi, fio->new_blkaddr, true))
+        io->open_zone = true;
+#endif
+
       if (io->bio &&
           (!io_is_mergeable(sbi, io->bio, io, fio, io->last_block_in_bio,
                     fio->new_blkaddr) ||
@@ -1010,15 +1045,11 @@ void f2fs_submit_page_write(struct f2fs_io_info *fio)
       io->last_block_in_bio = fio->new_blkaddr;
       trace_f2fs_submit_page_write(fio->page, fio);
+
   #ifdef CONFIG_BLK_DEV_ZONED
-    if (f2fs_sb_has_blkzoned(sbi) && btype < META &&
-            is_end_zone_blkaddr(sbi, fio->new_blkaddr)) {
-        bio_get(io->bio);
-        reinit_completion(&io->zone_wait);
-        io->bi_private = io->bio->bi_private;
-        io->bio->bi_private = io;
-        io->bio->bi_end_io = f2fs_zone_write_end_io;
-        io->zone_pending_bio = io->bio;
+    if (blkzoned &&
+        is_blkaddr_zone_boundary(sbi, fio->new_blkaddr, false)) {
+        io->close_zone = true;
           __submit_merged_bio(io);
       }
   #endif
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 694f8a52cb84..8a26530cf4fb 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -1234,16 +1234,16 @@ struct f2fs_bio_info {
       struct bio *bio;        /* bios to merge */
       sector_t last_block_in_bio;    /* last block number */
       struct f2fs_io_info fio;    /* store buffered io info. */
-#ifdef CONFIG_BLK_DEV_ZONED
-    struct completion zone_wait;    /* condition value for the previous open zone to close */
-    struct bio *zone_pending_bio;    /* pending bio for the previous zone */
-    void *bi_private;        /* previous bi_private for pending bio */
-#endif
       struct f2fs_rwsem io_rwsem;    /* blocking op for bio */
       spinlock_t io_lock;        /* serialize DATA/NODE IOs */
       struct list_head io_list;    /* track fios */
       struct list_head bio_list;    /* bio entry list head */
       struct f2fs_rwsem bio_list_lock;    /* lock to protect bio entry list */
+#ifdef CONFIG_BLK_DEV_ZONED
+    bool open_zone;            /* open a zone */
+    bool zone_openned;        /* zone has been openned */
+    bool close_zone;        /* close a zone */
+#endif
   };
   #define FDEV(i)                (sbi->devs[i])
@@ -1560,6 +1560,7 @@ struct f2fs_sb_info {
   #ifdef CONFIG_BLK_DEV_ZONED
       unsigned int blocks_per_blkz;        /* F2FS blocks per zone */
       unsigned int max_open_zones;        /* max open zone resources of the zoned device */
+    struct semaphore available_open_zones;    /* available open zones */
   #endif
       /* for node-related operations */
@@ -3822,6 +3823,7 @@ void f2fs_destroy_bio_entry_cache(void);
   void f2fs_submit_read_bio(struct f2fs_sb_info *sbi, struct bio *bio,
                 enum page_type type);
   int f2fs_init_write_merge_io(struct f2fs_sb_info *sbi);
+void f2fs_blkzoned_submit_merged_write(struct f2fs_sb_info *sbi, int type);
   void f2fs_submit_merged_write(struct f2fs_sb_info *sbi, enum page_type type);
   void f2fs_submit_merged_write_cond(struct f2fs_sb_info *sbi,
                   struct inode *inode, struct page *page,
@@ -4469,6 +4471,28 @@ static inline bool f2fs_blkz_is_seq(struct f2fs_sb_info *sbi, int devi,
       return test_bit(zno, FDEV(devi).blkz_seq);
   }
+
+static inline bool f2fs_blkaddr_in_seqzone(struct f2fs_sb_info *sbi,
+                            block_t blkaddr)
+{
+    int devi = 0;
+
+    if (f2fs_is_multi_device(sbi)) {
+        devi = f2fs_target_device_index(sbi, blkaddr);
+        if (blkaddr < FDEV(devi).start_blk ||
+            blkaddr > FDEV(devi).end_blk) {
+            f2fs_err(sbi, "Invalid block %x", blkaddr);
+            return false;
+        }
+        blkaddr -= FDEV(devi).start_blk;
+    }
+
+    if (!bdev_is_zoned(FDEV(devi).bdev) ||
+        !f2fs_blkz_is_seq(sbi, devi, blkaddr))
+        return false;
+
+    return true;

Applied as below.

@@ -4485,11 +4485,8 @@ static inline bool f2fs_blkaddr_in_seqzone(struct f2fs_sb_info *sbi,
                  blkaddr -= FDEV(devi).start_blk;
          }

-       if (!bdev_is_zoned(FDEV(devi).bdev) ||
-               !f2fs_blkz_is_seq(sbi, devi, blkaddr))
-               return false;
-
-       return true;
+       return bdev_is_zoned(FDEV(devi).bdev) &&
+               f2fs_blkz_is_seq(sbi, devi, blkaddr);

Looks good, thank you for cleanup.

Thanks,

   }
   #endif

+}
   #endif
   static inline int f2fs_bdev_index(struct f2fs_sb_info *sbi,
diff --git a/fs/f2fs/iostat.c b/fs/f2fs/iostat.c
index f8703038e1d8..a8626e297876 100644
--- a/fs/f2fs/iostat.c
+++ b/fs/f2fs/iostat.c
@@ -237,6 +237,13 @@ static inline void __update_iostat_latency(struct bio_iostat_ctx *iostat_ctx,
       spin_unlock_irqrestore(&sbi->iostat_lat_lock, flags);
   }
+void *iostat_get_bio_private(struct bio *bio)
+{
+    struct bio_iostat_ctx *iostat_ctx = bio->bi_private;
+
+    return iostat_ctx->sbi;
+}
+
   void iostat_update_and_unbind_ctx(struct bio *bio)
   {
       struct bio_iostat_ctx *iostat_ctx = bio->bi_private;
diff --git a/fs/f2fs/iostat.h b/fs/f2fs/iostat.h
index eb99d05cf272..9006c3d41590 100644
--- a/fs/f2fs/iostat.h
+++ b/fs/f2fs/iostat.h
@@ -58,6 +58,7 @@ static inline struct bio_post_read_ctx *get_post_read_ctx(struct bio *bio)
       return iostat_ctx->post_read_ctx;
   }
+extern void *iostat_get_bio_private(struct bio *bio);
   extern void iostat_update_and_unbind_ctx(struct bio *bio);
   extern void iostat_alloc_and_bind_ctx(struct f2fs_sb_info *sbi,
           struct bio *bio, struct bio_post_read_ctx *ctx);
@@ -68,6 +69,7 @@ extern void f2fs_destroy_iostat(struct f2fs_sb_info *sbi);
   #else
   static inline void f2fs_update_iostat(struct f2fs_sb_info *sbi, struct inode *inode,
           enum iostat_type type, unsigned long long io_bytes) {}
+static inline void *iostat_get_bio_private(struct bio *bio) { return bio->bi_private; }
   static inline void iostat_update_and_unbind_ctx(struct bio *bio) {}
   static inline void iostat_alloc_and_bind_ctx(struct f2fs_sb_info *sbi,
           struct bio *bio, struct bio_post_read_ctx *ctx) {}
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 4fd76e867e0a..4a3cf2888faf 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -3140,6 +3140,9 @@ static int __allocate_new_segment(struct f2fs_sb_info *sbi, int type,
           return err;
       stat_inc_seg_type(sbi, curseg);
       locate_dirty_segment(sbi, old_segno);
+
+    f2fs_blkzoned_submit_merged_write(sbi, type);
+
       return 0;
   }
@@ -3461,12 +3464,7 @@ static int __get_segment_type(struct f2fs_io_info *fio)
           f2fs_bug_on(fio->sbi, true);
       }
-    if (IS_HOT(type))
-        fio->temp = HOT;
-    else if (IS_WARM(type))
-        fio->temp = WARM;
-    else
-        fio->temp = COLD;
+    fio->temp = type_to_temp(type);
       return type;
   }
@@ -4132,6 +4130,27 @@ static int restore_curseg_summaries(struct f2fs_sb_info *sbi)
           return -EINVAL;
       }
+#ifdef CONFIG_BLK_DEV_ZONED
+    if (f2fs_sb_has_blkzoned(sbi)) {
+        for (type = 0; type < NR_PERSISTENT_LOG; type++) {
+            struct curseg_info *curseg = CURSEG_I(sbi, type);
+            enum page_type ptype;
+            enum temp_type temp;
+
+            if (!(curseg->next_blkoff % sbi->blocks_per_blkz))
+                continue;
+
+            if (!f2fs_blkaddr_in_seqzone(sbi,
+                    START_BLOCK(sbi, curseg->segno)))
+                continue;
+
+            ptype = PAGE_TYPE(type);
+            temp = type_to_temp(type);
+            down(&sbi->available_open_zones);
+            sbi->write_io[ptype][temp].zone_openned = true;
+        }
+    }
+#endif
       return 0;
   }
@@ -5451,6 +5470,18 @@ static void destroy_curseg(struct f2fs_sb_info *sbi)
       for (i = 0; i < NR_CURSEG_TYPE; i++) {
           kfree(array[i].sum_blk);
           kfree(array[i].journal);
+
+#ifdef CONFIG_BLK_DEV_ZONED
+        if (f2fs_sb_has_blkzoned(sbi)) {
+            enum page_type ptype = PAGE_TYPE(i);
+            enum temp_type temp = type_to_temp(i);
+
+            if (sbi->write_io[ptype][temp].zone_openned) {
+                up(&sbi->available_open_zones);
+                sbi->write_io[ptype][temp].zone_openned = false;
+            }
+        }
+#endif
       }
       kfree(array);
   }
diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
index e1c0f418aa11..855978ca869f 100644
--- a/fs/f2fs/segment.h
+++ b/fs/f2fs/segment.h
@@ -24,7 +24,8 @@
   #define IS_DATASEG(t)    ((t) <= CURSEG_COLD_DATA)
   #define IS_NODESEG(t)    ((t) >= CURSEG_HOT_NODE && (t) <= CURSEG_COLD_NODE)
-#define SE_PAGETYPE(se)    ((IS_NODESEG((se)->type) ? NODE : DATA))
+#define PAGE_TYPE(t)    (IS_NODESEG(t) ? NODE : DATA)
+#define SE_PAGETYPE(se)    (PAGE_TYPE((se)->type))
   static inline void sanity_check_seg_type(struct f2fs_sb_info *sbi,
                           unsigned short seg_type)
@@ -965,3 +966,12 @@ static inline unsigned int first_zoned_segno(struct f2fs_sb_info *sbi)
               return GET_SEGNO(sbi, FDEV(devi).start_blk);
       return 0;
   }
+
+static inline enum temp_type type_to_temp(int type)
+{
+    if (IS_HOT(type))
+        return HOT;
+    else if (IS_WARM(type))
+        return WARM;
+    return COLD;
+}
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index fdf358c7f808..954baa6c100d 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -3893,6 +3893,8 @@ static int init_blkz_info(struct f2fs_sb_info *sbi, int devi)
                   sbi->max_open_zones, F2FS_OPTION(sbi).active_logs);
               return -EINVAL;
           }
+
+        sema_init(&sbi->available_open_zones, sbi->max_open_zones);
       }
       zone_sectors = bdev_zone_sectors(bdev);
--
2.40.1


_______________________________________________
Linux-f2fs-devel mailing list
Linux-f2fs-devel@xxxxxxxxxxxxxxxxxxxxx
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel


_______________________________________________
Linux-f2fs-devel mailing list
Linux-f2fs-devel@xxxxxxxxxxxxxxxxxxxxx
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel