Re: [PATCH] f2fs: avoid f2fs_gc dead loop
From: Chao Yu
Date: Mon Dec 25 2017 - 05:31:06 EST
On 2017/12/25 17:56, Yunlong Song wrote:
> In this case, f2fs_gc will skip all the victims and return with no dead loop. The atomic file will
> use SSR to OPU, it's OK.
Nope, the SSR trigger condition is limited; don't rely on it.
Thanks,
>
> On 2017/12/25 17:45, Chao Yu wrote:
>> On 2017/12/25 14:15, Yunlong Song wrote:
>>> What if the application starts an atomic write but forgets to commit, e.g.
>>> because of bugs in the application, or because the application
>>> is itself malicious software?
>> I agree we should consider the robustness of f2fs from a security aspect, but
>> please consider more scenarios of how these sqlite customized interfaces are used;
>> it looks like just skipping gc is not enough. For example, if there is one large
>> db in our partition, with random writes, its data spreads across every
>> segment; once this db has been atomic opened, foreground gc may loop forever.
>>
>> How about checking the opened time of an atomic or volatile file in
>> f2fs_balance_fs? If it exceeds a threshold, we can restore the file to a normal
>> one to avoid a potential security issue.
>>
>> Thanks,
>>
>>> On 2017/12/25 11:44, Chao Yu wrote:
>>>> On 2017/12/23 21:09, Yunlong Song wrote:
>>>>> In some corner cases, f2fs_gc selects a target victim but cannot free
>>>>> that victim segment for some reason (e.g. the segment has some blocks
>>>>> of an atomic file which is not committed yet); in this case, the victim
>>>> A file should not be atomic opened for a long time, since normally a sqlite
>>>> transaction will finish quickly, so we can expect that the gc loop will
>>>> end soon, right?
>>>>
>>>> Thanks,
>>>>
>>>>> segment may be selected over and over, and then f2fs_gc will
>>>>> fall into a dead loop. This patch identifies the dead-loop segment and skips
>>>>> it in __get_victim next time.
>>>>>
>>>>> Signed-off-by: Yunlong Song <yunlong.song@xxxxxxxxxx>
>>>>> ---
>>>>> fs/f2fs/f2fs.h | 8 ++++++++
>>>>> fs/f2fs/gc.c | 34 ++++++++++++++++++++++++++++++++++
>>>>> fs/f2fs/super.c | 3 +++
>>>>> 3 files changed, 45 insertions(+)
>>>>>
>>>>> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
>>>>> index ca6b0c9..b75851b 100644
>>>>> --- a/fs/f2fs/f2fs.h
>>>>> +++ b/fs/f2fs/f2fs.h
>>>>> @@ -115,6 +115,13 @@ struct f2fs_mount_info {
>>>>> unsigned int opt;
>>>>> };
>>>>> +struct gc_loop_info {
>>>>> + int count;
>>>>> + unsigned int segno;
>>>>> + unsigned long *segmap;
>>>>> +};
>>>>> +#define GC_LOOP_MAX 10
>>>>> +
>>>>> #define F2FS_FEATURE_ENCRYPT 0x0001
>>>>> #define F2FS_FEATURE_BLKZONED 0x0002
>>>>> #define F2FS_FEATURE_ATOMIC_WRITE 0x0004
>>>>> @@ -1125,6 +1132,7 @@ struct f2fs_sb_info {
>>>>> /* threshold for converting bg victims for fg */
>>>>> u64 fggc_threshold;
>>>>> + struct gc_loop_info gc_loop;
>>>>> /* maximum # of trials to find a victim segment for SSR and GC */
>>>>> unsigned int max_victim_search;
>>>>> diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
>>>>> index 5d5bba4..4ee9e1b 100644
>>>>> --- a/fs/f2fs/gc.c
>>>>> +++ b/fs/f2fs/gc.c
>>>>> @@ -229,6 +229,10 @@ static unsigned int check_bg_victims(struct f2fs_sb_info *sbi)
>>>>> if (no_fggc_candidate(sbi, secno))
>>>>> continue;
>>>>> + if (sbi->gc_loop.segmap &&
>>>>> + test_bit(GET_SEG_FROM_SEC(sbi, secno), sbi->gc_loop.segmap))
>>>>> + continue;
>>>>> +
>>>>> clear_bit(secno, dirty_i->victim_secmap);
>>>>> return GET_SEG_FROM_SEC(sbi, secno);
>>>>> }
>>>>> @@ -371,6 +375,9 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi,
>>>>> if (gc_type == FG_GC && p.alloc_mode == LFS &&
>>>>> no_fggc_candidate(sbi, secno))
>>>>> goto next;
>>>>> + if (gc_type == FG_GC && p.alloc_mode == LFS &&
>>>>> + sbi->gc_loop.segmap && test_bit(segno, sbi->gc_loop.segmap))
>>>>> + goto next;
>>>>> cost = get_gc_cost(sbi, segno, &p);
>>>>> @@ -1042,6 +1049,27 @@ int f2fs_gc(struct f2fs_sb_info *sbi, bool sync,
>>>>> seg_freed = do_garbage_collect(sbi, segno, &gc_list, gc_type);
>>>>> if (gc_type == FG_GC && seg_freed == sbi->segs_per_sec)
>>>>> sec_freed++;
>>>>> + else if (gc_type == FG_GC && seg_freed == 0) {
>>>>> + if (!sbi->gc_loop.segmap) {
>>>>> + sbi->gc_loop.segmap =
>>>>> + kvzalloc(f2fs_bitmap_size(MAIN_SEGS(sbi)), GFP_KERNEL);
>>>>> + sbi->gc_loop.count = 0;
>>>>> + sbi->gc_loop.segno = NULL_SEGNO;
>>>>> + }
>>>>> + if (segno == sbi->gc_loop.segno) {
>>>>> + if (sbi->gc_loop.count > GC_LOOP_MAX) {
>>>>> + f2fs_bug_on(sbi, 1);
>>>>> + set_bit(segno, sbi->gc_loop.segmap);
>>>>> + sbi->gc_loop.count = 0;
>>>>> + sbi->gc_loop.segno = NULL_SEGNO;
>>>>> + }
>>>>> + else
>>>>> + sbi->gc_loop.count++;
>>>>> + } else {
>>>>> + sbi->gc_loop.segno = segno;
>>>>> + sbi->gc_loop.count = 0;
>>>>> + }
>>>>> + }
>>>>> total_freed += seg_freed;
>>>>> if (gc_type == FG_GC)
>>>>> @@ -1075,6 +1103,12 @@ int f2fs_gc(struct f2fs_sb_info *sbi, bool sync,
>>>>> if (sync)
>>>>> ret = sec_freed ? 0 : -EAGAIN;
>>>>> + if (sbi->gc_loop.segmap) {
>>>>> + kvfree(sbi->gc_loop.segmap);
>>>>> + sbi->gc_loop.segmap = NULL;
>>>>> + sbi->gc_loop.count = 0;
>>>>> + sbi->gc_loop.segno = NULL_SEGNO;
>>>>> + }
>>>>> return ret;
>>>>> }
>>>>> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
>>>>> index 031cb26..76f0b72 100644
>>>>> --- a/fs/f2fs/super.c
>>>>> +++ b/fs/f2fs/super.c
>>>>> @@ -2562,6 +2562,9 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
>>>>> sbi->last_valid_block_count = sbi->total_valid_block_count;
>>>>> sbi->reserved_blocks = 0;
>>>>> sbi->current_reserved_blocks = 0;
>>>>> + sbi->gc_loop.segmap = NULL;
>>>>> + sbi->gc_loop.count = 0;
>>>>> + sbi->gc_loop.segno = NULL_SEGNO;
>>>>> for (i = 0; i < NR_INODE_TYPE; i++) {
>>>>> INIT_LIST_HEAD(&sbi->inode_list[i]);
>>>>>
>>>> .
>>>>
>>
>> .
>>
>