[PATCH 4.2.y-ckt 137/268] zram/zcomp: use GFP_NOIO to allocate streams

From: Kamal Mostafa
Date: Wed Jan 27 2016 - 16:28:53 EST


4.2.8-ckt3 -stable review patch. If anyone has any objections, please let me know.

---8<------------------------------------------------------------

From: Sergey Senozhatsky <sergey.senozhatsky@xxxxxxxxx>

commit 3d5fe03a3ea013060ebba2a811aeb0f23f56aefa upstream.

We can end up allocating a new compression stream with GFP_KERNEL from
within the IO path, which may result is nested (recursive) IO
operations. That can introduce problems if the IO path in question is a
reclaimer, holding some locks that will deadlock nested IOs.

Allocate streams and working memory using GFP_NOIO flag, forbidding
recursive IO and FS operations.

An example:

inconsistent {IN-RECLAIM_FS-W} -> {RECLAIM_FS-ON-W} usage.
git/20158 [HC0[0]:SC0[0]:HE1:SE1] takes:
(jbd2_handle){+.+.?.}, at: start_this_handle+0x4ca/0x555
{IN-RECLAIM_FS-W} state was registered at:
__lock_acquire+0x8da/0x117b
lock_acquire+0x10c/0x1a7
start_this_handle+0x52d/0x555
jbd2__journal_start+0xb4/0x237
__ext4_journal_start_sb+0x108/0x17e
ext4_dirty_inode+0x32/0x61
__mark_inode_dirty+0x16b/0x60c
iput+0x11e/0x274
__dentry_kill+0x148/0x1b8
shrink_dentry_list+0x274/0x44a
prune_dcache_sb+0x4a/0x55
super_cache_scan+0xfc/0x176
shrink_slab.part.14.constprop.25+0x2a2/0x4d3
shrink_zone+0x74/0x140
kswapd+0x6b7/0x930
kthread+0x107/0x10f
ret_from_fork+0x3f/0x70
irq event stamp: 138297
hardirqs last enabled at (138297): debug_check_no_locks_freed+0x113/0x12f
hardirqs last disabled at (138296): debug_check_no_locks_freed+0x33/0x12f
softirqs last enabled at (137818): __do_softirq+0x2d3/0x3e9
softirqs last disabled at (137813): irq_exit+0x41/0x95

other info that might help us debug this:
Possible unsafe locking scenario:
CPU0
----
lock(jbd2_handle);
<Interrupt>
lock(jbd2_handle);

*** DEADLOCK ***
5 locks held by git/20158:
#0: (sb_writers#7){.+.+.+}, at: [<ffffffff81155411>] mnt_want_write+0x24/0x4b
#1: (&type->i_mutex_dir_key#2/1){+.+.+.}, at: [<ffffffff81145087>] lock_rename+0xd9/0xe3
#2: (&sb->s_type->i_mutex_key#11){+.+.+.}, at: [<ffffffff8114f8e2>] lock_two_nondirectories+0x3f/0x6b
#3: (&sb->s_type->i_mutex_key#11/4){+.+.+.}, at: [<ffffffff8114f909>] lock_two_nondirectories+0x66/0x6b
#4: (jbd2_handle){+.+.?.}, at: [<ffffffff811e31db>] start_this_handle+0x4ca/0x555

stack backtrace:
CPU: 2 PID: 20158 Comm: git Not tainted 4.1.0-rc7-next-20150615-dbg-00016-g8bdf555-dirty #211
Call Trace:
dump_stack+0x4c/0x6e
mark_lock+0x384/0x56d
mark_held_locks+0x5f/0x76
lockdep_trace_alloc+0xb2/0xb5
kmem_cache_alloc_trace+0x32/0x1e2
zcomp_strm_alloc+0x25/0x73 [zram]
zcomp_strm_multi_find+0xe7/0x173 [zram]
zcomp_strm_find+0xc/0xe [zram]
zram_bvec_rw+0x2ca/0x7e0 [zram]
zram_make_request+0x1fa/0x301 [zram]
generic_make_request+0x9c/0xdb
submit_bio+0xf7/0x120
ext4_io_submit+0x2e/0x43
ext4_bio_write_page+0x1b7/0x300
mpage_submit_page+0x60/0x77
mpage_map_and_submit_buffers+0x10f/0x21d
ext4_writepages+0xc8c/0xe1b
do_writepages+0x23/0x2c
__filemap_fdatawrite_range+0x84/0x8b
filemap_flush+0x1c/0x1e
ext4_alloc_da_blocks+0xb8/0x117
ext4_rename+0x132/0x6dc
? mark_held_locks+0x5f/0x76
ext4_rename2+0x29/0x2b
vfs_rename+0x540/0x636
SyS_renameat2+0x359/0x44d
SyS_rename+0x1e/0x20
entry_SYSCALL_64_fastpath+0x12/0x6f

[minchan@xxxxxxxxxx: add stable mark]
Signed-off-by: Sergey Senozhatsky <sergey.senozhatsky@xxxxxxxxx>
Acked-by: Minchan Kim <minchan@xxxxxxxxxx>
Cc: Kyeongdon Kim <kyeongdon.kim@xxxxxxx>
Signed-off-by: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx>
Signed-off-by: Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx>
Signed-off-by: Kamal Mostafa <kamal@xxxxxxxxxxxxx>
---
drivers/block/zram/zcomp.c | 4 ++--
drivers/block/zram/zcomp_lz4.c | 2 +-
drivers/block/zram/zcomp_lzo.c | 2 +-
3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/block/zram/zcomp.c b/drivers/block/zram/zcomp.c
index 5cb13ca..c536177 100644
--- a/drivers/block/zram/zcomp.c
+++ b/drivers/block/zram/zcomp.c
@@ -76,7 +76,7 @@ static void zcomp_strm_free(struct zcomp *comp, struct zcomp_strm *zstrm)
*/
static struct zcomp_strm *zcomp_strm_alloc(struct zcomp *comp)
{
- struct zcomp_strm *zstrm = kmalloc(sizeof(*zstrm), GFP_KERNEL);
+ struct zcomp_strm *zstrm = kmalloc(sizeof(*zstrm), GFP_NOIO);
if (!zstrm)
return NULL;

@@ -85,7 +85,7 @@ static struct zcomp_strm *zcomp_strm_alloc(struct zcomp *comp)
* allocate 2 pages. 1 for compressed data, plus 1 extra for the
* case when compressed size is larger than the original one
*/
- zstrm->buffer = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, 1);
+ zstrm->buffer = (void *)__get_free_pages(GFP_NOIO | __GFP_ZERO, 1);
if (!zstrm->private || !zstrm->buffer) {
zcomp_strm_free(comp, zstrm);
zstrm = NULL;
diff --git a/drivers/block/zram/zcomp_lz4.c b/drivers/block/zram/zcomp_lz4.c
index f2afb7e..ee44b51 100644
--- a/drivers/block/zram/zcomp_lz4.c
+++ b/drivers/block/zram/zcomp_lz4.c
@@ -15,7 +15,7 @@

static void *zcomp_lz4_create(void)
{
- return kzalloc(LZ4_MEM_COMPRESS, GFP_KERNEL);
+ return kzalloc(LZ4_MEM_COMPRESS, GFP_NOIO);
}

static void zcomp_lz4_destroy(void *private)
diff --git a/drivers/block/zram/zcomp_lzo.c b/drivers/block/zram/zcomp_lzo.c
index da1bc47..683ce04 100644
--- a/drivers/block/zram/zcomp_lzo.c
+++ b/drivers/block/zram/zcomp_lzo.c
@@ -15,7 +15,7 @@

static void *lzo_create(void)
{
- return kzalloc(LZO1X_MEM_COMPRESS, GFP_KERNEL);
+ return kzalloc(LZO1X_MEM_COMPRESS, GFP_NOIO);
}

static void lzo_destroy(void *private)
--
1.9.1