[PATCH] jffs2: GC deadlock reading a page that is used in jffs2_write_begin()

From: Kyeong Yoo
Date: Mon Jul 03 2017 - 10:11:45 EST


GC task can deadlock in read_cache_page() because it may attempt
to release a page that is actually allocated by another task in
jffs2_write_begin().
The reason is that in jffs2_write_begin() there is a small window
a cache page is allocated for use but not set Uptodate yet.

This ends up with a deadlock between two tasks:
1) A task (e.g. file copy)
- jffs2_write_begin() locks a cache page
- jffs2_write_end() tries to lock "alloc_sem" from
jffs2_reserve_space() <-- STUCK
2) GC task (jffs2_gcd_mtd3)
- jffs2_garbage_collect_pass() locks "alloc_sem"
- try to lock the same cache page in read_cache_page() <-- STUCK

So to avoid this deadlock, hold "alloc_sem" in jffs2_write_begin()
while reading data in a cache page.

Signed-off-by: Kyeong Yoo <kyeong.yoo@xxxxxxxxxxxxxxxxxxx>
---
fs/jffs2/file.c | 40 +++++++++++++++++++++++++---------------
1 file changed, 25 insertions(+), 15 deletions(-)

diff --git a/fs/jffs2/file.c b/fs/jffs2/file.c
index c12476e309c6..eb4e4d784d26 100644
--- a/fs/jffs2/file.c
+++ b/fs/jffs2/file.c
@@ -135,20 +135,15 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping,
struct page *pg;
struct inode *inode = mapping->host;
struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode);
+ struct jffs2_sb_info *c = JFFS2_SB_INFO(inode->i_sb);
pgoff_t index = pos >> PAGE_SHIFT;
uint32_t pageofs = index << PAGE_SHIFT;
int ret = 0;

- pg = grab_cache_page_write_begin(mapping, index, flags);
- if (!pg)
- return -ENOMEM;
- *pagep = pg;
-
jffs2_dbg(1, "%s()\n", __func__);

if (pageofs > inode->i_size) {
/* Make new hole frag from old EOF to new page */
- struct jffs2_sb_info *c = JFFS2_SB_INFO(inode->i_sb);
struct jffs2_raw_inode ri;
struct jffs2_full_dnode *fn;
uint32_t alloc_len;
@@ -159,7 +154,7 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping,
ret = jffs2_reserve_space(c, sizeof(ri), &alloc_len,
ALLOC_NORMAL, JFFS2_SUMMARY_INODE_SIZE);
if (ret)
- goto out_page;
+ goto out_err;

mutex_lock(&f->sem);
memset(&ri, 0, sizeof(ri));
@@ -189,7 +184,7 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping,
ret = PTR_ERR(fn);
jffs2_complete_reservation(c);
mutex_unlock(&f->sem);
- goto out_page;
+ goto out_err;
}
ret = jffs2_add_full_dnode_to_inode(c, f, fn);
if (f->metadata) {
@@ -204,7 +199,7 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping,
jffs2_free_full_dnode(fn);
jffs2_complete_reservation(c);
mutex_unlock(&f->sem);
- goto out_page;
+ goto out_err;
}
jffs2_complete_reservation(c);
inode->i_size = pageofs;
@@ -212,6 +207,19 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping,
}

/*
+ * While getting a page and reading data in, lock c->alloc_sem until
+ * the page is Uptodate. Otherwise GC task may attempt to read the same
+ * page in read_cache_page(), which causes a deadlock.
+ */
+ mutex_lock(&c->alloc_sem);
+ pg = grab_cache_page_write_begin(mapping, index, flags);
+ if (!pg) {
+ ret = -ENOMEM;
+ goto release_sem;
+ }
+ *pagep = pg;
+
+ /*
* Read in the page if it wasn't already present. Cannot optimize away
* the whole page write case until jffs2_write_end can handle the
* case of a short-copy.
@@ -220,15 +228,17 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping,
mutex_lock(&f->sem);
ret = jffs2_do_readpage_nolock(inode, pg);
mutex_unlock(&f->sem);
- if (ret)
- goto out_page;
+ if (ret) {
+ unlock_page(pg);
+ put_page(pg);
+ goto release_sem;
+ }
}
jffs2_dbg(1, "end write_begin(). pg->flags %lx\n", pg->flags);
- return ret;

-out_page:
- unlock_page(pg);
- put_page(pg);
+release_sem:
+ mutex_unlock(&c->alloc_sem);
+out_err:
return ret;
}