[RFC 4/9] gfs2: Fix mmap + page fault deadlocks (part 1)

From: Andreas Gruenbacher
Date: Mon May 31 2021 - 13:09:16 EST


When the buffer passed to a read or write system call is memory mapped
to the same file, a page fault can occur in filemap_fault. In that
case, the task will already be holding the inode glock, and trying to
take the same lock again will result in a BUG in add_to_queue().

Fix that by recognizing the self-recursion case. Either skip the lock
taking (when the glock is held in a compatible way), or fail the
operation.

Likewise, a request to un-share a copy-on-write page can *probably*
happen in similar situations, so treat the locking in gfs2_page_mkwrite
in the same way.

A future patch will handle these case more gracefully by retrying
operations instead of failing them, along with addressing more complex
deadlock scenarios.

Reported-by: Jan Kara <jack@xxxxxxx>
Signed-off-by: Andreas Gruenbacher <agruenba@xxxxxxxxxx>
---
fs/gfs2/file.c | 40 ++++++++++++++++++++++++++++++----------
1 file changed, 30 insertions(+), 10 deletions(-)

diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 6d77743f11a4..7d88abb4629b 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -423,6 +423,7 @@ static vm_fault_t gfs2_page_mkwrite(struct vm_fault *vmf)
struct page *page = vmf->page;
struct inode *inode = file_inode(vmf->vma->vm_file);
struct gfs2_inode *ip = GFS2_I(inode);
+ struct gfs2_holder *outer_gh = gfs2_glock_is_locked_by_me(ip->i_gl);
struct gfs2_sbd *sdp = GFS2_SB(inode);
struct gfs2_alloc_parms ap = { .aflags = 0, };
u64 offset = page_offset(page);
@@ -436,10 +437,18 @@ static vm_fault_t gfs2_page_mkwrite(struct vm_fault *vmf)
sb_start_pagefault(inode->i_sb);

gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
- err = gfs2_glock_nq(&gh);
- if (err) {
- ret = block_page_mkwrite_return(err);
- goto out_uninit;
+ if (likely(!outer_gh)) {
+ err = gfs2_glock_nq(&gh);
+ if (err) {
+ ret = block_page_mkwrite_return(err);
+ goto out_uninit;
+ }
+ } else {
+ if (!gfs2_holder_is_compatible(outer_gh, LM_ST_EXCLUSIVE)) {
+ /* We could try to upgrade outer_gh here. */
+ ret = VM_FAULT_SIGBUS;
+ goto out_uninit;
+ }
}

/* Check page index against inode size */
@@ -540,7 +549,8 @@ static vm_fault_t gfs2_page_mkwrite(struct vm_fault *vmf)
out_quota_unlock:
gfs2_quota_unlock(ip);
out_unlock:
- gfs2_glock_dq(&gh);
+ if (likely(!outer_gh))
+ gfs2_glock_dq(&gh);
out_uninit:
gfs2_holder_uninit(&gh);
if (ret == VM_FAULT_LOCKED) {
@@ -555,6 +565,7 @@ static vm_fault_t gfs2_fault(struct vm_fault *vmf)
{
struct inode *inode = file_inode(vmf->vma->vm_file);
struct gfs2_inode *ip = GFS2_I(inode);
+ struct gfs2_holder *outer_gh = gfs2_glock_is_locked_by_me(ip->i_gl);
struct gfs2_holder gh;
vm_fault_t ret;
u16 state;
@@ -562,13 +573,22 @@ static vm_fault_t gfs2_fault(struct vm_fault *vmf)

state = (vmf->flags & FAULT_FLAG_WRITE) ? LM_ST_EXCLUSIVE : LM_ST_SHARED;
gfs2_holder_init(ip->i_gl, state, 0, &gh);
- err = gfs2_glock_nq(&gh);
- if (err) {
- ret = block_page_mkwrite_return(err);
- goto out_uninit;
+ if (likely(!outer_gh)) {
+ err = gfs2_glock_nq(&gh);
+ if (err) {
+ ret = block_page_mkwrite_return(err);
+ goto out_uninit;
+ }
+ } else {
+ if (!gfs2_holder_is_compatible(outer_gh, state)) {
+ /* We could try to upgrade outer_gh here. */
+ ret = VM_FAULT_SIGBUS;
+ goto out_uninit;
+ }
}
ret = filemap_fault(vmf);
- gfs2_glock_dq(&gh);
+ if (likely(!outer_gh))
+ gfs2_glock_dq(&gh);
out_uninit:
gfs2_holder_uninit(&gh);
return ret;
--
2.26.3