[PATCH 4.9 68/86] NFS: Fix unstable write completion

From: Greg Kroah-Hartman
Date: Fri Mar 16 2018 - 11:32:54 EST


4.9-stable review patch. If anyone has any objections, please let me know.

------------------

From: Trond Myklebust <trond.myklebust@xxxxxxxxxxxxxxx>

commit c4f24df942a181699c5bab01b8e5e82b925f77f3 upstream.

We do want to respect the FLUSH_SYNC argument to nfs_commit_inode() to
ensure that all outstanding COMMIT requests to the inode in question are
complete. Currently we may exit early from both nfs_commit_inode() and
nfs_write_inode() even if there are COMMIT requests in flight, or unstable
writes on the commit list.

In order to get the right semantics w.r.t. sync_inode(), we don't need
to have nfs_commit_inode() reset the inode dirty flags when called from
nfs_wb_page() and/or nfs_wb_all(). We just need to ensure that
nfs_write_inode() leaves them in the right state if there are outstanding
commits, or stable pages.

Reported-by: Scott Mayhew <smayhew@xxxxxxxxxx>
Fixes: dc4fd9ab01ab ("nfs: don't wait on commit in nfs_commit_inode()...")
Cc: stable@xxxxxxxxxxxxxxx # v4.14+
Signed-off-by: Trond Myklebust <trond.myklebust@xxxxxxxxxxxxxxx>
Signed-off-by: Greg Kroah-Hartman <gregkh@xxxxxxxxxxxxxxxxxxx>

---
fs/nfs/write.c | 83 +++++++++++++++++++++++++++++----------------------------
1 file changed, 43 insertions(+), 40 deletions(-)

--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1847,40 +1847,43 @@ int nfs_generic_commit_list(struct inode
return status;
}

-int nfs_commit_inode(struct inode *inode, int how)
+static int __nfs_commit_inode(struct inode *inode, int how,
+ struct writeback_control *wbc)
{
LIST_HEAD(head);
struct nfs_commit_info cinfo;
int may_wait = how & FLUSH_SYNC;
- int error = 0;
- int res;
+ int ret, nscan;

nfs_init_cinfo_from_inode(&cinfo, inode);
nfs_commit_begin(cinfo.mds);
- res = nfs_scan_commit(inode, &head, &cinfo);
- if (res)
- error = nfs_generic_commit_list(inode, &head, how, &cinfo);
+ for (;;) {
+ ret = nscan = nfs_scan_commit(inode, &head, &cinfo);
+ if (ret <= 0)
+ break;
+ ret = nfs_generic_commit_list(inode, &head, how, &cinfo);
+ if (ret < 0)
+ break;
+ ret = 0;
+ if (wbc && wbc->sync_mode == WB_SYNC_NONE) {
+ if (nscan < wbc->nr_to_write)
+ wbc->nr_to_write -= nscan;
+ else
+ wbc->nr_to_write = 0;
+ }
+ if (nscan < INT_MAX)
+ break;
+ cond_resched();
+ }
nfs_commit_end(cinfo.mds);
- if (res == 0)
- return res;
- if (error < 0)
- goto out_error;
- if (!may_wait)
- goto out_mark_dirty;
- error = wait_on_commit(cinfo.mds);
- if (error < 0)
- return error;
- return res;
-out_error:
- res = error;
- /* Note: If we exit without ensuring that the commit is complete,
- * we must mark the inode as dirty. Otherwise, future calls to
- * sync_inode() with the WB_SYNC_ALL flag set will fail to ensure
- * that the data is on the disk.
- */
-out_mark_dirty:
- __mark_inode_dirty(inode, I_DIRTY_DATASYNC);
- return res;
+ if (ret || !may_wait)
+ return ret;
+ return wait_on_commit(cinfo.mds);
+}
+
+int nfs_commit_inode(struct inode *inode, int how)
+{
+ return __nfs_commit_inode(inode, how, NULL);
}
EXPORT_SYMBOL_GPL(nfs_commit_inode);

@@ -1890,11 +1893,11 @@ int nfs_write_inode(struct inode *inode,
int flags = FLUSH_SYNC;
int ret = 0;

- /* no commits means nothing needs to be done */
- if (!nfsi->commit_info.ncommit)
- return ret;
-
if (wbc->sync_mode == WB_SYNC_NONE) {
+ /* no commits means nothing needs to be done */
+ if (!nfsi->commit_info.ncommit)
+ goto check_requests_outstanding;
+
/* Don't commit yet if this is a non-blocking flush and there
* are a lot of outstanding writes for this mapping.
*/
@@ -1905,16 +1908,16 @@ int nfs_write_inode(struct inode *inode,
flags = 0;
}

- ret = nfs_commit_inode(inode, flags);
- if (ret >= 0) {
- if (wbc->sync_mode == WB_SYNC_NONE) {
- if (ret < wbc->nr_to_write)
- wbc->nr_to_write -= ret;
- else
- wbc->nr_to_write = 0;
- }
- return 0;
- }
+ ret = __nfs_commit_inode(inode, flags, wbc);
+ if (!ret) {
+ if (flags & FLUSH_SYNC)
+ return 0;
+ } else if (nfsi->commit_info.ncommit)
+ goto out_mark_dirty;
+
+check_requests_outstanding:
+ if (!atomic_read(&nfsi->commit_info.rpcs_out))
+ return ret;
out_mark_dirty:
__mark_inode_dirty(inode, I_DIRTY_DATASYNC);
return ret;