Re: [PATCH v5 40/40] 9p: Use netfslib read/write_iter

From: David Howells
Date: Thu May 23 2024 - 03:44:34 EST


Hi Andrea,

Note that there are changes to the netfslib write-side upstream and you might
also want to apply the attached.

In https://bugs.launchpad.net/ubuntu/+source/autopkgtest/+bug/2056461 you say:

| It seems that kernel 6.8 introduced a regression in the 9pfs related to
| caching and netfslib, that can cause some user-space apps to read content
| from files that is not up-to-date (when they are used in a producer/consumer
| fashion).

Can you clarify how these files are being used?

David
---
commit 39302c160390441ed5b4f4f7ad480c44eddf0962
Author: David Howells <dhowells@xxxxxxxxxx>
Date: Wed May 22 17:30:22 2024 +0100

netfs, 9p: Fix race between umount and async request completion

There's a problem in 9p's interaction with netfslib whereby a crash occurs
because the 9p_fid structs get forcibly destroyed during client teardown
(without paying attention to their refcounts) before netfslib has finished
with them. However, it's not a simple case of deferring the clunking that
p9_fid_put() does as that requires the client.

The problem is that netfslib has to unlock pages and clear the IN_PROGRESS
flag before destroying the objects involved - including the pid - and, in
any case, nothing checks to see if writeback completed barring looking at
the page flags.

Fix this by keeping a count of outstanding I/O requests (of any type) and
waiting for it to quiesce during inode eviction.

Signed-off-by: David Howells <dhowells@xxxxxxxxxx>
cc: Eric Van Hensbergen <ericvh@xxxxxxxxxx>
cc: Latchesar Ionkov <lucho@xxxxxxxxxx>
cc: Dominique Martinet <asmadeus@xxxxxxxxxxxxx>
cc: Christian Schoenebeck <linux_oss@xxxxxxxxxxxxx>
cc: Jeff Layton <jlayton@xxxxxxxxxx>
cc: Steve French <sfrench@xxxxxxxxx>
cc: v9fs@xxxxxxxxxxxxxxx
cc: linux-afs@xxxxxxxxxxxxxxxxxxx
cc: linux-cifs@xxxxxxxxxxxxxxx
cc: netfs@xxxxxxxxxxxxxxx
cc: linux-fsdevel@xxxxxxxxxxxxxxx

diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 8c9a896d691e..57cfa9f65046 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -354,6 +354,7 @@ void v9fs_evict_inode(struct inode *inode)
version = cpu_to_le32(v9inode->qid.version);
netfs_clear_inode_writeback(inode, &version);

+ netfs_wait_for_outstanding_io(inode);
clear_inode(inode);
filemap_fdatawrite(&inode->i_data);

@@ -361,8 +362,10 @@ void v9fs_evict_inode(struct inode *inode)
if (v9fs_inode_cookie(v9inode))
fscache_relinquish_cookie(v9fs_inode_cookie(v9inode), false);
#endif
- } else
+ } else {
+ netfs_wait_for_outstanding_io(inode);
clear_inode(inode);
+ }
}

struct inode *
diff --git a/fs/afs/inode.c b/fs/afs/inode.c
index 94fc049aff58..c831e711a4ac 100644
--- a/fs/afs/inode.c
+++ b/fs/afs/inode.c
@@ -652,6 +652,7 @@ void afs_evict_inode(struct inode *inode)

afs_set_cache_aux(vnode, &aux);
netfs_clear_inode_writeback(inode, &aux);
+ netfs_wait_for_outstanding_io(inode);
clear_inode(inode);

while (!list_empty(&vnode->wb_keys)) {
diff --git a/fs/netfs/objects.c b/fs/netfs/objects.c
index c90d482b1650..f4a642727479 100644
--- a/fs/netfs/objects.c
+++ b/fs/netfs/objects.c
@@ -72,6 +72,7 @@ struct netfs_io_request *netfs_alloc_request(struct address_space *mapping,
}
}

+ atomic_inc(&ctx->io_count);
trace_netfs_rreq_ref(rreq->debug_id, 1, netfs_rreq_trace_new);
netfs_proc_add_rreq(rreq);
netfs_stat(&netfs_n_rh_rreq);
@@ -124,6 +125,7 @@ static void netfs_free_request(struct work_struct *work)
{
struct netfs_io_request *rreq =
container_of(work, struct netfs_io_request, work);
+ struct netfs_inode *ictx = netfs_inode(rreq->inode);
unsigned int i;

trace_netfs_rreq(rreq, netfs_rreq_trace_free);
@@ -142,6 +144,9 @@ static void netfs_free_request(struct work_struct *work)
}
kvfree(rreq->direct_bv);
}
+
+ if (atomic_dec_and_test(&ictx->io_count))
+ wake_up_var(&ictx->io_count);
call_rcu(&rreq->rcu, netfs_free_request_rcu);
}

diff --git a/fs/smb/client/cifsfs.c b/fs/smb/client/cifsfs.c
index ec5b639f421a..21c9e173ea9a 100644
--- a/fs/smb/client/cifsfs.c
+++ b/fs/smb/client/cifsfs.c
@@ -435,6 +435,7 @@ cifs_evict_inode(struct inode *inode)
if (inode->i_state & I_PINNING_NETFS_WB)
cifs_fscache_unuse_inode_cookie(inode, true);
cifs_fscache_release_inode_cookie(inode);
+ netfs_wait_for_outstanding_io(inode);
clear_inode(inode);
}

diff --git a/include/linux/netfs.h b/include/linux/netfs.h
index d2d291a9cdad..3ca3906bb8da 100644
--- a/include/linux/netfs.h
+++ b/include/linux/netfs.h
@@ -68,6 +68,7 @@ struct netfs_inode {
loff_t remote_i_size; /* Size of the remote file */
loff_t zero_point; /* Size after which we assume there's no data
* on the server */
+ atomic_t io_count; /* Number of outstanding reqs */
unsigned long flags;
#define NETFS_ICTX_ODIRECT 0 /* The file has DIO in progress */
#define NETFS_ICTX_UNBUFFERED 1 /* I/O should not use the pagecache */
@@ -474,6 +475,7 @@ static inline void netfs_inode_init(struct netfs_inode *ctx,
ctx->remote_i_size = i_size_read(&ctx->inode);
ctx->zero_point = LLONG_MAX;
ctx->flags = 0;
+ atomic_set(&ctx->io_count, 0);
#if IS_ENABLED(CONFIG_FSCACHE)
ctx->cache = NULL;
#endif
@@ -517,4 +519,20 @@ static inline struct fscache_cookie *netfs_i_cookie(struct netfs_inode *ctx)
#endif
}

+/**
+ * netfs_wait_for_outstanding_io - Wait for outstanding I/O to complete
+ * @ctx: The netfs inode to wait on
+ *
+ * Wait for outstanding I/O requests of any type to complete. This is intended
+ * to be called from inode eviction routines. This makes sure that any
+ * resources held by those requests are cleaned up before we let the inode get
+ * cleaned up.
+ */
+static inline void netfs_wait_for_outstanding_io(struct inode *inode)
+{
+ struct netfs_inode *ictx = netfs_inode(inode);
+
+ wait_var_event(&ictx->io_count, atomic_read(&ictx->io_count) == 0);
+}
+
#endif /* _LINUX_NETFS_H */