[PATCH v6 19/34] fuse: Pin pages rather than ref'ing if appropriate

From: David Howells
Date: Mon Jan 16 2023 - 18:15:09 EST


Convert the fuse code to use iov_iter_extract_pages() instead of
iov_iter_get_pages(). Depending on the type of iterator, this will either
pin the pages or leave them unaltered, rather than getting a ref on them.

The pages need to be pinned for DIO-read rather than having refs taken on
them to prevent VM copy-on-write from malfunctioning during a concurrent
fork() (the result of the I/O would otherwise end up only visible to the
child process and not the parent).

Signed-off-by: David Howells <dhowells@xxxxxxxxxx>
cc: Miklos Szeredi <miklos@xxxxxxxxxx>
cc: Al Viro <viro@xxxxxxxxxxxxxxxxxx>
cc: Christoph Hellwig <hch@xxxxxx>
cc: linux-fsdevel@xxxxxxxxxxxxxxx
---

fs/fuse/dev.c | 25 +++++++++++++++++++------
fs/fuse/file.c | 26 ++++++++++++++++++--------
fs/fuse/fuse_i.h | 1 +
3 files changed, 38 insertions(+), 14 deletions(-)

diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index e3d8443e24a6..107497e68726 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -641,6 +641,7 @@ static int unlock_request(struct fuse_req *req)

struct fuse_copy_state {
int write;
+ unsigned int cleanup_mode; /* Page cleanup mode (0/FOLL_GET/FOLL_PIN) */
struct fuse_req *req;
struct iov_iter *iter;
struct pipe_buffer *pipebufs;
@@ -661,6 +662,11 @@ static void fuse_copy_init(struct fuse_copy_state *cs, int write,
cs->iter = iter;
}

+static void fuse_release_copy_page(struct fuse_copy_state *cs, struct page *page)
+{
+ page_put_unpin(page, cs->cleanup_mode);
+}
+
/* Unmap and put previous page of userspace buffer */
static void fuse_copy_finish(struct fuse_copy_state *cs)
{
@@ -675,7 +681,7 @@ static void fuse_copy_finish(struct fuse_copy_state *cs)
flush_dcache_page(cs->pg);
set_page_dirty_lock(cs->pg);
}
- put_page(cs->pg);
+ fuse_release_copy_page(cs, cs->pg);
}
cs->pg = NULL;
}
@@ -704,6 +710,7 @@ static int fuse_copy_fill(struct fuse_copy_state *cs)

BUG_ON(!cs->nr_segs);
cs->currbuf = buf;
+ cs->cleanup_mode = FOLL_GET;
cs->pg = buf->page;
cs->offset = buf->offset;
cs->len = buf->len;
@@ -722,6 +729,7 @@ static int fuse_copy_fill(struct fuse_copy_state *cs)
buf->len = 0;

cs->currbuf = buf;
+ cs->cleanup_mode = FOLL_GET;
cs->pg = page;
cs->offset = 0;
cs->len = PAGE_SIZE;
@@ -729,15 +737,18 @@ static int fuse_copy_fill(struct fuse_copy_state *cs)
cs->nr_segs++;
}
} else {
+ unsigned int gup_flags = cs->write ? FOLL_SOURCE_BUF : FOLL_DEST_BUF;
+ struct page **pages = &cs->pg;
size_t off;
- err = iov_iter_get_pages(cs->iter, &page, PAGE_SIZE, 1, &off,
- cs->write ? FOLL_SOURCE_BUF : FOLL_DEST_BUF);
+
+ err = iov_iter_extract_pages(cs->iter, &pages, PAGE_SIZE, 1,
+ gup_flags, &off);
if (err < 0)
return err;
BUG_ON(!err);
cs->len = err;
cs->offset = off;
- cs->pg = page;
+ cs->cleanup_mode = iov_iter_extract_mode(cs->iter, gup_flags);
}

return lock_request(cs->req);
@@ -899,10 +910,12 @@ static int fuse_ref_page(struct fuse_copy_state *cs, struct page *page,
if (cs->nr_segs >= cs->pipe->max_usage)
return -EIO;

- get_page(page);
+ err = try_grab_page(page, cs->cleanup_mode);
+ if (err < 0)
+ return err;
err = unlock_request(cs->req);
if (err) {
- put_page(page);
+ fuse_release_copy_page(cs, page);
return err;
}

diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 68c196437306..c317300e757a 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -624,6 +624,11 @@ void fuse_read_args_fill(struct fuse_io_args *ia, struct file *file, loff_t pos,
args->out_args[0].size = count;
}

+static void fuse_release_page(struct fuse_args_pages *ap, struct page *page)
+{
+ page_put_unpin(page, ap->cleanup_mode);
+}
+
static void fuse_release_user_pages(struct fuse_args_pages *ap,
bool should_dirty)
{
@@ -632,7 +637,7 @@ static void fuse_release_user_pages(struct fuse_args_pages *ap,
for (i = 0; i < ap->num_pages; i++) {
if (should_dirty)
set_page_dirty_lock(ap->pages[i]);
- put_page(ap->pages[i]);
+ fuse_release_page(ap, ap->pages[i]);
}
}

@@ -920,7 +925,7 @@ static void fuse_readpages_end(struct fuse_mount *fm, struct fuse_args *args,
else
SetPageError(page);
unlock_page(page);
- put_page(page);
+ fuse_release_page(ap, page);
}
if (ia->ff)
fuse_file_put(ia->ff, false, false);
@@ -1153,7 +1158,7 @@ static ssize_t fuse_send_write_pages(struct fuse_io_args *ia,
}
if (ia->write.page_locked && (i == ap->num_pages - 1))
unlock_page(page);
- put_page(page);
+ fuse_release_page(ap, page);
}

return err;
@@ -1172,6 +1177,7 @@ static ssize_t fuse_fill_write_pages(struct fuse_io_args *ia,

ap->args.in_pages = true;
ap->descs[0].offset = offset;
+ ap->cleanup_mode = FOLL_GET;

do {
size_t tmp;
@@ -1200,7 +1206,7 @@ static ssize_t fuse_fill_write_pages(struct fuse_io_args *ia,

if (!tmp) {
unlock_page(page);
- put_page(page);
+ fuse_release_page(ap, page);
goto again;
}

@@ -1393,9 +1399,12 @@ static int fuse_get_user_pages(struct fuse_args_pages *ap, struct iov_iter *ii,
size_t *nbytesp, int write,
unsigned int max_pages)
{
+ unsigned int gup_flags = write ? FOLL_SOURCE_BUF : FOLL_DEST_BUF;
size_t nbytes = 0; /* # bytes already packed in req */
ssize_t ret = 0;

+ ap->cleanup_mode = iov_iter_extract_mode(ii, gup_flags);
+
/* Special case for kernel I/O: can copy directly into the buffer */
if (iov_iter_is_kvec(ii)) {
unsigned long user_addr = fuse_get_user_addr(ii);
@@ -1412,12 +1421,13 @@ static int fuse_get_user_pages(struct fuse_args_pages *ap, struct iov_iter *ii,
}

while (nbytes < *nbytesp && ap->num_pages < max_pages) {
+ struct page **pages = &ap->pages[ap->num_pages];
unsigned npages;
size_t start;
- ret = iov_iter_get_pages(ii, &ap->pages[ap->num_pages],
- *nbytesp - nbytes,
- max_pages - ap->num_pages,
- &start, write ? FOLL_SOURCE_BUF : FOLL_DEST_BUF);
+ ret = iov_iter_extract_pages(ii, &pages,
+ *nbytesp - nbytes,
+ max_pages - ap->num_pages,
+ gup_flags, &start);
if (ret < 0)
break;

diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index c673faefdcb9..7b6be1dd7593 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -271,6 +271,7 @@ struct fuse_args_pages {
struct page **pages;
struct fuse_page_desc *descs;
unsigned int num_pages;
+ unsigned int cleanup_mode;
};

#define FUSE_ARGS(args) struct fuse_args args = {}