[PATCH v1 12/15] fs/direct-io: keep track of whether a page is coming from GUP or not

From: jglisse
Date: Thu Apr 11 2019 - 17:09:14 EST


From: Jérôme Glisse <jglisse@xxxxxxxxxx>

We want to keep track of how we got a reference on a page when doing DIO,
i.e. whether the page was referenced through GUP (get_user_page*) or not.
To that end, this patch reworks the way the page reference is taken and
handed over between the DIO code and the BIO. Instead of taking an extra
reference for each page that has been successfully added to a BIO, we just
steal the reference we already hold from looking up the page (either
through GUP or for ZERO_PAGE).

So this patch keeps track of whether the reference has been stolen by the
BIO or not. This avoids a bunch of get_page()/put_page() calls, which
limits the number of atomic operations.

Signed-off-by: Jérôme Glisse <jglisse@xxxxxxxxxx>
Cc: linux-fsdevel@xxxxxxxxxxxxxxx
Cc: linux-block@xxxxxxxxxxxxxxx
Cc: linux-mm@xxxxxxxxx
Cc: John Hubbard <jhubbard@xxxxxxxxxx>
Cc: Jan Kara <jack@xxxxxxx>
Cc: Dan Williams <dan.j.williams@xxxxxxxxx>
Cc: Alexander Viro <viro@xxxxxxxxxxxxxxxxxx>
Cc: Johannes Thumshirn <jthumshirn@xxxxxxx>
Cc: Christoph Hellwig <hch@xxxxxx>
Cc: Jens Axboe <axboe@xxxxxxxxx>
Cc: Ming Lei <ming.lei@xxxxxxxxxx>
Cc: Dave Chinner <david@xxxxxxxxxxxxx>
Cc: Jason Gunthorpe <jgg@xxxxxxxx>
Cc: Matthew Wilcox <willy@xxxxxxxxxxxxx>
Cc: Ernesto A. Fernández <ernesto.mnd.fernandez@xxxxxxxxx>
Cc: Jeff Moyer <jmoyer@xxxxxxxxxx>
---
fs/direct-io.c | 82 ++++++++++++++++++++++++++++++++++++--------------
1 file changed, 60 insertions(+), 22 deletions(-)

diff --git a/fs/direct-io.c b/fs/direct-io.c
index b8b5d8e31aeb..ef9fc7703a78 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -100,6 +100,7 @@ struct dio_submit {
unsigned cur_page_len; /* Nr of bytes at cur_page_offset */
sector_t cur_page_block; /* Where it starts */
loff_t cur_page_fs_offset; /* Offset in file */
+ bool cur_page_from_gup; /* Current page is coming from GUP */

struct iov_iter *iter;
/*
@@ -148,6 +149,8 @@ struct dio {
struct page *pages[DIO_PAGES]; /* page buffer */
struct work_struct complete_work;/* deferred AIO completion */
};
+
+ bool gup; /* pages are coming from GUP */
} ____cacheline_aligned_in_smp;

static struct kmem_cache *dio_cache __read_mostly;
@@ -167,6 +170,7 @@ static inline int dio_refill_pages(struct dio *dio, struct dio_submit *sdio)
{
ssize_t ret;

+ dio->gup = iov_iter_get_pages_use_gup(sdio->iter);
ret = iov_iter_get_pages(sdio->iter, dio->pages, LONG_MAX, DIO_PAGES,
&sdio->from);

@@ -181,6 +185,7 @@ static inline int dio_refill_pages(struct dio *dio, struct dio_submit *sdio)
dio->page_errors = ret;
get_page(page);
dio->pages[0] = page;
+ dio->gup = false;
sdio->head = 0;
sdio->tail = 1;
sdio->from = 0;
@@ -490,8 +495,12 @@ static inline void dio_bio_submit(struct dio *dio, struct dio_submit *sdio)
*/
static inline void dio_cleanup(struct dio *dio, struct dio_submit *sdio)
{
- while (sdio->head < sdio->tail)
- put_page(dio->pages[sdio->head++]);
+ while (sdio->head < sdio->tail) {
+ if (dio->gup)
+ put_user_page(dio->pages[sdio->head++]);
+ else
+ put_page(dio->pages[sdio->head++]);
+ }
}

/*
@@ -760,15 +769,19 @@ static inline int dio_bio_add_page(struct dio_submit *sdio)
{
int ret;

- ret = bio_add_page(sdio->bio, sdio->cur_page,
- sdio->cur_page_len, sdio->cur_page_offset, false);
+ /*
+ * The bio steals the page reference. That is fine because a page can
+ * only be added once, i.e. when dio_send_cur_page() is called, and each
+ * call to dio_send_cur_page() clears cur_page (on success).
+ */
+ ret = bio_add_page(sdio->bio, sdio->cur_page, sdio->cur_page_len,
+ sdio->cur_page_offset, sdio->cur_page_from_gup);
if (ret == sdio->cur_page_len) {
/*
* Decrement count only, if we are done with this page
*/
if ((sdio->cur_page_len + sdio->cur_page_offset) == PAGE_SIZE)
sdio->pages_in_io--;
- get_page(sdio->cur_page);
sdio->final_block_in_bio = sdio->cur_page_block +
(sdio->cur_page_len >> sdio->blkbits);
ret = 0;
@@ -828,9 +841,14 @@ static inline int dio_send_cur_page(struct dio *dio, struct dio_submit *sdio,
ret = dio_new_bio(dio, sdio, sdio->cur_page_block, map_bh);
if (ret == 0) {
ret = dio_bio_add_page(sdio);
+ if (!ret)
+ /* Clear the current page. */
+ sdio->cur_page = NULL;
BUG_ON(ret != 0);
}
- }
+ } else
+ /* Clear the current page. */
+ sdio->cur_page = NULL;
out:
return ret;
}
@@ -855,7 +873,7 @@ static inline int dio_send_cur_page(struct dio *dio, struct dio_submit *sdio,
static inline int
submit_page_section(struct dio *dio, struct dio_submit *sdio, struct page *page,
unsigned offset, unsigned len, sector_t blocknr,
- struct buffer_head *map_bh)
+ struct buffer_head *map_bh, bool gup)
{
int ret = 0;

@@ -882,14 +900,13 @@ submit_page_section(struct dio *dio, struct dio_submit *sdio, struct page *page,
*/
if (sdio->cur_page) {
ret = dio_send_cur_page(dio, sdio, map_bh);
- put_page(sdio->cur_page);
- sdio->cur_page = NULL;
if (ret)
return ret;
}

- get_page(page); /* It is in dio */
+ /* Steal page reference and GUP flag */
sdio->cur_page = page;
+ sdio->cur_page_from_gup = gup;
sdio->cur_page_offset = offset;
sdio->cur_page_len = len;
sdio->cur_page_block = blocknr;
@@ -903,8 +920,6 @@ submit_page_section(struct dio *dio, struct dio_submit *sdio, struct page *page,
ret = dio_send_cur_page(dio, sdio, map_bh);
if (sdio->bio)
dio_bio_submit(dio, sdio);
- put_page(sdio->cur_page);
- sdio->cur_page = NULL;
}
return ret;
}
@@ -946,13 +961,29 @@ static inline void dio_zero_block(struct dio *dio, struct dio_submit *sdio,
this_chunk_bytes = this_chunk_blocks << sdio->blkbits;

page = ZERO_PAGE(0);
+ get_page(page);
if (submit_page_section(dio, sdio, page, 0, this_chunk_bytes,
- sdio->next_block_for_io, map_bh))
+ sdio->next_block_for_io, map_bh, false)) {
+ put_page(page);
return;
+ }

sdio->next_block_for_io += this_chunk_blocks;
}

+static inline void dio_put_page(const struct dio *dio, bool stolen,
+ struct page *page)
+{
+ /* If page reference was stolen then nothing to do. */
+ if (stolen)
+ return;
+
+ if (dio->gup)
+ put_user_page(page);
+ else
+ put_page(page);
+}
+
/*
* Walk the user pages, and the file, mapping blocks to disk and generating
* a sequence of (page,offset,len,block) mappings. These mappings are injected
@@ -977,6 +1008,7 @@ static int do_direct_IO(struct dio *dio, struct dio_submit *sdio,
int ret = 0;

while (sdio->block_in_file < sdio->final_block_in_request) {
+ bool stolen = false;
struct page *page;
size_t from, to;

@@ -1003,7 +1035,7 @@ static int do_direct_IO(struct dio *dio, struct dio_submit *sdio,

ret = get_more_blocks(dio, sdio, map_bh);
if (ret) {
- put_page(page);
+ dio_put_page(dio, stolen, page);
goto out;
}
if (!buffer_mapped(map_bh))
@@ -1048,7 +1080,7 @@ static int do_direct_IO(struct dio *dio, struct dio_submit *sdio,

/* AKPM: eargh, -ENOTBLK is a hack */
if (dio->op == REQ_OP_WRITE) {
- put_page(page);
+ dio_put_page(dio, stolen, page);
return -ENOTBLK;
}

@@ -1061,7 +1093,7 @@ static int do_direct_IO(struct dio *dio, struct dio_submit *sdio,
if (sdio->block_in_file >=
i_size_aligned >> blkbits) {
/* We hit eof */
- put_page(page);
+ dio_put_page(dio, stolen, page);
goto out;
}
zero_user(page, from, 1 << blkbits);
@@ -1099,11 +1131,13 @@ static int do_direct_IO(struct dio *dio, struct dio_submit *sdio,
from,
this_chunk_bytes,
sdio->next_block_for_io,
- map_bh);
+ map_bh, dio->gup);
if (ret) {
- put_page(page);
+ dio_put_page(dio, stolen, page);
goto out;
- }
+ } else
+ /* The page reference has been stolen ... */
+ stolen = true;
sdio->next_block_for_io += this_chunk_blocks;

sdio->block_in_file += this_chunk_blocks;
@@ -1117,7 +1151,7 @@ static int do_direct_IO(struct dio *dio, struct dio_submit *sdio,
}

/* Drop the ref which was taken in get_user_pages() */
- put_page(page);
+ dio_put_page(dio, stolen, page);
}
out:
return ret;
@@ -1356,8 +1390,12 @@ do_blockdev_direct_IO(struct kiocb *iocb, struct inode *inode,
ret2 = dio_send_cur_page(dio, &sdio, &map_bh);
if (retval == 0)
retval = ret2;
- put_page(sdio.cur_page);
- sdio.cur_page = NULL;
+ else {
+ if (sdio.cur_page_from_gup)
+ put_user_page(sdio.cur_page);
+ else
+ put_page(sdio.cur_page);
+ }
}
if (sdio.bio)
dio_bio_submit(dio, &sdio);
--
2.20.1