[PATCH 1/7] iov_iter: Add a function to extract an iter's buffers to a bvec iter

From: David Howells
Date: Wed May 25 2022 - 10:26:38 EST


Copy cifs's setup_aio_ctx_iter() and to lib/iov_iter.c and generalise it as
extract_iter_to_iter(). This allocates and sets up an array of bio_vecs
for all the page fragments in an I/O iterator and sets a second supplied
iterator to bvec-type pointing to the array.

This is can be used when setting up for a direct I/O or an asynchronous I/O
to set up a record of the page fragments that are going to contribute to
the buffer, paging them all in to prevent DIO->mmap loops and allowing the
original iterator to be deallocated (it may be on the stack of the caller).

Note that extract_iter_to_iter() doesn't actually need to make a separate
allocation for the page array. It can place the page array at the end of
the bvec array storage, provided it traverses both arrays from the 0th
element forwards.

Signed-off-by: David Howells <dhowells@xxxxxxxxxx>
---

include/linux/uio.h | 4 ++
lib/iov_iter.c | 93 +++++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 97 insertions(+)

diff --git a/include/linux/uio.h b/include/linux/uio.h
index 739285fe5a2f..5a3c6f296b96 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -236,6 +236,10 @@ ssize_t iov_iter_get_pages(struct iov_iter *i, struct page **pages,
size_t maxsize, unsigned maxpages, size_t *start);
ssize_t iov_iter_get_pages_alloc(struct iov_iter *i, struct page ***pages,
size_t maxsize, size_t *start);
+ssize_t extract_iter_to_iter(struct iov_iter *orig,
+ size_t orig_len,
+ struct iov_iter *new,
+ struct bio_vec **_bv);
int iov_iter_npages(const struct iov_iter *i, int maxpages);
void iov_iter_restore(struct iov_iter *i, struct iov_iter_state *state);

diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index 6dd5330f7a99..8db34ddd23be 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -1696,6 +1696,99 @@ ssize_t iov_iter_get_pages_alloc(struct iov_iter *i,
}
EXPORT_SYMBOL(iov_iter_get_pages_alloc);

+/**
+ * extract_iter_to_iter - Extract the pages from one iterator into another
+ * @orig: The original iterator
+ * @orig_len: The amount of iterator to copy
+ * @new: The iterator to be set up
+ * @_bv: Where to store the allocated bvec table pointer, if allocated
+ *
+ * Extract the page fragments from the given amount of the source iterator and
+ * build up a second iterator that refers to all of those bits. This allows
+ * the original iterator to disposed of.
+ *
+ * If a bvec array is created, the number of pages in the array is returned and
+ * a pointer to the array is saved into *@_bv;
+ */
+ssize_t extract_iter_to_iter(struct iov_iter *orig,
+ size_t orig_len,
+ struct iov_iter *new,
+ struct bio_vec **_bv)
+{
+ struct bio_vec *bv = NULL;
+ struct page **pages;
+ unsigned int cur_npages;
+ unsigned int max_pages = iov_iter_npages(orig, INT_MAX);
+ unsigned int npages = 0;
+ unsigned int i;
+ size_t count = orig_len;
+ ssize_t ret;
+ size_t bv_size, pg_size;
+ size_t start;
+ size_t len;
+
+ *_bv = NULL;
+
+ if (iov_iter_is_kvec(orig) || iov_iter_is_discard(orig)) {
+ *new = *orig;
+ iov_iter_advance(orig, count);
+ return 0;
+ }
+
+ bv_size = array_size(max_pages, sizeof(*bv));
+ bv = kvmalloc(bv_size, GFP_KERNEL);
+ if (!bv)
+ return -ENOMEM;
+
+ /* Put the page list at the end of the bvec list storage. bvec
+ * elements are larger than page pointers, so as long as we work
+ * 0->last, we should be fine.
+ */
+ pg_size = array_size(max_pages, sizeof(*pages));
+ pages = (void *)bv + bv_size - pg_size;
+
+ while (count && npages < max_pages) {
+ ret = iov_iter_get_pages(orig, pages, count, max_pages - npages,
+ &start);
+ if (ret < 0) {
+ pr_err("Couldn't get user pages (rc=%zd)\n", ret);
+ break;
+ }
+
+ if (ret > count) {
+ pr_err("get_pages rc=%zd more than %zu\n", ret, count);
+ break;
+ }
+
+ iov_iter_advance(orig, ret);
+ count -= ret;
+ ret += start;
+ cur_npages = DIV_ROUND_UP(ret, PAGE_SIZE);
+
+ if (npages + cur_npages > max_pages) {
+ pr_err("Out of bvec array capacity (%u vs %u)\n",
+ npages + cur_npages, max_pages);
+ break;
+ }
+
+ for (i = 0; i < cur_npages; i++) {
+ len = ret > PAGE_SIZE ? PAGE_SIZE : ret;
+ bv[npages + i].bv_page = *pages++;
+ bv[npages + i].bv_offset = start;
+ bv[npages + i].bv_len = len - start;
+ ret -= len;
+ start = 0;
+ }
+
+ npages += cur_npages;
+ }
+
+ *_bv = bv;
+ iov_iter_bvec(new, iov_iter_rw(orig), bv, npages, orig_len - count);
+ return npages;
+}
+EXPORT_SYMBOL(extract_iter_to_iter);
+
size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum,
struct iov_iter *i)
{