[PATCH 4/7] block: pass iov_iter to the BLOCK_PC mapping functions

From: Christoph Hellwig
Date: Sun Jan 18 2015 - 10:18:37 EST


From: Kent Overstreet <kmo@xxxxxxxxxxxxx>

Make use of a new interface provided by iov_iter, backed by
scatter-gather list of iovec, instead of the old interface based on
sg_iovec. Also use iov_iter_advance() instead of manual iteration.

This commit should contain only literal replacements, without
functional changes.

Cc: Christoph Hellwig <hch@xxxxxxxxxxxxx>
Cc: Jens Axboe <axboe@xxxxxxxxx>
Cc: Doug Gilbert <dgilbert@xxxxxxxxxxxx>
Cc: "James E.J. Bottomley" <JBottomley@xxxxxxxxxxxxx>
Signed-off-by: Kent Overstreet <kmo@xxxxxxxxxxxxx>
[dpark: add more description in commit message]
Signed-off-by: Dongsu Park <dongsu.park@xxxxxxxxxxxxxxxx>
[hch: fixed to do a deep clone of the iov_iter, and to properly use
the iov_iter direction]
Signed-off-by: Christoph Hellwig <hch@xxxxxx>
---
block/bio.c | 146 +++++++++++++++++++++++--------------------------
block/blk-map.c | 38 ++++++-------
block/scsi_ioctl.c | 17 ++----
drivers/scsi/sg.c | 15 ++---
include/linux/bio.h | 7 +--
include/linux/blkdev.h | 4 +-
6 files changed, 101 insertions(+), 126 deletions(-)

diff --git a/block/bio.c b/block/bio.c
index 0895f69..7d8c655 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -28,7 +28,6 @@
#include <linux/mempool.h>
#include <linux/workqueue.h>
#include <linux/cgroup.h>
-#include <scsi/sg.h> /* for struct sg_iovec */

#include <trace/events/block.h>

@@ -1022,21 +1021,11 @@ void bio_copy_data(struct bio *dst, struct bio *src)
EXPORT_SYMBOL(bio_copy_data);

struct bio_map_data {
- int nr_sgvecs;
int is_our_pages;
- struct sg_iovec sgvecs[];
+ struct iov_iter iter;
+ struct iovec iov[];
};

-static void bio_set_map_data(struct bio_map_data *bmd, struct bio *bio,
- const struct sg_iovec *iov, int iov_count,
- int is_our_pages)
-{
- memcpy(bmd->sgvecs, iov, sizeof(struct sg_iovec) * iov_count);
- bmd->nr_sgvecs = iov_count;
- bmd->is_our_pages = is_our_pages;
- bio->bi_private = bmd;
-}
-
static struct bio_map_data *bio_alloc_map_data(unsigned int iov_count,
gfp_t gfp_mask)
{
@@ -1044,36 +1033,33 @@ static struct bio_map_data *bio_alloc_map_data(unsigned int iov_count,
return NULL;

return kmalloc(sizeof(struct bio_map_data) +
- sizeof(struct sg_iovec) * iov_count, gfp_mask);
+ sizeof(struct iovec) * iov_count, gfp_mask);
}

-static int __bio_copy_iov(struct bio *bio, const struct sg_iovec *iov, int iov_count,
- int to_user, int from_user)
+static int __bio_copy_iov(struct bio *bio, const struct iov_iter *iter,
+ int to_user, int from_user)
{
int ret = 0, i;
struct bio_vec *bvec;
- int iov_idx = 0;
- unsigned int iov_off = 0;
+ struct iov_iter iov_iter = *iter;

bio_for_each_segment_all(bvec, bio, i) {
char *bv_addr = page_address(bvec->bv_page);
unsigned int bv_len = bvec->bv_len;

- while (bv_len && iov_idx < iov_count) {
- unsigned int bytes;
- char __user *iov_addr;
-
- bytes = min_t(unsigned int,
- iov[iov_idx].iov_len - iov_off, bv_len);
- iov_addr = iov[iov_idx].iov_base + iov_off;
+ while (bv_len && iov_iter.count) {
+ struct iovec iov = iov_iter_iovec(&iov_iter);
+ unsigned int bytes = min_t(unsigned int, bv_len,
+ iov.iov_len);

if (!ret) {
if (to_user)
- ret = copy_to_user(iov_addr, bv_addr,
- bytes);
+ ret = copy_to_user(iov.iov_base,
+ bv_addr, bytes);

if (from_user)
- ret = copy_from_user(bv_addr, iov_addr,
+ ret = copy_from_user(bv_addr,
+ iov.iov_base,
bytes);

if (ret)
@@ -1082,13 +1068,7 @@ static int __bio_copy_iov(struct bio *bio, const struct sg_iovec *iov, int iov_c

bv_len -= bytes;
bv_addr += bytes;
- iov_addr += bytes;
- iov_off += bytes;
-
- if (iov[iov_idx].iov_len == iov_off) {
- iov_idx++;
- iov_off = 0;
- }
+ iov_iter_advance(&iov_iter, bytes);
}
}

@@ -1122,7 +1102,7 @@ int bio_uncopy_user(struct bio *bio)
* don't copy into a random user address space, just free.
*/
if (current->mm)
- ret = __bio_copy_iov(bio, bmd->sgvecs, bmd->nr_sgvecs,
+ ret = __bio_copy_iov(bio, &bmd->iter,
bio_data_dir(bio) == READ, 0);
if (bmd->is_our_pages)
bio_free_pages(bio);
@@ -1135,12 +1115,10 @@ EXPORT_SYMBOL(bio_uncopy_user);

/**
* bio_copy_user_iov - copy user data to bio
- * @q: destination block queue
- * @map_data: pointer to the rq_map_data holding pages (if necessary)
- * @iov: the iovec.
- * @iov_count: number of elements in the iovec
- * @write_to_vm: bool indicating writing to pages or not
- * @gfp_mask: memory allocation flags
+ * @q: destination block queue
+ * @map_data: pointer to the rq_map_data holding pages (if necessary)
+ * @iter: iovec iterator
+ * @gfp_mask: memory allocation flags
*
* Prepares and returns a bio for indirect user io, bouncing data
* to/from kernel pages as necessary. Must be paired with
@@ -1148,24 +1126,25 @@ EXPORT_SYMBOL(bio_uncopy_user);
*/
struct bio *bio_copy_user_iov(struct request_queue *q,
struct rq_map_data *map_data,
- const struct sg_iovec *iov, int iov_count,
- int write_to_vm, gfp_t gfp_mask)
+ const struct iov_iter *iter,
+ gfp_t gfp_mask)
{
struct bio_map_data *bmd;
struct page *page;
struct bio *bio;
int i, ret;
int nr_pages = 0;
- unsigned int len = 0;
+ unsigned int len = iter->count;
unsigned int offset = map_data ? map_data->offset & ~PAGE_MASK : 0;

- for (i = 0; i < iov_count; i++) {
+ for (i = 0; i < iter->nr_segs; i++) {
unsigned long uaddr;
unsigned long end;
unsigned long start;

- uaddr = (unsigned long)iov[i].iov_base;
- end = (uaddr + iov[i].iov_len + PAGE_SIZE - 1) >> PAGE_SHIFT;
+ uaddr = (unsigned long) iter->iov[i].iov_base;
+ end = (uaddr + iter->iov[i].iov_len + PAGE_SIZE - 1)
+ >> PAGE_SHIFT;
start = uaddr >> PAGE_SHIFT;

/*
@@ -1175,22 +1154,31 @@ struct bio *bio_copy_user_iov(struct request_queue *q,
return ERR_PTR(-EINVAL);

nr_pages += end - start;
- len += iov[i].iov_len;
}

if (offset)
nr_pages++;

- bmd = bio_alloc_map_data(iov_count, gfp_mask);
+ bmd = bio_alloc_map_data(iter->nr_segs, gfp_mask);
if (!bmd)
return ERR_PTR(-ENOMEM);

+ /*
+ * We need to do a deep copy of the iov_iter including the iovecs.
+ * The caller provided iov might point to an on-stack or otherwise
+ * shortlived one.
+ */
+ bmd->is_our_pages = map_data ? 0 : 1;
+ memcpy(bmd->iov, iter->iov, sizeof(struct iovec) * iter->nr_segs);
+ iov_iter_init(&bmd->iter, iter->type, bmd->iov,
+ iter->nr_segs, iter->count);
+
ret = -ENOMEM;
bio = bio_kmalloc(gfp_mask, nr_pages);
if (!bio)
goto out_bmd;

- if (!write_to_vm)
+ if (iter->type & WRITE)
bio->bi_rw |= REQ_WRITE;

ret = 0;
@@ -1238,14 +1226,14 @@ struct bio *bio_copy_user_iov(struct request_queue *q,
/*
* success
*/
- if ((!write_to_vm && (!map_data || !map_data->null_mapped)) ||
+ if (((iter->type & WRITE) && (!map_data || !map_data->null_mapped)) ||
(map_data && map_data->from_user)) {
- ret = __bio_copy_iov(bio, iov, iov_count, 0, 1);
+ ret = __bio_copy_iov(bio, iter, 0, 1);
if (ret)
goto cleanup;
}

- bio_set_map_data(bmd, bio, iov, iov_count, map_data ? 0 : 1);
+ bio->bi_private = bmd;
return bio;
cleanup:
if (!map_data)
@@ -1258,19 +1246,21 @@ out_bmd:

static struct bio *__bio_map_user_iov(struct request_queue *q,
struct block_device *bdev,
- const struct sg_iovec *iov, int iov_count,
- int write_to_vm, gfp_t gfp_mask)
+ const struct iov_iter *iter,
+ gfp_t gfp_mask)
{
- int i, j;
+ int j;
int nr_pages = 0;
struct page **pages;
struct bio *bio;
int cur_page = 0;
int ret, offset;
+ struct iov_iter i;
+ struct iovec iov;

- for (i = 0; i < iov_count; i++) {
- unsigned long uaddr = (unsigned long)iov[i].iov_base;
- unsigned long len = iov[i].iov_len;
+ iov_for_each(iov, i, *iter) {
+ unsigned long uaddr = (unsigned long) iov.iov_base;
+ unsigned long len = iov.iov_len;
unsigned long end = (uaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
unsigned long start = uaddr >> PAGE_SHIFT;

@@ -1300,16 +1290,17 @@ static struct bio *__bio_map_user_iov(struct request_queue *q,
if (!pages)
goto out;

- for (i = 0; i < iov_count; i++) {
- unsigned long uaddr = (unsigned long)iov[i].iov_base;
- unsigned long len = iov[i].iov_len;
+ iov_for_each(iov, i, *iter) {
+ unsigned long uaddr = (unsigned long) iov.iov_base;
+ unsigned long len = iov.iov_len;
unsigned long end = (uaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
unsigned long start = uaddr >> PAGE_SHIFT;
const int local_nr_pages = end - start;
const int page_limit = cur_page + local_nr_pages;

ret = get_user_pages_fast(uaddr, local_nr_pages,
- write_to_vm, &pages[cur_page]);
+ (iter->type & WRITE) != WRITE,
+ &pages[cur_page]);
if (ret < local_nr_pages) {
ret = -EFAULT;
goto out_unmap;
@@ -1349,7 +1340,7 @@ static struct bio *__bio_map_user_iov(struct request_queue *q,
/*
* set data direction, and check if mapped pages need bouncing
*/
- if (!write_to_vm)
+ if (iter->type & WRITE)
bio->bi_rw |= REQ_WRITE;

bio->bi_bdev = bdev;
@@ -1357,10 +1348,10 @@ static struct bio *__bio_map_user_iov(struct request_queue *q,
return bio;

out_unmap:
- for (i = 0; i < nr_pages; i++) {
- if(!pages[i])
+ for (j = 0; j < nr_pages; j++) {
+ if (!pages[j])
break;
- page_cache_release(pages[i]);
+ page_cache_release(pages[j]);
}
out:
kfree(pages);
@@ -1369,25 +1360,22 @@ static struct bio *__bio_map_user_iov(struct request_queue *q,
}

/**
- * bio_map_user_iov - map user sg_iovec table into bio
- * @q: the struct request_queue for the bio
- * @bdev: destination block device
- * @iov: the iovec.
- * @iov_count: number of elements in the iovec
- * @write_to_vm: bool indicating writing to pages or not
- * @gfp_mask: memory allocation flags
+ * bio_map_user_iov - map user iovec into bio
+ * @q: the struct request_queue for the bio
+ * @bdev: destination block device
+ * @iter: iovec iterator
+ * @gfp_mask: memory allocation flags
*
* Map the user space address into a bio suitable for io to a block
* device. Returns an error pointer in case of error.
*/
struct bio *bio_map_user_iov(struct request_queue *q, struct block_device *bdev,
- const struct sg_iovec *iov, int iov_count,
- int write_to_vm, gfp_t gfp_mask)
+ const struct iov_iter *iter,
+ gfp_t gfp_mask)
{
struct bio *bio;

- bio = __bio_map_user_iov(q, bdev, iov, iov_count, write_to_vm,
- gfp_mask);
+ bio = __bio_map_user_iov(q, bdev, iter, gfp_mask);
if (IS_ERR(bio))
return bio;

diff --git a/block/blk-map.c b/block/blk-map.c
index 152a5fe..30e6bb8 100644
--- a/block/blk-map.c
+++ b/block/blk-map.c
@@ -5,7 +5,7 @@
#include <linux/module.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
-#include <scsi/sg.h> /* for struct sg_iovec */
+#include <linux/uio.h>

#include "blk.h"

@@ -44,9 +44,7 @@ static int __blk_rq_unmap_user(struct bio *bio)
* @q: request queue where request should be inserted
* @rq: request to map data to
* @map_data: pointer to the rq_map_data holding pages (if necessary)
- * @iov: pointer to the iovec
- * @iov_count: number of elements in the iovec
- * @len: I/O byte count
+ * @iter: iovec iterator
* @gfp_mask: memory allocation flags
*
* Description:
@@ -63,20 +61,21 @@ static int __blk_rq_unmap_user(struct bio *bio)
* unmapping.
*/
int blk_rq_map_user_iov(struct request_queue *q, struct request *rq,
- struct rq_map_data *map_data, const struct sg_iovec *iov,
- int iov_count, unsigned int len, gfp_t gfp_mask)
+ struct rq_map_data *map_data,
+ const struct iov_iter *iter, gfp_t gfp_mask)
{
struct bio *bio;
- int i, read = rq_data_dir(rq) == READ;
int unaligned = 0;
+ struct iov_iter i;
+ struct iovec iov;

- if (!iov || iov_count <= 0)
+ if (!iter || !iter->count)
return -EINVAL;

- for (i = 0; i < iov_count; i++) {
- unsigned long uaddr = (unsigned long)iov[i].iov_base;
+ iov_for_each(iov, i, *iter) {
+ unsigned long uaddr = (unsigned long) iov.iov_base;

- if (!iov[i].iov_len)
+ if (!iov.iov_len)
return -EINVAL;

/*
@@ -86,16 +85,15 @@ int blk_rq_map_user_iov(struct request_queue *q, struct request *rq,
unaligned = 1;
}

- if (unaligned || (q->dma_pad_mask & len) || map_data)
- bio = bio_copy_user_iov(q, map_data, iov, iov_count, read,
- gfp_mask);
+ if (unaligned || (q->dma_pad_mask & iter->count) || map_data)
+ bio = bio_copy_user_iov(q, map_data, iter, gfp_mask);
else
- bio = bio_map_user_iov(q, NULL, iov, iov_count, read, gfp_mask);
+ bio = bio_map_user_iov(q, NULL, iter, gfp_mask);

if (IS_ERR(bio))
return PTR_ERR(bio);

- if (bio->bi_iter.bi_size != len) {
+ if (bio->bi_iter.bi_size != iter->count) {
/*
* Grab an extra reference to this bio, as bio_unmap_user()
* expects to be able to drop it twice as it happens on the
@@ -121,12 +119,14 @@ int blk_rq_map_user(struct request_queue *q, struct request *rq,
struct rq_map_data *map_data, void __user *ubuf,
unsigned long len, gfp_t gfp_mask)
{
- struct sg_iovec iov;
+ struct iovec iov;
+ struct iov_iter i;

- iov.iov_base = (void __user *)ubuf;
+ iov.iov_base = ubuf;
iov.iov_len = len;
+ iov_iter_init(&i, rq_data_dir(rq), &iov, 1, len);

- return blk_rq_map_user_iov(q, rq, map_data, &iov, 1, len, gfp_mask);
+ return blk_rq_map_user_iov(q, rq, map_data, &i, gfp_mask);
}
EXPORT_SYMBOL(blk_rq_map_user);

diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c
index 28163fa..e1f71c3 100644
--- a/block/scsi_ioctl.c
+++ b/block/scsi_ioctl.c
@@ -332,7 +332,7 @@ static int sg_io(struct request_queue *q, struct gendisk *bd_disk,

ret = 0;
if (hdr->iovec_count) {
- size_t iov_data_len;
+ struct iov_iter i;
struct iovec *iov = NULL;

ret = rw_copy_check_uvector(-1, hdr->dxferp, hdr->iovec_count,
@@ -342,20 +342,11 @@ static int sg_io(struct request_queue *q, struct gendisk *bd_disk,
goto out_free_cdb;
}

- iov_data_len = ret;
- ret = 0;
-
/* SG_IO howto says that the shorter of the two wins */
- if (hdr->dxfer_len < iov_data_len) {
- hdr->iovec_count = iov_shorten(iov,
- hdr->iovec_count,
- hdr->dxfer_len);
- iov_data_len = hdr->dxfer_len;
- }
+ iov_iter_init(&i, rq_data_dir(rq), iov, hdr->iovec_count,
+ min_t(unsigned, ret, hdr->dxfer_len));

- ret = blk_rq_map_user_iov(q, rq, NULL, (struct sg_iovec *) iov,
- hdr->iovec_count,
- iov_data_len, GFP_KERNEL);
+ ret = blk_rq_map_user_iov(q, rq, NULL, &i, GFP_KERNEL);
kfree(iov);
} else if (hdr->dxfer_len)
ret = blk_rq_map_user(q, rq, NULL, hdr->dxferp, hdr->dxfer_len,
diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c
index b14f64c..4052e59 100644
--- a/drivers/scsi/sg.c
+++ b/drivers/scsi/sg.c
@@ -1734,22 +1734,19 @@ sg_start_req(Sg_request *srp, unsigned char *cmd)
}

if (iov_count) {
- int len, size = sizeof(struct sg_iovec) * iov_count;
+ int size = sizeof(struct iovec) * iov_count;
struct iovec *iov;
+ struct iov_iter i;

iov = memdup_user(hp->dxferp, size);
if (IS_ERR(iov))
return PTR_ERR(iov);

- len = iov_length(iov, iov_count);
- if (hp->dxfer_len < len) {
- iov_count = iov_shorten(iov, iov_count, hp->dxfer_len);
- len = hp->dxfer_len;
- }
+ iov_iter_init(&i, rw, iov, iov_count,
+ min_t(size_t, hp->dxfer_len,
+ iov_length(iov, iov_count)));

- res = blk_rq_map_user_iov(q, rq, md, (struct sg_iovec *)iov,
- iov_count,
- len, GFP_ATOMIC);
+ res = blk_rq_map_user_iov(q, rq, md, &i, GFP_ATOMIC);
kfree(iov);
} else
res = blk_rq_map_user(q, rq, md, hp->dxferp,
diff --git a/include/linux/bio.h b/include/linux/bio.h
index d0d6735..0d6105b 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -428,11 +428,10 @@ extern int bio_add_page(struct bio *, struct page *, unsigned int,unsigned int);
extern int bio_add_pc_page(struct request_queue *, struct bio *, struct page *,
unsigned int, unsigned int);
extern int bio_get_nr_vecs(struct block_device *);
-struct sg_iovec;
struct rq_map_data;
extern struct bio *bio_map_user_iov(struct request_queue *,
struct block_device *,
- const struct sg_iovec *, int, int, gfp_t);
+ const struct iov_iter *, gfp_t);
extern void bio_unmap_user(struct bio *);
extern struct bio *bio_map_kern(struct request_queue *, void *, unsigned int,
gfp_t);
@@ -462,8 +461,8 @@ extern int bio_alloc_pages(struct bio *bio, gfp_t gfp);

extern struct bio *bio_copy_user_iov(struct request_queue *,
struct rq_map_data *,
- const struct sg_iovec *,
- int, int, gfp_t);
+ const struct iov_iter *,
+ gfp_t);
extern int bio_uncopy_user(struct bio *);
void zero_fill_bio(struct bio *bio);
extern struct bio_vec *bvec_alloc(gfp_t, int, unsigned long *, mempool_t *);
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 92f4b4b..9621b73 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -850,8 +850,8 @@ extern int blk_rq_map_user(struct request_queue *, struct request *,
extern int blk_rq_unmap_user(struct bio *);
extern int blk_rq_map_kern(struct request_queue *, struct request *, void *, unsigned int, gfp_t);
extern int blk_rq_map_user_iov(struct request_queue *, struct request *,
- struct rq_map_data *, const struct sg_iovec *,
- int, unsigned int, gfp_t);
+ struct rq_map_data *, const struct iov_iter *,
+ gfp_t);
extern int blk_execute_rq(struct request_queue *, struct gendisk *,
struct request *, int);
extern void blk_execute_rq_nowait(struct request_queue *, struct gendisk *,
--
1.9.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/