[PATCH 3/3] block: loop: use vfs ITER_BVEC to read/write backing file

From: Ming Lei
Date: Sun Mar 22 2015 - 04:15:36 EST


With this change the loop code is simplified considerably and becomes cleaner.

Also, one extra page copy is avoided for READ when no transfer function
is configured (the "none" transfer case).

Signed-off-by: Ming Lei <ming.lei@xxxxxxxxxxxxx>
---
drivers/block/loop.c | 285 ++++++++++++++++++++++----------------------------
drivers/block/loop.h | 3 +
2 files changed, 127 insertions(+), 161 deletions(-)

diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index c082cf7..f3c470a 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -75,6 +75,7 @@
#include <linux/sysfs.h>
#include <linux/miscdevice.h>
#include <linux/falloc.h>
+#include <linux/uio.h>
#include "loop.h"

#include <asm/uaccess.h>
@@ -87,26 +88,51 @@ static int part_shift;

static struct workqueue_struct *loop_wq;

-/*
- * Transfer functions
- */
-static int transfer_none(struct loop_device *lo, int cmd,
- struct page *raw_page, unsigned raw_off,
- struct page *loop_page, unsigned loop_off,
- int size, sector_t real_block)
+struct ibvec_rw_data {
+ struct bio_vec *bvec;
+ unsigned long nr_segs;
+ size_t count;
+ int rw;
+ loff_t pos;
+};
+
+static ssize_t vfs_ibvec_rw(struct loop_device *lo, struct ibvec_rw_data *data)
{
- char *raw_buf = kmap_atomic(raw_page) + raw_off;
- char *loop_buf = kmap_atomic(loop_page) + loop_off;
+ struct iov_iter iter;
+ struct file *file = lo->lo_backing_file;
+
+ iov_iter_bvec(&iter, ITER_BVEC | data->rw, data->bvec,
+ data->nr_segs, data->count);

- if (cmd == READ)
- memcpy(loop_buf, raw_buf, size);
+ if (data->rw == READ)
+ return vfs_iter_read(file, &iter, &data->pos);
else
- memcpy(raw_buf, loop_buf, size);
+ return vfs_iter_write(file, &iter, &data->pos);
+}

- kunmap_atomic(loop_buf);
- kunmap_atomic(raw_buf);
- cond_resched();
- return 0;
+static ssize_t vfs_rw(struct loop_device *lo, struct ibvec_rw_data *data)
+{
+ char *buf;
+ struct bio_vec *bvec;
+ struct file *file;
+ int ret;
+
+ if (lo->vfs_rw_iter)
+ return vfs_ibvec_rw(lo, data);
+
+ /* fallback to vfs_read and vfs_write */
+ BUG_ON(data->nr_segs != 1);
+
+ file = lo->lo_backing_file;
+ bvec = data->bvec;
+ buf = kmap(bvec->bv_page) + bvec->bv_offset;
+
+ if (data->rw == READ)
+ ret = vfs_read(file, buf, bvec->bv_len, &data->pos);
+ else
+ ret = vfs_write(file, buf, bvec->bv_len, &data->pos);
+ kunmap(bvec->bv_page);
+ return ret;
}

static int transfer_xor(struct loop_device *lo, int cmd,
@@ -147,7 +173,6 @@ static int xor_init(struct loop_device *lo, const struct loop_info64 *info)

static struct loop_func_table none_funcs = {
.number = LO_CRYPT_NONE,
- .transfer = transfer_none,
};

static struct loop_func_table xor_funcs = {
@@ -214,74 +239,45 @@ lo_do_transfer(struct loop_device *lo, int cmd,
struct page *lpage, unsigned loffs,
int size, sector_t rblock)
{
- if (unlikely(!lo->transfer))
- return 0;
-
return lo->transfer(lo, cmd, rpage, roffs, lpage, loffs, size, rblock);
}

/**
- * __do_lo_send_write - helper for writing data to a loop device
- *
- * This helper just factors out common code between do_lo_send_direct_write()
- * and do_lo_send_write().
- */
-static int __do_lo_send_write(struct file *file,
- u8 *buf, const int len, loff_t pos)
-{
- ssize_t bw;
- mm_segment_t old_fs = get_fs();
-
- file_start_write(file);
- set_fs(get_ds());
- bw = file->f_op->write(file, buf, len, &pos);
- set_fs(old_fs);
- file_end_write(file);
- if (likely(bw == len))
- return 0;
- printk_ratelimited(KERN_ERR "loop: Write error at byte offset %llu, length %i.\n",
- (unsigned long long)pos, len);
- if (bw >= 0)
- bw = -EIO;
- return bw;
-}
-
-/**
- * do_lo_send_direct_write - helper for writing data to a loop device
+ * do_lo_send_write - helper for writing data to a loop device
*
- * This is the fast, non-transforming version that does not need double
- * buffering.
*/
-static int do_lo_send_direct_write(struct loop_device *lo,
- struct bio_vec *bvec, loff_t pos, struct page *page)
+static ssize_t do_lo_send_write(struct loop_device *lo,
+ struct loop_cmd *cmd,
+ struct bio_vec *bvec, loff_t pos)
{
- ssize_t bw = __do_lo_send_write(lo->lo_backing_file,
- kmap(bvec->bv_page) + bvec->bv_offset,
- bvec->bv_len, pos);
- kunmap(bvec->bv_page);
- cond_resched();
- return bw;
-}
+ ssize_t ret;
+ struct ibvec_rw_data data;
+ struct bio_vec r_bvec = *bvec;
+ struct page *r_page = cmd->trans_page;
+
+ if (r_page != NULL) {
+ ret = lo_do_transfer(lo, WRITE, r_page, 0,
+ bvec->bv_page, bvec->bv_offset,
+ bvec->bv_len, pos >> 9);
+ if (unlikely(ret))
+ goto fail;

-/**
- * do_lo_send_write - helper for writing data to a loop device
- *
- * This is the slow, transforming version that needs to double buffer the
- * data as it cannot do the transformations in place without having direct
- * access to the destination pages of the backing file.
- */
-static int do_lo_send_write(struct loop_device *lo, struct bio_vec *bvec,
- loff_t pos, struct page *page)
-{
- int ret = lo_do_transfer(lo, WRITE, page, 0, bvec->bv_page,
- bvec->bv_offset, bvec->bv_len, pos >> 9);
- if (likely(!ret)) {
- ret = __do_lo_send_write(lo->lo_backing_file,
- kmap(page), bvec->bv_len,
- pos);
- kunmap(page);
- return ret;
+ r_bvec.bv_page = r_page;
+ r_bvec.bv_offset = 0;
}
+
+ data.bvec = &r_bvec;
+ data.count = bvec->bv_len;
+ data.pos = pos;
+ data.nr_segs = 1;
+ data.rw = WRITE;
+
+ ret = vfs_rw(lo, &data);
+ if (ret < 0)
+ goto fail;
+ return ret;
+
+ fail:
printk_ratelimited(KERN_ERR "loop: Transfer error at byte offset %llu, "
"length %i.\n", (unsigned long long)pos, bvec->bv_len);
if (ret > 0)
@@ -289,108 +285,64 @@ static int do_lo_send_write(struct loop_device *lo, struct bio_vec *bvec,
return ret;
}

-static int lo_send(struct loop_device *lo, struct loop_cmd *cmd, loff_t pos)
+static ssize_t lo_send(struct loop_device *lo, struct loop_cmd *cmd,
+ loff_t pos)
{
- int (*do_lo_send)(struct loop_device *, struct bio_vec *, loff_t,
- struct page *page);
struct bio_vec bvec;
struct req_iterator iter;
- struct page *page = NULL;
int ret = 0;
struct request *rq = cmd->rq;

- if (lo->transfer != transfer_none) {
- page = alloc_page(GFP_NOIO | __GFP_HIGHMEM);
- if (unlikely(!page))
- goto fail;
- do_lo_send = do_lo_send_write;
- } else {
- do_lo_send = do_lo_send_direct_write;
- }
-
rq_for_each_segment(bvec, rq, iter) {
- ret = do_lo_send(lo, &bvec, pos, page);
+ ret = do_lo_send_write(lo, cmd, &bvec, pos);
if (ret < 0)
break;
pos += bvec.bv_len;
}
- if (page) {
- __free_page(page);
- }
-out:
- return ret;
-fail:
- printk_ratelimited(KERN_ERR "loop: Failed to allocate temporary page for write.\n");
- ret = -ENOMEM;
- goto out;
-}
-
-struct lo_read_data {
- struct loop_device *lo;
- struct page *page;
- unsigned offset;
- int bsize;
-};
-
-static int
-lo_splice_actor(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
- struct splice_desc *sd)
-{
- struct lo_read_data *p = sd->u.data;
- struct loop_device *lo = p->lo;
- struct page *page = buf->page;
- sector_t IV;
- int size;
-
- IV = ((sector_t) page->index << (PAGE_CACHE_SHIFT - 9)) +
- (buf->offset >> 9);
- size = sd->len;
- if (size > p->bsize)
- size = p->bsize;
-
- if (lo_do_transfer(lo, READ, page, buf->offset, p->page, p->offset, size, IV)) {
- printk_ratelimited(KERN_ERR "loop: transfer error block %ld\n",
- page->index);
- size = -EINVAL;
- }

- flush_dcache_page(p->page);
-
- if (size > 0)
- p->offset += size;
-
- return size;
-}
-
-static int
-lo_direct_splice_actor(struct pipe_inode_info *pipe, struct splice_desc *sd)
-{
- return __splice_from_pipe(pipe, sd, lo_splice_actor);
+ if (ret > 0)
+ ret = 0;
+ return ret;
}

static ssize_t
-do_lo_receive(struct loop_device *lo,
- struct bio_vec *bvec, int bsize, loff_t pos)
+do_lo_receive(struct loop_device *lo, struct loop_cmd *cmd,
+ struct bio_vec *bvec, loff_t pos)
{
- struct lo_read_data cookie;
- struct splice_desc sd;
- struct file *file;
ssize_t retval;
+ struct ibvec_rw_data data;
+ struct bio_vec r_bvec = *bvec;
+ bool trans = false;
+
+ if (cmd->trans_page != NULL) {
+ r_bvec.bv_page = cmd->trans_page;
+ r_bvec.bv_offset = 0;
+ trans = true;
+ }

- cookie.lo = lo;
- cookie.page = bvec->bv_page;
- cookie.offset = bvec->bv_offset;
- cookie.bsize = bsize;
-
- sd.len = 0;
- sd.total_len = bvec->bv_len;
- sd.flags = 0;
- sd.pos = pos;
- sd.u.data = &cookie;
+ data.bvec = &r_bvec;
+ data.count = r_bvec.bv_len;
+ data.pos = pos;
+ data.nr_segs = 1;
+ data.rw = READ;

- file = lo->lo_backing_file;
- retval = splice_direct_to_actor(file, &sd, lo_direct_splice_actor);
+ retval = vfs_rw(lo, &data);
+ if (retval < 0)
+ goto out;

+ if (trans) {
+ retval = lo_do_transfer(lo, READ, r_bvec.bv_page, 0,
+ bvec->bv_page, bvec->bv_offset, retval,
+ pos >> 9);
+ if (retval < 0)
+ goto out;
+ flush_dcache_page(bvec->bv_page);
+ }
+out:
+ if (retval < 0)
+ printk_ratelimited(KERN_ERR "loop: transfer error block "
+ "%lld ret=%ld\n", pos >> 9,
+ (long)retval);
return retval;
}

@@ -401,10 +353,9 @@ lo_receive(struct loop_device *lo, struct loop_cmd *cmd, loff_t pos)
struct req_iterator iter;
ssize_t s;
struct request *rq = cmd->rq;
- int bsize = lo->lo_blocksize;

rq_for_each_segment(bvec, rq, iter) {
- s = do_lo_receive(lo, &bvec, bsize, pos);
+ s = do_lo_receive(lo, cmd, &bvec, pos);
if (s < 0)
return s;

@@ -458,12 +409,22 @@ static inline int lo_rw(struct loop_device *lo, struct loop_cmd *cmd,
loff_t pos, int rw)
{
int ret;
+ struct page *page = NULL;
+
+ if (lo->transfer != NULL) {
+ page = alloc_page(GFP_NOIO | __GFP_HIGHMEM);
+ if (unlikely(!page))
+ return -ENOMEM;
+ }
+ cmd->trans_page = page;

if (rw == READ)
ret = lo_receive(lo, cmd, pos);
else
ret = lo_send(lo, cmd, pos);

+ if (cmd->trans_page)
+ __free_page(cmd->trans_page);
return ret;
}

@@ -804,7 +765,7 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
lo->lo_device = bdev;
lo->lo_flags = lo_flags;
lo->lo_backing_file = file;
- lo->transfer = transfer_none;
+ lo->transfer = NULL;
lo->ioctl = NULL;
lo->lo_sizelimit = 0;
lo->old_gfp_mask = mapping_gfp_mask(mapping);
@@ -813,6 +774,8 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
if (!(lo_flags & LO_FLAGS_READ_ONLY) && file->f_op->fsync)
blk_queue_flush(lo->lo_queue, REQ_FLUSH);

+ lo->vfs_rw_iter = file->f_op->read_iter && file->f_op->write_iter;
+
set_capacity(lo->lo_disk, size);
bd_set_size(bdev, size << 9);
loop_sysfs_init(lo);
diff --git a/drivers/block/loop.h b/drivers/block/loop.h
index 301c27f..981c8b9 100644
--- a/drivers/block/loop.h
+++ b/drivers/block/loop.h
@@ -63,12 +63,15 @@ struct loop_device {
struct request_queue *lo_queue;
struct blk_mq_tag_set tag_set;
struct gendisk *lo_disk;
+
+ bool vfs_rw_iter;
};

struct loop_cmd {
struct work_struct read_work;
struct request *rq;
struct list_head list;
+ struct page *trans_page; /* only for encrypted transfer */
};

/* Support for loadable transfer modules */
--
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/