[RFC 3/4] lightnvm: read from rrpc write buffer if possible

From: Javier González
Date: Thu Feb 04 2016 - 08:09:42 EST


Since writes are buffered in memory, incoming reads must retrieve
buffered pages instead of submitting the I/O to the media.

This patch implements that logic. When a read bio arrives at rrpc, valid
pages from flash blocks still residing in the write buffer are copied
directly from memory. If the bio contains any "holes" (pages not present
in the buffer), an intermediate bio is submitted to the media to retrieve
them, and the original bio is updated accordingly.

Signed-off-by: Javier González <javier@xxxxxxxxxxxx>
---
drivers/lightnvm/rrpc.c | 451 ++++++++++++++++++++++++++++++++++++-----------
include/linux/lightnvm.h | 1 +
2 files changed, 346 insertions(+), 106 deletions(-)
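
For reviewers, a minimal userspace sketch of the bookkeeping that
rrpc_read_from_w_buf() and rrpc_fill_partial_read_bio() implement below:
pages found in the write buffer are copied and marked in a per-request
bitmap, and only the remaining "holes" are then read from the media. This
is not kernel code; fake_buffer, read_from_buffer() and fill_holes() are
illustrative stand-ins only.

/*
 * Userspace sketch of the hole-filling bookkeeping used in the read path.
 * Not kernel code: fake_buffer, read_from_buffer() and fill_holes() are
 * made-up stand-ins for the write buffer, rrpc_read_from_w_buf() and
 * rrpc_fill_partial_read_bio().
 */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define PAGE_SIZE 4096
#define MAX_PAGES 64 /* one 64-bit bitmap per request, as in the patch */

struct fake_buffer {
	int cached[MAX_PAGES];           /* 1 if the page is in the write buffer */
	char data[MAX_PAGES][PAGE_SIZE]; /* buffered payloads */
};

/* First pass: copy cached pages, set their bit, return how many are left */
static int read_from_buffer(struct fake_buffer *buf, char dst[][PAGE_SIZE],
			    int nr_pages, uint64_t *read_bitmap)
{
	int left = nr_pages;
	int i;

	for (i = 0; i < nr_pages; i++) {
		if (!buf->cached[i])
			continue;
		memcpy(dst[i], buf->data[i], PAGE_SIZE);
		*read_bitmap |= 1ULL << i;
		left--;
	}
	return left;
}

/* Second pass: fetch only the pages whose bit is still clear ("holes") */
static void fill_holes(char dst[][PAGE_SIZE], int nr_pages, uint64_t read_bitmap)
{
	int i;

	for (i = 0; i < nr_pages; i++) {
		if (read_bitmap & (1ULL << i))
			continue;
		memset(dst[i], 'M', PAGE_SIZE); /* stands in for a media read */
	}
}

int main(void)
{
	struct fake_buffer buf = { .cached = { 1, 0, 1, 0 } };
	char out[4][PAGE_SIZE];
	uint64_t bitmap = 0;
	int left;

	memset(buf.data, 'B', sizeof(buf.data));

	left = read_from_buffer(&buf, out, 4, &bitmap);
	if (left)
		fill_holes(out, 4, bitmap);

	printf("pages from buffer: %d, from media: %d\n", 4 - left, left);
	return 0;
}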

diff --git a/drivers/lightnvm/rrpc.c b/drivers/lightnvm/rrpc.c
index e9fb19d..6348d52 100644
--- a/drivers/lightnvm/rrpc.c
+++ b/drivers/lightnvm/rrpc.c
@@ -827,10 +827,13 @@ static void rrpc_end_io(struct nvm_rq *rqd)
struct rrpc *rrpc = container_of(rqd->ins, struct rrpc, instance);
uint8_t nr_pages = rqd->nr_pages;

- if (bio_data_dir(rqd->bio) == WRITE)
+ if (bio_data_dir(rqd->bio) == WRITE) {
rrpc_end_io_write(rrpc, rqd, nr_pages);
- else
+ } else {
+ if (rqd->flags & NVM_IOTYPE_SYNC)
+ return;
rrpc_end_io_read(rrpc, rqd, nr_pages);
+ }

bio_put(rqd->bio);

@@ -842,83 +845,6 @@ static void rrpc_end_io(struct nvm_rq *rqd)
mempool_free(rqd, rrpc->rq_pool);
}

-static int rrpc_read_ppalist_rq(struct rrpc *rrpc, struct bio *bio,
- struct nvm_rq *rqd, struct rrpc_buf_rq *brrqd,
- unsigned long flags, int nr_pages)
-{
- struct rrpc_rq *rrqd = nvm_rq_to_pdu(rqd);
- struct rrpc_inflight_rq *r = rrpc_get_inflight_rq(rrqd);
- struct rrpc_addr *gp;
- sector_t laddr = rrpc_get_laddr(bio);
- int is_gc = flags & NVM_IOTYPE_GC;
- int i;
-
- if (!is_gc && rrpc_lock_rq(rrpc, bio, rrqd)) {
- nvm_dev_dma_free(rrpc->dev, rqd->ppa_list, rqd->dma_ppa_list);
- mempool_free(rrqd, rrpc->rrq_pool);
- mempool_free(rqd, rrpc->rq_pool);
- return NVM_IO_REQUEUE;
- }
-
- for (i = 0; i < nr_pages; i++) {
- /* We assume that mapping occurs at 4KB granularity */
- BUG_ON(!(laddr + i >= 0 && laddr + i < rrpc->nr_sects));
- gp = &rrpc->trans_map[laddr + i];
-
- if (gp->rblk) {
- rqd->ppa_list[i] = rrpc_ppa_to_gaddr(rrpc->dev,
- gp->addr);
- } else {
- BUG_ON(is_gc);
- rrpc_unlock_laddr(rrpc, r);
- nvm_dev_dma_free(rrpc->dev, rqd->ppa_list,
- rqd->dma_ppa_list);
- mempool_free(rrqd, rrpc->rrq_pool);
- mempool_free(rqd, rrpc->rq_pool);
- return NVM_IO_DONE;
- }
-
- brrqd[i].addr = gp;
- }
-
- rqd->opcode = NVM_OP_HBREAD;
-
- return NVM_IO_OK;
-}
-
-static int rrpc_read_rq(struct rrpc *rrpc, struct bio *bio, struct nvm_rq *rqd,
- unsigned long flags)
-{
- struct rrpc_rq *rrqd = nvm_rq_to_pdu(rqd);
- int is_gc = flags & NVM_IOTYPE_GC;
- sector_t laddr = rrpc_get_laddr(bio);
- struct rrpc_addr *gp;
-
- if (!is_gc && rrpc_lock_rq(rrpc, bio, rrqd)) {
- mempool_free(rrqd, rrpc->rrq_pool);
- mempool_free(rqd, rrpc->rq_pool);
- return NVM_IO_REQUEUE;
- }
-
- BUG_ON(!(laddr >= 0 && laddr < rrpc->nr_sects));
- gp = &rrpc->trans_map[laddr];
-
- if (gp->rblk) {
- rqd->ppa_addr = rrpc_ppa_to_gaddr(rrpc->dev, gp->addr);
- } else {
- BUG_ON(is_gc);
- rrpc_unlock_rq(rrpc, rrqd);
- mempool_free(rrqd, rrpc->rrq_pool);
- mempool_free(rqd, rrpc->rq_pool);
- return NVM_IO_DONE;
- }
-
- rqd->opcode = NVM_OP_HBREAD;
- rrqd->addr = gp;
-
- return NVM_IO_OK;
-}
-
/*
* Copy data from current bio to block write buffer. This is necessary
* to guarantee durability if a flash block becomes bad before all pages
@@ -1051,14 +977,335 @@ static int rrpc_write_rq(struct rrpc *rrpc, struct bio *bio,
return NVM_IO_DONE;
}

+static int rrpc_buffer_write(struct rrpc *rrpc, struct bio *bio,
+ struct rrpc_rq *rrqd, unsigned long flags)
+{
+ uint8_t nr_pages = rrpc_get_pages(bio);
+
+ rrqd->nr_pages = nr_pages;
+
+ if (nr_pages > 1)
+ return rrpc_write_ppalist_rq(rrpc, bio, rrqd, flags, nr_pages);
+ else
+ return rrpc_write_rq(rrpc, bio, rrqd, flags);
+}
+
+static int rrpc_read_ppalist_rq(struct rrpc *rrpc, struct bio *bio,
+ struct nvm_rq *rqd, struct rrpc_buf_rq *brrqd,
+ unsigned long flags, int nr_pages)
+{
+ struct rrpc_rq *rrqd = nvm_rq_to_pdu(rqd);
+ struct rrpc_inflight_rq *r = rrpc_get_inflight_rq(rrqd);
+ struct rrpc_addr *gp;
+ sector_t laddr = rrpc_get_laddr(bio);
+ int is_gc = flags & NVM_IOTYPE_GC;
+ int i;
+
+ if (!is_gc && rrpc_lock_rq(rrpc, bio, rrqd)) {
+ nvm_dev_dma_free(rrpc->dev, rqd->ppa_list, rqd->dma_ppa_list);
+ return NVM_IO_REQUEUE;
+ }
+
+ for (i = 0; i < nr_pages; i++) {
+ /* We assume that mapping occurs at 4KB granularity */
+ BUG_ON(!(laddr + i >= 0 && laddr + i < rrpc->nr_sects));
+ gp = &rrpc->trans_map[laddr + i];
+
+ if (gp->rblk) {
+ rqd->ppa_list[i] = rrpc_ppa_to_gaddr(rrpc->dev,
+ gp->addr);
+ } else {
+ BUG_ON(is_gc);
+ rrpc_unlock_laddr(rrpc, r);
+ nvm_dev_dma_free(rrpc->dev, rqd->ppa_list,
+ rqd->dma_ppa_list);
+ return NVM_IO_DONE;
+ }
+
+ brrqd[i].addr = gp;
+ }
+
+ rqd->opcode = NVM_OP_HBREAD;
+
+ return NVM_IO_OK;
+}
+
+static int rrpc_read_rq(struct rrpc *rrpc, struct bio *bio, struct nvm_rq *rqd,
+ unsigned long flags)
+{
+ struct rrpc_rq *rrqd = nvm_rq_to_pdu(rqd);
+ int is_gc = flags & NVM_IOTYPE_GC;
+ sector_t laddr = rrpc_get_laddr(bio);
+ struct rrpc_addr *gp;
+
+ if (!is_gc && rrpc_lock_rq(rrpc, bio, rrqd))
+ return NVM_IO_REQUEUE;
+
+ BUG_ON(!(laddr >= 0 && laddr < rrpc->nr_sects));
+ gp = &rrpc->trans_map[laddr];
+
+ if (gp->rblk) {
+ rqd->ppa_addr = rrpc_ppa_to_gaddr(rrpc->dev, gp->addr);
+ } else {
+ BUG_ON(is_gc);
+ rrpc_unlock_rq(rrpc, rrqd);
+ return NVM_IO_DONE;
+ }
+
+ rqd->opcode = NVM_OP_HBREAD;
+ rrqd->addr = gp;
+
+ return NVM_IO_OK;
+}
+
+static int rrpc_read_w_buf_entry(struct bio *bio, struct rrpc_block *rblk,
+ struct bvec_iter iter, int entry)
+{
+ struct buf_entry *read_entry;
+ struct bio_vec bv;
+ struct page *page;
+ void *kaddr;
+ void *data;
+ int read = 0;
+
+ lockdep_assert_held(&rblk->w_buf.s_lock);
+
+ spin_lock(&rblk->w_buf.w_lock);
+ if (entry >= rblk->w_buf.cur_mem) {
+ spin_unlock(&rblk->w_buf.w_lock);
+ goto out;
+ }
+ spin_unlock(&rblk->w_buf.w_lock);
+
+ read_entry = &rblk->w_buf.entries[entry];
+ data = read_entry->data;
+
+ bv = bio_iter_iovec(bio, iter);
+ page = bv.bv_page;
+ kaddr = kmap_atomic(page);
+ memcpy(kaddr + bv.bv_offset, data, RRPC_EXPOSED_PAGE_SIZE);
+ kunmap_atomic(kaddr);
+ read++;
+
+out:
+ return read;
+}
+
+static int rrpc_read_from_w_buf(struct rrpc *rrpc, struct nvm_rq *rqd,
+ struct rrpc_buf_rq *brrqd, unsigned long *read_bitmap)
+{
+ struct nvm_dev *dev = rrpc->dev;
+ struct rrpc_rq *rrqd = nvm_rq_to_pdu(rqd);
+ struct rrpc_addr *addr;
+ struct bio *bio = rqd->bio;
+ struct bvec_iter iter = bio->bi_iter;
+ struct rrpc_block *rblk;
+ unsigned long blk_id;
+ int nr_pages = rqd->nr_pages;
+ int left = nr_pages;
+ int read = 0;
+ int entry;
+ int i;
+
+ if (nr_pages != bio->bi_vcnt)
+ goto out;
+
+ if (nr_pages == 1) {
+ rblk = rrqd->addr->rblk;
+
+ /* If the write buffer exists, the block is open in memory */
+ spin_lock(&rblk->w_buf.s_lock);
+ atomic_inc(&rblk->w_buf.refs);
+ if (rblk->w_buf.entries) {
+ blk_id = rblk->parent->id;
+ entry = rrqd->addr->addr -
+ (blk_id * dev->sec_per_pg * dev->pgs_per_blk);
+
+ read = rrpc_read_w_buf_entry(bio, rblk, iter, entry);
+
+ left -= read;
+ WARN_ON(test_and_set_bit(0, read_bitmap));
+ }
+ bio_advance_iter(bio, &iter, RRPC_EXPOSED_PAGE_SIZE);
+
+ atomic_dec(&rblk->w_buf.refs);
+ spin_unlock(&rblk->w_buf.s_lock);
+
+ goto out;
+ }
+
+ /* Iterate through all pages and copy those that are found in the write
+ * buffer. We will complete the holes (if any) with an intermediate bio
+ * later on
+ */
+ for (i = 0; i < nr_pages; i++) {
+ addr = brrqd[i].addr;
+ rblk = addr->rblk;
+
+ /* If the write buffer exists, the block is open in memory */
+ spin_lock(&rblk->w_buf.s_lock);
+ atomic_inc(&rblk->w_buf.refs);
+ if (rblk->w_buf.entries) {
+ blk_id = rblk->parent->id;
+ entry = addr->addr - (blk_id * dev->sec_per_pg *
+ dev->pgs_per_blk);
+
+ read = rrpc_read_w_buf_entry(bio, rblk, iter, entry);
+
+ left -= read;
+ WARN_ON(test_and_set_bit(i, read_bitmap));
+ }
+ bio_advance_iter(bio, &iter, RRPC_EXPOSED_PAGE_SIZE);
+
+ atomic_dec(&rblk->w_buf.refs);
+ spin_unlock(&rblk->w_buf.s_lock);
+ }
+
+out:
+ return left;
+}
+
+static int rrpc_submit_read_io(struct rrpc *rrpc, struct bio *bio,
+ struct nvm_rq *rqd, unsigned long flags)
+{
+ struct rrpc_rq *rrqd = nvm_rq_to_pdu(rqd);
+ int err;
+
+ err = nvm_submit_io(rrpc->dev, rqd);
+ if (err) {
+ pr_err("rrpc: I/O submission failed: %d\n", err);
+ bio_put(bio);
+ if (!(flags & NVM_IOTYPE_GC)) {
+ rrpc_unlock_rq(rrpc, rrqd);
+ if (rqd->nr_pages > 1)
+ nvm_dev_dma_free(rrpc->dev,
+ rqd->ppa_list, rqd->dma_ppa_list);
+ }
+ return NVM_IO_ERR;
+ }
+
+ return NVM_IO_OK;
+}
+
+static int rrpc_fill_partial_read_bio(struct rrpc *rrpc, struct bio *bio,
+ unsigned long *read_bitmap, struct nvm_rq *rqd,
+ struct rrpc_buf_rq *brrqd, uint8_t nr_pages)
+{
+ struct bio *new_bio;
+ struct page *page;
+ struct bio_vec src_bv, dst_bv;
+ void *src_p, *dst_p;
+ int nr_holes = nr_pages - bitmap_weight(read_bitmap, nr_pages);
+ int hole;
+ int i = 0;
+ int ret;
+ DECLARE_COMPLETION_ONSTACK(wait);
+
+ new_bio = bio_alloc(GFP_KERNEL, nr_holes);
+ if (!new_bio) {
+ pr_err("nvm: rrpc: could not alloc read bio\n");
+ return NVM_IO_ERR;
+ }
+
+ hole = find_first_zero_bit(read_bitmap, nr_pages);
+ do {
+ page = mempool_alloc(rrpc->page_pool, GFP_KERNEL);
+ if (!page) {
+ bio_put(new_bio);
+ pr_err("nvm: rrpc: could not alloc read page\n");
+ goto err;
+ }
+
+ ret = bio_add_page(new_bio, page, RRPC_EXPOSED_PAGE_SIZE, 0);
+ if (ret != RRPC_EXPOSED_PAGE_SIZE) {
+ pr_err("nvm: rrpc: could not add page to bio\n");
+ mempool_free(page, rrpc->page_pool);
+ goto err;
+ }
+
+ rqd->ppa_list[i] = rrpc_ppa_to_gaddr(rrpc->dev,
+ brrqd[hole].addr->addr);
+
+ i++;
+ hole = find_next_zero_bit(read_bitmap, nr_pages, hole + 1);
+ } while (hole != nr_pages);
+
+ if (nr_holes != new_bio->bi_vcnt) {
+ pr_err("rrpc: malformed bio\n");
+ goto err;
+ }
+
+ new_bio->bi_iter.bi_sector = bio->bi_iter.bi_sector;
+ new_bio->bi_rw = READ;
+ new_bio->bi_private = &wait;
+ new_bio->bi_end_io = rrpc_end_sync_bio;
+
+ rqd->flags |= NVM_IOTYPE_SYNC;
+ rqd->bio = new_bio;
+ rqd->nr_pages = nr_holes;
+
+ rrpc_submit_read_io(rrpc, new_bio, rqd, rqd->flags);
+ wait_for_completion_io(&wait);
+
+ if (new_bio->bi_error)
+ goto err;
+
+ /* Fill the holes in the original bio */
+ i = 0;
+ hole = find_first_zero_bit(read_bitmap, nr_pages);
+ do {
+ src_bv = new_bio->bi_io_vec[i];
+ dst_bv = bio->bi_io_vec[hole];
+
+ src_p = kmap_atomic(src_bv.bv_page);
+ dst_p = kmap_atomic(dst_bv.bv_page);
+
+ memcpy(dst_p + dst_bv.bv_offset,
+ src_p + src_bv.bv_offset,
+ RRPC_EXPOSED_PAGE_SIZE);
+
+ kunmap_atomic(src_p);
+ kunmap_atomic(dst_p);
+
+ mempool_free(src_bv.bv_page, rrpc->page_pool);
+
+ i++;
+ hole = find_next_zero_bit(read_bitmap, nr_pages, hole + 1);
+ } while (hole != nr_pages);
+
+ bio_put(new_bio);
+
+ /* Complete the original bio and associated request */
+ rqd->flags &= ~NVM_IOTYPE_SYNC;
+ rqd->bio = bio;
+ rqd->nr_pages = nr_pages;
+
+ bio_endio(bio);
+ rrpc_end_io(rqd);
+ return NVM_IO_OK;
+
+err:
+ /* Free allocated pages in new bio */
+ for (i = 0; i < new_bio->bi_vcnt; i++) {
+ src_bv = new_bio->bi_io_vec[i];
+ mempool_free(src_bv.bv_page, rrpc->page_pool);
+ }
+ bio_endio(new_bio);
+ return NVM_IO_ERR;
+}
+
static int rrpc_submit_read(struct rrpc *rrpc, struct bio *bio,
struct rrpc_rq *rrqd, unsigned long flags)
{
struct nvm_rq *rqd;
struct rrpc_buf_rq brrqd[rrpc->max_write_pgs];
+ unsigned long read_bitmap; /* Max 64 ppas per request */
+ int left;
uint8_t nr_pages = rrpc_get_pages(bio);
int err;

+ bitmap_zero(&read_bitmap, nr_pages);
+
rqd = mempool_alloc(rrpc->rq_pool, GFP_KERNEL);
if (!rqd) {
pr_err_ratelimited("rrpc: not able to queue bio.");
@@ -1073,22 +1320,25 @@ static int rrpc_submit_read(struct rrpc *rrpc, struct bio *bio,
&rqd->dma_ppa_list);
if (!rqd->ppa_list) {
pr_err("rrpc: not able to allocate ppa list\n");
- mempool_free(rrqd, rrpc->rrq_pool);
mempool_free(rqd, rrpc->rq_pool);
+ mempool_free(rrqd, rrpc->rrq_pool);
return NVM_IO_ERR;
}

err = rrpc_read_ppalist_rq(rrpc, bio, rqd, brrqd, flags,
nr_pages);
if (err) {
- mempool_free(rrqd, rrpc->rrq_pool);
mempool_free(rqd, rrpc->rq_pool);
+ mempool_free(rrqd, rrpc->rrq_pool);
return err;
}
} else {
err = rrpc_read_rq(rrpc, bio, rqd, flags);
- if (err)
+ if (err) {
+ mempool_free(rrqd, rrpc->rrq_pool);
+ mempool_free(rqd, rrpc->rq_pool);
return err;
+ }
}

bio_get(bio);
@@ -1097,33 +1347,22 @@ static int rrpc_submit_read(struct rrpc *rrpc, struct bio *bio,
rqd->nr_pages = rrqd->nr_pages = nr_pages;
rqd->flags = flags;

- err = nvm_submit_io(rrpc->dev, rqd);
- if (err) {
- pr_err("rrpc: I/O submission failed: %d\n", err);
- bio_put(bio);
- if (!(flags & NVM_IOTYPE_GC)) {
- rrpc_unlock_rq(rrpc, rrqd);
- if (rqd->nr_pages > 1)
- nvm_dev_dma_free(rrpc->dev,
- rqd->ppa_list, rqd->dma_ppa_list);
- }
+ left = rrpc_read_from_w_buf(rrpc, rqd, brrqd, &read_bitmap);
+ if (left == 0) {
+ bio_endio(bio);
+ rrpc_end_io(rqd);
+ return NVM_IO_OK;
+ } else if (left < 0)
return NVM_IO_ERR;
- }

- return NVM_IO_OK;
-}
+ if (bitmap_empty(&read_bitmap, nr_pages))
+ return rrpc_submit_read_io(rrpc, bio, rqd, flags);

-static int rrpc_buffer_write(struct rrpc *rrpc, struct bio *bio,
- struct rrpc_rq *rrqd, unsigned long flags)
-{
- uint8_t nr_pages = rrpc_get_pages(bio);
-
- rrqd->nr_pages = nr_pages;
-
- if (nr_pages > 1)
- return rrpc_write_ppalist_rq(rrpc, bio, rrqd, flags, nr_pages);
- else
- return rrpc_write_rq(rrpc, bio, rrqd, flags);
+ /* The read bio could not be completely served from the write buffer.
+ * This case only occurs when several pages are sent in a single bio.
+ */
+ return rrpc_fill_partial_read_bio(rrpc, bio, &read_bitmap, rqd, brrqd,
+ nr_pages);
}

static int rrpc_submit_io(struct rrpc *rrpc, struct bio *bio,
diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h
index eda9743..ae26ced 100644
--- a/include/linux/lightnvm.h
+++ b/include/linux/lightnvm.h
@@ -11,6 +11,7 @@ enum {

NVM_IOTYPE_NONE = 0,
NVM_IOTYPE_GC = 1,
+ NVM_IOTYPE_SYNC = 2,
};

#define NVM_BLK_BITS (16)
--
2.1.4