Re: [PATCH v2 2/2] lightnvm: add non-continuous lun target creation support

From: Matias BjÃrling
Date: Wed Jan 27 2016 - 04:44:21 EST


On 01/26/2016 01:33 PM, Wenwei Tao wrote:
> When create a target, we specify the begin lunid and
> the end lunid, and get the corresponding continuous
> luns from media manager, if one of the luns is not free,
> we failed to create the target, even if the device's
> total free luns are enough.
>
> So add non-continuous lun target creation support,
> thus we can improve the backend device's space utilization.
> Signed-off-by: Wenwei Tao <ww.tao0320@xxxxxxxxx>
> ---
> Changes since v1:
> -use NVM_FIXED instead NVM_C_FIXED in gennvm_get_lun
> -add target creation flags check
> -rebase to v4.5-rc1
>
> drivers/lightnvm/core.c | 36 ++++---
> drivers/lightnvm/gennvm.c | 42 ++++++++-
> drivers/lightnvm/rrpc.c | 215 +++++++++++++++++++++++++++---------------
> drivers/lightnvm/rrpc.h | 6 +-
> include/linux/lightnvm.h | 24 ++++-
> include/uapi/linux/lightnvm.h | 3 +
> 6 files changed, 229 insertions(+), 97 deletions(-)
>

Hi Wenwei,

I did some digging on the patch and changed the interface to a
reserve/release interface. I also removed the logic to dynamically
select another lun than the one requested.

A couple of questions:

1. The rrpc_lun->rev_lock and rev_trans_map change; this might be for
another patch, and it isn't directly related to continuous mapping?
2. Instead of dynamically assigning new luns when not available, what
about taking a list of lun ids instead?

I would only implement this in the lnvm ioctl interface. It would allow
a list of lun ids to be passed through the lnvm ioctl interface. This
way, the NVM_CONFIG_TYPE_SIMPLE can be extended with another
NVM_CONFIG_TYPE_LIST, or similar, which then parses the ioctl
appropriately. Would that be a better way to do it?

Here is the diff. It is also rebased on top of the two latest patches
that which are sent up for the next -rc.

Thanks

diff --git a/drivers/lightnvm/core.c b/drivers/lightnvm/core.c
index 27a59e8..59a4bf9 100644
--- a/drivers/lightnvm/core.c
+++ b/drivers/lightnvm/core.c
@@ -468,6 +468,11 @@ static int nvm_core_init(struct nvm_dev *dev)
dev->luns_per_chnl *
dev->nr_chnls;
dev->total_pages = dev->total_blocks * dev->pgs_per_blk;
+ dev->lun_map = kcalloc(BITS_TO_LONGS(dev->nr_luns),
+ sizeof(unsigned long), GFP_KERNEL);
+ if (!dev->lun_map)
+ return -ENOMEM;
+
INIT_LIST_HEAD(&dev->online_targets);
mutex_init(&dev->mlock);
spin_lock_init(&dev->lock);
@@ -610,6 +615,7 @@ void nvm_unregister(char *disk_name)
up_write(&nvm_lock);

nvm_exit(dev);
+ kfree(dev->lun_map);
kfree(dev);
}
EXPORT_SYMBOL(nvm_unregister);
diff --git a/drivers/lightnvm/gennvm.c b/drivers/lightnvm/gennvm.c
index 6e2685d..6419898 100644
--- a/drivers/lightnvm/gennvm.c
+++ b/drivers/lightnvm/gennvm.c
@@ -188,6 +188,9 @@ static int gennvm_block_map(u64 slba, u32 nlb,
__le64 *entries, void *private)
lun_id = div_u64(pba, dev->sec_per_lun);
lun = &gn->luns[lun_id];

+ if (!test_bit(lun_id, dev->lun_map))
+ __set_bit(lun_id, dev->lun_map);
+
/* Calculate block offset into lun */
pba = pba - (dev->sec_per_lun * lun_id);
blk = &lun->vlun.blocks[div_u64(pba, dev->sec_per_blk)];
@@ -478,10 +481,23 @@ static int gennvm_erase_blk(struct nvm_dev *dev,
struct nvm_block *blk,
return nvm_erase_ppa(dev, &addr, 1);
}

+static int gennvm_reserve_lun(struct nvm_dev *dev, int lunid)
+{
+ return test_and_set_bit(lunid, dev->lun_map);
+}
+
+static void gennvm_release_lun(struct nvm_dev *dev, int lunid)
+{
+ WARN_ON(!test_and_clear_bit(lunid, dev->lun_map));
+}
+
static struct nvm_lun *gennvm_get_lun(struct nvm_dev *dev, int lunid)
{
struct gen_nvm *gn = dev->mp;

+ if (unlikely(lunid >= dev->nr_luns))
+ return NULL;
+
return &gn->luns[lunid].vlun;
}

@@ -523,6 +539,8 @@ static struct nvmm_type gennvm = {
.erase_blk = gennvm_erase_blk,

.get_lun = gennvm_get_lun,
+ .reserve_lun = gennvm_reserve_lun,
+ .release_lun = gennvm_release_lun,
.lun_info_print = gennvm_lun_info_print,

.get_area = gennvm_get_area,
diff --git a/drivers/lightnvm/rrpc.c b/drivers/lightnvm/rrpc.c
index 20afe1c..0a99ebc 100644
--- a/drivers/lightnvm/rrpc.c
+++ b/drivers/lightnvm/rrpc.c
@@ -26,25 +26,32 @@ static int rrpc_submit_io(struct rrpc *rrpc, struct
bio *bio,
for ((i) = 0, rlun = &(rrpc)->luns[0]; \
(i) < (rrpc)->nr_luns; (i)++, rlun = &(rrpc)->luns[(i)])

+static inline u64 lun_poffset(struct nvm_dev *dev, struct nvm_lun *lun)
+{
+ return lun->id * dev->sec_per_lun;
+}
+
static void rrpc_page_invalidate(struct rrpc *rrpc, struct rrpc_addr *a)
{
struct rrpc_block *rblk = a->rblk;
- unsigned int pg_offset;
+ struct rrpc_lun *rlun = rblk->rlun;
+ u64 pg_offset;

- lockdep_assert_held(&rrpc->rev_lock);
+ lockdep_assert_held(&rlun->rev_lock);

if (a->addr == ADDR_EMPTY || !rblk)
return;

spin_lock(&rblk->lock);

- div_u64_rem(a->addr, rrpc->dev->pgs_per_blk, &pg_offset);
+ div_u64_rem(a->addr, rrpc->dev->pgs_per_blk, (u32 *)&pg_offset);
WARN_ON(test_and_set_bit(pg_offset, rblk->invalid_pages));
rblk->nr_invalid_pages++;

spin_unlock(&rblk->lock);

- rrpc->rev_trans_map[a->addr - rrpc->poffset].addr = ADDR_EMPTY;
+ pg_offset = lun_poffset(rrpc->dev, rlun->parent);
+ rlun->rev_trans_map[a->addr - pg_offset].addr = ADDR_EMPTY;
}

static void rrpc_invalidate_range(struct rrpc *rrpc, sector_t slba,
@@ -52,14 +59,15 @@ static void rrpc_invalidate_range(struct rrpc *rrpc,
sector_t slba,
{
sector_t i;

- spin_lock(&rrpc->rev_lock);
for (i = slba; i < slba + len; i++) {
struct rrpc_addr *gp = &rrpc->trans_map[i];
+ struct rrpc_lun *rlun = gp->rblk->rlun;

+ spin_lock(&rlun->rev_lock);
rrpc_page_invalidate(rrpc, gp);
+ spin_unlock(&rlun->rev_lock);
gp->rblk = NULL;
}
- spin_unlock(&rrpc->rev_lock);
}

static struct nvm_rq *rrpc_inflight_laddr_acquire(struct rrpc *rrpc,
@@ -281,13 +289,14 @@ static void rrpc_end_sync_bio(struct bio *bio)
static int rrpc_move_valid_pages(struct rrpc *rrpc, struct rrpc_block
*rblk)
{
struct request_queue *q = rrpc->dev->q;
+ struct rrpc_lun *rlun = rblk->rlun;
struct rrpc_rev_addr *rev;
struct nvm_rq *rqd;
struct bio *bio;
struct page *page;
int slot;
int nr_pgs_per_blk = rrpc->dev->pgs_per_blk;
- u64 phys_addr;
+ u64 phys_addr, poffset;
DECLARE_COMPLETION_ONSTACK(wait);

if (bitmap_full(rblk->invalid_pages, nr_pgs_per_blk))
@@ -303,6 +312,7 @@ static int rrpc_move_valid_pages(struct rrpc *rrpc,
struct rrpc_block *rblk)
if (!page)
return -ENOMEM;

+ poffset = lun_poffset(rrpc->dev, rlun->parent);
while ((slot = find_first_zero_bit(rblk->invalid_pages,
nr_pgs_per_blk)) < nr_pgs_per_blk) {

@@ -310,23 +320,23 @@ static int rrpc_move_valid_pages(struct rrpc
*rrpc, struct rrpc_block *rblk)
phys_addr = (rblk->parent->id * nr_pgs_per_blk) + slot;

try:
- spin_lock(&rrpc->rev_lock);
+ spin_lock(&rlun->rev_lock);
/* Get logical address from physical to logical table */
- rev = &rrpc->rev_trans_map[phys_addr - rrpc->poffset];
+ rev = &rlun->rev_trans_map[phys_addr - poffset];
/* already updated by previous regular write */
if (rev->addr == ADDR_EMPTY) {
- spin_unlock(&rrpc->rev_lock);
+ spin_unlock(&rlun->rev_lock);
continue;
}

rqd = rrpc_inflight_laddr_acquire(rrpc, rev->addr, 1);
if (IS_ERR_OR_NULL(rqd)) {
- spin_unlock(&rrpc->rev_lock);
+ spin_unlock(&rlun->rev_lock);
schedule();
goto try;
}

- spin_unlock(&rrpc->rev_lock);
+ spin_unlock(&rlun->rev_lock);

/* Perform read to do GC */
bio->bi_iter.bi_sector = rrpc_get_sector(rev->addr);
@@ -395,7 +405,7 @@ static void rrpc_block_gc(struct work_struct *work)
struct rrpc_block *rblk = gcb->rblk;
struct nvm_dev *dev = rrpc->dev;
struct nvm_lun *lun = rblk->parent->lun;
- struct rrpc_lun *rlun = &rrpc->luns[lun->id - rrpc->lun_offset];
+ struct rrpc_lun *rlun = lun->private;

mempool_free(gcb, rrpc->gcb_pool);
pr_debug("nvm: block '%lu' being reclaimed\n", rblk->parent->id);
@@ -496,9 +506,9 @@ static void rrpc_gc_queue(struct work_struct *work)
ws_gc);
struct rrpc *rrpc = gcb->rrpc;
struct rrpc_block *rblk = gcb->rblk;
- struct nvm_lun *lun = rblk->parent->lun;
struct nvm_block *blk = rblk->parent;
- struct rrpc_lun *rlun = &rrpc->luns[lun->id - rrpc->lun_offset];
+ struct nvm_lun *lun = blk->lun;
+ struct rrpc_lun *rlun = lun->private;

spin_lock(&rlun->lock);
list_add_tail(&rblk->prio, &rlun->prio_list);
@@ -549,22 +559,24 @@ static struct rrpc_lun *rrpc_get_lun_rr(struct
rrpc *rrpc, int is_gc)
static struct rrpc_addr *rrpc_update_map(struct rrpc *rrpc, sector_t laddr,
struct rrpc_block *rblk, u64 paddr)
{
+ struct rrpc_lun *rlun = rblk->rlun;
struct rrpc_addr *gp;
struct rrpc_rev_addr *rev;
+ u64 poffset = lun_poffset(rrpc->dev, rlun->parent);

BUG_ON(laddr >= rrpc->nr_pages);

gp = &rrpc->trans_map[laddr];
- spin_lock(&rrpc->rev_lock);
+ spin_lock(&rlun->rev_lock);
if (gp->rblk)
rrpc_page_invalidate(rrpc, gp);

gp->addr = paddr;
gp->rblk = rblk;

- rev = &rrpc->rev_trans_map[gp->addr - rrpc->poffset];
+ rev = &rlun->rev_trans_map[gp->addr - poffset];
rev->addr = laddr;
- spin_unlock(&rrpc->rev_lock);
+ spin_unlock(&rlun->rev_lock);

return gp;
}
@@ -953,8 +965,6 @@ static void rrpc_requeue(struct work_struct *work)

static void rrpc_gc_free(struct rrpc *rrpc)
{
- struct rrpc_lun *rlun;
- int i;

if (rrpc->krqd_wq)
destroy_workqueue(rrpc->krqd_wq);
@@ -962,16 +972,6 @@ static void rrpc_gc_free(struct rrpc *rrpc)
if (rrpc->kgc_wq)
destroy_workqueue(rrpc->kgc_wq);

- if (!rrpc->luns)
- return;
-
- for (i = 0; i < rrpc->nr_luns; i++) {
- rlun = &rrpc->luns[i];
-
- if (!rlun->blocks)
- break;
- vfree(rlun->blocks);
- }
}

static int rrpc_gc_init(struct rrpc *rrpc)
@@ -992,7 +992,6 @@ static int rrpc_gc_init(struct rrpc *rrpc)

static void rrpc_map_free(struct rrpc *rrpc)
{
- vfree(rrpc->rev_trans_map);
vfree(rrpc->trans_map);
}

@@ -1000,19 +999,28 @@ static int rrpc_l2p_update(u64 slba, u32 nlb,
__le64 *entries, void *private)
{
struct rrpc *rrpc = (struct rrpc *)private;
struct nvm_dev *dev = rrpc->dev;
- struct rrpc_addr *addr = rrpc->trans_map + slba;
- struct rrpc_rev_addr *raddr = rrpc->rev_trans_map;
+ struct rrpc_addr *addr;
+ struct rrpc_rev_addr *raddr;
sector_t max_pages = dev->total_pages * (dev->sec_size >> 9);
- u64 elba = slba + nlb;
- u64 i;
+ int page_size = dev->sec_per_pg * dev->sec_size;
+ u64 elba, i;
+
+ elba = slba + nlb;

if (unlikely(elba > dev->total_pages)) {
pr_err("nvm: L2P data from device is out of bounds!\n");
return -EINVAL;
}

+ slba -= rrpc->soffset >> (ilog2(page_size) - 9);
+ addr = rrpc->trans_map + slba;
for (i = 0; i < nlb; i++) {
+ struct rrpc_lun *rlun;
+ struct nvm_lun *lun;
u64 pba = le64_to_cpu(entries[i]);
+ u64 poffset;
+ int lunid;
+
/* LNVM treats address-spaces as silos, LBA and PBA are
* equally large and zero-indexed.
*/
@@ -1028,8 +1036,15 @@ static int rrpc_l2p_update(u64 slba, u32 nlb,
__le64 *entries, void *private)
if (!pba)
continue;

+ lunid = div_u64(pba, dev->sec_per_lun);
+ lun = dev->mt->get_lun(dev, lunid);
+ if (unlikely(!lun))
+ return -EINVAL;
+ rlun = lun->private;
+ raddr = rlun->rev_trans_map;
+ poffset = lun_poffset(dev, lun);
addr[i].addr = pba;
- raddr[pba].addr = slba + i;
+ raddr[pba - poffset].addr = slba + i;
}

return 0;
@@ -1049,17 +1064,11 @@ static int rrpc_map_init(struct rrpc *rrpc)
if (!rrpc->trans_map)
return -ENOMEM;

- rrpc->rev_trans_map = vmalloc(sizeof(struct rrpc_rev_addr)
- * rrpc->nr_pages);
- if (!rrpc->rev_trans_map)
- return -ENOMEM;

for (i = 0; i < rrpc->nr_pages; i++) {
struct rrpc_addr *p = &rrpc->trans_map[i];
- struct rrpc_rev_addr *r = &rrpc->rev_trans_map[i];

p->addr = ADDR_EMPTY;
- r->addr = ADDR_EMPTY;
}

if (!dev->ops->get_l2p_tbl)
@@ -1130,22 +1139,86 @@ static void rrpc_core_free(struct rrpc *rrpc)

static void rrpc_luns_free(struct rrpc *rrpc)
{
+ struct nvm_dev *dev = rrpc->dev;
+ struct rrpc_lun *rlun;
+ struct nvm_lun *lun;
+ int i;
+
+ if (!rrpc->luns)
+ return;
+
+ for (i = 0; i < rrpc->nr_luns; i++) {
+ rlun = &rrpc->luns[i];
+ if (!rlun)
+ break;
+ lun = rlun->parent;
+ dev->mt->release_lun(dev, lun->id);
+ vfree(rlun->rev_trans_map);
+ vfree(rlun->blocks);
+ }
kfree(rrpc->luns);
+ rrpc->luns = NULL;
+
+}
+
+static int rrpc_lun_init(struct rrpc *rrpc, struct rrpc_lun *rlun,
+ struct nvm_lun *lun)
+{
+ struct nvm_dev *dev = rrpc->dev;
+ int i;
+
+ rlun->rrpc = rrpc;
+ rlun->parent = lun;
+
+ rlun->rev_trans_map = vmalloc(sizeof(struct rrpc_rev_addr) *
+ dev->sec_per_lun);
+ if (!rlun->rev_trans_map)
+ return -ENOMEM;
+
+ for (i = 0; i < dev->sec_per_lun; i++) {
+ struct rrpc_rev_addr *r = &rlun->rev_trans_map[i];
+
+ r->addr = ADDR_EMPTY;
+ }
+
+ rlun->blocks = vzalloc(sizeof(struct rrpc_block) * dev->blks_per_lun);
+ if (!rlun->blocks) {
+ vfree(rlun->rev_trans_map);
+ return -ENOMEM;
+ }
+
+ for (i = 0; i < dev->blks_per_lun; i++) {
+ struct rrpc_block *rblk = &rlun->blocks[i];
+ struct nvm_block *blk = &lun->blocks[i];
+
+ rblk->parent = blk;
+ rblk->rlun = rlun;
+ INIT_LIST_HEAD(&rblk->prio);
+ spin_lock_init(&rblk->lock);
+ }
+
+ lun->private = rlun;
+ INIT_LIST_HEAD(&rlun->prio_list);
+ INIT_LIST_HEAD(&rlun->open_list);
+ INIT_LIST_HEAD(&rlun->closed_list);
+ INIT_WORK(&rlun->ws_gc, rrpc_lun_gc);
+ spin_lock_init(&rlun->lock);
+ spin_lock_init(&rlun->rev_lock);
+
+ return 0;
}

static int rrpc_luns_init(struct rrpc *rrpc, int lun_begin, int lun_end)
{
struct nvm_dev *dev = rrpc->dev;
struct rrpc_lun *rlun;
- int i, j;
+ int i, ret;

if (dev->pgs_per_blk > MAX_INVALID_PAGES_STORAGE * BITS_PER_LONG) {
pr_err("rrpc: number of pages per block too high.");
return -EINVAL;
}

- spin_lock_init(&rrpc->rev_lock);
-
rrpc->luns = kcalloc(rrpc->nr_luns, sizeof(struct rrpc_lun),
GFP_KERNEL);
if (!rrpc->luns)
@@ -1153,40 +1226,35 @@ static int rrpc_luns_init(struct rrpc *rrpc, int
lun_begin, int lun_end)

/* 1:1 mapping */
for (i = 0; i < rrpc->nr_luns; i++) {
- struct nvm_lun *lun = dev->mt->get_lun(dev, lun_begin + i);
+ int lunid = lun_begin + i;
+ struct nvm_lun *lun;
+
+ if (dev->mt->reserve_lun(dev, lunid)) {
+ pr_err("rrpc: lun %u is already allocated\n", lunid);
+ ret = -EINVAL;
+ goto err;
+ }
+
+ lun = dev->mt->get_lun(dev, lunid);
+ if (!lun) {
+ ret = -EINVAL;
+ goto err;
+ }

rlun = &rrpc->luns[i];
- rlun->rrpc = rrpc;
- rlun->parent = lun;
- INIT_LIST_HEAD(&rlun->prio_list);
- INIT_LIST_HEAD(&rlun->open_list);
- INIT_LIST_HEAD(&rlun->closed_list);
-
- INIT_WORK(&rlun->ws_gc, rrpc_lun_gc);
- spin_lock_init(&rlun->lock);
+ ret = rrpc_lun_init(rrpc, rlun, lun);
+ if (ret)
+ goto err;

rrpc->total_blocks += dev->blks_per_lun;
rrpc->nr_pages += dev->sec_per_lun;
-
- rlun->blocks = vzalloc(sizeof(struct rrpc_block) *
- rrpc->dev->blks_per_lun);
- if (!rlun->blocks)
- goto err;
-
- for (j = 0; j < rrpc->dev->blks_per_lun; j++) {
- struct rrpc_block *rblk = &rlun->blocks[j];
- struct nvm_block *blk = &lun->blocks[j];
-
- rblk->parent = blk;
- rblk->rlun = rlun;
- INIT_LIST_HEAD(&rblk->prio);
- spin_lock_init(&rblk->lock);
- }
}

return 0;
err:
- return -ENOMEM;
+ rrpc_luns_free(rrpc);
+ return ret;
+
}

/* returns 0 on success and stores the beginning address in *begin */
@@ -1258,14 +1326,16 @@ static sector_t rrpc_capacity(void *private)
static void rrpc_block_map_update(struct rrpc *rrpc, struct rrpc_block
*rblk)
{
struct nvm_dev *dev = rrpc->dev;
+ struct rrpc_lun *rlun = rblk->rlun;
int offset;
struct rrpc_addr *laddr;
- u64 paddr, pladdr;
+ u64 paddr, pladdr, poffset;

+ poffset = lun_poffset(dev, rlun->parent);
for (offset = 0; offset < dev->pgs_per_blk; offset++) {
paddr = block_to_addr(rrpc, rblk) + offset;

- pladdr = rrpc->rev_trans_map[paddr].addr;
+ pladdr = rlun->rev_trans_map[paddr - poffset].addr;
if (pladdr == ADDR_EMPTY)
continue;

@@ -1374,9 +1444,6 @@ static void *rrpc_init(struct nvm_dev *dev, struct
gendisk *tdisk,
goto err;
}

- rrpc->poffset = dev->sec_per_lun * lun_begin;
- rrpc->lun_offset = lun_begin;
-
ret = rrpc_core_init(rrpc);
if (ret) {
pr_err("nvm: rrpc: could not initialize core\n");
diff --git a/drivers/lightnvm/rrpc.h b/drivers/lightnvm/rrpc.h
index 9380c68..4d756d8 100644
--- a/drivers/lightnvm/rrpc.h
+++ b/drivers/lightnvm/rrpc.h
@@ -86,6 +86,9 @@ struct rrpc_lun {
*/

struct work_struct ws_gc;
+ /* store a reverse map for garbage collection */
+ struct rrpc_rev_addr *rev_trans_map;
+ spinlock_t rev_lock;

spinlock_t lock;
};
@@ -124,9 +127,6 @@ struct rrpc {
* addresses are used when writing to the disk block device.
*/
struct rrpc_addr *trans_map;
- /* also store a reverse map for garbage collection */
- struct rrpc_rev_addr *rev_trans_map;
- spinlock_t rev_lock;

struct rrpc_inflight inflights;

diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h
index 18f1bb0..a33af4f 100644
--- a/include/linux/lightnvm.h
+++ b/include/linux/lightnvm.h
@@ -271,6 +271,7 @@ struct nvm_lun {
spinlock_t lock;

struct nvm_block *blocks;
+ void *private;
};

enum {
@@ -342,6 +343,8 @@ struct nvm_dev {
int nr_luns;
unsigned max_pages_per_blk;

+ unsigned long *lun_map;
+
void *ppalist_pool;

struct nvm_id identity;
@@ -462,6 +465,8 @@ typedef int (nvmm_submit_io_fn)(struct nvm_dev *,
struct nvm_rq *);
typedef int (nvmm_erase_blk_fn)(struct nvm_dev *, struct nvm_block *,
unsigned long);
typedef struct nvm_lun *(nvmm_get_lun_fn)(struct nvm_dev *, int);
+typedef int (nvmm_reserve_lun(struct nvm_dev *, int));
+typedef void (nvmm_release_lun(struct nvm_dev *, int));
typedef void (nvmm_lun_info_print_fn)(struct nvm_dev *);

typedef int (nvmm_get_area_fn)(struct nvm_dev *, sector_t *, sector_t);
@@ -488,6 +493,8 @@ struct nvmm_type {

/* Configuration management */
nvmm_get_lun_fn *get_lun;
+ nvmm_reserve_lun *reserve_lun;
+ nvmm_release_lun *release_lun;

/* Statistics */
nvmm_lun_info_print_fn *lun_info_print;