Re: [PATCH 1/2] lightnvm: pblk: refactor metadata paths

From: Javier Gonzalez
Date: Tue Aug 21 2018 - 07:56:58 EST

> On 21 Aug 2018, at 13.54, Matias Bjørling <mb@xxxxxxxxxxx> wrote:
>
>> On 08/17/2018 12:19 PM, Javier González wrote:
>> pblk maintains two different metadata paths for smeta and emeta, which
>> store metadata at the start of the line and at the end of the line,
>> respectively. Until now, these paths have been common for writing and
>> retrieving metadata; however, as the paths diverge, the common code
>> becomes less clear and unnecessarily complicated.
>> In preparation for further changes to the metadata write path, this
>> patch separates the write and read paths for smeta and emeta and
>> removes the synchronous emeta path, as it is no longer used (emeta is
>> scheduled asynchronously to prevent jittering due to internal I/Os).
>> Signed-off-by: Javier González <javier@xxxxxxxxxxxx>
>> ---
>> drivers/lightnvm/pblk-core.c | 338 ++++++++++++++++++---------------------
>> drivers/lightnvm/pblk-gc.c | 2 +-
>> drivers/lightnvm/pblk-recovery.c | 4 +-
>> drivers/lightnvm/pblk.h | 5 +-
>> 4 files changed, 164 insertions(+), 185 deletions(-)
>> diff --git a/drivers/lightnvm/pblk-core.c b/drivers/lightnvm/pblk-core.c
>> index 72de7456845b..52306573cc0e 100644
>> --- a/drivers/lightnvm/pblk-core.c
>> +++ b/drivers/lightnvm/pblk-core.c
>> @@ -621,12 +621,137 @@ u64 pblk_lookup_page(struct pblk *pblk, struct pblk_line *line)
>> return paddr;
>> }
>> -/*
>> - * Submit emeta to one LUN in the raid line at the time to avoid a deadlock when
>> - * taking the per LUN semaphore.
>> - */
>> -static int pblk_line_submit_emeta_io(struct pblk *pblk, struct pblk_line *line,
>> - void *emeta_buf, u64 paddr, int dir)
>> +u64 pblk_line_smeta_start(struct pblk *pblk, struct pblk_line *line)
>> +{
>> + struct nvm_tgt_dev *dev = pblk->dev;
>> + struct nvm_geo *geo = &dev->geo;
>> + struct pblk_line_meta *lm = &pblk->lm;
>> + int bit;
>> +
>> + /* This usually only happens on bad lines */
>> + bit = find_first_zero_bit(line->blk_bitmap, lm->blk_per_line);
>> + if (bit >= lm->blk_per_line)
>> + return -1;
>> +
>> + return bit * geo->ws_opt;
>> +}
>> +
>> +int pblk_line_smeta_read(struct pblk *pblk, struct pblk_line *line)
>> +{
>> + struct nvm_tgt_dev *dev = pblk->dev;
>> + struct pblk_line_meta *lm = &pblk->lm;
>> + struct bio *bio;
>> + struct nvm_rq rqd;
>> + u64 paddr = pblk_line_smeta_start(pblk, line);
>> + int i, ret;
>> +
>> + memset(&rqd, 0, sizeof(struct nvm_rq));
>> +
>> + rqd.meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL,
>> + &rqd.dma_meta_list);
>> + if (!rqd.meta_list)
>> + return -ENOMEM;
>> +
>> + rqd.ppa_list = rqd.meta_list + pblk_dma_meta_size;
>> + rqd.dma_ppa_list = rqd.dma_meta_list + pblk_dma_meta_size;
>> +
>> + bio = bio_map_kern(dev->q, line->smeta, lm->smeta_len, GFP_KERNEL);
>> + if (IS_ERR(bio)) {
>> + ret = PTR_ERR(bio);
>> + goto free_ppa_list;
>> + }
>> +
>> + bio->bi_iter.bi_sector = 0; /* internal bio */
>> + bio_set_op_attrs(bio, REQ_OP_READ, 0);
>> +
>> + rqd.bio = bio;
>> + rqd.opcode = NVM_OP_PREAD;
>> + rqd.nr_ppas = lm->smeta_sec;
>> + rqd.is_seq = 1;
>> +
>> + for (i = 0; i < lm->smeta_sec; i++, paddr++)
>> + rqd.ppa_list[i] = addr_to_gen_ppa(pblk, paddr, line->id);
>> +
>> + ret = pblk_submit_io_sync(pblk, &rqd);
>> + if (ret) {
>> + pblk_err(pblk, "smeta I/O submission failed: %d\n", ret);
>> + bio_put(bio);
>> + goto free_ppa_list;
>> + }
>> +
>> + atomic_dec(&pblk->inflight_io);
>> +
>> + if (rqd.error)
>> + pblk_log_read_err(pblk, &rqd);
>> +
>> +free_ppa_list:
>> + nvm_dev_dma_free(dev->parent, rqd.meta_list, rqd.dma_meta_list);
>> + return ret;
>> +}
>> +
>> +static int pblk_line_smeta_write(struct pblk *pblk, struct pblk_line *line,
>> + u64 paddr)
>> +{
>> + struct nvm_tgt_dev *dev = pblk->dev;
>> + struct pblk_line_meta *lm = &pblk->lm;
>> + struct bio *bio;
>> + struct nvm_rq rqd;
>> + __le64 *lba_list = emeta_to_lbas(pblk, line->emeta->buf);
>> + __le64 addr_empty = cpu_to_le64(ADDR_EMPTY);
>> + int i, ret;
>> +
>> + memset(&rqd, 0, sizeof(struct nvm_rq));
>> +
>> + rqd.meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL,
>> + &rqd.dma_meta_list);
>> + if (!rqd.meta_list)
>> + return -ENOMEM;
>> +
>> + rqd.ppa_list = rqd.meta_list + pblk_dma_meta_size;
>> + rqd.dma_ppa_list = rqd.dma_meta_list + pblk_dma_meta_size;
>> +
>> + bio = bio_map_kern(dev->q, line->smeta, lm->smeta_len, GFP_KERNEL);
>> + if (IS_ERR(bio)) {
>> + ret = PTR_ERR(bio);
>> + goto free_ppa_list;
>> + }
>> +
>> + bio->bi_iter.bi_sector = 0; /* internal bio */
>> + bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
>> +
>> + rqd.bio = bio;
>> + rqd.opcode = NVM_OP_PWRITE;
>> + rqd.nr_ppas = lm->smeta_sec;
>> + rqd.is_seq = 1;
>> +
>> + for (i = 0; i < lm->smeta_sec; i++, paddr++) {
>> + struct pblk_sec_meta *meta_list = rqd.meta_list;
>> +
>> + rqd.ppa_list[i] = addr_to_gen_ppa(pblk, paddr, line->id);
>> + meta_list[i].lba = lba_list[paddr] = addr_empty;
>> + }
>> +
>> + ret = pblk_submit_io_sync(pblk, &rqd);
>> + if (ret) {
>> + pblk_err(pblk, "smeta I/O submission failed: %d\n", ret);
>> + bio_put(bio);
>> + goto free_ppa_list;
>> + }
>> +
>> + atomic_dec(&pblk->inflight_io);
>> +
>> + if (rqd.error) {
>> + pblk_log_write_err(pblk, &rqd);
>> + ret = -EIO;
>> + }
>> +
>> +free_ppa_list:
>> + nvm_dev_dma_free(dev->parent, rqd.meta_list, rqd.dma_meta_list);
>> + return ret;
>> +}
>> +
>> +int pblk_line_emeta_read(struct pblk *pblk, struct pblk_line *line,
>> + void *emeta_buf)
>> {
>> struct nvm_tgt_dev *dev = pblk->dev;
>> struct nvm_geo *geo = &dev->geo;
>> @@ -635,24 +760,15 @@ static int pblk_line_submit_emeta_io(struct pblk *pblk, struct pblk_line *line,
>> void *ppa_list, *meta_list;
>> struct bio *bio;
>> struct nvm_rq rqd;
>> + u64 paddr = line->emeta_ssec;
>> dma_addr_t dma_ppa_list, dma_meta_list;
>> int min = pblk->min_write_pgs;
>> int left_ppas = lm->emeta_sec[0];
>> - int id = line->id;
>> + int line_id = line->id;
>> int rq_ppas, rq_len;
>> - int cmd_op, bio_op;
>> int i, j;
>> int ret;
>> - if (dir == PBLK_WRITE) {
>> - bio_op = REQ_OP_WRITE;
>> - cmd_op = NVM_OP_PWRITE;
>> - } else if (dir == PBLK_READ) {
>> - bio_op = REQ_OP_READ;
>> - cmd_op = NVM_OP_PREAD;
>> - } else
>> - return -EINVAL;
>> -
>> meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL,
>> &dma_meta_list);
>> if (!meta_list)
>> @@ -675,64 +791,43 @@ static int pblk_line_submit_emeta_io(struct pblk *pblk, struct pblk_line *line,
>> }
>> bio->bi_iter.bi_sector = 0; /* internal bio */
>> - bio_set_op_attrs(bio, bio_op, 0);
>> + bio_set_op_attrs(bio, REQ_OP_READ, 0);
>> rqd.bio = bio;
>> rqd.meta_list = meta_list;
>> rqd.ppa_list = ppa_list;
>> rqd.dma_meta_list = dma_meta_list;
>> rqd.dma_ppa_list = dma_ppa_list;
>> - rqd.opcode = cmd_op;
>> + rqd.opcode = NVM_OP_PREAD;
>> rqd.nr_ppas = rq_ppas;
>> - if (dir == PBLK_WRITE) {
>> - struct pblk_sec_meta *meta_list = rqd.meta_list;
>> + for (i = 0; i < rqd.nr_ppas; ) {
>> + struct ppa_addr ppa = addr_to_gen_ppa(pblk, paddr, line_id);
>> + int pos = pblk_ppa_to_pos(geo, ppa);
>> - rqd.is_seq = 1;
>> - for (i = 0; i < rqd.nr_ppas; ) {
>> - spin_lock(&line->lock);
>> - paddr = __pblk_alloc_page(pblk, line, min);
>> - spin_unlock(&line->lock);
>> - for (j = 0; j < min; j++, i++, paddr++) {
>> - meta_list[i].lba = cpu_to_le64(ADDR_EMPTY);
>> - rqd.ppa_list[i] =
>> - addr_to_gen_ppa(pblk, paddr, id);
>> - }
>> - }
>> - } else {
>> - for (i = 0; i < rqd.nr_ppas; ) {
>> - struct ppa_addr ppa = addr_to_gen_ppa(pblk, paddr, id);
>> - int pos = pblk_ppa_to_pos(geo, ppa);
>> + if (pblk_io_aligned(pblk, rq_ppas))
>> + rqd.is_seq = 1;
>> - if (pblk_io_aligned(pblk, rq_ppas))
>> - rqd.is_seq = 1;
>> -
>> - while (test_bit(pos, line->blk_bitmap)) {
>> - paddr += min;
>> - if (pblk_boundary_paddr_checks(pblk, paddr)) {
>> - pblk_err(pblk, "corrupt emeta line:%d\n",
>> - line->id);
>> - bio_put(bio);
>> - ret = -EINTR;
>> - goto free_rqd_dma;
>> - }
>> -
>> - ppa = addr_to_gen_ppa(pblk, paddr, id);
>> - pos = pblk_ppa_to_pos(geo, ppa);
>> - }
>> -
>> - if (pblk_boundary_paddr_checks(pblk, paddr + min)) {
>> - pblk_err(pblk, "corrupt emeta line:%d\n",
>> - line->id);
>> + while (test_bit(pos, line->blk_bitmap)) {
>> + paddr += min;
>> + if (pblk_boundary_paddr_checks(pblk, paddr)) {
>> bio_put(bio);
>> ret = -EINTR;
>> goto free_rqd_dma;
>> }
>> - for (j = 0; j < min; j++, i++, paddr++)
>> - rqd.ppa_list[i] =
>> - addr_to_gen_ppa(pblk, paddr, line->id);
>> + ppa = addr_to_gen_ppa(pblk, paddr, line_id);
>> + pos = pblk_ppa_to_pos(geo, ppa);
>> }
>> +
>> + if (pblk_boundary_paddr_checks(pblk, paddr + min)) {
>> + bio_put(bio);
>> + ret = -EINTR;
>> + goto free_rqd_dma;
>> + }
>> +
>> + for (j = 0; j < min; j++, i++, paddr++)
>> + rqd.ppa_list[i] = addr_to_gen_ppa(pblk, paddr, line_id);
>> }
>> ret = pblk_submit_io_sync(pblk, &rqd);
>> @@ -744,136 +839,19 @@ static int pblk_line_submit_emeta_io(struct pblk *pblk, struct pblk_line *line,
>> atomic_dec(&pblk->inflight_io);
>> - if (rqd.error) {
>> - if (dir == PBLK_WRITE)
>> - pblk_log_write_err(pblk, &rqd);
>> - else
>> - pblk_log_read_err(pblk, &rqd);
>> - }
>> + if (rqd.error)
>> + pblk_log_read_err(pblk, &rqd);
>> emeta_buf += rq_len;
>> left_ppas -= rq_ppas;
>> if (left_ppas)
>> goto next_rq;
>> +
>> free_rqd_dma:
>> nvm_dev_dma_free(dev->parent, rqd.meta_list, rqd.dma_meta_list);
>> return ret;
>> }
>> -u64 pblk_line_smeta_start(struct pblk *pblk, struct pblk_line *line)
>> -{
>> - struct nvm_tgt_dev *dev = pblk->dev;
>> - struct nvm_geo *geo = &dev->geo;
>> - struct pblk_line_meta *lm = &pblk->lm;
>> - int bit;
>> -
>> - /* This usually only happens on bad lines */
>> - bit = find_first_zero_bit(line->blk_bitmap, lm->blk_per_line);
>> - if (bit >= lm->blk_per_line)
>> - return -1;
>> -
>> - return bit * geo->ws_opt;
>> -}
>> -
>> -static int pblk_line_submit_smeta_io(struct pblk *pblk, struct pblk_line *line,
>> - u64 paddr, int dir)
>> -{
>> - struct nvm_tgt_dev *dev = pblk->dev;
>> - struct pblk_line_meta *lm = &pblk->lm;
>> - struct bio *bio;
>> - struct nvm_rq rqd;
>> - __le64 *lba_list = NULL;
>> - int i, ret;
>> - int cmd_op, bio_op;
>> -
>> - if (dir == PBLK_WRITE) {
>> - bio_op = REQ_OP_WRITE;
>> - cmd_op = NVM_OP_PWRITE;
>> - lba_list = emeta_to_lbas(pblk, line->emeta->buf);
>> - } else if (dir == PBLK_READ_RECOV || dir == PBLK_READ) {
>> - bio_op = REQ_OP_READ;
>> - cmd_op = NVM_OP_PREAD;
>> - } else
>> - return -EINVAL;
>> -
>> - memset(&rqd, 0, sizeof(struct nvm_rq));
>> -
>> - rqd.meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL,
>> - &rqd.dma_meta_list);
>> - if (!rqd.meta_list)
>> - return -ENOMEM;
>> -
>> - rqd.ppa_list = rqd.meta_list + pblk_dma_meta_size;
>> - rqd.dma_ppa_list = rqd.dma_meta_list + pblk_dma_meta_size;
>> -
>> - bio = bio_map_kern(dev->q, line->smeta, lm->smeta_len, GFP_KERNEL);
>> - if (IS_ERR(bio)) {
>> - ret = PTR_ERR(bio);
>> - goto free_ppa_list;
>> - }
>> -
>> - bio->bi_iter.bi_sector = 0; /* internal bio */
>> - bio_set_op_attrs(bio, bio_op, 0);
>> -
>> - rqd.bio = bio;
>> - rqd.opcode = cmd_op;
>> - rqd.is_seq = 1;
>> - rqd.nr_ppas = lm->smeta_sec;
>> -
>> - for (i = 0; i < lm->smeta_sec; i++, paddr++) {
>> - struct pblk_sec_meta *meta_list = rqd.meta_list;
>> -
>> - rqd.ppa_list[i] = addr_to_gen_ppa(pblk, paddr, line->id);
>> -
>> - if (dir == PBLK_WRITE) {
>> - __le64 addr_empty = cpu_to_le64(ADDR_EMPTY);
>> -
>> - meta_list[i].lba = lba_list[paddr] = addr_empty;
>> - }
>> - }
>> -
>> - /*
>> - * This I/O is sent by the write thread when a line is replace. Since
>> - * the write thread is the only one sending write and erase commands,
>> - * there is no need to take the LUN semaphore.
>> - */
>> - ret = pblk_submit_io_sync(pblk, &rqd);
>> - if (ret) {
>> - pblk_err(pblk, "smeta I/O submission failed: %d\n", ret);
>> - bio_put(bio);
>> - goto free_ppa_list;
>> - }
>> -
>> - atomic_dec(&pblk->inflight_io);
>> -
>> - if (rqd.error) {
>> - if (dir == PBLK_WRITE) {
>> - pblk_log_write_err(pblk, &rqd);
>> - ret = 1;
>> - } else if (dir == PBLK_READ)
>> - pblk_log_read_err(pblk, &rqd);
>> - }
>> -
>> -free_ppa_list:
>> - nvm_dev_dma_free(dev->parent, rqd.meta_list, rqd.dma_meta_list);
>> -
>> - return ret;
>> -}
>> -
>> -int pblk_line_read_smeta(struct pblk *pblk, struct pblk_line *line)
>> -{
>> - u64 bpaddr = pblk_line_smeta_start(pblk, line);
>> -
>> - return pblk_line_submit_smeta_io(pblk, line, bpaddr, PBLK_READ_RECOV);
>> -}
>> -
>> -int pblk_line_read_emeta(struct pblk *pblk, struct pblk_line *line,
>> - void *emeta_buf)
>> -{
>> - return pblk_line_submit_emeta_io(pblk, line, emeta_buf,
>> - line->emeta_ssec, PBLK_READ);
>> -}
>> -
>> static void pblk_setup_e_rq(struct pblk *pblk, struct nvm_rq *rqd,
>> struct ppa_addr ppa)
>> {
>> @@ -1102,7 +1080,7 @@ static int pblk_line_init_bb(struct pblk *pblk, struct pblk_line *line,
>> line->smeta_ssec = off;
>> line->cur_sec = off + lm->smeta_sec;
>> - if (init && pblk_line_submit_smeta_io(pblk, line, off, PBLK_WRITE)) {
>> + if (init && pblk_line_smeta_write(pblk, line, off)) {
>> pblk_debug(pblk, "line smeta I/O failed. Retry\n");
>> return 0;
>> }
>> diff --git a/drivers/lightnvm/pblk-gc.c b/drivers/lightnvm/pblk-gc.c
>> index 157c2567c9e8..189d92b58447 100644
>> --- a/drivers/lightnvm/pblk-gc.c
>> +++ b/drivers/lightnvm/pblk-gc.c
>> @@ -144,7 +144,7 @@ static __le64 *get_lba_list_from_emeta(struct pblk *pblk,
>> if (!emeta_buf)
>> return NULL;
>> - ret = pblk_line_read_emeta(pblk, line, emeta_buf);
>> + ret = pblk_line_emeta_read(pblk, line, emeta_buf);
>> if (ret) {
>> pblk_err(pblk, "line %d read emeta failed (%d)\n",
>> line->id, ret);
>> diff --git a/drivers/lightnvm/pblk-recovery.c b/drivers/lightnvm/pblk-recovery.c
>> index cf629ab016ba..7b27e958dc8e 100644
>> --- a/drivers/lightnvm/pblk-recovery.c
>> +++ b/drivers/lightnvm/pblk-recovery.c
>> @@ -835,7 +835,7 @@ struct pblk_line *pblk_recov_l2p(struct pblk *pblk)
>> continue;
>> /* Lines that cannot be read are assumed as not written here */
>> - if (pblk_line_read_smeta(pblk, line))
>> + if (pblk_line_smeta_read(pblk, line))
>> continue;
>> crc = pblk_calc_smeta_crc(pblk, smeta_buf);
>> @@ -905,7 +905,7 @@ struct pblk_line *pblk_recov_l2p(struct pblk *pblk)
>> line->emeta = emeta;
>> memset(line->emeta->buf, 0, lm->emeta_len[0]);
>> - if (pblk_line_read_emeta(pblk, line, line->emeta->buf)) {
>> + if (pblk_line_emeta_read(pblk, line, line->emeta->buf)) {
>> pblk_recov_l2p_from_oob(pblk, line);
>> goto next;
>> }
>> diff --git a/drivers/lightnvm/pblk.h b/drivers/lightnvm/pblk.h
>> index 48b3035df3c4..a71f9847957b 100644
>> --- a/drivers/lightnvm/pblk.h
>> +++ b/drivers/lightnvm/pblk.h
>> @@ -782,6 +782,7 @@ void pblk_log_write_err(struct pblk *pblk, struct nvm_rq *rqd);
>> void pblk_log_read_err(struct pblk *pblk, struct nvm_rq *rqd);
>> int pblk_submit_io(struct pblk *pblk, struct nvm_rq *rqd);
>> int pblk_submit_io_sync(struct pblk *pblk, struct nvm_rq *rqd);
>> +int pblk_submit_io_sync_sem(struct pblk *pblk, struct nvm_rq *rqd);
>
> I'll remove this from the patch and add to the next.
>
>> int pblk_submit_meta_io(struct pblk *pblk, struct pblk_line *meta_line);
>> struct bio *pblk_bio_map_addr(struct pblk *pblk, void *data,
>> unsigned int nr_secs, unsigned int len,
>> @@ -806,8 +807,8 @@ void pblk_gen_run_ws(struct pblk *pblk, struct pblk_line *line, void *priv,
>> void (*work)(struct work_struct *), gfp_t gfp_mask,
>> struct workqueue_struct *wq);
>> u64 pblk_line_smeta_start(struct pblk *pblk, struct pblk_line *line);
>> -int pblk_line_read_smeta(struct pblk *pblk, struct pblk_line *line);
>> -int pblk_line_read_emeta(struct pblk *pblk, struct pblk_line *line,
>> +int pblk_line_smeta_read(struct pblk *pblk, struct pblk_line *line);
>> +int pblk_line_emeta_read(struct pblk *pblk, struct pblk_line *line,
>> void *emeta_buf);
>> int pblk_blk_erase_async(struct pblk *pblk, struct ppa_addr erase_ppa);
>> void pblk_line_put(struct kref *ref);
>
> Thanks. Applied for 4.20. I think the split could be optimized quite a bit with respect to complexity. Now a lot of very similar code is spread across two functions. I may look into optimizing the request alloc path, so that it is not open coded in most places.

Let me take another round at it. I'll submit a V2 later this week.
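
One idea for the v2: the meta_list/ppa_list setup that is now open coded in
each of the new smeta/emeta helpers could be pulled out into a small pair of
helpers. A rough, untested sketch on top of this patch (helper names are made
up):

static int pblk_alloc_rqd_meta(struct pblk *pblk, struct nvm_rq *rqd)
{
	struct nvm_tgt_dev *dev = pblk->dev;

	/* One DMA buffer carries both the OOB metadata and the PPA list */
	rqd->meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL,
					   &rqd->dma_meta_list);
	if (!rqd->meta_list)
		return -ENOMEM;

	rqd->ppa_list = rqd->meta_list + pblk_dma_meta_size;
	rqd->dma_ppa_list = rqd->dma_meta_list + pblk_dma_meta_size;

	return 0;
}

static void pblk_free_rqd_meta(struct pblk *pblk, struct nvm_rq *rqd)
{
	struct nvm_tgt_dev *dev = pblk->dev;

	nvm_dev_dma_free(dev->parent, rqd->meta_list, rqd->dma_meta_list);
}

pblk_line_smeta_read() and pblk_line_smeta_write() would then reduce to a
single pblk_alloc_rqd_meta() call after the memset and a pblk_free_rqd_meta()
call in the free_ppa_list path, and the emeta read path could reuse the same
helpers for its per-request allocation.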

Javier.