Re: [PATCH V2] lightnvm: pblk: fix race condition on GC

From: Hans Holmberg
Date: Mon Feb 04 2019 - 16:02:00 EST


Thanks Heiner, it's awesome to get this fixed!

Reviewed-by: Hans Holmberg <hans.holmberg@xxxxxxxxxxxx>

On Mon, Feb 4, 2019 at 9:19 AM Javier GonzÃlez <javier@xxxxxxxxxxx> wrote:
>
> > On 1 Feb 2019, at 03.38, Heiner Litz <hlitz@xxxxxxxx> wrote:
> >
> > This patch fixes a race condition where a write is mapped to the last
> > sectors of a line. The write is synced to the device but the L2P is not
> > updated yet. When the line is garbage collected before the L2P update is
> > performed, the sectors are ignored by the GC logic and the line is freed
> > before all sectors are moved. When the L2P is finally updated, it contains
> > a mapping to a freed line, subsequent reads of the corresponding LBAs fail.
> >
> > This patch introduces a per line counter specifying the number of sectors
> > that are synced to the device but have not been updated in the L2P. Lines
> > with a counter of greater than zero will not be selected for GC.
> >
> > Signed-off-by: Heiner Litz <hlitz@xxxxxxxx>
> > ---
> >
> > v2: changed according to Javier's comment. Instead of performing check
> > while holding the trans_lock, add an atomic per line counter
> >
> > drivers/lightnvm/pblk-core.c | 1 +
> > drivers/lightnvm/pblk-gc.c | 20 +++++++++++++-------
> > drivers/lightnvm/pblk-map.c | 1 +
> > drivers/lightnvm/pblk-rb.c | 1 +
> > drivers/lightnvm/pblk-write.c | 1 +
> > drivers/lightnvm/pblk.h | 1 +
> > 6 files changed, 18 insertions(+), 7 deletions(-)
> >
> > diff --git a/drivers/lightnvm/pblk-core.c b/drivers/lightnvm/pblk-core.c
> > index eabcbc119681..b7ed0502abef 100644
> > --- a/drivers/lightnvm/pblk-core.c
> > +++ b/drivers/lightnvm/pblk-core.c
> > @@ -1278,6 +1278,7 @@ static int pblk_line_prepare(struct pblk *pblk, struct pblk_line *line)
> > spin_unlock(&line->lock);
> >
> > kref_init(&line->ref);
> > + atomic_set(&line->sec_to_update, 0);
> >
> > return 0;
> > }
> > diff --git a/drivers/lightnvm/pblk-gc.c b/drivers/lightnvm/pblk-gc.c
> > index 2fa118c8eb71..26a52ea7ec45 100644
> > --- a/drivers/lightnvm/pblk-gc.c
> > +++ b/drivers/lightnvm/pblk-gc.c
> > @@ -365,16 +365,22 @@ static struct pblk_line *pblk_gc_get_victim_line(struct pblk *pblk,
> > struct list_head *group_list)
> > {
> > struct pblk_line *line, *victim;
> > - int line_vsc, victim_vsc;
> > + unsigned int line_vsc = ~0x0L, victim_vsc = ~0x0L;
> >
> > victim = list_first_entry(group_list, struct pblk_line, list);
> > +
> > list_for_each_entry(line, group_list, list) {
> > - line_vsc = le32_to_cpu(*line->vsc);
> > - victim_vsc = le32_to_cpu(*victim->vsc);
> > - if (line_vsc < victim_vsc)
> > + if (!atomic_read(&line->sec_to_update))
> > + line_vsc = le32_to_cpu(*line->vsc);
> > + if (line_vsc < victim_vsc) {
> > victim = line;
> > + victim_vsc = le32_to_cpu(*victim->vsc);
> > + }
> > }
> >
> > + if (victim_vsc == ~0x0)
> > + return NULL;
> > +
> > return victim;
> > }
> >
> > @@ -448,13 +454,13 @@ static void pblk_gc_run(struct pblk *pblk)
> >
> > do {
> > spin_lock(&l_mg->gc_lock);
> > - if (list_empty(group_list)) {
> > +
> > + line = pblk_gc_get_victim_line(pblk, group_list);
> > + if (!line) {
> > spin_unlock(&l_mg->gc_lock);
> > break;
> > }
> >
> > - line = pblk_gc_get_victim_line(pblk, group_list);
> > -
> > spin_lock(&line->lock);
> > WARN_ON(line->state != PBLK_LINESTATE_CLOSED);
> > line->state = PBLK_LINESTATE_GC;
> > diff --git a/drivers/lightnvm/pblk-map.c b/drivers/lightnvm/pblk-map.c
> > index 79df583ea709..7fbc99b60cac 100644
> > --- a/drivers/lightnvm/pblk-map.c
> > +++ b/drivers/lightnvm/pblk-map.c
> > @@ -73,6 +73,7 @@ static int pblk_map_page_data(struct pblk *pblk, unsigned int sentry,
> > */
> > if (i < valid_secs) {
> > kref_get(&line->ref);
> > + atomic_inc(&line->sec_to_update);
> > w_ctx = pblk_rb_w_ctx(&pblk->rwb, sentry + i);
> > w_ctx->ppa = ppa_list[i];
> > meta->lba = cpu_to_le64(w_ctx->lba);
> > diff --git a/drivers/lightnvm/pblk-rb.c b/drivers/lightnvm/pblk-rb.c
> > index a6133b50ed9c..03c241b340ea 100644
> > --- a/drivers/lightnvm/pblk-rb.c
> > +++ b/drivers/lightnvm/pblk-rb.c
> > @@ -260,6 +260,7 @@ static int __pblk_rb_update_l2p(struct pblk_rb *rb, unsigned int to_update)
> > entry->cacheline);
> >
> > line = pblk_ppa_to_line(pblk, w_ctx->ppa);
> > + atomic_dec(&line->sec_to_update);
> > kref_put(&line->ref, pblk_line_put);
> > clean_wctx(w_ctx);
> > rb->l2p_update = pblk_rb_ptr_wrap(rb, rb->l2p_update, 1);
> > diff --git a/drivers/lightnvm/pblk-write.c b/drivers/lightnvm/pblk-write.c
> > index 06d56deb645d..6593deab52da 100644
> > --- a/drivers/lightnvm/pblk-write.c
> > +++ b/drivers/lightnvm/pblk-write.c
> > @@ -177,6 +177,7 @@ static void pblk_prepare_resubmit(struct pblk *pblk, unsigned int sentry,
> > * re-map these entries
> > */
> > line = pblk_ppa_to_line(pblk, w_ctx->ppa);
> > + atomic_dec(&line->sec_to_update);
> > kref_put(&line->ref, pblk_line_put);
> > }
> > spin_unlock(&pblk->trans_lock);
> > diff --git a/drivers/lightnvm/pblk.h b/drivers/lightnvm/pblk.h
> > index a6386d5acd73..ac3ab778e976 100644
> > --- a/drivers/lightnvm/pblk.h
> > +++ b/drivers/lightnvm/pblk.h
> > @@ -487,6 +487,7 @@ struct pblk_line {
> > __le32 *vsc; /* Valid sector count in line */
> >
> > struct kref ref; /* Write buffer L2P references */
> > + atomic_t sec_to_update; /* Outstanding L2P updates to ppa */
> >
> > struct pblk_w_err_gc *w_err_gc; /* Write error gc recovery metadata */
> >
> > --
> > 2.17.1
>
>
> Looks good to me. Again, good marathon-catch! :)
>
> Reviewed-by: Javier GonzÃlez <javier@xxxxxxxxxxx>
>
>