[PATCH -next 2/3] md/raid10: convert resync_lock to use seqlock

From: Yu Kuai
Date: Mon Aug 29 2022 - 09:09:01 EST


From: Yu Kuai <yukuai3@xxxxxxxxxx>

Currently, wait_barrier() holds 'resync_lock' to read 'conf->barrier',
and IO can't be dispatched until 'barrier' is dropped.

Since the barrier is rarely held, convert 'resync_lock' to a seqlock so
that taking the lock can be avoided in the fast path.

Signed-off-by: Yu Kuai <yukuai3@xxxxxxxxxx>
---
drivers/md/raid10.c | 62 ++++++++++++++++++++++++++++++---------------
drivers/md/raid10.h | 2 +-
2 files changed, 43 insertions(+), 21 deletions(-)

diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index b70c207f7932..086216b051f5 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -930,38 +930,60 @@ static void flush_pending_writes(struct r10conf *conf)

static void raise_barrier(struct r10conf *conf, int force)
{
- spin_lock_irq(&conf->resync_lock);
+ write_seqlock_irq(&conf->resync_lock);
BUG_ON(force && !conf->barrier);

/* Wait until no block IO is waiting (unless 'force') */
wait_event_lock_irq(conf->wait_barrier, force || !conf->nr_waiting,
- conf->resync_lock);
+ conf->resync_lock.lock);

/* block any new IO from starting */
- conf->barrier++;
+ WRITE_ONCE(conf->barrier, conf->barrier + 1);

/* Now wait for all pending IO to complete */
wait_event_lock_irq(conf->wait_barrier,
!atomic_read(&conf->nr_pending) && conf->barrier < RESYNC_DEPTH,
- conf->resync_lock);
+ conf->resync_lock.lock);

- spin_unlock_irq(&conf->resync_lock);
+ write_sequnlock_irq(&conf->resync_lock);
}

static void lower_barrier(struct r10conf *conf)
{
unsigned long flags;
- spin_lock_irqsave(&conf->resync_lock, flags);
- conf->barrier--;
- spin_unlock_irqrestore(&conf->resync_lock, flags);
+
+ write_seqlock_irqsave(&conf->resync_lock, flags);
+ WRITE_ONCE(conf->barrier, conf->barrier - 1);
+ write_sequnlock_irqrestore(&conf->resync_lock, flags);
wake_up(&conf->wait_barrier);
}

+static bool wait_barrier_nolock(struct r10conf *conf)
+{
+ unsigned int seq = raw_read_seqcount(&conf->resync_lock.seqcount);
+
+ if (seq & 1)
+ return false;
+
+ if (READ_ONCE(conf->barrier))
+ return false;
+
+ atomic_inc(&conf->nr_pending);
+ if (!read_seqcount_retry(&conf->resync_lock.seqcount, seq))
+ return true;
+
+ atomic_dec(&conf->nr_pending);
+ return false;
+}
+
static bool wait_barrier(struct r10conf *conf, bool nowait)
{
bool ret = true;

- spin_lock_irq(&conf->resync_lock);
+ if (wait_barrier_nolock(conf))
+ return true;
+
+ write_seqlock_irq(&conf->resync_lock);
if (conf->barrier) {
struct bio_list *bio_list = current->bio_list;
conf->nr_waiting++;
@@ -992,7 +1014,7 @@ static bool wait_barrier(struct r10conf *conf, bool nowait)
test_bit(MD_RECOVERY_RUNNING,
&conf->mddev->recovery) &&
conf->nr_queued > 0),
- conf->resync_lock);
+ conf->resync_lock.lock);
}
conf->nr_waiting--;
if (!conf->nr_waiting)
@@ -1001,7 +1023,7 @@ static bool wait_barrier(struct r10conf *conf, bool nowait)
/* Only increment nr_pending when we wait */
if (ret)
atomic_inc(&conf->nr_pending);
- spin_unlock_irq(&conf->resync_lock);
+ write_sequnlock_irq(&conf->resync_lock);
return ret;
}

@@ -1026,27 +1048,27 @@ static void freeze_array(struct r10conf *conf, int extra)
* must match the number of pending IOs (nr_pending) before
* we continue.
*/
- spin_lock_irq(&conf->resync_lock);
+ write_seqlock_irq(&conf->resync_lock);
conf->array_freeze_pending++;
- conf->barrier++;
+ WRITE_ONCE(conf->barrier, conf->barrier + 1);
conf->nr_waiting++;
wait_event_lock_irq_cmd(conf->wait_barrier,
atomic_read(&conf->nr_pending) == conf->nr_queued+extra,
- conf->resync_lock,
+ conf->resync_lock.lock,
flush_pending_writes(conf));

conf->array_freeze_pending--;
- spin_unlock_irq(&conf->resync_lock);
+ write_sequnlock_irq(&conf->resync_lock);
}

static void unfreeze_array(struct r10conf *conf)
{
/* reverse the effect of the freeze */
- spin_lock_irq(&conf->resync_lock);
- conf->barrier--;
+ write_seqlock_irq(&conf->resync_lock);
+ WRITE_ONCE(conf->barrier, conf->barrier - 1);
conf->nr_waiting--;
wake_up(&conf->wait_barrier);
- spin_unlock_irq(&conf->resync_lock);
+ write_sequnlock_irq(&conf->resync_lock);
}

static sector_t choose_data_offset(struct r10bio *r10_bio,
@@ -4033,7 +4055,7 @@ static struct r10conf *setup_conf(struct mddev *mddev)
INIT_LIST_HEAD(&conf->retry_list);
INIT_LIST_HEAD(&conf->bio_end_io_list);

- spin_lock_init(&conf->resync_lock);
+ seqlock_init(&conf->resync_lock);
init_waitqueue_head(&conf->wait_barrier);
atomic_set(&conf->nr_pending, 0);

@@ -4352,7 +4374,7 @@ static void *raid10_takeover_raid0(struct mddev *mddev, sector_t size, int devs)
rdev->new_raid_disk = rdev->raid_disk * 2;
rdev->sectors = size;
}
- conf->barrier = 1;
+ WRITE_ONCE(conf->barrier, 1);
}

return conf;
diff --git a/drivers/md/raid10.h b/drivers/md/raid10.h
index 5c0804d8bb1f..8c072ce0bc54 100644
--- a/drivers/md/raid10.h
+++ b/drivers/md/raid10.h
@@ -76,7 +76,7 @@ struct r10conf {
/* queue pending writes and submit them on unplug */
struct bio_list pending_bio_list;

- spinlock_t resync_lock;
+ seqlock_t resync_lock;
atomic_t nr_pending;
int nr_waiting;
int nr_queued;
--
2.31.1