Re: [RFC][PATCH] identify raid rcu-protected pointer

From: Paul E. McKenney
Date: Tue Oct 11 2005 - 18:03:32 EST


On Tue, Oct 11, 2005 at 02:48:37PM -0700, Suzanne Wood wrote:

Acked-by: <paulmck@xxxxxxxxxx>

Thanx, Paul

> Elements of this patch were submitted Oct 5-6, 2005, but
> are being resent with some explanation of reasoning employed
> and additions. Insertions of rcu_dereference()
> are done in response to previously marked rcu readside
> critical sections with corresponding rcu_assign_pointer().
>
> Because synchronize_rcu() occurs after p->rdev = NULL;
> in the five files (multipath.c, raid10.c, raid1.c,
> raid5.c, and raid6main.c) within drivers/md, it is
> thought that the rcu_dereference() protects the later
> dereference of that pointer which is type mdk_rdev_t
> defined in md_k.h for the struct mdk_rdev_s for the
> extended device.
>
> In a case like the following from raid10.c:
> while (!conf->mirrors[disk].rdev ||
> !conf->mirrors[disk].rdev->in_sync) {
>
> with no assignment of the rdev, the thought is that
> an rcu-dereference() of the rdev protects the access
> of, e.g, in_sync. While in_sync is an integer (flag),
> it's validity for the current object would be protected.
> More likely, a temporary variable will be used in
> read_balance() as seen in raid1.c, e.g.:
> rdev = conf->mirrors[new_disk].rdev)
>
> In read_balance() of raid1.c, it was assumed that the
> extent of the rcu readside critical section was due to
> the "retry" label and the possibility of desiring to be
> external to the loop, but the "goto retry" is nested
> in 2 levels of conditionals. This may indicate
> a reconsideration of the placement of rcu_read_lock()/
> unlock(). raid10.c read_balance() may also merit
> reevaluation.
>
> The rcu_assign_pointer(rdev->mddev, mddev) is inserted
> to make the mddev object globally visible because it
> is the structure referenced by the rcu protected rdev
> pointer. Other assignments to rdev fields in md.c
> appear to be in regard to initialization, but the
> developer will want to consider this.
>
> Thank you.
>
> ----------------------------------------------------
>
> md.c | 2 +-
> multipath.c | 6 +++---
> raid1.c | 18 +++++++++---------
> raid10.c | 14 +++++++-------
> raid5.c | 8 ++++----
> raid6main.c | 6 +++---
> 6 files changed, 28 insertions(+), 27 deletions(-)
>
> ----------------------------------------------------
>
> diff -urpNa -X dontdiff linux-2.6.14-rc4/drivers/md/md.c linux-2.6.14-rc4_patch/drivers/md/md.c
> --- linux-2.6.14-rc4/drivers/md/md.c 2005-10-10 18:19:19.000000000 -0700
> +++ linux-2.6.14-rc4_patch/drivers/md/md.c 2005-10-11 13:30:22.000000000 -0700
> @@ -1145,7 +1145,7 @@ static int bind_rdev_to_array(mdk_rdev_t
> }
>
> list_add(&rdev->same_set, &mddev->disks);
> - rdev->mddev = mddev;
> + rcu_assign_pointer(rdev->mddev, mddev);
> printk(KERN_INFO "md: bind<%s>\n", bdevname(rdev->bdev,b));
> return 0;
> }
> diff -urpNa -X dontdiff linux-2.6.14-rc4/drivers/md/multipath.c linux-2.6.14-rc4_patch/drivers/md/multipath.c
> --- linux-2.6.14-rc4/drivers/md/multipath.c 2005-10-10 18:19:19.000000000 -0700
> +++ linux-2.6.14-rc4_patch/drivers/md/multipath.c 2005-10-11 10:28:33.000000000 -0700
> @@ -63,7 +63,7 @@ static int multipath_map (multipath_conf
>
> rcu_read_lock();
> for (i = 0; i < disks; i++) {
> - mdk_rdev_t *rdev = conf->multipaths[i].rdev;
> + mdk_rdev_t *rdev = rcu_dereference(conf->multipaths[i].rdev);
> if (rdev && rdev->in_sync) {
> atomic_inc(&rdev->nr_pending);
> rcu_read_unlock();
> @@ -139,7 +139,7 @@ static void unplug_slaves(mddev_t *mddev
>
> rcu_read_lock();
> for (i=0; i<mddev->raid_disks; i++) {
> - mdk_rdev_t *rdev = conf->multipaths[i].rdev;
> + mdk_rdev_t *rdev = rcu_dereference(conf->multipaths[i].rdev);
> if (rdev && !rdev->faulty && atomic_read(&rdev->nr_pending)) {
> request_queue_t *r_queue = bdev_get_queue(rdev->bdev);
>
> @@ -228,7 +228,7 @@ static int multipath_issue_flush(request
>
> rcu_read_lock();
> for (i=0; i<mddev->raid_disks && ret == 0; i++) {
> - mdk_rdev_t *rdev = conf->multipaths[i].rdev;
> + mdk_rdev_t *rdev = rcu_dereference(conf->multipaths[i].rdev);
> if (rdev && !rdev->faulty) {
> struct block_device *bdev = rdev->bdev;
> request_queue_t *r_queue = bdev_get_queue(bdev);
> diff -urpNa -X dontdiff linux-2.6.14-rc4/drivers/md/raid10.c linux-2.6.14-rc4_patch/drivers/md/raid10.c
> --- linux-2.6.14-rc4/drivers/md/raid10.c 2005-10-10 18:19:19.000000000 -0700
> +++ linux-2.6.14-rc4_patch/drivers/md/raid10.c 2005-10-11 14:09:20.000000000 -0700
> @@ -510,7 +510,7 @@ static int read_balance(conf_t *conf, r1
> slot = 0;
> disk = r10_bio->devs[slot].devnum;
>
> - while (!conf->mirrors[disk].rdev ||
> + while (!rcu_dereference(conf->mirrors[disk].rdev) ||
> !conf->mirrors[disk].rdev->in_sync) {
> slot++;
> if (slot == conf->copies) {
> @@ -527,7 +527,7 @@ static int read_balance(conf_t *conf, r1
> /* make sure the disk is operational */
> slot = 0;
> disk = r10_bio->devs[slot].devnum;
> - while (!conf->mirrors[disk].rdev ||
> + while (!rcu_dereference(conf->mirrors[disk].rdev) ||
> !conf->mirrors[disk].rdev->in_sync) {
> slot ++;
> if (slot == conf->copies) {
> @@ -547,7 +547,7 @@ static int read_balance(conf_t *conf, r1
> int ndisk = r10_bio->devs[nslot].devnum;
>
>
> - if (!conf->mirrors[ndisk].rdev ||
> + if (!rcu_dereference(conf->mirrors[ndisk].rdev) ||
> !conf->mirrors[ndisk].rdev->in_sync)
> continue;
>
> @@ -569,7 +569,7 @@ rb_out:
> r10_bio->read_slot = slot;
> /* conf->next_seq_sect = this_sector + sectors;*/
>
> - if (disk >= 0 && conf->mirrors[disk].rdev)
> + if (disk >= 0 && rcu_dereference(conf->mirrors[disk].rdev))
> atomic_inc(&conf->mirrors[disk].rdev->nr_pending);
> rcu_read_unlock();
>
> @@ -583,7 +583,7 @@ static void unplug_slaves(mddev_t *mddev
>
> rcu_read_lock();
> for (i=0; i<mddev->raid_disks; i++) {
> - mdk_rdev_t *rdev = conf->mirrors[i].rdev;
> + mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[i].rdev);
> if (rdev && !rdev->faulty && atomic_read(&rdev->nr_pending)) {
> request_queue_t *r_queue = bdev_get_queue(rdev->bdev);
>
> @@ -614,7 +614,7 @@ static int raid10_issue_flush(request_qu
>
> rcu_read_lock();
> for (i=0; i<mddev->raid_disks && ret == 0; i++) {
> - mdk_rdev_t *rdev = conf->mirrors[i].rdev;
> + mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[i].rdev);
> if (rdev && !rdev->faulty) {
> struct block_device *bdev = rdev->bdev;
> request_queue_t *r_queue = bdev_get_queue(bdev);
> @@ -772,7 +772,7 @@ static int make_request(request_queue_t
> rcu_read_lock();
> for (i = 0; i < conf->copies; i++) {
> int d = r10_bio->devs[i].devnum;
> - if (conf->mirrors[d].rdev &&
> + if (rcu_dereference(conf->mirrors[d].rdev) &&
> !conf->mirrors[d].rdev->faulty) {
> atomic_inc(&conf->mirrors[d].rdev->nr_pending);
> r10_bio->devs[i].bio = bio;
> diff -urpNa -X dontdiff linux-2.6.14-rc4/drivers/md/raid1.c linux-2.6.14-rc4_patch/drivers/md/raid1.c
> --- linux-2.6.14-rc4/drivers/md/raid1.c 2005-10-10 18:19:19.000000000 -0700
> +++ linux-2.6.14-rc4_patch/drivers/md/raid1.c 2005-10-11 13:23:23.000000000 -0700
> @@ -416,10 +416,10 @@ static int read_balance(conf_t *conf, r1
> /* Choose the first operation device, for consistancy */
> new_disk = 0;
>
> - for (rdev = conf->mirrors[new_disk].rdev;
> + for (rdev = rcu_dereference(conf->mirrors[new_disk].rdev);
> !rdev || !rdev->in_sync
> || test_bit(WriteMostly, &rdev->flags);
> - rdev = conf->mirrors[++new_disk].rdev) {
> + rdev = rcu_dereference(conf->mirrors[++new_disk].rdev)) {
>
> if (rdev && rdev->in_sync)
> wonly_disk = new_disk;
> @@ -434,10 +434,10 @@ static int read_balance(conf_t *conf, r1
>
>
> /* make sure the disk is operational */
> - for (rdev = conf->mirrors[new_disk].rdev;
> + for (rdev = rcu_dereference(conf->mirrors[new_disk].rdev);
> !rdev || !rdev->in_sync ||
> test_bit(WriteMostly, &rdev->flags);
> - rdev = conf->mirrors[new_disk].rdev) {
> + rdev = rcu_dereference(conf->mirrors[new_disk].rdev)) { // increment new_disk?
>
> if (rdev && rdev->in_sync)
> wonly_disk = new_disk;
> @@ -474,7 +474,7 @@ static int read_balance(conf_t *conf, r1
> disk = conf->raid_disks;
> disk--;
>
> - rdev = conf->mirrors[disk].rdev;
> + rdev = rcu_dereference(conf->mirrors[disk].rdev);
>
> if (!rdev ||
> !rdev->in_sync ||
> @@ -496,7 +496,7 @@ static int read_balance(conf_t *conf, r1
>
>
> if (new_disk >= 0) {
> - rdev = conf->mirrors[new_disk].rdev;
> + rdev = rcu_dereference(conf->mirrors[new_disk].rdev);
> if (!rdev)
> goto retry;
> atomic_inc(&rdev->nr_pending);
> @@ -522,7 +522,7 @@ static void unplug_slaves(mddev_t *mddev
>
> rcu_read_lock();
> for (i=0; i<mddev->raid_disks; i++) {
> - mdk_rdev_t *rdev = conf->mirrors[i].rdev;
> + mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[i].rdev);
> if (rdev && !rdev->faulty && atomic_read(&rdev->nr_pending)) {
> request_queue_t *r_queue = bdev_get_queue(rdev->bdev);
>
> @@ -547,6 +547,7 @@ static void raid1_unplug(request_queue_t
> md_wakeup_thread(mddev->thread);
> }
>
> static int raid1_issue_flush(request_queue_t *q, struct gendisk *disk,
> sector_t *error_sector)
> {
> @@ -556,7 +557,7 @@ static int raid1_issue_flush(request_que
>
> rcu_read_lock();
> for (i=0; i<mddev->raid_disks && ret == 0; i++) {
> - mdk_rdev_t *rdev = conf->mirrors[i].rdev;
> + mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[i].rdev);
> if (rdev && !rdev->faulty) {
> struct block_device *bdev = rdev->bdev;
> request_queue_t *r_queue = bdev_get_queue(bdev);
> @@ -732,7 +733,7 @@ static int make_request(request_queue_t
> #endif
> rcu_read_lock();
> for (i = 0; i < disks; i++) {
> - if ((rdev=conf->mirrors[i].rdev) != NULL &&
> + if ((rdev=rcu_dereference(conf->mirrors[i].rdev)) != NULL &&
> !rdev->faulty) {
> atomic_inc(&rdev->nr_pending);
> if (rdev->faulty) {
> diff -urpNa -X dontdiff linux-2.6.14-rc4/drivers/md/raid5.c linux-2.6.14-rc4_patch/drivers/md/raid5.c
> --- linux-2.6.14-rc4/drivers/md/raid5.c 2005-10-10 18:19:19.000000000 -0700
> +++ linux-2.6.14-rc4_patch/drivers/md/raid5.c 2005-10-11 13:27:27.000000000 -0700
> @@ -1305,12 +1305,11 @@ static void handle_stripe(struct stripe_
> bi->bi_end_io = raid5_end_read_request;
>
> rcu_read_lock();
> - rdev = conf->disks[i].rdev;
> + rdev = rcu_dereference(conf->disks[i].rdev);
> if (rdev && rdev->faulty)
> rdev = NULL;
> if (rdev)
> atomic_inc(&rdev->nr_pending);
> - rcu_read_unlock();
>
> if (rdev) {
> if (test_bit(R5_Syncio, &sh->dev[i].flags))
> @@ -1339,6 +1338,7 @@ static void handle_stripe(struct stripe_
> clear_bit(R5_LOCKED, &sh->dev[i].flags);
> set_bit(STRIPE_HANDLE, &sh->state);
> }
> + rcu_read_unlock();
> }
> }
>
> @@ -1379,7 +1379,7 @@ static void unplug_slaves(mddev_t *mddev
>
> rcu_read_lock();
> for (i=0; i<mddev->raid_disks; i++) {
> - mdk_rdev_t *rdev = conf->disks[i].rdev;
> + mdk_rdev_t *rdev = rcu_dereference(conf->disks[i].rdev);
> if (rdev && !rdev->faulty && atomic_read(&rdev->nr_pending)) {
> request_queue_t *r_queue = bdev_get_queue(rdev->bdev);
>
> @@ -1424,7 +1424,7 @@ static int raid5_issue_flush(request_que
>
> rcu_read_lock();
> for (i=0; i<mddev->raid_disks && ret == 0; i++) {
> - mdk_rdev_t *rdev = conf->disks[i].rdev;
> + mdk_rdev_t *rdev = rcu_dereference(conf->disks[i].rdev);
> if (rdev && !rdev->faulty) {
> struct block_device *bdev = rdev->bdev;
> request_queue_t *r_queue = bdev_get_queue(bdev);
> diff -urpNa -X dontdiff linux-2.6.14-rc4/drivers/md/raid6main.c linux-2.6.14-rc4_patch/drivers/md/raid6main.c
> --- linux-2.6.14-rc4/drivers/md/raid6main.c 2005-10-10 18:19:19.000000000 -0700
> +++ linux-2.6.14-rc4_patch/drivers/md/raid6main.c 2005-10-11 13:29:08.000000000 -0700
> @@ -1464,7 +1464,7 @@ static void handle_stripe(struct stripe_
> bi->bi_end_io = raid6_end_read_request;
>
> rcu_read_lock();
> - rdev = conf->disks[i].rdev;
> + rdev = rcu_dereference(conf->disks[i].rdev);
> if (rdev && rdev->faulty)
> rdev = NULL;
> if (rdev)
> @@ -1538,7 +1538,7 @@ static void unplug_slaves(mddev_t *mddev
>
> rcu_read_lock();
> for (i=0; i<mddev->raid_disks; i++) {
> - mdk_rdev_t *rdev = conf->disks[i].rdev;
> + mdk_rdev_t *rdev = rcu_dereference(conf->disks[i].rdev);
> if (rdev && !rdev->faulty && atomic_read(&rdev->nr_pending)) {
> request_queue_t *r_queue = bdev_get_queue(rdev->bdev);
>
> @@ -1583,7 +1583,7 @@ static int raid6_issue_flush(request_que
>
> rcu_read_lock();
> for (i=0; i<mddev->raid_disks && ret == 0; i++) {
> - mdk_rdev_t *rdev = conf->disks[i].rdev;
> + mdk_rdev_t *rdev = rcu_dereference(conf->disks[i].rdev);
> if (rdev && !rdev->faulty) {
> struct block_device *bdev = rdev->bdev;
> request_queue_t *r_queue = bdev_get_queue(bdev);
> ~p
>
>
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/