[PATCH 002 of 7] md: Allow stripes to be expanded in preparation for expanding an array.

From: NeilBrown
Date: Mon Jan 23 2006 - 19:40:53 EST



Before a RAID-5 can be expanded, we need to be able to expand the
stripe-cache data structure.
This requires allocating new stripes in a new kmem_cache.
If this succeeds, we copy cache pages over and release the old
stripes and kmem_cache.
We then allocate new pages. If that fails, we leave the stripe
cache at it's new size. It isn't worth the effort to shink
it back again.

Signed-off-by: Neil Brown <neilb@xxxxxxx>

### Diffstat output
./drivers/md/raid5.c | 118 +++++++++++++++++++++++++++++++++++++++++--
./drivers/md/raid6main.c | 4 -
./include/linux/raid/raid5.h | 9 ++-
3 files changed, 123 insertions(+), 8 deletions(-)

diff ./drivers/md/raid5.c~current~ ./drivers/md/raid5.c
--- ./drivers/md/raid5.c~current~ 2006-01-24 10:18:31.000000000 +1100
+++ ./drivers/md/raid5.c 2006-01-24 10:20:33.000000000 +1100
@@ -313,20 +313,130 @@ static int grow_stripes(raid5_conf_t *co
kmem_cache_t *sc;
int devs = conf->raid_disks;

- sprintf(conf->cache_name, "raid5/%s", mdname(conf->mddev));
-
- sc = kmem_cache_create(conf->cache_name,
+ sprintf(conf->cache_name[0], "raid5/%s", mdname(conf->mddev));
+ sprintf(conf->cache_name[1], "raid5/%s-alt", mdname(conf->mddev));
+ conf->active_name = 0;
+ sc = kmem_cache_create(conf->cache_name[conf->active_name],
sizeof(struct stripe_head)+(devs-1)*sizeof(struct r5dev),
0, 0, NULL, NULL);
if (!sc)
return 1;
conf->slab_cache = sc;
+ conf->pool_size = devs;
while (num--) {
if (!grow_one_stripe(conf))
return 1;
}
return 0;
}
+static int resize_stripes(raid5_conf_t *conf, int newsize)
+{
+ /* make all the stripes able to hold 'newsize' devices.
+ * New slots in each stripe get 'page' set to a new page.
+ * We allocate all the new stripes first, then if that succeeds,
+ * copy everything across.
+ * Finally we add new pages. This could fail, but we leave
+ * the stripe cache at it's new size, just with some pages empty.
+ *
+ * We use GFP_NOIO allocations as IO to the raid5 is blocked
+ * at some points in this operation.
+ */
+ struct stripe_head *osh, *nsh;
+ struct list_head newstripes, oldstripes;
+ struct disk_info *ndisks;
+ int err = 0;
+ kmem_cache_t *sc;
+ int i;
+
+ if (newsize <= conf->pool_size)
+ return 0; /* never bother to shrink */
+
+ sc = kmem_cache_create(conf->cache_name[1-conf->active_name],
+ sizeof(struct stripe_head)+(newsize-1)*sizeof(struct r5dev),
+ 0, 0, NULL, NULL);
+ if (!sc)
+ return -ENOMEM;
+ INIT_LIST_HEAD(&newstripes);
+ for (i = conf->max_nr_stripes; i; i--) {
+ nsh = kmem_cache_alloc(sc, GFP_NOIO);
+ if (!nsh)
+ break;
+
+ memset(nsh, 0, sizeof(*nsh) + (newsize-1)*sizeof(struct r5dev));
+
+ nsh->raid_conf = conf;
+ spin_lock_init(&nsh->lock);
+
+ list_add(&nsh->lru, &newstripes);
+ }
+ if (i) {
+ /* didn't get enough, give up */
+ while (!list_empty(&newstripes)) {
+ nsh = list_entry(newstripes.next, struct stripe_head, lru);
+ list_del(&nsh->lru);
+ kmem_cache_free(sc, nsh);
+ }
+ kmem_cache_destroy(sc);
+ return -ENOMEM;
+ }
+ /* OK, we have enough stripes, start collecting inactive
+ * stripes and copying them over
+ */
+ INIT_LIST_HEAD(&oldstripes);
+ list_for_each_entry(nsh, &newstripes, lru) {
+ spin_lock_irq(&conf->device_lock);
+ wait_event_lock_irq(conf->wait_for_stripe,
+ !list_empty(&conf->inactive_list),
+ conf->device_lock,
+ unplug_slaves(conf->mddev);
+ );
+ osh = get_free_stripe(conf);
+ spin_unlock_irq(&conf->device_lock);
+ atomic_set(&nsh->count, 1);
+ for(i=0; i<conf->pool_size; i++)
+ nsh->dev[i].page = osh->dev[i].page;
+ for( ; i<newsize; i++)
+ nsh->dev[i].page = NULL;
+ list_add(&osh->lru, &oldstripes);
+ }
+ /* Got them all.
+ * Return the new ones and free the old ones.
+ * At this point, we are holding all the stripes so the array
+ * is completely stalled, so now is a good time to resize
+ * conf->disks.
+ */
+ ndisks = kzalloc(newsize * sizeof(struct disk_info), GFP_NOIO);
+ if (ndisks) {
+ for (i=0; i<conf->raid_disks; i++)
+ ndisks[i] = conf->disks[i];
+ kfree(conf->disks);
+ conf->disks = ndisks;
+ } else
+ err = -ENOMEM;
+ while(!list_empty(&newstripes)) {
+ nsh = list_entry(newstripes.next, struct stripe_head, lru);
+ list_del_init(&nsh->lru);
+ for (i=conf->raid_disks; i < newsize; i++)
+ if (nsh->dev[i].page == NULL) {
+ struct page *p = alloc_page(GFP_NOIO);
+ nsh->dev[i].page = p;
+ if (!p)
+ err = -ENOMEM;
+ }
+ release_stripe(nsh);
+ }
+ while(!list_empty(&oldstripes)) {
+ osh = list_entry(oldstripes.next, struct stripe_head, lru);
+ list_del(&osh->lru);
+ kmem_cache_free(conf->slab_cache, osh);
+ }
+ kmem_cache_destroy(conf->slab_cache);
+ conf->slab_cache = sc;
+ conf->active_name = 1-conf->active_name;
+ conf->pool_size = newsize;
+ return err;
+}
+

static int drop_one_stripe(raid5_conf_t *conf)
{
@@ -339,7 +449,7 @@ static int drop_one_stripe(raid5_conf_t
return 0;
if (atomic_read(&sh->count))
BUG();
- shrink_buffers(sh, conf->raid_disks);
+ shrink_buffers(sh, conf->pool_size);
kmem_cache_free(conf->slab_cache, sh);
atomic_dec(&conf->active_stripes);
return 1;

diff ./drivers/md/raid6main.c~current~ ./drivers/md/raid6main.c
--- ./drivers/md/raid6main.c~current~ 2006-01-24 10:18:31.000000000 +1100
+++ ./drivers/md/raid6main.c 2006-01-24 10:18:50.000000000 +1100
@@ -308,9 +308,9 @@ static int grow_stripes(raid6_conf_t *co
kmem_cache_t *sc;
int devs = conf->raid_disks;

- sprintf(conf->cache_name, "raid6/%s", mdname(conf->mddev));
+ sprintf(conf->cache_name[0], "raid6/%s", mdname(conf->mddev));

- sc = kmem_cache_create(conf->cache_name,
+ sc = kmem_cache_create(conf->cache_name[0],
sizeof(struct stripe_head)+(devs-1)*sizeof(struct r5dev),
0, 0, NULL, NULL);
if (!sc)

diff ./include/linux/raid/raid5.h~current~ ./include/linux/raid/raid5.h
--- ./include/linux/raid/raid5.h~current~ 2006-01-24 10:18:31.000000000 +1100
+++ ./include/linux/raid/raid5.h 2006-01-24 10:18:50.000000000 +1100
@@ -216,7 +216,11 @@ struct raid5_private_data {
struct list_head bitmap_list; /* stripes delaying awaiting bitmap update */
atomic_t preread_active_stripes; /* stripes with scheduled io */

- char cache_name[20];
+ /* unfortunately we need two cache names as we temporarily have
+ * two caches.
+ */
+ int active_name;
+ char cache_name[2][20];
kmem_cache_t *slab_cache; /* for allocating stripes */

int seq_flush, seq_write;
@@ -238,7 +242,8 @@ struct raid5_private_data {
wait_queue_head_t wait_for_overlap;
int inactive_blocked; /* release of inactive stripes blocked,
* waiting for 25% to be free
- */
+ */
+ int pool_size; /* number of disks in stripeheads in pool */
spinlock_t device_lock;
struct disk_info *disks;
};
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/