Re: [patch] speed up single bio_vec allocation

From: Jens Axboe
Date: Wed Dec 06 2006 - 05:55:56 EST


On Wed, Dec 06 2006, Jens Axboe wrote:
> I still can't help but think we can do better than this, and that this
> is nothing more than optimizing for a benchmark. For high performance
> I/O, you will be doing > 1 page bio's anyway and this patch wont help
> you at all. Perhaps we can just kill bio_vec slabs completely, and
> create bio slabs instead with differing sizes. So instead of having 1
> bio slab and 5 bio_vec slabs, change that to 5 bio slabs that leave room
> for the bio_vec list at the end. That would always eliminate the extra
> allocation, at the cost of blowing the 256-page case into a order 1 page
> allocation (256*16 + sizeof(*bio) > PAGE_SIZE) for the 4kb 64-bit archs,
> which is something I've always tried to avoid.

That would look something like this. You'd probably want to re-tweak the
slab sizes now, so that we get the most out of the slab page. If we
accept the 2^1 order allocation for the largest bio, we can make it
larger than 256 pages at no extra cost. Probably around 500 pages would
still fit inside the two pages.

diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index 08a40f4..b4bf3b3 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -813,7 +813,7 @@ static int crypt_ctr(struct dm_target *t
goto bad4;
}

- cc->bs = bioset_create(MIN_IOS, MIN_IOS, 4);
+ cc->bs = bioset_create(MIN_IOS, 4);
if (!cc->bs) {
ti->error = "Cannot allocate crypt bioset";
goto bad_bs;
diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c
index da663d2..581c24a 100644
--- a/drivers/md/dm-io.c
+++ b/drivers/md/dm-io.c
@@ -60,7 +60,7 @@ static int resize_pool(unsigned int new_
if (!_io_pool)
return -ENOMEM;

- _bios = bioset_create(16, 16, 4);
+ _bios = bioset_create(16, 4);
if (!_bios) {
mempool_destroy(_io_pool);
_io_pool = NULL;
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index fc4f743..98f6768 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -973,7 +973,7 @@ static struct mapped_device *alloc_dev(i
if (!md->tio_pool)
goto bad3;

- md->bs = bioset_create(16, 16, 4);
+ md->bs = bioset_create(16, 4);
if (!md->bs)
goto bad_no_bioset;

diff --git a/fs/bio.c b/fs/bio.c
index aa4d09b..452e8f7 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -30,10 +30,6 @@

#define BIO_POOL_SIZE 256

-static kmem_cache_t *bio_slab __read_mostly;
-
-#define BIOVEC_NR_POOLS 6
-
/*
* a small number of entries is fine, not going to be performance critical.
* basically we just need to survive
@@ -41,23 +37,23 @@ static kmem_cache_t *bio_slab __read_mos
#define BIO_SPLIT_ENTRIES 8
mempool_t *bio_split_pool __read_mostly;

-struct biovec_slab {
+struct bio_slab {
int nr_vecs;
- char *name;
+ const char *name;
kmem_cache_t *slab;
};

-/*
- * if you change this list, also change bvec_alloc or things will
- * break badly! cannot be bigger than what you can fit into an
- * unsigned short
- */
-
-#define BV(x) { .nr_vecs = x, .name = "biovec-"__stringify(x) }
-static struct biovec_slab bvec_slabs[BIOVEC_NR_POOLS] __read_mostly = {
- BV(1), BV(4), BV(16), BV(64), BV(128), BV(BIO_MAX_PAGES),
+#define BIOSLAB_NR_POOLS 6
+#define BIOSLAB(x) { .nr_vecs = x, .name = "bio-"__stringify(x) }
+static struct bio_slab bio_slabs[BIOSLAB_NR_POOLS] __read_mostly = {
+ BIOSLAB(0),
+ BIOSLAB(4),
+ BIOSLAB(16),
+ BIOSLAB(64),
+ BIOSLAB(128),
+ BIOSLAB(BIO_MAX_PAGES),
};
-#undef BV
+#undef BIOSLAB

/*
* bio_set is used to allow other portions of the IO system to
@@ -66,8 +62,7 @@ static struct biovec_slab bvec_slabs[BIO
* and the bvec_slabs[].
*/
struct bio_set {
- mempool_t *bio_pool;
- mempool_t *bvec_pools[BIOVEC_NR_POOLS];
+ mempool_t *bio_pools[BIOSLAB_NR_POOLS];
};

/*
@@ -76,45 +71,12 @@ struct bio_set {
*/
static struct bio_set *fs_bio_set;

-static inline struct bio_vec *bvec_alloc_bs(gfp_t gfp_mask, int nr, unsigned long *idx, struct bio_set *bs)
-{
- struct bio_vec *bvl;
-
- /*
- * see comment near bvec_array define!
- */
- switch (nr) {
- case 1 : *idx = 0; break;
- case 2 ... 4: *idx = 1; break;
- case 5 ... 16: *idx = 2; break;
- case 17 ... 64: *idx = 3; break;
- case 65 ... 128: *idx = 4; break;
- case 129 ... BIO_MAX_PAGES: *idx = 5; break;
- default:
- return NULL;
- }
- /*
- * idx now points to the pool we want to allocate from
- */
-
- bvl = mempool_alloc(bs->bvec_pools[*idx], gfp_mask);
- if (bvl) {
- struct biovec_slab *bp = bvec_slabs + *idx;
-
- memset(bvl, 0, bp->nr_vecs * sizeof(struct bio_vec));
- }
-
- return bvl;
-}
-
void bio_free(struct bio *bio, struct bio_set *bio_set)
{
const int pool_idx = BIO_POOL_IDX(bio);

- BIO_BUG_ON(pool_idx >= BIOVEC_NR_POOLS);
-
- mempool_free(bio->bi_io_vec, bio_set->bvec_pools[pool_idx]);
- mempool_free(bio, bio_set->bio_pool);
+ BIO_BUG_ON(pool_idx >= BIOSLAB_NR_POOLS);
+ mempool_free(bio, bio_set->bio_pools[pool_idx]);
}

/*
@@ -160,26 +122,51 @@ void bio_init(struct bio *bio)
**/
struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs)
{
- struct bio *bio = mempool_alloc(bs->bio_pool, gfp_mask);
+ struct bio *bio = NULL;
+ long idx;
+
+ switch (nr_iovecs) {
+ case 0:
+ idx = 0;
+ break;
+ case 1 ... 4:
+ idx = 1;
+ break;
+ case 5 ... 16:
+ idx = 2;
+ break;
+ case 17 ... 64:
+ idx = 3;
+ break;
+ case 65 ... 128:
+ idx = 4;
+ break;
+ case 129 ... BIO_MAX_PAGES:
+ idx = 5;
+ break;
+ default:
+ goto out;
+ }
+
+ bio = mempool_alloc(bs->bio_pools[idx], gfp_mask);

if (likely(bio)) {
struct bio_vec *bvl = NULL;

bio_init(bio);
- if (likely(nr_iovecs)) {
- unsigned long idx = 0; /* shut up gcc */
-
- bvl = bvec_alloc_bs(gfp_mask, nr_iovecs, &idx, bs);
- if (unlikely(!bvl)) {
- mempool_free(bio, bs->bio_pool);
- bio = NULL;
- goto out;
- }
+
+ if (nr_iovecs) {
+ struct bio_slab *bslab = &bio_slabs[idx];
+
+ bvl = (void *) bio + sizeof(*bio);
+ memset(bvl, 0, bslab->nr_vecs * sizeof(struct bio_vec));
+
bio->bi_flags |= idx << BIO_POOL_OFFSET;
- bio->bi_max_vecs = bvec_slabs[idx].nr_vecs;
+ bio->bi_max_vecs = bslab->nr_vecs;
}
bio->bi_io_vec = bvl;
}
+
out:
return bio;
}
@@ -1120,89 +1107,78 @@ struct bio_pair *bio_split(struct bio *b
* create memory pools for biovec's in a bio_set.
* use the global biovec slabs created for general use.
*/
-static int biovec_create_pools(struct bio_set *bs, int pool_entries, int scale)
+static int bio_create_pools(struct bio_set *bs, int pool_entries, int scale)
{
int i;

- for (i = 0; i < BIOVEC_NR_POOLS; i++) {
- struct biovec_slab *bp = bvec_slabs + i;
- mempool_t **bvp = bs->bvec_pools + i;
+ for (i = 0; i < BIOSLAB_NR_POOLS; i++) {
+ struct bio_slab *bslab = bio_slabs + i;
+ mempool_t **bvp = bs->bio_pools + i;

if (pool_entries > 1 && i >= scale)
pool_entries >>= 1;

- *bvp = mempool_create_slab_pool(pool_entries, bp->slab);
+ *bvp = mempool_create_slab_pool(pool_entries, bslab->slab);
if (!*bvp)
return -ENOMEM;
}
return 0;
}

-static void biovec_free_pools(struct bio_set *bs)
+static void bio_free_pools(struct bio_set *bs)
{
int i;

- for (i = 0; i < BIOVEC_NR_POOLS; i++) {
- mempool_t *bvp = bs->bvec_pools[i];
+ for (i = 0; i < BIOSLAB_NR_POOLS; i++) {
+ mempool_t *bp = bs->bio_pools[i];

- if (bvp)
- mempool_destroy(bvp);
+ if (bp)
+ mempool_destroy(bp);
}

}

void bioset_free(struct bio_set *bs)
{
- if (bs->bio_pool)
- mempool_destroy(bs->bio_pool);
-
- biovec_free_pools(bs);
+ bio_free_pools(bs);

kfree(bs);
}

-struct bio_set *bioset_create(int bio_pool_size, int bvec_pool_size, int scale)
+struct bio_set *bioset_create(int bio_pool_size, int scale)
{
struct bio_set *bs = kzalloc(sizeof(*bs), GFP_KERNEL);

if (!bs)
return NULL;

- bs->bio_pool = mempool_create_slab_pool(bio_pool_size, bio_slab);
- if (!bs->bio_pool)
- goto bad;
-
- if (!biovec_create_pools(bs, bvec_pool_size, scale))
+ if (!bio_create_pools(bs, bio_pool_size, scale))
return bs;

-bad:
bioset_free(bs);
return NULL;
}

-static void __init biovec_init_slabs(void)
+static void __init bio_init_slabs(void)
{
int i;

- for (i = 0; i < BIOVEC_NR_POOLS; i++) {
+ for (i = 0; i < BIOSLAB_NR_POOLS; i++) {
int size;
- struct biovec_slab *bvs = bvec_slabs + i;
+ struct bio_slab *bs = bio_slabs + i;

- size = bvs->nr_vecs * sizeof(struct bio_vec);
- bvs->slab = kmem_cache_create(bvs->name, size, 0,
- SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
+ size = bs->nr_vecs * sizeof(struct bio_vec) + sizeof(struct bio);
+ bs->slab = kmem_cache_create(bs->name, size, 0, SLAB_PANIC,
+ NULL, NULL);
}
}

static int __init init_bio(void)
{
- int megabytes, bvec_pool_entries;
- int scale = BIOVEC_NR_POOLS;
-
- bio_slab = kmem_cache_create("bio", sizeof(struct bio), 0,
- SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
+ int scale = BIOSLAB_NR_POOLS;
+ int megabytes;

- biovec_init_slabs();
+ bio_init_slabs();

megabytes = nr_free_pages() >> (20 - PAGE_SHIFT);

@@ -1225,9 +1201,8 @@ static int __init init_bio(void)
* the system is completely unable to allocate memory, so we only
* need enough to make progress.
*/
- bvec_pool_entries = 1 + scale;

- fs_bio_set = bioset_create(BIO_POOL_SIZE, bvec_pool_entries, scale);
+ fs_bio_set = bioset_create(BIO_POOL_SIZE, scale);
if (!fs_bio_set)
panic("bio: can't allocate bios\n");

diff --git a/include/linux/bio.h b/include/linux/bio.h
index 092dbd0..d6d0047 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -276,7 +276,7 @@ extern struct bio_pair *bio_split(struct
extern mempool_t *bio_split_pool;
extern void bio_pair_release(struct bio_pair *dbio);

-extern struct bio_set *bioset_create(int, int, int);
+extern struct bio_set *bioset_create(int, int);
extern void bioset_free(struct bio_set *);

extern struct bio *bio_alloc(gfp_t, int);

--
Jens Axboe

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/