Re: IO errors after "block: remove bio_get_nr_vecs()"

From: Linus Torvalds
Date: Sun Dec 20 2015 - 13:41:51 EST


On Sun, Dec 20, 2015 at 10:18 AM, Christoph Hellwig <hch@xxxxxx> wrote:
>
> Artem,
>
> can you re-check the commits around this series again? I would be
> extremtly surprised if it's really this particular commit and not
> one just before it causing the problem - it just allocates bios
> to the biggest possible instead of only allocating up to what
> bio_add_page would accept.

Judging by Artem's bisect log, the last commit he tested before the
bad one was the commit before: commit 6cf66b4caf9c ("fs: use helper
bio_add_page() instead of open coding on bi_io_vec") and he marked
that one good.

Sadly, without CONFIG_LOCALVERSION_AUTO, there's no way to match up
the dmesg files (in the same bisection tar-file as the bisection log)
with the actual versions. Also, Artem's bisect.log isn't actually the
.git/BISECT_LOG file that contains the full information about what was
marked good and bad, so it's a bit hard to read (ie I can tell that
Artem had to mark commit 6cf66b4caf9c as "good" not because his log
says so, but because that explains the next commit to be tested).

Of course, it's fairly easy to make a mistake while bisecting (just
doing a thinko), but usually bisection miistakes end up causing you to
go into some "all good" or "all bad" region of commits, and the fact
that Artem seems to have marked the previous commit good and the final
commit bad does seem to imply the bisection was successful.

But yes, it is always nice to double-check the bisection results. The
best way to do it is generally to try to revert the bad commit and
verify that things work after that, but that commit doesn't revert
cleanly on top of 4.3 due to other changes.

Attached is a *COMPLETELY*UNTESTED* revertish patch for 4.3. It's
basically a revert of b54ffb73cadc, but with a few fixups to make the
revert work on top of 4.3.

So Artem, if you can test whether 4.3 works with that revert, and/or
double-check booting that b54ffb73cadc again (to verify that it's
really bad), and its parent (to double-check that it's really good),
that would be a good way to verify that yes, it is really that *one*
commit that breaks things for you.

Linus
block/bio.c | 23 +++++++++++++++++++++++
drivers/md/dm-io.c | 2 +-
fs/btrfs/compression.c | 5 ++++-
fs/btrfs/extent_io.c | 9 +++++++--
fs/btrfs/inode.c | 3 ++-
fs/btrfs/scrub.c | 18 ++++++++++++++++--
fs/direct-io.c | 2 +-
fs/ext4/page-io.c | 3 ++-
fs/ext4/readpage.c | 2 +-
fs/f2fs/data.c | 2 +-
fs/gfs2/lops.c | 9 ++++++++-
fs/logfs/dev_bdev.c | 4 ++--
fs/mpage.c | 4 ++--
fs/nilfs2/segbuf.c | 2 +-
fs/xfs/xfs_aops.c | 3 ++-
include/linux/bio.h | 1 +
16 files changed, 74 insertions(+), 18 deletions(-)

diff --git a/block/bio.c b/block/bio.c
index ad3f276d74bc..d483dbb0162d 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -694,6 +694,29 @@ integrity_clone:
EXPORT_SYMBOL(bio_clone_bioset);

/**
+ * bio_get_nr_vecs - return approx number of vecs
+ * @bdev: I/O target
+ *
+ * Return the approximate number of pages we can send to this target.
+ * There's no guarantee that you will be able to fit this number of pages
+ * into a bio, it does not account for dynamic restrictions that vary
+ * on offset.
+ */
+int bio_get_nr_vecs(struct block_device *bdev)
+{
+ struct request_queue *q = bdev_get_queue(bdev);
+ int nr_pages;
+
+ nr_pages = min_t(unsigned,
+ queue_max_segments(q),
+ queue_max_sectors(q) / (PAGE_SIZE >> 9) + 1);
+
+ return min_t(unsigned, nr_pages, BIO_MAX_PAGES);
+
+}
+EXPORT_SYMBOL(bio_get_nr_vecs);
+
+/**
* bio_add_pc_page - attempt to add page to bio
* @q: the target queue
* @bio: destination bio
diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c
index 6f8e83b2a6f8..c84714f70378 100644
--- a/drivers/md/dm-io.c
+++ b/drivers/md/dm-io.c
@@ -316,7 +316,7 @@ static void do_region(int rw, unsigned region, struct dm_io_region *where,
if ((rw & REQ_DISCARD) || (rw & REQ_WRITE_SAME))
num_bvecs = 1;
else
- num_bvecs = min_t(int, BIO_MAX_PAGES,
+ num_bvecs = min_t(int, bio_get_nr_vecs(where->bdev),
dm_sector_div_up(remaining, (PAGE_SIZE >> SECTOR_SHIFT)));

bio = bio_alloc_bioset(GFP_NOIO, num_bvecs, io->client->bios);
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 57ee8ca29b06..302266ec2cdb 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -97,7 +97,10 @@ static inline int compressed_bio_size(struct btrfs_root *root,
static struct bio *compressed_bio_alloc(struct block_device *bdev,
u64 first_byte, gfp_t gfp_flags)
{
- return btrfs_bio_alloc(bdev, first_byte >> 9, BIO_MAX_PAGES, gfp_flags);
+ int nr_vecs;
+
+ nr_vecs = bio_get_nr_vecs(bdev);
+ return btrfs_bio_alloc(bdev, first_byte >> 9, nr_vecs, gfp_flags);
}

static int check_compressed_csum(struct inode *inode,
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 3915c9473e94..f39f819cf7e8 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -2803,7 +2803,9 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree,
{
int ret = 0;
struct bio *bio;
+ int nr;
int contig = 0;
+ int this_compressed = bio_flags & EXTENT_BIO_COMPRESSED;
int old_compressed = prev_bio_flags & EXTENT_BIO_COMPRESSED;
size_t page_size = min_t(size_t, size, PAGE_CACHE_SIZE);

@@ -2831,9 +2833,12 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree,
return 0;
}
}
+ if (this_compressed)
+ nr = BIO_MAX_PAGES;
+ else
+ nr = bio_get_nr_vecs(bdev);

- bio = btrfs_bio_alloc(bdev, sector, BIO_MAX_PAGES,
- GFP_NOFS | __GFP_HIGH);
+ bio = btrfs_bio_alloc(bdev, sector, nr, GFP_NOFS | __GFP_HIGH);
if (!bio)
return -ENOMEM;

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 611b66d73e80..4e1aea2ec8b7 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -7992,8 +7992,9 @@ out:
static struct bio *btrfs_dio_bio_alloc(struct block_device *bdev,
u64 first_sector, gfp_t gfp_flags)
{
+ int nr_vecs = bio_get_nr_vecs(bdev);
struct bio *bio;
- bio = btrfs_bio_alloc(bdev, first_sector, BIO_MAX_PAGES, gfp_flags);
+ bio = btrfs_bio_alloc(bdev, first_sector, nr_vecs, gfp_flags);
if (bio)
bio_associate_current(bio);
return bio;
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index a39f5d1144e8..3b9296e92cc5 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -464,14 +464,27 @@ struct scrub_ctx *scrub_setup_ctx(struct btrfs_device *dev, int is_dev_replace)
struct scrub_ctx *sctx;
int i;
struct btrfs_fs_info *fs_info = dev->dev_root->fs_info;
+ int pages_per_rd_bio;
int ret;

+ /*
+ * the setting of pages_per_rd_bio is correct for scrub but might
+ * be wrong for the dev_replace code where we might read from
+ * different devices in the initial huge bios. However, that
+ * code is able to correctly handle the case when adding a page
+ * to a bio fails.
+ */
+ if (dev->bdev)
+ pages_per_rd_bio = min_t(int, SCRUB_PAGES_PER_RD_BIO,
+ bio_get_nr_vecs(dev->bdev));
+ else
+ pages_per_rd_bio = SCRUB_PAGES_PER_RD_BIO;
sctx = kzalloc(sizeof(*sctx), GFP_NOFS);
if (!sctx)
goto nomem;
atomic_set(&sctx->refs, 1);
sctx->is_dev_replace = is_dev_replace;
- sctx->pages_per_rd_bio = SCRUB_PAGES_PER_RD_BIO;
+ sctx->pages_per_rd_bio = pages_per_rd_bio;
sctx->curr = -1;
sctx->dev_root = dev->dev_root;
for (i = 0; i < SCRUB_BIOS_PER_SCTX; ++i) {
@@ -4053,7 +4066,8 @@ static int scrub_setup_wr_ctx(struct scrub_ctx *sctx,
return 0;

WARN_ON(!dev->bdev);
- wr_ctx->pages_per_wr_bio = SCRUB_PAGES_PER_WR_BIO;
+ wr_ctx->pages_per_wr_bio = min_t(int, SCRUB_PAGES_PER_WR_BIO,
+ bio_get_nr_vecs(dev->bdev));
wr_ctx->tgtdev = dev;
atomic_set(&wr_ctx->flush_all_writes, 0);
return 0;
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 11256291642e..818c647f36d3 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -655,7 +655,7 @@ static inline int dio_new_bio(struct dio *dio, struct dio_submit *sdio,
if (ret)
goto out;
sector = start_sector << (sdio->blkbits - 9);
- nr_pages = min(sdio->pages_in_io, BIO_MAX_PAGES);
+ nr_pages = min(sdio->pages_in_io, bio_get_nr_vecs(map_bh->b_bdev));
BUG_ON(nr_pages <= 0);
dio_bio_alloc(dio, sdio, map_bh->b_bdev, sector, nr_pages);
sdio->boundary = 0;
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index 84ba4d2b3a35..5b3fcb8a010f 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -374,9 +374,10 @@ void ext4_io_submit_init(struct ext4_io_submit *io,
static int io_submit_init_bio(struct ext4_io_submit *io,
struct buffer_head *bh)
{
+ int nvecs = bio_get_nr_vecs(bh->b_bdev);
struct bio *bio;

- bio = bio_alloc(GFP_NOIO, BIO_MAX_PAGES);
+ bio = bio_alloc(GFP_NOIO, min(nvecs, BIO_MAX_PAGES));
if (!bio)
return -ENOMEM;
wbc_init_bio(io->io_wbc, bio);
diff --git a/fs/ext4/readpage.c b/fs/ext4/readpage.c
index 560af0437704..a4823d88ae26 100644
--- a/fs/ext4/readpage.c
+++ b/fs/ext4/readpage.c
@@ -284,7 +284,7 @@ int ext4_mpage_readpages(struct address_space *mapping,
goto set_error_page;
}
bio = bio_alloc(GFP_KERNEL,
- min_t(int, nr_pages, BIO_MAX_PAGES));
+ min_t(int, nr_pages, bio_get_nr_vecs(bdev)));
if (!bio) {
if (ctx)
ext4_release_crypto_ctx(ctx);
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index a82abe921b89..432496daacae 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -954,7 +954,7 @@ submit_and_realloc:
}

bio = bio_alloc(GFP_KERNEL,
- min_t(int, nr_pages, BIO_MAX_PAGES));
+ min_t(int, nr_pages, bio_get_nr_vecs(bdev)));
if (!bio) {
if (ctx)
f2fs_release_crypto_ctx(ctx);
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index d5369a109781..4052116959fb 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -261,11 +261,18 @@ void gfs2_log_flush_bio(struct gfs2_sbd *sdp, int rw)
static struct bio *gfs2_log_alloc_bio(struct gfs2_sbd *sdp, u64 blkno)
{
struct super_block *sb = sdp->sd_vfs;
+ unsigned nrvecs = bio_get_nr_vecs(sb->s_bdev);
struct bio *bio;

BUG_ON(sdp->sd_log_bio);

- bio = bio_alloc(GFP_NOIO, BIO_MAX_PAGES);
+ while (1) {
+ bio = bio_alloc(GFP_NOIO, nrvecs);
+ if (likely(bio))
+ break;
+ nrvecs = max(nrvecs/2, 1U);
+ }
+
bio->bi_iter.bi_sector = blkno * (sb->s_blocksize >> 9);
bio->bi_bdev = sb->s_bdev;
bio->bi_end_io = gfs2_end_log_write;
diff --git a/fs/logfs/dev_bdev.c b/fs/logfs/dev_bdev.c
index a7fdbd868474..cea0cc9878b7 100644
--- a/fs/logfs/dev_bdev.c
+++ b/fs/logfs/dev_bdev.c
@@ -81,7 +81,7 @@ static int __bdev_writeseg(struct super_block *sb, u64 ofs, pgoff_t index,
unsigned int max_pages;
int i;

- max_pages = min(nr_pages, BIO_MAX_PAGES);
+ max_pages = min(nr_pages, (size_t) bio_get_nr_vecs(super->s_bdev));

bio = bio_alloc(GFP_NOFS, max_pages);
BUG_ON(!bio);
@@ -171,7 +171,7 @@ static int do_erase(struct super_block *sb, u64 ofs, pgoff_t index,
unsigned int max_pages;
int i;

- max_pages = min(nr_pages, BIO_MAX_PAGES);
+ max_pages = min(nr_pages, (size_t) bio_get_nr_vecs(super->s_bdev));

bio = bio_alloc(GFP_NOFS, max_pages);
BUG_ON(!bio);
diff --git a/fs/mpage.c b/fs/mpage.c
index a7c34274f207..7b0b90e23cbe 100644
--- a/fs/mpage.c
+++ b/fs/mpage.c
@@ -278,7 +278,7 @@ alloc_new:
goto out;
}
bio = mpage_alloc(bdev, blocks[0] << (blkbits - 9),
- min_t(int, nr_pages, BIO_MAX_PAGES), gfp);
+ min_t(int, nr_pages, bio_get_nr_vecs(bdev)), gfp);
if (bio == NULL)
goto confused;
}
@@ -605,7 +605,7 @@ alloc_new:
}
}
bio = mpage_alloc(bdev, blocks[0] << (blkbits - 9),
- BIO_MAX_PAGES, GFP_NOFS|__GFP_HIGH);
+ bio_get_nr_vecs(bdev), GFP_NOFS|__GFP_HIGH);
if (bio == NULL)
goto confused;

diff --git a/fs/nilfs2/segbuf.c b/fs/nilfs2/segbuf.c
index f63620ce3892..550b10efb14e 100644
--- a/fs/nilfs2/segbuf.c
+++ b/fs/nilfs2/segbuf.c
@@ -414,7 +414,7 @@ static void nilfs_segbuf_prepare_write(struct nilfs_segment_buffer *segbuf,
{
wi->bio = NULL;
wi->rest_blocks = segbuf->sb_sum.nblocks;
- wi->max_pages = BIO_MAX_PAGES;
+ wi->max_pages = bio_get_nr_vecs(wi->nilfs->ns_bdev);
wi->nr_vecs = min(wi->max_pages, wi->rest_blocks);
wi->start = wi->end = 0;
wi->blocknr = segbuf->sb_pseg_start;
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 50ab2879b9da..760dd2a5bf05 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -380,7 +380,8 @@ STATIC struct bio *
xfs_alloc_ioend_bio(
struct buffer_head *bh)
{
- struct bio *bio = bio_alloc(GFP_NOIO, BIO_MAX_PAGES);
+ int nvecs = bio_get_nr_vecs(bh->b_bdev);
+ struct bio *bio = bio_alloc(GFP_NOIO, nvecs);

ASSERT(bio->bi_private == NULL);
bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);
diff --git a/include/linux/bio.h b/include/linux/bio.h
index b9b6e046b52e..f2198d5b8be1 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -451,6 +451,7 @@ void bio_chain(struct bio *, struct bio *);
extern int bio_add_page(struct bio *, struct page *, unsigned int,unsigned int);
extern int bio_add_pc_page(struct request_queue *, struct bio *, struct page *,
unsigned int, unsigned int);
+extern int bio_get_nr_vecs(struct block_device *);
struct rq_map_data;
extern struct bio *bio_map_user_iov(struct request_queue *,
const struct iov_iter *, gfp_t);