Re: [next-20260108]kernel BUG at drivers/scsi/scsi_lib.c:1173!
From: Ming Lei
Date: Fri Jan 09 2026 - 08:19:37 EST
On Fri, Jan 09, 2026 at 05:51:15PM +0530, Venkat Rao Bagalkote wrote:
>
> On 09/01/26 5:25 pm, Ming Lei wrote:
> > On Fri, Jan 09, 2026 at 05:14:36PM +0530, Venkat Rao Bagalkote wrote:
> > > On 09/01/26 12:19 pm, Ming Lei wrote:
> > > > On Thu, Jan 08, 2026 at 09:56:39PM -0800, Christoph Hellwig wrote:
> > > > > I've seen the same when running xfstests on xfs, and bisected it to:
> > > > >
> > > > > commit ee623c892aa59003fca173de0041abc2ccc2c72d
> > > > > Author: Ming Lei <ming.lei@xxxxxxxxxx>
> > > > > Date: Wed Dec 31 11:00:55 2025 +0800
> > > > >
> > > > > block: use bvec iterator helper for bio_may_need_split()
> > > > >
> > > > Hi Christoph and Venkat Rao Bagalkote,
> > > >
> > > > Unfortunately I can't duplicate the issue in my environment, can you test
> > > > the following patch?
> > > >
> > > > diff --git a/block/blk.h b/block/blk.h
> > > > index 98f4dfd4ec75..980eef1f5690 100644
> > > > --- a/block/blk.h
> > > > +++ b/block/blk.h
> > > > @@ -380,7 +380,7 @@ static inline bool bio_may_need_split(struct bio *bio,
> > > > return true;
> > > > bv = __bvec_iter_bvec(bio->bi_io_vec, bio->bi_iter);
> > > > - if (bio->bi_iter.bi_size > bv->bv_len)
> > > > + if (bio->bi_iter.bi_size > bv->bv_len - bio->bi_iter.bi_bvec_done)
> > > > return true;
> > > > return bv->bv_len + bv->bv_offset > lim->max_fast_segment_size;
> > > > }
> > > Hello Ming,
> > >
> > >
> > > This is not helping. I am hitting this issue during the kernel build itself.
> > Can you confirm if it can fix the blktests ext4/056 first?
> >
> > If the kernel build is running on the newly patched kernel, please provide the
> > dmesg log. And if it is reproducible, can you confirm whether it can be fixed
> > by reverting ee623c892aa59003 (block: use bvec iterator helper for bio_may_need_split())?
>
>
> Unfortunately, even with the revert, the build fails.
>
>
>
> commit c64b2ee9cddcb31546c8622ef018d344544a9388 (HEAD)
> Author: Super User <root@xxxxxxxxxxxxxxxxxxxxxxxxxxx>
> Date: Fri Jan 9 06:51:19 2026 -0600
>
> Revert "block: use bvec iterator helper for bio_may_need_split()"
>
> This reverts commit ee623c892aa59003fca173de0041abc2ccc2c72d.
OK, then your issue isn't related to the above change.
Can you reproduce & collect dmesg log with the bad sg/rq/bio/bvec info by
applying the attached debug patch?
Also if possible, please collect your scsi queue's limit info before
reproducing the issue:
(cd /sys/block/$SD/queue && find . -type f -exec grep -aH . {} \;)
Thanks,
Ming
diff --git a/block/blk-mq-dma.c b/block/blk-mq-dma.c
index 752060d7261c..33c1b6a0a738 100644
--- a/block/blk-mq-dma.c
+++ b/block/blk-mq-dma.c
@@ -4,8 +4,75 @@
*/
#include <linux/blk-integrity.h>
#include <linux/blk-mq-dma.h>
+#include <linux/scatterlist.h>
#include "blk.h"
+static void dump_rq_mapping_debug(struct request *rq, struct scatterlist *sglist,
+ int nsegs)
+{
+ struct scatterlist *sg;
+ struct bio *bio;
+ struct bvec_iter iter;
+ struct bio_vec bv;
+ int i;
+
+ pr_err("=== __blk_rq_map_sg DEBUG DUMP ===\n");
+ pr_err("DISK: %s\n", rq->q->disk ? rq->q->disk->disk_name : "(null)");
+
+ /* Dump nsegs vs expected */
+ pr_err("nsegs=%d nr_phys_segments=%u\n",
+ nsegs, blk_rq_nr_phys_segments(rq));
+
+ /* Dump request info */
+ pr_err("REQUEST: __data_len=%u __sector=%llu cmd_flags=0x%x "
+ "rq_flags=0x%x nr_phys_segments=%u phys_gap_bit=%u\n",
+ rq->__data_len, (unsigned long long)rq->__sector,
+ rq->cmd_flags, (__force unsigned int)rq->rq_flags,
+ rq->nr_phys_segments, rq->phys_gap_bit);
+
+ /* Dump each SG element */
+ pr_err("--- SG LIST (%d entries) ---\n", nsegs);
+ for_each_sg(sglist, sg, nsegs, i) {
+ pr_err(" sg[%d]: pfn=0x%lx offset=%u len=%u dma_addr=0x%llx\n",
+ i, page_to_pfn(sg_page(sg)), sg->offset, sg->length,
+ (unsigned long long)sg_dma_address(sg));
+ }
+
+ /* Dump each bio */
+ pr_err("--- BIO LIST ---\n");
+ for (bio = rq->bio; bio; bio = bio->bi_next) {
+ pr_err(" BIO %p: bi_iter={sector=%llu size=%u idx=%u bvec_done=%u} "
+ "bi_flags=0x%x bi_opf=0x%x bi_vcnt=%u bi_bvec_gap_bit=%u\n",
+ bio,
+ (unsigned long long)bio->bi_iter.bi_sector,
+ bio->bi_iter.bi_size, bio->bi_iter.bi_idx,
+ bio->bi_iter.bi_bvec_done,
+ bio->bi_flags, bio->bi_opf, bio->bi_vcnt,
+ bio->bi_bvec_gap_bit);
+
+ /* Dump each bvec in this bio */
+ pr_err(" --- BVECS (bi_vcnt=%u) ---\n", bio->bi_vcnt);
+ for (i = 0; i < bio->bi_vcnt; i++) {
+ struct bio_vec *bvp = &bio->bi_io_vec[i];
+
+ pr_err(" bvec[%d]: pfn=0x%lx len=%u offset=%u\n",
+ i, page_to_pfn(bvp->bv_page), bvp->bv_len,
+ bvp->bv_offset);
+ }
+
+ /* Also dump effective bvecs via iterator */
+ pr_err(" --- EFFECTIVE BVECS (via iter) ---\n");
+ i = 0;
+ bio_for_each_bvec(bv, bio, iter) {
+ pr_err(" eff_bvec[%d]: pfn=0x%lx len=%u offset=%u\n",
+ i++, page_to_pfn(bv.bv_page), bv.bv_len,
+ bv.bv_offset);
+ }
+ }
+
+ pr_err("=== END DEBUG DUMP ===\n");
+}
+
static bool __blk_map_iter_next(struct blk_map_iter *iter)
{
if (iter->iter.bi_size)
@@ -306,6 +373,8 @@ int __blk_rq_map_sg(struct request *rq, struct scatterlist *sglist,
* Something must have been wrong if the figured number of
* segment is bigger than number of req's physical segments
*/
+ if (nsegs > blk_rq_nr_phys_segments(rq))
+ dump_rq_mapping_debug(rq, sglist, nsegs);
WARN_ON(nsegs > blk_rq_nr_phys_segments(rq));
return nsegs;
diff --git a/block/blk.h b/block/blk.h
index 98f4dfd4ec75..980eef1f5690 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -380,7 +380,7 @@ static inline bool bio_may_need_split(struct bio *bio,
return true;
bv = __bvec_iter_bvec(bio->bi_io_vec, bio->bi_iter);
- if (bio->bi_iter.bi_size > bv->bv_len)
+ if (bio->bi_iter.bi_size > bv->bv_len - bio->bi_iter.bi_bvec_done)
return true;
return bv->bv_len + bv->bv_offset > lim->max_fast_segment_size;
}