Re: [PATCH 1/3] aio: fix oops because of extra IO control blockfreeing.
From: Andrew Morton
Date: Wed Mar 07 2007 - 17:14:56 EST
On Mon, 05 Mar 2007 17:23:33 +0300
Leonid Ananiev <leonid.i.ananiev@xxxxxxxxxxxxxxx> wrote:
> From Leonid Ananiev
>
> The patch fixes oops because of extra IO control block freeing.
> IO is retried if page could not be invalidated.
>
> Signed-off-by: Leonid Ananiev <leonid.i.ananiev@xxxxxxxxx>
>
> The patch fixes oops "Kernel BUG at fs/aio.c:509" archived at
> http://lkml.org/lkml/2007/2/8/337
> The number of IO control block (iocb)users < 0.
> If page could not be invalidated by invalidate_inode_pages2_range()
> than EIO is returned. It happens if journal_try_to_free_buffers() fails
> to drop_buffers().
> This EIO is not differing from real IO competition with EIO and
> aio_complete() is called.
> Later aio_complete() is called from dio_bio_end_aio() and frees iocb
> once more.
>
> After patch generic_file_direct_IO() sets PgBusy flag in iocb
> if page could not be invalidated. iocb is retried after IO competition.
> The process is waked up if IO is SYNC else iocb is kicked.
> The lines ___if (ret != -EIOCBRETRY)___ is deleted because
> nothing set to EIOCBRETRY.
Please copy linux-aio@xxxxxxxxx on AIO patches.
> Next patches 2/3 and 3/3 do cleanup only.
I cannot find those patches.
> The patch is applied and tested with aio-stress on 2.6.20 and 2.6.21-rc2
> diff -uprNX linux-2.6.21-rc2/Documentation/dontdiff
Your email client performs space-stuffing, which makes things harder on the
receiving end. http://mbligh.org/linuxdocs/Email/Clients/Thunderbird might
help.
> linux-2.6.21-rc2/fs/aio.c linux-2.6.21-rc2-aio31/fs/aio.c
> --- linux-2.6.21-rc2/fs/aio.c 2007-03-05 00:29:58.000000000 -0800
> +++ linux-2.6.21-rc2-aio31/fs/aio.c 2007-03-05 00:38:11.000000000 -0800
> @@ -723,37 +723,12 @@ static ssize_t aio_run_iocb(struct kiocb
> ret = retry(iocb);
> current->io_wait = NULL;
>
> - if (ret != -EIOCBRETRY && ret != -EIOCBQUEUED) {
> + if (!kiocbIsPgBusy(iocb) && ret != -EIOCBQUEUED) {
> BUG_ON(!list_empty(&iocb->ki_wait.task_list));
> aio_complete(iocb, ret, 0);
> }
> out:
> spin_lock_irq(&ctx->ctx_lock);
> -
> - if (-EIOCBRETRY == ret) {
> - /*
> - * OK, now that we are done with this iteration
> - * and know that there is more left to go,
> - * this is where we let go so that a subsequent
> - * "kick" can start the next iteration
> - */
> -
> - /* will make __queue_kicked_iocb succeed from here on */
> - INIT_LIST_HEAD(&iocb->ki_run_list);
> - /* we must queue the next iteration ourselves, if it
> - * has already been kicked */
> - if (kiocbIsKicked(iocb)) {
> - __queue_kicked_iocb(iocb);
> -
> - /*
> - * __queue_kicked_iocb will always return 1 here, because
> - * iocb->ki_run_list is empty at this point so it should
> - * be safe to unconditionally queue the context into the
> - * work queue.
> - */
> - aio_queue_work(ctx);
> - }
> - }
> return ret;
> }
>
> @@ -945,6 +920,10 @@ int fastcall aio_complete(struct kiocb *
> wake_up_process(iocb->ki_obj.tsk);
> return 1;
> }
> + if (TestClearPgBusy(iocb)) { // page could not be invalidated
> + kick_iocb(iocb);
> + return 1;
> + }
>
> info = &ctx->ring_info;
>
> diff -uprNX linux-2.6.21-rc2/Documentation/dontdiff
> linux-2.6.21-rc2/fs/read_write.c linux-2.6.21-rc2-aio31/fs/read_write.c
> --- linux-2.6.21-rc2/fs/read_write.c 2007-03-05 00:29:58.000000000 -0800
> +++ linux-2.6.21-rc2-aio31/fs/read_write.c 2007-03-05 04:44:44.000000000
> -0800
> @@ -239,7 +239,7 @@ ssize_t do_sync_read(struct file *filp,
>
> for (;;) {
> ret = filp->f_op->aio_read(&kiocb, &iov, 1, kiocb.ki_pos);
> - if (ret != -EIOCBRETRY)
> + if (!TestClearPgBusy(&kiocb))
> break;
> wait_on_retry_sync_kiocb(&kiocb);
> }
> @@ -297,7 +297,7 @@ ssize_t do_sync_write(struct file *filp,
>
> for (;;) {
> ret = filp->f_op->aio_write(&kiocb, &iov, 1, kiocb.ki_pos);
> - if (ret != -EIOCBRETRY)
> + if (!TestClearPgBusy(&kiocb))
> break;
> wait_on_retry_sync_kiocb(&kiocb);
> }
> @@ -463,7 +463,7 @@ ssize_t do_sync_readv_writev(struct file
>
> for (;;) {
> ret = fn(&kiocb, iov, nr_segs, kiocb.ki_pos);
> - if (ret != -EIOCBRETRY)
> + if (!TestClearPgBusy(&kiocb))
> break;
> wait_on_retry_sync_kiocb(&kiocb);
> }
> diff -uprNX linux-2.6.21-rc2/Documentation/dontdiff
> linux-2.6.21-rc2/include/linux/aio.h
> linux-2.6.21-rc2-aio31/include/linux/aio.h
> --- linux-2.6.21-rc2/include/linux/aio.h 2007-02-04 10:44:54.000000000 -0800
> +++ linux-2.6.21-rc2-aio31/include/linux/aio.h 2007-03-05
> 00:38:11.000000000 -0800
> @@ -34,21 +34,26 @@ struct kioctx;
> /* #define KIF_LOCKED 0 */
> #define KIF_KICKED 1
> #define KIF_CANCELLED 2
> +#define KIF_PG_BUSY 3
>
> #define kiocbTryLock(iocb) test_and_set_bit(KIF_LOCKED, &(iocb)->ki_flags)
> #define kiocbTryKick(iocb) test_and_set_bit(KIF_KICKED, &(iocb)->ki_flags)
> +#define TestClearPgBusy(iocb) test_and_clear_bit(KIF_PG_BUSY,
> &(iocb)->ki_flags)
>
> #define kiocbSetLocked(iocb) set_bit(KIF_LOCKED, &(iocb)->ki_flags)
> #define kiocbSetKicked(iocb) set_bit(KIF_KICKED, &(iocb)->ki_flags)
> #define kiocbSetCancelled(iocb) set_bit(KIF_CANCELLED, &(iocb)->ki_flags)
> +#define kiocbSetPgBusy(iocb) set_bit(KIF_PG_BUSY, &(iocb)->ki_flags)
>
> #define kiocbClearLocked(iocb) clear_bit(KIF_LOCKED, &(iocb)->ki_flags)
> #define kiocbClearKicked(iocb) clear_bit(KIF_KICKED, &(iocb)->ki_flags)
> #define kiocbClearCancelled(iocb) clear_bit(KIF_CANCELLED,
> &(iocb)->ki_flags)
> +#define kiocbClearPgBusy(iocb) clear_bit(KIF_PG_BUSY, &(iocb)->ki_flags)
>
> #define kiocbIsLocked(iocb) test_bit(KIF_LOCKED, &(iocb)->ki_flags)
> #define kiocbIsKicked(iocb) test_bit(KIF_KICKED, &(iocb)->ki_flags)
> #define kiocbIsCancelled(iocb) test_bit(KIF_CANCELLED, &(iocb)->ki_flags)
> +#define kiocbIsPgBusy(iocb) test_bit(KIF_PG_BUSY, &(iocb)->ki_flags)
>
> /* is there a better place to document function pointer methods? */
> /**
> diff -upr linux-2.6.20/mm/filemap.c linux-2.6.20-aio2/mm/filemap.c
> --- linux-2.6.20/mm/filemap.c 2007-02-04 21:44:54.000000000 +0300
> +++ linux-2.6.20-aio2/mm/filemap.c 2007-03-04 21:46:10.000000000 +0300
> @@ -2413,10 +2413,9 @@ generic_file_direct_IO(int rw, struct ki
> if (rw == WRITE && mapping->nrpages) {
> pgoff_t end = (offset + write_len - 1)
> >> PAGE_CACHE_SHIFT;
> - int err = invalidate_inode_pages2_range(mapping,
> - offset >> PAGE_CACHE_SHIFT, end);
> - if (err)
> - retval = err;
> + if (invalidate_inode_pages2_range(mapping,
> + offset >> PAGE_CACHE_SHIFT, end))
> + kiocbSetPgBusy(iocb);
> }
> }
> return retval;
> -
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@xxxxxxxxxxxxxxx
> More majordomo info at http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at http://www.tux.org/lkml/
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/