Re: [PATCH v1 09/13] ceph: make remaining I/O lock functions killable
From: Viacheslav Dubeyko
Date: Thu Mar 12 2026 - 16:06:09 EST
On Thu, 2026-03-12 at 10:16 +0200, Ionut Nechita (Wind River) wrote:
> From: Ionut Nechita <ionut.nechita@xxxxxxxxxxxxx>
>
> Following the same pattern as ceph_start_io_write(), make
> ceph_start_io_read() and ceph_start_io_direct() killable to
> prevent indefinite hangs when waiting for i_rwsem during
> MDS/OSD unavailability.
>
> This completes the killable lock conversion for all ceph I/O
> start functions, allowing blocked processes to be terminated
> with SIGKILL instead of hanging indefinitely.
>
> Signed-off-by: Ionut Nechita <ionut.nechita@xxxxxxxxxxxxx>
> ---
> fs/ceph/file.c | 27 +++++++++++++++++++--------
> fs/ceph/io.c | 28 ++++++++++++++++++++--------
> fs/ceph/io.h | 4 ++--
> 3 files changed, 41 insertions(+), 18 deletions(-)
>
> diff --git a/fs/ceph/file.c b/fs/ceph/file.c
> index 01e4f31b1f2f3..c828552d51920 100644
> --- a/fs/ceph/file.c
> +++ b/fs/ceph/file.c
> @@ -2122,10 +2122,15 @@ static ssize_t ceph_read_iter(struct kiocb *iocb, struct iov_iter *to)
> if (ceph_inode_is_shutdown(inode))
> return -ESTALE;
>
> - if (direct_lock)
> - ceph_start_io_direct(inode);
> - else
> - ceph_start_io_read(inode);
> + if (direct_lock) {
> + ret = ceph_start_io_direct(inode);
> + if (ret)
> + return ret;
> + } else {
> + ret = ceph_start_io_read(inode);
> + if (ret)
> + return ret;
> + }
>
> if (!(fi->flags & CEPH_F_SYNC) && !direct_lock)
> want |= CEPH_CAP_FILE_CACHE;
> @@ -2278,7 +2283,9 @@ static ssize_t ceph_splice_read(struct file *in, loff_t *ppos,
> (fi->flags & CEPH_F_SYNC))
> return copy_splice_read(in, ppos, pipe, len, flags);
>
> - ceph_start_io_read(inode);
> + ret = ceph_start_io_read(inode);
> + if (ret)
> + return ret;
>
> want = CEPH_CAP_FILE_CACHE;
> if (fi->fmode & CEPH_FILE_MODE_LAZY)
> @@ -2357,9 +2364,13 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
> direct_lock = true;
>
> retry_snap:
> - if (direct_lock)
> - ceph_start_io_direct(inode);
> - else {
> + if (direct_lock) {
> + err = ceph_start_io_direct(inode);
> + if (err) {
> + ceph_free_cap_flush(prealloc_cf);
> + return err;
> + }
> + } else {
> err = ceph_start_io_write(inode);
> if (err) {
> ceph_free_cap_flush(prealloc_cf);
> diff --git a/fs/ceph/io.c b/fs/ceph/io.c
> index f9ac89ec1d6a1..7bd57de2d9681 100644
> --- a/fs/ceph/io.c
> +++ b/fs/ceph/io.c
> @@ -47,20 +47,26 @@ static void ceph_block_o_direct(struct ceph_inode_info *ci, struct inode *inode)
> * Note that buffered writes and truncates both take a write lock on
> * inode->i_rwsem, meaning that those are serialised w.r.t. the reads.
> */
> -void
> +int
> ceph_start_io_read(struct inode *inode)
Which kernel version is this patch based on? I can see that
down_read_killable() is already used in ceph_start_io_read() as of v7.0-rc3 [1].
Thanks,
Slava.
[1] https://elixir.bootlin.com/linux/v7.0-rc3/source/fs/ceph/io.c#L59
> {
> struct ceph_inode_info *ci = ceph_inode(inode);
> + int ret;
>
> /* Be an optimist! */
> - down_read(&inode->i_rwsem);
> + ret = down_read_killable(&inode->i_rwsem);
> + if (ret)
> + return ret;
> if (!(READ_ONCE(ci->i_ceph_flags) & CEPH_I_ODIRECT))
> - return;
> + return 0;
> up_read(&inode->i_rwsem);
> /* Slow path.... */
> - down_write(&inode->i_rwsem);
> + ret = down_write_killable(&inode->i_rwsem);
> + if (ret)
> + return ret;
> ceph_block_o_direct(ci, inode);
> downgrade_write(&inode->i_rwsem);
> + return 0;
> }
>
> /**
> @@ -138,20 +144,26 @@ static void ceph_block_buffered(struct ceph_inode_info *ci, struct inode *inode)
> * Note that buffered writes and truncates both take a write lock on
> * inode->i_rwsem, meaning that those are serialised w.r.t. O_DIRECT.
> */
> -void
> +int
> ceph_start_io_direct(struct inode *inode)
> {
> struct ceph_inode_info *ci = ceph_inode(inode);
> + int ret;
>
> /* Be an optimist! */
> - down_read(&inode->i_rwsem);
> + ret = down_read_killable(&inode->i_rwsem);
> + if (ret)
> + return ret;
> if (READ_ONCE(ci->i_ceph_flags) & CEPH_I_ODIRECT)
> - return;
> + return 0;
> up_read(&inode->i_rwsem);
> /* Slow path.... */
> - down_write(&inode->i_rwsem);
> + ret = down_write_killable(&inode->i_rwsem);
> + if (ret)
> + return ret;
> ceph_block_buffered(ci, inode);
> downgrade_write(&inode->i_rwsem);
> + return 0;
> }
>
> /**
> diff --git a/fs/ceph/io.h b/fs/ceph/io.h
> index 94ce176df9997..9432b8b607650 100644
> --- a/fs/ceph/io.h
> +++ b/fs/ceph/io.h
> @@ -2,11 +2,11 @@
> #ifndef _FS_CEPH_IO_H
> #define _FS_CEPH_IO_H
>
> -void ceph_start_io_read(struct inode *inode);
> +int ceph_start_io_read(struct inode *inode);
> void ceph_end_io_read(struct inode *inode);
> int ceph_start_io_write(struct inode *inode);
> void ceph_end_io_write(struct inode *inode);
> -void ceph_start_io_direct(struct inode *inode);
> +int ceph_start_io_direct(struct inode *inode);
> void ceph_end_io_direct(struct inode *inode);
>
> #endif /* FS_CEPH_IO_H */