[PATCH 2/7] block: loop: don't hold lo_ctl_mutex in lo_open
From: Jarod Wilson
Date: Wed Apr 08 2015 - 02:25:06 EST
From: Ming Lei <ming.lei@xxxxxxxxxxxxx>
The lo_ctl_mutex is held for running all ioctl handlers, and
in some ioctl handlers, ioctl_by_bdev(BLKRRPART) is called for
rereading partitions, which requires bd_mutex.
So it is easy to cause failure because trylock(bd_mutex) may
fail inside blkdev_reread_part(), as the following lock contexts show:
blkid or other application:
->open()
->mutex_lock(bd_mutex)
->lo_open()
->mutex_lock(lo_ctl_mutex)
losetup(set fd ioctl):
->mutex_lock(lo_ctl_mutex)
->ioctl_by_bdev(BLKRRPART)
->trylock(bd_mutex)
This patch tries to eliminate the ABBA lock dependency by no longer
acquiring lo_ctl_mutex in lo_open(), with the following approach:
1) introduce lo_open_mutex to protect lo_refcnt and avoid acquiring
lo_ctl_mutex in lo_open():
- for open vs. add/del loop, there is no problem because of loop_index_mutex
- lo_open_mutex is used for syncing open() and loop_clr_fd()
- both open() and release() have been serialized by bd_mutex already
2) don't hold lo_ctl_mutex for decreasing/checking lo_refcnt in
lo_release(), so that lo_ctl_mutex is only required for the last release.
CC: Christoph Hellwig <hch@xxxxxxxxxxxxx>
CC: Jens Axboe <axboe@xxxxxxxxx>
CC: Tejun Heo <tj@xxxxxxxxxx>
CC: Alexander Viro <viro@xxxxxxxxxxxxxxxxxx>
CC: Markus Pargmann <mpa@xxxxxxxxxxxxxx>
CC: Stefan Weinhuber <wein@xxxxxxxxxx>
CC: Stefan Haberland <stefan.haberland@xxxxxxxxxx>
CC: Sebastian Ott <sebott@xxxxxxxxxxxxxxxxxx>
CC: Fabian Frederick <fabf@xxxxxxxxx>
CC: Ming Lei <ming.lei@xxxxxxxxxxxxx>
CC: David Herrmann <dh.herrmann@xxxxxxxxx>
CC: Mike Galbraith <bitbucket@xxxxxxxxx>
CC: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx>
CC: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
CC: nbd-general@xxxxxxxxxxxxxxxxxxxxx
CC: linux-s390@xxxxxxxxxxxxxxx
Signed-off-by: Ming Lei <ming.lei@xxxxxxxxxxxxx>
Signed-off-by: Jarod Wilson <jarod@xxxxxxxxxx>
---
drivers/block/loop.c | 32 ++++++++++++++++++++++++++------
drivers/block/loop.h | 1 +
2 files changed, 27 insertions(+), 6 deletions(-)
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index d1f168b..81a6bc1 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -879,14 +879,18 @@ static int loop_clr_fd(struct loop_device *lo)
* <dev>/do something like mkfs/losetup -d <dev> causing the losetup -d
* command to fail with EBUSY.
*/
+ mutex_lock(&lo->lo_open_mutex);
if (lo->lo_refcnt > 1) {
+ mutex_unlock(&lo->lo_open_mutex);
lo->lo_flags |= LO_FLAGS_AUTOCLEAR;
mutex_unlock(&lo->lo_ctl_mutex);
return 0;
}
- if (filp == NULL)
+ if (filp == NULL) {
+ mutex_unlock(&lo->lo_open_mutex);
return -EINVAL;
+ }
spin_lock_irq(&lo->lo_lock);
lo->lo_state = Lo_rundown;
@@ -919,6 +923,15 @@ static int loop_clr_fd(struct loop_device *lo)
lo->lo_state = Lo_unbound;
/* This is safe: open() is still holding a reference. */
module_put(THIS_MODULE);
+
+ /*
+ * Unlock lo_open_mutex to avoid -EBUSY when rereading partitions:
+ * - rereading partitions tries to acquire bd_mutex
+ * - another task may be opening the loop device while holding
+ * bd_mutex and trying to acquire lo_open_mutex
+ */
+ mutex_unlock(&lo->lo_open_mutex);
+
if (lo->lo_flags & LO_FLAGS_PARTSCAN && bdev)
ioctl_by_bdev(bdev, BLKRRPART, 0);
lo->lo_flags = 0;
@@ -1376,9 +1389,9 @@ static int lo_open(struct block_device *bdev, fmode_t mode)
goto out;
}
- mutex_lock(&lo->lo_ctl_mutex);
+ mutex_lock(&lo->lo_open_mutex);
lo->lo_refcnt++;
- mutex_unlock(&lo->lo_ctl_mutex);
+ mutex_unlock(&lo->lo_open_mutex);
out:
mutex_unlock(&loop_index_mutex);
return err;
@@ -1387,13 +1400,16 @@ out:
static void lo_release(struct gendisk *disk, fmode_t mode)
{
struct loop_device *lo = disk->private_data;
- int err;
+ int err, ref;
- mutex_lock(&lo->lo_ctl_mutex);
+ mutex_lock(&lo->lo_open_mutex);
+ ref = --lo->lo_refcnt;
+ mutex_unlock(&lo->lo_open_mutex);
- if (--lo->lo_refcnt)
+ if (ref)
goto out;
+ mutex_lock(&lo->lo_ctl_mutex);
if (lo->lo_flags & LO_FLAGS_AUTOCLEAR) {
/*
* In autoclear mode, stop the loop thread
@@ -1646,6 +1662,7 @@ static int loop_add(struct loop_device **l, int i)
disk->flags |= GENHD_FL_NO_PART_SCAN;
disk->flags |= GENHD_FL_EXT_DEVT;
mutex_init(&lo->lo_ctl_mutex);
+ mutex_init(&lo->lo_open_mutex);
lo->lo_number = i;
spin_lock_init(&lo->lo_lock);
disk->major = LOOP_MAJOR;
@@ -1763,11 +1780,14 @@ static long loop_control_ioctl(struct file *file, unsigned int cmd,
mutex_unlock(&lo->lo_ctl_mutex);
break;
}
+ mutex_lock(&lo->lo_open_mutex);
if (lo->lo_refcnt > 0) {
ret = -EBUSY;
+ mutex_unlock(&lo->lo_open_mutex);
mutex_unlock(&lo->lo_ctl_mutex);
break;
}
+ mutex_unlock(&lo->lo_open_mutex);
lo->lo_disk->private_data = NULL;
mutex_unlock(&lo->lo_ctl_mutex);
idr_remove(&loop_index_idr, lo->lo_number);
diff --git a/drivers/block/loop.h b/drivers/block/loop.h
index 301c27f..1b4acf2 100644
--- a/drivers/block/loop.h
+++ b/drivers/block/loop.h
@@ -59,6 +59,7 @@ struct loop_device {
bool write_started;
int lo_state;
struct mutex lo_ctl_mutex;
+ struct mutex lo_open_mutex;
struct request_queue *lo_queue;
struct blk_mq_tag_set tag_set;
--
1.8.3.1
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/