[PATCH 5.6 016/166] rbd: avoid a deadlock on header_rwsem when flushing notifies
From: Greg Kroah-Hartman
Date: Wed Apr 22 2020 - 06:39:15 EST
From: Ilya Dryomov <idryomov@xxxxxxxxx>
commit 0e4e1de5b63fa423b13593337a27fd2d2b0bcf77 upstream.
rbd_unregister_watch() flushes notifies and therefore cannot be called
under header_rwsem because a header update notify takes header_rwsem to
synchronize with "rbd map". If mapping an image fails after the watch
is established and a header update notify sneaks in, we deadlock when
erroring out from rbd_dev_image_probe().
Move watch registration and unregistration out of the critical section.
The only reason they were put there was to make header_rwsem management
slightly more obvious.
Fixes: 811c66887746 ("rbd: fix rbd map vs notify races")
Signed-off-by: Ilya Dryomov <idryomov@xxxxxxxxx>
Reviewed-by: Jason Dillaman <dillaman@xxxxxxxxxx>
Signed-off-by: Greg Kroah-Hartman <gregkh@xxxxxxxxxxxxxxxxxxx>
---
drivers/block/rbd.c | 17 +++++++++++++----
1 file changed, 13 insertions(+), 4 deletions(-)
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -4554,6 +4554,10 @@ static void cancel_tasks_sync(struct rbd
cancel_work_sync(&rbd_dev->unlock_work);
}
+/*
+ * header_rwsem must not be held to avoid a deadlock with
+ * rbd_dev_refresh() when flushing notifies.
+ */
static void rbd_unregister_watch(struct rbd_device *rbd_dev)
{
cancel_tasks_sync(rbd_dev);
@@ -6964,6 +6968,9 @@ static void rbd_dev_image_release(struct
* device. If this image is the one being mapped (i.e., not a
* parent), initiate a watch on its header object before using that
* object to get detailed information about the rbd image.
+ *
+ * On success, returns with header_rwsem held for write if called
+ * with @depth == 0.
*/
static int rbd_dev_image_probe(struct rbd_device *rbd_dev, int depth)
{
@@ -6993,6 +7000,9 @@ static int rbd_dev_image_probe(struct rb
}
}
+ if (!depth)
+ down_write(&rbd_dev->header_rwsem);
+
ret = rbd_dev_header_info(rbd_dev);
if (ret) {
if (ret == -ENOENT && !need_watch)
@@ -7044,6 +7054,8 @@ static int rbd_dev_image_probe(struct rb
err_out_probe:
rbd_dev_unprobe(rbd_dev);
err_out_watch:
+ if (!depth)
+ up_write(&rbd_dev->header_rwsem);
if (need_watch)
rbd_unregister_watch(rbd_dev);
err_out_format:
@@ -7107,12 +7119,9 @@ static ssize_t do_rbd_add(struct bus_typ
goto err_out_rbd_dev;
}
- down_write(&rbd_dev->header_rwsem);
rc = rbd_dev_image_probe(rbd_dev, 0);
- if (rc < 0) {
- up_write(&rbd_dev->header_rwsem);
+ if (rc < 0)
goto err_out_rbd_dev;
- }
if (rbd_dev->opts->alloc_size > rbd_dev->layout.object_size) {
rbd_warn(rbd_dev, "alloc_size adjusted to %u",