Re: [PATCH] device-dax: fix sysfs attribute deadlock

From: Yi Zhang
Date: Tue May 02 2017 - 06:44:04 EST


Verified this patch on 4.11.

Tested-by: Yi Zhang <yizhan@xxxxxxxxxx>

Best Regards,
Yi Zhang


----- Original Message -----
From: "Dan Williams" <dan.j.williams@xxxxxxxxx>
To: linux-nvdimm@xxxxxxxxxxxx
Cc: linux-kernel@xxxxxxxxxxxxxxx, stable@xxxxxxxxxxxxxxx, "Yi Zhang" <yizhan@xxxxxxxxxx>
Sent: Sunday, April 30, 2017 10:21:54 PM
Subject: [PATCH] device-dax: fix sysfs attribute deadlock

Usage of device_lock() for dax_region attributes is unnecessary and
deadlock-prone. It's unnecessary because the order of registration /
unregistration guarantees that drvdata is always valid. It's
deadlock-prone because it sets up this situation:

ndctl D 0 2170 2082 0x00000000
Call Trace:
__schedule+0x31f/0x980
schedule+0x3d/0x90
schedule_preempt_disabled+0x15/0x20
__mutex_lock+0x402/0x980
? __mutex_lock+0x158/0x980
? align_show+0x2b/0x80 [dax]
? kernfs_seq_start+0x2f/0x90
mutex_lock_nested+0x1b/0x20
align_show+0x2b/0x80 [dax]
dev_attr_show+0x20/0x50

ndctl D 0 2186 2079 0x00000000
Call Trace:
__schedule+0x31f/0x980
schedule+0x3d/0x90
__kernfs_remove+0x1f6/0x340
? kernfs_remove_by_name_ns+0x45/0xa0
? remove_wait_queue+0x70/0x70
kernfs_remove_by_name_ns+0x45/0xa0
remove_files.isra.1+0x35/0x70
sysfs_remove_group+0x44/0x90
sysfs_remove_groups+0x2e/0x50
dax_region_unregister+0x25/0x40 [dax]
devm_action_release+0xf/0x20
release_nodes+0x16d/0x2b0
devres_release_all+0x3c/0x60
device_release_driver_internal+0x17d/0x220
device_release_driver+0x12/0x20
unbind_store+0x112/0x160

ndctl/2170 is trying to acquire the device_lock() to read an attribute,
and ndctl/2186 is holding the device_lock() while trying to drain all
active attribute readers, so neither task can make progress.

Thanks to Yi Zhang for the reproduction script.

Fixes: d7fe1a67f658 ("dax: add region 'id', 'size', and 'align' attributes")
Cc: <stable@xxxxxxxxxxxxxxx>
Reported-by: Yi Zhang <yizhan@xxxxxxxxxx>
Signed-off-by: Dan Williams <dan.j.williams@xxxxxxxxx>
---
drivers/dax/dax.c | 40 ++++++++++++----------------------------
1 file changed, 12 insertions(+), 28 deletions(-)

diff --git a/drivers/dax/dax.c b/drivers/dax/dax.c
index ef93aa84622b..5e8302d3a89c 100644
--- a/drivers/dax/dax.c
+++ b/drivers/dax/dax.c
@@ -36,36 +36,27 @@ static struct kmem_cache *dax_cache __read_mostly;
static struct super_block *dax_superblock __read_mostly;
MODULE_PARM_DESC(nr_dax, "max number of device-dax instances");

+/*
+ * Rely on the fact that drvdata is set before the attributes are
+ * registered, and that the attributes are unregistered before drvdata
+ * is cleared to assume that drvdata is always valid.
+ */
static ssize_t id_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
- struct dax_region *dax_region;
- ssize_t rc = -ENXIO;
+ struct dax_region *dax_region = dev_get_drvdata(dev);

- device_lock(dev);
- dax_region = dev_get_drvdata(dev);
- if (dax_region)
- rc = sprintf(buf, "%d\n", dax_region->id);
- device_unlock(dev);
-
- return rc;
+ return sprintf(buf, "%d\n", dax_region->id);
}
static DEVICE_ATTR_RO(id);

static ssize_t region_size_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
- struct dax_region *dax_region;
- ssize_t rc = -ENXIO;
+ struct dax_region *dax_region = dev_get_drvdata(dev);

- device_lock(dev);
- dax_region = dev_get_drvdata(dev);
- if (dax_region)
- rc = sprintf(buf, "%llu\n", (unsigned long long)
- resource_size(&dax_region->res));
- device_unlock(dev);
-
- return rc;
+ return sprintf(buf, "%llu\n", (unsigned long long)
+ resource_size(&dax_region->res));
}
static struct device_attribute dev_attr_region_size = __ATTR(size, 0444,
region_size_show, NULL);
@@ -73,16 +64,9 @@ static struct device_attribute dev_attr_region_size = __ATTR(size, 0444,
static ssize_t align_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
- struct dax_region *dax_region;
- ssize_t rc = -ENXIO;
+ struct dax_region *dax_region = dev_get_drvdata(dev);

- device_lock(dev);
- dax_region = dev_get_drvdata(dev);
- if (dax_region)
- rc = sprintf(buf, "%u\n", dax_region->align);
- device_unlock(dev);
-
- return rc;
+ return sprintf(buf, "%u\n", dax_region->align);
}
static DEVICE_ATTR_RO(align);