Mathieu Desnoyers wrote:
On 2024-01-31 16:02, Dan Williams wrote:
Mathieu Desnoyers wrote:
Replace the following fs/Kconfig:FS_DAX dependency:
depends on !(ARM || MIPS || SPARC)
By a runtime check within alloc_dax().
This is done in preparation for its use by each filesystem supporting
the "dax" mount option to validate whether DAX is indeed supported.
This is done in preparation for using cpu_dcache_is_aliasing() in a
following change which will properly support architectures which detect
data cache aliasing at runtime.
Fixes: d92576f1167c ("dax: does not work correctly with virtual aliasing caches")
Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@xxxxxxxxxxxx>
Cc: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx>
Cc: Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx>
Cc: linux-mm@xxxxxxxxx
Cc: linux-arch@xxxxxxxxxxxxxxx
Cc: Dan Williams <dan.j.williams@xxxxxxxxx>
Cc: Vishal Verma <vishal.l.verma@xxxxxxxxx>
Cc: Dave Jiang <dave.jiang@xxxxxxxxx>
Cc: Matthew Wilcox <willy@xxxxxxxxxxxxx>
Cc: Arnd Bergmann <arnd@xxxxxxxx>
Cc: Russell King <linux@xxxxxxxxxxxxxxx>
Cc: nvdimm@xxxxxxxxxxxxxxx
Cc: linux-cxl@xxxxxxxxxxxxxxx
Cc: linux-fsdevel@xxxxxxxxxxxxxxx
Cc: dm-devel@xxxxxxxxxxxxxxx
---
drivers/dax/super.c | 6 ++++++
fs/Kconfig | 1 -
2 files changed, 6 insertions(+), 1 deletion(-)
diff --git a/drivers/dax/super.c b/drivers/dax/super.c
index 0da9232ea175..e9f397b8a5a3 100644
--- a/drivers/dax/super.c
+++ b/drivers/dax/super.c
@@ -445,6 +445,12 @@ struct dax_device *alloc_dax(void *private, const struct dax_operations *ops)
dev_t devt;
int minor;
+ /* Unavailable on architectures with virtually aliased data caches. */
+ if (IS_ENABLED(CONFIG_ARM) ||
+ IS_ENABLED(CONFIG_MIPS) ||
+ IS_ENABLED(CONFIG_SPARC))
+ return NULL;
This function returns ERR_PTR(), not NULL on failure.
Except that it returns NULL in the CONFIG_DAX=n case as you
noticed below.
...and I notice this mistake is also made in include/linux/dax.h in the
CONFIG_DAX=n case. That function also mentions:
static inline struct dax_device *alloc_dax(void *private,
const struct dax_operations *ops)
{
/*
* Callers should check IS_ENABLED(CONFIG_DAX) to know if this
* NULL is an error or expected.
*/
return NULL;
}
...and none of the callers validate the result, but now runtime
validation is necessary. I.e. it is not enough to check
IS_ENABLED(CONFIG_DAX) it also needs to check cpu_dcache_is_aliasing().
If the callers select DAX in their Kconfig, then they don't have to
explicitly check for IS_ENABLED(CONFIG_DAX). Things change for the
introduced runtime check though.
With that, there are a few more fixup places needed, pmem_attach_disk(),
dcssblk_add_store(), and virtio_fs_setup_dax().
Which approach should we take then ? Should we:
A) Keep returning NULL from alloc_dax() for both
cpu_dcache_is_aliasing() and CONFIG_DAX=n, and use IS_ERR_OR_NULL()
in the caller. If we do this, then the callers need to somehow
translate this NULL into a negative error value, or
B) Replace this NULL return value in both cases by a ERR_PTR() (which
error value should we return ?).
I would favor approach B) which appears more robust and introduces
fewer changes. If we go for that approach do we still need to change
the callers ?
I agree approach B is the way to go, but that still requires these
fixups, feel free to steal these hunks and split them into patches:
Co-developed-by: Dan Williams <dan.j.williams@xxxxxxxxx>
Signed-off-by: Dan Williams <dan.j.williams@xxxxxxxxx>
...but note they are compile-tested only. They assume that alloc_dax()
returns ERR_PTR(-EOPNOTSUPP) when the arch support is missing, and I
wrote them quickly so I might have missed something.
diff --git a/drivers/dax/super.c b/drivers/dax/super.c
index f4b635526345..254d3b1e420e 100644
--- a/drivers/dax/super.c
+++ b/drivers/dax/super.c
@@ -322,7 +322,7 @@ EXPORT_SYMBOL_GPL(dax_alive);
*/
void kill_dax(struct dax_device *dax_dev)
{
- if (!dax_dev)
+ if (IS_ERR_OR_NULL(dax_dev))
return;
if (dax_dev->holder_data != NULL)
diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
index 4e8fdcb3f1c8..b69c9e442cf4 100644
--- a/drivers/nvdimm/pmem.c
+++ b/drivers/nvdimm/pmem.c
@@ -560,17 +560,19 @@ static int pmem_attach_disk(struct device *dev,
dax_dev = alloc_dax(pmem, &pmem_dax_ops);
if (IS_ERR(dax_dev)) {
rc = PTR_ERR(dax_dev);
- goto out;
+ if (rc != -EOPNOTSUPP)
+ goto out;
+ } else {
+ set_dax_nocache(dax_dev);
+ set_dax_nomc(dax_dev);
+ if (is_nvdimm_sync(nd_region))
+ set_dax_synchronous(dax_dev);
+ rc = dax_add_host(dax_dev, disk);
+ if (rc)
+ goto out_cleanup_dax;
+ dax_write_cache(dax_dev, nvdimm_has_cache(nd_region));
+ pmem->dax_dev = dax_dev;
}
- set_dax_nocache(dax_dev);
- set_dax_nomc(dax_dev);
- if (is_nvdimm_sync(nd_region))
- set_dax_synchronous(dax_dev);
- rc = dax_add_host(dax_dev, disk);
- if (rc)
- goto out_cleanup_dax;
- dax_write_cache(dax_dev, nvdimm_has_cache(nd_region));
- pmem->dax_dev = dax_dev;
rc = device_add_disk(dev, disk, pmem_attribute_groups);
if (rc)
diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c
index 4b7ecd4fd431..f911e58a24dd 100644
--- a/drivers/s390/block/dcssblk.c
+++ b/drivers/s390/block/dcssblk.c
@@ -681,12 +681,14 @@ dcssblk_add_store(struct device *dev, struct device_attribute *attr, const char
if (IS_ERR(dev_info->dax_dev)) {
rc = PTR_ERR(dev_info->dax_dev);
dev_info->dax_dev = NULL;
- goto put_dev;
+ if (rc != -EOPNOTSUPP)
+ goto put_dev;
+ } else {
+ set_dax_synchronous(dev_info->dax_dev);
+ rc = dax_add_host(dev_info->dax_dev, dev_info->gd);
+ if (rc)
+ goto out_dax;
}
- set_dax_synchronous(dev_info->dax_dev);
- rc = dax_add_host(dev_info->dax_dev, dev_info->gd);
- if (rc)
- goto out_dax;
get_device(&dev_info->dev);
rc = device_add_disk(&dev_info->dev, dev_info->gd, NULL);
diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c
index 5f1be1da92ce..11053a70f5ab 100644
--- a/fs/fuse/virtio_fs.c
+++ b/fs/fuse/virtio_fs.c
@@ -16,6 +16,7 @@
#include <linux/fs_context.h>
#include <linux/fs_parser.h>
#include <linux/highmem.h>
+#include <linux/cleanup.h>
#include <linux/uio.h>
#include "fuse_i.h"
@@ -795,8 +796,11 @@ static void virtio_fs_cleanup_dax(void *data)
put_dax(dax_dev);
}
+DEFINE_FREE(cleanup_dax, struct dax_dev *, if (!IS_ERR_OR_NULL(_T)) virtio_fs_cleanup_dax(_T))
+
static int virtio_fs_setup_dax(struct virtio_device *vdev, struct virtio_fs *fs)
{
+ struct dax_device *dax_dev __free(cleanup_dax) = NULL;
struct virtio_shm_region cache_reg;
struct dev_pagemap *pgmap;
bool have_cache;
@@ -804,6 +808,15 @@ static int virtio_fs_setup_dax(struct virtio_device *vdev, struct virtio_fs *fs)
if (!IS_ENABLED(CONFIG_FUSE_DAX))
return 0;
+ dax_dev = alloc_dax(fs, &virtio_fs_dax_ops);
+ if (IS_ERR(dax_dev)) {
+ int rc = PTR_ERR(dax_dev);
+
+ if (rc == -EOPNOTSUPP)
+ return 0;
+ return rc;
+ }
+
/* Get cache region */
have_cache = virtio_get_shm_region(vdev, &cache_reg,
(u8)VIRTIO_FS_SHMCAP_ID_CACHE);
@@ -849,10 +862,7 @@ static int virtio_fs_setup_dax(struct virtio_device *vdev, struct virtio_fs *fs)
dev_dbg(&vdev->dev, "%s: window kaddr 0x%px phys_addr 0x%llx len 0x%llx\n",
__func__, fs->window_kaddr, cache_reg.addr, cache_reg.len);
- fs->dax_dev = alloc_dax(fs, &virtio_fs_dax_ops);
- if (IS_ERR(fs->dax_dev))
- return PTR_ERR(fs->dax_dev);
-
+ fs->dax_dev = no_free_ptr(dax_dev);
return devm_add_action_or_reset(&vdev->dev, virtio_fs_cleanup_dax,
fs->dax_dev);
}