[resend PATCH v2 11/33] dm: add dax_device and dax_operations support

From: Dan Williams
Date: Mon Apr 17 2017 - 15:15:50 EST


Allocate a dax_device to represent the capacity of a device-mapper
instance. Provide a ->direct_access() method via the new dax_operations
indirection that mirrors the functionality of the current direct_access
support via block_device_operations. Once fs/dax.c has been converted
to use dax_operations the old dm_blk_direct_access() will be removed.

A new helper dm_dax_get_live_target() is introduced to separate some of
the dm-specifics from the direct_access implementation.

This enabling is only for the top-level dm representation to upper
layers. Converting target direct_access implementations is deferred to a
separate patch.

Cc: Toshi Kani <toshi.kani@xxxxxxx>
Cc: Mike Snitzer <snitzer@xxxxxxxxxx>
Signed-off-by: Dan Williams <dan.j.williams@xxxxxxxxx>
---
drivers/md/Kconfig | 1
drivers/md/dm-core.h | 1
drivers/md/dm.c | 84 ++++++++++++++++++++++++++++++++++-------
include/linux/device-mapper.h | 1
4 files changed, 73 insertions(+), 14 deletions(-)

diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig
index b7767da50c26..1de8372d9459 100644
--- a/drivers/md/Kconfig
+++ b/drivers/md/Kconfig
@@ -200,6 +200,7 @@ config BLK_DEV_DM_BUILTIN
config BLK_DEV_DM
tristate "Device mapper support"
select BLK_DEV_DM_BUILTIN
+ select DAX
---help---
Device-mapper is a low level volume manager. It works by allowing
people to specify mappings for ranges of logical sectors. Various
diff --git a/drivers/md/dm-core.h b/drivers/md/dm-core.h
index 136fda3ff9e5..538630190f66 100644
--- a/drivers/md/dm-core.h
+++ b/drivers/md/dm-core.h
@@ -58,6 +58,7 @@ struct mapped_device {
struct target_type *immutable_target_type;

struct gendisk *disk;
+ struct dax_device *dax_dev;
char name[16];

void *interface_ptr;
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index dfb75979e455..bd56dfe43a99 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -16,6 +16,7 @@
#include <linux/blkpg.h>
#include <linux/bio.h>
#include <linux/mempool.h>
+#include <linux/dax.h>
#include <linux/slab.h>
#include <linux/idr.h>
#include <linux/hdreg.h>
@@ -908,31 +909,68 @@ int dm_set_target_max_io_len(struct dm_target *ti, sector_t len)
}
EXPORT_SYMBOL_GPL(dm_set_target_max_io_len);

-static long dm_blk_direct_access(struct block_device *bdev, sector_t sector,
- void **kaddr, pfn_t *pfn, long size)
+static struct dm_target *dm_dax_get_live_target(struct mapped_device *md,
+ sector_t sector, int *srcu_idx)
{
- struct mapped_device *md = bdev->bd_disk->private_data;
struct dm_table *map;
struct dm_target *ti;
- int srcu_idx;
- long len, ret = -EIO;

- map = dm_get_live_table(md, &srcu_idx);
+ map = dm_get_live_table(md, srcu_idx);
if (!map)
- goto out;
+ return NULL;

ti = dm_table_find_target(map, sector);
if (!dm_target_is_valid(ti))
- goto out;
+ return NULL;

- len = max_io_len(sector, ti) << SECTOR_SHIFT;
- size = min(len, size);
+ return ti;
+}

- if (ti->type->direct_access)
- ret = ti->type->direct_access(ti, sector, kaddr, pfn, size);
-out:
+static long dm_dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff,
+ long nr_pages, void **kaddr, pfn_t *pfn)
+{
+ struct mapped_device *md = dax_get_private(dax_dev);
+ sector_t sector = pgoff * PAGE_SECTORS;
+ struct dm_target *ti;
+ long len, ret = -EIO;
+ int srcu_idx;
+
+ ti = dm_dax_get_live_target(md, sector, &srcu_idx);
+
+ if (!ti)
+ goto out;
+ if (!ti->type->direct_access)
+ goto out;
+ len = max_io_len(sector, ti) / PAGE_SECTORS;
+ if (len < 1)
+ goto out;
+ nr_pages = min(len, nr_pages);
+ if (ti->type->direct_access) {
+ ret = ti->type->direct_access(ti, sector, kaddr, pfn,
+ nr_pages * PAGE_SIZE);
+ /*
+ * FIXME: convert ti->type->direct_access to return
+ * nr_pages directly.
+ */
+ if (ret >= 0)
+ ret /= PAGE_SIZE;
+ }
+ out:
dm_put_live_table(md, srcu_idx);
- return min(ret, size);
+
+ return ret;
+}
+
+static long dm_blk_direct_access(struct block_device *bdev, sector_t sector,
+ void **kaddr, pfn_t *pfn, long size)
+{
+ struct mapped_device *md = bdev->bd_disk->private_data;
+ struct dax_device *dax_dev = md->dax_dev;
+ long nr_pages = size / PAGE_SIZE;
+
+ nr_pages = dm_dax_direct_access(dax_dev, sector / PAGE_SECTORS,
+ nr_pages, kaddr, pfn);
+ return nr_pages < 0 ? nr_pages : nr_pages * PAGE_SIZE;
}

/*
@@ -1437,6 +1475,7 @@ static int next_free_minor(int *minor)
}

static const struct block_device_operations dm_blk_dops;
+static const struct dax_operations dm_dax_ops;

static void dm_wq_work(struct work_struct *work);

@@ -1483,6 +1522,12 @@ static void cleanup_mapped_device(struct mapped_device *md)
if (md->bs)
bioset_free(md->bs);

+ if (md->dax_dev) {
+ kill_dax(md->dax_dev);
+ put_dax(md->dax_dev);
+ md->dax_dev = NULL;
+ }
+
if (md->disk) {
spin_lock(&_minor_lock);
md->disk->private_data = NULL;
@@ -1510,6 +1555,7 @@ static void cleanup_mapped_device(struct mapped_device *md)
static struct mapped_device *alloc_dev(int minor)
{
int r, numa_node_id = dm_get_numa_node();
+ struct dax_device *dax_dev;
struct mapped_device *md;
void *old_md;

@@ -1574,6 +1620,12 @@ static struct mapped_device *alloc_dev(int minor)
md->disk->queue = md->queue;
md->disk->private_data = md;
sprintf(md->disk->disk_name, "dm-%d", minor);
+
+ dax_dev = alloc_dax(md, md->disk->disk_name, &dm_dax_ops);
+ if (!dax_dev)
+ goto bad;
+ md->dax_dev = dax_dev;
+
add_disk(md->disk);
format_dev_t(md->name, MKDEV(_major, minor));

@@ -2781,6 +2833,10 @@ static const struct block_device_operations dm_blk_dops = {
.owner = THIS_MODULE
};

+static const struct dax_operations dm_dax_ops = {
+ .direct_access = dm_dax_direct_access,
+};
+
/*
* module hooks
*/
diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index a7e6903866fd..bcba4d89089c 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -130,6 +130,7 @@ typedef int (*dm_busy_fn) (struct dm_target *ti);
*/
typedef long (*dm_direct_access_fn) (struct dm_target *ti, sector_t sector,
void **kaddr, pfn_t *pfn, long size);
+#define PAGE_SECTORS (PAGE_SIZE / 512)

void dm_error(const char *message);