Re: [PATCH 16/26] cxl/extent: Realize extent devices

From: Alison Schofield
Date: Wed Apr 10 2024 - 20:17:02 EST


On Sun, Mar 24, 2024 at 04:18:19PM -0700, Ira Weiny wrote:
> From: Navneet Singh <navneet.singh@xxxxxxxxx>
>
> Once all extents of an interleave set are present a region must
> surface an extent to the region.

Why the vague words - realize and surface?

Maybe slip the word DCD in the commit msg:
cxl/extent: Create DCD region extent devices

And be more explicit about where we are in the setup process:

Once the region driver discovers all the extents of an interleave set
a region extent device must be created for every device extent found.

Maybe rough example, but my intent is to seqway from the fact that
the region driver has done it's part, now we are here, creating the
region extent devices. Should that be 'DAX' region extent devices?

>
> Without interleaving; endpoint decoder and region extents have a 1:1
> relationship. Future support for IW > 1 will maintain a N:1
> relationship between the device extents and region extents.
>
> Create a region extent device for every device extent found. Release of
> the extent device triggers a response to the underlying hardware extent.
>
> There is no strong use case to support the addition of extents which
> overlap previously accepted extent ranges. Reject such new extents
> until such time as a good use case emerges.
>
> Expose the necessary details of region extents by creating the following
> sysfs entries.
>
> /sys/bus/cxl/devices/dax_regionX/extentY
> /sys/bus/cxl/devices/dax_regionX/extentY/offset
> /sys/bus/cxl/devices/dax_regionX/extentY/length
> /sys/bus/cxl/devices/dax_regionX/extentY/label
>
> The use of the extent devices by the DAX layer is deferred to later
> patches.

You mean later in this set, right?


>
> Signed-off-by: Navneet Singh <navneet.singh@xxxxxxxxx>
> Co-developed-by: Ira Weiny <ira.weiny@xxxxxxxxx>
> Signed-off-by: Ira Weiny <ira.weiny@xxxxxxxxx>
>
> ---
> Changes for v1
> [iweiny: new patch]
> [iweiny: Rename 'dr_extent' to 'region_extent']
> ---
> drivers/cxl/core/Makefile | 1 +
> drivers/cxl/core/extent.c | 133 ++++++++++++++++++++++++++++++++++++++++++++++
> drivers/cxl/core/mbox.c | 43 +++++++++++++++
> drivers/cxl/core/region.c | 76 +++++++++++++++++++++++++-
> drivers/cxl/cxl.h | 37 +++++++++++++
> tools/testing/cxl/Kbuild | 1 +
> 6 files changed, 290 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/cxl/core/Makefile b/drivers/cxl/core/Makefile
> index 9259bcc6773c..35c5c76bfcf1 100644
> --- a/drivers/cxl/core/Makefile
> +++ b/drivers/cxl/core/Makefile
> @@ -14,5 +14,6 @@ cxl_core-y += pci.o
> cxl_core-y += hdm.o
> cxl_core-y += pmu.o
> cxl_core-y += cdat.o
> +cxl_core-y += extent.o
> cxl_core-$(CONFIG_TRACING) += trace.o
> cxl_core-$(CONFIG_CXL_REGION) += region.o
> diff --git a/drivers/cxl/core/extent.c b/drivers/cxl/core/extent.c
> new file mode 100644
> index 000000000000..487c220f1c3c
> --- /dev/null
> +++ b/drivers/cxl/core/extent.c
> @@ -0,0 +1,133 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/* Copyright(c) 2024 Intel Corporation. All rights reserved. */
> +
> +#include <linux/device.h>
> +#include <linux/slab.h>
> +#include <cxl.h>
> +
> +static DEFINE_IDA(cxl_extent_ida);
> +
> +static ssize_t offset_show(struct device *dev, struct device_attribute *attr,
> + char *buf)
> +{
> + struct region_extent *reg_ext = to_region_extent(dev);
> +
> + return sysfs_emit(buf, "%pa\n", &reg_ext->hpa_range.start);
> +}
> +static DEVICE_ATTR_RO(offset);
> +
> +static ssize_t length_show(struct device *dev, struct device_attribute *attr,
> + char *buf)
> +{
> + struct region_extent *reg_ext = to_region_extent(dev);
> + u64 length = range_len(&reg_ext->hpa_range);
> +
> + return sysfs_emit(buf, "%pa\n", &length);
> +}
> +static DEVICE_ATTR_RO(length);
> +
> +static ssize_t label_show(struct device *dev, struct device_attribute *attr,
> + char *buf)
> +{
> + struct region_extent *reg_ext = to_region_extent(dev);
> +
> + return sysfs_emit(buf, "%s\n", reg_ext->label);
> +}
> +static DEVICE_ATTR_RO(label);
> +
> +static struct attribute *region_extent_attrs[] = {
> + &dev_attr_offset.attr,
> + &dev_attr_length.attr,
> + &dev_attr_label.attr,
> + NULL,
> +};
> +
> +static const struct attribute_group region_extent_attribute_group = {
> + .attrs = region_extent_attrs,
> +};
> +
> +static const struct attribute_group *region_extent_attribute_groups[] = {
> + &region_extent_attribute_group,
> + NULL,
> +};
> +
> +static void region_extent_release(struct device *dev)
> +{
> + struct region_extent *reg_ext = to_region_extent(dev);
> +
> + cxl_release_ed_extent(&reg_ext->ed_ext);
> + ida_free(&cxl_extent_ida, reg_ext->dev.id);
> + kfree(reg_ext);
> +}
> +
> +static const struct device_type region_extent_type = {
> + .name = "extent",
> + .release = region_extent_release,
> + .groups = region_extent_attribute_groups,
> +};
> +
> +bool is_region_extent(struct device *dev)
> +{
> + return dev->type == &region_extent_type;
> +}
> +EXPORT_SYMBOL_NS_GPL(is_region_extent, CXL);
> +
> +static void region_extent_unregister(void *ext)
> +{
> + struct region_extent *reg_ext = ext;
> +
> + dev_dbg(&reg_ext->dev, "DAX region rm extent HPA %#llx - %#llx\n",
> + reg_ext->hpa_range.start, reg_ext->hpa_range.end);
> + device_unregister(&reg_ext->dev);
> +}
> +
> +int dax_region_create_ext(struct cxl_dax_region *cxlr_dax,
> + struct range *hpa_range,
> + const char *label,
> + struct range *dpa_range,
> + struct cxl_endpoint_decoder *cxled)
> +{
> + struct region_extent *reg_ext;
> + struct device *dev;
> + int rc, id;
> +
> + id = ida_alloc(&cxl_extent_ida, GFP_KERNEL);
> + if (id < 0)
> + return -ENOMEM;
> +
> + reg_ext = kzalloc(sizeof(*reg_ext), GFP_KERNEL);
> + if (!reg_ext)
> + return -ENOMEM;
> +
> + reg_ext->hpa_range = *hpa_range;
> + reg_ext->ed_ext.dpa_range = *dpa_range;
> + reg_ext->ed_ext.cxled = cxled;
> + snprintf(reg_ext->label, DAX_EXTENT_LABEL_LEN, "%s", label);
> +
> + dev = &reg_ext->dev;
> + device_initialize(dev);
> + dev->id = id;
> + device_set_pm_not_required(dev);
> + dev->parent = &cxlr_dax->dev;
> + dev->type = &region_extent_type;
> + rc = dev_set_name(dev, "extent%d", dev->id);
> + if (rc)
> + goto err;
> +
> + rc = device_add(dev);
> + if (rc)
> + goto err;
> +
> + dev_dbg(dev, "DAX region extent HPA %#llx - %#llx\n",
> + reg_ext->hpa_range.start, reg_ext->hpa_range.end);
> +
> + return devm_add_action_or_reset(&cxlr_dax->dev, region_extent_unregister,
> + reg_ext);
> +
> +err:
> + dev_err(&cxlr_dax->dev, "Failed to initialize DAX extent dev HPA %#llx - %#llx\n",
> + reg_ext->hpa_range.start, reg_ext->hpa_range.end);
> +
> + put_device(dev);
> + return rc;
> +}
> diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c
> index 9e33a0976828..6b00e717e42b 100644
> --- a/drivers/cxl/core/mbox.c
> +++ b/drivers/cxl/core/mbox.c
> @@ -1020,6 +1020,32 @@ static int cxl_clear_event_record(struct cxl_memdev_state *mds,
> return rc;
> }
>
> +static int cxl_send_dc_cap_response(struct cxl_memdev_state *mds,
> + struct range *extent, int opcode)
> +{
> + struct cxl_mbox_cmd mbox_cmd;
> + size_t size;
> +
> + struct cxl_mbox_dc_response *dc_res __free(kfree);
> + size = struct_size(dc_res, extent_list, 1);
> + dc_res = kzalloc(size, GFP_KERNEL);
> + if (!dc_res)
> + return -ENOMEM;
> +
> + dc_res->extent_list[0].dpa_start = cpu_to_le64(extent->start);
> + memset(dc_res->extent_list[0].reserved, 0, 8);
> + dc_res->extent_list[0].length = cpu_to_le64(range_len(extent));
> + dc_res->extent_list_size = cpu_to_le32(1);

Is the cpu_to_le32(1) necessary?
I notice similar for .extent_cnt, .start_extent_index in mbox.c


> +
> + mbox_cmd = (struct cxl_mbox_cmd) {
> + .opcode = opcode,
> + .size_in = size,
> + .payload_in = dc_res,
> + };
> +
> + return cxl_internal_send_cmd(mds, &mbox_cmd);
> +}
> +
> static struct cxl_memdev_state *
> cxled_to_mds(struct cxl_endpoint_decoder *cxled)
> {
> @@ -1029,6 +1055,23 @@ cxled_to_mds(struct cxl_endpoint_decoder *cxled)
> return container_of(cxlds, struct cxl_memdev_state, cxlds);
> }
>
> +void cxl_release_ed_extent(struct cxl_ed_extent *extent)
> +{
> + struct cxl_endpoint_decoder *cxled = extent->cxled;
> + struct cxl_memdev_state *mds = cxled_to_mds(cxled);
> + struct device *dev = mds->cxlds.dev;
> + int rc;
> +
> + dev_dbg(dev, "Releasing DC extent DPA %#llx - %#llx\n",
> + extent->dpa_range.start, extent->dpa_range.end);
> +
> + rc = cxl_send_dc_cap_response(mds, &extent->dpa_range, CXL_MBOX_OP_RELEASE_DC);
> + if (rc)
> + dev_dbg(dev, "Failed to respond releasing extent DPA %#llx - %#llx; %d\n",
> + extent->dpa_range.start, extent->dpa_range.end, rc);

Don't repeat the start/end details on the second dev_dbg(), add rc value
only.

> +}
> +EXPORT_SYMBOL_NS_GPL(cxl_release_ed_extent, CXL);
> +
> static void cxl_mem_get_records_log(struct cxl_memdev_state *mds,
> enum cxl_event_log_type type)
> {
> diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
> index 3e563ab29afe..7635ff109578 100644
> --- a/drivers/cxl/core/region.c
> +++ b/drivers/cxl/core/region.c
> @@ -1450,11 +1450,81 @@ static int cxl_region_validate_position(struct cxl_region *cxlr,
> return 0;
> }
>
> +static int extent_check_overlap(struct device *dev, void *arg)
> +{
> + struct range *new_range = arg;
> + struct region_extent *ext;
> +
> + if (!is_region_extent(dev))
> + return 0;
> +
> + ext = to_region_extent(dev);
> + return range_overlaps(&ext->hpa_range, new_range);
> +}
> +
> +static int extent_overlaps(struct cxl_dax_region *cxlr_dax,
> + struct range *hpa_range)
> +{
> + struct device *dev __free(put_device) =
> + device_find_child(&cxlr_dax->dev, hpa_range, extent_check_overlap);
> +
> + if (dev)
> + return -EINVAL;
> + return 0;
> +}
> +
> /* Callers are expected to ensure cxled has been attached to a region */
> int cxl_ed_add_one_extent(struct cxl_endpoint_decoder *cxled,
> struct cxl_dc_extent *dc_extent)
> {
> - return 0;
> + struct cxl_region *cxlr = cxled->cxld.region;
> + struct range ext_dpa_range, ext_hpa_range;
> + struct device *dev = &cxlr->dev;
> + resource_size_t dpa_offset, hpa;
> +
> + /*
> + * Interleave ways == 1 means this coresponds to a 1:1 mapping between
> + * device extents and DAX region extents. Future implementations
> + * should hold DC region extents here until the full dax region extent
> + * can be realized.
> + */
> + if (cxlr->params.interleave_ways != 1) {
> + dev_err(dev, "Interleaving DC not supported\n");
> + return -EINVAL;
> + }
> +
> + ext_dpa_range = (struct range) {
> + .start = le64_to_cpu(dc_extent->start_dpa),
> + .end = le64_to_cpu(dc_extent->start_dpa) +
> + le64_to_cpu(dc_extent->length) - 1,
> + };
> +
> + dev_dbg(dev, "Adding DC extent DPA %#llx - %#llx\n",
> + ext_dpa_range.start, ext_dpa_range.end);
> +
> + /*
> + * Without interleave...
> + * HPA offset == DPA offset
> + * ... but do the math anyway
> + */
> + dpa_offset = ext_dpa_range.start - cxled->dpa_res->start;
> + hpa = cxled->cxld.hpa_range.start + dpa_offset;
> +
> + ext_hpa_range = (struct range) {
> + .start = hpa - cxlr->cxlr_dax->hpa_range.start,
> + .end = ext_hpa_range.start + range_len(&ext_dpa_range) - 1,
> + };
> +
> + if (extent_overlaps(cxlr->cxlr_dax, &ext_hpa_range))
> + return -EINVAL;
> +
> + dev_dbg(dev, "Realizing region extent at HPA %#llx - %#llx\n",
> + ext_hpa_range.start, ext_hpa_range.end);
> +
> + return dax_region_create_ext(cxlr->cxlr_dax, &ext_hpa_range,
> + (char *)dc_extent->tag,
> + &ext_dpa_range,
> + cxled);
> }
>
> static int cxl_region_attach_position(struct cxl_region *cxlr,
> @@ -2684,6 +2754,7 @@ static struct cxl_dax_region *cxl_dax_region_alloc(struct cxl_region *cxlr)
>
> dev = &cxlr_dax->dev;
> cxlr_dax->cxlr = cxlr;
> + cxlr->cxlr_dax = cxlr_dax;
> device_initialize(dev);
> lockdep_set_class(&dev->mutex, &cxl_dax_region_key);
> device_set_pm_not_required(dev);
> @@ -2799,7 +2870,10 @@ static int cxl_region_read_extents(struct cxl_region *cxlr)
> static void cxlr_dax_unregister(void *_cxlr_dax)
> {
> struct cxl_dax_region *cxlr_dax = _cxlr_dax;
> + struct cxl_region *cxlr = cxlr_dax->cxlr;
>
> + cxlr->cxlr_dax = NULL;
> + cxlr_dax->cxlr = NULL;
> device_unregister(&cxlr_dax->dev);
> }
>
> diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h
> index d585f5fdd3ae..5379ad7f5852 100644
> --- a/drivers/cxl/cxl.h
> +++ b/drivers/cxl/cxl.h
> @@ -564,6 +564,7 @@ struct cxl_region_params {
> * @type: Endpoint decoder target type
> * @cxl_nvb: nvdimm bridge for coordinating @cxlr_pmem setup / shutdown
> * @cxlr_pmem: (for pmem regions) cached copy of the nvdimm bridge
> + * @cxlr_dax: (for DC regions) cached copy of CXL DAX bridge
> * @flags: Region state flags
> * @params: active + config params for the region
> */
> @@ -574,6 +575,7 @@ struct cxl_region {
> enum cxl_decoder_type type;
> struct cxl_nvdimm_bridge *cxl_nvb;
> struct cxl_pmem_region *cxlr_pmem;
> + struct cxl_dax_region *cxlr_dax;
> unsigned long flags;
> struct cxl_region_params params;
> };
> @@ -617,6 +619,41 @@ struct cxl_dax_region {
> struct range hpa_range;
> };
>
> +/**
> + * struct cxl_ed_extent - Extent within an endpoint decoder
> + * @dpa_range: DPA range this extent covers within the decoder
> + * @cxled: reference to the endpoint decoder
> + */
> +struct cxl_ed_extent {
> + struct range dpa_range;
> + struct cxl_endpoint_decoder *cxled;
> +};
> +void cxl_release_ed_extent(struct cxl_ed_extent *extent);
> +
> +/**
> + * struct region_extent - CXL DAX region extent
> + * @dev: device representing this extent
> + * @hpa_range: HPA range of this extent
> + * @label: label of the extent
> + * @ed_ext: Endpoint decoder extent which backs this extent
> + */
> +#define DAX_EXTENT_LABEL_LEN 64
> +struct region_extent {
> + struct device dev;
> + struct range hpa_range;
> + char label[DAX_EXTENT_LABEL_LEN];
> + struct cxl_ed_extent ed_ext;
> +};
> +
> +int dax_region_create_ext(struct cxl_dax_region *cxlr_dax,
> + struct range *hpa_range,
> + const char *label,
> + struct range *dpa_range,
> + struct cxl_endpoint_decoder *cxled);
> +
> +bool is_region_extent(struct device *dev);
> +#define to_region_extent(dev) container_of(dev, struct region_extent, dev)
> +
> /**
> * struct cxl_port - logical collection of upstream port devices and
> * downstream port devices to construct a CXL memory
> diff --git a/tools/testing/cxl/Kbuild b/tools/testing/cxl/Kbuild
> index 030b388800f0..dc0cc1d5e6a0 100644
> --- a/tools/testing/cxl/Kbuild
> +++ b/tools/testing/cxl/Kbuild
> @@ -60,6 +60,7 @@ cxl_core-y += $(CXL_CORE_SRC)/pci.o
> cxl_core-y += $(CXL_CORE_SRC)/hdm.o
> cxl_core-y += $(CXL_CORE_SRC)/pmu.o
> cxl_core-y += $(CXL_CORE_SRC)/cdat.o
> +cxl_core-y += $(CXL_CORE_SRC)/extent.o
> cxl_core-$(CONFIG_TRACING) += $(CXL_CORE_SRC)/trace.o
> cxl_core-$(CONFIG_CXL_REGION) += $(CXL_CORE_SRC)/region.o
> cxl_core-y += config_check.o
>
> --
> 2.44.0
>