Re: [PATCH 14/26] cxl/region: Read existing extents on region creation

From: Jonathan Cameron
Date: Thu Apr 04 2024 - 12:05:04 EST


On Sun, 24 Mar 2024 16:18:17 -0700
ira.weiny@xxxxxxxxx wrote:

> From: Navneet Singh <navneet.singh@xxxxxxxxx>
>
> Dynamic capacity device extents may be left in an accepted state on a
> device due to an unexpected host crash. In this case creation of a new
> region on top of the DC partition (region) is expected to expose those
> extents for continued use.
>
> Once all endpoint decoders are part of a region and the region is being
> realized read the device extent list. For ease of review, this patch
> stops after reading the extent list and leaves realization of the region
> extents to a future patch.
>
> Signed-off-by: Navneet Singh <navneet.singh@xxxxxxxxx>
> Co-developed-by: Ira Weiny <ira.weiny@xxxxxxxxx>
> Signed-off-by: Ira Weiny <ira.weiny@xxxxxxxxx>

A few things inline.

J
> static void cxl_mem_get_records_log(struct cxl_memdev_state *mds,
> enum cxl_event_log_type type)
> {
> @@ -1406,6 +1462,142 @@ int cxl_dev_dynamic_capacity_identify(struct cxl_memdev_state *mds)
> }
> EXPORT_SYMBOL_NS_GPL(cxl_dev_dynamic_capacity_identify, CXL);
>
> +static int cxl_dev_get_dc_extent_cnt(struct cxl_memdev_state *mds,
> + unsigned int *extent_gen_num)
> +{
> + struct cxl_mbox_get_dc_extent_in get_dc_extent;
> + struct cxl_mbox_get_dc_extent_out dc_extents;
> + struct cxl_mbox_cmd mbox_cmd;
> + unsigned int count;
> + int rc;
> +
> + get_dc_extent = (struct cxl_mbox_get_dc_extent_in) {
> + .extent_cnt = cpu_to_le32(0),
> + .start_extent_index = cpu_to_le32(0),
> + };
> +
> + mbox_cmd = (struct cxl_mbox_cmd) {
> + .opcode = CXL_MBOX_OP_GET_DC_EXTENT_LIST,
> + .payload_in = &get_dc_extent,
> + .size_in = sizeof(get_dc_extent),
> + .size_out = sizeof(dc_extents),
> + .payload_out = &dc_extents,
> + .min_out = 1,

Why 1?

> + };
> +
> + rc = cxl_internal_send_cmd(mds, &mbox_cmd);
> + if (rc < 0)
> + return rc;
> +
> + count = le32_to_cpu(dc_extents.total_extent_cnt);
> + *extent_gen_num = le32_to_cpu(dc_extents.extent_list_num);
> +
> + return count;
> +}
> +
> +static int cxl_dev_get_dc_extents(struct cxl_endpoint_decoder *cxled,
> + unsigned int start_gen_num,
> + unsigned int exp_cnt)
> +{
> + struct cxl_memdev_state *mds = cxled_to_mds(cxled);
> + unsigned int start_index, total_read;
> + struct device *dev = mds->cxlds.dev;
> + struct cxl_mbox_cmd mbox_cmd;
> +
> + struct cxl_mbox_get_dc_extent_out *dc_extents __free(kfree) =
> + kvmalloc(mds->payload_size, GFP_KERNEL);
> + if (!dc_extents)
> + return -ENOMEM;
> +
> + total_read = 0;
> + start_index = 0;
> + do {
> + unsigned int nr_ext, total_extent_cnt, gen_num;
> + struct cxl_mbox_get_dc_extent_in get_dc_extent;
> + int rc;
> +
> + get_dc_extent = (struct cxl_mbox_get_dc_extent_in) {
> + .extent_cnt = cpu_to_le32(exp_cnt - start_index),
> + .start_extent_index = cpu_to_le32(start_index),
> + };
> +
> + mbox_cmd = (struct cxl_mbox_cmd) {
> + .opcode = CXL_MBOX_OP_GET_DC_EXTENT_LIST,
> + .payload_in = &get_dc_extent,
> + .size_in = sizeof(get_dc_extent),
> + .size_out = mds->payload_size,
> + .payload_out = dc_extents,
> + .min_out = 1,

Why 1?

> + };
> +
> + rc = cxl_internal_send_cmd(mds, &mbox_cmd);
> + if (rc < 0)
> + return rc;
> +
> + nr_ext = le32_to_cpu(dc_extents->ret_extent_cnt);
> + total_read += nr_ext;
> + total_extent_cnt = le32_to_cpu(dc_extents->total_extent_cnt);
> + gen_num = le32_to_cpu(dc_extents->extent_list_num);
> +
> + dev_dbg(dev, "Get extent list count:%d generation Num:%d\n",
> + total_extent_cnt, gen_num);
> +
> + if (gen_num != start_gen_num || exp_cnt != total_extent_cnt) {
> + dev_err(dev, "Possible incomplete extent list; gen %u != %u : cnt %u != %u\n",
> + gen_num, start_gen_num, exp_cnt, total_extent_cnt);
> + return -EIO;
> + }
> +
> + for (int i = 0; i < nr_ext ; i++) {
> + dev_dbg(dev, "Processing extent %d/%d\n",
> + start_index + i, exp_cnt);
> + rc = cxl_validate_extent(mds, &dc_extents->extent[i]);
> + if (rc)
> + continue;

A blank line here

> + if (!cxl_dc_extent_in_ed(cxled, &dc_extents->extent[i]))
> + continue;
and here would make this more readable I think.

> + rc = cxl_ed_add_one_extent(cxled, &dc_extents->extent[i]);
> + if (rc)
> + return rc;
> + }
> +
> + start_index += nr_ext;
> + } while (exp_cnt > total_read);
> +
> + return 0;
> +}

> diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
> index 0d7b09a49dcf..3e563ab29afe 100644
> --- a/drivers/cxl/core/region.c
> +++ b/drivers/cxl/core/region.c

>
> +static int cxl_region_read_extents(struct cxl_region *cxlr)
> +{
> + struct cxl_region_params *p = &cxlr->params;
> + int i;
> +
> + for (i = 0; i < p->nr_targets; i++) {
> + int rc;
Maybe worth giving up early if we see nr_targets > 1?

If nothing else it saves people trying to figure out what happens if we
reboot into an older kernel that doesn't support interleave (from one
that does)

> +
> + rc = cxl_read_dc_extents(p->targets[i]);
> + if (rc)
> + return rc;
> + }
> +
> + return 0;
> +}
> +

> diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h
> index 01bee6eedff3..8f2d8944d334 100644
> --- a/drivers/cxl/cxlmem.h
> +++ b/drivers/cxl/cxlmem.h

> +/*
> + * CXL rev 3.1 section 8.2.9.2.1.6; Table 8-51

Throw a table name or section name into the reference so people
can find it in CXL rNext.

> + */
> +#define CXL_DC_EXTENT_TAG_LEN 0x10
> +struct cxl_dc_extent {
> + __le64 start_dpa;
> + __le64 length;
> + u8 tag[CXL_DC_EXTENT_TAG_LEN];
> + __le16 shared_extn_seq;
> + u8 reserved[6];
> +} __packed;
> +

> +/*
> + * Get Dynamic Capacity Extent List; Output Payload
> + * CXL rev 3.1 section 8.2.9.9.9.2; Table 8-167
> + */
> +struct cxl_mbox_get_dc_extent_out {
> + __le32 ret_extent_cnt;
> + __le32 total_extent_cnt;
> + __le32 extent_list_num;

Naming isn't that clear given generation bit missing.

> + u8 rsvd[4];
> + struct cxl_dc_extent extent[];
> +} __packed;
> +