RE: [EXT] Re: [PATCH v3 1/2] perf/marvell: Odyssey DDR Performance monitor support

From: Gowthami Thiagarajan
Date: Tue Feb 27 2024 - 08:49:45 EST


Hi Jonathan,

Please find the response inline.

> -----Original Message-----
> From: Jonathan Cameron <Jonathan.Cameron@xxxxxxxxxx>
> Sent: Monday, January 29, 2024 5:35 PM
> To: Gowthami Thiagarajan <gthiagarajan@xxxxxxxxxxx>
> Cc: will@xxxxxxxxxx; mark.rutland@xxxxxxx; linux-arm-kernel@xxxxxxxxxxxxxxxxxxx; linux-
> kernel@xxxxxxxxxxxxxxx; Sunil Kovvuri Goutham <sgoutham@xxxxxxxxxxx>; Bharat Bhushan
> <bbhushan2@xxxxxxxxxxx>; George Cherian <gcherian@xxxxxxxxxxx>
> Subject: [EXT] Re: [PATCH v3 1/2] perf/marvell: Odyssey DDR Performance monitor support
>
> External Email
>
> ----------------------------------------------------------------------
> On Mon, 22 Jan 2024 18:19:32 +0530
> Gowthami Thiagarajan <gthiagarajan@xxxxxxxxxxx> wrote:
>
> > Odyssey DRAM Subsystem supports eight counters for monitoring performance
> > and software can program those counters to monitor any of the defined
> > performance events. Supported performance events include those counted
> > at the interface between the DDR controller and the PHY, interface between
> > the DDR Controller and the CHI interconnect, or within the DDR Controller.
> >
> > Additionally DSS also supports two fixed performance event counters, one
> > for ddr reads and the other for ddr writes.
> >
> > Signed-off-by: Gowthami Thiagarajan <gthiagarajan@xxxxxxxxxxx>
> Hi Gowthami,
>
> A quick drive by review.
> Questions like whether this patch should be split are down to the perf maintainers.
> I would ask for it, but not my area of the kernel ;)
>
> Jonathan
>
>
> > +#define VERSION_V1 1
> > +#define VERSION_V2 2
>
> Prefix these defines + perhaps make them an enum?
[Gowthami] Ack.
>
> > +
> > struct cn10k_ddr_pmu {
> > struct pmu pmu;
> > void __iomem *base;
> > + struct ddr_pmu_platform_data *p_data;
>
> const both because it should be and to avoid casting away the const.
[Gowthami] Ack.
>
>
> > + int version;
> > unsigned int cpu;
> > struct device *dev;
> > int active_events;
> > @@ -134,6 +160,54 @@ struct cn10k_ddr_pmu {
> >
> > #define to_cn10k_ddr_pmu(p) container_of(p, struct cn10k_ddr_pmu, pmu)
> >
> > +struct ddr_pmu_platform_data {
> > + u64 counter_overflow_val;
> > + u64 counter_max_val;
> > + u64 ddrc_perf_cnt_base;
> > + u64 ddrc_perf_cfg_base;
> > + u64 ddrc_perf_cnt_op_mode_ctrl;
>
> Good to name these in a fashion that makes it clear what they are.
> Some are values, some are register address offsets I think?
>
> Shy is ddrc_perf prefix useful in here?
[Gowthami] Yes. Some are values and most of them point to register offsets.
Just to match with the register defined values named it so.
>
>
> > + u64 ddrc_perf_cnt_start_op_ctrl;
> > + u64 ddrc_perf_cnt_end_op_ctrl;
> > + u64 ddrc_perf_cnt_end_status;
> > + u64 ddrc_perf_cnt_freerun_en;
> > + u64 ddrc_perf_cnt_freerun_ctrl;
> > + u64 ddrc_perf_cnt_freerun_clr;
> > + u64 ddrc_perf_cnt_value_wr_op;
> > + u64 ddrc_perf_cnt_value_rd_op;
> > +};
> > +
> > +static const struct ddr_pmu_platform_data cn10k_ddr_pmu_pdata = {
> > + .counter_overflow_val = BIT_ULL(48),
> > + .counter_max_val = GENMASK_ULL(48, 0),
> > + .ddrc_perf_cnt_base = CN10K_DDRC_PERF_CNT_VALUE_BASE,
> > + .ddrc_perf_cfg_base = CN10K_DDRC_PERF_CFG_BASE,
> > + .ddrc_perf_cnt_op_mode_ctrl = CN10K_DDRC_PERF_CNT_OP_MODE_CTRL,
> > + .ddrc_perf_cnt_start_op_ctrl = CN10K_DDRC_PERF_CNT_START_OP_CTRL,
> > + .ddrc_perf_cnt_end_op_ctrl = CN10K_DDRC_PERF_CNT_END_OP_CTRL,
> > + .ddrc_perf_cnt_end_status = CN10K_DDRC_PERF_CNT_END_STATUS,
> > + .ddrc_perf_cnt_freerun_en = CN10K_DDRC_PERF_CNT_FREERUN_EN,
> > + .ddrc_perf_cnt_freerun_ctrl = CN10K_DDRC_PERF_CNT_FREERUN_CTRL,
> > + .ddrc_perf_cnt_freerun_clr = 0,
> > + .ddrc_perf_cnt_value_wr_op = CN10K_DDRC_PERF_CNT_VALUE_WR_OP,
> > + .ddrc_perf_cnt_value_rd_op = CN10K_DDRC_PERF_CNT_VALUE_RD_OP,
> > +};
> > +
> > +static const struct ddr_pmu_platform_data odyssey_ddr_pmu_pdata = {
> > + .counter_overflow_val = 0,
> > + .counter_max_val = GENMASK_ULL(63, 0),
> > + .ddrc_perf_cnt_base = ODY_DDRC_PERF_CNT_VALUE_BASE,
> > + .ddrc_perf_cfg_base = ODY_DDRC_PERF_CFG_BASE,
> > + .ddrc_perf_cnt_op_mode_ctrl = ODY_DDRC_PERF_CNT_OP_MODE_CTRL,
> > + .ddrc_perf_cnt_start_op_ctrl = ODY_DDRC_PERF_CNT_START_OP_CTRL,
> > + .ddrc_perf_cnt_end_op_ctrl = ODY_DDRC_PERF_CNT_END_OP_CTRL,
> > + .ddrc_perf_cnt_end_status = ODY_DDRC_PERF_CNT_END_STATUS,
> > + .ddrc_perf_cnt_freerun_en = 0,
> > + .ddrc_perf_cnt_freerun_ctrl = ODY_DDRC_PERF_CNT_FREERUN_CTRL,
> > + .ddrc_perf_cnt_freerun_clr = ODY_DDRC_PERF_CNT_FREERUN_CLR,
> > + .ddrc_perf_cnt_value_wr_op = ODY_DDRC_PERF_CNT_VALUE_WR_OP,
> > + .ddrc_perf_cnt_value_rd_op = ODY_DDRC_PERF_CNT_VALUE_RD_OP,
> > +};
>
>
> ...
>
> > -static int ddr_perf_get_event_bitmap(int eventid, u64 *event_bitmap)
> > +static int ddr_perf_get_event_bitmap(int eventid, u64 *event_bitmap,
> > + struct cn10k_ddr_pmu *ddr_pmu)
> > {
> > + int ret = 0;
> > +
> > switch (eventid) {
> > case EVENT_HIF_RD_OR_WR ... EVENT_WAW_HAZARD:
> > case EVENT_OP_IS_REFRESH ... EVENT_OP_IS_ZQLATCH:
> > *event_bitmap = (1ULL << (eventid - 1));
> > break;
> > + case EVENT_DFI_PARITY_POISON ...EVENT_DFI_CMD_IS_RETRY:
> > + if (ddr_pmu->version == VERSION_V2) {
> > + *event_bitmap = (1ULL << (eventid - 1));
> > + } else {
> > + pr_err("%s Invalid eventid %d\n", __func__, eventid);
> > + ret = -EINVAL;
> > + }
> > + break;
> > case EVENT_OP_IS_ENTER_SELFREF:
> > case EVENT_OP_IS_ENTER_POWERDOWN:
> > case EVENT_OP_IS_ENTER_MPSM:
> > @@ -280,10 +451,10 @@ static int ddr_perf_get_event_bitmap(int eventid, u64 *event_bitmap)
> > break;
> > default:
> > pr_err("%s Invalid eventid %d\n", __func__, eventid);
> > - return -EINVAL;
> > + ret = -EINVAL;
> > }
> >
> > - return 0;
> > + return ret;
> Why? Just return in the various paths above.
>
> Direct returns make for easier to review code as you can follow a particular path through
> more quickly.
[Gowthami] Ack.
>
> > }
>
> > static void cn10k_ddr_perf_pmu_disable(struct pmu *pmu)
> > {
> > struct cn10k_ddr_pmu *ddr_pmu = to_cn10k_ddr_pmu(pmu);
> > + struct ddr_pmu_platform_data *p_data = ddr_pmu->p_data;
> >
> > writeq_relaxed(END_OP_CTRL_VAL_END, ddr_pmu->base +
> > - DDRC_PERF_CNT_END_OP_CTRL);
> > + p_data->ddrc_perf_cnt_end_op_ctrl);
> > }
> >
> > static void cn10k_ddr_perf_event_update_all(struct cn10k_ddr_pmu *pmu)
> > @@ -549,6 +778,7 @@ static void cn10k_ddr_perf_event_update_all(struct cn10k_ddr_pmu *pmu)
> >
> > static irqreturn_t cn10k_ddr_pmu_overflow_handler(struct cn10k_ddr_pmu *pmu)
> > {
> > + struct ddr_pmu_platform_data *p_data = pmu->p_data;
> > struct perf_event *event;
> > struct hw_perf_event *hwc;
> > u64 prev_count, new_count;
> > @@ -561,7 +791,8 @@ static irqreturn_t cn10k_ddr_pmu_overflow_handler(struct cn10k_ddr_pmu *pmu)
> > prev_count = local64_read(&hwc->prev_count);
> > new_count = cn10k_ddr_perf_read_counter(pmu, hwc->idx);
> >
> > - /* Overflow condition is when new count less than
> > + /*
> > + * Overflow condition is when new count less than
> > * previous count
> > */
> > if (new_count < prev_count)
> > @@ -574,7 +805,8 @@ static irqreturn_t cn10k_ddr_pmu_overflow_handler(struct cn10k_ddr_pmu *pmu)
> > prev_count = local64_read(&hwc->prev_count);
> > new_count = cn10k_ddr_perf_read_counter(pmu, hwc->idx);
> >
> > - /* Overflow condition is when new count less than
> > + /*
> > + * Overflow condition is when new count less than
>
> Good to fix this, but not in a patch doing anything meaningful. If you want
> to make comment syntax changes - separate patch.
[Gowthami] Ack.
>
> > * previous count
> > */
> > if (new_count < prev_count)
> > @@ -586,11 +818,23 @@ static irqreturn_t cn10k_ddr_pmu_overflow_handler(struct cn10k_ddr_pmu *pmu)
> > continue;
> >
> > value = cn10k_ddr_perf_read_counter(pmu, i);
> > - if (value == DDRC_PERF_CNT_MAX_VALUE) {
> > + if (value == p_data->counter_max_val) {
> > pr_info("Counter-(%d) reached max value\n", i);
> > - cn10k_ddr_perf_event_update_all(pmu);
> > - cn10k_ddr_perf_pmu_disable(&pmu->pmu);
> > - cn10k_ddr_perf_pmu_enable(&pmu->pmu);
> > + /*
> > + * As separate control register is added for each
> > + * counter in odyssey, no need to update all
> > + * the events
> > + */
> > + if (pmu->version == VERSION_V2) {
>
> This sort of version difference is often better handled via a callback
> in the your pdata structure. Makes it easy to add a new one for v3 :)
>
[Gowthami] Agree. Will make the change in the next version.
> > + cn10k_ddr_perf_event_update(pmu->events[i]);
> > + cn10k_ddr_perf_counter_stop(pmu, i);
> > + cn10k_ddr_perf_counter_start(pmu, i);
> > +
> > + } else {
> > + cn10k_ddr_perf_event_update_all(pmu);
> > + cn10k_ddr_perf_pmu_disable(&pmu->pmu);
> > + cn10k_ddr_perf_pmu_enable(&pmu->pmu);
> > + }
> > }
> > }
> >
> > @@ -631,7 +875,10 @@ static int cn10k_ddr_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
> >
> > static int cn10k_ddr_perf_probe(struct platform_device *pdev)
> > {
> > + struct ddr_pmu_platform_data *pltfm_data;
> > + struct device *dev = &pdev->dev;
> > struct cn10k_ddr_pmu *ddr_pmu;
> > + const char *compatible;
> > struct resource *res;
> > void __iomem *base;
> > char *name;
> > @@ -642,6 +889,14 @@ static int cn10k_ddr_perf_probe(struct platform_device *pdev)
> > return -ENOMEM;
> >
> > ddr_pmu->dev = &pdev->dev;
> > +
> > + pltfm_data = (struct ddr_pmu_platform_data *)
> > + device_get_match_data(&pdev->dev);
>
> Shouldn't need the cast as it's a const void *
> and you should not need to modify it in here (so make your
> data types
> const struct ddr_pmu_platform *
>
[Gowthami] Ack
>
>
> > + if (!pltfm_data) {
> > + dev_err(&pdev->dev, "Error: No device match data found\n");
> > + return -ENODEV;
> > + }
> > + ddr_pmu->p_data = pltfm_data;
> > platform_set_drvdata(pdev, ddr_pmu);
> >
> > base = devm_platform_get_and_ioremap_resource(pdev, 0, &res);
> > @@ -650,25 +905,59 @@ static int cn10k_ddr_perf_probe(struct platform_device *pdev)
> >
> > ddr_pmu->base = base;
> >
> > - /* Setup the PMU counter to work in manual mode */
> > - writeq_relaxed(OP_MODE_CTRL_VAL_MANNUAL, ddr_pmu->base +
> > - DDRC_PERF_CNT_OP_MODE_CTRL);
> > -
> > - ddr_pmu->pmu = (struct pmu) {
> > - .module = THIS_MODULE,
> > - .capabilities = PERF_PMU_CAP_NO_EXCLUDE,
> > - .task_ctx_nr = perf_invalid_context,
> > - .attr_groups = cn10k_attr_groups,
> > - .event_init = cn10k_ddr_perf_event_init,
> > - .add = cn10k_ddr_perf_event_add,
> > - .del = cn10k_ddr_perf_event_del,
> > - .start = cn10k_ddr_perf_event_start,
> > - .stop = cn10k_ddr_perf_event_stop,
> > - .read = cn10k_ddr_perf_event_update,
> > - .pmu_enable = cn10k_ddr_perf_pmu_enable,
> > - .pmu_disable = cn10k_ddr_perf_pmu_disable,
> > - };
> > + ret = device_property_read_string(dev, "compatible", &compatible);
> > + if (ret) {
> > + pr_err("compatible property not found\n");
> > + return ret;
> > + }
> >
> > + if ((strncmp("marvell,cn10k-ddr-pmu", compatible,
> > + strlen(compatible)) == 0))
>
> Why not just embed this in your pdata structure?
> Even better would be add data to reflect the actual differences
> rather than relying on a 'version' number. It tends to be more
> extensible as new implementations surface to encode each difference
> as data in such a structure. Otherwise, in the long run you
> end up with big switch statements for the many different versions
> which just provide some per version constants. That's messy.
[Gowthami] Ack.
>
>
> > + ddr_pmu->version = VERSION_V1;
> > + else
> > + ddr_pmu->version = VERSION_V2;
> > +
> > + if (ddr_pmu->version == VERSION_V1) {
> > + ddr_pmu->pmu = (struct pmu) {
> > + .module = THIS_MODULE,
> > + .capabilities = PERF_PMU_CAP_NO_EXCLUDE,
> > + .task_ctx_nr = perf_invalid_context,
> > + .attr_groups = cn10k_attr_groups,
> > + .event_init = cn10k_ddr_perf_event_init,
> > + .add = cn10k_ddr_perf_event_add,
> > + .del = cn10k_ddr_perf_event_del,
> > + .start = cn10k_ddr_perf_event_start,
> > + .stop = cn10k_ddr_perf_event_stop,
> > + .read = cn10k_ddr_perf_event_update,
> > + .pmu_enable = cn10k_ddr_perf_pmu_enable,
> > + .pmu_disable = cn10k_ddr_perf_pmu_disable,
> > + };
> > +
> > + /*
> > + * As we have separate control registers for each counter in Odyssey,
> > + * setting up the mode will be done when we enable each counter
> > + *
>
> Trivial: Odd formatting. I'd drop the blank commented line and add a full stop.
[Gowthami] Ack
>
> > + */
> > +
> > + /* Setup the PMU counter to work in manual mode */
> > + writeq(OP_MODE_CTRL_VAL_MANUAL, ddr_pmu->base +
> > + (ddr_pmu->p_data->ddrc_perf_cnt_op_mode_ctrl));
> > + } else {
> > + ddr_pmu->pmu = (struct pmu) {
> > + .module = THIS_MODULE,
> > + .capabilities = PERF_PMU_CAP_NO_EXCLUDE,
> > + .task_ctx_nr = perf_invalid_context,
> > + .attr_groups = odyssey_attr_groups,
> > + .event_init = cn10k_ddr_perf_event_init,
> > + .add = cn10k_ddr_perf_event_add,
> > + .del = cn10k_ddr_perf_event_del,
> > + .start = cn10k_ddr_perf_event_start,
> > + .stop = cn10k_ddr_perf_event_stop,
> > + .read = cn10k_ddr_perf_event_update,
> > + .pmu_enable = NULL,
> > + .pmu_disable = NULL,
>
> No need to set these to NULL. Not providing them has same result and
> I don't think there is any particular value wrt to 'documentation' of
> setting them explicitly. If there is a reason this needs calling out
> I'd expect a comment explaining why.
[Gowthami] There is no specific reason. Will remove it in the next version.
>
> Ideal patch series structure for changes like this patch makes would be:
> 1) Refactor to pull out the pdata - no functional change.
> 2) Patch adding the support for the new device.
>
> Result is easier to review than the combination of the two changes.
[Gowthami] Agree. Will split the patch as suggested.

Thanks,
Gowthami
>
>
> > + };
> > + }