Re: [PATCH v2 08/21] cpufreq: amd: add trace for amd-pstate module

From: Giovanni Gherdovich
Date: Wed Oct 06 2021 - 04:12:42 EST


On Sun, 2021-09-26 at 17:05 +0800, Huang Rui wrote:
> Add trace event to monitor the performance value changes which is
> controlled by cpu governors.
>
> Signed-off-by: Huang Rui <ray.huang@xxxxxxx>
> ---
> drivers/cpufreq/Makefile | 6 +-
> drivers/cpufreq/amd-pstate-trace.c | 2 +
> drivers/cpufreq/amd-pstate-trace.h | 96 ++++++++++++++++++++++++++++++
> drivers/cpufreq/amd-pstate.c | 11 ++++
> 4 files changed, 114 insertions(+), 1 deletion(-)
> create mode 100644 drivers/cpufreq/amd-pstate-trace.c
> create mode 100644 drivers/cpufreq/amd-pstate-trace.h
>
> diff --git a/drivers/cpufreq/Makefile b/drivers/cpufreq/Makefile
> index 5c9a2a1ee8dc..04882bc4b145 100644
> --- a/drivers/cpufreq/Makefile
> +++ b/drivers/cpufreq/Makefile
> @@ -17,6 +17,10 @@ obj-$(CONFIG_CPU_FREQ_GOV_ATTR_SET) += cpufreq_governor_attr_set.o
> obj-$(CONFIG_CPUFREQ_DT) += cpufreq-dt.o
> obj-$(CONFIG_CPUFREQ_DT_PLATDEV) += cpufreq-dt-platdev.o
>
> +# Traces
> +CFLAGS_amd-pstate-trace.o := -I$(src)
> +amd_pstate-y := amd-pstate.o amd-pstate-trace.o
> +
> ##################################################################################
> # x86 drivers.
> # Link order matters. K8 is preferred to ACPI because of firmware bugs in early
> @@ -25,7 +29,7 @@ obj-$(CONFIG_CPUFREQ_DT_PLATDEV) += cpufreq-dt-platdev.o
> # speedstep-* is preferred over p4-clockmod.
>
> obj-$(CONFIG_X86_ACPI_CPUFREQ) += acpi-cpufreq.o
> -obj-$(CONFIG_X86_AMD_PSTATE) += amd-pstate.o
> +obj-$(CONFIG_X86_AMD_PSTATE) += amd_pstate.o
> obj-$(CONFIG_X86_POWERNOW_K8) += powernow-k8.o
> obj-$(CONFIG_X86_PCC_CPUFREQ) += pcc-cpufreq.o
> obj-$(CONFIG_X86_POWERNOW_K6) += powernow-k6.o
> diff --git a/drivers/cpufreq/amd-pstate-trace.c b/drivers/cpufreq/amd-pstate-trace.c
> new file mode 100644
> index 000000000000..891b696dcd69
> --- /dev/null
> +++ b/drivers/cpufreq/amd-pstate-trace.c
> @@ -0,0 +1,2 @@
> +#define CREATE_TRACE_POINTS
> +#include "amd-pstate-trace.h"
> diff --git a/drivers/cpufreq/amd-pstate-trace.h b/drivers/cpufreq/amd-pstate-trace.h
> new file mode 100644
> index 000000000000..50c85e150f30
> --- /dev/null
> +++ b/drivers/cpufreq/amd-pstate-trace.h
> @@ -0,0 +1,96 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +/*
> + * amd-pstate-trace.h - AMD Processor P-state Frequency Driver Tracer
> + *
> + * Copyright (C) 2021 Advanced Micro Devices, Inc. All Rights Reserved.
> + *
> + * This program is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU General Public License
> + * as published by the Free Software Foundation; either version 2
> + * of the License, or (at your option) any later version.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License along with
> + * this program; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
> + *
> + * Author: Huang Rui <ray.huang@xxxxxxx>
> + */
> +
> +#if !defined(_AMD_PSTATE_TRACE_H) || defined(TRACE_HEADER_MULTI_READ)
> +#define _AMD_PSTATE_TRACE_H
> +
> +#include <linux/cpufreq.h>
> +#include <linux/tracepoint.h>
> +#include <linux/trace_events.h>
> +
> +#undef TRACE_SYSTEM
> +#define TRACE_SYSTEM amd_cpu

Hello Ray,

I'd prefer if TRACE_SYSTEM was set to "power". In that way the tracepoint is easier
to find, since it'd be together with other power-related tracepoints. I often do

perf list | grep "power:"

to find all that's available, or equivalently

ls $TRACEFS/events/power/

and if your tracepoint is somewhere else, I wouldn't find it.

> +
> +#undef TRACE_INCLUDE_FILE
> +#define TRACE_INCLUDE_FILE amd-pstate-trace
> +
> +#define TPS(x) tracepoint_string(x)
> +
> +TRACE_EVENT(amd_pstate_perf,
> +
> + TP_PROTO(unsigned long min_perf,
> + unsigned long target_perf,
> + unsigned long capacity,
> + unsigned int cpu_id,
> + u64 prev,
> + u64 value,
> + int type
> + ),
> +
> + TP_ARGS(min_perf,
> + target_perf,
> + capacity,
> + cpu_id,
> + prev,
> + value,
> + type
> + ),
> +
> + TP_STRUCT__entry(
> + __field(unsigned long, min_perf)
> + __field(unsigned long, target_perf)
> + __field(unsigned long, capacity)
> + __field(unsigned int, cpu_id)
> + __field(u64, prev)
> + __field(u64, value)
> + __field(int, type)
> + ),
> +
> + TP_fast_assign(
> + __entry->min_perf = min_perf;
> + __entry->target_perf = target_perf;
> + __entry->capacity = capacity;
> + __entry->cpu_id = cpu_id;
> + __entry->prev = prev;
> + __entry->value = value;
> + __entry->type = type;
> + ),
> +
> + TP_printk("amd_min_perf=%lu amd_des_perf=%lu amd_max_perf=%lu cpu_id=%u prev=0x%llx value=0x%llx type=0x%d",
> + (unsigned long)__entry->min_perf,
> + (unsigned long)__entry->target_perf,
> + (unsigned long)__entry->capacity,
> + (unsigned int)__entry->cpu_id,
> + (u64)__entry->prev,
> + (u64)__entry->value,
> + (int)__entry->type
> + )
> +);
> +
> +#endif /* _AMD_PSTATE_TRACE_H */
> +
> +/* This part must be outside protection */
> +#undef TRACE_INCLUDE_PATH
> +#define TRACE_INCLUDE_PATH .
> +
> +#include <trace/define_trace.h>
> diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c
> index c9bee7b1698a..0c9f9c0c8928 100644
> --- a/drivers/cpufreq/amd-pstate.c
> +++ b/drivers/cpufreq/amd-pstate.c
> @@ -45,10 +45,17 @@
> #include <asm/processor.h>
> #include <asm/cpufeature.h>
> #include <asm/cpu_device_id.h>
> +#include "amd-pstate-trace.h"
>
> #define AMD_PSTATE_TRANSITION_LATENCY 0x20000
> #define AMD_PSTATE_TRANSITION_DELAY 500
>
> +enum switch_type
> +{
> + AMD_TARGET = 0,
> + AMD_ADJUST_PERF
> +};
> +
> static struct cpufreq_driver amd_pstate_driver;
>
> struct amd_cpudata {
> @@ -183,6 +190,7 @@ amd_pstate_update(struct amd_cpudata *cpudata, u32 min_perf,
> {
> u64 prev = READ_ONCE(cpudata->cppc_req_cached);
> u64 value = prev;
> + enum switch_type type = fast_switch ? AMD_ADJUST_PERF : AMD_TARGET;
>
> value &= ~REQ_MIN_PERF(~0L);
> value |= REQ_MIN_PERF(min_perf);
> @@ -193,6 +201,9 @@ amd_pstate_update(struct amd_cpudata *cpudata, u32 min_perf,
> value &= ~REQ_MAX_PERF(~0L);
> value |= REQ_MAX_PERF(max_perf);
>
> + trace_amd_pstate_perf(min_perf, des_perf, max_perf,
> + cpudata->cpu, prev, value, type);

Two things here:

1. the field "value" seems redundant, as you're already showing me {min,des,max}_perf.
Maybe you can remove "value" from the output of the trace?
One reason I can think why you're showing me "value", is to let me see if it's the
same as "prev", in which case I'd know the request isn't passed to the hardware.
Is that so? If that's the reason, maybe it would be clear to remove "value", "prev"
and just show a field like "changed={true,false}".

2. the field "type" is a little obscure for someone reading the trace. It can be
0 or 1, and to know what that means one has to read the code. I would suggest
replacing it with a field "fast_switch={true,false}", which is more telling.
What do you think?


Giovanni