Re: [PATCH v1 1/2] perf report: Check for fused instruction pair

From: Arnaldo Carvalho de Melo
Date: Fri Jun 16 2017 - 12:22:05 EST


Em Wed, Jun 14, 2017 at 10:53:40AM +0800, Jin Yao escreveu:
> Macro fusion merges two instructions into a single micro-op. Intel
> Core platforms perform this hardware optimization under limited
> circumstances. For example, CMP + JCC can be "fused" and executed/
> retired together. With sampling, however, this can result in the
> sample sometimes being on the JCC and sometimes on the CMP.
> So the two instructions of a fused pair should be considered
> together.

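Doing it as a weak function that will be overridden by the host arch
doesn't work, as we also support cross-annotation: the weak symbol is
resolved at build time for the host, while the data being annotated may
come from a different arch. So you have to take into account
perf_evsel__env_arch(evsel), etc.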

Please search for perf_evsel__env_arch(evsel) in the annotation source
files to see how it is used.

- Arnaldo

> In general, the fused instruction pairs are:
>
> cmp/test/add/sub/and/inc/dec + jcc.
>
> This patch adds a new function which checks whether two x86
> instructions form a "fused" pair. For non-x86 architectures, the
> function just returns false.
>
> Signed-off-by: Jin Yao <yao.jin@xxxxxxxxxxxxxxx>
> ---
> tools/perf/arch/x86/util/Build | 1 +
> tools/perf/arch/x86/util/fused.c | 20 ++++++++++++++++++++
> tools/perf/util/Build | 1 +
> tools/perf/util/fused.c | 11 +++++++++++
> tools/perf/util/fused.h | 8 ++++++++
> 5 files changed, 41 insertions(+)
> create mode 100644 tools/perf/arch/x86/util/fused.c
> create mode 100644 tools/perf/util/fused.c
> create mode 100644 tools/perf/util/fused.h
>
> diff --git a/tools/perf/arch/x86/util/Build b/tools/perf/arch/x86/util/Build
> index f95e6f4..3809348 100644
> --- a/tools/perf/arch/x86/util/Build
> +++ b/tools/perf/arch/x86/util/Build
> @@ -4,6 +4,7 @@ libperf-y += pmu.o
> libperf-y += kvm-stat.o
> libperf-y += perf_regs.o
> libperf-y += group.o
> +libperf-y += fused.o
>
> libperf-$(CONFIG_DWARF) += dwarf-regs.o
> libperf-$(CONFIG_BPF_PROLOGUE) += dwarf-regs.o
> diff --git a/tools/perf/arch/x86/util/fused.c b/tools/perf/arch/x86/util/fused.c
> new file mode 100644
> index 0000000..be28d22
> --- /dev/null
> +++ b/tools/perf/arch/x86/util/fused.c
> @@ -0,0 +1,20 @@
> +#include <string.h>
> +#include "../../util/fused.h"
> +
> +bool fused_insn_pair(const char *insn1, const char *insn2)
> +{
> + if (strstr(insn2, "jmp"))
> + return false;
> +
> + if ((strstr(insn1, "cmp") && !strstr(insn1, "xchg")) ||
> + strstr(insn1, "test") ||
> + strstr(insn1, "add") ||
> + strstr(insn1, "sub") ||
> + strstr(insn1, "and") ||
> + strstr(insn1, "inc") ||
> + strstr(insn1, "dec")) {
> + return true;
> + }
> +
> + return false;
> +}
> diff --git a/tools/perf/util/Build b/tools/perf/util/Build
> index 79dea95..b83757d 100644
> --- a/tools/perf/util/Build
> +++ b/tools/perf/util/Build
> @@ -93,6 +93,7 @@ libperf-y += drv_configs.o
> libperf-y += units.o
> libperf-y += time-utils.o
> libperf-y += expr-bison.o
> +libperf-y += fused.o
>
> libperf-$(CONFIG_LIBBPF) += bpf-loader.o
> libperf-$(CONFIG_BPF_PROLOGUE) += bpf-prologue.o
> diff --git a/tools/perf/util/fused.c b/tools/perf/util/fused.c
> new file mode 100644
> index 0000000..2cf56fa
> --- /dev/null
> +++ b/tools/perf/util/fused.c
> @@ -0,0 +1,11 @@
> +#include <linux/compiler.h>
> +#include <linux/types.h>
> +#include <string.h>
> +
> +#include "fused.h"
> +
> +bool __weak fused_insn_pair(const char *insn1 __maybe_unused,
> + const char *insn2 __maybe_unused)
> +{
> + return false;
> +}
> diff --git a/tools/perf/util/fused.h b/tools/perf/util/fused.h
> new file mode 100644
> index 0000000..fa26714
> --- /dev/null
> +++ b/tools/perf/util/fused.h
> @@ -0,0 +1,8 @@
> +#ifndef __PERF_FUSED_H
> +#define __PERF_FUSED_H
> +
> +#include <linux/types.h>
> +
> +bool fused_insn_pair(const char *insn1, const char *insn2);
> +
> +#endif /* __PERF_FUSED_H */
> --
> 2.7.4