Re: [PATCH v9 2/7] perf/x86/intel: Record branch type
From: Jiri Olsa
Date: Mon Jul 17 2017 - 04:11:23 EST
On Mon, Jul 17, 2017 at 07:06:38PM +0800, Jin Yao wrote:
SNIP
> +#define X86_BR_TYPE_MAP_MAX 16
> +
> +static int
> +common_branch_type(int type)
> +{
> + int i;
> + const int branch_map[X86_BR_TYPE_MAP_MAX] = {
> + PERF_BR_CALL, /* X86_BR_CALL */
> + PERF_BR_RET, /* X86_BR_RET */
> + PERF_BR_SYSCALL, /* X86_BR_SYSCALL */
> + PERF_BR_SYSRET, /* X86_BR_SYSRET */
> + PERF_BR_UNKNOWN, /* X86_BR_INT */
> + PERF_BR_UNKNOWN, /* X86_BR_IRET */
> + PERF_BR_COND, /* X86_BR_JCC */
> + PERF_BR_UNCOND, /* X86_BR_JMP */
> + PERF_BR_UNKNOWN, /* X86_BR_IRQ */
> + PERF_BR_IND_CALL, /* X86_BR_IND_CALL */
> + PERF_BR_UNKNOWN, /* X86_BR_ABORT */
> + PERF_BR_UNKNOWN, /* X86_BR_IN_TX */
> + PERF_BR_UNKNOWN, /* X86_BR_NO_TX */
> + PERF_BR_CALL, /* X86_BR_ZERO_CALL */
> + PERF_BR_UNKNOWN, /* X86_BR_CALL_STACK */
> + PERF_BR_IND, /* X86_BR_IND_JMP */
> + };
Should the branch_map array be static? Keeping it on the stack makes
the compiler re-create it every time the function is called.
jirka
> +
> + type >>= 2; /* skip X86_BR_USER and X86_BR_KERNEL */
> +
> + if (type) {
> + i = __ffs(type);
> + if (i < X86_BR_TYPE_MAP_MAX)
> + return branch_map[i];
> + }
> +
> + return PERF_BR_UNKNOWN;
> +}
> +
> /*
> * implement actual branch filter based on user demand.
> * Hardware may not exactly satisfy that request, thus
> @@ -942,7 +987,8 @@ intel_pmu_lbr_filter(struct cpu_hw_events *cpuc)
> bool compress = false;
>
> /* if sampling all branches, then nothing to filter */
> - if ((br_sel & X86_BR_ALL) == X86_BR_ALL)
> + if (((br_sel & X86_BR_ALL) == X86_BR_ALL) &&
> + ((br_sel & X86_BR_TYPE_SAVE) != X86_BR_TYPE_SAVE))
> return;
>
> for (i = 0; i < cpuc->lbr_stack.nr; i++) {
> @@ -963,6 +1009,9 @@ intel_pmu_lbr_filter(struct cpu_hw_events *cpuc)
> cpuc->lbr_entries[i].from = 0;
> compress = true;
> }
> +
> + if ((br_sel & X86_BR_TYPE_SAVE) == X86_BR_TYPE_SAVE)
> + cpuc->lbr_entries[i].type = common_branch_type(type);
> }
>
> if (!compress)
> --
> 2.7.4
>