[RFC][PATCH 0/7] perf: Branch stack annotation and fixes
From: Peter Zijlstra
Date: Fri Jul 08 2016 - 10:06:55 EST
Hi,
These patches improve the perf branch-stack support and add branch-stack
support to perf-annotate.
They appear to work for me, but some of the code is fairly hairy, so please
have a hard look.
The last patch contains the userspace changes and includes sample
output of 'perf annotate branches', but since email doesn't do color,
lots of information is lost. A screenshot of the same data can be found
here:
http://programming.kicks-ass.net/sekrit/peterz1.png
And the actual program can be found below.
--- branches.c ---
#include <stdlib.h>
#include <stdio.h>
/*
 * B(x): a value with only bit x set.
 *
 * The shift is done on an unsigned long so that B(31) does not shift into
 * the sign bit of an int (undefined behaviour, and in practice a negative
 * value that sign-extends when stored in a long).  The argument is
 * parenthesized so that expressions such as B(a | b) expand as intended.
 */
#define B(x) (1UL << (x))

/*
 * Feedback tap masks, indexed by LFSR register width in bits (2..32).
 * Indices 0 and 1 have no designated initializer and are therefore zero.
 * NOTE(review): the tap sets are presumably chosen to give maximal-length
 * sequences for each width; that is not verified here.
 */
long lfsr_taps[] =
{
	[2]  = B(0) | B(1),
	[3]  = B(0) | B(2),
	[4]  = B(0) | B(3),
	[5]  = B(1) | B(4),
	[6]  = B(0) | B(5),
	[7]  = B(0) | B(6),
	[8]  = B(1) | B(2) | B(3) | B(7),
	[9]  = B(3) | B(8),
	[10] = B(2) | B(9),
	[11] = B(1) | B(10),
	[12] = B(0) | B(3) | B(5) | B(11),
	[13] = B(0) | B(2) | B(3) | B(12),
	[14] = B(0) | B(2) | B(4) | B(13),
	[15] = B(0) | B(14),
	[16] = B(1) | B(2) | B(4) | B(15),
	[17] = B(2) | B(16),
	[18] = B(6) | B(17),
	[19] = B(0) | B(1) | B(4) | B(18),
	[20] = B(2) | B(19),
	[21] = B(1) | B(20),
	[22] = B(0) | B(21),
	[23] = B(4) | B(22),
	[24] = B(0) | B(2) | B(3) | B(23),
	[25] = B(2) | B(24),
	[26] = B(0) | B(1) | B(5) | B(25),
	[27] = B(0) | B(1) | B(4) | B(26),
	[28] = B(2) | B(27),
	[29] = B(1) | B(28),
	[30] = B(0) | B(3) | B(5) | B(29),
	[31] = B(2) | B(30),
	[32] = B(1) | B(5) | B(6) | B(31),
};
/* Tap mask applied by lfsr(); assigned by lfsr_init(). */
unsigned long taps;

/*
 * Advance the shift register by one step and return the new state.
 *
 * The state is shifted right by one; if the bit shifted out was set, the
 * tap mask is XORed in.  The selection is done with an all-ones/all-zeroes
 * mask rather than an if-statement, so the source contains no conditional
 * here.
 */
static unsigned long lfsr(unsigned long lfsr)
{
	/* 0 when the low bit is clear, ~0UL when it is set. */
	const unsigned long select = -(lfsr & 1UL);
	const unsigned long feedback = select & taps;

	return (lfsr >> 1) ^ feedback;
}
/*
 * Select the tap mask for a register that is `bits` wide.
 *
 * `bits` is used directly as an index into lfsr_taps[], so it is validated
 * first: a negative or too-large value would read outside the table, and
 * widths 0 and 1 have no entry (their mask is zero).  An unsupported width
 * is reported on stderr and terminates the program.
 */
void lfsr_init(long bits)
{
	const long max_bits = (long)(sizeof(lfsr_taps) / sizeof(lfsr_taps[0])) - 1;

	if (bits < 2 || bits > max_bits) {
		fprintf(stderr,
			"lfsr_init: unsupported register width %ld (expected 2..%ld)\n",
			bits, max_bits);
		exit(EXIT_FAILURE);
	}

	taps = lfsr_taps[bits];
}
/*
 * Counter updated by branches().  It is declared volatile; presumably so
 * that the compiler keeps every update (and the branch guarding it) instead
 * of folding the loop away -- confirm against the generated code.
 */
unsigned volatile long acc = 0;
/*
 * Run `iterations` loop passes, each containing three conditionals and two
 * LFSR steps.  The register starts at `seed`; the only visible result is
 * the final value of `acc`.
 */
void branches(unsigned long seed, unsigned long iterations)
{
long i, reg = seed;
for (i = 0; i < iterations; i++) {
/* Branch on the low bit of the current register state. */
if (reg & 0x1)
acc++;
else
acc--;
reg = lfsr(reg);
/* seed is never modified in this function, so this condition has the same outcome on every pass. */
if (seed & 1)
acc >>= 2;
/* Same low-bit decision as the first conditional (set: ++, clear: --), written with the condition inverted. */
if (~reg & 0x1)
acc--;
else
acc++;
reg = lfsr(reg);
}
}
/*
 * Parse a complete base-10 integer from `text`.
 * Returns 0 and stores the value in *out on success; returns -1 (leaving
 * *out untouched) if `text` is empty or contains trailing characters.
 */
static int parse_long(const char *text, long *out)
{
	char *end;
	long value = strtol(text, &end, 10);

	if (end == text || *end != '\0')
		return -1;

	*out = value;
	return 0;
}

/*
 * usage: branches [bits [seed]]
 *
 * bits - register width, default 22; the loop runs 2^bits iterations.
 * seed - initial register value, default 2.
 *
 * Returns 0 on success and 1 if an argument is malformed or out of range.
 */
int main(int argc, char **argv)
{
	/* Widths that have an entry in the tap table. */
	enum { MIN_BITS = 2, MAX_BITS = 32 };
	long bits = 22;
	long seed = 2;

	if (argc > 1 && parse_long(argv[1], &bits) != 0) {
		fprintf(stderr, "%s: invalid bits '%s'\n", argv[0], argv[1]);
		return 1;
	}
	if (argc > 2 && parse_long(argv[2], &seed) != 0) {
		fprintf(stderr, "%s: invalid seed '%s'\n", argv[0], argv[2]);
		return 1;
	}

	if (bits < MIN_BITS || bits > MAX_BITS) {
		fprintf(stderr, "%s: bits must be in the range %d..%d\n",
			argv[0], MIN_BITS, MAX_BITS);
		return 1;
	}
	/* The shift count below must be smaller than the width of unsigned long (8-bit bytes assumed). */
	if ((unsigned long)bits >= sizeof(unsigned long) * 8) {
		fprintf(stderr, "%s: bits=%ld is too wide for this platform\n",
			argv[0], bits);
		return 1;
	}

	lfsr_init(bits);
	/* Shift an unsigned long: `1 << bits` is an int shift and overflows for bits >= 31. */
	branches(seed, 1UL << bits);
	return 0;
}