[RFC][PATCH 0/7] perf: Branch stack annotation and fixes

From: Peter Zijlstra
Date: Fri Jul 08 2016 - 10:06:55 EST


Hi,

These here patches improve the perf branch-stack support and add branch-stack
support to perf-annotate.

They appear to work for me; but some of it is fairly hairy code so please have
a hard look.


The last patch includes the userspace changes and sample output of
'perf annotate branches'; but since email doesn't do color, lots of
information is lost. A screenshot of the same data can be found
here:

http://programming.kicks-ass.net/sekrit/peterz1.png

And the actual program can be found below.


--- branches.c ---

#include <stdlib.h>
#include <stdio.h>

/*
 * B(x) - mask with only bit number x set.
 *
 * The shifted constant is unsigned long so that B(31) is well defined
 * (1 << 31 overflows a 32-bit int), and the argument is parenthesized so
 * that expressions such as B(a | b) expand as intended.
 */
#define B(x) (1UL << (x))

/*
 * Feedback tap masks, indexed by the 'bits' value handed to lfsr_init().
 *
 * Each initialized entry n (2 <= n <= 32) is an OR of single-bit masks and
 * always contains B(n - 1), so the mask spans exactly n bits.  Indices 0 and
 * 1 have no initializer and are therefore zero.
 *
 * NOTE(review): these look like maximal-length tap sets for each width, but
 * that is not verified here -- confirm against a reference table.
 */
long lfsr_taps[] =
{
[2] = B(0) | B(1),
[3] = B(0) | B(2),
[4] = B(0) | B(3),
[5] = B(1) | B(4),
[6] = B(0) | B(5),
[7] = B(0) | B(6),
[8] = B(1) | B(2) | B(3) | B(7),
[9] = B(3) | B(8),
[10] = B(2) | B(9),
[11] = B(1) | B(10),
[12] = B(0) | B(3) | B(5) | B(11),
[13] = B(0) | B(2) | B(3) | B(12),
[14] = B(0) | B(2) | B(4) | B(13),
[15] = B(0) | B(14),
[16] = B(1) | B(2) | B(4) | B(15),
[17] = B(2) | B(16),
[18] = B(6) | B(17),
[19] = B(0) | B(1) | B(4) | B(18),
[20] = B(2) | B(19),
[21] = B(1) | B(20),
[22] = B(0) | B(21),
[23] = B(4) | B(22),
[24] = B(0) | B(2) | B(3) | B(23),
[25] = B(2) | B(24),
[26] = B(0) | B(1) | B(5) | B(25),
[27] = B(0) | B(1) | B(4) | B(26),
[28] = B(2) | B(27),
[29] = B(1) | B(28),
[30] = B(0) | B(3) | B(5) | B(29),
[31] = B(2) | B(30),
/* Highest entry uses B(31), the top bit of a 32-bit value. */
[32] = B(1) | B(5) | B(6) | B(31),
};

/* Feedback mask applied by lfsr(); assigned by lfsr_init(). */
unsigned long taps;

/*
 * Advance the shift register by one step and return the new state.
 *
 * The state is shifted right by one position.  When the bit that was
 * shifted out is set, the result is additionally XORed with 'taps'.  The
 * choice is made with an all-ones / all-zeroes mask instead of a
 * conditional, so this helper contains no branch of its own in the source.
 */
static unsigned long lfsr(unsigned long lfsr)
{
	/* 0UL - 1UL wraps to all ones; 0UL - 0UL stays zero. */
	const unsigned long select = 0UL - (lfsr & 1UL);
	const unsigned long feedback = select & taps;

	return (lfsr >> 1) ^ feedback;
}

/*
 * Select the feedback mask used by lfsr().
 *
 * @bits: index into lfsr_taps[].
 *
 * 'bits' ultimately comes from the command line, so it is range-checked
 * before it is used as an array index.  An index outside the table is
 * reported on stderr and terminates the program instead of reading past
 * the end of (or before) the array.
 */
void lfsr_init(long bits)
{
	const long nr_entries = (long)(sizeof(lfsr_taps) / sizeof(lfsr_taps[0]));

	if (bits < 0 || bits >= nr_entries) {
		fprintf(stderr, "lfsr_init: bits=%ld is outside [0, %ld]\n",
			bits, nr_entries - 1);
		exit(EXIT_FAILURE);
	}

	taps = lfsr_taps[bits];
}

/*
 * Accumulator updated by branches().  Because it is volatile, every
 * increment, decrement and shift is an access the compiler must actually
 * perform; presumably this is what keeps the conditional updates in
 * branches() from being optimized away.
 */
unsigned volatile long acc = 0;

/*
 * Run 'iterations' rounds of conditional updates to 'acc', driven by an
 * LFSR state that starts at 'seed'.
 *
 * @seed:       initial register state; its low bit also controls the
 *              middle conditional in every round.
 * @iterations: number of loop rounds; each round steps the LFSR twice.
 *
 * The function returns nothing; its only effect is on the global 'acc'.
 */
void branches(unsigned long seed, unsigned long iterations)
{
long i, reg = seed;

/* 'i' is signed while 'iterations' is unsigned long, so the comparison is carried out as unsigned long. */
for (i = 0; i < iterations; i++) {
/* Conditional on the low bit of the current LFSR state. */
if (reg & 0x1)
acc++;
else
acc--;

reg = lfsr(reg);

/* 'seed' is never modified, so this test has the same outcome in every round. */
if (seed & 1)
acc >>= 2;

/*
 * Low-bit test again, but with the condition inverted and the arms
 * swapped: acc still goes up when the low bit is set and down otherwise.
 */
if (~reg & 0x1)
acc--;
else
acc++;

reg = lfsr(reg);
}
}

int main(int argc, char **argv)
{
long bits = 22;
long seed = 2;

if (argc > 1)
bits = atoi(argv[1]);

if (argc > 2)
seed = atoi(argv[2]);

lfsr_init(bits);
branches(seed, 1 << bits);

return 0;
}