Re: [PATCH V4 14/24] perf tools: Add Intel PT decoder

From: Arnaldo Carvalho de Melo
Date: Mon May 11 2015 - 09:22:30 EST


Em Thu, Apr 30, 2015 at 05:37:37PM +0300, Adrian Hunter escreveu:
> Add support for decoding an Intel Processor Trace.

Thanks for the function comments in kerneldoc style, we need more of that!

Some issues below:

- Arnaldo

> Signed-off-by: Adrian Hunter <adrian.hunter@xxxxxxxxx>
> ---
> tools/perf/util/intel-pt-decoder/Build | 2 +-
> .../perf/util/intel-pt-decoder/intel-pt-decoder.c | 1738 ++++++++++++++++++++
> .../perf/util/intel-pt-decoder/intel-pt-decoder.h | 89 +
> 3 files changed, 1828 insertions(+), 1 deletion(-)
> create mode 100644 tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
> create mode 100644 tools/perf/util/intel-pt-decoder/intel-pt-decoder.h
>
> diff --git a/tools/perf/util/intel-pt-decoder/Build b/tools/perf/util/intel-pt-decoder/Build
> index 587321a..fa12eac 100644
> --- a/tools/perf/util/intel-pt-decoder/Build
> +++ b/tools/perf/util/intel-pt-decoder/Build
> @@ -1,4 +1,4 @@
> -libperf-$(CONFIG_AUXTRACE) += intel-pt-pkt-decoder.o intel-pt-insn-decoder.o intel-pt-log.o
> +libperf-$(CONFIG_AUXTRACE) += intel-pt-pkt-decoder.o intel-pt-insn-decoder.o intel-pt-log.o intel-pt-decoder.o
>
> inat_tables_script = ../../arch/x86/tools/gen-insn-attr-x86.awk
> inat_tables_maps = ../../arch/x86/lib/x86-opcode-map.txt
> diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
> new file mode 100644
> index 0000000..435b61b
> --- /dev/null
> +++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
> @@ -0,0 +1,1738 @@
> +/*
> + * intel_pt_decoder.c: Intel Processor Trace support
> + * Copyright (c) 2013-2014, Intel Corporation.
> + *
> + * This program is free software; you can redistribute it and/or modify it
> + * under the terms and conditions of the GNU General Public License,
> + * version 2, as published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope it will be useful, but WITHOUT
> + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
> + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
> + * more details.
> + *
> + */
> +
> +#ifndef _GNU_SOURCE
> +#define _GNU_SOURCE
> +#endif
> +#include <stdlib.h>
> +#include <stdbool.h>
> +#include <string.h>
> +#include <errno.h>
> +#include <stdint.h>
> +#include <inttypes.h>
> +
> +#include "intel-pt-insn-decoder.h"
> +#include "intel-pt-pkt-decoder.h"
> +#include "intel-pt-decoder.h"
> +#include "intel-pt-log.h"
> +
> +#define INTEL_PT_BLK_SIZE 1024
> +
> +#define BIT63 (((uint64_t)1 << 63))
> +
> +#define INTEL_PT_RETURN 1
> +
> +struct intel_pt_blk {
> + struct intel_pt_blk *prev;
> + uint64_t ip[INTEL_PT_BLK_SIZE];
> +};
> +
> +struct intel_pt_stack {
> + struct intel_pt_blk *blk;
> + struct intel_pt_blk *spare;
> + int pos;
> +};
> +
> +enum intel_pt_pkt_state {
> + INTEL_PT_STATE_NO_PSB,
> + INTEL_PT_STATE_NO_IP,
> + INTEL_PT_STATE_ERR_RESYNC,
> + INTEL_PT_STATE_IN_SYNC,
> + INTEL_PT_STATE_TNT,
> + INTEL_PT_STATE_TIP,
> + INTEL_PT_STATE_TIP_PGD,
> + INTEL_PT_STATE_FUP,
> + INTEL_PT_STATE_FUP_NO_TIP,
> +};
> +
> +#ifdef INTEL_PT_STRICT
> +#define INTEL_PT_STATE_ERR1 INTEL_PT_STATE_NO_PSB
> +#define INTEL_PT_STATE_ERR2 INTEL_PT_STATE_NO_PSB
> +#define INTEL_PT_STATE_ERR3 INTEL_PT_STATE_NO_PSB
> +#define INTEL_PT_STATE_ERR4 INTEL_PT_STATE_NO_PSB
> +#else
> +#define INTEL_PT_STATE_ERR1 (decoder->pkt_state)
> +#define INTEL_PT_STATE_ERR2 INTEL_PT_STATE_NO_IP
> +#define INTEL_PT_STATE_ERR3 INTEL_PT_STATE_ERR_RESYNC
> +#define INTEL_PT_STATE_ERR4 INTEL_PT_STATE_IN_SYNC
> +#endif
> +
> +struct intel_pt_decoder {
> + int (*get_trace)(struct intel_pt_buffer *buffer, void *data);
> + int (*walk_insn)(struct intel_pt_insn *intel_pt_insn,
> + uint64_t *insn_cnt_ptr, uint64_t *ip, uint64_t to_ip,
> + uint64_t max_insn_cnt, void *data);
> + void *data;
> + struct intel_pt_state state;
> + const unsigned char *buf;
> + size_t len;
> + bool return_compression;
> + bool pge;
> + uint64_t pos;
> + uint64_t last_ip;
> + uint64_t ip;
> + uint64_t cr3;
> + uint64_t timestamp;
> + uint64_t tsc_timestamp;
> + uint64_t ref_timestamp;
> + uint64_t ret_addr;
> + struct intel_pt_stack stack;
> + enum intel_pt_pkt_state pkt_state;
> + struct intel_pt_pkt packet;
> + struct intel_pt_pkt tnt;
> + int pkt_step;
> + int pkt_len;
> + unsigned int cbr;
> + int exec_mode;
> + unsigned int insn_bytes;
> + uint64_t sign_bit;
> + uint64_t sign_bits;
> + uint64_t period;
> + enum intel_pt_period_type period_type;
> + uint64_t period_insn_cnt;
> + uint64_t period_mask;
> + uint64_t period_ticks;
> + uint64_t last_masked_timestamp;
> + bool continuous_period;
> + bool overflow;
> + bool set_fup_tx_flags;
> + unsigned int fup_tx_flags;
> + unsigned int tx_flags;
> + uint64_t timestamp_insn_cnt;
> + const unsigned char *next_buf;
> + size_t next_len;
> + unsigned char temp_buf[INTEL_PT_PKT_MAX_SZ];
> +};
> +
> +static uint64_t intel_pt_lower_power_of_2(uint64_t x)
> +{
> + int i;
> +
> + for (i = 0; x != 1; i++)
> + x >>= 1;
> +
> + return x << i;
> +}

We have in tools/perf/ the same function used in the kernel:

/**
* rounddown_pow_of_two - round the given value down to nearest power of
* two
* @n - parameter
*
* round the given value down to the nearest power of two
* - the result is undefined when n == 0
* - this can be used to initialise global variables from constant data
*/
#define rounddown_pow_of_two(n) \
( \
__builtin_constant_p(n) ? ( \
(1UL << ilog2(n))) : \
__rounddown_pow_of_two(n) \
)

> +
> +static void intel_pt_setup_period(struct intel_pt_decoder *decoder)
> +{
> + if (decoder->period_type == INTEL_PT_PERIOD_TICKS) {
> + uint64_t period;
> +
> + period = intel_pt_lower_power_of_2(decoder->period);
> + decoder->period_mask = ~(period - 1);
> + decoder->period_ticks = period;
> + }
> +}
> +
> +struct intel_pt_decoder *intel_pt_decoder_new(struct intel_pt_params *params)
> +{
> + struct intel_pt_decoder *decoder;
> +
> + if (!params->get_trace || !params->walk_insn)
> + return NULL;
> +
> + decoder = malloc(sizeof(struct intel_pt_decoder));
> + if (!decoder)
> + return NULL;
> +
> + memset(decoder, 0, sizeof(struct intel_pt_decoder));


We have either zalloc or calloc for the above sequence of allocating +
zeroing.

> +
> + decoder->get_trace = params->get_trace;
> + decoder->walk_insn = params->walk_insn;
> + decoder->data = params->data;
> + decoder->return_compression = params->return_compression;

I am not strict about this, but it's common in reviews to ask for
aligning the =.

> +
> + decoder->sign_bit = (uint64_t)1 << 47;
> + decoder->sign_bits = ~(((uint64_t)1 << 48) - 1);
> +
> + decoder->period = params->period;
> + decoder->period_type = params->period_type;
> +
> + intel_pt_setup_period(decoder);
> +
> + return decoder;
> +}
> +
> +static void intel_pt_pop_blk(struct intel_pt_stack *stack)
> +{
> + struct intel_pt_blk *blk;
> +
> + blk = stack->blk;


Also not strict about this, but this makes the function shorter:

+ struct intel_pt_blk *blk = stack->blk;

> + stack->blk = blk->prev;
> + if (!stack->spare)
> + stack->spare = blk;
> + else
> + free(blk);
> +}
> +
> +static uint64_t intel_pt_pop(struct intel_pt_stack *stack)
> +{
> + if (!stack->pos) {
> + if (!stack->blk)
> + return 0;
> + intel_pt_pop_blk(stack);
> + if (!stack->blk)
> + return 0;
> + stack->pos = INTEL_PT_BLK_SIZE;
> + }
> + return stack->blk->ip[--stack->pos];
> +}
> +
> +static int intel_pt_alloc_blk(struct intel_pt_stack *stack)
> +{
> + struct intel_pt_blk *blk;
> +
> + if (stack->spare) {
> + blk = stack->spare;
> + stack->spare = NULL;
> + } else {
> + blk = malloc(sizeof(struct intel_pt_blk));
> + if (!blk)
> + return -ENOMEM;
> + }
> +
> + blk->prev = stack->blk;
> + stack->blk = blk;
> + stack->pos = 0;
> + return 0;
> +}
> +
> +static int intel_pt_push(struct intel_pt_stack *stack, uint64_t ip)
> +{
> + int err;
> +
> + if (!stack->blk || stack->pos == INTEL_PT_BLK_SIZE) {
> + err = intel_pt_alloc_blk(stack);
> + if (err)
> + return err;
> + }
> +
> + stack->blk->ip[stack->pos++] = ip;
> + return 0;
> +}


None of those routines are "intel_pt" specific at all, right?

> +
> +static void intel_pt_clear_stack(struct intel_pt_stack *stack)
> +{
> + while (stack->blk)
> + intel_pt_pop_blk(stack);
> + stack->pos = 0;
> +}
> +
> +static void intel_pt_free_stack(struct intel_pt_stack *stack)
> +{
> + intel_pt_clear_stack(stack);
> + free(stack->blk);
> + free(stack->spare);
> +}

zfree was introduced to zero out these variables, i.e.:

zfree(&stack->blk);
zfree(&stack->spare);

> +
> +void intel_pt_decoder_free(struct intel_pt_decoder *decoder)
> +{
> + intel_pt_free_stack(&decoder->stack);
> + free(decoder);
> +}
> +
> +const char *intel_pt_error_message(int code)
> +{
> + switch (code) {
> + case ENOMEM:
> + return "Memory allocation failed";
> + case ENOSYS:
> + return "Internal error";
> + case EBADMSG:
> + return "Bad packet";
> + case ENODATA:
> + return "No more data";
> + case EILSEQ:
> + return "Failed to get instruction";
> + case ENOENT:
> + return "Trace doesn't match instruction";
> + case EOVERFLOW:
> + return "Overflow packet";
> + case ESHUTDOWN:
> + return "Trace stop packet";
> + default:
> + return "Unknown error!";
> + }


Elsewhere the above idiom is named like intel_pt__strerror(int err) {}, i.e. a
way to map an errno to a string is called "strerror"; see for instance:

[acme@zoo linux]$ grep __strerror tools/perf/*.c
tools/perf/builtin-kvm.c: target__strerror(&kvm->opts.target, err, errbuf, BUFSIZ);
tools/perf/builtin-record.c: target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
tools/perf/builtin-record.c: target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
tools/perf/builtin-top.c: dso__strerror_load(al.map->dso, serr, sizeof(serr));
tools/perf/builtin-top.c: target__strerror(target, status, errbuf, BUFSIZ);
tools/perf/builtin-top.c: target__strerror(target, status, errbuf, BUFSIZ);
tools/perf/builtin-trace.c: debugfs__strerror_open_tp(errno, errbuf, sizeof(errbuf), "sched", "sched_stat_runtime");
tools/perf/builtin-trace.c: debugfs__strerror_open_tp(errno, errbuf, sizeof(errbuf), "raw_syscalls", "sys_(enter|exit)");
tools/perf/builtin-trace.c: perf_evlist__strerror_mmap(evlist, errno, errbuf, sizeof(errbuf));
tools/perf/builtin-trace.c: perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));
tools/perf/builtin-trace.c: target__strerror(&trace.opts.target, err, bf, sizeof(bf));
tools/perf/builtin-trace.c: target__strerror(&trace.opts.target, err, bf, sizeof(bf));
[acme@zoo linux]$

Also, we try to be consistent in separating the class name (intel_pt) from the
method (strerror) with a double underscore.

> +}
> +
> +static uint64_t intel_pt_calc_ip(struct intel_pt_decoder *decoder,
> + const struct intel_pt_pkt *packet,
> + uint64_t last_ip)
> +{
> + uint64_t ip;
> +
> + switch (packet->count) {
> + case 2:
> + ip = (last_ip & (uint64_t)0xffffffffffff0000ULL) |
> + packet->payload;
> + break;
> + case 4:
> + ip = (last_ip & (uint64_t)0xffffffff00000000ULL) |
> + packet->payload;
> + break;
> + case 6:
> + ip = packet->payload;
> + break;
> + default:
> + return 0;
> + }
> +
> + if (ip & decoder->sign_bit)
> + return ip | decoder->sign_bits;
> +
> + return ip;
> +}
> +
> +static inline void intel_pt_set_last_ip(struct intel_pt_decoder *decoder)
> +{
> + decoder->last_ip = intel_pt_calc_ip(decoder, &decoder->packet,
> + decoder->last_ip);
> +}
> +
> +static inline void intel_pt_set_ip(struct intel_pt_decoder *decoder)
> +{
> + intel_pt_set_last_ip(decoder);
> + decoder->ip = decoder->last_ip;
> +}
> +
> +static void intel_pt_decoder_log_packet(struct intel_pt_decoder *decoder)
> +{
> + intel_pt_log_packet(&decoder->packet, decoder->pkt_len, decoder->pos,
> + decoder->buf);
> +}
> +
> +static int intel_pt_bug(struct intel_pt_decoder *decoder)
> +{
> + intel_pt_log("ERROR: Internal error\n");
> + decoder->pkt_state = INTEL_PT_STATE_NO_PSB;
> + return -ENOSYS;
> +}
> +
> +static inline void intel_pt_clear_tx_flags(struct intel_pt_decoder *decoder)
> +{
> + decoder->tx_flags = 0;
> +}
> +
> +static inline void intel_pt_update_in_tx(struct intel_pt_decoder *decoder)
> +{
> + decoder->tx_flags = decoder->packet.payload & INTEL_PT_IN_TX;
> +}
> +
> +static int intel_pt_bad_packet(struct intel_pt_decoder *decoder)
> +{
> + intel_pt_clear_tx_flags(decoder);
> + decoder->pkt_len = 1;
> + decoder->pkt_step = 1;
> + intel_pt_decoder_log_packet(decoder);
> + if (decoder->pkt_state != INTEL_PT_STATE_NO_PSB) {
> + intel_pt_log("ERROR: Bad packet\n");


And all this intel_pt_log(), do we really need a separate logging facility?
What is wrong with using pr_err(), pr_warning(), pr_debug(), as tools/ tries to
do (we need to improve that more, but still), and as the kernel does as well?

> + decoder->pkt_state = INTEL_PT_STATE_ERR1;
> + }
> + return -EBADMSG;
> +}
> +
> +static int intel_pt_get_data(struct intel_pt_decoder *decoder)
> +{
> + struct intel_pt_buffer buffer = { .buf = 0, };
> + int ret;
> +
> + decoder->pkt_step = 0;
> +
> + intel_pt_log("Getting more data\n");
> + ret = decoder->get_trace(&buffer, decoder->data);
> + if (ret)
> + return ret;
> + decoder->buf = buffer.buf;
> + decoder->len = buffer.len;
> + if (!decoder->len) {
> + intel_pt_log("No more data\n");
> + return -ENODATA;
> + }
> + if (!buffer.consecutive) {
> + decoder->ip = 0;
> + decoder->pkt_state = INTEL_PT_STATE_NO_PSB;
> + decoder->ref_timestamp = buffer.ref_timestamp;
> + decoder->timestamp = 0;
> + decoder->state.trace_nr = buffer.trace_nr;
> + intel_pt_log("Reference timestamp 0x%" PRIx64 "\n",
> + decoder->ref_timestamp);
> + return -ENOLINK;
> + }
> +
> + return 0;
> +}
> +
> +static int intel_pt_get_next_data(struct intel_pt_decoder *decoder)
> +{
> + if (!decoder->next_buf)
> + return intel_pt_get_data(decoder);
> +
> + decoder->buf = decoder->next_buf;
> + decoder->len = decoder->next_len;
> + decoder->next_buf = 0;
> + decoder->next_len = 0;
> + return 0;
> +}
> +
> +static int intel_pt_get_split_packet(struct intel_pt_decoder *decoder)
> +{
> + unsigned char *buf = decoder->temp_buf;
> + size_t old_len, len, n;
> + int ret;
> +
> + old_len = decoder->len;
> + len = decoder->len;
> + memcpy(buf, decoder->buf, len);
> +
> + ret = intel_pt_get_data(decoder);
> + if (ret) {
> + decoder->pos += old_len;
> + return ret < 0 ? ret : -EINVAL;
> + }
> +
> + n = INTEL_PT_PKT_MAX_SZ - len;
> + if (n > decoder->len)
> + n = decoder->len;
> + memcpy(buf + len, decoder->buf, n);
> + len += n;
> +
> + ret = intel_pt_get_packet(buf, len, &decoder->packet);
> + if (ret < (int)old_len) {
> + decoder->next_buf = decoder->buf;
> + decoder->next_len = decoder->len;
> + decoder->buf = buf;
> + decoder->len = old_len;
> + return intel_pt_bad_packet(decoder);
> + }
> +
> + decoder->next_buf = decoder->buf + (ret - old_len);
> + decoder->next_len = decoder->len - (ret - old_len);
> +
> + decoder->buf = buf;
> + decoder->len = ret;
> +
> + return ret;
> +}
> +
> +static int intel_pt_get_next_packet(struct intel_pt_decoder *decoder)
> +{
> + int ret;
> +
> + do {
> + decoder->pos += decoder->pkt_step;
> + decoder->buf += decoder->pkt_step;
> + decoder->len -= decoder->pkt_step;
> +
> + if (!decoder->len) {
> + ret = intel_pt_get_next_data(decoder);
> + if (ret)
> + return ret;
> + }
> +
> + ret = intel_pt_get_packet(decoder->buf, decoder->len,
> + &decoder->packet);
> + if (ret == INTEL_PT_NEED_MORE_BYTES &&
> + decoder->len < INTEL_PT_PKT_MAX_SZ && !decoder->next_buf) {
> + ret = intel_pt_get_split_packet(decoder);
> + if (ret < 0)
> + return ret;
> + }
> + if (ret <= 0)
> + return intel_pt_bad_packet(decoder);
> +
> + decoder->pkt_len = ret;
> + decoder->pkt_step = ret;
> + intel_pt_decoder_log_packet(decoder);
> + } while (decoder->packet.type == INTEL_PT_PAD);
> +
> + return 0;
> +}
> +
> +static uint64_t intel_pt_next_period(struct intel_pt_decoder *decoder)
> +{
> + uint64_t timestamp, masked_timestamp;
> +
> + timestamp = decoder->timestamp + decoder->timestamp_insn_cnt;
> + masked_timestamp = timestamp & decoder->period_mask;
> + if (decoder->continuous_period) {
> + if (masked_timestamp != decoder->last_masked_timestamp)
> + return 1;
> + } else {
> + timestamp += 1;
> + masked_timestamp = timestamp & decoder->period_mask;
> + if (masked_timestamp != decoder->last_masked_timestamp) {
> + decoder->last_masked_timestamp = masked_timestamp;
> + decoder->continuous_period = true;
> + }
> + }
> + return decoder->period_ticks - (timestamp - masked_timestamp);
> +}
> +
> +static uint64_t intel_pt_next_sample(struct intel_pt_decoder *decoder)
> +{
> + switch (decoder->period_type) {
> + case INTEL_PT_PERIOD_INSTRUCTIONS:
> + return decoder->period - decoder->period_insn_cnt;
> + case INTEL_PT_PERIOD_TICKS:
> + return intel_pt_next_period(decoder);
> + case INTEL_PT_PERIOD_NONE:
> + default:
> + return 0;
> + }
> +}
> +
> +static void intel_pt_sample_insn(struct intel_pt_decoder *decoder)
> +{
> + uint64_t timestamp, masked_timestamp;
> +
> + switch (decoder->period_type) {
> + case INTEL_PT_PERIOD_INSTRUCTIONS:
> + decoder->period_insn_cnt = 0;
> + break;
> + case INTEL_PT_PERIOD_TICKS:
> + timestamp = decoder->timestamp + decoder->timestamp_insn_cnt;
> + masked_timestamp = timestamp & decoder->period_mask;
> + decoder->last_masked_timestamp = masked_timestamp;
> + break;
> + case INTEL_PT_PERIOD_NONE:
> + default:
> + break;
> + }
> +
> + decoder->state.type |= INTEL_PT_INSTRUCTION;
> +}
> +
> +static int intel_pt_walk_insn(struct intel_pt_decoder *decoder,
> + struct intel_pt_insn *intel_pt_insn, uint64_t ip)
> +{
> + uint64_t max_insn_cnt, insn_cnt = 0;
> + int err;
> +
> + max_insn_cnt = intel_pt_next_sample(decoder);
> +
> + err = decoder->walk_insn(intel_pt_insn, &insn_cnt, &decoder->ip, ip,
> + max_insn_cnt, decoder->data);
> +
> + decoder->timestamp_insn_cnt += insn_cnt;
> + decoder->period_insn_cnt += insn_cnt;
> +
> + if (err) {
> + decoder->pkt_state = INTEL_PT_STATE_ERR2;
> + intel_pt_log_at("ERROR: Failed to get instruction",
> + decoder->ip);
> + if (err == -ENOENT)
> + return -ENOLINK;
> + return -EILSEQ;
> + }
> +
> + if (ip && decoder->ip == ip) {
> + err = -EAGAIN;
> + goto out;
> + }
> +
> + if (max_insn_cnt && insn_cnt >= max_insn_cnt)
> + intel_pt_sample_insn(decoder);
> +
> + if (intel_pt_insn->branch == INTEL_PT_BR_NO_BRANCH) {
> + decoder->state.type = INTEL_PT_INSTRUCTION;
> + decoder->state.from_ip = decoder->ip;
> + decoder->state.to_ip = 0;
> + decoder->ip += intel_pt_insn->length;
> + err = INTEL_PT_RETURN;
> + goto out;
> + }
> +
> + if (intel_pt_insn->op == INTEL_PT_OP_CALL) {
> + /* Zero-length calls are excluded */
> + if (intel_pt_insn->branch != INTEL_PT_BR_UNCONDITIONAL ||
> + intel_pt_insn->rel) {
> + err = intel_pt_push(&decoder->stack, decoder->ip +
> + intel_pt_insn->length);
> + if (err)
> + goto out;
> + }
> + } else if (intel_pt_insn->op == INTEL_PT_OP_RET) {
> + decoder->ret_addr = intel_pt_pop(&decoder->stack);
> + }
> +
> + if (intel_pt_insn->branch == INTEL_PT_BR_UNCONDITIONAL) {
> + decoder->state.from_ip = decoder->ip;
> + decoder->ip += intel_pt_insn->length +
> + intel_pt_insn->rel;
> + decoder->state.to_ip = decoder->ip;
> + err = INTEL_PT_RETURN;
> + }
> +out:
> + decoder->state.insn_op = intel_pt_insn->op;
> + decoder->state.insn_len = intel_pt_insn->length;
> +
> + if (decoder->tx_flags & INTEL_PT_IN_TX)
> + decoder->state.flags |= INTEL_PT_IN_TX;
> +
> + return err;
> +}
> +
> +static int intel_pt_walk_fup(struct intel_pt_decoder *decoder)
> +{
> + struct intel_pt_insn intel_pt_insn;
> + uint64_t ip;
> + int err;
> +
> + ip = decoder->last_ip;
> +
> + while (1) {
> + err = intel_pt_walk_insn(decoder, &intel_pt_insn, ip);
> + if (err == INTEL_PT_RETURN)
> + return 0;
> + if (err == -EAGAIN) {
> + if (decoder->set_fup_tx_flags) {
> + decoder->set_fup_tx_flags = false;
> + decoder->tx_flags = decoder->fup_tx_flags;
> + decoder->state.type = INTEL_PT_TRANSACTION;
> + decoder->state.from_ip = decoder->ip;
> + decoder->state.to_ip = 0;
> + decoder->state.flags = decoder->fup_tx_flags;
> + return 0;
> + }
> + return err;
> + }
> + decoder->set_fup_tx_flags = false;
> + if (err)
> + return err;
> +
> + if (intel_pt_insn.branch == INTEL_PT_BR_INDIRECT) {
> + intel_pt_log_at("ERROR: Unexpected indirect branch",
> + decoder->ip);
> + decoder->pkt_state = INTEL_PT_STATE_ERR_RESYNC;
> + return -ENOENT;
> + }
> +
> + if (intel_pt_insn.branch == INTEL_PT_BR_CONDITIONAL) {
> + intel_pt_log_at("ERROR: Unexpected conditional branch",
> + decoder->ip);
> + decoder->pkt_state = INTEL_PT_STATE_ERR_RESYNC;
> + return -ENOENT;
> + }
> +
> + intel_pt_bug(decoder);
> + }
> +}
> +
> +static int intel_pt_walk_tip(struct intel_pt_decoder *decoder)
> +{
> + struct intel_pt_insn intel_pt_insn;
> + int err;
> +
> + err = intel_pt_walk_insn(decoder, &intel_pt_insn, 0);
> + if (err == INTEL_PT_RETURN)
> + return 0;
> + if (err)
> + return err;
> +
> + if (intel_pt_insn.branch == INTEL_PT_BR_INDIRECT) {
> + if (decoder->pkt_state == INTEL_PT_STATE_TIP_PGD) {
> + decoder->pge = false;
> + decoder->continuous_period = false;
> + decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
> + decoder->state.from_ip = decoder->ip;
> + decoder->state.to_ip = 0;
> + if (decoder->packet.count != 0)
> + decoder->ip = decoder->last_ip;
> + } else {
> + decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
> + decoder->state.from_ip = decoder->ip;
> + if (decoder->packet.count == 0) {
> + decoder->state.to_ip = 0;
> + } else {
> + decoder->state.to_ip = decoder->last_ip;
> + decoder->ip = decoder->last_ip;
> + }
> + }
> + return 0;
> + }
> +
> + if (intel_pt_insn.branch == INTEL_PT_BR_CONDITIONAL) {
> + intel_pt_log_at("ERROR: Conditional branch when expecting indirect branch",
> + decoder->ip);
> + decoder->pkt_state = INTEL_PT_STATE_ERR_RESYNC;
> + return -ENOENT;
> + }
> +
> + return intel_pt_bug(decoder);
> +}
> +
> +static int intel_pt_walk_tnt(struct intel_pt_decoder *decoder)
> +{
> + struct intel_pt_insn intel_pt_insn;
> + int err;
> +
> + while (1) {
> + err = intel_pt_walk_insn(decoder, &intel_pt_insn, 0);
> + if (err == INTEL_PT_RETURN)
> + return 0;
> + if (err)
> + return err;
> +
> + if (intel_pt_insn.op == INTEL_PT_OP_RET) {
> + if (!decoder->return_compression) {
> + intel_pt_log_at("ERROR: RET when expecting conditional branch",
> + decoder->ip);
> + decoder->pkt_state = INTEL_PT_STATE_ERR3;
> + return -ENOENT;
> + }
> + if (!decoder->ret_addr) {
> + intel_pt_log_at("ERROR: Bad RET compression (stack empty)",
> + decoder->ip);
> + decoder->pkt_state = INTEL_PT_STATE_ERR3;
> + return -ENOENT;
> + }
> + if (!(decoder->tnt.payload & BIT63)) {
> + intel_pt_log_at("ERROR: Bad RET compression (TNT=N)",
> + decoder->ip);
> + decoder->pkt_state = INTEL_PT_STATE_ERR3;
> + return -ENOENT;
> + }
> + decoder->tnt.count -= 1;
> + if (!decoder->tnt.count)
> + decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
> + decoder->tnt.payload <<= 1;
> + decoder->state.from_ip = decoder->ip;
> + decoder->ip = decoder->ret_addr;
> + decoder->state.to_ip = decoder->ip;
> + return 0;
> + }
> +
> + if (intel_pt_insn.branch == INTEL_PT_BR_INDIRECT) {
> + /* Handle deferred TIPs */
> + err = intel_pt_get_next_packet(decoder);
> + if (err)
> + return err;
> + if (decoder->packet.type != INTEL_PT_TIP ||
> + decoder->packet.count == 0) {
> + intel_pt_log_at("ERROR: Missing deferred TIP for indirect branch",
> + decoder->ip);
> + decoder->pkt_state = INTEL_PT_STATE_ERR3;
> + decoder->pkt_step = 0;
> + return -ENOENT;
> + }
> + intel_pt_set_last_ip(decoder);
> + decoder->state.from_ip = decoder->ip;
> + decoder->state.to_ip = decoder->last_ip;
> + decoder->ip = decoder->last_ip;
> + return 0;
> + }
> +
> + if (intel_pt_insn.branch == INTEL_PT_BR_CONDITIONAL) {
> + decoder->tnt.count -= 1;
> + if (!decoder->tnt.count)
> + decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
> + if (decoder->tnt.payload & BIT63) {
> + decoder->tnt.payload <<= 1;
> + decoder->state.from_ip = decoder->ip;
> + decoder->ip += intel_pt_insn.length +
> + intel_pt_insn.rel;
> + decoder->state.to_ip = decoder->ip;
> + return 0;
> + }
> + /* Instruction sample for a non-taken branch */
> + if (decoder->state.type & INTEL_PT_INSTRUCTION) {
> + decoder->tnt.payload <<= 1;
> + decoder->state.type = INTEL_PT_INSTRUCTION;
> + decoder->state.from_ip = decoder->ip;
> + decoder->state.to_ip = 0;
> + decoder->ip += intel_pt_insn.length;
> + return 0;
> + }
> + decoder->ip += intel_pt_insn.length;
> + if (!decoder->tnt.count)
> + return -EAGAIN;
> + decoder->tnt.payload <<= 1;
> + continue;
> + }
> +
> + return intel_pt_bug(decoder);
> + }
> +}
> +
> +static int intel_pt_mode_tsx(struct intel_pt_decoder *decoder, bool *no_tip)
> +{
> + unsigned int fup_tx_flags;
> + int err;
> +
> + fup_tx_flags = decoder->packet.payload &
> + (INTEL_PT_IN_TX | INTEL_PT_ABORT_TX);
> + err = intel_pt_get_next_packet(decoder);
> + if (err)
> + return err;
> + if (decoder->packet.type == INTEL_PT_FUP) {
> + decoder->fup_tx_flags = fup_tx_flags;
> + decoder->set_fup_tx_flags = true;
> + if (!(decoder->fup_tx_flags & INTEL_PT_ABORT_TX))
> + *no_tip = true;
> + } else {
> + intel_pt_log_at("ERROR: Missing FUP after MODE.TSX",
> + decoder->pos);
> + intel_pt_update_in_tx(decoder);
> + }
> + return 0;
> +}
> +
> +static void intel_pt_calc_tsc_timestamp(struct intel_pt_decoder *decoder)
> +{
> + uint64_t timestamp;
> +
> + if (decoder->ref_timestamp) {
> + timestamp = decoder->packet.payload |
> + (decoder->ref_timestamp & (0xffULL << 56));
> + if (timestamp < decoder->ref_timestamp) {
> + if (decoder->ref_timestamp - timestamp > (1ULL << 55))
> + timestamp += (1ULL << 56);
> + } else {
> + if (timestamp - decoder->ref_timestamp > (1ULL << 55))
> + timestamp -= (1ULL << 56);
> + }
> + decoder->tsc_timestamp = timestamp;
> + decoder->timestamp = timestamp;
> + decoder->ref_timestamp = 0;
> + decoder->timestamp_insn_cnt = 0;
> + } else if (decoder->timestamp) {
> + timestamp = decoder->packet.payload |
> + (decoder->timestamp & (0xffULL << 56));
> + if (timestamp < decoder->timestamp &&
> + decoder->timestamp - timestamp < 0x100) {
> + intel_pt_log_to("ERROR: Suppressing backwards timestamp",
> + timestamp);
> + timestamp = decoder->timestamp;
> + }
> + while (timestamp < decoder->timestamp) {
> + intel_pt_log_to("Wraparound timestamp", timestamp);
> + timestamp += (1ULL << 56);
> + }
> + decoder->tsc_timestamp = timestamp;
> + decoder->timestamp = timestamp;
> + decoder->timestamp_insn_cnt = 0;
> + }
> +
> + intel_pt_log_to("Setting timestamp", decoder->timestamp);
> +}
> +
> +static int intel_pt_overflow(struct intel_pt_decoder *decoder)
> +{
> + intel_pt_log("ERROR: Buffer overflow\n");
> + intel_pt_clear_tx_flags(decoder);
> + decoder->pkt_state = INTEL_PT_STATE_ERR_RESYNC;
> + decoder->overflow = true;
> + return -EOVERFLOW;
> +}
> +
> +/* Walk PSB+ packets when already in sync. */
> +static int intel_pt_walk_psbend(struct intel_pt_decoder *decoder)
> +{
> + int err;
> +
> + while (1) {
> + err = intel_pt_get_next_packet(decoder);
> + if (err)
> + return err;
> +
> + switch (decoder->packet.type) {
> + case INTEL_PT_PSBEND:
> + return 0;
> +
> + case INTEL_PT_TIP_PGD:
> + case INTEL_PT_TIP_PGE:
> + case INTEL_PT_TIP:
> + case INTEL_PT_TNT:
> + case INTEL_PT_BAD:
> + case INTEL_PT_PSB:
> + intel_pt_log("ERROR: Unexpected packet\n");
> + return -EAGAIN;
> +
> + case INTEL_PT_OVF:
> + return intel_pt_overflow(decoder);
> +
> + case INTEL_PT_TSC:
> + intel_pt_calc_tsc_timestamp(decoder);
> + break;
> +
> + case INTEL_PT_CBR:
> + decoder->cbr = decoder->packet.payload;
> + break;
> +
> + case INTEL_PT_MODE_EXEC:
> + decoder->exec_mode = decoder->packet.payload;
> + break;
> +
> + case INTEL_PT_PIP:
> + decoder->cr3 = decoder->packet.payload;
> + break;
> +
> + case INTEL_PT_FUP:
> + decoder->pge = true;
> + break;
> +
> + case INTEL_PT_MODE_TSX:
> + intel_pt_update_in_tx(decoder);
> + break;
> +
> + case INTEL_PT_PAD:
> + default:
> + break;
> + }
> + }
> +}
> +
> +static int intel_pt_walk_fup_tip(struct intel_pt_decoder *decoder)
> +{
> + int err;
> +
> + if (decoder->tx_flags & INTEL_PT_ABORT_TX) {
> + decoder->tx_flags = 0;
> + decoder->state.flags &= ~INTEL_PT_IN_TX;
> + decoder->state.flags |= INTEL_PT_ABORT_TX;
> + } else {
> + decoder->state.flags |= INTEL_PT_ASYNC;
> + }
> +
> + while (1) {
> + err = intel_pt_get_next_packet(decoder);
> + if (err)
> + return err;
> +
> + switch (decoder->packet.type) {
> + case INTEL_PT_TNT:
> + case INTEL_PT_FUP:
> + case INTEL_PT_PSB:
> + case INTEL_PT_TSC:
> + case INTEL_PT_CBR:
> + case INTEL_PT_MODE_TSX:
> + case INTEL_PT_BAD:
> + case INTEL_PT_PSBEND:
> + intel_pt_log("ERROR: Missing TIP after FUP\n");
> + decoder->pkt_state = INTEL_PT_STATE_ERR3;
> + return -ENOENT;
> +
> + case INTEL_PT_OVF:
> + return intel_pt_overflow(decoder);
> +
> + case INTEL_PT_TIP_PGD:
> + decoder->state.from_ip = decoder->ip;
> + decoder->state.to_ip = 0;
> + if (decoder->packet.count != 0) {
> + intel_pt_set_ip(decoder);
> + intel_pt_log("Omitting PGD ip " x64_fmt "\n",
> + decoder->ip);
> + }
> + decoder->pge = false;
> + decoder->continuous_period = false;
> + return 0;
> +
> + case INTEL_PT_TIP_PGE:
> + decoder->pge = true;
> + intel_pt_log("Omitting PGE ip " x64_fmt "\n",
> + decoder->ip);
> + decoder->state.from_ip = 0;
> + if (decoder->packet.count == 0) {
> + decoder->state.to_ip = 0;
> + } else {
> + intel_pt_set_ip(decoder);
> + decoder->state.to_ip = decoder->ip;
> + }
> + return 0;
> +
> + case INTEL_PT_TIP:
> + decoder->state.from_ip = decoder->ip;
> + if (decoder->packet.count == 0) {
> + decoder->state.to_ip = 0;
> + } else {
> + intel_pt_set_ip(decoder);
> + decoder->state.to_ip = decoder->ip;
> + }
> + return 0;
> +
> + case INTEL_PT_PIP:
> + decoder->cr3 = decoder->packet.payload;
> + break;
> +
> + case INTEL_PT_MODE_EXEC:
> + decoder->exec_mode = decoder->packet.payload;
> + break;
> +
> + case INTEL_PT_PAD:
> + break;
> +
> + default:
> + return intel_pt_bug(decoder);
> + }
> + }
> +}
> +
> +static int intel_pt_walk_trace(struct intel_pt_decoder *decoder)
> +{ /* Fetch packets in a loop, dispatching on packet type, until a case returns */
> + bool no_tip = false; /* passed to intel_pt_mode_tsx(), which may set it; consumed by the FUP case */
> + int err;
> +
> + while (1) {
> + err = intel_pt_get_next_packet(decoder);
> + if (err)
> + return err;
> +next: /* goto next re-dispatches decoder->packet without fetching another packet */
> + switch (decoder->packet.type) {
> + case INTEL_PT_TNT:
> + if (!decoder->packet.count)
> + break; /* empty TNT: move on to the next packet */
> + decoder->tnt = decoder->packet;
> + decoder->pkt_state = INTEL_PT_STATE_TNT;
> + err = intel_pt_walk_tnt(decoder);
> + if (err == -EAGAIN)
> + break; /* carry on with the next packet */
> + return err;
> +
> + case INTEL_PT_TIP_PGD:
> + if (decoder->packet.count != 0) /* last IP is only updated for a non-zero count */
> + intel_pt_set_last_ip(decoder);
> + decoder->pkt_state = INTEL_PT_STATE_TIP_PGD;
> + return intel_pt_walk_tip(decoder);
> +
> + case INTEL_PT_TIP_PGE: {
> + decoder->pge = true;
> + if (decoder->packet.count == 0) {
> + intel_pt_log_at("Skipping zero TIP.PGE",
> + decoder->pos);
> + break;
> + }
> + intel_pt_set_ip(decoder);
> + decoder->state.from_ip = 0; /* reported with from_ip 0 and to_ip set to the new IP */
> + decoder->state.to_ip = decoder->ip;
> + return 0;
> + }
> +
> + case INTEL_PT_OVF:
> + return intel_pt_overflow(decoder);
> +
> + case INTEL_PT_TIP:
> + if (decoder->packet.count != 0) /* last IP is only updated for a non-zero count */
> + intel_pt_set_last_ip(decoder);
> + decoder->pkt_state = INTEL_PT_STATE_TIP;
> + return intel_pt_walk_tip(decoder);
> +
> + case INTEL_PT_FUP:
> + if (decoder->packet.count == 0) {
> + intel_pt_log_at("Skipping zero FUP",
> + decoder->pos);
> + no_tip = false;
> + break;
> + }
> + intel_pt_set_last_ip(decoder);
> + err = intel_pt_walk_fup(decoder);
> + if (err != -EAGAIN) {
> + if (err)
> + return err;
> + if (no_tip) /* success: record the state intel_pt_decode() resumes from */
> + decoder->pkt_state =
> + INTEL_PT_STATE_FUP_NO_TIP;
> + else
> + decoder->pkt_state = INTEL_PT_STATE_FUP;
> + return 0;
> + }
> + if (no_tip) { /* -EAGAIN with no_tip set: clear it and continue with the next packet */
> + no_tip = false;
> + break;
> + }
> + return intel_pt_walk_fup_tip(decoder);
> +
> + case INTEL_PT_PSB:
> + intel_pt_clear_stack(&decoder->stack);
> + err = intel_pt_walk_psbend(decoder);
> + if (err == -EAGAIN)
> + goto next; /* re-dispatch the packet currently in decoder->packet */
> + if (err)
> + return err;
> + break;
> +
> + case INTEL_PT_PIP:
> + decoder->cr3 = decoder->packet.payload;
> + break;
> +
> + case INTEL_PT_TSC:
> + intel_pt_calc_tsc_timestamp(decoder);
> + break;
> +
> + case INTEL_PT_CBR:
> + decoder->cbr = decoder->packet.payload;
> + break;
> +
> + case INTEL_PT_MODE_EXEC:
> + decoder->exec_mode = decoder->packet.payload;
> + break;
> +
> + case INTEL_PT_MODE_TSX:
> + /* MODE_TSX need not be followed by FUP */
> + if (!decoder->pge) {
> + intel_pt_update_in_tx(decoder);
> + break;
> + }
> + err = intel_pt_mode_tsx(decoder, &no_tip);
> + if (err)
> + return err;
> + goto next; /* dispatch on decoder->packet again without fetching a new packet */
> +
> + case INTEL_PT_BAD: /* Does not happen */
> + return intel_pt_bug(decoder);
> +
> + case INTEL_PT_PSBEND:
> + case INTEL_PT_PAD:
> + break; /* nothing to do for these packets */
> +
> + default:
> + return intel_pt_bug(decoder);
> + }
> + }
> +}
> +
> +/*
> + * Walk PSB+ packets to get in sync.
> + *
> + * Consumes packets until PSBEND, updating the decoder's IP, timestamp, CBR,
> + * CR3, exec mode and TSX state from the packets seen on the way.
> + *
> + * Return: 0 when PSBEND is reached; -ENOENT on an unexpected TIP, TIP.PGE,
> + * TIP.PGD or TNT packet; otherwise the return value of
> + * intel_pt_get_next_packet(), intel_pt_overflow() or intel_pt_bug().
> + */
> +static int intel_pt_walk_psb(struct intel_pt_decoder *decoder)
> +{
> + int err;
> +
> + while (1) {
> + err = intel_pt_get_next_packet(decoder);
> + if (err)
> + return err;
> +
> + switch (decoder->packet.type) {
> + case INTEL_PT_TIP_PGD:
> + decoder->continuous_period = false; /* Fall through */
> + case INTEL_PT_TIP_PGE:
> + case INTEL_PT_TIP:
> + intel_pt_log("ERROR: Unexpected packet\n");
> + return -ENOENT;
> +
> + case INTEL_PT_FUP:
> + decoder->pge = true;
> + if (decoder->last_ip || decoder->packet.count == 6 ||
> + decoder->packet.count == 0) {
> + uint64_t current_ip = decoder->ip; /* IP before this FUP is applied */
> +
> + intel_pt_set_ip(decoder);
> + if (current_ip) /* only log when a non-zero IP was already set */
> + intel_pt_log_to("Setting IP",
> + decoder->ip);
> + }
> + break;
> +
> + case INTEL_PT_TSC:
> + intel_pt_calc_tsc_timestamp(decoder);
> + break;
> +
> + case INTEL_PT_CBR:
> + decoder->cbr = decoder->packet.payload;
> + break;
> +
> + case INTEL_PT_PIP:
> + decoder->cr3 = decoder->packet.payload;
> + break;
> +
> + case INTEL_PT_MODE_EXEC:
> + decoder->exec_mode = decoder->packet.payload;
> + break;
> +
> + case INTEL_PT_MODE_TSX:
> + intel_pt_update_in_tx(decoder);
> + break;
> +
> + case INTEL_PT_TNT:
> + intel_pt_log("ERROR: Unexpected packet\n");
> + if (decoder->ip) /* error state depends on whether an IP is known */
> + decoder->pkt_state = INTEL_PT_STATE_ERR4;
> + else
> + decoder->pkt_state = INTEL_PT_STATE_ERR3;
> + return -ENOENT;
> +
> + case INTEL_PT_BAD: /* Does not happen */
> + return intel_pt_bug(decoder);
> +
> + case INTEL_PT_OVF:
> + return intel_pt_overflow(decoder);
> +
> + case INTEL_PT_PSBEND:
> + return 0; /* end of PSB+ */
> +
> + case INTEL_PT_PSB:
> + case INTEL_PT_PAD:
> + default:
> + break; /* ignored: keep walking */
> + }
> + }
> +}
> +/*
> + * Walk packets until decoder->ip is non-zero.
> + *
> + * Return: 0 once an IP has been established (from TIP/TIP.PGE/TIP.PGD, from a
> + * FUP while decoder->overflow is set, or from a PSB+ walk); otherwise the
> + * return value of intel_pt_get_next_packet(), intel_pt_overflow(),
> + * intel_pt_bug() or intel_pt_walk_psb().
> + */
> +static int intel_pt_walk_to_ip(struct intel_pt_decoder *decoder)
> +{
> + int err;
> +
> + while (1) {
> + err = intel_pt_get_next_packet(decoder);
> + if (err)
> + return err;
> +
> + switch (decoder->packet.type) {
> + case INTEL_PT_TIP_PGD:
> + decoder->continuous_period = false; /* Fall through */
> + case INTEL_PT_TIP_PGE:
> + case INTEL_PT_TIP:
> + decoder->pge = decoder->packet.type != INTEL_PT_TIP_PGD; /* false only for TIP.PGD */
> + if (decoder->last_ip || decoder->packet.count == 6 ||
> + decoder->packet.count == 0)
> + intel_pt_set_ip(decoder);
> + if (decoder->ip)
> + return 0;
> + break;
> +
> + case INTEL_PT_FUP:
> + if (decoder->overflow) { /* a FUP may only end the walk while overflow is set */
> + if (decoder->last_ip ||
> + decoder->packet.count == 6 ||
> + decoder->packet.count == 0)
> + intel_pt_set_ip(decoder);
> + if (decoder->ip)
> + return 0;
> + }
> + if (decoder->packet.count) /* otherwise just remember it as the last IP */
> + intel_pt_set_last_ip(decoder);
> + break;
> +
> + case INTEL_PT_TSC:
> + intel_pt_calc_tsc_timestamp(decoder);
> + break;
> +
> + case INTEL_PT_CBR:
> + decoder->cbr = decoder->packet.payload;
> + break;
> +
> + case INTEL_PT_PIP:
> + decoder->cr3 = decoder->packet.payload;
> + break;
> +
> + case INTEL_PT_MODE_EXEC:
> + decoder->exec_mode = decoder->packet.payload;
> + break;
> +
> + case INTEL_PT_MODE_TSX:
> + intel_pt_update_in_tx(decoder);
> + break;
> +
> + case INTEL_PT_OVF:
> + return intel_pt_overflow(decoder);
> +
> + case INTEL_PT_BAD: /* Does not happen */
> + return intel_pt_bug(decoder);
> +
> + case INTEL_PT_PSB:
> + err = intel_pt_walk_psb(decoder);
> + if (err)
> + return err;
> + if (decoder->ip) {
> + /* Do not have a sample */
> + decoder->state.type = 0;
> + return 0;
> + }
> + break;
> +
> + case INTEL_PT_TNT:
> + case INTEL_PT_PSBEND:
> + case INTEL_PT_PAD:
> + default:
> + break; /* ignored while looking for an IP */
> + }
> + }
> +}
> +/*
> + * Scan forward with intel_pt_walk_to_ip() and, on success, switch the decoder
> + * to INTEL_PT_STATE_IN_SYNC, clear the overflow flag and report the new IP in
> + * state.to_ip (with state.from_ip set to 0).
> + *
> + * Return: 0 on success, otherwise the error from intel_pt_walk_to_ip().
> + */
> +static int intel_pt_sync_ip(struct intel_pt_decoder *decoder)
> +{
> + int err;
> +
> + intel_pt_log("Scanning for full IP\n");
> + err = intel_pt_walk_to_ip(decoder);
> + if (err)
> + return err;
> +
> + decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
> + decoder->overflow = false;
> +
> + decoder->state.from_ip = 0;
> + decoder->state.to_ip = decoder->ip;
> + intel_pt_log_to("Setting IP", decoder->ip);
> +
> + return 0;
> +}
> +/*
> + * Check whether the current buffer ends with the beginning of a PSB.
> + *
> + * Return: the length of the longest proper prefix of INTEL_PT_PSB_STR that
> + * matches the last bytes of the buffer, or 0 if there is none.
> + */
> +static int intel_pt_part_psb(struct intel_pt_decoder *decoder)
> +{
> + const unsigned char *end = decoder->buf + decoder->len;
> + size_t i;
> +
> + for (i = INTEL_PT_PSB_LEN - 1; i; i--) { /* longest candidate first */
> + if (i > decoder->len) /* candidate is longer than the buffer itself */
> + continue;
> + if (!memcmp(end - i, INTEL_PT_PSB_STR, i))
> + return i;
> + }
> + return 0;
> +}
> +/*
> + * Check whether the current buffer starts with the remainder of a PSB whose
> + * first @part_psb bytes have already been seen.
> + *
> + * Return: the number of remaining PSB bytes (INTEL_PT_PSB_LEN - @part_psb) if
> + * the buffer starts with them, otherwise 0.
> + */
> +static int intel_pt_rest_psb(struct intel_pt_decoder *decoder, int part_psb)
> +{
> + size_t rest_psb = INTEL_PT_PSB_LEN - part_psb;
> + const char *psb = INTEL_PT_PSB_STR;
> +
> + if (rest_psb > decoder->len || /* buffer too short to hold the remainder */
> + memcmp(decoder->buf, psb + part_psb, rest_psb))
> + return 0;
> +
> + return rest_psb;
> +}
> +/*
> + * Handle a PSB that may be split across two buffers. @part_psb is the number
> + * of PSB bytes found at the end of the current buffer.
> + *
> + * The current buffer is consumed and the next one fetched. If the next buffer
> + * starts with the rest of the PSB, decoder->buf is pointed at a complete PSB
> + * in decoder->temp_buf and the remainder of the new buffer is saved in
> + * decoder->next_buf / decoder->next_len. If it does not, the newly fetched
> + * buffer is left as the current buffer.
> + *
> + * Return: 0 in both of those cases, otherwise the error from
> + * intel_pt_get_next_data().
> + */
> +static int intel_pt_get_split_psb(struct intel_pt_decoder *decoder,
> + int part_psb)
> +{
> + int rest_psb, ret;
> +
> + decoder->pos += decoder->len; /* consume the whole current buffer */
> + decoder->len = 0;
> +
> + ret = intel_pt_get_next_data(decoder);
> + if (ret)
> + return ret;
> +
> + rest_psb = intel_pt_rest_psb(decoder, part_psb);
> + if (!rest_psb)
> + return 0; /* not a split PSB after all */
> +
> + decoder->pos -= part_psb; /* position of the first byte of the PSB */
> + decoder->next_buf = decoder->buf + rest_psb;
> + decoder->next_len = decoder->len - rest_psb;
> + memcpy(decoder->temp_buf, INTEL_PT_PSB_STR, INTEL_PT_PSB_LEN);
> + decoder->buf = decoder->temp_buf;
> + decoder->len = INTEL_PT_PSB_LEN;
> +
> + return 0;
> +}
> +/*
> + * Search the trace data for the next PSB, fetching more data as needed and
> + * allowing for a PSB split across two buffers.
> + *
> + * Return: the result of intel_pt_get_next_packet() once a PSB is located, or
> + * the error from intel_pt_get_next_data() / intel_pt_get_split_psb().
> + */
> +static int intel_pt_scan_for_psb(struct intel_pt_decoder *decoder)
> +{
> + unsigned char *next;
> + int ret;
> +
> + intel_pt_log("Scanning for PSB\n");
> + while (1) {
> + if (!decoder->len) {
> + ret = intel_pt_get_next_data(decoder);
> + if (ret)
> + return ret;
> + }
> +
> + next = memmem(decoder->buf, decoder->len, INTEL_PT_PSB_STR,
> + INTEL_PT_PSB_LEN);
> + if (!next) {
> + int part_psb;
> +
> + part_psb = intel_pt_part_psb(decoder); /* does the buffer end mid-PSB? */
> + if (part_psb) {
> + ret = intel_pt_get_split_psb(decoder, part_psb);
> + if (ret)
> + return ret;
> + } else {
> + decoder->pos += decoder->len; /* no PSB here: discard the buffer */
> + decoder->len = 0;
> + }
> + continue;
> + }
> +
> + decoder->pkt_step = next - decoder->buf; /* offset of the PSB; presumably skipped by intel_pt_get_next_packet() - confirm */
> + return intel_pt_get_next_packet(decoder);
> + }
> +}
> +/*
> + * Synchronize the decoder from scratch: reset the IP-related state, find the
> + * next PSB, walk the PSB+ packets and, if they did not provide an IP, carry
> + * on scanning for one with intel_pt_sync_ip().
> + *
> + * Return: 0 on success, otherwise the error from the failing step.
> + */
> +static int intel_pt_sync(struct intel_pt_decoder *decoder)
> +{
> + int err;
> +
> + decoder->pge = false;
> + decoder->continuous_period = false;
> + decoder->last_ip = 0;
> + decoder->ip = 0;
> + intel_pt_clear_stack(&decoder->stack);
> +
> + err = intel_pt_scan_for_psb(decoder);
> + if (err)
> + return err;
> +
> + decoder->pkt_state = INTEL_PT_STATE_NO_IP; /* PSB found, IP not known yet */
> +
> + err = intel_pt_walk_psb(decoder);
> + if (err)
> + return err;
> +
> + if (decoder->ip) {
> + decoder->state.type = 0; /* Do not have a sample */
> + decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
> + } else {
> + return intel_pt_sync_ip(decoder);
> + }
> +
> + return 0;
> +}
> +/*
> + * Run the decoder state machine and return the resulting state.
> + *
> + * The handler for the current decoder->pkt_state is called, and called again
> + * for as long as it returns -ENOLINK. The final error code, timestamps and
> + * CR3 are then copied into decoder->state. On error, state.from_ip is set to
> + * the decoder's current IP.
> + *
> + * Return: a pointer to decoder->state; check its err member for errors.
> + */
> +const struct intel_pt_state *intel_pt_decode(struct intel_pt_decoder *decoder)
> +{
> + int err;
> +
> + do {
> + decoder->state.type = INTEL_PT_BRANCH; /* default; handlers may override, e.g. 0 when there is no sample */
> + decoder->state.flags = 0;
> +
> + switch (decoder->pkt_state) {
> + case INTEL_PT_STATE_NO_PSB:
> + err = intel_pt_sync(decoder);
> + break;
> + case INTEL_PT_STATE_NO_IP:
> + decoder->last_ip = 0; /* forget the last IP before scanning for a new one */
> + /* Fall through */
> + case INTEL_PT_STATE_ERR_RESYNC:
> + err = intel_pt_sync_ip(decoder);
> + break;
> + case INTEL_PT_STATE_IN_SYNC:
> + err = intel_pt_walk_trace(decoder);
> + break;
> + case INTEL_PT_STATE_TNT:
> + err = intel_pt_walk_tnt(decoder);
> + if (err == -EAGAIN) /* fall back to the main packet loop */
> + err = intel_pt_walk_trace(decoder);
> + break;
> + case INTEL_PT_STATE_TIP:
> + case INTEL_PT_STATE_TIP_PGD:
> + err = intel_pt_walk_tip(decoder);
> + break;
> + case INTEL_PT_STATE_FUP:
> + decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
> + err = intel_pt_walk_fup(decoder);
> + if (err == -EAGAIN)
> + err = intel_pt_walk_fup_tip(decoder);
> + else if (!err) /* success: stay in the FUP state for the next call */
> + decoder->pkt_state = INTEL_PT_STATE_FUP;
> + break;
> + case INTEL_PT_STATE_FUP_NO_TIP:
> + decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
> + err = intel_pt_walk_fup(decoder);
> + if (err == -EAGAIN) /* fall back to the main packet loop */
> + err = intel_pt_walk_trace(decoder);
> + break;
> + default:
> + err = intel_pt_bug(decoder);
> + break;
> + }
> + } while (err == -ENOLINK); /* -ENOLINK means: run the state machine again */
> +
> + decoder->state.err = err;
> + decoder->state.timestamp = decoder->timestamp;
> + decoder->state.est_timestamp = decoder->timestamp + /* estimate: timestamp + 2 * timestamp_insn_cnt */
> + (decoder->timestamp_insn_cnt << 1);
> + decoder->state.cr3 = decoder->cr3;
> +
> + if (err)
> + decoder->state.from_ip = decoder->ip;
> +
> + return &decoder->state;
> +}
> +/*
> + * Return true if @buf (of size @len) starts with a PSB packet. The memmem()
> + * haystack is limited to INTEL_PT_PSB_LEN bytes, so a match can only be at
> + * offset 0.
> + */
> +static bool intel_pt_at_psb(unsigned char *buf, size_t len)
> +{
> + if (len < INTEL_PT_PSB_LEN)
> + return false;
> + return memmem(buf, INTEL_PT_PSB_LEN, INTEL_PT_PSB_STR, /* non-NULL pointer converts to true */
> + INTEL_PT_PSB_LEN);
> +}
> +
> +/**
> + * intel_pt_next_psb - move buffer pointer to the start of the next PSB packet.
> + * @buf: pointer to buffer pointer
> + * @len: pointer to size of buffer
> + *
> + * Updates the buffer pointer to point to the start of the next PSB packet if
> + * there is one, otherwise the buffer pointer is unchanged. If @buf is updated,
> + * @len is adjusted accordingly. A PSB located at the very start of the buffer
> + * counts as found, in which case neither value changes.
> + *
> + * Return: %true if a PSB packet is found, %false otherwise.
> + */
> +static bool intel_pt_next_psb(unsigned char **buf, size_t *len)
> +{
> + unsigned char *next;
> +
> + next = memmem(*buf, *len, INTEL_PT_PSB_STR, INTEL_PT_PSB_LEN);
> + if (next) {
> + *len -= next - *buf; /* drop the bytes in front of the PSB */
> + *buf = next;
> + return true;
> + }
> + return false;
> +}
> +
> +/**
> + * intel_pt_step_psb - move buffer pointer to the start of the following PSB
> + * packet.
> + * @buf: pointer to buffer pointer
> + * @len: pointer to size of buffer
> + *
> + * Updates the buffer pointer to point to the start of the following PSB packet
> + * (skipping the PSB at @buf itself) if there is one, otherwise the buffer
> + * pointer is unchanged. If @buf is updated, @len is adjusted accordingly.
> + *
> + * Return: %true if a PSB packet is found, %false otherwise.
> + */
> +static bool intel_pt_step_psb(unsigned char **buf, size_t *len)
> +{
> + unsigned char *next;
> +
> + if (!*len) /* empty buffer: also keeps *len - 1 below from wrapping */
> + return false;
> +
> + next = memmem(*buf + 1, *len - 1, INTEL_PT_PSB_STR, INTEL_PT_PSB_LEN); /* start one byte in so a PSB at *buf is not found again */
> + if (next) {
> + *len -= next - *buf;
> + *buf = next;
> + return true;
> + }
> + return false;
> +}
> +
> +/**
> + * intel_pt_last_psb - find the last PSB packet in a buffer.
> + * @buf: buffer
> + * @len: size of buffer
> + *
> + * This function finds the last PSB in a buffer by searching backwards for the
> + * first byte of the PSB pattern and then comparing the remaining bytes.
> + *
> + * Return: A pointer to the last PSB in @buf if found, %NULL otherwise.
> + */
> +static unsigned char *intel_pt_last_psb(unsigned char *buf, size_t len)
> +{
> + const char *n = INTEL_PT_PSB_STR;
> + unsigned char *p;
> + size_t k;
> +
> + if (len < INTEL_PT_PSB_LEN)
> + return NULL;
> +
> + k = len - INTEL_PT_PSB_LEN + 1; /* number of offsets at which a whole PSB still fits */
> + while (1) {
> + p = memrchr(buf, n[0], k); /* last occurrence of the first PSB byte in buf[0..k) */
> + if (!p)
> + return NULL;
> + if (!memcmp(p + 1, n + 1, INTEL_PT_PSB_LEN - 1))
> + return p;
> + k = p - buf; /* next time search strictly before p */
> + if (!k)
> + return NULL;
> + }
> +}
> +
> +/**
> + * intel_pt_next_tsc - find and return next TSC.
> + * @buf: buffer
> + * @len: size of buffer
> + * @tsc: TSC value returned
> + *
> + * Find a TSC packet in @buf and return the TSC value. This function assumes
> + * that @buf starts at a PSB and that PSB+ will contain TSC and so stops if a
> + * PSBEND packet is found.
> + *
> + * Return: %true if TSC is found, %false otherwise, including when
> + * intel_pt_get_packet() returns a value less than or equal to zero.
> + */
> +static bool intel_pt_next_tsc(unsigned char *buf, size_t len, uint64_t *tsc)
> +{
> + struct intel_pt_pkt packet;
> + int ret;
> +
> + while (len) {
> + ret = intel_pt_get_packet(buf, len, &packet);
> + if (ret <= 0)
> + return false;
> + if (packet.type == INTEL_PT_TSC) {
> + *tsc = packet.payload;
> + return true;
> + }
> + if (packet.type == INTEL_PT_PSBEND)
> + return false;
> + buf += ret; /* ret is used as the number of bytes to advance */
> + len -= ret;
> + }
> + return false;
> +}
> +
> +/**
> + * intel_pt_tsc_cmp - compare 7-byte TSCs.
> + * @tsc1: first TSC to compare
> + * @tsc2: second TSC to compare
> + *
> + * This function compares 7-byte TSC values allowing for the possibility that
> + * TSC wrapped around. Generally it is not possible to know if TSC has wrapped
> + * around so for that purpose this function assumes the absolute difference is
> + * less than half the maximum difference. A difference of half the range or
> + * more is therefore treated as a wrap-around and the result is inverted.
> + *
> + * Return: %-1 if @tsc1 is before @tsc2, %0 if @tsc1 == @tsc2, %1 if @tsc1 is
> + * after @tsc2.
> + */
> +static int intel_pt_tsc_cmp(uint64_t tsc1, uint64_t tsc2)
> +{
> + const uint64_t halfway = (1ULL << 55); /* half of the 56-bit (7-byte) TSC range */
> +
> + if (tsc1 == tsc2)
> + return 0;
> +
> + if (tsc1 < tsc2) {
> + if (tsc2 - tsc1 < halfway)
> + return -1;
> + else
> + return 1; /* gap too large: assume tsc2 is from before a wrap */
> + } else {
> + if (tsc1 - tsc2 < halfway)
> + return 1;
> + else
> + return -1; /* gap too large: assume tsc1 is from before a wrap */
> + }
> +}
> +
> +/**
> + * intel_pt_find_overlap_tsc - determine start of non-overlapped trace data
> + * using TSC.
> + * @buf_a: first buffer
> + * @len_a: size of first buffer
> + * @buf_b: second buffer
> + * @len_b: size of second buffer
> + *
> + * If the trace contains TSC we can look at the last TSC of @buf_a and the
> + * first TSC of @buf_b in order to determine if the buffers overlap, and then
> + * walk forward in @buf_b until a later TSC is found. A precondition is that
> + * @buf_a and @buf_b are positioned at a PSB.
> + *
> + * If no TSC can be obtained from the last two PSBs of @buf_a, the buffers are
> + * assumed not to overlap and @buf_b is returned unchanged.
> + *
> + * Return: A pointer into @buf_b from where non-overlapped data starts, or
> + * @buf_b + @len_b if there is no non-overlapped data.
> + */
> +static unsigned char *intel_pt_find_overlap_tsc(unsigned char *buf_a,
> + size_t len_a,
> + unsigned char *buf_b,
> + size_t len_b)
> +{
> + uint64_t tsc_a, tsc_b;
> + unsigned char *p;
> + size_t len;
> +
> + p = intel_pt_last_psb(buf_a, len_a);
> + if (!p)
> + return buf_b; /* No PSB in buf_a => no overlap */
> +
> + len = len_a - (p - buf_a); /* bytes from the last PSB to the end of buf_a */
> + if (!intel_pt_next_tsc(p, len, &tsc_a)) {
> + /* The last PSB+ in buf_a is incomplete, so go back one more */
> + len_a -= len;
> + p = intel_pt_last_psb(buf_a, len_a);
> + if (!p)
> + return buf_b; /* No full PSB+ => assume no overlap */
> + len = len_a - (p - buf_a);
> + if (!intel_pt_next_tsc(p, len, &tsc_a))
> + return buf_b; /* No TSC in buf_a => assume no overlap */
> + }
> +
> + while (1) {
> + /* Ignore PSB+ with no TSC */
> + if (intel_pt_next_tsc(buf_b, len_b, &tsc_b) &&
> + intel_pt_tsc_cmp(tsc_a, tsc_b) < 0)
> + return buf_b; /* tsc_a < tsc_b => no overlap */
> +
> + if (!intel_pt_step_psb(&buf_b, &len_b)) /* on success buf_b/len_b move to the following PSB */
> + return buf_b + len_b; /* No PSB in buf_b => no data */
> + }
> +}
> +
> +/**
> + * intel_pt_find_overlap - determine start of non-overlapped trace data.
> + * @buf_a: first buffer
> + * @len_a: size of first buffer
> + * @buf_b: second buffer
> + * @len_b: size of second buffer
> + * @have_tsc: can use TSC packets to detect overlap
> + *
> + * When trace samples or snapshots are recorded there is the possibility that
> + * the data overlaps. Note that, for the purposes of decoding, data is only
> + * useful if it begins with a PSB packet.
> + *
> + * With @have_tsc the decision is delegated to intel_pt_find_overlap_tsc().
> + * Otherwise the tail of @buf_a is compared byte-for-byte with the head of
> + * @buf_b, trying each PSB-aligned tail of @buf_a in turn.
> + *
> + * Return: A pointer into @buf_b from where non-overlapped data starts, or
> + * @buf_b + @len_b if there is no non-overlapped data.
> + */
> +unsigned char *intel_pt_find_overlap(unsigned char *buf_a, size_t len_a,
> + unsigned char *buf_b, size_t len_b,
> + bool have_tsc)
> +{
> + unsigned char *found;
> +
> + /* Buffer 'b' must start at PSB so throw away everything before that */
> + if (!intel_pt_next_psb(&buf_b, &len_b))
> + return buf_b + len_b; /* No PSB */
> +
> + if (!intel_pt_next_psb(&buf_a, &len_a))
> + return buf_b; /* No overlap */
> +
> + if (have_tsc) {
> + found = intel_pt_find_overlap_tsc(buf_a, len_a, buf_b, len_b);
> + if (found) /* NOTE(review): every return in intel_pt_find_overlap_tsc() is buf_b or buf_b + len_b, so this looks always true - confirm */
> + return found;
> + }
> +
> + /*
> + * Buffer 'b' cannot end within buffer 'a' so, for comparison purposes,
> + * we can ignore the first part of buffer 'a'.
> + */
> + while (len_b < len_a) {
> + if (!intel_pt_step_psb(&buf_a, &len_a))
> + return buf_b; /* No overlap */
> + }
> +
> + /* Now len_b >= len_a */
> + if (len_b > len_a) {
> + /* The leftover buffer 'b' must start at a PSB */
> + while (!intel_pt_at_psb(buf_b + len_a, len_b - len_a)) {
> + if (!intel_pt_step_psb(&buf_a, &len_a))
> + return buf_b; /* No overlap */
> + }
> + }
> +
> + while (1) {
> + /* Potential overlap so check the bytes */
> + found = memmem(buf_a, len_a, buf_b, len_a); /* haystack and needle are both len_a bytes: a whole-span comparison */
> + if (found)
> + return buf_b + len_a; /* first len_a bytes of 'b' duplicate the tail of 'a' */
> +
> + /* Try again at next PSB in buffer 'a' */
> + if (!intel_pt_step_psb(&buf_a, &len_a))
> + return buf_b; /* No overlap */
> +
> + /* The leftover buffer 'b' must start at a PSB */
> + while (!intel_pt_at_psb(buf_b + len_a, len_b - len_a)) {
> + if (!intel_pt_step_psb(&buf_a, &len_a))
> + return buf_b; /* No overlap */
> + }
> + }
> +}
> diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h
> new file mode 100644
> index 0000000..e55615a
> --- /dev/null
> +++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h
> @@ -0,0 +1,89 @@
> +/*
> + * intel_pt_decoder.h: Intel Processor Trace support
> + * Copyright (c) 2013-2014, Intel Corporation.
> + *
> + * This program is free software; you can redistribute it and/or modify it
> + * under the terms and conditions of the GNU General Public License,
> + * version 2, as published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope it will be useful, but WITHOUT
> + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
> + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
> + * more details.
> + *
> + */
> +
> +#ifndef INCLUDE__INTEL_PT_DECODER_H__
> +#define INCLUDE__INTEL_PT_DECODER_H__
> +
> +#include <stdint.h>
> +#include <stddef.h>
> +#include <stdbool.h>
> +
> +#include "intel-pt-insn-decoder.h"
> +
> +#define INTEL_PT_IN_TX (1 << 0)
> +#define INTEL_PT_ABORT_TX (1 << 1)
> +#define INTEL_PT_ASYNC (1 << 2)
> +
> +enum intel_pt_sample_type { /* bit values stored in intel_pt_state.type; 0 is used for no sample */
> + INTEL_PT_BRANCH = 1 << 0, /* the value intel_pt_decode() sets before each pass */
> + INTEL_PT_INSTRUCTION = 1 << 1,
> + INTEL_PT_TRANSACTION = 1 << 2,
> +};
> +
> +enum intel_pt_period_type { /* type of intel_pt_params.period_type; presumably the unit of intel_pt_params.period - TODO confirm */
> + INTEL_PT_PERIOD_NONE,
> + INTEL_PT_PERIOD_INSTRUCTIONS,
> + INTEL_PT_PERIOD_TICKS,
> +};
> +
> +struct intel_pt_state { /* decoder output; intel_pt_decode() returns a pointer to one of these */
> + enum intel_pt_sample_type type; /* INTEL_PT_BRANCH by default, 0 when there is no sample */
> + int err; /* error code from the decode pass, 0 on success */
> + uint64_t from_ip; /* set to the decoder's current IP on error, 0 when (re)starting at a new IP */
> + uint64_t to_ip;
> + uint64_t cr3; /* copy of the decoder's CR3, taken from PIP packet payloads */
> + uint64_t timestamp; /* copy of the decoder's timestamp */
> + uint64_t est_timestamp; /* timestamp + 2 * the decoder's timestamp_insn_cnt */
> + uint64_t trace_nr;
> + uint32_t flags; /* reset to 0 at the start of each decode pass; presumably INTEL_PT_IN_TX etc. - confirm */
> + enum intel_pt_insn_op insn_op;
> + int insn_len;
> +};
> +
> +struct intel_pt_insn;
> +
> +struct intel_pt_buffer { /* argument of the intel_pt_params.get_trace() callback */
> + const unsigned char *buf; /* presumably the trace data supplied by the callback - confirm */
> + size_t len; /* presumably the size of buf in bytes - confirm */
> + bool consecutive;
> + uint64_t ref_timestamp;
> + uint64_t trace_nr;
> +};
> +
> +struct intel_pt_params { /* argument of intel_pt_decoder_new() */
> + int (*get_trace)(struct intel_pt_buffer *buffer, void *data); /* callback taking a struct intel_pt_buffer */
> + int (*walk_insn)(struct intel_pt_insn *intel_pt_insn, /* callback; NOTE(review): exact contract is not visible here */
> + uint64_t *insn_cnt_ptr, uint64_t *ip, uint64_t to_ip,
> + uint64_t max_insn_cnt, void *data);
> + void *data; /* presumably passed back as the data argument of both callbacks - confirm */
> + bool return_compression;
> + uint64_t period;
> + enum intel_pt_period_type period_type;
> +};
> +
> +struct intel_pt_decoder;
> +
> +struct intel_pt_decoder *intel_pt_decoder_new(struct intel_pt_params *params);
> +void intel_pt_decoder_free(struct intel_pt_decoder *decoder);
> +
> +const struct intel_pt_state *intel_pt_decode(struct intel_pt_decoder *decoder);
> +
> +unsigned char *intel_pt_find_overlap(unsigned char *buf_a, size_t len_a,
> + unsigned char *buf_b, size_t len_b,
> + bool have_tsc);
> +
> +const char *intel_pt_error_message(int code);
> +
> +#endif
> --
> 1.9.1
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/