Re: [PATCH] x86_64, asm: Work around AMD SYSRET SS descriptor attribute issue

From: Borislav Petkov
Date: Tue Apr 28 2015 - 11:55:55 EST


On Mon, Apr 27, 2015 at 01:14:51PM -0700, H. Peter Anvin wrote:
> I did a microbenchmark in user space... let's see if I can find it.

How about the simple one below?

Provided it is correct, it shows that the 0x66-prefixed 3-byte NOPs are
better than the 0F 1F 00 suggested by the manual (Haha!):

$ taskset -c 3 ./nops
Running 600 times, 10000000 loops per run.
nop_0x90 average: 439.805220
nop_3_byte average: 442.412915

---
/*
* How to run (pinned to a single CPU):
*
* taskset -c <cpunum> ./nops
*/
#include <stdio.h>
#include <sys/syscall.h>
#include <stdlib.h>
#include <unistd.h>

typedef unsigned long long u64;

#define DECLARE_ARGS(val, low, high) unsigned low, high
#define EAX_EDX_VAL(val, low, high) ((low) | ((u64)(high) << 32))
#define EAX_EDX_ARGS(val, low, high) "a" (low), "d" (high)
#define EAX_EDX_RET(val, low, high) "=a" (low), "=d" (high)

/*
 * Read the CPU time-stamp counter with the RDTSC instruction.
 *
 * RDTSC delivers its result in two halves (low in EAX, high in EDX); they
 * are recombined here into a single 64-bit value. No fence is issued in
 * this function.
 *
 * Spelled with __attribute__((always_inline)) and __asm__ rather than
 * __always_inline / asm: the former is a libc-internal macro and the latter
 * is not a keyword when compiling with a strict -std=c99 / -std=c11.
 *
 * Returns: the 64-bit counter value.
 */
static inline __attribute__((always_inline)) unsigned long long rdtsc(void)
{
	unsigned int low, high;

	__asm__ __volatile__("rdtsc" : "=a" (low), "=d" (high));

	return (unsigned long long)low | ((unsigned long long)high << 32);
}

/*
 * Read the time-stamp counter with an MFENCE issued immediately before and
 * immediately after the read.
 *
 * Each fence carries a "memory" clobber so that it also acts as a compiler
 * barrier: the compiler may not move memory accesses across it.
 * __asm__ is used instead of asm so the file also builds with a strict
 * -std=c99 / -std=c11.
 *
 * NOTE(review): how strongly MFENCE orders RDTSC against neighbouring
 * instructions is CPU-vendor specific -- confirm against the vendor manual
 * before relying on absolute cycle counts.
 *
 * Returns: the counter value obtained from rdtsc().
 */
static inline u64 read_tsc(void)
{
	u64 tsc;

	__asm__ __volatile__("mfence" ::: "memory");
	tsc = rdtsc();
	__asm__ __volatile__("mfence" ::: "memory");

	return tsc;
}

/*
 * Execute fifteen consecutive 3-byte NOPs encoded as 66 66 90, i.e. the
 * one-byte NOP opcode 0x90 preceded by two 0x66 prefixes.
 *
 * The assembler's .rept directive emits the same 45 bytes as fifteen
 * hand-copied .byte lines while keeping the repeat count in one place.
 * __asm__ is used instead of asm so the file also builds with a strict
 * -std=c99 / -std=c11.
 */
static inline void nop_0x90(void)
{
	__asm__ __volatile__(
		".rept 15\n\t"
		".byte 0x66, 0x66, 0x90\n\t"
		".endr\n\t"
	);
}

/*
 * Execute fifteen consecutive 3-byte NOPs encoded as 0F 1F 00.
 *
 * The assembler's .rept directive emits the same 45 bytes as fifteen
 * hand-copied .byte lines while keeping the repeat count in one place.
 * __asm__ is used instead of asm so the file also builds with a strict
 * -std=c99 / -std=c11.
 */
static inline void nop_3_byte(void)
{
	__asm__ __volatile__(
		".rept 15\n\t"
		".byte 0x0f, 0x1f, 0x00\n\t"
		".endr\n\t"
	);
}

/* Number of measurement runs per NOP variant. */
#define TIMES 600
/* Number of timed iterations inside each run. */
#define LOOPS 10000000ULL

/*
 * Generate 'static double bench_<fn>(void)'.
 *
 * The generated function performs TIMES runs. Each run executes fn()
 * LOOPS times, bracketing every call with read_tsc() and summing the TSC
 * deltas, then divides the sum by LOOPS to get that run's mean delta. The
 * return value is the mean of those per-run means.
 *
 * A macro is used instead of a function-pointer parameter so that fn()
 * stays a direct call in the timed region. The timed region also contains
 * the fence that ends the first read_tsc() and the fence that begins the
 * second one, so the result is not the cost of the NOPs alone.
 */
#define DEFINE_NOP_BENCH(fn)						\
static double bench_##fn(void)						\
{									\
	double avg = 0;							\
									\
	for (int j = 0; j < TIMES; j++) {				\
		u64 cycles = 0;						\
									\
		/* u64 index: LOOPS is unsigned long long. */		\
		for (u64 i = 0; i < LOOPS; i++) {			\
			u64 start = read_tsc();				\
			fn();						\
			u64 end = read_tsc();				\
									\
			cycles += end - start;				\
		}							\
									\
		avg += (double)cycles / LOOPS;				\
	}								\
									\
	return avg / TIMES;						\
}

DEFINE_NOP_BENCH(nop_0x90)
DEFINE_NOP_BENCH(nop_3_byte)

/*
 * Benchmark both NOP encodings, nop_0x90() first and nop_3_byte() second,
 * printing each average as soon as its measurement finishes.
 *
 * Returns: 0.
 */
int main(void)
{
	/* LOOPS is unsigned long long, hence %llu. */
	printf("Running %d times, %llu loops per run.\n", TIMES, LOOPS);

	printf("nop_0x90 average: %f\n", bench_nop_0x90());
	printf("nop_3_byte average: %f\n", bench_nop_3_byte());

	return 0;
}

--
Regards/Gruss,
Boris.

ECO tip #101: Trim your mails when you reply.
--
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/