[v2, 1/4] Build target for emulate.o as a userspace binary

From: Sam Caccavale
Date: Wed Jun 12 2019 - 11:41:34 EST


This commit contains the minimal set of functionality to build
afl-harness around arch/x86/emulate.c which allows exercising code
in that source file, like x86_emulate_insn. Resolving the
dependencies was done via GCC's -H flag by get_headers.py.

CR: https://code.amazon.com/reviews/CR-8325546
---
tools/Makefile | 9 ++
tools/fuzz/x86ie/.gitignore | 2 +
tools/fuzz/x86ie/Makefile | 51 +++++++++++
tools/fuzz/x86ie/README.md | 12 +++
tools/fuzz/x86ie/afl-harness.c | 149 ++++++++++++++++++++++++++++++++
tools/fuzz/x86ie/common.h | 87 +++++++++++++++++++
tools/fuzz/x86ie/emulator_ops.c | 58 +++++++++++++
tools/fuzz/x86ie/emulator_ops.h | 117 +++++++++++++++++++++++++
tools/fuzz/x86ie/stubs.c | 56 ++++++++++++
tools/fuzz/x86ie/stubs.h | 52 +++++++++++
10 files changed, 593 insertions(+)
create mode 100644 tools/fuzz/x86ie/.gitignore
create mode 100644 tools/fuzz/x86ie/Makefile
create mode 100644 tools/fuzz/x86ie/README.md
create mode 100644 tools/fuzz/x86ie/afl-harness.c
create mode 100644 tools/fuzz/x86ie/common.h
create mode 100644 tools/fuzz/x86ie/emulator_ops.c
create mode 100644 tools/fuzz/x86ie/emulator_ops.h
create mode 100644 tools/fuzz/x86ie/stubs.c
create mode 100644 tools/fuzz/x86ie/stubs.h

diff --git a/tools/Makefile b/tools/Makefile
index 3dfd72ae6c1a..4d68817b7e49 100644
--- a/tools/Makefile
+++ b/tools/Makefile
@@ -94,6 +94,12 @@ freefall: FORCE
kvm_stat: FORCE
$(call descend,kvm/$@)

+fuzz: FORCE
+ $(call descend,fuzz/x86_instruction_emulation)
+
+fuzz_deps: FORCE
+ $(call descend,fuzz/x86_instruction_emulation,fuzz_deps)
+
all: acpi cgroup cpupower gpio hv firewire liblockdep \
perf selftests spi turbostat usb \
virtio vm bpf x86_energy_perf_policy \
@@ -171,6 +177,9 @@ tmon_clean:
freefall_clean:
$(call descend,laptop/freefall,clean)

+fuzz_clean:
+ $(call descend,fuzz/x86_instruction_emulation,clean)
+
build_clean:
$(call descend,build,clean)

diff --git a/tools/fuzz/x86ie/.gitignore b/tools/fuzz/x86ie/.gitignore
new file mode 100644
index 000000000000..7d44f7ce266e
--- /dev/null
+++ b/tools/fuzz/x86ie/.gitignore
@@ -0,0 +1,2 @@
+*.o
+*-harness
diff --git a/tools/fuzz/x86ie/Makefile b/tools/fuzz/x86ie/Makefile
new file mode 100644
index 000000000000..d45fe6d266b9
--- /dev/null
+++ b/tools/fuzz/x86ie/Makefile
@@ -0,0 +1,51 @@
+ROOT_DIR=../../..
+THIS_DIR=tools/fuzz/x86_instruction_emulation
+
+include ../../scripts/Makefile.include
+
+.DEFAULT_GOAL := all
+
+INCLUDES := $(patsubst -I./%,-I./$(ROOT_DIR)/%, $(LINUXINCLUDE))
+INCLUDES := $(patsubst ./include/%,./$(ROOT_DIR)/include/%, $(INCLUDES))
+INCLUDES += -include ./$(ROOT_DIR)/include/linux/compiler_types.h
+
+$(ROOT_DIR)/.config:
+ make -C $(ROOT_DIR) menuconfig
+ sed -i -r 's/^#? *CONFIG_KVM(.*)=.*/CONFIG_KVM\1=y/' $(ROOT_DIR)/.config
+
+
+KBUILD_CFLAGS += -fsanitize=address
+BAD_FLAGS := -mcmodel=kernel # Causes all kinds of errors in a userspace bin
+BAD_FLAGS += -mpreferred-stack-boundary=3 # Similar to ^, breaks ubuntu 16 too
+BAD_FLAGS += -mno-sse # stdlibs use sse, would like to have it
+
+ifdef DEBUG
+KBUILD_CFLAGS += -DDEBUG
+KBUILD_CFLAGS += -O0
+BAD_FLAGS += -O3
+BAD_FLAGS += -O2
+BAD_FLAGS += -O1
+endif
+
+KBUILD_CFLAGS := $(filter-out $(BAD_FLAGS),$(KBUILD_CFLAGS))
+
+KERNEL_OBJS := arch/x86/kvm/emulate.o \
+ arch/x86/lib/retpoline.o \
+ lib/find_bit.o
+KERNEL_OBJS := $(patsubst %,$(ROOT_DIR)/%, $(KERNEL_OBJS))
+# $(KERNEL_OBJS): $(ROOT_DIR)/.config
+# $(error Run `./tools/fuzz/x86_instruction_emulation/scripts/make_deps' first (setting envvar CC if desired).)
+
+DEPS := emulator_ops.h stubs.h common.h
+%.o: %.c $(DEPS)
+ $(CC) $(KBUILD_CFLAGS) $(INCLUDES) -g -c -o $@ $<
+
+LOCAL_OBJS := emulator_ops.o stubs.o
+afl-harness: afl-harness.o $(LOCAL_OBJS) $(KERNEL_OBJS)
+ @$(CC) -v $(KBUILD_CFLAGS) $(LOCAL_OBJS) $(KERNEL_OBJS) $< $(INCLUDES) -Istubs.h -o $@ -no-pie
+
+all: afl-harness
+
+.PHONY: clean
+clean:
+ $(RM) -r *.o afl-harness
diff --git a/tools/fuzz/x86ie/README.md b/tools/fuzz/x86ie/README.md
new file mode 100644
index 000000000000..a78ac51b0152
--- /dev/null
+++ b/tools/fuzz/x86ie/README.md
@@ -0,0 +1,12 @@
+# Building
+
+From the root of linux, run:
+1. `./tools/fuzz/x86_instruction_emulation/scripts/make_deps`
+ - Optionally, set the envvar CC with your desired compiler.
+2. `make tools/fuzz`
+
+## TODO
+
+I'd like to add the object files built in `make_deps` as real
+dependencies of `make tools/fuzz` but it causes a cycle in Make to
+add them to `tools/fuzz/x86_instruction_emulation/Makefile`.
\ No newline at end of file
diff --git a/tools/fuzz/x86ie/afl-harness.c b/tools/fuzz/x86ie/afl-harness.c
new file mode 100644
index 000000000000..b3b09d7f15f2
--- /dev/null
+++ b/tools/fuzz/x86ie/afl-harness.c
@@ -0,0 +1,149 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * x86 Instruction Emulation Fuzzing Wrapper
+ *
+ * Authors:
+ * Sam Caccavale <samcacc@xxxxxxxxx>
+ *
+ * Supporting code from xen:
+ * xen/tools/fuzz/x86_instruction_emulation/afl-harness.c
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ *
+ * From: xen/master f68f35fd2016e36ee30f8b3e7dfd46c554407ac1
+ */
+
+#include <assert.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <getopt.h>
+#include "emulator_ops.h"
+
+/* Arbitrary, but limiting fuzz input size is important. */
+#define MAX_INPUT_SIZE 4096
+#define INSTRUCTION_BYTES (MAX_INPUT_SIZE - MIN_INPUT_SIZE)
+
+int main(int argc, char **argv)
+{
+ size_t size;
+ FILE *fp = NULL;
+ int max, count;
+ struct state *state;
+
+ setbuf(stdin, NULL);
+ setbuf(stdout, NULL);
+
+ while (1) {
+ enum { OPT_INPUT_SIZE,
+ };
+ static const struct option lopts[] = {
+ { "input-size", no_argument, NULL, OPT_INPUT_SIZE },
+ { 0, 0, 0, 0 }
+ };
+ int c = getopt_long_only(argc, argv, "", lopts, NULL);
+
+ if (c == -1)
+ break;
+
+ switch (c) {
+ case OPT_INPUT_SIZE:
+ printf("Min: %u\n", MIN_INPUT_SIZE);
+ printf("Max: %u\n", MAX_INPUT_SIZE);
+ exit(0);
+ break;
+
+ case '?':
+ printf("Usage: %s $FILE [$FILE...] | [--input-size]\n",
+ argv[0]);
+ exit(-1);
+ break;
+
+ default:
+ printf("Bad getopt return %d (%c)\n", c, c);
+ exit(-1);
+ break;
+ }
+ }
+
+ max = argc - optind;
+
+ if (!max) { /* No positional parameters. Use stdin. */
+ max = 1;
+ fp = stdin;
+ }
+
+ state = create_emulator();
+ state->data = malloc(INSTRUCTION_BYTES);
+
+#ifdef __AFL_HAVE_MANUAL_CONTROL
+ __AFL_INIT();
+
+ /*
+ * This is the number of times AFL's forkserver should reuse a
+ * process to fuzz the target. 1000 is the recommended starting
+ * point. Future tweaking may or may not yeild better results.
+ */
+ for (count = 0; __AFL_LOOP(1000);)
+#else
+ for (count = 0; count < max; count++)
+#endif
+ {
+ if (fp != stdin) { /* If not stdin, open the provided file. */
+ printf("Opening file %s\n", argv[optind + count]);
+ fp = fopen(argv[optind + count], "rb");
+ if (fp == NULL) {
+ perror("fopen");
+ exit(-1);
+ }
+ }
+#ifdef __AFL_HAVE_MANUAL_CONTROL
+ else {
+ /*
+ * This will ensure we're dealing with a clean stream
+ * state after the afl-fuzz process messes with the
+ * open file handle.
+ */
+ fseek(fp, 0, SEEK_SET);
+ }
+#endif
+ reset_emulator(state);
+
+ size = fread(state, 1, MIN_INPUT_SIZE, fp);
+ if (size != MIN_INPUT_SIZE) {
+ printf("Input does not populate state\n");
+ if (max == 1)
+ exit(-1);
+ }
+
+ size = fread(state->data, 1, INSTRUCTION_BYTES, fp);
+ state->data_available = size;
+
+ if (ferror(fp)) {
+ perror("fread");
+ exit(-1);
+ }
+
+ /* Only run the test if the input file was < than INPUT_SIZE */
+ if (feof(fp)) {
+ initialize_emulator(state);
+ emulate_until_complete(state);
+ } else {
+ printf("Input too large\n");
+ /* Don't exit if we're doing batch processing */
+ if (max == 1)
+ exit(-1);
+ }
+
+ if (fp != stdin) {
+ fclose(fp);
+ fp = NULL;
+ }
+ }
+
+ free(state->data);
+ free_emulator(state);
+ return 0;
+}
diff --git a/tools/fuzz/x86ie/common.h b/tools/fuzz/x86ie/common.h
new file mode 100644
index 000000000000..9aec2bea3d50
--- /dev/null
+++ b/tools/fuzz/x86ie/common.h
@@ -0,0 +1,87 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#include <stdint.h>
+
+#define PRIx64 "llx"
+#define MSR_INDEX_MAX 16
+
+#ifdef DEBUG
+#define DEBUG_PRINT 1
+#else
+#define DEBUG_PRINT 0
+#endif
+
+#define debug(...) \
+ do { \
+ if (DEBUG_PRINT) \
+ fprintf(stderr, __VA_ARGS__); \
+ } while (0)
+
+enum x86_segment {
+ /* General purpose. */
+ x86_seg_es,
+ x86_seg_cs,
+ x86_seg_ss,
+ x86_seg_ds,
+ x86_seg_fs,
+ x86_seg_gs,
+ /* System: Valid to use for implicit table references. */
+ x86_seg_tr,
+ x86_seg_ldtr,
+ x86_seg_gdtr,
+ x86_seg_idtr,
+ /* No Segment: For accesses which are already linear. */
+ x86_seg_none
+};
+
+#define NR_SEG x86_seg_none
+
+struct segment_register {
+ uint16_t sel;
+ union {
+ uint16_t attr;
+ struct {
+ uint16_t type : 4;
+ uint16_t s : 1;
+ uint16_t dpl : 2;
+ uint16_t p : 1;
+ uint16_t avl : 1;
+ uint16_t l : 1;
+ uint16_t db : 1;
+ uint16_t g : 1;
+ uint16_t pad : 4;
+ };
+ };
+ uint32_t limit;
+ uint64_t base;
+};
+
+enum user_regs {
+ REGS_RAX,
+ REGS_RCX,
+ REGS_RDX,
+ REGS_RBX,
+ REGS_RSP,
+ REGS_RBP,
+ REGS_RSI,
+ REGS_RDI,
+ REGS_R8,
+ REGS_R9,
+ REGS_R10,
+ REGS_R11,
+ REGS_R12,
+ REGS_R13,
+ REGS_R14,
+ REGS_R15,
+ NR_REGS
+};
+#define NR_VCPU_REGS NR_REGS
+
+static inline void print_n_bytes(unsigned char *bytes, size_t n)
+{
+ int i;
+
+ for (i = 0; i < n; i++)
+ fprintf(stderr, " %02x", bytes[i]);
+ fprintf(stderr, "\n");
+}
diff --git a/tools/fuzz/x86ie/emulator_ops.c b/tools/fuzz/x86ie/emulator_ops.c
new file mode 100644
index 000000000000..55ae4e8fbd96
--- /dev/null
+++ b/tools/fuzz/x86ie/emulator_ops.c
@@ -0,0 +1,58 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * x86 Instruction Emulation Fuzzing Wrapper
+ *
+ * Authors:
+ * Sam Caccavale <samcacc@xxxxxxxxx>
+ *
+ * Supporting code from xen:
+ * xen/tools/fuzz/x86_instruction_emulation/fuzz_emul.c
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ *
+ * From: xen/master f68f35fd2016e36ee30f8b3e7dfd46c554407ac1
+ */
+
+#include <string.h>
+#include <stdio.h>
+#include <assert.h>
+#include <stdlib.h>
+
+#include "stubs.h"
+#include "emulator_ops.h"
+
+#include <linux/compiler_attributes.h>
+
+#include <asm/kvm_emulate.h>
+#include <asm/processor-flags.h>
+#include <asm/user_64.h>
+#include <asm/kvm.h>
+
+void initialize_emulator(struct state *state)
+{
+}
+
+int step_emulator(struct state *state)
+{
+ return 0;
+}
+
+int emulate_until_complete(struct state *state)
+{
+ return 0;
+}
+
+struct state *create_emulator(void)
+{
+ struct state *state = malloc(sizeof(struct state));
+ return state;
+}
+
+void reset_emulator(struct state *state)
+{
+}
+
+void free_emulator(struct state *state)
+{
+}
diff --git a/tools/fuzz/x86ie/emulator_ops.h b/tools/fuzz/x86ie/emulator_ops.h
new file mode 100644
index 000000000000..5ae072d5f205
--- /dev/null
+++ b/tools/fuzz/x86ie/emulator_ops.h
@@ -0,0 +1,117 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef EMULATOR_OPS_H
+#define EMULATOR_OPS_H
+
+#include "common.h"
+#include "stubs.h"
+#include <asm/kvm_emulate.h>
+
+struct vcpu {
+ unsigned long cr[5];
+ uint64_t msr[MSR_INDEX_MAX];
+ uint64_t regs[NR_REGS];
+ uint64_t rflags;
+ struct segment_register segments[NR_SEG];
+};
+
+/*
+ * Internal state of the emulate harness. Calculated initially from the input
+ * corpus, and later mutated by the emulation callbacks.
+ */
+struct state {
+ /* Bitmask which enables/disables hooks. */
+ unsigned long options;
+
+ /* Internal representation of emulated CPU. */
+ struct vcpu vcpu;
+
+ /*
+ * Input bytes are consumed at s.data[eip + s.other_bytes_consumed]
+ * while eip + size_requested + other_bytes_consumed < data_available
+ *
+ * emul_fetch consumes bytes for use as x86 instructions as eip grows
+ *
+ * get_bytes_and_increment consumes bytes and increments
+ * other_bytes_consumed. These bytes can be used as return values for
+ * memory reads, random chances to fail, or other purposes.
+ *
+ * Only these two functions should be used to access this data.
+ *
+ * This causes .data to be an interspliced source of instructions and
+ * other data. Xen's instruction emulation does this to provide
+ * deterministic randomness on fuzz runs at the cost of complexity to
+ * crash output.
+ *
+ * Other alternatives (two separate streams, getting ins bytes from
+ * low, and random bytes from high, etc) yield byte streams which may
+ * not bear an as close correlation with AFL's input. This was chosen
+ * since the only drawback to this approach is remedied by simple
+ * bookkeeping of instruction bytes when debugging crashes. This is
+ * enabled by the TODO flag.
+ */
+ unsigned char *data;
+
+ /* Real amount of data backing state->data[]. */
+ size_t data_available;
+
+ /*
+ * Amount of bytes consumed for purposes other than instructions.
+ * E.G. whether a memory access should fault.
+ */
+ size_t other_bytes_consumed;
+
+ /* Emulation context */
+ struct x86_emulate_ctxt ctxt;
+};
+
+#define MIN_INPUT_SIZE (offsetof(struct state, data))
+
+#define container_of(ptr, type, member) \
+ ({ \
+ const typeof(((type *)0)->member) * __mptr = (ptr); \
+ (type *)((char *)__mptr - offsetof(type, member)); \
+ })
+
+#define get_state(h) container_of(h, struct state, ctxt)
+
+void buffer_stderr(void) __attribute__((constructor));
+
+/*
+ * Allocates space for, and creates a `struct state`. The user should set
+ * state->data to their instruction stream and do any modification of the
+ * state-vpcu if desired.
+ */
+extern struct state *create_emulator(void);
+
+/*
+ * memset's all fields of state except data and data_available.
+ */
+extern void reset_emulator(struct state *state);
+
+/*
+ * free_emulator does not free the state->data member, the user should free
+ * it before freeing the emulator. The alternative implementation either
+ * restrains data to a fixed size or has the user pass in a (pointer, size)
+ * pair which would have to be copied. Copying this would be slow to fuzz.
+ */
+extern void free_emulator(struct state *state);
+
+/*
+ * Uses the state->option field to disable certain functionality and
+ * initializes the state->ctxt to a valid state. This is optional if
+ * intentionally testing the emulator in an invalid state.
+ */
+extern void initialize_emulator(struct state *state);
+
+/*
+ * Advances the emulator one instruction and handles any exceptions.
+ */
+extern int step_emulator(struct state *state);
+
+/*
+ * Steps the emulator until no instructions remain or something fails.
+ */
+extern int emulate_until_complete(struct state *state);
+
+#endif /* ifdef EMULATOR_OPS_H */
diff --git a/tools/fuzz/x86ie/stubs.c b/tools/fuzz/x86ie/stubs.c
new file mode 100644
index 000000000000..d2ffeb3ec599
--- /dev/null
+++ b/tools/fuzz/x86ie/stubs.c
@@ -0,0 +1,56 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * These functions/symbols are required to build emulate.o but belong in
+ * linux tree files with include many other headers/unnecessary symbols
+ * and cause building emulate.o to become far more complicated than just
+ * stubbing them out here. In order to stub them without modifying the
+ * included source, we're declaring many of them here.
+ */
+#include <linux/types.h>
+
+#include "stubs.h"
+
+#define VMWARE_BACKDOOR_PMC_HOST_TSC 0x10000
+#define VMWARE_BACKDOOR_PMC_REAL_TIME 0x10001
+#define VMWARE_BACKDOOR_PMC_APPARENT_TIME 0x10002
+
+/*
+ * This is easy enough to stub out its full functionality.
+ */
+bool is_vmware_backdoor_pmc(u32 pmc_idx)
+{
+ switch (pmc_idx) {
+ case VMWARE_BACKDOOR_PMC_HOST_TSC:
+ case VMWARE_BACKDOOR_PMC_REAL_TIME:
+ case VMWARE_BACKDOOR_PMC_APPARENT_TIME:
+ return true;
+ }
+ return false;
+}
+
+/*
+ * Printk has no side effects so we don't need to worry about it.
+ */
+int printk(const char *s, ...)
+{
+ return 0;
+}
+
+/*
+ * This is required by source included from emulate.c and would be linked in.
+ */
+bool enable_vmware_backdoor;
+
+struct exception_table_entry;
+struct pt_regs;
+
+/*
+ * TODO: we likely need to implement this to handle emulated exceptions.
+ */
+bool ex_handler_default(const struct exception_table_entry *fixup,
+ struct pt_regs *regs, int trapnr,
+ unsigned long error_code, unsigned long fault_addr)
+{
+ return true;
+}
diff --git a/tools/fuzz/x86ie/stubs.h b/tools/fuzz/x86ie/stubs.h
new file mode 100644
index 000000000000..02c2f6f9bc26
--- /dev/null
+++ b/tools/fuzz/x86ie/stubs.h
@@ -0,0 +1,52 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef STUBS_H
+#define STUBS_H
+
+
+/*
+ * Several typedefs in linux/types.h collide with ones in standard libs.
+ *
+ * kvm_emulate.h uses many other types in linux/types.h heavily, so we must
+ * include it but having access to the standard libs is also important for the
+ * harnesses.
+ *
+ * A solution would be to not include any kernel files in the harness but then
+ * useful introspection into state->ctxt is impossible as x86_emulate_ctxt is
+ * defined in kvm_emulate.h and uses a ton of linux/types.h types.
+ *
+ * Instead, we use dummy defines to avoid the collision and linux seems
+ * content to use the equivalent types from the standard libs.
+ */
+#define fd_set fake_fd_set
+#define dev_t fake_dev_t
+#define nlink_t fake_nlink_t
+#define timer_t fake_timer_t
+#define loff_t fake_loff_t
+#define u_int64_t fake_u_int64_t
+#define int64_t fake_int64_t
+#define blkcnt_t fake_blkcnt_t
+#define uint64_t fake_uint64_t
+#include <linux/types.h>
+#undef fd_set
+#undef dev_t
+#undef nlint_t
+#undef timer_t
+#undef loff_t
+#undef u_int64_t
+#undef int64_t
+#undef blkcnt_t
+#undef uint64_t
+
+/*
+ * These are identically defined in string.h and included kernel headers.
+ */
+#ifdef __always_inline
+#undef __always_inline
+#endif
+
+#ifdef __attribute_const__
+#undef __attribute_const__
+#endif
+
+#endif /* STUBS_H */
--
2.17.1




Amazon Development Center Germany GmbH
Krausenstr. 38
10117 Berlin
Geschaeftsfuehrung: Christian Schlaeger, Ralf Herbrich
Eingetragen am Amtsgericht Charlottenburg unter HRB 149173 B
Sitz: Berlin
Ust-ID: DE 289 237 879