[PATCH bpf-next v3 5/5] error-injection: Support fault injection framework

From: Masami Hiramatsu
Date: Wed Jan 10 2018 - 05:19:37 EST


Support the in-kernel fault-injection framework via debugfs.
This allows you to inject a conditional error into a specified
function using the debugfs interface.

Here is the output of the test script described in
Documentation/fault-injection/fault-injection.txt

===========
# ./test_fail_function.sh
1+0 records in
1+0 records out
1048576 bytes (1.0 MB, 1.0 MiB) copied, 0.0227404 s, 46.1 MB/s
btrfs-progs v4.4
See http://btrfs.wiki.kernel.org for more information.

Label: (null)
UUID: bfa96010-12e9-4360-aed0-42eec7af5798
Node size: 16384
Sector size: 4096
Filesystem size: 1001.00MiB
Block group profiles:
Data: single 8.00MiB
Metadata: DUP 58.00MiB
System: DUP 12.00MiB
SSD detected: no
Incompat features: extref, skinny-metadata
Number of devices: 1
Devices:
ID SIZE PATH
1 1001.00MiB /dev/loop2

mount: mount /dev/loop2 on /opt/tmpmnt failed: Cannot allocate memory
SUCCESS!
===========


Signed-off-by: Masami Hiramatsu <mhiramat@xxxxxxxxxx>
---
Changes in v3:
- Check and adjust error value for each target function
- Clear kprobe flag for reuse
- Add more documentation and an example
---
Documentation/fault-injection/fault-injection.txt | 62 ++++++
kernel/Makefile | 1
kernel/fail_function.c | 217 +++++++++++++++++++++
lib/Kconfig.debug | 10 +
4 files changed, 290 insertions(+)
create mode 100644 kernel/fail_function.c

diff --git a/Documentation/fault-injection/fault-injection.txt b/Documentation/fault-injection/fault-injection.txt
index 918972babcd8..4aecbceef9d2 100644
--- a/Documentation/fault-injection/fault-injection.txt
+++ b/Documentation/fault-injection/fault-injection.txt
@@ -30,6 +30,12 @@ o fail_mmc_request
injects MMC data errors on devices permitted by setting
debugfs entries under /sys/kernel/debug/mmc0/fail_mmc_request

+o fail_function
+
+ injects an error return into specific functions, which are marked with
+ the ALLOW_ERROR_INJECTION() macro, by setting debugfs entries
+ under /sys/kernel/debug/fail_function. No boot option is supported.
+
Configure fault-injection capabilities behavior
-----------------------------------------------

@@ -123,6 +129,24 @@ configuration of fault-injection capabilities.
default is 'N', setting it to 'Y' will disable failure injections
when dealing with private (address space) futexes.

+- /sys/kernel/debug/fail_function/inject:
+
+ specifies the target function of error injection by name.
+
+- /sys/kernel/debug/fail_function/retval:
+
+ specifies the "error" return value to inject to the given
+ function.
+
+- /sys/kernel/debug/fail_function/injectable:
+
+ (read only) shows the error-injectable functions and what type of
+ error values can be specified for each of them. The error type
+ will be one of the following:
+ - NULL: retval must be 0.
+ - ERRNO: retval must be -1 to -MAX_ERRNO (-4095).
+ - ERR_NULL: retval must be 0 or -1 to -MAX_ERRNO (-4095).
+
o Boot option

In order to inject faults while debugfs is not available (early boot time),
@@ -268,6 +292,44 @@ trap "echo 0 > /sys/kernel/debug/$FAILTYPE/probability" SIGINT SIGTERM EXIT
echo "Injecting errors into the module $module... (interrupt to stop)"
sleep 1000000

+------------------------------------------------------------------------------
+
+o Inject an open_ctree error while mounting btrfs
+
+#!/bin/bash
+
+rm -f testfile.img
+dd if=/dev/zero of=testfile.img bs=1M seek=1000 count=1
+DEVICE=$(losetup --show -f testfile.img)
+mkfs.btrfs -f $DEVICE
+mkdir -p tmpmnt
+
+FAILTYPE=fail_function
+echo open_ctree > /sys/kernel/debug/$FAILTYPE/inject
+echo -12 > /sys/kernel/debug/$FAILTYPE/retval
+echo N > /sys/kernel/debug/$FAILTYPE/task-filter
+echo 100 > /sys/kernel/debug/$FAILTYPE/probability
+echo 0 > /sys/kernel/debug/$FAILTYPE/interval
+echo -1 > /sys/kernel/debug/$FAILTYPE/times
+echo 0 > /sys/kernel/debug/$FAILTYPE/space
+echo 1 > /sys/kernel/debug/$FAILTYPE/verbose
+
+mount -t btrfs $DEVICE tmpmnt
+if [ $? -ne 0 ]
+then
+ echo "SUCCESS!"
+else
+ echo "FAILED!"
+ umount tmpmnt
+fi
+
+echo > /sys/kernel/debug/$FAILTYPE/inject
+
+rmdir tmpmnt
+losetup -d $DEVICE
+rm testfile.img
+
+
Tool to run command with failslab or fail_page_alloc
----------------------------------------------------
In order to make it easier to accomplish the tasks mentioned above, we can use
diff --git a/kernel/Makefile b/kernel/Makefile
index 172d151d429c..f85ae5dfa474 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -81,6 +81,7 @@ obj-$(CONFIG_AUDIT_TREE) += audit_tree.o
obj-$(CONFIG_GCOV_KERNEL) += gcov/
obj-$(CONFIG_KCOV) += kcov.o
obj-$(CONFIG_KPROBES) += kprobes.o
+obj-$(CONFIG_FAIL_FUNCTION) += fail_function.o
obj-$(CONFIG_KGDB) += debug/
obj-$(CONFIG_DETECT_HUNG_TASK) += hung_task.o
obj-$(CONFIG_LOCKUP_DETECTOR) += watchdog.o
diff --git a/kernel/fail_function.c b/kernel/fail_function.c
new file mode 100644
index 000000000000..d7a20c8f74d0
--- /dev/null
+++ b/kernel/fail_function.c
@@ -0,0 +1,217 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * fail_function.c: Function-based error injection
+ */
+#include <linux/error-injection.h>
+#include <linux/debugfs.h>
+#include <linux/fault-inject.h>
+#include <linux/kallsyms.h>
+#include <linux/kprobes.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+
+/* Defined below; declared here because the fei_attr initializer refers to it. */
+static int fei_kprobe_handler(struct kprobe *kp, struct pt_regs *regs);
+
+/*
+ * Protects fei_attr against concurrent debugfs accesses: it is held across
+ * a read of the "inject" file and while a new "retval" is validated/stored.
+ */
+static DEFINE_MUTEX(fei_lock);
+/*
+ * The single, global injection state.
+ *  kp:     kprobe placed on the current target; kp.addr is NULL while no
+ *          target is set.
+ *  retval: value written into the return register when a fault is
+ *          injected (defaults to -EINVAL).
+ *  attr:   generic fault-injection attributes consulted by should_fail().
+ */
+static struct {
+ struct kprobe kp;
+ unsigned long retval;
+ struct fault_attr attr;
+} fei_attr = {
+ .kp = { .pre_handler = fei_kprobe_handler, },
+ .retval = (unsigned long)-EINVAL,
+ .attr = FAULT_ATTR_INITIALIZER,
+};
+
+/*
+ * Kprobe pre-handler installed on the injection target.
+ *
+ * Consults should_fail() with the fail_function attributes. When a fault is
+ * to be injected, fei_attr.retval is stored as the return value in @regs and
+ * @regs is rewritten through override_function_with_return(); the kprobe
+ * bookkeeping is then undone here and 1 is returned. Otherwise @regs is left
+ * untouched and 0 is returned.
+ */
+static int fei_kprobe_handler(struct kprobe *kp, struct pt_regs *regs)
+{
+	/* Nothing to inject for this call: let the probed function run. */
+	if (!should_fail(&fei_attr.attr, 1))
+		return 0;
+
+	regs_set_return_value(regs, fei_attr.retval);
+	override_function_with_return(regs);
+
+	/* Kprobe specific fixup */
+	reset_current_kprobe();
+	preempt_enable_no_resched();
+
+	return 1;
+}
+NOKPROBE_SYMBOL(fei_kprobe_handler)
+
+/*
+ * Coerce @retv into a value that is acceptable for the function at @addr,
+ * based on the error type reported by get_injectable_error_type():
+ *
+ *  EI_ETYPE_NULL:       always 0.
+ *  EI_ETYPE_ERRNO:      @retv if it lies in [-MAX_ERRNO, -1], else -EINVAL.
+ *  EI_ETYPE_ERRNO_NULL: @retv if it is 0 or lies in [-MAX_ERRNO, -1],
+ *                       else -EINVAL.
+ *
+ * For any other error type @retv is returned unchanged.
+ */
+static unsigned long adjust_error_retval(unsigned long addr, unsigned long retv)
+{
+	/* As unsigned long, the errno range is everything from -MAX_ERRNO up. */
+	const int in_errno_range = retv >= (unsigned long)-MAX_ERRNO;
+
+	switch (get_injectable_error_type(addr)) {
+	case EI_ETYPE_NULL:
+		return 0;
+	case EI_ETYPE_ERRNO:
+		return in_errno_range ? retv : (unsigned long)-EINVAL;
+	case EI_ETYPE_ERRNO_NULL:
+		if (retv == 0 || in_errno_range)
+			return retv;
+		return (unsigned long)-EINVAL;
+	}
+
+	return retv;
+}
+
+/*
+ * seq_file ->start for the "inject" file.
+ *
+ * Takes fei_lock unconditionally (fei_seq_stop() releases it) and yields a
+ * dummy non-NULL token for position 0 only, so exactly one record is shown.
+ */
+static void *fei_seq_start(struct seq_file *m, loff_t *pos)
+{
+	mutex_lock(&fei_lock);
+
+	if (*pos != 0)
+		return NULL;
+
+	return (void *)1;
+}
+
+/* seq_file ->stop: release fei_lock, which fei_seq_start() acquired. */
+static void fei_seq_stop(struct seq_file *m, void *v)
+{
+ mutex_unlock(&fei_lock);
+}
+
+/*
+ * seq_file ->next for the "inject" file.
+ *
+ * There is only a single record, so iteration always ends here. The
+ * position is still advanced, as the seq_file iterator contract expects
+ * from ->next, so that a later ->start at the new position reports
+ * end-of-file instead of showing the record again.
+ */
+static void *fei_seq_next(struct seq_file *m, void *v, loff_t *pos)
+{
+	++*pos;
+	return NULL;
+}
+
+/*
+ * seq_file ->show for the "inject" file: print the current injection
+ * target, or a placeholder comment line when no target is set.
+ * Always returns 0.
+ */
+static int fei_seq_show(struct seq_file *m, void *v)
+{
+	if (!fei_attr.kp.addr) {
+		seq_puts(m, "# not specified\n");
+		return 0;
+	}
+
+	seq_printf(m, "%pf\n", fei_attr.kp.addr);
+	return 0;
+}
+
+/* seq_file iterator callbacks handed to seq_open() by fei_open(). */
+static const struct seq_operations fei_seq_ops = {
+ .start = fei_seq_start,
+ .next = fei_seq_next,
+ .stop = fei_seq_stop,
+ .show = fei_seq_show,
+};
+
+/* ->open for the "inject" file: set up a seq_file using fei_seq_ops. */
+static int fei_open(struct inode *inode, struct file *file)
+{
+ return seq_open(file, &fei_seq_ops);
+}
+
+/*
+ * Detach the injection kprobe from its current target, if there is one.
+ * The kprobe flags are cleared as well so that the same struct kprobe can
+ * be reused for the next registration. Caller must hold fei_lock.
+ */
+static void fei_clear_target(void)
+{
+	if (!fei_attr.kp.addr)
+		return;
+
+	unregister_kprobe(&fei_attr.kp);
+	fei_attr.kp.addr = NULL;
+	fei_attr.kp.flags = 0;
+}
+
+/*
+ * ->write for the "inject" file: select, replace or remove the target.
+ *
+ * The user string (cut off at KSYM_NAME_LEN bytes) is stripped of
+ * surrounding whitespace and interpreted as follows:
+ *  - empty, or starting with '0': remove the current target, if any;
+ *  - otherwise: a symbol name that must resolve through kallsyms and be on
+ *    the error injection list. Any previous target is dropped,
+ *    fei_attr.retval is adjusted to suit the new function and the kprobe is
+ *    registered on it.
+ *
+ * All changes to fei_attr are made under fei_lock, so they cannot race
+ * with readers of "inject" or with writers of "retval".
+ *
+ * Returns the number of bytes consumed on success, -ENOMEM or -EFAULT if
+ * the input could not be copied, -EINVAL for an unknown symbol, -ERANGE
+ * for a symbol that is not error-injectable, or the error returned by
+ * register_kprobe().
+ */
+static ssize_t fei_write(struct file *file, const char __user *buffer,
+			 size_t count, loff_t *ppos)
+{
+	unsigned long addr;
+	char *buf, *sym;
+	int ret;
+
+	/* cut off if it is too long */
+	if (count > KSYM_NAME_LEN)
+		count = KSYM_NAME_LEN;
+	buf = kmalloc(count + 1, GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	if (copy_from_user(buf, buffer, count)) {
+		ret = -EFAULT;
+		goto out_free;
+	}
+	buf[count] = '\0';
+	sym = strstrip(buf);
+
+	mutex_lock(&fei_lock);
+
+	/* An empty string or a leading '0' means "remove the target". */
+	if (sym[0] == '\0' || sym[0] == '0') {
+		fei_clear_target();
+		ret = count;
+		goto out_unlock;
+	}
+
+	addr = kallsyms_lookup_name(sym);
+	if (!addr) {
+		ret = -EINVAL;
+		goto out_unlock;
+	}
+	if (!within_error_injection_list(addr)) {
+		ret = -ERANGE;
+		goto out_unlock;
+	}
+
+	/* Only drop the old target once the new one is known to be valid. */
+	fei_clear_target();
+	fei_attr.kp.addr = (void *)addr;
+	/* Fix up retval before the probe can fire on the new function. */
+	fei_attr.retval = adjust_error_retval(addr, fei_attr.retval);
+	ret = register_kprobe(&fei_attr.kp);
+	if (ret < 0)
+		fei_attr.kp.addr = NULL;
+	else
+		ret = count;
+
+out_unlock:
+	mutex_unlock(&fei_lock);
+out_free:
+	kfree(buf);
+	return ret;
+}
+
+/*
+ * File operations of the "inject" debugfs file: reads go through the
+ * seq_file helpers set up by fei_open(), writes are handled by fei_write().
+ */
+static const struct file_operations fei_ops = {
+ .open = fei_open,
+ .read = seq_read,
+ .write = fei_write,
+ .llseek = seq_lseek,
+ .release = seq_release,
+};
+
+/*
+ * Setter of the "retval" debugfs attribute.
+ *
+ * @data points to the unsigned long that receives the value. While an
+ * injection target is set, @val is accepted only if adjust_error_retval()
+ * leaves it unchanged for that target; otherwise nothing is stored and
+ * -EINVAL is returned. Without a target any value is stored.
+ *
+ * Returns 0 on success, -EINVAL if the value does not suit the target.
+ */
+static int fei_retval_set(void *data, u64 val)
+{
+	unsigned long *slot = data;
+	unsigned long retv = (unsigned long)val;
+	unsigned long target;
+	int err = 0;
+
+	mutex_lock(&fei_lock);
+	target = (unsigned long)fei_attr.kp.addr;
+	if (target && adjust_error_retval(target, retv) != retv)
+		err = -EINVAL;
+	else
+		*slot = retv;
+	mutex_unlock(&fei_lock);
+
+	return err;
+}
+
+/*
+ * Getter of the "retval" debugfs attribute: report the unsigned long that
+ * @data points to through @val. Always returns 0.
+ */
+static int fei_retval_get(void *data, u64 *val)
+{
+ *val = *(unsigned long *)data;
+ return 0;
+}
+/* Attribute ops for "retval"; presumably "%llx\n" makes reads print hex. */
+DEFINE_DEBUGFS_ATTRIBUTE(fei_err_ops, fei_retval_get, fei_retval_set, "%llx\n");
+
+/*
+ * Create the "fail_function" debugfs directory and its entries:
+ *  - the generic fault-injection attributes for fei_attr.attr,
+ *  - "injectable", a symlink to ../error_injection/list,
+ *  - "inject" (mode 0600), served by fei_ops,
+ *  - "retval" (mode 0600), backed by fei_attr.retval via fei_err_ops.
+ *
+ * Returns 0 on success, the error from fault_create_debugfs_attr() if the
+ * directory could not be created, or -ENOMEM (after removing the whole
+ * directory) if any of the entries could not be created.
+ */
+static int __init fei_debugfs_init(void)
+{
+ struct dentry *dir;
+
+ dir = fault_create_debugfs_attr("fail_function", NULL,
+ &fei_attr.attr);
+ if (IS_ERR(dir))
+ return PTR_ERR(dir);
+
+ /* injectable attribute is just a symlink to ../error_injection/list */
+ if (!debugfs_create_symlink("injectable", dir,
+ "../error_injection/list"))
+ goto error;
+
+ if (!debugfs_create_file("inject", 0600, dir, NULL, &fei_ops))
+ goto error;
+
+ if (!debugfs_create_file("retval", 0600, dir, &fei_attr.retval,
+ &fei_err_ops))
+ goto error;
+
+ return 0;
+error:
+ /* Tear down everything created so far, including the directory. */
+ debugfs_remove_recursive(dir);
+ return -ENOMEM;
+}
+
+late_initcall(fei_debugfs_init);
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 2a33efdd1fea..890d4766cef3 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1551,6 +1551,16 @@ config FAIL_FUTEX
help
Provide fault-injection capability for futexes.

+config FAIL_FUNCTION
+ bool "Fault-injection capability for functions"
+ depends on FAULT_INJECTION_DEBUG_FS && FUNCTION_ERROR_INJECTION
+ help
+ Provide function-based fault-injection capability.
+ This allows you to make a specific function return immediately
+ with a given return value. As a result, the caller of the function
+ sees an error value and has to handle it. This is useful for testing
+ the error handling in various subsystems.
+
config FAULT_INJECTION_DEBUG_FS
bool "Debugfs entries for fault-injection capabilities"
depends on FAULT_INJECTION && SYSFS && DEBUG_FS