[RFC PATCH stable 5.4 7/8] bpf: rework the compat kernel probe handling

From: Jiri Olsa
Date: Mon May 22 2023 - 16:36:23 EST


From: Christoph Hellwig <hch@xxxxxx>

commit 8d92db5c04d10381f4db70ed99b1b576f5db18a7 upstream.

[Conflicts resolved by applying hunks only to the functions that
were introduced by the earlier backport of upstream commit 6ae08ae3dea2]

Instead of using the dangerous probe_kernel_read and strncpy_from_unsafe
helpers, rework the compat probes to check if an address is a kernel or
userspace one, and then use the low-level kernel or user probe helper
shared by the proper kernel and user probe helpers. This slightly
changes behavior as the compat probe on a user address doesn't check
the lockdown flags, just as the pure user probes do.

Signed-off-by: Christoph Hellwig <hch@xxxxxx>
Signed-off-by: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx>
Cc: Alexei Starovoitov <ast@xxxxxxxxxx>
Cc: Daniel Borkmann <daniel@xxxxxxxxxxxxx>
Cc: "H. Peter Anvin" <hpa@xxxxxxxxx>
Cc: Ingo Molnar <mingo@xxxxxxx>
Cc: Masami Hiramatsu <mhiramat@xxxxxxxxxx>
Cc: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
Link: http://lkml.kernel.org/r/20200521152301.2587579-14-hch@xxxxxx
Signed-off-by: Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx>
---
kernel/trace/bpf_trace.c | 93 +++++++++++++++++++++++++++-------------
1 file changed, 64 insertions(+), 29 deletions(-)

diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index d1fd13a47bdf..a46256f99229 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -139,65 +139,99 @@ static const struct bpf_func_proto bpf_override_return_proto = {
#endif

static __always_inline int
-bpf_probe_read_kernel_common(void *dst, u32 size, const void *unsafe_ptr,
- const bool compat)
+bpf_probe_read_user_common(void *dst, u32 size, const void __user *unsafe_ptr)
{
- int ret = security_locked_down(LOCKDOWN_BPF_READ);
+ int ret;

+ ret = probe_user_read(dst, unsafe_ptr, size);
if (unlikely(ret < 0))
- goto out;
- ret = compat ? probe_kernel_read(dst, unsafe_ptr, size) :
- probe_kernel_read_strict(dst, unsafe_ptr, size);
- if (unlikely(ret < 0))
-out:
memset(dst, 0, size);
return ret;
}

-BPF_CALL_3(bpf_probe_read_compat, void *, dst, u32, size,
- const void *, unsafe_ptr)
+static __always_inline int
+bpf_probe_read_user_str_common(void *dst, u32 size,
+ const void __user *unsafe_ptr)
{
- return bpf_probe_read_kernel_common(dst, size, unsafe_ptr, true);
+ int ret;
+
+ ret = strncpy_from_user_nofault(dst, unsafe_ptr, size);
+ if (unlikely(ret < 0))
+ memset(dst, 0, size);
+ return ret;
}

-static const struct bpf_func_proto bpf_probe_read_compat_proto = {
- .func = bpf_probe_read_compat,
- .gpl_only = true,
- .ret_type = RET_INTEGER,
- .arg1_type = ARG_PTR_TO_UNINIT_MEM,
- .arg2_type = ARG_CONST_SIZE_OR_ZERO,
- .arg3_type = ARG_ANYTHING,
-};
+static __always_inline int
+bpf_probe_read_kernel_common(void *dst, u32 size, const void *unsafe_ptr)
+{
+ int ret = security_locked_down(LOCKDOWN_BPF_READ);
+
+ if (unlikely(ret < 0))
+ goto fail;
+ ret = probe_kernel_read_strict(dst, unsafe_ptr, size);
+ if (unlikely(ret < 0))
+ goto fail;
+ return ret;
+fail:
+ memset(dst, 0, size);
+ return ret;
+}

static __always_inline int
-bpf_probe_read_kernel_str_common(void *dst, u32 size, const void *unsafe_ptr,
- const bool compat)
+bpf_probe_read_kernel_str_common(void *dst, u32 size, const void *unsafe_ptr)
{
int ret = security_locked_down(LOCKDOWN_BPF_READ);

if (unlikely(ret < 0))
- goto out;
+ goto fail;
+
/*
- * The strncpy_from_unsafe_*() call will likely not fill the entire
- * buffer, but that's okay in this circumstance as we're probing
+ * The strncpy_from_kernel_nofault() call will likely not fill the
+ * entire buffer, but that's okay in this circumstance as we're probing
* arbitrary memory anyway similar to bpf_probe_read_*() and might
* as well probe the stack. Thus, memory is explicitly cleared
* only in error case, so that improper users ignoring return
* code altogether don't copy garbage; otherwise length of string
* is returned that can be used for bpf_perf_event_output() et al.
*/
- ret = compat ? strncpy_from_unsafe(dst, unsafe_ptr, size) :
- strncpy_from_kernel_nofault(dst, unsafe_ptr, size);
+ ret = strncpy_from_kernel_nofault(dst, unsafe_ptr, size);
if (unlikely(ret < 0))
-out:
- memset(dst, 0, size);
+ goto fail;
+
+ return ret;
+fail:
+ memset(dst, 0, size);
return ret;
}

+#ifdef CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
+BPF_CALL_3(bpf_probe_read_compat, void *, dst, u32, size,
+ const void *, unsafe_ptr)
+{
+ if ((unsigned long)unsafe_ptr < TASK_SIZE) {
+ return bpf_probe_read_user_common(dst, size,
+ (__force void __user *)unsafe_ptr);
+ }
+ return bpf_probe_read_kernel_common(dst, size, unsafe_ptr);
+}
+
+static const struct bpf_func_proto bpf_probe_read_compat_proto = {
+ .func = bpf_probe_read_compat,
+ .gpl_only = true,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_UNINIT_MEM,
+ .arg2_type = ARG_CONST_SIZE_OR_ZERO,
+ .arg3_type = ARG_ANYTHING,
+};
+
BPF_CALL_3(bpf_probe_read_compat_str, void *, dst, u32, size,
const void *, unsafe_ptr)
{
- return bpf_probe_read_kernel_str_common(dst, size, unsafe_ptr, true);
+ if ((unsigned long)unsafe_ptr < TASK_SIZE) {
+ return bpf_probe_read_user_str_common(dst, size,
+ (__force void __user *)unsafe_ptr);
+ }
+ return bpf_probe_read_kernel_str_common(dst, size, unsafe_ptr);
}

static const struct bpf_func_proto bpf_probe_read_compat_str_proto = {
@@ -208,6 +242,7 @@ static const struct bpf_func_proto bpf_probe_read_compat_str_proto = {
.arg2_type = ARG_CONST_SIZE_OR_ZERO,
.arg3_type = ARG_ANYTHING,
};
+#endif /* CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE */

BPF_CALL_3(bpf_probe_write_user, void __user *, unsafe_ptr, const void *, src,
u32, size)
--
2.40.1