How to properly fix reading user pointers in bpf in android kernel 4.9?

From: Marcel
Date: Thu May 23 2024 - 12:52:41 EST


This seems that it was a long standing problem with the Linux kernel in general. bpf_probe_read should have worked for both kernel and user pointers but it fails with access error when reading an user one instead.

I know there's a patch upstream that fixes this by introducing new helpers for reading kernel and userspace pointers and I tried to back port them back to my kernel but with no success. Tools like bcc fail to use them and instead they report that the arguments sent to the helpers are invalid. I assume this is due to the arguments ARG_CONST_STACK_SIZE and ARG_PTR_TO_RAW_STACK handle data different in the 4.9 android version and the upstream version but I'm not sure that this is the cause. I left the patch I did below and with a link to the kernel I'm working on and maybe someone can take a look and give me an hand (the patch isn't applied yet)

<https://github.com/nitanmarcel/android_kernel_oneplus_sdm845-bpf>

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 744b4763b80e..de94c13b7193 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -559,6 +559,43 @@ enum bpf_func_id {
*/
BPF_FUNC_probe_read_user,

+ /**
+ * int bpf_probe_read_kernel(void *dst, int size, void *src)
+ * Read a kernel pointer safely.
+ * Return: 0 on success or negative error
+ */
+ BPF_FUNC_probe_read_kernel,
+
+ /**
+ * int bpf_probe_read_str(void *dst, int size, const void *unsafe_ptr)
+ * Copy a NUL terminated string from user unsafe address. In case the string
+ * length is smaller than size, the target is not padded with further NUL
+ * bytes. In case the string length is larger than size, just count-1
+ * bytes are copied and the last byte is set to NUL.
+ * @dst: destination address
+ * @size: maximum number of bytes to copy, including the trailing NUL
+ * @unsafe_ptr: unsafe address
+ * Return:
+ * > 0 length of the string including the trailing NUL on success
+ * < 0 error
+ */
+ BPF_FUNC_probe_read_user_str,
+
+ /**
+ * int bpf_probe_read_str(void *dst, int size, const void *unsafe_ptr)
+ * Copy a NUL terminated string from unsafe address. In case the string
+ * length is smaller than size, the target is not padded with further NUL
+ * bytes. In case the string length is larger than size, just count-1
+ * bytes are copied and the last byte is set to NUL.
+ * @dst: destination address
+ * @size: maximum number of bytes to copy, including the trailing NUL
+ * @unsafe_ptr: unsafe address
+ * Return:
+ * > 0 length of the string including the trailing NUL on success
+ * < 0 error
+ */
+ BPF_FUNC_probe_read_kernel_str,
+
__BPF_FUNC_MAX_ID,
};

diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index a1e37a5d8c88..3478ca744a45 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -94,7 +94,7 @@ static const struct bpf_func_proto bpf_probe_read_proto = {
.arg3_type = ARG_ANYTHING,
};

-BPF_CALL_3(bpf_probe_read_user, void *, dst, u32, size, const void *, unsafe_ptr)
+BPF_CALL_3(bpf_probe_read_user, void *, dst, u32, size, const void __user *, unsafe_ptr)
{
int ret;

@@ -115,6 +115,27 @@ static const struct bpf_func_proto bpf_probe_read_user_proto = {
};


+BPF_CALL_3(bpf_probe_read_kernel, void *, dst, u32, size, const void *, unsafe_ptr)
+{
+ int ret;
+
+ ret = probe_kernel_read(dst, unsafe_ptr, size);
+ if (unlikely(ret < 0))
+ memset(dst, 0, size);
+
+ return ret;
+}
+
+static const struct bpf_func_proto bpf_probe_read_kernel_proto = {
+ .func = bpf_probe_read_kernel,
+ .gpl_only = true,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_RAW_STACK,
+ .arg2_type = ARG_CONST_STACK_SIZE,
+ .arg3_type = ARG_ANYTHING,
+};
+
+
BPF_CALL_3(bpf_probe_write_user, void *, unsafe_ptr, const void *, src,
u32, size)
{
@@ -487,6 +508,69 @@ static const struct bpf_func_proto bpf_probe_read_str_proto = {
.arg3_type = ARG_ANYTHING,
};

+
+
+BPF_CALL_3(bpf_probe_read_user_str, void *, dst, u32, size,
+ const void __user *, unsafe_ptr)
+{
+ int ret;
+
+ /*
+ * The strncpy_from_unsafe() call will likely not fill the entire
+ * buffer, but that's okay in this circumstance as we're probing
+ * arbitrary memory anyway similar to bpf_probe_read() and might
+ * as well probe the stack. Thus, memory is explicitly cleared
+ * only in error case, so that improper users ignoring return
+ * code altogether don't copy garbage; otherwise length of string
+ * is returned that can be used for bpf_perf_event_output() et al.
+ */
+ ret = strncpy_from_unsafe_user(dst, unsafe_ptr, size);
+ if (unlikely(ret < 0))
+ memset(dst, 0, size);
+
+ return ret;
+}
+
+static const struct bpf_func_proto bpf_probe_read_user_str_proto = {
+ .func = bpf_probe_read_user_str,
+ .gpl_only = true,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_RAW_STACK,
+ .arg2_type = ARG_CONST_STACK_SIZE,
+ .arg3_type = ARG_ANYTHING,
+};
+
+
+BPF_CALL_3(bpf_probe_read_kernel_str, void *, dst, u32, size,
+ const void *, unsafe_ptr)
+{
+ int ret;
+
+ /*
+ * The strncpy_from_unsafe() call will likely not fill the entire
+ * buffer, but that's okay in this circumstance as we're probing
+ * arbitrary memory anyway similar to bpf_probe_read() and might
+ * as well probe the stack. Thus, memory is explicitly cleared
+ * only in error case, so that improper users ignoring return
+ * code altogether don't copy garbage; otherwise length of string
+ * is returned that can be used for bpf_perf_event_output() et al.
+ */
+ ret = strncpy_from_unsafe(dst, unsafe_ptr, size);
+ if (unlikely(ret < 0))
+ memset(dst, 0, size);
+
+ return ret;
+}
+
+static const struct bpf_func_proto bpf_probe_read_kernel_str_proto = {
+ .func = bpf_probe_read_kernel_str,
+ .gpl_only = true,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_RAW_STACK,
+ .arg2_type = ARG_CONST_STACK_SIZE,
+ .arg3_type = ARG_ANYTHING,
+};
+
static const struct bpf_func_proto *tracing_func_proto(enum bpf_func_id func_id)
{
switch (func_id) {
@@ -500,8 +584,14 @@ static const struct bpf_func_proto *tracing_func_proto(enum bpf_func_id func_id)
return &bpf_probe_read_proto;
case BPF_FUNC_probe_read_user:
return &bpf_probe_read_user_proto;
+ case BPF_FUNC_probe_read_kernel:
+ return &bpf_probe_read_kernel_proto;
case BPF_FUNC_probe_read_str:
return &bpf_probe_read_str_proto;
+ case BPF_FUNC_probe_read_user_str:
+ return &bpf_probe_read_user_str_proto;
+ case BPF_FUNC_probe_read_kernel_str:
+ return &bpf_probe_read_kernel_proto;
case BPF_FUNC_ktime_get_ns:
return &bpf_ktime_get_ns_proto;
case BPF_FUNC_tail_call:
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 155ce25c069d..91d5691288a7 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -522,7 +522,44 @@ enum bpf_func_id {
* Return: 0 on success or negative error
*/
BPF_FUNC_probe_read_user,
+
+ /**
+ * int bpf_probe_read_kernel(void *dst, int size, void *src)
+ * Read a kernel pointer safely.
+ * Return: 0 on success or negative error
+ */
+ BPF_FUNC_probe_read_kernel,

+ /**
+ * int bpf_probe_read_str(void *dst, int size, const void *unsafe_ptr)
+ * Copy a NUL terminated string from user unsafe address. In case the string
+ * length is smaller than size, the target is not padded with further NUL
+ * bytes. In case the string length is larger than size, just count-1
+ * bytes are copied and the last byte is set to NUL.
+ * @dst: destination address
+ * @size: maximum number of bytes to copy, including the trailing NUL
+ * @unsafe_ptr: unsafe address
+ * Return:
+ * > 0 length of the string including the trailing NUL on success
+ * < 0 error
+ */
+ BPF_FUNC_probe_read_user_str,
+
+ /**
+ * int bpf_probe_read_str(void *dst, int size, const void *unsafe_ptr)
+ * Copy a NUL terminated string from unsafe address. In case the string
+ * length is smaller than size, the target is not padded with further NUL
+ * bytes. In case the string length is larger than size, just count-1
+ * bytes are copied and the last byte is set to NUL.
+ * @dst: destination address
+ * @size: maximum number of bytes to copy, including the trailing NUL
+ * @unsafe_ptr: unsafe address
+ * Return:
+ * > 0 length of the string including the trailing NUL on success
+ * < 0 error
+ */
+ BPF_FUNC_probe_read_kernel_str,
+
__BPF_FUNC_MAX_ID,
};