Re: [RFC PATCH] bpf: Add helpers to read useful task_struct members

From: Tushar Dave
Date: Mon Nov 06 2017 - 19:17:02 EST




On 11/02/2017 11:58 PM, Sandipan Das wrote:
For added security, the layout of some structures can be
randomized by enabling CONFIG_GCC_PLUGIN_RANDSTRUCT. One
such structure is task_struct. To build BPF programs, we
use Clang which does not support this feature. So, if we
attempt to read a field of a structure with a randomized
layout within a BPF program, we do not get the expected
value because of incorrect offsets. To observe this, it
is not mandatory to have CONFIG_GCC_PLUGIN_RANDSTRUCT
enabled because the structure annotations/members added
for this purpose are enough to cause this. So, all kernel
builds are affected.

For example, considering samples/bpf/offwaketime_kern.c,
if we try to print the values of pid and comm inside the
task_struct passed to waker() by adding the following
lines of code at the appropriate place

char fmt[] = "waker(): p->pid = %u, p->comm = %s\n";
bpf_trace_printk(fmt, sizeof(fmt), _(p->pid), _(p->comm));

it is seen that upon rebuilding and running this sample
followed by inspecting /sys/kernel/debug/tracing/trace,
the output looks like the following

_-----=> irqs-off
/ _----=> need-resched
| / _---=> hardirq/softirq
|| / _--=> preempt-depth
||| / delay
TASK-PID CPU# |||| TIMESTAMP FUNCTION
| | | |||| | |
<idle>-0 [007] d.s. 1883.443594: 0x00000001: waker(): p->pid = 0, p->comm =
<idle>-0 [018] d.s. 1883.453588: 0x00000001: waker(): p->pid = 0, p->comm =
<idle>-0 [007] d.s. 1883.463584: 0x00000001: waker(): p->pid = 0, p->comm =
<idle>-0 [009] d.s. 1883.483586: 0x00000001: waker(): p->pid = 0, p->comm =
<idle>-0 [005] d.s. 1883.493583: 0x00000001: waker(): p->pid = 0, p->comm =
<idle>-0 [009] d.s. 1883.503583: 0x00000001: waker(): p->pid = 0, p->comm =
<idle>-0 [018] d.s. 1883.513578: 0x00000001: waker(): p->pid = 0, p->comm =
systemd-journal-3140 [003] d... 1883.627660: 0x00000001: waker(): p->pid = 0, p->comm =
systemd-journal-3140 [003] d... 1883.627704: 0x00000001: waker(): p->pid = 0, p->comm =
systemd-journal-3140 [003] d... 1883.627723: 0x00000001: waker(): p->pid = 0, p->comm =

To avoid this, we add new BPF helpers that read the
correct values for some of the important task_struct
members such as pid, tgid, comm and flags which are
extensively used in BPF-based analysis tools such as
bcc. Since these helpers are built with GCC, they use
the correct offsets when referencing a member.
Just to add that we were seeing the same issue (but had no clue until looked at this patch , thanks). Its easy to reproduce by running bcc example task_switch.py where pid (prev_pid) is retrieved from struct task_struct and that is always zero. we tried printing other task_struct members such as 'comm' and see that as empty string as well.


-Tushar

Signed-off-by: Sandipan Das <sandipan@xxxxxxxxxxxxxxxxxx>
---
include/linux/bpf.h | 3 ++
include/uapi/linux/bpf.h | 13 ++++++
kernel/bpf/core.c | 3 ++
kernel/bpf/helpers.c | 75 +++++++++++++++++++++++++++++++
kernel/trace/bpf_trace.c | 6 +++
tools/testing/selftests/bpf/bpf_helpers.h | 9 ++++
6 files changed, 109 insertions(+)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index f1af7d63d678..5993a0f5262b 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -418,6 +418,9 @@ extern const struct bpf_func_proto bpf_ktime_get_ns_proto;
extern const struct bpf_func_proto bpf_get_current_pid_tgid_proto;
extern const struct bpf_func_proto bpf_get_current_uid_gid_proto;
extern const struct bpf_func_proto bpf_get_current_comm_proto;
+extern const struct bpf_func_proto bpf_get_task_pid_tgid_proto;
+extern const struct bpf_func_proto bpf_get_task_comm_proto;
+extern const struct bpf_func_proto bpf_get_task_flags_proto;
extern const struct bpf_func_proto bpf_skb_vlan_push_proto;
extern const struct bpf_func_proto bpf_skb_vlan_pop_proto;
extern const struct bpf_func_proto bpf_get_stackid_proto;
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index f90860d1f897..324508d27bd2 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -338,6 +338,16 @@ union bpf_attr {
* @skb: pointer to skb
* Return: classid if != 0
*
+ * u64 bpf_get_task_pid_tgid(struct task_struct *task)
+ * Return: task->tgid << 32 | task->pid
+ *
+ * int bpf_get_task_comm(struct task_struct *task)
+ * Stores task->comm into buf
+ * Return: 0 on success or negative error
+ *
+ * u32 bpf_get_task_flags(struct task_struct *task)
+ * Return: task->flags
+ *
* int bpf_skb_vlan_push(skb, vlan_proto, vlan_tci)
* Return: 0 on success or negative error
*
@@ -602,6 +612,9 @@ union bpf_attr {
FN(get_current_uid_gid), \
FN(get_current_comm), \
FN(get_cgroup_classid), \
+ FN(get_task_pid_tgid), \
+ FN(get_task_comm), \
+ FN(get_task_flags), \
FN(skb_vlan_push), \
FN(skb_vlan_pop), \
FN(skb_get_tunnel_key), \
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 7b62df86be1d..c69c17d6514a 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -1438,6 +1438,9 @@ const struct bpf_func_proto bpf_ktime_get_ns_proto __weak;
const struct bpf_func_proto bpf_get_current_pid_tgid_proto __weak;
const struct bpf_func_proto bpf_get_current_uid_gid_proto __weak;
const struct bpf_func_proto bpf_get_current_comm_proto __weak;
+const struct bpf_func_proto bpf_get_task_pid_tgid_proto __weak;
+const struct bpf_func_proto bpf_get_task_comm_proto __weak;
+const struct bpf_func_proto bpf_get_task_flags_proto __weak;
const struct bpf_func_proto bpf_sock_map_update_proto __weak;
const struct bpf_func_proto * __weak bpf_get_trace_printk_proto(void)
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 3d24e238221e..f45259dce117 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -179,3 +179,78 @@ const struct bpf_func_proto bpf_get_current_comm_proto = {
.arg1_type = ARG_PTR_TO_UNINIT_MEM,
.arg2_type = ARG_CONST_SIZE,
};
+
+BPF_CALL_1(bpf_get_task_pid_tgid, struct task_struct *, task)
+{
+ int ret;
+ u32 pid, tgid;
+
+ ret = probe_kernel_read(&pid, &task->pid, sizeof(pid));
+ if (unlikely(ret < 0))
+ goto err;
+
+ ret = probe_kernel_read(&tgid, &task->tgid, sizeof(tgid));
+ if (unlikely(ret < 0))
+ goto err;
+
+ return (u64) tgid << 32 | pid;
+err:
+ return -EINVAL;
+}
+
+const struct bpf_func_proto bpf_get_task_pid_tgid_proto = {
+ .func = bpf_get_task_pid_tgid,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_ANYTHING,
+};
+
+BPF_CALL_3(bpf_get_task_comm, struct task_struct *, task, char *, buf, u32, size)
+{
+ int ret;
+ char comm[TASK_COMM_LEN];
+
+ ret = probe_kernel_read(comm, task->comm, sizeof(comm));
+ if (unlikely(ret < 0))
+ goto err_clear;
+
+ strncpy(buf, comm, size);
+
+ /* Verifier guarantees that size > 0. For task->comm exceeding
+ * size, guarantee that buf is %NUL-terminated. Unconditionally
+ * done here to save the size test.
+ */
+ buf[size - 1] = 0;
+ return 0;
+err_clear:
+ memset(buf, 0, size);
+ return -EINVAL;
+}
+
+const struct bpf_func_proto bpf_get_task_comm_proto = {
+ .func = bpf_get_task_comm,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_ANYTHING,
+ .arg2_type = ARG_PTR_TO_UNINIT_MEM,
+ .arg3_type = ARG_CONST_SIZE,
+};
+
+BPF_CALL_1(bpf_get_task_flags, struct task_struct *, task)
+{
+ int ret;
+ unsigned int flags;
+
+ ret = probe_kernel_read(&flags, &task->flags, sizeof(flags));
+ if (unlikely(ret < 0))
+ return -EINVAL;
+
+ return flags;
+}
+
+const struct bpf_func_proto bpf_get_task_flags_proto = {
+ .func = bpf_get_task_flags,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_ANYTHING,
+};
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index dc498b605d5d..a31f5cf68cbc 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -477,6 +477,12 @@ static const struct bpf_func_proto *tracing_func_proto(enum bpf_func_id func_id)
return &bpf_get_smp_processor_id_proto;
case BPF_FUNC_get_numa_node_id:
return &bpf_get_numa_node_id_proto;
+ case BPF_FUNC_get_task_pid_tgid:
+ return &bpf_get_task_pid_tgid_proto;
+ case BPF_FUNC_get_task_comm:
+ return &bpf_get_task_comm_proto;
+ case BPF_FUNC_get_task_flags:
+ return &bpf_get_task_flags_proto;
case BPF_FUNC_perf_event_read:
return &bpf_perf_event_read_proto;
case BPF_FUNC_probe_write_user:
diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h
index b2e02bdcd098..8c64df027d2c 100644
--- a/tools/testing/selftests/bpf/bpf_helpers.h
+++ b/tools/testing/selftests/bpf/bpf_helpers.h
@@ -1,6 +1,8 @@
#ifndef __BPF_HELPERS_H
#define __BPF_HELPERS_H
+struct task_struct;
+
/* helper macro to place programs, maps, license in
* different sections in elf_bpf file. Section names
* are interpreted by elf_bpf loader
@@ -31,6 +33,13 @@ static unsigned long long (*bpf_get_current_uid_gid)(void) =
(void *) BPF_FUNC_get_current_uid_gid;
static int (*bpf_get_current_comm)(void *buf, int buf_size) =
(void *) BPF_FUNC_get_current_comm;
+static unsigned long long (*bpf_get_task_pid_tgid)(struct task_struct *task) =
+ (void *) BPF_FUNC_get_task_pid_tgid;
+static int (*bpf_get_task_comm)(struct task_struct *task,
+ void *buf, int buf_size) =
+ (void *) BPF_FUNC_get_task_comm;
+static unsigned int (*bpf_get_task_flags)(struct task_struct *task) =
+ (void *) BPF_FUNC_get_task_flags;
static unsigned long long (*bpf_perf_event_read)(void *map,
unsigned long long flags) =
(void *) BPF_FUNC_perf_event_read;