[PATCH bpf-next 1/6] bpf: introduce BPF_PROG_TYPE_FILE_FILTER

From: Alexei Starovoitov
Date: Wed Oct 03 2018 - 22:58:19 EST


Similar to networking sandboxing programs and cgroup-v2 based hooks
(BPF_CGROUP_INET_[INGRESS|EGRESS], BPF_CGROUP_INET[4|6]_[BIND|CONNECT], etc),
introduce basic per-container sandboxing for file access via a
new BPF_PROG_TYPE_FILE_FILTER program type that attaches after the
security_file_open() LSM hook and works as an additional file_open filter.
The new cgroup bpf hook is called BPF_CGROUP_FILE_OPEN.

Just like other cgroup-bpf programs, the new BPF_PROG_TYPE_FILE_FILTER type
is only available to root.

This program type has access to a single argument, 'struct bpf_file_info',
that contains standard sys_stat fields:
struct bpf_file_info {
__u64 inode;
__u32 dev_major;
__u32 dev_minor;
__u32 fs_magic;
__u32 mnt_id;
__u32 nlink;
__u32 mode; /* file mode S_ISDIR, S_ISLNK, 0755, etc */
__u32 flags; /* open flags O_RDWR, O_CREAT, etc */
};
Other file attributes can be added in the future to the end of this struct
without breaking bpf programs.

For debugging, introduce the bpf_get_file_path() helper that returns the
NUL-terminated full path of the file. It should never be used for sandboxing.

Use cases:
- disallow certain FS types within containers (fs_magic == CGROUP2_SUPER_MAGIC)
- restrict permissions in particular mount (mnt_id == X && (flags & O_RDWR))
- disallow access to hard linked sensitive files (nlink > 1 && mode == 0700)
- disallow access to world writeable files (mode & S_IWOTH)
- disallow access to given set of files (dev_major == X && dev_minor == Y && inode == Z)

Signed-off-by: Alexei Starovoitov <ast@xxxxxxxxxx>
---
include/linux/bpf-cgroup.h | 10 +++
include/linux/bpf_types.h | 1 +
include/uapi/linux/bpf.h | 28 +++++-
kernel/bpf/cgroup.c | 171 +++++++++++++++++++++++++++++++++++++
kernel/bpf/syscall.c | 7 ++
5 files changed, 216 insertions(+), 1 deletion(-)

diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
index 588dd5f0bd85..766f0223c222 100644
--- a/include/linux/bpf-cgroup.h
+++ b/include/linux/bpf-cgroup.h
@@ -109,6 +109,8 @@ int __cgroup_bpf_run_filter_sock_ops(struct sock *sk,
int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor,
short access, enum bpf_attach_type type);

+/* Runs the cgroup-bpf programs of attach @type against @file (kernel/bpf/cgroup.c). */
+int __cgroup_bpf_file_filter(struct file *file, enum bpf_attach_type type);
+
static inline enum bpf_cgroup_storage_type cgroup_storage_type(
struct bpf_map *map)
{
@@ -253,6 +255,13 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key,
\
__ret; \
})
+/*
+ * Evaluates to 0 when cgroup_bpf_enabled is false; otherwise to the result
+ * of running the BPF_CGROUP_FILE_OPEN programs on @file through
+ * __cgroup_bpf_file_filter().
+ */
+#define BPF_CGROUP_RUN_PROG_FILE_FILTER(file) \
+({ \
+	int __ret = cgroup_bpf_enabled ? \
+		__cgroup_bpf_file_filter(file, BPF_CGROUP_FILE_OPEN) : 0; \
+	__ret; \
+})
int cgroup_bpf_prog_attach(const union bpf_attr *attr,
enum bpf_prog_type ptype, struct bpf_prog *prog);
int cgroup_bpf_prog_detach(const union bpf_attr *attr,
@@ -321,6 +330,7 @@ static inline int bpf_percpu_cgroup_storage_update(struct bpf_map *map,
#define BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk, uaddr, t_ctx) ({ 0; })
#define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops) ({ 0; })
#define BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(type,major,minor,access) ({ 0; })
+#define BPF_CGROUP_RUN_PROG_FILE_FILTER(file) ({ 0; })

#define for_each_cgroup_storage_type(stype) for (; false; )

diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
index 5432f4c9f50e..f182b2e37b94 100644
--- a/include/linux/bpf_types.h
+++ b/include/linux/bpf_types.h
@@ -33,6 +33,7 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_LIRC_MODE2, lirc_mode2)
#ifdef CONFIG_INET
BPF_PROG_TYPE(BPF_PROG_TYPE_SK_REUSEPORT, sk_reuseport)
#endif
+BPF_PROG_TYPE(BPF_PROG_TYPE_FILE_FILTER, file_filter)

BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY, array_map_ops)
BPF_MAP_TYPE(BPF_MAP_TYPE_PERCPU_ARRAY, percpu_array_map_ops)
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index f9187b41dff6..c0df8dd99edc 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -154,6 +154,7 @@ enum bpf_prog_type {
BPF_PROG_TYPE_LIRC_MODE2,
BPF_PROG_TYPE_SK_REUSEPORT,
BPF_PROG_TYPE_FLOW_DISSECTOR,
+ BPF_PROG_TYPE_FILE_FILTER,
};

enum bpf_attach_type {
@@ -175,6 +176,7 @@ enum bpf_attach_type {
BPF_CGROUP_UDP6_SENDMSG,
BPF_LIRC_MODE2,
BPF_FLOW_DISSECTOR,
+ BPF_CGROUP_FILE_OPEN,
__MAX_BPF_ATTACH_TYPE
};

@@ -2215,6 +2217,18 @@ union bpf_attr {
* pointer that was returned from bpf_sk_lookup_xxx\ ().
* Return
* 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_get_file_path(struct bpf_file_info *file, char *buf, u32 size_of_buf)
+ * Description
+ * Reconstruct the full path of *file* and store it into *buf* of
+ * *size_of_buf*. The *size_of_buf* must be strictly positive.
+ * On success, the helper makes sure that the *buf* is NUL-terminated.
+ * On failure, it is filled with string "(error)".
+ * This helper should only be used for debugging.
+ * The path stored in *buf* should never be used for permission checks.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@@ -2303,7 +2317,8 @@ union bpf_attr {
FN(skb_ancestor_cgroup_id), \
FN(sk_lookup_tcp), \
FN(sk_lookup_udp), \
- FN(sk_release),
+ FN(sk_release), \
+ FN(get_file_path),

/* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call
@@ -2896,4 +2911,15 @@ struct bpf_flow_keys {
};
};

+/*
+ * Context of BPF_PROG_TYPE_FILE_FILTER programs. Fields are read-only;
+ * each comment names the kernel value the field is loaded from.
+ */
+struct bpf_file_info {
+ __u64 inode; /* file->f_inode->i_ino */
+ __u32 dev_major; /* file->f_inode->i_sb->s_dev >> MINORBITS */
+ __u32 dev_minor; /* file->f_inode->i_sb->s_dev & MINORMASK */
+ __u32 fs_magic; /* file->f_inode->i_sb->s_magic */
+ __u32 mnt_id; /* real_mount(file->f_path.mnt)->mnt_id */
+ __u32 nlink; /* file->f_inode->i_nlink */
+ __u32 mode; /* file mode S_ISDIR, S_ISLNK, 0755, etc */
+ __u32 flags; /* open flags O_RDWR, O_CREAT, etc */
+};
+
#endif /* _UAPI__LINUX_BPF_H__ */
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index 00f6ed2e4f9a..38d0b4aa83ea 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -15,6 +15,7 @@
#include <linux/bpf.h>
#include <linux/bpf-cgroup.h>
#include <net/sock.h>
+#include <../fs/mount.h>

DEFINE_STATIC_KEY_FALSE(cgroup_bpf_enabled_key);
EXPORT_SYMBOL(cgroup_bpf_enabled_key);
@@ -754,3 +755,173 @@ const struct bpf_verifier_ops cg_dev_verifier_ops = {
.get_func_proto = cgroup_dev_func_proto,
.is_valid_access = cgroup_dev_is_valid_access,
};
+
+/**
+ * __cgroup_bpf_file_filter() - Run file filter programs for the current task
+ * @file: file handed to the programs as their context
+ * @type: attach type selecting which effective program array is run
+ *
+ * The programs are run under rcu_read_lock() on the cgroup returned by
+ * task_dfl_cgroup(current).
+ *
+ * Return: 0 if BPF_PROG_RUN_ARRAY() yields 1, -EPERM otherwise.
+ */
+int __cgroup_bpf_file_filter(struct file *file, enum bpf_attach_type type)
+{
+	struct cgroup *cgrp;
+	int allow;
+
+	rcu_read_lock();
+	cgrp = task_dfl_cgroup(current);
+	allow = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], file,
+				   BPF_PROG_RUN);
+	rcu_read_unlock();
+
+	if (allow != 1)
+		return -EPERM;
+	return 0;
+}
+EXPORT_SYMBOL(__cgroup_bpf_file_filter);
+
+/*
+ * bpf_get_file_path() helper: store the path of @file in @buf.
+ *
+ * @file: program context, i.e. the struct file being opened
+ * @buf:  destination buffer of @size bytes
+ * @size: size of @buf
+ *
+ * On success the pointer returned by file_path() is treated as the start of
+ * a string that ends at @buf + @size: it is moved to the front of @buf and
+ * the rest of the buffer is zeroed.  On failure @buf holds "(error)",
+ * truncated to fit, zero-padded and always NUL-terminated.
+ *
+ * Return: 0 on success, the error encoded in file_path()'s result otherwise.
+ */
+BPF_CALL_3(bpf_get_file_path, struct file *, file, char *, buf, u64, size)
+{
+	char *p = file_path(file, buf, size);
+	int len;
+
+	if (IS_ERR(p)) {
+		/*
+		 * strncpy() zero-pads the remainder of @buf but does not
+		 * terminate it when size <= strlen("(error)"), so force the
+		 * terminator explicitly.
+		 */
+		strncpy(buf, "(error)", size);
+		if (size)
+			buf[size - 1] = '\0';
+		return PTR_ERR(p);
+	}
+	len = buf + size - p;
+	memmove(buf, p, len);
+	memset(buf + len, 0, size - len);
+	return 0;
+}
+
+/*
+ * Verifier-facing description of the bpf_get_file_path() helper: it takes
+ * the program context, a buffer that the helper fills, and that buffer's
+ * size, and returns an integer.
+ */
+const struct bpf_func_proto bpf_get_file_path_proto = {
+ .func = bpf_get_file_path,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_PTR_TO_UNINIT_MEM,
+ .arg3_type = ARG_CONST_SIZE,
+};
+
+/*
+ * Helper lookup for file filter programs: bpf_get_file_path() is handled
+ * here, every other helper id is delegated to cgroup_dev_func_proto().
+ */
+static const struct bpf_func_proto *
+cgroup_file_filter_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
+{
+	if (func_id == BPF_FUNC_get_file_path)
+		return &bpf_get_file_path_proto;
+
+	return cgroup_dev_func_proto(func_id, prog);
+}
+
+/*
+ * Validate a program's access to its struct bpf_file_info context.
+ *
+ * @off:  byte offset of the access within struct bpf_file_info
+ * @size: width of the access in bytes
+ * @type: access direction; only BPF_READ is accepted
+ * @prog: unused
+ * @info: unused
+ *
+ * An access is valid only if it is a read that starts exactly at a field
+ * and covers the whole field: 8 bytes for 'inode', 4 bytes for every other
+ * field.  Any other offset (the upper half of 'inode', struct padding) is
+ * rejected, because cgroup_file_filter_ctx_access() has no rewrite for it
+ * and would emit no instructions.
+ *
+ * Return: true if the access is allowed, false otherwise.
+ */
+static bool cgroup_file_filter_is_valid_access(int off, int size,
+					       enum bpf_access_type type,
+					       const struct bpf_prog *prog,
+					       struct bpf_insn_access_aux *info)
+{
+	if (off < 0 || off + size > sizeof(struct bpf_file_info) ||
+	    off % size || type != BPF_READ)
+		return false;
+
+	switch (off) {
+	case offsetof(struct bpf_file_info, fs_magic):
+	case offsetof(struct bpf_file_info, mnt_id):
+	case offsetof(struct bpf_file_info, dev_major):
+	case offsetof(struct bpf_file_info, dev_minor):
+	case offsetof(struct bpf_file_info, nlink):
+	case offsetof(struct bpf_file_info, mode):
+	case offsetof(struct bpf_file_info, flags):
+		return size == sizeof(__u32);
+
+	case offsetof(struct bpf_file_info, inode):
+		return size == sizeof(__u64);
+
+	default:
+		/* not the start of any field: nothing to rewrite it into */
+		return false;
+	}
+}
+
+/*
+ * Helpers for cgroup_file_filter_ctx_access().  F is a member expression
+ * rooted at a NULL pointer, so (size_t)&F is used as the byte offset of
+ * that member.  Each macro appends one BPF_LDX_MEM, sized by the member's
+ * type, to insn, records sizeof(member) in *target_size and evaluates to a
+ * zero value of the member's type.
+ *
+ * LD_1 loads through si->src_reg: it is the first load of a chain.
+ */
+#define LD_1(F) ({ \
+ typeof(F) val = 0; \
+ *insn++ = BPF_LDX_MEM(BPF_SIZEOF(val), \
+ si->dst_reg, si->src_reg, \
+ ((size_t)&F)); \
+ *target_size = sizeof(val); \
+ val; \
+ })
+
+/*
+ * LD_n loads through si->dst_reg, i.e. it dereferences the value that the
+ * previously emitted load left in the destination register.
+ */
+#define LD_n(F) ({ \
+ typeof(F) val = 0; \
+ *insn++ = BPF_LDX_MEM(BPF_SIZEOF(val), \
+ si->dst_reg, si->dst_reg, \
+ ((size_t)&F)); \
+ *target_size = sizeof(val); \
+ val; \
+ })
+
+/*
+ * Rewrite one load from struct bpf_file_info, selected by si->off, into
+ * loads that follow the pointer chain noted in each case, starting from
+ * si->src_reg.  The instructions are written to insn_buf and *target_size
+ * is set by the last load emitted.
+ *
+ * Returns the number of instructions written; this is 0 when si->off
+ * matches none of the cases.
+ */
+static u32 cgroup_file_filter_ctx_access(enum bpf_access_type type,
+ const struct bpf_insn *si,
+ struct bpf_insn *insn_buf,
+ struct bpf_prog *prog,
+ u32 *target_size)
+{
+ struct bpf_insn *insn = insn_buf;
+ /* NULL base: LD_1/LD_n only take member addresses to obtain offsets */
+ struct file *file = NULL;
+ struct inode *inode;
+ struct super_block *sb;
+ struct mount *mnt;
+
+ switch (si->off) {
+ case offsetof(struct bpf_file_info, fs_magic):
+ /* dst = file->f_inode->i_sb->s_magic */
+ inode = LD_1(file->f_inode);
+ sb = LD_n(inode->i_sb);
+ LD_n(sb->s_magic);
+ break;
+ case offsetof(struct bpf_file_info, dev_major):
+ /* dst = file->f_inode->i_sb->s_dev */
+ inode = LD_1(file->f_inode);
+ sb = LD_n(inode->i_sb);
+ LD_n(sb->s_dev);
+ /* keep only the major part: dst >>= MINORBITS */
+ *insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg, MINORBITS);
+ break;
+ case offsetof(struct bpf_file_info, dev_minor):
+ /* dst = file->f_inode->i_sb->s_dev */
+ inode = LD_1(file->f_inode);
+ sb = LD_n(inode->i_sb);
+ LD_n(sb->s_dev);
+ /* keep only the minor part: dst &= MINORMASK */
+ *insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, MINORMASK);
+ break;
+ case offsetof(struct bpf_file_info, inode):
+ /* dst = file->f_inode->i_ino */
+ inode = LD_1(file->f_inode);
+ LD_n(inode->i_ino);
+ break;
+ case offsetof(struct bpf_file_info, mode):
+ /* dst = file->f_inode->i_mode */
+ inode = LD_1(file->f_inode);
+ LD_n(inode->i_mode);
+ break;
+ case offsetof(struct bpf_file_info, nlink):
+ /* dst = file->f_inode->i_nlink */
+ inode = LD_1(file->f_inode);
+ LD_n(inode->i_nlink);
+ break;
+ case offsetof(struct bpf_file_info, flags):
+ /* dst = file->f_flags */
+ LD_1(file->f_flags);
+ break;
+ case offsetof(struct bpf_file_info, mnt_id):
+ /* dst = real_mount(file->f_path.mnt)->mnt_id */
+ /*
+ * NOTE(review): real_mount() is applied to the zero value that
+ * LD_1 evaluates to, so mnt only serves as an offset base for
+ * LD_n; presumably a container_of()-style adjustment, confirm
+ * against fs/mount.h.
+ */
+ mnt = real_mount(LD_1(file->f_path.mnt));
+ LD_n(mnt->mnt_id);
+ break;
+ }
+ return insn - insn_buf;
+}
+#undef LD_1
+#undef LD_n
+
+/* Program ops of the file_filter program type; no callbacks are set. */
+const struct bpf_prog_ops file_filter_prog_ops = {
+};
+
+/*
+ * Verifier callbacks of the file_filter program type: helper lookup,
+ * validation of context accesses and their rewrite into struct file loads.
+ */
+const struct bpf_verifier_ops file_filter_verifier_ops = {
+ .get_func_proto = cgroup_file_filter_proto,
+ .is_valid_access = cgroup_file_filter_is_valid_access,
+ .convert_ctx_access = cgroup_file_filter_ctx_access
+};
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 5742df21598c..7b0ffb8d7063 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -1630,6 +1630,9 @@ static int bpf_prog_attach(const union bpf_attr *attr)
case BPF_FLOW_DISSECTOR:
ptype = BPF_PROG_TYPE_FLOW_DISSECTOR;
break;
+ case BPF_CGROUP_FILE_OPEN:
+ ptype = BPF_PROG_TYPE_FILE_FILTER;
+ break;
default:
return -EINVAL;
}
@@ -1699,6 +1702,9 @@ static int bpf_prog_detach(const union bpf_attr *attr)
case BPF_CGROUP_DEVICE:
ptype = BPF_PROG_TYPE_CGROUP_DEVICE;
break;
+ case BPF_CGROUP_FILE_OPEN:
+ ptype = BPF_PROG_TYPE_FILE_FILTER;
+ break;
case BPF_SK_MSG_VERDICT:
return sockmap_get_from_fd(attr, BPF_PROG_TYPE_SK_MSG, NULL);
case BPF_SK_SKB_STREAM_PARSER:
@@ -1741,6 +1747,7 @@ static int bpf_prog_query(const union bpf_attr *attr,
case BPF_CGROUP_UDP6_SENDMSG:
case BPF_CGROUP_SOCK_OPS:
case BPF_CGROUP_DEVICE:
+ case BPF_CGROUP_FILE_OPEN:
break;
case BPF_LIRC_MODE2:
return lirc_prog_query(attr, uattr);
--
2.17.1