[PATCH RFC v2 net-next 4/4] bpfilter: rough bpfilter codegen example hack
From: Alexei Starovoitov
Date: Thu May 03 2018 - 00:36:22 EST
From: Daniel Borkmann <daniel@xxxxxxxxxxxxx>
Signed-off-by: Daniel Borkmann <daniel@xxxxxxxxxxxxx>
---
net/bpfilter/Makefile | 2 +-
net/bpfilter/bpfilter_mod.h | 285 ++++++++++++++++++++++++++++++++++++++++++-
net/bpfilter/ctor.c | 57 +++++----
net/bpfilter/gen.c | 290 ++++++++++++++++++++++++++++++++++++++++++++
net/bpfilter/init.c | 11 +-
net/bpfilter/main.c | 15 ++-
net/bpfilter/sockopt.c | 137 ++++++++++++++++-----
net/bpfilter/tables.c | 5 +-
net/bpfilter/tgts.c | 1 +
9 files changed, 737 insertions(+), 66 deletions(-)
create mode 100644 net/bpfilter/gen.c
diff --git a/net/bpfilter/Makefile b/net/bpfilter/Makefile
index bec6181de995..3796651c76cb 100644
--- a/net/bpfilter/Makefile
+++ b/net/bpfilter/Makefile
@@ -4,7 +4,7 @@
#
hostprogs-y := bpfilter_umh
-bpfilter_umh-objs := main.o tgts.o targets.o tables.o init.o ctor.o sockopt.o
+bpfilter_umh-objs := main.o tgts.o targets.o tables.o init.o ctor.o sockopt.o gen.o
HOSTCFLAGS += -I. -Itools/include/
# a bit of elf magic to convert bpfilter_umh binary into a binary blob
diff --git a/net/bpfilter/bpfilter_mod.h b/net/bpfilter/bpfilter_mod.h
index f0de41b20793..b4209985efff 100644
--- a/net/bpfilter/bpfilter_mod.h
+++ b/net/bpfilter/bpfilter_mod.h
@@ -21,8 +21,8 @@ struct bpfilter_table_info {
unsigned int initial_entries;
unsigned int hook_entry[BPFILTER_INET_HOOK_MAX];
unsigned int underflow[BPFILTER_INET_HOOK_MAX];
- unsigned int stacksize;
- void ***jumpstack;
+// unsigned int stacksize;
+// void ***jumpstack;
unsigned char entries[0] __aligned(8);
};
@@ -64,22 +64,55 @@ struct bpfilter_ipt_error {
struct bpfilter_target {
struct list_head all_target_list;
- const char name[BPFILTER_EXTENSION_MAXNAMELEN];
+ char name[BPFILTER_EXTENSION_MAXNAMELEN];
unsigned int size;
int hold;
u16 family;
u8 rev;
};
+/* State carried while one BPF program is generated, loaded and attached. */
+struct bpfilter_gen_ctx {
+ struct bpf_insn *img; /* instruction buffer, allocated by bpfilter_gen_init() */
+ u32 len_cur; /* number of instructions written to img so far */
+ u32 len_max; /* capacity of img, in instructions */
+ u32 default_verdict; /* value the prologue/epilogue load into R0 before exiting */
+ int fd; /* fd returned by the program load; -1 after a successful init */
+ int ifindex; /* interface resolved from a rule's in_iface; 0 if none yet */
+ bool offloaded; /* set by bpfilter_gen_commit(); selects XDP_FLAGS_HW_MODE on attach */
+};
+
+union bpf_attr;
+int sys_bpf(int cmd, union bpf_attr *attr, unsigned int size);
+
+int bpfilter_gen_init(struct bpfilter_gen_ctx *ctx);
+int bpfilter_gen_prologue(struct bpfilter_gen_ctx *ctx);
+int bpfilter_gen_epilogue(struct bpfilter_gen_ctx *ctx);
+int bpfilter_gen_append(struct bpfilter_gen_ctx *ctx,
+ struct bpfilter_ipt_ip *ent, int verdict);
+int bpfilter_gen_commit(struct bpfilter_gen_ctx *ctx);
+void bpfilter_gen_destroy(struct bpfilter_gen_ctx *ctx);
+
struct bpfilter_target *bpfilter_target_get_by_name(const char *name);
void bpfilter_target_put(struct bpfilter_target *tgt);
int bpfilter_target_add(struct bpfilter_target *tgt);
-struct bpfilter_table_info *bpfilter_ipv4_table_ctor(struct bpfilter_table *tbl);
+struct bpfilter_table_info *
+bpfilter_ipv4_table_alloc(struct bpfilter_table *tbl, __u32 size_ents);
+struct bpfilter_table_info *
+bpfilter_ipv4_table_finalize(struct bpfilter_table *tbl,
+ struct bpfilter_table_info *info,
+ __u32 size_ents, __u32 num_ents);
+struct bpfilter_table_info *
+bpfilter_ipv4_table_finalize2(struct bpfilter_table *tbl,
+ struct bpfilter_table_info *info,
+ __u32 size_ents, __u32 num_ents);
+
int bpfilter_ipv4_register_targets(void);
void bpfilter_tables_init(void);
int bpfilter_get_info(void *addr, int len);
int bpfilter_get_entries(void *cmd, int len);
+int bpfilter_set_replace(void *cmd, int len);
+int bpfilter_set_add_counters(void *cmd, int len);
int bpfilter_ipv4_init(void);
int copy_from_user(void *dst, void *addr, int len);
@@ -93,4 +126,248 @@ extern int pid;
extern int debug_fd;
#define ENOTSUPP 524
+/* Helper macros for filter block array initializers. */
+
+/* ALU ops on registers, bpf_add|sub|...: dst_reg += src_reg */
+
+#define BPF_ALU64_REG(OP, DST, SRC) \
+ ((struct bpf_insn) { \
+ .code = BPF_ALU64 | BPF_OP(OP) | BPF_X, \
+ .dst_reg = DST, \
+ .src_reg = SRC, \
+ .off = 0, \
+ .imm = 0 })
+
+#define BPF_ALU32_REG(OP, DST, SRC) \
+ ((struct bpf_insn) { \
+ .code = BPF_ALU | BPF_OP(OP) | BPF_X, \
+ .dst_reg = DST, \
+ .src_reg = SRC, \
+ .off = 0, \
+ .imm = 0 })
+
+/* ALU ops on immediates, bpf_add|sub|...: dst_reg += imm32 */
+
+#define BPF_ALU64_IMM(OP, DST, IMM) \
+ ((struct bpf_insn) { \
+ .code = BPF_ALU64 | BPF_OP(OP) | BPF_K, \
+ .dst_reg = DST, \
+ .src_reg = 0, \
+ .off = 0, \
+ .imm = IMM })
+
+#define BPF_ALU32_IMM(OP, DST, IMM) \
+ ((struct bpf_insn) { \
+ .code = BPF_ALU | BPF_OP(OP) | BPF_K, \
+ .dst_reg = DST, \
+ .src_reg = 0, \
+ .off = 0, \
+ .imm = IMM })
+
+/* Endianness conversion, cpu_to_{l,b}e(), {l,b}e_to_cpu() */
+
+#define BPF_ENDIAN(TYPE, DST, LEN) \
+ ((struct bpf_insn) { \
+ .code = BPF_ALU | BPF_END | BPF_SRC(TYPE), \
+ .dst_reg = DST, \
+ .src_reg = 0, \
+ .off = 0, \
+ .imm = LEN })
+
+/* Short form of mov, dst_reg = src_reg */
+
+#define BPF_MOV64_REG(DST, SRC) \
+ ((struct bpf_insn) { \
+ .code = BPF_ALU64 | BPF_MOV | BPF_X, \
+ .dst_reg = DST, \
+ .src_reg = SRC, \
+ .off = 0, \
+ .imm = 0 })
+
+#define BPF_MOV32_REG(DST, SRC) \
+ ((struct bpf_insn) { \
+ .code = BPF_ALU | BPF_MOV | BPF_X, \
+ .dst_reg = DST, \
+ .src_reg = SRC, \
+ .off = 0, \
+ .imm = 0 })
+
+/* Short form of mov, dst_reg = imm32 */
+
+#define BPF_MOV64_IMM(DST, IMM) \
+ ((struct bpf_insn) { \
+ .code = BPF_ALU64 | BPF_MOV | BPF_K, \
+ .dst_reg = DST, \
+ .src_reg = 0, \
+ .off = 0, \
+ .imm = IMM })
+
+#define BPF_MOV32_IMM(DST, IMM) \
+ ((struct bpf_insn) { \
+ .code = BPF_ALU | BPF_MOV | BPF_K, \
+ .dst_reg = DST, \
+ .src_reg = 0, \
+ .off = 0, \
+ .imm = IMM })
+
+/* BPF_LD_IMM64 macro encodes single 'load 64-bit immediate' insn */
+#define BPF_LD_IMM64(DST, IMM) \
+ BPF_LD_IMM64_RAW(DST, 0, IMM)
+
+#define BPF_LD_IMM64_RAW(DST, SRC, IMM) \
+ ((struct bpf_insn) { \
+ .code = BPF_LD | BPF_DW | BPF_IMM, \
+ .dst_reg = DST, \
+ .src_reg = SRC, \
+ .off = 0, \
+ .imm = (__u32) (IMM) }), \
+ ((struct bpf_insn) { \
+ .code = 0, /* zero is reserved opcode */ \
+ .dst_reg = 0, \
+ .src_reg = 0, \
+ .off = 0, \
+ .imm = ((__u64) (IMM)) >> 32 })
+
+/* pseudo BPF_LD_IMM64 insn used to refer to process-local map_fd */
+#define BPF_LD_MAP_FD(DST, MAP_FD) \
+ BPF_LD_IMM64_RAW(DST, BPF_PSEUDO_MAP_FD, MAP_FD)
+
+/* Short form of mov based on type, BPF_X: dst_reg = src_reg, BPF_K: dst_reg = imm32 */
+
+#define BPF_MOV64_RAW(TYPE, DST, SRC, IMM) \
+ ((struct bpf_insn) { \
+ .code = BPF_ALU64 | BPF_MOV | BPF_SRC(TYPE), \
+ .dst_reg = DST, \
+ .src_reg = SRC, \
+ .off = 0, \
+ .imm = IMM })
+
+#define BPF_MOV32_RAW(TYPE, DST, SRC, IMM) \
+ ((struct bpf_insn) { \
+ .code = BPF_ALU | BPF_MOV | BPF_SRC(TYPE), \
+ .dst_reg = DST, \
+ .src_reg = SRC, \
+ .off = 0, \
+ .imm = IMM })
+
+/* Direct packet access, R0 = *(uint *) (skb->data + imm32) */
+
+#define BPF_LD_ABS(SIZE, IMM) \
+ ((struct bpf_insn) { \
+ .code = BPF_LD | BPF_SIZE(SIZE) | BPF_ABS, \
+ .dst_reg = 0, \
+ .src_reg = 0, \
+ .off = 0, \
+ .imm = IMM })
+
+/* Indirect packet access, R0 = *(uint *) (skb->data + src_reg + imm32) */
+
+#define BPF_LD_IND(SIZE, SRC, IMM) \
+ ((struct bpf_insn) { \
+ .code = BPF_LD | BPF_SIZE(SIZE) | BPF_IND, \
+ .dst_reg = 0, \
+ .src_reg = SRC, \
+ .off = 0, \
+ .imm = IMM })
+
+/* Memory load, dst_reg = *(uint *) (src_reg + off16) */
+
+#define BPF_LDX_MEM(SIZE, DST, SRC, OFF) \
+ ((struct bpf_insn) { \
+ .code = BPF_LDX | BPF_SIZE(SIZE) | BPF_MEM, \
+ .dst_reg = DST, \
+ .src_reg = SRC, \
+ .off = OFF, \
+ .imm = 0 })
+
+/* Memory store, *(uint *) (dst_reg + off16) = src_reg */
+
+#define BPF_STX_MEM(SIZE, DST, SRC, OFF) \
+ ((struct bpf_insn) { \
+ .code = BPF_STX | BPF_SIZE(SIZE) | BPF_MEM, \
+ .dst_reg = DST, \
+ .src_reg = SRC, \
+ .off = OFF, \
+ .imm = 0 })
+
+/* Atomic memory add, *(uint *)(dst_reg + off16) += src_reg */
+
+#define BPF_STX_XADD(SIZE, DST, SRC, OFF) \
+ ((struct bpf_insn) { \
+ .code = BPF_STX | BPF_SIZE(SIZE) | BPF_XADD, \
+ .dst_reg = DST, \
+ .src_reg = SRC, \
+ .off = OFF, \
+ .imm = 0 })
+
+/* Memory store, *(uint *) (dst_reg + off16) = imm32 */
+
+#define BPF_ST_MEM(SIZE, DST, OFF, IMM) \
+ ((struct bpf_insn) { \
+ .code = BPF_ST | BPF_SIZE(SIZE) | BPF_MEM, \
+ .dst_reg = DST, \
+ .src_reg = 0, \
+ .off = OFF, \
+ .imm = IMM })
+
+/* Conditional jumps against registers, if (dst_reg 'op' src_reg) goto pc + off16 */
+
+#define BPF_JMP_REG(OP, DST, SRC, OFF) \
+ ((struct bpf_insn) { \
+ .code = BPF_JMP | BPF_OP(OP) | BPF_X, \
+ .dst_reg = DST, \
+ .src_reg = SRC, \
+ .off = OFF, \
+ .imm = 0 })
+
+/* Conditional jumps against immediates, if (dst_reg 'op' imm32) goto pc + off16 */
+
+#define BPF_JMP_IMM(OP, DST, IMM, OFF) \
+ ((struct bpf_insn) { \
+ .code = BPF_JMP | BPF_OP(OP) | BPF_K, \
+ .dst_reg = DST, \
+ .src_reg = 0, \
+ .off = OFF, \
+ .imm = IMM })
+
+/* Unconditional jumps, goto pc + off16 */
+
+#define BPF_JMP_A(OFF) \
+ ((struct bpf_insn) { \
+ .code = BPF_JMP | BPF_JA, \
+ .dst_reg = 0, \
+ .src_reg = 0, \
+ .off = OFF, \
+ .imm = 0 })
+
+/* Function call */
+
+#define BPF_EMIT_CALL(FUNC) \
+ ((struct bpf_insn) { \
+ .code = BPF_JMP | BPF_CALL, \
+ .dst_reg = 0, \
+ .src_reg = 0, \
+ .off = 0, \
+ .imm = ((FUNC) - __bpf_call_base) })
+
+/* Raw code statement block */
+
+#define BPF_RAW_INSN(CODE, DST, SRC, OFF, IMM) \
+ ((struct bpf_insn) { \
+ .code = CODE, \
+ .dst_reg = DST, \
+ .src_reg = SRC, \
+ .off = OFF, \
+ .imm = IMM })
+
+/* Program exit */
+
+#define BPF_EXIT_INSN() \
+ ((struct bpf_insn) { \
+ .code = BPF_JMP | BPF_EXIT, \
+ .dst_reg = 0, \
+ .src_reg = 0, \
+ .off = 0, \
+ .imm = 0 })
+
#endif
diff --git a/net/bpfilter/ctor.c b/net/bpfilter/ctor.c
index efb7feef3c42..ba44c21cacfa 100644
--- a/net/bpfilter/ctor.c
+++ b/net/bpfilter/ctor.c
@@ -1,8 +1,12 @@
// SPDX-License-Identifier: GPL-2.0
-#include <sys/socket.h>
-#include <linux/bitops.h>
#include <stdlib.h>
#include <stdio.h>
+#include <string.h>
+
+#include <sys/socket.h>
+
+#include <linux/bitops.h>
+
#include "bpfilter_mod.h"
unsigned int __sw_hweight32(unsigned int w)
@@ -13,35 +17,47 @@ unsigned int __sw_hweight32(unsigned int w)
return (w * 0x01010101) >> 24;
}
-struct bpfilter_table_info *bpfilter_ipv4_table_ctor(struct bpfilter_table *tbl)
+struct bpfilter_table_info *bpfilter_ipv4_table_alloc(struct bpfilter_table *tbl,
+ __u32 size_ents)
{
unsigned int num_hooks = hweight32(tbl->valid_hooks);
- struct bpfilter_ipt_standard *tgts;
struct bpfilter_table_info *info;
- struct bpfilter_ipt_error *term;
- unsigned int mask, offset, h, i;
unsigned int size, alloc_size;
size = sizeof(struct bpfilter_ipt_standard) * num_hooks;
size += sizeof(struct bpfilter_ipt_error);
+ size += size_ents;
alloc_size = size + sizeof(struct bpfilter_table_info);
info = malloc(alloc_size);
- if (!info)
- return NULL;
+ if (info) {
+ memset(info, 0, alloc_size);
+ info->size = size;
+ }
+ return info;
+}
+
+struct bpfilter_table_info *bpfilter_ipv4_table_finalize(struct bpfilter_table *tbl,
+ struct bpfilter_table_info *info,
+ __u32 size_ents, __u32 num_ents)
+{
+ unsigned int num_hooks = hweight32(tbl->valid_hooks);
+ struct bpfilter_ipt_standard *tgts;
+ struct bpfilter_ipt_error *term;
+ struct bpfilter_ipt_entry *ent;
+ unsigned int mask, offset, h, i;
- info->num_entries = num_hooks + 1;
- info->size = size;
+ info->num_entries = num_ents + num_hooks + 1;
- tgts = (struct bpfilter_ipt_standard *) (info + 1);
- term = (struct bpfilter_ipt_error *) (tgts + num_hooks);
+ ent = (struct bpfilter_ipt_entry *)(info + 1);
+ tgts = (struct bpfilter_ipt_standard *)((u8 *)ent + size_ents);
+ term = (struct bpfilter_ipt_error *)(tgts + num_hooks);
mask = tbl->valid_hooks;
offset = 0;
h = 0;
i = 0;
- dprintf(debug_fd, "mask %x num_hooks %d\n", mask, num_hooks);
while (mask) {
struct bpfilter_ipt_standard *t;
@@ -55,7 +71,6 @@ struct bpfilter_table_info *bpfilter_ipv4_table_ctor(struct bpfilter_table *tbl)
BPFILTER_IPT_STANDARD_INIT(BPFILTER_NF_ACCEPT);
t->target.target.u.kernel.target =
bpfilter_target_get_by_name(t->target.target.u.user.name);
- dprintf(debug_fd, "user.name %s\n", t->target.target.u.user.name);
if (!t->target.target.u.kernel.target)
goto out_fail;
@@ -67,14 +82,10 @@ struct bpfilter_table_info *bpfilter_ipv4_table_ctor(struct bpfilter_table *tbl)
*term = (struct bpfilter_ipt_error) BPFILTER_IPT_ERROR_INIT;
term->target.target.u.kernel.target =
bpfilter_target_get_by_name(term->target.target.u.user.name);
- dprintf(debug_fd, "user.name %s\n", term->target.target.u.user.name);
- if (!term->target.target.u.kernel.target)
- goto out_fail;
-
- dprintf(debug_fd, "info %p\n", info);
- return info;
-
+ if (!term->target.target.u.kernel.target) {
out_fail:
- free(info);
- return NULL;
+ free(info);
+ return NULL;
+ }
+ return info;
}
diff --git a/net/bpfilter/gen.c b/net/bpfilter/gen.c
new file mode 100644
index 000000000000..8e08561b78f1
--- /dev/null
+++ b/net/bpfilter/gen.c
@@ -0,0 +1,290 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <errno.h>
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include <linux/if_ether.h>
+#include <linux/if_link.h>
+#include <linux/rtnetlink.h>
+#include <linux/bpf.h>
+typedef __u16 __bitwise __sum16; /* hack */
+#include <linux/ip.h>
+
+#include <arpa/inet.h>
+
+#include "bpfilter_mod.h"
+
+unsigned int if_nametoindex(const char *ifname);
+
+/* Widen a pointer to the 64-bit integer form stored in union bpf_attr. */
+static inline __u64 bpf_ptr_to_u64(const void *ptr)
+{
+	const unsigned long addr = (unsigned long)ptr;
+
+	return (__u64)addr;
+}
+
+/*
+ * Load @insn_num instructions from @insns as a "GPL"-licensed program of
+ * type @type through BPF_PROG_LOAD.  @offload_ifindex is passed through as
+ * attr.prog_ifindex (0 means no device is named).
+ *
+ * Returns whatever sys_bpf() returns.
+ */
+static int bpf_prog_load(enum bpf_prog_type type,
+			 const struct bpf_insn *insns,
+			 unsigned int insn_num,
+			 __u32 offload_ifindex)
+{
+	union bpf_attr attr;
+
+	/*
+	 * Clear every byte explicitly: an initializer on a union is only
+	 * guaranteed to zero its first member, and bpf(2) rejects attributes
+	 * whose unused bytes are non-zero.
+	 */
+	memset(&attr, 0, sizeof(attr));
+
+	attr.prog_type = type;
+	attr.insns = bpf_ptr_to_u64(insns);
+	attr.insn_cnt = insn_num;
+	attr.license = bpf_ptr_to_u64("GPL");
+	attr.prog_ifindex = offload_ifindex;
+
+	return sys_bpf(BPF_PROG_LOAD, &attr, sizeof(attr));
+}
+
+/*
+ * Send one RTM_SETLINK request over a NETLINK_ROUTE socket for @ifindex,
+ * carrying a nested attribute (type 43, IFLA_XDP) that holds @fd and, when
+ * non-zero, @flags, then read a single reply buffer and inspect it.
+ *
+ * Returns 0 when no message in the reply failed a check or reported a
+ * non-zero error, -1 otherwise (diagnostics go to stdout via printf).
+ */
+static int bpf_set_link_xdp_fd(int ifindex, int fd, __u32 flags)
+{
+ struct sockaddr_nl sa;
+ int sock, seq = 0, len, ret = -1;
+ char buf[4096];
+ struct nlattr *nla, *nla_xdp;
+ /* request layout: header, ifinfomsg, then room for the attributes */
+ struct {
+ struct nlmsghdr nh;
+ struct ifinfomsg ifinfo;
+ char attrbuf[64];
+ } req;
+ struct nlmsghdr *nh;
+ struct nlmsgerr *err;
+
+ memset(&sa, 0, sizeof(sa));
+ sa.nl_family = AF_NETLINK;
+
+ sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
+ if (sock < 0) {
+ printf("open netlink socket: %s\n", strerror(errno));
+ return -1;
+ }
+
+ if (bind(sock, (struct sockaddr *)&sa, sizeof(sa)) < 0) {
+ printf("bind to netlink: %s\n", strerror(errno));
+ goto cleanup;
+ }
+
+ memset(&req, 0, sizeof(req));
+ req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg));
+ req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
+ req.nh.nlmsg_type = RTM_SETLINK;
+ req.nh.nlmsg_pid = 0;
+ req.nh.nlmsg_seq = ++seq;
+ req.ifinfo.ifi_family = AF_UNSPEC;
+ req.ifinfo.ifi_index = ifindex;
+
+ /* start the nested attribute for XDP right after the aligned headers */
+ nla = (struct nlattr *)(((char *)&req)
+ + NLMSG_ALIGN(req.nh.nlmsg_len));
+ nla->nla_type = NLA_F_NESTED | 43/*IFLA_XDP*/;
+ nla->nla_len = NLA_HDRLEN;
+
+ /* add XDP fd as the first nested attribute */
+ nla_xdp = (struct nlattr *)((char *)nla + nla->nla_len);
+ nla_xdp->nla_type = 1/*IFLA_XDP_FD*/;
+ nla_xdp->nla_len = NLA_HDRLEN + sizeof(int);
+ memcpy((char *)nla_xdp + NLA_HDRLEN, &fd, sizeof(fd));
+ nla->nla_len += nla_xdp->nla_len;
+
+ /* if user passed in any flags, add those too */
+ if (flags) {
+ nla_xdp = (struct nlattr *)((char *)nla + nla->nla_len);
+ nla_xdp->nla_type = 3/*IFLA_XDP_FLAGS*/;
+ nla_xdp->nla_len = NLA_HDRLEN + sizeof(flags);
+ memcpy((char *)nla_xdp + NLA_HDRLEN, &flags, sizeof(flags));
+ nla->nla_len += nla_xdp->nla_len;
+ }
+
+ /* grow the message length by the finished nested attribute */
+ req.nh.nlmsg_len += NLA_ALIGN(nla->nla_len);
+
+ if (send(sock, &req, req.nh.nlmsg_len, 0) < 0) {
+ printf("send to netlink: %s\n", strerror(errno));
+ goto cleanup;
+ }
+
+ /* only one recv() is issued; the reply is expected to fit in buf */
+ len = recv(sock, buf, sizeof(buf), 0);
+ if (len < 0) {
+ printf("recv from netlink: %s\n", strerror(errno));
+ goto cleanup;
+ }
+
+ for (nh = (struct nlmsghdr *)buf; NLMSG_OK(nh, len);
+ nh = NLMSG_NEXT(nh, len)) {
+ /*
+ * NOTE(review): this assumes the reply's nlmsg_pid equals
+ * getpid(), i.e. that the socket's netlink port id is the
+ * process id - confirm for this environment.
+ */
+ if (nh->nlmsg_pid != getpid()) {
+ printf("Wrong pid %d, expected %d\n",
+ nh->nlmsg_pid, getpid());
+ goto cleanup;
+ }
+ if (nh->nlmsg_seq != seq) {
+ printf("Wrong seq %d, expected %d\n",
+ nh->nlmsg_seq, seq);
+ goto cleanup;
+ }
+ switch (nh->nlmsg_type) {
+ case NLMSG_ERROR:
+ err = (struct nlmsgerr *)NLMSG_DATA(nh);
+ /* an error of 0 is an acknowledgement; keep scanning */
+ if (!err->error)
+ continue;
+ printf("nlmsg error %s\n", strerror(-err->error));
+ goto cleanup;
+ case NLMSG_DONE:
+ /* leaves the switch only; the for loop keeps iterating */
+ break;
+ }
+ }
+
+ ret = 0;
+
+cleanup:
+ close(sock);
+ return ret;
+}
+
+/*
+ * Attach the loaded program (ctx->fd) to ctx->ifindex, requesting
+ * XDP_FLAGS_HW_MODE when the context is marked as offloaded.
+ *
+ * Returns the result of bpf_set_link_xdp_fd().
+ */
+static int bpfilter_load_dev(struct bpfilter_gen_ctx *ctx)
+{
+	const u32 mode = ctx->offloaded ? XDP_FLAGS_HW_MODE : 0;
+
+	return bpf_set_link_xdp_fd(ctx->ifindex, ctx->fd, mode);
+}
+
+/*
+ * Reset @ctx and allocate a zeroed instruction buffer of BPF_MAXINSNS
+ * entries.  The default verdict is XDP_PASS and no program fd is held.
+ *
+ * Returns 0 on success, -ENOMEM when the buffer cannot be allocated.  In
+ * both cases @ctx is left in a state bpfilter_gen_destroy() can handle.
+ */
+int bpfilter_gen_init(struct bpfilter_gen_ctx *ctx)
+{
+	memset(ctx, 0, sizeof(*ctx));
+	/*
+	 * Mark "no program" before anything can fail: the 0 left by memset()
+	 * is a valid descriptor that a later destroy would otherwise close.
+	 */
+	ctx->fd = -1;
+	ctx->default_verdict = XDP_PASS;
+
+	ctx->img = calloc(BPF_MAXINSNS, sizeof(*ctx->img));
+	if (!ctx->img)
+		return -ENOMEM;
+	ctx->len_max = BPF_MAXINSNS;
+
+	return 0;
+}
+
+/*
+ * Append instruction @x to the image of the variable named "ctx" in the
+ * calling function.  When the image is full this returns -ENOMEM from the
+ * *calling* function, so it may only be used in functions returning int.
+ */
+#define EMIT(x)							\
+	do {							\
+		if (ctx->len_cur >= ctx->len_max)		\
+			return -ENOMEM;				\
+		ctx->img[ctx->len_cur] = (x);			\
+		ctx->len_cur++;					\
+	} while (0)
+
+/*
+ * Emit the program entry: stash the context argument in R9, load the
+ * packet bounds into R2/R3 and return the default verdict right away when
+ * the packet is shorter than an Ethernet header.
+ *
+ * Returns 0, or -ENOMEM (via EMIT) once the image is full.
+ */
+int bpfilter_gen_prologue(struct bpfilter_gen_ctx *ctx)
+{
+	const struct bpf_insn entry[] = {
+		/* R9 = R1 (program argument) */
+		BPF_MOV64_REG(BPF_REG_9, BPF_REG_1),
+		/* R2 = xdp_md->data, R3 = xdp_md->data_end */
+		BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_9,
+			    offsetof(struct xdp_md, data)),
+		BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_9,
+			    offsetof(struct xdp_md, data_end)),
+		/* R1 = data + ETH_HLEN; skip the early exit if it fits */
+		BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+		BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, ETH_HLEN),
+		BPF_JMP_REG(BPF_JLE, BPF_REG_1, BPF_REG_3, 2),
+		/* too short: return the default verdict */
+		BPF_MOV32_IMM(BPF_REG_0, ctx->default_verdict),
+		BPF_EXIT_INSN(),
+	};
+	unsigned int idx;
+
+	for (idx = 0; idx < sizeof(entry) / sizeof(entry[0]); idx++)
+		EMIT(entry[idx]);
+	return 0;
+}
+
+/*
+ * Emit the program tail reached when no rule matched: load the default
+ * verdict into R0 and exit.
+ *
+ * Returns 0, or -ENOMEM (via EMIT) once the image is full.
+ */
+int bpfilter_gen_epilogue(struct bpfilter_gen_ctx *ctx)
+{
+	const struct bpf_insn tail[] = {
+		BPF_MOV32_IMM(BPF_REG_0, ctx->default_verdict),
+		BPF_EXIT_INSN(),
+	};
+	unsigned int idx;
+
+	for (idx = 0; idx < sizeof(tail) / sizeof(tail[0]); idx++)
+		EMIT(tail[idx]);
+	return 0;
+}
+
+/*
+ * Check that @ent only uses features the generator can express:
+ *  - no output interface,
+ *  - an input-interface mask that covers exactly the name plus its NUL
+ *    (all 0xff) and is zero afterwards, or is all zero for an empty name,
+ *  - source/destination masks that are either 0 or 0xffffffff.
+ *
+ * Returns 0 if supported, -ENOTSUPP if not, and -EINVAL when in_iface is
+ * not NUL-terminated.
+ */
+static int bpfilter_gen_check_entry(const struct bpfilter_ipt_ip *ent)
+{
+	const char *nul;
+	size_t ones, i;
+
+	if (ent->out_iface[0] != '\0')
+		return -ENOTSUPP;
+
+	/*
+	 * The name comes from user space, so never run an unbounded strlen()
+	 * over it.  Assumes in_iface and in_iface_mask are IFNAMSIZ bytes,
+	 * as the mask comparison always did - TODO confirm against the struct.
+	 */
+	nul = memchr(ent->in_iface, '\0', IFNAMSIZ);
+	if (!nul)
+		return -EINVAL;
+
+	/* A non-empty name must be matched including its terminating NUL. */
+	ones = (size_t)(nul - ent->in_iface);
+	if (ones > 0)
+		ones++;
+
+	for (i = 0; i < IFNAMSIZ; i++) {
+		const __u8 want = i < ones ? 0xff : 0x00;
+
+		if ((__u8)ent->in_iface_mask[i] != want)
+			return -ENOTSUPP;
+	}
+
+	if ((ent->src_mask != 0 && ent->src_mask != 0xffffffff) ||
+	    (ent->dst_mask != 0 && ent->dst_mask != 0xffffffff))
+		return -ENOTSUPP;
+
+	return 0;
+}
+
+/*
+ * Emit the code for one rule.  A @verdict of -1 becomes XDP_DROP, anything
+ * else XDP_PASS.  The emitted block returns that action when the frame is
+ * IPv4 and every address selected by the rule's masks is equal to the
+ * rule's address; otherwise it falls through to whatever is emitted next.
+ *
+ * Nothing is emitted (and 0 is returned) for rules without any address
+ * mask or whose in_iface does not resolve to an interface index.
+ *
+ * Returns 0 on success, a negative value from bpfilter_gen_check_entry(),
+ * -ENOTSUPP when the rule names a different interface than earlier rules,
+ * or -ENOMEM (via EMIT) once the image is full.
+ */
+int bpfilter_gen_append(struct bpfilter_gen_ctx *ctx,
+			struct bpfilter_ipt_ip *ent, int verdict)
+{
+	/* instructions emitted per address comparison; jump offsets use it */
+	const int cmp_len = 4;
+	u32 match_xdp = verdict == -1 ? XDP_DROP : XDP_PASS;
+	int ret, ifindex, match_state = 0;
+
+	/*
+	 * convention R0: scratch until the verdict is set, R1: tmp,
+	 * R2: data, R3: data_end, R4: loaded field, R5: match counter,
+	 * R9: xdp_buff
+	 */
+	ret = bpfilter_gen_check_entry(ent);
+	if (ret < 0)
+		return ret;
+	if (ent->src_mask == 0 && ent->dst_mask == 0)
+		return 0;
+
+	ifindex = if_nametoindex(ent->in_iface);
+	if (!ifindex)
+		return 0;
+	if (ctx->ifindex && ctx->ifindex != ifindex)
+		return -ENOTSUPP;
+
+	ctx->ifindex = ifindex;
+	match_state = !!ent->src_mask + !!ent->dst_mask;
+
+	EMIT(BPF_MOV64_REG(BPF_REG_1, BPF_REG_2));
+	EMIT(BPF_MOV32_IMM(BPF_REG_5, 0));
+	EMIT(BPF_LDX_MEM(BPF_H, BPF_REG_4, BPF_REG_1,
+			 offsetof(struct ethhdr, h_proto)));
+	/* not IPv4: jump to the final counter check, which then fails */
+	EMIT(BPF_JMP_IMM(BPF_JNE, BPF_REG_4, htons(ETH_P_IP),
+			 3 + match_state * cmp_len));
+	EMIT(BPF_ALU64_IMM(BPF_ADD, BPF_REG_1,
+			   sizeof(struct ethhdr) + sizeof(struct iphdr)));
+	/* IP header does not fit: same target as above */
+	EMIT(BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3,
+			 1 + match_state * cmp_len));
+	/* step R1 back so it points at the start of the IP header */
+	EMIT(BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -(int)sizeof(struct iphdr)));
+	/*
+	 * Compare addresses register against register.  A 32-bit load is
+	 * zero-extended while the immediate of a 64-bit conditional jump is
+	 * sign-extended, so comparing against the immediate directly can
+	 * never match an address whose bit 31 is set.  MOV32 zero-extends.
+	 */
+	if (ent->src_mask) {
+		EMIT(BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1,
+				 offsetof(struct iphdr, saddr)));
+		EMIT(BPF_MOV32_IMM(BPF_REG_0, ent->src));
+		EMIT(BPF_JMP_REG(BPF_JNE, BPF_REG_4, BPF_REG_0, 1));
+		EMIT(BPF_ALU32_IMM(BPF_ADD, BPF_REG_5, 1));
+	}
+	if (ent->dst_mask) {
+		EMIT(BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1,
+				 offsetof(struct iphdr, daddr)));
+		EMIT(BPF_MOV32_IMM(BPF_REG_0, ent->dst));
+		EMIT(BPF_JMP_REG(BPF_JNE, BPF_REG_4, BPF_REG_0, 1));
+		EMIT(BPF_ALU32_IMM(BPF_ADD, BPF_REG_5, 1));
+	}
+	/* all requested comparisons hit: return the rule's action */
+	EMIT(BPF_JMP_IMM(BPF_JNE, BPF_REG_5, match_state, 2));
+	EMIT(BPF_MOV32_IMM(BPF_REG_0, match_xdp));
+	EMIT(BPF_EXIT_INSN());
+	return 0;
+}
+
+/*
+ * Load the generated image as an XDP program and attach it to
+ * ctx->ifindex.  When an interface is known, a load naming that interface
+ * (prog_ifindex) is tried first and ctx->offloaded is set if it succeeds;
+ * otherwise, or if that load fails, the program is loaded without a
+ * device.
+ *
+ * Returns 0 on success or the negative value of the step that failed.  A
+ * successfully loaded fd stays in ctx->fd either way.
+ */
+int bpfilter_gen_commit(struct bpfilter_gen_ctx *ctx)
+{
+	int fd = -1;
+	int ret;
+
+	/* With ifindex 0 the first load would be a plain one, not an offload. */
+	if (ctx->ifindex) {
+		fd = bpf_prog_load(BPF_PROG_TYPE_XDP, ctx->img,
+				   ctx->len_cur, ctx->ifindex);
+		if (fd >= 0)
+			ctx->offloaded = true;
+	}
+	if (fd < 0) {
+		fd = bpf_prog_load(BPF_PROG_TYPE_XDP, ctx->img,
+				   ctx->len_cur, 0);
+		if (fd < 0)
+			return fd;
+	}
+
+	/* 0 is a valid descriptor, so every non-negative result is kept. */
+	ctx->fd = fd;
+	ret = bpfilter_load_dev(ctx);
+
+	return ret < 0 ? ret : 0;
+}
+
+/*
+ * Release what @ctx owns: the instruction buffer and, if one is held, the
+ * program fd.  Both are reset afterwards so calling this twice is
+ * harmless; the remaining fields (e.g. offloaded) are left readable.
+ */
+void bpfilter_gen_destroy(struct bpfilter_gen_ctx *ctx)
+{
+	free(ctx->img);
+	ctx->img = NULL;
+	ctx->len_cur = 0;
+	ctx->len_max = 0;
+
+	if (ctx->fd >= 0)
+		close(ctx->fd);
+	ctx->fd = -1;
+}
diff --git a/net/bpfilter/init.c b/net/bpfilter/init.c
index 699f3f623189..14e621a03217 100644
--- a/net/bpfilter/init.c
+++ b/net/bpfilter/init.c
@@ -1,6 +1,8 @@
// SPDX-License-Identifier: GPL-2.0
-#include <sys/socket.h>
#include <errno.h>
+
+#include <sys/socket.h>
+
#include "bpfilter_mod.h"
static struct bpfilter_table filter_table_ipv4 = {
@@ -22,12 +24,13 @@ int bpfilter_ipv4_init(void)
if (err)
return err;
- info = bpfilter_ipv4_table_ctor(t);
+ info = bpfilter_ipv4_table_alloc(t, 0);
+ if (!info)
+ return -ENOMEM;
+ info = bpfilter_ipv4_table_finalize(t, info, 0, 0);
if (!info)
return -ENOMEM;
-
t->info = info;
-
return bpfilter_table_add(&filter_table_ipv4);
}
diff --git a/net/bpfilter/main.c b/net/bpfilter/main.c
index e0273ca201ad..ebd8a4fb1e95 100644
--- a/net/bpfilter/main.c
+++ b/net/bpfilter/main.c
@@ -1,20 +1,23 @@
// SPDX-License-Identifier: GPL-2.0
#define _GNU_SOURCE
-#include <sys/uio.h>
#include <errno.h>
#include <stdio.h>
-#include <sys/socket.h>
#include <fcntl.h>
#include <unistd.h>
-#include "include/uapi/linux/bpf.h"
+
+#include <sys/uio.h>
+#include <sys/socket.h>
+
#include <asm/unistd.h>
+
+#include "include/uapi/linux/bpf.h"
+
#include "bpfilter_mod.h"
#include "msgfmt.h"
extern long int syscall (long int __sysno, ...);
-static inline int sys_bpf(enum bpf_cmd cmd, union bpf_attr *attr,
- unsigned int size)
+int sys_bpf(int cmd, union bpf_attr *attr, unsigned int size)
{
return syscall(321, cmd, attr, size);
}
@@ -39,7 +42,7 @@ int copy_to_user(void *addr, const void *src, int len)
struct iovec local;
struct iovec remote;
- local.iov_base = (void *) src;
+ local.iov_base = (void *)src;
local.iov_len = len;
remote.iov_base = addr;
remote.iov_len = len;
diff --git a/net/bpfilter/sockopt.c b/net/bpfilter/sockopt.c
index 43687daf51a3..26ad12a11736 100644
--- a/net/bpfilter/sockopt.c
+++ b/net/bpfilter/sockopt.c
@@ -1,10 +1,14 @@
// SPDX-License-Identifier: GPL-2.0
-#include <sys/socket.h>
#include <errno.h>
#include <string.h>
#include <stdio.h>
+#include <stdlib.h>
+
+#include <sys/socket.h>
+
#include "bpfilter_mod.h"
+/* TODO: Move all of this into a proper encoding/decoding layer. */
static int fetch_name(void *addr, int len, char *name, int name_len)
{
if (copy_from_user(name, addr, name_len))
@@ -55,12 +59,17 @@ int bpfilter_get_info(void *addr, int len)
return err;
}
-static int copy_target(struct bpfilter_standard_target *ut,
- struct bpfilter_standard_target *kt)
+static int target_u2k(struct bpfilter_standard_target *kt)
{
- struct bpfilter_target *tgt;
- int sz;
+ kt->target.u.kernel.target =
+ bpfilter_target_get_by_name(kt->target.u.user.name);
+ return kt->target.u.kernel.target ? 0 : -EINVAL;
+}
+static int target_k2u(struct bpfilter_standard_target *ut,
+ struct bpfilter_standard_target *kt)
+{
+ struct bpfilter_target *tgt;
if (put_user(kt->target.u.target_size,
&ut->target.u.target_size))
@@ -69,12 +78,9 @@ static int copy_target(struct bpfilter_standard_target *ut,
tgt = kt->target.u.kernel.target;
if (copy_to_user(ut->target.u.user.name, tgt->name, strlen(tgt->name)))
return -EFAULT;
-
if (put_user(tgt->rev, &ut->target.u.user.revision))
return -EFAULT;
-
- sz = tgt->size;
- if (copy_to_user(ut->target.data, kt->target.data, sz))
+ if (copy_to_user(ut->target.data, kt->target.data, tgt->size))
return -EFAULT;
return 0;
@@ -84,30 +90,25 @@ static int do_get_entries(void *up,
struct bpfilter_table *tbl,
struct bpfilter_table_info *info)
{
- unsigned int total_size = info->size;
const struct bpfilter_ipt_entry *ent;
+ unsigned int total_size = info->size;
+ void *base = info->entries;
unsigned int off;
- void *base;
-
- base = info->entries;
for (off = 0; off < total_size; off += ent->next_offset) {
- struct bpfilter_xt_counters *cntrs;
struct bpfilter_standard_target *tgt;
+ struct bpfilter_xt_counters *cntrs;
ent = base + off;
if (copy_to_user(up + off, ent, sizeof(*ent)))
return -EFAULT;
-
- /* XXX Just clear counters for now. XXX */
+ /* XXX: Just clear counters for now. */
cntrs = up + off + offsetof(struct bpfilter_ipt_entry, cntrs);
if (put_user(0, &cntrs->packet_cnt) ||
put_user(0, &cntrs->byte_cnt))
return -EINVAL;
-
- tgt = (void *) ent + ent->target_offset;
- dprintf(debug_fd, "target.verdict %d\n", tgt->verdict);
- if (copy_target(up + off + ent->target_offset, tgt))
+ tgt = (void *)ent + ent->target_offset;
+ if (target_k2u(up + off + ent->target_offset, tgt))
return -EFAULT;
}
return 0;
@@ -123,31 +124,113 @@ int bpfilter_get_entries(void *cmd, int len)
if (len < sizeof(struct bpfilter_ipt_get_entries))
return -EINVAL;
-
if (copy_from_user(&req, cmd, sizeof(req)))
return -EFAULT;
-
tbl = bpfilter_table_get_by_name(req.name, strlen(req.name));
if (!tbl)
return -ENOENT;
-
info = tbl->info;
if (!info) {
err = -ENOENT;
goto out_put;
}
-
if (info->size != req.size) {
err = -EINVAL;
goto out_put;
}
-
err = do_get_entries(uptr->entries, tbl, info);
- dprintf(debug_fd, "do_get_entries %d req.size %d\n", err, req.size);
-
out_put:
bpfilter_table_put(tbl);
+ return err;
+}
+/*
+ * Build a replacement ruleset for @tbl: copy req->size bytes of entries
+ * from the caller's memory at @base, resolve each entry's target, generate
+ * and commit the matching program, and on success install the new copy as
+ * tbl->info (freeing the previous one).
+ *
+ * Returns 0 on success or a negative error.  On error @tbl is untouched
+ * and both the new table copy and the generator context are released.
+ */
+static int do_set_replace(struct bpfilter_ipt_replace *req, void *base,
+			  struct bpfilter_table *tbl)
+{
+	unsigned int total_size = req->size;
+	struct bpfilter_table_info *info = NULL;
+	struct bpfilter_ipt_entry *ent;
+	struct bpfilter_gen_ctx ctx;
+	unsigned int off, sents = 0, ents = 0;
+	int ret;
+
+	ret = bpfilter_gen_init(&ctx);
+	if (ret < 0)
+		return ret;
+	ret = bpfilter_gen_prologue(&ctx);
+	if (ret < 0)
+		goto err;
+	info = bpfilter_ipv4_table_alloc(tbl, total_size);
+	if (!info) {
+		ret = -ENOMEM;
+		goto err;
+	}
+	if (copy_from_user(&info->entries[0], base, total_size)) {
+		ret = -EFAULT;
+		goto err;
+	}
+	base = &info->entries[0];
+	for (off = 0; off < total_size; off += ent->next_offset) {
+		struct bpfilter_standard_target *tgt;
+
+		/*
+		 * The offsets come from user space: make sure the entry and
+		 * the target read below lie inside the copied blob and that
+		 * the walk always moves forward.
+		 */
+		if (total_size - off < sizeof(*ent)) {
+			ret = -EINVAL;
+			goto err;
+		}
+		ent = base + off;
+		if (ent->next_offset < sizeof(*ent) ||
+		    ent->next_offset > total_size - off ||
+		    ent->target_offset < sizeof(*ent) ||
+		    ent->target_offset > ent->next_offset ||
+		    ent->next_offset - ent->target_offset < sizeof(*tgt)) {
+			ret = -EINVAL;
+			goto err;
+		}
+		ents++;
+		sents += ent->next_offset;
+		tgt = (void *)ent + ent->target_offset;
+		/* an unresolved target would leave a NULL kernel pointer */
+		ret = target_u2k(tgt);
+		if (ret < 0)
+			goto err;
+		ret = bpfilter_gen_append(&ctx, &ent->ip, tgt->verdict);
+		if (ret < 0)
+			goto err;
+	}
+	info->num_entries = ents;
+	info->size = sents;
+	memcpy(info->hook_entry, req->hook_entry, sizeof(info->hook_entry));
+	memcpy(info->underflow, req->underflow, sizeof(info->underflow));
+	ret = bpfilter_gen_epilogue(&ctx);
+	if (ret < 0)
+		goto err;
+	ret = bpfilter_gen_commit(&ctx);
+	if (ret < 0)
+		goto err;
+	free(tbl->info);
+	tbl->info = info;
+	bpfilter_gen_destroy(&ctx);
+	dprintf(debug_fd, "offloaded %u\n", ctx.offloaded);
+	return ret;
+err:
+	free(info);
+	bpfilter_gen_destroy(&ctx);
+	return ret;
+}
+
+/*
+ * Handle a "replace table" request.  @cmd is the caller-side address of a
+ * struct bpfilter_ipt_replace followed by its entries and @len the number
+ * of bytes the caller provided.
+ *
+ * Returns 0 on success, -EINVAL for a malformed request, -EFAULT when the
+ * header cannot be copied, -ENOMEM for an oversized counter count,
+ * -ENOENT when the table is unknown or has no ruleset yet, or the error
+ * from do_set_replace().
+ */
+int bpfilter_set_replace(void *cmd, int len)
+{
+	struct bpfilter_ipt_replace *uptr = cmd;
+	struct bpfilter_ipt_replace req;
+	struct bpfilter_table *tbl;
+	int err;
+
+	/* A negative len would turn into a huge unsigned value below. */
+	if (len < 0 || (unsigned int)len < sizeof(req))
+		return -EINVAL;
+	if (copy_from_user(&req, cmd, sizeof(req)))
+		return -EFAULT;
+	if (req.num_counters >= INT_MAX / sizeof(struct bpfilter_xt_counters))
+		return -ENOMEM;
+	if (req.num_counters == 0)
+		return -EINVAL;
+	/* The entries must fit in what the caller actually handed over. */
+	if (req.size > (unsigned int)len - sizeof(req))
+		return -EINVAL;
+	req.name[sizeof(req.name) - 1] = 0;
+	tbl = bpfilter_table_get_by_name(req.name, strlen(req.name));
+	if (!tbl)
+		return -ENOENT;
+	if (!tbl->info) {
+		err = -ENOENT;
+		goto out_put;
+	}
+	err = do_set_replace(&req, uptr->entries, tbl);
+out_put:
+	bpfilter_table_put(tbl);
+	return err;
+}
+/*
+ * Placeholder for the "add counters" request: both arguments are ignored
+ * and success is reported unconditionally.
+ */
+int bpfilter_set_add_counters(void *cmd, int len)
+{
+	(void)cmd;
+	(void)len;
+
+	return 0;
+}
diff --git a/net/bpfilter/tables.c b/net/bpfilter/tables.c
index 9a96599be634..e0dab283092d 100644
--- a/net/bpfilter/tables.c
+++ b/net/bpfilter/tables.c
@@ -1,8 +1,11 @@
// SPDX-License-Identifier: GPL-2.0
-#include <sys/socket.h>
#include <errno.h>
#include <string.h>
+
+#include <sys/socket.h>
+
#include <linux/hashtable.h>
+
#include "bpfilter_mod.h"
static unsigned int full_name_hash(const void *salt, const char *name, unsigned int len)
diff --git a/net/bpfilter/tgts.c b/net/bpfilter/tgts.c
index eac5e8ac0b4b..0a00bc289d3d 100644
--- a/net/bpfilter/tgts.c
+++ b/net/bpfilter/tgts.c
@@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
#include <sys/socket.h>
+
#include "bpfilter_mod.h"
struct bpfilter_target std_tgt = {
--
2.9.5