Currently, if a struct_ops map is loaded with BPF_F_LINK, it must also
define the .validate() and .update() callbacks in its corresponding
struct bpf_struct_ops in the kernel. Enabling struct_ops link is useful
in its own right to ensure that the map is unloaded if an application
crashes. For example, with sched_ext, we want to automatically unload
the host-wide scheduler if the application crashes. We would likely
never support updating elements of a sched_ext struct_ops map, so we'd
have to implement stub callbacks whose only purpose is to report that
element updates are _not_ supported, just to benefit from the basic
lifetime management of struct_ops links.
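Let's enable struct_ops maps to work with BPF_F_LINK even if they
haven't defined these callbacks, by assuming that a struct_ops map
element cannot be updated by default: validation is skipped when
.validate() is absent, and updating a link fails with -EOPNOTSUPP when
.update() is absent.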
Signed-off-by: David Vernet <void@xxxxxxxxxxxxx>
---
kernel/bpf/bpf_struct_ops.c | 15 +++++++++------
1 file changed, 9 insertions(+), 6 deletions(-)
diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c
index eaff04eefb31..fdc3e8705a3c 100644
--- a/kernel/bpf/bpf_struct_ops.c
+++ b/kernel/bpf/bpf_struct_ops.c
@@ -509,9 +509,12 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
}
if (st_map->map.map_flags & BPF_F_LINK) {
- err = st_ops->validate(kdata);
- if (err)
- goto reset_unlock;
+ err = 0;
+ if (st_ops->validate) {
+ err = st_ops->validate(kdata);
+ if (err)
+ goto reset_unlock;
+ }
set_memory_rox((long)st_map->image, 1);
/* Let bpf_link handle registration & unregistration.
*
@@ -663,9 +666,6 @@ static struct bpf_map *bpf_struct_ops_map_alloc(union bpf_attr *attr)
if (attr->value_size != vt->size)
return ERR_PTR(-EINVAL);
- if (attr->map_flags & BPF_F_LINK && (!st_ops->validate || !st_ops->update))
- return ERR_PTR(-EOPNOTSUPP);
-
t = st_ops->type;
st_map_size = sizeof(*st_map) +
@@ -823,6 +823,9 @@ static int bpf_struct_ops_map_link_update(struct bpf_link *link, struct bpf_map
if (!bpf_struct_ops_valid_to_reg(new_map))
return -EINVAL;
+ if (!st_map->st_ops->update)
+ return -EOPNOTSUPP;
+
mutex_lock(&update_mutex);
old_map = rcu_dereference_protected(st_link->map, lockdep_is_held(&update_mutex));