Re: [PATCH bpf-next v3 1/9] bpf: Implement task local storage

From: Martin KaFai Lau
Date: Wed Nov 04 2020 - 17:19:35 EST


Please ignore this reply, which missed some recipients.

On Wed, Nov 04, 2020 at 02:08:14PM -0800, Martin KaFai Lau wrote:
> On Wed, Nov 04, 2020 at 05:44:45PM +0100, KP Singh wrote:
> [ ... ]
>
> > +static void *bpf_pid_task_storage_lookup_elem(struct bpf_map *map, void *key)
> > +{
> > + struct bpf_local_storage_data *sdata;
> > + struct task_struct *task;
> > + unsigned int f_flags;
> > + struct pid *pid;
> > + int fd, err;
> > +
> > + fd = *(int *)key;
> > + pid = pidfd_get_pid(fd, &f_flags);
> > + if (IS_ERR(pid))
> > + return ERR_CAST(pid);
> > +
> > + /* We should be in an RCU read side critical section, it should be safe
> > + * to call pid_task.
> > + */
> > + WARN_ON_ONCE(!rcu_read_lock_held());
> > + task = pid_task(pid, PIDTYPE_PID);
> > + if (!task) {
> > + err = -ENOENT;
> > + goto out;
> > + }
> > +
> > + sdata = task_storage_lookup(task, map, true);
> > + put_pid(pid);
> > + return sdata ? sdata->data : NULL;
> > +out:
> > + put_pid(pid);
> > + return ERR_PTR(err);
> > +}
> > +
> > +static int bpf_pid_task_storage_update_elem(struct bpf_map *map, void *key,
> > + void *value, u64 map_flags)
> > +{
> > + struct bpf_local_storage_data *sdata;
> > + struct task_struct *task;
> > + unsigned int f_flags;
> > + struct pid *pid;
> > + int fd, err;
> > +
> > + fd = *(int *)key;
> > + pid = pidfd_get_pid(fd, &f_flags);
> > + if (IS_ERR(pid))
> > + return PTR_ERR(pid);
> > +
> > + /* We should be in an RCU read side critical section, it should be safe
> > + * to call pid_task.
> > + */
> > + WARN_ON_ONCE(!rcu_read_lock_held());
> > + task = pid_task(pid, PIDTYPE_PID);
> > + if (!task) {
> > + err = -ENOENT;
> > + goto out;
> > + }
> > +
> > + sdata = bpf_local_storage_update(
> > + task, (struct bpf_local_storage_map *)map, value, map_flags);
> It seems the task is protected only by RCU here, so the task may be going
> away. Is that ok?
>
> Or is the following comment in the later "BPF_CALL_4(bpf_task_storage_get, ...)"
> no longer valid?
> /* This helper must only called from where the task is guaranteed
> * to have a refcount and cannot be freed.
> */
>
> > +
> > + err = PTR_ERR_OR_ZERO(sdata);
> > +out:
> > + put_pid(pid);
> > + return err;
> > +}
> > +
>
> [ ... ]
>
> > +BPF_CALL_4(bpf_task_storage_get, struct bpf_map *, map, struct task_struct *,
> > + task, void *, value, u64, flags)
> > +{
> > + struct bpf_local_storage_data *sdata;
> > +
> > + if (flags & ~(BPF_LOCAL_STORAGE_GET_F_CREATE))
> > + return (unsigned long)NULL;
> > +
> > + /* explicitly check that the task_storage_ptr is not
> > + * NULL as task_storage_lookup returns NULL in this case and
> > + * bpf_local_storage_update expects the owner to have a
> > + * valid storage pointer.
> > + */
> > + if (!task_storage_ptr(task))
> > + return (unsigned long)NULL;
> > +
> > + sdata = task_storage_lookup(task, map, true);
> > + if (sdata)
> > + return (unsigned long)sdata->data;
> > +
> > + /* This helper must only called from where the task is guaranteed
> > + * to have a refcount and cannot be freed.
> > + */
> > + if (flags & BPF_LOCAL_STORAGE_GET_F_CREATE) {
> > + sdata = bpf_local_storage_update(
> > + task, (struct bpf_local_storage_map *)map, value,
> > + BPF_NOEXIST);
> > + return IS_ERR(sdata) ? (unsigned long)NULL :
> > + (unsigned long)sdata->data;
> > + }
> > +
> > + return (unsigned long)NULL;
> > +}
> > +
>
> [ ... ]
>
> > diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
> > index 8f50c9c19f1b..f3fe9f53f93c 100644
> > --- a/kernel/bpf/syscall.c
> > +++ b/kernel/bpf/syscall.c
> > @@ -773,7 +773,8 @@ static int map_check_btf(struct bpf_map *map, const struct btf *btf,
> > map->map_type != BPF_MAP_TYPE_ARRAY &&
> > map->map_type != BPF_MAP_TYPE_CGROUP_STORAGE &&
> > map->map_type != BPF_MAP_TYPE_SK_STORAGE &&
> > - map->map_type != BPF_MAP_TYPE_INODE_STORAGE)
> > + map->map_type != BPF_MAP_TYPE_INODE_STORAGE &&
> > + map->map_type != BPF_MAP_TYPE_TASK_STORAGE)
> This is to enable spin lock support in a map's value. Without peeking at
> patch 5, I was a bit confused here. It seems patch 5 was missed when
> inode storage was added.
>
> > return -ENOTSUPP;
> > if (map->spin_lock_off + sizeof(struct bpf_spin_lock) >
> > map->value_size) {