Re: [PATCH v2 00/15] Make the user mode driver code a better citizen

From: Tetsuo Handa
Date: Tue Jun 30 2020 - 02:18:15 EST


On 2020/06/30 10:13, Eric W. Biederman wrote:
> Alexei Starovoitov <alexei.starovoitov@xxxxxxxxx> writes:
>
>> On Mon, Jun 29, 2020 at 02:55:05PM -0500, Eric W. Biederman wrote:
>>>
>>> I have tested thes changes by booting with the code compiled in and
>>> by killing "bpfilter_umh" and running iptables -vnL to restart
>>> the userspace driver.
>>>
>>> I have compiled tested each change with and without CONFIG_BPFILTER
>>> enabled.
>>
>> With
>> CONFIG_BPFILTER=y
>> CONFIG_BPFILTER_UMH=m
>> it doesn't build:
>>
>> ERROR: modpost: "kill_pid_info" [net/bpfilter/bpfilter.ko] undefined!
>>
>> I've added:
>> +EXPORT_SYMBOL(kill_pid_info);
>> to continue testing...

kill_pid() is already exported.

>>
>> And then did:
>> while true; do iptables -L;rmmod bpfilter; done
>>
>> Unfortunately sometimes 'rmmod bpfilter' hangs in wait_event().
>>
>> I suspect patch 13 is somehow responsible:
>> + if (tgid) {
>> + kill_pid_info(SIGKILL, SEND_SIG_PRIV, tgid);
>> + wait_event(tgid->wait_pidfd, !pid_task(tgid, PIDTYPE_TGID));
>> + bpfilter_umh_cleanup(info);
>> + }
>>
>> I cannot figure out why it hangs. Some sort of race ?
>> Since adding short delay between kill and wait makes it work.

Because there is a race window that detach_pid() from __unhash_process() from
__exit_signal() from release_task() from exit_notify() from do_exit() is called
some time after wake_up_all(&pid->wait_pidfd) from do_notify_pidfd() from
do_notify_parent() from exit_notify() from do_exit() was called (in other words,
we can't use pid->wait_pidfd when pid_task() is used at wait_event()) ?

Below are changes I suggest.

diff --git a/kernel/umd.c b/kernel/umd.c
index ff79fb16d738..f688813b8830 100644
--- a/kernel/umd.c
+++ b/kernel/umd.c
@@ -26,7 +26,7 @@ static struct vfsmount *blob_to_mnt(const void *data, size_t len, const char *na
if (IS_ERR(mnt))
return mnt;

- file = file_open_root(mnt->mnt_root, mnt, name, O_CREAT | O_WRONLY, 0700);
+ file = file_open_root(mnt->mnt_root, mnt, name, O_CREAT | O_WRONLY | O_EXCL, 0700);
if (IS_ERR(file)) {
mntput(mnt);
return ERR_CAST(file);
@@ -52,16 +52,18 @@ static struct vfsmount *blob_to_mnt(const void *data, size_t len, const char *na

/**
* umd_load_blob - Remember a blob of bytes for fork_usermode_driver
- * @info: information about usermode driver
- * @data: a blob of bytes that can be executed as a file
- * @len: The lentgh of the blob
+ * @info: information about usermode driver (shouldn't be NULL)
+ * @data: a blob of bytes that can be executed as a file (shouldn't be NULL)
+ * @len: The lentgh of the blob (shouldn't be 0)
*
*/
int umd_load_blob(struct umd_info *info, const void *data, size_t len)
{
struct vfsmount *mnt;

- if (WARN_ON_ONCE(info->wd.dentry || info->wd.mnt))
+ if (!info || !info->driver_name || !data || !len)
+ return -EINVAL;
+ if (info->wd.dentry || info->wd.mnt)
return -EBUSY;

mnt = blob_to_mnt(data, len, info->driver_name);
@@ -76,15 +78,14 @@ EXPORT_SYMBOL_GPL(umd_load_blob);

/**
* umd_unload_blob - Disassociate @info from a previously loaded blob
- * @info: information about usermode driver
+ * @info: information about usermode driver (shouldn't be NULL)
*
*/
int umd_unload_blob(struct umd_info *info)
{
- if (WARN_ON_ONCE(!info->wd.mnt ||
- !info->wd.dentry ||
- info->wd.mnt->mnt_root != info->wd.dentry))
+ if (!info || !info->driver_name || !info->wd.dentry || !info->wd.mnt)
return -EINVAL;
+ BUG_ON(info->wd.mnt->mnt_root != info->wd.dentry);

kern_unmount(info->wd.mnt);
info->wd.mnt = NULL;
@@ -138,7 +139,7 @@ static void umd_cleanup(struct subprocess_info *info)
{
struct umd_info *umd_info = info->data;

- /* cleanup if umh_setup() was successful but exec failed */
+ /* cleanup if umd_setup() was successful but exec failed */
if (info->retval) {
fput(umd_info->pipe_to_umh);
fput(umd_info->pipe_from_umh);
@@ -163,7 +164,10 @@ int fork_usermode_driver(struct umd_info *info)
const char *argv[] = { info->driver_name, NULL };
int err;

- if (WARN_ON_ONCE(info->tgid))
+ if (!info || !info->driver_name || !info->wd.dentry || !info->wd.mnt)
+ return -EINVAL;
+ BUG_ON(info->wd.mnt->mnt_root != info->wd.dentry);
+ if (info->tgid)
return -EBUSY;

err = -ENOMEM;
diff --git a/net/bpfilter/bpfilter_kern.c b/net/bpfilter/bpfilter_kern.c
index 91474884ddb7..9dd70aacb81a 100644
--- a/net/bpfilter/bpfilter_kern.c
+++ b/net/bpfilter/bpfilter_kern.c
@@ -19,8 +19,13 @@ static void shutdown_umh(void)
struct pid *tgid = info->tgid;

if (tgid) {
- kill_pid_info(SIGKILL, SEND_SIG_PRIV, tgid);
- wait_event(tgid->wait_pidfd, !pid_task(tgid, PIDTYPE_TGID));
+ kill_pid(tgid, SIGKILL, 1);
+ while (({ bool done;
+ rcu_read_lock();
+ done = !pid_task(tgid, PIDTYPE_TGID);
+ rcu_read_unlock();
+ done; }))
+ schedule_timeout_uninterruptible(1);
bpfilter_umh_cleanup(info);
}
}