Re: For review: seccomp_user_notif(2) manual page

From: Jann Horn
Date: Wed Sep 30 2020 - 21:54:04 EST


On Thu, Oct 1, 2020 at 1:25 AM Tycho Andersen <tycho@tycho.pizza> wrote:
> On Thu, Oct 01, 2020 at 01:11:33AM +0200, Jann Horn wrote:
> > On Thu, Oct 1, 2020 at 1:03 AM Tycho Andersen <tycho@tycho.pizza> wrote:
> > > On Wed, Sep 30, 2020 at 10:34:51PM +0200, Michael Kerrisk (man-pages) wrote:
> > > > On 9/30/20 5:03 PM, Tycho Andersen wrote:
> > > > > On Wed, Sep 30, 2020 at 01:07:38PM +0200, Michael Kerrisk (man-pages) wrote:
> > > > >> ┌─────────────────────────────────────────────────────┐
> > > > >> │FIXME │
> > > > >> ├─────────────────────────────────────────────────────┤
> > > > >> │From my experiments, it appears that if a SEC‐ │
> > > > >> │COMP_IOCTL_NOTIF_RECV is done after the target │
> > > > >> │process terminates, then the ioctl() simply blocks │
> > > > >> │(rather than returning an error to indicate that the │
> > > > >> │target process no longer exists). │
> > > > >
> > > > > Yeah, I think Christian wanted to fix this at some point,
> > > >
> > > > Do you have a pointer that discussion? I could not find it with a
> > > > quick search.
> > > >
> > > > > but it's a
> > > > > bit sticky to do.
> > > >
> > > > Can you say a few words about the nature of the problem?
> > >
> > > I remembered wrong, it's actually in the tree: 99cdb8b9a573 ("seccomp:
> > > notify about unused filter"). So maybe there's a bug here?
> >
> > That thing only notifies on ->poll, it doesn't unblock ioctls; and
> > Michael's sample code uses SECCOMP_IOCTL_NOTIF_RECV to wait. So that
> > commit doesn't have any effect on this kind of usage.
>
> Yes, thanks. And the ones stuck in RECV are waiting on a semaphore so
> we don't have a count of all of them, unfortunately.
>
> We could maybe look inside the wait_list, but that will probably make
> people angry :)

The easiest way would probably be to open-code the semaphore-ish part,
and let the semaphore and poll share the waitqueue. The current code
kind of mirrors the semaphore's waitqueue in the wqh - open-coding the
entire semaphore would IMO be cleaner than that. And it's not like
semaphore semantics are even a good fit for this code anyway.

Let's see... if we didn't have the existing UAPI to worry about, I'd
do it as follows (*completely* untested). That way, the ioctl would
block exactly until either there actually is a request to deliver or
there are no more users of the filter. The problem is that if we just
apply this patch, existing users of SECCOMP_IOCTL_NOTIF_RECV that use
an event loop and don't set O_NONBLOCK will be screwed. So we'd
probably also have to add some stupid counter in place of the
semaphore's counter that we can use to preserve the old behavior of
returning -ENOENT once for each cancelled request. :(

I guess this is a nice point in favor of Michael's usual complaint
that if there are no man pages for a feature by the time the feature
lands upstream, there's a higher chance that the UAPI will suck
forever...



diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index 676d4af62103..f0f4c68e0bc6 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -138,7 +138,6 @@ struct seccomp_kaddfd {
* @notifications: A list of struct seccomp_knotif elements.
*/
struct notification {
- struct semaphore request;
u64 next_id;
struct list_head notifications;
};
@@ -859,7 +858,6 @@ static int seccomp_do_user_notification(int this_syscall,
list_add(&n.list, &match->notif->notifications);
INIT_LIST_HEAD(&n.addfd);

- up(&match->notif->request);
wake_up_poll(&match->wqh, EPOLLIN | EPOLLRDNORM);
mutex_unlock(&match->notify_lock);

@@ -1175,9 +1173,10 @@ find_notification(struct seccomp_filter *filter, u64 id)


static long seccomp_notify_recv(struct seccomp_filter *filter,
- void __user *buf)
+ void __user *buf, bool blocking)
{
struct seccomp_knotif *knotif = NULL, *cur;
+ DECLARE_WAITQUEUE(wait, current);
struct seccomp_notif unotif;
ssize_t ret;

@@ -1190,11 +1189,9 @@ static long seccomp_notify_recv(struct
seccomp_filter *filter,

memset(&unotif, 0, sizeof(unotif));

- ret = down_interruptible(&filter->notif->request);
- if (ret < 0)
- return ret;
-
mutex_lock(&filter->notify_lock);
+
+retry:
list_for_each_entry(cur, &filter->notif->notifications, list) {
if (cur->state == SECCOMP_NOTIFY_INIT) {
knotif = cur;
@@ -1202,14 +1199,32 @@ static long seccomp_notify_recv(struct
seccomp_filter *filter,
}
}

- /*
- * If we didn't find a notification, it could be that the task was
- * interrupted by a fatal signal between the time we were woken and
- * when we were able to acquire the rw lock.
- */
if (!knotif) {
- ret = -ENOENT;
- goto out;
+ /* This has to happen before checking &filter->users. */
+ prepare_to_wait(&filter->wqh, &wait, TASK_INTERRUPTIBLE);
+
+ /*
+ * If all users of the filter are gone, throw an error instead
+ * of pointlessly continuing to block.
+ */
+ if (refcount_read(&filter->users) == 0) {
+ ret = -ENOTCON;
+ goto out;
+ }
+ if (blocking) {
+ /* No notifications pending - wait for one,
then retry. */
+ mutex_unlock(&filter->notify_lock);
+ schedule();
+ mutex_lock(&filter->notify_lock);
+ if (signal_pending(current)) {
+ ret = -EINTR;
+ goto out;
+ }
+ goto retry;
+ } else {
+ ret = -ENOENT;
+ goto out;
+ }
}

unotif.id = knotif->id;
@@ -1220,6 +1235,7 @@ static long seccomp_notify_recv(struct
seccomp_filter *filter,
wake_up_poll(&filter->wqh, EPOLLOUT | EPOLLWRNORM);
ret = 0;
out:
+ finish_wait(&filter->wqh, &wait);
mutex_unlock(&filter->notify_lock);

if (ret == 0 && copy_to_user(buf, &unotif, sizeof(unotif))) {
@@ -1233,10 +1249,8 @@ static long seccomp_notify_recv(struct
seccomp_filter *filter,
*/
mutex_lock(&filter->notify_lock);
knotif = find_notification(filter, unotif.id);
- if (knotif) {
+ if (knotif)
knotif->state = SECCOMP_NOTIFY_INIT;
- up(&filter->notif->request);
- }
mutex_unlock(&filter->notify_lock);
}

@@ -1412,11 +1426,12 @@ static long seccomp_notify_ioctl(struct file
*file, unsigned int cmd,
{
struct seccomp_filter *filter = file->private_data;
void __user *buf = (void __user *)arg;
+ bool blocking = !(file->f_flags & O_NONBLOCK);

/* Fixed-size ioctls */
switch (cmd) {
case SECCOMP_IOCTL_NOTIF_RECV:
- return seccomp_notify_recv(filter, buf);
+ return seccomp_notify_recv(filter, buf, blocking);
case SECCOMP_IOCTL_NOTIF_SEND:
return seccomp_notify_send(filter, buf);
case SECCOMP_IOCTL_NOTIF_ID_VALID_WRONG_DIR:
@@ -1485,7 +1500,6 @@ static struct file *init_listener(struct
seccomp_filter *filter)
if (!filter->notif)
goto out;

- sema_init(&filter->notif->request, 0);
filter->notif->next_id = get_random_u64();
INIT_LIST_HEAD(&filter->notif->notifications);