Re: [PATCH RFC] signalfd: add support for SFD_TASK
From: Jens Axboe
Date: Wed Nov 27 2019 - 15:48:40 EST
On 11/27/19 12:23 PM, Jann Horn wrote:
On Wed, Nov 27, 2019 at 6:11 AM Jens Axboe <axboe@xxxxxxxxx> wrote:
I posted this a few weeks back, took another look at it and refined it a
bit. I'd like some input on the viability of this approach.
A new signalfd setup flag is added, SFD_TASK. This is only valid if used
with SFD_CLOEXEC. If set, the task setting up the signalfd descriptor is
remembered in the signalfd context, and will be the one we use for
checking signals in the poll/read handlers in signalfd.
This is needed to make signalfd useful with io_uring and aio, of which
the former in particular has my interest.
I _think_ this is sane. To prevent the case of a task clearing O_CLOEXEC
on the signalfd descriptor, forking, and then exiting, we grab a
reference to the task when we assign it. If that original task exits, we
catch it in signalfd_flush() and ensure waiters are woken up.
Mh... that's not really reliable, because you only get ->flush() from
the last exiting thread (or more precisely, the last exiting task that
shares the files_struct).
What is your goal here? To have a reference to a task without keeping
the entire task_struct around in memory if someone leaks the signalfd
to another process - basically like a weak pointer? If so, you could
store a refcounted reference to "struct pid" instead of a refcounted
reference to the task_struct, and then do the lookup of the
task_struct on ->poll and ->read (similar to what procfs does).
Yeah, I think that works out much better (and cleaner). How about this,
then? Follows your advice and turns it into a struct pid instead. I
don't particularly like the -ESRCH in dequeue and setup, what do you
think? For poll, POLLERR seems like a prudent choice.
Tested with the test cases I sent out yesterday, works for me.
diff --git a/fs/signalfd.c b/fs/signalfd.c
index 44b6845b071c..ccb1173b20aa 100644
--- a/fs/signalfd.c
+++ b/fs/signalfd.c
@@ -50,6 +50,7 @@ void signalfd_cleanup(struct sighand_struct *sighand)
struct signalfd_ctx {
sigset_t sigmask;
+ struct pid *task_pid;
};
static int signalfd_release(struct inode *inode, struct file *file)
@@ -58,20 +59,41 @@ static int signalfd_release(struct inode *inode, struct file *file)
return 0;
}
+static void signalfd_put_task(struct signalfd_ctx *ctx, struct task_struct *tsk)
+{
+ if (ctx->task_pid)
+ put_task_struct(tsk);
+}
+
+static struct task_struct *signalfd_get_task(struct signalfd_ctx *ctx)
+{
+ if (ctx->task_pid)
+ return get_pid_task(ctx->task_pid, PIDTYPE_PID);
+
+ return current;
+}
+
static __poll_t signalfd_poll(struct file *file, poll_table *wait)
{
struct signalfd_ctx *ctx = file->private_data;
+ struct task_struct *tsk;
__poll_t events = 0;
- poll_wait(file, ¤t->sighand->signalfd_wqh, wait);
+ tsk = signalfd_get_task(ctx);
+ if (tsk) {
+ poll_wait(file, &tsk->sighand->signalfd_wqh, wait);
- spin_lock_irq(¤t->sighand->siglock);
- if (next_signal(¤t->pending, &ctx->sigmask) ||
- next_signal(¤t->signal->shared_pending,
- &ctx->sigmask))
- events |= EPOLLIN;
- spin_unlock_irq(¤t->sighand->siglock);
+ spin_lock_irq(&tsk->sighand->siglock);
+ if (next_signal(&tsk->pending, &ctx->sigmask) ||
+ next_signal(&tsk->signal->shared_pending,
+ &ctx->sigmask))
+ events |= EPOLLIN;
+ spin_unlock_irq(&tsk->sighand->siglock);
+ signalfd_put_task(ctx, tsk);
+ } else {
+ events |= EPOLLERR;
+ }
return events;
}
@@ -167,10 +189,15 @@ static ssize_t signalfd_dequeue(struct signalfd_ctx *ctx, kernel_siginfo_t *info
int nonblock)
{
ssize_t ret;
+ struct task_struct *tsk;
DECLARE_WAITQUEUE(wait, current);
- spin_lock_irq(¤t->sighand->siglock);
- ret = dequeue_signal(current, &ctx->sigmask, info);
+ tsk = signalfd_get_task(ctx);
+ if (!tsk)
+ return -ESRCH;
+
+ spin_lock_irq(&tsk->sighand->siglock);
+ ret = dequeue_signal(tsk, &ctx->sigmask, info);
switch (ret) {
case 0:
if (!nonblock)
@@ -178,29 +205,31 @@ static ssize_t signalfd_dequeue(struct signalfd_ctx *ctx, kernel_siginfo_t *info
ret = -EAGAIN;
/* fall through */
default:
- spin_unlock_irq(¤t->sighand->siglock);
+ spin_unlock_irq(&tsk->sighand->siglock);
+ signalfd_put_task(ctx, tsk);
return ret;
}
- add_wait_queue(¤t->sighand->signalfd_wqh, &wait);
+ add_wait_queue(&tsk->sighand->signalfd_wqh, &wait);
for (;;) {
set_current_state(TASK_INTERRUPTIBLE);
- ret = dequeue_signal(current, &ctx->sigmask, info);
+ ret = dequeue_signal(tsk, &ctx->sigmask, info);
if (ret != 0)
break;
if (signal_pending(current)) {
ret = -ERESTARTSYS;
break;
}
- spin_unlock_irq(¤t->sighand->siglock);
+ spin_unlock_irq(&tsk->sighand->siglock);
schedule();
- spin_lock_irq(¤t->sighand->siglock);
+ spin_lock_irq(&tsk->sighand->siglock);
}
- spin_unlock_irq(¤t->sighand->siglock);
+ spin_unlock_irq(&tsk->sighand->siglock);
- remove_wait_queue(¤t->sighand->signalfd_wqh, &wait);
+ remove_wait_queue(&tsk->sighand->signalfd_wqh, &wait);
__set_current_state(TASK_RUNNING);
+ signalfd_put_task(ctx, tsk);
return ret;
}
@@ -267,19 +296,24 @@ static int do_signalfd4(int ufd, sigset_t *mask, int flags)
/* Check the SFD_* constants for consistency. */
BUILD_BUG_ON(SFD_CLOEXEC != O_CLOEXEC);
BUILD_BUG_ON(SFD_NONBLOCK != O_NONBLOCK);
+ BUILD_BUG_ON(SFD_TASK & (SFD_CLOEXEC | SFD_NONBLOCK));
- if (flags & ~(SFD_CLOEXEC | SFD_NONBLOCK))
+ if (flags & ~(SFD_CLOEXEC | SFD_NONBLOCK | SFD_TASK))
+ return -EINVAL;
+ if ((flags & (SFD_CLOEXEC | SFD_TASK)) == SFD_TASK)
return -EINVAL;
sigdelsetmask(mask, sigmask(SIGKILL) | sigmask(SIGSTOP));
signotset(mask);
if (ufd == -1) {
- ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
+ ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
if (!ctx)
return -ENOMEM;
ctx->sigmask = *mask;
+ if (flags & SFD_TASK)
+ ctx->task_pid = get_task_pid(current, PIDTYPE_PID);
/*
* When we call this, the initialization must be complete, since
@@ -290,6 +324,7 @@ static int do_signalfd4(int ufd, sigset_t *mask, int flags)
if (ufd < 0)
kfree(ctx);
} else {
+ struct task_struct *tsk;
struct fd f = fdget(ufd);
if (!f.file)
return -EBADF;
@@ -298,11 +333,17 @@ static int do_signalfd4(int ufd, sigset_t *mask, int flags)
fdput(f);
return -EINVAL;
}
- spin_lock_irq(¤t->sighand->siglock);
+ tsk = signalfd_get_task(ctx);
+ if (!tsk) {
+ fdput(f);
+ return -ESRCH;
+ }
+ spin_lock_irq(&tsk->sighand->siglock);
ctx->sigmask = *mask;
- spin_unlock_irq(¤t->sighand->siglock);
+ spin_unlock_irq(&tsk->sighand->siglock);
- wake_up(¤t->sighand->signalfd_wqh);
+ wake_up(&tsk->sighand->signalfd_wqh);
+ signalfd_put_task(ctx, tsk);
fdput(f);
}
diff --git a/include/uapi/linux/signalfd.h b/include/uapi/linux/signalfd.h
index 83429a05b698..064c5dc3eb99 100644
--- a/include/uapi/linux/signalfd.h
+++ b/include/uapi/linux/signalfd.h
@@ -16,6 +16,7 @@
/* Flags for signalfd4. */
#define SFD_CLOEXEC O_CLOEXEC
#define SFD_NONBLOCK O_NONBLOCK
+#define SFD_TASK 00000001
struct signalfd_siginfo {
__u32 ssi_signo;
--
Jens Axboe