Re: [RFC][PATCH] fcntl: F_[SG]ETOWN_TID

From: stephane eranian
Date: Fri Jul 31 2009 - 10:01:18 EST


Hi,

I have tried this patch in 2.6.30 and perfmon given I already had a
stress test program
for this problem. I can report that the patch seems to work for me.

This is a self-sampling multi-threaded program in which each thread also
uses setitimer(ITIMER_REAL) and thus is getting SIGALRM.

First run with stock F_SETOWN:

$ self_smpl_multi 8 20000000 29 0
program_time = 8, threshold = 20000000, signum = 29 fcntl(F_SETOWN)

launch main: fd: 3, tid: 5836, self: 0x7f75f43206e0
launch side: fd: 4, tid: 5837, self: 0x40b59950
1: fd = 3, count = 30, iter = 651, rate = 46/Kiter
1: fd = 4, count = 29, iter = 623, rate = 46/Kiter
(bad thread) ser = 287, fd = 4, tid = 5836, self = 0x7f75f43206e0
2: fd = 4, count = 119, iter = 2540, rate = 46/Kiter
2: fd = 3, count = 118, iter = 2510, rate = 47/Kiter
(bad thread) ser = 330, fd = 4, tid = 5836, self = 0x7f75f43206e0
(bad thread) ser = 395, fd = 4, tid = 5836, self = 0x7f75f43206e0


The "(bad thread)" message shows up when the signal goes to the wrong
thread.

Second run with F_SETOWN_TID:

$ ./self_smpl_multi 8 20000000 29 1
program_time = 8, threshold = 20000000, signum = 29 fcntl(F_SETOWN_TID)

launch main: fd: 3, tid: 5838, self: 0x7fa1800af6e0
launch side: fd: 4, tid: 5839, self: 0x4253a950
1: fd = 4, count = 52, iter = 1115, rate = 46/Kiter
1: fd = 3, count = 52, iter = 1115, rate = 46/Kiter
2: fd = 4, count = 119, iter = 2541, rate = 46/Kiter
2: fd = 3, count = 117, iter = 2490, rate = 46/Kiter
3: fd = 3, count = 114, iter = 2427, rate = 46/Kiter
3: fd = 4, count = 119, iter = 2541, rate = 46/Kiter
4: fd = 4, count = 119, iter = 2541, rate = 46/Kiter
4: fd = 3, count = 114, iter = 2428, rate = 46/Kiter
5: fd = 4, count = 119, iter = 2541, rate = 46/Kiter

No more error messages.

Thanks.


On Fri, Jul 31, 2009 at 10:35 AM, Peter Zijlstra<a.p.zijlstra@xxxxxxxxx> wrote:
> In order to direct the SIGIO signal to a particular thread of a
> multi-threaded application we cannot, like suggested by the manpage, put
> a TID into the regular fcntl(F_SETOWN) call. It will still be send to
> the whole process of which that thread is part.
>
> Since people do want to properly direct SIGIO we introduce F_SETOWN_TID,
> which functions similarly to F_SETOWN, except positive arguments are
> interpreted as TIDs and negative arguments are interpreted as PIDs.
>
> This extension is fully bug compatible with the old F_GETOWN
> implementation in that F_GETOWN_TID will be troubled by the negative
> return value for PIDs similarly to F_GETOWN's trouble with process
> groups.
>
> [ compile tested only so far ]
>
> Signed-off-by: Peter Zijlstra <a.p.zijlstra@xxxxxxxxx>
> ---
> Âarch/parisc/include/asm/fcntl.h | Â Â2 +
> Âfs/fcntl.c           Â|  64 +++++++++++++++++++++++++++++++++-----
> Âinclude/asm-generic/fcntl.h   |  Â4 ++
> Âinclude/linux/fs.h       Â|  11 +++++-
> Ânet/socket.c          Â|  Â2 +-
> Â5 files changed, 71 insertions(+), 12 deletions(-)
>
> diff --git a/arch/parisc/include/asm/fcntl.h b/arch/parisc/include/asm/fcntl.h
> index 1e1c824..5d5235a 100644
> --- a/arch/parisc/include/asm/fcntl.h
> +++ b/arch/parisc/include/asm/fcntl.h
> @@ -28,6 +28,8 @@
> Â#define F_SETOWN Â Â Â 12 Â Â Â/* Âfor sockets. */
> Â#define F_SETSIG Â Â Â 13 Â Â Â/* Âfor sockets. */
> Â#define F_GETSIG Â Â Â 14 Â Â Â/* Âfor sockets. */
> +#define F_GETOWN_TID Â 15
> +#define F_SETOWN_TID Â 16
>
> Â/* for posix fcntl() and lockf() */
> Â#define F_RDLCK Â Â Â Â Â Â Â Â01
> diff --git a/fs/fcntl.c b/fs/fcntl.c
> index ae41308..8d0b7f9 100644
> --- a/fs/fcntl.c
> +++ b/fs/fcntl.c
> @@ -197,13 +197,15 @@ static int setfl(int fd, struct file * filp, unsigned long arg)
> Â}
>
> Âstatic void f_modown(struct file *filp, struct pid *pid, enum pid_type type,
> - Â Â Â Â Â Â Â Â Â Â int force)
> + Â Â Â Â Â Â Â Â Â Â int flags)
> Â{
> Â Â Â Âwrite_lock_irq(&filp->f_owner.lock);
> - Â Â Â if (force || !filp->f_owner.pid) {
> + Â Â Â if ((flags & FF_SETOWN_FORCE) || !filp->f_owner.pid) {
> Â Â Â Â Â Â Â Âput_pid(filp->f_owner.pid);
> Â Â Â Â Â Â Â Âfilp->f_owner.pid = get_pid(pid);
> Â Â Â Â Â Â Â Âfilp->f_owner.pid_type = type;
> + Â Â Â Â Â Â Â filp->f_owner.task_only =
> + Â Â Â Â Â Â Â Â Â Â Â (type == PIDTYPE_PID && (flags & FF_SETOWN_TID));
>
> Â Â Â Â Â Â Â Âif (pid) {
> Â Â Â Â Â Â Â Â Â Â Â Âconst struct cred *cred = current_cred();
> @@ -215,7 +217,7 @@ static void f_modown(struct file *filp, struct pid *pid, enum pid_type type,
> Â}
>
> Âint __f_setown(struct file *filp, struct pid *pid, enum pid_type type,
> - Â Â Â Â Â Â Â int force)
> + Â Â Â Â Â Â Â int flags)
> Â{
> Â Â Â Âint err;
>
> @@ -223,12 +225,12 @@ int __f_setown(struct file *filp, struct pid *pid, enum pid_type type,
> Â Â Â Âif (err)
> Â Â Â Â Â Â Â Âreturn err;
>
> - Â Â Â f_modown(filp, pid, type, force);
> + Â Â Â f_modown(filp, pid, type, flags);
> Â Â Â Âreturn 0;
> Â}
> ÂEXPORT_SYMBOL(__f_setown);
>
> -int f_setown(struct file *filp, unsigned long arg, int force)
> +int f_setown(struct file *filp, unsigned long arg, int flags)
> Â{
> Â Â Â Âenum pid_type type;
> Â Â Â Âstruct pid *pid;
> @@ -241,7 +243,7 @@ int f_setown(struct file *filp, unsigned long arg, int force)
> Â Â Â Â}
> Â Â Â Ârcu_read_lock();
> Â Â Â Âpid = find_vpid(who);
> - Â Â Â result = __f_setown(filp, pid, type, force);
> + Â Â Â result = __f_setown(filp, pid, type, flags);
> Â Â Â Ârcu_read_unlock();
> Â Â Â Âreturn result;
> Â}
> @@ -263,6 +265,40 @@ pid_t f_getown(struct file *filp)
> Â Â Â Âreturn pid;
> Â}
>
> +static int f_setown_tid(struct file *filp, unsigned long arg)
> +{
> + Â Â Â int flags = FF_SETOWN_FORCE;
> + Â Â Â struct pid *pid;
> + Â Â Â int who = arg;
> + Â Â Â int ret = 0;
> +
> + Â Â Â if (who < 0)
> + Â Â Â Â Â Â Â who = -who;
> + Â Â Â else
> + Â Â Â Â Â Â Â flags |= FF_SETOWN_TID;
> +
> + Â Â Â rcu_read_lock();
> + Â Â Â pid = find_vpid(who);
> + Â Â Â ret = __f_setown(filp, pid, PIDTYPE_PID, flags);
> + Â Â Â rcu_read_unlock();
> +
> + Â Â Â return ret;
> +}
> +
> +static pid_t f_getown_tid(struct file *filp)
> +{
> + Â Â Â pid_t tid;
> +
> + Â Â Â read_lock(&filp->f_owner.lock);
> + Â Â Â tid = pid_vnr(filp->f_owner.pid);
> + Â Â Â if (filp->f_owner.pid_type == PIDTYPE_PGID)
> + Â Â Â Â Â Â Â tid = 0;
> + Â Â Â if (!filp->f_owner.task_only)
> + Â Â Â Â Â Â Â tid = -tid;
> + Â Â Â read_unlock(&filp->f_owner.lock);
> + Â Â Â return tid;
> +}
> +
> Âstatic long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
> Â Â Â Â Â Â Â Âstruct file *filp)
> Â{
> @@ -311,7 +347,14 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
> Â Â Â Â Â Â Â Âforce_successful_syscall_return();
> Â Â Â Â Â Â Â Âbreak;
> Â Â Â Âcase F_SETOWN:
> - Â Â Â Â Â Â Â err = f_setown(filp, arg, 1);
> + Â Â Â Â Â Â Â err = f_setown(filp, arg, FF_SETOWN_FORCE);
> + Â Â Â Â Â Â Â break;
> + Â Â Â case F_GETOWN_TID:
> + Â Â Â Â Â Â Â err = f_getown_tid(filp);
> + Â Â Â Â Â Â Â force_successful_syscall_return();
> + Â Â Â Â Â Â Â break;
> + Â Â Â case F_SETOWN_TID:
> + Â Â Â Â Â Â Â err = f_setown_tid(filp, arg);
> Â Â Â Â Â Â Â Âbreak;
> Â Â Â Âcase F_GETSIG:
> Â Â Â Â Â Â Â Âerr = filp->f_owner.signum;
> @@ -431,6 +474,7 @@ static void send_sigio_to_task(struct task_struct *p,
> Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â int fd,
> Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â int reason)
> Â{
> + Â Â Â int (*send_sig)(int, struct siginfo *, struct task_struct *);
> Â Â Â Â/*
> Â Â Â Â * F_SETSIG can change ->signum lockless in parallel, make
> Â Â Â Â * sure we read it once and use the same value throughout.
> @@ -440,6 +484,8 @@ static void send_sigio_to_task(struct task_struct *p,
> Â Â Â Âif (!sigio_perm(p, fown, signum))
> Â Â Â Â Â Â Â Âreturn;
>
> + Â Â Â send_sig = fown->task_only ? send_sig_info : group_send_sig_info;
> +
> Â Â Â Âswitch (signum) {
> Â Â Â Â Â Â Â Âsiginfo_t si;
> Â Â Â Â Â Â Â Âdefault:
> @@ -461,11 +507,11 @@ static void send_sigio_to_task(struct task_struct *p,
> Â Â Â Â Â Â Â Â Â Â Â Âelse
> Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Âsi.si_band = band_table[reason - POLL_IN];
>            Âsi.si_fd  Â= fd;
> - Â Â Â Â Â Â Â Â Â Â Â if (!group_send_sig_info(signum, &si, p))
> + Â Â Â Â Â Â Â Â Â Â Â if (!send_sig(signum, &si, p))
> Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Âbreak;
> Â Â Â Â Â Â Â Â/* fall-through: fall back on the old plain SIGIO signal */
> Â Â Â Â Â Â Â Âcase 0:
> - Â Â Â Â Â Â Â Â Â Â Â group_send_sig_info(SIGIO, SEND_SIG_PRIV, p);
> + Â Â Â Â Â Â Â Â Â Â Â send_sig(SIGIO, SEND_SIG_PRIV, p);
> Â Â Â Â}
> Â}
>
> diff --git a/include/asm-generic/fcntl.h b/include/asm-generic/fcntl.h
> index 4d3e483..d7906b8 100644
> --- a/include/asm-generic/fcntl.h
> +++ b/include/asm-generic/fcntl.h
> @@ -73,6 +73,10 @@
> Â#define F_SETSIG Â Â Â 10 Â Â Â/* for sockets. */
> Â#define F_GETSIG Â Â Â 11 Â Â Â/* for sockets. */
> Â#endif
> +#ifndef F_SETOWN_TID
> +#define F_SETOWN_TID Â 12
> +#define F_GETOWN_TID Â 13
> +#endif
>
> Â/* for F_[GET|SET]FL */
> Â#define FD_CLOEXEC Â Â 1 Â Â Â /* actually anything with low bit set goes */
> diff --git a/include/linux/fs.h b/include/linux/fs.h
> index 0872372..42697e7 100644
> --- a/include/linux/fs.h
> +++ b/include/linux/fs.h
> @@ -872,6 +872,7 @@ struct fown_struct {
> Â Â Â Ârwlock_t lock; Â Â Â Â Â/* protects pid, uid, euid fields */
> Â Â Â Âstruct pid *pid; Â Â Â Â/* pid or -pgrp where SIGIO should be sent */
> Â Â Â Âenum pid_type pid_type; /* Kind of process group SIGIO should be sent to */
> + Â Â Â bool task_only; Â Â Â Â /* task or group signal */
> Â Â Â Âuid_t uid, euid; Â Â Â Â/* uid/euid of process setting the owner */
> Â Â Â Âint signum; Â Â Â Â Â Â /* posix.1b rt signal to be delivered on IO */
> Â};
> @@ -1291,8 +1292,14 @@ extern void kill_fasync(struct fasync_struct **, int, int);
> Â/* only for net: no internal synchronization */
> Âextern void __kill_fasync(struct fasync_struct *, int, int);
>
> -extern int __f_setown(struct file *filp, struct pid *, enum pid_type, int force);
> -extern int f_setown(struct file *filp, unsigned long arg, int force);
> +/*
> + * setown flags
> + */
> +#define FF_SETOWN_FORCE Â Â Â Â Â Â Â Â1
> +#define FF_SETOWN_TID Â Â Â Â Â2
> +
> +extern int __f_setown(struct file *filp, struct pid *, enum pid_type, int flags);
> +extern int f_setown(struct file *filp, unsigned long arg, int flags);
> Âextern void f_delown(struct file *filp);
> Âextern pid_t f_getown(struct file *filp);
> Âextern int send_sigurg(struct fown_struct *fown);
> diff --git a/net/socket.c b/net/socket.c
> index 791d71a..ac57f8e 100644
> --- a/net/socket.c
> +++ b/net/socket.c
> @@ -916,7 +916,7 @@ static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
> Â Â Â Â Â Â Â Â Â Â Â Âerr = -EFAULT;
> Â Â Â Â Â Â Â Â Â Â Â Âif (get_user(pid, (int __user *)argp))
> Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Âbreak;
> - Â Â Â Â Â Â Â Â Â Â Â err = f_setown(sock->file, pid, 1);
> + Â Â Â Â Â Â Â Â Â Â Â err = f_setown(sock->file, pid, FF_SETOWN_FORCE);
> Â Â Â Â Â Â Â Â Â Â Â Âbreak;
> Â Â Â Â Â Â Â Âcase FIOGETOWN:
> Â Â Â Â Â Â Â Âcase SIOCGPGRP:
>
>
>
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/