Re: [PATCH 3/2 -v4] fcntl: F_[SG]ETOWN_EX

From: stephane eranian
Date: Fri Aug 07 2009 - 08:10:58 EST


Hi,

On Thu, Aug 6, 2009 at 9:05 PM, Oleg Nesterov<oleg@xxxxxxxxxx> wrote:
> On 08/06, Peter Zijlstra wrote:
>>
>> Subject: fcntl: F_[SG]ETOWN_EX
>> From: Peter Zijlstra <a.p.zijlstra@xxxxxxxxx>
>> Date: Fri, 31 Jul 2009 10:35:30 +0200
>>
>> In order to direct the SIGIO signal to a particular thread of a
>> multi-threaded application we cannot, as suggested by the manpage, put
>> a TID into the regular fcntl(F_SETOWN) call. It will still be sent to
>> the whole process of which that thread is part.
>>
>> Since people do want to properly direct SIGIO we introduce F_SETOWN_EX.
>>
>> The need to direct SIGIO comes from self-monitoring profiling such as
>> with perf-counters. Perf-counters uses SIGIO to notify that new sample
>> data is available. If the signal is delivered to the same task that
>> generated the new sample it can augment that data by inspecting the
>> task's user-space state right after it returns from the kernel. This
>> is esp. convenient for interpreted or virtual machine driven
>> environments.
>>
>> Both F_SETOWN_EX and F_GETOWN_EX take a pointer to a struct f_owner_ex
>> as argument:
>>
>> struct f_owner_ex {
>>     int   type;
>>     pid_t pid;
>> };
>>
>> Where type is one of F_OWNER_TID, F_OWNER_PID or F_OWNER_GID.
>
> I think the patch is right.
>
> Reviewed-by: Oleg Nesterov <oleg@xxxxxxxxxx>
>

I have tested a backport of the patch on 2.6.30 with perfmon, and it
seems to work in my test case.
I have not yet tried it with perfcounters on 2.6.31.
I am glad there is finally a solution to this problem.
Thanks.

>> Signed-off-by: Peter Zijlstra <a.p.zijlstra@xxxxxxxxx>
>> ---
>>  arch/alpha/include/asm/fcntl.h  |    2
>>  arch/parisc/include/asm/fcntl.h |    2
>>  fs/fcntl.c                      |  108 +++++++++++++++++++++++++++++++++++++---
>>  include/asm-generic/fcntl.h     |   13 ++++
>>  4 files changed, 117 insertions(+), 8 deletions(-)
>>
>> Index: linux-2.6/arch/parisc/include/asm/fcntl.h
>> ===================================================================
>> --- linux-2.6.orig/arch/parisc/include/asm/fcntl.h
>> +++ linux-2.6/arch/parisc/include/asm/fcntl.h
>> @@ -28,6 +28,8 @@
>> Â#define F_SETOWN Â Â 12 Â Â Â/* Âfor sockets. */
>> Â#define F_SETSIG Â Â 13 Â Â Â/* Âfor sockets. */
>> Â#define F_GETSIG Â Â 14 Â Â Â/* Âfor sockets. */
>> +#define F_GETOWN_EX Â15
>> +#define F_SETOWN_EX Â16
>>
>> Â/* for posix fcntl() and lockf() */
>> Â#define F_RDLCK Â Â Â Â Â Â Â01
>> Index: linux-2.6/fs/fcntl.c
>> ===================================================================
>> --- linux-2.6.orig/fs/fcntl.c
>> +++ linux-2.6/fs/fcntl.c
>> @@ -263,6 +263,79 @@ pid_t f_getown(struct file *filp)
>> Â Â Â return pid;
>> Â}
>>
>> +static int f_setown_ex(struct file *filp, unsigned long arg)
>> +{
>> + Â Â struct f_owner_ex * __user owner_p = (void * __user)arg;
>> + Â Â struct f_owner_ex owner;
>> + Â Â struct pid *pid;
>> + Â Â int type;
>> + Â Â int ret;
>> +
>> + Â Â ret = copy_from_user(&owner, owner_p, sizeof(owner));
>> + Â Â if (ret)
>> + Â Â Â Â Â Â return ret;
>> +
>> + Â Â switch (owner.type) {
>> + Â Â case F_OWNER_TID:
>> + Â Â Â Â Â Â type = PIDTYPE_MAX;
>> + Â Â Â Â Â Â break;
>> +
>> + Â Â case F_OWNER_PID:
>> + Â Â Â Â Â Â type = PIDTYPE_PID;
>> + Â Â Â Â Â Â break;
>> +
>> + Â Â case F_OWNER_GID:
>> + Â Â Â Â Â Â type = PIDTYPE_PGID;
>> + Â Â Â Â Â Â break;
>> +
>> + Â Â default:
>> + Â Â Â Â Â Â return -EINVAL;
>> + Â Â }
>> +
>> + Â Â rcu_read_lock();
>> + Â Â pid = find_vpid(owner.pid);
>> + Â Â if (owner.pid && !pid)
>> + Â Â Â Â Â Â ret = -ESRCH;
>> + Â Â else
>> + Â Â Â Â Â Â ret = __f_setown(filp, pid, type, 1);
>> + Â Â rcu_read_unlock();
>> +
>> + Â Â return ret;
>> +}
>> +
>> +static int f_getown_ex(struct file *filp, unsigned long arg)
>> +{
>> + Â Â struct f_owner_ex * __user owner_p = (void * __user)arg;
>> + Â Â struct f_owner_ex owner;
>> + Â Â int ret = 0;
>> +
>> + Â Â read_lock(&filp->f_owner.lock);
>> + Â Â owner.pid = pid_vnr(filp->f_owner.pid);
>> + Â Â switch (filp->f_owner.pid_type) {
>> + Â Â case PIDTYPE_MAX:
>> + Â Â Â Â Â Â owner.type = F_OWNER_TID;
>> + Â Â Â Â Â Â break;
>> +
>> + Â Â case PIDTYPE_PID:
>> + Â Â Â Â Â Â owner.type = F_OWNER_PID;
>> + Â Â Â Â Â Â break;
>> +
>> + Â Â case PIDTYPE_PGID:
>> + Â Â Â Â Â Â owner.type = F_OWNER_GID;
>> + Â Â Â Â Â Â break;
>> +
>> + Â Â default:
>> + Â Â Â Â Â Â WARN_ON(1);
>> + Â Â Â Â Â Â ret = -EINVAL;
>> + Â Â Â Â Â Â break;
>> + Â Â }
>> + Â Â read_unlock(&filp->f_owner.lock);
>> +
>> + Â Â if (!ret)
>> + Â Â Â Â Â Â ret = copy_to_user(owner_p, &owner, sizeof(owner));
>> + Â Â return ret;
>> +}
>> +
>> Âstatic long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
>> Â Â Â Â Â Â Â struct file *filp)
>> Â{
>> @@ -313,6 +386,12 @@ static long do_fcntl(int fd, unsigned in
>> Â Â Â case F_SETOWN:
>> Â Â Â Â Â Â Â err = f_setown(filp, arg, 1);
>> Â Â Â Â Â Â Â break;
>> + Â Â case F_GETOWN_EX:
>> + Â Â Â Â Â Â err = f_getown_ex(filp, arg);
>> + Â Â Â Â Â Â break;
>> + Â Â case F_SETOWN_EX:
>> + Â Â Â Â Â Â err = f_setown_ex(filp, arg);
>> + Â Â Â Â Â Â break;
>> Â Â Â case F_GETSIG:
>> Â Â Â Â Â Â Â err = filp->f_owner.signum;
>> Â Â Â Â Â Â Â break;
>> @@ -428,8 +507,7 @@ static inline int sigio_perm(struct task
>>
>> Âstatic void send_sigio_to_task(struct task_struct *p,
>> Â Â Â Â Â Â Â Â Â Â Â Â Â Â Âstruct fown_struct *fown,
>> - Â Â Â Â Â Â Â Â Â Â Â Â Â Âint fd,
>> - Â Â Â Â Â Â Â Â Â Â Â Â Â Âint reason)
>> + Â Â Â Â Â Â Â Â Â Â Â Â Â Âint fd, int reason, int group)
>> Â{
>> Â Â Â /*
>> Â Â Â Â* F_SETSIG can change ->signum lockless in parallel, make
>> @@ -461,11 +539,11 @@ static void send_sigio_to_task(struct ta
>> Â Â Â Â Â Â Â Â Â Â Â else
>> Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â si.si_band = band_table[reason - POLL_IN];
>>            si.si_fd  Â= fd;
>> - Â Â Â Â Â Â Â Â Â Â if (!do_send_sig_info(signum, &si, p, true))
>> + Â Â Â Â Â Â Â Â Â Â if (!do_send_sig_info(signum, &si, p, group))
>> Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â break;
>> Â Â Â Â Â Â Â /* fall-through: fall back on the old plain SIGIO signal */
>> Â Â Â Â Â Â Â case 0:
>> - Â Â Â Â Â Â Â Â Â Â do_send_sig_info(SIGIO, SEND_SIG_PRIV, p, true);
>> + Â Â Â Â Â Â Â Â Â Â do_send_sig_info(SIGIO, SEND_SIG_PRIV, p, group);
>> Â Â Â }
>> Â}
>>
>> @@ -474,16 +552,23 @@ void send_sigio(struct fown_struct *fown
>> Â Â Â struct task_struct *p;
>> Â Â Â enum pid_type type;
>> Â Â Â struct pid *pid;
>> + Â Â int group = 1;
>>
>> Â Â Â read_lock(&fown->lock);
>> +
>> Â Â Â type = fown->pid_type;
>> + Â Â if (type == PIDTYPE_MAX) {
>> + Â Â Â Â Â Â group = 0;
>> + Â Â Â Â Â Â type = PIDTYPE_PID;
>> + Â Â }
>> +
>> Â Â Â pid = fown->pid;
>> Â Â Â if (!pid)
>> Â Â Â Â Â Â Â goto out_unlock_fown;
>>
>> Â Â Â read_lock(&tasklist_lock);
>> Â Â Â do_each_pid_task(pid, type, p) {
>> - Â Â Â Â Â Â send_sigio_to_task(p, fown, fd, band);
>> + Â Â Â Â Â Â send_sigio_to_task(p, fown, fd, band, group);
>> Â Â Â } while_each_pid_task(pid, type, p);
>> Â Â Â read_unlock(&tasklist_lock);
>> Â out_unlock_fown:
>> @@ -491,10 +576,10 @@ void send_sigio(struct fown_struct *fown
>> Â}
>>
>> Âstatic void send_sigurg_to_task(struct task_struct *p,
>> - Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Âstruct fown_struct *fown)
>> + Â Â Â Â Â Â Â Â Â Â Â Â Â Â struct fown_struct *fown, int group)
>> Â{
>> Â Â Â if (sigio_perm(p, fown, SIGURG))
>> - Â Â Â Â Â Â group_send_sig_info(SIGURG, SEND_SIG_PRIV, p);
>> + Â Â Â Â Â Â do_send_sig_info(SIGURG, SEND_SIG_PRIV, p, group);
>> Â}
>>
>> Âint send_sigurg(struct fown_struct *fown)
>> @@ -502,10 +587,17 @@ int send_sigurg(struct fown_struct *fown
>> Â Â Â struct task_struct *p;
>> Â Â Â enum pid_type type;
>> Â Â Â struct pid *pid;
>> + Â Â int group = 1;
>> Â Â Â int ret = 0;
>>
>> Â Â Â read_lock(&fown->lock);
>> +
>> Â Â Â type = fown->pid_type;
>> + Â Â if (type == PIDTYPE_MAX) {
>> + Â Â Â Â Â Â group = 0;
>> + Â Â Â Â Â Â type = PIDTYPE_PID;
>> + Â Â }
>> +
>> Â Â Â pid = fown->pid;
>> Â Â Â if (!pid)
>> Â Â Â Â Â Â Â goto out_unlock_fown;
>> @@ -514,7 +606,7 @@ int send_sigurg(struct fown_struct *fown
>>
>> Â Â Â read_lock(&tasklist_lock);
>> Â Â Â do_each_pid_task(pid, type, p) {
>> - Â Â Â Â Â Â send_sigurg_to_task(p, fown);
>> + Â Â Â Â Â Â send_sigurg_to_task(p, fown, group);
>> Â Â Â } while_each_pid_task(pid, type, p);
>> Â Â Â read_unlock(&tasklist_lock);
>> Â out_unlock_fown:
>> Index: linux-2.6/include/asm-generic/fcntl.h
>> ===================================================================
>> --- linux-2.6.orig/include/asm-generic/fcntl.h
>> +++ linux-2.6/include/asm-generic/fcntl.h
>> @@ -73,6 +73,19 @@
>> Â#define F_SETSIG Â Â 10 Â Â Â/* for sockets. */
>> Â#define F_GETSIG Â Â 11 Â Â Â/* for sockets. */
>> Â#endif
>> +#ifndef F_SETOWN_EX
>> +#define F_SETOWN_EX Â12
>> +#define F_GETOWN_EX Â13
>> +#endif
>> +
>> +#define F_OWNER_TID Â0
>> +#define F_OWNER_PID Â1
>> +#define F_OWNER_GID Â2
>> +
>> +struct f_owner_ex {
>> +   int   type;
>> +   pid_t  pid;
>> +};
>>
>> Â/* for F_[GET|SET]FL */
>> Â#define FD_CLOEXEC Â 1 Â Â Â /* actually anything with low bit set goes */
>> Index: linux-2.6/arch/alpha/include/asm/fcntl.h
>> ===================================================================
>> --- linux-2.6.orig/arch/alpha/include/asm/fcntl.h
>> +++ linux-2.6/arch/alpha/include/asm/fcntl.h
>> @@ -26,6 +26,8 @@
>> Â#define F_GETOWN Â Â 6 Â Â Â /* Âfor sockets. */
>> Â#define F_SETSIG Â Â 10 Â Â Â/* Âfor sockets. */
>> Â#define F_GETSIG Â Â 11 Â Â Â/* Âfor sockets. */
>> +#define F_SETOWN_EX Â12
>> +#define F_GETOWN_EX Â13
>>
>> Â/* for posix fcntl() and lockf() */
>> Â#define F_RDLCK Â Â Â Â Â Â Â1
>>
>>
>
>
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/