Re: [RESEND PATCH] futex: fix key reference counter in case of requeue.

From: Matthieu FertrÃ
Date: Mon Oct 18 2010 - 08:51:21 EST


Hi Darren,

Le 15/10/2010 21:13, Darren Hart a Ãcrit :
> On 10/14/2010 04:30 AM, Louis Rilling wrote:
>> From: Matthieu FertrÃ<matthieu.fertre@xxxxxxxxxxx>
>
> Hi Matthew,
>
>>
>> This patch ensures that we are referring to the right key when dropping
>> reference for the futex_wait operation.
>>
>> The following scenario explains a typical case where the bug was
>> happening:
>>
>> Process P calls futex_wait() on futex identified by 'key1'. 2 references
>> are taken on this key: one for the struct futex_q itself, and one for the
>> futex_wait operation.
>> If now, process P is requeued on a futex identified by 'key2', its
>> futex_q->key is updated from 'key1' to 'key2' and a reference is got
>> to 'key2' and one is dropped to 'key1'.
>> Later, another process calls futex_wake(): it gets a reference to
>> 'key2', wakes process P, and drops reference to 'key2'.
>> Once process P is woken up, it should unqueue, drop reference to 'key2'
>> (the one referring to the futex_q, this is done in unqueue_me())
>> and to 'key1' (the one referring to futex_wait operation). Without this
>> patch it drops reference to 'key2' instead of 'key1'.
>
> Nice catch. How did this manifest itself? Did you catch it just by code
> inspection?


I found it while testing the distributed implementation of futex in
Kerrighed (www.kerrighed.org).

After deeply looking, I noticed that the bug comes from vanilla linux
kernel 2.6.30 (the one on which current version of Kerrighed is based).
Then I checked if the bug still existed in latest linux rc or if there
were some bugfixes.

I have attached the test that reveals the bug on my system. The test
runs some basic wait/wake/requeue scenario on futex "hosted" in a sysv
shared memory segment. It is composed of one executable and 2 scripts
that are to be used with LTP. To run it without LPT, you can replace
calls to tst_resm/tst_brkm with echo in the shell scripts.

Without debugging facilities, it may BUG while destroying the shared
segment. As far as I remember, with some kernel hacking features
enabled, it was complaining in the kernel log, but there was no crash
and I don't remember exactly about what it complains.

>
> I've been trying to develop a futex test suite to catch issues with the
> futex implementation, as well as to test any changes made to avoid
> regressions. Mind having a look?
>
> http://git.kernel.org/?p=linux/kernel/git/dvhart/futextest.git;a=summary

I had already a git checkout for this futex test suite :)
It was not fitting to my tests since I was checking behavior of
distributed futex inside Kerrighed. (I need test done by separate
processes spreaded on different nodes accessing the futex through sysv
shared segments).

Regards,

Matthieu

>
>> Signed-off-by: Matthieu FertrÃ<matthieu.fertre@xxxxxxxxxxx>
>> Signed-off-by: Louis Rilling<louis.rilling@xxxxxxxxxxx>
>> ---
>> kernel/futex.c | 8 ++++++--
>> 1 files changed, 6 insertions(+), 2 deletions(-)
>>
>> diff --git a/kernel/futex.c b/kernel/futex.c
>> index 6a3a5fa..bed6717 100644
>> --- a/kernel/futex.c
>> +++ b/kernel/futex.c
>> @@ -1791,6 +1791,7 @@ static int futex_wait(u32 __user *uaddr, int
>> fshared,
>> struct restart_block *restart;
>> struct futex_hash_bucket *hb;
>> struct futex_q q;
>> + union futex_key key;
>
> We should be able to do this properly without requiring an additional
> key variable. I think tglx has proposed a suitable fix - but it needs
> testing to avoid any subtle regressions.
>

/*
* Copyright (C) 2004 Red Hat, Inc. All Rights Reserved.
* Written by David Howells (dhowells@xxxxxxxxxx)
*
* Copyright (C) 2010 Kerlabs - Matthieu Fertré
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <signal.h>
#include <unistd.h>
#include <fcntl.h>
#include <errno.h>
#include <sys/ipc.h>
#include <sys/shm.h>
#include <sys/mman.h>
#include <asm/unistd.h>
#include <linux/futex.h>
#include <sys/time.h>

static inline int futex(int *uaddr, int op, int val,
const struct timespec *utime, int *uaddr2, int val3)
{
return syscall(__NR_futex, uaddr, op, val, utime, uaddr2, val3);
}

#define SYSERROR(X, Y) \
do { \
if ((long)(X) == -1L) { \
perror(Y); \
exit(EXIT_FAILURE); \
} \
} while(0)

int shmkey = 23;
int shmkey2 = 24;
int nr_wake = 1;
int nr_requeue = 1;
int bitset = 0;
int quiet = 0;
struct timespec utime;

void print_usage(const char* cmd)
{
printf("%s -h: show this help\n", cmd);
}

void dowait(void)
{
int shmid, ret, *f, n;
struct timespec *timeout = NULL;

shmid = shmget(shmkey, 4, IPC_CREAT|0666);
SYSERROR(shmid, "shmget");

f = shmat(shmid, NULL, 0);
SYSERROR(f, "shmat");

n = *f;

if (utime.tv_sec)
timeout = &utime;

if (bitset) {
if (!quiet)
printf("WAIT_BITSET: %p{%x} bits: %x\n", f, n, bitset);
ret = futex(f, FUTEX_WAIT_BITSET, n, timeout, NULL, bitset);
} else {
if (!quiet)
printf("WAIT: %p{%x}\n", f, n);
ret = futex(f, FUTEX_WAIT, n, timeout, NULL, 0);
}
SYSERROR(ret, "futex_wait");
if (!quiet)
printf("WAITED: %d\n", ret);

ret = shmdt(f);
SYSERROR(ret, "shmdt");
}

int dowake(void)
{
int shmid, ret, *f, nr_proc;

shmid = shmget(shmkey, 4, IPC_CREAT|0666);
SYSERROR(shmid, "shmget");

f = shmat(shmid, NULL, 0);
SYSERROR(f, "shmat");

(*f)++;

if (bitset) {
if (!quiet)
printf("WAKE_BITSET: %p{%x} bits: %x\n", f, *f, bitset);
ret = futex(f, FUTEX_WAKE_BITSET, nr_wake, NULL, NULL, bitset);
} else {
if (!quiet)
printf("WAKE: %p{%x}\n", f, *f);
ret = futex(f, FUTEX_WAKE, nr_wake, NULL, NULL, 0);
}

SYSERROR(ret, "futex_wake");
if (!quiet)
printf("WOKE: %d\n", ret);
nr_proc = ret;

ret = shmdt(f);
SYSERROR(ret, "shmdt");

if (!ret)
ret = nr_proc;

return ret;
}

int dorequeue(void)
{
int shmid1, shmid2, ret, *f1, *f2, nr_proc;

shmid1 = shmget(shmkey, 4, IPC_CREAT|0666);
SYSERROR(shmid1, "shmget");

shmid2 = shmget(shmkey2, 4, IPC_CREAT|0666);
SYSERROR(shmid2, "shmget");

f1 = shmat(shmid1, NULL, 0);
SYSERROR(f1, "shmat");

f2 = shmat(shmid2, NULL, 0);
SYSERROR(f2, "shmat");

/* requeue */
ret = futex(f1, FUTEX_REQUEUE, nr_wake,
(const struct timespec *) (long) nr_requeue, f2, 0);
SYSERROR(ret, "futex_requeue");

if (!quiet)
printf("WOKE or REQUEUED: %d\n", ret);
nr_proc = ret;

/* detaching shms */
ret = shmdt(f1);
SYSERROR(ret, "shmdt");

ret = shmdt(f2);
SYSERROR(ret, "shmdt");

if (!ret)
ret = nr_proc;

return ret;
}

void badfutex(void)
{
int *x;
int ret;

x = mmap(NULL, 16384, PROT_READ, MAP_PRIVATE|MAP_ANON, -1, 0);
SYSERROR(x, "mmap");

ret = futex(x, FUTEX_WAIT, 0, NULL, NULL, 0);
SYSERROR(ret, "futex");
}

void deletekey(void)
{
int shmid, ret;

shmid = shmget(shmkey, 4, 0666);
SYSERROR(shmid, "shmget");

ret = shmctl(shmid, IPC_RMID, NULL);
SYSERROR(ret, "shmctl(IPC_RMID)");

if (!quiet)
printf("SHM %d (id=%d) DELETED\n", shmkey, shmid);
}

void parse_args(int argc, char *argv[])
{
int c;

utime.tv_sec = 0;
utime.tv_nsec = 0;

while (1) {
c = getopt(argc, argv, "hqb:k:K:r:t:w:");
if (c == -1)
break;

switch (c) {
case 'h':
print_usage(argv[0]);
exit(EXIT_SUCCESS);
break;
case 'q':
quiet=1;
break;

case 'b':
bitset = atoi(optarg);
break;
case 'k':
shmkey = atoi(optarg);
break;
case 'K':
shmkey2 = atoi(optarg);
break;
case 'r':
nr_requeue = atoi(optarg);
break;
case 't':
utime.tv_sec = atoi(optarg);
break;
case 'w':
nr_wake = atoi(optarg);
break;
}
}
}

int main(int argc, char **argv)
{
char *action;
int ret = 0;

parse_args(argc, argv);

if (argc - optind == 0) {
print_usage(argv[0]);
exit(EXIT_FAILURE);
}

action = argv[optind];

if (!quiet)
printf("Command: %s\n", action);

if (strcmp(action, "badfutex") == 0)
badfutex();

else if (strcmp(action, "wait") == 0)
dowait();

else if (strcmp(action, "wake") == 0)
ret = dowake();

else if (strcmp(action, "requeue") == 0)
ret = dorequeue();

else if (strcmp(action, "delete") == 0)
deletekey();

else {
fprintf(stderr, "Unknown command\n");
print_usage(argv[0]);
exit(EXIT_FAILURE);
}

exit(ret);
}

Attachment: futex_shm01.sh
Description: application/shellscript

Attachment: lib_futex.sh
Description: application/shellscript