Re: [PATCH 6/6] Btrfs: do aio_write instead of write

From: liubo
Date: Wed May 26 2010 - 23:10:50 EST


On 05/27/2010 11:06 AM, liubo wrote:
> On 05/22/2010 01:03 AM, Josef Bacik wrote:
>
>> In order for AIO to work, we need to implement aio_write. This patch converts
>> our btrfs_file_write to btrfs_aio_write. I've tested this with xfstests and
>> nothing broke, and the AIO stuff magically started working. Thanks,
>>
>> Signed-off-by: Josef Bacik <josef@xxxxxxxxxx>
>>
>>
>
> Hi, Josef,
>
> I've tested your patch (May 22) with my tools, and one case triggered a bug
> that makes the writev operation hang. More information follows.
>
> - Steps to trigger it:
> # mount /dev/sda8 /home/btrfsdisk -o nodatacow
> # gcc direct-io.c -o direct-io
> # ./direct-io O_DIRECT writev /home/btrfsdisk/testrw 4M
>
> then on another tty, after "dmesg"...
>
> [snip]
> device fsid f44b0879c75c0e99-1d4b28f2d5c503ae devid 1 transid 11177
> /dev/sda8
> btrfs: setting nodatacow
> INFO: task direct-io:1399 blocked for more than 120 seconds.
> "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
> direct-io D 0000000000000003 0 1399 1341 0x00000000
> ffff880137c379c8 0000000000000082 ffff880137c379d8 ffffffff00000000
> ffff880137c37fd8 ffff880139730000 0000000000015440 ffff880137c37fd8
> 0000000000015440 0000000000015440 0000000000015440 0000000000015440
> Call Trace:
> [<ffffffffa0119d4a>] wait_extent_bit+0xe3/0x163 [btrfs]
> [<ffffffff8106651f>] ? autoremove_wake_function+0x0/0x39
> [<ffffffffa0119e47>] lock_extent_bits+0x7d/0xa8 [btrfs]
> [<ffffffffa0119e88>] lock_extent+0x16/0x18 [btrfs]
> [<ffffffffa01025ce>] btrfs_direct_IO+0x8e/0x1be [btrfs]
> [<ffffffff810c7301>] generic_file_direct_write+0xed/0x16d
> [<ffffffffa010bb91>] btrfs_file_aio_write+0x2af/0x8d2 [btrfs]
> [<ffffffff81100eae>] ? try_get_mem_cgroup_from_mm+0x39/0x49
> [<ffffffffa010b8e2>] ? btrfs_file_aio_write+0x0/0x8d2 [btrfs]
> [<ffffffff811063ed>] do_sync_readv_writev+0xc1/0x100
> [<ffffffff81106120>] ? might_fault+0x21/0x23
> [<ffffffff81106151>] ? copy_from_user+0x2f/0x31
> [<ffffffff811c90ab>] ? security_file_permission+0x16/0x18
> [<ffffffff81107145>] do_readv_writev+0xa7/0x127
> [<ffffffff81107208>] vfs_writev+0x43/0x4e
> [<ffffffff811072f8>] sys_writev+0x4a/0x93
> [<ffffffff81009c32>] system_call_fastpath+0x16/0x1b
>
>
> So, can you figure out if anything in your patch leads to the bug?
> --
> To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
> the body of a message to majordomo@xxxxxxxxxxxxxxx
> More majordomo info at http://vger.kernel.org/majordomo-info.html
>
>
>

Sorry, I forgot the attachment...

Please find it below.

Thanks,

- Liubo

/******************************************************************************/
/* */
/* Copyright (c) 2010 FUJITSU LIMITED */
/* */
/* This program is free software; you can redistribute it and/or modify */
/* it under the terms of the GNU General Public License as published by */
/* the Free Software Foundation; either version 2 of the License, or */
/* (at your option) any later version. */
/* */
/* This program is distributed in the hope that it will be useful, */
/* but WITHOUT ANY WARRANTY; without even the implied warranty of */
/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See */
/* the GNU General Public License for more details. */
/* */
/* You should have received a copy of the GNU General Public License */
/* along with this program; if not, write to the Free Software */
/* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
/* */
/* Author: Liu Bo <liubo2009@xxxxxxxxxxxxxx> */
/* */
/******************************************************************************/


#include <stdio.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/uio.h>
#include <fcntl.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <string.h>
#include <malloc.h>

/* Size in bytes of each buffer passed to read/write/readv/writev. */
#define BUFFERSIZE 8192

/* Fallback definition for builds where the included headers do not
 * provide O_DIRECT. */
#ifndef O_DIRECT
#define O_DIRECT 00040000 /* direct disk access hint */
#endif

/* Runs the selected read/write/readv/writev test; defined after main(). */
static int rw_test(char *type, int fd, int nullfd, int zerofd, char *size);


/*
 * Open path with the given flags, or print which path failed (and why)
 * and exit(1).
 */
static int open_or_die(const char *path, int flags)
{
	int fd = open(path, flags);

	if (fd < 0) {
		fprintf(stderr, "Cannot open %s: %s\n\n", path, strerror(errno));
		exit(1);
	}
	return fd;
}

/*
 * Direct-io test
 *
 * Usage: direct-io <flag> <rwtype> <testfile> <size>
 *   flag:     "O_DIRECT", "O_DIRECT | O_SYNC" or "O_DIRECT|O_SYNC";
 *             any other string leaves the test file opened with plain O_RDWR
 *   rwtype:   read | readv | write | writev
 *   testfile: path of the file to test, e.g. testrw; it is opened without
 *             O_CREAT, so it must already exist
 *   size:     0 | 1K | 4K | 4M | 4G | 4G+1
 *
 * Return value:
 *   0: succeed
 *   1: fail (wrong argument count, a file could not be opened, or
 *      rw_test() reported an error)
 */
int main(int argc, char *argv[])
{
	int flag = O_RDWR;
	int fd, nullfd, zerofd;
	int ret;

	/* argv[0] plus exactly four user arguments. */
	if (argc != 5) {
		fprintf(stderr, "4 arguments are needed!\n");
		exit(1);
	}

	if (!strcmp(argv[1], "O_DIRECT"))
		flag |= O_DIRECT;
	else if (!strcmp(argv[1], "O_DIRECT | O_SYNC") ||
		 !strcmp(argv[1], "O_DIRECT|O_SYNC"))
		flag |= O_DIRECT | O_SYNC;

	/* Each failure names the path that could not be opened. */
	fd = open_or_die(argv[3], flag);
	nullfd = open_or_die("/dev/null", O_WRONLY);
	zerofd = open_or_die("/dev/zero", O_RDONLY);

	ret = rw_test(argv[2], fd, nullfd, zerofd, argv[4]);

	close(fd);
	close(nullfd);
	close(zerofd);

	/* rw_test() returns 1 or -1 on failure; collapse both to exit status 1. */
	return ret != 0;
}

/* Number of buffers handed to each readv()/writev() call. */
enum { RW_IOVCNT = 2 };

/*
 * Translate a size keyword into a byte count.
 * "0" and any unrecognized string yield 0. The result is long long so
 * that "4G" and "4G+1" do not overflow where long is 32 bits wide.
 */
static long long rw_size_limit(const char *size)
{
	const long long kib = 1024;

	if (!strcmp(size, "1K"))
		return kib;
	if (!strcmp(size, "4K"))
		return 4 * kib;
	if (!strcmp(size, "4M"))
		return 4 * kib * kib;
	if (!strcmp(size, "4G"))
		return 4 * kib * kib * kib;
	if (!strcmp(size, "4G+1"))
		return 4 * kib * kib * kib + 1;
	return 0;
}

/*
 * Allocate one zero-filled BUFFERSIZE buffer with valloc(), which returns
 * page-aligned memory. Prints a message and exits with status 1 if the
 * allocation fails. The caller releases the buffer with free().
 */
static char *rw_alloc_buffer(void)
{
	char *buf = valloc(BUFFERSIZE);

	if (!buf) {
		perror("valloc error");
		exit(1);
	}
	memset(buf, 0, BUFFERSIZE);
	return buf;
}

/*
 * Copy from in_fd to out_fd with read()/write() through one buffer.
 *
 * bounded == 0: copy until read() returns 0 or fails.
 * bounded != 0: copy whole chunks until at least limit bytes were moved;
 *               at least one chunk is transferred even when limit is 0.
 *
 * Returns 0 on success, 1 on a write failure, -1 on a read failure.
 */
static int rw_copy(int in_fd, int out_fd, int bounded, long long limit)
{
	char *buf = rw_alloc_buffer();
	long long count = 0;
	ssize_t n, written;
	int ret = 0;

	while ((n = read(in_fd, buf, BUFFERSIZE)) > 0) {
		written = write(out_fd, buf, n);
		if (written != n) {
			/* errno is only meaningful when write() itself failed. */
			fprintf(stderr, "write error! : %s\n",
				written < 0 ? strerror(errno) : "short write");
			ret = 1;
			goto out;
		}
		count += n;
		if (bounded && count >= limit)
			break;
	}

	if (bounded ? count < limit : n < 0) {
		fprintf(stderr, "read error : %s\n\n",
			n < 0 ? strerror(errno) : "unexpected end of input");
		ret = -1;
	}
out:
	free(buf);
	return ret;
}

/*
 * Same as rw_copy(), but using readv()/writev() with RW_IOVCNT buffers of
 * BUFFERSIZE bytes each. Every writev() submits the full vector and must
 * return RW_IOVCNT * BUFFERSIZE to count as successful, regardless of how
 * many bytes the preceding readv() delivered.
 *
 * Returns 0 on success, 1 on a writev failure, -1 on a readv failure.
 */
static int rw_copy_v(int in_fd, int out_fd, int bounded, long long limit)
{
	const ssize_t full = (ssize_t)RW_IOVCNT * BUFFERSIZE;
	struct iovec iov[RW_IOVCNT];
	long long count = 0;
	ssize_t n, written;
	int ret = 0;
	int i;

	for (i = 0; i < RW_IOVCNT; i++) {
		iov[i].iov_base = rw_alloc_buffer();
		iov[i].iov_len = BUFFERSIZE;
	}

	while ((n = readv(in_fd, iov, RW_IOVCNT)) > 0) {
		written = writev(out_fd, iov, RW_IOVCNT);
		if (written != full) {
			/* errno is only meaningful when writev() itself failed. */
			fprintf(stderr, "writev error! : %s\n",
				written < 0 ? strerror(errno) : "short write");
			ret = 1;
			goto out;
		}
		count += n;
		if (bounded && count >= limit)
			break;
	}

	if (bounded ? count < limit : n < 0) {
		fprintf(stderr, "readv error : %s\n\n",
			n < 0 ? strerror(errno) : "unexpected end of input");
		ret = -1;
	}
out:
	for (i = 0; i < RW_IOVCNT; i++)
		free(iov[i].iov_base);
	return ret;
}

/*
 * rw_test is used to test read/write/readv/writev.
 *
 * Arguments:
 *   type:   read/write/readv/writev
 *   fd:     file descriptor of the file under test
 *   nullfd: write file descriptor bound to /dev/null (sink for read tests)
 *   zerofd: read file descriptor bound to /dev/zero (source for write tests)
 *   size:   0 | 1K | 4K | 4M | 4G | 4G+1; only used by write/writev.
 *           Data is written in whole buffers, so the amount written is
 *           size rounded up to a full chunk, and at least one chunk.
 *
 * Return value:
 *    0: succeed
 *    1: write error
 *   -1: read error, or type is not one of the four supported names
 */
static int rw_test(char *type, int fd, int nullfd, int zerofd, char *size)
{
	long long limit = rw_size_limit(size);

	if (!strcmp(type, "read"))
		return rw_copy(fd, nullfd, 0, 0);
	if (!strcmp(type, "write"))
		return rw_copy(zerofd, fd, 1, limit);
	if (!strcmp(type, "readv"))
		return rw_copy_v(fd, nullfd, 0, 0);
	if (!strcmp(type, "writev"))
		return rw_copy_v(zerofd, fd, 1, limit);

	/* An unknown type must not be reported as a passed test. */
	fprintf(stderr, "unknown rw type: %s\n", type);
	return -1;
}