Re: [PATCH] fs: sync: fixed performance regression

From: Paul Taysom
Date: Wed Jul 10 2013 - 19:45:49 EST


On Wed, Jul 10, 2013 at 4:12 PM, Paul Taysom <taysom@xxxxxxxxxxxx> wrote:
> The following commit introduced a 10x regression for
> syncing inodes in ext4 with relatime enabled where just
> the atime had been modified.
>
> commit 4ea425b63a3dfeb7707fc7cc7161c11a51e871ed
> Author: Jan Kara <jack@xxxxxxx>
> Date: Tue Jul 3 16:45:34 2012 +0200
> vfs: Avoid unnecessary WB_SYNC_NONE writeback during sys_sync and reorder sync passes
>
> See also: http://www.kernelhub.org/?msg=93100&p=2
>
> Fixed by putting back in the call to writeback_inodes_sb.
>
> I'll attach the test in a reply to this e-mail.
>
> The test starts by creating 512 files, syncing, reading one byte
> from each of those files, syncing, and then deleting each file
> and syncing. The time to do each sync is printed. The process
> is then repeated for 1024 files and then the next power of
> two up to 262144 files.
>
> Note, when running the test, the slow down doesn't always happen
> but most of the tests will show a slow down.
>
> In response to crbug.com/240422
>
> Signed-off-by: Paul Taysom <taysom@xxxxxxxxxxxx>
> ---
> fs/sync.c | 7 +++++++
> 1 file changed, 7 insertions(+)
>
> diff --git a/fs/sync.c b/fs/sync.c
> index 905f3f6..55c3316 100644
> --- a/fs/sync.c
> +++ b/fs/sync.c
> @@ -73,6 +73,12 @@ static void sync_inodes_one_sb(struct super_block *sb, void *arg)
> sync_inodes_sb(sb);
> }
>
> +static void writeback_inodes_one_sb(struct super_block *sb, void *arg)
> +{
> + if (!(sb->s_flags & MS_RDONLY))
> + writeback_inodes_sb(sb, WB_REASON_SYNC);
> +}
> +
> static void sync_fs_one_sb(struct super_block *sb, void *arg)
> {
> if (!(sb->s_flags & MS_RDONLY) && sb->s_op->sync_fs)
> @@ -104,6 +110,7 @@ SYSCALL_DEFINE0(sync)
> int nowait = 0, wait = 1;
>
> wakeup_flusher_threads(0, WB_REASON_SYNC);
> + iterate_supers(writeback_inodes_one_sb, NULL);
> iterate_supers(sync_inodes_one_sb, NULL);
> iterate_supers(sync_fs_one_sb, &nowait);
> iterate_supers(sync_fs_one_sb, &wait);
> --
> 1.8.3
>
Try this again but in plaintext mode. Attaching test results and test
program. Tests were run on a Pixel x86 with SSD storage.
/*
* Copyright (c) 2013 The Chromium OS Authors. All rights reserved.
* Use of this source code is governed by a BSD-style license that can be
* found in the LICENSE file.
*/

/*
* Does time test of sync for creating, reading and deleting files.
*
* To compile:
* cc syncperf.c -rlt -o syncperf
*
* To run, cd to a directory on volume where test should run.
* syncperf
*
* The syncperf will create the directory "synctestdir" and do
* several test runs creating twice as many files each time.
*
* The test prints the time to sync after creating the files,
* after reading the files and after deleting the files.
*
* There will be some runs, where the read sync time is
* fast event on systems that exhibit the problem.
*
* When the tests finishes, it cleans up "synctestdir".
*/

#include <sys/types.h>
#include <sys/stat.h>
#include <errno.h>
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <unistd.h>

enum { MAX_NAME = 12,
FILE_SIZE = 1 << 14,
BYTES_TO_READ = 1,
NUM_START_FILES = 1 << 9,
NUM_FILES = 1 << 18 };

struct file {
char name[MAX_NAME];
};

struct file *File;

static inline uint64_t nsecs(void)
{
struct timespec t;

clock_gettime(CLOCK_REALTIME, &t);
return (uint64_t)t.tv_sec * 1000000000ULL + t.tv_nsec;
}

static void fill(uint8_t *buf, int n)
{
int i;

for (i = 0; i < n; i++)
buf[i] = rand();
}

static void createfiles(int num_files)
{
uint8_t buf[FILE_SIZE];
int fd;
int i;

fill(buf, sizeof(buf));
for (i = 0; i < num_files; i++) {
snprintf(File[i].name, MAX_NAME, "f%d", i);
fd = creat(File[i].name, 0600);
if (write(fd, buf, sizeof(buf)) == -1)
perror("write");
close(fd);
}
}

static void unlinkfiles(int num_files)
{
int i;

for (i = 0; i < num_files; i++)
unlink(File[i].name);
}

static void readfiles(int num_files)
{
uint8_t buf[BYTES_TO_READ];
int fd;
int i;

for (i = 0; i < num_files; i++) {
fd = open(File[i].name, O_RDONLY);
if (read(fd, buf, BYTES_TO_READ) == -1)
perror("read");
close(fd);
}
}

static void time_sync(const char *label, int n)
{
uint64_t start;
uint64_t finish;

start = nsecs();
sync();
finish = nsecs();
printf("%10s %8d. %10.2f ms\n",
label, n, (double)(finish - start)/1000000.0);
}

void crsyncdel(int n)
{
createfiles(n);
time_sync("create", n);
readfiles(n);
time_sync("read", n);
unlinkfiles(n);
time_sync("unlink", n);
}

static void cleanup(const char *dir)
{
char cmd[1024];

if (chdir("..") == -1)
perror(dir);
snprintf(cmd, sizeof(cmd), "rm -fr %s", dir);
if (system(cmd) == -1)
perror(cmd);
}

static void setup(const char *dir)
{
mkdir(dir, 0700);
if (chdir(dir) == -1)
perror(dir);
sync();
File = malloc(sizeof(*File) * NUM_FILES);
}

int main(int argc, char *argv[])
{
char *dir = "synctestdir";
int i;

setup(dir);
/*
* Number of files grows by powers of two until the
* specified number of files is reached.
* Start with a large enough number to skip noise.
*/
for (i = NUM_START_FILES; i <= NUM_FILES; i <<= 1)
crsyncdel(i);
cleanup(dir);
return 0;
}

Attachment: syncperf.results
Description: Binary data