Re: [PATCH] Add ext3 data=guarded mode

From: Jan Kara
Date: Mon Apr 20 2009 - 05:26:29 EST


On Mon 20-04-09 11:07:17, Mike Galbraith wrote:
> On Sun, 2009-04-19 at 11:54 +0530, Amit Shah wrote:
> > On (Sat) Apr 18 2009 [09:28:21], Mike Galbraith wrote:
>
> > > Probably because you're swapping heavily, and that is perturbing your
> >
> > The variance only affects the 4k test; the other times more or less
> > remain the same.
>
> My box disagrees.
>
> (bumps ulimits to test 4BG... OS+swap live on sdb btw)
>
> ./test-file-zero-alloc-speed 4 /dev/sdf3 /media/root ext3 rw,_netdev,noatime,data=foo,acl,user_xattr
>
> foo=guarded
> 4k 225 141 80 142 361
> 8k 74 96 362 78 84
> mm 55 57 57 57 57
>
> foo=writeback
> 4k 179 264 187 125 93
> 8k 94 161 73 334 84
> mm 57 58 57 56 57
>
> foo=ordered
> 4k 81 74 76 80 75
> 8k 77 76 224 79 79
> mm 59 56 60 58 59
>
> foo=journal
> 4k 95 297 69 83 420
> 8k 73 139 158 80 78
> mm 57 58 56 59 56
>
> ./test-file-zero-alloc-speed 2 /dev/sdf3 /media/root ext3 rw,_netdev,noatime,data=foo,acl,user_xattr
>
> foo=guarded
> 4k 28 27 27 28 28
> 8k 28 27 27 28 27
>
> foo=writeback
> 4k 27 27 27 27 27
> 8k 28 28 27 27 28
>
> All journal modes seem subject to bad throughput under heavy pressure,
> though data=ordered seems much less likely to suffer for some reason.
> Major difference _seems_ to be that write()+largefile induces very much
> swap activity.
My rough guess is that this depends on the VM writeout behavior. In
ordered mode, we forcibly writeout all the dirty data on a transaction
commit which happens every 5 seconds so they don't accumulate that much.
In other journaling modes we don't do that and decisions about writeout
(probably how much pdflush manages to write in background vs. how much
VM throttles the process to do the writeback itself) cause variances in
the run time. But this is just a guess. You could gather blktraces of
slow and fast runs and then look if the amount of IO done by different
processes significantly differs. If Chris has merged by improvements to
Seekwatcher, then you could nicely visualize this (hmm, that doesn't seem
to be the case so I'm attaching the diff and a helper script - see comments
in the beginning of the script and command helps for usage).

Honza
--
Jan Kara <jack@xxxxxxx>
SUSE Labs, CR
--- seekwatcher.orig 2009-01-15 00:04:20.000000000 +0100
+++ seekwatcher 2009-01-26 12:04:21.000000000 +0100
@@ -56,6 +56,7 @@
from optparse import OptionParser

blktrace_only = False
+tags = { "": 0 }

try:
from matplotlib import rcParams
@@ -136,9 +137,9 @@
return 2.0
sys.stderr.write("unknown command %s\n" % com)

-def loaddata(fh,delimiter=None, converters=None):
+def loaddata(fh,delimiter=None, converters=None, filter=True):

- def iter(fh, delimiter, converters):
+ def iter(fh, delimiter, converters, filter):
global devices_sector_max

if converters is None: converters = {}
@@ -149,10 +150,20 @@
last_cmd = None
last_size = None
last_dev = None
+ last_tag = None
for i,line in enumerate(fh):
- if not line.startswith('C'):
+ if filter and not line.startswith('C'):
continue
- row = [converters.get(i,float)(val) for i,val in enumerate(line.split(delimiter))]
+ if not filter:
+ line = "C " + line
+ this_tag = 0
+ row = []
+ for i,val in enumerate(line.split(delimiter)):
+ if i < 9:
+ row.append(converters.get(i,float)(val))
+ else:
+ this_tag = tags.setdefault(val, len(tags))
+ row.append(this_tag)
this_time = row[7]
this_dev = row[8]
this_sector = row[4]
@@ -165,7 +176,7 @@
if (last_row and this_rw == last_rw and
this_dev == last_dev and
this_time - last_time < .5 and last_size < 512 and
- this_sector == last_end):
+ this_sector == last_end and this_tag == last_tag):
last_end += this_size
last_size += this_size
last_row[5] += row[5]
@@ -182,11 +193,12 @@
last_end = this_sector + this_size
last_size = this_size
last_dev = this_dev
+ last_tag = this_tag
if last_row:
for x in last_row:
yield x

- X = numpy.fromiter(iter(fh, delimiter, converters), dtype=float)
+ X = numpy.fromiter(iter(fh, delimiter, converters, filter), dtype=float)
return X

def sort_by_time(data):
@@ -367,7 +379,7 @@
os.remove(os.path.join(root, name))
os.rmdir(png_dir)

-def plot_data(ax, rw, data, style, label, alpha=1):
+def plot_data(ax, data, style, label, alpha=1):
def reduce_plot():
reduce = {}
skipped = 0
@@ -380,37 +392,48 @@
y += 1
h = reduce.setdefault(x, {})
h[y] = 1
+ yield rbs[i]
yield x * x_per_cell
yield y * y_per_cell
+ yield tg[i]
xcells = 325.0 * options.io_graph_cell_multi
x_per_cell = (xmax - xmin) / xcells
ycells = 80.0 * options.io_graph_cell_multi
y_per_cell = (yzoommax - yzoommin) / ycells

- if rw is None:
- if options.reads_only:
- rw = 0
- if options.writes_only:
- rw = 1
- if rw != None:
- if options.reads_only and rw != 0:
- return
- if options.writes_only and rw != 1:
- return
- rbs = data[:,1]
- data = data[numpy.where(rbs == rw)]
+ rbs = data[:,1]
+ if options.reads_only:
+ data = data[numpy.where(rbs == 0)]
+ if options.writes_only:
+ data = data[numpy.where(rbs == 1)]
+
+ if len(data) == 0:
+ return []
+
times = data[:,7]
sectors = data[:,4]
- if len(times) > 0:
- t = numpy.fromiter(reduce_plot(), dtype=float)
- t.shape = (len(t)/2, 2)
- xdata = t[:,0]
- ydata = t[:,1]
- lines = ax.plot(t[:,0], t[:,1], options.io_graph_dots, mew=0,
- ms=options.io_graph_marker_size,
- label=label, alpha=alpha)
- return lines
- return []
+ tg = data[:,9]
+ t = numpy.fromiter(reduce_plot(), dtype=float)
+ t.shape = (len(t)/4, 4)
+ lines = []
+ for tag in tags:
+ at = t[numpy.where(t[:,3] == tags[tag])]
+ if len(at) == 0:
+ continue
+ if not options.writes_only:
+ atr = at[numpy.where(at[:,0] == 0)]
+ lines.extend(ax.plot(atr[:,1], atr[:,2], style, mew=0,
+ ms=options.io_graph_marker_size,
+ alpha=alpha,
+ label=tag + " Reads " + label))
+ if not options.reads_only:
+ atr = at[numpy.where(at[:,0] == 1)]
+ lines.extend(ax.plot(atr[:,1], atr[:,2], style, mew=0,
+ ms=options.io_graph_marker_size,
+ alpha=alpha,
+ label=tag + " Writes " + label))
+ return lines
+

def add_roll(roll, max, num):
if len(roll) == max:
@@ -624,11 +647,11 @@
return data

def shapeit(X):
- lines = len(X) / 9
- X.shape = (lines, 9)
+ lines = len(X) / 10
+ X.shape = (lines, 10)

def unshapeit(X):
- lines = len(X) * 9
+ lines = len(X) * 10
X.shape = (lines, 1)

def getlabel(i):
@@ -692,12 +715,49 @@
def translate_sector(dev, sector):
return device_translate[dev] + sector;

+def process_input(input, type):
+ global devices_sector_max
+ global device_translate
+ global must_sort
+
+ devices_sector_max = {}
+ if type == 0:
+ run = run_blkparse(input, converters)
+ elif type == 1:
+ if input == "-":
+ p = sys.stdin
+ else:
+ p = open(input, 'r')
+ run = loaddata(p, converters=converters, filter=False)
+
+ device_translate = {}
+ total = 0
+ if len(devices_sector_max) > 1:
+ must_sort = True
+ for x in devices_sector_max:
+ device_translate[x] = total + devices_sector_max[x]
+ total += devices_sector_max[x]
+ shapeit(run)
+ if len(devices_sector_max) > 1:
+ for x in run:
+ sector = x[4]
+ dev = x[8]
+ x[4] = device_translate[dev] + sector
+
+ sorted = sort_by_time(run)
+ run = sorted
+
+ unshapeit(run)
+ return run
+
usage = "usage: %prog [options]"
parser = OptionParser(usage=usage)
parser.add_option("-d", "--device", help="Device for blktrace", default=[],
action="append")
parser.add_option("-t", "--trace", help="blktrace file", default=[],
action="append")
+parser.add_option("-f", "--file", help="parsed blktrace file", default=[],
+ action="append")
parser.add_option("-p", "--prog", help="exec program", default="")
parser.add_option("", "--full-trace", help="Don't filter blktrace events",
default=False, action="store_true")
@@ -710,7 +770,7 @@
parser.add_option("-o", "--output", help="output file", default="trace.png")
parser.add_option("-l", "--label", help="label", default=[],
action="append")
- parser.add_option("", "--dpi", help="dpi", default=120)
+ parser.add_option("", "--dpi", help="dpi", default=120, type="float")
parser.add_option("", "--io-graph-dots", help="Disk IO dot style",
default='s')
parser.add_option("", "--io-graph-marker-size", help="Disk IO dot size",
@@ -719,6 +779,8 @@
default=2, type="float")
parser.add_option("-I", "--no-io-graph", help="Don't create an IO graph",
default=False, action="store_true")
+ parser.add_option("", "--only-io-graph", help="Create only IO graph",
+ default=False, action="store_true");
parser.add_option("-r", "--rolling-avg",
help="Rolling average for seeks and throughput (in seconds)",
default=None)
@@ -761,7 +823,7 @@
rcParams['interactive'] = 'False'
from pylab import *

-if not options.trace:
+if not options.trace and not options.file:
parser.print_help()
sys.exit(1)

@@ -788,29 +850,16 @@
data = numpy.array([])
runs = []
must_sort = True
+devices_sector_max = {}
+device_translate = {}

for x in options.trace:
- devices_sector_max = {}
- run = run_blkparse(x, converters)
-
- device_translate = {}
- total = 0
- if len(devices_sector_max) > 1:
- must_sort = True
- for x in devices_sector_max:
- device_translate[x] = total + devices_sector_max[x]
- total += devices_sector_max[x]
- shapeit(run)
- if len(devices_sector_max) > 1:
- for x in run:
- sector = x[4]
- dev = x[8]
- x[4] = device_translate[dev] + sector
-
- sorted = sort_by_time(run)
- run = sorted
+ run = process_input(x, 0)
+ runs.append(run)
+ data = numpy.append(data, run)

- unshapeit(run)
+for x in options.file:
+ run = process_input(x, 1)
runs.append(run)
data = numpy.append(data, run)

@@ -910,6 +959,8 @@

if options.no_io_graph:
total_graphs = 2
+elif options.only_io_graph:
+ total_graphs = 1
else:
total_graphs = 3

@@ -922,76 +973,78 @@
if options.title:
options.title += "\n\n"

-# Throughput goes at the botoom
-a = subplot(total_graphs, 1, total_graphs)
-for i in xrange(len(runs)):
- label = getlabel(i)
- plot_throughput(a, None, runs[i], '-', label)
-
+# Prepare ticks
# make sure the final second goes on the x axes
ticks = list(arange(xmin, xmax, xmax/8))
ticks.append(xmax)
xticks = ticks
-a.set_xticks(ticks)
-a.set_yticklabels( [ "%d" % x for x in ticks ])
if ticks[-1] < 4:
xticklabels = [ "%.1f" % x for x in ticks ]
else:
xticklabels = [ "%d" % x for x in ticks ]
-a.set_xticklabels(xticklabels)

-# cut down the number of yticks to something more reasonable
-ticks = a.get_yticks()
-ticks = list(arange(0, ticks[-1] + ticks[-1]/4, ticks[-1]/4))
-a.set_yticks(ticks)
+if not options.only_io_graph:
+ # Throughput goes at the botoom
+ a = subplot(total_graphs, 1, total_graphs)
+ for i in xrange(len(runs)):
+ label = getlabel(i)
+ plot_throughput(a, None, runs[i], '-', label)

-if ticks[-1] < 4:
- a.set_yticklabels( [ "%.1f" % x for x in ticks ])
-else:
- a.set_yticklabels( [ "%d" % x for x in ticks ])
+ a.set_xticks(xticks)
+ a.set_yticklabels( [ "%d" % x for x in xticks ])
+ a.set_xticklabels(xticklabels)

-a.set_title('Throughput')
-a.set_ylabel('MB/s')
+ # cut down the number of yticks to something more reasonable
+ ticks = a.get_yticks()
+ ticks = list(arange(0, ticks[-1] + ticks[-1]/4, ticks[-1]/4))
+ a.set_yticks(ticks)

-# the bottom graph gets xticks, set it here
-a.set_xlabel('Time (seconds)')
-if options.label:
- a.legend(loc=(1.01, 0.5), shadow=True, pad=0.5, numpoints=2,
- handletextsep = 0.005,
- labelsep = 0.01,
- prop=FontProperties(size='x-small') )
-
-# next is the seek count graph
-a = subplot(total_graphs, 1, total_graphs - 1)
-for i in xrange(len(runs)):
- label = getlabel(i)
- plot_seek_count(a, None, runs[i], '-', label)
-
-# cut down the number of yticks to something more reasonable
-ticks = a.get_yticks()
-ticks = list(arange(0, ticks[-1] + ticks[-1]/4, ticks[-1]/4))
-a.set_yticks(ticks)
-a.set_yticklabels( [ str(int(x)) for x in ticks ])
+ if ticks[-1] < 4:
+ a.set_yticklabels( [ "%.1f" % x for x in ticks ])
+ else:
+ a.set_yticklabels( [ "%d" % x for x in ticks ])

-if options.no_io_graph and options.title:
- a.set_title(options.title + 'Seek Count')
-else:
- a.set_title('Seek Count')
+ a.set_title('Throughput')
+ a.set_ylabel('MB/s')
+
+ # the bottom graph gets xticks, set it here
+ a.set_xlabel('Time (seconds)')
+ if options.label:
+ a.legend(loc=(1.01, 0.5), shadow=True, pad=0.5, numpoints=2,
+ handletextsep = 0.005,
+ labelsep = 0.01,
+ prop=FontProperties(size='x-small') )

-a.set_ylabel('Seeks / sec')
-if options.label:
- a.legend(loc=(1.01, 0.5), shadow=True, pad=0.5, numpoints=2,
- handletextsep = 0.005,
- labelsep = 0.01,
- prop=FontProperties(size='x-small') )
+ # next is the seek count graph
+ a = subplot(total_graphs, 1, total_graphs - 1)
+ for i in xrange(len(runs)):
+ label = getlabel(i)
+ plot_seek_count(a, None, runs[i], '-', label)
+
+ # cut down the number of yticks to something more reasonable
+ ticks = a.get_yticks()
+ ticks = list(arange(0, ticks[-1] + ticks[-1]/4, ticks[-1]/4))
+ a.set_yticks(ticks)
+ a.set_yticklabels( [ str(int(x)) for x in ticks ])
+
+ if options.no_io_graph and options.title:
+ a.set_title(options.title + 'Seek Count')
+ else:
+ a.set_title('Seek Count')
+
+ a.set_ylabel('Seeks / sec')
+ if options.label:
+ a.legend(loc=(1.01, 0.5), shadow=True, pad=0.5, numpoints=2,
+ handletextsep = 0.005,
+ labelsep = 0.01,
+ prop=FontProperties(size='x-small') )

# and the optional IO graph
if not options.no_io_graph:
- a = subplot(total_graphs, 1, total_graphs - 2)
+ a = subplot(total_graphs, 1, 1)
for i in xrange(len(runs)):
label = getlabel(i)
- plot_data(a, 0, runs[i], options.io_graph_dots, label + " Read")
- plot_data(a, 1, runs[i], options.io_graph_dots, label + " Write")
+ plot_data(a, runs[i], options.io_graph_dots, label)

af = AnnoteFinder(axis=a)
connect('button_press_event', af)
@@ -1008,17 +1061,27 @@
ticks.append(yzoommax)
a.set_yticks(ticks)
a.set_yticklabels( [ str(int(x/2048)) for x in ticks ] )
- a.legend(loc=(1.01, 0.5), shadow=True, pad=0.3, numpoints=1,
- handletextsep = 0.005,
- labelsep = 0.01,
- markerscale = 1.1,
- prop=FontProperties(size='x-small') )
+ if not options.only_io_graph:
+ a.legend(loc=(1.01, 0.5), shadow=True, pad=0.3, numpoints=1,
+ handletextsep = 0.005,
+ labelsep = 0.01,
+ markerscale = 1.1,
+ prop=FontProperties(size='x-small') )
+ else:
+ a.legend(loc=(0,-0.25), ncol=4, columnspacing=0.1,
+ shadow=True, borderpad=0.3, numpoints=1,
+ handletextpad = 0.005,
+ labelspacing = 0.01,
+ markerscale = 1.1,
+ prop=FontProperties(size='x-small') )
+ subplots_adjust(bottom=0.2)
a.set_ylim(yzoommin, yzoommax)

-# squeeze the graphs over to the left a bit to make room for the
-# legends
-#
-subplots_adjust(right = 0.8, hspace=0.3)
+if not options.only_io_graph:
+ # squeeze the graphs over to the left a bit to make room for the
+ # legends
+ #
+ subplots_adjust(right = 0.8, hspace=0.3)

# finally, some global bits for each subplot
for x in xrange(1, total_graphs + 1):
#!/usr/bin/env python

# Copyright (C) 2009 Novell. All rights reserved.
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public
# License v2 as published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public
# License along with this program; if not, write to the
# Free Software Foundation, Inc., 59 Temple Place - Suite 330,
# Boston, MA 021110-1307, USA.
#
# You can use this script to produce files for seekwatcher with
# IO tagged by PID and / or command. Use option -m <command> to
# merge processes which have the same command name.
#
# Example:
# tag-by-process -t sda | seekwatcher -f - --only-io-graph

import sys, os, signal, time, commands, tempfile, signal
from optparse import OptionParser

def loaddata(fh):
for i,line in enumerate(fh):
if not line.startswith('Q'):
continue
row = line.split()
if options.merge.count(row[10]) > 0:
row[9] = row[10]
else:
row[9] = row[10] + "(" + row[9] + ")"
for i in range(1,10):
print row[i],
print

def run_blkparse(trace):
tracefiles = []
seen = {}
sys.stderr.write("run_blkparse on %s\n" % trace)
if not os.path.exists(trace + "blktrace.0"):
dirname = os.path.dirname(trace) or "."
files = os.listdir(dirname)
joinname = os.path.dirname(trace) or ""
for x in files:
x = os.path.join(joinname, x)
if x.startswith(trace) and ".blktrace." in x:
i = x.rindex('.blktrace.')
cur = x[0:i]
if cur not in seen:
tracefiles.append(x[0:i])
seen[cur] = 1
else:
tracefiles.append(trace)

for x in tracefiles:
sys.stderr.write("using tracefile %s\n" % x)
p = os.popen('blkparse -q -i ' + x +
' -f "%a %d %M %m %S %N %s %5T.%9t %D %p %C\n"')
cur = loaddata(p)

usage = "usage: %prog [options]"
parser = OptionParser(usage=usage)
parser.add_option("-t", "--trace", help="blktrace file", default=[],
action="append")
parser.add_option("-m", "--merge", help="merge commands with given names",
default=[], action="append")

(options,args) = parser.parse_args()

for x in options.trace:
run = run_blkparse(x)