Re: High write latency, iowait, slow writes 2.6.9

From: Paul A. Sumner
Date: Thu Jan 13 2005 - 14:14:11 EST


Thanks much. I still seem to be getting latency, etc. w/ 2.6.11 rc1 bk w/ the patch... Here you go (vmstat.log is a little long).

Jens Axboe wrote:
On Wed, Jan 12 2005, Paul A. Sumner wrote:

Thanks... I've tried the as, deadline and cfq schedulers. Deadline is
giving me the best results. I've also tried tweaking the stuff in
/sys/block/sda/queue/iosched/.

For lack of a better way of describing it, it seems like something is
thrashing.


I you have time, I would like you to try current BK with this patch:

http://www.kernel.org/pub/linux/kernel/people/axboe/patches/v2.6/2.6.11-rc1/cfq-time-slices-20.gz

You should enable CONFIG_IOPRIO_WRITE, it is in the io scheduler config
section, if you do a make oldconfig it should pop up for you. Boot with
elevator=cfq to select cfq.

A simple profile of the bad period would also be nice, along with
vmstat 1 info from that period. If you boot with profile=2, do:

# readprofile -r
# run bad workload
# readprofile | sort -nr +2 > result_file

And send that along with the logged vmstat 1 from that same period.

Oh, and lastly, remember to CC people on lkml when you respond, thanks.

+ uname -a
Linux taz 2.6.11-rc1-bk1 #1 SMP Thu Jan 13 09:11:26 PST 2005 x86_64 AMD Opteron(tm) Processor 250 AuthenticAMD GNU/Linux
+ grep MemTotal /proc/meminfo
MemTotal: 16126744 kB
+ dmesg
+ grep Bootdata
Bootdata ok (command line is root=/dev/ram0 init=/linuxrc ramdisk=8192 real_root=/dev/sda3 console=tty0 console=ttyS0,115200n8 elevator=cfq profile=2 gentoo=udev)
+ readprofile -r
+ vmstat 1
+ tiotest -d /data/scratch -f 8192
Tiotest results for 4 concurrent io threads:
,----------------------------------------------------------------------.
| Item | Time | Rate | Usr CPU | Sys CPU |
+-----------------------+----------+--------------+----------+---------+
| Write 32768 MBs | 484.3 s | 67.662 MB/s | 0.8 % | 12.9 % |
| Random Write 16 MBs | 3.2 s | 4.913 MB/s | 0.1 % | 1.6 % |
| Read 32768 MBs | 155.1 s | 211.207 MB/s | 1.5 % | 22.9 % |
| Random Read 16 MBs | 8.7 s | 1.795 MB/s | 0.1 % | 0.3 % |
`----------------------------------------------------------------------'
Tiotest latency results:
,-------------------------------------------------------------------------.
| Item | Average latency | Maximum latency | % >2 sec | % >10 sec |
+--------------+-----------------+-----------------+----------+-----------+
| Write | 0.152 ms | 178448.837 ms | 0.00032 | 0.00015 |
| Random Write | 0.007 ms | 6.228 ms | 0.00000 | 0.00000 |
| Read | 0.073 ms | 363.501 ms | 0.00000 | 0.00000 |
| Random Read | 8.515 ms | 270.383 ms | 0.00000 | 0.00000 |
|--------------+-----------------+-----------------+----------+-----------|
| Total | 0.115 ms | 178448.837 ms | 0.00016 | 0.00008 |
`--------------+-----------------+-----------------+----------+-----------'

+ pkill vmstat
+ readprofile
+ sort -nr +2
./lkml.sh: line 10: 8599 Terminated vmstat 1 >vmstat.log
1171085 default_idle 24397.6042
34154 copy_user_generic_c 898.7895
1160 __wake_up_bit 24.1667
2697 find_get_page 21.0703
2122 find_trylock_page 18.9464
1755 kmem_cache_free 18.2812
85 ret_from_sys_call 17.0000
1072 kmem_cache_alloc 16.7500
3659 add_to_page_cache 15.2458
842 unlock_page 13.1562
201 xfs_bmap 12.5625
1061 page_waitqueue 11.0521
990 truncate_complete_page 10.3125
1713 fget_light 9.7330
2246 end_buffer_async_write 9.3583
562 bio_put 8.7812
259 mark_buffer_async_write 8.0938
1896 test_clear_page_dirty 7.9000
1876 test_clear_page_writeback 7.3281
569 current_kernel_time 7.1125
565 end_page_writeback 7.0625
99 xfs_size_fn 6.1875
295 bio_add_page 6.1458
191 fput 5.9688
1043 __smp_call_function 5.9261
1040 find_lock_page 5.9091
740 mpage_end_io_read 5.7812
1102 dnotify_parent 5.7396
1783 __set_page_dirty_nobuffers 5.5719
1277 __pagevec_lru_add 5.3208
1575 free_hot_cold_page 5.1809
1625 test_set_page_writeback 5.0781
792 xfs_map_at_offset 4.9500
150 xfs_bmbt_get_startoff 4.6875
574 system_call 4.3817
138 linvfs_prepare_write 4.3125
412 xfs_trans_unlocked_item 4.2917
671 create_empty_buffers 4.1937
577 xfs_count_page_state 4.0069
5929 __make_request 3.9845
940 __do_softirq 3.9167
605 __down_write 3.7812
3015 buffered_rmqueue 3.7687
230 mark_page_accessed 3.5938
333 generic_commit_write 3.4688
485 finish_task_switch 3.3681
604 memset 3.2128
599 __block_commit_write 3.1198
136 mark_buffer_dirty 2.8333
43 xfs_bmbt_get_blockcount 2.6875
414 put_page 2.5875
39 mempool_alloc_slab 2.4375
2605 scsi_request_fn 2.3943
1138 linvfs_get_block_core 2.3708
354 __down_read 2.2987
434 try_to_free_buffers 2.2604
35 mempool_free_slab 2.1875
675 __find_get_block_slow 2.1094
364 alloc_page_buffers 2.0682
129 end_bio_bh_io_sync 2.0156
313 alloc_pages_current 1.9563
480 scsi_end_request 1.8750
298 mempool_free 1.8625
551 __up_write 1.8125
290 find_get_pages 1.8125
737 mempool_alloc 1.7716
170 set_bh_page 1.7708
140 free_buffer_head 1.7500
275 xfs_iunlock 1.7188
546 vfs_write 1.7063
190 linvfs_write 1.6964
539 vfs_read 1.6844
160 bad_range 1.6667
260 __up_read 1.6250
544 submit_bh 1.6190
714 __mark_inode_dirty 1.5938
250 update_atime 1.5625
75 block_prepare_write 1.5625
146 current_fs_time 1.5208
170 xfs_mod_incore_sb 1.5179
213 sys_write 1.4792
496 xfs_ichgtime 1.4762
233 drop_buffers 1.4563
23 xfs_extent_state 1.4375
6439 shrink_zone 1.4271
815 __end_that_request_first 1.4149
225 radix_tree_preload 1.4062
244 xfs_ilock 1.3864
664 __bio_add_page 1.3833
796 generic_make_request 1.3819
44 linvfs_get_blocks_direct 1.3750
401 xfs_submit_page 1.3191
839 __do_page_cache_readahead 1.2790
225 zone_watermark_ok 1.2784
142 xfs_bmap_worst_indlen 1.2679
634 scsi_dispatch_cmd 1.2008
518 bio_alloc 1.1562
73 alloc_buffer_head 1.1406
359 submit_bio 1.1219
465 release_pages 1.1178
87 smp_call_function_all_cpus 1.0875
240 do_sync_write 1.0714
51 xfs_ilock_map_shared 1.0625
34 xfs_bmbt_get_state 1.0625
935 __alloc_pages 1.0435
131 bio_endio 1.0234
32 linvfs_get_block 1.0000
156 __pagevec_release_nonlru 0.9750
684 xfs_read 0.9716
45 __pagevec_free 0.9375
15 set_page_refs 0.9375
74 bio_destructor 0.9250
1149 generic_file_buffered_write 0.9207
103 linvfs_read 0.9196
102 smp_call_function 0.9107
301 page_referenced 0.8958
269 mpage_readpages 0.8849
181 inode_update_time 0.8702
1073 xfs_iomap 0.8489
271 file_read_actor 0.8469
119 zonelist_policy 0.8264
178 linvfs_release_page 0.7946
25 xfs_bmbt_get_startblock 0.7812
172 __cache_shrink 0.7679
61 flat_send_IPI_allbutself 0.7625
164 do_sync_read 0.7321
635 xfs_bmap_do_search_extents 0.7216
33 xfs_inode_shake 0.6875
218 shrink_slab 0.6813
75 cond_resched 0.6696
259 generic_write_checks 0.6475
23 init_buffer_head 0.6389
254 balance_dirty_pages_ratelimited 0.6350
10 xfs_ail_min 0.6250
137 xfs_bmap_search_extents 0.6116
87 sys_read 0.6042
1565 xfs_write 0.6038
572 do_mpage_readpage 0.5958
33 do_softirq_thunk 0.5690
45 try_to_release_page 0.5625
320 xfs_convert_page 0.5556
62 put_io_context 0.5536
26 xfs_rwunlock 0.5417
932 xfs_page_state_convert 0.5295
8 fs_noerr 0.5000
480 __block_prepare_write 0.4839
37 timespec_trunc 0.4625
551 do_generic_mapping_read 0.4592
44 cpu_idle 0.4583
60 get_io_context 0.4167
237 try_to_wake_up 0.4115
52 __read_page_state 0.4062
24 io_schedule 0.3750
1301831 total 0.3712
231 get_request 0.3208
15 xfs_offset_to_map 0.3125
10 scsi_next_command 0.3125
5 free_hot_page 0.3125
175 blk_recount_segments 0.2878
18 unmap_underlying_metadata 0.2812
49 wakeup_kswapd 0.2784
22 __down_read_trylock 0.2292
11 elv_set_request 0.2292
21 blockable_page_cache_readahead 0.2188
37 wait_on_page_bit 0.2102
140 blk_queue_bounce 0.2083
96 __generic_file_aio_read 0.2000
48 scsi_put_command 0.2000
30 do_page_cache_readahead 0.1875
3 xfs_iunlock_map_shared 0.1875
1482 xfs_bmapi 0.1856
34 scsi_finish_command 0.1771
34 recalc_bh_state 0.1771
48 get_request_wait 0.1765
14 sysret_check 0.1687
8 add_disk_randomness 0.1667
93 truncate_inode_pages 0.1571
10 wake_up_bit 0.1562
7 scsi_free_sgtable 0.1458
20 task_rq_lock 0.1389
17 kref_put 0.1328
61 xfs_mod_incore_sb_unlocked 0.1271
95 page_cache_readahead 0.1263
16 thread_return 0.1250
10 xfs_bmbt_get_all 0.1250
8 xfs_bmap_cancel 0.1250
4 xfs_bmbt_set_blockcount 0.1250
4 sched_clock 0.1250
2 pagebuf_ispin 0.1250
9 shrink_dcache_memory 0.1125
7 bio_get_nr_vecs 0.1094
36 __mod_timer 0.1071
164 cfq_set_request 0.1068
5 pagebuf_daemon_wakeup 0.1042
5 kmem_cache_shrink 0.1042
8 lru_add_drain 0.1000
8 block_sync_page 0.1000
37 mb_cache_shrink_fn 0.0964
3 put_device 0.0938
3 handle_ra_miss 0.0938
7 xfs_trans_tail_ail 0.0875
20 scsi_softirq 0.0833
9 remove_from_page_cache 0.0804
88 cache_grow 0.0797
41 pagebuf_daemon 0.0777
11 finish_wait 0.0764
13 memcpy 0.0739
111 xfs_iomap_write_delay 0.0738
21 copy_user_generic 0.0705
10 scsi_device_unbusy 0.0694
30 __bitmap_weight 0.0670
9 scsi_add_timer 0.0625
7 alloc_io_context 0.0625
3 mpage_bio_submit 0.0625
2 blk_backing_dev_unplug 0.0625
1 xfs_trans_find_item 0.0625
20 xfs_log_move_tail 0.0595
8 del_timer 0.0556
7 prepare_to_wait_exclusive 0.0547
52 scsi_io_completion 0.0524
10 __clear_page_dirty 0.0521
9 xfs_bmap_eof 0.0511
4 get_next_ra_size 0.0500
6 .text.lock.buffer 0.0484
2 xfs_bmbt_set_startblock 0.0417
2 __pagevec_release 0.0417
49 kswapd 0.0414
22 add_timer_randomness 0.0382
16 scsi_run_queue 0.0345
3 __get_free_pages 0.0312
2 ioc_set_batching 0.0312
1 linvfs_readpages 0.0312
1 kobject_put 0.0312
1 retint_careful 0.0303
9 scsi_decide_disposition 0.0281
44 schedule 0.0264
1 del_singleshot_timer_sync 0.0208
4 schedule_timeout 0.0192
4 find_get_pages_tag 0.0192
4 generic_file_direct_IO 0.0179
2 percpu_counter_mod 0.0179
2 __wake_up 0.0179
12 xfs_map_unwritten 0.0163
2 __wait_on_bit_lock 0.0156
4 generic_cont_expand 0.0139
2 mpage_alloc 0.0125
1 sync_page 0.0125
2 background_writeout 0.0114
3 profile_hit 0.0094
1 schedule_tail 0.0089
1 journal_unfile_buffer 0.0089
1 end_buffer_write_sync 0.0089
40 xfs_bmap_add_extent 0.0078
6 shrink_icache_memory 0.0078
2 linvfs_writepage 0.0078
2 get_dirty_limits 0.0078
1 sys_lseek 0.0078
1 clear_page_dirty_for_io 0.0078
1 __wait_on_bit 0.0078
4 sd_rw_intr 0.0074
3 prune_dcache 0.0063
1 lookup_mnt 0.0063
1 devfs_d_iput 0.0063
1 sched_fork 0.0057
1 __lock_page 0.0057
1 xfs_mod_incore_sb_batch 0.0048
1 kfree 0.0045
1 writeback_inodes 0.0042
1 ext3_get_branch 0.0042
1 __sched_text_start 0.0037
2 try_to_free_pages 0.0036
1 release_task 0.0027
1 journal_write_revoke_records 0.0026
2 __generic_file_aio_write_nolock 0.0022
1 page_referenced_one 0.0021
1 __getblk_slow 0.0018
1 journal_stop 0.0016
1 xfs_getattr 0.0015
1 __journal_file_buffer 0.0014
1 do_wp_page 0.0010
1 start_this_handle 0.0009
1 clear_page_range 0.0009
1 copy_page_range 0.0007
1 journal_commit_transaction 0.0002

Attachment: vmstat.log.gz
Description: application/gzip