Re: [PATCH 3/9] readahead: record readahead patterns
From: Jan Kara
Date: Tue Nov 29 2011 - 09:40:35 EST
On Tue 29-11-11 21:09:03, Wu Fengguang wrote:
> Record the readahead pattern in ra->pattern and extend the ra_submit()
> parameters, to be used by the next readahead tracing/stats patches.
>
> 7 patterns are defined:
>
> pattern                   readahead for
> -----------------------------------------------------------
> RA_PATTERN_INITIAL        start-of-file read
> RA_PATTERN_SUBSEQUENT     trivial sequential read
> RA_PATTERN_CONTEXT        interleaved sequential read
> RA_PATTERN_OVERSIZE       oversize read
> RA_PATTERN_MMAP_AROUND    mmap fault
> RA_PATTERN_FADVISE        posix_fadvise()
> RA_PATTERN_RANDOM         random read
>
> Note that random reads will now be recorded in file_ra_state.
> This won't make cache bouncing any worse, because the ra->prev_pos update
> in do_generic_file_read() already pollutes the data cache, and
> filemap_fault() will stop calling into us after MMAP_LOTSAMISS.
>
> CC: Ingo Molnar <mingo@xxxxxxx>
> CC: Jens Axboe <axboe@xxxxxxxxx>
> CC: Peter Zijlstra <a.p.zijlstra@xxxxxxxxx>
> Acked-by: Rik van Riel <riel@xxxxxxxxxx>
> Signed-off-by: Wu Fengguang <fengguang.wu@xxxxxxxxx>
The patch looks OK. You can add:
Acked-by: Jan Kara <jack@xxxxxxx>
Honza
> ---
> include/linux/fs.h | 36 +++++++++++++++++++++++++++++++++++-
> include/linux/mm.h | 4 +++-
> mm/filemap.c | 3 ++-
> mm/readahead.c | 29 ++++++++++++++++++++++-------
> 4 files changed, 62 insertions(+), 10 deletions(-)
>
> --- linux-next.orig/include/linux/fs.h 2011-11-28 21:21:05.000000000 +0800
> +++ linux-next/include/linux/fs.h 2011-11-29 10:23:41.000000000 +0800
> @@ -945,11 +945,45 @@ struct file_ra_state {
> there are only # of pages ahead */
>
> unsigned int ra_pages; /* Maximum readahead window */
> - unsigned int mmap_miss; /* Cache miss stat for mmap accesses */
> + u16 mmap_miss; /* Cache miss stat for mmap accesses */
> + u8 pattern; /* one of RA_PATTERN_* */
> +
> loff_t prev_pos; /* Cache last read() position */
> };
>
> /*
> + * Which policy makes the decision to do the current read-ahead IO?
> + *
> + * RA_PATTERN_INITIAL readahead window is initially opened,
> + * normally when reading from start of file
> + * RA_PATTERN_SUBSEQUENT readahead window is pushed forward
> + * RA_PATTERN_CONTEXT no readahead window available, querying the
> + * page cache to decide readahead start/size.
> + * This typically happens on interleaved reads (e.g.
> + * reading pages 0, 1000, 1, 1001, 2, 1002, ...)
> + * where one file_ra_state struct is not enough
> + * for recording 2+ interleaved sequential read
> + * streams.
> + * RA_PATTERN_MMAP_AROUND read-around on mmap page faults
> + * (w/o any sequential/random hints)
> + * RA_PATTERN_FADVISE triggered by POSIX_FADV_WILLNEED or FMODE_RANDOM
> + * RA_PATTERN_OVERSIZE a random read larger than max readahead size,
> + * do max readahead to break down the read size
> + * RA_PATTERN_RANDOM a small random read
> + */
> +enum readahead_pattern {
> + RA_PATTERN_INITIAL,
> + RA_PATTERN_SUBSEQUENT,
> + RA_PATTERN_CONTEXT,
> + RA_PATTERN_MMAP_AROUND,
> + RA_PATTERN_FADVISE,
> + RA_PATTERN_OVERSIZE,
> + RA_PATTERN_RANDOM,
> + RA_PATTERN_ALL, /* for summary stats */
> + RA_PATTERN_MAX
> +};
> +
> +/*
> * Check if @index falls in the readahead windows.
> */
> static inline int ra_has_index(struct file_ra_state *ra, pgoff_t index)
> --- linux-next.orig/mm/readahead.c 2011-11-28 22:24:16.000000000 +0800
> +++ linux-next/mm/readahead.c 2011-11-29 10:17:14.000000000 +0800
> @@ -249,7 +249,10 @@ unsigned long max_sane_readahead(unsigne
> * Submit IO for the read-ahead request in file_ra_state.
> */
> unsigned long ra_submit(struct file_ra_state *ra,
> - struct address_space *mapping, struct file *filp)
> + struct address_space *mapping,
> + struct file *filp,
> + pgoff_t offset,
> + unsigned long req_size)
> {
> pgoff_t eof = ((i_size_read(mapping->host)-1) >> PAGE_CACHE_SHIFT) + 1;
> pgoff_t start = ra->start;
> @@ -390,6 +393,7 @@ static int try_context_readahead(struct
> if (size >= offset)
> size *= 2;
>
> + ra->pattern = RA_PATTERN_CONTEXT;
> ra->start = offset;
> ra->size = get_init_ra_size(size + req_size, max);
> ra->async_size = ra->size;
> @@ -411,8 +415,10 @@ ondemand_readahead(struct address_space
> /*
> * start of file
> */
> - if (!offset)
> + if (!offset) {
> + ra->pattern = RA_PATTERN_INITIAL;
> goto initial_readahead;
> + }
>
> /*
> * It's the expected callback offset, assume sequential access.
> @@ -420,6 +426,7 @@ ondemand_readahead(struct address_space
> */
> if ((offset == (ra->start + ra->size - ra->async_size) ||
> offset == (ra->start + ra->size))) {
> + ra->pattern = RA_PATTERN_SUBSEQUENT;
> ra->start += ra->size;
> ra->size = get_next_ra_size(ra, max);
> ra->async_size = ra->size;
> @@ -442,6 +449,7 @@ ondemand_readahead(struct address_space
> if (!start || start - offset > max)
> return 0;
>
> + ra->pattern = RA_PATTERN_CONTEXT;
> ra->start = start;
> ra->size = start - offset; /* old async_size */
> ra->size += req_size;
> @@ -453,14 +461,18 @@ ondemand_readahead(struct address_space
> /*
> * oversize read
> */
> - if (req_size > max)
> + if (req_size > max) {
> + ra->pattern = RA_PATTERN_OVERSIZE;
> goto initial_readahead;
> + }
>
> /*
> * sequential cache miss
> */
> - if (offset - (ra->prev_pos >> PAGE_CACHE_SHIFT) <= 1UL)
> + if (offset - (ra->prev_pos >> PAGE_CACHE_SHIFT) <= 1UL) {
> + ra->pattern = RA_PATTERN_INITIAL;
> goto initial_readahead;
> + }
>
> /*
> * Query the page cache and look for the traces(cached history pages)
> @@ -471,9 +483,12 @@ ondemand_readahead(struct address_space
>
> /*
> * standalone, small random read
> - * Read as is, and do not pollute the readahead state.
> */
> - return __do_page_cache_readahead(mapping, filp, offset, req_size, 0);
> + ra->pattern = RA_PATTERN_RANDOM;
> + ra->start = offset;
> + ra->size = req_size;
> + ra->async_size = 0;
> + goto readit;
>
> initial_readahead:
> ra->start = offset;
> @@ -491,7 +506,7 @@ readit:
> ra->size += ra->async_size;
> }
>
> - return ra_submit(ra, mapping, filp);
> + return ra_submit(ra, mapping, filp, offset, req_size);
> }
>
> /**
> --- linux-next.orig/include/linux/mm.h 2011-11-28 21:21:05.000000000 +0800
> +++ linux-next/include/linux/mm.h 2011-11-28 22:24:16.000000000 +0800
> @@ -1456,7 +1456,9 @@ void page_cache_async_readahead(struct a
> unsigned long max_sane_readahead(unsigned long nr);
> unsigned long ra_submit(struct file_ra_state *ra,
> struct address_space *mapping,
> - struct file *filp);
> + struct file *filp,
> + pgoff_t offset,
> + unsigned long req_size);
>
> /* Generic expand stack which grows the stack according to GROWS{UP,DOWN} */
> extern int expand_stack(struct vm_area_struct *vma, unsigned long address);
> --- linux-next.orig/mm/filemap.c 2011-11-28 21:21:05.000000000 +0800
> +++ linux-next/mm/filemap.c 2011-11-29 10:17:14.000000000 +0800
> @@ -1611,11 +1611,12 @@ static void do_sync_mmap_readahead(struc
> /*
> * mmap read-around
> */
> + ra->pattern = RA_PATTERN_MMAP_AROUND;
> ra_pages = max_sane_readahead(ra->ra_pages);
> ra->start = max_t(long, 0, offset - ra_pages / 2);
> ra->size = ra_pages;
> ra->async_size = ra_pages / 4;
> - ra_submit(ra, mapping, file);
> + ra_submit(ra, mapping, file, offset, 1);
> }
>
> /*
>
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
> the body of a message to majordomo@xxxxxxxxxxxxxxx
> More majordomo info at http://vger.kernel.org/majordomo-info.html
--
Jan Kara <jack@xxxxxxx>
SUSE Labs, CR
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/