Re: [PATCH] trace: Add tracepoints to fs subsystem

From: Vaibhav Nagarnaik
Date: Thu Apr 28 2011 - 19:17:32 EST


Hi Alexander

Do you think this patch makes sense for mainline inclusion?

Thanks
Vaibhav Nagarnaik



On Mon, Apr 25, 2011 at 4:53 PM, Vaibhav Nagarnaik
<vnagarnaik@xxxxxxxxxx> wrote:
> From: Jiaying Zhang <jiayingz@xxxxxxxxxx>
>
> These few fs tracepoints are useful while debugging latency issues in
> filesystems and were used specifically for debugging various writeback
> subsystem issues. This patch adds entry and exit tracepoints for the
> following functions, viz.:
> wait_on_buffer
> block_write_full_page
> mpage_readpages
> file_read
>
> Signed-off-by: Vaibhav Nagarnaik <vnagarnaik@xxxxxxxxxx>
> ---
>  fs/buffer.c               |   10 +++
>  fs/mpage.c                |    3 +
>  include/trace/events/fs.h |  162 +++++++++++++++++++++++++++++++++++++++++++++
>  mm/filemap.c              |    4 +-
>  4 files changed, 178 insertions(+), 1 deletions(-)
>  create mode 100644 include/trace/events/fs.h
>
> diff --git a/fs/buffer.c b/fs/buffer.c
> index a08bb8e..1c118f4 100644
> --- a/fs/buffer.c
> +++ b/fs/buffer.c
> @@ -42,6 +42,9 @@
>  #include <linux/mpage.h>
>  #include <linux/bit_spinlock.h>
>
> +#define CREATE_TRACE_POINTS
> +#include <trace/events/fs.h>
> +
>  static int fsync_buffers_list(spinlock_t *lock, struct list_head *list);
>
>  #define BH_ENTRY(list) list_entry((list), struct buffer_head, b_assoc_buffers)
> @@ -82,7 +85,9 @@ EXPORT_SYMBOL(unlock_buffer);
>  */
>  void __wait_on_buffer(struct buffer_head * bh)
>  {
> +       trace_fs_buffer_wait_enter(bh);
>        wait_on_bit(&bh->b_state, BH_Lock, sleep_on_buffer, TASK_UNINTERRUPTIBLE);
> +       trace_fs_buffer_wait_exit(bh);
>  }
>  EXPORT_SYMBOL(__wait_on_buffer);
>
> @@ -1647,6 +1652,8 @@ static int __block_write_full_page(struct inode *inode, struct page *page,
>        head = page_buffers(page);
>        bh = head;
>
> +       trace_block_write_full_page_enter(inode, block, last_block);
> +
>        /*
>         * Get all the dirty buffers mapped to disk addresses and
>         * handle any aliases from the underlying blockdev's mapping.
> @@ -1736,6 +1743,9 @@ done:
>                 * here on.
>                 */
>        }
> +
> +       trace_block_write_full_page_exit(inode, nr_underway, err);
> +
>        return err;
>
>  recover:
> diff --git a/fs/mpage.c b/fs/mpage.c
> index 0afc809..1c3b8e1 100644
> --- a/fs/mpage.c
> +++ b/fs/mpage.c
> @@ -28,6 +28,7 @@
>  #include <linux/backing-dev.h>
>  #include <linux/pagevec.h>
>
> +#include <trace/events/fs.h>
>  /*
>  * I/O completion handler for multipage BIOs.
>  *
> @@ -373,6 +374,8 @@ mpage_readpages(struct address_space *mapping, struct list_head *pages,
>        for (page_idx = 0; page_idx < nr_pages; page_idx++) {
>                struct page *page = list_entry(pages->prev, struct page, lru);
>
> +               if (page_idx == 0)
> +                       trace_mpage_readpages(page, mapping, nr_pages);
>                prefetchw(&page->flags);
>                list_del(&page->lru);
>                if (!add_to_page_cache_lru(page, mapping,
> diff --git a/include/trace/events/fs.h b/include/trace/events/fs.h
> new file mode 100644
> index 0000000..95f7bc8
> --- /dev/null
> +++ b/include/trace/events/fs.h
> @@ -0,0 +1,162 @@
> +#undef TRACE_SYSTEM
> +#define TRACE_SYSTEM fs
> +
> +#if !defined(_TRACE_FS_H) || defined(TRACE_HEADER_MULTI_READ)
> +#define _TRACE_FS_H
> +
> +#include <linux/tracepoint.h>
> +
> +DECLARE_EVENT_CLASS(fs_buffer_wait,
> +
> +       TP_PROTO(struct buffer_head *bh),
> +
> +       TP_ARGS(bh),
> +
> +       TP_STRUCT__entry(
> +               __field(        void *, bh      )
> +       ),
> +
> +       TP_fast_assign(
> +               __entry->bh = bh;
> +       ),
> +
> +       TP_printk("bh %p", __entry->bh)
> +);
> +
> +DEFINE_EVENT(fs_buffer_wait, fs_buffer_wait_enter,
> +
> +       TP_PROTO(struct buffer_head *bh),
> +
> +       TP_ARGS(bh)
> +);
> +
> +DEFINE_EVENT(fs_buffer_wait, fs_buffer_wait_exit,
> +
> +       TP_PROTO(struct buffer_head *bh),
> +
> +       TP_ARGS(bh)
> +);
> +
> +TRACE_EVENT(block_write_full_page_enter,
> +
> +       TP_PROTO(struct inode *inode, sector_t block, sector_t last_block),
> +
> +       TP_ARGS(inode, block, last_block),
> +
> +       TP_STRUCT__entry(
> +               __field(        dev_t,          dev             )
> +               __field(        unsigned long,  ino             )
> +               __field(        sector_t,       block           )
> +               __field(        sector_t,       last_block      )
> +       ),
> +
> +       TP_fast_assign(
> +               __entry->dev            = inode->i_sb->s_dev;
> +               __entry->ino            = inode->i_ino;
> +               __entry->block          = block;
> +               __entry->last_block     = last_block;
> +       ),
> +
> +       TP_printk("dev %d,%d ino %lu block %lu last block %lu",
> +                 MAJOR(__entry->dev), MINOR(__entry->dev),
> +                 __entry->ino,
> +                 (unsigned long)__entry->block,
> +                 (unsigned long)__entry->last_block)
> +);
> +
> +TRACE_EVENT(block_write_full_page_exit,
> +
> +       TP_PROTO(struct inode *inode, int nr_underway, int err),
> +
> +       TP_ARGS(inode, nr_underway, err),
> +
> +       TP_STRUCT__entry(
> +               __field(        dev_t,          dev             )
> +               __field(        unsigned long,  ino             )
> +               __field(        int,            nr_underway     )
> +               __field(        int,            err             )
> +       ),
> +
> +       TP_fast_assign(
> +               __entry->dev            = inode->i_sb->s_dev;
> +               __entry->ino            = inode->i_ino;
> +               __entry->nr_underway    = nr_underway;
> +               __entry->err            = err;
> +       ),
> +
> +       TP_printk("dev %d,%d ino %lu nr_underway %d err %d",
> +                 MAJOR(__entry->dev), MINOR(__entry->dev),
> +                 __entry->ino, __entry->nr_underway, __entry->err)
> +);
> +
> +DECLARE_EVENT_CLASS(file_read,
> +       TP_PROTO(struct inode *inode, loff_t pos, size_t len),
> +
> +       TP_ARGS(inode, pos, len),
> +
> +       TP_STRUCT__entry(
> +               __field(        ino_t,  ino                     )
> +               __field(        dev_t,  dev                     )
> +               __field(        loff_t, pos                     )
> +               __field(        size_t, len                     )
> +       ),
> +
> +       TP_fast_assign(
> +               __entry->ino    = inode->i_ino;
> +               __entry->dev    = inode->i_sb->s_dev;
> +               __entry->pos    = pos;
> +               __entry->len    = len;
> +       ),
> +
> +       TP_printk("dev %d,%d ino %lu pos %llu len %lu",
> +                 MAJOR(__entry->dev), MINOR(__entry->dev),
> +                 (unsigned long) __entry->ino,
> +                  __entry->pos,  __entry->len)
> +);
> +
> +DEFINE_EVENT(file_read, file_read_enter,
> +
> +       TP_PROTO(struct inode *inode, loff_t pos, size_t len),
> +
> +       TP_ARGS(inode, pos, len)
> +);
> +
> +DEFINE_EVENT(file_read, file_read_exit,
> +
> +       TP_PROTO(struct inode *inode, loff_t pos, size_t len),
> +
> +       TP_ARGS(inode, pos, len)
> +);
> +
> +TRACE_EVENT(mpage_readpages,
> +       TP_PROTO(struct page *page, struct address_space *mapping,
> +                unsigned nr_pages),
> +
> +       TP_ARGS(page, mapping, nr_pages),
> +
> +       TP_STRUCT__entry(
> +               __field(        pgoff_t, index                  )
> +               __field(        ino_t,  ino                     )
> +               __field(        dev_t,  dev                     )
> +               __field(        unsigned,       nr_pages        )
> +
> +       ),
> +
> +       TP_fast_assign(
> +               __entry->index  = page->index;
> +               __entry->ino    = mapping->host->i_ino;
> +               __entry->dev    = mapping->host->i_sb->s_dev;
> +               __entry->nr_pages       = nr_pages;
> +       ),
> +
> +       TP_printk("dev %d,%d ino %lu page_index %lu nr_pages %u",
> +                 MAJOR(__entry->dev), MINOR(__entry->dev),
> +                 (unsigned long) __entry->ino,
> +                 __entry->index, __entry->nr_pages)
> +);
> +
> +#endif /* _TRACE_FS_H */
> +
> +/* This part must be outside protection */
> +#include <trace/define_trace.h>
> +
> diff --git a/mm/filemap.c b/mm/filemap.c
> index c641edf..94e549c 100644
> --- a/mm/filemap.c
> +++ b/mm/filemap.c
> @@ -42,7 +42,7 @@
>  #include <linux/buffer_head.h> /* for try_to_free_buffers */
>
>  #include <asm/mman.h>
> -
> +#include <trace/events/fs.h>
>  /*
>  * Shared mappings implemented 30.11.1994. It's not fully working yet,
>  * though.
> @@ -1054,6 +1054,7 @@ static void do_generic_file_read(struct file *filp, loff_t *ppos,
>        unsigned int prev_offset;
>        int error;
>
> +       trace_file_read_enter(inode, *ppos, desc->count);
>        index = *ppos >> PAGE_CACHE_SHIFT;
>        prev_index = ra->prev_pos >> PAGE_CACHE_SHIFT;
>        prev_offset = ra->prev_pos & (PAGE_CACHE_SIZE-1);
> @@ -1254,6 +1255,7 @@ out:
>        ra->prev_pos <<= PAGE_CACHE_SHIFT;
>        ra->prev_pos |= prev_offset;
>
> +       trace_file_read_exit(inode, *ppos, desc->written);
>        *ppos = ((loff_t)index << PAGE_CACHE_SHIFT) + offset;
>        file_accessed(filp);
>  }
> --
> 1.7.3.1
>
>
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/