Re: [PATCH 2/8] ring-buffer: Add buffer meta data for persistent ring buffer
From: Google
Date: Thu Feb 06 2025 - 00:10:46 EST
On Wed, 05 Feb 2025 17:50:33 -0500
Steven Rostedt <rostedt@xxxxxxxxxxx> wrote:
> From: Steven Rostedt <rostedt@xxxxxxxxxxx>
>
> Instead of just having a meta data at the first page of each sub buffer
> that has duplicate data, add a new meta page to the entire block of memory
> that holds the duplicate data and remove it from the sub buffer meta data.
>
> This will open up the extra memory in this first page to be used by the
> tracer for its own persistent data.
>
> Signed-off-by: Steven Rostedt (Google) <rostedt@xxxxxxxxxxx>
> ---
> kernel/trace/ring_buffer.c | 165 ++++++++++++++++++++++++++-----------
> 1 file changed, 115 insertions(+), 50 deletions(-)
>
> diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
> index 7146b780176f..0446d053dbd6 100644
> --- a/kernel/trace/ring_buffer.c
> +++ b/kernel/trace/ring_buffer.c
> @@ -49,7 +49,12 @@ static void update_pages_handler(struct work_struct *work);
>
Can you add a comment to explain the layout of these meta-data and subbufs?
> struct ring_buffer_meta {
> int magic;
> - int struct_size;
> + int struct_sizes;
> + unsigned long total_size;
> + unsigned long buffers_offset;
> +};
> +
> +struct ring_buffer_cpu_meta {
> unsigned long kaslr_addr;
> unsigned long first_buffer;
> unsigned long head_buffer;
> @@ -517,7 +522,7 @@ struct ring_buffer_per_cpu {
> struct mutex mapping_lock;
> unsigned long *subbuf_ids; /* ID to subbuf VA */
> struct trace_buffer_meta *meta_page;
> - struct ring_buffer_meta *ring_meta;
> + struct ring_buffer_cpu_meta *ring_meta;
>
> /* ring buffer pages to update, > 0 to add, < 0 to remove */
> long nr_pages_to_update;
> @@ -550,6 +555,8 @@ struct trace_buffer {
> unsigned long range_addr_start;
> unsigned long range_addr_end;
>
> + struct ring_buffer_meta *meta;
> +
> unsigned long kaslr_addr;
>
> unsigned int subbuf_size;
> @@ -1270,7 +1277,7 @@ static void rb_head_page_activate(struct ring_buffer_per_cpu *cpu_buffer)
> rb_set_list_to_head(head->list.prev);
>
> if (cpu_buffer->ring_meta) {
> - struct ring_buffer_meta *meta = cpu_buffer->ring_meta;
> + struct ring_buffer_cpu_meta *meta = cpu_buffer->ring_meta;
> meta->head_buffer = (unsigned long)head->page;
> }
> }
> @@ -1568,7 +1575,7 @@ static void rb_check_pages(struct ring_buffer_per_cpu *cpu_buffer)
> static unsigned long
> rb_range_align_subbuf(unsigned long addr, int subbuf_size, int nr_subbufs)
> {
> - addr += sizeof(struct ring_buffer_meta) +
> + addr += sizeof(struct ring_buffer_cpu_meta) +
> sizeof(int) * nr_subbufs;
> return ALIGN(addr, subbuf_size);
> }
> @@ -1579,19 +1586,22 @@ rb_range_align_subbuf(unsigned long addr, int subbuf_size, int nr_subbufs)
> static void *rb_range_meta(struct trace_buffer *buffer, int nr_pages, int cpu)
> {
> int subbuf_size = buffer->subbuf_size + BUF_PAGE_HDR_SIZE;
> - unsigned long ptr = buffer->range_addr_start;
> - struct ring_buffer_meta *meta;
> + struct ring_buffer_cpu_meta *meta;
> + struct ring_buffer_meta *bmeta;
> + unsigned long ptr;
> int nr_subbufs;
>
> - if (!ptr)
> + bmeta = buffer->meta;
> + if (!bmeta)
> return NULL;
>
> + ptr = (unsigned long)bmeta + bmeta->buffers_offset;
> + meta = (struct ring_buffer_cpu_meta *)ptr;
> +
> /* When nr_pages passed in is zero, the first meta has already been initialized */
> if (!nr_pages) {
> - meta = (struct ring_buffer_meta *)ptr;
> nr_subbufs = meta->nr_subbufs;
> } else {
> - meta = NULL;
> /* Include the reader page */
> nr_subbufs = nr_pages + 1;
> }
> @@ -1623,7 +1633,7 @@ static void *rb_range_meta(struct trace_buffer *buffer, int nr_pages, int cpu)
> }
>
> /* Return the start of subbufs given the meta pointer */
> -static void *rb_subbufs_from_meta(struct ring_buffer_meta *meta)
> +static void *rb_subbufs_from_meta(struct ring_buffer_cpu_meta *meta)
> {
> int subbuf_size = meta->subbuf_size;
> unsigned long ptr;
> @@ -1639,7 +1649,7 @@ static void *rb_subbufs_from_meta(struct ring_buffer_meta *meta)
> */
> static void *rb_range_buffer(struct ring_buffer_per_cpu *cpu_buffer, int idx)
> {
> - struct ring_buffer_meta *meta;
> + struct ring_buffer_cpu_meta *meta;
> unsigned long ptr;
> int subbuf_size;
>
> @@ -1664,14 +1674,73 @@ static void *rb_range_buffer(struct ring_buffer_per_cpu *cpu_buffer, int idx)
> return (void *)ptr;
> }
>
> +/*
> + * See if the existing memory contains a valid meta section.
> + * if so, use that, otherwise initialize it.
Note that this must be called only for the persistent ring buffer.
> + */
> +static bool rb_meta_init(struct trace_buffer *buffer)
> +{
> + unsigned long ptr = buffer->range_addr_start;
> + struct ring_buffer_meta *bmeta;
> + unsigned long total_size;
> + int struct_sizes;
> +
> + bmeta = (struct ring_buffer_meta *)ptr;
> + buffer->meta = bmeta;
> +
> + total_size = buffer->range_addr_end - buffer->range_addr_start;
> +
> + struct_sizes = sizeof(struct ring_buffer_cpu_meta);
> + struct_sizes |= sizeof(*bmeta) << 16;
> +
> + /* The first buffer will start one page after the meta page */
> + ptr += sizeof(*bmeta);
> + ptr = ALIGN(ptr, PAGE_SIZE);
> +
> + if (bmeta->magic != RING_BUFFER_META_MAGIC) {
> + pr_info("Ring buffer boot meta mismatch of magic\n");
> + goto init;
> + }
> +
> + if (bmeta->struct_sizes != struct_sizes) {
> + pr_info("Ring buffer boot meta mismatch of struct size\n");
> + goto init;
> + }
> +
> + if (bmeta->total_size != total_size) {
> + pr_info("Ring buffer boot meta mismatch of total size\n");
> + goto init;
> + }
> +
> + if (bmeta->buffers_offset > bmeta->total_size) {
> + pr_info("Ring buffer boot meta mismatch of offset outside of total size\n");
> + goto init;
> + }
> +
> + if (bmeta->buffers_offset != (void *)ptr - (void *)bmeta) {
> + pr_info("Ring buffer boot meta mismatch of first buffer offset\n");
> + goto init;
> + }
> +
> + return true;
> +
> + init:
> + bmeta->magic = RING_BUFFER_META_MAGIC;
> + bmeta->struct_sizes = struct_sizes;
> + bmeta->total_size = total_size;
> + bmeta->buffers_offset = (void *)ptr - (void *)bmeta;
> +
> + return false;
> +}
> +
> /*
> * See if the existing memory contains valid ring buffer data.
> * As the previous kernel must be the same as this kernel, all
> * the calculations (size of buffers and number of buffers)
> * must be the same.
> */
> -static bool rb_meta_valid(struct ring_buffer_meta *meta, int cpu,
> - struct trace_buffer *buffer, int nr_pages)
> +static bool rb_cpu_meta_valid(struct ring_buffer_cpu_meta *meta, int cpu,
> + struct trace_buffer *buffer, int nr_pages)
> {
> int subbuf_size = PAGE_SIZE;
> struct buffer_data_page *subbuf;
> @@ -1679,20 +1748,6 @@ static bool rb_meta_valid(struct ring_buffer_meta *meta, int cpu,
> unsigned long buffers_end;
> int i;
>
> - /* Check the meta magic and meta struct size */
> - if (meta->magic != RING_BUFFER_META_MAGIC ||
> - meta->struct_size != sizeof(*meta)) {
> - pr_info("Ring buffer boot meta[%d] mismatch of magic or struct size\n", cpu);
> - return false;
> - }
> -
> - /* The subbuffer's size and number of subbuffers must match */
> - if (meta->subbuf_size != subbuf_size ||
> - meta->nr_subbufs != nr_pages + 1) {
> - pr_info("Ring buffer boot meta [%d] mismatch of subbuf_size/nr_pages\n", cpu);
> - return false;
> - }
> -
> buffers_start = meta->first_buffer;
> buffers_end = meta->first_buffer + (subbuf_size * meta->nr_subbufs);
>
> @@ -1730,7 +1785,7 @@ static bool rb_meta_valid(struct ring_buffer_meta *meta, int cpu,
> return true;
> }
>
> -static int rb_meta_subbuf_idx(struct ring_buffer_meta *meta, void *subbuf);
> +static int rb_meta_subbuf_idx(struct ring_buffer_cpu_meta *meta, void *subbuf);
>
> static int rb_read_data_buffer(struct buffer_data_page *dpage, int tail, int cpu,
> unsigned long long *timestamp, u64 *delta_ptr)
> @@ -1797,7 +1852,7 @@ static int rb_validate_buffer(struct buffer_data_page *dpage, int cpu)
> /* If the meta data has been validated, now validate the events */
> static void rb_meta_validate_events(struct ring_buffer_per_cpu *cpu_buffer)
> {
> - struct ring_buffer_meta *meta = cpu_buffer->ring_meta;
> + struct ring_buffer_cpu_meta *meta = cpu_buffer->ring_meta;
> struct buffer_page *head_page;
> unsigned long entry_bytes = 0;
> unsigned long entries = 0;
> @@ -1873,7 +1928,7 @@ static void rb_meta_validate_events(struct ring_buffer_per_cpu *cpu_buffer)
> }
> }
>
> -static void rb_meta_init_text_addr(struct ring_buffer_meta *meta)
> +static void rb_meta_init_text_addr(struct ring_buffer_cpu_meta *meta)
> {
> #ifdef CONFIG_RANDOMIZE_BASE
> meta->kaslr_addr = kaslr_offset();
> @@ -1884,18 +1939,25 @@ static void rb_meta_init_text_addr(struct ring_buffer_meta *meta)
>
> static void rb_range_meta_init(struct trace_buffer *buffer, int nr_pages)
> {
> - struct ring_buffer_meta *meta;
> + struct ring_buffer_cpu_meta *meta;
> + struct ring_buffer_meta *bmeta;
> unsigned long delta;
> void *subbuf;
> + bool valid = false;
> int cpu;
> int i;
>
> + if (rb_meta_init(buffer))
> + valid = true;
> +
> + bmeta = buffer->meta;
> +
> for (cpu = 0; cpu < nr_cpu_ids; cpu++) {
> void *next_meta;
>
> meta = rb_range_meta(buffer, nr_pages, cpu);
>
> - if (rb_meta_valid(meta, cpu, buffer, nr_pages)) {
> + if (valid && rb_cpu_meta_valid(meta, cpu, buffer, nr_pages)) {
> /* Make the mappings match the current address */
> subbuf = rb_subbufs_from_meta(meta);
> delta = (unsigned long)subbuf - meta->first_buffer;
> @@ -1913,9 +1975,6 @@ static void rb_range_meta_init(struct trace_buffer *buffer, int nr_pages)
>
> memset(meta, 0, next_meta - (void *)meta);
>
> - meta->magic = RING_BUFFER_META_MAGIC;
> - meta->struct_size = sizeof(*meta);
> -
> meta->nr_subbufs = nr_pages + 1;
> meta->subbuf_size = PAGE_SIZE;
>
> @@ -1943,7 +2002,7 @@ static void rb_range_meta_init(struct trace_buffer *buffer, int nr_pages)
> static void *rbm_start(struct seq_file *m, loff_t *pos)
> {
> struct ring_buffer_per_cpu *cpu_buffer = m->private;
> - struct ring_buffer_meta *meta = cpu_buffer->ring_meta;
> + struct ring_buffer_cpu_meta *meta = cpu_buffer->ring_meta;
> unsigned long val;
>
> if (!meta)
> @@ -1968,7 +2027,7 @@ static void *rbm_next(struct seq_file *m, void *v, loff_t *pos)
> static int rbm_show(struct seq_file *m, void *v)
> {
> struct ring_buffer_per_cpu *cpu_buffer = m->private;
> - struct ring_buffer_meta *meta = cpu_buffer->ring_meta;
> + struct ring_buffer_cpu_meta *meta = cpu_buffer->ring_meta;
> unsigned long val = (unsigned long)v;
>
> if (val == 1) {
> @@ -2017,7 +2076,7 @@ int ring_buffer_meta_seq_init(struct file *file, struct trace_buffer *buffer, in
> static void rb_meta_buffer_update(struct ring_buffer_per_cpu *cpu_buffer,
> struct buffer_page *bpage)
> {
> - struct ring_buffer_meta *meta = cpu_buffer->ring_meta;
> + struct ring_buffer_cpu_meta *meta = cpu_buffer->ring_meta;
>
> if (meta->head_buffer == (unsigned long)bpage->page)
> cpu_buffer->head_page = bpage;
> @@ -2032,7 +2091,7 @@ static int __rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,
> long nr_pages, struct list_head *pages)
> {
> struct trace_buffer *buffer = cpu_buffer->buffer;
> - struct ring_buffer_meta *meta = NULL;
> + struct ring_buffer_cpu_meta *meta = NULL;
> struct buffer_page *bpage, *tmp;
> bool user_thread = current->mm != NULL;
> gfp_t mflags;
> @@ -2156,7 +2215,7 @@ static struct ring_buffer_per_cpu *
> rb_allocate_cpu_buffer(struct trace_buffer *buffer, long nr_pages, int cpu)
> {
> struct ring_buffer_per_cpu *cpu_buffer;
> - struct ring_buffer_meta *meta;
> + struct ring_buffer_cpu_meta *meta;
> struct buffer_page *bpage;
> struct page *page;
> int ret;
> @@ -2327,10 +2386,16 @@ static struct trace_buffer *alloc_buffer(unsigned long size, unsigned flags,
>
> /* If start/end are specified, then that overrides size */
> if (start && end) {
> + unsigned long buffers_start;
> unsigned long ptr;
> int n;
>
> size = end - start;
> +
> + /* Subtract the buffer meta data */
> + size -= PAGE_SIZE;
> + buffers_start = start + PAGE_SIZE;
> +
So the buffer meta data is PAGE_SIZE aligned?
> size = size / nr_cpu_ids;
>
> /*
> @@ -2340,7 +2405,7 @@ static struct trace_buffer *alloc_buffer(unsigned long size, unsigned flags,
> * needed, plus account for the integer array index that
> * will be appended to the meta data.
> */
> - nr_pages = (size - sizeof(struct ring_buffer_meta)) /
> + nr_pages = (size - sizeof(struct ring_buffer_cpu_meta)) /
> (subbuf_size + sizeof(int));
> /* Need at least two pages plus the reader page */
> if (nr_pages < 3)
> @@ -2348,8 +2413,8 @@ static struct trace_buffer *alloc_buffer(unsigned long size, unsigned flags,
>
> again:
> /* Make sure that the size fits aligned */
> - for (n = 0, ptr = start; n < nr_cpu_ids; n++) {
> - ptr += sizeof(struct ring_buffer_meta) +
> + for (n = 0, ptr = buffers_start; n < nr_cpu_ids; n++) {
> + ptr += sizeof(struct ring_buffer_cpu_meta) +
> sizeof(int) * nr_pages;
> ptr = ALIGN(ptr, subbuf_size);
> ptr += subbuf_size * nr_pages;
> @@ -3075,7 +3140,7 @@ static void rb_inc_iter(struct ring_buffer_iter *iter)
> }
>
> /* Return the index into the sub-buffers for a given sub-buffer */
> -static int rb_meta_subbuf_idx(struct ring_buffer_meta *meta, void *subbuf)
> +static int rb_meta_subbuf_idx(struct ring_buffer_cpu_meta *meta, void *subbuf)
> {
> void *subbuf_array;
>
> @@ -3087,7 +3152,7 @@ static int rb_meta_subbuf_idx(struct ring_buffer_meta *meta, void *subbuf)
> static void rb_update_meta_head(struct ring_buffer_per_cpu *cpu_buffer,
> struct buffer_page *next_page)
> {
> - struct ring_buffer_meta *meta = cpu_buffer->ring_meta;
> + struct ring_buffer_cpu_meta *meta = cpu_buffer->ring_meta;
> unsigned long old_head = (unsigned long)next_page->page;
> unsigned long new_head;
>
> @@ -3104,7 +3169,7 @@ static void rb_update_meta_head(struct ring_buffer_per_cpu *cpu_buffer,
> static void rb_update_meta_reader(struct ring_buffer_per_cpu *cpu_buffer,
> struct buffer_page *reader)
> {
> - struct ring_buffer_meta *meta = cpu_buffer->ring_meta;
> + struct ring_buffer_cpu_meta *meta = cpu_buffer->ring_meta;
> void *old_reader = cpu_buffer->reader_page->page;
> void *new_reader = reader->page;
> int id;
> @@ -3733,7 +3798,7 @@ rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer)
> rb_page_write(cpu_buffer->commit_page));
> rb_inc_page(&cpu_buffer->commit_page);
> if (cpu_buffer->ring_meta) {
> - struct ring_buffer_meta *meta = cpu_buffer->ring_meta;
> + struct ring_buffer_cpu_meta *meta = cpu_buffer->ring_meta;
> meta->commit_buffer = (unsigned long)cpu_buffer->commit_page->page;
> }
> /* add barrier to keep gcc from optimizing too much */
> @@ -5986,7 +6051,7 @@ rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer)
> if (cpu_buffer->mapped) {
> rb_update_meta_page(cpu_buffer);
> if (cpu_buffer->ring_meta) {
> - struct ring_buffer_meta *meta = cpu_buffer->ring_meta;
> + struct ring_buffer_cpu_meta *meta = cpu_buffer->ring_meta;
> meta->commit_buffer = meta->head_buffer;
> }
> }
> @@ -6020,7 +6085,7 @@ static void reset_disabled_cpu_buffer(struct ring_buffer_per_cpu *cpu_buffer)
> void ring_buffer_reset_cpu(struct trace_buffer *buffer, int cpu)
> {
> struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
> - struct ring_buffer_meta *meta;
> + struct ring_buffer_cpu_meta *meta;
>
> if (!cpumask_test_cpu(cpu, buffer->cpumask))
> return;
> @@ -6058,7 +6123,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_reset_cpu);
> void ring_buffer_reset_online_cpus(struct trace_buffer *buffer)
> {
> struct ring_buffer_per_cpu *cpu_buffer;
> - struct ring_buffer_meta *meta;
> + struct ring_buffer_cpu_meta *meta;
> int cpu;
>
> /* prevent another thread from changing buffer sizes */
Thank you,
> --
> 2.45.2
>
>
--
Masami Hiramatsu (Google) <mhiramat@xxxxxxxxxx>