tracing: Update subbuffer with kilobytes not page order
authorSteven Rostedt (Google) <rostedt@goodmis.org>
Tue, 19 Dec 2023 18:54:29 +0000 (13:54 -0500)
committerSteven Rostedt (Google) <rostedt@goodmis.org>
Thu, 21 Dec 2023 16:04:15 +0000 (11:04 -0500)
Using page order for deciding what the size of the ring buffer sub buffers
are is exposing a bit too much of the implementation. Although the sub
buffers are only allocated in orders of pages, allow the user to specify
the minimum size of each sub-buffer via kilobytes like they can with the
buffer size itself.

If the user specifies 3 via:

  echo 3 > buffer_subbuf_size_kb

Then the sub-buffer size will round up to 4kb (on a 4kb page size system).

If they specify:

  echo 6 > buffer_subbuf_size_kb

The sub-buffer size will become 8kb.

and so on.

Link: https://lore.kernel.org/linux-trace-kernel/20231219185631.809766769@goodmis.org
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Tzvetomir Stoyanov <tz.stoyanov@gmail.com>
Cc: Vincent Donnefort <vdonnefort@google.com>
Cc: Kent Overstreet <kent.overstreet@gmail.com>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
Documentation/trace/ftrace.rst
kernel/trace/trace.c
tools/testing/selftests/ftrace/test.d/00basic/ringbuffer_order.tc [deleted file]
tools/testing/selftests/ftrace/test.d/00basic/ringbuffer_subbuf_size.tc [new file with mode: 0644]

index 231d26ceedb05c138bc088d620490b11970369dd..933e7efb9f1b322ffc96b5a47d26b76401483d6c 100644 (file)
@@ -203,32 +203,26 @@ of ftrace. Here is a list of some of the key files:
 
        This displays the total combined size of all the trace buffers.
 
-  buffer_subbuf_order:
-
-       This sets or displays the sub buffer page size order. The ring buffer
-       is broken up into several same size "sub buffers". An event can not be
-       bigger than the size of the sub buffer. Normally, the sub buffer is
-       the size of the architecture's page (4K on x86). The sub buffer also
-       contains meta data at the start which also limits the size of an event.
-       That means when the sub buffer is a page size, no event can be larger
-       than the page size minus the sub buffer meta data.
-
-       The buffer_subbuf_order allows the user to change the size of the sub
-       buffer. As the sub buffer is a set of pages by the power of 2, thus
-       the sub buffer total size is defined by the order:
-
-       order           size
-       ----            ----
-       0               PAGE_SIZE
-       1               PAGE_SIZE * 2
-       2               PAGE_SIZE * 4
-       3               PAGE_SIZE * 8
-
-       Changing the order will change the sub buffer size allowing for events
-       to be larger than the page size.
-
-       Note: When changing the order, tracing is stopped and any data in the
-       ring buffer and the snapshot buffer will be discarded.
+  buffer_subbuf_size_kb:
+
+       This sets or displays the sub buffer size. The ring buffer is broken up
+       into several same size "sub buffers". An event can not be bigger than
+       the size of the sub buffer. Normally, the sub buffer is the size of the
+       architecture's page (4K on x86). The sub buffer also contains meta data
+       at the start which also limits the size of an event.  That means when
+       the sub buffer is a page size, no event can be larger than the page
+       size minus the sub buffer meta data.
+
+       Note, the buffer_subbuf_size_kb is a way for the user to specify the
+       minimum size of the subbuffer. The kernel may make it bigger due to the
+       implementation details, or simply fail the operation if the kernel can
+       not handle the request.
+
+       Changing the sub buffer size allows for events to be larger than the
+       page size.
+
+       Note: When changing the sub-buffer size, tracing is stopped and any
+       data in the ring buffer and the snapshot buffer will be discarded.
 
   free_buffer:
 
index 82303bd2bba1372c267a6f2199f3a72d12988d17..46dbe22121e911f896f742669091a394ce913ff9 100644 (file)
@@ -9384,42 +9384,54 @@ static const struct file_operations buffer_percent_fops = {
 };
 
 static ssize_t
-buffer_order_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
+buffer_subbuf_size_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
 {
        struct trace_array *tr = filp->private_data;
+       size_t size;
        char buf[64];
+       int order;
        int r;
 
-       r = sprintf(buf, "%d\n", ring_buffer_subbuf_order_get(tr->array_buffer.buffer));
+       order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
+       size = (PAGE_SIZE << order) / 1024;
+
+       r = sprintf(buf, "%zd\n", size);
 
        return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
 }
 
 static ssize_t
-buffer_order_write(struct file *filp, const char __user *ubuf,
-                  size_t cnt, loff_t *ppos)
+buffer_subbuf_size_write(struct file *filp, const char __user *ubuf,
+                        size_t cnt, loff_t *ppos)
 {
        struct trace_array *tr = filp->private_data;
        unsigned long val;
        int old_order;
+       int order;
+       int pages;
        int ret;
 
        ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
        if (ret)
                return ret;
 
+       val *= 1024; /* value passed in is in KB */
+
+       pages = DIV_ROUND_UP(val, PAGE_SIZE);
+       order = fls(pages - 1);
+
        /* limit between 1 and 128 system pages */
-       if (val < 0 || val > 7)
+       if (order < 0 || order > 7)
                return -EINVAL;
 
        /* Do not allow tracing while changing the order of the ring buffer */
        tracing_stop_tr(tr);
 
        old_order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
-       if (old_order == val)
+       if (old_order == order)
                goto out;
 
-       ret = ring_buffer_subbuf_order_set(tr->array_buffer.buffer, val);
+       ret = ring_buffer_subbuf_order_set(tr->array_buffer.buffer, order);
        if (ret)
                goto out;
 
@@ -9428,7 +9440,7 @@ buffer_order_write(struct file *filp, const char __user *ubuf,
        if (!tr->allocated_snapshot)
                goto out_max;
 
-       ret = ring_buffer_subbuf_order_set(tr->max_buffer.buffer, val);
+       ret = ring_buffer_subbuf_order_set(tr->max_buffer.buffer, order);
        if (ret) {
                /* Put back the old order */
                cnt = ring_buffer_subbuf_order_set(tr->array_buffer.buffer, old_order);
@@ -9460,10 +9472,10 @@ buffer_order_write(struct file *filp, const char __user *ubuf,
        return cnt;
 }
 
-static const struct file_operations buffer_order_fops = {
+static const struct file_operations buffer_subbuf_size_fops = {
        .open           = tracing_open_generic_tr,
-       .read           = buffer_order_read,
-       .write          = buffer_order_write,
+       .read           = buffer_subbuf_size_read,
+       .write          = buffer_subbuf_size_write,
        .release        = tracing_release_generic_tr,
        .llseek         = default_llseek,
 };
@@ -9934,8 +9946,8 @@ init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
        trace_create_file("buffer_percent", TRACE_MODE_WRITE, d_tracer,
                        tr, &buffer_percent_fops);
 
-       trace_create_file("buffer_subbuf_order", TRACE_MODE_WRITE, d_tracer,
-                         tr, &buffer_order_fops);
+       trace_create_file("buffer_subbuf_size_kb", TRACE_MODE_WRITE, d_tracer,
+                         tr, &buffer_subbuf_size_fops);
 
        create_trace_options_dir(tr);
 
diff --git a/tools/testing/selftests/ftrace/test.d/00basic/ringbuffer_order.tc b/tools/testing/selftests/ftrace/test.d/00basic/ringbuffer_order.tc
deleted file mode 100644 (file)
index ecbcc81..0000000
+++ /dev/null
@@ -1,95 +0,0 @@
-#!/bin/sh
-# SPDX-License-Identifier: GPL-2.0
-# description: Change the ringbuffer sub-buffer order
-# requires: buffer_subbuf_order
-# flags: instance
-
-get_buffer_data_size() {
-       sed -ne 's/^.*data.*size:\([0-9][0-9]*\).*/\1/p' events/header_page
-}
-
-get_buffer_data_offset() {
-       sed -ne 's/^.*data.*offset:\([0-9][0-9]*\).*/\1/p' events/header_page
-}
-
-get_event_header_size() {
-       type_len=`sed -ne 's/^.*type_len.*:[^0-9]*\([0-9][0-9]*\).*/\1/p' events/header_event`
-       time_len=`sed -ne 's/^.*time_delta.*:[^0-9]*\([0-9][0-9]*\).*/\1/p' events/header_event`
-       array_len=`sed -ne 's/^.*array.*:[^0-9]*\([0-9][0-9]*\).*/\1/p' events/header_event`
-       total_bits=$((type_len+time_len+array_len))
-       total_bits=$((total_bits+7))
-       echo $((total_bits/8))
-}
-
-get_print_event_buf_offset() {
-       sed -ne 's/^.*buf.*offset:\([0-9][0-9]*\).*/\1/p' events/ftrace/print/format
-}
-
-event_header_size=`get_event_header_size`
-print_header_size=`get_print_event_buf_offset`
-
-data_offset=`get_buffer_data_offset`
-
-marker_meta=$((event_header_size+print_header_size))
-
-make_str() {
-        cnt=$1
-       printf -- 'X%.0s' $(seq $cnt)
-}
-
-write_buffer() {
-       size=$1
-
-       str=`make_str $size`
-
-       # clear the buffer
-       echo > trace
-
-       # write the string into the marker
-       echo $str > trace_marker
-
-       echo $str
-}
-
-test_buffer() {
-       orde=$1
-       page_size=$((4096<<order))
-
-       size=`get_buffer_data_size`
-
-       # the size must be greater than or equal to page_size - data_offset
-       page_size=$((page_size-data_offset))
-       if [ $size -lt $page_size ]; then
-               exit fail
-       fi
-
-       # Now add a little more the meta data overhead will overflow
-
-       str=`write_buffer $size`
-
-       # Make sure the line was broken
-       new_str=`awk ' /tracing_mark_write:/ { sub(/^.*tracing_mark_write: /,"");printf "%s", $0; exit}' trace`
-
-       if [ "$new_str" = "$str" ]; then
-               exit fail;
-       fi
-
-       # Make sure the entire line can be found
-       new_str=`awk ' /tracing_mark_write:/ { sub(/^.*tracing_mark_write: /,"");printf "%s", $0; }' trace`
-
-       if [ "$new_str" != "$str" ]; then
-               exit fail;
-       fi
-}
-
-ORIG=`cat buffer_subbuf_order`
-
-# Could test bigger orders than 3, but then creating the string
-# to write into the ring buffer takes too long
-for a in 0 1 2 3 ; do
-       echo $a > buffer_subbuf_order
-       test_buffer $a
-done
-
-echo $ORIG > buffer_subbuf_order
-
diff --git a/tools/testing/selftests/ftrace/test.d/00basic/ringbuffer_subbuf_size.tc b/tools/testing/selftests/ftrace/test.d/00basic/ringbuffer_subbuf_size.tc
new file mode 100644 (file)
index 0000000..d44d09a
--- /dev/null
@@ -0,0 +1,95 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: Change the ringbuffer sub-buffer size
+# requires: buffer_subbuf_size_kb
+# flags: instance
+
+get_buffer_data_size() {
+       sed -ne 's/^.*data.*size:\([0-9][0-9]*\).*/\1/p' events/header_page
+}
+
+get_buffer_data_offset() {
+       sed -ne 's/^.*data.*offset:\([0-9][0-9]*\).*/\1/p' events/header_page
+}
+
+get_event_header_size() {
+       type_len=`sed -ne 's/^.*type_len.*:[^0-9]*\([0-9][0-9]*\).*/\1/p' events/header_event`
+       time_len=`sed -ne 's/^.*time_delta.*:[^0-9]*\([0-9][0-9]*\).*/\1/p' events/header_event`
+       array_len=`sed -ne 's/^.*array.*:[^0-9]*\([0-9][0-9]*\).*/\1/p' events/header_event`
+       total_bits=$((type_len+time_len+array_len))
+       total_bits=$((total_bits+7))
+       echo $((total_bits/8))
+}
+
+get_print_event_buf_offset() {
+       sed -ne 's/^.*buf.*offset:\([0-9][0-9]*\).*/\1/p' events/ftrace/print/format
+}
+
+event_header_size=`get_event_header_size`
+print_header_size=`get_print_event_buf_offset`
+
+data_offset=`get_buffer_data_offset`
+
+marker_meta=$((event_header_size+print_header_size))
+
+make_str() {
+        cnt=$1
+       printf -- 'X%.0s' $(seq $cnt)
+}
+
+write_buffer() {
+       size=$1
+
+       str=`make_str $size`
+
+       # clear the buffer
+       echo > trace
+
+       # write the string into the marker
+       echo $str > trace_marker
+
+       echo $str
+}
+
+test_buffer() {
+       size_kb=$1
+       page_size=$((size_kb*1024))
+
+       size=`get_buffer_data_size`
+
+       # the size must be greater than or equal to page_size - data_offset
+       page_size=$((page_size-data_offset))
+       if [ $size -lt $page_size ]; then
+               exit fail
+       fi
+
+       # Now add a little more the meta data overhead will overflow
+
+       str=`write_buffer $size`
+
+       # Make sure the line was broken
+       new_str=`awk ' /tracing_mark_write:/ { sub(/^.*tracing_mark_write: /,"");printf "%s", $0; exit}' trace`
+
+       if [ "$new_str" = "$str" ]; then
+               exit fail;
+       fi
+
+       # Make sure the entire line can be found
+       new_str=`awk ' /tracing_mark_write:/ { sub(/^.*tracing_mark_write: /,"");printf "%s", $0; }' trace`
+
+       if [ "$new_str" != "$str" ]; then
+               exit fail;
+       fi
+}
+
+ORIG=`cat buffer_subbuf_size_kb`
+
+# Could test bigger sizes than 32K, but then creating the string
+# to write into the ring buffer takes too long
+for a in 4 8 16 32 ; do
+       echo $a > buffer_subbuf_size_kb
+       test_buffer $a
+done
+
+echo $ORIG > buffer_subbuf_size_kb
+