[PATCH v2 2/2] hexdump: Allow skipping identical lines

From: Miquel Raynal
Date: Fri Jan 10 2025 - 13:56:27 EST


When dumping long buffers (especially for debug purposes) it can be very
convenient to avoid printing every line of the buffer when consecutive
lines are identical. Typically on embedded devices, the console is
wired to a UART running at 115200 baud, which makes the dumps
very (very) slow. In this case, having a flag to avoid printing
duplicated lines is handy.

Example of a made-up repetitive output:
0f 53 63 47 56 55 78 7a aa b7 8c ff ff ff ff ff
ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
ff ff ff ff ff ff ff ff ff ff ff ff 01 2a 39 eb

Same but with the flag enabled:
0f 53 63 47 56 55 78 7a aa b7 8c ff ff ff ff ff
ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
*
ff ff ff ff ff ff ff ff ff ff ff ff 01 2a 39 eb

Signed-off-by: Miquel Raynal <miquel.raynal@xxxxxxxxxxx>
---
Documentation/core-api/printk-formats.rst | 4 +++-
include/linux/printk.h | 1 +
lib/hexdump.c | 21 +++++++++++++++++++--
3 files changed, 23 insertions(+), 3 deletions(-)

diff --git a/Documentation/core-api/printk-formats.rst b/Documentation/core-api/printk-formats.rst
index ecccc0473da9c10f45f2464566f690472c61401e..90e6616284d1faf5882019eba8de6bebffe4883a 100644
--- a/Documentation/core-api/printk-formats.rst
+++ b/Documentation/core-api/printk-formats.rst
@@ -310,7 +310,9 @@ Raw buffer as a hex string

For printing small buffers (up to 64 bytes long) as a hex string with a
certain separator. For larger buffers consider using
-:c:func:`print_hex_dump`.
+:c:func:`print_hex_dump`, especially since duplicated lines can be
+skipped automatically to reduce the overhead with the
+``DUMP_FLAG_SKIP_IDENTICAL_LINES`` flag.

MAC/FDDI addresses
------------------
diff --git a/include/linux/printk.h b/include/linux/printk.h
index 59e9e4c445108d66a3df422cfeaf79920e2ff08f..f89b4117483dce34d2da2f699848f16304deb942 100644
--- a/include/linux/printk.h
+++ b/include/linux/printk.h
@@ -755,6 +755,7 @@ enum {

enum {
DUMP_FLAG_ASCII,
+ DUMP_FLAG_SKIP_IDENTICAL_LINES,
};

extern int hex_dump_to_buffer(const void *buf, size_t len, int rowsize,
diff --git a/lib/hexdump.c b/lib/hexdump.c
index 4ac9c32c28a046d2ca037eaef95c785c1a866627..eaacd3f95b0442c0cebe884b26d9fc12e237eb68 100644
--- a/lib/hexdump.c
+++ b/lib/hexdump.c
@@ -4,6 +4,7 @@
*/

#include <linux/types.h>
+#include <linux/string.h>
#include <linux/ctype.h>
#include <linux/errno.h>
#include <linux/kernel.h>
@@ -239,7 +240,8 @@ EXPORT_SYMBOL(hex_dump_to_buffer);
* @buf: data blob to dump
* @len: number of bytes in the @buf
* @flags: controls the output, typically %DUMP_FLAG_ASCII will print the ascii
- * equivalent after the hex output.
+ * equivalent after the hex output, %DUMP_FLAG_SKIP_IDENTICAL_LINES will display
+ * a single '*' instead of duplicated lines.
*
* Given a buffer of u8 data, print_hex_dump() prints a hex + ASCII dump
* to the kernel log at the specified kernel log level, with an optional
@@ -264,8 +266,9 @@ void print_hex_dump(const char *level, const char *prefix_str, int prefix_type,
const void *buf, size_t len, unsigned int flags)
{
const u8 *ptr = buf;
- int i, linelen, remaining = len;
+ int i, prev_i, linelen, remaining = len;
unsigned char linebuf[32 * 3 + 2 + 32 + 1];
+ bool same_line = false;

if (rowsize != 16 && rowsize != 32)
rowsize = 16;
@@ -274,6 +277,20 @@ void print_hex_dump(const char *level, const char *prefix_str, int prefix_type,
linelen = min(remaining, rowsize);
remaining -= rowsize;

+ if (flags & DUMP_FLAG_SKIP_IDENTICAL_LINES) {
+ if (i && !memcmp(ptr + i, ptr + prev_i, linelen)) {
+ prev_i = i;
+ if (same_line)
+ continue;
+ same_line = true;
+ printk("%s*\n", level);
+ continue;
+ } else {
+ prev_i = i;
+ same_line = false;
+ }
+ }
+
hex_dump_to_buffer(ptr + i, linelen, rowsize, groupsize,
linebuf, sizeof(linebuf),
flags & DUMP_FLAG_ASCII);

--
2.47.0