[PATCH v2 1/2] seq_file: Optimize seq_puts()

From: Christophe JAILLET
Date: Mon Apr 22 2024 - 06:25:55 EST


Most of seq_puts() usages are done with a string literal. In such cases,
the length of the string car be computed at compile time in order to save
a strlen() call at run-time. seq_putc() or seq_write() can then be used
instead.

This saves a few cycles.

To have an estimation of how often this optimization triggers:
$ git grep seq_puts.*\" | wc -l
3436

$ git grep seq_puts.*\".\" | wc -l
84

Signed-off-by: Christophe JAILLET <christophe.jaillet@xxxxxxxxxx>
---
Changes in v2:
- Use a function, instead of a macro [Al Viro]
- Handle the case of 1 char only strings, in order to use seq_putc() [Al Viro]
- Use __always_inline [David Laight]

V1: https://lore.kernel.org/all/5c4f7ad7b88f5026940efa9c8be36a58755ec1b3.1704374916.git.christophe.jaillet@xxxxxxxxxx/


Checked by comparing the output of a few .s files.
Here is one of these outputs:

$ diff -u drivers/clk/clk.s.old drivers/clk/clk.s | grep -C6 seq_w

.L2918:
@@ -46072,10 +46073,11 @@
call clk_prepare_unlock #
# drivers/clk/clk.c:3438: clk_pm_runtime_put_all();
call clk_pm_runtime_put_all #
-# drivers/clk/clk.c:3440: seq_puts(s, "}\n");
+# ./include/linux/seq_file.h:130: seq_write(m, s, __builtin_strlen(s));
+ movl $2, %edx #,
movq $.LC94, %rsi #,
movq %rbp, %rdi # s,
- call seq_puts #
+ call seq_write #
# drivers/clk/clk.c:3441: return 0;
jmp .L2917 #
.size clk_dump_show, .-clk_dump_show
@@ -46200,7 +46202,7 @@
leaq 128(%rbx), %r15 #, _97
movq %r15, %rdi # _97,
--
# drivers/clk/clk.c:1987: __clk_recalc_rates(core, false, 0);
@@ -46480,15 +46482,17 @@
call seq_printf #
jmp .L2950 #
.L2946:
-# drivers/clk/clk.c:3315: seq_puts(s, "-----");
+# ./include/linux/seq_file.h:130: seq_write(m, s, __builtin_strlen(s));
call __sanitizer_cov_trace_pc #
+ movl $5, %edx #,
movq $.LC100, %rsi #,
movq %rbp, %rdi # s,
- call seq_puts #
+ call seq_write #
+# ./include/linux/seq_file.h:131: }
jmp .L2947 #
.L2957:
# drivers/clk/clk.c:1903: return clk_core_get_accuracy_no_lock(core);
- xorl %r14d, %r14d # _34
+ xorl %r14d, %r14d # _35
--
@@ -46736,21 +46740,22 @@
call __sanitizer_cov_trace_pc #
leaq 240(%r12), %rdi #, tmp101
call __tsan_read8 #
-# drivers/clk/clk.c:3355: seq_puts(s, " enable prepare protect duty hardware connection\n");
- movq $.LC104, %rsi #,
+# ./include/linux/seq_file.h:130: seq_write(m, s, __builtin_strlen(s));
+ movl $142, %edx #,
movq %r12, %rdi # s,
+ movq $.LC104, %rsi #,
# drivers/clk/clk.c:3352: struct hlist_head **lists = s->private;
movq 240(%r12), %r13 # s_10(D)->private, lists
-# drivers/clk/clk.c:3355: seq_puts(s, " enable prepare protect duty hardware connection\n");
- call seq_puts #
-# drivers/clk/clk.c:3356: seq_puts(s, " clock count count count rate accuracy phase cycle enable consumer id\n");
+# ./include/linux/seq_file.h:130: seq_write(m, s, __builtin_strlen(s));
+ call seq_write #
+ movl $142, %edx #,
movq $.LC105, %rsi #,
movq %r12, %rdi # s,
- call seq_puts #
-# drivers/clk/clk.c:3357: seq_puts(s, "---------------------------------------------------------------------------------------------------------------------------------------------\n");
+ call seq_write #
movq $.LC106, %rsi #,
movq %r12, %rdi # s,
- call seq_puts #
+ movl $142, %edx #,
+ call seq_write #
# drivers/clk/clk.c:3359: ret = clk_pm_runtime_get_all();
call clk_pm_runtime_get_all #
# drivers/clk/clk.c:3360: if (ret)
@@ -57943,7 +57948,7 @@
subq $88, %rsp #,
# drivers/clk/clk.c:5338: {


The output for seq_putc() generation has also be checked and works.
---
fs/seq_file.c | 4 ++--
include/linux/seq_file.h | 13 ++++++++++++-
2 files changed, 14 insertions(+), 3 deletions(-)

diff --git a/fs/seq_file.c b/fs/seq_file.c
index f5fdaf3b1572..8ef0a07033ca 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -669,7 +669,7 @@ void seq_putc(struct seq_file *m, char c)
}
EXPORT_SYMBOL(seq_putc);

-void seq_puts(struct seq_file *m, const char *s)
+void __seq_puts(struct seq_file *m, const char *s)
{
int len = strlen(s);

@@ -680,7 +680,7 @@ void seq_puts(struct seq_file *m, const char *s)
memcpy(m->buf + m->count, s, len);
m->count += len;
}
-EXPORT_SYMBOL(seq_puts);
+EXPORT_SYMBOL(__seq_puts);

/**
* seq_put_decimal_ull_width - A helper routine for putting decimal numbers
diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h
index 234bcdb1fba4..8bd4fda6e027 100644
--- a/include/linux/seq_file.h
+++ b/include/linux/seq_file.h
@@ -118,7 +118,18 @@ void seq_vprintf(struct seq_file *m, const char *fmt, va_list args);
__printf(2, 3)
void seq_printf(struct seq_file *m, const char *fmt, ...);
void seq_putc(struct seq_file *m, char c);
-void seq_puts(struct seq_file *m, const char *s);
+void __seq_puts(struct seq_file *m, const char *s);
+
+static __always_inline void seq_puts(struct seq_file *m, const char *s)
+{
+ if (!__builtin_constant_p(*s))
+ __seq_puts(m, s);
+ else if (s[0] && !s[1])
+ seq_putc(m, s[0]);
+ else
+ seq_write(m, s, __builtin_strlen(s));
+}
+
void seq_put_decimal_ull_width(struct seq_file *m, const char *delimiter,
unsigned long long num, unsigned int width);
void seq_put_decimal_ull(struct seq_file *m, const char *delimiter,
--
2.44.0