RE: [PATCH v2 30/28] docs: kdoc_parser: avoid tokenizing structs everytime

From: Loktionov, Aleksandr

Date: Fri Mar 13 2026 - 07:05:27 EST

> -----Original Message-----
> From: Mauro Carvalho Chehab <mchehab+huawei@xxxxxxxxxx>
> Sent: Friday, March 13, 2026 9:34 AM
> To: Jonathan Corbet <corbet@xxxxxxx>; Linux Doc Mailing List <linux-
> doc@xxxxxxxxxxxxxxx>
> Cc: Mauro Carvalho Chehab <mchehab+huawei@xxxxxxxxxx>; linux-
> kernel@xxxxxxxxxxxxxxx; Loktionov, Aleksandr
> <aleksandr.loktionov@xxxxxxxxx>; Mauro Carvalho Chehab
> <mchehab@xxxxxxxxxx>; Randy Dunlap <rdunlap@xxxxxxxxxxxxx>
> Subject: [PATCH v2 30/28] docs: kdoc_parser: avoid tokenizing structs
> everytime
>
> Most of the rules inside CTransforms are of the type CMatch.
>
> Don't re-parse the source code every time.
>
> Doing this doesn't change the output, but makes kdoc almost as fast as
> before the tokenizer patches:
>
> # Before tokenizer patches
> $ time ./scripts/kernel-doc . -man >original 2>&1
>
> real 0m42.933s
> user 0m36.523s
> sys 0m1.145s
>
> # After tokenizer patches
> $ time ./scripts/kernel-doc . -man >before 2>&1
>
> real 1m29.853s
> user 1m23.974s
> sys 0m1.237s
>
> # After this patch
> $ time ./scripts/kernel-doc . -man >after 2>&1
>
> real 0m48.579s
> user 0m45.938s
> sys 0m0.988s
>
> $ diff -s before after
> Files before and after are identical
>
> Manually checked the differences between original and after
> with:
>
> $ diff -U0 -prBw original after|grep -v Warning|grep -v "@@"|less
>
> They're due to:
> - whitespace fixes;
> - struct_group is now better handled;
> - several badly-generated man pages from broken inline kernel-doc
> markups are now fixed.
>
> Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@xxxxxxxxxx>
> ---
> tools/lib/python/kdoc/kdoc_parser.py | 1 -
> tools/lib/python/kdoc/xforms_lists.py | 30 +++++++++++++++++++++------
> 2 files changed, 24 insertions(+), 7 deletions(-)
>
> diff --git a/tools/lib/python/kdoc/kdoc_parser.py
> b/tools/lib/python/kdoc/kdoc_parser.py
> index ed378edb1e05..3b99740ebed3 100644
> --- a/tools/lib/python/kdoc/kdoc_parser.py
> +++ b/tools/lib/python/kdoc/kdoc_parser.py
> @@ -738,7 +738,6 @@ class KernelDoc:
> #
> # Go through the list of members applying all of our
> transformations.
> #
> - members = trim_private_members(members)
> members = self.xforms.apply("struct", members)
>
> #
> diff --git a/tools/lib/python/kdoc/xforms_lists.py
> b/tools/lib/python/kdoc/xforms_lists.py
> index c3c532c45cdc..f6ea9efb11ae 100644
> --- a/tools/lib/python/kdoc/xforms_lists.py
> +++ b/tools/lib/python/kdoc/xforms_lists.py
> @@ -5,7 +5,7 @@
> import re
>
> from kdoc.kdoc_re import KernRe
> -from kdoc.c_lex import CMatch
> +from kdoc.c_lex import CMatch, CTokenizer
>
> struct_args_pattern = r"([^,)]+)"
>
> @@ -17,6 +17,12 @@ class CTransforms:
> into something we can parse and generate kdoc for.
> """
>
> + #
> + # NOTE:
> + # Due to performance reasons, place CMatch rules before
> KernRe,
> + # as this avoids running the C parser every time.
> + #
> +
> #: Transforms for structs and unions.
> struct_xforms = [
> (CMatch("__attribute__"), ""),
> @@ -123,13 +129,25 @@ class CTransforms:
> "var": var_xforms,
> }
>
> - def apply(self, xforms_type, text):
> + def apply(self, xforms_type, source):
> """
> - Apply a set of transforms to a block of text.
> + Apply a set of transforms to a block of source.
> +
> + As the tokenizer is used here, this function also removes comments
> + at the end.
> """
> if xforms_type not in self.xforms:
> - return text
> + return source
> +
> + if isinstance(source, str):
> + source = CTokenizer(source)
>
> for search, subst in self.xforms[xforms_type]:
> - text = search.sub(subst, text)
> - return text
> + #
> + # KernRe only accepts strings.
> + #
> + if isinstance(search, KernRe):
> + source = str(source)
> +
> + source = search.sub(subst, source)
> + return str(source)
> --
> 2.53.0

Reviewed-by: Aleksandr Loktionov <aleksandr.loktionov@xxxxxxxxx>