Re: [PATCH 4/9] scripts: python: Implement parsing of input data in convertPerfScriptProfile

From: Namhyung Kim
Date: Fri Jun 23 2023 - 20:03:37 EST


Hi Anup,

On Wed, Jun 21, 2023 at 12:41 PM Anup Sharma <anupnewsmail@xxxxxxxxx> wrote:
>
> The lines variable is created by splitting the profile string into individual
> lines. It allows for iterating over each line for processing.
>
> The line is considered the start of a sample. It is matched against a regular
> expression pattern to extract relevant information such as before_time_stamp,
> time_stamp, threadNamePidAndTidMatch, threadName, pid, and tid.
>
> The stack frames of the current sample are then parsed in a nested loop.
> Each stackFrameLine is matched against a regular expression pattern to
> extract rawFunc and mod information.
>
> Also fixed a few checkpatch warnings.
>
> Signed-off-by: Anup Sharma <anupnewsmail@xxxxxxxxx>
> ---
> .../scripts/python/firefox-gecko-converter.py | 62 ++++++++++++++++++-
> 1 file changed, 60 insertions(+), 2 deletions(-)
>
> diff --git a/tools/perf/scripts/python/firefox-gecko-converter.py b/tools/perf/scripts/python/firefox-gecko-converter.py
> index 0ff70c0349c8..e5bc7a11c3e6 100644
> --- a/tools/perf/scripts/python/firefox-gecko-converter.py
> +++ b/tools/perf/scripts/python/firefox-gecko-converter.py
> @@ -1,4 +1,5 @@
> #!/usr/bin/env python3
> +# SPDX-License-Identifier: GPL-2.0

Please put this line in the first commit.

> import re
> import sys
> import json
> @@ -14,13 +15,13 @@ def isPerfScriptFormat(profile):
> firstLine = profile[:profile.index('\n')]
> return bool(re.match(r'^\S.*?\s+(?:\d+/)?\d+\s+(?:\d+\d+\s+)?[\d.]+:', firstLine))
>
> -def convertPerfScriptProfile(profile):
> +def convertPerfScriptProfile(profile):

You'd better configure your editor to warn about trailing whitespace,
or even to strip it automatically.

Thanks,
Namhyung


>
> def addSample(threadName, stackArray, time):
> nonlocal name
> if name != threadName:
> name = threadName
> - # TODO:
> + # TODO:
> # get_or_create_stack will create a new stack if it doesn't exist, or return the existing stack if it does.
> # get_or_create_frame will create a new frame if it doesn't exist, or return the existing frame if it does.
> stack = reduce(lambda prefix, stackFrame: get_or_create_stack(get_or_create_frame(stackFrame), prefix), stackArray, None)
> @@ -54,3 +55,60 @@ def convertPerfScriptProfile(profile):
> thread = _createtread(threadName, pid, tid)
> threadMap[tid] = thread
> thread['addSample'](threadName, stack, time_stamp)
> +
> + lines = profile.split('\n')
> +
> + line_index = 0
> + startTime = 0
> + while line_index < len(lines):
> + line = lines[line_index]
> + line_index += 1
> + # perf script --header outputs header lines beginning with #
> + if line == '' or line.startswith('#'):
> + continue
> +
> + sample_start_line = line
> +
> + sample_start_match = re.match(r'^(.*)\s+([\d.]+):', sample_start_line)
> + if not sample_start_match:
> + print(f'Could not parse line as the start of a sample in the "perf script" profile format: "{sample_start_line}"')
> + continue
> +
> + before_time_stamp = sample_start_match[1]
> + time_stamp = float(sample_start_match[2]) * 1000
> + threadNamePidAndTidMatch = re.match(r'^(.*)\s+(?:(\d+)\/)?(\d+)\b', before_time_stamp)
> +
> + if not threadNamePidAndTidMatch:
> + print('Could not parse line as the start of a sample in the "perf script" profile format: "%s"' % sampleStartLine)
> + continue
> + threadName = threadNamePidAndTidMatch[1].strip()
> + pid = int(threadNamePidAndTidMatch[2] or 0)
> + tid = int(threadNamePidAndTidMatch[3] or 0)
> + if startTime == 0:
> + startTime = time_stamp
> + # Parse the stack frames of the current sample in a nested loop.
> + stack = []
> + while line_index < len(lines):
> + stackFrameLine = lines[line_index]
> + line_index += 1
> + if stackFrameLine.strip() == '':
> + # Sample ends.
> + break
> + stackFrameMatch = re.match(r'^\s*(\w+)\s*(.+) \(([^)]*)\)', stackFrameLine)
> + if stackFrameMatch:
> + rawFunc = stackFrameMatch[2]
> + mod = stackFrameMatch[3]
> + rawFunc = re.sub(r'\+0x[\da-f]+$', '', rawFunc)
> +
> + if rawFunc.startswith('('):
> + continue # skip process names
> +
> + if mod:
> + # If we have a module name, provide it.
> + # The code processing the profile will search for
> + # "functionName (in libraryName)" using a regexp,
> + # and automatically create the library information.
> + rawFunc += f' (in {mod})'
> +
> + stack.append(rawFunc)
> +
> --
> 2.34.1
>