[PATCH v1 48/58] perf rw-by-file: Port rw-by-file to use python module

From: Ian Rogers

Date: Sun Apr 19 2026 - 20:14:29 EST


Port the legacy Perl script rw-by-file.pl to a python script using the
perf module in tools/perf/python.

The new script uses a class-based architecture and leverages the
perf.session API for event processing.

It tracks read and write activity by file descriptor for a given
program name, aggregating bytes requested/written and total counts.

Complications:
- Had to split long lines in __init__ to satisfy pylint.
- pylint warns about the module name not being snake_case, but it is
kept for consistency with the original script name.

Assisted-by: Gemini:gemini-3.1-pro-preview
Signed-off-by: Ian Rogers <irogers@xxxxxxxxxx>
---
tools/perf/python/rw-by-file.py | 103 ++++++++++++++++++++++++++++++++
1 file changed, 103 insertions(+)
create mode 100755 tools/perf/python/rw-by-file.py

diff --git a/tools/perf/python/rw-by-file.py b/tools/perf/python/rw-by-file.py
new file mode 100755
index 000000000000..4dd164a091e2
--- /dev/null
+++ b/tools/perf/python/rw-by-file.py
@@ -0,0 +1,103 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0-only
+"""Display r/w activity for files read/written to for a given program."""
+
+import argparse
+from collections import defaultdict
+import sys
+from typing import Optional, Dict
+import perf
+
+class RwByFile:
+ """Tracks and displays read/write activity by file descriptor."""
+ def __init__(self, comm: str) -> None:
+ self.for_comm = comm
+ self.reads: Dict[int, Dict[str, int]] = defaultdict(
+ lambda: {"bytes_requested": 0, "total_reads": 0}
+ )
+ self.writes: Dict[int, Dict[str, int]] = defaultdict(
+ lambda: {"bytes_written": 0, "total_writes": 0}
+ )
+ self.unhandled: Dict[str, int] = defaultdict(int)
+ self.session: Optional[perf.session] = None
+
+ def process_event(self, sample: perf.sample_event) -> None:
+ """Process events."""
+ event_name = str(sample.evsel)
+
+ pid = sample.sample_pid
+ assert self.session is not None
+ try:
+ comm = self.session.process(pid).comm()
+ except Exception: # pylint: disable=broad-except
+ comm = "unknown"
+
+ if comm != self.for_comm:
+ return
+
+ if "sys_enter_read" in event_name:
+ try:
+ fd = sample.fd
+ count = sample.count
+ self.reads[fd]["bytes_requested"] += count
+ self.reads[fd]["total_reads"] += 1
+ except AttributeError:
+ return
+ elif "sys_enter_write" in event_name:
+ try:
+ fd = sample.fd
+ count = sample.count
+ self.writes[fd]["bytes_written"] += count
+ self.writes[fd]["total_writes"] += 1
+ except AttributeError:
+ return
+ else:
+ self.unhandled[event_name] += 1
+
+ def print_totals(self) -> None:
+ """Print summary tables."""
+ print(f"file read counts for {self.for_comm}:\n")
+ print(f"{'fd':>6s} {'# reads':>10s} {'bytes_requested':>15s}")
+ print(f"{'-'*6} {'-'*10} {'-'*15}")
+
+ for fd, data in sorted(self.reads.items(),
+ key=lambda kv: kv[1]["bytes_requested"], reverse=True):
+ print(f"{fd:6d} {data['total_reads']:10d} {data['bytes_requested']:15d}")
+
+ print(f"\nfile write counts for {self.for_comm}:\n")
+ print(f"{'fd':>6s} {'# writes':>10s} {'bytes_written':>15s}")
+ print(f"{'-'*6} {'-'*10} {'-'*15}")
+
+ for fd, data in sorted(self.writes.items(),
+ key=lambda kv: kv[1]["bytes_written"], reverse=True):
+ print(f"{fd:6d} {data['total_writes']:10d} {data['bytes_written']:15d}")
+
+ if self.unhandled:
+ print("\nunhandled events:\n")
+ print(f"{'event':<40s} {'count':>10s}")
+ print(f"{'-'*40} {'-'*10}")
+ for event_name, count in self.unhandled.items():
+ print(f"{event_name:<40s} {count:10d}")
+
+ def run(self, input_file: str) -> None:
+ """Run the session."""
+ self.session = perf.session(perf.data(input_file), sample=self.process_event)
+ self.session.process_events()
+ self.print_totals()
+
+def main() -> None:
+ """Main function."""
+ parser = argparse.ArgumentParser(description="Trace r/w activity by file")
+ parser.add_argument("comm", help="Filter by command name")
+ parser.add_argument("-i", "--input", default="perf.data", help="Input file")
+ args = parser.parse_args()
+
+ analyzer = RwByFile(args.comm)
+ try:
+ analyzer.run(args.input)
+ except IOError as e:
+ print(e, file=sys.stderr)
+ sys.exit(1)
+
+# Run main() only when this file is executed as a script, not when imported.
+if __name__ == "__main__":
+ main()
--
2.54.0.rc1.513.gad8abe7a5a-goog