[PATCH v1 49/58] perf rw-by-pid: Port rw-by-pid to use python module
From: Ian Rogers
Date: Sun Apr 19 2026 - 20:09:50 EST
Port the legacy Perl script rw-by-pid.pl to a python script using the
perf module in tools/perf/python.
The new script uses a class-based architecture and leverages the
perf.session API for event processing.
It tracks read and write activity by PID for all processes, aggregating
bytes requested, bytes read and total reads for reads, bytes written and
total writes for writes, and per-return-value error counts for both.
Complications:
- Refactored process_event to extract helper methods
(_handle_sys_enter_read, etc.) to reduce the number of branches and
satisfy pylint.
- Split long lines to comply with line length limits.
- pylint warns about the module name not being snake_case, but it is
kept for consistency with the original script name.
Assisted-by: Gemini:gemini-3.1-pro-preview
Signed-off-by: Ian Rogers <irogers@xxxxxxxxxx>
---
tools/perf/python/rw-by-pid.py | 170 +++++++++++++++++++++++++++++++++
1 file changed, 170 insertions(+)
create mode 100755 tools/perf/python/rw-by-pid.py
diff --git a/tools/perf/python/rw-by-pid.py b/tools/perf/python/rw-by-pid.py
new file mode 100755
index 000000000000..7bb51d15eb8d
--- /dev/null
+++ b/tools/perf/python/rw-by-pid.py
@@ -0,0 +1,170 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0-only
+"""Display r/w activity for all processes."""
+
+import argparse
+from collections import defaultdict
+import sys
+from typing import Optional, Dict, List, Tuple, Any
+import perf
+
+class RwByPid:
+ """Tracks and displays read/write activity by PID."""
+ def __init__(self) -> None:
+ self.reads: Dict[int, Dict[str, Any]] = defaultdict(
+ lambda: {
+ "bytes_requested": 0,
+ "bytes_read": 0,
+ "total_reads": 0,
+ "comm": "",
+ "errors": defaultdict(int),
+ }
+ )
+ self.writes: Dict[int, Dict[str, Any]] = defaultdict(
+ lambda: {
+ "bytes_written": 0,
+ "total_writes": 0,
+ "comm": "",
+ "errors": defaultdict(int),
+ }
+ )
+ self.unhandled: Dict[str, int] = defaultdict(int)
+ self.session: Optional[perf.session] = None
+
+ def process_event(self, sample: perf.sample_event) -> None:
+ """Process events."""
+ event_name = str(sample.evsel)
+ pid = sample.sample_pid
+
+ assert self.session is not None
+ try:
+ comm = self.session.process(pid).comm()
+ except Exception: # pylint: disable=broad-except
+ comm = "unknown"
+
+ if "sys_enter_read" in event_name:
+ self._handle_sys_enter_read(sample, pid, comm)
+ elif "sys_exit_read" in event_name:
+ self._handle_sys_exit_read(sample, pid)
+ elif "sys_enter_write" in event_name:
+ self._handle_sys_enter_write(sample, pid, comm)
+ elif "sys_exit_write" in event_name:
+ self._handle_sys_exit_write(sample, pid)
+ else:
+ self.unhandled[event_name] += 1
+
+ def _handle_sys_enter_read(self, sample: perf.sample_event, pid: int, comm: str) -> None:
+ try:
+ count = sample.count
+ self.reads[pid]["bytes_requested"] += count
+ self.reads[pid]["total_reads"] += 1
+ self.reads[pid]["comm"] = comm
+ except AttributeError:
+ pass
+
+ def _handle_sys_exit_read(self, sample: perf.sample_event, pid: int) -> None:
+ try:
+ ret = sample.ret
+ if ret > 0:
+ self.reads[pid]["bytes_read"] += ret
+ else:
+ self.reads[pid]["errors"][ret] += 1
+ except AttributeError:
+ pass
+
+ def _handle_sys_enter_write(self, sample: perf.sample_event, pid: int, comm: str) -> None:
+ try:
+ count = sample.count
+ self.writes[pid]["bytes_written"] += count
+ self.writes[pid]["total_writes"] += 1
+ self.writes[pid]["comm"] = comm
+ except AttributeError:
+ pass
+
+ def _handle_sys_exit_write(self, sample: perf.sample_event, pid: int) -> None:
+ try:
+ ret = sample.ret
+ if ret <= 0:
+ self.writes[pid]["errors"][ret] += 1
+ except AttributeError:
+ pass
+
+ def print_totals(self) -> None:
+ """Print summary tables."""
+ print("read counts by pid:\n")
+ print(
+ f"{'pid':>6s} {'comm':<20s} {'# reads':>10s} "
+ f"{'bytes_requested':>15s} {'bytes_read':>10s}"
+ )
+ print(f"{'-'*6} {'-'*20} {'-'*10} {'-'*15} {'-'*10}")
+
+ for pid, data in sorted(self.reads.items(),
+ key=lambda kv: kv[1]["bytes_read"], reverse=True):
+ print(
+ f"{pid:6d} {data['comm']:<20s} {data['total_reads']:10d} "
+ f"{data['bytes_requested']:15d} {data['bytes_read']:10d}"
+ )
+
+ print("\nfailed reads by pid:\n")
+ print(f"{'pid':>6s} {'comm':<20s} {'error #':>6s} {'# errors':>10s}")
+ print(f"{'-'*6} {'-'*20} {'-'*6} {'-'*10}")
+
+ errcounts: List[Tuple[int, str, int, int]] = []
+ for pid, data in self.reads.items():
+ for error, count in data["errors"].items():
+ errcounts.append((pid, data["comm"], error, count))
+
+ for pid, comm, error, count in sorted(errcounts, key=lambda x: x[3], reverse=True):
+ print(f"{pid:6d} {comm:<20s} {error:6d} {count:10d}")
+
+ print("\nwrite counts by pid:\n")
+ print(f"{'pid':>6s} {'comm':<20s} {'# writes':>10s} {'bytes_written':>15s}")
+ print(f"{'-'*6} {'-'*20} {'-'*10} {'-'*15}")
+
+ for pid, data in sorted(self.writes.items(),
+ key=lambda kv: kv[1]["bytes_written"], reverse=True):
+ print(
+ f"{pid:6d} {data['comm']:<20s} "
+ f"{data['total_writes']:10d} {data['bytes_written']:15d}"
+ )
+
+ print("\nfailed writes by pid:\n")
+ print(f"{'pid':>6s} {'comm':<20s} {'error #':>6s} {'# errors':>10s}")
+ print(f"{'-'*6} {'-'*20} {'-'*6} {'-'*10}")
+
+ errcounts = []
+ for pid, data in self.writes.items():
+ for error, count in data["errors"].items():
+ errcounts.append((pid, data["comm"], error, count))
+
+ for pid, comm, error, count in sorted(errcounts, key=lambda x: x[3], reverse=True):
+ print(f"{pid:6d} {comm:<20s} {error:6d} {count:10d}")
+
+ if self.unhandled:
+ print("\nunhandled events:\n")
+ print(f"{'event':<40s} {'count':>10s}")
+ print(f"{'-'*40} {'-'*10}")
+ for event_name, count in self.unhandled.items():
+ print(f"{event_name:<40s} {count:10d}")
+
+ def run(self, input_file: str) -> None:
+ """Run the session."""
+ self.session = perf.session(perf.data(input_file), sample=self.process_event)
+ self.session.process_events()
+ self.print_totals()
+
+def main() -> None:
+ """Main function."""
+ parser = argparse.ArgumentParser(description="Trace r/w activity by PID")
+ parser.add_argument("-i", "--input", default="perf.data", help="Input file")
+ args = parser.parse_args()
+
+ analyzer = RwByPid()
+ try:
+ analyzer.run(args.input)
+ except IOError as e:
+ print(e, file=sys.stderr)
+ sys.exit(1)
+
+# Run main() only when executed as a script, not when imported.
+if __name__ == "__main__":
+    main()
--
2.54.0.rc1.513.gad8abe7a5a-goog