[PATCH v1] tools/cgroup: introduce cgroup v2 memory.events listener
From: Dmitry Rokosov
Date: Fri Oct 13 2023 - 14:41:31 EST
This is a simple listener for memory events that handles counter
changes in runtime. It can be set up for a specific memory cgroup v2.
The output example:
=====
$ /tmp/cgroup_v2_event_listener test
Initialized MEMCG events with counters:
MEMCG events:
low: 0
high: 0
max: 0
oom: 0
oom_kill: 0
oom_group_kill: 0
Started monitoring memory events from '/sys/fs/cgroup/test/memory.events'...
Received event in /sys/fs/cgroup/test/memory.events:
*** 1 MEMCG oom_kill event, change counter 0 => 1
Received event in /sys/fs/cgroup/test/memory.events:
*** 1 MEMCG oom_kill event, change counter 1 => 2
Received event in /sys/fs/cgroup/test/memory.events:
*** 1 MEMCG oom_kill event, change counter 2 => 3
Received event in /sys/fs/cgroup/test/memory.events:
*** 1 MEMCG oom_kill event, change counter 3 => 4
Received event in /sys/fs/cgroup/test/memory.events:
*** 2 MEMCG max events, change counter 0 => 2
Received event in /sys/fs/cgroup/test/memory.events:
*** 8 MEMCG max events, change counter 2 => 10
*** 1 MEMCG oom event, change counter 0 => 1
Received event in /sys/fs/cgroup/test/memory.events:
*** 1 MEMCG oom_kill event, change counter 4 => 5
^CExiting cgroup v2 event listener...
=====
Signed-off-by: Dmitry Rokosov <ddrokosov@xxxxxxxxxxxxxxxxx>
---
tools/cgroup/Makefile | 4 +-
tools/cgroup/cgroup_v2_event_listener.c | 330 ++++++++++++++++++++++++
2 files changed, 332 insertions(+), 2 deletions(-)
create mode 100644 tools/cgroup/cgroup_v2_event_listener.c
diff --git a/tools/cgroup/Makefile b/tools/cgroup/Makefile
index ffca068e4a76..86bd357a8f54 100644
--- a/tools/cgroup/Makefile
+++ b/tools/cgroup/Makefile
@@ -3,9 +3,9 @@
CFLAGS = -Wall -Wextra
-all: cgroup_event_listener
+all: cgroup_event_listener cgroup_v2_event_listener
%: %.c
$(CC) $(CFLAGS) -o $@ $^
clean:
- $(RM) cgroup_event_listener
+ $(RM) cgroup_event_listener cgroup_v2_event_listener
diff --git a/tools/cgroup/cgroup_v2_event_listener.c b/tools/cgroup/cgroup_v2_event_listener.c
new file mode 100644
index 000000000000..987261db5369
--- /dev/null
+++ b/tools/cgroup/cgroup_v2_event_listener.c
@@ -0,0 +1,330 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * cgroup_v2_event_listener.c - Simple listener of cgroup v2 memory.events
+ *
+ * Copyright (c) 2023, SaluteDevices. All Rights Reserved.
+ *
+ * Author: Dmitry Rokosov <ddrokosov@xxxxxxxxxxxxxxxxx>
+ */
+
+#include <err.h>
+#include <errno.h>
+#include <limits.h>
+#include <poll.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/inotify.h>
+#include <unistd.h>
+
+#define MEMCG_EVENTS "memory.events"
+
+/* Size of buffer to use when reading inotify events */
+#define INOTIFY_BUFFER_SIZE 8192
+
+#define INOTIFY_EVENT_NEXT(event, length) ({ \
+ (length) -= sizeof(*(event)) + (event)->len; \
+ (event)++; \
+})
+
+#define INOTIFY_EVENT_OK(event, length) ((length) >= (ssize_t)sizeof(*(event)))
+
+#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof(arr[0]))
+
+struct memcg_counters {
+ long low;
+ long high;
+ long max;
+ long oom;
+ long oom_kill;
+ long oom_group_kill;
+};
+
+struct memcg_events {
+ struct memcg_counters counters;
+ char path[PATH_MAX];
+ int inotify_fd;
+ int inotify_wd;
+};
+
+static void print_memcg_counters(const struct memcg_counters *counters)
+{
+ printf("MEMCG events:\n");
+ printf("\tlow: %ld\n", counters->low);
+ printf("\thigh: %ld\n", counters->high);
+ printf("\tmax: %ld\n", counters->max);
+ printf("\toom: %ld\n", counters->oom);
+ printf("\toom_kill: %ld\n", counters->oom_kill);
+ printf("\toom_group_kill: %ld\n", counters->oom_group_kill);
+}
+
+static int get_memcg_counter(char *line, const char *name, long *counter)
+{
+ size_t len = strlen(name);
+ char *endptr;
+ long tmp;
+
+ if (memcmp(line, name, len)) {
+ warnx("Counter line %s has wrong name, %s is expected",
+ line, name);
+ return -EINVAL;
+ }
+
+ /* skip the whitespace delimiter */
+ len += 1;
+
+ errno = 0;
+ tmp = strtol(&line[len], &endptr, 10);
+ if (((tmp == LONG_MAX || tmp == LONG_MIN) && errno == ERANGE) ||
+ (errno && !tmp)) {
+ warnx("Failed to parse: %s", &line[len]);
+ return -ERANGE;
+ }
+
+ if (endptr == &line[len]) {
+ warnx("Not digits were found in line %s", &line[len]);
+ return -EINVAL;
+ }
+
+ if (!(*endptr == '\0' || (*endptr == '\n' && *++endptr == '\0'))) {
+ warnx("Further characters after number: %s", endptr);
+ return -EINVAL;
+ }
+
+ *counter = tmp;
+
+ return 0;
+}
+
+static int read_memcg_events(struct memcg_events *events, bool show_diff)
+{
+ FILE *fp = fopen(events->path, "re");
+ size_t i;
+ int ret = 0;
+ bool any_new_events = false;
+ char *line = NULL;
+ size_t len = 0;
+ struct memcg_counters new_counters;
+ struct memcg_counters *counters = &events->counters;
+ struct {
+ const char *name;
+ long *new;
+ long *old;
+ } map[] = {
+ {
+ .name = "low",
+ .new = &new_counters.low,
+ .old = &counters->low,
+ },
+ {
+ .name = "high",
+ .new = &new_counters.high,
+ .old = &counters->high,
+ },
+ {
+ .name = "max",
+ .new = &new_counters.max,
+ .old = &counters->max,
+ },
+ {
+ .name = "oom",
+ .new = &new_counters.oom,
+ .old = &counters->oom,
+ },
+ {
+ .name = "oom_kill",
+ .new = &new_counters.oom_kill,
+ .old = &counters->oom_kill,
+ },
+ {
+ .name = "oom_group_kill",
+ .new = &new_counters.oom_group_kill,
+ .old = &counters->oom_group_kill,
+ },
+ };
+
+ if (!fp) {
+ warn("Failed to open memcg events file %s", events->path);
+ return -EBADF;
+ }
+
+ /* Read new values for memcg counters */
+ for (i = 0; i < ARRAY_SIZE(map); ++i) {
+ ssize_t nread;
+
+ errno = 0;
+ nread = getline(&line, &len, fp);
+ if (nread == -1) {
+ if (errno) {
+ warn("Failed to read line for counter %s",
+ map[i].name);
+ ret = -EIO;
+ goto exit;
+ }
+
+ break;
+ }
+
+ ret = get_memcg_counter(line, map[i].name, map[i].new);
+ if (ret) {
+ warnx("Failed to get counter value from line %s", line);
+ goto exit;
+ }
+ }
+
+ for (i = 0; i < ARRAY_SIZE(map); ++i) {
+ long diff;
+
+ if (*map[i].new > *map[i].old) {
+ diff = *map[i].new - *map[i].old;
+
+ if (show_diff)
+ printf("*** %ld MEMCG %s event%s, "
+ "change counter %ld => %ld\n",
+ diff, map[i].name,
+ (diff == 1) ? "" : "s",
+ *map[i].old, *map[i].new);
+
+ *map[i].old += diff;
+ any_new_events = true;
+ }
+ }
+
+ if (show_diff && !any_new_events)
+ printf("*** No new untracked memcg events available\n");
+
+exit:
+ free(line);
+ fclose(fp);
+
+ return ret;
+}
+
+static void process_memcg_events(struct memcg_events *events,
+ struct inotify_event *event)
+{
+ int ret;
+
+ if (events->inotify_wd != event->wd) {
+ warnx("Unknown inotify event %d, should be %d", event->wd,
+ events->inotify_wd);
+ return;
+ }
+
+ printf("Received event in %s:\n", events->path);
+
+ if (!(event->mask & IN_MODIFY)) {
+ warnx("No IN_MODIFY event, skip it");
+ return;
+ }
+
+ ret = read_memcg_events(events, /* show_diff = */true);
+ if (ret)
+ warnx("Can't read memcg events");
+}
+
+static void monitor_events(struct memcg_events *events)
+{
+ struct pollfd fds[1];
+ int ret;
+
+ printf("Started monitoring memory events from '%s'...\n", events->path);
+
+ fds[0].fd = events->inotify_fd;
+ fds[0].events = POLLIN;
+
+ for (;;) {
+ ret = poll(fds, ARRAY_SIZE(fds), -1);
+ if (ret < 0 && errno != EAGAIN)
+ err(EXIT_FAILURE, "Can't poll memcg events (%d)", ret);
+
+ if (fds[0].revents & POLLERR)
+ err(EXIT_FAILURE, "Got POLLERR during monitor events");
+
+ if (fds[0].revents & POLLIN) {
+ struct inotify_event *event;
+ char buffer[INOTIFY_BUFFER_SIZE];
+ ssize_t length;
+
+ length = read(fds[0].fd, buffer, INOTIFY_BUFFER_SIZE);
+ if (length <= 0)
+ continue;
+
+ event = (struct inotify_event *)buffer;
+ while (INOTIFY_EVENT_OK(event, length)) {
+ process_memcg_events(events, event);
+ event = INOTIFY_EVENT_NEXT(event, length);
+ }
+ }
+ }
+}
+
+static int initialize_memcg_events(struct memcg_events *events,
+ const char *cgroup)
+{
+ int ret;
+
+ memset(events, 0, sizeof(struct memcg_events));
+
+ ret = snprintf(events->path, PATH_MAX,
+ "/sys/fs/cgroup/%s/memory.events", cgroup);
+ if (ret >= PATH_MAX) {
+ warnx("Path to cgroup memory.events is too long");
+ return -EMSGSIZE;
+ } else if (ret < 0) {
+ warn("Can't generate cgroup event full name");
+ return ret;
+ }
+
+ ret = read_memcg_events(events, /* show_diff = */false);
+ if (ret) {
+ warnx("Failed to read initial memcg events state (%d)", ret);
+ return ret;
+ }
+
+ events->inotify_fd = inotify_init();
+ if (events->inotify_fd < 0) {
+ warn("Failed to setup new inotify device");
+ return -EMFILE;
+ }
+
+ events->inotify_wd = inotify_add_watch(events->inotify_fd,
+ events->path, IN_MODIFY);
+ if (events->inotify_wd < 0) {
+ warn("Couldn't add monitor in dir %s", events->path);
+ return -EIO;
+ }
+
+ printf("Initialized MEMCG events with counters:\n");
+ print_memcg_counters(&events->counters);
+
+ return 0;
+}
+
+static void cleanup_memcg_events(struct memcg_events *events)
+{
+ inotify_rm_watch(events->inotify_fd, events->inotify_wd);
+ close(events->inotify_fd);
+}
+
+int main(int argc, const char **argv)
+{
+ struct memcg_events events;
+ ssize_t ret;
+
+ if (argc != 2)
+ errx(EXIT_FAILURE, "Usage: %s <cgroup>", argv[0]);
+
+ ret = initialize_memcg_events(&events, argv[1]);
+ if (ret)
+ errx(EXIT_FAILURE, "Can't initialize memcg events (%zd)", ret);
+
+ monitor_events(&events);
+
+ cleanup_memcg_events(&events);
+
+ printf("Exiting cgroup v2 event listener...\n");
+
+ return EXIT_SUCCESS;
+}
--
2.36.0