RE: [PATCH v2 2/8] selftests/resctrl: Add basic resctrl file system operations and data
From: Moger, Babu
Date: Mon Oct 29 2018 - 17:51:59 EST
Hi Fenghua, Sai,
> -----Original Message-----
> From: Fenghua Yu <fenghua.yu@xxxxxxxxx>
> Sent: Thursday, October 25, 2018 6:07 PM
> To: Thomas Gleixner <tglx@xxxxxxxxxxxxx>; Ingo Molnar
> <mingo@xxxxxxxxxx>; H Peter Anvin <hpa@xxxxxxxxx>; Tony Luck
> <tony.luck@xxxxxxxxx>; Peter Zijlstra <peterz@xxxxxxxxxxxxx>; Reinette
> Chatre <reinette.chatre@xxxxxxxxx>; Moger, Babu
> <Babu.Moger@xxxxxxx>; James Morse <james.morse@xxxxxxx>; Ravi V
> Shankar <ravi.v.shankar@xxxxxxxxx>; Sai Praneeth Prakhya
> <sai.praneeth.prakhya@xxxxxxxxx>; Arshiya Hayatkhan Pathan
> <arshiya.hayatkhan.pathan@xxxxxxxxx>
> Cc: linux-kernel <linux-kernel@xxxxxxxxxxxxxxx>; Fenghua Yu
> <fenghua.yu@xxxxxxxxx>
> Subject: [PATCH v2 2/8] selftests/resctrl: Add basic resctrl file system
> operations and data
>
> From: Sai Praneeth Prakhya <sai.praneeth.prakhya@xxxxxxxxx>
>
> The basic resctrl file system operations and data are added for future
> usage by resctrl selftest tool.
>
> Signed-off-by: Sai Praneeth Prakhya <sai.praneeth.prakhya@xxxxxxxxx>
> Signed-off-by: Arshiya Hayatkhan Pathan
> <arshiya.hayatkhan.pathan@xxxxxxxxx>
> Signed-off-by: Fenghua Yu <fenghua.yu@xxxxxxxxx>
> ---
> tools/testing/selftests/resctrl/Makefile | 10 +
> tools/testing/selftests/resctrl/resctrl.h | 53 ++++
> tools/testing/selftests/resctrl/resctrlfs.c | 465
> ++++++++++++++++++++++++++++
> 3 files changed, 528 insertions(+)
> create mode 100644 tools/testing/selftests/resctrl/Makefile
> create mode 100644 tools/testing/selftests/resctrl/resctrl.h
> create mode 100644 tools/testing/selftests/resctrl/resctrlfs.c
>
> diff --git a/tools/testing/selftests/resctrl/Makefile
> b/tools/testing/selftests/resctrl/Makefile
> new file mode 100644
> index 000000000000..bd5c5418961e
> --- /dev/null
> +++ b/tools/testing/selftests/resctrl/Makefile
> @@ -0,0 +1,10 @@
> +CC = gcc
> +CFLAGS = -g -Wall
> +
> +*.o: *.c
> + $(CC) $(CFLAGS) -c *.c
> +
> +.PHONY: clean
> +
> +clean:
> + $(RM) *.o *~
> diff --git a/tools/testing/selftests/resctrl/resctrl.h
> b/tools/testing/selftests/resctrl/resctrl.h
> new file mode 100644
> index 000000000000..fe3c3434df97
> --- /dev/null
> +++ b/tools/testing/selftests/resctrl/resctrl.h
> @@ -0,0 +1,53 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +#define _GNU_SOURCE
> +#ifndef RESCTRL_H
> +#define RESCTRL_H
> +#include <stdio.h>
> +#include <errno.h>
> +#include <sched.h>
> +#include <stdlib.h>
> +#include <unistd.h>
> +#include <string.h>
> +#include <signal.h>
> +#include <dirent.h>
> +#include <stdbool.h>
> +#include <sys/stat.h>
> +#include <sys/ioctl.h>
> +#include <sys/mount.h>
> +#include <sys/types.h>
> +#include <asm/unistd.h>
> +#include <linux/perf_event.h>
> +
> +#define MB (1024 * 1024)
> +#define RESCTRL_PATH "/sys/fs/resctrl"
> +#define PHYS_ID_PATH "/sys/devices/system/cpu/cpu"
> +#define RESCTRL_MBM "L3 monitoring detected"
> +#define RESCTRL_MBA "MB allocation detected"
> +#define MAX_RESCTRL_FEATURES 2
> +#define RM_SIG_FILE "rm -rf sig"
> +
> +#define PARENT_EXIT(err_msg) \
> + do { \
> + perror(err_msg); \
> + kill(ppid, SIGKILL); \
> + exit(EXIT_FAILURE); \
> + } while (0)
> +
> +pid_t bm_pid, ppid;
> +int ben_count;
> +
> +int remount_resctrlfs(bool mum_resctrlfs);
> +char get_sock_num(int cpu_no);
> +int validate_bw_report_request(char *bw_report);
> +int validate_resctrl_feature_request(char *resctrl_val);
> +int taskset_benchmark(pid_t bm_pid, int cpu_no);
> +void run_benchmark(int signum, siginfo_t *info, void *ucontext);
> +int write_schemata(char *ctrlgrp, char *schemata, int cpu_no,
> + char *resctrl_val);
> +int write_bm_pid_to_resctrl(pid_t bm_pid, char *ctrlgrp, char *mongrp,
> + char *resctrl_val);
> +int perf_event_open(struct perf_event_attr *hw_event, pid_t pid, int cpu,
> + int group_fd, unsigned long flags);
> +int run_fill_buf(int span, int malloc_and_init_memory, int memflush, int
> op);
> +
> +#endif /* RESCTRL_H */
> diff --git a/tools/testing/selftests/resctrl/resctrlfs.c
> b/tools/testing/selftests/resctrl/resctrlfs.c
> new file mode 100644
> index 000000000000..d73726ef2002
> --- /dev/null
> +++ b/tools/testing/selftests/resctrl/resctrlfs.c
> @@ -0,0 +1,465 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Basic resctrl file system operations
> + *
> + * Copyright (C) 2018 Intel Corporation
> + *
> + * Authors:
> + * Arshiya Hayatkhan Pathan <arshiya.hayatkhan.pathan@xxxxxxxxx>
> + * Sai Praneeth Prakhya <sai.praneeth.prakhya@xxxxxxxxx>,
> + * Fenghua Yu <fenghua.yu@xxxxxxxxx>
> + */
> +#include "resctrl.h"
> +
> +/*
> + * remount_resctrlfs: Remount resctrl FS at /sys/fs/resctrl
> + * @mum_resctrlfs: Should the resctrl FS be remounted?
> + *
> + * If not mounted, mount it.
> + * If mounted and mum_resctrlfs then remount resctrl FS.
> + * If mounted and !mum_resctrlfs then noop
> + *
> + * Return: 0 on success, non-zero on failure
> + */
> +int remount_resctrlfs(bool mum_resctrlfs)
> +{
> + DIR *dp;
> + struct dirent *ep;
> + unsigned int count = 0;
> +
> + /*
> + * If kernel is built with CONFIG_RESCTRL, then /sys/fs/resctrl should
> + * be present by default
> + */
> + dp = opendir(RESCTRL_PATH);
> + if (dp) {
> + while ((ep = readdir(dp)) != NULL)
> + count++;
> + closedir(dp);
> + } else {
> + perror("Unable to read /sys/fs/resctrl");
> +
> + return errno;
> + }
> +
> + /*
> + * If resctrl FS has more than two entries, it means that resctrl FS has
> + * already been mounted. The two default entries are "." and "..",
> these
> + * are present even when resctrl FS is not mounted
> + */
> + if (count > 2) {
> + if (mum_resctrlfs) {
> + if (umount(RESCTRL_PATH) != 0) {
> + perror("Unable to umount resctrl");
> +
> + return errno;
> + }
> + printf("Remount: done!\n");
> + } else {
> + printf("Mounted already. Not remounting!\n");
> +
> + return 0;
> + }
> + }
> +
> + if (mount("resctrl", RESCTRL_PATH, "resctrl", 0, NULL) != 0) {
> + perror("Unable to mount resctrl FS at /sys/fs/resctrl");
> +
> + return errno;
> + }
> +
> + return 0;
> +}
> +
> +int umount_resctrlfs(void)
> +{
> + if (umount(RESCTRL_PATH)) {
> + perror("Unable to umount resctrl");
> +
> + return errno;
> + }
> +
> + return 0;
> +}
> +
> +char get_sock_num(int cpu_no)
> +{
> + char sock_num, phys_pkg_path[1024];
> + FILE *fp;
> +
> + sprintf(phys_pkg_path, "%s%d/topology/physical_package_id",
> + PHYS_ID_PATH, cpu_no);
> + fp = fopen(phys_pkg_path, "r");
There should corresponding fclose for this. In general, I would check all the fopens in this series. I found few of the files not closed while returning.
More comments below.
> + if (!fp || fscanf(fp, "%c", &sock_num) <= 0 || fclose(fp) == EOF) {
> + perror("Could not get socket number");
> +
> + return -1;
> + }
> +
> + return sock_num;
> +}
> +
> +/*
> + * taskset_benchmark: Taskset PID (i.e. benchmark) to a specified
> cpu
> + * @bm_pid: PID that should be binded
> + * @cpu_no: CPU number at which the PID would be binded
> + *
> + * Return: 0 on success, non-zero on failure
> + */
> +int taskset_benchmark(pid_t bm_pid, int cpu_no)
> +{
> + cpu_set_t my_set;
> +
> + CPU_ZERO(&my_set);
> + CPU_SET(cpu_no, &my_set);
> +
> + if (sched_setaffinity(bm_pid, sizeof(cpu_set_t), &my_set)) {
> + perror("Unable to taskset benchmark");
> +
> + return -1;
> + }
> +
> + printf("Taskset benchmark: done!\n");
> +
> + return 0;
> +}
> +
> +/*
> + * Run a specified benchmark or fill_buf (default benchmark). Direct
> + * benchmark stdio to /dev/null
> + */
> +void run_benchmark(int signum, siginfo_t *info, void *ucontext)
> +{
> + char **benchmark_cmd;
> + int span, operation, ret;
> +
> + benchmark_cmd = info->si_ptr;
> +
> + /*
> + * Direct stdio of child to /dev/null, so that only parent writes to
> + * stdio (console)
> + */
> + if (!freopen("/dev/null", "w", stdout))
> + PARENT_EXIT("Unable to direct BM op to /dev/null");
Do you need fclose for this before returning from this function?
> +
> + if (strcmp(benchmark_cmd[0], "fill_buf") == 0) {
> + /* Execute default fill_buf benchmark */
> + span = atoi(benchmark_cmd[1]);
> + operation = atoi(benchmark_cmd[4]);
> + if (run_fill_buf(span, 1, 1, operation))
> + fprintf(stderr, "Error in running fill buffer\n");
> + } else {
> + /* Execute specified benchmark */
> + ret = execvp(benchmark_cmd[0], benchmark_cmd);
> + if (ret)
> + perror("wrong\n");
> + }
> +
> + PARENT_EXIT("Unable to run specified benchmark");
> +}
> +
> +/*
> + * create_con_mon_grp: Create a con_mon group *only* if one
> doesn't exist
> + * @ctrlgrp: Name of the con_mon group
> + * @controlgroup: Path at which it should be created
> + *
> + * Return: 0 on success, non-zero on failure
> + */
> +static int create_con_mon_grp(const char *ctrlgrp, const char
> *controlgroup)
> +{
> + int found_ctrl_grp = 0;
> + struct dirent *ep;
> + DIR *dp;
> +
> + /*
> + * At this point, we are guaranteed to have resctrl FS mounted and if
> + * ctrlgrp == NULL, it means, user wants to use root con_mon grp, so
> do
> + * nothing
> + */
> + if (!ctrlgrp)
> + return 0;
> +
> + /* Check if requested con_mon grp exists or not */
> + dp = opendir(RESCTRL_PATH);
> + if (dp) {
> + while ((ep = readdir(dp)) != NULL) {
> + if (strcmp(ep->d_name, ctrlgrp) == 0)
> + found_ctrl_grp = 1;
> + }
> + closedir(dp);
> + } else {
> + perror("Unable to open resctrlfs for con_mon grp");
> +
> + return errno;
> + }
> +
> + /* Requested con_mon grp doesn't exist, hence create it */
> + if (found_ctrl_grp == 0) {
> + if (mkdir(controlgroup, 0) == -1) {
> + perror("Unable to create con_mon group");
> +
> + return errno;
> + }
> + }
> +
> + return 0;
> +}
> +
> +/*
> + * create_mon_grp: Create a monitor group *only* if one doesn't exist
> + * @mongrp: Name of the monitor group
> + * @controlgroup: Path of con_mon grp at which the mon grp will be
> created
> + *
> + * Return: 0 on success, non-zero on failure
> + */
> +static int create_mon_grp(const char *mongrp, const char *controlgroup)
> +{
> + char monitorgroup[1024];
> + int found_mon_grp = 0;
> + struct dirent *ep;
> + DIR *dp;
> +
> + /* Check if requested mon grp exists or not */
> + sprintf(monitorgroup, "%s/mon_groups", controlgroup);
> + dp = opendir(monitorgroup);
> + if (dp) {
> + while ((ep = readdir(dp)) != NULL) {
> + if (strcmp(ep->d_name, mongrp) == 0)
> + found_mon_grp = 1;
> + }
> + closedir(dp);
> + } else {
> + perror("Unable to open resctrl FS for mon group");
> +
> + return -1;
> + }
> +
> + /* Requested mon grp doesn't exist, hence create it */
> + sprintf(monitorgroup, "%s/mon_groups/%s", controlgroup,
> mongrp);
> + if (found_mon_grp == 0) {
> + if (mkdir(monitorgroup, 0) == -1) {
> + perror("Unable to create mon group");
> +
> + return -1;
> + }
> + }
> +
> + return 0;
> +}
> +
> +/*
> + * write_bm_pid_to_resctrl: Write a PID (i.e. benchmark) to resctrl FS
> + * @bm_pid: PID that should be written
> + * @ctrlgrp: Name of the control monitor group
> (con_mon grp)
> + * @mongrp: Name of the monitor group (mon grp)
> + * @resctrl_val: Resctrl feature (Eg: mbm, mba.. etc)
> + *
> + * If a con_mon grp is requested, create it and write pid to it, otherwise
> + * write pid to root con_mon grp.
> + * If a mon grp is requested, create it and write pid to it, otherwise
> + * pid is not written, this means that pid is in con_mon grp and hence
> + * should consult con_mon grp's mon_data directory for results.
> + *
> + * Return: 0 on success, non-zero on failure
> + */
> +int write_bm_pid_to_resctrl(pid_t bm_pid, char *ctrlgrp, char *mongrp,
> + char *resctrl_val)
> +{
> + char controlgroup[1024], monitorgroup[1024];
> + FILE *fp;
> + int ret;
> +
> + if (ctrlgrp)
> + sprintf(controlgroup, "%s/%s", RESCTRL_PATH, ctrlgrp);
> + else
> + sprintf(controlgroup, "%s", RESCTRL_PATH);
> +
> + ret = create_con_mon_grp(ctrlgrp, controlgroup);
> + if (ret)
> + return ret;
> +
> + /* Create mon grp, only for monitoring features like "mbm" */
> + if ((strcmp(resctrl_val, "mbm") == 0)) {
> + if (mongrp) {
> + ret = create_mon_grp(mongrp, controlgroup);
> + if (ret)
> + return ret;
> +
> + sprintf(monitorgroup, "%s/mon_groups/%s/tasks",
> + controlgroup, mongrp);
> + }
> + }
> +
> + strcat(controlgroup, "/tasks");
> +
> + /* Write child pid to con_mon grp */
> + fp = fopen(controlgroup, "w");
I don't see corresponding fclose.
> + if (!fp || fprintf(fp, "%d\n", bm_pid) <= 0 || fclose(fp) == EOF) {
> + perror("Failed to write child to con_mon grp");
> +
> + return errno;
> + }
> +
> + /* Write child pid to mon grp, only for "mbm" */
> + if ((strcmp(resctrl_val, "mbm") == 0)) {
> + if (mongrp) {
> + fp = fopen(monitorgroup, "w");
> + if (!fp || fprintf(fp, "%d\n", bm_pid) <= 0 ||
> + fclose(fp) == EOF) {
I feel too many checks at one place. If fprintf fails, will it fclose the file? I suggest to separate these checks.
> + perror("Failed to write child to mon grp");
> +
> + return errno;
> + }
> + }
> + }
> +
> + printf("Write benchmark to resctrl FS: done!\n");
> +
> + return 0;
> +}
> +
> +/*
> + * write_schemata: Update schemata of a con_mon grp
> + * @ctrlgrp: Name of the con_mon grp
> + * @schemata: Schemata that should be updated to
> + * @cpu_no: CPU number that the benchmark PID is binded to
> + * @resctrl_val: Resctrl feature (Eg: mbm, mba.. etc)
> + *
> + * Update schemata of a con_mon grp *only* if requested resctrl feature is
> + * allocation type
> + *
> + * Return: 0 on success, non-zero on failure
> + */
> +int write_schemata(char *ctrlgrp, char *schemata, int cpu_no, char
> *resctrl_val)
> +{
> + char sock_num, controlgroup[1024], schema[1024];
> + FILE *fp;
> +
> + if (strcmp(resctrl_val, "mba") == 0) {
> + if (!schemata) {
> + printf("Schemata empty, so not updating\n");
> +
> + return 0;
> + }
> + sock_num = get_sock_num(cpu_no);
> + if (sock_num < 0)
> + return -1;
> +
> + if (ctrlgrp)
> + sprintf(controlgroup, "%s/%s/schemata",
> RESCTRL_PATH,
> + ctrlgrp);
> + else
> + sprintf(controlgroup, "%s/schemata",
> RESCTRL_PATH);
> + sprintf(schema, "%s%c%c%s", "MB:", sock_num, '=',
> schemata);
> +
> + fp = fopen(controlgroup, "w");
> + if (!fp || fprintf(fp, "%s\n", schema) <= 0 ||
> + fclose(fp) == EOF) {
Same comment as above.. If fprintf fails, will it fclose the file? I suggest to separate these checks.
> + perror("Unable to write schemata to con_mon grp");
> +
> + return errno;
> + }
> + printf("Write schemata to resctrl FS: done!\n");
> + }
> +
> + return 0;
> +}
> +
> +/*
> + * Check if the requested feature is a valid resctrl feature or not.
> + * If yes, check if it's supported by this platform or not.
> + *
> + * Return: 0 on success, non-zero on failure
> + */
> +int validate_resctrl_feature_request(char *resctrl_val)
> +{
> + const char *resctrl_features_list[MAX_RESCTRL_FEATURES] = {
> + "mbm", "mba"};
> + int resctrl_features_supported[MAX_RESCTRL_FEATURES] = {0, 0};
> + int i, valid_resctrl_feature = -1;
> + char line[1024];
> + FILE *fp;
> +
> + if (!resctrl_val) {
> + fprintf(stderr, "resctrl feature cannot be NULL\n");
> +
> + return -1;
> + }
> +
> + /* Is the resctrl feature request valid? */
> + for (i = 0; i < MAX_RESCTRL_FEATURES; i++) {
> + if (strcmp(resctrl_features_list[i], resctrl_val) == 0)
> + valid_resctrl_feature = i;
> + }
> + if (valid_resctrl_feature == -1) {
> + fprintf(stderr, "Not a valid resctrl feature request\n");
> +
> + return -1;
> + }
> +
> + /* Enumerate resctrl features supported by this platform */
> + if (system("dmesg > dmesg") != 0) {
> + perror("Could not create custom dmesg file");
> +
> + return errno;
> + }
> +
> + fp = fopen("dmesg", "r");
> + if (!fp) {
> + perror("Could not read custom created dmesg");
> +
> + return errno;
> + }
> +
> + while (fgets(line, 1024, fp)) {
> + if ((strstr(line, RESCTRL_MBM)) != NULL)
> + resctrl_features_supported[0] = 1;
> + if ((strstr(line, RESCTRL_MBA)) != NULL)
> + resctrl_features_supported[1] = 1;
> + }
> + if (fclose(fp) == EOF) {
> + perror("Error in closing file");
> +
> + return errno;
> + }
> +
> + if (system("rm -rf dmesg") != 0)
> + perror("Unable to remove 'dmesg' file");
> +
> + /* Is the resctrl feature request supported? */
> + if (!resctrl_features_supported[valid_resctrl_feature]) {
> + fprintf(stderr, "resctrl feature not supported!");
> +
> + return -1;
> + }
> +
> + return 0;
> +}
> +
> +int validate_bw_report_request(char *bw_report)
> +{
> + if (strcmp(bw_report, "reads") == 0)
> + return 0;
> + if (strcmp(bw_report, "writes") == 0)
> + return 0;
> + if (strcmp(bw_report, "nt-writes") == 0) {
> + strcpy(bw_report, "writes");
> + return 0;
> + }
> + if (strcmp(bw_report, "total") == 0)
> + return 0;
> +
> + fprintf(stderr, "Requested iMC B/W report type unavailable\n");
> +
> + return -1;
> +}
> +
> +int perf_event_open(struct perf_event_attr *hw_event, pid_t pid, int cpu,
> + int group_fd, unsigned long flags)
> +{
> + int ret;
> +
> + ret = syscall(__NR_perf_event_open, hw_event, pid, cpu,
> + group_fd, flags);
> + return ret;
> +}
> --
> 2.5.0