[PATCH] x86, msr: Allow read access to /dev/cpu/X/msr

From: Prarit Bhargava
Date: Fri Jun 26 2015 - 13:52:30 EST


Customers write system monitoring software for single systems as well as
clusters. In load-balancing software it is useful to know how "busy" a
core is. Unfortunately, the only ways to get this data are to run as
root or to use setcap to grant particular programs the required
capability. Both of these options are clunky at best.

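This patch allows read access to the msr dev files by moving the
CAP_SYS_RAWIO check out of msr_open() and into the write paths, and by
creating the device nodes with mode 0644. This should be okay: no
damage can be done by reading the MSR values, and it allows non-root
users to run system monitoring software.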

The turbostat code specifically checks for CAP_SYS_RAWIO, which it no
longer needs to do, so I've removed that check. Additionally, I've
modified the turbostat man page to remove the documentation about
configuring CAP_SYS_RAWIO.

Note: Write access to msr is still restricted with this patch.

Cc: "H. Peter Anvin" <hpa@xxxxxxxxx>
Cc: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
Cc: Ingo Molnar <mingo@xxxxxxxxxx>
Cc: x86@xxxxxxxxxx
Cc: Len Brown <len.brown@xxxxxxxxx>
Cc: Prarit Bhargava <prarit@xxxxxxxxxx>
Cc: Dasaratharaman Chandramouli <dasaratharaman.chandramouli@xxxxxxxxx>
Signed-off-by: Prarit Bhargava <prarit@xxxxxxxxxx>
---
arch/x86/kernel/msr.c | 11 ++++++++---
tools/power/x86/turbostat/turbostat.8 | 8 --------
tools/power/x86/turbostat/turbostat.c | 17 -----------------
3 files changed, 8 insertions(+), 28 deletions(-)

diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c
index 113e707..380d2ac 100644
--- a/arch/x86/kernel/msr.c
+++ b/arch/x86/kernel/msr.c
@@ -105,6 +105,9 @@ static ssize_t msr_write(struct file *file, const char __user *buf,
int err = 0;
ssize_t bytes = 0;

+ if (!capable(CAP_SYS_RAWIO))
+ return -EPERM;
+
if (count % 8)
return -EINVAL; /* Invalid chunk size */

@@ -148,6 +151,10 @@ static long msr_ioctl(struct file *file, unsigned int ioc, unsigned long arg)
break;

case X86_IOC_WRMSR_REGS:
+ if (!capable(CAP_SYS_RAWIO)) {
+ err = -EPERM;
+ break;
+ }
if (!(file->f_mode & FMODE_WRITE)) {
err = -EBADF;
break;
@@ -176,9 +183,6 @@ static int msr_open(struct inode *inode, struct file *file)
unsigned int cpu = iminor(file_inode(file));
struct cpuinfo_x86 *c;

- if (!capable(CAP_SYS_RAWIO))
- return -EPERM;
-
if (cpu >= nr_cpu_ids || !cpu_online(cpu))
return -ENXIO; /* No such CPU */

@@ -241,6 +245,7 @@ static struct notifier_block __refdata msr_class_cpu_notifier = {

static char *msr_devnode(struct device *dev, umode_t *mode)
{
+ *mode = (S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
return kasprintf(GFP_KERNEL, "cpu/%u/msr", MINOR(dev->devt));
}

diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8
index 05b8fc3..7b3fce4 100644
--- a/tools/power/x86/turbostat/turbostat.8
+++ b/tools/power/x86/turbostat/turbostat.8
@@ -204,14 +204,6 @@ not including any non-busy idle time.
.SH NOTES

.B "turbostat "
-must be run as root.
-Alternatively, non-root users can be enabled to run turbostat this way:
-
-# setcap cap_sys_rawio=ep ./turbostat
-
-# chmod +r /dev/cpu/*/msr
-
-.B "turbostat "
reads hardware counters, but doesn't write them.
So it will not interfere with the OS or other programs, including
multiple invocations of itself.
diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
index 323b65e..d043ae8 100644
--- a/tools/power/x86/turbostat/turbostat.c
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -1705,26 +1705,9 @@ void check_dev_msr()

void check_permissions()
{
- struct __user_cap_header_struct cap_header_data;
- cap_user_header_t cap_header = &cap_header_data;
- struct __user_cap_data_struct cap_data_data;
- cap_user_data_t cap_data = &cap_data_data;
- extern int capget(cap_user_header_t hdrp, cap_user_data_t datap);
int do_exit = 0;
char pathname[32];

- /* check for CAP_SYS_RAWIO */
- cap_header->pid = getpid();
- cap_header->version = _LINUX_CAPABILITY_VERSION;
- if (capget(cap_header, cap_data) < 0)
- err(-6, "capget(2) failed");
-
- if ((cap_data->effective & (1 << CAP_SYS_RAWIO)) == 0) {
- do_exit++;
- warnx("capget(CAP_SYS_RAWIO) failed,"
- " try \"# setcap cap_sys_rawio=ep %s\"", progname);
- }
-
/* test file permissions */
sprintf(pathname, "/dev/cpu/%d/msr", base_cpu);
if (euidaccess(pathname, R_OK)) {
--
1.7.9.3

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/