[PATCH v2] x86/msr: Allow unprivileged read access to some MSRs

From: Tim Wiederhake
Date: Tue May 30 2023 - 06:28:34 EST


Software such as QEMU and libvirt requires the raw content of some MSRs
to calculate host CPU capabilities. This is currently done through
/dev/cpu/*/msr, which is locked behind both CAP_SYS_RAWIO and file mode
0600, allowing only root to read and write MSRs.

Expose some non-security-sensitive MSRs through sysctl files under
/proc/sys/vm/msr/ to allow access for unprivileged processes. This also
helps other programs that are interested in IA32_EFER for x86-64-v1
detection.

Signed-off-by: Tim Wiederhake <twiederh@xxxxxxxxxx>
---
Changes to v1 (https://lkml.org/lkml/2023/5/23/1230):
* removed patch to limit reads to /dev/cpu/*/msr to 8 bytes per read
* removed CAP_SYS_RAWIO-less access to /dev/cpu/*/msr
* introduced a sysctl interface to MSRs

With this sysctl-based, unrestricted read access to some select MSRs in
place, a later patch could introduce checks for CAP_SYS_RAWIO for every
access to /dev/cpu/*/msr, as mentioned in the feedback to v1.
---
arch/x86/kernel/msr.c | 45 +++++++++++++++++++++++++++++++++++++++++++
1 file changed, 45 insertions(+)

diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c
index 7bb17d37db01..3c8354f3c2bd 100644
--- a/arch/x86/kernel/msr.c
+++ b/arch/x86/kernel/msr.c
@@ -50,6 +50,31 @@ enum allow_write_msrs {

static enum allow_write_msrs allow_writes = MSR_WRITES_DEFAULT;

+struct allow_read_msrs {
+ const char *procname;
+ u32 index;
+ u32 value[2];
+};
+
+static struct allow_read_msrs allow_reads[] = {
+ {
+ .procname = "ia32_core_caps",
+ .index = MSR_IA32_CORE_CAPS,
+ },
+ {
+ .procname = "ia32_arch_capabilities",
+ .index = MSR_IA32_ARCH_CAPABILITIES,
+ },
+ {
+ .procname = "efer",
+ .index = MSR_EFER,
+ },
+};
+
+static struct ctl_table msr_files[ARRAY_SIZE(allow_reads) + 1];
+
+static struct ctl_table_header *msr_files_header;
+
static ssize_t msr_read(struct file *file, char __user *buf,
size_t count, loff_t *ppos)
{
@@ -258,6 +283,25 @@ static char *msr_devnode(const struct device *dev, umode_t *mode)
static int __init msr_init(void)
{
int err;
+ int i, j;
+
+ for (i = 0, j = 0; i < ARRAY_SIZE(allow_reads); ++i) {
+ err = rdmsr_safe_on_cpu(0, allow_reads[i].index,
+ &allow_reads[i].value[0],
+ &allow_reads[i].value[1]);
+ if (err)
+ continue;
+ msr_files[j].procname = allow_reads[i].procname;
+ msr_files[j].data = &allow_reads[i].value;
+ msr_files[j].maxlen = 2 * sizeof(u32);
+ msr_files[j].mode = 0444;
+ msr_files[j].proc_handler = proc_doulongvec_minmax;
+ ++j;
+ }
+
+ msr_files_header = register_sysctl("vm/msr", msr_files);
+ if (!msr_files_header)
+ return -ENOMEM;

if (__register_chrdev(MSR_MAJOR, 0, NR_CPUS, "cpu/msr", &msr_fops)) {
pr_err("unable to get major %d for msr\n", MSR_MAJOR);
@@ -287,6 +331,7 @@ module_init(msr_init);

static void __exit msr_exit(void)
{
+ unregister_sysctl_table(msr_files_header);
cpuhp_remove_state(cpuhp_msr_state);
class_destroy(msr_class);
__unregister_chrdev(MSR_MAJOR, 0, NR_CPUS, "cpu/msr");
--
2.39.2