Re: x86/mce: suspicious RCU usage in 4.13.4

From: Borislav Petkov
Date: Wed Oct 11 2017 - 07:51:03 EST


Ok,

here's a second attempt at a more rigorous simplification: RCU stuff is
gone and only a single loop scans through the elements.

---
diff --git a/arch/x86/kernel/cpu/mcheck/dev-mcelog.c b/arch/x86/kernel/cpu/mcheck/dev-mcelog.c
index 10cec43aac38..1e1c6d22c93e 100644
--- a/arch/x86/kernel/cpu/mcheck/dev-mcelog.c
+++ b/arch/x86/kernel/cpu/mcheck/dev-mcelog.c
@@ -24,14 +24,6 @@ static DEFINE_MUTEX(mce_chrdev_read_mutex);
static char mce_helper[128];
static char *mce_helper_argv[2] = { mce_helper, NULL };

-#define mce_log_get_idx_check(p) \
-({ \
- RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held() && \
- !lockdep_is_held(&mce_chrdev_read_mutex), \
- "suspicious mce_log_get_idx_check() usage"); \
- smp_load_acquire(&(p)); \
-})
-
/*
* Lockless MCE logging infrastructure.
* This avoids deadlocks on printk locks without having to break locks. Also
@@ -53,43 +45,36 @@ static int dev_mce_log(struct notifier_block *nb, unsigned long val,
void *data)
{
struct mce *mce = (struct mce *)data;
- unsigned int next, entry;
-
- wmb();
- for (;;) {
- entry = mce_log_get_idx_check(mcelog.next);
- for (;;) {
-
- /*
- * When the buffer fills up discard new entries.
- * Assume that the earlier errors are the more
- * interesting ones:
- */
- if (entry >= MCE_LOG_LEN) {
- set_bit(MCE_OVERFLOW,
- (unsigned long *)&mcelog.flags);
- return NOTIFY_OK;
- }
- /* Old left over entry. Skip: */
- if (mcelog.entry[entry].finished) {
- entry++;
- continue;
- }
- break;
- }
- smp_rmb();
- next = entry + 1;
- if (cmpxchg(&mcelog.next, entry, next) == entry)
- break;
+ unsigned int entry;
+
+ mutex_lock(&mce_chrdev_read_mutex);
+
+ for (entry = mcelog.next; entry < MCE_LOG_LEN; entry++) {
+ /* Old left over entry. Skip: */
+ if (mcelog.entry[entry].finished)
+ continue;
+ }
+
+ /*
+ * When the buffer fills up discard new entries. Assume that the
+ * earlier errors are the more interesting ones:
+ */
+ if (entry >= MCE_LOG_LEN) {
+ set_bit(MCE_OVERFLOW, (unsigned long *)&mcelog.flags);
+ goto unlock;
}
+
+ mcelog.next = entry + 1;
+
memcpy(mcelog.entry + entry, mce, sizeof(struct mce));
- wmb();
mcelog.entry[entry].finished = 1;
- wmb();

/* wake processes polling /dev/mcelog */
wake_up_interruptible(&mce_chrdev_wait);

+unlock:
+ mutex_unlock(&mce_chrdev_read_mutex);
+
return NOTIFY_OK;
}

@@ -247,7 +232,7 @@ static ssize_t mce_chrdev_read(struct file *filp, char __user *ubuf,
goto out;
}

- next = mce_log_get_idx_check(mcelog.next);
+ next = mcelog.next;

/* Only supports full reads right now */
err = -EINVAL;
@@ -281,8 +266,6 @@ static ssize_t mce_chrdev_read(struct file *filp, char __user *ubuf,
next = cmpxchg(&mcelog.next, prev, 0);
} while (next != prev);

- synchronize_sched();
-
/*
* Collect entries that were still getting written before the
* synchronize.

--
Regards/Gruss,
Boris.

Good mailing practices for 400: avoid top-posting and trim the reply.