[PATCH] x86-64: Make lockless machine check record passing a bit more robust.

One machine is constantly throwing NMI watchdog timeouts in mce_log().

This was one attempt to fix it.

(AK: Unfortunately, this doesn't actually fix the bug I'm seeing, so it
should probably be dropped.  I also don't like that the reader can now
spin forever waiting for a writer.)

Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
This commit is contained in:
Andi Kleen 2005-09-12 18:49:24 +02:00 committed by Linus Torvalds
parent a54e678b8f
commit 673242c10d

View file

@ -56,15 +56,19 @@ void mce_log(struct mce *mce)
smp_wmb();
for (;;) {
entry = rcu_dereference(mcelog.next);
/* When the buffer fills up discard new entries. Assume
that the earlier errors are the more interesting. */
if (entry >= MCE_LOG_LEN) {
set_bit(MCE_OVERFLOW, &mcelog.flags);
return;
for (;;) {
/* When the buffer fills up discard new entries. Assume
that the earlier errors are the more interesting. */
if (entry >= MCE_LOG_LEN) {
set_bit(MCE_OVERFLOW, &mcelog.flags);
return;
}
/* Old left over entry. Skip. */
if (mcelog.entry[entry].finished) {
entry++;
continue;
}
}
/* Old left over entry. Skip. */
if (mcelog.entry[entry].finished)
continue;
smp_rmb();
next = entry + 1;
if (cmpxchg(&mcelog.next, entry, next) == entry)
@ -404,9 +408,15 @@ static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize, loff
}
err = 0;
for (i = 0; i < next; i++) {
if (!mcelog.entry[i].finished)
continue;
for (i = 0; i < next; i++) {
unsigned long start = jiffies;
while (!mcelog.entry[i].finished) {
if (!time_before(jiffies, start + 2)) {
memset(mcelog.entry + i,0, sizeof(struct mce));
continue;
}
cpu_relax();
}
smp_rmb();
err |= copy_to_user(buf, mcelog.entry + i, sizeof(struct mce));
buf += sizeof(struct mce);