mirror of
https://github.com/adulau/aha.git
synced 2025-01-04 07:03:38 +00:00
IB/ipath: Workaround problem of errormask register being overwritten
On some system hardware, we are seeing moderately common cases of the chip errormask register being overwritten due to a chip bug in iba6120 that is triggered by a vendor-specific PCIe broadcast message. This patch merely checks periodically, and corrects it if needed (the overwrite can cause us to not get error and hardware error interrupts). Also, make dd->ipath_errormask the one, true canonical source for kr_errormask, and remove references to ipath_ignorederrs as it is currently unused. Signed-off-by: Dave Olson <dave.olson@qlogic.com> Signed-off-by: John Gregor <john.gregor@qlogic.com> Signed-off-by: Roland Dreier <rolandd@cisco.com>
This commit is contained in:
parent
3810f2a84e
commit
78d1e02fac
4 changed files with 66 additions and 29 deletions
|
@ -851,13 +851,14 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit)
|
|||
ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrmask,
|
||||
dd->ipath_hwerrmask);
|
||||
|
||||
dd->ipath_maskederrs = dd->ipath_ignorederrs;
|
||||
/* clear all */
|
||||
ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear, -1LL);
|
||||
/* enable errors that are masked, at least this first time. */
|
||||
ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask,
|
||||
~dd->ipath_maskederrs);
|
||||
/* clear any interrups up to this point (ints still not enabled) */
|
||||
dd->ipath_errormask = ipath_read_kreg64(dd,
|
||||
dd->ipath_kregs->kr_errormask);
|
||||
/* clear any interrupts up to this point (ints still not enabled) */
|
||||
ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, -1LL);
|
||||
|
||||
/*
|
||||
|
|
|
@ -517,10 +517,7 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs)
|
|||
|
||||
supp_msgs = handle_frequent_errors(dd, errs, msg, &noprint);
|
||||
|
||||
/*
|
||||
* don't report errors that are masked (includes those always
|
||||
* ignored)
|
||||
*/
|
||||
/* don't report errors that are masked */
|
||||
errs &= ~dd->ipath_maskederrs;
|
||||
|
||||
/* do these first, they are most important */
|
||||
|
@ -566,19 +563,19 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs)
|
|||
* ones on this particular interrupt, which also isn't great
|
||||
*/
|
||||
dd->ipath_maskederrs |= dd->ipath_lasterror | errs;
|
||||
dd->ipath_errormask &= ~dd->ipath_maskederrs;
|
||||
ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask,
|
||||
~dd->ipath_maskederrs);
|
||||
dd->ipath_errormask);
|
||||
s_iserr = ipath_decode_err(msg, sizeof msg,
|
||||
(dd->ipath_maskederrs & ~dd->
|
||||
ipath_ignorederrs));
|
||||
dd->ipath_maskederrs);
|
||||
|
||||
if ((dd->ipath_maskederrs & ~dd->ipath_ignorederrs) &
|
||||
if (dd->ipath_maskederrs &
|
||||
~(INFINIPATH_E_RRCVEGRFULL |
|
||||
INFINIPATH_E_RRCVHDRFULL | INFINIPATH_E_PKTERRS))
|
||||
ipath_dev_err(dd, "Temporarily disabling "
|
||||
"error(s) %llx reporting; too frequent (%s)\n",
|
||||
(unsigned long long) (dd->ipath_maskederrs &
|
||||
~dd->ipath_ignorederrs), msg);
|
||||
(unsigned long long)dd->ipath_maskederrs,
|
||||
msg);
|
||||
else {
|
||||
/*
|
||||
* rcvegrfull and rcvhdrqfull are "normal",
|
||||
|
@ -793,6 +790,9 @@ void ipath_clear_freeze(struct ipath_devdata *dd)
|
|||
/* disable error interrupts, to avoid confusion */
|
||||
ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask, 0ULL);
|
||||
|
||||
/* also disable interrupts; errormask is sometimes overwritten */
|
||||
ipath_write_kreg(dd, dd->ipath_kregs->kr_intmask, 0ULL);
|
||||
|
||||
/*
|
||||
* clear all sends, because they may have been
|
||||
* completed by usercode while in freeze mode, and
|
||||
|
@ -817,7 +817,7 @@ void ipath_clear_freeze(struct ipath_devdata *dd)
|
|||
for (i = 0; i < dd->ipath_pioavregs; i++) {
|
||||
/* deal with 6110 chip bug */
|
||||
im = i > 3 ? ((i&1) ? i-1 : i+1) : i;
|
||||
val = ipath_read_kreg64(dd, 0x1000+(im*sizeof(u64)));
|
||||
val = ipath_read_kreg64(dd, (0x1000/sizeof(u64))+im);
|
||||
dd->ipath_pioavailregs_dma[i] = dd->ipath_pioavailshadow[i]
|
||||
= le64_to_cpu(val);
|
||||
}
|
||||
|
@ -832,7 +832,8 @@ void ipath_clear_freeze(struct ipath_devdata *dd)
|
|||
ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear,
|
||||
E_SPKT_ERRS_IGNORE);
|
||||
ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask,
|
||||
~dd->ipath_maskederrs);
|
||||
dd->ipath_errormask);
|
||||
ipath_write_kreg(dd, dd->ipath_kregs->kr_intmask, -1LL);
|
||||
ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, 0ULL);
|
||||
}
|
||||
|
||||
|
|
|
@ -261,18 +261,10 @@ struct ipath_devdata {
|
|||
* limiting of hwerror reporting
|
||||
*/
|
||||
ipath_err_t ipath_lasthwerror;
|
||||
/*
|
||||
* errors masked because they occur too fast, also includes errors
|
||||
* that are always ignored (ipath_ignorederrs)
|
||||
*/
|
||||
/* errors masked because they occur too fast */
|
||||
ipath_err_t ipath_maskederrs;
|
||||
/* time in jiffies at which to re-enable maskederrs */
|
||||
unsigned long ipath_unmasktime;
|
||||
/*
|
||||
* errors always ignored (masked), at least for a given
|
||||
* chip/device, because they are wrong or not useful
|
||||
*/
|
||||
ipath_err_t ipath_ignorederrs;
|
||||
/* count of egrfull errors, combined for all ports */
|
||||
u64 ipath_last_tidfull;
|
||||
/* for ipath_qcheck() */
|
||||
|
@ -436,6 +428,7 @@ struct ipath_devdata {
|
|||
u64 ipath_lastibcstat;
|
||||
/* hwerrmask shadow */
|
||||
ipath_err_t ipath_hwerrmask;
|
||||
ipath_err_t ipath_errormask; /* errormask shadow */
|
||||
/* interrupt config reg shadow */
|
||||
u64 ipath_intconfig;
|
||||
/* kr_sendpiobufbase value */
|
||||
|
|
|
@ -196,6 +196,45 @@ static void ipath_qcheck(struct ipath_devdata *dd)
|
|||
}
|
||||
}
|
||||
|
||||
static void ipath_chk_errormask(struct ipath_devdata *dd)
|
||||
{
|
||||
static u32 fixed;
|
||||
u32 ctrl;
|
||||
unsigned long errormask;
|
||||
unsigned long hwerrs;
|
||||
|
||||
if (!dd->ipath_errormask || !(dd->ipath_flags & IPATH_INITTED))
|
||||
return;
|
||||
|
||||
errormask = ipath_read_kreg64(dd, dd->ipath_kregs->kr_errormask);
|
||||
|
||||
if (errormask == dd->ipath_errormask)
|
||||
return;
|
||||
fixed++;
|
||||
|
||||
hwerrs = ipath_read_kreg64(dd, dd->ipath_kregs->kr_hwerrstatus);
|
||||
ctrl = ipath_read_kreg32(dd, dd->ipath_kregs->kr_control);
|
||||
|
||||
ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask,
|
||||
dd->ipath_errormask);
|
||||
|
||||
if ((hwerrs & dd->ipath_hwerrmask) ||
|
||||
(ctrl & INFINIPATH_C_FREEZEMODE)) {
|
||||
/* force re-interrupt of pending events, just in case */
|
||||
ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrclear, 0ULL);
|
||||
ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear, 0ULL);
|
||||
ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, 0ULL);
|
||||
dev_info(&dd->pcidev->dev,
|
||||
"errormask fixed(%u) %lx -> %lx, ctrl %x hwerr %lx\n",
|
||||
fixed, errormask, (unsigned long)dd->ipath_errormask,
|
||||
ctrl, hwerrs);
|
||||
} else
|
||||
ipath_dbg("errormask fixed(%u) %lx -> %lx, no freeze\n",
|
||||
fixed, errormask,
|
||||
(unsigned long)dd->ipath_errormask);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* ipath_get_faststats - get word counters from chip before they overflow
|
||||
* @opaque - contains a pointer to the infinipath device ipath_devdata
|
||||
|
@ -251,14 +290,13 @@ void ipath_get_faststats(unsigned long opaque)
|
|||
dd->ipath_lasterror = 0;
|
||||
if (dd->ipath_lasthwerror)
|
||||
dd->ipath_lasthwerror = 0;
|
||||
if ((dd->ipath_maskederrs & ~dd->ipath_ignorederrs)
|
||||
if (dd->ipath_maskederrs
|
||||
&& time_after(jiffies, dd->ipath_unmasktime)) {
|
||||
char ebuf[256];
|
||||
int iserr;
|
||||
iserr = ipath_decode_err(ebuf, sizeof ebuf,
|
||||
(dd->ipath_maskederrs & ~dd->
|
||||
ipath_ignorederrs));
|
||||
if ((dd->ipath_maskederrs & ~dd->ipath_ignorederrs) &
|
||||
dd->ipath_maskederrs);
|
||||
if (dd->ipath_maskederrs &
|
||||
~(INFINIPATH_E_RRCVEGRFULL | INFINIPATH_E_RRCVHDRFULL |
|
||||
INFINIPATH_E_PKTERRS ))
|
||||
ipath_dev_err(dd, "Re-enabling masked errors "
|
||||
|
@ -278,9 +316,12 @@ void ipath_get_faststats(unsigned long opaque)
|
|||
ipath_cdbg(ERRPKT, "Re-enabling packet"
|
||||
" problem interrupt (%s)\n", ebuf);
|
||||
}
|
||||
dd->ipath_maskederrs = dd->ipath_ignorederrs;
|
||||
|
||||
/* re-enable masked errors */
|
||||
dd->ipath_errormask |= dd->ipath_maskederrs;
|
||||
ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask,
|
||||
~dd->ipath_maskederrs);
|
||||
dd->ipath_errormask);
|
||||
dd->ipath_maskederrs = 0;
|
||||
}
|
||||
|
||||
/* limit qfull messages to ~one per minute per port */
|
||||
|
@ -294,6 +335,7 @@ void ipath_get_faststats(unsigned long opaque)
|
|||
}
|
||||
}
|
||||
|
||||
ipath_chk_errormask(dd);
|
||||
done:
|
||||
mod_timer(&dd->ipath_stats_timer, jiffies + HZ * 5);
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue