aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/infiniband/hw/ipath/ipath_iba6120.c
diff options
context:
space:
mode:
authorBryan O'Sullivan <bos@pathscale.com>2006-09-28 09:00:18 -0700
committerRoland Dreier <rolandd@cisco.com>2006-09-28 11:17:03 -0700
commit89d1e09b6a6d844ef327937f41658a426be42501 (patch)
tree5730241c737baf67b0b1ddf89ff38f6936d649c4 /drivers/infiniband/hw/ipath/ipath_iba6120.c
parent510847750c9d26052a71631e0fcad9e7f7a5f369 (diff)
downloadlinux-89d1e09b6a6d844ef327937f41658a426be42501.tar.gz
IB/ipath: Fix and recover TXE piobuf and PBC parity errors
We can sometimes trigger parity errors due to processor speculative reads to our write-combined memory (mostly seen on Woodcrest). Add a stats counter for these. Factored out the sendbuffererror buffer cancellation code so it can be used in the new handling; suppress likely subsequent error messages if within two jiffies of the cancellation. Also restore 2 dropped TXE lines on hwe_bitsextant noticed while debugging. Signed-off-by: Bryan O'Sullivan <bryan.osullivan@qlogic.com> Signed-off-by: Roland Dreier <rolandd@cisco.com>
Diffstat (limited to 'drivers/infiniband/hw/ipath/ipath_iba6120.c')
-rw-r--r--drivers/infiniband/hw/ipath/ipath_iba6120.c37
1 files changed, 34 insertions, 3 deletions
diff --git a/drivers/infiniband/hw/ipath/ipath_iba6120.c b/drivers/infiniband/hw/ipath/ipath_iba6120.c
index 08a44dd9ed6fa6..024b6aa320f1cc 100644
--- a/drivers/infiniband/hw/ipath/ipath_iba6120.c
+++ b/drivers/infiniband/hw/ipath/ipath_iba6120.c
@@ -370,7 +370,10 @@ static void ipath_pe_handle_hwerrors(struct ipath_devdata *dd, char *msg,
* make sure we get this much out, unless told to be quiet,
* or it's occurred within the last 5 seconds
*/
- if ((hwerrs & ~dd->ipath_lasthwerror) ||
+ if ((hwerrs & ~(dd->ipath_lasthwerror |
+ ((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF |
+ INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC)
+ << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT))) ||
(ipath_debug & __IPATH_VERBDBG))
dev_info(&dd->pcidev->dev, "Hardware error: hwerr=0x%llx "
"(cleared)\n", (unsigned long long) hwerrs);
@@ -383,6 +386,33 @@ static void ipath_pe_handle_hwerrors(struct ipath_devdata *dd, char *msg,
ctrl = ipath_read_kreg32(dd, dd->ipath_kregs->kr_control);
if (ctrl & INFINIPATH_C_FREEZEMODE) {
+ /*
+ * parity errors in send memory are recoverable,
+ * just cancel the send (if indicated in * sendbuffererror),
+ * count the occurrence, unfreeze (if no other handled
+ * hardware error bits are set), and continue. They can
+ * occur if a processor speculative read is done to the PIO
+ * buffer while we are sending a packet, for example.
+ */
+ if (hwerrs & ((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF |
+ INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC)
+ << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT)) {
+ ipath_stats.sps_txeparity++;
+ ipath_dbg("Recovering from TXE parity error (%llu), "
+ "hwerrstatus=%llx\n",
+ (unsigned long long) ipath_stats.sps_txeparity,
+ (unsigned long long) hwerrs);
+ ipath_disarm_senderrbufs(dd);
+ hwerrs &= ~((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF |
+ INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC)
+ << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT);
+ if (!hwerrs) { /* else leave in freeze mode */
+ ipath_write_kreg(dd,
+ dd->ipath_kregs->kr_control,
+ dd->ipath_control);
+ return;
+ }
+ }
if (hwerrs) {
/*
* if any set that we aren't ignoring only make the
@@ -406,9 +436,8 @@ static void ipath_pe_handle_hwerrors(struct ipath_devdata *dd, char *msg,
} else {
ipath_dbg("Clearing freezemode on ignored hardware "
"error\n");
- ctrl &= ~INFINIPATH_C_FREEZEMODE;
ipath_write_kreg(dd, dd->ipath_kregs->kr_control,
- ctrl);
+ dd->ipath_control);
}
}
@@ -880,6 +909,8 @@ static void ipath_init_pe_variables(struct ipath_devdata *dd)
dd->ipath_hwe_bitsextant =
(INFINIPATH_HWE_RXEMEMPARITYERR_MASK <<
INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT) |
+ (INFINIPATH_HWE_TXEMEMPARITYERR_MASK <<
+ INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT) |
(INFINIPATH_HWE_PCIEMEMPARITYERR_MASK <<
INFINIPATH_HWE_PCIEMEMPARITYERR_SHIFT) |
INFINIPATH_HWE_PCIE1PLLFAILED |