From: Dennis Dalessandro <dennis.dalessandro-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
To: dledford-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org
Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
Don Hiatt <don.hiatt-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>,
Mike Marciniszyn
<mike.marciniszyn-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
Subject: [PATCH v2 17/20] IB/hfi1: Add transmit fault injection feature
Date: Mon, 20 Mar 2017 17:26:20 -0700 [thread overview]
Message-ID: <20170321002619.28538.31428.stgit@scvm10.sc.intel.com> (raw)
In-Reply-To: <20170321001900.28538.38175.stgit-9QXIwq+3FY+1XWohqUldA0EOCMrvLtNR@public.gmane.org>
From: Don Hiatt <don.hiatt-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
Add ability to fault packets on transmit by opcode.
Dropping by packet can be achieved by setting the mask to 0.
In order to drop non-verbs traffic we set PbcInsertHrc
to NONE (0x2). The packet will still be delivered to
the receiving node but a KHdrHCRCErr (KDETH packet
with a bad HCRC) will be triggered and the packet will
not be delivered to the correct context.
In order to drop regular verbs traffic we set the
PbcTestEbp flag. The packet will still be delivered
to the receiving node but a 'late ebp error' will
be triggered and will be dropped.
A global toggle (/sys/kernel/debug/hfi1/hfi1_X/fault_suppress_err)
has been added to suppress the error messages on the receive
node when a packet was faulted on the sending node.
Reviewed-by: Dennis Dalessandro <dennis.dalessandro-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
Signed-off-by: Mike Marciniszyn <mike.marciniszyn-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
Signed-off-by: Don Hiatt <don.hiatt-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
---
drivers/infiniband/hw/hfi1/chip.c | 4 +++
drivers/infiniband/hw/hfi1/debugfs.c | 8 ++++++
drivers/infiniband/hw/hfi1/debugfs.h | 11 ++++++++
drivers/infiniband/hw/hfi1/driver.c | 11 ++++++++
drivers/infiniband/hw/hfi1/verbs.c | 49 +++++++++++++++++++++++++++++-----
drivers/infiniband/hw/hfi1/verbs.h | 1 +
include/rdma/ib_pack.h | 2 +
7 files changed, 79 insertions(+), 7 deletions(-)
diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c
index 77f4b41..79a316a 100644
--- a/drivers/infiniband/hw/hfi1/chip.c
+++ b/drivers/infiniband/hw/hfi1/chip.c
@@ -64,6 +64,7 @@
#include "platform.h"
#include "aspm.h"
#include "affinity.h"
+#include "debugfs.h"
#define NUM_IB_PORTS 1
@@ -7898,6 +7899,9 @@ static void handle_dcc_err(struct hfi1_devdata *dd, u32 unused, u64 reg)
reg &= ~DCC_ERR_FLG_EN_CSR_ACCESS_BLOCKED_HOST_SMASK;
}
+ if (unlikely(hfi1_dbg_fault_suppress_err(&dd->verbs_dev)))
+ reg &= ~DCC_ERR_FLG_LATE_EBP_ERR_SMASK;
+
/* report any remaining errors */
if (reg)
dd_dev_info_ratelimited(dd, "DCC Error: %s\n",
diff --git a/drivers/infiniband/hw/hfi1/debugfs.c b/drivers/infiniband/hw/hfi1/debugfs.c
index cac6d52..dc2c1c9 100644
--- a/drivers/infiniband/hw/hfi1/debugfs.c
+++ b/drivers/infiniband/hw/hfi1/debugfs.c
@@ -1240,6 +1240,11 @@ static int fault_init_debugfs(struct hfi1_ibdev *ibd)
return ret;
}
+bool hfi1_dbg_fault_suppress_err(struct hfi1_ibdev *ibd)
+{
+ return ibd->fault_suppress_err;
+}
+
bool hfi1_dbg_fault_opcode(struct rvt_qp *qp, u32 opcode, bool rx)
{
bool ret = false;
@@ -1329,6 +1334,9 @@ void hfi1_dbg_ibdev_init(struct hfi1_ibdev *ibd)
}
#ifdef CONFIG_FAULT_INJECTION
+ debugfs_create_bool("fault_suppress_err", 0600,
+ ibd->hfi1_ibdev_dbg,
+ &ibd->fault_suppress_err);
fault_init_debugfs(ibd);
#endif
}
diff --git a/drivers/infiniband/hw/hfi1/debugfs.h b/drivers/infiniband/hw/hfi1/debugfs.h
index 70be5ca..38c38a9 100644
--- a/drivers/infiniband/hw/hfi1/debugfs.h
+++ b/drivers/infiniband/hw/hfi1/debugfs.h
@@ -75,6 +75,7 @@ struct fault_packet {
bool hfi1_dbg_fault_opcode(struct rvt_qp *qp, u32 opcode, bool rx);
bool hfi1_dbg_fault_packet(struct hfi1_packet *packet);
+bool hfi1_dbg_fault_suppress_err(struct hfi1_ibdev *ibd);
#else
static inline bool hfi1_dbg_fault_packet(struct hfi1_packet *packet)
{
@@ -86,6 +87,11 @@ static inline bool hfi1_dbg_fault_opcode(struct rvt_qp *qp,
{
return false;
}
+
+static inline bool hfi1_dbg_fault_suppress_err(struct hfi1_ibdev *ibd)
+{
+ return false;
+}
#endif
#else
@@ -115,6 +121,11 @@ static inline bool hfi1_dbg_fault_opcode(struct rvt_qp *qp,
{
return false;
}
+
+static inline bool hfi1_dbg_fault_suppress_err(struct hfi1_ibdev *ibd)
+{
+ return false;
+}
#endif
#endif /* _HFI1_DEBUGFS_H */
diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c
index c0b012f..64bdbce 100644
--- a/drivers/infiniband/hw/hfi1/driver.c
+++ b/drivers/infiniband/hw/hfi1/driver.c
@@ -1367,6 +1367,11 @@ int process_receive_ib(struct hfi1_packet *packet)
packet->updegr,
rhf_egr_index(packet->rhf));
+ if (unlikely(
+ (hfi1_dbg_fault_suppress_err(&packet->rcd->dd->verbs_dev) &&
+ (packet->rhf & RHF_DC_ERR))))
+ return RHF_RCV_CONTINUE;
+
if (unlikely(rhf_err_flags(packet->rhf))) {
handle_eflags(packet);
return RHF_RCV_CONTINUE;
@@ -1402,6 +1407,12 @@ int process_receive_bypass(struct hfi1_packet *packet)
int process_receive_error(struct hfi1_packet *packet)
{
+ /* KHdrHCRCErr -- KDETH packet with a bad HCRC */
+ if (unlikely(
+ hfi1_dbg_fault_suppress_err(&packet->rcd->dd->verbs_dev) &&
+ rhf_rcv_type_err(packet->rhf) == 3))
+ return RHF_RCV_CONTINUE;
+
handle_eflags(packet);
if (unlikely(rhf_err_flags(packet->rhf)))
diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c
index 9f016da..5e7e577 100644
--- a/drivers/infiniband/hw/hfi1/verbs.c
+++ b/drivers/infiniband/hw/hfi1/verbs.c
@@ -518,6 +518,35 @@ static inline opcode_handler qp_ok(int opcode, struct hfi1_packet *packet)
return NULL;
}
+static u64 hfi1_fault_tx(struct rvt_qp *qp, u8 opcode, u64 pbc)
+{
+#ifdef CONFIG_HFI1_FAULT_INJECTION
+ if ((opcode & IB_OPCODE_MSP) == IB_OPCODE_MSP)
+ /*
+ * In order to drop non-IB traffic we
+ * set PbcInsertHrc to NONE (0x2).
+ * The packet will still be delivered
+ * to the receiving node but a
+ * KHdrHCRCErr (KDETH packet with a bad
+ * HCRC) will be triggered and the
+ * packet will not be delivered to the
+ * correct context.
+ */
+ pbc |= (u64)PBC_IHCRC_NONE << PBC_INSERT_HCRC_SHIFT;
+ else
+ /*
+ * In order to drop regular verbs
+ * traffic we set the PbcTestEbp
+ * flag. The packet will still be
+ * delivered to the receiving node but
+ * a 'late ebp error' will be
+ * triggered and will be dropped.
+ */
+ pbc |= PBC_TEST_EBP;
+#endif
+ return pbc;
+}
+
/**
* hfi1_ib_rcv - process an incoming packet
* @packet: data packet information
@@ -803,7 +832,6 @@ static int build_verbs_tx_desc(
if (ret)
goto bail_txadd;
}
-
/* add the ulp payload - if any. tx->ss can be NULL for acks */
if (tx->ss)
ret = build_verbs_ulp_payload(sde, length, tx);
@@ -822,7 +850,6 @@ int hfi1_verbs_send_dma(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
struct hfi1_ibdev *dev = ps->dev;
struct hfi1_pportdata *ppd = ps->ppd;
struct verbs_txreq *tx;
- u64 pbc_flags = 0;
u8 sc5 = priv->s_sc;
int ret;
@@ -831,12 +858,16 @@ int hfi1_verbs_send_dma(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
if (!sdma_txreq_built(&tx->txreq)) {
if (likely(pbc == 0)) {
u32 vl = sc_to_vlt(dd_from_ibdev(qp->ibqp.device), sc5);
+ u8 opcode = get_opcode(&tx->phdr.hdr);
+
/* No vl15 here */
/* set PBC_DC_INFO bit (aka SC[4]) in pbc_flags */
- pbc_flags |= (!!(sc5 & 0x10)) << PBC_DC_INFO_SHIFT;
+ pbc |= (!!(sc5 & 0x10)) << PBC_DC_INFO_SHIFT;
+ if (unlikely(hfi1_dbg_fault_opcode(qp, opcode, false)))
+ pbc = hfi1_fault_tx(qp, opcode, pbc);
pbc = create_pbc(ppd,
- pbc_flags,
+ pbc,
qp->srate_mbps,
vl,
plen);
@@ -939,7 +970,6 @@ int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
u32 plen = hdrwords + dwords + 2; /* includes pbc */
struct hfi1_pportdata *ppd = ps->ppd;
u32 *hdr = (u32 *)&ps->s_txreq->phdr.hdr;
- u64 pbc_flags = 0;
u8 sc5;
unsigned long flags = 0;
struct send_context *sc;
@@ -964,9 +994,14 @@ int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
if (likely(pbc == 0)) {
u8 vl = sc_to_vlt(dd_from_ibdev(qp->ibqp.device), sc5);
+ struct verbs_txreq *tx = ps->s_txreq;
+ u8 opcode = get_opcode(&tx->phdr.hdr);
+
/* set PBC_DC_INFO bit (aka SC[4]) in pbc_flags */
- pbc_flags |= (!!(sc5 & 0x10)) << PBC_DC_INFO_SHIFT;
- pbc = create_pbc(ppd, pbc_flags, qp->srate_mbps, vl, plen);
+ pbc |= (!!(sc5 & 0x10)) << PBC_DC_INFO_SHIFT;
+ if (unlikely(hfi1_dbg_fault_opcode(qp, opcode, false)))
+ pbc = hfi1_fault_tx(qp, opcode, pbc);
+ pbc = create_pbc(ppd, pbc, qp->srate_mbps, vl, plen);
}
if (cb)
iowait_pio_inc(&priv->s_iowait);
diff --git a/drivers/infiniband/hw/hfi1/verbs.h b/drivers/infiniband/hw/hfi1/verbs.h
index 2756ec3..6c549e7 100644
--- a/drivers/infiniband/hw/hfi1/verbs.h
+++ b/drivers/infiniband/hw/hfi1/verbs.h
@@ -198,6 +198,7 @@ struct hfi1_ibdev {
#ifdef CONFIG_FAULT_INJECTION
struct fault_opcode *fault_opcode;
struct fault_packet *fault_packet;
+ bool fault_suppress_err;
#endif
#endif
};
diff --git a/include/rdma/ib_pack.h b/include/rdma/ib_pack.h
index b13419c..3665589 100644
--- a/include/rdma/ib_pack.h
+++ b/include/rdma/ib_pack.h
@@ -80,6 +80,8 @@ enum {
IB_OPCODE_UD = 0x60,
/* per IBTA 1.3 vol 1 Table 38, A10.3.2 */
IB_OPCODE_CNP = 0x80,
+ /* Manufacturer specific */
+ IB_OPCODE_MSP = 0xe0,
/* operations -- just used to define real constants */
IB_OPCODE_SEND_FIRST = 0x00,
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
next prev parent reply other threads:[~2017-03-21 0:26 UTC|newest]
Thread overview: 41+ messages / expand[flat|nested] mbox.gz Atom feed top
2017-03-21 0:24 [PATCH v2 00/20] IB/hfi1, qib, rdmavt: Another round of patches for 4.11 Dennis Dalessandro
[not found] ` <20170321001900.28538.38175.stgit-9QXIwq+3FY+1XWohqUldA0EOCMrvLtNR@public.gmane.org>
2017-03-21 0:24 ` [PATCH v2 01/20] IB/hfi1: Force logical link down Dennis Dalessandro
2017-03-21 0:24 ` [PATCH v2 02/20] IB/hfi1: Race hazard avoidance in user SDMA driver Dennis Dalessandro
2017-03-21 0:24 ` [PATCH v2 03/20] IB/hfi1: Cache registers during state change Dennis Dalessandro
2017-03-21 0:24 ` [PATCH v2 04/20] IB/hfi1: NULL pointer dereference when freeing rhashtable Dennis Dalessandro
2017-03-21 0:25 ` [PATCH v2 05/20] IB/rdmavt, IB/hfi1, IB/qib: Make wc opcode translation driver dependent Dennis Dalessandro
2017-03-21 0:25 ` [PATCH v2 06/20] IB/rdmavt: Add additional fields to post send trace Dennis Dalessandro
2017-03-21 0:25 ` [PATCH v2 07/20] IB/rdmavt: Add tracing for cq entry and poll Dennis Dalessandro
2017-03-21 0:25 ` [PATCH v2 08/20] IB/rdmavt: Add swqe completion trace Dennis Dalessandro
2017-03-21 0:25 ` [PATCH v2 09/20] IB/hfi1: Check device id early during init Dennis Dalessandro
2017-03-21 0:25 ` [PATCH v2 10/20] IB/hfi1: Protect the global dev_cntr_names and port_cntr_names Dennis Dalessandro
2017-03-21 0:25 ` [PATCH v2 11/20] IB/hfi1: Check for QSFP presence before attempting reads Dennis Dalessandro
2017-03-21 0:25 ` [PATCH v2 12/20] IB/hfi1: Add a patch value to the firmware version string Dennis Dalessandro
2017-03-21 0:25 ` [PATCH v2 13/20] IB/rdmavt, IB/hfi1: Fix timer migration regressions Dennis Dalessandro
2017-03-21 0:26 ` [PATCH v2 14/20] IB/rdmavt: Avoid reseting wqe send_flags in unreserve Dennis Dalessandro
2017-03-21 0:26 ` [PATCH v2 15/20] IB/hfi1: Ensure VL index is within bounds Dennis Dalessandro
2017-03-21 0:26 ` [PATCH v2 16/20] IB/hfi1: Add receive fault injection feature Dennis Dalessandro
2017-03-21 0:26 ` Dennis Dalessandro [this message]
[not found] ` <20170321002619.28538.31428.stgit-9QXIwq+3FY+1XWohqUldA0EOCMrvLtNR@public.gmane.org>
2017-04-05 18:34 ` [PATCH v2 17/20] IB/hfi1: Add transmit " Doug Ledford
[not found] ` <1491417255.2923.5.camel-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>
2017-04-05 18:49 ` Dennis Dalessandro
2017-03-21 0:26 ` [PATCH v2 18/20] IB/hfi1: Eliminate synchronize_rcu() in mr delete Dennis Dalessandro
2017-03-21 0:26 ` [PATCH v2 19/20] IB/rdmavt, IB/qib, IB/hfi1: Make percpu refcount optional for user MRs Dennis Dalessandro
[not found] ` <20170321002631.28538.2121.stgit-9QXIwq+3FY+1XWohqUldA0EOCMrvLtNR@public.gmane.org>
2017-04-05 18:38 ` Doug Ledford
[not found] ` <1491417489.2923.6.camel-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>
2017-04-05 19:46 ` Dennis Dalessandro
[not found] ` <f008c532-340e-01f2-80e6-4bea74175e3e-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
2017-04-05 19:51 ` Leon Romanovsky
[not found] ` <CALq1K=JsjSCiSBeZVe4kHQmjw7tznL36JcsamZTVGZ5RhBvZPw-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2017-04-05 20:09 ` Marciniszyn, Mike
[not found] ` <32E1700B9017364D9B60AED9960492BC342EA858-RjuIdWtd+YbTXloPLtfHfbfspsVTdybXVpNB7YpNyf8@public.gmane.org>
2017-04-06 7:49 ` Leon Romanovsky
[not found] ` <20170406074955.GG2269-U/DQcQFIOTAAJjI8aNfphQ@public.gmane.org>
2017-04-06 11:45 ` Dennis Dalessandro
[not found] ` <8cdf2fbb-f2a9-0b4b-b144-397ee73d1569-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
2017-04-06 12:37 ` Leon Romanovsky
[not found] ` <20170406123726.GH2269-U/DQcQFIOTAAJjI8aNfphQ@public.gmane.org>
2017-04-06 13:00 ` Dennis Dalessandro
[not found] ` <f1703866-9c5c-a30a-0d95-9f6a33cc4f75-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
2017-04-06 13:33 ` Leon Romanovsky
2017-04-06 14:47 ` Marciniszyn, Mike
[not found] ` <32E1700B9017364D9B60AED9960492BC342EABD0-RjuIdWtd+YbTXloPLtfHfbfspsVTdybXVpNB7YpNyf8@public.gmane.org>
2017-04-06 17:13 ` Jason Gunthorpe
[not found] ` <20170406171354.GA19854-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
2017-04-06 17:16 ` Marciniszyn, Mike
[not found] ` <32E1700B9017364D9B60AED9960492BC342EADEE-RjuIdWtd+YbTXloPLtfHfbfspsVTdybXVpNB7YpNyf8@public.gmane.org>
2017-04-06 17:44 ` Jason Gunthorpe
[not found] ` <20170406174438.GA20020-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
2017-04-07 21:12 ` Marciniszyn, Mike
[not found] ` <32E1700B9017364D9B60AED9960492BC342EBA18-RjuIdWtd+YbTXloPLtfHfbfspsVTdybXVpNB7YpNyf8@public.gmane.org>
2017-04-07 22:06 ` Jason Gunthorpe
[not found] ` <20170407220618.GA29138-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
2017-04-09 6:26 ` Leon Romanovsky
2017-04-05 19:47 ` Leon Romanovsky
2017-03-21 0:26 ` [PATCH v2 20/20] IB/core: If the MGID/MLID pair is not on the list return an error Dennis Dalessandro
2017-04-05 18:50 ` [PATCH v2 00/20] IB/hfi1, qib, rdmavt: Another round of patches for 4.11 Doug Ledford
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20170321002619.28538.31428.stgit@scvm10.sc.intel.com \
--to=dennis.dalessandro-ral2jqcrhueavxtiumwx3w@public.gmane.org \
--cc=dledford-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org \
--cc=don.hiatt-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org \
--cc=linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org \
--cc=mike.marciniszyn-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).