From: Rohit Nair <rohit.sajan.kumar@oracle.com>
To: leon@kernel.org, jgg@ziepe.ca, saeedm@nvidia.com,
davem@davemloft.net, edumazet@google.com, kuba@kernel.org,
pabeni@redhat.com, linux-rdma@vger.kernel.org,
linux-kernel@vger.kernel.org, netdev@vger.kernel.org
Cc: manjunath.b.patil@oracle.com, rama.nichanamatlu@oracle.com,
rohit.sajan.kumar@oracle.com,
Michael Guralnik <michaelgur@nvidia.com>
Subject: [PATCH 1/1] IB/mlx5: Add a signature check to received EQEs and CQEs
Date: Wed, 5 Oct 2022 10:45:20 -0700 [thread overview]
Message-ID: <20221005174521.63619-1-rohit.sajan.kumar@oracle.com> (raw)
As the PRM defines, the bytewise XOR of an EQE and its EQE index should
be 0xff; otherwise, the EQE can be assumed to be corrupt. The same
applies to CQEs.
Add a check that verifies each EQE and CQE in that respect and, if the
check fails, dumps the offending EQE or CQE to dmesg for inspection.
This patch has been tested using qperf and does not introduce any
significant performance degradation.
Suggested-by: Michael Guralnik <michaelgur@nvidia.com>
Signed-off-by: Rohit Nair <rohit.sajan.kumar@oracle.com>
---
drivers/infiniband/hw/mlx5/cq.c | 40 ++++++++++++++++++++++++++++
drivers/net/ethernet/mellanox/mlx5/core/eq.c | 39 +++++++++++++++++++++++++++
2 files changed, 79 insertions(+)
diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c
index be189e0..2a6d722 100644
--- a/drivers/infiniband/hw/mlx5/cq.c
+++ b/drivers/infiniband/hw/mlx5/cq.c
@@ -441,6 +441,44 @@ static void mlx5_ib_poll_sw_comp(struct mlx5_ib_cq *cq, int num_entries,
}
}
+/*
+ * Check the signature of a CQE: the XOR of every CQE byte and of the three
+ * low bytes of the consumer index must be 0xff. On mismatch, log the CQ
+ * state, hex-dump the CQE and warn once instead of halting the machine;
+ * panic_on_warn can still be set to stop on the first corrupt entry.
+ */
+static void verify_cqe(struct mlx5_cqe64 *cqe64, struct mlx5_ib_cq *cq)
+{
+	struct mlx5_ib_dev *dev = to_mdev(cq->ibcq.device);
+	const u64 *word = (const u64 *)cqe64;
+	u32 cons_index = cq->mcq.cons_index;
+	u64 folded = 0;
+	size_t i;
+	u8 sig;
+
+	/* XOR eight bytes at a time, then fold the 64-bit result to one byte. */
+	for (i = 0; i < sizeof(*cqe64) / sizeof(*word); i++)
+		folded ^= word[i];
+	folded ^= folded >> 32;
+	folded ^= folded >> 16;
+	folded ^= folded >> 8;
+
+	sig = (u8)folded ^ (u8)cons_index ^ (u8)(cons_index >> 8) ^
+	      (u8)(cons_index >> 16);
+	if (sig == 0xff)
+		return;
+
+	dev_err(&dev->mdev->pdev->dev,
+		"Faulty CQE - checksum failure: cqe=0x%x cqn=0x%x cqe_bytewise_xor=0x%x\n",
+		cq->ibcq.cqe, cq->mcq.cqn, sig);
+	dev_err(&dev->mdev->pdev->dev,
+		"cons_index=%u arm_sn=%u irqn=%u cqe_size=0x%x\n",
+		cq->mcq.cons_index, cq->mcq.arm_sn, cq->mcq.irqn, cq->mcq.cqe_sz);
+	print_hex_dump(KERN_WARNING, "", DUMP_PREFIX_OFFSET,
+		       16, 1, cqe64, sizeof(*cqe64), false);
+	WARN_ON_ONCE(1);
+}
+
static int mlx5_poll_one(struct mlx5_ib_cq *cq,
struct mlx5_ib_qp **cur_qp,
struct ib_wc *wc)
@@ -463,6 +501,8 @@ static int mlx5_poll_one(struct mlx5_ib_cq *cq,
cqe64 = (cq->mcq.cqe_sz == 64) ? cqe : cqe + 64;
+ verify_cqe(cqe64, cq);
+
++cq->mcq.cons_index;
/* Make sure we read CQ entry contents after we've checked the
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
index 229728c..f2a6d8b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
@@ -102,6 +102,43 @@ static struct mlx5_core_cq *mlx5_eq_cq_get(struct mlx5_eq *eq, u32 cqn)
return cq;
}
+/*
+ * Check the signature of an EQE: the XOR of every EQE byte and of the three
+ * low bytes of the consumer index must be 0xff. On mismatch, log the EQ
+ * state, hex-dump the EQE and warn once instead of halting the machine;
+ * panic_on_warn can still be set to stop on the first corrupt entry.
+ */
+static void verify_eqe(struct mlx5_eq *eq, struct mlx5_eqe *eqe)
+{
+	const u64 *word = (const u64 *)eqe;
+	u32 cons_index = eq->cons_index;
+	u64 folded = 0;
+	size_t i;
+	u8 sig;
+
+	/* XOR eight bytes at a time, then fold the 64-bit result to one byte. */
+	for (i = 0; i < sizeof(*eqe) / sizeof(*word); i++)
+		folded ^= word[i];
+	folded ^= folded >> 32;
+	folded ^= folded >> 16;
+	folded ^= folded >> 8;
+
+	sig = (u8)folded ^ (u8)cons_index ^ (u8)(cons_index >> 8) ^
+	      (u8)(cons_index >> 16);
+	if (sig == 0xff)
+		return;
+
+	dev_err(&eq->dev->pdev->dev,
+		"Faulty EQE - checksum failure: ci=0x%x eqe_type=0x%x eqe_bytewise_xor=0x%x\n",
+		eq->cons_index, eqe->type, sig);
+	dev_err(&eq->dev->pdev->dev,
+		"EQ addr=%p eqn=%u irqn=%u vec_index=%u\n",
+		eq, eq->eqn, eq->irqn, eq->vecidx);
+	print_hex_dump(KERN_WARNING, "", DUMP_PREFIX_OFFSET,
+		       16, 1, eqe, sizeof(*eqe), false);
+	WARN_ON_ONCE(1);
+}
+
static int mlx5_eq_comp_int(struct notifier_block *nb,
__always_unused unsigned long action,
__always_unused void *data)
@@ -127,6 +164,8 @@ static int mlx5_eq_comp_int(struct notifier_block *nb,
/* Assume (eqe->type) is always MLX5_EVENT_TYPE_COMP */
cqn = be32_to_cpu(eqe->data.comp.cqn) & 0xffffff;
+ verify_eqe(eq, eqe);
+
cq = mlx5_eq_cq_get(eq, cqn);
if (likely(cq)) {
++cq->arm_sn;
--
1.8.3.1
next reply other threads:[~2022-10-05 17:47 UTC|newest]
Thread overview: 8+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-10-05 17:45 Rohit Nair [this message]
2022-10-11 7:17 ` [PATCH 1/1] IB/mlx5: Add a signature check to received EQEs and CQEs Leon Romanovsky
2022-10-25 17:44 ` [External] : " Rohit Nair
2022-10-27 12:23 ` Leon Romanovsky
2022-10-28 23:48 ` Rohit Nair
2022-11-06 18:03 ` Leon Romanovsky
2022-11-07 17:51 ` Rohit Nair
[not found] ` <f3a56720-4df4-6b17-bfdf-4385dc27a2c0@oracle.com>
2022-11-09 18:33 ` Leon Romanovsky
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20221005174521.63619-1-rohit.sajan.kumar@oracle.com \
--to=rohit.sajan.kumar@oracle.com \
--cc=davem@davemloft.net \
--cc=edumazet@google.com \
--cc=jgg@ziepe.ca \
--cc=kuba@kernel.org \
--cc=leon@kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-rdma@vger.kernel.org \
--cc=manjunath.b.patil@oracle.com \
--cc=michaelgur@nvidia.com \
--cc=netdev@vger.kernel.org \
--cc=pabeni@redhat.com \
--cc=rama.nichanamatlu@oracle.com \
--cc=saeedm@nvidia.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).