From: Bob Pearson <rpearsonhpe@gmail.com>
To: jgg@nvidia.com, zyjzyj2000@gmail.com, linux-rdma@vger.kernel.org
Cc: Bob Pearson <rpearsonhpe@gmail.com>
Subject: [PATCH for-next v12 5/6] RDMA/rxe: For mcast copy qp list to temp array
Date: Thu, 17 Feb 2022 18:35:43 -0600 [thread overview]
Message-ID: <20220218003543.205799-6-rpearsonhpe@gmail.com> (raw)
In-Reply-To: <20220218003543.205799-1-rpearsonhpe@gmail.com>
Currently rxe_rcv_mcast_pkt performs most of its work under the
rxe->mcg_lock and calls into rxe_rcv, which queues the packets
to the responder and completer tasklets while still holding the
lock; this is a very bad idea. This patch walks the qp_list in the
mcg and copies the qp addresses to a temporary array under the
lock, but does the rest of the work without holding the lock. The
critical section is now very small.
Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com>
---
drivers/infiniband/sw/rxe/rxe_recv.c | 103 +++++++++++++++++----------
1 file changed, 64 insertions(+), 39 deletions(-)
diff --git a/drivers/infiniband/sw/rxe/rxe_recv.c b/drivers/infiniband/sw/rxe/rxe_recv.c
index 53924453abef..9b21cbb22602 100644
--- a/drivers/infiniband/sw/rxe/rxe_recv.c
+++ b/drivers/infiniband/sw/rxe/rxe_recv.c
@@ -232,11 +232,15 @@ static inline void rxe_rcv_pkt(struct rxe_pkt_info *pkt, struct sk_buff *skb)
static void rxe_rcv_mcast_pkt(struct rxe_dev *rxe, struct sk_buff *skb)
{
+ struct sk_buff *skb_copy;
struct rxe_pkt_info *pkt = SKB_TO_PKT(skb);
+ struct rxe_pkt_info *pkt_copy;
struct rxe_mcg *mcg;
struct rxe_mca *mca;
struct rxe_qp *qp;
+ struct rxe_qp **qp_array;
union ib_gid dgid;
+ int n, nmax;
int err;
if (skb->protocol == htons(ETH_P_IP))
@@ -248,68 +252,89 @@ static void rxe_rcv_mcast_pkt(struct rxe_dev *rxe, struct sk_buff *skb)
/* lookup mcast group corresponding to mgid, takes a ref */
mcg = rxe_lookup_mcg(rxe, &dgid);
if (!mcg)
- goto drop; /* mcast group not registered */
+ goto err_drop; /* mcast group not registered */
+
+ /* this is the current number of qp's attached to mcg plus a
+ * little room in case new qp's are attached between here
+ * and when we finish walking the qp list. If someone can
+ * attach more than 4 new qp's we will miss forwarding
+ * packets to those qp's. This is actually OK since UD is
+ an unreliable service.
+ */
+ nmax = atomic_read(&mcg->qp_num) + 4;
+ qp_array = kmalloc_array(nmax, sizeof(qp), GFP_KERNEL);
+ n = 0;
spin_lock_bh(&rxe->mcg_lock);
-
- /* this is unreliable datagram service so we let
- * failures to deliver a multicast packet to a
- * single QP happen and just move on and try
- * the rest of them on the list
- */
list_for_each_entry(mca, &mcg->qp_list, qp_list) {
- qp = mca->qp;
+ /* protect the qp pointers in the list */
+ rxe_add_ref(mca->qp);
+ qp_array[n++] = mca->qp;
+ if (n == nmax)
+ break;
+ }
+ spin_unlock_bh(&rxe->mcg_lock);
+ nmax = n;
+ kref_put(&mcg->ref_cnt, rxe_cleanup_mcg);
- /* validate qp for incoming packet */
+ for (n = 0; n < nmax; n++) {
+ qp = qp_array[n];
+
+ /* since this is an unreliable transport if
+ * one of the qp's fails to pass these checks
+ * just don't forward a packet and continue
+ * on to the other qp's. If there aren't any
+ * drop the skb
+ */
err = check_type_state(rxe, pkt, qp);
- if (err)
+ if (err) {
+ rxe_drop_ref(qp);
+ if (n == nmax - 1)
+ goto err_free;
continue;
+ }
err = check_keys(rxe, pkt, bth_qpn(pkt), qp);
- if (err)
+ if (err) {
+ rxe_drop_ref(qp);
+ if (n == nmax - 1)
+ goto err_free;
continue;
+ }
- /* for all but the last QP create a new clone of the
- * skb and pass to the QP. Pass the original skb to
- * the last QP in the list.
+ /* for all but the last qp create a new copy(clone)
+ * of the skb and pass to the qp. Pass the original
+ * skb to the last qp in the list unless it failed
+ * checks above
*/
- if (mca->qp_list.next != &mcg->qp_list) {
- struct sk_buff *cskb;
- struct rxe_pkt_info *cpkt;
-
- cskb = skb_clone(skb, GFP_ATOMIC);
- if (unlikely(!cskb))
+ if (n < nmax - 1) {
+ skb_copy = skb_clone(skb, GFP_KERNEL);
+ if (unlikely(!skb_copy)) {
+ rxe_drop_ref(qp);
continue;
+ }
if (WARN_ON(!ib_device_try_get(&rxe->ib_dev))) {
- kfree_skb(cskb);
- break;
+ kfree_skb(skb_copy);
+ rxe_drop_ref(qp);
+ continue;
}
- cpkt = SKB_TO_PKT(cskb);
- cpkt->qp = qp;
- rxe_add_ref(qp);
- rxe_rcv_pkt(cpkt, cskb);
+ pkt_copy = SKB_TO_PKT(skb_copy);
+ pkt_copy->qp = qp;
+ rxe_rcv_pkt(pkt_copy, skb_copy);
} else {
pkt->qp = qp;
- rxe_add_ref(qp);
rxe_rcv_pkt(pkt, skb);
- skb = NULL; /* mark consumed */
}
}
- spin_unlock_bh(&rxe->mcg_lock);
-
- kref_put(&mcg->ref_cnt, rxe_cleanup_mcg);
-
- if (likely(!skb))
- return;
-
- /* This only occurs if one of the checks fails on the last
- * QP in the list above
- */
+ kfree(qp_array);
+ return;
-drop:
+err_free:
+ kfree(qp_array);
+err_drop:
kfree_skb(skb);
ib_device_put(&rxe->ib_dev);
}
--
2.32.0
next prev parent reply other threads:[~2022-02-18 0:36 UTC|newest]
Thread overview: 11+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-02-18 0:35 [PATCH for-next v12 0/6] Move two object pools to rxe_mcast.c Bob Pearson
2022-02-18 0:35 ` [PATCH for-next v12 1/6] RDMA/rxe: Add code to cleanup mcast memory Bob Pearson
2022-02-23 19:33 ` Jason Gunthorpe
2022-02-18 0:35 ` [PATCH for-next v12 2/6] RDMA/rxe: Collect mca init code in a subroutine Bob Pearson
2022-02-18 0:35 ` [PATCH for-next v12 3/6] RDMA/rxe: Collect cleanup mca " Bob Pearson
2022-02-18 0:35 ` [PATCH for-next v12 4/6] RDMA/rxe: Cleanup rxe_mcast.c Bob Pearson
2022-02-18 0:35 ` Bob Pearson [this message]
2022-02-18 0:35 ` [PATCH for-next v12 6/6] RDMA/rxe: Convert mca read locking to RCU Bob Pearson
2022-02-23 19:52 ` Jason Gunthorpe
2022-02-23 22:40 ` Bob Pearson
2022-02-24 0:04 ` Jason Gunthorpe
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20220218003543.205799-6-rpearsonhpe@gmail.com \
--to=rpearsonhpe@gmail.com \
--cc=jgg@nvidia.com \
--cc=linux-rdma@vger.kernel.org \
--cc=zyjzyj2000@gmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).