From: Bob Pearson <rpearsonhpe@gmail.com>
To: jgg@nvidia.com, zyjzyj2000@gmail.com, linux-rdma@vger.kernel.org
Cc: Bob Pearson <rpearsonhpe@gmail.com>
Subject: [RFC PATCH v9 12/26] RDMA/rxe: Replace pool key by rxe->mcg_tree
Date: Thu, 27 Jan 2022 15:37:41 -0600 [thread overview]
Message-ID: <20220127213755.31697-13-rpearsonhpe@gmail.com> (raw)
In-Reply-To: <20220127213755.31697-1-rpearsonhpe@gmail.com>
Continuing to decouple mcg from rxe pools. Create red-black tree code
in rxe_mcast.c to hold mcg index.
Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com>
---
drivers/infiniband/sw/rxe/rxe.c | 1 +
drivers/infiniband/sw/rxe/rxe_loc.h | 3 +-
drivers/infiniband/sw/rxe/rxe_mcast.c | 187 +++++++++++++++++++++-----
drivers/infiniband/sw/rxe/rxe_recv.c | 4 +-
drivers/infiniband/sw/rxe/rxe_verbs.h | 3 +
5 files changed, 159 insertions(+), 39 deletions(-)
diff --git a/drivers/infiniband/sw/rxe/rxe.c b/drivers/infiniband/sw/rxe/rxe.c
index 46a07e2d9dcf..310e184ae9e8 100644
--- a/drivers/infiniband/sw/rxe/rxe.c
+++ b/drivers/infiniband/sw/rxe/rxe.c
@@ -199,6 +199,7 @@ static int rxe_init(struct rxe_dev *rxe)
return err;
spin_lock_init(&rxe->mcg_lock);
+ rxe->mcg_tree = RB_ROOT;
/* init pending mmap list */
spin_lock_init(&rxe->mmap_offset_lock);
diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h
index af40e3c212fb..d9faf3a1ee61 100644
--- a/drivers/infiniband/sw/rxe/rxe_loc.h
+++ b/drivers/infiniband/sw/rxe/rxe_loc.h
@@ -40,9 +40,10 @@ void rxe_cq_disable(struct rxe_cq *cq);
void rxe_cq_cleanup(struct rxe_pool_elem *arg);
/* rxe_mcast.c */
-void rxe_mc_cleanup(struct rxe_pool_elem *arg);
+struct rxe_mcg *rxe_lookup_mcg(struct rxe_dev *rxe, union ib_gid *mgid);
int rxe_attach_mcast(struct ib_qp *ibqp, union ib_gid *mgid, u16 mlid);
int rxe_detach_mcast(struct ib_qp *ibqp, union ib_gid *mgid, u16 mlid);
+void rxe_mc_cleanup(struct rxe_pool_elem *arg);
/* rxe_mmap.c */
struct rxe_mmap_info {
diff --git a/drivers/infiniband/sw/rxe/rxe_mcast.c b/drivers/infiniband/sw/rxe/rxe_mcast.c
index 62ace10206b0..4c3eb9c723b4 100644
--- a/drivers/infiniband/sw/rxe/rxe_mcast.c
+++ b/drivers/infiniband/sw/rxe/rxe_mcast.c
@@ -25,60 +25,172 @@ static int rxe_mcast_delete(struct rxe_dev *rxe, union ib_gid *mgid)
return dev_mc_del(rxe->ndev, ll_addr);
}
-/* caller should hold mc_grp_rxe->mcg_lock */
-static struct rxe_mcg *create_grp(struct rxe_dev *rxe,
- struct rxe_pool *pool,
- union ib_gid *mgid)
+/**
+ * __rxe_insert_mcg - insert an mcg into red-black tree (rxe->mcg_tree)
+ * @mcg: mcast group object with an embedded red-black tree node
+ *
+ * Walks mcg->rxe->mcg_tree from the root, ordering entries by a memcmp()
+ * of their mgid, then links mcg->node at the leaf position that was found
+ * and calls rb_insert_color() on it. An existing entry whose mgid compares
+ * equal is passed on the right, so duplicates are not detected here.
+ * mcg->rxe and mcg->mgid must already be set because both are read here.
+ *
+ * Context: caller must hold a reference to mcg and rxe->mcg_lock and
+ * is responsible to avoid adding the same mcg twice to the tree.
+ */
+static void __rxe_insert_mcg(struct rxe_mcg *mcg)
{
- int err;
+ struct rb_root *tree = &mcg->rxe->mcg_tree;
+ struct rb_node **link = &tree->rb_node;
+ struct rb_node *node = NULL;
+ struct rxe_mcg *tmp;
+ int cmp;
+
+ while (*link) {
+ node = *link;
+ tmp = rb_entry(node, struct rxe_mcg, node);
+
+ cmp = memcmp(&tmp->mgid, &mcg->mgid, sizeof(mcg->mgid));
+ if (cmp > 0)
+ link = &(*link)->rb_left;
+ else
+ link = &(*link)->rb_right;
+ }
+
+ rb_link_node(&mcg->node, node, link);
+ rb_insert_color(&mcg->node, tree);
+}
+
+/**
+ * __rxe_remove_mcg - remove an mcg from red-black tree holding lock
+ * @mcg: mcast group object with an embedded red-black tree node
+ *
+ * Erases mcg->node from mcg->rxe->mcg_tree with rb_erase(). No reference
+ * is dropped and the mcg itself is not freed here.
+ *
+ * Context: caller must hold a reference to mcg and rxe->mcg_lock
+ */
+static void __rxe_remove_mcg(struct rxe_mcg *mcg)
+{
+ rb_erase(&mcg->node, &mcg->rxe->mcg_tree);
+}
+
+/**
+ * __rxe_lookup_mcg - lookup mcg in rxe->mcg_tree while holding lock
+ * @rxe: rxe device object
+ * @mgid: multicast IP address
+ *
+ * Descends the tree from the root comparing each entry's mgid with @mgid
+ * using memcmp(), going left when the entry is greater and right when it
+ * is smaller, and stops on the first exact match. A reference is taken
+ * with rxe_add_ref() only when a match is found.
+ *
+ * Context: caller must hold rxe->mcg_lock
+ * Returns: mcg on success and takes a ref to mcg else NULL
+ */
+static struct rxe_mcg *__rxe_lookup_mcg(struct rxe_dev *rxe,
+ union ib_gid *mgid)
+{
+ struct rb_root *tree = &rxe->mcg_tree;
struct rxe_mcg *mcg;
+ struct rb_node *node;
+ int cmp;
- mcg = rxe_alloc_locked(&rxe->mc_grp_pool);
- if (!mcg)
- return ERR_PTR(-ENOMEM);
- rxe_add_ref(mcg);
+ node = tree->rb_node;
- INIT_LIST_HEAD(&mcg->qp_list);
- mcg->rxe = rxe;
- rxe_add_key_locked(mcg, mgid);
+ while (node) {
+ mcg = rb_entry(node, struct rxe_mcg, node);
- err = rxe_mcast_add(rxe, mgid);
- if (unlikely(err)) {
- rxe_drop_key_locked(mcg);
- rxe_drop_ref(mcg);
- return ERR_PTR(err);
+ cmp = memcmp(&mcg->mgid, mgid, sizeof(*mgid));
+
+ if (cmp > 0)
+ node = node->rb_left;
+ else if (cmp < 0)
+ node = node->rb_right;
+ else
+ break;
}
- return mcg;
+ if (node) {
+ rxe_add_ref(mcg);
+ return mcg;
+ }
+
+ return NULL;
}
-static int rxe_mcast_get_grp(struct rxe_dev *rxe, union ib_gid *mgid,
- struct rxe_mcg **mcgp)
+/**
+ * rxe_lookup_mcg - look up mcg in red-black tree
+ * @rxe: rxe device object
+ * @mgid: multicast IP address
+ *
+ * Takes rxe->mcg_lock with spin_lock_bh() around __rxe_lookup_mcg(), so
+ * the caller must not already hold that lock.
+ *
+ * Returns: mcg if found, with a reference taken that the caller must
+ * drop, else NULL
+ */
+struct rxe_mcg *rxe_lookup_mcg(struct rxe_dev *rxe, union ib_gid *mgid)
{
- int err;
struct rxe_mcg *mcg;
+
+ spin_lock_bh(&rxe->mcg_lock);
+ mcg = __rxe_lookup_mcg(rxe, mgid);
+ spin_unlock_bh(&rxe->mcg_lock);
+
+ return mcg;
+}
+
+/**
+ * rxe_get_mcg - lookup or allocate a mcg
+ * @rxe: rxe device object
+ * @mgid: multicast IP address
+ * @mcgp: address of returned mcg value
+ *
+ * Adds one ref if mcg already exists else adds a second reference
+ * which is dropped when qp_num goes to zero. rxe->mcg_num is
+ * incremented exactly once per new mcg and is decremented again on
+ * every failure path taken after the increment.
+ *
+ * NOTE(review): the speculative mcg is released with rxe_drop_ref()
+ * before mcg->rxe is set or the node is inserted in the tree; confirm
+ * that the pool cleanup callback tolerates such an object.
+ * NOTE(review): no decrement of rxe->mcg_num on group teardown is
+ * visible in this patch; confirm it is added where the mcg is destroyed.
+ *
+ * Returns: 0 and sets *mcgp to mcg on success, -EINVAL if multicast
+ * groups are not supported, -ENOMEM if allocation fails or the group
+ * limit is reached, else the error from rxe_mcast_add()
+ */
+static int rxe_get_mcg(struct rxe_dev *rxe, union ib_gid *mgid,
+ struct rxe_mcg **mcgp)
+{
+ struct rxe_mcg *mcg, *tmp;
+ int ret;
struct rxe_pool *pool = &rxe->mc_grp_pool;
- if (rxe->attr.max_mcast_qp_attach == 0)
+ if (rxe->attr.max_mcast_grp == 0)
return -EINVAL;
- spin_lock_bh(&rxe->mcg_lock);
+ /* check to see if mcg already exists */
+ mcg = rxe_lookup_mcg(rxe, mgid);
+ if (mcg) {
+ *mcgp = mcg;
+ return 0;
+ }
- mcg = rxe_pool_get_key_locked(pool, mgid);
- if (mcg)
- goto done;
+ /* speculative alloc of mcg without using GFP_ATOMIC */
+ mcg = rxe_alloc(pool);
+ if (!mcg)
+ return -ENOMEM;
- mcg = create_grp(rxe, pool, mgid);
- if (IS_ERR(mcg)) {
+ spin_lock_bh(&rxe->mcg_lock);
+ /* re-check to see if someone else just added it */
+ tmp = __rxe_lookup_mcg(rxe, mgid);
+ if (tmp) {
spin_unlock_bh(&rxe->mcg_lock);
- err = PTR_ERR(mcg);
- return err;
+ rxe_drop_ref(mcg);
+ mcg = tmp;
+ goto out;
}
-done:
+ /* count the new group once; undone at err_dec on any failure */
+ if (atomic_inc_return(&rxe->mcg_num) > rxe->attr.max_mcast_grp) {
+ ret = -ENOMEM;
+ goto err_dec;
+ }
+
+ ret = rxe_mcast_add(rxe, mgid);
+ if (ret)
+ goto err_dec;
+
+ rxe_add_ref(mcg);
+ mcg->rxe = rxe;
+ memcpy(&mcg->mgid, mgid, sizeof(*mgid));
+ INIT_LIST_HEAD(&mcg->qp_list);
+ __rxe_insert_mcg(mcg);
spin_unlock_bh(&rxe->mcg_lock);
+out:
*mcgp = mcg;
return 0;
+
+err_dec:
+ atomic_dec(&rxe->mcg_num);
+ spin_unlock_bh(&rxe->mcg_lock);
+ rxe_drop_ref(mcg);
+ return ret;
}
static int rxe_mcast_add_grp_elem(struct rxe_dev *rxe, struct rxe_qp *qp,
@@ -136,7 +248,7 @@ static int rxe_mcast_drop_grp_elem(struct rxe_dev *rxe, struct rxe_qp *qp,
struct rxe_mca *mca, *tmp;
int n;
- mcg = rxe_pool_get_key(&rxe->mc_grp_pool, mgid);
+ mcg = rxe_lookup_mcg(rxe, mgid);
if (!mcg)
goto err1;
@@ -151,14 +263,14 @@ static int rxe_mcast_drop_grp_elem(struct rxe_dev *rxe, struct rxe_qp *qp,
atomic_dec(&qp->mcg_num);
spin_unlock_bh(&rxe->mcg_lock);
- rxe_drop_ref(mcg); /* ref from get_key */
+ rxe_drop_ref(mcg);
kfree(mca);
return 0;
}
}
spin_unlock_bh(&rxe->mcg_lock);
- rxe_drop_ref(mcg); /* ref from get_key */
+ rxe_drop_ref(mcg);
err1:
return -EINVAL;
}
@@ -168,7 +280,10 @@ void rxe_mc_cleanup(struct rxe_pool_elem *elem)
struct rxe_mcg *mcg = container_of(elem, typeof(*mcg), elem);
struct rxe_dev *rxe = mcg->rxe;
- rxe_drop_key(mcg);
+ spin_lock_bh(&rxe->mcg_lock);
+ __rxe_remove_mcg(mcg);
+ spin_unlock_bh(&rxe->mcg_lock);
+
rxe_mcast_delete(rxe, &mcg->mgid);
}
@@ -180,7 +295,7 @@ int rxe_attach_mcast(struct ib_qp *ibqp, union ib_gid *mgid, u16 mlid)
struct rxe_mcg *mcg;
/* takes a ref on mcg if successful */
- err = rxe_mcast_get_grp(rxe, mgid, &mcg);
+ err = rxe_get_mcg(rxe, mgid, &mcg);
if (err)
return err;
diff --git a/drivers/infiniband/sw/rxe/rxe_recv.c b/drivers/infiniband/sw/rxe/rxe_recv.c
index 11246589fda7..f1ca83e09160 100644
--- a/drivers/infiniband/sw/rxe/rxe_recv.c
+++ b/drivers/infiniband/sw/rxe/rxe_recv.c
@@ -254,7 +254,7 @@ static void rxe_rcv_mcast_pkt(struct sk_buff *skb)
memcpy(&dgid, &ipv6_hdr(skb)->daddr, sizeof(dgid));
/* lookup mcast group corresponding to mgid, takes a ref */
- mcg = rxe_pool_get_key(&rxe->mc_grp_pool, &dgid);
+ mcg = rxe_lookup_mcg(rxe, &dgid);
if (!mcg)
goto drop; /* mcast group not registered */
@@ -320,7 +320,7 @@ static void rxe_rcv_mcast_pkt(struct sk_buff *skb)
kfree(qp_array);
- rxe_drop_ref(mcg); /* drop ref from rxe_pool_get_key. */
+ rxe_drop_ref(mcg);
if (likely(!skb))
return;
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h b/drivers/infiniband/sw/rxe/rxe_verbs.h
index b72f8f09d984..ea2d9ff29744 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.h
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.h
@@ -353,6 +353,7 @@ struct rxe_mw {
struct rxe_mcg {
struct rxe_pool_elem elem;
+ struct rb_node node;
struct rxe_dev *rxe;
struct list_head qp_list;
atomic_t qp_num;
@@ -397,6 +398,8 @@ struct rxe_dev {
struct rxe_pool mc_grp_pool;
spinlock_t mcg_lock; /* guard multicast groups */
+ struct rb_root mcg_tree;
+ atomic_t mcg_num;
spinlock_t pending_lock; /* guard pending_mmaps */
struct list_head pending_mmaps;
--
2.32.0
next prev parent reply other threads:[~2022-01-27 21:38 UTC|newest]
Thread overview: 41+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-01-27 21:37 [RFC PATCH v9 00/26] Bob Pearson
2022-01-27 21:37 ` [RFC PATCH v9 01/26] RDMA/rxe: Move rxe_mcast_add/delete to rxe_mcast.c Bob Pearson
2022-01-27 21:37 ` [RFC PATCH v9 02/26] RDMA/rxe: Move rxe_mcast_attach/detach " Bob Pearson
2022-01-27 21:37 ` [RFC PATCH v9 03/26] RDMA/rxe: Rename rxe_mc_grp and rxe_mc_elem Bob Pearson
2022-01-27 21:37 ` [RFC PATCH v9 04/26] RDMA/rxe: Enforce IBA o10-2.2.3 Bob Pearson
2022-01-28 12:53 ` Jason Gunthorpe
2022-01-28 16:18 ` Bob Pearson
2022-01-28 16:42 ` Jason Gunthorpe
2022-01-27 21:37 ` [RFC PATCH v9 05/26] RDMA/rxe: Remove rxe_drop_all_macst_groups Bob Pearson
2022-01-27 21:37 ` [RFC PATCH v9 06/26] RDMA/rxe: Remove qp->grp_lock and qp->grp_list Bob Pearson
2022-01-27 21:37 ` [RFC PATCH v9 07/26] RDMA/rxe: Use kzmalloc/kfree for mca Bob Pearson
2022-01-28 18:00 ` Jason Gunthorpe
2022-01-27 21:37 ` [RFC PATCH v9 08/26] RDMA/rxe: Rename grp to mcg and mce to mca Bob Pearson
2022-01-27 21:37 ` [RFC PATCH v9 09/26] RDMA/rxe: Introduce RXECB(skb) Bob Pearson
2022-01-28 18:29 ` Jason Gunthorpe
2022-01-30 17:47 ` Bob Pearson
2022-01-27 21:37 ` [RFC PATCH v9 10/26] RDMA/rxe: Split rxe_rcv_mcast_pkt into two phases Bob Pearson
2022-01-27 21:37 ` [RFC PATCH v9 11/26] RDMA/rxe: Replace locks by rxe->mcg_lock Bob Pearson
2022-01-27 21:37 ` Bob Pearson [this message]
2022-01-28 18:32 ` [RFC PATCH v9 12/26] RDMA/rxe: Replace pool key by rxe->mcg_tree Jason Gunthorpe
2022-01-30 23:23 ` Bob Pearson
2022-01-27 21:37 ` [RFC PATCH v9 13/26] RDMA/rxe: Remove key'ed object support Bob Pearson
2022-01-27 21:37 ` [RFC PATCH v9 14/26] RDMA/rxe: Remove mcg from rxe pools Bob Pearson
2022-01-27 21:37 ` [RFC PATCH v9 15/26] RDMA/rxe: Add code to cleanup mcast memory Bob Pearson
2022-01-27 21:37 ` [RFC PATCH v9 16/26] RDMA/rxe: Add comments to rxe_mcast.c Bob Pearson
2022-01-27 21:37 ` [RFC PATCH v9 17/26] RDMA/rxe: Separate code into subroutines Bob Pearson
2022-01-27 21:37 ` [RFC PATCH v9 18/26] RDMA/rxe: Convert mca read locking to RCU Bob Pearson
2022-01-28 18:39 ` Jason Gunthorpe
2022-01-27 21:37 ` [RFC PATCH v9 19/26] RDMA/rxe: Reverse the sense of RXE_POOL_NO_ALLOC Bob Pearson
2022-01-27 21:37 ` [RFC PATCH v9 20/26] RDMA/rxe: Delete _locked() APIs for pool objects Bob Pearson
2022-01-27 21:37 ` [RFC PATCH v9 21/26] RDMA/rxe: Replace obj by elem in declaration Bob Pearson
2022-01-27 21:37 ` [RFC PATCH v9 22/26] RDMA/rxe: Replace red-black trees by xarrays Bob Pearson
2022-01-27 21:37 ` [RFC PATCH v9 23/26] RDMA/rxe: Change pool locking to RCU Bob Pearson
2022-01-27 21:37 ` [RFC PATCH v9 24/26] RDMA/rxe: Add wait_for_completion to pool objects Bob Pearson
2022-01-28 3:58 ` kernel test robot
2022-01-28 3:58 ` kernel test robot
2022-01-27 21:37 ` [RFC PATCH v9 25/26] RDMA/rxe: Fix ref error in rxe_av.c Bob Pearson
2022-01-27 21:37 ` [RFC PATCH v9 26/26] RDMA/rxe: Replace mr by rkey in responder resources Bob Pearson
2022-01-28 18:42 ` [RFC PATCH v9 00/26] Jason Gunthorpe
2022-02-07 19:20 ` Bob Pearson
2022-02-07 19:38 ` Jason Gunthorpe
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20220127213755.31697-13-rpearsonhpe@gmail.com \
--to=rpearsonhpe@gmail.com \
--cc=jgg@nvidia.com \
--cc=linux-rdma@vger.kernel.org \
--cc=zyjzyj2000@gmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.