From: Leon Romanovsky <leon@kernel.org>
To: Doug Ledford <dledford@redhat.com>, Jason Gunthorpe <jgg@mellanox.com>
Cc: Leon Romanovsky <leonro@mellanox.com>,
RDMA mailing list <linux-rdma@vger.kernel.org>,
Majd Dibbiny <majd@mellanox.com>, Mark Zhang <markz@mellanox.com>,
Saeed Mahameed <saeedm@mellanox.com>,
linux-netdev <netdev@vger.kernel.org>
Subject: [PATCH rdma-next v5 06/17] RDMA/counter: Add "auto" configuration mode support
Date: Tue, 2 Jul 2019 13:02:35 +0300 [thread overview]
Message-ID: <20190702100246.17382-7-leon@kernel.org> (raw)
In-Reply-To: <20190702100246.17382-1-leon@kernel.org>
From: Mark Zhang <markz@mellanox.com>
In auto mode all QPs belong to one category are bind automatically to
a single counter set. Currently only "qp type" is supported.
In this mode the qp counter is set in RST2INIT modification, and when
a qp is destroyed the counter is unbound.
Signed-off-by: Mark Zhang <markz@mellanox.com>
Reviewed-by: Majd Dibbiny <majd@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
drivers/infiniband/core/counters.c | 221 +++++++++++++++++++++++++++++
drivers/infiniband/core/device.c | 3 +
drivers/infiniband/core/verbs.c | 9 ++
include/rdma/ib_verbs.h | 18 +++
include/rdma/rdma_counter.h | 8 ++
5 files changed, 259 insertions(+)
diff --git a/drivers/infiniband/core/counters.c b/drivers/infiniband/core/counters.c
index 6167914fba06..60639452669c 100644
--- a/drivers/infiniband/core/counters.c
+++ b/drivers/infiniband/core/counters.c
@@ -54,6 +54,227 @@ int rdma_counter_set_auto_mode(struct ib_device *dev, u8 port,
return ret;
}
+static struct rdma_counter *rdma_counter_alloc(struct ib_device *dev, u8 port,
+ enum rdma_nl_counter_mode mode)
+{
+ struct rdma_counter *counter;
+
+ if (!dev->ops.counter_dealloc)
+ return NULL;
+
+ counter = kzalloc(sizeof(*counter), GFP_KERNEL);
+ if (!counter)
+ return NULL;
+
+ counter->device = dev;
+ counter->port = port;
+ counter->res.type = RDMA_RESTRACK_COUNTER;
+ counter->mode.mode = mode;
+ kref_init(&counter->kref);
+ mutex_init(&counter->lock);
+
+ return counter;
+}
+
+static void rdma_counter_free(struct rdma_counter *counter)
+{
+ rdma_restrack_del(&counter->res);
+ kfree(counter);
+}
+
+static void auto_mode_init_counter(struct rdma_counter *counter,
+ const struct ib_qp *qp,
+ enum rdma_nl_counter_mask new_mask)
+{
+ struct auto_mode_param *param = &counter->mode.param;
+
+ counter->mode.mode = RDMA_COUNTER_MODE_AUTO;
+ counter->mode.mask = new_mask;
+
+ if (new_mask & RDMA_COUNTER_MASK_QP_TYPE)
+ param->qp_type = qp->qp_type;
+}
+
+static bool auto_mode_match(struct ib_qp *qp, struct rdma_counter *counter,
+ enum rdma_nl_counter_mask auto_mask)
+{
+ struct auto_mode_param *param = &counter->mode.param;
+ bool match = true;
+
+ if (rdma_is_kernel_res(&counter->res) != rdma_is_kernel_res(&qp->res))
+ return false;
+
+ /* Ensure that counter belong to right PID */
+ if (!rdma_is_kernel_res(&counter->res) &&
+ !rdma_is_kernel_res(&qp->res) &&
+ (task_pid_vnr(counter->res.task) != current->pid))
+ return false;
+
+ if (auto_mask & RDMA_COUNTER_MASK_QP_TYPE)
+ match &= (param->qp_type == qp->qp_type);
+
+ return match;
+}
+
+static int __rdma_counter_bind_qp(struct rdma_counter *counter,
+ struct ib_qp *qp)
+{
+ int ret;
+
+ if (qp->counter)
+ return -EINVAL;
+
+ if (!qp->device->ops.counter_bind_qp)
+ return -EOPNOTSUPP;
+
+ mutex_lock(&counter->lock);
+ ret = qp->device->ops.counter_bind_qp(counter, qp);
+ mutex_unlock(&counter->lock);
+
+ return ret;
+}
+
+static int __rdma_counter_unbind_qp(struct ib_qp *qp)
+{
+ struct rdma_counter *counter = qp->counter;
+ int ret;
+
+ if (!qp->device->ops.counter_unbind_qp)
+ return -EOPNOTSUPP;
+
+ mutex_lock(&counter->lock);
+ ret = qp->device->ops.counter_unbind_qp(qp);
+ mutex_unlock(&counter->lock);
+
+ return ret;
+}
+
+/**
+ * rdma_get_counter_auto_mode - Find the counter that @qp should be bound
+ * with in auto mode
+ *
+ * Return: The counter (with ref-count increased) if found
+ */
+static struct rdma_counter *rdma_get_counter_auto_mode(struct ib_qp *qp,
+ u8 port)
+{
+ struct rdma_port_counter *port_counter;
+ struct rdma_counter *counter = NULL;
+ struct ib_device *dev = qp->device;
+ struct rdma_restrack_entry *res;
+ struct rdma_restrack_root *rt;
+ unsigned long id = 0;
+
+ port_counter = &dev->port_data[port].port_counter;
+ rt = &dev->res[RDMA_RESTRACK_COUNTER];
+ xa_lock(&rt->xa);
+ xa_for_each(&rt->xa, id, res) {
+ if (!rdma_is_visible_in_pid_ns(res))
+ continue;
+
+ counter = container_of(res, struct rdma_counter, res);
+ if ((counter->device != qp->device) || (counter->port != port))
+ goto next;
+
+ if (auto_mode_match(qp, counter, port_counter->mode.mask))
+ break;
+next:
+ counter = NULL;
+ }
+
+ if (counter)
+ kref_get(&counter->kref);
+
+ xa_unlock(&rt->xa);
+ return counter;
+}
+
+static void rdma_counter_res_add(struct rdma_counter *counter,
+ struct ib_qp *qp)
+{
+ if (rdma_is_kernel_res(&qp->res)) {
+ rdma_restrack_set_task(&counter->res, qp->res.kern_name);
+ rdma_restrack_kadd(&counter->res);
+ } else {
+ rdma_restrack_attach_task(&counter->res, qp->res.task);
+ rdma_restrack_uadd(&counter->res);
+ }
+}
+
+static void counter_release(struct kref *kref)
+{
+ struct rdma_counter *counter;
+
+ counter = container_of(kref, struct rdma_counter, kref);
+ counter->device->ops.counter_dealloc(counter);
+ rdma_counter_free(counter);
+}
+
+/**
+ * rdma_counter_bind_qp_auto - Check and bind the QP to a counter base on
+ * the auto-mode rule
+ */
+int rdma_counter_bind_qp_auto(struct ib_qp *qp, u8 port)
+{
+ struct rdma_port_counter *port_counter;
+ struct ib_device *dev = qp->device;
+ struct rdma_counter *counter;
+ int ret;
+
+ if (!rdma_is_port_valid(dev, port))
+ return -EINVAL;
+
+ port_counter = &dev->port_data[port].port_counter;
+ if (port_counter->mode.mode != RDMA_COUNTER_MODE_AUTO)
+ return 0;
+
+ counter = rdma_get_counter_auto_mode(qp, port);
+ if (counter) {
+ ret = __rdma_counter_bind_qp(counter, qp);
+ if (ret) {
+ kref_put(&counter->kref, counter_release);
+ return ret;
+ }
+ } else {
+ counter = rdma_counter_alloc(dev, port, RDMA_COUNTER_MODE_AUTO);
+ if (!counter)
+ return -ENOMEM;
+
+ auto_mode_init_counter(counter, qp, port_counter->mode.mask);
+
+ ret = __rdma_counter_bind_qp(counter, qp);
+ if (ret) {
+ rdma_counter_free(counter);
+ return ret;
+ }
+
+ rdma_counter_res_add(counter, qp);
+ }
+
+ return 0;
+}
+
+/**
+ * rdma_counter_unbind_qp - Unbind a qp from a counter
+ * @force:
+ * true - Decrease the counter ref-count anyway (e.g., qp destroy)
+ */
+int rdma_counter_unbind_qp(struct ib_qp *qp, bool force)
+{
+ struct rdma_counter *counter = qp->counter;
+ int ret;
+
+ if (!counter)
+ return -EINVAL;
+
+ ret = __rdma_counter_unbind_qp(qp);
+ if (ret && !force)
+ return ret;
+
+ kref_put(&counter->kref, counter_release);
+ return 0;
+}
+
void rdma_counter_init(struct ib_device *dev)
{
struct rdma_port_counter *port_counter;
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index 6579865e4866..f3181b74c863 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -2471,6 +2471,9 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops)
SET_DEVICE_OP(dev_ops, alloc_xrcd);
SET_DEVICE_OP(dev_ops, attach_mcast);
SET_DEVICE_OP(dev_ops, check_mr_status);
+ SET_DEVICE_OP(dev_ops, counter_bind_qp);
+ SET_DEVICE_OP(dev_ops, counter_dealloc);
+ SET_DEVICE_OP(dev_ops, counter_unbind_qp);
SET_DEVICE_OP(dev_ops, create_ah);
SET_DEVICE_OP(dev_ops, create_counters);
SET_DEVICE_OP(dev_ops, create_cq);
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index 4a04e94a72db..92349bf37589 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -1690,6 +1690,14 @@ static int _ib_modify_qp(struct ib_qp *qp, struct ib_qp_attr *attr,
}
}
+ /*
+ * Bind this qp to a counter automatically based on the rdma counter
+ * rules. This only set in RST2INIT with port specified
+ */
+ if (!qp->counter && (attr_mask & IB_QP_PORT) &&
+ ((attr_mask & IB_QP_STATE) && attr->qp_state == IB_QPS_INIT))
+ rdma_counter_bind_qp_auto(qp, attr->port_num);
+
ret = ib_security_modify_qp(qp, attr, attr_mask, udata);
if (ret)
goto out;
@@ -1885,6 +1893,7 @@ int ib_destroy_qp_user(struct ib_qp *qp, struct ib_udata *udata)
if (!qp->uobject)
rdma_rw_cleanup_mrs(qp);
+ rdma_counter_unbind_qp(qp, true);
rdma_restrack_del(&qp->res);
ret = qp->device->ops.destroy_qp(qp, udata);
if (!ret) {
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 3d19c056fbc0..0205472eb73a 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -1698,6 +1698,9 @@ struct ib_qp {
* Implementation details of the RDMA core, don't use in drivers:
*/
struct rdma_restrack_entry res;
+
+ /* The counter the qp is bind to */
+ struct rdma_counter *counter;
};
struct ib_dm {
@@ -2485,6 +2488,21 @@ struct ib_device_ops {
u8 pdata_len);
int (*iw_create_listen)(struct iw_cm_id *cm_id, int backlog);
int (*iw_destroy_listen)(struct iw_cm_id *cm_id);
+ /**
+ * counter_bind_qp - Bind a QP to a counter.
+ * @counter - The counter to be bound. If counter->id is zero then
+ * the driver needs to allocate a new counter and set counter->id
+ */
+ int (*counter_bind_qp)(struct rdma_counter *counter, struct ib_qp *qp);
+ /**
+ * counter_unbind_qp - Unbind the qp from the dynamically-allocated
+ * counter and bind it onto the default one
+ */
+ int (*counter_unbind_qp)(struct ib_qp *qp);
+ /**
+ * counter_dealloc -De-allocate the hw counter
+ */
+ int (*counter_dealloc)(struct rdma_counter *counter);
DECLARE_RDMA_OBJ_SIZE(ib_ah);
DECLARE_RDMA_OBJ_SIZE(ib_cq);
diff --git a/include/rdma/rdma_counter.h b/include/rdma/rdma_counter.h
index 8dd2619c015d..9f93a2403c9c 100644
--- a/include/rdma/rdma_counter.h
+++ b/include/rdma/rdma_counter.h
@@ -7,11 +7,14 @@
#define _RDMA_COUNTER_H_
#include <linux/mutex.h>
+#include <linux/pid_namespace.h>
#include <rdma/ib_verbs.h>
#include <rdma/restrack.h>
#include <rdma/rdma_netlink.h>
+struct ib_qp;
+
struct auto_mode_param {
int qp_type;
};
@@ -31,6 +34,9 @@ struct rdma_counter {
struct rdma_restrack_entry res;
struct ib_device *device;
uint32_t id;
+ struct kref kref;
+ struct rdma_counter_mode mode;
+ struct mutex lock;
u8 port;
};
@@ -38,5 +44,7 @@ void rdma_counter_init(struct ib_device *dev);
void rdma_counter_release(struct ib_device *dev);
int rdma_counter_set_auto_mode(struct ib_device *dev, u8 port,
bool on, enum rdma_nl_counter_mask mask);
+int rdma_counter_bind_qp_auto(struct ib_qp *qp, u8 port);
+int rdma_counter_unbind_qp(struct ib_qp *qp, bool force);
#endif /* _RDMA_COUNTER_H_ */
--
2.20.1
next prev parent reply other threads:[~2019-07-02 10:02 UTC|newest]
Thread overview: 25+ messages / expand[flat|nested] mbox.gz Atom feed top
2019-07-02 10:02 [PATCH rdma-next v5 00/17] Statistics counter support Leon Romanovsky
2019-07-02 10:02 ` Leon Romanovsky
2019-07-02 10:02 ` [PATCH mlx5-next v5 01/17] net/mlx5: Add rts2rts_qp_counters_set_id field in hca cap Leon Romanovsky
2019-07-02 10:02 ` [PATCH rdma-next v5 02/17] RDMA/restrack: Introduce statistic counter Leon Romanovsky
2019-07-02 10:02 ` [PATCH rdma-next v5 03/17] RDMA/restrack: Add an API to attach a task to a resource Leon Romanovsky
2019-07-02 10:02 ` [PATCH rdma-next v5 04/17] RDMA/restrack: Make is_visible_in_pid_ns() as an API Leon Romanovsky
2019-07-02 10:02 ` [PATCH rdma-next v5 05/17] RDMA/counter: Add set/clear per-port auto mode support Leon Romanovsky
2019-07-02 10:02 ` Leon Romanovsky [this message]
2019-07-04 18:09 ` [PATCH rdma-next v5 06/17] RDMA/counter: Add "auto" configuration " Jason Gunthorpe
2019-07-02 10:02 ` [PATCH mlx5-next v5 07/17] IB/mlx5: Support set qp counter Leon Romanovsky
2019-07-02 10:02 ` [PATCH rdma-next v5 08/17] IB/mlx5: Add counter set id as a parameter for mlx5_ib_query_q_counters() Leon Romanovsky
2019-07-02 10:02 ` [PATCH rdma-next v5 09/17] IB/mlx5: Support statistic q counter configuration Leon Romanovsky
2019-07-02 10:02 ` [PATCH rdma-next v5 10/17] RDMA/nldev: Allow counter auto mode configration through RDMA netlink Leon Romanovsky
2019-07-02 10:02 ` [PATCH rdma-next v5 11/17] RDMA/netlink: Implement counter dumpit calback Leon Romanovsky
2019-07-04 18:07 ` Jason Gunthorpe
2019-07-04 18:22 ` Leon Romanovsky
2019-07-02 10:02 ` [PATCH rdma-next v5 12/17] IB/mlx5: Add counter_alloc_stats() and counter_update_stats() support Leon Romanovsky
2019-07-02 10:02 ` [PATCH rdma-next v5 13/17] RDMA/core: Get sum value of all counters when perform a sysfs stat read Leon Romanovsky
2019-07-02 10:02 ` [PATCH rdma-next v5 14/17] RDMA/counter: Allow manual mode configuration support Leon Romanovsky
2019-07-02 10:02 ` [PATCH rdma-next v5 15/17] RDMA/nldev: Allow counter manual mode configration through RDMA netlink Leon Romanovsky
2019-07-02 10:02 ` [PATCH rdma-next v5 16/17] RDMA/nldev: Allow get counter mode " Leon Romanovsky
2019-07-02 10:02 ` [PATCH rdma-next v5 17/17] RDMA/nldev: Allow get default counter statistics " Leon Romanovsky
2019-07-04 18:25 ` [PATCH rdma-next v5 00/17] Statistics counter support Jason Gunthorpe
2019-07-04 18:39 ` Leon Romanovsky
2019-07-05 15:50 ` Jason Gunthorpe
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20190702100246.17382-7-leon@kernel.org \
--to=leon@kernel.org \
--cc=dledford@redhat.com \
--cc=jgg@mellanox.com \
--cc=leonro@mellanox.com \
--cc=linux-rdma@vger.kernel.org \
--cc=majd@mellanox.com \
--cc=markz@mellanox.com \
--cc=netdev@vger.kernel.org \
--cc=saeedm@mellanox.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.