From: Eli Cohen <eli-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org>
To: Roland Dreier <rdreier-FYB4Gu1CFyUAvxtiuMwx3w@public.gmane.org>
Cc: RDMA list <linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org>
Subject: [PATCH 3/3] mlx4: add support for reading performance counters
Date: Wed, 10 Nov 2010 14:24:00 +0200 [thread overview]
Message-ID: <20101110122400.GD12037@mtldesk30> (raw)
This patch uses the basic or extended counters, which can be read through the
command interface, to report counters for all the QPs that work on an rdmaoe
port. This effectively allows implementing IB-style performance counters.
Signed-off-by: Eli Cohen <eli-VPRAkNaXOzVS1MOuV/RT9w@public.gmane.org>
---
drivers/infiniband/hw/mlx4/mad.c | 87 ++++++++++++++++++++++++++++++++-
drivers/infiniband/hw/mlx4/main.c | 16 ++++++-
drivers/infiniband/hw/mlx4/mlx4_ib.h | 1 +
drivers/infiniband/hw/mlx4/qp.c | 7 ++-
drivers/net/mlx4/main.c | 22 +++++++--
include/linux/mlx4/cmd.h | 4 ++
include/linux/mlx4/device.h | 36 ++++++++++++++
include/linux/mlx4/qp.h | 3 +-
8 files changed, 165 insertions(+), 11 deletions(-)
diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c
index c9a8dd6..397bfb8 100644
--- a/drivers/infiniband/hw/mlx4/mad.c
+++ b/drivers/infiniband/hw/mlx4/mad.c
@@ -230,9 +230,9 @@ static void forward_trap(struct mlx4_ib_dev *dev, u8 port_num, struct ib_mad *ma
}
}
-int mlx4_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
- struct ib_wc *in_wc, struct ib_grh *in_grh,
- struct ib_mad *in_mad, struct ib_mad *out_mad)
+static int ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
+ struct ib_wc *in_wc, struct ib_grh *in_grh,
+ struct ib_mad *in_mad, struct ib_mad *out_mad)
{
u16 slid, prev_lid = 0;
int err;
@@ -300,6 +300,87 @@ int mlx4_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY;
}
+/*
+ * Narrow a big-endian 64-bit counter to a big-endian 32-bit value.
+ *
+ * A value that does not fit in 32 bits is clamped to 0xffffffff rather
+ * than truncated to its low 32 bits, so a large counter reads as "maxed
+ * out" instead of wrapping around to a misleadingly small number.
+ */
+static __be32 be64_to_be32(__be64 b64)
+{
+	u64 val = be64_to_cpu(b64);
+
+	if (val > 0xffffffffULL)
+		val = 0xffffffffULL;
+
+	return cpu_to_be32((u32)val);
+}
+
+/*
+ * Fill the IB PortCounters attribute of a PerfMgt reply from a basic-mode
+ * counter block.
+ *
+ * @cnt:  counter block in the basic layout (struct mlx4_counters)
+ * @data: start of the MAD data area; the attribute begins 40 bytes into
+ *        it, and the offsets below are relative to the attribute start
+ *
+ * PortXmitData and PortRcvData are expressed in units of 4 octets, so the
+ * byte counts are divided by 4 before being narrowed to 32 bits.
+ */
+static void edit_counters(struct mlx4_counters *cnt, void *data)
+{
+	void *attr = data + 40;
+	__be64 tx_dwords = cpu_to_be64(be64_to_cpu(cnt->tx_bytes) >> 2);
+	__be64 rx_dwords = cpu_to_be64(be64_to_cpu(cnt->rx_bytes) >> 2);
+
+	*(__be32 *)(attr + 24) = be64_to_be32(tx_dwords);	/* PortXmitData */
+	*(__be32 *)(attr + 28) = be64_to_be32(rx_dwords);	/* PortRcvData */
+	*(__be32 *)(attr + 32) = be64_to_be32(cnt->tx_frames);	/* PortXmitPkts */
+	*(__be32 *)(attr + 36) = be64_to_be32(cnt->rx_frames);	/* PortRcvPkts */
+}
+
+/*
+ * Fill the IB PortCounters attribute of a PerfMgt reply from an
+ * extended-mode counter block. Only the unicast and rx-error counters
+ * are reported.
+ *
+ * @cnt:  counter block in the extended layout (struct mlx4_counters_ext)
+ * @data: start of the MAD data area; the attribute begins 40 bytes into
+ *        it, and the offsets below are relative to the attribute start
+ *
+ * PortXmitData and PortRcvData are expressed in units of 4 octets, so the
+ * byte counts are divided by 4 before being narrowed to 32 bits.
+ */
+static void edit_ext_counters(struct mlx4_counters_ext *cnt, void *data)
+{
+	void *attr = data + 40;
+	__be64 tx_dwords = cpu_to_be64(be64_to_cpu(cnt->tx_uni_bytes) >> 2);
+	__be64 rx_dwords = cpu_to_be64(be64_to_cpu(cnt->rx_uni_bytes) >> 2);
+	u64 rx_errs = be64_to_cpu(cnt->rx_err_frames);
+
+	*(__be32 *)(attr + 24) = be64_to_be32(tx_dwords);	/* PortXmitData */
+	*(__be32 *)(attr + 28) = be64_to_be32(rx_dwords);	/* PortRcvData */
+	*(__be32 *)(attr + 32) = be64_to_be32(cnt->tx_uni_frames); /* PortXmitPkts */
+	*(__be32 *)(attr + 36) = be64_to_be32(cnt->rx_uni_frames); /* PortRcvPkts */
+
+	/*
+	 * PortRcvErrors is only 16 bits wide. A 32-bit store at this offset
+	 * would spill into the adjacent PortRcvRemotePhysicalErrors field,
+	 * so store a saturated 16-bit value instead.
+	 */
+	if (rx_errs > 0xffff)
+		rx_errs = 0xffff;
+	*(__be16 *)(attr + 8) = cpu_to_be16((u16)rx_errs);
+}
+
+/*
+ * Answer a performance-management MAD on an Ethernet link-layer port by
+ * reading the port's counter with the QUERY_IF_STAT command and copying
+ * the result into the reply.
+ *
+ * @ibdev:    device the MAD arrived on
+ * @port_num: 1-based port number, used to pick the per-port counter
+ * @in_mad:   request; only its management class is examined
+ * @out_mad:  reply; its data area is zeroed and then filled in
+ * @mad_flags, @in_wc, @in_grh: unused here
+ *
+ * Returns -EINVAL for any management class other than PERF_MGMT,
+ * IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY when the counters were
+ * reported, and IB_MAD_RESULT_FAILURE otherwise (no counter allocated
+ * for the port, mailbox allocation or command failure, or a counter
+ * mode that is not understood).
+ */
+static int iboe_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
+			    struct ib_wc *in_wc, struct ib_grh *in_grh,
+			    struct ib_mad *in_mad, struct ib_mad *out_mad)
+{
+	struct mlx4_cmd_mailbox *mailbox;
+	struct mlx4_ib_dev *dev = to_mdev(ibdev);
+	int counter = dev->counters[port_num - 1];
+	int ret = IB_MAD_RESULT_FAILURE;
+	int mode;
+
+	if (in_mad->mad_hdr.mgmt_class != IB_MGMT_CLASS_PERF_MGMT)
+		return -EINVAL;
+
+	/*
+	 * -1 marks a port for which no counter could be allocated; do not
+	 * query the firmware with a bogus index derived from it.
+	 */
+	if (counter == -1)
+		return IB_MAD_RESULT_FAILURE;
+
+	mailbox = mlx4_alloc_cmd_mailbox(dev->dev);
+	if (IS_ERR(mailbox))
+		return IB_MAD_RESULT_FAILURE;
+
+	if (mlx4_cmd_box(dev->dev, 0, mailbox->dma, counter & 0xffff, 0,
+			 MLX4_CMD_QUERY_IF_STAT, MLX4_CMD_TIME_CLASS_C))
+		goto out;
+
+	memset(out_mad->data, 0, sizeof out_mad->data);
+
+	/* The low four bits of counter_mode select the layout of the block. */
+	mode = be32_to_cpu(((struct mlx4_counters *)mailbox->buf)->counter_mode) & 0xf;
+	switch (mode) {
+	case 0:
+		edit_counters(mailbox->buf, out_mad->data);
+		ret = IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY;
+		break;
+	case 1:
+		edit_ext_counters(mailbox->buf, out_mad->data);
+		ret = IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY;
+		break;
+	default:
+		break;
+	}
+
+out:
+	mlx4_free_cmd_mailbox(dev->dev, mailbox);
+
+	return ret;
+}
+
+/*
+ * Dispatch a MAD to the handler that matches the port's link layer:
+ * ib_process_mad() for InfiniBand ports, iboe_process_mad() for Ethernet
+ * ports. Any other link layer yields -EINVAL.
+ */
+int mlx4_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
+			struct ib_wc *in_wc, struct ib_grh *in_grh,
+			struct ib_mad *in_mad, struct ib_mad *out_mad)
+{
+	int link_layer = rdma_port_get_link_layer(ibdev, port_num);
+
+	if (link_layer == IB_LINK_LAYER_INFINIBAND)
+		return ib_process_mad(ibdev, mad_flags, port_num, in_wc,
+				      in_grh, in_mad, out_mad);
+
+	if (link_layer == IB_LINK_LAYER_ETHERNET)
+		return iboe_process_mad(ibdev, mad_flags, port_num, in_wc,
+					in_grh, in_mad, out_mad);
+
+	return -EINVAL;
+}
+
static void send_handler(struct ib_mad_agent *agent,
struct ib_mad_send_wc *mad_send_wc)
{
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index bf3e20c..a7879c0 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -981,6 +981,7 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
int i;
int err;
struct mlx4_ib_iboe *iboe;
+ int k;
printk_once(KERN_INFO "%s", mlx4_ib_version);
@@ -1097,11 +1098,17 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
if (init_node_data(ibdev))
goto err_map;
+ for (k = 0; k < ibdev->num_ports; ++k) {
+ err = mlx4_counter_alloc(ibdev->dev, &ibdev->counters[k]);
+ if (err)
+ ibdev->counters[k] = -1;
+ }
+
spin_lock_init(&ibdev->sm_lock);
mutex_init(&ibdev->cap_mask_mutex);
if (ib_register_device(&ibdev->ib_dev, NULL))
- goto err_map;
+ goto err_counter;
if (mlx4_ib_mad_init(ibdev))
goto err_reg;
@@ -1131,6 +1138,10 @@ err_notif:
err_reg:
ib_unregister_device(&ibdev->ib_dev);
+err_counter:
+ for (; k; --k)
+ mlx4_counter_free(ibdev->dev, ibdev->counters[k - 1]);
+
err_map:
iounmap(ibdev->uar_map);
@@ -1150,6 +1161,7 @@ static void mlx4_ib_remove(struct mlx4_dev *dev, void *ibdev_ptr)
{
struct mlx4_ib_dev *ibdev = ibdev_ptr;
int p;
+ int k;
mlx4_ib_mad_cleanup(ibdev);
ib_unregister_device(&ibdev->ib_dev);
@@ -1157,6 +1169,8 @@ static void mlx4_ib_remove(struct mlx4_dev *dev, void *ibdev_ptr)
if (unregister_netdevice_notifier(&ibdev->iboe.nb))
printk(KERN_WARNING "failure unregistering notifier\n");
ibdev->iboe.nb.notifier_call = NULL;
+ for (k = 0; k < ibdev->num_ports; ++k)
+ mlx4_counter_free(ibdev->dev, ibdev->counters[k]);
}
iounmap(ibdev->uar_map);
diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h
index 2a322f2..e4bf2cf 100644
--- a/drivers/infiniband/hw/mlx4/mlx4_ib.h
+++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h
@@ -193,6 +193,7 @@ struct mlx4_ib_dev {
struct mutex cap_mask_mutex;
bool ib_active;
struct mlx4_ib_iboe iboe;
+ int counters[MLX4_MAX_PORTS];
};
static inline struct mlx4_ib_dev *to_mdev(struct ib_device *ibdev)
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index 9a7794a..115e106 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -893,7 +893,6 @@ static int mlx4_set_path(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah,
--path->static_rate;
} else
path->static_rate = 0;
- path->counter_index = 0xff;
if (ah->ah_flags & IB_AH_GRH) {
if (ah->grh.sgid_index >= dev->dev->caps.gid_table_len[port]) {
@@ -1034,6 +1033,12 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
}
}
+ if (cur_state == IB_QPS_INIT && new_state == IB_QPS_RTR &&
+ dev->counters[qp->port - 1] != -1) {
+ context->pri_path.counter_index = dev->counters[qp->port - 1];
+ optpar |= MLX4_QP_OPTPAR_COUNTER_INDEX;
+ }
+
if (attr_mask & IB_QP_PKEY_INDEX) {
context->pri_path.pkey_index = attr->pkey_index;
optpar |= MLX4_QP_OPTPAR_PKEY_INDEX;
diff --git a/drivers/net/mlx4/main.c b/drivers/net/mlx4/main.c
index 70985d8..0126ea2 100644
--- a/drivers/net/mlx4/main.c
+++ b/drivers/net/mlx4/main.c
@@ -865,17 +865,29 @@ int mlx4_counter_alloc(struct mlx4_dev *dev, u32 *idx)
{
struct mlx4_priv *priv = mlx4_priv(dev);
- *idx = mlx4_bitmap_alloc(&priv->counters_bitmap);
- if (*idx == -1)
+ switch (dev->caps.counters_mode) {
+ case MLX4_COUNTERS_BASIC:
+ case MLX4_COUNTERS_EXT:
+ *idx = mlx4_bitmap_alloc(&priv->counters_bitmap);
+ if (*idx == -1)
+ return -ENOMEM;
+ return 0;
+ default:
return -ENOMEM;
-
- return 0;
+ }
}
EXPORT_SYMBOL_GPL(mlx4_counter_alloc);
+/*
+ * Release a counter index obtained from mlx4_counter_alloc().
+ *
+ * Nothing is done when the device's counters mode is neither basic nor
+ * extended, or when @idx is the -1 value that mlx4_counter_alloc() leaves
+ * behind on failure, so callers may pass a failed allocation unchanged.
+ */
void mlx4_counter_free(struct mlx4_dev *dev, u32 idx)
{
- mlx4_bitmap_free(&mlx4_priv(dev)->counters_bitmap, idx);
+	/* -1 means "no counter"; it must never reach the bitmap. */
+	if (idx == (u32)-1)
+		return;
+
+	switch (dev->caps.counters_mode) {
+	case MLX4_COUNTERS_BASIC:
+	case MLX4_COUNTERS_EXT:
+		mlx4_bitmap_free(&mlx4_priv(dev)->counters_bitmap, idx);
+		return;
+	default:
+		return;
+	}
}
EXPORT_SYMBOL_GPL(mlx4_counter_free);
diff --git a/include/linux/mlx4/cmd.h b/include/linux/mlx4/cmd.h
index 9a18667..60d3036 100644
--- a/include/linux/mlx4/cmd.h
+++ b/include/linux/mlx4/cmd.h
@@ -123,6 +123,10 @@ enum {
/* debug commands */
MLX4_CMD_QUERY_DEBUG_MSG = 0x2a,
MLX4_CMD_SET_DEBUG_MSG = 0x2b,
+
+ /* statistics commands */
+ MLX4_CMD_QUERY_IF_STAT = 0X54,
+ MLX4_CMD_SET_IF_STAT = 0X55,
};
enum {
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index 0992434..95d39eb 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -414,6 +414,42 @@ union mlx4_ext_av {
struct mlx4_eth_av eth;
};
+/*
+ * Basic-mode counter block, as read from the output mailbox of
+ * MLX4_CMD_QUERY_IF_STAT. All counters are big-endian.
+ */
+struct mlx4_counters {
+ /* low four bits select the block layout; 0 is handled as this layout */
+ __be32 counter_mode;
+ /* not read by this patch; presumably an interface count - TODO confirm */
+ __be32 num_ifc;
+ u32 reserved[2];
+ __be64 rx_frames;
+ __be64 rx_bytes;
+ __be64 tx_frames;
+ __be64 tx_bytes;
+};
+
+/*
+ * Extended-mode counter block, as read from the output mailbox of
+ * MLX4_CMD_QUERY_IF_STAT. All counters are big-endian. The first three
+ * members mirror struct mlx4_counters so counter_mode can be inspected
+ * before the layout is known.
+ */
+struct mlx4_counters_ext {
+ /* low four bits select the block layout; 1 is handled as this layout */
+ __be32 counter_mode;
+ /* not read by this patch; presumably an interface count - TODO confirm */
+ __be32 num_ifc;
+ u32 reserved[2];
+ /* receive side; only the unicast and error frame counters are reported */
+ __be64 rx_uni_frames;
+ __be64 rx_uni_bytes;
+ __be64 rx_mcast_frames;
+ __be64 rx_mcast_bytes;
+ __be64 rx_bcast_frames;
+ __be64 rx_bcast_bytes;
+ __be64 rx_nobuf_frames;
+ __be64 rx_nobuf_bytes;
+ __be64 rx_err_frames;
+ __be64 rx_err_bytes;
+ /* transmit side; only the unicast counters are reported */
+ __be64 tx_uni_frames;
+ __be64 tx_uni_bytes;
+ __be64 tx_mcast_frames;
+ __be64 tx_mcast_bytes;
+ __be64 tx_bcast_frames;
+ __be64 tx_bcast_bytes;
+ __be64 tx_nobuf_frames;
+ __be64 tx_nobuf_bytes;
+ __be64 tx_err_frames;
+ __be64 tx_err_bytes;
+};
+
struct mlx4_dev {
struct pci_dev *pdev;
unsigned long flags;
diff --git a/include/linux/mlx4/qp.h b/include/linux/mlx4/qp.h
index 0eeb2a1..22fc0a4 100644
--- a/include/linux/mlx4/qp.h
+++ b/include/linux/mlx4/qp.h
@@ -54,7 +54,8 @@ enum mlx4_qp_optpar {
MLX4_QP_OPTPAR_RETRY_COUNT = 1 << 12,
MLX4_QP_OPTPAR_RNR_RETRY = 1 << 13,
MLX4_QP_OPTPAR_ACK_TIMEOUT = 1 << 14,
- MLX4_QP_OPTPAR_SCHED_QUEUE = 1 << 16
+ MLX4_QP_OPTPAR_SCHED_QUEUE = 1 << 16,
+ MLX4_QP_OPTPAR_COUNTER_INDEX = 1 << 20
};
enum mlx4_qp_state {
--
1.6.0.2
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
reply other threads:[~2010-11-10 12:24 UTC|newest]
Thread overview: [no followups] expand[flat|nested] mbox.gz Atom feed
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20101110122400.GD12037@mtldesk30 \
--to=eli-ldsdmyg8hgv8yrgs2mwiifqbs+8scbdb@public.gmane.org \
--cc=linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org \
--cc=rdreier-FYB4Gu1CFyUAvxtiuMwx3w@public.gmane.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.