From: Sasha Khapyorsky <sashak-smomgflXvOZWk0Htik3J/w@public.gmane.org>
To: Ira Weiny <weiny2-i2BcT+NCU+M@public.gmane.org>
Cc: linux-rdma <linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org>,
Al Chu <chu11-i2BcT+NCU+M@public.gmane.org>
Subject: [PATCH] tests/subnet_discover: discover test utility
Date: Sun, 20 Dec 2009 14:14:06 +0200 [thread overview]
Message-ID: <20091220121406.GF5262@me> (raw)
In-Reply-To: <20091023234349.GK5764@me>
'subnet_discover' is a simple test utility which implements a "non-blocking"
discovery method where MADs are sent "in parallel" (unlike the
current implementation of 'ibnetdiscover' and similar to how OpenSM
does it). For this, the id of a recently discovered node is encoded in the
lower 16 bits of the MAD transaction id.
Signed-off-by: Sasha Khapyorsky <sashak-smomgflXvOZWk0Htik3J/w@public.gmane.org>
---
Hi Ira,
On 01:43 Sat 24 Oct , Sasha Khapyorsky wrote:
> >
> > Current Master: Threaded version:
> > real 0m9.149s 0m2.223s
> > user 0m0.016s 0m0.374s
> > sys 0m0.372s 0m1.056s
> >
> > With that in mind...
>
> Good. So what do you think due to which factor most of this performance
> gain was achieved? Due to using multiple threads or due to SMP queries
> parallelism? I would suspect that it is a parallelism.
For some purposes in ibsim/tests I wrote a simple utility,
'subnet_discover'. It works as a single thread, utilizes a "parallel"
MAD sending method, and uses libibumad for all MAD
sending/receiving stuff.
I think that a similar implementation in libibnetdisc (I can do it if we
are in agreement :)) would improve its performance.
Would you like to look at this?
Sasha
tests/Makefile | 2 +-
tests/subnet_discover.c | 495 +++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 496 insertions(+), 1 deletions(-)
create mode 100644 tests/subnet_discover.c
diff --git a/tests/Makefile b/tests/Makefile
index dd4cd55..bd415d8 100644
--- a/tests/Makefile
+++ b/tests/Makefile
@@ -1,4 +1,4 @@
-progs:= mcast_storm
+progs:= subnet_discover mcast_storm
-include ../defs.mk
diff --git a/tests/subnet_discover.c b/tests/subnet_discover.c
new file mode 100644
index 0000000..a577cc7
--- /dev/null
+++ b/tests/subnet_discover.c
@@ -0,0 +1,495 @@
+/*
+ * Copyright (c) 2009 Voltaire, Inc. All rights reserved.
+ *
+ * This is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <inttypes.h>
+#include <getopt.h>
+
+#include <infiniband/umad.h>
+#include <infiniband/mad.h>
+
+/* One port of a discovered node. */
+struct port {
+ struct node *node; /* owning node; set for every port in add_node() */
+ uint64_t guid; /* port GUID read from NodeInfo in process_node() */
+ struct port *remote; /* peer port set by connect_ports(); zeroed until then */
+ uint8_t port_info[IB_SMP_DATA_SIZE]; /* raw PortInfo payload, see process_port_info() */
+};
+
+/* A discovered node together with its per-port state. */
+struct node {
+ uint64_t guid; /* node GUID; find_node() uses it as the lookup key */
+ unsigned num_ports; /* NumPorts field of NodeInfo */
+ unsigned is_switch; /* non-zero when NodeInfo reports node type IB_NODE_SWITCH */
+ uint8_t node_info[IB_SMP_DATA_SIZE]; /* raw NodeInfo payload */
+ uint8_t node_desc[IB_SMP_DATA_SIZE]; /* raw NodeDescription payload */
+ uint8_t switch_info[IB_SMP_DATA_SIZE]; /* raw SwitchInfo payload */
+ struct port ports[]; /* num_ports + 1 entries, indexed by port number */
+};
+
+/*
+ * Discovery state shared by the whole file.
+ *
+ * node_array holds every discovered node; the array index is the node id
+ * that send_query() places in the low 16 bits of the transaction id.
+ */
+static struct node *node_array[32 * 1024];
+static unsigned node_count = 0; /* number of used entries in node_array */
+static unsigned trid_cnt = 0; /* incremented per request; forms the trid above bit 15 */
+static unsigned outstanding = 0; /* requests sent and not yet accounted for by recv_smp_resp() */
+static unsigned timeout = 100; /* -t option; passed to umad_send() and umad_recv() */
+static unsigned retries = 3; /* -r option; passed to umad_send() */
+static unsigned verbose = 0; /* -v count: > 0 enables VERBOSE, > 1 enables NOISE */
+
+#define ERROR(fmt, ...) fprintf(stderr, "ERR: " fmt, ##__VA_ARGS__)
+#define VERBOSE(fmt, ...) if (verbose) fprintf(stderr, fmt, ##__VA_ARGS__)
+#define NOISE(fmt, ...) if (verbose > 1) fprintf(stderr, fmt, ##__VA_ARGS__)
+
+/*
+ * Format path[0..path_cnt] (inclusive) as a comma-terminated list such as
+ * "0,1,4," for use in log messages.
+ *
+ * Returns a pointer to a static buffer that the next call overwrites. Output
+ * that does not fit is truncated; the buffer is always NUL-terminated and is
+ * never written past its end.
+ */
+static const char *print_path(uint8_t path[], size_t path_cnt)
+{
+	static char buf[256];
+	size_t i, used = 0;
+
+	buf[0] = '\0';
+	for (i = 0; i <= path_cnt; i++) {
+		int n = snprintf(buf + used, sizeof(buf) - used, "%u,", path[i]);
+
+		/* snprintf() returns the length it wanted, not what it stored */
+		if (n < 0 || (size_t)n >= sizeof(buf) - used)
+			break;
+		used += (size_t)n;
+	}
+	return buf;
+}
+
+/*
+ * DBG_DUMP_FUNC(name) defines dbg_dump_<name>(data), which formats
+ * IB_SMP_DATA_SIZE bytes of attribute payload with mad_dump_<name>() and
+ * prints the text through NOISE(). Formatting is skipped unless verbose > 1,
+ * because NOISE() would discard the text anyway.
+ */
+#define DBG_DUMP_FUNC(name) static void dbg_dump_##name(void *data) \
+{ \
+	char buf[2048]; \
+	if (verbose < 2) \
+		return; \
+	mad_dump_##name(buf, sizeof(buf), data, IB_SMP_DATA_SIZE); \
+	NOISE("### "#name":\n%s\n", buf); \
+}
+
+DBG_DUMP_FUNC(nodeinfo)
+DBG_DUMP_FUNC(nodedesc)
+DBG_DUMP_FUNC(portinfo)
+DBG_DUMP_FUNC(switchinfo)
+
+/*
+ * Fill 'umad' with a directed-route SMP request.
+ *
+ * The whole buffer (umad header plus one MAD of IB_MAD_SIZE bytes) is zeroed
+ * first, so nothing from a previous request or response survives.
+ *
+ * umad     - buffer of at least umad_size() + IB_MAD_SIZE bytes
+ * path     - directed-route path array stored in the SMP
+ * path_cnt - hop count stored in the SMP
+ * trid     - transaction id
+ * method   - MAD method
+ * attr_id  - attribute id
+ * attr_mod - attribute modifier
+ * mkey     - M_Key stored in the SMP
+ */
+static void build_umad_req(void *umad, uint8_t * path, unsigned path_cnt,
+			   uint64_t trid, uint8_t method,
+			   uint16_t attr_id, uint32_t attr_mod, uint64_t mkey)
+{
+	void *smp;
+
+	memset(umad, 0, umad_size() + IB_MAD_SIZE);
+	umad_set_addr(umad, 0xffff, 0, 0, 0);
+
+	smp = umad_get_mad(umad);
+
+	/* common MAD header */
+	mad_set_field(smp, 0, IB_MAD_BASEVER_F, 1);
+	mad_set_field(smp, 0, IB_MAD_MGMTCLASS_F, IB_SMI_DIRECT_CLASS);
+	mad_set_field(smp, 0, IB_MAD_CLASSVER_F, 1);
+	mad_set_field(smp, 0, IB_MAD_METHOD_F, method);
+	mad_set_field64(smp, 0, IB_MAD_TRID_F, trid);
+	mad_set_field(smp, 0, IB_MAD_ATTRID_F, attr_id);
+	mad_set_field(smp, 0, IB_MAD_ATTRMOD_F, attr_mod);
+	mad_set_field64(smp, 0, IB_MAD_MKEY_F, mkey);
+
+	/* directed-route part */
+	mad_set_field(smp, 0, IB_DRSMP_HOPCNT_F, path_cnt);
+	mad_set_field(smp, 0, IB_DRSMP_HOPPTR_F, 0);
+	mad_set_field(smp, 0, IB_DRSMP_DRSLID_F, 0xffff);
+	mad_set_field(smp, 0, IB_DRSMP_DRDLID_F, 0xffff);
+	mad_set_array(smp, 0, IB_DRSMP_PATH_F, path);
+}
+
+/*
+ * Build and send one Get request for 'attr_id'/'attr_mod' along 'path'.
+ *
+ * The transaction id combines a per-request counter (trid_cnt, shifted left
+ * by 16) with the low 16 bits of 'node_id', so the response can be matched
+ * back to a node. On success 'outstanding' is incremented.
+ *
+ * Returns the umad_send() result on success, -1 when the send fails.
+ */
+static int send_query(int fd, int agent, void *umad, unsigned node_id,
+		      uint8_t * path, size_t path_cnt, uint16_t attr_id,
+		      uint32_t attr_mod)
+{
+	uint64_t trid = (trid_cnt++ << 16) | (node_id & 0xffff);
+	int rc;
+
+	build_umad_req(umad, path, path_cnt, trid, IB_MAD_METHOD_GET, attr_id,
+		       attr_mod, 0);
+
+	rc = umad_send(fd, agent, umad, IB_MAD_SIZE, timeout, retries);
+	if (rc >= 0) {
+		outstanding++;
+		VERBOSE("send %016" PRIx64 ": attr %x, mod %x to %s\n", trid,
+			attr_id, attr_mod, print_path(path, path_cnt));
+		return rc;
+	}
+
+	ERROR("umad_send failed: trid 0x%016" PRIx64
+	      ", attr_id %x, attr_mod %x: %s\n",
+	      trid, attr_id, attr_mod, strerror(errno));
+	return -1;
+}
+
+/*
+ * Receive the next MAD addressed to 'agent' into 'umad'; MADs delivered for
+ * other agents on the same fd are skipped.
+ *
+ * length - size of the MAD part of the buffer
+ *
+ * Returns the agent id on success. Returns -1 when umad_recv() fails or when
+ * the received umad carries a non-zero status; in the first case the buffer
+ * holds no new MAD, so its status field is not consulted.
+ */
+static int recv_response(int fd, int agent, uint8_t * umad, size_t length)
+{
+	int len, ret, status;
+
+	do {
+		/* umad_recv() takes the length by pointer; start each try afresh */
+		len = length;
+		ret = umad_recv(fd, umad, &len, timeout);
+	} while (ret >= 0 && ret != agent);
+
+	if (ret < 0) {
+		ERROR("umad_recv failed: %s\n", strerror(errno));
+		return -1;
+	}
+
+	status = umad_status(umad);
+	if (status) {
+		/* NOTE(review): status is treated as an errno value - confirm */
+		ERROR("umad_recv failed: umad status %x: %s\n",
+		      status, strerror(status));
+		return -1;
+	}
+
+	return ret;
+}
+
+/*
+ * Send a NodeInfo Get along 'path', tagged with 'node_id'.
+ * Returns whatever send_query() returns.
+ */
+static int query_node_info(int fd, int agent, void *umad, unsigned node_id,
+			   uint8_t * path, size_t path_cnt)
+{
+	const uint16_t attr_id = IB_ATTR_NODE_INFO;
+	const uint32_t attr_mod = 0;
+
+	return send_query(fd, agent, umad, node_id, path, path_cnt,
+			  attr_id, attr_mod);
+}
+
+/*
+ * Send a NodeDescription Get along 'path', tagged with 'node_id'.
+ * Returns whatever send_query() returns.
+ */
+static int query_node_desc(int fd, int agent, void *umad, unsigned node_id,
+			   uint8_t * path, size_t path_cnt)
+{
+	const uint16_t attr_id = IB_ATTR_NODE_DESC;
+	const uint32_t attr_mod = 0;
+
+	return send_query(fd, agent, umad, node_id, path, path_cnt,
+			  attr_id, attr_mod);
+}
+
+/*
+ * Send a SwitchInfo Get along 'path', tagged with 'node_id'.
+ * Returns whatever send_query() returns.
+ */
+static int query_switch_info(int fd, int agent, void *umad, unsigned node_id,
+			     uint8_t * path, size_t path_cnt)
+{
+	const uint16_t attr_id = IB_ATTR_SWITCH_INFO;
+	const uint32_t attr_mod = 0;
+
+	return send_query(fd, agent, umad, node_id, path, path_cnt,
+			  attr_id, attr_mod);
+}
+
+/*
+ * Send a PortInfo Get for port 'port_num' along 'path', tagged with
+ * 'node_id'. The port number travels as the attribute modifier.
+ * Returns whatever send_query() returns.
+ */
+static int query_port_info(int fd, int agent, void *umad, unsigned node_id,
+			   uint8_t * path, size_t path_cnt, unsigned port_num)
+{
+	const uint16_t attr_id = IB_ATTR_PORT_INFO;
+	const uint32_t attr_mod = port_num;
+
+	return send_query(fd, agent, umad, node_id, path, path_cnt,
+			  attr_id, attr_mod);
+}
+
+/*
+ * Create a node from a NodeInfo payload and append it to node_array.
+ *
+ * The node is allocated zeroed with num_ports + 1 port slots (ports are
+ * indexed by port number, slot 0 included), and every port gets a back
+ * pointer to the node.
+ *
+ * Returns the new node's index in node_array, or -1 when the table is full
+ * or the allocation fails.
+ */
+static int add_node(uint8_t * node_info)
+{
+	const unsigned max_nodes = sizeof(node_array) / sizeof(node_array[0]);
+	unsigned i, num_ports = mad_get_field(node_info, 0, IB_NODE_NPORTS_F);
+	struct node *node;
+
+	/* node_array is a fixed table; refuse to write past its end */
+	if (node_count >= max_nodes) {
+		ERROR("node table is full (%u entries)\n", max_nodes);
+		return -1;
+	}
+
+	node = calloc(1, sizeof(*node) +
+		      (num_ports + 1) * sizeof(node->ports[0]));
+	if (!node)
+		return -1;
+
+	node->num_ports = num_ports;
+	node->guid = mad_get_field64(node_info, 0, IB_NODE_GUID_F);
+	node->is_switch = ((mad_get_field(node_info, 0, IB_NODE_TYPE_F)) ==
+			   IB_NODE_SWITCH);
+	memcpy(node->node_info, node_info, sizeof(node->node_info));
+	for (i = 0; i <= num_ports; i++)
+		node->ports[i].node = node;
+
+	node_array[node_count] = node;
+
+	return node_count++;
+}
+
+/*
+ * Look up an already discovered node by the node GUID found in a NodeInfo
+ * payload. Returns its index in node_array, or -1 when it is not known yet.
+ */
+static int find_node(uint8_t * node_info)
+{
+	const uint64_t wanted = mad_get_field64(node_info, 0, IB_NODE_GUID_F);
+	unsigned idx = 0;
+
+	while (idx < node_count) {
+		if (node_array[idx]->guid == wanted)
+			return idx;
+		idx++;
+	}
+
+	return -1;
+}
+
+/*
+ * Handle a PortInfo response for node 'node_id'.
+ *
+ * The payload is stored in the port selected by the attribute modifier. When
+ * that port is non-zero, its physical state field equals 5, and it is either
+ * a non-local port of a switch or the local port of node 0, 'path' is
+ * extended by the port and a NodeInfo query is sent through it, reusing
+ * 'umad' as the send buffer.
+ *
+ * Returns the result of that NodeInfo send when one is issued, 0 when
+ * nothing has to be sent, and -1 when the response names an unknown node or
+ * port or when 'path' has no room for another hop.
+ */
+static int process_port_info(void *umad, unsigned node_id, int fd, int agent,
+			     uint8_t path[], size_t path_cnt)
+{
+	/* DR_PATH_SIZE matches the path[64] buffer owned by discovery() */
+	enum { PHYS_STATE_LINK_UP = 5, DR_PATH_SIZE = 64 };
+	uint8_t *port_info = (uint8_t *)umad + umad_size() + IB_SMP_DATA_OFFS;
+	struct node *node;
+	unsigned port_num, local_port;
+
+	dbg_dump_portinfo(port_info);
+
+	if (node_id >= node_count) {
+		ERROR("PortInfo response for unknown node id %u\n", node_id);
+		return -1;
+	}
+	node = node_array[node_id];
+
+	port_num = mad_get_field(umad_get_mad(umad), 0, IB_MAD_ATTRMOD_F);
+	if (port_num > node->num_ports) {
+		ERROR("PortInfo response for invalid port %u of node %u\n",
+		      port_num, node_id);
+		return -1;
+	}
+	local_port = mad_get_field(port_info, 0, IB_PORT_LOCAL_PORT_F);
+
+	memcpy(node->ports[port_num].port_info, port_info,
+	       sizeof(node->ports[port_num].port_info));
+
+	if (!port_num ||
+	    mad_get_field(port_info, 0, IB_PORT_PHYS_STATE_F) !=
+	    PHYS_STATE_LINK_UP)
+		return 0;
+
+	if (!((node->is_switch && port_num != local_port) ||
+	      (node_id == 0 && port_num == local_port)))
+		return 0;
+
+	/* the new hop is stored at path[path_cnt + 1] */
+	if (path_cnt + 1 >= DR_PATH_SIZE) {
+		ERROR("path too long to probe port %u of node %u\n",
+		      port_num, node_id);
+		return -1;
+	}
+
+	path[++path_cnt] = port_num;
+	return query_node_info(fd, agent, umad, node_id, path, path_cnt);
+}
+
+/*
+ * Handle a SwitchInfo response: dump it at NOISE level and keep a raw copy
+ * of the payload in node 'node_id'. Always returns 0.
+ */
+static int process_switch_info(unsigned node_id, uint8_t * switch_info)
+{
+	struct node *owner = node_array[node_id];
+	const size_t nbytes = sizeof(owner->switch_info);
+
+	dbg_dump_switchinfo(switch_info);
+	memcpy(owner->switch_info, switch_info, nbytes);
+
+	return 0;
+}
+
+/*
+ * Handle a NodeDescription response: dump it at NOISE level and keep a raw
+ * copy of the payload in node 'node_id'. Always returns 0.
+ */
+static int process_node_desc(unsigned node_id, uint8_t * node_desc)
+{
+	struct node *owner = node_array[node_id];
+	const size_t nbytes = sizeof(owner->node_desc);
+
+	dbg_dump_nodedesc(node_desc);
+	memcpy(owner->node_desc, node_desc, nbytes);
+
+	return 0;
+}
+
+/*
+ * Record a link between port 'port1_num' of node 'node1_id' and port
+ * 'port2_num' of node 'node2_id' by pointing each port's 'remote' at the
+ * other one.
+ */
+static void connect_ports(unsigned node1_id, unsigned port1_num,
+			  unsigned node2_id, unsigned port2_num)
+{
+	struct port *near_end = &node_array[node1_id]->ports[port1_num];
+	struct port *far_end = &node_array[node2_id]->ports[port2_num];
+
+	VERBOSE("connecting %u:%u <--> %u:%u\n",
+		node1_id, port1_num, node2_id, port2_num);
+
+	near_end->remote = far_end;
+	far_end->remote = near_end;
+}
+
+/*
+ * Handle a NodeInfo response that arrived over 'path'.
+ *
+ * The node is looked up by GUID and added when unknown. The port the
+ * response came from (LocalPortNum) gets its port GUID recorded and, unless
+ * the node has id 0, is linked to port path[path_cnt] of node 'remote_id'.
+ * For a newly added node, NodeDescription, SwitchInfo (switches only) and
+ * PortInfo queries for its ports are sent over the same path; port 0 is
+ * queried only on switches.
+ *
+ * Returns 0 on success, -1 when the node cannot be added, when LocalPortNum
+ * exceeds the node's port count, or when any follow-up query fails to send.
+ */
+static int process_node(void *umad, unsigned remote_id, int fd, int agent,
+			uint8_t path[], size_t path_cnt)
+{
+	uint8_t *node_info = (uint8_t *)umad_get_mad(umad) + IB_SMP_DATA_OFFS;
+	unsigned port_num = mad_get_field(node_info, 0, IB_NODE_LOCAL_PORT_F);
+	unsigned node_is_new = 0;
+	struct node *node;
+	unsigned i;
+	int id, failed = 0;
+
+	dbg_dump_nodeinfo(node_info);
+
+	id = find_node(node_info);
+	if (id < 0) {
+		id = add_node(node_info);
+		if (id < 0)
+			return -1;
+		node_is_new = 1;
+	}
+
+	node = node_array[id];
+
+	/* ports[] has num_ports + 1 entries; reject anything beyond that */
+	if (port_num > node->num_ports) {
+		ERROR("NodeInfo reports local port %u but node %d has %u ports\n",
+		      port_num, id, node->num_ports);
+		return -1;
+	}
+
+	node->ports[port_num].guid =
+	    mad_get_field64(node_info, 0, IB_NODE_PORT_GUID_F);
+
+	if (id)			/* skip connect for very first node */
+		connect_ports(id, port_num, remote_id, path[path_cnt]);
+
+	if (!node_is_new)
+		return 0;
+
+	/*
+	 * Each query below rebuilds the request inside 'umad', so node_info
+	 * must not be used from here on.
+	 */
+	if (query_node_desc(fd, agent, umad, id, path, path_cnt) < 0)
+		failed = 1;
+
+	if (node->is_switch &&
+	    query_switch_info(fd, agent, umad, id, path, path_cnt) < 0)
+		failed = 1;
+
+	for (i = !node->is_switch; i <= node->num_ports; i++)
+		if (query_port_info(fd, agent, umad, id, path, path_cnt, i) < 0)
+			failed = 1;
+
+	return failed ? -1 : 0;
+}
+
+/*
+ * Receive one MAD and dispatch it to the handler for its attribute.
+ *
+ * 'umad' is the shared send/receive buffer and 'path' receives the MAD's
+ * directed-route path; both may be reused by the handlers to send follow-up
+ * queries. Every MAD whose method field equals IB_MAD_METHOD_GET decrements
+ * 'outstanding', including error completions.
+ *
+ * Returns 0 when the MAD was handled (or ignored), -1 on a receive error, a
+ * non-zero SMP status, an out-of-range hop count, or a handler failure. The
+ * agent id is deliberately not returned: discovery() treats any non-zero
+ * value as a failure.
+ *
+ * NOTE(review): when recv_response() fails without receiving anything, the
+ * fields below are parsed from whatever the buffer held before - confirm
+ * that this accounting is intended.
+ */
+static int recv_smp_resp(int fd, int agent, uint8_t * umad, uint8_t path[])
+{
+	/* DR_PATH_SIZE matches the path[64] buffer owned by discovery() */
+	enum { DR_PATH_SIZE = 64 };
+	uint8_t *mad;
+	uint64_t trid;
+	uint8_t method;
+	uint16_t status;
+	uint16_t attr_id;
+	uint32_t attr_mod;
+	size_t path_cnt;
+	unsigned node_id;
+	int ret, rc;
+
+	ret = recv_response(fd, agent, umad, IB_MAD_SIZE);
+
+	mad = umad_get_mad(umad);
+	status = mad_get_field(mad, 0, IB_DRSMP_STATUS_F);
+	method = mad_get_field(mad, 0, IB_MAD_METHOD_F);
+	trid = mad_get_field64(mad, 0, IB_MAD_TRID_F);
+	attr_id = mad_get_field(mad, 0, IB_MAD_ATTRID_F);
+	attr_mod = mad_get_field(mad, 0, IB_MAD_ATTRMOD_F);
+	path_cnt = mad_get_field(mad, 0, IB_DRSMP_HOPCNT_F);
+	mad_get_array(mad, 0, IB_DRSMP_PATH_F, path);
+
+	if (method != IB_MAD_METHOD_GET)
+		return 0;
+
+	outstanding--;
+
+	/* path[] is indexed with path_cnt below and in print_path() */
+	if (path_cnt >= DR_PATH_SIZE) {
+		ERROR("bad hop count %u in response 0x%016" PRIx64 "\n",
+		      (unsigned)path_cnt, trid);
+		return -1;
+	}
+
+	if (ret < 0 || status) {
+		ERROR("error response 0x%016" PRIx64 ": attr_id %x"
+		      ", attr_mod %x from %s with status %x\n", trid,
+		      attr_id, attr_mod, print_path(path, path_cnt), status);
+		return -1;
+	}
+
+	node_id = trid & 0xffff;
+
+	VERBOSE("recv %016" PRIx64 ": attr %x, mod %x from %s\n", trid, attr_id,
+		attr_mod, print_path(path, path_cnt));
+
+	switch (attr_id) {
+	case IB_ATTR_NODE_INFO:
+		rc = process_node(umad, node_id, fd, agent, path, path_cnt);
+		break;
+	case IB_ATTR_NODE_DESC:
+		rc = process_node_desc(node_id, mad + IB_SMP_DATA_OFFS);
+		break;
+	case IB_ATTR_SWITCH_INFO:
+		rc = process_switch_info(node_id, mad + IB_SMP_DATA_OFFS);
+		break;
+	case IB_ATTR_PORT_INFO:
+		rc = process_port_info(umad, node_id, fd, agent, path,
+				       path_cnt);
+		break;
+	default:
+		VERBOSE("unsolicited response 0x%016" PRIx64 ": attr_id %x"
+			", attr_mod %x\n", trid, attr_id, attr_mod);
+		return 0;
+	}
+
+	return rc < 0 ? -1 : 0;
+}
+
+/*
+ * Run the discovery: send NodeInfo to the local node (hop count 0) and keep
+ * processing responses until no request is outstanding. The handlers send
+ * the follow-up queries themselves.
+ *
+ * Returns -ENOMEM when the MAD buffer cannot be allocated, the negative
+ * result of the first send when it fails, 1 when any response was reported
+ * as failed, and otherwise the result of the first send.
+ */
+static int discovery(int fd, int agent)
+{
+	uint8_t path[64] = { 0 };
+	void *umad;
+	int ret;
+
+	umad = malloc(IB_MAD_SIZE + umad_size());
+	if (!umad)
+		return -ENOMEM;
+
+	ret = query_node_info(fd, agent, umad, 0, path, 0);
+	if (ret < 0)
+		goto out;	/* do not leak the buffer on an early failure */
+
+	while (outstanding)
+		if (recv_smp_resp(fd, agent, umad, path))
+			ret = 1;
+
+out:
+	free(umad);
+
+	return ret;
+}
+
+/*
+ * Open the umad port 'card_name':'port_num', register an agent for the
+ * directed-route SMI class, run discovery() and release everything again.
+ *
+ * Every exit path after a successful umad_init() releases what was acquired
+ * so far: the port is closed once it has been opened, and umad_done() is
+ * always called.
+ *
+ * Returns the discovery() result, or -1 when initialisation, opening the
+ * port or registering the agent fails.
+ */
+static int umad_discovery(char *card_name, unsigned int port_num)
+{
+	int fd, agent, ret;
+
+	if (umad_init()) {
+		ERROR("cannot init umad\n");
+		return -1;
+	}
+
+	fd = umad_open_port(card_name, port_num);
+	if (fd < 0) {
+		ERROR("cannot open umad port %s:%u: %s\n",
+		      card_name ? card_name : "NULL", port_num,
+		      strerror(errno));
+		ret = -1;
+		goto out_done;
+	}
+
+	agent = umad_register(fd, IB_SMI_DIRECT_CLASS, 1, 0, NULL);
+	if (agent < 0) {
+		ERROR("cannot register SMI DR class for umad port %s:%u: %s\n",
+		      card_name ? card_name : "NULL", port_num,
+		      strerror(errno));
+		ret = -1;
+		goto out_close;
+	}
+
+	ret = discovery(fd, agent);
+	if (ret)
+		ERROR("Failed to discover.\n");
+
+	umad_unregister(fd, agent);
+out_close:
+	umad_close_port(fd);
+out_done:
+	umad_done();
+
+	return ret;
+}
+
+/*
+ * Print every discovered node to stdout, followed by one line per port
+ * (starting at port 1) that has a recorded peer.
+ *
+ * Node descriptions are raw fixed-size payload buffers and may lack a NUL
+ * terminator, so they are printed with an explicit length limit. The peer's
+ * port number is a pointer difference and is printed as a ptrdiff_t.
+ */
+static void print_subnet(void)
+{
+	unsigned i, j;
+
+	for (i = 0; i < node_count; i++) {
+		const struct node *node = node_array[i];
+
+		printf("%s %u \"%s-%016" PRIx64 "\" \t# %.*s\n",
+		       node->is_switch ? "Switch" : "Ca", node->num_ports,
+		       node->is_switch ? "S" : "H", node->guid,
+		       (int)sizeof(node->node_desc),
+		       (const char *)node->node_desc);
+
+		for (j = 1; j <= node->num_ports; j++) {
+			const struct port *remote = node->ports[j].remote;
+			const struct node *peer;
+
+			if (!remote)
+				continue;
+			peer = remote->node;
+
+			printf("[%u] \t\"%s-%016" PRIx64 "\"[%td] \t# %.*s\n", j,
+			       peer->is_switch ? "S" : "H", peer->guid,
+			       remote - peer->ports,
+			       (int)sizeof(peer->node_desc),
+			       (const char *)peer->node_desc);
+		}
+		printf("\n");
+	}
+}
+
+/*
+ * Parse the command line, run the discovery and print the resulting subnet.
+ *
+ * Options: -C/--Card card name, -P/--Port port number, -t/--timeout,
+ * -r/--retries, and -v (repeatable) to raise verbosity. Any other option
+ * prints the usage line and exits with status 2.
+ *
+ * The subnet is printed even when discovery reports a failure. Returns the
+ * umad_discovery() result.
+ */
+int main(int argc, char **argv)
+{
+	const struct option long_opts[] = {
+		{"Card", 1, 0, 'C'},
+		{"Port", 1, 0, 'P'},
+		{"timeout", 1, 0, 't'},
+		{"retries", 1, 0, 'r'},
+		{0, 0, 0, 0}	/* terminator; an empty {} is not valid before C23 */
+	};
+	char *card_name = NULL;
+	unsigned int port_num = 0;
+	int ch, ret;
+
+	while ((ch = getopt_long(argc, argv, "C:P:t:r:v", long_opts,
+				 NULL)) != -1) {
+		switch (ch) {
+		case 'C':
+			card_name = optarg;
+			break;
+		case 'P':
+			port_num = strtoul(optarg, NULL, 0);
+			break;
+		case 't':
+			timeout = strtoul(optarg, NULL, 0);
+			break;
+		case 'r':
+			retries = strtoul(optarg, NULL, 0);
+			break;
+		case 'v':
+			verbose++;
+			break;
+		default:
+			printf("usage: %s [-C card_name] [-P port_num]"
+			       " [-t timeout] [-r retries] [-v[v]]\n", argv[0]);
+			exit(2);
+		}
+	}
+
+	ret = umad_discovery(card_name, port_num);
+
+	print_subnet();
+
+	return ret;
+}
--
1.6.6.rc3
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
next prev parent reply other threads:[~2009-12-20 12:14 UTC|newest]
Thread overview: 14+ messages / expand[flat|nested] mbox.gz Atom feed top
[not found] <20090813204306.dffc3237.weiny2@llnl.gov>
[not found] ` <20090816110200.GS25501@me>
[not found] ` <20090817083023.da17378b.weiny2@llnl.gov>
[not found] ` <20090823120609.GG9547@me>
[not found] ` <20090831170144.da0e7185.weiny2@llnl.gov>
[not found] ` <20090831170144.da0e7185.weiny2-i2BcT+NCU+M@public.gmane.org>
2009-10-23 17:45 ` [PATCH 4/5] infiniband-diags/libibnetdisc: Introduce a context object Sasha Khapyorsky
[not found] ` <20090826164026.8dcce4b2.weiny2@llnl.gov>
[not found] ` <20090826164026.8dcce4b2.weiny2-i2BcT+NCU+M@public.gmane.org>
2009-10-23 23:43 ` Multi-threaded diags (Was: Re: [PATCH 4/5] infiniband-diags/libibnetdisc: Introduce a context object.) Sasha Khapyorsky
2009-12-20 12:14 ` Sasha Khapyorsky [this message]
[not found] ` <20091220182809.f7e17fae.weiny2@llnl.gov>
[not found] ` <20091220182809.f7e17fae.weiny2-i2BcT+NCU+M@public.gmane.org>
2009-12-21 7:35 ` [PATCH] tests/subnet_discover: discover test utility Sasha Khapyorsky
2009-12-21 14:02 ` Hal Rosenstock
[not found] ` <f0e08f230912210602i5e3f528h2b0630420346db82-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2009-12-22 11:27 ` Sasha Khapyorsky
2009-12-28 9:22 ` Sasha Khapyorsky
2010-01-11 13:56 ` Hal Rosenstock
[not found] ` <f0e08f231001110556y7c47cc54oa3cfd5859f9a4e76-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2010-01-12 9:31 ` Sasha Khapyorsky
2010-01-13 20:11 ` Hal Rosenstock
[not found] ` <f0e08f231001131211y64489a51nd2621cefdb27ad25-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2010-01-16 19:36 ` Sasha Khapyorsky
2010-01-23 12:24 ` Hal Rosenstock
2010-01-21 20:38 ` Ira Weiny
[not found] ` <20100121123841.43df4cdc.weiny2-i2BcT+NCU+M@public.gmane.org>
2010-01-25 15:18 ` Sasha Khapyorsky
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20091220121406.GF5262@me \
--to=sashak-smomgflxvozwk0htik3j/w@public.gmane.org \
--cc=chu11-i2BcT+NCU+M@public.gmane.org \
--cc=linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org \
--cc=weiny2-i2BcT+NCU+M@public.gmane.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox