All of lore.kernel.org
 help / color / mirror / Atom feed
From: Sasha Khapyorsky <sashak-smomgflXvOZWk0Htik3J/w@public.gmane.org>
To: Ira Weiny <weiny2-i2BcT+NCU+M@public.gmane.org>
Cc: linux-rdma <linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org>,
	Al Chu <chu11-i2BcT+NCU+M@public.gmane.org>
Subject: [PATCH] tests/subnet_discover: discover test utility
Date: Sun, 20 Dec 2009 14:14:06 +0200	[thread overview]
Message-ID: <20091220121406.GF5262@me> (raw)
In-Reply-To: <20091023234349.GK5764@me>


'subnet_discover' is a simple test utility which implements a "non-blocking"
discovery method where MADs are sent "in parallel" (unlike the
current implementation of 'ibnetdiscover', and similar to what OpenSM
does). For this, the id of a recently discovered node is encoded in the
lower 16 bits of the MAD transaction id.

Signed-off-by: Sasha Khapyorsky <sashak-smomgflXvOZWk0Htik3J/w@public.gmane.org>
---

Hi Ira,

On 01:43 Sat 24 Oct     , Sasha Khapyorsky wrote:
> > 
> > Current Master:        Threaded version:
> > real    0m9.149s        0m2.223s
> > user    0m0.016s        0m0.374s
> > sys     0m0.372s        0m1.056s
> > 
> > With that in mind...
> 
> Good. So what do you think due to which factor most of this performance
> gain was achieved? Due to using multiple threads or due to SMP queries
> parallelism? I would suspect that it is a parallelism.

For some purposes in ibsim/tests I wrote a simple utility,
'subnet_discover'. It runs as a single thread, utilizes a "parallel"
MAD sending method, and uses libibumad for all MAD
sending/receiving.

I think that a similar implementation in libibnetdisc (I can do it if we
are in agreement :)) would improve its performance.

Would you like to look at this?

Sasha

 tests/Makefile          |    2 +-
 tests/subnet_discover.c |  495 +++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 496 insertions(+), 1 deletions(-)
 create mode 100644 tests/subnet_discover.c

diff --git a/tests/Makefile b/tests/Makefile
index dd4cd55..bd415d8 100644
--- a/tests/Makefile
+++ b/tests/Makefile
@@ -1,4 +1,4 @@
-progs:= mcast_storm
+progs:= subnet_discover mcast_storm
 
 -include ../defs.mk
 
diff --git a/tests/subnet_discover.c b/tests/subnet_discover.c
new file mode 100644
index 0000000..a577cc7
--- /dev/null
+++ b/tests/subnet_discover.c
@@ -0,0 +1,495 @@
+/*
+ * Copyright (c) 2009 Voltaire, Inc. All rights reserved.
+ *
+ * This is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+#include <inttypes.h>
+#include <getopt.h>
+
+#include <infiniband/umad.h>
+#include <infiniband/mad.h>
+
+struct port {	/* one port of a discovered node */
+	struct node *node;	/* owning node, set in add_node() */
+	uint64_t guid;	/* port GUID, filled in by process_node() */
+	struct port *remote;	/* peer port set by connect_ports(), else NULL */
+	uint8_t port_info[IB_SMP_DATA_SIZE];	/* raw PortInfo attribute data */
+};
+
+struct node {	/* one discovered node; its index in node_array is the node id */
+	uint64_t guid;	/* node GUID, the key compared by find_node() */
+	unsigned num_ports;	/* NumPorts from NodeInfo; ports[] has num_ports + 1 slots */
+	unsigned is_switch;	/* non-zero when NodeInfo reports IB_NODE_SWITCH */
+	uint8_t node_info[IB_SMP_DATA_SIZE];	/* raw NodeInfo attribute data */
+	uint8_t node_desc[IB_SMP_DATA_SIZE];	/* raw NodeDescription attribute data */
+	uint8_t switch_info[IB_SMP_DATA_SIZE];	/* raw SwitchInfo attribute data */
+	struct port ports[];	/* indexed by port number, 0..num_ports */
+};
+
+static struct node *node_array[32 * 1024];	/* node id -> node */
+static unsigned node_count = 0;	/* number of used node_array slots */
+static unsigned trid_cnt = 0;	/* sequence number placed above bit 16 of each trid */
+static unsigned outstanding = 0;	/* queries sent and not yet accounted for */
+static unsigned timeout = 100;	/* -t: passed to umad_send()/umad_recv() */
+static unsigned retries = 3;	/* -r: passed to umad_send() */
+static unsigned verbose = 0;	/* -v count: 1 enables VERBOSE, 2 enables NOISE */
+
+#define ERROR(fmt, ...) fprintf(stderr, "ERR: " fmt, ##__VA_ARGS__)
+#define VERBOSE(fmt, ...) if (verbose) fprintf(stderr, fmt, ##__VA_ARGS__)
+#define NOISE(fmt, ...) if (verbose > 1) fprintf(stderr, fmt, ##__VA_ARGS__)
+
+static const char *print_path(uint8_t path[], size_t path_cnt)
+{	/* Format path[0..path_cnt] as "a,b,c," in a static (non-reentrant) buffer. */
+	static char buf[256];
+	size_t i, n = 0;
+	for (i = 0; i <= path_cnt && n < sizeof(buf); i++)	/* stop once buf is full */
+		n += snprintf(buf + n, sizeof(buf) - n, "%u,", path[i]);
+	buf[n < sizeof(buf) ? n : sizeof(buf) - 1] = '\0';	/* clamp after truncation */
+	return buf;
+}
+
+#define DBG_DUMP_FUNC(name) static void dbg_dump_##name(void *data) \
+{ /* format one attribute with mad_dump_<name>() and log it */ \
+	char buf[2048]; \
+	mad_dump_##name(buf, sizeof(buf), data, IB_SMP_DATA_SIZE); \
+	NOISE("### "#name":\n%s\n", buf); /* shown only when verbose > 1 */ \
+}
+
+DBG_DUMP_FUNC(nodeinfo);	/* defines dbg_dump_nodeinfo() */
+DBG_DUMP_FUNC(nodedesc);	/* defines dbg_dump_nodedesc() */
+DBG_DUMP_FUNC(portinfo);	/* defines dbg_dump_portinfo() */
+DBG_DUMP_FUNC(switchinfo);	/* defines dbg_dump_switchinfo() */
+
+static void build_umad_req(void *umad, uint8_t * path, unsigned path_cnt,
+			   uint64_t trid, uint8_t method,
+			   uint16_t attr_id, uint32_t attr_mod, uint64_t mkey)
+{	/* Fill `umad` with a directed-route SMP request for the given attribute. */
+	void *mad = umad_get_mad(umad);
+
+	memset(umad, 0, umad_size() + IB_MAD_SIZE);	/* clear umad header and MAD first */
+	umad_set_addr(umad, 0xffff, 0, 0, 0);
+	mad_set_field(mad, 0, IB_MAD_METHOD_F, method);
+	mad_set_field(mad, 0, IB_MAD_CLASSVER_F, 1);
+	mad_set_field(mad, 0, IB_MAD_MGMTCLASS_F, IB_SMI_DIRECT_CLASS);
+	mad_set_field(mad, 0, IB_MAD_BASEVER_F, 1);
+	mad_set_field(mad, 0, IB_DRSMP_HOPCNT_F, path_cnt);	/* number of hops in path */
+	mad_set_field(mad, 0, IB_DRSMP_HOPPTR_F, 0);
+	mad_set_field64(mad, 0, IB_MAD_TRID_F, trid);
+	mad_set_field(mad, 0, IB_DRSMP_DRDLID_F, 0xffff);
+	mad_set_field(mad, 0, IB_DRSMP_DRSLID_F, 0xffff);
+	mad_set_array(mad, 0, IB_DRSMP_PATH_F, path);	/* NOTE(review): presumably copies the full path field - confirm path[] size */
+	mad_set_field(mad, 0, IB_MAD_ATTRID_F, attr_id);
+	mad_set_field(mad, 0, IB_MAD_ATTRMOD_F, attr_mod);
+	mad_set_field64(mad, 0, IB_MAD_MKEY_F, mkey);
+}
+
+static int send_query(int fd, int agent, void *umad, unsigned node_id,
+		      uint8_t * path, size_t path_cnt, uint16_t attr_id,
+		      uint32_t attr_mod)
+{	/* Send one Get request over `path`; returns umad_send()'s result or -1. */
+	uint64_t trid;
+	int ret;
+
+	trid = (trid_cnt++ << 16) | (node_id & 0xffff);	/* low 16 bits carry node_id */
+	build_umad_req(umad, path, path_cnt, trid, IB_MAD_METHOD_GET, attr_id,
+		       attr_mod, 0);
+
+	ret = umad_send(fd, agent, umad, IB_MAD_SIZE, timeout, retries);
+	if (ret < 0) {
+		ERROR("umad_send failed: trid 0x%016" PRIx64
+		      ", attr_id %x, attr_mod %x: %s\n",
+		      trid, attr_id, attr_mod, strerror(errno));
+		return -1;
+	}
+
+	outstanding++;	/* counted only once the send succeeded */
+
+	VERBOSE("send %016" PRIx64 ": attr %x, mod %x to %s\n", trid, attr_id,
+		attr_mod, print_path(path, path_cnt));
+
+	return ret;
+}
+
+static int recv_response(int fd, int agent, uint8_t * umad, size_t length)
+{	/* Wait for a MAD for `agent`; returns its id, or -1 on error or bad status. */
+	int nbytes = length;
+	int rc = umad_recv(fd, umad, &nbytes, timeout);
+
+	while (rc >= 0 && rc != agent)	/* skip MADs delivered for other agents */
+		rc = umad_recv(fd, umad, &nbytes, timeout);
+
+	if (rc >= 0 && !umad_status(umad))
+		return rc;
+
+	ERROR("umad_recv failed: umad status %x: %s\n",
+	      umad_status(umad), strerror(errno));
+
+	return -1;
+}
+
+static int query_node_info(int fd, int agent, void *umad, unsigned node_id,
+			   uint8_t * path, size_t path_cnt)
+{	/* Request NodeInfo over `path` (attribute modifier 0) via send_query(). */
+	return send_query(fd, agent, umad, node_id, path, path_cnt,
+			  IB_ATTR_NODE_INFO, 0);
+}
+
+static int query_node_desc(int fd, int agent, void *umad, unsigned node_id,
+			   uint8_t * path, size_t path_cnt)
+{	/* Request NodeDescription over `path` (attribute modifier 0). */
+	return send_query(fd, agent, umad, node_id, path, path_cnt,
+			  IB_ATTR_NODE_DESC, 0);
+}
+
+static int query_switch_info(int fd, int agent, void *umad, unsigned node_id,
+			     uint8_t * path, size_t path_cnt)
+{	/* Request SwitchInfo over `path` (attribute modifier 0). */
+	return send_query(fd, agent, umad, node_id, path, path_cnt,
+			  IB_ATTR_SWITCH_INFO, 0);
+}
+
+static int query_port_info(int fd, int agent, void *umad, unsigned node_id,
+			   uint8_t * path, size_t path_cnt, unsigned port_num)
+{	/* Request PortInfo over `path`; port_num goes in the attribute modifier. */
+	return send_query(fd, agent, umad, node_id, path, path_cnt,
+			  IB_ATTR_PORT_INFO, port_num);
+}
+
+static int add_node(uint8_t * node_info)
+{	/* Allocate and register a node from a NodeInfo payload; returns its id or -1. */
+	struct node *node;
+	unsigned i, num_ports = mad_get_field(node_info, 0, IB_NODE_NPORTS_F);
+
+	/* Refuse to write past the end of the fixed-size node table. */
+	if (node_count >= sizeof(node_array) / sizeof(node_array[0]))
+		return -1;
+	/* Zero-filled node followed by ports[0..num_ports]. */
+	node = calloc(1, sizeof(*node) + (num_ports + 1) * sizeof(node->ports[0]));
+	if (!node)
+		return -1;
+	node->num_ports = num_ports;
+	node->guid = mad_get_field64(node_info, 0, IB_NODE_GUID_F);
+	node->is_switch = ((mad_get_field(node_info, 0, IB_NODE_TYPE_F)) ==
+			   IB_NODE_SWITCH);
+	memcpy(node->node_info, node_info, sizeof(node->node_info));
+	for (i = 0; i <= num_ports; i++)	/* back-pointer used by print_subnet() */
+		node->ports[i].node = node;
+
+	node_array[node_count] = node;
+	return node_count++;
+}
+
+static int find_node(uint8_t * node_info)
+{	/* Linear search of node_array by node GUID; returns the node id or -1. */
+	const uint64_t wanted = mad_get_field64(node_info, 0, IB_NODE_GUID_F);
+	unsigned idx = 0;
+
+	while (idx < node_count && node_array[idx]->guid != wanted)
+		idx++;
+
+	return idx < node_count ? (int)idx : -1;
+}
+
+static int process_port_info(void *umad, unsigned node_id, int fd, int agent,
+			     uint8_t path[], size_t path_cnt)
+{	/* Store a PortInfo reply; probe the far end of an active port. -1 if invalid. */
+	struct node *node = node_id < node_count ? node_array[node_id] : NULL;
+	uint8_t *port_info = umad + umad_size() + IB_SMP_DATA_OFFS;
+	unsigned port_num, local_port;
+
+	dbg_dump_portinfo(port_info);
+
+	port_num = mad_get_field(umad_get_mad(umad), 0, IB_MAD_ATTRMOD_F);
+	local_port = mad_get_field(port_info, 0, IB_PORT_LOCAL_PORT_F);
+
+	if (!node || port_num > node->num_ports)	/* both values come off the wire */
+		return -1;
+	memcpy(node->ports[port_num].port_info, port_info, sizeof(node->ports[0].port_info));
+
+	if (port_num &&	/* never probe through port 0 */
+	    mad_get_field(port_info, 0, IB_PORT_PHYS_STATE_F) == 5 &&	/* 5: LinkUp */
+	    ((node->is_switch && port_num != local_port) ||
+	     (node_id == 0 && port_num == local_port))) {
+		path[++path_cnt] = port_num;	/* extend the route by one hop */
+		return query_node_info(fd, agent, umad, node_id, path,
+				       path_cnt);
+	}
+
+	return 0;
+}
+
+static int process_switch_info(unsigned node_id, uint8_t * switch_info)
+{	/* Keep the raw SwitchInfo payload with the node; always returns 0. */
+	uint8_t *dst = node_array[node_id]->switch_info;
+
+	dbg_dump_switchinfo(switch_info);
+	memcpy(dst, switch_info, sizeof(node_array[node_id]->switch_info));
+
+	return 0;
+}
+
+static int process_node_desc(unsigned node_id, uint8_t * node_desc)
+{	/* Keep the raw NodeDescription payload with the node; always returns 0. */
+	uint8_t *dst = node_array[node_id]->node_desc;
+
+	dbg_dump_nodedesc(node_desc);
+	memcpy(dst, node_desc, sizeof(node_array[node_id]->node_desc));
+
+	return 0;
+}
+
+static void connect_ports(unsigned node1_id, unsigned port1_num,
+			  unsigned node2_id, unsigned port2_num)
+{	/* Record both ends of a link by cross-pointing the ports' `remote` fields. */
+	struct node *first = node_array[node1_id], *second = node_array[node2_id];
+
+	VERBOSE("connecting %u:%u <--> %u:%u\n",
+		node1_id, port1_num, node2_id, port2_num);
+	first->ports[port1_num].remote = &second->ports[port2_num];
+	second->ports[port2_num].remote = &first->ports[port1_num];
+}
+
+static int process_node(void *umad, unsigned remote_id, int fd, int agent,
+			uint8_t path[], size_t path_cnt)
+{	/* Handle a NodeInfo reply: register the node, link it, query it if new. */
+	struct node *node;
+	uint8_t *node_info = umad_get_mad(umad) + IB_SMP_DATA_OFFS;
+	unsigned port_num = mad_get_field(node_info, 0, IB_NODE_LOCAL_PORT_F);
+	unsigned node_is_new = 0;
+	int i, id;
+
+	dbg_dump_nodeinfo(node_info);
+
+	if ((id = find_node(node_info)) < 0) {	/* GUID not seen before */
+		id = add_node(node_info);
+		if (id < 0)
+			return -1;
+		node_is_new = 1;
+	}
+
+	node = node_array[id];
+
+	node->ports[port_num].guid =	/* NOTE(review): port_num is not range-checked */
+	    mad_get_field64(node_info, 0, IB_NODE_PORT_GUID_F);
+
+	if (id)			/* skip connect for very first node */
+		connect_ports(id, port_num, remote_id, path[path_cnt]);
+
+	if (!node_is_new)	/* known node: only the link above was new */
+		return 0;
+
+	query_node_desc(fd, agent, umad, id, path, path_cnt);	/* rebuilds umad: node_info is stale from here on */
+
+	if (node->is_switch)
+		query_switch_info(fd, agent, umad, id, path, path_cnt);
+
+	for (i = !node->is_switch; i <= node->num_ports; i++)	/* starts at port 0 only for switches */
+		query_port_info(fd, agent, umad, id, path, path_cnt, i);
+
+	return 0;
+}
+
+static int recv_smp_resp(int fd, int agent, uint8_t * umad, uint8_t path[])
+{	/* Receive one MAD and dispatch it by attribute id; -1 on an error reply. */
+	void *mad;
+	uint64_t trid;
+	uint8_t method;
+	uint16_t status;
+	uint16_t attr_id;
+	uint32_t attr_mod;
+	size_t path_cnt;
+	unsigned node_id;
+	int ret;
+
+	ret = recv_response(fd, agent, umad, IB_MAD_SIZE);
+
+	mad = umad_get_mad(umad);	/* header is parsed even when ret < 0 */
+	status = mad_get_field(mad, 0, IB_DRSMP_STATUS_F);
+	method = mad_get_field(mad, 0, IB_MAD_METHOD_F);
+	trid = mad_get_field64(mad, 0, IB_MAD_TRID_F);
+	attr_id = mad_get_field(mad, 0, IB_MAD_ATTRID_F);
+	attr_mod = mad_get_field(mad, 0, IB_MAD_ATTRMOD_F);
+	path_cnt = mad_get_field(mad, 0, IB_DRSMP_HOPCNT_F);
+	mad_get_array(mad, 0, IB_DRSMP_PATH_F, path);	/* overwrites the caller's path */
+
+	if (method != IB_MAD_METHOD_GET)
+		return 0;
+
+	outstanding--;	/* failed replies are accounted for as well */
+
+	if (ret < 0 || status) {
+		ERROR("error response 0x%016" PRIx64 ": attr_id %x"
+		      ", attr_mod %x from %s with status %x\n", trid,
+		      attr_id, attr_mod, print_path(path, path_cnt), status);
+		return -1;
+	}
+
+	node_id = trid & 0xffff;	/* id that send_query() put in the low 16 bits */
+
+	VERBOSE("recv %016" PRIx64 ": attr %x, mod %x from %s\n", trid, attr_id,
+		attr_mod, print_path(path, path_cnt));
+
+	switch (attr_id) {
+	case IB_ATTR_NODE_INFO:
+		process_node(umad, node_id, fd, agent, path, path_cnt);
+		break;
+	case IB_ATTR_NODE_DESC:
+		process_node_desc(node_id, mad + IB_SMP_DATA_OFFS);
+		break;
+	case IB_ATTR_SWITCH_INFO:
+		process_switch_info(node_id, mad + IB_SMP_DATA_OFFS);
+		break;
+	case IB_ATTR_PORT_INFO:
+		process_port_info(umad, node_id, fd, agent, path, path_cnt);
+		break;
+	default:
+		VERBOSE("unsolicited response 0x%016" PRIx64 ": attr_id %x"
+			", attr_mod %x\n", trid, attr_id, attr_mod);
+		return 0;
+	}
+
+	return ret;	/* NOTE(review): recv_response()'s value, i.e. the agent id, not 0 */
+}
+
+static int discovery(int fd, int agent)
+{	/* Returns 0 on success, <0 on setup failure, 1 if any response failed. */
+	uint8_t path[64] = { 0 };
+	void *umad;
+	int ret;
+
+	umad = malloc(IB_MAD_SIZE + umad_size());
+	if (!umad)
+		return -ENOMEM;
+
+	/* Seed the sweep with a NodeInfo query over the zero-hop path. */
+	ret = query_node_info(fd, agent, umad, 0, path, 0);
+
+	/* recv_smp_resp() returns the agent id (>= 0) on success, <0 on error. */
+	while (ret >= 0 && outstanding)
+		if (recv_smp_resp(fd, agent, umad, path) < 0)
+			ret = 1;
+
+	free(umad);	/* also reached when the first send failed */
+
+	return ret;
+}
+
+static int umad_discovery(char *card_name, unsigned int port_num)
+{	/* Open the port, register for DR SMPs, discover; releases what it acquired. */
+	int fd, agent, ret = -1;
+
+	if (umad_init()) {
+		ERROR("cannot init umad\n");
+		return -1;
+	}
+
+	fd = umad_open_port(card_name, port_num);
+	if (fd < 0) {
+		ERROR("cannot open umad port %s:%u: %s\n",
+		      card_name ? card_name : "NULL", port_num,
+		      strerror(errno));
+		goto out_done;
+	}
+
+	agent = umad_register(fd, IB_SMI_DIRECT_CLASS, 1, 0, NULL);
+	if (agent < 0) {
+		ERROR("cannot register SMI DR class for umad port %s:%u: %s\n",
+		      card_name ? card_name : "NULL", port_num,
+		      strerror(errno));
+		goto out_close;
+	}
+
+	ret = discovery(fd, agent);
+	if (ret)
+		ERROR("Failed to discover.\n");
+
+	umad_unregister(fd, agent);
+out_close:
+	umad_close_port(fd);
+out_done:
+	umad_done();
+
+	return ret;
+}
+
+static void print_subnet(void)
+{	/* Print every node, each followed by its connected ports, on stdout. */
+	struct node *node;
+	struct port *local, *remote;
+	unsigned i, j;
+
+	for (i = 0; i < node_count; i++) {
+		node = node_array[i];
+		printf("%s %u \"%s-%016" PRIx64 "\" \t# %.*s\n",
+		       node->is_switch ? "Switch" : "Ca", node->num_ports,
+		       node->is_switch ? "S" : "H", node->guid,
+		       (int)sizeof(node->node_desc), (char *)node->node_desc);	/* may lack a NUL */
+		for (j = 1; j <= node->num_ports; j++) {
+			local = &node->ports[j];
+			remote = local->remote;
+			if (!remote)	/* nothing discovered behind this port */
+				continue;
+			printf("[%u] \t\"%s-%016" PRIx64 "\"[%td] \t# %.*s\n", j,
+			       remote->node->is_switch ? "S" : "H",
+			       remote->node->guid, remote - remote->node->ports,	/* ptrdiff_t: peer port number */
+			       (int)sizeof(node->node_desc), (char *)remote->node->node_desc);
+		}
+		printf("\n");
+	}
+}
+
+int main(int argc, char **argv)
+{	/* Parse options, run the discovery, then print what was found. */
+	const struct option long_opts[] = {
+		{"Card", 1, 0, 'C'},
+		{"Port", 1, 0, 'P'},
+		{"timeout", 1, 0, 't'},
+		{"retries", 1, 0, 'r'},
+		{NULL, 0, NULL, 0}	/* terminator; a bare "{}" is not valid C11 */
+	};
+	char *card_name = NULL;
+	unsigned int port_num = 0;
+	int ch, ret;
+
+	while (1) {
+		ch = getopt_long(argc, argv, "C:P:t:r:v", long_opts, NULL);
+		if (ch == -1)
+			break;
+		switch (ch) {
+		case 'C':
+			card_name = optarg;
+			break;
+		case 'P':
+			port_num = strtoul(optarg, NULL, 0);
+			break;
+		case 't':
+			timeout = strtoul(optarg, NULL, 0);
+			break;
+		case 'r':
+			retries = strtoul(optarg, NULL, 0);
+			break;
+		case 'v':
+			verbose++;
+			break;
+		default:
+			/* unknown option or missing argument */
+			printf("usage: %s [-C card_name] [-P port_num]"
+			       " [-t timeout] [-r retries] [-v[v]]\n", argv[0]);
+			exit(2);
+		}
+	}
+
+	ret = umad_discovery(card_name, port_num);
+
+	print_subnet();	/* runs even if discovery reported an error */
+
+	return ret;
+}
-- 
1.6.6.rc3

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

  reply	other threads:[~2009-12-20 12:14 UTC|newest]

Thread overview: 14+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
     [not found] <20090813204306.dffc3237.weiny2@llnl.gov>
     [not found] ` <20090816110200.GS25501@me>
     [not found]   ` <20090817083023.da17378b.weiny2@llnl.gov>
     [not found]     ` <20090823120609.GG9547@me>
     [not found]       ` <20090831170144.da0e7185.weiny2@llnl.gov>
     [not found]         ` <20090831170144.da0e7185.weiny2-i2BcT+NCU+M@public.gmane.org>
2009-10-23 17:45           ` [PATCH 4/5] infiniband-diags/libibnetdisc: Introduce a context object Sasha Khapyorsky
     [not found]       ` <20090826164026.8dcce4b2.weiny2@llnl.gov>
     [not found]         ` <20090826164026.8dcce4b2.weiny2-i2BcT+NCU+M@public.gmane.org>
2009-10-23 23:43           ` Multi-threaded diags (Was: Re: [PATCH 4/5] infiniband-diags/libibnetdisc: Introduce a context object.) Sasha Khapyorsky
2009-12-20 12:14             ` Sasha Khapyorsky [this message]
     [not found]               ` <20091220182809.f7e17fae.weiny2@llnl.gov>
     [not found]                 ` <20091220182809.f7e17fae.weiny2-i2BcT+NCU+M@public.gmane.org>
2009-12-21  7:35                   ` [PATCH] tests/subnet_discover: discover test utility Sasha Khapyorsky
2009-12-21 14:02                     ` Hal Rosenstock
     [not found]                       ` <f0e08f230912210602i5e3f528h2b0630420346db82-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2009-12-22 11:27                         ` Sasha Khapyorsky
2009-12-28  9:22                     ` Sasha Khapyorsky
2010-01-11 13:56                       ` Hal Rosenstock
     [not found]                         ` <f0e08f231001110556y7c47cc54oa3cfd5859f9a4e76-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2010-01-12  9:31                           ` Sasha Khapyorsky
2010-01-13 20:11                             ` Hal Rosenstock
     [not found]                               ` <f0e08f231001131211y64489a51nd2621cefdb27ad25-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2010-01-16 19:36                                 ` Sasha Khapyorsky
2010-01-23 12:24                                   ` Hal Rosenstock
2010-01-21 20:38                                 ` Ira Weiny
     [not found]                                   ` <20100121123841.43df4cdc.weiny2-i2BcT+NCU+M@public.gmane.org>
2010-01-25 15:18                                     ` Sasha Khapyorsky

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20091220121406.GF5262@me \
    --to=sashak-smomgflxvozwk0htik3j/w@public.gmane.org \
    --cc=chu11-i2BcT+NCU+M@public.gmane.org \
    --cc=linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org \
    --cc=weiny2-i2BcT+NCU+M@public.gmane.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.