All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] perftest/rdma_bw for XRC
@ 2010-07-30 20:11 frank zago
  0 siblings, 0 replies; only message in thread
From: frank zago @ 2010-07-30 20:11 UTC (permalink / raw)
  To: linux-rdma-u79uwXL29TY76Z2rM5mHXA

[-- Attachment #1: Type: text/plain, Size: 243 bytes --]

Hello,

This is sample code for using rdma_bw with XRC, both with RDMA CM and with an out-of-band connection.
The modified rdma_bw code uses a 1-to-1 connection to demonstrate that RDMA CM can establish
a connection for XRC QPs.

Regards,
  frank.



[-- Attachment #2: rdma-bw-xrc-v1.diff --]
[-- Type: text/x-patch, Size: 12752 bytes --]

diff -ruw /usr/src/redhat/BUILD/perftest-1.2.3/rdma_bw.c perftest-1.2.3/rdma_bw.c
--- /usr/src/redhat/BUILD/perftest-1.2.3/rdma_bw.c	2009-12-10 08:33:52.000000000 -0600
+++ perftest-1.2.3/rdma_bw.c	2010-07-30 14:54:05.000000000 -0500
@@ -52,6 +52,7 @@
 #include <arpa/inet.h>
 #include <byteswap.h>
 #include <time.h>
+#include <fcntl.h>
 
 #include <infiniband/verbs.h>
 #include <rdma/rdma_cma.h>
@@ -77,6 +78,11 @@
 	int                 tx_depth;
 	struct ibv_sge      list;
 	struct ibv_send_wr  wr;
+
+	/* XRC support. */
+	struct ibv_xrc_domain *xrc_domain;
+	struct ibv_srq *xrc_srq;
+	uint32_t xrc_rcv_qpn;
 };
 
 struct pingpong_dest {
@@ -85,6 +91,7 @@
 	int psn;
 	unsigned rkey;
 	unsigned long long vaddr;
+	uint32_t xrc_srq_num;
 };
 
 struct pp_data {
@@ -93,6 +100,7 @@
 	unsigned            		size;
 	int                 		tx_depth;
 	int				use_cma;
+	int				use_xrc;
 	int 		    		sockfd;
 	char				*servername;
 	struct pingpong_dest		my_dest;
@@ -107,7 +115,7 @@
 static void pp_wait_for_done(struct pingpong_context *);
 static void pp_send_done(struct pingpong_context *);
 static void pp_wait_for_start(struct pingpong_context *);
-static void pp_send_start(struct pingpong_context *);
+static void pp_send_start(struct pingpong_context *, struct pp_data *);
 static void pp_close_cma(struct pp_data );
 static struct pingpong_context *pp_init_ctx(void *, struct pp_data *);
 
@@ -283,13 +291,14 @@
 
 static int pp_client_exch_dest(struct pp_data *data)
 {
-	char msg[sizeof "0000:000000:000000:00000000:0000000000000000"];
+	char msg[sizeof "0000:000000:000000:00000000:0000000000000000:000000"];
 	int parsed;
 	
 	if (!data->use_cma) {
-		sprintf(msg, "%04x:%06x:%06x:%08x:%016Lx", data->my_dest.lid, 
+		sprintf(msg, "%04x:%06x:%06x:%08x:%016Lx:%06x", data->my_dest.lid, 
 				data->my_dest.qpn, data->my_dest.psn,
-				data->my_dest.rkey, data->my_dest.vaddr);
+				data->my_dest.rkey, data->my_dest.vaddr, 0);
+
 		if (write(data->sockfd, msg, sizeof msg) != sizeof msg) {
 			perror("client write");
 			fprintf(stderr, "%d:%s: Couldn't send local address\n", 
@@ -310,11 +319,12 @@
 		if (!data->rem_dest)
 			goto err;
 	
-		parsed = sscanf(msg, "%x:%x:%x:%x:%Lx", &data->rem_dest->lid,
+		parsed = sscanf(msg, "%x:%x:%x:%x:%Lx:%x", &data->rem_dest->lid,
 				&data->rem_dest->qpn, &data->rem_dest->psn,
-				&data->rem_dest->rkey, &data->rem_dest->vaddr);
+						&data->rem_dest->rkey, &data->rem_dest->vaddr,
+						&data->rem_dest->xrc_srq_num);
 	
-		if (parsed != 5) {
+		if (parsed != 6) {
 			fprintf(stderr, "%d:%s: Couldn't parse line <%.*s>\n",
 					pid, __func__, (int)sizeof msg, msg);
 			free(data->rem_dest);
@@ -404,6 +414,11 @@
 		conn_param.initiator_depth = 1;
 		conn_param.private_data = &data->my_dest;
 		conn_param.private_data_len = sizeof(data->my_dest);
+
+		if (data->use_xrc) {
+			data->my_dest.xrc_srq_num = ctx->xrc_srq->xrc_srq_num;
+		}
+
 		if (rdma_accept(child_cm_id, &conn_param)) {
 			fprintf(stderr, "%d:%s: rdma_accept failed\n", pid, __func__);
 			goto err1;
@@ -476,7 +491,7 @@
 
 static int pp_server_exch_dest(struct pp_data *data)
 {
-	char msg[sizeof "0000:000000:000000:00000000:0000000000000000"];
+	char msg[sizeof "0000:000000:000000:00000000:0000000000000000:000000"];
 	int parsed;
 	int n;
 	
@@ -495,19 +510,22 @@
 		if (!data->rem_dest)
 			goto err;
 	
-		parsed = sscanf(msg, "%x:%x:%x:%x:%Lx", &data->rem_dest->lid,
+		parsed = sscanf(msg, "%x:%x:%x:%x:%Lx:%x", &data->rem_dest->lid,
 			      &data->rem_dest->qpn, &data->rem_dest->psn,
-			      &data->rem_dest->rkey, &data->rem_dest->vaddr);
-		if (parsed != 5) {
+						&data->rem_dest->rkey, &data->rem_dest->vaddr,
+						&data->rem_dest->xrc_srq_num);
+		if (parsed != 6) {
 			fprintf(stderr, "%d:%s: Couldn't parse line <%.*s>\n", pid,
 						 __func__, (int)sizeof msg, msg);
 			free(data->rem_dest);
 			goto err;
 		}
 	
-		sprintf(msg, "%04x:%06x:%06x:%08x:%016Lx", data->my_dest.lid,
+		sprintf(msg, "%04x:%06x:%06x:%08x:%016Lx:%06x", data->my_dest.lid,
 					 data->my_dest.qpn, data->my_dest.psn,
-					 data->my_dest.rkey, data->my_dest.vaddr);
+				data->my_dest.rkey, data->my_dest.vaddr,
+				data->my_dest.xrc_srq_num);
+
 		if (write(data->sockfd, msg, sizeof msg) != sizeof msg) {
 			perror("server write");
 			fprintf(stderr, "%d:%s: Couldn't send local address\n", 
@@ -522,13 +540,27 @@
 	return 1;
 }
 
+static int my_modify_qp(struct pingpong_context *ctx, struct pp_data *data, struct ibv_qp_attr *attr,
+						int attr_mask)
+{
+	int rc;
+
+	if (data->use_xrc && ctx->xrc_rcv_qpn)
+		rc = ibv_modify_xrc_rcv_qp(ctx->xrc_domain, ctx->xrc_rcv_qpn,
+								   attr, attr_mask);
+	else
+		rc = ibv_modify_qp(ctx->qp, attr, attr_mask);
+
+	return rc;
+}
+
 static struct pingpong_context *pp_init_ctx(void *ptr, struct pp_data *data)
 {
 	struct pingpong_context *ctx;
 	struct ibv_device *ib_dev;
 	struct rdma_cm_id *cm_id = NULL;
 
-	ctx = malloc(sizeof *ctx);
+	ctx = calloc(1, sizeof *ctx);
 	if (!ctx)
 		return NULL;
 
@@ -601,8 +633,35 @@
 		return NULL;
 	}
 
+	if (data->use_xrc) {
+		struct ibv_srq_init_attr srq_init_attr;
+
+		/* Open XRC domain. */
+		int fd = -1;
+		ctx->xrc_domain = ibv_open_xrc_domain(ctx->context, fd, O_CREAT);
+		if (!ctx->xrc_domain) {
+			fprintf(stderr, "%d:%s: Couldn't create XRC domain.\n", pid, __func__);
+			return NULL;
+		}
+
+		srq_init_attr.srq_context = data->ib_dev;
+		srq_init_attr.attr.max_wr = 1;
+		srq_init_attr.attr.max_sge = 1;
+		srq_init_attr.attr.srq_limit = 30; /* should be ignored */
+
+		if (!data->servername) {
+			ctx->xrc_srq = ibv_create_xrc_srq(ctx->pd,
+											  ctx->xrc_domain,
+											  ctx->rcq,
+											  &srq_init_attr);
+			if (!ctx->xrc_srq) {
+				fprintf(stderr, "Failed to create SRQ.\n");
+				return NULL;
+			}
+		}
+	}
 
-	struct ibv_qp_init_attr attr = {
+	struct ibv_qp_init_attr init_attr = {
 		.send_cq = ctx->scq,
 		.recv_cq = ctx->rcq,
 		.cap     = {
@@ -614,32 +673,55 @@
 			.max_recv_sge = 1,
 			.max_inline_data = 0
 		},
-		.qp_type = IBV_QPT_RC
 	};
 
+	if (data->use_xrc) {
+		init_attr.qp_type = IBV_QPT_XRC;
+		init_attr.xrc_domain = ctx->xrc_domain;
+
+		if (!data->servername)
+			init_attr.cap.max_send_wr = 0; /* important */
+
+	} else {
+		init_attr.qp_type = IBV_QPT_RC;
+	}
+	
 	if (data->use_cma) {
-		if (rdma_create_qp(cm_id, ctx->pd, &attr)) {
+		if (rdma_create_qp(cm_id, ctx->pd, &init_attr)) {
 			fprintf(stderr, "%d:%s: Couldn't create QP\n", pid, __func__);
 			return NULL;
 		}
+		
 		ctx->qp = cm_id->qp;
+		
+		/* Don't post anything on the XRC send side. */
+		if (!(data->use_xrc && data->servername))
 		pp_post_recv(ctx);
-		return ctx;
+
 	} else {
-		ctx->qp = ibv_create_qp(ctx->pd, &attr);
+		if (data->use_xrc && !data->servername) {
+			/* Server. Create XRC receive QP. */
+			if (ibv_create_xrc_rcv_qp(&init_attr, &ctx->xrc_rcv_qpn)) {
+				fprintf(stderr, "%d:%s: Failed to create XRC receive QP.\n", pid, __func__);
+				return NULL;
+			}
+		} else {
+			ctx->qp = ibv_create_qp(ctx->pd, &init_attr);
+
 		if (!ctx->qp)  {
 			fprintf(stderr, "%d:%s: Couldn't create QP\n", pid, __func__);
 			return NULL;
 		}
-		{
-			struct ibv_qp_attr attr;
+		}
 	
-			attr.qp_state        = IBV_QPS_INIT;
-			attr.pkey_index      = 0;
-			attr.port_num        = data->ib_port;
-			attr.qp_access_flags = IBV_ACCESS_REMOTE_WRITE;
+		struct ibv_qp_attr attr = {
+			.qp_state        = IBV_QPS_INIT,
+			.pkey_index      = 0,
+			.port_num        = data->ib_port,
+			.qp_access_flags = IBV_ACCESS_REMOTE_WRITE,
+		};
 	
-			if (ibv_modify_qp(ctx->qp, &attr,
+		if (my_modify_qp(ctx, data, &attr,
 					IBV_QP_STATE              |
 					IBV_QP_PKEY_INDEX         |
 					IBV_QP_PORT               |
@@ -653,8 +735,6 @@
 		return ctx;
 	}
 	
-}
-
 static int pp_connect_ctx(struct pingpong_context *ctx, struct pp_data data)			  
 {
 	struct ibv_qp_attr attr;
@@ -671,7 +751,7 @@
 	attr.ah_attr.sl         = sl;
 	attr.ah_attr.src_path_bits = 0;
 	attr.ah_attr.port_num   = data.ib_port;
-	if (ibv_modify_qp(ctx->qp, &attr,
+	if (my_modify_qp(ctx, &data, &attr,
 			  IBV_QP_STATE              |
 			  IBV_QP_AV                 |
 			  IBV_QP_PATH_MTU           |
@@ -689,7 +769,7 @@
 	attr.rnr_retry 	    = 7;
 	attr.sq_psn 	    =  data.my_dest.psn;
 	attr.max_rd_atomic  = 1;
-	if (ibv_modify_qp(ctx->qp, &attr,
+	if (my_modify_qp(ctx, &data, &attr,
 			  IBV_QP_STATE              |
 			  IBV_QP_TIMEOUT            |
 			  IBV_QP_RETRY_CNT          |
@@ -717,7 +797,11 @@
         wr.sg_list = &list;
         wr.num_sge = 1;
 
+		if (ctx->xrc_srq) {
+			rc = ibv_post_srq_recv(ctx->xrc_srq, &wr, &bad_wr);
+		} else {
         rc = ibv_post_recv(ctx->qp, &wr, &bad_wr);
+		}
         if (rc) {
                 perror("ibv_post_recv");
                 fprintf(stderr, "%d:%s: ibv_post_recv failed %d\n", pid,
@@ -761,6 +845,7 @@
 	ctx->wr.opcode     = IBV_WR_SEND;
 	ctx->wr.send_flags = IBV_SEND_SIGNALED;
 	ctx->wr.next       = NULL;
+
 	if (ibv_post_send(ctx->qp, &ctx->wr, &bad_wr)) {
 		fprintf(stderr, "%d:%s: ibv_post_send failed\n", pid, __func__);
 		return;
@@ -803,7 +888,7 @@
 	pp_post_recv(ctx);
 }
 
-static void pp_send_start(struct pingpong_context *ctx)
+static void pp_send_start(struct pingpong_context *ctx, struct pp_data *data)
 {
 	struct ibv_send_wr *bad_wr;
 	struct ibv_wc wc;
@@ -818,6 +903,10 @@
 	ctx->wr.opcode     = IBV_WR_SEND;
 	ctx->wr.send_flags = IBV_SEND_SIGNALED;
 	ctx->wr.next       = NULL;
+
+	if (data->use_xrc)
+		ctx->wr.xrc_remote_srq_num = data->rem_dest->xrc_srq_num;
+
 	if (ibv_post_send(ctx->qp, &ctx->wr, &bad_wr)) {
 		fprintf(stderr, "%d:%s: ibv_post_send failed\n", pid, __func__);
 		return;
@@ -964,10 +1053,11 @@
 			{ .name = "sl",             .has_arg = 1, .val = 'S' },
 			{ .name = "bidirectional",  .has_arg = 0, .val = 'b' },
 			{ .name = "cma", 	    .has_arg = 0, .val = 'c' },
+			{ .name = "xrc", 	    .has_arg = 0, .val = 'x' },
 			{ 0 }
 		};
 
-		c = getopt_long(argc, argv, "p:d:i:s:n:t:S:bc", long_options, NULL);
+		c = getopt_long(argc, argv, "p:d:i:s:n:t:S:bcx", long_options, NULL);
 		if (c == -1)
 			break;
 
@@ -1026,6 +1116,11 @@
 		case 'c':
 			data.use_cma = 1;
 			break;
+
+		case 'x':
+			data.use_xrc = 1;
+			break;
+
 		default:
 			usage(argv[0]);
 			return 1;
@@ -1039,6 +1134,13 @@
 		return 1;
 	}
 
+#if 0
+	if (data.use_cma && data.use_xrc) {
+		fprintf(stderr, "RDMA CM and XRC are not compatible.\n");
+		return 1;
+	}
+#endif
+
 	/* Get the PID and prepend it to every output on stdout/stderr
 	 * This helps to parse output when multiple client/server are
 	 * run from single host
@@ -1112,7 +1214,16 @@
 						pid, __func__);
 			return 1;
 		}
+
+		if (data.use_xrc && !data.servername) {
+			data.my_dest.qpn = ctx->xrc_rcv_qpn;
+			data.my_dest.xrc_srq_num = ctx->xrc_srq->xrc_srq_num;
+		}
+		else {
 		data.my_dest.qpn = ctx->qp->qp_num;
+			data.my_dest.xrc_srq_num = 0;
+		}
+
 		data.my_dest.psn = lrand48() & 0xffffff;
 		data.my_dest.rkey = ctx->mr->rkey;
 		data.my_dest.vaddr = (uintptr_t)ctx->buf + ctx->size;
@@ -1129,14 +1240,20 @@
 	}
 
 	printf("%d: Local address:  LID %#04x, QPN %#06x, PSN %#06x "
-			"RKey %#08x VAddr %#016Lx\n", pid, 
+			"RKey %#08x VAddr %#016Lx", pid, 
 			data.my_dest.lid, data.my_dest.qpn, data.my_dest.psn,
 			data.my_dest.rkey, data.my_dest.vaddr);	
+	if (data.use_xrc && !data.servername)
+		printf(", XRCSRQ %#06x", data.my_dest.xrc_srq_num);
+	printf("\n");
 
 	printf("%d: Remote address: LID %#04x, QPN %#06x, PSN %#06x, "
-			"RKey %#08x VAddr %#016Lx\n\n", pid, 
+			"RKey %#08x VAddr %#016Lx", pid, 
 			data.rem_dest->lid, data.rem_dest->qpn, data.rem_dest->psn,
 			data.rem_dest->rkey, data.rem_dest->vaddr);
+	if (data.use_xrc && data.servername)
+		printf(", XRCSRQ %#06x", data.rem_dest->xrc_srq_num);
+	printf("\n\n");
 
 	if (data.use_cma) {
 		/*
@@ -1144,7 +1261,7 @@
 		 * the first message (MPA requirement).
 		 */
 		if (data.servername) {			
-			pp_send_start(ctx);
+			pp_send_start(ctx, &data);
 		} else {
 			pp_wait_for_start(ctx);
 		}
@@ -1168,6 +1285,7 @@
 	if (!data.servername && !duplex) {
 		if (data.use_cma) {
 			pp_wait_for_done(ctx);
+			if (!data.use_xrc)
                         pp_send_done(ctx);
 			pp_close_cma(data);
 		} else {
@@ -1189,6 +1307,7 @@
 	ctx->wr.opcode     = IBV_WR_RDMA_WRITE;
 	ctx->wr.send_flags = IBV_SEND_SIGNALED;
 	ctx->wr.next       = NULL;
+	ctx->wr.xrc_remote_srq_num = data.rem_dest->xrc_srq_num;
 
 	scnt = 0;
 	ccnt = 0;
@@ -1254,7 +1373,9 @@
 
 	if (data.use_cma) {
 		/* This is racy when duplex mode is used*/
+		if (!(data.use_xrc && !data.servername))
 		pp_send_done(ctx);
+		if (!(data.use_xrc && data.servername))
                	pp_wait_for_done(ctx);
 		pp_close_cma(data);
 	} else {



^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2010-07-30 20:11 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2010-07-30 20:11 [PATCH] perftest/rdma_bw for XRC frank zago

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.