All of lore.kernel.org
 help / color / mirror / Atom feed
From: Sasha Khapyorsky <sashak-smomgflXvOZWk0Htik3J/w@public.gmane.org>
To: Ira Weiny <weiny2-i2BcT+NCU+M@public.gmane.org>
Cc: "linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org"
	<linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org>
Subject: Re: [PATCH] ibnetdisc: Separate calls to umad and mad layer to avoid race condition on response MAD's
Date: Sat, 22 May 2010 17:40:10 +0300	[thread overview]
Message-ID: <20100522144010.GR28549@me> (raw)
In-Reply-To: <20100511164812.e16e41e6.weiny2-i2BcT+NCU+M@public.gmane.org>

On 16:48 Tue 11 May     , Ira Weiny wrote:
> 
> From: Ira Weiny <weiny2-i2BcT+NCU+M@public.gmane.org>
> Date: Tue, 11 May 2010 15:36:08 -0700
> Subject: [PATCH] ibnetdisc: Separate calls to umad and mad layer to avoid race condition on response MAD's
> 
> 	Specify CA/Port to use which allows parallel scanning to other operations.
> 
> Signed-off-by: Ira Weiny <weiny2-i2BcT+NCU+M@public.gmane.org>

Applied. Thanks.

However see a minor comment below.

> ---
>  .../libibnetdisc/include/infiniband/ibnetdisc.h    |   15 ++--
>  infiniband-diags/libibnetdisc/src/ibnetdisc.c      |   52 +++++++-----
>  infiniband-diags/libibnetdisc/src/internal.h       |   11 ++-
>  infiniband-diags/libibnetdisc/src/query_smp.c      |   83 ++++++++++++++++----
>  infiniband-diags/libibnetdisc/test/testleaks.c     |   16 +---
>  infiniband-diags/src/iblinkinfo.c                  |    8 +-
>  infiniband-diags/src/ibnetdiscover.c               |   14 +---
>  infiniband-diags/src/ibqueryerrors.c               |   11 ++-
>  8 files changed, 134 insertions(+), 76 deletions(-)

[snip]

> diff --git a/infiniband-diags/libibnetdisc/src/internal.h b/infiniband-diags/libibnetdisc/src/internal.h
> index 2cfde02..d037a60 100644
> --- a/infiniband-diags/libibnetdisc/src/internal.h
> +++ b/infiniband-diags/libibnetdisc/src/internal.h
> @@ -54,6 +54,8 @@
>  #define MAXHOPS         63
>  
>  #define DEFAULT_MAX_SMP_ON_WIRE 2
> +#define DEFAULT_TIMEOUT 1000
> +#define DEFAULT_RETRIES 3
>  
>  typedef struct ibnd_scan {
>  	ib_portid_t selfportid;
> @@ -76,16 +78,19 @@ struct ibnd_smp {
>  
>  struct smp_engine {
>  	struct ibmad_port *ibmad_port;

After this patch ibmad_port is not used by smp_engine, but instead by an
"upper" layer (ibnetdisc). So I would suggest to move this there (for
example to be a member of scan struct).

Sasha

> +	int umad_fd;
> +	int smi_agent;
> +	int smi_dir_agent;
>  	ibnd_smp_t *smp_queue_head;
>  	ibnd_smp_t *smp_queue_tail;
>  	void *user_data;
>  	cl_qmap_t smps_on_wire;
> -	int max_smps_on_wire;
> +	struct ibnd_config *cfg;
>  	unsigned total_smps;
>  };
>  
> -void smp_engine_init(smp_engine_t * engine, struct ibmad_port *ibmad_port,
> -		     void *user_data, int max_smps_on_wire);
> +int smp_engine_init(smp_engine_t * engine, char * ca_name, int ca_port,
> +		    void *user_data, ibnd_config_t *cfg);
>  int issue_smp(smp_engine_t * engine, ib_portid_t * portid,
>  	      unsigned attrid, unsigned mod, smp_comp_cb_t cb, void *cb_data);
>  int process_mads(smp_engine_t * engine);
> diff --git a/infiniband-diags/libibnetdisc/src/query_smp.c b/infiniband-diags/libibnetdisc/src/query_smp.c
> index 7234844..4dbfa0d 100644
> --- a/infiniband-diags/libibnetdisc/src/query_smp.c
> +++ b/infiniband-diags/libibnetdisc/src/query_smp.c
> @@ -61,25 +61,32 @@ static ibnd_smp_t *get_smp(smp_engine_t * engine)
>  	return rc;
>  }
>  
> -static int send_smp(ibnd_smp_t * smp, struct ibmad_port *srcport)
> +static int send_smp(ibnd_smp_t * smp, smp_engine_t * engine)
>  {
>  	int rc = 0;
>  	uint8_t umad[1024];
>  	ib_rpc_t *rpc = &smp->rpc;
> +	int agent = 0;
>  
>  	memset(umad, 0, umad_size() + IB_MAD_SIZE);
>  
> +	if (rpc->mgtclass == IB_SMI_CLASS) {
> +		agent = engine->smi_agent;
> +	} else if (rpc->mgtclass == IB_SMI_DIRECT_CLASS) {
> +		agent = engine->smi_dir_agent;
> +	} else {
> +		IBND_ERROR("Invalid class for RPC\n");
> +		return (-EIO);
> +	}
> +
>  	if ((rc = mad_build_pkt(umad, &smp->rpc, &smp->path, NULL, NULL))
>  	    < 0) {
>  		IBND_ERROR("mad_build_pkt failed; %d\n", rc);
>  		return rc;
>  	}
>  
> -	if ((rc = umad_send(mad_rpc_portid(srcport),
> -			    mad_rpc_class_agent(srcport, rpc->mgtclass),
> -			    umad, IB_MAD_SIZE,
> -			    mad_get_timeout(srcport, rpc->timeout),
> -			    mad_get_retries(srcport))) < 0) {
> +	if ((rc = umad_send(engine->umad_fd, agent, umad, IB_MAD_SIZE,
> +			    engine->cfg->timeout_ms, engine->cfg->retries)) < 0) {
>  		IBND_ERROR("send failed; %d\n", rc);
>  		return rc;
>  	}
> @@ -91,12 +98,13 @@ static int process_smp_queue(smp_engine_t * engine)
>  {
>  	int rc = 0;
>  	ibnd_smp_t *smp;
> -	while (cl_qmap_count(&engine->smps_on_wire) < engine->max_smps_on_wire) {
> +	while (cl_qmap_count(&engine->smps_on_wire)
> +	       < engine->cfg->max_smps) {
>  		smp = get_smp(engine);
>  		if (!smp)
>  			return 0;
>  
> -		if ((rc = send_smp(smp, engine->ibmad_port)) != 0) {
> +		if ((rc = send_smp(smp, engine)) != 0) {
>  			free(smp);
>  			return rc;
>  		}
> @@ -122,7 +130,7 @@ int issue_smp(smp_engine_t * engine, ib_portid_t * portid,
>  	smp->rpc.method = IB_MAD_METHOD_GET;
>  	smp->rpc.attr.id = attrid;
>  	smp->rpc.attr.mod = mod;
> -	smp->rpc.timeout = mad_get_timeout(engine->ibmad_port, 0);
> +	smp->rpc.timeout = engine->cfg->timeout_ms;
>  	smp->rpc.datasz = IB_SMP_DATA_SIZE;
>  	smp->rpc.dataoffs = IB_SMP_DATA_OFFS;
>  	smp->rpc.trid = mad_trid();
> @@ -153,7 +161,7 @@ static int process_one_recv(smp_engine_t * engine)
>  	memset(umad, 0, sizeof(umad));
>  
>  	/* wait for the next message */
> -	if ((rc = umad_recv(mad_rpc_portid(engine->ibmad_port), umad, &length,
> +	if ((rc = umad_recv(engine->umad_fd, umad, &length,
>  			    0)) < 0) {
>  		if (rc == -EWOULDBLOCK)
>  			return 0;
> @@ -190,14 +198,58 @@ error:
>  	return rc;
>  }
>  
> -void smp_engine_init(smp_engine_t * engine, struct ibmad_port *ibmad_port,
> -		     void *user_data, int max_smps_on_wire)
> +int smp_engine_init(smp_engine_t * engine, char * ca_name, int ca_port,
> +		    void *user_data, ibnd_config_t *cfg)
>  {
> +	int nc = 2;
> +	int mc[2] = { IB_SMI_CLASS, IB_SMI_DIRECT_CLASS };
> +
>  	memset(engine, 0, sizeof(*engine));
> -	engine->ibmad_port = ibmad_port;
> +
> +	engine->ibmad_port = mad_rpc_open_port(ca_name, ca_port, mc, nc);
> +	if (!engine->ibmad_port) {
> +		IBND_ERROR("can't open MAD port (%s:%d)\n", ca_name, ca_port);
> +		return -EIO;
> +	}
> +	mad_rpc_set_timeout(engine->ibmad_port, cfg->timeout_ms);
> +	mad_rpc_set_retries(engine->ibmad_port, cfg->retries);
> +
> +	if (umad_init() < 0) {
> +		IBND_ERROR("umad_init failed\n");
> +		mad_rpc_close_port(engine->ibmad_port);
> +		return -EIO;
> +	}
> +
> +	engine->umad_fd = umad_open_port(ca_name, ca_port);
> +	if (engine->umad_fd < 0) {
> +		IBND_ERROR("can't open UMAD port (%s:%d)\n", ca_name, ca_port);
> +		mad_rpc_close_port(engine->ibmad_port);
> +		return -EIO;
> +	}
> +
> +	if ((engine->smi_agent = umad_register(engine->umad_fd,
> +	     IB_SMI_CLASS, 1, 0, 0)) < 0) {
> +		IBND_ERROR("Failed to register SMI agent on (%s:%d)\n",
> +			   ca_name, ca_port);
> +		goto eio_close;
> +	}
> +
> +	if ((engine->smi_dir_agent = umad_register(engine->umad_fd,
> +	     IB_SMI_DIRECT_CLASS, 1, 0, 0)) < 0) {
> +		IBND_ERROR("Failed to register SMI_DIRECT agent on (%s:%d)\n",
> +			   ca_name, ca_port);
> +		goto eio_close;
> +	}
> +
>  	engine->user_data = user_data;
>  	cl_qmap_init(&engine->smps_on_wire);
> -	engine->max_smps_on_wire = max_smps_on_wire;
> +	engine->cfg = cfg;
> +	return (0);
> +
> +eio_close:
> +	mad_rpc_close_port(engine->ibmad_port);
> +	umad_close_port(engine->umad_fd);
> +	return (-EIO);
>  }
>  
>  void smp_engine_destroy(smp_engine_t * engine)
> @@ -221,6 +273,9 @@ void smp_engine_destroy(smp_engine_t * engine)
>  		cl_qmap_remove_item(&engine->smps_on_wire, item);
>  		free(item);
>  	}
> +
> +	umad_close_port(engine->umad_fd);
> +	mad_rpc_close_port(engine->ibmad_port);
>  }
>  
>  int process_mads(smp_engine_t * engine)
> diff --git a/infiniband-diags/libibnetdisc/test/testleaks.c b/infiniband-diags/libibnetdisc/test/testleaks.c
> index da2fc0a..9a91f50 100644
> --- a/infiniband-diags/libibnetdisc/test/testleaks.c
> +++ b/infiniband-diags/libibnetdisc/test/testleaks.c
> @@ -54,8 +54,6 @@
>  char *argv0 = "iblinkinfotest";
>  static FILE *f;
>  
> -static int timeout_ms = 500;
> -
>  void usage(void)
>  {
>  	fprintf(stderr,
> @@ -88,9 +86,6 @@ int main(int argc, char **argv)
>  	ib_portid_t port_id;
>  	int iters = -1;
>  
> -	struct ibmad_port *ibmad_port;
> -	int mgmt_classes[2] = { IB_SMI_CLASS, IB_SMI_DIRECT_CLASS };
> -
>  	static char const str_opts[] = "S:D:n:C:P:t:shuf:i:";
>  	static const struct option long_opts[] = {
>  		{"S", 1, 0, 'S'},
> @@ -139,7 +134,7 @@ int main(int argc, char **argv)
>  			iters = (int)strtol(optarg, NULL, 0);
>  			break;
>  		case 't':
> -			timeout_ms = strtoul(optarg, 0, 0);
> +			config.timeout_ms = strtoul(optarg, 0, 0);
>  			break;
>  		case 'S':
>  			guid = (uint64_t) strtoull(optarg, 0, 0);
> @@ -152,15 +147,11 @@ int main(int argc, char **argv)
>  	argc -= optind;
>  	argv += optind;
>  
> -	ibmad_port = mad_rpc_open_port(ca, ca_port, mgmt_classes, 2);
> -
> -	mad_rpc_set_timeout(ibmad_port, timeout_ms);
> -
>  	while (iters == -1 || iters-- > 0) {
>  		if (from) {
>  			/* only scan part of the fabric */
>  			str2drpath(&(port_id.drpath), from, 0, 0);
> -			if ((fabric = ibnd_discover_fabric(ibmad_port,
> +			if ((fabric = ibnd_discover_fabric(ca, ca_port,
>  							   &port_id, &config))
>  			    == NULL) {
>  				fprintf(stderr, "discover failed\n");
> @@ -168,7 +159,7 @@ int main(int argc, char **argv)
>  				goto close_port;
>  			}
>  			guid = 0;
> -		} else if ((fabric = ibnd_discover_fabric(ibmad_port, NULL,
> +		} else if ((fabric = ibnd_discover_fabric(ca, ca_port, NULL,
>  							  &config)) == NULL) {
>  			fprintf(stderr, "discover failed\n");
>  			rc = 1;
> @@ -179,6 +170,5 @@ int main(int argc, char **argv)
>  	}
>  
>  close_port:
> -	mad_rpc_close_port(ibmad_port);
>  	exit(rc);
>  }
> diff --git a/infiniband-diags/src/iblinkinfo.c b/infiniband-diags/src/iblinkinfo.c
> index 029573f..d0c9b13 100644
> --- a/infiniband-diags/src/iblinkinfo.c
> +++ b/infiniband-diags/src/iblinkinfo.c
> @@ -337,8 +337,10 @@ int main(int argc, char **argv)
>  		exit(1);
>  	}
>  
> -	if (ibd_timeout)
> +	if (ibd_timeout) {
>  		mad_rpc_set_timeout(ibmad_port, ibd_timeout);
> +		config.timeout_ms = ibd_timeout;
> +	}
>  
>  	node_name_map = open_node_name_map(node_name_map_file);
>  
> @@ -371,12 +373,12 @@ int main(int argc, char **argv)
>  	} else {
>  		if (resolved >= 0 &&
>  		    !(fabric =
> -		      ibnd_discover_fabric(ibmad_port, &port_id, &config)))
> +		      ibnd_discover_fabric(ibd_ca, ibd_ca_port, &port_id, &config)))
>  			IBWARN("Single node discover failed;"
>  			       " attempting full scan\n");
>  
>  		if (!fabric &&
> -		    !(fabric = ibnd_discover_fabric(ibmad_port, NULL, &config))) {
> +		    !(fabric = ibnd_discover_fabric(ibd_ca, ibd_ca_port, NULL, &config))) {
>  			fprintf(stderr, "discover failed\n");
>  			rc = 1;
>  			goto close_port;
> diff --git a/infiniband-diags/src/ibnetdiscover.c b/infiniband-diags/src/ibnetdiscover.c
> index 57f9625..8f08f06 100644
> --- a/infiniband-diags/src/ibnetdiscover.c
> +++ b/infiniband-diags/src/ibnetdiscover.c
> @@ -67,8 +67,6 @@
>  #define DIFF_FLAG_DEFAULT (DIFF_FLAG_SWITCH | DIFF_FLAG_CA | DIFF_FLAG_ROUTER \
>  			   | DIFF_FLAG_PORT_CONNECTION)
>  
> -struct ibmad_port *srcport;
> -
>  static FILE *f;
>  
>  static char *node_name_map_file = NULL;
> @@ -938,9 +936,6 @@ int main(int argc, char **argv)
>  	ibnd_fabric_t *fabric = NULL;
>  	ibnd_fabric_t *diff_fabric = NULL;
>  
> -	struct ibmad_port *ibmad_port;
> -	int mgmt_classes[2] = { IB_SMI_CLASS, IB_SMI_DIRECT_CLASS };
> -
>  	const struct ibdiag_opt opts[] = {
>  		{"show", 's', 0, NULL, "show more information"},
>  		{"list", 'l', 0, NULL, "list of connected nodes"},
> @@ -975,12 +970,8 @@ int main(int argc, char **argv)
>  	argc -= optind;
>  	argv += optind;
>  
> -	ibmad_port = mad_rpc_open_port(ibd_ca, ibd_ca_port, mgmt_classes, 2);
> -	if (!ibmad_port)
> -		IBERROR("Failed to open %s port %d", ibd_ca, ibd_ca_port);
> -
>  	if (ibd_timeout)
> -		mad_rpc_set_timeout(ibmad_port, ibd_timeout);
> +		config.timeout_ms = ibd_timeout;
>  
>  	if (argc && !(f = fopen(argv[0], "w")))
>  		IBERROR("can't open file %s for writing", argv[0]);
> @@ -996,7 +987,7 @@ int main(int argc, char **argv)
>  			IBERROR("loading cached fabric failed\n");
>  	} else {
>  		if ((fabric =
> -		     ibnd_discover_fabric(ibmad_port, NULL, &config)) == NULL)
> +		     ibnd_discover_fabric(ibd_ca, ibd_ca_port, NULL, &config)) == NULL)
>  			IBERROR("discover failed\n");
>  	}
>  
> @@ -1017,6 +1008,5 @@ int main(int argc, char **argv)
>  	if (diff_fabric)
>  		ibnd_destroy_fabric(diff_fabric);
>  	close_node_name_map(node_name_map);
> -	mad_rpc_close_port(ibmad_port);
>  	exit(0);
>  }
> diff --git a/infiniband-diags/src/ibqueryerrors.c b/infiniband-diags/src/ibqueryerrors.c
> index e896254..f04e47f 100644
> --- a/infiniband-diags/src/ibqueryerrors.c
> +++ b/infiniband-diags/src/ibqueryerrors.c
> @@ -600,8 +600,10 @@ int main(int argc, char **argv)
>  	if (!ibmad_port)
>  		IBERROR("Failed to open port; %s:%d\n", ibd_ca, ibd_ca_port);
>  
> -	if (ibd_timeout)
> +	if (ibd_timeout) {
>  		mad_rpc_set_timeout(ibmad_port, ibd_timeout);
> +		config.timeout_ms = ibd_timeout;
> +	}
>  
>  	node_name_map = open_node_name_map(node_name_map_file);
>  
> @@ -633,11 +635,14 @@ int main(int argc, char **argv)
>  		}
>  	} else {
>  		if (resolved >= 0 &&
> -		    !(fabric = ibnd_discover_fabric(ibmad_port, &portid, 0)))
> +		    !(fabric = ibnd_discover_fabric(ibd_ca, ibd_ca_port,
> +						    &portid, &config)))
>  			IBWARN("Single node discover failed;"
>  			       " attempting full scan");
>  
> -		if (!fabric && !(fabric = ibnd_discover_fabric(ibmad_port, NULL,
> +		if (!fabric && !(fabric = ibnd_discover_fabric(ibd_ca,
> +							       ibd_ca_port,
> +							       NULL,
>  							       &config))) {
>  			fprintf(stderr, "discover failed\n");
>  			rc = 1;
> -- 
> 1.5.4.5
> 
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

  parent reply	other threads:[~2010-05-22 14:40 UTC|newest]

Thread overview: 3+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-05-11 23:48 [PATCH] ibnetdisc: Separate calls to umad and mad layer to avoid race condition on response MAD's Ira Weiny
     [not found] ` <20100511164812.e16e41e6.weiny2-i2BcT+NCU+M@public.gmane.org>
2010-05-22 14:40   ` Sasha Khapyorsky [this message]
2010-05-25 17:45     ` Ira Weiny

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20100522144010.GR28549@me \
    --to=sashak-smomgflxvozwk0htik3j/w@public.gmane.org \
    --cc=linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org \
    --cc=weiny2-i2BcT+NCU+M@public.gmane.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.