public inbox for linux-rdma@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH]: infiniband-diags/ibqueryerrors.c: obtain destination sl before perf query
@ 2013-02-20 11:58 Dan Ben Yosef
       [not found] ` <20130220115807.GC20018-Kyg1NGsloJ/nXW+7WiO5SxL4W9x8LtSr@public.gmane.org>
  0 siblings, 1 reply; 10+ messages in thread
From: Dan Ben Yosef @ 2013-02-20 11:58 UTC (permalink / raw)
  To: Ira Weiny; +Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA

   1) If the -G or -D option is used: by default we obtain the SL before doing
the perf query.
   2) If no destination is given: we don't obtain the SL; for every
source-destination pair we use SL=0.
   3) If no destination is given and the -W option is used: we obtain the SL to
all nodes in the fabric, then use the right SL for each source-destination pair.

    Signed-off-by: Dan Ben Yosef <danby-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
---
 src/ibqueryerrors.c |   84 +++++++++++++++++++++++++++++++++++++++++++++++++-
 1 files changed, 82 insertions(+), 2 deletions(-)

diff --git a/src/ibqueryerrors.c b/src/ibqueryerrors.c
index 6320972..01bbb5a 100644
--- a/src/ibqueryerrors.c
+++ b/src/ibqueryerrors.c
@@ -55,11 +55,14 @@
 #include <infiniband/mad.h>
 
 #include "ibdiag_common.h"
+#include "ibdiag_sa.h"
 
 struct ibmad_port *ibmad_port;
 static char *node_name_map_file = NULL;
 static nn_map_t *node_name_map = NULL;
 static char *load_cache_file = NULL;
+static uint16_t lid2sl_table[sizeof(uint8_t) * 1024 * 48] = { 0 };
+static int half_world_query = 0;
 
 int data_counters = 0;
 int data_counters_only = 0;
@@ -78,6 +81,8 @@ unsigned clear_errors = 0, clear_counts = 0, details = 0;
 #define PRINT_ROUTER 0x4
 #define PRINT_ALL 0xFF		/* all nodes default flag */
 
+#define DEFAULT_HALF_WORLD_PR_TIMEOUT (3000)
+
 struct {
 	int nodes_checked;
 	int bad_nodes;
@@ -298,6 +303,51 @@ static int print_summary(void)
 	return (summary.bad_ports);
 }
 
+static void insert_lid2sl_table(struct sa_query_result *r)
+{
+	unsigned int i;
+	for (i = 0; i < r->result_cnt; i++) {
+		ib_path_rec_t *p_pr = (ib_path_rec_t *)sa_get_query_rec(r->p_result_madw, i);
+		lid2sl_table[cl_ntoh16(p_pr->dlid)] = ib_path_rec_sl(p_pr);
+	}
+}
+
+static int path_record_query(int src_lid,int dest_lid)
+{
+	ib_path_rec_t pr;
+	ib_net64_t comp_mask = 0;
+	uint8_t reversible = 0;
+	struct sa_handle * h;
+
+	h = sa_get_handle();
+	ibd_timeout = DEFAULT_HALF_WORLD_PR_TIMEOUT;
+	memset(&pr, 0, sizeof(pr));
+
+	CHECK_AND_SET_VAL(src_lid, 16, 0, pr.slid, PR, SLID);
+	CHECK_AND_SET_VAL(dest_lid, 16, 0, pr.dlid, PR, DLID);/*if dlid is 0 then we do half world query*/
+	CHECK_AND_SET_VAL(1, 8, -1, pr.num_path, PR, NUMBPATH);/*to get only one PathRecord for each source and destination pair*/
+	CHECK_AND_SET_VAL(1, 8, -1, reversible, PR, REVERSIBLE);/*for a reversible path*/
+	pr.num_path |= reversible << 7;
+	struct sa_query_result result;
+	int ret = sa_query(h, IB_MAD_METHOD_GET_TABLE,
+			   (uint16_t)IB_SA_ATTR_PATHRECORD,0,cl_ntoh64(comp_mask),ibd_sakey,
+			   &pr, sizeof(pr), &result);
+	if (ret) {
+		fprintf(stderr, "Query SA failed: %s; sa call path_query failed\n", strerror(ret));
+		return ret;
+	}
+	if (result.status != IB_SA_MAD_STATUS_SUCCESS) {
+		sa_report_err(result.status);
+		ret = EIO;
+		goto Exit;
+	}
+
+	insert_lid2sl_table(&result);
+Exit:
+	sa_free_result_mad(&result);
+	return ret;
+}
+
 static int query_and_dump(char *buf, size_t size, ib_portid_t * portid,
 			  ibnd_node_t * node, char *node_name, int portnum,
 			  const char *attr_name, uint16_t attr_id,
@@ -447,6 +497,8 @@ static int query_cap_mask(ib_portid_t * portid, char *node_name, int portnum,
 	uint8_t pc[1024] = { 0 };
 	uint16_t rc_cap_mask;
 
+	portid->sl = lid2sl_table[portid->lid];
+
 	/* PerfMgt ClassPortInfo is a required attribute */
 	if (!pma_query_via(pc, portid, portnum, ibd_timeout, CLASS_PORT_INFO,
 			   ibmad_port)) {
@@ -474,6 +526,8 @@ static int print_data_cnts(ib_portid_t * portid, uint16_t cap_mask,
 
 	memset(pc, 0, 1024);
 
+	portid->sl = lid2sl_table[portid->lid];
+
 	if (cap_mask & (IB_PM_EXT_WIDTH_SUPPORTED | IB_PM_EXT_WIDTH_NOIETF_SUP)) {
 		if (!pma_query_via(pc, portid, portnum, ibd_timeout,
 				   IB_GSI_PORT_COUNTERS_EXT, ibmad_port)) {
@@ -543,6 +597,8 @@ static int print_errors(ib_portid_t * portid, uint16_t cap_mask,
 	memset(pc, 0, 1024);
 	memset(pce, 0, 1024);
 
+	portid->sl = lid2sl_table[portid->lid];
+
 	if (!pma_query_via(pc, portid, portnum, ibd_timeout,
 			   IB_GSI_PORT_COUNTERS, ibmad_port)) {
 		IBWARN("IB_GSI_PORT_COUNTERS query failed on %s, %s port %d",
@@ -830,6 +886,9 @@ static int process_opt(void *context, int ch, char *optarg)
 	case 'D':
 		dr_path = strdup(optarg);
 		break;
+	case 'W':
+		half_world_query = 1;
+		break;
 	case 'r':
 		port_config++;
 		break;
@@ -858,6 +917,8 @@ int main(int argc, char **argv)
 	ib_portid_t portid = { 0 };
 	int rc = 0;
 	ibnd_fabric_t *fabric = NULL;
+	int self_lid = 0;
+	int port = 0;
 
 	int mgmt_classes[4] = { IB_SMI_CLASS, IB_SMI_DIRECT_CLASS, IB_SA_CLASS,
 		IB_PERFORMANCE_CLASS
@@ -875,6 +936,8 @@ int main(int argc, char **argv)
 		 "Same as \"-G\" for backward compatibility"},
 		{"Direct", 'D', 1, "<dr_path>",
 		 "report the node containing the port specified by <dr_path>"},
+		{"obtain-sl", 'W', 0, NULL,
+		"obtain SL to all destinations from local source port"},
 		{"report-port", 'r', 0, NULL,
 		 "report port link information"},
 		{"threshold-file", 8, 1, NULL,
@@ -933,6 +996,11 @@ int main(int argc, char **argv)
 
 	/* limit the scan the fabric around the target */
 	if (dr_path) {
+		if (resolve_self(ibd_ca, ibd_ca_port, &portid, &port, 0) < 0) {
+			IBERROR("can't resolve self port %s", argv[0]);
+			goto close_port;
+		}
+		self_lid = portid.lid;
 		if ((resolved =
 		     resolve_portid_str(ibd_ca, ibd_ca_port, &portid, dr_path,
 					IB_DEST_DRPATH, NULL, ibmad_port)) < 0) {
@@ -947,6 +1015,13 @@ int main(int argc, char **argv)
 			IBWARN("Failed to resolve %s;",port_guid_str);
 			goto close_port;
 		}
+		lid2sl_table[portid.lid] = portid.sl;
+	} else {
+		if (resolve_self(ibd_ca, ibd_ca_port, &portid, &port, 0) < 0) {
+			IBERROR("can't resolve self port %s", argv[0]);
+			goto close_port;
+		}
+		self_lid = portid.lid;
 	}
 
 	if (load_cache_file) {
@@ -996,12 +1071,17 @@ int main(int argc, char **argv)
 
 		port = ibnd_find_port_guid(fabric, port_guid);
 		if (port) {
+			if(path_record_query(self_lid,port->base_lid))
+				goto close_port;
 			print_node(port->node, NULL);
 		} else
 			fprintf(stderr, "Failed to find node: %s\n", dr_path);
-	} else
+	} else {
+		if(half_world_query)
+			if(path_record_query(self_lid,0))
+				goto close_port;
 		ibnd_iter_nodes(fabric, print_node, NULL);
-
+	}
 	rc = print_summary();
 	if (rc)
 		rc = 1;
-- 
1.7.1

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [PATCH]: infiniband-diags/ibqueryerrors.c: obtain destination sl before perf query
       [not found] ` <20130220115807.GC20018-Kyg1NGsloJ/nXW+7WiO5SxL4W9x8LtSr@public.gmane.org>
@ 2013-02-20 18:46   ` Ira Weiny
  2013-02-24  8:21   ` [PATCHv2] " Dan Ben Yosef
  1 sibling, 0 replies; 10+ messages in thread
From: Ira Weiny @ 2013-02-20 18:46 UTC (permalink / raw)
  To: Dan Ben Yosef; +Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA

First off thanks, this is really needed.

On Wed, 20 Feb 2013 13:58:07 +0200
Dan Ben Yosef <danby-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org> wrote:

>    1) if use -G or -D option : by default we obtain sl before doing perf query.
>    2) if no destination is given : we don't obtain sl,for every pair
> source-destination we use sl=0.
>    3) if no destination is given and use -W option : we obtain sl to all nodes
> in the fabric, then we use the right sl for each pair source-destination.

I think the default should be to query the SA for both cases 1 and 2.  Case 3 should be an option to _skip_ the query.  Skipping the query could also allow a full SMP scan, which your previous patch removed.  This allows for operation in a degraded mode when the SM is either broken or crippled.  This matters all the more when you are fighting hardware errors, which is exactly when you would be using ibqueryerrors to identify them.

Could you rework the patch with the above requirements?  Also update the documentation for the new option.

Thanks,
Ira

> 
>     Signed-off-by: Dan Ben Yosef <danby-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
> ---
>  src/ibqueryerrors.c |   84 +++++++++++++++++++++++++++++++++++++++++++++++++-
>  1 files changed, 82 insertions(+), 2 deletions(-)
> 
> diff --git a/src/ibqueryerrors.c b/src/ibqueryerrors.c
> index 6320972..01bbb5a 100644
> --- a/src/ibqueryerrors.c
> +++ b/src/ibqueryerrors.c
> @@ -55,11 +55,14 @@
>  #include <infiniband/mad.h>
>  
>  #include "ibdiag_common.h"
> +#include "ibdiag_sa.h"
>  
>  struct ibmad_port *ibmad_port;
>  static char *node_name_map_file = NULL;
>  static nn_map_t *node_name_map = NULL;
>  static char *load_cache_file = NULL;
> +static uint16_t lid2sl_table[sizeof(uint8_t) * 1024 * 48] = { 0 };
> +static int half_world_query = 0;
>  
>  int data_counters = 0;
>  int data_counters_only = 0;
> @@ -78,6 +81,8 @@ unsigned clear_errors = 0, clear_counts = 0, details = 0;
>  #define PRINT_ROUTER 0x4
>  #define PRINT_ALL 0xFF		/* all nodes default flag */
>  
> +#define DEFAULT_HALF_WORLD_PR_TIMEOUT (3000)
> +
>  struct {
>  	int nodes_checked;
>  	int bad_nodes;
> @@ -298,6 +303,51 @@ static int print_summary(void)
>  	return (summary.bad_ports);
>  }
>  
> +static void insert_lid2sl_table(struct sa_query_result *r)
> +{
> +	unsigned int i;
> +	for (i = 0; i < r->result_cnt; i++) {
> +		ib_path_rec_t *p_pr = (ib_path_rec_t *)sa_get_query_rec(r->p_result_madw, i);
> +		lid2sl_table[cl_ntoh16(p_pr->dlid)] = ib_path_rec_sl(p_pr);
> +	}
> +}
> +
> +static int path_record_query(int src_lid,int dest_lid)
> +{
> +	ib_path_rec_t pr;
> +	ib_net64_t comp_mask = 0;
> +	uint8_t reversible = 0;
> +	struct sa_handle * h;
> +
> +	h = sa_get_handle();
> +	ibd_timeout = DEFAULT_HALF_WORLD_PR_TIMEOUT;
> +	memset(&pr, 0, sizeof(pr));
> +
> +	CHECK_AND_SET_VAL(src_lid, 16, 0, pr.slid, PR, SLID);
> +	CHECK_AND_SET_VAL(dest_lid, 16, 0, pr.dlid, PR, DLID);/*if dlid is 0 then we do half world query*/
> +	CHECK_AND_SET_VAL(1, 8, -1, pr.num_path, PR, NUMBPATH);/*to get only one PathRecord for each source and destination pair*/
> +	CHECK_AND_SET_VAL(1, 8, -1, reversible, PR, REVERSIBLE);/*for a reversible path*/
> +	pr.num_path |= reversible << 7;
> +	struct sa_query_result result;
> +	int ret = sa_query(h, IB_MAD_METHOD_GET_TABLE,
> +			   (uint16_t)IB_SA_ATTR_PATHRECORD,0,cl_ntoh64(comp_mask),ibd_sakey,
> +			   &pr, sizeof(pr), &result);
> +	if (ret) {
> +		fprintf(stderr, "Query SA failed: %s; sa call path_query failed\n", strerror(ret));
> +		return ret;
> +	}
> +	if (result.status != IB_SA_MAD_STATUS_SUCCESS) {
> +		sa_report_err(result.status);
> +		ret = EIO;
> +		goto Exit;
> +	}
> +
> +	insert_lid2sl_table(&result);
> +Exit:
> +	sa_free_result_mad(&result);
> +	return ret;
> +}
> +
>  static int query_and_dump(char *buf, size_t size, ib_portid_t * portid,
>  			  ibnd_node_t * node, char *node_name, int portnum,
>  			  const char *attr_name, uint16_t attr_id,
> @@ -447,6 +497,8 @@ static int query_cap_mask(ib_portid_t * portid, char *node_name, int portnum,
>  	uint8_t pc[1024] = { 0 };
>  	uint16_t rc_cap_mask;
>  
> +	portid->sl = lid2sl_table[portid->lid];
> +
>  	/* PerfMgt ClassPortInfo is a required attribute */
>  	if (!pma_query_via(pc, portid, portnum, ibd_timeout, CLASS_PORT_INFO,
>  			   ibmad_port)) {
> @@ -474,6 +526,8 @@ static int print_data_cnts(ib_portid_t * portid, uint16_t cap_mask,
>  
>  	memset(pc, 0, 1024);
>  
> +	portid->sl = lid2sl_table[portid->lid];
> +
>  	if (cap_mask & (IB_PM_EXT_WIDTH_SUPPORTED | IB_PM_EXT_WIDTH_NOIETF_SUP)) {
>  		if (!pma_query_via(pc, portid, portnum, ibd_timeout,
>  				   IB_GSI_PORT_COUNTERS_EXT, ibmad_port)) {
> @@ -543,6 +597,8 @@ static int print_errors(ib_portid_t * portid, uint16_t cap_mask,
>  	memset(pc, 0, 1024);
>  	memset(pce, 0, 1024);
>  
> +	portid->sl = lid2sl_table[portid->lid];
> +
>  	if (!pma_query_via(pc, portid, portnum, ibd_timeout,
>  			   IB_GSI_PORT_COUNTERS, ibmad_port)) {
>  		IBWARN("IB_GSI_PORT_COUNTERS query failed on %s, %s port %d",
> @@ -830,6 +886,9 @@ static int process_opt(void *context, int ch, char *optarg)
>  	case 'D':
>  		dr_path = strdup(optarg);
>  		break;
> +	case 'W':
> +		half_world_query = 1;
> +		break;
>  	case 'r':
>  		port_config++;
>  		break;
> @@ -858,6 +917,8 @@ int main(int argc, char **argv)
>  	ib_portid_t portid = { 0 };
>  	int rc = 0;
>  	ibnd_fabric_t *fabric = NULL;
> +	int self_lid = 0;
> +	int port = 0;
>  
>  	int mgmt_classes[4] = { IB_SMI_CLASS, IB_SMI_DIRECT_CLASS, IB_SA_CLASS,
>  		IB_PERFORMANCE_CLASS
> @@ -875,6 +936,8 @@ int main(int argc, char **argv)
>  		 "Same as \"-G\" for backward compatibility"},
>  		{"Direct", 'D', 1, "<dr_path>",
>  		 "report the node containing the port specified by <dr_path>"},
> +		{"obtain-sl", 'W', 0, NULL,
> +		"obtain SL to all destinations from local source port"},
>  		{"report-port", 'r', 0, NULL,
>  		 "report port link information"},
>  		{"threshold-file", 8, 1, NULL,
> @@ -933,6 +996,11 @@ int main(int argc, char **argv)
>  
>  	/* limit the scan the fabric around the target */
>  	if (dr_path) {
> +		if (resolve_self(ibd_ca, ibd_ca_port, &portid, &port, 0) < 0) {
> +			IBERROR("can't resolve self port %s", argv[0]);
> +			goto close_port;
> +		}
> +		self_lid = portid.lid;
>  		if ((resolved =
>  		     resolve_portid_str(ibd_ca, ibd_ca_port, &portid, dr_path,
>  					IB_DEST_DRPATH, NULL, ibmad_port)) < 0) {
> @@ -947,6 +1015,13 @@ int main(int argc, char **argv)
>  			IBWARN("Failed to resolve %s;",port_guid_str);
>  			goto close_port;
>  		}
> +		lid2sl_table[portid.lid] = portid.sl;
> +	} else {
> +		if (resolve_self(ibd_ca, ibd_ca_port, &portid, &port, 0) < 0) {
> +			IBERROR("can't resolve self port %s", argv[0]);
> +			goto close_port;
> +		}
> +		self_lid = portid.lid;
>  	}
>  
>  	if (load_cache_file) {
> @@ -996,12 +1071,17 @@ int main(int argc, char **argv)
>  
>  		port = ibnd_find_port_guid(fabric, port_guid);
>  		if (port) {
> +			if(path_record_query(self_lid,port->base_lid))
> +				goto close_port;
>  			print_node(port->node, NULL);
>  		} else
>  			fprintf(stderr, "Failed to find node: %s\n", dr_path);
> -	} else
> +	} else {
> +		if(half_world_query)
> +			if(path_record_query(self_lid,0))
> +				goto close_port;
>  		ibnd_iter_nodes(fabric, print_node, NULL);
> -
> +	}
>  	rc = print_summary();
>  	if (rc)
>  		rc = 1;
> -- 
> 1.7.1
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
> the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html


-- 
Ira Weiny
Member of Technical Staff
Lawrence Livermore National Lab
925-423-8008
weiny2-i2BcT+NCU+M@public.gmane.org
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 10+ messages in thread

* [PATCHv2] infiniband-diags/ibqueryerrors.c: obtain destination sl before perf query
       [not found] ` <20130220115807.GC20018-Kyg1NGsloJ/nXW+7WiO5SxL4W9x8LtSr@public.gmane.org>
  2013-02-20 18:46   ` Ira Weiny
@ 2013-02-24  8:21   ` Dan Ben Yosef
       [not found]     ` <20130224082140.GA11648-Kyg1NGsloJ/nXW+7WiO5SxL4W9x8LtSr@public.gmane.org>
  1 sibling, 1 reply; 10+ messages in thread
From: Dan Ben Yosef @ 2013-02-24  8:21 UTC (permalink / raw)
  To: Ira Weiny; +Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA

1) If the -G or -D option is used: we obtain the SL before doing the perf query.
2) If no destination is given: we obtain the SL for every source-destination pair.
3) If no destination is given and the --skip-sl option is used: we don't obtain
the SL to all nodes in the fabric; in this case SL=0 for all node pairs.

Signed-off-by: Dan Ben Yosef <danby-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
---
Changes since v1:
        Changed 2 and 3: by default we obtain the SL. When the --skip-sl option
        is used we don't obtain the SL.
	Added the option to the documentation.

 doc/man/ibqueryerrors.8.in |    3 ++
 src/ibqueryerrors.c        |   83 ++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 84 insertions(+), 2 deletions(-)

diff --git a/doc/man/ibqueryerrors.8.in b/doc/man/ibqueryerrors.8.in
index 00b0f61..65e5206 100644
--- a/doc/man/ibqueryerrors.8.in
+++ b/doc/man/ibqueryerrors.8.in
@@ -56,6 +56,9 @@ Report the port information.  This includes LID, port, external port (if
 applicable), link speed setting, remote GUID, remote port, remote external port
 (if applicable), and remote node description information.
 .sp
+\fB\-\-skip-sl\fP
+Use the default sl for queries.
+.sp
 \fB\-\-data\fP
 Include the optional transmit and receive data counters.
 .sp
diff --git a/src/ibqueryerrors.c b/src/ibqueryerrors.c
index 6320972..c35cf6e 100644
--- a/src/ibqueryerrors.c
+++ b/src/ibqueryerrors.c
@@ -55,11 +55,14 @@
 #include <infiniband/mad.h>
 
 #include "ibdiag_common.h"
+#include "ibdiag_sa.h"
 
 struct ibmad_port *ibmad_port;
 static char *node_name_map_file = NULL;
 static nn_map_t *node_name_map = NULL;
 static char *load_cache_file = NULL;
+static uint16_t lid2sl_table[sizeof(uint8_t) * 1024 * 48] = { 0 };
+static int half_world_query = 1;
 
 int data_counters = 0;
 int data_counters_only = 0;
@@ -78,6 +81,8 @@ unsigned clear_errors = 0, clear_counts = 0, details = 0;
 #define PRINT_ROUTER 0x4
 #define PRINT_ALL 0xFF		/* all nodes default flag */
 
+#define DEFAULT_HALF_WORLD_PR_TIMEOUT (3000)
+
 struct {
 	int nodes_checked;
 	int bad_nodes;
@@ -298,6 +303,51 @@ static int print_summary(void)
 	return (summary.bad_ports);
 }
 
+static void insert_lid2sl_table(struct sa_query_result *r)
+{
+	unsigned int i;
+	for (i = 0; i < r->result_cnt; i++) {
+		ib_path_rec_t *p_pr = (ib_path_rec_t *)sa_get_query_rec(r->p_result_madw, i);
+		lid2sl_table[cl_ntoh16(p_pr->dlid)] = ib_path_rec_sl(p_pr);
+	}
+}
+
+static int path_record_query(int src_lid,int dest_lid)
+{
+	ib_path_rec_t pr;
+	ib_net64_t comp_mask = 0;
+	uint8_t reversible = 0;
+	struct sa_handle * h;
+
+	h = sa_get_handle();
+	ibd_timeout = DEFAULT_HALF_WORLD_PR_TIMEOUT;
+	memset(&pr, 0, sizeof(pr));
+
+	CHECK_AND_SET_VAL(src_lid, 16, 0, pr.slid, PR, SLID);
+	CHECK_AND_SET_VAL(dest_lid, 16, 0, pr.dlid, PR, DLID);/*if dlid is 0 then we do half world query*/
+	CHECK_AND_SET_VAL(1, 8, -1, pr.num_path, PR, NUMBPATH);/*to get only one PathRecord for each source and destination pair*/
+	CHECK_AND_SET_VAL(1, 8, -1, reversible, PR, REVERSIBLE);/*for a reversible path*/
+	pr.num_path |= reversible << 7;
+	struct sa_query_result result;
+	int ret = sa_query(h, IB_MAD_METHOD_GET_TABLE,
+			   (uint16_t)IB_SA_ATTR_PATHRECORD,0,cl_ntoh64(comp_mask),ibd_sakey,
+			   &pr, sizeof(pr), &result);
+	if (ret) {
+		fprintf(stderr, "Query SA failed: %s; sa call path_query failed\n", strerror(ret));
+		return ret;
+	}
+	if (result.status != IB_SA_MAD_STATUS_SUCCESS) {
+		sa_report_err(result.status);
+		ret = EIO;
+		goto Exit;
+	}
+
+	insert_lid2sl_table(&result);
+Exit:
+	sa_free_result_mad(&result);
+	return ret;
+}
+
 static int query_and_dump(char *buf, size_t size, ib_portid_t * portid,
 			  ibnd_node_t * node, char *node_name, int portnum,
 			  const char *attr_name, uint16_t attr_id,
@@ -447,6 +497,8 @@ static int query_cap_mask(ib_portid_t * portid, char *node_name, int portnum,
 	uint8_t pc[1024] = { 0 };
 	uint16_t rc_cap_mask;
 
+	portid->sl = lid2sl_table[portid->lid];
+
 	/* PerfMgt ClassPortInfo is a required attribute */
 	if (!pma_query_via(pc, portid, portnum, ibd_timeout, CLASS_PORT_INFO,
 			   ibmad_port)) {
@@ -474,6 +526,8 @@ static int print_data_cnts(ib_portid_t * portid, uint16_t cap_mask,
 
 	memset(pc, 0, 1024);
 
+	portid->sl = lid2sl_table[portid->lid];
+
 	if (cap_mask & (IB_PM_EXT_WIDTH_SUPPORTED | IB_PM_EXT_WIDTH_NOIETF_SUP)) {
 		if (!pma_query_via(pc, portid, portnum, ibd_timeout,
 				   IB_GSI_PORT_COUNTERS_EXT, ibmad_port)) {
@@ -543,6 +597,8 @@ static int print_errors(ib_portid_t * portid, uint16_t cap_mask,
 	memset(pc, 0, 1024);
 	memset(pce, 0, 1024);
 
+	portid->sl = lid2sl_table[portid->lid];
+
 	if (!pma_query_via(pc, portid, portnum, ibd_timeout,
 			   IB_GSI_PORT_COUNTERS, ibmad_port)) {
 		IBWARN("IB_GSI_PORT_COUNTERS query failed on %s, %s port %d",
@@ -822,6 +878,9 @@ static int process_opt(void *context, int ch, char *optarg)
 	case 9:
 		data_counters_only = 1;
 		break;
+	case 10:
+		half_world_query = 0;
+		break;
 	case 'G':
 	case 'S':
 		port_guid_str = optarg;
@@ -858,6 +917,8 @@ int main(int argc, char **argv)
 	ib_portid_t portid = { 0 };
 	int rc = 0;
 	ibnd_fabric_t *fabric = NULL;
+	int self_lid = 0;
+	int port = 0;
 
 	int mgmt_classes[4] = { IB_SMI_CLASS, IB_SMI_DIRECT_CLASS, IB_SA_CLASS,
 		IB_PERFORMANCE_CLASS
@@ -875,6 +936,7 @@ int main(int argc, char **argv)
 		 "Same as \"-G\" for backward compatibility"},
 		{"Direct", 'D', 1, "<dr_path>",
 		 "report the node containing the port specified by <dr_path>"},
+		{"skip-sl", 10, 0, NULL,"don't obtain SL to all destinations"},
 		{"report-port", 'r', 0, NULL,
 		 "report port link information"},
 		{"threshold-file", 8, 1, NULL,
@@ -933,6 +995,11 @@ int main(int argc, char **argv)
 
 	/* limit the scan the fabric around the target */
 	if (dr_path) {
+		if (resolve_self(ibd_ca, ibd_ca_port, &portid, &port, 0) < 0) {
+			IBERROR("can't resolve self port %s", argv[0]);
+			goto close_port;
+		}
+		self_lid = portid.lid;
 		if ((resolved =
 		     resolve_portid_str(ibd_ca, ibd_ca_port, &portid, dr_path,
 					IB_DEST_DRPATH, NULL, ibmad_port)) < 0) {
@@ -947,6 +1014,13 @@ int main(int argc, char **argv)
 			IBWARN("Failed to resolve %s;",port_guid_str);
 			goto close_port;
 		}
+		lid2sl_table[portid.lid] = portid.sl;
+	} else {
+		if (resolve_self(ibd_ca, ibd_ca_port, &portid, &port, 0) < 0) {
+			IBERROR("can't resolve self port %s", argv[0]);
+			goto close_port;
+		}
+		self_lid = portid.lid;
 	}
 
 	if (load_cache_file) {
@@ -996,12 +1070,17 @@ int main(int argc, char **argv)
 
 		port = ibnd_find_port_guid(fabric, port_guid);
 		if (port) {
+			if(path_record_query(self_lid,port->base_lid))
+				goto close_port;
 			print_node(port->node, NULL);
 		} else
 			fprintf(stderr, "Failed to find node: %s\n", dr_path);
-	} else
+	} else {
+		if(half_world_query)
+			if(path_record_query(self_lid,0))
+				goto close_port;
 		ibnd_iter_nodes(fabric, print_node, NULL);
-
+	}
 	rc = print_summary();
 	if (rc)
 		rc = 1;
-- 
1.7.1




































































































































































































--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [PATCHv2] infiniband-diags/ibqueryerrors.c: obtain destination sl before perf query
       [not found]     ` <20130224082140.GA11648-Kyg1NGsloJ/nXW+7WiO5SxL4W9x8LtSr@public.gmane.org>
@ 2013-02-27  1:07       ` Ira Weiny
       [not found]         ` <20130226170744.53adb74b6af9a54d9481dd4c-i2BcT+NCU+M@public.gmane.org>
  0 siblings, 1 reply; 10+ messages in thread
From: Ira Weiny @ 2013-02-27  1:07 UTC (permalink / raw)
  To: Dan Ben Yosef; +Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA

On Sun, 24 Feb 2013 10:21:40 +0200
Dan Ben Yosef <danby-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org> wrote:

> 1) if use -G or -D option : we obtain sl before doing perf query.
> 2) if no destination is given : we obtain sl for every pair source-destination.
> 3) if no destination is given and use --skip-sl option : we don't obtain
> sl to all nodes in the fabric,in this case sl=0 for all node pairs.

--skip-sl should apply to all modes.  Sorry if I was not clear about that.

More comments below.

> 
> Signed-off-by: Dan Ben Yosef <danby-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
> ---
> Changes since v1:
>         Change 2 and 3: by default we obtain sl. When using skip-sl option
>         we don't obtain sl.
> 	Add option to documentation.
> 
>  doc/man/ibqueryerrors.8.in |    3 ++
>  src/ibqueryerrors.c        |   83 ++++++++++++++++++++++++++++++++++++++++++-
>  2 files changed, 84 insertions(+), 2 deletions(-)
> 
> diff --git a/doc/man/ibqueryerrors.8.in b/doc/man/ibqueryerrors.8.in
> index 00b0f61..65e5206 100644
> --- a/doc/man/ibqueryerrors.8.in
> +++ b/doc/man/ibqueryerrors.8.in
> @@ -56,6 +56,9 @@ Report the port information.  This includes LID, port, external port (if
>  applicable), link speed setting, remote GUID, remote port, remote external port
>  (if applicable), and remote node description information.
>  .sp
> +\fB\-\-skip-sl\fP
> +Use the default sl for queries.
> +.sp
>  \fB\-\-data\fP
>  Include the optional transmit and receive data counters.
>  .sp

This should be added to doc/rst/ibqueryerrors.8.in.rst, and the output generated from that file should then be added to the git tree.

> diff --git a/src/ibqueryerrors.c b/src/ibqueryerrors.c
> index 6320972..c35cf6e 100644
> --- a/src/ibqueryerrors.c
> +++ b/src/ibqueryerrors.c
> @@ -55,11 +55,14 @@
>  #include <infiniband/mad.h>
>  
>  #include "ibdiag_common.h"
> +#include "ibdiag_sa.h"
>  
>  struct ibmad_port *ibmad_port;
>  static char *node_name_map_file = NULL;
>  static nn_map_t *node_name_map = NULL;
>  static char *load_cache_file = NULL;
> +static uint16_t lid2sl_table[sizeof(uint8_t) * 1024 * 48] = { 0 };
> +static int half_world_query = 1;
>  
>  int data_counters = 0;
>  int data_counters_only = 0;
> @@ -78,6 +81,8 @@ unsigned clear_errors = 0, clear_counts = 0, details = 0;
>  #define PRINT_ROUTER 0x4
>  #define PRINT_ALL 0xFF		/* all nodes default flag */
>  
> +#define DEFAULT_HALF_WORLD_PR_TIMEOUT (3000)

This is fine for now but it might be nice to have an option for this as well.

> +
>  struct {
>  	int nodes_checked;
>  	int bad_nodes;
> @@ -298,6 +303,51 @@ static int print_summary(void)
>  	return (summary.bad_ports);
>  }
>  
> +static void insert_lid2sl_table(struct sa_query_result *r)
> +{
> +	unsigned int i;
> +	for (i = 0; i < r->result_cnt; i++) {
> +		ib_path_rec_t *p_pr = (ib_path_rec_t *)sa_get_query_rec(r->p_result_madw, i);
> +		lid2sl_table[cl_ntoh16(p_pr->dlid)] = ib_path_rec_sl(p_pr);
> +	}
> +}
> +
> +static int path_record_query(int src_lid,int dest_lid)
> +{
> +	ib_path_rec_t pr;
> +	ib_net64_t comp_mask = 0;
> +	uint8_t reversible = 0;
> +	struct sa_handle * h;
> +
> +	h = sa_get_handle();
> +	ibd_timeout = DEFAULT_HALF_WORLD_PR_TIMEOUT;
> +	memset(&pr, 0, sizeof(pr));
> +
> +	CHECK_AND_SET_VAL(src_lid, 16, 0, pr.slid, PR, SLID);
> +	CHECK_AND_SET_VAL(dest_lid, 16, 0, pr.dlid, PR, DLID);/*if dlid is 0 then we do half world query*/
> +	CHECK_AND_SET_VAL(1, 8, -1, pr.num_path, PR, NUMBPATH);/*to get only one PathRecord for each source and destination pair*/
> +	CHECK_AND_SET_VAL(1, 8, -1, reversible, PR, REVERSIBLE);/*for a reversible path*/
> +	pr.num_path |= reversible << 7;
> +	struct sa_query_result result;
> +	int ret = sa_query(h, IB_MAD_METHOD_GET_TABLE,
> +			   (uint16_t)IB_SA_ATTR_PATHRECORD,0,cl_ntoh64(comp_mask),ibd_sakey,
> +			   &pr, sizeof(pr), &result);
> +	if (ret) {
> +		fprintf(stderr, "Query SA failed: %s; sa call path_query failed\n", strerror(ret));
> +		return ret;
> +	}
> +	if (result.status != IB_SA_MAD_STATUS_SUCCESS) {
> +		sa_report_err(result.status);
> +		ret = EIO;
> +		goto Exit;
> +	}
> +
> +	insert_lid2sl_table(&result);
> +Exit:
> +	sa_free_result_mad(&result);
> +	return ret;
> +}
> +
>  static int query_and_dump(char *buf, size_t size, ib_portid_t * portid,
>  			  ibnd_node_t * node, char *node_name, int portnum,
>  			  const char *attr_name, uint16_t attr_id,
> @@ -447,6 +497,8 @@ static int query_cap_mask(ib_portid_t * portid, char *node_name, int portnum,
>  	uint8_t pc[1024] = { 0 };
>  	uint16_t rc_cap_mask;
>  
> +	portid->sl = lid2sl_table[portid->lid];
> +
>  	/* PerfMgt ClassPortInfo is a required attribute */
>  	if (!pma_query_via(pc, portid, portnum, ibd_timeout, CLASS_PORT_INFO,
>  			   ibmad_port)) {
> @@ -474,6 +526,8 @@ static int print_data_cnts(ib_portid_t * portid, uint16_t cap_mask,
>  
>  	memset(pc, 0, 1024);
>  
> +	portid->sl = lid2sl_table[portid->lid];
> +
>  	if (cap_mask & (IB_PM_EXT_WIDTH_SUPPORTED | IB_PM_EXT_WIDTH_NOIETF_SUP)) {
>  		if (!pma_query_via(pc, portid, portnum, ibd_timeout,
>  				   IB_GSI_PORT_COUNTERS_EXT, ibmad_port)) {
> @@ -543,6 +597,8 @@ static int print_errors(ib_portid_t * portid, uint16_t cap_mask,
>  	memset(pc, 0, 1024);
>  	memset(pce, 0, 1024);
>  
> +	portid->sl = lid2sl_table[portid->lid];
> +
>  	if (!pma_query_via(pc, portid, portnum, ibd_timeout,
>  			   IB_GSI_PORT_COUNTERS, ibmad_port)) {
>  		IBWARN("IB_GSI_PORT_COUNTERS query failed on %s, %s port %d",
> @@ -822,6 +878,9 @@ static int process_opt(void *context, int ch, char *optarg)
>  	case 9:
>  		data_counters_only = 1;
>  		break;
> +	case 10:
> +		half_world_query = 0;
> +		break;
>  	case 'G':
>  	case 'S':
>  		port_guid_str = optarg;
> @@ -858,6 +917,8 @@ int main(int argc, char **argv)
>  	ib_portid_t portid = { 0 };
>  	int rc = 0;
>  	ibnd_fabric_t *fabric = NULL;
> +	int self_lid = 0;
> +	int port = 0;
>  
>  	int mgmt_classes[4] = { IB_SMI_CLASS, IB_SMI_DIRECT_CLASS, IB_SA_CLASS,
>  		IB_PERFORMANCE_CLASS
> @@ -875,6 +936,7 @@ int main(int argc, char **argv)
>  		 "Same as \"-G\" for backward compatibility"},
>  		{"Direct", 'D', 1, "<dr_path>",
>  		 "report the node containing the port specified by <dr_path>"},
> +		{"skip-sl", 10, 0, NULL,"don't obtain SL to all destinations"},
>  		{"report-port", 'r', 0, NULL,
>  		 "report port link information"},
>  		{"threshold-file", 8, 1, NULL,
> @@ -933,6 +995,11 @@ int main(int argc, char **argv)
>  
>  	/* limit the scan the fabric around the target */
>  	if (dr_path) {
> +		if (resolve_self(ibd_ca, ibd_ca_port, &portid, &port, 0) < 0) {
> +			IBERROR("can't resolve self port %s", argv[0]);
> +			goto close_port;
> +		}
> +		self_lid = portid.lid;

I think you should call this before the check for dr_path and not below.  It is not too expensive to just resolve our local lid with resolve_self.

>  		if ((resolved =
>  		     resolve_portid_str(ibd_ca, ibd_ca_port, &portid, dr_path,
>  					IB_DEST_DRPATH, NULL, ibmad_port)) < 0) {
> @@ -947,6 +1014,13 @@ int main(int argc, char **argv)
>  			IBWARN("Failed to resolve %s;",port_guid_str);
>  			goto close_port;
>  		}
> +		lid2sl_table[portid.lid] = portid.sl;
> +	} else {

This else here is a compile error:

src/ibqueryerrors.c: In function ‘main’:
src/ibqueryerrors.c:1041: error: ‘else’ without a previous ‘if’


> +		if (resolve_self(ibd_ca, ibd_ca_port, &portid, &port, 0) < 0) {
> +			IBERROR("can't resolve self port %s", argv[0]);
> +			goto close_port;
> +		}
> +		self_lid = portid.lid;

I believe calling this here will break the DR mode since the NodeInfo query used in DR mode uses portid.

>  	}
>  
>  	if (load_cache_file) {
> @@ -996,12 +1070,17 @@ int main(int argc, char **argv)
>  
>  		port = ibnd_find_port_guid(fabric, port_guid);
>  		if (port) {
> +			if(path_record_query(self_lid,port->base_lid))
> +				goto close_port;

goto destroy_fabric since the fabric object is created at this point.

>  			print_node(port->node, NULL);
>  		} else
>  			fprintf(stderr, "Failed to find node: %s\n", dr_path);
> -	} else
> +	} else {
> +		if(half_world_query)
> +			if(path_record_query(self_lid,0))
> +				goto close_port;

This should also be "goto destroy_fabric", since the fabric object has already been created at this point.

>  		ibnd_iter_nodes(fabric, print_node, NULL);
> -
> +	}
>  	rc = print_summary();
>  	if (rc)
>  		rc = 1;
> -- 
> 1.7.1
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
> the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html


-- 
Ira Weiny
Member of Technical Staff
Lawrence Livermore National Lab
925-423-8008
weiny2-i2BcT+NCU+M@public.gmane.org
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 10+ messages in thread

* [PATCHv3] infiniband-diags/ibqueryerrors.c: obtain destination sl before perf query
       [not found]         ` <20130226170744.53adb74b6af9a54d9481dd4c-i2BcT+NCU+M@public.gmane.org>
@ 2013-02-27 14:10           ` Dan Ben Yosef
       [not found]             ` <20130227141032.GA9919-Kyg1NGsloJ8bMuq+TCmafCexzy7lsfyC@public.gmane.org>
  0 siblings, 1 reply; 10+ messages in thread
From: Dan Ben Yosef @ 2013-02-27 14:10 UTC (permalink / raw)
  To: Ira Weiny; +Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA

1) If the -G or -D option is used: we obtain the SL before doing the perf query.
2) If no destination is given: we obtain the SL for every source-destination pair.
3) If no destination is given and the --skip-sl option is used: we don't obtain
the SL for the nodes in the fabric; in this case SL=0 is used for all node pairs.

Signed-off-by: Dan Ben Yosef <danby-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
---
Changes since v2:
	1.Skip-sl option will apply to 1 and 2.
	2.Update documentation doc/rst/ibqueryerrors.8.in.rst
	3.Pop the resolve_self call to be before the check for dr_path. 
	4.Add new variable self_portid for the usage of resolve_self only.
	5.Change "goto close_port" into "goto destroy_fabric" if fabric object
	  is created.

 doc/rst/ibqueryerrors.8.in.rst |    2 +
 src/ibqueryerrors.c            |   81 +++++++++++++++++++++++++++++++++++++++-
 2 files changed, 81 insertions(+), 2 deletions(-)

diff --git a/doc/rst/ibqueryerrors.8.in.rst b/doc/rst/ibqueryerrors.8.in.rst
index 9a05e7b..b910368 100644
--- a/doc/rst/ibqueryerrors.8.in.rst
+++ b/doc/rst/ibqueryerrors.8.in.rst
@@ -52,6 +52,8 @@ Specify an alternate threshold file.  The default is @IBDIAG_CONFIG_PATH@/error_
 
 **--ca**  print data for CA's only
 
+**--skip-sl**  Use the default sl for queries
+
 **--router**  print data for routers only
 
 **--clear-errors -k** Clear error counters after read.
diff --git a/src/ibqueryerrors.c b/src/ibqueryerrors.c
index 6320972..6d17497 100644
--- a/src/ibqueryerrors.c
+++ b/src/ibqueryerrors.c
@@ -55,11 +55,14 @@
 #include <infiniband/mad.h>
 
 #include "ibdiag_common.h"
+#include "ibdiag_sa.h"
 
 struct ibmad_port *ibmad_port;
 static char *node_name_map_file = NULL;
 static nn_map_t *node_name_map = NULL;
 static char *load_cache_file = NULL;
+static uint16_t lid2sl_table[sizeof(uint8_t) * 1024 * 48] = { 0 };
+static int obtain_sl = 1;
 
 int data_counters = 0;
 int data_counters_only = 0;
@@ -78,6 +81,8 @@ unsigned clear_errors = 0, clear_counts = 0, details = 0;
 #define PRINT_ROUTER 0x4
 #define PRINT_ALL 0xFF		/* all nodes default flag */
 
+#define DEFAULT_HALF_WORLD_PR_TIMEOUT (3000)
+
 struct {
 	int nodes_checked;
 	int bad_nodes;
@@ -298,6 +303,51 @@ static int print_summary(void)
 	return (summary.bad_ports);
 }
 
+static void insert_lid2sl_table(struct sa_query_result *r)
+{
+	unsigned int i;
+	for (i = 0; i < r->result_cnt; i++) {
+		ib_path_rec_t *p_pr = (ib_path_rec_t *)sa_get_query_rec(r->p_result_madw, i);
+		lid2sl_table[cl_ntoh16(p_pr->dlid)] = ib_path_rec_sl(p_pr);
+	}
+}
+
+static int path_record_query(int src_lid,int dest_lid)
+{
+	ib_path_rec_t pr;
+	ib_net64_t comp_mask = 0;
+	uint8_t reversible = 0;
+	struct sa_handle * h;
+
+	h = sa_get_handle();
+	ibd_timeout = DEFAULT_HALF_WORLD_PR_TIMEOUT;
+	memset(&pr, 0, sizeof(pr));
+
+	CHECK_AND_SET_VAL(src_lid, 16, 0, pr.slid, PR, SLID);
+	CHECK_AND_SET_VAL(dest_lid, 16, 0, pr.dlid, PR, DLID);/*if dlid is 0 then we do half world query*/
+	CHECK_AND_SET_VAL(1, 8, -1, pr.num_path, PR, NUMBPATH);/*to get only one PathRecord for each source and destination pair*/
+	CHECK_AND_SET_VAL(1, 8, -1, reversible, PR, REVERSIBLE);/*for a reversible path*/
+	pr.num_path |= reversible << 7;
+	struct sa_query_result result;
+	int ret = sa_query(h, IB_MAD_METHOD_GET_TABLE,
+			   (uint16_t)IB_SA_ATTR_PATHRECORD,0,cl_ntoh64(comp_mask),ibd_sakey,
+			   &pr, sizeof(pr), &result);
+	if (ret) {
+		fprintf(stderr, "Query SA failed: %s; sa call path_query failed\n", strerror(ret));
+		return ret;
+	}
+	if (result.status != IB_SA_MAD_STATUS_SUCCESS) {
+		sa_report_err(result.status);
+		ret = EIO;
+		goto Exit;
+	}
+
+	insert_lid2sl_table(&result);
+Exit:
+	sa_free_result_mad(&result);
+	return ret;
+}
+
 static int query_and_dump(char *buf, size_t size, ib_portid_t * portid,
 			  ibnd_node_t * node, char *node_name, int portnum,
 			  const char *attr_name, uint16_t attr_id,
@@ -447,6 +497,8 @@ static int query_cap_mask(ib_portid_t * portid, char *node_name, int portnum,
 	uint8_t pc[1024] = { 0 };
 	uint16_t rc_cap_mask;
 
+	portid->sl = lid2sl_table[portid->lid];
+
 	/* PerfMgt ClassPortInfo is a required attribute */
 	if (!pma_query_via(pc, portid, portnum, ibd_timeout, CLASS_PORT_INFO,
 			   ibmad_port)) {
@@ -474,6 +526,8 @@ static int print_data_cnts(ib_portid_t * portid, uint16_t cap_mask,
 
 	memset(pc, 0, 1024);
 
+	portid->sl = lid2sl_table[portid->lid];
+
 	if (cap_mask & (IB_PM_EXT_WIDTH_SUPPORTED | IB_PM_EXT_WIDTH_NOIETF_SUP)) {
 		if (!pma_query_via(pc, portid, portnum, ibd_timeout,
 				   IB_GSI_PORT_COUNTERS_EXT, ibmad_port)) {
@@ -543,6 +597,8 @@ static int print_errors(ib_portid_t * portid, uint16_t cap_mask,
 	memset(pc, 0, 1024);
 	memset(pce, 0, 1024);
 
+	portid->sl = lid2sl_table[portid->lid];
+
 	if (!pma_query_via(pc, portid, portnum, ibd_timeout,
 			   IB_GSI_PORT_COUNTERS, ibmad_port)) {
 		IBWARN("IB_GSI_PORT_COUNTERS query failed on %s, %s port %d",
@@ -822,6 +878,9 @@ static int process_opt(void *context, int ch, char *optarg)
 	case 9:
 		data_counters_only = 1;
 		break;
+	case 10:
+		obtain_sl = 0;
+		break;
 	case 'G':
 	case 'S':
 		port_guid_str = optarg;
@@ -856,8 +915,11 @@ int main(int argc, char **argv)
 	struct ibnd_config config = { 0 };
 	int resolved = -1;
 	ib_portid_t portid = { 0 };
+	ib_portid_t self_portid = { 0 };
 	int rc = 0;
 	ibnd_fabric_t *fabric = NULL;
+	int self_lid = 0;
+	int port = 0;
 
 	int mgmt_classes[4] = { IB_SMI_CLASS, IB_SMI_DIRECT_CLASS, IB_SA_CLASS,
 		IB_PERFORMANCE_CLASS
@@ -875,6 +937,7 @@ int main(int argc, char **argv)
 		 "Same as \"-G\" for backward compatibility"},
 		{"Direct", 'D', 1, "<dr_path>",
 		 "report the node containing the port specified by <dr_path>"},
+		{"skip-sl", 10, 0, NULL,"don't obtain SL to all destinations"},
 		{"report-port", 'r', 0, NULL,
 		 "report port link information"},
 		{"threshold-file", 8, 1, NULL,
@@ -931,6 +994,12 @@ int main(int argc, char **argv)
 		exit(-1);
 	}
 
+	if (resolve_self(ibd_ca, ibd_ca_port, &self_portid, &port, 0) < 0) {
+		IBERROR("can't resolve self port %s", argv[0]);
+		goto close_port;
+	}
+	self_lid = self_portid.lid;
+
 	/* limit the scan the fabric around the target */
 	if (dr_path) {
 		if ((resolved =
@@ -947,6 +1016,8 @@ int main(int argc, char **argv)
 			IBWARN("Failed to resolve %s;",port_guid_str);
 			goto close_port;
 		}
+		if(obtain_sl)
+			lid2sl_table[portid.lid] = portid.sl;
 	}
 
 	if (load_cache_file) {
@@ -996,12 +1067,18 @@ int main(int argc, char **argv)
 
 		port = ibnd_find_port_guid(fabric, port_guid);
 		if (port) {
+			if(obtain_sl)
+				if(path_record_query(self_lid,port->base_lid))
+					goto destroy_fabric;
 			print_node(port->node, NULL);
 		} else
 			fprintf(stderr, "Failed to find node: %s\n", dr_path);
-	} else
+	} else {
+		if(obtain_sl)
+			if(path_record_query(self_lid,0))
+				goto destroy_fabric;
 		ibnd_iter_nodes(fabric, print_node, NULL);
-
+	}
 	rc = print_summary();
 	if (rc)
 		rc = 1;
-- 
1.7.1


--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [PATCHv3] infiniband-diags/ibqueryerrors.c: obtain destination sl before perf query
       [not found]             ` <20130227141032.GA9919-Kyg1NGsloJ8bMuq+TCmafCexzy7lsfyC@public.gmane.org>
@ 2013-02-28 16:11               ` Hal Rosenstock
       [not found]                 ` <512F81A3.6000100-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org>
  0 siblings, 1 reply; 10+ messages in thread
From: Hal Rosenstock @ 2013-02-28 16:11 UTC (permalink / raw)
  To: Dan Ben Yosef; +Cc: Ira Weiny, linux-rdma-u79uwXL29TY76Z2rM5mHXA

On 2/27/2013 9:10 AM, Dan Ben Yosef wrote:
> 1) if use -G or -D option : we obtain sl before doing perf query.
> 2) if no destination is given : we obtain sl for every pair source-destination.
> 3) if no destination is given and use --skip-sl option : we don't obtain
> sl to all nodes in the fabric,in this case sl=0 for all node pairs.

It might be noted that this is not recommended in the case of QoS aware
routing engines as this can cause credit deadlock.

> 
> Signed-off-by: Dan Ben Yosef <danby-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
> ---
> Changes since v2:
> 	1.Skip-sl option will apply to 1 and 2.
> 	2.Update documentation doc/rst/ibqueryerrors.8.in.rst
> 	3.Pop the resolve_self call to be before the check for dr_path. 
> 	4.Add new variable self_portid for the usage of resolve_self only.
> 	5.Change "goto close_port" into "goto destroy_fabric" if fabric object
> 	  is created.
> 
>  doc/rst/ibqueryerrors.8.in.rst |    2 +
>  src/ibqueryerrors.c            |   81 +++++++++++++++++++++++++++++++++++++++-
>  2 files changed, 81 insertions(+), 2 deletions(-)
> 
> diff --git a/doc/rst/ibqueryerrors.8.in.rst b/doc/rst/ibqueryerrors.8.in.rst
> index 9a05e7b..b910368 100644
> --- a/doc/rst/ibqueryerrors.8.in.rst
> +++ b/doc/rst/ibqueryerrors.8.in.rst
> @@ -52,6 +52,8 @@ Specify an alternate threshold file.  The default is @IBDIAG_CONFIG_PATH@/error_
>  
>  **--ca**  print data for CA's only
>  
> +**--skip-sl**  Use the default sl for queries
> +
>  **--router**  print data for routers only
>  
>  **--clear-errors -k** Clear error counters after read.
> diff --git a/src/ibqueryerrors.c b/src/ibqueryerrors.c
> index 6320972..6d17497 100644
> --- a/src/ibqueryerrors.c
> +++ b/src/ibqueryerrors.c
> @@ -55,11 +55,14 @@
>  #include <infiniband/mad.h>
>  
>  #include "ibdiag_common.h"
> +#include "ibdiag_sa.h"
>  
>  struct ibmad_port *ibmad_port;
>  static char *node_name_map_file = NULL;
>  static nn_map_t *node_name_map = NULL;
>  static char *load_cache_file = NULL;
> +static uint16_t lid2sl_table[sizeof(uint8_t) * 1024 * 48] = { 0 };
> +static int obtain_sl = 1;
>  
>  int data_counters = 0;
>  int data_counters_only = 0;
> @@ -78,6 +81,8 @@ unsigned clear_errors = 0, clear_counts = 0, details = 0;
>  #define PRINT_ROUTER 0x4
>  #define PRINT_ALL 0xFF		/* all nodes default flag */
>  
> +#define DEFAULT_HALF_WORLD_PR_TIMEOUT (3000)
> +
>  struct {
>  	int nodes_checked;
>  	int bad_nodes;
> @@ -298,6 +303,51 @@ static int print_summary(void)
>  	return (summary.bad_ports);
>  }
>  
> +static void insert_lid2sl_table(struct sa_query_result *r)
> +{
> +	unsigned int i;
> +	for (i = 0; i < r->result_cnt; i++) {
> +		ib_path_rec_t *p_pr = (ib_path_rec_t *)sa_get_query_rec(r->p_result_madw, i);
> +		lid2sl_table[cl_ntoh16(p_pr->dlid)] = ib_path_rec_sl(p_pr);
> +	}
> +}
> +
> +static int path_record_query(int src_lid,int dest_lid)
> +{
> +	ib_path_rec_t pr;
> +	ib_net64_t comp_mask = 0;
> +	uint8_t reversible = 0;
> +	struct sa_handle * h;
> +
> +	h = sa_get_handle();
> +	ibd_timeout = DEFAULT_HALF_WORLD_PR_TIMEOUT;
> +	memset(&pr, 0, sizeof(pr));
> +
> +	CHECK_AND_SET_VAL(src_lid, 16, 0, pr.slid, PR, SLID);

Doesn't SGID, rather than SLID, need to be specified for this to be a
compliant GetTable query?

> +	CHECK_AND_SET_VAL(dest_lid, 16, 0, pr.dlid, PR, DLID);/*if dlid is 0 then we do half world query*/

I think this should be:
	if (dest_lid)
		CHECK_AND_SET_VAL(dest_lid, 16, 0, pr.dlid, PR, DLID);
so that the comp mask bit for DLID is not set and the DLID is therefore wildcarded.

> +	CHECK_AND_SET_VAL(1, 8, -1, pr.num_path, PR, NUMBPATH);/*to get only one PathRecord for each source and destination pair*/
> +	CHECK_AND_SET_VAL(1, 8, -1, reversible, PR, REVERSIBLE);/*for a reversible path*/
> +	pr.num_path |= reversible << 7;
> +	struct sa_query_result result;
> +	int ret = sa_query(h, IB_MAD_METHOD_GET_TABLE,
> +			   (uint16_t)IB_SA_ATTR_PATHRECORD,0,cl_ntoh64(comp_mask),ibd_sakey,
> +			   &pr, sizeof(pr), &result);
> +	if (ret) {
> +		fprintf(stderr, "Query SA failed: %s; sa call path_query failed\n", strerror(ret));
> +		return ret;
> +	}
> +	if (result.status != IB_SA_MAD_STATUS_SUCCESS) {
> +		sa_report_err(result.status);
> +		ret = EIO;
> +		goto Exit;
> +	}
> +
> +	insert_lid2sl_table(&result);
> +Exit:
> +	sa_free_result_mad(&result);
> +	return ret;
> +}
> +
>  static int query_and_dump(char *buf, size_t size, ib_portid_t * portid,
>  			  ibnd_node_t * node, char *node_name, int portnum,
>  			  const char *attr_name, uint16_t attr_id,
> @@ -447,6 +497,8 @@ static int query_cap_mask(ib_portid_t * portid, char *node_name, int portnum,
>  	uint8_t pc[1024] = { 0 };
>  	uint16_t rc_cap_mask;
>  
> +	portid->sl = lid2sl_table[portid->lid];
> +
>  	/* PerfMgt ClassPortInfo is a required attribute */
>  	if (!pma_query_via(pc, portid, portnum, ibd_timeout, CLASS_PORT_INFO,
>  			   ibmad_port)) {
> @@ -474,6 +526,8 @@ static int print_data_cnts(ib_portid_t * portid, uint16_t cap_mask,
>  
>  	memset(pc, 0, 1024);
>  
> +	portid->sl = lid2sl_table[portid->lid];
> +
>  	if (cap_mask & (IB_PM_EXT_WIDTH_SUPPORTED | IB_PM_EXT_WIDTH_NOIETF_SUP)) {
>  		if (!pma_query_via(pc, portid, portnum, ibd_timeout,
>  				   IB_GSI_PORT_COUNTERS_EXT, ibmad_port)) {
> @@ -543,6 +597,8 @@ static int print_errors(ib_portid_t * portid, uint16_t cap_mask,
>  	memset(pc, 0, 1024);
>  	memset(pce, 0, 1024);
>  
> +	portid->sl = lid2sl_table[portid->lid];
> +
>  	if (!pma_query_via(pc, portid, portnum, ibd_timeout,
>  			   IB_GSI_PORT_COUNTERS, ibmad_port)) {
>  		IBWARN("IB_GSI_PORT_COUNTERS query failed on %s, %s port %d",
> @@ -822,6 +878,9 @@ static int process_opt(void *context, int ch, char *optarg)
>  	case 9:
>  		data_counters_only = 1;
>  		break;
> +	case 10:
> +		obtain_sl = 0;
> +		break;
>  	case 'G':
>  	case 'S':
>  		port_guid_str = optarg;
> @@ -856,8 +915,11 @@ int main(int argc, char **argv)
>  	struct ibnd_config config = { 0 };
>  	int resolved = -1;
>  	ib_portid_t portid = { 0 };
> +	ib_portid_t self_portid = { 0 };
>  	int rc = 0;
>  	ibnd_fabric_t *fabric = NULL;
> +	int self_lid = 0;
> +	int port = 0;
>  
>  	int mgmt_classes[4] = { IB_SMI_CLASS, IB_SMI_DIRECT_CLASS, IB_SA_CLASS,
>  		IB_PERFORMANCE_CLASS
> @@ -875,6 +937,7 @@ int main(int argc, char **argv)
>  		 "Same as \"-G\" for backward compatibility"},
>  		{"Direct", 'D', 1, "<dr_path>",
>  		 "report the node containing the port specified by <dr_path>"},
> +		{"skip-sl", 10, 0, NULL,"don't obtain SL to all destinations"},
>  		{"report-port", 'r', 0, NULL,
>  		 "report port link information"},
>  		{"threshold-file", 8, 1, NULL,
> @@ -931,6 +994,12 @@ int main(int argc, char **argv)
>  		exit(-1);
>  	}
>  
> +	if (resolve_self(ibd_ca, ibd_ca_port, &self_portid, &port, 0) < 0) {

resolve_self is deprecated; should use resolve_self_via

-- Hal

> +		IBERROR("can't resolve self port %s", argv[0]);
> +		goto close_port;
> +	}
> +	self_lid = self_portid.lid;
> +
>  	/* limit the scan the fabric around the target */
>  	if (dr_path) {
>  		if ((resolved =
> @@ -947,6 +1016,8 @@ int main(int argc, char **argv)
>  			IBWARN("Failed to resolve %s;",port_guid_str);
>  			goto close_port;
>  		}
> +		if(obtain_sl)
> +			lid2sl_table[portid.lid] = portid.sl;
>  	}
>  
>  	if (load_cache_file) {
> @@ -996,12 +1067,18 @@ int main(int argc, char **argv)
>  
>  		port = ibnd_find_port_guid(fabric, port_guid);
>  		if (port) {
> +			if(obtain_sl)
> +				if(path_record_query(self_lid,port->base_lid))
> +					goto destroy_fabric;
>  			print_node(port->node, NULL);
>  		} else
>  			fprintf(stderr, "Failed to find node: %s\n", dr_path);
> -	} else
> +	} else {
> +		if(obtain_sl)
> +			if(path_record_query(self_lid,0))
> +				goto destroy_fabric;
>  		ibnd_iter_nodes(fabric, print_node, NULL);
> -
> +	}
>  	rc = print_summary();
>  	if (rc)
>  		rc = 1;

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCHv3] infiniband-diags/ibqueryerrors.c: obtain destination sl before perf query
       [not found]                 ` <512F81A3.6000100-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org>
@ 2013-03-01  1:33                   ` Ira Weiny
       [not found]                     ` <20130228173336.18bc61527d20d160c9f39818-i2BcT+NCU+M@public.gmane.org>
  0 siblings, 1 reply; 10+ messages in thread
From: Ira Weiny @ 2013-03-01  1:33 UTC (permalink / raw)
  To: Hal Rosenstock; +Cc: Dan Ben Yosef, linux-rdma-u79uwXL29TY76Z2rM5mHXA

On Thu, 28 Feb 2013 11:11:15 -0500
Hal Rosenstock <hal-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org> wrote:

> On 2/27/2013 9:10 AM, Dan Ben Yosef wrote:
> > 1) if use -G or -D option : we obtain sl before doing perf query.
> > 2) if no destination is given : we obtain sl for every pair source-destination.
> > 3) if no destination is given and use --skip-sl option : we don't obtain
> > sl to all nodes in the fabric,in this case sl=0 for all node pairs.
> 
> It might be noted that this is not recommended in the case of QoS aware
> routing engines as this can cause credit deadlock.

Yes, but the tool has been broken for so long I think documenting that in the rst file would be sufficient.

> 
> > 
> > Signed-off-by: Dan Ben Yosef <danby-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
> > ---
> > Changes since v2:
> > 	1.Skip-sl option will apply to 1 and 2.
> > 	2.Update documentation doc/rst/ibqueryerrors.8.in.rst
> > 	3.Pop the resolve_self call to be before the check for dr_path. 
> > 	4.Add new variable self_portid for the usage of resolve_self only.
> > 	5.Change "goto close_port" into "goto destroy_fabric" if fabric object
> > 	  is created.
> > 
> >  doc/rst/ibqueryerrors.8.in.rst |    2 +
> >  src/ibqueryerrors.c            |   81 +++++++++++++++++++++++++++++++++++++++-
> >  2 files changed, 81 insertions(+), 2 deletions(-)
> > 
> > diff --git a/doc/rst/ibqueryerrors.8.in.rst b/doc/rst/ibqueryerrors.8.in.rst
> > index 9a05e7b..b910368 100644
> > --- a/doc/rst/ibqueryerrors.8.in.rst
> > +++ b/doc/rst/ibqueryerrors.8.in.rst
> > @@ -52,6 +52,8 @@ Specify an alternate threshold file.  The default is @IBDIAG_CONFIG_PATH@/error_
> >  
> >  **--ca**  print data for CA's only
> >  
> > +**--skip-sl**  Use the default sl for queries
> > +
> >  **--router**  print data for routers only
> >  
> >  **--clear-errors -k** Clear error counters after read.
> > diff --git a/src/ibqueryerrors.c b/src/ibqueryerrors.c
> > index 6320972..6d17497 100644
> > --- a/src/ibqueryerrors.c
> > +++ b/src/ibqueryerrors.c
> > @@ -55,11 +55,14 @@
> >  #include <infiniband/mad.h>
> >  
> >  #include "ibdiag_common.h"
> > +#include "ibdiag_sa.h"
> >  
> >  struct ibmad_port *ibmad_port;
> >  static char *node_name_map_file = NULL;
> >  static nn_map_t *node_name_map = NULL;
> >  static char *load_cache_file = NULL;
> > +static uint16_t lid2sl_table[sizeof(uint8_t) * 1024 * 48] = { 0 };
> > +static int obtain_sl = 1;
> >  
> >  int data_counters = 0;
> >  int data_counters_only = 0;
> > @@ -78,6 +81,8 @@ unsigned clear_errors = 0, clear_counts = 0, details = 0;
> >  #define PRINT_ROUTER 0x4
> >  #define PRINT_ALL 0xFF		/* all nodes default flag */
> >  
> > +#define DEFAULT_HALF_WORLD_PR_TIMEOUT (3000)
> > +
> >  struct {
> >  	int nodes_checked;
> >  	int bad_nodes;
> > @@ -298,6 +303,51 @@ static int print_summary(void)
> >  	return (summary.bad_ports);
> >  }
> >  
> > +static void insert_lid2sl_table(struct sa_query_result *r)
> > +{
> > +	unsigned int i;
> > +	for (i = 0; i < r->result_cnt; i++) {
> > +		ib_path_rec_t *p_pr = (ib_path_rec_t *)sa_get_query_rec(r->p_result_madw, i);
> > +		lid2sl_table[cl_ntoh16(p_pr->dlid)] = ib_path_rec_sl(p_pr);
> > +	}
> > +}
> > +
> > +static int path_record_query(int src_lid,int dest_lid)
> > +{
> > +	ib_path_rec_t pr;
> > +	ib_net64_t comp_mask = 0;
> > +	uint8_t reversible = 0;
> > +	struct sa_handle * h;
> > +
> > +	h = sa_get_handle();
> > +	ibd_timeout = DEFAULT_HALF_WORLD_PR_TIMEOUT;
> > +	memset(&pr, 0, sizeof(pr));
> > +
> > +	CHECK_AND_SET_VAL(src_lid, 16, 0, pr.slid, PR, SLID);
> 
> Doesn't SGID rather than SLID need to be specified to be compliant
> GetTable query ?

Yep, looks like it.

> 
> > +	CHECK_AND_SET_VAL(dest_lid, 16, 0, pr.dlid, PR, DLID);/*if dlid is 0 then we do half world query*/
> 
> I think this should be:
> 	if (dest_lid)
> 		CHECK_AND_SET_VAL(dest_lid, 16, 0, pr.dlid, PR, DLID);
> so that comp mask bit is not set for DLID so that it's wildcarded.
> 

Agreed.

> > +	CHECK_AND_SET_VAL(1, 8, -1, pr.num_path, PR, NUMBPATH);/*to get only one PathRecord for each source and destination pair*/
> > +	CHECK_AND_SET_VAL(1, 8, -1, reversible, PR, REVERSIBLE);/*for a reversible path*/
> > +	pr.num_path |= reversible << 7;
> > +	struct sa_query_result result;
> > +	int ret = sa_query(h, IB_MAD_METHOD_GET_TABLE,
> > +			   (uint16_t)IB_SA_ATTR_PATHRECORD,0,cl_ntoh64(comp_mask),ibd_sakey,
> > +			   &pr, sizeof(pr), &result);
> > +	if (ret) {
> > +		fprintf(stderr, "Query SA failed: %s; sa call path_query failed\n", strerror(ret));
> > +		return ret;
> > +	}
> > +	if (result.status != IB_SA_MAD_STATUS_SUCCESS) {
> > +		sa_report_err(result.status);
> > +		ret = EIO;
> > +		goto Exit;
> > +	}
> > +
> > +	insert_lid2sl_table(&result);
> > +Exit:
> > +	sa_free_result_mad(&result);
> > +	return ret;
> > +}
> > +
> >  static int query_and_dump(char *buf, size_t size, ib_portid_t * portid,
> >  			  ibnd_node_t * node, char *node_name, int portnum,
> >  			  const char *attr_name, uint16_t attr_id,
> > @@ -447,6 +497,8 @@ static int query_cap_mask(ib_portid_t * portid, char *node_name, int portnum,
> >  	uint8_t pc[1024] = { 0 };
> >  	uint16_t rc_cap_mask;
> >  
> > +	portid->sl = lid2sl_table[portid->lid];
> > +
> >  	/* PerfMgt ClassPortInfo is a required attribute */
> >  	if (!pma_query_via(pc, portid, portnum, ibd_timeout, CLASS_PORT_INFO,
> >  			   ibmad_port)) {
> > @@ -474,6 +526,8 @@ static int print_data_cnts(ib_portid_t * portid, uint16_t cap_mask,
> >  
> >  	memset(pc, 0, 1024);
> >  
> > +	portid->sl = lid2sl_table[portid->lid];
> > +
> >  	if (cap_mask & (IB_PM_EXT_WIDTH_SUPPORTED | IB_PM_EXT_WIDTH_NOIETF_SUP)) {
> >  		if (!pma_query_via(pc, portid, portnum, ibd_timeout,
> >  				   IB_GSI_PORT_COUNTERS_EXT, ibmad_port)) {
> > @@ -543,6 +597,8 @@ static int print_errors(ib_portid_t * portid, uint16_t cap_mask,
> >  	memset(pc, 0, 1024);
> >  	memset(pce, 0, 1024);
> >  
> > +	portid->sl = lid2sl_table[portid->lid];
> > +
> >  	if (!pma_query_via(pc, portid, portnum, ibd_timeout,
> >  			   IB_GSI_PORT_COUNTERS, ibmad_port)) {
> >  		IBWARN("IB_GSI_PORT_COUNTERS query failed on %s, %s port %d",
> > @@ -822,6 +878,9 @@ static int process_opt(void *context, int ch, char *optarg)
> >  	case 9:
> >  		data_counters_only = 1;
> >  		break;
> > +	case 10:
> > +		obtain_sl = 0;
> > +		break;
> >  	case 'G':
> >  	case 'S':
> >  		port_guid_str = optarg;
> > @@ -856,8 +915,11 @@ int main(int argc, char **argv)
> >  	struct ibnd_config config = { 0 };
> >  	int resolved = -1;
> >  	ib_portid_t portid = { 0 };
> > +	ib_portid_t self_portid = { 0 };
> >  	int rc = 0;
> >  	ibnd_fabric_t *fabric = NULL;
> > +	int self_lid = 0;
> > +	int port = 0;
> >  
> >  	int mgmt_classes[4] = { IB_SMI_CLASS, IB_SMI_DIRECT_CLASS, IB_SA_CLASS,
> >  		IB_PERFORMANCE_CLASS
> > @@ -875,6 +937,7 @@ int main(int argc, char **argv)
> >  		 "Same as \"-G\" for backward compatibility"},
> >  		{"Direct", 'D', 1, "<dr_path>",
> >  		 "report the node containing the port specified by <dr_path>"},
> > +		{"skip-sl", 10, 0, NULL,"don't obtain SL to all destinations"},
> >  		{"report-port", 'r', 0, NULL,
> >  		 "report port link information"},
> >  		{"threshold-file", 8, 1, NULL,
> > @@ -931,6 +994,12 @@ int main(int argc, char **argv)
> >  		exit(-1);
> >  	}
> >  
> > +	if (resolve_self(ibd_ca, ibd_ca_port, &self_portid, &port, 0) < 0) {
> 
> resolve_self is deprecated; should use resolve_self_via

Actually, this is probably a bad thing, but the diags have their own internal resolve_self in ibdiag_common now.  This uses umad rather than an SMP PortInfo query to obtain the local port information.

Ira

> 
> -- Hal
> 
> > +		IBERROR("can't resolve self port %s", argv[0]);
> > +		goto close_port;
> > +	}
> > +	self_lid = self_portid.lid;
> > +
> >  	/* limit the scan the fabric around the target */
> >  	if (dr_path) {
> >  		if ((resolved =
> > @@ -947,6 +1016,8 @@ int main(int argc, char **argv)
> >  			IBWARN("Failed to resolve %s;",port_guid_str);
> >  			goto close_port;
> >  		}
> > +		if(obtain_sl)
> > +			lid2sl_table[portid.lid] = portid.sl;
> >  	}
> >  
> >  	if (load_cache_file) {
> > @@ -996,12 +1067,18 @@ int main(int argc, char **argv)
> >  
> >  		port = ibnd_find_port_guid(fabric, port_guid);
> >  		if (port) {
> > +			if(obtain_sl)
> > +				if(path_record_query(self_lid,port->base_lid))
> > +					goto destroy_fabric;
> >  			print_node(port->node, NULL);
> >  		} else
> >  			fprintf(stderr, "Failed to find node: %s\n", dr_path);
> > -	} else
> > +	} else {
> > +		if(obtain_sl)
> > +			if(path_record_query(self_lid,0))
> > +				goto destroy_fabric;
> >  		ibnd_iter_nodes(fabric, print_node, NULL);
> > -
> > +	}
> >  	rc = print_summary();
> >  	if (rc)
> >  		rc = 1;
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
> the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html


-- 
Ira Weiny
Member of Technical Staff
Lawrence Livermore National Lab
925-423-8008
weiny2-i2BcT+NCU+M@public.gmane.org
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 10+ messages in thread

* [PATCHv4] infiniband-diags/ibqueryerrors.c: obtain destination sl before perf query
       [not found]                     ` <20130228173336.18bc61527d20d160c9f39818-i2BcT+NCU+M@public.gmane.org>
@ 2013-03-14 14:09                       ` Dan Ben Yosef
       [not found]                         ` <20130314140902.GA10423-Kyg1NGsloJ8bMuq+TCmafCexzy7lsfyC@public.gmane.org>
  0 siblings, 1 reply; 10+ messages in thread
From: Dan Ben Yosef @ 2013-03-14 14:09 UTC (permalink / raw)
  To: Ira Weiny; +Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA

1) If the -G or -D option is used: we obtain the SL before doing the perf query.
2) If no destination is given: we obtain the SL for every source-destination pair.
3) If no destination is given and the --skip-sl option is used: we don't obtain
the SL for the nodes in the fabric; in this case SL=0 is used for all node pairs.

Signed-off-by: Dan Ben Yosef <danby-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
---
Changes since v3:
	1. Change PathRecord query to use sgid/dgid instead of slid/dlid.

 doc/rst/ibqueryerrors.8.in.rst |    2 +
 src/ibqueryerrors.c            |   90 +++++++++++++++++++++++++++++++++++++++-
 2 files changed, 91 insertions(+), 1 deletions(-)

diff --git a/doc/rst/ibqueryerrors.8.in.rst b/doc/rst/ibqueryerrors.8.in.rst
index 9a05e7b..b910368 100644
--- a/doc/rst/ibqueryerrors.8.in.rst
+++ b/doc/rst/ibqueryerrors.8.in.rst
@@ -52,6 +52,8 @@ Specify an alternate threshold file.  The default is @IBDIAG_CONFIG_PATH@/error_
 
 **--ca**  print data for CA's only
 
+**--skip-sl**  Use the default sl for queries
+
 **--router**  print data for routers only
 
 **--clear-errors -k** Clear error counters after read.
diff --git a/src/ibqueryerrors.c b/src/ibqueryerrors.c
index 076327c..15b7702 100644
--- a/src/ibqueryerrors.c
+++ b/src/ibqueryerrors.c
@@ -55,11 +55,14 @@
 #include <infiniband/mad.h>
 
 #include "ibdiag_common.h"
+#include "ibdiag_sa.h"
 
 struct ibmad_port *ibmad_port;
 static char *node_name_map_file = NULL;
 static nn_map_t *node_name_map = NULL;
 static char *load_cache_file = NULL;
+static uint16_t lid2sl_table[sizeof(uint8_t) * 1024 * 48] = { 0 };
+static int obtain_sl = 1;
 
 int data_counters = 0;
 int data_counters_only = 0;
@@ -78,6 +81,8 @@ unsigned clear_errors = 0, clear_counts = 0, details = 0;
 #define PRINT_ROUTER 0x4
 #define PRINT_ALL 0xFF		/* all nodes default flag */
 
+#define DEFAULT_HALF_WORLD_PR_TIMEOUT (3000)
+
 struct {
 	int nodes_checked;
 	int bad_nodes;
@@ -93,6 +98,13 @@ static char *threshold_file = DEF_THRES_FILE;
 uint8_t thresholds[1204] = { 0 };
 char * threshold_str = "";
 
+static unsigned valid_gid(ib_gid_t * gid)
+{
+	ib_gid_t zero_gid;
+	memset(&zero_gid, 0, sizeof zero_gid);
+	return memcmp(&zero_gid, gid, sizeof(*gid));
+}
+
 static void set_thres(char *name, uint32_t val)
 {
 	int f;
@@ -298,6 +310,55 @@ static int print_summary(void)
 	return (summary.bad_ports);
 }
 
+static void insert_lid2sl_table(struct sa_query_result *r)
+{
+    unsigned int i;
+    for (i = 0; i < r->result_cnt; i++) {
+	    ib_path_rec_t *p_pr = (ib_path_rec_t *)sa_get_query_rec(r->p_result_madw, i);
+	    lid2sl_table[cl_ntoh16(p_pr->dlid)] = ib_path_rec_sl(p_pr);
+    }
+}
+
+static int path_record_query(ib_gid_t sgid,uint64_t dguid)
+{
+     ib_path_rec_t pr;
+     ib_net64_t comp_mask = 0;
+     uint8_t reversible = 0;
+     struct sa_handle * h;
+
+     h = sa_get_handle();
+     ibd_timeout = DEFAULT_HALF_WORLD_PR_TIMEOUT;
+     memset(&pr, 0, sizeof(pr));
+
+     CHECK_AND_SET_GID(sgid, pr.sgid, PR, SGID);
+     if(dguid) {
+	     mad_encode_field(&sgid, IB_GID_GUID_F, &dguid);
+	     CHECK_AND_SET_GID(sgid, pr.dgid, PR, DGID);
+     }
+
+     CHECK_AND_SET_VAL(1, 8, -1, pr.num_path, PR, NUMBPATH);/*to get only one PathRecord for each source and destination pair*/
+     CHECK_AND_SET_VAL(1, 8, -1, reversible, PR, REVERSIBLE);/*for a reversible path*/
+     pr.num_path |= reversible << 7;
+     struct sa_query_result result;
+     int ret = sa_query(h, IB_MAD_METHOD_GET_TABLE,
+                        (uint16_t)IB_SA_ATTR_PATHRECORD,0,cl_ntoh64(comp_mask),ibd_sakey,
+                        &pr, sizeof(pr), &result);
+     if (ret) {
+             fprintf(stderr, "Query SA failed: %s; sa call path_query failed\n", strerror(ret));
+             return ret;
+     }
+     if (result.status != IB_SA_MAD_STATUS_SUCCESS) {
+             sa_report_err(result.status);
+             ret = EIO;
+             goto Exit;
+     }
+
+     insert_lid2sl_table(&result);
+Exit:
+     sa_free_result_mad(&result);
+     return ret;
+}
+
 static int query_and_dump(char *buf, size_t size, ib_portid_t * portid,
 			  ibnd_node_t * node, char *node_name, int portnum,
 			  const char *attr_name, uint16_t attr_id,
@@ -447,6 +508,8 @@ static int query_cap_mask(ib_portid_t * portid, char *node_name, int portnum,
 	uint8_t pc[1024] = { 0 };
 	uint16_t rc_cap_mask;
 
+	portid->sl = lid2sl_table[portid->lid];
+
 	/* PerfMgt ClassPortInfo is a required attribute */
 	if (!pma_query_via(pc, portid, portnum, ibd_timeout, CLASS_PORT_INFO,
 			   ibmad_port)) {
@@ -474,6 +537,8 @@ static int print_data_cnts(ib_portid_t * portid, uint16_t cap_mask,
 
 	memset(pc, 0, 1024);
 
+	portid->sl = lid2sl_table[portid->lid];
+
 	if (cap_mask & (IB_PM_EXT_WIDTH_SUPPORTED | IB_PM_EXT_WIDTH_NOIETF_SUP)) {
 		if (!pma_query_via(pc, portid, portnum, ibd_timeout,
 				   IB_GSI_PORT_COUNTERS_EXT, ibmad_port)) {
@@ -543,6 +608,8 @@ static int print_errors(ib_portid_t * portid, uint16_t cap_mask,
 	memset(pc, 0, 1024);
 	memset(pce, 0, 1024);
 
+	portid->sl = lid2sl_table[portid->lid];
+
 	if (!pma_query_via(pc, portid, portnum, ibd_timeout,
 			   IB_GSI_PORT_COUNTERS, ibmad_port)) {
 		IBWARN("IB_GSI_PORT_COUNTERS query failed on %s, %s port %d",
@@ -822,6 +889,9 @@ static int process_opt(void *context, int ch, char *optarg)
 	case 9:
 		data_counters_only = 1;
 		break;
+	case 10:
+		obtain_sl = 0;
+		break;
 	case 'G':
 	case 'S':
 		port_guid_str = optarg;
@@ -856,8 +926,11 @@ int main(int argc, char **argv)
 	struct ibnd_config config = { 0 };
 	int resolved = -1;
 	ib_portid_t portid = { 0 };
+	ib_portid_t self_portid = { 0 };
 	int rc = 0;
 	ibnd_fabric_t *fabric = NULL;
+	ib_gid_t self_gid;
+	int port = 0;
 
 	int mgmt_classes[4] = { IB_SMI_CLASS, IB_SMI_DIRECT_CLASS, IB_SA_CLASS,
 		IB_PERFORMANCE_CLASS
@@ -875,6 +948,7 @@ int main(int argc, char **argv)
 		 "Same as \"-G\" for backward compatibility"},
 		{"Direct", 'D', 1, "<dr_path>",
 		 "report the node containing the port specified by <dr_path>"},
+		{"skip-sl", 10, 0, NULL,"don't obtain SL to all destinations"},
 		{"report-port", 'r', 0, NULL,
 		 "report port link information"},
 		{"threshold-file", 8, 1, NULL,
@@ -931,6 +1005,11 @@ int main(int argc, char **argv)
 		exit(-1);
 	}
 
+	if (resolve_self(ibd_ca, ibd_ca_port, &self_portid, &port, &self_gid) < 0) {
+		IBERROR("can't resolve self port %s", argv[0]);
+		goto close_port;
+	}
+
 	/* limit the scan the fabric around the target */
 	if (dr_path) {
 		if ((resolved =
@@ -945,6 +1024,8 @@ int main(int argc, char **argv)
 					       ibmad_port)) < 0)
 			IBWARN("Failed to resolve %s; attempting full scan",
 			       port_guid_str);
+		if(obtain_sl)
+			lid2sl_table[portid.lid] = portid.sl;
 	}
 
 	if (load_cache_file) {
@@ -994,11 +1075,18 @@ int main(int argc, char **argv)
 
 		port = ibnd_find_port_guid(fabric, port_guid);
 		if (port) {
+			if(obtain_sl)
+				if(path_record_query(self_gid,port->guid))
+					goto destroy_fabric;
 			print_node(port->node, NULL);
 		} else
 			fprintf(stderr, "Failed to find node: %s\n", dr_path);
-	} else
+	} else {
+		if(obtain_sl)
+			if(path_record_query(self_gid,0))
+				goto destroy_fabric;
 		ibnd_iter_nodes(fabric, print_node, NULL);
+	}
 
 	rc = print_summary();
 	if (rc)
-- 
1.7.1

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [PATCHv5] infiniband-diags/ibqueryerrors.c: obtain destination sl before perf query
       [not found]                         ` <20130314140902.GA10423-Kyg1NGsloJ8bMuq+TCmafCexzy7lsfyC@public.gmane.org>
@ 2013-03-14 14:52                           ` Dan Ben Yosef
       [not found]                             ` <20130314145246.GA13095-Kyg1NGsloJ8bMuq+TCmafCexzy7lsfyC@public.gmane.org>
  0 siblings, 1 reply; 10+ messages in thread
From: Dan Ben Yosef @ 2013-03-14 14:52 UTC (permalink / raw)
  To: Ira Weiny; +Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA

1) If the -G or -D option is used: we obtain the SL before doing the perf query.
2) If no destination is given: we obtain the SL for every source-destination pair.
3) If no destination is given and the --skip-sl option is used: we don't obtain
the SL for all nodes in the fabric; in this case SL=0 is used for all node pairs.

Signed-off-by: Dan Ben Yosef <danby-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
---
Changes since v4:
	1. changed the documentation for --skip-sl option

 doc/rst/ibqueryerrors.8.in.rst |    3 +
 src/ibqueryerrors.c            |   90 +++++++++++++++++++++++++++++++++++++++-
 2 files changed, 92 insertions(+), 1 deletions(-)

diff --git a/doc/rst/ibqueryerrors.8.in.rst b/doc/rst/ibqueryerrors.8.in.rst
index 9a05e7b..196ab9d 100644
--- a/doc/rst/ibqueryerrors.8.in.rst
+++ b/doc/rst/ibqueryerrors.8.in.rst
@@ -52,6 +52,9 @@ Specify an alternate threshold file.  The default is @IBDIAG_CONFIG_PATH@/error_
 
 **--ca**  print data for CA's only
 
+**--skip-sl**  Use the default sl for queries. This is not recommended when
+using a QoS aware routing engine as it can cause a credit deadlock.
+
 **--router**  print data for routers only
 
 **--clear-errors -k** Clear error counters after read.
diff --git a/src/ibqueryerrors.c b/src/ibqueryerrors.c
index 076327c..15b7702 100644
--- a/src/ibqueryerrors.c
+++ b/src/ibqueryerrors.c
@@ -55,11 +55,14 @@
 #include <infiniband/mad.h>
 
 #include "ibdiag_common.h"
+#include "ibdiag_sa.h"
 
 struct ibmad_port *ibmad_port;
 static char *node_name_map_file = NULL;
 static nn_map_t *node_name_map = NULL;
 static char *load_cache_file = NULL;
+static uint16_t lid2sl_table[sizeof(uint8_t) * 1024 * 48] = { 0 };
+static int obtain_sl = 1;
 
 int data_counters = 0;
 int data_counters_only = 0;
@@ -78,6 +81,8 @@ unsigned clear_errors = 0, clear_counts = 0, details = 0;
 #define PRINT_ROUTER 0x4
 #define PRINT_ALL 0xFF		/* all nodes default flag */
 
+#define DEFAULT_HALF_WORLD_PR_TIMEOUT (3000)
+
 struct {
 	int nodes_checked;
 	int bad_nodes;
@@ -93,6 +98,13 @@ static char *threshold_file = DEF_THRES_FILE;
 uint8_t thresholds[1204] = { 0 };
 char * threshold_str = "";
 
+static unsigned valid_gid(ib_gid_t * gid)
+{
+	ib_gid_t zero_gid;
+	memset(&zero_gid, 0, sizeof zero_gid);
+	return memcmp(&zero_gid, gid, sizeof(*gid));
+}
+
 static void set_thres(char *name, uint32_t val)
 {
 	int f;
@@ -298,6 +310,55 @@ static int print_summary(void)
 	return (summary.bad_ports);
 }
 
+static void insert_lid2sl_table(struct sa_query_result *r)
+{
+    unsigned int i;
+    for (i = 0; i < r->result_cnt; i++) {
+	    ib_path_rec_t *p_pr = (ib_path_rec_t *)sa_get_query_rec(r->p_result_madw, i);
+	    lid2sl_table[cl_ntoh16(p_pr->dlid)] = ib_path_rec_sl(p_pr);
+    }
+}
+
+static int path_record_query(ib_gid_t sgid,uint64_t dguid)
+{
+     ib_path_rec_t pr;
+     ib_net64_t comp_mask = 0;
+     uint8_t reversible = 0;
+     struct sa_handle * h;
+
+     h = sa_get_handle();
+     ibd_timeout = DEFAULT_HALF_WORLD_PR_TIMEOUT;
+     memset(&pr, 0, sizeof(pr));
+
+     CHECK_AND_SET_GID(sgid, pr.sgid, PR, SGID);
+     if(dguid) {
+	     mad_encode_field(&sgid, IB_GID_GUID_F, &dguid);
+	     CHECK_AND_SET_GID(sgid, pr.dgid, PR, DGID);
+     }
+
+     CHECK_AND_SET_VAL(1, 8, -1, pr.num_path, PR, NUMBPATH);/*to get only one PathRecord for each source and destination pair*/
+     CHECK_AND_SET_VAL(1, 8, -1, reversible, PR, REVERSIBLE);/*for a reversible path*/
+     pr.num_path |= reversible << 7;
+     struct sa_query_result result;
+     int ret = sa_query(h, IB_MAD_METHOD_GET_TABLE,
+                        (uint16_t)IB_SA_ATTR_PATHRECORD,0,cl_ntoh64(comp_mask),ibd_sakey,
+                        &pr, sizeof(pr), &result);
+     if (ret) {
+             fprintf(stderr, "Query SA failed: %s; sa call path_query failed\n", strerror(ret));
+             return ret;
+     }
+     if (result.status != IB_SA_MAD_STATUS_SUCCESS) {
+             sa_report_err(result.status);
+             ret = EIO;
+             goto Exit;
+     }
+
+     insert_lid2sl_table(&result);
+Exit:
+     sa_free_result_mad(&result);
+     return ret;
+}
+
 static int query_and_dump(char *buf, size_t size, ib_portid_t * portid,
 			  ibnd_node_t * node, char *node_name, int portnum,
 			  const char *attr_name, uint16_t attr_id,
@@ -447,6 +508,8 @@ static int query_cap_mask(ib_portid_t * portid, char *node_name, int portnum,
 	uint8_t pc[1024] = { 0 };
 	uint16_t rc_cap_mask;
 
+	portid->sl = lid2sl_table[portid->lid];
+
 	/* PerfMgt ClassPortInfo is a required attribute */
 	if (!pma_query_via(pc, portid, portnum, ibd_timeout, CLASS_PORT_INFO,
 			   ibmad_port)) {
@@ -474,6 +537,8 @@ static int print_data_cnts(ib_portid_t * portid, uint16_t cap_mask,
 
 	memset(pc, 0, 1024);
 
+	portid->sl = lid2sl_table[portid->lid];
+
 	if (cap_mask & (IB_PM_EXT_WIDTH_SUPPORTED | IB_PM_EXT_WIDTH_NOIETF_SUP)) {
 		if (!pma_query_via(pc, portid, portnum, ibd_timeout,
 				   IB_GSI_PORT_COUNTERS_EXT, ibmad_port)) {
@@ -543,6 +608,8 @@ static int print_errors(ib_portid_t * portid, uint16_t cap_mask,
 	memset(pc, 0, 1024);
 	memset(pce, 0, 1024);
 
+	portid->sl = lid2sl_table[portid->lid];
+
 	if (!pma_query_via(pc, portid, portnum, ibd_timeout,
 			   IB_GSI_PORT_COUNTERS, ibmad_port)) {
 		IBWARN("IB_GSI_PORT_COUNTERS query failed on %s, %s port %d",
@@ -822,6 +889,9 @@ static int process_opt(void *context, int ch, char *optarg)
 	case 9:
 		data_counters_only = 1;
 		break;
+	case 10:
+		obtain_sl = 0;
+		break;
 	case 'G':
 	case 'S':
 		port_guid_str = optarg;
@@ -856,8 +926,11 @@ int main(int argc, char **argv)
 	struct ibnd_config config = { 0 };
 	int resolved = -1;
 	ib_portid_t portid = { 0 };
+	ib_portid_t self_portid = { 0 };
 	int rc = 0;
 	ibnd_fabric_t *fabric = NULL;
+	ib_gid_t self_gid;
+	int port = 0;
 
 	int mgmt_classes[4] = { IB_SMI_CLASS, IB_SMI_DIRECT_CLASS, IB_SA_CLASS,
 		IB_PERFORMANCE_CLASS
@@ -875,6 +948,7 @@ int main(int argc, char **argv)
 		 "Same as \"-G\" for backward compatibility"},
 		{"Direct", 'D', 1, "<dr_path>",
 		 "report the node containing the port specified by <dr_path>"},
+		{"skip-sl", 10, 0, NULL,"don't obtain SL to all destinations"},
 		{"report-port", 'r', 0, NULL,
 		 "report port link information"},
 		{"threshold-file", 8, 1, NULL,
@@ -931,6 +1005,11 @@ int main(int argc, char **argv)
 		exit(-1);
 	}
 
+	if (resolve_self(ibd_ca, ibd_ca_port, &self_portid, &port, &self_gid) < 0) {
+		IBERROR("can't resolve self port %s", argv[0]);
+		goto close_port;
+	}
+
 	/* limit the scan the fabric around the target */
 	if (dr_path) {
 		if ((resolved =
@@ -945,6 +1024,8 @@ int main(int argc, char **argv)
 					       ibmad_port)) < 0)
 			IBWARN("Failed to resolve %s; attempting full scan",
 			       port_guid_str);
+		if(obtain_sl)
+			lid2sl_table[portid.lid] = portid.sl;
 	}
 
 	if (load_cache_file) {
@@ -994,11 +1075,18 @@ int main(int argc, char **argv)
 
 		port = ibnd_find_port_guid(fabric, port_guid);
 		if (port) {
+			if(obtain_sl)
+				if(path_record_query(self_gid,port->guid))
+					goto destroy_fabric;
 			print_node(port->node, NULL);
 		} else
 			fprintf(stderr, "Failed to find node: %s\n", dr_path);
-	} else
+	} else {
+		if(obtain_sl)
+			if(path_record_query(self_gid,0))
+				goto destroy_fabric;
 		ibnd_iter_nodes(fabric, print_node, NULL);
+	}
 
 	rc = print_summary();
 	if (rc)
-- 
1.7.1

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [PATCHv5] infiniband-diags/ibqueryerrors.c: obtain destination sl before perf query
       [not found]                             ` <20130314145246.GA13095-Kyg1NGsloJ8bMuq+TCmafCexzy7lsfyC@public.gmane.org>
@ 2013-03-18 20:34                               ` Ira Weiny
  0 siblings, 0 replies; 10+ messages in thread
From: Ira Weiny @ 2013-03-18 20:34 UTC (permalink / raw)
  To: Dan Ben Yosef; +Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA

On Thu, Mar 14, 2013 at 7:52 AM, Dan Ben Yosef <danby-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org> wrote:
> 1) if use -G or -D option : we obtain sl before doing perf query.
> 2) if no destination is given : we obtain sl for every pair source-destination.
> 3) if no destination is given and use --skip-sl option : we don't obtain
> sl to all nodes in the fabric,in this case sl=0 for all node pairs.
>
> Signed-off-by: Dan Ben Yosef <danby-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>

Thanks applied,
Ira

> ---
> Changes since v4:
>         1. changed the documentation for --skip-sl option
>
>  doc/rst/ibqueryerrors.8.in.rst |    3 +
>  src/ibqueryerrors.c            |   90 +++++++++++++++++++++++++++++++++++++++-
>  2 files changed, 92 insertions(+), 1 deletions(-)
>
> diff --git a/doc/rst/ibqueryerrors.8.in.rst b/doc/rst/ibqueryerrors.8.in.rst
> index 9a05e7b..196ab9d 100644
> --- a/doc/rst/ibqueryerrors.8.in.rst
> +++ b/doc/rst/ibqueryerrors.8.in.rst
> @@ -52,6 +52,9 @@ Specify an alternate threshold file.  The default is @IBDIAG_CONFIG_PATH@/error_
>
>  **--ca**  print data for CA's only
>
> +**--skip-sl**  Use the default sl for queries. This is not recommended when
> +using a QoS aware routing engine as it can cause a credit deadlock.
> +
>  **--router**  print data for routers only
>
>  **--clear-errors -k** Clear error counters after read.
> diff --git a/src/ibqueryerrors.c b/src/ibqueryerrors.c
> index 076327c..15b7702 100644
> --- a/src/ibqueryerrors.c
> +++ b/src/ibqueryerrors.c
> @@ -55,11 +55,14 @@
>  #include <infiniband/mad.h>
>
>  #include "ibdiag_common.h"
> +#include "ibdiag_sa.h"
>
>  struct ibmad_port *ibmad_port;
>  static char *node_name_map_file = NULL;
>  static nn_map_t *node_name_map = NULL;
>  static char *load_cache_file = NULL;
> +static uint16_t lid2sl_table[sizeof(uint8_t) * 1024 * 48] = { 0 };
> +static int obtain_sl = 1;
>
>  int data_counters = 0;
>  int data_counters_only = 0;
> @@ -78,6 +81,8 @@ unsigned clear_errors = 0, clear_counts = 0, details = 0;
>  #define PRINT_ROUTER 0x4
>  #define PRINT_ALL 0xFF         /* all nodes default flag */
>
> +#define DEFAULT_HALF_WORLD_PR_TIMEOUT (3000)
> +
>  struct {
>         int nodes_checked;
>         int bad_nodes;
> @@ -93,6 +98,13 @@ static char *threshold_file = DEF_THRES_FILE;
>  uint8_t thresholds[1204] = { 0 };
>  char * threshold_str = "";
>
> +static unsigned valid_gid(ib_gid_t * gid)
> +{
> +       ib_gid_t zero_gid;
> +       memset(&zero_gid, 0, sizeof zero_gid);
> +       return memcmp(&zero_gid, gid, sizeof(*gid));
> +}
> +
>  static void set_thres(char *name, uint32_t val)
>  {
>         int f;
> @@ -298,6 +310,55 @@ static int print_summary(void)
>         return (summary.bad_ports);
>  }
>
> +static void insert_lid2sl_table(struct sa_query_result *r)
> +{
> +    unsigned int i;
> +    for (i = 0; i < r->result_cnt; i++) {
> +           ib_path_rec_t *p_pr = (ib_path_rec_t *)sa_get_query_rec(r->p_result_madw, i);
> +           lid2sl_table[cl_ntoh16(p_pr->dlid)] = ib_path_rec_sl(p_pr);
> +    }
> +}
> +
> +static int path_record_query(ib_gid_t sgid,uint64_t dguid)
> +{
> +     ib_path_rec_t pr;
> +     ib_net64_t comp_mask = 0;
> +     uint8_t reversible = 0;
> +     struct sa_handle * h;
> +
> +     h = sa_get_handle();
> +     ibd_timeout = DEFAULT_HALF_WORLD_PR_TIMEOUT;
> +     memset(&pr, 0, sizeof(pr));
> +
> +     CHECK_AND_SET_GID(sgid, pr.sgid, PR, SGID);
> +     if(dguid) {
> +            mad_encode_field(&sgid, IB_GID_GUID_F, &dguid);
> +            CHECK_AND_SET_GID(sgid, pr.dgid, PR, DGID);
> +     }
> +
> +     CHECK_AND_SET_VAL(1, 8, -1, pr.num_path, PR, NUMBPATH);/*to get only one PathRecord for each source and destination pair*/
> +     CHECK_AND_SET_VAL(1, 8, -1, reversible, PR, REVERSIBLE);/*for a reversible path*/
> +     pr.num_path |= reversible << 7;
> +     struct sa_query_result result;
> +     int ret = sa_query(h, IB_MAD_METHOD_GET_TABLE,
> +                        (uint16_t)IB_SA_ATTR_PATHRECORD,0,cl_ntoh64(comp_mask),ibd_sakey,
> +                        &pr, sizeof(pr), &result);
> +     if (ret) {
> +             fprintf(stderr, "Query SA failed: %s; sa call path_query failed\n", strerror(ret));
> +             return ret;
> +     }
> +     if (result.status != IB_SA_MAD_STATUS_SUCCESS) {
> +             sa_report_err(result.status);
> +             ret = EIO;
> +             goto Exit;
> +     }
> +
> +     insert_lid2sl_table(&result);
> +Exit:
> +     sa_free_result_mad(&result);
> +     return ret;
> +}
> +
>  static int query_and_dump(char *buf, size_t size, ib_portid_t * portid,
>                           ibnd_node_t * node, char *node_name, int portnum,
>                           const char *attr_name, uint16_t attr_id,
> @@ -447,6 +508,8 @@ static int query_cap_mask(ib_portid_t * portid, char *node_name, int portnum,
>         uint8_t pc[1024] = { 0 };
>         uint16_t rc_cap_mask;
>
> +       portid->sl = lid2sl_table[portid->lid];
> +
>         /* PerfMgt ClassPortInfo is a required attribute */
>         if (!pma_query_via(pc, portid, portnum, ibd_timeout, CLASS_PORT_INFO,
>                            ibmad_port)) {
> @@ -474,6 +537,8 @@ static int print_data_cnts(ib_portid_t * portid, uint16_t cap_mask,
>
>         memset(pc, 0, 1024);
>
> +       portid->sl = lid2sl_table[portid->lid];
> +
>         if (cap_mask & (IB_PM_EXT_WIDTH_SUPPORTED | IB_PM_EXT_WIDTH_NOIETF_SUP)) {
>                 if (!pma_query_via(pc, portid, portnum, ibd_timeout,
>                                    IB_GSI_PORT_COUNTERS_EXT, ibmad_port)) {
> @@ -543,6 +608,8 @@ static int print_errors(ib_portid_t * portid, uint16_t cap_mask,
>         memset(pc, 0, 1024);
>         memset(pce, 0, 1024);
>
> +       portid->sl = lid2sl_table[portid->lid];
> +
>         if (!pma_query_via(pc, portid, portnum, ibd_timeout,
>                            IB_GSI_PORT_COUNTERS, ibmad_port)) {
>                 IBWARN("IB_GSI_PORT_COUNTERS query failed on %s, %s port %d",
> @@ -822,6 +889,9 @@ static int process_opt(void *context, int ch, char *optarg)
>         case 9:
>                 data_counters_only = 1;
>                 break;
> +       case 10:
> +               obtain_sl = 0;
> +               break;
>         case 'G':
>         case 'S':
>                 port_guid_str = optarg;
> @@ -856,8 +926,11 @@ int main(int argc, char **argv)
>         struct ibnd_config config = { 0 };
>         int resolved = -1;
>         ib_portid_t portid = { 0 };
> +       ib_portid_t self_portid = { 0 };
>         int rc = 0;
>         ibnd_fabric_t *fabric = NULL;
> +       ib_gid_t self_gid;
> +       int port = 0;
>
>         int mgmt_classes[4] = { IB_SMI_CLASS, IB_SMI_DIRECT_CLASS, IB_SA_CLASS,
>                 IB_PERFORMANCE_CLASS
> @@ -875,6 +948,7 @@ int main(int argc, char **argv)
>                  "Same as \"-G\" for backward compatibility"},
>                 {"Direct", 'D', 1, "<dr_path>",
>                  "report the node containing the port specified by <dr_path>"},
> +               {"skip-sl", 10, 0, NULL,"don't obtain SL to all destinations"},
>                 {"report-port", 'r', 0, NULL,
>                  "report port link information"},
>                 {"threshold-file", 8, 1, NULL,
> @@ -931,6 +1005,11 @@ int main(int argc, char **argv)
>                 exit(-1);
>         }
>
> +       if (resolve_self(ibd_ca, ibd_ca_port, &self_portid, &port, &self_gid) < 0) {
> +               IBERROR("can't resolve self port %s", argv[0]);
> +               goto close_port;
> +       }
> +
>         /* limit the scan the fabric around the target */
>         if (dr_path) {
>                 if ((resolved =
> @@ -945,6 +1024,8 @@ int main(int argc, char **argv)
>                                                ibmad_port)) < 0)
>                         IBWARN("Failed to resolve %s; attempting full scan",
>                                port_guid_str);
> +               if(obtain_sl)
> +                       lid2sl_table[portid.lid] = portid.sl;
>         }
>
>         if (load_cache_file) {
> @@ -994,11 +1075,18 @@ int main(int argc, char **argv)
>
>                 port = ibnd_find_port_guid(fabric, port_guid);
>                 if (port) {
> +                       if(obtain_sl)
> +                               if(path_record_query(self_gid,port->guid))
> +                                       goto destroy_fabric;
>                         print_node(port->node, NULL);
>                 } else
>                         fprintf(stderr, "Failed to find node: %s\n", dr_path);
> -       } else
> +       } else {
> +               if(obtain_sl)
> +                       if(path_record_query(self_gid,0))
> +                               goto destroy_fabric;
>                 ibnd_iter_nodes(fabric, print_node, NULL);
> +       }
>
>         rc = print_summary();
>         if (rc)
> --
> 1.7.1
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
> the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2013-03-18 20:34 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2013-02-20 11:58 [PATCH]: infiniband-diags/ibqueryerrors.c: obtain destination sl before perf query Dan Ben Yosef
     [not found] ` <20130220115807.GC20018-Kyg1NGsloJ/nXW+7WiO5SxL4W9x8LtSr@public.gmane.org>
2013-02-20 18:46   ` Ira Weiny
2013-02-24  8:21   ` [PATCHv2] " Dan Ben Yosef
     [not found]     ` <20130224082140.GA11648-Kyg1NGsloJ/nXW+7WiO5SxL4W9x8LtSr@public.gmane.org>
2013-02-27  1:07       ` Ira Weiny
     [not found]         ` <20130226170744.53adb74b6af9a54d9481dd4c-i2BcT+NCU+M@public.gmane.org>
2013-02-27 14:10           ` [PATCHv3] " Dan Ben Yosef
     [not found]             ` <20130227141032.GA9919-Kyg1NGsloJ8bMuq+TCmafCexzy7lsfyC@public.gmane.org>
2013-02-28 16:11               ` Hal Rosenstock
     [not found]                 ` <512F81A3.6000100-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org>
2013-03-01  1:33                   ` Ira Weiny
     [not found]                     ` <20130228173336.18bc61527d20d160c9f39818-i2BcT+NCU+M@public.gmane.org>
2013-03-14 14:09                       ` [PATCHv4] " Dan Ben Yosef
     [not found]                         ` <20130314140902.GA10423-Kyg1NGsloJ8bMuq+TCmafCexzy7lsfyC@public.gmane.org>
2013-03-14 14:52                           ` [PATCHv5] " Dan Ben Yosef
     [not found]                             ` <20130314145246.GA13095-Kyg1NGsloJ8bMuq+TCmafCexzy7lsfyC@public.gmane.org>
2013-03-18 20:34                               ` Ira Weiny

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox