* [PATCH] opensm: Add configurable retries for transactions
@ 2009-10-24 14:05 Hal Rosenstock
[not found] ` <20091024140538.GA5213-Wuw85uim5zDR7s880joybQ@public.gmane.org>
0 siblings, 1 reply; 7+ messages in thread
From: Hal Rosenstock @ 2009-10-24 14:05 UTC (permalink / raw)
To: sashak-smomgflXvOZWk0Htik3J/w; +Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA
As part of this change, transaction timeouts (and retries) are per
umad agent (supplied at osm_vendor_bind).
Old ABI is supported without these bind parameters.
Signed-off-by: Hal Rosenstock <hal.rosenstock-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
---
diff --git a/opensm/include/opensm/osm_madw.h b/opensm/include/opensm/osm_madw.h
index afa7047..10a065b 100644
--- a/opensm/include/opensm/osm_madw.h
+++ b/opensm/include/opensm/osm_madw.h
@@ -2,6 +2,7 @@
* Copyright (c) 2004-2008 Voltaire, Inc. All rights reserved.
* Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved.
* Copyright (c) 1996-2003 Intel Corporation. All rights reserved.
+ * Copyright (c) 2009 HNR Consulting. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -76,6 +77,8 @@ typedef struct osm_bind_info {
boolean_t is_report_processor;
uint32_t send_q_size;
uint32_t recv_q_size;
+ uint32_t timeout;
+ uint32_t retries;
} osm_bind_info_t;
/*
* FIELDS
@@ -103,6 +106,12 @@ typedef struct osm_bind_info {
* recv_q_size
* Receive Queue Size
*
+* timeout
+* Transaction timeout
+*
+* retries
+* Number of retries for transaction
+*
* SEE ALSO
*********/
diff --git a/opensm/include/opensm/osm_subnet.h b/opensm/include/opensm/osm_subnet.h
index b63c97e..77347c8 100644
--- a/opensm/include/opensm/osm_subnet.h
+++ b/opensm/include/opensm/osm_subnet.h
@@ -4,6 +4,7 @@
* Copyright (c) 1996-2003 Intel Corporation. All rights reserved.
* Copyright (c) 2008 Xsigo Systems Inc. All rights reserved.
* Copyright (c) 2009 System Fabric Works, Inc. All rights reserved.
+ * Copyright (c) 2009 HNR Consulting. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -149,6 +150,7 @@ typedef struct osm_subn_opt {
uint32_t sweep_interval;
uint32_t max_wire_smps;
uint32_t transaction_timeout;
+ uint32_t transaction_retries;
uint8_t sm_priority;
uint8_t lmc;
boolean_t lmc_esp0;
@@ -261,6 +263,9 @@ typedef struct osm_subn_opt {
* The maximum time in milliseconds allowed for a transaction
* to complete. Default is 200.
*
+* transaction_retries
+* The number of retries for a transaction. Default is 3.
+*
* sm_priority
* The priority of this SM as specified by the user. This
* value is made available in the SMInfo attribute.
diff --git a/opensm/libvendor/osm_vendor_ibumad.c b/opensm/libvendor/osm_vendor_ibumad.c
index 8d3c680..c98141e 100644
--- a/opensm/libvendor/osm_vendor_ibumad.c
+++ b/opensm/libvendor/osm_vendor_ibumad.c
@@ -85,6 +85,8 @@ typedef struct _osm_umad_bind_info {
int port_id;
int agent_id;
int agent_id1; /* SMI requires two agents */
+ uint32_t timeout;
+ uint32_t max_retries;
} osm_umad_bind_info_t;
typedef struct _umad_receiver {
@@ -476,7 +478,7 @@ osm_vendor_init(IN osm_vendor_t * const p_vend,
p_vend->mtbl.max = tmp;
else
OSM_LOG(p_vend->p_log, OSM_LOG_ERROR, "Error:"
- "OSM_UMAD_MAX_PENDING=%d is invalid",
+ "OSM_UMAD_MAX_PENDING=%d is invalid\n",
tmp);
}
@@ -819,6 +821,13 @@ osm_vendor_bind(IN osm_vendor_t * const p_vend,
p_bind->send_err_callback = send_err_callback;
p_bind->p_mad_pool = p_mad_pool;
p_bind->port_guid = port_guid;
+ if (p_vend->timeout == -1) {
+ p_bind->timeout = p_user_bind->timeout;
+ p_bind->max_retries = p_user_bind->retries;
+ } else {
+ p_bind->timeout = p_vend->timeout;
+ p_bind->max_retries = p_vend->max_retries;
+ }
memset(method_mask, 0, sizeof method_mask);
if (p_user_bind->is_responder) {
@@ -1086,8 +1095,8 @@ Resp:
#endif
if ((ret = umad_send(p_bind->port_id, p_bind->agent_id, p_vw->umad,
sent_mad_size,
- resp_expected ? p_vend->timeout : 0,
- p_vend->max_retries)) < 0) {
+ resp_expected ? p_bind->timeout : 0,
+ p_bind->max_retries)) < 0) {
OSM_LOG(p_vend->p_log, OSM_LOG_ERROR, "ERR 5430: "
"Send p_madw = %p of size %d failed %d (%m)\n",
p_madw, sent_mad_size, ret);
diff --git a/opensm/man/opensm.8.in b/opensm/man/opensm.8.in
index 03002c0..bd8ab4e 100644
--- a/opensm/man/opensm.8.in
+++ b/opensm/man/opensm.8.in
@@ -1,4 +1,4 @@
-.TH OPENSM 8 "September 14, 2009" "OpenIB" "OpenIB Management"
+.TH OPENSM 8 "October 22, 2009" "OpenIB" "OpenIB Management"
.SH NAME
opensm \- InfiniBand subnet manager and administration (SM/SA)
@@ -31,6 +31,7 @@ opensm \- InfiniBand subnet manager and administration (SM/SA)
[\-o(nce)]
[\-s(weep) <interval>]
[\-t(imeout) <milliseconds>]
+[\-\-retries <number>]
[\-maxsmps <number>]
[\-console [off | local | socket | loopback]]
[\-console-port <port>]
@@ -233,6 +234,12 @@ Specifying -t 0 disables timeouts.
Without -t, OpenSM defaults to a timeout value of
200 milliseconds.
.TP
+\fB\-\-retries\fR <number>
+This option specifies the number of retries used
+for transactions.
+Without --retries, OpenSM defaults to 3 retries
+for transactions.
+.TP
\fB\-maxsmps\fR <number>
This option specifies the number of VL15 SMP MADs
allowed on the wire at any one time.
diff --git a/opensm/opensm/main.c b/opensm/opensm/main.c
index 2e28c83..b697994 100644
--- a/opensm/opensm/main.c
+++ b/opensm/opensm/main.c
@@ -248,6 +248,11 @@ static void show_usage(void)
" Specifying -t 0 disables timeouts.\n"
" Without -t, OpenSM defaults to a timeout value of\n"
" 200 milliseconds.\n\n");
+ printf("--retries <number>\n"
+ " This option specifies the number of retries used\n"
+ " for transactions.\n"
+ " Without --retries, OpenSM defaults to %u retries\n"
+ " for transactions.\n\n", OSM_DEFAULT_RETRY_COUNT);
printf("--maxsmps, -n <number>\n"
" This option specifies the number of VL15 SMP MADs\n"
" allowed on the wire at any one time.\n"
@@ -610,6 +615,7 @@ int main(int argc, char *argv[])
{"do_mesh_analysis", 0, NULL, 5},
{"lash_start_vl", 1, NULL, 6},
{"sm_sl", 1, NULL, 7},
+ {"retries", 1, NULL, 8},
{NULL, 0, NULL, 0} /* Required at the end of the array */
};
@@ -983,6 +989,11 @@ int main(int argc, char *argv[])
opt.sm_sl = (uint8_t) temp;
printf(" SMSL = %d\n", opt.sm_sl);
break;
+ case 8:
+ opt.transaction_retries = strtol(optarg, NULL, 0);
+ printf(" Transaction retries = %u\n",
+ opt.transaction_retries);
+ break;
case 'h':
case '?':
case ':':
diff --git a/opensm/opensm/osm_opensm.c b/opensm/opensm/osm_opensm.c
index 1f2b067..52668ca 100644
--- a/opensm/opensm/osm_opensm.c
+++ b/opensm/opensm/osm_opensm.c
@@ -2,6 +2,7 @@
* Copyright (c) 2004-2008 Voltaire, Inc. All rights reserved.
* Copyright (c) 2002-2006 Mellanox Technologies LTD. All rights reserved.
* Copyright (c) 1996-2003 Intel Corporation. All rights reserved.
+ * Copyright (c) 2009 HNR Consulting. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -392,8 +393,7 @@ ib_api_status_t osm_opensm_init(IN osm_opensm_t * p_osm,
if (status != IB_SUCCESS)
goto Exit;
- p_osm->p_vendor =
- osm_vendor_new(&p_osm->log, p_opt->transaction_timeout);
+ p_osm->p_vendor = osm_vendor_new(&p_osm->log, -1);
if (p_osm->p_vendor == NULL) {
status = IB_INSUFFICIENT_RESOURCES;
goto Exit;
diff --git a/opensm/opensm/osm_perfmgr.c b/opensm/opensm/osm_perfmgr.c
index f95610e..f0ec92d 100644
--- a/opensm/opensm/osm_perfmgr.c
+++ b/opensm/opensm/osm_perfmgr.c
@@ -264,6 +264,8 @@ ib_api_status_t osm_perfmgr_bind(osm_perfmgr_t * pm, ib_net64_t port_guid)
bind_info.is_trap_processor = FALSE;
bind_info.recv_q_size = OSM_PM_DEFAULT_QP1_RCV_SIZE;
bind_info.send_q_size = OSM_PM_DEFAULT_QP1_SEND_SIZE;
+ bind_info.timeout = pm->subn->opt.transaction_timeout;
+ bind_info.retries = pm->subn->opt.transaction_retries;
OSM_LOG(pm->log, OSM_LOG_VERBOSE,
"Binding to port GUID 0x%" PRIx64 "\n", cl_ntoh64(port_guid));
diff --git a/opensm/opensm/osm_sa_mad_ctrl.c b/opensm/opensm/osm_sa_mad_ctrl.c
index f22411b..04b6693 100644
--- a/opensm/opensm/osm_sa_mad_ctrl.c
+++ b/opensm/opensm/osm_sa_mad_ctrl.c
@@ -2,6 +2,7 @@
* Copyright (c) 2004-2008 Voltaire, Inc. All rights reserved.
* Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved.
* Copyright (c) 1996-2003 Intel Corporation. All rights reserved.
+ * Copyright (c) 2009 HNR Consulting. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -528,6 +529,8 @@ ib_api_status_t osm_sa_mad_ctrl_bind(IN osm_sa_mad_ctrl_t * p_ctrl,
bind_info.port_guid = port_guid;
bind_info.recv_q_size = OSM_SM_DEFAULT_QP1_RCV_SIZE;
bind_info.send_q_size = OSM_SM_DEFAULT_QP1_SEND_SIZE;
+ bind_info.timeout = p_ctrl->sa->p_subn->opt.transaction_timeout;
+ bind_info.retries = p_ctrl->sa->p_subn->opt.transaction_retries;
OSM_LOG(p_ctrl->p_log, OSM_LOG_VERBOSE,
"Binding to port GUID 0x%" PRIx64 "\n", cl_ntoh64(port_guid));
diff --git a/opensm/opensm/osm_sm_mad_ctrl.c b/opensm/opensm/osm_sm_mad_ctrl.c
index f96aff4..c51c158 100644
--- a/opensm/opensm/osm_sm_mad_ctrl.c
+++ b/opensm/opensm/osm_sm_mad_ctrl.c
@@ -885,6 +885,8 @@ ib_api_status_t osm_sm_mad_ctrl_bind(IN osm_sm_mad_ctrl_t * p_ctrl,
bind_info.port_guid = port_guid;
bind_info.recv_q_size = OSM_SM_DEFAULT_QP0_RCV_SIZE;
bind_info.send_q_size = OSM_SM_DEFAULT_QP0_SEND_SIZE;
+ bind_info.timeout = p_ctrl->p_subn->opt.transaction_timeout;
+ bind_info.retries = p_ctrl->p_subn->opt.transaction_retries;
OSM_LOG(p_ctrl->p_log, OSM_LOG_VERBOSE,
"Binding to port 0x%" PRIx64 "\n", cl_ntoh64(port_guid));
diff --git a/opensm/opensm/osm_subnet.c b/opensm/opensm/osm_subnet.c
index 8976b5f..a9a7981 100644
--- a/opensm/opensm/osm_subnet.c
+++ b/opensm/opensm/osm_subnet.c
@@ -4,6 +4,7 @@
* Copyright (c) 1996-2003 Intel Corporation. All rights reserved.
* Copyright (c) 2008 Xsigo Systems Inc. All rights reserved.
* Copyright (c) 2009 System Fabric Works, Inc. All rights reserved.
+ * Copyright (c) 2009 HNR Consulting. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -299,6 +300,7 @@ static const opt_rec_t opt_tbl[] = {
{ "console", OPT_OFFSET(console), opts_parse_charp, NULL, 0 },
{ "console_port", OPT_OFFSET(console_port), opts_parse_uint16, NULL, 0 },
{ "transaction_timeout", OPT_OFFSET(transaction_timeout), opts_parse_uint32, NULL, 1 },
+ { "transaction_retries", OPT_OFFSET(transaction_retries), opts_parse_uint32, NULL, 1 },
{ "max_msg_fifo_timeout", OPT_OFFSET(max_msg_fifo_timeout), opts_parse_uint32, NULL, 1 },
{ "sm_priority", OPT_OFFSET(sm_priority), opts_parse_uint8, opts_setup_sm_priority, 1 },
{ "lmc", OPT_OFFSET(lmc), opts_parse_uint8, NULL, 1 },
@@ -694,6 +696,7 @@ void osm_subn_set_default_opt(IN osm_subn_opt_t * p_opt)
p_opt->console = strdup(OSM_DEFAULT_CONSOLE);
p_opt->console_port = OSM_DEFAULT_CONSOLE_PORT;
p_opt->transaction_timeout = OSM_DEFAULT_TRANS_TIMEOUT_MILLISEC;
+ p_opt->transaction_retries = OSM_DEFAULT_RETRY_COUNT;
/* by default we will consider waiting for 50x transaction timeout normal */
p_opt->max_msg_fifo_timeout = 50 * OSM_DEFAULT_TRANS_TIMEOUT_MILLISEC;
p_opt->sm_priority = OSM_DEFAULT_SM_PRIORITY;
@@ -1500,6 +1503,8 @@ int osm_subn_output_conf(FILE *out, IN osm_subn_opt_t * p_opts)
"max_wire_smps %u\n\n"
"# The maximum time in [msec] allowed for a transaction to complete\n"
"transaction_timeout %u\n\n"
+ " The maximum number of retries allowed for a transaction to complete\n"
+ "transaction_retries %u\n\n"
"# Maximal time in [msec] a message can stay in the incoming message queue.\n"
"# If there is more than one message in the queue and the last message\n"
"# stayed in the queue more than this value, any SA request will be\n"
@@ -1509,6 +1514,7 @@ int osm_subn_output_conf(FILE *out, IN osm_subn_opt_t * p_opts)
"single_thread %s\n\n",
p_opts->max_wire_smps,
p_opts->transaction_timeout,
+ p_opts->transaction_retries,
p_opts->max_msg_fifo_timeout,
p_opts->single_thread ? "TRUE" : "FALSE");
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply related [flat|nested] 7+ messages in thread[parent not found: <20091024140538.GA5213-Wuw85uim5zDR7s880joybQ@public.gmane.org>]
* Re: [PATCH] opensm: Add configurable retries for transactions [not found] ` <20091024140538.GA5213-Wuw85uim5zDR7s880joybQ@public.gmane.org> @ 2009-10-30 2:55 ` Sasha Khapyorsky 2009-10-30 15:48 ` Hal Rosenstock 0 siblings, 1 reply; 7+ messages in thread From: Sasha Khapyorsky @ 2009-10-30 2:55 UTC (permalink / raw) To: Hal Rosenstock; +Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA On 10:05 Sat 24 Oct , Hal Rosenstock wrote: > > As part of this change, transaction timeouts (and retries) are per > umad agent (supplied at osm_vendor_bind). > > Old ABI is supported without these bind parameters. > > Signed-off-by: Hal Rosenstock <hal.rosenstock-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org> > --- > diff --git a/opensm/include/opensm/osm_madw.h b/opensm/include/opensm/osm_madw.h > index afa7047..10a065b 100644 > --- a/opensm/include/opensm/osm_madw.h > +++ b/opensm/include/opensm/osm_madw.h > @@ -2,6 +2,7 @@ > * Copyright (c) 2004-2008 Voltaire, Inc. All rights reserved. > * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. > * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. > + * Copyright (c) 2009 HNR Consulting. All rights reserved. > * > * This software is available to you under a choice of one of two > * licenses. You may choose to be licensed under the terms of the GNU > @@ -76,6 +77,8 @@ typedef struct osm_bind_info { > boolean_t is_report_processor; > uint32_t send_q_size; > uint32_t recv_q_size; > + uint32_t timeout; > + uint32_t retries; > } osm_bind_info_t; > /* > * FIELDS > @@ -103,6 +106,12 @@ typedef struct osm_bind_info { > * recv_q_size > * Receive Queue Size > * > +* timeout > +* Transaction timeout > +* > +* retries > +* Number of retries for transaction > +* > * SEE ALSO > *********/ > > diff --git a/opensm/include/opensm/osm_subnet.h b/opensm/include/opensm/osm_subnet.h > index b63c97e..77347c8 100644 > --- a/opensm/include/opensm/osm_subnet.h > +++ b/opensm/include/opensm/osm_subnet.h > @@ -4,6 +4,7 @@ > * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. > * Copyright (c) 2008 Xsigo Systems Inc. All rights reserved. > * Copyright (c) 2009 System Fabric Works, Inc. All rights reserved. > + * Copyright (c) 2009 HNR Consulting. All rights reserved. > * > * This software is available to you under a choice of one of two > * licenses. You may choose to be licensed under the terms of the GNU > @@ -149,6 +150,7 @@ typedef struct osm_subn_opt { > uint32_t sweep_interval; > uint32_t max_wire_smps; > uint32_t transaction_timeout; > + uint32_t transaction_retries; > uint8_t sm_priority; > uint8_t lmc; > boolean_t lmc_esp0; > @@ -261,6 +263,9 @@ typedef struct osm_subn_opt { > * The maximum time in milliseconds allowed for a transaction > * to complete. Default is 200. > * > +* transaction_retries > +* The number of retries for a transaction. Default is 3. > +* > * sm_priority > * The priority of this SM as specified by the user. This > * value is made available in the SMInfo attribute. > diff --git a/opensm/libvendor/osm_vendor_ibumad.c b/opensm/libvendor/osm_vendor_ibumad.c > index 8d3c680..c98141e 100644 > --- a/opensm/libvendor/osm_vendor_ibumad.c > +++ b/opensm/libvendor/osm_vendor_ibumad.c > @@ -85,6 +85,8 @@ typedef struct _osm_umad_bind_info { > int port_id; > int agent_id; > int agent_id1; /* SMI requires two agents */ > + uint32_t timeout; > + uint32_t max_retries; You don't need fixed size types for those values, do you? And umad_send() and umad_recv() accept timeout and retries parameters as 'int' - would be better to be consistent with types. > } osm_umad_bind_info_t; > > typedef struct _umad_receiver { > @@ -476,7 +478,7 @@ osm_vendor_init(IN osm_vendor_t * const p_vend, > p_vend->mtbl.max = tmp; > else > OSM_LOG(p_vend->p_log, OSM_LOG_ERROR, "Error:" > - "OSM_UMAD_MAX_PENDING=%d is invalid", > + "OSM_UMAD_MAX_PENDING=%d is invalid\n", > tmp); > } > > @@ -819,6 +821,13 @@ osm_vendor_bind(IN osm_vendor_t * const p_vend, > p_bind->send_err_callback = send_err_callback; > p_bind->p_mad_pool = p_mad_pool; > p_bind->port_guid = port_guid; > + if (p_vend->timeout == -1) { > + p_bind->timeout = p_user_bind->timeout; > + p_bind->max_retries = p_user_bind->retries; > + } else { > + p_bind->timeout = p_vend->timeout; > + p_bind->max_retries = p_vend->max_retries; > + } Hmm, shouldn't we respect user requested data? Something like: p_bind->timeout = p_user_bind->timeout ? p_user_bind->timeout : p_vend->timeout; p_bind->retries = p_user_bind->retries ? p_user_bind->retries : p_vend->retries; ? > > memset(method_mask, 0, sizeof method_mask); > if (p_user_bind->is_responder) { > @@ -1086,8 +1095,8 @@ Resp: > #endif > if ((ret = umad_send(p_bind->port_id, p_bind->agent_id, p_vw->umad, > sent_mad_size, > - resp_expected ? p_vend->timeout : 0, > - p_vend->max_retries)) < 0) { > + resp_expected ? p_bind->timeout : 0, > + p_bind->max_retries)) < 0) { > OSM_LOG(p_vend->p_log, OSM_LOG_ERROR, "ERR 5430: " > "Send p_madw = %p of size %d failed %d (%m)\n", > p_madw, sent_mad_size, ret); > diff --git a/opensm/man/opensm.8.in b/opensm/man/opensm.8.in > index 03002c0..bd8ab4e 100644 > --- a/opensm/man/opensm.8.in > +++ b/opensm/man/opensm.8.in > @@ -1,4 +1,4 @@ > -.TH OPENSM 8 "September 14, 2009" "OpenIB" "OpenIB Management" > +.TH OPENSM 8 "October 22, 2009" "OpenIB" "OpenIB Management" > > .SH NAME > opensm \- InfiniBand subnet manager and administration (SM/SA) > @@ -31,6 +31,7 @@ opensm \- InfiniBand subnet manager and administration (SM/SA) > [\-o(nce)] > [\-s(weep) <interval>] > [\-t(imeout) <milliseconds>] > +[\-\-retries <number>] > [\-maxsmps <number>] > [\-console [off | local | socket | loopback]] > [\-console-port <port>] > @@ -233,6 +234,12 @@ Specifying -t 0 disables timeouts. > Without -t, OpenSM defaults to a timeout value of > 200 milliseconds. > .TP > +\fB\-\-retries\fR <number> > +This option specifies the number of retries used > +for transactions. > +Without --retries, OpenSM defaults to 3 retries > +for transactions. > +.TP > \fB\-maxsmps\fR <number> > This option specifies the number of VL15 SMP MADs > allowed on the wire at any one time. > diff --git a/opensm/opensm/main.c b/opensm/opensm/main.c > index 2e28c83..b697994 100644 > --- a/opensm/opensm/main.c > +++ b/opensm/opensm/main.c > @@ -248,6 +248,11 @@ static void show_usage(void) > " Specifying -t 0 disables timeouts.\n" > " Without -t, OpenSM defaults to a timeout value of\n" > " 200 milliseconds.\n\n"); > + printf("--retries <number>\n" > + " This option specifies the number of retries used\n" > + " for transactions.\n" > + " Without --retries, OpenSM defaults to %u retries\n" > + " for transactions.\n\n", OSM_DEFAULT_RETRY_COUNT); > printf("--maxsmps, -n <number>\n" > " This option specifies the number of VL15 SMP MADs\n" > " allowed on the wire at any one time.\n" > @@ -610,6 +615,7 @@ int main(int argc, char *argv[]) > {"do_mesh_analysis", 0, NULL, 5}, > {"lash_start_vl", 1, NULL, 6}, > {"sm_sl", 1, NULL, 7}, > + {"retries", 1, NULL, 8}, > {NULL, 0, NULL, 0} /* Required at the end of the array */ > }; > > @@ -983,6 +989,11 @@ int main(int argc, char *argv[]) > opt.sm_sl = (uint8_t) temp; > printf(" SMSL = %d\n", opt.sm_sl); > break; > + case 8: > + opt.transaction_retries = strtol(optarg, NULL, 0); Please use strtoul(). > + printf(" Transaction retries = %u\n", > + opt.transaction_retries); > + break; > case 'h': > case '?': > case ':': > diff --git a/opensm/opensm/osm_opensm.c b/opensm/opensm/osm_opensm.c > index 1f2b067..52668ca 100644 > --- a/opensm/opensm/osm_opensm.c > +++ b/opensm/opensm/osm_opensm.c > @@ -2,6 +2,7 @@ > * Copyright (c) 2004-2008 Voltaire, Inc. All rights reserved. > * Copyright (c) 2002-2006 Mellanox Technologies LTD. All rights reserved. > * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. > + * Copyright (c) 2009 HNR Consulting. All rights reserved. > * > * This software is available to you under a choice of one of two > * licenses. You may choose to be licensed under the terms of the GNU > @@ -392,8 +393,7 @@ ib_api_status_t osm_opensm_init(IN osm_opensm_t * p_osm, > if (status != IB_SUCCESS) > goto Exit; > > - p_osm->p_vendor = > - osm_vendor_new(&p_osm->log, p_opt->transaction_timeout); > + p_osm->p_vendor = osm_vendor_new(&p_osm->log, -1); Why to not make p_opt->transaction_timeout as default for newly allocated vendor object? Sasha > if (p_osm->p_vendor == NULL) { > status = IB_INSUFFICIENT_RESOURCES; > goto Exit; > diff --git a/opensm/opensm/osm_perfmgr.c b/opensm/opensm/osm_perfmgr.c > index f95610e..f0ec92d 100644 > --- a/opensm/opensm/osm_perfmgr.c > +++ b/opensm/opensm/osm_perfmgr.c > @@ -264,6 +264,8 @@ ib_api_status_t osm_perfmgr_bind(osm_perfmgr_t * pm, ib_net64_t port_guid) > bind_info.is_trap_processor = FALSE; > bind_info.recv_q_size = OSM_PM_DEFAULT_QP1_RCV_SIZE; > bind_info.send_q_size = OSM_PM_DEFAULT_QP1_SEND_SIZE; > + bind_info.timeout = pm->subn->opt.transaction_timeout; > + bind_info.retries = pm->subn->opt.transaction_retries; > > OSM_LOG(pm->log, OSM_LOG_VERBOSE, > "Binding to port GUID 0x%" PRIx64 "\n", cl_ntoh64(port_guid)); > diff --git a/opensm/opensm/osm_sa_mad_ctrl.c b/opensm/opensm/osm_sa_mad_ctrl.c > index f22411b..04b6693 100644 > --- a/opensm/opensm/osm_sa_mad_ctrl.c > +++ b/opensm/opensm/osm_sa_mad_ctrl.c > @@ -2,6 +2,7 @@ > * Copyright (c) 2004-2008 Voltaire, Inc. All rights reserved. > * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. > * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. > + * Copyright (c) 2009 HNR Consulting. All rights reserved. > * > * This software is available to you under a choice of one of two > * licenses. You may choose to be licensed under the terms of the GNU > @@ -528,6 +529,8 @@ ib_api_status_t osm_sa_mad_ctrl_bind(IN osm_sa_mad_ctrl_t * p_ctrl, > bind_info.port_guid = port_guid; > bind_info.recv_q_size = OSM_SM_DEFAULT_QP1_RCV_SIZE; > bind_info.send_q_size = OSM_SM_DEFAULT_QP1_SEND_SIZE; > + bind_info.timeout = p_ctrl->sa->p_subn->opt.transaction_timeout; > + bind_info.retries = p_ctrl->sa->p_subn->opt.transaction_retries; > > OSM_LOG(p_ctrl->p_log, OSM_LOG_VERBOSE, > "Binding to port GUID 0x%" PRIx64 "\n", cl_ntoh64(port_guid)); > diff --git a/opensm/opensm/osm_sm_mad_ctrl.c b/opensm/opensm/osm_sm_mad_ctrl.c > index f96aff4..c51c158 100644 > --- a/opensm/opensm/osm_sm_mad_ctrl.c > +++ b/opensm/opensm/osm_sm_mad_ctrl.c > @@ -885,6 +885,8 @@ ib_api_status_t osm_sm_mad_ctrl_bind(IN osm_sm_mad_ctrl_t * p_ctrl, > bind_info.port_guid = port_guid; > bind_info.recv_q_size = OSM_SM_DEFAULT_QP0_RCV_SIZE; > bind_info.send_q_size = OSM_SM_DEFAULT_QP0_SEND_SIZE; > + bind_info.timeout = p_ctrl->p_subn->opt.transaction_timeout; > + bind_info.retries = p_ctrl->p_subn->opt.transaction_retries; > > OSM_LOG(p_ctrl->p_log, OSM_LOG_VERBOSE, > "Binding to port 0x%" PRIx64 "\n", cl_ntoh64(port_guid)); > diff --git a/opensm/opensm/osm_subnet.c b/opensm/opensm/osm_subnet.c > index 8976b5f..a9a7981 100644 > --- a/opensm/opensm/osm_subnet.c > +++ b/opensm/opensm/osm_subnet.c > @@ -4,6 +4,7 @@ > * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. > * Copyright (c) 2008 Xsigo Systems Inc. All rights reserved. > * Copyright (c) 2009 System Fabric Works, Inc. All rights reserved. > + * Copyright (c) 2009 HNR Consulting. All rights reserved. > * > * This software is available to you under a choice of one of two > * licenses. You may choose to be licensed under the terms of the GNU > @@ -299,6 +300,7 @@ static const opt_rec_t opt_tbl[] = { > { "console", OPT_OFFSET(console), opts_parse_charp, NULL, 0 }, > { "console_port", OPT_OFFSET(console_port), opts_parse_uint16, NULL, 0 }, > { "transaction_timeout", OPT_OFFSET(transaction_timeout), opts_parse_uint32, NULL, 1 }, > + { "transaction_retries", OPT_OFFSET(transaction_retries), opts_parse_uint32, NULL, 1 }, > { "max_msg_fifo_timeout", OPT_OFFSET(max_msg_fifo_timeout), opts_parse_uint32, NULL, 1 }, > { "sm_priority", OPT_OFFSET(sm_priority), opts_parse_uint8, opts_setup_sm_priority, 1 }, > { "lmc", OPT_OFFSET(lmc), opts_parse_uint8, NULL, 1 }, > @@ -694,6 +696,7 @@ void osm_subn_set_default_opt(IN osm_subn_opt_t * p_opt) > p_opt->console = strdup(OSM_DEFAULT_CONSOLE); > p_opt->console_port = OSM_DEFAULT_CONSOLE_PORT; > p_opt->transaction_timeout = OSM_DEFAULT_TRANS_TIMEOUT_MILLISEC; > + p_opt->transaction_retries = OSM_DEFAULT_RETRY_COUNT; > /* by default we will consider waiting for 50x transaction timeout normal */ > p_opt->max_msg_fifo_timeout = 50 * OSM_DEFAULT_TRANS_TIMEOUT_MILLISEC; > p_opt->sm_priority = OSM_DEFAULT_SM_PRIORITY; > @@ -1500,6 +1503,8 @@ int osm_subn_output_conf(FILE *out, IN osm_subn_opt_t * p_opts) > "max_wire_smps %u\n\n" > "# The maximum time in [msec] allowed for a transaction to complete\n" > "transaction_timeout %u\n\n" > + " The maximum number of retries allowed for a transaction to complete\n" > + "transaction_retries %u\n\n" > "# Maximal time in [msec] a message can stay in the incoming message queue.\n" > "# If there is more than one message in the queue and the last message\n" > "# stayed in the queue more than this value, any SA request will be\n" > @@ -1509,6 +1514,7 @@ int osm_subn_output_conf(FILE *out, IN osm_subn_opt_t * p_opts) > "single_thread %s\n\n", > p_opts->max_wire_smps, > p_opts->transaction_timeout, > + p_opts->transaction_retries, > p_opts->max_msg_fifo_timeout, > p_opts->single_thread ? "TRUE" : "FALSE"); > > -- > To unsubscribe from this list: send the line "unsubscribe linux-rdma" in > the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org > More majordomo info at http://vger.kernel.org/majordomo-info.html > -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org More majordomo info at http://vger.kernel.org/majordomo-info.html ^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH] opensm: Add configurable retries for transactions 2009-10-30 2:55 ` Sasha Khapyorsky @ 2009-10-30 15:48 ` Hal Rosenstock [not found] ` <f0e08f230910300848i3a36ceedh4472478b6f492a7d-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org> 0 siblings, 1 reply; 7+ messages in thread From: Hal Rosenstock @ 2009-10-30 15:48 UTC (permalink / raw) To: Sasha Khapyorsky; +Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA On Thu, Oct 29, 2009 at 10:55 PM, Sasha Khapyorsky <sashak-smomgflXvOZWk0Htik3J/w@public.gmane.org> wrote: > On 10:05 Sat 24 Oct , Hal Rosenstock wrote: >> >> As part of this change, transaction timeouts (and retries) are per >> umad agent (supplied at osm_vendor_bind). >> >> Old ABI is supported without these bind parameters. >> >> Signed-off-by: Hal Rosenstock <hal.rosenstock-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org> >> --- >> diff --git a/opensm/include/opensm/osm_madw.h b/opensm/include/opensm/osm_madw.h >> index afa7047..10a065b 100644 >> --- a/opensm/include/opensm/osm_madw.h >> +++ b/opensm/include/opensm/osm_madw.h >> @@ -2,6 +2,7 @@ >> * Copyright (c) 2004-2008 Voltaire, Inc. All rights reserved. >> * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. >> * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. >> + * Copyright (c) 2009 HNR Consulting. All rights reserved. >> * >> * This software is available to you under a choice of one of two >> * licenses. You may choose to be licensed under the terms of the GNU >> @@ -76,6 +77,8 @@ typedef struct osm_bind_info { >> boolean_t is_report_processor; >> uint32_t send_q_size; >> uint32_t recv_q_size; >> + uint32_t timeout; >> + uint32_t retries; >> } osm_bind_info_t; >> /* >> * FIELDS >> @@ -103,6 +106,12 @@ typedef struct osm_bind_info { >> * recv_q_size >> * Receive Queue Size >> * >> +* timeout >> +* Transaction timeout >> +* >> +* retries >> +* Number of retries for transaction >> +* >> * SEE ALSO >> *********/ >> >> diff --git a/opensm/include/opensm/osm_subnet.h b/opensm/include/opensm/osm_subnet.h >> index b63c97e..77347c8 100644 >> --- a/opensm/include/opensm/osm_subnet.h >> +++ b/opensm/include/opensm/osm_subnet.h >> @@ -4,6 +4,7 @@ >> * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. >> * Copyright (c) 2008 Xsigo Systems Inc. All rights reserved. >> * Copyright (c) 2009 System Fabric Works, Inc. All rights reserved. >> + * Copyright (c) 2009 HNR Consulting. All rights reserved. >> * >> * This software is available to you under a choice of one of two >> * licenses. You may choose to be licensed under the terms of the GNU >> @@ -149,6 +150,7 @@ typedef struct osm_subn_opt { >> uint32_t sweep_interval; >> uint32_t max_wire_smps; >> uint32_t transaction_timeout; >> + uint32_t transaction_retries; >> uint8_t sm_priority; >> uint8_t lmc; >> boolean_t lmc_esp0; >> @@ -261,6 +263,9 @@ typedef struct osm_subn_opt { >> * The maximum time in milliseconds allowed for a transaction >> * to complete. Default is 200. >> * >> +* transaction_retries >> +* The number of retries for a transaction. Default is 3. >> +* >> * sm_priority >> * The priority of this SM as specified by the user. This >> * value is made available in the SMInfo attribute. >> diff --git a/opensm/libvendor/osm_vendor_ibumad.c b/opensm/libvendor/osm_vendor_ibumad.c >> index 8d3c680..c98141e 100644 >> --- a/opensm/libvendor/osm_vendor_ibumad.c >> +++ b/opensm/libvendor/osm_vendor_ibumad.c >> @@ -85,6 +85,8 @@ typedef struct _osm_umad_bind_info { >> int port_id; >> int agent_id; >> int agent_id1; /* SMI requires two agents */ >> + uint32_t timeout; >> + uint32_t max_retries; > > You don't need fixed size types for those values, do you? And umad_send() > and umad_recv() accept timeout and retries parameters as 'int' - would > be better to be consistent with types. > >> } osm_umad_bind_info_t; >> >> typedef struct _umad_receiver { >> @@ -476,7 +478,7 @@ osm_vendor_init(IN osm_vendor_t * const p_vend, >> p_vend->mtbl.max = tmp; >> else >> OSM_LOG(p_vend->p_log, OSM_LOG_ERROR, "Error:" >> - "OSM_UMAD_MAX_PENDING=%d is invalid", >> + "OSM_UMAD_MAX_PENDING=%d is invalid\n", >> tmp); >> } >> >> @@ -819,6 +821,13 @@ osm_vendor_bind(IN osm_vendor_t * const p_vend, >> p_bind->send_err_callback = send_err_callback; >> p_bind->p_mad_pool = p_mad_pool; >> p_bind->port_guid = port_guid; >> + if (p_vend->timeout == -1) { >> + p_bind->timeout = p_user_bind->timeout; >> + p_bind->max_retries = p_user_bind->retries; >> + } else { >> + p_bind->timeout = p_vend->timeout; >> + p_bind->max_retries = p_vend->max_retries; >> + } > > Hmm, shouldn't we respect user requested data? Something like: > > p_bind->timeout = p_user_bind->timeout ? p_user_bind->timeout : > p_vend->timeout; > p_bind->retries = p_user_bind->retries ? p_user_bind->retries : > p_vend->retries; > > ? The -1 is for the new ABI. >> >> memset(method_mask, 0, sizeof method_mask); >> if (p_user_bind->is_responder) { >> @@ -1086,8 +1095,8 @@ Resp: >> #endif >> if ((ret = umad_send(p_bind->port_id, p_bind->agent_id, p_vw->umad, >> sent_mad_size, >> - resp_expected ? p_vend->timeout : 0, >> - p_vend->max_retries)) < 0) { >> + resp_expected ? p_bind->timeout : 0, >> + p_bind->max_retries)) < 0) { >> OSM_LOG(p_vend->p_log, OSM_LOG_ERROR, "ERR 5430: " >> "Send p_madw = %p of size %d failed %d (%m)\n", >> p_madw, sent_mad_size, ret); >> diff --git a/opensm/man/opensm.8.in b/opensm/man/opensm.8.in >> index 03002c0..bd8ab4e 100644 >> --- a/opensm/man/opensm.8.in >> +++ b/opensm/man/opensm.8.in >> @@ -1,4 +1,4 @@ >> -.TH OPENSM 8 "September 14, 2009" "OpenIB" "OpenIB Management" >> +.TH OPENSM 8 "October 22, 2009" "OpenIB" "OpenIB Management" >> >> .SH NAME >> opensm \- InfiniBand subnet manager and administration (SM/SA) >> @@ -31,6 +31,7 @@ opensm \- InfiniBand subnet manager and administration (SM/SA) >> [\-o(nce)] >> [\-s(weep) <interval>] >> [\-t(imeout) <milliseconds>] >> +[\-\-retries <number>] >> [\-maxsmps <number>] >> [\-console [off | local | socket | loopback]] >> [\-console-port <port>] >> @@ -233,6 +234,12 @@ Specifying -t 0 disables timeouts. >> Without -t, OpenSM defaults to a timeout value of >> 200 milliseconds. >> .TP >> +\fB\-\-retries\fR <number> >> +This option specifies the number of retries used >> +for transactions. >> +Without --retries, OpenSM defaults to 3 retries >> +for transactions. >> +.TP >> \fB\-maxsmps\fR <number> >> This option specifies the number of VL15 SMP MADs >> allowed on the wire at any one time. >> diff --git a/opensm/opensm/main.c b/opensm/opensm/main.c >> index 2e28c83..b697994 100644 >> --- a/opensm/opensm/main.c >> +++ b/opensm/opensm/main.c >> @@ -248,6 +248,11 @@ static void show_usage(void) >> " Specifying -t 0 disables timeouts.\n" >> " Without -t, OpenSM defaults to a timeout value of\n" >> " 200 milliseconds.\n\n"); >> + printf("--retries <number>\n" >> + " This option specifies the number of retries used\n" >> + " for transactions.\n" >> + " Without --retries, OpenSM defaults to %u retries\n" >> + " for transactions.\n\n", OSM_DEFAULT_RETRY_COUNT); >> printf("--maxsmps, -n <number>\n" >> " This option specifies the number of VL15 SMP MADs\n" >> " allowed on the wire at any one time.\n" >> @@ -610,6 +615,7 @@ int main(int argc, char *argv[]) >> {"do_mesh_analysis", 0, NULL, 5}, >> {"lash_start_vl", 1, NULL, 6}, >> {"sm_sl", 1, NULL, 7}, >> + {"retries", 1, NULL, 8}, >> {NULL, 0, NULL, 0} /* Required at the end of the array */ >> }; >> >> @@ -983,6 +989,11 @@ int main(int argc, char *argv[]) >> opt.sm_sl = (uint8_t) temp; >> printf(" SMSL = %d\n", opt.sm_sl); >> break; >> + case 8: >> + opt.transaction_retries = strtol(optarg, NULL, 0); > > Please use strtoul(). OK. What about transaction_timeout ? >> + printf(" Transaction retries = %u\n", >> + opt.transaction_retries); >> + break; >> case 'h': >> case '?': >> case ':': >> diff --git a/opensm/opensm/osm_opensm.c b/opensm/opensm/osm_opensm.c >> index 1f2b067..52668ca 100644 >> --- a/opensm/opensm/osm_opensm.c >> +++ b/opensm/opensm/osm_opensm.c >> @@ -2,6 +2,7 @@ >> * Copyright (c) 2004-2008 Voltaire, Inc. All rights reserved. >> * Copyright (c) 2002-2006 Mellanox Technologies LTD. All rights reserved. >> * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. >> + * Copyright (c) 2009 HNR Consulting. All rights reserved. >> * >> * This software is available to you under a choice of one of two >> * licenses. You may choose to be licensed under the terms of the GNU >> @@ -392,8 +393,7 @@ ib_api_status_t osm_opensm_init(IN osm_opensm_t * p_osm, >> if (status != IB_SUCCESS) >> goto Exit; >> >> - p_osm->p_vendor = >> - osm_vendor_new(&p_osm->log, p_opt->transaction_timeout); >> + p_osm->p_vendor = osm_vendor_new(&p_osm->log, -1); > > Why to not make p_opt->transaction_timeout as default for newly > allocated vendor object? To support old and new ABI. -- Hal > Sasha > >> if (p_osm->p_vendor == NULL) { >> status = IB_INSUFFICIENT_RESOURCES; >> goto Exit; >> diff --git a/opensm/opensm/osm_perfmgr.c b/opensm/opensm/osm_perfmgr.c >> index f95610e..f0ec92d 100644 >> --- a/opensm/opensm/osm_perfmgr.c >> +++ b/opensm/opensm/osm_perfmgr.c >> @@ -264,6 +264,8 @@ ib_api_status_t osm_perfmgr_bind(osm_perfmgr_t * pm, ib_net64_t port_guid) >> bind_info.is_trap_processor = FALSE; >> bind_info.recv_q_size = OSM_PM_DEFAULT_QP1_RCV_SIZE; >> bind_info.send_q_size = OSM_PM_DEFAULT_QP1_SEND_SIZE; >> + bind_info.timeout = pm->subn->opt.transaction_timeout; >> + bind_info.retries = pm->subn->opt.transaction_retries; >> >> OSM_LOG(pm->log, OSM_LOG_VERBOSE, >> "Binding to port GUID 0x%" PRIx64 "\n", cl_ntoh64(port_guid)); >> diff --git a/opensm/opensm/osm_sa_mad_ctrl.c b/opensm/opensm/osm_sa_mad_ctrl.c >> index f22411b..04b6693 100644 >> --- a/opensm/opensm/osm_sa_mad_ctrl.c >> +++ b/opensm/opensm/osm_sa_mad_ctrl.c >> @@ -2,6 +2,7 @@ >> * Copyright (c) 2004-2008 Voltaire, Inc. All rights reserved. >> * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. >> * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. >> + * Copyright (c) 2009 HNR Consulting. All rights reserved. >> * >> * This software is available to you under a choice of one of two >> * licenses. You may choose to be licensed under the terms of the GNU >> @@ -528,6 +529,8 @@ ib_api_status_t osm_sa_mad_ctrl_bind(IN osm_sa_mad_ctrl_t * p_ctrl, >> bind_info.port_guid = port_guid; >> bind_info.recv_q_size = OSM_SM_DEFAULT_QP1_RCV_SIZE; >> bind_info.send_q_size = OSM_SM_DEFAULT_QP1_SEND_SIZE; >> + bind_info.timeout = p_ctrl->sa->p_subn->opt.transaction_timeout; >> + bind_info.retries = p_ctrl->sa->p_subn->opt.transaction_retries; >> >> OSM_LOG(p_ctrl->p_log, OSM_LOG_VERBOSE, >> "Binding to port GUID 0x%" PRIx64 "\n", cl_ntoh64(port_guid)); >> diff --git a/opensm/opensm/osm_sm_mad_ctrl.c b/opensm/opensm/osm_sm_mad_ctrl.c >> index f96aff4..c51c158 100644 >> --- a/opensm/opensm/osm_sm_mad_ctrl.c >> +++ b/opensm/opensm/osm_sm_mad_ctrl.c >> @@ -885,6 +885,8 @@ ib_api_status_t osm_sm_mad_ctrl_bind(IN osm_sm_mad_ctrl_t * p_ctrl, >> bind_info.port_guid = port_guid; >> bind_info.recv_q_size = OSM_SM_DEFAULT_QP0_RCV_SIZE; >> bind_info.send_q_size = OSM_SM_DEFAULT_QP0_SEND_SIZE; >> + bind_info.timeout = p_ctrl->p_subn->opt.transaction_timeout; >> + bind_info.retries = p_ctrl->p_subn->opt.transaction_retries; >> >> OSM_LOG(p_ctrl->p_log, OSM_LOG_VERBOSE, >> "Binding to port 0x%" PRIx64 "\n", cl_ntoh64(port_guid)); >> diff --git a/opensm/opensm/osm_subnet.c b/opensm/opensm/osm_subnet.c >> index 8976b5f..a9a7981 100644 >> --- a/opensm/opensm/osm_subnet.c >> +++ b/opensm/opensm/osm_subnet.c >> @@ -4,6 +4,7 @@ >> * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. >> * Copyright (c) 2008 Xsigo Systems Inc. All rights reserved. >> * Copyright (c) 2009 System Fabric Works, Inc. All rights reserved. >> + * Copyright (c) 2009 HNR Consulting. All rights reserved. >> * >> * This software is available to you under a choice of one of two >> * licenses. You may choose to be licensed under the terms of the GNU >> @@ -299,6 +300,7 @@ static const opt_rec_t opt_tbl[] = { >> { "console", OPT_OFFSET(console), opts_parse_charp, NULL, 0 }, >> { "console_port", OPT_OFFSET(console_port), opts_parse_uint16, NULL, 0 }, >> { "transaction_timeout", OPT_OFFSET(transaction_timeout), opts_parse_uint32, NULL, 1 }, >> + { "transaction_retries", OPT_OFFSET(transaction_retries), opts_parse_uint32, NULL, 1 }, >> { "max_msg_fifo_timeout", OPT_OFFSET(max_msg_fifo_timeout), opts_parse_uint32, NULL, 1 }, >> { "sm_priority", OPT_OFFSET(sm_priority), opts_parse_uint8, opts_setup_sm_priority, 1 }, >> { "lmc", OPT_OFFSET(lmc), opts_parse_uint8, NULL, 1 }, >> @@ -694,6 +696,7 @@ void osm_subn_set_default_opt(IN osm_subn_opt_t * p_opt) >> p_opt->console = strdup(OSM_DEFAULT_CONSOLE); >> p_opt->console_port = OSM_DEFAULT_CONSOLE_PORT; >> p_opt->transaction_timeout = OSM_DEFAULT_TRANS_TIMEOUT_MILLISEC; >> + p_opt->transaction_retries = OSM_DEFAULT_RETRY_COUNT; >> /* by default we will consider waiting for 50x transaction timeout normal */ >> p_opt->max_msg_fifo_timeout = 50 * OSM_DEFAULT_TRANS_TIMEOUT_MILLISEC; >> p_opt->sm_priority = OSM_DEFAULT_SM_PRIORITY; >> @@ -1500,6 +1503,8 @@ int osm_subn_output_conf(FILE *out, IN osm_subn_opt_t * p_opts) >> "max_wire_smps %u\n\n" >> "# The maximum time in [msec] allowed for a transaction to complete\n" >> "transaction_timeout %u\n\n" >> + " The maximum number of retries allowed for a transaction to complete\n" >> + "transaction_retries %u\n\n" >> "# Maximal time in [msec] a message can stay in the incoming message queue.\n" >> "# If there is more than one message in the queue and the last message\n" >> "# stayed in the queue more than this value, any SA request will be\n" >> @@ -1509,6 +1514,7 @@ int osm_subn_output_conf(FILE *out, IN osm_subn_opt_t * p_opts) >> "single_thread %s\n\n", >> p_opts->max_wire_smps, >> p_opts->transaction_timeout, >> + p_opts->transaction_retries, >> p_opts->max_msg_fifo_timeout, >> p_opts->single_thread ? "TRUE" : "FALSE"); >> >> -- >> To unsubscribe from this list: send the line "unsubscribe linux-rdma" in >> the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org >> More majordomo info at http://vger.kernel.org/majordomo-info.html >> > -- > To unsubscribe from this list: send the line "unsubscribe linux-rdma" in > the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org > More majordomo info at http://vger.kernel.org/majordomo-info.html > -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org More majordomo info at http://vger.kernel.org/majordomo-info.html ^ permalink raw reply [flat|nested] 7+ messages in thread
[parent not found: <f0e08f230910300848i3a36ceedh4472478b6f492a7d-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>]
* Re: [PATCH] opensm: Add configurable retries for transactions [not found] ` <f0e08f230910300848i3a36ceedh4472478b6f492a7d-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org> @ 2009-10-30 20:17 ` Sasha Khapyorsky 2009-10-30 20:51 ` Hal Rosenstock 0 siblings, 1 reply; 7+ messages in thread From: Sasha Khapyorsky @ 2009-10-30 20:17 UTC (permalink / raw) To: Hal Rosenstock; +Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA On 11:48 Fri 30 Oct , Hal Rosenstock wrote: > >> @@ -819,6 +821,13 @@ osm_vendor_bind(IN osm_vendor_t * const p_vend, > >> p_bind->send_err_callback = send_err_callback; > >> p_bind->p_mad_pool = p_mad_pool; > >> p_bind->port_guid = port_guid; > >> + if (p_vend->timeout == -1) { > >> + p_bind->timeout = p_user_bind->timeout; > >> + p_bind->max_retries = p_user_bind->retries; > >> + } else { > >> + p_bind->timeout = p_vend->timeout; > >> + p_bind->max_retries = p_vend->max_retries; > >> + } > > > > Hmm, shouldn't we respect user requested data? Something like: > > > > p_bind->timeout = p_user_bind->timeout ? p_user_bind->timeout : > > p_vend->timeout; > > p_bind->retries = p_user_bind->retries ? p_user_bind->retries : > > p_vend->retries; > > > > ? > > The -1 is for the new ABI. Could you elaborate? > >> @@ -983,6 +989,11 @@ int main(int argc, char *argv[]) > >> opt.sm_sl = (uint8_t) temp; > >> printf(" SMSL = %d\n", opt.sm_sl); > >> break; > >> + case 8: > >> + opt.transaction_retries = strtol(optarg, NULL, 0); > > > > Please use strtoul(). > > OK. What about transaction_timeout ? Yes, the same is applicable here. > >> @@ -392,8 +393,7 @@ ib_api_status_t osm_opensm_init(IN osm_opensm_t * p_osm, > >> if (status != IB_SUCCESS) > >> goto Exit; > >> > >> - p_osm->p_vendor = > >> - osm_vendor_new(&p_osm->log, p_opt->transaction_timeout); > >> + p_osm->p_vendor = osm_vendor_new(&p_osm->log, -1); > > > > Why to not make p_opt->transaction_timeout as default for newly > > allocated vendor object? > > To support old and new ABI. What do you mean? OpenSM will run against older vendor? Even if so how '-1' is helpful? Likely it will break things, no? Sasha -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org More majordomo info at http://vger.kernel.org/majordomo-info.html ^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH] opensm: Add configurable retries for transactions 2009-10-30 20:17 ` Sasha Khapyorsky @ 2009-10-30 20:51 ` Hal Rosenstock [not found] ` <f0e08f230910301351o3f81d787g5e177be10da72319-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org> 0 siblings, 1 reply; 7+ messages in thread From: Hal Rosenstock @ 2009-10-30 20:51 UTC (permalink / raw) To: Sasha Khapyorsky; +Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA On Fri, Oct 30, 2009 at 4:17 PM, Sasha Khapyorsky <sashak-smomgflXvOZWk0Htik3J/w@public.gmane.org> wrote: > On 11:48 Fri 30 Oct , Hal Rosenstock wrote: >> >> @@ -819,6 +821,13 @@ osm_vendor_bind(IN osm_vendor_t * const p_vend, >> >> p_bind->send_err_callback = send_err_callback; >> >> p_bind->p_mad_pool = p_mad_pool; >> >> p_bind->port_guid = port_guid; >> >> + if (p_vend->timeout == -1) { >> >> + p_bind->timeout = p_user_bind->timeout; >> >> + p_bind->max_retries = p_user_bind->retries; >> >> + } else { >> >> + p_bind->timeout = p_vend->timeout; >> >> + p_bind->max_retries = p_vend->max_retries; >> >> + } >> > >> > Hmm, shouldn't we respect user requested data? Something like: >> > >> > p_bind->timeout = p_user_bind->timeout ? p_user_bind->timeout : >> > p_vend->timeout; >> > p_bind->retries = p_user_bind->retries ? p_user_bind->retries : >> > p_vend->retries; >> > >> > ? >> >> The -1 is for the new ABI. > > Could you elaborate? In order to be able to deal with combinations of old/new opensm/vendor layer. >> >> @@ -983,6 +989,11 @@ int main(int argc, char *argv[]) >> >> opt.sm_sl = (uint8_t) temp; >> >> printf(" SMSL = %d\n", opt.sm_sl); >> >> break; >> >> + case 8: >> >> + opt.transaction_retries = strtol(optarg, NULL, 0); >> > >> > Please use strtoul(). >> >> OK. What about transaction_timeout ? > > Yes, the same is applicable here. > >> >> @@ -392,8 +393,7 @@ ib_api_status_t osm_opensm_init(IN osm_opensm_t * p_osm, >> >> if (status != IB_SUCCESS) >> >> goto Exit; >> >> >> >> - p_osm->p_vendor = >> >> - osm_vendor_new(&p_osm->log, p_opt->transaction_timeout); >> >> + p_osm->p_vendor = osm_vendor_new(&p_osm->log, -1); >> > >> > Why to not make p_opt->transaction_timeout as default for newly >> > allocated vendor object? >> >> To support old and new ABI. > > What do you mean? OpenSM will run against older vendor? > > Even if so how '-1' is helpful? Likely it will break things, no? -1 is not a valid timeout so can't this be used in this manner ? How will it break things ? -- Hal > Sasha > -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org More majordomo info at http://vger.kernel.org/majordomo-info.html ^ permalink raw reply [flat|nested] 7+ messages in thread
[parent not found: <f0e08f230910301351o3f81d787g5e177be10da72319-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>]
* Re: [PATCH] opensm: Add configurable retries for transactions [not found] ` <f0e08f230910301351o3f81d787g5e177be10da72319-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org> @ 2009-10-30 21:48 ` Sasha Khapyorsky 2009-10-30 21:49 ` Hal Rosenstock 0 siblings, 1 reply; 7+ messages in thread From: Sasha Khapyorsky @ 2009-10-30 21:48 UTC (permalink / raw) To: Hal Rosenstock; +Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA On 16:51 Fri 30 Oct , Hal Rosenstock wrote: > On Fri, Oct 30, 2009 at 4:17 PM, Sasha Khapyorsky <sashak-smomgflXvObQFizaE/u3fw@public.gmane.orgm> wrote: > > On 11:48 Fri 30 Oct , Hal Rosenstock wrote: > >> >> @@ -819,6 +821,13 @@ osm_vendor_bind(IN osm_vendor_t * const p_vend, > >> >> p_bind->send_err_callback = send_err_callback; > >> >> p_bind->p_mad_pool = p_mad_pool; > >> >> p_bind->port_guid = port_guid; > >> >> + if (p_vend->timeout == -1) { > >> >> + p_bind->timeout = p_user_bind->timeout; > >> >> + p_bind->max_retries = p_user_bind->retries; > >> >> + } else { > >> >> + p_bind->timeout = p_vend->timeout; > >> >> + p_bind->max_retries = p_vend->max_retries; > >> >> + } > >> > > >> > Hmm, shouldn't we respect user requested data? Something like: > >> > > >> > p_bind->timeout = p_user_bind->timeout ? p_user_bind->timeout : > >> > p_vend->timeout; > >> > p_bind->retries = p_user_bind->retries ? p_user_bind->retries : > >> > p_vend->retries; > >> > > >> > ? > >> > >> The -1 is for the new ABI. > > > > Could you elaborate? > > In order to be able to deal with combinations of old/new opensm/vendor layer. I see, you are trying to protect old OpenSM <-> new vendor case. Such combination will not work for many reasons (we changed OpenSM/vendor interaction couple of times in past few years, including configuration) and it is better to avoid such usage. > >> >> @@ -392,8 +393,7 @@ ib_api_status_t osm_opensm_init(IN osm_opensm_t * p_osm, > >> >> if (status != IB_SUCCESS) > >> >> goto Exit; > >> >> > >> >> - p_osm->p_vendor = > >> >> - osm_vendor_new(&p_osm->log, p_opt->transaction_timeout); > >> >> + p_osm->p_vendor = osm_vendor_new(&p_osm->log, -1); > >> > > >> > Why to not make p_opt->transaction_timeout as default for newly > >> > allocated vendor object? > >> > >> To support old and new ABI. > > > > What do you mean? OpenSM will run against older vendor? > > > > Even if so how '-1' is helpful? Likely it will break things, no? > > -1 is not a valid timeout so can't this be used in this manner ? How > will it break things ? Old vendor will not work with '-1'. OTOH this enforces all vendor users to set all parameter explicitly, and ignores those values when a value other than '-1' was used in vendor creation. In fact it enforces using '-1' and effectively makes it to be mandatory part of vendor API. Seems too messy for me. Sasha -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org More majordomo info at http://vger.kernel.org/majordomo-info.html ^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH] opensm: Add configurable retries for transactions 2009-10-30 21:48 ` Sasha Khapyorsky @ 2009-10-30 21:49 ` Hal Rosenstock 0 siblings, 0 replies; 7+ messages in thread From: Hal Rosenstock @ 2009-10-30 21:49 UTC (permalink / raw) To: Sasha Khapyorsky; +Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA On Fri, Oct 30, 2009 at 5:48 PM, Sasha Khapyorsky <sashak-smomgflXvOZWk0Htik3J/w@public.gmane.org> wrote: > On 16:51 Fri 30 Oct , Hal Rosenstock wrote: >> On Fri, Oct 30, 2009 at 4:17 PM, Sasha Khapyorsky <sashak@voltaire.com> wrote: >> > On 11:48 Fri 30 Oct , Hal Rosenstock wrote: >> >> >> @@ -819,6 +821,13 @@ osm_vendor_bind(IN osm_vendor_t * const p_vend, >> >> >> p_bind->send_err_callback = send_err_callback; >> >> >> p_bind->p_mad_pool = p_mad_pool; >> >> >> p_bind->port_guid = port_guid; >> >> >> + if (p_vend->timeout == -1) { >> >> >> + p_bind->timeout = p_user_bind->timeout; >> >> >> + p_bind->max_retries = p_user_bind->retries; >> >> >> + } else { >> >> >> + p_bind->timeout = p_vend->timeout; >> >> >> + p_bind->max_retries = p_vend->max_retries; >> >> >> + } >> >> > >> >> > Hmm, shouldn't we respect user requested data? Something like: >> >> > >> >> > p_bind->timeout = p_user_bind->timeout ? p_user_bind->timeout : >> >> > p_vend->timeout; >> >> > p_bind->retries = p_user_bind->retries ? p_user_bind->retries : >> >> > p_vend->retries; >> >> > >> >> > ? >> >> >> >> The -1 is for the new ABI. >> > >> > Could you elaborate? >> >> In order to be able to deal with combinations of old/new opensm/vendor layer. > > I see, you are trying to protect old OpenSM <-> new vendor case. > > Such combination will not work for many reasons (we changed OpenSM/vendor > interaction couple of times in past few years, including configuration) > and it is better to avoid such usage. > >> >> >> @@ -392,8 +393,7 @@ ib_api_status_t osm_opensm_init(IN osm_opensm_t * p_osm, >> >> >> if (status != IB_SUCCESS) >> >> >> goto Exit; >> >> >> >> >> >> - p_osm->p_vendor = >> >> >> - osm_vendor_new(&p_osm->log, p_opt->transaction_timeout); >> >> >> + p_osm->p_vendor = osm_vendor_new(&p_osm->log, -1); >> >> > >> >> > Why to not make p_opt->transaction_timeout as default for newly >> >> > allocated vendor object? >> >> >> >> To support old and new ABI. >> > >> > What do you mean? OpenSM will run against older vendor? >> > >> > Even if so how '-1' is helpful? Likely it will break things, no? >> >> -1 is not a valid timeout so can't this be used in this manner ? How >> will it break things ? > > Old vendor will not work with '-1'. OTOH this enforces all vendor users > to set all parameter explicitly, and ignores those values when a value > other than '-1' was used in vendor creation. In fact it enforces using > '-1' and effectively makes it to be mandatory part of vendor API. Seems > too messy for me. OK; I'll update the patch then. -- Hal > Sasha > -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org More majordomo info at http://vger.kernel.org/majordomo-info.html ^ permalink raw reply [flat|nested] 7+ messages in thread
end of thread, other threads:[~2009-10-30 21:49 UTC | newest]
Thread overview: 7+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2009-10-24 14:05 [PATCH] opensm: Add configurable retries for transactions Hal Rosenstock
[not found] ` <20091024140538.GA5213-Wuw85uim5zDR7s880joybQ@public.gmane.org>
2009-10-30 2:55 ` Sasha Khapyorsky
2009-10-30 15:48 ` Hal Rosenstock
[not found] ` <f0e08f230910300848i3a36ceedh4472478b6f492a7d-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2009-10-30 20:17 ` Sasha Khapyorsky
2009-10-30 20:51 ` Hal Rosenstock
[not found] ` <f0e08f230910301351o3f81d787g5e177be10da72319-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2009-10-30 21:48 ` Sasha Khapyorsky
2009-10-30 21:49 ` Hal Rosenstock
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox