* [PATCH 3/3 v2] opensm: connect switches in tree - implemented in up/down
@ 2009-11-04 11:09 Yevgeny Kliteynik
[not found] ` <4AF160D8.9040801-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org>
0 siblings, 1 reply; 4+ messages in thread
From: Yevgeny Kliteynik @ 2009-11-04 11:09 UTC (permalink / raw)
To: Sasha Khapyorsky; +Cc: Linux RDMA
This patch implements the connect_switches option in up/down
routing. Also, connect_roots is now handled as a special
case of connect_switches.
The idea is the following: when clearing hops, preserve
the entries for switches that are above the highest leaf
in the tree.
So if the highest leaf in the tree has rank N, preserve
hops to all the switches with ranks 0 to (N-1).
When connecting roots (--connect_roots option), just set
N to 1.
[v2 - no changes, just rebased and resolved conflicts]
Signed-off-by: Yevgeny Kliteynik <kliteyn-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org>
---
opensm/opensm/osm_ucast_updn.c | 87 +++++++++++++++++++++++++++++++---------
1 files changed, 68 insertions(+), 19 deletions(-)
diff --git a/opensm/opensm/osm_ucast_updn.c b/opensm/opensm/osm_ucast_updn.c
index 01e40eb..cb99730 100644
--- a/opensm/opensm/osm_ucast_updn.c
+++ b/opensm/opensm/osm_ucast_updn.c
@@ -279,24 +279,80 @@ static int updn_subn_rank(IN updn_t * p_updn)
return 0;
}
-/* hack: preserve min hops entries to any other root switches */
-static void updn_clear_non_root_hops(updn_t * p_updn, osm_switch_t * p_sw)
+/**********************************************************************
+ **********************************************************************/
+/* Preserve min hops entries to any switch
+ * above certain level in the tree */
+static void updn_switch_clear_hops_below_rank(updn_t * p_updn,
+ osm_switch_t * p_sw, unsigned min_leaf_rank)
{
osm_port_t *p_port;
unsigned i;
for (i = 0; i < p_sw->num_hops; i++)
if (p_sw->hops[i]) {
- p_port =
- cl_ptr_vector_get(&p_updn->p_osm->subn.port_lid_tbl,
- i);
- if (!p_port || !p_port->p_node->sw
- || ((struct updn_node *)p_port->p_node->sw->priv)->
- rank != 0)
+ p_port = cl_ptr_vector_get(
+ &p_updn->p_osm->subn.port_lid_tbl, i);
+ if (!p_port || !p_port->p_node->sw ||
+ ((struct updn_node *)
+ p_port->p_node->sw->priv)->rank >= min_leaf_rank)
memset(p_sw->hops[i], 0xff, p_sw->num_ports);
}
}
+/**********************************************************************
+ **********************************************************************/
+static void updn_clear_hops(IN updn_t * p_updn)
+{
+ osm_subn_t *p_subn = &p_updn->p_osm->subn;
+ osm_switch_t *p_sw;
+ struct updn_node * p_updn_sw;
+ osm_physp_t * p_physp;
+ osm_port_t * p_port;
+ cl_map_item_t *item;
+ unsigned min_leaf_rank = 0xff;
+
+ if (p_subn->opt.connect_switches) {
+ /*
+ * Preserve roots to switches above the highest leaf
+ * switch (leaf switch with the lowest rank).
+ *
+ * Need to find the minimal rank of leaf switch.
+ * The fastest way would be to go through all the
+ * CA ports in the fabric and check the switch that
+ * is connected to them.
+ */
+ for (item = cl_qmap_head(&p_updn->p_osm->subn.port_guid_tbl);
+ item != cl_qmap_end(&p_updn->p_osm->subn.port_guid_tbl);
+ item = cl_qmap_next(item)) {
+ p_port = (osm_port_t *)item;
+ if (p_port->p_node->sw)
+ continue;
+ p_physp = p_port->p_physp->p_remote_physp;
+ if (!p_physp || !p_physp->p_node->sw)
+ continue;
+
+ p_updn_sw = (struct updn_node*)p_physp->p_node->sw->priv;
+ if (p_updn_sw->rank < min_leaf_rank)
+ min_leaf_rank = p_updn_sw->rank;
+ }
+ } else if (p_subn->opt.connect_roots)
+ min_leaf_rank = 1;
+
+ for (item = cl_qmap_head(&p_updn->p_osm->subn.sw_guid_tbl);
+ item != cl_qmap_end(&p_updn->p_osm->subn.sw_guid_tbl);
+ item = cl_qmap_next(item)) {
+ p_sw = (osm_switch_t *)item;
+ if (p_subn->opt.connect_switches || p_subn->opt.connect_roots)
+ updn_switch_clear_hops_below_rank(
+ p_updn, p_sw, min_leaf_rank);
+ else
+ osm_switch_clear_hops(p_sw);
+ }
+}
+
+/**********************************************************************
+ **********************************************************************/
static int updn_set_min_hop_table(IN updn_t * p_updn)
{
osm_subn_t *p_subn = &p_updn->p_osm->subn;
@@ -311,16 +367,8 @@ static int updn_set_min_hop_table(IN updn_t * p_updn)
OSM_LOG(p_log, OSM_LOG_VERBOSE,
"Init Min Hop Table of all switches [\n");
- for (item = cl_qmap_head(&p_updn->p_osm->subn.sw_guid_tbl);
- item != cl_qmap_end(&p_updn->p_osm->subn.sw_guid_tbl);
- item = cl_qmap_next(item)) {
- p_sw = (osm_switch_t *)item;
- /* Clear Min Hop Table */
- if (p_subn->opt.connect_roots)
- updn_clear_non_root_hops(p_updn, p_sw);
- else
- osm_switch_clear_hops(p_sw);
- }
+ /* Clear Min Hop Table */
+ updn_clear_hops(p_updn);
OSM_LOG(p_log, OSM_LOG_VERBOSE,
"Init Min Hop Table of all switches ]\n");
@@ -598,7 +646,8 @@ static int updn_lid_matrices(void *ctx)
OSM_LOG(&p_updn->p_osm->log, OSM_LOG_ERROR, "ERR : "
"cannot parse root guids file \'%s\'\n",
p_updn->p_osm->subn.opt.root_guid_file);
- if (p_updn->p_osm->subn.opt.connect_roots &&
+ if ((p_updn->p_osm->subn.opt.connect_roots ||
+ p_updn->p_osm->subn.opt.connect_switches) &&
p_updn->num_roots > 1)
osm_ucast_mgr_build_lid_matrices(&p_updn->p_osm->sm.ucast_mgr);
} else {
--
1.5.1.4
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply related [flat|nested] 4+ messages in thread
* Re: [PATCH 3/3 v2] opensm: connect switches in tree - implemented in up/down
[not found] ` <4AF160D8.9040801-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org>
@ 2009-11-26 15:46 ` Sasha Khapyorsky
2009-12-06 8:35 ` Yevgeny Kliteynik
0 siblings, 1 reply; 4+ messages in thread
From: Sasha Khapyorsky @ 2009-11-26 15:46 UTC (permalink / raw)
To: Yevgeny Kliteynik; +Cc: Linux RDMA
On 13:09 Wed 04 Nov , Yevgeny Kliteynik wrote:
> This patch implements connect_switches option in up/down
> routing. Also, connect_roots is now handled as a special
> case of connect_switches.
>
> The idea is the following: when clearing hops, preserve
> the entries for switches that are above the highest leaf
> in the tree.
> So if the highest leaf in the tree has rank N, preserve
> hops to all the switches with ranks 0 to (N-1).
> When connecting roots (--connect_roots option), just set
> N to 1.
Would this affect multicast routing in the sense of credit loop
generation?
Sasha
>
> [v2 - no changes, just rebased and resolved conflicts]
>
> Signed-off-by: Yevgeny Kliteynik <kliteyn-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org>
> ---
> opensm/opensm/osm_ucast_updn.c | 87 +++++++++++++++++++++++++++++++---------
> 1 files changed, 68 insertions(+), 19 deletions(-)
>
> diff --git a/opensm/opensm/osm_ucast_updn.c b/opensm/opensm/osm_ucast_updn.c
> index 01e40eb..cb99730 100644
> --- a/opensm/opensm/osm_ucast_updn.c
> +++ b/opensm/opensm/osm_ucast_updn.c
> @@ -279,24 +279,80 @@ static int updn_subn_rank(IN updn_t * p_updn)
> return 0;
> }
>
> -/* hack: preserve min hops entries to any other root switches */
> -static void updn_clear_non_root_hops(updn_t * p_updn, osm_switch_t * p_sw)
> +/**********************************************************************
> + **********************************************************************/
> +/* Preserve min hops entries to any switch
> + * above certain level in the tree */
> +static void updn_switch_clear_hops_below_rank(updn_t * p_updn,
> + osm_switch_t * p_sw, unsigned min_leaf_rank)
> {
> osm_port_t *p_port;
> unsigned i;
>
> for (i = 0; i < p_sw->num_hops; i++)
> if (p_sw->hops[i]) {
> - p_port =
> - cl_ptr_vector_get(&p_updn->p_osm->subn.port_lid_tbl,
> - i);
> - if (!p_port || !p_port->p_node->sw
> - || ((struct updn_node *)p_port->p_node->sw->priv)->
> - rank != 0)
> + p_port = cl_ptr_vector_get(
> + &p_updn->p_osm->subn.port_lid_tbl, i);
> + if (!p_port || !p_port->p_node->sw ||
> + ((struct updn_node *)
> + p_port->p_node->sw->priv)->rank >= min_leaf_rank)
> memset(p_sw->hops[i], 0xff, p_sw->num_ports);
> }
> }
>
> +/**********************************************************************
> + **********************************************************************/
> +static void updn_clear_hops(IN updn_t * p_updn)
> +{
> + osm_subn_t *p_subn = &p_updn->p_osm->subn;
> + osm_switch_t *p_sw;
> + struct updn_node * p_updn_sw;
> + osm_physp_t * p_physp;
> + osm_port_t * p_port;
> + cl_map_item_t *item;
> + unsigned min_leaf_rank = 0xff;
> +
> + if (p_subn->opt.connect_switches) {
> + /*
> + * Preserve roots to switches above the highest leaf
> + * switch (leaf switch with the lowest rank).
> + *
> + * Need to find the minimal rank of leaf switch.
> + * The fastest way would be to go through all the
> + * CA ports in the fabric and check the switch that
> + * is connected to them.
> + */
> + for (item = cl_qmap_head(&p_updn->p_osm->subn.port_guid_tbl);
> + item != cl_qmap_end(&p_updn->p_osm->subn.port_guid_tbl);
> + item = cl_qmap_next(item)) {
> + p_port = (osm_port_t *)item;
> + if (p_port->p_node->sw)
> + continue;
> + p_physp = p_port->p_physp->p_remote_physp;
> + if (!p_physp || !p_physp->p_node->sw)
> + continue;
> +
> + p_updn_sw = (struct updn_node*)p_physp->p_node->sw->priv;
> + if (p_updn_sw->rank < min_leaf_rank)
> + min_leaf_rank = p_updn_sw->rank;
> + }
> + } else if (p_subn->opt.connect_roots)
> + min_leaf_rank = 1;
> +
> + for (item = cl_qmap_head(&p_updn->p_osm->subn.sw_guid_tbl);
> + item != cl_qmap_end(&p_updn->p_osm->subn.sw_guid_tbl);
> + item = cl_qmap_next(item)) {
> + p_sw = (osm_switch_t *)item;
> + if (p_subn->opt.connect_switches || p_subn->opt.connect_roots)
> + updn_switch_clear_hops_below_rank(
> + p_updn, p_sw, min_leaf_rank);
> + else
> + osm_switch_clear_hops(p_sw);
> + }
> +}
> +
> +/**********************************************************************
> + **********************************************************************/
> static int updn_set_min_hop_table(IN updn_t * p_updn)
> {
> osm_subn_t *p_subn = &p_updn->p_osm->subn;
> @@ -311,16 +367,8 @@ static int updn_set_min_hop_table(IN updn_t * p_updn)
> OSM_LOG(p_log, OSM_LOG_VERBOSE,
> "Init Min Hop Table of all switches [\n");
>
> - for (item = cl_qmap_head(&p_updn->p_osm->subn.sw_guid_tbl);
> - item != cl_qmap_end(&p_updn->p_osm->subn.sw_guid_tbl);
> - item = cl_qmap_next(item)) {
> - p_sw = (osm_switch_t *)item;
> - /* Clear Min Hop Table */
> - if (p_subn->opt.connect_roots)
> - updn_clear_non_root_hops(p_updn, p_sw);
> - else
> - osm_switch_clear_hops(p_sw);
> - }
> + /* Clear Min Hop Table */
> + updn_clear_hops(p_updn);
>
> OSM_LOG(p_log, OSM_LOG_VERBOSE,
> "Init Min Hop Table of all switches ]\n");
> @@ -598,7 +646,8 @@ static int updn_lid_matrices(void *ctx)
> OSM_LOG(&p_updn->p_osm->log, OSM_LOG_ERROR, "ERR : "
> "cannot parse root guids file \'%s\'\n",
> p_updn->p_osm->subn.opt.root_guid_file);
> - if (p_updn->p_osm->subn.opt.connect_roots &&
> + if ((p_updn->p_osm->subn.opt.connect_roots ||
> + p_updn->p_osm->subn.opt.connect_switches) &&
> p_updn->num_roots > 1)
> osm_ucast_mgr_build_lid_matrices(&p_updn->p_osm->sm.ucast_mgr);
> } else {
> --
> 1.5.1.4
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
> the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
>
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH 3/3 v2] opensm: connect switches in tree - implemented in up/down
2009-11-26 15:46 ` Sasha Khapyorsky
@ 2009-12-06 8:35 ` Yevgeny Kliteynik
[not found] ` <4B1B6CE1.4050802-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org>
0 siblings, 1 reply; 4+ messages in thread
From: Yevgeny Kliteynik @ 2009-12-06 8:35 UTC (permalink / raw)
To: Sasha Khapyorsky; +Cc: Linux RDMA
Hi Sasha,
Sasha Khapyorsky wrote:
> On 13:09 Wed 04 Nov , Yevgeny Kliteynik wrote:
>> This patch implements connect_switches option in up/down
>> routing. Also, connect_roots is now handled as a special
>> case of connect_switches.
>>
>> The idea is the following: when clearing hops, preserve
>> the entries for switches that are above the highest leaf
>> in the tree.
>> So if the highest leaf in the tree has rank N, preserve
>> hops to all the switches with ranks 0 to (N-1).
>> When connecting roots (--connect_roots option), just set
>> N to 1.
>
> Would this affect multicast routing in sense of a credit loop
> generation?
Since I sent these patches, I have had them running in various
simulations and setups, and there are a couple of fundamental
issues with this approach. Basically, what I did here (in up/dn)
is wrong, not only for multicast but for unicast too:
it interferes with the usual up/down path routing.
I will issue V3 of the patches, and it will contain only the
connect_roots implementation for fat-tree, addressing the
two small remarks that you made.
As for the general algorithm for connecting switches,
the whole approach of the connect_roots option and of
what I was lately trying to do with the connect_switches
option is wrong. There is no need to connect all the
roots to each other. There's also no need to connect
all the switches to each other.
What we need is connectivity from each of the *managed*
switches (extended port 0) to all the other switches
in the fabric, in both directions.
This option should *replace* the connect_roots option
functionality (though we can leave connect_roots for
backward compatibility).
I'll work on the algorithm, but clearly it won't be
ready for OFED 1.5.
If you have any thoughts about this idea, I'd be happy
to hear them.
-- Yevgeny
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH 3/3 v2] opensm: connect switches in tree - implemented in up/down
[not found] ` <4B1B6CE1.4050802-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org>
@ 2009-12-06 9:08 ` Yevgeny Kliteynik
0 siblings, 0 replies; 4+ messages in thread
From: Yevgeny Kliteynik @ 2009-12-06 9:08 UTC (permalink / raw)
To: Sasha Khapyorsky; +Cc: Linux RDMA
Yevgeny Kliteynik wrote:
> Hi Sasha,
>
> Sasha Khapyorsky wrote:
>> On 13:09 Wed 04 Nov , Yevgeny Kliteynik wrote:
>>> This patch implements connect_switches option in up/down
>>> routing. Also, connect_roots is now handled as a special
>>> case of connect_switches.
>>>
>>> The idea is the following: when clearing hops, preserve
>>> the entries for switches that are above the highest leaf
>>> in the tree.
>>> So if the highest leaf in the tree has rank N, preserve
>>> hops to all the switches with ranks 0 to (N-1).
>>> When connecting roots (--connect_roots option), just set
>>> N to 1.
>>
>> Would this affect multicast routing in sense of a credit loop
>> generation?
>
> Since I sent these patches, I had it running in various
> simulations and setups, and there are couple of fundamental
> issue with this approach. Basically, what I did here (in up/dn)
> is wrong. Not only for multicast, but for unicast too.
> It interferes with the usual up/down paths routing.
>
> I will issue V3 of the patches, and it will be only
> connect_roots implementation for fat-tree with the
> two small remarks that you found.
It won't be V3 - it's just one patch that implements
connect_roots in ftree routing.
-- Yevgeny
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2009-12-06 9:08 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2009-11-04 11:09 [PATCH 3/3 v2] opensm: connect switches in tree - implemented in up/down Yevgeny Kliteynik
[not found] ` <4AF160D8.9040801-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org>
2009-11-26 15:46 ` Sasha Khapyorsky
2009-12-06 8:35 ` Yevgeny Kliteynik
[not found] ` <4B1B6CE1.4050802-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org>
2009-12-06 9:08 ` Yevgeny Kliteynik
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox.