netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH net-next 2/2] bonding: Simplify the xmit function for modes that use xmit_hash
@ 2014-09-03 21:47 Mahesh Bandewar
  2014-09-03 22:51 ` Jay Vosburgh
  2014-09-04 13:16 ` Nikolay Aleksandrov
  0 siblings, 2 replies; 7+ messages in thread
From: Mahesh Bandewar @ 2014-09-03 21:47 UTC (permalink / raw)
  To: Jay Vosburgh, Veaceslav Falico, Andy Gospodarek, David Miller
  Cc: netdev, Mahesh Bandewar, Eric Dumazet, Maciej Zenczykowski

Earlier change to use usable slave array for TLB mode had an additional
performance advantage. So extending the same logic to all other modes
that use xmit-hash for slave selection (viz 802.3AD, and XOR modes).
Also consolidating this with the earlier TLB change.

The main idea is to build the usable slaves array in the control path
and use that array for slave selection during xmit operation.

Measured performance in a setup with a bond of 4x1G NICs with 200
instances of netperf for the modes involved (3ad, xor, tlb)
cmd: netperf -t TCP_RR -H <TargetHost> -l 60 -s 5

Mode        TPS-Before   TPS-After

802.3ad   : 468,694      493,101
TLB (lb=0): 392,583      392,965
XOR       : 475,696      484,517

Signed-off-by: Mahesh Bandewar <maheshb@google.com>
---
 drivers/net/bonding/bond_3ad.c  |  77 ++++---------------------------
 drivers/net/bonding/bond_alb.c  |  43 ++---------------
 drivers/net/bonding/bond_alb.h  |   8 ----
 drivers/net/bonding/bond_main.c | 100 ++++++++++++++++++++++++++++++++++++----
 drivers/net/bonding/bonding.h   |   8 ++++
 5 files changed, 113 insertions(+), 123 deletions(-)

diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c
index ee2c73a9de39..d42fd65fdfa9 100644
--- a/drivers/net/bonding/bond_3ad.c
+++ b/drivers/net/bonding/bond_3ad.c
@@ -1717,6 +1717,8 @@ static void ad_enable_collecting_distributing(struct port *port)
 			 port->actor_port_number,
 			 port->aggregator->aggregator_identifier);
 		__enable_port(port);
+		if (bond_update_slave_arr(port->slave->bond, NULL))
+			pr_debug("Failed to build slave-array for 3ad mode.\n");
 	}
 }
 
@@ -1733,6 +1735,8 @@ static void ad_disable_collecting_distributing(struct port *port)
 			 port->actor_port_number,
 			 port->aggregator->aggregator_identifier);
 		__disable_port(port);
+		if (bond_update_slave_arr(port->slave->bond, NULL))
+			pr_debug("Failed to build slave-array for 3ad mode.\n");
 	}
 }
 
@@ -1917,6 +1921,9 @@ void bond_3ad_unbind_slave(struct slave *slave)
 	__update_lacpdu_from_port(port);
 	ad_lacpdu_send(port);
 
+	if (bond_update_slave_arr(bond, slave))
+		pr_debug("Failed to build slave-array for 3AD mode.\n");
+
 	/* check if this aggregator is occupied */
 	if (aggregator->lag_ports) {
 		/* check if there are other ports related to this aggregator
@@ -2311,6 +2318,9 @@ void bond_3ad_handle_link_change(struct slave *slave, char link)
 	 */
 	port->sm_vars |= AD_PORT_BEGIN;
 
+	if (bond_update_slave_arr(slave->bond, NULL))
+		pr_debug("Failed to build slave-array for 3ad mode.\n");
+
 	__release_state_machine_lock(port);
 }
 
@@ -2407,73 +2417,6 @@ int bond_3ad_get_active_agg_info(struct bonding *bond, struct ad_info *ad_info)
 	return ret;
 }
 
-int bond_3ad_xmit_xor(struct sk_buff *skb, struct net_device *dev)
-{
-	struct bonding *bond = netdev_priv(dev);
-	struct slave *slave, *first_ok_slave;
-	struct aggregator *agg;
-	struct ad_info ad_info;
-	struct list_head *iter;
-	int slaves_in_agg;
-	int slave_agg_no;
-	int agg_id;
-
-	if (__bond_3ad_get_active_agg_info(bond, &ad_info)) {
-		netdev_dbg(dev, "__bond_3ad_get_active_agg_info failed\n");
-		goto err_free;
-	}
-
-	slaves_in_agg = ad_info.ports;
-	agg_id = ad_info.aggregator_id;
-
-	if (slaves_in_agg == 0) {
-		netdev_dbg(dev, "active aggregator is empty\n");
-		goto err_free;
-	}
-
-	slave_agg_no = bond_xmit_hash(bond, skb) % slaves_in_agg;
-	first_ok_slave = NULL;
-
-	bond_for_each_slave_rcu(bond, slave, iter) {
-		agg = SLAVE_AD_INFO(slave)->port.aggregator;
-		if (!agg || agg->aggregator_identifier != agg_id)
-			continue;
-
-		if (slave_agg_no >= 0) {
-			if (!first_ok_slave && bond_slave_can_tx(slave))
-				first_ok_slave = slave;
-			slave_agg_no--;
-			continue;
-		}
-
-		if (bond_slave_can_tx(slave)) {
-			bond_dev_queue_xmit(bond, skb, slave->dev);
-			goto out;
-		}
-	}
-
-	if (slave_agg_no >= 0) {
-		netdev_err(dev, "Couldn't find a slave to tx on for aggregator ID %d\n",
-			   agg_id);
-		goto err_free;
-	}
-
-	/* we couldn't find any suitable slave after the agg_no, so use the
-	 * first suitable found, if found.
-	 */
-	if (first_ok_slave)
-		bond_dev_queue_xmit(bond, skb, first_ok_slave->dev);
-	else
-		goto err_free;
-
-out:
-	return NETDEV_TX_OK;
-err_free:
-	/* no suitable interface, frame not sent */
-	dev_kfree_skb_any(skb);
-	goto out;
-}
-
 int bond_3ad_lacpdu_recv(const struct sk_buff *skb, struct bonding *bond,
 			 struct slave *slave)
 {
diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c
index 73c21e233131..b2be7b3216d6 100644
--- a/drivers/net/bonding/bond_alb.c
+++ b/drivers/net/bonding/bond_alb.c
@@ -200,7 +200,6 @@ static int tlb_initialize(struct bonding *bond)
 static void tlb_deinitialize(struct bonding *bond)
 {
 	struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
-	struct tlb_up_slave *arr;
 
 	_lock_tx_hashtbl_bh(bond);
 
@@ -208,10 +207,6 @@ static void tlb_deinitialize(struct bonding *bond)
 	bond_info->tx_hashtbl = NULL;
 
 	_unlock_tx_hashtbl_bh(bond);
-
-	arr = rtnl_dereference(bond_info->slave_arr);
-	if (arr)
-		kfree_rcu(arr, rcu);
 }
 
 static long long compute_gap(struct slave *slave)
@@ -1409,39 +1404,9 @@ out:
 	return NETDEV_TX_OK;
 }
 
-static int bond_tlb_update_slave_arr(struct bonding *bond,
-				     struct slave *skipslave)
-{
-	struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
-	struct slave *tx_slave;
-	struct list_head *iter;
-	struct tlb_up_slave *new_arr, *old_arr;
-
-	new_arr = kzalloc(offsetof(struct tlb_up_slave, arr[bond->slave_cnt]),
-			  GFP_ATOMIC);
-	if (!new_arr)
-		return -ENOMEM;
-
-	bond_for_each_slave(bond, tx_slave, iter) {
-		if (!bond_slave_can_tx(tx_slave))
-			continue;
-		if (skipslave == tx_slave)
-			continue;
-		new_arr->arr[new_arr->count++] = tx_slave;
-	}
-
-	old_arr = rtnl_dereference(bond_info->slave_arr);
-	rcu_assign_pointer(bond_info->slave_arr, new_arr);
-	if (old_arr)
-		kfree_rcu(old_arr, rcu);
-
-	return 0;
-}
-
 int bond_tlb_xmit(struct sk_buff *skb, struct net_device *bond_dev)
 {
 	struct bonding *bond = netdev_priv(bond_dev);
-	struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
 	struct ethhdr *eth_data;
 	struct slave *tx_slave = NULL;
 	u32 hash_index;
@@ -1462,9 +1427,9 @@ int bond_tlb_xmit(struct sk_buff *skb, struct net_device *bond_dev)
 							      hash_index & 0xFF,
 							      skb->len);
 			} else {
-				struct tlb_up_slave *slaves;
+				struct bond_up_slave *slaves;
 
-				slaves = rcu_dereference(bond_info->slave_arr);
+				slaves = rcu_dereference(bond->slave_arr);
 				if (slaves && slaves->count)
 					tx_slave = slaves->arr[hash_index %
 							       slaves->count];
@@ -1734,7 +1699,7 @@ void bond_alb_deinit_slave(struct bonding *bond, struct slave *slave)
 	}
 
 	if (bond_is_nondyn_tlb(bond))
-		if (bond_tlb_update_slave_arr(bond, slave))
+		if (bond_update_slave_arr(bond, slave))
 			pr_err("Failed to build slave-array for TLB mode.\n");
 
 }
@@ -1762,7 +1727,7 @@ void bond_alb_handle_link_change(struct bonding *bond, struct slave *slave, char
 	}
 
 	if (bond_is_nondyn_tlb(bond)) {
-		if (bond_tlb_update_slave_arr(bond, NULL))
+		if (bond_update_slave_arr(bond, NULL))
 			pr_err("Failed to build slave-array for TLB mode.\n");
 	}
 }
diff --git a/drivers/net/bonding/bond_alb.h b/drivers/net/bonding/bond_alb.h
index aaeac61d03cf..5fc76c01636c 100644
--- a/drivers/net/bonding/bond_alb.h
+++ b/drivers/net/bonding/bond_alb.h
@@ -139,20 +139,12 @@ struct tlb_slave_info {
 			 */
 };
 
-struct tlb_up_slave {
-	unsigned int	count;
-	struct rcu_head rcu;
-	struct slave	*arr[0];
-};
-
 struct alb_bond_info {
 	struct tlb_client_info	*tx_hashtbl; /* Dynamically allocated */
 	spinlock_t		tx_hashtbl_lock;
 	u32			unbalanced_load;
 	int			tx_rebalance_counter;
 	int			lp_counter;
-	/* -------- non-dynamic tlb mode only ---------*/
-	struct tlb_up_slave __rcu *slave_arr;	  /* Up slaves */
 	/* -------- rlb parameters -------- */
 	int rlb_enabled;
 	struct rlb_client_info	*rx_hashtbl;	/* Receive hash table */
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index f0f5eab0fab1..b0e8e7cfa10f 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -1693,6 +1693,9 @@ static int __bond_release_one(struct net_device *bond_dev,
 	if (BOND_MODE(bond) == BOND_MODE_8023AD)
 		bond_3ad_unbind_slave(slave);
 
+	else if (BOND_MODE(bond) == BOND_MODE_XOR)
+		bond_update_slave_arr(bond, slave);
+
 	write_unlock_bh(&bond->lock);
 
 	netdev_info(bond_dev, "Releasing %s interface %s\n",
@@ -2009,6 +2012,9 @@ static void bond_miimon_commit(struct bonding *bond)
 				bond_alb_handle_link_change(bond, slave,
 							    BOND_LINK_UP);
 
+			if (BOND_MODE(bond) == BOND_MODE_XOR)
+				bond_update_slave_arr(bond, NULL);
+
 			if (!bond->curr_active_slave ||
 			    (slave == bond->primary_slave))
 				goto do_failover;
@@ -2037,6 +2043,9 @@ static void bond_miimon_commit(struct bonding *bond)
 				bond_alb_handle_link_change(bond, slave,
 							    BOND_LINK_DOWN);
 
+			if (BOND_MODE(bond) == BOND_MODE_XOR)
+				bond_update_slave_arr(bond, slave);
+
 			if (slave == rcu_access_pointer(bond->curr_active_slave))
 				goto do_failover;
 
@@ -3149,6 +3158,7 @@ static int bond_open(struct net_device *bond_dev)
 static int bond_close(struct net_device *bond_dev)
 {
 	struct bonding *bond = netdev_priv(bond_dev);
+	struct bond_up_slave *arr;
 
 	bond_work_cancel_all(bond);
 	bond->send_peer_notif = 0;
@@ -3156,6 +3166,10 @@ static int bond_close(struct net_device *bond_dev)
 		bond_alb_deinitialize(bond);
 	bond->recv_probe = NULL;
 
+	arr = rtnl_dereference(bond->slave_arr);
+	if (arr)
+	    kfree_rcu(arr, rcu);
+
 	return 0;
 }
 
@@ -3684,15 +3698,84 @@ static int bond_xmit_activebackup(struct sk_buff *skb, struct net_device *bond_d
 	return NETDEV_TX_OK;
 }
 
-/* In bond_xmit_xor() , we determine the output device by using a pre-
- * determined xmit_hash_policy(), If the selected device is not enabled,
- * find the next active slave.
+/* Build the usable slaves array in control path for modes that use xmit-hash
+ * to determine the slave interface -
+ * (a) BOND_MODE_8023AD
+ * (b) BOND_MODE_XOR
+ * (c) BOND_MODE_TLB && tlb_dynamic_lb == 0
  */
-static int bond_xmit_xor(struct sk_buff *skb, struct net_device *bond_dev)
+int bond_update_slave_arr(struct bonding *bond, struct slave *skipslave)
 {
-	struct bonding *bond = netdev_priv(bond_dev);
+	struct slave *slave;
+	struct list_head *iter;
+	struct bond_up_slave *new_arr, *old_arr;
+	int slaves_in_agg;
+	int agg_id = 0;
+	int ret = 0;
+
+	new_arr = kzalloc(offsetof(struct bond_up_slave, arr[bond->slave_cnt]),
+			  GFP_ATOMIC);
+	if (!new_arr) {
+		ret = -ENOMEM;
+		goto out;
+	}
+	if (BOND_MODE(bond) == BOND_MODE_8023AD) {
+		struct ad_info ad_info;
+
+		if (__bond_3ad_get_active_agg_info(bond, &ad_info)) {
+			pr_debug("__bond_3ad_get_active_agg_info failed\n");
+			kfree_rcu(new_arr, rcu);
+			ret = -EINVAL;
+			goto out;
+		}
+		slaves_in_agg = ad_info.ports;
+		agg_id = ad_info.aggregator_id;
+	}
+	bond_for_each_slave(bond, slave, iter) {
+		if (BOND_MODE(bond) == BOND_MODE_8023AD) {
+			struct aggregator *agg;
+
+			agg = SLAVE_AD_INFO(slave)->port.aggregator;
+			if (!agg || agg->aggregator_identifier != agg_id)
+				continue;
+		}
+		if (!bond_slave_can_tx(slave))
+			continue;
+		if (skipslave == slave)
+			continue;
+		new_arr->arr[new_arr->count++] = slave;
+	}
 
-	bond_xmit_slave_id(bond, skb, bond_xmit_hash(bond, skb) % bond->slave_cnt);
+	old_arr = rcu_dereference_protected(bond->slave_arr,
+					    lockdep_rtnl_is_held() ||
+					    lockdep_is_held(&bond->lock) ||
+					    lockdep_is_held(&bond->curr_slave_lock));
+	rcu_assign_pointer(bond->slave_arr, new_arr);
+	if (old_arr)
+		kfree_rcu(old_arr, rcu);
+
+out:
+	return ret;
+}
+
+/* Use this Xmit function for 3AD as well as XOR modes. The current
+ * usable slave array is formed in the control path. The xmit function
+ * just calculates hash and sends the packet out.
+ */
+int bond_3ad_xor_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+	struct bonding *bond = netdev_priv(dev);
+	struct slave *slave;
+	struct bond_up_slave *slaves;
+
+	slaves = rcu_dereference(bond->slave_arr);
+	if (slaves && slaves->count) {
+		slave = slaves->arr[bond_xmit_hash(bond, skb) % slaves->count];
+		bond_dev_queue_xmit(bond, skb, slave->dev);
+	} else {
+		dev_kfree_skb_any(skb);
+		atomic_long_inc(&dev->tx_dropped);
+	}
 
 	return NETDEV_TX_OK;
 }
@@ -3794,12 +3877,11 @@ static netdev_tx_t __bond_start_xmit(struct sk_buff *skb, struct net_device *dev
 		return bond_xmit_roundrobin(skb, dev);
 	case BOND_MODE_ACTIVEBACKUP:
 		return bond_xmit_activebackup(skb, dev);
+	case BOND_MODE_8023AD:
 	case BOND_MODE_XOR:
-		return bond_xmit_xor(skb, dev);
+		return bond_3ad_xor_xmit(skb, dev);
 	case BOND_MODE_BROADCAST:
 		return bond_xmit_broadcast(skb, dev);
-	case BOND_MODE_8023AD:
-		return bond_3ad_xmit_xor(skb, dev);
 	case BOND_MODE_ALB:
 		return bond_alb_xmit(skb, dev);
 	case BOND_MODE_TLB:
diff --git a/drivers/net/bonding/bonding.h b/drivers/net/bonding/bonding.h
index aace510d08d1..4a6195c0de60 100644
--- a/drivers/net/bonding/bonding.h
+++ b/drivers/net/bonding/bonding.h
@@ -177,6 +177,12 @@ struct slave {
 	struct kobject kobj;
 };
 
+struct bond_up_slave {
+	unsigned int	count;
+	struct rcu_head rcu;
+	struct slave	*arr[0];
+};
+
 /*
  * Link pseudo-state only used internally by monitors
  */
@@ -196,6 +202,7 @@ struct bonding {
 	struct   slave __rcu *curr_active_slave;
 	struct   slave __rcu *current_arp_slave;
 	struct   slave *primary_slave;
+	struct   bond_up_slave __rcu *slave_arr; /* Array of usable slaves */
 	bool     force_primary;
 	s32      slave_cnt; /* never change this value outside the attach/detach wrappers */
 	int     (*recv_probe)(const struct sk_buff *, struct bonding *,
@@ -527,6 +534,7 @@ const char *bond_slave_link_status(s8 link);
 struct bond_vlan_tag *bond_verify_device_path(struct net_device *start_dev,
 					      struct net_device *end_dev,
 					      int level);
+int bond_update_slave_arr(struct bonding *bond, struct slave *skipslave);
 
 #ifdef CONFIG_PROC_FS
 void bond_create_proc_entry(struct bonding *bond);
-- 
2.1.0.rc2.206.gedb03e5

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH net-next 2/2] bonding: Simplify the xmit function for modes that use xmit_hash
  2014-09-03 21:47 [PATCH net-next 2/2] bonding: Simplify the xmit function for modes that use xmit_hash Mahesh Bandewar
@ 2014-09-03 22:51 ` Jay Vosburgh
  2014-09-04  0:57   ` Mahesh Bandewar
  2014-09-04 13:16 ` Nikolay Aleksandrov
  1 sibling, 1 reply; 7+ messages in thread
From: Jay Vosburgh @ 2014-09-03 22:51 UTC (permalink / raw)
  To: Mahesh Bandewar
  Cc: Veaceslav Falico, Andy Gospodarek, David Miller, netdev,
	Eric Dumazet, Maciej Zenczykowski

Mahesh Bandewar <maheshb@google.com> wrote:

>Earlier change to use usable slave array for TLB mode had an additional
>performance advantage. So extending the same logic to all other modes
>that use xmit-hash for slave selection (viz 802.3AD, and XOR modes).
>Also consolidating this with the earlier TLB change.
>
>The main idea is to build the usable slaves array in the control path
>and use that array for slave selection during xmit operation.
>
>Measured performance in a setup with a bond of 4x1G NICs with 200
>instances of netperf for the modes involved (3ad, xor, tlb)
>cmd: netperf -t TCP_RR -H <TargetHost> -l 60 -s 5
>
>Mode        TPS-Before   TPS-After
>
>802.3ad   : 468,694      493,101
>TLB (lb=0): 392,583      392,965
>XOR       : 475,696      484,517

	In general I think this is an improvement over traversing linked
lists; however, I have a couple of comments.

	First, for the 802.3ad mode, is the case within
bond_3ad_state_machine_handler that runs the ad_agg_selection_logic (if
the agg_select_timer runs out) handled?  This may change the active
aggregator, and thus the list of slaves that ought to be in the array,
and looking at the patch I don't see where this would be taken care of.

	As a practical matter, the usage of the agg_select_timer is such
that it's unlikely to cause a change of active aggregator (it will
usually set an active aggregator when there is none, e.g., during open
processing), but this case should probably either be handled or
commented to explain why it is safe.

	It may be that the bond_3ad_initialize call to set the
agg_select_timer to 8 seconds is no longer needed, but for now it's
there and may change the active aggregator.

	One more comment below.

>Signed-off-by: Mahesh Bandewar <maheshb@google.com>
>---
> drivers/net/bonding/bond_3ad.c  |  77 ++++---------------------------
> drivers/net/bonding/bond_alb.c  |  43 ++---------------
> drivers/net/bonding/bond_alb.h  |   8 ----
> drivers/net/bonding/bond_main.c | 100 ++++++++++++++++++++++++++++++++++++----
> drivers/net/bonding/bonding.h   |   8 ++++
> 5 files changed, 113 insertions(+), 123 deletions(-)
>
>diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c
>index ee2c73a9de39..d42fd65fdfa9 100644
>--- a/drivers/net/bonding/bond_3ad.c
>+++ b/drivers/net/bonding/bond_3ad.c
>@@ -1717,6 +1717,8 @@ static void ad_enable_collecting_distributing(struct port *port)
> 			 port->actor_port_number,
> 			 port->aggregator->aggregator_identifier);
> 		__enable_port(port);
>+		if (bond_update_slave_arr(port->slave->bond, NULL))
>+			pr_debug("Failed to build slave-array for 3ad mode.\n");
> 	}
> }
> 
>@@ -1733,6 +1735,8 @@ static void ad_disable_collecting_distributing(struct port *port)
> 			 port->actor_port_number,
> 			 port->aggregator->aggregator_identifier);
> 		__disable_port(port);
>+		if (bond_update_slave_arr(port->slave->bond, NULL))
>+			pr_debug("Failed to build slave-array for 3ad mode.\n");
> 	}
> }
> 
>@@ -1917,6 +1921,9 @@ void bond_3ad_unbind_slave(struct slave *slave)
> 	__update_lacpdu_from_port(port);
> 	ad_lacpdu_send(port);
> 
>+	if (bond_update_slave_arr(bond, slave))
>+		pr_debug("Failed to build slave-array for 3AD mode.\n");
>+
> 	/* check if this aggregator is occupied */
> 	if (aggregator->lag_ports) {
> 		/* check if there are other ports related to this aggregator
>@@ -2311,6 +2318,9 @@ void bond_3ad_handle_link_change(struct slave *slave, char link)
> 	 */
> 	port->sm_vars |= AD_PORT_BEGIN;
> 
>+	if (bond_update_slave_arr(slave->bond, NULL))
>+		pr_debug("Failed to build slave-array for 3ad mode.\n");
>+
> 	__release_state_machine_lock(port);
> }
> 
>@@ -2407,73 +2417,6 @@ int bond_3ad_get_active_agg_info(struct bonding *bond, struct ad_info *ad_info)
> 	return ret;
> }
> 
>-int bond_3ad_xmit_xor(struct sk_buff *skb, struct net_device *dev)
>-{
>-	struct bonding *bond = netdev_priv(dev);
>-	struct slave *slave, *first_ok_slave;
>-	struct aggregator *agg;
>-	struct ad_info ad_info;
>-	struct list_head *iter;
>-	int slaves_in_agg;
>-	int slave_agg_no;
>-	int agg_id;
>-
>-	if (__bond_3ad_get_active_agg_info(bond, &ad_info)) {
>-		netdev_dbg(dev, "__bond_3ad_get_active_agg_info failed\n");
>-		goto err_free;
>-	}
>-
>-	slaves_in_agg = ad_info.ports;
>-	agg_id = ad_info.aggregator_id;
>-
>-	if (slaves_in_agg == 0) {
>-		netdev_dbg(dev, "active aggregator is empty\n");
>-		goto err_free;
>-	}
>-
>-	slave_agg_no = bond_xmit_hash(bond, skb) % slaves_in_agg;
>-	first_ok_slave = NULL;
>-
>-	bond_for_each_slave_rcu(bond, slave, iter) {
>-		agg = SLAVE_AD_INFO(slave)->port.aggregator;
>-		if (!agg || agg->aggregator_identifier != agg_id)
>-			continue;
>-
>-		if (slave_agg_no >= 0) {
>-			if (!first_ok_slave && bond_slave_can_tx(slave))
>-				first_ok_slave = slave;
>-			slave_agg_no--;
>-			continue;
>-		}
>-
>-		if (bond_slave_can_tx(slave)) {
>-			bond_dev_queue_xmit(bond, skb, slave->dev);
>-			goto out;
>-		}
>-	}
>-
>-	if (slave_agg_no >= 0) {
>-		netdev_err(dev, "Couldn't find a slave to tx on for aggregator ID %d\n",
>-			   agg_id);
>-		goto err_free;
>-	}
>-
>-	/* we couldn't find any suitable slave after the agg_no, so use the
>-	 * first suitable found, if found.
>-	 */
>-	if (first_ok_slave)
>-		bond_dev_queue_xmit(bond, skb, first_ok_slave->dev);
>-	else
>-		goto err_free;
>-
>-out:
>-	return NETDEV_TX_OK;
>-err_free:
>-	/* no suitable interface, frame not sent */
>-	dev_kfree_skb_any(skb);
>-	goto out;
>-}
>-
> int bond_3ad_lacpdu_recv(const struct sk_buff *skb, struct bonding *bond,
> 			 struct slave *slave)
> {
>diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c
>index 73c21e233131..b2be7b3216d6 100644
>--- a/drivers/net/bonding/bond_alb.c
>+++ b/drivers/net/bonding/bond_alb.c
>@@ -200,7 +200,6 @@ static int tlb_initialize(struct bonding *bond)
> static void tlb_deinitialize(struct bonding *bond)
> {
> 	struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
>-	struct tlb_up_slave *arr;
> 
> 	_lock_tx_hashtbl_bh(bond);
> 
>@@ -208,10 +207,6 @@ static void tlb_deinitialize(struct bonding *bond)
> 	bond_info->tx_hashtbl = NULL;
> 
> 	_unlock_tx_hashtbl_bh(bond);
>-
>-	arr = rtnl_dereference(bond_info->slave_arr);
>-	if (arr)
>-		kfree_rcu(arr, rcu);
> }
> 
> static long long compute_gap(struct slave *slave)
>@@ -1409,39 +1404,9 @@ out:
> 	return NETDEV_TX_OK;
> }
> 
>-static int bond_tlb_update_slave_arr(struct bonding *bond,
>-				     struct slave *skipslave)
>-{
>-	struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
>-	struct slave *tx_slave;
>-	struct list_head *iter;
>-	struct tlb_up_slave *new_arr, *old_arr;
>-
>-	new_arr = kzalloc(offsetof(struct tlb_up_slave, arr[bond->slave_cnt]),
>-			  GFP_ATOMIC);
>-	if (!new_arr)
>-		return -ENOMEM;
>-
>-	bond_for_each_slave(bond, tx_slave, iter) {
>-		if (!bond_slave_can_tx(tx_slave))
>-			continue;
>-		if (skipslave == tx_slave)
>-			continue;
>-		new_arr->arr[new_arr->count++] = tx_slave;
>-	}
>-
>-	old_arr = rtnl_dereference(bond_info->slave_arr);
>-	rcu_assign_pointer(bond_info->slave_arr, new_arr);
>-	if (old_arr)
>-		kfree_rcu(old_arr, rcu);
>-
>-	return 0;
>-}
>-
> int bond_tlb_xmit(struct sk_buff *skb, struct net_device *bond_dev)
> {
> 	struct bonding *bond = netdev_priv(bond_dev);
>-	struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
> 	struct ethhdr *eth_data;
> 	struct slave *tx_slave = NULL;
> 	u32 hash_index;
>@@ -1462,9 +1427,9 @@ int bond_tlb_xmit(struct sk_buff *skb, struct net_device *bond_dev)
> 							      hash_index & 0xFF,
> 							      skb->len);
> 			} else {
>-				struct tlb_up_slave *slaves;
>+				struct bond_up_slave *slaves;
> 
>-				slaves = rcu_dereference(bond_info->slave_arr);
>+				slaves = rcu_dereference(bond->slave_arr);
> 				if (slaves && slaves->count)
> 					tx_slave = slaves->arr[hash_index %
> 							       slaves->count];
>@@ -1734,7 +1699,7 @@ void bond_alb_deinit_slave(struct bonding *bond, struct slave *slave)
> 	}
> 
> 	if (bond_is_nondyn_tlb(bond))
>-		if (bond_tlb_update_slave_arr(bond, slave))
>+		if (bond_update_slave_arr(bond, slave))
> 			pr_err("Failed to build slave-array for TLB mode.\n");
> 
> }
>@@ -1762,7 +1727,7 @@ void bond_alb_handle_link_change(struct bonding *bond, struct slave *slave, char
> 	}
> 
> 	if (bond_is_nondyn_tlb(bond)) {
>-		if (bond_tlb_update_slave_arr(bond, NULL))
>+		if (bond_update_slave_arr(bond, NULL))
> 			pr_err("Failed to build slave-array for TLB mode.\n");
> 	}
> }
>diff --git a/drivers/net/bonding/bond_alb.h b/drivers/net/bonding/bond_alb.h
>index aaeac61d03cf..5fc76c01636c 100644
>--- a/drivers/net/bonding/bond_alb.h
>+++ b/drivers/net/bonding/bond_alb.h
>@@ -139,20 +139,12 @@ struct tlb_slave_info {
> 			 */
> };
> 
>-struct tlb_up_slave {
>-	unsigned int	count;
>-	struct rcu_head rcu;
>-	struct slave	*arr[0];
>-};
>-
> struct alb_bond_info {
> 	struct tlb_client_info	*tx_hashtbl; /* Dynamically allocated */
> 	spinlock_t		tx_hashtbl_lock;
> 	u32			unbalanced_load;
> 	int			tx_rebalance_counter;
> 	int			lp_counter;
>-	/* -------- non-dynamic tlb mode only ---------*/
>-	struct tlb_up_slave __rcu *slave_arr;	  /* Up slaves */
> 	/* -------- rlb parameters -------- */
> 	int rlb_enabled;
> 	struct rlb_client_info	*rx_hashtbl;	/* Receive hash table */
>diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
>index f0f5eab0fab1..b0e8e7cfa10f 100644
>--- a/drivers/net/bonding/bond_main.c
>+++ b/drivers/net/bonding/bond_main.c
>@@ -1693,6 +1693,9 @@ static int __bond_release_one(struct net_device *bond_dev,
> 	if (BOND_MODE(bond) == BOND_MODE_8023AD)
> 		bond_3ad_unbind_slave(slave);
> 
>+	else if (BOND_MODE(bond) == BOND_MODE_XOR)
>+		bond_update_slave_arr(bond, slave);
>+
> 	write_unlock_bh(&bond->lock);
> 
> 	netdev_info(bond_dev, "Releasing %s interface %s\n",
>@@ -2009,6 +2012,9 @@ static void bond_miimon_commit(struct bonding *bond)
> 				bond_alb_handle_link_change(bond, slave,
> 							    BOND_LINK_UP);
> 
>+			if (BOND_MODE(bond) == BOND_MODE_XOR)
>+				bond_update_slave_arr(bond, NULL);
>+
> 			if (!bond->curr_active_slave ||
> 			    (slave == bond->primary_slave))
> 				goto do_failover;
>@@ -2037,6 +2043,9 @@ static void bond_miimon_commit(struct bonding *bond)
> 				bond_alb_handle_link_change(bond, slave,
> 							    BOND_LINK_DOWN);
> 
>+			if (BOND_MODE(bond) == BOND_MODE_XOR)
>+				bond_update_slave_arr(bond, slave);
>+
> 			if (slave == rcu_access_pointer(bond->curr_active_slave))
> 				goto do_failover;
> 
>@@ -3149,6 +3158,7 @@ static int bond_open(struct net_device *bond_dev)
> static int bond_close(struct net_device *bond_dev)
> {
> 	struct bonding *bond = netdev_priv(bond_dev);
>+	struct bond_up_slave *arr;
> 
> 	bond_work_cancel_all(bond);
> 	bond->send_peer_notif = 0;
>@@ -3156,6 +3166,10 @@ static int bond_close(struct net_device *bond_dev)
> 		bond_alb_deinitialize(bond);
> 	bond->recv_probe = NULL;
> 
>+	arr = rtnl_dereference(bond->slave_arr);
>+	if (arr)
>+	    kfree_rcu(arr, rcu);
>+
> 	return 0;
> }
> 
>@@ -3684,15 +3698,84 @@ static int bond_xmit_activebackup(struct sk_buff *skb, struct net_device *bond_d
> 	return NETDEV_TX_OK;
> }
> 
>-/* In bond_xmit_xor() , we determine the output device by using a pre-
>- * determined xmit_hash_policy(), If the selected device is not enabled,
>- * find the next active slave.
>+/* Build the usable slaves array in control path for modes that use xmit-hash
>+ * to determine the slave interface -
>+ * (a) BOND_MODE_8023AD
>+ * (b) BOND_MODE_XOR
>+ * (c) BOND_MODE_TLB && tlb_dynamic_lb == 0
>  */
>-static int bond_xmit_xor(struct sk_buff *skb, struct net_device *bond_dev)
>+int bond_update_slave_arr(struct bonding *bond, struct slave *skipslave)
> {
>-	struct bonding *bond = netdev_priv(bond_dev);
>+	struct slave *slave;
>+	struct list_head *iter;
>+	struct bond_up_slave *new_arr, *old_arr;
>+	int slaves_in_agg;
>+	int agg_id = 0;
>+	int ret = 0;
>+
>+	new_arr = kzalloc(offsetof(struct bond_up_slave, arr[bond->slave_cnt]),
>+			  GFP_ATOMIC);
>+	if (!new_arr) {
>+		ret = -ENOMEM;
>+		goto out;
>+	}

	If this allocation fails, won't the in-place array be left with
a reference to a slave that has potentially been freed, if, e.g., the
call came in via the __bond_release_one path?

	I haven't tested it, but it seems plausible that we could
resolve this by writing over the "disappearing" slave's entry in the
array, e.g., move the last element of the array to the "disappearing"
element.

	If this is a problem, the same logic exists in the current
(tlb-only) code, so a fix for stable may be needed as well.

	-J


>+	if (BOND_MODE(bond) == BOND_MODE_8023AD) {
>+		struct ad_info ad_info;
>+
>+		if (__bond_3ad_get_active_agg_info(bond, &ad_info)) {
>+			pr_debug("__bond_3ad_get_active_agg_info failed\n");
>+			kfree_rcu(new_arr, rcu);
>+			ret = -EINVAL;
>+			goto out;
>+		}
>+		slaves_in_agg = ad_info.ports;
>+		agg_id = ad_info.aggregator_id;
>+	}
>+	bond_for_each_slave(bond, slave, iter) {
>+		if (BOND_MODE(bond) == BOND_MODE_8023AD) {
>+			struct aggregator *agg;
>+
>+			agg = SLAVE_AD_INFO(slave)->port.aggregator;
>+			if (!agg || agg->aggregator_identifier != agg_id)
>+				continue;
>+		}
>+		if (!bond_slave_can_tx(slave))
>+			continue;
>+		if (skipslave == slave)
>+			continue;
>+		new_arr->arr[new_arr->count++] = slave;
>+	}
> 
>-	bond_xmit_slave_id(bond, skb, bond_xmit_hash(bond, skb) % bond->slave_cnt);
>+	old_arr = rcu_dereference_protected(bond->slave_arr,
>+					    lockdep_rtnl_is_held() ||
>+					    lockdep_is_held(&bond->lock) ||
>+					    lockdep_is_held(&bond->curr_slave_lock));
>+	rcu_assign_pointer(bond->slave_arr, new_arr);
>+	if (old_arr)
>+		kfree_rcu(old_arr, rcu);
>+
>+out:
>+	return ret;
>+}
>+
>+/* Use this Xmit function for 3AD as well as XOR modes. The current
>+ * usable slave array is formed in the control path. The xmit function
>+ * just calculates hash and sends the packet out.
>+ */
>+int bond_3ad_xor_xmit(struct sk_buff *skb, struct net_device *dev)
>+{
>+	struct bonding *bond = netdev_priv(dev);
>+	struct slave *slave;
>+	struct bond_up_slave *slaves;
>+
>+	slaves = rcu_dereference(bond->slave_arr);
>+	if (slaves && slaves->count) {
>+		slave = slaves->arr[bond_xmit_hash(bond, skb) % slaves->count];
>+		bond_dev_queue_xmit(bond, skb, slave->dev);
>+	} else {
>+		dev_kfree_skb_any(skb);
>+		atomic_long_inc(&dev->tx_dropped);
>+	}
> 
> 	return NETDEV_TX_OK;
> }
>@@ -3794,12 +3877,11 @@ static netdev_tx_t __bond_start_xmit(struct sk_buff *skb, struct net_device *dev
> 		return bond_xmit_roundrobin(skb, dev);
> 	case BOND_MODE_ACTIVEBACKUP:
> 		return bond_xmit_activebackup(skb, dev);
>+	case BOND_MODE_8023AD:
> 	case BOND_MODE_XOR:
>-		return bond_xmit_xor(skb, dev);
>+		return bond_3ad_xor_xmit(skb, dev);
> 	case BOND_MODE_BROADCAST:
> 		return bond_xmit_broadcast(skb, dev);
>-	case BOND_MODE_8023AD:
>-		return bond_3ad_xmit_xor(skb, dev);
> 	case BOND_MODE_ALB:
> 		return bond_alb_xmit(skb, dev);
> 	case BOND_MODE_TLB:
>diff --git a/drivers/net/bonding/bonding.h b/drivers/net/bonding/bonding.h
>index aace510d08d1..4a6195c0de60 100644
>--- a/drivers/net/bonding/bonding.h
>+++ b/drivers/net/bonding/bonding.h
>@@ -177,6 +177,12 @@ struct slave {
> 	struct kobject kobj;
> };
> 
>+struct bond_up_slave {
>+	unsigned int	count;
>+	struct rcu_head rcu;
>+	struct slave	*arr[0];
>+};
>+
> /*
>  * Link pseudo-state only used internally by monitors
>  */
>@@ -196,6 +202,7 @@ struct bonding {
> 	struct   slave __rcu *curr_active_slave;
> 	struct   slave __rcu *current_arp_slave;
> 	struct   slave *primary_slave;
>+	struct   bond_up_slave __rcu *slave_arr; /* Array of usable slaves */
> 	bool     force_primary;
> 	s32      slave_cnt; /* never change this value outside the attach/detach wrappers */
> 	int     (*recv_probe)(const struct sk_buff *, struct bonding *,
>@@ -527,6 +534,7 @@ const char *bond_slave_link_status(s8 link);
> struct bond_vlan_tag *bond_verify_device_path(struct net_device *start_dev,
> 					      struct net_device *end_dev,
> 					      int level);
>+int bond_update_slave_arr(struct bonding *bond, struct slave *skipslave);
> 
> #ifdef CONFIG_PROC_FS
> void bond_create_proc_entry(struct bonding *bond);
>-- 
>2.1.0.rc2.206.gedb03e5
>

---
	-Jay Vosburgh, jay.vosburgh@canonical.com

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH net-next 2/2] bonding: Simplify the xmit function for modes that use xmit_hash
  2014-09-03 22:51 ` Jay Vosburgh
@ 2014-09-04  0:57   ` Mahesh Bandewar
  0 siblings, 0 replies; 7+ messages in thread
From: Mahesh Bandewar @ 2014-09-04  0:57 UTC (permalink / raw)
  To: Jay Vosburgh
  Cc: Veaceslav Falico, Andy Gospodarek, David Miller, netdev,
	Eric Dumazet, Maciej Zenczykowski

On Wed, Sep 3, 2014 at 3:51 PM, Jay Vosburgh <jay.vosburgh@canonical.com> wrote:
> Mahesh Bandewar <maheshb@google.com> wrote:
>
>>Earlier change to use usable slave array for TLB mode had an additional
>>performance advantage. So extending the same logic to all other modes
>>that use xmit-hash for slave selection (viz 802.3AD, and XOR modes).
>>Also consolidating this with the earlier TLB change.
>>
>>The main idea is to build the usable slaves array in the control path
>>and use that array for slave selection during xmit operation.
>>
>>Measured performance in a setup with a bond of 4x1G NICs with 200
>>instances of netperf for the modes involved (3ad, xor, tlb)
>>cmd: netperf -t TCP_RR -H <TargetHost> -l 60 -s 5
>>
>>Mode        TPS-Before   TPS-After
>>
>>802.3ad   : 468,694      493,101
>>TLB (lb=0): 392,583      392,965
>>XOR       : 475,696      484,517
>
>         In general I think this is an improvement over traversing linked
> lists; however, I have a couple of comments.
>
>         First, for the 802.3ad mode, is the case within
> bond_3ad_state_machine_handler that runs the ad_agg_selection_logic (if
> the agg_select_timer runs out) handled?  This may change the active
> aggregator, and thus the list of slaves that ought to be in the array,
> and looking at the patch I don't see where this would be taken care of.
>
>         As a practical matter, the usage of the agg_select_timer is such
> that it's unlikely to cause a change of active aggregator (it will
> usually set an active aggregator when there is none, e.g., during open
> processing), but this case should probably either be handled or
> commented to explain why it is safe.
>
Good point. I'll fix that!

>         It may be that the bond_3ad_initialize call to set the
> agg_select_timer to 8 seconds is no longer needed, but for now it's
> there and may change the active aggregator.
>
>         One more comment below.
>
>>Signed-off-by: Mahesh Bandewar <maheshb@google.com>
>>---
>> drivers/net/bonding/bond_3ad.c  |  77 ++++---------------------------
>> drivers/net/bonding/bond_alb.c  |  43 ++---------------
>> drivers/net/bonding/bond_alb.h  |   8 ----
>> drivers/net/bonding/bond_main.c | 100 ++++++++++++++++++++++++++++++++++++----
>> drivers/net/bonding/bonding.h   |   8 ++++
>> 5 files changed, 113 insertions(+), 123 deletions(-)
>>
>>diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c
>>index ee2c73a9de39..d42fd65fdfa9 100644
>>--- a/drivers/net/bonding/bond_3ad.c
>>+++ b/drivers/net/bonding/bond_3ad.c
>>@@ -1717,6 +1717,8 @@ static void ad_enable_collecting_distributing(struct port *port)
>>                        port->actor_port_number,
>>                        port->aggregator->aggregator_identifier);
>>               __enable_port(port);
>>+              if (bond_update_slave_arr(port->slave->bond, NULL))
>>+                      pr_debug("Failed to build slave-array for 3ad mode.\n");
>>       }
>> }
>>
>>@@ -1733,6 +1735,8 @@ static void ad_disable_collecting_distributing(struct port *port)
>>                        port->actor_port_number,
>>                        port->aggregator->aggregator_identifier);
>>               __disable_port(port);
>>+              if (bond_update_slave_arr(port->slave->bond, NULL))
>>+                      pr_debug("Failed to build slave-array for 3ad mode.\n");
>>       }
>> }
>>
>>@@ -1917,6 +1921,9 @@ void bond_3ad_unbind_slave(struct slave *slave)
>>       __update_lacpdu_from_port(port);
>>       ad_lacpdu_send(port);
>>
>>+      if (bond_update_slave_arr(bond, slave))
>>+              pr_debug("Failed to build slave-array for 3AD mode.\n");
>>+
>>       /* check if this aggregator is occupied */
>>       if (aggregator->lag_ports) {
>>               /* check if there are other ports related to this aggregator
>>@@ -2311,6 +2318,9 @@ void bond_3ad_handle_link_change(struct slave *slave, char link)
>>        */
>>       port->sm_vars |= AD_PORT_BEGIN;
>>
>>+      if (bond_update_slave_arr(slave->bond, NULL))
>>+              pr_debug("Failed to build slave-array for 3ad mode.\n");
>>+
>>       __release_state_machine_lock(port);
>> }
>>
>>@@ -2407,73 +2417,6 @@ int bond_3ad_get_active_agg_info(struct bonding *bond, struct ad_info *ad_info)
>>       return ret;
>> }
>>
>>-int bond_3ad_xmit_xor(struct sk_buff *skb, struct net_device *dev)
>>-{
>>-      struct bonding *bond = netdev_priv(dev);
>>-      struct slave *slave, *first_ok_slave;
>>-      struct aggregator *agg;
>>-      struct ad_info ad_info;
>>-      struct list_head *iter;
>>-      int slaves_in_agg;
>>-      int slave_agg_no;
>>-      int agg_id;
>>-
>>-      if (__bond_3ad_get_active_agg_info(bond, &ad_info)) {
>>-              netdev_dbg(dev, "__bond_3ad_get_active_agg_info failed\n");
>>-              goto err_free;
>>-      }
>>-
>>-      slaves_in_agg = ad_info.ports;
>>-      agg_id = ad_info.aggregator_id;
>>-
>>-      if (slaves_in_agg == 0) {
>>-              netdev_dbg(dev, "active aggregator is empty\n");
>>-              goto err_free;
>>-      }
>>-
>>-      slave_agg_no = bond_xmit_hash(bond, skb) % slaves_in_agg;
>>-      first_ok_slave = NULL;
>>-
>>-      bond_for_each_slave_rcu(bond, slave, iter) {
>>-              agg = SLAVE_AD_INFO(slave)->port.aggregator;
>>-              if (!agg || agg->aggregator_identifier != agg_id)
>>-                      continue;
>>-
>>-              if (slave_agg_no >= 0) {
>>-                      if (!first_ok_slave && bond_slave_can_tx(slave))
>>-                              first_ok_slave = slave;
>>-                      slave_agg_no--;
>>-                      continue;
>>-              }
>>-
>>-              if (bond_slave_can_tx(slave)) {
>>-                      bond_dev_queue_xmit(bond, skb, slave->dev);
>>-                      goto out;
>>-              }
>>-      }
>>-
>>-      if (slave_agg_no >= 0) {
>>-              netdev_err(dev, "Couldn't find a slave to tx on for aggregator ID %d\n",
>>-                         agg_id);
>>-              goto err_free;
>>-      }
>>-
>>-      /* we couldn't find any suitable slave after the agg_no, so use the
>>-       * first suitable found, if found.
>>-       */
>>-      if (first_ok_slave)
>>-              bond_dev_queue_xmit(bond, skb, first_ok_slave->dev);
>>-      else
>>-              goto err_free;
>>-
>>-out:
>>-      return NETDEV_TX_OK;
>>-err_free:
>>-      /* no suitable interface, frame not sent */
>>-      dev_kfree_skb_any(skb);
>>-      goto out;
>>-}
>>-
>> int bond_3ad_lacpdu_recv(const struct sk_buff *skb, struct bonding *bond,
>>                        struct slave *slave)
>> {
>>diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c
>>index 73c21e233131..b2be7b3216d6 100644
>>--- a/drivers/net/bonding/bond_alb.c
>>+++ b/drivers/net/bonding/bond_alb.c
>>@@ -200,7 +200,6 @@ static int tlb_initialize(struct bonding *bond)
>> static void tlb_deinitialize(struct bonding *bond)
>> {
>>       struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
>>-      struct tlb_up_slave *arr;
>>
>>       _lock_tx_hashtbl_bh(bond);
>>
>>@@ -208,10 +207,6 @@ static void tlb_deinitialize(struct bonding *bond)
>>       bond_info->tx_hashtbl = NULL;
>>
>>       _unlock_tx_hashtbl_bh(bond);
>>-
>>-      arr = rtnl_dereference(bond_info->slave_arr);
>>-      if (arr)
>>-              kfree_rcu(arr, rcu);
>> }
>>
>> static long long compute_gap(struct slave *slave)
>>@@ -1409,39 +1404,9 @@ out:
>>       return NETDEV_TX_OK;
>> }
>>
>>-static int bond_tlb_update_slave_arr(struct bonding *bond,
>>-                                   struct slave *skipslave)
>>-{
>>-      struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
>>-      struct slave *tx_slave;
>>-      struct list_head *iter;
>>-      struct tlb_up_slave *new_arr, *old_arr;
>>-
>>-      new_arr = kzalloc(offsetof(struct tlb_up_slave, arr[bond->slave_cnt]),
>>-                        GFP_ATOMIC);
>>-      if (!new_arr)
>>-              return -ENOMEM;
>>-
>>-      bond_for_each_slave(bond, tx_slave, iter) {
>>-              if (!bond_slave_can_tx(tx_slave))
>>-                      continue;
>>-              if (skipslave == tx_slave)
>>-                      continue;
>>-              new_arr->arr[new_arr->count++] = tx_slave;
>>-      }
>>-
>>-      old_arr = rtnl_dereference(bond_info->slave_arr);
>>-      rcu_assign_pointer(bond_info->slave_arr, new_arr);
>>-      if (old_arr)
>>-              kfree_rcu(old_arr, rcu);
>>-
>>-      return 0;
>>-}
>>-
>> int bond_tlb_xmit(struct sk_buff *skb, struct net_device *bond_dev)
>> {
>>       struct bonding *bond = netdev_priv(bond_dev);
>>-      struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
>>       struct ethhdr *eth_data;
>>       struct slave *tx_slave = NULL;
>>       u32 hash_index;
>>@@ -1462,9 +1427,9 @@ int bond_tlb_xmit(struct sk_buff *skb, struct net_device *bond_dev)
>>                                                             hash_index & 0xFF,
>>                                                             skb->len);
>>                       } else {
>>-                              struct tlb_up_slave *slaves;
>>+                              struct bond_up_slave *slaves;
>>
>>-                              slaves = rcu_dereference(bond_info->slave_arr);
>>+                              slaves = rcu_dereference(bond->slave_arr);
>>                               if (slaves && slaves->count)
>>                                       tx_slave = slaves->arr[hash_index %
>>                                                              slaves->count];
>>@@ -1734,7 +1699,7 @@ void bond_alb_deinit_slave(struct bonding *bond, struct slave *slave)
>>       }
>>
>>       if (bond_is_nondyn_tlb(bond))
>>-              if (bond_tlb_update_slave_arr(bond, slave))
>>+              if (bond_update_slave_arr(bond, slave))
>>                       pr_err("Failed to build slave-array for TLB mode.\n");
>>
>> }
>>@@ -1762,7 +1727,7 @@ void bond_alb_handle_link_change(struct bonding *bond, struct slave *slave, char
>>       }
>>
>>       if (bond_is_nondyn_tlb(bond)) {
>>-              if (bond_tlb_update_slave_arr(bond, NULL))
>>+              if (bond_update_slave_arr(bond, NULL))
>>                       pr_err("Failed to build slave-array for TLB mode.\n");
>>       }
>> }
>>diff --git a/drivers/net/bonding/bond_alb.h b/drivers/net/bonding/bond_alb.h
>>index aaeac61d03cf..5fc76c01636c 100644
>>--- a/drivers/net/bonding/bond_alb.h
>>+++ b/drivers/net/bonding/bond_alb.h
>>@@ -139,20 +139,12 @@ struct tlb_slave_info {
>>                        */
>> };
>>
>>-struct tlb_up_slave {
>>-      unsigned int    count;
>>-      struct rcu_head rcu;
>>-      struct slave    *arr[0];
>>-};
>>-
>> struct alb_bond_info {
>>       struct tlb_client_info  *tx_hashtbl; /* Dynamically allocated */
>>       spinlock_t              tx_hashtbl_lock;
>>       u32                     unbalanced_load;
>>       int                     tx_rebalance_counter;
>>       int                     lp_counter;
>>-      /* -------- non-dynamic tlb mode only ---------*/
>>-      struct tlb_up_slave __rcu *slave_arr;     /* Up slaves */
>>       /* -------- rlb parameters -------- */
>>       int rlb_enabled;
>>       struct rlb_client_info  *rx_hashtbl;    /* Receive hash table */
>>diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
>>index f0f5eab0fab1..b0e8e7cfa10f 100644
>>--- a/drivers/net/bonding/bond_main.c
>>+++ b/drivers/net/bonding/bond_main.c
>>@@ -1693,6 +1693,9 @@ static int __bond_release_one(struct net_device *bond_dev,
>>       if (BOND_MODE(bond) == BOND_MODE_8023AD)
>>               bond_3ad_unbind_slave(slave);
>>
>>+      else if (BOND_MODE(bond) == BOND_MODE_XOR)
>>+              bond_update_slave_arr(bond, slave);
>>+
>>       write_unlock_bh(&bond->lock);
>>
>>       netdev_info(bond_dev, "Releasing %s interface %s\n",
>>@@ -2009,6 +2012,9 @@ static void bond_miimon_commit(struct bonding *bond)
>>                               bond_alb_handle_link_change(bond, slave,
>>                                                           BOND_LINK_UP);
>>
>>+                      if (BOND_MODE(bond) == BOND_MODE_XOR)
>>+                              bond_update_slave_arr(bond, NULL);
>>+
>>                       if (!bond->curr_active_slave ||
>>                           (slave == bond->primary_slave))
>>                               goto do_failover;
>>@@ -2037,6 +2043,9 @@ static void bond_miimon_commit(struct bonding *bond)
>>                               bond_alb_handle_link_change(bond, slave,
>>                                                           BOND_LINK_DOWN);
>>
>>+                      if (BOND_MODE(bond) == BOND_MODE_XOR)
>>+                              bond_update_slave_arr(bond, slave);
>>+
>>                       if (slave == rcu_access_pointer(bond->curr_active_slave))
>>                               goto do_failover;
>>
>>@@ -3149,6 +3158,7 @@ static int bond_open(struct net_device *bond_dev)
>> static int bond_close(struct net_device *bond_dev)
>> {
>>       struct bonding *bond = netdev_priv(bond_dev);
>>+      struct bond_up_slave *arr;
>>
>>       bond_work_cancel_all(bond);
>>       bond->send_peer_notif = 0;
>>@@ -3156,6 +3166,10 @@ static int bond_close(struct net_device *bond_dev)
>>               bond_alb_deinitialize(bond);
>>       bond->recv_probe = NULL;
>>
>>+      arr = rtnl_dereference(bond->slave_arr);
>>+      if (arr)
>>+          kfree_rcu(arr, rcu);
>>+
>>       return 0;
>> }
>>
>>@@ -3684,15 +3698,84 @@ static int bond_xmit_activebackup(struct sk_buff *skb, struct net_device *bond_d
>>       return NETDEV_TX_OK;
>> }
>>
>>-/* In bond_xmit_xor() , we determine the output device by using a pre-
>>- * determined xmit_hash_policy(), If the selected device is not enabled,
>>- * find the next active slave.
>>+/* Build the usable slaves array in control path for modes that use xmit-hash
>>+ * to determine the slave interface -
>>+ * (a) BOND_MODE_8023AD
>>+ * (b) BOND_MODE_XOR
>>+ * (c) BOND_MODE_TLB && tlb_dynamic_lb == 0
>>  */
>>-static int bond_xmit_xor(struct sk_buff *skb, struct net_device *bond_dev)
>>+int bond_update_slave_arr(struct bonding *bond, struct slave *skipslave)
>> {
>>-      struct bonding *bond = netdev_priv(bond_dev);
>>+      struct slave *slave;
>>+      struct list_head *iter;
>>+      struct bond_up_slave *new_arr, *old_arr;
>>+      int slaves_in_agg;
>>+      int agg_id = 0;
>>+      int ret = 0;
>>+
>>+      new_arr = kzalloc(offsetof(struct bond_up_slave, arr[bond->slave_cnt]),
>>+                        GFP_ATOMIC);
>>+      if (!new_arr) {
>>+              ret = -ENOMEM;
>>+              goto out;
>>+      }
>
>         If this allocation fails, won't the in-place array be left with
> a reference to a slave that has potentially been freed, if, e.g., the
> call came in via the __bond_release_one path?
>
>         I haven't tested it, but it seems plausible that we could
> resolve this by writing over the "disappearing" slave's entry in the
> array, e.g., move the last element of the array to the "disappearing"
> element.
>
Allocation failure could be catastrophic. If such a small alloc
request fails, the machine would have to worrying about lot other
things. Having said that, I think we can fix it by overwriting the
disappearing slave with the last entry (as you have suggested).

One other thing I should do is to treat this as an error and use
pr_err() instead of pr_debug() for these alloc failures.


>         If this is a problem, the same logic exists in the current
> (tlb-only) code, so a fix for stable may be needed as well.
>
Agreed. I'll cook another patch for the stable branch as well.

>         -J
>
>
>>+      if (BOND_MODE(bond) == BOND_MODE_8023AD) {
>>+              struct ad_info ad_info;
>>+
>>+              if (__bond_3ad_get_active_agg_info(bond, &ad_info)) {
>>+                      pr_debug("__bond_3ad_get_active_agg_info failed\n");
>>+                      kfree_rcu(new_arr, rcu);
>>+                      ret = -EINVAL;
>>+                      goto out;
>>+              }
>>+              slaves_in_agg = ad_info.ports;
>>+              agg_id = ad_info.aggregator_id;
>>+      }
>>+      bond_for_each_slave(bond, slave, iter) {
>>+              if (BOND_MODE(bond) == BOND_MODE_8023AD) {
>>+                      struct aggregator *agg;
>>+
>>+                      agg = SLAVE_AD_INFO(slave)->port.aggregator;
>>+                      if (!agg || agg->aggregator_identifier != agg_id)
>>+                              continue;
>>+              }
>>+              if (!bond_slave_can_tx(slave))
>>+                      continue;
>>+              if (skipslave == slave)
>>+                      continue;
>>+              new_arr->arr[new_arr->count++] = slave;
>>+      }
>>
>>-      bond_xmit_slave_id(bond, skb, bond_xmit_hash(bond, skb) % bond->slave_cnt);
>>+      old_arr = rcu_dereference_protected(bond->slave_arr,
>>+                                          lockdep_rtnl_is_held() ||
>>+                                          lockdep_is_held(&bond->lock) ||
>>+                                          lockdep_is_held(&bond->curr_slave_lock));
>>+      rcu_assign_pointer(bond->slave_arr, new_arr);
>>+      if (old_arr)
>>+              kfree_rcu(old_arr, rcu);
>>+
>>+out:
>>+      return ret;
>>+}
>>+
>>+/* Use this Xmit function for 3AD as well as XOR modes. The current
>>+ * usable slave array is formed in the control path. The xmit function
>>+ * just calculates hash and sends the packet out.
>>+ */
>>+int bond_3ad_xor_xmit(struct sk_buff *skb, struct net_device *dev)
>>+{
>>+      struct bonding *bond = netdev_priv(dev);
>>+      struct slave *slave;
>>+      struct bond_up_slave *slaves;
>>+
>>+      slaves = rcu_dereference(bond->slave_arr);
>>+      if (slaves && slaves->count) {
>>+              slave = slaves->arr[bond_xmit_hash(bond, skb) % slaves->count];
>>+              bond_dev_queue_xmit(bond, skb, slave->dev);
>>+      } else {
>>+              dev_kfree_skb_any(skb);
>>+              atomic_long_inc(&dev->tx_dropped);
>>+      }
>>
>>       return NETDEV_TX_OK;
>> }
>>@@ -3794,12 +3877,11 @@ static netdev_tx_t __bond_start_xmit(struct sk_buff *skb, struct net_device *dev
>>               return bond_xmit_roundrobin(skb, dev);
>>       case BOND_MODE_ACTIVEBACKUP:
>>               return bond_xmit_activebackup(skb, dev);
>>+      case BOND_MODE_8023AD:
>>       case BOND_MODE_XOR:
>>-              return bond_xmit_xor(skb, dev);
>>+              return bond_3ad_xor_xmit(skb, dev);
>>       case BOND_MODE_BROADCAST:
>>               return bond_xmit_broadcast(skb, dev);
>>-      case BOND_MODE_8023AD:
>>-              return bond_3ad_xmit_xor(skb, dev);
>>       case BOND_MODE_ALB:
>>               return bond_alb_xmit(skb, dev);
>>       case BOND_MODE_TLB:
>>diff --git a/drivers/net/bonding/bonding.h b/drivers/net/bonding/bonding.h
>>index aace510d08d1..4a6195c0de60 100644
>>--- a/drivers/net/bonding/bonding.h
>>+++ b/drivers/net/bonding/bonding.h
>>@@ -177,6 +177,12 @@ struct slave {
>>       struct kobject kobj;
>> };
>>
>>+struct bond_up_slave {
>>+      unsigned int    count;
>>+      struct rcu_head rcu;
>>+      struct slave    *arr[0];
>>+};
>>+
>> /*
>>  * Link pseudo-state only used internally by monitors
>>  */
>>@@ -196,6 +202,7 @@ struct bonding {
>>       struct   slave __rcu *curr_active_slave;
>>       struct   slave __rcu *current_arp_slave;
>>       struct   slave *primary_slave;
>>+      struct   bond_up_slave __rcu *slave_arr; /* Array of usable slaves */
>>       bool     force_primary;
>>       s32      slave_cnt; /* never change this value outside the attach/detach wrappers */
>>       int     (*recv_probe)(const struct sk_buff *, struct bonding *,
>>@@ -527,6 +534,7 @@ const char *bond_slave_link_status(s8 link);
>> struct bond_vlan_tag *bond_verify_device_path(struct net_device *start_dev,
>>                                             struct net_device *end_dev,
>>                                             int level);
>>+int bond_update_slave_arr(struct bonding *bond, struct slave *skipslave);
>>
>> #ifdef CONFIG_PROC_FS
>> void bond_create_proc_entry(struct bonding *bond);
>>--
>>2.1.0.rc2.206.gedb03e5
>>
>
> ---
>         -Jay Vosburgh, jay.vosburgh@canonical.com

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH net-next 2/2] bonding: Simplify the xmit function for modes that use xmit_hash
  2014-09-03 21:47 [PATCH net-next 2/2] bonding: Simplify the xmit function for modes that use xmit_hash Mahesh Bandewar
  2014-09-03 22:51 ` Jay Vosburgh
@ 2014-09-04 13:16 ` Nikolay Aleksandrov
  2014-09-05  0:10   ` Mahesh Bandewar
  1 sibling, 1 reply; 7+ messages in thread
From: Nikolay Aleksandrov @ 2014-09-04 13:16 UTC (permalink / raw)
  To: Mahesh Bandewar, Jay Vosburgh, Veaceslav Falico, Andy Gospodarek,
	David Miller
  Cc: netdev, Eric Dumazet, Maciej Zenczykowski

On 03/09/14 23:47, Mahesh Bandewar wrote:
> Earlier change to use usable slave array for TLB mode had an additional
> performance advantage. So extending the same logic to all other modes
> that use xmit-hash for slave selection (viz 802.3AD, and XOR modes).
> Also consolidating this with the earlier TLB change.
>
> The main idea is to build the usable slaves array in the control path
> and use that array for slave selection during xmit operation.
>
> Measured performance in a setup with a bond of 4x1G NICs with 200
> instances of netperf for the modes involved (3ad, xor, tlb)
> cmd: netperf -t TCP_RR -H <TargetHost> -l 60 -s 5
>
> Mode        TPS-Before   TPS-After
>
> 802.3ad   : 468,694      493,101
> TLB (lb=0): 392,583      392,965
> XOR       : 475,696      484,517
>
> Signed-off-by: Mahesh Bandewar <maheshb@google.com>
> ---
<<snip>>
Hi Mahesh,
I really like this idea, but I think this patch is far from ready and needs more 
thorough examination.

> diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
> index f0f5eab0fab1..b0e8e7cfa10f 100644
> --- a/drivers/net/bonding/bond_main.c
> +++ b/drivers/net/bonding/bond_main.c

What happens to modes that don't have miimon enabled i.e. if I do: modprobe 
bonding mode=2 and enslave something then the slave array will never get created 
and I won't be able to xmit anything ever because miimon is 0 by default and 
even if it wasn't I'll have to wait for it to create the slave array before I 
can do any transmissions.
By the way, I just noticed that arp_interval is actually supported in XOR mode.

> @@ -1693,6 +1693,9 @@ static int __bond_release_one(struct net_device *bond_dev,
>   	if (BOND_MODE(bond) == BOND_MODE_8023AD)
>   		bond_3ad_unbind_slave(slave);
>
^^^^^^^^^^^^^^^^^^^^^^^
Empty line.

> +	else if (BOND_MODE(bond) == BOND_MODE_XOR)
> +		bond_update_slave_arr(bond, slave);
> +
>   	write_unlock_bh(&bond->lock);
>
>   	netdev_info(bond_dev, "Releasing %s interface %s\n",
> @@ -2009,6 +2012,9 @@ static void bond_miimon_commit(struct bonding *bond)
>   				bond_alb_handle_link_change(bond, slave,
>   							    BOND_LINK_UP);
>
> +			if (BOND_MODE(bond) == BOND_MODE_XOR)
> +				bond_update_slave_arr(bond, NULL);
> +
>   			if (!bond->curr_active_slave ||
>   			    (slave == bond->primary_slave))
>   				goto do_failover;
> @@ -2037,6 +2043,9 @@ static void bond_miimon_commit(struct bonding *bond)
>   				bond_alb_handle_link_change(bond, slave,
>   							    BOND_LINK_DOWN);
>
> +			if (BOND_MODE(bond) == BOND_MODE_XOR)
> +				bond_update_slave_arr(bond, slave);
> +
>   			if (slave == rcu_access_pointer(bond->curr_active_slave))
>   				goto do_failover;
>
> @@ -3149,6 +3158,7 @@ static int bond_open(struct net_device *bond_dev)
>   static int bond_close(struct net_device *bond_dev)
>   {
>   	struct bonding *bond = netdev_priv(bond_dev);
> +	struct bond_up_slave *arr;
>
>   	bond_work_cancel_all(bond);
>   	bond->send_peer_notif = 0;
> @@ -3156,6 +3166,10 @@ static int bond_close(struct net_device *bond_dev)
>   		bond_alb_deinitialize(bond);
>   	bond->recv_probe = NULL;
>
> +	arr = rtnl_dereference(bond->slave_arr);
> +	if (arr)
> +	    kfree_rcu(arr, rcu);
> +
^^^^^^^^^^^^^^
Bond close is dealt with, but what happens after: ip l set bond down, ip l set 
bond up
or alternatively: ip l set bond down, echo -slave > slaves

The array is freed but slave_arr still points to it.

>   	return 0;
>   }
>
> @@ -3684,15 +3698,84 @@ static int bond_xmit_activebackup(struct sk_buff *skb, struct net_device *bond_d
>   	return NETDEV_TX_OK;
>   }
>
> -/* In bond_xmit_xor() , we determine the output device by using a pre-
> - * determined xmit_hash_policy(), If the selected device is not enabled,
> - * find the next active slave.
> +/* Build the usable slaves array in control path for modes that use xmit-hash
> + * to determine the slave interface -
> + * (a) BOND_MODE_8023AD
> + * (b) BOND_MODE_XOR
> + * (c) BOND_MODE_TLB && tlb_dynamic_lb == 0
>    */
> -static int bond_xmit_xor(struct sk_buff *skb, struct net_device *bond_dev)
> +int bond_update_slave_arr(struct bonding *bond, struct slave *skipslave)
^^^^^^^^^^^^^^^^^^^^^^^^^^
In general any failing in this function means that the old array stays with the 
old slave list. I think that you really should revisit the places that use this 
after the patch and see if that won't cause any problems.

>   {
> -	struct bonding *bond = netdev_priv(bond_dev);
> +	struct slave *slave;
> +	struct list_head *iter;
> +	struct bond_up_slave *new_arr, *old_arr;
> +	int slaves_in_agg;
> +	int agg_id = 0;
> +	int ret = 0;
> +
> +	new_arr = kzalloc(offsetof(struct bond_up_slave, arr[bond->slave_cnt]),
> +			  GFP_ATOMIC);
> +	if (!new_arr) {
> +		ret = -ENOMEM;
> +		goto out;
> +	}
> +	if (BOND_MODE(bond) == BOND_MODE_8023AD) {
> +		struct ad_info ad_info;
> +
> +		if (__bond_3ad_get_active_agg_info(bond, &ad_info)) {
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Called with RCU otherwise sparse won't be happy. Perhaps 
bond_3ad_get_active_agg_info() ?

> +			pr_debug("__bond_3ad_get_active_agg_info failed\n");
> +			kfree_rcu(new_arr, rcu);
> +			ret = -EINVAL;
> +			goto out;
> +		}
> +		slaves_in_agg = ad_info.ports;
> +		agg_id = ad_info.aggregator_id;
> +	}
> +	bond_for_each_slave(bond, slave, iter) {
> +		if (BOND_MODE(bond) == BOND_MODE_8023AD) {
> +			struct aggregator *agg;
> +
> +			agg = SLAVE_AD_INFO(slave)->port.aggregator;
> +			if (!agg || agg->aggregator_identifier != agg_id)
> +				continue;
> +		}
> +		if (!bond_slave_can_tx(slave))
> +			continue;
> +		if (skipslave == slave)
> +			continue;
> +		new_arr->arr[new_arr->count++] = slave;
> +	}
>
> -	bond_xmit_slave_id(bond, skb, bond_xmit_hash(bond, skb) % bond->slave_cnt);
> +	old_arr = rcu_dereference_protected(bond->slave_arr,
> +					    lockdep_rtnl_is_held() ||
> +					    lockdep_is_held(&bond->lock) ||
> +					    lockdep_is_held(&bond->curr_slave_lock));
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
This line is the most troublesome for me, which lock is it ? Does this mean that 
whichever I hold from the three I can update the slave array ?
I don't think this is worked out well, you should explicitly specify how and why 
it is safe to update this under each of the locks and maybe you'll be able to 
reduce the lock list :-)

> +	rcu_assign_pointer(bond->slave_arr, new_arr);
> +	if (old_arr)
> +		kfree_rcu(old_arr, rcu);
> +
> +out:
> +	return ret;
> +}
> +
> +/* Use this Xmit function for 3AD as well as XOR modes. The current
> + * usable slave array is formed in the control path. The xmit function
> + * just calculates hash and sends the packet out.
> + */
> +int bond_3ad_xor_xmit(struct sk_buff *skb, struct net_device *dev)
> +{
> +	struct bonding *bond = netdev_priv(dev);
> +	struct slave *slave;
> +	struct bond_up_slave *slaves;
> +
> +	slaves = rcu_dereference(bond->slave_arr);
> +	if (slaves && slaves->count) {
> +		slave = slaves->arr[bond_xmit_hash(bond, skb) % slaves->count];
> +		bond_dev_queue_xmit(bond, skb, slave->dev);
> +	} else {
> +		dev_kfree_skb_any(skb);
> +		atomic_long_inc(&dev->tx_dropped);
> +	}
>
>   	return NETDEV_TX_OK;
>   }
> @@ -3794,12 +3877,11 @@ static netdev_tx_t __bond_start_xmit(struct sk_buff *skb, struct net_device *dev
>   		return bond_xmit_roundrobin(skb, dev);
>   	case BOND_MODE_ACTIVEBACKUP:
>   		return bond_xmit_activebackup(skb, dev);
> +	case BOND_MODE_8023AD:
>   	case BOND_MODE_XOR:
> -		return bond_xmit_xor(skb, dev);
> +		return bond_3ad_xor_xmit(skb, dev);
>   	case BOND_MODE_BROADCAST:
>   		return bond_xmit_broadcast(skb, dev);
> -	case BOND_MODE_8023AD:
> -		return bond_3ad_xmit_xor(skb, dev);
>   	case BOND_MODE_ALB:
>   		return bond_alb_xmit(skb, dev);
>   	case BOND_MODE_TLB:
> diff --git a/drivers/net/bonding/bonding.h b/drivers/net/bonding/bonding.h
> index aace510d08d1..4a6195c0de60 100644
> --- a/drivers/net/bonding/bonding.h
> +++ b/drivers/net/bonding/bonding.h
> @@ -177,6 +177,12 @@ struct slave {
>   	struct kobject kobj;
>   };
>
> +struct bond_up_slave {
> +	unsigned int	count;
> +	struct rcu_head rcu;
> +	struct slave	*arr[0];
> +};
> +
>   /*
>    * Link pseudo-state only used internally by monitors
>    */
> @@ -196,6 +202,7 @@ struct bonding {
>   	struct   slave __rcu *curr_active_slave;
>   	struct   slave __rcu *current_arp_slave;
>   	struct   slave *primary_slave;
> +	struct   bond_up_slave __rcu *slave_arr; /* Array of usable slaves */
>   	bool     force_primary;
>   	s32      slave_cnt; /* never change this value outside the attach/detach wrappers */
>   	int     (*recv_probe)(const struct sk_buff *, struct bonding *,
> @@ -527,6 +534,7 @@ const char *bond_slave_link_status(s8 link);
>   struct bond_vlan_tag *bond_verify_device_path(struct net_device *start_dev,
>   					      struct net_device *end_dev,
>   					      int level);
> +int bond_update_slave_arr(struct bonding *bond, struct slave *skipslave);
>
>   #ifdef CONFIG_PROC_FS
>   void bond_create_proc_entry(struct bonding *bond);
>

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH net-next 2/2] bonding: Simplify the xmit function for modes that use xmit_hash
  2014-09-04 13:16 ` Nikolay Aleksandrov
@ 2014-09-05  0:10   ` Mahesh Bandewar
  2014-09-05 11:26     ` Nikolay Aleksandrov
  0 siblings, 1 reply; 7+ messages in thread
From: Mahesh Bandewar @ 2014-09-05  0:10 UTC (permalink / raw)
  To: Nikolay Aleksandrov
  Cc: Jay Vosburgh, Veaceslav Falico, Andy Gospodarek, David Miller,
	netdev, Eric Dumazet, Maciej Zenczykowski

On Thu, Sep 4, 2014 at 6:16 AM, Nikolay Aleksandrov <nikolay@redhat.com> wrote:
> On 03/09/14 23:47, Mahesh Bandewar wrote:
>>
>> Earlier change to use usable slave array for TLB mode had an additional
>> performance advantage. So extending the same logic to all other modes
>> that use xmit-hash for slave selection (viz 802.3AD, and XOR modes).
>> Also consolidating this with the earlier TLB change.
>>
>> The main idea is to build the usable slaves array in the control path
>> and use that array for slave selection during xmit operation.
>>
>> Measured performance in a setup with a bond of 4x1G NICs with 200
>> instances of netperf for the modes involved (3ad, xor, tlb)
>> cmd: netperf -t TCP_RR -H <TargetHost> -l 60 -s 5
>>
>> Mode        TPS-Before   TPS-After
>>
>> 802.3ad   : 468,694      493,101
>> TLB (lb=0): 392,583      392,965
>> XOR       : 475,696      484,517
>>
>> Signed-off-by: Mahesh Bandewar <maheshb@google.com>
>> ---
>
> <<snip>>
> Hi Mahesh,
> I really like this idea, but I think this patch is far from ready and needs
> more thorough examination.
>
I see that you have looked it from the XOR-mode perspective and thanks
for that. I admit that, I did not test that mode thoroughly.

>
>> diff --git a/drivers/net/bonding/bond_main.c
>> b/drivers/net/bonding/bond_main.c
>> index f0f5eab0fab1..b0e8e7cfa10f 100644
>> --- a/drivers/net/bonding/bond_main.c
>> +++ b/drivers/net/bonding/bond_main.c
>
>
> What happens to modes that don't have miimon enabled i.e. if I do: modprobe
> bonding mode=2 and enslave something then the slave array will never get
> created and I won't be able to xmit anything ever because miimon is 0 by
> default and even if it wasn't I'll have to wait for it to create the slave
> array before I can do any transmissions.
> By the way, I just noticed that arp_interval is actually supported in XOR
> mode.
>
Yes, this should not be restricted to just the miimon case, and it
should be irrespective of miimon, arp-mon enabled / disabled etc. I'll
correct that!

>
>> @@ -1693,6 +1693,9 @@ static int __bond_release_one(struct net_device
>> *bond_dev,
>>         if (BOND_MODE(bond) == BOND_MODE_8023AD)
>>                 bond_3ad_unbind_slave(slave);
>>
> ^^^^^^^^^^^^^^^^^^^^^^^
> Empty line.
>
>
>> +       else if (BOND_MODE(bond) == BOND_MODE_XOR)
>> +               bond_update_slave_arr(bond, slave);
>> +
>>         write_unlock_bh(&bond->lock);
>>
>>         netdev_info(bond_dev, "Releasing %s interface %s\n",
>> @@ -2009,6 +2012,9 @@ static void bond_miimon_commit(struct bonding *bond)
>>                                 bond_alb_handle_link_change(bond, slave,
>>                                                             BOND_LINK_UP);
>>
>> +                       if (BOND_MODE(bond) == BOND_MODE_XOR)
>> +                               bond_update_slave_arr(bond, NULL);
>> +
>>                         if (!bond->curr_active_slave ||
>>                             (slave == bond->primary_slave))
>>                                 goto do_failover;
>> @@ -2037,6 +2043,9 @@ static void bond_miimon_commit(struct bonding *bond)
>>                                 bond_alb_handle_link_change(bond, slave,
>>
>> BOND_LINK_DOWN);
>>
>> +                       if (BOND_MODE(bond) == BOND_MODE_XOR)
>> +                               bond_update_slave_arr(bond, slave);
>> +
>>                         if (slave ==
>> rcu_access_pointer(bond->curr_active_slave))
>>                                 goto do_failover;
>>
>> @@ -3149,6 +3158,7 @@ static int bond_open(struct net_device *bond_dev)
>>   static int bond_close(struct net_device *bond_dev)
>>   {
>>         struct bonding *bond = netdev_priv(bond_dev);
>> +       struct bond_up_slave *arr;
>>
>>         bond_work_cancel_all(bond);
>>         bond->send_peer_notif = 0;
>> @@ -3156,6 +3166,10 @@ static int bond_close(struct net_device *bond_dev)
>>                 bond_alb_deinitialize(bond);
>>         bond->recv_probe = NULL;
>>
>> +       arr = rtnl_dereference(bond->slave_arr);
>> +       if (arr)
>> +           kfree_rcu(arr, rcu);
>> +
>
> ^^^^^^^^^^^^^^
> Bond close is dealt with, but what happens after: ip l set bond down, ip l
> set bond up
> or alternatively: ip l set bond down, echo -slave > slaves
>
> The array is freed but slave_arr still points to it.
>
good point. I'll update it. Also applicable to the earlier TLB mode,
so will add that into the patch for the stable.

>
>>         return 0;
>>   }
>>
>> @@ -3684,15 +3698,84 @@ static int bond_xmit_activebackup(struct sk_buff
>> *skb, struct net_device *bond_d
>>         return NETDEV_TX_OK;
>>   }
>>
>> -/* In bond_xmit_xor() , we determine the output device by using a pre-
>> - * determined xmit_hash_policy(), If the selected device is not enabled,
>> - * find the next active slave.
>> +/* Build the usable slaves array in control path for modes that use
>> xmit-hash
>> + * to determine the slave interface -
>> + * (a) BOND_MODE_8023AD
>> + * (b) BOND_MODE_XOR
>> + * (c) BOND_MODE_TLB && tlb_dynamic_lb == 0
>>    */
>> -static int bond_xmit_xor(struct sk_buff *skb, struct net_device
>> *bond_dev)
>> +int bond_update_slave_arr(struct bonding *bond, struct slave *skipslave)
>
> ^^^^^^^^^^^^^^^^^^^^^^^^^^
> In general any failing in this function means that the old array stays with
> the old slave list. I think that you really should revisit the places that
> use this after the patch and see if that won't cause any problems.
>
The only really problematic scenario is where one of the slaves is
disappearing (as pointed out by Jay earlier) and I'll fix that in the
next version. For rest of the cases, the worse that could happen is
the slave that is selected wont be able to transmit that packet. As I
have explained earlier, in case of failure to allocate such small
portion of memory (less than 1k in worse scenario!) machine has lot
other problems to handle than not being able to send a packet out.

>
>>   {
>> -       struct bonding *bond = netdev_priv(bond_dev);
>> +       struct slave *slave;
>> +       struct list_head *iter;
>> +       struct bond_up_slave *new_arr, *old_arr;
>> +       int slaves_in_agg;
>> +       int agg_id = 0;
>> +       int ret = 0;
>> +
>> +       new_arr = kzalloc(offsetof(struct bond_up_slave,
>> arr[bond->slave_cnt]),
>> +                         GFP_ATOMIC);
>> +       if (!new_arr) {
>> +               ret = -ENOMEM;
>> +               goto out;
>> +       }
>> +       if (BOND_MODE(bond) == BOND_MODE_8023AD) {
>> +               struct ad_info ad_info;
>> +
>> +               if (__bond_3ad_get_active_agg_info(bond, &ad_info)) {
>
> ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
> Called with RCU otherwise sparse won't be happy. Perhaps
> bond_3ad_get_active_agg_info() ?
>
Yes, corrected!
>
>> +                       pr_debug("__bond_3ad_get_active_agg_info
>> failed\n");
>> +                       kfree_rcu(new_arr, rcu);
>> +                       ret = -EINVAL;
>> +                       goto out;
>> +               }
>> +               slaves_in_agg = ad_info.ports;
>> +               agg_id = ad_info.aggregator_id;
>> +       }
>> +       bond_for_each_slave(bond, slave, iter) {
>> +               if (BOND_MODE(bond) == BOND_MODE_8023AD) {
>> +                       struct aggregator *agg;
>> +
>> +                       agg = SLAVE_AD_INFO(slave)->port.aggregator;
>> +                       if (!agg || agg->aggregator_identifier != agg_id)
>> +                               continue;
>> +               }
>> +               if (!bond_slave_can_tx(slave))
>> +                       continue;
>> +               if (skipslave == slave)
>> +                       continue;
>> +               new_arr->arr[new_arr->count++] = slave;
>> +       }
>>
>> -       bond_xmit_slave_id(bond, skb, bond_xmit_hash(bond, skb) %
>> bond->slave_cnt);
>> +       old_arr = rcu_dereference_protected(bond->slave_arr,
>> +                                           lockdep_rtnl_is_held() ||
>> +                                           lockdep_is_held(&bond->lock)
>> ||
>> +
>> lockdep_is_held(&bond->curr_slave_lock));
>
> ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
> This line is the most troublesome for me, which lock is it ? Does this mean
> that whichever I hold from the three I can update the slave array ?
> I don't think this is worked out well, you should explicitly specify how and
> why it is safe to update this under each of the locks and maybe you'll be
> able to reduce the lock list :-)
>
This is primarily because of different code paths it's taking to reach
here. In all these cases, one of those locks is held. Unfortunately
there are three such locks  that I have identified (for all three
modes involved) and hence the above line.

>
>> +       rcu_assign_pointer(bond->slave_arr, new_arr);
>> +       if (old_arr)
>> +               kfree_rcu(old_arr, rcu);
>> +
>> +out:
>> +       return ret;
>> +}
>> +
>> +/* Use this Xmit function for 3AD as well as XOR modes. The current
>> + * usable slave array is formed in the control path. The xmit function
>> + * just calculates hash and sends the packet out.
>> + */
>> +int bond_3ad_xor_xmit(struct sk_buff *skb, struct net_device *dev)
>> +{
>> +       struct bonding *bond = netdev_priv(dev);
>> +       struct slave *slave;
>> +       struct bond_up_slave *slaves;
>> +
>> +       slaves = rcu_dereference(bond->slave_arr);
>> +       if (slaves && slaves->count) {
>> +               slave = slaves->arr[bond_xmit_hash(bond, skb) %
>> slaves->count];
>> +               bond_dev_queue_xmit(bond, skb, slave->dev);
>> +       } else {
>> +               dev_kfree_skb_any(skb);
>> +               atomic_long_inc(&dev->tx_dropped);
>> +       }
>>
>>         return NETDEV_TX_OK;
>>   }
>> @@ -3794,12 +3877,11 @@ static netdev_tx_t __bond_start_xmit(struct
>> sk_buff *skb, struct net_device *dev
>>                 return bond_xmit_roundrobin(skb, dev);
>>         case BOND_MODE_ACTIVEBACKUP:
>>                 return bond_xmit_activebackup(skb, dev);
>> +       case BOND_MODE_8023AD:
>>         case BOND_MODE_XOR:
>> -               return bond_xmit_xor(skb, dev);
>> +               return bond_3ad_xor_xmit(skb, dev);
>>         case BOND_MODE_BROADCAST:
>>                 return bond_xmit_broadcast(skb, dev);
>> -       case BOND_MODE_8023AD:
>> -               return bond_3ad_xmit_xor(skb, dev);
>>         case BOND_MODE_ALB:
>>                 return bond_alb_xmit(skb, dev);
>>         case BOND_MODE_TLB:
>> diff --git a/drivers/net/bonding/bonding.h b/drivers/net/bonding/bonding.h
>> index aace510d08d1..4a6195c0de60 100644
>> --- a/drivers/net/bonding/bonding.h
>> +++ b/drivers/net/bonding/bonding.h
>> @@ -177,6 +177,12 @@ struct slave {
>>         struct kobject kobj;
>>   };
>>
>> +struct bond_up_slave {
>> +       unsigned int    count;
>> +       struct rcu_head rcu;
>> +       struct slave    *arr[0];
>> +};
>> +
>>   /*
>>    * Link pseudo-state only used internally by monitors
>>    */
>> @@ -196,6 +202,7 @@ struct bonding {
>>         struct   slave __rcu *curr_active_slave;
>>         struct   slave __rcu *current_arp_slave;
>>         struct   slave *primary_slave;
>> +       struct   bond_up_slave __rcu *slave_arr; /* Array of usable slaves
>> */
>>         bool     force_primary;
>>         s32      slave_cnt; /* never change this value outside the
>> attach/detach wrappers */
>>         int     (*recv_probe)(const struct sk_buff *, struct bonding *,
>> @@ -527,6 +534,7 @@ const char *bond_slave_link_status(s8 link);
>>   struct bond_vlan_tag *bond_verify_device_path(struct net_device
>> *start_dev,
>>                                               struct net_device *end_dev,
>>                                               int level);
>> +int bond_update_slave_arr(struct bonding *bond, struct slave *skipslave);
>>
>>   #ifdef CONFIG_PROC_FS
>>   void bond_create_proc_entry(struct bonding *bond);
>>
>

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH net-next 2/2] bonding: Simplify the xmit function for modes that use xmit_hash
  2014-09-05  0:10   ` Mahesh Bandewar
@ 2014-09-05 11:26     ` Nikolay Aleksandrov
  2014-09-05 11:49       ` Nikolay Aleksandrov
  0 siblings, 1 reply; 7+ messages in thread
From: Nikolay Aleksandrov @ 2014-09-05 11:26 UTC (permalink / raw)
  To: Mahesh Bandewar
  Cc: Jay Vosburgh, Veaceslav Falico, Andy Gospodarek, David Miller,
	netdev, Eric Dumazet, Maciej Zenczykowski

On 05/09/14 02:10, Mahesh Bandewar wrote:
> On Thu, Sep 4, 2014 at 6:16 AM, Nikolay Aleksandrov <nikolay@redhat.com> wrote:
>> On 03/09/14 23:47, Mahesh Bandewar wrote:
>>>
>>> Earlier change to use usable slave array for TLB mode had an additional
>>> performance advantage. So extending the same logic to all other modes
>>> that use xmit-hash for slave selection (viz 802.3AD, and XOR modes).
>>> Also consolidating this with the earlier TLB change.
>>>
>>> The main idea is to build the usable slaves array in the control path
>>> and use that array for slave selection during xmit operation.
>>>
>>> Measured performance in a setup with a bond of 4x1G NICs with 200
>>> instances of netperf for the modes involved (3ad, xor, tlb)
>>> cmd: netperf -t TCP_RR -H <TargetHost> -l 60 -s 5
>>>
>>> Mode        TPS-Before   TPS-After
>>>
>>> 802.3ad   : 468,694      493,101
>>> TLB (lb=0): 392,583      392,965
>>> XOR       : 475,696      484,517
>>>
>>> Signed-off-by: Mahesh Bandewar <maheshb@google.com>
>>> ---
>>
<<<<<snip>>>>>>
>>> -       bond_xmit_slave_id(bond, skb, bond_xmit_hash(bond, skb) %
>>> bond->slave_cnt);
>>> +       old_arr = rcu_dereference_protected(bond->slave_arr,
>>> +                                           lockdep_rtnl_is_held() ||
>>> +                                           lockdep_is_held(&bond->lock)
>>> ||
>>> +
>>> lockdep_is_held(&bond->curr_slave_lock));
>>
>> ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
>> This line is the most troublesome for me, which lock is it ? Does this mean
>> that whichever I hold from the three I can update the slave array ?
>> I don't think this is worked out well, you should explicitly specify how and
>> why it is safe to update this under each of the locks and maybe you'll be
>> able to reduce the lock list :-)
>>
> This is primarily because of different code paths it's taking to reach
> here. In all these cases, one of those locks is held. Unfortunately
> there are three such locks  that I have identified (for all three
> modes involved) and hence the above line.
>

True, but I did a little grepping and here's my analysis of the call sites which 
I can't guarantee is full or complete, but it shows at least 1 problem.
bond_update_slave_arr() callers:

1. 3ad mode
1.1. bond_3ad_state_machine_handler -> ad_mux_machine -> 
ad_(en|dis)able_collecting_distributing
  - read_lock(bond->lock), rcu_read_lock, state_machine_lock
1.2. __bond_release_one -> bond_3ad_unbind_slave
  - rtnl, write_lock(bond->lock)
1.3. bond_change_active_slave -> bond_3ad_handle_link_change
  -  from 4. rtnl, new_active != NULL -> write_lock(curr_slave_lock)
1.4. bond_miimon_commit -> bond_3ad_handle_link_change
  - rtnl

2. TLB
2.1. __bond_release_one -> bond_alb_deinit_slave
  - rtnl
2.2. bond_change_active_slave -> bond_alb_handle_link_change
  - from 4. rtnl, new_active != NULL -> write_lock(curr_slave_lock)
2.3. bond_miimon_commit -> bond_alb_handle_link_change
  - rtnl

3. XOR
3.1. __bond_release_one
  - rtnl
3.2. bond_miimon_commit
  - rtnl

4. bond_change_active_slave:
1. bond_select_active_slave -> bond_change_active_slave
1.1. bond_enslave -> bond_select_active_slave
  - rtnl, write_lock(curr_slave_lock)
1.2. __bond_release_one -> bond_select_active_slave
  - rtnl, write_lock(curr_slave_lock)
1.3. bond_miimon_commit -> bond_select_active_slave
  - rtnl, write_lock(curr_slave_lock)
1.4. bond_loadbalance_arp_mon -> bond_select_active_slave
  - rtnl, write_lock(curr_slave_lock)
1.5. bond_ab_arp_commit -> bond_select_active_slave
  - rtnl, write_lock(curr_slave_lock)
1.6. bond_slave_netdev_event -> bond_select_active_slave
  - rtnl, write_lock(curr_slave_lock)
1.7. bond_options.c (all callers)
  - rtnl, write_lock(curr_slave_lock)


Almost all callers of slave_update_arr() currently have rtnl acquired, but 
there's 1 troubling caller: bond_3ad_state_machine_handler() which is called 
from a workqueue. Now if we're able to execute anything with that workqueue, we 
have a race condition, good candidates are all options which don't acquire 
write_lock(bond->lock), I think the only one that can call 
bond_slave_update_arr() of those is primary_reselect right now.
So if you come up with some way to deal with that, you probably can use only 
rtnl for syncing the array and simplify this.
Again I might be wrong since this is done only via grepping :-)

Cheers,
  Nik

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH net-next 2/2] bonding: Simplify the xmit function for modes that use xmit_hash
  2014-09-05 11:26     ` Nikolay Aleksandrov
@ 2014-09-05 11:49       ` Nikolay Aleksandrov
  0 siblings, 0 replies; 7+ messages in thread
From: Nikolay Aleksandrov @ 2014-09-05 11:49 UTC (permalink / raw)
  To: Mahesh Bandewar
  Cc: Jay Vosburgh, Veaceslav Falico, Andy Gospodarek, David Miller,
	netdev, Eric Dumazet, Maciej Zenczykowski

On 05/09/14 13:26, Nikolay Aleksandrov wrote:
> On 05/09/14 02:10, Mahesh Bandewar wrote:
>> On Thu, Sep 4, 2014 at 6:16 AM, Nikolay Aleksandrov <nikolay@redhat.com> wrote:
>>> On 03/09/14 23:47, Mahesh Bandewar wrote:
>>>>
>>>> Earlier change to use usable slave array for TLB mode had an additional
>>>> performance advantage. So extending the same logic to all other modes
>>>> that use xmit-hash for slave selection (viz 802.3AD, and XOR modes).
>>>> Also consolidating this with the earlier TLB change.
>>>>
>>>> The main idea is to build the usable slaves array in the control path
>>>> and use that array for slave selection during xmit operation.
>>>>
>>>> Measured performance in a setup with a bond of 4x1G NICs with 200
>>>> instances of netperf for the modes involved (3ad, xor, tlb)
>>>> cmd: netperf -t TCP_RR -H <TargetHost> -l 60 -s 5
>>>>
>>>> Mode        TPS-Before   TPS-After
>>>>
>>>> 802.3ad   : 468,694      493,101
>>>> TLB (lb=0): 392,583      392,965
>>>> XOR       : 475,696      484,517
>>>>
>>>> Signed-off-by: Mahesh Bandewar <maheshb@google.com>
>>>> ---
>>>
> <<<<<snip>>>>>>
>>>> -       bond_xmit_slave_id(bond, skb, bond_xmit_hash(bond, skb) %
>>>> bond->slave_cnt);
>>>> +       old_arr = rcu_dereference_protected(bond->slave_arr,
>>>> +                                           lockdep_rtnl_is_held() ||
>>>> +                                           lockdep_is_held(&bond->lock)
>>>> ||
>>>> +
>>>> lockdep_is_held(&bond->curr_slave_lock));
>>>
>>> ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
>>> This line is the most troublesome for me, which lock is it ? Does this mean
>>> that whichever I hold from the three I can update the slave array ?
>>> I don't think this is worked out well, you should explicitly specify how and
>>> why it is safe to update this under each of the locks and maybe you'll be
>>> able to reduce the lock list :-)
>>>
>> This is primarily because of different code paths it's taking to reach
>> here. In all these cases, one of those locks is held. Unfortunately
>> there are three such locks  that I have identified (for all three
>> modes involved) and hence the above line.
>>
>
> True, but I did a little grepping and here's my analysis of the call sites which
> I can't guarantee is full or complete, but it shows at least 1 problem.
> bond_update_slave_arr() callers:
>
> 1. 3ad mode
> 1.1. bond_3ad_state_machine_handler -> ad_mux_machine ->
> ad_(en|dis)able_collecting_distributing
>    - read_lock(bond->lock), rcu_read_lock, state_machine_lock
> 1.2. __bond_release_one -> bond_3ad_unbind_slave
>    - rtnl, write_lock(bond->lock)
> 1.3. bond_change_active_slave -> bond_3ad_handle_link_change
>    -  from 4. rtnl, new_active != NULL -> write_lock(curr_slave_lock)
> 1.4. bond_miimon_commit -> bond_3ad_handle_link_change
>    - rtnl
^^^^^^
missed the state_machine_lock here

>
> 2. TLB
> 2.1. __bond_release_one -> bond_alb_deinit_slave
>    - rtnl
> 2.2. bond_change_active_slave -> bond_alb_handle_link_change
>    - from 4. rtnl, new_active != NULL -> write_lock(curr_slave_lock)
> 2.3. bond_miimon_commit -> bond_alb_handle_link_change
>    - rtnl
>
> 3. XOR
> 3.1. __bond_release_one
>    - rtnl
> 3.2. bond_miimon_commit
>    - rtnl
>
> 4. bond_change_active_slave:
> 1. bond_select_active_slave -> bond_change_active_slave
> 1.1. bond_enslave -> bond_select_active_slave
>    - rtnl, write_lock(curr_slave_lock)
> 1.2. __bond_release_one -> bond_select_active_slave
>    - rtnl, write_lock(curr_slave_lock)
> 1.3. bond_miimon_commit -> bond_select_active_slave
>    - rtnl, write_lock(curr_slave_lock)
> 1.4. bond_loadbalance_arp_mon -> bond_select_active_slave
>    - rtnl, write_lock(curr_slave_lock)
> 1.5. bond_ab_arp_commit -> bond_select_active_slave
>    - rtnl, write_lock(curr_slave_lock)
> 1.6. bond_slave_netdev_event -> bond_select_active_slave
>    - rtnl, write_lock(curr_slave_lock)
> 1.7. bond_options.c (all callers)
>    - rtnl, write_lock(curr_slave_lock)
>
>
> Almost all callers of slave_update_arr() currently have rtnl acquired, but
> there's 1 troubling caller: bond_3ad_state_machine_handler() which is called
> from a workqueue. Now if we're able to execute anything with that workqueue, we
> have a race condition, good candidates are all options which don't acquire
> write_lock(bond->lock), I think the only one that can call
> bond_slave_update_arr() of those is primary_reselect right now.
^^^^^^^^^^^^^^^^
Though even that might not be a problem since the state_machine_lock would save 
you, so it looks like it's not a problem but the convoluted locking requirements 
are a problem waiting to happen by themselves.

Anyway that is a longstanding problem so I don't mind if you keep the code like 
this, too. Just wanted to make sure that it doesn't create any new subtle race 
conditions.

> So if you come up with some way to deal with that, you probably can use only
> rtnl for syncing the array and simplify this.
> Again I might be wrong since this is done only via grepping :-)
>
> Cheers,
>    Nik

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2014-09-05 11:49 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2014-09-03 21:47 [PATCH net-next 2/2] bonding: Simplify the xmit function for modes that use xmit_hash Mahesh Bandewar
2014-09-03 22:51 ` Jay Vosburgh
2014-09-04  0:57   ` Mahesh Bandewar
2014-09-04 13:16 ` Nikolay Aleksandrov
2014-09-05  0:10   ` Mahesh Bandewar
2014-09-05 11:26     ` Nikolay Aleksandrov
2014-09-05 11:49       ` Nikolay Aleksandrov

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).