Netdev List

Netdev List
 help / color / mirror / Atom feed

* [PATCH v4 net-next 13/27] bonding: rework bond_find_best_slave() to use bond_for_each_slave()
From: Veaceslav Falico @ 2013-09-24 11:46 UTC (permalink / raw)
  To: netdev; +Cc: jiri, Veaceslav Falico, Jay Vosburgh, Andy Gospodarek
In-Reply-To: <1380023227-9576-1-git-send-email-vfalico@redhat.com>

bond_find_best_slave() does not have to be balanced - i.e. return the slave
that is *after* some other slave, but rather return the best slave that
suits, except of bond->primary_slave - in which case we just return it if
it's suitable.

After that we just look through all the slaves and return either first up
slave or the slave whose link came back earliest.

We also don't care about curr_active_slave lock cause we use it in
bond_should_change_active() only and there we take it right away - i.e. it
won't go away.

CC: Jay Vosburgh <fubar@us.ibm.com>
CC: Andy Gospodarek <andy@greyhouse.net>
Signed-off-by: Veaceslav Falico <vfalico@redhat.com>
---

Notes:
    v3  -> v4:
    No change.
    
    v2  -> v3:
    No change.
    
    v1  -> v2:
    No changes.
    
    RFC -> v1:
    New patch.

 drivers/net/bonding/bond_main.c | 43 ++++++++++++-----------------------------
 1 file changed, 12 insertions(+), 31 deletions(-)

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 85e99ae..6abbfac 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -785,43 +785,24 @@ static bool bond_should_change_active(struct bonding *bond)
 /**
  * find_best_interface - select the best available slave to be the active one
  * @bond: our bonding struct
- *
- * Warning: Caller must hold curr_slave_lock for writing.
  */
 static struct slave *bond_find_best_slave(struct bonding *bond)
 {
-	struct slave *new_active, *old_active;
-	struct slave *bestslave = NULL;
+	struct slave *slave, *bestslave = NULL;
+	struct list_head *iter;
 	int mintime = bond->params.updelay;
-	int i;
 
-	new_active = bond->curr_active_slave;
-
-	if (!new_active) { /* there were no active slaves left */
-		new_active = bond_first_slave(bond);
-		if (!new_active)
-			return NULL; /* still no slave, return NULL */
-	}
+	if (bond->primary_slave && bond->primary_slave->link == BOND_LINK_UP &&
+	    bond_should_change_active(bond))
+		return bond->primary_slave;
 
-	if ((bond->primary_slave) &&
-	    bond->primary_slave->link == BOND_LINK_UP &&
-	    bond_should_change_active(bond)) {
-		new_active = bond->primary_slave;
-	}
-
-	/* remember where to stop iterating over the slaves */
-	old_active = new_active;
-
-	bond_for_each_slave_from(bond, new_active, i, old_active) {
-		if (new_active->link == BOND_LINK_UP) {
-			return new_active;
-		} else if (new_active->link == BOND_LINK_BACK &&
-			   IS_UP(new_active->dev)) {
-			/* link up, but waiting for stabilization */
-			if (new_active->delay < mintime) {
-				mintime = new_active->delay;
-				bestslave = new_active;
-			}
+	bond_for_each_slave(bond, slave, iter) {
+		if (slave->link == BOND_LINK_UP)
+			return slave;
+		if (slave->link == BOND_LINK_BACK && IS_UP(slave->dev) &&
+		    slave->delay < mintime) {
+			mintime = slave->delay;
+			bestslave = slave;
 		}
 	}
 
-- 
1.8.4

^ permalink raw reply related

* [PATCH v4 net-next 12/27] bonding: rework rlb_next_rx_slave() to use bond_for_each_slave()
From: Veaceslav Falico @ 2013-09-24 11:46 UTC (permalink / raw)
  To: netdev; +Cc: jiri, Veaceslav Falico, Jay Vosburgh, Andy Gospodarek
In-Reply-To: <1380023227-9576-1-git-send-email-vfalico@redhat.com>

Currently, we're using bond_for_each_slave_from(), which is really hard to
implement under RCU and/or neighbour list.

Remove it and use bond_for_each_slave() instead, taking care of the last
used slave.

Also, rename next_rx_slave to rx_slave and store the current (last)
rx_slave.

CC: Jay Vosburgh <fubar@us.ibm.com>
CC: Andy Gospodarek <andy@greyhouse.net>
Signed-off-by: Veaceslav Falico <vfalico@redhat.com>
---

Notes:
    v3  -> v4:
    No change.
    
    v2  -> v3:
    No change.
    
    v1  -> v2:
    No changes.
    
    RFC -> v1:
    New patch.

 drivers/net/bonding/bond_alb.c | 41 +++++++++++++++++++++--------------------
 drivers/net/bonding/bond_alb.h |  4 +---
 2 files changed, 22 insertions(+), 23 deletions(-)

diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c
index f4929ce..c5b85ad 100644
--- a/drivers/net/bonding/bond_alb.c
+++ b/drivers/net/bonding/bond_alb.c
@@ -383,30 +383,31 @@ out:
 static struct slave *rlb_next_rx_slave(struct bonding *bond)
 {
 	struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
-	struct slave *rx_slave, *slave, *start_at;
-	int i = 0;
-
-	if (bond_info->next_rx_slave)
-		start_at = bond_info->next_rx_slave;
-	else
-		start_at = bond_first_slave(bond);
-
-	rx_slave = NULL;
+	struct slave *before = NULL, *rx_slave = NULL, *slave;
+	struct list_head *iter;
+	bool found = false;
 
-	bond_for_each_slave_from(bond, slave, i, start_at) {
-		if (SLAVE_IS_OK(slave)) {
-			if (!rx_slave) {
-				rx_slave = slave;
-			} else if (slave->speed > rx_slave->speed) {
+	bond_for_each_slave(bond, slave, iter) {
+		if (!SLAVE_IS_OK(slave))
+			continue;
+		if (!found) {
+			if (!before || before->speed < slave->speed)
+				before = slave;
+		} else {
+			if (!rx_slave || rx_slave->speed < slave->speed)
 				rx_slave = slave;
-			}
 		}
+		if (slave == bond_info->rx_slave)
+			found = true;
 	}
+	/* we didn't find anything after the current or we have something
+	 * better before and up to the current slave
+	 */
+	if (!rx_slave || (before && rx_slave->speed < before->speed))
+		rx_slave = before;
 
-	if (rx_slave) {
-		slave = bond_next_slave(bond, rx_slave);
-		bond_info->next_rx_slave = slave;
-	}
+	if (rx_slave)
+		bond_info->rx_slave = rx_slave;
 
 	return rx_slave;
 }
@@ -1611,7 +1612,7 @@ void bond_alb_deinit_slave(struct bonding *bond, struct slave *slave)
 	tlb_clear_slave(bond, slave, 0);
 
 	if (bond->alb_info.rlb_enabled) {
-		bond->alb_info.next_rx_slave = NULL;
+		bond->alb_info.rx_slave = NULL;
 		rlb_clear_slave(bond, slave);
 	}
 }
diff --git a/drivers/net/bonding/bond_alb.h b/drivers/net/bonding/bond_alb.h
index c5eff5d..4226044 100644
--- a/drivers/net/bonding/bond_alb.h
+++ b/drivers/net/bonding/bond_alb.h
@@ -154,9 +154,7 @@ struct alb_bond_info {
 	u8			rx_ntt;	/* flag - need to transmit
 					 * to all rx clients
 					 */
-	struct slave		*next_rx_slave;/* next slave to be assigned
-						* to a new rx client for
-						*/
+	struct slave		*rx_slave;/* last slave to xmit from */
 	u8			primary_is_promisc;	   /* boolean */
 	u32			rlb_promisc_timeout_counter;/* counts primary
 							     * promiscuity time
-- 
1.8.4

^ permalink raw reply related

* [PATCH v4 net-next 05/27] bonding: populate neighbour's private on enslave
From: Veaceslav Falico @ 2013-09-24 11:46 UTC (permalink / raw)
  To: netdev; +Cc: jiri, Veaceslav Falico, Jay Vosburgh, Andy Gospodarek
In-Reply-To: <1380023227-9576-1-git-send-email-vfalico@redhat.com>

Use the new provided function when attaching the lower slave to populate
its ->private with struct slave *new_slave. Also, move it to the end to
be able to 'find' it only after it was completely initialized, and
deinitialize in the first place on release.

CC: Jay Vosburgh <fubar@us.ibm.com>
CC: Andy Gospodarek <andy@greyhouse.net>
Signed-off-by: Veaceslav Falico <vfalico@redhat.com>
---

Notes:
    v3  -> v4:
    No change.
    
    v2  -> v3:
    No change.
    
    v1  -> v2:
    No changes.
    
    RFC -> v1:
    Move the patch right after the introduction of ->private, to immediately
    use it - easier for review and more logical.

 drivers/net/bonding/bond_main.c | 30 ++++++++++++++++--------------
 1 file changed, 16 insertions(+), 14 deletions(-)

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 91c4ab8..8e41416 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -1233,11 +1233,12 @@ static rx_handler_result_t bond_handle_frame(struct sk_buff **pskb)
 }
 
 static int bond_master_upper_dev_link(struct net_device *bond_dev,
-				      struct net_device *slave_dev)
+				      struct net_device *slave_dev,
+				      struct slave *slave)
 {
 	int err;
 
-	err = netdev_master_upper_dev_link(slave_dev, bond_dev);
+	err = netdev_master_upper_dev_link_private(slave_dev, bond_dev, slave);
 	if (err)
 		return err;
 	slave_dev->flags |= IFF_SLAVE;
@@ -1413,17 +1414,11 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
 		}
 	}
 
-	res = bond_master_upper_dev_link(bond_dev, slave_dev);
-	if (res) {
-		pr_debug("Error %d calling bond_master_upper_dev_link\n", res);
-		goto err_restore_mac;
-	}
-
 	/* open the slave since the application closed it */
 	res = dev_open(slave_dev);
 	if (res) {
 		pr_debug("Opening slave %s failed\n", slave_dev->name);
-		goto err_unset_master;
+		goto err_restore_mac;
 	}
 
 	new_slave->bond = bond;
@@ -1637,6 +1632,13 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
 		goto err_dest_symlinks;
 	}
 
+	res = bond_master_upper_dev_link(bond_dev, slave_dev, new_slave);
+	if (res) {
+		pr_debug("Error %d calling bond_master_upper_dev_link\n", res);
+		goto err_unregister;
+	}
+
+
 	pr_info("%s: enslaving %s as a%s interface with a%s link.\n",
 		bond_dev->name, slave_dev->name,
 		bond_is_active_slave(new_slave) ? "n active" : " backup",
@@ -1646,6 +1648,9 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
 	return 0;
 
 /* Undo stages on error */
+err_unregister:
+	netdev_rx_handler_unregister(slave_dev);
+
 err_dest_symlinks:
 	bond_destroy_slave_symlinks(bond_dev, slave_dev);
 
@@ -1675,9 +1680,6 @@ err_close:
 	slave_dev->priv_flags &= ~IFF_BONDING;
 	dev_close(slave_dev);
 
-err_unset_master:
-	bond_upper_dev_unlink(bond_dev, slave_dev);
-
 err_restore_mac:
 	if (!bond->params.fail_over_mac) {
 		/* XXX TODO - fom follow mode needs to change master's
@@ -1748,6 +1750,8 @@ static int __bond_release_one(struct net_device *bond_dev,
 	}
 
 	write_unlock_bh(&bond->lock);
+
+	bond_upper_dev_unlink(bond_dev, slave_dev);
 	/* unregister rx_handler early so bond_handle_frame wouldn't be called
 	 * for this slave anymore.
 	 */
@@ -1866,8 +1870,6 @@ static int __bond_release_one(struct net_device *bond_dev,
 		bond_hw_addr_flush(bond_dev, slave_dev);
 	}
 
-	bond_upper_dev_unlink(bond_dev, slave_dev);
-
 	slave_disable_netpoll(slave);
 
 	/* close slave before restoring its mac address */
-- 
1.8.4

^ permalink raw reply related

* [PATCH v4 net-next 15/27] bonding: remove unused bond_for_each_slave_from()
From: Veaceslav Falico @ 2013-09-24 11:46 UTC (permalink / raw)
  To: netdev; +Cc: jiri, Veaceslav Falico, Jay Vosburgh, Andy Gospodarek
In-Reply-To: <1380023227-9576-1-git-send-email-vfalico@redhat.com>

It has no users, so we can remove it.

CC: Jay Vosburgh <fubar@us.ibm.com>
CC: Andy Gospodarek <andy@greyhouse.net>
Signed-off-by: Veaceslav Falico <vfalico@redhat.com>
---

Notes:
    v3  -> v4:
    No change.
    
    v2  -> v3:
    No change.
    
    v1  -> v2:
    No changes.
    
    RFC -> v1:
    New patch.

 drivers/net/bonding/bonding.h | 13 -------------
 1 file changed, 13 deletions(-)

diff --git a/drivers/net/bonding/bonding.h b/drivers/net/bonding/bonding.h
index c4f9d16..004a091 100644
--- a/drivers/net/bonding/bonding.h
+++ b/drivers/net/bonding/bonding.h
@@ -94,19 +94,6 @@
 					  bond_to_slave((pos)->list.prev))
 
 /**
- * bond_for_each_slave_from - iterate the slaves list from a starting point
- * @bond:	the bond holding this list.
- * @pos:	current slave.
- * @cnt:	counter for max number of moves
- * @start:	starting point.
- *
- * Caller must hold bond->lock
- */
-#define bond_for_each_slave_from(bond, pos, cnt, start) \
-	for (cnt = 0, pos = start; pos && cnt < (bond)->slave_cnt; \
-	     cnt++, pos = bond_next_slave(bond, pos))
-
-/**
  * bond_for_each_slave - iterate over all slaves
  * @bond:	the bond holding this list
  * @pos:	current slave
-- 
1.8.4

^ permalink raw reply related

* [PATCH v4 net-next 16/27] bonding: add bond_has_slaves() and use it
From: Veaceslav Falico @ 2013-09-24 11:46 UTC (permalink / raw)
  To: netdev; +Cc: jiri, Veaceslav Falico, Jay Vosburgh, Andy Gospodarek
In-Reply-To: <1380023227-9576-1-git-send-email-vfalico@redhat.com>

Currently we verify if we have slaves by checking if bond->slave_list is
empty. Create a define bond_has_slaves() and use it, a bit more readable
and easier to change in the future.

CC: Jay Vosburgh <fubar@us.ibm.com>
CC: Andy Gospodarek <andy@greyhouse.net>
Signed-off-by: Veaceslav Falico <vfalico@redhat.com>
---

Notes:
    v3  -> v4:
    No change.
    
    v2  -> v3:
    No change.
    
    v1  -> v2:
    No changes.
    
    RFC -> v1:
    No change.

 drivers/net/bonding/bond_3ad.c   |  2 +-
 drivers/net/bonding/bond_alb.c   |  8 ++++----
 drivers/net/bonding/bond_main.c  | 32 ++++++++++++++++----------------
 drivers/net/bonding/bond_sysfs.c |  4 ++--
 drivers/net/bonding/bonding.h    |  2 ++
 5 files changed, 25 insertions(+), 23 deletions(-)

diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c
index c861ee7..1337eaf 100644
--- a/drivers/net/bonding/bond_3ad.c
+++ b/drivers/net/bonding/bond_3ad.c
@@ -2117,7 +2117,7 @@ void bond_3ad_state_machine_handler(struct work_struct *work)
 	read_lock(&bond->lock);
 
 	//check if there are any slaves
-	if (list_empty(&bond->slave_list))
+	if (!bond_has_slaves(bond))
 		goto re_arm;
 
 	// check if agg_select_timer timer after initialize is timed out
diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c
index c5b85ad..e960418 100644
--- a/drivers/net/bonding/bond_alb.c
+++ b/drivers/net/bonding/bond_alb.c
@@ -1178,7 +1178,7 @@ static int alb_handle_addr_collision_on_attach(struct bonding *bond, struct slav
 	struct slave *tmp_slave1, *free_mac_slave = NULL;
 	struct list_head *iter;
 
-	if (list_empty(&bond->slave_list)) {
+	if (!bond_has_slaves(bond)) {
 		/* this is the first slave */
 		return 0;
 	}
@@ -1469,7 +1469,7 @@ void bond_alb_monitor(struct work_struct *work)
 
 	read_lock(&bond->lock);
 
-	if (list_empty(&bond->slave_list)) {
+	if (!bond_has_slaves(bond)) {
 		bond_info->tx_rebalance_counter = 0;
 		bond_info->lp_counter = 0;
 		goto re_arm;
@@ -1606,7 +1606,7 @@ int bond_alb_init_slave(struct bonding *bond, struct slave *slave)
  */
 void bond_alb_deinit_slave(struct bonding *bond, struct slave *slave)
 {
-	if (!list_empty(&bond->slave_list))
+	if (bond_has_slaves(bond))
 		alb_change_hw_addr_on_detach(bond, slave);
 
 	tlb_clear_slave(bond, slave, 0);
@@ -1676,7 +1676,7 @@ void bond_alb_handle_active_change(struct bonding *bond, struct slave *new_slave
 	swap_slave = bond->curr_active_slave;
 	rcu_assign_pointer(bond->curr_active_slave, new_slave);
 
-	if (!new_slave || list_empty(&bond->slave_list))
+	if (!new_slave || !bond_has_slaves(bond))
 		return;
 
 	/* set the new curr_active_slave to the bonds mac address
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 3c96b1b..06ffc8a 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -391,7 +391,7 @@ static int bond_set_carrier(struct bonding *bond)
 	struct list_head *iter;
 	struct slave *slave;
 
-	if (list_empty(&bond->slave_list))
+	if (!bond_has_slaves(bond))
 		goto down;
 
 	if (bond->params.mode == BOND_MODE_8023AD)
@@ -1085,7 +1085,7 @@ static netdev_features_t bond_fix_features(struct net_device *dev,
 	netdev_features_t mask;
 	struct slave *slave;
 
-	if (list_empty(&bond->slave_list)) {
+	if (!bond_has_slaves(bond)) {
 		/* Disable adding VLANs to empty bond. But why? --mq */
 		features |= NETIF_F_VLAN_CHALLENGED;
 		return features;
@@ -1120,7 +1120,7 @@ static void bond_compute_features(struct bonding *bond)
 	unsigned int gso_max_size = GSO_MAX_SIZE;
 	u16 gso_max_segs = GSO_MAX_SEGS;
 
-	if (list_empty(&bond->slave_list))
+	if (!bond_has_slaves(bond))
 		goto done;
 
 	bond_for_each_slave(bond, slave, iter) {
@@ -1310,7 +1310,7 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
 	 * bond ether type mutual exclusion - don't allow slaves of dissimilar
 	 * ether type (eg ARPHRD_ETHER and ARPHRD_INFINIBAND) share the same bond
 	 */
-	if (list_empty(&bond->slave_list)) {
+	if (!bond_has_slaves(bond)) {
 		if (bond_dev->type != slave_dev->type) {
 			pr_debug("%s: change device type from %d to %d\n",
 				 bond_dev->name,
@@ -1349,7 +1349,7 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
 	}
 
 	if (slave_ops->ndo_set_mac_address == NULL) {
-		if (list_empty(&bond->slave_list)) {
+		if (!bond_has_slaves(bond)) {
 			pr_warning("%s: Warning: The first slave device specified does not support setting the MAC address. Setting fail_over_mac to active.",
 				   bond_dev->name);
 			bond->params.fail_over_mac = BOND_FOM_ACTIVE;
@@ -1365,7 +1365,7 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
 
 	/* If this is the first slave, then we need to set the master's hardware
 	 * address to be the same as the slave's. */
-	if (list_empty(&bond->slave_list) &&
+	if (!bond_has_slaves(bond) &&
 	    bond->dev->addr_assign_type == NET_ADDR_RANDOM)
 		bond_set_dev_addr(bond->dev, slave_dev);
 
@@ -1696,7 +1696,7 @@ err_free:
 err_undo_flags:
 	bond_compute_features(bond);
 	/* Enslave of first slave has failed and we need to fix master's mac */
-	if (list_empty(&bond->slave_list) &&
+	if (!bond_has_slaves(bond) &&
 	    ether_addr_equal(bond_dev->dev_addr, slave_dev->dev_addr))
 		eth_hw_addr_random(bond_dev);
 
@@ -1776,7 +1776,7 @@ static int __bond_release_one(struct net_device *bond_dev,
 
 	if (!all && !bond->params.fail_over_mac) {
 		if (ether_addr_equal(bond_dev->dev_addr, slave->perm_hwaddr) &&
-		    !list_empty(&bond->slave_list))
+		    bond_has_slaves(bond))
 			pr_warn("%s: Warning: the permanent HWaddr of %s - %pM - is still in use by %s. Set the HWaddr of %s to a different address to avoid conflicts.\n",
 				   bond_dev->name, slave_dev->name,
 				   slave->perm_hwaddr,
@@ -1819,7 +1819,7 @@ static int __bond_release_one(struct net_device *bond_dev,
 		write_lock_bh(&bond->lock);
 	}
 
-	if (list_empty(&bond->slave_list)) {
+	if (!bond_has_slaves(bond)) {
 		bond_set_carrier(bond);
 		eth_hw_addr_random(bond_dev);
 
@@ -1835,7 +1835,7 @@ static int __bond_release_one(struct net_device *bond_dev,
 	unblock_netpoll_tx();
 	synchronize_rcu();
 
-	if (list_empty(&bond->slave_list)) {
+	if (!bond_has_slaves(bond)) {
 		call_netdevice_notifiers(NETDEV_CHANGEADDR, bond->dev);
 		call_netdevice_notifiers(NETDEV_RELEASE, bond->dev);
 	}
@@ -1904,7 +1904,7 @@ static int  bond_release_and_destroy(struct net_device *bond_dev,
 	int ret;
 
 	ret = bond_release(bond_dev, slave_dev);
-	if (ret == 0 && list_empty(&bond->slave_list)) {
+	if (ret == 0 && !bond_has_slaves(bond)) {
 		bond_dev->priv_flags |= IFF_DISABLE_NETPOLL;
 		pr_info("%s: destroying bond %s.\n",
 			bond_dev->name, bond_dev->name);
@@ -2219,7 +2219,7 @@ void bond_mii_monitor(struct work_struct *work)
 
 	delay = msecs_to_jiffies(bond->params.miimon);
 
-	if (list_empty(&bond->slave_list))
+	if (!bond_has_slaves(bond))
 		goto re_arm;
 
 	should_notify_peers = bond_should_notify_peers(bond);
@@ -2513,7 +2513,7 @@ void bond_loadbalance_arp_mon(struct work_struct *work)
 
 	read_lock(&bond->lock);
 
-	if (list_empty(&bond->slave_list))
+	if (!bond_has_slaves(bond))
 		goto re_arm;
 
 	oldcurrent = bond->curr_active_slave;
@@ -2845,7 +2845,7 @@ void bond_activebackup_arp_mon(struct work_struct *work)
 
 	delta_in_ticks = msecs_to_jiffies(bond->params.arp_interval);
 
-	if (list_empty(&bond->slave_list))
+	if (!bond_has_slaves(bond))
 		goto re_arm;
 
 	should_notify_peers = bond_should_notify_peers(bond);
@@ -3169,7 +3169,7 @@ static int bond_open(struct net_device *bond_dev)
 
 	/* reset slave->backup and slave->inactive */
 	read_lock(&bond->lock);
-	if (!list_empty(&bond->slave_list)) {
+	if (bond_has_slaves(bond)) {
 		read_lock(&bond->curr_slave_lock);
 		bond_for_each_slave(bond, slave, iter) {
 			if ((bond->params.mode == BOND_MODE_ACTIVEBACKUP)
@@ -3892,7 +3892,7 @@ static netdev_tx_t bond_start_xmit(struct sk_buff *skb, struct net_device *dev)
 		return NETDEV_TX_BUSY;
 
 	rcu_read_lock();
-	if (!list_empty(&bond->slave_list))
+	if (bond_has_slaves(bond))
 		ret = __bond_start_xmit(skb, dev);
 	else
 		kfree_skb(skb);
diff --git a/drivers/net/bonding/bond_sysfs.c b/drivers/net/bonding/bond_sysfs.c
index e747415..04d95d6 100644
--- a/drivers/net/bonding/bond_sysfs.c
+++ b/drivers/net/bonding/bond_sysfs.c
@@ -327,7 +327,7 @@ static ssize_t bonding_store_mode(struct device *d,
 		goto out;
 	}
 
-	if (!list_empty(&bond->slave_list)) {
+	if (bond_has_slaves(bond)) {
 		pr_err("unable to update mode of %s because it has slaves.\n",
 			bond->dev->name);
 		ret = -EPERM;
@@ -523,7 +523,7 @@ static ssize_t bonding_store_fail_over_mac(struct device *d,
 	if (!rtnl_trylock())
 		return restart_syscall();
 
-	if (!list_empty(&bond->slave_list)) {
+	if (bond_has_slaves(bond)) {
 		pr_err("%s: Can't alter fail_over_mac with slaves in bond.\n",
 		       bond->dev->name);
 		ret = -EPERM;
diff --git a/drivers/net/bonding/bonding.h b/drivers/net/bonding/bonding.h
index 004a091..42c40d4 100644
--- a/drivers/net/bonding/bonding.h
+++ b/drivers/net/bonding/bonding.h
@@ -72,6 +72,8 @@
 	res; })
 
 /* slave list primitives */
+#define bond_has_slaves(bond) !list_empty(&(bond)->slave_list)
+
 #define bond_to_slave(ptr) list_entry(ptr, struct slave, list)
 
 /* IMPORTANT: bond_first/last_slave can return NULL in case of an empty list */
-- 
1.8.4

^ permalink raw reply related

* [PATCH v4 net-next 17/27] bonding: convert bond_has_slaves() to use the neighbour list
From: Veaceslav Falico @ 2013-09-24 11:46 UTC (permalink / raw)
  To: netdev; +Cc: jiri, Veaceslav Falico, Jay Vosburgh, Andy Gospodarek
In-Reply-To: <1380023227-9576-1-git-send-email-vfalico@redhat.com>

The same way as it was used for its own slave_list.

CC: Jay Vosburgh <fubar@us.ibm.com>
CC: Andy Gospodarek <andy@greyhouse.net>
Signed-off-by: Veaceslav Falico <vfalico@redhat.com>
---

Notes:
    v3  -> v4:
    No change.
    
    v2  -> v3:
    No change.
    
    v1  -> v2:
    No changes.
    
    RFC -> v1:
    Use the renamed adj_list instead of neighbour_dev_list.

 drivers/net/bonding/bonding.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/bonding/bonding.h b/drivers/net/bonding/bonding.h
index 42c40d4..4b3878e 100644
--- a/drivers/net/bonding/bonding.h
+++ b/drivers/net/bonding/bonding.h
@@ -72,7 +72,7 @@
 	res; })
 
 /* slave list primitives */
-#define bond_has_slaves(bond) !list_empty(&(bond)->slave_list)
+#define bond_has_slaves(bond) !list_empty(&(bond)->dev->adj_list.lower)
 
 #define bond_to_slave(ptr) list_entry(ptr, struct slave, list)
 
-- 
1.8.4

^ permalink raw reply related

* [PATCH v4 net-next 18/27] net: add a possibility to get private from netdev_adjacent->list
From: Veaceslav Falico @ 2013-09-24 11:46 UTC (permalink / raw)
  To: netdev
  Cc: jiri, Veaceslav Falico, David S. Miller, Eric Dumazet,
	Alexander Duyck
In-Reply-To: <1380023227-9576-1-git-send-email-vfalico@redhat.com>

It will be useful to get first/last element.

CC: "David S. Miller" <davem@davemloft.net>
CC: Eric Dumazet <edumazet@google.com>
CC: Jiri Pirko <jiri@resnulli.us>
CC: Alexander Duyck <alexander.h.duyck@intel.com>
Signed-off-by: Veaceslav Falico <vfalico@redhat.com>
---

Notes:
    v3  -> v4:
    No change.
    
    v2  -> v3:
    No change.
    
    v1  -> v2:
    No changes.
    
    RFC -> v1:
    No changes.

 include/linux/netdevice.h |  1 +
 net/core/dev.c            | 10 ++++++++++
 2 files changed, 11 insertions(+)

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 168974e..b4cfb63 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2850,6 +2850,7 @@ extern void *netdev_lower_get_next_private_rcu(struct net_device *dev,
 	     priv; \
 	     priv = netdev_lower_get_next_private_rcu(dev, &(iter)))
 
+extern void *netdev_adjacent_get_private(struct list_head *adj_list);
 extern struct net_device *netdev_master_upper_dev_get(struct net_device *dev);
 extern struct net_device *netdev_master_upper_dev_get_rcu(struct net_device *dev);
 extern int netdev_upper_dev_link(struct net_device *dev,
diff --git a/net/core/dev.c b/net/core/dev.c
index 0aa844a..acc1181 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4466,6 +4466,16 @@ struct net_device *netdev_master_upper_dev_get(struct net_device *dev)
 }
 EXPORT_SYMBOL(netdev_master_upper_dev_get);
 
+void *netdev_adjacent_get_private(struct list_head *adj_list)
+{
+	struct netdev_adjacent *adj;
+
+	adj = list_entry(adj_list, struct netdev_adjacent, list);
+
+	return adj->private;
+}
+EXPORT_SYMBOL(netdev_adjacent_get_private);
+
 /**
  * netdev_all_upper_get_next_dev_rcu - Get the next dev from upper list
  * @dev: device
-- 
1.8.4

^ permalink raw reply related

* [PATCH v4 net-next 19/27] bonding: convert first/last slave logic to use neighbours
From: Veaceslav Falico @ 2013-09-24 11:46 UTC (permalink / raw)
  To: netdev; +Cc: jiri, Veaceslav Falico, Jay Vosburgh, Andy Gospodarek
In-Reply-To: <1380023227-9576-1-git-send-email-vfalico@redhat.com>

For that, use netdev_adjacent_get_private(list_head) on bond's lower
neighbour list members. Also, add a small macro - bond_slave_list(bond),
which returns the bond list via neighbour list.

CC: Jay Vosburgh <fubar@us.ibm.com>
CC: Andy Gospodarek <andy@greyhouse.net>
Signed-off-by: Veaceslav Falico <vfalico@redhat.com>
---

Notes:
    v3  -> v4:
    No change.
    
    v2  -> v3:
    No change.
    
    v1  -> v2:
    No changes.
    
    RFC -> v1:
    Rename neieghour_dev_list to adj_list.

 drivers/net/bonding/bonding.h | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/drivers/net/bonding/bonding.h b/drivers/net/bonding/bonding.h
index 4b3878e..0b4c4ff 100644
--- a/drivers/net/bonding/bonding.h
+++ b/drivers/net/bonding/bonding.h
@@ -72,19 +72,24 @@
 	res; })
 
 /* slave list primitives */
-#define bond_has_slaves(bond) !list_empty(&(bond)->dev->adj_list.lower)
+#define bond_slave_list(bond) (&(bond)->dev->adj_list.lower)
+
+#define bond_has_slaves(bond) !list_empty(bond_slave_list(bond))
 
 #define bond_to_slave(ptr) list_entry(ptr, struct slave, list)
 
 /* IMPORTANT: bond_first/last_slave can return NULL in case of an empty list */
 #define bond_first_slave(bond) \
-	list_first_entry_or_null(&(bond)->slave_list, struct slave, list)
+	(bond_has_slaves(bond) ? \
+		netdev_adjacent_get_private(bond_slave_list(bond)->next) : \
+		NULL)
 #define bond_last_slave(bond) \
-	(list_empty(&(bond)->slave_list) ? NULL : \
-					   bond_to_slave((bond)->slave_list.prev))
+	(bond_has_slaves(bond) ? \
+		netdev_adjacent_get_private(bond_slave_list(bond)->prev) : \
+		NULL)
 
-#define bond_is_first_slave(bond, pos) ((pos)->list.prev == &(bond)->slave_list)
-#define bond_is_last_slave(bond, pos) ((pos)->list.next == &(bond)->slave_list)
+#define bond_is_first_slave(bond, pos) (pos == bond_first_slave(bond))
+#define bond_is_last_slave(bond, pos) (pos == bond_last_slave(bond))
 
 /* Since bond_first/last_slave can return NULL, these can return NULL too */
 #define bond_next_slave(bond, pos) \
-- 
1.8.4

^ permalink raw reply related

* [PATCH v4 net-next 20/27] bonding: remove bond_prev_slave()
From: Veaceslav Falico @ 2013-09-24 11:47 UTC (permalink / raw)
  To: netdev; +Cc: jiri, Veaceslav Falico, Jay Vosburgh, Andy Gospodarek
In-Reply-To: <1380023227-9576-1-git-send-email-vfalico@redhat.com>

We don't really need it, and it's really hard to RCUify the list->prev.

CC: Jay Vosburgh <fubar@us.ibm.com>
CC: Andy Gospodarek <andy@greyhouse.net>
Signed-off-by: Veaceslav Falico <vfalico@redhat.com>
---

Notes:
    v3  -> v4:
    No change.
    
    v2  -> v3:
    No change.
    
    v1  -> v2:
    No changes.
    
    RFC -> v1:
    New patch.

 drivers/net/bonding/bond_main.c | 9 +++------
 drivers/net/bonding/bonding.h   | 4 ----
 2 files changed, 3 insertions(+), 10 deletions(-)

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 06ffc8a..6aa345a 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -1255,7 +1255,7 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
 {
 	struct bonding *bond = netdev_priv(bond_dev);
 	const struct net_device_ops *slave_ops = slave_dev->netdev_ops;
-	struct slave *new_slave = NULL;
+	struct slave *new_slave = NULL, *prev_slave;
 	struct sockaddr addr;
 	int link_reporting;
 	int res = 0, i;
@@ -1472,6 +1472,7 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
 
 	write_lock_bh(&bond->lock);
 
+	prev_slave = bond_last_slave(bond);
 	bond_attach_slave(bond, new_slave);
 
 	new_slave->delay = 0;
@@ -1566,9 +1567,6 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
 			 */
 			bond_3ad_initialize(bond, 1000/AD_TIMER_INTERVAL);
 		} else {
-			struct slave *prev_slave;
-
-			prev_slave = bond_prev_slave(bond, new_slave);
 			SLAVE_AD_INFO(new_slave).id =
 				SLAVE_AD_INFO(prev_slave).id + 1;
 		}
@@ -3506,9 +3504,8 @@ static int bond_change_mtu(struct net_device *bond_dev, int new_mtu)
 	 */
 
 	bond_for_each_slave(bond, slave, iter) {
-		pr_debug("s %p s->p %p c_m %p\n",
+		pr_debug("s %p c_m %p\n",
 			 slave,
-			 bond_prev_slave(bond, slave),
 			 slave->dev->netdev_ops->ndo_change_mtu);
 
 		res = dev_set_mtu(slave->dev, new_mtu);
diff --git a/drivers/net/bonding/bonding.h b/drivers/net/bonding/bonding.h
index 0b4c4ff..5157153 100644
--- a/drivers/net/bonding/bonding.h
+++ b/drivers/net/bonding/bonding.h
@@ -96,10 +96,6 @@
 	(bond_is_last_slave(bond, pos) ? bond_first_slave(bond) : \
 					 bond_to_slave((pos)->list.next))
 
-#define bond_prev_slave(bond, pos) \
-	(bond_is_first_slave(bond, pos) ? bond_last_slave(bond) : \
-					  bond_to_slave((pos)->list.prev))
-
 /**
  * bond_for_each_slave - iterate over all slaves
  * @bond:	the bond holding this list
-- 
1.8.4

^ permalink raw reply related

* [PATCH v4 net-next 22/27] bonding: use neighbours for bond_next_slave()
From: Veaceslav Falico @ 2013-09-24 11:47 UTC (permalink / raw)
  To: netdev; +Cc: jiri, Veaceslav Falico, Jay Vosburgh, Andy Gospodarek
In-Reply-To: <1380023227-9576-1-git-send-email-vfalico@redhat.com>

Use the new function __bond_next_slave().

CC: Jay Vosburgh <fubar@us.ibm.com>
CC: Andy Gospodarek <andy@greyhouse.net>
Signed-off-by: Veaceslav Falico <vfalico@redhat.com>
---

Notes:
    v3  -> v4:
    Use the bonding-specific function, instead of the general netdev, which
    was removed.
    
    v2  -> v3:
    No change.
    
    v1  -> v2:
    No changes.
    
    RFC -> v1:
    Change to standard logic - if it's the last slave - return the first one,
    otherwise - return the next via netdev_lower_dev_get_next_private().
    Also, bond_prev_slave() was dropped - so we don't need it.

 drivers/net/bonding/bonding.h | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/drivers/net/bonding/bonding.h b/drivers/net/bonding/bonding.h
index db8d38a7..1b68cec 100644
--- a/drivers/net/bonding/bonding.h
+++ b/drivers/net/bonding/bonding.h
@@ -76,8 +76,6 @@
 
 #define bond_has_slaves(bond) !list_empty(bond_slave_list(bond))
 
-#define bond_to_slave(ptr) list_entry(ptr, struct slave, list)
-
 /* IMPORTANT: bond_first/last_slave can return NULL in case of an empty list */
 #define bond_first_slave(bond) \
 	(bond_has_slaves(bond) ? \
@@ -92,9 +90,7 @@
 #define bond_is_last_slave(bond, pos) (pos == bond_last_slave(bond))
 
 /* Since bond_first/last_slave can return NULL, these can return NULL too */
-#define bond_next_slave(bond, pos) \
-	(bond_is_last_slave(bond, pos) ? bond_first_slave(bond) : \
-					 bond_to_slave((pos)->list.next))
+#define bond_next_slave(bond, pos) __bond_next_slave(bond, pos)
 
 /**
  * bond_for_each_slave - iterate over all slaves
-- 
1.8.4

^ permalink raw reply related

* [PATCH v4 net-next 21/27] bonding: add __bond_next_slave() which uses neighbours
From: Veaceslav Falico @ 2013-09-24 11:47 UTC (permalink / raw)
  To: netdev; +Cc: jiri, Veaceslav Falico, Jay Vosburgh, Andy Gospodarek,
	Ben Hutchings
In-Reply-To: <1380023227-9576-1-git-send-email-vfalico@redhat.com>

Add a new function, __bond_next_slave(), which uses neighbours to find the
next slave after the slave provided. It will be further used to gradually
go start using neighbour netdev_adjacent infrastructure instead of
bonding's own lists.

CC: Jay Vosburgh <fubar@us.ibm.com>
CC: Andy Gospodarek <andy@greyhouse.net>
CC: Ben Hutchings <bhutchings@solarflare.com>
Signed-off-by: Veaceslav Falico <vfalico@redhat.com>
---

Notes:
    RFC -> v4:
    New patch.

 drivers/net/bonding/bonding.h | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)

diff --git a/drivers/net/bonding/bonding.h b/drivers/net/bonding/bonding.h
index 5157153..db8d38a7 100644
--- a/drivers/net/bonding/bonding.h
+++ b/drivers/net/bonding/bonding.h
@@ -250,6 +250,34 @@ struct bonding {
 	((struct slave *) rtnl_dereference(dev->rx_handler_data))
 
 /**
+ * __bond_next_slave - get the next slave after the one provided
+ * @bond - bonding struct
+ * @slave - the slave provided
+ *
+ * Returns the next slave after the slave provided, first slave if the
+ * slave provided is the last slave and NULL if slave is not found
+ */
+static inline struct slave *__bond_next_slave(struct bonding *bond,
+					      struct slave *slave)
+{
+	struct slave *slave_iter;
+	struct list_head *iter;
+	bool found = false;
+
+	netdev_for_each_lower_private(bond->dev, slave_iter, iter) {
+		if (found)
+			return slave_iter;
+		if (slave_iter == slave)
+			found = true;
+	}
+
+	if (found)
+		return bond_first_slave(bond);
+
+	return NULL;
+}
+
+/**
  * Returns NULL if the net_device does not belong to any of the bond's slaves
  *
  * Caller must hold bond lock for read
-- 
1.8.4

^ permalink raw reply related

* [PATCH v4 net-next 24/27] vlan: link the upper neighbour only after registering
From: Veaceslav Falico @ 2013-09-24 11:47 UTC (permalink / raw)
  To: netdev; +Cc: jiri, Veaceslav Falico, Patrick McHardy, David S. Miller
In-Reply-To: <1380023227-9576-1-git-send-email-vfalico@redhat.com>

Otherwise users might access it without being fully registered, as per
sysfs - it only inits in register_netdevice(), so is unusable till it is
called.

CC: Patrick McHardy <kaber@trash.net>
CC: "David S. Miller" <davem@davemloft.net>
Signed-off-by: Veaceslav Falico <vfalico@redhat.com>
---

Notes:
    v3  -> v4:
    No change.
    
    v2  -> v3:
    No change.
    
    v1  -> v2:
    No changes.
    
    RFC -> v1:
    No changes.

 net/8021q/vlan.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 61fc573..69b4a35 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -169,13 +169,13 @@ int register_vlan_dev(struct net_device *dev)
 	if (err < 0)
 		goto out_uninit_mvrp;
 
-	err = netdev_upper_dev_link(real_dev, dev);
-	if (err)
-		goto out_uninit_mvrp;
-
 	err = register_netdevice(dev);
 	if (err < 0)
-		goto out_upper_dev_unlink;
+		goto out_uninit_mvrp;
+
+	err = netdev_upper_dev_link(real_dev, dev);
+	if (err)
+		goto out_unregister_netdev;
 
 	/* Account for reference in struct vlan_dev_priv */
 	dev_hold(real_dev);
@@ -191,8 +191,8 @@ int register_vlan_dev(struct net_device *dev)
 
 	return 0;
 
-out_upper_dev_unlink:
-	netdev_upper_dev_unlink(real_dev, dev);
+out_unregister_netdev:
+	unregister_netdevice(dev);
 out_uninit_mvrp:
 	if (grp->nr_vlan_devs == 0)
 		vlan_mvrp_uninit_applicant(real_dev);
-- 
1.8.4

^ permalink raw reply related

* [PATCH v4 net-next 23/27] bonding: remove slave lists
From: Veaceslav Falico @ 2013-09-24 11:47 UTC (permalink / raw)
  To: netdev; +Cc: jiri, Veaceslav Falico, Jay Vosburgh, Andy Gospodarek
In-Reply-To: <1380023227-9576-1-git-send-email-vfalico@redhat.com>

And all the initialization.

CC: Jay Vosburgh <fubar@us.ibm.com>
CC: Andy Gospodarek <andy@greyhouse.net>
Signed-off-by: Veaceslav Falico <vfalico@redhat.com>
---

Notes:
    v3  -> v4:
    No change.
    
    v2  -> v3:
    No change.
    
    RFC -> v2:
    New patch.

 drivers/net/bonding/bond_main.c | 4 ----
 drivers/net/bonding/bonding.h   | 2 --
 2 files changed, 6 deletions(-)

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 6aa345a..d494045 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -972,7 +972,6 @@ void bond_select_active_slave(struct bonding *bond)
  */
 static void bond_attach_slave(struct bonding *bond, struct slave *new_slave)
 {
-	list_add_tail_rcu(&new_slave->list, &bond->slave_list);
 	bond->slave_cnt++;
 }
 
@@ -988,7 +987,6 @@ static void bond_attach_slave(struct bonding *bond, struct slave *new_slave)
  */
 static void bond_detach_slave(struct bonding *bond, struct slave *slave)
 {
-	list_del_rcu(&slave->list);
 	bond->slave_cnt--;
 }
 
@@ -1374,7 +1372,6 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
 		res = -ENOMEM;
 		goto err_undo_flags;
 	}
-	INIT_LIST_HEAD(&new_slave->list);
 	/*
 	 * Set the new_slave's queue_id to be zero.  Queue ID mapping
 	 * is set via sysfs or module option if desired.
@@ -4022,7 +4019,6 @@ static void bond_setup(struct net_device *bond_dev)
 	/* initialize rwlocks */
 	rwlock_init(&bond->lock);
 	rwlock_init(&bond->curr_slave_lock);
-	INIT_LIST_HEAD(&bond->slave_list);
 	bond->params = bonding_defaults;
 
 	/* Initialize pointers */
diff --git a/drivers/net/bonding/bonding.h b/drivers/net/bonding/bonding.h
index 1b68cec..18116ec 100644
--- a/drivers/net/bonding/bonding.h
+++ b/drivers/net/bonding/bonding.h
@@ -165,7 +165,6 @@ struct bond_parm_tbl {
 
 struct slave {
 	struct net_device *dev; /* first - useful for panic debug */
-	struct list_head list;
 	struct bonding *bond; /* our master */
 	int    delay;
 	unsigned long jiffies;
@@ -205,7 +204,6 @@ struct slave {
  */
 struct bonding {
 	struct   net_device *dev; /* first - useful for panic debug */
-	struct   list_head slave_list;
 	struct   slave *curr_active_slave;
 	struct   slave *current_arp_slave;
 	struct   slave *primary_slave;
-- 
1.8.4

^ permalink raw reply related

* [PATCH v4 net-next 25/27] vlan: unlink the upper neighbour before unregistering
From: Veaceslav Falico @ 2013-09-24 11:47 UTC (permalink / raw)
  To: netdev; +Cc: jiri, Veaceslav Falico, Patrick McHardy, David S. Miller
In-Reply-To: <1380023227-9576-1-git-send-email-vfalico@redhat.com>

On netdev unregister we're removing also all of its sysfs-associated stuff,
including the sysfs symlinks that are controlled by netdev neighbour code.
Also, it's a subtle race condition - cause we can still access it after
unregistering.

Move the unlinking right before the unregistering to fix both.

CC: Patrick McHardy <kaber@trash.net>
CC: "David S. Miller" <davem@davemloft.net>
Signed-off-by: Veaceslav Falico <vfalico@redhat.com>
---

Notes:
    v3  -> v4:
    No change.

    v2  -> v3:
    No change.

    v1  -> v2:
    No changes.

    RFC -> v2:
    New patch.

 net/8021q/vlan.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 69b4a35..b3d17d1 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -98,14 +98,14 @@ void unregister_vlan_dev(struct net_device *dev, struct list_head *head)
 		vlan_gvrp_request_leave(dev);

 	vlan_group_set_device(grp, vlan->vlan_proto, vlan_id, NULL);
+
+	netdev_upper_dev_unlink(real_dev, dev);
 	/* Because unregister_netdevice_queue() makes sure at least one rcu
 	 * grace period is respected before device freeing,
 	 * we dont need to call synchronize_net() here.
 	 */
 	unregister_netdevice_queue(dev, head);

-	netdev_upper_dev_unlink(real_dev, dev);
-
 	if (grp->nr_vlan_devs == 0) {
 		vlan_mvrp_uninit_applicant(real_dev);
 		vlan_gvrp_uninit_applicant(real_dev);
-- 
1.8.4

^ permalink raw reply related

* [PATCH v4 net-next 26/27] net: expose the master link to sysfs, and remove it from bond
From: Veaceslav Falico @ 2013-09-24 11:47 UTC (permalink / raw)
  To: netdev
  Cc: jiri, Veaceslav Falico, Jay Vosburgh, Andy Gospodarek,
	David S. Miller, Eric Dumazet, Alexander Duyck
In-Reply-To: <1380023227-9576-1-git-send-email-vfalico@redhat.com>

Currently, we can have only one master upper neighbour, so it would be
useful to create a symlink to it in the sysfs device directory, the way
that bonding now does it, for every device. Lower devices from
bridge/team/etc will automagically get it, so we could rely on it.

Also, remove the same functionality from bonding.

CC: Jay Vosburgh <fubar@us.ibm.com>
CC: Andy Gospodarek <andy@greyhouse.net>
CC: "David S. Miller" <davem@davemloft.net>
CC: Eric Dumazet <edumazet@google.com>
CC: Jiri Pirko <jiri@resnulli.us>
CC: Alexander Duyck <alexander.h.duyck@intel.com>
Signed-off-by: Veaceslav Falico <vfalico@redhat.com>
---

Notes:
    v3  -> v4:
    Convert the logic to the new functions, which don't have the bool upper
    argument.
    
    v2  -> v3:
    No change.
    
    v1  -> v2:
    No changes.
    
    RFC -> v1:
    No changes.

 drivers/net/bonding/bond_sysfs.c | 20 +++-----------------
 net/core/dev.c                   | 19 +++++++++++++++++--
 2 files changed, 20 insertions(+), 19 deletions(-)

diff --git a/drivers/net/bonding/bond_sysfs.c b/drivers/net/bonding/bond_sysfs.c
index 04d95d6..1c57246 100644
--- a/drivers/net/bonding/bond_sysfs.c
+++ b/drivers/net/bonding/bond_sysfs.c
@@ -172,24 +172,11 @@ int bond_create_slave_symlinks(struct net_device *master,
 			       struct net_device *slave)
 {
 	char linkname[IFNAMSIZ+7];
-	int ret = 0;
 
-	/* first, create a link from the slave back to the master */
-	ret = sysfs_create_link(&(slave->dev.kobj), &(master->dev.kobj),
-				"master");
-	if (ret)
-		return ret;
-	/* next, create a link from the master to the slave */
+	/* create a link from the master to the slave */
 	sprintf(linkname, "slave_%s", slave->name);
-	ret = sysfs_create_link(&(master->dev.kobj), &(slave->dev.kobj),
-				linkname);
-
-	/* free the master link created earlier in case of error */
-	if (ret)
-		sysfs_remove_link(&(slave->dev.kobj), "master");
-
-	return ret;
-
+	return sysfs_create_link(&(master->dev.kobj), &(slave->dev.kobj),
+				 linkname);
 }
 
 void bond_destroy_slave_symlinks(struct net_device *master,
@@ -197,7 +184,6 @@ void bond_destroy_slave_symlinks(struct net_device *master,
 {
 	char linkname[IFNAMSIZ+7];
 
-	sysfs_remove_link(&(slave->dev.kobj), "master");
 	sprintf(linkname, "slave_%s", slave->name);
 	sysfs_remove_link(&(master->dev.kobj), linkname);
 }
diff --git a/net/core/dev.c b/net/core/dev.c
index acc1181..de443ee 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4584,6 +4584,7 @@ static int __netdev_adjacent_dev_insert(struct net_device *dev,
 					void *private, bool master)
 {
 	struct netdev_adjacent *adj;
+	int ret;
 
 	adj = __netdev_find_adj(dev, adj_dev, dev_list);
 
@@ -4606,12 +4607,23 @@ static int __netdev_adjacent_dev_insert(struct net_device *dev,
 		 adj_dev->name, dev->name, adj_dev->name);
 
 	/* Ensure that master link is always the first item in list. */
-	if (master)
+	if (master) {
+		ret = sysfs_create_link(&(dev->dev.kobj),
+					&(adj_dev->dev.kobj), "master");
+		if (ret)
+			goto free_adj;
+
 		list_add_rcu(&adj->list, dev_list);
-	else
+	} else {
 		list_add_tail_rcu(&adj->list, dev_list);
+	}
 
 	return 0;
+
+free_adj:
+	kfree(adj);
+
+	return ret;
 }
 
 void __netdev_adjacent_dev_remove(struct net_device *dev,
@@ -4635,6 +4647,9 @@ void __netdev_adjacent_dev_remove(struct net_device *dev,
 		return;
 	}
 
+	if (adj->master)
+		sysfs_remove_link(&(dev->dev.kobj), "master");
+
 	list_del_rcu(&adj->list);
 	pr_debug("dev_put for %s, because link removed from %s to %s\n",
 		 adj_dev->name, dev->name, adj_dev->name);
-- 
1.8.4

^ permalink raw reply related

* [PATCH v4 net-next 27/27] net: create sysfs symlinks for neighbour devices
From: Veaceslav Falico @ 2013-09-24 11:47 UTC (permalink / raw)
  To: netdev
  Cc: jiri, Veaceslav Falico, Jay Vosburgh, Andy Gospodarek,
	David S. Miller, Eric Dumazet, Alexander Duyck
In-Reply-To: <1380023227-9576-1-git-send-email-vfalico@redhat.com>

Also, remove the same functionality from bonding - it will be already done
for any device that links to its lower/upper neighbour.

The links will be created for dev's kobject, and will look like
lower_eth0 for lower device eth0 and upper_bridge0 for upper device
bridge0.

CC: Jay Vosburgh <fubar@us.ibm.com>
CC: Andy Gospodarek <andy@greyhouse.net>
CC: "David S. Miller" <davem@davemloft.net>
CC: Eric Dumazet <edumazet@google.com>
CC: Jiri Pirko <jiri@resnulli.us>
CC: Alexander Duyck <alexander.h.duyck@intel.com>
Signed-off-by: Veaceslav Falico <vfalico@redhat.com>
---

Notes:
    v3  -> v4:
    Convert the logic to the new functions, which don't have the bool upper
    argument.
    
    v2  -> v3:
    No change.
    
    v1  -> v2:
    No changes.
    
    RFC -> v1:
    Rename lower devices from slave_eth0 to lower_eth0, so that it meets
    the lower/upper naming. I've searched for any active users of bonding's
    slave_eth0, and seems that nobody's actively using it or relying on it.

 drivers/net/bonding/bond_main.c  | 11 +----------
 drivers/net/bonding/bond_sysfs.c | 21 ---------------------
 drivers/net/bonding/bonding.h    |  2 --
 net/core/dev.c                   | 35 ++++++++++++++++++++++++++++++++++-
 4 files changed, 35 insertions(+), 34 deletions(-)

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index d494045..d5c3153 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -1612,15 +1612,11 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
 
 	read_unlock(&bond->lock);
 
-	res = bond_create_slave_symlinks(bond_dev, slave_dev);
-	if (res)
-		goto err_detach;
-
 	res = netdev_rx_handler_register(slave_dev, bond_handle_frame,
 					 new_slave);
 	if (res) {
 		pr_debug("Error %d calling netdev_rx_handler_register\n", res);
-		goto err_dest_symlinks;
+		goto err_detach;
 	}
 
 	res = bond_master_upper_dev_link(bond_dev, slave_dev, new_slave);
@@ -1642,9 +1638,6 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
 err_unregister:
 	netdev_rx_handler_unregister(slave_dev);
 
-err_dest_symlinks:
-	bond_destroy_slave_symlinks(bond_dev, slave_dev);
-
 err_detach:
 	if (!USES_PRIMARY(bond->params.mode))
 		bond_hw_addr_flush(bond_dev, slave_dev);
@@ -1842,8 +1835,6 @@ static int __bond_release_one(struct net_device *bond_dev,
 			bond_dev->name, slave_dev->name, bond_dev->name);
 
 	/* must do this from outside any spinlocks */
-	bond_destroy_slave_symlinks(bond_dev, slave_dev);
-
 	vlan_vids_del_by_dev(slave_dev, bond_dev);
 
 	/* If the mode USES_PRIMARY, then this cases was handled above by
diff --git a/drivers/net/bonding/bond_sysfs.c b/drivers/net/bonding/bond_sysfs.c
index 1c57246..e06c644 100644
--- a/drivers/net/bonding/bond_sysfs.c
+++ b/drivers/net/bonding/bond_sysfs.c
@@ -168,27 +168,6 @@ static const struct class_attribute class_attr_bonding_masters = {
 	.namespace = bonding_namespace,
 };
 
-int bond_create_slave_symlinks(struct net_device *master,
-			       struct net_device *slave)
-{
-	char linkname[IFNAMSIZ+7];
-
-	/* create a link from the master to the slave */
-	sprintf(linkname, "slave_%s", slave->name);
-	return sysfs_create_link(&(master->dev.kobj), &(slave->dev.kobj),
-				 linkname);
-}
-
-void bond_destroy_slave_symlinks(struct net_device *master,
-				 struct net_device *slave)
-{
-	char linkname[IFNAMSIZ+7];
-
-	sprintf(linkname, "slave_%s", slave->name);
-	sysfs_remove_link(&(master->dev.kobj), linkname);
-}
-
-
 /*
  * Show the slaves in the current bond.
  */
diff --git a/drivers/net/bonding/bonding.h b/drivers/net/bonding/bonding.h
index 18116ec..1fd37c8 100644
--- a/drivers/net/bonding/bonding.h
+++ b/drivers/net/bonding/bonding.h
@@ -437,8 +437,6 @@ int bond_create(struct net *net, const char *name);
 int bond_create_sysfs(struct bond_net *net);
 void bond_destroy_sysfs(struct bond_net *net);
 void bond_prepare_sysfs_group(struct bonding *bond);
-int bond_create_slave_symlinks(struct net_device *master, struct net_device *slave);
-void bond_destroy_slave_symlinks(struct net_device *master, struct net_device *slave);
 int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev);
 int bond_release(struct net_device *bond_dev, struct net_device *slave_dev);
 void bond_mii_monitor(struct work_struct *);
diff --git a/net/core/dev.c b/net/core/dev.c
index de443ee..25ab6fe 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4584,6 +4584,7 @@ static int __netdev_adjacent_dev_insert(struct net_device *dev,
 					void *private, bool master)
 {
 	struct netdev_adjacent *adj;
+	char linkname[IFNAMSIZ+7];
 	int ret;
 
 	adj = __netdev_find_adj(dev, adj_dev, dev_list);
@@ -4606,12 +4607,26 @@ static int __netdev_adjacent_dev_insert(struct net_device *dev,
 	pr_debug("dev_hold for %s, because of link added from %s to %s\n",
 		 adj_dev->name, dev->name, adj_dev->name);
 
+	if (dev_list == &dev->adj_list.lower) {
+		sprintf(linkname, "lower_%s", adj_dev->name);
+		ret = sysfs_create_link(&(dev->dev.kobj),
+					&(adj_dev->dev.kobj), linkname);
+		if (ret)
+			goto free_adj;
+	} else if (dev_list == &dev->adj_list.upper) {
+		sprintf(linkname, "upper_%s", adj_dev->name);
+		ret = sysfs_create_link(&(dev->dev.kobj),
+					&(adj_dev->dev.kobj), linkname);
+		if (ret)
+			goto free_adj;
+	}
+
 	/* Ensure that master link is always the first item in list. */
 	if (master) {
 		ret = sysfs_create_link(&(dev->dev.kobj),
 					&(adj_dev->dev.kobj), "master");
 		if (ret)
-			goto free_adj;
+			goto remove_symlinks;
 
 		list_add_rcu(&adj->list, dev_list);
 	} else {
@@ -4620,6 +4635,15 @@ static int __netdev_adjacent_dev_insert(struct net_device *dev,
 
 	return 0;
 
+remove_symlinks:
+	if (dev_list == &dev->adj_list.lower) {
+		sprintf(linkname, "lower_%s", adj_dev->name);
+		sysfs_remove_link(&(dev->dev.kobj), linkname);
+	} else if (dev_list == &dev->adj_list.upper) {
+		sprintf(linkname, "upper_%s", adj_dev->name);
+		sysfs_remove_link(&(dev->dev.kobj), linkname);
+	}
+
 free_adj:
 	kfree(adj);
 
@@ -4631,6 +4655,7 @@ void __netdev_adjacent_dev_remove(struct net_device *dev,
 				  struct list_head *dev_list)
 {
 	struct netdev_adjacent *adj;
+	char linkname[IFNAMSIZ+7];
 
 	adj = __netdev_find_adj(dev, adj_dev, dev_list);
 
@@ -4650,6 +4675,14 @@ void __netdev_adjacent_dev_remove(struct net_device *dev,
 	if (adj->master)
 		sysfs_remove_link(&(dev->dev.kobj), "master");
 
+	if (dev_list == &dev->adj_list.lower) {
+		sprintf(linkname, "lower_%s", adj_dev->name);
+		sysfs_remove_link(&(dev->dev.kobj), linkname);
+	} else if (dev_list == &dev->adj_list.upper) {
+		sprintf(linkname, "upper_%s", adj_dev->name);
+		sysfs_remove_link(&(dev->dev.kobj), linkname);
+	}
+
 	list_del_rcu(&adj->list);
 	pr_debug("dev_put for %s, because link removed from %s to %s\n",
 		 adj_dev->name, dev->name, adj_dev->name);
-- 
1.8.4

^ permalink raw reply related

* [PATCH v4 net-next 14/27] bonding: rework bond_ab_arp_probe() to use bond_for_each_slave()
From: Veaceslav Falico @ 2013-09-24 11:46 UTC (permalink / raw)
  To: netdev; +Cc: jiri, Veaceslav Falico, Jay Vosburgh, Andy Gospodarek
In-Reply-To: <1380023227-9576-1-git-send-email-vfalico@redhat.com>

Currently it uses the hard-to-rcuify bond_for_each_slave_from(), and also
it doesn't check every slave for disrepencies between the actual
IS_UP(slave) and the slave->link == BOND_LINK_UP, but only till we find the
next suitable slave.

Fix this by using bond_for_each_slave() and storing the first good slave in
*before till we find the current_arp_slave, after that we store the first good
slave in new_slave. If new_slave is empty - use the slave stored in before,
and if it's also empty - then we didn't find any suitable slave.

Also, in the meanwhile, check for each slave status.

CC: Jay Vosburgh <fubar@us.ibm.com>
CC: Andy Gospodarek <andy@greyhouse.net>
Signed-off-by: Veaceslav Falico <vfalico@redhat.com>
---

Notes:
    v3  -> v4:
    No change.
    
    v2  -> v3:
    No change.
    
    v1  -> v2:
    No changes.
    
    RFC -> v1:
    New patch.

 drivers/net/bonding/bond_main.c | 38 ++++++++++++++++++++++++--------------
 1 file changed, 24 insertions(+), 14 deletions(-)

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 6abbfac..3c96b1b 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -2761,8 +2761,9 @@ do_failover:
  */
 static void bond_ab_arp_probe(struct bonding *bond)
 {
-	struct slave *slave, *next_slave;
-	int i;
+	struct slave *slave, *before = NULL, *new_slave = NULL;
+	struct list_head *iter;
+	bool found = false;
 
 	read_lock(&bond->curr_slave_lock);
 
@@ -2792,18 +2793,12 @@ static void bond_ab_arp_probe(struct bonding *bond)
 
 	bond_set_slave_inactive_flags(bond->current_arp_slave);
 
-	/* search for next candidate */
-	next_slave = bond_next_slave(bond, bond->current_arp_slave);
-	bond_for_each_slave_from(bond, slave, i, next_slave) {
-		if (IS_UP(slave->dev)) {
-			slave->link = BOND_LINK_BACK;
-			bond_set_slave_active_flags(slave);
-			bond_arp_send_all(bond, slave);
-			slave->jiffies = jiffies;
-			bond->current_arp_slave = slave;
-			break;
-		}
+	bond_for_each_slave(bond, slave, iter) {
+		if (!found && !before && IS_UP(slave->dev))
+			before = slave;
 
+		if (found && !new_slave && IS_UP(slave->dev))
+			new_slave = slave;
 		/* if the link state is up at this point, we
 		 * mark it down - this can happen if we have
 		 * simultaneous link failures and
@@ -2811,7 +2806,7 @@ static void bond_ab_arp_probe(struct bonding *bond)
 		 * one the current slave so it is still marked
 		 * up when it is actually down
 		 */
-		if (slave->link == BOND_LINK_UP) {
+		if (!IS_UP(slave->dev) && slave->link == BOND_LINK_UP) {
 			slave->link = BOND_LINK_DOWN;
 			if (slave->link_failure_count < UINT_MAX)
 				slave->link_failure_count++;
@@ -2821,7 +2816,22 @@ static void bond_ab_arp_probe(struct bonding *bond)
 			pr_info("%s: backup interface %s is now down.\n",
 				bond->dev->name, slave->dev->name);
 		}
+		if (slave == bond->current_arp_slave)
+			found = true;
 	}
+
+	if (!new_slave && before)
+		new_slave = before;
+
+	if (!new_slave)
+		return;
+
+	new_slave->link = BOND_LINK_BACK;
+	bond_set_slave_active_flags(new_slave);
+	bond_arp_send_all(bond, new_slave);
+	new_slave->jiffies = jiffies;
+	bond->current_arp_slave = new_slave;
+
 }
 
 void bond_activebackup_arp_mon(struct work_struct *work)
-- 
1.8.4

^ permalink raw reply related

* RE: [PATCH 1/2] net: Toeplitz library functions
From: Eric Dumazet @ 2013-09-24 12:24 UTC (permalink / raw)
  To: David Laight; +Cc: Tom Herbert, davem, netdev, jesse.brandeburg
In-Reply-To: <AE90C24D6B3A694183C094C60CF0A2F6026B7355@saturn3.aculab.com>

On Tue, 2013-09-24 at 09:32 +0100, David Laight wrote:
> > +static inline unsigned int
> > +toeplitz_hash(const unsigned char *bytes,
> > +	      struct toeplitz *toeplitz, int n)
> > +{
> > +	int i;
> > +	unsigned int result = 0;
> > +
> > +	for (i = 0; i < n; i++)
> > +		result ^= toeplitz->key_cache[i][bytes[i]];
> > +
> > +        return result;
> > +};
> 
> That is a horrid hash function to be calculating in software.
> 
> The code looks very much like a simple 32bit CRC.
> It isn't entirely clears exactly where the 'key' gets included,
> but I suspect it is just xored with the data bytes.
> 
> Using in it hardware is probably fine - the hardware can do
> it cheaply (in dedicated logic) as the frame arrives.
> The CRC polynomial probably collapses to a few XOR operations
> when done byte by byte (the hdlc crc16 collapses to 3 levels
> of xor).
> 
> IIRC jhash() works on 32bit quantities - so has far fewer
> maths operations and well as not having all the random data
> accesses (cache misses and displacing other parts of the
> working set from the cache).

Anyway, I really hope Toeplitz hash is not restricted to 0-255 range

Tom implementation is not Toeplitz, but a degenerated hash.

^ permalink raw reply

* SMSC 9303 support
From: Gary Thomas @ 2013-09-24 12:21 UTC (permalink / raw)
  To: netdev

I need to support the SMSC9303 in an embedded system.  I'm not
finding any [explicit] support for this device in the latest
mainline kernel.  Did I miss something?

To be clear, the SMSC9303 is a 3-port managed ethernet switch
capable of supporting 802.1D/802.1Q directly. This switch is
driven by a single MAC via MII/RMII and exposes the other two
ports via physical PHYs.  What I need it to do is behave like
two external, separate devices.  I was thinking that what I need
to do is treat these as VLAN devices since the switch can manage
the routing.

Does this seem like a reasonable approach?
How do I "hook up" my normal ethernet driver to it?  To the hardware
it just looks like any other MII/RMII PHY.  The device is managed
separately via I2C.  I can have that set up separately if necessary.

Thanks for any pointers/ideas

-- 
------------------------------------------------------------
Gary Thomas                 |  Consulting for the
MLB Associates              |    Embedded world
------------------------------------------------------------

^ permalink raw reply

* Re: [pchecks v1 2/4] Use raw cpu ops for calls that would trigger with checks
From: Eric Dumazet @ 2013-09-24 12:45 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Christoph Lameter, Tejun Heo, akpm, Steven Rostedt, linux-kernel,
	Peter Zijlstra, netdev
In-Reply-To: <20130924073250.GD28538@gmail.com>

On Tue, 2013-09-24 at 09:32 +0200, Ingo Molnar wrote:
> (netdev Cc:-ed)
> 
> * Christoph Lameter <cl@linux.com> wrote:
> 
> > These location triggered during testing with KVM.
> > 
> > These are fetches without preemption off where we judged that
> > to be more performance efficient or where other means of
> > providing synchronization (BH handling) are available.
> 
> > Index: linux/include/net/snmp.h
> > ===================================================================
> > --- linux.orig/include/net/snmp.h	2013-09-12 13:26:29.216103951 -0500
> > +++ linux/include/net/snmp.h	2013-09-12 13:26:29.208104037 -0500
> > @@ -126,7 +126,7 @@ struct linux_xfrm_mib {
> >  	extern __typeof__(type) __percpu *name[SNMP_ARRAY_SZ]
> >  
> >  #define SNMP_INC_STATS_BH(mib, field)	\
> > -			__this_cpu_inc(mib[0]->mibs[field])
> > +			raw_cpu_inc(mib[0]->mibs[field])
> >  
> >  #define SNMP_INC_STATS_USER(mib, field)	\
> >  			this_cpu_inc(mib[0]->mibs[field])
> > @@ -141,7 +141,7 @@ struct linux_xfrm_mib {
> >  			this_cpu_dec(mib[0]->mibs[field])
> >  
> >  #define SNMP_ADD_STATS_BH(mib, field, addend)	\
> > -			__this_cpu_add(mib[0]->mibs[field], addend)
> > +			raw_cpu_add(mib[0]->mibs[field], addend)
> 
> Are the networking folks fine with allowing unafe operations of SNMP stats 
> in preemptible sections, or should the kernel produce an optional warning 
> message if CONFIG_PREEMPT_DEBUG=y and these ops are used in preemptible 
> (non-bh, non-irq-handler, non-irqs-off, etc.) sections?
> 
> RAW_SNMP_*_STATS() ops could be used to annotate those places where that 
> kind of usage is safe.


I would rather not use RAW_ prefix in the macro, but add debugging
check to make sure we use _BH() variant in the right context.

BUG_ON(!in_softirq())

^ permalink raw reply

* Re: [PATCH] ptp: add range check on n_samples
From: Richard Cochran @ 2013-09-24 12:50 UTC (permalink / raw)
  To: Dong Zhu; +Cc: David Miller, netdev, linux-kernel
In-Reply-To: <20130924070557.GA28795@zhudong.nay.redhat.com>

On Tue, Sep 24, 2013 at 03:05:57PM +0800, Dong Zhu wrote:
> From d4eb97e8d5def76d46167c91059147e2c7d33433 Mon Sep 17 00:00:00 2001
> 
> When using PTP_SYS_OFFSET ioctl to measure the time offset between the
> PHC and system clock, we need to specify the number of measurements, the
> valid value of n_samples is between 1 to 25. If n_samples <= 0 or > 25
> it makes no sense, so this patch intends to add a range check.

The field, n_samples, is unsigned, so the check is not needed.

Thanks,
Richard
 
> Signed-off-by: Dong Zhu <bluezhudong@gmail.com>
> ---
>  drivers/ptp/ptp_chardev.c | 3 ++-
>  1 file changed, 2 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/ptp/ptp_chardev.c b/drivers/ptp/ptp_chardev.c
> index 34a0c60..4e85b23 100644
> --- a/drivers/ptp/ptp_chardev.c
> +++ b/drivers/ptp/ptp_chardev.c
> @@ -104,7 +104,8 @@ long ptp_ioctl(struct posix_clock *pc, unsigned int cmd, unsigned long arg)
>  			err = -EFAULT;
>  			break;
>  		}
> -		if (sysoff->n_samples > PTP_MAX_SAMPLES) {
> +		if (sysoff->n_samples <= 0 ||
> +		    sysoff->n_samples > PTP_MAX_SAMPLES) {
>  			err = -EINVAL;
>  			break;
>  		}
> -- 
> 1.7.11.7
> 
> -- 
> Best Regards,
> Dong Zhu

^ permalink raw reply

* [PATCH] net: net_secret should not depend on TCP
From: Eric Dumazet @ 2013-09-24 13:19 UTC (permalink / raw)
  To: Hannes Frederic Sowa; +Cc: Tom Herbert, davem, netdev, jesse.brandeburg
In-Reply-To: <20130924054532.GA24446@order.stressinduktion.org>

From: Eric Dumazet <edumazet@google.com>

A host might need net_secret[] and never open a single socket. 

Problem added in commit aebda156a570782
("net: defer net_secret[] initialization")

Based on prior patch from Hannes Frederic Sowa.

Reported-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: Eric Dumazet <edumazet@google.com>
---
 include/net/secure_seq.h |    1 -
 net/core/secure_seq.c    |   27 ++++++++++++++++++++++++---
 net/ipv4/af_inet.c       |    4 +---
 3 files changed, 25 insertions(+), 7 deletions(-)

diff --git a/include/net/secure_seq.h b/include/net/secure_seq.h
index 6ca975b..c2e542b 100644
--- a/include/net/secure_seq.h
+++ b/include/net/secure_seq.h
@@ -3,7 +3,6 @@
 
 #include <linux/types.h>
 
-extern void net_secret_init(void);
 extern __u32 secure_ip_id(__be32 daddr);
 extern __u32 secure_ipv6_id(const __be32 daddr[4]);
 extern u32 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport);
diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c
index 6a2f13c..3f1ec15 100644
--- a/net/core/secure_seq.c
+++ b/net/core/secure_seq.c
@@ -10,11 +10,24 @@
 
 #include <net/secure_seq.h>
 
-static u32 net_secret[MD5_MESSAGE_BYTES / 4] ____cacheline_aligned;
+#define NET_SECRET_SIZE (MD5_MESSAGE_BYTES / 4)
 
-void net_secret_init(void)
+static u32 net_secret[NET_SECRET_SIZE] ____cacheline_aligned;
+
+static void net_secret_init(void)
 {
-	get_random_bytes(net_secret, sizeof(net_secret));
+	u32 tmp;
+	int i;
+
+	if (likely(net_secret[0]))
+		return;
+
+	for (i = NET_SECRET_SIZE; i > 0;) {
+		do {
+			get_random_bytes(&tmp, sizeof(tmp));
+		} while (!tmp);
+		cmpxchg(&net_secret[--i], 0, tmp);
+	}
 }
 
 #ifdef CONFIG_INET
@@ -42,6 +55,7 @@ __u32 secure_tcpv6_sequence_number(const __be32 *saddr, const __be32 *daddr,
 	u32 hash[MD5_DIGEST_WORDS];
 	u32 i;
 
+	net_secret_init();
 	memcpy(hash, saddr, 16);
 	for (i = 0; i < 4; i++)
 		secret[i] = net_secret[i] + (__force u32)daddr[i];
@@ -63,6 +77,7 @@ u32 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr,
 	u32 hash[MD5_DIGEST_WORDS];
 	u32 i;
 
+	net_secret_init();
 	memcpy(hash, saddr, 16);
 	for (i = 0; i < 4; i++)
 		secret[i] = net_secret[i] + (__force u32) daddr[i];
@@ -82,6 +97,7 @@ __u32 secure_ip_id(__be32 daddr)
 {
 	u32 hash[MD5_DIGEST_WORDS];
 
+	net_secret_init();
 	hash[0] = (__force __u32) daddr;
 	hash[1] = net_secret[13];
 	hash[2] = net_secret[14];
@@ -96,6 +112,7 @@ __u32 secure_ipv6_id(const __be32 daddr[4])
 {
 	__u32 hash[4];
 
+	net_secret_init();
 	memcpy(hash, daddr, 16);
 	md5_transform(hash, net_secret);
 
@@ -107,6 +124,7 @@ __u32 secure_tcp_sequence_number(__be32 saddr, __be32 daddr,
 {
 	u32 hash[MD5_DIGEST_WORDS];
 
+	net_secret_init();
 	hash[0] = (__force u32)saddr;
 	hash[1] = (__force u32)daddr;
 	hash[2] = ((__force u16)sport << 16) + (__force u16)dport;
@@ -121,6 +139,7 @@ u32 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport)
 {
 	u32 hash[MD5_DIGEST_WORDS];
 
+	net_secret_init();
 	hash[0] = (__force u32)saddr;
 	hash[1] = (__force u32)daddr;
 	hash[2] = (__force u32)dport ^ net_secret[14];
@@ -140,6 +159,7 @@ u64 secure_dccp_sequence_number(__be32 saddr, __be32 daddr,
 	u32 hash[MD5_DIGEST_WORDS];
 	u64 seq;
 
+	net_secret_init();
 	hash[0] = (__force u32)saddr;
 	hash[1] = (__force u32)daddr;
 	hash[2] = ((__force u16)sport << 16) + (__force u16)dport;
@@ -164,6 +184,7 @@ u64 secure_dccpv6_sequence_number(__be32 *saddr, __be32 *daddr,
 	u64 seq;
 	u32 i;
 
+	net_secret_init();
 	memcpy(hash, saddr, 16);
 	for (i = 0; i < 4; i++)
 		secret[i] = net_secret[i] + daddr[i];
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 7a1874b..cfeb85c 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -263,10 +263,8 @@ void build_ehash_secret(void)
 		get_random_bytes(&rnd, sizeof(rnd));
 	} while (rnd == 0);
 
-	if (cmpxchg(&inet_ehash_secret, 0, rnd) == 0) {
+	if (cmpxchg(&inet_ehash_secret, 0, rnd) == 0)
 		get_random_bytes(&ipv6_hash_secret, sizeof(ipv6_hash_secret));
-		net_secret_init();
-	}
 }
 EXPORT_SYMBOL(build_ehash_secret);
 

^ permalink raw reply related

* Re: [PATCH net 0/4] bridge: Fix problems around the PVID
From: Vlad Yasevich @ 2013-09-24 13:35 UTC (permalink / raw)
  To: Toshiaki Makita
  Cc: Toshiaki Makita, David Miller, netdev, Fernando Luis Vazquez Cao,
	Patrick McHardy
In-Reply-To: <1380023107.3162.53.camel@ubuntu-vm-makita>

On 09/24/2013 07:45 AM, Toshiaki Makita wrote:
> On Mon, 2013-09-23 at 10:41 -0400, Vlad Yasevich wrote:
>> On 09/17/2013 04:12 AM, Toshiaki Makita wrote:
>>> On Mon, 2013-09-16 at 13:49 -0400, Vlad Yasevich wrote:
>>>> On 09/13/2013 08:06 AM, Toshiaki Makita wrote:
>>>>> On Thu, 2013-09-12 at 16:00 -0400, David Miller wrote:
>>>>>> From: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
>>>>>> Date: Tue, 10 Sep 2013 19:27:54 +0900
>>>>>>
>>>>>>> There seem to be some undesirable behaviors related with PVID.
>>>>>>> 1. It has no effect assigning PVID to a port. PVID cannot be applied
>>>>>>> to any frame regardless of whether we set it or not.
>>>>>>> 2. FDB entries learned via frames applied PVID are registered with
>>>>>>> VID 0 rather than VID value of PVID.
>>>>>>> 3. We can set 0 or 4095 as a PVID that are not allowed in IEEE 802.1Q.
>>>>>>> This leads interoperational problems such as sending frames with VID
>>>>>>> 4095, which is not allowed in IEEE 802.1Q, and treating frames with VID
>>>>>>> 0 as they belong to VLAN 0, which is expected to be handled as they have
>>>>>>> no VID according to IEEE 802.1Q.
>>>>>>>
>>>>>>> Note: 2nd and 3rd problems are potential and not exposed unless 1st problem
>>>>>>> is fixed, because we cannot activate PVID due to it.
>>>>>>
>>>>>> Please work out the issues in patch #2 with Vlad and resubmit this
>>>>>> series.
>>>>>>
>>>>>> Thank you.
>>>>>
>>>>> I'm hovering between whether we should fix the issue by changing vlan 0
>>>>> interface behavior in 8021q module or enabling a bridge port to sending
>>>>> priority-tagged frames, or another better way.
>>>>>
>>>>> If you could comment it, I'd appreciate it :)
>>>>>
>>>>>
>>>>> BTW, I think what is discussed in patch #2 is another problem about
>>>>> handling priority-tags, and it exists without this patch set applied.
>>>>> It looks like that we should prepare another patch set than this to fix
>>>>> that problem.
>>>>>
>>>>> Should I include patches that fix the priority-tags problem in this
>>>>> patch set and resubmit them all together?
>>>>>
>>>>
>>>> I am thinking that we might need to do it in bridge and it looks like
>>>> the simplest way to do it is to have default priority regeneration table
>>>> (table 6-5 from 802.1Q doc).
>>>>
>>>> That way I think we would conform to the spec.
>>>>
>>>> -vlad
>>>
>>> Unfortunately I don't think the default priority regeneration table
>>> resolves the problem because IEEE 802.1Q says that a VLAN-aware bridge
>>> can transmit untagged or VLAN-tagged frames only (the end of section 7.5
>>> and 8.1.7).
>>>
>>> No mechanism to send priority-tagged frames is found as far as I can see
>>> the standard. I think the regenerated priority is used for outgoing PCP
>>> field only if egress policy is not untagged (i.e. transmitting as
>>> VLAN-tagged), and unused if untagged (Section 6.9.2 3rd/4th Paragraph).
>>>
>>> If we want to transmit priority-tagged frames from a bridge port, I
>>> think we need to implement a new (optional) feature that is above the
>>> standard, as I stated previously.
>>>
>>> How do you feel about adding a per-port policy that enables a bridge to
>>> send priority-tagged frames instead of untagged frames when egress
>>> policy for the port is untagged?
>>> With this change, we can transmit frames for a given vlan as either all
>>> untagged, all priority-tagged or all VLAN-tagged.
>>
>> That would work.  What I am thinking is that we do it by special casing
>> the vid 0 egress policy specification.  Let it be untagged by default
>> and if it is tagged, then we preserve the priority field and forward
>> it on.
>>
>> This keeps the API stable and doesn't require user/admin from knowing
>> exactly what happens.  Default operation conforms to the spec and allows
>> simple change to make it backward-compatible.
>>
>> What do you think.  I've done a simple prototype of this an it seems to
>> work with the VMs I am testing with.
>
> Are you saying that
> - by default, set the 0th bit of untagged_bitmap; and
> - if we unset the 0th bit and set the "vid"th bit, we transmit frames
> classified as belonging to VLAN "vid" as priority-tagged?
>
> If so, though it's attractive to keep current API, I'm worried about if
> it could be a bit confusing and not intuitive for kernel/iproute2
> developers that VID 0 has a special meaning only in the egress policy.
> Wouldn't it be better to adding a new member to struct net_port_vlans
> instead of using VID 0 of untagged_bitmap?
>
> Or are you saying that we use a new flag in struct net_port_vlans but
> use the BRIDGE_VLAN_INFO_UNTAGGED bit with VID 0 in netlink to set the
> flag?
>
> Even in that case, I'm afraid that it might be confusing for developers
> for the same reason. We are going to prohibit to specify VID with 0 (and
> 4095) in adding/deleting a FDB entry or a vlan filtering entry, but it
> would allow us to use VID 0 only when a vlan filtering entry is
> configured.
> I am thinking a new nlattr is a straightforward approach to configure
> it.

By making this an explicit attribute it makes vid 0 a special case for
any automatic tool that would provision such filtering.  Seeing vid 0
would mean that these tools would have to know that this would have to
be translated to a different attribute instead of setting the policy
values.

How it is implemented internally in the kernel isn't as big of an issue.
We can do it as a separate flag or as part of existing policy.

-vlad

>
> Thanks,
>
> Toshiaki Makita
>
>>
>> -vlad
>>
>>>
>>> Thanks,
>>>
>>> Toshiaki Makita
>>>
>>>>
>>>>>
>>>>> Thanks,
>>>>>
>>>>> Toshiaki Makita
>>>>>
>>>>>>
>>>>>> --
>>>>>> To unsubscribe from this list: send the line "unsubscribe netdev" in
>>>>>> the body of a message to majordomo@vger.kernel.org
>>>>>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>>>>>
>>>>>
>>>>>
>>>
>>>
>>> --
>>> To unsubscribe from this list: send the line "unsubscribe netdev" in
>>> the body of a message to majordomo@vger.kernel.org
>>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>>>
>
>

^ permalink raw reply

* [PATCH net-next v3 0/2] ipv4: per-datagram IP_TOS and IP_TTL via sendmsg()
From: Francesco Fusco @ 2013-09-24 13:43 UTC (permalink / raw)
  To: davem; +Cc: netdev

There is no way to set the IP_TOS field on a per-packet basis in IPv4, while
IPv6 has such a mechanism. Therefore one has to fall back to the setsockopt()
in case of IPv4. 

Using the existing per-socket option is not convenient particularly in the
situations where multiple threads have to use the same socket data requiring
per-thread TOS values. In fact this would involve calling setsockopt() before
sendmsg() every time.

Francesco Fusco (2):
  ipv4: IP_TOS and IP_TTL can be specified as ancillary data
  ipv4: processing ancillary IP_TOS or IP_TTL

 include/net/inet_sock.h |  3 +++
 include/net/ip.h        | 14 ++++++++++++++
 include/net/route.h     |  1 +
 net/ipv4/icmp.c         |  5 +++++
 net/ipv4/ip_output.c    | 13 ++++++++++---
 net/ipv4/ip_sockglue.c  | 20 +++++++++++++++++++-
 net/ipv4/ping.c         |  4 +++-
 net/ipv4/raw.c          |  4 +++-
 net/ipv4/udp.c          |  4 +++-
 9 files changed, 61 insertions(+), 7 deletions(-)

-- 
1.8.3.1

^ permalink raw reply

* [PATCH net-next v3 1/2] ipv4: IP_TOS and IP_TTL can be specified as ancillary data
From: Francesco Fusco @ 2013-09-24 13:43 UTC (permalink / raw)
  To: davem; +Cc: netdev
In-Reply-To: <cover.1379944641.git.ffusco@redhat.com>

This patch enables the IP_TTL and IP_TOS values passed from userspace to
be stored in the ipcm_cookie struct. Three fields are added to the struct:

- the TTL, expressed as __u8.
  The allowed values are in the [1-255].
  A value of 0 means that the TTL is not specified.

- the TOS, expressed as __s16.
  The allowed values are in the range [0,255].
  A value of -1 means that the TOS is not specified.

- the priority, expressed as a char and computed when
  handling the ancillary data.

Signed-off-by: Francesco Fusco <ffusco@redhat.com>
---
 v1->v2
  - changed the icmp_cookie ttl field from __s16 to __u8.
    A value of 0 means that the TTL has not been specified
  - to tos field is still __s16. The user can specify
    values in the range 0-255 included, therefore I use
    a value of -1 as a flag saying that the value has
    not been specified
  - the priority it is now a char instead of a __u32, 
    which is the return type of rt_tos2priority
  - improved commit message
 v1->v2
  - no code changes, rebase to net-next

 include/net/ip.h       |  3 +++
 net/ipv4/ip_sockglue.c | 20 +++++++++++++++++++-
 2 files changed, 22 insertions(+), 1 deletion(-)

diff --git a/include/net/ip.h b/include/net/ip.h
index c1f192b..0135f38 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -56,6 +56,9 @@ struct ipcm_cookie {
 	int			oif;
 	struct ip_options_rcu	*opt;
 	__u8			tx_flags;
+	__u8			ttl;
+	__s16			tos;
+	char			priority;
 };
 
 #define IPCB(skb) ((struct inet_skb_parm*)((skb)->cb))
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index d9c4f11..56e3445 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -189,7 +189,7 @@ EXPORT_SYMBOL(ip_cmsg_recv);
 
 int ip_cmsg_send(struct net *net, struct msghdr *msg, struct ipcm_cookie *ipc)
 {
-	int err;
+	int err, val;
 	struct cmsghdr *cmsg;
 
 	for (cmsg = CMSG_FIRSTHDR(msg); cmsg; cmsg = CMSG_NXTHDR(msg, cmsg)) {
@@ -215,6 +215,24 @@ int ip_cmsg_send(struct net *net, struct msghdr *msg, struct ipcm_cookie *ipc)
 			ipc->addr = info->ipi_spec_dst.s_addr;
 			break;
 		}
+		case IP_TTL:
+			if (cmsg->cmsg_len != CMSG_LEN(sizeof(int)))
+				return -EINVAL;
+			val = *(int *)CMSG_DATA(cmsg);
+			if (val < 1 || val > 255)
+				return -EINVAL;
+			ipc->ttl = val;
+			break;
+		case IP_TOS:
+			if (cmsg->cmsg_len != CMSG_LEN(sizeof(int)))
+				return -EINVAL;
+			val = *(int *)CMSG_DATA(cmsg);
+			if (val < 0 || val > 255)
+				return -EINVAL;
+			ipc->tos = val;
+			ipc->priority = rt_tos2priority(ipc->tos);
+			break;
+
 		default:
 			return -EINVAL;
 		}
-- 
1.8.3.1

^ permalink raw reply related

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox