Netdev List

Netdev List
 help / color / mirror / Atom feed

* [PATCH net-next 04/26] net: add netdev_adjacent->private and allow to use it
From: Veaceslav Falico @ 2013-09-09 20:16 UTC (permalink / raw)
  To: netdev
  Cc: jiri, Veaceslav Falico, David S. Miller, Eric Dumazet,
	Alexander Duyck
In-Reply-To: <1378757804-3159-1-git-send-email-vfalico@redhat.com>

Currently, even though we can access any linked device, we can't attach
any data to the link, which is vital to properly manage linked devices.

To fix this, add a new void *private to netdev_adjacent and functions
setting/getting it (per link), so that we can save, for example, bonding's
slave structures there, per slave device.

netdev_master_upper_dev_link_private(dev, upper_dev, private) links dev to
upper dev and populates the neighbour link only with private.

netdev_lower_dev_get_private{,_rcu}() returns the private, if found.

CC: "David S. Miller" <davem@davemloft.net>
CC: Eric Dumazet <edumazet@google.com>
CC: Jiri Pirko <jiri@resnulli.us>
CC: Alexander Duyck <alexander.h.duyck@intel.com>
Signed-off-by: Veaceslav Falico <vfalico@redhat.com>
---
 include/linux/netdevice.h |  7 ++++
 net/core/dev.c            | 83 ++++++++++++++++++++++++++++++++++++++---------
 2 files changed, 75 insertions(+), 15 deletions(-)

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 2a944e5..eab8e36 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2839,8 +2839,15 @@ extern int netdev_upper_dev_link(struct net_device *dev,
 				 struct net_device *upper_dev);
 extern int netdev_master_upper_dev_link(struct net_device *dev,
 					struct net_device *upper_dev);
+extern int netdev_master_upper_dev_link_private(struct net_device *dev,
+						struct net_device *upper_dev,
+						void *private);
 extern void netdev_upper_dev_unlink(struct net_device *dev,
 				    struct net_device *upper_dev);
+extern void *netdev_lower_dev_get_private_rcu(struct net_device *dev,
+					      struct net_device *lower_dev);
+extern void *netdev_lower_dev_get_private(struct net_device *dev,
+					  struct net_device *lower_dev);
 extern int skb_checksum_help(struct sk_buff *skb);
 extern struct sk_buff *__skb_gso_segment(struct sk_buff *skb,
 	netdev_features_t features, bool tx_path);
diff --git a/net/core/dev.c b/net/core/dev.c
index eef99de..9528e8f 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4376,6 +4376,9 @@ struct netdev_adjacent {
 	/* counter for the number of times this device was added to us */
 	u16 ref_nr;
 
+	/* private field for the users */
+	void *private;
+
 	struct list_head list;
 	struct rcu_head rcu;
 };
@@ -4556,7 +4559,7 @@ EXPORT_SYMBOL(netdev_master_upper_dev_get_rcu);
 static int __netdev_adjacent_dev_insert(struct net_device *dev,
 					struct net_device *adj_dev,
 					bool neighbour, bool master,
-					bool upper)
+					bool upper, void *private)
 {
 	struct netdev_adjacent *adj, *neigh = NULL;
 
@@ -4599,9 +4602,15 @@ static int __netdev_adjacent_dev_insert(struct net_device *dev,
 			 adj_dev->name);
 
 	if (!upper) {
-		if (neigh)
+		if (neigh) {
+			/* we're backlinking the master device to its
+			 * slave, so save the private in this link.
+			 */
+			if (master)
+				neigh->private = private;
 			list_add_tail_rcu(&neigh->list,
 					  &dev->adj_list.lower);
+		}
 		list_add_tail_rcu(&adj->list, &dev->all_adj_list.lower);
 		return 0;
 	}
@@ -4627,15 +4636,16 @@ static int __netdev_upper_dev_insert(struct net_device *dev,
 				     bool master, bool neighbour)
 {
 	return __netdev_adjacent_dev_insert(dev, udev, neighbour, master,
-					    true);
+					    true, NULL);
 }
 
 static int __netdev_lower_dev_insert(struct net_device *dev,
 				     struct net_device *ldev,
-				     bool neighbour)
+				     bool master, bool neighbour,
+				     void *private)
 {
-	return __netdev_adjacent_dev_insert(dev, ldev, neighbour, false,
-					    false);
+	return __netdev_adjacent_dev_insert(dev, ldev, neighbour, master,
+					    false, private);
 }
 
 void __netdev_adjacent_dev_remove(struct net_device *dev,
@@ -4703,7 +4713,8 @@ static void __netdev_lower_dev_remove(struct net_device *dev,
 
 int __netdev_adjacent_dev_insert_link(struct net_device *dev,
 				      struct net_device *upper_dev,
-				      bool master, bool neighbour)
+				      bool master, bool neighbour,
+				      void *private)
 {
 	int ret;
 
@@ -4711,7 +4722,8 @@ int __netdev_adjacent_dev_insert_link(struct net_device *dev,
 	if (ret)
 		return ret;
 
-	ret = __netdev_lower_dev_insert(upper_dev, dev, neighbour);
+	ret = __netdev_lower_dev_insert(upper_dev, dev, master, neighbour,
+					private);
 	if (ret) {
 		__netdev_upper_dev_remove(dev, upper_dev);
 		return ret;
@@ -4723,14 +4735,15 @@ int __netdev_adjacent_dev_insert_link(struct net_device *dev,
 static int __netdev_adjacent_dev_link(struct net_device *dev,
 				      struct net_device *udev)
 {
-	return __netdev_adjacent_dev_insert_link(dev, udev, false, false);
+	return __netdev_adjacent_dev_insert_link(dev, udev, false, false, NULL);
 }
 
 static int __netdev_adjacent_dev_link_neighbour(struct net_device *dev,
 						struct net_device *udev,
-						bool master)
+						bool master, void *priv)
 {
-	return __netdev_adjacent_dev_insert_link(dev, udev, master, true);
+	return __netdev_adjacent_dev_insert_link(dev, udev, master, true,
+						 priv);
 }
 
 void __netdev_adjacent_dev_unlink(struct net_device *dev,
@@ -4742,7 +4755,8 @@ void __netdev_adjacent_dev_unlink(struct net_device *dev,
 
 
 static int __netdev_upper_dev_link(struct net_device *dev,
-				   struct net_device *upper_dev, bool master)
+				   struct net_device *upper_dev, bool master,
+				   void *private)
 {
 	struct netdev_adjacent *i, *j, *to_i, *to_j;
 	int ret = 0;
@@ -4762,7 +4776,8 @@ static int __netdev_upper_dev_link(struct net_device *dev,
 	if (master && netdev_master_upper_dev_get(dev))
 		return -EBUSY;
 
-	ret = __netdev_adjacent_dev_link_neighbour(dev, upper_dev, master);
+	ret = __netdev_adjacent_dev_link_neighbour(dev, upper_dev, master,
+						   private);
 	if (ret)
 		return ret;
 
@@ -4853,7 +4868,7 @@ rollback_mesh:
 int netdev_upper_dev_link(struct net_device *dev,
 			  struct net_device *upper_dev)
 {
-	return __netdev_upper_dev_link(dev, upper_dev, false);
+	return __netdev_upper_dev_link(dev, upper_dev, false, NULL);
 }
 EXPORT_SYMBOL(netdev_upper_dev_link);
 
@@ -4871,10 +4886,18 @@ EXPORT_SYMBOL(netdev_upper_dev_link);
 int netdev_master_upper_dev_link(struct net_device *dev,
 				 struct net_device *upper_dev)
 {
-	return __netdev_upper_dev_link(dev, upper_dev, true);
+	return __netdev_upper_dev_link(dev, upper_dev, true, NULL);
 }
 EXPORT_SYMBOL(netdev_master_upper_dev_link);
 
+int netdev_master_upper_dev_link_private(struct net_device *dev,
+					 struct net_device *upper_dev,
+					 void *private)
+{
+	return __netdev_upper_dev_link(dev, upper_dev, true, private);
+}
+EXPORT_SYMBOL(netdev_master_upper_dev_link_private);
+
 /**
  * netdev_upper_dev_unlink - Removes a link to upper device
  * @dev: device
@@ -4912,6 +4935,36 @@ void netdev_upper_dev_unlink(struct net_device *dev,
 }
 EXPORT_SYMBOL(netdev_upper_dev_unlink);
 
+void *netdev_lower_dev_get_private_rcu(struct net_device *dev,
+				       struct net_device *lower_dev)
+{
+	struct netdev_adjacent *lower;
+
+	if (!lower_dev)
+		return NULL;
+	lower = __netdev_lower_find_rcu(dev, lower_dev);
+	if (!lower)
+		return NULL;
+
+	return lower->private;
+}
+EXPORT_SYMBOL(netdev_lower_dev_get_private_rcu);
+
+void *netdev_lower_dev_get_private(struct net_device *dev,
+				   struct net_device *lower_dev)
+{
+	struct netdev_adjacent *lower;
+
+	if (!lower_dev)
+		return NULL;
+	lower = __netdev_lower_find(dev, lower_dev);
+	if (!lower)
+		return NULL;
+
+	return lower->private;
+}
+EXPORT_SYMBOL(netdev_lower_dev_get_private);
+
 static void dev_change_rx_flags(struct net_device *dev, int flags)
 {
 	const struct net_device_ops *ops = dev->netdev_ops;
-- 
1.8.4

^ permalink raw reply related

* [PATCH net-next 0/26] bonding: use neighbours instead of own lists
From: Veaceslav Falico @ 2013-09-09 20:16 UTC (permalink / raw)
  To: netdev
  Cc: jiri, Jay Vosburgh, Andy Gospodarek, Dimitris Michailidis,
	David S. Miller, Patrick McHardy, Eric Dumazet, Alexander Duyck,
	Veaceslav Falico

(David, feel free to drop the whole patchset - I know that the window is
closed and I'm quite sure that it's not the last version, and even if it is
- I'll easily re-submit it. Sorry for the mess :-/)

Hi,

RFC -> v1:
I've added proper, consistent naming for all variables/functions, uninlined
some helpers to get better backtraces, just in case (overhead is minimal,
no hot paths), rearranged patches for better review, dropped bonding's
->prev and bond_for_each_slave_continue() functionality - to be able to
RCUify it easier, and renamed slave_* sysfs links to lower_* sysfs links to
maintain upper/lower naming. I've also dropped, thanks to bonding cleanup,
some heavy and ugly/intrusive patches.

I'm sending it as early as possible, because it's quite a big patchset and
some of the approaches I've chosen are not really easy/straightforward.
It's, however, quite heavily tested already and works fine.

I'm sending it to gather any feedback possible.

This patchset introduces all the needed infrastructure, on top of current
adjacent lists, to be able to remove bond's slave_list/slave->list. The
overhead in memory/CPU is minimal, and after the patchset bonding can rely
on its slave-related functions, given the proper locking. I've done some
netperf benchmarks on a vm, and the delta was about 0.1gbps for 35gbps as a
whole, so no speed fluctuations.

It also automatically creates lower/upper and master symlinks in dev's
sysfs directory.

The current version works ok, as first tests have shown. I'm testing it
further, if anything comes up - I'll update.

Here is a short description of each (group):

51317ce net: add adj_list to save only neighbours
417ade0 net: add RCU variant to search for netdev_adjacent link
1f6ae72 net: uninline netdev neighbour functions
6b80fc6 net: add netdev_adjacent->private and allow to use it
		Preparations to be able to use the new lists.

7e83095 bonding: populate neighbour's private on enslave
		Make bonding set ->private on enslave.

21fdd60 bonding: modify bond_get_slave_by_dev() to use neighbours
		First use of ->private.

5d2fcb1 net: add for_each iterators through neighbour lower link's private
		Creates the standard for_each macro of 'slaves'.

315572a bonding: remove bond_for_each_slave_reverse()
		Drop the useless (and heavy to modify) macro.

4bf68d6 bonding: make bond_for_each_slave() use lower neighbour's private
		Modify the main iterator.

063b9f6 bonding: use bond_for_each_slave() in bond_uninit()
		Small cleanup - to avoid using the slave_list directly.

13368ec bonding: rework bond_3ad_xmit_xor() to use bond_for_each_slave() only
2c89a38 bonding: rework rlb_next_rx_slave() to use bond_for_each_slave()
6f3049b bonding: rework bond_find_best_slave() to use bond_for_each_slave()
d0b1930 bonding: rework bond_ab_arp_probe() to use bond_for_each_slave()
29aac7d bonding: remove unused bond_for_each_slave_from()
		Remove bond_for_each_slave_from() - it's almost impossible
		to correctly use it under RCU - and it's not really needed
		- some functions even become cleaner and some small bugs
		fixed.

17fd9e8 bonding: add bond_has_slaves() and use it
98d90f5 bonding: convert bond_has_slaves() to use the neighbour list
		Convert list emptiness checking to use neighbour list.

c6df10c net: add a possibility to get private from netdev_adjacent->list
5bdebae bonding: convert first/last slave logic to use neighbours
		Start using ->private directly for first/last slaves.

231db0b bonding: remove bond_prev_slave()
		Cleanup - easier to RCUify in the future.

3a32d8d net: add a function to get the next private
0a275bf bonding: use neighbours for bond_next_slave()
		Convert next_slave() to use neighbours.

0a39ab2 bonding: remove slave lists
		Finally.

ea3f071 net: expose the master link to sysfs, and remove it from bond
71cc99e vlan: link the upper neighbour only after registering
ecc2a4c net: create sysfs symlinks for neighbour devices
		Create sysfs links.

Thanks in advance.

CC: Jay Vosburgh <fubar@us.ibm.com>
CC: Andy Gospodarek <andy@greyhouse.net>
CC: Dimitris Michailidis <dm@chelsio.com>
CC: "David S. Miller" <davem@davemloft.net>
CC: Patrick McHardy <kaber@trash.net>
CC: Eric Dumazet <edumazet@google.com>
CC: Jiri Pirko <jiri@resnulli.us>
CC: Alexander Duyck <alexander.h.duyck@intel.com>
Signed-off-by: Veaceslav Falico <vfalico@redhat.com>

---
 drivers/net/bonding/bond_3ad.c                  |  54 +--
 drivers/net/bonding/bond_alb.c                  |  81 +++--
 drivers/net/bonding/bond_alb.h                  |   4 +-
 drivers/net/bonding/bond_main.c                 | 296 +++++++--------
 drivers/net/bonding/bond_procfs.c               |   5 +-
 drivers/net/bonding/bond_sysfs.c                |  62 +---
 drivers/net/bonding/bonding.h                   |  74 ++--
 drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c |   3 +-
 include/linux/netdevice.h                       |  57 ++-
 net/8021q/vlan.c                                |  14 +-
 net/core/dev.c                                  | 458 +++++++++++++++++++-----
 11 files changed, 703 insertions(+), 405 deletions(-)

^ permalink raw reply

* [PATCH net-next 03/26] net: uninline netdev neighbour functions
From: Veaceslav Falico @ 2013-09-09 20:16 UTC (permalink / raw)
  To: netdev
  Cc: jiri, Veaceslav Falico, David S. Miller, Eric Dumazet,
	Alexander Duyck
In-Reply-To: <1378757804-3159-1-git-send-email-vfalico@redhat.com>

They give almost no speed/size advantage; however, it's really
useful to have them in the backtrace.

CC: "David S. Miller" <davem@davemloft.net>
CC: Eric Dumazet <edumazet@google.com>
CC: Jiri Pirko <jiri@resnulli.us>
CC: Alexander Duyck <alexander.h.duyck@intel.com>
Signed-off-by: Veaceslav Falico <vfalico@redhat.com>
---
 net/core/dev.c | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/net/core/dev.c b/net/core/dev.c
index 749ec0b..eef99de 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4689,14 +4689,14 @@ void __netdev_adjacent_dev_remove(struct net_device *dev,
 	}
 }
 
-static inline void __netdev_upper_dev_remove(struct net_device *dev,
-					     struct net_device *udev)
+static void __netdev_upper_dev_remove(struct net_device *dev,
+				      struct net_device *udev)
 {
 	return __netdev_adjacent_dev_remove(dev, udev, true);
 }
 
-static inline void __netdev_lower_dev_remove(struct net_device *dev,
-					     struct net_device *ldev)
+static void __netdev_lower_dev_remove(struct net_device *dev,
+				      struct net_device *ldev)
 {
 	return __netdev_adjacent_dev_remove(dev, ldev, false);
 }
@@ -4720,15 +4720,15 @@ int __netdev_adjacent_dev_insert_link(struct net_device *dev,
 	return 0;
 }
 
-static inline int __netdev_adjacent_dev_link(struct net_device *dev,
-					     struct net_device *udev)
+static int __netdev_adjacent_dev_link(struct net_device *dev,
+				      struct net_device *udev)
 {
 	return __netdev_adjacent_dev_insert_link(dev, udev, false, false);
 }
 
-static inline int __netdev_adjacent_dev_link_neighbour(struct net_device *dev,
-						       struct net_device *udev,
-						       bool master)
+static int __netdev_adjacent_dev_link_neighbour(struct net_device *dev,
+						struct net_device *udev,
+						bool master)
 {
 	return __netdev_adjacent_dev_insert_link(dev, udev, master, true);
 }
-- 
1.8.4

^ permalink raw reply related

* [PATCH net-next 02/26] net: add RCU variant to search for netdev_adjacent link
From: Veaceslav Falico @ 2013-09-09 20:16 UTC (permalink / raw)
  To: netdev
  Cc: jiri, Veaceslav Falico, David S. Miller, Eric Dumazet,
	Alexander Duyck, Cong Wang
In-Reply-To: <1378757804-3159-1-git-send-email-vfalico@redhat.com>

Currently we have only the RTNL flavour; however, the list can also be
traversed while holding only RCU, so add the RCU search. Add only the one
function that will be used further on; other functions can be added easily
afterwards, if anyone needs them.

CC: "David S. Miller" <davem@davemloft.net>
CC: Eric Dumazet <edumazet@google.com>
CC: Jiri Pirko <jiri@resnulli.us>
CC: Alexander Duyck <alexander.h.duyck@intel.com>
CC: Cong Wang <amwang@redhat.com>
Signed-off-by: Veaceslav Falico <vfalico@redhat.com>
---
 net/core/dev.c | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)

diff --git a/net/core/dev.c b/net/core/dev.c
index 8832711..749ec0b 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4380,6 +4380,33 @@ struct netdev_adjacent {
 	struct rcu_head rcu;
 };
 
+static struct netdev_adjacent *__netdev_find_adj_rcu(struct net_device *dev,
+						     struct net_device *adj_dev,
+						     bool upper, bool neighbour)
+{
+	struct netdev_adjacent *adj;
+	struct list_head *adj_list;
+
+	if (neighbour)
+		adj_list = upper ? &dev->adj_list.upper :
+				   &dev->adj_list.lower;
+	else
+		adj_list = upper ? &dev->all_adj_list.upper :
+				   &dev->all_adj_list.lower;
+
+	list_for_each_entry_rcu(adj, adj_list, list) {
+		if (adj->dev == adj_dev)
+			return adj;
+	}
+	return NULL;
+}
+
+static struct netdev_adjacent *__netdev_lower_find_rcu(struct net_device *dev,
+							struct net_device *ldev)
+{
+	return __netdev_find_adj_rcu(dev, ldev, false, true);
+}
+
 static struct netdev_adjacent *__netdev_find_adj(struct net_device *dev,
 						 struct net_device *adj_dev,
 						 bool upper, bool neighbour)
-- 
1.8.4

^ permalink raw reply related

* [PATCH net-next 01/26] net: add adj_list to save only neighbours
From: Veaceslav Falico @ 2013-09-09 20:16 UTC (permalink / raw)
  To: netdev
  Cc: jiri, Veaceslav Falico, David S. Miller, Eric Dumazet,
	Alexander Duyck, Cong Wang
In-Reply-To: <1378757804-3159-1-git-send-email-vfalico@redhat.com>

Currently, we distinguish neighbours (first-level linked devices) from
non-neighbours by the neighbour bool in the netdev_adjacent. This could be
quite time-consuming in case we would like to traverse *only* through
neighbours - cause we'd have to traverse through all devices and check for
this flag, and in a (quite common) scenario where we have lots of vlans on
top of bridge, which is on top of a bond - the bonding would have to go
through all those vlans to get its upper neighbour linked devices.

This situation is really unpleasant, cause there are already a lot of cases
when a device with slaves needs to go through them in hot path.

To fix this, introduce a new upper/lower device lists structure -
adj_list, which contains only the neighbours. It always works in
pair with the all_adj_list structure (renamed from upper/lower_dev_list),
i.e. both of them contain the same links, except that all_adj_list also
contains non-neighbour device links. It's really a small change, currently
visible only in __netdev_adjacent_dev_insert/remove(), and it doesn't
change the main linking logic at all.

Also, add some comments, fix a name collision in
netdev_for_each_upper_dev_rcu() and rework the naming by the following
rules:

netdev_(all_)(upper|lower)_*

If "all_" is present, then we work with the whole list of upper/lower
devices, otherwise - only with direct neighbours. Uninline functions - to
get better stack traces.

CC: "David S. Miller" <davem@davemloft.net>
CC: Eric Dumazet <edumazet@google.com>
CC: Jiri Pirko <jiri@resnulli.us>
CC: Alexander Duyck <alexander.h.duyck@intel.com>
CC: Cong Wang <amwang@redhat.com>
Signed-off-by: Veaceslav Falico <vfalico@redhat.com>
---
 drivers/net/bonding/bond_alb.c  |   2 +-
 drivers/net/bonding/bond_main.c |  10 ++-
 include/linux/netdevice.h       |  28 ++++--
 net/core/dev.c                  | 195 +++++++++++++++++++++++++++-------------
 4 files changed, 160 insertions(+), 75 deletions(-)

diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c
index 91f179d..c3dcc6b 100644
--- a/drivers/net/bonding/bond_alb.c
+++ b/drivers/net/bonding/bond_alb.c
@@ -1019,7 +1019,7 @@ static void alb_send_learning_packets(struct slave *slave, u8 mac_addr[])
 
 	/* loop through vlans and send one packet for each */
 	rcu_read_lock();
-	netdev_for_each_upper_dev_rcu(bond->dev, upper, iter) {
+	netdev_for_each_all_upper_dev_rcu(bond->dev, upper, iter) {
 		if (upper->priv_flags & IFF_802_1Q_VLAN)
 			alb_send_lp_vid(slave, mac_addr,
 					vlan_dev_vlan_id(upper));
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 39e5b1c..72bdb8b 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -2267,7 +2267,7 @@ static bool bond_has_this_ip(struct bonding *bond, __be32 ip)
 		return true;
 
 	rcu_read_lock();
-	netdev_for_each_upper_dev_rcu(bond->dev, upper, iter) {
+	netdev_for_each_all_upper_dev_rcu(bond->dev, upper, iter) {
 		if (ip == bond_confirm_addr(upper, 0, ip)) {
 			ret = true;
 			break;
@@ -2342,10 +2342,12 @@ static void bond_arp_send_all(struct bonding *bond, struct slave *slave)
 		 *
 		 * TODO: QinQ?
 		 */
-		netdev_for_each_upper_dev_rcu(bond->dev, vlan_upper, vlan_iter) {
+		netdev_for_each_all_upper_dev_rcu(bond->dev, vlan_upper,
+						  vlan_iter) {
 			if (!is_vlan_dev(vlan_upper))
 				continue;
-			netdev_for_each_upper_dev_rcu(vlan_upper, upper, iter) {
+			netdev_for_each_all_upper_dev_rcu(vlan_upper, upper,
+							  iter) {
 				if (upper == rt->dst.dev) {
 					vlan_id = vlan_dev_vlan_id(vlan_upper);
 					rcu_read_unlock();
@@ -2358,7 +2360,7 @@ static void bond_arp_send_all(struct bonding *bond, struct slave *slave)
 		 * our upper vlans, then just search for any dev that
 		 * matches, and in case it's a vlan - save the id
 		 */
-		netdev_for_each_upper_dev_rcu(bond->dev, upper, iter) {
+		netdev_for_each_all_upper_dev_rcu(bond->dev, upper, iter) {
 			if (upper == rt->dst.dev) {
 				/* if it's a vlan - get its VID */
 				if (is_vlan_dev(upper))
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 041b42a..2a944e5 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1143,8 +1143,18 @@ struct net_device {
 	struct list_head	dev_list;
 	struct list_head	napi_list;
 	struct list_head	unreg_list;
-	struct list_head	upper_dev_list; /* List of upper devices */
-	struct list_head	lower_dev_list;
+
+	/* directly linked devices, like slaves for bonding */
+	struct {
+		struct list_head upper;
+		struct list_head lower;
+	} adj_list;
+
+	/* all linked devices, *including* neighbours */
+	struct {
+		struct list_head upper;
+		struct list_head lower;
+	} all_adj_list;
 
 
 	/* currently active device features */
@@ -2813,15 +2823,15 @@ extern int		bpf_jit_enable;
 extern bool netdev_has_upper_dev(struct net_device *dev,
 				 struct net_device *upper_dev);
 extern bool netdev_has_any_upper_dev(struct net_device *dev);
-extern struct net_device *netdev_upper_get_next_dev_rcu(struct net_device *dev,
-							struct list_head **iter);
+extern struct net_device *netdev_all_upper_get_next_dev_rcu(struct net_device *dev,
+							    struct list_head **iter);
 
 /* iterate through upper list, must be called under RCU read lock */
-#define netdev_for_each_upper_dev_rcu(dev, upper, iter) \
-	for (iter = &(dev)->upper_dev_list, \
-	     upper = netdev_upper_get_next_dev_rcu(dev, &(iter)); \
-	     upper; \
-	     upper = netdev_upper_get_next_dev_rcu(dev, &(iter)))
+#define netdev_for_each_all_upper_dev_rcu(dev, updev, iter) \
+	for (iter = &(dev)->all_adj_list.upper, \
+	     updev = netdev_all_upper_get_next_dev_rcu(dev, &(iter)); \
+	     updev; \
+	     updev = netdev_all_upper_get_next_dev_rcu(dev, &(iter)))
 
 extern struct net_device *netdev_master_upper_dev_get(struct net_device *dev);
 extern struct net_device *netdev_master_upper_dev_get_rcu(struct net_device *dev);
diff --git a/net/core/dev.c b/net/core/dev.c
index 5c713f2..8832711 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4373,9 +4373,6 @@ struct netdev_adjacent {
 	/* upper master flag, there can only be one master device per list */
 	bool master;
 
-	/* indicates that this dev is our first-level lower/upper device */
-	bool neighbour;
-
 	/* counter for the number of times this device was added to us */
 	u16 ref_nr;
 
@@ -4385,30 +4382,47 @@ struct netdev_adjacent {
 
 static struct netdev_adjacent *__netdev_find_adj(struct net_device *dev,
 						 struct net_device *adj_dev,
-						 bool upper)
+						 bool upper, bool neighbour)
 {
 	struct netdev_adjacent *adj;
-	struct list_head *dev_list;
+	struct list_head *adj_list;
 
-	dev_list = upper ? &dev->upper_dev_list : &dev->lower_dev_list;
+	if (neighbour)
+		adj_list = upper ? &dev->adj_list.upper :
+				   &dev->adj_list.lower;
+	else
+		adj_list = upper ? &dev->all_adj_list.upper :
+				   &dev->all_adj_list.lower;
 
-	list_for_each_entry(adj, dev_list, list) {
+	list_for_each_entry(adj, adj_list, list) {
 		if (adj->dev == adj_dev)
 			return adj;
 	}
 	return NULL;
 }
 
-static inline struct netdev_adjacent *__netdev_find_upper(struct net_device *dev,
-							  struct net_device *udev)
+static struct netdev_adjacent *__netdev_all_upper_find(struct net_device *dev,
+						       struct net_device *udev)
 {
-	return __netdev_find_adj(dev, udev, true);
+	return __netdev_find_adj(dev, udev, true, false);
 }
 
-static inline struct netdev_adjacent *__netdev_find_lower(struct net_device *dev,
-							  struct net_device *ldev)
+static struct netdev_adjacent *__netdev_all_lower_find(struct net_device *dev,
+						       struct net_device *ldev)
 {
-	return __netdev_find_adj(dev, ldev, false);
+	return __netdev_find_adj(dev, ldev, false, false);
+}
+
+static struct netdev_adjacent *__netdev_upper_find(struct net_device *dev,
+						   struct net_device *udev)
+{
+	return __netdev_find_adj(dev, udev, true, true);
+}
+
+static struct netdev_adjacent *__netdev_lower_find(struct net_device *dev,
+						   struct net_device *ldev)
+{
+	return __netdev_find_adj(dev, ldev, false, true);
 }
 
 /**
@@ -4425,7 +4439,7 @@ bool netdev_has_upper_dev(struct net_device *dev,
 {
 	ASSERT_RTNL();
 
-	return __netdev_find_upper(dev, upper_dev);
+	return __netdev_all_upper_find(dev, upper_dev);
 }
 EXPORT_SYMBOL(netdev_has_upper_dev);
 
@@ -4440,7 +4454,7 @@ bool netdev_has_any_upper_dev(struct net_device *dev)
 {
 	ASSERT_RTNL();
 
-	return !list_empty(&dev->upper_dev_list);
+	return !list_empty(&dev->all_adj_list.upper);
 }
 EXPORT_SYMBOL(netdev_has_any_upper_dev);
 
@@ -4457,10 +4471,10 @@ struct net_device *netdev_master_upper_dev_get(struct net_device *dev)
 
 	ASSERT_RTNL();
 
-	if (list_empty(&dev->upper_dev_list))
+	if (list_empty(&dev->adj_list.upper))
 		return NULL;
 
-	upper = list_first_entry(&dev->upper_dev_list,
+	upper = list_first_entry(&dev->adj_list.upper,
 				 struct netdev_adjacent, list);
 	if (likely(upper->master))
 		return upper->dev;
@@ -4468,15 +4482,15 @@ struct net_device *netdev_master_upper_dev_get(struct net_device *dev)
 }
 EXPORT_SYMBOL(netdev_master_upper_dev_get);
 
-/* netdev_upper_get_next_dev_rcu - Get the next dev from upper list
+/* netdev_all_upper_get_next_dev_rcu - Get the next dev from upper list
  * @dev: device
  * @iter: list_head ** of the current position
  *
  * Gets the next device from the dev's upper list, starting from iter
  * position. The caller must hold RCU read lock.
  */
-struct net_device *netdev_upper_get_next_dev_rcu(struct net_device *dev,
-						 struct list_head **iter)
+struct net_device *netdev_all_upper_get_next_dev_rcu(struct net_device *dev,
+						     struct list_head **iter)
 {
 	struct netdev_adjacent *upper;
 
@@ -4484,14 +4498,14 @@ struct net_device *netdev_upper_get_next_dev_rcu(struct net_device *dev,
 
 	upper = list_entry_rcu((*iter)->next, struct netdev_adjacent, list);
 
-	if (&upper->list == &dev->upper_dev_list)
+	if (&upper->list == &dev->all_adj_list.upper)
 		return NULL;
 
 	*iter = &upper->list;
 
 	return upper->dev;
 }
-EXPORT_SYMBOL(netdev_upper_get_next_dev_rcu);
+EXPORT_SYMBOL(netdev_all_upper_get_next_dev_rcu);
 
 /**
  * netdev_master_upper_dev_get_rcu - Get master upper device
@@ -4504,7 +4518,7 @@ struct net_device *netdev_master_upper_dev_get_rcu(struct net_device *dev)
 {
 	struct netdev_adjacent *upper;
 
-	upper = list_first_or_null_rcu(&dev->upper_dev_list,
+	upper = list_first_or_null_rcu(&dev->adj_list.upper,
 				       struct netdev_adjacent, list);
 	if (upper && likely(upper->master))
 		return upper->dev;
@@ -4517,11 +4531,12 @@ static int __netdev_adjacent_dev_insert(struct net_device *dev,
 					bool neighbour, bool master,
 					bool upper)
 {
-	struct netdev_adjacent *adj;
+	struct netdev_adjacent *adj, *neigh = NULL;
 
-	adj = __netdev_find_adj(dev, adj_dev, upper);
+	adj = __netdev_find_adj(dev, adj_dev, upper, false);
 
 	if (adj) {
+		/* we cannot insert a neighbour device twice */
 		BUG_ON(neighbour);
 		adj->ref_nr++;
 		return 0;
@@ -4533,39 +4548,64 @@ static int __netdev_adjacent_dev_insert(struct net_device *dev,
 
 	adj->dev = adj_dev;
 	adj->master = master;
-	adj->neighbour = neighbour;
 	adj->ref_nr = 1;
-
 	dev_hold(adj_dev);
+
+	if (neighbour) {
+		neigh = kmalloc(sizeof(*neigh), GFP_KERNEL);
+		if (!neigh) {
+			kfree(adj);
+			return -ENOMEM;
+		}
+		neigh->dev = adj_dev;
+		neigh->master = master;
+		neigh->ref_nr = 1;
+		dev_hold(adj_dev);
+	}
+
 	pr_debug("dev_hold for %s, because of %s link added from %s to %s\n",
 		 adj_dev->name, upper ? "upper" : "lower", dev->name,
 		 adj_dev->name);
+	if (neigh)
+		pr_debug("dev_hold for %s, because of %s link added from %s to %s (neighbour)\n",
+			 adj_dev->name, upper ? "upper" : "lower", dev->name,
+			 adj_dev->name);
 
 	if (!upper) {
-		list_add_tail_rcu(&adj->list, &dev->lower_dev_list);
+		if (neigh)
+			list_add_tail_rcu(&neigh->list,
+					  &dev->adj_list.lower);
+		list_add_tail_rcu(&adj->list, &dev->all_adj_list.lower);
 		return 0;
 	}
 
 	/* Ensure that master upper link is always the first item in list. */
-	if (master)
-		list_add_rcu(&adj->list, &dev->upper_dev_list);
-	else
-		list_add_tail_rcu(&adj->list, &dev->upper_dev_list);
+	if (master) {
+		if (neigh)
+			list_add_rcu(&neigh->list,
+				     &dev->adj_list.upper);
+		list_add_rcu(&adj->list, &dev->all_adj_list.upper);
+	} else {
+		if (neigh)
+			list_add_tail_rcu(&neigh->list,
+					  &dev->adj_list.upper);
+		list_add_tail_rcu(&adj->list, &dev->all_adj_list.upper);
+	}
 
 	return 0;
 }
 
-static inline int __netdev_upper_dev_insert(struct net_device *dev,
-					    struct net_device *udev,
-					    bool master, bool neighbour)
+static int __netdev_upper_dev_insert(struct net_device *dev,
+				     struct net_device *udev,
+				     bool master, bool neighbour)
 {
 	return __netdev_adjacent_dev_insert(dev, udev, neighbour, master,
 					    true);
 }
 
-static inline int __netdev_lower_dev_insert(struct net_device *dev,
-					    struct net_device *ldev,
-					    bool neighbour)
+static int __netdev_lower_dev_insert(struct net_device *dev,
+				     struct net_device *ldev,
+				     bool neighbour)
 {
 	return __netdev_adjacent_dev_insert(dev, ldev, neighbour, false,
 					    false);
@@ -4574,17 +4614,34 @@ static inline int __netdev_lower_dev_insert(struct net_device *dev,
 void __netdev_adjacent_dev_remove(struct net_device *dev,
 				  struct net_device *adj_dev, bool upper)
 {
-	struct netdev_adjacent *adj;
+	struct netdev_adjacent *adj, *neighbour;
 
-	if (upper)
-		adj = __netdev_find_upper(dev, adj_dev);
-	else
-		adj = __netdev_find_lower(dev, adj_dev);
+	if (upper) {
+		adj = __netdev_all_upper_find(dev, adj_dev);
+		neighbour = __netdev_upper_find(dev, adj_dev);
+	} else {
+		adj = __netdev_all_lower_find(dev, adj_dev);
+		neighbour = __netdev_lower_find(dev, adj_dev);
+	}
 
-	if (!adj)
+	if (!adj) {
+		pr_err("tried to remove %s device %s from %s\n",
+		       upper ? "upper" : "lower", dev->name, adj_dev->name);
 		BUG();
+	}
 
 	if (adj->ref_nr > 1) {
+		pr_debug("rec_cnt-- for link to %s, because of %s link removed from %s to %s, remains %d\n",
+			 adj_dev->name, upper ? "upper" : "lower", dev->name,
+			 adj_dev->name, adj->ref_nr-1);
+		if (neighbour) {
+			pr_debug("rec_cnt-- for link to %s, because of %s link removed from %s to %s, remain %d (neigh)\n",
+				 adj_dev->name, upper ? "upper" : "lower",
+				 dev->name, adj_dev->name,
+				 neighbour->ref_nr-1);
+			BUG_ON(adj->ref_nr != neighbour->ref_nr);
+			neighbour->ref_nr--;
+		}
 		adj->ref_nr--;
 		return;
 	}
@@ -4595,6 +4652,14 @@ void __netdev_adjacent_dev_remove(struct net_device *dev,
 		 adj_dev->name);
 	dev_put(adj_dev);
 	kfree_rcu(adj, rcu);
+	if (neighbour) {
+		pr_debug("dev_put for %s, because of %s link removed from %s to %s (neighbour)\n",
+			 adj_dev->name, upper ? "upper" : "lower", dev->name,
+			 adj_dev->name);
+		list_del_rcu(&neighbour->list);
+		dev_put(adj_dev);
+		kfree_rcu(neighbour, rcu);
+	}
 }
 
 static inline void __netdev_upper_dev_remove(struct net_device *dev,
@@ -4661,10 +4726,10 @@ static int __netdev_upper_dev_link(struct net_device *dev,
 		return -EBUSY;
 
 	/* To prevent loops, check if dev is not upper device to upper_dev. */
-	if (__netdev_find_upper(upper_dev, dev))
+	if (__netdev_all_upper_find(upper_dev, dev))
 		return -EBUSY;
 
-	if (__netdev_find_upper(dev, upper_dev))
+	if (__netdev_all_upper_find(dev, upper_dev))
 		return -EEXIST;
 
 	if (master && netdev_master_upper_dev_get(dev))
@@ -4675,12 +4740,14 @@ static int __netdev_upper_dev_link(struct net_device *dev,
 		return ret;
 
 	/* Now that we linked these devs, make all the upper_dev's
-	 * upper_dev_list visible to every dev's lower_dev_list and vice
+	 * all_adj_list.upper visible to every dev's all_adj_list.lower and vice
 	 * versa, and don't forget the devices itself. All of these
 	 * links are non-neighbours.
 	 */
-	list_for_each_entry(i, &dev->lower_dev_list, list) {
-		list_for_each_entry(j, &upper_dev->upper_dev_list, list) {
+	list_for_each_entry(i, &dev->all_adj_list.lower, list) {
+		list_for_each_entry(j, &upper_dev->all_adj_list.upper, list) {
+			pr_debug("Interlinking %s with %s, non-neighbour\n",
+				 i->dev->name, j->dev->name);
 			ret = __netdev_adjacent_dev_link(i->dev, j->dev);
 			if (ret)
 				goto rollback_mesh;
@@ -4688,14 +4755,18 @@ static int __netdev_upper_dev_link(struct net_device *dev,
 	}
 
 	/* add dev to every upper_dev's upper device */
-	list_for_each_entry(i, &upper_dev->upper_dev_list, list) {
+	list_for_each_entry(i, &upper_dev->all_adj_list.upper, list) {
+		pr_debug("linking %s's upper device %s with %s\n",
+			 upper_dev->name, i->dev->name, dev->name);
 		ret = __netdev_adjacent_dev_link(dev, i->dev);
 		if (ret)
 			goto rollback_upper_mesh;
 	}
 
 	/* add upper_dev to every dev's lower device */
-	list_for_each_entry(i, &dev->lower_dev_list, list) {
+	list_for_each_entry(i, &dev->all_adj_list.lower, list) {
+		pr_debug("linking %s's lower device %s with %s\n", dev->name,
+			 i->dev->name, upper_dev->name);
 		ret = __netdev_adjacent_dev_link(i->dev, upper_dev);
 		if (ret)
 			goto rollback_lower_mesh;
@@ -4706,7 +4777,7 @@ static int __netdev_upper_dev_link(struct net_device *dev,
 
 rollback_lower_mesh:
 	to_i = i;
-	list_for_each_entry(i, &dev->lower_dev_list, list) {
+	list_for_each_entry(i, &dev->all_adj_list.lower, list) {
 		if (i == to_i)
 			break;
 		__netdev_adjacent_dev_unlink(i->dev, upper_dev);
@@ -4716,7 +4787,7 @@ rollback_lower_mesh:
 
 rollback_upper_mesh:
 	to_i = i;
-	list_for_each_entry(i, &upper_dev->upper_dev_list, list) {
+	list_for_each_entry(i, &upper_dev->all_adj_list.upper, list) {
 		if (i == to_i)
 			break;
 		__netdev_adjacent_dev_unlink(dev, i->dev);
@@ -4727,8 +4798,8 @@ rollback_upper_mesh:
 rollback_mesh:
 	to_i = i;
 	to_j = j;
-	list_for_each_entry(i, &dev->lower_dev_list, list) {
-		list_for_each_entry(j, &upper_dev->upper_dev_list, list) {
+	list_for_each_entry(i, &dev->all_adj_list.lower, list) {
+		list_for_each_entry(j, &upper_dev->all_adj_list.upper, list) {
 			if (i == to_i && j == to_j)
 				break;
 			__netdev_adjacent_dev_unlink(i->dev, j->dev);
@@ -4797,17 +4868,17 @@ void netdev_upper_dev_unlink(struct net_device *dev,
 	 * devices from all upper_dev's upper devices and vice
 	 * versa, to maintain the graph relationship.
 	 */
-	list_for_each_entry(i, &dev->lower_dev_list, list)
-		list_for_each_entry(j, &upper_dev->upper_dev_list, list)
+	list_for_each_entry(i, &dev->all_adj_list.lower, list)
+		list_for_each_entry(j, &upper_dev->all_adj_list.upper, list)
 			__netdev_adjacent_dev_unlink(i->dev, j->dev);
 
 	/* remove also the devices itself from lower/upper device
 	 * list
 	 */
-	list_for_each_entry(i, &dev->lower_dev_list, list)
+	list_for_each_entry(i, &dev->all_adj_list.lower, list)
 		__netdev_adjacent_dev_unlink(i->dev, upper_dev);
 
-	list_for_each_entry(i, &upper_dev->upper_dev_list, list)
+	list_for_each_entry(i, &upper_dev->all_adj_list.upper, list)
 		__netdev_adjacent_dev_unlink(dev, i->dev);
 
 	call_netdevice_notifiers(NETDEV_CHANGEUPPER, dev);
@@ -6069,8 +6140,10 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
 	INIT_LIST_HEAD(&dev->napi_list);
 	INIT_LIST_HEAD(&dev->unreg_list);
 	INIT_LIST_HEAD(&dev->link_watch_list);
-	INIT_LIST_HEAD(&dev->upper_dev_list);
-	INIT_LIST_HEAD(&dev->lower_dev_list);
+	INIT_LIST_HEAD(&dev->adj_list.upper);
+	INIT_LIST_HEAD(&dev->adj_list.lower);
+	INIT_LIST_HEAD(&dev->all_adj_list.upper);
+	INIT_LIST_HEAD(&dev->all_adj_list.lower);
 	dev->priv_flags = IFF_XMIT_DST_RELEASE;
 	setup(dev);
 
-- 
1.8.4

^ permalink raw reply related

* Re: [PATCH RFC] net: neighbour: use source address of last enqueued packet for solicitation
From: Julian Anastasov @ 2013-09-09 20:17 UTC (permalink / raw)
  To: Hannes Frederic Sowa; +Cc: netdev, davem
In-Reply-To: <20130908193031.GC21070@order.stressinduktion.org>


	Hello,

On Sun, 8 Sep 2013, Hannes Frederic Sowa wrote:

> Currently we always use the first member of the arp_queue to determine
> the sender ip address of the arp packet (or in case of IPv6 - source
> address of the ndisc packet). This skb is fixed as long as the queue is
> not drained by a complete purge because of a timeout or by a successful
> response.
> 
> If the first packet enqueued on the arp_queue is from a local application
> with a manually set source address and the to be discovered system
> does some kind of uRPF checks on the source address in the arp packet
> the resolving process hangs until a timeout and restarts. This hurts
> communication with the participating network node.
> 
> This could be mitigated a bit if we use the latest enqueued skb's
> source address for the resolving process, which is not as static as
> the arp_queue's head. This change of the source address could result in
> better recovery of a failed solicitation.
> 
> Cc: "David S. Miller" <davem@davemloft.net>
> Cc: Julian Anastasov <ja@ssi.bg>
> Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
> ---
> 
> I didn't find anything which could break because of this change, but
> please have a second look.

	arp_queue has packets only in NUD_INCOMPLETE state 
(mcast_solicit=3 secs by default). And __neigh_event_send()
now can keep many packets, 64KB from recent changes. So the
1st place is not guaranteed but now it is more difficult
to kick the first packet compared to the old limit of just
3 packets.

	The change can give a chance for the 2nd and 3rd
probes if the 1st probe is not replied to, so it should be
better to apply it:

Reviewed-by: Julian Anastasov <ja@ssi.bg>

	Still, I think such problems should be addressed
with conf/{DEV,all}/arp_announce=1 or 2.

>  net/core/neighbour.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/net/core/neighbour.c b/net/core/neighbour.c
> index 6072610..ca15f32 100644
> --- a/net/core/neighbour.c
> +++ b/net/core/neighbour.c
> @@ -867,7 +867,7 @@ static void neigh_invalidate(struct neighbour *neigh)
>  static void neigh_probe(struct neighbour *neigh)
>  	__releases(neigh->lock)
>  {
> -	struct sk_buff *skb = skb_peek(&neigh->arp_queue);
> +	struct sk_buff *skb = skb_peek_tail(&neigh->arp_queue);
>  	/* keep skb alive even if arp_queue overflows */
>  	if (skb)
>  		skb = skb_copy(skb, GFP_ATOMIC);
> -- 
> 1.8.3.1

Regards

--
Julian Anastasov <ja@ssi.bg>

^ permalink raw reply

* Re: [PATCH] pch_gbe: fix unmet direct dependency on PTP_1588_CLOCK_PCH
From: Randy Dunlap @ 2013-09-09 20:00 UTC (permalink / raw)
  To: Vladimir Murzin
  Cc: netdev, davem, ben, jeffrey.t.kirsher, rdunlap, haicheng.lee
In-Reply-To: <1378754910-2254-1-git-send-email-murzin.v@gmail.com>

On 09/09/13 12:28, Vladimir Murzin wrote:
> While cross-building for PPC.
> 
> warning: (PCH_GBE) selects PTP_1588_CLOCK_PCH which has unmet direct
> dependencies (X86 || COMPILE_TEST)
> 
> Both PCH_GBE and PTP_1588_CLOCK_PCH are only compatible with Intel
> architecture. Add dependency on x86 for PCH_GBE. Keep COMPILE_TEST to allow
> building on different arches.
> 
> Signed-off-by: Vladimir Murzin <murzin.v@gmail.com>
> ---
>  drivers/net/ethernet/oki-semi/pch_gbe/Kconfig | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/drivers/net/ethernet/oki-semi/pch_gbe/Kconfig b/drivers/net/ethernet/oki-semi/pch_gbe/Kconfig
> index cb22341..4a935ec 100644
> --- a/drivers/net/ethernet/oki-semi/pch_gbe/Kconfig
> +++ b/drivers/net/ethernet/oki-semi/pch_gbe/Kconfig
> @@ -4,7 +4,7 @@
>  
>  config PCH_GBE
>  	tristate "OKI SEMICONDUCTOR IOH(ML7223/ML7831) GbE"
> -	depends on PCI
> +	depends on PCI && (x86 || COMPILE_TEST)

Has this been tested?
That should be             X86

>  	select MII
>  	select PTP_1588_CLOCK_PCH
>  	---help---
> 


-- 
~Randy

^ permalink raw reply

* [PATCH net] ipv6: don't call fib6_run_gc() until routing is ready
From: Michal Kubecek @ 2013-09-09 19:45 UTC (permalink / raw)
  To: netdev
  Cc: David S. Miller, Alexey Kuznetsov, James Morris,
	Hideaki YOSHIFUJI, Patrick McHardy
In-Reply-To: <20130909.132720.1372904024173191622.davem@davemloft.net>

When loading the ipv6 module, ndisc_init() is called before
ip6_route_init(). As the former registers a handler calling
fib6_run_gc(), this opens a window to run the garbage collector
before necessary data structures are initialized. If a network
device is initialized in this window, adding MAC address to it
triggers a NETDEV_CHANGEADDR event, leading to a crash in
fib6_clean_all().

Take the event handler registration out of ndisc_init() into a
separate function ndisc_late_init() and move it after
ip6_route_init().

Signed-off-by: Michal Kubecek <mkubecek@suse.cz>
---
 include/net/ndisc.h |  2 ++
 net/ipv6/af_inet6.c |  6 ++++++
 net/ipv6/ndisc.c    | 18 +++++++++++-------
 3 files changed, 19 insertions(+), 7 deletions(-)

diff --git a/include/net/ndisc.h b/include/net/ndisc.h
index 3c4211f..ea0cc26 100644
--- a/include/net/ndisc.h
+++ b/include/net/ndisc.h
@@ -190,7 +190,9 @@ static inline struct neighbour *__ipv6_neigh_lookup(struct net_device *dev, cons
 }
 
 extern int			ndisc_init(void);
+extern int			ndisc_late_init(void);
 
+extern void			ndisc_late_cleanup(void);
 extern void			ndisc_cleanup(void);
 
 extern int			ndisc_rcv(struct sk_buff *skb);
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 136fe55..7c96100 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -915,6 +915,9 @@ static int __init inet6_init(void)
 	err = ip6_route_init();
 	if (err)
 		goto ip6_route_fail;
+	err = ndisc_late_init();
+	if (err)
+		goto ndisc_late_fail;
 	err = ip6_flowlabel_init();
 	if (err)
 		goto ip6_flowlabel_fail;
@@ -981,6 +984,8 @@ ipv6_exthdrs_fail:
 addrconf_fail:
 	ip6_flowlabel_cleanup();
 ip6_flowlabel_fail:
+	ndisc_late_cleanup();
+ndisc_late_fail:
 	ip6_route_cleanup();
 ip6_route_fail:
 #ifdef CONFIG_PROC_FS
@@ -1043,6 +1048,7 @@ static void __exit inet6_exit(void)
 	ipv6_exthdrs_exit();
 	addrconf_cleanup();
 	ip6_flowlabel_cleanup();
+	ndisc_late_cleanup();
 	ip6_route_cleanup();
 #ifdef CONFIG_PROC_FS
 
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 1217945..f8a55ff 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -1727,24 +1727,28 @@ int __init ndisc_init(void)
 	if (err)
 		goto out_unregister_pernet;
 #endif
-	err = register_netdevice_notifier(&ndisc_netdev_notifier);
-	if (err)
-		goto out_unregister_sysctl;
 out:
 	return err;
 
-out_unregister_sysctl:
 #ifdef CONFIG_SYSCTL
-	neigh_sysctl_unregister(&nd_tbl.parms);
 out_unregister_pernet:
-#endif
 	unregister_pernet_subsys(&ndisc_net_ops);
 	goto out;
+#endif
 }
 
-void ndisc_cleanup(void)
+int __init ndisc_late_init(void)
+{
+	return register_netdevice_notifier(&ndisc_netdev_notifier);
+}
+
+void ndisc_late_cleanup(void)
 {
 	unregister_netdevice_notifier(&ndisc_netdev_notifier);
+}
+
+void ndisc_cleanup(void)
+{
 #ifdef CONFIG_SYSCTL
 	neigh_sysctl_unregister(&nd_tbl.parms);
 #endif
-- 
1.8.1.4

^ permalink raw reply related

* [PATCH] pch_gbe: fix unmet direct dependency on PTP_1588_CLOCK_PCH
From: Vladimir Murzin @ 2013-09-09 19:28 UTC (permalink / raw)
  To: netdev
  Cc: davem, ben, jeffrey.t.kirsher, rdunlap, haicheng.lee,
	Vladimir Murzin

While cross-building for PPC.

warning: (PCH_GBE) selects PTP_1588_CLOCK_PCH which has unmet direct
dependencies (X86 || COMPILE_TEST)

Both PCH_GBE and PTP_1588_CLOCK_PCH are only compatible with Intel
architecture. Add dependency on x86 for PCH_GBE. Keep COMPILE_TEST to allow
building on different arches.

Signed-off-by: Vladimir Murzin <murzin.v@gmail.com>
---
 drivers/net/ethernet/oki-semi/pch_gbe/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/oki-semi/pch_gbe/Kconfig b/drivers/net/ethernet/oki-semi/pch_gbe/Kconfig
index cb22341..4a935ec 100644
--- a/drivers/net/ethernet/oki-semi/pch_gbe/Kconfig
+++ b/drivers/net/ethernet/oki-semi/pch_gbe/Kconfig
@@ -4,7 +4,7 @@
 
 config PCH_GBE
 	tristate "OKI SEMICONDUCTOR IOH(ML7223/ML7831) GbE"
-	depends on PCI
+	depends on PCI && (x86 || COMPILE_TEST)
 	select MII
 	select PTP_1588_CLOCK_PCH
 	---help---
-- 
1.8.1.5

^ permalink raw reply related

* Re: [PATCH net] net: sctp: fix bug in sctp_poll for SOCK_SELECT_ERR_QUEUE
From: Keller, Jacob E @ 2013-09-09 19:12 UTC (permalink / raw)
  To: Vlad Yasevich
  Cc: Daniel Borkmann, davem@davemloft.net, netdev@vger.kernel.org,
	linux-sctp@vger.kernel.org
In-Reply-To: <522DD37C.5070108@gmail.com>

On Mon, 2013-09-09 at 09:56 -0400, Vlad Yasevich wrote:
> On 09/07/2013 10:44 AM, Daniel Borkmann wrote:
> > If we do not add braces around ...
> >
> >    mask |= POLLERR |
> >            sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0;
> >
> > ... then this condition always evaluates to true as POLLERR is
> > defined as 8 and binary or'd with whatever result comes out of
> > sock_flag(). Hence instead of (X | Y) ? A : B, transform it into
> > X | (Y ? A : B). Unfortunately, commit 8facd5fb73 ("net: fix
> > smatch warnings inside datagram_poll") forgot about SCTP. :-(
> >
> > Introduced by 7d4c04fc170 ("net: add option to enable error queue
> > packets waking select").
> >
> > Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
> > Cc Jacob Keller <jacob.e.keller@intel.com>
> 
> Acked-by: Vlad Yasevich <vyasevich@gmail.com>

Acked-by: Jacob Keller <jacob.e.keller@intel.com>

> -vlad
> 
> > ---
> >   net/sctp/socket.c | 2 +-
> >   1 file changed, 1 insertion(+), 1 deletion(-)
> >
> > diff --git a/net/sctp/socket.c b/net/sctp/socket.c
> > index d5d5882..5462bbb 100644
> > --- a/net/sctp/socket.c
> > +++ b/net/sctp/socket.c
> > @@ -6176,7 +6176,7 @@ unsigned int sctp_poll(struct file *file, struct socket *sock, poll_table *wait)
> >   	/* Is there any exceptional events?  */
> >   	if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
> >   		mask |= POLLERR |
> > -			sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0;
> > +			(sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0);
> >   	if (sk->sk_shutdown & RCV_SHUTDOWN)
> >   		mask |= POLLRDHUP | POLLIN | POLLRDNORM;
> >   	if (sk->sk_shutdown == SHUTDOWN_MASK)
> >
> 



^ permalink raw reply

* Re: bnx2x fix for stable
From: David Miller @ 2013-09-09 18:32 UTC (permalink / raw)
  To: bhutchings; +Cc: eilong, davej, netdev
In-Reply-To: <1378749027.1806.4.camel@bwh-desktop.uk.level5networks.com>

From: Ben Hutchings <bhutchings@solarflare.com>
Date: Mon, 9 Sep 2013 18:50:27 +0100

> I think you need to either drop the bnx2x non-fix currently in your
> stable queue (commit c0a77ec74f29 'bnx2x: Add missing braces in
> bnx2x:bnx2x_link_initialize'), or add the follow-up (commit 937e5c3d63a1
> 'bnx2x: Restore a call to config_init') with the net result of fixing
> indentation.

I know, I just ran into this while working on -stable submissions,
thanks Ben.

^ permalink raw reply

* [PATCH] net: tilegx driver: avoid compiler warning
From: Chris Metcalf @ 2013-09-09 18:11 UTC (permalink / raw)
  To: netdev, linux-kernel

The "id" variable was being incremented in common code, but only
initialized and used in IPv4 code.  We move the increment to the IPv4
code too, and then legitimately use the uninitialized_var() macro to
avoid the gcc 4.6 warning that 'id' may be used uninitialized.
Note that gcc 4.7 does not warn.

Signed-off-by: Chris Metcalf <cmetcalf@tilera.com>
---
 drivers/net/ethernet/tile/tilegx.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/tile/tilegx.c b/drivers/net/ethernet/tile/tilegx.c
index 949076f..13e6fff 100644
--- a/drivers/net/ethernet/tile/tilegx.c
+++ b/drivers/net/ethernet/tile/tilegx.c
@@ -1734,7 +1734,8 @@ static void tso_headers_prepare(struct sk_buff *skb, unsigned char *headers,
 	unsigned int data_len = skb->len - sh_len;
 	unsigned char *data = skb->data;
 	unsigned int ih_off, th_off, p_len;
-	unsigned int isum_seed, tsum_seed, id, seq;
+	unsigned int isum_seed, tsum_seed, seq;
+	unsigned int uninitialized_var(id);
 	int is_ipv6;
 	long f_id = -1;    /* id of the current fragment */
 	long f_size = skb_headlen(skb) - sh_len;  /* current fragment size */
@@ -1781,7 +1782,7 @@ static void tso_headers_prepare(struct sk_buff *skb, unsigned char *headers,
 		} else {
 			ih = (struct iphdr *)(buf + ih_off);
 			ih->tot_len = htons(sh_len + p_len - ih_off);
-			ih->id = htons(id);
+			ih->id = htons(id++);
 			ih->check = csum_long(isum_seed + ih->tot_len +
 					      ih->id) ^ 0xffff;
 		}
@@ -1818,7 +1819,6 @@ static void tso_headers_prepare(struct sk_buff *skb, unsigned char *headers,
 			slot++;
 		}
 
-		id++;
 		seq += p_len;
 
 		/* The last segment may be less than gso_size. */
-- 
1.8.3.1

^ permalink raw reply related

* bnx2x fix for stable
From: Ben Hutchings @ 2013-09-09 17:50 UTC (permalink / raw)
  To: David Miller; +Cc: Eilon Greenstein, Dave Jones, netdev

I think you need to either drop the bnx2x non-fix currently in your
stable queue (commit c0a77ec74f29 'bnx2x: Add missing braces in
bnx2x:bnx2x_link_initialize'), or add the follow-up (commit 937e5c3d63a1
'bnx2x: Restore a call to config_init') with the net result of fixing
indentation.

Ben.

-- 
Ben Hutchings, Staff Engineer, Solarflare
Not speaking for my employer; that's the marketing department's job.
They asked us to note that Solarflare product names are trademarked.

^ permalink raw reply

* Re: [PATCH stable] ipv6: restrict neighbor entry creation to output flow
From: David Miller @ 2013-09-09 17:29 UTC (permalink / raw)
  To: jiri; +Cc: hannes, mleitner, netdev, dbanerje, yoshfuji
In-Reply-To: <20130909121719.GA3269@minipsycho.brq.redhat.com>

From: Jiri Pirko <jiri@resnulli.us>
Date: Mon, 9 Sep 2013 14:17:19 +0200

> When do you plan to push this to stable maintainers?

Jiri, it's in the -stable queue and that means I will send it at an
appropriate time.

I'm working on a submission at the moment, but every extra query like
yours that I have to answer takes up time I could be spending on it.

^ permalink raw reply

* Re: RFC: crash in fib6_clean_all() while loading ipv6 module
From: David Miller @ 2013-09-09 17:27 UTC (permalink / raw)
  To: mkubecek; +Cc: netdev
In-Reply-To: <20130909100515.GA16056@unicorn.suse.cz>

From: Michal Kubecek <mkubecek@suse.cz>
Date: Mon, 9 Sep 2013 12:05:15 +0200

> This could be prevented by setting a flag when ip6_route_init() is
> complete and not calling fib6_run_gc() from ndisc_netdev_event() until
> the flag is set. However, I don't like the idea of adding a test which
> will be useful only in a short window while loading ipv6 module.

Please just initialize the parts of ipv6 in the correct order necessary
to prevent this problem.

I strongly dislike using flags when it's simply an initialization
ordering problem.  It's just like registering a device interrupt
handler before the driver's data structures are properly set up.

^ permalink raw reply

* [PATCH 1/1] bridge: fix message_age_timer calculation
From: Chris Healy @ 2013-09-09 16:56 UTC (permalink / raw)
  To: Stephen Hemminger, David S. Miller; +Cc: netdev, bridge, buytenh

This changes the message_age_timer calculation to use the BPDU's max age as opposed to the local bridge's max age.  This is in accordance with section 8.6.2.3.2 Step 2 of the 802.1D-1998 specification.

With the current implementation, when running with very large bridge diameters, convergence will not always occur even if a root bridge is configured to have a longer max age.

Tested successfully on bridge diameters of ~200.

Signed-off-by: Chris Healy <cphealy@gmail.com>
---
 net/bridge/br_stp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c
index 1c0a50f..f1887ba 100644
--- a/net/bridge/br_stp.c
+++ b/net/bridge/br_stp.c
@@ -209,7 +209,7 @@ static void br_record_config_information(struct net_bridge_port *p,
 	p->designated_age = jiffies - bpdu->message_age;

 	mod_timer(&p->message_age_timer, jiffies
-		  + (p->br->max_age - bpdu->message_age));
+		  + (bpdu->max_age - bpdu->message_age));
 }

 /* called under bridge lock */
-- 
1.8.1.2

^ permalink raw reply related

* Re: rtnl_lock deadlock on 3.10
From: Steve Wise @ 2013-09-09 16:48 UTC (permalink / raw)
  To: Shawn Bohrer, roland-BHEL68pLQRGGvPXPguhicg
  Cc: Bart Van Assche, Shawn Bohrer, Or Gerlitz, Cong Wang,
	netdev-u79uwXL29TY76Z2rM5mHXA, linux-rdma-u79uwXL29TY76Z2rM5mHXA,
	swise-ut6Up61K2wZBDgjK7y7TUQ
In-Reply-To: <20130906231901.GB10419-/vebjAlq/uFE7V8Yqttd03bhEEblAqRIDbRjUBewulXQT0dZR+AlfA@public.gmane.org>

On 9/6/2013 6:19 PM, Shawn Bohrer wrote:
> On Thu, Sep 05, 2013 at 10:14:51AM -0500, Steve Wise wrote:
>> Roland, what do you think?
>>
>> As I've said, I think we should go ahead with using the rtnl lock in
>> the core.  Is there a complete patch available for review?  looks
>> like the original was a partial fix.
> I guess I should realize that when no one jumps at fixing my issues
> for me that they probably aren't simple to fix.  The solution that
> Cong proposed was to acquire rtnl_lock() before acquiring the
> infiniband device_mutex, and his partial patch did that in
> ib_register_client().  The problem is that you would also need to do
> that in ib_unregister_client(), ib_register_device(), and
> ib_unregister_device(), and that brings us back to the original
> problem which was that cxgb3 was holding the rtnl_lock() when it
> called ib_register_device().  Thus with the proposed fix I believe
> cxgb3 would already be holding the rtnl_lock() and then call
> ib_register_device() which would try to acquire the rtnl_lock() again
> and deadlock for a different reason.
>
> Actually how does this currently work?  ib_register_device() calls
> client->add() for each client in the list which should call
> ipoib_add_one() which calls register_netdev().  Shouldn't that also
> deadlock in the cxgb3 case?

cxgb3 is an iWARP device and doesn't support IPoIB.

>
> Also while digging through this I think I see another bug which is
> that ipoib_dev_cleanup() can be called from ipoib_add_port() but in
> the current code ipoib_add_port() is not holding the rtnl_lock() which
> appears to be a requirement of ipoib_dev_cleanup().
>
> Sigh...  I'm going to stop looking at this for now and hopefully
> someone can propose a better solution to this issue.

I can help with this, but I'm waiting for Roland to chime in.

Steve.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* Re: [PATCH net-next v4 1/6] bonding: simplify and use RCU protection for 3ad xmit path
From: Ding Tianhong @ 2013-09-09 14:53 UTC (permalink / raw)
  To: Veaceslav Falico
  Cc: Ding Tianhong, Nikolay Aleksandrov, David S. Miller, Netdev
In-Reply-To: <20130909095752.GC2048@redhat.com>

于 2013/9/9 17:57, Veaceslav Falico 写道:
> On Mon, Sep 09, 2013 at 04:58:29PM +0800, Ding Tianhong wrote:
>> On 2013/9/8 14:05, Ding Tianhong wrote:
>>
>> Hi Veaceslav and Nik:
>>
>> please take a moment to reveiw the function just modify for 
>> bond_XXX_rcu,
>> and give me some advice. thanks for the help again.:)
>>
>> +#define bond_first_slave_rcu(bond) \
>> + list_first_or_null_rcu(&(bond)->slave_list, struct slave, list);
>> +#define bond_last_slave_rcu(bond) \
>> + ({struct list_head *__slave_list = &(bond)->slave_list; \
>> + struct list_head __rcu *__prev = \
>> + (*((struct list_head __rcu **)(&(__slave_list)->prev)));\
>> + likely(__slave_list != __prev) ? \
>> + container_of(__prev, struct slave, list) : NULL;})
>
> Please take a look at Nikolay's reply to my RCU email -
> http://www.spinics.net/lists/netdev/msg249805.html . And mine also, to 
> his
> email. In short - RCU doesn't guarantee ->prev, so better take the 
> approach
> of eliminating bond_last/prev_slave completely.
>
yes, I see the message, the list_del_rcu will make the slave->list 
->prev = LIST_POISON2,
the bond->slave_list will not be set to the messae, the prev will point 
a slave->list or itself,
so I think it will be ok here, please correct me if I miss something.

Best Regards
Ding

>> +
>> #define bond_is_first_slave(bond, pos) ((pos)->list.prev == 
>> &(bond)->slave_list)
>> #define bond_is_last_slave(bond, pos) ((pos)->list.next == 
>> &(bond)->slave_list)
>>
>> @@ -93,6 +117,29 @@
>> (bond_is_first_slave(bond, pos) ? bond_last_slave(bond) : \
>> bond_to_slave((pos)->list.prev))
>>
>> +/* Since bond_first/last_slave_rcu can return NULL, these can return 
>> NULL too */
>> +#define bond_next_slave_rcu(bond, pos) \
>> + ({struct list_head *__slave_list = &(bond)->slave_list; \
>> + struct list_head __rcu *__next = list_next_rcu(__slave_list); \
>> + struct list_head *__pos_list = &(pos)->list; \
>> + struct list_head __rcu *__pos_next = list_next_rcu(__pos_list); \
>> + likely(__pos_next != __slave_list) ? \
>> + container_of(__pos_next, struct slave, list) : \
>> + container_of(__next, struct slave, list); \
>> + })
>
> Nice, but can be shortened - we know that pos won't go away.

OK, clean it soon.

>
>> +
>> +#define bond_prev_slave_rcu(bond, pos) \
>> + ({struct list_head *__slave_list = &(bond)->slave_list; \
>> + struct list_head __rcu *__prev = \
>> + (*((struct list_head __rcu **)(&(__slave_list)->prev)));\
>> + struct list_head *__pos_list = &(pos)->list; \
>> + struct list_head __rcu *__pos_prev = (__pos_list->prev 
>> !=LIST_POISON2) ? \
yes, the pos->list will be set to LIST_POISON2 by list_del_rcu, so I add 
a check for it, But
take the approach of eliminating bond_last/prev_slave completely is a 
wise decision, I agree.

>> + (*((struct list_head __rcu **)(&(__pos_list)->prev))) : NULL; \
>> + likely(__pos_prev != __slave_list) ? \
>> + ((__pos_prev) ? list_entry_rcu(__pos_prev, struct slave, list) : 
>> NULL;) : \
>> + (list_entry_rcu(__prev, struct slave, list)); \
>> + })
>
> Same remark as above about prev.
>
>> +
>>
>>
>> -#define bond_for_each_slave_from(bond, pos, cnt, start) \
>> - for (cnt = 0, pos = start; pos && cnt < (bond)->slave_cnt; \
>> - cnt++, pos = bond_next_slave(bond, pos))
>> -
>> +#define bond_for_each_slave_from(bond, pos, start) \
>> + for (pos = start; pos; (pos = bond_next_slave(bond, pos)) != start ? \
>> + (pos) : (pos = NULL))
>> +
>> +#define bond_for_each_slave_from_rcu(bond, pos, start) \
yes, it is a little tedious. I think it could be more easier and shorter.

>> + for ({struct list_head *__start = &(start)->list; \
>> + struct list_head *__slave_list = &(bond)->slave_list; \
>> + pos = list_entry_rcu(__start, struct slave, list);}; \
>> + pos; \
the only way to get out of the loop is that pos is NULL.
>> + {struct list_head __rcu *__next = list_next_rcu(pos->next); \
>> + __next != __slave_list ? \
>> + __next : __next = list_next_rcu(__next->next); \
first, check whether the pos->next is the last one in the slave_list, if 
it does, get the
first slave of the bond->slave_list.
>>
>> + __next != __start ? \
>> + pos = list_entry_rcu(__next, struct slave, list) : \
>> + pos = NULL; \
second, check whether the pos is reach the start, if not, continue, 
otherwise, the pos
will be set to NULL, so break the loop.
>> + })
>
> Jeez, I don't even want to review it. It's too complex and too hard to
> maintain, even if it works. Can you please make something 
> shorter/easier to
> understand?
>

Best Regards.
Ding

>> +
>>
>> Best regards
>> Ding
>>
>>
>>>> -- 
>>>> To unsubscribe from this list: send the line "unsubscribe netdev" in
>>>> the body of a message to majordomo@vger.kernel.org
>>>> More majordomo info at http://vger.kernel.org/majordomo-info.html
>>>>
>>>
>>>
>>> .
>>>
>>
>>
> -- 
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
>

^ permalink raw reply

* Re: [PATCH net] bnx2x: Fix configuration of doorbell block
From: Eric Dumazet @ 2013-09-09 14:17 UTC (permalink / raw)
  To: Ariel Elior; +Cc: David Miller, netdev, Eilon Greenstein
In-Reply-To: <1378727487-6921-1-git-send-email-ariele@broadcom.com>

On Mon, 2013-09-09 at 14:51 +0300, Ariel Elior wrote:
> As part of VF RSS feature doorbell block was configured not to use dpm, but
> a small part of configuration was left out, preventing the driver from sending
> tx messages to the device. This patch adds the missing configuration.
> 
> Reported-by: Eric Dumazet <eric.dumazet@gmil.com>
> Signed-off-by: Ariel Elior <ariele@broadcom.com>
> Signed-off-by: Eilon Greenstein <eilong@broadcom.com>
> ---
>  drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c  |    1 +
>  drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c |    3 ---
>  2 files changed, 1 insertions(+), 3 deletions(-)

Thanks for fixing this.

Tested-by: Eric Dumazet <edumazet@google.com>

^ permalink raw reply

* Re: [PATCH net] net: sctp: fix bug in sctp_poll for SOCK_SELECT_ERR_QUEUE
From: Vlad Yasevich @ 2013-09-09 13:56 UTC (permalink / raw)
  To: Daniel Borkmann; +Cc: davem, netdev, linux-sctp, Jacob Keller
In-Reply-To: <1378565099-20987-1-git-send-email-dborkman@redhat.com>

On 09/07/2013 10:44 AM, Daniel Borkmann wrote:
> If we do not add braces around ...
>
>    mask |= POLLERR |
>            sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0;
>
> ... then this condition always evaluates to true as POLLERR is
> defined as 8 and binary or'd with whatever result comes out of
> sock_flag(). Hence instead of (X | Y) ? A : B, transform it into
> X | (Y ? A : B). Unfortunately, commit 8facd5fb73 ("net: fix
> smatch warnings inside datagram_poll") forgot about SCTP. :-(
>
> Introduced by 7d4c04fc170 ("net: add option to enable error queue
> packets waking select").
>
> Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
> Cc Jacob Keller <jacob.e.keller@intel.com>

Acked-by: Vlad Yasevich <vyasevich@gmail.com>

-vlad

> ---
>   net/sctp/socket.c | 2 +-
>   1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/net/sctp/socket.c b/net/sctp/socket.c
> index d5d5882..5462bbb 100644
> --- a/net/sctp/socket.c
> +++ b/net/sctp/socket.c
> @@ -6176,7 +6176,7 @@ unsigned int sctp_poll(struct file *file, struct socket *sock, poll_table *wait)
>   	/* Is there any exceptional events?  */
>   	if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
>   		mask |= POLLERR |
> -			sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0;
> +			(sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0);
>   	if (sk->sk_shutdown & RCV_SHUTDOWN)
>   		mask |= POLLRDHUP | POLLIN | POLLRDNORM;
>   	if (sk->sk_shutdown == SHUTDOWN_MASK)
>

^ permalink raw reply

* Re: TSQ accounting skb->truesize degrades throughput for large packets
From: Eric Dumazet @ 2013-09-09 13:47 UTC (permalink / raw)
  To: Jason Wang
  Cc: Zoltan Kiss, Wei Liu, Jonathan Davies, Ian Campbell, netdev,
	xen-devel, Michael S. Tsirkin
In-Reply-To: <522D9466.6020205@redhat.com>

On Mon, 2013-09-09 at 17:27 +0800, Jason Wang wrote:

> Virtio-net orphans the skb in .ndo_start_xmit() so TSQ can not throttle
> packets in device accurately, and it also can't do BQL. Does this mean
> TSQ should be disabled for virtio-net?
> 

If skb are orphaned, there is no way TSQ can work at all.

It is already disabled, so why do you want to disable it?

^ permalink raw reply

* Re: [PATCH] bnx2x: avoid atomic allocations during initialization
From: Michal Schmidt @ 2013-09-09 12:20 UTC (permalink / raw)
  To: Dmitry Kravkov
  Cc: David Miller, netdev@vger.kernel.org, Ariel Elior,
	Eilon Greenstein
In-Reply-To: <504C9EFCA2D0054393414C9CB605C37F20D90E3D@SJEXCHMB06.corp.ad.broadcom.com>

On 09/07/2013 03:45 AM, Dmitry Kravkov wrote:
> Once you allocated the memory during initialization, you will most
> probably fail to allocate its replacement during RX handling (on this
> machine).

Why do you think this would happen "most probably"? I would instead
expect the VM subsystem to respond to the memory pressure created by the
initial GFP_KERNEL allocations by freeing some memory to allow the
future atomic allocations to succeed.

Michal

^ permalink raw reply

* Re: [PATCH stable] ipv6: restrict neighbor entry creation to output flow
From: Jiri Pirko @ 2013-09-09 12:17 UTC (permalink / raw)
  To: David Miller; +Cc: hannes, mleitner, netdev, dbanerje, yoshfuji
In-Reply-To: <20130815.155454.417440388188230172.davem@davemloft.net>

Fri, Aug 16, 2013 at 12:54:54AM CEST, davem@davemloft.net wrote:
>From: Hannes Frederic Sowa <hannes@stressinduktion.org>
>Date: Wed, 14 Aug 2013 17:00:54 +0200
>
>> On Wed, Aug 14, 2013 at 10:53:27AM -0300, Marcelo Ricardo Leitner wrote:
>>> This patch is based on 3.2.y branch, the one used by the reporter. Please let me
>>> know if it should be different. Thanks.
>>> 
>>> ---8<---
>>> 
>>> Commit 0d6a77079c475033cb622c07c5a880b392ef664e introduced a regression on
>>> which routes to local delivery would not work anymore. Like this:
>>> 
>>>     $ ip -6 route add local 2001::/64 dev lo
>>>     $ ping6 -c1 2001::9
>>>     PING 2001::9(2001::9) 56 data bytes
>>>     ping: sendmsg: Invalid argument
>>> 
>>> As this is a local delivery, that commit would not allow the creation of a
>>> neighbor entry and thus the packet cannot be sent.
>>> 
>>> But as TPROXY scenario actually needs to avoid the neighbor entry creation only
>>> for input flow, this patch now limits previous patch to input flow, keeping
>>> output as before that patch.
>>> 
>>> Reported-by: Debabrata Banerjee <dbavatar@gmail.com>
>>> Signed-off-by: Marcelo Ricardo Leitner <mleitner@redhat.com>
>>> CC: Hannes Frederic Sowa <hannes@stressinduktion.org>
>> 
>> Looks good, thanks Marcelo!
>> 
>> Acked-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
>> 
>> David, this patch is for all stable kernels except the 3.10 series.
>> It does not apply cleanly throughout the whole longterm kernels but the
>> changes should not be too difficult to adapt. Do you take care of this
>> or can we do something to ease this process?
>
>I've queued it up for -stable, thanks.

Hi Dave.

When do you plan to push this to stable maintainers?

Thanks!

Jiri

^ permalink raw reply

* [PATCH net] bnx2x: Fix configuration of doorbell block
From: Ariel Elior @ 2013-09-09 11:51 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, Eilon Greenstein, Eric Dumazet, Ariel Elior

As part of VF RSS feature doorbell block was configured not to use dpm, but
a small part of configuration was left out, preventing the driver from sending
tx messages to the device. This patch adds the missing configuration.

Reported-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: Ariel Elior <ariele@broadcom.com>
Signed-off-by: Eilon Greenstein <eilong@broadcom.com>
---
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c  |    1 +
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c |    3 ---
 2 files changed, 1 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
index 634a793..2f8dbbb 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
@@ -7645,6 +7645,7 @@ static int bnx2x_init_hw_func(struct bnx2x *bp)
 
 	bnx2x_init_block(bp, BLOCK_TM, init_phase);
 	bnx2x_init_block(bp, BLOCK_DORQ, init_phase);
+	REG_WR(bp, DORQ_REG_MODE_ACT, 1); /* no dpm */
 
 	bnx2x_iov_init_dq(bp);
 
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
index b26eb83..2604b62 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
@@ -1756,9 +1756,6 @@ void bnx2x_iov_init_dq(struct bnx2x *bp)
 	REG_WR(bp, DORQ_REG_VF_TYPE_MIN_MCID_0, 0);
 	REG_WR(bp, DORQ_REG_VF_TYPE_MAX_MCID_0, 0x1ffff);
 
-	/* set the number of VF allowed doorbells to the full DQ range */
-	REG_WR(bp, DORQ_REG_VF_NORM_MAX_CID_COUNT, 0x20000);
-
 	/* set the VF doorbell threshold */
 	REG_WR(bp, DORQ_REG_VF_USAGE_CT_LIMIT, 4);
 }
-- 
1.7.1

^ permalink raw reply related

* Re: [PATCH 09/16] c_can: explicit 32bit access on D_CAN to message buffer data register
From: Marc Kleine-Budde @ 2013-09-09 11:20 UTC (permalink / raw)
  To: Benedikt Spranger
  Cc: netdev, Alexander Frank, Sebastian Andrzej Siewior,
	Holger Dengler, linux-can@vger.kernel.org
In-Reply-To: <1378711513-2548-10-git-send-email-b.spranger@linutronix.de>

[-- Attachment #1: Type: text/plain, Size: 2170 bytes --]

On 09/09/2013 09:25 AM, Benedikt Spranger wrote:
> change the 16bit access of ARB1_REG and DATA1/2_REG to a 32bit access.
> 
> Signed-off-by: Benedikt Spranger <b.spranger@linutronix.de>
> ---
>  drivers/net/can/c_can/c_can.c | 19 ++++++++++---------
>  1 file changed, 10 insertions(+), 9 deletions(-)
> 
> diff --git a/drivers/net/can/c_can/c_can.c b/drivers/net/can/c_can/c_can.c
> index 4b94f2d..c573399 100644
> --- a/drivers/net/can/c_can/c_can.c
> +++ b/drivers/net/can/c_can/c_can.c
> @@ -360,7 +360,6 @@ static inline void c_can_object_put(struct net_device *dev,
>  static void c_can_write_msg_object(struct net_device *dev,
>  			int iface, struct can_frame *frame, int objno)
>  {
> -	int i;
>  	u32 flags = IF_ARB_MSGVAL;
>  	unsigned int id;
>  	struct c_can_priv *priv = netdev_priv(dev);
> @@ -376,15 +375,17 @@ static void c_can_write_msg_object(struct net_device *dev,
>  
>  	id |= flags;
>  
> -	priv->write_reg(priv, C_CAN_IFACE(ARB1_REG, iface),
> -				IFX_WRITE_LOW_16BIT(id));
> -	priv->write_reg(priv, C_CAN_IFACE(ARB2_REG, iface),
> -				IFX_WRITE_HIGH_16BIT(id));
> +	c_can_writereg32(priv, C_CAN_IFACE(ARB1_REG, iface),
> +			IFX_WRITE_HIGH_16BIT(id),
> +			IFX_WRITE_LOW_16BIT(id));
> +
> +	c_can_writereg32(priv, C_CAN_IFACE(DATA1_REG, iface),
> +			 frame->data[2] | frame->data[3] << 8,
> +			 frame->data[0] | frame->data[1] << 8);

You can use something like beXX_to_cpup((__be32 *)&cf->data[0]) here.

>  
> -	for (i = 0; i < frame->can_dlc; i += 2) {
> -		priv->write_reg(priv, C_CAN_IFACE(DATA1_REG, iface) + i / 2,
> -				frame->data[i] | (frame->data[i + 1] << 8));
> -	}
> +	c_can_writereg32(priv, C_CAN_IFACE(DATA3_REG, iface),
> +			 frame->data[6] | frame->data[7] << 8,
> +			 frame->data[4] | frame->data[5] << 8);

You write the upper 32 bits unconditionally here; the original code doesn't.

Marc

-- 
Pengutronix e.K.                  | Marc Kleine-Budde           |
Industrial Linux Solutions        | Phone: +49-231-2826-924     |
Vertretung West/Dortmund          | Fax:   +49-5121-206917-5555 |
Amtsgericht Hildesheim, HRA 2686  | http://www.pengutronix.de   |


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 259 bytes --]

^ permalink raw reply

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox