netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [patch net-next] net: squash ->rx_handler and ->rx_handler_data into single rcu pointer
@ 2013-03-30 15:57 Jiri Pirko
       [not found] ` <1364659034-7049-1-git-send-email-jiri-rHqAuBHg3fBzbRFIqnYvSA@public.gmane.org>
  0 siblings, 1 reply; 9+ messages in thread
From: Jiri Pirko @ 2013-03-30 15:57 UTC (permalink / raw)
  To: netdev
  Cc: davem, edumazet, fubar, andy, kaber, stephen, jesse,
	alexander.h.duyck, xiyou.wangcong, dev, rostedt,
	nicolas.2p.debian, tglx, streeter, paulmck, ivecera

No need to have two pointers in struct netdevice for rx_handler func and
priv data. Just embed rx_handler structure into driver port_priv and
have ->func pointer there. This introduces no performance penalty,
reduces struct netdevice by one pointer and reduces number of needed
rcu_dereference calls from 2 to 1.

Note this also fixes the race bug pointed out by Steven Rostedt and
fixed by patch "[PATCH] net: add a synchronize_net() in
netdev_rx_handler_unregister()" by Eric Dumazet. This is based on
current net-next tree where the patch is not applied yet.
I can rebase it on whatever tree/state, just say so.

Smoke tested with all drivers who use rx_handler.

Reported-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Jiri Pirko <jiri@resnulli.us>
---
 drivers/net/bonding/bond_main.c |  9 +++++----
 drivers/net/bonding/bonding.h   | 16 +++++++++++-----
 drivers/net/macvlan.c           | 30 +++++++++++++++++++++---------
 drivers/net/team/team.c         | 22 +++++++++++-----------
 include/linux/if_team.h         |  1 +
 include/linux/netdevice.h       | 17 ++++++++++++-----
 net/bridge/br_if.c              |  3 ++-
 net/bridge/br_input.c           |  5 +++--
 net/bridge/br_private.h         | 26 ++++++++++++++++++--------
 net/core/dev.c                  | 24 +++++++++++++++++-------
 net/openvswitch/dp_notify.c     |  2 +-
 net/openvswitch/vport-netdev.c  | 22 +++++-----------------
 net/openvswitch/vport-netdev.h  | 16 +++++++++++++++-
 net/openvswitch/vport.h         |  2 ++
 14 files changed, 124 insertions(+), 71 deletions(-)

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 11a8cb3..03e9895 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -1440,7 +1440,8 @@ static bool bond_should_deliver_exact_match(struct sk_buff *skb,
 	return false;
 }
 
-static rx_handler_result_t bond_handle_frame(struct sk_buff **pskb)
+static rx_handler_result_t
+bond_handle_frame(struct sk_buff **pskb, struct netdev_rx_handler *rx_handler)
 {
 	struct sk_buff *skb = *pskb;
 	struct slave *slave;
@@ -1455,7 +1456,7 @@ static rx_handler_result_t bond_handle_frame(struct sk_buff **pskb)
 
 	*pskb = skb;
 
-	slave = bond_slave_get_rcu(skb->dev);
+	slave = bond_slave_get_by_rx_handler(rx_handler);
 	bond = slave->bond;
 
 	if (bond->params.arp_interval)
@@ -1880,8 +1881,8 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
 	if (res)
 		goto err_detach;
 
-	res = netdev_rx_handler_register(slave_dev, bond_handle_frame,
-					 new_slave);
+	netdev_rx_handler_init(&new_slave->rx_handler, bond_handle_frame);
+	res = netdev_rx_handler_register(slave_dev, &new_slave->rx_handler);
 	if (res) {
 		pr_debug("Error %d calling netdev_rx_handler_register\n", res);
 		goto err_dest_symlinks;
diff --git a/drivers/net/bonding/bonding.h b/drivers/net/bonding/bonding.h
index 2baec24..cd3ccb4 100644
--- a/drivers/net/bonding/bonding.h
+++ b/drivers/net/bonding/bonding.h
@@ -172,7 +172,8 @@ struct vlan_entry {
 };
 
 struct slave {
-	struct net_device *dev; /* first - useful for panic debug */
+	struct netdev_rx_handler rx_handler;
+	struct net_device *dev;
 	struct slave *next;
 	struct slave *prev;
 	struct bonding *bond; /* our master */
@@ -256,11 +257,16 @@ static inline bool bond_vlan_used(struct bonding *bond)
 	return !list_empty(&bond->vlan_list);
 }
 
-#define bond_slave_get_rcu(dev) \
-	((struct slave *) rcu_dereference(dev->rx_handler_data))
+static inline struct slave *
+bond_slave_get_by_rx_handler(const struct netdev_rx_handler *rx_handler)
+{
+	return container_of(rx_handler, struct slave, rx_handler);
+}
 
-#define bond_slave_get_rtnl(dev) \
-	((struct slave *) rtnl_dereference(dev->rx_handler_data))
+static inline struct slave *bond_slave_get_rtnl(const struct net_device *dev)
+{
+	return bond_slave_get_by_rx_handler(rtnl_dereference(dev->rx_handler));
+}
 
 /**
  * Returns NULL if the net_device does not belong to any of the bond's slaves
diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index 73abbc1..9a065e4 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -36,6 +36,7 @@
 #define MACVLAN_HASH_SIZE	(1 << BITS_PER_BYTE)
 
 struct macvlan_port {
+	struct netdev_rx_handler rx_handler;
 	struct net_device	*dev;
 	struct hlist_head	vlan_hash[MACVLAN_HASH_SIZE];
 	struct list_head	vlans;
@@ -46,9 +47,17 @@ struct macvlan_port {
 
 static void macvlan_port_destroy(struct net_device *dev);
 
-#define macvlan_port_get_rcu(dev) \
-	((struct macvlan_port *) rcu_dereference(dev->rx_handler_data))
-#define macvlan_port_get(dev) ((struct macvlan_port *) dev->rx_handler_data)
+static struct macvlan_port *
+macvlan_port_get_by_rx_handler(const struct netdev_rx_handler *rx_handler)
+{
+	return container_of(rx_handler, struct macvlan_port, rx_handler);
+}
+
+static struct macvlan_port *macvlan_port_get_rtnl(const struct net_device *dev)
+{
+	return macvlan_port_get_by_rx_handler(rtnl_dereference(dev->rx_handler));
+}
+
 #define macvlan_port_exists(dev) (dev->priv_flags & IFF_MACVLAN_PORT)
 
 static struct macvlan_dev *macvlan_hash_lookup(const struct macvlan_port *port,
@@ -174,7 +183,9 @@ static void macvlan_broadcast(struct sk_buff *skb,
 }
 
 /* called under rcu_read_lock() from netif_receive_skb */
-static rx_handler_result_t macvlan_handle_frame(struct sk_buff **pskb)
+static rx_handler_result_t
+macvlan_handle_frame(struct sk_buff **pskb,
+		     struct netdev_rx_handler *rx_handler)
 {
 	struct macvlan_port *port;
 	struct sk_buff *skb = *pskb;
@@ -185,7 +196,7 @@ static rx_handler_result_t macvlan_handle_frame(struct sk_buff **pskb)
 	unsigned int len = 0;
 	int ret = NET_RX_DROP;
 
-	port = macvlan_port_get_rcu(skb->dev);
+	port = macvlan_port_get_by_rx_handler(rx_handler);
 	if (is_multicast_ether_addr(eth->h_dest)) {
 		skb = ip_check_defrag(skb, IP_DEFRAG_MACVLAN);
 		if (!skb)
@@ -693,7 +704,8 @@ static int macvlan_port_create(struct net_device *dev)
 	for (i = 0; i < MACVLAN_HASH_SIZE; i++)
 		INIT_HLIST_HEAD(&port->vlan_hash[i]);
 
-	err = netdev_rx_handler_register(dev, macvlan_handle_frame, port);
+	netdev_rx_handler_init(&port->rx_handler, macvlan_handle_frame);
+	err = netdev_rx_handler_register(dev, &port->rx_handler);
 	if (err)
 		kfree(port);
 	else
@@ -703,7 +715,7 @@ static int macvlan_port_create(struct net_device *dev)
 
 static void macvlan_port_destroy(struct net_device *dev)
 {
-	struct macvlan_port *port = macvlan_port_get(dev);
+	struct macvlan_port *port = macvlan_port_get_rtnl(dev);
 
 	dev->priv_flags &= ~IFF_MACVLAN_PORT;
 	netdev_rx_handler_unregister(dev);
@@ -772,7 +784,7 @@ int macvlan_common_newlink(struct net *src_net, struct net_device *dev,
 		if (err < 0)
 			return err;
 	}
-	port = macvlan_port_get(lowerdev);
+	port = macvlan_port_get_rtnl(lowerdev);
 
 	/* Only 1 macvlan device can be created in passthru mode */
 	if (port->passthru)
@@ -921,7 +933,7 @@ static int macvlan_device_event(struct notifier_block *unused,
 	if (!macvlan_port_exists(dev))
 		return NOTIFY_DONE;
 
-	port = macvlan_port_get(dev);
+	port = macvlan_port_get_rtnl(dev);
 
 	switch (event) {
 	case NETDEV_CHANGE:
diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c
index 621c1bd..86dcbf1 100644
--- a/drivers/net/team/team.c
+++ b/drivers/net/team/team.c
@@ -40,18 +40,17 @@
 
 #define team_port_exists(dev) (dev->priv_flags & IFF_TEAM_PORT)
 
-static struct team_port *team_port_get_rcu(const struct net_device *dev)
+static struct team_port *
+team_port_get_by_rx_handler(const struct netdev_rx_handler *rx_handler)
 {
-	struct team_port *port = rcu_dereference(dev->rx_handler_data);
-
-	return team_port_exists(dev) ? port : NULL;
+	return container_of(rx_handler, struct team_port, rx_handler);
 }
 
 static struct team_port *team_port_get_rtnl(const struct net_device *dev)
 {
-	struct team_port *port = rtnl_dereference(dev->rx_handler_data);
-
-	return team_port_exists(dev) ? port : NULL;
+	if (!team_port_exists(dev))
+		return NULL;
+	return team_port_get_by_rx_handler(rtnl_dereference(dev->rx_handler));
 }
 
 /*
@@ -632,7 +631,8 @@ static int team_change_mode(struct team *team, const char *kind)
  ************************/
 
 /* note: already called with rcu_read_lock */
-static rx_handler_result_t team_handle_frame(struct sk_buff **pskb)
+static rx_handler_result_t
+team_handle_frame(struct sk_buff **pskb, struct netdev_rx_handler *rx_handler)
 {
 	struct sk_buff *skb = *pskb;
 	struct team_port *port;
@@ -645,7 +645,7 @@ static rx_handler_result_t team_handle_frame(struct sk_buff **pskb)
 
 	*pskb = skb;
 
-	port = team_port_get_rcu(skb->dev);
+	port = team_port_get_by_rx_handler(rx_handler);
 	team = port->team;
 	if (!team_port_enabled(port)) {
 		/* allow exact match delivery for disabled ports */
@@ -1076,8 +1076,8 @@ static int team_port_add(struct team *team, struct net_device *port_dev)
 		goto err_set_upper_link;
 	}
 
-	err = netdev_rx_handler_register(port_dev, team_handle_frame,
-					 port);
+	netdev_rx_handler_init(&port->rx_handler, team_handle_frame);
+	err = netdev_rx_handler_register(port_dev, &port->rx_handler);
 	if (err) {
 		netdev_err(dev, "Device %s failed to register rx_handler\n",
 			   portname);
diff --git a/include/linux/if_team.h b/include/linux/if_team.h
index 4474557..72fd12e 100644
--- a/include/linux/if_team.h
+++ b/include/linux/if_team.h
@@ -29,6 +29,7 @@ struct team_pcpu_stats {
 struct team;
 
 struct team_port {
+	struct netdev_rx_handler rx_handler;
 	struct net_device *dev;
 	struct hlist_node hlist; /* node in enabled ports hash list */
 	struct list_head list; /* node in ordinary list */
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 1dbb02c..738226e 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -387,8 +387,15 @@ enum rx_handler_result {
 	RX_HANDLER_EXACT,
 	RX_HANDLER_PASS,
 };
+
 typedef enum rx_handler_result rx_handler_result_t;
-typedef rx_handler_result_t rx_handler_func_t(struct sk_buff **pskb);
+struct netdev_rx_handler;
+typedef rx_handler_result_t rx_handler_func_t(struct sk_buff **pskb,
+					      struct netdev_rx_handler *rx_handler);
+
+struct netdev_rx_handler {
+	rx_handler_func_t *func;
+};
 
 extern void __napi_schedule(struct napi_struct *n);
 
@@ -1208,8 +1215,7 @@ struct net_device {
 #endif
 #endif
 
-	rx_handler_func_t __rcu	*rx_handler;
-	void __rcu		*rx_handler_data;
+	struct netdev_rx_handler __rcu *rx_handler;
 
 	struct netdev_queue __rcu *ingress_queue;
 
@@ -2212,9 +2218,10 @@ static inline void napi_free_frags(struct napi_struct *napi)
 	napi->skb = NULL;
 }
 
+extern void netdev_rx_handler_init(struct netdev_rx_handler *rx_handler,
+				   rx_handler_func_t *func);
 extern int netdev_rx_handler_register(struct net_device *dev,
-				      rx_handler_func_t *rx_handler,
-				      void *rx_handler_data);
+				      struct netdev_rx_handler *rx_handler);
 extern void netdev_rx_handler_unregister(struct net_device *dev);
 
 extern bool		dev_valid_name(const char *name);
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index ef1b914..f855c07 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -370,7 +370,8 @@ int br_add_if(struct net_bridge *br, struct net_device *dev)
 	if (err)
 		goto err4;
 
-	err = netdev_rx_handler_register(dev, br_handle_frame, p);
+	netdev_rx_handler_init(&p->rx_handler, br_handle_frame);
+	err = netdev_rx_handler_register(dev, &p->rx_handler);
 	if (err)
 		goto err5;
 
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index 828e2bc..a8bc5a6 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -150,7 +150,8 @@ static int br_handle_local_finish(struct sk_buff *skb)
  * Return NULL if skb is handled
  * note: already called with rcu_read_lock
  */
-rx_handler_result_t br_handle_frame(struct sk_buff **pskb)
+rx_handler_result_t br_handle_frame(struct sk_buff **pskb,
+				    struct netdev_rx_handler *rx_handler)
 {
 	struct net_bridge_port *p;
 	struct sk_buff *skb = *pskb;
@@ -167,7 +168,7 @@ rx_handler_result_t br_handle_frame(struct sk_buff **pskb)
 	if (!skb)
 		return RX_HANDLER_CONSUMED;
 
-	p = br_port_get_rcu(skb->dev);
+	p = br_port_get_by_rx_handler(rx_handler);
 
 	if (unlikely(is_link_local_ether_addr(dest))) {
 		/*
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 3cbf5be..727cc51 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -127,6 +127,7 @@ struct net_bridge_mdb_htable
 
 struct net_bridge_port
 {
+	struct netdev_rx_handler	rx_handler;
 	struct net_bridge		*br;
 	struct net_device		*dev;
 	struct list_head		list;
@@ -180,18 +181,26 @@ struct net_bridge_port
 
 #define br_port_exists(dev) (dev->priv_flags & IFF_BRIDGE_PORT)
 
-static inline struct net_bridge_port *br_port_get_rcu(const struct net_device *dev)
+static inline struct net_bridge_port *
+br_port_get_by_rx_handler(const struct netdev_rx_handler *rx_handler)
 {
-	struct net_bridge_port *port =
-			rcu_dereference_rtnl(dev->rx_handler_data);
+	return container_of(rx_handler, struct net_bridge_port, rx_handler);
+}
 
-	return br_port_exists(dev) ? port : NULL;
+static inline struct net_bridge_port *
+br_port_get_rcu(const struct net_device *dev)
+{
+	if (!br_port_exists(dev))
+		return NULL;
+	return br_port_get_by_rx_handler(rcu_dereference_rtnl(dev->rx_handler));
 }
 
-static inline struct net_bridge_port *br_port_get_rtnl(struct net_device *dev)
+static inline struct net_bridge_port *
+br_port_get_rtnl(const struct net_device *dev)
 {
-	return br_port_exists(dev) ?
-		rtnl_dereference(dev->rx_handler_data) : NULL;
+	if (!br_port_exists(dev))
+		return NULL;
+	return br_port_get_by_rx_handler(rtnl_dereference(dev->rx_handler));
 }
 
 struct br_cpu_netstats {
@@ -429,7 +438,8 @@ extern netdev_features_t br_features_recompute(struct net_bridge *br,
 
 /* br_input.c */
 extern int br_handle_frame_finish(struct sk_buff *skb);
-extern rx_handler_result_t br_handle_frame(struct sk_buff **pskb);
+extern rx_handler_result_t br_handle_frame(struct sk_buff **pskb,
+					   struct netdev_rx_handler *rx_handler);
 
 /* br_ioctl.c */
 extern int br_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd);
diff --git a/net/core/dev.c b/net/core/dev.c
index 2db88df..88e839c 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3296,10 +3296,23 @@ out:
 #endif
 
 /**
+ *	netdev_rx_handler_init - init receive handler structure
+ *	@rx_handler: receive handler to init
+ *	@func: receive handler function
+ *
+ *	Inits receive handler structure.
+ */
+void netdev_rx_handler_init(struct netdev_rx_handler *rx_handler,
+			    rx_handler_func_t *func)
+{
+	rx_handler->func = func;
+}
+EXPORT_SYMBOL_GPL(netdev_rx_handler_init);
+
+/**
  *	netdev_rx_handler_register - register receive handler
  *	@dev: device to register a handler for
  *	@rx_handler: receive handler to register
- *	@rx_handler_data: data pointer that is used by rx handler
  *
  *	Register a receive hander for a device. This handler will then be
  *	called from __netif_receive_skb. A negative errno code is returned
@@ -3310,15 +3323,13 @@ out:
  *	For a general description of rx_handler, see enum rx_handler_result.
  */
 int netdev_rx_handler_register(struct net_device *dev,
-			       rx_handler_func_t *rx_handler,
-			       void *rx_handler_data)
+			       struct netdev_rx_handler *rx_handler)
 {
 	ASSERT_RTNL();
 
 	if (dev->rx_handler)
 		return -EBUSY;
 
-	rcu_assign_pointer(dev->rx_handler_data, rx_handler_data);
 	rcu_assign_pointer(dev->rx_handler, rx_handler);
 
 	return 0;
@@ -3338,7 +3349,6 @@ void netdev_rx_handler_unregister(struct net_device *dev)
 
 	ASSERT_RTNL();
 	RCU_INIT_POINTER(dev->rx_handler, NULL);
-	RCU_INIT_POINTER(dev->rx_handler_data, NULL);
 }
 EXPORT_SYMBOL_GPL(netdev_rx_handler_unregister);
 
@@ -3362,7 +3372,7 @@ static bool skb_pfmemalloc_protocol(struct sk_buff *skb)
 static int __netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc)
 {
 	struct packet_type *ptype, *pt_prev;
-	rx_handler_func_t *rx_handler;
+	struct netdev_rx_handler *rx_handler;
 	struct net_device *orig_dev;
 	struct net_device *null_or_dev;
 	bool deliver_exact = false;
@@ -3445,7 +3455,7 @@ ncls:
 			ret = deliver_skb(skb, pt_prev, orig_dev);
 			pt_prev = NULL;
 		}
-		switch (rx_handler(&skb)) {
+		switch (rx_handler->func(&skb, rx_handler)) {
 		case RX_HANDLER_CONSUMED:
 			ret = NET_RX_SUCCESS;
 			goto unlock;
diff --git a/net/openvswitch/dp_notify.c b/net/openvswitch/dp_notify.c
index 5558350..5248f8e 100644
--- a/net/openvswitch/dp_notify.c
+++ b/net/openvswitch/dp_notify.c
@@ -32,7 +32,7 @@ static int dp_device_event(struct notifier_block *unused, unsigned long event,
 	if (ovs_is_internal_dev(dev))
 		vport = ovs_internal_dev_get_vport(dev);
 	else
-		vport = ovs_netdev_get_vport(dev);
+		vport = ovs_netdev_get_vport_rtnl(dev);
 
 	if (!vport)
 		return NOTIFY_DONE;
diff --git a/net/openvswitch/vport-netdev.c b/net/openvswitch/vport-netdev.c
index 2130d61..0ff6aa1 100644
--- a/net/openvswitch/vport-netdev.c
+++ b/net/openvswitch/vport-netdev.c
@@ -35,9 +35,6 @@
 /* Must be called with rcu_read_lock. */
 static void netdev_port_receive(struct vport *vport, struct sk_buff *skb)
 {
-	if (unlikely(!vport))
-		goto error;
-
 	if (unlikely(skb_warn_if_lro(skb)))
 		goto error;
 
@@ -57,7 +54,8 @@ error:
 }
 
 /* Called with rcu_read_lock and bottom-halves disabled. */
-static rx_handler_result_t netdev_frame_hook(struct sk_buff **pskb)
+static rx_handler_result_t
+netdev_frame_hook(struct sk_buff **pskb, struct netdev_rx_handler *rx_handler)
 {
 	struct sk_buff *skb = *pskb;
 	struct vport *vport;
@@ -65,7 +63,7 @@ static rx_handler_result_t netdev_frame_hook(struct sk_buff **pskb)
 	if (unlikely(skb->pkt_type == PACKET_LOOPBACK))
 		return RX_HANDLER_PASS;
 
-	vport = ovs_netdev_get_vport(skb->dev);
+	vport = ovs_netdev_get_vport_by_rx_handler(rx_handler);
 
 	netdev_port_receive(vport, skb);
 
@@ -100,8 +98,8 @@ static struct vport *netdev_create(const struct vport_parms *parms)
 		goto error_put;
 	}
 
-	err = netdev_rx_handler_register(netdev_vport->dev, netdev_frame_hook,
-					 vport);
+	netdev_rx_handler_init(&vport->rx_handler, netdev_frame_hook);
+	err = netdev_rx_handler_register(netdev_vport->dev, &vport->rx_handler);
 	if (err)
 		goto error_put;
 
@@ -185,16 +183,6 @@ error:
 	return 0;
 }
 
-/* Returns null if this device is not attached to a datapath. */
-struct vport *ovs_netdev_get_vport(struct net_device *dev)
-{
-	if (likely(dev->priv_flags & IFF_OVS_DATAPATH))
-		return (struct vport *)
-			rcu_dereference_rtnl(dev->rx_handler_data);
-	else
-		return NULL;
-}
-
 const struct vport_ops ovs_netdev_vport_ops = {
 	.type		= OVS_VPORT_TYPE_NETDEV,
 	.create		= netdev_create,
diff --git a/net/openvswitch/vport-netdev.h b/net/openvswitch/vport-netdev.h
index 6478079..d3f1471 100644
--- a/net/openvswitch/vport-netdev.h
+++ b/net/openvswitch/vport-netdev.h
@@ -24,7 +24,21 @@
 
 #include "vport.h"
 
-struct vport *ovs_netdev_get_vport(struct net_device *dev);
+#define ovs_netdev_vport_exists(dev) (dev->priv_flags & IFF_OVS_DATAPATH)
+
+static inline struct vport *
+ovs_netdev_get_vport_by_rx_handler(const struct netdev_rx_handler *rx_handler)
+{
+	return container_of(rx_handler, struct vport, rx_handler);
+}
+
+static inline struct vport *
+ovs_netdev_get_vport_rtnl(const struct net_device *dev)
+{
+	if (!ovs_netdev_vport_exists(dev))
+		return NULL;
+	return ovs_netdev_get_vport_by_rx_handler(rtnl_dereference(dev->rx_handler));
+}
 
 struct netdev_vport {
 	struct rcu_head rcu;
diff --git a/net/openvswitch/vport.h b/net/openvswitch/vport.h
index aee7d43..a7a07c0 100644
--- a/net/openvswitch/vport.h
+++ b/net/openvswitch/vport.h
@@ -67,6 +67,7 @@ struct vport_err_stats {
 
 /**
  * struct vport - one port within a datapath
+ * @rx_handler: RX handler structure.
  * @rcu: RCU callback head for deferred destruction.
  * @dp: Datapath to which this port belongs.
  * @upcall_portid: The Netlink port to use for packets received on this port that
@@ -80,6 +81,7 @@ struct vport_err_stats {
  * @err_stats: Points to error statistics used and maintained by vport
  */
 struct vport {
+	struct netdev_rx_handler rx_handler;
 	struct rcu_head rcu;
 	struct datapath	*dp;
 	u32 upcall_portid;
-- 
1.8.1.2

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* Re: [patch net-next] net: squash ->rx_handler and ->rx_handler_data into single rcu pointer
       [not found] ` <1364659034-7049-1-git-send-email-jiri-rHqAuBHg3fBzbRFIqnYvSA@public.gmane.org>
@ 2013-03-30 16:23   ` Eric Dumazet
  2013-03-30 17:13     ` Jiri Pirko
  2013-03-30 19:24   ` Steven Rostedt
  1 sibling, 1 reply; 9+ messages in thread
From: Eric Dumazet @ 2013-03-30 16:23 UTC (permalink / raw)
  To: Jiri Pirko
  Cc: alexander.h.duyck-ral2JQCrhuEAvxtiuMwx3w,
	dev-yBygre7rU0TnMu66kgdUjQ, paulmck-r/Jw6+rmf7HQT0dZR+AlfA,
	streeter-H+wXaHxf7aLQT0dZR+AlfA,
	nicolas.2p.debian-Re5JQEeQqe8AvxtiuMwx3w,
	netdev-u79uwXL29TY76Z2rM5mHXA, fubar-r/Jw6+rmf7HQT0dZR+AlfA,
	rostedt-nx8X9YLhiw1AfugRpC6u6w, kaber-dcUjhNyLwpNeoWH0uzbU5w,
	edumazet-hpIqsD4AKlfQT0dZR+AlfA, ivecera-H+wXaHxf7aLQT0dZR+AlfA,
	tglx-hfZtesqFncYOwBW4kG4KsQ, davem-fT/PcQaiUtIeIZ0/mPfg9Q,
	andy-QlMahl40kYEqcZcGjlUOXw

On Sat, 2013-03-30 at 16:57 +0100, Jiri Pirko wrote:
> No need to have two pointers in struct netdevice for rx_handler func and
> priv data. Just embed rx_handler structure into driver port_priv and
> have ->func pointer there. This introduces no performance penalty,
> reduces struct netdevice by one pointer and reduces number of needed
> rcu_dereference calls from 2 to 1.
> 

Thats not true.

> Note this also fixes the race bug pointed out by Steven Rostedt and
> fixed by patch "[PATCH] net: add a synchronize_net() in
> netdev_rx_handler_unregister()" by Eric Dumazet. This is based on
> current net-next tree where the patch is not applied yet.
> I can rebase it on whatever tree/state, just say so.
> 
> Smoke tested with all drivers who use rx_handler.
> 
> Reported-by: Steven Rostedt <rostedt-nx8X9YLhiw1AfugRpC6u6w@public.gmane.org>
> Signed-off-by: Jiri Pirko <jiri-rHqAuBHg3fBzbRFIqnYvSA@public.gmane.org>
> ---

I see no value for this patch.

It obfuscates things for no good reason.

Once again rcu_dereference(dev->field) has no cost, its a memory read,
like dev->field.

I fear you don't understand enough RCU to make so invasive changes.

Your patch adds a double dereference on fast path, and its more
expensive than two single deref.

dev->rx_handler actually gets the function pointer, and after your patch
we would have to do dev->rx_handler->func instead, which is bad on many
cpus.

I'll send a patch reordering some fields of net_device, because as time
passed, it seems a lot of junk broke work done in commit
cd13539b8bc9ae884 (net: shrinks struct net_device)

offsetof(struct net_device,dev_addr)=0x258
offsetof(struct net_device,rx_handler)=0x2b8
offsetof(struct net_device,ingress_queue)=0x2c8
offsetof(struct net_device,broadcast)=0x278

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [patch net-next] net: squash ->rx_handler and ->rx_handler_data into single rcu pointer
  2013-03-30 16:23   ` Eric Dumazet
@ 2013-03-30 17:13     ` Jiri Pirko
       [not found]       ` <20130330171325.GC1544-RDzucLLXGGI88b5SBfVpbw@public.gmane.org>
  0 siblings, 1 reply; 9+ messages in thread
From: Jiri Pirko @ 2013-03-30 17:13 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: netdev, davem, edumazet, fubar, andy, kaber, stephen, jesse,
	alexander.h.duyck, xiyou.wangcong, dev, rostedt,
	nicolas.2p.debian, tglx, streeter, paulmck, ivecera

Sat, Mar 30, 2013 at 05:23:08PM CET, eric.dumazet@gmail.com wrote:
>On Sat, 2013-03-30 at 16:57 +0100, Jiri Pirko wrote:
>> No need to have two pointers in struct netdevice for rx_handler func and
>> priv data. Just embed rx_handler structure into driver port_priv and
>> have ->func pointer there. This introduces no performance penalty,
>> reduces struct netdevice by one pointer and reduces number of needed
>> rcu_dereference calls from 2 to 1.
>> 
>
>Thats not true.
>
>> Note this also fixes the race bug pointed out by Steven Rostedt and
>> fixed by patch "[PATCH] net: add a synchronize_net() in
>> netdev_rx_handler_unregister()" by Eric Dumazet. This is based on
>> current net-next tree where the patch is not applied yet.
>> I can rebase it on whatever tree/state, just say so.
>> 
>> Smoke tested with all drivers who use rx_handler.
>> 
>> Reported-by: Steven Rostedt <rostedt@goodmis.org>
>> Signed-off-by: Jiri Pirko <jiri@resnulli.us>
>> ---
>
>I see no value for this patch.
>
>It obfuscates things for no good reason.

Well, I like this what you call obfuscation. It looks similar to
list_head and rcu_head. I do not think that this makes the readibility
more difficult.

>
>Once again rcu_dereference(dev->field) has no cost, its a memory read,
>like dev->field.

Well, not entirely true, depends on arch.

>
>I fear you don't understand enough RCU to make so invasive changes.
>
>Your patch adds a double dereference on fast path, and its more
>expensive than two single deref.
>
>dev->rx_handler actually gets the function pointer, and after your patch
>we would have to do dev->rx_handler->func instead, which is bad on many
>cpus.

dev->rx_handler == port_priv and it is treated as such and accessed in
handle_frame driver functions. Is it really that bad to access deref
port_priv->pointer (rx_handler->func) couple of instructions earlier?
I wonder what the difference is in compare with the original code.


Thanks, Jiri

>
>I'll send a patch reordering some fields of net_device, because as time
>passed, it seems a lot of junk broke work done in commit
>cd13539b8bc9ae884 (net: shrinks struct net_device)
>
>offsetof(struct net_device,dev_addr)=0x258
>offsetof(struct net_device,rx_handler)=0x2b8
>offsetof(struct net_device,ingress_queue)=0x2c8
>offsetof(struct net_device,broadcast)=0x278
>
>

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [patch net-next] net: squash ->rx_handler and ->rx_handler_data into single rcu pointer
       [not found] ` <1364659034-7049-1-git-send-email-jiri-rHqAuBHg3fBzbRFIqnYvSA@public.gmane.org>
  2013-03-30 16:23   ` Eric Dumazet
@ 2013-03-30 19:24   ` Steven Rostedt
       [not found]     ` <f72ccc9e-b09e-4abb-a0ce-a6516138b25c-2ueSQiBKiTY7tOexoI0I+QC/G2K4zDHf@public.gmane.org>
  1 sibling, 1 reply; 9+ messages in thread
From: Steven Rostedt @ 2013-03-30 19:24 UTC (permalink / raw)
  To: Jiri Pirko, netdev-u79uwXL29TY76Z2rM5mHXA
  Cc: alexander.h.duyck-ral2JQCrhuEAvxtiuMwx3w,
	dev-yBygre7rU0TnMu66kgdUjQ, paulmck-r/Jw6+rmf7HQT0dZR+AlfA,
	streeter-H+wXaHxf7aLQT0dZR+AlfA,
	nicolas.2p.debian-Re5JQEeQqe8AvxtiuMwx3w,
	fubar-r/Jw6+rmf7HQT0dZR+AlfA, kaber-dcUjhNyLwpNeoWH0uzbU5w,
	edumazet-hpIqsD4AKlfQT0dZR+AlfA, ivecera-H+wXaHxf7aLQT0dZR+AlfA,
	tglx-hfZtesqFncYOwBW4kG4KsQ, davem-fT/PcQaiUtIeIZ0/mPfg9Q,
	andy-QlMahl40kYEqcZcGjlUOXw



Jiri Pirko <jiri-rHqAuBHg3fBzbRFIqnYvSA@public.gmane.org> wrote:

>No need to have two pointers in struct netdevice for rx_handler func
>and
>priv data. Just embed rx_handler structure into driver port_priv and
>have ->func pointer there. This introduces no performance penalty,
>reduces struct netdevice by one pointer and reduces number of needed
>rcu_dereference calls from 2 to 1.
>
>Note this also fixes the race bug pointed out by Steven Rostedt and
>fixed by patch "[PATCH] net: add a synchronize_net() in
>netdev_rx_handler_unregister()" by Eric Dumazet. This is based on
>current net-next tree where the patch is not applied yet.
>I can rebase it on whatever tree/state, just say so.
>
>Smoke tested with all drivers who use rx_handler.
>
>Reported-by: Steven Rostedt <rostedt-nx8X9YLhiw1AfugRpC6u6w@public.gmane.org>
>Signed-off-by: Jiri Pirko <jiri-rHqAuBHg3fBzbRFIqnYvSA@public.gmane.org>
>---
 +
>+
>--- a/net/core/dev.c
>+++ b/net/core/dev.c
>@@ -3296,10 +3296,23 @@ out:
> #endif
> 
> /**
>+ *	netdev_rx_handler_init - init receive handler structure
>+ *	@rx_handler: receive handler to init
>+ *	@func: receive handler function
>+ *
>+ *	Inits receive handler structure.
>+ */
>+void netdev_rx_handler_init(struct netdev_rx_handler *rx_handler,
>+			    rx_handler_func_t *func)
>+{
>+	rx_handler->func = func;
>+}
>+EXPORT_SYMBOL_GPL(netdev_rx_handler_init);
>+
>+/**
>  *	netdev_rx_handler_register - register receive handler
>  *	@dev: device to register a handler for
>  *	@rx_handler: receive handler to register
>- *	@rx_handler_data: data pointer that is used by rx handler
>  *
>  *	Register a receive hander for a device. This handler will then be
>  *	called from __netif_receive_skb. A negative errno code is returned
>@@ -3310,15 +3323,13 @@ out:
> *	For a general description of rx_handler, see enum rx_handler_result.
>  */
> int netdev_rx_handler_register(struct net_device *dev,
>-			       rx_handler_func_t *rx_handler,
>-			       void *rx_handler_data)
>+			       struct netdev_rx_handler *rx_handler)
> {
> 	ASSERT_RTNL();
> 
> 	if (dev->rx_handler)
> 		return -EBUSY;
> 
>-	rcu_assign_pointer(dev->rx_handler_data, rx_handler_data);
> 	rcu_assign_pointer(dev->rx_handler, rx_handler);
> 
> 	return 0;
>@@ -3338,7 +3349,6 @@ void netdev_rx_handler_unregister(struct
>net_device *dev)
> 
> 	ASSERT_RTNL();
> 	RCU_INIT_POINTER(dev->rx_handler, NULL);
>-	RCU_INIT_POINTER(dev->rx_handler_data, NULL);
> }
> EXPORT_SYMBOL_GPL(netdev_rx_handler_unregister);
> 
>@@ -3362,7 +3372,7 @@ static bool skb_pfmemalloc_protocol(struct
>sk_buff *skb)
>static int __netif_receive_skb_core(struct sk_buff *skb, bool
>pfmemalloc)
> {
> 	struct packet_type *ptype, *pt_prev;
>-	rx_handler_func_t *rx_handler;
>+	struct netdev_rx_handler *rx_handler;
> 	struct net_device *orig_dev;
> 	struct net_device *null_or_dev;
> 	bool deliver_exact = false;
>@@ -3445,7 +3455,7 @@ ncls:
> 			ret = deliver_skb(skb, pt_prev, orig_dev);
> 			pt_prev = NULL;
> 		}
>-		switch (rx_handler(&skb)) {
>+		switch (rx_handler->func(&skb, rx_handler)) {


This doesn't solve anything. The CPU can be executing func when you set it to null.  Then you have the same problem.  This patch shows you still don't understand the bug.  


-- Steve


> 		case RX_HANDLER_CONSUMED:
> 			ret = NET_RX_SUCCESS;
> 			goto unlock;
>diff --git a/net/openvswitch/dp_notify.c b/net/openvswitch/dp_notify.c
>index 5558350..5248f8e 100644
>--- a/net/openvswitch/dp_notify.c
>+++ b/net/openvswitch/dp_notify.c
>@@ -32,7 +32,7 @@ static int dp_device_event(struct notifier_block
>*unused, unsigned long event,
> 	if (ovs_is_internal_dev(dev))
> 		vport = ovs_internal_dev_get_vport(dev);
> 	else
>-		vport = ovs_netdev_get_vport(dev);
>+		vport = ovs_netdev_get_vport_rtnl(dev);
> 
> 	if (!vport)
> 		return NOTIFY_DONE;
>diff --git a/net/openvswitch/vport-netdev.c
>b/net/openvswitch/vport-netdev.c
>index 2130d61..0ff6aa1 100644
>--- a/net/openvswitch/vport-netdev.c
>+++ b/net/openvswitch/vport-netdev.c
>@@ -35,9 +35,6 @@
> /* Must be called with rcu_read_lock. */
>static void netdev_port_receive(struct vport *vport, struct sk_buff
>*skb)
> {
>-	if (unlikely(!vport))
>-		goto error;
>-
> 	if (unlikely(skb_warn_if_lro(skb)))
> 		goto error;
> 
>@@ -57,7 +54,8 @@ error:
> }
> 
> /* Called with rcu_read_lock and bottom-halves disabled. */
>-static rx_handler_result_t netdev_frame_hook(struct sk_buff **pskb)
>+static rx_handler_result_t
>+netdev_frame_hook(struct sk_buff **pskb, struct netdev_rx_handler
>*rx_handler)
> {
> 	struct sk_buff *skb = *pskb;
> 	struct vport *vport;
>@@ -65,7 +63,7 @@ static rx_handler_result_t netdev_frame_hook(struct
>sk_buff **pskb)
> 	if (unlikely(skb->pkt_type == PACKET_LOOPBACK))
> 		return RX_HANDLER_PASS;
> 
>-	vport = ovs_netdev_get_vport(skb->dev);
>+	vport = ovs_netdev_get_vport_by_rx_handler(rx_handler);
> 
> 	netdev_port_receive(vport, skb);
> 
>@@ -100,8 +98,8 @@ static struct vport *netdev_create(const struct
>vport_parms *parms)
> 		goto error_put;
> 	}
> 
>-	err = netdev_rx_handler_register(netdev_vport->dev,
>netdev_frame_hook,
>-					 vport);
>+	netdev_rx_handler_init(&vport->rx_handler, netdev_frame_hook);
>+	err = netdev_rx_handler_register(netdev_vport->dev,
>&vport->rx_handler);
> 	if (err)
> 		goto error_put;
> 
>@@ -185,16 +183,6 @@ error:
> 	return 0;
> }
> 
>-/* Returns null if this device is not attached to a datapath. */
>-struct vport *ovs_netdev_get_vport(struct net_device *dev)
>-{
>-	if (likely(dev->priv_flags & IFF_OVS_DATAPATH))
>-		return (struct vport *)
>-			rcu_dereference_rtnl(dev->rx_handler_data);
>-	else
>-		return NULL;
>-}
>-
> const struct vport_ops ovs_netdev_vport_ops = {
> 	.type		= OVS_VPORT_TYPE_NETDEV,
> 	.create		= netdev_create,
>diff --git a/net/openvswitch/vport-netdev.h
>b/net/openvswitch/vport-netdev.h
>index 6478079..d3f1471 100644
>--- a/net/openvswitch/vport-netdev.h
>+++ b/net/openvswitch/vport-netdev.h
>@@ -24,7 +24,21 @@
> 
> #include "vport.h"
> 
>-struct vport *ovs_netdev_get_vport(struct net_device *dev);
>+#define ovs_netdev_vport_exists(dev) (dev->priv_flags &
>IFF_OVS_DATAPATH)
>+
>+static inline struct vport *
>+ovs_netdev_get_vport_by_rx_handler(const struct netdev_rx_handler
>*rx_handler)
>+{
>+	return container_of(rx_handler, struct vport, rx_handler);
>+}
>+
>+static inline struct vport *
>+ovs_netdev_get_vport_rtnl(const struct net_device *dev)
>+{
>+	if (!ovs_netdev_vport_exists(dev))
>+		return NULL;
>+	return
>ovs_netdev_get_vport_by_rx_handler(rtnl_dereference(dev->rx_handler));
>+}
> 
> struct netdev_vport {
> 	struct rcu_head rcu;
>diff --git a/net/openvswitch/vport.h b/net/openvswitch/vport.h
>index aee7d43..a7a07c0 100644
>--- a/net/openvswitch/vport.h
>+++ b/net/openvswitch/vport.h
>@@ -67,6 +67,7 @@ struct vport_err_stats {
> 
> /**
>  * struct vport - one port within a datapath
>+ * @rx_handler: RX handler structure.
>  * @rcu: RCU callback head for deferred destruction.
>  * @dp: Datapath to which this port belongs.
>* @upcall_portid: The Netlink port to use for packets received on this
>port that
>@@ -80,6 +81,7 @@ struct vport_err_stats {
>  * @err_stats: Points to error statistics used and maintained by vport
>  */
> struct vport {
>+	struct netdev_rx_handler rx_handler;
> 	struct rcu_head rcu;
> 	struct datapath	*dp;
> 	u32 upcall_portid;

-- 
Sent from my Android phone with K-9 Mail. Please excuse my brevity.

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [patch net-next] net: squash ->rx_handler and ->rx_handler_data into single rcu pointer
       [not found]       ` <20130330171325.GC1544-RDzucLLXGGI88b5SBfVpbw@public.gmane.org>
@ 2013-03-30 19:28         ` Eric Dumazet
  2013-03-30 19:31           ` Eric Dumazet
  0 siblings, 1 reply; 9+ messages in thread
From: Eric Dumazet @ 2013-03-30 19:28 UTC (permalink / raw)
  To: Jiri Pirko
  Cc: alexander.h.duyck-ral2JQCrhuEAvxtiuMwx3w,
	dev-yBygre7rU0TnMu66kgdUjQ, paulmck-r/Jw6+rmf7HQT0dZR+AlfA,
	streeter-H+wXaHxf7aLQT0dZR+AlfA,
	nicolas.2p.debian-Re5JQEeQqe8AvxtiuMwx3w,
	netdev-u79uwXL29TY76Z2rM5mHXA, fubar-r/Jw6+rmf7HQT0dZR+AlfA,
	rostedt-nx8X9YLhiw1AfugRpC6u6w, kaber-dcUjhNyLwpNeoWH0uzbU5w,
	edumazet-hpIqsD4AKlfQT0dZR+AlfA, ivecera-H+wXaHxf7aLQT0dZR+AlfA,
	tglx-hfZtesqFncYOwBW4kG4KsQ, davem-fT/PcQaiUtIeIZ0/mPfg9Q,
	andy-QlMahl40kYEqcZcGjlUOXw

On Sat, 2013-03-30 at 18:13 +0100, Jiri Pirko wrote:

> Well, not entirely true, depends on arch.
> 

Are you really trying to obfuscate stack because of Alpha architecture ?

Really, a bit of stability in this code is welcome.

Lets fix existing bugs instead of possibly add new ones.

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [patch net-next] net: squash ->rx_handler and ->rx_handler_data into single rcu pointer
  2013-03-30 19:28         ` Eric Dumazet
@ 2013-03-30 19:31           ` Eric Dumazet
  2013-03-30 19:32             ` Eric Dumazet
  0 siblings, 1 reply; 9+ messages in thread
From: Eric Dumazet @ 2013-03-30 19:31 UTC (permalink / raw)
  To: Jiri Pirko
  Cc: alexander.h.duyck-ral2JQCrhuEAvxtiuMwx3w,
	dev-yBygre7rU0TnMu66kgdUjQ, paulmck-r/Jw6+rmf7HQT0dZR+AlfA,
	streeter-H+wXaHxf7aLQT0dZR+AlfA,
	nicolas.2p.debian-Re5JQEeQqe8AvxtiuMwx3w,
	netdev-u79uwXL29TY76Z2rM5mHXA, fubar-r/Jw6+rmf7HQT0dZR+AlfA,
	rostedt-nx8X9YLhiw1AfugRpC6u6w, kaber-dcUjhNyLwpNeoWH0uzbU5w,
	edumazet-hpIqsD4AKlfQT0dZR+AlfA, ivecera-H+wXaHxf7aLQT0dZR+AlfA,
	tglx-hfZtesqFncYOwBW4kG4KsQ, davem-fT/PcQaiUtIeIZ0/mPfg9Q,
	andy-QlMahl40kYEqcZcGjlUOXw

On Sat, 2013-03-30 at 12:28 -0700, Eric Dumazet wrote:
> On Sat, 2013-03-30 at 18:13 +0100, Jiri Pirko wrote:
> 
> > Well, not entirely true, depends on arch.
> > 
> 
> Are you really trying to obfuscate stack because of Alpha architecture ?


By the way, only dev->rx_handler needs to be RCU protected.

The patch send yesterday make the second rcu_dereference() (to get
rx_handler_data) totally irrelevant.

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [patch net-next] net: squash ->rx_handler and ->rx_handler_data into single rcu pointer
       [not found]     ` <f72ccc9e-b09e-4abb-a0ce-a6516138b25c-2ueSQiBKiTY7tOexoI0I+QC/G2K4zDHf@public.gmane.org>
@ 2013-03-30 19:32       ` Jiri Pirko
       [not found]         ` <20130330193210.GD1544-RDzucLLXGGI88b5SBfVpbw@public.gmane.org>
  0 siblings, 1 reply; 9+ messages in thread
From: Jiri Pirko @ 2013-03-30 19:32 UTC (permalink / raw)
  To: Steven Rostedt
  Cc: alexander.h.duyck-ral2JQCrhuEAvxtiuMwx3w,
	dev-yBygre7rU0TnMu66kgdUjQ, paulmck-r/Jw6+rmf7HQT0dZR+AlfA,
	streeter-H+wXaHxf7aLQT0dZR+AlfA,
	nicolas.2p.debian-Re5JQEeQqe8AvxtiuMwx3w,
	netdev-u79uwXL29TY76Z2rM5mHXA, fubar-r/Jw6+rmf7HQT0dZR+AlfA,
	kaber-dcUjhNyLwpNeoWH0uzbU5w, edumazet-hpIqsD4AKlfQT0dZR+AlfA,
	ivecera-H+wXaHxf7aLQT0dZR+AlfA, tglx-hfZtesqFncYOwBW4kG4KsQ,
	davem-fT/PcQaiUtIeIZ0/mPfg9Q, andy-QlMahl40kYEqcZcGjlUOXw

Sat, Mar 30, 2013 at 08:24:04PM CET, rostedt-nx8X9YLhiw1AfugRpC6u6w@public.gmane.org wrote:
>
>
>Jiri Pirko <jiri-rHqAuBHg3fBzbRFIqnYvSA@public.gmane.org> wrote:
>
>>No need to have two pointers in struct netdevice for rx_handler func
>>and
>>priv data. Just embed rx_handler structure into driver port_priv and
>>have ->func pointer there. This introduces no performance penalty,
>>reduces struct netdevice by one pointer and reduces number of needed
>>rcu_dereference calls from 2 to 1.
>>
>>Note this also fixes the race bug pointed out by Steven Rostedt and
>>fixed by patch "[PATCH] net: add a synchronize_net() in
>>netdev_rx_handler_unregister()" by Eric Dumazet. This is based on
>>current net-next tree where the patch is not applied yet.
>>I can rebase it on whatever tree/state, just say so.
>>
>>Smoke tested with all drivers who use rx_handler.
>>
>>Reported-by: Steven Rostedt <rostedt-nx8X9YLhiw1AfugRpC6u6w@public.gmane.org>
>>Signed-off-by: Jiri Pirko <jiri-rHqAuBHg3fBzbRFIqnYvSA@public.gmane.org>
>>---
> +
>>+
>>--- a/net/core/dev.c
>>+++ b/net/core/dev.c
>>@@ -3296,10 +3296,23 @@ out:
>> #endif
>> 
>> /**
>>+ *	netdev_rx_handler_init - init receive handler structure
>>+ *	@rx_handler: receive handler to init
>>+ *	@func: receive handler function
>>+ *
>>+ *	Inits receive handler structure.
>>+ */
>>+void netdev_rx_handler_init(struct netdev_rx_handler *rx_handler,
>>+			    rx_handler_func_t *func)
>>+{
>>+	rx_handler->func = func;
>>+}
>>+EXPORT_SYMBOL_GPL(netdev_rx_handler_init);
>>+
>>+/**
>>  *	netdev_rx_handler_register - register receive handler
>>  *	@dev: device to register a handler for
>>  *	@rx_handler: receive handler to register
>>- *	@rx_handler_data: data pointer that is used by rx handler
>>  *
>>  *	Register a receive hander for a device. This handler will then be
>>  *	called from __netif_receive_skb. A negative errno code is returned
>>@@ -3310,15 +3323,13 @@ out:
>> *	For a general description of rx_handler, see enum rx_handler_result.
>>  */
>> int netdev_rx_handler_register(struct net_device *dev,
>>-			       rx_handler_func_t *rx_handler,
>>-			       void *rx_handler_data)
>>+			       struct netdev_rx_handler *rx_handler)
>> {
>> 	ASSERT_RTNL();
>> 
>> 	if (dev->rx_handler)
>> 		return -EBUSY;
>> 
>>-	rcu_assign_pointer(dev->rx_handler_data, rx_handler_data);
>> 	rcu_assign_pointer(dev->rx_handler, rx_handler);
>> 
>> 	return 0;
>>@@ -3338,7 +3349,6 @@ void netdev_rx_handler_unregister(struct
>>net_device *dev)
>> 
>> 	ASSERT_RTNL();
>> 	RCU_INIT_POINTER(dev->rx_handler, NULL);
>>-	RCU_INIT_POINTER(dev->rx_handler_data, NULL);
>> }
>> EXPORT_SYMBOL_GPL(netdev_rx_handler_unregister);
>> 
>>@@ -3362,7 +3372,7 @@ static bool skb_pfmemalloc_protocol(struct
>>sk_buff *skb)
>>static int __netif_receive_skb_core(struct sk_buff *skb, bool
>>pfmemalloc)
>> {
>> 	struct packet_type *ptype, *pt_prev;
>>-	rx_handler_func_t *rx_handler;
>>+	struct netdev_rx_handler *rx_handler;
>> 	struct net_device *orig_dev;
>> 	struct net_device *null_or_dev;
>> 	bool deliver_exact = false;
>>@@ -3445,7 +3455,7 @@ ncls:
>> 			ret = deliver_skb(skb, pt_prev, orig_dev);
>> 			pt_prev = NULL;
>> 		}
>>-		switch (rx_handler(&skb)) {
>>+		switch (rx_handler->func(&skb, rx_handler)) {
>
>
>This doesn't solve anything. The CPU can be executing func when you set it to null.  Then you have the same problem.  This patch shows you still don't understand the bug.  

rx_handler->func is never set to NULL and is valid for all rx_handler (port_priv)
lifetime.

>
>
>-- Steve
>
>
>> 		case RX_HANDLER_CONSUMED:
>> 			ret = NET_RX_SUCCESS;
>> 			goto unlock;
>>diff --git a/net/openvswitch/dp_notify.c b/net/openvswitch/dp_notify.c
>>index 5558350..5248f8e 100644
>>--- a/net/openvswitch/dp_notify.c
>>+++ b/net/openvswitch/dp_notify.c
>>@@ -32,7 +32,7 @@ static int dp_device_event(struct notifier_block
>>*unused, unsigned long event,
>> 	if (ovs_is_internal_dev(dev))
>> 		vport = ovs_internal_dev_get_vport(dev);
>> 	else
>>-		vport = ovs_netdev_get_vport(dev);
>>+		vport = ovs_netdev_get_vport_rtnl(dev);
>> 
>> 	if (!vport)
>> 		return NOTIFY_DONE;
>>diff --git a/net/openvswitch/vport-netdev.c
>>b/net/openvswitch/vport-netdev.c
>>index 2130d61..0ff6aa1 100644
>>--- a/net/openvswitch/vport-netdev.c
>>+++ b/net/openvswitch/vport-netdev.c
>>@@ -35,9 +35,6 @@
>> /* Must be called with rcu_read_lock. */
>>static void netdev_port_receive(struct vport *vport, struct sk_buff
>>*skb)
>> {
>>-	if (unlikely(!vport))
>>-		goto error;
>>-
>> 	if (unlikely(skb_warn_if_lro(skb)))
>> 		goto error;
>> 
>>@@ -57,7 +54,8 @@ error:
>> }
>> 
>> /* Called with rcu_read_lock and bottom-halves disabled. */
>>-static rx_handler_result_t netdev_frame_hook(struct sk_buff **pskb)
>>+static rx_handler_result_t
>>+netdev_frame_hook(struct sk_buff **pskb, struct netdev_rx_handler
>>*rx_handler)
>> {
>> 	struct sk_buff *skb = *pskb;
>> 	struct vport *vport;
>>@@ -65,7 +63,7 @@ static rx_handler_result_t netdev_frame_hook(struct
>>sk_buff **pskb)
>> 	if (unlikely(skb->pkt_type == PACKET_LOOPBACK))
>> 		return RX_HANDLER_PASS;
>> 
>>-	vport = ovs_netdev_get_vport(skb->dev);
>>+	vport = ovs_netdev_get_vport_by_rx_handler(rx_handler);
>> 
>> 	netdev_port_receive(vport, skb);
>> 
>>@@ -100,8 +98,8 @@ static struct vport *netdev_create(const struct
>>vport_parms *parms)
>> 		goto error_put;
>> 	}
>> 
>>-	err = netdev_rx_handler_register(netdev_vport->dev,
>>netdev_frame_hook,
>>-					 vport);
>>+	netdev_rx_handler_init(&vport->rx_handler, netdev_frame_hook);
>>+	err = netdev_rx_handler_register(netdev_vport->dev,
>>&vport->rx_handler);
>> 	if (err)
>> 		goto error_put;
>> 
>>@@ -185,16 +183,6 @@ error:
>> 	return 0;
>> }
>> 
>>-/* Returns null if this device is not attached to a datapath. */
>>-struct vport *ovs_netdev_get_vport(struct net_device *dev)
>>-{
>>-	if (likely(dev->priv_flags & IFF_OVS_DATAPATH))
>>-		return (struct vport *)
>>-			rcu_dereference_rtnl(dev->rx_handler_data);
>>-	else
>>-		return NULL;
>>-}
>>-
>> const struct vport_ops ovs_netdev_vport_ops = {
>> 	.type		= OVS_VPORT_TYPE_NETDEV,
>> 	.create		= netdev_create,
>>diff --git a/net/openvswitch/vport-netdev.h
>>b/net/openvswitch/vport-netdev.h
>>index 6478079..d3f1471 100644
>>--- a/net/openvswitch/vport-netdev.h
>>+++ b/net/openvswitch/vport-netdev.h
>>@@ -24,7 +24,21 @@
>> 
>> #include "vport.h"
>> 
>>-struct vport *ovs_netdev_get_vport(struct net_device *dev);
>>+#define ovs_netdev_vport_exists(dev) (dev->priv_flags &
>>IFF_OVS_DATAPATH)
>>+
>>+static inline struct vport *
>>+ovs_netdev_get_vport_by_rx_handler(const struct netdev_rx_handler
>>*rx_handler)
>>+{
>>+	return container_of(rx_handler, struct vport, rx_handler);
>>+}
>>+
>>+static inline struct vport *
>>+ovs_netdev_get_vport_rtnl(const struct net_device *dev)
>>+{
>>+	if (!ovs_netdev_vport_exists(dev))
>>+		return NULL;
>>+	return
>>ovs_netdev_get_vport_by_rx_handler(rtnl_dereference(dev->rx_handler));
>>+}
>> 
>> struct netdev_vport {
>> 	struct rcu_head rcu;
>>diff --git a/net/openvswitch/vport.h b/net/openvswitch/vport.h
>>index aee7d43..a7a07c0 100644
>>--- a/net/openvswitch/vport.h
>>+++ b/net/openvswitch/vport.h
>>@@ -67,6 +67,7 @@ struct vport_err_stats {
>> 
>> /**
>>  * struct vport - one port within a datapath
>>+ * @rx_handler: RX handler structure.
>>  * @rcu: RCU callback head for deferred destruction.
>>  * @dp: Datapath to which this port belongs.
>>* @upcall_portid: The Netlink port to use for packets received on this
>>port that
>>@@ -80,6 +81,7 @@ struct vport_err_stats {
>>  * @err_stats: Points to error statistics used and maintained by vport
>>  */
>> struct vport {
>>+	struct netdev_rx_handler rx_handler;
>> 	struct rcu_head rcu;
>> 	struct datapath	*dp;
>> 	u32 upcall_portid;
>
>-- 
>Sent from my Android phone with K-9 Mail. Please excuse my brevity.

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [patch net-next] net: squash ->rx_handler and ->rx_handler_data into single rcu pointer
  2013-03-30 19:31           ` Eric Dumazet
@ 2013-03-30 19:32             ` Eric Dumazet
  0 siblings, 0 replies; 9+ messages in thread
From: Eric Dumazet @ 2013-03-30 19:32 UTC (permalink / raw)
  To: Jiri Pirko
  Cc: alexander.h.duyck-ral2JQCrhuEAvxtiuMwx3w,
	dev-yBygre7rU0TnMu66kgdUjQ, paulmck-r/Jw6+rmf7HQT0dZR+AlfA,
	streeter-H+wXaHxf7aLQT0dZR+AlfA,
	nicolas.2p.debian-Re5JQEeQqe8AvxtiuMwx3w,
	netdev-u79uwXL29TY76Z2rM5mHXA, fubar-r/Jw6+rmf7HQT0dZR+AlfA,
	rostedt-nx8X9YLhiw1AfugRpC6u6w, kaber-dcUjhNyLwpNeoWH0uzbU5w,
	edumazet-hpIqsD4AKlfQT0dZR+AlfA, ivecera-H+wXaHxf7aLQT0dZR+AlfA,
	tglx-hfZtesqFncYOwBW4kG4KsQ, davem-fT/PcQaiUtIeIZ0/mPfg9Q,
	andy-QlMahl40kYEqcZcGjlUOXw

On Sat, 2013-03-30 at 12:31 -0700, Eric Dumazet wrote:

> By the way, only dev->rx_handler needs to be RCU protected.
> 
> The patch send yesterday make the second rcu_dereference() (to get
> rx_handler_data) totally irrelevant.

I'll send patch when yesterday fix is merged into net-next.

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [patch net-next] net: squash ->rx_handler and ->rx_handler_data into single rcu pointer
       [not found]         ` <20130330193210.GD1544-RDzucLLXGGI88b5SBfVpbw@public.gmane.org>
@ 2013-04-08 16:32           ` Steven Rostedt
  0 siblings, 0 replies; 9+ messages in thread
From: Steven Rostedt @ 2013-04-08 16:32 UTC (permalink / raw)
  To: Jiri Pirko
  Cc: alexander.h.duyck-ral2JQCrhuEAvxtiuMwx3w,
	dev-yBygre7rU0TnMu66kgdUjQ, paulmck-r/Jw6+rmf7HQT0dZR+AlfA,
	streeter-H+wXaHxf7aLQT0dZR+AlfA,
	nicolas.2p.debian-Re5JQEeQqe8AvxtiuMwx3w,
	netdev-u79uwXL29TY76Z2rM5mHXA, fubar-r/Jw6+rmf7HQT0dZR+AlfA,
	kaber-dcUjhNyLwpNeoWH0uzbU5w, edumazet-hpIqsD4AKlfQT0dZR+AlfA,
	ivecera-H+wXaHxf7aLQT0dZR+AlfA, tglx-hfZtesqFncYOwBW4kG4KsQ,
	davem-fT/PcQaiUtIeIZ0/mPfg9Q, andy-QlMahl40kYEqcZcGjlUOXw

On Sat, 2013-03-30 at 20:32 +0100, Jiri Pirko wrote:

> >This doesn't solve anything. The CPU can be executing func when you set it to null.  Then you have the same problem.  This patch shows you still don't understand the bug.  
> 
> rx_handler->func is never set to NULL and is valid for all rx_handler (port_priv)
> lifetime.
> 

Ah, you're right. I'll look deeper at this patch after reading the
thread.

This is what I get for trying to review a patch reading it on my android
phone ;-)

-- Steve

^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2013-04-08 16:32 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2013-03-30 15:57 [patch net-next] net: squash ->rx_handler and ->rx_handler_data into single rcu pointer Jiri Pirko
     [not found] ` <1364659034-7049-1-git-send-email-jiri-rHqAuBHg3fBzbRFIqnYvSA@public.gmane.org>
2013-03-30 16:23   ` Eric Dumazet
2013-03-30 17:13     ` Jiri Pirko
     [not found]       ` <20130330171325.GC1544-RDzucLLXGGI88b5SBfVpbw@public.gmane.org>
2013-03-30 19:28         ` Eric Dumazet
2013-03-30 19:31           ` Eric Dumazet
2013-03-30 19:32             ` Eric Dumazet
2013-03-30 19:24   ` Steven Rostedt
     [not found]     ` <f72ccc9e-b09e-4abb-a0ce-a6516138b25c-2ueSQiBKiTY7tOexoI0I+QC/G2K4zDHf@public.gmane.org>
2013-03-30 19:32       ` Jiri Pirko
     [not found]         ` <20130330193210.GD1544-RDzucLLXGGI88b5SBfVpbw@public.gmane.org>
2013-04-08 16:32           ` Steven Rostedt

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).