netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH net-next 1/2] ip_tunnel: embed hash list head
@ 2013-08-06  5:51 Stephen Hemminger
  2013-08-06  5:53 ` [PATCH 2/2 net-next] ip_tunnel: operstate support and link state transfer Stephen Hemminger
                   ` (2 more replies)
  0 siblings, 3 replies; 8+ messages in thread
From: Stephen Hemminger @ 2013-08-06  5:51 UTC (permalink / raw)
  To: Pravin B Shelar, David Miller; +Cc: netdev

The IP tunnel hash heads can be embedded in the per-net structure
since the table has a fixed size. Reduce the hash size so that the whole
structure fits in a page. The original size was overly large; even
NETDEV_HASHBITS is only 8 bits!

Also, add some white space for readability.

Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>


--- a/net/ipv4/ip_tunnel.c	2013-07-20 10:25:11.207494774 -0700
+++ b/net/ipv4/ip_tunnel.c	2013-07-24 08:40:11.025668961 -0700
@@ -838,15 +838,16 @@ int ip_tunnel_init_net(struct net *net,
 {
 	struct ip_tunnel_net *itn = net_generic(net, ip_tnl_net_id);
 	struct ip_tunnel_parm parms;
+	unsigned int i;
 
-	itn->tunnels = kzalloc(IP_TNL_HASH_SIZE * sizeof(struct hlist_head), GFP_KERNEL);
-	if (!itn->tunnels)
-		return -ENOMEM;
+	for (i = 0; i < IP_TNL_HASH_SIZE; i++)
+		INIT_HLIST_HEAD(&itn->tunnels[i]);
 
 	if (!ops) {
 		itn->fb_tunnel_dev = NULL;
 		return 0;
 	}
+
 	memset(&parms, 0, sizeof(parms));
 	if (devname)
 		strlcpy(parms.name, devname, IFNAMSIZ);
@@ -854,10 +855,9 @@ int ip_tunnel_init_net(struct net *net,
 	rtnl_lock();
 	itn->fb_tunnel_dev = __ip_tunnel_create(net, ops, &parms);
 	rtnl_unlock();
-	if (IS_ERR(itn->fb_tunnel_dev)) {
-		kfree(itn->tunnels);
+
+	if (IS_ERR(itn->fb_tunnel_dev))
 		return PTR_ERR(itn->fb_tunnel_dev);
-	}
 
 	return 0;
 }
@@ -887,7 +887,6 @@ void ip_tunnel_delete_net(struct ip_tunn
 	ip_tunnel_destroy(itn, &list);
 	unregister_netdevice_many(&list);
 	rtnl_unlock();
-	kfree(itn->tunnels);
 }
 EXPORT_SYMBOL_GPL(ip_tunnel_delete_net);
 
--- a/include/net/ip_tunnels.h	2013-06-28 08:16:54.093005290 -0700
+++ b/include/net/ip_tunnels.h	2013-07-24 08:39:20.822260842 -0700
@@ -86,12 +86,12 @@ struct tnl_ptk_info {
 #define PACKET_RCVD	0
 #define PACKET_REJECT	1
 
-#define IP_TNL_HASH_BITS   10
+#define IP_TNL_HASH_BITS   7
 #define IP_TNL_HASH_SIZE   (1 << IP_TNL_HASH_BITS)
 
 struct ip_tunnel_net {
-	struct hlist_head *tunnels;
 	struct net_device *fb_tunnel_dev;
+	struct hlist_head tunnels[IP_TNL_HASH_SIZE];
 };
 
 #ifdef CONFIG_INET

^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH 2/2 net-next] ip_tunnel: operstate support and link state transfer
  2013-08-06  5:51 [PATCH net-next 1/2] ip_tunnel: embed hash list head Stephen Hemminger
@ 2013-08-06  5:53 ` Stephen Hemminger
  2013-08-06 17:42   ` Pravin Shelar
  2013-08-06 17:41 ` [PATCH net-next 1/2] ip_tunnel: embed hash list head Pravin Shelar
  2013-08-07 23:48 ` David Miller
  2 siblings, 1 reply; 8+ messages in thread
From: Stephen Hemminger @ 2013-08-06  5:53 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: Pravin B Shelar, David Miller, netdev

Tunnel devices should reflect the carrier state of the lower device.
I.e., if the carrier goes down on the lower (Ethernet) device, it should
change on the tunnel as well.

This patch also adds full RFC 2863-compatible operational state so that
the tunnel state can be controlled from user space, as described in
Documentation/networking/operstates.txt

Example of usage:
ip li add tnl1 mode dormant \
  type gretap remote 172.19.20.21 local 172.16.17.18 dev eth1
ip li set dev tnl1 up
ip li set dev tnl1 state UP

In real life, this would be managed by a tunnel broker, not by
iproute2 shell commands.


Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>

---
 include/net/ip_tunnels.h |    4 ++++
 net/ipv4/ip_gre.c        |   35 +++++++++++++++++++++++++++++++++++
 net/ipv4/ip_tunnel.c     |   41 +++++++++++++++++++++++++++++++++++++----
 net/ipv4/ip_vti.c        |   23 +++++++++++++++++++++++
 net/ipv4/ipip.c          |   22 ++++++++++++++++++++++
 5 files changed, 121 insertions(+), 4 deletions(-)

--- a/include/net/ip_tunnels.h	2013-08-05 16:49:31.965828932 -0700
+++ b/include/net/ip_tunnels.h	2013-08-05 16:49:34.001797670 -0700
@@ -41,6 +41,7 @@ struct ip_tunnel_prl_entry {
 struct ip_tunnel {
 	struct ip_tunnel __rcu	*next;
 	struct hlist_node hash_node;
+	struct hlist_node hash_dev;
 	struct net_device	*dev;
 	struct net		*net;	/* netns for packet i/o */
 
@@ -92,6 +93,7 @@ struct tnl_ptk_info {
 struct ip_tunnel_net {
 	struct net_device *fb_tunnel_dev;
 	struct hlist_head tunnels[IP_TNL_HASH_SIZE];
+	struct hlist_head lower_dev[NETDEV_HASHENTRIES];
 };
 
 #ifdef CONFIG_INET
@@ -101,6 +103,8 @@ void ip_tunnel_uninit(struct net_device
 void  ip_tunnel_dellink(struct net_device *dev, struct list_head *head);
 int ip_tunnel_init_net(struct net *net, int ip_tnl_net_id,
 		       struct rtnl_link_ops *ops, char *devname);
+void ip_tunnel_stacked_transfer(struct ip_tunnel_net *itn,
+				struct net_device *dev);
 
 void ip_tunnel_delete_net(struct ip_tunnel_net *itn);
 
--- a/net/ipv4/ip_gre.c	2013-08-05 07:58:55.521573667 -0700
+++ b/net/ipv4/ip_gre.c	2013-08-05 22:41:10.879993567 -0700
@@ -777,6 +777,32 @@ static struct pernet_operations ipgre_ta
 	.size = sizeof(struct ip_tunnel_net),
 };
 
+/* If lower device changes state, reflect that to the tunnel. */
+static int ipgre_notify(struct notifier_block *unused,
+			unsigned long event, void *ptr)
+{
+	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+	struct net *net = dev_net(dev);
+	struct ip_tunnel_net *itn;
+
+	if (event != NETDEV_CHANGE)
+		return NOTIFY_DONE;
+
+	if (dev->type == ARPHRD_IPGRE)
+		itn = net_generic(net, ipgre_net_id);
+	else if (dev->type == ARPHRD_ETHER)
+		itn = net_generic(net, gre_tap_net_id);
+	else
+		return NOTIFY_DONE;
+
+	ip_tunnel_stacked_transfer(itn, dev);
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block ipgre_notifier = {
+	.notifier_call = ipgre_notify,
+};
+
 static int __init ipgre_init(void)
 {
 	int err;
@@ -805,8 +831,14 @@ static int __init ipgre_init(void)
 	if (err < 0)
 		goto tap_ops_failed;
 
+	err = register_netdevice_notifier(&ipgre_notifier);
+	if (err < 0)
+		goto notify_failed;
+
 	return 0;
 
+notify_failed:
+	rtnl_link_unregister(&ipgre_tap_ops);
 tap_ops_failed:
 	rtnl_link_unregister(&ipgre_link_ops);
 rtnl_link_failed:
@@ -820,6 +852,7 @@ pnet_tap_faied:
 
 static void __exit ipgre_fini(void)
 {
+	unregister_netdevice_notifier(&ipgre_notifier);
 	rtnl_link_unregister(&ipgre_tap_ops);
 	rtnl_link_unregister(&ipgre_link_ops);
 	gre_cisco_unregister(&ipgre_protocol);
--- a/net/ipv4/ip_tunnel.c	2013-08-05 16:49:31.965828932 -0700
+++ b/net/ipv4/ip_tunnel.c	2013-08-05 22:46:06.578307707 -0700
@@ -243,12 +243,32 @@ static void ip_tunnel_add(struct ip_tunn
 	struct hlist_head *head = ip_bucket(itn, &t->parms);
 
 	hlist_add_head_rcu(&t->hash_node, head);
+	if (t->parms.link) {
+		unsigned hash = t->parms.link & (NETDEV_HASHENTRIES - 1);
+		hlist_add_head(&t->hash_dev, &itn->lower_dev[hash]);
+	}
 }
 
 static void ip_tunnel_del(struct ip_tunnel *t)
 {
 	hlist_del_init_rcu(&t->hash_node);
+	hlist_del_init(&t->hash_dev);
+}
+
+void ip_tunnel_stacked_transfer(struct ip_tunnel_net *itn,
+				struct net_device *dev)
+{
+	struct ip_tunnel *t;
+	unsigned devhash = dev->ifindex & (NETDEV_HASHENTRIES - 1);
+
+	ASSERT_RTNL();
+
+	hlist_for_each_entry(t, &itn->lower_dev[devhash], hash_dev) {
+		if (t->parms.link == dev->ifindex)
+			netif_stacked_transfer_operstate(dev, t->dev);
+	}
 }
+EXPORT_SYMBOL(ip_tunnel_stacked_transfer);
 
 static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn,
 					struct ip_tunnel_parm *parms,
@@ -310,6 +330,7 @@ static struct net_device *__ip_tunnel_cr
 	if (err)
 		goto failed_free;
 
+	linkwatch_fire_event(dev);	/* call rfc2863_policy */
 	return dev;
 
 failed_free:
@@ -334,7 +355,7 @@ static inline struct rtable *ip_route_ou
 	return ip_route_output_key(net, fl4);
 }
 
-static int ip_tunnel_bind_dev(struct net_device *dev)
+static int ip_tunnel_bind_dev(struct ip_tunnel_net *itn, struct net_device *dev)
 {
 	struct net_device *tdev = NULL;
 	struct ip_tunnel *tunnel = netdev_priv(dev);
@@ -370,6 +391,9 @@ static int ip_tunnel_bind_dev(struct net
 	if (tdev) {
 		hlen = tdev->hard_header_len + tdev->needed_headroom;
 		mtu = tdev->mtu;
+
+		netif_stacked_transfer_operstate(tdev, dev);
+		linkwatch_fire_event(dev); /* call rfc2863_policy() */
 	}
 	dev->iflink = tunnel->parms.link;
 
@@ -395,7 +419,7 @@ static struct ip_tunnel *ip_tunnel_creat
 	if (IS_ERR(dev))
 		return NULL;
 
-	dev->mtu = ip_tunnel_bind_dev(dev);
+	dev->mtu = ip_tunnel_bind_dev(itn, dev);
 
 	nt = netdev_priv(dev);
 	ip_tunnel_add(itn, nt);
@@ -695,7 +719,7 @@ static void ip_tunnel_update(struct ip_t
 		int mtu;
 
 		t->parms.link = p->link;
-		mtu = ip_tunnel_bind_dev(dev);
+		mtu = ip_tunnel_bind_dev(itn, dev);
 		if (set_mtu)
 			dev->mtu = mtu;
 	}
@@ -914,7 +938,7 @@ int ip_tunnel_newlink(struct net_device
 	if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
 		eth_hw_addr_random(dev);
 
-	mtu = ip_tunnel_bind_dev(dev);
+	mtu = ip_tunnel_bind_dev(itn, dev);
 	if (!tb[IFLA_MTU])
 		dev->mtu = mtu;
 
--- a/net/ipv4/ip_vti.c	2013-08-05 07:58:55.521573667 -0700
+++ b/net/ipv4/ip_vti.c	2013-08-05 22:39:39.068790171 -0700
@@ -429,6 +429,22 @@ static struct rtnl_link_ops vti_link_ops
 	.fill_info	= vti_fill_info,
 };
 
+static int vti_notify(struct notifier_block *unused,
+		      unsigned long event, void *ptr)
+{
+	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+	struct ip_tunnel_net *itn = net_generic(dev_net(dev), vti_net_id);
+
+	if (event == NETDEV_CHANGE)
+		ip_tunnel_stacked_transfer(itn, dev);
+
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block vti_notifier = {
+	.notifier_call = vti_notify,
+};
+
 static int __init vti_init(void)
 {
 	int err;
@@ -448,8 +464,14 @@ static int __init vti_init(void)
 	if (err < 0)
 		goto rtnl_link_failed;
 
+	err = register_netdevice_notifier(&vti_notifier);
+	if (err < 0)
+		goto notify_failed;
+
 	return err;
 
+notify_failed:
+	rtnl_link_unregister(&vti_link_ops);
 rtnl_link_failed:
 	xfrm4_mode_tunnel_input_deregister(&vti_handler);
 	unregister_pernet_device(&vti_net_ops);
@@ -458,6 +480,7 @@ rtnl_link_failed:
 
 static void __exit vti_fini(void)
 {
+	unregister_netdevice_notifier(&vti_notifier);
 	rtnl_link_unregister(&vti_link_ops);
 	if (xfrm4_mode_tunnel_input_deregister(&vti_handler))
 		pr_info("vti close: can't deregister tunnel\n");
--- a/net/ipv4/ipip.c	2013-08-05 07:58:55.521573667 -0700
+++ b/net/ipv4/ipip.c	2013-08-05 22:39:05.253137638 -0700
@@ -447,6 +447,22 @@ static struct pernet_operations ipip_net
 	.size = sizeof(struct ip_tunnel_net),
 };
 
+static int ipip_notify(struct notifier_block *unused,
+		      unsigned long event, void *ptr)
+{
+	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+	struct ip_tunnel_net *itn = net_generic(dev_net(dev), ipip_net_id);
+
+	if (event == NETDEV_CHANGE)
+		ip_tunnel_stacked_transfer(itn, dev);
+
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block ipip_notifier = {
+	.notifier_call = ipip_notify,
+};
+
 static int __init ipip_init(void)
 {
 	int err;
@@ -465,9 +481,14 @@ static int __init ipip_init(void)
 	if (err < 0)
 		goto rtnl_link_failed;
 
+	err = register_netdevice_notifier(&ipip_notifier);
+	if (err < 0)
+		goto notify_failed;
 out:
 	return err;
 
+notify_failed:
+	rtnl_link_unregister(&ipip_link_ops);
 rtnl_link_failed:
 	xfrm4_tunnel_deregister(&ipip_handler, AF_INET);
 xfrm_tunnel_failed:
@@ -477,6 +498,7 @@ xfrm_tunnel_failed:
 
 static void __exit ipip_fini(void)
 {
+	unregister_netdevice_notifier(&ipip_notifier);
 	rtnl_link_unregister(&ipip_link_ops);
 	if (xfrm4_tunnel_deregister(&ipip_handler, AF_INET))
 		pr_info("%s: can't deregister tunnel\n", __func__);

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH net-next 1/2] ip_tunnel: embed hash list head
  2013-08-06  5:51 [PATCH net-next 1/2] ip_tunnel: embed hash list head Stephen Hemminger
  2013-08-06  5:53 ` [PATCH 2/2 net-next] ip_tunnel: operstate support and link state transfer Stephen Hemminger
@ 2013-08-06 17:41 ` Pravin Shelar
  2013-08-07 23:48 ` David Miller
  2 siblings, 0 replies; 8+ messages in thread
From: Pravin Shelar @ 2013-08-06 17:41 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: David Miller, netdev

On Mon, Aug 5, 2013 at 10:51 PM, Stephen Hemminger
<stephen@networkplumber.org> wrote:
> The IP tunnel hash heads can be embedded in the per-net structure
> since it is a fixed size. Reduce the size so that the total structure
> fits in a page size. The original size was overly large, even NETDEV_HASHBITS
> is only 8 bits!
>
> Also, add some white space for readability.
>
> Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
>
>
Looks good.
Acked-by: Pravin B Shelar <pshelar@nicira.com>.

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 2/2 net-next] ip_tunnel: operstate support and link state transfer
  2013-08-06  5:53 ` [PATCH 2/2 net-next] ip_tunnel: operstate support and link state transfer Stephen Hemminger
@ 2013-08-06 17:42   ` Pravin Shelar
  2013-08-06 17:49     ` Stephen Hemminger
  0 siblings, 1 reply; 8+ messages in thread
From: Pravin Shelar @ 2013-08-06 17:42 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: David Miller, netdev

On Mon, Aug 5, 2013 at 10:53 PM, Stephen Hemminger
<stephen@networkplumber.org> wrote:
> Tunnel devices should reflect the carrier state of the lower device.
> I.e if carrier goes down on the lower (ethernet) device, it should
> change on the tunnel as well.
>
> This patch also adds full RFC2863 compatible state so that the
> tunnel state can be controlled from user space as described in
> Documentation/networking/operstats.txt
>
> Example of usage:
> ip li add tnl1 mode dormant \
>   type gretap remote 172.19.20.21 local 172.16.17.18 dev eth1
> ip li set dev tnl1 up
> ip li set dev tnl1 state UP
>
> In real life, this would be managed by tunnel broker, not
> iproute2 shell commands.
>
>
I sent out a similar patch which tries to add this feature at the generic
ip_tunnel layer rather than in each tunnel implementation.  This way we can
share a single notifier for all tunneling protocols.
Can you do something similar?
http://marc.info/?l=linux-netdev&m=135761231222711&w=2

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 2/2 net-next] ip_tunnel: operstate support and link state transfer
  2013-08-06 17:42   ` Pravin Shelar
@ 2013-08-06 17:49     ` Stephen Hemminger
  2013-08-06 18:07       ` Pravin Shelar
  0 siblings, 1 reply; 8+ messages in thread
From: Stephen Hemminger @ 2013-08-06 17:49 UTC (permalink / raw)
  To: Pravin Shelar; +Cc: David Miller, netdev

On Tue, 6 Aug 2013 10:42:53 -0700
Pravin Shelar <pshelar@nicira.com> wrote:

> On Mon, Aug 5, 2013 at 10:53 PM, Stephen Hemminger
> <stephen@networkplumber.org> wrote:
> > Tunnel devices should reflect the carrier state of the lower device.
> > I.e if carrier goes down on the lower (ethernet) device, it should
> > change on the tunnel as well.
> >
> > This patch also adds full RFC2863 compatible state so that the
> > tunnel state can be controlled from user space as described in
> > Documentation/networking/operstats.txt
> >
> > Example of usage:
> > ip li add tnl1 mode dormant \
> >   type gretap remote 172.19.20.21 local 172.16.17.18 dev eth1
> > ip li set dev tnl1 up
> > ip li set dev tnl1 state UP
> >
> > In real life, this would be managed by tunnel broker, not
> > iproute2 shell commands.
> >
> >
> I sent out similar patch which try to add this feature at ip_tunnel
> generic layer rather than in tunnel implementation.  This way we can
> share single notifier for all tunneling protocols.
> Can you have something similar?
> http://marc.info/?l=linux-netdev&m=135761231222711&w=2

How well does that work for the case of GRE, where gre and gretap
have different net namespace IDs? If it can handle that, then
this is better.

Also, link_map could be an embedded array (not separately allocated),
which avoids another layer of indirection.

Also, the rfc2863 policy needs to handle CHANGE (for carrier).

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 2/2 net-next] ip_tunnel: operstate support and link state transfer
  2013-08-06 17:49     ` Stephen Hemminger
@ 2013-08-06 18:07       ` Pravin Shelar
  2013-08-06 23:44         ` [RFC] ip_tunnel: follow lower device state Stephen Hemminger
  0 siblings, 1 reply; 8+ messages in thread
From: Pravin Shelar @ 2013-08-06 18:07 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: David Miller, netdev

On Tue, Aug 6, 2013 at 10:49 AM, Stephen Hemminger
<stephen@networkplumber.org> wrote:
> On Tue, 6 Aug 2013 10:42:53 -0700
> Pravin Shelar <pshelar@nicira.com> wrote:
>
>> On Mon, Aug 5, 2013 at 10:53 PM, Stephen Hemminger
>> <stephen@networkplumber.org> wrote:
>> > Tunnel devices should reflect the carrier state of the lower device.
>> > I.e if carrier goes down on the lower (ethernet) device, it should
>> > change on the tunnel as well.
>> >
>> > This patch also adds full RFC2863 compatible state so that the
>> > tunnel state can be controlled from user space as described in
>> > Documentation/networking/operstats.txt
>> >
>> > Example of usage:
>> > ip li add tnl1 mode dormant \
>> >   type gretap remote 172.19.20.21 local 172.16.17.18 dev eth1
>> > ip li set dev tnl1 up
>> > ip li set dev tnl1 state UP
>> >
>> > In real life, this would be managed by tunnel broker, not
>> > iproute2 shell commands.
>> >
>> >
>> I sent out similar patch which try to add this feature at ip_tunnel
>> generic layer rather than in tunnel implementation.  This way we can
>> share single notifier for all tunneling protocols.
>> Can you have something similar?
>> http://marc.info/?l=linux-netdev&m=135761231222711&w=2
>
> How does that work well for case of GRE where gre and gretap
> have different net namespace id's. If it can handle that, then
> this is better.
>
ip_tunnel registers its own net namespace struct to keep track of all
tunnels from different tunneling protocols.

> Also link_map could be array (not allocated), and avoid
> another layer of indirection
>
> Also, the rfc2863 policy needs to handle CHANGE (for carrier).

that event is not handled in the patch.

^ permalink raw reply	[flat|nested] 8+ messages in thread

* [RFC] ip_tunnel: follow lower device state
  2013-08-06 18:07       ` Pravin Shelar
@ 2013-08-06 23:44         ` Stephen Hemminger
  0 siblings, 0 replies; 8+ messages in thread
From: Stephen Hemminger @ 2013-08-06 23:44 UTC (permalink / raw)
  To: Pravin Shelar; +Cc: David Miller, netdev

This is a merge of Pravin's earlier patch and mine.

IP tunnels, like other layered devices, should propagate
carrier and state from the lower device to the tunnel.
The following patch propagates link status to IPIP and
GRE devices.


---
v2 - embed link_map in per-net struct.
     no need for RCU on link map
     handle carrier (NETDEV_CHANGE) as well   

 include/net/ip_tunnels.h |    1 
 net/ipv4/ip_tunnel.c     |  108 ++++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 107 insertions(+), 2 deletions(-)

--- a/net/ipv4/ip_tunnel.c	2013-08-06 16:28:14.000000000 -0700
+++ b/net/ipv4/ip_tunnel.c	2013-08-06 16:40:36.498464820 -0700
@@ -61,6 +61,11 @@
 #include <net/ip6_route.h>
 #endif
 
+static int tunnels_net_id;
+struct tunnels_net {
+	struct hlist_head link_map[IP_TNL_HASH_SIZE];
+};
+
 static unsigned int ip_tunnel_hash(struct ip_tunnel_net *itn,
 				   __be32 key, __be32 remote)
 {
@@ -248,8 +253,62 @@ static void ip_tunnel_add(struct ip_tunn
 static void ip_tunnel_del(struct ip_tunnel *t)
 {
 	hlist_del_init_rcu(&t->hash_node);
+	hlist_del_init(&t->link_node);
+}
+
+static void ip_tunnel_add_link(struct net *net, struct ip_tunnel *t, int iflink)
+{
+	struct tunnels_net *tn = net_generic(net, tunnels_net_id);
+	int hash = hash_32(iflink, IP_TNL_HASH_BITS);
+
+	hlist_add_head(&t->link_node, &tn->link_map[hash]);
 }
 
+static int ip_tunnel_notify(struct notifier_block *unused,
+			    unsigned long event, void *ptr)
+{
+	struct net_device *rootdev = netdev_notifier_info_to_dev(ptr);
+	struct tunnels_net *tn = net_generic(dev_net(rootdev), tunnels_net_id);
+	int hash = hash_32(rootdev->iflink, IP_TNL_HASH_BITS);
+	struct hlist_node *n;
+	struct ip_tunnel *t;
+
+	hlist_for_each_entry_safe(t, n, &tn->link_map[hash], link_node) {
+		int flags;
+
+		if (rootdev->ifindex != t->dev->iflink)
+			continue;
+
+		switch (event) {
+		case NETDEV_CHANGE:
+			break;
+
+		case NETDEV_DOWN:
+			flags = t->dev->flags;
+			if (!(flags & IFF_UP))
+				break;
+			dev_change_flags(t->dev, flags & ~IFF_UP);
+			break;
+
+		case NETDEV_UP:
+			flags = t->dev->flags;
+			if (flags & IFF_UP)
+				break;
+			dev_change_flags(t->dev, flags | IFF_UP);
+			break;
+
+		default:
+			continue;
+		}
+		netif_stacked_transfer_operstate(rootdev, t->dev);
+	}
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block ip_tunnel_notifier = {
+	.notifier_call = ip_tunnel_notify,
+};
+
 static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn,
 					struct ip_tunnel_parm *parms,
 					int type)
@@ -370,8 +429,12 @@ static int ip_tunnel_bind_dev(struct net
 	if (tdev) {
 		hlen = tdev->hard_header_len + tdev->needed_headroom;
 		mtu = tdev->mtu;
+		netif_stacked_transfer_operstate(tdev, dev);
+		ip_tunnel_add_link(dev_net(dev), tunnel, tdev->ifindex);
+		dev->iflink = tdev->ifindex;
+	} else {
+		dev->iflink = tunnel->parms.link;
 	}
-	dev->iflink = tunnel->parms.link;
 
 	dev->needed_headroom = t_hlen + hlen;
 	mtu -= (dev->hard_header_len + t_hlen);
@@ -919,7 +982,7 @@ int ip_tunnel_newlink(struct net_device
 		dev->mtu = mtu;
 
 	ip_tunnel_add(itn, nt);
-
+	linkwatch_fire_event(dev);
 out:
 	return err;
 }
@@ -1012,4 +1075,45 @@ void ip_tunnel_setup(struct net_device *
 }
 EXPORT_SYMBOL_GPL(ip_tunnel_setup);
 
+static int __net_init tunnels_init_net(struct net *net)
+{
+	struct tunnels_net *tn = net_generic(net, tunnels_net_id);
+	unsigned i;
+
+	for (i = 0; i < IP_TNL_HASH_SIZE; i++)
+		INIT_HLIST_HEAD(&tn->link_map[i]);
+
+	return 0;
+}
+
+static struct pernet_operations tunnels_net_ops = {
+	.init = tunnels_init_net,
+	.id   = &tunnels_net_id,
+	.size = sizeof(struct tunnels_net),
+};
+
+static int __init ip_tunnel_mod_init(void)
+{
+	int err;
+
+	pr_info("IP_Tunnel init\n");
+	err = register_pernet_device(&tunnels_net_ops);
+	if (err < 0)
+		return err;
+
+	err = register_netdevice_notifier(&ip_tunnel_notifier);
+	if (err < 0)
+		unregister_pernet_device(&tunnels_net_ops);
+
+	return err;
+}
+
+static void __exit ip_tunnel_mod_fini(void)
+{
+	unregister_netdevice_notifier(&ip_tunnel_notifier);
+	unregister_pernet_device(&tunnels_net_ops);
+}
+
+module_init(ip_tunnel_mod_init);
+module_exit(ip_tunnel_mod_fini);
 MODULE_LICENSE("GPL");
--- a/include/net/ip_tunnels.h	2013-08-06 16:28:14.358362477 -0700
+++ b/include/net/ip_tunnels.h	2013-08-06 16:39:35.719423506 -0700
@@ -41,6 +41,7 @@ struct ip_tunnel_prl_entry {
 struct ip_tunnel {
 	struct ip_tunnel __rcu	*next;
 	struct hlist_node hash_node;
+	struct hlist_node link_node;
 	struct net_device	*dev;
 	struct net		*net;	/* netns for packet i/o */
 

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH net-next 1/2] ip_tunnel: embed hash list head
  2013-08-06  5:51 [PATCH net-next 1/2] ip_tunnel: embed hash list head Stephen Hemminger
  2013-08-06  5:53 ` [PATCH 2/2 net-next] ip_tunnel: operstate support and link state transfer Stephen Hemminger
  2013-08-06 17:41 ` [PATCH net-next 1/2] ip_tunnel: embed hash list head Pravin Shelar
@ 2013-08-07 23:48 ` David Miller
  2 siblings, 0 replies; 8+ messages in thread
From: David Miller @ 2013-08-07 23:48 UTC (permalink / raw)
  To: stephen; +Cc: pshelar, netdev

From: Stephen Hemminger <stephen@networkplumber.org>
Date: Mon, 5 Aug 2013 22:51:37 -0700

> The IP tunnel hash heads can be embedded in the per-net structure
> since it is a fixed size. Reduce the size so that the total structure
> fits in a page size. The original size was overly large, even NETDEV_HASHBITS
> is only 8 bits!
> 
> Also, add some white space for readability.
> 
> Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>

Applied, thanks.

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2013-08-07 23:43 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz follow: Atom feed)
-- links below jump to the message on this page --
2013-08-06  5:51 [PATCH net-next 1/2] ip_tunnel: embed hash list head Stephen Hemminger
2013-08-06  5:53 ` [PATCH 2/2 net-next] ip_tunnel: operstate support and link state transfer Stephen Hemminger
2013-08-06 17:42   ` Pravin Shelar
2013-08-06 17:49     ` Stephen Hemminger
2013-08-06 18:07       ` Pravin Shelar
2013-08-06 23:44         ` [RFC] ip_tunnel: follow lower device state Stephen Hemminger
2013-08-06 17:41 ` [PATCH net-next 1/2] ip_tunnel: embed hash list head Pravin Shelar
2013-08-07 23:48 ` David Miller

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).