netfilter-devel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [GIT PULL nf-2.6] IPVS
@ 2011-05-02 12:47 Simon Horman
  2011-05-02 12:47 ` [PATCH 1/2] IPVS: Change of socket usage to enable name space exit Simon Horman
                   ` (2 more replies)
  0 siblings, 3 replies; 6+ messages in thread
From: Simon Horman @ 2011-05-02 12:47 UTC (permalink / raw)
  To: lvs-devel, netdev, netfilter-devel, netfilter
  Cc: Wensong Zhang, Julian Anastasov, Patrick McHardy,
	Hans Schillstrom, Hans Schillstrom, Eric W. Biederman


Hi Patrick,

please consider pulling
git://git.kernel.org/pub/scm/linux/kernel/git/horms/ipvs-2.6.git master
to get the following fix from Hans. They resolve some problems related
to his netns for IPVS work which was incorporated into 2.6.39-rc1.

The pull request is based on nf-2.6/master.

There are other less-pressing changes from Hans which
I plan to get you to pull into nf-next-2.6 once these
changes make it there (presumably via net-2.6 and then net-next-2.6).

Hans Schillstrom (2):
      IPVS: Change of socket usage to enable name space exit.
      IPVS: labels at pos 0

 net/netfilter/ipvs/ip_vs_core.c |   12 ++++----
 net/netfilter/ipvs/ip_vs_ctl.c  |    8 +++---
 net/netfilter/ipvs/ip_vs_sync.c |   58 +++++++++++++++++++++++++--------------
 3 files changed, 47 insertions(+), 31 deletions(-)


^ permalink raw reply	[flat|nested] 6+ messages in thread

* [PATCH 1/2] IPVS: Change of socket usage to enable name space exit.
  2011-05-02 12:47 [GIT PULL nf-2.6] IPVS Simon Horman
@ 2011-05-02 12:47 ` Simon Horman
  2011-05-02 12:47 ` [PATCH 2/2] IPVS: labels at pos 0 Simon Horman
  2011-05-02 13:03 ` [GIT PULL nf-2.6] IPVS Simon Horman
  2 siblings, 0 replies; 6+ messages in thread
From: Simon Horman @ 2011-05-02 12:47 UTC (permalink / raw)
  To: lvs-devel, netdev, netfilter-devel, netfilter
  Cc: Wensong Zhang, Julian Anastasov, Patrick McHardy,
	Hans Schillstrom, Hans Schillstrom, Eric W. Biederman,
	Simon Horman

From: Hans Schillstrom <hans@schillstrom.com>

If the sync daemons run in a name space while it crashes
or get killed, there is no way to stop them except for a reboot.
When all patches are there, ip_vs_core will handle register_pernet_(),
i.e. ip_vs_sync_init() and ip_vs_sync_cleanup() will be removed.

Kernel threads should not increment the use count of a socket.
By calling sk_change_net() after creating a socket this is avoided.
sock_release cant be used intead sk_release_kernel() should be used.

Thanks Eric W Biederman for your advices.

This patch is based on net-next-2.6  ver 2.6.39-rc2

Signed-off-by: Hans Schillstrom <hans.schillstrom@ericsson.com>
Signed-off-by: Hans Schillstrom <hans@schillstrom.com>
(minor edits to description)
Signed-off-by: Simon Horman <horms@verge.net.au>
---
 net/netfilter/ipvs/ip_vs_core.c |    2 +-
 net/netfilter/ipvs/ip_vs_sync.c |   58 +++++++++++++++++++++++++--------------
 2 files changed, 38 insertions(+), 22 deletions(-)

diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index 07accf6..a0791dc 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -1896,7 +1896,7 @@ static int __net_init __ip_vs_init(struct net *net)
 
 static void __net_exit __ip_vs_cleanup(struct net *net)
 {
-	IP_VS_DBG(10, "ipvs netns %d released\n", net_ipvs(net)->gen);
+	IP_VS_DBG(2, "ipvs netns %d released\n", net_ipvs(net)->gen);
 }
 
 static struct pernet_operations ipvs_core_ops = {
diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c
index 3e7961e..0cce953 100644
--- a/net/netfilter/ipvs/ip_vs_sync.c
+++ b/net/netfilter/ipvs/ip_vs_sync.c
@@ -1303,13 +1303,18 @@ static struct socket *make_send_sock(struct net *net)
 	struct socket *sock;
 	int result;
 
-	/* First create a socket */
-	result = __sock_create(net, PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock, 1);
+	/* First create a socket move it to right name space later */
+	result = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock);
 	if (result < 0) {
 		pr_err("Error during creation of socket; terminating\n");
 		return ERR_PTR(result);
 	}
-
+	/*
+	 * Kernel sockets that are a part of a namespace, should not
+	 * hold a reference to a namespace in order to allow to stop it.
+	 * After sk_change_net should be released using sk_release_kernel.
+	 */
+	sk_change_net(sock->sk, net);
 	result = set_mcast_if(sock->sk, ipvs->master_mcast_ifn);
 	if (result < 0) {
 		pr_err("Error setting outbound mcast interface\n");
@@ -1334,8 +1339,8 @@ static struct socket *make_send_sock(struct net *net)
 
 	return sock;
 
-  error:
-	sock_release(sock);
+error:
+	sk_release_kernel(sock->sk);
 	return ERR_PTR(result);
 }
 
@@ -1350,12 +1355,17 @@ static struct socket *make_receive_sock(struct net *net)
 	int result;
 
 	/* First create a socket */
-	result = __sock_create(net, PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock, 1);
+	result = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock);
 	if (result < 0) {
 		pr_err("Error during creation of socket; terminating\n");
 		return ERR_PTR(result);
 	}
-
+	/*
+	 * Kernel sockets that are a part of a namespace, should not
+	 * hold a reference to a namespace in order to allow to stop it.
+	 * After sk_change_net should be released using sk_release_kernel.
+	 */
+	sk_change_net(sock->sk, net);
 	/* it is equivalent to the REUSEADDR option in user-space */
 	sock->sk->sk_reuse = 1;
 
@@ -1377,8 +1387,8 @@ static struct socket *make_receive_sock(struct net *net)
 
 	return sock;
 
-  error:
-	sock_release(sock);
+error:
+	sk_release_kernel(sock->sk);
 	return ERR_PTR(result);
 }
 
@@ -1473,7 +1483,7 @@ static int sync_thread_master(void *data)
 		ip_vs_sync_buff_release(sb);
 
 	/* release the sending multicast socket */
-	sock_release(tinfo->sock);
+	sk_release_kernel(tinfo->sock->sk);
 	kfree(tinfo);
 
 	return 0;
@@ -1513,7 +1523,7 @@ static int sync_thread_backup(void *data)
 	}
 
 	/* release the sending multicast socket */
-	sock_release(tinfo->sock);
+	sk_release_kernel(tinfo->sock->sk);
 	kfree(tinfo->buf);
 	kfree(tinfo);
 
@@ -1601,7 +1611,7 @@ outtinfo:
 outbuf:
 	kfree(buf);
 outsocket:
-	sock_release(sock);
+	sk_release_kernel(sock->sk);
 out:
 	return result;
 }
@@ -1610,6 +1620,7 @@ out:
 int stop_sync_thread(struct net *net, int state)
 {
 	struct netns_ipvs *ipvs = net_ipvs(net);
+	int retc = -EINVAL;
 
 	IP_VS_DBG(7, "%s(): pid %d\n", __func__, task_pid_nr(current));
 
@@ -1629,7 +1640,7 @@ int stop_sync_thread(struct net *net, int state)
 		spin_lock_bh(&ipvs->sync_lock);
 		ipvs->sync_state &= ~IP_VS_STATE_MASTER;
 		spin_unlock_bh(&ipvs->sync_lock);
-		kthread_stop(ipvs->master_thread);
+		retc = kthread_stop(ipvs->master_thread);
 		ipvs->master_thread = NULL;
 	} else if (state == IP_VS_STATE_BACKUP) {
 		if (!ipvs->backup_thread)
@@ -1639,16 +1650,14 @@ int stop_sync_thread(struct net *net, int state)
 			task_pid_nr(ipvs->backup_thread));
 
 		ipvs->sync_state &= ~IP_VS_STATE_BACKUP;
-		kthread_stop(ipvs->backup_thread);
+		retc = kthread_stop(ipvs->backup_thread);
 		ipvs->backup_thread = NULL;
-	} else {
-		return -EINVAL;
 	}
 
 	/* decrease the module use count */
 	ip_vs_use_count_dec();
 
-	return 0;
+	return retc;
 }
 
 /*
@@ -1670,8 +1679,15 @@ static int __net_init __ip_vs_sync_init(struct net *net)
 
 static void __ip_vs_sync_cleanup(struct net *net)
 {
-	stop_sync_thread(net, IP_VS_STATE_MASTER);
-	stop_sync_thread(net, IP_VS_STATE_BACKUP);
+	int retc;
+
+	retc = stop_sync_thread(net, IP_VS_STATE_MASTER);
+	if (retc && retc != -ESRCH)
+		pr_err("Failed to stop Master Daemon\n");
+
+	retc = stop_sync_thread(net, IP_VS_STATE_BACKUP);
+	if (retc && retc != -ESRCH)
+		pr_err("Failed to stop Backup Daemon\n");
 }
 
 static struct pernet_operations ipvs_sync_ops = {
@@ -1682,10 +1698,10 @@ static struct pernet_operations ipvs_sync_ops = {
 
 int __init ip_vs_sync_init(void)
 {
-	return register_pernet_subsys(&ipvs_sync_ops);
+	return register_pernet_device(&ipvs_sync_ops);
 }
 
 void ip_vs_sync_cleanup(void)
 {
-	unregister_pernet_subsys(&ipvs_sync_ops);
+	unregister_pernet_device(&ipvs_sync_ops);
 }
-- 
1.7.4.1


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [PATCH 2/2] IPVS: labels at pos 0
  2011-05-02 12:47 [GIT PULL nf-2.6] IPVS Simon Horman
  2011-05-02 12:47 ` [PATCH 1/2] IPVS: Change of socket usage to enable name space exit Simon Horman
@ 2011-05-02 12:47 ` Simon Horman
  2011-05-02 13:03 ` [GIT PULL nf-2.6] IPVS Simon Horman
  2 siblings, 0 replies; 6+ messages in thread
From: Simon Horman @ 2011-05-02 12:47 UTC (permalink / raw)
  To: lvs-devel, netdev, netfilter-devel, netfilter
  Cc: Wensong Zhang, Julian Anastasov, Patrick McHardy,
	Hans Schillstrom, Hans Schillstrom, Eric W. Biederman,
	Simon Horman

From: Hans Schillstrom <hans@schillstrom.com>

Put goto labels at the beginig of row
acording to coding style example.

Signed-off-by: Hans Schillstrom <hans.schillstrom@ericsson.com>
Signed-off-by: Hans Schillstrom <hans@schillstrom.com>
Acked-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Simon Horman <horms@verge.net.au>
---
 net/netfilter/ipvs/ip_vs_core.c |   10 +++++-----
 net/netfilter/ipvs/ip_vs_ctl.c  |    8 ++++----
 2 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index a0791dc..d536b51 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -1388,7 +1388,7 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
 		verdict = NF_DROP;
 	}
 
-  out:
+out:
 	__ip_vs_conn_put(cp);
 
 	return verdict;
@@ -1955,14 +1955,14 @@ static int __init ip_vs_init(void)
 
 cleanup_sync:
 	ip_vs_sync_cleanup();
-  cleanup_conn:
+cleanup_conn:
 	ip_vs_conn_cleanup();
-  cleanup_app:
+cleanup_app:
 	ip_vs_app_cleanup();
-  cleanup_protocol:
+cleanup_protocol:
 	ip_vs_protocol_cleanup();
 	ip_vs_control_cleanup();
-  cleanup_estimator:
+cleanup_estimator:
 	ip_vs_estimator_cleanup();
 	unregister_pernet_subsys(&ipvs_core_ops);	/* free ip_vs struct */
 	return ret;
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index ae47090..a31a70c 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -1327,9 +1327,9 @@ ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
 		ip_vs_bind_pe(svc, pe);
 	}
 
-  out_unlock:
+out_unlock:
 	write_unlock_bh(&__ip_vs_svc_lock);
-  out:
+out:
 	ip_vs_scheduler_put(old_sched);
 	ip_vs_pe_put(old_pe);
 	return ret;
@@ -2387,7 +2387,7 @@ __ip_vs_get_service_entries(struct net *net,
 			count++;
 		}
 	}
-  out:
+out:
 	return ret;
 }
 
@@ -2625,7 +2625,7 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
 		ret = -EINVAL;
 	}
 
-  out:
+out:
 	mutex_unlock(&__ip_vs_mutex);
 	return ret;
 }
-- 
1.7.4.1


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [GIT PULL nf-2.6] IPVS
  2011-05-02 12:47 [GIT PULL nf-2.6] IPVS Simon Horman
  2011-05-02 12:47 ` [PATCH 1/2] IPVS: Change of socket usage to enable name space exit Simon Horman
  2011-05-02 12:47 ` [PATCH 2/2] IPVS: labels at pos 0 Simon Horman
@ 2011-05-02 13:03 ` Simon Horman
  2 siblings, 0 replies; 6+ messages in thread
From: Simon Horman @ 2011-05-02 13:03 UTC (permalink / raw)
  To: lvs-devel, netdev, netfilter-devel, netfilter
  Cc: Wensong Zhang, Julian Anastasov, Patrick McHardy,
	Hans Schillstrom, Hans Schillstrom, Eric W. Biederman

On Mon, May 02, 2011 at 09:47:25PM +0900, Simon Horman wrote:
> 
> Hi Patrick,
> 
> please consider pulling
> git://git.kernel.org/pub/scm/linux/kernel/git/horms/ipvs-2.6.git master
> to get the following fix from Hans. They resolve some problems related
> to his netns for IPVS work which was incorporated into 2.6.39-rc1.

Sorry, please ignore this.
The second patch is one of the non-pressing ones.
I'll fix things up and repost.

> The pull request is based on nf-2.6/master.
> 
> There are other less-pressing changes from Hans which
> I plan to get you to pull into nf-next-2.6 once these
> changes make it there (presumably via net-2.6 and then net-next-2.6).
> 
> Hans Schillstrom (2):
>       IPVS: Change of socket usage to enable name space exit.
>       IPVS: labels at pos 0
> 
>  net/netfilter/ipvs/ip_vs_core.c |   12 ++++----
>  net/netfilter/ipvs/ip_vs_ctl.c  |    8 +++---
>  net/netfilter/ipvs/ip_vs_sync.c |   58 +++++++++++++++++++++++++--------------
>  3 files changed, 47 insertions(+), 31 deletions(-)
> 
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 

^ permalink raw reply	[flat|nested] 6+ messages in thread

* [PATCH 1/2] IPVS: Change of socket usage to enable name space exit.
  2011-05-03  7:05 [GIT PULL nf-2.6] IPVS (Take II) Simon Horman
@ 2011-05-03  7:05 ` Simon Horman
  0 siblings, 0 replies; 6+ messages in thread
From: Simon Horman @ 2011-05-03  7:05 UTC (permalink / raw)
  To: lvs-devel, netdev, netfilter-devel, netfilter
  Cc: Wensong Zhang, Julian Anastasov, Patrick McHardy,
	Hans Schillstrom, Hans Schillstrom, Eric W. Biederman,
	Simon Horman

From: Hans Schillstrom <hans@schillstrom.com>

If the sync daemons run in a name space while it crashes
or get killed, there is no way to stop them except for a reboot.
When all patches are there, ip_vs_core will handle register_pernet_(),
i.e. ip_vs_sync_init() and ip_vs_sync_cleanup() will be removed.

Kernel threads should not increment the use count of a socket.
By calling sk_change_net() after creating a socket this is avoided.
sock_release cant be used intead sk_release_kernel() should be used.

Thanks Eric W Biederman for your advices.

This patch is based on net-next-2.6  ver 2.6.39-rc2

Signed-off-by: Hans Schillstrom <hans.schillstrom@ericsson.com>
Signed-off-by: Hans Schillstrom <hans@schillstrom.com>
[minor edits to description]
Signed-off-by: Simon Horman <horms@verge.net.au>
---
 net/netfilter/ipvs/ip_vs_core.c |    2 +-
 net/netfilter/ipvs/ip_vs_sync.c |   58 +++++++++++++++++++++++++--------------
 2 files changed, 38 insertions(+), 22 deletions(-)

diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index 07accf6..a0791dc 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -1896,7 +1896,7 @@ static int __net_init __ip_vs_init(struct net *net)
 
 static void __net_exit __ip_vs_cleanup(struct net *net)
 {
-	IP_VS_DBG(10, "ipvs netns %d released\n", net_ipvs(net)->gen);
+	IP_VS_DBG(2, "ipvs netns %d released\n", net_ipvs(net)->gen);
 }
 
 static struct pernet_operations ipvs_core_ops = {
diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c
index 3e7961e..0cce953 100644
--- a/net/netfilter/ipvs/ip_vs_sync.c
+++ b/net/netfilter/ipvs/ip_vs_sync.c
@@ -1303,13 +1303,18 @@ static struct socket *make_send_sock(struct net *net)
 	struct socket *sock;
 	int result;
 
-	/* First create a socket */
-	result = __sock_create(net, PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock, 1);
+	/* First create a socket move it to right name space later */
+	result = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock);
 	if (result < 0) {
 		pr_err("Error during creation of socket; terminating\n");
 		return ERR_PTR(result);
 	}
-
+	/*
+	 * Kernel sockets that are a part of a namespace, should not
+	 * hold a reference to a namespace in order to allow to stop it.
+	 * After sk_change_net should be released using sk_release_kernel.
+	 */
+	sk_change_net(sock->sk, net);
 	result = set_mcast_if(sock->sk, ipvs->master_mcast_ifn);
 	if (result < 0) {
 		pr_err("Error setting outbound mcast interface\n");
@@ -1334,8 +1339,8 @@ static struct socket *make_send_sock(struct net *net)
 
 	return sock;
 
-  error:
-	sock_release(sock);
+error:
+	sk_release_kernel(sock->sk);
 	return ERR_PTR(result);
 }
 
@@ -1350,12 +1355,17 @@ static struct socket *make_receive_sock(struct net *net)
 	int result;
 
 	/* First create a socket */
-	result = __sock_create(net, PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock, 1);
+	result = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock);
 	if (result < 0) {
 		pr_err("Error during creation of socket; terminating\n");
 		return ERR_PTR(result);
 	}
-
+	/*
+	 * Kernel sockets that are a part of a namespace, should not
+	 * hold a reference to a namespace in order to allow to stop it.
+	 * After sk_change_net should be released using sk_release_kernel.
+	 */
+	sk_change_net(sock->sk, net);
 	/* it is equivalent to the REUSEADDR option in user-space */
 	sock->sk->sk_reuse = 1;
 
@@ -1377,8 +1387,8 @@ static struct socket *make_receive_sock(struct net *net)
 
 	return sock;
 
-  error:
-	sock_release(sock);
+error:
+	sk_release_kernel(sock->sk);
 	return ERR_PTR(result);
 }
 
@@ -1473,7 +1483,7 @@ static int sync_thread_master(void *data)
 		ip_vs_sync_buff_release(sb);
 
 	/* release the sending multicast socket */
-	sock_release(tinfo->sock);
+	sk_release_kernel(tinfo->sock->sk);
 	kfree(tinfo);
 
 	return 0;
@@ -1513,7 +1523,7 @@ static int sync_thread_backup(void *data)
 	}
 
 	/* release the sending multicast socket */
-	sock_release(tinfo->sock);
+	sk_release_kernel(tinfo->sock->sk);
 	kfree(tinfo->buf);
 	kfree(tinfo);
 
@@ -1601,7 +1611,7 @@ outtinfo:
 outbuf:
 	kfree(buf);
 outsocket:
-	sock_release(sock);
+	sk_release_kernel(sock->sk);
 out:
 	return result;
 }
@@ -1610,6 +1620,7 @@ out:
 int stop_sync_thread(struct net *net, int state)
 {
 	struct netns_ipvs *ipvs = net_ipvs(net);
+	int retc = -EINVAL;
 
 	IP_VS_DBG(7, "%s(): pid %d\n", __func__, task_pid_nr(current));
 
@@ -1629,7 +1640,7 @@ int stop_sync_thread(struct net *net, int state)
 		spin_lock_bh(&ipvs->sync_lock);
 		ipvs->sync_state &= ~IP_VS_STATE_MASTER;
 		spin_unlock_bh(&ipvs->sync_lock);
-		kthread_stop(ipvs->master_thread);
+		retc = kthread_stop(ipvs->master_thread);
 		ipvs->master_thread = NULL;
 	} else if (state == IP_VS_STATE_BACKUP) {
 		if (!ipvs->backup_thread)
@@ -1639,16 +1650,14 @@ int stop_sync_thread(struct net *net, int state)
 			task_pid_nr(ipvs->backup_thread));
 
 		ipvs->sync_state &= ~IP_VS_STATE_BACKUP;
-		kthread_stop(ipvs->backup_thread);
+		retc = kthread_stop(ipvs->backup_thread);
 		ipvs->backup_thread = NULL;
-	} else {
-		return -EINVAL;
 	}
 
 	/* decrease the module use count */
 	ip_vs_use_count_dec();
 
-	return 0;
+	return retc;
 }
 
 /*
@@ -1670,8 +1679,15 @@ static int __net_init __ip_vs_sync_init(struct net *net)
 
 static void __ip_vs_sync_cleanup(struct net *net)
 {
-	stop_sync_thread(net, IP_VS_STATE_MASTER);
-	stop_sync_thread(net, IP_VS_STATE_BACKUP);
+	int retc;
+
+	retc = stop_sync_thread(net, IP_VS_STATE_MASTER);
+	if (retc && retc != -ESRCH)
+		pr_err("Failed to stop Master Daemon\n");
+
+	retc = stop_sync_thread(net, IP_VS_STATE_BACKUP);
+	if (retc && retc != -ESRCH)
+		pr_err("Failed to stop Backup Daemon\n");
 }
 
 static struct pernet_operations ipvs_sync_ops = {
@@ -1682,10 +1698,10 @@ static struct pernet_operations ipvs_sync_ops = {
 
 int __init ip_vs_sync_init(void)
 {
-	return register_pernet_subsys(&ipvs_sync_ops);
+	return register_pernet_device(&ipvs_sync_ops);
 }
 
 void ip_vs_sync_cleanup(void)
 {
-	unregister_pernet_subsys(&ipvs_sync_ops);
+	unregister_pernet_device(&ipvs_sync_ops);
 }
-- 
1.7.4.1


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [PATCH 1/2] IPVS: Change of socket usage to enable name space exit.
  2011-05-08 23:17 [GIT PULL nf-2.6] IPVS (take III) Simon Horman
@ 2011-05-08 23:17 ` Simon Horman
  0 siblings, 0 replies; 6+ messages in thread
From: Simon Horman @ 2011-05-08 23:17 UTC (permalink / raw)
  To: lvs-devel, netdev, netfilter-devel, netfilter
  Cc: Wensong Zhang, Julian Anastasov, Patrick McHardy,
	Hans Schillstrom, Hans Schillstrom, Eric W. Biederman,
	Simon Horman

From: Hans Schillstrom <hans@schillstrom.com>

If the sync daemons run in a name space while it crashes
or get killed, there is no way to stop them except for a reboot.
When all patches are there, ip_vs_core will handle register_pernet_(),
i.e. ip_vs_sync_init() and ip_vs_sync_cleanup() will be removed.

Kernel threads should not increment the use count of a socket.
By calling sk_change_net() after creating a socket this is avoided.
sock_release cant be used intead sk_release_kernel() should be used.

Thanks Eric W Biederman for your advices.

Signed-off-by: Hans Schillstrom <hans@schillstrom.com>
[horms@verge.net.au: minor edit to changelog]
Signed-off-by: Simon Horman <horms@verge.net.au>
---
 net/netfilter/ipvs/ip_vs_core.c |    2 +-
 net/netfilter/ipvs/ip_vs_sync.c |   58 +++++++++++++++++++++++++--------------
 2 files changed, 38 insertions(+), 22 deletions(-)

diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index 07accf6..a0791dc 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -1896,7 +1896,7 @@ static int __net_init __ip_vs_init(struct net *net)
 
 static void __net_exit __ip_vs_cleanup(struct net *net)
 {
-	IP_VS_DBG(10, "ipvs netns %d released\n", net_ipvs(net)->gen);
+	IP_VS_DBG(2, "ipvs netns %d released\n", net_ipvs(net)->gen);
 }
 
 static struct pernet_operations ipvs_core_ops = {
diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c
index 3e7961e..0cce953 100644
--- a/net/netfilter/ipvs/ip_vs_sync.c
+++ b/net/netfilter/ipvs/ip_vs_sync.c
@@ -1303,13 +1303,18 @@ static struct socket *make_send_sock(struct net *net)
 	struct socket *sock;
 	int result;
 
-	/* First create a socket */
-	result = __sock_create(net, PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock, 1);
+	/* First create a socket move it to right name space later */
+	result = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock);
 	if (result < 0) {
 		pr_err("Error during creation of socket; terminating\n");
 		return ERR_PTR(result);
 	}
-
+	/*
+	 * Kernel sockets that are a part of a namespace, should not
+	 * hold a reference to a namespace in order to allow to stop it.
+	 * After sk_change_net should be released using sk_release_kernel.
+	 */
+	sk_change_net(sock->sk, net);
 	result = set_mcast_if(sock->sk, ipvs->master_mcast_ifn);
 	if (result < 0) {
 		pr_err("Error setting outbound mcast interface\n");
@@ -1334,8 +1339,8 @@ static struct socket *make_send_sock(struct net *net)
 
 	return sock;
 
-  error:
-	sock_release(sock);
+error:
+	sk_release_kernel(sock->sk);
 	return ERR_PTR(result);
 }
 
@@ -1350,12 +1355,17 @@ static struct socket *make_receive_sock(struct net *net)
 	int result;
 
 	/* First create a socket */
-	result = __sock_create(net, PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock, 1);
+	result = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock);
 	if (result < 0) {
 		pr_err("Error during creation of socket; terminating\n");
 		return ERR_PTR(result);
 	}
-
+	/*
+	 * Kernel sockets that are a part of a namespace, should not
+	 * hold a reference to a namespace in order to allow to stop it.
+	 * After sk_change_net should be released using sk_release_kernel.
+	 */
+	sk_change_net(sock->sk, net);
 	/* it is equivalent to the REUSEADDR option in user-space */
 	sock->sk->sk_reuse = 1;
 
@@ -1377,8 +1387,8 @@ static struct socket *make_receive_sock(struct net *net)
 
 	return sock;
 
-  error:
-	sock_release(sock);
+error:
+	sk_release_kernel(sock->sk);
 	return ERR_PTR(result);
 }
 
@@ -1473,7 +1483,7 @@ static int sync_thread_master(void *data)
 		ip_vs_sync_buff_release(sb);
 
 	/* release the sending multicast socket */
-	sock_release(tinfo->sock);
+	sk_release_kernel(tinfo->sock->sk);
 	kfree(tinfo);
 
 	return 0;
@@ -1513,7 +1523,7 @@ static int sync_thread_backup(void *data)
 	}
 
 	/* release the sending multicast socket */
-	sock_release(tinfo->sock);
+	sk_release_kernel(tinfo->sock->sk);
 	kfree(tinfo->buf);
 	kfree(tinfo);
 
@@ -1601,7 +1611,7 @@ outtinfo:
 outbuf:
 	kfree(buf);
 outsocket:
-	sock_release(sock);
+	sk_release_kernel(sock->sk);
 out:
 	return result;
 }
@@ -1610,6 +1620,7 @@ out:
 int stop_sync_thread(struct net *net, int state)
 {
 	struct netns_ipvs *ipvs = net_ipvs(net);
+	int retc = -EINVAL;
 
 	IP_VS_DBG(7, "%s(): pid %d\n", __func__, task_pid_nr(current));
 
@@ -1629,7 +1640,7 @@ int stop_sync_thread(struct net *net, int state)
 		spin_lock_bh(&ipvs->sync_lock);
 		ipvs->sync_state &= ~IP_VS_STATE_MASTER;
 		spin_unlock_bh(&ipvs->sync_lock);
-		kthread_stop(ipvs->master_thread);
+		retc = kthread_stop(ipvs->master_thread);
 		ipvs->master_thread = NULL;
 	} else if (state == IP_VS_STATE_BACKUP) {
 		if (!ipvs->backup_thread)
@@ -1639,16 +1650,14 @@ int stop_sync_thread(struct net *net, int state)
 			task_pid_nr(ipvs->backup_thread));
 
 		ipvs->sync_state &= ~IP_VS_STATE_BACKUP;
-		kthread_stop(ipvs->backup_thread);
+		retc = kthread_stop(ipvs->backup_thread);
 		ipvs->backup_thread = NULL;
-	} else {
-		return -EINVAL;
 	}
 
 	/* decrease the module use count */
 	ip_vs_use_count_dec();
 
-	return 0;
+	return retc;
 }
 
 /*
@@ -1670,8 +1679,15 @@ static int __net_init __ip_vs_sync_init(struct net *net)
 
 static void __ip_vs_sync_cleanup(struct net *net)
 {
-	stop_sync_thread(net, IP_VS_STATE_MASTER);
-	stop_sync_thread(net, IP_VS_STATE_BACKUP);
+	int retc;
+
+	retc = stop_sync_thread(net, IP_VS_STATE_MASTER);
+	if (retc && retc != -ESRCH)
+		pr_err("Failed to stop Master Daemon\n");
+
+	retc = stop_sync_thread(net, IP_VS_STATE_BACKUP);
+	if (retc && retc != -ESRCH)
+		pr_err("Failed to stop Backup Daemon\n");
 }
 
 static struct pernet_operations ipvs_sync_ops = {
@@ -1682,10 +1698,10 @@ static struct pernet_operations ipvs_sync_ops = {
 
 int __init ip_vs_sync_init(void)
 {
-	return register_pernet_subsys(&ipvs_sync_ops);
+	return register_pernet_device(&ipvs_sync_ops);
 }
 
 void ip_vs_sync_cleanup(void)
 {
-	unregister_pernet_subsys(&ipvs_sync_ops);
+	unregister_pernet_device(&ipvs_sync_ops);
 }
-- 
1.7.4.4


^ permalink raw reply related	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2011-05-08 23:17 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2011-05-02 12:47 [GIT PULL nf-2.6] IPVS Simon Horman
2011-05-02 12:47 ` [PATCH 1/2] IPVS: Change of socket usage to enable name space exit Simon Horman
2011-05-02 12:47 ` [PATCH 2/2] IPVS: labels at pos 0 Simon Horman
2011-05-02 13:03 ` [GIT PULL nf-2.6] IPVS Simon Horman
  -- strict thread matches above, loose matches on Subject: below --
2011-05-03  7:05 [GIT PULL nf-2.6] IPVS (Take II) Simon Horman
2011-05-03  7:05 ` [PATCH 1/2] IPVS: Change of socket usage to enable name space exit Simon Horman
2011-05-08 23:17 [GIT PULL nf-2.6] IPVS (take III) Simon Horman
2011-05-08 23:17 ` [PATCH 1/2] IPVS: Change of socket usage to enable name space exit Simon Horman

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).