Netdev List
 help / color / mirror / Atom feed
* Re: [Bugme-new] [Bug 19692] New: linux-2.6.36-rc5 crash with gianfar ethernet at full line rate traffic
From: Jarek Poplawski @ 2010-10-08  9:24 UTC (permalink / raw)
  To: Andrew Morton
  Cc: netdev, bugzilla-daemon, bugme-daemon, eminak71, Anton Vorontsov
In-Reply-To: <20101004135310.6a5f8e93.akpm@linux-foundation.org>

Andrew Morton wrote:
> (switched to email.  Please respond via emailed reply-to-all, not via the
> bugzilla web interface).
> 
> On Mon, 4 Oct 2010 06:25:14 GMT
> bugzilla-daemon@bugzilla.kernel.org wrote:
> 
>> https://bugzilla.kernel.org/show_bug.cgi?id=19692
>>
>>            Summary: linux-2.6.36-rc5 crash with gianfar ethernet at full
>>                     line rate traffic
...

Emin, until there is something better I hope you could try this patch.
(not tested nor compiled)

Thanks,
Jarek P.
---
diff --git a/drivers/net/gianfar.c b/drivers/net/gianfar.c
index 4f7c3f3..db47b55 100644
--- a/drivers/net/gianfar.c
+++ b/drivers/net/gianfar.c
@@ -2515,7 +2515,7 @@ static int gfar_clean_tx_ring(struct gfar_priv_tx_q *tx_queue)
 				skb_recycle_check(skb, priv->rx_buffer_size +
 					RXBUF_ALIGNMENT)) {
 			gfar_align_skb(skb);
-			__skb_queue_head(&priv->rx_recycle, skb);
+			skb_queue_head(&priv->rx_recycle, skb);
 		} else
 			dev_kfree_skb_any(skb);
 
@@ -2598,7 +2598,7 @@ struct sk_buff * gfar_new_skb(struct net_device *dev)
 	struct gfar_private *priv = netdev_priv(dev);
 	struct sk_buff *skb = NULL;
 
-	skb = __skb_dequeue(&priv->rx_recycle);
+	skb = skb_dequeue(&priv->rx_recycle);
 	if (!skb)
 		skb = gfar_alloc_skb(dev);
 
@@ -2754,7 +2754,7 @@ int gfar_clean_rx_ring(struct gfar_priv_rx_q *rx_queue, int rx_work_limit)
 			if (unlikely(!newskb))
 				newskb = skb;
 			else if (skb)
-				__skb_queue_head(&priv->rx_recycle, skb);
+				skb_queue_head(&priv->rx_recycle, skb);
 		} else {
 			/* Increment the number of packets */
 			rx_queue->stats.rx_packets++;

^ permalink raw reply related

* [PATCH] net/tg3: simplify conditional
From: Nicolas Kaiser @ 2010-10-08  9:29 UTC (permalink / raw)
  To: Matt Carlson; +Cc: Michael Chan, netdev, linux-kernel

Simplify: ((a && !b) || (!a && b)) => (a != b)

Signed-off-by: Nicolas Kaiser <nikai@nikai.net>
---
 drivers/net/tg3.c |    3 +--
 1 files changed, 1 insertions(+), 2 deletions(-)

diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c
index 16e1a95..714f0fb 100644
--- a/drivers/net/tg3.c
+++ b/drivers/net/tg3.c
@@ -9967,8 +9967,7 @@ static int tg3_set_pauseparam(struct net_device *dev, struct ethtool_pauseparam
 
 		if (!(phydev->supported & SUPPORTED_Pause) ||
 		    (!(phydev->supported & SUPPORTED_Asym_Pause) &&
-		     ((epause->rx_pause && !epause->tx_pause) ||
-		      (!epause->rx_pause && epause->tx_pause))))
+		     (epause->rx_pause != epause->tx_pause)))
 			return -EINVAL;
 
 		tp->link_config.flowctrl = 0;
-- 
1.7.2.2

^ permalink raw reply related

* [PATCH NEXT 2/7] qlcnic: support quiescent mode
From: Amit Kumar Salecha @ 2010-10-08  9:46 UTC (permalink / raw)
  To: davem; +Cc: netdev, ameen.rahman, anirban.chakraborty
In-Reply-To: <1286531171-21173-1-git-send-email-amit.salecha@qlogic.com>

Put the device in quiescent mode during the internal loopback test.
Before running the test, set the state to NEED_QUISCENT. After getting
an ack from all functions, change the state to QUISCENT and perform the test.

Signed-off-by: Amit Kumar Salecha <amit.salecha@qlogic.com>
---
 drivers/net/qlcnic/qlcnic.h         |    2 +
 drivers/net/qlcnic/qlcnic_ethtool.c |    6 +++
 drivers/net/qlcnic/qlcnic_main.c    |   83 +++++++++++++++++++++++++++-------
 3 files changed, 74 insertions(+), 17 deletions(-)

diff --git a/drivers/net/qlcnic/qlcnic.h b/drivers/net/qlcnic/qlcnic.h
index a1fabdc..6909cfc 100644
--- a/drivers/net/qlcnic/qlcnic.h
+++ b/drivers/net/qlcnic/qlcnic.h
@@ -1313,6 +1313,8 @@ int qlcnic_set_ilb_mode(struct qlcnic_adapter *adapter);
 void qlcnic_fetch_mac(struct qlcnic_adapter *, u32, u32, u8, u8 *);
 
 /* Functions from qlcnic_main.c */
+int qlcnic_request_quiscent_mode(struct qlcnic_adapter *adapter);
+void qlcnic_clear_quiscent_mode(struct qlcnic_adapter *adapter);
 int qlcnic_reset_context(struct qlcnic_adapter *);
 u32 qlcnic_issue_cmd(struct qlcnic_adapter *adapter,
 	u32 pci_fn, u32 version, u32 arg1, u32 arg2, u32 arg3, u32 cmd);
diff --git a/drivers/net/qlcnic/qlcnic_ethtool.c b/drivers/net/qlcnic/qlcnic_ethtool.c
index 6a76014..0181301 100644
--- a/drivers/net/qlcnic/qlcnic_ethtool.c
+++ b/drivers/net/qlcnic/qlcnic_ethtool.c
@@ -706,6 +706,11 @@ static int qlcnic_loopback_test(struct net_device *netdev)
 	if (test_and_set_bit(__QLCNIC_RESETTING, &adapter->state))
 		return -EIO;
 
+	if (qlcnic_request_quiscent_mode(adapter)) {
+		clear_bit(__QLCNIC_RESETTING, &adapter->state);
+		return -EIO;
+	}
+
 	ret = qlcnic_diag_alloc_res(netdev, QLCNIC_LOOPBACK_TEST);
 	if (ret)
 		goto clear_it;
@@ -722,6 +727,7 @@ done:
 	qlcnic_diag_free_res(netdev, max_sds_rings);
 
 clear_it:
+	qlcnic_clear_quiscent_mode(adapter);
 	adapter->max_sds_rings = max_sds_rings;
 	clear_bit(__QLCNIC_RESETTING, &adapter->state);
 	return ret;
diff --git a/drivers/net/qlcnic/qlcnic_main.c b/drivers/net/qlcnic/qlcnic_main.c
index 7503c48..9b0acfb 100644
--- a/drivers/net/qlcnic/qlcnic_main.c
+++ b/drivers/net/qlcnic/qlcnic_main.c
@@ -2712,7 +2712,8 @@ qlcnic_fwinit_work(struct work_struct *work)
 		goto err_ret;
 
 	dev_state = QLCRD32(adapter, QLCNIC_CRB_DEV_STATE);
-	if (dev_state ==  QLCNIC_DEV_QUISCENT) {
+	if (dev_state == QLCNIC_DEV_QUISCENT ||
+	    dev_state == QLCNIC_DEV_NEED_QUISCENT) {
 		qlcnic_api_unlock(adapter);
 		qlcnic_schedule_work(adapter, qlcnic_fwinit_work,
 						FW_POLL_DELAY * 2);
@@ -2734,18 +2735,6 @@ qlcnic_fwinit_work(struct work_struct *work)
 skip_ack_check:
 		dev_state = QLCRD32(adapter, QLCNIC_CRB_DEV_STATE);
 
-		if (dev_state == QLCNIC_DEV_NEED_QUISCENT) {
-			QLCWR32(adapter, QLCNIC_CRB_DEV_STATE,
-						QLCNIC_DEV_QUISCENT);
-			qlcnic_schedule_work(adapter, qlcnic_fwinit_work,
-						FW_POLL_DELAY * 2);
-			QLCDB(adapter, DRV, "Quiscing the driver\n");
-			qlcnic_idc_debug_info(adapter, 0);
-
-			qlcnic_api_unlock(adapter);
-			return;
-		}
-
 		if (dev_state == QLCNIC_DEV_NEED_RESET) {
 			QLCWR32(adapter, QLCNIC_CRB_DEV_STATE,
 						QLCNIC_DEV_INITIALIZING);
@@ -2802,7 +2791,12 @@ qlcnic_detach_work(struct work_struct *work)
 
 	netif_device_detach(netdev);
 
-	qlcnic_down(adapter, netdev);
+	/* Dont grab rtnl lock during Quiscent mode */
+	if (adapter->dev_state == QLCNIC_DEV_NEED_QUISCENT) {
+		if (netif_running(netdev))
+			__qlcnic_down(adapter, netdev);
+	} else
+		qlcnic_down(adapter, netdev);
 
 	status = QLCRD32(adapter, QLCNIC_PEG_HALT_STATUS1);
 
@@ -2844,6 +2838,61 @@ qlcnic_set_npar_non_operational(struct qlcnic_adapter *adapter)
 	qlcnic_api_unlock(adapter);
 }
 
+/* Caller should hold the RESETTING bit.
+ * This should be called in sync with qlcnic_request_quiscent_mode.
+ */
+void qlcnic_clear_quiscent_mode(struct qlcnic_adapter *adapter)
+{
+	qlcnic_clr_drv_state(adapter);
+	qlcnic_api_lock(adapter);
+	QLCWR32(adapter, QLCNIC_CRB_DEV_STATE, QLCNIC_DEV_READY);
+	qlcnic_api_unlock(adapter);
+}
+
+/* Caller should held RESETTING bit.
+ */
+int qlcnic_request_quiscent_mode(struct qlcnic_adapter *adapter)
+{
+	u8 timeo = adapter->dev_init_timeo / 2;
+	u32 state;
+
+	if (qlcnic_api_lock(adapter))
+		return -EIO;
+
+	state = QLCRD32(adapter, QLCNIC_CRB_DEV_STATE);
+	if (state != QLCNIC_DEV_READY)
+		return -EIO;
+
+	QLCWR32(adapter, QLCNIC_CRB_DEV_STATE, QLCNIC_DEV_NEED_QUISCENT);
+	qlcnic_api_unlock(adapter);
+	QLCDB(adapter, DRV, "NEED QUISCENT state set\n");
+	qlcnic_idc_debug_info(adapter, 0);
+
+	qlcnic_set_drv_state(adapter, QLCNIC_DEV_NEED_QUISCENT);
+
+	do {
+		msleep(2000);
+		state = QLCRD32(adapter, QLCNIC_CRB_DEV_STATE);
+		if (state == QLCNIC_DEV_QUISCENT)
+			return 0;
+		if (!qlcnic_check_drv_state(adapter)) {
+			if (qlcnic_api_lock(adapter))
+				return -EIO;
+			QLCWR32(adapter, QLCNIC_CRB_DEV_STATE,
+							QLCNIC_DEV_QUISCENT);
+			qlcnic_api_unlock(adapter);
+			QLCDB(adapter, DRV, "QUISCENT mode set\n");
+			return 0;
+		}
+	} while (--timeo);
+
+	dev_err(&adapter->pdev->dev, "Failed to quiesce device, DRV_STATE=%08x"
+		" DRV_ACTIVE=%08x\n", QLCRD32(adapter, QLCNIC_CRB_DRV_STATE),
+		QLCRD32(adapter, QLCNIC_CRB_DRV_ACTIVE));
+	qlcnic_clear_quiscent_mode(adapter);
+	return -EIO;
+}
+
 /*Transit to RESET state from READY state only */
 static void
 qlcnic_dev_request_reset(struct qlcnic_adapter *adapter)
@@ -2951,11 +3000,11 @@ qlcnic_check_health(struct qlcnic_adapter *adapter)
 		qlcnic_dev_request_reset(adapter);
 
 	state = QLCRD32(adapter, QLCNIC_CRB_DEV_STATE);
-	if (state == QLCNIC_DEV_NEED_RESET ||
-	    state == QLCNIC_DEV_NEED_QUISCENT) {
+	if (state == QLCNIC_DEV_NEED_RESET) {
 		qlcnic_set_npar_non_operational(adapter);
 		adapter->need_fw_reset = 1;
-	}
+	} else if (state == QLCNIC_DEV_NEED_QUISCENT)
+		goto detach;
 
 	heartbeat = QLCRD32(adapter, QLCNIC_PEG_ALIVE_COUNTER);
 	if (heartbeat != adapter->heartbeat) {
-- 
1.6.0.2


^ permalink raw reply related

* [PATCH NEXT 3/7] qlcnic: remove private LRO flag
From: Amit Kumar Salecha @ 2010-10-08  9:46 UTC (permalink / raw)
  To: davem; +Cc: netdev, ameen.rahman, anirban.chakraborty
In-Reply-To: <1286531171-21173-1-git-send-email-amit.salecha@qlogic.com>

LRO was not getting enabled after interface down/up.

Signed-off-by: Amit Kumar Salecha <amit.salecha@qlogic.com>
---
 drivers/net/qlcnic/qlcnic_ethtool.c |    4 ++--
 drivers/net/qlcnic/qlcnic_hw.c      |    5 -----
 drivers/net/qlcnic/qlcnic_main.c    |    2 --
 3 files changed, 2 insertions(+), 9 deletions(-)

diff --git a/drivers/net/qlcnic/qlcnic_ethtool.c b/drivers/net/qlcnic/qlcnic_ethtool.c
index 0181301..e07adb1 100644
--- a/drivers/net/qlcnic/qlcnic_ethtool.c
+++ b/drivers/net/qlcnic/qlcnic_ethtool.c
@@ -865,7 +865,7 @@ static int qlcnic_set_rx_csum(struct net_device *dev, u32 data)
 		return 0;
 	}
 
-	if (adapter->flags & QLCNIC_LRO_ENABLED) {
+	if (dev->features & NETIF_F_LRO) {
 		if (qlcnic_config_hw_lro(adapter, QLCNIC_LRO_DISABLED))
 			return -EIO;
 
@@ -1062,7 +1062,7 @@ static int qlcnic_set_flags(struct net_device *netdev, u32 data)
 		return -EINVAL;
 	}
 
-	if ((data & ETH_FLAG_LRO) && (adapter->flags & QLCNIC_LRO_ENABLED))
+	if ((data & ETH_FLAG_LRO) && (netdev->features & NETIF_F_LRO))
 		return 0;
 
 	if (data & ETH_FLAG_LRO) {
diff --git a/drivers/net/qlcnic/qlcnic_hw.c b/drivers/net/qlcnic/qlcnic_hw.c
index 9d3e16d..7f1f9b4 100644
--- a/drivers/net/qlcnic/qlcnic_hw.c
+++ b/drivers/net/qlcnic/qlcnic_hw.c
@@ -582,9 +582,6 @@ int qlcnic_config_hw_lro(struct qlcnic_adapter *adapter, int enable)
 	u64 word;
 	int rv;
 
-	if ((adapter->flags & QLCNIC_LRO_ENABLED) == enable)
-		return 0;
-
 	memset(&req, 0, sizeof(struct qlcnic_nic_req));
 
 	req.qhdr = cpu_to_le64(QLCNIC_HOST_REQUEST << 23);
@@ -599,8 +596,6 @@ int qlcnic_config_hw_lro(struct qlcnic_adapter *adapter, int enable)
 		dev_err(&adapter->netdev->dev,
 			"Could not send configure hw lro request\n");
 
-	adapter->flags ^= QLCNIC_LRO_ENABLED;
-
 	return rv;
 }
 
diff --git a/drivers/net/qlcnic/qlcnic_main.c b/drivers/net/qlcnic/qlcnic_main.c
index 9b0acfb..e3c1b80 100644
--- a/drivers/net/qlcnic/qlcnic_main.c
+++ b/drivers/net/qlcnic/qlcnic_main.c
@@ -657,8 +657,6 @@ qlcnic_check_options(struct qlcnic_adapter *adapter)
 	dev_info(&pdev->dev, "firmware v%d.%d.%d\n",
 			fw_major, fw_minor, fw_build);
 
-	adapter->flags &= ~QLCNIC_LRO_ENABLED;
-
 	if (adapter->ahw.port_type == QLCNIC_XGBE) {
 		adapter->num_rxd = DEFAULT_RCV_DESCRIPTORS_10G;
 		adapter->num_jumbo_rxd = MAX_JUMBO_RCV_DESCRIPTORS_10G;
-- 
1.6.0.2


^ permalink raw reply related

* [PATCH NEXT 1/7] qlcnic: driver private workqueue
From: Amit Kumar Salecha @ 2010-10-08  9:46 UTC (permalink / raw)
  To: davem; +Cc: netdev, ameen.rahman, anirban.chakraborty
In-Reply-To: <1286531171-21173-1-git-send-email-amit.salecha@qlogic.com>

Currently fw recovery uses the global workqueue.
As the same workqueue is used by the kernel for ethtool etc., supporting
quiescent mode is not possible without a driver-private workqueue.

Signed-off-by: Amit Kumar Salecha <amit.salecha@qlogic.com>
---
 drivers/net/qlcnic/qlcnic_main.c |   12 +++++++++++-
 1 files changed, 11 insertions(+), 1 deletions(-)

diff --git a/drivers/net/qlcnic/qlcnic_main.c b/drivers/net/qlcnic/qlcnic_main.c
index 4757908..7503c48 100644
--- a/drivers/net/qlcnic/qlcnic_main.c
+++ b/drivers/net/qlcnic/qlcnic_main.c
@@ -46,6 +46,7 @@ char qlcnic_driver_name[] = "qlcnic";
 static const char qlcnic_driver_string[] = "QLogic 1/10 GbE "
 	"Converged/Intelligent Ethernet Driver v" QLCNIC_LINUX_VERSIONID;
 
+static struct workqueue_struct *qlcnic_wq;
 static int qlcnic_mac_learn;
 module_param(qlcnic_mac_learn, int, 0644);
 MODULE_PARM_DESC(qlcnic_mac_learn, "Mac Filter (0=disabled, 1=enabled)");
@@ -2886,7 +2887,8 @@ qlcnic_schedule_work(struct qlcnic_adapter *adapter,
 		return;
 
 	INIT_DELAYED_WORK(&adapter->fw_work, func);
-	schedule_delayed_work(&adapter->fw_work, round_jiffies_relative(delay));
+	queue_delayed_work(qlcnic_wq, &adapter->fw_work,
+					round_jiffies_relative(delay));
 }
 
 static void
@@ -4163,6 +4165,12 @@ static int __init qlcnic_init_module(void)
 
 	printk(KERN_INFO "%s\n", qlcnic_driver_string);
 
+	qlcnic_wq = create_singlethread_workqueue("qlcnic");
+	if (qlcnic_wq == NULL) {
+		printk(KERN_ERR "qlcnic: cannot create workqueue\n");
+		return -ENOMEM;
+	}
+
 #ifdef CONFIG_INET
 	register_netdevice_notifier(&qlcnic_netdev_cb);
 	register_inetaddr_notifier(&qlcnic_inetaddr_cb);
@@ -4174,6 +4182,7 @@ static int __init qlcnic_init_module(void)
 		unregister_inetaddr_notifier(&qlcnic_inetaddr_cb);
 		unregister_netdevice_notifier(&qlcnic_netdev_cb);
 #endif
+		destroy_workqueue(qlcnic_wq);
 	}
 
 	return ret;
@@ -4190,6 +4199,7 @@ static void __exit qlcnic_exit_module(void)
 	unregister_inetaddr_notifier(&qlcnic_inetaddr_cb);
 	unregister_netdevice_notifier(&qlcnic_netdev_cb);
 #endif
+	destroy_workqueue(qlcnic_wq);
 }
 
 module_exit(qlcnic_exit_module);
-- 
1.6.0.2


^ permalink raw reply related

* [PATCH NEXT 0/7]qlcnic: driver update
From: Amit Kumar Salecha @ 2010-10-08  9:46 UTC (permalink / raw)
  To: davem; +Cc: netdev, ameen.rahman, anirban.chakraborty

Hi
  Series of 7 to support quiescent mode and fixes.
  Please include them in net-next.

-Amit

^ permalink raw reply

* [PATCH NEXT 4/7] qlcnic: fix board description
From: Amit Kumar Salecha @ 2010-10-08  9:46 UTC (permalink / raw)
  To: davem; +Cc: netdev, ameen.rahman, anirban.chakraborty, Sritej Velaga
In-Reply-To: <1286531171-21173-1-git-send-email-amit.salecha@qlogic.com>

From: Sritej Velaga <sritej.velaga@qlogic.com>

Remove "Flex-10" from board description.

Signed-off-by: Sritej Velaga <sritej.velaga@qlogic.com>
Signed-off-by: Amit Kumar Salecha <amit.salecha@qlogic.com>
---
 drivers/net/qlcnic/qlcnic.h |    2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/drivers/net/qlcnic/qlcnic.h b/drivers/net/qlcnic/qlcnic.h
index 6909cfc..46a54b3 100644
--- a/drivers/net/qlcnic/qlcnic.h
+++ b/drivers/net/qlcnic/qlcnic.h
@@ -1370,7 +1370,7 @@ static const struct qlcnic_brdinfo qlcnic_boards[] = {
 	{0x1077, 0x8020, 0x1077, 0x20f,
 		"3200 Series Single Port 10Gb Intelligent Ethernet Adapter"},
 	{0x1077, 0x8020, 0x103c, 0x3733,
-		"NC523SFP 10Gb 2-port Flex-10 Server Adapter"},
+		"NC523SFP 10Gb 2-port Server Adapter"},
 	{0x1077, 0x8020, 0x0, 0x0, "cLOM8214 1/10GbE Controller"},
 };
 
-- 
1.6.0.2


^ permalink raw reply related

* [PATCH NEXT 6/7] qlcnic: change all P3 references to P3P
From: Amit Kumar Salecha @ 2010-10-08  9:46 UTC (permalink / raw)
  To: davem; +Cc: netdev, ameen.rahman, anirban.chakraborty, Sritej Velaga
In-Reply-To: <1286531171-21173-1-git-send-email-amit.salecha@qlogic.com>

From: Sritej Velaga <sritej.velaga@qlogic.com>

This patch just renames all P3 #defines to P3P.

Signed-off-by: Sritej Velaga <sritej.velaga@qlogic.com>
Signed-off-by: Amit Kumar Salecha <amit.salecha@qlogic.com>
---
 drivers/net/qlcnic/qlcnic.h         |   38 +++++++++++++++---------------
 drivers/net/qlcnic/qlcnic_ethtool.c |   42 +++++++++++++++++-----------------
 drivers/net/qlcnic/qlcnic_hdr.h     |   24 ++++++++++----------
 drivers/net/qlcnic/qlcnic_hw.c      |   36 +++++++++++++++---------------
 drivers/net/qlcnic/qlcnic_init.c    |    4 +-
 5 files changed, 72 insertions(+), 72 deletions(-)

diff --git a/drivers/net/qlcnic/qlcnic.h b/drivers/net/qlcnic/qlcnic.h
index 4025e20..d325314 100644
--- a/drivers/net/qlcnic/qlcnic.h
+++ b/drivers/net/qlcnic/qlcnic.h
@@ -94,12 +94,12 @@
 #define FIRST_PAGE_GROUP_START	0
 #define FIRST_PAGE_GROUP_END	0x100000
 
-#define P3_MAX_MTU                     (9600)
-#define P3_MIN_MTU                     (68)
+#define P3P_MAX_MTU                     (9600)
+#define P3P_MIN_MTU                     (68)
 #define QLCNIC_MAX_ETHERHDR                32 /* This contains some padding */
 
-#define QLCNIC_P3_RX_BUF_MAX_LEN         (QLCNIC_MAX_ETHERHDR + ETH_DATA_LEN)
-#define QLCNIC_P3_RX_JUMBO_BUF_MAX_LEN   (QLCNIC_MAX_ETHERHDR + P3_MAX_MTU)
+#define QLCNIC_P3P_RX_BUF_MAX_LEN         (QLCNIC_MAX_ETHERHDR + ETH_DATA_LEN)
+#define QLCNIC_P3P_RX_JUMBO_BUF_MAX_LEN   (QLCNIC_MAX_ETHERHDR + P3P_MAX_MTU)
 #define QLCNIC_CT_DEFAULT_RX_BUF_LEN	2048
 #define QLCNIC_LRO_BUFFER_EXTRA		2048
 
@@ -307,20 +307,20 @@ struct uni_data_desc{
 /* Magic number to let user know flash is programmed */
 #define	QLCNIC_BDINFO_MAGIC 0x12345678
 
-#define QLCNIC_BRDTYPE_P3_REF_QG	0x0021
-#define QLCNIC_BRDTYPE_P3_HMEZ		0x0022
-#define QLCNIC_BRDTYPE_P3_10G_CX4_LP	0x0023
-#define QLCNIC_BRDTYPE_P3_4_GB		0x0024
-#define QLCNIC_BRDTYPE_P3_IMEZ		0x0025
-#define QLCNIC_BRDTYPE_P3_10G_SFP_PLUS	0x0026
-#define QLCNIC_BRDTYPE_P3_10000_BASE_T	0x0027
-#define QLCNIC_BRDTYPE_P3_XG_LOM	0x0028
-#define QLCNIC_BRDTYPE_P3_4_GB_MM	0x0029
-#define QLCNIC_BRDTYPE_P3_10G_SFP_CT	0x002a
-#define QLCNIC_BRDTYPE_P3_10G_SFP_QT	0x002b
-#define QLCNIC_BRDTYPE_P3_10G_CX4	0x0031
-#define QLCNIC_BRDTYPE_P3_10G_XFP	0x0032
-#define QLCNIC_BRDTYPE_P3_10G_TP	0x0080
+#define QLCNIC_BRDTYPE_P3P_REF_QG	0x0021
+#define QLCNIC_BRDTYPE_P3P_HMEZ		0x0022
+#define QLCNIC_BRDTYPE_P3P_10G_CX4_LP	0x0023
+#define QLCNIC_BRDTYPE_P3P_4_GB		0x0024
+#define QLCNIC_BRDTYPE_P3P_IMEZ		0x0025
+#define QLCNIC_BRDTYPE_P3P_10G_SFP_PLUS	0x0026
+#define QLCNIC_BRDTYPE_P3P_10000_BASE_T	0x0027
+#define QLCNIC_BRDTYPE_P3P_XG_LOM	0x0028
+#define QLCNIC_BRDTYPE_P3P_4_GB_MM	0x0029
+#define QLCNIC_BRDTYPE_P3P_10G_SFP_CT	0x002a
+#define QLCNIC_BRDTYPE_P3P_10G_SFP_QT	0x002b
+#define QLCNIC_BRDTYPE_P3P_10G_CX4	0x0031
+#define QLCNIC_BRDTYPE_P3P_10G_XFP	0x0032
+#define QLCNIC_BRDTYPE_P3P_10G_TP	0x0080
 
 #define QLCNIC_MSIX_TABLE_OFFSET	0x44
 
@@ -719,7 +719,7 @@ struct qlcnic_cardrsp_tx_ctx {
 
 /* MAC */
 
-#define MC_COUNT_P3	38
+#define MC_COUNT_P3P	38
 
 #define QLCNIC_MAC_NOOP	0
 #define QLCNIC_MAC_ADD	1
diff --git a/drivers/net/qlcnic/qlcnic_ethtool.c b/drivers/net/qlcnic/qlcnic_ethtool.c
index e07adb1..2568aa6 100644
--- a/drivers/net/qlcnic/qlcnic_ethtool.c
+++ b/drivers/net/qlcnic/qlcnic_ethtool.c
@@ -96,7 +96,7 @@ static const char qlcnic_gstrings_test[][ETH_GSTRING_LEN] = {
 static const u32 diag_registers[] = {
 	CRB_CMDPEG_STATE,
 	CRB_RCVPEG_STATE,
-	CRB_XG_STATE_P3,
+	CRB_XG_STATE_P3P,
 	CRB_FW_CAPABILITIES_1,
 	ISR_INT_STATE_REG,
 	QLCNIC_CRB_DRV_ACTIVE,
@@ -189,9 +189,9 @@ qlcnic_get_settings(struct net_device *dev, struct ethtool_cmd *ecmd)
 			goto skip;
 		}
 
-		val = QLCRD32(adapter, P3_LINK_SPEED_REG(pcifn));
-		ecmd->speed = P3_LINK_SPEED_MHZ *
-			P3_LINK_SPEED_VAL(pcifn, val);
+		val = QLCRD32(adapter, P3P_LINK_SPEED_REG(pcifn));
+		ecmd->speed = P3P_LINK_SPEED_MHZ *
+			P3P_LINK_SPEED_VAL(pcifn, val);
 		ecmd->duplex = DUPLEX_FULL;
 		ecmd->autoneg = AUTONEG_DISABLE;
 	} else
@@ -202,42 +202,42 @@ skip:
 	ecmd->transceiver = XCVR_EXTERNAL;
 
 	switch (adapter->ahw.board_type) {
-	case QLCNIC_BRDTYPE_P3_REF_QG:
-	case QLCNIC_BRDTYPE_P3_4_GB:
-	case QLCNIC_BRDTYPE_P3_4_GB_MM:
+	case QLCNIC_BRDTYPE_P3P_REF_QG:
+	case QLCNIC_BRDTYPE_P3P_4_GB:
+	case QLCNIC_BRDTYPE_P3P_4_GB_MM:
 
 		ecmd->supported |= SUPPORTED_Autoneg;
 		ecmd->advertising |= ADVERTISED_Autoneg;
-	case QLCNIC_BRDTYPE_P3_10G_CX4:
-	case QLCNIC_BRDTYPE_P3_10G_CX4_LP:
-	case QLCNIC_BRDTYPE_P3_10000_BASE_T:
+	case QLCNIC_BRDTYPE_P3P_10G_CX4:
+	case QLCNIC_BRDTYPE_P3P_10G_CX4_LP:
+	case QLCNIC_BRDTYPE_P3P_10000_BASE_T:
 		ecmd->supported |= SUPPORTED_TP;
 		ecmd->advertising |= ADVERTISED_TP;
 		ecmd->port = PORT_TP;
 		ecmd->autoneg =  adapter->link_autoneg;
 		break;
-	case QLCNIC_BRDTYPE_P3_IMEZ:
-	case QLCNIC_BRDTYPE_P3_XG_LOM:
-	case QLCNIC_BRDTYPE_P3_HMEZ:
+	case QLCNIC_BRDTYPE_P3P_IMEZ:
+	case QLCNIC_BRDTYPE_P3P_XG_LOM:
+	case QLCNIC_BRDTYPE_P3P_HMEZ:
 		ecmd->supported |= SUPPORTED_MII;
 		ecmd->advertising |= ADVERTISED_MII;
 		ecmd->port = PORT_MII;
 		ecmd->autoneg = AUTONEG_DISABLE;
 		break;
-	case QLCNIC_BRDTYPE_P3_10G_SFP_PLUS:
-	case QLCNIC_BRDTYPE_P3_10G_SFP_CT:
-	case QLCNIC_BRDTYPE_P3_10G_SFP_QT:
+	case QLCNIC_BRDTYPE_P3P_10G_SFP_PLUS:
+	case QLCNIC_BRDTYPE_P3P_10G_SFP_CT:
+	case QLCNIC_BRDTYPE_P3P_10G_SFP_QT:
 		ecmd->advertising |= ADVERTISED_TP;
 		ecmd->supported |= SUPPORTED_TP;
 		check_sfp_module = netif_running(dev) &&
 			adapter->has_link_events;
-	case QLCNIC_BRDTYPE_P3_10G_XFP:
+	case QLCNIC_BRDTYPE_P3P_10G_XFP:
 		ecmd->supported |= SUPPORTED_FIBRE;
 		ecmd->advertising |= ADVERTISED_FIBRE;
 		ecmd->port = PORT_FIBRE;
 		ecmd->autoneg = AUTONEG_DISABLE;
 		break;
-	case QLCNIC_BRDTYPE_P3_10G_TP:
+	case QLCNIC_BRDTYPE_P3P_10G_TP:
 		if (adapter->ahw.port_type == QLCNIC_XGBE) {
 			ecmd->autoneg = AUTONEG_DISABLE;
 			ecmd->supported |= (SUPPORTED_FIBRE | SUPPORTED_TP);
@@ -381,9 +381,9 @@ static u32 qlcnic_test_link(struct net_device *dev)
 	struct qlcnic_adapter *adapter = netdev_priv(dev);
 	u32 val;
 
-	val = QLCRD32(adapter, CRB_XG_STATE_P3);
-	val = XG_LINK_STATE_P3(adapter->ahw.pci_func, val);
-	return (val == XG_LINK_UP_P3) ? 0 : 1;
+	val = QLCRD32(adapter, CRB_XG_STATE_P3P);
+	val = XG_LINK_STATE_P3P(adapter->ahw.pci_func, val);
+	return (val == XG_LINK_UP_P3P) ? 0 : 1;
 }
 
 static int
diff --git a/drivers/net/qlcnic/qlcnic_hdr.h b/drivers/net/qlcnic/qlcnic_hdr.h
index 716203e..4290b80 100644
--- a/drivers/net/qlcnic/qlcnic_hdr.h
+++ b/drivers/net/qlcnic/qlcnic_hdr.h
@@ -556,18 +556,18 @@ enum {
 #define XG_LINK_UP	0x10
 #define XG_LINK_DOWN	0x20
 
-#define XG_LINK_UP_P3	0x01
-#define XG_LINK_DOWN_P3	0x02
-#define XG_LINK_STATE_P3_MASK 0xf
-#define XG_LINK_STATE_P3(pcifn, val) \
-	(((val) >> ((pcifn) * 4)) & XG_LINK_STATE_P3_MASK)
-
-#define P3_LINK_SPEED_MHZ	100
-#define P3_LINK_SPEED_MASK	0xff
-#define P3_LINK_SPEED_REG(pcifn)	\
+#define XG_LINK_UP_P3P	0x01
+#define XG_LINK_DOWN_P3P	0x02
+#define XG_LINK_STATE_P3P_MASK 0xf
+#define XG_LINK_STATE_P3P(pcifn, val) \
+	(((val) >> ((pcifn) * 4)) & XG_LINK_STATE_P3P_MASK)
+
+#define P3P_LINK_SPEED_MHZ	100
+#define P3P_LINK_SPEED_MASK	0xff
+#define P3P_LINK_SPEED_REG(pcifn)	\
 	(CRB_PF_LINK_SPEED_1 + (((pcifn) / 4) * 4))
-#define P3_LINK_SPEED_VAL(pcifn, reg)	\
-	(((reg) >> (8 * ((pcifn) & 0x3))) & P3_LINK_SPEED_MASK)
+#define P3P_LINK_SPEED_VAL(pcifn, reg)	\
+	(((reg) >> (8 * ((pcifn) & 0x3))) & P3P_LINK_SPEED_MASK)
 
 #define QLCNIC_CAM_RAM_BASE	(QLCNIC_CRB_CAM + 0x02000)
 #define QLCNIC_CAM_RAM(reg)	(QLCNIC_CAM_RAM_BASE + (reg))
@@ -592,7 +592,7 @@ enum {
 #define CRB_CMDPEG_STATE		(QLCNIC_REG(0x50))
 #define CRB_RCVPEG_STATE		(QLCNIC_REG(0x13c))
 
-#define CRB_XG_STATE_P3 		(QLCNIC_REG(0x98))
+#define CRB_XG_STATE_P3P		(QLCNIC_REG(0x98))
 #define CRB_PF_LINK_SPEED_1		(QLCNIC_REG(0xe8))
 #define CRB_PF_LINK_SPEED_2		(QLCNIC_REG(0xec))
 
diff --git a/drivers/net/qlcnic/qlcnic_hw.c b/drivers/net/qlcnic/qlcnic_hw.c
index 53e8053..7a47a2a 100644
--- a/drivers/net/qlcnic/qlcnic_hw.c
+++ b/drivers/net/qlcnic/qlcnic_hw.c
@@ -754,9 +754,9 @@ int qlcnic_change_mtu(struct net_device *netdev, int mtu)
 	struct qlcnic_adapter *adapter = netdev_priv(netdev);
 	int rc = 0;
 
-	if (mtu < P3_MIN_MTU || mtu > P3_MAX_MTU) {
+	if (mtu < P3P_MIN_MTU || mtu > P3P_MAX_MTU) {
 		dev_err(&adapter->netdev->dev, "%d bytes < mtu < %d bytes"
-			" not supported\n", P3_MAX_MTU, P3_MIN_MTU);
+			" not supported\n", P3P_MAX_MTU, P3P_MIN_MTU);
 		return -EINVAL;
 	}
 
@@ -1161,31 +1161,31 @@ int qlcnic_get_board_info(struct qlcnic_adapter *adapter)
 
 	adapter->ahw.board_type = board_type;
 
-	if (board_type == QLCNIC_BRDTYPE_P3_4_GB_MM) {
+	if (board_type == QLCNIC_BRDTYPE_P3P_4_GB_MM) {
 		u32 gpio = QLCRD32(adapter, QLCNIC_ROMUSB_GLB_PAD_GPIO_I);
 		if ((gpio & 0x8000) == 0)
-			board_type = QLCNIC_BRDTYPE_P3_10G_TP;
+			board_type = QLCNIC_BRDTYPE_P3P_10G_TP;
 	}
 
 	switch (board_type) {
-	case QLCNIC_BRDTYPE_P3_HMEZ:
-	case QLCNIC_BRDTYPE_P3_XG_LOM:
-	case QLCNIC_BRDTYPE_P3_10G_CX4:
-	case QLCNIC_BRDTYPE_P3_10G_CX4_LP:
-	case QLCNIC_BRDTYPE_P3_IMEZ:
-	case QLCNIC_BRDTYPE_P3_10G_SFP_PLUS:
-	case QLCNIC_BRDTYPE_P3_10G_SFP_CT:
-	case QLCNIC_BRDTYPE_P3_10G_SFP_QT:
-	case QLCNIC_BRDTYPE_P3_10G_XFP:
-	case QLCNIC_BRDTYPE_P3_10000_BASE_T:
+	case QLCNIC_BRDTYPE_P3P_HMEZ:
+	case QLCNIC_BRDTYPE_P3P_XG_LOM:
+	case QLCNIC_BRDTYPE_P3P_10G_CX4:
+	case QLCNIC_BRDTYPE_P3P_10G_CX4_LP:
+	case QLCNIC_BRDTYPE_P3P_IMEZ:
+	case QLCNIC_BRDTYPE_P3P_10G_SFP_PLUS:
+	case QLCNIC_BRDTYPE_P3P_10G_SFP_CT:
+	case QLCNIC_BRDTYPE_P3P_10G_SFP_QT:
+	case QLCNIC_BRDTYPE_P3P_10G_XFP:
+	case QLCNIC_BRDTYPE_P3P_10000_BASE_T:
 		adapter->ahw.port_type = QLCNIC_XGBE;
 		break;
-	case QLCNIC_BRDTYPE_P3_REF_QG:
-	case QLCNIC_BRDTYPE_P3_4_GB:
-	case QLCNIC_BRDTYPE_P3_4_GB_MM:
+	case QLCNIC_BRDTYPE_P3P_REF_QG:
+	case QLCNIC_BRDTYPE_P3P_4_GB:
+	case QLCNIC_BRDTYPE_P3P_4_GB_MM:
 		adapter->ahw.port_type = QLCNIC_GBE;
 		break;
-	case QLCNIC_BRDTYPE_P3_10G_TP:
+	case QLCNIC_BRDTYPE_P3P_10G_TP:
 		adapter->ahw.port_type = (adapter->portnum < 2) ?
 			QLCNIC_XGBE : QLCNIC_GBE;
 		break;
diff --git a/drivers/net/qlcnic/qlcnic_init.c b/drivers/net/qlcnic/qlcnic_init.c
index 908a25b..0d180c6 100644
--- a/drivers/net/qlcnic/qlcnic_init.c
+++ b/drivers/net/qlcnic/qlcnic_init.c
@@ -259,14 +259,14 @@ int qlcnic_alloc_sw_resources(struct qlcnic_adapter *adapter)
 		switch (ring) {
 		case RCV_RING_NORMAL:
 			rds_ring->num_desc = adapter->num_rxd;
-			rds_ring->dma_size = QLCNIC_P3_RX_BUF_MAX_LEN;
+			rds_ring->dma_size = QLCNIC_P3P_RX_BUF_MAX_LEN;
 			rds_ring->skb_size = rds_ring->dma_size + NET_IP_ALIGN;
 			break;
 
 		case RCV_RING_JUMBO:
 			rds_ring->num_desc = adapter->num_jumbo_rxd;
 			rds_ring->dma_size =
-				QLCNIC_P3_RX_JUMBO_BUF_MAX_LEN;
+				QLCNIC_P3P_RX_JUMBO_BUF_MAX_LEN;
 
 			if (adapter->capabilities & QLCNIC_FW_CAPABILITY_HW_LRO)
 				rds_ring->dma_size += QLCNIC_LRO_BUFFER_EXTRA;
-- 
1.6.0.2


^ permalink raw reply related

* [PATCH NEXT 7/7] qlcnic: update driver version 5.0.11
From: Amit Kumar Salecha @ 2010-10-08  9:46 UTC (permalink / raw)
  To: davem; +Cc: netdev, ameen.rahman, anirban.chakraborty
In-Reply-To: <1286531171-21173-1-git-send-email-amit.salecha@qlogic.com>

Signed-off-by: Amit Kumar Salecha <amit.salecha@qlogic.com>
---
 drivers/net/qlcnic/qlcnic.h |    4 ++--
 1 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/qlcnic/qlcnic.h b/drivers/net/qlcnic/qlcnic.h
index d325314..26c37d3 100644
--- a/drivers/net/qlcnic/qlcnic.h
+++ b/drivers/net/qlcnic/qlcnic.h
@@ -51,8 +51,8 @@
 
 #define _QLCNIC_LINUX_MAJOR 5
 #define _QLCNIC_LINUX_MINOR 0
-#define _QLCNIC_LINUX_SUBVERSION 10
-#define QLCNIC_LINUX_VERSIONID  "5.0.10"
+#define _QLCNIC_LINUX_SUBVERSION 11
+#define QLCNIC_LINUX_VERSIONID  "5.0.11"
 #define QLCNIC_DRV_IDC_VER  0x01
 #define QLCNIC_DRIVER_VERSION  ((_QLCNIC_LINUX_MAJOR << 16) |\
 		 (_QLCNIC_LINUX_MINOR << 8) | (_QLCNIC_LINUX_SUBVERSION))
-- 
1.6.0.2


^ permalink raw reply related

* [PATCH NEXT 5/7] qlcnic: fix promiscous mode for VF
From: Amit Kumar Salecha @ 2010-10-08  9:46 UTC (permalink / raw)
  To: davem; +Cc: netdev, ameen.rahman, anirban.chakraborty, Rajesh Borundia
In-Reply-To: <1286531171-21173-1-git-send-email-amit.salecha@qlogic.com>

From: Rajesh Borundia <rajesh.borundia@qlogic.com>

o Allow promiscuous mode setting for VFs, depending upon the configuration.

Signed-off-by: Rajesh Borundia <rajesh.borundia@qlogic.com>
Signed-off-by: Amit Kumar Salecha <amit.salecha@qlogic.com>
---
 drivers/net/qlcnic/qlcnic.h      |    1 +
 drivers/net/qlcnic/qlcnic_hw.c   |    3 ++-
 drivers/net/qlcnic/qlcnic_main.c |    9 +++++++--
 3 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/drivers/net/qlcnic/qlcnic.h b/drivers/net/qlcnic/qlcnic.h
index 46a54b3..4025e20 100644
--- a/drivers/net/qlcnic/qlcnic.h
+++ b/drivers/net/qlcnic/qlcnic.h
@@ -920,6 +920,7 @@ struct qlcnic_ipaddr {
 #define QLCNIC_TAGGING_ENABLED		0x100
 #define QLCNIC_MACSPOOF			0x200
 #define QLCNIC_MAC_OVERRIDE_DISABLED	0x400
+#define QLCNIC_PROMISC_DISABLED		0x800
 #define QLCNIC_IS_MSI_FAMILY(adapter) \
 	((adapter)->flags & (QLCNIC_MSI_ENABLED | QLCNIC_MSIX_ENABLED))
 
diff --git a/drivers/net/qlcnic/qlcnic_hw.c b/drivers/net/qlcnic/qlcnic_hw.c
index 7f1f9b4..53e8053 100644
--- a/drivers/net/qlcnic/qlcnic_hw.c
+++ b/drivers/net/qlcnic/qlcnic_hw.c
@@ -442,7 +442,8 @@ void qlcnic_set_multi(struct net_device *netdev)
 	qlcnic_nic_add_mac(adapter, bcast_addr);
 
 	if (netdev->flags & IFF_PROMISC) {
-		mode = VPORT_MISS_MODE_ACCEPT_ALL;
+		if (!(adapter->flags & QLCNIC_PROMISC_DISABLED))
+			mode = VPORT_MISS_MODE_ACCEPT_ALL;
 		goto send_fw_cmd;
 	}
 
diff --git a/drivers/net/qlcnic/qlcnic_main.c b/drivers/net/qlcnic/qlcnic_main.c
index e3c1b80..4aada0b 100644
--- a/drivers/net/qlcnic/qlcnic_main.c
+++ b/drivers/net/qlcnic/qlcnic_main.c
@@ -718,8 +718,8 @@ static void
 qlcnic_set_eswitch_port_features(struct qlcnic_adapter *adapter,
 		struct qlcnic_esw_func_cfg *esw_cfg)
 {
-	adapter->flags &= ~QLCNIC_MACSPOOF;
-	adapter->flags &= ~QLCNIC_MAC_OVERRIDE_DISABLED;
+	adapter->flags &= ~(QLCNIC_MACSPOOF | QLCNIC_MAC_OVERRIDE_DISABLED |
+				QLCNIC_PROMISC_DISABLED);
 
 	if (esw_cfg->mac_anti_spoof)
 		adapter->flags |= QLCNIC_MACSPOOF;
@@ -727,6 +727,9 @@ qlcnic_set_eswitch_port_features(struct qlcnic_adapter *adapter,
 	if (!esw_cfg->mac_override)
 		adapter->flags |= QLCNIC_MAC_OVERRIDE_DISABLED;
 
+	if (!esw_cfg->promisc_mode)
+		adapter->flags |= QLCNIC_PROMISC_DISABLED;
+
 	qlcnic_set_netdev_features(adapter, esw_cfg);
 }
 
@@ -845,6 +848,7 @@ qlcnic_set_default_offload_settings(struct qlcnic_adapter *adapter)
 		esw_cfg.pci_func = i;
 		esw_cfg.offload_flags = BIT_0;
 		esw_cfg.mac_override = BIT_0;
+		esw_cfg.promisc_mode = BIT_0;
 		if (adapter->capabilities  & QLCNIC_FW_CAPABILITY_TSO)
 			esw_cfg.offload_flags |= (BIT_1 | BIT_2);
 		if (qlcnic_config_switch_port(adapter, &esw_cfg))
@@ -3571,6 +3575,7 @@ validate_esw_config(struct qlcnic_adapter *adapter,
 						QLCNIC_NON_PRIV_FUNC) {
 				esw_cfg[i].mac_anti_spoof = 0;
 				esw_cfg[i].mac_override = 1;
+				esw_cfg[i].promisc_mode = 1;
 			}
 			break;
 		case QLCNIC_ADD_VLAN:
-- 
1.6.0.2


^ permalink raw reply related

* [PATCH 4/5] rds: stop including asm-generic/bitops/le.h directly
From: Akinobu Mita @ 2010-10-08 10:03 UTC (permalink / raw)
  To: linux-kernel, linux-arch, Arnd Bergmann, Andrew Morton
  Cc: Akinobu Mita, Andy Grover, rds-devel, David S. Miller, netdev
In-Reply-To: <1286532193-29814-1-git-send-email-akinobu.mita@gmail.com>

asm-generic/bitops/le.h is only intended to be included directly from
asm-generic/bitops/ext2-non-atomic.h or asm-generic/bitops/minix-le.h,
which implement the generic ext2 or minix bit operations.

This patch stops including asm-generic/bitops/le.h directly and uses the
ext2 non-atomic bit operations instead.

An alternative approach is to introduce little-endian bit operations
in linux/bitops.h, but that would need to touch more files than this change does.

Signed-off-by: Akinobu Mita <akinobu.mita@gmail.com>
Cc: Andy Grover <andy.grover@oracle.com>
Cc: rds-devel@oss.oracle.com
Cc: "David S. Miller" <davem@davemloft.net>
Cc: netdev@vger.kernel.org
---
 net/rds/cong.c |    9 ++++-----
 1 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/net/rds/cong.c b/net/rds/cong.c
index 0871a29..5bd0f36 100644
--- a/net/rds/cong.c
+++ b/net/rds/cong.c
@@ -33,8 +33,7 @@
 #include <linux/slab.h>
 #include <linux/types.h>
 #include <linux/rbtree.h>
-
-#include <asm-generic/bitops/le.h>
+#include <linux/bitops.h>
 
 #include "rds.h"
 
@@ -285,7 +284,7 @@ void rds_cong_set_bit(struct rds_cong_map *map, __be16 port)
 	i = be16_to_cpu(port) / RDS_CONG_MAP_PAGE_BITS;
 	off = be16_to_cpu(port) % RDS_CONG_MAP_PAGE_BITS;
 
-	generic___set_le_bit(off, (void *)map->m_page_addrs[i]);
+	ext2_set_bit(off, (void *)map->m_page_addrs[i]);
 }
 
 void rds_cong_clear_bit(struct rds_cong_map *map, __be16 port)
@@ -299,7 +298,7 @@ void rds_cong_clear_bit(struct rds_cong_map *map, __be16 port)
 	i = be16_to_cpu(port) / RDS_CONG_MAP_PAGE_BITS;
 	off = be16_to_cpu(port) % RDS_CONG_MAP_PAGE_BITS;
 
-	generic___clear_le_bit(off, (void *)map->m_page_addrs[i]);
+	ext2_clear_bit(off, (void *)map->m_page_addrs[i]);
 }
 
 static int rds_cong_test_bit(struct rds_cong_map *map, __be16 port)
@@ -310,7 +309,7 @@ static int rds_cong_test_bit(struct rds_cong_map *map, __be16 port)
 	i = be16_to_cpu(port) / RDS_CONG_MAP_PAGE_BITS;
 	off = be16_to_cpu(port) % RDS_CONG_MAP_PAGE_BITS;
 
-	return generic_test_le_bit(off, (void *)map->m_page_addrs[i]);
+	return ext2_test_bit(off, (void *)map->m_page_addrs[i]);
 }
 
 void rds_cong_add_socket(struct rds_sock *rs)
-- 
1.7.1.231.gd0b16

^ permalink raw reply related

* Re: Query on usage of Netlink
From: Rémi Denis-Courmont @ 2010-10-08 10:14 UTC (permalink / raw)
  To: Kumar SANGHVI
  Cc: netdev, srinidhi.kasagar, linus.walleij, sudeep.divakaran,
	gulshan.karmani
In-Reply-To: <20101008085056.GA13091@bnru01.bnr.st.com>


On Fri, 8 Oct 2010 14:21:28 +0530, Kumar SANGHVI
<kumar.sanghvi@stericsson.com> wrote:
> We have a requirement where-in we want to communicate the status of
> modem (whether modem is online or offline) from linux driver to
> user-space components. So that user-space components stop sending data
> to linux driver for communicating with modem, in case modem goes
> offline.

Is the modem control state machine in kernel space, as for say, 802.11
devices?

> We have decided to use the netlink mechanism to achieve this. We intend
> to send an integer value, defined in enum, to user-space indicating the
> current modem status.
(...)
> Or should we define a custom Netlink type in include/linux/netlink.h?

Did you consider using kobject and uevents (which are internally running on
top of Netlink)?

-- 
Rémi Denis-Courmont
http://www.remlab.net
http://fi.linkedin.com/in/remidenis


^ permalink raw reply

* Re: Query on usage of Netlink
From: Kumar SANGHVI @ 2010-10-08 10:31 UTC (permalink / raw)
  To: Rémi Denis-Courmont
  Cc: netdev@vger.kernel.org, Srinidhi KASAGAR, Linus WALLEIJ,
	Sudeep DIVAKARAN, Gulshan KARMANI
In-Reply-To: <1b4c783a02b46d7da4e308699a41856c@chewa.net>

Hi Rémi Denis-Courmont,

On Fri, Oct 08, 2010 at 12:14:44 +0200, Rémi Denis-Courmont wrote:
> 
> On Fri, 8 Oct 2010 14:21:28 +0530, Kumar SANGHVI
> <kumar.sanghvi@stericsson.com> wrote:
> > We have a requirement where-in we want to communicate the status of
> > modem (whether modem is online or offline) from linux driver to
> > user-space components. So that user-space components stop sending data
> > to linux driver for communicating with modem, in case modem goes
> > offline.
> 
> Is the modem control state machine in kernel space, as for say, 802.11
> devices?
State machine to keep track of current modem state is in kernel driver.
Kernel driver can know when modem goes offline or when modem is back
online.

> > We have decided to use the netlink mechanism to achieve this. We intend
> > to send an integer value, defined in enum, to user-space indicating the
> > current modem status.
> (...)
> > Or should we define a custom Netlink type in include/linux/netlink.h?
> 
> Did you consider using kobject and uevents (which are internally running on
> top of Netlink)?
Yes. However, I believe kobject and uevents require some user-space
daemon like udev which can dispatch the specific uevents to user-space
apps. This user-space daemon then becomes something application
framework specific.
Another option is that the user-space app itself opens up a netlink socket of type
KOBJECT_UEVENT. But then, user-space will have to filter the
unnecessary uevents out of it.

Best regards,
Kumar.

^ permalink raw reply

* [RFC PATCH 0/9] ipvs network name space (netns) aware
From: Hans Schillstrom @ 2010-10-08 11:16 UTC (permalink / raw)
  To: lvs-devel, netdev, netfilter-devel; +Cc: horms, ja, wensong, daniel.lezcano

This patch series adds network name space (netns) support to the LVS.

REVISION

This is version 1

OVERVIEW

The patch doesn't remove or add any functionality except for netns.
For users that don't use network name space (netns) this patch is
completely transparent.

Now it's possible to run LVS in a Linux container (see lxc-tools),
i.e. a lightweight virtualization. For example, it's possible to run
one or several LVS instances on a real server in their own network name spaces.
From the LVS point of view it looks like it runs on its own machine.

IMPLEMENTATION
Basic requirements for netns awareness
 - Global variables have to be moved to dynamically allocated memory.

Most global variables now reside in a struct netns_ipvs { } in netns/ip_vs.h.
What is moved and what is not?

Some cache-aligned locks, the module init params and the debug_level are still global.

The algorithm files are untouched.

QUESTIONS
Drop rate in ip_vs_ctl per netns or grand total ?
Should more lock variables be moved (or less) ?


PATCH SET
This patch set is based upon net-next-2.6 (2.6.36-rc3) from 4 oct 2010
and [patch v4] ipvs: IPv6 tunnel mode

Note: ip_vs_xmit.c will not work without "[patch v4] ipvs: IPv6 tunnel mode"

SUMMARY

 include/net/ip_vs.h                     |  136 ++++---
 include/net/net_namespace.h             |    2 +
 include/net/netns/ip_vs.h               |  112 +++++
 net/netfilter/ipvs/ip_vs_app.c          |   96 +++--
 net/netfilter/ipvs/ip_vs_conn.c         |  296 ++++++++-----
 net/netfilter/ipvs/ip_vs_core.c         |  155 ++++---
 net/netfilter/ipvs/ip_vs_ctl.c          |  771 +++++++++++++++++--------------
 net/netfilter/ipvs/ip_vs_est.c          |  127 +++--
 net/netfilter/ipvs/ip_vs_ftp.c          |   64 ++-
 net/netfilter/ipvs/ip_vs_proto.c        |  108 +++++-
 net/netfilter/ipvs/ip_vs_proto_ah_esp.c |   34 +-
 net/netfilter/ipvs/ip_vs_proto_sctp.c   |  126 +++---
 net/netfilter/ipvs/ip_vs_proto_tcp.c    |  112 +++--
 net/netfilter/ipvs/ip_vs_proto_udp.c    |  107 +++--
 net/netfilter/ipvs/ip_vs_sync.c         |  323 +++++++------
 net/netfilter/ipvs/ip_vs_xmit.c         |   12 +-
 16 files changed, 1588 insertions(+), 993 deletions(-)

The patch will be divided for readability into
 1. include files
 2. app
 3. conn
 4. core
 5. ctl
 6. est
 7. ftp
 8. proto files
 9. sync and xmit

Include files:
A new file, include/net/netns/ip_vs.h, is added containing all netns-specific data.
include/net/net_namespace.h: pointer to "struct netns_ipvs" added.
include/net/ip_vs.h: a new struct added, and many prototypes changed.

* ip_vs_core.c
All netns init originates from this file - ip_vs_init()

* ip_vs_conn.c
Lock array for conn table is kept due to performance,
(or am I wrong here ?).
"static struct ip_vs_aligned_lock
__ip_vs_conntbl_lock_array[CT_LOCKARRAY_SIZE] __cacheline_aligned;"

* ip_vs_ctl.c
drop rate is still global

TESTING
This patch has been running for a month now with three LVS instances per machine,
one in the root name space and two in other name spaces.
Both IPv4 & IPv6 have been tested in all three modes: DR, TUN and NAT.
Only a limited set of algos has been used (read rr).

Backup has been there all the time and a switch has been performed a couple of times.

There are still some BUG_ON(!net) left ...

Not tested yet:
 Drop level, DOS,  schedulers, performance ....
 Netns exit after usage of LVS (due to a bug in netdev/ipip somewhere tunl0 and


--
Regards
Hans Schillstrom <hans.schillstrom@ericsson.com>

^ permalink raw reply

* [RFC PATCH 1/9] ipvs network name space aware
From: Hans Schillstrom @ 2010-10-08 11:16 UTC (permalink / raw)
  To: lvs-devel, netdev, netfilter-devel; +Cc: horms, ja, wensong, daniel.lezcano

This part contains the include files
where include/net/netns/ip_vs.h is new and contains all moved vars.

SUMMARY

 include/net/ip_vs.h                     |  136 ++++---
 include/net/net_namespace.h             |    2 +
 include/net/netns/ip_vs.h               |  112 +++++

Signed-off-by: Hans Schillstrom <hans.schillstrom@ericsson.com>
---

diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index b17f863..b40a0fb 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -288,6 +288,7 @@ struct iphdr;
 struct ip_vs_conn;
 struct ip_vs_app;
 struct sk_buff;
+struct ip_vs_proto_data;

 struct ip_vs_protocol {
 	struct ip_vs_protocol	*next;
@@ -302,6 +303,10 @@ struct ip_vs_protocol {

 	void (*exit)(struct ip_vs_protocol *pp);

+	void (*init_netns)(struct net *net, struct ip_vs_proto_data *pd);
+
+	void (*exit_netns)(struct net *net, struct ip_vs_proto_data *pd);
+
 	int (*conn_schedule)(int af, struct sk_buff *skb,
 			     struct ip_vs_protocol *pp,
 			     int *verdict, struct ip_vs_conn **cpp);
@@ -337,11 +342,11 @@ struct ip_vs_protocol {
 				const struct sk_buff *skb,
 				struct ip_vs_protocol *pp);

-	int (*register_app)(struct ip_vs_app *inc);
+	int (*register_app)(struct net *net, struct ip_vs_app *inc);

-	void (*unregister_app)(struct ip_vs_app *inc);
+	void (*unregister_app)(struct net *net, struct ip_vs_app *inc);

-	int (*app_conn_bind)(struct ip_vs_conn *cp);
+	int (*app_conn_bind)(struct net *net, struct ip_vs_conn *cp);

 	void (*debug_packet)(struct ip_vs_protocol *pp,
 			     const struct sk_buff *skb,
@@ -350,10 +355,24 @@ struct ip_vs_protocol {

 	void (*timeout_change)(struct ip_vs_protocol *pp, int flags);

-	int (*set_state_timeout)(struct ip_vs_protocol *pp, char *sname, int to);
+	/*
+	 int (*set_state_timeout)(struct ip_vs_protocol *pp,
+	                          char *sname,
+	                          int to);    Not used  -Hans S */
+};
+/*
+ * protocol data per netns
+ */
+struct ip_vs_proto_data {
+	struct ip_vs_proto_data	*next;
+	struct ip_vs_protocol   *pp;
+	int			*timeout_table;	/* protocol timeout table */
+	atomic_t		appcnt;		/* counter of proto app incs. */
 };

-extern struct ip_vs_protocol * ip_vs_proto_get(unsigned short proto);
+extern struct ip_vs_protocol   * ip_vs_proto_get(unsigned short proto);
+extern struct ip_vs_proto_data * ip_vs_proto_data_get(struct net *net,
+						      unsigned short proto);

 /*
  *	IP_VS structure allocated for each dynamically scheduled connection
@@ -398,6 +417,8 @@ struct ip_vs_conn {
 	int (*packet_xmit)(struct sk_buff *skb, struct ip_vs_conn *cp,
 			   struct ip_vs_protocol *pp);

+	struct net		*net;		/* netns ptr needed in timer */
+
 	/* Note: we can group the following members into a structure,
 	   in order to save more space, and the following members are
 	   only used in VS/NAT anyway */
@@ -628,29 +649,32 @@ enum {
 	IP_VS_DIR_LAST,
 };

-extern struct ip_vs_conn *ip_vs_conn_in_get
-(int af, int protocol, const union nf_inet_addr *s_addr, __be16 s_port,
- const union nf_inet_addr *d_addr, __be16 d_port);
+extern struct ip_vs_conn *
+ip_vs_conn_in_get(struct net *net, int af, int protocol,
+		  const union nf_inet_addr *s_addr, __be16 s_port,
+		  const union nf_inet_addr *d_addr, __be16 d_port);

-extern struct ip_vs_conn *ip_vs_ct_in_get
-(int af, int protocol, const union nf_inet_addr *s_addr, __be16 s_port,
- const union nf_inet_addr *d_addr, __be16 d_port);
+extern struct ip_vs_conn *
+ip_vs_ct_in_get(struct net *net, int af, int protocol,
+		const union nf_inet_addr *s_addr, __be16 s_port,
+		const union nf_inet_addr *d_addr, __be16 d_port);

-struct ip_vs_conn * ip_vs_conn_in_get_proto(int af, const struct sk_buff *skb,
-					    struct ip_vs_protocol *pp,
-					    const struct ip_vs_iphdr *iph,
-					    unsigned int proto_off,
-					    int inverse);
+struct ip_vs_conn *
+ip_vs_conn_in_get_proto(int af, const struct sk_buff *skb,
+			struct ip_vs_protocol *pp,
+			const struct ip_vs_iphdr *iph,
+			unsigned int proto_off, int inverse);

-extern struct ip_vs_conn *ip_vs_conn_out_get
-(int af, int protocol, const union nf_inet_addr *s_addr, __be16 s_port,
- const union nf_inet_addr *d_addr, __be16 d_port);
+extern struct ip_vs_conn *
+ip_vs_conn_out_get(struct net *net,int af, int protocol,
+		   const union nf_inet_addr *s_addr, __be16 s_port,
+		   const union nf_inet_addr *d_addr, __be16 d_port);

-struct ip_vs_conn * ip_vs_conn_out_get_proto(int af, const struct sk_buff *skb,
-					     struct ip_vs_protocol *pp,
-					     const struct ip_vs_iphdr *iph,
-					     unsigned int proto_off,
-					     int inverse);
+struct ip_vs_conn *
+ip_vs_conn_out_get_proto(int af, const struct sk_buff *skb,
+			 struct ip_vs_protocol *pp,
+			 const struct ip_vs_iphdr *iph,
+			 unsigned int proto_off, int inverse);

 /* put back the conn without restarting its timer */
 static inline void __ip_vs_conn_put(struct ip_vs_conn *cp)
@@ -658,20 +682,22 @@ static inline void __ip_vs_conn_put(struct ip_vs_conn *cp)
 	atomic_dec(&cp->refcnt);
 }
 extern void ip_vs_conn_put(struct ip_vs_conn *cp);
-extern void ip_vs_conn_fill_cport(struct ip_vs_conn *cp, __be16 cport);
+extern void
+ip_vs_conn_fill_cport(struct net *net, struct ip_vs_conn *cp, __be16 cport);

 extern struct ip_vs_conn *
-ip_vs_conn_new(int af, int proto, const union nf_inet_addr *caddr, __be16 cport,
+ip_vs_conn_new(struct net *net, int af, int proto,
+	       const union nf_inet_addr *caddr, __be16 cport,
 	       const union nf_inet_addr *vaddr, __be16 vport,
-	       const union nf_inet_addr *daddr, __be16 dport, unsigned flags,
-	       struct ip_vs_dest *dest);
+	       const union nf_inet_addr *daddr, __be16 dport,
+	       unsigned flags, struct ip_vs_dest *dest);
 extern void ip_vs_conn_expire_now(struct ip_vs_conn *cp);

 extern const char * ip_vs_state_name(__u16 proto, int state);

-extern void ip_vs_tcp_conn_listen(struct ip_vs_conn *cp);
-extern int ip_vs_check_template(struct ip_vs_conn *ct);
-extern void ip_vs_random_dropentry(void);
+extern void ip_vs_tcp_conn_listen(struct net *net, struct ip_vs_conn *cp);
+extern int ip_vs_check_template(struct net *net, struct ip_vs_conn *ct);
+extern void ip_vs_random_dropentry(struct net *net);
 extern int ip_vs_conn_init(void);
 extern void ip_vs_conn_cleanup(void);

@@ -741,12 +767,15 @@ ip_vs_control_add(struct ip_vs_conn *cp, struct ip_vs_conn *ctl_cp)
  *      (from ip_vs_app.c)
  */
 #define IP_VS_APP_MAX_PORTS  8
-extern int register_ip_vs_app(struct ip_vs_app *app);
-extern void unregister_ip_vs_app(struct ip_vs_app *app);
-extern int ip_vs_bind_app(struct ip_vs_conn *cp, struct ip_vs_protocol *pp);
+extern int register_ip_vs_app(struct net *net, struct ip_vs_app *app);
+extern void unregister_ip_vs_app(struct net *net, struct ip_vs_app *app);
+extern int ip_vs_bind_app(struct net *net, struct ip_vs_conn *cp,
+		          struct ip_vs_protocol *pp);
 extern void ip_vs_unbind_app(struct ip_vs_conn *cp);
-extern int
-register_ip_vs_app_inc(struct ip_vs_app *app, __u16 proto, __u16 port);
+extern int register_ip_vs_app_inc(struct net *net,
+				  struct ip_vs_app *app,
+				  __u16 proto,
+				  __u16 port);
 extern int ip_vs_app_inc_get(struct ip_vs_app *inc);
 extern void ip_vs_app_inc_put(struct ip_vs_app *inc);

@@ -762,7 +791,7 @@ extern void ip_vs_app_cleanup(void);
 extern int ip_vs_protocol_init(void);
 extern void ip_vs_protocol_cleanup(void);
 extern void ip_vs_protocol_timeout_change(int flags);
-extern int *ip_vs_create_timeout_table(int *table, int size);
+extern int *ip_vs_create_timeout_table(const int *table, int size);
 extern int
 ip_vs_set_state_timeout(int *table, int num, const char *const *names,
 			const char *name, int to);
@@ -806,7 +835,7 @@ extern struct ip_vs_stats ip_vs_stats;
 extern const struct ctl_path net_vs_ctl_path[];

 extern struct ip_vs_service *
-ip_vs_service_get(int af, __u32 fwmark, __u16 protocol,
+ip_vs_service_get(struct net *net, int af, __u32 fwmark, __u16 protocol,
 		  const union nf_inet_addr *vaddr, __be16 vport);

 static inline void ip_vs_service_put(struct ip_vs_service *svc)
@@ -815,7 +844,7 @@ static inline void ip_vs_service_put(struct ip_vs_service *svc)
 }

 extern struct ip_vs_dest *
-ip_vs_lookup_real_service(int af, __u16 protocol,
+ip_vs_lookup_real_service(struct net *net, int af, __u16 protocol,
 			  const union nf_inet_addr *daddr, __be16 dport);

 extern int ip_vs_use_count_inc(void);
@@ -823,23 +852,22 @@ extern void ip_vs_use_count_dec(void);
 extern int ip_vs_control_init(void);
 extern void ip_vs_control_cleanup(void);
 extern struct ip_vs_dest *
-ip_vs_find_dest(int af, const union nf_inet_addr *daddr, __be16 dport,
+ip_vs_find_dest(struct net *net, int af,
+		const union nf_inet_addr *daddr, __be16 dport,
 		const union nf_inet_addr *vaddr, __be16 vport, __u16 protocol);
-extern struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp);
-
+extern struct ip_vs_dest *ip_vs_try_bind_dest(struct net *net,
+		                              struct ip_vs_conn *cp);

 /*
  *      IPVS sync daemon data and function prototypes
  *      (from ip_vs_sync.c)
  */
-extern volatile int ip_vs_sync_state;
-extern volatile int ip_vs_master_syncid;
-extern volatile int ip_vs_backup_syncid;
-extern char ip_vs_master_mcast_ifn[IP_VS_IFNAME_MAXLEN];
-extern char ip_vs_backup_mcast_ifn[IP_VS_IFNAME_MAXLEN];
-extern int start_sync_thread(int state, char *mcast_ifn, __u8 syncid);
-extern int stop_sync_thread(int state);
-extern void ip_vs_sync_conn(struct ip_vs_conn *cp);
+extern int start_sync_thread(struct net *net, int state, char *mcast_ifn,
+		             __u8 syncid);
+extern int stop_sync_thread(struct net *net, int state);
+extern void ip_vs_sync_conn(struct net *net, struct ip_vs_conn *cp);
+extern int ip_vs_sync_init(void);
+extern void ip_vs_sync_cleanup(void);


 /*
@@ -847,8 +875,8 @@ extern void ip_vs_sync_conn(struct ip_vs_conn *cp);
  */
 extern int ip_vs_estimator_init(void);
 extern void ip_vs_estimator_cleanup(void);
-extern void ip_vs_new_estimator(struct ip_vs_stats *stats);
-extern void ip_vs_kill_estimator(struct ip_vs_stats *stats);
+extern void ip_vs_new_estimator(struct net *net, struct ip_vs_stats *stats);
+extern void ip_vs_kill_estimator(struct net *net, struct ip_vs_stats *stats);
 extern void ip_vs_zero_estimator(struct ip_vs_stats *stats);

 /*
@@ -864,8 +892,8 @@ extern int ip_vs_tunnel_xmit
 (struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp);
 extern int ip_vs_dr_xmit
 (struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp);
-extern int ip_vs_icmp_xmit
-(struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp, int offset);
+extern int ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
+		           struct ip_vs_protocol *pp, int offset);
 extern void ip_vs_dst_reset(struct ip_vs_dest *dest);

 #ifdef CONFIG_IP_VS_IPV6
diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index bd10a79..b59cdc5 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -15,6 +15,7 @@
 #include <net/netns/ipv4.h>
 #include <net/netns/ipv6.h>
 #include <net/netns/dccp.h>
+#include <net/netns/ip_vs.h>
 #include <net/netns/x_tables.h>
 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
 #include <net/netns/conntrack.h>
@@ -91,6 +92,7 @@ struct net {
 	struct sk_buff_head	wext_nlevents;
 #endif
 	struct net_generic	*gen;
+	struct netns_ipvs       *ipvs;
 };


diff --git a/include/net/netns/ip_vs.h b/include/net/netns/ip_vs.h
new file mode 100644
index 0000000..540ac90
--- /dev/null
+++ b/include/net/netns/ip_vs.h
@@ -0,0 +1,112 @@
+#ifndef __NETNS_IP_VS_H_
+#define __NETNS_IP_VS_H_
+
+#include <linux/list.h>
+#include <linux/mutex.h>
+#include <linux/list_nulls.h>
+#include <linux/ip_vs.h>
+#include <asm/atomic.h>
+#include <linux/in.h>
+
+struct ip_vs_stats;
+struct ip_vs_sync_buff;
+struct ctl_table_header;
+
+struct netns_ipvs {
+	int			inc;		/* incarnation */
+	/* ip_vs_app */
+	struct list_head 	app_list;
+	struct mutex		app_mutex;
+	struct lock_class_key 	app_key;	/* Grrr, for mutex debuging */
+	/* ip_vs_conn */
+	unsigned char           conn_cname[20];	/* Connection hash name */
+	struct list_head 	*conn_tab;	/* Connection hash: for in and output packets */
+	struct kmem_cache 	*conn_cachep;	/* SLAB cache for IPVS connections */
+	atomic_t 		conn_count;	/* counter for current IPVS connections */
+	atomic_t 		conn_no_cport_cnt; /* counter for no client port connections */
+	unsigned int 		conn_rnd;	/* random value for IPVS connection hash */
+	/* ip_vs_ctl */
+	struct ip_vs_stats 	*ctl_stats;	/* Statistics & estimator */
+	/*	Hash table: for virtual service lookups */
+	#define IP_VS_SVC_TAB_BITS 8
+	#define IP_VS_SVC_TAB_SIZE (1 << IP_VS_SVC_TAB_BITS)
+	#define IP_VS_SVC_TAB_MASK (IP_VS_SVC_TAB_SIZE - 1)
+	/* the service table hashed by <protocol, addr, port> */
+	struct list_head 	ctl_svc_table[IP_VS_SVC_TAB_SIZE];
+	/* the service table hashed by fwmark */
+	struct list_head 	ctl_fwm_table[IP_VS_SVC_TAB_SIZE];
+	/* Hash table: for real service lookups */
+	#define IP_VS_RTAB_BITS 4
+	#define IP_VS_RTAB_SIZE (1 << IP_VS_RTAB_BITS)
+	#define IP_VS_RTAB_MASK (IP_VS_RTAB_SIZE - 1)
+	struct list_head 	ctl_rtable[IP_VS_RTAB_SIZE]; /* Hash table: for real service  */
+	struct list_head	ctl_dest_trash;	    /* Trash for destinations */
+	atomic_t 		ctl_ftpsvc_counter;
+	atomic_t 		ctl_nullsvc_counter;
+	/* sys-ctl struct */
+	struct ctl_table_header	*sysctl_hdr;
+	struct ctl_table	*sysctl_tbl;
+	/* sysctl variables */
+	int 			sysctl_amemthresh;
+	int 			sysctl_am_droprate;
+	int 			sysctl_drop_entry;
+	int 			sysctl_drop_packet;
+	int 			sysctl_secure_tcp;
+	int 			sysctl_cache_bypass;
+	int 			sysctl_expire_nodest_conn;
+	int 			sysctl_expire_quiescent_template;
+	int 			sysctl_sync_threshold[2];
+	int 			sysctl_nat_icmp_send;
+	/* ip_vs_proto */
+	#define IP_VS_PROTO_TAB_SIZE		32	/* must be power of 2 */
+	struct ip_vs_proto_data *proto_data_table[IP_VS_PROTO_TAB_SIZE];
+	/* ip_vs_proto_tcp */
+#ifdef CONFIG_IP_VS_PROTO_TCP
+	#define	TCP_APP_TAB_BITS	4
+	#define	TCP_APP_TAB_SIZE	(1 << TCP_APP_TAB_BITS)
+	#define	TCP_APP_TAB_MASK	(TCP_APP_TAB_SIZE - 1)
+	struct list_head 	tcp_apps[TCP_APP_TAB_SIZE];
+	spinlock_t		tcp_app_lock;
+#endif
+	/* ip_vs_proto_udp */
+#ifdef CONFIG_IP_VS_PROTO_UDP
+	#define	UDP_APP_TAB_BITS	4
+	#define	UDP_APP_TAB_SIZE	(1 << UDP_APP_TAB_BITS)
+	#define	UDP_APP_TAB_MASK	(UDP_APP_TAB_SIZE - 1)
+	struct list_head 	udp_apps[UDP_APP_TAB_SIZE];
+	spinlock_t		udp_app_lock;
+#endif
+	/* ip_vs_proto_sctp */
+	#define SCTP_APP_TAB_BITS        4
+	#define SCTP_APP_TAB_SIZE        (1 << SCTP_APP_TAB_BITS)
+	#define SCTP_APP_TAB_MASK        (SCTP_APP_TAB_SIZE - 1)
+	/* Hash table for SCTP application incarnations	 */
+	struct list_head 	sctp_apps[SCTP_APP_TAB_SIZE];
+	spinlock_t		sctp_app_lock;
+
+	/* ip_vs_est */
+	struct list_head 	est_list;	/* estimator list */
+	spinlock_t		est_lock;
+	/* ip_vs_sync */
+	struct list_head	sync_queue;
+	spinlock_t		sync_lock;
+	struct ip_vs_sync_buff  *sync_buff;
+	spinlock_t		sync_buff_lock;
+	struct sockaddr_in 	sync_mcast_addr;
+	/* sync daemon tasks */
+	struct task_struct 	*sync_master_thread;
+	struct task_struct 	*sync_backup_thread;
+	/* the maximum length of sync (sending/receiving) message */
+	int 			sync_send_mesg_maxlen;
+	int 			sync_recv_mesg_maxlen;
+
+	volatile int 		sync_state;
+	volatile int 		master_syncid;
+	volatile int 		backup_syncid;
+	/* multicast interface name */
+	char 			master_mcast_ifn[IP_VS_IFNAME_MAXLEN];
+	char 			backup_mcast_ifn[IP_VS_IFNAME_MAXLEN];
+
+};
+
+#endif /*__NETNS_IP_VS_H_*/

^ permalink raw reply related

* [RFC PATCH 2/9] ipvs network name space aware
From: Hans Schillstrom @ 2010-10-08 11:16 UTC (permalink / raw)
  To: lvs-devel, netdev, netfilter-devel; +Cc: horms, ja, wensong, daniel.lezcano

This part contains the include files
where include/net/netns/ip_vs.h is new and contains all moved vars.

SUMMARY

 include/net/ip_vs.h                     |  136 ++++---
 include/net/net_namespace.h             |    2 +
 include/net/netns/ip_vs.h               |  112 +++++

Signed-off-by: Hans Schillstrom <hans.schillstrom@ericsson.com>
---

diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index b17f863..b40a0fb 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -288,6 +288,7 @@ struct iphdr;
 struct ip_vs_conn;
 struct ip_vs_app;
 struct sk_buff;
+struct ip_vs_proto_data;

 struct ip_vs_protocol {
 	struct ip_vs_protocol	*next;
@@ -302,6 +303,10 @@ struct ip_vs_protocol {

 	void (*exit)(struct ip_vs_protocol *pp);

+	void (*init_netns)(struct net *net, struct ip_vs_proto_data *pd);
+
+	void (*exit_netns)(struct net *net, struct ip_vs_proto_data *pd);
+
 	int (*conn_schedule)(int af, struct sk_buff *skb,
 			     struct ip_vs_protocol *pp,
 			     int *verdict, struct ip_vs_conn **cpp);
@@ -337,11 +342,11 @@ struct ip_vs_protocol {
 				const struct sk_buff *skb,
 				struct ip_vs_protocol *pp);

-	int (*register_app)(struct ip_vs_app *inc);
+	int (*register_app)(struct net *net, struct ip_vs_app *inc);

-	void (*unregister_app)(struct ip_vs_app *inc);
+	void (*unregister_app)(struct net *net, struct ip_vs_app *inc);

-	int (*app_conn_bind)(struct ip_vs_conn *cp);
+	int (*app_conn_bind)(struct net *net, struct ip_vs_conn *cp);

 	void (*debug_packet)(struct ip_vs_protocol *pp,
 			     const struct sk_buff *skb,
@@ -350,10 +355,24 @@ struct ip_vs_protocol {

 	void (*timeout_change)(struct ip_vs_protocol *pp, int flags);

-	int (*set_state_timeout)(struct ip_vs_protocol *pp, char *sname, int to);
+	/*
+	 int (*set_state_timeout)(struct ip_vs_protocol *pp,
+	                          char *sname,
+	                          int to);    Not used  -Hans S */
+};
+/*
+ * protocol data per netns
+ */
+struct ip_vs_proto_data {
+	struct ip_vs_proto_data	*next;
+	struct ip_vs_protocol   *pp;
+	int			*timeout_table;	/* protocol timeout table */
+	atomic_t		appcnt;		/* counter of proto app incs. */
 };

-extern struct ip_vs_protocol * ip_vs_proto_get(unsigned short proto);
+extern struct ip_vs_protocol   * ip_vs_proto_get(unsigned short proto);
+extern struct ip_vs_proto_data * ip_vs_proto_data_get(struct net *net,
+						      unsigned short proto);

 /*
  *	IP_VS structure allocated for each dynamically scheduled connection
@@ -398,6 +417,8 @@ struct ip_vs_conn {
 	int (*packet_xmit)(struct sk_buff *skb, struct ip_vs_conn *cp,
 			   struct ip_vs_protocol *pp);

+	struct net		*net;		/* netns ptr needed in timer */
+
 	/* Note: we can group the following members into a structure,
 	   in order to save more space, and the following members are
 	   only used in VS/NAT anyway */
@@ -628,29 +649,32 @@ enum {
 	IP_VS_DIR_LAST,
 };

-extern struct ip_vs_conn *ip_vs_conn_in_get
-(int af, int protocol, const union nf_inet_addr *s_addr, __be16 s_port,
- const union nf_inet_addr *d_addr, __be16 d_port);
+extern struct ip_vs_conn *
+ip_vs_conn_in_get(struct net *net, int af, int protocol,
+		  const union nf_inet_addr *s_addr, __be16 s_port,
+		  const union nf_inet_addr *d_addr, __be16 d_port);

-extern struct ip_vs_conn *ip_vs_ct_in_get
-(int af, int protocol, const union nf_inet_addr *s_addr, __be16 s_port,
- const union nf_inet_addr *d_addr, __be16 d_port);
+extern struct ip_vs_conn *
+ip_vs_ct_in_get(struct net *net, int af, int protocol,
+		const union nf_inet_addr *s_addr, __be16 s_port,
+		const union nf_inet_addr *d_addr, __be16 d_port);

-struct ip_vs_conn * ip_vs_conn_in_get_proto(int af, const struct sk_buff *skb,
-					    struct ip_vs_protocol *pp,
-					    const struct ip_vs_iphdr *iph,
-					    unsigned int proto_off,
-					    int inverse);
+struct ip_vs_conn *
+ip_vs_conn_in_get_proto(int af, const struct sk_buff *skb,
+			struct ip_vs_protocol *pp,
+			const struct ip_vs_iphdr *iph,
+			unsigned int proto_off, int inverse);

-extern struct ip_vs_conn *ip_vs_conn_out_get
-(int af, int protocol, const union nf_inet_addr *s_addr, __be16 s_port,
- const union nf_inet_addr *d_addr, __be16 d_port);
+extern struct ip_vs_conn *
+ip_vs_conn_out_get(struct net *net,int af, int protocol,
+		   const union nf_inet_addr *s_addr, __be16 s_port,
+		   const union nf_inet_addr *d_addr, __be16 d_port);

-struct ip_vs_conn * ip_vs_conn_out_get_proto(int af, const struct sk_buff *skb,
-					     struct ip_vs_protocol *pp,
-					     const struct ip_vs_iphdr *iph,
-					     unsigned int proto_off,
-					     int inverse);
+struct ip_vs_conn *
+ip_vs_conn_out_get_proto(int af, const struct sk_buff *skb,
+			 struct ip_vs_protocol *pp,
+			 const struct ip_vs_iphdr *iph,
+			 unsigned int proto_off, int inverse);

 /* put back the conn without restarting its timer */
 static inline void __ip_vs_conn_put(struct ip_vs_conn *cp)
@@ -658,20 +682,22 @@ static inline void __ip_vs_conn_put(struct ip_vs_conn *cp)
 	atomic_dec(&cp->refcnt);
 }
 extern void ip_vs_conn_put(struct ip_vs_conn *cp);
-extern void ip_vs_conn_fill_cport(struct ip_vs_conn *cp, __be16 cport);
+extern void
+ip_vs_conn_fill_cport(struct net *net, struct ip_vs_conn *cp, __be16 cport);

 extern struct ip_vs_conn *
-ip_vs_conn_new(int af, int proto, const union nf_inet_addr *caddr, __be16 cport,
+ip_vs_conn_new(struct net *net, int af, int proto,
+	       const union nf_inet_addr *caddr, __be16 cport,
 	       const union nf_inet_addr *vaddr, __be16 vport,
-	       const union nf_inet_addr *daddr, __be16 dport, unsigned flags,
-	       struct ip_vs_dest *dest);
+	       const union nf_inet_addr *daddr, __be16 dport,
+	       unsigned flags, struct ip_vs_dest *dest);
 extern void ip_vs_conn_expire_now(struct ip_vs_conn *cp);

 extern const char * ip_vs_state_name(__u16 proto, int state);

-extern void ip_vs_tcp_conn_listen(struct ip_vs_conn *cp);
-extern int ip_vs_check_template(struct ip_vs_conn *ct);
-extern void ip_vs_random_dropentry(void);
+extern void ip_vs_tcp_conn_listen(struct net *net, struct ip_vs_conn *cp);
+extern int ip_vs_check_template(struct net *net, struct ip_vs_conn *ct);
+extern void ip_vs_random_dropentry(struct net *net);
 extern int ip_vs_conn_init(void);
 extern void ip_vs_conn_cleanup(void);

@@ -741,12 +767,15 @@ ip_vs_control_add(struct ip_vs_conn *cp, struct ip_vs_conn *ctl_cp)
  *      (from ip_vs_app.c)
  */
 #define IP_VS_APP_MAX_PORTS  8
-extern int register_ip_vs_app(struct ip_vs_app *app);
-extern void unregister_ip_vs_app(struct ip_vs_app *app);
-extern int ip_vs_bind_app(struct ip_vs_conn *cp, struct ip_vs_protocol *pp);
+extern int register_ip_vs_app(struct net *net, struct ip_vs_app *app);
+extern void unregister_ip_vs_app(struct net *net, struct ip_vs_app *app);
+extern int ip_vs_bind_app(struct net *net, struct ip_vs_conn *cp,
+		          struct ip_vs_protocol *pp);
 extern void ip_vs_unbind_app(struct ip_vs_conn *cp);
-extern int
-register_ip_vs_app_inc(struct ip_vs_app *app, __u16 proto, __u16 port);
+extern int register_ip_vs_app_inc(struct net *net,
+				  struct ip_vs_app *app,
+				  __u16 proto,
+				  __u16 port);
 extern int ip_vs_app_inc_get(struct ip_vs_app *inc);
 extern void ip_vs_app_inc_put(struct ip_vs_app *inc);

@@ -762,7 +791,7 @@ extern void ip_vs_app_cleanup(void);
 extern int ip_vs_protocol_init(void);
 extern void ip_vs_protocol_cleanup(void);
 extern void ip_vs_protocol_timeout_change(int flags);
-extern int *ip_vs_create_timeout_table(int *table, int size);
+extern int *ip_vs_create_timeout_table(const int *table, int size);
 extern int
 ip_vs_set_state_timeout(int *table, int num, const char *const *names,
 			const char *name, int to);
@@ -806,7 +835,7 @@ extern struct ip_vs_stats ip_vs_stats;
 extern const struct ctl_path net_vs_ctl_path[];

 extern struct ip_vs_service *
-ip_vs_service_get(int af, __u32 fwmark, __u16 protocol,
+ip_vs_service_get(struct net *net, int af, __u32 fwmark, __u16 protocol,
 		  const union nf_inet_addr *vaddr, __be16 vport);

 static inline void ip_vs_service_put(struct ip_vs_service *svc)
@@ -815,7 +844,7 @@ static inline void ip_vs_service_put(struct ip_vs_service *svc)
 }

 extern struct ip_vs_dest *
-ip_vs_lookup_real_service(int af, __u16 protocol,
+ip_vs_lookup_real_service(struct net *net, int af, __u16 protocol,
 			  const union nf_inet_addr *daddr, __be16 dport);

 extern int ip_vs_use_count_inc(void);
@@ -823,23 +852,22 @@ extern void ip_vs_use_count_dec(void);
 extern int ip_vs_control_init(void);
 extern void ip_vs_control_cleanup(void);
 extern struct ip_vs_dest *
-ip_vs_find_dest(int af, const union nf_inet_addr *daddr, __be16 dport,
+ip_vs_find_dest(struct net *net, int af,
+		const union nf_inet_addr *daddr, __be16 dport,
 		const union nf_inet_addr *vaddr, __be16 vport, __u16 protocol);
-extern struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp);
-
+extern struct ip_vs_dest *ip_vs_try_bind_dest(struct net *net,
+		                              struct ip_vs_conn *cp);

 /*
  *      IPVS sync daemon data and function prototypes
  *      (from ip_vs_sync.c)
  */
-extern volatile int ip_vs_sync_state;
-extern volatile int ip_vs_master_syncid;
-extern volatile int ip_vs_backup_syncid;
-extern char ip_vs_master_mcast_ifn[IP_VS_IFNAME_MAXLEN];
-extern char ip_vs_backup_mcast_ifn[IP_VS_IFNAME_MAXLEN];
-extern int start_sync_thread(int state, char *mcast_ifn, __u8 syncid);
-extern int stop_sync_thread(int state);
-extern void ip_vs_sync_conn(struct ip_vs_conn *cp);
+extern int start_sync_thread(struct net *net, int state, char *mcast_ifn,
+		             __u8 syncid);
+extern int stop_sync_thread(struct net *net, int state);
+extern void ip_vs_sync_conn(struct net *net, struct ip_vs_conn *cp);
+extern int ip_vs_sync_init(void);
+extern void ip_vs_sync_cleanup(void);


 /*
@@ -847,8 +875,8 @@ extern void ip_vs_sync_conn(struct ip_vs_conn *cp);
  */
 extern int ip_vs_estimator_init(void);
 extern void ip_vs_estimator_cleanup(void);
-extern void ip_vs_new_estimator(struct ip_vs_stats *stats);
-extern void ip_vs_kill_estimator(struct ip_vs_stats *stats);
+extern void ip_vs_new_estimator(struct net *net, struct ip_vs_stats *stats);
+extern void ip_vs_kill_estimator(struct net *net, struct ip_vs_stats *stats);
 extern void ip_vs_zero_estimator(struct ip_vs_stats *stats);

 /*
@@ -864,8 +892,8 @@ extern int ip_vs_tunnel_xmit
 (struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp);
 extern int ip_vs_dr_xmit
 (struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp);
-extern int ip_vs_icmp_xmit
-(struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp, int offset);
+extern int ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
+		           struct ip_vs_protocol *pp, int offset);
 extern void ip_vs_dst_reset(struct ip_vs_dest *dest);

 #ifdef CONFIG_IP_VS_IPV6
diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index bd10a79..b59cdc5 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -15,6 +15,7 @@
 #include <net/netns/ipv4.h>
 #include <net/netns/ipv6.h>
 #include <net/netns/dccp.h>
+#include <net/netns/ip_vs.h>
 #include <net/netns/x_tables.h>
 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
 #include <net/netns/conntrack.h>
@@ -91,6 +92,7 @@ struct net {
 	struct sk_buff_head	wext_nlevents;
 #endif
 	struct net_generic	*gen;
+	struct netns_ipvs       *ipvs;
 };


diff --git a/include/net/netns/ip_vs.h b/include/net/netns/ip_vs.h
new file mode 100644
index 0000000..540ac90
--- /dev/null
+++ b/include/net/netns/ip_vs.h
@@ -0,0 +1,112 @@
+#ifndef __NETNS_IP_VS_H_
+#define __NETNS_IP_VS_H_
+
+#include <linux/list.h>
+#include <linux/mutex.h>
+#include <linux/list_nulls.h>
+#include <linux/ip_vs.h>
+#include <asm/atomic.h>
+#include <linux/in.h>
+
+struct ip_vs_stats;
+struct ip_vs_sync_buff;
+struct ctl_table_header;
+
+struct netns_ipvs {
+	int			inc;		/* incarnation */
+	/* ip_vs_app */
+	struct list_head 	app_list;
+	struct mutex		app_mutex;
+	struct lock_class_key 	app_key;	/* Grrr, for mutex debuging */
+	/* ip_vs_conn */
+	unsigned char           conn_cname[20];	/* Connection hash name */
+	struct list_head 	*conn_tab;	/* Connection hash: for in and output packets */
+	struct kmem_cache 	*conn_cachep;	/* SLAB cache for IPVS connections */
+	atomic_t 		conn_count;	/* counter for current IPVS connections */
+	atomic_t 		conn_no_cport_cnt; /* counter for no client port connections */
+	unsigned int 		conn_rnd;	/* random value for IPVS connection hash */
+	/* ip_vs_ctl */
+	struct ip_vs_stats 	*ctl_stats;	/* Statistics & estimator */
+	/*	Hash table: for virtual service lookups */
+	#define IP_VS_SVC_TAB_BITS 8
+	#define IP_VS_SVC_TAB_SIZE (1 << IP_VS_SVC_TAB_BITS)
+	#define IP_VS_SVC_TAB_MASK (IP_VS_SVC_TAB_SIZE - 1)
+	/* the service table hashed by <protocol, addr, port> */
+	struct list_head 	ctl_svc_table[IP_VS_SVC_TAB_SIZE];
+	/* the service table hashed by fwmark */
+	struct list_head 	ctl_fwm_table[IP_VS_SVC_TAB_SIZE];
+	/* Hash table: for real service lookups */
+	#define IP_VS_RTAB_BITS 4
+	#define IP_VS_RTAB_SIZE (1 << IP_VS_RTAB_BITS)
+	#define IP_VS_RTAB_MASK (IP_VS_RTAB_SIZE - 1)
+	struct list_head 	ctl_rtable[IP_VS_RTAB_SIZE]; /* Hash table: for real service  */
+	struct list_head	ctl_dest_trash;	    /* Trash for destinations */
+	atomic_t 		ctl_ftpsvc_counter;
+	atomic_t 		ctl_nullsvc_counter;
+	/* sys-ctl struct */
+	struct ctl_table_header	*sysctl_hdr;
+	struct ctl_table	*sysctl_tbl;
+	/* sysctl variables */
+	int 			sysctl_amemthresh;
+	int 			sysctl_am_droprate;
+	int 			sysctl_drop_entry;
+	int 			sysctl_drop_packet;
+	int 			sysctl_secure_tcp;
+	int 			sysctl_cache_bypass;
+	int 			sysctl_expire_nodest_conn;
+	int 			sysctl_expire_quiescent_template;
+	int 			sysctl_sync_threshold[2];
+	int 			sysctl_nat_icmp_send;
+	/* ip_vs_proto */
+	#define IP_VS_PROTO_TAB_SIZE		32	/* must be power of 2 */
+	struct ip_vs_proto_data *proto_data_table[IP_VS_PROTO_TAB_SIZE];
+	/* ip_vs_proto_tcp */
+#ifdef CONFIG_IP_VS_PROTO_TCP
+	#define	TCP_APP_TAB_BITS	4
+	#define	TCP_APP_TAB_SIZE	(1 << TCP_APP_TAB_BITS)
+	#define	TCP_APP_TAB_MASK	(TCP_APP_TAB_SIZE - 1)
+	struct list_head 	tcp_apps[TCP_APP_TAB_SIZE];
+	spinlock_t		tcp_app_lock;
+#endif
+	/* ip_vs_proto_udp */
+#ifdef CONFIG_IP_VS_PROTO_UDP
+	#define	UDP_APP_TAB_BITS	4
+	#define	UDP_APP_TAB_SIZE	(1 << UDP_APP_TAB_BITS)
+	#define	UDP_APP_TAB_MASK	(UDP_APP_TAB_SIZE - 1)
+	struct list_head 	udp_apps[UDP_APP_TAB_SIZE];
+	spinlock_t		udp_app_lock;
+#endif
+	/* ip_vs_proto_sctp */
+	#define SCTP_APP_TAB_BITS        4
+	#define SCTP_APP_TAB_SIZE        (1 << SCTP_APP_TAB_BITS)
+	#define SCTP_APP_TAB_MASK        (SCTP_APP_TAB_SIZE - 1)
+	/* Hash table for SCTP application incarnations	 */
+	struct list_head 	sctp_apps[SCTP_APP_TAB_SIZE];
+	spinlock_t		sctp_app_lock;
+
+	/* ip_vs_est */
+	struct list_head 	est_list;	/* estimator list */
+	spinlock_t		est_lock;
+	/* ip_vs_sync */
+	struct list_head	sync_queue;
+	spinlock_t		sync_lock;
+	struct ip_vs_sync_buff  *sync_buff;
+	spinlock_t		sync_buff_lock;
+	struct sockaddr_in 	sync_mcast_addr;
+	/* sync daemon tasks */
+	struct task_struct 	*sync_master_thread;
+	struct task_struct 	*sync_backup_thread;
+	/* the maximum length of sync (sending/receiving) message */
+	int 			sync_send_mesg_maxlen;
+	int 			sync_recv_mesg_maxlen;
+
+	volatile int 		sync_state;
+	volatile int 		master_syncid;
+	volatile int 		backup_syncid;
+	/* multicast interface name */
+	char 			master_mcast_ifn[IP_VS_IFNAME_MAXLEN];
+	char 			backup_mcast_ifn[IP_VS_IFNAME_MAXLEN];
+
+};
+
+#endif /*__NETNS_IP_VS_H_*/

-- 
Regards
Hans Schillstrom <hans.schillstrom@ericsson.com>

^ permalink raw reply related

* [RFC PATCH 3/9] ipvs network name space aware
From: Hans Schillstrom @ 2010-10-08 11:16 UTC (permalink / raw)
  To: lvs-devel, netdev, netfilter-devel; +Cc: horms, ja, wensong, daniel.lezcano


This patch just contains ip_vs_conn.c
and does the normal
 - moving vars to struct ipvs
 - adding per netns init and exit

proc_fs required some extra work: adding/changing the seq_file private data to get the net pointer.

Signed-off-by:Hans Schillstrom <hans.schillstrom@ericsson.com>

diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
index b71c69a..c47828f 100644
--- a/net/netfilter/ipvs/ip_vs_conn.c
+++ b/net/netfilter/ipvs/ip_vs_conn.c
@@ -47,7 +47,7 @@

 /*
  * Connection hash size. Default is what was selected at compile time.
-*/
+ */
 int ip_vs_conn_tab_bits = CONFIG_IP_VS_TAB_BITS;
 module_param_named(conn_tab_bits, ip_vs_conn_tab_bits, int, 0444);
 MODULE_PARM_DESC(conn_tab_bits, "Set connections' hash size");
@@ -56,23 +56,12 @@ MODULE_PARM_DESC(conn_tab_bits, "Set connections' hash size");
 int ip_vs_conn_tab_size;
 int ip_vs_conn_tab_mask;

-/*
- *  Connection hash table: for input and output packets lookups of IPVS
- */
-static struct list_head *ip_vs_conn_tab;
-
-/*  SLAB cache for IPVS connections */
-static struct kmem_cache *ip_vs_conn_cachep __read_mostly;
-
-/*  counter for current IPVS connections */
-static atomic_t ip_vs_conn_count = ATOMIC_INIT(0);
-
-/*  counter for no client port connections */
-static atomic_t ip_vs_conn_no_cport_cnt = ATOMIC_INIT(0);
-
 /* random value for IPVS connection hash */
 static unsigned int ip_vs_conn_rnd;

+/* cache name cnt */
+static atomic_t conn_cache_nr = ATOMIC_INIT(0);
+
 /*
  *  Fine locking granularity for big connection hash table
  */
@@ -153,7 +142,7 @@ static unsigned int ip_vs_conn_hashkey(int af, unsigned proto,
  *	Hashes ip_vs_conn in ip_vs_conn_tab by proto,addr,port.
  *	returns bool success.
  */
-static inline int ip_vs_conn_hash(struct ip_vs_conn *cp)
+static inline int ip_vs_conn_hash(struct net *net, struct ip_vs_conn *cp)
 {
 	unsigned hash;
 	int ret;
@@ -168,7 +157,7 @@ static inline int ip_vs_conn_hash(struct ip_vs_conn *cp)
 	spin_lock(&cp->lock);

 	if (!(cp->flags & IP_VS_CONN_F_HASHED)) {
-		list_add(&cp->c_list, &ip_vs_conn_tab[hash]);
+		list_add(&cp->c_list, &net->ipvs->conn_tab[hash]);
 		cp->flags |= IP_VS_CONN_F_HASHED;
 		atomic_inc(&cp->refcnt);
 		ret = 1;
@@ -221,18 +210,20 @@ static inline int ip_vs_conn_unhash(struct ip_vs_conn *cp)
  *	s_addr, s_port: pkt source address (foreign host)
  *	d_addr, d_port: pkt dest address (load balancer)
  */
-static inline struct ip_vs_conn *__ip_vs_conn_in_get
-(int af, int protocol, const union nf_inet_addr *s_addr, __be16 s_port,
- const union nf_inet_addr *d_addr, __be16 d_port)
+static inline struct ip_vs_conn *
+__ip_vs_conn_in_get(struct net *net, int af, int protocol,
+		    const union nf_inet_addr *s_addr, __be16 s_port,
+		    const union nf_inet_addr *d_addr, __be16 d_port)
 {
 	unsigned hash;
 	struct ip_vs_conn *cp;
+	struct netns_ipvs *ipvs = net->ipvs;

 	hash = ip_vs_conn_hashkey(af, protocol, s_addr, s_port);

 	ct_read_lock(hash);

-	list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) {
+	list_for_each_entry(cp, &ipvs->conn_tab[hash], c_list) {
 		if (cp->af == af &&
 		    ip_vs_addr_equal(af, s_addr, &cp->caddr) &&
 		    ip_vs_addr_equal(af, d_addr, &cp->vaddr) &&
@@ -251,16 +242,18 @@ static inline struct ip_vs_conn *__ip_vs_conn_in_get
 	return NULL;
 }

-struct ip_vs_conn *ip_vs_conn_in_get
-(int af, int protocol, const union nf_inet_addr *s_addr, __be16 s_port,
- const union nf_inet_addr *d_addr, __be16 d_port)
+struct ip_vs_conn *
+ip_vs_conn_in_get(struct net *net, int af, int protocol,
+		  const union nf_inet_addr *s_addr, __be16 s_port,
+		  const union nf_inet_addr *d_addr, __be16 d_port)
 {
 	struct ip_vs_conn *cp;

-	cp = __ip_vs_conn_in_get(af, protocol, s_addr, s_port, d_addr, d_port);
-	if (!cp && atomic_read(&ip_vs_conn_no_cport_cnt))
-		cp = __ip_vs_conn_in_get(af, protocol, s_addr, 0, d_addr,
-					 d_port);
+	cp = __ip_vs_conn_in_get(net, af, protocol,
+				 s_addr, s_port, d_addr, d_port);
+	if (!cp && atomic_read(&net->ipvs->conn_no_cport_cnt))
+		cp = __ip_vs_conn_in_get(net, af, protocol,
+					 s_addr, 0, d_addr, d_port);

 	IP_VS_DBG_BUF(9, "lookup/in %s %s:%d->%s:%d %s\n",
 		      ip_vs_proto_name(protocol),
@@ -278,35 +271,41 @@ ip_vs_conn_in_get_proto(int af, const struct sk_buff *skb,
 			unsigned int proto_off, int inverse)
 {
 	__be16 _ports[2], *pptr;
+	struct net *net = dev_net(skb->dev);

 	pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports);
 	if (pptr == NULL)
 		return NULL;

+	BUG_ON(!net);
 	if (likely(!inverse))
-		return ip_vs_conn_in_get(af, iph->protocol,
+		return ip_vs_conn_in_get(net, af, iph->protocol,
 					 &iph->saddr, pptr[0],
 					 &iph->daddr, pptr[1]);
 	else
-		return ip_vs_conn_in_get(af, iph->protocol,
+		return ip_vs_conn_in_get(net, af, iph->protocol,
 					 &iph->daddr, pptr[1],
 					 &iph->saddr, pptr[0]);
 }
 EXPORT_SYMBOL_GPL(ip_vs_conn_in_get_proto);

-/* Get reference to connection template */
-struct ip_vs_conn *ip_vs_ct_in_get
-(int af, int protocol, const union nf_inet_addr *s_addr, __be16 s_port,
- const union nf_inet_addr *d_addr, __be16 d_port)
+/*
+ *  Get reference to connection template
+ */
+struct ip_vs_conn *
+ip_vs_ct_in_get(struct net *net, int af, int protocol,
+		const union nf_inet_addr *s_addr, __be16 s_port,
+		const union nf_inet_addr *d_addr, __be16 d_port)
 {
 	unsigned hash;
 	struct ip_vs_conn *cp;
+	struct netns_ipvs *ipvs = net->ipvs;

 	hash = ip_vs_conn_hashkey(af, protocol, s_addr, s_port);

 	ct_read_lock(hash);

-	list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) {
+	list_for_each_entry(cp, &ipvs->conn_tab[hash], c_list) {
 		if (cp->af == af &&
 		    ip_vs_addr_equal(af, s_addr, &cp->caddr) &&
 		    /* protocol should only be IPPROTO_IP if
@@ -341,12 +340,14 @@ struct ip_vs_conn *ip_vs_ct_in_get
  *	s_addr, s_port: pkt source address (inside host)
  *	d_addr, d_port: pkt dest address (foreign host)
  */
-struct ip_vs_conn *ip_vs_conn_out_get
-(int af, int protocol, const union nf_inet_addr *s_addr, __be16 s_port,
- const union nf_inet_addr *d_addr, __be16 d_port)
+struct ip_vs_conn *
+ip_vs_conn_out_get(struct net *net, int af, int protocol,
+		   const union nf_inet_addr *s_addr, __be16 s_port,
+		   const union nf_inet_addr *d_addr, __be16 d_port)
 {
 	unsigned hash;
 	struct ip_vs_conn *cp, *ret=NULL;
+	struct netns_ipvs *ipvs = net->ipvs;

 	/*
 	 *	Check for "full" addressed entries
@@ -355,7 +356,7 @@ struct ip_vs_conn *ip_vs_conn_out_get

 	ct_read_lock(hash);

-	list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) {
+	list_for_each_entry(cp, &ipvs->conn_tab[hash], c_list) {
 		if (cp->af == af &&
 		    ip_vs_addr_equal(af, d_addr, &cp->caddr) &&
 		    ip_vs_addr_equal(af, s_addr, &cp->daddr) &&
@@ -386,17 +387,19 @@ ip_vs_conn_out_get_proto(int af, const struct sk_buff *skb,
 			 unsigned int proto_off, int inverse)
 {
 	__be16 _ports[2], *pptr;
+	struct net *net = dev_net(skb->dev);

 	pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports);
 	if (pptr == NULL)
 		return NULL;

+	BUG_ON(!net);
 	if (likely(!inverse))
-		return ip_vs_conn_out_get(af, iph->protocol,
+		return ip_vs_conn_out_get(net, af, iph->protocol,
 					  &iph->saddr, pptr[0],
 					  &iph->daddr, pptr[1]);
 	else
-		return ip_vs_conn_out_get(af, iph->protocol,
+		return ip_vs_conn_out_get(net, af, iph->protocol,
 					  &iph->daddr, pptr[1],
 					  &iph->saddr, pptr[0]);
 }
@@ -408,7 +411,7 @@ EXPORT_SYMBOL_GPL(ip_vs_conn_out_get_proto);
 void ip_vs_conn_put(struct ip_vs_conn *cp)
 {
 	unsigned long t = (cp->flags & IP_VS_CONN_F_ONE_PACKET) ?
-		0 : cp->timeout;
+			   0 : cp->timeout;
 	mod_timer(&cp->timer, jiffies+t);

 	__ip_vs_conn_put(cp);
@@ -418,19 +421,19 @@ void ip_vs_conn_put(struct ip_vs_conn *cp)
 /*
  *	Fill a no_client_port connection with a client port number
  */
-void ip_vs_conn_fill_cport(struct ip_vs_conn *cp, __be16 cport)
+void ip_vs_conn_fill_cport(struct net *net, struct ip_vs_conn *cp, __be16 cport)
 {
 	if (ip_vs_conn_unhash(cp)) {
 		spin_lock(&cp->lock);
 		if (cp->flags & IP_VS_CONN_F_NO_CPORT) {
-			atomic_dec(&ip_vs_conn_no_cport_cnt);
+			atomic_dec(&net->ipvs->conn_no_cport_cnt);
 			cp->flags &= ~IP_VS_CONN_F_NO_CPORT;
 			cp->cport = cport;
 		}
 		spin_unlock(&cp->lock);

 		/* hash on new dport */
-		ip_vs_conn_hash(cp);
+		ip_vs_conn_hash(net, cp);
 	}
 }

@@ -561,12 +564,12 @@ ip_vs_bind_dest(struct ip_vs_conn *cp, struct ip_vs_dest *dest)
  * Check if there is a destination for the connection, if so
  * bind the connection to the destination.
  */
-struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp)
+struct ip_vs_dest *ip_vs_try_bind_dest(struct net *net, struct ip_vs_conn *cp)
 {
 	struct ip_vs_dest *dest;

 	if ((cp) && (!cp->dest)) {
-		dest = ip_vs_find_dest(cp->af, &cp->daddr, cp->dport,
+		dest = ip_vs_find_dest(net, cp->af, &cp->daddr, cp->dport,
 				       &cp->vaddr, cp->vport,
 				       cp->protocol);
 		ip_vs_bind_dest(cp, dest);
@@ -638,7 +641,7 @@ static inline void ip_vs_unbind_dest(struct ip_vs_conn *cp)
  *	If available, return 1, otherwise invalidate this connection
  *	template and return 0.
  */
-int ip_vs_check_template(struct ip_vs_conn *ct)
+int ip_vs_check_template(struct net *net, struct ip_vs_conn *ct)
 {
 	struct ip_vs_dest *dest = ct->dest;

@@ -647,7 +650,7 @@ int ip_vs_check_template(struct ip_vs_conn *ct)
 	 */
 	if ((dest == NULL) ||
 	    !(dest->flags & IP_VS_DEST_F_AVAILABLE) ||
-	    (sysctl_ip_vs_expire_quiescent_template &&
+	    (net->ipvs->sysctl_expire_quiescent_template &&
 	     (atomic_read(&dest->weight) == 0))) {
 		IP_VS_DBG_BUF(9, "check_template: dest not available for "
 			      "protocol %s s:%s:%d v:%s:%d "
@@ -668,7 +671,7 @@ int ip_vs_check_template(struct ip_vs_conn *ct)
 				ct->dport = htons(0xffff);
 				ct->vport = htons(0xffff);
 				ct->cport = 0;
-				ip_vs_conn_hash(ct);
+				ip_vs_conn_hash(net, ct);
 			}
 		}

@@ -720,16 +723,17 @@ static void ip_vs_conn_expire(unsigned long data)
 		if (unlikely(cp->app != NULL))
 			ip_vs_unbind_app(cp);
 		ip_vs_unbind_dest(cp);
+		BUG_ON(!cp->net);
 		if (cp->flags & IP_VS_CONN_F_NO_CPORT)
-			atomic_dec(&ip_vs_conn_no_cport_cnt);
-		atomic_dec(&ip_vs_conn_count);
+			atomic_dec(&cp->net->ipvs->conn_no_cport_cnt);
+		atomic_dec(&cp->net->ipvs->conn_count);

-		kmem_cache_free(ip_vs_conn_cachep, cp);
+		kmem_cache_free(cp->net->ipvs->conn_cachep, cp);
 		return;
 	}

 	/* hash it back to the table */
-	ip_vs_conn_hash(cp);
+	ip_vs_conn_hash(cp->net, cp);

   expire_later:
 	IP_VS_DBG(7, "delayed: conn->refcnt-1=%d conn->n_control=%d\n",
@@ -748,18 +752,22 @@ void ip_vs_conn_expire_now(struct ip_vs_conn *cp)


 /*
- *	Create a new connection entry and hash it into the ip_vs_conn_tab
+ *	Create a new connection entry and hash it into the ip_vs_conn_tab,
+ * 	netns ptr will be stored in ip_vs_con here.
  */
 struct ip_vs_conn *
-ip_vs_conn_new(int af, int proto, const union nf_inet_addr *caddr, __be16 cport,
+ip_vs_conn_new(struct net *net, int af, int proto,
+	       const union nf_inet_addr *caddr, __be16 cport,
 	       const union nf_inet_addr *vaddr, __be16 vport,
-	       const union nf_inet_addr *daddr, __be16 dport, unsigned flags,
-	       struct ip_vs_dest *dest)
+	       const union nf_inet_addr *daddr, __be16 dport,
+	       unsigned flags, struct ip_vs_dest *dest)
 {
 	struct ip_vs_conn *cp;
-	struct ip_vs_protocol *pp = ip_vs_proto_get(proto);
+	struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, proto);
+	struct ip_vs_protocol *pp;
+	struct netns_ipvs *ipvs = net->ipvs;

-	cp = kmem_cache_zalloc(ip_vs_conn_cachep, GFP_ATOMIC);
+	cp = kmem_cache_zalloc(ipvs->conn_cachep, GFP_ATOMIC);
 	if (cp == NULL) {
 		IP_VS_ERR_RL("%s(): no memory\n", __func__);
 		return NULL;
@@ -790,9 +798,9 @@ ip_vs_conn_new(int af, int proto, const union nf_inet_addr *caddr, __be16 cport,
 	atomic_set(&cp->n_control, 0);
 	atomic_set(&cp->in_pkts, 0);

-	atomic_inc(&ip_vs_conn_count);
+	atomic_inc(&ipvs->conn_count);
 	if (flags & IP_VS_CONN_F_NO_CPORT)
-		atomic_inc(&ip_vs_conn_no_cport_cnt);
+		atomic_inc(&ipvs->conn_no_cport_cnt);

 	/* Bind the connection with a destination server */
 	ip_vs_bind_dest(cp, dest);
@@ -808,12 +816,14 @@ ip_vs_conn_new(int af, int proto, const union nf_inet_addr *caddr, __be16 cport,
 	else
 #endif
 		ip_vs_bind_xmit(cp);
-
-	if (unlikely(pp && atomic_read(&pp->appcnt)))
-		ip_vs_bind_app(cp, pp);
-
+	cp->net = net;	/* netns ptr  needed in timer */
+	if( pd ) {
+		pp = pd->pp;
+		if (unlikely(pp && atomic_read(&pd->appcnt)))
+			ip_vs_bind_app(net, cp, pp);
+	}
 	/* Hash it in the ip_vs_conn_tab finally */
-	ip_vs_conn_hash(cp);
+	ip_vs_conn_hash(net, cp);

 	return cp;
 }
@@ -824,16 +834,33 @@ ip_vs_conn_new(int af, int proto, const union nf_inet_addr *caddr, __be16 cport,
  */
 #ifdef CONFIG_PROC_FS

+struct ipvs_private {
+	struct seq_net_private p;
+	void *private;
+};
+
+static inline void ipvs_seq_priv_set(struct seq_file *seq, void *data)
+{
+	struct ipvs_private *ipriv=(struct ipvs_private *)seq->private;
+	ipriv->private = data;
+}
+static inline void *ipvs_seq_priv_get(struct seq_file *seq)
+{
+	return ((struct ipvs_private *)seq->private)->private;
+}
+
 static void *ip_vs_conn_array(struct seq_file *seq, loff_t pos)
 {
 	int idx;
 	struct ip_vs_conn *cp;
+	struct net *net = seq_file_net(seq);
+	struct netns_ipvs *ipvs = net->ipvs;

 	for (idx = 0; idx < ip_vs_conn_tab_size; idx++) {
 		ct_read_lock_bh(idx);
-		list_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) {
+		list_for_each_entry(cp, &ipvs->conn_tab[idx], c_list) {
 			if (pos-- == 0) {
-				seq->private = &ip_vs_conn_tab[idx];
+				ipvs_seq_priv_set(seq, &ipvs->conn_tab[idx]);
 				return cp;
 			}
 		}
@@ -845,15 +872,17 @@ static void *ip_vs_conn_array(struct seq_file *seq, loff_t pos)

 static void *ip_vs_conn_seq_start(struct seq_file *seq, loff_t *pos)
 {
-	seq->private = NULL;
+	ipvs_seq_priv_set(seq, NULL);
 	return *pos ? ip_vs_conn_array(seq, *pos - 1) :SEQ_START_TOKEN;
 }
-
+ /* netns: conn_tab OK */
 static void *ip_vs_conn_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 {
 	struct ip_vs_conn *cp = v;
-	struct list_head *e, *l = seq->private;
+	struct list_head *e, *l = ipvs_seq_priv_get(seq);
 	int idx;
+	struct net *net = seq_file_net(seq);
+	struct netns_ipvs *ipvs = net->ipvs;

 	++*pos;
 	if (v == SEQ_START_TOKEN)
@@ -863,27 +892,28 @@ static void *ip_vs_conn_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 	if ((e = cp->c_list.next) != l)
 		return list_entry(e, struct ip_vs_conn, c_list);

-	idx = l - ip_vs_conn_tab;
+	idx = l - ipvs->conn_tab;
 	ct_read_unlock_bh(idx);

 	while (++idx < ip_vs_conn_tab_size) {
 		ct_read_lock_bh(idx);
-		list_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) {
-			seq->private = &ip_vs_conn_tab[idx];
+		list_for_each_entry(cp, &ipvs->conn_tab[idx], c_list) {
+			ipvs_seq_priv_set(seq, &ipvs->conn_tab[idx]);
 			return cp;
 		}
 		ct_read_unlock_bh(idx);
 	}
-	seq->private = NULL;
+	ipvs_seq_priv_set(seq, NULL);
 	return NULL;
 }
-
+/* netns: conn_tab OK */
 static void ip_vs_conn_seq_stop(struct seq_file *seq, void *v)
 {
-	struct list_head *l = seq->private;
+	struct list_head *l = ipvs_seq_priv_get(seq);
+	struct net *net = seq_file_net(seq);

 	if (l)
-		ct_read_unlock_bh(l - ip_vs_conn_tab);
+		ct_read_unlock_bh(l - net->ipvs->conn_tab);
 }

 static int ip_vs_conn_seq_show(struct seq_file *seq, void *v)
@@ -928,7 +958,16 @@ static const struct seq_operations ip_vs_conn_seq_ops = {

 static int ip_vs_conn_open(struct inode *inode, struct file *file)
 {
-	return seq_open(file, &ip_vs_conn_seq_ops);
+	int ret;
+	struct ipvs_private *priv;
+
+	ret = seq_open_net(inode, file, &ip_vs_conn_seq_ops,
+			   sizeof(struct ipvs_private));
+	if (!ret) {
+		priv = ((struct seq_file *)file->private_data)->private;
+		priv->private = NULL;
+	}
+	return ret;
 }

 static const struct file_operations ip_vs_conn_fops = {
@@ -936,7 +975,8 @@ static const struct file_operations ip_vs_conn_fops = {
 	.open    = ip_vs_conn_open,
 	.read    = seq_read,
 	.llseek  = seq_lseek,
-	.release = seq_release,
+	.release = seq_release_private,
+
 };

 static const char *ip_vs_origin_name(unsigned flags)
@@ -991,7 +1031,17 @@ static const struct seq_operations ip_vs_conn_sync_seq_ops = {

 static int ip_vs_conn_sync_open(struct inode *inode, struct file *file)
 {
-	return seq_open(file, &ip_vs_conn_sync_seq_ops);
+	int ret;
+	struct ipvs_private *ipriv;
+
+	ret = seq_open_net(inode, file, &ip_vs_conn_sync_seq_ops,
+			   sizeof(struct ipvs_private));
+	if (!ret) {
+		ipriv = ((struct seq_file *)file->private_data)->private;
+		ipriv->private = NULL;
+	}
+	return ret;
+//	return seq_open(file, &ip_vs_conn_sync_seq_ops);
 }

 static const struct file_operations ip_vs_conn_sync_fops = {
@@ -999,7 +1049,7 @@ static const struct file_operations ip_vs_conn_sync_fops = {
 	.open    = ip_vs_conn_sync_open,
 	.read    = seq_read,
 	.llseek  = seq_lseek,
-	.release = seq_release,
+	.release = seq_release_private,
 };

 #endif
@@ -1036,11 +1086,14 @@ static inline int todrop_entry(struct ip_vs_conn *cp)
 	return 1;
 }

-/* Called from keventd and must protect itself from softirqs */
-void ip_vs_random_dropentry(void)
+/* Called from keventd and must protect itself from softirqs
+ * netns: conn_tab OK
+ */
+void ip_vs_random_dropentry(struct net *net)
 {
 	int idx;
 	struct ip_vs_conn *cp;
+	struct netns_ipvs *ipvs = net->ipvs;

 	/*
 	 * Randomly scan 1/32 of the whole table every second
@@ -1053,7 +1106,7 @@ void ip_vs_random_dropentry(void)
 		 */
 		ct_write_lock_bh(hash);

-		list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) {
+		list_for_each_entry(cp, &ipvs->conn_tab[hash], c_list) {
 			if (cp->flags & IP_VS_CONN_F_TEMPLATE)
 				/* connection template */
 				continue;
@@ -1091,11 +1144,13 @@ void ip_vs_random_dropentry(void)

 /*
  *      Flush all the connection entries in the ip_vs_conn_tab
+ * netns: conn_tab OK
  */
-static void ip_vs_conn_flush(void)
+static void ip_vs_conn_flush(struct net *net)
 {
 	int idx;
 	struct ip_vs_conn *cp;
+	struct netns_ipvs *ipvs = net->ipvs;

   flush_again:
 	for (idx = 0; idx < ip_vs_conn_tab_size; idx++) {
@@ -1104,7 +1159,7 @@ static void ip_vs_conn_flush(void)
 		 */
 		ct_write_lock_bh(idx);

-		list_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) {
+		list_for_each_entry(cp, &ipvs->conn_tab[idx], c_list) {

 			IP_VS_DBG(4, "del connection\n");
 			ip_vs_conn_expire_now(cp);
@@ -1118,16 +1173,17 @@ static void ip_vs_conn_flush(void)

 	/* the counter may be not NULL, because maybe some conn entries
 	   are run by slow timer handler or unhashed but still referred */
-	if (atomic_read(&ip_vs_conn_count) != 0) {
+	if (atomic_read(&ipvs->conn_count) != 0) {
 		schedule();
 		goto flush_again;
 	}
 }


-int __init ip_vs_conn_init(void)
+int __net_init __ip_vs_conn_init(struct net *net)
 {
 	int idx;
+	struct netns_ipvs *ipvs = net->ipvs;

 	/* Compute size and mask */
 	ip_vs_conn_tab_size = 1 << ip_vs_conn_tab_bits;
@@ -1136,19 +1192,26 @@ int __init ip_vs_conn_init(void)
 	/*
 	 * Allocate the connection hash table and initialize its list heads
 	 */
-	ip_vs_conn_tab = vmalloc(ip_vs_conn_tab_size *
+	ipvs->conn_tab = vmalloc(ip_vs_conn_tab_size *
 				 sizeof(struct list_head));
-	if (!ip_vs_conn_tab)
+	if (!ipvs->conn_tab)
 		return -ENOMEM;

 	/* Allocate ip_vs_conn slab cache */
-	ip_vs_conn_cachep = kmem_cache_create("ip_vs_conn",
+	/* Todo: find a better way to name the cache */
+	snprintf(ipvs->conn_cname, sizeof(ipvs->conn_cname)-1,
+			"ipvs_conn_%d", atomic_read(&conn_cache_nr) );
+	atomic_inc(&conn_cache_nr);
+
+	ipvs->conn_cachep = kmem_cache_create(ipvs->conn_cname,
 					      sizeof(struct ip_vs_conn), 0,
 					      SLAB_HWCACHE_ALIGN, NULL);
-	if (!ip_vs_conn_cachep) {
-		vfree(ip_vs_conn_tab);
+	if (!ipvs->conn_cachep) {
+		vfree(ipvs->conn_tab);
 		return -ENOMEM;
 	}
+	atomic_set(&ipvs->conn_count, 0);
+	atomic_set(&ipvs->conn_no_cport_cnt, 0);

 	pr_info("Connection hash table configured "
 		"(size=%d, memory=%ldKbytes)\n",
@@ -1158,31 +1221,46 @@ int __init ip_vs_conn_init(void)
 		  sizeof(struct ip_vs_conn));

 	for (idx = 0; idx < ip_vs_conn_tab_size; idx++) {
-		INIT_LIST_HEAD(&ip_vs_conn_tab[idx]);
+		INIT_LIST_HEAD(&ipvs->conn_tab[idx]);
 	}

 	for (idx = 0; idx < CT_LOCKARRAY_SIZE; idx++)  {
 		rwlock_init(&__ip_vs_conntbl_lock_array[idx].l);
 	}

-	proc_net_fops_create(&init_net, "ip_vs_conn", 0, &ip_vs_conn_fops);
-	proc_net_fops_create(&init_net, "ip_vs_conn_sync", 0, &ip_vs_conn_sync_fops);
-
-	/* calculate the random value for connection hash */
-	get_random_bytes(&ip_vs_conn_rnd, sizeof(ip_vs_conn_rnd));
+	proc_net_fops_create(net, "ip_vs_conn", 0, &ip_vs_conn_fops);
+	proc_net_fops_create(net, "ip_vs_conn_sync", 0, &ip_vs_conn_sync_fops);

 	return 0;
 }
+/* Cleanup and release all netns related ... */
+static void __net_exit __ip_vs_conn_cleanup(struct net *net) {

+	/* flush all the connection entries first */
+	ip_vs_conn_flush(net);
+	/* Release the empty cache */
+	kmem_cache_destroy(net->ipvs->conn_cachep);
+	proc_net_remove(net, "ip_vs_conn");
+	proc_net_remove(net, "ip_vs_conn_sync");
+	vfree(net->ipvs->conn_tab);
+}
+static struct pernet_operations ipvs_conn_ops = {
+	.init = __ip_vs_conn_init,
+	.exit = __ip_vs_conn_cleanup,
+};

-void ip_vs_conn_cleanup(void)
+int __init ip_vs_conn_init(void)
 {
-	/* flush all the connection entries first */
-	ip_vs_conn_flush();
+	int rv;

-	/* Release the empty cache */
-	kmem_cache_destroy(ip_vs_conn_cachep);
-	proc_net_remove(&init_net, "ip_vs_conn");
-	proc_net_remove(&init_net, "ip_vs_conn_sync");
-	vfree(ip_vs_conn_tab);
+	rv = register_pernet_subsys(&ipvs_conn_ops);
+
+	/* calculate the random value for connection hash */
+	get_random_bytes(&ip_vs_conn_rnd, sizeof(ip_vs_conn_rnd));
+	return rv;
+}
+
+void ip_vs_conn_cleanup(void)
+{
+	unregister_pernet_subsys(&ipvs_conn_ops);
 }

-- 
Regards
Hans Schillstrom <hans.schillstrom@ericsson.com>

^ permalink raw reply related

* [RFC PATCH 4/9] ipvs network name space aware
From: Hans Schillstrom @ 2010-10-08 11:16 UTC (permalink / raw)
  To: lvs-devel, netdev, netfilter-devel; +Cc: horms, ja, wensong, daniel.lezcano

This patch just contains ip_vs_core.c

Signed-off-by:Hans Schillstrom <hans.schillstrom@ericsson.com>

diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index 0c043b6..4fdc5cb 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -52,7 +52,6 @@

 #include <net/ip_vs.h>

-
 EXPORT_SYMBOL(register_ip_vs_scheduler);
 EXPORT_SYMBOL(unregister_ip_vs_scheduler);
 EXPORT_SYMBOL(ip_vs_proto_name);
@@ -67,6 +66,8 @@ EXPORT_SYMBOL(ip_vs_conn_put);
 EXPORT_SYMBOL(ip_vs_get_debug_level);
 #endif

+/* netns cnt used for uniqueness */
+static atomic_t ipvs_netns_cnt = ATOMIC_INIT(0);

 /* ID used in ICMP lookups */
 #define icmp_id(icmph)          (((icmph)->un).echo.id)
@@ -107,6 +108,8 @@ static inline void
 ip_vs_in_stats(struct ip_vs_conn *cp, struct sk_buff *skb)
 {
 	struct ip_vs_dest *dest = cp->dest;
+	struct net *net = dev_net(skb->dev);
+
 	if (dest && (dest->flags & IP_VS_DEST_F_AVAILABLE)) {
 		spin_lock(&dest->stats.lock);
 		dest->stats.ustats.inpkts++;
@@ -118,10 +121,10 @@ ip_vs_in_stats(struct ip_vs_conn *cp, struct sk_buff *skb)
 		dest->svc->stats.ustats.inbytes += skb->len;
 		spin_unlock(&dest->svc->stats.lock);

-		spin_lock(&ip_vs_stats.lock);
-		ip_vs_stats.ustats.inpkts++;
-		ip_vs_stats.ustats.inbytes += skb->len;
-		spin_unlock(&ip_vs_stats.lock);
+		spin_lock(&net->ipvs->ctl_stats->lock);
+		net->ipvs->ctl_stats->ustats.inpkts++;
+		net->ipvs->ctl_stats->ustats.inbytes += skb->len;
+		spin_unlock(&net->ipvs->ctl_stats->lock);
 	}
 }

@@ -130,7 +133,10 @@ static inline void
 ip_vs_out_stats(struct ip_vs_conn *cp, struct sk_buff *skb)
 {
 	struct ip_vs_dest *dest = cp->dest;
+	struct net *net = dev_net(skb->dev);
+
 	if (dest && (dest->flags & IP_VS_DEST_F_AVAILABLE)) {
+		struct ip_vs_stats *ctl_stats = net->ipvs->ctl_stats;
 		spin_lock(&dest->stats.lock);
 		dest->stats.ustats.outpkts++;
 		dest->stats.ustats.outbytes += skb->len;
@@ -141,16 +147,16 @@ ip_vs_out_stats(struct ip_vs_conn *cp, struct sk_buff *skb)
 		dest->svc->stats.ustats.outbytes += skb->len;
 		spin_unlock(&dest->svc->stats.lock);

-		spin_lock(&ip_vs_stats.lock);
-		ip_vs_stats.ustats.outpkts++;
-		ip_vs_stats.ustats.outbytes += skb->len;
-		spin_unlock(&ip_vs_stats.lock);
+		spin_lock(&ctl_stats->lock);
+		net->ipvs->ctl_stats->ustats.outpkts++;
+		net->ipvs->ctl_stats->ustats.outbytes += skb->len;
+		spin_unlock(&ctl_stats->lock);
 	}
 }


 static inline void
-ip_vs_conn_stats(struct ip_vs_conn *cp, struct ip_vs_service *svc)
+ip_vs_conn_stats(struct net *net, struct ip_vs_conn *cp, struct ip_vs_service *svc)
 {
 	spin_lock(&cp->dest->stats.lock);
 	cp->dest->stats.ustats.conns++;
@@ -160,9 +166,9 @@ ip_vs_conn_stats(struct ip_vs_conn *cp, struct ip_vs_service *svc)
 	svc->stats.ustats.conns++;
 	spin_unlock(&svc->stats.lock);

-	spin_lock(&ip_vs_stats.lock);
-	ip_vs_stats.ustats.conns++;
-	spin_unlock(&ip_vs_stats.lock);
+	spin_lock(&net->ipvs->ctl_stats->lock);
+	net->ipvs->ctl_stats->ustats.conns++;
+	spin_unlock(&net->ipvs->ctl_stats->lock);
 }


@@ -197,6 +203,7 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
 	__be16  flags;
 	union nf_inet_addr snet;	/* source network of the client,
 					   after masking */
+	struct net *net = dev_net(skb->dev);

 	ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph);

@@ -230,13 +237,13 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
 	if (ports[1] == svc->port) {
 		/* Check if a template already exists */
 		if (svc->port != FTPPORT)
-			ct = ip_vs_ct_in_get(svc->af, iph.protocol, &snet, 0,
-					     &iph.daddr, ports[1]);
+			ct = ip_vs_ct_in_get(net, svc->af, iph.protocol, &snet,
+					     0, &iph.daddr, ports[1]);
 		else
-			ct = ip_vs_ct_in_get(svc->af, iph.protocol, &snet, 0,
-					     &iph.daddr, 0);
+			ct = ip_vs_ct_in_get(net, svc->af, iph.protocol, &snet,
+					     0, &iph.daddr, 0);

-		if (!ct || !ip_vs_check_template(ct)) {
+		if (!ct || !ip_vs_check_template(net, ct)) {
 			/*
 			 * No template found or the dest of the connection
 			 * template is not available.
@@ -254,7 +261,7 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
 			 * for ftp service.
 			 */
 			if (svc->port != FTPPORT)
-				ct = ip_vs_conn_new(svc->af, iph.protocol,
+				ct = ip_vs_conn_new(net, svc->af, iph.protocol,
 						    &snet, 0,
 						    &iph.daddr,
 						    ports[1],
@@ -262,7 +269,7 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
 						    IP_VS_CONN_F_TEMPLATE,
 						    dest);
 			else
-				ct = ip_vs_conn_new(svc->af, iph.protocol,
+				ct = ip_vs_conn_new(net, svc->af, iph.protocol,
 						    &snet, 0,
 						    &iph.daddr, 0,
 						    &dest->addr, 0,
@@ -289,13 +296,13 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
 				.ip = htonl(svc->fwmark)
 			};

-			ct = ip_vs_ct_in_get(svc->af, IPPROTO_IP, &snet, 0,
-					     &fwmark, 0);
+			ct = ip_vs_ct_in_get(net, svc->af, IPPROTO_IP, &snet,
+					     0, &fwmark, 0);
 		} else
-			ct = ip_vs_ct_in_get(svc->af, iph.protocol, &snet, 0,
-					     &iph.daddr, 0);
+			ct = ip_vs_ct_in_get(net, svc->af, iph.protocol, &snet,
+					     0, &iph.daddr, 0);

-		if (!ct || !ip_vs_check_template(ct)) {
+		if (!ct || !ip_vs_check_template(net, ct)) {
 			/*
 			 * If it is not persistent port zero, return NULL,
 			 * otherwise create a connection template.
@@ -317,14 +324,14 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
 					.ip = htonl(svc->fwmark)
 				};

-				ct = ip_vs_conn_new(svc->af, IPPROTO_IP,
+				ct = ip_vs_conn_new(net, svc->af, IPPROTO_IP,
 						    &snet, 0,
 						    &fwmark, 0,
 						    &dest->addr, 0,
 						    IP_VS_CONN_F_TEMPLATE,
 						    dest);
 			} else
-				ct = ip_vs_conn_new(svc->af, iph.protocol,
+				ct = ip_vs_conn_new(net, svc->af, iph.protocol,
 						    &snet, 0,
 						    &iph.daddr, 0,
 						    &dest->addr, 0,
@@ -348,7 +355,7 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
 	/*
 	 *    Create a new connection according to the template
 	 */
-	cp = ip_vs_conn_new(svc->af, iph.protocol,
+	cp = ip_vs_conn_new(net, svc->af, iph.protocol,
 			    &iph.saddr, ports[0],
 			    &iph.daddr, ports[1],
 			    &dest->addr, dport,
@@ -365,7 +372,7 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
 	ip_vs_control_add(cp, ct);
 	ip_vs_conn_put(ct);

-	ip_vs_conn_stats(cp, svc);
+	ip_vs_conn_stats(net, cp, svc);
 	return cp;
 }

@@ -383,6 +390,7 @@ ip_vs_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
 	struct ip_vs_iphdr iph;
 	struct ip_vs_dest *dest;
 	__be16 _ports[2], *pptr, flags;
+	struct net *net = dev_net(skb->dev);

 	ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph);
 	pptr = skb_header_pointer(skb, iph.len, sizeof(_ports), _ports);
@@ -415,11 +423,10 @@ ip_vs_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
 	flags = (svc->flags & IP_VS_SVC_F_ONEPACKET
 		 && iph.protocol == IPPROTO_UDP)?
 		IP_VS_CONN_F_ONE_PACKET : 0;
-
 	/*
 	 *    Create a connection entry.
 	 */
-	cp = ip_vs_conn_new(svc->af, iph.protocol,
+	cp = ip_vs_conn_new(net, svc->af, iph.protocol,
 			    &iph.saddr, pptr[0],
 			    &iph.daddr, pptr[1],
 			    &dest->addr, dest->port ? dest->port : pptr[1],
@@ -436,7 +443,7 @@ ip_vs_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
 		      IP_VS_DBG_ADDR(svc->af, &cp->daddr), ntohs(cp->dport),
 		      cp->flags, atomic_read(&cp->refcnt));

-	ip_vs_conn_stats(cp, svc);
+	ip_vs_conn_stats(net, cp, svc);
 	return cp;
 }

@@ -452,6 +459,8 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
 	__be16 _ports[2], *pptr;
 	struct ip_vs_iphdr iph;
 	int unicast;
+	struct net *net = dev_net(skb->dev);
+
 	ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph);

 	pptr = skb_header_pointer(skb, iph.len, sizeof(_ports), _ports);
@@ -465,12 +474,12 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
 		unicast = ipv6_addr_type(&iph.daddr.in6) & IPV6_ADDR_UNICAST;
 	else
 #endif
-		unicast = (inet_addr_type(&init_net, iph.daddr.ip) == RTN_UNICAST);
+		unicast = (inet_addr_type(net, iph.daddr.ip) == RTN_UNICAST);

 	/* if it is fwmark-based service, the cache_bypass sysctl is up
 	   and the destination is a non-local unicast, then create
 	   a cache_bypass connection entry */
-	if (sysctl_ip_vs_cache_bypass && svc->fwmark && unicast) {
+	if (net->ipvs->sysctl_cache_bypass && svc->fwmark && unicast) {
 		int ret, cs;
 		struct ip_vs_conn *cp;
 		__u16 flags = (svc->flags & IP_VS_SVC_F_ONEPACKET &&
@@ -482,7 +491,7 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,

 		/* create a new connection entry */
 		IP_VS_DBG(6, "%s(): create a cache_bypass entry\n", __func__);
-		cp = ip_vs_conn_new(svc->af, iph.protocol,
+		cp = ip_vs_conn_new(net, svc->af, iph.protocol,
 				    &iph.saddr, pptr[0],
 				    &iph.daddr, pptr[1],
 				    &daddr, 0,
@@ -954,6 +963,7 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb,
 	struct ip_vs_protocol *pp;
 	struct ip_vs_conn *cp;
 	int af;
+	struct net *net = dev_net(skb->dev);

 	EnterFunction(11);

@@ -1013,7 +1023,7 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb,
 	cp = pp->conn_out_get(af, skb, pp, &iph, iph.len, 0);

 	if (unlikely(!cp)) {
-		if (sysctl_ip_vs_nat_icmp_send &&
+		if (net->ipvs->sysctl_nat_icmp_send &&
 		    (pp->protocol == IPPROTO_TCP ||
 		     pp->protocol == IPPROTO_UDP ||
 		     pp->protocol == IPPROTO_SCTP)) {
@@ -1023,7 +1033,7 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb,
 						  sizeof(_ports), _ports);
 			if (pptr == NULL)
 				return NF_ACCEPT;	/* Not for me */
-			if (ip_vs_lookup_real_service(af, iph.protocol,
+			if (ip_vs_lookup_real_service(net, af, iph.protocol,
 						      &iph.saddr,
 						      pptr[0])) {
 				/*
@@ -1283,6 +1293,7 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb,
 	struct ip_vs_protocol *pp;
 	struct ip_vs_conn *cp;
 	int ret, restart, af, pkts;
+	struct net *net = dev_net(skb->dev);

 	af = (skb->protocol == htons(ETH_P_IP)) ? AF_INET : AF_INET6;

@@ -1354,7 +1365,7 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb,
 	if (cp->dest && !(cp->dest->flags & IP_VS_DEST_F_AVAILABLE)) {
 		/* the destination server is not available */

-		if (sysctl_ip_vs_expire_nodest_conn) {
+		if (net->ipvs->sysctl_expire_nodest_conn) {
 			/* try to expire the connection immediately */
 			ip_vs_conn_expire_now(cp);
 		}
@@ -1381,33 +1392,33 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb,
 	 * encorage the standby servers to update the connections timeout
 	 */
 	pkts = atomic_add_return(1, &cp->in_pkts);
-	if (af == AF_INET && (ip_vs_sync_state & IP_VS_STATE_MASTER) &&
+	if (af == AF_INET && (net->ipvs->sync_state & IP_VS_STATE_MASTER) &&
 	    cp->protocol == IPPROTO_SCTP) {
 		if ((cp->state == IP_VS_SCTP_S_ESTABLISHED &&
-			(pkts % sysctl_ip_vs_sync_threshold[1]
-			 == sysctl_ip_vs_sync_threshold[0])) ||
+			(pkts % net->ipvs->sysctl_sync_threshold[1]
+			 == net->ipvs->sysctl_sync_threshold[0])) ||
 				(cp->old_state != cp->state &&
 				 ((cp->state == IP_VS_SCTP_S_CLOSED) ||
 				  (cp->state == IP_VS_SCTP_S_SHUT_ACK_CLI) ||
 				  (cp->state == IP_VS_SCTP_S_SHUT_ACK_SER)))) {
-			ip_vs_sync_conn(cp);
+			ip_vs_sync_conn(net, cp);
 			goto out;
 		}
 	}

 	/* Keep this block last: TCP and others with pp->num_states <= 1 */
 	else if (af == AF_INET &&
-	    (ip_vs_sync_state & IP_VS_STATE_MASTER) &&
+	    (net->ipvs->sync_state & IP_VS_STATE_MASTER) &&
 	    (((cp->protocol != IPPROTO_TCP ||
 	       cp->state == IP_VS_TCP_S_ESTABLISHED) &&
-	      (pkts % sysctl_ip_vs_sync_threshold[1]
-	       == sysctl_ip_vs_sync_threshold[0])) ||
+	      (pkts % net->ipvs->sysctl_sync_threshold[1]
+	       == net->ipvs->sysctl_sync_threshold[0])) ||
 	     ((cp->protocol == IPPROTO_TCP) && (cp->old_state != cp->state) &&
 	      ((cp->state == IP_VS_TCP_S_FIN_WAIT) ||
 	       (cp->state == IP_VS_TCP_S_CLOSE) ||
 	       (cp->state == IP_VS_TCP_S_CLOSE_WAIT) ||
 	       (cp->state == IP_VS_TCP_S_TIME_WAIT)))))
-		ip_vs_sync_conn(cp);
+		ip_vs_sync_conn(net,cp);
 out:
 	cp->old_state = cp->state;

@@ -1512,7 +1523,37 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
 	},
 #endif
 };
+/*
+ *	Initialize IP Virtual Server netns mem.
+ */
+static int __net_init __ip_vs_init(struct net *net)
+{
+	struct netns_ipvs *ipvs = 0;

+	ipvs = kzalloc(sizeof(struct netns_ipvs), GFP_ATOMIC);
+	if( ipvs == NULL ) {
+		pr_err("%s(): no memory.\n", __func__);
+		return -ENOMEM;
+	}
+	ipvs->inc = atomic_read(&ipvs_netns_cnt);
+	atomic_inc(&ipvs_netns_cnt);
+	IP_VS_DBG(10, "Creating new netns *net=%p *ipvs=%p size=%lu\n",
+		     net, ipvs, sizeof(struct netns_ipvs));
+	net->ipvs = ipvs;
+
+	return 0;
+}
+
+static void __net_exit __ip_vs_cleanup(struct net *net)
+{
+	IP_VS_DBG(10, "ipvs netns %p released\n", net);
+	kfree(net->ipvs);
+}
+
+static struct pernet_operations ipvs_core_ops = {
+	.init = __ip_vs_init,
+	.exit = __ip_vs_cleanup,
+};

 /*
  *	Initialize IP Virtual Server
@@ -1521,8 +1562,11 @@ static int __init ip_vs_init(void)
 {
 	int ret;

-	ip_vs_estimator_init();
+	ret = register_pernet_subsys(&ipvs_core_ops);	/* Alloc ip_vs struct */
+	if( ret < 0 )
+		return ret;

+	ip_vs_estimator_init();
 	ret = ip_vs_control_init();
 	if (ret < 0) {
 		pr_err("can't setup control.\n");
@@ -1530,28 +1574,30 @@ static int __init ip_vs_init(void)
 	}

 	ip_vs_protocol_init();
-
 	ret = ip_vs_app_init();
 	if (ret < 0) {
 		pr_err("can't setup application helper.\n");
 		goto cleanup_protocol;
 	}
-
 	ret = ip_vs_conn_init();
 	if (ret < 0) {
 		pr_err("can't setup connection table.\n");
 		goto cleanup_app;
 	}
-
+	ret = ip_vs_sync_init();
+	if (ret < 0) {
+		pr_err("can't setup sync data.\n");
+		goto cleanup_conn;
+	}
 	ret = nf_register_hooks(ip_vs_ops, ARRAY_SIZE(ip_vs_ops));
 	if (ret < 0) {
 		pr_err("can't register hooks.\n");
-		goto cleanup_conn;
+		goto cleanup_sync;
 	}
-
 	pr_info("ipvs loaded.\n");
 	return ret;
-
+  cleanup_sync:
+  	ip_vs_sync_cleanup();
   cleanup_conn:
 	ip_vs_conn_cleanup();
   cleanup_app:
@@ -1561,17 +1607,20 @@ static int __init ip_vs_init(void)
 	ip_vs_control_cleanup();
   cleanup_estimator:
 	ip_vs_estimator_cleanup();
+	unregister_pernet_subsys(&ipvs_core_ops);	/* free ip_vs struct */
 	return ret;
 }

 static void __exit ip_vs_cleanup(void)
 {
 	nf_unregister_hooks(ip_vs_ops, ARRAY_SIZE(ip_vs_ops));
+	ip_vs_sync_cleanup();
 	ip_vs_conn_cleanup();
 	ip_vs_app_cleanup();
 	ip_vs_protocol_cleanup();
 	ip_vs_control_cleanup();
 	ip_vs_estimator_cleanup();
+	unregister_pernet_subsys(&ipvs_core_ops);	/* free ip_vs struct */
 	pr_info("ipvs unloaded.\n");
 }


-- 
Regards
Hans Schillstrom <hans.schillstrom@ericsson.com>

^ permalink raw reply related

* [RFC PATCH 5/9] ipvs network name space aware
From: Hans Schillstrom @ 2010-10-08 11:17 UTC (permalink / raw)
  To: lvs-devel, netdev, netfilter-devel; +Cc: horms, ja, wensong, daniel.lezcano

This patch contains only the changes to ip_vs_ctl.c.

Signed-off-by: Hans Schillstrom <hans.schillstrom@ericsson.com>

diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index ca8ec8c..7e99cbc 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -38,6 +38,7 @@
 #include <linux/mutex.h>

 #include <net/net_namespace.h>
+#include <linux/nsproxy.h>
 #include <net/ip.h>
 #ifdef CONFIG_IP_VS_IPV6
 #include <net/ipv6.h>
@@ -77,20 +78,8 @@ static atomic_t ip_vs_dropentry = ATOMIC_INIT(0);
 /* number of virtual services */
 static int ip_vs_num_services = 0;

-/* sysctl variables */
-static int sysctl_ip_vs_drop_entry = 0;
-static int sysctl_ip_vs_drop_packet = 0;
-static int sysctl_ip_vs_secure_tcp = 0;
-static int sysctl_ip_vs_amemthresh = 1024;
-static int sysctl_ip_vs_am_droprate = 10;
-int sysctl_ip_vs_cache_bypass = 0;
-int sysctl_ip_vs_expire_nodest_conn = 0;
-int sysctl_ip_vs_expire_quiescent_template = 0;
-int sysctl_ip_vs_sync_threshold[2] = { 3, 50 };
-int sysctl_ip_vs_nat_icmp_send = 0;
-
-
 #ifdef CONFIG_IP_VS_DEBUG
+/* sysctl variables, Not per netns level */
 static int sysctl_ip_vs_debug_level = 0;

 int ip_vs_get_debug_level(void)
@@ -101,7 +90,7 @@ int ip_vs_get_debug_level(void)

 #ifdef CONFIG_IP_VS_IPV6
 /* Taken from rt6_fill_node() in net/ipv6/route.c, is there a better way? */
-static int __ip_vs_addr_is_local_v6(const struct in6_addr *addr)
+static int __ip_vs_addr_is_local_v6(struct net *net, const struct in6_addr *addr)
 {
 	struct rt6_info *rt;
 	struct flowi fl = {
@@ -112,7 +101,7 @@ static int __ip_vs_addr_is_local_v6(const struct in6_addr *addr)
 				.saddr = { .s6_addr32 = {0, 0, 0, 0} }, } },
 	};

-	rt = (struct rt6_info *)ip6_route_output(&init_net, NULL, &fl);
+	rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl);
 	if (rt && rt->rt6i_dev && (rt->rt6i_dev->flags & IFF_LOOPBACK))
 			return 1;

@@ -123,8 +112,9 @@ static int __ip_vs_addr_is_local_v6(const struct in6_addr *addr)
  *	update_defense_level is called from keventd and from sysctl,
  *	so it needs to protect itself from softirqs
  */
-static void update_defense_level(void)
+static void update_defense_level(struct net *net)
 {
+	struct netns_ipvs *ipvs = net->ipvs;
 	struct sysinfo i;
 	static int old_secure_tcp = 0;
 	int availmem;
@@ -139,20 +129,20 @@ static void update_defense_level(void)
 	/* si_swapinfo(&i); */
 	/* availmem = availmem - (i.totalswap - i.freeswap); */

-	nomem = (availmem < sysctl_ip_vs_amemthresh);
+	nomem = (availmem < ipvs->sysctl_amemthresh);

 	local_bh_disable();

 	/* drop_entry */
 	spin_lock(&__ip_vs_dropentry_lock);
-	switch (sysctl_ip_vs_drop_entry) {
+	switch (ipvs->sysctl_drop_entry) {
 	case 0:
 		atomic_set(&ip_vs_dropentry, 0);
 		break;
 	case 1:
 		if (nomem) {
 			atomic_set(&ip_vs_dropentry, 1);
-			sysctl_ip_vs_drop_entry = 2;
+			ipvs->sysctl_drop_entry = 2;
 		} else {
 			atomic_set(&ip_vs_dropentry, 0);
 		}
@@ -162,7 +152,7 @@ static void update_defense_level(void)
 			atomic_set(&ip_vs_dropentry, 1);
 		} else {
 			atomic_set(&ip_vs_dropentry, 0);
-			sysctl_ip_vs_drop_entry = 1;
+			ipvs->sysctl_drop_entry = 1;
 		};
 		break;
 	case 3:
@@ -173,16 +163,16 @@ static void update_defense_level(void)

 	/* drop_packet */
 	spin_lock(&__ip_vs_droppacket_lock);
-	switch (sysctl_ip_vs_drop_packet) {
+	switch (ipvs->sysctl_drop_packet) {
 	case 0:
 		ip_vs_drop_rate = 0;
 		break;
 	case 1:
 		if (nomem) {
 			ip_vs_drop_rate = ip_vs_drop_counter
-				= sysctl_ip_vs_amemthresh /
-				(sysctl_ip_vs_amemthresh-availmem);
-			sysctl_ip_vs_drop_packet = 2;
+				= ipvs->sysctl_amemthresh /
+				(ipvs->sysctl_amemthresh-availmem);
+			ipvs->sysctl_drop_packet = 2;
 		} else {
 			ip_vs_drop_rate = 0;
 		}
@@ -190,22 +180,22 @@ static void update_defense_level(void)
 	case 2:
 		if (nomem) {
 			ip_vs_drop_rate = ip_vs_drop_counter
-				= sysctl_ip_vs_amemthresh /
-				(sysctl_ip_vs_amemthresh-availmem);
+				= ipvs->sysctl_amemthresh /
+				(ipvs->sysctl_amemthresh-availmem);
 		} else {
 			ip_vs_drop_rate = 0;
-			sysctl_ip_vs_drop_packet = 1;
+			ipvs->sysctl_drop_packet = 1;
 		}
 		break;
 	case 3:
-		ip_vs_drop_rate = sysctl_ip_vs_am_droprate;
+		ip_vs_drop_rate = ipvs->sysctl_am_droprate;
 		break;
 	}
 	spin_unlock(&__ip_vs_droppacket_lock);

 	/* secure_tcp */
 	spin_lock(&ip_vs_securetcp_lock);
-	switch (sysctl_ip_vs_secure_tcp) {
+	switch (ipvs->sysctl_secure_tcp) {
 	case 0:
 		if (old_secure_tcp >= 2)
 			to_change = 0;
@@ -214,7 +204,7 @@ static void update_defense_level(void)
 		if (nomem) {
 			if (old_secure_tcp < 2)
 				to_change = 1;
-			sysctl_ip_vs_secure_tcp = 2;
+			ipvs->sysctl_secure_tcp = 2;
 		} else {
 			if (old_secure_tcp >= 2)
 				to_change = 0;
@@ -227,7 +217,7 @@ static void update_defense_level(void)
 		} else {
 			if (old_secure_tcp >= 2)
 				to_change = 0;
-			sysctl_ip_vs_secure_tcp = 1;
+			ipvs->sysctl_secure_tcp = 1;
 		}
 		break;
 	case 3:
@@ -235,9 +225,9 @@ static void update_defense_level(void)
 			to_change = 1;
 		break;
 	}
-	old_secure_tcp = sysctl_ip_vs_secure_tcp;
+	old_secure_tcp = ipvs->sysctl_secure_tcp;
 	if (to_change >= 0)
-		ip_vs_protocol_timeout_change(sysctl_ip_vs_secure_tcp>1);
+		ip_vs_protocol_timeout_change(ipvs->sysctl_secure_tcp>1);
 	spin_unlock(&ip_vs_securetcp_lock);

 	local_bh_enable();
@@ -253,9 +243,16 @@ static DECLARE_DELAYED_WORK(defense_work, defense_work_handler);

 static void defense_work_handler(struct work_struct *work)
 {
-	update_defense_level();
-	if (atomic_read(&ip_vs_dropentry))
-		ip_vs_random_dropentry();
+	struct net *net;
+
+	for_each_net(net)
+		update_defense_level(net);
+
+	if (atomic_read(&ip_vs_dropentry)) {
+		/* Should another sched period be used to reduce peak load ?*/
+		for_each_net(net)
+			ip_vs_random_dropentry(net);
+	}

 	schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD);
 }
@@ -272,40 +269,6 @@ ip_vs_use_count_dec(void)
 	module_put(THIS_MODULE);
 }

-
-/*
- *	Hash table: for virtual service lookups
- */
-#define IP_VS_SVC_TAB_BITS 8
-#define IP_VS_SVC_TAB_SIZE (1 << IP_VS_SVC_TAB_BITS)
-#define IP_VS_SVC_TAB_MASK (IP_VS_SVC_TAB_SIZE - 1)
-
-/* the service table hashed by <protocol, addr, port> */
-static struct list_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE];
-/* the service table hashed by fwmark */
-static struct list_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE];
-
-/*
- *	Hash table: for real service lookups
- */
-#define IP_VS_RTAB_BITS 4
-#define IP_VS_RTAB_SIZE (1 << IP_VS_RTAB_BITS)
-#define IP_VS_RTAB_MASK (IP_VS_RTAB_SIZE - 1)
-
-static struct list_head ip_vs_rtable[IP_VS_RTAB_SIZE];
-
-/*
- *	Trash for destinations
- */
-static LIST_HEAD(ip_vs_dest_trash);
-
-/*
- *	FTP & NULL virtual service counters
- */
-static atomic_t ip_vs_ftpsvc_counter = ATOMIC_INIT(0);
-static atomic_t ip_vs_nullsvc_counter = ATOMIC_INIT(0);
-
-
 /*
  *	Returns hash value for virtual service
  */
@@ -336,10 +299,10 @@ static __inline__ unsigned ip_vs_svc_fwm_hashkey(__u32 fwmark)

 /*
  *	Hashes a service in the ip_vs_svc_table by <proto,addr,port>
- *	or in the ip_vs_svc_fwm_table by fwmark.
+ *	or in the net->ipvs->ctl_fwm_table by fwmark.
  *	Should be called with locked tables.
  */
-static int ip_vs_svc_hash(struct ip_vs_service *svc)
+static int ip_vs_svc_hash(struct net *net, struct ip_vs_service *svc)
 {
 	unsigned hash;

@@ -355,13 +318,13 @@ static int ip_vs_svc_hash(struct ip_vs_service *svc)
 		 */
 		hash = ip_vs_svc_hashkey(svc->af, svc->protocol, &svc->addr,
 					 svc->port);
-		list_add(&svc->s_list, &ip_vs_svc_table[hash]);
+		list_add(&svc->s_list, &net->ipvs->ctl_svc_table[hash]);
 	} else {
 		/*
-		 *  Hash it by fwmark in ip_vs_svc_fwm_table
+		 *  Hash it by fwmark in net->ipvs->ctl_fwm_table
 		 */
 		hash = ip_vs_svc_fwm_hashkey(svc->fwmark);
-		list_add(&svc->f_list, &ip_vs_svc_fwm_table[hash]);
+		list_add(&svc->f_list, &net->ipvs->ctl_fwm_table[hash]);
 	}

 	svc->flags |= IP_VS_SVC_F_HASHED;
@@ -372,7 +335,7 @@ static int ip_vs_svc_hash(struct ip_vs_service *svc)


 /*
- *	Unhashes a service from ip_vs_svc_table/ip_vs_svc_fwm_table.
+ *	Unhashes a service from net->ipvs->ctl_svc_table/net->ipvs->ctl_fwm_table.
  *	Should be called with locked tables.
  */
 static int ip_vs_svc_unhash(struct ip_vs_service *svc)
@@ -384,10 +347,10 @@ static int ip_vs_svc_unhash(struct ip_vs_service *svc)
 	}

 	if (svc->fwmark == 0) {
-		/* Remove it from the ip_vs_svc_table table */
+		/* Remove it from the net->ipvs->ctl_svc_table table */
 		list_del(&svc->s_list);
 	} else {
-		/* Remove it from the ip_vs_svc_fwm_table table */
+		/* Remove it from the net->ipvs->ctl_fwm_table table */
 		list_del(&svc->f_list);
 	}

@@ -401,16 +364,17 @@ static int ip_vs_svc_unhash(struct ip_vs_service *svc)
  *	Get service by {proto,addr,port} in the service table.
  */
 static inline struct ip_vs_service *
-__ip_vs_service_get(int af, __u16 protocol, const union nf_inet_addr *vaddr,
-		    __be16 vport)
+__ip_vs_service_get(struct net *net, int af, __u16 protocol,
+		    const union nf_inet_addr *vaddr,  __be16 vport)
 {
 	unsigned hash;
 	struct ip_vs_service *svc;
+	struct netns_ipvs *ipvs = net->ipvs;

 	/* Check for "full" addressed entries */
 	hash = ip_vs_svc_hashkey(af, protocol, vaddr, vport);

-	list_for_each_entry(svc, &ip_vs_svc_table[hash], s_list){
+	list_for_each_entry(svc, &ipvs->ctl_svc_table[hash], s_list){
 		if ((svc->af == af)
 		    && ip_vs_addr_equal(af, &svc->addr, vaddr)
 		    && (svc->port == vport)
@@ -429,15 +393,16 @@ __ip_vs_service_get(int af, __u16 protocol, const union nf_inet_addr *vaddr,
  *	Get service by {fwmark} in the service table.
  */
 static inline struct ip_vs_service *
-__ip_vs_svc_fwm_get(int af, __u32 fwmark)
+__ip_vs_svc_fwm_get(struct net *net, int af, __u32 fwmark)
 {
 	unsigned hash;
 	struct ip_vs_service *svc;
+	struct netns_ipvs *ipvs = net->ipvs;

 	/* Check for fwmark addressed entries */
 	hash = ip_vs_svc_fwm_hashkey(fwmark);

-	list_for_each_entry(svc, &ip_vs_svc_fwm_table[hash], f_list) {
+	list_for_each_entry(svc, &ipvs->ctl_fwm_table[hash], f_list) {
 		if (svc->fwmark == fwmark && svc->af == af) {
 			/* HIT */
 			atomic_inc(&svc->usecnt);
@@ -449,7 +414,7 @@ __ip_vs_svc_fwm_get(int af, __u32 fwmark)
 }

 struct ip_vs_service *
-ip_vs_service_get(int af, __u32 fwmark, __u16 protocol,
+ip_vs_service_get(struct net *net, int af, __u32 fwmark, __u16 protocol,
 		  const union nf_inet_addr *vaddr, __be16 vport)
 {
 	struct ip_vs_service *svc;
@@ -459,32 +424,32 @@ ip_vs_service_get(int af, __u32 fwmark, __u16 protocol,
 	/*
 	 *	Check the table hashed by fwmark first
 	 */
-	if (fwmark && (svc = __ip_vs_svc_fwm_get(af, fwmark)))
+	if (fwmark && (svc = __ip_vs_svc_fwm_get(net, af, fwmark)))
 		goto out;

 	/*
 	 *	Check the table hashed by <protocol,addr,port>
 	 *	for "full" addressed entries
 	 */
-	svc = __ip_vs_service_get(af, protocol, vaddr, vport);
+	svc = __ip_vs_service_get(net, af, protocol, vaddr, vport);

 	if (svc == NULL
 	    && protocol == IPPROTO_TCP
-	    && atomic_read(&ip_vs_ftpsvc_counter)
+	    && atomic_read(&net->ipvs->ctl_ftpsvc_counter)
 	    && (vport == FTPDATA || ntohs(vport) >= PROT_SOCK)) {
 		/*
 		 * Check if ftp service entry exists, the packet
 		 * might belong to FTP data connections.
 		 */
-		svc = __ip_vs_service_get(af, protocol, vaddr, FTPPORT);
+		svc = __ip_vs_service_get(net, af, protocol, vaddr, FTPPORT);
 	}

 	if (svc == NULL
-	    && atomic_read(&ip_vs_nullsvc_counter)) {
+	    && atomic_read(&net->ipvs->ctl_nullsvc_counter)) {
 		/*
 		 * Check if the catch-all port (port zero) exists
 		 */
-		svc = __ip_vs_service_get(af, protocol, vaddr, 0);
+		svc = __ip_vs_service_get(net, af, protocol, vaddr, 0);
 	}

   out:
@@ -538,10 +503,10 @@ static inline unsigned ip_vs_rs_hashkey(int af,
 }

 /*
- *	Hashes ip_vs_dest in ip_vs_rtable by <proto,addr,port>.
+ *	Hashes ip_vs_dest in net->ipvs->ctl_rtable by <proto,addr,port>.
  *	should be called with locked tables.
  */
-static int ip_vs_rs_hash(struct ip_vs_dest *dest)
+static int ip_vs_rs_hash(struct net *net, struct ip_vs_dest *dest)
 {
 	unsigned hash;

@@ -555,19 +520,19 @@ static int ip_vs_rs_hash(struct ip_vs_dest *dest)
 	 */
 	hash = ip_vs_rs_hashkey(dest->af, &dest->addr, dest->port);

-	list_add(&dest->d_list, &ip_vs_rtable[hash]);
+	list_add(&dest->d_list, &net->ipvs->ctl_rtable[hash]);

 	return 1;
 }

 /*
- *	UNhashes ip_vs_dest from ip_vs_rtable.
+ *	UNhashes ip_vs_dest from net->ipvs->ctl_rtable.
  *	should be called with locked tables.
  */
 static int ip_vs_rs_unhash(struct ip_vs_dest *dest)
 {
 	/*
-	 * Remove it from the ip_vs_rtable table.
+	 * Remove it from the net->ipvs->ctl_rtable table.
 	 */
 	if (!list_empty(&dest->d_list)) {
 		list_del(&dest->d_list);
@@ -581,12 +546,13 @@ static int ip_vs_rs_unhash(struct ip_vs_dest *dest)
  *	Lookup real service by <proto,addr,port> in the real service table.
  */
 struct ip_vs_dest *
-ip_vs_lookup_real_service(int af, __u16 protocol,
+ip_vs_lookup_real_service(struct net *net, int af, __u16 protocol,
 			  const union nf_inet_addr *daddr,
 			  __be16 dport)
 {
 	unsigned hash;
 	struct ip_vs_dest *dest;
+	struct netns_ipvs *ipvs = net->ipvs;

 	/*
 	 *	Check for "full" addressed entries
@@ -595,7 +561,7 @@ ip_vs_lookup_real_service(int af, __u16 protocol,
 	hash = ip_vs_rs_hashkey(af, daddr, dport);

 	read_lock(&__ip_vs_rs_lock);
-	list_for_each_entry(dest, &ip_vs_rtable[hash], d_list) {
+	list_for_each_entry(dest, &ipvs->ctl_rtable[hash], d_list) {
 		if ((dest->af == af)
 		    && ip_vs_addr_equal(af, &dest->addr, daddr)
 		    && (dest->port == dport)
@@ -645,15 +611,15 @@ ip_vs_lookup_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
  * ip_vs_lookup_real_service() looked promissing, but
  * seems not working as expected.
  */
-struct ip_vs_dest *ip_vs_find_dest(int af, const union nf_inet_addr *daddr,
-				   __be16 dport,
-				   const union nf_inet_addr *vaddr,
-				   __be16 vport, __u16 protocol)
+struct ip_vs_dest *
+ip_vs_find_dest(struct net *net, int af,
+		const union nf_inet_addr *daddr, __be16 dport,
+		const union nf_inet_addr *vaddr, __be16 vport, __u16 protocol)
 {
 	struct ip_vs_dest *dest;
 	struct ip_vs_service *svc;

-	svc = ip_vs_service_get(af, 0, protocol, vaddr, vport);
+	svc = ip_vs_service_get(net, af, 0, protocol, vaddr, vport);
 	if (!svc)
 		return NULL;
 	dest = ip_vs_lookup_dest(svc, daddr, dport);
@@ -674,15 +640,16 @@ struct ip_vs_dest *ip_vs_find_dest(int af, const union nf_inet_addr *daddr,
  *  scheduling.
  */
 static struct ip_vs_dest *
-ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
-		     __be16 dport)
+ip_vs_trash_get_dest(struct net *net, struct ip_vs_service *svc,
+		     const union nf_inet_addr *daddr, __be16 dport)
 {
 	struct ip_vs_dest *dest, *nxt;
+	struct netns_ipvs *ipvs = net->ipvs;

 	/*
 	 * Find the destination in trash
 	 */
-	list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) {
+	list_for_each_entry_safe(dest, nxt, &ipvs->ctl_dest_trash, n_list) {
 		IP_VS_DBG_BUF(3, "Destination %u/%s:%u still in trash, "
 			      "dest->refcnt=%d\n",
 			      dest->vfwmark,
@@ -730,11 +697,12 @@ ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
  *  are expired, and the refcnt of each destination in the trash must
  *  be 1, so we simply release them here.
  */
-static void ip_vs_trash_cleanup(void)
+static void ip_vs_trash_cleanup(struct net *net)
 {
 	struct ip_vs_dest *dest, *nxt;
+	struct netns_ipvs *ipvs = net->ipvs;

-	list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) {
+	list_for_each_entry_safe(dest, nxt, &ipvs->ctl_dest_trash, n_list) {
 		list_del(&dest->n_list);
 		ip_vs_dst_reset(dest);
 		__ip_vs_unbind_svc(dest);
@@ -743,8 +711,7 @@ static void ip_vs_trash_cleanup(void)
 }


-static void
-ip_vs_zero_stats(struct ip_vs_stats *stats)
+static void ip_vs_zero_stats(struct ip_vs_stats *stats)
 {
 	spin_lock_bh(&stats->lock);

@@ -758,7 +725,7 @@ ip_vs_zero_stats(struct ip_vs_stats *stats)
  *	Update a destination in the given service
  */
 static void
-__ip_vs_update_dest(struct ip_vs_service *svc,
+__ip_vs_update_dest(struct net *net, struct ip_vs_service *svc,
 		    struct ip_vs_dest *dest, struct ip_vs_dest_user_kern *udest)
 {
 	int conn_flags;
@@ -770,13 +737,13 @@ __ip_vs_update_dest(struct ip_vs_service *svc,
 	/* check if local node and update the flags */
 #ifdef CONFIG_IP_VS_IPV6
 	if (svc->af == AF_INET6) {
-		if (__ip_vs_addr_is_local_v6(&udest->addr.in6)) {
+		if (__ip_vs_addr_is_local_v6(net, &udest->addr.in6)) {
 			conn_flags = (conn_flags & ~IP_VS_CONN_F_FWD_MASK)
 				| IP_VS_CONN_F_LOCALNODE;
 		}
 	} else
 #endif
-		if (inet_addr_type(&init_net, udest->addr.ip) == RTN_LOCAL) {
+		if (inet_addr_type(net, udest->addr.ip) == RTN_LOCAL) {
 			conn_flags = (conn_flags & ~IP_VS_CONN_F_FWD_MASK)
 				| IP_VS_CONN_F_LOCALNODE;
 		}
@@ -786,11 +753,11 @@ __ip_vs_update_dest(struct ip_vs_service *svc,
 		conn_flags |= IP_VS_CONN_F_NOOUTPUT;
 	} else {
 		/*
-		 *    Put the real service in ip_vs_rtable if not present.
-		 *    For now only for NAT!
+		 * Put the real service in net->ipvs->ctl_rtable if not present.
+		 * For now only for NAT!
 		 */
 		write_lock_bh(&__ip_vs_rs_lock);
-		ip_vs_rs_hash(dest);
+		ip_vs_rs_hash(net, dest);
 		write_unlock_bh(&__ip_vs_rs_lock);
 	}
 	atomic_set(&dest->conn_flags, conn_flags);
@@ -820,8 +787,8 @@ __ip_vs_update_dest(struct ip_vs_service *svc,
  *	Create a destination for the given service
  */
 static int
-ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
-	       struct ip_vs_dest **dest_p)
+ip_vs_new_dest(struct net *net, struct ip_vs_service *svc,
+	       struct ip_vs_dest_user_kern *udest, struct ip_vs_dest **dest_p)
 {
 	struct ip_vs_dest *dest;
 	unsigned atype;
@@ -833,12 +800,12 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
 		atype = ipv6_addr_type(&udest->addr.in6);
 		if ((!(atype & IPV6_ADDR_UNICAST) ||
 			atype & IPV6_ADDR_LINKLOCAL) &&
-			!__ip_vs_addr_is_local_v6(&udest->addr.in6))
+			!__ip_vs_addr_is_local_v6(net, &udest->addr.in6))
 			return -EINVAL;
 	} else
 #endif
 	{
-		atype = inet_addr_type(&init_net, udest->addr.ip);
+		atype = inet_addr_type(net, udest->addr.ip);
 		if (atype != RTN_LOCAL && atype != RTN_UNICAST)
 			return -EINVAL;
 	}
@@ -865,8 +832,8 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
 	INIT_LIST_HEAD(&dest->d_list);
 	spin_lock_init(&dest->dst_lock);
 	spin_lock_init(&dest->stats.lock);
-	__ip_vs_update_dest(svc, dest, udest);
-	ip_vs_new_estimator(&dest->stats);
+	__ip_vs_update_dest(net, svc, dest, udest);
+	ip_vs_new_estimator(net, &dest->stats);

 	*dest_p = dest;

@@ -878,8 +845,8 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
 /*
  *	Add a destination into an existing service
  */
-static int
-ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
+static int ip_vs_add_dest(struct net *net, struct ip_vs_service *svc,
+			  struct ip_vs_dest_user_kern *udest)
 {
 	struct ip_vs_dest *dest;
 	union nf_inet_addr daddr;
@@ -915,7 +882,7 @@ ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
 	 * Check if the dest already exists in the trash and
 	 * is from the same service
 	 */
-	dest = ip_vs_trash_get_dest(svc, &daddr, dport);
+	dest = ip_vs_trash_get_dest(net, svc, &daddr, dport);

 	if (dest != NULL) {
 		IP_VS_DBG_BUF(3, "Get destination %s:%u from trash, "
@@ -926,14 +893,14 @@ ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
 			      IP_VS_DBG_ADDR(svc->af, &dest->vaddr),
 			      ntohs(dest->vport));

-		__ip_vs_update_dest(svc, dest, udest);
+		__ip_vs_update_dest(net, svc, dest, udest);

 		/*
 		 * Get the destination from the trash
 		 */
 		list_del(&dest->n_list);

-		ip_vs_new_estimator(&dest->stats);
+		ip_vs_new_estimator(net, &dest->stats);

 		write_lock_bh(&__ip_vs_svc_lock);

@@ -956,7 +923,7 @@ ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
 	/*
 	 * Allocate and initialize the dest structure
 	 */
-	ret = ip_vs_new_dest(svc, udest, &dest);
+	ret = ip_vs_new_dest(net, svc, udest, &dest);
 	if (ret) {
 		return ret;
 	}
@@ -991,8 +958,8 @@ ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
 /*
  *	Edit a destination in the given service
  */
-static int
-ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
+static int ip_vs_edit_dest(struct net *net, struct ip_vs_service *svc,
+			   struct ip_vs_dest_user_kern *udest)
 {
 	struct ip_vs_dest *dest;
 	union nf_inet_addr daddr;
@@ -1023,7 +990,7 @@ ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
 		return -ENOENT;
 	}

-	__ip_vs_update_dest(svc, dest, udest);
+	__ip_vs_update_dest(net, svc, dest, udest);

 	write_lock_bh(&__ip_vs_svc_lock);

@@ -1045,9 +1012,9 @@ ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
 /*
  *	Delete a destination (must be already unlinked from the service)
  */
-static void __ip_vs_del_dest(struct ip_vs_dest *dest)
+static void __ip_vs_del_dest(struct net *net, struct ip_vs_dest *dest)
 {
-	ip_vs_kill_estimator(&dest->stats);
+	ip_vs_kill_estimator(net, &dest->stats);

 	/*
 	 *  Remove it from the d-linked list with the real services.
@@ -1076,7 +1043,7 @@ static void __ip_vs_del_dest(struct ip_vs_dest *dest)
 			      IP_VS_DBG_ADDR(dest->af, &dest->addr),
 			      ntohs(dest->port),
 			      atomic_read(&dest->refcnt));
-		list_add(&dest->n_list, &ip_vs_dest_trash);
+		list_add(&dest->n_list, &net->ipvs->ctl_dest_trash);
 		atomic_inc(&dest->refcnt);
 	}
 }
@@ -1108,8 +1075,8 @@ static void __ip_vs_unlink_dest(struct ip_vs_service *svc,
 /*
  *	Delete a destination server in the given service
  */
-static int
-ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
+static int ip_vs_del_dest(struct net *net, struct ip_vs_service *svc,
+			  struct ip_vs_dest_user_kern *udest)
 {
 	struct ip_vs_dest *dest;
 	__be16 dport = udest->port;
@@ -1140,7 +1107,7 @@ ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
 	/*
 	 *	Delete the destination
 	 */
-	__ip_vs_del_dest(dest);
+	__ip_vs_del_dest(net, dest);

 	LeaveFunction(2);

@@ -1152,7 +1119,7 @@ ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
  *	Add a service into the service hash table
  */
 static int
-ip_vs_add_service(struct ip_vs_service_user_kern *u,
+ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u,
 		  struct ip_vs_service **svc_p)
 {
 	int ret = 0;
@@ -1209,11 +1176,11 @@ ip_vs_add_service(struct ip_vs_service_user_kern *u,

 	/* Update the virtual service counters */
 	if (svc->port == FTPPORT)
-		atomic_inc(&ip_vs_ftpsvc_counter);
+		atomic_inc(&net->ipvs->ctl_ftpsvc_counter);
 	else if (svc->port == 0)
-		atomic_inc(&ip_vs_nullsvc_counter);
+		atomic_inc(&net->ipvs->ctl_nullsvc_counter);

-	ip_vs_new_estimator(&svc->stats);
+	ip_vs_new_estimator(net, &svc->stats);

 	/* Count only IPv4 services for old get/setsockopt interface */
 	if (svc->af == AF_INET)
@@ -1221,7 +1188,7 @@ ip_vs_add_service(struct ip_vs_service_user_kern *u,

 	/* Hash the service into the service table */
 	write_lock_bh(&__ip_vs_svc_lock);
-	ip_vs_svc_hash(svc);
+	ip_vs_svc_hash(net, svc);
 	write_unlock_bh(&__ip_vs_svc_lock);

 	*svc_p = svc;
@@ -1336,7 +1303,7 @@ ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
  *	- The service must be unlinked, unlocked and not referenced!
  *	- We are called under _bh lock
  */
-static void __ip_vs_del_service(struct ip_vs_service *svc)
+static void __ip_vs_del_service(struct net *net, struct ip_vs_service *svc)
 {
 	struct ip_vs_dest *dest, *nxt;
 	struct ip_vs_scheduler *old_sched;
@@ -1345,7 +1312,7 @@ static void __ip_vs_del_service(struct ip_vs_service *svc)
 	if (svc->af == AF_INET)
 		ip_vs_num_services--;

-	ip_vs_kill_estimator(&svc->stats);
+	ip_vs_kill_estimator(net, &svc->stats);

 	/* Unbind scheduler */
 	old_sched = svc->scheduler;
@@ -1364,16 +1331,16 @@ static void __ip_vs_del_service(struct ip_vs_service *svc)
 	 */
 	list_for_each_entry_safe(dest, nxt, &svc->destinations, n_list) {
 		__ip_vs_unlink_dest(svc, dest, 0);
-		__ip_vs_del_dest(dest);
+		__ip_vs_del_dest(net, dest);
 	}

 	/*
 	 *    Update the virtual service counters
 	 */
 	if (svc->port == FTPPORT)
-		atomic_dec(&ip_vs_ftpsvc_counter);
+		atomic_dec(&net->ipvs->ctl_ftpsvc_counter);
 	else if (svc->port == 0)
-		atomic_dec(&ip_vs_nullsvc_counter);
+		atomic_dec(&net->ipvs->ctl_nullsvc_counter);

 	/*
 	 *    Free the service if nobody refers to it
@@ -1388,7 +1355,7 @@ static void __ip_vs_del_service(struct ip_vs_service *svc)
 /*
  *	Delete a service from the service list
  */
-static int ip_vs_del_service(struct ip_vs_service *svc)
+static int ip_vs_del_service(struct net *net, struct ip_vs_service *svc)
 {
 	if (svc == NULL)
 		return -EEXIST;
@@ -1405,7 +1372,7 @@ static int ip_vs_del_service(struct ip_vs_service *svc)
 	 */
 	IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);

-	__ip_vs_del_service(svc);
+	__ip_vs_del_service(net, svc);

 	write_unlock_bh(&__ip_vs_svc_lock);

@@ -1416,23 +1383,26 @@ static int ip_vs_del_service(struct ip_vs_service *svc)
 /*
  *	Flush all the virtual services
  */
-static int ip_vs_flush(void)
+static int ip_vs_flush(struct net *net)
 {
 	int idx;
 	struct ip_vs_service *svc, *nxt;
+	struct netns_ipvs *ipvs = net->ipvs;

 	/*
 	 * Flush the service table hashed by <protocol,addr,port>
 	 */
 	for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
-		list_for_each_entry_safe(svc, nxt, &ip_vs_svc_table[idx], s_list) {
+		list_for_each_entry_safe(svc, nxt,
+				         &ipvs->ctl_svc_table[idx],
+				         s_list) {
 			write_lock_bh(&__ip_vs_svc_lock);
 			ip_vs_svc_unhash(svc);
 			/*
 			 * Wait until all the svc users go away.
 			 */
 			IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
-			__ip_vs_del_service(svc);
+			__ip_vs_del_service(net, svc);
 			write_unlock_bh(&__ip_vs_svc_lock);
 		}
 	}
@@ -1442,14 +1412,14 @@ static int ip_vs_flush(void)
 	 */
 	for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
 		list_for_each_entry_safe(svc, nxt,
-					 &ip_vs_svc_fwm_table[idx], f_list) {
+					 &ipvs->ctl_fwm_table[idx], f_list) {
 			write_lock_bh(&__ip_vs_svc_lock);
 			ip_vs_svc_unhash(svc);
 			/*
 			 * Wait until all the svc users go away.
 			 */
 			IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
-			__ip_vs_del_service(svc);
+			__ip_vs_del_service(net, svc);
 			write_unlock_bh(&__ip_vs_svc_lock);
 		}
 	}
@@ -1474,24 +1444,25 @@ static int ip_vs_zero_service(struct ip_vs_service *svc)
 	return 0;
 }

-static int ip_vs_zero_all(void)
+static int ip_vs_zero_all(struct net *net)
 {
 	int idx;
 	struct ip_vs_service *svc;
+	struct netns_ipvs *ipvs = net->ipvs;

 	for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
-		list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
+		list_for_each_entry(svc, &ipvs->ctl_svc_table[idx], s_list) {
 			ip_vs_zero_service(svc);
 		}
 	}

 	for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
-		list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
+		list_for_each_entry(svc, &ipvs->ctl_fwm_table[idx], f_list) {
 			ip_vs_zero_service(svc);
 		}
 	}

-	ip_vs_zero_stats(&ip_vs_stats);
+	ip_vs_zero_stats(ipvs->ctl_stats);
 	return 0;
 }

@@ -1500,6 +1471,7 @@ static int
 proc_do_defense_mode(ctl_table *table, int write,
 		     void __user *buffer, size_t *lenp, loff_t *ppos)
 {
+	struct net *net = current->nsproxy->net_ns;
 	int *valp = table->data;
 	int val = *valp;
 	int rc;
@@ -1510,7 +1482,7 @@ proc_do_defense_mode(ctl_table *table, int write,
 			/* Restore the correct value */
 			*valp = val;
 		} else {
-			update_defense_level();
+			update_defense_level(net);
 		}
 	}
 	return rc;
@@ -1539,53 +1511,71 @@ proc_do_sync_threshold(ctl_table *table, int write,

 /*
  *	IPVS sysctl table (under the /proc/sys/net/ipv4/vs/)
+ *	Do not change the order or insert new entries without
+ *	keeping them aligned with the netns init in __ip_vs_control_init()
  */

 static struct ctl_table vs_vars[] = {
 	{
 		.procname	= "amemthresh",
-		.data		= &sysctl_ip_vs_amemthresh,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
-	},
-#ifdef CONFIG_IP_VS_DEBUG
-	{
-		.procname	= "debug_level",
-		.data		= &sysctl_ip_vs_debug_level,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec,
 	},
-#endif
 	{
 		.procname	= "am_droprate",
-		.data		= &sysctl_ip_vs_am_droprate,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec,
 	},
 	{
 		.procname	= "drop_entry",
-		.data		= &sysctl_ip_vs_drop_entry,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= proc_do_defense_mode,
 	},
 	{
 		.procname	= "drop_packet",
-		.data		= &sysctl_ip_vs_drop_packet,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= proc_do_defense_mode,
 	},
 	{
 		.procname	= "secure_tcp",
-		.data		= &sysctl_ip_vs_secure_tcp,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= proc_do_defense_mode,
 	},
+	{
+		.procname	= "cache_bypass",
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+	{
+		.procname	= "expire_nodest_conn",
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+	{
+		.procname	= "expire_quiescent_template",
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+	{
+		.procname	= "sync_threshold",
+		.maxlen		= sizeof(sysctl_ip_vs_sync_threshold),
+		.mode		= 0644,
+		.proc_handler	= proc_do_sync_threshold,
+	},
+	{
+		.procname	= "nat_icmp_send",
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
 #if 0
 	{
 		.procname	= "timeout_established",
@@ -1672,41 +1662,15 @@ static struct ctl_table vs_vars[] = {
 		.proc_handler	= proc_dointvec_jiffies,
 	},
 #endif
+#ifdef CONFIG_IP_VS_DEBUG
 	{
-		.procname	= "cache_bypass",
-		.data		= &sysctl_ip_vs_cache_bypass,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
-	},
-	{
-		.procname	= "expire_nodest_conn",
-		.data		= &sysctl_ip_vs_expire_nodest_conn,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
-	},
-	{
-		.procname	= "expire_quiescent_template",
-		.data		= &sysctl_ip_vs_expire_quiescent_template,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
-	},
-	{
-		.procname	= "sync_threshold",
-		.data		= &sysctl_ip_vs_sync_threshold,
-		.maxlen		= sizeof(sysctl_ip_vs_sync_threshold),
-		.mode		= 0644,
-		.proc_handler	= proc_do_sync_threshold,
-	},
-	{
-		.procname	= "nat_icmp_send",
-		.data		= &sysctl_ip_vs_nat_icmp_send,
+		.procname	= "debug_level",
+		.data		= &sysctl_ip_vs_debug_level,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec,
 	},
+#endif
 	{ }
 };

@@ -1718,11 +1682,10 @@ const struct ctl_path net_vs_ctl_path[] = {
 };
 EXPORT_SYMBOL_GPL(net_vs_ctl_path);

-static struct ctl_table_header * sysctl_header;
-
 #ifdef CONFIG_PROC_FS

 struct ip_vs_iter {
+	struct seq_net_private p;  /* Do not move this; netns depends upon it */
 	struct list_head *table;
 	int bucket;
 };
@@ -1752,12 +1715,15 @@ static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
 	struct ip_vs_iter *iter = seq->private;
 	int idx;
 	struct ip_vs_service *svc;
+	struct net *net = seq_file_net(seq);
+	struct netns_ipvs *ipvs = net->ipvs;

+	BUG_ON(!net);
 	/* look in hash by protocol */
 	for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
-		list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
+		list_for_each_entry(svc, &ipvs->ctl_svc_table[idx], s_list) {
 			if (pos-- == 0){
-				iter->table = ip_vs_svc_table;
+				iter->table = ipvs->ctl_svc_table;
 				iter->bucket = idx;
 				return svc;
 			}
@@ -1766,9 +1732,9 @@ static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)

 	/* keep looking in fwmark */
 	for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
-		list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
+		list_for_each_entry(svc, &ipvs->ctl_fwm_table[idx], f_list) {
 			if (pos-- == 0) {
-				iter->table = ip_vs_svc_fwm_table;
+				iter->table = ipvs->ctl_fwm_table;
 				iter->bucket = idx;
 				return svc;
 			}
@@ -1792,7 +1758,10 @@ static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 	struct list_head *e;
 	struct ip_vs_iter *iter;
 	struct ip_vs_service *svc;
+	struct net *net = seq_file_net(seq);
+	struct netns_ipvs *ipvs = net->ipvs;

+	BUG_ON(!net);
 	++*pos;
 	if (v == SEQ_START_TOKEN)
 		return ip_vs_info_array(seq,0);
@@ -1800,31 +1769,31 @@ static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 	svc = v;
 	iter = seq->private;

-	if (iter->table == ip_vs_svc_table) {
+	if (iter->table == ipvs->ctl_svc_table) {
 		/* next service in table hashed by protocol */
-		if ((e = svc->s_list.next) != &ip_vs_svc_table[iter->bucket])
+		if ((e = svc->s_list.next) != &ipvs->ctl_svc_table[iter->bucket])
 			return list_entry(e, struct ip_vs_service, s_list);


 		while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
-			list_for_each_entry(svc,&ip_vs_svc_table[iter->bucket],
+			list_for_each_entry(svc, &ipvs->ctl_svc_table[iter->bucket],
 					    s_list) {
 				return svc;
 			}
 		}

-		iter->table = ip_vs_svc_fwm_table;
+		iter->table = ipvs->ctl_fwm_table;
 		iter->bucket = -1;
 		goto scan_fwmark;
 	}

 	/* next service in hashed by fwmark */
-	if ((e = svc->f_list.next) != &ip_vs_svc_fwm_table[iter->bucket])
+	if ((e = svc->f_list.next) != &ipvs->ctl_fwm_table[iter->bucket])
 		return list_entry(e, struct ip_vs_service, f_list);

  scan_fwmark:
 	while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
-		list_for_each_entry(svc, &ip_vs_svc_fwm_table[iter->bucket],
+		list_for_each_entry(svc, &ipvs->ctl_fwm_table[iter->bucket],
 				    f_list)
 			return svc;
 	}
@@ -1853,8 +1822,10 @@ static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
 		const struct ip_vs_service *svc = v;
 		const struct ip_vs_iter *iter = seq->private;
 		const struct ip_vs_dest *dest;
+		struct net *net = seq_file_net(seq);

-		if (iter->table == ip_vs_svc_table) {
+		BUG_ON(!net);
+		if (iter->table == net->ipvs->ctl_svc_table) {
 #ifdef CONFIG_IP_VS_IPV6
 			if (svc->af == AF_INET6)
 				seq_printf(seq, "%s  [%pI6]:%04X %s ",
@@ -1921,7 +1892,7 @@ static const struct seq_operations ip_vs_info_seq_ops = {

 static int ip_vs_info_open(struct inode *inode, struct file *file)
 {
-	return seq_open_private(file, &ip_vs_info_seq_ops,
+	return seq_open_net(inode, file, &ip_vs_info_seq_ops,
 			sizeof(struct ip_vs_iter));
 }

@@ -1935,13 +1906,12 @@ static const struct file_operations ip_vs_info_fops = {

 #endif

-struct ip_vs_stats ip_vs_stats = {
-	.lock = __SPIN_LOCK_UNLOCKED(ip_vs_stats.lock),
-};
-
 #ifdef CONFIG_PROC_FS
 static int ip_vs_stats_show(struct seq_file *seq, void *v)
 {
+	/* single_open_net returns net in private */
+	struct net *net = (struct net *)seq->private;
+	struct ip_vs_stats *ctl_stats = net->ipvs->ctl_stats;

 /*               01234567 01234567 01234567 0123456701234567 0123456701234567 */
 	seq_puts(seq,
@@ -1949,29 +1919,29 @@ static int ip_vs_stats_show(struct seq_file *seq, void *v)
 	seq_printf(seq,
 		   "   Conns  Packets  Packets            Bytes            Bytes\n");

-	spin_lock_bh(&ip_vs_stats.lock);
-	seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", ip_vs_stats.ustats.conns,
-		   ip_vs_stats.ustats.inpkts, ip_vs_stats.ustats.outpkts,
-		   (unsigned long long) ip_vs_stats.ustats.inbytes,
-		   (unsigned long long) ip_vs_stats.ustats.outbytes);
+	spin_lock_bh(&ctl_stats->lock);
+	seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", ctl_stats->ustats.conns,
+		   ctl_stats->ustats.inpkts, ctl_stats->ustats.outpkts,
+		   (unsigned long long) ctl_stats->ustats.inbytes,
+		   (unsigned long long) ctl_stats->ustats.outbytes);

 /*                 01234567 01234567 01234567 0123456701234567 0123456701234567 */
 	seq_puts(seq,
 		   " Conns/s   Pkts/s   Pkts/s          Bytes/s          Bytes/s\n");
 	seq_printf(seq,"%8X %8X %8X %16X %16X\n",
-			ip_vs_stats.ustats.cps,
-			ip_vs_stats.ustats.inpps,
-			ip_vs_stats.ustats.outpps,
-			ip_vs_stats.ustats.inbps,
-			ip_vs_stats.ustats.outbps);
-	spin_unlock_bh(&ip_vs_stats.lock);
+			ctl_stats->ustats.cps,
+			ctl_stats->ustats.inpps,
+			ctl_stats->ustats.outpps,
+			ctl_stats->ustats.inbps,
+			ctl_stats->ustats.outbps);
+	spin_unlock_bh(&ctl_stats->lock);

 	return 0;
 }

 static int ip_vs_stats_seq_open(struct inode *inode, struct file *file)
 {
-	return single_open(file, ip_vs_stats_show, NULL);
+	return single_open_net(inode, file, ip_vs_stats_show);
 }

 static const struct file_operations ip_vs_stats_fops = {
@@ -1979,7 +1949,7 @@ static const struct file_operations ip_vs_stats_fops = {
 	.open = ip_vs_stats_seq_open,
 	.read = seq_read,
 	.llseek = seq_lseek,
-	.release = single_release,
+	.release = single_release_net,
 };

 #endif
@@ -1987,29 +1957,32 @@ static const struct file_operations ip_vs_stats_fops = {
 /*
  *	Set timeout values for tcp tcpfin udp in the timeout_table.
  */
-static int ip_vs_set_timeout(struct ip_vs_timeout_user *u)
+static int ip_vs_set_timeout(struct net *net, struct ip_vs_timeout_user *u)
 {
+	struct ip_vs_proto_data *pd;
 	IP_VS_DBG(2, "Setting timeout tcp:%d tcpfin:%d udp:%d\n",
 		  u->tcp_timeout,
 		  u->tcp_fin_timeout,
 		  u->udp_timeout);

 #ifdef CONFIG_IP_VS_PROTO_TCP
-	if (u->tcp_timeout) {
-		ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_ESTABLISHED]
-			= u->tcp_timeout * HZ;
+	pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
+	if (u->tcp_timeout && pd) {
+		pd->timeout_table[IP_VS_TCP_S_ESTABLISHED]
+		        = u->tcp_timeout * HZ;
 	}

-	if (u->tcp_fin_timeout) {
-		ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_FIN_WAIT]
+	if (u->tcp_fin_timeout && pd) {
+		pd->timeout_table[IP_VS_TCP_S_FIN_WAIT]
 			= u->tcp_fin_timeout * HZ;
 	}
 #endif

 #ifdef CONFIG_IP_VS_PROTO_UDP
 	if (u->udp_timeout) {
-		ip_vs_protocol_udp.timeout_table[IP_VS_UDP_S_NORMAL]
-			= u->udp_timeout * HZ;
+		if( (pd = ip_vs_proto_data_get(net, IPPROTO_UDP)) )
+			pd->timeout_table[IP_VS_UDP_S_NORMAL]
+			        = u->udp_timeout * HZ;
 	}
 #endif
 	return 0;
@@ -2076,7 +2049,9 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
 	struct ip_vs_service *svc;
 	struct ip_vs_dest_user *udest_compat;
 	struct ip_vs_dest_user_kern udest;
+	struct net *net = sock_net(sk);

+	BUG_ON(!net);
 	if (!capable(CAP_NET_ADMIN))
 		return -EPERM;

@@ -2103,19 +2078,19 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)

 	if (cmd == IP_VS_SO_SET_FLUSH) {
 		/* Flush the virtual service */
-		ret = ip_vs_flush();
+		ret = ip_vs_flush(net);
 		goto out_unlock;
 	} else if (cmd == IP_VS_SO_SET_TIMEOUT) {
 		/* Set timeout values for (tcp tcpfin udp) */
-		ret = ip_vs_set_timeout((struct ip_vs_timeout_user *)arg);
+		ret = ip_vs_set_timeout(net, (struct ip_vs_timeout_user *)arg);
 		goto out_unlock;
 	} else if (cmd == IP_VS_SO_SET_STARTDAEMON) {
 		struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
-		ret = start_sync_thread(dm->state, dm->mcast_ifn, dm->syncid);
+		ret = start_sync_thread(net, dm->state, dm->mcast_ifn, dm->syncid);
 		goto out_unlock;
 	} else if (cmd == IP_VS_SO_SET_STOPDAEMON) {
 		struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
-		ret = stop_sync_thread(dm->state);
+		ret = stop_sync_thread(net, dm->state);
 		goto out_unlock;
 	}

@@ -2130,7 +2105,7 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
 	if (cmd == IP_VS_SO_SET_ZERO) {
 		/* if no service address is set, zero counters in all */
 		if (!usvc.fwmark && !usvc.addr.ip && !usvc.port) {
-			ret = ip_vs_zero_all();
+			ret = ip_vs_zero_all(net);
 			goto out_unlock;
 		}
 	}
@@ -2147,15 +2122,15 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)

 	/* Lookup the exact service by <protocol, addr, port> or fwmark */
 	if (usvc.fwmark == 0)
-		svc = __ip_vs_service_get(usvc.af, usvc.protocol,
+		svc = __ip_vs_service_get(net, usvc.af, usvc.protocol,
 					  &usvc.addr, usvc.port);
 	else
-		svc = __ip_vs_svc_fwm_get(usvc.af, usvc.fwmark);
+		svc = __ip_vs_svc_fwm_get(net, usvc.af, usvc.fwmark);

 	if (cmd != IP_VS_SO_SET_ADD
 	    && (svc == NULL || svc->protocol != usvc.protocol)) {
 		ret = -ESRCH;
-		goto out_drop_service;
+		goto out_unlock;
 	}

 	switch (cmd) {
@@ -2163,13 +2138,13 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
 		if (svc != NULL)
 			ret = -EEXIST;
 		else
-			ret = ip_vs_add_service(&usvc, &svc);
+			ret = ip_vs_add_service(net, &usvc, &svc);
 		break;
 	case IP_VS_SO_SET_EDIT:
 		ret = ip_vs_edit_service(svc, &usvc);
 		break;
 	case IP_VS_SO_SET_DEL:
-		ret = ip_vs_del_service(svc);
+		ret = ip_vs_del_service(net, svc);
 		if (!ret)
 			goto out_unlock;
 		break;
@@ -2177,19 +2152,18 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
 		ret = ip_vs_zero_service(svc);
 		break;
 	case IP_VS_SO_SET_ADDDEST:
-		ret = ip_vs_add_dest(svc, &udest);
+		ret = ip_vs_add_dest(net, svc, &udest);
 		break;
 	case IP_VS_SO_SET_EDITDEST:
-		ret = ip_vs_edit_dest(svc, &udest);
+		ret = ip_vs_edit_dest(net, svc, &udest);
 		break;
 	case IP_VS_SO_SET_DELDEST:
-		ret = ip_vs_del_dest(svc, &udest);
+		ret = ip_vs_del_dest(net, svc, &udest);
 		break;
 	default:
 		ret = -EINVAL;
 	}

-out_drop_service:
 	if (svc)
 		ip_vs_service_put(svc);

@@ -2207,7 +2181,7 @@ static void
 ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src)
 {
 	spin_lock_bh(&src->lock);
-	memcpy(dst, &src->ustats, sizeof(*dst));
+	memcpy(dst, &src->ustats, sizeof(struct ip_vs_stats_user));
 	spin_unlock_bh(&src->lock);
 }

@@ -2227,16 +2201,17 @@ ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src)
 }

 static inline int
-__ip_vs_get_service_entries(const struct ip_vs_get_services *get,
+__ip_vs_get_service_entries(struct net *net, const struct ip_vs_get_services *get,
 			    struct ip_vs_get_services __user *uptr)
 {
 	int idx, count=0;
 	struct ip_vs_service *svc;
 	struct ip_vs_service_entry entry;
+	struct netns_ipvs *ipvs = net->ipvs;
 	int ret = 0;

 	for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
-		list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
+		list_for_each_entry(svc, &ipvs->ctl_svc_table[idx], s_list) {
 			/* Only expose IPv4 entries to old interface */
 			if (svc->af != AF_INET)
 				continue;
@@ -2255,7 +2230,7 @@ __ip_vs_get_service_entries(const struct ip_vs_get_services *get,
 	}

 	for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
-		list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
+		list_for_each_entry(svc, &ipvs->ctl_fwm_table[idx], f_list) {
 			/* Only expose IPv4 entries to old interface */
 			if (svc->af != AF_INET)
 				continue;
@@ -2277,7 +2252,7 @@ __ip_vs_get_service_entries(const struct ip_vs_get_services *get,
 }

 static inline int
-__ip_vs_get_dest_entries(const struct ip_vs_get_dests *get,
+__ip_vs_get_dest_entries(struct net *net, const struct ip_vs_get_dests *get,
 			 struct ip_vs_get_dests __user *uptr)
 {
 	struct ip_vs_service *svc;
@@ -2285,9 +2260,9 @@ __ip_vs_get_dest_entries(const struct ip_vs_get_dests *get,
 	int ret = 0;

 	if (get->fwmark)
-		svc = __ip_vs_svc_fwm_get(AF_INET, get->fwmark);
+		svc = __ip_vs_svc_fwm_get(net, AF_INET, get->fwmark);
 	else
-		svc = __ip_vs_service_get(AF_INET, get->protocol, &addr,
+		svc = __ip_vs_service_get(net, AF_INET, get->protocol, &addr,
 					  get->port);

 	if (svc) {
@@ -2323,17 +2298,23 @@ __ip_vs_get_dest_entries(const struct ip_vs_get_dests *get,
 }

 static inline void
-__ip_vs_get_timeouts(struct ip_vs_timeout_user *u)
+__ip_vs_get_timeouts(struct net *net, struct ip_vs_timeout_user *u)
 {
+	struct ip_vs_proto_data *pd;
+
 #ifdef CONFIG_IP_VS_PROTO_TCP
-	u->tcp_timeout =
-		ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ;
-	u->tcp_fin_timeout =
-		ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_FIN_WAIT] / HZ;
+	pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
+	if (pd) {
+		u->tcp_timeout=pd->timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ;
+		u->tcp_fin_timeout=pd->timeout_table[IP_VS_TCP_S_FIN_WAIT] / HZ;
+	} else {
+		u->tcp_timeout = 0;
+		u->tcp_fin_timeout = 0;
+	}
 #endif
 #ifdef CONFIG_IP_VS_PROTO_UDP
-	u->udp_timeout =
-		ip_vs_protocol_udp.timeout_table[IP_VS_UDP_S_NORMAL] / HZ;
+	pd = ip_vs_proto_data_get(net, IPPROTO_UDP);
+	u->udp_timeout = (pd ? pd->timeout_table[IP_VS_UDP_S_NORMAL] / HZ : 0);
 #endif
 }

@@ -2362,7 +2343,9 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
 	unsigned char arg[128];
 	int ret = 0;
 	unsigned int copylen;
+	struct net *net = sock_net(sk);

+	BUG_ON(!net);
 	if (!capable(CAP_NET_ADMIN))
 		return -EPERM;

@@ -2424,7 +2407,7 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
 			ret = -EINVAL;
 			goto out;
 		}
-		ret = __ip_vs_get_service_entries(get, user);
+		ret = __ip_vs_get_service_entries(net, get, user);
 	}
 	break;

@@ -2437,9 +2420,9 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
 		entry = (struct ip_vs_service_entry *)arg;
 		addr.ip = entry->addr;
 		if (entry->fwmark)
-			svc = __ip_vs_svc_fwm_get(AF_INET, entry->fwmark);
+			svc = __ip_vs_svc_fwm_get(net, AF_INET, entry->fwmark);
 		else
-			svc = __ip_vs_service_get(AF_INET, entry->protocol,
+			svc = __ip_vs_service_get(net, AF_INET, entry->protocol,
 						  &addr, entry->port);
 		if (svc) {
 			ip_vs_copy_service(entry, svc);
@@ -2464,7 +2447,7 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
 			ret = -EINVAL;
 			goto out;
 		}
-		ret = __ip_vs_get_dest_entries(get, user);
+		ret = __ip_vs_get_dest_entries(net, get, user);
 	}
 	break;

@@ -2472,7 +2455,7 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
 	{
 		struct ip_vs_timeout_user t;

-		__ip_vs_get_timeouts(&t);
+		__ip_vs_get_timeouts(net, &t);
 		if (copy_to_user(user, &t, sizeof(t)) != 0)
 			ret = -EFAULT;
 	}
@@ -2483,15 +2466,15 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
 		struct ip_vs_daemon_user d[2];

 		memset(&d, 0, sizeof(d));
-		if (ip_vs_sync_state & IP_VS_STATE_MASTER) {
+		if (net->ipvs->sync_state & IP_VS_STATE_MASTER) {
 			d[0].state = IP_VS_STATE_MASTER;
-			strlcpy(d[0].mcast_ifn, ip_vs_master_mcast_ifn, sizeof(d[0].mcast_ifn));
-			d[0].syncid = ip_vs_master_syncid;
+			strlcpy(d[0].mcast_ifn, net->ipvs->master_mcast_ifn, sizeof(d[0].mcast_ifn));
+			d[0].syncid = net->ipvs->master_syncid;
 		}
-		if (ip_vs_sync_state & IP_VS_STATE_BACKUP) {
+		if (net->ipvs->sync_state & IP_VS_STATE_BACKUP) {
 			d[1].state = IP_VS_STATE_BACKUP;
-			strlcpy(d[1].mcast_ifn, ip_vs_backup_mcast_ifn, sizeof(d[1].mcast_ifn));
-			d[1].syncid = ip_vs_backup_syncid;
+			strlcpy(d[1].mcast_ifn, net->ipvs->backup_mcast_ifn, sizeof(d[1].mcast_ifn));
+			d[1].syncid = net->ipvs->backup_syncid;
 		}
 		if (copy_to_user(user, &d, sizeof(d)) != 0)
 			ret = -EFAULT;
@@ -2530,6 +2513,7 @@ static struct genl_family ip_vs_genl_family = {
 	.name		= IPVS_GENL_NAME,
 	.version	= IPVS_GENL_VERSION,
 	.maxattr	= IPVS_CMD_MAX,
+	.netnsok 	= true,		/* Allow ipvsadm to work in a netns */
 };

 /* Policy used for first-level command attributes */
@@ -2680,10 +2664,15 @@ static int ip_vs_genl_dump_services(struct sk_buff *skb,
 	int idx = 0, i;
 	int start = cb->args[0];
 	struct ip_vs_service *svc;
-
+	struct net *net = skb->sk->sk_net;
+	struct netns_ipvs *ipvs;
+	if (!net)
+		net = dev_net(skb->dev);
+	BUG_ON(!net);
+	ipvs = net->ipvs;
 	mutex_lock(&__ip_vs_mutex);
 	for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
-		list_for_each_entry(svc, &ip_vs_svc_table[i], s_list) {
+		list_for_each_entry(svc, &ipvs->ctl_svc_table[i], s_list) {
 			if (++idx <= start)
 				continue;
 			if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
@@ -2694,7 +2683,7 @@ static int ip_vs_genl_dump_services(struct sk_buff *skb,
 	}

 	for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
-		list_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) {
+		list_for_each_entry(svc, &net->ipvs->ctl_fwm_table[i], f_list) {
 			if (++idx <= start)
 				continue;
 			if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
@@ -2711,7 +2700,7 @@ nla_put_failure:
 	return skb->len;
 }

-static int ip_vs_genl_parse_service(struct ip_vs_service_user_kern *usvc,
+static int ip_vs_genl_parse_service(struct net *net, struct ip_vs_service_user_kern *usvc,
 				    struct nlattr *nla, int full_entry)
 {
 	struct nlattr *attrs[IPVS_SVC_ATTR_MAX + 1];
@@ -2770,9 +2759,9 @@ static int ip_vs_genl_parse_service(struct ip_vs_service_user_kern *usvc,

 		/* prefill flags from service if it already exists */
 		if (usvc->fwmark)
-			svc = __ip_vs_svc_fwm_get(usvc->af, usvc->fwmark);
+			svc = __ip_vs_svc_fwm_get(net, usvc->af, usvc->fwmark);
 		else
-			svc = __ip_vs_service_get(usvc->af, usvc->protocol,
+			svc = __ip_vs_service_get(net, usvc->af, usvc->protocol,
 						  &usvc->addr, usvc->port);
 		if (svc) {
 			usvc->flags = svc->flags;
@@ -2791,19 +2780,19 @@ static int ip_vs_genl_parse_service(struct ip_vs_service_user_kern *usvc,
 	return 0;
 }

-static struct ip_vs_service *ip_vs_genl_find_service(struct nlattr *nla)
+static struct ip_vs_service *ip_vs_genl_find_service(struct net *net, struct nlattr *nla)
 {
 	struct ip_vs_service_user_kern usvc;
 	int ret;

-	ret = ip_vs_genl_parse_service(&usvc, nla, 0);
+	ret = ip_vs_genl_parse_service(net, &usvc, nla, 0);
 	if (ret)
 		return ERR_PTR(ret);

 	if (usvc.fwmark)
-		return __ip_vs_svc_fwm_get(usvc.af, usvc.fwmark);
+		return __ip_vs_svc_fwm_get(net, usvc.af, usvc.fwmark);
 	else
-		return __ip_vs_service_get(usvc.af, usvc.protocol,
+		return __ip_vs_service_get(net, usvc.af, usvc.protocol,
 					   &usvc.addr, usvc.port);
 }

@@ -2871,7 +2860,11 @@ static int ip_vs_genl_dump_dests(struct sk_buff *skb,
 	struct ip_vs_service *svc;
 	struct ip_vs_dest *dest;
 	struct nlattr *attrs[IPVS_CMD_ATTR_MAX + 1];
+	struct net *net = skb->sk->sk_net;

+	if (!net)
+		net = dev_net(skb->dev);
+	BUG_ON(!net);
 	mutex_lock(&__ip_vs_mutex);

 	/* Try to find the service for which to dump destinations */
@@ -2879,7 +2872,7 @@ static int ip_vs_genl_dump_dests(struct sk_buff *skb,
 			IPVS_CMD_ATTR_MAX, ip_vs_cmd_policy))
 		goto out_err;

-	svc = ip_vs_genl_find_service(attrs[IPVS_CMD_ATTR_SERVICE]);
+	svc = ip_vs_genl_find_service(net, attrs[IPVS_CMD_ATTR_SERVICE]);
 	if (IS_ERR(svc) || svc == NULL)
 		goto out_err;

@@ -2994,20 +2987,23 @@ nla_put_failure:
 static int ip_vs_genl_dump_daemons(struct sk_buff *skb,
 				   struct netlink_callback *cb)
 {
+	struct net *net = sock_net(skb->sk);
+
+	BUG_ON(!net);
 	mutex_lock(&__ip_vs_mutex);
-	if ((ip_vs_sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) {
+	if ((net->ipvs->sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) {
 		if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_MASTER,
-					   ip_vs_master_mcast_ifn,
-					   ip_vs_master_syncid, cb) < 0)
+					   net->ipvs->master_mcast_ifn,
+					   net->ipvs->master_syncid, cb) < 0)
 			goto nla_put_failure;

 		cb->args[0] = 1;
 	}

-	if ((ip_vs_sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) {
+	if ((net->ipvs->sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) {
 		if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_BACKUP,
-					   ip_vs_backup_mcast_ifn,
-					   ip_vs_backup_syncid, cb) < 0)
+					   net->ipvs->backup_mcast_ifn,
+					   net->ipvs->backup_syncid, cb) < 0)
 			goto nla_put_failure;

 		cb->args[1] = 1;
@@ -3019,31 +3015,33 @@ nla_put_failure:
 	return skb->len;
 }

-static int ip_vs_genl_new_daemon(struct nlattr **attrs)
+static int ip_vs_genl_new_daemon(struct net *net, struct nlattr **attrs)
 {
 	if (!(attrs[IPVS_DAEMON_ATTR_STATE] &&
 	      attrs[IPVS_DAEMON_ATTR_MCAST_IFN] &&
 	      attrs[IPVS_DAEMON_ATTR_SYNC_ID]))
 		return -EINVAL;

-	return start_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]),
+	return start_sync_thread(net,
+				 nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]),
 				 nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]),
 				 nla_get_u32(attrs[IPVS_DAEMON_ATTR_SYNC_ID]));
 }

-static int ip_vs_genl_del_daemon(struct nlattr **attrs)
+static int ip_vs_genl_del_daemon(struct net *net, struct nlattr **attrs)
 {
 	if (!attrs[IPVS_DAEMON_ATTR_STATE])
 		return -EINVAL;

-	return stop_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
+	return stop_sync_thread(net,
+				nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
 }

-static int ip_vs_genl_set_config(struct nlattr **attrs)
+static int ip_vs_genl_set_config(struct net *net, struct nlattr **attrs)
 {
 	struct ip_vs_timeout_user t;

-	__ip_vs_get_timeouts(&t);
+	__ip_vs_get_timeouts(net, &t);

 	if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP])
 		t.tcp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]);
@@ -3055,7 +3053,7 @@ static int ip_vs_genl_set_config(struct nlattr **attrs)
 	if (attrs[IPVS_CMD_ATTR_TIMEOUT_UDP])
 		t.udp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]);

-	return ip_vs_set_timeout(&t);
+	return ip_vs_set_timeout(net, &t);
 }

 static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
@@ -3065,16 +3063,20 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
 	struct ip_vs_dest_user_kern udest;
 	int ret = 0, cmd;
 	int need_full_svc = 0, need_full_dest = 0;
+	struct net *net = skb->sk->sk_net;

+	if (!net)
+		net = dev_net(skb->dev);
+	BUG_ON(!net);
 	cmd = info->genlhdr->cmd;

 	mutex_lock(&__ip_vs_mutex);

 	if (cmd == IPVS_CMD_FLUSH) {
-		ret = ip_vs_flush();
+		ret = ip_vs_flush(net);
 		goto out;
 	} else if (cmd == IPVS_CMD_SET_CONFIG) {
-		ret = ip_vs_genl_set_config(info->attrs);
+		ret = ip_vs_genl_set_config(net, info->attrs);
 		goto out;
 	} else if (cmd == IPVS_CMD_NEW_DAEMON ||
 		   cmd == IPVS_CMD_DEL_DAEMON) {
@@ -3090,13 +3092,13 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
 		}

 		if (cmd == IPVS_CMD_NEW_DAEMON)
-			ret = ip_vs_genl_new_daemon(daemon_attrs);
+			ret = ip_vs_genl_new_daemon(net, daemon_attrs);
 		else
-			ret = ip_vs_genl_del_daemon(daemon_attrs);
+			ret = ip_vs_genl_del_daemon(net, daemon_attrs);
 		goto out;
 	} else if (cmd == IPVS_CMD_ZERO &&
 		   !info->attrs[IPVS_CMD_ATTR_SERVICE]) {
-		ret = ip_vs_zero_all();
+		ret = ip_vs_zero_all(net);
 		goto out;
 	}

@@ -3106,7 +3108,7 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
 	if (cmd == IPVS_CMD_NEW_SERVICE || cmd == IPVS_CMD_SET_SERVICE)
 		need_full_svc = 1;

-	ret = ip_vs_genl_parse_service(&usvc,
+	ret = ip_vs_genl_parse_service(net, &usvc,
 				       info->attrs[IPVS_CMD_ATTR_SERVICE],
 				       need_full_svc);
 	if (ret)
@@ -3114,10 +3116,10 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)

 	/* Lookup the exact service by <protocol, addr, port> or fwmark */
 	if (usvc.fwmark == 0)
-		svc = __ip_vs_service_get(usvc.af, usvc.protocol,
+		svc = __ip_vs_service_get(net, usvc.af, usvc.protocol,
 					  &usvc.addr, usvc.port);
 	else
-		svc = __ip_vs_svc_fwm_get(usvc.af, usvc.fwmark);
+		svc = __ip_vs_svc_fwm_get(net, usvc.af, usvc.fwmark);

 	/* Unless we're adding a new service, the service must already exist */
 	if ((cmd != IPVS_CMD_NEW_SERVICE) && (svc == NULL)) {
@@ -3143,7 +3145,7 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
 	switch (cmd) {
 	case IPVS_CMD_NEW_SERVICE:
 		if (svc == NULL)
-			ret = ip_vs_add_service(&usvc, &svc);
+			ret = ip_vs_add_service(net, &usvc, &svc);
 		else
 			ret = -EEXIST;
 		break;
@@ -3151,16 +3153,16 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
 		ret = ip_vs_edit_service(svc, &usvc);
 		break;
 	case IPVS_CMD_DEL_SERVICE:
-		ret = ip_vs_del_service(svc);
+		ret = ip_vs_del_service(net, svc);
 		break;
 	case IPVS_CMD_NEW_DEST:
-		ret = ip_vs_add_dest(svc, &udest);
+		ret = ip_vs_add_dest(net, svc, &udest);
 		break;
 	case IPVS_CMD_SET_DEST:
-		ret = ip_vs_edit_dest(svc, &udest);
+		ret = ip_vs_edit_dest(net, svc, &udest);
 		break;
 	case IPVS_CMD_DEL_DEST:
-		ret = ip_vs_del_dest(svc, &udest);
+		ret = ip_vs_del_dest(net, svc, &udest);
 		break;
 	case IPVS_CMD_ZERO:
 		ret = ip_vs_zero_service(svc);
@@ -3182,7 +3184,11 @@ static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
 	struct sk_buff *msg;
 	void *reply;
 	int ret, cmd, reply_cmd;
+	struct net *net = skb->sk->sk_net;

+	if (unlikely(!net))
+		net = dev_net(skb->dev);
+	BUG_ON(!net);
 	cmd = info->genlhdr->cmd;

 	if (cmd == IPVS_CMD_GET_SERVICE)
@@ -3211,7 +3217,7 @@ static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
 	{
 		struct ip_vs_service *svc;

-		svc = ip_vs_genl_find_service(info->attrs[IPVS_CMD_ATTR_SERVICE]);
+		svc = ip_vs_genl_find_service(net, info->attrs[IPVS_CMD_ATTR_SERVICE]);
 		if (IS_ERR(svc)) {
 			ret = PTR_ERR(svc);
 			goto out_err;
@@ -3232,7 +3238,7 @@ static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
 	{
 		struct ip_vs_timeout_user t;

-		__ip_vs_get_timeouts(&t);
+		__ip_vs_get_timeouts(net, &t);
 #ifdef CONFIG_IP_VS_PROTO_TCP
 		NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP, t.tcp_timeout);
 		NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP_FIN,
@@ -3377,62 +3383,131 @@ static void ip_vs_genl_unregister(void)
 }

 /* End of Generic Netlink interface definitions */
+/*
+ * per netns init/exit functions.
+ */
+int /*__net_init*/ __ip_vs_control_init(struct net *net)
+{
+	int idx;
+	struct netns_ipvs *ipvs = net->ipvs;
+	struct ctl_table *tbl;
+
+	ipvs->ctl_stats=kzalloc(sizeof(struct ip_vs_stats), GFP_KERNEL);
+	if (ipvs->ctl_stats == NULL) {
+		pr_err("%s(): no memory.\n", __func__);
+		return -ENOMEM;
+	}
+
+	proc_net_fops_create(net, "ip_vs", 0, &ip_vs_info_fops);
+	proc_net_fops_create(net, "ip_vs_stats",0, &ip_vs_stats_fops);
+	if (net != &init_net) {
+		tbl = kmemdup(vs_vars, sizeof(vs_vars), GFP_KERNEL);
+		if (tbl == NULL)
+			goto err_dup;
+	} else
+		tbl = vs_vars;
+	/* Initialize sysctl defaults */
+	idx = 0;
+	ipvs->sysctl_amemthresh = 1024;
+	tbl[idx++].data = &ipvs->sysctl_amemthresh;
+	ipvs->sysctl_am_droprate = 10;
+	tbl[idx++].data = &ipvs->sysctl_am_droprate;
+	ipvs->sysctl_drop_entry = 0;
+	tbl[idx++].data = &ipvs->sysctl_drop_entry;
+	ipvs->sysctl_drop_packet = 0;
+	tbl[idx++].data = &ipvs->sysctl_drop_packet;
+	ipvs->sysctl_secure_tcp = 0;
+	tbl[idx++].data = &ipvs->sysctl_secure_tcp;
+	ipvs->sysctl_cache_bypass = 0;
+	tbl[idx++].data = &ipvs->sysctl_cache_bypass;
+	ipvs->sysctl_expire_nodest_conn = 0;
+	tbl[idx++].data = &ipvs->sysctl_expire_nodest_conn;
+	ipvs->sysctl_expire_quiescent_template = 0;
+	tbl[idx++].data = &ipvs->sysctl_expire_quiescent_template;
+	ipvs->sysctl_sync_threshold[0] = 3;
+	ipvs->sysctl_sync_threshold[1] = 50;
+	tbl[idx].data = &ipvs->sysctl_sync_threshold;
+	tbl[idx++].maxlen = sizeof(ipvs->sysctl_sync_threshold);
+	ipvs->sysctl_nat_icmp_send = 0;
+	tbl[idx++].data = &ipvs->sysctl_nat_icmp_send;
+
+	ipvs->sysctl_hdr = register_net_sysctl_table(net, net_vs_ctl_path, tbl);
+	if (ipvs->sysctl_hdr == NULL)
+			goto err_reg;
+	ipvs->sysctl_tbl = tbl;
+	/* Initialize net->ipvs->ctl_svc_table, net->ipvs->ctl_fwm_table, net->ipvs->ctl_rtable */
+	spin_lock_init(&ipvs->ctl_stats->lock);
+
+	for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++)  {
+		INIT_LIST_HEAD(&ipvs->ctl_svc_table[idx]);
+		INIT_LIST_HEAD(&ipvs->ctl_fwm_table[idx]);
+	}
+
+	for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++)  {
+		INIT_LIST_HEAD(&ipvs->ctl_rtable[idx]);
+	}
+	INIT_LIST_HEAD(&ipvs->ctl_dest_trash);
+	atomic_set(&ipvs->ctl_ftpsvc_counter, 0);
+	atomic_set(&ipvs->ctl_nullsvc_counter, 0);
+	ip_vs_new_estimator(net, ipvs->ctl_stats);
+	return 0;
+
+err_reg:
+	if (net != &init_net)
+		kfree(tbl);
+err_dup:
+	kfree(ipvs->ctl_stats);
+	return -ENOMEM;
+}
+
+static void __net_exit __ip_vs_control_cleanup(struct net *net)
+{
+	ip_vs_kill_estimator(net, net->ipvs->ctl_stats);
+	unregister_sysctl_table(net->ipvs->sysctl_hdr);
+	proc_net_remove(net, "ip_vs_stats");
+	proc_net_remove(net, "ip_vs");
+	ip_vs_trash_cleanup(net);
+	cancel_rearming_delayed_work(&defense_work);
+	cancel_work_sync(&defense_work.work);
+	kfree(net->ipvs->ctl_stats);
+	if ( net != &init_net )
+		kfree(net->ipvs->sysctl_tbl);
+}
+
+static struct pernet_operations ipvs_control_ops = {
+	.init = __ip_vs_control_init,
+	.exit = __ip_vs_control_cleanup,
+};


 int __init ip_vs_control_init(void)
 {
 	int ret;
-	int idx;

 	EnterFunction(2);
-
 	ret = nf_register_sockopt(&ip_vs_sockopts);
 	if (ret) {
 		pr_err("cannot register sockopt.\n");
 		return ret;
 	}
-
 	ret = ip_vs_genl_register();
 	if (ret) {
 		pr_err("cannot register Generic Netlink interface.\n");
 		nf_unregister_sockopt(&ip_vs_sockopts);
 		return ret;
 	}
-
-	proc_net_fops_create(&init_net, "ip_vs", 0, &ip_vs_info_fops);
-	proc_net_fops_create(&init_net, "ip_vs_stats",0, &ip_vs_stats_fops);
-
-	sysctl_header = register_sysctl_paths(net_vs_ctl_path, vs_vars);
-
-	/* Initialize ip_vs_svc_table, ip_vs_svc_fwm_table, ip_vs_rtable */
-	for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++)  {
-		INIT_LIST_HEAD(&ip_vs_svc_table[idx]);
-		INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]);
-	}
-	for(idx = 0; idx < IP_VS_RTAB_SIZE; idx++)  {
-		INIT_LIST_HEAD(&ip_vs_rtable[idx]);
-	}
-
-	ip_vs_new_estimator(&ip_vs_stats);
-
+	ret = register_pernet_subsys(&ipvs_control_ops);
 	/* Hook the defense timer */
 	schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD);

^ permalink raw reply related

* [RFC PATCH 6/9] ipvs network name space aware
From: Hans Schillstrom @ 2010-10-08 11:17 UTC (permalink / raw)
  To: lvs-devel, netdev, netfilter-devel; +Cc: horms, ja, wensong, daniel.lezcano

This patch contains only the changes to ip_vs_est.c.

There is a single estimator timer, i.e. not one per netns.
When the estimator runs, it loops over all netns:
  for_each_net(net) { ... }


Signed-off-by:Hans Schillstrom <hans.schillstrom@ericsson.com>

diff --git a/net/netfilter/ipvs/ip_vs_est.c b/net/netfilter/ipvs/ip_vs_est.c
index ff28801..e8c185d 100644
--- a/net/netfilter/ipvs/ip_vs_est.c
+++ b/net/netfilter/ipvs/ip_vs_est.c
@@ -8,8 +8,13 @@
  *              as published by the Free Software Foundation; either version
  *              2 of the License, or (at your option) any later version.
  *
- * Changes:
+ * Changes:     Hans Schillstrom <hans.schillstrom@ericsson.com>
  *
+ *              Network name space (netns) aware.
+ *              Global data moved to netns i.e struct netns_ipvs
+ *		Affected data: est_list and est_lock.
+ *		estimation_timer() runs with a common timer, but
+ *		updates every netns on timeout.
  */

 #define KMSG_COMPONENT "IPVS"
@@ -45,13 +50,13 @@
     rate is ~2.15Gbits/s, average pps and cps are scaled by 2^10.

   * A lot code is taken from net/sched/estimator.c
+
+  * netns: estimation_timer runs for every netns
  */


 static void estimation_timer(unsigned long arg);

-static LIST_HEAD(est_list);
-static DEFINE_SPINLOCK(est_lock);
 static DEFINE_TIMER(est_timer, estimation_timer, 0, 0);

 static void estimation_timer(unsigned long arg)
@@ -62,50 +67,55 @@ static void estimation_timer(unsigned long arg)
 	u32 n_inpkts, n_outpkts;
 	u64 n_inbytes, n_outbytes;
 	u32 rate;
-
-	spin_lock(&est_lock);
-	list_for_each_entry(e, &est_list, list) {
-		s = container_of(e, struct ip_vs_stats, est);
-
-		spin_lock(&s->lock);
-		n_conns = s->ustats.conns;
-		n_inpkts = s->ustats.inpkts;
-		n_outpkts = s->ustats.outpkts;
-		n_inbytes = s->ustats.inbytes;
-		n_outbytes = s->ustats.outbytes;
-
-		/* scaled by 2^10, but divided 2 seconds */
-		rate = (n_conns - e->last_conns)<<9;
-		e->last_conns = n_conns;
-		e->cps += ((long)rate - (long)e->cps)>>2;
-		s->ustats.cps = (e->cps+0x1FF)>>10;
-
-		rate = (n_inpkts - e->last_inpkts)<<9;
-		e->last_inpkts = n_inpkts;
-		e->inpps += ((long)rate - (long)e->inpps)>>2;
-		s->ustats.inpps = (e->inpps+0x1FF)>>10;
-
-		rate = (n_outpkts - e->last_outpkts)<<9;
-		e->last_outpkts = n_outpkts;
-		e->outpps += ((long)rate - (long)e->outpps)>>2;
-		s->ustats.outpps = (e->outpps+0x1FF)>>10;
-
-		rate = (n_inbytes - e->last_inbytes)<<4;
-		e->last_inbytes = n_inbytes;
-		e->inbps += ((long)rate - (long)e->inbps)>>2;
-		s->ustats.inbps = (e->inbps+0xF)>>5;
-
-		rate = (n_outbytes - e->last_outbytes)<<4;
-		e->last_outbytes = n_outbytes;
-		e->outbps += ((long)rate - (long)e->outbps)>>2;
-		s->ustats.outbps = (e->outbps+0xF)>>5;
-		spin_unlock(&s->lock);
+	struct net *net;
+	struct netns_ipvs *ipvs;
+
+	for_each_net(net) {
+		ipvs = net->ipvs;
+		spin_lock(&ipvs->est_lock);
+		list_for_each_entry(e, &ipvs->est_list, list) {
+			s = container_of(e, struct ip_vs_stats, est);
+
+			spin_lock(&s->lock);
+			n_conns = s->ustats.conns;
+			n_inpkts = s->ustats.inpkts;
+			n_outpkts = s->ustats.outpkts;
+			n_inbytes = s->ustats.inbytes;
+			n_outbytes = s->ustats.outbytes;
+
+			/* scaled by 2^10, but divided 2 seconds */
+			rate = (n_conns - e->last_conns)<<9;
+			e->last_conns = n_conns;
+			e->cps += ((long)rate - (long)e->cps)>>2;
+			s->ustats.cps = (e->cps+0x1FF)>>10;
+
+			rate = (n_inpkts - e->last_inpkts)<<9;
+			e->last_inpkts = n_inpkts;
+			e->inpps += ((long)rate - (long)e->inpps)>>2;
+			s->ustats.inpps = (e->inpps+0x1FF)>>10;
+
+			rate = (n_outpkts - e->last_outpkts)<<9;
+			e->last_outpkts = n_outpkts;
+			e->outpps += ((long)rate - (long)e->outpps)>>2;
+			s->ustats.outpps = (e->outpps+0x1FF)>>10;
+
+			rate = (n_inbytes - e->last_inbytes)<<4;
+			e->last_inbytes = n_inbytes;
+			e->inbps += ((long)rate - (long)e->inbps)>>2;
+			s->ustats.inbps = (e->inbps+0xF)>>5;
+
+			rate = (n_outbytes - e->last_outbytes)<<4;
+			e->last_outbytes = n_outbytes;
+			e->outbps += ((long)rate - (long)e->outbps)>>2;
+			s->ustats.outbps = (e->outbps+0xF)>>5;
+			spin_unlock(&s->lock);
+		}
+		spin_unlock(&ipvs->est_lock);
 	}
-	spin_unlock(&est_lock);
 	mod_timer(&est_timer, jiffies + 2*HZ);
 }

-void ip_vs_new_estimator(struct ip_vs_stats *stats)
+void ip_vs_new_estimator(struct net *net, struct ip_vs_stats *stats)
 {
 	struct ip_vs_estimator *est = &stats->est;

@@ -126,18 +136,18 @@ void ip_vs_new_estimator(struct ip_vs_stats *stats)
 	est->last_outbytes = stats->ustats.outbytes;
 	est->outbps = stats->ustats.outbps<<5;

-	spin_lock_bh(&est_lock);
-	list_add(&est->list, &est_list);
-	spin_unlock_bh(&est_lock);
+	spin_lock_bh(&net->ipvs->est_lock);
+	list_add(&est->list, &net->ipvs->est_list);
+	spin_unlock_bh(&net->ipvs->est_lock);
 }

-void ip_vs_kill_estimator(struct ip_vs_stats *stats)
+void ip_vs_kill_estimator(struct net *net, struct ip_vs_stats *stats)
 {
 	struct ip_vs_estimator *est = &stats->est;

-	spin_lock_bh(&est_lock);
+	spin_lock_bh(&net->ipvs->est_lock);
 	list_del(&est->list);
-	spin_unlock_bh(&est_lock);
+	spin_unlock_bh(&net->ipvs->est_lock);
 }

 void ip_vs_zero_estimator(struct ip_vs_stats *stats)
@@ -156,14 +166,31 @@ void ip_vs_zero_estimator(struct ip_vs_stats *stats)
 	est->inbps = 0;
 	est->outbps = 0;
 }
+static int __net_init __ip_vs_estimator_init(struct net *net)
+{
+	INIT_LIST_HEAD(&net->ipvs->est_list);
+	spin_lock_init(&net->ipvs->est_lock);
+	return 0;
+}
+
+static struct pernet_operations ip_vs_app_ops = {
+	.init = __ip_vs_estimator_init,
+//	.exit = __ip_vs_estimator_cleanup,
+};

 int __init ip_vs_estimator_init(void)
 {
+	int rv;
+
+	rv = register_pernet_subsys(&ip_vs_app_ops);
+	if(rv < 0)
+		return rv;
 	mod_timer(&est_timer, jiffies + 2 * HZ);
-	return 0;
+	return rv;
 }

 void ip_vs_estimator_cleanup(void)
 {
 	del_timer_sync(&est_timer);
+	unregister_pernet_subsys(&ip_vs_app_ops);
 }

-- 
Regards
Hans Schillstrom <hans.schillstrom@ericsson.com>

^ permalink raw reply related

* [RFC PATCH 7/9] ipvs network name space aware
From: Hans Schillstrom @ 2010-10-08 11:17 UTC (permalink / raw)
  To: lvs-devel, netdev, netfilter-devel; +Cc: horms, ja, wensong, daniel.lezcano

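This patch contains only the changes to ip_vs_ftp.c.

Minor changes: the netns is passed through to the connection and app
helpers, and the app is registered per netns via pernet_operations.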

Signed-off-by:Hans Schillstrom <hans.schillstrom@ericsson.com>

diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c
index 7e9af5b..9d54eb0 100644
--- a/net/netfilter/ipvs/ip_vs_ftp.c
+++ b/net/netfilter/ipvs/ip_vs_ftp.c
@@ -157,7 +157,8 @@ ip_vs_expect_callback(struct nf_conn *ct,
 {
 	struct nf_conntrack_tuple *orig, new_reply;
 	struct ip_vs_conn *cp;
-
+	struct net *net = nf_ct_net(ct);
+
 	if (exp->tuple.src.l3num != PF_INET)
 		return;

@@ -168,10 +169,10 @@ ip_vs_expect_callback(struct nf_conn *ct,
 	 * actual values from the newly created original conntrack direction.
 	 * The conntrack is confirmed when packet reaches IPVS hooks.
 	 */
-
+	BUG_ON(!net);
 	/* RS->CLIENT */
 	orig = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
-	cp = ip_vs_conn_out_get(exp->tuple.src.l3num, orig->dst.protonum,
+	cp = ip_vs_conn_out_get(net, exp->tuple.src.l3num, orig->dst.protonum,
 				&orig->src.u3, orig->src.u.tcp.port,
 				&orig->dst.u3, orig->dst.u.tcp.port);
 	if (cp) {
@@ -193,7 +194,7 @@ ip_vs_expect_callback(struct nf_conn *ct,
 	}

 	/* CLIENT->VS */
-	cp = ip_vs_conn_in_get(exp->tuple.src.l3num, orig->dst.protonum,
+	cp = ip_vs_conn_in_get(net, exp->tuple.src.l3num, orig->dst.protonum,
 			       &orig->src.u3, orig->src.u.tcp.port,
 			       &orig->dst.u3, orig->dst.u.tcp.port);
 	if (cp) {
@@ -290,7 +291,8 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp,
 	int ret = 0;
 	enum ip_conntrack_info ctinfo;
 	struct nf_conn *ct;
-
+	struct net *net = dev_net(skb->dev);
+
 #ifdef CONFIG_IP_VS_IPV6
 	/* This application helper doesn't work with IPv6 yet,
 	 * so turn this into a no-op for IPv6 packets
@@ -328,10 +330,10 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp,
 		/*
 		 * Now update or create an connection entry for it
 		 */
-		n_cp = ip_vs_conn_out_get(AF_INET, iph->protocol, &from, port,
-					  &cp->caddr, 0);
+		n_cp = ip_vs_conn_out_get(net, AF_INET, iph->protocol,
+				          &from, port, &cp->caddr, 0);
 		if (!n_cp) {
-			n_cp = ip_vs_conn_new(AF_INET, IPPROTO_TCP,
+			n_cp = ip_vs_conn_new(net, AF_INET, IPPROTO_TCP,
 					      &cp->caddr, 0,
 					      &cp->vaddr, port,
 					      &from, port,
@@ -381,7 +383,7 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp,
 		 */

 		cp->app_data = NULL;
-		ip_vs_tcp_conn_listen(n_cp);
+		ip_vs_tcp_conn_listen(net, n_cp);
 		ip_vs_conn_put(n_cp);
 		return ret;
 	}
@@ -410,6 +412,8 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp,
 	union nf_inet_addr to;
 	__be16 port;
 	struct ip_vs_conn *n_cp;
+	struct nf_conn *ct;
+	struct net *net = dev_net(skb->dev);

 #ifdef CONFIG_IP_VS_IPV6
 	/* This application helper doesn't work with IPv6 yet,
@@ -479,11 +483,11 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp,
 		  ip_vs_proto_name(iph->protocol),
 		  &to.ip, ntohs(port), &cp->vaddr.ip, 0);

-	n_cp = ip_vs_conn_in_get(AF_INET, iph->protocol,
+	n_cp = ip_vs_conn_in_get(net, AF_INET, iph->protocol,
 				 &to, port,
 				 &cp->vaddr, htons(ntohs(cp->vport)-1));
 	if (!n_cp) {
-		n_cp = ip_vs_conn_new(AF_INET, IPPROTO_TCP,
+		n_cp = ip_vs_conn_new(net, AF_INET, IPPROTO_TCP,
 				      &to, port,
 				      &cp->vaddr, htons(ntohs(cp->vport)-1),
 				      &cp->daddr, htons(ntohs(cp->dport)-1),
@@ -499,7 +503,7 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp,
 	/*
 	 *	Move tunnel to listen state
 	 */
-	ip_vs_tcp_conn_listen(n_cp);
+	ip_vs_tcp_conn_listen(net, n_cp);
 	ip_vs_conn_put(n_cp);

 	return 1;
@@ -520,23 +524,22 @@ static struct ip_vs_app ip_vs_ftp = {
 	.pkt_in =	ip_vs_ftp_in,
 };

-
 /*
- *	ip_vs_ftp initialization
+ *	per netns ip_vs_ftp initialization
  */
-static int __init ip_vs_ftp_init(void)
+static int __net_init __ip_vs_ftp_init(struct net *net)
 {
 	int i, ret;
 	struct ip_vs_app *app = &ip_vs_ftp;
-
-	ret = register_ip_vs_app(app);
+
+	ret = register_ip_vs_app(net, app);
 	if (ret)
 		return ret;

 	for (i=0; i<IP_VS_APP_MAX_PORTS; i++) {
 		if (!ports[i])
 			continue;
-		ret = register_ip_vs_app_inc(app, app->protocol, ports[i]);
+		ret = register_ip_vs_app_inc(net, app, app->protocol, ports[i]);
 		if (ret)
 			break;
 		pr_info("%s: loaded support on port[%d] = %d\n",
@@ -544,18 +547,39 @@ static int __init ip_vs_ftp_init(void)
 	}

 	if (ret)
-		unregister_ip_vs_app(app);
+		unregister_ip_vs_app(net, app);

 	return ret;
 }
+/*
+ * 	netns exit
+ */
+static void __ip_vs_ftp_exit(struct net *net)
+{
+	struct ip_vs_app *app = &ip_vs_ftp;
+
+	unregister_ip_vs_app(net, app);
+}
+
+static struct pernet_operations ip_vs_ftp_ops = {
+	.init = __ip_vs_ftp_init,
+	.exit = __ip_vs_ftp_exit,
+};
+
+int __init ip_vs_ftp_init(void)
+{
+	int rv;

+	rv = register_pernet_subsys(&ip_vs_ftp_ops);
+	return rv;
+}

 /*
  *	ip_vs_ftp finish.
  */
 static void __exit ip_vs_ftp_exit(void)
 {
-	unregister_ip_vs_app(&ip_vs_ftp);
+	unregister_pernet_subsys(&ip_vs_ftp_ops);
 }



-- 
Regards
Hans Schillstrom <hans.schillstrom@ericsson.com>

^ permalink raw reply related

* [RFC PATCH 8/9] ipvs network name space aware
From: Hans Schillstrom @ 2010-10-08 11:17 UTC (permalink / raw)
  To: lvs-devel, netdev, netfilter-devel; +Cc: horms, ja, wensong, daniel.lezcano

This patch contains all the protocol (proto) files.

All timeouts are moved to the ipvs struct.
The global "timeout tables" are used as default values only.

Signed-off-by:Hans Schillstrom <hans.schillstrom@ericsson.com>

diff --git a/net/netfilter/ipvs/ip_vs_proto.c b/net/netfilter/ipvs/ip_vs_proto.c
index 027f654..c17e02c 100644
--- a/net/netfilter/ipvs/ip_vs_proto.c
+++ b/net/netfilter/ipvs/ip_vs_proto.c
@@ -38,7 +38,6 @@
  * ipvs protocol table.
  */

-#define IP_VS_PROTO_TAB_SIZE		32	/* must be power of 2 */
 #define IP_VS_PROTO_HASH(proto)		((proto) & (IP_VS_PROTO_TAB_SIZE-1))

 static struct ip_vs_protocol *ip_vs_proto_table[IP_VS_PROTO_TAB_SIZE];
@@ -60,6 +59,30 @@ static int __used __init register_ip_vs_protocol(struct ip_vs_protocol *pp)
 	return 0;
 }

+/*
+ *	register an ipvs protocol's netns related data
+ */
+static int
+register_ip_vs_proto_netns(struct net *net, struct ip_vs_protocol *pp )
+{
+	unsigned hash = IP_VS_PROTO_HASH(pp->protocol);
+	struct ip_vs_proto_data *pd =
+			kzalloc(sizeof(struct ip_vs_proto_data), GFP_ATOMIC);
+
+	if (!pd) {
+		pr_err("%s(): no memory.\n", __func__);
+		return -ENOMEM;
+	}
+	pd->pp=pp;	/* For speed issues */
+	pd->next = net->ipvs->proto_data_table[hash];
+	net->ipvs->proto_data_table[hash] = pd;
+	atomic_set(&pd->appcnt,0);	/* Init app counter */
+
+	if (pp->init_netns != NULL)
+		pp->init_netns(net, pd);
+
+	return 0;
+}

 /*
  *	unregister an ipvs protocol
@@ -81,6 +104,28 @@ static int unregister_ip_vs_protocol(struct ip_vs_protocol *pp)

 	return -ESRCH;
 }
+/*
+ *	unregister an ipvs protocol's netns data
+ */
+static int
+unregister_ip_vs_proto_netns(struct net *net, struct ip_vs_proto_data *pd)
+{
+	struct ip_vs_proto_data **pd_p;
+	unsigned hash = IP_VS_PROTO_HASH(pd->pp->protocol);
+
+	pd_p = &net->ipvs->proto_data_table[hash];
+	for (; *pd_p; pd_p = &(*pd_p)->next) {
+		if (*pd_p == pd) {
+			*pd_p = pd->next;
+			if (pd->pp->exit_netns != NULL)
+				pd->pp->exit_netns(net, pd);
+			kfree(pd);
+			return 0;
+		}
+	}
+
+	return -ESRCH;
+}


 /*
@@ -100,6 +145,24 @@ struct ip_vs_protocol * ip_vs_proto_get(unsigned short proto)
 }
 EXPORT_SYMBOL(ip_vs_proto_get);

+/*
+ *	get ip_vs_protocol object data by netns and proto
+ */
+struct ip_vs_proto_data *
+ip_vs_proto_data_get(struct net *net, unsigned short proto)
+{
+	struct ip_vs_proto_data *pd;
+	unsigned hash = IP_VS_PROTO_HASH(proto);
+	struct netns_ipvs *ipvs = net->ipvs;
+
+	for (pd = ipvs->proto_data_table[hash]; pd; pd = pd->next) {
+		if (pd->pp->protocol == proto)
+			return pd;
+	}
+
+	return NULL;
+}
+EXPORT_SYMBOL(ip_vs_proto_data_get);

 /*
  *	Propagate event for state change to all protocols
@@ -118,8 +181,7 @@ void ip_vs_protocol_timeout_change(int flags)
 }


-int *
-ip_vs_create_timeout_table(int *table, int size)
+int *ip_vs_create_timeout_table(const int *table, int size)
 {
 	return kmemdup(table, size, GFP_ATOMIC);
 }
@@ -235,7 +297,44 @@ ip_vs_tcpudp_debug_packet(struct ip_vs_protocol *pp,
 #endif
 		ip_vs_tcpudp_debug_packet_v4(pp, skb, offset, msg);
 }
+static int  __net_init  __ip_vs_protocol_init(struct net *net)
+{

+#ifdef CONFIG_IP_VS_PROTO_TCP
+	register_ip_vs_proto_netns(net, &ip_vs_protocol_tcp);
+#endif
+#ifdef CONFIG_IP_VS_PROTO_UDP
+	register_ip_vs_proto_netns(net, &ip_vs_protocol_udp);
+#endif
+#ifdef CONFIG_IP_VS_PROTO_SCTP
+	register_ip_vs_proto_netns(net, &ip_vs_protocol_sctp);
+#endif
+#ifdef CONFIG_IP_VS_PROTO_AH
+	register_ip_vs_proto_netns(net, &ip_vs_protocol_ah);
+#endif
+#ifdef CONFIG_IP_VS_PROTO_ESP
+	register_ip_vs_proto_netns(net, &ip_vs_protocol_esp);
+#endif
+	return 0;
+}
+
+static void __net_exit __ip_vs_protocol_cleanup(struct net *net)
+{
+	struct ip_vs_proto_data *pd;
+	int i;
+	struct netns_ipvs *ipvs = net->ipvs;
+
+	/* unregister all the ipvs proto data for this netns */
+	for (i = 0; i < IP_VS_PROTO_TAB_SIZE; i++) {
+		while ((pd = ipvs->proto_data_table[i]) != NULL)
+			unregister_ip_vs_proto_netns(net, pd);
+	}
+}
+
+static struct pernet_operations ipvs_proto_ops = {
+	.init = __ip_vs_protocol_init,
+	.exit = __ip_vs_protocol_cleanup,
+};

 int __init ip_vs_protocol_init(void)
 {
@@ -266,7 +365,7 @@ int __init ip_vs_protocol_init(void)
 #endif
 	pr_info("Registered protocols (%s)\n", &protocols[2]);

-	return 0;
+	return register_pernet_subsys(&ipvs_proto_ops);
 }


@@ -275,6 +374,7 @@ void ip_vs_protocol_cleanup(void)
 	struct ip_vs_protocol *pp;
 	int i;

+	unregister_pernet_subsys(&ipvs_proto_ops);
 	/* unregister all the ipvs protocols */
 	for (i = 0; i < IP_VS_PROTO_TAB_SIZE; i++) {
 		while ((pp = ip_vs_proto_table[i]) != NULL)
diff --git a/net/netfilter/ipvs/ip_vs_proto_ah_esp.c b/net/netfilter/ipvs/ip_vs_proto_ah_esp.c
index 1892dfc..1b77ef1 100644
--- a/net/netfilter/ipvs/ip_vs_proto_ah_esp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_ah_esp.c
@@ -47,15 +47,17 @@ ah_esp_conn_in_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp,
 		   int inverse)
 {
 	struct ip_vs_conn *cp;
+	struct net *net = dev_net(skb->dev);

+	BUG_ON(!net);
 	if (likely(!inverse)) {
-		cp = ip_vs_conn_in_get(af, IPPROTO_UDP,
+		cp = ip_vs_conn_in_get(net, af, IPPROTO_UDP,
 				       &iph->saddr,
 				       htons(PORT_ISAKMP),
 				       &iph->daddr,
 				       htons(PORT_ISAKMP));
 	} else {
-		cp = ip_vs_conn_in_get(af, IPPROTO_UDP,
+		cp = ip_vs_conn_in_get(net, af, IPPROTO_UDP,
 				       &iph->daddr,
 				       htons(PORT_ISAKMP),
 				       &iph->saddr,
@@ -87,15 +89,17 @@ ah_esp_conn_out_get(int af, const struct sk_buff *skb,
 		    int inverse)
 {
 	struct ip_vs_conn *cp;
+	struct net *net = dev_net(skb->dev);

+	BUG_ON(!net);
 	if (likely(!inverse)) {
-		cp = ip_vs_conn_out_get(af, IPPROTO_UDP,
+		cp = ip_vs_conn_out_get(net, af, IPPROTO_UDP,
 					&iph->saddr,
 					htons(PORT_ISAKMP),
 					&iph->daddr,
 					htons(PORT_ISAKMP));
 	} else {
-		cp = ip_vs_conn_out_get(af, IPPROTO_UDP,
+		cp = ip_vs_conn_out_get(net, af, IPPROTO_UDP,
 					&iph->daddr,
 					htons(PORT_ISAKMP),
 					&iph->saddr,
@@ -173,27 +177,14 @@ ah_esp_debug_packet(struct ip_vs_protocol *pp, const struct sk_buff *skb,
 		ah_esp_debug_packet_v4(pp, skb, offset, msg);
 }

-
-static void ah_esp_init(struct ip_vs_protocol *pp)
-{
-	/* nothing to do now */
-}
-
-
-static void ah_esp_exit(struct ip_vs_protocol *pp)
-{
-	/* nothing to do now */
-}
-
-
 #ifdef CONFIG_IP_VS_PROTO_AH
 struct ip_vs_protocol ip_vs_protocol_ah = {
 	.name =			"AH",
 	.protocol =		IPPROTO_AH,
 	.num_states =		1,
 	.dont_defrag =		1,
-	.init =			ah_esp_init,
-	.exit =			ah_esp_exit,
+	.init =			NULL,
+	.exit =			NULL,
 	.conn_schedule =	ah_esp_conn_schedule,
 	.conn_in_get =		ah_esp_conn_in_get,
 	.conn_out_get =		ah_esp_conn_out_get,
@@ -206,7 +197,6 @@ struct ip_vs_protocol ip_vs_protocol_ah = {
 	.app_conn_bind =	NULL,
 	.debug_packet =		ah_esp_debug_packet,
 	.timeout_change =	NULL,		/* ISAKMP */
-	.set_state_timeout =	NULL,
 };
 #endif

@@ -216,8 +206,8 @@ struct ip_vs_protocol ip_vs_protocol_esp = {
 	.protocol =		IPPROTO_ESP,
 	.num_states =		1,
 	.dont_defrag =		1,
-	.init =			ah_esp_init,
-	.exit =			ah_esp_exit,
+	.init =			NULL,
+	.exit =			NULL,
 	.conn_schedule =	ah_esp_conn_schedule,
 	.conn_in_get =		ah_esp_conn_in_get,
 	.conn_out_get =		ah_esp_conn_out_get,
diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c
index 4c0855c..0e7eb5d 100644
--- a/net/netfilter/ipvs/ip_vs_proto_sctp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c
@@ -16,7 +16,9 @@ sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
 	sctp_chunkhdr_t _schunkh, *sch;
 	sctp_sctphdr_t *sh, _sctph;
 	struct ip_vs_iphdr iph;
-
+	struct net *net = dev_net(skb->dev);
+
+	BUG_ON(!net);
 	ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);

 	sh = skb_header_pointer(skb, iph.len, sizeof(_sctph), &_sctph);
@@ -29,7 +31,7 @@ sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
 		return 0;

 	if ((sch->type == SCTP_CID_INIT) &&
-	    (svc = ip_vs_service_get(af, skb->mark, iph.protocol,
+	    (svc = ip_vs_service_get(net, af, skb->mark, iph.protocol,
 				     &iph.daddr, sh->dest))) {
 		if (ip_vs_todrop()) {
 			/*
@@ -224,7 +226,7 @@ static enum ipvs_sctp_event_t sctp_events[255] = {
 	IP_VS_SCTP_EVE_SHUT_COM_CLI,
 };

-static struct ipvs_sctp_nextstate
+static const struct ipvs_sctp_nextstate
  sctp_states_table[IP_VS_SCTP_S_LAST][IP_VS_SCTP_EVE_LAST] = {
 	/*
 	 * STATE : IP_VS_SCTP_S_NONE
@@ -853,7 +855,7 @@ static struct ipvs_sctp_nextstate
 /*
  *      Timeout table[state]
  */
-static int sctp_timeouts[IP_VS_SCTP_S_LAST + 1] = {
+static const int sctp_timeouts[IP_VS_SCTP_S_LAST + 1] = {
 	[IP_VS_SCTP_S_NONE]         =     2 * HZ,
 	[IP_VS_SCTP_S_INIT_CLI]     =     1 * 60 * HZ,
 	[IP_VS_SCTP_S_INIT_SER]     =     1 * 60 * HZ,
@@ -901,6 +903,7 @@ static void sctp_timeout_change(struct ip_vs_protocol *pp, int flags)
 {
 }

+/*
 static int
 sctp_set_state_timeout(struct ip_vs_protocol *pp, char *sname, int to)
 {
@@ -908,7 +911,7 @@ sctp_set_state_timeout(struct ip_vs_protocol *pp, char *sname, int to)
 return ip_vs_set_state_timeout(pp->timeout_table, IP_VS_SCTP_S_LAST,
 				sctp_state_name_table, sname, to);
 }
-
+*/
 static inline int
 set_sctp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp,
 		int direction, const struct sk_buff *skb)
@@ -917,7 +920,10 @@ set_sctp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp,
 	unsigned char chunk_type;
 	int event, next_state;
 	int ihl;
+	struct net *net = dev_net(skb->dev);
+	struct ip_vs_proto_data *pd;

+	BUG_ON(!net);
 #ifdef CONFIG_IP_VS_IPV6
 	ihl = cp->af == AF_INET ? ip_hdrlen(skb) : sizeof(struct ipv6hdr);
 #else
@@ -992,10 +998,13 @@ set_sctp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp,
 			}
 		}
 	}
+	pd = ip_vs_proto_data_get(net, pp->protocol);
+	if(likely(pd))
+		cp->timeout = pd->timeout_table[cp->state = next_state];
+	else	/* What to do ? */
+		cp->timeout = sctp_timeouts[cp->state = next_state];

-	 cp->timeout = pp->timeout_table[cp->state = next_state];
-
-	 return 1;
+	return 1;
 }

 static int
@@ -1011,59 +1020,55 @@ sctp_state_transition(struct ip_vs_conn *cp, int direction,
 	return ret;
 }

-/*
- *      Hash table for SCTP application incarnations
- */
-#define SCTP_APP_TAB_BITS        4
-#define SCTP_APP_TAB_SIZE        (1 << SCTP_APP_TAB_BITS)
-#define SCTP_APP_TAB_MASK        (SCTP_APP_TAB_SIZE - 1)
-
-static struct list_head sctp_apps[SCTP_APP_TAB_SIZE];
-static DEFINE_SPINLOCK(sctp_app_lock);
-
 static inline __u16 sctp_app_hashkey(__be16 port)
 {
 	return (((__force u16)port >> SCTP_APP_TAB_BITS) ^ (__force u16)port)
 		& SCTP_APP_TAB_MASK;
 }

-static int sctp_register_app(struct ip_vs_app *inc)
+static int sctp_register_app(struct net *net, struct ip_vs_app *inc)
 {
 	struct ip_vs_app *i;
 	__u16 hash;
 	__be16 port = inc->port;
 	int ret = 0;
+	struct netns_ipvs *ipvs = net->ipvs;
+	struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_SCTP);

 	hash = sctp_app_hashkey(port);

-	spin_lock_bh(&sctp_app_lock);
-	list_for_each_entry(i, &sctp_apps[hash], p_list) {
+	spin_lock_bh(&ipvs->sctp_app_lock);
+	list_for_each_entry(i, &ipvs->sctp_apps[hash], p_list) {
 		if (i->port == port) {
 			ret = -EEXIST;
 			goto out;
 		}
 	}
-	list_add(&inc->p_list, &sctp_apps[hash]);
-	atomic_inc(&ip_vs_protocol_sctp.appcnt);
+	list_add(&inc->p_list, &ipvs->sctp_apps[hash]);
+	atomic_inc(&pd->appcnt);
 out:
-	spin_unlock_bh(&sctp_app_lock);
+	spin_unlock_bh(&ipvs->sctp_app_lock);

 	return ret;
 }

-static void sctp_unregister_app(struct ip_vs_app *inc)
+static void sctp_unregister_app(struct net *net, struct ip_vs_app *inc)
 {
-	spin_lock_bh(&sctp_app_lock);
-	atomic_dec(&ip_vs_protocol_sctp.appcnt);
+	struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_SCTP);
+
+	BUG_ON(!pd);
+	spin_lock_bh(&net->ipvs->sctp_app_lock);
+	atomic_dec(&pd->appcnt);
 	list_del(&inc->p_list);
-	spin_unlock_bh(&sctp_app_lock);
+	spin_unlock_bh(&net->ipvs->sctp_app_lock);
 }

-static int sctp_app_conn_bind(struct ip_vs_conn *cp)
+static int sctp_app_conn_bind(struct net *net, struct ip_vs_conn *cp)
 {
 	int hash;
 	struct ip_vs_app *inc;
 	int result = 0;
+	struct netns_ipvs *ipvs = net->ipvs;

 	/* Default binding: bind app only for NAT */
 	if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ)
@@ -1071,12 +1076,12 @@ static int sctp_app_conn_bind(struct ip_vs_conn *cp)
 	/* Lookup application incarnations and bind the right one */
 	hash = sctp_app_hashkey(cp->vport);

-	spin_lock(&sctp_app_lock);
-	list_for_each_entry(inc, &sctp_apps[hash], p_list) {
+	spin_lock(&ipvs->sctp_app_lock);
+	list_for_each_entry(inc, &ipvs->sctp_apps[hash], p_list) {
 		if (inc->port == cp->vport) {
 			if (unlikely(!ip_vs_app_inc_get(inc)))
 				break;
-			spin_unlock(&sctp_app_lock);
+			spin_unlock(&ipvs->sctp_app_lock);

 			IP_VS_DBG_BUF(9, "%s: Binding conn %s:%u->"
 					"%s:%u to app %s on port %u\n",
@@ -1092,43 +1097,50 @@ static int sctp_app_conn_bind(struct ip_vs_conn *cp)
 			goto out;
 		}
 	}
-	spin_unlock(&sctp_app_lock);
+	spin_unlock(&ipvs->sctp_app_lock);
 out:
 	return result;
 }

-static void ip_vs_sctp_init(struct ip_vs_protocol *pp)
+/*
+ * Per-netns SCTP init: set up this netns' SCTP app hash table and the
+ * lock taken around it, and set pd->timeout_table from sctp_timeouts.
+ */
+static void __ip_vs_sctp_init(struct net *net, struct ip_vs_proto_data *pd)
 {
-	IP_VS_INIT_HASH_TABLE(sctp_apps);
-	pp->timeout_table = sctp_timeouts;
+	ip_vs_init_hash_table(net->ipvs->sctp_apps, SCTP_APP_TAB_SIZE);
+	spin_lock_init(&net->ipvs->sctp_app_lock);
+	pd->timeout_table = ip_vs_create_timeout_table(sctp_timeouts,
+							sizeof(sctp_timeouts));
 }

-
-static void ip_vs_sctp_exit(struct ip_vs_protocol *pp)
+static void __ip_vs_sctp_exit(struct net *net, struct ip_vs_proto_data *pd)
 {
-
+	kfree(pd->timeout_table);
 }

+
 struct ip_vs_protocol ip_vs_protocol_sctp = {
-	.name = "SCTP",
-	.protocol = IPPROTO_SCTP,
-	.num_states = IP_VS_SCTP_S_LAST,
-	.dont_defrag = 0,
-	.appcnt = ATOMIC_INIT(0),
-	.init = ip_vs_sctp_init,
-	.exit = ip_vs_sctp_exit,
-	.register_app = sctp_register_app,
+	.name 		= "SCTP",
+	.protocol 	= IPPROTO_SCTP,
+	.num_states 	= IP_VS_SCTP_S_LAST,
+	.dont_defrag 	= 0,
+	.init 		= NULL,
+	.exit 		= NULL,
+	.init_netns 	= __ip_vs_sctp_init,
+	.exit_netns 	= __ip_vs_sctp_exit,
+	.register_app 	= sctp_register_app,
 	.unregister_app = sctp_unregister_app,
-	.conn_schedule = sctp_conn_schedule,
-	.conn_in_get = ip_vs_conn_in_get_proto,
-	.conn_out_get = ip_vs_conn_out_get_proto,
-	.snat_handler = sctp_snat_handler,
-	.dnat_handler = sctp_dnat_handler,
-	.csum_check = sctp_csum_check,
-	.state_name = sctp_state_name,
+	.conn_schedule 	= sctp_conn_schedule,
+	.conn_in_get 	= ip_vs_conn_in_get_proto,
+	.conn_out_get 	= ip_vs_conn_out_get_proto,
+	.snat_handler 	= sctp_snat_handler,
+	.dnat_handler 	= sctp_dnat_handler,
+	.csum_check 	= sctp_csum_check,
+	.state_name 	= sctp_state_name,
 	.state_transition = sctp_state_transition,
-	.app_conn_bind = sctp_app_conn_bind,
-	.debug_packet = ip_vs_tcpudp_debug_packet,
+	.app_conn_bind 	= sctp_app_conn_bind,
+	.debug_packet 	= ip_vs_tcpudp_debug_packet,
 	.timeout_change = sctp_timeout_change,
-	.set_state_timeout = sctp_set_state_timeout,
+/*	.set_state_timeout = sctp_set_state_timeout, */
 };
diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c
index 282d24d..bd40721 100644
--- a/net/netfilter/ipvs/ip_vs_proto_tcp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c
@@ -9,7 +9,12 @@
  *              as published by the Free Software Foundation; either version
  *              2 of the License, or (at your option) any later version.
  *
- * Changes:
+ * Changes:     Hans Schillstrom <hans.schillstrom@ericsson.com>
+ *
+ *              Network name space (netns) aware.
+ *              Global data moved to netns i.e struct netns_ipvs
+ *              tcp_timeouts table has copy per netns in a hash table per
+ *              protocol ip_vs_proto_data and is handled by netns
  *
  */

@@ -34,7 +39,9 @@ tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
 	struct ip_vs_service *svc;
 	struct tcphdr _tcph, *th;
 	struct ip_vs_iphdr iph;
-
+	struct net *net = dev_net(skb->dev);
+
+	BUG_ON(!net);
 	ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);

 	th = skb_header_pointer(skb, iph.len, sizeof(_tcph), &_tcph);
@@ -44,8 +51,8 @@ tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
 	}

 	if (th->syn &&
-	    (svc = ip_vs_service_get(af, skb->mark, iph.protocol, &iph.daddr,
-				     th->dest))) {
+	    (svc = ip_vs_service_get(net, af, skb->mark, iph.protocol,
+			    	     &iph.daddr, th->dest))) {
 		if (ip_vs_todrop()) {
 			/*
 			 * It seems that we are very loaded.
@@ -316,7 +323,7 @@ static const int tcp_state_off[IP_VS_DIR_LAST] = {
 /*
  *	Timeout table[state]
  */
-static int tcp_timeouts[IP_VS_TCP_S_LAST+1] = {
+static const int tcp_timeouts[IP_VS_TCP_S_LAST+1] = {
 	[IP_VS_TCP_S_NONE]		=	2*HZ,
 	[IP_VS_TCP_S_ESTABLISHED]	=	15*60*HZ,
 	[IP_VS_TCP_S_SYN_SENT]		=	2*60*HZ,
@@ -430,13 +437,13 @@ static void tcp_timeout_change(struct ip_vs_protocol *pp, int flags)
 	*/
 	tcp_state_table = (on? tcp_states_dos : tcp_states);
 }
-
+/* Removed not used
 static int
 tcp_set_state_timeout(struct ip_vs_protocol *pp, char *sname, int to)
 {
 	return ip_vs_set_state_timeout(pp->timeout_table, IP_VS_TCP_S_LAST,
 				       tcp_state_name_table, sname, to);
-}
+} */

 static inline int tcp_state_idx(struct tcphdr *th)
 {
@@ -452,12 +459,13 @@ static inline int tcp_state_idx(struct tcphdr *th)
 }

 static inline void
-set_tcp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp,
+set_tcp_state(struct net *net, struct ip_vs_protocol *pp, struct ip_vs_conn *cp,
 	      int direction, struct tcphdr *th)
 {
 	int state_idx;
 	int new_state = IP_VS_TCP_S_CLOSE;
 	int state_off = tcp_state_off[direction];
+	struct ip_vs_proto_data *pd;

 	/*
 	 *    Update state offset to INPUT_ONLY if necessary
@@ -512,8 +520,12 @@ set_tcp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp,
 			}
 		}
 	}
-
-	cp->timeout = pp->timeout_table[cp->state = new_state];
+	pd = ip_vs_proto_data_get(net, pp->protocol);
+	if(likely(pd))
+		cp->timeout = pd->timeout_table[cp->state = new_state];
+	else	/* What to do ? */
+		cp->timeout = tcp_timeouts[cp->state = new_state];
+	IP_VS_DBG(8, "%s() timeout=%lu, pd=%p def=%d\n", __func__, cp->timeout, pd->timeout_table, tcp_timeouts[new_state]);
 }


@@ -525,6 +537,7 @@ tcp_state_transition(struct ip_vs_conn *cp, int direction,
 		     const struct sk_buff *skb,
 		     struct ip_vs_protocol *pp)
 {
+	struct net *net = dev_net(skb->dev);
 	struct tcphdr _tcph, *th;

 #ifdef CONFIG_IP_VS_IPV6
@@ -538,7 +551,7 @@ tcp_state_transition(struct ip_vs_conn *cp, int direction,
 		return 0;

 	spin_lock(&cp->lock);
-	set_tcp_state(pp, cp, direction, th);
+	set_tcp_state(net, pp, cp, direction, th);
 	spin_unlock(&cp->lock);

 	return 1;
@@ -548,12 +561,6 @@ tcp_state_transition(struct ip_vs_conn *cp, int direction,
 /*
  *	Hash table for TCP application incarnations
  */
-#define	TCP_APP_TAB_BITS	4
-#define	TCP_APP_TAB_SIZE	(1 << TCP_APP_TAB_BITS)
-#define	TCP_APP_TAB_MASK	(TCP_APP_TAB_SIZE - 1)
-
-static struct list_head tcp_apps[TCP_APP_TAB_SIZE];
-static DEFINE_SPINLOCK(tcp_app_lock);

 static inline __u16 tcp_app_hashkey(__be16 port)
 {
@@ -562,47 +569,51 @@ static inline __u16 tcp_app_hashkey(__be16 port)
 }


-static int tcp_register_app(struct ip_vs_app *inc)
+static int tcp_register_app(struct net *net, struct ip_vs_app *inc)
 {
 	struct ip_vs_app *i;
 	__u16 hash;
 	__be16 port = inc->port;
 	int ret = 0;
+	struct netns_ipvs *ipvs = net->ipvs;
+	struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_TCP);

 	hash = tcp_app_hashkey(port);

-	spin_lock_bh(&tcp_app_lock);
-	list_for_each_entry(i, &tcp_apps[hash], p_list) {
+	spin_lock_bh(&ipvs->tcp_app_lock);
+	list_for_each_entry(i, &ipvs->tcp_apps[hash], p_list) {
 		if (i->port == port) {
 			ret = -EEXIST;
 			goto out;
 		}
 	}
-	list_add(&inc->p_list, &tcp_apps[hash]);
-	atomic_inc(&ip_vs_protocol_tcp.appcnt);
+	list_add(&inc->p_list, &ipvs->tcp_apps[hash]);
+	atomic_inc(&pd->appcnt);

   out:
-	spin_unlock_bh(&tcp_app_lock);
+	spin_unlock_bh(&ipvs->tcp_app_lock);
 	return ret;
 }


-static void
-tcp_unregister_app(struct ip_vs_app *inc)
+static void tcp_unregister_app(struct net *net, struct ip_vs_app *inc)
 {
-	spin_lock_bh(&tcp_app_lock);
-	atomic_dec(&ip_vs_protocol_tcp.appcnt);
+	struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
+
+	BUG_ON(!pd);
+	spin_lock_bh(&net->ipvs->tcp_app_lock);
+	atomic_dec(&pd->appcnt);
 	list_del(&inc->p_list);
-	spin_unlock_bh(&tcp_app_lock);
+	spin_unlock_bh(&net->ipvs->tcp_app_lock);
 }


-static int
-tcp_app_conn_bind(struct ip_vs_conn *cp)
+static int tcp_app_conn_bind(struct net *net, struct ip_vs_conn *cp)
 {
 	int hash;
 	struct ip_vs_app *inc;
 	int result = 0;
+	struct netns_ipvs *ipvs = net->ipvs;

 	/* Default binding: bind app only for NAT */
 	if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ)
@@ -611,12 +622,12 @@ tcp_app_conn_bind(struct ip_vs_conn *cp)
 	/* Lookup application incarnations and bind the right one */
 	hash = tcp_app_hashkey(cp->vport);

-	spin_lock(&tcp_app_lock);
-	list_for_each_entry(inc, &tcp_apps[hash], p_list) {
+	spin_lock(&ipvs->tcp_app_lock);
+	list_for_each_entry(inc, &ipvs->tcp_apps[hash], p_list) {
 		if (inc->port == cp->vport) {
 			if (unlikely(!ip_vs_app_inc_get(inc)))
 				break;
-			spin_unlock(&tcp_app_lock);
+			spin_unlock(&ipvs->tcp_app_lock);

 			IP_VS_DBG_BUF(9, "%s(): Binding conn %s:%u->"
 				      "%s:%u to app %s on port %u\n",
@@ -633,7 +644,7 @@ tcp_app_conn_bind(struct ip_vs_conn *cp)
 			goto out;
 		}
 	}
-	spin_unlock(&tcp_app_lock);
+	spin_unlock(&ipvs->tcp_app_lock);

   out:
 	return result;
@@ -643,24 +654,32 @@ tcp_app_conn_bind(struct ip_vs_conn *cp)
 /*
  *	Set LISTEN timeout. (ip_vs_conn_put will setup timer)
  */
-void ip_vs_tcp_conn_listen(struct ip_vs_conn *cp)
+void ip_vs_tcp_conn_listen(struct net *net, struct ip_vs_conn *cp)
 {
+	struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
+
 	spin_lock(&cp->lock);
 	cp->state = IP_VS_TCP_S_LISTEN;
-	cp->timeout = ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_LISTEN];
+	cp->timeout = ( pd ? pd->timeout_table[IP_VS_TCP_S_LISTEN]
+	                   : tcp_timeouts[IP_VS_TCP_S_LISTEN] );
 	spin_unlock(&cp->lock);
 }

-
-static void ip_vs_tcp_init(struct ip_vs_protocol *pp)
+/* ---------------------------------------------
+ *   timeouts is netns related now.
+ * ---------------------------------------------
+ */
+static void __ip_vs_tcp_init(struct net *net, struct ip_vs_proto_data *pd)
 {
-	IP_VS_INIT_HASH_TABLE(tcp_apps);
-	pp->timeout_table = tcp_timeouts;
+	ip_vs_init_hash_table(net->ipvs->tcp_apps, TCP_APP_TAB_SIZE);
+	spin_lock_init(&net->ipvs->tcp_app_lock);
+	pd->timeout_table = ip_vs_create_timeout_table(tcp_timeouts,
+							sizeof(tcp_timeouts));
 }

-
-static void ip_vs_tcp_exit(struct ip_vs_protocol *pp)
+static void __ip_vs_tcp_exit(struct net *net, struct ip_vs_proto_data *pd)
 {
+	kfree(pd->timeout_table);
 }


@@ -669,9 +688,10 @@ struct ip_vs_protocol ip_vs_protocol_tcp = {
 	.protocol =		IPPROTO_TCP,
 	.num_states =		IP_VS_TCP_S_LAST,
 	.dont_defrag =		0,
-	.appcnt =		ATOMIC_INIT(0),
-	.init =			ip_vs_tcp_init,
-	.exit =			ip_vs_tcp_exit,
+	.init =			NULL,
+	.exit =			NULL,
+	.init_netns =		__ip_vs_tcp_init,
+	.exit_netns =		__ip_vs_tcp_exit,
 	.register_app =		tcp_register_app,
 	.unregister_app =	tcp_unregister_app,
 	.conn_schedule =	tcp_conn_schedule,
@@ -685,5 +705,5 @@ struct ip_vs_protocol ip_vs_protocol_tcp = {
 	.app_conn_bind =	tcp_app_conn_bind,
 	.debug_packet =		ip_vs_tcpudp_debug_packet,
 	.timeout_change =	tcp_timeout_change,
-	.set_state_timeout =	tcp_set_state_timeout,
+/*	.set_state_timeout =	tcp_set_state_timeout, */
 };
diff --git a/net/netfilter/ipvs/ip_vs_proto_udp.c b/net/netfilter/ipvs/ip_vs_proto_udp.c
index 8553231..d067843 100644
--- a/net/netfilter/ipvs/ip_vs_proto_udp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_udp.c
@@ -9,7 +9,10 @@
  *              as published by the Free Software Foundation; either version
  *              2 of the License, or (at your option) any later version.
  *
- * Changes:
+ * Changes:     Hans Schillstrom <hans.schillstrom@ericsson.com>
+ *
+ *              Network name space (netns) aware.
+ *              Global data moved to netns i.e struct netns_ipvs
  *
  */

@@ -34,7 +37,9 @@ udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
 	struct ip_vs_service *svc;
 	struct udphdr _udph, *uh;
 	struct ip_vs_iphdr iph;
-
+	struct net *net = dev_net(skb->dev);
+
+	BUG_ON(!net);
 	ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);

 	uh = skb_header_pointer(skb, iph.len, sizeof(_udph), &_udph);
@@ -43,7 +48,7 @@ udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
 		return 0;
 	}

-	svc = ip_vs_service_get(af, skb->mark, iph.protocol,
+	svc = ip_vs_service_get(net, af, skb->mark, iph.protocol,
 				&iph.daddr, uh->dest);
 	if (svc) {
 		if (ip_vs_todrop()) {
@@ -323,13 +328,6 @@ udp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp)
  *	unregister_app or app_conn_bind is called each time.
  */

-#define	UDP_APP_TAB_BITS	4
-#define	UDP_APP_TAB_SIZE	(1 << UDP_APP_TAB_BITS)
-#define	UDP_APP_TAB_MASK	(UDP_APP_TAB_SIZE - 1)
-
-static struct list_head udp_apps[UDP_APP_TAB_SIZE];
-static DEFINE_SPINLOCK(udp_app_lock);
-
 static inline __u16 udp_app_hashkey(__be16 port)
 {
 	return (((__force u16)port >> UDP_APP_TAB_BITS) ^ (__force u16)port)
@@ -337,47 +335,52 @@ static inline __u16 udp_app_hashkey(__be16 port)
 }


-static int udp_register_app(struct ip_vs_app *inc)
+static int udp_register_app(struct net *net, struct ip_vs_app *inc)
 {
 	struct ip_vs_app *i;
 	__u16 hash;
 	__be16 port = inc->port;
 	int ret = 0;
+	struct netns_ipvs *ipvs = net->ipvs;
+	struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_UDP);

+	BUG_ON(!pd);
 	hash = udp_app_hashkey(port);

-
-	spin_lock_bh(&udp_app_lock);
-	list_for_each_entry(i, &udp_apps[hash], p_list) {
+	spin_lock_bh(&ipvs->udp_app_lock);
+	list_for_each_entry(i, &ipvs->udp_apps[hash], p_list) {
 		if (i->port == port) {
 			ret = -EEXIST;
 			goto out;
 		}
 	}
-	list_add(&inc->p_list, &udp_apps[hash]);
-	atomic_inc(&ip_vs_protocol_udp.appcnt);
+	list_add(&inc->p_list, &ipvs->udp_apps[hash]);
+	atomic_inc(&pd->appcnt);

   out:
-	spin_unlock_bh(&udp_app_lock);
+	spin_unlock_bh(&ipvs->udp_app_lock);
 	return ret;
 }


-static void
-udp_unregister_app(struct ip_vs_app *inc)
+static void udp_unregister_app(struct net *net, struct ip_vs_app *inc)
 {
-	spin_lock_bh(&udp_app_lock);
-	atomic_dec(&ip_vs_protocol_udp.appcnt);
+	struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_UDP);
+
+	BUG_ON(!pd);
+	spin_lock_bh(&net->ipvs->udp_app_lock);
+	atomic_dec(&pd->appcnt);
 	list_del(&inc->p_list);
-	spin_unlock_bh(&udp_app_lock);
+	spin_unlock_bh(&net->ipvs->udp_app_lock);
 }


-static int udp_app_conn_bind(struct ip_vs_conn *cp)
+static int udp_app_conn_bind(struct net *net, struct ip_vs_conn *cp)
 {
 	int hash;
 	struct ip_vs_app *inc;
 	int result = 0;
+	struct netns_ipvs *ipvs = net->ipvs;

 	/* Default binding: bind app only for NAT */
 	if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ)
@@ -386,12 +389,12 @@ static int udp_app_conn_bind(struct ip_vs_conn *cp)
 	/* Lookup application incarnations and bind the right one */
 	hash = udp_app_hashkey(cp->vport);

-	spin_lock(&udp_app_lock);
-	list_for_each_entry(inc, &udp_apps[hash], p_list) {
+	spin_lock(&ipvs->udp_app_lock);
+	list_for_each_entry(inc, &ipvs->udp_apps[hash], p_list) {
 		if (inc->port == cp->vport) {
 			if (unlikely(!ip_vs_app_inc_get(inc)))
 				break;
-			spin_unlock(&udp_app_lock);
+			spin_unlock(&ipvs->udp_app_lock);

 			IP_VS_DBG_BUF(9, "%s(): Binding conn %s:%u->"
 				      "%s:%u to app %s on port %u\n",
@@ -408,14 +411,14 @@ static int udp_app_conn_bind(struct ip_vs_conn *cp)
 			goto out;
 		}
 	}
-	spin_unlock(&udp_app_lock);
+	spin_unlock(&ipvs->udp_app_lock);

   out:
 	return result;
 }


-static int udp_timeouts[IP_VS_UDP_S_LAST+1] = {
+static const int udp_timeouts[IP_VS_UDP_S_LAST+1] = {
 	[IP_VS_UDP_S_NORMAL]		=	5*60*HZ,
 	[IP_VS_UDP_S_LAST]		=	2*HZ,
 };
@@ -425,14 +428,20 @@ static const char *const udp_state_name_table[IP_VS_UDP_S_LAST+1] = {
 	[IP_VS_UDP_S_LAST]		=	"BUG!",
 };

-
+/*
 static int
-udp_set_state_timeout(struct ip_vs_protocol *pp, char *sname, int to)
+udp_set_state_timeout(struct net *net, struct ip_vs_protocol *pp, char *sname,
+                      int to)
 {
-	return ip_vs_set_state_timeout(pp->timeout_table, IP_VS_UDP_S_LAST,
-				       udp_state_name_table, sname, to);
+	struct ip_vs_proto_data *pd=ip_vs_proto_data_get(net, IPPROTO_UDP);
+	if (pd)
+		return ip_vs_set_state_timeout(pd->timeout_table,
+					       IP_VS_UDP_S_LAST,
+					       udp_state_name_table, sname, to);
+	else
+		return -ENOENT;
 }
-
+*/
 static const char * udp_state_name(int state)
 {
 	if (state >= IP_VS_UDP_S_LAST)
@@ -445,28 +454,40 @@ udp_state_transition(struct ip_vs_conn *cp, int direction,
 		     const struct sk_buff *skb,
 		     struct ip_vs_protocol *pp)
 {
-	cp->timeout = pp->timeout_table[IP_VS_UDP_S_NORMAL];
+	struct net *net = dev_net(skb->dev);
+	struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_UDP);
+
+	if (unlikely(!pd))	/* no UDP proto data for this netns: keep timeout */
+		return 0;
+	cp->timeout = pd->timeout_table[IP_VS_UDP_S_NORMAL];
 	return 1;
 }
-
-static void udp_init(struct ip_vs_protocol *pp)
+/* ---------------------------------------------
+ *   timeouts is netns related now.
+ * ---------------------------------------------
+ */
+static void __udp_init(struct net *net, struct ip_vs_proto_data *pd)
 {
-	IP_VS_INIT_HASH_TABLE(udp_apps);
-	pp->timeout_table = udp_timeouts;
+	ip_vs_init_hash_table(net->ipvs->udp_apps, UDP_APP_TAB_SIZE);
+	spin_lock_init(&net->ipvs->udp_app_lock);
+	pd->timeout_table = ip_vs_create_timeout_table(udp_timeouts,
+							sizeof(udp_timeouts));
 }

-static void udp_exit(struct ip_vs_protocol *pp)
+static void __udp_exit(struct net *net, struct ip_vs_proto_data *pd)
 {
+	kfree(pd->timeout_table);
 }

-
 struct ip_vs_protocol ip_vs_protocol_udp = {
 	.name =			"UDP",
 	.protocol =		IPPROTO_UDP,
 	.num_states =		IP_VS_UDP_S_LAST,
 	.dont_defrag =		0,
-	.init =			udp_init,
-	.exit =			udp_exit,
+	.init =			NULL,
+	.exit =			NULL,
+	.init_netns =		__udp_init,
+	.exit_netns =		__udp_exit,
 	.conn_schedule =	udp_conn_schedule,
 	.conn_in_get =		ip_vs_conn_in_get_proto,
 	.conn_out_get =		ip_vs_conn_out_get_proto,
@@ -480,5 +501,5 @@ struct ip_vs_protocol ip_vs_protocol_udp = {
 	.app_conn_bind =	udp_app_conn_bind,
 	.debug_packet =		ip_vs_tcpudp_debug_packet,
 	.timeout_change =	NULL,
-	.set_state_timeout =	udp_set_state_timeout,
+/*	.set_state_timeout =	udp_set_state_timeout, */
 };

-- 
Regards
Hans Schillstrom <hans.schillstrom@ericsson.com>

^ permalink raw reply related

* [RFC PATCH 9/9] ipvs network name space aware
From: Hans Schillstrom @ 2010-10-08 11:17 UTC (permalink / raw)
  To: lvs-devel, netdev, netfilter-devel; +Cc: horms, ja, wensong, daniel.lezcano

This patch contains ip_vs_sync.c and ip_vs_xmit.c

There is one sync daemon per netns, and a number is prepended to its name.
(a kind of incarnation counter)

Part of the netns migration in ip_vs_xmit.c was done in the IPv6 tunnel patch,
so make sure that "[patch v4] ipvs: IPv6 tunnel mode" is applied first.

Signed-off-by: Hans Schillstrom <hans.schillstrom@ericsson.com>

diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c
index 7ba0693..98575da 100644
--- a/net/netfilter/ipvs/ip_vs_sync.c
+++ b/net/netfilter/ipvs/ip_vs_sync.c
@@ -74,6 +74,7 @@ struct ip_vs_sync_conn_options {
 struct ip_vs_sync_thread_data {
 	struct socket *sock;
 	char *buf;
+	struct net *net;
 };

 #define SIMPLE_CONN_SIZE  (sizeof(struct ip_vs_sync_conn))
@@ -113,9 +114,6 @@ struct ip_vs_sync_mesg {
 	/* ip_vs_sync_conn entries start here */
 };

-/* the maximum length of sync (sending/receiving) message */
-static int sync_send_mesg_maxlen;
-static int sync_recv_mesg_maxlen;

 struct ip_vs_sync_buff {
 	struct list_head        list;
@@ -127,70 +125,41 @@ struct ip_vs_sync_buff {
 	unsigned char           *end;
 };

-
-/* the sync_buff list head and the lock */
-static LIST_HEAD(ip_vs_sync_queue);
-static DEFINE_SPINLOCK(ip_vs_sync_lock);
-
-/* current sync_buff for accepting new conn entries */
-static struct ip_vs_sync_buff   *curr_sb = NULL;
-static DEFINE_SPINLOCK(curr_sb_lock);
-
-/* ipvs sync daemon state */
-volatile int ip_vs_sync_state = IP_VS_STATE_NONE;
-volatile int ip_vs_master_syncid = 0;
-volatile int ip_vs_backup_syncid = 0;
-
-/* multicast interface name */
-char ip_vs_master_mcast_ifn[IP_VS_IFNAME_MAXLEN];
-char ip_vs_backup_mcast_ifn[IP_VS_IFNAME_MAXLEN];
-
-/* sync daemon tasks */
-static struct task_struct *sync_master_thread;
-static struct task_struct *sync_backup_thread;
-
-/* multicast addr */
-static struct sockaddr_in mcast_addr = {
-	.sin_family		= AF_INET,
-	.sin_port		= cpu_to_be16(IP_VS_SYNC_PORT),
-	.sin_addr.s_addr	= cpu_to_be32(IP_VS_SYNC_GROUP),
-};
-
-
-static inline struct ip_vs_sync_buff *sb_dequeue(void)
+static inline struct ip_vs_sync_buff *sb_dequeue(struct net *net)
 {
 	struct ip_vs_sync_buff *sb;
+	struct netns_ipvs *ipvs = net->ipvs;

-	spin_lock_bh(&ip_vs_sync_lock);
-	if (list_empty(&ip_vs_sync_queue)) {
+	spin_lock_bh(&ipvs->sync_lock);
+	if (list_empty(&ipvs->sync_queue)) {
 		sb = NULL;
 	} else {
-		sb = list_entry(ip_vs_sync_queue.next,
+		sb = list_entry(ipvs->sync_queue.next,
 				struct ip_vs_sync_buff,
 				list);
 		list_del(&sb->list);
 	}
-	spin_unlock_bh(&ip_vs_sync_lock);
+	spin_unlock_bh(&ipvs->sync_lock);

 	return sb;
 }

-static inline struct ip_vs_sync_buff * ip_vs_sync_buff_create(void)
+static inline struct ip_vs_sync_buff * ip_vs_sync_buff_create(struct net *net)
 {
 	struct ip_vs_sync_buff *sb;

 	if (!(sb=kmalloc(sizeof(struct ip_vs_sync_buff), GFP_ATOMIC)))
 		return NULL;

-	if (!(sb->mesg=kmalloc(sync_send_mesg_maxlen, GFP_ATOMIC))) {
+	if (!(sb->mesg=kmalloc(net->ipvs->sync_send_mesg_maxlen, GFP_ATOMIC))) {
 		kfree(sb);
 		return NULL;
 	}
 	sb->mesg->nr_conns = 0;
-	sb->mesg->syncid = ip_vs_master_syncid;
+	sb->mesg->syncid = net->ipvs->master_syncid;
 	sb->mesg->size = 4;
 	sb->head = (unsigned char *)sb->mesg + 4;
-	sb->end = (unsigned char *)sb->mesg + sync_send_mesg_maxlen;
+	sb->end = (unsigned char *)sb->mesg + net->ipvs->sync_send_mesg_maxlen;
 	sb->firstuse = jiffies;
 	return sb;
 }
@@ -201,14 +170,16 @@ static inline void ip_vs_sync_buff_release(struct ip_vs_sync_buff *sb)
 	kfree(sb);
 }

-static inline void sb_queue_tail(struct ip_vs_sync_buff *sb)
+static inline void sb_queue_tail(struct net *net, struct ip_vs_sync_buff *sb)
 {
-	spin_lock(&ip_vs_sync_lock);
-	if (ip_vs_sync_state & IP_VS_STATE_MASTER)
-		list_add_tail(&sb->list, &ip_vs_sync_queue);
+	struct netns_ipvs *ipvs = net->ipvs;
+
+	spin_lock(&ipvs->sync_lock);
+	if (ipvs->sync_state & IP_VS_STATE_MASTER)
+		list_add_tail(&sb->list, &ipvs->sync_queue);
 	else
 		ip_vs_sync_buff_release(sb);
-	spin_unlock(&ip_vs_sync_lock);
+	spin_unlock(&ipvs->sync_lock);
 }

 /*
@@ -216,18 +187,19 @@ static inline void sb_queue_tail(struct ip_vs_sync_buff *sb)
  *	than the specified time or the specified time is zero.
  */
 static inline struct ip_vs_sync_buff *
-get_curr_sync_buff(unsigned long time)
+get_curr_sync_buff(struct net *net, unsigned long time)
 {
 	struct ip_vs_sync_buff *sb;
+	struct netns_ipvs *ipvs = net->ipvs;

-	spin_lock_bh(&curr_sb_lock);
-	if (curr_sb && (time == 0 ||
-			time_before(jiffies - curr_sb->firstuse, time))) {
-		sb = curr_sb;
-		curr_sb = NULL;
+	spin_lock_bh(&ipvs->sync_buff_lock);
+	if (ipvs->sync_buff && (time == 0 ||
+			time_before(jiffies - ipvs->sync_buff->firstuse, time))) {
+		sb = ipvs->sync_buff;
+		ipvs->sync_buff = NULL;
 	} else
 		sb = NULL;
-	spin_unlock_bh(&curr_sb_lock);
+	spin_unlock_bh(&ipvs->sync_buff_lock);
 	return sb;
 }

@@ -236,16 +208,17 @@ get_curr_sync_buff(unsigned long time)
  *      Add an ip_vs_conn information into the current sync_buff.
  *      Called by ip_vs_in.
  */
-void ip_vs_sync_conn(struct ip_vs_conn *cp)
+void ip_vs_sync_conn(struct net *net, struct ip_vs_conn *cp)
 {
 	struct ip_vs_sync_mesg *m;
 	struct ip_vs_sync_conn *s;
 	int len;
+	struct netns_ipvs *ipvs = net->ipvs;

-	spin_lock(&curr_sb_lock);
-	if (!curr_sb) {
-		if (!(curr_sb=ip_vs_sync_buff_create())) {
-			spin_unlock(&curr_sb_lock);
+	spin_lock(&ipvs->sync_buff_lock);
+	if (!ipvs->sync_buff) {
+		if (!(ipvs->sync_buff=ip_vs_sync_buff_create(net))) {
+			spin_unlock(&ipvs->sync_buff_lock);
 			pr_err("ip_vs_sync_buff_create failed.\n");
 			return;
 		}
@@ -253,8 +226,8 @@ void ip_vs_sync_conn(struct ip_vs_conn *cp)

 	len = (cp->flags & IP_VS_CONN_F_SEQ_MASK) ? FULL_CONN_SIZE :
 		SIMPLE_CONN_SIZE;
-	m = curr_sb->mesg;
-	s = (struct ip_vs_sync_conn *)curr_sb->head;
+	m = ipvs->sync_buff->mesg;
+	s = (struct ip_vs_sync_conn *)ipvs->sync_buff->head;

 	/* copy members */
 	s->protocol = cp->protocol;
@@ -274,18 +247,18 @@ void ip_vs_sync_conn(struct ip_vs_conn *cp)

 	m->nr_conns++;
 	m->size += len;
-	curr_sb->head += len;
+	ipvs->sync_buff->head += len;

 	/* check if there is a space for next one */
-	if (curr_sb->head+FULL_CONN_SIZE > curr_sb->end) {
-		sb_queue_tail(curr_sb);
-		curr_sb = NULL;
+	if (ipvs->sync_buff->head+FULL_CONN_SIZE > ipvs->sync_buff->end) {
+		sb_queue_tail(net, ipvs->sync_buff);
+		ipvs->sync_buff = NULL;
 	}
-	spin_unlock(&curr_sb_lock);
+	spin_unlock(&ipvs->sync_buff_lock);

 	/* synchronize its controller if it has */
 	if (cp->control)
-		ip_vs_sync_conn(cp->control);
+		ip_vs_sync_conn(net, cp->control);
 }


@@ -293,13 +266,15 @@ void ip_vs_sync_conn(struct ip_vs_conn *cp)
  *      Process received multicast message and create the corresponding
  *      ip_vs_conn entries.
  */
-static void ip_vs_process_message(const char *buffer, const size_t buflen)
+static void
+ip_vs_process_message(struct net *net, const char *buffer, const size_t buflen)
 {
 	struct ip_vs_sync_mesg *m = (struct ip_vs_sync_mesg *)buffer;
 	struct ip_vs_sync_conn *s;
 	struct ip_vs_sync_conn_options *opt;
 	struct ip_vs_conn *cp;
 	struct ip_vs_protocol *pp;
+	struct ip_vs_proto_data *pd;
 	struct ip_vs_dest *dest;
 	char *p;
 	int i;
@@ -318,7 +293,7 @@ static void ip_vs_process_message(const char *buffer, const size_t buflen)
 	}

 	/* SyncID sanity check */
-	if (ip_vs_backup_syncid != 0 && m->syncid != ip_vs_backup_syncid) {
+	if (net->ipvs->backup_syncid != 0 && m->syncid != net->ipvs->backup_syncid) {
 		IP_VS_DBG(7, "Ignoring incoming msg with syncid = %d\n",
 			  m->syncid);
 		return;
@@ -371,13 +346,13 @@ static void ip_vs_process_message(const char *buffer, const size_t buflen)
 		}

 		if (!(flags & IP_VS_CONN_F_TEMPLATE))
-			cp = ip_vs_conn_in_get(AF_INET, s->protocol,
+			cp = ip_vs_conn_in_get(net, AF_INET, s->protocol,
 					       (union nf_inet_addr *)&s->caddr,
 					       s->cport,
 					       (union nf_inet_addr *)&s->vaddr,
 					       s->vport);
 		else
-			cp = ip_vs_ct_in_get(AF_INET, s->protocol,
+			cp = ip_vs_ct_in_get(net, AF_INET, s->protocol,
 					     (union nf_inet_addr *)&s->caddr,
 					     s->cport,
 					     (union nf_inet_addr *)&s->vaddr,
@@ -388,7 +363,7 @@ static void ip_vs_process_message(const char *buffer, const size_t buflen)
 			 * If it is not found the connection will remain unbound
 			 * but still handled.
 			 */
-			dest = ip_vs_find_dest(AF_INET,
+			dest = ip_vs_find_dest(net, AF_INET,
 					       (union nf_inet_addr *)&s->daddr,
 					       s->dport,
 					       (union nf_inet_addr *)&s->vaddr,
@@ -406,7 +381,7 @@ static void ip_vs_process_message(const char *buffer, const size_t buflen)
 				else
 					flags &= ~IP_VS_CONN_F_INACTIVE;
 			}
-			cp = ip_vs_conn_new(AF_INET, s->protocol,
+			cp = ip_vs_conn_new(net, AF_INET, s->protocol,
 					    (union nf_inet_addr *)&s->caddr,
 					    s->cport,
 					    (union nf_inet_addr *)&s->vaddr,
@@ -421,7 +396,7 @@ static void ip_vs_process_message(const char *buffer, const size_t buflen)
 				return;
 			}
 		} else if (!cp->dest) {
-			dest = ip_vs_try_bind_dest(cp);
+			dest = ip_vs_try_bind_dest(net, cp);
 			if (dest)
 				atomic_dec(&dest->refcnt);
 		} else if ((cp->dest) && (cp->protocol == IPPROTO_TCP) &&
@@ -452,7 +427,7 @@ static void ip_vs_process_message(const char *buffer, const size_t buflen)

 		if (opt)
 			memcpy(&cp->in_seq, opt, sizeof(*opt));
-		atomic_set(&cp->in_pkts, sysctl_ip_vs_sync_threshold[0]);
+		atomic_set(&cp->in_pkts, net->ipvs->sysctl_sync_threshold[0]);
 		cp->state = state;
 		cp->old_state = cp->state;
 		/*
@@ -461,8 +436,9 @@ static void ip_vs_process_message(const char *buffer, const size_t buflen)
 		 * virtual service. If needed, we can do it for
 		 * non-fwmark persistent services.
 		 */
-		if (!(flags & IP_VS_CONN_F_TEMPLATE) && pp->timeout_table)
-			cp->timeout = pp->timeout_table[state];
+		pd = ip_vs_proto_data_get(net,cp->protocol);
+		if (!(flags & IP_VS_CONN_F_TEMPLATE) && pd && pd->timeout_table )
+			cp->timeout = pd->timeout_table[state];
 		else
 			cp->timeout = (3*60*HZ);
 		ip_vs_conn_put(cp);
@@ -503,8 +479,10 @@ static int set_mcast_if(struct sock *sk, char *ifname)
 {
 	struct net_device *dev;
 	struct inet_sock *inet = inet_sk(sk);
+	struct net *net = sock_net(sk);

-	if ((dev = __dev_get_by_name(&init_net, ifname)) == NULL)
+	BUG_ON(!net);
+	if ((dev = __dev_get_by_name(net, ifname)) == NULL)
 		return -ENODEV;

 	if (sk->sk_bound_dev_if && dev->ifindex != sk->sk_bound_dev_if)
@@ -523,30 +501,31 @@ static int set_mcast_if(struct sock *sk, char *ifname)
  *	Set the maximum length of sync message according to the
  *	specified interface's MTU.
  */
-static int set_sync_mesg_maxlen(int sync_state)
+static int set_sync_mesg_maxlen(struct net *net, int sync_state)
 {
 	struct net_device *dev;
 	int num;
+	struct netns_ipvs *ipvs = net->ipvs;

 	if (sync_state == IP_VS_STATE_MASTER) {
-		if ((dev = __dev_get_by_name(&init_net, ip_vs_master_mcast_ifn)) == NULL)
+		if ((dev = __dev_get_by_name(net, ipvs->master_mcast_ifn)) == NULL)
 			return -ENODEV;

 		num = (dev->mtu - sizeof(struct iphdr) -
 		       sizeof(struct udphdr) -
 		       SYNC_MESG_HEADER_LEN - 20) / SIMPLE_CONN_SIZE;
-		sync_send_mesg_maxlen = SYNC_MESG_HEADER_LEN +
+		ipvs->sync_send_mesg_maxlen = SYNC_MESG_HEADER_LEN +
 			SIMPLE_CONN_SIZE * min(num, MAX_CONNS_PER_SYNCBUFF);
 		IP_VS_DBG(7, "setting the maximum length of sync sending "
-			  "message %d.\n", sync_send_mesg_maxlen);
+			  "message %d.\n", ipvs->sync_send_mesg_maxlen);
 	} else if (sync_state == IP_VS_STATE_BACKUP) {
-		if ((dev = __dev_get_by_name(&init_net, ip_vs_backup_mcast_ifn)) == NULL)
+		if ((dev = __dev_get_by_name(net, ipvs->backup_mcast_ifn)) == NULL)
 			return -ENODEV;

-		sync_recv_mesg_maxlen = dev->mtu -
+		ipvs->sync_recv_mesg_maxlen = dev->mtu -
 			sizeof(struct iphdr) - sizeof(struct udphdr);
 		IP_VS_DBG(7, "setting the maximum length of sync receiving "
-			  "message %d.\n", sync_recv_mesg_maxlen);
+			  "message %d.\n", ipvs->sync_recv_mesg_maxlen);
 	}

 	return 0;
@@ -564,11 +543,13 @@ join_mcast_group(struct sock *sk, struct in_addr *addr, char *ifname)
 	struct ip_mreqn mreq;
 	struct net_device *dev;
 	int ret;
+	struct net *net = sock_net(sk);

+	BUG_ON(!net);
 	memset(&mreq, 0, sizeof(mreq));
 	memcpy(&mreq.imr_multiaddr, addr, sizeof(struct in_addr));

-	if ((dev = __dev_get_by_name(&init_net, ifname)) == NULL)
+	if ((dev = __dev_get_by_name(net, ifname)) == NULL)
 		return -ENODEV;
 	if (sk->sk_bound_dev_if && dev->ifindex != sk->sk_bound_dev_if)
 		return -EINVAL;
@@ -588,8 +569,10 @@ static int bind_mcastif_addr(struct socket *sock, char *ifname)
 	struct net_device *dev;
 	__be32 addr;
 	struct sockaddr_in sin;
+	struct net *net = sock_net(sock->sk);

-	if ((dev = __dev_get_by_name(&init_net, ifname)) == NULL)
+	BUG_ON(!net);
+	if ((dev = __dev_get_by_name(net, ifname)) == NULL)
 		return -ENODEV;

 	addr = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
@@ -611,19 +594,19 @@ static int bind_mcastif_addr(struct socket *sock, char *ifname)
 /*
  *      Set up sending multicast socket over UDP
  */
-static struct socket * make_send_sock(void)
+static struct socket * make_send_sock(struct net *net)
 {
 	struct socket *sock;
 	int result;

-	/* First create a socket */
-	result = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock);
+	/* First create a socket in current netns  */
+	result = sock_create(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock);
 	if (result < 0) {
 		pr_err("Error during creation of socket; terminating\n");
 		return ERR_PTR(result);
 	}

-	result = set_mcast_if(sock->sk, ip_vs_master_mcast_ifn);
+	result = set_mcast_if(sock->sk, net->ipvs->master_mcast_ifn);
 	if (result < 0) {
 		pr_err("Error setting outbound mcast interface\n");
 		goto error;
@@ -632,13 +615,14 @@ static struct socket * make_send_sock(void)
 	set_mcast_loop(sock->sk, 0);
 	set_mcast_ttl(sock->sk, 1);

-	result = bind_mcastif_addr(sock, ip_vs_master_mcast_ifn);
+	result = bind_mcastif_addr(sock, net->ipvs->master_mcast_ifn);
 	if (result < 0) {
 		pr_err("Error binding address of the mcast interface\n");
 		goto error;
 	}

-	result = sock->ops->connect(sock, (struct sockaddr *) &mcast_addr,
+	result = sock->ops->connect(sock,
+			(struct sockaddr *) &net->ipvs->sync_mcast_addr,
 			sizeof(struct sockaddr), 0);
 	if (result < 0) {
 		pr_err("Error connecting to the multicast addr\n");
@@ -656,13 +640,13 @@ static struct socket * make_send_sock(void)
 /*
  *      Set up receiving multicast socket over UDP
  */
-static struct socket * make_receive_sock(void)
+static struct socket * make_receive_sock(struct net *net)
 {
 	struct socket *sock;
 	int result;

-	/* First create a socket */
-	result = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock);
+	/* First create a socket in current netns */
+	result = sock_create(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock);
 	if (result < 0) {
 		pr_err("Error during creation of socket; terminating\n");
 		return ERR_PTR(result);
@@ -671,7 +655,8 @@ static struct socket * make_receive_sock(void)
 	/* it is equivalent to the REUSEADDR option in user-space */
 	sock->sk->sk_reuse = 1;

-	result = sock->ops->bind(sock, (struct sockaddr *) &mcast_addr,
+	result = sock->ops->bind(sock,
+			(struct sockaddr *) &net->ipvs->sync_mcast_addr,
 			sizeof(struct sockaddr));
 	if (result < 0) {
 		pr_err("Error binding to the multicast addr\n");
@@ -680,8 +665,8 @@ static struct socket * make_receive_sock(void)

 	/* join the multicast group */
 	result = join_mcast_group(sock->sk,
-			(struct in_addr *) &mcast_addr.sin_addr,
-			ip_vs_backup_mcast_ifn);
+			(struct in_addr *) &net->ipvs->sync_mcast_addr.sin_addr,
+			net->ipvs->backup_mcast_ifn);
 	if (result < 0) {
 		pr_err("Error joining to the multicast group\n");
 		goto error;
@@ -756,16 +741,17 @@ static int sync_thread_master(void *data)

 	pr_info("sync thread started: state = MASTER, mcast_ifn = %s, "
 		"syncid = %d\n",
-		ip_vs_master_mcast_ifn, ip_vs_master_syncid);
+		tinfo->net->ipvs->master_mcast_ifn,
+		tinfo->net->ipvs->master_syncid);

 	while (!kthread_should_stop()) {
-		while ((sb = sb_dequeue())) {
+		while ((sb = sb_dequeue(tinfo->net))) {
 			ip_vs_send_sync_msg(tinfo->sock, sb->mesg);
 			ip_vs_sync_buff_release(sb);
 		}

 		/* check if entries stay in curr_sb for 2 seconds */
-		sb = get_curr_sync_buff(2 * HZ);
+		sb = get_curr_sync_buff(tinfo->net, 2 * HZ);
 		if (sb) {
 			ip_vs_send_sync_msg(tinfo->sock, sb->mesg);
 			ip_vs_sync_buff_release(sb);
@@ -775,12 +761,12 @@ static int sync_thread_master(void *data)
 	}

 	/* clean up the sync_buff queue */
-	while ((sb=sb_dequeue())) {
+	while ((sb=sb_dequeue(tinfo->net))) {
 		ip_vs_sync_buff_release(sb);
 	}

 	/* clean up the current sync_buff */
-	if ((sb = get_curr_sync_buff(0))) {
+	if ((sb = get_curr_sync_buff(tinfo->net, 0))) {
 		ip_vs_sync_buff_release(sb);
 	}

@@ -796,10 +782,11 @@ static int sync_thread_backup(void *data)
 {
 	struct ip_vs_sync_thread_data *tinfo = data;
 	int len;
-
+
 	pr_info("sync thread started: state = BACKUP, mcast_ifn = %s, "
 		"syncid = %d\n",
-		ip_vs_backup_mcast_ifn, ip_vs_backup_syncid);
+		tinfo->net->ipvs->backup_mcast_ifn,
+		tinfo->net->ipvs->backup_syncid);

 	while (!kthread_should_stop()) {
 		wait_event_interruptible(*sk_sleep(tinfo->sock->sk),
@@ -809,16 +796,15 @@ static int sync_thread_backup(void *data)
 		/* do we have data now? */
 		while (!skb_queue_empty(&(tinfo->sock->sk->sk_receive_queue))) {
 			len = ip_vs_receive(tinfo->sock, tinfo->buf,
-					sync_recv_mesg_maxlen);
+					tinfo->net->ipvs->sync_recv_mesg_maxlen);
 			if (len <= 0) {
 				pr_err("receiving message error\n");
 				break;
 			}
-
-			/* disable bottom half, because it accesses the data
+			/* disable bottom half per netns, because it accesses the data
 			   shared by softirq while getting/creating conns */
 			local_bh_disable();
-			ip_vs_process_message(tinfo->buf, len);
+			ip_vs_process_message(tinfo->net, tinfo->buf, len);
 			local_bh_enable();
 		}
 	}
@@ -832,41 +818,43 @@ static int sync_thread_backup(void *data)
 }


-int start_sync_thread(int state, char *mcast_ifn, __u8 syncid)
+int start_sync_thread(struct net *net, int state, char *mcast_ifn, __u8 syncid)
 {
 	struct ip_vs_sync_thread_data *tinfo;
 	struct task_struct **realtask, *task;
 	struct socket *sock;
+	struct netns_ipvs *ipvs = net->ipvs;
 	char *name, *buf = NULL;
 	int (*threadfn)(void *data);
 	int result = -ENOMEM;

-	IP_VS_DBG(7, "%s(): pid %d\n", __func__, task_pid_nr(current));
+	IP_VS_DBG(7, "%s(): pid %d inc:%d\n", __func__, task_pid_nr(current),
+		                             ipvs->inc);
 	IP_VS_DBG(7, "Each ip_vs_sync_conn entry needs %Zd bytes\n",
 		  sizeof(struct ip_vs_sync_conn));

 	if (state == IP_VS_STATE_MASTER) {
-		if (sync_master_thread)
+		if (ipvs->sync_master_thread)
 			return -EEXIST;

-		strlcpy(ip_vs_master_mcast_ifn, mcast_ifn,
-			sizeof(ip_vs_master_mcast_ifn));
-		ip_vs_master_syncid = syncid;
-		realtask = &sync_master_thread;
-		name = "ipvs_syncmaster";
+		strlcpy(ipvs->master_mcast_ifn, mcast_ifn,
+			sizeof(ipvs->master_mcast_ifn));
+		ipvs->master_syncid = syncid;
+		realtask = &ipvs->sync_master_thread;
+		name = "ipvs_master:%d";
 		threadfn = sync_thread_master;
-		sock = make_send_sock();
+		sock = make_send_sock(net);
 	} else if (state == IP_VS_STATE_BACKUP) {
-		if (sync_backup_thread)
+		if (ipvs->sync_backup_thread)
 			return -EEXIST;

-		strlcpy(ip_vs_backup_mcast_ifn, mcast_ifn,
-			sizeof(ip_vs_backup_mcast_ifn));
-		ip_vs_backup_syncid = syncid;
-		realtask = &sync_backup_thread;
-		name = "ipvs_syncbackup";
+		strlcpy(ipvs->backup_mcast_ifn, mcast_ifn,
+			sizeof(ipvs->backup_mcast_ifn));
+		ipvs->backup_syncid = syncid;
+		realtask = &ipvs->sync_backup_thread;
+		name = "ipvs_backup:%d";
 		threadfn = sync_thread_backup;
-		sock = make_receive_sock();
+		sock = make_receive_sock(net);
 	} else {
 		return -EINVAL;
 	}
@@ -876,9 +864,9 @@ int start_sync_thread(int state, char *mcast_ifn, __u8 syncid)
 		goto out;
 	}

-	set_sync_mesg_maxlen(state);
+	set_sync_mesg_maxlen(net, state);
 	if (state == IP_VS_STATE_BACKUP) {
-		buf = kmalloc(sync_recv_mesg_maxlen, GFP_KERNEL);
+		buf = kmalloc(ipvs->sync_recv_mesg_maxlen, GFP_KERNEL);
 		if (!buf)
 			goto outsocket;
 	}
@@ -889,16 +877,17 @@ int start_sync_thread(int state, char *mcast_ifn, __u8 syncid)

 	tinfo->sock = sock;
 	tinfo->buf = buf;
+	tinfo->net = net;

-	task = kthread_run(threadfn, tinfo, name);
+	task = kthread_run(threadfn, tinfo, name, ipvs->inc);
 	if (IS_ERR(task)) {
 		result = PTR_ERR(task);
 		goto outtinfo;
 	}
-
+	IP_VS_DBG(1, "kthread %s started (%d)\n", name, task->pid);
 	/* mark as active */
 	*realtask = task;
-	ip_vs_sync_state |= state;
+	ipvs->sync_state |= state;

 	/* increase the module use count */
 	ip_vs_use_count_inc();
@@ -916,16 +905,19 @@ out:
 }


-int stop_sync_thread(int state)
+int stop_sync_thread(struct net *net, int state)
 {
+	struct netns_ipvs *ipvs = net->ipvs;
+
 	IP_VS_DBG(7, "%s(): pid %d\n", __func__, task_pid_nr(current));

 	if (state == IP_VS_STATE_MASTER) {
-		if (!sync_master_thread)
+		if (!ipvs->sync_master_thread)
 			return -ESRCH;

-		pr_info("stopping master sync thread %d ...\n",
-			task_pid_nr(sync_master_thread));
+		pr_info("stopping master sync thread %d  inc:%d...\n",
+			task_pid_nr(ipvs->sync_master_thread),
+			ipvs->inc);

 		/*
 		 * The lock synchronizes with sb_queue_tail(), so that we don't
@@ -933,21 +925,22 @@ int stop_sync_thread(int state)
 		 * progress of stopping the master sync daemon.
 		 */

-		spin_lock_bh(&ip_vs_sync_lock);
-		ip_vs_sync_state &= ~IP_VS_STATE_MASTER;
-		spin_unlock_bh(&ip_vs_sync_lock);
-		kthread_stop(sync_master_thread);
-		sync_master_thread = NULL;
+		spin_lock_bh(&ipvs->sync_lock);
+		ipvs->sync_state &= ~IP_VS_STATE_MASTER;
+		spin_unlock_bh(&ipvs->sync_lock);
+		kthread_stop(ipvs->sync_master_thread);
+		ipvs->sync_master_thread = NULL;
 	} else if (state == IP_VS_STATE_BACKUP) {
-		if (!sync_backup_thread)
+		if (!ipvs->sync_backup_thread)
 			return -ESRCH;

-		pr_info("stopping backup sync thread %d ...\n",
-			task_pid_nr(sync_backup_thread));
+		pr_info("stopping backup sync thread %d inc:%d...\n",
+			task_pid_nr(ipvs->sync_backup_thread),
+			ipvs->inc);

-		ip_vs_sync_state &= ~IP_VS_STATE_BACKUP;
-		kthread_stop(sync_backup_thread);
-		sync_backup_thread = NULL;
+		ipvs->sync_state &= ~IP_VS_STATE_BACKUP;
+		kthread_stop(ipvs->sync_backup_thread);
+		ipvs->sync_backup_thread = NULL;
 	} else {
 		return -EINVAL;
 	}
@@ -957,3 +950,41 @@ int stop_sync_thread(int state)

 	return 0;
 }
+
+/*
+ * Initialize data struct for each netns
+ */
+static int __net_init __ip_vs_sync_init(struct net *net)
+{
+	struct netns_ipvs *ipvs = net->ipvs;
+	INIT_LIST_HEAD(&ipvs->sync_queue);
+	spin_lock_init(&ipvs->sync_lock);
+	spin_lock_init(&ipvs->sync_buff_lock);
+
+	ipvs->sync_mcast_addr.sin_family = AF_INET;
+	ipvs->sync_mcast_addr.sin_port = cpu_to_be16(IP_VS_SYNC_PORT);
+	ipvs->sync_mcast_addr.sin_addr.s_addr = cpu_to_be32(IP_VS_SYNC_GROUP);
+	return 0;
+}
+
+static void __ip_vs_sync_cleanup(struct net *net)
+{
+	stop_sync_thread(net, net->ipvs->sync_state &
+			      (IP_VS_STATE_MASTER | IP_VS_STATE_BACKUP));
+	return;
+}
+static struct pernet_operations ipvs_sync_ops = {
+	.init = __ip_vs_sync_init,
+	.exit = __ip_vs_sync_cleanup,
+};
+
+
+int __init ip_vs_sync_init(void)
+{
+	return register_pernet_subsys(&ipvs_sync_ops);
+}
+
+void __exit ip_vs_sync_cleanup(void)
+{
+	unregister_pernet_subsys(&ipvs_sync_ops);
+}
diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
index a2e8497..d68178f 100644
--- a/net/netfilter/ipvs/ip_vs_xmit.c
+++ b/net/netfilter/ipvs/ip_vs_xmit.c
@@ -410,13 +410,15 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 	/* check if it is a connection of no-client-port */
 	if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT)) {
 		__be16 _pt, *p;
+		struct net *net;
 		p = skb_header_pointer(skb, iph->ihl*4, sizeof(_pt), &_pt);
 		if (p == NULL)
 			goto tx_error;
-		ip_vs_conn_fill_cport(cp, *p);
+		net = dev_net(skb->dev);
+		ip_vs_conn_fill_cport(net, cp, *p);
 		IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p));
 	}
-
+	IP_VS_DBG(10, "%s() dst:%x\n", __func__, iph->daddr);
 	if (!(rt = __ip_vs_get_out_rt(skb, cp, RT_TOS(iph->tos))))
 		goto tx_error_icmp;

@@ -486,14 +488,16 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
 	/* check if it is a connection of no-client-port */
 	if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT)) {
 		__be16 _pt, *p;
+		struct net *net;
 		p = skb_header_pointer(skb, sizeof(struct ipv6hdr),
 				       sizeof(_pt), &_pt);
 		if (p == NULL)
 			goto tx_error;
-		ip_vs_conn_fill_cport(cp, *p);
+		net = dev_net(skb->dev);
+		BUG_ON(!net);
+		ip_vs_conn_fill_cport(net, cp, *p);
 		IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p));
 	}
-
 	rt = __ip_vs_get_out_rt_v6(skb, cp, NULL, 0);
 	if (!rt)
 		goto tx_error_icmp;

-- 
Regards
Hans Schillstrom <hans.schillstrom@ericsson.com>

^ permalink raw reply related

* Re: BUG ? ipip unregister_netdevice_many()
From: Daniel Lezcano @ 2010-10-08 11:19 UTC (permalink / raw)
  To: Hans Schillstrom; +Cc: netdev@vger.kernel.org, Eric W. Biederman
In-Reply-To: <201010071048.12817.hans.schillstrom@ericsson.com>

On 10/07/2010 10:48 AM, Hans Schillstrom wrote:
> Hello
> I'm trying to exit a network name space and it doesn't work (or am I doing something wrong?)
> The only netdevices left are lo and the tunnels ip6tnl0, sit0 and tunl0 when exiting netns.
>
> A netns is created by lxc-execute with two interfaces eth0 eth1 (macvlan)
> (see conf file at the end)
>
> Kernel: net-next-2.6 top from 4 october 2010
>    

Hi Hans,

I tried to reproduce your problem but I just get a big kernel crash when 
exiting the container :/

The stack is different but it may be related to the same problem.

BUG: unable to handle kernel paging request at ffff88003ba453a0
IP: [<ffffffff813020b6>] macvlan_stop+0x57/0x7d
PGD 180b063 PUD 180f063 PMD 1ffdb067 PTE 3ba45160
Oops: 0002 [#1] DEBUG_PAGEALLOC
last sysfs file: /sys/devices/virtual/net/mc0PyXBA/type
CPU 0
Pid: 5, comm: kworker/u:0 Not tainted 2.6.36-rc7-next-20101007+ #11 /Bochs
RIP: 0010:[<ffffffff813020b6>]  [<ffffffff813020b6>] macvlan_stop+0x57/0x7d
RSP: 0018:ffff88003f111c30  EFLAGS: 00010246
RAX: 0000000000000000 RBX: ffff88003bdd1e60 RCX: 000000000000c100
RDX: ffff88003ba453a0 RSI: ffff88003f111d70 RDI: ffffffff810300e5
RBP: ffff88003f111c50 R08: ffff88003f111d70 R09: 00000000000000cc
R10: 0000000000000001 R11: ffff88003f111ba0 R12: ffff88003bdd1800
R13: ffff880039fec800 R14: ffff88003f111d70 R15: ffff88003ba06830
FS:  0000000000000000(0000) GS:ffffffff8181b000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 000000008005003b
CR2: ffff88003ba453a0 CR3: 000000003c284000 CR4: 00000000000006f0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
Process kworker/u:0 (pid: 5, threadinfo ffff88003f110000, task 
ffff88003f0f20a0)
Stack:
  ffffffff81657a10 ffff88003bdd1800 ffffffff81657a10 ffff88003bb23800
<0> ffff88003f111c70 ffffffff81362d09 ffff88003bdd1800 ffff88003f111cf0
<0> ffff88003f111c90 ffffffff81362d31 ffff88003ba067c0 ffff88003bdd1800
Call Trace:
  [<ffffffff81362d09>] __dev_close+0x75/0x83
  [<ffffffff81362d31>] dev_close+0x1a/0x3f
  [<ffffffff81362e38>] rollback_registered_many+0xe2/0x21c
  [<ffffffff81362f88>] unregister_netdevice_many+0x16/0x6d
  [<ffffffff8136314d>] default_device_exit_batch+0xa7/0xbb
  [<ffffffff8135db06>] ops_exit_list+0x4e/0x56
  [<ffffffff8135e285>] cleanup_net+0xf5/0x195
  [<ffffffff8103e084>] process_one_work+0x25d/0x3e7
  [<ffffffff8103e027>] ? process_one_work+0x200/0x3e7
  [<ffffffff8135e190>] ? cleanup_net+0x0/0x195
  [<ffffffff8103e54a>] worker_thread+0x1b5/0x342
  [<ffffffff8103e395>] ? worker_thread+0x0/0x342
  [<ffffffff81041495>] kthread+0x7c/0x84
  [<ffffffff810034f4>] kernel_thread_helper+0x4/0x10
  [<ffffffff814389ba>] ? restore_args+0x0/0x30
  [<ffffffff81041419>] ? kthread+0x0/0x84
  [<ffffffff810034f0>] ? kernel_thread_helper+0x0/0x10
Code: 00 00 02 74 0b 83 ce ff 4c 89 ef e8 ab eb 05 00 49 8b b4 24 a0 02 
00 00 4c 89 ef e8 b9 52 06 00 48 8b 43 18 48 8b 53 20 48 85 c0 <48> 89 
02 74 04 48 89 50 08 48 be 00 02 20 00 00 00 ad de 48 89
RIP  [<ffffffff813020b6>] macvlan_stop+0x57/0x7d
  RSP <ffff88003f111c30>
CR2: ffff88003ba453a0
---[ end trace 05c41c2103816005 ]---
BUG: unable to handle kernel paging request at fffffffffffffff8
IP: [<ffffffff810410bf>] kthread_data+0xb/0x11
PGD 180c067 PUD 180d067 PMD 0
Oops: 0000 [#2] DEBUG_PAGEALLOC
last sysfs file: /sys/devices/virtual/net/mc0PyXBA/type
CPU 0
Pid: 5, comm: kworker/u:0 Tainted: G      D     
2.6.36-rc7-next-20101007+ #11 /Bochs
RIP: 0010:[<ffffffff810410bf>]  [<ffffffff810410bf>] kthread_data+0xb/0x11
RSP: 0018:ffff88003f111868  EFLAGS: 00010096
RAX: 0000000000000000 RBX: 0000000000000000 RCX: ffff88003f111fd8
RDX: ffff88003f0f20a0 RSI: 0000000000000000 RDI: ffff88003f0f20a0
RBP: ffff88003f111868 R08: 0000000000000002 R09: 0000000000000001
R10: 0000000000000246 R11: 09f911029d74e35b R12: 0000000000000000
R13: ffff88003f111948 R14: ffff88003f0c60a0 R15: ffff88003f0f2218
FS:  0000000000000000(0000) GS:ffffffff8181b000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 000000008005003b
CR2: fffffffffffffff8 CR3: 000000003cb19000 CR4: 00000000000006f0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
Process kworker/u:0 (pid: 5, threadinfo ffff88003f110000, task 
ffff88003f0f20a0)
Stack:
  ffff88003f111888 ffffffff8103d4e4 ffff88003f111888 ffff88003f0f2310
<0> ffff88003f110010 ffff88003f0f20a0 ffff88003f111fd8 ffff88003f111fd8
Call Trace:
  [<ffffffff8103d4e4>] wq_worker_sleeping+0x10/0x76
  [<ffffffff81435ffe>] schedule+0xf4/0x405
  [<ffffffff8102ebc4>] do_exit+0x647/0x660
  [<ffffffff81005ba0>] oops_end+0xb3/0xbb
  [<ffffffff8101c6b8>] no_context+0x1f5/0x204
  [<ffffffff8101c854>] __bad_area_nosemaphore+0x18d/0x1b0
  [<ffffffff8101c885>] bad_area_nosemaphore+0xe/0x10
  [<ffffffff8101cb52>] do_page_fault+0x16b/0x34d
  [<ffffffff81300a85>] ? ei_set_multicast_list+0x1f/0x3d
  [<ffffffff81437f46>] ? trace_hardirqs_off_thunk+0x3a/0x3c
  [<ffffffff81438b9f>] page_fault+0x1f/0x30
  [<ffffffff810300e5>] ? local_bh_enable_ip+0xb7/0xbd
  [<ffffffff813020b6>] ? macvlan_stop+0x57/0x7d
  [<ffffffff81362d09>] __dev_close+0x75/0x83
  [<ffffffff81362d31>] dev_close+0x1a/0x3f
  [<ffffffff81362e38>] rollback_registered_many+0xe2/0x21c
  [<ffffffff81362f88>] unregister_netdevice_many+0x16/0x6d
  [<ffffffff8136314d>] default_device_exit_batch+0xa7/0xbb
  [<ffffffff8135db06>] ops_exit_list+0x4e/0x56
  [<ffffffff8135e285>] cleanup_net+0xf5/0x195
  [<ffffffff8103e084>] process_one_work+0x25d/0x3e7
  [<ffffffff8103e027>] ? process_one_work+0x200/0x3e7
  [<ffffffff8135e190>] ? cleanup_net+0x0/0x195
  [<ffffffff8103e54a>] worker_thread+0x1b5/0x342
  [<ffffffff8103e395>] ? worker_thread+0x0/0x342
  [<ffffffff81041495>] kthread+0x7c/0x84
  [<ffffffff810034f4>] kernel_thread_helper+0x4/0x10
  [<ffffffff814389ba>] ? restore_args+0x0/0x30
  [<ffffffff81041419>] ? kthread+0x0/0x84
  [<ffffffff810034f0>] ? kernel_thread_helper+0x0/0x10
Code: 5c 41 5d 41 5e c9 c3 90 55 48 8b 04 25 40 a0 81 81 48 8b 80 18 02 
00 00 48 89 e5 8b 40 f0 c9 c3 48 8b 87 18 02 00 00 55 48 89 e5 <48> 8b 
40 f8 c9 c3 48 89 f0 c1 ee 06 55 89 f6 83 e0 3f 48 c1 e6
RIP  [<ffffffff810410bf>] kthread_data+0xb/0x11
  RSP <ffff88003f111868>
CR2: fffffffffffffff8
---[ end trace 05c41c2103816006 ]---



Thanks
   -- Daniel

> I added some printk's in ipip.c  ipip_exit_net()
> ...
>          rtnl_lock();
>          printk(KERN_ERR "ipip_exit_net(enter)\n");
>          ipip_destroy_tunnels(ipn,&list);
>          printk(KERN_ERR "ipip_exit_net(1)\n");
>          unregister_netdevice_queue(ipn->fb_tunnel_dev,&list);
>          printk(KERN_ERR "ipip_exit_net(2)\n");
>          unregister_netdevice_many(&list);
>          printk(KERN_ERR "ipip_exit_net(3)\n");
>          rtnl_unlock();
>          printk(KERN_ERR "ipip_exit_net(exit)\n");
>
>
> Exit steps:
> ===== Screen dump =====
>
>   # ifconfig eth0  0.0.0.0  down
>   # ifconfig eth1  0.0.0.0  down
>   # ifconfig lo  0.0.0.0  down
>   # ip li de eth0
>   # ip li de eth1
>   # ifconfig -a
> ip6tnl0   Link encap:UNSPEC  HWaddr 00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00
>            NOARP  MTU:1460  Metric:1
>            RX packets:0 errors:0 dropped:0 overruns:0 frame:0
>            TX packets:0 errors:0 dropped:0 overruns:0 carrier:0
>            collisions:0 txqueuelen:0
>            RX bytes:0 (0.0 B)  TX bytes:0 (0.0 B)
>
> lo        Link encap:Local Loopback
>            inet addr:127.0.0.1  Mask:255.0.0.0
>            LOOPBACK  MTU:16436  Metric:1
>            RX packets:0 errors:0 dropped:0 overruns:0 frame:0
>            TX packets:0 errors:0 dropped:0 overruns:0 carrier:0
>            collisions:0 txqueuelen:0
>            RX bytes:0 (0.0 B)  TX bytes:0 (0.0 B)
>
> sit0      Link encap:IPv6-in-IPv4
>            NOARP  MTU:1480  Metric:1
>            RX packets:0 errors:0 dropped:0 overruns:0 frame:0
>            TX packets:0 errors:0 dropped:0 overruns:0 carrier:0
>            collisions:0 txqueuelen:0
>            RX bytes:0 (0.0 B)  TX bytes:0 (0.0 B)
>
> tunl0     Link encap:UNSPEC  HWaddr 00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00
>            NOARP  MTU:1480  Metric:1
>            RX packets:0 errors:0 dropped:0 overruns:0 frame:0
>            TX packets:0 errors:0 dropped:0 overruns:0 carrier:0
>            collisions:0 txqueuelen:0
>            RX bytes:0 (0.0 B)  TX bytes:0 (0.0 B)
>
>   # ps
>    PID USER       VSZ STAT COMMAND
>      1 root     12412 S    /usr/lib64/lxc/lxc-init -- /var/bin/init
>      2 root      4540 S    /bin/ash /var/bin/init
>      7 root      6640 S    inetd
>      8 root      4544 S    /bin/ash
>     26 root      4544 R    ps
>   # lsmod
> Module                  Size  Used by    Not tainted
> macvlan                 8709  0
> pcnet32                29549  0
> tg3                   112093  0
> libphy                 21043  1 tg3
>   # kill 7 2
>   # ps
>    PID USER       VSZ STAT COMMAND
>      1 root     12412 S    /usr/lib64/lxc/lxc-init -- /var/bin/init
>      8 root      4544 S    /bin/ash
>     28 root      4544 R    ps
>   # exit  ( here is the exit from netns  )
>   # ipip_exit_net(enter)
> ipip_exit_net(1)
> ipip_exit_net(2)
> ------------[ cut here ]------------
> WARNING: at /home/hans/evip/kvm/net-next-2.6/kernel/sysctl.c:1953 unregister_sysctl_table+0xc7/0xf9()
> Hardware name: Bochs
> Modules linked in: macvlan pcnet32 tg3 libphy
> Pid: 5, comm: kworker/u:0 Not tainted 2.6.36-rc3+ #7
> Call Trace:
>   [<ffffffff8103e281>] warn_slowpath_common+0x85/0x9d
>   [<ffffffff8103e2b3>] warn_slowpath_null+0x1a/0x1c
>   [<ffffffff81045e64>] unregister_sysctl_table+0xc7/0xf9
>   [<ffffffff812c86a5>] neigh_sysctl_unregister+0x27/0x3f
>   [<ffffffff81342108>] addrconf_ifdown+0x415/0x45e
>   [<ffffffff81342b98>] addrconf_notify+0x756/0x7fe
>   [<ffffffff812cacfb>] ? neigh_ifdown+0xc3/0xd4
>   [<ffffffff813622b3>] ? ip6mr_device_event+0x8d/0x9e
>   [<ffffffff8105eddb>] notifier_call_chain+0x37/0x63
>   [<ffffffff8105ee8b>] raw_notifier_call_chain+0x14/0x16
>   [<ffffffff812c15c7>] call_netdevice_notifiers+0x4a/0x4f
>   [<ffffffff812c1c1b>] rollback_registered_many+0x121/0x208
>   [<ffffffff812c1d1d>] unregister_netdevice_many+0x1b/0x71
>   [<ffffffff81324209>] ipip_exit_net+0xea/0x11a
>   [<ffffffff812bc941>] ? cleanup_net+0x0/0x198
>   [<ffffffff812bc2cf>] ops_exit_list+0x2a/0x5b
>   [<ffffffff812bca39>] cleanup_net+0xf8/0x198
>   [<ffffffff810568c7>] process_one_work+0x2a2/0x44d
>   [<ffffffff81056e35>] worker_thread+0x1db/0x34e
>   [<ffffffff81056c5a>] ? worker_thread+0x0/0x34e
>   [<ffffffff8105a030>] kthread+0x82/0x8a
>   [<ffffffff81003954>] kernel_thread_helper+0x4/0x10
>   [<ffffffff81059fae>] ? kthread+0x0/0x8a
>   [<ffffffff81003950>] ? kernel_thread_helper+0x0/0x10
> ---[ end trace 939b5185219f32e7 ]---
> ipip_exit_net(3)
> ipip_exit_net(exit)
> unregister_netdevice: waiting for lo to become free. Usage count = 4
> unregister_netdevice: waiting for lo to become free. Usage count = 4
> unregister_netdevice: waiting for lo to become free. Usage count = 4
> ....
> ...
> ===== End of screen dump =====
>
> lxc conf file:
> # Container with network virtualized using the vlan device driver
> # Local eth0 uplink
> lxc.utsname = fee_0
> lxc.network.type = macvlan
> lxc.network.flags = up
> lxc.network.link = eth1
> lxc.network.hwaddr = 00:00:04:01:01:01
> lxc.network.ipv4 = 192.168.1.21/24
> lxc.network.ipv6 = 2003::2:1:1/96
> # local eth1 downlink - to the RS farm
> lxc.network.type = macvlan
> lxc.network.flags = up
> lxc.network.link = eth0
> lxc.network.hwaddr = 00:00:03:01:01:01
> lxc.network.ipv4 = 192.168.0.21/24
> lxc.network.ipv6 = 2003::1:1:1/96
> lxc.mount.entry = /var/lib/lxc/fee_0/var /var none rw,bind 0 0
>    

^ permalink raw reply

* Re: BUG ? ipip unregister_netdevice_many()
From: Hans Schillstrom @ 2010-10-08 11:53 UTC (permalink / raw)
  To: Daniel Lezcano; +Cc: netdev@vger.kernel.org, Eric W. Biederman
In-Reply-To: <4CAEFE2C.3010007@free.fr>

On Friday 08 October 2010 13:19:08 Daniel Lezcano wrote:
Hello
> On 10/07/2010 10:48 AM, Hans Schillstrom wrote:
> > Hello
> > I'm trying to exit a network name space and it doesn't work (or am I doing something wrong?)
> > The only netdevices left are lo and the tunnels ip6tnl0, sit0 and tunl0 when exiting netns.
> >
> > A netns is created by lxc-execute with two interfaces eth0 eth1 (macvlan)
> > (see conf file at the end)
> >
> > Kernel: net-next-2.6 top from 4 october 2010
> >
> 
> Hi Hans,
> 
> I tried to reproduce your problem but I just get a big kernel crash when
> exiting the container :/
> 
> The stack is different but it may be related to the same problem.
> 
> BUG: unable to handle kernel paging request at ffff88003ba453a0
> IP: [<ffffffff813020b6>] macvlan_stop+0x57/0x7d
> PGD 180b063 PUD 180f063 PMD 1ffdb067 PTE 3ba45160
> Oops: 0002 [#1] DEBUG_PAGEALLOC
> last sysfs file: /sys/devices/virtual/net/mc0PyXBA/type
> CPU 0
> Pid: 5, comm: kworker/u:0 Not tainted 2.6.36-rc7-next-20101007+ #11 /Bochs
> RIP: 0010:[<ffffffff813020b6>]  [<ffffffff813020b6>] macvlan_stop+0x57/0x7d
> RSP: 0018:ffff88003f111c30  EFLAGS: 00010246
> RAX: 0000000000000000 RBX: ffff88003bdd1e60 RCX: 000000000000c100
> RDX: ffff88003ba453a0 RSI: ffff88003f111d70 RDI: ffffffff810300e5
> RBP: ffff88003f111c50 R08: ffff88003f111d70 R09: 00000000000000cc
> R10: 0000000000000001 R11: ffff88003f111ba0 R12: ffff88003bdd1800
> R13: ffff880039fec800 R14: ffff88003f111d70 R15: ffff88003ba06830
> FS:  0000000000000000(0000) GS:ffffffff8181b000(0000) knlGS:0000000000000000
> CS:  0010 DS: 0000 ES: 0000 CR0: 000000008005003b
> CR2: ffff88003ba453a0 CR3: 000000003c284000 CR4: 00000000000006f0
> DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
> DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
> Process kworker/u:0 (pid: 5, threadinfo ffff88003f110000, task
> ffff88003f0f20a0)
> Stack:
>   ffffffff81657a10 ffff88003bdd1800 ffffffff81657a10 ffff88003bb23800
> <0> ffff88003f111c70 ffffffff81362d09 ffff88003bdd1800 ffff88003f111cf0
> <0> ffff88003f111c90 ffffffff81362d31 ffff88003ba067c0 ffff88003bdd1800
> Call Trace:
>   [<ffffffff81362d09>] __dev_close+0x75/0x83
>   [<ffffffff81362d31>] dev_close+0x1a/0x3f
>   [<ffffffff81362e38>] rollback_registered_many+0xe2/0x21c
>   [<ffffffff81362f88>] unregister_netdevice_many+0x16/0x6d
>   [<ffffffff8136314d>] default_device_exit_batch+0xa7/0xbb
>   [<ffffffff8135db06>] ops_exit_list+0x4e/0x56
>   [<ffffffff8135e285>] cleanup_net+0xf5/0x195
>   [<ffffffff8103e084>] process_one_work+0x25d/0x3e7
>   [<ffffffff8103e027>] ? process_one_work+0x200/0x3e7
>   [<ffffffff8135e190>] ? cleanup_net+0x0/0x195
>   [<ffffffff8103e54a>] worker_thread+0x1b5/0x342
>   [<ffffffff8103e395>] ? worker_thread+0x0/0x342
>   [<ffffffff81041495>] kthread+0x7c/0x84
>   [<ffffffff810034f4>] kernel_thread_helper+0x4/0x10
>   [<ffffffff814389ba>] ? restore_args+0x0/0x30
>   [<ffffffff81041419>] ? kthread+0x0/0x84
>   [<ffffffff810034f0>] ? kernel_thread_helper+0x0/0x10
> Code: 00 00 02 74 0b 83 ce ff 4c 89 ef e8 ab eb 05 00 49 8b b4 24 a0 02
> 00 00 4c 89 ef e8 b9 52 06 00 48 8b 43 18 48 8b 53 20 48 85 c0 <48> 89
> 02 74 04 48 89 50 08 48 be 00 02 20 00 00 00 ad de 48 89
> RIP  [<ffffffff813020b6>] macvlan_stop+0x57/0x7d
>   RSP <ffff88003f111c30>
> CR2: ffff88003ba453a0
> ---[ end trace 05c41c2103816005 ]---
> BUG: unable to handle kernel paging request at fffffffffffffff8
> IP: [<ffffffff810410bf>] kthread_data+0xb/0x11
> PGD 180c067 PUD 180d067 PMD 0
> Oops: 0000 [#2] DEBUG_PAGEALLOC
> last sysfs file: /sys/devices/virtual/net/mc0PyXBA/type
> CPU 0
> Pid: 5, comm: kworker/u:0 Tainted: G      D
> 2.6.36-rc7-next-20101007+ #11 /Bochs
> RIP: 0010:[<ffffffff810410bf>]  [<ffffffff810410bf>] kthread_data+0xb/0x11
> RSP: 0018:ffff88003f111868  EFLAGS: 00010096
> RAX: 0000000000000000 RBX: 0000000000000000 RCX: ffff88003f111fd8
> RDX: ffff88003f0f20a0 RSI: 0000000000000000 RDI: ffff88003f0f20a0
> RBP: ffff88003f111868 R08: 0000000000000002 R09: 0000000000000001
> R10: 0000000000000246 R11: 09f911029d74e35b R12: 0000000000000000
> R13: ffff88003f111948 R14: ffff88003f0c60a0 R15: ffff88003f0f2218
> FS:  0000000000000000(0000) GS:ffffffff8181b000(0000) knlGS:0000000000000000
> CS:  0010 DS: 0000 ES: 0000 CR0: 000000008005003b
> CR2: fffffffffffffff8 CR3: 000000003cb19000 CR4: 00000000000006f0
> DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
> DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
> Process kworker/u:0 (pid: 5, threadinfo ffff88003f110000, task
> ffff88003f0f20a0)
> Stack:
>   ffff88003f111888 ffffffff8103d4e4 ffff88003f111888 ffff88003f0f2310
> <0> ffff88003f110010 ffff88003f0f20a0 ffff88003f111fd8 ffff88003f111fd8
> Call Trace:
>   [<ffffffff8103d4e4>] wq_worker_sleeping+0x10/0x76
>   [<ffffffff81435ffe>] schedule+0xf4/0x405
>   [<ffffffff8102ebc4>] do_exit+0x647/0x660
>   [<ffffffff81005ba0>] oops_end+0xb3/0xbb
>   [<ffffffff8101c6b8>] no_context+0x1f5/0x204
>   [<ffffffff8101c854>] __bad_area_nosemaphore+0x18d/0x1b0
>   [<ffffffff8101c885>] bad_area_nosemaphore+0xe/0x10
>   [<ffffffff8101cb52>] do_page_fault+0x16b/0x34d
>   [<ffffffff81300a85>] ? ei_set_multicast_list+0x1f/0x3d
>   [<ffffffff81437f46>] ? trace_hardirqs_off_thunk+0x3a/0x3c
>   [<ffffffff81438b9f>] page_fault+0x1f/0x30
>   [<ffffffff810300e5>] ? local_bh_enable_ip+0xb7/0xbd
>   [<ffffffff813020b6>] ? macvlan_stop+0x57/0x7d
>   [<ffffffff81362d09>] __dev_close+0x75/0x83
>   [<ffffffff81362d31>] dev_close+0x1a/0x3f
>   [<ffffffff81362e38>] rollback_registered_many+0xe2/0x21c
>   [<ffffffff81362f88>] unregister_netdevice_many+0x16/0x6d
>   [<ffffffff8136314d>] default_device_exit_batch+0xa7/0xbb
>   [<ffffffff8135db06>] ops_exit_list+0x4e/0x56
>   [<ffffffff8135e285>] cleanup_net+0xf5/0x195
>   [<ffffffff8103e084>] process_one_work+0x25d/0x3e7
>   [<ffffffff8103e027>] ? process_one_work+0x200/0x3e7
>   [<ffffffff8135e190>] ? cleanup_net+0x0/0x195
>   [<ffffffff8103e54a>] worker_thread+0x1b5/0x342
>   [<ffffffff8103e395>] ? worker_thread+0x0/0x342
>   [<ffffffff81041495>] kthread+0x7c/0x84
>   [<ffffffff810034f4>] kernel_thread_helper+0x4/0x10
>   [<ffffffff814389ba>] ? restore_args+0x0/0x30
>   [<ffffffff81041419>] ? kthread+0x0/0x84
>   [<ffffffff810034f0>] ? kernel_thread_helper+0x0/0x10
> Code: 5c 41 5d 41 5e c9 c3 90 55 48 8b 04 25 40 a0 81 81 48 8b 80 18 02
> 00 00 48 89 e5 8b 40 f0 c9 c3 48 8b 87 18 02 00 00 55 48 89 e5 <48> 8b
> 40 f8 c9 c3 48 89 f0 c1 ee 06 55 89 f6 83 e0 3f 48 c1 e6
> RIP  [<ffffffff810410bf>] kthread_data+0xb/0x11
>   RSP <ffff88003f111868>
> CR2: fffffffffffffff8
> ---[ end trace 05c41c2103816006 ]---
> 
> 
> 
> Thanks
>    -- Daniel

I did the same setup without any tunnel modules loaded, and then it almost worked,
except for the freeing of the loopback interface :-(
"unregister_netdevice: waiting for lo to become free. Usage count = 4"

When adding a tunnel module (here ip6_tunnel), you'll have the crash.

ex: 
/var/lib/lxc # ifconfig -a
ip6tnl0   Link encap:UNSPEC  HWaddr 00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00  
          NOARP  MTU:1460  Metric:1
          RX packets:0 errors:0 dropped:0 overruns:0 frame:0
          TX packets:0 errors:0 dropped:0 overruns:0 carrier:0
          collisions:0 txqueuelen:0 
          RX bytes:0 (0.0 B)  TX bytes:0 (0.0 B)

lo        Link encap:Local Loopback  
          LOOPBACK  MTU:16436  Metric:1
          RX packets:0 errors:0 dropped:0 overruns:0 frame:0
          TX packets:0 errors:0 dropped:0 overruns:0 carrier:0
          collisions:0 txqueuelen:0 
          RX bytes:0 (0.0 B)  TX bytes:0 (0.0 B)

 # ifconfig
 # ps
  PID USER       VSZ STAT COMMAND
    1 root     12412 S    /usr/lib64/lxc/lxc-init -- /var/bin/init
    2 root      4540 S    /bin/ash /var/bin/init
    6 root      4544 S    /bin/ash
   16 root      4544 R    ps
 # kill 2
 # ^D (exit of ns)
 # ------------[ cut here ]------------
WARNING: at /home/hans/evip/kvm/net-next-2.6/kernel/sysctl.c:1953 unregister_sysctl_table+0xc7/0xf9()
Hardware name: Bochs
Modules linked in: macvlan ip6_tunnel tunnel6 pcnet32 tg3 libphy
Pid: 5, comm: kworker/u:0 Not tainted 2.6.36-rc3 #2
Call Trace:
 [<ffffffff8103e281>] warn_slowpath_common+0x85/0x9d
 [<ffffffff8103e2b3>] warn_slowpath_null+0x1a/0x1c
 [<ffffffff81045e64>] unregister_sysctl_table+0xc7/0xf9
 [<ffffffff812c86a5>] neigh_sysctl_unregister+0x27/0x3f
 [<ffffffff81340c75>] addrconf_ifdown+0x415/0x45e
 [<ffffffff81341705>] addrconf_notify+0x756/0x7fe
 [<ffffffff812cacfb>] ? neigh_ifdown+0xc3/0xd4
 [<ffffffff81360eb3>] ? ip6mr_device_event+0x8d/0x9e
 [<ffffffff8105eddb>] notifier_call_chain+0x37/0x63
 [<ffffffff8105ee8b>] raw_notifier_call_chain+0x14/0x16
 [<ffffffff812c15c7>] call_netdevice_notifiers+0x4a/0x4f
 [<ffffffff812c1c1b>] rollback_registered_many+0x121/0x208
 [<ffffffff812c1d1d>] unregister_netdevice_many+0x1b/0x71
 [<ffffffffa0047244>] ip6_tnl_exit_net+0xa4/0xb8 [ip6_tunnel]
 [<ffffffff812bc941>] ? cleanup_net+0x0/0x198
 [<ffffffff812bc2cf>] ops_exit_list+0x2a/0x5b
 [<ffffffff812bca39>] cleanup_net+0xf8/0x198
 [<ffffffff810568c7>] process_one_work+0x2a2/0x44d
 [<ffffffff81056e35>] worker_thread+0x1db/0x34e
 [<ffffffff81056c5a>] ? worker_thread+0x0/0x34e
 [<ffffffff8105a030>] kthread+0x82/0x8a
 [<ffffffff81003954>] kernel_thread_helper+0x4/0x10
 [<ffffffff81059fae>] ? kthread+0x0/0x8a
 [<ffffffff81003950>] ? kernel_thread_helper+0x0/0x10
---[ end trace eb3bc950cf9a8748 ]---
unregister_netdevice: waiting for lo to become free. Usage count = 4
unregister_netdevice: waiting for lo to become free. Usage count = 4
unregister_netdevice: waiting for lo to become free. Usage count = 4

Regards 
Hans

> 
> > I added some printk's in ipip.c  ipip_exit_net()
> > ...
> >          rtnl_lock();
> >          printk(KERN_ERR "ipip_exit_net(enter)\n");
> >          ipip_destroy_tunnels(ipn,&list);
> >          printk(KERN_ERR "ipip_exit_net(1)\n");
> >          unregister_netdevice_queue(ipn->fb_tunnel_dev,&list);
> >          printk(KERN_ERR "ipip_exit_net(2)\n");
> >          unregister_netdevice_many(&list);
> >          printk(KERN_ERR "ipip_exit_net(3)\n");
> >          rtnl_unlock();
> >          printk(KERN_ERR "ipip_exit_net(exit)\n");
> >
> >
> > Exit steps:
> > ===== Screen dump =====
> >
> >   # ifconfig eth0  0.0.0.0  down
> >   # ifconfig eth1  0.0.0.0  down
> >   # ifconfig lo  0.0.0.0  down
> >   # ip li de eth0
> >   # ip li de eth1
> >   # ifconfig -a
> > ip6tnl0   Link encap:UNSPEC  HWaddr 00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00
> >            NOARP  MTU:1460  Metric:1
> >            RX packets:0 errors:0 dropped:0 overruns:0 frame:0
> >            TX packets:0 errors:0 dropped:0 overruns:0 carrier:0
> >            collisions:0 txqueuelen:0
> >            RX bytes:0 (0.0 B)  TX bytes:0 (0.0 B)
> >
> > lo        Link encap:Local Loopback
> >            inet addr:127.0.0.1  Mask:255.0.0.0
> >            LOOPBACK  MTU:16436  Metric:1
> >            RX packets:0 errors:0 dropped:0 overruns:0 frame:0
> >            TX packets:0 errors:0 dropped:0 overruns:0 carrier:0
> >            collisions:0 txqueuelen:0
> >            RX bytes:0 (0.0 B)  TX bytes:0 (0.0 B)
> >
> > sit0      Link encap:IPv6-in-IPv4
> >            NOARP  MTU:1480  Metric:1
> >            RX packets:0 errors:0 dropped:0 overruns:0 frame:0
> >            TX packets:0 errors:0 dropped:0 overruns:0 carrier:0
> >            collisions:0 txqueuelen:0
> >            RX bytes:0 (0.0 B)  TX bytes:0 (0.0 B)
> >
> > tunl0     Link encap:UNSPEC  HWaddr 00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00
> >            NOARP  MTU:1480  Metric:1
> >            RX packets:0 errors:0 dropped:0 overruns:0 frame:0
> >            TX packets:0 errors:0 dropped:0 overruns:0 carrier:0
> >            collisions:0 txqueuelen:0
> >            RX bytes:0 (0.0 B)  TX bytes:0 (0.0 B)
> >
> >   # ps
> >    PID USER       VSZ STAT COMMAND
> >      1 root     12412 S    /usr/lib64/lxc/lxc-init -- /var/bin/init
> >      2 root      4540 S    /bin/ash /var/bin/init
> >      7 root      6640 S    inetd
> >      8 root      4544 S    /bin/ash
> >     26 root      4544 R    ps
> >   # lsmod
> > Module                  Size  Used by    Not tainted
> > macvlan                 8709  0
> > pcnet32                29549  0
> > tg3                   112093  0
> > libphy                 21043  1 tg3
> >   # kill 7 2
> >   # ps
> >    PID USER       VSZ STAT COMMAND
> >      1 root     12412 S    /usr/lib64/lxc/lxc-init -- /var/bin/init
> >      8 root      4544 S    /bin/ash
> >     28 root      4544 R    ps
> >   # exit  ( here is the exit from netns  )
> >   # ipip_exit_net(enter)
> > ipip_exit_net(1)
> > ipip_exit_net(2)
> > ------------[ cut here ]------------
> > WARNING: at /home/hans/evip/kvm/net-next-2.6/kernel/sysctl.c:1953 unregister_sysctl_table+0xc7/0xf9()
> > Hardware name: Bochs
> > Modules linked in: macvlan pcnet32 tg3 libphy
> > Pid: 5, comm: kworker/u:0 Not tainted 2.6.36-rc3+ #7
> > Call Trace:
> >   [<ffffffff8103e281>] warn_slowpath_common+0x85/0x9d
> >   [<ffffffff8103e2b3>] warn_slowpath_null+0x1a/0x1c
> >   [<ffffffff81045e64>] unregister_sysctl_table+0xc7/0xf9
> >   [<ffffffff812c86a5>] neigh_sysctl_unregister+0x27/0x3f
> >   [<ffffffff81342108>] addrconf_ifdown+0x415/0x45e
> >   [<ffffffff81342b98>] addrconf_notify+0x756/0x7fe
> >   [<ffffffff812cacfb>] ? neigh_ifdown+0xc3/0xd4
> >   [<ffffffff813622b3>] ? ip6mr_device_event+0x8d/0x9e
> >   [<ffffffff8105eddb>] notifier_call_chain+0x37/0x63
> >   [<ffffffff8105ee8b>] raw_notifier_call_chain+0x14/0x16
> >   [<ffffffff812c15c7>] call_netdevice_notifiers+0x4a/0x4f
> >   [<ffffffff812c1c1b>] rollback_registered_many+0x121/0x208
> >   [<ffffffff812c1d1d>] unregister_netdevice_many+0x1b/0x71
> >   [<ffffffff81324209>] ipip_exit_net+0xea/0x11a
> >   [<ffffffff812bc941>] ? cleanup_net+0x0/0x198
> >   [<ffffffff812bc2cf>] ops_exit_list+0x2a/0x5b
> >   [<ffffffff812bca39>] cleanup_net+0xf8/0x198
> >   [<ffffffff810568c7>] process_one_work+0x2a2/0x44d
> >   [<ffffffff81056e35>] worker_thread+0x1db/0x34e
> >   [<ffffffff81056c5a>] ? worker_thread+0x0/0x34e
> >   [<ffffffff8105a030>] kthread+0x82/0x8a
> >   [<ffffffff81003954>] kernel_thread_helper+0x4/0x10
> >   [<ffffffff81059fae>] ? kthread+0x0/0x8a
> >   [<ffffffff81003950>] ? kernel_thread_helper+0x0/0x10
> > ---[ end trace 939b5185219f32e7 ]---
> > ipip_exit_net(3)
> > ipip_exit_net(exit)
> > unregister_netdevice: waiting for lo to become free. Usage count = 4
> > unregister_netdevice: waiting for lo to become free. Usage count = 4
> > unregister_netdevice: waiting for lo to become free. Usage count = 4
> > ....
> > ...
> > ===== End of screen dump =====
> >
> > lxc conf file:
> > # Container with network virtualized using the vlan device driver
> > # Local eth0 uplink
> > lxc.utsname = fee_0
> > lxc.network.type = macvlan
> > lxc.network.flags = up
> > lxc.network.link = eth1
> > lxc.network.hwaddr = 00:00:04:01:01:01
> > lxc.network.ipv4 = 192.168.1.21/24
> > lxc.network.ipv6 = 2003::2:1:1/96
> > # local eth1 downlink - to the RS farm
> > lxc.network.type = macvlan
> > lxc.network.flags = up
> > lxc.network.link = eth0
> > lxc.network.hwaddr = 00:00:03:01:01:01
> > lxc.network.ipv4 = 192.168.0.21/24
> > lxc.network.ipv6 = 2003::1:1:1/96
> > lxc.mount.entry = /var/lib/lxc/fee_0/var /var none rw,bind 0 0
> >
> 

-- 
Regards
Hans Schillstrom <hans.schillstrom@ericsson.com>

^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox