Netdev List
 help / color / mirror / Atom feed
* [PATCH NEXT 5/8] qlcnic: use IDC defined timeout value
From: Amit Kumar Salecha @ 2010-03-31 12:03 UTC (permalink / raw)
  To: davem; +Cc: netdev, ameen.rahman, Sucheta Chakraborty
In-Reply-To: <1270037026-9062-1-git-send-email-amit.salecha@qlogic.com>

From: Sucheta Chakraborty <sucheta.chakraborty@qlogic.com>

o Read the IDC-defined timeout values from ROM and use them.
o While resetting the chip, don't wait more than reset_ack_timeo
  seconds for the other PCI functions to respond.

Signed-off-by: Sucheta Chakraborty <sucheta.chakraborty@qlogic.com>
Signed-off-by: Amit Kumar Salecha <amit.salecha@qlogic.com>
---
 drivers/net/qlcnic/qlcnic.h      |    4 +++-
 drivers/net/qlcnic/qlcnic_hdr.h  |    2 ++
 drivers/net/qlcnic/qlcnic_init.c |   16 ++++++++++++++++
 drivers/net/qlcnic/qlcnic_main.c |   26 ++++++++++++++++----------
 4 files changed, 37 insertions(+), 11 deletions(-)

diff --git a/drivers/net/qlcnic/qlcnic.h b/drivers/net/qlcnic/qlcnic.h
index 8a3446d..87cd1a7 100644
--- a/drivers/net/qlcnic/qlcnic.h
+++ b/drivers/net/qlcnic/qlcnic.h
@@ -958,8 +958,9 @@ struct qlcnic_adapter {
 	u8 dev_state;
 	u8 diag_test;
 	u8 diag_cnt;
+	u8 reset_ack_timeo;
+	u8 dev_init_timeo;
 	u8 rsrd1;
-	u16 rsrd2;
 
 	u8 mac_addr[ETH_ALEN];
 
@@ -1040,6 +1041,7 @@ int qlcnic_need_fw_reset(struct qlcnic_adapter *adapter);
 void qlcnic_request_firmware(struct qlcnic_adapter *adapter);
 void qlcnic_release_firmware(struct qlcnic_adapter *adapter);
 int qlcnic_pinit_from_rom(struct qlcnic_adapter *adapter);
+void qlcnic_setup_idc_param(struct qlcnic_adapter *adapter);
 
 int qlcnic_rom_fast_read(struct qlcnic_adapter *adapter, int addr, int *valp);
 int qlcnic_rom_fast_read_words(struct qlcnic_adapter *adapter, int addr,
diff --git a/drivers/net/qlcnic/qlcnic_hdr.h b/drivers/net/qlcnic/qlcnic_hdr.h
index e9fb692..51fa3fb 100644
--- a/drivers/net/qlcnic/qlcnic_hdr.h
+++ b/drivers/net/qlcnic/qlcnic_hdr.h
@@ -695,6 +695,8 @@ enum {
 #define QLCNIC_CRB_DRV_SCRATCH             (QLCNIC_CAM_RAM(0x148))
 #define QLCNIC_CRB_DEV_PARTITION_INFO      (QLCNIC_CAM_RAM(0x14c))
 #define QLCNIC_CRB_DRV_IDC_VER             (QLCNIC_CAM_RAM(0x14c))
+#define QLCNIC_ROM_DEV_INIT_TIMEOUT	(0x3e885c)
+#define QLCNIC_ROM_DRV_RESET_TIMEOUT	(0x3e8860)
 
 		 /* Device State */
 #define QLCNIC_DEV_COLD 		1
diff --git a/drivers/net/qlcnic/qlcnic_init.c b/drivers/net/qlcnic/qlcnic_init.c
index 0a424e0..ccd24f4 100644
--- a/drivers/net/qlcnic/qlcnic_init.c
+++ b/drivers/net/qlcnic/qlcnic_init.c
@@ -529,6 +529,22 @@ int qlcnic_pinit_from_rom(struct qlcnic_adapter *adapter)
 	return 0;
 }
 
+void
+qlcnic_setup_idc_param(struct qlcnic_adapter *adapter) {
+
+	int timeo;
+
+	if (qlcnic_rom_fast_read(adapter, QLCNIC_ROM_DEV_INIT_TIMEOUT, &timeo))
+		timeo = 30;
+
+	adapter->dev_init_timeo = timeo;
+
+	if (qlcnic_rom_fast_read(adapter, QLCNIC_ROM_DRV_RESET_TIMEOUT, &timeo))
+		timeo = 10;
+
+	adapter->reset_ack_timeo = timeo;
+}
+
 static int
 qlcnic_has_mn(struct qlcnic_adapter *adapter)
 {
diff --git a/drivers/net/qlcnic/qlcnic_main.c b/drivers/net/qlcnic/qlcnic_main.c
index a234622..38e0829 100644
--- a/drivers/net/qlcnic/qlcnic_main.c
+++ b/drivers/net/qlcnic/qlcnic_main.c
@@ -649,7 +649,10 @@ qlcnic_start_firmware(struct qlcnic_adapter *adapter)
 	if (err)
 		return err;
 
-	if (!qlcnic_can_start_firmware(adapter))
+	err = qlcnic_can_start_firmware(adapter);
+	if (err < 0)
+		return err;
+	else if (!err)
 		goto wait_init;
 
 	first_boot = QLCRD32(adapter, QLCNIC_CAM_RAM(0x1fc));
@@ -1138,6 +1141,7 @@ qlcnic_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 		goto err_out_iounmap;
 	}
 
+	qlcnic_setup_idc_param(adapter);
 
 	err = qlcnic_start_firmware(adapter);
 	if (err)
@@ -2027,7 +2031,7 @@ static int
 qlcnic_can_start_firmware(struct qlcnic_adapter *adapter)
 {
 	u32 val, prev_state;
-	int cnt = 0;
+	u8 dev_init_timeo = adapter->dev_init_timeo;
 	int portnum = adapter->portnum;
 
 	if (qlcnic_api_lock(adapter))
@@ -2072,12 +2076,13 @@ start_fw:
 	}
 
 	qlcnic_api_unlock(adapter);
-	msleep(1000);
-	while ((QLCRD32(adapter, QLCNIC_CRB_DEV_STATE) != QLCNIC_DEV_READY) &&
-			++cnt < 20)
+
+	do {
 		msleep(1000);
+	} while ((QLCRD32(adapter, QLCNIC_CRB_DEV_STATE) != QLCNIC_DEV_READY)
+			&& --dev_init_timeo);
 
-	if (cnt >= 20)
+	if (!dev_init_timeo)
 		return -1;
 
 	if (qlcnic_api_lock(adapter))
@@ -2099,12 +2104,10 @@ qlcnic_fwinit_work(struct work_struct *work)
 			struct qlcnic_adapter, fw_work.work);
 	int dev_state;
 
-	if (++adapter->fw_wait_cnt > FW_POLL_THRESH)
-		goto err_ret;
-
 	if (test_bit(__QLCNIC_START_FW, &adapter->state)) {
 
-		if (qlcnic_check_drv_state(adapter)) {
+		if (qlcnic_check_drv_state(adapter) &&
+			(adapter->fw_wait_cnt++ < adapter->reset_ack_timeo)) {
 			qlcnic_schedule_work(adapter,
 					qlcnic_fwinit_work, FW_POLL_DELAY);
 			return;
@@ -2118,6 +2121,9 @@ qlcnic_fwinit_work(struct work_struct *work)
 		goto err_ret;
 	}
 
+	if (adapter->fw_wait_cnt++ > (adapter->dev_init_timeo / 2))
+		goto err_ret;
+
 	dev_state = QLCRD32(adapter, QLCNIC_CRB_DEV_STATE);
 	switch (dev_state) {
 	case QLCNIC_DEV_READY:
-- 
1.6.0.2


^ permalink raw reply related

* [PATCH NEXT 6/8] qlcnic: add driver debug support
From: Amit Kumar Salecha @ 2010-03-31 12:03 UTC (permalink / raw)
  To: davem; +Cc: netdev, ameen.rahman
In-Reply-To: <1270037026-9062-1-git-send-email-amit.salecha@qlogic.com>

Signed-off-by: Amit Kumar Salecha <amit.salecha@qlogic.com>
---
 drivers/net/qlcnic/qlcnic.h         |    9 ++++++++-
 drivers/net/qlcnic/qlcnic_ethtool.c |   16 ++++++++++++++++
 drivers/net/qlcnic/qlcnic_hw.c      |    6 +++++-
 drivers/net/qlcnic/qlcnic_main.c    |   23 ++++++++++++++++++++---
 4 files changed, 49 insertions(+), 5 deletions(-)

diff --git a/drivers/net/qlcnic/qlcnic.h b/drivers/net/qlcnic/qlcnic.h
index 87cd1a7..dbf6335 100644
--- a/drivers/net/qlcnic/qlcnic.h
+++ b/drivers/net/qlcnic/qlcnic.h
@@ -960,7 +960,7 @@ struct qlcnic_adapter {
 	u8 diag_cnt;
 	u8 reset_ack_timeo;
 	u8 dev_init_timeo;
-	u8 rsrd1;
+	u8 msglvl;
 
 	u8 mac_addr[ETH_ALEN];
 
@@ -1135,4 +1135,11 @@ static inline u32 qlcnic_tx_avail(struct qlcnic_host_tx_ring *tx_ring)
 
 extern const struct ethtool_ops qlcnic_ethtool_ops;
 
+#define QLCDB(adapter, _fmt, _args...) do { \
+	if (adapter->msglvl) \
+		printk(KERN_INFO "%s: %s: " _fmt,		\
+			dev_name(&adapter->pdev->dev),\
+			__func__, ##_args);		\
+	} while (0)
+
 #endif				/* __QLCNIC_H_ */
diff --git a/drivers/net/qlcnic/qlcnic_ethtool.c b/drivers/net/qlcnic/qlcnic_ethtool.c
index f83e15f..1cf3e59 100644
--- a/drivers/net/qlcnic/qlcnic_ethtool.c
+++ b/drivers/net/qlcnic/qlcnic_ethtool.c
@@ -998,6 +998,20 @@ static int qlcnic_set_flags(struct net_device *netdev, u32 data)
 	return 0;
 }
 
+static u32 qlcnic_get_msglevel(struct net_device *netdev)
+{
+	struct qlcnic_adapter *adapter = netdev_priv(netdev);
+
+	return adapter->msglvl;
+}
+
+static void qlcnic_set_msglevel(struct net_device *netdev, u32 msglvl)
+{
+	struct qlcnic_adapter *adapter = netdev_priv(netdev);
+
+	adapter->msglvl = !!msglvl;
+}
+
 const struct ethtool_ops qlcnic_ethtool_ops = {
 	.get_settings = qlcnic_get_settings,
 	.set_settings = qlcnic_set_settings,
@@ -1029,4 +1043,6 @@ const struct ethtool_ops qlcnic_ethtool_ops = {
 	.get_flags = ethtool_op_get_flags,
 	.set_flags = qlcnic_set_flags,
 	.phys_id = qlcnic_blink_led,
+	.set_msglevel = qlcnic_set_msglevel,
+	.get_msglevel = qlcnic_get_msglevel,
 };
diff --git a/drivers/net/qlcnic/qlcnic_hw.c b/drivers/net/qlcnic/qlcnic_hw.c
index 0933c2d..14c999a 100644
--- a/drivers/net/qlcnic/qlcnic_hw.c
+++ b/drivers/net/qlcnic/qlcnic_hw.c
@@ -294,8 +294,12 @@ qlcnic_pcie_sem_lock(struct qlcnic_adapter *adapter, int sem, u32 id_reg)
 		done = QLCRD32(adapter, QLCNIC_PCIE_REG(PCIE_SEM_LOCK(sem)));
 		if (done == 1)
 			break;
-		if (++timeout >= QLCNIC_PCIE_SEM_TIMEOUT)
+		if (++timeout >= QLCNIC_PCIE_SEM_TIMEOUT) {
+			dev_err(&adapter->pdev->dev,
+				"Failed to acquire sem=%d lock;reg_id=%d\n",
+				sem, id_reg);
 			return -EIO;
+		}
 		msleep(1);
 	}
 
diff --git a/drivers/net/qlcnic/qlcnic_main.c b/drivers/net/qlcnic/qlcnic_main.c
index 38e0829..87511f1 100644
--- a/drivers/net/qlcnic/qlcnic_main.c
+++ b/drivers/net/qlcnic/qlcnic_main.c
@@ -1742,6 +1742,7 @@ static void qlcnic_tx_timeout_task(struct work_struct *work)
 request_reset:
 	adapter->need_fw_reset = 1;
 	clear_bit(__QLCNIC_RESETTING, &adapter->state);
+	QLCDB(adapter, "Resetting adapter\n");
 }
 
 static struct net_device_stats *qlcnic_get_stats(struct net_device *netdev)
@@ -2046,6 +2047,7 @@ qlcnic_can_start_firmware(struct qlcnic_adapter *adapter)
 	}
 
 	prev_state = QLCRD32(adapter, QLCNIC_CRB_DEV_STATE);
+	QLCDB(adapter, "Device state = %u\n", prev_state);
 
 	switch (prev_state) {
 	case QLCNIC_DEV_COLD:
@@ -2082,8 +2084,11 @@ start_fw:
 	} while ((QLCRD32(adapter, QLCNIC_CRB_DEV_STATE) != QLCNIC_DEV_READY)
 			&& --dev_init_timeo);
 
-	if (!dev_init_timeo)
+	if (!dev_init_timeo) {
+		dev_err(&adapter->pdev->dev,
+			"Waiting for device to initialize timeout\n");
 		return -1;
+	}
 
 	if (qlcnic_api_lock(adapter))
 		return -1;
@@ -2113,6 +2118,7 @@ qlcnic_fwinit_work(struct work_struct *work)
 			return;
 		}
 
+		QLCDB(adapter, "Resetting FW\n");
 		if (!qlcnic_start_firmware(adapter)) {
 			qlcnic_schedule_work(adapter, qlcnic_attach_work, 0);
 			return;
@@ -2121,10 +2127,15 @@ qlcnic_fwinit_work(struct work_struct *work)
 		goto err_ret;
 	}
 
-	if (adapter->fw_wait_cnt++ > (adapter->dev_init_timeo / 2))
+	if (adapter->fw_wait_cnt++ > (adapter->dev_init_timeo / 2)) {
+		dev_err(&adapter->pdev->dev,
+				"Waiting for device to reset timeout\n");
 		goto err_ret;
+	}
 
 	dev_state = QLCRD32(adapter, QLCNIC_CRB_DEV_STATE);
+	QLCDB(adapter, "Func waiting: Device state=%d\n", dev_state);
+
 	switch (dev_state) {
 	case QLCNIC_DEV_READY:
 		if (!qlcnic_start_firmware(adapter)) {
@@ -2177,6 +2188,8 @@ qlcnic_detach_work(struct work_struct *work)
 	return;
 
 err_ret:
+	dev_err(&adapter->pdev->dev, "detach failed; status=%d temp=%d\n",
+			status, adapter->temp);
 	qlcnic_clr_all_drv_state(adapter);
 
 }
@@ -2194,6 +2207,7 @@ qlcnic_dev_request_reset(struct qlcnic_adapter *adapter)
 	if (state != QLCNIC_DEV_INITALIZING && state != QLCNIC_DEV_NEED_RESET) {
 		QLCWR32(adapter, QLCNIC_CRB_DEV_STATE, QLCNIC_DEV_NEED_RESET);
 		set_bit(__QLCNIC_START_FW, &adapter->state);
+		QLCDB(adapter, "NEED_RESET state set\n");
 	}
 
 	qlcnic_api_unlock(adapter);
@@ -2290,8 +2304,11 @@ detach:
 		QLCNIC_DEV_NEED_RESET;
 
 	if ((auto_fw_reset == AUTO_FW_RESET_ENABLED) &&
-			!test_and_set_bit(__QLCNIC_RESETTING, &adapter->state))
+		!test_and_set_bit(__QLCNIC_RESETTING, &adapter->state)) {
+
 		qlcnic_schedule_work(adapter, qlcnic_detach_work, 0);
+		QLCDB(adapter, "fw recovery scheduled.\n");
+	}
 
 	return 1;
 }
-- 
1.6.0.2


^ permalink raw reply related

* [PATCH NEXT 8/8] qlcnic: update version to 5.0.1
From: Amit Kumar Salecha @ 2010-03-31 12:03 UTC (permalink / raw)
  To: davem; +Cc: netdev, ameen.rahman
In-Reply-To: <1270037026-9062-1-git-send-email-amit.salecha@qlogic.com>

Signed-off-by: Amit Kumar Salecha <amit.salecha@qlogic.com>
---
 drivers/net/qlcnic/qlcnic.h |    4 ++--
 1 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/qlcnic/qlcnic.h b/drivers/net/qlcnic/qlcnic.h
index dbf6335..b726db2 100644
--- a/drivers/net/qlcnic/qlcnic.h
+++ b/drivers/net/qlcnic/qlcnic.h
@@ -51,8 +51,8 @@
 
 #define _QLCNIC_LINUX_MAJOR 5
 #define _QLCNIC_LINUX_MINOR 0
-#define _QLCNIC_LINUX_SUBVERSION 0
-#define QLCNIC_LINUX_VERSIONID  "5.0.0"
+#define _QLCNIC_LINUX_SUBVERSION 1
+#define QLCNIC_LINUX_VERSIONID  "5.0.1"
 
 #define QLCNIC_VERSION_CODE(a, b, c)	(((a) << 24) + ((b) << 16) + (c))
 #define _major(v)	(((v) >> 24) & 0xff)
-- 
1.6.0.2


^ permalink raw reply related

* [PATCH NEXT 7/8] qlcnic: fix interface attach sequence
From: Amit Kumar Salecha @ 2010-03-31 12:03 UTC (permalink / raw)
  To: davem; +Cc: netdev, ameen.rahman
In-Reply-To: <1270037026-9062-1-git-send-email-amit.salecha@qlogic.com>

The interface should remain visible even if resource allocation fails.
netif_device_attach() should be called for every netif_device_detach().

Signed-off-by: Amit Kumar Salecha <amit.salecha@qlogic.com>
---
 drivers/net/qlcnic/qlcnic_main.c |   20 ++++++++++----------
 1 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/drivers/net/qlcnic/qlcnic_main.c b/drivers/net/qlcnic/qlcnic_main.c
index 87511f1..6faabf4 100644
--- a/drivers/net/qlcnic/qlcnic_main.c
+++ b/drivers/net/qlcnic/qlcnic_main.c
@@ -952,11 +952,11 @@ void qlcnic_diag_free_res(struct net_device *netdev, int max_sds_rings)
 	adapter->max_sds_rings = max_sds_rings;
 
 	if (qlcnic_attach(adapter))
-		return;
+		goto out;
 
 	if (netif_running(netdev))
 		__qlcnic_up(adapter, netdev);
-
+out:
 	netif_device_attach(netdev);
 }
 
@@ -978,8 +978,10 @@ int qlcnic_diag_alloc_res(struct net_device *netdev, int test)
 	adapter->diag_test = test;
 
 	ret = qlcnic_attach(adapter);
-	if (ret)
+	if (ret) {
+		netif_device_attach(netdev);
 		return ret;
+	}
 
 	if (adapter->diag_test == QLCNIC_INTERRUPT_TEST) {
 		for (ring = 0; ring < adapter->max_sds_rings; ring++) {
@@ -1012,16 +1014,12 @@ qlcnic_reset_context(struct qlcnic_adapter *adapter)
 		if (netif_running(netdev)) {
 			err = qlcnic_attach(adapter);
 			if (!err)
-				err = __qlcnic_up(adapter, netdev);
-
-			if (err)
-				goto done;
+				__qlcnic_up(adapter, netdev);
 		}
 
 		netif_device_attach(netdev);
 	}
 
-done:
 	clear_bit(__QLCNIC_RESETTING, &adapter->state);
 	return err;
 }
@@ -1337,6 +1335,7 @@ err_out_detach:
 	qlcnic_detach(adapter);
 err_out:
 	qlcnic_clr_all_drv_state(adapter);
+	netif_device_attach(netdev);
 	return err;
 }
 #endif
@@ -2152,6 +2151,7 @@ qlcnic_fwinit_work(struct work_struct *work)
 	}
 
 err_ret:
+	netif_device_attach(adapter->netdev);
 	qlcnic_clr_all_drv_state(adapter);
 }
 
@@ -2190,6 +2190,7 @@ qlcnic_detach_work(struct work_struct *work)
 err_ret:
 	dev_err(&adapter->pdev->dev, "detach failed; status=%d temp=%d\n",
 			status, adapter->temp);
+	netif_device_attach(netdev);
 	qlcnic_clr_all_drv_state(adapter);
 
 }
@@ -2252,9 +2253,8 @@ qlcnic_attach_work(struct work_struct *work)
 		qlcnic_config_indev_addr(netdev, NETDEV_UP);
 	}
 
-	netif_device_attach(netdev);
-
 done:
+	netif_device_attach(netdev);
 	adapter->fw_fail_cnt = 0;
 	clear_bit(__QLCNIC_RESETTING, &adapter->state);
 
-- 
1.6.0.2


^ permalink raw reply related

* [PATCH NEXT 2/8] qlcnic: handle queue manager access
From: Amit Kumar Salecha @ 2010-03-31 12:03 UTC (permalink / raw)
  To: davem; +Cc: netdev, ameen.rahman, Dhananjay Phadke
In-Reply-To: <1270037026-9062-1-git-send-email-amit.salecha@qlogic.com>

From: Dhananjay Phadke <dhananjay.phadke@qlogic.com>

Check whether an access by the tools targets the hardware queue engine
and handle it separately from other block registers; otherwise
incorrect data is returned.

Signed-off-by: Dhananjay Phadke <dhananjay.phadke@qlogic.com>
Signed-off-by: Amit Kumar Salecha <amit.salecha@qlogic.com>
---
 drivers/net/qlcnic/qlcnic.h      |    5 +++++
 drivers/net/qlcnic/qlcnic_hdr.h  |    3 ++-
 drivers/net/qlcnic/qlcnic_hw.c   |   25 ++++++++++++++++++++++---
 drivers/net/qlcnic/qlcnic_main.c |   35 +++++++++++++++++++++++++++--------
 4 files changed, 56 insertions(+), 12 deletions(-)

diff --git a/drivers/net/qlcnic/qlcnic.h b/drivers/net/qlcnic/qlcnic.h
index 0da94b2..8a3446d 100644
--- a/drivers/net/qlcnic/qlcnic.h
+++ b/drivers/net/qlcnic/qlcnic.h
@@ -994,6 +994,11 @@ u32 qlcnic_hw_read_wx_2M(struct qlcnic_adapter *adapter, ulong off);
 int qlcnic_hw_write_wx_2M(struct qlcnic_adapter *, ulong off, u32 data);
 int qlcnic_pci_mem_write_2M(struct qlcnic_adapter *, u64 off, u64 data);
 int qlcnic_pci_mem_read_2M(struct qlcnic_adapter *, u64 off, u64 *data);
+void qlcnic_pci_camqm_read_2M(struct qlcnic_adapter *, u64, u64 *);
+void qlcnic_pci_camqm_write_2M(struct qlcnic_adapter *, u64, u64);
+
+#define ADDR_IN_RANGE(addr, low, high)	\
+	(((addr) < (high)) && ((addr) >= (low)))
 
 #define QLCRD32(adapter, off) \
 	(qlcnic_hw_read_wx_2M(adapter, off))
diff --git a/drivers/net/qlcnic/qlcnic_hdr.h b/drivers/net/qlcnic/qlcnic_hdr.h
index 0469f84..25465a9 100644
--- a/drivers/net/qlcnic/qlcnic_hdr.h
+++ b/drivers/net/qlcnic/qlcnic_hdr.h
@@ -435,9 +435,10 @@ enum {
 #define QLCNIC_PCI_MS_2M	(0x80000)
 #define QLCNIC_PCI_OCM0_2M	(0x000c0000UL)
 #define QLCNIC_PCI_CRBSPACE	(0x06000000UL)
+#define QLCNIC_PCI_CAMQM	(0x04800000UL)
+#define QLCNIC_PCI_CAMQM_END	(0x04800800UL)
 #define QLCNIC_PCI_2MB_SIZE	(0x00200000UL)
 #define QLCNIC_PCI_CAMQM_2M_BASE	(0x000ff800UL)
-#define QLCNIC_PCI_CAMQM_2M_END 	(0x04800800UL)
 
 #define QLCNIC_CRB_CAM	QLCNIC_PCI_CRB_WINDOW(QLCNIC_HW_PX_MAP_CRB_CAM)
 
diff --git a/drivers/net/qlcnic/qlcnic_hw.c b/drivers/net/qlcnic/qlcnic_hw.c
index da00e16..b977874 100644
--- a/drivers/net/qlcnic/qlcnic_hw.c
+++ b/drivers/net/qlcnic/qlcnic_hw.c
@@ -53,9 +53,6 @@ static inline void writeq(u64 val, void __iomem *addr)
 }
 #endif
 
-#define ADDR_IN_RANGE(addr, low, high)	\
-	(((addr) < (high)) && ((addr) >= (low)))
-
 #define PCI_OFFSET_FIRST_RANGE(adapter, off)    \
 	((adapter)->ahw.pci_base0 + (off))
 
@@ -936,6 +933,28 @@ unlock:
 	return ret;
 }
 
+void
+qlcnic_pci_camqm_read_2M(struct qlcnic_adapter *adapter, u64 off, u64 *data)
+{
+	void __iomem *addr = adapter->ahw.pci_base0 +
+		QLCNIC_PCI_CAMQM_2M_BASE + (off - QLCNIC_PCI_CAMQM);
+
+	mutex_lock(&adapter->ahw.mem_lock);
+	*data = readq(addr);
+	mutex_unlock(&adapter->ahw.mem_lock);
+}
+
+void
+qlcnic_pci_camqm_write_2M(struct qlcnic_adapter *adapter, u64 off, u64 data)
+{
+	void __iomem *addr = adapter->ahw.pci_base0 +
+		QLCNIC_PCI_CAMQM_2M_BASE + (off - QLCNIC_PCI_CAMQM);
+
+	mutex_lock(&adapter->ahw.mem_lock);
+	writeq(data, addr);
+	mutex_unlock(&adapter->ahw.mem_lock);
+}
+
 #define MAX_CTL_CHECK   1000
 
 int
diff --git a/drivers/net/qlcnic/qlcnic_main.c b/drivers/net/qlcnic/qlcnic_main.c
index fc72156..a234622 100644
--- a/drivers/net/qlcnic/qlcnic_main.c
+++ b/drivers/net/qlcnic/qlcnic_main.c
@@ -2386,14 +2386,21 @@ static int
 qlcnic_sysfs_validate_crb(struct qlcnic_adapter *adapter,
 		loff_t offset, size_t size)
 {
+	size_t crb_size = 4;
+
 	if (!(adapter->flags & QLCNIC_DIAG_ENABLED))
 		return -EIO;
 
-	if ((size != 4) || (offset & 0x3))
-		return  -EINVAL;
+	if (offset < QLCNIC_PCI_CRBSPACE) {
+		if (ADDR_IN_RANGE(offset, QLCNIC_PCI_CAMQM,
+					QLCNIC_PCI_CAMQM_END))
+			crb_size = 8;
+		else
+			return -EINVAL;
+	}
 
-	if (offset < QLCNIC_PCI_CRBSPACE)
-		return -EINVAL;
+	if ((size != crb_size) || (offset & (crb_size-1)))
+		return  -EINVAL;
 
 	return 0;
 }
@@ -2405,14 +2412,20 @@ qlcnic_sysfs_read_crb(struct kobject *kobj, struct bin_attribute *attr,
 	struct device *dev = container_of(kobj, struct device, kobj);
 	struct qlcnic_adapter *adapter = dev_get_drvdata(dev);
 	u32 data;
+	u64 qmdata;
 	int ret;
 
 	ret = qlcnic_sysfs_validate_crb(adapter, offset, size);
 	if (ret != 0)
 		return ret;
 
-	data = QLCRD32(adapter, offset);
-	memcpy(buf, &data, size);
+	if (ADDR_IN_RANGE(offset, QLCNIC_PCI_CAMQM, QLCNIC_PCI_CAMQM_END)) {
+		qlcnic_pci_camqm_read_2M(adapter, offset, &qmdata);
+		memcpy(buf, &qmdata, size);
+	} else {
+		data = QLCRD32(adapter, offset);
+		memcpy(buf, &data, size);
+	}
 	return size;
 }
 
@@ -2423,14 +2436,20 @@ qlcnic_sysfs_write_crb(struct kobject *kobj, struct bin_attribute *attr,
 	struct device *dev = container_of(kobj, struct device, kobj);
 	struct qlcnic_adapter *adapter = dev_get_drvdata(dev);
 	u32 data;
+	u64 qmdata;
 	int ret;
 
 	ret = qlcnic_sysfs_validate_crb(adapter, offset, size);
 	if (ret != 0)
 		return ret;
 
-	memcpy(&data, buf, size);
-	QLCWR32(adapter, offset, data);
+	if (ADDR_IN_RANGE(offset, QLCNIC_PCI_CAMQM, QLCNIC_PCI_CAMQM_END)) {
+		memcpy(&qmdata, buf, size);
+		qlcnic_pci_camqm_write_2M(adapter, offset, qmdata);
+	} else {
+		memcpy(&data, buf, size);
+		QLCWR32(adapter, offset, data);
+	}
 	return size;
 }
 
-- 
1.6.0.2


^ permalink raw reply related

* [PATCH NEXT 0/8]qlcnic: fix diagnostic tools access
From: Amit Kumar Salecha @ 2010-03-31 12:03 UTC (permalink / raw)
  To: davem; +Cc: netdev, ameen.rahman

Hi
  Series of 8 patches to fix diagnostic tool access and add debug
  messages to the driver.
  Please apply them to the net-next branch.

-Amit

^ permalink raw reply

* [PATCH NEXT 3/8] qlcnic: update oncard memory size check
From: Amit Kumar Salecha @ 2010-03-31 12:03 UTC (permalink / raw)
  To: davem; +Cc: netdev, ameen.rahman, Dhananjay Phadke
In-Reply-To: <1270037026-9062-1-git-send-email-amit.salecha@qlogic.com>

From: Dhananjay Phadke <dhananjay.phadke@qlogic.com>

All QLogic converged NICs have 128-bit 128MB on card memory.
Fix the limit check from 64MB to 128MB and remove unnecessary
64-bit read/write checks.

Signed-off-by: Dhananjay Phadke <dhananjay.phadke@qlogic.com>
Signed-off-by: Amit Kumar Salecha <amit.salecha@qlogic.com>
---
 drivers/net/qlcnic/qlcnic_hdr.h |    2 +-
 drivers/net/qlcnic/qlcnic_hw.c  |   55 +++++++++++++++++----------------------
 2 files changed, 25 insertions(+), 32 deletions(-)

diff --git a/drivers/net/qlcnic/qlcnic_hdr.h b/drivers/net/qlcnic/qlcnic_hdr.h
index 25465a9..e9fb692 100644
--- a/drivers/net/qlcnic/qlcnic_hdr.h
+++ b/drivers/net/qlcnic/qlcnic_hdr.h
@@ -449,7 +449,7 @@ enum {
 #define QLCNIC_ADDR_OCM1	(0x0000000200400000ULL)
 #define QLCNIC_ADDR_OCM1_MAX	(0x00000002004fffffULL)
 #define QLCNIC_ADDR_QDR_NET	(0x0000000300000000ULL)
-#define QLCNIC_ADDR_QDR_NET_MAX_P3 (0x0000000303ffffffULL)
+#define QLCNIC_ADDR_QDR_NET_MAX (0x0000000307ffffffULL)
 
 /*
  *   Register offsets for MN
diff --git a/drivers/net/qlcnic/qlcnic_hw.c b/drivers/net/qlcnic/qlcnic_hw.c
index b977874..419f46e 100644
--- a/drivers/net/qlcnic/qlcnic_hw.c
+++ b/drivers/net/qlcnic/qlcnic_hw.c
@@ -963,7 +963,6 @@ qlcnic_pci_mem_write_2M(struct qlcnic_adapter *adapter,
 {
 	int i, j, ret;
 	u32 temp, off8;
-	u64 stride;
 	void __iomem *mem_crb;
 
 	/* Only 64-bit aligned access */
@@ -972,7 +971,7 @@ qlcnic_pci_mem_write_2M(struct qlcnic_adapter *adapter,
 
 	/* P3 onward, test agent base for MIU and SIU is same */
 	if (ADDR_IN_RANGE(off, QLCNIC_ADDR_QDR_NET,
-				QLCNIC_ADDR_QDR_NET_MAX_P3)) {
+				QLCNIC_ADDR_QDR_NET_MAX)) {
 		mem_crb = qlcnic_get_ioaddr(adapter,
 				QLCNIC_CRB_QDR_NET+MIU_TEST_AGT_BASE);
 		goto correct;
@@ -990,9 +989,7 @@ qlcnic_pci_mem_write_2M(struct qlcnic_adapter *adapter,
 	return -EIO;
 
 correct:
-	stride = QLCNIC_IS_REVISION_P3P(adapter->ahw.revision_id) ? 16 : 8;
-
-	off8 = off & ~(stride-1);
+	off8 = off & ~0xf;
 
 	mutex_lock(&adapter->ahw.mem_lock);
 
@@ -1000,30 +997,28 @@ correct:
 	writel(0, (mem_crb + MIU_TEST_AGT_ADDR_HI));
 
 	i = 0;
-	if (stride == 16) {
-		writel(TA_CTL_ENABLE, (mem_crb + TEST_AGT_CTRL));
-		writel((TA_CTL_START | TA_CTL_ENABLE),
-				(mem_crb + TEST_AGT_CTRL));
-
-		for (j = 0; j < MAX_CTL_CHECK; j++) {
-			temp = readl(mem_crb + TEST_AGT_CTRL);
-			if ((temp & TA_CTL_BUSY) == 0)
-				break;
-		}
+	writel(TA_CTL_ENABLE, (mem_crb + TEST_AGT_CTRL));
+	writel((TA_CTL_START | TA_CTL_ENABLE),
+			(mem_crb + TEST_AGT_CTRL));
 
-		if (j >= MAX_CTL_CHECK) {
-			ret = -EIO;
-			goto done;
-		}
+	for (j = 0; j < MAX_CTL_CHECK; j++) {
+		temp = readl(mem_crb + TEST_AGT_CTRL);
+		if ((temp & TA_CTL_BUSY) == 0)
+			break;
+	}
 
-		i = (off & 0xf) ? 0 : 2;
-		writel(readl(mem_crb + MIU_TEST_AGT_RDDATA(i)),
-				mem_crb + MIU_TEST_AGT_WRDATA(i));
-		writel(readl(mem_crb + MIU_TEST_AGT_RDDATA(i+1)),
-				mem_crb + MIU_TEST_AGT_WRDATA(i+1));
-		i = (off & 0xf) ? 2 : 0;
+	if (j >= MAX_CTL_CHECK) {
+		ret = -EIO;
+		goto done;
 	}
 
+	i = (off & 0xf) ? 0 : 2;
+	writel(readl(mem_crb + MIU_TEST_AGT_RDDATA(i)),
+			mem_crb + MIU_TEST_AGT_WRDATA(i));
+	writel(readl(mem_crb + MIU_TEST_AGT_RDDATA(i+1)),
+			mem_crb + MIU_TEST_AGT_WRDATA(i+1));
+	i = (off & 0xf) ? 2 : 0;
+
 	writel(data & 0xffffffff,
 			mem_crb + MIU_TEST_AGT_WRDATA(i));
 	writel((data >> 32) & 0xffffffff,
@@ -1059,7 +1054,7 @@ qlcnic_pci_mem_read_2M(struct qlcnic_adapter *adapter,
 {
 	int j, ret;
 	u32 temp, off8;
-	u64 val, stride;
+	u64 val;
 	void __iomem *mem_crb;
 
 	/* Only 64-bit aligned access */
@@ -1068,7 +1063,7 @@ qlcnic_pci_mem_read_2M(struct qlcnic_adapter *adapter,
 
 	/* P3 onward, test agent base for MIU and SIU is same */
 	if (ADDR_IN_RANGE(off, QLCNIC_ADDR_QDR_NET,
-				QLCNIC_ADDR_QDR_NET_MAX_P3)) {
+				QLCNIC_ADDR_QDR_NET_MAX)) {
 		mem_crb = qlcnic_get_ioaddr(adapter,
 				QLCNIC_CRB_QDR_NET+MIU_TEST_AGT_BASE);
 		goto correct;
@@ -1088,9 +1083,7 @@ qlcnic_pci_mem_read_2M(struct qlcnic_adapter *adapter,
 	return -EIO;
 
 correct:
-	stride = QLCNIC_IS_REVISION_P3P(adapter->ahw.revision_id) ? 16 : 8;
-
-	off8 = off & ~(stride-1);
+	off8 = off & ~0xf;
 
 	mutex_lock(&adapter->ahw.mem_lock);
 
@@ -1112,7 +1105,7 @@ correct:
 		ret = -EIO;
 	} else {
 		off8 = MIU_TEST_AGT_RDDATA_LO;
-		if ((stride == 16) && (off & 0xf))
+		if (off & 0xf)
 			off8 = MIU_TEST_AGT_RDDATA_UPPER_LO;
 
 		temp = readl(mem_crb + off8 + 4);
-- 
1.6.0.2


^ permalink raw reply related

* [PATCH NEXT 1/8] qlcnic: fix fw load from file
From: Amit Kumar Salecha @ 2010-03-31 12:03 UTC (permalink / raw)
  To: davem; +Cc: netdev, ameen.rahman
In-Reply-To: <1270037026-9062-1-git-send-email-amit.salecha@qlogic.com>

In rare cases the firmware file size is not a multiple of 8 bytes.

Signed-off-by: Amit Kumar Salecha <amit.salecha@qlogic.com>
---
 drivers/net/qlcnic/qlcnic_init.c |   10 ++++++++++
 1 files changed, 10 insertions(+), 0 deletions(-)

diff --git a/drivers/net/qlcnic/qlcnic_init.c b/drivers/net/qlcnic/qlcnic_init.c
index 7c34e4e..0a424e0 100644
--- a/drivers/net/qlcnic/qlcnic_init.c
+++ b/drivers/net/qlcnic/qlcnic_init.c
@@ -949,6 +949,16 @@ qlcnic_load_firmware(struct qlcnic_adapter *adapter)
 
 			flashaddr += 8;
 		}
+
+		size = (__force u32)qlcnic_get_fw_size(adapter) % 8;
+		if (size) {
+			data = cpu_to_le64(ptr64[i]);
+
+			if (qlcnic_pci_mem_write_2M(adapter,
+						flashaddr, data))
+				return -EIO;
+		}
+
 	} else {
 		u64 data;
 		u32 hi, lo;
-- 
1.6.0.2


^ permalink raw reply related

* [PATCH NEXT 4/8] qlcnic: fix onchip memory access
From: Amit Kumar Salecha @ 2010-03-31 12:03 UTC (permalink / raw)
  To: davem; +Cc: netdev, ameen.rahman, Dhananjay Phadke
In-Reply-To: <1270037026-9062-1-git-send-email-amit.salecha@qlogic.com>

From: Dhananjay Phadke <dhananjay.phadke@qlogic.com>

Fix incorrect offset calculation and remove unnecessary remap
of the region in bar 0 to access onchip memory.

This was causing debug tools to read incorrect values.

Signed-off-by: Dhananjay Phadke <dhananjay.phadke@qlogic.com>
Signed-off-by: Amit Kumar Salecha <amit.salecha@qlogic.com>
---
 drivers/net/qlcnic/qlcnic_hw.c |   39 ++-------------------------------------
 1 files changed, 2 insertions(+), 37 deletions(-)

diff --git a/drivers/net/qlcnic/qlcnic_hw.c b/drivers/net/qlcnic/qlcnic_hw.c
index 419f46e..0933c2d 100644
--- a/drivers/net/qlcnic/qlcnic_hw.c
+++ b/drivers/net/qlcnic/qlcnic_hw.c
@@ -53,18 +53,6 @@ static inline void writeq(u64 val, void __iomem *addr)
 }
 #endif
 
-#define PCI_OFFSET_FIRST_RANGE(adapter, off)    \
-	((adapter)->ahw.pci_base0 + (off))
-
-static void __iomem *pci_base_offset(struct qlcnic_adapter *adapter,
-					    unsigned long off)
-{
-	if (ADDR_IN_RANGE(off, FIRST_PAGE_GROUP_START, FIRST_PAGE_GROUP_END))
-		return PCI_OFFSET_FIRST_RANGE(adapter, off);
-
-	return NULL;
-}
-
 static const struct crb_128M_2M_block_map
 crb_128M_2M_map[64] __cacheline_aligned_in_smp = {
     {{{0, 0,         0,         0} } },		/* 0: PCI */
@@ -871,13 +859,6 @@ qlcnic_pci_set_window_2M(struct qlcnic_adapter *adapter,
 		u64 addr, u32 *start)
 {
 	u32 window;
-	struct pci_dev *pdev = adapter->pdev;
-
-	if ((addr & 0x00ff800) == 0xff800) {
-		if (printk_ratelimit())
-			dev_warn(&pdev->dev, "QM access not handled\n");
-		return -EIO;
-	}
 
 	window = OCM_WIN_P3P(addr);
 
@@ -894,8 +875,7 @@ static int
 qlcnic_pci_mem_access_direct(struct qlcnic_adapter *adapter, u64 off,
 		u64 *data, int op)
 {
-	void __iomem *addr, *mem_ptr = NULL;
-	resource_size_t mem_base;
+	void __iomem *addr;
 	int ret;
 	u32 start;
 
@@ -905,21 +885,8 @@ qlcnic_pci_mem_access_direct(struct qlcnic_adapter *adapter, u64 off,
 	if (ret != 0)
 		goto unlock;
 
-	addr = pci_base_offset(adapter, start);
-	if (addr)
-		goto noremap;
-
-	mem_base = pci_resource_start(adapter->pdev, 0) + (start & PAGE_MASK);
-
-	mem_ptr = ioremap(mem_base, PAGE_SIZE);
-	if (mem_ptr == NULL) {
-		ret = -EIO;
-		goto unlock;
-	}
-
-	addr = mem_ptr + (start & (PAGE_SIZE - 1));
+	addr = adapter->ahw.pci_base0 + start;
 
-noremap:
 	if (op == 0)	/* read */
 		*data = readq(addr);
 	else		/* write */
@@ -928,8 +895,6 @@ noremap:
 unlock:
 	mutex_unlock(&adapter->ahw.mem_lock);
 
-	if (mem_ptr)
-		iounmap(mem_ptr);
 	return ret;
 }
 
-- 
1.6.0.2


^ permalink raw reply related

* [PATCH 3/3] be2net: fix bug in vlan rx path for big endian architecture
From: Ajit Khaparde @ 2010-03-31 12:00 UTC (permalink / raw)
  To: David Miller; +Cc: netdev

VLAN traffic on big-endian architectures is broken.
The VID needs to be byte-swapped before the packet is given to the stack.
This patch fixes it.

Signed-off-by: Ajit Khaparde <ajitk@serverengines.com>
---
 drivers/net/benet/be_main.c |    4 ++--
 1 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/benet/be_main.c b/drivers/net/benet/be_main.c
index b0faaa2..ec6ace8 100644
--- a/drivers/net/benet/be_main.c
+++ b/drivers/net/benet/be_main.c
@@ -807,7 +807,7 @@ static void be_rx_compl_process(struct be_adapter *adapter,
 			return;
 		}
 		vid = AMAP_GET_BITS(struct amap_eth_rx_compl, vlan_tag, rxcp);
-		vid = be16_to_cpu(vid);
+		vid = swab16(vid);
 		vlan_hwaccel_receive_skb(skb, adapter->vlan_grp, vid);
 	} else {
 		netif_receive_skb(skb);
@@ -884,7 +884,7 @@ static void be_rx_compl_process_gro(struct be_adapter *adapter,
 		napi_gro_frags(&eq_obj->napi);
 	} else {
 		vid = AMAP_GET_BITS(struct amap_eth_rx_compl, vlan_tag, rxcp);
-		vid = be16_to_cpu(vid);
+		vid = swab16(vid);
 
 		if (!adapter->vlan_grp || adapter->vlans_added == 0)
 			return;
-- 
1.6.3.3


^ permalink raw reply related

* [PATCH 2/3] be2net: fix flashing on big endian architectures
From: Ajit Khaparde @ 2010-03-31 11:57 UTC (permalink / raw)
  To: David Miller; +Cc: netdev

Flashing is broken on big endian architectures like ppc.
This patch fixes it.

From: Naresh G <nareshg@serverengines.com>
Signed-off-by: Ajit Khaparde <ajitk@serverengines.com>
---
 drivers/net/benet/be_cmds.c |    4 ++--
 drivers/net/benet/be_main.c |   15 +++++++--------
 2 files changed, 9 insertions(+), 10 deletions(-)

diff --git a/drivers/net/benet/be_cmds.c b/drivers/net/benet/be_cmds.c
index 50e6259..d0ef4ac 100644
--- a/drivers/net/benet/be_cmds.c
+++ b/drivers/net/benet/be_cmds.c
@@ -1464,8 +1464,8 @@ int be_cmd_get_flash_crc(struct be_adapter *adapter, u8 *flashed_crc,
 
 	req->params.op_type = cpu_to_le32(IMG_TYPE_REDBOOT);
 	req->params.op_code = cpu_to_le32(FLASHROM_OPER_REPORT);
-	req->params.offset = offset;
-	req->params.data_buf_size = 0x4;
+	req->params.offset = cpu_to_le32(offset);
+	req->params.data_buf_size = cpu_to_le32(0x4);
 
 	status = be_mcc_notify_wait(adapter);
 	if (!status)
diff --git a/drivers/net/benet/be_main.c b/drivers/net/benet/be_main.c
index a08faf3..b0faaa2 100644
--- a/drivers/net/benet/be_main.c
+++ b/drivers/net/benet/be_main.c
@@ -1991,7 +1991,7 @@ int be_load_fw(struct be_adapter *adapter, u8 *func)
 	struct flash_file_hdr_g3 *fhdr3;
 	struct image_hdr *img_hdr_ptr = NULL;
 	struct be_dma_mem flash_cmd;
-	int status, i = 0;
+	int status, i = 0, num_imgs = 0;
 	const u8 *p;
 
 	strcpy(fw_file, func);
@@ -2017,15 +2017,14 @@ int be_load_fw(struct be_adapter *adapter, u8 *func)
 	if ((adapter->generation == BE_GEN3) &&
 			(get_ufigen_type(fhdr) == BE_GEN3)) {
 		fhdr3 = (struct flash_file_hdr_g3 *) fw->data;
-		for (i = 0; i < fhdr3->num_imgs; i++) {
+		num_imgs = le32_to_cpu(fhdr3->num_imgs);
+		for (i = 0; i < num_imgs; i++) {
 			img_hdr_ptr = (struct image_hdr *) (fw->data +
 					(sizeof(struct flash_file_hdr_g3) +
-					i * sizeof(struct image_hdr)));
-			if (img_hdr_ptr->imageid == 1) {
-				status = be_flash_data(adapter, fw,
-						&flash_cmd, fhdr3->num_imgs);
-			}
-
+					 i * sizeof(struct image_hdr)));
+			if (le32_to_cpu(img_hdr_ptr->imageid) == 1)
+				status = be_flash_data(adapter, fw, &flash_cmd,
+							num_imgs);
 		}
 	} else if ((adapter->generation == BE_GEN2) &&
 			(get_ufigen_type(fhdr) == BE_GEN2)) {
-- 
1.6.3.3


^ permalink raw reply related

* [PATCH 1/3] be2net: fix a bug in flashing the redboot section
From: Ajit Khaparde @ 2010-03-31 11:47 UTC (permalink / raw)
  To: David Miller; +Cc: netdev


Signed-off-by: Ajit Khaparde <ajitk@serverengines.com>
---
 drivers/net/benet/be_main.c |    2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/drivers/net/benet/be_main.c b/drivers/net/benet/be_main.c
index 43e8032..a08faf3 100644
--- a/drivers/net/benet/be_main.c
+++ b/drivers/net/benet/be_main.c
@@ -1855,7 +1855,7 @@ static bool be_flash_redboot(struct be_adapter *adapter,
 	p += crc_offset;
 
 	status = be_cmd_get_flash_crc(adapter, flashed_crc,
-			(img_start + image_size - 4));
+			(image_size - 4));
 	if (status) {
 		dev_err(&adapter->pdev->dev,
 		"could not get crc from flash, not flashing redboot\n");
-- 
1.6.3.3


^ permalink raw reply related

* Undefined behaviour of connect(fd, NULL, 0);
From: Neil Brown @ 2010-03-31 11:36 UTC (permalink / raw)
  To: netdev


Hi Netdev.

We have a customer who was reporting strangely unpredictable behaviour of an
in-house application that used networking.

It called connect on a non-blocking socket and subsequently called
   connect(fd, NULL, 0)

to check if the connection had succeeded.
This would sometimes "work" and sometimes close the connection.

Looking at the code (sys_connect, move_addr_to_kernel, inet_stream_connect),
it seems that in this case an uninitialised on-stack address is passed
to inet_stream_connect and it makes a decision based on ->sa_family (which is
uninitialised).

It seems clear that connect(fd, NULL, 0) is the wrong thing to do in this
circumstance, but I think it would be good if it failed consistently rather
than unpredictably.

Would it be appropriate for move_addr_to_kernel to zero out the remainder of
the address?
   memset(kaddr+ulen, 0, MAX_SOCK_ADDR-ulen);
??

Then connect(fd, NULL, 0) would always break the connection.

Thanks,
NeilBrown

^ permalink raw reply

* Re: [Patch] bonding: fix potential deadlock in bond_uninit()
From: Eric W. Biederman @ 2010-03-31 11:28 UTC (permalink / raw)
  To: Amerigo Wang
  Cc: linux-kernel, Jiri Pirko, Stephen Hemminger, netdev,
	David S. Miller, bonding-devel, Jay Vosburgh
In-Reply-To: <20100331105559.5607.38643.sendpatchset@localhost.localdomain>

Amerigo Wang <amwang@redhat.com> writes:

> bond_uninit() is invoked with rtnl_lock held, when it does destroy_workqueue()
> which will potentially flush all works in this workqueue, if we hold rtnl_lock
> again in the work function, it will deadlock.
>
> So unlock rtnl_lock before calling destroy_workqueue().

Ouch.  That seems rather rude to our caller, and likely very
dangerous.

Is this a deadlock you actually hit, or is this something lockdep
warned about?

My gut feel says we need to move the destroy_workqueue into
the network device destructor.

Eric



> Signed-off-by: WANG Cong <amwang@redhat.com>
> Cc: Jay Vosburgh <fubar@us.ibm.com>
> Cc: "David S. Miller" <davem@davemloft.net>
> Cc: Stephen Hemminger <shemminger@vyatta.com>
> Cc: Jiri Pirko <jpirko@redhat.com>
> Cc: "Eric W. Biederman" <ebiederm@xmission.com>
>
> ---
> diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
> index 5b92fbf..b781728 100644
> --- a/drivers/net/bonding/bond_main.c
> +++ b/drivers/net/bonding/bond_main.c
> @@ -4542,8 +4542,11 @@ static void bond_uninit(struct net_device *bond_dev)
>  
>  	bond_remove_proc_entry(bond);
>  
> -	if (bond->wq)
> +	if (bond->wq) {
> +		rtnl_unlock();
>  		destroy_workqueue(bond->wq);
> +		rtnl_lock();
> +	}
>  
>  	netif_addr_lock_bh(bond_dev);
>  	bond_mc_list_destroy(bond);

^ permalink raw reply

* Re: [PATCH 1/4] xfrm: increment genid before bumping state genids
From: Timo Teräs @ 2010-03-31 11:24 UTC (permalink / raw)
  To: Herbert Xu; +Cc: netdev, David S. Miller
In-Reply-To: <20100331111949.GD12845@gondor.apana.org.au>

Herbert Xu wrote:
> On Wed, Mar 31, 2010 at 02:01:03PM +0300, Timo Teräs wrote:
>> If any other xfrm_state_insert place needs to invalidate old
>> states it needs an additional bumping call. So the bumping function
>> is the right place to increment the genid.
> 
> Right.  In fact, this thing doesn't need to be incremented on every
> insert.  How about this patch?
> 
> xfrm: Remove xfrm_state_genid
> 
> The xfrm state genid only needs to be matched against the copy
> saved in xfrm_dst.  So we don't need a global genid at all.  In
> fact, we don't even need to initialise it.
> 
> Based on observation by Timo Teräs.

Ah, yes. This is indeed a better fix.

Thanks.

^ permalink raw reply

* Re: [PATCH 4/4] flow: structurize flow cache
From: Herbert Xu @ 2010-03-31 11:21 UTC (permalink / raw)
  To: Timo Teras; +Cc: netdev
In-Reply-To: <1270030626-16687-6-git-send-email-timo.teras@iki.fi>

On Wed, Mar 31, 2010 at 01:17:06PM +0300, Timo Teras wrote:
> Group all per-cpu data to one structure instead of having many
> globals. Also prepare the internals so that we can have multiple
> instances of the flow cache if needed.
> 
> Only the kmem_cache is left as a global as all flow caches share
> the same element size, and benefit from using a common cache.
> 
> Signed-off-by: Timo Teras <timo.teras@iki.fi>

Acked-by: Herbert Xu <herbert@gondor.apana.org.au>
-- 
Visit Openswan at http://www.openswan.org/
Email: Herbert Xu ~{PmV>HI~} <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt

^ permalink raw reply

* Re: [PATCH 1/4] xfrm: increment genid before bumping state genids
From: Herbert Xu @ 2010-03-31 11:19 UTC (permalink / raw)
  To: Timo Teräs; +Cc: netdev, David S. Miller
In-Reply-To: <4BB32B6F.7060403@iki.fi>

On Wed, Mar 31, 2010 at 02:01:03PM +0300, Timo Teräs wrote:
>
> If any other xfrm_state_insert place needs to invalidate old
> states it needs an additional bumping call. So the bumping function
> is the right place to increment the genid.

Right.  In fact, this thing doesn't need to be incremented on every
insert.  How about this patch?

xfrm: Remove xfrm_state_genid

The xfrm state genid only needs to be matched against the copy
saved in xfrm_dst.  So we don't need a global genid at all.  In
fact, we don't even need to initialise it.

Based on observation by Timo Teräs.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>

diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 17d5b96..71f8f33 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -37,7 +37,6 @@
 static DEFINE_SPINLOCK(xfrm_state_lock);
 
 static unsigned int xfrm_state_hashmax __read_mostly = 1 * 1024 * 1024;
-static unsigned int xfrm_state_genid;
 
 static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned int family);
 static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo);
@@ -923,8 +922,6 @@ static void __xfrm_state_insert(struct xfrm_state *x)
 	struct net *net = xs_net(x);
 	unsigned int h;
 
-	x->genid = ++xfrm_state_genid;
-
 	list_add(&x->km.all, &net->xfrm.state_all);
 
 	h = xfrm_dst_hash(net, &x->id.daddr, &x->props.saddr,
@@ -970,7 +967,7 @@ static void __xfrm_state_bump_genids(struct xfrm_state *xnew)
 		    (mark & x->mark.m) == x->mark.v &&
 		    !xfrm_addr_cmp(&x->id.daddr, &xnew->id.daddr, family) &&
 		    !xfrm_addr_cmp(&x->props.saddr, &xnew->props.saddr, family))
-			x->genid = xfrm_state_genid;
+			x->genid++;
 	}
 }
 
Cheers,
-- 
Visit Openswan at http://www.openswan.org/
Email: Herbert Xu ~{PmV>HI~} <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt

^ permalink raw reply related

* Re: [r8169] WARNING: at net/sched/sch_generic.c
From: Neil Horman @ 2010-03-31 11:19 UTC (permalink / raw)
  To: Sergey Senozhatsky
  Cc: netdev, Francois Romieu, Eric Dumazet, David S. Miller,
	linux-kernel
In-Reply-To: <20100331102142.GA3294@swordfish.minsk.epam.com>

On Wed, Mar 31, 2010 at 01:21:42PM +0300, Sergey Senozhatsky wrote:
> Hello,
> I have the following problem:
> 
> [  296.337510] ------------[ cut here ]------------
> [  296.337523] WARNING: at net/sched/sch_generic.c:255 dev_watchdog+0xc1/0x125()
> [  296.337527] Hardware name: F3JC                
> [  296.337530] NETDEV WATCHDOG: eth0 (r8169): transmit queue 0 timed out
> [  296.337533] Modules linked in: pktgen ipv6 snd_hwdep snd_hda_codec_si3054 snd_hda_codec_realtek sdhci_pci sdhci asus_laptop sparse_keymap mmc_core led_class snd_hda_intel
> snd_hda_codec psmouse snd_pcm snd_timer snd soundcore snd_page_alloc serio_raw i2c_i801 rng_core evdev sg r8169 mii usbhid hid uhci_hcd ehci_hcd sr_mod cdrom sd_mod usbcore
> ata_piix
> [  296.337586] Pid: 0, comm: swapper Not tainted 2.6.34-rc3-dbg #74
> [  296.337589] Call Trace:
> [  296.337597]  [<c102e71f>] warn_slowpath_common+0x65/0x7c
> [  296.337603]  [<c126e30c>] ? dev_watchdog+0xc1/0x125
> [  296.337608]  [<c102e76a>] warn_slowpath_fmt+0x24/0x27
> [  296.337613]  [<c126e30c>] dev_watchdog+0xc1/0x125
> [  296.337620]  [<c1040039>] ? prepare_to_wait_exclusive+0x52/0x5b
> [  296.337627]  [<c1037053>] ? run_timer_softirq+0x120/0x1eb
> [  296.337632]  [<c10370a9>] run_timer_softirq+0x176/0x1eb
> [  296.337637]  [<c1037053>] ? run_timer_softirq+0x120/0x1eb
> [  296.337643]  [<c126e24b>] ? dev_watchdog+0x0/0x125
> [  296.337650]  [<c10331c9>] __do_softirq+0x8d/0x117
> [  296.337655]  [<c103327e>] do_softirq+0x2b/0x43
> [  296.337660]  [<c10333a3>] irq_exit+0x38/0x75
> [  296.337667]  [<c1015138>] smp_apic_timer_interrupt+0x6d/0x7b
> [  296.337673]  [<c12cbada>] apic_timer_interrupt+0x36/0x3c
> [  296.337679]  [<c104007b>] ? prepare_to_wait+0x39/0x57
> [  296.337685]  [<c11dd835>] ? acpi_idle_enter_simple+0x119/0x144
> [  296.337692]  [<c124d358>] cpuidle_idle_call+0x6d/0xa5
> [  296.337697]  [<c1001b51>] cpu_idle+0x92/0xc1
> [  296.337704]  [<c12c63d0>] start_secondary+0x1f3/0x1fa
> [  296.337708] ---[ end trace cd4a1b50139837df ]---
> 
> 
> Reproducing 100% with pktgen tests.
> 
What kind of packets are you sending with pktgen?
Neil

> 
> 	Sergey

^ permalink raw reply

* Re: [PATCH 3/4] xfrm: remove policy lock when accessing policy->walk.dead
From: Herbert Xu @ 2010-03-31 11:03 UTC (permalink / raw)
  To: Timo Teras; +Cc: netdev, Jamal Hadi Salim, David S. Miller
In-Reply-To: <1270030626-16687-5-git-send-email-timo.teras@iki.fi>

On Wed, Mar 31, 2010 at 01:17:05PM +0300, Timo Teras wrote:
> All of the code considers ->dead as a hint that the cached policy
> needs to get refreshed. The read side can just drop the read lock
> without any side effects.
> 
> The write side needs to make sure that it's written only exactly
> once. Only possible race is at xfrm_policy_kill(). This is fixed
> by checking result of __xfrm_policy_unlink() when needed. It will
> always succeed if the policy object is looked up from the hash
> list (so some checks are removed), but it needs to be checked if
> we are trying to unlink policy via a reference (appropriate
> checks added).
> 
> Since policy->walk.dead is written exactly once, it no longer
> needs to be protected with a write lock.
> 
> Signed-off-by: Timo Teras <timo.teras@iki.fi>

Acked-by: Herbert Xu <herbert@gondor.apana.org.au>

> @@ -794,10 +783,9 @@ int xfrm_policy_flush(struct net *net, u8 type, struct xfrm_audit *audit_info)
>  				     &net->xfrm.policy_inexact[dir], bydst) {
>  			if (pol->type != type)
>  				continue;
> -			dp = __xfrm_policy_unlink(pol, dir);
> +			__xfrm_policy_unlink(pol, dir);
>  			write_unlock_bh(&xfrm_policy_lock);
> -			if (dp)
> -				cnt++;
> +			cnt++;
>  
>  			xfrm_audit_policy_delete(pol, 1, audit_info->loginuid,
>  						 audit_info->sessionid,

I was intrigued to find that my local source never had this dp
stuff.

It seems that this was added recently:

commit 2f1eb65f366b81aa3c22c31e6e8db26168777ec5
Author: Jamal Hadi Salim <hadi@cyberus.ca>
Date:   Fri Feb 19 02:00:42 2010 +0000

    xfrm: Flushing empty SPD generates false events

    To see the effect make sure you have an empty SPD.
    On window1 "ip xfrm mon" and on window2 issue "ip xfrm policy flush"
    You get prompt back in window2 and you see the flush event on window1.
    With this fix, you still get prompt on window1 but no event on window2.

    Thanks to Alexey Dobriyan for finding a bug in earlier version
    when using pfkey to do the flushing.

    Signed-off-by: Jamal Hadi Salim <hadi@cyberus.ca>
    Signed-off-by: David S. Miller <davem@davemloft.net>

This seems to be bogus to me.  Just because the DB was empty
before the flush doesn't mean that the flush didn't happen.

We should revert this patch in its entirety.

Thanks,
-- 
Visit Openswan at http://www.openswan.org/
Email: Herbert Xu ~{PmV>HI~} <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt

^ permalink raw reply

* Re: [PATCH 1/4] xfrm: increment genid before bumping state genids
From: Timo Teräs @ 2010-03-31 11:01 UTC (permalink / raw)
  To: Herbert Xu; +Cc: netdev
In-Reply-To: <4BB32A11.1030708@iki.fi>

Timo Teräs wrote:
> Herbert Xu wrote:
>> On Wed, Mar 31, 2010 at 01:17:03PM +0300, Timo Teras wrote:
>>> __xfrm_state_bump_genids() is used to update the genid of all
>>> matching xfrm_state's, so any bundle using the state would get
>>> refreshed with the newly inserted state.
>>>
>>> However, since __xfrm_state_bump_genids() is called before the
>>> __xfrm_state_insert() which actually bumps the genid counter,
>>> it is possible that the genid was not updated at all (if there
>>> was no state inserts previously).
>>>
>>> This is fixed by moving the genid incrementation to
>>> __xfrm_state_bump_genids() so the older states are guaranteed
>>> to get different genid.
>>>
>>> Signed-off-by: Timo Teras <timo.teras@iki.fi>
>>
>> It would appear that not all xfrm_state_insert calls are preceded
>> by xfrm_state_bump_genids so this patch isn't correct.
> 
> Yes, I noticed that there's one. But __xfrm_state_insert() is
> called to replace an acquire in that case. Acquires are never
> used in bundle, so this is good.
> 
> But maybe it'd be more explicit if the genid increment is done
> before each __xfrm_state_insert()?

Actually, search for xfrm_state_genid in xfrm_state.c. It's only
used in two places: __xfrm_state_bump_genids() and __xfrm_state_insert().

The only case when it needs to get incremented is in the bump
function. If we are adding a state for the first time, there's
no need to bump the genid as it's per-matching state. The actual
work to invalidate states is done in the bump function, so it's
the only place that needs to increment it.

If any other xfrm_state_insert place needs to invalidate old
states it needs an additional bumping call. So the bumping function
is the right place to increment the genid.

^ permalink raw reply

* Re: [PATCH 1/4] xfrm: increment genid before bumping state genids
From: Timo Teräs @ 2010-03-31 10:55 UTC (permalink / raw)
  To: Herbert Xu; +Cc: netdev
In-Reply-To: <20100331105023.GA12845@gondor.apana.org.au>

Herbert Xu wrote:
> On Wed, Mar 31, 2010 at 01:17:03PM +0300, Timo Teras wrote:
>> __xfrm_state_bump_genids() is used to update the genid of all
>> matching xfrm_state's, so any bundle using the state would get
>> refreshed with the newly inserted state.
>>
>> However, since __xfrm_state_bump_genids() is called before the
>> __xfrm_state_insert() which actually bumps the genid counter,
>> it is possible that the genid was not updated at all (if there
>> was no state inserts previously).
>>
>> This is fixed by moving the genid incrementation to
>> __xfrm_state_bump_genids() so the older states are guaranteed
>> to get different genid.
>>
>> Signed-off-by: Timo Teras <timo.teras@iki.fi>
> 
> It would appear that not all xfrm_state_insert calls are preceded
> by xfrm_state_bump_genids so this patch isn't correct.

Yes, I noticed that there's one. But __xfrm_state_insert() is
called to replace an acquire in that case. Acquires are never
used in bundle, so this is good.

But maybe it'd be more explicit if the genid increment is done
before each __xfrm_state_insert()?

^ permalink raw reply

* Re: [PATCH 2/4] xfrm_user: verify policy direction at XFRM_MSG_POLEXPIRE handler
From: Herbert Xu @ 2010-03-31 10:54 UTC (permalink / raw)
  To: Timo Teras; +Cc: netdev
In-Reply-To: <1270030626-16687-4-git-send-email-timo.teras@iki.fi>

On Wed, Mar 31, 2010 at 01:17:04PM +0300, Timo Teras wrote:
> Add missing check for policy direction verification. This is
> especially important since without this xfrm_user may end up
> deleting per-socket policy which is not allowed.
> 
> Signed-off-by: Timo Teras <timo.teras@iki.fi>

Acked-by: Herbert Xu <herbert@gondor.apana.org.au>

Thanks,
-- 
Visit Openswan at http://www.openswan.org/
Email: Herbert Xu ~{PmV>HI~} <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt

^ permalink raw reply

* [Patch] bonding: fix potential deadlock in bond_uninit()
From: Amerigo Wang @ 2010-03-31 10:52 UTC (permalink / raw)
  To: linux-kernel
  Cc: Jiri Pirko, Stephen Hemminger, netdev, David S. Miller,
	Eric W. Biederman, Amerigo Wang, bonding-devel, Jay Vosburgh


bond_uninit() is invoked with rtnl_lock held. It calls destroy_workqueue(),
which may flush all pending works in this workqueue; if a work function
tries to take rtnl_lock again, it will deadlock.

So unlock rtnl_lock before calling destroy_workqueue().

Signed-off-by: WANG Cong <amwang@redhat.com>
Cc: Jay Vosburgh <fubar@us.ibm.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Stephen Hemminger <shemminger@vyatta.com>
Cc: Jiri Pirko <jpirko@redhat.com>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>

---
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 5b92fbf..b781728 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -4542,8 +4542,11 @@ static void bond_uninit(struct net_device *bond_dev)
 
 	bond_remove_proc_entry(bond);
 
-	if (bond->wq)
+	if (bond->wq) {
+		rtnl_unlock();
 		destroy_workqueue(bond->wq);
+		rtnl_lock();
+	}
 
 	netif_addr_lock_bh(bond_dev);
 	bond_mc_list_destroy(bond);

^ permalink raw reply related

* Re: [PATCH 1/4] xfrm: increment genid before bumping state genids
From: Herbert Xu @ 2010-03-31 10:50 UTC (permalink / raw)
  To: Timo Teras; +Cc: netdev
In-Reply-To: <1270030626-16687-3-git-send-email-timo.teras@iki.fi>

On Wed, Mar 31, 2010 at 01:17:03PM +0300, Timo Teras wrote:
> __xfrm_state_bump_genids() is used to update the genid of all
> matching xfrm_state's, so any bundle using the state would get
> refreshed with the newly inserted state.
> 
> However, since __xfrm_state_bump_genids() is called before the
> __xfrm_state_insert() which actually bumps the genid counter,
> it is possible that the genid was not updated at all (if there
> was no state inserts previously).
> 
> This is fixed by moving the genid incrementation to
> __xfrm_state_bump_genids() so the older states are guaranteed
> to get different genid.
> 
> Signed-off-by: Timo Teras <timo.teras@iki.fi>

It would appear that not all xfrm_state_insert calls are preceded
by xfrm_state_bump_genids so this patch isn't correct.

Cheers,
-- 
Visit Openswan at http://www.openswan.org/
Email: Herbert Xu ~{PmV>HI~} <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt

^ permalink raw reply

* [PATCH 3/5] netfilter: xtables: inclusion of xt_TEE
From: Jan Engelhardt @ 2010-03-31 10:38 UTC (permalink / raw)
  To: kaber; +Cc: netfilter-devel, netdev
In-Reply-To: <1270031934-15940-1-git-send-email-jengelh@medozas.de>

xt_TEE can be used to clone and reroute a packet. This can for
example be used to copy traffic at a router for logging purposes
to another dedicated machine.

References: http://www.gossamer-threads.com/lists/iptables/devel/68781
Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
---
 include/linux/netfilter/Kbuild   |    1 +
 include/linux/netfilter/xt_TEE.h |    8 +
 net/ipv4/ip_output.c             |    1 +
 net/ipv6/ip6_output.c            |    1 +
 net/netfilter/Kconfig            |    7 +
 net/netfilter/Makefile           |    1 +
 net/netfilter/xt_TEE.c           |  272 ++++++++++++++++++++++++++++++++++++++
 7 files changed, 291 insertions(+), 0 deletions(-)
 create mode 100644 include/linux/netfilter/xt_TEE.h
 create mode 100644 net/netfilter/xt_TEE.c

diff --git a/include/linux/netfilter/Kbuild b/include/linux/netfilter/Kbuild
index a5a63e4..48767cd 100644
--- a/include/linux/netfilter/Kbuild
+++ b/include/linux/netfilter/Kbuild
@@ -16,6 +16,7 @@ header-y += xt_RATEEST.h
 header-y += xt_SECMARK.h
 header-y += xt_TCPMSS.h
 header-y += xt_TCPOPTSTRIP.h
+header-y += xt_TEE.h
 header-y += xt_TPROXY.h
 header-y += xt_comment.h
 header-y += xt_connbytes.h
diff --git a/include/linux/netfilter/xt_TEE.h b/include/linux/netfilter/xt_TEE.h
new file mode 100644
index 0000000..83fa768
--- /dev/null
+++ b/include/linux/netfilter/xt_TEE.h
@@ -0,0 +1,8 @@
+#ifndef _XT_TEE_TARGET_H
+#define _XT_TEE_TARGET_H
+
+struct xt_tee_tginfo {
+	union nf_inet_addr gw;
+};
+
+#endif /* _XT_TEE_TARGET_H */
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index f09135e..0abfdde 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -309,6 +309,7 @@ int ip_output(struct sk_buff *skb)
 			    ip_finish_output,
 			    !(IPCB(skb)->flags & IPSKB_REROUTED));
 }
+EXPORT_SYMBOL_GPL(ip_output);
 
 int ip_queue_xmit(struct sk_buff *skb, int ipfragok)
 {
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 7e10f62..307d8bf 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -176,6 +176,7 @@ int ip6_output(struct sk_buff *skb)
 			    ip6_finish_output,
 			    !(IP6CB(skb)->flags & IPSKB_REROUTED));
 }
+EXPORT_SYMBOL_GPL(ip6_output);
 
 /*
  *	xmit an sk_buff (used by TCP)
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 8055786..673a6c8 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -502,6 +502,13 @@ config NETFILTER_XT_TARGET_RATEEST
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
+config NETFILTER_XT_TARGET_TEE
+	tristate '"TEE" - packet cloning to alternate destiantion'
+	depends on NETFILTER_ADVANCED
+	---help---
+	This option adds a "TEE" target with which a packet can be cloned and
+	this clone be rerouted to another nexthop.
+
 config NETFILTER_XT_TARGET_TPROXY
 	tristate '"TPROXY" target support (EXPERIMENTAL)'
 	depends on EXPERIMENTAL
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index cd31afe..14e3a8f 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -59,6 +59,7 @@ obj-$(CONFIG_NETFILTER_XT_TARGET_SECMARK) += xt_SECMARK.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_TPROXY) += xt_TPROXY.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_TCPMSS) += xt_TCPMSS.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP) += xt_TCPOPTSTRIP.o
+obj-$(CONFIG_NETFILTER_XT_TARGET_TEE) += xt_TEE.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_TRACE) += xt_TRACE.o
 
 # matches
diff --git a/net/netfilter/xt_TEE.c b/net/netfilter/xt_TEE.c
new file mode 100644
index 0000000..96dd746
--- /dev/null
+++ b/net/netfilter/xt_TEE.c
@@ -0,0 +1,272 @@
+/*
+ *	"TEE" target extension for Xtables
+ *	Copyright © Sebastian Claßen <sebastian.classen [at] freenet de>, 2007
+ *	Jan Engelhardt <jengelh [at] medozas de>, 2007 - 2010
+ *
+ *	based on ipt_ROUTE.c from Cédric de Launois
+ *	<delaunois@info.ucl.be>
+ *
+ *	This program is free software; you can redistribute it and/or
+ *	modify it under the terms of the GNU General Public License
+ *	version 2 or later, as published by the Free Software Foundation.
+ */
+#include <linux/ip.h>
+#include <linux/module.h>
+#include <linux/route.h>
+#include <linux/skbuff.h>
+#include <net/checksum.h>
+#include <net/icmp.h>
+#include <net/ip.h>
+#include <net/ipv6.h>
+#include <net/ip6_route.h>
+#include <net/route.h>
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/xt_TEE.h>
+
+#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
+#	define WITH_CONNTRACK 1
+#	include <net/netfilter/nf_conntrack.h>
+static struct nf_conn tee_track;
+#endif
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#	define WITH_IPV6 1
+#endif
+
+static const union nf_inet_addr tee_zero_address;
+
+/*
+ * Try to route the packet according to the routing keys specified in
+ * route_info. Keys are :
+ *  - ifindex :
+ *      0 if no oif preferred,
+ *      otherwise set to the index of the desired oif
+ *  - route_info->gateway :
+ *      0 if no gateway specified,
+ *      otherwise set to the next host to which the pkt must be routed
+ * If success, skb->dev is the output device to which the packet must
+ * be sent and skb->dst is not NULL
+ *
+ * RETURN: false - if an error occurred
+ *         true  - if the packet was successfully routed to the
+ *                 destination desired
+ */
+static bool
+tee_tg_route4(struct sk_buff *skb, const struct xt_tee_tginfo *info)
+{
+	const struct iphdr *iph = ip_hdr(skb);
+	struct rtable *rt;
+	struct flowi fl;
+	int err;
+
+	memset(&fl, 0, sizeof(fl));
+	fl.iif  = skb->skb_iif;
+	fl.mark = skb->mark;
+	fl.nl_u.ip4_u.daddr = info->gw.ip;
+	fl.nl_u.ip4_u.tos   = RT_TOS(iph->tos);
+	fl.nl_u.ip4_u.scope = RT_SCOPE_UNIVERSE;
+
+	/* Trying to route the packet using the standard routing table. */
+	err = ip_route_output_key(dev_net(skb->dev), &rt, &fl);
+	if (err != 0)
+		return false;
+
+	dst_release(skb_dst(skb));
+	skb_dst_set(skb, &rt->u.dst);
+	skb->dev      = rt->u.dst.dev;
+	skb->protocol = htons(ETH_P_IP);
+	IPCB(skb)->flags |= IPSKB_REROUTED;
+	return true;
+}
+
+/*
+ * To detect and deter routed packet loopback when using the --tee option, we
+ * take a page out of the raw.patch book: on the copied skb, we set up a fake
+ * ->nfct entry, pointing to the local &tee_track. We skip routing
+ * packets when we see they already have that ->nfct.
+ */
+static unsigned int
+tee_tg4(struct sk_buff *skb, const struct xt_target_param *par)
+{
+	const struct xt_tee_tginfo *info = par->targinfo;
+	struct iphdr *iph;
+
+#ifdef WITH_CONNTRACK
+	if (skb->nfct == &tee_track.ct_general)
+		/*
+		 * Loopback - a packet we already routed, is to be
+		 * routed another time. Avoid that, now.
+		 */
+		return NF_DROP;
+#endif
+	/*
+	 * Copy the skb, and route the copy. Will later return %XT_CONTINUE for
+	 * the original skb, which should continue on its way as if nothing has
+	 * happened. The copy should be independently delivered to the TEE
+	 * --gateway.
+	 */
+	skb = skb_copy(skb, GFP_ATOMIC);
+	if (skb == NULL)
+		return XT_CONTINUE;
+	/*
+	 * If we are in PREROUTING/INPUT, the checksum must be recalculated
+	 * since the length could have changed as a result of defragmentation.
+	 *
+	 * We also decrease the TTL to mitigate potential TEE loops
+	 * between two hosts.
+	 *
+	 * Set %IP_DF so that the original source is notified of a potentially
+	 * decreased MTU on the clone route. IPv6 does this too.
+	 */
+	iph = ip_hdr(skb);
+	iph->frag_off |= htons(IP_DF);
+	if (par->hooknum == NF_INET_PRE_ROUTING ||
+	    par->hooknum == NF_INET_LOCAL_IN)
+		--iph->ttl;
+	ip_send_check(iph);
+
+#ifdef WITH_CONNTRACK
+	nf_conntrack_put(skb->nfct);
+	skb->nfct     = &tee_track.ct_general;
+	skb->nfctinfo = IP_CT_NEW;
+	nf_conntrack_get(skb->nfct);
+#endif
+	/*
+	 * Xtables is not reentrant currently, so a choice has to be made:
+	 * 1. return absolute verdict for the original and let the cloned
+	 *    packet travel through the chains
+	 * 2. let the original continue travelling and not pass the clone
+	 *    to Xtables.
+	 * #2 is chosen. Normally, we would use ip_local_out for the clone.
+	 * Because iph->check is already correct and we don't pass it to
+	 * Xtables anyway, a shortcut to dst_output [forwards to ip_output] can
+	 * be taken. %IPSKB_REROUTED needs to be set so that ip_output does not
+	 * invoke POSTROUTING on the cloned packet.
+	 */
+	IPCB(skb)->flags |= IPSKB_REROUTED;
+	if (tee_tg_route4(skb, info))
+		ip_output(skb);
+
+	return XT_CONTINUE;
+}
+
+#ifdef WITH_IPV6
+static bool
+tee_tg_route6(struct sk_buff *skb, const struct xt_tee_tginfo *info)
+{
+	const struct ipv6hdr *iph = ipv6_hdr(skb);
+	struct dst_entry *dst;
+	struct flowi fl;
+
+	memset(&fl, 0, sizeof(fl));
+	fl.iif  = skb->skb_iif;
+	fl.mark = skb->mark;
+	fl.nl_u.ip6_u.daddr = info->gw.in6;
+	fl.nl_u.ip6_u.flowlabel = ((iph->flow_lbl[0] & 0xF) << 16) |
+				  (iph->flow_lbl[1] << 8) | iph->flow_lbl[2];
+
+	dst = ip6_route_output(dev_net(skb->dev), NULL, &fl);
+	if (dst == NULL)
+		return false;
+
+	dst_release(skb_dst(skb));
+	skb_dst_set(skb, dst);
+	skb->dev      = dst->dev;
+	skb->protocol = htons(ETH_P_IPV6);
+	IP6CB(skb)->flags |= IPSKB_REROUTED;
+	return true;
+}
+
+static unsigned int
+tee_tg6(struct sk_buff *skb, const struct xt_target_param *par)
+{
+	const struct xt_tee_tginfo *info = par->targinfo;
+
+#ifdef WITH_CONNTRACK
+	if (skb->nfct == &tee_track.ct_general)
+		return NF_DROP;
+#endif
+	if ((skb = skb_copy(skb, GFP_ATOMIC)) == NULL)
+		return XT_CONTINUE;
+
+#ifdef WITH_CONNTRACK
+	nf_conntrack_put(skb->nfct);
+	skb->nfct     = &tee_track.ct_general;
+	skb->nfctinfo = IP_CT_NEW;
+	nf_conntrack_get(skb->nfct);
+#endif
+	if (par->hooknum == NF_INET_PRE_ROUTING ||
+	    par->hooknum == NF_INET_LOCAL_IN) {
+		struct ipv6hdr *iph = ipv6_hdr(skb);
+		--iph->hop_limit;
+	}
+	IP6CB(skb)->flags |= IPSKB_REROUTED;
+	if (tee_tg_route6(skb, info))
+		ip6_output(skb);
+
+	return XT_CONTINUE;
+}
+#endif /* WITH_IPV6 */
+
+static int tee_tg_check(const struct xt_tgchk_param *par)
+{
+	const struct xt_tee_tginfo *info = par->targinfo;
+
+	/* 0.0.0.0 and :: not allowed */
+	return (memcmp(&info->gw, &tee_zero_address,
+	       sizeof(tee_zero_address)) == 0) ? -EINVAL : 0;
+}
+
+static struct xt_target tee_tg_reg[] __read_mostly = {
+	{
+		.name       = "TEE",
+		.revision   = 0,
+		.family     = NFPROTO_IPV4,
+		.target     = tee_tg4,
+		.targetsize = sizeof(struct xt_tee_tginfo),
+		.checkentry = tee_tg_check,
+		.me         = THIS_MODULE,
+	},
+#ifdef WITH_IPV6
+	{
+		.name       = "TEE",
+		.revision   = 0,
+		.family     = NFPROTO_IPV6,
+		.target     = tee_tg6,
+		.targetsize = sizeof(struct xt_tee_tginfo),
+		.checkentry = tee_tg_check,
+		.me         = THIS_MODULE,
+	},
+#endif
+};
+
+static int __init tee_tg_init(void)
+{
+#ifdef WITH_CONNTRACK
+	/*
+	 * Set up fake conntrack (stolen from raw.patch):
+	 * - to never be deleted, not in any hashes
+	 */
+	atomic_set(&tee_track.ct_general.use, 1);
+
+	/* - and look it like as a confirmed connection */
+	set_bit(IPS_CONFIRMED_BIT, &tee_track.status);
+
+	/* Initialize fake conntrack so that NAT will skip it */
+	tee_track.status |= IPS_NAT_DONE_MASK;
+#endif
+	return xt_register_targets(tee_tg_reg, ARRAY_SIZE(tee_tg_reg));
+}
+
+static void __exit tee_tg_exit(void)
+{
+	xt_unregister_targets(tee_tg_reg, ARRAY_SIZE(tee_tg_reg));
+}
+
+module_init(tee_tg_init);
+module_exit(tee_tg_exit);
+MODULE_AUTHOR("Sebastian Claßen <sebastian.classen@freenet.ag>");
+MODULE_AUTHOR("Jan Engelhardt <jengelh@medozas.de>");
+MODULE_DESCRIPTION("Xtables: Reroute packet copy");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("ipt_TEE");
+MODULE_ALIAS("ip6t_TEE");
-- 
1.7.0.2


^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox