Netdev List
 help / color / mirror / Atom feed
* [PATCH 09/12] vxge: correct multi-function detection
From: Jon Mason @ 2010-11-11 14:26 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, Sivakumar Subramani, Sreenivasa Honnur, Ram Vepa

The values used to determined if the adapter is running in single or
multi-function mode were previously modified to the values necessary
when making the VXGE_HW_FW_API_GET_FUNC_MODE firmware call.  However,
the firmware call was not modified.  This had the driver printing out on
probe that the adapter was in multi-function mode when in single
function mode and vice versa.

Signed-off-by: Jon Mason <jon.mason@exar.com>
Signed-off-by: Ram Vepa <ram.vepa@exar.com>
---
 drivers/net/vxge/vxge-config.c |    6 +++---
 drivers/net/vxge/vxge-reg.h    |    3 +++
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/drivers/net/vxge/vxge-config.c b/drivers/net/vxge/vxge-config.c
index 5903b2e..14dc8c8 100644
--- a/drivers/net/vxge/vxge-config.c
+++ b/drivers/net/vxge/vxge-config.c
@@ -1091,16 +1091,16 @@ __vxge_hw_vpath_pci_func_mode_get(struct __vxge_hw_virtualpath *vpath,
 	u64 data0, data1 = 0, steer_ctrl = 0;
 	enum vxge_hw_status status;
 
-	data0 = VXGE_HW_RTS_ACCESS_STEER_DATA0_MEMO_ITEM_PCI_MODE;
+	data0 = 0;
 
 	status = vxge_hw_vpath_fw_api(vpath,
-			VXGE_HW_RTS_ACCESS_STEER_CTRL_ACTION_READ_MEMO_ENTRY,
+			VXGE_HW_FW_API_GET_FUNC_MODE,
 			VXGE_HW_RTS_ACCESS_STEER_CTRL_DATA_STRUCT_SEL_FW_MEMO,
 			0, &data0, &data1, &steer_ctrl);
 	if (status != VXGE_HW_OK)
 		return status;
 
-	hw_info->function_mode = data0;
+	hw_info->function_mode = VXGE_HW_GET_FUNC_MODE_VAL(data0);
 	return status;
 }
 
diff --git a/drivers/net/vxge/vxge-reg.h b/drivers/net/vxge/vxge-reg.h
index 0df52db..3e658b1 100644
--- a/drivers/net/vxge/vxge-reg.h
+++ b/drivers/net/vxge/vxge-reg.h
@@ -62,6 +62,9 @@
 #define VXGE_HW_GET_EPROM_IMAGE_REV(val)		vxge_bVALn(val, 48, 16)
 #define VXGE_HW_RTS_ACCESS_STEER_ROM_IMAGE_INDEX(val)	vxge_vBIT(val, 16, 8)
 
+#define VXGE_HW_FW_API_GET_FUNC_MODE			29
+#define VXGE_HW_GET_FUNC_MODE_VAL(val)			(val & 0xFF)
+
 #define VXGE_HW_FW_UPGRADE_MEMO				13
 #define VXGE_HW_FW_UPGRADE_ACTION			16
 #define VXGE_HW_FW_UPGRADE_OFFSET_START			2
-- 
1.7.0.4


^ permalink raw reply related

* [PATCH 08/12] vxge: Titan1A detection
From: Jon Mason @ 2010-11-11 14:26 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, Sivakumar Subramani, Sreenivasa Honnur, Ram Vepa

Detect if the adapter is Titan or Titan1A, and tune the driver for this
hardware.  Also, remove unnecessary function __vxge_hw_device_id_get.

Signed-off-by: Jon Mason <jon.mason@exar.com>
Signed-off-by: Ram Vepa <ram.vepa@exar.com>
---
 drivers/net/vxge/vxge-config.c |   40 +++++++------------------------
 drivers/net/vxge/vxge-config.h |    6 ----
 drivers/net/vxge/vxge-main.c   |   45 +++++++++++++++++++++++++++++++----
 drivers/net/vxge/vxge-main.h   |   51 +++++++++++++++++++++++++++++----------
 4 files changed, 87 insertions(+), 55 deletions(-)

diff --git a/drivers/net/vxge/vxge-config.c b/drivers/net/vxge/vxge-config.c
index 388e6c4..5903b2e 100644
--- a/drivers/net/vxge/vxge-config.c
+++ b/drivers/net/vxge/vxge-config.c
@@ -99,9 +99,6 @@ static enum vxge_hw_status
 __vxge_hw_device_config_check(struct vxge_hw_device_config *new_config);
 
 static void
-__vxge_hw_device_id_get(struct __vxge_hw_device *hldev);
-
-static void
 __vxge_hw_device_host_info_get(struct __vxge_hw_device *hldev);
 
 static enum vxge_hw_status
@@ -806,26 +803,6 @@ exit:
 }
 
 /*
- * __vxge_hw_device_id_get
- * This routine returns sets the device id and revision numbers into the device
- * structure
- */
-void __vxge_hw_device_id_get(struct __vxge_hw_device *hldev)
-{
-	u64 val64;
-
-	val64 = readq(&hldev->common_reg->titan_asic_id);
-	hldev->device_id =
-		(u16)VXGE_HW_TITAN_ASIC_ID_GET_INITIAL_DEVICE_ID(val64);
-
-	hldev->major_revision =
-		(u8)VXGE_HW_TITAN_ASIC_ID_GET_INITIAL_MAJOR_REVISION(val64);
-
-	hldev->minor_revision =
-		(u8)VXGE_HW_TITAN_ASIC_ID_GET_INITIAL_MINOR_REVISION(val64);
-}
-
-/*
  * __vxge_hw_device_access_rights_get: Get Access Rights of the driver
  * This routine returns the Access Rights of the driver
  */
@@ -1327,7 +1304,6 @@ vxge_hw_device_initialize(
 		vfree(hldev);
 		goto exit;
 	}
-	__vxge_hw_device_id_get(hldev);
 
 	__vxge_hw_device_host_info_get(hldev);
 
@@ -4442,16 +4418,18 @@ vpath_open_exit1:
 void
 vxge_hw_vpath_rx_doorbell_init(struct __vxge_hw_vpath_handle *vp)
 {
-	struct __vxge_hw_virtualpath *vpath = NULL;
+	struct __vxge_hw_virtualpath *vpath = vp->vpath;
+	struct __vxge_hw_ring *ring = vpath->ringh;
+	struct vxgedev *vdev = netdev_priv(vpath->hldev->ndev);
 	u64 new_count, val64, val164;
-	struct __vxge_hw_ring *ring;
 
-	vpath = vp->vpath;
-	ring = vpath->ringh;
+	if (vdev->titan1) {
+		new_count = readq(&vpath->vp_reg->rxdmem_size);
+		new_count &= 0x1fff;
+	} else
+		new_count = ring->config->ring_blocks * VXGE_HW_BLOCK_SIZE / 8;
 
-	new_count = readq(&vpath->vp_reg->rxdmem_size);
-	new_count &= 0x1fff;
-	val164 = (VXGE_HW_RXDMEM_SIZE_PRC_RXDMEM_SIZE(new_count));
+	val164 = VXGE_HW_RXDMEM_SIZE_PRC_RXDMEM_SIZE(new_count);
 
 	writeq(VXGE_HW_PRC_RXD_DOORBELL_NEW_QW_CNT(val164),
 		&vpath->vp_reg->prc_rxd_doorbell);
diff --git a/drivers/net/vxge/vxge-config.h b/drivers/net/vxge/vxge-config.h
index 95230bd..5b2c831 100644
--- a/drivers/net/vxge/vxge-config.h
+++ b/drivers/net/vxge/vxge-config.h
@@ -713,9 +713,6 @@ struct __vxge_hw_vpath_handle{
 /**
  * struct __vxge_hw_device  - Hal device object
  * @magic: Magic Number
- * @device_id: PCI Device Id of the adapter
- * @major_revision: PCI Device major revision
- * @minor_revision: PCI Device minor revision
  * @bar0: BAR0 virtual address.
  * @pdev: Physical device handle
  * @config: Confguration passed by the LL driver at initialization
@@ -727,9 +724,6 @@ struct __vxge_hw_device {
 	u32				magic;
 #define VXGE_HW_DEVICE_MAGIC		0x12345678
 #define VXGE_HW_DEVICE_DEAD		0xDEADDEAD
-	u16				device_id;
-	u8				major_revision;
-	u8				minor_revision;
 	void __iomem			*bar0;
 	struct pci_dev			*pdev;
 	struct net_device		*ndev;
diff --git a/drivers/net/vxge/vxge-main.c b/drivers/net/vxge/vxge-main.c
index ea303a2..b8806a1 100644
--- a/drivers/net/vxge/vxge-main.c
+++ b/drivers/net/vxge/vxge-main.c
@@ -2012,8 +2012,23 @@ static int vxge_open_vpaths(struct vxgedev *vdev)
 
 	for (i = 0; i < vdev->no_of_vpath; i++) {
 		vpath = &vdev->vpaths[i];
-
 		vxge_assert(vpath->is_configured);
+
+		if (!vdev->titan1) {
+			struct vxge_hw_vp_config *vcfg;
+			vcfg = &vdev->devh->config.vp_config[vpath->device_id];
+
+			vcfg->rti.urange_a = RTI_T1A_RX_URANGE_A;
+			vcfg->rti.urange_b = RTI_T1A_RX_URANGE_B;
+			vcfg->rti.urange_c = RTI_T1A_RX_URANGE_C;
+			vcfg->tti.uec_a = TTI_T1A_TX_UFC_A;
+			vcfg->tti.uec_b = TTI_T1A_TX_UFC_B;
+			vcfg->tti.uec_c = TTI_T1A_TX_UFC_C(vdev->mtu);
+			vcfg->tti.uec_d = TTI_T1A_TX_UFC_D(vdev->mtu);
+			vcfg->tti.ltimer_val = VXGE_T1A_TTI_LTIMER_VAL;
+			vcfg->tti.rtimer_val = VXGE_T1A_TTI_RTIMER_VAL;
+		}
+
 		attr.vp_id = vpath->device_id;
 		attr.fifo_attr.callback = vxge_xmit_compl;
 		attr.fifo_attr.txdl_term = vxge_tx_term;
@@ -2710,9 +2725,10 @@ vxge_open(struct net_device *dev)
 		vxge_os_timer(vdev->vp_reset_timer,
 			vxge_poll_vp_reset, vdev, (HZ/2));
 
-	if (vdev->vp_lockup_timer.function == NULL)
-		vxge_os_timer(vdev->vp_lockup_timer,
-			vxge_poll_vp_lockup, vdev, (HZ/2));
+	/* There is no need to check for RxD leak and RxD lookup on Titan1A */
+	if (vdev->titan1 && vdev->vp_lockup_timer.function == NULL)
+		vxge_os_timer(vdev->vp_lockup_timer, vxge_poll_vp_lockup, vdev,
+			      HZ / 2);
 
 	set_bit(__VXGE_STATE_CARD_UP, &vdev->state);
 
@@ -2844,7 +2860,9 @@ static int do_vxge_close(struct net_device *dev, int do_io)
 
 		smp_wmb();
 	}
-	del_timer_sync(&vdev->vp_lockup_timer);
+
+	if (vdev->titan1)
+		del_timer_sync(&vdev->vp_lockup_timer);
 
 	del_timer_sync(&vdev->vp_reset_timer);
 
@@ -3262,6 +3280,19 @@ static const struct net_device_ops vxge_netdev_ops = {
 #endif
 };
 
+static int __devinit vxge_device_revision(struct vxgedev *vdev)
+{
+	int ret;
+	u8 revision;
+
+	ret = pci_read_config_byte(vdev->pdev, PCI_REVISION_ID, &revision);
+	if (ret)
+		return -EIO;
+
+	vdev->titan1 = (revision == VXGE_HW_TITAN1_PCI_REVISION);
+	return 0;
+}
+
 static int __devinit vxge_device_register(struct __vxge_hw_device *hldev,
 					  struct vxge_config *config,
 					  int high_dma, int no_of_vpath,
@@ -3302,6 +3333,10 @@ static int __devinit vxge_device_register(struct __vxge_hw_device *hldev,
 	vdev->rx_csum = 1;	/* Enable Rx CSUM by default. */
 	vdev->rx_hwts = 0;
 
+	ret = vxge_device_revision(vdev);
+	if (ret < 0)
+		goto _out1;
+
 	SET_NETDEV_DEV(ndev, &vdev->pdev->dev);
 
 	ndev->features |= NETIF_F_HW_VLAN_TX | NETIF_F_HW_VLAN_RX |
diff --git a/drivers/net/vxge/vxge-main.h b/drivers/net/vxge/vxge-main.h
index 6f6e9ce..953cb0d 100644
--- a/drivers/net/vxge/vxge-main.h
+++ b/drivers/net/vxge/vxge-main.h
@@ -29,6 +29,9 @@
 
 #define PCI_DEVICE_ID_TITAN_WIN		0x5733
 #define PCI_DEVICE_ID_TITAN_UNI		0x5833
+#define VXGE_HW_TITAN1_PCI_REVISION	1
+#define VXGE_HW_TITAN1A_PCI_REVISION	2
+
 #define	VXGE_USE_DEFAULT		0xffffffff
 #define VXGE_HW_VPATH_MSIX_ACTIVE	4
 #define VXGE_ALARM_MSIX_ID		2
@@ -53,11 +56,13 @@
 
 #define VXGE_TTI_BTIMER_VAL 250000
 
-#define VXGE_TTI_LTIMER_VAL 1000
-#define VXGE_TTI_RTIMER_VAL 0
-#define VXGE_RTI_BTIMER_VAL 250
-#define VXGE_RTI_LTIMER_VAL 100
-#define VXGE_RTI_RTIMER_VAL 0
+#define VXGE_TTI_LTIMER_VAL	1000
+#define VXGE_T1A_TTI_LTIMER_VAL	80
+#define VXGE_TTI_RTIMER_VAL	0
+#define VXGE_T1A_TTI_RTIMER_VAL	400
+#define VXGE_RTI_BTIMER_VAL	250
+#define VXGE_RTI_LTIMER_VAL	100
+#define VXGE_RTI_RTIMER_VAL	0
 #define VXGE_FIFO_INDICATE_MAX_PKTS VXGE_DEF_FIFO_LENGTH
 #define VXGE_ISR_POLLING_CNT 	8
 #define VXGE_MAX_CONFIG_DEV	0xFF
@@ -76,14 +81,32 @@
 #define TTI_TX_UFC_B	40
 #define TTI_TX_UFC_C	60
 #define TTI_TX_UFC_D	100
+#define TTI_T1A_TX_UFC_A	30
+#define TTI_T1A_TX_UFC_B	80
+/* Slope - (max_mtu - min_mtu)/(max_mtu_ufc - min_mtu_ufc) */
+/* Slope - 93 */
+/* 60 - 9k Mtu, 140 - 1.5k mtu */
+#define TTI_T1A_TX_UFC_C(mtu)	(60 + ((VXGE_HW_MAX_MTU - mtu) / 93))
+
+/* Slope - 37 */
+/* 100 - 9k Mtu, 300 - 1.5k mtu */
+#define TTI_T1A_TX_UFC_D(mtu)	(100 + ((VXGE_HW_MAX_MTU - mtu) / 37))
+
+
+#define RTI_RX_URANGE_A		5
+#define RTI_RX_URANGE_B		15
+#define RTI_RX_URANGE_C		40
+#define RTI_T1A_RX_URANGE_A	1
+#define RTI_T1A_RX_URANGE_B	20
+#define RTI_T1A_RX_URANGE_C	50
+#define RTI_RX_UFC_A		1
+#define RTI_RX_UFC_B		5
+#define RTI_RX_UFC_C		10
+#define RTI_RX_UFC_D		15
+#define RTI_T1A_RX_UFC_B	20
+#define RTI_T1A_RX_UFC_C	50
+#define RTI_T1A_RX_UFC_D	60
 
-#define RTI_RX_URANGE_A	5
-#define RTI_RX_URANGE_B	15
-#define RTI_RX_URANGE_C	40
-#define RTI_RX_UFC_A	1
-#define RTI_RX_UFC_B	5
-#define RTI_RX_UFC_C	10
-#define RTI_RX_UFC_D	15
 
 /* Milli secs timer period */
 #define VXGE_TIMER_DELAY		10000
@@ -329,7 +352,8 @@ struct vxgedev {
 
 	 /* A flag indicating whether rx_csum is to be used or not. */
 	u32	rx_csum:1,
-		rx_hwts:1;
+		rx_hwts:1,
+		titan1:1;
 
 	struct vxge_msix_entry *vxge_entries;
 	struct msix_entry *entries;
@@ -397,6 +421,7 @@ struct vxge_tx_priv {
 	} while (0);
 
 extern void vxge_initialize_ethtool_ops(struct net_device *ndev);
+
 enum vxge_hw_status vxge_reset_all_vpaths(struct vxgedev *vdev);
 
 int vxge_fw_upgrade(struct vxgedev *vdev, char *fw_name, int override);
-- 
1.7.0.4


^ permalink raw reply related

* [PATCH 07/12] vxge: Handle errors in vxge_hw_vpath_fw_api
From: Jon Mason @ 2010-11-11 14:25 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, Sivakumar Subramani, Sreenivasa Honnur, Ram Vepa

Propagate the return code of the call to vxge_hw_vpath_fw_api and
__vxge_hw_vpath_pci_func_mode_get.  This enables the proper handling of
error conditions when querying the function mode of the device during
probe.

Signed-off-by: Jon Mason <jon.mason@exar.com>
Signed-off-by: Ram Vepa <ram.vepa@exar.com>
---
 drivers/net/vxge/vxge-config.c |   15 ++++++++++-----
 1 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/drivers/net/vxge/vxge-config.c b/drivers/net/vxge/vxge-config.c
index 7761b9e..388e6c4 100644
--- a/drivers/net/vxge/vxge-config.c
+++ b/drivers/net/vxge/vxge-config.c
@@ -1107,8 +1107,9 @@ __vxge_hw_vpath_card_info_get(struct __vxge_hw_virtualpath *vpath,
  * __vxge_hw_vpath_pci_func_mode_get - Get the pci mode
  * Returns pci function mode
  */
-static u64
-__vxge_hw_vpath_pci_func_mode_get(struct __vxge_hw_virtualpath *vpath)
+static enum vxge_hw_status
+__vxge_hw_vpath_pci_func_mode_get(struct __vxge_hw_virtualpath *vpath,
+				  struct vxge_hw_device_hw_info *hw_info)
 {
 	u64 data0, data1 = 0, steer_ctrl = 0;
 	enum vxge_hw_status status;
@@ -1119,8 +1120,11 @@ __vxge_hw_vpath_pci_func_mode_get(struct __vxge_hw_virtualpath *vpath)
 			VXGE_HW_RTS_ACCESS_STEER_CTRL_ACTION_READ_MEMO_ENTRY,
 			VXGE_HW_RTS_ACCESS_STEER_CTRL_DATA_STRUCT_SEL_FW_MEMO,
 			0, &data0, &data1, &steer_ctrl);
+	if (status != VXGE_HW_OK)
+		return status;
 
-	return data0;
+	hw_info->function_mode = data0;
+	return status;
 }
 
 /*
@@ -1235,8 +1239,9 @@ vxge_hw_device_hw_info_get(void __iomem *bar0,
 			       (bar0 + val64);
 		vpath.vp_open = 0;
 
-		hw_info->function_mode =
-			__vxge_hw_vpath_pci_func_mode_get(&vpath);
+		status = __vxge_hw_vpath_pci_func_mode_get(&vpath, hw_info);
+		if (status != VXGE_HW_OK)
+			goto exit;
 
 		status = __vxge_hw_vpath_fw_ver_get(&vpath, hw_info);
 		if (status != VXGE_HW_OK)
-- 
1.7.0.4


^ permalink raw reply related

* [PATCH 06/12] vxge: add receive hardware timestamping
From: Jon Mason @ 2010-11-11 14:25 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, Sivakumar Subramani, Sreenivasa Honnur, Ram Vepa

Add support for enable/disabling hardware timestamping on receive
packets via ioctl call.  When enabled, the hardware timestamp replaces
the FCS in the payload.

Signed-off-by: Jon Mason <jon.mason@exar.com>
Signed-off-by: Ram Vepa <ram.vepa@exar.com>
---
 drivers/net/vxge/vxge-main.c |  133 ++++++++++++++++++++++++++++++++++++++++--
 drivers/net/vxge/vxge-main.h |    8 ++-
 2 files changed, 132 insertions(+), 9 deletions(-)

diff --git a/drivers/net/vxge/vxge-main.c b/drivers/net/vxge/vxge-main.c
index 10549bd..ea303a2 100644
--- a/drivers/net/vxge/vxge-main.c
+++ b/drivers/net/vxge/vxge-main.c
@@ -51,6 +51,7 @@
 #include <linux/netdevice.h>
 #include <linux/etherdevice.h>
 #include <linux/firmware.h>
+#include <linux/net_tstamp.h>
 #include "vxge-main.h"
 #include "vxge-reg.h"
 
@@ -369,7 +370,7 @@ vxge_rx_1b_compl(struct __vxge_hw_ring *ringh, void *dtr,
 		 u8 t_code, void *userdata)
 {
 	struct vxge_ring *ring = (struct vxge_ring *)userdata;
-	struct  net_device *dev = ring->ndev;
+	struct net_device *dev = ring->ndev;
 	unsigned int dma_sizes;
 	void *first_dtr = NULL;
 	int dtr_cnt = 0;
@@ -513,6 +514,16 @@ vxge_rx_1b_compl(struct __vxge_hw_ring *ringh, void *dtr,
 		else
 			skb_checksum_none_assert(skb);
 
+
+		if (ring->rx_hwts) {
+			struct skb_shared_hwtstamps *skb_hwts;
+			u32 ns = *(u32 *)(skb->head + pkt_length);
+
+			skb_hwts = skb_hwtstamps(skb);
+			skb_hwts->hwtstamp = ns_to_ktime(ns);
+			skb_hwts->syststamp.tv64 = 0;
+		}
+
 		/* rth_hash_type and rth_it_hit are non-zero regardless of
 		 * whether rss is enabled.  Only the rth_value is zero/non-zero
 		 * if rss is disabled/enabled, so key off of that.
@@ -2037,6 +2048,7 @@ static int vxge_open_vpaths(struct vxgedev *vdev)
 				vdev->config.fifo_indicate_max_pkts;
 			vpath->ring.rx_vector_no = 0;
 			vpath->ring.rx_csum = vdev->rx_csum;
+			vpath->ring.rx_hwts = vdev->rx_hwts;
 			vpath->is_open = 1;
 			vdev->vp_handles[i] = vpath->handle;
 			vpath->ring.gro_enable = vdev->config.gro_enable;
@@ -2971,6 +2983,101 @@ vxge_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *net_stats)
 	return net_stats;
 }
 
+static enum vxge_hw_status vxge_timestamp_config(struct vxgedev *vdev,
+						 int enable)
+{
+	enum vxge_hw_status status;
+	u64 val64;
+
+	/* Timestamp is passed to the driver via the FCS, therefore we
+	 * must disable the FCS stripping by the adapter.  Since this is
+	 * required for the driver to load (due to a hardware bug),
+	 * there is no need to do anything special here.
+	 */
+	if (enable)
+		val64 = VXGE_HW_XMAC_TIMESTAMP_EN |
+			VXGE_HW_XMAC_TIMESTAMP_USE_LINK_ID(0) |
+			VXGE_HW_XMAC_TIMESTAMP_INTERVAL(0);
+	else
+		val64 = 0;
+
+	status = vxge_hw_mgmt_reg_write(vdev->devh,
+					vxge_hw_mgmt_reg_type_mrpcim,
+					0,
+					offsetof(struct vxge_hw_mrpcim_reg,
+						 xmac_timestamp),
+					val64);
+	vxge_hw_device_flush_io(vdev->devh);
+	return status;
+}
+
+static int vxge_hwtstamp_ioctl(struct vxgedev *vdev, void __user *data)
+{
+	struct hwtstamp_config config;
+	enum vxge_hw_status status;
+	int i;
+
+	if (copy_from_user(&config, data, sizeof(config)))
+		return -EFAULT;
+
+	/* reserved for future extensions */
+	if (config.flags)
+		return -EINVAL;
+
+	/* Transmit HW Timestamp not supported */
+	switch (config.tx_type) {
+	case HWTSTAMP_TX_OFF:
+		break;
+	case HWTSTAMP_TX_ON:
+	default:
+		return -ERANGE;
+	}
+
+	switch (config.rx_filter) {
+	case HWTSTAMP_FILTER_NONE:
+		status = vxge_timestamp_config(vdev, 0);
+		if (status != VXGE_HW_OK)
+			return -EFAULT;
+
+		vdev->rx_hwts = 0;
+		config.rx_filter = HWTSTAMP_FILTER_NONE;
+		break;
+
+	case HWTSTAMP_FILTER_ALL:
+	case HWTSTAMP_FILTER_SOME:
+	case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
+	case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
+	case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
+	case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
+	case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
+	case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
+	case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
+	case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
+	case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
+	case HWTSTAMP_FILTER_PTP_V2_EVENT:
+	case HWTSTAMP_FILTER_PTP_V2_SYNC:
+	case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
+		status = vxge_timestamp_config(vdev, 1);
+		if (status != VXGE_HW_OK)
+			return -EFAULT;
+
+		vdev->rx_hwts = 1;
+		config.rx_filter = HWTSTAMP_FILTER_ALL;
+		break;
+
+	default:
+		 return -ERANGE;
+	}
+
+	for (i = 0; i < vdev->no_of_vpath; i++)
+		vdev->vpaths[i].ring.rx_hwts = vdev->rx_hwts;
+
+	if (copy_to_user(data, &config, sizeof(config)))
+		return -EFAULT;
+
+	return 0;
+}
+
 /**
  * vxge_ioctl
  * @dev: Device pointer.
@@ -2983,7 +3090,20 @@ vxge_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *net_stats)
  */
 static int vxge_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
 {
-	return -EOPNOTSUPP;
+	struct vxgedev *vdev = netdev_priv(dev);
+	int ret;
+
+	switch (cmd) {
+	case SIOCSHWTSTAMP:
+		ret = vxge_hwtstamp_ioctl(vdev, rq->ifr_data);
+		if (ret)
+			return ret;
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	return 0;
 }
 
 /**
@@ -3180,6 +3300,7 @@ static int __devinit vxge_device_register(struct __vxge_hw_device *hldev,
 	vdev->pdev = hldev->pdev;
 	memcpy(&vdev->config, config, sizeof(struct vxge_config));
 	vdev->rx_csum = 1;	/* Enable Rx CSUM by default. */
+	vdev->rx_hwts = 0;
 
 	SET_NETDEV_DEV(ndev, &vdev->pdev->dev);
 
@@ -4321,10 +4442,10 @@ vxge_probe(struct pci_dev *pdev, const struct pci_device_id *pre)
 	}
 
 	/* if FCS stripping is not disabled in MAC fail driver load */
-	if (vxge_hw_vpath_strip_fcs_check(hldev, vpath_mask) != VXGE_HW_OK) {
-		vxge_debug_init(VXGE_ERR,
-			"%s: FCS stripping is not disabled in MAC"
-			" failing driver load", VXGE_DRIVER_NAME);
+	status = vxge_hw_vpath_strip_fcs_check(hldev, vpath_mask);
+	if (status != VXGE_HW_OK) {
+		vxge_debug_init(VXGE_ERR, "%s: FCS stripping is enabled in MAC"
+				" failing driver load", VXGE_DRIVER_NAME);
 		ret = -EINVAL;
 		goto _exit4;
 	}
diff --git a/drivers/net/vxge/vxge-main.h b/drivers/net/vxge/vxge-main.h
index 1699d75..6f6e9ce 100644
--- a/drivers/net/vxge/vxge-main.h
+++ b/drivers/net/vxge/vxge-main.h
@@ -248,8 +248,9 @@ struct vxge_ring {
 	 */
 	int driver_id;
 
-	 /* copy of the flag indicating whether rx_csum is to be used */
-	u32 rx_csum;
+	/* copy of the flag indicating whether rx_csum is to be used */
+	u32 rx_csum:1,
+	    rx_hwts:1;
 
 	int pkts_processed;
 	int budget;
@@ -327,7 +328,8 @@ struct vxgedev {
 	u16		all_multi_flg;
 
 	 /* A flag indicating whether rx_csum is to be used or not. */
-	u32	rx_csum;
+	u32	rx_csum:1,
+		rx_hwts:1;
 
 	struct vxge_msix_entry *vxge_entries;
 	struct msix_entry *entries;
-- 
1.7.0.4


^ permalink raw reply related

* [PATCH 05/12] vxge: add support for ethtool firmware flashing
From: Jon Mason @ 2010-11-11 14:25 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, Sivakumar Subramani, Sreenivasa Honnur, Ram Vepa

Add the ability in the vxge driver to flash firmware via ethtool.

Updated to include comments from Ben Hutchings.

Signed-off-by: Jon Mason <jon.mason@exar.com>
Signed-off-by: Ram Vepa <ram.vepa@exar.com>
---
 drivers/net/vxge/vxge-config.c  |  183 +++++++++++++++++++++++++++++++++++++-
 drivers/net/vxge/vxge-config.h  |   71 ++++++++++++++-
 drivers/net/vxge/vxge-ethtool.c |   20 ++++
 drivers/net/vxge/vxge-main.c    |  190 +++++++++++++++++++++++++++++++++++----
 drivers/net/vxge/vxge-main.h    |    2 +
 drivers/net/vxge/vxge-reg.h     |   29 ++++++-
 drivers/net/vxge/vxge-version.h |   27 ++++++
 7 files changed, 500 insertions(+), 22 deletions(-)

diff --git a/drivers/net/vxge/vxge-config.c b/drivers/net/vxge/vxge-config.c
index c822463..7761b9e 100644
--- a/drivers/net/vxge/vxge-config.c
+++ b/drivers/net/vxge/vxge-config.c
@@ -155,7 +155,6 @@ __vxge_hw_vpath_reset_check(struct __vxge_hw_virtualpath *vpath);
 static enum vxge_hw_status
 __vxge_hw_vpath_sw_reset(struct __vxge_hw_device *devh, u32 vp_id);
 
-
 static enum vxge_hw_status
 __vxge_hw_vpath_mac_configure(struct __vxge_hw_device *devh, u32 vp_id);
 
@@ -322,6 +321,188 @@ out:
 	return status;
 }
 
+enum vxge_hw_status
+vxge_hw_upgrade_read_version(struct __vxge_hw_device *hldev, u32 *major,
+			     u32 *minor, u32 *build)
+{
+	u64 data0 = 0, data1 = 0, steer_ctrl = 0;
+	struct __vxge_hw_virtualpath *vpath;
+	enum vxge_hw_status status;
+
+	vpath = &hldev->virtual_paths[hldev->first_vp_id];
+
+	status = vxge_hw_vpath_fw_api(vpath,
+				      VXGE_HW_FW_UPGRADE_ACTION,
+				      VXGE_HW_FW_UPGRADE_MEMO,
+				      VXGE_HW_FW_UPGRADE_OFFSET_READ,
+				      &data0, &data1, &steer_ctrl);
+	if (status != VXGE_HW_OK)
+		return status;
+
+	*major = VXGE_HW_RTS_ACCESS_STEER_DATA0_GET_FW_VER_MAJOR(data0);
+	*minor = VXGE_HW_RTS_ACCESS_STEER_DATA0_GET_FW_VER_MINOR(data0);
+	*build = VXGE_HW_RTS_ACCESS_STEER_DATA0_GET_FW_VER_BUILD(data0);
+
+	return status;
+}
+
+enum vxge_hw_status vxge_hw_flash_fw(struct __vxge_hw_device *hldev)
+{
+	u64 data0 = 0, data1 = 0, steer_ctrl = 0;
+	struct __vxge_hw_virtualpath *vpath;
+	enum vxge_hw_status status;
+	u32 ret;
+
+	vpath = &hldev->virtual_paths[hldev->first_vp_id];
+
+	status = vxge_hw_vpath_fw_api(vpath,
+				      VXGE_HW_FW_UPGRADE_ACTION,
+				      VXGE_HW_FW_UPGRADE_MEMO,
+				      VXGE_HW_FW_UPGRADE_OFFSET_COMMIT,
+				      &data0, &data1, &steer_ctrl);
+	if (status != VXGE_HW_OK) {
+		vxge_debug_init(VXGE_ERR, "%s: FW upgrade failed", __func__);
+		goto exit;
+	}
+
+	ret = VXGE_HW_RTS_ACCESS_STEER_CTRL_GET_ACTION(steer_ctrl) & 0x7F;
+	if (ret != 1) {
+		vxge_debug_init(VXGE_ERR, "%s: FW commit failed with error %d",
+				__func__, ret);
+		status = VXGE_HW_FAIL;
+	}
+
+exit:
+	return status;
+}
+
+enum vxge_hw_status
+vxge_update_fw_image(struct __vxge_hw_device *hldev, const u8 *fwdata, int size)
+{
+	u64 data0 = 0, data1 = 0, steer_ctrl = 0;
+	struct __vxge_hw_virtualpath *vpath;
+	enum vxge_hw_status status;
+	int ret_code, sec_code;
+
+	vpath = &hldev->virtual_paths[hldev->first_vp_id];
+
+	/* send upgrade start command */
+	status = vxge_hw_vpath_fw_api(vpath,
+				      VXGE_HW_FW_UPGRADE_ACTION,
+				      VXGE_HW_FW_UPGRADE_MEMO,
+				      VXGE_HW_FW_UPGRADE_OFFSET_START,
+				      &data0, &data1, &steer_ctrl);
+	if (status != VXGE_HW_OK) {
+		vxge_debug_init(VXGE_ERR, " %s: Upgrade start cmd failed",
+				__func__);
+		return status;
+	}
+
+	/* Transfer fw image to adapter 16 bytes at a time */
+	for (; size > 0; size -= VXGE_HW_FW_UPGRADE_BLK_SIZE) {
+		steer_ctrl = 0;
+
+		/* The next 128bits of fwdata to be loaded onto the adapter */
+		data0 = *((u64 *)fwdata);
+		data1 = *((u64 *)fwdata + 1);
+
+		status = vxge_hw_vpath_fw_api(vpath,
+					      VXGE_HW_FW_UPGRADE_ACTION,
+					      VXGE_HW_FW_UPGRADE_MEMO,
+					      VXGE_HW_FW_UPGRADE_OFFSET_SEND,
+					      &data0, &data1, &steer_ctrl);
+		if (status != VXGE_HW_OK) {
+			vxge_debug_init(VXGE_ERR, "%s: Upgrade send failed",
+					__func__);
+			goto out;
+		}
+
+		ret_code = VXGE_HW_UPGRADE_GET_RET_ERR_CODE(data0);
+		switch (ret_code) {
+		case VXGE_HW_FW_UPGRADE_OK:
+			/* All OK, send next 16 bytes. */
+			break;
+		case VXGE_FW_UPGRADE_BYTES2SKIP:
+			/* skip bytes in the stream */
+			fwdata += (data0 >> 8) & 0xFFFFFFFF;
+			break;
+		case VXGE_HW_FW_UPGRADE_DONE:
+			goto out;
+		case VXGE_HW_FW_UPGRADE_ERR:
+			sec_code = VXGE_HW_UPGRADE_GET_SEC_ERR_CODE(data0);
+			switch (sec_code) {
+			case VXGE_HW_FW_UPGRADE_ERR_CORRUPT_DATA_1:
+			case VXGE_HW_FW_UPGRADE_ERR_CORRUPT_DATA_7:
+				printk(KERN_ERR
+				       "corrupted data from .ncf file\n");
+				break;
+			case VXGE_HW_FW_UPGRADE_ERR_INV_NCF_FILE_3:
+			case VXGE_HW_FW_UPGRADE_ERR_INV_NCF_FILE_4:
+			case VXGE_HW_FW_UPGRADE_ERR_INV_NCF_FILE_5:
+			case VXGE_HW_FW_UPGRADE_ERR_INV_NCF_FILE_6:
+			case VXGE_HW_FW_UPGRADE_ERR_INV_NCF_FILE_8:
+				printk(KERN_ERR "invalid .ncf file\n");
+				break;
+			case VXGE_HW_FW_UPGRADE_ERR_BUFFER_OVERFLOW:
+				printk(KERN_ERR "buffer overflow\n");
+				break;
+			case VXGE_HW_FW_UPGRADE_ERR_FAILED_TO_FLASH:
+				printk(KERN_ERR "failed to flash the image\n");
+				break;
+			case VXGE_HW_FW_UPGRADE_ERR_GENERIC_ERROR_UNKNOWN:
+				printk(KERN_ERR
+				       "generic error. Unknown error type\n");
+				break;
+			default:
+				printk(KERN_ERR "Unknown error of type %d\n",
+				       sec_code);
+				break;
+			}
+			status = VXGE_HW_FAIL;
+			goto out;
+		default:
+			printk(KERN_ERR "Unknown FW error: %d\n", ret_code);
+			status = VXGE_HW_FAIL;
+			goto out;
+		}
+		/* point to next 16 bytes */
+		fwdata += VXGE_HW_FW_UPGRADE_BLK_SIZE;
+	}
+out:
+	return status;
+}
+
+enum vxge_hw_status
+vxge_hw_vpath_eprom_img_ver_get(struct __vxge_hw_device *hldev,
+				struct eprom_image *img)
+{
+	u64 data0 = 0, data1 = 0, steer_ctrl = 0;
+	struct __vxge_hw_virtualpath *vpath;
+	enum vxge_hw_status status;
+	int i;
+
+	vpath = &hldev->virtual_paths[hldev->first_vp_id];
+
+	for (i = 0; i < VXGE_HW_MAX_ROM_IMAGES; i++) {
+		data0 = VXGE_HW_RTS_ACCESS_STEER_ROM_IMAGE_INDEX(i);
+		data1 = steer_ctrl = 0;
+
+		status = vxge_hw_vpath_fw_api(vpath,
+			VXGE_HW_RTS_ACCESS_STEER_CTRL_DATA_STRUCT_SEL_FW_MEMO,
+			VXGE_HW_FW_API_GET_EPROM_REV,
+			0, &data0, &data1, &steer_ctrl);
+		if (status != VXGE_HW_OK)
+			break;
+
+		img[i].is_valid = VXGE_HW_GET_EPROM_IMAGE_VALID(data0);
+		img[i].index = VXGE_HW_GET_EPROM_IMAGE_INDEX(data0);
+		img[i].type = VXGE_HW_GET_EPROM_IMAGE_TYPE(data0);
+		img[i].version = VXGE_HW_GET_EPROM_IMAGE_REV(data0);
+	}
+
+	return status;
+}
+
 /*
  * __vxge_hw_channel_allocate - Allocate memory for channel
  * This function allocates required memory for the channel and various arrays
diff --git a/drivers/net/vxge/vxge-config.h b/drivers/net/vxge/vxge-config.h
index 6a81014..95230bd 100644
--- a/drivers/net/vxge/vxge-config.h
+++ b/drivers/net/vxge/vxge-config.h
@@ -29,6 +29,15 @@
 #define VXGE_HW_MAX_MTU				9600
 #define VXGE_HW_DEFAULT_MTU			1500
 
+#define VXGE_HW_MAX_ROM_IMAGES			8
+
+struct eprom_image {
+	u8 is_valid:1;
+	u8 index;
+	u8 type;
+	u16 version;
+};
+
 #ifdef VXGE_DEBUG_ASSERT
 /**
  * vxge_assert
@@ -148,6 +157,47 @@ enum vxge_hw_device_link_state {
 };
 
 /**
+ * enum enum vxge_hw_fw_upgrade_code - FW upgrade return codes.
+ * @VXGE_HW_FW_UPGRADE_OK: All OK send next 16 bytes
+ * @VXGE_HW_FW_UPGRADE_DONE:  upload completed
+ * @VXGE_HW_FW_UPGRADE_ERR:  upload error
+ * @VXGE_FW_UPGRADE_BYTES2SKIP:  skip bytes in the stream
+ *
+ */
+enum vxge_hw_fw_upgrade_code {
+	VXGE_HW_FW_UPGRADE_OK		= 0,
+	VXGE_HW_FW_UPGRADE_DONE		= 1,
+	VXGE_HW_FW_UPGRADE_ERR		= 2,
+	VXGE_FW_UPGRADE_BYTES2SKIP	= 3
+};
+
+/**
+ * enum enum vxge_hw_fw_upgrade_err_code - FW upgrade error codes.
+ * @VXGE_HW_FW_UPGRADE_ERR_CORRUPT_DATA_1: corrupt data
+ * @VXGE_HW_FW_UPGRADE_ERR_BUFFER_OVERFLOW: buffer overflow
+ * @VXGE_HW_FW_UPGRADE_ERR_INV_NCF_FILE_3: invalid .ncf file
+ * @VXGE_HW_FW_UPGRADE_ERR_INV_NCF_FILE_4: invalid .ncf file
+ * @VXGE_HW_FW_UPGRADE_ERR_INV_NCF_FILE_5: invalid .ncf file
+ * @VXGE_HW_FW_UPGRADE_ERR_INV_NCF_FILE_6: invalid .ncf file
+ * @VXGE_HW_FW_UPGRADE_ERR_CORRUPT_DATA_7: corrupt data
+ * @VXGE_HW_FW_UPGRADE_ERR_INV_NCF_FILE_8: invalid .ncf file
+ * @VXGE_HW_FW_UPGRADE_ERR_GENERIC_ERROR_UNKNOWN: generic error unknown type
+ * @VXGE_HW_FW_UPGRADE_ERR_FAILED_TO_FLASH: failed to flash image check failed
+ */
+enum vxge_hw_fw_upgrade_err_code {
+	VXGE_HW_FW_UPGRADE_ERR_CORRUPT_DATA_1		= 1,
+	VXGE_HW_FW_UPGRADE_ERR_BUFFER_OVERFLOW		= 2,
+	VXGE_HW_FW_UPGRADE_ERR_INV_NCF_FILE_3		= 3,
+	VXGE_HW_FW_UPGRADE_ERR_INV_NCF_FILE_4		= 4,
+	VXGE_HW_FW_UPGRADE_ERR_INV_NCF_FILE_5		= 5,
+	VXGE_HW_FW_UPGRADE_ERR_INV_NCF_FILE_6		= 6,
+	VXGE_HW_FW_UPGRADE_ERR_CORRUPT_DATA_7		= 7,
+	VXGE_HW_FW_UPGRADE_ERR_INV_NCF_FILE_8		= 8,
+	VXGE_HW_FW_UPGRADE_ERR_GENERIC_ERROR_UNKNOWN	= 9,
+	VXGE_HW_FW_UPGRADE_ERR_FAILED_TO_FLASH		= 10
+};
+
+/**
  * struct vxge_hw_device_date - Date Format
  * @day: Day
  * @month: Month
@@ -454,7 +504,6 @@ struct vxge_hw_device_config {
  * See also: vxge_hw_driver_initialize().
  */
 struct vxge_hw_uld_cbs {
-
 	void (*link_up)(struct __vxge_hw_device *devh);
 	void (*link_down)(struct __vxge_hw_device *devh);
 	void (*crit_err)(struct __vxge_hw_device *devh,
@@ -721,6 +770,7 @@ struct __vxge_hw_device {
 	u32				debug_level;
 	u32				level_err;
 	u32				level_trace;
+	u16 eprom_versions[VXGE_HW_MAX_ROM_IMAGES];
 };
 
 #define VXGE_HW_INFO_LEN	64
@@ -2032,7 +2082,22 @@ __vxge_hw_device_is_privilaged(u32 host_type, u32 func_id);
 #define VXGE_HW_MIN_SUCCESSIVE_IDLE_COUNT 5
 #define VXGE_HW_MAX_POLLING_COUNT 100
 
-int vxge_hw_vpath_wait_receive_idle(struct __vxge_hw_device *hldev, u32 vp_id);
+void
+vxge_hw_device_wait_receive_idle(struct __vxge_hw_device *hldev);
+
+enum vxge_hw_status
+vxge_hw_upgrade_read_version(struct __vxge_hw_device *hldev, u32 *major,
+			     u32 *minor, u32 *build);
+
+enum vxge_hw_status vxge_hw_flash_fw(struct __vxge_hw_device *hldev);
 
-void  vxge_hw_device_wait_receive_idle(struct __vxge_hw_device *hldev);
+enum vxge_hw_status
+vxge_update_fw_image(struct __vxge_hw_device *hldev, const u8 *filebuf,
+		     int size);
+
+enum vxge_hw_status
+vxge_hw_vpath_eprom_img_ver_get(struct __vxge_hw_device *hldev,
+				struct eprom_image *eprom_image_data);
+
+int vxge_hw_vpath_wait_receive_idle(struct __vxge_hw_device *hldev, u32 vp_id);
 #endif
diff --git a/drivers/net/vxge/vxge-ethtool.c b/drivers/net/vxge/vxge-ethtool.c
index f8fd8da..3d2cd6a 100644
--- a/drivers/net/vxge/vxge-ethtool.c
+++ b/drivers/net/vxge/vxge-ethtool.c
@@ -1153,6 +1153,25 @@ static int vxge_set_flags(struct net_device *dev, u32 data)
 	return 0;
 }
 
+static int vxge_fw_flash(struct net_device *dev, struct ethtool_flash *parms)
+{
+	struct vxgedev *vdev = (struct vxgedev *)netdev_priv(dev);
+
+	if (vdev->max_vpath_supported != VXGE_HW_MAX_VIRTUAL_PATHS) {
+		printk(KERN_INFO "Single Function Mode is required to flash the"
+		       " firmware\n");
+		return -EINVAL;
+	}
+
+	if (netif_running(dev)) {
+		printk(KERN_INFO "Interface %s must be down to flash the "
+		       "firmware\n", dev->name);
+		return -EBUSY;
+	}
+
+	return vxge_fw_upgrade(vdev, parms->data, 1);
+}
+
 static const struct ethtool_ops vxge_ethtool_ops = {
 	.get_settings		= vxge_ethtool_gset,
 	.set_settings		= vxge_ethtool_sset,
@@ -1175,6 +1194,7 @@ static const struct ethtool_ops vxge_ethtool_ops = {
 	.get_sset_count		= vxge_ethtool_get_sset_count,
 	.get_ethtool_stats	= vxge_get_ethtool_stats,
 	.set_flags		= vxge_set_flags,
+	.flash_device		= vxge_fw_flash,
 };
 
 void vxge_initialize_ethtool_ops(struct net_device *ndev)
diff --git a/drivers/net/vxge/vxge-main.c b/drivers/net/vxge/vxge-main.c
index 53db6a4..10549bd 100644
--- a/drivers/net/vxge/vxge-main.c
+++ b/drivers/net/vxge/vxge-main.c
@@ -50,6 +50,7 @@
 #include <net/ip.h>
 #include <linux/netdevice.h>
 #include <linux/etherdevice.h>
+#include <linux/firmware.h>
 #include "vxge-main.h"
 #include "vxge-reg.h"
 
@@ -3248,6 +3249,7 @@ static int __devinit vxge_device_register(struct __vxge_hw_device *hldev,
 		"%s: Ethernet device registered",
 		ndev->name);
 
+	hldev->ndev = ndev;
 	*vdev_out = vdev;
 
 	/* Resetting the Device stats */
@@ -3935,6 +3937,142 @@ static inline u32 vxge_get_num_vfs(u64 function_mode)
 	return num_functions;
 }
 
+int vxge_fw_upgrade(struct vxgedev *vdev, char *fw_name, int override)
+{
+	struct __vxge_hw_device *hldev = vdev->devh;
+	u32 maj, min, bld, cmaj, cmin, cbld;
+	enum vxge_hw_status status;
+	const struct firmware *fw;
+	int ret;
+
+	ret = request_firmware(&fw, fw_name, &vdev->pdev->dev);
+	if (ret) {
+		vxge_debug_init(VXGE_ERR, "%s: Firmware file '%s' not found",
+				VXGE_DRIVER_NAME, fw_name);
+		goto out;
+	}
+
+	/* Load the new firmware onto the adapter */
+	status = vxge_update_fw_image(hldev, fw->data, fw->size);
+	if (status != VXGE_HW_OK) {
+		vxge_debug_init(VXGE_ERR,
+				"%s: FW image download to adapter failed '%s'.",
+				VXGE_DRIVER_NAME, fw_name);
+		ret = -EIO;
+		goto out;
+	}
+
+	/* Read the version of the new firmware */
+	status = vxge_hw_upgrade_read_version(hldev, &maj, &min, &bld);
+	if (status != VXGE_HW_OK) {
+		vxge_debug_init(VXGE_ERR,
+				"%s: Upgrade read version failed '%s'.",
+				VXGE_DRIVER_NAME, fw_name);
+		ret = -EIO;
+		goto out;
+	}
+
+	cmaj = vdev->config.device_hw_info.fw_version.major;
+	cmin = vdev->config.device_hw_info.fw_version.minor;
+	cbld = vdev->config.device_hw_info.fw_version.build;
+	/* It's possible the version in /lib/firmware is not the latest version.
+	 * If so, we could get into a loop of trying to upgrade to the latest
+	 * and flashing the older version.
+	 */
+	if (VXGE_FW_VER(maj, min, bld) == VXGE_FW_VER(cmaj, cmin, cbld) &&
+	    !override) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	printk(KERN_NOTICE "Upgrade to firmware version %d.%d.%d commencing\n",
+	       maj, min, bld);
+
+	/* Flash the adapter with the new firmware */
+	status = vxge_hw_flash_fw(hldev);
+	if (status != VXGE_HW_OK) {
+		vxge_debug_init(VXGE_ERR, "%s: Upgrade commit failed '%s'.",
+				VXGE_DRIVER_NAME, fw_name);
+		ret = -EIO;
+		goto out;
+	}
+
+	printk(KERN_NOTICE "Upgrade of firmware successful!  Adapter must be "
+	       "hard reset before using, thus requiring a system reboot or a "
+	       "hotplug event.\n");
+
+out:
+	return ret;
+}
+
+static int vxge_probe_fw_update(struct vxgedev *vdev)
+{
+	u32 maj, min, bld;
+	int ret, gpxe = 0;
+	char *fw_name;
+
+	maj = vdev->config.device_hw_info.fw_version.major;
+	min = vdev->config.device_hw_info.fw_version.minor;
+	bld = vdev->config.device_hw_info.fw_version.build;
+
+	if (VXGE_FW_VER(maj, min, bld) == VXGE_CERT_FW_VER)
+		return 0;
+
+	/* Ignore the build number when determining if the current firmware is
+	 * "too new" to load the driver
+	 */
+	if (VXGE_FW_VER(maj, min, 0) > VXGE_CERT_FW_VER) {
+		vxge_debug_init(VXGE_ERR, "%s: Firmware newer than last known "
+				"version, unable to load driver\n",
+				VXGE_DRIVER_NAME);
+		return -EINVAL;
+	}
+
+	/* Firmware 1.4.4 and older cannot be upgraded, and is too ancient to
+	 * work with this driver.
+	 */
+	if (VXGE_FW_VER(maj, min, bld) <= VXGE_FW_DEAD_VER) {
+		vxge_debug_init(VXGE_ERR, "%s: Firmware %d.%d.%d cannot be "
+				"upgraded\n", VXGE_DRIVER_NAME, maj, min, bld);
+		return -EINVAL;
+	}
+
+	/* If file not specified, determine gPXE or not */
+	if (VXGE_FW_VER(maj, min, bld) >= VXGE_EPROM_FW_VER) {
+		int i;
+		for (i = 0; i < VXGE_HW_MAX_ROM_IMAGES; i++)
+			if (vdev->devh->eprom_versions[i]) {
+				gpxe = 1;
+				break;
+			}
+	}
+	if (gpxe)
+		fw_name = "vxge/X3fw-pxe.ncf";
+	else
+		fw_name = "vxge/X3fw.ncf";
+
+	ret = vxge_fw_upgrade(vdev, fw_name, 0);
+	/* -EINVAL and -ENOENT are not fatal errors for flashing firmware on
+	 * probe, so ignore them
+	 */
+	if (ret != -EINVAL && ret != -ENOENT)
+		return -EIO;
+	else
+		ret = 0;
+
+	if (VXGE_FW_VER(VXGE_CERT_FW_VER_MAJOR, VXGE_CERT_FW_VER_MINOR, 0) >
+	    VXGE_FW_VER(maj, min, 0)) {
+		vxge_debug_init(VXGE_ERR, "%s: Firmware %d.%d.%d is too old to"
+				" be used with this driver.\n"
+				"Please get the latest version from "
+				"ftp://ftp.s2io.com/pub/X3100-Drivers/FIRMWARE",
+				VXGE_DRIVER_NAME, maj, min, bld);
+		return -EINVAL;
+	}
+
+	return ret;
+}
+
 /**
  * vxge_probe
  * @pdev : structure containing the PCI related information of the device.
@@ -4093,16 +4231,6 @@ vxge_probe(struct pci_dev *pdev, const struct pci_device_id *pre)
 		goto _exit3;
 	}
 
-	if (ll_config->device_hw_info.fw_version.major !=
-		VXGE_DRIVER_FW_VERSION_MAJOR) {
-		vxge_debug_init(VXGE_ERR,
-			"%s: Incorrect firmware version."
-			"Please upgrade the firmware to version 1.x.x",
-			VXGE_DRIVER_NAME);
-		ret = -EINVAL;
-		goto _exit3;
-	}
-
 	vpath_mask = ll_config->device_hw_info.vpath_mask;
 	if (vpath_mask == 0) {
 		vxge_debug_ll_config(VXGE_TRACE,
@@ -4166,6 +4294,32 @@ vxge_probe(struct pci_dev *pdev, const struct pci_device_id *pre)
 			goto _exit3;
 	}
 
+	if (VXGE_FW_VER(ll_config->device_hw_info.fw_version.major,
+			ll_config->device_hw_info.fw_version.minor,
+			ll_config->device_hw_info.fw_version.build) >=
+	    VXGE_EPROM_FW_VER) {
+		struct eprom_image img[VXGE_HW_MAX_ROM_IMAGES];
+
+		status = vxge_hw_vpath_eprom_img_ver_get(hldev, img);
+		if (status != VXGE_HW_OK) {
+			vxge_debug_init(VXGE_ERR, "%s: Reading of EPROM failed",
+					VXGE_DRIVER_NAME);
+			/* This is a non-fatal error, continue */
+		}
+
+		for (i = 0; i < VXGE_HW_MAX_ROM_IMAGES; i++) {
+			hldev->eprom_versions[i] = img[i].version;
+			if (!img[i].is_valid)
+				break;
+			vxge_debug_init(VXGE_TRACE, "%s: EPROM %d, version "
+					"%d.%d.%d.%d\n", VXGE_DRIVER_NAME, i,
+					VXGE_EPROM_IMG_MAJOR(img[i].version),
+					VXGE_EPROM_IMG_MINOR(img[i].version),
+					VXGE_EPROM_IMG_FIX(img[i].version),
+					VXGE_EPROM_IMG_BUILD(img[i].version));
+		}
+	}
+
 	/* if FCS stripping is not disabled in MAC fail driver load */
 	if (vxge_hw_vpath_strip_fcs_check(hldev, vpath_mask) != VXGE_HW_OK) {
 		vxge_debug_init(VXGE_ERR,
@@ -4194,18 +4348,22 @@ vxge_probe(struct pci_dev *pdev, const struct pci_device_id *pre)
 	ll_config->tx_pause_enable = VXGE_PAUSE_CTRL_ENABLE;
 	ll_config->rx_pause_enable = VXGE_PAUSE_CTRL_ENABLE;
 
-	if (vxge_device_register(hldev, ll_config, high_dma, no_of_vpath,
-		&vdev)) {
+	ret = vxge_device_register(hldev, ll_config, high_dma, no_of_vpath,
+				   &vdev);
+	if (ret) {
 		ret = -EINVAL;
 		goto _exit4;
 	}
 
+	ret = vxge_probe_fw_update(vdev);
+	if (ret)
+		goto _exit5;
+
 	vxge_hw_device_debug_set(hldev, VXGE_TRACE, VXGE_COMPONENT_LL);
 	VXGE_COPY_DEBUG_INFO_TO_LL(vdev, vxge_hw_device_error_level_get(hldev),
 		vxge_hw_device_trace_level_get(hldev));
 
 	/* set private HW device info */
-	hldev->ndev = vdev->ndev;
 	vdev->mtu = VXGE_HW_DEFAULT_MTU;
 	vdev->bar0 = attr.bar0;
 	vdev->max_vpath_supported = max_vpath_supported;
@@ -4307,7 +4465,7 @@ vxge_probe(struct pci_dev *pdev, const struct pci_device_id *pre)
 				"%s: mac_addr_list : memory allocation failed",
 				vdev->ndev->name);
 			ret = -EPERM;
-			goto _exit5;
+			goto _exit6;
 		}
 		macaddr = (u8 *)&entry->macaddr;
 		memcpy(macaddr, vdev->ndev->dev_addr, ETH_ALEN);
@@ -4347,10 +4505,10 @@ vxge_probe(struct pci_dev *pdev, const struct pci_device_id *pre)
 	kfree(ll_config);
 	return 0;
 
-_exit5:
+_exit6:
 	for (i = 0; i < vdev->no_of_vpath; i++)
 		vxge_free_mac_add_list(&vdev->vpaths[i]);
-
+_exit5:
 	vxge_device_unregister(hldev);
 _exit4:
 	pci_disable_sriov(pdev);
diff --git a/drivers/net/vxge/vxge-main.h b/drivers/net/vxge/vxge-main.h
index 3845e62..1699d75 100644
--- a/drivers/net/vxge/vxge-main.h
+++ b/drivers/net/vxge/vxge-main.h
@@ -397,6 +397,8 @@ struct vxge_tx_priv {
 extern void vxge_initialize_ethtool_ops(struct net_device *ndev);
 enum vxge_hw_status vxge_reset_all_vpaths(struct vxgedev *vdev);
 
+int vxge_fw_upgrade(struct vxgedev *vdev, char *fw_name, int override);
+
 /**
  * #define VXGE_DEBUG_INIT: debug for initialization functions
  * #define VXGE_DEBUG_TX	 : debug transmit related functions
diff --git a/drivers/net/vxge/vxge-reg.h b/drivers/net/vxge/vxge-reg.h
index 93fd752..0df52db 100644
--- a/drivers/net/vxge/vxge-reg.h
+++ b/drivers/net/vxge/vxge-reg.h
@@ -49,6 +49,30 @@
 #define VXGE_HW_TITAN_VPMGMT_REG_SPACES			17
 #define VXGE_HW_TITAN_VPATH_REG_SPACES			17
 
+#define VXGE_HW_FW_API_GET_EPROM_REV			31
+
+#define VXGE_EPROM_IMG_MAJOR(val)		(u32) vxge_bVALn(val, 48, 4)
+#define VXGE_EPROM_IMG_MINOR(val)		(u32) vxge_bVALn(val, 52, 4)
+#define VXGE_EPROM_IMG_FIX(val)			(u32) vxge_bVALn(val, 56, 4)
+#define VXGE_EPROM_IMG_BUILD(val)		(u32) vxge_bVALn(val, 60, 4)
+
+#define VXGE_HW_GET_EPROM_IMAGE_INDEX(val)		vxge_bVALn(val, 16, 8)
+#define VXGE_HW_GET_EPROM_IMAGE_VALID(val)		vxge_bVALn(val, 31, 1)
+#define VXGE_HW_GET_EPROM_IMAGE_TYPE(val)		vxge_bVALn(val, 40, 8)
+#define VXGE_HW_GET_EPROM_IMAGE_REV(val)		vxge_bVALn(val, 48, 16)
+#define VXGE_HW_RTS_ACCESS_STEER_ROM_IMAGE_INDEX(val)	vxge_vBIT(val, 16, 8)
+
+#define VXGE_HW_FW_UPGRADE_MEMO				13
+#define VXGE_HW_FW_UPGRADE_ACTION			16
+#define VXGE_HW_FW_UPGRADE_OFFSET_START			2
+#define VXGE_HW_FW_UPGRADE_OFFSET_SEND			3
+#define VXGE_HW_FW_UPGRADE_OFFSET_COMMIT		4
+#define VXGE_HW_FW_UPGRADE_OFFSET_READ			5
+
+#define VXGE_HW_FW_UPGRADE_BLK_SIZE			16
+#define VXGE_HW_UPGRADE_GET_RET_ERR_CODE(val)		(val & 0xff)
+#define VXGE_HW_UPGRADE_GET_SEC_ERR_CODE(val)		((val >> 8) & 0xff)
+
 #define VXGE_HW_ASIC_MODE_RESERVED				0
 #define VXGE_HW_ASIC_MODE_NO_IOV				1
 #define VXGE_HW_ASIC_MODE_SR_IOV				2
@@ -165,13 +189,13 @@
 #define	VXGE_HW_RTS_ACCESS_STEER_CTRL_DATA_STRUCT_SEL_ETYPE		2
 #define	VXGE_HW_RTS_ACCESS_STEER_CTRL_DATA_STRUCT_SEL_PN		3
 #define	VXGE_HW_RTS_ACCESS_STEER_CTRL_DATA_STRUCT_SEL_RTH_GEN_CFG	5
-#define	VXGE_HW_RTS_ACS_STEER_CTRL_DATA_STRUCT_SEL_RTH_SOLO_IT	6
+#define	VXGE_HW_RTS_ACS_STEER_CTRL_DATA_STRUCT_SEL_RTH_SOLO_IT		6
 #define	VXGE_HW_RTS_ACCESS_STEER_CTRL_DATA_STRUCT_SEL_RTH_JHASH_CFG	7
 #define	VXGE_HW_RTS_ACCESS_STEER_CTRL_DATA_STRUCT_SEL_RTH_MASK		8
 #define	VXGE_HW_RTS_ACCESS_STEER_CTRL_DATA_STRUCT_SEL_RTH_KEY		9
 #define	VXGE_HW_RTS_ACCESS_STEER_CTRL_DATA_STRUCT_SEL_QOS		10
 #define	VXGE_HW_RTS_ACCESS_STEER_CTRL_DATA_STRUCT_SEL_DS		11
-#define	VXGE_HW_RTS_ACS_STEER_CTRL_DATA_STRUCT_SEL_RTH_MULTI_IT	12
+#define	VXGE_HW_RTS_ACS_STEER_CTRL_DATA_STRUCT_SEL_RTH_MULTI_IT		12
 #define	VXGE_HW_RTS_ACCESS_STEER_CTRL_DATA_STRUCT_SEL_FW_MEMO		13
 
 #define	VXGE_HW_RTS_ACCESS_STEER_DATA0_GET_DA_MAC_ADDR(bits) \
@@ -437,6 +461,7 @@
 #define VXGE_HW_RTS_ACCESS_STEER_DATA1_GET_FLASH_VER_BUILD(bits) \
 							vxge_bVALn(bits, 48, 16)
 #define VXGE_HW_RTS_ACCESS_STEER_DATA1_FLASH_VER_BUILD vxge_vBIT(val, 48, 16)
+#define VXGE_HW_RTS_ACCESS_STEER_CTRL_GET_ACTION(bits) vxge_bVALn(bits, 0, 8)
 
 #define	VXGE_HW_SRPCIM_TO_VPATH_ALARM_REG_GET_PPIF_SRPCIM_TO_VPATH_ALARM(bits)\
 							vxge_bVALn(bits, 0, 18)
diff --git a/drivers/net/vxge/vxge-version.h b/drivers/net/vxge/vxge-version.h
index 53fefe1..b4eced1 100644
--- a/drivers/net/vxge/vxge-version.h
+++ b/drivers/net/vxge/vxge-version.h
@@ -19,4 +19,31 @@
 #define VXGE_VERSION_FIX	"9"
 #define VXGE_VERSION_BUILD	"20840"
 #define VXGE_VERSION_FOR	"k"
+
+#define VXGE_FW_VER(maj, min, bld) (((maj) << 16) + ((min) << 8) + (bld))
+
+#define VXGE_DEAD_FW_VER_MAJOR	1
+#define VXGE_DEAD_FW_VER_MINOR	4
+#define VXGE_DEAD_FW_VER_BUILD	4
+
+#define VXGE_FW_DEAD_VER VXGE_FW_VER(VXGE_DEAD_FW_VER_MAJOR, \
+				     VXGE_DEAD_FW_VER_MINOR, \
+				     VXGE_DEAD_FW_VER_BUILD)
+
+#define VXGE_EPROM_FW_VER_MAJOR	1
+#define VXGE_EPROM_FW_VER_MINOR	6
+#define VXGE_EPROM_FW_VER_BUILD	1
+
+#define VXGE_EPROM_FW_VER VXGE_FW_VER(VXGE_EPROM_FW_VER_MAJOR, \
+				      VXGE_EPROM_FW_VER_MINOR, \
+				      VXGE_EPROM_FW_VER_BUILD)
+
+#define VXGE_CERT_FW_VER_MAJOR	1
+#define VXGE_CERT_FW_VER_MINOR	8
+#define VXGE_CERT_FW_VER_BUILD	1
+
+#define VXGE_CERT_FW_VER VXGE_FW_VER(VXGE_CERT_FW_VER_MAJOR, \
+				     VXGE_CERT_FW_VER_MINOR, \
+				     VXGE_CERT_FW_VER_BUILD)
+
 #endif
-- 
1.7.0.4


^ permalink raw reply related

* [PATCH 04/12] vxge: serialize access to steering control register
From: Jon Mason @ 2010-11-11 14:25 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, Sivakumar Subramani, Sreenivasa Honnur, Ram Vepa

It is possible for multiple callers to access the firmware interface for
the same vpath simultaneously, resulting in uncertain output.  Add locks
to serialize access.  Also, make functions only accessed locally static,
thus requiring some movement of code blocks.

Signed-off-by: Jon Mason <jon.mason@exar.com>
Signed-off-by: Ram Vepa <ram.vepa@exar.com>
---
 drivers/net/vxge/vxge-config.c |  807 ++++++++++++++++------------------------
 drivers/net/vxge/vxge-config.h |    1 +
 2 files changed, 325 insertions(+), 483 deletions(-)

diff --git a/drivers/net/vxge/vxge-config.c b/drivers/net/vxge/vxge-config.c
index 212e301..c822463 100644
--- a/drivers/net/vxge/vxge-config.c
+++ b/drivers/net/vxge/vxge-config.c
@@ -19,6 +19,7 @@
 
 #include "vxge-traffic.h"
 #include "vxge-config.h"
+#include "vxge-main.h"
 
 static enum vxge_hw_status
 __vxge_hw_fifo_create(
@@ -104,12 +105,6 @@ static void
 __vxge_hw_device_host_info_get(struct __vxge_hw_device *hldev);
 
 static enum vxge_hw_status
-__vxge_hw_vpath_card_info_get(
-	u32 vp_id,
-	struct vxge_hw_vpath_reg __iomem *vpath_reg,
-	struct vxge_hw_device_hw_info *hw_info);
-
-static enum vxge_hw_status
 __vxge_hw_device_initialize(struct __vxge_hw_device *hldev);
 
 static void
@@ -153,17 +148,6 @@ vxge_hw_vpath_stats_enable(struct __vxge_hw_vpath_handle *vpath_handle);
 static enum vxge_hw_status
 __vxge_hw_legacy_swapper_set(struct vxge_hw_legacy_reg __iomem *legacy_reg);
 
-static u64
-__vxge_hw_vpath_pci_func_mode_get(u32  vp_id,
-				  struct vxge_hw_vpath_reg __iomem *vpath_reg);
-
-static u32
-__vxge_hw_vpath_func_id_get(u32 vp_id, struct vxge_hw_vpmgmt_reg __iomem *vpmgmt_reg);
-
-static enum vxge_hw_status
-__vxge_hw_vpath_addr_get(u32 vp_id, struct vxge_hw_vpath_reg __iomem *vpath_reg,
-			 u8 (macaddr)[ETH_ALEN], u8 (macaddr_mask)[ETH_ALEN]);
-
 static enum vxge_hw_status
 __vxge_hw_vpath_reset_check(struct __vxge_hw_virtualpath *vpath);
 
@@ -171,9 +155,6 @@ __vxge_hw_vpath_reset_check(struct __vxge_hw_virtualpath *vpath);
 static enum vxge_hw_status
 __vxge_hw_vpath_sw_reset(struct __vxge_hw_device *devh, u32 vp_id);
 
-static enum vxge_hw_status
-__vxge_hw_vpath_fw_ver_get(u32 vp_id, struct vxge_hw_vpath_reg __iomem *vpath_reg,
-			   struct vxge_hw_device_hw_info *hw_info);
 
 static enum vxge_hw_status
 __vxge_hw_vpath_mac_configure(struct __vxge_hw_device *devh, u32 vp_id);
@@ -275,6 +256,72 @@ void vxge_hw_device_wait_receive_idle(struct __vxge_hw_device *hldev)
 	}
 }
 
+static enum vxge_hw_status
+vxge_hw_vpath_fw_api(struct __vxge_hw_virtualpath *vpath, u32 action,
+		     u32 fw_memo, u32 offset, u64 *data0, u64 *data1,
+		     u64 *steer_ctrl)
+{
+	struct vxge_hw_vpath_reg __iomem *vp_reg;
+	enum vxge_hw_status status;
+	u64 val64;
+	u32 retry = 0, max_retry = 100;
+
+	vp_reg = vpath->vp_reg;
+
+	if (vpath->vp_open) {
+		max_retry = 3;
+		spin_lock(&vpath->lock);
+	}
+
+	writeq(*data0, &vp_reg->rts_access_steer_data0);
+	writeq(*data1, &vp_reg->rts_access_steer_data1);
+	wmb();
+
+	val64 = VXGE_HW_RTS_ACCESS_STEER_CTRL_ACTION(action) |
+		VXGE_HW_RTS_ACCESS_STEER_CTRL_DATA_STRUCT_SEL(fw_memo) |
+		VXGE_HW_RTS_ACCESS_STEER_CTRL_OFFSET(offset) |
+		VXGE_HW_RTS_ACCESS_STEER_CTRL_STROBE |
+		*steer_ctrl;
+
+	status = __vxge_hw_pio_mem_write64(val64,
+					   &vp_reg->rts_access_steer_ctrl,
+					   VXGE_HW_RTS_ACCESS_STEER_CTRL_STROBE,
+					   VXGE_HW_DEF_DEVICE_POLL_MILLIS);
+
+	/* The __vxge_hw_device_register_poll can udelay for a significant
+	 * amount of time, blocking other proccess from the CPU.  If it delays
+	 * for ~5secs, a NMI error can occur.  A way around this is to give up
+	 * the processor via msleep, but this is not allowed is under lock.
+	 * So, only allow it to sleep for ~4secs if open.  Otherwise, delay for
+	 * 1sec and sleep for 10ms until the firmware operation has completed
+	 * or timed-out.
+	 */
+	while ((status != VXGE_HW_OK) && retry++ < max_retry) {
+		if (!vpath->vp_open)
+			msleep(20);
+		status = __vxge_hw_device_register_poll(
+					&vp_reg->rts_access_steer_ctrl,
+					VXGE_HW_RTS_ACCESS_STEER_CTRL_STROBE,
+					VXGE_HW_DEF_DEVICE_POLL_MILLIS);
+	}
+
+	if (status != VXGE_HW_OK)
+		goto out;
+
+	val64 = readq(&vp_reg->rts_access_steer_ctrl);
+	if (val64 & VXGE_HW_RTS_ACCESS_STEER_CTRL_RMACJ_STATUS) {
+		*data0 = readq(&vp_reg->rts_access_steer_data0);
+		*data1 = readq(&vp_reg->rts_access_steer_data1);
+		*steer_ctrl = val64;
+	} else
+		status = VXGE_HW_FAIL;
+
+out:
+	if (vpath->vp_open)
+		spin_unlock(&vpath->lock);
+	return status;
+}
+
 /*
  * __vxge_hw_channel_allocate - Allocate memory for channel
  * This function allocates required memory for the channel and various arrays
@@ -650,10 +697,25 @@ __vxge_hw_device_is_privilaged(u32 host_type, u32 func_id)
 }
 
 /*
+ * __vxge_hw_vpath_func_id_get - Get the function id of the vpath.
+ * Returns the function number of the vpath.
+ */
+static u32
+__vxge_hw_vpath_func_id_get(struct vxge_hw_vpmgmt_reg __iomem *vpmgmt_reg)
+{
+	u64 val64;
+
+	val64 = readq(&vpmgmt_reg->vpath_to_func_map_cfg1);
+
+	return
+	 (u32)VXGE_HW_VPATH_TO_FUNC_MAP_CFG1_GET_VPATH_TO_FUNC_MAP_CFG1(val64);
+}
+
+/*
  * __vxge_hw_device_host_info_get
  * This routine returns the host type assignments
  */
-void __vxge_hw_device_host_info_get(struct __vxge_hw_device *hldev)
+static void __vxge_hw_device_host_info_get(struct __vxge_hw_device *hldev)
 {
 	u64 val64;
 	u32 i;
@@ -666,16 +728,18 @@ void __vxge_hw_device_host_info_get(struct __vxge_hw_device *hldev)
 	hldev->vpath_assignments = readq(&hldev->common_reg->vpath_assignments);
 
 	for (i = 0; i < VXGE_HW_MAX_VIRTUAL_PATHS; i++) {
-
 		if (!(hldev->vpath_assignments & vxge_mBIT(i)))
 			continue;
 
 		hldev->func_id =
-			__vxge_hw_vpath_func_id_get(i, hldev->vpmgmt_reg[i]);
+			__vxge_hw_vpath_func_id_get(hldev->vpmgmt_reg[i]);
 
 		hldev->access_rights = __vxge_hw_device_access_rights_get(
 			hldev->host_type, hldev->func_id);
 
+		hldev->virtual_paths[i].vp_open = VXGE_HW_VP_NOT_OPEN;
+		hldev->virtual_paths[i].vp_reg = hldev->vpath_reg[i];
+
 		hldev->first_vp_id = i;
 		break;
 	}
@@ -732,6 +796,192 @@ exit:
 	return status;
 }
 
+/*
+ * __vxge_hw_vpath_fw_ver_get - Get the fw version
+ * Returns FW Version
+ */
+static enum vxge_hw_status
+__vxge_hw_vpath_fw_ver_get(struct __vxge_hw_virtualpath *vpath,
+			   struct vxge_hw_device_hw_info *hw_info)
+{
+	struct vxge_hw_device_version *fw_version = &hw_info->fw_version;
+	struct vxge_hw_device_date *fw_date = &hw_info->fw_date;
+	struct vxge_hw_device_version *flash_version = &hw_info->flash_version;
+	struct vxge_hw_device_date *flash_date = &hw_info->flash_date;
+	u64 data0, data1 = 0, steer_ctrl = 0;
+	enum vxge_hw_status status;
+
+	status = vxge_hw_vpath_fw_api(vpath,
+			VXGE_HW_RTS_ACCESS_STEER_CTRL_ACTION_READ_ENTRY,
+			VXGE_HW_RTS_ACCESS_STEER_CTRL_DATA_STRUCT_SEL_FW_MEMO,
+			0, &data0, &data1, &steer_ctrl);
+	if (status != VXGE_HW_OK)
+		goto exit;
+
+	fw_date->day =
+	    (u32) VXGE_HW_RTS_ACCESS_STEER_DATA0_GET_FW_VER_DAY(data0);
+	fw_date->month =
+	    (u32) VXGE_HW_RTS_ACCESS_STEER_DATA0_GET_FW_VER_MONTH(data0);
+	fw_date->year =
+	    (u32) VXGE_HW_RTS_ACCESS_STEER_DATA0_GET_FW_VER_YEAR(data0);
+
+	snprintf(fw_date->date, VXGE_HW_FW_STRLEN, "%2.2d/%2.2d/%4.4d",
+		 fw_date->month, fw_date->day, fw_date->year);
+
+	fw_version->major =
+	    (u32) VXGE_HW_RTS_ACCESS_STEER_DATA0_GET_FW_VER_MAJOR(data0);
+	fw_version->minor =
+	    (u32) VXGE_HW_RTS_ACCESS_STEER_DATA0_GET_FW_VER_MINOR(data0);
+	fw_version->build =
+	    (u32) VXGE_HW_RTS_ACCESS_STEER_DATA0_GET_FW_VER_BUILD(data0);
+
+	snprintf(fw_version->version, VXGE_HW_FW_STRLEN, "%d.%d.%d",
+		 fw_version->major, fw_version->minor, fw_version->build);
+
+	flash_date->day =
+	    (u32) VXGE_HW_RTS_ACCESS_STEER_DATA1_GET_FLASH_VER_DAY(data1);
+	flash_date->month =
+	    (u32) VXGE_HW_RTS_ACCESS_STEER_DATA1_GET_FLASH_VER_MONTH(data1);
+	flash_date->year =
+	    (u32) VXGE_HW_RTS_ACCESS_STEER_DATA1_GET_FLASH_VER_YEAR(data1);
+
+	snprintf(flash_date->date, VXGE_HW_FW_STRLEN, "%2.2d/%2.2d/%4.4d",
+		 flash_date->month, flash_date->day, flash_date->year);
+
+	flash_version->major =
+	    (u32) VXGE_HW_RTS_ACCESS_STEER_DATA1_GET_FLASH_VER_MAJOR(data1);
+	flash_version->minor =
+	    (u32) VXGE_HW_RTS_ACCESS_STEER_DATA1_GET_FLASH_VER_MINOR(data1);
+	flash_version->build =
+	    (u32) VXGE_HW_RTS_ACCESS_STEER_DATA1_GET_FLASH_VER_BUILD(data1);
+
+	snprintf(flash_version->version, VXGE_HW_FW_STRLEN, "%d.%d.%d",
+		 flash_version->major, flash_version->minor,
+		 flash_version->build);
+
+exit:
+	return status;
+}
+
+/*
+ * __vxge_hw_vpath_card_info_get - Get the serial numbers,
+ * part number and product description.
+ */
+static enum vxge_hw_status
+__vxge_hw_vpath_card_info_get(struct __vxge_hw_virtualpath *vpath,
+			      struct vxge_hw_device_hw_info *hw_info)
+{
+	enum vxge_hw_status status;
+	u64 data0, data1 = 0, steer_ctrl = 0;
+	u8 *serial_number = hw_info->serial_number;
+	u8 *part_number = hw_info->part_number;
+	u8 *product_desc = hw_info->product_desc;
+	u32 i, j = 0;
+
+	data0 = VXGE_HW_RTS_ACCESS_STEER_DATA0_MEMO_ITEM_SERIAL_NUMBER;
+
+	status = vxge_hw_vpath_fw_api(vpath,
+			VXGE_HW_RTS_ACCESS_STEER_CTRL_ACTION_READ_MEMO_ENTRY,
+			VXGE_HW_RTS_ACCESS_STEER_CTRL_DATA_STRUCT_SEL_FW_MEMO,
+			0, &data0, &data1, &steer_ctrl);
+	if (status != VXGE_HW_OK)
+		return status;
+
+	((u64 *)serial_number)[0] = be64_to_cpu(data0);
+	((u64 *)serial_number)[1] = be64_to_cpu(data1);
+
+	data0 = VXGE_HW_RTS_ACCESS_STEER_DATA0_MEMO_ITEM_PART_NUMBER;
+	data1 = steer_ctrl = 0;
+
+	status = vxge_hw_vpath_fw_api(vpath,
+			VXGE_HW_RTS_ACCESS_STEER_CTRL_ACTION_READ_MEMO_ENTRY,
+			VXGE_HW_RTS_ACCESS_STEER_CTRL_DATA_STRUCT_SEL_FW_MEMO,
+			0, &data0, &data1, &steer_ctrl);
+	if (status != VXGE_HW_OK)
+		return status;
+
+	((u64 *)part_number)[0] = be64_to_cpu(data0);
+	((u64 *)part_number)[1] = be64_to_cpu(data1);
+
+	for (i = VXGE_HW_RTS_ACCESS_STEER_DATA0_MEMO_ITEM_DESC_0;
+	     i <= VXGE_HW_RTS_ACCESS_STEER_DATA0_MEMO_ITEM_DESC_3; i++) {
+		data0 = i;
+		data1 = steer_ctrl = 0;
+
+		status = vxge_hw_vpath_fw_api(vpath,
+			VXGE_HW_RTS_ACCESS_STEER_CTRL_ACTION_READ_MEMO_ENTRY,
+			VXGE_HW_RTS_ACCESS_STEER_CTRL_DATA_STRUCT_SEL_FW_MEMO,
+			0, &data0, &data1, &steer_ctrl);
+		if (status != VXGE_HW_OK)
+			return status;
+
+		((u64 *)product_desc)[j++] = be64_to_cpu(data0);
+		((u64 *)product_desc)[j++] = be64_to_cpu(data1);
+	}
+
+	return status;
+}
+
+/*
+ * __vxge_hw_vpath_pci_func_mode_get - Get the pci mode
+ * Returns pci function mode
+ */
+static u64
+__vxge_hw_vpath_pci_func_mode_get(struct __vxge_hw_virtualpath *vpath)
+{
+	u64 data0, data1 = 0, steer_ctrl = 0;
+	enum vxge_hw_status status;
+
+	data0 = VXGE_HW_RTS_ACCESS_STEER_DATA0_MEMO_ITEM_PCI_MODE;
+
+	status = vxge_hw_vpath_fw_api(vpath,
+			VXGE_HW_RTS_ACCESS_STEER_CTRL_ACTION_READ_MEMO_ENTRY,
+			VXGE_HW_RTS_ACCESS_STEER_CTRL_DATA_STRUCT_SEL_FW_MEMO,
+			0, &data0, &data1, &steer_ctrl);
+
+	return data0;
+}
+
+/*
+ * __vxge_hw_vpath_addr_get - Get the hw address entry for this vpath
+ *               from MAC address table.
+ */
+static enum vxge_hw_status
+__vxge_hw_vpath_addr_get(struct __vxge_hw_virtualpath *vpath,
+			 u8 *macaddr, u8 *macaddr_mask)
+{
+	u64 action = VXGE_HW_RTS_ACCESS_STEER_CTRL_ACTION_LIST_FIRST_ENTRY,
+	    data0 = 0, data1 = 0, steer_ctrl = 0;
+	enum vxge_hw_status status;
+	int i;
+
+	do {
+		status = vxge_hw_vpath_fw_api(vpath, action,
+			VXGE_HW_RTS_ACCESS_STEER_CTRL_DATA_STRUCT_SEL_DA,
+			0, &data0, &data1, &steer_ctrl);
+		if (status != VXGE_HW_OK)
+			goto exit;
+
+		data0 = VXGE_HW_RTS_ACCESS_STEER_DATA0_GET_DA_MAC_ADDR(data0);
+		data1 = VXGE_HW_RTS_ACCESS_STEER_DATA1_GET_DA_MAC_ADDR_MASK(
+									data1);
+
+		for (i = ETH_ALEN; i > 0; i--) {
+			macaddr[i - 1] = (u8) (data0 & 0xFF);
+			data0 >>= 8;
+
+			macaddr_mask[i - 1] = (u8) (data1 & 0xFF);
+			data1 >>= 8;
+		}
+
+		action = VXGE_HW_RTS_ACCESS_STEER_CTRL_ACTION_LIST_NEXT_ENTRY;
+		data0 = 0, data1 = 0, steer_ctrl = 0;
+
+	} while (!is_valid_ether_addr(macaddr));
+exit:
+	return status;
+}
+
 /**
  * vxge_hw_device_hw_info_get - Get the hw information
  * Returns the vpath mask that has the bits set for each vpath allocated
@@ -747,9 +997,9 @@ vxge_hw_device_hw_info_get(void __iomem *bar0,
 	struct vxge_hw_toc_reg __iomem *toc;
 	struct vxge_hw_mrpcim_reg __iomem *mrpcim_reg;
 	struct vxge_hw_common_reg __iomem *common_reg;
-	struct vxge_hw_vpath_reg __iomem *vpath_reg;
 	struct vxge_hw_vpmgmt_reg __iomem *vpmgmt_reg;
 	enum vxge_hw_status status;
+	struct __vxge_hw_virtualpath vpath;
 
 	memset(hw_info, 0, sizeof(struct vxge_hw_device_hw_info));
 
@@ -784,7 +1034,7 @@ vxge_hw_device_hw_info_get(void __iomem *bar0,
 		vpmgmt_reg = (struct vxge_hw_vpmgmt_reg __iomem *)
 				(bar0 + val64);
 
-		hw_info->func_id = __vxge_hw_vpath_func_id_get(i, vpmgmt_reg);
+		hw_info->func_id = __vxge_hw_vpath_func_id_get(vpmgmt_reg);
 		if (__vxge_hw_device_access_rights_get(hw_info->host_type,
 			hw_info->func_id) &
 			VXGE_HW_DEVICE_ACCESS_RIGHT_MRPCIM) {
@@ -800,16 +1050,18 @@ vxge_hw_device_hw_info_get(void __iomem *bar0,
 
 		val64 = readq(&toc->toc_vpath_pointer[i]);
 
-		vpath_reg = (struct vxge_hw_vpath_reg __iomem *)(bar0 + val64);
+		vpath.vp_reg = (struct vxge_hw_vpath_reg __iomem *)
+			       (bar0 + val64);
+		vpath.vp_open = 0;
 
 		hw_info->function_mode =
-			__vxge_hw_vpath_pci_func_mode_get(i, vpath_reg);
+			__vxge_hw_vpath_pci_func_mode_get(&vpath);
 
-		status = __vxge_hw_vpath_fw_ver_get(i, vpath_reg, hw_info);
+		status = __vxge_hw_vpath_fw_ver_get(&vpath, hw_info);
 		if (status != VXGE_HW_OK)
 			goto exit;
 
-		status = __vxge_hw_vpath_card_info_get(i, vpath_reg, hw_info);
+		status = __vxge_hw_vpath_card_info_get(&vpath, hw_info);
 		if (status != VXGE_HW_OK)
 			goto exit;
 
@@ -817,14 +1069,15 @@ vxge_hw_device_hw_info_get(void __iomem *bar0,
 	}
 
 	for (i = 0; i < VXGE_HW_MAX_VIRTUAL_PATHS; i++) {
-
 		if (!((hw_info->vpath_mask) & vxge_mBIT(i)))
 			continue;
 
 		val64 = readq(&toc->toc_vpath_pointer[i]);
-		vpath_reg = (struct vxge_hw_vpath_reg __iomem *)(bar0 + val64);
+		vpath.vp_reg = (struct vxge_hw_vpath_reg __iomem *)
+			       (bar0 + val64);
+		vpath.vp_open = 0;
 
-		status =  __vxge_hw_vpath_addr_get(i, vpath_reg,
+		status =  __vxge_hw_vpath_addr_get(&vpath,
 				hw_info->mac_addrs[i],
 				hw_info->mac_addr_masks[i]);
 		if (status != VXGE_HW_OK)
@@ -896,7 +1149,6 @@ vxge_hw_device_initialize(
 	nblocks++;
 
 	for (i = 0; i < VXGE_HW_MAX_VIRTUAL_PATHS; i++) {
-
 		if (!(hldev->vpath_assignments & vxge_mBIT(i)))
 			continue;
 
@@ -921,7 +1173,6 @@ vxge_hw_device_initialize(
 	}
 
 	status = __vxge_hw_device_initialize(hldev);
-
 	if (status != VXGE_HW_OK) {
 		vxge_hw_device_terminate(hldev);
 		goto exit;
@@ -958,7 +1209,6 @@ vxge_hw_device_stats_get(struct __vxge_hw_device *hldev,
 	enum vxge_hw_status status = VXGE_HW_OK;
 
 	for (i = 0; i < VXGE_HW_MAX_VIRTUAL_PATHS; i++) {
-
 		if (!(hldev->vpaths_deployed & vxge_mBIT(i)) ||
 			(hldev->virtual_paths[i].vp_open ==
 				VXGE_HW_VP_NOT_OPEN))
@@ -2755,297 +3005,6 @@ exit:
 	return status;
 }
 
-/*
- * __vxge_hw_vpath_func_id_get - Get the function id of the vpath.
- * Returns the function number of the vpath.
- */
-static u32
-__vxge_hw_vpath_func_id_get(u32 vp_id,
-	struct vxge_hw_vpmgmt_reg __iomem *vpmgmt_reg)
-{
-	u64 val64;
-
-	val64 = readq(&vpmgmt_reg->vpath_to_func_map_cfg1);
-
-	return
-	 (u32)VXGE_HW_VPATH_TO_FUNC_MAP_CFG1_GET_VPATH_TO_FUNC_MAP_CFG1(val64);
-}
-
-/*
- * __vxge_hw_read_rts_ds - Program RTS steering critieria
- */
-static inline void
-__vxge_hw_read_rts_ds(struct vxge_hw_vpath_reg __iomem *vpath_reg,
-		      u64 dta_struct_sel)
-{
-	writeq(0, &vpath_reg->rts_access_steer_ctrl);
-	wmb();
-	writeq(dta_struct_sel, &vpath_reg->rts_access_steer_data0);
-	writeq(0, &vpath_reg->rts_access_steer_data1);
-	wmb();
-}
-
-
-/*
- * __vxge_hw_vpath_card_info_get - Get the serial numbers,
- * part number and product description.
- */
-static enum vxge_hw_status
-__vxge_hw_vpath_card_info_get(
-	u32 vp_id,
-	struct vxge_hw_vpath_reg __iomem *vpath_reg,
-	struct vxge_hw_device_hw_info *hw_info)
-{
-	u32 i, j;
-	u64 val64;
-	u64 data1 = 0ULL;
-	u64 data2 = 0ULL;
-	enum vxge_hw_status status = VXGE_HW_OK;
-	u8 *serial_number = hw_info->serial_number;
-	u8 *part_number = hw_info->part_number;
-	u8 *product_desc = hw_info->product_desc;
-
-	__vxge_hw_read_rts_ds(vpath_reg,
-		VXGE_HW_RTS_ACCESS_STEER_DATA0_MEMO_ITEM_SERIAL_NUMBER);
-
-	val64 = VXGE_HW_RTS_ACCESS_STEER_CTRL_ACTION(
-			VXGE_HW_RTS_ACCESS_STEER_CTRL_ACTION_READ_MEMO_ENTRY) |
-		VXGE_HW_RTS_ACCESS_STEER_CTRL_DATA_STRUCT_SEL(
-			VXGE_HW_RTS_ACCESS_STEER_CTRL_DATA_STRUCT_SEL_FW_MEMO) |
-		VXGE_HW_RTS_ACCESS_STEER_CTRL_STROBE |
-		VXGE_HW_RTS_ACCESS_STEER_CTRL_OFFSET(0);
-
-	status = __vxge_hw_pio_mem_write64(val64,
-				&vpath_reg->rts_access_steer_ctrl,
-				VXGE_HW_RTS_ACCESS_STEER_CTRL_STROBE,
-				VXGE_HW_DEF_DEVICE_POLL_MILLIS);
-
-	if (status != VXGE_HW_OK)
-		return status;
-
-	val64 = readq(&vpath_reg->rts_access_steer_ctrl);
-
-	if (val64 & VXGE_HW_RTS_ACCESS_STEER_CTRL_RMACJ_STATUS) {
-		data1 = readq(&vpath_reg->rts_access_steer_data0);
-		((u64 *)serial_number)[0] = be64_to_cpu(data1);
-
-		data2 = readq(&vpath_reg->rts_access_steer_data1);
-		((u64 *)serial_number)[1] = be64_to_cpu(data2);
-		status = VXGE_HW_OK;
-	} else
-		*serial_number = 0;
-
-	__vxge_hw_read_rts_ds(vpath_reg,
-			VXGE_HW_RTS_ACCESS_STEER_DATA0_MEMO_ITEM_PART_NUMBER);
-
-	val64 = VXGE_HW_RTS_ACCESS_STEER_CTRL_ACTION(
-			VXGE_HW_RTS_ACCESS_STEER_CTRL_ACTION_READ_MEMO_ENTRY) |
-		VXGE_HW_RTS_ACCESS_STEER_CTRL_DATA_STRUCT_SEL(
-			VXGE_HW_RTS_ACCESS_STEER_CTRL_DATA_STRUCT_SEL_FW_MEMO) |
-		VXGE_HW_RTS_ACCESS_STEER_CTRL_STROBE |
-		VXGE_HW_RTS_ACCESS_STEER_CTRL_OFFSET(0);
-
-	status = __vxge_hw_pio_mem_write64(val64,
-				&vpath_reg->rts_access_steer_ctrl,
-				VXGE_HW_RTS_ACCESS_STEER_CTRL_STROBE,
-				VXGE_HW_DEF_DEVICE_POLL_MILLIS);
-
-	if (status != VXGE_HW_OK)
-		return status;
-
-	val64 = readq(&vpath_reg->rts_access_steer_ctrl);
-
-	if (val64 & VXGE_HW_RTS_ACCESS_STEER_CTRL_RMACJ_STATUS) {
-
-		data1 = readq(&vpath_reg->rts_access_steer_data0);
-		((u64 *)part_number)[0] = be64_to_cpu(data1);
-
-		data2 = readq(&vpath_reg->rts_access_steer_data1);
-		((u64 *)part_number)[1] = be64_to_cpu(data2);
-
-		status = VXGE_HW_OK;
-
-	} else
-		*part_number = 0;
-
-	j = 0;
-
-	for (i = VXGE_HW_RTS_ACCESS_STEER_DATA0_MEMO_ITEM_DESC_0;
-	     i <= VXGE_HW_RTS_ACCESS_STEER_DATA0_MEMO_ITEM_DESC_3; i++) {
-
-		__vxge_hw_read_rts_ds(vpath_reg, i);
-
-		val64 = VXGE_HW_RTS_ACCESS_STEER_CTRL_ACTION(
-			VXGE_HW_RTS_ACCESS_STEER_CTRL_ACTION_READ_MEMO_ENTRY) |
-			VXGE_HW_RTS_ACCESS_STEER_CTRL_DATA_STRUCT_SEL(
-			VXGE_HW_RTS_ACCESS_STEER_CTRL_DATA_STRUCT_SEL_FW_MEMO) |
-			VXGE_HW_RTS_ACCESS_STEER_CTRL_STROBE |
-			VXGE_HW_RTS_ACCESS_STEER_CTRL_OFFSET(0);
-
-		status = __vxge_hw_pio_mem_write64(val64,
-				&vpath_reg->rts_access_steer_ctrl,
-				VXGE_HW_RTS_ACCESS_STEER_CTRL_STROBE,
-				VXGE_HW_DEF_DEVICE_POLL_MILLIS);
-
-		if (status != VXGE_HW_OK)
-			return status;
-
-		val64 = readq(&vpath_reg->rts_access_steer_ctrl);
-
-		if (val64 & VXGE_HW_RTS_ACCESS_STEER_CTRL_RMACJ_STATUS) {
-
-			data1 = readq(&vpath_reg->rts_access_steer_data0);
-			((u64 *)product_desc)[j++] = be64_to_cpu(data1);
-
-			data2 = readq(&vpath_reg->rts_access_steer_data1);
-			((u64 *)product_desc)[j++] = be64_to_cpu(data2);
-
-			status = VXGE_HW_OK;
-		} else
-			*product_desc = 0;
-	}
-
-	return status;
-}
-
-/*
- * __vxge_hw_vpath_fw_ver_get - Get the fw version
- * Returns FW Version
- */
-static enum vxge_hw_status
-__vxge_hw_vpath_fw_ver_get(
-	u32 vp_id,
-	struct vxge_hw_vpath_reg __iomem *vpath_reg,
-	struct vxge_hw_device_hw_info *hw_info)
-{
-	u64 val64;
-	u64 data1 = 0ULL;
-	u64 data2 = 0ULL;
-	struct vxge_hw_device_version *fw_version = &hw_info->fw_version;
-	struct vxge_hw_device_date *fw_date = &hw_info->fw_date;
-	struct vxge_hw_device_version *flash_version = &hw_info->flash_version;
-	struct vxge_hw_device_date *flash_date = &hw_info->flash_date;
-	enum vxge_hw_status status = VXGE_HW_OK;
-
-	val64 = VXGE_HW_RTS_ACCESS_STEER_CTRL_ACTION(
-		VXGE_HW_RTS_ACCESS_STEER_CTRL_ACTION_READ_ENTRY) |
-		VXGE_HW_RTS_ACCESS_STEER_CTRL_DATA_STRUCT_SEL(
-		VXGE_HW_RTS_ACCESS_STEER_CTRL_DATA_STRUCT_SEL_FW_MEMO) |
-		VXGE_HW_RTS_ACCESS_STEER_CTRL_STROBE |
-		VXGE_HW_RTS_ACCESS_STEER_CTRL_OFFSET(0);
-
-	status = __vxge_hw_pio_mem_write64(val64,
-				&vpath_reg->rts_access_steer_ctrl,
-				VXGE_HW_RTS_ACCESS_STEER_CTRL_STROBE,
-				VXGE_HW_DEF_DEVICE_POLL_MILLIS);
-
-	if (status != VXGE_HW_OK)
-		goto exit;
-
-	val64 = readq(&vpath_reg->rts_access_steer_ctrl);
-
-	if (val64 & VXGE_HW_RTS_ACCESS_STEER_CTRL_RMACJ_STATUS) {
-
-		data1 = readq(&vpath_reg->rts_access_steer_data0);
-		data2 = readq(&vpath_reg->rts_access_steer_data1);
-
-		fw_date->day =
-			(u32)VXGE_HW_RTS_ACCESS_STEER_DATA0_GET_FW_VER_DAY(
-						data1);
-		fw_date->month =
-			(u32)VXGE_HW_RTS_ACCESS_STEER_DATA0_GET_FW_VER_MONTH(
-						data1);
-		fw_date->year =
-			(u32)VXGE_HW_RTS_ACCESS_STEER_DATA0_GET_FW_VER_YEAR(
-						data1);
-
-		snprintf(fw_date->date, VXGE_HW_FW_STRLEN, "%2.2d/%2.2d/%4.4d",
-			fw_date->month, fw_date->day, fw_date->year);
-
-		fw_version->major =
-		    (u32)VXGE_HW_RTS_ACCESS_STEER_DATA0_GET_FW_VER_MAJOR(data1);
-		fw_version->minor =
-		    (u32)VXGE_HW_RTS_ACCESS_STEER_DATA0_GET_FW_VER_MINOR(data1);
-		fw_version->build =
-		    (u32)VXGE_HW_RTS_ACCESS_STEER_DATA0_GET_FW_VER_BUILD(data1);
-
-		snprintf(fw_version->version, VXGE_HW_FW_STRLEN, "%d.%d.%d",
-		    fw_version->major, fw_version->minor, fw_version->build);
-
-		flash_date->day =
-		  (u32)VXGE_HW_RTS_ACCESS_STEER_DATA1_GET_FLASH_VER_DAY(data2);
-		flash_date->month =
-		 (u32)VXGE_HW_RTS_ACCESS_STEER_DATA1_GET_FLASH_VER_MONTH(data2);
-		flash_date->year =
-		 (u32)VXGE_HW_RTS_ACCESS_STEER_DATA1_GET_FLASH_VER_YEAR(data2);
-
-		snprintf(flash_date->date, VXGE_HW_FW_STRLEN,
-			"%2.2d/%2.2d/%4.4d",
-			flash_date->month, flash_date->day, flash_date->year);
-
-		flash_version->major =
-		 (u32)VXGE_HW_RTS_ACCESS_STEER_DATA1_GET_FLASH_VER_MAJOR(data2);
-		flash_version->minor =
-		 (u32)VXGE_HW_RTS_ACCESS_STEER_DATA1_GET_FLASH_VER_MINOR(data2);
-		flash_version->build =
-		 (u32)VXGE_HW_RTS_ACCESS_STEER_DATA1_GET_FLASH_VER_BUILD(data2);
-
-		snprintf(flash_version->version, VXGE_HW_FW_STRLEN, "%d.%d.%d",
-			flash_version->major, flash_version->minor,
-			flash_version->build);
-
-		status = VXGE_HW_OK;
-
-	} else
-		status = VXGE_HW_FAIL;
-exit:
-	return status;
-}
-
-/*
- * __vxge_hw_vpath_pci_func_mode_get - Get the pci mode
- * Returns pci function mode
- */
-static u64
-__vxge_hw_vpath_pci_func_mode_get(
-	u32  vp_id,
-	struct vxge_hw_vpath_reg __iomem *vpath_reg)
-{
-	u64 val64;
-	u64 data1 = 0ULL;
-	enum vxge_hw_status status = VXGE_HW_OK;
-
-	__vxge_hw_read_rts_ds(vpath_reg,
-		VXGE_HW_RTS_ACCESS_STEER_DATA0_MEMO_ITEM_PCI_MODE);
-
-	val64 = VXGE_HW_RTS_ACCESS_STEER_CTRL_ACTION(
-			VXGE_HW_RTS_ACCESS_STEER_CTRL_ACTION_READ_MEMO_ENTRY) |
-		VXGE_HW_RTS_ACCESS_STEER_CTRL_DATA_STRUCT_SEL(
-			VXGE_HW_RTS_ACCESS_STEER_CTRL_DATA_STRUCT_SEL_FW_MEMO) |
-		VXGE_HW_RTS_ACCESS_STEER_CTRL_STROBE |
-		VXGE_HW_RTS_ACCESS_STEER_CTRL_OFFSET(0);
-
-	status = __vxge_hw_pio_mem_write64(val64,
-				&vpath_reg->rts_access_steer_ctrl,
-				VXGE_HW_RTS_ACCESS_STEER_CTRL_STROBE,
-				VXGE_HW_DEF_DEVICE_POLL_MILLIS);
-
-	if (status != VXGE_HW_OK)
-		goto exit;
-
-	val64 = readq(&vpath_reg->rts_access_steer_ctrl);
-
-	if (val64 & VXGE_HW_RTS_ACCESS_STEER_CTRL_RMACJ_STATUS) {
-		data1 = readq(&vpath_reg->rts_access_steer_data0);
-		status = VXGE_HW_OK;
-	} else {
-		data1 = 0;
-		status = VXGE_HW_FAIL;
-	}
-exit:
-	return data1;
-}
-
 /**
  * vxge_hw_device_flick_link_led - Flick (blink) link LED.
  * @hldev: HW device.
@@ -3054,37 +3013,24 @@ exit:
  * Flicker the link LED.
  */
 enum vxge_hw_status
-vxge_hw_device_flick_link_led(struct __vxge_hw_device *hldev,
-			       u64 on_off)
+vxge_hw_device_flick_link_led(struct __vxge_hw_device *hldev, u64 on_off)
 {
-	u64 val64;
-	enum vxge_hw_status status = VXGE_HW_OK;
-	struct vxge_hw_vpath_reg __iomem *vp_reg;
+	struct __vxge_hw_virtualpath *vpath;
+	u64 data0, data1 = 0, steer_ctrl = 0;
+	enum vxge_hw_status status;
 
 	if (hldev == NULL) {
 		status = VXGE_HW_ERR_INVALID_DEVICE;
 		goto exit;
 	}
 
-	vp_reg = hldev->vpath_reg[hldev->first_vp_id];
+	vpath = &hldev->virtual_paths[hldev->first_vp_id];
 
-	writeq(0, &vp_reg->rts_access_steer_ctrl);
-	wmb();
-	writeq(on_off, &vp_reg->rts_access_steer_data0);
-	writeq(0, &vp_reg->rts_access_steer_data1);
-	wmb();
-
-	val64 = VXGE_HW_RTS_ACCESS_STEER_CTRL_ACTION(
-			VXGE_HW_RTS_ACCESS_STEER_CTRL_ACTION_LED_CONTROL) |
-		VXGE_HW_RTS_ACCESS_STEER_CTRL_DATA_STRUCT_SEL(
-			VXGE_HW_RTS_ACCESS_STEER_CTRL_DATA_STRUCT_SEL_FW_MEMO) |
-		VXGE_HW_RTS_ACCESS_STEER_CTRL_STROBE |
-		VXGE_HW_RTS_ACCESS_STEER_CTRL_OFFSET(0);
-
-	status = __vxge_hw_pio_mem_write64(val64,
-				&vp_reg->rts_access_steer_ctrl,
-				VXGE_HW_RTS_ACCESS_STEER_CTRL_STROBE,
-				VXGE_HW_DEF_DEVICE_POLL_MILLIS);
+	data0 = on_off;
+	status = vxge_hw_vpath_fw_api(vpath,
+			VXGE_HW_RTS_ACCESS_STEER_CTRL_ACTION_LED_CONTROL,
+			VXGE_HW_RTS_ACCESS_STEER_CTRL_DATA_STRUCT_SEL_FW_MEMO,
+			0, &data0, &data1, &steer_ctrl);
 exit:
 	return status;
 }
@@ -3093,63 +3039,38 @@ exit:
  * __vxge_hw_vpath_rts_table_get - Get the entries from RTS access tables
  */
 enum vxge_hw_status
-__vxge_hw_vpath_rts_table_get(
-	struct __vxge_hw_vpath_handle *vp,
-	u32 action, u32 rts_table, u32 offset, u64 *data1, u64 *data2)
+__vxge_hw_vpath_rts_table_get(struct __vxge_hw_vpath_handle *vp,
+			      u32 action, u32 rts_table, u32 offset,
+			      u64 *data0, u64 *data1)
 {
-	u64 val64;
-	struct __vxge_hw_virtualpath *vpath;
-	struct vxge_hw_vpath_reg __iomem *vp_reg;
-
-	enum vxge_hw_status status = VXGE_HW_OK;
+	enum vxge_hw_status status;
+	u64 steer_ctrl = 0;
 
 	if (vp == NULL) {
 		status = VXGE_HW_ERR_INVALID_HANDLE;
 		goto exit;
 	}
 
-	vpath = vp->vpath;
-	vp_reg = vpath->vp_reg;
-
-	val64 = VXGE_HW_RTS_ACCESS_STEER_CTRL_ACTION(action) |
-		VXGE_HW_RTS_ACCESS_STEER_CTRL_DATA_STRUCT_SEL(rts_table) |
-		VXGE_HW_RTS_ACCESS_STEER_CTRL_STROBE |
-		VXGE_HW_RTS_ACCESS_STEER_CTRL_OFFSET(offset);
-
 	if ((rts_table ==
-		VXGE_HW_RTS_ACS_STEER_CTRL_DATA_STRUCT_SEL_RTH_SOLO_IT) ||
+	     VXGE_HW_RTS_ACS_STEER_CTRL_DATA_STRUCT_SEL_RTH_SOLO_IT) ||
 	    (rts_table ==
-		VXGE_HW_RTS_ACS_STEER_CTRL_DATA_STRUCT_SEL_RTH_MULTI_IT) ||
+	     VXGE_HW_RTS_ACS_STEER_CTRL_DATA_STRUCT_SEL_RTH_MULTI_IT) ||
 	    (rts_table ==
-		VXGE_HW_RTS_ACCESS_STEER_CTRL_DATA_STRUCT_SEL_RTH_MASK) ||
+	     VXGE_HW_RTS_ACCESS_STEER_CTRL_DATA_STRUCT_SEL_RTH_MASK) ||
 	    (rts_table ==
-		VXGE_HW_RTS_ACCESS_STEER_CTRL_DATA_STRUCT_SEL_RTH_KEY)) {
-		val64 = val64 |	VXGE_HW_RTS_ACCESS_STEER_CTRL_TABLE_SEL;
+	     VXGE_HW_RTS_ACCESS_STEER_CTRL_DATA_STRUCT_SEL_RTH_KEY)) {
+		steer_ctrl = VXGE_HW_RTS_ACCESS_STEER_CTRL_TABLE_SEL;
 	}
 
-	status = __vxge_hw_pio_mem_write64(val64,
-				&vp_reg->rts_access_steer_ctrl,
-				VXGE_HW_RTS_ACCESS_STEER_CTRL_STROBE,
-				vpath->hldev->config.device_poll_millis);
-
+	status = vxge_hw_vpath_fw_api(vp->vpath, action, rts_table, offset,
+				      data0, data1, &steer_ctrl);
 	if (status != VXGE_HW_OK)
 		goto exit;
 
-	val64 = readq(&vp_reg->rts_access_steer_ctrl);
-
-	if (val64 & VXGE_HW_RTS_ACCESS_STEER_CTRL_RMACJ_STATUS) {
-
-		*data1 = readq(&vp_reg->rts_access_steer_data0);
-
-		if ((rts_table ==
-		VXGE_HW_RTS_ACCESS_STEER_CTRL_DATA_STRUCT_SEL_DA) ||
-		(rts_table ==
-		VXGE_HW_RTS_ACS_STEER_CTRL_DATA_STRUCT_SEL_RTH_MULTI_IT)) {
-			*data2 = readq(&vp_reg->rts_access_steer_data1);
-		}
-		status = VXGE_HW_OK;
-	} else
-		status = VXGE_HW_FAIL;
+	if ((rts_table != VXGE_HW_RTS_ACCESS_STEER_CTRL_DATA_STRUCT_SEL_DA) ||
+	    (rts_table !=
+	     VXGE_HW_RTS_ACS_STEER_CTRL_DATA_STRUCT_SEL_RTH_MULTI_IT))
+		*data1 = 0;
 exit:
 	return status;
 }
@@ -3158,107 +3079,27 @@ exit:
  * __vxge_hw_vpath_rts_table_set - Set the entries of RTS access tables
  */
 enum vxge_hw_status
-__vxge_hw_vpath_rts_table_set(
-	struct __vxge_hw_vpath_handle *vp, u32 action, u32 rts_table,
-	u32 offset, u64 data1, u64 data2)
+__vxge_hw_vpath_rts_table_set(struct __vxge_hw_vpath_handle *vp, u32 action,
+			      u32 rts_table, u32 offset, u64 steer_data0,
+			      u64 steer_data1)
 {
-	u64 val64;
-	struct __vxge_hw_virtualpath *vpath;
-	enum vxge_hw_status status = VXGE_HW_OK;
-	struct vxge_hw_vpath_reg __iomem *vp_reg;
+	u64 data0, data1 = 0, steer_ctrl = 0;
+	enum vxge_hw_status status;
 
 	if (vp == NULL) {
 		status = VXGE_HW_ERR_INVALID_HANDLE;
 		goto exit;
 	}
 
-	vpath = vp->vpath;
-	vp_reg = vpath->vp_reg;
-
-	writeq(data1, &vp_reg->rts_access_steer_data0);
-	wmb();
+	data0 = steer_data0;
 
 	if ((rts_table == VXGE_HW_RTS_ACCESS_STEER_CTRL_DATA_STRUCT_SEL_DA) ||
 	    (rts_table ==
-		VXGE_HW_RTS_ACS_STEER_CTRL_DATA_STRUCT_SEL_RTH_MULTI_IT)) {
-		writeq(data2, &vp_reg->rts_access_steer_data1);
-		wmb();
-	}
+	     VXGE_HW_RTS_ACS_STEER_CTRL_DATA_STRUCT_SEL_RTH_MULTI_IT))
+		data1 = steer_data1;
 
-	val64 = VXGE_HW_RTS_ACCESS_STEER_CTRL_ACTION(action) |
-		VXGE_HW_RTS_ACCESS_STEER_CTRL_DATA_STRUCT_SEL(rts_table) |
-		VXGE_HW_RTS_ACCESS_STEER_CTRL_STROBE |
-		VXGE_HW_RTS_ACCESS_STEER_CTRL_OFFSET(offset);
-
-	status = __vxge_hw_pio_mem_write64(val64,
-				&vp_reg->rts_access_steer_ctrl,
-				VXGE_HW_RTS_ACCESS_STEER_CTRL_STROBE,
-				vpath->hldev->config.device_poll_millis);
-
-	if (status != VXGE_HW_OK)
-		goto exit;
-
-	val64 = readq(&vp_reg->rts_access_steer_ctrl);
-
-	if (val64 & VXGE_HW_RTS_ACCESS_STEER_CTRL_RMACJ_STATUS)
-		status = VXGE_HW_OK;
-	else
-		status = VXGE_HW_FAIL;
-exit:
-	return status;
-}
-
-/*
- * __vxge_hw_vpath_addr_get - Get the hw address entry for this vpath
- *               from MAC address table.
- */
-static enum vxge_hw_status
-__vxge_hw_vpath_addr_get(
-	u32 vp_id, struct vxge_hw_vpath_reg __iomem *vpath_reg,
-	u8 (macaddr)[ETH_ALEN], u8 (macaddr_mask)[ETH_ALEN])
-{
-	u32 i;
-	u64 val64;
-	u64 data1 = 0ULL;
-	u64 data2 = 0ULL;
-	enum vxge_hw_status status = VXGE_HW_OK;
-
-	val64 = VXGE_HW_RTS_ACCESS_STEER_CTRL_ACTION(
-		VXGE_HW_RTS_ACCESS_STEER_CTRL_ACTION_LIST_FIRST_ENTRY) |
-		VXGE_HW_RTS_ACCESS_STEER_CTRL_DATA_STRUCT_SEL(
-		VXGE_HW_RTS_ACCESS_STEER_CTRL_DATA_STRUCT_SEL_DA) |
-		VXGE_HW_RTS_ACCESS_STEER_CTRL_STROBE |
-		VXGE_HW_RTS_ACCESS_STEER_CTRL_OFFSET(0);
-
-	status = __vxge_hw_pio_mem_write64(val64,
-				&vpath_reg->rts_access_steer_ctrl,
-				VXGE_HW_RTS_ACCESS_STEER_CTRL_STROBE,
-				VXGE_HW_DEF_DEVICE_POLL_MILLIS);
-
-	if (status != VXGE_HW_OK)
-		goto exit;
-
-	val64 = readq(&vpath_reg->rts_access_steer_ctrl);
-
-	if (val64 & VXGE_HW_RTS_ACCESS_STEER_CTRL_RMACJ_STATUS) {
-
-		data1 = readq(&vpath_reg->rts_access_steer_data0);
-		data2 = readq(&vpath_reg->rts_access_steer_data1);
-
-		data1 = VXGE_HW_RTS_ACCESS_STEER_DATA0_GET_DA_MAC_ADDR(data1);
-		data2 = VXGE_HW_RTS_ACCESS_STEER_DATA1_GET_DA_MAC_ADDR_MASK(
-							data2);
-
-		for (i = ETH_ALEN; i > 0; i--) {
-			macaddr[i-1] = (u8)(data1 & 0xFF);
-			data1 >>= 8;
-
-			macaddr_mask[i-1] = (u8)(data2 & 0xFF);
-			data2 >>= 8;
-		}
-		status = VXGE_HW_OK;
-	} else
-		status = VXGE_HW_FAIL;
+	status = vxge_hw_vpath_fw_api(vp->vpath, action, rts_table, offset,
+				      &data0, &data1, &steer_ctrl);
 exit:
 	return status;
 }
@@ -4199,6 +4040,7 @@ __vxge_hw_vp_initialize(struct __vxge_hw_device *hldev, u32 vp_id,
 
 	vpath = &hldev->virtual_paths[vp_id];
 
+	spin_lock_init(&hldev->virtual_paths[vp_id].lock);
 	vpath->vp_id = vp_id;
 	vpath->vp_open = VXGE_HW_VP_OPEN;
 	vpath->hldev = hldev;
@@ -4209,14 +4051,12 @@ __vxge_hw_vp_initialize(struct __vxge_hw_device *hldev, u32 vp_id,
 	__vxge_hw_vpath_reset(hldev, vp_id);
 
 	status = __vxge_hw_vpath_reset_check(vpath);
-
 	if (status != VXGE_HW_OK) {
 		memset(vpath, 0, sizeof(struct __vxge_hw_virtualpath));
 		goto exit;
 	}
 
 	status = __vxge_hw_vpath_mgmt_read(hldev, vpath);
-
 	if (status != VXGE_HW_OK) {
 		memset(vpath, 0, sizeof(struct __vxge_hw_virtualpath));
 		goto exit;
@@ -4230,7 +4070,6 @@ __vxge_hw_vp_initialize(struct __vxge_hw_device *hldev, u32 vp_id,
 		hldev->tim_int_mask1, vp_id);
 
 	status = __vxge_hw_vpath_initialize(hldev, vp_id);
-
 	if (status != VXGE_HW_OK)
 		__vxge_hw_vp_terminate(hldev, vp_id);
 exit:
@@ -4496,7 +4335,9 @@ enum vxge_hw_status vxge_hw_vpath_close(struct __vxge_hw_vpath_handle *vp)
 
 	__vxge_hw_vp_terminate(devh, vp_id);
 
+	spin_lock(&vpath->lock);
 	vpath->vp_open = VXGE_HW_VP_NOT_OPEN;
+	spin_unlock(&vpath->lock);
 
 vpath_close_exit:
 	return status;
diff --git a/drivers/net/vxge/vxge-config.h b/drivers/net/vxge/vxge-config.h
index b395d8d..6a81014 100644
--- a/drivers/net/vxge/vxge-config.h
+++ b/drivers/net/vxge/vxge-config.h
@@ -641,6 +641,7 @@ struct __vxge_hw_virtualpath {
 	struct vxge_hw_vpath_stats_hw_info	*hw_stats;
 	struct vxge_hw_vpath_stats_hw_info	*hw_stats_sav;
 	struct vxge_hw_vpath_stats_sw_info	*sw_stats;
+	spinlock_t lock;
 };
 
 /*
-- 
1.7.0.4


^ permalink raw reply related

* [PATCH 03/12] vxge: cleanup debug printing and asserts
From: Jon Mason @ 2010-11-11 14:25 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, Sivakumar Subramani, Sreenivasa Honnur, Ram Vepa

Remove all of the unnecessary debug printk indirection and temporary
variables for vxge_debug_ll and vxge_assert.

Signed-off-by: Jon Mason <jon.mason@exar.com>
Signed-off-by: Ram Vepa <ram.vepa@exar.com>
---
 drivers/net/vxge/vxge-config.h |   42 ++++++++-------------------------------
 drivers/net/vxge/vxge-main.h   |    2 -
 2 files changed, 9 insertions(+), 35 deletions(-)

diff --git a/drivers/net/vxge/vxge-config.h b/drivers/net/vxge/vxge-config.h
index 95e7021..b395d8d 100644
--- a/drivers/net/vxge/vxge-config.h
+++ b/drivers/net/vxge/vxge-config.h
@@ -20,13 +20,6 @@
 #define VXGE_CACHE_LINE_SIZE 128
 #endif
 
-#define vxge_os_vaprintf(level, mask, fmt, ...) { \
-	char buff[255]; \
-		snprintf(buff, 255, fmt, __VA_ARGS__); \
-		printk(buff); \
-		printk("\n"); \
-}
-
 #ifndef VXGE_ALIGN
 #define VXGE_ALIGN(adrs, size) \
 	(((size) - (((u64)adrs) & ((size)-1))) & ((size)-1))
@@ -37,7 +30,6 @@
 #define VXGE_HW_DEFAULT_MTU			1500
 
 #ifdef VXGE_DEBUG_ASSERT
-
 /**
  * vxge_assert
  * @test: C-condition to check
@@ -48,16 +40,13 @@
  * compilation
  * time.
  */
-#define vxge_assert(test) { \
-	if (!(test)) \
-		vxge_os_bug("bad cond: "#test" at %s:%d\n", \
-				__FILE__, __LINE__); }
+#define vxge_assert(test) BUG_ON(!(test))
 #else
 #define vxge_assert(test)
 #endif /* end of VXGE_DEBUG_ASSERT */
 
 /**
- * enum enum vxge_debug_level
+ * enum vxge_debug_level
  * @VXGE_NONE: debug disabled
  * @VXGE_ERR: all errors going to be logged out
  * @VXGE_TRACE: all errors plus all kind of verbose tracing print outs
@@ -2000,7 +1989,7 @@ enum vxge_hw_status
 vxge_hw_vpath_strip_fcs_check(struct __vxge_hw_device *hldev, u64 vpath_mask);
 
 /**
- * vxge_debug
+ * vxge_debug_ll
  * @level: level of debug verbosity.
  * @mask: mask for the debug
  * @buf: Circular buffer for tracing
@@ -2012,26 +2001,13 @@ vxge_hw_vpath_strip_fcs_check(struct __vxge_hw_device *hldev, u64 vpath_mask);
  * may be compiled out if DEBUG macro was never defined.
  * See also: enum vxge_debug_level{}.
  */
-
-#define vxge_trace_aux(level, mask, fmt, ...) \
-{\
-		vxge_os_vaprintf(level, mask, fmt, __VA_ARGS__);\
-}
-
-#define vxge_debug(module, level, mask, fmt, ...) { \
-if ((level >= VXGE_TRACE && ((module & VXGE_DEBUG_TRACE_MASK) == module)) || \
-	(level >= VXGE_ERR && ((module & VXGE_DEBUG_ERR_MASK) == module))) {\
-	if ((mask & VXGE_DEBUG_MASK) == mask)\
-		vxge_trace_aux(level, mask, fmt, __VA_ARGS__); \
-} \
-}
-
 #if (VXGE_COMPONENT_LL & VXGE_DEBUG_MODULE_MASK)
-#define vxge_debug_ll(level, mask, fmt, ...) \
-{\
-	vxge_debug(VXGE_COMPONENT_LL, level, mask, fmt, __VA_ARGS__);\
-}
-
+#define vxge_debug_ll(level, mask, fmt, ...) do {			       \
+	if ((level >= VXGE_ERR && VXGE_COMPONENT_LL & VXGE_DEBUG_ERR_MASK) ||  \
+	    (level >= VXGE_TRACE && VXGE_COMPONENT_LL & VXGE_DEBUG_TRACE_MASK))\
+		if ((mask & VXGE_DEBUG_MASK) == mask)			       \
+			printk(fmt "\n", __VA_ARGS__);			       \
+} while (0)
 #else
 #define vxge_debug_ll(level, mask, fmt, ...)
 #endif
diff --git a/drivers/net/vxge/vxge-main.h b/drivers/net/vxge/vxge-main.h
index 54989d0..3845e62 100644
--- a/drivers/net/vxge/vxge-main.h
+++ b/drivers/net/vxge/vxge-main.h
@@ -387,8 +387,6 @@ struct vxge_tx_priv {
 	static int p = val; \
 	module_param(p, int, 0)
 
-#define vxge_os_bug(fmt...)		{ printk(fmt); BUG(); }
-
 #define vxge_os_timer(timer, handle, arg, exp) do { \
 		init_timer(&timer); \
 		timer.function = handle; \
-- 
1.7.0.4


^ permalink raw reply related

* [PATCH 02/12] vxge: Wait for Rx to become idle before reseting or closing
From: Jon Mason @ 2010-11-11 14:25 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, Sivakumar Subramani, Sreenivasa Honnur, Ram Vepa

Wait for the receive traffic to become idle before attempting to close
or reset the adapter.  To enable the processing of packets while Receive
Idle, move the clearing of __VXGE_STATE_CARD_UP bit in vxge_close to
after it.  Also, modify the return value of the ISR when the adapter is
down to IRQ_HANDLED.  Otherwise there are unhandled interrupts for the
device.

Signed-off-by: Jon Mason <jon.mason@exar.com>
Signed-off-by: Ram Vepa <ram.vepa@exar.com>
---
 drivers/net/vxge/vxge-config.c |   86 ++++++++++++++++++++++++++++++++++++++-
 drivers/net/vxge/vxge-config.h |    7 +++
 drivers/net/vxge/vxge-main.c   |   19 ++++++---
 drivers/net/vxge/vxge-main.h   |    2 +
 drivers/net/vxge/vxge-reg.h    |    1 +
 5 files changed, 106 insertions(+), 9 deletions(-)

diff --git a/drivers/net/vxge/vxge-config.c b/drivers/net/vxge/vxge-config.c
index 0a35ab1..212e301 100644
--- a/drivers/net/vxge/vxge-config.c
+++ b/drivers/net/vxge/vxge-config.c
@@ -193,6 +193,88 @@ static enum vxge_hw_status
 __vxge_hw_vpath_xmac_rx_stats_get(struct __vxge_hw_virtualpath	*vpath,
 				  struct vxge_hw_xmac_vpath_rx_stats *vpath_rx_stats);
 
+static void
+vxge_hw_vpath_set_zero_rx_frm_len(struct vxge_hw_vpath_reg __iomem *vp_reg)
+{
+	u64 val64;
+
+	val64 = readq(&vp_reg->rxmac_vcfg0);
+	val64 &= ~VXGE_HW_RXMAC_VCFG0_RTS_MAX_FRM_LEN(0x3fff);
+	writeq(val64, &vp_reg->rxmac_vcfg0);
+	val64 = readq(&vp_reg->rxmac_vcfg0);
+
+	return;
+}
+
+/*
+ * vxge_hw_vpath_wait_receive_idle - Wait for Rx to become idle
+ */
+int vxge_hw_vpath_wait_receive_idle(struct __vxge_hw_device *hldev, u32 vp_id)
+{
+	struct vxge_hw_vpath_reg __iomem *vp_reg;
+	struct __vxge_hw_virtualpath *vpath;
+	u64 val64, rxd_count, rxd_spat;
+	int count = 0, total_count = 0;
+
+	vpath = &hldev->virtual_paths[vp_id];
+	vp_reg = vpath->vp_reg;
+
+	vxge_hw_vpath_set_zero_rx_frm_len(vp_reg);
+
+	/* Check that the ring controller for this vpath has enough free RxDs
+	 * to send frames to the host.  This is done by reading the
+	 * PRC_RXD_DOORBELL_VPn register and comparing the read value to the
+	 * RXD_SPAT value for the vpath.
+	 */
+	val64 = readq(&vp_reg->prc_cfg6);
+	rxd_spat = VXGE_HW_PRC_CFG6_GET_RXD_SPAT(val64) + 1;
+	/* Use a factor of 2 when comparing rxd_count against rxd_spat for some
+	 * leg room.
+	 */
+	rxd_spat *= 2;
+
+	do {
+		mdelay(1);
+
+		rxd_count = readq(&vp_reg->prc_rxd_doorbell);
+
+		/* Check that the ring controller for this vpath does
+		 * not have any frame in its pipeline.
+		 */
+		val64 = readq(&vp_reg->frm_in_progress_cnt);
+		if ((rxd_count <= rxd_spat) || (val64 > 0))
+			count = 0;
+		else
+			count++;
+		total_count++;
+	} while ((count < VXGE_HW_MIN_SUCCESSIVE_IDLE_COUNT) &&
+			(total_count < VXGE_HW_MAX_POLLING_COUNT));
+
+	if (total_count >= VXGE_HW_MAX_POLLING_COUNT)
+		printk(KERN_ALERT "%s: Still Receiving traffic. Abort wait\n",
+			__func__);
+
+	return total_count;
+}
+
+/* vxge_hw_device_wait_receive_idle - This function waits until all frames
+ * stored in the frame buffer for each vpath assigned to the given
+ * function (hldev) have been sent to the host.
+ */
+void vxge_hw_device_wait_receive_idle(struct __vxge_hw_device *hldev)
+{
+	int i, total_count = 0;
+
+	for (i = 0; i < VXGE_HW_MAX_VIRTUAL_PATHS; i++) {
+		if (!(hldev->vpaths_deployed & vxge_mBIT(i)))
+			continue;
+
+		total_count += vxge_hw_vpath_wait_receive_idle(hldev, i);
+		if (total_count >= VXGE_HW_MAX_POLLING_COUNT)
+			break;
+	}
+}
+
 /*
  * __vxge_hw_channel_allocate - Allocate memory for channel
  * This function allocates required memory for the channel and various arrays
@@ -390,7 +472,7 @@ __vxge_hw_device_register_poll(void __iomem *reg, u64 mask, u32 max_millis)
 	return ret;
 }
 
- /* __vxge_hw_device_vpath_reset_in_prog_check - Check if vpath reset
+/* __vxge_hw_device_vpath_reset_in_prog_check - Check if vpath reset
  * in progress
  * This routine checks the vpath reset in progress register is turned zero
  */
@@ -1165,7 +1247,6 @@ exit:
  * It can be used to set or reset Pause frame generation or reception
  * support of the NIC.
  */
-
 enum vxge_hw_status vxge_hw_device_setpause_data(struct __vxge_hw_device *hldev,
 						 u32 port, u32 tx, u32 rx)
 {
@@ -1409,7 +1490,6 @@ exit:
 /*
  * __vxge_hw_ring_create - Create a Ring
  * This function creates Ring and initializes it.
- *
  */
 static enum vxge_hw_status
 __vxge_hw_ring_create(struct __vxge_hw_vpath_handle *vp,
diff --git a/drivers/net/vxge/vxge-config.h b/drivers/net/vxge/vxge-config.h
index 5d77905..95e7021 100644
--- a/drivers/net/vxge/vxge-config.h
+++ b/drivers/net/vxge/vxge-config.h
@@ -2051,4 +2051,11 @@ enum vxge_hw_status vxge_hw_vpath_rts_rth_set(
 
 enum vxge_hw_status
 __vxge_hw_device_is_privilaged(u32 host_type, u32 func_id);
+
+#define VXGE_HW_MIN_SUCCESSIVE_IDLE_COUNT 5
+#define VXGE_HW_MAX_POLLING_COUNT 100
+
+int vxge_hw_vpath_wait_receive_idle(struct __vxge_hw_device *hldev, u32 vp_id);
+
+void  vxge_hw_device_wait_receive_idle(struct __vxge_hw_device *hldev);
 #endif
diff --git a/drivers/net/vxge/vxge-main.c b/drivers/net/vxge/vxge-main.c
index 2f26c37..53db6a4 100644
--- a/drivers/net/vxge/vxge-main.c
+++ b/drivers/net/vxge/vxge-main.c
@@ -90,7 +90,6 @@ static int vxge_mac_list_add(struct vxge_vpath *vpath, struct macInfo *mac);
 static int vxge_mac_list_del(struct vxge_vpath *vpath, struct macInfo *mac);
 static enum vxge_hw_status vxge_restore_vpath_vid_table(struct vxge_vpath *vpath);
 static enum vxge_hw_status vxge_restore_vpath_mac_addr(struct vxge_vpath *vpath);
-static enum vxge_hw_status vxge_reset_all_vpaths(struct vxgedev *vdev);
 
 static inline int is_vxge_card_up(struct vxgedev *vdev)
 {
@@ -1299,8 +1298,13 @@ static void vxge_vpath_intr_enable(struct vxgedev *vdev, int vp_id)
 static void vxge_vpath_intr_disable(struct vxgedev *vdev, int vp_id)
 {
 	struct vxge_vpath *vpath = &vdev->vpaths[vp_id];
+	struct __vxge_hw_device *hldev;
 	int msix_id;
 
+	hldev = (struct __vxge_hw_device *)pci_get_drvdata(vdev->pdev);
+
+	vxge_hw_vpath_wait_receive_idle(hldev, vpath->device_id);
+
 	vxge_hw_vpath_intr_disable(vpath->handle);
 
 	if (vdev->config.intr_type == INTA)
@@ -1430,6 +1434,7 @@ static int do_vxge_reset(struct vxgedev *vdev, int event)
 	}
 
 	if (event == VXGE_LL_FULL_RESET) {
+		vxge_hw_device_wait_receive_idle(vdev->devh);
 		vxge_hw_device_intr_disable(vdev->devh);
 
 		switch (vdev->cric_err_event) {
@@ -1935,7 +1940,7 @@ static enum vxge_hw_status vxge_restore_vpath_mac_addr(struct vxge_vpath *vpath)
 }
 
 /* reset vpaths */
-static enum vxge_hw_status vxge_reset_all_vpaths(struct vxgedev *vdev)
+enum vxge_hw_status vxge_reset_all_vpaths(struct vxgedev *vdev)
 {
 	enum vxge_hw_status status = VXGE_HW_OK;
 	struct vxge_vpath *vpath;
@@ -2080,7 +2085,7 @@ static irqreturn_t vxge_isr_napi(int irq, void *dev_id)
 		return IRQ_NONE;
 
 	if (unlikely(!is_vxge_card_up(vdev)))
-		return IRQ_NONE;
+		return IRQ_HANDLED;
 
 	status = vxge_hw_device_begin_irq(hldev, vdev->exec_mode,
 			&reason);
@@ -2787,7 +2792,6 @@ static int do_vxge_close(struct net_device *dev, int do_io)
 	while (test_and_set_bit(__VXGE_STATE_RESET_CARD, &vdev->state))
 		msleep(50);
 
-	clear_bit(__VXGE_STATE_CARD_UP, &vdev->state);
 	if (do_io) {
 		/* Put the vpath back in normal mode */
 		vpath_vector = vxge_mBIT(vdev->vpaths[0].device_id);
@@ -2831,6 +2835,11 @@ static int do_vxge_close(struct net_device *dev, int do_io)
 
 	del_timer_sync(&vdev->vp_reset_timer);
 
+	if (do_io)
+		vxge_hw_device_wait_receive_idle(hldev);
+
+	clear_bit(__VXGE_STATE_CARD_UP, &vdev->state);
+
 	/* Disable napi */
 	if (vdev->config.intr_type != MSI_X)
 		napi_disable(&vdev->napi);
@@ -2847,8 +2856,6 @@ static int do_vxge_close(struct net_device *dev, int do_io)
 	if (do_io)
 		vxge_hw_device_intr_disable(vdev->devh);
 
-	mdelay(1000);
-
 	vxge_rem_isr(vdev);
 
 	vxge_napi_del_all(vdev);
diff --git a/drivers/net/vxge/vxge-main.h b/drivers/net/vxge/vxge-main.h
index a4f6d86..54989d0 100644
--- a/drivers/net/vxge/vxge-main.h
+++ b/drivers/net/vxge/vxge-main.h
@@ -397,6 +397,8 @@ struct vxge_tx_priv {
 	} while (0);
 
 extern void vxge_initialize_ethtool_ops(struct net_device *ndev);
+enum vxge_hw_status vxge_reset_all_vpaths(struct vxgedev *vdev);
+
 /**
  * #define VXGE_DEBUG_INIT: debug for initialization functions
  * #define VXGE_DEBUG_TX	 : debug transmit related functions
diff --git a/drivers/net/vxge/vxge-reg.h b/drivers/net/vxge/vxge-reg.h
index 3dd5c96..93fd752 100644
--- a/drivers/net/vxge/vxge-reg.h
+++ b/drivers/net/vxge/vxge-reg.h
@@ -3998,6 +3998,7 @@ struct vxge_hw_vpath_reg {
 #define	VXGE_HW_PRC_CFG6_L4_CPC_TRSFR_CODE_EN	vxge_mBIT(9)
 #define VXGE_HW_PRC_CFG6_RXD_CRXDT(val) vxge_vBIT(val, 23, 9)
 #define VXGE_HW_PRC_CFG6_RXD_SPAT(val) vxge_vBIT(val, 36, 9)
+#define VXGE_HW_PRC_CFG6_GET_RXD_SPAT(val)	vxge_bVALn(val, 36, 9)
 /*0x00a78*/	u64	prc_cfg7;
 #define VXGE_HW_PRC_CFG7_SCATTER_MODE(val) vxge_vBIT(val, 6, 2)
 #define	VXGE_HW_PRC_CFG7_SMART_SCAT_EN	vxge_mBIT(11)
-- 
1.7.0.4


^ permalink raw reply related

* [PATCH 01/12] vxge: enable rxhash
From: Jon Mason @ 2010-11-11 14:25 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, Sivakumar Subramani, Sreenivasa Honnur, Ram Vepa

Enable RSS hashing and add ability to pass up the adapter calculated rx
hash up the network stack (if feature is available).  Add the ability to
enable/disable feature via ethtool, which requires that the adapter is
not running at the time.  Other miscellaneous cleanups and fixes
required to get RSS working.

Signed-off-by: Jon Mason <jon.mason@exar.com>
Signed-off-by: Ram Vepa <ram.vepa@exar.com>
---
 drivers/net/vxge/vxge-config.c  |    2 +
 drivers/net/vxge/vxge-config.h  |   12 ++++----
 drivers/net/vxge/vxge-ethtool.c |   35 +++++++++++++++++++++++++
 drivers/net/vxge/vxge-main.c    |   54 ++++++++++++++++++++++++--------------
 drivers/net/vxge/vxge-main.h    |   18 ++++++------
 drivers/net/vxge/vxge-traffic.h |   28 --------------------
 6 files changed, 86 insertions(+), 63 deletions(-)

diff --git a/drivers/net/vxge/vxge-config.c b/drivers/net/vxge/vxge-config.c
index 906a3ca..0a35ab1 100644
--- a/drivers/net/vxge/vxge-config.c
+++ b/drivers/net/vxge/vxge-config.c
@@ -3204,6 +3204,8 @@ enum vxge_hw_status vxge_hw_vpath_rts_rth_set(
 		     VXGE_HW_RTS_ACCESS_STEER_CTRL_ACTION_READ_ENTRY,
 		     VXGE_HW_RTS_ACCESS_STEER_CTRL_DATA_STRUCT_SEL_RTH_GEN_CFG,
 			0, &data0, &data1);
+	if (status != VXGE_HW_OK)
+		goto exit;
 
 	data0 &= ~(VXGE_HW_RTS_ACCESS_STEER_DATA0_RTH_GEN_BUCKET_SIZE(0xf) |
 			VXGE_HW_RTS_ACCESS_STEER_DATA0_RTH_GEN_ALG_SEL(0x3));
diff --git a/drivers/net/vxge/vxge-config.h b/drivers/net/vxge/vxge-config.h
index 5c00861..5d77905 100644
--- a/drivers/net/vxge/vxge-config.h
+++ b/drivers/net/vxge/vxge-config.h
@@ -1413,12 +1413,12 @@ enum vxge_hw_rth_algoritms {
  * See also: vxge_hw_vpath_rts_rth_set(), vxge_hw_vpath_rts_rth_get().
  */
 struct vxge_hw_rth_hash_types {
-	u8 hash_type_tcpipv4_en;
-	u8 hash_type_ipv4_en;
-	u8 hash_type_tcpipv6_en;
-	u8 hash_type_ipv6_en;
-	u8 hash_type_tcpipv6ex_en;
-	u8 hash_type_ipv6ex_en;
+	u8 hash_type_tcpipv4_en:1,
+	   hash_type_ipv4_en:1,
+	   hash_type_tcpipv6_en:1,
+	   hash_type_ipv6_en:1,
+	   hash_type_tcpipv6ex_en:1,
+	   hash_type_ipv6ex_en:1;
 };
 
 void vxge_hw_device_debug_set(
diff --git a/drivers/net/vxge/vxge-ethtool.c b/drivers/net/vxge/vxge-ethtool.c
index b67746e..f8fd8da 100644
--- a/drivers/net/vxge/vxge-ethtool.c
+++ b/drivers/net/vxge/vxge-ethtool.c
@@ -1119,6 +1119,40 @@ static int vxge_ethtool_get_sset_count(struct net_device *dev, int sset)
 	}
 }
 
+static int vxge_set_flags(struct net_device *dev, u32 data)
+{
+	struct vxgedev *vdev = (struct vxgedev *)netdev_priv(dev);
+	enum vxge_hw_status status;
+
+	if (data & ~ETH_FLAG_RXHASH)
+		return -EOPNOTSUPP;
+
+	if (!!(data & ETH_FLAG_RXHASH) == vdev->devh->config.rth_en)
+		return 0;
+
+	if (netif_running(dev) || (vdev->config.rth_steering == NO_STEERING))
+		return -EINVAL;
+
+	vdev->devh->config.rth_en = !!(data & ETH_FLAG_RXHASH);
+
+	/* Enabling RTH requires some of the logic in vxge_device_register and a
+	 * vpath reset.  Due to these restrictions, only allow modification
+	 * while the interface is down.
+	 */
+	status = vxge_reset_all_vpaths(vdev);
+	if (status != VXGE_HW_OK) {
+		vdev->devh->config.rth_en = !vdev->devh->config.rth_en;
+		return -EFAULT;
+	}
+
+	if (vdev->devh->config.rth_en)
+		dev->features |= NETIF_F_RXHASH;
+	else
+		dev->features &= ~NETIF_F_RXHASH;
+
+	return 0;
+}
+
 static const struct ethtool_ops vxge_ethtool_ops = {
 	.get_settings		= vxge_ethtool_gset,
 	.set_settings		= vxge_ethtool_sset,
@@ -1140,6 +1174,7 @@ static const struct ethtool_ops vxge_ethtool_ops = {
 	.phys_id		= vxge_ethtool_idnic,
 	.get_sset_count		= vxge_ethtool_get_sset_count,
 	.get_ethtool_stats	= vxge_get_ethtool_stats,
+	.set_flags		= vxge_set_flags,
 };
 
 void vxge_initialize_ethtool_ops(struct net_device *ndev)
diff --git a/drivers/net/vxge/vxge-main.c b/drivers/net/vxge/vxge-main.c
index 813829f..2f26c37 100644
--- a/drivers/net/vxge/vxge-main.c
+++ b/drivers/net/vxge/vxge-main.c
@@ -513,6 +513,13 @@ vxge_rx_1b_compl(struct __vxge_hw_ring *ringh, void *dtr,
 		else
 			skb_checksum_none_assert(skb);
 
+		/* rth_hash_type and rth_it_hit are non-zero regardless of
+		 * whether rss is enabled.  Only the rth_value is zero/non-zero
+		 * if rss is disabled/enabled, so key off of that.
+		 */
+		if (ext_info.rth_value)
+			skb->rxhash = ext_info.rth_value;
+
 		vxge_rx_complete(ring, skb, ext_info.vlan,
 			pkt_length, &ext_info);
 
@@ -1689,15 +1696,6 @@ static enum vxge_hw_status vxge_rth_configure(struct vxgedev *vdev)
 		mtable[index] = index % vdev->no_of_vpath;
 	}
 
-	/* Fill RTH hash types */
-	hash_types.hash_type_tcpipv4_en   = vdev->config.rth_hash_type_tcpipv4;
-	hash_types.hash_type_ipv4_en      = vdev->config.rth_hash_type_ipv4;
-	hash_types.hash_type_tcpipv6_en   = vdev->config.rth_hash_type_tcpipv6;
-	hash_types.hash_type_ipv6_en      = vdev->config.rth_hash_type_ipv6;
-	hash_types.hash_type_tcpipv6ex_en =
-					vdev->config.rth_hash_type_tcpipv6ex;
-	hash_types.hash_type_ipv6ex_en    = vdev->config.rth_hash_type_ipv6ex;
-
 	/* set indirection table, bucket-to-vpath mapping */
 	status = vxge_hw_vpath_rts_rth_itable_set(vdev->vp_handles,
 						vdev->no_of_vpath,
@@ -1710,12 +1708,21 @@ static enum vxge_hw_status vxge_rth_configure(struct vxgedev *vdev)
 		return status;
 	}
 
+	/* Fill RTH hash types */
+	hash_types.hash_type_tcpipv4_en   = vdev->config.rth_hash_type_tcpipv4;
+	hash_types.hash_type_ipv4_en      = vdev->config.rth_hash_type_ipv4;
+	hash_types.hash_type_tcpipv6_en   = vdev->config.rth_hash_type_tcpipv6;
+	hash_types.hash_type_ipv6_en      = vdev->config.rth_hash_type_ipv6;
+	hash_types.hash_type_tcpipv6ex_en =
+					vdev->config.rth_hash_type_tcpipv6ex;
+	hash_types.hash_type_ipv6ex_en    = vdev->config.rth_hash_type_ipv6ex;
+
 	/*
-	* Because the itable_set() method uses the active_table field
-	* for the target virtual path the RTH config should be updated
-	* for all VPATHs. The h/w only uses the lowest numbered VPATH
-	* when steering frames.
-	*/
+	 * Because the itable_set() method uses the active_table field
+	 * for the target virtual path the RTH config should be updated
+	 * for all VPATHs. The h/w only uses the lowest numbered VPATH
+	 * when steering frames.
+	 */
 	 for (index = 0; index < vdev->no_of_vpath; index++) {
 		status = vxge_hw_vpath_rts_rth_set(
 				vdev->vpaths[index].handle,
@@ -2598,6 +2605,8 @@ vxge_open(struct net_device *dev)
 			goto out2;
 		}
 	}
+	printk(KERN_INFO "%s: Receive Hashing Offload %s\n", dev->name,
+	       hldev->config.rth_en ? "enabled" : "disabled");
 
 	for (i = 0; i < vdev->no_of_vpath; i++) {
 		vpath = &vdev->vpaths[i];
@@ -3178,6 +3187,11 @@ static int __devinit vxge_device_register(struct __vxge_hw_device *hldev,
 
 	vxge_initialize_ethtool_ops(ndev);
 
+	if (vdev->config.rth_steering != NO_STEERING) {
+		ndev->features |= NETIF_F_RXHASH;
+		hldev->config.rth_en = VXGE_HW_RTH_ENABLE;
+	}
+
 	/* Allocate memory for vpath */
 	vdev->vpaths = kzalloc((sizeof(struct vxge_vpath)) *
 				no_of_vpath, GFP_KERNEL);
@@ -4163,12 +4177,12 @@ vxge_probe(struct pci_dev *pdev, const struct pci_device_id *pre)
 	ll_config->fifo_indicate_max_pkts = VXGE_FIFO_INDICATE_MAX_PKTS;
 	ll_config->addr_learn_en = addr_learn_en;
 	ll_config->rth_algorithm = RTH_ALG_JENKINS;
-	ll_config->rth_hash_type_tcpipv4 = VXGE_HW_RING_HASH_TYPE_TCP_IPV4;
-	ll_config->rth_hash_type_ipv4 = VXGE_HW_RING_HASH_TYPE_NONE;
-	ll_config->rth_hash_type_tcpipv6 = VXGE_HW_RING_HASH_TYPE_NONE;
-	ll_config->rth_hash_type_ipv6 = VXGE_HW_RING_HASH_TYPE_NONE;
-	ll_config->rth_hash_type_tcpipv6ex = VXGE_HW_RING_HASH_TYPE_NONE;
-	ll_config->rth_hash_type_ipv6ex = VXGE_HW_RING_HASH_TYPE_NONE;
+	ll_config->rth_hash_type_tcpipv4 = 1;
+	ll_config->rth_hash_type_ipv4 = 0;
+	ll_config->rth_hash_type_tcpipv6 = 0;
+	ll_config->rth_hash_type_ipv6 = 0;
+	ll_config->rth_hash_type_tcpipv6ex = 0;
+	ll_config->rth_hash_type_ipv6ex = 0;
 	ll_config->rth_bkt_sz = RTH_BUCKET_SIZE;
 	ll_config->tx_pause_enable = VXGE_PAUSE_CTRL_ENABLE;
 	ll_config->rx_pause_enable = VXGE_PAUSE_CTRL_ENABLE;
diff --git a/drivers/net/vxge/vxge-main.h b/drivers/net/vxge/vxge-main.h
index de64536..a4f6d86 100644
--- a/drivers/net/vxge/vxge-main.h
+++ b/drivers/net/vxge/vxge-main.h
@@ -145,15 +145,15 @@ struct vxge_config {
 
 	int		addr_learn_en;
 
-	int		rth_steering;
-	int		rth_algorithm;
-	int		rth_hash_type_tcpipv4;
-	int		rth_hash_type_ipv4;
-	int		rth_hash_type_tcpipv6;
-	int		rth_hash_type_ipv6;
-	int		rth_hash_type_tcpipv6ex;
-	int		rth_hash_type_ipv6ex;
-	int		rth_bkt_sz;
+	u32		rth_steering:2,
+			rth_algorithm:2,
+			rth_hash_type_tcpipv4:1,
+			rth_hash_type_ipv4:1,
+			rth_hash_type_tcpipv6:1,
+			rth_hash_type_ipv6:1,
+			rth_hash_type_tcpipv6ex:1,
+			rth_hash_type_ipv6ex:1,
+			rth_bkt_sz:8;
 	int		rth_jhash_golden_ratio;
 	int		tx_steering_type;
 	int 	fifo_indicate_max_pkts;
diff --git a/drivers/net/vxge/vxge-traffic.h b/drivers/net/vxge/vxge-traffic.h
index 9890d4d..1fceee8 100644
--- a/drivers/net/vxge/vxge-traffic.h
+++ b/drivers/net/vxge/vxge-traffic.h
@@ -1904,34 +1904,6 @@ enum vxge_hw_ring_tcode {
 	VXGE_HW_RING_T_CODE_MULTI_ERR			= 0xF
 };
 
-/**
- * enum enum vxge_hw_ring_hash_type - RTH hash types
- * @VXGE_HW_RING_HASH_TYPE_NONE: No Hash
- * @VXGE_HW_RING_HASH_TYPE_TCP_IPV4: TCP IPv4
- * @VXGE_HW_RING_HASH_TYPE_UDP_IPV4: UDP IPv4
- * @VXGE_HW_RING_HASH_TYPE_IPV4: IPv4
- * @VXGE_HW_RING_HASH_TYPE_TCP_IPV6: TCP IPv6
- * @VXGE_HW_RING_HASH_TYPE_UDP_IPV6: UDP IPv6
- * @VXGE_HW_RING_HASH_TYPE_IPV6: IPv6
- * @VXGE_HW_RING_HASH_TYPE_TCP_IPV6_EX: TCP IPv6 extension
- * @VXGE_HW_RING_HASH_TYPE_UDP_IPV6_EX: UDP IPv6 extension
- * @VXGE_HW_RING_HASH_TYPE_IPV6_EX: IPv6 extension
- *
- * RTH hash types
- */
-enum vxge_hw_ring_hash_type {
-	VXGE_HW_RING_HASH_TYPE_NONE			= 0x0,
-	VXGE_HW_RING_HASH_TYPE_TCP_IPV4		= 0x1,
-	VXGE_HW_RING_HASH_TYPE_UDP_IPV4		= 0x2,
-	VXGE_HW_RING_HASH_TYPE_IPV4			= 0x3,
-	VXGE_HW_RING_HASH_TYPE_TCP_IPV6		= 0x4,
-	VXGE_HW_RING_HASH_TYPE_UDP_IPV6		= 0x5,
-	VXGE_HW_RING_HASH_TYPE_IPV6			= 0x6,
-	VXGE_HW_RING_HASH_TYPE_TCP_IPV6_EX	= 0x7,
-	VXGE_HW_RING_HASH_TYPE_UDP_IPV6_EX	= 0x8,
-	VXGE_HW_RING_HASH_TYPE_IPV6_EX		= 0x9
-};
-
 enum vxge_hw_status vxge_hw_ring_rxd_reserve(
 	struct __vxge_hw_ring *ring_handle,
 	void **rxdh);
-- 
1.7.0.4


^ permalink raw reply related

* vxge update version 3
From: Jon Mason @ 2010-11-11 14:25 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, Sivakumar Subramani, Sreenivasa Honnur

This version of the patch series includes Ben Hutchings' comments on the
firmware flashing patch and resolves the merge issues with the latest
code.


^ permalink raw reply

* Re: sk->sk_socket seems to disappear before connection termination
From: Patrick McHardy @ 2010-11-11 11:00 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: Jan Engelhardt, David Howells, Netfilter Developer Mailing List,
	netdev, Rafał Maj
In-Reply-To: <1289410679.2860.245.camel@edumazet-laptop>

On 10.11.2010 18:37, Eric Dumazet wrote:
> Le mercredi 10 novembre 2010 à 18:17 +0100, Jan Engelhardt a écrit :
>> On Wednesday 2010-11-10 17:44, Eric Dumazet wrote:
>>>
>>> [ 9920.234680] ipt_LOG: sk=ffff880118bd32c0 sk->sk_socket=ffff88011d0d8c00 file=ffff88011cd4e100
>>> [ 9920.234731] IN= OUT=eth1 SRC=192.168.20.108 DST=192.168.20.110 LEN=52 TOS=0x10 PREC=0x00 TTL=64 ID=63704 DF PROTO=TCP SPT=60088 DPT=22 WINDOW=35 RES=0x00 ACK FIN URGP=0 UID=0 GID=0 
>>> [ 9920.235221] ipt_LOG: sk=ffff880078998000 sk->sk_socket=ffff880078c58300 file=          (null)
>>> [ 9920.235271] IN= OUT=eth1 SRC=192.168.20.108 DST=192.168.20.110 LEN=52 TOS=0x00 PREC=0x00 TTL=64 ID=0 DF PROTO=TCP SPT=60088 DPT=22 WINDOW=35 RES=0x00 ACK URGP=0 
>>>
>>> You can see in my log, that the last packet seems to be from a different
>>> socket ! (sk pointer changed to ffff880078998000 !)
>>
>> Yes, that's it.
>>
>>> Well well well, thats an ACK, in answer to FIN packet received from remote
>>> side.
>>
>> But why is it not handled by sk ffff880118bd32c0 anymore?
>> It does have, after all, the same (addr,port) tuple.
>> And it is sort of a hiccup for xt_owner users.
> 
> Its because of TIMEWAIT state : no more socket
> 
> We use a special tcp socket (net->ipv4.tcp_sock) in tcp_v4_send_ack()

Yeah, this has always been a problem with the owner match.
I don't think this fixable, it should probably only be used
in a stateful manner.

^ permalink raw reply

* Re: [e1000e] BUG triggered in blink path
From: Holger Eitzenberger @ 2010-11-11 10:17 UTC (permalink / raw)
  To: Brandeburg, Jesse; +Cc: e1000-devel, netdev
In-Reply-To: <alpine.WNT.2.00.1011101030130.14720@jbrandeb-desk1.amr.corp.intel.com>

[-- Attachment #1: Type: text/plain, Size: 1170 bytes --]

Hi Jesse,

I've attached the patch against net-next-2.6.  Please check if it's ok
for you.  I checked e1000, igb and ixgbe as well, they don't have that
problem.

 /holger

> > After taking a look I think this may be caused by initializing
> > adapter->led_blink_task several times in e1000_phys_id(), while possibly
> > led_blink_task is running:
> > 
> > 	if ((hw->phy.type == e1000_phy_ife) ||
> > 	    (hw->mac.type == e1000_pchlan) ||
> > 	    (hw->mac.type == e1000_82574)) {
> > 		INIT_WORK(&adapter->led_blink_task, e1000e_led_blink_task);
> > 		if (!adapter->blink_timer.function) {
> > 
> > I can't reproduce it after moving it inside the following if block,
> > but I'm not quite sure if this catches all races in there.  Especially
> > the msleep_interruptible() may be too optimistic because it may
> > actually not wait long enough.  Someone with more knowledge of the
> > driver should take a look.
> 
> thanks for your investigation and troubleshooting.  I don't think it is 
> correct at all to be calling INIT_WORK more than once.  In fact the 
> INIT_WORK should just be moved into probe, and then e1000_phys_id should 
> just do schedule_work.
> 
> 

[-- Attachment #2: x.diff --]
[-- Type: text/x-diff, Size: 2637 bytes --]

e1000e: fix double initialization in blink path

The kernel goes BUG() at the time 'ethtool -p eth0 3' comes
back, which is due to adapter->led_blink_task initialized
several times.  At the time it is still running this results
in a corrupted task_list of the associated workqueue.

The fix is to move the workqueue initialization to the
probe function instead.

Signed-off-by: Holger Eitzenberger <holger@eitzenberger.org>

Index: net-next-2.6/drivers/net/e1000e/ethtool.c
===================================================================
--- net-next-2.6.orig/drivers/net/e1000e/ethtool.c	2010-11-11 10:57:28.000000000 +0100
+++ net-next-2.6/drivers/net/e1000e/ethtool.c	2010-11-11 11:02:21.000000000 +0100
@@ -1860,7 +1860,7 @@
 /* bit defines for adapter->led_status */
 #define E1000_LED_ON		0
 
-static void e1000e_led_blink_task(struct work_struct *work)
+void e1000e_led_blink_task(struct work_struct *work)
 {
 	struct e1000_adapter *adapter = container_of(work,
 	                                struct e1000_adapter, led_blink_task);
@@ -1892,7 +1892,6 @@
 	    (hw->mac.type == e1000_pch2lan) ||
 	    (hw->mac.type == e1000_82583) ||
 	    (hw->mac.type == e1000_82574)) {
-		INIT_WORK(&adapter->led_blink_task, e1000e_led_blink_task);
 		if (!adapter->blink_timer.function) {
 			init_timer(&adapter->blink_timer);
 			adapter->blink_timer.function =
Index: net-next-2.6/drivers/net/e1000e/netdev.c
===================================================================
--- net-next-2.6.orig/drivers/net/e1000e/netdev.c	2010-11-11 10:57:28.000000000 +0100
+++ net-next-2.6/drivers/net/e1000e/netdev.c	2010-11-11 11:01:17.000000000 +0100
@@ -5864,6 +5864,7 @@
 	INIT_WORK(&adapter->downshift_task, e1000e_downshift_workaround);
 	INIT_WORK(&adapter->update_phy_task, e1000e_update_phy_task);
 	INIT_WORK(&adapter->print_hang_task, e1000_print_hw_hang);
+	INIT_WORK(&adapter->led_blink_task, e1000e_led_blink_task);
 
 	/* Initialize link parameters. User can change them with ethtool */
 	adapter->hw.mac.autoneg = 1;
Index: net-next-2.6/drivers/net/e1000e/e1000.h
===================================================================
--- net-next-2.6.orig/drivers/net/e1000e/e1000.h	2010-11-11 10:57:28.000000000 +0100
+++ net-next-2.6/drivers/net/e1000e/e1000.h	2010-11-11 11:01:57.000000000 +0100
@@ -482,6 +482,7 @@
 
 extern void e1000e_check_options(struct e1000_adapter *adapter);
 extern void e1000e_set_ethtool_ops(struct net_device *netdev);
+extern void e1000e_led_blink_task(struct work_struct *work);
 
 extern int e1000e_up(struct e1000_adapter *adapter);
 extern void e1000e_down(struct e1000_adapter *adapter);

^ permalink raw reply

* Re: [PATCH net-next-2.6 v2] can: Topcliff: PCH_CAN driver: Fix build warnings
From: Marc Kleine-Budde @ 2010-11-11 10:04 UTC (permalink / raw)
  To: Tomoya MORINAGA
  Cc: andrew.chih.howe.khor-ral2JQCrhuEAvxtiuMwx3w, Masayuki Ohtake,
	Samuel Ortiz, margie.foster-ral2JQCrhuEAvxtiuMwx3w,
	netdev-u79uwXL29TY76Z2rM5mHXA, LKML,
	socketcan-core-0fE9KPoRgkgATYTw5x5z8w,
	yong.y.wang-ral2JQCrhuEAvxtiuMwx3w,
	kok.howg.ewe-ral2JQCrhuEAvxtiuMwx3w, Wolfgang Grandegger,
	joel.clark-ral2JQCrhuEAvxtiuMwx3w, David S. Miller,
	Christian Pellegrin, qi.wang-ral2JQCrhuEAvxtiuMwx3w
In-Reply-To: <002501cb8186$b56a6280$66f8800a-a06+6cuVnkTSQfdrb5gaxUEOCMrvLtNR@public.gmane.org>


[-- Attachment #1.1: Type: text/plain, Size: 1160 bytes --]

On 11/11/2010 10:56 AM, Tomoya MORINAGA wrote:
>>>>> + if (priv->tx_obj == (PCH_OBJ_NUM + 1)) { /* Point tail Obj + 1 */
>>>>> +  while (ioread32(&priv->regs->treq2) & 0xfc00)
>>>>> +   udelay(1);
>>>
>>> When points tail of Tx message object,
>>> this driver waits until completion of all tx messaeg objects.
>>
>> Looping busy it not an option here.
>>
>>> Thus, application/driver ought not to be able to put Tx object exceed the number of tx message object.
>>> Thus I think these code(netif_stop_queue/netif_wake_queue) are completely redundant.
>>
>> Nope - please remove the waiting completely and convert your flow
>> control to netif_stop_queue/netif_wake_queue.
>>
> 
> I see.
> I will remove like above.
> 
> BTW, Let me know the reason.
> Could you explain the reason why you deny looping busy loop ?

That would kill performance.

cheers, Marc

-- 
Pengutronix e.K.                  | Marc Kleine-Budde           |
Industrial Linux Solutions        | Phone: +49-231-2826-924     |
Vertretung West/Dortmund          | Fax:   +49-5121-206917-5555 |
Amtsgericht Hildesheim, HRA 2686  | http://www.pengutronix.de   |


[-- Attachment #1.2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 262 bytes --]

[-- Attachment #2: Type: text/plain, Size: 188 bytes --]

_______________________________________________
Socketcan-core mailing list
Socketcan-core-0fE9KPoRgkgATYTw5x5z8w@public.gmane.org
https://lists.berlios.de/mailman/listinfo/socketcan-core

^ permalink raw reply

* Re: [PATCH net-next-2.6 v2] can: Topcliff: PCH_CAN driver: Fix build warnings
From: Tomoya MORINAGA @ 2010-11-11  9:56 UTC (permalink / raw)
  To: Marc Kleine-Budde
  Cc: andrew.chih.howe.khor-ral2JQCrhuEAvxtiuMwx3w, Masayuki Ohtake,
	Samuel Ortiz, margie.foster-ral2JQCrhuEAvxtiuMwx3w,
	netdev-u79uwXL29TY76Z2rM5mHXA, LKML,
	socketcan-core-0fE9KPoRgkgATYTw5x5z8w,
	yong.y.wang-ral2JQCrhuEAvxtiuMwx3w,
	kok.howg.ewe-ral2JQCrhuEAvxtiuMwx3w, Wolfgang Grandegger,
	joel.clark-ral2JQCrhuEAvxtiuMwx3w, David S. Miller,
	Christian Pellegrin, qi.wang-ral2JQCrhuEAvxtiuMwx3w
In-Reply-To: <4CD945B4.4060408@pengutronix.de>

On Tuesday, November 09, 2010 9:59 PM, Marc Kleine-Budde wrote :

> 
> On 11/09/2010 01:26 PM, Tomoya MORINAGA wrote:
> >>>> Can you please explain me your locking sheme? If I understand the
> >>>> documenation correctly the two message interfaces can be used mutual.
> >>>> And you use one for rx the other one for tx.
> >>>
> >>> I show our locking scheme.
> >>> When CPU accesses MessageRAM via IF1, CPU protect until read-modify-write
> >>> so that IF2 access not occurred, vice versa.
> >>
> >> Why is that needed?
> >
> > For MessageRAM data consistency.
> 
> As far as I understand the datasheet the access to IF1 and IF2 is
> completely independent. Why do you lock here?

You are right.
Each IF1 and IF2 is completely independent.
All message objects can be accessed via both IF1 and IF2.
There is a possibility that a message object is accessed by IF1 and IF2 simultaneously.
But this driver separates message object by Tx/Rx completely.(Rx:1~26, Tx:27~32)
Thus, these locks are unnecessary.
I will delete these.

> 
> [...]
> 
> >>>> Please use just "debug" level not warning here. Consider to use
> >>>> netdev_dbg() instead. IMHO the __func__ can be dropped and the
> >>>> "official" name for the error is "Error Warning".
> >>>
> >>> I want to know the reason.
> >>> Why is it not dev_warn but netdev_dbg ?
> >>
> >> If you use warning level it would end up on the console or and in the
> >> syslog. It's quite complicated (for programs) to get information from
> >> there. This is why we send CAN error frames. They hold the same
> >> information but int a binary form, thus it's easier to process.
> >
> > I understand the reason.
> > BTW, Why do you say not dev_dbg but netdev_dbg ?
> 
> Sorry - netdev_dbg() is easier to use, its first argument is the
> netdevice, while dev_dbg needs a device and that's deeply hidden in the
> netdevice.
> 

I understand.


> [...]
> 
> >>>>> +static netdev_tx_t pch_xmit(struct sk_buff *skb, struct net_device *ndev)
> >>>>> +{
> >>>>> + unsigned long flags;
> >>>>> + struct pch_can_priv *priv = netdev_priv(ndev);
> >>>>> + struct can_frame *cf = (struct can_frame *)skb->data;
> >>>>> + int tx_buffer_avail = 0;
> >>>>
> >>>> What I'm totally missing is the TX flow controll. Your driver has to
> >>>> ensure that the package leave the controller in the order that come
> >>>> into the xmit function. Further you have to stop your xmit queue if
> >>>> you're out of tx objects and reenable if you have a object free.
> >>>>
> >>>> Use netif_stop_queue() and netif_wake_queue() for this.
> >>>
> >>> In this code, I think "out of tx objects" cannot be  occurred.
> >>
> >> It's not a matter of code it's the hardware. You cannot put more than a
> >> certain number of CAN frames into the hardware. If you have a CAN bus at
> >> a certain speed, you can only send a certain number of CAN frames in a
> >> second. So you cannot push more than this amount of frames/s into the
> >> hardware.
> >>
> >>> Nevertheless, are netif_stop_queue() and netif_wake_queue() is necessary ?
> >>
> >> Yes.
> >
> > I can' understand your issue.
> > Please can you hear my opinion?
> >
> > Please see the head of pch_xmit.
> >
> >>> + if (priv->tx_obj == (PCH_OBJ_NUM + 1)) { /* Point tail Obj + 1 */
> >>> +  while (ioread32(&priv->regs->treq2) & 0xfc00)
> >>> +   udelay(1);
> >
> > When points tail of Tx message object,
> > this driver waits until completion of all tx messaeg objects.
> 
> Looping busy it not an option here.
> 
> > Thus, application/driver ought not to be able to put Tx object exceed the number of tx message object.
> > Thus I think these code(netif_stop_queue/netif_wake_queue) are completely redundant.
> 
> Nope - please remove the waiting completely and convert your flow
> control to netif_stop_queue/netif_wake_queue.
> 

I see.
I will remove like above.

BTW, Let me know the reason.
Could you explain the reason why you deny looping busy loop ?


Thanks,

Tomoya MORINAGA
OKI SEMICONDUCTOR CO., LTD.

^ permalink raw reply

* [PATCH net-next-2.6] vlan: lockless transmit path
From: Eric Dumazet @ 2010-11-11  9:42 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, Patrick McHardy

vlan is a stacked device, like tunnels. We should use the lockless
mechanism we are using in tunnels and loopback.

This patch completely removes locking in TX path.

tx stat counters are added into existing percpu stat structure, renamed
from vlan_rx_stats to vlan_pcpu_stats.

Note : this partially reverts commit 2e59af3dcbdf (vlan: multiqueue vlan
device)

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Patrick McHardy <kaber@trash.net>
---
 net/8021q/vlan.c         |    4 -
 net/8021q/vlan.h         |   16 +++++--
 net/8021q/vlan_core.c    |    4 -
 net/8021q/vlan_dev.c     |   78 +++++++++++++++++++++----------------
 net/8021q/vlan_netlink.c |   20 ---------
 5 files changed, 59 insertions(+), 63 deletions(-)

diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 52077ca..2f54ce8 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -272,13 +272,11 @@ static int register_vlan_device(struct net_device *real_dev, u16 vlan_id)
 		snprintf(name, IFNAMSIZ, "vlan%.4i", vlan_id);
 	}
 
-	new_dev = alloc_netdev_mq(sizeof(struct vlan_dev_info), name,
-				  vlan_setup, real_dev->num_tx_queues);
+	new_dev = alloc_netdev(sizeof(struct vlan_dev_info), name, vlan_setup);
 
 	if (new_dev == NULL)
 		return -ENOBUFS;
 
-	netif_copy_real_num_queues(new_dev, real_dev);
 	dev_net_set(new_dev, net);
 	/* need 4 bytes for extra VLAN header info,
 	 * hope the underlying device can handle it.
diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h
index db01b31..a2dafb1 100644
--- a/net/8021q/vlan.h
+++ b/net/8021q/vlan.h
@@ -19,19 +19,25 @@ struct vlan_priority_tci_mapping {
 
 
 /**
- *	struct vlan_rx_stats - VLAN percpu rx stats
+ *	struct vlan_pcpu_stats - VLAN percpu rx/tx stats
  *	@rx_packets: number of received packets
  *	@rx_bytes: number of received bytes
  *	@rx_multicast: number of received multicast packets
+ *	@tx_packets: number of transmitted packets
+ *	@tx_bytes: number of transmitted bytes
  *	@syncp: synchronization point for 64bit counters
- *	@rx_errors: number of errors
+ *	@rx_errors: number of rx errors
+ *	@tx_dropped: number of tx drops
  */
-struct vlan_rx_stats {
+struct vlan_pcpu_stats {
 	u64			rx_packets;
 	u64			rx_bytes;
 	u64			rx_multicast;
+	u64			tx_packets;
+	u64			tx_bytes;
 	struct u64_stats_sync	syncp;
-	unsigned long		rx_errors;
+	u32			rx_errors;
+	u32			tx_dropped;
 };
 
 /**
@@ -64,7 +70,7 @@ struct vlan_dev_info {
 	struct proc_dir_entry			*dent;
 	unsigned long				cnt_inc_headroom_on_tx;
 	unsigned long				cnt_encap_on_xmit;
-	struct vlan_rx_stats __percpu		*vlan_rx_stats;
+	struct vlan_pcpu_stats __percpu		*vlan_pcpu_stats;
 };
 
 static inline struct vlan_dev_info *vlan_dev_info(const struct net_device *dev)
diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c
index 69b2f79..ce8e3ab 100644
--- a/net/8021q/vlan_core.c
+++ b/net/8021q/vlan_core.c
@@ -9,7 +9,7 @@ bool vlan_hwaccel_do_receive(struct sk_buff **skbp)
 	struct sk_buff *skb = *skbp;
 	u16 vlan_id = skb->vlan_tci & VLAN_VID_MASK;
 	struct net_device *vlan_dev;
-	struct vlan_rx_stats *rx_stats;
+	struct vlan_pcpu_stats *rx_stats;
 
 	vlan_dev = vlan_find_dev(skb->dev, vlan_id);
 	if (!vlan_dev) {
@@ -26,7 +26,7 @@ bool vlan_hwaccel_do_receive(struct sk_buff **skbp)
 	skb->priority = vlan_get_ingress_priority(vlan_dev, skb->vlan_tci);
 	skb->vlan_tci = 0;
 
-	rx_stats = this_cpu_ptr(vlan_dev_info(vlan_dev)->vlan_rx_stats);
+	rx_stats = this_cpu_ptr(vlan_dev_info(vlan_dev)->vlan_pcpu_stats);
 
 	u64_stats_update_begin(&rx_stats->syncp);
 	rx_stats->rx_packets++;
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 14e3d1f..3a67483 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -141,7 +141,7 @@ int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev,
 		  struct packet_type *ptype, struct net_device *orig_dev)
 {
 	struct vlan_hdr *vhdr;
-	struct vlan_rx_stats *rx_stats;
+	struct vlan_pcpu_stats *rx_stats;
 	struct net_device *vlan_dev;
 	u16 vlan_id;
 	u16 vlan_tci;
@@ -177,7 +177,7 @@ int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev,
 	} else {
 		skb->dev = vlan_dev;
 
-		rx_stats = this_cpu_ptr(vlan_dev_info(skb->dev)->vlan_rx_stats);
+		rx_stats = this_cpu_ptr(vlan_dev_info(skb->dev)->vlan_pcpu_stats);
 
 		u64_stats_update_begin(&rx_stats->syncp);
 		rx_stats->rx_packets++;
@@ -313,8 +313,6 @@ static int vlan_dev_hard_header(struct sk_buff *skb, struct net_device *dev,
 static netdev_tx_t vlan_dev_hard_start_xmit(struct sk_buff *skb,
 					    struct net_device *dev)
 {
-	int i = skb_get_queue_mapping(skb);
-	struct netdev_queue *txq = netdev_get_tx_queue(dev, i);
 	struct vlan_ethhdr *veth = (struct vlan_ethhdr *)(skb->data);
 	unsigned int len;
 	int ret;
@@ -335,7 +333,7 @@ static netdev_tx_t vlan_dev_hard_start_xmit(struct sk_buff *skb,
 		vlan_tci |= vlan_dev_get_egress_qos_mask(dev, skb);
 		skb = __vlan_put_tag(skb, vlan_tci);
 		if (!skb) {
-			txq->tx_dropped++;
+			this_cpu_inc(vlan_dev_info(dev)->vlan_pcpu_stats->tx_dropped);
 			return NETDEV_TX_OK;
 		}
 
@@ -349,19 +347,22 @@ static netdev_tx_t vlan_dev_hard_start_xmit(struct sk_buff *skb,
 	ret = dev_queue_xmit(skb);
 
 	if (likely(ret == NET_XMIT_SUCCESS || ret == NET_XMIT_CN)) {
-		txq->tx_packets++;
-		txq->tx_bytes += len;
-	} else
-		txq->tx_dropped++;
+		struct vlan_pcpu_stats *stats;
 
+		stats = this_cpu_ptr(vlan_dev_info(dev)->vlan_pcpu_stats);
+		u64_stats_update_begin(&stats->syncp);
+		stats->tx_packets++;
+		stats->tx_bytes += len;
+		u64_stats_update_begin(&stats->syncp);
+	} else {
+		this_cpu_inc(vlan_dev_info(dev)->vlan_pcpu_stats->tx_dropped);
+	}
 	return ret;
 }
 
 static netdev_tx_t vlan_dev_hwaccel_hard_start_xmit(struct sk_buff *skb,
 						    struct net_device *dev)
 {
-	int i = skb_get_queue_mapping(skb);
-	struct netdev_queue *txq = netdev_get_tx_queue(dev, i);
 	u16 vlan_tci;
 	unsigned int len;
 	int ret;
@@ -375,10 +376,16 @@ static netdev_tx_t vlan_dev_hwaccel_hard_start_xmit(struct sk_buff *skb,
 	ret = dev_queue_xmit(skb);
 
 	if (likely(ret == NET_XMIT_SUCCESS || ret == NET_XMIT_CN)) {
-		txq->tx_packets++;
-		txq->tx_bytes += len;
-	} else
-		txq->tx_dropped++;
+		struct vlan_pcpu_stats *stats;
+
+		stats = this_cpu_ptr(vlan_dev_info(dev)->vlan_pcpu_stats);
+		u64_stats_update_begin(&stats->syncp);
+		stats->tx_packets++;
+		stats->tx_bytes += len;
+		u64_stats_update_begin(&stats->syncp);
+	} else {
+		this_cpu_inc(vlan_dev_info(dev)->vlan_pcpu_stats->tx_dropped);
+	}
 
 	return ret;
 }
@@ -738,6 +745,7 @@ static int vlan_dev_init(struct net_device *dev)
 		      (1<<__LINK_STATE_PRESENT);
 
 	dev->features |= real_dev->features & real_dev->vlan_features;
+	dev->features |= NETIF_F_LLTX;
 	dev->gso_max_size = real_dev->gso_max_size;
 
 	/* ipv6 shared card related stuff */
@@ -773,8 +781,8 @@ static int vlan_dev_init(struct net_device *dev)
 
 	vlan_dev_set_lockdep_class(dev, subclass);
 
-	vlan_dev_info(dev)->vlan_rx_stats = alloc_percpu(struct vlan_rx_stats);
-	if (!vlan_dev_info(dev)->vlan_rx_stats)
+	vlan_dev_info(dev)->vlan_pcpu_stats = alloc_percpu(struct vlan_pcpu_stats);
+	if (!vlan_dev_info(dev)->vlan_pcpu_stats)
 		return -ENOMEM;
 
 	return 0;
@@ -786,8 +794,8 @@ static void vlan_dev_uninit(struct net_device *dev)
 	struct vlan_dev_info *vlan = vlan_dev_info(dev);
 	int i;
 
-	free_percpu(vlan->vlan_rx_stats);
-	vlan->vlan_rx_stats = NULL;
+	free_percpu(vlan->vlan_pcpu_stats);
+	vlan->vlan_pcpu_stats = NULL;
 	for (i = 0; i < ARRAY_SIZE(vlan->egress_priority_map); i++) {
 		while ((pm = vlan->egress_priority_map[i]) != NULL) {
 			vlan->egress_priority_map[i] = pm->next;
@@ -825,33 +833,37 @@ static u32 vlan_ethtool_get_flags(struct net_device *dev)
 
 static struct rtnl_link_stats64 *vlan_dev_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats)
 {
-	dev_txq_stats_fold(dev, stats);
 
-	if (vlan_dev_info(dev)->vlan_rx_stats) {
-		struct vlan_rx_stats *p, accum = {0};
+	if (vlan_dev_info(dev)->vlan_pcpu_stats) {
+		struct vlan_pcpu_stats *p;
+		u32 rx_errors = 0, tx_dropped = 0;
 		int i;
 
 		for_each_possible_cpu(i) {
-			u64 rxpackets, rxbytes, rxmulticast;
+			u64 rxpackets, rxbytes, rxmulticast, txpackets, txbytes;
 			unsigned int start;
 
-			p = per_cpu_ptr(vlan_dev_info(dev)->vlan_rx_stats, i);
+			p = per_cpu_ptr(vlan_dev_info(dev)->vlan_pcpu_stats, i);
 			do {
 				start = u64_stats_fetch_begin_bh(&p->syncp);
 				rxpackets	= p->rx_packets;
 				rxbytes		= p->rx_bytes;
 				rxmulticast	= p->rx_multicast;
+				txpackets	= p->tx_packets;
+				txbytes		= p->tx_bytes;
 			} while (u64_stats_fetch_retry_bh(&p->syncp, start));
-			accum.rx_packets += rxpackets;
-			accum.rx_bytes   += rxbytes;
-			accum.rx_multicast += rxmulticast;
-			/* rx_errors is ulong, not protected by syncp */
-			accum.rx_errors  += p->rx_errors;
+
+			stats->rx_packets	+= rxpackets;
+			stats->rx_bytes		+= rxbytes;
+			stats->multicast	+= rxmulticast;
+			stats->tx_packets	+= txpackets;
+			stats->tx_bytes		+= txbytes;
+			/* rx_errors & tx_dropped are u32 */
+			rx_errors	+= p->rx_errors;
+			tx_dropped	+= p->tx_dropped;
 		}
-		stats->rx_packets = accum.rx_packets;
-		stats->rx_bytes   = accum.rx_bytes;
-		stats->rx_errors  = accum.rx_errors;
-		stats->multicast  = accum.rx_multicast;
+		stats->rx_errors  = rx_errors;
+		stats->tx_dropped = tx_dropped;
 	}
 	return stats;
 }
diff --git a/net/8021q/vlan_netlink.c b/net/8021q/vlan_netlink.c
index ddc1057..be9a5c1 100644
--- a/net/8021q/vlan_netlink.c
+++ b/net/8021q/vlan_netlink.c
@@ -101,25 +101,6 @@ static int vlan_changelink(struct net_device *dev,
 	return 0;
 }
 
-static int vlan_get_tx_queues(struct net *net,
-			      struct nlattr *tb[],
-			      unsigned int *num_tx_queues,
-			      unsigned int *real_num_tx_queues)
-{
-	struct net_device *real_dev;
-
-	if (!tb[IFLA_LINK])
-		return -EINVAL;
-
-	real_dev = __dev_get_by_index(net, nla_get_u32(tb[IFLA_LINK]));
-	if (!real_dev)
-		return -ENODEV;
-
-	*num_tx_queues      = real_dev->num_tx_queues;
-	*real_num_tx_queues = real_dev->real_num_tx_queues;
-	return 0;
-}
-
 static int vlan_newlink(struct net *src_net, struct net_device *dev,
 			struct nlattr *tb[], struct nlattr *data[])
 {
@@ -237,7 +218,6 @@ struct rtnl_link_ops vlan_link_ops __read_mostly = {
 	.maxtype	= IFLA_VLAN_MAX,
 	.policy		= vlan_policy,
 	.priv_size	= sizeof(struct vlan_dev_info),
-	.get_tx_queues  = vlan_get_tx_queues,
 	.setup		= vlan_setup,
 	.validate	= vlan_validate,
 	.newlink	= vlan_newlink,



^ permalink raw reply related

* Re: [PATCH 1/2] r6040: fix multicast operations
From: Shawn Lin @ 2010-11-11  8:39 UTC (permalink / raw)
  To: Florian Fainelli
  Cc: netdev, Marc Leclerc, Albert Chen, David Miller, Ben Hutchings
In-Reply-To: <201011041104.24681.florian@openwrt.org>

The original code does not work well when the number of mulitcast
address to handle is greater than MCAST_MAX. It only enable promiscous
mode instead of multicast hash table mode, so the hash table function
will not be activated and all multicast frames will be recieved in this
condition.

This patch fixes the following issues with the r6040 NIC operating in
multicast:

1) When the IFF_ALLMULTI flag is set, we should write 0xffff to the NIC
hash table registers to make it process multicast traffic.

2) When the number of multicast address to handle is smaller than
MCAST_MAX, we should use the NIC multicast registers MID1_{L,M,H}.

3) The hashing of the address was not correct, due to an invalid
substraction (15 - (crc & 0x0f)) instead of (crc & 0x0f) and an
incorrect crc algorithm (ether_crc_le) instead of (ether_crc).

4) If necessary, we should set HASH_EN flag in MCR0 to enable multicast
hash table function.


The version is for net-next-2.6:

Reported-by: Marc Leclerc <marc-leclerc@signaturealpha.com>
Tested-by: Marc Leclerc <marc-leclerc@signaturealpha.com>
Signed-off-by: Shawn Lin <shawn@dmp.com.tw>
Signed-off-by: Albert Chen <albert.chen@rdc.com.tw>
---
diff --git a/drivers/net/r6040.c b/drivers/net/r6040.c
index 0b014c8..e88e171 100644
--- a/drivers/net/r6040.c
+++ b/drivers/net/r6040.c
@@ -69,6 +69,8 @@
 
 /* MAC registers */
 #define MCR0           0x00    /* Control register 0 */
+#define  PROMISC       0x0020  /* Promiscuous mode */
+#define  HASH_EN       0x0100  /* Enable multicast hash table function
*/
 #define MCR1           0x04    /* Control register 1 */
 #define  MAC_RST       0x0001  /* Reset the MAC */
 #define MBCR           0x08    /* Bus control */
@@ -851,77 +853,84 @@ static void r6040_multicast_list(struct net_device
*dev)
 {
        struct r6040_private *lp = netdev_priv(dev);
        void __iomem *ioaddr = lp->base;
-       u16 *adrp;
-       u16 reg;
        unsigned long flags;
        struct netdev_hw_addr *ha;
        int i;
+       u16 hash_table[4] = { 0, };
 
-       /* MAC Address */
-       adrp = (u16 *)dev->dev_addr;
-       iowrite16(adrp[0], ioaddr + MID_0L);
-       iowrite16(adrp[1], ioaddr + MID_0M);
-       iowrite16(adrp[2], ioaddr + MID_0H);
-
-       /* Promiscous Mode */
        spin_lock_irqsave(&lp->lock, flags);
 
        /* Clear AMCP & PROM bits */
-       reg = ioread16(ioaddr) & ~0x0120;
+       lp->mcr0 = ioread16(ioaddr + MCR0) & ~(PROMISC | HASH_EN);
+
+       /* Promiscuous Mode */
        if (dev->flags & IFF_PROMISC) {
-               reg |= 0x0020;
-               lp->mcr0 |= 0x0020;
+               lp->mcr0 |= PROMISC;
        }
-       /* Too many multicast addresses
-        * accept all traffic */
-       else if ((netdev_mc_count(dev) > MCAST_MAX) ||
-                (dev->flags & IFF_ALLMULTI))
-               reg |= 0x0020;
-
-       iowrite16(reg, ioaddr);
-       spin_unlock_irqrestore(&lp->lock, flags);
+       /* Enable multicast hash table function to
+        * receive all multicast packets. */
+       else if (dev->flags & IFF_ALLMULTI) {
+               lp->mcr0 |= HASH_EN;
+
+               for (i = 0; i < MCAST_MAX ; i++) {
+                       iowrite16(0, ioaddr + MID_1L + 8 * i);
+                       iowrite16(0, ioaddr + MID_1M + 8 * i);
+                       iowrite16(0, ioaddr + MID_1H + 8 * i);
+               }
 
-       /* Build the hash table */
-       if (netdev_mc_count(dev) > MCAST_MAX) {
-               u16 hash_table[4];
+               for (i = 0; i < 4; i++)
+                       hash_table[i] = 0xffff;
+       }
+       /* Use internal multicast address registers if the number of
+        * multicast addresses is not greater than MCAST_MAX. */
+       else if (netdev_mc_count(dev) <= MCAST_MAX) {
+               i = 0;
+               netdev_for_each_mc_addr(ha, dev) {
+                       u16 *adrp = (u16 *) ha->addr;
+                       iowrite16(adrp[0], ioaddr + MID_1L + 8 * i);
+                       iowrite16(adrp[1], ioaddr + MID_1M + 8 * i);
+                       iowrite16(adrp[2], ioaddr + MID_1H + 8 * i);
+                       i++;
+               }
+               while (i < MCAST_MAX) {
+                       iowrite16(0, ioaddr + MID_1L + 8 * i);
+                       iowrite16(0, ioaddr + MID_1M + 8 * i);
+                       iowrite16(0, ioaddr + MID_1H + 8 * i);
+                       i++;
+               }
+       }
+       /* Otherwise, Enable multicast hash table function. */
+       else {
                u32 crc;
 
-               for (i = 0; i < 4; i++)
-                       hash_table[i] = 0;
+               lp->mcr0 |= HASH_EN;
 
-               netdev_for_each_mc_addr(ha, dev) {
-                       char *addrs = ha->addr;
+               for (i = 0; i < MCAST_MAX ; i++) {
+                       iowrite16(0, ioaddr + MID_1L + 8 * i);
+                       iowrite16(0, ioaddr + MID_1M + 8 * i);
+                       iowrite16(0, ioaddr + MID_1H + 8 * i);
+               }
 
-                       if (!(*addrs & 1))
-                               continue;
+               /* Build multicast hash table */
+               netdev_for_each_mc_addr(ha, dev) {
+                       u8 *addrs = ha->addr;
 
-                       crc = ether_crc_le(6, addrs);
+                       crc = ether_crc(ETH_ALEN, addrs);
                        crc >>= 26;
-                       hash_table[crc >> 4] |= 1 << (15 - (crc & 0xf));
+                       hash_table[crc >> 4] |= 1 << (crc & 0xf);
                }
-               /* Fill the MAC hash tables with their values */
+       }
+       iowrite16(lp->mcr0, ioaddr + MCR0);
+
+       /* Fill the MAC hash tables with their values */
+       if (lp->mcr0 && HASH_EN) {
                iowrite16(hash_table[0], ioaddr + MAR0);
                iowrite16(hash_table[1], ioaddr + MAR1);
                iowrite16(hash_table[2], ioaddr + MAR2);
                iowrite16(hash_table[3], ioaddr + MAR3);
        }
-       /* Multicast Address 1~4 case */
-       i = 0;
-       netdev_for_each_mc_addr(ha, dev) {
-               if (i >= MCAST_MAX)
-                       break;
-               adrp = (u16 *) ha->addr;
-               iowrite16(adrp[0], ioaddr + MID_1L + 8 * i);
-               iowrite16(adrp[1], ioaddr + MID_1M + 8 * i);
-               iowrite16(adrp[2], ioaddr + MID_1H + 8 * i);
-               i++;
-       }
-       while (i < MCAST_MAX) {
-               iowrite16(0xffff, ioaddr + MID_1L + 8 * i);
-               iowrite16(0xffff, ioaddr + MID_1M + 8 * i);
-               iowrite16(0xffff, ioaddr + MID_1H + 8 * i);
-               i++;
-       }
+
+       spin_unlock_irqrestore(&lp->lock, flags);
 }
 
 static void netdev_get_drvinfo(struct net_device *dev,
---


The version is for 2.6.32.y and 2.6.27.y:

Reported-by: Marc Leclerc <marc-leclerc@signaturealpha.com>
Tested-by: Marc Leclerc <marc-leclerc@signaturealpha.com>
Signed-off-by: Shawn Lin <shawn@dmp.com.tw>
Signed-off-by: Albert Chen <albert.chen@rdc.com.tw>
---
diff --git a/drivers/net/r6040.c b/drivers/net/r6040.c
index 9ee9f01..f9af419 100644
--- a/drivers/net/r6040.c
+++ b/drivers/net/r6040.c
@@ -69,6 +69,8 @@
 
 /* MAC registers */
 #define MCR0           0x00    /* Control register 0 */
+#define  PROMISC       0x0020  /* Promiscuous mode */
+#define  HASH_EN       0x0100  /* Enable multicast hash table function
*/
 #define MCR1           0x04    /* Control register 1 */
 #define  MAC_RST       0x0001  /* Reset the MAC */
 #define MBCR           0x08    /* Bus control */
@@ -935,76 +937,88 @@ static void r6040_multicast_list(struct net_device
*dev)
 {
        struct r6040_private *lp = netdev_priv(dev);
        void __iomem *ioaddr = lp->base;
-       u16 *adrp;
-       u16 reg;
        unsigned long flags;
        struct dev_mc_list *dmi = dev->mc_list;
        int i;
+       u16 hash_table[4] = { 0, };
 
-       /* MAC Address */
-       adrp = (u16 *)dev->dev_addr;
-       iowrite16(adrp[0], ioaddr + MID_0L);
-       iowrite16(adrp[1], ioaddr + MID_0M);
-       iowrite16(adrp[2], ioaddr + MID_0H);
-
-       /* Promiscous Mode */
        spin_lock_irqsave(&lp->lock, flags);
 
        /* Clear AMCP & PROM bits */
-       reg = ioread16(ioaddr) & ~0x0120;
+       lp->mcr0 = ioread16(ioaddr + MCR0) & ~(PROMISC | HASH_EN);
+
+       /* Promiscuous Mode */
        if (dev->flags & IFF_PROMISC) {
-               reg |= 0x0020;
-               lp->mcr0 |= 0x0020;
+               lp->mcr0 |= PROMISC;
        }
-       /* Too many multicast addresses
-        * accept all traffic */
-       else if ((dev->mc_count > MCAST_MAX)
-               || (dev->flags & IFF_ALLMULTI))
-               reg |= 0x0020;
+       /* Enable multicast hash table function to
+        * receive all multicast packets. */
+       else if (dev->flags & IFF_ALLMULTI) {
+               lp->mcr0 |= HASH_EN;
+
+               for (i = 0; i < MCAST_MAX ; i++) {
+                       iowrite16(0, ioaddr + MID_1L + 8 * i);
+                       iowrite16(0, ioaddr + MID_1M + 8 * i);
+                       iowrite16(0, ioaddr + MID_1H + 8 * i);
+               }
 
-       iowrite16(reg, ioaddr);
-       spin_unlock_irqrestore(&lp->lock, flags);
+               for (i = 0; i < 4; i++)
+                       hash_table[i] = 0xffff;
+       }
+       /* Use internal multicast address registers if the number of
+        * multicast addresses is not greater than MCAST_MAX. */
+       else if (dev->mc_count <= MCAST_MAX) {
+               i = 0;
+               while (i < dev->mc_count) {
+                       u16 *adrp = (u16 *) dmi->dmi_addr;
+                       dmi = dmi->next;
 
-       /* Build the hash table */
-       if (dev->mc_count > MCAST_MAX) {
-               u16 hash_table[4];
+                       iowrite16(adrp[0], ioaddr + MID_1L + 8 * i);
+                       iowrite16(adrp[1], ioaddr + MID_1M + 8 * i);
+                       iowrite16(adrp[2], ioaddr + MID_1H + 8 * i);
+                       i++;
+               }
+               while (i < MCAST_MAX) {
+                       iowrite16(0, ioaddr + MID_1L + 8 * i);
+                       iowrite16(0, ioaddr + MID_1M + 8 * i);
+                       iowrite16(0, ioaddr + MID_1H + 8 * i);
+                       i++;
+               }
+       }
+       /* Otherwise, Enable multicast hash table function. */
+       else {
                u32 crc;
 
-               for (i = 0; i < 4; i++)
-                       hash_table[i] = 0;
+               lp->mcr0 |= HASH_EN;
 
-               for (i = 0; i < dev->mc_count; i++) {
-                       char *addrs = dmi->dmi_addr;
+               for (i = 0; i < MCAST_MAX ; i++) {
+                       iowrite16(0, ioaddr + MID_1L + 8 * i);
+                       iowrite16(0, ioaddr + MID_1M + 8 * i);
+                       iowrite16(0, ioaddr + MID_1H + 8 * i);
+               }
 
+               /* Build multicast hash table */
+               for (i = 0; i < dev->mc_count; i++) {
+                       u8 *addrs = dmi->dmi_addr;
                        dmi = dmi->next;
 
-                       if (!(*addrs & 1))
-                               continue;
-
-                       crc = ether_crc_le(6, addrs);
+                       crc = ether_crc(ETH_ALEN, addrs);
                        crc >>= 26;
-                       hash_table[crc >> 4] |= 1 << (15 - (crc & 0xf));
+                       hash_table[crc >> 4] |= 1 << (crc & 0xf);
                }
-               /* Fill the MAC hash tables with their values */
+
+       }
+       iowrite16(lp->mcr0, ioaddr + MCR0);
+
+       /* Fill the MAC hash tables with their values */
+       if (lp->mcr0 && HASH_EN) {
                iowrite16(hash_table[0], ioaddr + MAR0);
                iowrite16(hash_table[1], ioaddr + MAR1);
                iowrite16(hash_table[2], ioaddr + MAR2);
                iowrite16(hash_table[3], ioaddr + MAR3);
        }
-       /* Multicast Address 1~4 case */
-       dmi = dev->mc_list;
-       for (i = 0, dmi; (i < dev->mc_count) && (i < MCAST_MAX); i++) {
-               adrp = (u16 *)dmi->dmi_addr;
-               iowrite16(adrp[0], ioaddr + MID_1L + 8*i);
-               iowrite16(adrp[1], ioaddr + MID_1M + 8*i);
-               iowrite16(adrp[2], ioaddr + MID_1H + 8*i);
-               dmi = dmi->next;
-       }
-       for (i = dev->mc_count; i < MCAST_MAX; i++) {
-               iowrite16(0xffff, ioaddr + MID_1L + 8*i);
-               iowrite16(0xffff, ioaddr + MID_1M + 8*i);
-               iowrite16(0xffff, ioaddr + MID_1H + 8*i);
-       }
+
+       spin_unlock_irqrestore(&lp->lock, flags);
 }
 
 static void netdev_get_drvinfo(struct net_device *dev,
---



===========================================================================================
The privileged confidential information contained in this email is intended for use only by the addressees as indicated by the original sender of this email. 
If you are not the addressee indicated in this email or are not responsible for delivery of the email to such a person, please kindly reply to the sender indicating this fact and delete all copies of it from your computer and network server immediately. 
Your cooperation is highly appreciated. It is advised that any unauthorized use of confidential information of DM&P Group is strictly prohibited; and any information in this email irrelevant to the official business of DM&P Group shall be deemed as neither given nor endorsed by DM&P Group.

===========================================================================================

^ permalink raw reply related

* RE: [PATCH v15 00/17] Provide a zero-copy method on KVM virtio-net.
From: Xin, Xiaohui @ 2010-11-11  8:28 UTC (permalink / raw)
  To: David Miller
  Cc: netdev@vger.kernel.org, kvm@vger.kernel.org,
	linux-kernel@vger.kernel.org, mst@redhat.com, mingo@elte.hu,
	herbert@gondor.apana.org.au, jdike@linux.intel.com
In-Reply-To: <20101110.094641.193701683.davem@davemloft.net>

>-----Original Message-----
>From: David Miller [mailto:davem@davemloft.net]
>Sent: Thursday, November 11, 2010 1:47 AM
>To: Xin, Xiaohui
>Cc: netdev@vger.kernel.org; kvm@vger.kernel.org; linux-kernel@vger.kernel.org;
>mst@redhat.com; mingo@elte.hu; herbert@gondor.apana.org.au; jdike@linux.intel.com
>Subject: Re: [PATCH v15 00/17] Provide a zero-copy method on KVM virtio-net.
>
>From: xiaohui.xin@intel.com
>Date: Wed, 10 Nov 2010 17:23:28 +0800
>
>> From: Xin Xiaohui <xiaohui.xin@intel.com>
>>
>>>2) The idea to key off of skb->dev in skb_release_data() is
>>>   fundamentally flawed since many actions can change skb->dev on you,
>>>   which will end up causing a leak of your external data areas.
>>
>> How about this one? If the destructor_arg is not a good candidate,
>> then I have to add an apparent field in shinfo.
>
>If destructor_arg is actually a net_device pointer or similar,
>you will need to take a reference count on it or similar.
>
Do you mean destructor_arg will be consumed by other user?
If that case, may I add a new structure member in shinfo?
Thus only zero-copy will use it, and no need for the reference count.

>Which means --> good bye performance especially on SMP.
>
>You're going to be adding new serialization points and at
>least two new atomics per packet.

^ permalink raw reply

* Re: [PATCH] Enhance AF_PACKET implementation to not require high order contiguous memory allocation (v4)
From: Maciej Żenczykowski @ 2010-11-11  8:03 UTC (permalink / raw)
  To: nhorman; +Cc: netdev, davem, eric.dumazet
In-Reply-To: <1289416194-1844-1-git-send-email-nhorman@tuxdriver.com>

On Wed, Nov 10, 2010 at 11:09,  <nhorman@tuxdriver.com> wrote:
> From: Neil Horman <nhorman@tuxdriver.com>
>
> Version 4 of this patch.
>
> Change notes:
> 1) Removed extra memset.  Didn't think kcalloc added a GFP_ZERO the way kzalloc did :)
>
> Summary:
> It was shown to me recently that systems under high load were driven very deep
> into swap when tcpdump was run.  The reason this happened was because the
> AF_PACKET protocol has a SET_RINGBUFFER socket option that allows the user space
> application to specify how many entries an AF_PACKET socket will have and how
> large each entry will be.  It seems the default setting for tcpdump is to set
> the ring buffer to 32 entries of 64 Kb each, which implies 32 order 5
> allocation.  Thats difficult under good circumstances, and horrid under memory
> pressure.
>
> I thought it would be good to make that a bit more usable.  I was going to do a
> simple conversion of the ring buffer from contigous pages to iovecs, but
> unfortunately, the metadata which AF_PACKET places in these buffers can easily
> span a page boundary, and given that these buffers get mapped into user space,
> and the data layout doesn't easily allow for a change to padding between frames
> to avoid that, a simple iovec change is just going to break user space ABI
> consistency.
>
> So I've done this, I've added a three tiered mechanism to the af_packet set_ring
> socket option.  It attempts to allocate memory in the following order:
>
> 1) Using __get_free_pages with GFP_NORETRY set, so as to fail quickly without
> digging into swap
>
> 2) Using vmalloc
>
> 3) Using __get_free_pages with GFP_NORETRY clear, causing us to try as hard as
> needed to get the memory
>
> The effect is that we don't disturb the system as much when we're under load,
> while still being able to conduct tcpdumps effectively.
>
> Tested successfully by me.
>
> Signed-off-by: Neil Horman <nhorman@tuxdriver.com>
> ---
>  net/packet/af_packet.c |   84 ++++++++++++++++++++++++++++++++++++++---------
>  1 files changed, 68 insertions(+), 16 deletions(-)
>
> diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
> index 3616f27..a372390 100644
> --- a/net/packet/af_packet.c
> +++ b/net/packet/af_packet.c
> @@ -163,8 +163,14 @@ struct packet_mreq_max {
>  static int packet_set_ring(struct sock *sk, struct tpacket_req *req,
>                int closing, int tx_ring);
>
> +#define PGV_FROM_VMALLOC 1
> +struct pgv {
> +       char *buffer;
> +       unsigned char flags;
> +};
> +
>  struct packet_ring_buffer {
> -       char                    **pg_vec;
> +       struct pgv              *pg_vec;
>        unsigned int            head;
>        unsigned int            frames_per_block;
>        unsigned int            frame_size;
> @@ -283,7 +289,8 @@ static void *packet_lookup_frame(struct packet_sock *po,
>        pg_vec_pos = position / rb->frames_per_block;
>        frame_offset = position % rb->frames_per_block;
>
> -       h.raw = rb->pg_vec[pg_vec_pos] + (frame_offset * rb->frame_size);
> +       h.raw = rb->pg_vec[pg_vec_pos].buffer +
> +               (frame_offset * rb->frame_size);
>
>        if (status != __packet_get_status(po, h.raw))
>                return NULL;
> @@ -2322,37 +2329,74 @@ static const struct vm_operations_struct packet_mmap_ops = {
>        .close  =       packet_mm_close,
>  };
>
> -static void free_pg_vec(char **pg_vec, unsigned int order, unsigned int len)
> +static void free_pg_vec(struct pgv *pg_vec, unsigned int order,
> +                       unsigned int len)
>  {
>        int i;
>
>        for (i = 0; i < len; i++) {
> -               if (likely(pg_vec[i]))
> -                       free_pages((unsigned long) pg_vec[i], order);
> +               if (likely(pg_vec[i].buffer)) {
> +                       if (pg_vec[i].flags & PGV_FROM_VMALLOC)
> +                               vfree(pg_vec[i].buffer);
> +                       else
> +                               free_pages((unsigned long)pg_vec[i].buffer,
> +                                          order);
> +                       pg_vec[i].buffer = NULL;
> +               }
>        }
>        kfree(pg_vec);
>  }
>
> -static inline char *alloc_one_pg_vec_page(unsigned long order)
> +static inline char *alloc_one_pg_vec_page(unsigned long order,
> +                                         unsigned char *flags)
>  {
> -       gfp_t gfp_flags = GFP_KERNEL | __GFP_COMP | __GFP_ZERO | __GFP_NOWARN;
> +       char *buffer = NULL;
> +       gfp_t gfp_flags = GFP_KERNEL | __GFP_COMP |
> +                         __GFP_ZERO | __GFP_NOWARN | __GFP_NORETRY;
> +
> +       buffer = (char *) __get_free_pages(gfp_flags, order);
> +
> +       if (buffer)
> +               return buffer;
> +
> +       /*
> +        * __get_free_pages failed, fall back to vmalloc
> +        */
> +       *flags |= PGV_FROM_VMALLOC;
> +       buffer = vmalloc((1 << order) * PAGE_SIZE);
>
> -       return (char *) __get_free_pages(gfp_flags, order);
> +       if (buffer)
> +               return buffer;
> +
> +       /*
> +        * vmalloc failed, lets dig into swap here
> +        */
> +       *flags = 0;
> +       gfp_flags &= ~__GFP_NORETRY;
> +       buffer = (char *)__get_free_pages(gfp_flags, order);
> +       if (buffer)
> +               return buffer;
> +
> +       /*
> +        * complete and utter failure
> +        */
> +       return NULL;
>  }
>
> -static char **alloc_pg_vec(struct tpacket_req *req, int order)
> +static struct pgv *alloc_pg_vec(struct tpacket_req *req, int order)
>  {
>        unsigned int block_nr = req->tp_block_nr;
> -       char **pg_vec;
> +       struct pgv *pg_vec;
>        int i;
>
> -       pg_vec = kzalloc(block_nr * sizeof(char *), GFP_KERNEL);
> +       pg_vec = kcalloc(block_nr, sizeof(struct pgv), GFP_KERNEL);
>        if (unlikely(!pg_vec))
>                goto out;
>
>        for (i = 0; i < block_nr; i++) {
> -               pg_vec[i] = alloc_one_pg_vec_page(order);
> -               if (unlikely(!pg_vec[i]))
> +               pg_vec[i].buffer = alloc_one_pg_vec_page(order,
> +                                                        &pg_vec[i].flags);
> +               if (unlikely(!pg_vec[i].buffer))
>                        goto out_free_pgvec;
>        }
>
> @@ -2361,6 +2405,7 @@ out:
>
>  out_free_pgvec:
>        free_pg_vec(pg_vec, order, block_nr);
> +       kfree(pg_vec);
>        pg_vec = NULL;
>        goto out;
>  }
> @@ -2368,7 +2413,7 @@ out_free_pgvec:
>  static int packet_set_ring(struct sock *sk, struct tpacket_req *req,
>                int closing, int tx_ring)
>  {
> -       char **pg_vec = NULL;
> +       struct pgv *pg_vec = NULL;
>        struct packet_sock *po = pkt_sk(sk);
>        int was_running, order = 0;
>        struct packet_ring_buffer *rb;
> @@ -2530,15 +2575,22 @@ static int packet_mmap(struct file *file, struct socket *sock,
>                        continue;
>
>                for (i = 0; i < rb->pg_vec_len; i++) {
> -                       struct page *page = virt_to_page(rb->pg_vec[i]);
> +                       struct page *page;
> +                       void *kaddr = rb->pg_vec[i].buffer;
>                        int pg_num;
>
>                        for (pg_num = 0; pg_num < rb->pg_vec_pages;
> -                                       pg_num++, page++) {
> +                                       pg_num++) {
> +                               if (rb->pg_vec[i].flags & PGV_FROM_VMALLOC)
> +                                       page = vmalloc_to_page(kaddr);
> +                               else
> +                                       page = virt_to_page(kaddr);
> +
>                                err = vm_insert_page(vma, start, page);
>                                if (unlikely(err))
>                                        goto out;
>                                start += PAGE_SIZE;
> +                               kaddr += PAGE_SIZE;
>                        }
>                }
>        }
> --
> 1.7.2.3
>
>
Acked-by: Maciej Żenczykowski <zenczykowski@gmail.com>
Reported-by: Maciej Żenczykowski <zenczykowski@gmail.com>

Nice to see this.

-- Maciej

^ permalink raw reply

* [PATCH net-next-2.6 v2] macvlan: lockless tx path
From: Eric Dumazet @ 2010-11-11  7:14 UTC (permalink / raw)
  To: David Miller; +Cc: Patrick McHardy, netdev, Ben Greear, Ben Hutchings
In-Reply-To: <1289411027.2860.248.camel@edumazet-laptop>

macvlan is a stacked device, like tunnels. We should use the lockless
mechanism we are using in tunnels and loopback.

This patch completely removes locking in TX path.

tx stat counters are added into existing percpu stat structure, renamed
from rx_stats to pcpu_stats.

Note : this reverts commit 2c11455321f37 (macvlan: add multiqueue
capability)

Note : rx_errors converted to a 32bit counter, like tx_dropped, since
they dont need 64bit range.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Patrick McHardy <kaber@trash.net>
Cc: Ben Greear <greearb@candelatech.com>
Cc: Ben Hutchings <bhutchings@solarflare.com>
---
V2: correct kerneldoc
    u32 for tx_dropped and rx_errors

 drivers/net/macvlan.c      |   80 +++++++++++++++--------------------
 include/linux/if_macvlan.h |   34 ++++++++------
 2 files changed, 55 insertions(+), 59 deletions(-)

diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index 0fc9dc7..93f0ba2 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -243,18 +243,22 @@ xmit_world:
 netdev_tx_t macvlan_start_xmit(struct sk_buff *skb,
 			       struct net_device *dev)
 {
-	int i = skb_get_queue_mapping(skb);
-	struct netdev_queue *txq = netdev_get_tx_queue(dev, i);
 	unsigned int len = skb->len;
 	int ret;
+	const struct macvlan_dev *vlan = netdev_priv(dev);
 
 	ret = macvlan_queue_xmit(skb, dev);
 	if (likely(ret == NET_XMIT_SUCCESS || ret == NET_XMIT_CN)) {
-		txq->tx_packets++;
-		txq->tx_bytes += len;
-	} else
-		txq->tx_dropped++;
+		struct macvlan_pcpu_stats *pcpu_stats;
 
+		pcpu_stats = this_cpu_ptr(vlan->pcpu_stats);
+		u64_stats_update_begin(&pcpu_stats->syncp);
+		pcpu_stats->tx_packets++;
+		pcpu_stats->tx_bytes += len;
+		u64_stats_update_end(&pcpu_stats->syncp);
+	} else {
+		this_cpu_inc(vlan->pcpu_stats->tx_dropped);
+	}
 	return ret;
 }
 EXPORT_SYMBOL_GPL(macvlan_start_xmit);
@@ -414,14 +418,15 @@ static int macvlan_init(struct net_device *dev)
 	dev->state		= (dev->state & ~MACVLAN_STATE_MASK) |
 				  (lowerdev->state & MACVLAN_STATE_MASK);
 	dev->features 		= lowerdev->features & MACVLAN_FEATURES;
+	dev->features		|= NETIF_F_LLTX;
 	dev->gso_max_size	= lowerdev->gso_max_size;
 	dev->iflink		= lowerdev->ifindex;
 	dev->hard_header_len	= lowerdev->hard_header_len;
 
 	macvlan_set_lockdep_class(dev);
 
-	vlan->rx_stats = alloc_percpu(struct macvlan_rx_stats);
-	if (!vlan->rx_stats)
+	vlan->pcpu_stats = alloc_percpu(struct macvlan_pcpu_stats);
+	if (!vlan->pcpu_stats)
 		return -ENOMEM;
 
 	return 0;
@@ -431,7 +436,7 @@ static void macvlan_uninit(struct net_device *dev)
 {
 	struct macvlan_dev *vlan = netdev_priv(dev);
 
-	free_percpu(vlan->rx_stats);
+	free_percpu(vlan->pcpu_stats);
 }
 
 static struct rtnl_link_stats64 *macvlan_dev_get_stats64(struct net_device *dev,
@@ -439,33 +444,38 @@ static struct rtnl_link_stats64 *macvlan_dev_get_stats64(struct net_device *dev,
 {
 	struct macvlan_dev *vlan = netdev_priv(dev);
 
-	dev_txq_stats_fold(dev, stats);
-
-	if (vlan->rx_stats) {
-		struct macvlan_rx_stats *p, accum = {0};
-		u64 rx_packets, rx_bytes, rx_multicast;
+	if (vlan->pcpu_stats) {
+		struct macvlan_pcpu_stats *p;
+		u64 rx_packets, rx_bytes, rx_multicast, tx_packets, tx_bytes;
+		u32 rx_errors = 0, tx_dropped = 0;
 		unsigned int start;
 		int i;
 
 		for_each_possible_cpu(i) {
-			p = per_cpu_ptr(vlan->rx_stats, i);
+			p = per_cpu_ptr(vlan->pcpu_stats, i);
 			do {
 				start = u64_stats_fetch_begin_bh(&p->syncp);
 				rx_packets	= p->rx_packets;
 				rx_bytes	= p->rx_bytes;
 				rx_multicast	= p->rx_multicast;
+				tx_packets	= p->tx_packets;
+				tx_bytes	= p->tx_bytes;
 			} while (u64_stats_fetch_retry_bh(&p->syncp, start));
-			accum.rx_packets	+= rx_packets;
-			accum.rx_bytes		+= rx_bytes;
-			accum.rx_multicast	+= rx_multicast;
-			/* rx_errors is an ulong, updated without syncp protection */
-			accum.rx_errors		+= p->rx_errors;
+
+			stats->rx_packets	+= rx_packets;
+			stats->rx_bytes		+= rx_bytes;
+			stats->multicast	+= rx_multicast;
+			stats->tx_packets	+= tx_packets;
+			stats->tx_bytes		+= tx_bytes;
+			/* rx_errors & tx_dropped are u32, updated
+			 * without syncp protection.
+			 */
+			rx_errors	+= p->rx_errors;
+			tx_dropped	+= p->tx_dropped;
 		}
-		stats->rx_packets = accum.rx_packets;
-		stats->rx_bytes   = accum.rx_bytes;
-		stats->rx_errors  = accum.rx_errors;
-		stats->rx_dropped = accum.rx_errors;
-		stats->multicast  = accum.rx_multicast;
+		stats->rx_errors	= rx_errors;
+		stats->rx_dropped	= rx_errors;
+		stats->tx_dropped	= tx_dropped;
 	}
 	return stats;
 }
@@ -601,25 +611,6 @@ static int macvlan_validate(struct nlattr *tb[], struct nlattr *data[])
 	return 0;
 }
 
-static int macvlan_get_tx_queues(struct net *net,
-				 struct nlattr *tb[],
-				 unsigned int *num_tx_queues,
-				 unsigned int *real_num_tx_queues)
-{
-	struct net_device *real_dev;
-
-	if (!tb[IFLA_LINK])
-		return -EINVAL;
-
-	real_dev = __dev_get_by_index(net, nla_get_u32(tb[IFLA_LINK]));
-	if (!real_dev)
-		return -ENODEV;
-
-	*num_tx_queues      = real_dev->num_tx_queues;
-	*real_num_tx_queues = real_dev->real_num_tx_queues;
-	return 0;
-}
-
 int macvlan_common_newlink(struct net *src_net, struct net_device *dev,
 			   struct nlattr *tb[], struct nlattr *data[],
 			   int (*receive)(struct sk_buff *skb),
@@ -743,7 +734,6 @@ int macvlan_link_register(struct rtnl_link_ops *ops)
 {
 	/* common fields */
 	ops->priv_size		= sizeof(struct macvlan_dev);
-	ops->get_tx_queues	= macvlan_get_tx_queues;
 	ops->validate		= macvlan_validate;
 	ops->maxtype		= IFLA_MACVLAN_MAX;
 	ops->policy		= macvlan_policy;
diff --git a/include/linux/if_macvlan.h b/include/linux/if_macvlan.h
index 8a2fd66..e28b2e4 100644
--- a/include/linux/if_macvlan.h
+++ b/include/linux/if_macvlan.h
@@ -25,19 +25,25 @@ struct macvlan_port;
 struct macvtap_queue;
 
 /**
- *	struct macvlan_rx_stats - MACVLAN percpu rx stats
+ *	struct macvlan_pcpu_stats - MACVLAN percpu stats
  *	@rx_packets: number of received packets
  *	@rx_bytes: number of received bytes
  *	@rx_multicast: number of received multicast packets
+ *	@tx_packets: number of transmitted packets
+ *	@tx_bytes: number of transmitted bytes
  *	@syncp: synchronization point for 64bit counters
- *	@rx_errors: number of errors
+ *	@rx_errors: number of rx errors
+ *	@tx_dropped: number of tx dropped packets
  */
-struct macvlan_rx_stats {
+struct macvlan_pcpu_stats {
 	u64			rx_packets;
 	u64			rx_bytes;
 	u64			rx_multicast;
+	u64			tx_packets;
+	u64			tx_bytes;
 	struct u64_stats_sync	syncp;
-	unsigned long		rx_errors;
+	u32			rx_errors;
+	u32			tx_dropped;
 };
 
 /*
@@ -52,7 +58,7 @@ struct macvlan_dev {
 	struct hlist_node	hlist;
 	struct macvlan_port	*port;
 	struct net_device	*lowerdev;
-	struct macvlan_rx_stats __percpu *rx_stats;
+	struct macvlan_pcpu_stats __percpu *pcpu_stats;
 	enum macvlan_mode	mode;
 	int (*receive)(struct sk_buff *skb);
 	int (*forward)(struct net_device *dev, struct sk_buff *skb);
@@ -64,18 +70,18 @@ static inline void macvlan_count_rx(const struct macvlan_dev *vlan,
 				    unsigned int len, bool success,
 				    bool multicast)
 {
-	struct macvlan_rx_stats *rx_stats;
-
-	rx_stats = this_cpu_ptr(vlan->rx_stats);
 	if (likely(success)) {
-		u64_stats_update_begin(&rx_stats->syncp);
-		rx_stats->rx_packets++;;
-		rx_stats->rx_bytes += len;
+		struct macvlan_pcpu_stats *pcpu_stats;
+
+		pcpu_stats = this_cpu_ptr(vlan->pcpu_stats);
+		u64_stats_update_begin(&pcpu_stats->syncp);
+		pcpu_stats->rx_packets++;
+		pcpu_stats->rx_bytes += len;
 		if (multicast)
-			rx_stats->rx_multicast++;
-		u64_stats_update_end(&rx_stats->syncp);
+			pcpu_stats->rx_multicast++;
+		u64_stats_update_end(&pcpu_stats->syncp);
 	} else {
-		rx_stats->rx_errors++;
+		this_cpu_inc(vlan->pcpu_stats->rx_errors);
 	}
 }
 



^ permalink raw reply related

* Re: [PATCH] macvlan: lockless tx path
From: Eric Dumazet @ 2010-11-11  7:03 UTC (permalink / raw)
  To: Ben Greear; +Cc: netdev
In-Reply-To: <4CDB2EBC.2090905@candelatech.com>

Le mercredi 10 novembre 2010 à 15:46 -0800, Ben Greear a écrit :
> On 11/10/2010 03:36 PM, Eric Dumazet wrote:
> > Le mercredi 10 novembre 2010 à 14:53 -0800, Ben Greear a écrit :
> >
> >> I did similar, and then wrote extra code to detect a 64-bit kernel and if
> >> so assume that the counters wrap at 64 bits so I didn't have to poll so
> >> often to make sure I didn't miss a wrap for a 10G NIC.  If instead one wraps at 33
> >> bits and the other at 36, there is no way for me to deal with the wrap
> >> properly w/out explicitly knowing about that 33 and 36.
> >>
> >
> > How do you define 'wrap around' ? Maybe your definition is wrong.
> 
> Maybe so.  My algorithm looks like:
> 
>   // uint64 accum;
>   // uint32 old;
>   // uint32 new;
>   if (old > new) {
>       // This assumes counters wrap at 32 bits (ie, 0xFFFFFFFF).
>       accum += ((uint32)(0xFFFFFFFF) - old) + new;
>   }
>   else if (old < new) {
>       accum += new - old;
>   }
>   old = new;
> 
> ...
> 
> Is there some way I can do this w/out the (0xFFFFFFFF - old),
> and thus the assumption of 32-bit counters?
> 

Yes, please take a look at RRD for an example, then you can adapt to
your needs.

<quote>

http://www.mrtg.org/rrdtool/tut/rrdtutorial.en.html

At the time of writing this document, RRDtool knows of counters that are
either 32 bits or 64 bits of size. These counters can handle the
following different values:

 - 32 bits: 0 ..           4294967295
 - 64 bits: 0 .. 18446744073709551615

If these numbers look strange to you, you can view them in their
hexadecimal form:

 - 32 bits: 0 ..         FFFFFFFF
 - 64 bits: 0 .. FFFFFFFFFFFFFFFF

RRDtool handles both counters the same. If an overflow occurs and the
delta would be negative, RRDtool first adds the maximum of a small
counter + 1 to the delta. If the delta is still negative, it had to be
the large counter that wrapped. Add the maximum possible value of the
large counter + 1 and subtract the erroneously added small value.

There is a risk in this: suppose the large counter wrapped while adding
a huge delta, it could happen, theoretically, that adding the smaller
value would make the delta positive. In this unlikely case the results
would not be correct. The increase should be nearly as high as the
maximum counter value for that to happen, so chances are you would have
several other problems as well and this particular problem would not
even be worth thinking about. Even though, I did include an example, so
you can judge for yourself.

The next section gives you some numerical examples for counter-wraps.
Try to do the calculations yourself or just believe me if your
calculator can't handle the numbers :)

Correction numbers:

 - 32 bits: (4294967295 + 1) =                                4294967296
 - 64 bits: (18446744073709551615 + 1)
                                    - correction1 = 18446744069414584320

 Before:        4294967200
 Increase:                100
 Should become: 4294967300
 But really is:             4
 Delta:        -4294967196
 Correction1:  -4294967196 + 4294967296 = 100

 Before:        18446744073709551000
 Increase:                             800
 Should become: 18446744073709551800
 But really is:                        184
 Delta:        -18446744073709550816
 Correction1:  -18446744073709550816
                                + 4294967296 = -18446744069414583520
 Correction2:  -18446744069414583520
                   + 18446744069414584320 = 800

 Before:        18446744073709551615 ( maximum value )
 Increase:      18446744069414584320 ( absurd increase, minimum for
 Should become: 36893488143124135935             this example to work )
 But really is: 18446744069414584319
 Delta:                     -4294967296
 Correction1:  -4294967296 + 4294967296 = 0
 (not negative -> no correction2)

 Before:        18446744073709551615 ( maximum value )
 Increase:      18446744069414584319 ( one less increase )
 Should become: 36893488143124135934
 But really is: 18446744069414584318
 Delta:                     -4294967297
 Correction1:  -4294967297 + 4294967296 = -1
 Correction2:  -1 + 18446744069414584320 = 18446744069414584319

As you can see from the last two examples, you need strange numbers for
RRDtool to fail (provided it's bug free of course), so this should not
happen. However, SNMP or whatever method you choose to collect the data,
might also report wrong numbers occasionally. We can't prevent all
errors, but there are some things we can do. The RRDtool "create"
command takes two special parameters for this. They define the minimum
and maximum allowed values. Until now, we used "U", meaning "unknown".
If you provide values for one or both of them and if RRDtool receives
data points that are outside these limits, it will ignore those values.
For a thermometer in degrees Celsius, the absolute minimum is just under
-273. For my router, I can assume this minimum is much higher so I would
set it to 10, where as the maximum temperature I would set to 80. Any
higher and the device would be out of order.

For the speed of my car, I would never expect negative numbers and also
I would not expect a speed higher than 230. Anything else, and there
must have been an error. Remember: the opposite is not true, if the
numbers pass this check, it doesn't mean that they are correct. Always
judge the graph with a healthy dose of suspicion if it seems weird to
you.

http://www.mrtg.org/rrdtool/doc/rrdcreate.en.html

COUNTER

    is for continuous incrementing counters like the ifInOctets counter
in a router. The COUNTER data source assumes that the counter never
decreases, except when a counter overflows. The update function takes
the overflow into account. The counter is stored as a per-second rate.
When the counter overflows, RRDtool checks if the overflow happened at
the 32bit or 64bit border and acts accordingly by adding an appropriate
value to the result.

</quote>



^ permalink raw reply

* Re: [PATCH] Enhance AF_PACKET implementation to not require high order contiguous memory allocation (v4)
From: Eric Dumazet @ 2010-11-11  6:29 UTC (permalink / raw)
  To: nhorman; +Cc: netdev, davem, zenczykowski
In-Reply-To: <1289416194-1844-1-git-send-email-nhorman@tuxdriver.com>

Le mercredi 10 novembre 2010 à 14:09 -0500, nhorman@tuxdriver.com a
écrit :
> From: Neil Horman <nhorman@tuxdriver.com>
> 
> Version 4 of this patch.
> 
> Change notes:
> 1) Removed extra memset.  Didn't think kcalloc added a GFP_ZERO the way kzalloc did :)
> 
> Summary:
> It was shown to me recently that systems under high load were driven very deep
> into swap when tcpdump was run.  The reason this happened was because the
> AF_PACKET protocol has a SET_RINGBUFFER socket option that allows the user space
> application to specify how many entries an AF_PACKET socket will have and how
> large each entry will be.  It seems the default setting for tcpdump is to set
> the ring buffer to 32 entries of 64 Kb each, which implies 32 order 5
> allocation.  Thats difficult under good circumstances, and horrid under memory
> pressure.
> 
> I thought it would be good to make that a bit more usable.  I was going to do a
> simple conversion of the ring buffer from contigous pages to iovecs, but
> unfortunately, the metadata which AF_PACKET places in these buffers can easily
> span a page boundary, and given that these buffers get mapped into user space,
> and the data layout doesn't easily allow for a change to padding between frames
> to avoid that, a simple iovec change is just going to break user space ABI
> consistency.
> 
> So I've done this, I've added a three tiered mechanism to the af_packet set_ring
> socket option.  It attempts to allocate memory in the following order:
> 
> 1) Using __get_free_pages with GFP_NORETRY set, so as to fail quickly without
> digging into swap
> 
> 2) Using vmalloc
> 
> 3) Using __get_free_pages with GFP_NORETRY clear, causing us to try as hard as
> needed to get the memory
> 
> The effect is that we don't disturb the system as much when we're under load,
> while still being able to conduct tcpdumps effectively.
> 
> Tested successfully by me.
> 
> Signed-off-by: Neil Horman <nhorman@tuxdriver.com>

Acked-by: Eric Dumazet <eric.dumazet@gmail.com>

Thanks Neil !



^ permalink raw reply

* Re: pull request: wireless-2.6 2010-11-10
From: David Miller @ 2010-11-11  6:16 UTC (permalink / raw)
  To: linville-2XuSBdqkA4R54TAoqtyWWQ
  Cc: linux-wireless-u79uwXL29TY76Z2rM5mHXA,
	netdev-u79uwXL29TY76Z2rM5mHXA,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA
In-Reply-To: <20101110185843.GC2714-2XuSBdqkA4R54TAoqtyWWQ@public.gmane.org>

From: "John W. Linville" <linville-2XuSBdqkA4R54TAoqtyWWQ@public.gmane.org>
Date: Wed, 10 Nov 2010 13:58:43 -0500

> Here is a batch of fixes intended for 2.6.37.  Also mixed-in are a few
> device ID updates.  I think the changelog entries are reasonably
> documentary of the issues being fixed, so I won't belabor them. ;-)
> 
> This also includes a round of Bluetooth fixes from Gustavo:
> 
> "The following batch contains some bugfixes for 2.6.37. A fix for unaligned
> access in L2CAP, a Kconfig error, two fixes related to security of
> the Bluetooth links, and support for the a MacBook Air Bluetooth device.
> There is also a one line patch from Matthew Garret, that enables USB
> autosuspend for btusb module, which shall be completely safe. Please
> pull, thanks."

Pulled, thanks.
--
To unsubscribe from this list: send the line "unsubscribe linux-wireless" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* [PATCH 1/3] dccp ccid-2: Ack Vector interface clean-up
From: Gerrit Renker @ 2010-11-11  6:07 UTC (permalink / raw)
  To: davem; +Cc: dccp, netdev, Gerrit Renker
In-Reply-To: <1289455653-5463-1-git-send-email-gerrit@erg.abdn.ac.uk>

This patch brings the Ack Vector interface up to date. Its main purpose is
to lay the basis for the subsequent patches of this set, which will use the
new data structure fields and routines.

There are no real algorithmic changes, rather an adaptation:

 (1) Replaced the static Ack Vector size (2) with a #define so that it can
     be adapted (with low loss / Ack Ratio, a value of 1 works, so 2 seems
     to be sufficient for the moment) and added a solution so that computing
     the ECN nonce will continue to work - even with larger Ack Vectors.

 (2) Replaced the #defines for Ack Vector states with a complete enum.

 (3) Replaced #defines to compute Ack Vector length and state with general
     purpose routines (inlines), and updated code to use these.

 (4) Added a `tail' field (conversion to circular buffer in subsequent patch).

 (5) Updated the (outdated) documentation for Ack Vector struct.

 (6) All sequence number containers now trimmed to 48 bits.

 (7) Removal of unused bits:
     * removed dccpav_ack_nonce from struct dccp_ackvec, since this is already
       redundantly stored in the `dccpavr_ack_nonce' (of Ack Vector record);
     * removed Elapsed Time for Ack Vectors (it was nowhere used);
     * replaced semantics of dccpavr_sent_len with dccpavr_ack_runlen, since
       the code needs to be able to remember the old run length;
     * reduced the de-/allocation routines (redundant / duplicate tests).

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
---
 net/dccp/ackvec.h      |  103 +++++++++++++++-------------
 net/dccp/ackvec.c      |  178 +++++++++++++++++-------------------------------
 net/dccp/ccids/ccid2.c |   13 ++--
 net/dccp/input.c       |    6 +-
 net/dccp/options.c     |    1 +
 5 files changed, 128 insertions(+), 173 deletions(-)

--- a/net/dccp/ackvec.h
+++ b/net/dccp/ackvec.h
@@ -3,9 +3,9 @@
 /*
  *  net/dccp/ackvec.h
  *
- *  An implementation of the DCCP protocol
+ *  An implementation of Ack Vectors for the DCCP protocol
+ *  Copyright (c) 2007 University of Aberdeen, Scotland, UK
  *  Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@mandriva.com>
- *
  *	This program is free software; you can redistribute it and/or modify it
  *	under the terms of the GNU General Public License version 2 as
  *	published by the Free Software Foundation.
@@ -13,75 +13,84 @@
 
 #include <linux/dccp.h>
 #include <linux/compiler.h>
-#include <linux/ktime.h>
 #include <linux/list.h>
 #include <linux/types.h>
 
-/* We can spread an ack vector across multiple options */
-#define DCCP_MAX_ACKVEC_LEN (DCCP_SINGLE_OPT_MAXLEN * 2)
+/*
+ * Ack Vector buffer space is static, in multiples of %DCCP_SINGLE_OPT_MAXLEN,
+ * the maximum size of a single Ack Vector. Setting %DCCPAV_NUM_ACKVECS to 1
+ * will be sufficient for most cases of low Ack Ratios, using a value of 2 gives
+ * more headroom if Ack Ratio is higher or when the sender acknowledges slowly.
+ */
+#define DCCPAV_NUM_ACKVECS	2
+#define DCCPAV_MAX_ACKVEC_LEN	(DCCP_SINGLE_OPT_MAXLEN * DCCPAV_NUM_ACKVECS)
 
 /* Estimated minimum average Ack Vector length - used for updating MPS */
 #define DCCPAV_MIN_OPTLEN	16
 
-#define DCCP_ACKVEC_STATE_RECEIVED	0
-#define DCCP_ACKVEC_STATE_ECN_MARKED	(1 << 6)
-#define DCCP_ACKVEC_STATE_NOT_RECEIVED	(3 << 6)
+enum dccp_ackvec_states {
+	DCCPAV_RECEIVED =	0x00,
+	DCCPAV_ECN_MARKED =	0x40,
+	DCCPAV_RESERVED =	0x80,
+	DCCPAV_NOT_RECEIVED =	0xC0
+};
+#define DCCPAV_MAX_RUNLEN	0x3F
 
-#define DCCP_ACKVEC_STATE_MASK		0xC0 /* 11000000 */
-#define DCCP_ACKVEC_LEN_MASK		0x3F /* 00111111 */
+static inline u8 dccp_ackvec_runlen(const u8 *cell)
+{
+	return *cell & DCCPAV_MAX_RUNLEN;
+}
 
-/** struct dccp_ackvec - ack vector
- *
- * This data structure is the one defined in RFC 4340, Appendix A.
- *
- * @av_buf_head - circular buffer head
- * @av_buf_tail - circular buffer tail
- * @av_buf_ackno - ack # of the most recent packet acknowledgeable in the
- *		       buffer (i.e. %av_buf_head)
- * @av_buf_nonce - the one-bit sum of the ECN Nonces on all packets acked
- * 		       by the buffer with State 0
- *
- * Additionally, the HC-Receiver must keep some information about the
- * Ack Vectors it has recently sent. For each packet sent carrying an
- * Ack Vector, it remembers four variables:
+static inline u8 dccp_ackvec_state(const u8 *cell)
+{
+	return *cell & ~DCCPAV_MAX_RUNLEN;
+}
+
+/** struct dccp_ackvec - Ack Vector main data structure
  *
- * @av_records - list of dccp_ackvec_record
- * @av_ack_nonce - the one-bit sum of the ECN Nonces for all State 0.
+ * This implements a fixed-size circular buffer within an array and is largely
+ * based on Appendix A of RFC 4340.
  *
- * @av_time - the time in usecs
- * @av_buf - circular buffer of acknowledgeable packets
+ * @av_buf:	   circular buffer storage area
+ * @av_buf_head:   head index; begin of live portion in @av_buf
+ * @av_buf_tail:   tail index; first index _after_ the live portion in @av_buf
+ * @av_buf_ackno:  highest seqno of acknowledgeable packet recorded in @av_buf
+ * @av_buf_nonce:  ECN nonce sums, each covering subsequent segments of up to
+ *		   %DCCP_SINGLE_OPT_MAXLEN cells in the live portion of @av_buf
+ * @av_records:	   list of %dccp_ackvec_record (Ack Vectors sent previously)
+ * @av_veclen:	   length of the live portion of @av_buf
  */
 struct dccp_ackvec {
-	u64			av_buf_ackno;
-	struct list_head	av_records;
-	ktime_t			av_time;
+	u8			av_buf[DCCPAV_MAX_ACKVEC_LEN];
 	u16			av_buf_head;
+	u16			av_buf_tail;
+	u64			av_buf_ackno:48;
+	bool			av_buf_nonce[DCCPAV_NUM_ACKVECS];
+	struct list_head	av_records;
 	u16			av_vec_len;
-	u8			av_buf_nonce;
-	u8			av_ack_nonce;
-	u8			av_buf[DCCP_MAX_ACKVEC_LEN];
 };
 
-/** struct dccp_ackvec_record - ack vector record
+/** struct dccp_ackvec_record - Records information about sent Ack Vectors
  *
- * ACK vector record as defined in Appendix A of spec.
+ * These list entries define the additional information which the HC-Receiver
+ * keeps about recently-sent Ack Vectors; again refer to RFC 4340, Appendix A.
  *
- * The list is sorted by avr_ack_seqno
+ * @avr_node:	    the list node in @av_records
+ * @avr_ack_seqno:  sequence number of the packet the Ack Vector was sent on
+ * @avr_ack_ackno:  the Ack number that this record/Ack Vector refers to
+ * @avr_ack_ptr:    pointer into @av_buf where this record starts
+ * @avr_ack_runlen: run length of @avr_ack_ptr at the time of sending
+ * @avr_ack_nonce:  the sum of @av_buf_nonce's at the time this record was sent
  *
- * @avr_node - node in av_records
- * @avr_ack_seqno - sequence number of the packet this record was sent on
- * @avr_ack_ackno - sequence number being acknowledged
- * @avr_ack_ptr - pointer into av_buf where this record starts
- * @avr_ack_nonce - av_ack_nonce at the time this record was sent
- * @avr_sent_len - lenght of the record in av_buf
+ * The list as a whole is sorted in descending order by @avr_ack_seqno.
  */
 struct dccp_ackvec_record {
 	struct list_head avr_node;
-	u64		 avr_ack_seqno;
-	u64		 avr_ack_ackno;
+	u64		 avr_ack_seqno:48;
+	u64		 avr_ack_ackno:48;
 	u16		 avr_ack_ptr;
-	u16		 avr_sent_len;
-	u8		 avr_ack_nonce;
+	u8		 avr_ack_runlen;
+	u8		 avr_ack_nonce:1;
 };
 
 struct sock;
--- a/net/dccp/ackvec.c
+++ b/net/dccp/ackvec.c
@@ -1,7 +1,8 @@
 /*
  *  net/dccp/ackvec.c
  *
- *  An implementation of the DCCP protocol
+ *  An implementation of Ack Vectors for the DCCP protocol
+ *  Copyright (c) 2007 University of Aberdeen, Scotland, UK
  *  Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
  *
  *      This program is free software; you can redistribute it and/or modify it
@@ -23,24 +24,32 @@
 static struct kmem_cache *dccp_ackvec_slab;
 static struct kmem_cache *dccp_ackvec_record_slab;
 
-static struct dccp_ackvec_record *dccp_ackvec_record_new(void)
+struct dccp_ackvec *dccp_ackvec_alloc(const gfp_t priority)
 {
-	struct dccp_ackvec_record *avr =
-			kmem_cache_alloc(dccp_ackvec_record_slab, GFP_ATOMIC);
+	struct dccp_ackvec *av = kmem_cache_zalloc(dccp_ackvec_slab, priority);
+
+	if (av != NULL) {
+		av->av_buf_head	= DCCPAV_MAX_ACKVEC_LEN - 1;
+		INIT_LIST_HEAD(&av->av_records);
+	}
+	return av;
+}
 
-	if (avr != NULL)
-		INIT_LIST_HEAD(&avr->avr_node);
+static void dccp_ackvec_purge_records(struct dccp_ackvec *av)
+{
+	struct dccp_ackvec_record *cur, *next;
 
-	return avr;
+	list_for_each_entry_safe(cur, next, &av->av_records, avr_node)
+		kmem_cache_free(dccp_ackvec_record_slab, cur);
+	INIT_LIST_HEAD(&av->av_records);
 }
 
-static void dccp_ackvec_record_delete(struct dccp_ackvec_record *avr)
+void dccp_ackvec_free(struct dccp_ackvec *av)
 {
-	if (unlikely(avr == NULL))
-		return;
-	/* Check if deleting a linked record */
-	WARN_ON(!list_empty(&avr->avr_node));
-	kmem_cache_free(dccp_ackvec_record_slab, avr);
+	if (likely(av != NULL)) {
+		dccp_ackvec_purge_records(av);
+		kmem_cache_free(dccp_ackvec_slab, av);
+	}
 }
 
 static void dccp_ackvec_insert_avr(struct dccp_ackvec *av,
@@ -68,24 +77,16 @@ int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb)
 	struct dccp_ackvec *av = dp->dccps_hc_rx_ackvec;
 	/* Figure out how many options do we need to represent the ackvec */
 	const u8 nr_opts = DIV_ROUND_UP(av->av_vec_len, DCCP_SINGLE_OPT_MAXLEN);
-	u16 len = av->av_vec_len + 2 * nr_opts, i;
-	u32 elapsed_time;
+	u16 len = av->av_vec_len + 2 * nr_opts;
+	u8 i, nonce = 0;
 	const unsigned char *tail, *from;
 	unsigned char *to;
 	struct dccp_ackvec_record *avr;
-	suseconds_t delta;
 
 	if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN)
 		return -1;
 
-	delta = ktime_us_delta(ktime_get_real(), av->av_time);
-	elapsed_time = delta / 10;
-
-	if (elapsed_time != 0 &&
-	    dccp_insert_option_elapsed_time(skb, elapsed_time))
-		return -1;
-
-	avr = dccp_ackvec_record_new();
+	avr = kmem_cache_alloc(dccp_ackvec_record_slab, GFP_ATOMIC);
 	if (avr == NULL)
 		return -1;
 
@@ -94,7 +95,7 @@ int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb)
 	to   = skb_push(skb, len);
 	len  = av->av_vec_len;
 	from = av->av_buf + av->av_buf_head;
-	tail = av->av_buf + DCCP_MAX_ACKVEC_LEN;
+	tail = av->av_buf + DCCPAV_MAX_ACKVEC_LEN;
 
 	for (i = 0; i < nr_opts; ++i) {
 		int copylen = len;
@@ -102,7 +103,13 @@ int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb)
 		if (len > DCCP_SINGLE_OPT_MAXLEN)
 			copylen = DCCP_SINGLE_OPT_MAXLEN;
 
-		*to++ = DCCPO_ACK_VECTOR_0;
+		/*
+		 * RFC 4340, 12.2: Encode the Nonce Echo for this Ack Vector via
+		 * its type; ack_nonce is the sum of all individual buf_nonce's.
+		 */
+		nonce ^= av->av_buf_nonce[i];
+
+		*to++ = DCCPO_ACK_VECTOR_0 + av->av_buf_nonce[i];
 		*to++ = copylen + 2;
 
 		/* Check if buf_head wraps */
@@ -123,75 +130,24 @@ int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb)
 	}
 
 	/*
-	 *	From RFC 4340, A.2:
-	 *
-	 *	For each acknowledgement it sends, the HC-Receiver will add an
-	 *	acknowledgement record.  ack_seqno will equal the HC-Receiver
-	 *	sequence number it used for the ack packet; ack_ptr will equal
-	 *	buf_head; ack_ackno will equal buf_ackno; and ack_nonce will
-	 *	equal buf_nonce.
+	 * Each sent Ack Vector is recorded in the list, as per A.2 of RFC 4340.
 	 */
-	avr->avr_ack_seqno = DCCP_SKB_CB(skb)->dccpd_seq;
-	avr->avr_ack_ptr   = av->av_buf_head;
-	avr->avr_ack_ackno = av->av_buf_ackno;
-	avr->avr_ack_nonce = av->av_buf_nonce;
-	avr->avr_sent_len  = av->av_vec_len;
+	avr->avr_ack_seqno  = DCCP_SKB_CB(skb)->dccpd_seq;
+	avr->avr_ack_ptr    = av->av_buf_head;
+	avr->avr_ack_ackno  = av->av_buf_ackno;
+	avr->avr_ack_nonce  = nonce;
+	avr->avr_ack_runlen = dccp_ackvec_runlen(av->av_buf + av->av_buf_head);
 
 	dccp_ackvec_insert_avr(av, avr);
 
 	dccp_pr_debug("%s ACK Vector 0, len=%d, ack_seqno=%llu, "
 		      "ack_ackno=%llu\n",
-		      dccp_role(sk), avr->avr_sent_len,
+		      dccp_role(sk), avr->avr_ack_runlen,
 		      (unsigned long long)avr->avr_ack_seqno,
 		      (unsigned long long)avr->avr_ack_ackno);
 	return 0;
 }
 
-struct dccp_ackvec *dccp_ackvec_alloc(const gfp_t priority)
-{
-	struct dccp_ackvec *av = kmem_cache_alloc(dccp_ackvec_slab, priority);
-
-	if (av != NULL) {
-		av->av_buf_head	 = DCCP_MAX_ACKVEC_LEN - 1;
-		av->av_buf_ackno = UINT48_MAX + 1;
-		av->av_buf_nonce = 0;
-		av->av_time	 = ktime_set(0, 0);
-		av->av_vec_len	 = 0;
-		INIT_LIST_HEAD(&av->av_records);
-	}
-
-	return av;
-}
-
-void dccp_ackvec_free(struct dccp_ackvec *av)
-{
-	if (unlikely(av == NULL))
-		return;
-
-	if (!list_empty(&av->av_records)) {
-		struct dccp_ackvec_record *avr, *next;
-
-		list_for_each_entry_safe(avr, next, &av->av_records, avr_node) {
-			list_del_init(&avr->avr_node);
-			dccp_ackvec_record_delete(avr);
-		}
-	}
-
-	kmem_cache_free(dccp_ackvec_slab, av);
-}
-
-static inline u8 dccp_ackvec_state(const struct dccp_ackvec *av,
-				   const u32 index)
-{
-	return av->av_buf[index] & DCCP_ACKVEC_STATE_MASK;
-}
-
-static inline u8 dccp_ackvec_len(const struct dccp_ackvec *av,
-				 const u32 index)
-{
-	return av->av_buf[index] & DCCP_ACKVEC_LEN_MASK;
-}
-
 /*
  * If several packets are missing, the HC-Receiver may prefer to enter multiple
  * bytes with run length 0, rather than a single byte with a larger run length;
@@ -204,7 +160,7 @@ static inline int dccp_ackvec_set_buf_head_state(struct dccp_ackvec *av,
 	long gap;
 	long new_head;
 
-	if (av->av_vec_len + packets > DCCP_MAX_ACKVEC_LEN)
+	if (av->av_vec_len + packets > DCCPAV_MAX_ACKVEC_LEN)
 		return -ENOBUFS;
 
 	gap	 = packets - 1;
@@ -212,18 +168,18 @@ static inline int dccp_ackvec_set_buf_head_state(struct dccp_ackvec *av,
 
 	if (new_head < 0) {
 		if (gap > 0) {
-			memset(av->av_buf, DCCP_ACKVEC_STATE_NOT_RECEIVED,
+			memset(av->av_buf, DCCPAV_NOT_RECEIVED,
 			       gap + new_head + 1);
 			gap = -new_head;
 		}
-		new_head += DCCP_MAX_ACKVEC_LEN;
+		new_head += DCCPAV_MAX_ACKVEC_LEN;
 	}
 
 	av->av_buf_head = new_head;
 
 	if (gap > 0)
 		memset(av->av_buf + av->av_buf_head + 1,
-		       DCCP_ACKVEC_STATE_NOT_RECEIVED, gap);
+		       DCCPAV_NOT_RECEIVED, gap);
 
 	av->av_buf[av->av_buf_head] = state;
 	av->av_vec_len += packets;
@@ -236,6 +192,8 @@ static inline int dccp_ackvec_set_buf_head_state(struct dccp_ackvec *av,
 int dccp_ackvec_add(struct dccp_ackvec *av, const struct sock *sk,
 		    const u64 ackno, const u8 state)
 {
+	u8 *cur_head = av->av_buf + av->av_buf_head,
+	   *buf_end  = av->av_buf + DCCPAV_MAX_ACKVEC_LEN;
 	/*
 	 * Check at the right places if the buffer is full, if it is, tell the
 	 * caller to start dropping packets till the HC-Sender acks our ACK
@@ -260,7 +218,7 @@ int dccp_ackvec_add(struct dccp_ackvec *av, const struct sock *sk,
 
 	/* See if this is the first ackno being inserted */
 	if (av->av_vec_len == 0) {
-		av->av_buf[av->av_buf_head] = state;
+		*cur_head = state;
 		av->av_vec_len = 1;
 	} else if (after48(ackno, av->av_buf_ackno)) {
 		const u64 delta = dccp_delta_seqno(av->av_buf_ackno, ackno);
@@ -269,10 +227,9 @@ int dccp_ackvec_add(struct dccp_ackvec *av, const struct sock *sk,
 		 * Look if the state of this packet is the same as the
 		 * previous ackno and if so if we can bump the head len.
 		 */
-		if (delta == 1 &&
-		    dccp_ackvec_state(av, av->av_buf_head) == state &&
-		    dccp_ackvec_len(av, av->av_buf_head) < DCCP_ACKVEC_LEN_MASK)
-			av->av_buf[av->av_buf_head]++;
+		if (delta == 1 && dccp_ackvec_state(cur_head) == state &&
+		    dccp_ackvec_runlen(cur_head) < DCCPAV_MAX_RUNLEN)
+			*cur_head += 1;
 		else if (dccp_ackvec_set_buf_head_state(av, delta, state))
 			return -ENOBUFS;
 	} else {
@@ -285,21 +242,17 @@ int dccp_ackvec_add(struct dccp_ackvec *av, const struct sock *sk,
 		 *	could reduce the complexity of this scan.)
 		 */
 		u64 delta = dccp_delta_seqno(ackno, av->av_buf_ackno);
-		u32 index = av->av_buf_head;
 
 		while (1) {
-			const u8 len = dccp_ackvec_len(av, index);
-			const u8 av_state = dccp_ackvec_state(av, index);
+			const u8 len = dccp_ackvec_runlen(cur_head);
 			/*
 			 * valid packets not yet in av_buf have a reserved
 			 * entry, with a len equal to 0.
 			 */
-			if (av_state == DCCP_ACKVEC_STATE_NOT_RECEIVED &&
-			    len == 0 && delta == 0) { /* Found our
-							 reserved seat! */
+			if (*cur_head == DCCPAV_NOT_RECEIVED && delta == 0) {
 				dccp_pr_debug("Found %llu reserved seat!\n",
 					      (unsigned long long)ackno);
-				av->av_buf[index] = state;
+				*cur_head = state;
 				goto out;
 			}
 			/* len == 0 means one packet */
@@ -307,13 +260,12 @@ int dccp_ackvec_add(struct dccp_ackvec *av, const struct sock *sk,
 				goto out_duplicate;
 
 			delta -= len + 1;
-			if (++index == DCCP_MAX_ACKVEC_LEN)
-				index = 0;
+			if (++cur_head == buf_end)
+				cur_head = av->av_buf;
 		}
 	}
 
 	av->av_buf_ackno = ackno;
-	av->av_time = ktime_get_real();
 out:
 	return 0;
 
@@ -333,13 +285,13 @@ static void dccp_ackvec_throw_record(struct dccp_ackvec *av,
 	if (av->av_buf_head <= avr->avr_ack_ptr)
 		av->av_vec_len = avr->avr_ack_ptr - av->av_buf_head;
 	else
-		av->av_vec_len = DCCP_MAX_ACKVEC_LEN - 1 -
+		av->av_vec_len = DCCPAV_MAX_ACKVEC_LEN - 1 -
 				 av->av_buf_head + avr->avr_ack_ptr;
 
 	/* free records */
 	list_for_each_entry_safe_from(avr, next, &av->av_records, avr_node) {
-		list_del_init(&avr->avr_node);
-		dccp_ackvec_record_delete(avr);
+		list_del(&avr->avr_node);
+		kmem_cache_free(dccp_ackvec_record_slab, avr);
 	}
 }
 
@@ -357,7 +309,7 @@ void dccp_ackvec_check_rcv_ackno(struct dccp_ackvec *av, struct sock *sk,
 		if (ackno == avr->avr_ack_seqno) {
 			dccp_pr_debug("%s ACK packet 0, len=%d, ack_seqno=%llu, "
 				      "ack_ackno=%llu, ACKED!\n",
-				      dccp_role(sk), 1,
+				      dccp_role(sk), avr->avr_ack_runlen,
 				      (unsigned long long)avr->avr_ack_seqno,
 				      (unsigned long long)avr->avr_ack_ackno);
 			dccp_ackvec_throw_record(av, avr);
@@ -387,7 +339,7 @@ static void dccp_ackvec_check_rcv_ackvector(struct dccp_ackvec *av,
 	 */
 	avr = list_entry(av->av_records.next, struct dccp_ackvec_record, avr_node);
 	while (i--) {
-		const u8 rl = *vector & DCCP_ACKVEC_LEN_MASK;
+		const u8 rl = dccp_ackvec_runlen(vector);
 		u64 ackno_end_rl;
 
 		dccp_set_seqno(&ackno_end_rl, *ackno - rl);
@@ -404,8 +356,7 @@ static void dccp_ackvec_check_rcv_ackvector(struct dccp_ackvec *av,
 		break;
 found:
 		if (between48(avr->avr_ack_seqno, ackno_end_rl, *ackno)) {
-			const u8 state = *vector & DCCP_ACKVEC_STATE_MASK;
-			if (state != DCCP_ACKVEC_STATE_NOT_RECEIVED) {
+			if (dccp_ackvec_state(vector) != DCCPAV_NOT_RECEIVED) {
 				dccp_pr_debug("%s ACK vector 0, len=%d, "
 					      "ack_seqno=%llu, ack_ackno=%llu, "
 					      "ACKED!\n",
@@ -448,10 +399,9 @@ int __init dccp_ackvec_init(void)
 	if (dccp_ackvec_slab == NULL)
 		goto out_err;
 
-	dccp_ackvec_record_slab =
-			kmem_cache_create("dccp_ackvec_record",
-					  sizeof(struct dccp_ackvec_record),
-					  0, SLAB_HWCACHE_ALIGN, NULL);
+	dccp_ackvec_record_slab = kmem_cache_create("dccp_ackvec_record",
+					     sizeof(struct dccp_ackvec_record),
+					     0, SLAB_HWCACHE_ALIGN, NULL);
 	if (dccp_ackvec_record_slab == NULL)
 		goto out_destroy_slab;
 
--- a/net/dccp/ccids/ccid2.c
+++ b/net/dccp/ccids/ccid2.c
@@ -513,8 +513,7 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
 					 &vector, &veclen)) != -1) {
 		/* go through this ack vector */
 		while (veclen--) {
-			const u8 rl = *vector & DCCP_ACKVEC_LEN_MASK;
-			u64 ackno_end_rl = SUB48(ackno, rl);
+			u64 ackno_end_rl = SUB48(ackno, dccp_ackvec_runlen(vector));
 
 			ccid2_pr_debug("ackvec start:%llu end:%llu\n",
 				       (unsigned long long)ackno,
@@ -537,17 +536,15 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
 			 * run length
 			 */
 			while (between48(seqp->ccid2s_seq,ackno_end_rl,ackno)) {
-				const u8 state = *vector &
-						 DCCP_ACKVEC_STATE_MASK;
+				const u8 state = dccp_ackvec_state(vector);
 
 				/* new packet received or marked */
-				if (state != DCCP_ACKVEC_STATE_NOT_RECEIVED &&
+				if (state != DCCPAV_NOT_RECEIVED &&
 				    !seqp->ccid2s_acked) {
-					if (state ==
-					    DCCP_ACKVEC_STATE_ECN_MARKED) {
+					if (state == DCCPAV_ECN_MARKED)
 						ccid2_congestion_event(sk,
 								       seqp);
-					} else
+					else
 						ccid2_new_ack(sk, seqp,
 							      &maxincr);
 
--- a/net/dccp/input.c
+++ b/net/dccp/input.c
@@ -378,8 +378,7 @@ int dccp_rcv_established(struct sock *sk, struct sk_buff *skb,
 
 	if (dp->dccps_hc_rx_ackvec != NULL &&
 	    dccp_ackvec_add(dp->dccps_hc_rx_ackvec, sk,
-			    DCCP_SKB_CB(skb)->dccpd_seq,
-			    DCCP_ACKVEC_STATE_RECEIVED))
+			    DCCP_SKB_CB(skb)->dccpd_seq, DCCPAV_RECEIVED))
 		goto discard;
 	dccp_deliver_input_to_ccids(sk, skb);
 
@@ -637,8 +636,7 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 
 		if (dp->dccps_hc_rx_ackvec != NULL &&
 		    dccp_ackvec_add(dp->dccps_hc_rx_ackvec, sk,
-				    DCCP_SKB_CB(skb)->dccpd_seq,
-				    DCCP_ACKVEC_STATE_RECEIVED))
+				    DCCP_SKB_CB(skb)->dccpd_seq, DCCPAV_RECEIVED))
 			goto discard;
 
 		dccp_deliver_input_to_ccids(sk, skb);
--- a/net/dccp/options.c
+++ b/net/dccp/options.c
@@ -340,6 +340,7 @@ static inline int dccp_elapsed_time_len(const u32 elapsed_time)
 	return elapsed_time == 0 ? 0 : elapsed_time <= 0xFFFF ? 2 : 4;
 }
 
+/* FIXME: This function is currently not used anywhere */
 int dccp_insert_option_elapsed_time(struct sk_buff *skb, u32 elapsed_time)
 {
 	const int elapsed_time_len = dccp_elapsed_time_len(elapsed_time);

^ permalink raw reply

* [PATCH 3/3] dccp ccid-2: Implementation of circular Ack Vector buffer with overflow handling
From: Gerrit Renker @ 2010-11-11  6:07 UTC (permalink / raw)
  To: davem; +Cc: dccp, netdev, Gerrit Renker
In-Reply-To: <1289455653-5463-3-git-send-email-gerrit@erg.abdn.ac.uk>

This completes the implementation of a circular buffer for Ack Vectors, by
extending the current (linear array-based) implementation.  The changes are:

 (a) An `overflow' flag to deal with the case of overflow. As before, dynamic
     growth of the buffer will not be supported; but code will be added to deal
     robustly with overflowing Ack Vector buffers.

 (b) A `tail_seqno' field. When naively implementing the algorithm of Appendix A
     in RFC 4340, problems arise whenever subsequent Ack Vector records overlap,
     which can bring the entire run length calculation completely out of synch.
     (This is documented on http://www.erg.abdn.ac.uk/users/gerrit/dccp/notes/\
                                             ack_vectors/tracking_tail_ackno/ .)
 (c) The buffer length is now computed dynamically (i.e. current fill level),
     as the span between head to tail.

As a result, dccp_ackvec_pending() is now simpler - the #ifdef is no longer
necessary since buf_empty is always true when IP_DCCP_ACKVEC is not configured.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
---
 net/dccp/ackvec.h  |   10 ++++++++--
 net/dccp/ackvec.c  |   31 ++++++++++++++++++++++++++++++-
 net/dccp/dccp.h    |   11 +++++++----
 net/dccp/options.c |   10 +++++-----
 4 files changed, 50 insertions(+), 12 deletions(-)

--- a/net/dccp/ackvec.h
+++ b/net/dccp/ackvec.h
@@ -21,6 +21,7 @@
  * the maximum size of a single Ack Vector. Setting %DCCPAV_NUM_ACKVECS to 1
  * will be sufficient for most cases of low Ack Ratios, using a value of 2 gives
  * more headroom if Ack Ratio is higher or when the sender acknowledges slowly.
+ * The maximum value is bounded by the u16 types for indices and functions.
  */
 #define DCCPAV_NUM_ACKVECS	2
 #define DCCPAV_MAX_ACKVEC_LEN	(DCCP_SINGLE_OPT_MAXLEN * DCCPAV_NUM_ACKVECS)
@@ -55,8 +56,10 @@ static inline u8 dccp_ackvec_state(const u8 *cell)
  * @av_buf_head:   head index; begin of live portion in @av_buf
  * @av_buf_tail:   tail index; first index _after_ the live portion in @av_buf
  * @av_buf_ackno:  highest seqno of acknowledgeable packet recorded in @av_buf
+ * @av_tail_ackno: lowest  seqno of acknowledgeable packet recorded in @av_buf
  * @av_buf_nonce:  ECN nonce sums, each covering subsequent segments of up to
  *		   %DCCP_SINGLE_OPT_MAXLEN cells in the live portion of @av_buf
+ * @av_overflow:   if 1 then buf_head == buf_tail indicates buffer wraparound
  * @av_records:	   list of %dccp_ackvec_record (Ack Vectors sent previously)
  * @av_veclen:	   length of the live portion of @av_buf
  */
@@ -65,7 +68,9 @@ struct dccp_ackvec {
 	u16			av_buf_head;
 	u16			av_buf_tail;
 	u64			av_buf_ackno:48;
+	u64			av_tail_ackno:48;
 	bool			av_buf_nonce[DCCPAV_NUM_ACKVECS];
+	u8			av_overflow:1;
 	struct list_head	av_records;
 	u16			av_vec_len;
 };
@@ -112,9 +117,10 @@ extern int dccp_ackvec_parse(struct sock *sk, const struct sk_buff *skb,
 			     const u8 *value, const u8 len);
 
 extern int  dccp_ackvec_update_records(struct dccp_ackvec *av, u64 seq, u8 sum);
+extern u16  dccp_ackvec_buflen(const struct dccp_ackvec *av);
 
-static inline int dccp_ackvec_pending(const struct dccp_ackvec *av)
+static inline bool dccp_ackvec_is_empty(const struct dccp_ackvec *av)
 {
-	return av->av_vec_len;
+	return av->av_overflow == 0 && av->av_buf_head == av->av_buf_tail;
 }
 #endif /* _ACKVEC_H */
--- a/net/dccp/ackvec.c
+++ b/net/dccp/ackvec.c
@@ -29,7 +29,7 @@ struct dccp_ackvec *dccp_ackvec_alloc(const gfp_t priority)
 	struct dccp_ackvec *av = kmem_cache_zalloc(dccp_ackvec_slab, priority);
 
 	if (av != NULL) {
-		av->av_buf_head	= DCCPAV_MAX_ACKVEC_LEN - 1;
+		av->av_buf_head	= av->av_buf_tail = DCCPAV_MAX_ACKVEC_LEN - 1;
 		INIT_LIST_HEAD(&av->av_records);
 	}
 	return av;
@@ -72,6 +72,14 @@ int dccp_ackvec_update_records(struct dccp_ackvec *av, u64 seqno, u8 nonce_sum)
 	avr->avr_ack_nonce  = nonce_sum;
 	avr->avr_ack_runlen = dccp_ackvec_runlen(av->av_buf + av->av_buf_head);
 	/*
+	 * When the buffer overflows, we keep no more than one record. This is
+	 * the simplest way of disambiguating sender-Acks dating from before the
+	 * overflow from sender-Acks which refer to after the overflow; a simple
+	 * solution is preferable here since we are handling an exception.
+	 */
+	if (av->av_overflow)
+		dccp_ackvec_purge_records(av);
+	/*
 	 * Since GSS is incremented for each packet, the list is automatically
 	 * arranged in descending order of @ack_seqno.
 	 */
@@ -85,6 +93,27 @@ int dccp_ackvec_update_records(struct dccp_ackvec *av, u64 seqno, u8 nonce_sum)
 }
 
 /*
+ * Buffer index and length computation using modulo-buffersize arithmetic.
+ * Note that, as pointers move from right to left, head is `before' tail.
+ */
+static inline u16 __ackvec_idx_add(const u16 a, const u16 b)
+{
+	return (a + b) % DCCPAV_MAX_ACKVEC_LEN;
+}
+
+static inline u16 __ackvec_idx_sub(const u16 a, const u16 b)
+{
+	return __ackvec_idx_add(a, DCCPAV_MAX_ACKVEC_LEN - b);
+}
+
+u16 dccp_ackvec_buflen(const struct dccp_ackvec *av)
+{
+	if (unlikely(av->av_overflow))
+		return DCCPAV_MAX_ACKVEC_LEN;
+	return __ackvec_idx_sub(av->av_buf_tail, av->av_buf_head);
+}
+
+/*
  * If several packets are missing, the HC-Receiver may prefer to enter multiple
  * bytes with run length 0, rather than a single byte with a larger run length;
  * this simplifies table updates if one of the missing packets arrives.
--- a/net/dccp/dccp.h
+++ b/net/dccp/dccp.h
@@ -457,12 +457,15 @@ static inline void dccp_update_gss(struct sock *sk, u64 seq)
 	dp->dccps_awh = dp->dccps_gss;
 }
 
+static inline int dccp_ackvec_pending(const struct sock *sk)
+{
+	return dccp_sk(sk)->dccps_hc_rx_ackvec != NULL &&
+	       !dccp_ackvec_is_empty(dccp_sk(sk)->dccps_hc_rx_ackvec);
+}
+
 static inline int dccp_ack_pending(const struct sock *sk)
 {
-	const struct dccp_sock *dp = dccp_sk(sk);
-	return (dp->dccps_hc_rx_ackvec != NULL &&
-		dccp_ackvec_pending(dp->dccps_hc_rx_ackvec)) ||
-	       inet_csk_ack_scheduled(sk);
+	return dccp_ackvec_pending(sk) || inet_csk_ack_scheduled(sk);
 }
 
 extern int  dccp_feat_finalise_settings(struct dccp_sock *dp);
--- a/net/dccp/options.c
+++ b/net/dccp/options.c
@@ -429,9 +429,10 @@ static int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb)
 {
 	struct dccp_sock *dp = dccp_sk(sk);
 	struct dccp_ackvec *av = dp->dccps_hc_rx_ackvec;
+	const u16 buflen = dccp_ackvec_buflen(av);
 	/* Figure out how many options do we need to represent the ackvec */
-	const u8 nr_opts = DIV_ROUND_UP(av->av_vec_len, DCCP_SINGLE_OPT_MAXLEN);
-	u16 len = av->av_vec_len + 2 * nr_opts;
+	const u8 nr_opts = DIV_ROUND_UP(buflen, DCCP_SINGLE_OPT_MAXLEN);
+	u16 len = buflen + 2 * nr_opts;
 	u8 i, nonce = 0;
 	const unsigned char *tail, *from;
 	unsigned char *to;
@@ -442,7 +443,7 @@ static int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb)
 	DCCP_SKB_CB(skb)->dccpd_opt_len += len;
 
 	to   = skb_push(skb, len);
-	len  = av->av_vec_len;
+	len  = buflen;
 	from = av->av_buf + av->av_buf_head;
 	tail = av->av_buf + DCCPAV_MAX_ACKVEC_LEN;
 
@@ -580,8 +581,7 @@ int dccp_insert_options(struct sock *sk, struct sk_buff *skb)
 			if (dccp_insert_option_timestamp(skb))
 				return -1;
 
-		} else if (dp->dccps_hc_rx_ackvec != NULL &&
-			   dccp_ackvec_pending(dp->dccps_hc_rx_ackvec) &&
+		} else if (dccp_ackvec_pending(sk) &&
 			   dccp_insert_option_ackvec(sk, skb)) {
 				return -1;
 		}

^ permalink raw reply

* net-next-2.6 [PATCH 0/3] dccp: Ack Vectors in circular buffer instead of array
From: Gerrit Renker @ 2010-11-11  6:07 UTC (permalink / raw)
  To: davem; +Cc: dccp, netdev
In-Reply-To: <ack_vector_circular_buffer_dccp>

Dave,

please can you consider this set, which is part I of a two-part patch set
to fix issues in the DCCP Ack Vector implementation.

It is a self-contained set to move away from a linear array to a circular
buffer with overflow handling. Part II would then follow next week.


 Patch #1: cleans up the old interface to prepare for the improved one.
 Patch #2: also tidies up the old interface, by separating the internals
           of Ack Vectors from the option-parsing code.
 Patch #3: Completes the implementation of a circular Ack Vector buffer.


I have also placed this in into a fresh (today's) copy of net-next-2.6, on

    git://eden-feed.erg.abdn.ac.uk/net-next-2.6        [subtree 'dccp']

The set has been tested for 3 years, and is fully bisectable.
---
 net/dccp/ackvec.c      |  251 ++++++++++++++++--------------------------------
 net/dccp/ackvec.h      |  115 +++++++++++++----------
 net/dccp/ccids/ccid2.c |   13 +--
 net/dccp/dccp.h        |   11 ++-
 net/dccp/input.c       |    6 +-
 net/dccp/options.c     |   65 ++++++++++++-
 6 files changed, 225 insertions(+), 236 deletions(-)

^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox