Netdev List
 help / color / mirror / Atom feed
* [net-next 4/6] ixgbe: allow eeprom writes via ethtool
From: Jeff Kirsher @ 2011-10-17 12:21 UTC (permalink / raw)
  To: davem; +Cc: Emil Tantilov, netdev, gospo, sassmann, Jeff Kirsher
In-Reply-To: <1318854062-3628-1-git-send-email-jeffrey.t.kirsher@intel.com>

From: Emil Tantilov <emil.s.tantilov@intel.com>

Implement support for ethtool -E

Signed-off-by: Emil Tantilov <emil.s.tantilov@intel.com>
Tested-by: Phil Schmitt <phillip.j.schmitt@intel.com>
Tested-by: Stephen Ko <stephen.s.ko@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c   |    2 +
 drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c |   71 ++++++++++++++++++++++
 2 files changed, 73 insertions(+), 0 deletions(-)

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c
index e02e911..ef2afef 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c
@@ -1305,6 +1305,8 @@ static struct ixgbe_mac_operations mac_ops_82598 = {
 static struct ixgbe_eeprom_operations eeprom_ops_82598 = {
 	.init_params		= &ixgbe_init_eeprom_params_generic,
 	.read			= &ixgbe_read_eerd_generic,
+	.write			= &ixgbe_write_eeprom_generic,
+	.write_buffer		= &ixgbe_write_eeprom_buffer_bit_bang_generic,
 	.read_buffer		= &ixgbe_read_eerd_buffer_generic,
 	.calc_checksum          = &ixgbe_calc_eeprom_checksum_generic,
 	.validate_checksum	= &ixgbe_validate_eeprom_checksum_generic,
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
index e102ff6..7acfce3 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
@@ -814,6 +814,76 @@ static int ixgbe_get_eeprom(struct net_device *netdev,
 	return ret_val;
 }
 
+static int ixgbe_set_eeprom(struct net_device *netdev,
+			    struct ethtool_eeprom *eeprom, u8 *bytes)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+	struct ixgbe_hw *hw = &adapter->hw;
+	u16 *eeprom_buff;
+	void *ptr;
+	int max_len, first_word, last_word, ret_val = 0;
+	u16 i;
+
+	if (eeprom->len == 0)
+		return -EINVAL;
+
+	if (eeprom->magic != (hw->vendor_id | (hw->device_id << 16)))
+		return -EINVAL;
+
+	max_len = hw->eeprom.word_size * 2;
+
+	first_word = eeprom->offset >> 1;
+	last_word = (eeprom->offset + eeprom->len - 1) >> 1;
+	eeprom_buff = kmalloc(max_len, GFP_KERNEL);
+	if (!eeprom_buff)
+		return -ENOMEM;
+
+	ptr = eeprom_buff;
+
+	if (eeprom->offset & 1) {
+		/*
+		 * need read/modify/write of first changed EEPROM word
+		 * only the second byte of the word is being modified
+		 */
+		ret_val = hw->eeprom.ops.read(hw, first_word, &eeprom_buff[0]);
+		if (ret_val)
+			goto err;
+
+		ptr++;
+	}
+	if ((eeprom->offset + eeprom->len) & 1) {
+		/*
+		 * need read/modify/write of last changed EEPROM word
+		 * only the first byte of the word is being modified
+		 */
+		ret_val = hw->eeprom.ops.read(hw, last_word,
+					  &eeprom_buff[last_word - first_word]);
+		if (ret_val)
+			goto err;
+	}
+
+	/* Device's eeprom is always little-endian, word addressable */
+	for (i = 0; i < last_word - first_word + 1; i++)
+		le16_to_cpus(&eeprom_buff[i]);
+
+	memcpy(ptr, bytes, eeprom->len);
+
+	for (i = 0; i < last_word - first_word + 1; i++)
+		cpu_to_le16s(&eeprom_buff[i]);
+
+	ret_val = hw->eeprom.ops.write_buffer(hw, first_word,
+					      last_word - first_word + 1,
+					      eeprom_buff);
+
+	/* Update the checksum */
+	if (ret_val == 0)
+		hw->eeprom.ops.update_checksum(hw);
+
+err:
+	kfree(eeprom_buff);
+	return ret_val;
+}
+
 static void ixgbe_get_drvinfo(struct net_device *netdev,
                               struct ethtool_drvinfo *drvinfo)
 {
@@ -2524,6 +2594,7 @@ static const struct ethtool_ops ixgbe_ethtool_ops = {
 	.get_link               = ethtool_op_get_link,
 	.get_eeprom_len         = ixgbe_get_eeprom_len,
 	.get_eeprom             = ixgbe_get_eeprom,
+	.set_eeprom             = ixgbe_set_eeprom,
 	.get_ringparam          = ixgbe_get_ringparam,
 	.set_ringparam          = ixgbe_set_ringparam,
 	.get_pauseparam         = ixgbe_get_pauseparam,
-- 
1.7.6.4

^ permalink raw reply related

* [net-next 3/6] ixgbe: fix endianess when writing driver version to firmware
From: Jeff Kirsher @ 2011-10-17 12:20 UTC (permalink / raw)
  To: davem; +Cc: Emil Tantilov, netdev, gospo, sassmann, Jeff Kirsher
In-Reply-To: <1318854062-3628-1-git-send-email-jeffrey.t.kirsher@intel.com>

From: Emil Tantilov <emil.s.tantilov@intel.com>

This patch makes sure that register writes are in little-endian and
also converts the header read back from the device to CPU byte order.

Signed-off-by: Emil Tantilov <emil.s.tantilov@intel.com>
Tested-by: Stephen Ko <stephen.s.ko@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ixgbe/ixgbe_common.c |   16 ++++++++--------
 1 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c
index 35fa444..834f044 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c
@@ -3341,7 +3341,7 @@ static u8 ixgbe_calculate_checksum(u8 *buffer, u32 length)
  *  Communicates with the manageability block.  On success return 0
  *  else return IXGBE_ERR_HOST_INTERFACE_COMMAND.
  **/
-static s32 ixgbe_host_interface_command(struct ixgbe_hw *hw, u8 *buffer,
+static s32 ixgbe_host_interface_command(struct ixgbe_hw *hw, u32 *buffer,
 					u32 length)
 {
 	u32 hicr, i;
@@ -3374,7 +3374,7 @@ static s32 ixgbe_host_interface_command(struct ixgbe_hw *hw, u8 *buffer,
 	 */
 	for (i = 0; i < dword_len; i++)
 		IXGBE_WRITE_REG_ARRAY(hw, IXGBE_FLEX_MNG,
-				      i, *((u32 *)buffer + i));
+				      i, cpu_to_le32(buffer[i]));
 
 	/* Setting this bit tells the ARC that a new command is pending. */
 	IXGBE_WRITE_REG(hw, IXGBE_HICR, hicr | IXGBE_HICR_C);
@@ -3398,9 +3398,10 @@ static s32 ixgbe_host_interface_command(struct ixgbe_hw *hw, u8 *buffer,
 	dword_len = hdr_size >> 2;
 
 	/* first pull in the header so we know the buffer length */
-	for (i = 0; i < dword_len; i++)
-		*((u32 *)buffer + i) =
-			IXGBE_READ_REG_ARRAY(hw, IXGBE_FLEX_MNG, i);
+	for (i = 0; i < dword_len; i++) {
+		buffer[i] = IXGBE_READ_REG_ARRAY(hw, IXGBE_FLEX_MNG, i);
+		le32_to_cpus(&buffer[i]);
+	}
 
 	/* If there is any thing in data position pull it in */
 	buf_len = ((struct ixgbe_hic_hdr *)buffer)->buf_len;
@@ -3418,8 +3419,7 @@ static s32 ixgbe_host_interface_command(struct ixgbe_hw *hw, u8 *buffer,
 
 	/* Pull in the rest of the buffer (i is where we left off)*/
 	for (; i < buf_len; i++)
-		*((u32 *)buffer + i) =
-			IXGBE_READ_REG_ARRAY(hw, IXGBE_FLEX_MNG, i);
+		buffer[i] = IXGBE_READ_REG_ARRAY(hw, IXGBE_FLEX_MNG, i);
 
 out:
 	return ret_val;
@@ -3465,7 +3465,7 @@ s32 ixgbe_set_fw_drv_ver_generic(struct ixgbe_hw *hw, u8 maj, u8 min,
 	fw_cmd.pad2 = 0;
 
 	for (i = 0; i <= FW_CEM_MAX_RETRIES; i++) {
-		ret_val = ixgbe_host_interface_command(hw, (u8 *)&fw_cmd,
+		ret_val = ixgbe_host_interface_command(hw, (u32 *)&fw_cmd,
 						       sizeof(fw_cmd));
 		if (ret_val != 0)
 			continue;
-- 
1.7.6.4

^ permalink raw reply related

* [net-next 2/6 v2] igb: Check if subordinate VFs are assigned to virtual machines
From: Jeff Kirsher @ 2011-10-17 12:20 UTC (permalink / raw)
  To: davem
  Cc: Greg Rose, netdev, gospo, sassmann, Konrad Rzeszutek Wilk,
	Christian Benvenuti, Sathya Perla, Dimitris Michailidis,
	Jon Mason, James Smart, Jeff Kirsher
In-Reply-To: <1318854062-3628-1-git-send-email-jeffrey.t.kirsher@intel.com>

From: Greg Rose <gregory.v.rose@intel.com>

Kvm and the Xen pci-back driver will set a flag in the virtual function
pci device dev_flags when the VF is assigned to a guest VM.  Before
destroying subordinate VFs check to see if the flag is set and if so
skip the call to pci_disable_sriov() to avoid system crashes.

Copy the maintainer for the Xen pci-back driver.  Also CC'ing
maintainers of all drivers found to call pci_disable_sriov().

V2 - Fix uninitialized variable warning

Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Cc: Christian Benvenuti <benve@cisco.com>
Cc: Sathya Perla <sathya.perla@emulex.com>
Cc: Dimitris Michailidis <dm@chelsio.com>
Cc: Jon Mason <jdmason@kudzu.us>
Cc: James Smart <james.smart@emulex.com>
Signed-off-by: Greg Rose <gregory.v.rose@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/igb/igb.h      |    3 +
 drivers/net/ethernet/intel/igb/igb_main.c |  177 ++++++++++++++++++++++++-----
 2 files changed, 150 insertions(+), 30 deletions(-)

diff --git a/drivers/net/ethernet/intel/igb/igb.h b/drivers/net/ethernet/intel/igb/igb.h
index 4c500a7..5594430 100644
--- a/drivers/net/ethernet/intel/igb/igb.h
+++ b/drivers/net/ethernet/intel/igb/igb.h
@@ -72,6 +72,8 @@ struct igb_adapter;
 #define IGB_MAX_VF_MC_ENTRIES              30
 #define IGB_MAX_VF_FUNCTIONS               8
 #define IGB_MAX_VFTA_ENTRIES               128
+#define IGB_82576_VF_DEV_ID                0x10CA
+#define IGB_I350_VF_DEV_ID                 0x1520
 
 struct vf_data_storage {
 	unsigned char vf_mac_addresses[ETH_ALEN];
@@ -83,6 +85,7 @@ struct vf_data_storage {
 	u16 pf_vlan; /* When set, guest VLAN config not allowed. */
 	u16 pf_qos;
 	u16 tx_rate;
+	struct pci_dev *vfdev;
 };
 
 #define IGB_VF_FLAG_CTS            0x00000001 /* VF is clear to send data */
diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
index c10cc71..837adbb 100644
--- a/drivers/net/ethernet/intel/igb/igb_main.c
+++ b/drivers/net/ethernet/intel/igb/igb_main.c
@@ -162,6 +162,9 @@ static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate);
 static int igb_ndo_get_vf_config(struct net_device *netdev, int vf,
 				 struct ifla_vf_info *ivi);
 static void igb_check_vf_rate_limit(struct igb_adapter *);
+static int igb_vf_configure(struct igb_adapter *adapter, int vf);
+static int igb_find_enabled_vfs(struct igb_adapter *adapter);
+static int igb_check_vf_assignment(struct igb_adapter *adapter);
 
 #ifdef CONFIG_PM
 static int igb_suspend(struct pci_dev *, pm_message_t);
@@ -2232,8 +2235,12 @@ static void __devexit igb_remove(struct pci_dev *pdev)
 	/* reclaim resources allocated to VFs */
 	if (adapter->vf_data) {
 		/* disable iov and allow time for transactions to clear */
-		pci_disable_sriov(pdev);
-		msleep(500);
+		if (!igb_check_vf_assignment(adapter)) {
+			pci_disable_sriov(pdev);
+			msleep(500);
+		} else {
+			dev_info(&pdev->dev, "VF(s) assigned to guests!\n");
+		}
 
 		kfree(adapter->vf_data);
 		adapter->vf_data = NULL;
@@ -2270,42 +2277,49 @@ static void __devinit igb_probe_vfs(struct igb_adapter * adapter)
 {
 #ifdef CONFIG_PCI_IOV
 	struct pci_dev *pdev = adapter->pdev;
+	int old_vfs = igb_find_enabled_vfs(adapter);
+	int i;
 
-	if (adapter->vfs_allocated_count) {
-		adapter->vf_data = kcalloc(adapter->vfs_allocated_count,
-		                           sizeof(struct vf_data_storage),
-		                           GFP_KERNEL);
-		/* if allocation failed then we do not support SR-IOV */
-		if (!adapter->vf_data) {
-			adapter->vfs_allocated_count = 0;
-			dev_err(&pdev->dev, "Unable to allocate memory for VF "
-			        "Data Storage\n");
-		}
+	if (old_vfs) {
+		dev_info(&pdev->dev, "%d pre-allocated VFs found - override "
+			 "max_vfs setting of %d\n", old_vfs, max_vfs);
+		adapter->vfs_allocated_count = old_vfs;
 	}
 
-	if (pci_enable_sriov(pdev, adapter->vfs_allocated_count)) {
-		kfree(adapter->vf_data);
-		adapter->vf_data = NULL;
-#endif /* CONFIG_PCI_IOV */
+	if (!adapter->vfs_allocated_count)
+		return;
+
+	adapter->vf_data = kcalloc(adapter->vfs_allocated_count,
+				sizeof(struct vf_data_storage), GFP_KERNEL);
+	/* if allocation failed then we do not support SR-IOV */
+	if (!adapter->vf_data) {
 		adapter->vfs_allocated_count = 0;
-#ifdef CONFIG_PCI_IOV
-	} else {
-		unsigned char mac_addr[ETH_ALEN];
-		int i;
-		dev_info(&pdev->dev, "%d vfs allocated\n",
-		         adapter->vfs_allocated_count);
-		for (i = 0; i < adapter->vfs_allocated_count; i++) {
-			random_ether_addr(mac_addr);
-			igb_set_vf_mac(adapter, i, mac_addr);
-		}
-		/* DMA Coalescing is not supported in IOV mode. */
-		if (adapter->flags & IGB_FLAG_DMAC)
-			adapter->flags &= ~IGB_FLAG_DMAC;
+		dev_err(&pdev->dev, "Unable to allocate memory for VF "
+			"Data Storage\n");
+		goto out;
 	}
+
+	if (!old_vfs) {
+		if (pci_enable_sriov(pdev, adapter->vfs_allocated_count))
+			goto err_out;
+	}
+	dev_info(&pdev->dev, "%d VFs allocated\n",
+		 adapter->vfs_allocated_count);
+	for (i = 0; i < adapter->vfs_allocated_count; i++)
+		igb_vf_configure(adapter, i);
+
+	/* DMA Coalescing is not supported in IOV mode. */
+	adapter->flags &= ~IGB_FLAG_DMAC;
+	goto out;
+err_out:
+	kfree(adapter->vf_data);
+	adapter->vf_data = NULL;
+	adapter->vfs_allocated_count = 0;
+out:
+	return;
 #endif /* CONFIG_PCI_IOV */
 }
 
-
 /**
  * igb_init_hw_timer - Initialize hardware timer used with IEEE 1588 timestamp
  * @adapter: board private structure to initialize
@@ -4917,6 +4931,109 @@ static int igb_notify_dca(struct notifier_block *nb, unsigned long event,
 }
 #endif /* CONFIG_IGB_DCA */
 
+#ifdef CONFIG_PCI_IOV
+static int igb_vf_configure(struct igb_adapter *adapter, int vf)
+{
+	unsigned char mac_addr[ETH_ALEN];
+	struct pci_dev *pdev = adapter->pdev;
+	struct e1000_hw *hw = &adapter->hw;
+	struct pci_dev *pvfdev;
+	unsigned int device_id;
+	u16 thisvf_devfn;
+
+	random_ether_addr(mac_addr);
+	igb_set_vf_mac(adapter, vf, mac_addr);
+
+	switch (adapter->hw.mac.type) {
+	case e1000_82576:
+		device_id = IGB_82576_VF_DEV_ID;
+		/* VF Stride for 82576 is 2 */
+		thisvf_devfn = (pdev->devfn + 0x80 + (vf << 1)) |
+			(pdev->devfn & 1);
+		break;
+	case e1000_i350:
+		device_id = IGB_I350_VF_DEV_ID;
+		/* VF Stride for I350 is 4 */
+		thisvf_devfn = (pdev->devfn + 0x80 + (vf << 2)) |
+				(pdev->devfn & 3);
+		break;
+	default:
+		device_id = 0;
+		thisvf_devfn = 0;
+		break;
+	}
+
+	pvfdev = pci_get_device(hw->vendor_id, device_id, NULL);
+	while (pvfdev) {
+		if (pvfdev->devfn == thisvf_devfn)
+			break;
+		pvfdev = pci_get_device(hw->vendor_id,
+					device_id, pvfdev);
+	}
+
+	if (pvfdev)
+		adapter->vf_data[vf].vfdev = pvfdev;
+	else
+		dev_err(&pdev->dev,
+			"Couldn't find pci dev ptr for VF %4.4x\n",
+			thisvf_devfn);
+	return pvfdev != NULL;
+}
+
+static int igb_find_enabled_vfs(struct igb_adapter *adapter)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	struct pci_dev *pdev = adapter->pdev;
+	struct pci_dev *pvfdev;
+	u16 vf_devfn = 0;
+	u16 vf_stride;
+	unsigned int device_id;
+	int vfs_found = 0;
+
+	switch (adapter->hw.mac.type) {
+	case e1000_82576:
+		device_id = IGB_82576_VF_DEV_ID;
+		/* VF Stride for 82576 is 2 */
+		vf_stride = 2;
+		break;
+	case e1000_i350:
+		device_id = IGB_I350_VF_DEV_ID;
+		/* VF Stride for I350 is 4 */
+		vf_stride = 4;
+		break;
+	default:
+		device_id = 0;
+		vf_stride = 0;
+		break;
+	}
+
+	vf_devfn = pdev->devfn + 0x80;
+	pvfdev = pci_get_device(hw->vendor_id, device_id, NULL);
+	while (pvfdev) {
+		if (pvfdev->devfn == vf_devfn)
+			vfs_found++;
+		vf_devfn += vf_stride;
+		pvfdev = pci_get_device(hw->vendor_id,
+					device_id, pvfdev);
+	}
+
+	return vfs_found;
+}
+
+static int igb_check_vf_assignment(struct igb_adapter *adapter)
+{
+	int i;
+	for (i = 0; i < adapter->vfs_allocated_count; i++) {
+		if (adapter->vf_data[i].vfdev) {
+			if (adapter->vf_data[i].vfdev->dev_flags &
+			    PCI_DEV_FLAGS_ASSIGNED)
+				return true;
+		}
+	}
+	return false;
+}
+
+#endif
 static void igb_ping_all_vfs(struct igb_adapter *adapter)
 {
 	struct e1000_hw *hw = &adapter->hw;
-- 
1.7.6.4

^ permalink raw reply related

* [net-next 1/6] igbvf: Fix trunk vlan
From: Jeff Kirsher @ 2011-10-17 12:20 UTC (permalink / raw)
  To: davem; +Cc: Greg Rose, netdev, gospo, sassmann, Jiri Pirko, Jeff Kirsher
In-Reply-To: <1318854062-3628-1-git-send-email-jeffrey.t.kirsher@intel.com>

From: Greg Rose <gregory.v.rose@intel.com>

Changes to clean up the VLAN Rx path by Jiri Pirko broke trunk VLAN.
Trunk VLANs in a VF driver are those set using

"ip link set <pfdev> vf <n> <vlanid>"

Signed-off-by: Greg Rose <gregory.v.rose@intel.com>
CC: Jiri Pirko <jpirko@redhat.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/igbvf/netdev.c |    4 ++--
 1 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/intel/igbvf/netdev.c b/drivers/net/ethernet/intel/igbvf/netdev.c
index 32b3044..23cc40f 100644
--- a/drivers/net/ethernet/intel/igbvf/netdev.c
+++ b/drivers/net/ethernet/intel/igbvf/netdev.c
@@ -102,8 +102,8 @@ static void igbvf_receive_skb(struct igbvf_adapter *adapter,
 {
 	if (status & E1000_RXD_STAT_VP) {
 		u16 vid = le16_to_cpu(vlan) & E1000_RXD_SPC_VLAN_MASK;
-
-		__vlan_hwaccel_put_tag(skb, vid);
+		if (test_bit(vid, adapter->active_vlans))
+			__vlan_hwaccel_put_tag(skb, vid);
 	}
 	netif_receive_skb(skb);
 }
-- 
1.7.6.4

^ permalink raw reply related

* [net-next 0/6 v2][pull request] Intel Wired LAN Driver Updates
From: Jeff Kirsher @ 2011-10-17 12:20 UTC (permalink / raw)
  To: davem; +Cc: Jeff Kirsher, netdev, gospo, sassmann

The following series contains updates to ixgbe, igbvf and igb.
This version of the series contains the following changes:

- igb fix/add check if subordinate VFs are assigned to VMs
- igbvf fix for trunk VLAN
- ixgbe 2 fixes for ethtool and 1 endianness fix

-v2 update the igb patch to resolve a variable initialization warning

The following are changes since commit fd38f734cb8200529e281338514945fcbff2364b:
  igbvf: convert to ndo_fix_features
and are available in the git repository at
  git://git.kernel.org/pub/scm/linux/kernel/git/jkirsher/net-next.git

Emil Tantilov (3):
  ixgbe: fix endianess when writing driver version to firmware
  ixgbe: allow eeprom writes via ethtool
  ixgbe: change the eeprom version reported by ethtool

Greg Rose (2):
  igbvf: Fix trunk vlan
  igb: Check if subordinate VFs are assigned to virtual machines

Jacob Keller (1):
  ixgbe: add hardware timestamping support

 drivers/net/ethernet/intel/igb/igb.h             |    3 +
 drivers/net/ethernet/intel/igb/igb_main.c        |  177 +++++++--
 drivers/net/ethernet/intel/igbvf/netdev.c        |    4 +-
 drivers/net/ethernet/intel/ixgbe/ixgbe.h         |   24 ++-
 drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c   |    2 +
 drivers/net/ethernet/intel/ixgbe/ixgbe_common.c  |   16 +-
 drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c |   84 ++++-
 drivers/net/ethernet/intel/ixgbe/ixgbe_main.c    |  452 +++++++++++++++++++++-
 drivers/net/ethernet/intel/ixgbe/ixgbe_type.h    |   32 ++
 9 files changed, 743 insertions(+), 51 deletions(-)

-- 
1.7.6.4

^ permalink raw reply

* Re: [PATCH 1/4] ipv4: Fix pmtu propagating
From: Steffen Klassert @ 2011-10-17 12:18 UTC (permalink / raw)
  To: David Miller; +Cc: netdev
In-Reply-To: <20111014055406.GP1830@secunet.com>

On Fri, Oct 14, 2011 at 07:54:06AM +0200, Steffen Klassert wrote:
> On Thu, Oct 13, 2011 at 01:58:08PM -0400, David Miller wrote:
> > 
> > Please find out exactly why dst->obsolete is non-zero on a freshly
> > looked up route.  It's unexpected.
> 
> Hm, on a slow path route lookup e.g. __mkroute_output() calls
> rt_dst_alloc() which initializes dst->obsolete to -1.

Just a follow up:
git commit d11a4dc18 (ipv4: check rt_genid in dst_check)
changed the initialization value of dst->obsolete from
0 to -1.

^ permalink raw reply

* Re: [net-next 2/6] igb: Check if subordinate VFs are assigned to virtual machines
From: Jeff Kirsher @ 2011-10-17 11:54 UTC (permalink / raw)
  To: davem@davemloft.net
  Cc: Rose, Gregory V, netdev@vger.kernel.org, gospo@redhat.com,
	sassmann@redhat.com, Konrad Rzeszutek Wilk, Christian Benvenuti,
	Sathya Perla, Dimitris Michailidis, Jon Mason, James Smart
In-Reply-To: <1318851161-30163-3-git-send-email-jeffrey.t.kirsher@intel.com>

[-- Attachment #1: Type: text/plain, Size: 1020 bytes --]

On Mon, 2011-10-17 at 04:32 -0700, Kirsher, Jeffrey T wrote:
> From: Greg Rose <gregory.v.rose@intel.com>
> 
> Kvm and the Xen pci-back driver will set a flag in the virtual
> function
> pci device dev_flags when the VF is assigned to a guest VM.  Before
> destroying subordinate VFs check to see if the flag is set and if so
> skip the call to pci_disable_sriov() to avoid system crashes.
> 
> Copy the maintainer for the Xen pci-back driver.  Also CC'ing
> maintainers of all drivers found to call pci_disable_sriov().
> 
> Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
> Cc: Christian Benvenuti <benve@cisco.com>
> Cc: Sathya Perla <sathya.perla@emulex.com>
> Cc: Dimitris Michailidis <dm@chelsio.com>
> Cc: Jon Mason <jdmason@kudzu.us>
> Cc: James Smart <james.smart@emulex.com>
> 
> Signed-off-by: Greg Rose <gregory.v.rose@intel.com>
> Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com> 

Dave- please do not pull this patch (or the series).  There is an issue
with this patch.

[-- Attachment #2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 836 bytes --]

^ permalink raw reply

* [net-next 5/6] ixgbe: add hardware timestamping support
From: Jeff Kirsher @ 2011-10-17 11:32 UTC (permalink / raw)
  To: davem; +Cc: Jacob Keller, netdev, gospo, sassmann, Jeff Kirsher
In-Reply-To: <1318851161-30163-1-git-send-email-jeffrey.t.kirsher@intel.com>

From: Jacob Keller <jacob.e.keller@intel.com>

Enable hardware offloading of timestamps via the SO_TIMESTAMPING
functionality. Adds support for enabling the hardware cycle counter,
and enabling the SO_TIMESTAMPING method for extracting the hardware
timestamps through the skb.

In the initial version, the cyclecounter structure was not properly
converting the cycles into nanoseconds, due to an incorrectly
calculated incvalue. v3 fixed this issue by recalculating the math and
giving a proper right-shift value to the cycle counter
structure. However, the DMA clock which is used to generate the
cyclecounter register changes frequency based on the link speed. This
version recalculates the incvalue based on the link speed every time
the device changes link speed.

The cyclecounter has the potential to miss a wrap-around of the
systim register (this should occur no more often than every 35
seconds) unless some activity regarding the cycle counter occurs at
least once within this time. This version adds a cycle counter read
every time the watchdog task is run, which should occur at least once
within this timeframe. Any packets being timestamped will also count
as a read due to the call to timecompare_update.

This version fixes an issue regarding timecompare not updating
detected skew after the clock offset is changed due to ptpd or outside
influence from the OS. Now the skew detection is forced just before we
hand a timestamp up to the kernel stack.

Signed-off-by: Jacob E Keller <jacob.e.keller@intel.com>
Tested-by: Stephen Ko <stephen.s.ko@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ixgbe/ixgbe.h      |   21 ++
 drivers/net/ethernet/intel/ixgbe/ixgbe_main.c |  445 ++++++++++++++++++++++++-
 drivers/net/ethernet/intel/ixgbe/ixgbe_type.h |   32 ++
 3 files changed, 497 insertions(+), 1 deletions(-)

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
index 6c4d693..9e6635e 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
@@ -36,6 +36,10 @@
 #include <linux/aer.h>
 #include <linux/if_vlan.h>
 
+#include <linux/clocksource.h>
+#include <linux/timecompare.h>
+#include <linux/net_tstamp.h>
+
 #include "ixgbe_type.h"
 #include "ixgbe_common.h"
 #include "ixgbe_dcb.h"
@@ -103,6 +107,7 @@
 #define IXGBE_TX_FLAGS_FSO		(u32)(1 << 6)
 #define IXGBE_TX_FLAGS_TXSW		(u32)(1 << 7)
 #define IXGBE_TX_FLAGS_MAPPED_AS_PAGE	(u32)(1 << 8)
+#define IXGBE_TX_FLAGS_TSTAMP		(u32)(1 << 9)
 #define IXGBE_TX_FLAGS_VLAN_MASK	0xffff0000
 #define IXGBE_TX_FLAGS_VLAN_PRIO_MASK	0xe0000000
 #define IXGBE_TX_FLAGS_VLAN_PRIO_SHIFT  29
@@ -319,6 +324,16 @@ struct ixgbe_q_vector {
 	char name[IFNAMSIZ + 9];
 };
 
+#define IXGBE_INCVAL_BASE_10GB   32
+#define IXGBE_INCVAL_BASE_1GB    320
+#define IXGBE_INCVAL_BASE_100    3200
+#define IXGBE_INCVAL_SHIFT_10GB  28
+#define IXGBE_INCVAL_SHIFT_1GB   24
+#define IXGBE_INCVAL_SHIFT_100   21
+#define IXGBE_INCVAL_DIVISOR     5
+#define IXGBE_INCVAL_SHIFT_82599 7
+#define IXGBE_INCPER_SHIFT_82599 24
+
 /*
  * microsecond values for various ITR rates shifted by 2 to fit itr register
  * with the first 3 bits reserved 0
@@ -409,6 +424,7 @@ struct ixgbe_adapter {
 #define IXGBE_FLAG2_SFP_NEEDS_RESET             (u32)(1 << 5)
 #define IXGBE_FLAG2_RESET_REQUESTED             (u32)(1 << 6)
 #define IXGBE_FLAG2_FDIR_REQUIRES_REINIT        (u32)(1 << 7)
+#define IXGBE_FLAG2_CYCLECOUNTER_RUNNING        (u32)(1 << 8)
 
 	unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)];
 	u16 bd_number;
@@ -464,6 +480,11 @@ struct ixgbe_adapter {
 	struct net_device *netdev;
 	struct pci_dev *pdev;
 
+	struct cyclecounter cycles;
+	struct timecounter clock;
+	struct timecompare compare;
+	struct hwtstamp_config hwtstamp_config;
+
 	u32 test_icr;
 	struct ixgbe_ring test_tx_ring;
 	struct ixgbe_ring test_rx_ring;
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index fb7d884..c92c3a7 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -135,6 +135,220 @@ MODULE_VERSION(DRV_VERSION);
 
 #define DEFAULT_DEBUG_LEVEL_SHIFT 3
 
+static cycle_t ixgbe_read_clock(const struct cyclecounter *tc)
+{
+	struct ixgbe_adapter *adapter =
+		container_of(tc, struct ixgbe_adapter, cycles);
+	struct ixgbe_hw *hw = &adapter->hw;
+	u64 stamp = 0;
+
+
+	stamp |= (u64)IXGBE_READ_REG(hw, IXGBE_SYSTIML);
+	stamp |= (u64)IXGBE_READ_REG(hw, IXGBE_SYSTIMH) << 32;
+
+	return stamp;
+}
+
+/**
+ * ixgbe_init_cyclecounter - initialize the systim cyclecounter
+ * @adapter: pointer to private adapter structure
+ *
+ * We need to call this function every time that the link goes up
+ * because the frequency of the DMA clock changes when the link
+ * speed changes.
+ */
+static void ixgbe_init_cyclecounter(struct ixgbe_adapter *adapter)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	u64 incval = 0;
+
+	/*
+	 * (re)initialize hardware cycle counter
+	 * we keep this running just in case someone wants it
+	 * enabled in the future. However, we disable the flag
+	 * just in case so that no one tries to read during
+	 * the process of initializing.
+	 */
+	adapter->flags2 &= ~IXGBE_FLAG2_CYCLECOUNTER_RUNNING;
+
+	/*
+	 * 82598EB hardware does not support timestamping,
+	 * so we return after making sure the cyclecounter
+	 * is disabled.
+	 */
+	if (hw->mac.type == ixgbe_mac_82598EB)
+		return;
+
+
+	memset(&adapter->cycles, 0, sizeof(adapter->cycles));
+	adapter->cycles.read = ixgbe_read_clock;
+	adapter->cycles.mask = CLOCKSOURCE_MASK(64);
+	adapter->cycles.mult = 1;
+
+	/**
+	 * Scale the NIC cycle counter by a large factor so that
+	 * relatively small corrections to the frequency can be added
+	 * or subtracted. The drawbacks of a large factor include
+	 * (a) the clock register overflows more quickly, (b) the cycle
+	 * counter structure must be able to convert the systim value
+	 * to nanoseconds using only a multiplier and a right-shift,
+	 * and (c) the value must fit within the timinca register space
+	 * => math based on internal DMA clock rate and available bits
+	 */
+	switch (adapter->link_speed) {
+	case IXGBE_LINK_SPEED_100_FULL:
+		incval = IXGBE_INCVAL_BASE_100;
+		incval <<= IXGBE_INCVAL_SHIFT_100;
+		incval /= IXGBE_INCVAL_DIVISOR;
+
+		adapter->cycles.shift = IXGBE_INCVAL_SHIFT_100;
+		break;
+	case IXGBE_LINK_SPEED_1GB_FULL:
+		incval = IXGBE_INCVAL_BASE_1GB;
+		incval <<= IXGBE_INCVAL_SHIFT_1GB;
+		incval /= IXGBE_INCVAL_DIVISOR;
+
+		adapter->cycles.shift = IXGBE_INCVAL_SHIFT_1GB;
+		break;
+	case IXGBE_LINK_SPEED_10GB_FULL:
+	default:
+		incval = IXGBE_INCVAL_BASE_10GB;
+		incval <<= IXGBE_INCVAL_SHIFT_10GB;
+		incval /= IXGBE_INCVAL_DIVISOR;
+
+		adapter->cycles.shift = IXGBE_INCVAL_SHIFT_10GB;
+		break;
+	}
+
+	switch (hw->mac.type) {
+	case ixgbe_mac_X540:
+		IXGBE_WRITE_REG(hw, IXGBE_TIMINCA, (u32)incval);
+		break;
+	case ixgbe_mac_82599EB:
+		incval >>= IXGBE_INCVAL_SHIFT_82599;
+		adapter->cycles.shift -= IXGBE_INCVAL_SHIFT_82599;
+		IXGBE_WRITE_REG(hw, IXGBE_TIMINCA,
+			    (1 << IXGBE_INCPER_SHIFT_82599) |
+				(u32)incval);
+		break;
+	default:
+		/* other devices aren't supported */
+		return;
+	}
+
+	/* reset the system time registers */
+	IXGBE_WRITE_REG(hw, IXGBE_SYSTIML, 0x00000000);
+	IXGBE_WRITE_REG(hw, IXGBE_SYSTIMH, 0x00000000);
+	IXGBE_WRITE_FLUSH(hw);
+
+	/* Enable the watchdog task to detect cycle counter overflow */
+	adapter->flags2 |= IXGBE_FLAG2_CYCLECOUNTER_RUNNING;
+
+	/* reset the ns time counter */
+	timecounter_init(&adapter->clock,
+			 &adapter->cycles,
+			 ktime_to_ns(ktime_get_real()));
+}
+
+/**
+ * ixgbe_systim_to_hwtstamp - convert system time value to hw timestamp
+ * @adapter: board private structure
+ * @shhwtstamps: timestamp structure to update
+ * @regval: unsigned 64bit system time value.
+ *
+ * We need to convert the system time value stored in the RX/TXSTMP registers
+ * into a hwtstamp which can be used by the upper level timestamping functions
+ */
+static void ixgbe_systim_to_hwtstamp(struct ixgbe_adapter *adapter,
+				     struct skb_shared_hwtstamps *shhwtstamps,
+				     u64 regval)
+{
+	u64 ns;
+
+	/* timestamps aren't valid if the cyclecounter isn't initialized */
+	if (!(adapter->flags2 & IXGBE_FLAG2_CYCLECOUNTER_RUNNING))
+		return;
+
+	ns = timecounter_cyc2time(&adapter->clock, regval);
+
+	/*
+	 * force the timecompare structure to detect skew here in
+	 * order to prevent the case where the wall clock has been
+	 * adjusted by a factor which makes the previous skew
+	 * invalid. this prevents us from giving invalid timestamps to
+	 * the kernel stack.
+	 */
+	timecompare_update(&adapter->compare, 0);
+	memset(shhwtstamps, 0, sizeof(struct skb_shared_hwtstamps));
+	shhwtstamps->hwtstamp = ns_to_ktime(ns);
+	shhwtstamps->syststamp = timecompare_transform(&adapter->compare, ns);
+}
+
+/**
+ * ixgbe_tx_hwtstamp - utility function which checks for TX time stamp
+ * @q_vector: pointer to q_vector containing needed info
+ * @buffer_info: pointer to ixgbe_tx_buffer structure
+ *
+ * If we were asked to do hardware stamping and such a time stamp is
+ * available, then it must have been for this skb here because we
+ * allow only one such packet into the queue.
+ */
+static void ixgbe_tx_hwtstamp(struct ixgbe_q_vector *q_vector,
+			      struct ixgbe_tx_buffer *buffer_info)
+{
+	struct ixgbe_adapter *adapter = q_vector->adapter;
+	struct ixgbe_hw *hw = &adapter->hw;
+	struct skb_shared_hwtstamps shhwtstamps;
+	u64 regval = 0;
+
+	/* if skb does not support hw timestamp or TX stamp not valid exit */
+	if (likely(!(buffer_info->tx_flags & IXGBE_TX_FLAGS_TSTAMP)) ||
+	    !(IXGBE_READ_REG(hw, IXGBE_TSYNCTXCTL) & IXGBE_TSYNCTXCTL_VALID))
+		return;
+
+	regval |= (u64)IXGBE_READ_REG(hw, IXGBE_TXSTMPL);
+	regval |= (u64)IXGBE_READ_REG(hw, IXGBE_TXSTMPH) << 32;
+
+	ixgbe_systim_to_hwtstamp(adapter, &shhwtstamps, regval);
+	skb_tstamp_tx(buffer_info->skb, &shhwtstamps);
+}
+
+/**
+ * ixgbe_rx_hwtstamp - utility function which checks for RX time stamp
+ * @q_vector: pointer to q_vector containing needed info
+ * @skb: pointer to the skb
+ *
+ * If we were asked to do hardware stamping and such a time stamp is
+ * available, then it must have been for this skb here because we
+ * allow only one such packet into the queue.
+ */
+static void ixgbe_rx_hwtstamp(struct ixgbe_q_vector *q_vector,
+			      struct sk_buff *skb)
+{
+	struct ixgbe_adapter *adapter = q_vector->adapter;
+	struct ixgbe_hw *hw = &adapter->hw;
+	u64 regval = 0;
+
+	/*
+	 * If this bit is set, then the RX registers contain the time stamp. No
+	 * other packet will be time stamped until we read these registers, so
+	 * read the registers to make them available again. Because only one
+	 * packet can be time stamped at a time, we know that the register
+	 * values must belong to this one here and therefore we don't need to
+	 * compare any of the additional attributes stored for it.
+	 *
+	 * If nothing went wrong, then it should have a skb_shared_tx that we
+	 * can turn into a skb_shared_hwtstamps.
+	 */
+	if (!(IXGBE_READ_REG(hw, IXGBE_TSYNCRXCTL) & IXGBE_TSYNCRXCTL_VALID))
+		return;
+
+	regval |= (u64)IXGBE_READ_REG(hw, IXGBE_RXSTMPL);
+	regval |= (u64)IXGBE_READ_REG(hw, IXGBE_RXSTMPH) << 32;
+
+	ixgbe_systim_to_hwtstamp(adapter, skb_hwtstamps(skb), regval);
+}
+
 static void ixgbe_service_event_schedule(struct ixgbe_adapter *adapter)
 {
 	if (!test_bit(__IXGBE_DOWN, &adapter->state) &&
@@ -773,6 +987,9 @@ static bool ixgbe_clean_tx_irq(struct ixgbe_q_vector *q_vector,
 			tx_desc->wb.status = 0;
 			if (likely(tx_desc == eop_desc)) {
 				eop_desc = NULL;
+
+				ixgbe_tx_hwtstamp(q_vector, tx_buffer);
+
 				dev_kfree_skb_any(tx_buffer->skb);
 				tx_buffer->skb = NULL;
 
@@ -1399,6 +1616,9 @@ static bool ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector,
 		if (adapter->netdev->features & NETIF_F_RXHASH)
 			ixgbe_rx_hash(rx_desc, skb);
 
+		if (unlikely(staterr & IXGBE_RXDADV_STAT_TS))
+			ixgbe_rx_hwtstamp(q_vector, skb);
+
 		/* probably a little skewed due to removing CRC */
 		total_rx_bytes += skb->len;
 		total_rx_packets++;
@@ -5841,6 +6061,24 @@ static void ixgbe_check_hang_subtask(struct ixgbe_adapter *adapter)
 }
 
 /**
+ * ixgbe_check_cycle_overflow_subtask - read the clock every
+ * few seconds to prevent the timecounter from losing cycles
+ * due to cycle counter overflow.
+ * @adapter - pointer to the device adapter structure
+ */
+static void ixgbe_check_cycle_overflow_subtask(struct ixgbe_adapter *adapter)
+{
+	u64 ns;
+
+	if (!(adapter->flags2 & IXGBE_FLAG2_CYCLECOUNTER_RUNNING))
+		return; /* cycle counter not running: nothing to refresh */
+
+	/* sample the timecounter, then refresh the clock skew with it */
+	ns = timecounter_read(&adapter->clock);
+	timecompare_update(&adapter->compare, ns);
+}
+
+/**
  * ixgbe_watchdog_update_link - update the link status
  * @adapter - pointer to the device adapter structure
  * @link_speed - pointer to a u32 to store the link_speed
@@ -5922,6 +6160,9 @@ static void ixgbe_watchdog_link_is_up(struct ixgbe_adapter *adapter)
 		flow_rx = false;
 		break;
 	}
+
+	ixgbe_init_cyclecounter(adapter);
+
 	e_info(drv, "NIC Link is Up %s, Flow Control: %s\n",
 	       (link_speed == IXGBE_LINK_SPEED_10GB_FULL ?
 	       "10 Gbps" :
@@ -6268,6 +6509,7 @@ static void ixgbe_service_task(struct work_struct *work)
 	ixgbe_watchdog_subtask(adapter);
 	ixgbe_fdir_reinit_subtask(adapter);
 	ixgbe_check_hang_subtask(adapter);
+	ixgbe_check_cycle_overflow_subtask(adapter);
 
 	ixgbe_service_event_complete(adapter);
 }
@@ -6425,6 +6667,9 @@ static __le32 ixgbe_tx_cmd_type(u32 tx_flags)
 	if (tx_flags & IXGBE_TX_FLAGS_HW_VLAN)
 		cmd_type |= cpu_to_le32(IXGBE_ADVTXD_DCMD_VLE);
 
+	if (tx_flags & IXGBE_TX_FLAGS_TSTAMP)
+		cmd_type |= cpu_to_le32(IXGBE_ADVTXD_MAC_TSTAMP);
+
 	/* set segmentation enable bits for TSO/FSO */
 #ifdef IXGBE_FCOE
 	if ((tx_flags & IXGBE_TX_FLAGS_TSO) || (tx_flags & IXGBE_TX_FLAGS_FSO))
@@ -6800,6 +7045,11 @@ netdev_tx_t ixgbe_xmit_frame_ring(struct sk_buff *skb,
 		return NETDEV_TX_BUSY;
 	}
 
+	if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) {
+		skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
+		tx_flags |= IXGBE_TX_FLAGS_TSTAMP;
+	}
+
 #ifdef CONFIG_PCI_IOV
 	if (adapter->flags & IXGBE_FLAG_SRIOV_ENABLED)
 		tx_flags |= IXGBE_TX_FLAGS_TXSW;
@@ -6952,11 +7202,187 @@ static int ixgbe_mdio_write(struct net_device *netdev, int prtad, int devad,
 	return hw->phy.ops.write_reg(hw, addr, devad, value);
 }
 
+/**
+ * ixgbe_hwtstamp_ioctl - control hardware time stamping
+ * @netdev: net device whose adapter is being configured
+ * @ifr: request whose ifr_data points at a user struct hwtstamp_config
+ * @cmd: ioctl command number (not examined here)
+ *
+ * Outgoing time stamping can be enabled and disabled. Play nice and
+ * disable it when requested, although it shouldn't cause any overhead
+ * when no packet needs it. At most one packet in the queue may be
+ * marked for time stamping, otherwise it would be impossible to tell
+ * for sure to which packet the hardware time stamp belongs.
+ *
+ * Incoming time stamping has to be configured via the hardware
+ * filters. Not all combinations are supported, in particular event
+ * type has to be specified. Matching the kind of event packet is
+ * not supported, with the exception of "all V2 events regardless of
+ * level 2 or 4".
+ *
+ **/
+static int ixgbe_hwtstamp_ioctl(struct net_device *netdev,
+				struct ifreq *ifr, int cmd)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+	struct ixgbe_hw *hw = &adapter->hw;
+	struct hwtstamp_config config;
+	u32 tsync_tx_ctl = IXGBE_TSYNCTXCTL_ENABLED;
+	u32 tsync_rx_ctl = IXGBE_TSYNCRXCTL_ENABLED;
+	u32 tsync_rx_mtrl = 0;
+	bool is_l4 = false;
+	bool is_l2 = false;
+	u32 regval;
+
+	if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
+		return -EFAULT;
+
+	/* reserved for future extensions */
+	if (config.flags)
+		return -EINVAL;
+
+	switch (config.tx_type) {
+	case HWTSTAMP_TX_OFF:
+		tsync_tx_ctl = 0; /* fall through */
+	case HWTSTAMP_TX_ON:
+		break;
+	default:
+		return -ERANGE;
+	}
+
+	switch (config.rx_filter) {
+	case HWTSTAMP_FILTER_NONE:
+		tsync_rx_ctl = 0;
+		break;
+	case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
+		tsync_rx_ctl |= IXGBE_TSYNCRXCTL_TYPE_L4_V1;
+		tsync_rx_mtrl = IXGBE_RXMTRL_V1_SYNC_MSG;
+		is_l4 = true;
+		break;
+	case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
+		tsync_rx_ctl |= IXGBE_TSYNCRXCTL_TYPE_L4_V1;
+		tsync_rx_mtrl = IXGBE_RXMTRL_V1_DELAY_REQ_MSG;
+		is_l4 = true;
+		break;
+	case HWTSTAMP_FILTER_PTP_V2_SYNC:
+	case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
+	case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
+		tsync_rx_ctl |= IXGBE_TSYNCRXCTL_TYPE_L2_L4_V2;
+		tsync_rx_mtrl = IXGBE_RXMTRL_V2_SYNC_MSG;
+		is_l2 = true;
+		is_l4 = true;
+		config.rx_filter = HWTSTAMP_FILTER_SOME;
+		break;
+	case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
+	case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
+	case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
+		tsync_rx_ctl |= IXGBE_TSYNCRXCTL_TYPE_L2_L4_V2;
+		tsync_rx_mtrl = IXGBE_RXMTRL_V2_DELAY_REQ_MSG;
+		is_l2 = true;
+		is_l4 = true;
+		config.rx_filter = HWTSTAMP_FILTER_SOME;
+		break;
+	case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
+	case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
+	case HWTSTAMP_FILTER_PTP_V2_EVENT:
+		tsync_rx_ctl |= IXGBE_TSYNCRXCTL_TYPE_EVENT_V2;
+		config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
+		is_l2 = true;
+		is_l4 = true;
+		break;
+	case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
+	case HWTSTAMP_FILTER_ALL:
+	default:
+		/*
+		 * register RXMTRL must be set, therefore it is not
+		 * possible to time stamp both V1 Sync and Delay_Req messages
+		 * and hardware does not support timestamping all packets
+		 * => return error
+		 */
+		return -ERANGE;
+	}
+
+	if (hw->mac.type == ixgbe_mac_82598EB) {
+		if (tsync_rx_ctl | tsync_tx_ctl)
+			return -ERANGE;
+		return 0;
+	}
+
+	/* define ethertype filter for timestamped packets */
+	if (is_l2)
+		IXGBE_WRITE_REG(hw, IXGBE_ETQF(3),
+				(IXGBE_ETQF_FILTER_EN | /* enable filter */
+				 IXGBE_ETQF_1588 | /* enable timestamping */
+				 ETH_P_1588));     /* 1588 eth protocol type */
+	else
+		IXGBE_WRITE_REG(hw, IXGBE_ETQF(3), 0);
+
+#define PTP_PORT 319
+	/* L4 Queue Filter[3]: filter by destination port and protocol */
+	if (is_l4) {
+		u32 ftqf = (IXGBE_FTQF_PROTOCOL_UDP /* UDP */
+			    | IXGBE_FTQF_POOL_MASK_EN /* Pool not compared */
+			    | IXGBE_FTQF_QUEUE_ENABLE);
+
+		ftqf |= ((IXGBE_FTQF_PROTOCOL_COMP_MASK /* protocol check */
+			  & IXGBE_FTQF_DEST_PORT_MASK /* dest check */
+			  & IXGBE_FTQF_SOURCE_PORT_MASK) /* source check */
+			 << IXGBE_FTQF_5TUPLE_MASK_SHIFT);
+
+		IXGBE_WRITE_REG(hw, IXGBE_L34T_IMIR(3),
+				(3 << IXGBE_IMIR_RX_QUEUE_SHIFT_82599 |
+				 IXGBE_IMIR_SIZE_BP_82599));
+
+		/* enable port check */
+		IXGBE_WRITE_REG(hw, IXGBE_SDPQF(3),
+				(htons(PTP_PORT) |
+				 htons(PTP_PORT) << 16));
+
+		IXGBE_WRITE_REG(hw, IXGBE_FTQF(3), ftqf);
+
+		tsync_rx_mtrl |= PTP_PORT << 16;
+	} else {
+		IXGBE_WRITE_REG(hw, IXGBE_FTQF(3), 0);
+	}
+
+	/* enable/disable TX */
+	regval = IXGBE_READ_REG(hw, IXGBE_TSYNCTXCTL);
+	regval &= ~IXGBE_TSYNCTXCTL_ENABLED;
+	regval |= tsync_tx_ctl;
+	IXGBE_WRITE_REG(hw, IXGBE_TSYNCTXCTL, regval);
+
+	/* enable/disable RX */
+	regval = IXGBE_READ_REG(hw, IXGBE_TSYNCRXCTL);
+	regval &= ~(IXGBE_TSYNCRXCTL_ENABLED | IXGBE_TSYNCRXCTL_TYPE_MASK);
+	regval |= tsync_rx_ctl;
+	IXGBE_WRITE_REG(hw, IXGBE_TSYNCRXCTL, regval);
+
+	/* define which PTP packets are time stamped */
+	IXGBE_WRITE_REG(hw, IXGBE_RXMTRL, tsync_rx_mtrl);
+
+	IXGBE_WRITE_FLUSH(hw);
+
+	adapter->hwtstamp_config = config;
+
+	/* clear TX/RX time stamp registers, just to be sure */
+	regval = IXGBE_READ_REG(hw, IXGBE_TXSTMPH);
+	regval = IXGBE_READ_REG(hw, IXGBE_RXSTMPH);
+
+	return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
+		-EFAULT : 0;
+}
+
+
 static int ixgbe_ioctl(struct net_device *netdev, struct ifreq *req, int cmd)
 {
 	struct ixgbe_adapter *adapter = netdev_priv(netdev);
 
-	return mdio_mii_ioctl(&adapter->hw.phy.mdio, if_mii(req), cmd);
+	switch (cmd) {
+	case SIOCSHWTSTAMP:
+		return ixgbe_hwtstamp_ioctl(netdev, req, cmd);
+	default:
+		return mdio_mii_ioctl(&adapter->hw.phy.mdio, if_mii(req), cmd);
+	}
 }
 
 /**
@@ -7643,6 +8069,23 @@ static int __devinit ixgbe_probe(struct pci_dev *pdev,
 	/* pick up the PCI bus settings for reporting later */
 	hw->mac.ops.get_bus_info(hw);
 
+	/* initialize the cycle counter */
+	ixgbe_init_cyclecounter(adapter);
+
+	/*
+	 * Provide a timestamp synchronized against the system wall
+	 * clock. NIC time stamp reading requires ~3us per sample,
+	 * each sample was pretty stable even under load
+	 * => only require 10 samples for each offset comparison.
+	 */
+	if (adapter->flags2 & IXGBE_FLAG2_CYCLECOUNTER_RUNNING) {
+		memset(&adapter->compare, 0, sizeof(adapter->compare));
+		adapter->compare.source = &adapter->clock;
+		adapter->compare.target = ktime_get_real;
+		adapter->compare.num_samples = 10;
+		timecompare_update(&adapter->compare, 0);
+	}
+
 	/* print bus type/speed/width info */
 	e_dev_info("(PCI Express:%s:%s) %pM\n",
 		   (hw->bus.speed == ixgbe_bus_speed_5000 ? "5.0GT/s" :
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h
index 6c5cca8..8692672 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h
@@ -1837,6 +1837,36 @@ enum {
 #define IXGBE_RXDCTL_RLPML_EN   0x00008000
 #define IXGBE_RXDCTL_VME        0x40000000  /* VLAN mode enable */
 
+#define IXGBE_TSYNCTXCTL_VALID     0x00000001 /* Tx timestamp valid */
+#define IXGBE_TSYNCTXCTL_ENABLED   0x00000010 /* Tx timestamping enabled */
+
+#define IXGBE_TSYNCRXCTL_VALID     0x00000001 /* Rx timestamp valid */
+#define IXGBE_TSYNCRXCTL_TYPE_MASK 0x0000000E /* Rx type mask */
+#define IXGBE_TSYNCRXCTL_TYPE_L2_V2      0x00
+#define IXGBE_TSYNCRXCTL_TYPE_L4_V1      0x02
+#define IXGBE_TSYNCRXCTL_TYPE_L2_L4_V2   0x04
+#define IXGBE_TSYNCRXCTL_TYPE_EVENT_V2   0x0A
+#define IXGBE_TSYNCRXCTL_ENABLED   0x00000010 /* Rx Timestamping enabled */
+
+#define IXGBE_RXMTRL_V1_CTRLT_MASK 0x000000FF
+#define IXGBE_RXMTRL_V1_SYNC_MSG         0x00
+#define IXGBE_RXMTRL_V1_DELAY_REQ_MSG    0x01
+#define IXGBE_RXMTRL_V1_FOLLOWUP_MSG     0x02
+#define IXGBE_RXMTRL_V1_DELAY_RESP_MSG   0x03
+#define IXGBE_RXMTRL_V1_MGMT_MSG         0x04
+
+#define IXGBE_RXMTRL_V2_MSGID_MASK      0x0000FF00
+#define IXGBE_RXMTRL_V2_SYNC_MSG            0x0000
+#define IXGBE_RXMTRL_V2_DELAY_REQ_MSG       0x0100
+#define IXGBE_RXMTRL_V2_PDELAY_REQ_MSG      0x0200
+#define IXGBE_RXMTRL_V2_PDELAY_RESP_MSG     0x0300
+#define IXGBE_RXMTRL_V2_FOLLOWUP_MSG        0x0800
+#define IXGBE_RXMTRL_V2_DELAY_RESP_MSG      0x0900
+#define IXGBE_RXMTRL_V2_PDELAY_FOLLOWUP_MSG 0x0A00
+#define IXGBE_RXMTRL_V2_ANNOUNCE_MSG        0x0B00
+#define IXGBE_RXMTRL_V2_SIGNALLING_MSG      0x0C00
+#define IXGBE_RXMTRL_V2_MGMT_MSG            0x0D00
+
 #define IXGBE_FCTRL_SBP 0x00000002 /* Store Bad Packet */
 #define IXGBE_FCTRL_MPE 0x00000100 /* Multicast Promiscuous Ena*/
 #define IXGBE_FCTRL_UPE 0x00000200 /* Unicast Promiscuous Ena */
@@ -1966,6 +1996,7 @@ enum {
 #define IXGBE_RXDADV_STAT_FCSTAT_NODDP  0x00000010 /* 01: Ctxt w/o DDP */
 #define IXGBE_RXDADV_STAT_FCSTAT_FCPRSP 0x00000020 /* 10: Recv. FCP_RSP */
 #define IXGBE_RXDADV_STAT_FCSTAT_DDP    0x00000030 /* 11: Ctxt w/ DDP */
+#define IXGBE_RXDADV_STAT_TS            0x00010000 /* IEEE1588 Time Stamp */
 
 /* PSRTYPE bit definitions */
 #define IXGBE_PSRTYPE_TCPHDR    0x00000010
@@ -2243,6 +2274,7 @@ struct ixgbe_adv_tx_context_desc {
 /* Adv Transmit Descriptor Config Masks */
 #define IXGBE_ADVTXD_DTALEN_MASK      0x0000FFFF /* Data buf length(bytes) */
 #define IXGBE_ADVTXD_MAC_LINKSEC      0x00040000 /* Insert LinkSec */
+#define IXGBE_ADVTXD_MAC_TSTAMP       0x00080000 /* IEEE1588 time stamp */
 #define IXGBE_ADVTXD_IPSEC_SA_INDEX_MASK   0x000003FF /* IPSec SA index */
 #define IXGBE_ADVTXD_IPSEC_ESP_LEN_MASK    0x000001FF /* IPSec ESP length */
 #define IXGBE_ADVTXD_DTYP_MASK  0x00F00000 /* DTYP mask */
-- 
1.7.6.4

^ permalink raw reply related

* [net-next 6/6] ixgbe: change the eeprom version reported by ethtool
From: Jeff Kirsher @ 2011-10-17 11:32 UTC (permalink / raw)
  To: davem; +Cc: Emil Tantilov, netdev, gospo, sassmann, Jeff Kirsher
In-Reply-To: <1318851161-30163-1-git-send-email-jeffrey.t.kirsher@intel.com>

From: Emil Tantilov <emil.s.tantilov@intel.com>

Use 32bit value starting at offset 0x2d for displaying the firmware
version in ethtool. This should work for all current ixgbe HW.

Signed-off-by: Emil Tantilov <emil.s.tantilov@intel.com>
Tested-by: Stephen Ko <stephen.s.ko@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ixgbe/ixgbe.h         |    3 ++-
 drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c |   13 +++++++------
 drivers/net/ethernet/intel/ixgbe/ixgbe_main.c    |    7 ++++---
 3 files changed, 13 insertions(+), 10 deletions(-)

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
index 9e6635e..4881807 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
@@ -518,7 +518,8 @@ struct ixgbe_adapter {
 	u64 rsc_total_count;
 	u64 rsc_total_flush;
 	u32 wol;
-	u16 eeprom_version;
+	u16 eeprom_verh;
+	u16 eeprom_verl;
 	u16 eeprom_cap;
 
 	int node;
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
index 7acfce3..70d58c3 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
@@ -889,21 +889,22 @@ static void ixgbe_get_drvinfo(struct net_device *netdev,
 {
 	struct ixgbe_adapter *adapter = netdev_priv(netdev);
 	char firmware_version[32];
+	u32 nvm_track_id;
 
 	strncpy(drvinfo->driver, ixgbe_driver_name,
 	        sizeof(drvinfo->driver) - 1);
 	strncpy(drvinfo->version, ixgbe_driver_version,
 	        sizeof(drvinfo->version) - 1);
 
-	snprintf(firmware_version, sizeof(firmware_version), "%d.%d-%d",
-	         (adapter->eeprom_version & 0xF000) >> 12,
-	         (adapter->eeprom_version & 0x0FF0) >> 4,
-	         adapter->eeprom_version & 0x000F);
+	nvm_track_id = (adapter->eeprom_verh << 16) |
+			adapter->eeprom_verl;
+	snprintf(firmware_version, sizeof(firmware_version), "0x%08x",
+		 nvm_track_id);
 
 	strncpy(drvinfo->fw_version, firmware_version,
-	        sizeof(drvinfo->fw_version));
+		sizeof(drvinfo->fw_version) - 1);
 	strncpy(drvinfo->bus_info, pci_name(adapter->pdev),
-	        sizeof(drvinfo->bus_info));
+		sizeof(drvinfo->bus_info) - 1);
 	drvinfo->n_stats = IXGBE_STATS_LEN;
 	drvinfo->testinfo_len = IXGBE_TEST_LEN;
 	drvinfo->regdump_len = ixgbe_get_regs_len(netdev);
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index c92c3a7..31f4f53 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -8066,6 +8066,10 @@ static int __devinit ixgbe_probe(struct pci_dev *pdev,
 	}
 	device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol);
 
+	/* save off EEPROM version number */
+	hw->eeprom.ops.read(hw, 0x2e, &adapter->eeprom_verh);
+	hw->eeprom.ops.read(hw, 0x2d, &adapter->eeprom_verl);
+
 	/* pick up the PCI bus settings for reporting later */
 	hw->mac.ops.get_bus_info(hw);
 
@@ -8115,9 +8119,6 @@ static int __devinit ixgbe_probe(struct pci_dev *pdev,
 			   "is required.\n");
 	}
 
-	/* save off EEPROM version number */
-	hw->eeprom.ops.read(hw, 0x29, &adapter->eeprom_version);
-
 	/* reset the hardware with the new settings */
 	err = hw->mac.ops.start_hw(hw);
 
-- 
1.7.6.4

^ permalink raw reply related

* [net-next 4/6] ixgbe: allow eeprom writes via ethtool
From: Jeff Kirsher @ 2011-10-17 11:32 UTC (permalink / raw)
  To: davem; +Cc: Emil Tantilov, netdev, gospo, sassmann, Jeff Kirsher
In-Reply-To: <1318851161-30163-1-git-send-email-jeffrey.t.kirsher@intel.com>

From: Emil Tantilov <emil.s.tantilov@intel.com>

Implement support for ethtool -E

Signed-off-by: Emil Tantilov <emil.s.tantilov@intel.com>
Tested-by: Phil Schmitt <phillip.j.schmitt@intel.com>
Tested-by: Stephen Ko <stephen.s.ko@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c   |    2 +
 drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c |   71 ++++++++++++++++++++++
 2 files changed, 73 insertions(+), 0 deletions(-)

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c
index e02e911..ef2afef 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c
@@ -1305,6 +1305,8 @@ static struct ixgbe_mac_operations mac_ops_82598 = {
 static struct ixgbe_eeprom_operations eeprom_ops_82598 = {
 	.init_params		= &ixgbe_init_eeprom_params_generic,
 	.read			= &ixgbe_read_eerd_generic,
+	.write			= &ixgbe_write_eeprom_generic,
+	.write_buffer		= &ixgbe_write_eeprom_buffer_bit_bang_generic,
 	.read_buffer		= &ixgbe_read_eerd_buffer_generic,
 	.calc_checksum          = &ixgbe_calc_eeprom_checksum_generic,
 	.validate_checksum	= &ixgbe_validate_eeprom_checksum_generic,
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
index e102ff6..7acfce3 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
@@ -814,6 +814,76 @@ static int ixgbe_get_eeprom(struct net_device *netdev,
 	return ret_val;
 }
 
+static int ixgbe_set_eeprom(struct net_device *netdev,
+			    struct ethtool_eeprom *eeprom, u8 *bytes)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+	struct ixgbe_hw *hw = &adapter->hw;
+	u16 *eeprom_buff;
+	void *ptr;
+	int max_len, first_word, last_word, ret_val = 0;
+	u16 i;
+
+	if (eeprom->len == 0)
+		return -EINVAL;
+
+	if (eeprom->magic != (hw->vendor_id | (hw->device_id << 16)))
+		return -EINVAL;
+
+	max_len = hw->eeprom.word_size * 2;
+
+	first_word = eeprom->offset >> 1;
+	last_word = (eeprom->offset + eeprom->len - 1) >> 1;
+	eeprom_buff = kmalloc(max_len, GFP_KERNEL);
+	if (!eeprom_buff)
+		return -ENOMEM;
+
+	ptr = eeprom_buff;
+
+	if (eeprom->offset & 1) {
+		/*
+		 * need read/modify/write of first changed EEPROM word
+		 * only the second byte of the word is being modified
+		 */
+		ret_val = hw->eeprom.ops.read(hw, first_word, &eeprom_buff[0]);
+		if (ret_val)
+			goto err;
+
+		ptr++; /* skip the first byte; void * steps by one byte (GNU C) */
+	}
+	if ((eeprom->offset + eeprom->len) & 1) {
+		/*
+		 * need read/modify/write of last changed EEPROM word
+		 * only the first byte of the word is being modified
+		 */
+		ret_val = hw->eeprom.ops.read(hw, last_word,
+					  &eeprom_buff[last_word - first_word]);
+		if (ret_val)
+			goto err;
+	}
+
+	/* Device's eeprom is always little-endian, word addressable */
+	for (i = 0; i < last_word - first_word + 1; i++)
+		le16_to_cpus(&eeprom_buff[i]);
+
+	memcpy(ptr, bytes, eeprom->len);
+
+	for (i = 0; i < last_word - first_word + 1; i++)
+		cpu_to_le16s(&eeprom_buff[i]);
+
+	ret_val = hw->eeprom.ops.write_buffer(hw, first_word,
+					      last_word - first_word + 1,
+					      eeprom_buff);
+
+	/* Update the checksum; NOTE(review): its result is not checked */
+	if (ret_val == 0)
+		hw->eeprom.ops.update_checksum(hw);
+
+err:
+	kfree(eeprom_buff);
+	return ret_val;
+}
+
 static void ixgbe_get_drvinfo(struct net_device *netdev,
                               struct ethtool_drvinfo *drvinfo)
 {
@@ -2524,6 +2594,7 @@ static const struct ethtool_ops ixgbe_ethtool_ops = {
 	.get_link               = ethtool_op_get_link,
 	.get_eeprom_len         = ixgbe_get_eeprom_len,
 	.get_eeprom             = ixgbe_get_eeprom,
+	.set_eeprom             = ixgbe_set_eeprom,
 	.get_ringparam          = ixgbe_get_ringparam,
 	.set_ringparam          = ixgbe_set_ringparam,
 	.get_pauseparam         = ixgbe_get_pauseparam,
-- 
1.7.6.4

^ permalink raw reply related

* [net-next 3/6] ixgbe: fix endianess when writing driver version to firmware
From: Jeff Kirsher @ 2011-10-17 11:32 UTC (permalink / raw)
  To: davem; +Cc: Emil Tantilov, netdev, gospo, sassmann, Jeff Kirsher
In-Reply-To: <1318851161-30163-1-git-send-email-jeffrey.t.kirsher@intel.com>

From: Emil Tantilov <emil.s.tantilov@intel.com>

This patch makes sure that register writes are in little endian and
also converts the reads back to big-endian.

Signed-off-by: Emil Tantilov <emil.s.tantilov@intel.com>
Tested-by: Stephen Ko <stephen.s.ko@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ixgbe/ixgbe_common.c |   16 ++++++++--------
 1 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c
index 35fa444..834f044 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c
@@ -3341,7 +3341,7 @@ static u8 ixgbe_calculate_checksum(u8 *buffer, u32 length)
  *  Communicates with the manageability block.  On success return 0
  *  else return IXGBE_ERR_HOST_INTERFACE_COMMAND.
  **/
-static s32 ixgbe_host_interface_command(struct ixgbe_hw *hw, u8 *buffer,
+static s32 ixgbe_host_interface_command(struct ixgbe_hw *hw, u32 *buffer,
 					u32 length)
 {
 	u32 hicr, i;
@@ -3374,7 +3374,7 @@ static s32 ixgbe_host_interface_command(struct ixgbe_hw *hw, u8 *buffer,
 	 */
 	for (i = 0; i < dword_len; i++)
 		IXGBE_WRITE_REG_ARRAY(hw, IXGBE_FLEX_MNG,
-				      i, *((u32 *)buffer + i));
+				      i, cpu_to_le32(buffer[i]));
 
 	/* Setting this bit tells the ARC that a new command is pending. */
 	IXGBE_WRITE_REG(hw, IXGBE_HICR, hicr | IXGBE_HICR_C);
@@ -3398,9 +3398,10 @@ static s32 ixgbe_host_interface_command(struct ixgbe_hw *hw, u8 *buffer,
 	dword_len = hdr_size >> 2;
 
 	/* first pull in the header so we know the buffer length */
-	for (i = 0; i < dword_len; i++)
-		*((u32 *)buffer + i) =
-			IXGBE_READ_REG_ARRAY(hw, IXGBE_FLEX_MNG, i);
+	for (i = 0; i < dword_len; i++) {
+		buffer[i] = IXGBE_READ_REG_ARRAY(hw, IXGBE_FLEX_MNG, i);
+		le32_to_cpus(&buffer[i]);
+	}
 
 	/* If there is any thing in data position pull it in */
 	buf_len = ((struct ixgbe_hic_hdr *)buffer)->buf_len;
@@ -3418,8 +3419,7 @@ static s32 ixgbe_host_interface_command(struct ixgbe_hw *hw, u8 *buffer,
 
 	/* Pull in the rest of the buffer (i is where we left off)*/
 	for (; i < buf_len; i++)
-		*((u32 *)buffer + i) =
-			IXGBE_READ_REG_ARRAY(hw, IXGBE_FLEX_MNG, i);
+		buffer[i] = IXGBE_READ_REG_ARRAY(hw, IXGBE_FLEX_MNG, i);
 
 out:
 	return ret_val;
@@ -3465,7 +3465,7 @@ s32 ixgbe_set_fw_drv_ver_generic(struct ixgbe_hw *hw, u8 maj, u8 min,
 	fw_cmd.pad2 = 0;
 
 	for (i = 0; i <= FW_CEM_MAX_RETRIES; i++) {
-		ret_val = ixgbe_host_interface_command(hw, (u8 *)&fw_cmd,
+		ret_val = ixgbe_host_interface_command(hw, (u32 *)&fw_cmd,
 						       sizeof(fw_cmd));
 		if (ret_val != 0)
 			continue;
-- 
1.7.6.4

^ permalink raw reply related

* [net-next 2/6] igb: Check if subordinate VFs are assigned to virtual machines
From: Jeff Kirsher @ 2011-10-17 11:32 UTC (permalink / raw)
  To: davem
  Cc: Greg Rose, netdev, gospo, sassmann, Konrad Rzeszutek Wilk,
	Christian Benvenuti, Sathya Perla, Dimitris Michailidis,
	Jon Mason, James Smart, Jeff Kirsher
In-Reply-To: <1318851161-30163-1-git-send-email-jeffrey.t.kirsher@intel.com>

From: Greg Rose <gregory.v.rose@intel.com>

Kvm and the Xen pci-back driver will set a flag in the virtual function
pci device dev_flags when the VF is assigned to a guest VM.  Before
destroying subordinate VFs check to see if the flag is set and if so
skip the call to pci_disable_sriov() to avoid system crashes.

Copy the maintainer for the Xen pci-back driver.  Also CC'ing
maintainers of all drivers found to call pci_disable_sriov().

Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Cc: Christian Benvenuti <benve@cisco.com>
Cc: Sathya Perla <sathya.perla@emulex.com>
Cc: Dimitris Michailidis <dm@chelsio.com>
Cc: Jon Mason <jdmason@kudzu.us>
Cc: James Smart <james.smart@emulex.com>

Signed-off-by: Greg Rose <gregory.v.rose@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/igb/igb.h      |    3 +
 drivers/net/ethernet/intel/igb/igb_main.c |  176 ++++++++++++++++++++++++-----
 2 files changed, 149 insertions(+), 30 deletions(-)

diff --git a/drivers/net/ethernet/intel/igb/igb.h b/drivers/net/ethernet/intel/igb/igb.h
index 4c500a7..5594430 100644
--- a/drivers/net/ethernet/intel/igb/igb.h
+++ b/drivers/net/ethernet/intel/igb/igb.h
@@ -72,6 +72,8 @@ struct igb_adapter;
 #define IGB_MAX_VF_MC_ENTRIES              30
 #define IGB_MAX_VF_FUNCTIONS               8
 #define IGB_MAX_VFTA_ENTRIES               128
+#define IGB_82576_VF_DEV_ID                0x10CA
+#define IGB_I350_VF_DEV_ID                 0x1520
 
 struct vf_data_storage {
 	unsigned char vf_mac_addresses[ETH_ALEN];
@@ -83,6 +85,7 @@ struct vf_data_storage {
 	u16 pf_vlan; /* When set, guest VLAN config not allowed. */
 	u16 pf_qos;
 	u16 tx_rate;
+	struct pci_dev *vfdev;
 };
 
 #define IGB_VF_FLAG_CTS            0x00000001 /* VF is clear to send data */
diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
index c10cc71..95eb9d5 100644
--- a/drivers/net/ethernet/intel/igb/igb_main.c
+++ b/drivers/net/ethernet/intel/igb/igb_main.c
@@ -162,6 +162,9 @@ static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate);
 static int igb_ndo_get_vf_config(struct net_device *netdev, int vf,
 				 struct ifla_vf_info *ivi);
 static void igb_check_vf_rate_limit(struct igb_adapter *);
+static int igb_vf_configure(struct igb_adapter *adapter, int vf);
+static int igb_find_enabled_vfs(struct igb_adapter *adapter);
+static int igb_check_vf_assignment(struct igb_adapter *adapter);
 
 #ifdef CONFIG_PM
 static int igb_suspend(struct pci_dev *, pm_message_t);
@@ -2232,8 +2235,12 @@ static void __devexit igb_remove(struct pci_dev *pdev)
 	/* reclaim resources allocated to VFs */
 	if (adapter->vf_data) {
 		/* disable iov and allow time for transactions to clear */
-		pci_disable_sriov(pdev);
-		msleep(500);
+		if (!igb_check_vf_assignment(adapter)) {
+			pci_disable_sriov(pdev);
+			msleep(500);
+		} else {
+			dev_info(&pdev->dev, "VF(s) assigned to guests!\n");
+		}
 
 		kfree(adapter->vf_data);
 		adapter->vf_data = NULL;
@@ -2270,42 +2277,49 @@ static void __devinit igb_probe_vfs(struct igb_adapter * adapter)
 {
 #ifdef CONFIG_PCI_IOV
 	struct pci_dev *pdev = adapter->pdev;
+	int old_vfs = igb_find_enabled_vfs(adapter);
+	int i;
 
-	if (adapter->vfs_allocated_count) {
-		adapter->vf_data = kcalloc(adapter->vfs_allocated_count,
-		                           sizeof(struct vf_data_storage),
-		                           GFP_KERNEL);
-		/* if allocation failed then we do not support SR-IOV */
-		if (!adapter->vf_data) {
-			adapter->vfs_allocated_count = 0;
-			dev_err(&pdev->dev, "Unable to allocate memory for VF "
-			        "Data Storage\n");
-		}
+	if (old_vfs) {
+		dev_info(&pdev->dev, "%d pre-allocated VFs found - override "
+			 "max_vfs setting of %d\n", old_vfs, max_vfs);
+		adapter->vfs_allocated_count = old_vfs;
 	}
 
-	if (pci_enable_sriov(pdev, adapter->vfs_allocated_count)) {
-		kfree(adapter->vf_data);
-		adapter->vf_data = NULL;
-#endif /* CONFIG_PCI_IOV */
+	if (!adapter->vfs_allocated_count)
+		return;
+
+	adapter->vf_data = kcalloc(adapter->vfs_allocated_count,
+			sizeof(struct vf_data_storage), GFP_KERNEL);
+	/* if allocation failed then we do not support SR-IOV */
+	if (!adapter->vf_data) {
 		adapter->vfs_allocated_count = 0;
-#ifdef CONFIG_PCI_IOV
-	} else {
-		unsigned char mac_addr[ETH_ALEN];
-		int i;
-		dev_info(&pdev->dev, "%d vfs allocated\n",
-		         adapter->vfs_allocated_count);
-		for (i = 0; i < adapter->vfs_allocated_count; i++) {
-			random_ether_addr(mac_addr);
-			igb_set_vf_mac(adapter, i, mac_addr);
-		}
-		/* DMA Coalescing is not supported in IOV mode. */
-		if (adapter->flags & IGB_FLAG_DMAC)
-			adapter->flags &= ~IGB_FLAG_DMAC;
+		dev_err(&pdev->dev, "Unable to allocate memory for VF "
+				"Data Storage\n");
+		goto out;
 	}
+
+	if (!old_vfs) {
+		if (pci_enable_sriov(pdev, adapter->vfs_allocated_count))
+			goto err_out;
+	}
+	dev_info(&pdev->dev, "%d VFs allocated\n",
+			adapter->vfs_allocated_count);
+	for (i = 0; i < adapter->vfs_allocated_count; i++)
+		igb_vf_configure(adapter, i);
+
+	/* DMA Coalescing is not supported in IOV mode. */
+	adapter->flags &= ~IGB_FLAG_DMAC;
+	goto out;
+err_out:
+	kfree(adapter->vf_data);
+	adapter->vf_data = NULL;
+	adapter->vfs_allocated_count = 0;
+out:
+	return;
 #endif /* CONFIG_PCI_IOV */
 }
 
-
 /**
  * igb_init_hw_timer - Initialize hardware timer used with IEEE 1588 timestamp
  * @adapter: board private structure to initialize
@@ -4917,6 +4931,108 @@ static int igb_notify_dca(struct notifier_block *nb, unsigned long event,
 }
 #endif /* CONFIG_IGB_DCA */
 
+#ifdef CONFIG_PCI_IOV
+static int igb_vf_configure(struct igb_adapter *adapter, int vf)
+{
+	unsigned char mac_addr[ETH_ALEN];
+	struct pci_dev *pdev = adapter->pdev;
+	struct e1000_hw *hw = &adapter->hw;
+	struct pci_dev *pvfdev;
+	unsigned int device_id;
+	u16 thisvf_devfn = 0; /* not assigned in the default case below */
+
+	random_ether_addr(mac_addr);
+	igb_set_vf_mac(adapter, vf, mac_addr);
+
+	switch (adapter->hw.mac.type) {
+	case e1000_82576:
+		device_id = IGB_82576_VF_DEV_ID;
+		/* VF Stride for 82576 is 2 */
+		thisvf_devfn = (pdev->devfn + 0x80 + (vf << 1)) |
+			(pdev->devfn & 1);
+		break;
+	case e1000_i350:
+		device_id = IGB_I350_VF_DEV_ID;
+		/* VF Stride for I350 is 4 */
+		thisvf_devfn = (pdev->devfn + 0x80 + (vf << 2)) |
+			(pdev->devfn & 3);
+		break;
+	default:
+		device_id = 0;
+		break;
+	}
+
+	pvfdev = pci_get_device(hw->vendor_id, device_id, NULL);
+	while (pvfdev) {
+		if (pvfdev->devfn == thisvf_devfn)
+			break;
+		pvfdev = pci_get_device(hw->vendor_id,
+					device_id, pvfdev);
+	}
+
+	if (pvfdev)
+		adapter->vf_data[vf].vfdev = pvfdev;
+	else
+		dev_err(&pdev->dev,
+				"Couldn't find pci dev ptr for VF %4.4x\n",
+				thisvf_devfn);
+	return pvfdev != NULL;
+}
+
+static int igb_find_enabled_vfs(struct igb_adapter *adapter)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	struct pci_dev *pdev = adapter->pdev;
+	struct pci_dev *pvfdev;
+	u16 vf_devfn = 0;
+	u16 vf_stride;
+	unsigned int device_id;
+	int vfs_found = 0;
+
+	switch (adapter->hw.mac.type) {
+	case e1000_82576:
+		device_id = IGB_82576_VF_DEV_ID;
+		/* VF Stride for 82576 is 2 */
+		vf_stride = 2;
+		break;
+	case e1000_i350:
+		device_id = IGB_I350_VF_DEV_ID;
+		/* VF Stride for I350 is 4 */
+		vf_stride = 4;
+		break;
+	default:
+		device_id = 0;
+		vf_stride = 0;
+		break;
+	}
+
+	vf_devfn = pdev->devfn + 0x80; /* presumably devfn of VF 0 */
+	pvfdev = pci_get_device(hw->vendor_id, device_id, NULL);
+	while (pvfdev) {
+		if (pvfdev->devfn == vf_devfn)
+			vfs_found++;
+		vf_devfn += vf_stride; /* NOTE(review): runs on every pass */
+		pvfdev = pci_get_device(hw->vendor_id,
+					device_id, pvfdev);
+	}
+
+	return vfs_found;
+}
+
+static int igb_check_vf_assignment(struct igb_adapter *adapter)
+{
+	int i;
+	for (i = 0; i < adapter->vfs_allocated_count; i++) {
+		if (adapter->vf_data[i].vfdev) {
+			if (adapter->vf_data[i].vfdev->dev_flags &
+					PCI_DEV_FLAGS_ASSIGNED)
+				return true; /* a VF is flagged as assigned */
+		}
+	}
+	return false; /* no recorded VF device carries the flag */
+}
+
+#endif
 static void igb_ping_all_vfs(struct igb_adapter *adapter)
 {
 	struct e1000_hw *hw = &adapter->hw;
-- 
1.7.6.4

^ permalink raw reply related

* [net-next 1/6] igbvf: Fix trunk vlan
From: Jeff Kirsher @ 2011-10-17 11:32 UTC (permalink / raw)
  To: davem; +Cc: Greg Rose, netdev, gospo, sassmann, Jiri Pirko, Jeff Kirsher
In-Reply-To: <1318851161-30163-1-git-send-email-jeffrey.t.kirsher@intel.com>

From: Greg Rose <gregory.v.rose@intel.com>

Changes to clean up the VLAN Rx path by Jiri Pirko broke trunk VLAN.
Trunk VLANs in a VF driver are those set using

"ip link set <pfdev> vf <n> <vlanid>"

Signed-off-by: Greg Rose <gregory.v.rose@intel.com>
CC: Jiri Pirko <jpirko@redhat.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/igbvf/netdev.c |    4 ++--
 1 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/intel/igbvf/netdev.c b/drivers/net/ethernet/intel/igbvf/netdev.c
index 32b3044..23cc40f 100644
--- a/drivers/net/ethernet/intel/igbvf/netdev.c
+++ b/drivers/net/ethernet/intel/igbvf/netdev.c
@@ -102,8 +102,8 @@ static void igbvf_receive_skb(struct igbvf_adapter *adapter,
 {
 	if (status & E1000_RXD_STAT_VP) {
 		u16 vid = le16_to_cpu(vlan) & E1000_RXD_SPC_VLAN_MASK;
-
-		__vlan_hwaccel_put_tag(skb, vid);
+		if (test_bit(vid, adapter->active_vlans))
+			__vlan_hwaccel_put_tag(skb, vid);
 	}
 	netif_receive_skb(skb);
 }
-- 
1.7.6.4

^ permalink raw reply related

* [net-next 0/6][pull request] Intel Wired LAN Driver Updates
From: Jeff Kirsher @ 2011-10-17 11:32 UTC (permalink / raw)
  To: davem; +Cc: Jeff Kirsher, netdev, gospo, sassmann

The following series contains updates to ixgbe, igbvf and igb.
This version of the series contains the following changes:

- igb fix/add check if subordinate VFs are assigned to VM's
- igbvf fix for trunk VLAN
- ixgbe 2 fixes for ethtool and 1 endianness fix

The following are changes since commit fd38f734cb8200529e281338514945fcbff2364b:
  igbvf: convert to ndo_fix_features
and are available in the git repository at
  git://git.kernel.org/pub/scm/linux/kernel/git/jkirsher/net-next.git

Emil Tantilov (3):
  ixgbe: fix endianess when writing driver version to firmware
  ixgbe: allow eeprom writes via ethtool
  ixgbe: change the eeprom version reported by ethtool

Greg Rose (2):
  igbvf: Fix trunk vlan
  igb: Check if subordinate VFs are assigned to virtual machines

Jacob Keller (1):
  ixgbe: add hardware timestamping support

 drivers/net/ethernet/intel/igb/igb.h             |    3 +
 drivers/net/ethernet/intel/igb/igb_main.c        |  176 +++++++--
 drivers/net/ethernet/intel/igbvf/netdev.c        |    4 +-
 drivers/net/ethernet/intel/ixgbe/ixgbe.h         |   24 ++-
 drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c   |    2 +
 drivers/net/ethernet/intel/ixgbe/ixgbe_common.c  |   16 +-
 drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c |   84 ++++-
 drivers/net/ethernet/intel/ixgbe/ixgbe_main.c    |  452 +++++++++++++++++++++-
 drivers/net/ethernet/intel/ixgbe/ixgbe_type.h    |   32 ++
 9 files changed, 742 insertions(+), 51 deletions(-)

-- 
1.7.6.4

^ permalink raw reply

* RE: [PATCH] smsc911x: Add regulator support
From: Robert MARKLUND @ 2011-10-17 11:30 UTC (permalink / raw)
  To: Mark Brown; +Cc: netdev@vger.kernel.org, Steve Glendinning, Mathieu Poirer
In-Reply-To: <20111017105256.GG5448@sirena.org.uk>

> -----Original Message-----
> From: Mark Brown [mailto:broonie@opensource.wolfsonmicro.com]
> Sent: den 17 oktober 2011 12:53
> To: Robert MARKLUND
> Cc: netdev@vger.kernel.org; Steve Glendinning; Mathieu Poirer
> Subject: Re: [PATCH] smsc911x: Add regulator support
> 
> On Mon, Oct 17, 2011 at 08:56:37AM +0200, Robert Marklund wrote:
> 
> > +	/* Request regulator for vddvario */
> > +	if (request && !pdata->regulator_vddvario) {
> > +		pdata->regulator_vddvario = regulator_get(&pdev-
> >dev,
> > +				"vddvario");
> > +		if (IS_ERR(pdata->regulator_vddvario)) {
> > +			netdev_warn(ndev,
> > +					"%s: Failed to
> get regulator '%s'\n",
> > +					__func__,
> "vddvario");
> > +			pdata->regulator_vddvario = NULL;
> > +		}
> 
> No, this is broken - look at how other devices use the regulator API.
> The driver should just request and use the regulators unconditionally
> and let the stubbing and mapping facilities the API has deal with
> ensuring that they always succeed.

[Robert MARKLUND] 
So what you mean is: get them and use them, ignore all the return codes, and let the FW take care of the error handling?

> 
> As a side note the use of "pdata" as a name for the driver internal
> data
> is really not helpful, pdata is traditionally the platform data passed
> in by the machine (which would be even more broken).

[Robert MARKLUND] 
In the driver they have used this name for this structure throughout the file; I just followed that.
Personally I think it will be more confusing to change the name of this structure in just this new function.

/R

^ permalink raw reply

* Re: [PATCH] smsc911x: Add regulator support
From: Mark Brown @ 2011-10-17 10:52 UTC (permalink / raw)
  To: Robert Marklund; +Cc: netdev, Steve Glendinning, Mathieu Poirer
In-Reply-To: <1318834597-3479-1-git-send-email-robert.marklund@stericsson.com>

On Mon, Oct 17, 2011 at 08:56:37AM +0200, Robert Marklund wrote:

> +	/* Request regulator for vddvario */
> +	if (request && !pdata->regulator_vddvario) {
> +		pdata->regulator_vddvario = regulator_get(&pdev->dev,
> +				"vddvario");
> +		if (IS_ERR(pdata->regulator_vddvario)) {
> +			netdev_warn(ndev,
> +					"%s: Failed to get regulator '%s'\n",
> +					__func__, "vddvario");
> +			pdata->regulator_vddvario = NULL;
> +		}

No, this is broken - look at how other devices use the regulator API.
The driver should just request and use the regulators unconditionally
and let the stubbing and mapping facilities the API has deal with
ensuring that they always succeed.

As a side note the use of "pdata" as a name for the driver internal data
is really not helpful, pdata is traditionally the platform data passed
in by the machine (which would be even more broken).

^ permalink raw reply

* Re: [PATCH 3/3] x25: Prevent skb overreads when checking call user data
From: Andrew Hendry @ 2011-10-17 10:28 UTC (permalink / raw)
  To: Matthew Daley; +Cc: netdev, Eric Dumazet, stable
In-Reply-To: <1318653905-13716-4-git-send-email-mattjd@gmail.com>

Acked-by: Andrew Hendry <andrew.hendry@gmail.com>

On Sat, Oct 15, 2011 at 3:45 PM, Matthew Daley <mattjd@gmail.com> wrote:
> x25_find_listener does not check that the amount of call user data given
> in the skb is big enough in per-socket comparisons, hence buffer
> overreads may occur.  Fix this by adding a check.
>
> Signed-off-by: Matthew Daley <mattjd@gmail.com>
> Cc: Eric Dumazet <eric.dumazet@gmail.com>
> Cc: Andrew Hendry <andrew.hendry@gmail.com>
> Cc: stable <stable@kernel.org>
> ---
>  net/x25/af_x25.c |    3 ++-
>  1 files changed, 2 insertions(+), 1 deletions(-)
>
> diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c
> index aa567b0..5f03e4e 100644
> --- a/net/x25/af_x25.c
> +++ b/net/x25/af_x25.c
> @@ -295,7 +295,8 @@ static struct sock *x25_find_listener(struct x25_address *addr,
>                         * Found a listening socket, now check the incoming
>                         * call user data vs this sockets call user data
>                         */
> -                       if(skb->len > 0 && x25_sk(s)->cudmatchlength > 0) {
> +                       if (x25_sk(s)->cudmatchlength > 0 &&
> +                               skb->len >= x25_sk(s)->cudmatchlength) {
>                                if((memcmp(x25_sk(s)->calluserdata.cuddata,
>                                        skb->data,
>                                        x25_sk(s)->cudmatchlength)) == 0) {
> --
> 1.7.2.5
>
>

^ permalink raw reply

* Re: [PATCH 2/3] x25: Handle undersized/fragmented skbs
From: Andrew Hendry @ 2011-10-17 10:28 UTC (permalink / raw)
  To: Matthew Daley; +Cc: netdev, Eric Dumazet, stable
In-Reply-To: <1318653905-13716-3-git-send-email-mattjd@gmail.com>

Ran through with a lot of corrupted data, looks stable.

Acked-by: Andrew Hendry <andrew.hendry@gmail.com>

On Sat, Oct 15, 2011 at 3:45 PM, Matthew Daley <mattjd@gmail.com> wrote:
> There are multiple locations in the X.25 packet layer where a skb is
> assumed to be of at least a certain size and that all its data is
> currently available at skb->data.  These assumptions are not checked,
> hence buffer overreads may occur.  Use pskb_may_pull to check these
> minimal size assumptions and ensure that data is available at skb->data
> when necessary, as well as use skb_copy_bits where needed.
>
> Signed-off-by: Matthew Daley <mattjd@gmail.com>
> Cc: Eric Dumazet <eric.dumazet@gmail.com>
> Cc: Andrew Hendry <andrew.hendry@gmail.com>
> Cc: stable <stable@kernel.org>
> ---
>  net/x25/af_x25.c         |   31 ++++++++++++++++++++++++-------
>  net/x25/x25_dev.c        |    6 ++++++
>  net/x25/x25_facilities.c |   10 ++++++----
>  net/x25/x25_in.c         |   40 +++++++++++++++++++++++++++++++++++-----
>  net/x25/x25_link.c       |    3 +++
>  net/x25/x25_subr.c       |   14 +++++++++++++-
>  6 files changed, 87 insertions(+), 17 deletions(-)
>
> diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c
> index a4bd172..aa567b0 100644
> --- a/net/x25/af_x25.c
> +++ b/net/x25/af_x25.c
> @@ -91,7 +91,7 @@ int x25_parse_address_block(struct sk_buff *skb,
>        int needed;
>        int rc;
>
> -       if (skb->len < 1) {
> +       if (!pskb_may_pull(skb, 1)) {
>                /* packet has no address block */
>                rc = 0;
>                goto empty;
> @@ -100,7 +100,7 @@ int x25_parse_address_block(struct sk_buff *skb,
>        len = *skb->data;
>        needed = 1 + (len >> 4) + (len & 0x0f);
>
> -       if (skb->len < needed) {
> +       if (!pskb_may_pull(skb, needed)) {
>                /* packet is too short to hold the addresses it claims
>                   to hold */
>                rc = -1;
> @@ -951,10 +951,10 @@ int x25_rx_call_request(struct sk_buff *skb, struct x25_neigh *nb,
>         *
>         *      Facilities length is mandatory in call request packets
>         */
> -       if (skb->len < 1)
> +       if (!pskb_may_pull(skb, 1))
>                goto out_clear_request;
>        len = skb->data[0] + 1;
> -       if (skb->len < len)
> +       if (!pskb_may_pull(skb, len))
>                goto out_clear_request;
>        skb_pull(skb,len);
>
> @@ -965,6 +965,13 @@ int x25_rx_call_request(struct sk_buff *skb, struct x25_neigh *nb,
>                goto out_clear_request;
>
>        /*
> +        *      Get all the call user data so it can be used in
> +        *      x25_find_listener and skb_copy_from_linear_data up ahead.
> +        */
> +       if (!pskb_may_pull(skb, skb->len))
> +               goto out_clear_request;
> +
> +       /*
>         *      Find a listener for the particular address/cud pair.
>         */
>        sk = x25_find_listener(&source_addr,skb);
> @@ -1172,6 +1179,9 @@ static int x25_sendmsg(struct kiocb *iocb, struct socket *sock,
>         *      byte of the user data is the logical value of the Q Bit.
>         */
>        if (test_bit(X25_Q_BIT_FLAG, &x25->flags)) {
> +               if (!pskb_may_pull(skb, 1))
> +                       goto out_kfree_skb;
> +
>                qbit = skb->data[0];
>                skb_pull(skb, 1);
>        }
> @@ -1250,7 +1260,9 @@ static int x25_recvmsg(struct kiocb *iocb, struct socket *sock,
>        struct x25_sock *x25 = x25_sk(sk);
>        struct sockaddr_x25 *sx25 = (struct sockaddr_x25 *)msg->msg_name;
>        size_t copied;
> -       int qbit;
> +       int qbit, header_len = x25->neighbour->extended ?
> +               X25_EXT_MIN_LEN : X25_STD_MIN_LEN;
> +
>        struct sk_buff *skb;
>        unsigned char *asmptr;
>        int rc = -ENOTCONN;
> @@ -1271,6 +1283,9 @@ static int x25_recvmsg(struct kiocb *iocb, struct socket *sock,
>
>                skb = skb_dequeue(&x25->interrupt_in_queue);
>
> +               if (!pskb_may_pull(skb, X25_STD_MIN_LEN))
> +                       goto out_free_dgram;
> +
>                skb_pull(skb, X25_STD_MIN_LEN);
>
>                /*
> @@ -1291,10 +1306,12 @@ static int x25_recvmsg(struct kiocb *iocb, struct socket *sock,
>                if (!skb)
>                        goto out;
>
> +               if (!pskb_may_pull(skb, header_len))
> +                       goto out_free_dgram;
> +
>                qbit = (skb->data[0] & X25_Q_BIT) == X25_Q_BIT;
>
> -               skb_pull(skb, x25->neighbour->extended ?
> -                               X25_EXT_MIN_LEN : X25_STD_MIN_LEN);
> +               skb_pull(skb, header_len);
>
>                if (test_bit(X25_Q_BIT_FLAG, &x25->flags)) {
>                        asmptr  = skb_push(skb, 1);
> diff --git a/net/x25/x25_dev.c b/net/x25/x25_dev.c
> index e547ca1..fa2b418 100644
> --- a/net/x25/x25_dev.c
> +++ b/net/x25/x25_dev.c
> @@ -32,6 +32,9 @@ static int x25_receive_data(struct sk_buff *skb, struct x25_neigh *nb)
>        unsigned short frametype;
>        unsigned int lci;
>
> +       if (!pskb_may_pull(skb, X25_STD_MIN_LEN))
> +               return 0;
> +
>        frametype = skb->data[2];
>        lci = ((skb->data[0] << 8) & 0xF00) + ((skb->data[1] << 0) & 0x0FF);
>
> @@ -115,6 +118,9 @@ int x25_lapb_receive_frame(struct sk_buff *skb, struct net_device *dev,
>                goto drop;
>        }
>
> +       if (!pskb_may_pull(skb, 1))
> +               return 0;
> +
>        switch (skb->data[0]) {
>
>        case X25_IFACE_DATA:
> diff --git a/net/x25/x25_facilities.c b/net/x25/x25_facilities.c
> index f77e4e7..36384a1 100644
> --- a/net/x25/x25_facilities.c
> +++ b/net/x25/x25_facilities.c
> @@ -44,7 +44,7 @@
>  int x25_parse_facilities(struct sk_buff *skb, struct x25_facilities *facilities,
>                struct x25_dte_facilities *dte_facs, unsigned long *vc_fac_mask)
>  {
> -       unsigned char *p = skb->data;
> +       unsigned char *p;
>        unsigned int len;
>
>        *vc_fac_mask = 0;
> @@ -60,14 +60,16 @@ int x25_parse_facilities(struct sk_buff *skb, struct x25_facilities *facilities,
>        memset(dte_facs->called_ae, '\0', sizeof(dte_facs->called_ae));
>        memset(dte_facs->calling_ae, '\0', sizeof(dte_facs->calling_ae));
>
> -       if (skb->len < 1)
> +       if (!pskb_may_pull(skb, 1))
>                return 0;
>
> -       len = *p++;
> +       len = skb->data[0];
>
> -       if (len >= skb->len)
> +       if (!pskb_may_pull(skb, 1 + len))
>                return -1;
>
> +       p = skb->data + 1;
> +
>        while (len > 0) {
>                switch (*p & X25_FAC_CLASS_MASK) {
>                case X25_FAC_CLASS_A:
> diff --git a/net/x25/x25_in.c b/net/x25/x25_in.c
> index 63488fd..a49cd4e 100644
> --- a/net/x25/x25_in.c
> +++ b/net/x25/x25_in.c
> @@ -107,6 +107,8 @@ static int x25_state1_machine(struct sock *sk, struct sk_buff *skb, int frametyp
>                /*
>                 *      Parse the data in the frame.
>                 */
> +               if (!pskb_may_pull(skb, X25_STD_MIN_LEN))
> +                       goto out_clear;
>                skb_pull(skb, X25_STD_MIN_LEN);
>
>                len = x25_parse_address_block(skb, &source_addr,
> @@ -130,9 +132,8 @@ static int x25_state1_machine(struct sock *sk, struct sk_buff *skb, int frametyp
>                        if (skb->len > X25_MAX_CUD_LEN)
>                                goto out_clear;
>
> -                       skb_copy_from_linear_data(skb,
> -                                                 x25->calluserdata.cuddata,
> -                                                 skb->len);
> +                       skb_copy_bits(skb, 0, x25->calluserdata.cuddata,
> +                               skb->len);
>                        x25->calluserdata.cudlength = skb->len;
>                }
>                if (!sock_flag(sk, SOCK_DEAD))
> @@ -140,6 +141,9 @@ static int x25_state1_machine(struct sock *sk, struct sk_buff *skb, int frametyp
>                break;
>        }
>        case X25_CLEAR_REQUEST:
> +               if (!pskb_may_pull(skb, X25_STD_MIN_LEN + 2))
> +                       goto out_clear;
> +
>                x25_write_internal(sk, X25_CLEAR_CONFIRMATION);
>                x25_disconnect(sk, ECONNREFUSED, skb->data[3], skb->data[4]);
>                break;
> @@ -167,6 +171,9 @@ static int x25_state2_machine(struct sock *sk, struct sk_buff *skb, int frametyp
>        switch (frametype) {
>
>                case X25_CLEAR_REQUEST:
> +                       if (!pskb_may_pull(skb, X25_STD_MIN_LEN + 2))
> +                               goto out_clear;
> +
>                        x25_write_internal(sk, X25_CLEAR_CONFIRMATION);
>                        x25_disconnect(sk, 0, skb->data[3], skb->data[4]);
>                        break;
> @@ -180,6 +187,11 @@ static int x25_state2_machine(struct sock *sk, struct sk_buff *skb, int frametyp
>        }
>
>        return 0;
> +
> +out_clear:
> +       x25_write_internal(sk, X25_CLEAR_REQUEST);
> +       x25_start_t23timer(sk);
> +       return 0;
>  }
>
>  /*
> @@ -209,6 +221,9 @@ static int x25_state3_machine(struct sock *sk, struct sk_buff *skb, int frametyp
>                        break;
>
>                case X25_CLEAR_REQUEST:
> +                       if (!pskb_may_pull(skb, X25_STD_MIN_LEN + 2))
> +                               goto out_clear;
> +
>                        x25_write_internal(sk, X25_CLEAR_CONFIRMATION);
>                        x25_disconnect(sk, 0, skb->data[3], skb->data[4]);
>                        break;
> @@ -307,6 +322,12 @@ static int x25_state3_machine(struct sock *sk, struct sk_buff *skb, int frametyp
>        }
>
>        return queued;
> +
> +out_clear:
> +       x25_write_internal(sk, X25_CLEAR_REQUEST);
> +       x25->state = X25_STATE_2;
> +       x25_start_t23timer(sk);
> +       return 0;
>  }
>
>  /*
> @@ -316,13 +337,13 @@ static int x25_state3_machine(struct sock *sk, struct sk_buff *skb, int frametyp
>  */
>  static int x25_state4_machine(struct sock *sk, struct sk_buff *skb, int frametype)
>  {
> +       struct x25_sock *x25 = x25_sk(sk);
> +
>        switch (frametype) {
>
>                case X25_RESET_REQUEST:
>                        x25_write_internal(sk, X25_RESET_CONFIRMATION);
>                case X25_RESET_CONFIRMATION: {
> -                       struct x25_sock *x25 = x25_sk(sk);
> -
>                        x25_stop_timer(sk);
>                        x25->condition = 0x00;
>                        x25->va        = 0;
> @@ -334,6 +355,9 @@ static int x25_state4_machine(struct sock *sk, struct sk_buff *skb, int frametyp
>                        break;
>                }
>                case X25_CLEAR_REQUEST:
> +                       if (!pskb_may_pull(skb, X25_STD_MIN_LEN + 2))
> +                               goto out_clear;
> +
>                        x25_write_internal(sk, X25_CLEAR_CONFIRMATION);
>                        x25_disconnect(sk, 0, skb->data[3], skb->data[4]);
>                        break;
> @@ -343,6 +367,12 @@ static int x25_state4_machine(struct sock *sk, struct sk_buff *skb, int frametyp
>        }
>
>        return 0;
> +
> +out_clear:
> +       x25_write_internal(sk, X25_CLEAR_REQUEST);
> +       x25->state = X25_STATE_2;
> +       x25_start_t23timer(sk);
> +       return 0;
>  }
>
>  /* Higher level upcall for a LAPB frame */
> diff --git a/net/x25/x25_link.c b/net/x25/x25_link.c
> index 037958f..4acacf3 100644
> --- a/net/x25/x25_link.c
> +++ b/net/x25/x25_link.c
> @@ -90,6 +90,9 @@ void x25_link_control(struct sk_buff *skb, struct x25_neigh *nb,
>                break;
>
>        case X25_DIAGNOSTIC:
> +               if (!pskb_may_pull(skb, X25_STD_MIN_LEN + 4))
> +                       break;
> +
>                printk(KERN_WARNING "x25: diagnostic #%d - %02X %02X %02X\n",
>                       skb->data[3], skb->data[4],
>                       skb->data[5], skb->data[6]);
> diff --git a/net/x25/x25_subr.c b/net/x25/x25_subr.c
> index 24a342e..5170d52 100644
> --- a/net/x25/x25_subr.c
> +++ b/net/x25/x25_subr.c
> @@ -269,7 +269,11 @@ int x25_decode(struct sock *sk, struct sk_buff *skb, int *ns, int *nr, int *q,
>               int *d, int *m)
>  {
>        struct x25_sock *x25 = x25_sk(sk);
> -       unsigned char *frame = skb->data;
> +       unsigned char *frame;
> +
> +       if (!pskb_may_pull(skb, X25_STD_MIN_LEN))
> +               return X25_ILLEGAL;
> +       frame = skb->data;
>
>        *ns = *nr = *q = *d = *m = 0;
>
> @@ -294,6 +298,10 @@ int x25_decode(struct sock *sk, struct sk_buff *skb, int *ns, int *nr, int *q,
>                if (frame[2] == X25_RR  ||
>                    frame[2] == X25_RNR ||
>                    frame[2] == X25_REJ) {
> +                       if (!pskb_may_pull(skb, X25_EXT_MIN_LEN))
> +                               return X25_ILLEGAL;
> +                       frame = skb->data;
> +
>                        *nr = (frame[3] >> 1) & 0x7F;
>                        return frame[2];
>                }
> @@ -308,6 +316,10 @@ int x25_decode(struct sock *sk, struct sk_buff *skb, int *ns, int *nr, int *q,
>
>        if (x25->neighbour->extended) {
>                if ((frame[2] & 0x01) == X25_DATA) {
> +                       if (!pskb_may_pull(skb, X25_EXT_MIN_LEN))
> +                               return X25_ILLEGAL;
> +                       frame = skb->data;
> +
>                        *q  = (frame[0] & X25_Q_BIT) == X25_Q_BIT;
>                        *d  = (frame[0] & X25_D_BIT) == X25_D_BIT;
>                        *m  = (frame[3] & X25_EXT_M_BIT) == X25_EXT_M_BIT;
> --
> 1.7.2.5
>
>

^ permalink raw reply

* Re: [PATCH] tg3: Dont dump registers if interface not ready.
From: Joe Jin @ 2011-10-17  9:52 UTC (permalink / raw)
  To: Matt Carlson
  Cc: Xiao Jiang, Michael Chan, Guru Anbalagane, Gurudas Pai,
	netdev@vger.kernel.org, linux-kernel@vger.kernel.org,
	Greg Marsden
In-Reply-To: <20111005021423.GA2787@mcarlson.broadcom.com>

Hi Matt,

Sorry for the late response. After applying your changes, the driver no longer
dumped the register info, but it still hit the tg3_stop_block timeout, as shown below:

tg3 0000:03:01.0: eth0: Link is up at 1000 Mbps, full duplex
tg3 0000:03:01.0: eth0: Flow control is on for TX and on for RX
tg3 0000:03:01.0: tg3_stop_block timed out, ofs=4800 enable_bit=2
tg3 0000:03:01.0: eth0: Link is down
tg3 0000:03:01.1: eth1: Link is up at 1000 Mbps, full duplex
tg3 0000:03:01.1: eth1: Flow control is off for TX and off for RX
tg3 0000:03:01.1: eth1: Link is down
tg3 0000:03:01.0: eth0: Link is up at 1000 Mbps, full duplex
tg3 0000:03:01.0: eth0: Flow control is on for TX and on for RX
tg3 0000:03:01.0: tg3_stop_block timed out, ofs=4800 enable_bit=2
tg3 0000:03:01.0: eth0: Link is down
RPC: Registered named UNIX socket transport module.
RPC: Registered udp transport module.
RPC: Registered tcp transport module.
RPC: Registered tcp NFSv4.1 backchannel transport module.
SELinux: initialized (dev rpc_pipefs, type rpc_pipefs), uses genfs_contexts
tg3 0000:03:01.1: eth1: Link is up at 1000 Mbps, full duplex
tg3 0000:03:01.1: eth1: Flow control is off for TX and off for RX
tg3 0000:03:01.1: eth1: Link is down
tg3 0000:03:01.0: eth0: Link is up at 1000 Mbps, full duplex
tg3 0000:03:01.0: eth0: Flow control is on for TX and on for RX
tg3 0000:03:01.0: tg3_stop_block timed out, ofs=4800 enable_bit=2
tg3 0000:03:01.0: eth0: Link is down
tg3 0000:03:01.1: eth1: Link is up at 1000 Mbps, full duplex
tg3 0000:03:01.1: eth1: Flow control is off for TX and off for RX
ADDRCONF(NETDEV_CHANGE): eth1: link becomes ready
tg3 0000:03:01.0: eth0: Link is up at 1000 Mbps, full duplex
tg3 0000:03:01.0: eth0: Flow control is on for TX and on for RX
ADDRCONF(NETDEV_CHANGE): eth0: link becomes ready


Any suggestion?

Thanks,
Joe

On 10/05/11 10:14, Matt Carlson wrote:
> On Wed, Sep 28, 2011 at 11:50:01PM -0700, Xiao Jiang wrote:
>> Joe Jin wrote:
>>> When bootup the server with BCM5704 Gigabit Ethernet get below warning:
>>>
>>> tg3 0000:03:01.0: eth0: DMA Status error.  Resetting chip.
>>> <Registers state of device>
>>> tg3 0000:03:01.0: eth0: 0: Host status block [00000007:00000002:(0000:0000:0000):(0000:0000)]
>>> tg3 0000:03:01.0: eth0: 0: NAPI info [00000001:00000002:(0000:0000:01ff):0000:(00c8:0000:0000:0000)]
>>> tg3 0000:03:01.0: eth0: Link is up at 1000 Mbps, full duplex
>>> tg3 0000:03:01.0: eth0: Flow control is on for TX and on for RX
>>> tg3 0000:03:01.0: tg3_stop_block timed out, ofs=4800 enable_bit=2
>>> tg3 0000:03:01.0: eth0: Link is down
>>> tg3 0000:03:01.0: eth0: Link is up at 1000 Mbps, full duplex
>>> tg3 0000:03:01.0: eth0: Flow control is on for TX and on for RX
>>>
>>> If device not ready, then would not dump registers info.
>>>
>>> Signed-off-by: Joe Jin <joe.jin@oracle.com>
>>> Signed-off-by: Guru Anbalagane <guru.anbalagane@oracle.com>
>>> Reported-by: Gurudas Pai <gurudas.pai@oracle.com>
>>> Cc: Matt Carlson <mcarlson@broadcom.com>
>>> Cc: Michael Chan <mchan@broadcom.com>
>>> ---
>>>  drivers/net/tg3.c |   11 +++++++++--
>>>   
>> Maybe this one should be based on the net-next tree too; tg3.c
>> lives under ./drivers/net/ethernet/broadcom/ in that tree.
>>
>> Thanks,
>> Xiao Jiang
>>>  1 files changed, 9 insertions(+), 2 deletions(-)
>>>
>>> diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c
>>> index 4a1374d..d79d344 100644
>>> --- a/drivers/net/tg3.c
>>> +++ b/drivers/net/tg3.c
>>> @@ -5475,10 +5475,15 @@ static void tg3_process_error(struct tg3 *tp)
>>>  {
>>>  	u32 val;
>>>  	bool real_error = false;
>>> +	bool dump = true;
>>>  
>>>  	if (tg3_flag(tp, ERROR_PROCESSED))
>>>  		return;
>>>  
>>> +	/* If interface not ready then dont dump error */
>>> +	if (!netif_carrier_ok(tp->dev))
>>> +		dump = false;
> 
> Would you still experience the problem if you did the following instead
> of the above link check?
> 
> 		if (tg3_flag(tp, INIT_COMPLETE))
> 			dump = false;
> 
>>>  	/* Check Flow Attention register */
>>>  	val = tr32(HOSTCC_FLOW_ATTN);
>>>  	if (val & ~HOSTCC_FLOW_ATTN_MBUF_LWM) {
>>> @@ -5492,14 +5497,16 @@ static void tg3_process_error(struct tg3 *tp)
>>>  	}
>>>  
>>>  	if (tr32(RDMAC_STATUS) || tr32(WDMAC_STATUS)) {
>>> -		netdev_err(tp->dev, "DMA Status error.  Resetting chip.\n");
>>> +		if (dump)
>>> +			netdev_err(tp->dev, "DMA Status error.  Resetting chip.\n");
>>>  		real_error = true;
>>>  	}
>>>  
>>>  	if (!real_error)
>>>  		return;
>>>  
>>> -	tg3_dump_state(tp);
>>> +	if (dump)
>>> +		tg3_dump_state(tp);
>>>  
>>>  	tg3_flag_set(tp, ERROR_PROCESSED);
>>>  	schedule_work(&tp->reset_task);
>>>   
>>
>>
> 


-- 
Oracle <http://www.oracle.com>
Joe Jin | Software Development Senior Manager | +8610.6106.5624
ORACLE | Linux and Virtualization
No. 24 Zhongguancun Software Park, Haidian District | 100193 Beijing 

^ permalink raw reply

* IPv6 routing requests ignore NLM_F_CREATE and NLM_F_REPLACE
From: Vaittinen, Matti (EXT-Other - FI/Oulu) @ 2011-10-17  9:06 UTC (permalink / raw)
  To: netdev


Hi dee Ho!

I was enhancing a userspace application that configures IPv4 routes via
netlink sockets to support IPv6 route configuration too. While doing
this I noticed that the NLM_F_* flags seemed to have no handling on the IPv6
side. For example, with IPv4, replacing a route to some destination with a route
having a different pref_src (or metric or gateway or...) can be done by
specifying the NLM_F_REPLACE flag in the netlink request and leaving out
NLM_F_CREATE.

However with IPv6, if new route being requested has different properties
(like gateway or metric or..) the existing one will not be replaced.
Instead a new route will be created - even if NLM_F_CREATE was not
specified in request.

That causes some inconvenience when a route is being changed. Routes
need to be queried, and the matching route needs to be explicitly deleted by
userspace application. Also creating new route even without NLM_F_CREATE
feels a bit strange to me.

I was wondering if this is a bug or wanted behaviour? I was thinking of
trying to write a patch to add support for replacing a route, but I feel
I'm a bit lost with the fib :) I guess the fib6_add_rt2node function
could be changed to inspect the NLM_F_ flags from nl_info pointer, and
to perform replace instead of returning -EEXIST / performing insertion.
Also returning error when NLM_F_CREATE is not specified, and existing
route is not found could probably be implemented.

Anyways, before I spend more time trying to understand the data
structures in fib6, I would like to ask if the handling of NLM_F_* flags
was left out on purpose?


Br. Matti Vaittinen

--

Theory:
Theoretical approach means that everything is well known, but still
nothing works.
Practice:
Practical approach means that everything works but no one knows why.

Thank God we have theory and practice balanced here. Nothing works, and
no one knows why...

^ permalink raw reply

* MESSAGE FROM BILL GATES.
From: Microsoft Office @ 2011-10-17  7:54 UTC (permalink / raw)


Your Email Id has won 750,000.00GBP with a HP Laptop and a BlackBerry Phone in the MICROSOFT NATIONAL PROMOTION DRAW Promo 2011. send your

Names.
Address.
Sex.
Age.
Tel.
Occupation.
Country.
*City/State:
 
Dr.Terry Cole
Email: microsoftgame6@gala.net

^ permalink raw reply

* Re: [PATCH] dev: use ifindex hash for dev_seq_ops
From: Daniel Baluta @ 2011-10-17  8:03 UTC (permalink / raw)
  To: Eric Dumazet, shemminger
  Cc: Mihai Maruseac, davem, mirq-linux, therbert, jpirko, netdev,
	linux-kernel, Mihai Maruseac
In-Reply-To: <1318596791.2223.13.camel@edumazet-HP-Compaq-6005-Pro-SFF-PC>

> This assumes device ifindexes are contained in a small range
> [N .. N + X]
>
> I understand this can help some benchmarks, but in real world this wont
> help that much once ifindexes are 'fragmented' (If really this multi
> thousand devices stuff is for real)
>
> Listen, we currently have 256 slots in the hash table.
>
> Can we try to make 'offset' something like  (slot_number<<24) +
> (position in hash chain [slot_number]), instead of (position in devices
> global list)


Eric, we can refine the idea of our first patch [1], where we recorded
the (bucket, offset) pair. Stephen, do you agree with this?


thanks,
Daniel.

[1] http://patchwork.ozlabs.org/patch/118331/

^ permalink raw reply

* [PATCH] iproute2: Conforming to -D_FORTIFY_SOURCE=2 restrictions
From: Bin Li @ 2011-10-17  7:35 UTC (permalink / raw)
  To: netdev

[-- Attachment #1: Type: text/plain, Size: 1902 bytes --]

Hi,

The issue is from below link.

https://bugzilla.novell.com/show_bug.cgi?id=719537

The debugging session for the issue is shown below.

(gdb) bt
#0  0x00007ffff7697945 in raise (sig=<optimized out>)
    at ../nptl/sysdeps/unix/sysv/linux/raise.c:64
#1  0x00007ffff7698f21 in abort () at abort.c:92
#2  0x00007ffff76d48ef in __libc_message (do_abort=2,
    fmt=0x7ffff7789541 "*** %s ***: %s terminated\n")
    at ../sysdeps/unix/sysv/linux/libc_fatal.c:186
#3  0x00007ffff7750177 in __fortify_fail (
    msg=0x7ffff77894d8 "buffer overflow detected") at fortify_fail.c:32
#4  0x00007ffff774de10 in __chk_fail () at chk_fail.c:29
#5  0x00007ffff774cf8d in __strncpy_chk (
    s1=0x640c <Address 0x640c out of bounds>,
    s2=0x640c <Address 0x640c out of bounds>, n=6, s1len=18446744073709551615)
    at strncpy_chk.c:34
#6  0x000000000041e9c8 in strncpy (__len=<optimized out>,
    __src=<optimized out>, __dest=<optimized out>)
    at /usr/include/bits/string3.h:123
#7  xfrm_algo_parse (max=<optimized out>, buf=<optimized out>,
    key=<optimized out>, name=<optimized out>, type=<optimized out>,
    alg=<optimized out>) at xfrm_state.c:166

(gdb) l
161                     len = slen;
162                     if (len > 0) {
163                             if (len > max)
164                                     invarg("\"ALGOKEY\" makes buffer
overflow\n", key);
165
166                             strncpy(buf, key, len);
167                     }
168             }
169
170             alg->alg_key_len = len * 8;
(gdb) up
#8  xfrm_state_modify (cmd=<optimized out>, flags=<optimized out>, argc=1,
    argv=0x7fffffffe370) at xfrm_state.c:406
406                                     xfrm_algo_parse((void *)&alg, type,
name, key,

the compiler passes zero to __builtin___strncpy_chk as the buffer size.
xfrm_algo_parse is inlined into xfrm_state_modify.


Thanks!

Sincerely Yours,

Bin Li

http://zh.opensuse.org

[-- Attachment #2: iproute2-FORTIFY_SOURCE.patch --]
[-- Type: text/x-patch, Size: 2234 bytes --]

diff --git a/ip/xfrm_state.c b/ip/xfrm_state.c
index a76be47..30a9aa3 100644
--- a/ip/xfrm_state.c
+++ b/ip/xfrm_state.c
@@ -368,13 +368,16 @@ static int xfrm_state_modify(int cmd, unsigned flags, int argc, char **argv)
 						struct xfrm_algo_auth auth;
 					} u;
 					char buf[XFRM_ALGO_KEY_BUF_SIZE];
-				} alg = {};
+				} *alg;
 				int len;
 				__u32 icvlen, trunclen;
 				char *name;
 				char *key;
 				char *buf;
 
+				alg = alloca (sizeof (*alg) + XFRM_ALGO_KEY_BUF_SIZE);
+				memset (alg, 0, sizeof (*alg) + XFRM_ALGO_KEY_BUF_SIZE);
+
 				switch (type) {
 				case XFRMA_ALG_AEAD:
 					if (aeadop)
@@ -412,8 +415,8 @@ static int xfrm_state_modify(int cmd, unsigned flags, int argc, char **argv)
 				NEXT_ARG();
 				key = *argv;
 
-				buf = alg.u.alg.alg_key;
-				len = sizeof(alg.u.alg);
+				buf = alg->u.alg.alg_key;
+				len = sizeof(alg->u.alg);
 
 				switch (type) {
 				case XFRMA_ALG_AEAD:
@@ -423,10 +426,10 @@ static int xfrm_state_modify(int cmd, unsigned flags, int argc, char **argv)
 					if (get_u32(&icvlen, *argv, 0))
 						invarg("\"aead\" ICV length is invalid",
 						       *argv);
-					alg.u.aead.alg_icv_len = icvlen;
+					alg->u.aead.alg_icv_len = icvlen;
 
-					buf = alg.u.aead.alg_key;
-					len = sizeof(alg.u.aead);
+					buf = alg->u.aead.alg_key;
+					len = sizeof(alg->u.aead);
 					break;
 				case XFRMA_ALG_AUTH_TRUNC:
 					if (!NEXT_ARG_OK())
@@ -435,19 +438,19 @@ static int xfrm_state_modify(int cmd, unsigned flags, int argc, char **argv)
 					if (get_u32(&trunclen, *argv, 0))
 						invarg("\"auth\" trunc length is invalid",
 						       *argv);
-					alg.u.auth.alg_trunc_len = trunclen;
+					alg->u.auth.alg_trunc_len = trunclen;
 
-					buf = alg.u.auth.alg_key;
-					len = sizeof(alg.u.auth);
+					buf = alg->u.auth.alg_key;
+					len = sizeof(alg->u.auth);
 					break;
 				}
 
-				xfrm_algo_parse((void *)&alg, type, name, key,
-						buf, sizeof(alg.buf));
-				len += alg.u.alg.alg_key_len;
+				xfrm_algo_parse((void *)alg, type, name, key,
+						buf, sizeof(alg->buf));
+				len += alg->u.alg.alg_key_len;
 
 				addattr_l(&req.n, sizeof(req.buf), type,
-					  (void *)&alg, len);
+					  (void *)alg, len);
 				break;
 			}
 			default:

^ permalink raw reply related

* Re: PROBLEM: System call 'sendmsg' of process ospfd (quagga) causes kernel oops
From: Elmar Vonlanthen @ 2011-10-17  7:16 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: linux-kernel, netdev, Timo Teräs, Herbert Xu
In-Reply-To: <1318604266.2223.29.camel@edumazet-HP-Compaq-6005-Pro-SFF-PC>

2011/10/14 Eric Dumazet <eric.dumazet@gmail.com>:
> Please try following patch :
>
> [PATCH] ip_gre: dont increase dev->needed_headroom on a live device
>
> It seems ip_gre is able to change dev->needed_headroom on the fly.
>
> This is not legal, unfortunately, and triggers a BUG in raw_sendmsg()
>
> skb = sock_alloc_send_skb(sk, ... + LL_ALLOCATED_SPACE(rt->dst.dev)
>
> < another cpu changes dev->needed_headroom (making it bigger)
>
> ...
> skb_reserve(skb, LL_RESERVED_SPACE(rt->dst.dev));
>
> We end up with LL_RESERVED_SPACE() being bigger than LL_ALLOCATED_SPACE()
> -> we crash later because skb head is exhausted.
>
> Bug introduced in commit 243aad83 in 2.6.34 (ip_gre: include route
> header_len in max_headroom calculation)
>
> Reported-by: Elmar Vonlanthen <evonlanthen@gmail.com>
> Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
> CC: Timo Teräs <timo.teras@iki.fi>
> CC: Herbert Xu <herbert@gondor.apana.org.au>
> ---
> diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
> index 8871067..1505dcf 100644
> --- a/net/ipv4/ip_gre.c
> +++ b/net/ipv4/ip_gre.c
> @@ -835,8 +835,6 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
>        if (skb_headroom(skb) < max_headroom || skb_shared(skb)||
>            (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
>                struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
> -               if (max_headroom > dev->needed_headroom)
> -                       dev->needed_headroom = max_headroom;
>                if (!new_skb) {
>                        ip_rt_put(rt);
>                        dev->stats.tx_dropped++;

Hello

I tried this patch and was no longer able to reproduce the kernel
oops, so the patch fixes the bug.
Thank you very much!

Would it be possible to add the patch to the long-term kernel 2.6.35
as well? That is the one I currently use in production.

And sorry for posting to the wrong mailing list (linux-kernel).

Best regards
Elmar

^ permalink raw reply

* [PATCH] smsc911x: Add regulator support
From: Robert Marklund @ 2011-10-17  6:56 UTC (permalink / raw)
  To: netdev, Steve Glendinning; +Cc: Mathieu Poirer, Robert Marklund

Add basic regulator support. It may be
necessary to add more regulators to suit
all power-saving needs, but this is a start.

Also wait for the chip to become ready after
the regulators are enabled; the missing wait
was a bug in the old implementation.

Signed-off-by: Robert Marklund <robert.marklund@stericsson.com>
---
 drivers/net/smsc911x.c |  113 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 files changed, 113 insertions(+), 0 deletions(-)

diff --git a/drivers/net/smsc911x.c b/drivers/net/smsc911x.c
index b9016a3..4de3bd8 100644
--- a/drivers/net/smsc911x.c
+++ b/drivers/net/smsc911x.c
@@ -53,6 +53,8 @@
 #include <linux/phy.h>
 #include <linux/smsc911x.h>
 #include <linux/device.h>
+#include <linux/regulator/consumer.h>
+
 #include "smsc911x.h"
 
 #define SMSC_CHIPNAME		"smsc911x"
@@ -133,6 +135,10 @@ struct smsc911x_data {
 
 	/* register access functions */
 	const struct smsc911x_ops *ops;
+
+	/* regulators */
+	struct regulator *regulator_vddvario;
+	struct regulator *regulator_vdd33a;
 };
 
 /* Easy access to information */
@@ -357,6 +363,81 @@ out:
 	spin_unlock_irqrestore(&pdata->dev_lock, flags);
 }
 
+/* Enable resources(clocks and regulators) */
+static int smsc911x_enable_resources(struct platform_device *pdev, bool enable)
+{
+	struct net_device *ndev = platform_get_drvdata(pdev);
+	struct smsc911x_data *pdata = netdev_priv(ndev);
+	int err = 0;
+
+	/* enable/diable regulator for vddvario */
+	if (pdata->regulator_vddvario) {
+		if (enable) {
+			err = regulator_enable(pdata->regulator_vddvario);
+			if (err < 0) {
+				netdev_err(ndev, "%s: regulator_enable failed '%s'\n",
+						__func__, "vddvario");
+			}
+		} else
+			err = regulator_disable(pdata->regulator_vdd33a);
+	}
+
+	/* enable/diableregulator for vdd33a */
+	if (pdata->regulator_vdd33a) {
+		if (enable) {
+			err = regulator_enable(pdata->regulator_vdd33a);
+			if (err < 0) {
+				netdev_err(ndev, "%s: regulator_enable failed '%s'\n",
+						__func__, "vdd33a");
+			}
+		} else
+			err = regulator_disable(pdata->regulator_vdd33a);
+	}
+	return err;
+}
+
+
+/* Request resources(clocks and regulators) */
+static int smsc911x_request_resources(struct platform_device *pdev,
+		bool request)
+{
+	struct net_device *ndev = platform_get_drvdata(pdev);
+	struct smsc911x_data *pdata = netdev_priv(ndev);
+	int err = 0;
+
+	/* Request regulator for vddvario */
+	if (request && !pdata->regulator_vddvario) {
+		pdata->regulator_vddvario = regulator_get(&pdev->dev,
+				"vddvario");
+		if (IS_ERR(pdata->regulator_vddvario)) {
+			netdev_warn(ndev,
+					"%s: Failed to get regulator '%s'\n",
+					__func__, "vddvario");
+			pdata->regulator_vddvario = NULL;
+		}
+	} else if (!request && pdata->regulator_vddvario) {
+		regulator_put(pdata->regulator_vddvario);
+		pdata->regulator_vddvario = NULL;
+	}
+
+	/* Request regulator for vdd33a */
+	if (request && !pdata->regulator_vddvario) {
+		pdata->regulator_vdd33a = regulator_get(&pdev->dev,
+				"vdd33a");
+		if (IS_ERR(pdata->regulator_vdd33a)) {
+			netdev_warn(ndev,
+					"%s: Failed to get regulator '%s'\n",
+					__func__, "vdd33a");
+			pdata->regulator_vdd33a = NULL;
+		}
+	} else if (!request && pdata->regulator_vdd33a) {
+		regulator_put(pdata->regulator_vdd33a);
+		pdata->regulator_vdd33a = NULL;
+	}
+
+	return err;
+}
+
 /* waits for MAC not busy, with timeout.  Only called by smsc911x_mac_read
  * and smsc911x_mac_write, so assumes mac_lock is held */
 static int smsc911x_mac_complete(struct smsc911x_data *pdata)
@@ -2047,6 +2128,7 @@ static int __devexit smsc911x_drv_remove(struct platform_device *pdev)
 	struct net_device *dev;
 	struct smsc911x_data *pdata;
 	struct resource *res;
+	int retval;
 
 	dev = platform_get_drvdata(pdev);
 	BUG_ON(!dev);
@@ -2074,6 +2156,12 @@ static int __devexit smsc911x_drv_remove(struct platform_device *pdev)
 
 	iounmap(pdata->ioaddr);
 
+	if (smsc911x_enable_resources(pdev, false))
+		pr_warn("Could not disable resource\n");
+
+	retval = smsc911x_request_resources(pdev, false);
+	/* ignore not all have regulators */
+
 	free_netdev(dev);
 
 	return 0;
@@ -2104,6 +2192,7 @@ static int __devinit smsc911x_drv_probe(struct platform_device *pdev)
 	unsigned int intcfg = 0;
 	int res_size, irq_flags;
 	int retval;
+	int to = 100;
 
 	pr_info("Driver version %s\n", SMSC_DRV_VERSION);
 
@@ -2158,6 +2247,17 @@ static int __devinit smsc911x_drv_probe(struct platform_device *pdev)
 	pdata->dev = dev;
 	pdata->msg_enable = ((1 << debug) - 1);
 
+	platform_set_drvdata(pdev, dev);
+
+	retval = smsc911x_request_resources(pdev, true);
+	/* ignore not all have regulators */
+
+	retval = smsc911x_enable_resources(pdev, true);
+	if (retval) {
+		pr_warn("Could not enable resource\n");
+		goto out_0;
+	}
+
 	if (pdata->ioaddr == NULL) {
 		SMSC_WARN(pdata, probe, "Error smsc911x base address invalid");
 		retval = -ENOMEM;
@@ -2170,6 +2270,18 @@ static int __devinit smsc911x_drv_probe(struct platform_device *pdev)
 	if (config->shift)
 		pdata->ops = &shifted_smsc911x_ops;
 
+	/* poll the READY bit in PMT_CTRL. Any other access to the device is
+	 * forbidden while this bit isn't set. Try for 100ms
+	 */
+	while (!(smsc911x_reg_read(pdata, PMT_CTRL) & PMT_CTRL_READY_) && --to)
+		udelay(1000);
+
+	if (to == 0) {
+		pr_err("Device not READY in 100ms aborting\n");
+		goto out_0;
+	}
+
+
 	retval = smsc911x_init(dev);
 	if (retval < 0)
 		goto out_unmap_io_3;
@@ -2262,6 +2374,7 @@ out_0:
 	return retval;
 }
 
+
 #ifdef CONFIG_PM
 /* This implementation assumes the devices remains powered on its VDDVARIO
  * pins during suspend. */
-- 
1.7.1

^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox