Netdev List
 help / color / mirror / Atom feed
* [net-next-2.6 06/27] Documentation/networking/ixgbevf.txt: Update documentation
From: Jeff Kirsher @ 2010-12-10  9:50 UTC (permalink / raw)
  To: davem, davem; +Cc: Jeff Kirsher, netdev, gospo, bphilips
In-Reply-To: <1291974667-30254-1-git-send-email-jeffrey.t.kirsher@intel.com>

Update Intel Wired LAN ixgbevf documentation.

Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 Documentation/networking/ixgbevf.txt |    4 ----
 1 files changed, 0 insertions(+), 4 deletions(-)

diff --git a/Documentation/networking/ixgbevf.txt b/Documentation/networking/ixgbevf.txt
index 21dd5d1..5a91a41 100644
--- a/Documentation/networking/ixgbevf.txt
+++ b/Documentation/networking/ixgbevf.txt
@@ -35,10 +35,6 @@ Driver ID Guide at:
 Known Issues/Troubleshooting
 ============================
 
-  Unloading Physical Function (PF) Driver Causes System Reboots When VM is
-  Running and VF is Loaded on the VM
-  ------------------------------------------------------------------------
-  Do not unload the PF driver (ixgbe) while VFs are assigned to guests.
 
 Support
 =======
-- 
1.7.3.2


^ permalink raw reply related

* [net-next-2.6 08/27] e1000: fix return value not set on error
From: Jeff Kirsher @ 2010-12-10  9:50 UTC (permalink / raw)
  To: davem, davem, dnelson
  Cc: Dean Nelson, netdev, gospo, bphilips, stable, Jesse Brandeburg,
	Jeff Kirsher
In-Reply-To: <1291974667-30254-1-git-send-email-jeffrey.t.kirsher@intel.com>

From: Dean Nelson <dnelson@redhat.com>

Dean noticed that 'err' wasn't being set when the "goto err_dma"
statement is executed in the following hunk from the commit. Its value
will be zero as a result of a successful call to e1000_init_hw_struct().

This patch changes the error condition to be correctly propagated.

CC: stable@kernel.org
Signed-off-by:  Dean Nelson <dnelson@redhat.com>
Signed-off-by: Jesse Brandeburg <jesse.brandeburg@intel.com>
Tested-by: Emil Tantilov <emil.s.tantilov@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/e1000/e1000_main.c |   10 ++++++----
 1 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/drivers/net/e1000/e1000_main.c b/drivers/net/e1000/e1000_main.c
index 06c7d1c..491bf2a 100644
--- a/drivers/net/e1000/e1000_main.c
+++ b/drivers/net/e1000/e1000_main.c
@@ -971,11 +971,13 @@ static int __devinit e1000_probe(struct pci_dev *pdev,
 		 */
 		dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64));
 		pci_using_dac = 1;
-	} else if (!dma_set_mask(&pdev->dev, DMA_BIT_MASK(32))) {
-		dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32));
 	} else {
-		pr_err("No usable DMA config, aborting\n");
-		goto err_dma;
+		err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32));
+		if (err) {
+			pr_err("No usable DMA config, aborting\n");
+			goto err_dma;
+		}
+		dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32));
 	}
 
 	netdev->netdev_ops = &e1000_netdev_ops;
-- 
1.7.3.2


^ permalink raw reply related

* [net-next-2.6 10/27] ixgb: Don't check for vlan group on transmit
From: Jeff Kirsher @ 2010-12-10  9:50 UTC (permalink / raw)
  To: davem, davem; +Cc: Emil Tantilov, netdev, gospo, bphilips, jesse, Jeff Kirsher
In-Reply-To: <1291974667-30254-1-git-send-email-jeffrey.t.kirsher@intel.com>

From: Emil Tantilov <emil.s.tantilov@intel.com>

Based on a patch from Jesse Gross.

Enable vlan tag insertion even when vlan group is not configured.

For ixgb HW both CTRL0.VME and VLE bit in the Tx descriptor need to be set
in order to enable HW acceleration.

Introduced separate functions for enabling/disabling of vlan tag stripping
similar to ixgbe.

CC: Jesse Gross <jesse@nicira.com>
Signed-off-by: Emil Tantilov <emil.s.tantilov@intel.com>
Tested-by: Jeff Pieper <jeffrey.e.pieper@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ixgb/ixgb_main.c |   51 ++++++++++++++++++++++++-----------------
 1 files changed, 30 insertions(+), 21 deletions(-)

diff --git a/drivers/net/ixgb/ixgb_main.c b/drivers/net/ixgb/ixgb_main.c
index 211a169..2e98506 100644
--- a/drivers/net/ixgb/ixgb_main.c
+++ b/drivers/net/ixgb/ixgb_main.c
@@ -98,6 +98,8 @@ static void ixgb_alloc_rx_buffers(struct ixgb_adapter *, int);
 static void ixgb_tx_timeout(struct net_device *dev);
 static void ixgb_tx_timeout_task(struct work_struct *work);
 
+static void ixgb_vlan_strip_enable(struct ixgb_adapter *adapter);
+static void ixgb_vlan_strip_disable(struct ixgb_adapter *adapter);
 static void ixgb_vlan_rx_register(struct net_device *netdev,
                                   struct vlan_group *grp);
 static void ixgb_vlan_rx_add_vid(struct net_device *netdev, u16 vid);
@@ -1076,6 +1078,8 @@ ixgb_set_multi(struct net_device *netdev)
 
 	if (netdev->flags & IFF_PROMISC) {
 		rctl |= (IXGB_RCTL_UPE | IXGB_RCTL_MPE);
+		/* disable VLAN filtering */
+		rctl &= ~IXGB_RCTL_CFIEN;
 		rctl &= ~IXGB_RCTL_VFE;
 	} else {
 		if (netdev->flags & IFF_ALLMULTI) {
@@ -1084,7 +1088,9 @@ ixgb_set_multi(struct net_device *netdev)
 		} else {
 			rctl &= ~(IXGB_RCTL_UPE | IXGB_RCTL_MPE);
 		}
+		/* enable VLAN filtering */
 		rctl |= IXGB_RCTL_VFE;
+		rctl &= ~IXGB_RCTL_CFIEN;
 	}
 
 	if (netdev_mc_count(netdev) > IXGB_MAX_NUM_MULTICAST_ADDRESSES) {
@@ -1103,6 +1109,12 @@ ixgb_set_multi(struct net_device *netdev)
 
 		ixgb_mc_addr_list_update(hw, mta, netdev_mc_count(netdev), 0);
 	}
+
+	if (netdev->features & NETIF_F_HW_VLAN_RX)
+		ixgb_vlan_strip_enable(adapter);
+	else
+		ixgb_vlan_strip_disable(adapter);
+
 }
 
 /**
@@ -2150,33 +2162,30 @@ static void
 ixgb_vlan_rx_register(struct net_device *netdev, struct vlan_group *grp)
 {
 	struct ixgb_adapter *adapter = netdev_priv(netdev);
-	u32 ctrl, rctl;
 
-	ixgb_irq_disable(adapter);
 	adapter->vlgrp = grp;
+}
 
-	if (grp) {
-		/* enable VLAN tag insert/strip */
-		ctrl = IXGB_READ_REG(&adapter->hw, CTRL0);
-		ctrl |= IXGB_CTRL0_VME;
-		IXGB_WRITE_REG(&adapter->hw, CTRL0, ctrl);
-
-		/* enable VLAN receive filtering */
+static void
+ixgb_vlan_strip_enable(struct ixgb_adapter *adapter)
+{
+	u32 ctrl;
 
-		rctl = IXGB_READ_REG(&adapter->hw, RCTL);
-		rctl &= ~IXGB_RCTL_CFIEN;
-		IXGB_WRITE_REG(&adapter->hw, RCTL, rctl);
-	} else {
-		/* disable VLAN tag insert/strip */
+	/* enable VLAN tag insert/strip */
+	ctrl = IXGB_READ_REG(&adapter->hw, CTRL0);
+	ctrl |= IXGB_CTRL0_VME;
+	IXGB_WRITE_REG(&adapter->hw, CTRL0, ctrl);
+}
 
-		ctrl = IXGB_READ_REG(&adapter->hw, CTRL0);
-		ctrl &= ~IXGB_CTRL0_VME;
-		IXGB_WRITE_REG(&adapter->hw, CTRL0, ctrl);
-	}
+static void
+ixgb_vlan_strip_disable(struct ixgb_adapter *adapter)
+{
+	u32 ctrl;
 
-	/* don't enable interrupts unless we are UP */
-	if (adapter->netdev->flags & IFF_UP)
-		ixgb_irq_enable(adapter);
+	/* disable VLAN tag insert/strip */
+	ctrl = IXGB_READ_REG(&adapter->hw, CTRL0);
+	ctrl &= ~IXGB_CTRL0_VME;
+	IXGB_WRITE_REG(&adapter->hw, CTRL0, ctrl);
 }
 
 static void
-- 
1.7.3.2


^ permalink raw reply related

* [RFC PATCH V2 2/5] Add a new device flag for zero copy
From: Shirley Ma @ 2010-12-10  9:57 UTC (permalink / raw)
  To: Avi Kivity, Arnd Bergmann, mst; +Cc: xiaohui.xin, netdev, kvm, linux-kernel

Signed-off-by: Shirley Ma <xma@us.ibm.com>
---

 include/linux/netdevice.h |    3 +++
 1 files changed, 3 insertions(+), 0 deletions(-)

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index d8fd2c2..7207665 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -857,6 +857,9 @@ struct net_device {
 #define NETIF_F_NTUPLE		(1 << 27) /* N-tuple filters supported */
 #define NETIF_F_RXHASH		(1 << 28) /* Receive hashing offload */
 
+/* bit 29 is for device to map userspace buffers -- zerocopy */
+#define NETIF_F_ZEROCOPY	(1 << 29)	
+
 	/* Segmentation offload features */
 #define NETIF_F_GSO_SHIFT	16
 #define NETIF_F_GSO_MASK	0x00ff0000

^ permalink raw reply related

* [RFC PATCH V2 3/5] Add userspace buffers callback in skb_share_info
From: Shirley Ma @ 2010-12-10 10:01 UTC (permalink / raw)
  To: Avi Kivity, Arnd Bergmann, mst; +Cc: xiaohui.xin, netdev, kvm, linux-kernel

Signed-off-by: Shirley Ma <xma@us.ibm.com>
---

 include/linux/skbuff.h |   13 +++++++++++++
 net/core/skbuff.c      |   13 ++++++++++++-
 2 files changed, 25 insertions(+), 1 deletions(-)

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index e6ba898..938a7cb 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -183,6 +183,15 @@ enum {
 	SKBTX_DRV_NEEDS_SK_REF = 1 << 3,
 };
 
+/* The callback notifies userspace to release buffers when skb DMA is done in
+ * lower device, the desc is used to track userspace buffer index.
+ */
+struct skb_ubuf_info {
+	/* support buffers allocation from userspace */
+	void		(*callback)(struct sk_buff *);
+	size_t		desc;
+};
+
 /* This data is invariant across clones and lives at
  * the end of the header data, ie. at skb->end.
  */
@@ -205,6 +214,10 @@ struct skb_shared_info {
 	/* Intermediate layers must ensure that destructor_arg
 	 * remains valid until skb destructor */
 	void *		destructor_arg;
+
+	/* DMA mapping from userspace buffers */
+	struct skb_ubuf_info ubuf;
+
 	/* must be last field, see pskb_expand_head() */
 	skb_frag_t	frags[MAX_SKB_FRAGS];
 };
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 104f844..f9468a0 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -210,6 +210,7 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
 	shinfo = skb_shinfo(skb);
 	memset(shinfo, 0, offsetof(struct skb_shared_info, dataref));
 	atomic_set(&shinfo->dataref, 1);
+	shinfo->ubuf.callback = NULL;
 
 	if (fclone) {
 		struct sk_buff *child = skb + 1;
@@ -329,6 +330,15 @@ static void skb_release_data(struct sk_buff *skb)
 
 		if (skb_has_frag_list(skb))
 			skb_drop_fraglist(skb);
+		
+		/*
+		 * if skb buf is from userspace, we need to notify the caller
+		 * the lower device DMA has done;
+		 */
+		if (skb_shinfo(skb)->ubuf.callback) {
+			skb_shinfo(skb)->ubuf.callback(skb);
+			skb_shinfo(skb)->ubuf.callback = NULL;
+		}
 
 		kfree(skb->head);
 	}
@@ -492,6 +502,7 @@ bool skb_recycle_check(struct sk_buff *skb, int skb_size)
 	shinfo = skb_shinfo(skb);
 	memset(shinfo, 0, offsetof(struct skb_shared_info, dataref));
 	atomic_set(&shinfo->dataref, 1);
+	shinfo->ubuf.callback = NULL;
 
 	memset(skb, 0, offsetof(struct sk_buff, tail));
 	skb->data = skb->head + NET_SKB_PAD;

^ permalink raw reply related

* Re: rndis gadget: Inconsistent locking
From: Michał Nazarewicz @ 2010-12-10 10:01 UTC (permalink / raw)
  To: Neil Jones
  Cc: linux-usb-u79uwXL29TY76Z2rM5mHXA, netdev-u79uwXL29TY76Z2rM5mHXA
In-Reply-To: <AANLkTinE1srqkpib0+8Q63XjnhRYEWaaDsX70tc_OeUm-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>

On Thu, 09 Dec 2010 18:00:55 +0100, Neil Jones <neiljay-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org> wrote:

>> Does this help: <https://patchwork.kernel.org/patch/195562/>?
>
> Yes cheers, warning gone and driver seems fine so far.
>
> has this been accepted upstream ?

No, it hasn't.  There were some concerns that this patch is invalid and may
not work in some situations -- it uses an "incorrect" way of getting network
interface statistics which, however, happens to work with g_ether.  No one
got to implementing the worker solution, so to my knowledge, this is the
only working patch.

-- 
Best regards,                                        _     _
| Humble Liege of Serenely Enlightened Majesty of  o' \,=./ `o
| Computer Science,  Michał "mina86" Nazarewicz       (o o)
+----[mina86*mina86.com]---[mina86*jabber.org]----ooO--(_)--Ooo--
--
To unsubscribe from this list: send the line "unsubscribe linux-usb" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* [net-next-2.6 11/27] ixgbe: fix X540 phy id to correct value
From: Jeff Kirsher @ 2010-12-10 10:01 UTC (permalink / raw)
  To: davem, davem; +Cc: Don Skidmore, netdev, gospo, bphilips, Jeff Kirsher

From: Don Skidmore <donald.c.skidmore@intel.com>

The existing PHY ID for X540 was from early production hardware and
is no longer correct.  This patch corrects that.

Signed-off-by: Don Skidmore <donald.c.skidmore@intel.com>
Tested-by: Stephen Ko <stephen.s.ko@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ixgbe/ixgbe_phy.c  |    2 +-
 drivers/net/ixgbe/ixgbe_type.h |    2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ixgbe/ixgbe_phy.c b/drivers/net/ixgbe/ixgbe_phy.c
index c445fbc..8f7123e 100644
--- a/drivers/net/ixgbe/ixgbe_phy.c
+++ b/drivers/net/ixgbe/ixgbe_phy.c
@@ -115,7 +115,7 @@ static enum ixgbe_phy_type ixgbe_get_phy_type_from_id(u32 phy_id)
 	case TN1010_PHY_ID:
 		phy_type = ixgbe_phy_tn;
 		break;
-	case AQ1202_PHY_ID:
+	case X540_PHY_ID:
 		phy_type = ixgbe_phy_aq;
 		break;
 	case QT2022_PHY_ID:
diff --git a/drivers/net/ixgbe/ixgbe_type.h b/drivers/net/ixgbe/ixgbe_type.h
index 0f80893..59f6d0a 100644
--- a/drivers/net/ixgbe/ixgbe_type.h
+++ b/drivers/net/ixgbe/ixgbe_type.h
@@ -999,7 +999,7 @@
 /* PHY IDs*/
 #define TN1010_PHY_ID    0x00A19410
 #define TNX_FW_REV       0xB
-#define AQ1202_PHY_ID    0x03A1B440
+#define X540_PHY_ID      0x01540200
 #define QT2022_PHY_ID    0x0043A400
 #define ATH_PHY_ID       0x03429050
 #define AQ_FW_REV        0x20
-- 
1.7.3.2


^ permalink raw reply related

* [net-next-2.6 12/27] ixgbe: fix X540 to use it's own info struct
From: Jeff Kirsher @ 2010-12-10 10:03 UTC (permalink / raw)
  To: davem, davem; +Cc: Don Skidmore, netdev, gospo, bphilips, Jeff Kirsher

From: Don Skidmore <donald.c.skidmore@intel.com>

This patch enables X540 hardware to use its own set of support
functions.  This is useful as it has no need of SFP+ support.  A
couple minor bugs with the eeprom semaphore were also cleaned up.

Signed-off-by: Don Skidmore <donald.c.skidmore@intel.com>
Tested-by: Stephen Ko <stephen.s.ko@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ixgbe/ixgbe_main.c |   16 ++++++++--------
 drivers/net/ixgbe/ixgbe_x540.c |    4 ++--
 2 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/drivers/net/ixgbe/ixgbe_main.c b/drivers/net/ixgbe/ixgbe_main.c
index fdb35d0..f2694f2 100644
--- a/drivers/net/ixgbe/ixgbe_main.c
+++ b/drivers/net/ixgbe/ixgbe_main.c
@@ -118,7 +118,7 @@ static DEFINE_PCI_DEVICE_TABLE(ixgbe_pci_tbl) = {
 	{PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82599_COMBO_BACKPLANE),
 	 board_82599 },
 	{PCI_VDEVICE(INTEL, IXGBE_DEV_ID_X540T),
-	 board_82599 },
+	 board_X540 },
 
 	/* required last entry */
 	{0, }
@@ -1897,6 +1897,13 @@ static irqreturn_t ixgbe_msix_lsc(int irq, void *data)
 
 	switch (hw->mac.type) {
 	case ixgbe_mac_82599EB:
+		ixgbe_check_sfp_event(adapter, eicr);
+		if ((adapter->flags2 & IXGBE_FLAG2_TEMP_SENSOR_CAPABLE) &&
+		    ((eicr & IXGBE_EICR_GPI_SDP0) || (eicr & IXGBE_EICR_LSC))) {
+			adapter->interrupt_event = eicr;
+			schedule_work(&adapter->check_overtemp_task);
+		}
+		/* now fallthrough to handle Flow Director */
 	case ixgbe_mac_X540:
 		/* Handle Flow Director Full threshold interrupt */
 		if (eicr & IXGBE_EICR_FLOW_DIR) {
@@ -1912,12 +1919,6 @@ static irqreturn_t ixgbe_msix_lsc(int irq, void *data)
 					schedule_work(&adapter->fdir_reinit_task);
 			}
 		}
-		ixgbe_check_sfp_event(adapter, eicr);
-		if ((adapter->flags2 & IXGBE_FLAG2_TEMP_SENSOR_CAPABLE) &&
-		    ((eicr & IXGBE_EICR_GPI_SDP0) || (eicr & IXGBE_EICR_LSC))) {
-			adapter->interrupt_event = eicr;
-			schedule_work(&adapter->check_overtemp_task);
-		}
 		break;
 	default:
 		break;
@@ -2508,7 +2509,6 @@ static irqreturn_t ixgbe_intr(int irq, void *data)
 
 	switch (hw->mac.type) {
 	case ixgbe_mac_82599EB:
-	case ixgbe_mac_X540:
 		ixgbe_check_sfp_event(adapter, eicr);
 		if ((adapter->flags2 & IXGBE_FLAG2_TEMP_SENSOR_CAPABLE) &&
 		    ((eicr & IXGBE_EICR_GPI_SDP0) || (eicr & IXGBE_EICR_LSC))) {
diff --git a/drivers/net/ixgbe/ixgbe_x540.c b/drivers/net/ixgbe/ixgbe_x540.c
index 9649fa7..cf88515 100644
--- a/drivers/net/ixgbe/ixgbe_x540.c
+++ b/drivers/net/ixgbe/ixgbe_x540.c
@@ -278,7 +278,7 @@ static s32 ixgbe_read_eerd_X540(struct ixgbe_hw *hw, u16 offset, u16 *data)
 {
 	s32 status;
 
-	if (ixgbe_acquire_swfw_sync_X540(hw, IXGBE_GSSR_EEP_SM))
+	if (ixgbe_acquire_swfw_sync_X540(hw, IXGBE_GSSR_EEP_SM) == 0)
 		status = ixgbe_read_eerd_generic(hw, offset, data);
 	else
 		status = IXGBE_ERR_SWFW_SYNC;
@@ -311,7 +311,7 @@ static s32 ixgbe_write_eewr_X540(struct ixgbe_hw *hw, u16 offset, u16 data)
 	       (data << IXGBE_EEPROM_RW_REG_DATA) |
 	       IXGBE_EEPROM_RW_REG_START;
 
-	if (ixgbe_acquire_swfw_sync_X540(hw, IXGBE_GSSR_EEP_SM)) {
+	if (ixgbe_acquire_swfw_sync_X540(hw, IXGBE_GSSR_EEP_SM) == 0) {
 		status = ixgbe_poll_eerd_eewr_done(hw, IXGBE_NVM_POLL_WRITE);
 		if (status != 0) {
 			hw_dbg(hw, "Eeprom write EEWR timed out\n");
-- 
1.7.3.2


^ permalink raw reply related

* [net-next-2.6 13/27] ixgbe: fix ntuple support
From: Jeff Kirsher @ 2010-12-10 10:03 UTC (permalink / raw)
  To: davem, davem; +Cc: Emil Tantilov, netdev, gospo, bphilips, jesse, Jeff Kirsher
In-Reply-To: <1291975414-30487-1-git-send-email-jeffrey.t.kirsher@intel.com>

From: Emil Tantilov <emil.s.tantilov@intel.com>

commit f62bbb5e62c6e4a91fb222d22bc46e8d4d7e59ef
ixgbe: Update ixgbe to use new vlan accleration.

removed ETH_FLAG_NTUPLE from the supported flags.

This patch puts it back on to allow for setting ntuple via ethtool.

CC: Jesse Gross <jesse@nicira.com>
Signed-off-by: Emil Tantilov <emil.s.tantilov@intel.com>
Tested-by: Stephen Ko <stephen.s.ko@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ixgbe/ixgbe_ethtool.c |    2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/drivers/net/ixgbe/ixgbe_ethtool.c b/drivers/net/ixgbe/ixgbe_ethtool.c
index 90a740d..f2245ac 100644
--- a/drivers/net/ixgbe/ixgbe_ethtool.c
+++ b/drivers/net/ixgbe/ixgbe_ethtool.c
@@ -2202,7 +2202,7 @@ static int ixgbe_set_flags(struct net_device *netdev, u32 data)
 	need_reset = (data & ETH_FLAG_RXVLAN) !=
 		     (netdev->features & NETIF_F_HW_VLAN_RX);
 
-	rc = ethtool_op_set_flags(netdev, data, ETH_FLAG_LRO |
+	rc = ethtool_op_set_flags(netdev, data, ETH_FLAG_LRO | ETH_FLAG_NTUPLE |
 					ETH_FLAG_RXVLAN | ETH_FLAG_TXVLAN);
 	if (rc)
 		return rc;
-- 
1.7.3.2


^ permalink raw reply related

* [net-next-2.6 14/27] ixgbe: cleanup string function calls to use bound checking versions.
From: Jeff Kirsher @ 2010-12-10 10:03 UTC (permalink / raw)
  To: davem, davem; +Cc: Don Skidmore, netdev, gospo, bphilips, Jeff Kirsher
In-Reply-To: <1291975414-30487-1-git-send-email-jeffrey.t.kirsher@intel.com>

From: Don Skidmore <donald.c.skidmore@intel.com>

Some minor cleanup to use string calls that use bound checks just to
be extra safe.

Signed-off-by: Don Skidmore <donald.c.skidmore@intel.com>
Tested-by: Stephen Ko <stephen.s.ko@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ixgbe/ixgbe_ethtool.c |    5 +++--
 drivers/net/ixgbe/ixgbe_main.c    |   16 ++++++++--------
 2 files changed, 11 insertions(+), 10 deletions(-)

diff --git a/drivers/net/ixgbe/ixgbe_ethtool.c b/drivers/net/ixgbe/ixgbe_ethtool.c
index f2245ac..23ff23e 100644
--- a/drivers/net/ixgbe/ixgbe_ethtool.c
+++ b/drivers/net/ixgbe/ixgbe_ethtool.c
@@ -839,9 +839,10 @@ static void ixgbe_get_drvinfo(struct net_device *netdev,
 	struct ixgbe_adapter *adapter = netdev_priv(netdev);
 	char firmware_version[32];
 
-	strncpy(drvinfo->driver, ixgbe_driver_name, sizeof(drvinfo->driver));
+	strncpy(drvinfo->driver, ixgbe_driver_name,
+	        sizeof(drvinfo->driver) - 1);
 	strncpy(drvinfo->version, ixgbe_driver_version,
-	        sizeof(drvinfo->version));
+	        sizeof(drvinfo->version) - 1);
 
 	snprintf(firmware_version, sizeof(firmware_version), "%d.%d-%d",
 	         (adapter->eeprom_version & 0xF000) >> 12,
diff --git a/drivers/net/ixgbe/ixgbe_main.c b/drivers/net/ixgbe/ixgbe_main.c
index f2694f2..8af0fc0 100644
--- a/drivers/net/ixgbe/ixgbe_main.c
+++ b/drivers/net/ixgbe/ixgbe_main.c
@@ -2338,14 +2338,14 @@ static int ixgbe_request_msix_irqs(struct ixgbe_adapter *adapter)
 		handler = SET_HANDLER(q_vector);
 
 		if (handler == &ixgbe_msix_clean_rx) {
-			sprintf(q_vector->name, "%s-%s-%d",
-				netdev->name, "rx", ri++);
+			snprintf(q_vector->name, sizeof(q_vector->name) - 1,
+			         "%s-%s-%d", netdev->name, "rx", ri++);
 		} else if (handler == &ixgbe_msix_clean_tx) {
-			sprintf(q_vector->name, "%s-%s-%d",
-				netdev->name, "tx", ti++);
+			snprintf(q_vector->name, sizeof(q_vector->name) - 1,
+			         "%s-%s-%d", netdev->name, "tx", ti++);
 		} else if (handler == &ixgbe_msix_clean_many) {
-			sprintf(q_vector->name, "%s-%s-%d",
-				netdev->name, "TxRx", ri++);
+			snprintf(q_vector->name, sizeof(q_vector->name) - 1,
+			         "%s-%s-%d", netdev->name, "TxRx", ri++);
 			ti++;
 		} else {
 			/* skip this unused q_vector */
@@ -7047,7 +7047,7 @@ static int __devinit ixgbe_probe(struct pci_dev *pdev,
 	netdev->netdev_ops = &ixgbe_netdev_ops;
 	ixgbe_set_ethtool_ops(netdev);
 	netdev->watchdog_timeo = 5 * HZ;
-	strcpy(netdev->name, pci_name(pdev));
+	strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1);
 
 	adapter->bd_number = cards_found;
 
@@ -7269,7 +7269,7 @@ static int __devinit ixgbe_probe(struct pci_dev *pdev,
 
 	err = ixgbe_read_pba_string_generic(hw, part_str, IXGBE_PBANUM_LENGTH);
 	if (err)
-		strcpy(part_str, "Unknown");
+		strncpy(part_str, "Unknown", IXGBE_PBANUM_LENGTH);
 	if (ixgbe_is_sfp(hw) && hw->phy.sfp_type != ixgbe_sfp_type_not_present)
 		e_dev_info("MAC: %d, PHY: %d, SFP+: %d, PBA No: %s\n",
 			   hw->mac.type, hw->phy.type, hw->phy.sfp_type,
-- 
1.7.3.2


^ permalink raw reply related

* [net-next-2.6 15/27] e1000e: fix double initialization in blink path
From: Jeff Kirsher @ 2010-12-10 10:03 UTC (permalink / raw)
  To: davem, davem; +Cc: Holger Eitzenberger, netdev, gospo, bphilips, Jeff Kirsher
In-Reply-To: <1291975414-30487-1-git-send-email-jeffrey.t.kirsher@intel.com>

From: Holger Eitzenberger <holger@eitzenberger.org>

The kernel goes BUG() at the time 'ethtool -p eth0 3' comes
back, which is due to adapter->led_blink_task being initialized
several times.  If the task is still running at that time, this
results in a corrupted task_list of the associated workqueue.

The fix is to move the workqueue initialization to the
probe function instead.

Signed-off-by: Holger Eitzenberger <holger@eitzenberger.org>
Reviewed-by: Jesse Brandeburg <jesse.brandeburg@intel.com>
Tested-by: Emil Tantilov <emil.s.tantilov@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/e1000e/e1000.h   |    1 +
 drivers/net/e1000e/ethtool.c |    3 +--
 drivers/net/e1000e/netdev.c  |    1 +
 3 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/net/e1000e/e1000.h b/drivers/net/e1000e/e1000.h
index fdc67fe..3d9366f 100644
--- a/drivers/net/e1000e/e1000.h
+++ b/drivers/net/e1000e/e1000.h
@@ -482,6 +482,7 @@ extern const char e1000e_driver_version[];
 
 extern void e1000e_check_options(struct e1000_adapter *adapter);
 extern void e1000e_set_ethtool_ops(struct net_device *netdev);
+extern void e1000e_led_blink_task(struct work_struct *work);
 
 extern int e1000e_up(struct e1000_adapter *adapter);
 extern void e1000e_down(struct e1000_adapter *adapter);
diff --git a/drivers/net/e1000e/ethtool.c b/drivers/net/e1000e/ethtool.c
index 8984d16..26d4f3b 100644
--- a/drivers/net/e1000e/ethtool.c
+++ b/drivers/net/e1000e/ethtool.c
@@ -1860,7 +1860,7 @@ static int e1000_set_wol(struct net_device *netdev,
 /* bit defines for adapter->led_status */
 #define E1000_LED_ON		0
 
-static void e1000e_led_blink_task(struct work_struct *work)
+void e1000e_led_blink_task(struct work_struct *work)
 {
 	struct e1000_adapter *adapter = container_of(work,
 	                                struct e1000_adapter, led_blink_task);
@@ -1892,7 +1892,6 @@ static int e1000_phys_id(struct net_device *netdev, u32 data)
 	    (hw->mac.type == e1000_pch2lan) ||
 	    (hw->mac.type == e1000_82583) ||
 	    (hw->mac.type == e1000_82574)) {
-		INIT_WORK(&adapter->led_blink_task, e1000e_led_blink_task);
 		if (!adapter->blink_timer.function) {
 			init_timer(&adapter->blink_timer);
 			adapter->blink_timer.function =
diff --git a/drivers/net/e1000e/netdev.c b/drivers/net/e1000e/netdev.c
index 0adcb79..f8efbbb 100644
--- a/drivers/net/e1000e/netdev.c
+++ b/drivers/net/e1000e/netdev.c
@@ -5903,6 +5903,7 @@ static int __devinit e1000_probe(struct pci_dev *pdev,
 	INIT_WORK(&adapter->downshift_task, e1000e_downshift_workaround);
 	INIT_WORK(&adapter->update_phy_task, e1000e_update_phy_task);
 	INIT_WORK(&adapter->print_hang_task, e1000_print_hw_hang);
+	INIT_WORK(&adapter->led_blink_task, e1000e_led_blink_task);
 
 	/* Initialize link parameters. User can change them with ethtool */
 	adapter->hw.mac.autoneg = 1;
-- 
1.7.3.2


^ permalink raw reply related

* [net-next-2.6 16/27] e1000e: 82571-based mezzanine card can fail ethtool link test
From: Jeff Kirsher @ 2010-12-10 10:03 UTC (permalink / raw)
  To: davem, davem; +Cc: Bruce Allan, netdev, gospo, bphilips, Jeff Kirsher
In-Reply-To: <1291975414-30487-1-git-send-email-jeffrey.t.kirsher@intel.com>

From: Bruce Allan <bruce.w.allan@intel.com>

On certain 82571-based mezzanine NICs in some blade servers, the ethtool
link test can fail due to the serdes_has_link flag not set correctly.

Signed-off-by: Bruce Allan <bruce.w.allan@intel.com>
Tested-by: Emil Tantilov <emil.s.tantilov@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/e1000e/82571.c |    7 ++++++-
 1 files changed, 6 insertions(+), 1 deletions(-)

diff --git a/drivers/net/e1000e/82571.c b/drivers/net/e1000e/82571.c
index 9333921..6942e2f 100644
--- a/drivers/net/e1000e/82571.c
+++ b/drivers/net/e1000e/82571.c
@@ -1523,8 +1523,10 @@ static s32 e1000_check_for_serdes_link_82571(struct e1000_hw *hw)
 				    e1000_serdes_link_autoneg_progress;
 				mac->serdes_has_link = false;
 				e_dbg("AN_UP     -> AN_PROG\n");
+			} else {
+				mac->serdes_has_link = true;
 			}
-		break;
+			break;
 
 		case e1000_serdes_link_forced_up:
 			/*
@@ -1543,6 +1545,8 @@ static s32 e1000_check_for_serdes_link_82571(struct e1000_hw *hw)
 				    e1000_serdes_link_autoneg_progress;
 				mac->serdes_has_link = false;
 				e_dbg("FORCED_UP -> AN_PROG\n");
+			} else {
+				mac->serdes_has_link = true;
 			}
 			break;
 
@@ -1598,6 +1602,7 @@ static s32 e1000_check_for_serdes_link_82571(struct e1000_hw *hw)
 			ew32(CTRL, (ctrl & ~E1000_CTRL_SLU));
 			mac->serdes_link_state =
 			    e1000_serdes_link_autoneg_progress;
+			mac->serdes_has_link = false;
 			e_dbg("DOWN      -> AN_PROG\n");
 			break;
 		}
-- 
1.7.3.2


^ permalink raw reply related

* [net-next-2.6 17/27] e1000e: 82574/82583 performance improvement
From: Jeff Kirsher @ 2010-12-10 10:03 UTC (permalink / raw)
  To: davem, davem; +Cc: Bruce Allan, netdev, gospo, bphilips, Jeff Kirsher
In-Reply-To: <1291975414-30487-1-git-send-email-jeffrey.t.kirsher@intel.com>

From: Bruce Allan <bruce.w.allan@intel.com>

Increasing the transmit fifo by 4K (by decreasing the receive fifo size
specified in .pba by the same amount) increases Tx performance.

Signed-off-by: Bruce Allan <bruce.w.allan@intel.com>
Tested-by: Emil Tantilov <emil.s.tantilov@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/e1000e/82571.c |    4 ++--
 1 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/e1000e/82571.c b/drivers/net/e1000e/82571.c
index 6942e2f..280d41f 100644
--- a/drivers/net/e1000e/82571.c
+++ b/drivers/net/e1000e/82571.c
@@ -2005,7 +2005,7 @@ struct e1000_info e1000_82574_info = {
 				  | FLAG_HAS_AMT
 				  | FLAG_HAS_CTRLEXT_ON_LOAD,
 	.flags2			  = FLAG2_CHECK_PHY_HANG,
-	.pba			= 36,
+	.pba			= 32,
 	.max_hw_frame_size	= DEFAULT_JUMBO,
 	.get_variants		= e1000_get_variants_82571,
 	.mac_ops		= &e82571_mac_ops,
@@ -2022,7 +2022,7 @@ struct e1000_info e1000_82583_info = {
 				  | FLAG_HAS_SMART_POWER_DOWN
 				  | FLAG_HAS_AMT
 				  | FLAG_HAS_CTRLEXT_ON_LOAD,
-	.pba			= 36,
+	.pba			= 32,
 	.max_hw_frame_size	= ETH_FRAME_LEN + ETH_FCS_LEN,
 	.get_variants		= e1000_get_variants_82571,
 	.mac_ops		= &e82571_mac_ops,
-- 
1.7.3.2


^ permalink raw reply related

* [PATCH] Document the kernel_recvmsg() function
From: Martin Lucina @ 2010-12-10 10:04 UTC (permalink / raw)
  To: netdev; +Cc: Martin Sustrik, David S. Miller

[Updated and sent to the netdev mailing list, Eric thx for the pointer]

Hi,

so, today we spent all day figuring out how the kernel_recvmsg() function
*actually* works. This patch adds some documentation to help the next poor
sod.

-mato

>From 1a977fc0b9544c53761ba3c4c26ca1aac2018663 Mon Sep 17 00:00:00 2001
From: Martin Lucina <mato@kotelna.sk>
Date: Thu, 9 Dec 2010 17:11:18 +0100
Subject: [PATCH] Document the kernel_recvmsg() function

Signed-off-by: Martin Lucina <mato@kotelna.sk>
---
 net/socket.c |   15 +++++++++++++++
 1 files changed, 15 insertions(+), 0 deletions(-)

diff --git a/net/socket.c b/net/socket.c
index 3ca2fd9..088fb3f 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -732,6 +732,21 @@ static int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg,
 	return ret;
 }
 
+/**
+ * kernel_recvmsg - Receive a message from a socket (kernel space)
+ * @sock:       The socket to receive the message from
+ * @msg:        Received message
+ * @vec:        Input s/g array for message data
+ * @num:        Size of input s/g array
+ * @size:       Number of bytes to read
+ * @flags:      Message flags (MSG_DONTWAIT, etc...)
+ *
+ * On return the msg structure contains the scatter/gather array passed in the
+ * vec argument. The array is modified so that it consists of the unfilled
+ * portion of the original array.
+ *
+ * The returned value is the total number of bytes received, or an error.
+ */
 int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
 		   struct kvec *vec, size_t num, size_t size, int flags)
 {
-- 
1.7.1


^ permalink raw reply related

* [net-next-2.6 18/27] e1000e: 82577/8 must acquire h/w semaphore before workaround
From: Jeff Kirsher @ 2010-12-10 10:03 UTC (permalink / raw)
  To: davem, davem; +Cc: Bruce Allan, netdev, gospo, bphilips, Jeff Kirsher
In-Reply-To: <1291975414-30487-1-git-send-email-jeffrey.t.kirsher@intel.com>

From: Bruce Allan <bruce.w.allan@intel.com>

The workaround function e1000_configure_k1_ich8lan() assumes the h/w
semaphore is already acquired.  This was originally missed when setting up
the part for the ethtool loopback test.

Signed-off-by: Bruce Allan <bruce.w.allan@intel.com>
Tested-by: Jeff Pieper <jeffrey.e.pieper@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/e1000e/ethtool.c |    7 +++++++
 1 files changed, 7 insertions(+), 0 deletions(-)

diff --git a/drivers/net/e1000e/ethtool.c b/drivers/net/e1000e/ethtool.c
index 26d4f3b..29b09113 100644
--- a/drivers/net/e1000e/ethtool.c
+++ b/drivers/net/e1000e/ethtool.c
@@ -1263,6 +1263,7 @@ static int e1000_integrated_phy_loopback(struct e1000_adapter *adapter)
 	u32 ctrl_reg = 0;
 	u32 stat_reg = 0;
 	u16 phy_reg = 0;
+	s32 ret_val = 0;
 
 	hw->mac.autoneg = 0;
 
@@ -1322,7 +1323,13 @@ static int e1000_integrated_phy_loopback(struct e1000_adapter *adapter)
 	case e1000_phy_82577:
 	case e1000_phy_82578:
 		/* Workaround: K1 must be disabled for stable 1Gbps operation */
+		ret_val = hw->phy.ops.acquire(hw);
+		if (ret_val) {
+			e_err("Cannot setup 1Gbps loopback.\n");
+			return ret_val;
+		}
 		e1000_configure_k1_ich8lan(hw, false);
+		hw->phy.ops.release(hw);
 		break;
 	case e1000_phy_82579:
 		/* Disable PHY energy detect power down */
-- 
1.7.3.2


^ permalink raw reply related

* [net-next-2.6 19/27] e1000e: 82571 Serdes can fail to get link
From: Jeff Kirsher @ 2010-12-10 10:03 UTC (permalink / raw)
  To: davem, davem; +Cc: Bruce Allan, netdev, gospo, bphilips, Jeff Kirsher
In-Reply-To: <1291975414-30487-1-git-send-email-jeffrey.t.kirsher@intel.com>

From: Bruce Allan <bruce.w.allan@intel.com>

When link partner is sending continuous Config symbols, the 82571 Serdes
FIFO can overflow resulting in Invalid bit getting set.  To resolve this,
if Sync and Config bits are both 1 ignore the Invalid bit and restart
auto-negotiation.

Signed-off-by: Bruce Allan <bruce.w.allan@intel.com>
Tested-by: Emil Tantilov <emil.s.tantilov@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/e1000e/82571.c |   35 +++++++++++++++++++++++++++--------
 1 files changed, 27 insertions(+), 8 deletions(-)

diff --git a/drivers/net/e1000e/82571.c b/drivers/net/e1000e/82571.c
index 280d41f..e57e409 100644
--- a/drivers/net/e1000e/82571.c
+++ b/drivers/net/e1000e/82571.c
@@ -52,6 +52,7 @@
 			      (ID_LED_DEF1_DEF2))
 
 #define E1000_GCR_L1_ACT_WITHOUT_L0S_RX 0x08000000
+#define AN_RETRY_COUNT          5 /* Autoneg Retry Count value */
 #define E1000_BASE1000T_STATUS          10
 #define E1000_IDLE_ERROR_COUNT_MASK     0xFF
 #define E1000_RECEIVE_ERROR_COUNTER     21
@@ -1503,6 +1504,8 @@ static s32 e1000_check_for_serdes_link_82571(struct e1000_hw *hw)
 	u32 rxcw;
 	u32 ctrl;
 	u32 status;
+	u32 txcw;
+	u32 i;
 	s32 ret_val = 0;
 
 	ctrl = er32(CTRL);
@@ -1613,16 +1616,32 @@ static s32 e1000_check_for_serdes_link_82571(struct e1000_hw *hw)
 			e_dbg("ANYSTATE  -> DOWN\n");
 		} else {
 			/*
-			 * We have sync, and can tolerate one invalid (IV)
-			 * codeword before declaring link down, so reread
-			 * to look again.
+			 * Check several times, if Sync and Config
+			 * both are consistently 1 then simply ignore
+			 * the Invalid bit and restart Autoneg
 			 */
-			udelay(10);
-			rxcw = er32(RXCW);
-			if (rxcw & E1000_RXCW_IV) {
-				mac->serdes_link_state = e1000_serdes_link_down;
+			for (i = 0; i < AN_RETRY_COUNT; i++) {
+				udelay(10);
+				rxcw = er32(RXCW);
+				if ((rxcw & E1000_RXCW_IV) &&
+				    !((rxcw & E1000_RXCW_SYNCH) &&
+				      (rxcw & E1000_RXCW_C))) {
+					mac->serdes_has_link = false;
+					mac->serdes_link_state =
+					    e1000_serdes_link_down;
+					e_dbg("ANYSTATE  -> DOWN\n");
+					break;
+				}
+			}
+
+			if (i == AN_RETRY_COUNT) {
+				txcw = er32(TXCW);
+				txcw |= E1000_TXCW_ANE;
+				ew32(TXCW, txcw);
+				mac->serdes_link_state =
+				    e1000_serdes_link_autoneg_progress;
 				mac->serdes_has_link = false;
-				e_dbg("ANYSTATE  -> DOWN\n");
+				e_dbg("ANYSTATE  -> AN_PROG\n");
 			}
 		}
 	}
-- 
1.7.3.2


^ permalink raw reply related

* [net-next-2.6 20/27] e1000e: 82577/8/9 mis-configured OEM bits during S0->Sx
From: Jeff Kirsher @ 2010-12-10 10:03 UTC (permalink / raw)
  To: davem, davem; +Cc: Bruce Allan, netdev, gospo, bphilips, Jeff Kirsher
In-Reply-To: <1291975414-30487-1-git-send-email-jeffrey.t.kirsher@intel.com>

From: Bruce Allan <bruce.w.allan@intel.com>

The LPLU (Low Power Link Up) and Gigabit Disable bits (a.k.a. OEM bits)
were being configured incorrectly when device goes to D3 state.

Signed-off-by: Bruce Allan <bruce.w.allan@intel.com>
Tested-by: Jeff Pieper <jeffrey.e.pieper@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/e1000e/ich8lan.c |    2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/drivers/net/e1000e/ich8lan.c b/drivers/net/e1000e/ich8lan.c
index e3374d9..d7fc930 100644
--- a/drivers/net/e1000e/ich8lan.c
+++ b/drivers/net/e1000e/ich8lan.c
@@ -3591,7 +3591,7 @@ void e1000e_disable_gig_wol_ich8lan(struct e1000_hw *hw)
 	ew32(PHY_CTRL, phy_ctrl);
 
 	if (hw->mac.type >= e1000_pchlan) {
-		e1000_oem_bits_config_ich8lan(hw, true);
+		e1000_oem_bits_config_ich8lan(hw, false);
 		ret_val = hw->phy.ops.acquire(hw);
 		if (ret_val)
 			return;
-- 
1.7.3.2


^ permalink raw reply related

* [net-next-2.6 21/27] e1000e: 82579 PHY incorrectly identified during init
From: Jeff Kirsher @ 2010-12-10 10:03 UTC (permalink / raw)
  To: davem, davem; +Cc: Bruce Allan, netdev, gospo, bphilips, Jeff Kirsher
In-Reply-To: <1291975414-30487-1-git-send-email-jeffrey.t.kirsher@intel.com>

From: Bruce Allan <bruce.w.allan@intel.com>

During init, reading the 2 PHY ID registers back-to-back in the default
fast mode could return invalid data (all F's), and in slow mode the second
read could return the data from the first read.  To resolve the issue in
fast mode, set slow mode before any PHY accesses; to resolve the issue in
slow mode, add a delay after every 82579 PHY access.
Since this PHY is currently only paired with the pch2lan MAC and the PHY
type is not known before the first PHY access which can fail this way,
check for this based on MAC-type.

Signed-off-by: Bruce Allan <bruce.w.allan@intel.com>
Tested-by: Jeff Pieper <jeffrey.e.pieper@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/e1000e/ich8lan.c |   16 +++++++++++-----
 drivers/net/e1000e/phy.c     |   14 ++++++++++++++
 2 files changed, 25 insertions(+), 5 deletions(-)

diff --git a/drivers/net/e1000e/ich8lan.c b/drivers/net/e1000e/ich8lan.c
index d7fc930..5080372 100644
--- a/drivers/net/e1000e/ich8lan.c
+++ b/drivers/net/e1000e/ich8lan.c
@@ -338,12 +338,17 @@ static s32 e1000_init_phy_params_pchlan(struct e1000_hw *hw)
 	}
 
 	phy->id = e1000_phy_unknown;
-	ret_val = e1000e_get_phy_id(hw);
-	if (ret_val)
-		goto out;
-	if ((phy->id == 0) || (phy->id == PHY_REVISION_MASK)) {
+	switch (hw->mac.type) {
+	default:
+		ret_val = e1000e_get_phy_id(hw);
+		if (ret_val)
+			goto out;
+		if ((phy->id != 0) && (phy->id != PHY_REVISION_MASK))
+			break;
+		/* fall-through */
+	case e1000_pch2lan:
 		/*
-		 * In case the PHY needs to be in mdio slow mode (eg. 82577),
+		 * In case the PHY needs to be in mdio slow mode,
 		 * set slow mode and try to get the PHY id again.
 		 */
 		ret_val = e1000_set_mdio_slow_mode_hv(hw);
@@ -352,6 +357,7 @@ static s32 e1000_init_phy_params_pchlan(struct e1000_hw *hw)
 		ret_val = e1000e_get_phy_id(hw);
 		if (ret_val)
 			goto out;
+		break;
 	}
 	phy->type = e1000e_get_phy_type_from_id(phy->id);
 
diff --git a/drivers/net/e1000e/phy.c b/drivers/net/e1000e/phy.c
index 6ad90cc..95da386 100644
--- a/drivers/net/e1000e/phy.c
+++ b/drivers/net/e1000e/phy.c
@@ -226,6 +226,13 @@ s32 e1000e_read_phy_reg_mdic(struct e1000_hw *hw, u32 offset, u16 *data)
 	}
 	*data = (u16) mdic;
 
+	/*
+	 * Allow some time after each MDIC transaction to avoid
+	 * reading duplicate data in the next MDIC transaction.
+	 */
+	if (hw->mac.type == e1000_pch2lan)
+		udelay(100);
+
 	return 0;
 }
 
@@ -279,6 +286,13 @@ s32 e1000e_write_phy_reg_mdic(struct e1000_hw *hw, u32 offset, u16 data)
 		return -E1000_ERR_PHY;
 	}
 
+	/*
+	 * Allow some time after each MDIC transaction to avoid
+	 * reading duplicate data in the next MDIC transaction.
+	 */
+	if (hw->mac.type == e1000_pch2lan)
+		udelay(100);
+
 	return 0;
 }
 
-- 
1.7.3.2


^ permalink raw reply related

* [net-next-2.6 22/27] e1000e: support new PBA format from EEPROM
From: Jeff Kirsher @ 2010-12-10 10:03 UTC (permalink / raw)
  To: davem, davem; +Cc: Bruce Allan, netdev, gospo, bphilips, Jeff Kirsher
In-Reply-To: <1291975414-30487-1-git-send-email-jeffrey.t.kirsher@intel.com>

From: Bruce Allan <bruce.w.allan@intel.com>

Provide support to e1000e for displaying the new format of the PBA found
in the EEPROM.  The unique PBA identifier is no longer restricted to
hexadecimal numbers and must now be read and displayed as a string.

Signed-off-by: Bruce Allan <bruce.w.allan@intel.com>
Tested-by: Jeff Pieper <jeffrey.e.pieper@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/e1000e/defines.h |    8 ++-
 drivers/net/e1000e/e1000.h   |    3 +-
 drivers/net/e1000e/lib.c     |  135 +++++++++++++++++++++++++++++++++++-------
 drivers/net/e1000e/netdev.c  |   12 +++-
 4 files changed, 130 insertions(+), 28 deletions(-)

diff --git a/drivers/net/e1000e/defines.h b/drivers/net/e1000e/defines.h
index 016ea38..7245dc2 100644
--- a/drivers/net/e1000e/defines.h
+++ b/drivers/net/e1000e/defines.h
@@ -488,6 +488,9 @@
 #define E1000_BLK_PHY_RESET   12
 #define E1000_ERR_SWFW_SYNC 13
 #define E1000_NOT_IMPLEMENTED 14
+#define E1000_ERR_INVALID_ARGUMENT  16
+#define E1000_ERR_NO_SPACE          17
+#define E1000_ERR_NVM_PBA_SECTION   18
 
 /* Loop limit on how long we wait for auto-negotiation to complete */
 #define FIBER_LINK_UP_LIMIT               50
@@ -650,13 +653,16 @@
 /* Mask bits for fields in Word 0x03 of the EEPROM */
 #define NVM_COMPAT_LOM    0x0800
 
+/* length of string needed to store PBA number */
+#define E1000_PBANUM_LENGTH             11
+
 /* For checksumming, the sum of all words in the NVM should equal 0xBABA. */
 #define NVM_SUM                    0xBABA
 
 /* PBA (printed board assembly) number words */
 #define NVM_PBA_OFFSET_0           8
 #define NVM_PBA_OFFSET_1           9
-
+#define NVM_PBA_PTR_GUARD          0xFAFA
 #define NVM_WORD_SIZE_BASE_SHIFT   6
 
 /* NVM Commands - SPI */
diff --git a/drivers/net/e1000e/e1000.h b/drivers/net/e1000e/e1000.h
index 3d9366f..2c913b8 100644
--- a/drivers/net/e1000e/e1000.h
+++ b/drivers/net/e1000e/e1000.h
@@ -514,7 +514,8 @@ extern struct e1000_info e1000_pch_info;
 extern struct e1000_info e1000_pch2_info;
 extern struct e1000_info e1000_es2_info;
 
-extern s32 e1000e_read_pba_num(struct e1000_hw *hw, u32 *pba_num);
+extern s32 e1000_read_pba_string_generic(struct e1000_hw *hw, u8 *pba_num,
+					 u32 pba_num_size);
 
 extern s32  e1000e_commit_phy(struct e1000_hw *hw);
 
diff --git a/drivers/net/e1000e/lib.c b/drivers/net/e1000e/lib.c
index 0fd4eb5..8377523 100644
--- a/drivers/net/e1000e/lib.c
+++ b/drivers/net/e1000e/lib.c
@@ -2139,6 +2139,119 @@ s32 e1000e_write_nvm_spi(struct e1000_hw *hw, u16 offset, u16 words, u16 *data)
 }
 
 /**
+ *  e1000_read_pba_string_generic - Read device part number
+ *  @hw: pointer to the HW structure
+ *  @pba_num: pointer to device part number
+ *  @pba_num_size: size of part number buffer
+ *
+ *  Reads the product board assembly (PBA) number from the EEPROM and stores
+ *  the value in pba_num.
+ **/
+s32 e1000_read_pba_string_generic(struct e1000_hw *hw, u8 *pba_num,
+				  u32 pba_num_size)
+{
+	s32 ret_val;
+	u16 nvm_data;
+	u16 pba_ptr;
+	u16 offset;
+	u16 length;
+
+	if (pba_num == NULL) {
+		e_dbg("PBA string buffer was null\n");
+		ret_val = E1000_ERR_INVALID_ARGUMENT;
+		goto out;
+	}
+
+	ret_val = e1000_read_nvm(hw, NVM_PBA_OFFSET_0, 1, &nvm_data);
+	if (ret_val) {
+		e_dbg("NVM Read Error\n");
+		goto out;
+	}
+
+	ret_val = e1000_read_nvm(hw, NVM_PBA_OFFSET_1, 1, &pba_ptr);
+	if (ret_val) {
+		e_dbg("NVM Read Error\n");
+		goto out;
+	}
+
+	/*
+	 * if nvm_data is not ptr guard the PBA must be in legacy format which
+	 * means pba_ptr is actually our second data word for the PBA number
+	 * and we can decode it into an ascii string
+	 */
+	if (nvm_data != NVM_PBA_PTR_GUARD) {
+		e_dbg("NVM PBA number is not stored as string\n");
+
+		/* we will need 11 characters to store the PBA */
+		if (pba_num_size < 11) {
+			e_dbg("PBA string buffer too small\n");
+			return E1000_ERR_NO_SPACE;
+		}
+
+		/* extract hex string from data and pba_ptr */
+		pba_num[0] = (nvm_data >> 12) & 0xF;
+		pba_num[1] = (nvm_data >> 8) & 0xF;
+		pba_num[2] = (nvm_data >> 4) & 0xF;
+		pba_num[3] = nvm_data & 0xF;
+		pba_num[4] = (pba_ptr >> 12) & 0xF;
+		pba_num[5] = (pba_ptr >> 8) & 0xF;
+		pba_num[6] = '-';
+		pba_num[7] = 0;
+		pba_num[8] = (pba_ptr >> 4) & 0xF;
+		pba_num[9] = pba_ptr & 0xF;
+
+		/* put a null character on the end of our string */
+		pba_num[10] = '\0';
+
+		/* switch all the data but the '-' to hex char */
+		for (offset = 0; offset < 10; offset++) {
+			if (pba_num[offset] < 0xA)
+				pba_num[offset] += '0';
+			else if (pba_num[offset] < 0x10)
+				pba_num[offset] += 'A' - 0xA;
+		}
+
+		goto out;
+	}
+
+	ret_val = e1000_read_nvm(hw, pba_ptr, 1, &length);
+	if (ret_val) {
+		e_dbg("NVM Read Error\n");
+		goto out;
+	}
+
+	if (length == 0xFFFF || length == 0) {
+		e_dbg("NVM PBA number section invalid length\n");
+		ret_val = E1000_ERR_NVM_PBA_SECTION;
+		goto out;
+	}
+	/* check if pba_num buffer is big enough */
+	if (pba_num_size < (((u32)length * 2) - 1)) {
+		e_dbg("PBA string buffer too small\n");
+		ret_val = E1000_ERR_NO_SPACE;
+		goto out;
+	}
+
+	/* trim pba length from start of string */
+	pba_ptr++;
+	length--;
+
+	for (offset = 0; offset < length; offset++) {
+		ret_val = e1000_read_nvm(hw, pba_ptr + offset, 1, &nvm_data);
+		if (ret_val) {
+			e_dbg("NVM Read Error\n");
+			goto out;
+		}
+		pba_num[offset * 2] = (u8)(nvm_data >> 8);
+		pba_num[(offset * 2) + 1] = (u8)(nvm_data & 0xFF);
+	}
+	pba_num[offset * 2] = '\0';
+
+out:
+	return ret_val;
+}
+
+/**
  *  e1000_read_mac_addr_generic - Read device MAC address
  *  @hw: pointer to the HW structure
  *
@@ -2579,25 +2692,3 @@ bool e1000e_enable_mng_pass_thru(struct e1000_hw *hw)
 out:
 	return ret_val;
 }
-
-s32 e1000e_read_pba_num(struct e1000_hw *hw, u32 *pba_num)
-{
-	s32 ret_val;
-	u16 nvm_data;
-
-	ret_val = e1000_read_nvm(hw, NVM_PBA_OFFSET_0, 1, &nvm_data);
-	if (ret_val) {
-		e_dbg("NVM Read Error\n");
-		return ret_val;
-	}
-	*pba_num = (u32)(nvm_data << 16);
-
-	ret_val = e1000_read_nvm(hw, NVM_PBA_OFFSET_1, 1, &nvm_data);
-	if (ret_val) {
-		e_dbg("NVM Read Error\n");
-		return ret_val;
-	}
-	*pba_num |= nvm_data;
-
-	return 0;
-}
diff --git a/drivers/net/e1000e/netdev.c b/drivers/net/e1000e/netdev.c
index f8efbbb..393b76d 100644
--- a/drivers/net/e1000e/netdev.c
+++ b/drivers/net/e1000e/netdev.c
@@ -5626,7 +5626,8 @@ static void e1000_print_device_info(struct e1000_adapter *adapter)
 {
 	struct e1000_hw *hw = &adapter->hw;
 	struct net_device *netdev = adapter->netdev;
-	u32 pba_num;
+	u32 ret_val;
+	u8 pba_str[E1000_PBANUM_LENGTH];
 
 	/* print bus type/speed/width info */
 	e_info("(PCI Express:2.5GB/s:%s) %pM\n",
@@ -5637,9 +5638,12 @@ static void e1000_print_device_info(struct e1000_adapter *adapter)
 	       netdev->dev_addr);
 	e_info("Intel(R) PRO/%s Network Connection\n",
 	       (hw->phy.type == e1000_phy_ife) ? "10/100" : "1000");
-	e1000e_read_pba_num(hw, &pba_num);
-	e_info("MAC: %d, PHY: %d, PBA No: %06x-%03x\n",
-	       hw->mac.type, hw->phy.type, (pba_num >> 8), (pba_num & 0xff));
+	ret_val = e1000_read_pba_string_generic(hw, pba_str,
+						E1000_PBANUM_LENGTH);
+	if (ret_val)
+		strcpy(pba_str, "Unknown");
+	e_info("MAC: %d, PHY: %d, PBA No: %s\n",
+	       hw->mac.type, hw->phy.type, pba_str);
 }
 
 static void e1000_eeprom_checks(struct e1000_adapter *adapter)
-- 
1.7.3.2


^ permalink raw reply related

* [net-next-2.6 24/27] e1000e: minor error message corrections
From: Jeff Kirsher @ 2010-12-10 10:06 UTC (permalink / raw)
  To: davem, davem; +Cc: Bruce Allan, netdev, gospo, bphilips, Jeff Kirsher

From: Bruce Allan <bruce.w.allan@intel.com>

Correct error messages when setting up Rx resources and when checking
module parameters.

Signed-off-by: Bruce Allan <bruce.w.allan@intel.com>
Tested-by: Jeff Pieper <jeffrey.e.pieper@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/e1000e/netdev.c |    2 +-
 drivers/net/e1000e/param.c  |    2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/e1000e/netdev.c b/drivers/net/e1000e/netdev.c
index 4bf843a..6e1f3a3 100644
--- a/drivers/net/e1000e/netdev.c
+++ b/drivers/net/e1000e/netdev.c
@@ -2130,7 +2130,7 @@ err_pages:
 	}
 err:
 	vfree(rx_ring->buffer_info);
-	e_err("Unable to allocate memory for the transmit descriptor ring\n");
+	e_err("Unable to allocate memory for the receive descriptor ring\n");
 	return err;
 }
 
diff --git a/drivers/net/e1000e/param.c b/drivers/net/e1000e/param.c
index 3d36911..a9612b0 100644
--- a/drivers/net/e1000e/param.c
+++ b/drivers/net/e1000e/param.c
@@ -421,7 +421,7 @@ void __devinit e1000e_check_options(struct e1000_adapter *adapter)
 		static const struct e1000_option opt = {
 			.type = enable_option,
 			.name = "CRC Stripping",
-			.err  = "defaulting to enabled",
+			.err  = "defaulting to Enabled",
 			.def  = OPTION_ENABLED
 		};
 
-- 
1.7.3.2


^ permalink raw reply related

* [net-next-2.6 25/27] e1000e: static analysis tools complain of a possible null ptr p dereference
From: Jeff Kirsher @ 2010-12-10 10:06 UTC (permalink / raw)
  To: davem, davem; +Cc: Bruce Allan, netdev, gospo, bphilips, Jeff Kirsher
In-Reply-To: <1291975585-30576-1-git-send-email-jeffrey.t.kirsher@intel.com>

From: Bruce Allan <bruce.w.allan@intel.com>

Adding this default case resolves the issue.

Signed-off-by: Bruce Allan <bruce.w.allan@intel.com>
Tested-by: Emil Tantilov <emil.s.tantilov@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/e1000e/ethtool.c |    4 ++++
 1 files changed, 4 insertions(+), 0 deletions(-)

diff --git a/drivers/net/e1000e/ethtool.c b/drivers/net/e1000e/ethtool.c
index 29b09113..72ce0ec 100644
--- a/drivers/net/e1000e/ethtool.c
+++ b/drivers/net/e1000e/ethtool.c
@@ -1992,6 +1992,10 @@ static void e1000_get_ethtool_stats(struct net_device *netdev,
 			p = (char *) adapter +
 					e1000_gstrings_stats[i].stat_offset;
 			break;
+		default:
+			data[i] = 0;
+			continue;
+			break;
 		}
 
 		data[i] = (e1000_gstrings_stats[i].sizeof_stat ==
-- 
1.7.3.2


^ permalink raw reply related

* [net-next-2.6 26/27] e1000e: increment the driver version
From: Jeff Kirsher @ 2010-12-10 10:06 UTC (permalink / raw)
  To: davem, davem; +Cc: Bruce Allan, netdev, gospo, bphilips, Jeff Kirsher
In-Reply-To: <1291975585-30576-1-git-send-email-jeffrey.t.kirsher@intel.com>

From: Bruce Allan <bruce.w.allan@intel.com>

Signed-off-by: Bruce Allan <bruce.w.allan@intel.com>
Tested-by: Jeff Pieper <jeffrey.e.pieper@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/e1000e/netdev.c |    2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/drivers/net/e1000e/netdev.c b/drivers/net/e1000e/netdev.c
index 6e1f3a3..5530d0b 100644
--- a/drivers/net/e1000e/netdev.c
+++ b/drivers/net/e1000e/netdev.c
@@ -54,7 +54,7 @@
 
 #define DRV_EXTRAVERSION "-k2"
 
-#define DRV_VERSION "1.2.7" DRV_EXTRAVERSION
+#define DRV_VERSION "1.2.20" DRV_EXTRAVERSION
 char e1000e_driver_name[] = "e1000e";
 const char e1000e_driver_version[] = DRV_VERSION;
 
-- 
1.7.3.2


^ permalink raw reply related

* [net-next-2.6 27/27] igb: Add new function to read part number from EEPROM in string format
From: Jeff Kirsher @ 2010-12-10 10:06 UTC (permalink / raw)
  To: davem, davem; +Cc: Carolyn Wyborny, netdev, gospo, bphilips, Jeff Kirsher
In-Reply-To: <1291975585-30576-1-git-send-email-jeffrey.t.kirsher@intel.com>

From: Carolyn Wyborny <carolyn.wyborny@intel.com>

New adapters will have part numbers stored in string format rather than
simple hex format. This function will read part number formats in either
hex or string.

Signed-off-by: Carolyn Wyborny <carolyn.wyborny@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/igb/e1000_defines.h |    7 +++
 drivers/net/igb/e1000_nvm.c     |   93 ++++++++++++++++++++++++++++++++++++---
 drivers/net/igb/e1000_nvm.h     |    2 +
 drivers/net/igb/igb_main.c      |   11 +++--
 4 files changed, 102 insertions(+), 11 deletions(-)

diff --git a/drivers/net/igb/e1000_defines.h b/drivers/net/igb/e1000_defines.h
index 6222279..6319ed9 100644
--- a/drivers/net/igb/e1000_defines.h
+++ b/drivers/net/igb/e1000_defines.h
@@ -419,6 +419,9 @@
 #define E1000_ERR_SWFW_SYNC 13
 #define E1000_NOT_IMPLEMENTED 14
 #define E1000_ERR_MBX      15
+#define E1000_ERR_INVALID_ARGUMENT  16
+#define E1000_ERR_NO_SPACE          17
+#define E1000_ERR_NVM_PBA_SECTION   18
 
 /* Loop limit on how long we wait for auto-negotiation to complete */
 #define COPPER_LINK_UP_LIMIT              10
@@ -580,11 +583,15 @@
 
 /* Mask bits for fields in Word 0x1a of the NVM */
 
+/* length of string needed to store part num */
+#define E1000_PBANUM_LENGTH         11
+
 /* For checksumming, the sum of all words in the NVM should equal 0xBABA. */
 #define NVM_SUM                    0xBABA
 
 #define NVM_PBA_OFFSET_0           8
 #define NVM_PBA_OFFSET_1           9
+#define NVM_PBA_PTR_GUARD          0xFAFA
 #define NVM_WORD_SIZE_BASE_SHIFT   6
 
 /* NVM Commands - Microwire */
diff --git a/drivers/net/igb/e1000_nvm.c b/drivers/net/igb/e1000_nvm.c
index d83b77fa..6b5cc2c 100644
--- a/drivers/net/igb/e1000_nvm.c
+++ b/drivers/net/igb/e1000_nvm.c
@@ -445,31 +445,112 @@ out:
 }
 
 /**
- *  igb_read_part_num - Read device part number
+ *  igb_read_part_string - Read device part number
  *  @hw: pointer to the HW structure
  *  @part_num: pointer to device part number
+ *  @part_num_size: size of part number buffer
  *
  *  Reads the product board assembly (PBA) number from the EEPROM and stores
  *  the value in part_num.
  **/
-s32 igb_read_part_num(struct e1000_hw *hw, u32 *part_num)
+s32 igb_read_part_string(struct e1000_hw *hw, u8 *part_num, u32 part_num_size)
 {
-	s32  ret_val;
+	s32 ret_val;
 	u16 nvm_data;
+	u16 pointer;
+	u16 offset;
+	u16 length;
+
+	if (part_num == NULL) {
+		hw_dbg("PBA string buffer was null\n");
+		ret_val = E1000_ERR_INVALID_ARGUMENT;
+		goto out;
+	}
 
 	ret_val = hw->nvm.ops.read(hw, NVM_PBA_OFFSET_0, 1, &nvm_data);
 	if (ret_val) {
 		hw_dbg("NVM Read Error\n");
 		goto out;
 	}
-	*part_num = (u32)(nvm_data << 16);
 
-	ret_val = hw->nvm.ops.read(hw, NVM_PBA_OFFSET_1, 1, &nvm_data);
+	ret_val = hw->nvm.ops.read(hw, NVM_PBA_OFFSET_1, 1, &pointer);
+	if (ret_val) {
+		hw_dbg("NVM Read Error\n");
+		goto out;
+	}
+
+	/*
+	 * if nvm_data is not ptr guard the PBA must be in legacy format which
+	 * means pointer is actually our second data word for the PBA number
+	 * and we can decode it into an ascii string
+	 */
+	if (nvm_data != NVM_PBA_PTR_GUARD) {
+		hw_dbg("NVM PBA number is not stored as string\n");
+
+		/* we will need 11 characters to store the PBA */
+		if (part_num_size < 11) {
+			hw_dbg("PBA string buffer too small\n");
+			return E1000_ERR_NO_SPACE;
+		}
+
+		/* extract hex string from data and pointer */
+		part_num[0] = (nvm_data >> 12) & 0xF;
+		part_num[1] = (nvm_data >> 8) & 0xF;
+		part_num[2] = (nvm_data >> 4) & 0xF;
+		part_num[3] = nvm_data & 0xF;
+		part_num[4] = (pointer >> 12) & 0xF;
+		part_num[5] = (pointer >> 8) & 0xF;
+		part_num[6] = '-';
+		part_num[7] = 0;
+		part_num[8] = (pointer >> 4) & 0xF;
+		part_num[9] = pointer & 0xF;
+
+		/* put a null character on the end of our string */
+		part_num[10] = '\0';
+
+		/* switch all the data but the '-' to hex char */
+		for (offset = 0; offset < 10; offset++) {
+			if (part_num[offset] < 0xA)
+				part_num[offset] += '0';
+			else if (part_num[offset] < 0x10)
+				part_num[offset] += 'A' - 0xA;
+		}
+
+		goto out;
+	}
+
+	ret_val = hw->nvm.ops.read(hw, pointer, 1, &length);
 	if (ret_val) {
 		hw_dbg("NVM Read Error\n");
 		goto out;
 	}
-	*part_num |= nvm_data;
+
+	if (length == 0xFFFF || length == 0) {
+		hw_dbg("NVM PBA number section invalid length\n");
+		ret_val = E1000_ERR_NVM_PBA_SECTION;
+		goto out;
+	}
+	/* check if part_num buffer is big enough */
+	if (part_num_size < (((u32)length * 2) - 1)) {
+		hw_dbg("PBA string buffer too small\n");
+		ret_val = E1000_ERR_NO_SPACE;
+		goto out;
+	}
+
+	/* trim pba length from start of string */
+	pointer++;
+	length--;
+
+	for (offset = 0; offset < length; offset++) {
+		ret_val = hw->nvm.ops.read(hw, pointer + offset, 1, &nvm_data);
+		if (ret_val) {
+			hw_dbg("NVM Read Error\n");
+			goto out;
+		}
+		part_num[offset * 2] = (u8)(nvm_data >> 8);
+		part_num[(offset * 2) + 1] = (u8)(nvm_data & 0xFF);
+	}
+	part_num[offset * 2] = '\0';
 
 out:
 	return ret_val;
diff --git a/drivers/net/igb/e1000_nvm.h b/drivers/net/igb/e1000_nvm.h
index 1041c34..29c956a 100644
--- a/drivers/net/igb/e1000_nvm.h
+++ b/drivers/net/igb/e1000_nvm.h
@@ -32,6 +32,8 @@ s32  igb_acquire_nvm(struct e1000_hw *hw);
 void igb_release_nvm(struct e1000_hw *hw);
 s32  igb_read_mac_addr(struct e1000_hw *hw);
 s32  igb_read_part_num(struct e1000_hw *hw, u32 *part_num);
+s32  igb_read_part_string(struct e1000_hw *hw, u8 *part_num,
+                          u32 part_num_size);
 s32  igb_read_nvm_eerd(struct e1000_hw *hw, u16 offset, u16 words, u16 *data);
 s32  igb_write_nvm_spi(struct e1000_hw *hw, u16 offset, u16 words, u16 *data);
 s32  igb_validate_nvm_checksum(struct e1000_hw *hw);
diff --git a/drivers/net/igb/igb_main.c b/drivers/net/igb/igb_main.c
index 67ea262..041f8e6 100644
--- a/drivers/net/igb/igb_main.c
+++ b/drivers/net/igb/igb_main.c
@@ -1729,12 +1729,13 @@ static int __devinit igb_probe(struct pci_dev *pdev,
 	struct igb_adapter *adapter;
 	struct e1000_hw *hw;
 	u16 eeprom_data = 0;
+	s32 ret_val;
 	static int global_quad_port_a; /* global quad port a indication */
 	const struct e1000_info *ei = igb_info_tbl[ent->driver_data];
 	unsigned long mmio_start, mmio_len;
 	int err, pci_using_dac;
 	u16 eeprom_apme_mask = IGB_EEPROM_APME;
-	u32 part_num;
+	u8 part_str[E1000_PBANUM_LENGTH];
 
 	/* Catch broken hardware that put the wrong VF device ID in
 	 * the PCIe SR-IOV capability.
@@ -2000,10 +2001,10 @@ static int __devinit igb_probe(struct pci_dev *pdev,
 		   "unknown"),
 		 netdev->dev_addr);
 
-	igb_read_part_num(hw, &part_num);
-	dev_info(&pdev->dev, "%s: PBA No: %06x-%03x\n", netdev->name,
-		(part_num >> 8), (part_num & 0xff));
-
+	ret_val = igb_read_part_string(hw, part_str, E1000_PBANUM_LENGTH);
+	if (ret_val)
+		strcpy(part_str, "Unknown");
+	dev_info(&pdev->dev, "%s: PBA No: %s\n", netdev->name, part_str);
 	dev_info(&pdev->dev,
 		"Using %s interrupts. %d rx queue(s), %d tx queue(s)\n",
 		adapter->msix_entries ? "MSI-X" :
-- 
1.7.3.2


^ permalink raw reply related

* [RFC PATCH V2 4/5] Add vhost zero copy callback to release guest kernel buffers
From: Shirley Ma @ 2010-12-10 10:08 UTC (permalink / raw)
  To: Avi Kivity, Arnd Bergmann, mst; +Cc: xiaohui.xin, netdev, kvm, linux-kernel

This patch uses msg_control to pass the vhost callback to macvtap (is there
a better, simple way to pass this?). vhost doesn't notify the guest to
release buffers until the underlying lower device has completed DMA for
these buffers. The vq cannot be reset while any references are outstanding.

Signed-off-by: Shirley Ma <xma@us.ibm.com>
---

 drivers/vhost/net.c   |   13 ++++++++++-
 drivers/vhost/vhost.c |   56 +++++++++++++++++++++++++++++++++++++++++++++++++
 drivers/vhost/vhost.h |    7 ++++++
 3 files changed, 75 insertions(+), 1 deletions(-)

diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index f442668..6779a1c 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -128,6 +128,7 @@ static void handle_tx(struct vhost_net *net)
 	int err, wmem;
 	size_t hdr_size;
 	struct socket *sock;
+	struct skb_ubuf_info pend;
 
 	/* TODO: check that we are running from vhost_worker?
 	 * Not sure it's worth it, it's straight-forward enough. */
@@ -189,6 +190,13 @@ static void handle_tx(struct vhost_net *net)
 			       iov_length(vq->hdr, s), hdr_size);
 			break;
 		}
+		/* use msg_control to pass vhost zerocopy ubuf info here */
+		if (sock_flag(sock->sk, SOCK_ZEROCOPY)) {
+			pend.callback = vq->callback;
+			pend.desc = head;
+			msg.msg_control = &pend;
+			msg.msg_controllen = sizeof(pend);
+		}
 		/* TODO: Check specific error and bomb out unless ENOBUFS? */
 		err = sock->ops->sendmsg(NULL, sock, &msg, len);
 		if (unlikely(err < 0)) {
@@ -199,7 +207,10 @@ static void handle_tx(struct vhost_net *net)
 		if (err != len)
 			pr_debug("Truncated TX packet: "
 				 " len %d != %zd\n", err, len);
-		vhost_add_used_and_signal(&net->dev, vq, head, 0);
+		if (sock_flag(sock->sk, SOCK_ZEROCOPY))
+			vhost_zerocopy_add_used_and_signal(vq);
+		else
+			vhost_add_used_and_signal(&net->dev, vq, head, 0);
 		total_len += len;
 		if (unlikely(total_len >= VHOST_NET_WEIGHT)) {
 			vhost_poll_queue(&vq->poll);
diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index 94701ff..b0074bc 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -170,6 +170,8 @@ static void vhost_vq_reset(struct vhost_dev *dev,
 	vq->call_ctx = NULL;
 	vq->call = NULL;
 	vq->log_ctx = NULL;
+	atomic_set(&vq->refcnt, 0);
+	vq->upend_cnt = 0;
 }
 
 static int vhost_worker(void *data)
@@ -273,6 +275,9 @@ long vhost_dev_init(struct vhost_dev *dev,
 		dev->vqs[i].heads = NULL;
 		dev->vqs[i].dev = dev;
 		mutex_init(&dev->vqs[i].mutex);
+		spin_lock_init(&dev->vqs[i].zerocopy_lock);
+		dev->vqs[i].upend_cnt = 0;
+		atomic_set(&dev->vqs[i].refcnt, 0);
 		vhost_vq_reset(dev, dev->vqs + i);
 		if (dev->vqs[i].handle_kick)
 			vhost_poll_init(&dev->vqs[i].poll,
@@ -370,10 +375,37 @@ long vhost_dev_reset_owner(struct vhost_dev *dev)
 	return 0;
 }
 
+void vhost_zerocopy_add_used_and_signal(struct vhost_virtqueue *vq)
+{
+	struct vring_used_elem heads[64];
+	int count, left, mod;
+	unsigned long flags;
+
+	count = (vq->num > 64) ? 64 : vq->num;
+	mod = vq->ubuf_cnt / count;
+	/* notify guest when number of descriptors greater than count */
+	if (mod == 0)
+		return;
+	/* 
+	 * avoid holding spin lock by notifying guest x64 buffers first
+	 */
+	vhost_add_used_and_signal_n(vq->dev, vq, vq->heads, count * mod);
+	/* reset the counter when notifying guest the rest*/
+	left = vq->ubuf_cnt - mod * count;
+	if (left > 0) {
+		spin_lock_irqsave(&vq->zerocopy_lock, flags);
+		memcpy(heads, &vq->heads[mod * count], left * sizeof *vq->heads);
+		vq->ubuf_cnt = 0;
+		spin_unlock_irqrestore(&vq->zerocopy_lock, flags);
+		vhost_add_used_and_signal_n(vq->dev, vq, heads, left);
+	}
+}
+
 /* Caller should have device mutex */
 void vhost_dev_cleanup(struct vhost_dev *dev)
 {
 	int i;
+	unsigned long begin = jiffies;
 	for (i = 0; i < dev->nvqs; ++i) {
 		if (dev->vqs[i].kick && dev->vqs[i].handle_kick) {
 			vhost_poll_stop(&dev->vqs[i].poll);
@@ -389,6 +421,12 @@ void vhost_dev_cleanup(struct vhost_dev *dev)
 			eventfd_ctx_put(dev->vqs[i].call_ctx);
 		if (dev->vqs[i].call)
 			fput(dev->vqs[i].call);
+		/* wait for all lower device DMAs done, then notify guest */
+		if (atomic_read(&dev->vqs[i].refcnt)) {
+			if (time_after(jiffies, begin + 5 * HZ))
+				vhost_zerocopy_add_used_and_signal(&dev->vqs[i]);
+		}
+
 		vhost_vq_reset(dev, dev->vqs + i);
 	}
 	vhost_dev_free_iovecs(dev);
@@ -1389,3 +1427,21 @@ void vhost_disable_notify(struct vhost_virtqueue *vq)
 		vq_err(vq, "Failed to enable notification at %p: %d\n",
 		       &vq->used->flags, r);
 }
+
+void vhost_zerocopy_callback(struct sk_buff *skb)
+{
+	unsigned long flags;
+	size_t head = skb_shinfo(skb)->ubuf.desc;
+	struct vhost_virtqueue *vq;
+
+	vq = (struct vhost_virtqueue *)container_of(
+					skb_shinfo(skb)->ubuf.callback,
+					struct vhost_virtqueue, callback);
+	if (vq) {
+		spin_lock_irqsave(&vq->zerocopy_lock, flags);
+		vq->heads[vq->upend_cnt].id = head;
+		++vq->upend_cnt;
+		spin_unlock_irqrestore(&vq->zerocopy_lock, flags);
+		atomic_dec(&vq->refcnt);
+	}
+}
diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
index 073d06a..42d283a 100644
--- a/drivers/vhost/vhost.h
+++ b/drivers/vhost/vhost.h
@@ -108,6 +108,11 @@ struct vhost_virtqueue {
 	/* Log write descriptors */
 	void __user *log_base;
 	struct vhost_log *log;
+	/* vhost zerocopy */
+	atomic_t refcnt; /* num of outstanding DMAs */
+	spinlock_t zerocopy_lock;
+	int upend_cnt; /* num of buffers DMA has done, not notify guest yet */
+	void (*callback)(struct sk_buff *skb); /* notify guest DMA done */
 };
 
 struct vhost_dev {
@@ -154,6 +159,8 @@ bool vhost_enable_notify(struct vhost_virtqueue *);
 
 int vhost_log_write(struct vhost_virtqueue *vq, struct vhost_log *log,
 		    unsigned int log_num, u64 len);
+void vhost_zerocopy_callback(struct sk_buff *skb);
+void vhost_zerocopy_add_used_and_signal(struct vhost_virtqueue *vq);
 
 #define vq_err(vq, fmt, ...) do {                                  \
 		pr_debug(pr_fmt(fmt), ##__VA_ARGS__);       \

^ permalink raw reply related

* [RFC PATCH V2 5/5] Add TX zero copy in macvtap
From: Shirley Ma @ 2010-12-10 10:13 UTC (permalink / raw)
  To: Avi Kivity, Arnd Bergmann, mst; +Cc: xiaohui.xin, netdev, kvm, linux-kernel

[-- Attachment #1: Type: text/plain, Size: 6174 bytes --]

macvtap enables zero-copy only when the buffer size is greater than GOODCOPY_LEN (128).

Signed-off-by: Shirley Ma <xma@us.ibm.com>
---

 drivers/net/macvtap.c |  128 ++++++++++++++++++++++++++++++++++++++++++++-----
 1 files changed, 116 insertions(+), 12 deletions(-)

diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c
index 4256727..2ec9692 100644
--- a/drivers/net/macvtap.c
+++ b/drivers/net/macvtap.c
@@ -60,6 +60,7 @@ static struct proto macvtap_proto = {
  */
 static dev_t macvtap_major;
 #define MACVTAP_NUM_DEVS 65536
+#define GOODCOPY_LEN  (L1_CACHE_BYTES < 128 ? 128 : L1_CACHE_BYTES)
 static struct class *macvtap_class;
 static struct cdev macvtap_cdev;
 
@@ -338,6 +339,7 @@ static int macvtap_open(struct inode *inode, struct file *file)
 {
 	struct net *net = current->nsproxy->net_ns;
 	struct net_device *dev = dev_get_by_index(net, iminor(inode));
+	struct macvlan_dev *vlan = netdev_priv(dev);
 	struct macvtap_queue *q;
 	int err;
 
@@ -367,6 +369,16 @@ static int macvtap_open(struct inode *inode, struct file *file)
 	q->flags = IFF_VNET_HDR | IFF_NO_PI | IFF_TAP;
 	q->vnet_hdr_sz = sizeof(struct virtio_net_hdr);
 
+	/*
+	 * so far only VM uses macvtap, enable zero copy between guest
+	 * kernel and host kernel when lower device supports high memory
+	 * DMA
+	 */
+	if (vlan) {
+		if (vlan->lowerdev->features & NETIF_F_ZEROCOPY)
+			sock_set_flag(&q->sk, SOCK_ZEROCOPY);
+	}
+
 	err = macvtap_set_queue(dev, file, q);
 	if (err)
 		sock_put(&q->sk);
@@ -431,6 +443,80 @@ static inline struct sk_buff *macvtap_alloc_skb(struct sock *sk, size_t prepad,
 	return skb;
 }
 
+/*
+ * zerocopy_sg_from_iovec - set skb frags from a user iovec
+ * @skb:    skb to fill; skb_headlen(skb) bytes are copied into its linear
+ *          area, the rest of the data is attached as page frags.  skb->sk
+ *          must be set, its sk_wmem_alloc is charged for every frag.
+ * @from:   user iovec array
+ * @offset: number of bytes to skip at the start of @from
+ * @count:  number of entries in @from
+ *
+ * Frag slots are filled starting at index 0, so @skb is expected to carry
+ * no frags on entry.
+ *
+ * Returns 0 on success and -EFAULT when the user memory cannot be copied or
+ * pinned, when the iovec is too short to fill the linear area, or when the
+ * data needs more than MAX_SKB_FRAGS pages.  On failure, pages that were
+ * already attached as frags stay on @skb (the original author's note says
+ * they are put when the skb is freed); pages pinned but not yet attached
+ * are released here.
+ *
+ * This can move to core network code for reuse.
+ */
+static int zerocopy_sg_from_iovec(struct sk_buff *skb, const struct iovec *from,
+				  int offset, size_t count)
+{
+	int len = iov_length(from, count) - offset;
+	int copy = skb_headlen(skb);
+	int size, offset1 = 0;
+	int i = 0;
+	skb_frag_t *f;
+
+	/* Skip over from offset, never walking past the last iovec */
+	while (count && offset >= from->iov_len) {
+		offset -= from->iov_len;
+		++from;
+		--count;
+	}
+
+	/*
+	 * copy up to skb headlen; offset1 is the write position in skb->data,
+	 * offset stays the read position inside the current iovec
+	 */
+	while (count && copy > 0) {
+		size = min_t(unsigned int, copy, from->iov_len - offset);
+		if (copy_from_user(skb->data + offset1, from->iov_base + offset,
+				   size))
+			return -EFAULT;
+		if (copy > size) {
+			++from;
+			--count;
+			offset = 0;
+		} else {
+			/* head ends inside this iovec, frags start here */
+			offset += size;
+		}
+		copy -= size;
+		offset1 += size;
+	}
+
+	/* iovec ran out before the linear area was filled */
+	if (copy > 0)
+		return -EFAULT;
+
+	if (len == offset1)
+		return 0;
+
+	while (count--) {
+		struct page *page[MAX_SKB_FRAGS];
+		int num_pages;
+		unsigned long base;
+
+		len = from->iov_len - offset;
+		if (!len) {
+			offset = 0;
+			++from;
+			continue;
+		}
+		base = (unsigned long)from->iov_base + offset;
+		size = ((base & ~PAGE_MASK) + len + ~PAGE_MASK) >> PAGE_SHIFT;
+		/*
+		 * check the free frag slots before pinning: page[] only has
+		 * MAX_SKB_FRAGS entries and pinning writes size of them
+		 * starting at index i
+		 */
+		if (size > MAX_SKB_FRAGS - i)
+			return -EFAULT;
+		num_pages = get_user_pages_fast(base, size, 0, &page[i]);
+		if (num_pages != size) {
+			/*
+			 * these pages are not attached to the skb yet, so
+			 * freeing the skb will not put them
+			 */
+			while (num_pages-- > 0)
+				put_page(page[i + num_pages]);
+			return -EFAULT;
+		}
+		while (len) {
+			f = &skb_shinfo(skb)->frags[i];
+			f->page = page[i];
+			f->page_offset = base & ~PAGE_MASK;
+			f->size = min_t(int, len, PAGE_SIZE - f->page_offset);
+			skb->data_len += f->size;
+			skb->len += f->size;
+			skb->truesize += f->size;
+			skb_shinfo(skb)->nr_frags++;
+			/* increase sk_wmem_alloc */
+			atomic_add(f->size, &skb->sk->sk_wmem_alloc);
+			base += f->size;
+			len -= f->size;
+			i++;
+		}
+		offset = 0;
+		++from;
+	}
+	return 0;
+}
+
 /*
  * macvtap_skb_from_vnet_hdr and macvtap_skb_to_vnet_hdr should
  * be shared with the tun/tap driver.
@@ -514,17 +600,19 @@ static int macvtap_skb_to_vnet_hdr(const struct sk_buff *skb,
 
 
 /* Get packet from user space buffer */
-static ssize_t macvtap_get_user(struct macvtap_queue *q,
-				const struct iovec *iv, size_t count,
-				int noblock)
+static ssize_t macvtap_get_user(struct macvtap_queue *q, struct msghdr *m,
+				const struct iovec *iv, unsigned long total_len,
+				size_t count, int noblock)
 {
 	struct sk_buff *skb;
 	struct macvlan_dev *vlan;
-	size_t len = count;
+	unsigned long len = total_len;
 	int err;
 	struct virtio_net_hdr vnet_hdr = { 0 };
 	int vnet_hdr_len = 0;
+	int copylen, zerocopy;
 
+	zerocopy = sock_flag(&q->sk, SOCK_ZEROCOPY) && (len > GOODCOPY_LEN); 
 	if (q->flags & IFF_VNET_HDR) {
 		vnet_hdr_len = q->vnet_hdr_sz;
 
@@ -550,12 +638,28 @@ static ssize_t macvtap_get_user(struct macvtap_queue *q,
 	if (unlikely(len < ETH_HLEN))
 		goto err;
 
-	skb = macvtap_alloc_skb(&q->sk, NET_IP_ALIGN, len, vnet_hdr.hdr_len,
-				noblock, &err);
+	if (zerocopy)
+		copylen = vnet_hdr.hdr_len;
+	else
+		copylen = len;
+	
+	skb = macvtap_alloc_skb(&q->sk, NET_IP_ALIGN, copylen,
+				vnet_hdr.hdr_len, noblock, &err);
 	if (!skb)
 		goto err;
-
-	err = skb_copy_datagram_from_iovec(skb, 0, iv, vnet_hdr_len, len);
+		
+	if (zerocopy) 
+		err = zerocopy_sg_from_iovec(skb, iv, vnet_hdr_len, count);
+	else
+		err = skb_copy_datagram_from_iovec(skb, 0, iv, vnet_hdr_len,
+						   len);
+	if (sock_flag(&q->sk, SOCK_ZEROCOPY)) {
+		struct skb_ubuf_info pend =
+					(struct skb_ubuf_info *)m->msg_control;
+
+		skb_shinfo(skb)->ubuf.callback = pend.callback;
+		skb_shinfo(skb)->ubuf.desc = pend.desc;
+	}
 	if (err)
 		goto err_kfree;
 
@@ -577,7 +681,7 @@ static ssize_t macvtap_get_user(struct macvtap_queue *q,
 		kfree_skb(skb);
 	rcu_read_unlock_bh();
 
-	return count;
+	return total_len;
 
 err_kfree:
 	kfree_skb(skb);
@@ -599,8 +703,8 @@ static ssize_t macvtap_aio_write(struct kiocb *iocb, const struct iovec *iv,
 	ssize_t result = -ENOLINK;
 	struct macvtap_queue *q = file->private_data;
 
-	result = macvtap_get_user(q, iv, iov_length(iv, count),
-			      file->f_flags & O_NONBLOCK);
+	result = macvtap_get_user(q, NULL, iv, iov_length(iv, count), count,
+			 	  file->f_flags & O_NONBLOCK);
 	return result;
 }
 
@@ -813,7 +917,7 @@ static int macvtap_sendmsg(struct kiocb *iocb, struct socket *sock,
 			   struct msghdr *m, size_t total_len)
 {
 	struct macvtap_queue *q = container_of(sock, struct macvtap_queue, sock);
-	return macvtap_get_user(q, m->msg_iov, total_len,
+	return macvtap_get_user(q, m, m->msg_iov, total_len, m->msg_iovlen,
 			    m->msg_flags & MSG_DONTWAIT);
 }
 


[-- Attachment #2: macvtap-zero.patch --]
[-- Type: text/x-patch, Size: 6039 bytes --]

 drivers/net/macvtap.c |  128 ++++++++++++++++++++++++++++++++++++++++++++-----
 1 files changed, 116 insertions(+), 12 deletions(-)

diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c
index 4256727..2ec9692 100644
--- a/drivers/net/macvtap.c
+++ b/drivers/net/macvtap.c
@@ -60,6 +60,7 @@ static struct proto macvtap_proto = {
  */
 static dev_t macvtap_major;
 #define MACVTAP_NUM_DEVS 65536
+#define GOODCOPY_LEN  (L1_CACHE_BYTES < 128 ? 128 : L1_CACHE_BYTES)
 static struct class *macvtap_class;
 static struct cdev macvtap_cdev;
 
@@ -338,6 +339,7 @@ static int macvtap_open(struct inode *inode, struct file *file)
 {
 	struct net *net = current->nsproxy->net_ns;
 	struct net_device *dev = dev_get_by_index(net, iminor(inode));
+	struct macvlan_dev *vlan = netdev_priv(dev);
 	struct macvtap_queue *q;
 	int err;
 
@@ -367,6 +369,16 @@ static int macvtap_open(struct inode *inode, struct file *file)
 	q->flags = IFF_VNET_HDR | IFF_NO_PI | IFF_TAP;
 	q->vnet_hdr_sz = sizeof(struct virtio_net_hdr);
 
+	/*
+	 * so far only VM uses macvtap, enable zero copy between guest
+	 * kernel and host kernel when lower device supports high memory
+	 * DMA
+	 */
+	if (vlan) {
+		if (vlan->lowerdev->features & NETIF_F_ZEROCOPY)
+			sock_set_flag(&q->sk, SOCK_ZEROCOPY);
+	}
+
 	err = macvtap_set_queue(dev, file, q);
 	if (err)
 		sock_put(&q->sk);
@@ -431,6 +443,80 @@ static inline struct sk_buff *macvtap_alloc_skb(struct sock *sk, size_t prepad,
 	return skb;
 }
 
+/*
+ * zerocopy_sg_from_iovec - set skb frags from a user iovec
+ * @skb:    skb to fill; skb_headlen(skb) bytes are copied into its linear
+ *          area, the rest of the data is attached as page frags.  skb->sk
+ *          must be set, its sk_wmem_alloc is charged for every frag.
+ * @from:   user iovec array
+ * @offset: number of bytes to skip at the start of @from
+ * @count:  number of entries in @from
+ *
+ * Frag slots are filled starting at index 0, so @skb is expected to carry
+ * no frags on entry.
+ *
+ * Returns 0 on success and -EFAULT when the user memory cannot be copied or
+ * pinned, when the iovec is too short to fill the linear area, or when the
+ * data needs more than MAX_SKB_FRAGS pages.  On failure, pages that were
+ * already attached as frags stay on @skb (the original author's note says
+ * they are put when the skb is freed); pages pinned but not yet attached
+ * are released here.
+ *
+ * This can move to core network code for reuse.
+ */
+static int zerocopy_sg_from_iovec(struct sk_buff *skb, const struct iovec *from,
+				  int offset, size_t count)
+{
+	int len = iov_length(from, count) - offset;
+	int copy = skb_headlen(skb);
+	int size, offset1 = 0;
+	int i = 0;
+	skb_frag_t *f;
+
+	/* Skip over from offset, never walking past the last iovec */
+	while (count && offset >= from->iov_len) {
+		offset -= from->iov_len;
+		++from;
+		--count;
+	}
+
+	/*
+	 * copy up to skb headlen; offset1 is the write position in skb->data,
+	 * offset stays the read position inside the current iovec
+	 */
+	while (count && copy > 0) {
+		size = min_t(unsigned int, copy, from->iov_len - offset);
+		if (copy_from_user(skb->data + offset1, from->iov_base + offset,
+				   size))
+			return -EFAULT;
+		if (copy > size) {
+			++from;
+			--count;
+			offset = 0;
+		} else {
+			/* head ends inside this iovec, frags start here */
+			offset += size;
+		}
+		copy -= size;
+		offset1 += size;
+	}
+
+	/* iovec ran out before the linear area was filled */
+	if (copy > 0)
+		return -EFAULT;
+
+	if (len == offset1)
+		return 0;
+
+	while (count--) {
+		struct page *page[MAX_SKB_FRAGS];
+		int num_pages;
+		unsigned long base;
+
+		len = from->iov_len - offset;
+		if (!len) {
+			offset = 0;
+			++from;
+			continue;
+		}
+		base = (unsigned long)from->iov_base + offset;
+		size = ((base & ~PAGE_MASK) + len + ~PAGE_MASK) >> PAGE_SHIFT;
+		/*
+		 * check the free frag slots before pinning: page[] only has
+		 * MAX_SKB_FRAGS entries and pinning writes size of them
+		 * starting at index i
+		 */
+		if (size > MAX_SKB_FRAGS - i)
+			return -EFAULT;
+		num_pages = get_user_pages_fast(base, size, 0, &page[i]);
+		if (num_pages != size) {
+			/*
+			 * these pages are not attached to the skb yet, so
+			 * freeing the skb will not put them
+			 */
+			while (num_pages-- > 0)
+				put_page(page[i + num_pages]);
+			return -EFAULT;
+		}
+		while (len) {
+			f = &skb_shinfo(skb)->frags[i];
+			f->page = page[i];
+			f->page_offset = base & ~PAGE_MASK;
+			f->size = min_t(int, len, PAGE_SIZE - f->page_offset);
+			skb->data_len += f->size;
+			skb->len += f->size;
+			skb->truesize += f->size;
+			skb_shinfo(skb)->nr_frags++;
+			/* increase sk_wmem_alloc */
+			atomic_add(f->size, &skb->sk->sk_wmem_alloc);
+			base += f->size;
+			len -= f->size;
+			i++;
+		}
+		offset = 0;
+		++from;
+	}
+	return 0;
+}
+
 /*
  * macvtap_skb_from_vnet_hdr and macvtap_skb_to_vnet_hdr should
  * be shared with the tun/tap driver.
@@ -514,17 +600,19 @@ static int macvtap_skb_to_vnet_hdr(const struct sk_buff *skb,
 
 
 /* Get packet from user space buffer */
-static ssize_t macvtap_get_user(struct macvtap_queue *q,
-				const struct iovec *iv, size_t count,
-				int noblock)
+static ssize_t macvtap_get_user(struct macvtap_queue *q, struct msghdr *m,
+				const struct iovec *iv, unsigned long total_len,
+				size_t count, int noblock)
 {
 	struct sk_buff *skb;
 	struct macvlan_dev *vlan;
-	size_t len = count;
+	unsigned long len = total_len;
 	int err;
 	struct virtio_net_hdr vnet_hdr = { 0 };
 	int vnet_hdr_len = 0;
+	int copylen, zerocopy;
 
+	zerocopy = sock_flag(&q->sk, SOCK_ZEROCOPY) && (len > GOODCOPY_LEN); 
 	if (q->flags & IFF_VNET_HDR) {
 		vnet_hdr_len = q->vnet_hdr_sz;
 
@@ -550,12 +638,28 @@ static ssize_t macvtap_get_user(struct macvtap_queue *q,
 	if (unlikely(len < ETH_HLEN))
 		goto err;
 
-	skb = macvtap_alloc_skb(&q->sk, NET_IP_ALIGN, len, vnet_hdr.hdr_len,
-				noblock, &err);
+	if (zerocopy)
+		copylen = vnet_hdr.hdr_len;
+	else
+		copylen = len;
+	
+	skb = macvtap_alloc_skb(&q->sk, NET_IP_ALIGN, copylen,
+				vnet_hdr.hdr_len, noblock, &err);
 	if (!skb)
 		goto err;
-
-	err = skb_copy_datagram_from_iovec(skb, 0, iv, vnet_hdr_len, len);
+		
+	if (zerocopy) 
+		err = zerocopy_sg_from_iovec(skb, iv, vnet_hdr_len, count);
+	else
+		err = skb_copy_datagram_from_iovec(skb, 0, iv, vnet_hdr_len,
+						   len);
+	if (sock_flag(&q->sk, SOCK_ZEROCOPY)) {
+		struct skb_ubuf_info pend =
+					(struct skb_ubuf_info *)m->msg_control;
+
+		skb_shinfo(skb)->ubuf.callback = pend.callback;
+		skb_shinfo(skb)->ubuf.desc = pend.desc;
+	}
 	if (err)
 		goto err_kfree;
 
@@ -577,7 +681,7 @@ static ssize_t macvtap_get_user(struct macvtap_queue *q,
 		kfree_skb(skb);
 	rcu_read_unlock_bh();
 
-	return count;
+	return total_len;
 
 err_kfree:
 	kfree_skb(skb);
@@ -599,8 +703,8 @@ static ssize_t macvtap_aio_write(struct kiocb *iocb, const struct iovec *iv,
 	ssize_t result = -ENOLINK;
 	struct macvtap_queue *q = file->private_data;
 
-	result = macvtap_get_user(q, iv, iov_length(iv, count),
-			      file->f_flags & O_NONBLOCK);
+	result = macvtap_get_user(q, NULL, iv, iov_length(iv, count), count,
+			 	  file->f_flags & O_NONBLOCK);
 	return result;
 }
 
@@ -813,7 +917,7 @@ static int macvtap_sendmsg(struct kiocb *iocb, struct socket *sock,
 			   struct msghdr *m, size_t total_len)
 {
 	struct macvtap_queue *q = container_of(sock, struct macvtap_queue, sock);
-	return macvtap_get_user(q, m->msg_iov, total_len,
+	return macvtap_get_user(q, m, m->msg_iov, total_len, m->msg_iovlen,
 			    m->msg_flags & MSG_DONTWAIT);
 }
 

^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox