Netdev List
 help / color / mirror / Atom feed
* [net-next-2.6 PATCH 1/3] ixgbe: Remove unneeded register writes in VF VLAN setup
From: Jeff Kirsher @ 2010-05-06  5:57 UTC (permalink / raw)
  To: davem; +Cc: netdev, gospo, Greg Rose, Jeff Kirsher

From: Greg Rose <gregory.v.rose@intel.com>

The driver is unnecessarily writing values to VLAN control registers.
These writes are already done elsewhere and are superfluous here.

Signed-off-by: Greg Rose <gregory.v.rose@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---

 drivers/net/ixgbe/ixgbe_sriov.c |   11 -----------
 1 files changed, 0 insertions(+), 11 deletions(-)

diff --git a/drivers/net/ixgbe/ixgbe_sriov.c b/drivers/net/ixgbe/ixgbe_sriov.c
index 221b2ca..c4e5150 100644
--- a/drivers/net/ixgbe/ixgbe_sriov.c
+++ b/drivers/net/ixgbe/ixgbe_sriov.c
@@ -98,17 +98,6 @@ void ixgbe_restore_vf_multicasts(struct ixgbe_adapter *adapter)
 
 int ixgbe_set_vf_vlan(struct ixgbe_adapter *adapter, int add, int vid, u32 vf)
 {
-	u32 ctrl;
-
-	/* Check if global VLAN already set, if not set it */
-	ctrl = IXGBE_READ_REG(&adapter->hw, IXGBE_VLNCTRL);
-	if (!(ctrl & IXGBE_VLNCTRL_VFE)) {
-		/* enable VLAN tag insert/strip */
-		ctrl |= IXGBE_VLNCTRL_VFE;
-		ctrl &= ~IXGBE_VLNCTRL_CFIEN;
-		IXGBE_WRITE_REG(&adapter->hw, IXGBE_VLNCTRL, ctrl);
-	}
-
 	return adapter->hw.mac.ops.set_vfta(&adapter->hw, vid, vf, (bool)add);
 }
 


^ permalink raw reply related

* Re: [PATCH/RFC] cxgb4: Add MAINTAINERS info
From: Or Gerlitz @ 2010-05-06  5:57 UTC (permalink / raw)
  To: Roland Dreier; +Cc: linux-rdma, netdev, swise, dm
In-Reply-To: <adawrvijhpq.fsf_-_@roland-alpha.cisco.com>

Roland Dreier wrote:
>  +CXGB4 ETHERNET DRIVER (CXGB4)
>   
not sure who's the butterfly that caused this, but this was somehow 
committed as  "CXGB4 ETHERNET DRIVER (CXGB3)" and same goes for the IW_ 
piece

Or.


^ permalink raw reply

* [net-next-2.6 V5 PATCH 3/3] Add SR-IOV support to enic (please don't apply this patch)
From: Scott Feldman @ 2010-05-06  4:42 UTC (permalink / raw)
  To: davem; +Cc: netdev, chrisw, arnd
In-Reply-To: <20100506044200.29677.20604.stgit@savbu-pc100.cisco.com>

From: Scott Feldman <scofeldm@cisco.com>

This patch is to illustrate how port-profiles will be assigned to VFs in
a compliant SR-IOV enic device.  Here the VF devices are dynamic enics and
the PF device is a "static" enic device.  Only the PF device responds to
ndo_vf_{set|get}_port_profile to set/get the port-profile on a VF.  It's
not possible to set a port-profile on a PF since PFs have an immutable port
assignment on the external switch, established when the PF was provisioned.

The same driver (enic) is used for both PF and VF devices.  The PF
enables N number of VFs based on a PF configuration parameter assigned
when the PF is provisioned.

While this patch is functionally complete, we (Cisco) need to do more testing
before we can claim full SR-IOV support in Linux, so we ask that this patch
not be applied at this time.  It is provided with this patch set for
illustrative purposes only to show how the port-profile netlink API would
be used for a SR-IOV compliant device that supports port-profiles.

Signed-off-by: Scott Feldman <scofeldm@cisco.com>
Signed-off-by: Roopa Prabhu<roprabhu@cisco.com>
---
 drivers/net/enic/enic.h      |    5 +-
 drivers/net/enic/enic_main.c |   96 +++++++++++++++++++++++++++++++-----------
 drivers/net/enic/enic_res.c  |    3 +
 drivers/net/enic/vnic_dev.c  |   12 +++--
 drivers/net/enic/vnic_dev.h  |    6 +--
 drivers/net/enic/vnic_enet.h |    1 
 6 files changed, 86 insertions(+), 37 deletions(-)

diff --git a/drivers/net/enic/enic.h b/drivers/net/enic/enic.h
index 718033f..4d00e5e 100644
--- a/drivers/net/enic/enic.h
+++ b/drivers/net/enic/enic.h
@@ -34,7 +34,7 @@
 
 #define DRV_NAME		"enic"
 #define DRV_DESCRIPTION		"Cisco VIC Ethernet NIC Driver"
-#define DRV_VERSION		"1.3.1.1-pp"
+#define DRV_VERSION		"1.3.1.1-sr-iov"
 #define DRV_COPYRIGHT		"Copyright 2008-2009 Cisco Systems, Inc"
 #define PFX			DRV_NAME ": "
 
@@ -95,7 +95,8 @@ struct enic {
 	u32 port_mtu;
 	u32 rx_coalesce_usecs;
 	u32 tx_coalesce_usecs;
-	struct ifla_vf_port_profile pp;
+	struct ifla_vf_port_profile *pp;
+	unsigned int vf_count;
 
 	/* work queue cache line section */
 	____cacheline_aligned struct vnic_wq wq[ENIC_WQ_MAX];
diff --git a/drivers/net/enic/enic_main.c b/drivers/net/enic/enic_main.c
index 8e5e46b..1488431 100644
--- a/drivers/net/enic/enic_main.c
+++ b/drivers/net/enic/enic_main.c
@@ -941,35 +941,36 @@ static void enic_tx_timeout(struct net_device *netdev)
 	schedule_work(&enic->reset);
 }
 
-static int enic_vnic_dev_deinit(struct enic *enic)
+static int enic_vnic_dev_deinit(struct enic *enic, int vf)
 {
 	int err;
 
 	spin_lock(&enic->devcmd_lock);
-	err = vnic_dev_deinit(enic->vdev);
+	err = vnic_dev_deinit(enic->vdev, vf);
 	spin_unlock(&enic->devcmd_lock);
 
 	return err;
 }
 
-static int enic_dev_init_prov(struct enic *enic, struct vic_provinfo *vp)
+static int enic_dev_init_prov(struct enic *enic, int vf,
+	struct vic_provinfo *vp)
 {
 	int err;
 
 	spin_lock(&enic->devcmd_lock);
-	err = vnic_dev_init_prov(enic->vdev,
+	err = vnic_dev_init_prov(enic->vdev, vf,
 		(u8 *)vp, vic_provinfo_size(vp));
 	spin_unlock(&enic->devcmd_lock);
 
 	return err;
 }
 
-static int enic_dev_init_done(struct enic *enic, int *done, int *error)
+static int enic_dev_init_done(struct enic *enic, int vf, int *done, int *error)
 {
 	int err;
 
 	spin_lock(&enic->devcmd_lock);
-	err = vnic_dev_init_done(enic->vdev, done, error);
+	err = vnic_dev_init_done(enic->vdev, vf, done, error);
 	spin_unlock(&enic->devcmd_lock);
 
 	return err;
@@ -993,23 +994,22 @@ static int enic_set_vf_port_profile(struct net_device *netdev, int vf,
 	struct enic *enic = netdev_priv(netdev);
 	struct vic_provinfo *vp;
 	u8 oui[3] = VIC_PROVINFO_CISCO_OUI;
-	u8 *mac = ivp->mac;
 	int err;
 
-	if (!enic_is_dynamic(enic))
+	if (enic_is_dynamic(enic))
 		return -EOPNOTSUPP;
 
-	memset(&enic->pp, 0, sizeof(enic->pp));
+	if (vf < 0 || vf >= enic->vf_count)
+		return -EOPNOTSUPP;
+
+	memset(&enic->pp[vf], 0, sizeof(enic->pp[vf]));
 
-	enic_vnic_dev_deinit(enic);
+	enic_vnic_dev_deinit(enic, vf);
 
 	if (strlen(ivp->port_profile) == 0)
 		return 0;
 
-	if (is_zero_ether_addr(mac))
-		mac = netdev->dev_addr;
-
-	if (!is_valid_ether_addr(mac))
+	if (!is_valid_ether_addr(ipv->mac))
 		return -EADDRNOTAVAIL;
 
 	vp = vic_provinfo_alloc(GFP_KERNEL, oui, VIC_PROVINFO_LINUX_TYPE);
@@ -1019,7 +1019,7 @@ static int enic_set_vf_port_profile(struct net_device *netdev, int vf,
 	enic_provinfo_add_tlv_str(vp, VIC_LINUX_PROV_TLV_PORT_PROFILE_NAME_STR,
 		IFLA_VF_PORT_PROFILE_MAX, ivp->port_profile);
 	vic_provinfo_add_tlv(vp, VIC_LINUX_PROV_TLV_CLIENT_MAC_ADDR,
-		ETH_ALEN, mac);
+		ETH_ALEN, ivp->mac);
 	enic_provinfo_add_tlv_str(vp, VIC_LINUX_PROV_TLV_HOST_UUID_STR,
 		IFLA_VF_UUID_MAX, ivp->host_uuid);
 	enic_provinfo_add_tlv_str(vp, VIC_LINUX_PROV_TLV_CLIENT_UUID_STR,
@@ -1027,11 +1027,11 @@ static int enic_set_vf_port_profile(struct net_device *netdev, int vf,
 	enic_provinfo_add_tlv_str(vp, VIC_LINUX_PROV_TLV_CLIENT_NAME_STR,
 		IFLA_VF_CLIENT_NAME_MAX, ivp->client_name);
 
-	err = enic_dev_init_prov(enic, vp);
+	err = enic_dev_init_prov(enic, vf, vp);
 	if (err)
 		goto err_out;
 
-	memcpy(&enic->pp, ivp, sizeof(enic->pp));
+	memcpy(&enic->pp[vf], ivp, sizeof(enic->pp[vf]));
 
 err_out:
 	vic_provinfo_free(vp);
@@ -1045,23 +1045,26 @@ static int enic_get_vf_port_profile(struct net_device *netdev, int vf,
 	struct enic *enic = netdev_priv(netdev);
 	int err, error, done;
 
-	if (!enic_is_dynamic(enic))
+	if (enic_is_dynamic(enic))
+		return -EOPNOTSUPP;
+
+	if (vf < 0 || vf >= enic->vf_count)
 		return -EOPNOTSUPP;
 
-	enic->pp.status = IFLA_VF_PORT_PROFILE_STATUS_UNKNOWN;
+	enic->pp[vf].status = IFLA_VF_PORT_PROFILE_STATUS_UNKNOWN;
 
-	err = enic_dev_init_done(enic, &done, &error);
+	err = enic_dev_init_done(enic, vf, &done, &error);
 
 	if (err || error)
-		enic->pp.status = IFLA_VF_PORT_PROFILE_STATUS_ERROR;
+		enic->pp[vf].status = IFLA_VF_PORT_PROFILE_STATUS_ERROR;
 
 	if (!done)
-		enic->pp.status = IFLA_VF_PORT_PROFILE_STATUS_INPROGRESS;
+		enic->pp[vf].status = IFLA_VF_PORT_PROFILE_STATUS_INPROGRESS;
 
 	if (!error)
-		enic->pp.status = IFLA_VF_PORT_PROFILE_STATUS_SUCCESS;
+		enic->pp[vf].status = IFLA_VF_PORT_PROFILE_STATUS_SUCCESS;
 
-	memcpy(ivp, &enic->pp, sizeof(enic->pp));
+	memcpy(ivp, &enic->pp[vf], sizeof(enic->pp[vf]));
 
 	return 0;
 }
@@ -2023,6 +2026,37 @@ err_out_free_vnic_resources:
 	return err;
 }
 
+static int enic_enable_vfs(struct enic *enic)
+{
+	int err;
+
+	enic->vf_count = enic->config.vf_count;
+
+	enic->pp = kzalloc(enic->vf_count  *
+		sizeof(struct ifla_vf_port_profile), GFP_KERNEL);
+	if (!enic->pp)
+		return -ENOMEM;
+
+	if (enic->pdev->is_physfn && enic->vf_count > 0) {
+
+		err = pci_enable_sriov(enic->pdev, enic->vf_count);
+		if (err) {
+			kfree(enic->pp);
+			return err;
+		}
+	}
+
+	return 0;
+}
+
+static void enic_disable_vfs(struct enic *enic)
+{
+       if (enic->pdev->is_physfn && enic->vf_count > 0)
+               pci_disable_sriov(enic->pdev);
+       kfree(enic->pp);
+       enic->vf_count = 0;
+}
+
 static void enic_iounmap(struct enic *enic)
 {
 	unsigned int i;
@@ -2174,6 +2208,13 @@ static int __devinit enic_probe(struct pci_dev *pdev,
 		goto err_out_dev_close;
 	}
 
+	err = enic_enable_vfs(enic);
+	if (err) {
+		printk(KERN_ERR PFX
+			"SR-IOV VF enable failed, aborting.\n");
+		goto err_out_dev_deinit;
+	}
+
 	/* Setup notification timer, HW reset task, and locks
 	 */
 
@@ -2198,7 +2239,7 @@ static int __devinit enic_probe(struct pci_dev *pdev,
 	if (err) {
 		printk(KERN_ERR PFX
 			"Invalid MAC address, aborting.\n");
-		goto err_out_dev_deinit;
+		goto err_out_disable_vfs;
 	}
 
 	enic->tx_coalesce_usecs = enic->config.intr_timer_usec;
@@ -2234,11 +2275,13 @@ static int __devinit enic_probe(struct pci_dev *pdev,
 	if (err) {
 		printk(KERN_ERR PFX
 			"Cannot register net device, aborting.\n");
-		goto err_out_dev_deinit;
+		goto err_out_disable_vfs;
 	}
 
 	return 0;
 
+err_out_disable_vfs:
+	enic_disable_vfs(enic);
 err_out_dev_deinit:
 	enic_dev_deinit(enic);
 err_out_dev_close:
@@ -2267,6 +2310,7 @@ static void __devexit enic_remove(struct pci_dev *pdev)
 
 		flush_scheduled_work();
 		unregister_netdev(netdev);
+		enic_disable_vfs(enic);
 		enic_dev_deinit(enic);
 		vnic_dev_close(enic->vdev);
 		vnic_dev_unregister(enic->vdev);
diff --git a/drivers/net/enic/enic_res.c b/drivers/net/enic/enic_res.c
index 02839bf..dfb37f2 100644
--- a/drivers/net/enic/enic_res.c
+++ b/drivers/net/enic/enic_res.c
@@ -69,6 +69,7 @@ int enic_get_vnic_config(struct enic *enic)
 	GET_CONFIG(intr_timer_type);
 	GET_CONFIG(intr_mode);
 	GET_CONFIG(intr_timer_usec);
+	GET_CONFIG(vf_count);
 
 	c->wq_desc_count =
 		min_t(u32, ENIC_MAX_WQ_DESCS,
@@ -99,6 +100,8 @@ int enic_get_vnic_config(struct enic *enic)
 		c->mtu, ENIC_SETTING(enic, TXCSUM),
 		ENIC_SETTING(enic, RXCSUM), ENIC_SETTING(enic, TSO),
 		ENIC_SETTING(enic, LRO), c->intr_timer_usec);
+	if (c->vf_count)
+		printk(KERN_INFO PFX "vNIC SR-IOV VF count %d\n", c->vf_count);
 
 	return 0;
 }
diff --git a/drivers/net/enic/vnic_dev.c b/drivers/net/enic/vnic_dev.c
index e351b0f..261d5f0 100644
--- a/drivers/net/enic/vnic_dev.c
+++ b/drivers/net/enic/vnic_dev.c
@@ -682,9 +682,9 @@ int vnic_dev_init(struct vnic_dev *vdev, int arg)
 	return r;
 }
 
-int vnic_dev_init_done(struct vnic_dev *vdev, int *done, int *err)
+int vnic_dev_init_done(struct vnic_dev *vdev, u16 vf, int *done, int *err)
 {
-	u64 a0 = 0, a1 = 0;
+	u64 a0 = vf, a1 = 0;
 	int wait = 1000;
 	int ret;
 
@@ -701,9 +701,9 @@ int vnic_dev_init_done(struct vnic_dev *vdev, int *done, int *err)
 	return 0;
 }
 
-int vnic_dev_init_prov(struct vnic_dev *vdev, u8 *buf, u32 len)
+int vnic_dev_init_prov(struct vnic_dev *vdev, u16 vf, u8 *buf, u32 len)
 {
-	u64 a0, a1 = len;
+	u64 a0, a1 = (u64)len | ((u64)vf << 32);
 	int wait = 1000;
 	u64 prov_pa;
 	void *prov_buf;
@@ -724,9 +724,9 @@ int vnic_dev_init_prov(struct vnic_dev *vdev, u8 *buf, u32 len)
 	return ret;
 }
 
-int vnic_dev_deinit(struct vnic_dev *vdev)
+int vnic_dev_deinit(struct vnic_dev *vdev, u16 vf)
 {
-	u64 a0 = 0, a1 = 0;
+	u64 a0 = vf, a1 = 0;
 	int wait = 1000;
 
 	return vnic_dev_cmd(vdev, CMD_DEINIT, &a0, &a1, wait);
diff --git a/drivers/net/enic/vnic_dev.h b/drivers/net/enic/vnic_dev.h
index 27f5a5a..d508187 100644
--- a/drivers/net/enic/vnic_dev.h
+++ b/drivers/net/enic/vnic_dev.h
@@ -124,9 +124,9 @@ int vnic_dev_disable(struct vnic_dev *vdev);
 int vnic_dev_open(struct vnic_dev *vdev, int arg);
 int vnic_dev_open_done(struct vnic_dev *vdev, int *done);
 int vnic_dev_init(struct vnic_dev *vdev, int arg);
-int vnic_dev_init_done(struct vnic_dev *vdev, int *done, int *err);
-int vnic_dev_init_prov(struct vnic_dev *vdev, u8 *buf, u32 len);
-int vnic_dev_deinit(struct vnic_dev *vdev);
+int vnic_dev_init_done(struct vnic_dev *vdev, u16 vf, int *done, int *err);
+int vnic_dev_init_prov(struct vnic_dev *vdev, u16 vf, u8 *buf, u32 len);
+int vnic_dev_deinit(struct vnic_dev *vdev, u16 vf);
 int vnic_dev_soft_reset(struct vnic_dev *vdev, int arg);
 int vnic_dev_soft_reset_done(struct vnic_dev *vdev, int *done);
 void vnic_dev_set_intr_mode(struct vnic_dev *vdev,
diff --git a/drivers/net/enic/vnic_enet.h b/drivers/net/enic/vnic_enet.h
index 8eeb675..466a7b3 100644
--- a/drivers/net/enic/vnic_enet.h
+++ b/drivers/net/enic/vnic_enet.h
@@ -35,6 +35,7 @@ struct vnic_enet_config {
 	u8 intr_mode;
 	char devname[16];
 	u32 intr_timer_usec;
+	u16 vf_count;
 };
 
 #define VENETF_TSO		0x1	/* TSO enabled */


^ permalink raw reply related

* [net-next-2.6 V5 PATCH 2/3] Add ndo_{set|get}_vf_port_profile op support for enic dynamic vnics
From: Scott Feldman @ 2010-05-06  4:42 UTC (permalink / raw)
  To: davem; +Cc: netdev, chrisw, arnd
In-Reply-To: <20100506044200.29677.20604.stgit@savbu-pc100.cisco.com>

From: Scott Feldman <scofeldm@cisco.com>

Add enic ndo_{set|get}_vf_port_profile ops to support setting/getting
port-profile for enic dynamic devices.  Enic dynamic devices are just like
normal enic eth devices except dynamic enics require an extra configuration
step to assign a port-profile identifier to the interface before the
interface is useable.  Once a port-profile is assigned, link comes up on the
interface and is ready for I/O.  The port-profile is used to configure the
network port assigned to the interface.  The network port configuration
includes VLAN membership, QoS policies, and port security settings typical
of a data center network.

A dynamic enic is assigned a default random mac address.  If no mac address
parameter is specified in the ndo_set_vf_port_profile op, the default random
mac address is used when assigning the port-profile.  Otherwise the mac
address specified in the op is used.

Signed-off-by: Scott Feldman <scofeldm@cisco.com>
Signed-off-by: Roopa Prabhu<roprabhu@cisco.com>
---
 drivers/net/enic/Makefile    |    2 
 drivers/net/enic/enic.h      |    3 -
 drivers/net/enic/enic_main.c |  200 +++++++++++++++++++++++++++++++++++++-----
 drivers/net/enic/vnic_dev.c  |   50 +++++++++++
 drivers/net/enic/vnic_dev.h  |    3 +
 drivers/net/enic/vnic_vic.c  |   73 +++++++++++++++
 drivers/net/enic/vnic_vic.h  |   59 ++++++++++++
 7 files changed, 363 insertions(+), 27 deletions(-)

diff --git a/drivers/net/enic/Makefile b/drivers/net/enic/Makefile
index 391c3bc..e7b6c31 100644
--- a/drivers/net/enic/Makefile
+++ b/drivers/net/enic/Makefile
@@ -1,5 +1,5 @@
 obj-$(CONFIG_ENIC) := enic.o
 
 enic-y := enic_main.o vnic_cq.o vnic_intr.o vnic_wq.o \
-	enic_res.o vnic_dev.o vnic_rq.o
+	enic_res.o vnic_dev.o vnic_rq.o vnic_vic.o
 
diff --git a/drivers/net/enic/enic.h b/drivers/net/enic/enic.h
index 5fa56f1..718033f 100644
--- a/drivers/net/enic/enic.h
+++ b/drivers/net/enic/enic.h
@@ -34,7 +34,7 @@
 
 #define DRV_NAME		"enic"
 #define DRV_DESCRIPTION		"Cisco VIC Ethernet NIC Driver"
-#define DRV_VERSION		"1.3.1.1"
+#define DRV_VERSION		"1.3.1.1-pp"
 #define DRV_COPYRIGHT		"Copyright 2008-2009 Cisco Systems, Inc"
 #define PFX			DRV_NAME ": "
 
@@ -95,6 +95,7 @@ struct enic {
 	u32 port_mtu;
 	u32 rx_coalesce_usecs;
 	u32 tx_coalesce_usecs;
+	struct ifla_vf_port_profile pp;
 
 	/* work queue cache line section */
 	____cacheline_aligned struct vnic_wq wq[ENIC_WQ_MAX];
diff --git a/drivers/net/enic/enic_main.c b/drivers/net/enic/enic_main.c
index 1232887..8e5e46b 100644
--- a/drivers/net/enic/enic_main.c
+++ b/drivers/net/enic/enic_main.c
@@ -29,6 +29,7 @@
 #include <linux/etherdevice.h>
 #include <linux/if_ether.h>
 #include <linux/if_vlan.h>
+#include <linux/if_link.h>
 #include <linux/ethtool.h>
 #include <linux/in.h>
 #include <linux/ip.h>
@@ -40,6 +41,7 @@
 #include "vnic_dev.h"
 #include "vnic_intr.h"
 #include "vnic_stats.h"
+#include "vnic_vic.h"
 #include "enic_res.h"
 #include "enic.h"
 
@@ -49,10 +51,12 @@
 #define ENIC_DESC_MAX_SPLITS		(MAX_TSO / WQ_ENET_MAX_DESC_LEN + 1)
 
 #define PCI_DEVICE_ID_CISCO_VIC_ENET         0x0043  /* ethernet vnic */
+#define PCI_DEVICE_ID_CISCO_VIC_ENET_DYN     0x0044  /* enet dynamic vnic */
 
 /* Supported devices */
 static DEFINE_PCI_DEVICE_TABLE(enic_id_table) = {
 	{ PCI_VDEVICE(CISCO, PCI_DEVICE_ID_CISCO_VIC_ENET) },
+	{ PCI_VDEVICE(CISCO, PCI_DEVICE_ID_CISCO_VIC_ENET_DYN) },
 	{ 0, }	/* end of table */
 };
 
@@ -113,6 +117,11 @@ static const struct enic_stat enic_rx_stats[] = {
 static const unsigned int enic_n_tx_stats = ARRAY_SIZE(enic_tx_stats);
 static const unsigned int enic_n_rx_stats = ARRAY_SIZE(enic_rx_stats);
 
+static int enic_is_dynamic(struct enic *enic)
+{
+	return enic->pdev->device == PCI_DEVICE_ID_CISCO_VIC_ENET_DYN;
+}
+
 static int enic_get_settings(struct net_device *netdev,
 	struct ethtool_cmd *ecmd)
 {
@@ -810,14 +819,24 @@ static void enic_reset_mcaddrs(struct enic *enic)
 
 static int enic_set_mac_addr(struct net_device *netdev, char *addr)
 {
-	if (!is_valid_ether_addr(addr))
-		return -EADDRNOTAVAIL;
+	struct enic *enic = netdev_priv(netdev);
 
-	memcpy(netdev->dev_addr, addr, netdev->addr_len);
+	if (enic_is_dynamic(enic)) {
+		random_ether_addr(netdev->dev_addr);
+	} else {
+		if (!is_valid_ether_addr(addr))
+			return -EADDRNOTAVAIL;
+		memcpy(netdev->dev_addr, addr, netdev->addr_len);
+	}
 
 	return 0;
 }
 
+static int enic_set_mac_address(struct net_device *netdev, void *p)
+{
+	return -EOPNOTSUPP;
+}
+
 /* netif_tx_lock held, BHs disabled */
 static void enic_set_multicast_list(struct net_device *netdev)
 {
@@ -922,6 +941,131 @@ static void enic_tx_timeout(struct net_device *netdev)
 	schedule_work(&enic->reset);
 }
 
+static int enic_vnic_dev_deinit(struct enic *enic)
+{
+	int err;
+
+	spin_lock(&enic->devcmd_lock);
+	err = vnic_dev_deinit(enic->vdev);
+	spin_unlock(&enic->devcmd_lock);
+
+	return err;
+}
+
+static int enic_dev_init_prov(struct enic *enic, struct vic_provinfo *vp)
+{
+	int err;
+
+	spin_lock(&enic->devcmd_lock);
+	err = vnic_dev_init_prov(enic->vdev,
+		(u8 *)vp, vic_provinfo_size(vp));
+	spin_unlock(&enic->devcmd_lock);
+
+	return err;
+}
+
+static int enic_dev_init_done(struct enic *enic, int *done, int *error)
+{
+	int err;
+
+	spin_lock(&enic->devcmd_lock);
+	err = vnic_dev_init_done(enic->vdev, done, error);
+	spin_unlock(&enic->devcmd_lock);
+
+	return err;
+}
+
+static int enic_provinfo_add_tlv_str(struct vic_provinfo *vp, u16 type,
+	u16 max_length, char *str)
+{
+	if (!str)
+		return 0;
+
+	if (strlen(str) + 1 > max_length)
+		return 0;
+
+	return vic_provinfo_add_tlv(vp, type, strlen(str) + 1, str);
+}
+
+static int enic_set_vf_port_profile(struct net_device *netdev, int vf,
+	struct ifla_vf_port_profile *ivp)
+{
+	struct enic *enic = netdev_priv(netdev);
+	struct vic_provinfo *vp;
+	u8 oui[3] = VIC_PROVINFO_CISCO_OUI;
+	u8 *mac = ivp->mac;
+	int err;
+
+	if (!enic_is_dynamic(enic))
+		return -EOPNOTSUPP;
+
+	memset(&enic->pp, 0, sizeof(enic->pp));
+
+	enic_vnic_dev_deinit(enic);
+
+	if (strlen(ivp->port_profile) == 0)
+		return 0;
+
+	if (is_zero_ether_addr(mac))
+		mac = netdev->dev_addr;
+
+	if (!is_valid_ether_addr(mac))
+		return -EADDRNOTAVAIL;
+
+	vp = vic_provinfo_alloc(GFP_KERNEL, oui, VIC_PROVINFO_LINUX_TYPE);
+	if (!vp)
+		return -ENOMEM;
+
+	enic_provinfo_add_tlv_str(vp, VIC_LINUX_PROV_TLV_PORT_PROFILE_NAME_STR,
+		IFLA_VF_PORT_PROFILE_MAX, ivp->port_profile);
+	vic_provinfo_add_tlv(vp, VIC_LINUX_PROV_TLV_CLIENT_MAC_ADDR,
+		ETH_ALEN, mac);
+	enic_provinfo_add_tlv_str(vp, VIC_LINUX_PROV_TLV_HOST_UUID_STR,
+		IFLA_VF_UUID_MAX, ivp->host_uuid);
+	enic_provinfo_add_tlv_str(vp, VIC_LINUX_PROV_TLV_CLIENT_UUID_STR,
+		IFLA_VF_UUID_MAX, ivp->client_uuid);
+	enic_provinfo_add_tlv_str(vp, VIC_LINUX_PROV_TLV_CLIENT_NAME_STR,
+		IFLA_VF_CLIENT_NAME_MAX, ivp->client_name);
+
+	err = enic_dev_init_prov(enic, vp);
+	if (err)
+		goto err_out;
+
+	memcpy(&enic->pp, ivp, sizeof(enic->pp));
+
+err_out:
+	vic_provinfo_free(vp);
+
+	return err;
+}
+
+static int enic_get_vf_port_profile(struct net_device *netdev, int vf,
+	struct ifla_vf_port_profile *ivp)
+{
+	struct enic *enic = netdev_priv(netdev);
+	int err, error, done;
+
+	if (!enic_is_dynamic(enic))
+		return -EOPNOTSUPP;
+
+	enic->pp.status = IFLA_VF_PORT_PROFILE_STATUS_UNKNOWN;
+
+	err = enic_dev_init_done(enic, &done, &error);
+
+	if (err || error)
+		enic->pp.status = IFLA_VF_PORT_PROFILE_STATUS_ERROR;
+
+	if (!done)
+		enic->pp.status = IFLA_VF_PORT_PROFILE_STATUS_INPROGRESS;
+
+	if (!error)
+		enic->pp.status = IFLA_VF_PORT_PROFILE_STATUS_SUCCESS;
+
+	memcpy(ivp, &enic->pp, sizeof(enic->pp));
+
+	return 0;
+}
+
 static void enic_free_rq_buf(struct vnic_rq *rq, struct vnic_rq_buf *buf)
 {
 	struct enic *enic = vnic_dev_priv(rq->vdev);
@@ -1440,10 +1584,12 @@ static int enic_open(struct net_device *netdev)
 	for (i = 0; i < enic->rq_count; i++)
 		vnic_rq_enable(&enic->rq[i]);
 
-	spin_lock(&enic->devcmd_lock);
-	enic_add_station_addr(enic);
-	spin_unlock(&enic->devcmd_lock);
-	enic_set_multicast_list(netdev);
+	if (!enic_is_dynamic(enic)) {
+		spin_lock(&enic->devcmd_lock);
+		enic_add_station_addr(enic);
+		spin_unlock(&enic->devcmd_lock);
+		enic_set_multicast_list(netdev);
+	}
 
 	netif_wake_queue(netdev);
 	napi_enable(&enic->napi);
@@ -1775,20 +1921,22 @@ static void enic_clear_intr_mode(struct enic *enic)
 }
 
 static const struct net_device_ops enic_netdev_ops = {
-	.ndo_open		= enic_open,
-	.ndo_stop		= enic_stop,
-	.ndo_start_xmit		= enic_hard_start_xmit,
-	.ndo_get_stats		= enic_get_stats,
-	.ndo_validate_addr	= eth_validate_addr,
-	.ndo_set_mac_address 	= eth_mac_addr,
-	.ndo_set_multicast_list	= enic_set_multicast_list,
-	.ndo_change_mtu		= enic_change_mtu,
-	.ndo_vlan_rx_register	= enic_vlan_rx_register,
-	.ndo_vlan_rx_add_vid	= enic_vlan_rx_add_vid,
-	.ndo_vlan_rx_kill_vid	= enic_vlan_rx_kill_vid,
-	.ndo_tx_timeout		= enic_tx_timeout,
+	.ndo_open			= enic_open,
+	.ndo_stop			= enic_stop,
+	.ndo_start_xmit			= enic_hard_start_xmit,
+	.ndo_get_stats			= enic_get_stats,
+	.ndo_validate_addr		= eth_validate_addr,
+	.ndo_set_multicast_list		= enic_set_multicast_list,
+	.ndo_set_mac_address		= enic_set_mac_address,
+	.ndo_change_mtu			= enic_change_mtu,
+	.ndo_vlan_rx_register		= enic_vlan_rx_register,
+	.ndo_vlan_rx_add_vid		= enic_vlan_rx_add_vid,
+	.ndo_vlan_rx_kill_vid		= enic_vlan_rx_kill_vid,
+	.ndo_tx_timeout			= enic_tx_timeout,
+	.ndo_set_vf_port_profile	= enic_set_vf_port_profile,
+	.ndo_get_vf_port_profile	= enic_get_vf_port_profile,
 #ifdef CONFIG_NET_POLL_CONTROLLER
-	.ndo_poll_controller	= enic_poll_controller,
+	.ndo_poll_controller		= enic_poll_controller,
 #endif
 };
 
@@ -2010,11 +2158,13 @@ static int __devinit enic_probe(struct pci_dev *pdev,
 
 	netif_carrier_off(netdev);
 
-	err = vnic_dev_init(enic->vdev, 0);
-	if (err) {
-		printk(KERN_ERR PFX
-			"vNIC dev init failed, aborting.\n");
-		goto err_out_dev_close;
+	if (!enic_is_dynamic(enic)) {
+		err = vnic_dev_init(enic->vdev, 0);
+		if (err) {
+			printk(KERN_ERR PFX
+				"vNIC dev init failed, aborting.\n");
+			goto err_out_dev_close;
+		}
 	}
 
 	err = enic_dev_init(enic);
diff --git a/drivers/net/enic/vnic_dev.c b/drivers/net/enic/vnic_dev.c
index d43a9d4..e351b0f 100644
--- a/drivers/net/enic/vnic_dev.c
+++ b/drivers/net/enic/vnic_dev.c
@@ -682,6 +682,56 @@ int vnic_dev_init(struct vnic_dev *vdev, int arg)
 	return r;
 }
 
+int vnic_dev_init_done(struct vnic_dev *vdev, int *done, int *err)
+{
+	u64 a0 = 0, a1 = 0;
+	int wait = 1000;
+	int ret;
+
+	*done = 0;
+
+	ret = vnic_dev_cmd(vdev, CMD_INIT_STATUS, &a0, &a1, wait);
+	if (ret)
+		return ret;
+
+	*done = (a0 == 0);
+
+	*err = (a0 == 0) ? a1 : 0;
+
+	return 0;
+}
+
+int vnic_dev_init_prov(struct vnic_dev *vdev, u8 *buf, u32 len)
+{
+	u64 a0, a1 = len;
+	int wait = 1000;
+	u64 prov_pa;
+	void *prov_buf;
+	int ret;
+
+	prov_buf = pci_alloc_consistent(vdev->pdev, len, &prov_pa);
+	if (!prov_buf)
+		return -ENOMEM;
+
+	memcpy(prov_buf, buf, len);
+
+	a0 = prov_pa;
+
+	ret = vnic_dev_cmd(vdev, CMD_INIT_PROV_INFO, &a0, &a1, wait);
+
+	pci_free_consistent(vdev->pdev, len, prov_buf, prov_pa);
+
+	return ret;
+}
+
+int vnic_dev_deinit(struct vnic_dev *vdev)
+{
+	u64 a0 = 0, a1 = 0;
+	int wait = 1000;
+
+	return vnic_dev_cmd(vdev, CMD_DEINIT, &a0, &a1, wait);
+}
+
 int vnic_dev_link_status(struct vnic_dev *vdev)
 {
 	if (vdev->linkstatus)
diff --git a/drivers/net/enic/vnic_dev.h b/drivers/net/enic/vnic_dev.h
index f5be640..27f5a5a 100644
--- a/drivers/net/enic/vnic_dev.h
+++ b/drivers/net/enic/vnic_dev.h
@@ -124,6 +124,9 @@ int vnic_dev_disable(struct vnic_dev *vdev);
 int vnic_dev_open(struct vnic_dev *vdev, int arg);
 int vnic_dev_open_done(struct vnic_dev *vdev, int *done);
 int vnic_dev_init(struct vnic_dev *vdev, int arg);
+int vnic_dev_init_done(struct vnic_dev *vdev, int *done, int *err);
+int vnic_dev_init_prov(struct vnic_dev *vdev, u8 *buf, u32 len);
+int vnic_dev_deinit(struct vnic_dev *vdev);
 int vnic_dev_soft_reset(struct vnic_dev *vdev, int arg);
 int vnic_dev_soft_reset_done(struct vnic_dev *vdev, int *done);
 void vnic_dev_set_intr_mode(struct vnic_dev *vdev,
diff --git a/drivers/net/enic/vnic_vic.c b/drivers/net/enic/vnic_vic.c
new file mode 100644
index 0000000..d769772
--- /dev/null
+++ b/drivers/net/enic/vnic_vic.c
@@ -0,0 +1,73 @@
+/*
+ * Copyright 2010 Cisco Systems, Inc.  All rights reserved.
+ *
+ * This program is free software; you may redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/slab.h>
+
+#include "vnic_vic.h"
+
+struct vic_provinfo *vic_provinfo_alloc(gfp_t flags, u8 *oui, u8 type)
+{
+	struct vic_provinfo *vp = kzalloc(VIC_PROVINFO_MAX_DATA, flags);
+
+	if (!vp || !oui)
+		return NULL;
+
+	memcpy(vp->oui, oui, sizeof(vp->oui));
+	vp->type = type;
+	vp->length = htonl(sizeof(vp->num_tlvs));
+
+	return vp;
+}
+
+void vic_provinfo_free(struct vic_provinfo *vp)
+{
+	kfree(vp);
+}
+
+int vic_provinfo_add_tlv(struct vic_provinfo *vp, u16 type, u16 length,
+	void *value)
+{
+	struct vic_provinfo_tlv *tlv;
+
+	if (!vp || !value)
+		return -EINVAL;
+
+	if (ntohl(vp->length) + sizeof(*tlv) + length >
+		VIC_PROVINFO_MAX_TLV_DATA)
+		return -ENOMEM;
+
+	tlv = (struct vic_provinfo_tlv *)((u8 *)vp->tlv +
+		ntohl(vp->length) - sizeof(vp->num_tlvs));
+
+	tlv->type = htons(type);
+	tlv->length = htons(length);
+	memcpy(tlv->value, value, length);
+
+	vp->num_tlvs = htonl(ntohl(vp->num_tlvs) + 1);
+	vp->length = htonl(ntohl(vp->length) + sizeof(*tlv) + length);
+
+	return 0;
+}
+
+size_t vic_provinfo_size(struct vic_provinfo *vp)
+{
+	return vp ?  ntohl(vp->length) + sizeof(*vp) - sizeof(vp->num_tlvs) : 0;
+}
diff --git a/drivers/net/enic/vnic_vic.h b/drivers/net/enic/vnic_vic.h
new file mode 100644
index 0000000..085c2a2
--- /dev/null
+++ b/drivers/net/enic/vnic_vic.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright 2010 Cisco Systems, Inc.  All rights reserved.
+ *
+ * This program is free software; you may redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef _VNIC_VIC_H_
+#define _VNIC_VIC_H_
+
+/* Note: All integer fields in NETWORK byte order */
+
+/* Note: String field lengths include null char */
+
+#define VIC_PROVINFO_CISCO_OUI		{ 0x00, 0x00, 0x0c }
+#define VIC_PROVINFO_LINUX_TYPE		0x2
+
+enum vic_linux_prov_tlv_type {
+	VIC_LINUX_PROV_TLV_PORT_PROFILE_NAME_STR = 0,
+	VIC_LINUX_PROV_TLV_CLIENT_MAC_ADDR = 1,			/* u8[6] */
+	VIC_LINUX_PROV_TLV_CLIENT_NAME_STR = 2,
+	VIC_LINUX_PROV_TLV_HOST_UUID_STR = 8,
+	VIC_LINUX_PROV_TLV_CLIENT_UUID_STR = 9,
+};
+
+struct vic_provinfo {
+	u8 oui[3];		/* OUI of data provider */
+	u8 type;		/* provider-specific type */
+	u32 length;		/* length of data below */
+	u32 num_tlvs;		/* number of tlvs */
+	struct vic_provinfo_tlv {
+		u16 type;
+		u16 length;
+		u8 value[0];
+	} tlv[0];
+} __attribute__ ((packed));
+
+#define VIC_PROVINFO_MAX_DATA		1385
+#define VIC_PROVINFO_MAX_TLV_DATA (VIC_PROVINFO_MAX_DATA - \
+	sizeof(struct vic_provinfo))
+
+struct vic_provinfo *vic_provinfo_alloc(gfp_t flags, u8 *oui, u8 type);
+void vic_provinfo_free(struct vic_provinfo *vp);
+int vic_provinfo_add_tlv(struct vic_provinfo *vp, u16 type, u16 length,
+	void *value);
+size_t vic_provinfo_size(struct vic_provinfo *vp);
+
+#endif	/* _VNIC_VIC_H_ */


^ permalink raw reply related

* [net-next-2.6 V5 PATCH 1/3] Add netdev/netlink port-profile support (was iovnl)
From: Scott Feldman @ 2010-05-06  4:42 UTC (permalink / raw)
  To: davem; +Cc: netdev, chrisw, arnd
In-Reply-To: <20100506044200.29677.20604.stgit@savbu-pc100.cisco.com>

From: Scott Feldman <scofeldm@cisco.com>

Add new netdev ops ndo_{set|get}_vf_port_profile to allow setting of
port-profile on a netdev interface.  Extends netlink socket RTM_SETLINK/
RTM_GETLINK with new sub cmd called IFLA_VF_PORT_PROFILE (added to end of
IFLA_cmd list).

A port-profile is used to configure/enable the external switch port backing
the netdev interface, not to configure the host-facing side of the netdev.  A
port-profile is an identifier known to the switch.  How port-profiles are
installed on the switch or how available port-profiles are made known to the
host is outside the scope of this patch.

The general flow is the port-profile is applied to a host netdev interface
using RTM_SETLINK, the receiver of the RTM_SETLINK msg (more about that later)
communicates with the switch, and the switch port backing the host netdev
interface is configured/enabled based on the settings defined by the port-
profile.  What those settings comprise, and how those settings are managed is
again outside the scope of this patch, since this patch only deals with the
first step in the flow.

Since we're using netlink sockets, the receiver of the RTM_SETLINK msg can
be in kernel- or user-space.  For kernel-space recipient, rtnetlink.c, the
new ndo_set_vf_port_profile netdev op is called to set the port-profile.
User-space recipients can decide how they communicate the IFLA_VF_PORT_PROFILE
to the external switch.

There is a RTM_GETLINK cmd to return the port-profile setting of an
interface and to also return the status of the last port-profile.

IFLA_VF_PORT_PROFILE is modeled after the existing IFLA_VF_* cmd where a
VF number is passed in to identify the virtual function (VF) of an SR-IOV-
capable device.  In this case, the target of IFLA_VF_PORT_PROFILE msg is the
netdev physical function (PF) device.  The PF will apply the port-profile
to the VF.  IFLA_VF_PORT_PROFILE can also be used for devices that don't
adhere to SR-IOV and can apply the port-profile directly to the netdev
target.  In this case, the VF number is ignored.

Passing in a NULL port-profile is used to delete the port-profile association.

Signed-off-by: Scott Feldman <scofeldm@cisco.com>
Signed-off-by: Roopa Prabhu<roprabhu@cisco.com>
---
 include/linux/if_link.h   |   25 +++++++++++++++++++++++++
 include/linux/netdevice.h |   10 ++++++++++
 net/core/rtnetlink.c      |   39 ++++++++++++++++++++++++++++++++++++++-
 3 files changed, 73 insertions(+), 1 deletions(-)

diff --git a/include/linux/if_link.h b/include/linux/if_link.h
index cfd420b..d763358 100644
--- a/include/linux/if_link.h
+++ b/include/linux/if_link.h
@@ -116,6 +116,7 @@ enum {
 	IFLA_VF_TX_RATE,	/* TX Bandwidth Allocation */
 	IFLA_VFINFO,
 	IFLA_STATS64,
+	IFLA_VF_PORT_PROFILE,
 	__IFLA_MAX
 };
 
@@ -259,4 +260,28 @@ struct ifla_vf_info {
 	__u32 qos;
 	__u32 tx_rate;
 };
+
+enum {
+	IFLA_VF_PORT_PROFILE_STATUS_UNKNOWN,
+	IFLA_VF_PORT_PROFILE_STATUS_SUCCESS,
+	IFLA_VF_PORT_PROFILE_STATUS_INPROGRESS,
+	IFLA_VF_PORT_PROFILE_STATUS_ERROR,
+};
+
+#define IFLA_VF_PORT_PROFILE_MAX	40
+#define IFLA_VF_UUID_MAX		40
+#define IFLA_VF_CLIENT_NAME_MAX		40
+
+struct ifla_vf_port_profile {
+	__u32 vf;
+	__u32 flags;
+	__u32 status;
+	__u8 port_profile[IFLA_VF_PORT_PROFILE_MAX];
+	__u8 mac[32];					/* MAX_ADDR_LEN */
+	/* UUID e.g. "CEEFD3B1-9E11-11DE-BDFD-000BAB01C0FB" */
+	__u8 host_uuid[IFLA_VF_UUID_MAX];
+	__u8 client_uuid[IFLA_VF_UUID_MAX];
+	__u8 client_name[IFLA_VF_CLIENT_NAME_MAX];	/* e.g. "vm0-eth1" */
+};
+
 #endif /* _LINUX_IF_LINK_H */
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 3c5ed5f..949abdb 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -696,6 +696,10 @@ struct netdev_rx_queue {
  * int (*ndo_set_vf_tx_rate)(struct net_device *dev, int vf, int rate);
  * int (*ndo_get_vf_config)(struct net_device *dev,
  *			    int vf, struct ifla_vf_info *ivf);
+ * int (*ndo_set_vf_port_profile)(struct net_device *dev, int vf,
+ *				  struct ifla_vf_port_profile *ivp);
+ * int (*ndo_get_vf_port_profile)(struct net_device *dev, int vf,
+ *				  struct ifla_vf_port_profile *ivp);
  */
 #define HAVE_NET_DEVICE_OPS
 struct net_device_ops {
@@ -744,6 +748,12 @@ struct net_device_ops {
 	int			(*ndo_get_vf_config)(struct net_device *dev,
 						     int vf,
 						     struct ifla_vf_info *ivf);
+	int			(*ndo_set_vf_port_profile)(
+					struct net_device *dev, int vf,
+					struct ifla_vf_port_profile *ivp);
+	int			(*ndo_get_vf_port_profile)(
+					struct net_device *dev, int vf,
+					struct ifla_vf_port_profile *ivp);
 #if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE)
 	int			(*ndo_fcoe_enable)(struct net_device *dev);
 	int			(*ndo_fcoe_disable)(struct net_device *dev);
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 78c8598..e427a70 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -747,17 +747,40 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
 		goto nla_put_failure;
 	copy_rtnl_link_stats64(nla_data(attr), stats);
 
+	if (dev->dev.parent)
+		NLA_PUT_U32(skb, IFLA_NUM_VF, dev_num_vf(dev->dev.parent));
+
 	if (dev->netdev_ops->ndo_get_vf_config && dev->dev.parent) {
 		int i;
 		struct ifla_vf_info ivi;
 
-		NLA_PUT_U32(skb, IFLA_NUM_VF, dev_num_vf(dev->dev.parent));
 		for (i = 0; i < dev_num_vf(dev->dev.parent); i++) {
 			if (dev->netdev_ops->ndo_get_vf_config(dev, i, &ivi))
 				break;
 			NLA_PUT(skb, IFLA_VFINFO, sizeof(ivi), &ivi);
 		}
 	}
+
+	if (dev->netdev_ops->ndo_get_vf_port_profile && dev->dev.parent) {
+		struct ifla_vf_port_profile ivp;
+
+		if (dev_num_vf(dev->dev.parent)) {
+			int i;
+
+			for (i = 0; i < dev_num_vf(dev->dev.parent); i++) {
+				if (dev->netdev_ops->ndo_get_vf_port_profile(
+					dev, i, &ivp))
+					break;
+				NLA_PUT(skb, IFLA_VF_PORT_PROFILE,
+					sizeof(ivp), &ivp);
+			}
+		} else if (!dev->netdev_ops->ndo_get_vf_port_profile(dev,
+			0, &ivp)) {
+			NLA_PUT(skb, IFLA_VF_PORT_PROFILE,
+				sizeof(ivp), &ivp);
+		}
+	}
+
 	if (dev->rtnl_link_ops) {
 		if (rtnl_link_fill(skb, dev) < 0)
 			goto nla_put_failure;
@@ -824,6 +847,8 @@ const struct nla_policy ifla_policy[IFLA_MAX+1] = {
 				    .len = sizeof(struct ifla_vf_vlan) },
 	[IFLA_VF_TX_RATE]	= { .type = NLA_BINARY,
 				    .len = sizeof(struct ifla_vf_tx_rate) },
+	[IFLA_VF_PORT_PROFILE]	= { .type = NLA_BINARY,
+				    .len = sizeof(struct ifla_vf_port_profile)},
 };
 EXPORT_SYMBOL(ifla_policy);
 
@@ -1028,6 +1053,18 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm,
 	}
 	err = 0;
 
+	if (tb[IFLA_VF_PORT_PROFILE]) {
+		struct ifla_vf_port_profile *ivp;
+		ivp = nla_data(tb[IFLA_VF_PORT_PROFILE]);
+		err = -EOPNOTSUPP;
+		if (ops->ndo_set_vf_port_profile)
+			err = ops->ndo_set_vf_port_profile(dev, ivp->vf, ivp);
+		if (err < 0)
+			goto errout;
+		modified = 1;
+	}
+	err = 0;
+
 errout:
 	if (err < 0 && modified && net_ratelimit())
 		printk(KERN_WARNING "A link change request failed with "


^ permalink raw reply related

* [net-next-2.6 V5 PATCH 0/3] Add port-profile netlink support
From: Scott Feldman @ 2010-05-06  4:42 UTC (permalink / raw)
  To: davem; +Cc: netdev, chrisw, arnd

(Resending to fix a bug found in testing get_vf_port_profile).

The following series adds port-profile netlink support and adds an
implementation to Cisco's enic netdev driver:

	1/3: Adds port-profile netlink RTM_SETLINK/RTM_GETLINK support, and
	     adds matching netdev ops ndo_{set|get}_vf_port_profile.

	2/3: Adds enic support for ndo_{set|get}_vf_port_profile for enic
	     dynamic devices.

	3/3: (please don't apply this 3rd patch) Enables SR-IOV support for
	     enic to illustrate support for port-profile netlink using SR-IOV-
	     compliant devices.

The SETLINK/GETLINK support follows the model for other IFLA_VF_* msgs used
for SR-IOV devices where the recipient of the netlink msg is the PF, but the
target is the VF.

The intent of this patch set is to cover both definitions of port-profile
as defined by Cisco's enic use and as defined by VSI Discovery Protocol (VDP),
used in VEPA implementations.  While both definitions are based on pre-
standards, the concept of a port-profile to be applied to an external switch
port on behalf of a virtual machine interface is common, as well as many
of the fields defining the protocols.

Signed-off-by: Scott Feldman <scofeldm@cisco.com>
Signed-off-by: Roopa Prabhu<roprabhu@cisco.com>

^ permalink raw reply

* Re: linux-next: manual merge of the staging-next tree with the net tree
From: Stephen Rothwell @ 2010-05-06  4:21 UTC (permalink / raw)
  To: Greg KH
  Cc: linux-next, linux-kernel, Prashant P. Shah, Jiri Pirko,
	David Miller, netdev
In-Reply-To: <20100506042048.GA25425@kroah.com>

[-- Attachment #1: Type: text/plain, Size: 456 bytes --]

Hi Greg,

On Wed, 5 May 2010 21:20:48 -0700 Greg KH <greg@kroah.com> wrote:
>
> Thanks for doing this.  Hm, the arlan driver is scheduled to be deleted
> in .35, so it would make things easier if I just do it now to keep these
> kinds of merge issues from happening.  I'll queue that up tomorrow.

Ah, yes, that would be easier.  Thanks.

-- 
Cheers,
Stephen Rothwell                    sfr@canb.auug.org.au
http://www.canb.auug.org.au/~sfr/

[-- Attachment #2: Type: application/pgp-signature, Size: 198 bytes --]

^ permalink raw reply

* Re: linux-next: manual merge of the staging-next tree with the net tree
From: Greg KH @ 2010-05-06  4:20 UTC (permalink / raw)
  To: Stephen Rothwell
  Cc: linux-next, linux-kernel, Prashant P. Shah, Jiri Pirko,
	David Miller, netdev
In-Reply-To: <20100506140543.d6f3841f.sfr@canb.auug.org.au>

On Thu, May 06, 2010 at 02:05:43PM +1000, Stephen Rothwell wrote:
> Hi Greg,
> 
> Today's linux-next merge of the staging-next tree got a conflict in
> drivers/staging/arlan/arlan-main.c between commit
> 22bedad3ce112d5ca1eaf043d4990fa2ed698c87 ("net: convert multicast list to
> list_head") from the net tree and commit
> dd730b627cf8ff0b9d20df94fd31b6192b188710 ("Staging: arlan: fixed
> unnecessary whitespace style issue in arlan-main.c") from the
> staging-next tree.
> 
> I fixed it up (see below - this is bigger than it would otherwise be
> because there is a conflict with another patch that was previously
> reported) and can carry the fix as necessary.

Thanks for doing this.  Hm, the arlan driver is scheduled to be deleted
in .35, so it would make things easier if I just do it now to keep these
kinds of merge issues from happening.  I'll queue that up tomorrow.

thanks,

greg k-h

^ permalink raw reply

* [PATCH] forcedeth: Account for consumed budget in napi poll
From: Tom Herbert @ 2010-05-06  4:15 UTC (permalink / raw)
  To: netdev, davem

Repeated calls to nv_rx_process in the napi poll routine do not take into
account the portion of the budget that has been consumed in previous calls.
Fix by subtracting the number of packets already processed.

Signed-off-by: Tom Herbert <therbert@google.com>
---
diff --git a/drivers/net/forcedeth.c b/drivers/net/forcedeth.c
index f9e1dd4..e282d0a 100644
--- a/drivers/net/forcedeth.c
+++ b/drivers/net/forcedeth.c
@@ -3564,14 +3564,15 @@ static int nv_napi_poll(struct napi_struct *napi, int budget)
 			tx_work += nv_tx_done(dev, np->tx_ring_size);
 			spin_unlock_irqrestore(&np->lock, flags);
 
-			rx_count = nv_rx_process(dev, budget);
+			rx_count = nv_rx_process(dev, budget - rx_work);
 			retcode = nv_alloc_rx(dev);
 		} else {
 			spin_lock_irqsave(&np->lock, flags);
 			tx_work += nv_tx_done_optimized(dev, np->tx_ring_size);
 			spin_unlock_irqrestore(&np->lock, flags);
 
-			rx_count = nv_rx_process_optimized(dev, budget);
+			rx_count = nv_rx_process_optimized(dev,
+			    budget - rx_work);
 			retcode = nv_alloc_rx_optimized(dev);
 		}
 	} while (retcode == 0 &&

^ permalink raw reply related

* linux-next: manual merge of the staging-next tree with the net tree
From: Stephen Rothwell @ 2010-05-06  4:05 UTC (permalink / raw)
  To: Greg KH
  Cc: linux-next, linux-kernel, Prashant P. Shah, Jiri Pirko,
	David Miller, netdev

Hi Greg,

Today's linux-next merge of the staging-next tree got a conflict in
drivers/staging/arlan/arlan-main.c between commit
22bedad3ce112d5ca1eaf043d4990fa2ed698c87 ("net: convert multicast list to
list_head") from the net tree and commit
dd730b627cf8ff0b9d20df94fd31b6192b188710 ("Staging: arlan: fixed
unnecessary whitespace style issue in arlan-main.c") from the
staging-next tree.

I fixed it up (see below - this is bigger than it would otherwise be
because there is a conflict with another patch that was previously
reported) and can carry the fix as necessary.
-- 
Cheers,
Stephen Rothwell                    sfr@canb.auug.org.au

diff --cc drivers/staging/arlan/arlan-main.c
index 8028452,301b979..0000000
--- a/drivers/staging/arlan/arlan-main.c
+++ b/drivers/staging/arlan/arlan-main.c
@@@ -1438,105 -1336,99 +1336,99 @@@ static void arlan_rx_interrupt(struct n
  		priv->in_time10 = jiffies;
  	}
  	DEBUGSHM(1, "arlan rcv pkt rxStatus= %d ", arlan->rxStatus, u_char);
- 	switch (rxStatus)
+ 	switch (rxStatus) {
+ 	case 1:
+ 	case 2:
+ 	case 3:
  	{
- 		case 1:
- 		case 2:
- 		case 3:
- 		{
- 			/* Malloc up new buffer. */
- 			struct sk_buff *skb;
+ 		/* Malloc up new buffer. */
+ 		struct sk_buff *skb;
  
- 			DEBUGSHM(50, "arlan recv pkt offs=%d\n", arlan->rxOffset, u_short);
- 			DEBUGSHM(1, "arlan rxFrmType = %d \n", arlan->rxFrmType, u_char);
- 			DEBUGSHM(1, KERN_INFO "arlan rx scrambled = %d \n", arlan->scrambled, u_char);
+ 		DEBUGSHM(50, "arlan recv pkt offs=%d\n", arlan->rxOffset, u_short);
+ 		DEBUGSHM(1, "arlan rxFrmType = %d\n", arlan->rxFrmType, u_char);
+ 		DEBUGSHM(1, KERN_INFO "arlan rx scrambled = %d\n", arlan->scrambled, u_char);
  
- 			/* here we do multicast filtering to avoid slow 8-bit memcopy */
+ 		/* here we do multicast filtering to avoid slow 8-bit memcopy */
  #ifdef ARLAN_MULTICAST
- 			if (!(dev->flags & IFF_ALLMULTI) &&
- 				!(dev->flags & IFF_PROMISC) &&
- 				!netdev_mc_empty(dev))
- 			{
- 				char hw_dst_addr[6];
- 				struct netdev_hw_addr *ha;
- 				int i;
- 
- 				memcpy_fromio(hw_dst_addr, arlan->ultimateDestAddress, 6);
- 				if (hw_dst_addr[0] == 0x01)
- 				{
- 					if (mdebug)
- 						if (hw_dst_addr[1] == 0x00)
- 							printk(KERN_ERR "%s mcast 0x0100 \n", dev->name);
- 						else if (hw_dst_addr[1] == 0x40)
- 							printk(KERN_ERR "%s m/bcast 0x0140 \n", dev->name);
- 					netdev_for_each_mc_entry(ha, dev) {
- 						if (arlan_debug & ARLAN_DEBUG_HEADER_DUMP)
- 							printk(KERN_ERR "%s mcl %pM\n",
- 							       dev->name,
- 							       ha->addr);
- 						for (i = 0; i < 6; i++)
- 							if (ha->addr[i] != hw_dst_addr[i])
- 								break;
- 						if (i == 6)
+ 		if (!(dev->flags & IFF_ALLMULTI) &&
+ 			!(dev->flags & IFF_PROMISC) &&
+ 			!netdev_mc_empty(dev)) {
+ 			char hw_dst_addr[6];
 -			struct dev_mc_list *dmi;
++			struct netdev_hw_addr *ha;
+ 			int i;
+ 
+ 			memcpy_fromio(hw_dst_addr, arlan->ultimateDestAddress, 6);
+ 			if (hw_dst_addr[0] == 0x01) {
+ 				if (mdebug)
+ 					if (hw_dst_addr[1] == 0x00)
+ 						printk(KERN_ERR "%s mcast 0x0100\n", dev->name);
+ 					else if (hw_dst_addr[1] == 0x40)
+ 						printk(KERN_ERR "%s m/bcast 0x0140\n", dev->name);
 -				netdev_for_each_mc_entry(dmi, dev) {
++				netdev_for_each_mc_entry(ha, dev) {
+ 					if (arlan_debug & ARLAN_DEBUG_HEADER_DUMP)
+ 						printk(KERN_ERR "%s mcl %pM\n",
 -						       dev->name, dmi->dmi_addr);
++						       dev->name, ha->addr);
+ 					for (i = 0; i < 6; i++)
 -						if (dmi->dmi_addr[i] != hw_dst_addr[i])
++						if (ha->addr[i] != hw_dst_addr[i])
  							break;
- 					}
- 					/* we reach here if multicast filtering is on and packet 
- 					 * is multicast and not for receive */
- 					goto end_of_interrupt;
+ 					if (i == 6)
+ 						break;
  				}
+ 				/* we reach here if multicast filtering is on and packet */
+ 				/* is multicast and not for receive */
+ 				goto end_of_interrupt;
  			}
- #endif				// ARLAN_MULTICAST
- 			/* multicast filtering ends here */
- 			pkt_len += ARLAN_FAKE_HDR_LEN;
- 
- 			skb = dev_alloc_skb(pkt_len + 4);
- 			if (skb == NULL)
- 			{
- 				printk(KERN_ERR "%s: Memory squeeze, dropping packet.\n", dev->name);
- 				dev->stats.rx_dropped++;
- 				break;
- 			}
- 			skb_reserve(skb, 2);
- 			skbtmp = skb_put(skb, pkt_len);
+ 		}
+ #endif	/* ARLAN_MULTICAST */
+ 		/* multicast filtering ends here */
+ 		pkt_len += ARLAN_FAKE_HDR_LEN;
+ 
+ 		skb = dev_alloc_skb(pkt_len + 4);
+ 		if (skb == NULL) {
+ 			printk(KERN_ERR "%s: Memory squeeze, dropping packet.\n", dev->name);
+ 			dev->stats.rx_dropped++;
+ 			break;
+ 		}
+ 		skb_reserve(skb, 2);
+ 		skbtmp = skb_put(skb, pkt_len);
  
- 			memcpy_fromio(skbtmp + ARLAN_FAKE_HDR_LEN, ((char __iomem *) arlan) + rxOffset, pkt_len - ARLAN_FAKE_HDR_LEN);
- 			memcpy_fromio(skbtmp, arlan->ultimateDestAddress, 6);
- 			memcpy_fromio(skbtmp + 6, arlan->rxSrc, 6);
- 			WRITESHMB(arlan->rxStatus, 0x00);
- 			arlan_command(dev, ARLAN_COMMAND_RX);
+ 		memcpy_fromio(skbtmp + ARLAN_FAKE_HDR_LEN, ((char __iomem *) arlan) + rxOffset, pkt_len - ARLAN_FAKE_HDR_LEN);
+ 		memcpy_fromio(skbtmp, arlan->ultimateDestAddress, 6);
+ 		memcpy_fromio(skbtmp + 6, arlan->rxSrc, 6);
+ 		WRITESHMB(arlan->rxStatus, 0x00);
+ 		arlan_command(dev, ARLAN_COMMAND_RX);
  
- 			IFDEBUG(ARLAN_DEBUG_HEADER_DUMP)
- 			{
- 				char immedDestAddress[6];
- 				char immedSrcAddress[6];
- 				memcpy_fromio(immedDestAddress, arlan->immedDestAddress, 6);
- 				memcpy_fromio(immedSrcAddress, arlan->immedSrcAddress, 6);
- 
- 				printk(KERN_WARNING "%s t %pM f %pM imd %pM ims %pM\n",
- 				       dev->name, skbtmp,
- 				       &skbtmp[6],
- 				       immedDestAddress,
- 				       immedSrcAddress);
- 			}
- 			skb->protocol = eth_type_trans(skb, dev);
- 			IFDEBUG(ARLAN_DEBUG_HEADER_DUMP)
- 				if (skb->protocol != 0x608 && skb->protocol != 0x8)
- 				{
- 					for (i = 0; i <= 22; i++)
- 						printk("%02x:", (u_char) skbtmp[i + 12]);
- 					printk(KERN_ERR "\n");
- 					printk(KERN_WARNING "arlan kernel pkt type trans %x \n", skb->protocol);
- 				}
- 			netif_rx(skb);
- 			dev->stats.rx_packets++;
- 			dev->stats.rx_bytes += pkt_len;
+ 		IFDEBUG(ARLAN_DEBUG_HEADER_DUMP)
+ 		{
+ 			char immedDestAddress[6];
+ 			char immedSrcAddress[6];
+ 			memcpy_fromio(immedDestAddress, arlan->immedDestAddress, 6);
+ 			memcpy_fromio(immedSrcAddress, arlan->immedSrcAddress, 6);
+ 
+ 			printk(KERN_WARNING "%s t %pM f %pM imd %pM ims %pM\n",
+ 			       dev->name, skbtmp,
+ 			       &skbtmp[6],
+ 			       immedDestAddress,
+ 			       immedSrcAddress);
  		}
+ 		skb->protocol = eth_type_trans(skb, dev);
+ 		IFDEBUG(ARLAN_DEBUG_HEADER_DUMP)
+ 			if (skb->protocol != 0x608 && skb->protocol != 0x8) {
+ 				for (i = 0; i <= 22; i++)
+ 					printk("%02x:", (u_char) skbtmp[i + 12]);
+ 				printk(KERN_ERR "\n");
+ 				printk(KERN_WARNING "arlan kernel pkt type trans %x\n", skb->protocol);
+ 			}
+ 		netif_rx(skb);
+ 		dev->stats.rx_packets++;
+ 		dev->stats.rx_bytes += pkt_len;
+ 	}
+ 	break;
+ 
+ 	default:
+ 		printk(KERN_ERR "arlan intr: received unknown status\n");
+ 		dev->stats.rx_crc_errors++;
  		break;
- 		
- 		default:
- 			printk(KERN_ERR "arlan intr: received unknown status\n");
- 			dev->stats.rx_crc_errors++;
- 			break;
  	}
  	ARLAN_DEBUG_EXIT("arlan_rx_interrupt");
  }

^ permalink raw reply

* [PATCH] bnx2x: Fix check to get RX hash
From: Tom Herbert @ 2010-05-06  3:57 UTC (permalink / raw)
  To: davem, netdev

The flag used in the check to get rxhash out of the descriptor is the incorrect one.
Fix to use the proper features flag.


Signed-off-by: Tom Herbert <therbert@google.com>
---
diff --git a/drivers/net/bnx2x_main.c b/drivers/net/bnx2x_main.c
index f706ed1..2bc35c7 100644
--- a/drivers/net/bnx2x_main.c
+++ b/drivers/net/bnx2x_main.c
@@ -1726,7 +1726,7 @@ reuse_rx:
 
 			skb->protocol = eth_type_trans(skb, bp->dev);
 
-			if ((bp->dev->features & ETH_FLAG_RXHASH) &&
+			if ((bp->dev->features & NETIF_F_RXHASH) &&
 			    (cqe_fp_status_flags &
 			     ETH_FAST_PATH_RX_CQE_RSS_HASH_FLG))
 				skb->rxhash = le32_to_cpu(

^ permalink raw reply related

* Re: [PATCH iproute2] document initcwnd
From: Stephen Hemminger @ 2010-05-06  2:33 UTC (permalink / raw)
  To: Brian Bloniarz; +Cc: dormando, netdev, Rick Jones, shemminger
In-Reply-To: <4BE21D64.4040600@athenacr.com>

On Wed, 05 May 2010 21:37:40 -0400
Brian Bloniarz <bmb@athenacr.com> wrote:

> Stephen Hemminger wrote:
> > On Wed, 05 May 2010 16:56:34 -0400
> > Brian Bloniarz <bmb@athenacr.com> wrote:
> > 
> >> dormando wrote:
> >>>> This sounds like TCP slow start.
> >>>>
> >>>> http://en.wikipedia.org/wiki/Slow-start
> >>>>
> >>>> As far as tunables you might want to play with the initcwnd route
> >>>> flag (see "ip route help")
> >>> Ah, yes, initcwnd was it. I'm well aware of TCP Congestion control / slow
> >>> start / etc. However I couldn't find the damn tunable for it :)
> >> Documenting the flag in ip(8) might increase its visibility
> >> a little. I don't see it documented in the iproute2 git head,
> >> though it shows up on http://linux.die.net/man/8/ip somehow.
> >>
> >> Stephen, do you know why that is?
> > 
> > No one sent me an official patch to change it?
> 
> Mention initcwnd in ip(8). Text taken from doc/ip-cref.tex.
> 
> Signed-off-by: Brian Bloniarz <bmb@athenacr.com>

Ok, I will add it with an explicit caution about not doing this on public
networks.

^ permalink raw reply

* Re: [v5 Patch 1/3] netpoll: add generic support for bridge and bonding devices
From: Matt Mackall @ 2010-05-06  2:05 UTC (permalink / raw)
  To: Amerigo Wang
  Cc: linux-kernel, netdev, bridge, Andy Gospodarek, Neil Horman,
	Jeff Moyer, Stephen Hemminger, bonding-devel, Jay Vosburgh,
	David Miller
In-Reply-To: <20100505081514.5157.83783.sendpatchset@localhost.localdomain>

On Wed, 2010-05-05 at 04:11 -0400, Amerigo Wang wrote:
> V5:
> Fix coding style problems pointed out by David.

Aside from my concern about the policy of disabling netpoll on
bridges/bonds with only partial netpoll support, I don't have any
remaining issues with this. But I'll leave it to other folks to ack the
underlying driver bits for this series.

-- 
Mathematics is the supreme nostalgia of our time.

^ permalink raw reply

* Re: [Pv-drivers] RFC: Network Plugin Architecture (NPA) for vmxnet3
From: Scott Feldman @ 2010-05-06  2:03 UTC (permalink / raw)
  To: Shreyas Bhatewara, Arnd Bergmann, Dmitry Torokhov
  Cc: Christoph Hellwig, pv-drivers@vmware.com, netdev@vger.kernel.org,
	linux-kernel@vger.kernel.org,
	virtualization@lists.linux-foundation.org, Pankaj Thakkar
In-Reply-To: <89E2752CFA8EC044846EB8499819134102AF2896F5@EXCH-MBX-4.vmware.com>

On 5/5/10 10:29 AM, "Dmitry Torokhov" <dtor@vmware.com> wrote:

> It would not be a binary blob but software properly released under GPL.
> The current plan is for the shell to enforce GPL requirement on the
> plugin code, similar to what module loaded does for regular kernel
> modules.

On 5/5/10 3:05 PM, "Shreyas Bhatewara" <sbhatewara@vmware.com> wrote:

> The plugin image is not linked against Linux kernel. It is OS agnostic in fact
> (Eg. same plugin works for Linux and Windows VMs)

Are there any issues with injecting the GPL-licensed plug-in into the
Windows vmxnet3 NDIS driver?

-scott

^ permalink raw reply

* Re: linux kernel's IPV6_MULTICAST_HOPS default is 64; should be 1?
From: Brian Haley @ 2010-05-06  1:50 UTC (permalink / raw)
  To: David Miller; +Cc: dlstevens, enh, netdev, netdev-owner
In-Reply-To: <20100505.150008.102542618.davem@davemloft.net>

David Miller wrote:
> From: Brian Haley <brian.haley@hp.com>
> Date: Wed, 05 May 2010 11:36:31 -0400
> 
>> I now see that in Elliot's email, but I think it's incorrect.  The RFC
>> says that setting it to -1 should get you the kernel default, which is
>> now 1.  Without this change, setting it to -1 will get you 64, the
>> old behavior.  If the user wants to, they can always just set it to
>> 64 themselves, that's better than assuming when you set it to -1
>> you're going to get 64.
> 
> It's not 64, it's whatever the per-route metric is.

Not unless that metric's been set via RTAX_HOPLIMIT (and I believe
this is the unicast hop limit value anyways), and that metric
defaults to -1.  Routes added via a Router Advertisement are most
likely going to have a hop limit of 64, but I believe that's only
supposed to apply to unicast.

I *did* search the kernel code and test this before my original reply - it
uses the unicast hop limit from the interface as Elliot originally showed.

~# sysctl net.ipv6.conf.eth2.hop_limit
net.ipv6.conf.eth2.hop_limit = 64

21:04:48.766181 IP6 (hlim 64, next-header UDP (17) payload length: 108)
    fe80::21f:29ff:fef0:2f46.48914 > ip6-allrouters.7639: UDP, length 100

~# sysctl net.ipv6.conf.eth2.hop_limit=63
net.ipv6.conf.eth2.hop_limit = 63

21:05:09.670190 IP6 (hlim 63, next-header UDP (17) payload length: 108)
    fe80::21f:29ff:fef0:2f46.48914 > ip6-allrouters.7639: UDP, length 100

At this point in time I'll gladly implement a per-interface sysctl
to end this discussion.

-Brian

^ permalink raw reply

* [PATCH iproute2] document initcwnd
From: Brian Bloniarz @ 2010-05-06  1:37 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: dormando, netdev, Rick Jones, shemminger
In-Reply-To: <20100505150343.0c09c6ba@nehalam>

Stephen Hemminger wrote:
> On Wed, 05 May 2010 16:56:34 -0400
> Brian Bloniarz <bmb@athenacr.com> wrote:
> 
>> dormando wrote:
>>>> This sounds like TCP slow start.
>>>>
>>>> http://en.wikipedia.org/wiki/Slow-start
>>>>
>>>> As far as tunables you might want to play with the initcwnd route
>>>> flag (see "ip route help")
>>> Ah, yes, initcwnd was it. I'm well aware of TCP Congestion control / slow
>>> start / etc. However I couldn't find the damn tunable for it :)
>> Documenting the flag in ip(8) might increase its visibility
>> a little. I don't see it documented in the iproute2 git head,
>> though it shows up on http://linux.die.net/man/8/ip somehow.
>>
>> Stephen, do you know why that is?
> 
> No one sent me an official patch to change it?

Mention initcwnd in ip(8). Text taken from doc/ip-cref.tex.

Signed-off-by: Brian Bloniarz <bmb@athenacr.com>

diff --git a/man/man8/ip.8 b/man/man8/ip.8
index a5d2915..777a0a7 100644
--- a/man/man8/ip.8
+++ b/man/man8/ip.8
@@ -211,7 +211,9 @@ replace " | " monitor " } "
 .B  realms
 .IR REALM " ] [ "
 .B  rto_min
-.IR TIME " ]"
+.IR TIME " ] [ "
+.B  initcwnd
+.IR NUMBER " ]"
 
 .ti -8
 .IR TYPE " := [ "
@@ -1561,6 +1563,13 @@ the clamp for congestion window.  It is ignored if the
 flag is not used.
 
 .TP
+.BI initcwnd " NUMBER " "(2.5.70+ only)"
+Initial congestion window size for connections to this destination.
+Actual window size is this value multiplied by the MSS
+(``Maximal Segment Size'') for same connection. The default is
+zero, meaning to use the values specified in RFC2414.
+
+.TP
 .BI advmss " NUMBER " "(2.3.15+ only)"
 the MSS ('Maximal Segment Size') to advertise to these
 destinations when establishing TCP connections.  If it is not given,

^ permalink raw reply related

* kernel panic when using netns+bridges+tc(netem)
From: Martín Ferrari @ 2010-05-06  1:01 UTC (permalink / raw)
  To: netdev; +Cc: Mathieu Lacage

Hi there,

While working on my project that uses netns, I found another bug. This
one causes a "Kernel panic - not syncing: Fatal exception in
interrupt", and I can reproduce it in 2.6.33 and 2.6.34-rc5, but not
in 2.6.32. It dies during a call to __kfree_skb.
I tested this on my x86_64 laptop (2 cores) and on qemu. In qemu it
was not triggered until I asked it to emulate 2 cpus instead of one,
so it is probably a SMP-only issue.

Scenario:

I set up a number of network namespaces, each with two veths to netns
1. In the main namespace I take those veths and bridge them in pairs,
to configure a linear topology; also I configure the netem qdisc to
simulate link delay.

Once the network is set up, I run a client/server program to send UDP
packets from one end of the topology to the other. After a few seconds
of sending packets (not really deterministic) it panics.

Note that I didn't experience this problem when using only 2
namespaces (so, no routing)

Below are the dumps. These all come from qemu, as I couldn't use
netconsole in the network at work, but I checked and the backtraces
were essentially the same.

First, two panics with 2.6.34, each one with a slightly different backtrace

[   65.272131] ------------[ cut here ]------------
[   65.272135] kernel BUG at mm/slub.c:2846!
[   65.272135] invalid opcode: 0000 [#1] SMP
[   65.272135] last sysfs file: /sys/devices/virtual/net/lo/operstate
[   65.272135] CPU 1
[   65.272135] Modules linked in: sch_netem veth bridge stp netconsole
configfs loop parport_pc parport evdev tpm_tis tpm snd_pcm tpm_bios
snd_timer snd soundcore snd_page_alloc pcspkr psmouse serio_raw
i2c_piix4 button i2c_core processor ext3 jbd mbcache ide_cd_mod cdrom
ide_gd_mod ata_generic ata_piix libata 8139too scsi_mod floppy piix
8139cp mii ide_core thermal thermal_sys [last unloaded: configfs]
[   65.272135]
[   65.272135] Pid: 1518, comm: udp-perf Not tainted 2.6.34-rc5 #1 /
[   65.272135] RIP: 0010:[<ffffffff810e0d6b>]  [<ffffffff810e0d6b>]
kfree+0x55/0xc6
[   65.272135] RSP: 0018:ffff880001a23d90  EFLAGS: 00010246
[   65.272135] RAX: 0100000000000000 RBX: ffff88007d6bc600 RCX: 0000000000012850
[   65.272135] RDX: ffff88007d6bc600 RSI: 000000000000000e RDI: ffffea0001b6f610
[   65.272135] RBP: ffff88007d6ae200 R08: ffff88007d6bc600 R09: ffffffffa0280690
[   65.272135] R10: 0000000000000002 R11: ffff88007d6bc500 R12: ffffffff8123a77f
[   65.272135] R13: 000000000000002b R14: ffff88007d39b600 R15: ffff88007d6bc600
[   65.272135] FS:  00007f637c9dd6f0(0000) GS:ffff880001a20000(0000)
knlGS:0000000000000000
[   65.272135] CS:  0010 DS: 0000 ES: 0000 CR0: 000000008005003b
[   65.272135] CR2: 00000000009deaa0 CR3: 000000007d82d000 CR4: 00000000000006e0
[   65.272135] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[   65.272135] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
[   65.272135] Process udp-perf (pid: 1518, threadinfo
ffff88007d92a000, task ffff88007e7ff810)
[   65.272135] Stack:
[   65.272135]  ffff88007d6bc600 ffff88007d6bc600 0000000000000246
ffffffff8123a77f
[   65.272135] <0> ffff880001a32860 ffffffff81241e01 ffffe8ffff623280
ffffe8ffff83ffc0
[   65.272135] <0> ffff88007d6bc600 ffffffffa028057d 000000027d4c06c8
ffff88007d4c0600
[   65.272135] Call Trace:
[   65.272135]  <IRQ>
[   65.272135]  [<ffffffff8123a77f>] ? __kfree_skb+0x11/0x7d
[   65.272135]  [<ffffffff81241e01>] ? netif_rx+0xe2/0xee
[   65.272135]  [<ffffffffa028057d>] ? veth_xmit+0x6e/0xad [veth]
[   65.272135]  [<ffffffff8124301f>] ? dev_hard_start_xmit+0x221/0x301
[   65.272135]  [<ffffffff81256d9a>] ? sch_direct_xmit+0x5b/0x15d
[   65.272135]  [<ffffffff81256f55>] ? __qdisc_run+0xb9/0xd8
[   65.272135]  [<ffffffff81240511>] ? net_tx_action+0xd6/0x149
[   65.272135]  [<ffffffff8104ba02>] ? __do_softirq+0xdd/0x19f
[   65.272135]  [<ffffffff8101e515>] ? lapic_next_event+0x18/0x1d
[   65.272135]  [<ffffffff810099dc>] ? call_softirq+0x1c/0x30
[   65.272135]  [<ffffffff8100b863>] ? do_softirq+0x3f/0x79
[   65.272135]  [<ffffffff8104b88e>] ? irq_exit+0x36/0x76
[   65.272135]  [<ffffffff8101f000>] ? smp_apic_timer_interrupt+0x86/0x94
[   65.272135]  [<ffffffff81009493>] ? apic_timer_interrupt+0x13/0x20
[   65.272135]  <EOI>
[   65.272135]  [<ffffffff8114c1ea>] ? cap_sk_getsecid+0x0/0x1
[   65.272135]  [<ffffffff812a0b0f>] ? __xfrm_lookup+0x2/0xb04
[   65.272135]  [<ffffffff81264b8d>] ? ip_route_output_flow+0x77/0x1cc
[   65.272135]  [<ffffffff812887c6>] ? udp_sendmsg+0x32d/0x5f3
[   65.272135]  [<ffffffff81009400>] ? irq_entries_start+0x3c0/0x400
[   65.272135]  [<ffffffff8128e112>] ? inet_sendmsg+0x53/0x58
[   65.272135]  [<ffffffff8123388d>] ? sock_sendmsg+0x83/0x9b
[   65.272135]  [<ffffffff8103a042>] ? pick_next_task_fair+0xca/0xd6
[   65.272135]  [<ffffffff812ef74a>] ? schedule+0x52b/0x593
[   65.272135]  [<ffffffff8100948e>] ? apic_timer_interrupt+0xe/0x20
[   65.272135]  [<ffffffff8100948e>] ? apic_timer_interrupt+0xe/0x20
[   65.272135]  [<ffffffff81185d4b>] ? _copy_from_user+0x1b/0x30
[   65.272135]  [<ffffffff8100765f>] ? __switch_to+0x1b5/0x2a6
[   65.272135]  [<ffffffff81232227>] ? copy_from_user+0x13/0x25
[   65.272135]  [<ffffffff812353cb>] ? sys_sendto+0xd7/0x117
[   65.272135]  [<ffffffff8103f7b1>] ? finish_task_switch+0x34/0xa1
[   65.272135]  [<ffffffff812ef74a>] ? schedule+0x52b/0x593
[   65.272135]  [<ffffffff8100948e>] ? apic_timer_interrupt+0xe/0x20
[   65.272135]  [<ffffffff81008ac2>] ? system_call_fastpath+0x16/0x1b
[   65.272135] Code: 83 c3 08 48 83 3b 00 eb ec 48 83 fd 10 0f 86 84
00 00 00 48 89 ef e8 f4 e2 ff ff 48 89 c7 48 8b 00 84 c0 78 13 66 a9
00 c0 75 04 <0f> 0b eb fe 5b 5d 41 5c e9 fb 57 fd ff 48 8b 4c 24 18 4c
8b 47
[   65.272135] RIP  [<ffffffff810e0d6b>] kfree+0x55/0xc6
[   65.272135]  RSP <ffff880001a23d90>
[   65.385803] ---[ end trace 42d2fb5b94980ab5 ]---
[   65.386337] Kernel panic - not syncing: Fatal exception in interrupt
[   65.386943] Pid: 1518, comm: udp-perf Tainted: G      D    2.6.34-rc5 #1
[   65.387557] Call Trace:
[   65.388011]  <IRQ>  [<ffffffff812eef9b>] ? panic+0x77/0xf7
[   65.388729]  [<ffffffff81046b88>] ? kmsg_dump+0xa6/0x13e
[   65.389292]  [<ffffffff8100c8c2>] ? oops_end+0xa7/0xb4
[   65.389871]  [<ffffffff8123a77f>] ? __kfree_skb+0x11/0x7d
[   65.390441]  [<ffffffff8100a695>] ? do_invalid_op+0x8b/0x95
[   65.391032]  [<ffffffff810e0d6b>] ? kfree+0x55/0xc6
[   65.391587]  [<ffffffffa026f301>] ?
br_nf_pre_routing_finish+0x0/0x25e [bridge]
[   65.392512]  [<ffffffffa026f301>] ?
br_nf_pre_routing_finish+0x0/0x25e [bridge]
[   65.393399]  [<ffffffff8100975b>] ? invalid_op+0x1b/0x20
[   65.393978]  [<ffffffff8123a77f>] ? __kfree_skb+0x11/0x7d
[   65.394547]  [<ffffffff810e0d6b>] ? kfree+0x55/0xc6
[   65.395091]  [<ffffffff8123a77f>] ? __kfree_skb+0x11/0x7d
[   65.395668]  [<ffffffff81241e01>] ? netif_rx+0xe2/0xee
[   65.396438]  [<ffffffffa028057d>] ? veth_xmit+0x6e/0xad [veth]
[   65.397026]  [<ffffffff8124301f>] ? dev_hard_start_xmit+0x221/0x301
[   65.397640]  [<ffffffff81256d9a>] ? sch_direct_xmit+0x5b/0x15d
[   65.398222]  [<ffffffff81256f55>] ? __qdisc_run+0xb9/0xd8
[   65.398788]  [<ffffffff81240511>] ? net_tx_action+0xd6/0x149
[   65.399365]  [<ffffffff8104ba02>] ? __do_softirq+0xdd/0x19f
[   65.399935]  [<ffffffff8101e515>] ? lapic_next_event+0x18/0x1d
[   65.400560]  [<ffffffff810099dc>] ? call_softirq+0x1c/0x30
[   65.401125]  [<ffffffff8100b863>] ? do_softirq+0x3f/0x79
[   65.401708]  [<ffffffff8104b88e>] ? irq_exit+0x36/0x76
[   65.402267]  [<ffffffff8101f000>] ? smp_apic_timer_interrupt+0x86/0x94
[   65.402869]  [<ffffffff81009493>] ? apic_timer_interrupt+0x13/0x20
[   65.403463]  <EOI>  [<ffffffff8114c1ea>] ? cap_sk_getsecid+0x0/0x1
[   65.404210]  [<ffffffff812a0b0f>] ? __xfrm_lookup+0x2/0xb04
[   65.404779]  [<ffffffff81264b8d>] ? ip_route_output_flow+0x77/0x1cc
[   65.405375]  [<ffffffff812887c6>] ? udp_sendmsg+0x32d/0x5f3
[   65.405945]  [<ffffffff81009400>] ? irq_entries_start+0x3c0/0x400
[   65.406534]  [<ffffffff8128e112>] ? inet_sendmsg+0x53/0x58
[   65.407102]  [<ffffffff8123388d>] ? sock_sendmsg+0x83/0x9b
[   65.407674]  [<ffffffff8103a042>] ? pick_next_task_fair+0xca/0xd6
[   65.408296]  [<ffffffff812ef74a>] ? schedule+0x52b/0x593
[   65.408854]  [<ffffffff8100948e>] ? apic_timer_interrupt+0xe/0x20
[   65.409468]  [<ffffffff8100948e>] ? apic_timer_interrupt+0xe/0x20
[   65.410084]  [<ffffffff81185d4b>] ? _copy_from_user+0x1b/0x30
[   65.410667]  [<ffffffff8100765f>] ? __switch_to+0x1b5/0x2a6
[   65.510031]  [<ffffffff81232227>] ? copy_from_user+0x13/0x25
[   65.510818]  [<ffffffff812353cb>] ? sys_sendto+0xd7/0x117
[   65.511608]  [<ffffffff8103f7b1>] ? finish_task_switch+0x34/0xa1
[   65.512465]  [<ffffffff812ef74a>] ? schedule+0x52b/0x593
[   65.513229]  [<ffffffff8100948e>] ? apic_timer_interrupt+0xe/0x20
[   65.514040]  [<ffffffff81008ac2>] ? system_call_fastpath+0x16/0x1b

[ 1438.042568] ------------[ cut here ]------------
[ 1438.043170] kernel BUG at mm/slub.c:2846!
[ 1438.043711] invalid opcode: 0000 [#1] SMP
[ 1438.044531] last sysfs file: /sys/devices/virtual/net/lo/operstate
[ 1438.045148] CPU 1
[ 1438.045350] Modules linked in: sch_netem veth bridge stp netconsole
configfs loop parport_pc tpm_tis tpm snd_pcm psmouse parport tpm_bios
snd_timer snd soundcore snd_page_alloc evdev pcspkr serio_raw
i2c_piix4 i2c_core button processor ext3 jbd mbcache ide_cd_mod cdrom
ide_gd_mod ata_generic ata_piix 8139too libata scsi_mod floppy 8139cp
mii thermal thermal_sys piix ide_core [last unloaded: scsi_wait_scan]
[ 1438.046215]
[ 1438.046215] Pid: 1476, comm: udp-perf Not tainted 2.6.34-rc5 #1 /
[ 1438.046215] RIP: 0010:[<ffffffff810e0d6b>]  [<ffffffff810e0d6b>]
kfree+0x55/0xc6
[ 1438.046215] RSP: 0018:ffff880001a23d90  EFLAGS: 00010246
[ 1438.046215] RAX: 0100000000000000 RBX: ffff88007d882200 RCX: 0000000000012850
[ 1438.046215] RDX: ffff88007d882200 RSI: 000000000000000e RDI: ffffea0001b972a0
[ 1438.046215] RBP: ffff88007e20c000 R08: ffff88007d882200 R09: ffffffffa026c690
[ 1438.046215] R10: 0000000000000002 R11: ffff88007d882100 R12: ffffffff8123a77f
[ 1438.046215] R13: 0000000000000032 R14: ffff8800378e0700 R15: ffff88007d882200
[ 1438.046215] FS:  00007f1c1d07f6f0(0000) GS:ffff880001a20000(0000)
knlGS:0000000000000000
[ 1438.046215] CS:  0010 DS: 0000 ES: 0000 CR0: 000000008005003b
[ 1438.046215] CR2: 00007f6ea9e07310 CR3: 0000000037834000 CR4: 00000000000006e0
[ 1438.046215] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[ 1438.046215] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
[ 1438.046215] Process udp-perf (pid: 1476, threadinfo
ffff88007db0e000, task ffff8800379a2350)
[ 1438.046215] Stack:
[ 1438.046215]  ffff88007d882200 ffff88007d882200 0000000000000246
ffffffff8123a77f
[ 1438.046215] <0> ffff880001a32860 ffffffff81241e01 ffffe8ffffa3f430
ffffe8ffffa3d180
[ 1438.046215] <0> ffff88007d882200 ffffffffa026c57d 0000000201a30500
ffff88007d171200
[ 1438.046215] Call Trace:
[ 1438.046215]  <IRQ>
[ 1438.046215]  [<ffffffff8123a77f>] ? __kfree_skb+0x11/0x7d
[ 1438.046215]  [<ffffffff81241e01>] ? netif_rx+0xe2/0xee
[ 1438.046215]  [<ffffffffa026c57d>] ? veth_xmit+0x6e/0xad [veth]
[ 1438.046215]  [<ffffffff8124301f>] ? dev_hard_start_xmit+0x221/0x301
[ 1438.046215]  [<ffffffff81256d9a>] ? sch_direct_xmit+0x5b/0x15d
[ 1438.046215]  [<ffffffff81256f55>] ? __qdisc_run+0xb9/0xd8
[ 1438.046215]  [<ffffffff81240511>] ? net_tx_action+0xd6/0x149
[ 1438.046215]  [<ffffffff8104ba02>] ? __do_softirq+0xdd/0x19f
[ 1438.046215]  [<ffffffff8101e515>] ? lapic_next_event+0x18/0x1d
[ 1438.046215]  [<ffffffff810099dc>] ? call_softirq+0x1c/0x30
[ 1438.046215]  [<ffffffff8100b863>] ? do_softirq+0x3f/0x79
[ 1438.046215]  [<ffffffff8104b88e>] ? irq_exit+0x36/0x76
[ 1438.046215]  [<ffffffff8101f000>] ? smp_apic_timer_interrupt+0x86/0x94
[ 1438.046215]  [<ffffffff81009493>] ? apic_timer_interrupt+0x13/0x20
[ 1438.046215]  <EOI>
[ 1438.046215]  [<ffffffff81288499>] ? udp_sendmsg+0x0/0x5f3
[ 1438.046215]  [<ffffffff8128850f>] ? udp_sendmsg+0x76/0x5f3
[ 1438.046215]  [<ffffffff812889a7>] ? udp_sendmsg+0x50e/0x5f3
[ 1438.046215]  [<ffffffff8123388d>] ? sock_sendmsg+0x83/0x9b
[ 1438.046215]  [<ffffffff8103a042>] ? pick_next_task_fair+0xca/0xd6
[ 1438.046215]  [<ffffffff812ef74a>] ? schedule+0x52b/0x593
[ 1438.046215]  [<ffffffff8100948e>] ? apic_timer_interrupt+0xe/0x20
[ 1438.046215]  [<ffffffff8100948e>] ? apic_timer_interrupt+0xe/0x20
[ 1438.046215]  [<ffffffff8100948e>] ? apic_timer_interrupt+0xe/0x20
[ 1438.046215]  [<ffffffff810e93ed>] ? fget_light+0x0/0xa1
[ 1438.046215]  [<ffffffff81185e40>] ? copy_user_generic_string+0x30/0x40
[ 1438.046215]  [<ffffffff81232227>] ? copy_from_user+0x13/0x25
[ 1438.046215]  [<ffffffff812353cb>] ? sys_sendto+0xd7/0x117
[ 1438.046215]  [<ffffffff8103f7b1>] ? finish_task_switch+0x34/0xa1
[ 1438.046215]  [<ffffffff812ef74a>] ? schedule+0x52b/0x593
[ 1438.046215]  [<ffffffff8100948e>] ? apic_timer_interrupt+0xe/0x20
[ 1438.046215]  [<ffffffff81008ac2>] ? system_call_fastpath+0x16/0x1b
[ 1438.046215] Code: 83 c3 08 48 83 3b 00 eb ec 48 83 fd 10 0f 86 84
00 00 00 48 89 ef e8 f4 e2 ff ff 48 89 c7 48 8b 00 84 c0 78 13 66 a9
00 c0 75 04 <0f> 0b eb fe 5b 5d 41 5c e9 fb 57 fd ff 48 8b 4c 24 18 4c
8b 47
[ 1438.046215] RIP  [<ffffffff810e0d6b>] kfree+0x55/0xc6
[ 1438.046215]  RSP <ffff880001a23d90>
[ 1438.102706] ---[ end trace ab36062dcf233d6a ]---
[ 1438.103251] Kernel panic - not syncing: Fatal exception in interrupt
[ 1438.103912] Pid: 1476, comm: udp-perf Tainted: G      D    2.6.34-rc5 #1
[ 1438.104563] Call Trace:
[ 1438.105017]  <IRQ>  [<ffffffff812eef9b>] ? panic+0x77/0xf7
[ 1438.105718]  [<ffffffff81046b88>] ? kmsg_dump+0xa6/0x13e
[ 1438.106293]  [<ffffffff8100c8c2>] ? oops_end+0xa7/0xb4
[ 1438.106866]  [<ffffffff8123a77f>] ? __kfree_skb+0x11/0x7d
[ 1438.107451]  [<ffffffff8100a695>] ? do_invalid_op+0x8b/0x95
[ 1438.108036]  [<ffffffff810e0d6b>] ? kfree+0x55/0xc6
[ 1438.108799]  [<ffffffffa025b301>] ?
br_nf_pre_routing_finish+0x0/0x25e [bridge]
[ 1438.109699]  [<ffffffffa025b301>] ?
br_nf_pre_routing_finish+0x0/0x25e [bridge]
[ 1438.110604]  [<ffffffff8100975b>] ? invalid_op+0x1b/0x20
[ 1438.111172]  [<ffffffff8123a77f>] ? __kfree_skb+0x11/0x7d
[ 1438.111753]  [<ffffffff810e0d6b>] ? kfree+0x55/0xc6
[ 1438.112359]  [<ffffffff8123a77f>] ? __kfree_skb+0x11/0x7d
[ 1438.112952]  [<ffffffff81241e01>] ? netif_rx+0xe2/0xee
[ 1438.113527]  [<ffffffffa026c57d>] ? veth_xmit+0x6e/0xad [veth]
[ 1438.114123]  [<ffffffff8124301f>] ? dev_hard_start_xmit+0x221/0x301
[ 1438.114734]  [<ffffffff81256d9a>] ? sch_direct_xmit+0x5b/0x15d
[ 1438.115334]  [<ffffffff81256f55>] ? __qdisc_run+0xb9/0xd8
[ 1438.115912]  [<ffffffff81240511>] ? net_tx_action+0xd6/0x149
[ 1438.116544]  [<ffffffff8104ba02>] ? __do_softirq+0xdd/0x19f
[ 1438.117128]  [<ffffffff8101e515>] ? lapic_next_event+0x18/0x1d
[ 1438.117732]  [<ffffffff810099dc>] ? call_softirq+0x1c/0x30
[ 1438.118420]  [<ffffffff8100b863>] ? do_softirq+0x3f/0x79
[ 1438.119066]  [<ffffffff8104b88e>] ? irq_exit+0x36/0x76
[ 1438.147879]  [<ffffffff8101f000>] ? smp_apic_timer_interrupt+0x86/0x94
[ 1438.148615]  [<ffffffff81009493>] ? apic_timer_interrupt+0x13/0x20
[ 1438.149294]  <EOI>  [<ffffffff81288499>] ? udp_sendmsg+0x0/0x5f3
[ 1438.150101]  [<ffffffff8128850f>] ? udp_sendmsg+0x76/0x5f3
[ 1438.150681]  [<ffffffff812889a7>] ? udp_sendmsg+0x50e/0x5f3
[ 1438.151276]  [<ffffffff8123388d>] ? sock_sendmsg+0x83/0x9b
[ 1438.151853]  [<ffffffff8103a042>] ? pick_next_task_fair+0xca/0xd6
[ 1438.152508]  [<ffffffff812ef74a>] ? schedule+0x52b/0x593
[ 1438.153078]  [<ffffffff8100948e>] ? apic_timer_interrupt+0xe/0x20
[ 1438.153687]  [<ffffffff8100948e>] ? apic_timer_interrupt+0xe/0x20
[ 1438.154295]  [<ffffffff8100948e>] ? apic_timer_interrupt+0xe/0x20
[ 1438.154902]  [<ffffffff810e93ed>] ? fget_light+0x0/0xa1
[ 1438.155477]  [<ffffffff81185e40>] ? copy_user_generic_string+0x30/0x40
[ 1438.156097]  [<ffffffff81232227>] ? copy_from_user+0x13/0x25
[ 1438.156720]  [<ffffffff812353cb>] ? sys_sendto+0xd7/0x117
[ 1438.157376]  [<ffffffff8103f7b1>] ? finish_task_switch+0x34/0xa1
[ 1438.157970]  [<ffffffff812ef74a>] ? schedule+0x52b/0x593
[ 1438.158557]  [<ffffffff8100948e>] ? apic_timer_interrupt+0xe/0x20
[ 1438.159153]  [<ffffffff81008ac2>] ? system_call_fastpath+0x16/0x1b

------------

Finally, a panic in 2.6.33. Note that the line in which BUG is
triggered is different.

[  102.442815] ------------[ cut here ]------------
[  102.443433] kernel BUG at
/build/mattems-linux-2.6_2.6.33-1~experimental.4-amd64-ieqSsa/linux-2.6-2.6.33-1~experimental.4/debian/build/source_amd64_none/mm/slub.c:2969!
[  102.444874] invalid opcode: 0000 [#1] SMP
[  102.444958] last sysfs file: /sys/devices/virtual/net/lo/operstate
[  102.444958] CPU 0
[  102.444958] Pid: 4, comm: ksoftirqd/0 Not tainted 2.6.33-2-amd64 #1 /
[  102.444958] RIP: 0010:[<ffffffff810e1e2c>]  [<ffffffff810e1e2c>]
kfree+0x55/0xcb
[  102.444958] RSP: 0018:ffff880001a03df8  EFLAGS: 00010246
[  102.444958] RAX: 0100000000000000 RBX: ffff88007e439000 RCX: 0000000000012d70
[  102.444958] RDX: 000000000000006a RSI: ffffea0001b76a70 RDI: ffffea0000c1a328
[  102.444958] RBP: ffff880037533c00 R08: ffff88007e21e500 R09: ffffffff8162bbe0
[  102.444958] R10: 000000037e439e00 R11: ffff88007e439e00 R12: ffffffff81239cf2
[  102.444958] R13: 000000000000006a R14: ffff88007efb3000 R15: ffff88007e21e500
[  102.444958] FS:  0000000000000000(0000) GS:ffff880001a00000(0000)
knlGS:0000000000000000
[  102.444958] CS:  0010 DS: 0000 ES: 0000 CR0: 000000008005003b
[  102.444958] CR2: 00007fc8e25cd0f0 CR3: 000000007d088000 CR4: 00000000000006f0
[  102.444958] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[  102.444958] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
[  102.444958] Process ksoftirqd/0 (pid: 4, threadinfo
ffff88007fb78000, task ffff88007fb61500)
[  102.444958] Stack:
[  102.444958]  ffff88007e439000 ffff88007e439000 0000000000000246
ffffffff81239cf2
[  102.444958] <0> ffff880001a12d80 ffffffff8124125d ffffe8ffff818190
ffffe8ffff815ee0
[  102.444958] <0> ffff88007e439000 ffffffffa025254b ffff880001a10010
ffff88007ef95200
[  102.444958] Call Trace:
[  102.444958]  <IRQ>
[  102.444958]  [<ffffffff81239cf2>] ? __kfree_skb+0x11/0x7d
[  102.444958]  [<ffffffff8124125d>] ? netif_rx+0xe2/0xee
[  102.444958]  [<ffffffffa025254b>] ? veth_xmit+0x6e/0xad [veth]
[  102.444958]  [<ffffffff8124243a>] ? dev_hard_start_xmit+0x221/0x2dc
[  102.444958]  [<ffffffff81255514>] ? sch_direct_xmit+0x5b/0x15d
[  102.444958]  [<ffffffff812556cf>] ? __qdisc_run+0xb9/0xda
[  102.444958]  [<ffffffff8123fa99>] ? net_tx_action+0xd6/0x149
[  102.444958]  [<ffffffff8104c918>] ? __do_softirq+0xdd/0x1a1
[  102.444958]  [<ffffffff810099dc>] ? call_softirq+0x1c/0x30
[  102.444958]  <EOI>
[  102.444958]  [<ffffffff8100b85f>] ? do_softirq+0x3f/0x79
[  102.444958]  [<ffffffff8104c504>] ? run_ksoftirqd+0x6a/0x118
[  102.444958]  [<ffffffff8104c49a>] ? run_ksoftirqd+0x0/0x118
[  102.444958]  [<ffffffff8105ee99>] ? kthread+0x79/0x81
[  102.444958]  [<ffffffff810098e4>] ? kernel_thread_helper+0x4/0x10
[  102.444958]  [<ffffffff8105ee20>] ? kthread+0x0/0x81
[  102.444958]  [<ffffffff810098e0>] ? kernel_thread_helper+0x0/0x10
[  102.444958] Code: 83 c3 08 48 83 3b 00 eb ec 48 83 fd 10 0f 86 89
00 00 00 48 89 ef e8 f0 e6 ff ff 48 89 c7 48 8b 00 84 c0 78 13 66 a9
00 c0 75 04 <0f> 0b eb fe 5b 5d 41 5c e9 45 5a fd ff 48 8b 4c 24 18 4c
8b 4f
[  102.444958] RIP  [<ffffffff810e1e2c>] kfree+0x55/0xcb
[  102.444958]  RSP <ffff880001a03df8>
[  102.484000] ---[ end trace b1157390d40df1cb ]---
[  102.485018] Kernel panic - not syncing: Fatal exception in interrupt
[  102.485647] Pid: 4, comm: ksoftirqd/0 Tainted: G      D    2.6.33-2-amd64 #1
[  102.486630] Call Trace:
[  102.487112]  <IRQ>  [<ffffffff812ec605>] ? panic+0x86/0x14b
[  102.487870]  [<ffffffff8104c799>] ? irq_exit+0x48/0x76
[  102.488474]  [<ffffffff812ee893>] ? ret_from_intr+0x0/0x11
[  102.489068]  [<ffffffff810478c0>] ? kmsg_dump+0xa6/0x13e
[  102.489661]  [<ffffffff8100c89a>] ? oops_end+0xa7/0xb4
[  102.490245]  [<ffffffff81239cf2>] ? __kfree_skb+0x11/0x7d
[  102.490836]  [<ffffffff8100a690>] ? do_invalid_op+0x8b/0x95
[  102.491436]  [<ffffffff810e1e2c>] ? kfree+0x55/0xcb
[  102.492020]  [<ffffffffa0244aee>] ?
br_nf_pre_routing_finish+0x284/0x2a6 [bridge]
[  102.492942]  [<ffffffffa024486a>] ?
br_nf_pre_routing_finish+0x0/0x2a6 [bridge]
[  102.493857]  [<ffffffff8125f204>] ? nf_hook_slow+0x62/0xc3
[  102.523082]  [<ffffffffa024486a>] ?
br_nf_pre_routing_finish+0x0/0x2a6 [bridge]
[  102.524008]  [<ffffffff8100975b>] ? invalid_op+0x1b/0x20
[  102.524631]  [<ffffffff81239cf2>] ? __kfree_skb+0x11/0x7d
[  102.525225]  [<ffffffff810e1e2c>] ? kfree+0x55/0xcb
[  102.525795]  [<ffffffff810e1e1c>] ? kfree+0x45/0xcb
[  102.526402]  [<ffffffff81239cf2>] ? __kfree_skb+0x11/0x7d
[  102.526992]  [<ffffffff8124125d>] ? netif_rx+0xe2/0xee
[  102.527576]  [<ffffffffa025254b>] ? veth_xmit+0x6e/0xad [veth]
[  102.528199]  [<ffffffff8124243a>] ? dev_hard_start_xmit+0x221/0x2dc
[  102.528821]  [<ffffffff81255514>] ? sch_direct_xmit+0x5b/0x15d
[  102.529429]  [<ffffffff812556cf>] ? __qdisc_run+0xb9/0xda
[  102.530018]  [<ffffffff8123fa99>] ? net_tx_action+0xd6/0x149
[  102.530618]  [<ffffffff8104c918>] ? __do_softirq+0xdd/0x1a1
[  102.531214]  [<ffffffff810099dc>] ? call_softirq+0x1c/0x30
[  102.531804]  <EOI>  [<ffffffff8100b85f>] ? do_softirq+0x3f/0x79
[  102.532572]  [<ffffffff8104c504>] ? run_ksoftirqd+0x6a/0x118
[  102.533169]  [<ffffffff8104c49a>] ? run_ksoftirqd+0x0/0x118
[  102.533764]  [<ffffffff8105ee99>] ? kthread+0x79/0x81
[  102.534340]  [<ffffffff810098e4>] ? kernel_thread_helper+0x4/0x10
[  102.534954]  [<ffffffff8105ee20>] ? kthread+0x0/0x81
[  102.535526]  [<ffffffff810098e0>] ? kernel_thread_helper+0x0/0x10

-- 
Martín Ferrari

^ permalink raw reply

* Re: virtio: put last_used and last_avail index into ring itself.
From: Rusty Russell @ 2010-05-06  0:52 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: netdev, virtualization, kvm, linux-kernel, mingo, linux-mm, akpm,
	hpa, gregory.haskins, s.hetze, Daniel Walker, Eric Dumazet
In-Reply-To: <20100504182236.GA14141@redhat.com>

On Wed, 5 May 2010 03:52:36 am Michael S. Tsirkin wrote:
> > virtio: put last_used and last_avail index into ring itself.
> > 
> > Generally, the other end of the virtio ring doesn't need to see where
> > you're up to in consuming the ring.  However, to completely understand
> > what's going on from the outside, this information must be exposed.
> > For example, if you want to save and restore a virtio_ring, but you're
> > not the consumer because the kernel is using it directly.
> > 
> > Fortunately, we have room to expand: the ring is always a whole number
> > of pages and there's hundreds of bytes of padding after the avail ring
> > and the used ring, whatever the number of descriptors (which must be a
> > power of 2).
> > 
> > We add a feature bit so the guest can tell the host that it's writing
> > out the current value there, if it wants to use that.
> > 
> > Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
> 
> I've been looking at this patch some more (more on why
> later), and I wonder: would it be better to add some
> alignment to the last used index address, so that
> if we later add more stuff at the tail, it all
> fits in a single cache line?

In theory, but not in practice.  We don't have many rings, so the
difference between 1 and 2 cache lines is not very much.

> We use a new feature bit anyway, so layout change should not be
> a problem.
> 
> Since I raised the question of caches: for used ring,
> the ring is not aligned to 64 bit, so on CPUs with 64 bit
> or larger cache lines, used entries will often cross
> cache line boundaries. Am I right and might it
> have been better to align ring entries to cache line boundaries?
> 
> What do you think?

I think everyone is settled on 128 byte cache lines for the foreseeable
future, so it's not really an issue.

Cheers,
Rusty.

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply

* [net-next-2.6 PATCH 4/4] e1000e: Save irq into netdev structure
From: Jeff Kirsher @ 2010-05-06  0:03 UTC (permalink / raw)
  To: davem; +Cc: netdev, gospo, Tom Herbert, Bruce Allan, Jeff Kirsher
In-Reply-To: <20100506000221.8042.64794.stgit@localhost.localdomain>

From: Tom Herbert <therbert@google.com>

Set netdev->irq to pdev->irq.  This should be consistent with other
drivers.

Signed-off-by: Tom Herbert <therbert@google.com>
Acked-by: Bruce Allan <bruce.w.allan@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---

 drivers/net/e1000e/netdev.c |    2 ++
 1 files changed, 2 insertions(+), 0 deletions(-)

diff --git a/drivers/net/e1000e/netdev.c b/drivers/net/e1000e/netdev.c
index b767dbb..c5f65a2 100644
--- a/drivers/net/e1000e/netdev.c
+++ b/drivers/net/e1000e/netdev.c
@@ -5456,6 +5456,8 @@ static int __devinit e1000_probe(struct pci_dev *pdev,
 
 	SET_NETDEV_DEV(netdev, &pdev->dev);
 
+	netdev->irq = pdev->irq;
+
 	pci_set_drvdata(pdev, netdev);
 	adapter = netdev_priv(netdev);
 	hw = &adapter->hw;


^ permalink raw reply related

* [net-next-2.6 PATCH 3/4] e1000e: Remove unnecessary log message
From: Jeff Kirsher @ 2010-05-06  0:03 UTC (permalink / raw)
  To: davem; +Cc: netdev, gospo, Tom Herbert, Bruce Allan, Jeff Kirsher
In-Reply-To: <20100506000221.8042.64794.stgit@localhost.localdomain>

From: Tom Herbert <therbert@google.com>

Remove e_info message printed whenever TSO is enabled or disabled.
This is not very useful and just clutters dmesg.

Signed-off-by: Tom Herbert <therbert@google.com>
Acked-by: Bruce Allan <bruce.w.allan@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---

 drivers/net/e1000e/ethtool.c |    1 -
 1 files changed, 0 insertions(+), 1 deletions(-)

diff --git a/drivers/net/e1000e/ethtool.c b/drivers/net/e1000e/ethtool.c
index c81118a..6ff376c 100644
--- a/drivers/net/e1000e/ethtool.c
+++ b/drivers/net/e1000e/ethtool.c
@@ -412,7 +412,6 @@ static int e1000_set_tso(struct net_device *netdev, u32 data)
 		netdev->features &= ~NETIF_F_TSO6;
 	}
 
-	e_info("TSO is %s\n", data ? "Enabled" : "Disabled");
 	adapter->flags |= FLAG_TSO_FORCE;
 	return 0;
 }


^ permalink raw reply related

* [net-next-2.6 PATCH 2/4] e1000e: reduce writes of RX producer ptr
From: Jeff Kirsher @ 2010-05-06  0:02 UTC (permalink / raw)
  To: davem; +Cc: netdev, gospo, Tom Herbert, Bruce Allan, Jeff Kirsher
In-Reply-To: <20100506000221.8042.64794.stgit@localhost.localdomain>

From: Tom Herbert <therbert@google.com>

Reduce number of writes to RX producer pointer.   When alloc'ing RX
buffers, only write the RX producer pointer once every
E1000_RX_BUFFER_WRITE (16) buffers created.

Signed-off-by: Tom Herbert <therbert@google.com>
Acked-by: Bruce Allan <bruce.w.allan@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---

 drivers/net/e1000e/netdev.c |   57 +++++++++++++++++--------------------------
 1 files changed, 23 insertions(+), 34 deletions(-)

diff --git a/drivers/net/e1000e/netdev.c b/drivers/net/e1000e/netdev.c
index 1eb9b59..b767dbb 100644
--- a/drivers/net/e1000e/netdev.c
+++ b/drivers/net/e1000e/netdev.c
@@ -548,26 +548,23 @@ map_skb:
 		rx_desc = E1000_RX_DESC(*rx_ring, i);
 		rx_desc->buffer_addr = cpu_to_le64(buffer_info->dma);
 
+		if (unlikely(!(i & (E1000_RX_BUFFER_WRITE - 1)))) {
+			/*
+			 * Force memory writes to complete before letting h/w
+			 * know there are new descriptors to fetch.  (Only
+			 * applicable for weak-ordered memory model archs,
+			 * such as IA-64).
+			 */
+			wmb();
+			writel(i, adapter->hw.hw_addr + rx_ring->tail);
+		}
 		i++;
 		if (i == rx_ring->count)
 			i = 0;
 		buffer_info = &rx_ring->buffer_info[i];
 	}
 
-	if (rx_ring->next_to_use != i) {
-		rx_ring->next_to_use = i;
-		if (i-- == 0)
-			i = (rx_ring->count - 1);
-
-		/*
-		 * Force memory writes to complete before letting h/w
-		 * know there are new descriptors to fetch.  (Only
-		 * applicable for weak-ordered memory model archs,
-		 * such as IA-64).
-		 */
-		wmb();
-		writel(i, adapter->hw.hw_addr + rx_ring->tail);
-	}
+	rx_ring->next_to_use = i;
 }
 
 /**
@@ -649,6 +646,17 @@ static void e1000_alloc_rx_buffers_ps(struct e1000_adapter *adapter,
 
 		rx_desc->read.buffer_addr[0] = cpu_to_le64(buffer_info->dma);
 
+		if (unlikely(!(i & (E1000_RX_BUFFER_WRITE - 1)))) {
+			/*
+			 * Force memory writes to complete before letting h/w
+			 * know there are new descriptors to fetch.  (Only
+			 * applicable for weak-ordered memory model archs,
+			 * such as IA-64).
+			 */
+			wmb();
+			writel(i<<1, adapter->hw.hw_addr + rx_ring->tail);
+		}
+
 		i++;
 		if (i == rx_ring->count)
 			i = 0;
@@ -656,26 +664,7 @@ static void e1000_alloc_rx_buffers_ps(struct e1000_adapter *adapter,
 	}
 
 no_buffers:
-	if (rx_ring->next_to_use != i) {
-		rx_ring->next_to_use = i;
-
-		if (!(i--))
-			i = (rx_ring->count - 1);
-
-		/*
-		 * Force memory writes to complete before letting h/w
-		 * know there are new descriptors to fetch.  (Only
-		 * applicable for weak-ordered memory model archs,
-		 * such as IA-64).
-		 */
-		wmb();
-		/*
-		 * Hardware increments by 16 bytes, but packet split
-		 * descriptors are 32 bytes...so we increment tail
-		 * twice as much.
-		 */
-		writel(i<<1, adapter->hw.hw_addr + rx_ring->tail);
-	}
+	rx_ring->next_to_use = i;
 }
 
 /**


^ permalink raw reply related

* [net-next-2.6 PATCH 1/4] e1000e: save skb counts in TX to avoid cache misses
From: Jeff Kirsher @ 2010-05-06  0:02 UTC (permalink / raw)
  To: davem; +Cc: netdev, gospo, Tom Herbert, Bruce Allan, Jeff Kirsher

From: Tom Herbert <therbert@google.com>

In e1000_tx_map, precompute number of segments and bytecounts which
are derived from fields in skb; these are stored in buffer_info.  When
cleaning tx in e1000_clean_tx_irq use the values in the associated
buffer_info for statistics counting; this eliminates cache misses
on skb fields.

Signed-off-by: Tom Herbert <therbert@google.com>
Acked-by: Bruce Allan <bruce.w.allan@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---

 drivers/net/e1000e/e1000.h  |    2 ++
 drivers/net/e1000e/netdev.c |   18 +++++++++---------
 2 files changed, 11 insertions(+), 9 deletions(-)

diff --git a/drivers/net/e1000e/e1000.h b/drivers/net/e1000e/e1000.h
index 906c4da..c0b3db4 100644
--- a/drivers/net/e1000e/e1000.h
+++ b/drivers/net/e1000e/e1000.h
@@ -189,6 +189,8 @@ struct e1000_buffer {
 			unsigned long time_stamp;
 			u16 length;
 			u16 next_to_watch;
+			unsigned int segs;
+			unsigned int bytecount;
 			u16 mapped_as_page;
 		};
 		/* Rx */
diff --git a/drivers/net/e1000e/netdev.c b/drivers/net/e1000e/netdev.c
index 0a16465..1eb9b59 100644
--- a/drivers/net/e1000e/netdev.c
+++ b/drivers/net/e1000e/netdev.c
@@ -1001,14 +1001,8 @@ static bool e1000_clean_tx_irq(struct e1000_adapter *adapter)
 			cleaned = (i == eop);
 
 			if (cleaned) {
-				struct sk_buff *skb = buffer_info->skb;
-				unsigned int segs, bytecount;
-				segs = skb_shinfo(skb)->gso_segs ?: 1;
-				/* multiply data chunks by size of headers */
-				bytecount = ((segs - 1) * skb_headlen(skb)) +
-					    skb->len;
-				total_tx_packets += segs;
-				total_tx_bytes += bytecount;
+				total_tx_packets += buffer_info->segs;
+				total_tx_bytes += buffer_info->bytecount;
 			}
 
 			e1000_put_txbuf(adapter, buffer_info);
@@ -4277,7 +4271,7 @@ static int e1000_tx_map(struct e1000_adapter *adapter,
 	struct e1000_buffer *buffer_info;
 	unsigned int len = skb_headlen(skb);
 	unsigned int offset = 0, size, count = 0, i;
-	unsigned int f;
+	unsigned int f, bytecount, segs;
 
 	i = tx_ring->next_to_use;
 
@@ -4337,7 +4331,13 @@ static int e1000_tx_map(struct e1000_adapter *adapter,
 		}
 	}
 
+	segs = skb_shinfo(skb)->gso_segs ?: 1;
+	/* multiply data chunks by size of headers */
+	bytecount = ((segs - 1) * skb_headlen(skb)) + skb->len;
+
 	tx_ring->buffer_info[i].skb = skb;
+	tx_ring->buffer_info[i].segs = segs;
+	tx_ring->buffer_info[i].bytecount = bytecount;
 	tx_ring->buffer_info[first].next_to_watch = i;
 
 	return count;


^ permalink raw reply related

* Re: [PATCH]PM QOS refresh against next-20100430
From: Rafael J. Wysocki @ 2010-05-05 23:59 UTC (permalink / raw)
  To: mgross
  Cc: Kevin Hilman, aili, dwalker, tiwai, bruce.w.allan, davidb, mcgrof,
	pavel, linux-pm, lkml, NetDev, Johannes Berg,
	ACPI Devel Maling List, Len Brown, John W. Linville
In-Reply-To: <20100504143043.GA27927@linux.intel.com>

On Tuesday 04 May 2010, mark gross wrote:
> On Sat, May 01, 2010 at 01:08:28AM +0200, Rafael J. Wysocki wrote:
> > On Saturday 01 May 2010, mark gross wrote:
> > > On Sat, May 01, 2010 at 12:13:16AM +0200, Rafael J. Wysocki wrote:
> > > > On Friday 30 April 2010, mark gross wrote:
> > > > > The following is a refresh of the PM_QOS implementation, this patch
> > > > > updates some documentation input I got from Randy.
> > > > > 
> > > > > This patch changes the string based list management to a handle base
> > > > > implementation to help with the hot path use of pm-qos, it also renames
> > > > > much of the API to use "request" as opposed to "requirement" that was
> > > > > used in the initial implementation.  I did this because request more
> > > > > accurately represents what it actually does.
> > > > > 
> > > > > Also, I added a string based ABI for users wanting to use a string
> > > > > interface.  So if the user writes 0xDDDDDDDD formatted hex it will be
> > > > > accepted by the interface.  (someone asked me for it and I don't think
> > > > > it hurts anything.)
> > > > > 
> > > > > I really would like to get this refresh taken care of.  Its been taking
> > > > > me too long to close this.  please review or include it in next.
> > > > > 
> > > > > Thanks!
> > > > 
> > > > Well, I'd take it to suspend-2.6/linux-next, but first, it touches
> > > > subsystems whose maintainers were not in the Cc list, like the network
> > > > drivers, wireless and ACPI.  The changes are trivial, so I hope they don't
> > > > mind.
> > > > 
> > > > Second, my tree is based on the Linus' tree rather than linux-next and
> > > > the change in net/mac80211/scan.c doesn't seem to match that.  Please tell me
> > > > what I'm supposed to do about that.
> > > 
> > > You can waite for monday and I'll send a rebased version to linus' tree.
> > > 
> > > I thought linux-next was where folks wanted me to put it.
> > > 
> > > I'll email out a new one monday.
> > 
> > Great, thanks!
> > 
> > Rafael
> 
> Sorry I'm late, 
> 
> 
> Signed-off-by: markgross <mgross@linux.intel.com>

Applied to suspend-2.6/linux-next.  Please verify the changelog.

Rafael

^ permalink raw reply

* [PATCH 4/6] netdev: octeon_mgmt: Free TX skbufs in a timely manner.
From: David Daney @ 2010-05-05 23:03 UTC (permalink / raw)
  To: netdev; +Cc: linux-mips, David Daney
In-Reply-To: <1273100593-11043-1-git-send-email-ddaney@caviumnetworks.com>

We also reduce the high water mark to 1 so skbufs are not stranded for
long periods of time.  Since we are cleaning after each packet, there is
no need to do it in the transmit path.

Signed-off-by: David Daney <ddaney@caviumnetworks.com>
---
 drivers/net/octeon/octeon_mgmt.c |    5 ++---
 1 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/net/octeon/octeon_mgmt.c b/drivers/net/octeon/octeon_mgmt.c
index 633fa89..3cf6f62 100644
--- a/drivers/net/octeon/octeon_mgmt.c
+++ b/drivers/net/octeon/octeon_mgmt.c
@@ -832,9 +832,9 @@ static int octeon_mgmt_open(struct net_device *netdev)
 	mix_irhwm.s.irhwm = 0;
 	cvmx_write_csr(CVMX_MIXX_IRHWM(port), mix_irhwm.u64);
 
-	/* Interrupt when we have 5 or more packets to clean.  */
+	/* Interrupt when we have 1 or more packets to clean.  */
 	mix_orhwm.u64 = 0;
-	mix_orhwm.s.orhwm = 5;
+	mix_orhwm.s.orhwm = 1;
 	cvmx_write_csr(CVMX_MIXX_ORHWM(port), mix_orhwm.u64);
 
 	/* Enable receive and transmit interrupts */
@@ -995,7 +995,6 @@ static int octeon_mgmt_xmit(struct sk_buff *skb, struct net_device *netdev)
 	cvmx_write_csr(CVMX_MIXX_ORING2(port), 1);
 
 	netdev->trans_start = jiffies;
-	octeon_mgmt_clean_tx_buffers(p);
 	octeon_mgmt_update_tx_stats(netdev);
 	return NETDEV_TX_OK;
 }
-- 
1.6.6.1


^ permalink raw reply related

* [PATCH 6/6] netdev: octeon_mgmt: Remove some gratuitous blank lines.
From: David Daney @ 2010-05-05 23:03 UTC (permalink / raw)
  To: netdev; +Cc: linux-mips, David Daney
In-Reply-To: <1273100593-11043-1-git-send-email-ddaney@caviumnetworks.com>

Signed-off-by: David Daney <ddaney@caviumnetworks.com>
---
 drivers/net/octeon/octeon_mgmt.c |    7 -------
 1 files changed, 0 insertions(+), 7 deletions(-)

diff --git a/drivers/net/octeon/octeon_mgmt.c b/drivers/net/octeon/octeon_mgmt.c
index 1fdc7b3..3924703 100644
--- a/drivers/net/octeon/octeon_mgmt.c
+++ b/drivers/net/octeon/octeon_mgmt.c
@@ -380,7 +380,6 @@ done:
 	mix_ircnt.s.ircnt = 1;
 	cvmx_write_csr(CVMX_MIXX_IRCNT(port), mix_ircnt.u64);
 	return rc;
-
 }
 
 static int octeon_mgmt_receive_packets(struct octeon_mgmt *p, int budget)
@@ -390,7 +389,6 @@ static int octeon_mgmt_receive_packets(struct octeon_mgmt *p, int budget)
 	union cvmx_mixx_ircnt mix_ircnt;
 	int rc;
 
-
 	mix_ircnt.u64 = cvmx_read_csr(CVMX_MIXX_IRCNT(port));
 	while (work_done < budget && mix_ircnt.s.ircnt) {
 
@@ -516,7 +514,6 @@ static void octeon_mgmt_set_rx_filtering(struct net_device *netdev)
 			octeon_mgmt_cam_state_add(&cam_state, ha->addr);
 	}
 
-
 	spin_lock_irqsave(&p->lock, flags);
 
 	/* Disable packet I/O. */
@@ -525,7 +522,6 @@ static void octeon_mgmt_set_rx_filtering(struct net_device *netdev)
 	agl_gmx_prtx.s.en = 0;
 	cvmx_write_csr(CVMX_AGL_GMX_PRTX_CFG(port), agl_gmx_prtx.u64);
 
-
 	adr_ctl.u64 = 0;
 	adr_ctl.s.cam_mode = cam_mode;
 	adr_ctl.s.mcst = multicast_mode;
@@ -928,7 +924,6 @@ static int octeon_mgmt_stop(struct net_device *netdev)
 
 	octeon_mgmt_reset_hw(p);
 
-
 	free_irq(p->irq, netdev);
 
 	/* dma_unmap is a nop on Octeon, so just free everything.  */
@@ -945,7 +940,6 @@ static int octeon_mgmt_stop(struct net_device *netdev)
 			 DMA_BIDIRECTIONAL);
 	kfree(p->tx_ring);
 
-
 	return 0;
 }
 
@@ -1112,7 +1106,6 @@ static int __init octeon_mgmt_probe(struct platform_device *pdev)
 	netdev->netdev_ops = &octeon_mgmt_ops;
 	netdev->ethtool_ops = &octeon_mgmt_ethtool_ops;
 
-
 	/* The mgmt ports get the first N MACs.  */
 	for (i = 0; i < 6; i++)
 		netdev->dev_addr[i] = octeon_bootinfo->mac_addr_base[i];
-- 
1.6.6.1


^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox