Netdev List
 help / color / mirror / Atom feed
* [PATCH 3/5] bonding: Use a per netns implementation of /sys/class/net/bonding_masters.
From: Eric W. Biederman @ 2011-10-13  7:56 UTC (permalink / raw)
  To: Greg Kroah-Hartman, David Miller
  Cc: linux-kernel, netdev, Tejun Heo, Jay Vosburgh, Andy Gospodarek
In-Reply-To: <m1sjmx9vhf.fsf_-_@fess.ebiederm.org>


This fixes a network namespace misfeature: bonding_masters looked at
current instead of remembering the context in which
/sys/class/net/bonding_masters was opened to see which network
namespace to act upon.

This removes the need for sysfs to handle tagged directories with
untagged members allowing for a conceptually simpler sysfs
implementation.

Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
 drivers/net/bonding/bond_main.c  |    7 +----
 drivers/net/bonding/bond_sysfs.c |   45 ++++++++++++++++++++++++++-----------
 drivers/net/bonding/bonding.h    |    7 ++++-
 3 files changed, 38 insertions(+), 21 deletions(-)

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 6d79b78..e084b1c 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -4898,6 +4898,7 @@ static int __net_init bond_net_init(struct net *net)
 	INIT_LIST_HEAD(&bn->dev_list);
 
 	bond_create_proc_dir(bn);
+	bond_create_sysfs(bn);
 	
 	return 0;
 }
@@ -4906,6 +4907,7 @@ static void __net_exit bond_net_exit(struct net *net)
 {
 	struct bond_net *bn = net_generic(net, bond_net_id);
 
+	bond_destroy_sysfs(bn);
 	bond_destroy_proc_dir(bn);
 }
 
@@ -4943,10 +4945,6 @@ static int __init bonding_init(void)
 			goto err;
 	}
 
-	res = bond_create_sysfs();
-	if (res)
-		goto err;
-
 	register_netdevice_notifier(&bond_netdev_notifier);
 	register_inetaddr_notifier(&bond_inetaddr_notifier);
 out:
@@ -4964,7 +4962,6 @@ static void __exit bonding_exit(void)
 	unregister_netdevice_notifier(&bond_netdev_notifier);
 	unregister_inetaddr_notifier(&bond_inetaddr_notifier);
 
-	bond_destroy_sysfs();
 	bond_destroy_debugfs();
 
 	rtnl_link_unregister(&bond_link_ops);
diff --git a/drivers/net/bonding/bond_sysfs.c b/drivers/net/bonding/bond_sysfs.c
index 2dfb4bf..6044ff8 100644
--- a/drivers/net/bonding/bond_sysfs.c
+++ b/drivers/net/bonding/bond_sysfs.c
@@ -55,8 +55,8 @@ static ssize_t bonding_show_bonds(struct class *cls,
 				  struct class_attribute *attr,
 				  char *buf)
 {
-	struct net *net = current->nsproxy->net_ns;
-	struct bond_net *bn = net_generic(net, bond_net_id);
+	struct bond_net *bn =
+		container_of(attr, struct bond_net, class_attr_bonding_masters);
 	int res = 0;
 	struct bonding *bond;
 
@@ -79,9 +79,8 @@ static ssize_t bonding_show_bonds(struct class *cls,
 	return res;
 }
 
-static struct net_device *bond_get_by_name(struct net *net, const char *ifname)
+static struct net_device *bond_get_by_name(struct bond_net *bn, const char *ifname)
 {
-	struct bond_net *bn = net_generic(net, bond_net_id);
 	struct bonding *bond;
 
 	list_for_each_entry(bond, &bn->dev_list, bond_list) {
@@ -103,7 +102,8 @@ static ssize_t bonding_store_bonds(struct class *cls,
 				   struct class_attribute *attr,
 				   const char *buffer, size_t count)
 {
-	struct net *net = current->nsproxy->net_ns;
+	struct bond_net *bn =
+		container_of(attr, struct bond_net, class_attr_bonding_masters);
 	char command[IFNAMSIZ + 1] = {0, };
 	char *ifname;
 	int rv, res = count;
@@ -116,7 +116,7 @@ static ssize_t bonding_store_bonds(struct class *cls,
 
 	if (command[0] == '+') {
 		pr_info("%s is being created...\n", ifname);
-		rv = bond_create(net, ifname);
+		rv = bond_create(bn->net, ifname);
 		if (rv) {
 			if (rv == -EEXIST)
 				pr_info("%s already exists.\n", ifname);
@@ -128,7 +128,7 @@ static ssize_t bonding_store_bonds(struct class *cls,
 		struct net_device *bond_dev;
 
 		rtnl_lock();
-		bond_dev = bond_get_by_name(net, ifname);
+		bond_dev = bond_get_by_name(bn, ifname);
 		if (bond_dev) {
 			pr_info("%s is being deleted...\n", ifname);
 			unregister_netdevice(bond_dev);
@@ -150,9 +150,24 @@ err_no_cmd:
 	return -EPERM;
 }
 
+static const void *bonding_namespace(struct class *cls,
+				     const struct class_attribute *attr)
+{
+	const struct bond_net *bn =
+		container_of(attr, struct bond_net, class_attr_bonding_masters);
+	return bn->net;
+}
+
 /* class attribute for bond_masters file.  This ends up in /sys/class/net */
-static CLASS_ATTR(bonding_masters,  S_IWUSR | S_IRUGO,
-		  bonding_show_bonds, bonding_store_bonds);
+static const struct class_attribute class_attr_bonding_masters = {
+	.attr = {
+		.name = "bonding_masters",
+		.mode = S_IWUSR | S_IRUGO,
+	},
+	.show = bonding_show_bonds,
+	.store = bonding_store_bonds,
+	.namespace = bonding_namespace,
+};
 
 int bond_create_slave_symlinks(struct net_device *master,
 			       struct net_device *slave)
@@ -1655,11 +1670,13 @@ static struct attribute_group bonding_group = {
  * Initialize sysfs.  This sets up the bonding_masters file in
  * /sys/class/net.
  */
-int bond_create_sysfs(void)
+int bond_create_sysfs(struct bond_net *bn)
 {
 	int ret;
 
-	ret = netdev_class_create_file(&class_attr_bonding_masters);
+	bn->class_attr_bonding_masters = class_attr_bonding_masters;
+
+	ret = netdev_class_create_file(&bn->class_attr_bonding_masters);
 	/*
 	 * Permit multiple loads of the module by ignoring failures to
 	 * create the bonding_masters sysfs file.  Bonding devices
@@ -1673,7 +1690,7 @@ int bond_create_sysfs(void)
 	 */
 	if (ret == -EEXIST) {
 		/* Is someone being kinky and naming a device bonding_master? */
-		if (__dev_get_by_name(&init_net,
+		if (__dev_get_by_name(bn->net,
 				      class_attr_bonding_masters.attr.name))
 			pr_err("network device named %s already exists in sysfs",
 			       class_attr_bonding_masters.attr.name);
@@ -1687,9 +1704,9 @@ int bond_create_sysfs(void)
 /*
  * Remove /sys/class/net/bonding_masters.
  */
-void bond_destroy_sysfs(void)
+void bond_destroy_sysfs(struct bond_net *bn)
 {
-	netdev_class_remove_file(&class_attr_bonding_masters);
+	netdev_class_remove_file(&bn->class_attr_bonding_masters);
 }
 
 /*
diff --git a/drivers/net/bonding/bonding.h b/drivers/net/bonding/bonding.h
index 43526a2..1007f9a 100644
--- a/drivers/net/bonding/bonding.h
+++ b/drivers/net/bonding/bonding.h
@@ -380,11 +380,13 @@ static inline bool bond_is_slave_inactive(struct slave *slave)
 	return slave->inactive;
 }
 
+struct bond_net;
+
 struct vlan_entry *bond_next_vlan(struct bonding *bond, struct vlan_entry *curr);
 int bond_dev_queue_xmit(struct bonding *bond, struct sk_buff *skb, struct net_device *slave_dev);
 int bond_create(struct net *net, const char *name);
-int bond_create_sysfs(void);
-void bond_destroy_sysfs(void);
+int bond_create_sysfs(struct bond_net *net);
+void bond_destroy_sysfs(struct bond_net *net);
 void bond_prepare_sysfs_group(struct bonding *bond);
 int bond_create_slave_symlinks(struct net_device *master, struct net_device *slave);
 void bond_destroy_slave_symlinks(struct net_device *master, struct net_device *slave);
@@ -410,6 +412,7 @@ struct bond_net {
 #ifdef CONFIG_PROC_FS
 	struct proc_dir_entry *	proc_dir;
 #endif
+	struct class_attribute	class_attr_bonding_masters;
 };
 
 #ifdef CONFIG_PROC_FS
-- 
1.7.2.5

^ permalink raw reply related

* [PATCH 2/5] class: Implement support for class attrs in tagged sysfs directories.
From: Eric W. Biederman @ 2011-10-13  7:55 UTC (permalink / raw)
  To: Greg Kroah-Hartman
  Cc: David Miller, linux-kernel, netdev, Tejun Heo, Jay Vosburgh,
	Andy Gospodarek
In-Reply-To: <m1wrc99vjx.fsf@fess.ebiederm.org>


Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
 drivers/base/class.c   |   17 +++++++++++++++--
 include/linux/device.h |    2 ++
 2 files changed, 17 insertions(+), 2 deletions(-)

diff --git a/drivers/base/class.c b/drivers/base/class.c
index 4f1df2e..b80d91c 100644
--- a/drivers/base/class.c
+++ b/drivers/base/class.c
@@ -47,6 +47,18 @@ static ssize_t class_attr_store(struct kobject *kobj, struct attribute *attr,
 	return ret;
 }
 
+static const void *class_attr_namespace(struct kobject *kobj,
+					const struct attribute *attr)
+{
+	struct class_attribute *class_attr = to_class_attr(attr);
+	struct subsys_private *cp = to_subsys_private(kobj);
+	const void *ns = NULL;
+
+	if (class_attr->namespace)
+		ns = class_attr->namespace(cp->class, class_attr);
+	return ns;
+}
+
 static void class_release(struct kobject *kobj)
 {
 	struct subsys_private *cp = to_subsys_private(kobj);
@@ -72,8 +84,9 @@ static const struct kobj_ns_type_operations *class_child_ns_type(struct kobject
 }
 
 static const struct sysfs_ops class_sysfs_ops = {
-	.show	= class_attr_show,
-	.store	= class_attr_store,
+	.show	   = class_attr_show,
+	.store	   = class_attr_store,
+	.namespace = class_attr_namespace,
 };
 
 static struct kobj_type class_ktype = {
diff --git a/include/linux/device.h b/include/linux/device.h
index c20dfbf..ea70bb2 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -350,6 +350,8 @@ struct class_attribute {
 			char *buf);
 	ssize_t (*store)(struct class *class, struct class_attribute *attr,
 			const char *buf, size_t count);
+	const void *(*namespace)(struct class *class,
+				 const struct class_attribute *attr);
 };
 
 #define CLASS_ATTR(_name, _mode, _show, _store)			\
-- 
1.7.2.5

^ permalink raw reply related

* [PATCH 1/5] sysfs: Implement support for tagged files in sysfs.
From: Eric W. Biederman @ 2011-10-13  7:53 UTC (permalink / raw)
  To: Greg Kroah-Hartman
  Cc: David Miller, linux-kernel, netdev, Tejun Heo, Jay Vosburgh,
	Andy Gospodarek
In-Reply-To: <m1hb3dbae5.fsf@fess.ebiederm.org>


Looking up files in sysfs is hard to understand and analyze because we
currently allow placing untagged files in tagged directories.  In the
implementation of that we have two subtly different meanings of NULL.
NULL meaning there is no tag on a directory entry and NULL meaning
we don't care which namespace the lookup is performed for.  These
multiple uses of NULL have resulted in subtle bugs (since fixed)
in the code.

Currently it is only the bonding driver that needs to have an untagged
file in a tagged directory.

To untangle this mess I am adding support for tagged files to sysfs,
modifying the bonding driver to implement bonding_masters as a tagged
file, and registering bonding_masters once for each network namespace.
Then I am removing support for untagged entries in tagged sysfs
directories.

Resulting in code that is much easier to reason about.

Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
 fs/sysfs/file.c       |   53 +++++++++++++++++++++++++++++++++++++++++++++++-
 include/linux/sysfs.h |    1 +
 2 files changed, 52 insertions(+), 2 deletions(-)

diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index 1ad8c93..07c1b4e 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -488,17 +488,56 @@ const struct file_operations sysfs_file_operations = {
 	.poll		= sysfs_poll,
 };
 
+int sysfs_attr_ns(struct kobject *kobj, const struct attribute *attr,
+		  const void **pns)
+{
+	struct sysfs_dirent *dir_sd = kobj->sd;
+	const struct sysfs_ops *ops;
+	const void *ns = NULL;
+	int err;
+
+	err = 0;
+	if (!sysfs_ns_type(dir_sd))
+		goto out;
+
+	err = -EINVAL;
+	if (!kobj->ktype)
+		goto out;
+	ops = kobj->ktype->sysfs_ops;
+	if (!ops)
+		goto out;
+	if (!ops->namespace)
+		goto out;
+
+	err = 0;
+	ns = ops->namespace(kobj, attr);
+out:
+	if (err) {
+		WARN(1, KERN_ERR "missing sysfs namespace attribute operation for "
+		     "kobject: %s\n", kobject_name(kobj));
+	}
+	*pns = ns;
+	return err;
+}
+
 int sysfs_add_file_mode(struct sysfs_dirent *dir_sd,
 			const struct attribute *attr, int type, mode_t amode)
 {
 	umode_t mode = (amode & S_IALLUGO) | S_IFREG;
 	struct sysfs_addrm_cxt acxt;
 	struct sysfs_dirent *sd;
+	const void *ns;
 	int rc;
 
+	rc = sysfs_attr_ns(dir_sd->s_dir.kobj, attr, &ns);
+	if (rc)
+		return rc;
+
 	sd = sysfs_new_dirent(attr->name, mode, type);
 	if (!sd)
 		return -ENOMEM;
+
+	sd->s_ns = ns;
 	sd->s_attr.attr = (void *)attr;
 	sysfs_dirent_init_lockdep(sd);
 
@@ -586,12 +625,17 @@ int sysfs_chmod_file(struct kobject *kobj, const struct attribute *attr,
 {
 	struct sysfs_dirent *sd;
 	struct iattr newattrs;
+	const void *ns;
 	int rc;
 
+	rc = sysfs_attr_ns(kobj, attr, &ns);
+	if (rc)
+		return rc;
+
 	mutex_lock(&sysfs_mutex);
 
 	rc = -ENOENT;
-	sd = sysfs_find_dirent(kobj->sd, NULL, attr->name);
+	sd = sysfs_find_dirent(kobj->sd, ns, attr->name);
 	if (!sd)
 		goto out;
 
@@ -616,7 +660,12 @@ EXPORT_SYMBOL_GPL(sysfs_chmod_file);
 
 void sysfs_remove_file(struct kobject * kobj, const struct attribute * attr)
 {
-	sysfs_hash_and_remove(kobj->sd, NULL, attr->name);
+	const void *ns;
+
+	if (sysfs_attr_ns(kobj, attr, &ns))
+		return;
+
+	sysfs_hash_and_remove(kobj->sd, ns, attr->name);
 }
 
 void sysfs_remove_files(struct kobject * kobj, const struct attribute **ptr)
diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h
index d7d2f21..dac0859 100644
--- a/include/linux/sysfs.h
+++ b/include/linux/sysfs.h
@@ -112,6 +112,7 @@ struct bin_attribute {
 struct sysfs_ops {
 	ssize_t	(*show)(struct kobject *, struct attribute *,char *);
 	ssize_t	(*store)(struct kobject *,struct attribute *,const char *, size_t);
+	const void *(*namespace)(struct kobject *, const struct attribute *);
 };
 
 struct sysfs_dirent;
-- 
1.7.2.5

^ permalink raw reply related

* [PATCH 0/5] Better namespace handling for /sys/class/net/bonding_masters
From: Eric W. Biederman @ 2011-10-13  7:47 UTC (permalink / raw)
  To: Greg Kroah-Hartman, David Miller
  Cc: linux-kernel, netdev, Tejun Heo, Jay Vosburgh, Andy Gospodarek


When I was looking at another sysfs issue that Al pointed out (since
fixed) I realized that I had implemented a trivial code size but overly
clever way to handle /sys/class/net/bonding_masters.

This patchset removes the support for untagged entries in tagged
directories (that is currently used to support bonding_masters)
and replaces it with support for tagged sysfs attributes.

In the process this fixes a small misfeature in how bonding_masters
derives the network namespace we are dealing with.  This change
allows bonding_masters to derive the network namespace from the
copy of bonding_masters we open instead of magically from current.

The final patch of this patchset adds sanity checks to sysfs.  To
ensure that we don't accidentally mishandle tagged sysfs entities.

I have tested this code against 3.1-rc9 on my laptop with a mostly yes
config and I am not seeing any problems.  The loud screaming warnings I
have added in the last patch should catch any corner cases in how people
use sysfs that I might have overlooked.

Greg, Dave, I don't know whose tree to merge this through as this code
is equally device-core and networking.  I am hoping that we can get this
improvement merged for 3.2.

Farther out the simplifications introduced in this patchset make it much
easier to implement sysfs directories that can scale when there are
enormous numbers of entries in them.

Eric W. Biederman (5):
      sysfs: Implement support for tagged files in sysfs.
      class: Implement support for class attrs in tagged sysfs directories.
      bonding: Use a per netns implementation of /sys/class/net/bonding_masters.
      sysfs: Remove support for tagged directories with untagged members.
      sysfs: Reject with a warning invalid uses of tagged directories.

---
 drivers/base/class.c             |   17 ++++++++++-
 drivers/net/bonding/bond_main.c  |    7 +---
 drivers/net/bonding/bond_sysfs.c |   45 +++++++++++++++++++++---------
 drivers/net/bonding/bonding.h    |    7 +++-
 fs/sysfs/dir.c                   |   20 +++++++++++--
 fs/sysfs/file.c                  |   56 ++++++++++++++++++++++++++++++++++---
 fs/sysfs/inode.c                 |    2 -
 include/linux/device.h           |    2 +
 include/linux/sysfs.h            |    1 +
 9 files changed, 124 insertions(+), 33 deletions(-)

Eric

^ permalink raw reply

* Re: [PATCH v6 1/8] Basic kernel memory functionality for the Memory Controller
From: Greg Thelen @ 2011-10-13  7:18 UTC (permalink / raw)
  To: Glauber Costa
  Cc: linux-kernel, paul, lizf, kamezawa.hiroyu, ebiederm, davem,
	netdev, linux-mm, kirill, avagin, devel
In-Reply-To: <1318242268-2234-2-git-send-email-glommer@parallels.com>

On Mon, Oct 10, 2011 at 3:24 AM, Glauber Costa <glommer@parallels.com> wrote:
> diff --git a/Documentation/cgroups/memory.txt b/Documentation/cgroups/memory.txt
> index 06eb6d9..bf00cd2 100644
> --- a/Documentation/cgroups/memory.txt
> +++ b/Documentation/cgroups/memory.txt
...
> @@ -255,6 +262,31 @@ When oom event notifier is registered, event will be delivered.
>   per-zone-per-cgroup LRU (cgroup's private LRU) is just guarded by
>   zone->lru_lock, it has no lock of its own.
>
> +2.7 Kernel Memory Extension (CONFIG_CGROUP_MEM_RES_CTLR_KMEM)
> +
> + With the Kernel memory extension, the Memory Controller is able to limit

Extra leading space before 'With'.

> +the amount of kernel memory used by the system. Kernel memory is fundamentally
> +different than user memory, since it can't be swapped out, which makes it
> +possible to DoS the system by consuming too much of this precious resource.
> +Kernel memory limits are not imposed for the root cgroup.
> +
> +Memory limits as specified by the standard Memory Controller may or may not
> +take kernel memory into consideration. This is achieved through the file
> +memory.independent_kmem_limit. A Value different than 0 will allow for kernel

s/Value/value/

> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index 3508777..d25c5cb 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
...
> +static int kmem_limit_independent_write(struct cgroup *cont, struct cftype *cft,
> +                                       u64 val)
> +{
> +       cgroup_lock();
> +       mem_cgroup_from_cont(cont)->kmem_independent_accounting = !!val;
> +       cgroup_unlock();

I do not think cgroup_lock,unlock are needed here.  The cont and
associated cgroup should be guaranteed by the caller to be valid.
Does this lock provide some other synchronization?

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply

* [net-next 10/11] igb: Loopback functionality supports for i350 devices
From: Jeff Kirsher @ 2011-10-13  7:15 UTC (permalink / raw)
  To: davem; +Cc: Akeem G. Abodunrin, netdev, gospo, sassmann, Jeff Kirsher
In-Reply-To: <1318490112-5092-1-git-send-email-jeffrey.t.kirsher@intel.com>

From: "Akeem G. Abodunrin" <akeem.g.abodunrin@intel.com>

This patch adds VMDq loopback pf support for i350 devices. The patch
is necessary since the register that enabled loopback was moved and
renamed from DTXSWC to TXSWC.

Signed-off-by: "Akeem G. Abodunrin" <akeem.g.abodunrin@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/igb/e1000_82575.c |   29 ++++++++++++++++++++-----
 drivers/net/ethernet/intel/igb/e1000_regs.h  |    1 +
 2 files changed, 24 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/intel/igb/e1000_82575.c b/drivers/net/ethernet/intel/igb/e1000_82575.c
index 3771bd2..6580cea 100644
--- a/drivers/net/ethernet/intel/igb/e1000_82575.c
+++ b/drivers/net/ethernet/intel/igb/e1000_82575.c
@@ -1580,14 +1580,31 @@ void igb_vmdq_set_anti_spoofing_pf(struct e1000_hw *hw, bool enable, int pf)
  **/
 void igb_vmdq_set_loopback_pf(struct e1000_hw *hw, bool enable)
 {
-	u32 dtxswc = rd32(E1000_DTXSWC);
+	u32 dtxswc;
+
+	switch (hw->mac.type) {
+	case e1000_82576:
+		dtxswc = rd32(E1000_DTXSWC);
+		if (enable)
+			dtxswc |= E1000_DTXSWC_VMDQ_LOOPBACK_EN;
+		else
+			dtxswc &= ~E1000_DTXSWC_VMDQ_LOOPBACK_EN;
+		wr32(E1000_DTXSWC, dtxswc);
+		break;
+	case e1000_i350:
+		dtxswc = rd32(E1000_TXSWC);
+		if (enable)
+			dtxswc |= E1000_DTXSWC_VMDQ_LOOPBACK_EN;
+		else
+			dtxswc &= ~E1000_DTXSWC_VMDQ_LOOPBACK_EN;
+		wr32(E1000_TXSWC, dtxswc);
+		break;
+	default:
+		/* Currently no other hardware supports loopback */
+		break;
+	}
 
-	if (enable)
-		dtxswc |= E1000_DTXSWC_VMDQ_LOOPBACK_EN;
-	else
-		dtxswc &= ~E1000_DTXSWC_VMDQ_LOOPBACK_EN;
 
-	wr32(E1000_DTXSWC, dtxswc);
 }
 
 /**
diff --git a/drivers/net/ethernet/intel/igb/e1000_regs.h b/drivers/net/ethernet/intel/igb/e1000_regs.h
index 0990f6d..0a860bc 100644
--- a/drivers/net/ethernet/intel/igb/e1000_regs.h
+++ b/drivers/net/ethernet/intel/igb/e1000_regs.h
@@ -318,6 +318,7 @@
 #define E1000_RPLOLR    0x05AF0 /* Replication Offload - RW */
 #define E1000_UTA       0x0A000 /* Unicast Table Array - RW */
 #define E1000_IOVTCL    0x05BBC /* IOV Control Register */
+#define E1000_TXSWC     0x05ACC /* Tx Switch Control */
 /* These act per VF so an array friendly macro is used */
 #define E1000_P2VMAILBOX(_n)   (0x00C00 + (4 * (_n)))
 #define E1000_VMBMEM(_n)       (0x00800 + (64 * (_n)))
-- 
1.7.6.4

^ permalink raw reply related

* [net-next 11/11] igb: Version bump.
From: Jeff Kirsher @ 2011-10-13  7:15 UTC (permalink / raw)
  To: davem; +Cc: Carolyn Wyborny, netdev, gospo, sassmann, Jeff Kirsher
In-Reply-To: <1318490112-5092-1-git-send-email-jeffrey.t.kirsher@intel.com>

From: Carolyn Wyborny <carolyn.wyborny@intel.com>

This change updates the driver version to 3.2.10.

Signed-off-by: Carolyn Wyborny <carolyn.wyborny@intel.com>
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/igb/igb_main.c |    4 ++--
 1 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
index 582432f..8227824 100644
--- a/drivers/net/ethernet/intel/igb/igb_main.c
+++ b/drivers/net/ethernet/intel/igb/igb_main.c
@@ -57,8 +57,8 @@
 #include "igb.h"
 
 #define MAJ 3
-#define MIN 0
-#define BUILD 6
+#define MIN 2
+#define BUILD 10
 #define DRV_VERSION __stringify(MAJ) "." __stringify(MIN) "." \
 __stringify(BUILD) "-k"
 char igb_driver_name[] = "igb";
-- 
1.7.6.4

^ permalink raw reply related

* [net-next 08/11] igb: Add workaround for byte swapped VLAN on i350 local traffic
From: Jeff Kirsher @ 2011-10-13  7:15 UTC (permalink / raw)
  To: davem; +Cc: Alexander Duyck, netdev, gospo, sassmann, Jeff Kirsher
In-Reply-To: <1318490112-5092-1-git-send-email-jeffrey.t.kirsher@intel.com>

From: Alexander Duyck <alexander.h.duyck@intel.com>

On i350 when traffic is looped back from a VF to the PF the value is byte
swapped from the normal format.  In order to address this we need to add a
flag indicating that the ring will need to byte swap the loopback packets
prior to processing them.

Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Tested-by:  Aaron Brown  <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/igb/e1000_defines.h |    1 +
 drivers/net/ethernet/intel/igb/igb.h           |    1 +
 drivers/net/ethernet/intel/igb/igb_main.c      |   29 +++++++++++++++++++-----
 3 files changed, 25 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/intel/igb/e1000_defines.h b/drivers/net/ethernet/intel/igb/e1000_defines.h
index 68558be..f5fc572 100644
--- a/drivers/net/ethernet/intel/igb/e1000_defines.h
+++ b/drivers/net/ethernet/intel/igb/e1000_defines.h
@@ -85,6 +85,7 @@
 #define E1000_RXD_STAT_TCPCS    0x20    /* TCP xsum calculated */
 #define E1000_RXD_STAT_TS       0x10000 /* Pkt was time stamped */
 
+#define E1000_RXDEXT_STATERR_LB    0x00040000
 #define E1000_RXDEXT_STATERR_CE    0x01000000
 #define E1000_RXDEXT_STATERR_SE    0x02000000
 #define E1000_RXDEXT_STATERR_SEQ   0x04000000
diff --git a/drivers/net/ethernet/intel/igb/igb.h b/drivers/net/ethernet/intel/igb/igb.h
index 4e665a9..4c500a7 100644
--- a/drivers/net/ethernet/intel/igb/igb.h
+++ b/drivers/net/ethernet/intel/igb/igb.h
@@ -245,6 +245,7 @@ struct igb_ring {
 
 enum e1000_ring_flags_t {
 	IGB_RING_FLAG_RX_SCTP_CSUM,
+	IGB_RING_FLAG_RX_LB_VLAN_BSWAP,
 	IGB_RING_FLAG_TX_CTX_IDX,
 	IGB_RING_FLAG_TX_DETECT_HANG
 };
diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
index 9e79306..582432f 100644
--- a/drivers/net/ethernet/intel/igb/igb_main.c
+++ b/drivers/net/ethernet/intel/igb/igb_main.c
@@ -735,6 +735,11 @@ static int igb_alloc_queues(struct igb_adapter *adapter)
 		/* set flag indicating ring supports SCTP checksum offload */
 		if (adapter->hw.mac.type >= e1000_82576)
 			set_bit(IGB_RING_FLAG_RX_SCTP_CSUM, &ring->flags);
+
+		/* On i350, loopback VLAN packets have the tag byte-swapped. */
+		if (adapter->hw.mac.type == e1000_i350)
+			set_bit(IGB_RING_FLAG_RX_LB_VLAN_BSWAP, &ring->flags);
+
 		adapter->rx_ring[i] = ring;
 	}
 	/* Restore the adapter's original node */
@@ -5864,6 +5869,23 @@ static void igb_rx_hwtstamp(struct igb_q_vector *q_vector,
 
 	igb_systim_to_hwtstamp(adapter, skb_hwtstamps(skb), regval);
 }
+
+static void igb_rx_vlan(struct igb_ring *ring,
+			union e1000_adv_rx_desc *rx_desc,
+			struct sk_buff *skb)
+{
+	if (igb_test_staterr(rx_desc, E1000_RXD_STAT_VP)) {
+		u16 vid;
+		if (igb_test_staterr(rx_desc, E1000_RXDEXT_STATERR_LB) &&
+		    test_bit(IGB_RING_FLAG_RX_LB_VLAN_BSWAP, &ring->flags))
+			vid = be16_to_cpu(rx_desc->wb.upper.vlan);
+		else
+			vid = le16_to_cpu(rx_desc->wb.upper.vlan);
+
+		__vlan_hwaccel_put_tag(skb, vid);
+	}
+}
+
 static inline u16 igb_get_hlen(union e1000_adv_rx_desc *rx_desc)
 {
 	/* HW will not DMA in data larger than the given buffer, even if it
@@ -5960,12 +5982,7 @@ static bool igb_clean_rx_irq(struct igb_q_vector *q_vector, int budget)
 		igb_rx_hwtstamp(q_vector, rx_desc, skb);
 		igb_rx_hash(rx_ring, rx_desc, skb);
 		igb_rx_checksum(rx_ring, rx_desc, skb);
-
-		if (igb_test_staterr(rx_desc, E1000_RXD_STAT_VP)) {
-			u16 vid = le16_to_cpu(rx_desc->wb.upper.vlan);
-
-			__vlan_hwaccel_put_tag(skb, vid);
-		}
+		igb_rx_vlan(rx_ring, rx_desc, skb);
 
 		total_bytes += skb->len;
 		total_packets++;
-- 
1.7.6.4

^ permalink raw reply related

* [net-next 09/11] igb: fix static function warnings reported by sparse
From: Jeff Kirsher @ 2011-10-13  7:15 UTC (permalink / raw)
  To: davem; +Cc: Emil Tantilov, netdev, gospo, sassmann, Jeff Kirsher
In-Reply-To: <1318490112-5092-1-git-send-email-jeffrey.t.kirsher@intel.com>

From: Emil Tantilov <emil.s.tantilov@intel.com>

igb_update/validate_nvm_checksum_with_offset() should be static.
Also removes unneeded prototypes for the above functions.

Signed-off-by: Emil Tantilov <emil.s.tantilov@intel.com>
Tested-by:  Aaron Brown  <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/igb/e1000_82575.c |    9 +++------
 1 files changed, 3 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/intel/igb/e1000_82575.c b/drivers/net/ethernet/intel/igb/e1000_82575.c
index c0857bd..3771bd2 100644
--- a/drivers/net/ethernet/intel/igb/e1000_82575.c
+++ b/drivers/net/ethernet/intel/igb/e1000_82575.c
@@ -66,10 +66,6 @@ static s32  igb_set_pcie_completion_timeout(struct e1000_hw *hw);
 static s32  igb_reset_mdicnfg_82580(struct e1000_hw *hw);
 static s32  igb_validate_nvm_checksum_82580(struct e1000_hw *hw);
 static s32  igb_update_nvm_checksum_82580(struct e1000_hw *hw);
-static s32  igb_update_nvm_checksum_with_offset(struct e1000_hw *hw,
-						u16 offset);
-static s32 igb_validate_nvm_checksum_with_offset(struct e1000_hw *hw,
-						u16 offset);
 static s32 igb_validate_nvm_checksum_i350(struct e1000_hw *hw);
 static s32 igb_update_nvm_checksum_i350(struct e1000_hw *hw);
 static const u16 e1000_82580_rxpbs_table[] =
@@ -1820,7 +1816,8 @@ u16 igb_rxpbs_adjust_82580(u32 data)
  *  Calculates the EEPROM checksum by reading/adding each word of the EEPROM
  *  and then verifies that the sum of the EEPROM is equal to 0xBABA.
  **/
-s32 igb_validate_nvm_checksum_with_offset(struct e1000_hw *hw, u16 offset)
+static s32 igb_validate_nvm_checksum_with_offset(struct e1000_hw *hw,
+						 u16 offset)
 {
 	s32 ret_val = 0;
 	u16 checksum = 0;
@@ -1855,7 +1852,7 @@ out:
  *  up to the checksum.  Then calculates the EEPROM checksum and writes the
  *  value to the EEPROM.
  **/
-s32 igb_update_nvm_checksum_with_offset(struct e1000_hw *hw, u16 offset)
+static s32 igb_update_nvm_checksum_with_offset(struct e1000_hw *hw, u16 offset)
 {
 	s32 ret_val;
 	u16 checksum = 0;
-- 
1.7.6.4

^ permalink raw reply related

* [net-next 07/11] igb: Drop unnecessary write of E1000_IMS from igb_msix_other
From: Jeff Kirsher @ 2011-10-13  7:15 UTC (permalink / raw)
  To: davem; +Cc: Alexander Duyck, netdev, gospo, sassmann, Jeff Kirsher
In-Reply-To: <1318490112-5092-1-git-send-email-jeffrey.t.kirsher@intel.com>

From: Alexander Duyck <alexander.h.duyck@intel.com>

Since we mask interrupts in EIMS not in IMS there is no need to re-enable
mask bits in that register.  As such we can remove the write to IMS from
the end of igb_msix_other.

Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Tested-by:  Aaron Brown  <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/igb/igb_main.c |    6 ------
 1 files changed, 0 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
index fee771b..9e79306 100644
--- a/drivers/net/ethernet/intel/igb/igb_main.c
+++ b/drivers/net/ethernet/intel/igb/igb_main.c
@@ -4766,12 +4766,6 @@ static irqreturn_t igb_msix_other(int irq, void *data)
 			mod_timer(&adapter->watchdog_timer, jiffies + 1);
 	}
 
-	if (adapter->vfs_allocated_count)
-		wr32(E1000_IMS, E1000_IMS_LSC |
-				E1000_IMS_VMMB |
-				E1000_IMS_DOUTSYNC);
-	else
-		wr32(E1000_IMS, E1000_IMS_LSC | E1000_IMS_DOUTSYNC);
 	wr32(E1000_EIMS, adapter->eims_other);
 
 	return IRQ_HANDLED;
-- 
1.7.6.4

^ permalink raw reply related

* [net-next 06/11] igb: Fix features that are currently 82580 only and should also be i350
From: Jeff Kirsher @ 2011-10-13  7:15 UTC (permalink / raw)
  To: davem; +Cc: Alexander Duyck, netdev, gospo, sassmann, Jeff Kirsher
In-Reply-To: <1318490112-5092-1-git-send-email-jeffrey.t.kirsher@intel.com>

From: Alexander Duyck <alexander.h.duyck@intel.com>

This change allows support for per packet timesync and global device reset
on the i350 adapter.  These features were supported on both 82580 and i350
however it looks like several checks were not updated and as such the i350
support was not enabled.

Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Tested-by:  Aaron Brown  <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/igb/igb_main.c |   15 ++++++---------
 1 files changed, 6 insertions(+), 9 deletions(-)

diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
index efc367b..fee771b 100644
--- a/drivers/net/ethernet/intel/igb/igb_main.c
+++ b/drivers/net/ethernet/intel/igb/igb_main.c
@@ -563,7 +563,7 @@ static cycle_t igb_read_clock(const struct cyclecounter *tc)
 	 * the lowest register is SYSTIMR instead of SYSTIML.  However we never
 	 * adjusted TIMINCA so SYSTIMR will just read as all 0s so ignore it.
 	 */
-	if (hw->mac.type == e1000_82580) {
+	if (hw->mac.type >= e1000_82580) {
 		stamp = rd32(E1000_SYSTIMR) >> 8;
 		shift = IGB_82580_TSYNC_SHIFT;
 	}
@@ -1367,7 +1367,7 @@ static void igb_irq_enable(struct igb_adapter *adapter)
 	struct e1000_hw *hw = &adapter->hw;
 
 	if (adapter->msix_entries) {
-		u32 ims = E1000_IMS_LSC | E1000_IMS_DOUTSYNC;
+		u32 ims = E1000_IMS_LSC | E1000_IMS_DOUTSYNC | E1000_IMS_DRSTA;
 		u32 regval = rd32(E1000_EIAC);
 		wr32(E1000_EIAC, regval | adapter->eims_enable_mask);
 		regval = rd32(E1000_EIAM);
@@ -1377,9 +1377,6 @@ static void igb_irq_enable(struct igb_adapter *adapter)
 			wr32(E1000_MBVFIMR, 0xFF);
 			ims |= E1000_IMS_VMMB;
 		}
-		if (adapter->hw.mac.type == e1000_82580)
-			ims |= E1000_IMS_DRSTA;
-
 		wr32(E1000_IMS, ims);
 	} else {
 		wr32(E1000_IMS, IMS_ENABLE_MASK |
@@ -3112,7 +3109,7 @@ void igb_configure_rx_ring(struct igb_adapter *adapter,
 	srrctl |= (PAGE_SIZE / 2) >> E1000_SRRCTL_BSIZEPKT_SHIFT;
 #endif
 	srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
-	if (hw->mac.type == e1000_82580)
+	if (hw->mac.type >= e1000_82580)
 		srrctl |= E1000_SRRCTL_TIMESTAMP;
 	/* Only set Drop Enable if we are supporting multiple queues */
 	if (adapter->vfs_allocated_count || adapter->num_rx_queues > 1)
@@ -4463,7 +4460,7 @@ static void igb_tx_timeout(struct net_device *netdev)
 	/* Do the reset outside of interrupt context */
 	adapter->tx_timeout_count++;
 
-	if (hw->mac.type == e1000_82580)
+	if (hw->mac.type >= e1000_82580)
 		hw->dev_spec._82575.global_device_reset = true;
 
 	schedule_work(&adapter->reset_task);
@@ -5581,7 +5578,7 @@ static void igb_systim_to_hwtstamp(struct igb_adapter *adapter,
 	 * The 82580 starts with 1ns at bit 0 in RX/TXSTMPL, shift this up to
 	 * 24 to match clock shift we setup earlier.
 	 */
-	if (adapter->hw.mac.type == e1000_82580)
+	if (adapter->hw.mac.type >= e1000_82580)
 		regval <<= IGB_82580_TSYNC_SHIFT;
 
 	ns = timecounter_cyc2time(&adapter->clock, regval);
@@ -6276,7 +6273,7 @@ static int igb_hwtstamp_ioctl(struct net_device *netdev,
 	 * timestamped, so enable timestamping in all packets as
 	 * long as one rx filter was configured.
 	 */
-	if ((hw->mac.type == e1000_82580) && tsync_rx_ctl) {
+	if ((hw->mac.type >= e1000_82580) && tsync_rx_ctl) {
 		tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
 		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
 	}
-- 
1.7.6.4

^ permalink raw reply related

* [net-next 05/11] igb: Make certain one vector is always assigned in igb_request_irq
From: Jeff Kirsher @ 2011-10-13  7:15 UTC (permalink / raw)
  To: davem; +Cc: Alexander Duyck, netdev, gospo, sassmann, Jeff Kirsher
In-Reply-To: <1318490112-5092-1-git-send-email-jeffrey.t.kirsher@intel.com>

From: Alexander Duyck <alexander.h.duyck@intel.com>

This change makes certain that one interrupt is always initialized in
igb_request_irq.  In addition we drop the use of adapter->pdev and
instead just use the local pdev, since we made a local copy of the pointer
earlier in the function.

Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Tested-by:  Aaron Brown  <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/igb/igb_main.c |   12 ++++++------
 1 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
index 3905a49..efc367b 100644
--- a/drivers/net/ethernet/intel/igb/igb_main.c
+++ b/drivers/net/ethernet/intel/igb/igb_main.c
@@ -1262,7 +1262,7 @@ static int igb_request_irq(struct igb_adapter *adapter)
 			goto request_done;
 		/* fall back to MSI */
 		igb_clear_interrupt_scheme(adapter);
-		if (!pci_enable_msi(adapter->pdev))
+		if (!pci_enable_msi(pdev))
 			adapter->flags |= IGB_FLAG_HAS_MSI;
 		igb_free_all_tx_resources(adapter);
 		igb_free_all_rx_resources(adapter);
@@ -1284,12 +1284,12 @@ static int igb_request_irq(struct igb_adapter *adapter)
 		}
 		igb_setup_all_tx_resources(adapter);
 		igb_setup_all_rx_resources(adapter);
-	} else {
-		igb_assign_vector(adapter->q_vector[0], 0);
 	}
 
+	igb_assign_vector(adapter->q_vector[0], 0);
+
 	if (adapter->flags & IGB_FLAG_HAS_MSI) {
-		err = request_irq(adapter->pdev->irq, igb_intr_msi, 0,
+		err = request_irq(pdev->irq, igb_intr_msi, 0,
 				  netdev->name, adapter);
 		if (!err)
 			goto request_done;
@@ -1299,11 +1299,11 @@ static int igb_request_irq(struct igb_adapter *adapter)
 		adapter->flags &= ~IGB_FLAG_HAS_MSI;
 	}
 
-	err = request_irq(adapter->pdev->irq, igb_intr, IRQF_SHARED,
+	err = request_irq(pdev->irq, igb_intr, IRQF_SHARED,
 			  netdev->name, adapter);
 
 	if (err)
-		dev_err(&adapter->pdev->dev, "Error %d getting interrupt\n",
+		dev_err(&pdev->dev, "Error %d getting interrupt\n",
 			err);
 
 request_done:
-- 
1.7.6.4

^ permalink raw reply related

* [net-next 04/11] igb: avoid unnecessarily creating a local copy of the q_vector
From: Jeff Kirsher @ 2011-10-13  7:15 UTC (permalink / raw)
  To: davem; +Cc: Alexander Duyck, netdev, gospo, sassmann, Jeff Kirsher
In-Reply-To: <1318490112-5092-1-git-send-email-jeffrey.t.kirsher@intel.com>

From: Alexander Duyck <alexander.h.duyck@intel.com>

This is mostly a drop of unnecessary pointer defines for q_vector when we
don't have issues with line width and don't have multiple references to
the pointer.

Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Tested-by:  Aaron Brown  <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/igb/igb_main.c |   46 ++++++++++------------------
 1 files changed, 17 insertions(+), 29 deletions(-)

diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
index 10670f9..3905a49 100644
--- a/drivers/net/ethernet/intel/igb/igb_main.c
+++ b/drivers/net/ethernet/intel/igb/igb_main.c
@@ -1317,11 +1317,9 @@ static void igb_free_irq(struct igb_adapter *adapter)
 
 		free_irq(adapter->msix_entries[vector++].vector, adapter);
 
-		for (i = 0; i < adapter->num_q_vectors; i++) {
-			struct igb_q_vector *q_vector = adapter->q_vector[i];
+		for (i = 0; i < adapter->num_q_vectors; i++)
 			free_irq(adapter->msix_entries[vector++].vector,
-			         q_vector);
-		}
+				 adapter->q_vector[i]);
 	} else {
 		free_irq(adapter->pdev->irq, adapter);
 	}
@@ -1523,10 +1521,9 @@ int igb_up(struct igb_adapter *adapter)
 
 	clear_bit(__IGB_DOWN, &adapter->state);
 
-	for (i = 0; i < adapter->num_q_vectors; i++) {
-		struct igb_q_vector *q_vector = adapter->q_vector[i];
-		napi_enable(&q_vector->napi);
-	}
+	for (i = 0; i < adapter->num_q_vectors; i++)
+		napi_enable(&(adapter->q_vector[i]->napi));
+
 	if (adapter->msix_entries)
 		igb_configure_msix(adapter);
 	else
@@ -1578,10 +1575,8 @@ void igb_down(struct igb_adapter *adapter)
 	wrfl();
 	msleep(10);
 
-	for (i = 0; i < adapter->num_q_vectors; i++) {
-		struct igb_q_vector *q_vector = adapter->q_vector[i];
-		napi_disable(&q_vector->napi);
-	}
+	for (i = 0; i < adapter->num_q_vectors; i++)
+		napi_disable(&(adapter->q_vector[i]->napi));
 
 	igb_irq_disable(adapter);
 
@@ -2546,10 +2541,8 @@ static int igb_open(struct net_device *netdev)
 	/* From here on the code is the same as igb_up() */
 	clear_bit(__IGB_DOWN, &adapter->state);
 
-	for (i = 0; i < adapter->num_q_vectors; i++) {
-		struct igb_q_vector *q_vector = adapter->q_vector[i];
-		napi_enable(&q_vector->napi);
-	}
+	for (i = 0; i < adapter->num_q_vectors; i++)
+		napi_enable(&(adapter->q_vector[i]->napi));
 
 	/* Clear any pending interrupts. */
 	rd32(E1000_ICR);
@@ -3769,10 +3762,8 @@ static void igb_watchdog_task(struct work_struct *work)
 	/* Cause software interrupt to ensure rx ring is cleaned */
 	if (adapter->msix_entries) {
 		u32 eics = 0;
-		for (i = 0; i < adapter->num_q_vectors; i++) {
-			struct igb_q_vector *q_vector = adapter->q_vector[i];
-			eics |= q_vector->eims_value;
-		}
+		for (i = 0; i < adapter->num_q_vectors; i++)
+			eics |= adapter->q_vector[i]->eims_value;
 		wr32(E1000_EICS, eics);
 	} else {
 		wr32(E1000_ICS, E1000_ICS_RXDMT0);
@@ -6671,18 +6662,15 @@ static void igb_netpoll(struct net_device *netdev)
 {
 	struct igb_adapter *adapter = netdev_priv(netdev);
 	struct e1000_hw *hw = &adapter->hw;
+	struct igb_q_vector *q_vector;
 	int i;
 
-	if (!adapter->msix_entries) {
-		struct igb_q_vector *q_vector = adapter->q_vector[0];
-		igb_irq_disable(adapter);
-		napi_schedule(&q_vector->napi);
-		return;
-	}
-
 	for (i = 0; i < adapter->num_q_vectors; i++) {
-		struct igb_q_vector *q_vector = adapter->q_vector[i];
-		wr32(E1000_EIMC, q_vector->eims_value);
+		q_vector = adapter->q_vector[i];
+		if (adapter->msix_entries)
+			wr32(E1000_EIMC, q_vector->eims_value);
+		else
+			igb_irq_disable(adapter);
 		napi_schedule(&q_vector->napi);
 	}
 }
-- 
1.7.6.4

^ permalink raw reply related

* [net-next 02/11] ixgbe: Add FCoE DDP allocation failure counters to ethtool stats.
From: Jeff Kirsher @ 2011-10-13  7:15 UTC (permalink / raw)
  To: davem; +Cc: Amir Hanania, netdev, gospo, sassmann, Jeff Kirsher
In-Reply-To: <1318490112-5092-1-git-send-email-jeffrey.t.kirsher@intel.com>

From: Amir Hanania <amir.hanania@intel.com>

Add 2 new counters to ethtool:
	1. fcoe_noddp: counts DDP allocation failures caused by reaching the
		maximum number of buffers allowed in one DDP context.
	2. fcoe_noddp_ext_buff: counts DDP allocation failures caused by
		reaching that maximum when an extra buffer has to be allocated.

Signed-off-by: Amir Hanania <amir.hanania@intel.com>
Tested-by: Ross Brattain <ross.b.brattain@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c |    2 +
 drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c    |   44 ++++++++++++++++-----
 drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.h    |    2 +
 drivers/net/ethernet/intel/ixgbe/ixgbe_main.c    |   17 ++++++++
 drivers/net/ethernet/intel/ixgbe/ixgbe_type.h    |    2 +
 5 files changed, 56 insertions(+), 11 deletions(-)

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
index 18520ce..e102ff6 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
@@ -113,6 +113,8 @@ static struct ixgbe_stats ixgbe_gstrings_stats[] = {
 	{"rx_fcoe_dropped", IXGBE_STAT(stats.fcoerpdc)},
 	{"rx_fcoe_packets", IXGBE_STAT(stats.fcoeprc)},
 	{"rx_fcoe_dwords", IXGBE_STAT(stats.fcoedwrc)},
+	{"fcoe_noddp", IXGBE_STAT(stats.fcoe_noddp)},
+	{"fcoe_noddp_ext_buff", IXGBE_STAT(stats.fcoe_noddp_ext_buff)},
 	{"tx_fcoe_packets", IXGBE_STAT(stats.fcoeptc)},
 	{"tx_fcoe_dwords", IXGBE_STAT(stats.fcoedwtc)},
 #endif /* IXGBE_FCOE */
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c
index 323f452..df3b1be 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c
@@ -145,6 +145,7 @@ static int ixgbe_fcoe_ddp_setup(struct net_device *netdev, u16 xid,
 	u32 fcbuff, fcdmarw, fcfltrw, fcrxctl;
 	dma_addr_t addr = 0;
 	struct pci_pool *pool;
+	unsigned int cpu;
 
 	if (!netdev || !sgl)
 		return 0;
@@ -182,7 +183,8 @@ static int ixgbe_fcoe_ddp_setup(struct net_device *netdev, u16 xid,
 	}
 
 	/* alloc the udl from per cpu ddp pool */
-	pool = *per_cpu_ptr(fcoe->pool, get_cpu());
+	cpu = get_cpu();
+	pool = *per_cpu_ptr(fcoe->pool, cpu);
 	ddp->udl = pci_pool_alloc(pool, GFP_ATOMIC, &ddp->udp);
 	if (!ddp->udl) {
 		e_err(drv, "failed allocated ddp context\n");
@@ -199,9 +201,7 @@ static int ixgbe_fcoe_ddp_setup(struct net_device *netdev, u16 xid,
 		while (len) {
 			/* max number of buffers allowed in one DDP context */
 			if (j >= IXGBE_BUFFCNT_MAX) {
-				e_err(drv, "xid=%x:%d,%d,%d:addr=%llx "
-				      "not enough descriptors\n",
-				      xid, i, j, dmacount, (u64)addr);
+				*per_cpu_ptr(fcoe->pcpu_noddp, cpu) += 1;
 				goto out_noddp_free;
 			}
 
@@ -241,12 +241,7 @@ static int ixgbe_fcoe_ddp_setup(struct net_device *netdev, u16 xid,
 	 */
 	if (lastsize == bufflen) {
 		if (j >= IXGBE_BUFFCNT_MAX) {
-			printk_once("Will NOT use DDP since there are not "
-				    "enough user buffers. We need an  extra "
-				    "buffer because lastsize is bufflen. "
-				    "xid=%x:%d,%d,%d:addr=%llx\n",
-				    xid, i, j, dmacount, (u64)addr);
-
+			*per_cpu_ptr(fcoe->pcpu_noddp_ext_buff, cpu) += 1;
 			goto out_noddp_free;
 		}
 
@@ -600,6 +595,7 @@ void ixgbe_configure_fcoe(struct ixgbe_adapter *adapter)
 	struct ixgbe_hw *hw = &adapter->hw;
 	struct ixgbe_fcoe *fcoe = &adapter->fcoe;
 	struct ixgbe_ring_feature *f = &adapter->ring_feature[RING_F_FCOE];
+	unsigned int cpu;
 
 	if (!fcoe->pool) {
 		spin_lock_init(&fcoe->lock);
@@ -627,6 +623,24 @@ void ixgbe_configure_fcoe(struct ixgbe_adapter *adapter)
 			e_err(drv, "failed to map extra DDP buffer\n");
 			goto out_extra_ddp_buffer;
 		}
+
+		/* Alloc per cpu mem to count the ddp alloc failure number */
+		fcoe->pcpu_noddp = alloc_percpu(u64);
+		if (!fcoe->pcpu_noddp) {
+			e_err(drv, "failed to alloc noddp counter\n");
+			goto out_pcpu_noddp_alloc_fail;
+		}
+
+		fcoe->pcpu_noddp_ext_buff = alloc_percpu(u64);
+		if (!fcoe->pcpu_noddp_ext_buff) {
+			e_err(drv, "failed to alloc noddp extra buff cnt\n");
+			goto out_pcpu_noddp_extra_buff_alloc_fail;
+		}
+
+		for_each_possible_cpu(cpu) {
+			*per_cpu_ptr(fcoe->pcpu_noddp, cpu) = 0;
+			*per_cpu_ptr(fcoe->pcpu_noddp_ext_buff, cpu) = 0;
+		}
 	}
 
 	/* Enable L2 eth type filter for FCoE */
@@ -664,7 +678,13 @@ void ixgbe_configure_fcoe(struct ixgbe_adapter *adapter)
 	IXGBE_WRITE_REG(hw, IXGBE_FCRXCTRL, IXGBE_FCRXCTRL_FCCRCBO |
 			(FC_FCOE_VER << IXGBE_FCRXCTRL_FCOEVER_SHIFT));
 	return;
-
+out_pcpu_noddp_extra_buff_alloc_fail:
+	free_percpu(fcoe->pcpu_noddp);
+out_pcpu_noddp_alloc_fail:
+	dma_unmap_single(&adapter->pdev->dev,
+			 fcoe->extra_ddp_buffer_dma,
+			 IXGBE_FCBUFF_MIN,
+			 DMA_FROM_DEVICE);
 out_extra_ddp_buffer:
 	kfree(fcoe->extra_ddp_buffer);
 out_ddp_pools:
@@ -693,6 +713,8 @@ void ixgbe_cleanup_fcoe(struct ixgbe_adapter *adapter)
 			 fcoe->extra_ddp_buffer_dma,
 			 IXGBE_FCBUFF_MIN,
 			 DMA_FROM_DEVICE);
+	free_percpu(fcoe->pcpu_noddp);
+	free_percpu(fcoe->pcpu_noddp_ext_buff);
 	kfree(fcoe->extra_ddp_buffer);
 	ixgbe_fcoe_ddp_pools_free(fcoe);
 }
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.h
index 99de145..261fd62 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.h
@@ -73,6 +73,8 @@ struct ixgbe_fcoe {
 	unsigned char *extra_ddp_buffer;
 	dma_addr_t extra_ddp_buffer_dma;
 	unsigned long mode;
+	u64 __percpu *pcpu_noddp;
+	u64 __percpu *pcpu_noddp_ext_buff;
 #ifdef CONFIG_IXGBE_DCB
 	u8 up;
 #endif
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index b95c6e9..f6fea67 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -5552,6 +5552,11 @@ void ixgbe_update_stats(struct ixgbe_adapter *adapter)
 	u64 non_eop_descs = 0, restart_queue = 0, tx_busy = 0;
 	u64 alloc_rx_page_failed = 0, alloc_rx_buff_failed = 0;
 	u64 bytes = 0, packets = 0;
+#ifdef IXGBE_FCOE
+	struct ixgbe_fcoe *fcoe = &adapter->fcoe;
+	unsigned int cpu;
+	u64 fcoe_noddp_counts_sum = 0, fcoe_noddp_ext_buff_counts_sum = 0;
+#endif /* IXGBE_FCOE */
 
 	if (test_bit(__IXGBE_DOWN, &adapter->state) ||
 	    test_bit(__IXGBE_RESETTING, &adapter->state))
@@ -5679,6 +5684,18 @@ void ixgbe_update_stats(struct ixgbe_adapter *adapter)
 		hwstats->fcoeptc += IXGBE_READ_REG(hw, IXGBE_FCOEPTC);
 		hwstats->fcoedwrc += IXGBE_READ_REG(hw, IXGBE_FCOEDWRC);
 		hwstats->fcoedwtc += IXGBE_READ_REG(hw, IXGBE_FCOEDWTC);
+		/* Add up per cpu counters for total ddp aloc fail */
+		if (fcoe->pcpu_noddp && fcoe->pcpu_noddp_ext_buff) {
+			for_each_possible_cpu(cpu) {
+				fcoe_noddp_counts_sum +=
+					*per_cpu_ptr(fcoe->pcpu_noddp, cpu);
+				fcoe_noddp_ext_buff_counts_sum +=
+					*per_cpu_ptr(fcoe->
+						pcpu_noddp_ext_buff, cpu);
+			}
+		}
+		hwstats->fcoe_noddp = fcoe_noddp_counts_sum;
+		hwstats->fcoe_noddp_ext_buff = fcoe_noddp_ext_buff_counts_sum;
 #endif /* IXGBE_FCOE */
 		break;
 	default:
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h
index d1d6894..6c5cca8 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h
@@ -2682,6 +2682,8 @@ struct ixgbe_hw_stats {
 	u64 fcoeptc;
 	u64 fcoedwrc;
 	u64 fcoedwtc;
+	u64 fcoe_noddp;
+	u64 fcoe_noddp_ext_buff;
 	u64 b2ospc;
 	u64 b2ogprc;
 	u64 o2bgptc;
-- 
1.7.6.4

^ permalink raw reply related

* [net-next 03/11] ixgbe: Correct check for change in FCoE priority
From: Jeff Kirsher @ 2011-10-13  7:15 UTC (permalink / raw)
  To: davem; +Cc: Mark Rustad, netdev, gospo, sassmann, Jeff Kirsher
In-Reply-To: <1318490112-5092-1-git-send-email-jeffrey.t.kirsher@intel.com>

From: Mark Rustad <mark.d.rustad@intel.com>

Correct a check for change in FCoE priority when IEEE mode DCB is in use.
In IEEE mode a different function has to be used to get the FCoE priority
mask. Also, the check for the mask assumed that only one priority was set.
In case more than one is set, test only the bit for the current FCoE priority.

These changes help avoid link flapping issues that can come up when IEEE
DCB is in use.

Signed-off-by: Mark Rustad <mark.d.rustad@intel.com>
Tested-by: Ross Brattain <ross.b.brattain@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_nl.c |   12 ++++++++++--
 1 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_nl.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_nl.c
index be66bb6..3631d63 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_nl.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_nl.c
@@ -318,7 +318,15 @@ static u8 ixgbe_dcbnl_set_all(struct net_device *netdev)
 			      .selector = DCB_APP_IDTYPE_ETHTYPE,
 			      .protocol = ETH_P_FCOE,
 			     };
-	u8 up = dcb_getapp(netdev, &app);
+	u8 up;
+
+	/* In IEEE mode, use the IEEE Ethertype selector value */
+	if (adapter->dcbx_cap & DCB_CAP_DCBX_VER_IEEE) {
+		app.selector = IEEE_8021QAZ_APP_SEL_ETHERTYPE;
+		up = dcb_ieee_getapp_mask(netdev, &app);
+	} else {
+		up = dcb_getapp(netdev, &app);
+	}
 #endif
 
 	/* Fail command if not in CEE mode */
@@ -331,7 +339,7 @@ static u8 ixgbe_dcbnl_set_all(struct net_device *netdev)
 		return DCB_NO_HW_CHG;
 
 #ifdef IXGBE_FCOE
-	if (up && (up != (1 << adapter->fcoe.up)))
+	if (up && !(up & (1 << adapter->fcoe.up)))
 		adapter->dcb_set_bitmap |= BIT_APP_UPCHG;
 
 	/*
-- 
1.7.6.4

^ permalink raw reply related

* [net-next 01/11] ixgbe: Add protection from VF invalid target DMA
From: Jeff Kirsher @ 2011-10-13  7:15 UTC (permalink / raw)
  To: davem; +Cc: Greg Rose, netdev, gospo, sassmann, Jeff Kirsher
In-Reply-To: <1318490112-5092-1-git-send-email-jeffrey.t.kirsher@intel.com>

From: Greg Rose <gregory.v.rose@intel.com>

It is possible for a VF to set an invalid target DMA address in its
Tx/Rx descriptor buffer pointers.  The workarounds in this patch
will guard against such an event and issue a VFLR to the VF in response.
The VFLR will shut down the VF until an administrator can take action
to investigate the event and correct the problem.

Signed-off-by: Greg Rose <gregory.v.rose@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ixgbe/ixgbe.h      |    4 +
 drivers/net/ethernet/intel/ixgbe/ixgbe_main.c |  172 ++++++++++++++++++++++++-
 2 files changed, 175 insertions(+), 1 deletions(-)

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
index 38940d7..c1f76aa 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
@@ -116,6 +116,8 @@
 #define MAX_EMULATION_MAC_ADDRS         16
 #define IXGBE_MAX_PF_MACVLANS           15
 #define VMDQ_P(p)   ((p) + adapter->num_vfs)
+#define IXGBE_82599_VF_DEVICE_ID        0x10ED
+#define IXGBE_X540_VF_DEVICE_ID         0x1515
 
 struct vf_data_storage {
 	unsigned char vf_mac_addresses[ETH_ALEN];
@@ -512,6 +514,8 @@ struct ixgbe_adapter {
 	struct hlist_head fdir_filter_list;
 	union ixgbe_atr_input fdir_mask;
 	int fdir_filter_count;
+	u32 timer_event_accumulator;
+	u32 vferr_refcount;
 };
 
 struct ixgbe_fdir_filter {
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index 1519a23..b95c6e9 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -6112,6 +6112,51 @@ static void ixgbe_sfp_link_config_subtask(struct ixgbe_adapter *adapter)
 	clear_bit(__IXGBE_IN_SFP_INIT, &adapter->state);
 }
 
+#ifdef CONFIG_PCI_IOV
+static void ixgbe_check_for_bad_vf(struct ixgbe_adapter *adapter)
+{
+	int vf;
+	struct ixgbe_hw *hw = &adapter->hw;
+	struct net_device *netdev = adapter->netdev;
+	u32 gpc;
+	u32 ciaa, ciad;
+
+	gpc = IXGBE_READ_REG(hw, IXGBE_TXDGPC);
+	if (gpc) /* If incrementing then no need for the check below */
+		return;
+	/*
+	 * Check to see if a bad DMA write target from an errant or
+	 * malicious VF has caused a PCIe error.  If so then we can
+	 * issue a VFLR to the offending VF(s) and then resume without
+	 * requesting a full slot reset.
+	 */
+
+	for (vf = 0; vf < adapter->num_vfs; vf++) {
+		ciaa = (vf << 16) | 0x80000000;
+		/* 32 bit read so align, we really want status at offset 6 */
+		ciaa |= PCI_COMMAND;
+		IXGBE_WRITE_REG(hw, IXGBE_CIAA_82599, ciaa);
+		ciad = IXGBE_READ_REG(hw, IXGBE_CIAD_82599);
+		ciaa &= 0x7FFFFFFF;
+		/* disable debug mode asap after reading data */
+		IXGBE_WRITE_REG(hw, IXGBE_CIAA_82599, ciaa);
+		/* Get the upper 16 bits which will be the PCI status reg */
+		ciad >>= 16;
+		if (ciad & PCI_STATUS_REC_MASTER_ABORT) {
+			netdev_err(netdev, "VF %d Hung DMA\n", vf);
+			/* Issue VFLR */
+			ciaa = (vf << 16) | 0x80000000;
+			ciaa |= 0xA8;
+			IXGBE_WRITE_REG(hw, IXGBE_CIAA_82599, ciaa);
+			ciad = 0x00008000;  /* VFLR */
+			IXGBE_WRITE_REG(hw, IXGBE_CIAD_82599, ciad);
+			ciaa &= 0x7FFFFFFF;
+			IXGBE_WRITE_REG(hw, IXGBE_CIAA_82599, ciaa);
+		}
+	}
+}
+
+#endif
 /**
  * ixgbe_service_timer - Timer Call-back
  * @data: pointer to adapter cast into an unsigned long
@@ -6120,17 +6165,49 @@ static void ixgbe_service_timer(unsigned long data)
 {
 	struct ixgbe_adapter *adapter = (struct ixgbe_adapter *)data;
 	unsigned long next_event_offset;
+	bool ready = true;
 
+#ifdef CONFIG_PCI_IOV
+	ready = false;
+
+	/*
+	 * don't bother with SR-IOV VF DMA hang check if there are
+	 * no VFs or the link is down
+	 */
+	if (!adapter->num_vfs ||
+	    (adapter->flags & IXGBE_FLAG_NEED_LINK_UPDATE)) {
+		ready = true;
+		goto normal_timer_service;
+	}
+
+	/* If we have VFs allocated then we must check for DMA hangs */
+	ixgbe_check_for_bad_vf(adapter);
+	next_event_offset = HZ / 50;
+	adapter->timer_event_accumulator++;
+
+	if (adapter->timer_event_accumulator >= 100) {
+		ready = true;
+		adapter->timer_event_accumulator = 0;
+	}
+
+	goto schedule_event;
+
+normal_timer_service:
+#endif
 	/* poll faster when waiting for link */
 	if (adapter->flags & IXGBE_FLAG_NEED_LINK_UPDATE)
 		next_event_offset = HZ / 10;
 	else
 		next_event_offset = HZ * 2;
 
+#ifdef CONFIG_PCI_IOV
+schedule_event:
+#endif
 	/* Reset the timer */
 	mod_timer(&adapter->service_timer, next_event_offset + jiffies);
 
-	ixgbe_service_event_schedule(adapter);
+	if (ready)
+		ixgbe_service_event_schedule(adapter);
 }
 
 static void ixgbe_reset_subtask(struct ixgbe_adapter *adapter)
@@ -7717,6 +7794,91 @@ static pci_ers_result_t ixgbe_io_error_detected(struct pci_dev *pdev,
 	struct ixgbe_adapter *adapter = pci_get_drvdata(pdev);
 	struct net_device *netdev = adapter->netdev;
 
+#ifdef CONFIG_PCI_IOV
+	struct pci_dev *bdev, *vfdev;
+	u32 dw0, dw1, dw2, dw3;
+	int vf, pos;
+	u16 req_id, pf_func;
+
+	if (adapter->hw.mac.type == ixgbe_mac_82598EB ||
+	    adapter->num_vfs == 0)
+		goto skip_bad_vf_detection;
+
+	bdev = pdev->bus->self;
+	while (bdev && (bdev->pcie_type != PCI_EXP_TYPE_ROOT_PORT))
+		bdev = bdev->bus->self;
+
+	if (!bdev)
+		goto skip_bad_vf_detection;
+
+	pos = pci_find_ext_capability(bdev, PCI_EXT_CAP_ID_ERR);
+	if (!pos)
+		goto skip_bad_vf_detection;
+
+	pci_read_config_dword(bdev, pos + PCI_ERR_HEADER_LOG, &dw0);
+	pci_read_config_dword(bdev, pos + PCI_ERR_HEADER_LOG + 4, &dw1);
+	pci_read_config_dword(bdev, pos + PCI_ERR_HEADER_LOG + 8, &dw2);
+	pci_read_config_dword(bdev, pos + PCI_ERR_HEADER_LOG + 12, &dw3);
+
+	req_id = dw1 >> 16;
+	/* On the 82599 if bit 7 of the requestor ID is set then it's a VF */
+	if (!(req_id & 0x0080))
+		goto skip_bad_vf_detection;
+
+	pf_func = req_id & 0x01;
+	if ((pf_func & 1) == (pdev->devfn & 1)) {
+		unsigned int device_id;
+
+		vf = (req_id & 0x7F) >> 1;
+		e_dev_err("VF %d has caused a PCIe error\n", vf);
+		e_dev_err("TLP: dw0: %8.8x\tdw1: %8.8x\tdw2: "
+				"%8.8x\tdw3: %8.8x\n",
+		dw0, dw1, dw2, dw3);
+		switch (adapter->hw.mac.type) {
+		case ixgbe_mac_82599EB:
+			device_id = IXGBE_82599_VF_DEVICE_ID;
+			break;
+		case ixgbe_mac_X540:
+			device_id = IXGBE_X540_VF_DEVICE_ID;
+			break;
+		default:
+			device_id = 0;
+			break;
+		}
+
+		/* Find the pci device of the offending VF */
+		vfdev = pci_get_device(IXGBE_INTEL_VENDOR_ID, device_id, NULL);
+		while (vfdev) {
+			if (vfdev->devfn == (req_id & 0xFF))
+				break;
+			vfdev = pci_get_device(IXGBE_INTEL_VENDOR_ID,
+					       device_id, vfdev);
+		}
+		/*
+		 * There's a slim chance the VF could have been hot plugged,
+		 * so if it is no longer present we don't need to issue the
+		 * VFLR.  Just clean up the AER in that case.
+		 */
+		if (vfdev) {
+			e_dev_err("Issuing VFLR to VF %d\n", vf);
+			pci_write_config_dword(vfdev, 0xA8, 0x00008000);
+		}
+
+		pci_cleanup_aer_uncorrect_error_status(pdev);
+	}
+
+	/*
+	 * Even though the error may have occurred on the other port
+	 * we still need to increment the vf error reference count for
+	 * both ports because the I/O resume function will be called
+	 * for both of them.
+	 */
+	adapter->vferr_refcount++;
+
+	return PCI_ERS_RESULT_RECOVERED;
+
+skip_bad_vf_detection:
+#endif /* CONFIG_PCI_IOV */
 	netif_device_detach(netdev);
 
 	if (state == pci_channel_io_perm_failure)
@@ -7779,6 +7941,14 @@ static void ixgbe_io_resume(struct pci_dev *pdev)
 	struct ixgbe_adapter *adapter = pci_get_drvdata(pdev);
 	struct net_device *netdev = adapter->netdev;
 
+#ifdef CONFIG_PCI_IOV
+	if (adapter->vferr_refcount) {
+		e_info(drv, "Resuming after VF err\n");
+		adapter->vferr_refcount--;
+		return;
+	}
+
+#endif
 	if (netif_running(netdev))
 		ixgbe_up(adapter);
 
-- 
1.7.6.4

^ permalink raw reply related

* [net-next 00/11 v2][pull request] Intel Wired LAN Driver Updates
From: Jeff Kirsher @ 2011-10-13  7:15 UTC (permalink / raw)
  To: davem; +Cc: Jeff Kirsher, netdev, gospo, sassmann

The following series contains updates to ixgbe and igb.  This
version of the series contains the following changes:

- ixgbe add FCoE stats, add protection from invalid DMA and fix
  check for change in FCoE priority
- igb version bump, fix sparse warnings and finish up the cleanup
  work of igb by Alex Duyck.

- v2: drop the "igb: move DMA Coalescing feature code into separate function."
  patch while Carolyn fixes the patch based on David Miller's comments.

The following are changes since commit 9687c637388f63b87fcc18eee6e65bcfca4f49ca:
  Merge branch 'for-davem' of git://git.infradead.org/users/linville/wireless-next
and are available in the git repository at
  git://git.kernel.org/pub/scm/linux/kernel/git/jkirsher/net-next.git
or
  git://github.com/Jkirsher/net-next.git

Akeem G. Abodunrin (1):
  igb: Loopback functionality supports for i350 devices

Alexander Duyck (5):
  igb: avoid unnecessarily creating a local copy of the q_vector
  igb: Make certain one vector is always assigned in igb_request_irq
  igb: Fix features that are currently 82580 only and should also be
    i350
  igb: Drop unnecessary write of E1000_IMS from igb_msix_other
  igb: Add workaround for byte swapped VLAN on i350 local traffic

Amir Hanania (1):
  ixgbe: Add FCoE DDP allocation failure counters to ethtool stats.

Carolyn Wyborny (1):
  igb: Version bump.

Emil Tantilov (1):
  igb: fix static function warnings reported by sparse

Greg Rose (1):
  ixgbe: Add protection from VF invalid target DMA

Mark Rustad (1):
  ixgbe: Correct check for change in FCoE priority

 drivers/net/ethernet/intel/igb/e1000_82575.c     |   38 +++--
 drivers/net/ethernet/intel/igb/e1000_defines.h   |    1 +
 drivers/net/ethernet/intel/igb/e1000_regs.h      |    1 +
 drivers/net/ethernet/intel/igb/igb.h             |    1 +
 drivers/net/ethernet/intel/igb/igb_main.c        |  112 ++++++-------
 drivers/net/ethernet/intel/ixgbe/ixgbe.h         |    4 +
 drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_nl.c  |   12 ++-
 drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c |    2 +
 drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c    |   44 ++++--
 drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.h    |    2 +
 drivers/net/ethernet/intel/ixgbe/ixgbe_main.c    |  189 +++++++++++++++++++++-
 drivers/net/ethernet/intel/ixgbe/ixgbe_type.h    |    2 +
 12 files changed, 324 insertions(+), 84 deletions(-)

-- 
1.7.6.4

^ permalink raw reply

* Hello Dear Friend
From: Mr Ashraf Salah @ 2011-10-13  3:55 UTC (permalink / raw)




My Dear Friend,

Assalamu alaikum!

I'm Mr Ashraf Salah Eldin Abdullah, please I have a business issue to discuss 
with you,If you are interested reply me.

Please contact me urgently.

Best regards,

Mr Ashraf Salah Eldin Abdullah.

^ permalink raw reply

* Re: linux-next: manual merge of the staging tree with the net tree
From: Greg KH @ 2011-10-13  6:12 UTC (permalink / raw)
  To: Stephen Rothwell
  Cc: linux-next, linux-kernel, Arend van Spriel, David Miller, netdev,
	Jiri Pirko, Eliad Peller, John W. Linville
In-Reply-To: <20111013164353.09c495e1b63d23e380cca1f7@canb.auug.org.au>

On Thu, Oct 13, 2011 at 04:43:53PM +1100, Stephen Rothwell wrote:
> Hi Greg,
> 
> Today's linux-next merge of the staging tree got conflicts in
> drivers/staging/brcm80211/brcmfmac/dhd_linux.c and
> drivers/staging/brcm80211/brcmsmac/mac80211_if.c between commits from the
> net-next tree and commit fc2d6e573be6 ("staging: brcm80211: remove
> brcm80211 driver from the staging tree") from the staging tree.
> 
> The latter removed the files, so I did that

Wonderful, thanks for doing that.

greg

^ permalink raw reply

* Re: [PATCH v6 8/8] Disable task moving when using kernel memory accounting
From: KAMEZAWA Hiroyuki @ 2011-10-13  6:00 UTC (permalink / raw)
  To: Glauber Costa
  Cc: linux-kernel, paul, lizf, ebiederm, davem, gthelen, netdev,
	linux-mm, kirill, avagin, devel
In-Reply-To: <1318242268-2234-9-git-send-email-glommer@parallels.com>

On Mon, 10 Oct 2011 14:24:28 +0400
Glauber Costa <glommer@parallels.com> wrote:

> Since this code is still experimental, we are leaving the exact
> details of how to move tasks between cgroups when kernel memory
> accounting is used as future work.
> 
> For now, we simply disallow movement if there is any pending
> accounted memory.
> 
> Signed-off-by: Glauber Costa <glommer@parallels.com>
> CC: Hiroyouki Kamezawa <kamezawa.hiroyu@jp.fujitsu.com>

Reviewed-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply

* Re: [PATCH v6 7/8] Display current tcp memory allocation in kmem cgroup
From: KAMEZAWA Hiroyuki @ 2011-10-13  5:59 UTC (permalink / raw)
  To: Glauber Costa
  Cc: linux-kernel, paul, lizf, ebiederm, davem, gthelen, netdev,
	linux-mm, kirill, avagin, devel
In-Reply-To: <1318242268-2234-8-git-send-email-glommer@parallels.com>

On Mon, 10 Oct 2011 14:24:27 +0400
Glauber Costa <glommer@parallels.com> wrote:

> This patch introduces kmem.tcp_current_memory file, living in the
> kmem_cgroup filesystem. It is a simple read-only file that displays the
> amount of kernel memory currently consumed by the cgroup.
> 
> Signed-off-by: Glauber Costa <glommer@parallels.com>
> CC: David S. Miller <davem@davemloft.net>
> CC: Hiroyouki Kamezawa <kamezawa.hiroyu@jp.fujitsu.com>
> CC: Eric W. Biederman <ebiederm@xmission.com>

Reviewed-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply

* Re: [PATCH v6 6/8] tcp buffer limitation: per-cgroup limit
From: KAMEZAWA Hiroyuki @ 2011-10-13  5:58 UTC (permalink / raw)
  To: Glauber Costa
  Cc: linux-kernel, paul, lizf, ebiederm, davem, gthelen, netdev,
	linux-mm, kirill, avagin, devel
In-Reply-To: <1318242268-2234-7-git-send-email-glommer@parallels.com>

On Mon, 10 Oct 2011 14:24:26 +0400
Glauber Costa <glommer@parallels.com> wrote:

> This patch uses the "tcp_max_mem" field of the kmem_cgroup to
> effectively control the amount of kernel memory pinned by a cgroup.
> 
> We have to make sure that none of the memory pressure thresholds
> specified in the namespace are bigger than the current cgroup.
> 
> Signed-off-by: Glauber Costa <glommer@parallels.com>
> CC: David S. Miller <davem@davemloft.net>
> CC: Hiroyouki Kamezawa <kamezawa.hiroyu@jp.fujitsu.com>
> CC: Eric W. Biederman <ebiederm@xmission.com>

Reviewed-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply

* Re: [PATCH v6 4/8] per-cgroup tcp buffers control
From: KAMEZAWA Hiroyuki @ 2011-10-13  5:56 UTC (permalink / raw)
  To: Glauber Costa
  Cc: linux-kernel, paul, lizf, ebiederm, davem, gthelen, netdev,
	linux-mm, kirill, avagin, devel
In-Reply-To: <1318242268-2234-5-git-send-email-glommer@parallels.com>

On Mon, 10 Oct 2011 14:24:24 +0400
Glauber Costa <glommer@parallels.com> wrote:

> With all the infrastructure in place, this patch implements
> per-cgroup control for tcp memory pressure handling.
> 
> Signed-off-by: Glauber Costa <glommer@parallels.com>
> CC: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujtisu.com>
> CC: David S. Miller <davem@davemloft.net>
> CC: Eric W. Biederman <ebiederm@xmission.com>

Reviewed-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>

^ permalink raw reply

* Re: [PATCH v6 3/8] foundations of per-cgroup memory pressure controlling.
From: KAMEZAWA Hiroyuki @ 2011-10-13  5:53 UTC (permalink / raw)
  To: Glauber Costa
  Cc: linux-kernel, paul, lizf, ebiederm, davem, gthelen, netdev,
	linux-mm, kirill, avagin, devel
In-Reply-To: <1318242268-2234-4-git-send-email-glommer@parallels.com>

On Mon, 10 Oct 2011 14:24:23 +0400
Glauber Costa <glommer@parallels.com> wrote:

> This patch converts struct sock fields memory_pressure,
> memory_allocated, sockets_allocated, and sysctl_mem (now prot_mem)
> to function pointers, receiving a struct mem_cgroup parameter.
> 
> enter_memory_pressure is kept the same, since all its callers
> have a socket context, and the kmem_cgroup can be derived from
> the socket itself.
> 
> To keep things working, the patch converts all users of those fields
> to use accessor functions.
> 
> In my benchmarks I didn't see a significant performance difference
> with this patch applied compared to a baseline (around 1 % diff, thus
> inside error margin).
> 
> Signed-off-by: Glauber Costa <glommer@parallels.com>
> CC: David S. Miller <davem@davemloft.net>
> CC: Hiroyouki Kamezawa <kamezawa.hiroyu@jp.fujitsu.com>
> CC: Eric W. Biederman <ebiederm@xmission.com>

Reviewed-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>

a nitpick.


>  #ifdef CONFIG_INET
> +enum {
> +	UNDER_LIMIT,
> +	OVER_LIMIT,
> +};
> +

It may be better to move this to res_counter.h or memcontrol.h


--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply

* Re: [PATCH v6 1/8] Basic kernel memory functionality for the Memory Controller
From: KAMEZAWA Hiroyuki @ 2011-10-13  5:44 UTC (permalink / raw)
  To: Glauber Costa
  Cc: linux-kernel, paul, lizf, ebiederm, davem, gthelen, netdev,
	linux-mm, kirill, avagin, devel
In-Reply-To: <1318242268-2234-2-git-send-email-glommer@parallels.com>

On Mon, 10 Oct 2011 14:24:21 +0400
Glauber Costa <glommer@parallels.com> wrote:

> This patch lays down the foundation for the kernel memory component
> of the Memory Controller.
> 
> As of today, I am only laying down the following files:
> 
>  * memory.independent_kmem_limit
>  * memory.kmem.limit_in_bytes (currently ignored)
>  * memory.kmem.usage_in_bytes (always zero)
> 
> Signed-off-by: Glauber Costa <glommer@parallels.com>
> Reviewed-by: Kirill A. Shutemov <kirill@shutemov.name>
> CC: Paul Menage <paul@paulmenage.org>
> CC: Greg Thelen <gthelen@google.com>

Reviewed-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox