* [PATCH 1/2] RFC: sysctl: write ctl_table->extra2 to table entries created from a ctl_path
From: Lucian Adrian Grijincu @ 2011-01-28 6:10 UTC (permalink / raw)
To: netdev; +Cc: Octavian Purdila
[-- Attachment #1: Type: text/plain, Size: 1087 bytes --]
For each entry in an array of 'struct ctl_path' we were registering a
'struct ctl_table' array with two entries:
A) one to store the name + permissions,
B) one as an end-of-array marker (completely blank).
but we were not using any of the data storage fields
(data, extra1, extra2) in the first 'struct ctl_table'.
This adds the possibility of storing a user-provided
pointer in the 'extra2' field.
All existing users of these functions still store NULL in the
'extra2' field, as they did before this patch:
* register_sysctl_paths
* register_net_sysctl_table
* register_net_sysctl_rotable
Until now sysctl_check_table considered that the 'struct ctl_table' of
directories may not store anything in the 'extra2' field. We no longer
consider this a fault.
Signed-off-by: Lucian Adrian Grijincu <lucian.grijincu@gmail.com>
---
include/linux/sysctl.h | 2 +-
include/net/net_namespace.h | 2 ++
kernel/sysctl.c | 7 +++++--
kernel/sysctl_check.c | 2 --
net/sysctl_net.c | 20 ++++++++++++++------
5 files changed, 22 insertions(+), 11 deletions(-)
[-- Attachment #2: 0001-RFC-sysctl-write-ctl_table-extra2-to-table-entries-c.patch --]
[-- Type: text/x-patch, Size: 5048 bytes --]
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index 7bb5cb6..333c72b 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -1058,7 +1058,7 @@ struct ctl_path {
void register_sysctl_root(struct ctl_table_root *root);
struct ctl_table_header *__register_sysctl_paths(
struct ctl_table_root *root, struct nsproxy *namespaces,
- const struct ctl_path *path, struct ctl_table *table);
+ const struct ctl_path *path, struct ctl_table *table, void *pathdata);
struct ctl_table_header *register_sysctl_table(struct ctl_table * table);
struct ctl_table_header *register_sysctl_paths(const struct ctl_path *path,
struct ctl_table *table);
diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index b3b4a34..4c80c30 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -274,6 +274,8 @@ struct ctl_table_header;
extern struct ctl_table_header *register_net_sysctl_table(struct net *net,
const struct ctl_path *path, struct ctl_table *table);
+struct ctl_table_header *register_net_sysctl_table_pathdata(struct net *net,
+ const struct ctl_path *path, struct ctl_table *table, void *pathdata);
extern struct ctl_table_header *register_net_sysctl_rotable(
const struct ctl_path *path, struct ctl_table *table);
extern void unregister_net_sysctl_table(struct ctl_table_header *header);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index bc86bb3..279a0c8 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -1773,6 +1773,8 @@ static void try_attach(struct ctl_table_header *p, struct ctl_table_header *q)
* @namespaces: Data to compute which lists of sysctl entries are visible
* @path: The path to the directory the sysctl table is in.
* @table: the top-level table structure
+ * @pathdata: user provided pointer to data that will be stored in
+ * every ctl_table node of the path allocated for @path
*
* Register a sysctl table hierarchy. @table should be a filled in ctl_table
* array. A completely 0 filled entry terminates the table.
@@ -1823,7 +1825,7 @@ static void try_attach(struct ctl_table_header *p, struct ctl_table_header *q)
struct ctl_table_header *__register_sysctl_paths(
struct ctl_table_root *root,
struct nsproxy *namespaces,
- const struct ctl_path *path, struct ctl_table *table)
+ const struct ctl_path *path, struct ctl_table *table, void *pathdata)
{
struct ctl_table_header *header;
struct ctl_table *new, **prevp;
@@ -1855,6 +1857,7 @@ struct ctl_table_header *__register_sysctl_paths(
/* Copy the procname */
new->procname = path->procname;
new->mode = 0555;
+ new->extra2 = pathdata;
*prevp = new;
prevp = &new->child;
@@ -1910,7 +1913,7 @@ struct ctl_table_header *register_sysctl_paths(const struct ctl_path *path,
struct ctl_table *table)
{
return __register_sysctl_paths(&sysctl_table_root, current->nsproxy,
- path, table);
+ path, table, NULL);
}
/**
diff --git a/kernel/sysctl_check.c b/kernel/sysctl_check.c
index 10b90d8..8fd9b71 100644
--- a/kernel/sysctl_check.c
+++ b/kernel/sysctl_check.c
@@ -127,8 +127,6 @@ int sysctl_check_table(struct nsproxy *namespaces, struct ctl_table *table)
set_fail(&fail, table, "Directory with proc_handler");
if (table->extra1)
set_fail(&fail, table, "Directory with extra1");
- if (table->extra2)
- set_fail(&fail, table, "Directory with extra2");
} else {
if ((table->proc_handler == proc_dostring) ||
(table->proc_handler == proc_dointvec) ||
diff --git a/net/sysctl_net.c b/net/sysctl_net.c
index ca84212..9c92cac 100644
--- a/net/sysctl_net.c
+++ b/net/sysctl_net.c
@@ -103,22 +103,30 @@ out:
}
subsys_initcall(sysctl_init);
-struct ctl_table_header *register_net_sysctl_table(struct net *net,
- const struct ctl_path *path, struct ctl_table *table)
+struct ctl_table_header *register_net_sysctl_table_pathdata(struct net *net,
+ const struct ctl_path *path, struct ctl_table *table, void *pathdata)
{
struct nsproxy namespaces;
namespaces = *current->nsproxy;
namespaces.net_ns = net;
- return __register_sysctl_paths(&net_sysctl_root,
- &namespaces, path, table);
+ return __register_sysctl_paths(&net_sysctl_root, &namespaces,
+ path, table, pathdata);
+}
+EXPORT_SYMBOL_GPL(register_net_sysctl_table_pathdata);
+
+struct ctl_table_header *register_net_sysctl_table(struct net *net,
+ const struct ctl_path *path, struct ctl_table *table)
+{
+ return register_net_sysctl_table_pathdata(net, path, table, NULL);
}
EXPORT_SYMBOL_GPL(register_net_sysctl_table);
+
struct ctl_table_header *register_net_sysctl_rotable(const
struct ctl_path *path, struct ctl_table *table)
{
- return __register_sysctl_paths(&net_sysctl_ro_root,
- &init_nsproxy, path, table);
+ return __register_sysctl_paths(&net_sysctl_ro_root, &init_nsproxy,
+ path, table, NULL);
}
EXPORT_SYMBOL_GPL(register_net_sysctl_rotable);
^ permalink raw reply related
* [net-2.6 3/7] ixgbe: fix for 82599 erratum on Header Splitting.
From: Jeff Kirsher @ 2011-01-28 6:12 UTC (permalink / raw)
To: davem; +Cc: Don Skidmore, bphilips, netdev, stable, Jeff Kirsher
In-Reply-To: <1296195143-2870-1-git-send-email-jeffrey.t.kirsher@intel.com>
From: Don Skidmore <donald.c.skidmore@intel.com>
We have found a hardware erratum on 82599 hardware that can lead to
unpredictable behavior when Header Splitting mode is enabled. So
we are no longer enabling this feature on affected hardware.
Please see the 82599 Specification Update for more information.
CC: stable@kernel.org
Signed-off-by: Don Skidmore <donald.c.skidmore@intel.com>
Tested-by: Stephen Ko <stephen.s.ko@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
drivers/net/ixgbe/ixgbe_main.c | 11 +++++++++--
1 files changed, 9 insertions(+), 2 deletions(-)
diff --git a/drivers/net/ixgbe/ixgbe_main.c b/drivers/net/ixgbe/ixgbe_main.c
index 44a1cf0..1495b74 100644
--- a/drivers/net/ixgbe/ixgbe_main.c
+++ b/drivers/net/ixgbe/ixgbe_main.c
@@ -3176,9 +3176,16 @@ static void ixgbe_set_rx_buffer_len(struct ixgbe_adapter *adapter)
u32 mhadd, hlreg0;
/* Decide whether to use packet split mode or not */
+ /* On by default */
+ adapter->flags |= IXGBE_FLAG_RX_PS_ENABLED;
+
/* Do not use packet split if we're in SR-IOV Mode */
- if (!adapter->num_vfs)
- adapter->flags |= IXGBE_FLAG_RX_PS_ENABLED;
+ if (adapter->num_vfs)
+ adapter->flags &= ~IXGBE_FLAG_RX_PS_ENABLED;
+
+ /* Disable packet split due to 82599 erratum #45 */
+ if (hw->mac.type == ixgbe_mac_82599EB)
+ adapter->flags &= ~IXGBE_FLAG_RX_PS_ENABLED;
/* Set the RX buffer length according to the mode */
if (adapter->flags & IXGBE_FLAG_RX_PS_ENABLED) {
--
1.7.3.5
^ permalink raw reply related
* [PATCH 2/2] RFCv2: ipv4: share sysctl net/ipv4/conf/DEVNAME/ tables
From: Lucian Adrian Grijincu @ 2011-01-28 6:12 UTC (permalink / raw)
To: netdev
Cc: Alexey Dobriyan, David S. Miller, Hideaki YOSHIFUJI,
Patrick McHardy, Nick Piggin, Al Viro, Christoph Hellwig,
Dave Chinner, Eric Dumazet, Eric W. Biederman, Thomas Graf
[-- Attachment #1: Type: text/plain, Size: 1343 bytes --]
Before this, for each network device DEVNAME that supports ipv4 a new
sysctl table was registered in $PROC/sys/net/ipv4/conf/DEVNAME/.
The sysctl table was identical for all network devices, except for:
* data: pointer to the data to be accessed in the sysctl
* extra1: the 'struct ipv4_devconf*' of the network device
* extra2: the 'struct net*' of the network namespace
Assuming we have a device name and a 'struct net*', we can get the
'struct net_device*'. From there we can compute:
* data: the entry's offset into 'struct ipv4_devconf', rebased onto the device's 'struct ipv4_devconf*'
* extra1: 'struct ipv4_devconf*' can be reached from 'struct net_device*'
* extra2: the 'struct net*' that we assume we have
The device name is determined from the path to the file (the name of
the parent dentry).
The 'struct net*' is stored in the parent 'struct ctl_table*' path by
register_net_sysctl_table_pathdata().
NOTE: this breaks ctl_table->parent: the last registered net device
will be the parent of any $PROC/sys/net/ipv4/conf/DEVNAME/$CTL
ctl_table and selinux may behave in a wrong way because of this.
This is just an RFC patch at the moment.
Signed-off-by: Lucian Adrian Grijincu <lucian.grijincu@gmail.com>
---
fs/proc/proc_sysctl.c | 16 +++-
include/linux/inetdevice.h | 12 +++-
net/ipv4/devinet.c | 203 +++++++++++++++++++++++++++++---------------
3 files changed, 161 insertions(+), 70 deletions(-)
[-- Attachment #2: 0002-RFCv2-ipv4-share-sysctl-net-ipv4-conf-DEVNAME-tables.patch --]
[-- Type: text/x-patch, Size: 12417 bytes --]
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index 09a1f92..52fd702 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -129,6 +129,11 @@ out:
return err;
}
+
+typedef int proc_handler_extended(struct ctl_table *ctl, int write,
+ void __user *buffer, size_t *lenp, loff_t *ppos,
+ struct file *filp);
+
static ssize_t proc_sys_call_handler(struct file *filp, void __user *buf,
size_t count, loff_t *ppos, int write)
{
@@ -137,6 +142,7 @@ static ssize_t proc_sys_call_handler(struct file *filp, void __user *buf,
struct ctl_table *table = PROC_I(inode)->sysctl_entry;
ssize_t error;
size_t res;
+ proc_handler_extended *phx = (proc_handler_extended *) table->proc_handler;
if (IS_ERR(head))
return PTR_ERR(head);
@@ -156,7 +162,15 @@ static ssize_t proc_sys_call_handler(struct file *filp, void __user *buf,
/* careful: calling conventions are nasty here */
res = count;
- error = table->proc_handler(table, write, buf, &res, ppos);
+ /* Most handlers only use the first 5 arguments (without @filp).
+ * Changing all of them is too much work, as, at the time of writing, only
+ * the devinet.c proc_handlers know about and use the @filp.
+ *
+ * This is just a HACK for now; I did it this way to avoid
+ * wasting time changing all the handlers. In the final version
+ * I'll change all the handlers if there's no other solution.
+ */
+ error = phx(table, write, buf, &res, ppos, filp);
if (!error)
error = res;
out:
diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h
index ae8fdc5..caf06b3 100644
--- a/include/linux/inetdevice.h
+++ b/include/linux/inetdevice.h
@@ -43,8 +43,18 @@ enum
#define IPV4_DEVCONF_MAX (__IPV4_DEVCONF_MAX - 1)
+
+struct devinet_sysctl {
+ /* dev_name holds a copy of dev_name, because '.procname' is
+ * regarded as const by sysctl and we wouldn't want anyone to
+ * change it under our feet (see SIOCSIFNAME). */
+ char *dev_name;
+ struct ctl_table_header *sysctl_header;
+};
+
+
struct ipv4_devconf {
- void *sysctl;
+ struct devinet_sysctl devinet_sysctl;
int data[IPV4_DEVCONF_MAX];
DECLARE_BITMAP(state, IPV4_DEVCONF_MAX);
};
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 748cb5b..b42425d 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -147,7 +147,7 @@ void in_dev_finish_destroy(struct in_device *idev)
}
EXPORT_SYMBOL(in_dev_finish_destroy);
-static struct in_device *inetdev_init(struct net_device *dev)
+struct in_device *inetdev_init(struct net_device *dev)
{
struct in_device *in_dev;
@@ -158,7 +158,8 @@ static struct in_device *inetdev_init(struct net_device *dev)
goto out;
memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
sizeof(in_dev->cnf));
- in_dev->cnf.sysctl = NULL;
+ in_dev->cnf.devinet_sysctl.dev_name = NULL;
+ in_dev->cnf.devinet_sysctl.sysctl_header = NULL;
in_dev->dev = dev;
in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
if (!in_dev->arp_parms)
@@ -1375,6 +1376,67 @@ static void inet_forward_change(struct net *net)
}
}
+
+
+static int devinet_conf_handler(ctl_table *ctl, int write,
+ void __user *buffer,
+ size_t *lenp, loff_t *ppos,
+ struct file *filp,
+ proc_handler *proc_handler)
+{
+ /* The path to this file is of the form:
+ * $PROC_MOUNT/sys/net/ipv4/conf/$DEVNAME/$CTL
+ *
+ * The array of 'struct ctl_table' of devinet entries is
+ * shared between all ipv4 network devices and the 'data'
+ * field of each structure only hold the offset into the
+ * 'data' field of 'struct ipv4_devconf'.
+ *
+ * To find the propper location of the data that must be
+ * accessed by this handler we need the device name and the
+ * network namespace in which it belongs.
+ */
+
+ /* We store the network namespace in the parent table's ->extra2 */
+ struct inode *parent_inode = filp->f_path.dentry->d_parent->d_inode;
+ struct ctl_table *parent_table = PROC_I(parent_inode)->sysctl_entry;
+ struct net *net = parent_table->extra2;
+
+ const char *dev_name = filp->f_path.dentry->d_parent->d_name.name;
+ struct ctl_table tmp_ctl;
+ struct net_device *dev = NULL;
+ struct in_device *in_dev = NULL;
+ struct ipv4_devconf *cnf;
+ int ret;
+
+ if (strcmp(dev_name, "all") == 0) {
+ cnf = net->ipv4.devconf_all;
+ } else if (strcmp(dev_name, "default") == 0) {
+ cnf = net->ipv4.devconf_dflt;
+ } else {
+ /* the device could have been renamed (SIOCSIFNAME) or
+ * deleted since we started accessing its proc sysctl */
+ dev = dev_get_by_name(net, dev_name);
+ if (dev == NULL)
+ return -ENOENT;
+ in_dev = in_dev_get(dev);
+ cnf = &in_dev->cnf;
+ }
+
+ tmp_ctl = *ctl;
+ tmp_ctl.data += (char *)cnf - (char *)&ipv4_devconf;
+ tmp_ctl.extra1 = cnf;
+ tmp_ctl.extra2 = net;
+
+ ret = proc_handler(&tmp_ctl, write, buffer, lenp, ppos);
+
+ if (in_dev)
+ in_dev_put(in_dev);
+ if (dev)
+ dev_put(dev);
+ return ret;
+}
+
static int devinet_conf_proc(ctl_table *ctl, int write,
void __user *buffer,
size_t *lenp, loff_t *ppos)
@@ -1445,6 +1507,33 @@ static int ipv4_doint_and_flush(ctl_table *ctl, int write,
return ret;
}
+static int devinet_conf_proc__(ctl_table *ctl, int write,
+ void __user *buffer,
+ size_t *lenp, loff_t *ppos,
+ struct file *filp)
+{
+ return devinet_conf_handler(ctl, write, buffer, lenp, ppos, filp,
+ devinet_conf_proc);
+}
+
+static int devinet_sysctl_forward__(ctl_table *ctl, int write,
+ void __user *buffer,
+ size_t *lenp, loff_t *ppos,
+ struct file *filp)
+{
+ return devinet_conf_handler(ctl, write, buffer, lenp, ppos, filp,
+ devinet_sysctl_forward);
+}
+
+static int ipv4_doint_and_flush__(ctl_table *ctl, int write,
+ void __user *buffer,
+ size_t *lenp, loff_t *ppos,
+ struct file *filp)
+{
+ return devinet_conf_handler(ctl, write, buffer, lenp, ppos, filp,
+ ipv4_doint_and_flush);
+}
+
#define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \
{ \
.procname = name, \
@@ -1452,67 +1541,60 @@ static int ipv4_doint_and_flush(ctl_table *ctl, int write,
IPV4_DEVCONF_ ## attr - 1, \
.maxlen = sizeof(int), \
.mode = mval, \
- .proc_handler = proc, \
- .extra1 = &ipv4_devconf, \
+ .proc_handler = (proc_handler *) proc, \
}
#define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
- DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)
+ DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc__)
#define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
- DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)
+ DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc__)
#define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \
DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)
#define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
- DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush)
-
-static struct devinet_sysctl_table {
- struct ctl_table_header *sysctl_header;
- struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
- char *dev_name;
-} devinet_sysctl = {
- .devinet_vars = {
- DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
- devinet_sysctl_forward),
- DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
-
- DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
- DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
- DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
- DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
- DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
- DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
- "accept_source_route"),
- DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
- DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
- DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
- DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
- DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
- DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
- DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
- DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
- DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
- DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
- DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
- DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
- DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
-
- DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
- DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
- DEVINET_SYSCTL_FLUSHING_ENTRY(FORCE_IGMP_VERSION,
- "force_igmp_version"),
- DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
- "promote_secondaries"),
- },
+ DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush__)
+
+struct ctl_table ipv4_devinet_sysctl_table[__IPV4_DEVCONF_MAX] = {
+ DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
+ devinet_sysctl_forward__),
+ DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
+
+ DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
+ DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
+ DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
+ DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
+ DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
+ DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
+ "accept_source_route"),
+ DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
+ DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
+ DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
+ DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
+ DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
+ DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
+ DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
+ DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
+ DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
+ DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
+ DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
+ DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
+ DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
+
+ DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
+ DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
+ DEVINET_SYSCTL_FLUSHING_ENTRY(FORCE_IGMP_VERSION,
+ "force_igmp_version"),
+ DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
+ "promote_secondaries"),
+ { }
};
static int __devinet_sysctl_register(struct net *net, char *dev_name,
- struct ipv4_devconf *p)
+ struct ipv4_devconf *cnf)
{
- int i;
- struct devinet_sysctl_table *t;
+ struct devinet_sysctl *t = &cnf->devinet_sysctl;
#define DEVINET_CTL_PATH_DEV 3
@@ -1524,16 +1606,6 @@ static int __devinet_sysctl_register(struct net *net, char *dev_name,
{ },
};
- t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
- if (!t)
- goto out;
-
- for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
- t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
- t->devinet_vars[i].extra1 = p;
- t->devinet_vars[i].extra2 = net;
- }
-
/*
* Make a copy of dev_name, because '.procname' is regarded as const
* by sysctl and we wouldn't want anyone to change it under our feet
@@ -1541,37 +1613,32 @@ static int __devinet_sysctl_register(struct net *net, char *dev_name,
*/
t->dev_name = kstrdup(dev_name, GFP_KERNEL);
if (!t->dev_name)
- goto free;
+ goto out;
devinet_ctl_path[DEVINET_CTL_PATH_DEV].procname = t->dev_name;
- t->sysctl_header = register_net_sysctl_table(net, devinet_ctl_path,
- t->devinet_vars);
+ t->sysctl_header = register_net_sysctl_table_pathdata(net,
+ devinet_ctl_path, ipv4_devinet_sysctl_table, net);
if (!t->sysctl_header)
goto free_procname;
- p->sysctl = t;
return 0;
free_procname:
kfree(t->dev_name);
-free:
- kfree(t);
out:
return -ENOBUFS;
}
static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
{
- struct devinet_sysctl_table *t = cnf->sysctl;
+ struct devinet_sysctl *t = &cnf->devinet_sysctl;
if (t == NULL)
return;
- cnf->sysctl = NULL;
unregister_sysctl_table(t->sysctl_header);
kfree(t->dev_name);
- kfree(t);
}
static void devinet_sysctl_register(struct in_device *idev)
^ permalink raw reply related
* [net-2.6 3/7] ixgbe: fix for 82599 erratum on Header Splitting.
From: Jeff Kirsher @ 2011-01-28 6:18 UTC (permalink / raw)
To: davem; +Cc: Don Skidmore, bphilips, netdev, Jeff Kirsher, gospo, stable
In-Reply-To: <1296195535-2990-1-git-send-email-jeffrey.t.kirsher@intel.com>
From: Don Skidmore <donald.c.skidmore@intel.com>
We have found a hardware erratum on 82599 hardware that can lead to
unpredictable behavior when Header Splitting mode is enabled. So
we are no longer enabling this feature on affected hardware.
Please see the 82599 Specification Update for more information.
CC: stable@kernel.org
Signed-off-by: Don Skidmore <donald.c.skidmore@intel.com>
Tested-by: Stephen Ko <stephen.s.ko@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
drivers/net/ixgbe/ixgbe_main.c | 11 +++++++++--
1 files changed, 9 insertions(+), 2 deletions(-)
diff --git a/drivers/net/ixgbe/ixgbe_main.c b/drivers/net/ixgbe/ixgbe_main.c
index 44a1cf0..1495b74 100644
--- a/drivers/net/ixgbe/ixgbe_main.c
+++ b/drivers/net/ixgbe/ixgbe_main.c
@@ -3176,9 +3176,16 @@ static void ixgbe_set_rx_buffer_len(struct ixgbe_adapter *adapter)
u32 mhadd, hlreg0;
/* Decide whether to use packet split mode or not */
+ /* On by default */
+ adapter->flags |= IXGBE_FLAG_RX_PS_ENABLED;
+
/* Do not use packet split if we're in SR-IOV Mode */
- if (!adapter->num_vfs)
- adapter->flags |= IXGBE_FLAG_RX_PS_ENABLED;
+ if (adapter->num_vfs)
+ adapter->flags &= ~IXGBE_FLAG_RX_PS_ENABLED;
+
+ /* Disable packet split due to 82599 erratum #45 */
+ if (hw->mac.type == ixgbe_mac_82599EB)
+ adapter->flags &= ~IXGBE_FLAG_RX_PS_ENABLED;
/* Set the RX buffer length according to the mode */
if (adapter->flags & IXGBE_FLAG_RX_PS_ENABLED) {
--
1.7.3.5
^ permalink raw reply related
* [net-2.6 0/7][pull request] Intel Wired LAN Driver Updates
From: Jeff Kirsher @ 2011-01-28 6:18 UTC (permalink / raw)
To: davem; +Cc: Jeff Kirsher, netdev, gospo, bphilips
The following series contains the addition of a PHY id for e1000
and several ixgbe fixes.
The following are changes since commit 4bb9ebc78097376b3734c6d3001a96aecac0f7bb:
bnx2: Eliminate AER error messages on systems not supporting it
and are available in the git repository at:
master.kernel.org:/pub/scm/linux/kernel/git/jkirsher/net-2.6 master
Alexander Duyck (1):
ixgbe: limit VF access to network traffic
Amir Hanania (1):
ixgbe: DDP last buffer size work around
Don Skidmore (3):
ixgbe: fix for 82599 erratum on Header Splitting.
ixgbe: cleanup variable initialization
ixgbe: update version string
Emil Tantilov (1):
ixgbe: fix variable set but not used warnings by gcc 4.6
Florian Fainelli (1):
e1000: add support for Marvell Alaska M88E1118R PHY
drivers/net/e1000/e1000_hw.c | 4 +++-
drivers/net/e1000/e1000_hw.h | 1 +
drivers/net/ixgbe/ixgbe_common.c | 3 +++
drivers/net/ixgbe/ixgbe_fcoe.c | 21 ++++++++++++++++++++-
drivers/net/ixgbe/ixgbe_main.c | 16 ++++++++++------
drivers/net/ixgbe/ixgbe_sriov.c | 2 --
drivers/net/ixgbe/ixgbe_x540.c | 6 +++---
7 files changed, 40 insertions(+), 13 deletions(-)
--
1.7.3.5
^ permalink raw reply
* [net-2.6 1/7] e1000: add support for Marvell Alaska M88E1118R PHY
From: Jeff Kirsher @ 2011-01-28 6:18 UTC (permalink / raw)
To: davem
Cc: Florian Fainelli, netdev, gospo, bphilips, Dirk Brandewie,
Jeff Kirsher
In-Reply-To: <1296195535-2990-1-git-send-email-jeffrey.t.kirsher@intel.com>
From: Florian Fainelli <ffainelli@freebox.fr>
This patch adds support for Marvell Alaska M88E1118R PHY chips. Support for
other M88* PHYs is already there, so there is nothing more to add than its
PHY id.
CC: Dirk Brandewie <dirk.j.brandewie@intel.com>
Signed-off-by: Florian Fainelli <ffainelli@freebox.fr>
Acked-by: Jesse Brandeburg <jesse.brandeburg@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
drivers/net/e1000/e1000_hw.c | 4 +++-
drivers/net/e1000/e1000_hw.h | 1 +
2 files changed, 4 insertions(+), 1 deletions(-)
diff --git a/drivers/net/e1000/e1000_hw.c b/drivers/net/e1000/e1000_hw.c
index aed223b..7501d97 100644
--- a/drivers/net/e1000/e1000_hw.c
+++ b/drivers/net/e1000/e1000_hw.c
@@ -124,6 +124,7 @@ static s32 e1000_set_phy_type(struct e1000_hw *hw)
case M88E1000_I_PHY_ID:
case M88E1011_I_PHY_ID:
case M88E1111_I_PHY_ID:
+ case M88E1118_E_PHY_ID:
hw->phy_type = e1000_phy_m88;
break;
case IGP01E1000_I_PHY_ID:
@@ -3222,7 +3223,8 @@ static s32 e1000_detect_gig_phy(struct e1000_hw *hw)
break;
case e1000_ce4100:
if ((hw->phy_id == RTL8211B_PHY_ID) ||
- (hw->phy_id == RTL8201N_PHY_ID))
+ (hw->phy_id == RTL8201N_PHY_ID) ||
+ (hw->phy_id == M88E1118_E_PHY_ID))
match = true;
break;
case e1000_82541:
diff --git a/drivers/net/e1000/e1000_hw.h b/drivers/net/e1000/e1000_hw.h
index 196eeda..c70b23d 100644
--- a/drivers/net/e1000/e1000_hw.h
+++ b/drivers/net/e1000/e1000_hw.h
@@ -2917,6 +2917,7 @@ struct e1000_host_command_info {
#define M88E1000_14_PHY_ID M88E1000_E_PHY_ID
#define M88E1011_I_REV_4 0x04
#define M88E1111_I_PHY_ID 0x01410CC0
+#define M88E1118_E_PHY_ID 0x01410E40
#define L1LXT971A_PHY_ID 0x001378E0
#define RTL8211B_PHY_ID 0x001CC910
--
1.7.3.5
^ permalink raw reply related
* [net-2.6 2/7] ixgbe: fix variable set but not used warnings by gcc 4.6
From: Jeff Kirsher @ 2011-01-28 6:18 UTC (permalink / raw)
To: davem; +Cc: Emil Tantilov, netdev, gospo, bphilips, Jeff Kirsher
In-Reply-To: <1296195535-2990-1-git-send-email-jeffrey.t.kirsher@intel.com>
From: Emil Tantilov <emil.s.tantilov@intel.com>
Caught with gcc 4.6 -Wunused-but-set-variable
Remove unused napi_vectors variable.
Fix the use of reset_bit in ixgbe_reset_hw_X540()
Signed-off-by: Emil Tantilov <emil.s.tantilov@intel.com>
Tested-by: Stephen Ko <stephen.s.ko@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
drivers/net/ixgbe/ixgbe_main.c | 3 ---
drivers/net/ixgbe/ixgbe_x540.c | 6 +++---
2 files changed, 3 insertions(+), 6 deletions(-)
diff --git a/drivers/net/ixgbe/ixgbe_main.c b/drivers/net/ixgbe/ixgbe_main.c
index 602078b..44a1cf0 100644
--- a/drivers/net/ixgbe/ixgbe_main.c
+++ b/drivers/net/ixgbe/ixgbe_main.c
@@ -4863,16 +4863,13 @@ static int ixgbe_alloc_q_vectors(struct ixgbe_adapter *adapter)
{
int q_idx, num_q_vectors;
struct ixgbe_q_vector *q_vector;
- int napi_vectors;
int (*poll)(struct napi_struct *, int);
if (adapter->flags & IXGBE_FLAG_MSIX_ENABLED) {
num_q_vectors = adapter->num_msix_vectors - NON_Q_VECTORS;
- napi_vectors = adapter->num_rx_queues;
poll = &ixgbe_clean_rxtx_many;
} else {
num_q_vectors = 1;
- napi_vectors = 1;
poll = &ixgbe_poll;
}
diff --git a/drivers/net/ixgbe/ixgbe_x540.c b/drivers/net/ixgbe/ixgbe_x540.c
index 3a89239..f2518b0 100644
--- a/drivers/net/ixgbe/ixgbe_x540.c
+++ b/drivers/net/ixgbe/ixgbe_x540.c
@@ -133,17 +133,17 @@ static s32 ixgbe_reset_hw_X540(struct ixgbe_hw *hw)
}
ctrl = IXGBE_READ_REG(hw, IXGBE_CTRL);
- IXGBE_WRITE_REG(hw, IXGBE_CTRL, (ctrl | IXGBE_CTRL_RST));
+ IXGBE_WRITE_REG(hw, IXGBE_CTRL, (ctrl | reset_bit));
IXGBE_WRITE_FLUSH(hw);
/* Poll for reset bit to self-clear indicating reset is complete */
for (i = 0; i < 10; i++) {
udelay(1);
ctrl = IXGBE_READ_REG(hw, IXGBE_CTRL);
- if (!(ctrl & IXGBE_CTRL_RST))
+ if (!(ctrl & reset_bit))
break;
}
- if (ctrl & IXGBE_CTRL_RST) {
+ if (ctrl & reset_bit) {
status = IXGBE_ERR_RESET_FAILED;
hw_dbg(hw, "Reset polling failed to complete.\n");
}
--
1.7.3.5
^ permalink raw reply related
* [net-2.6 3/7] ixgbe: fix for 82599 erratum on Header Splitting.
From: Jeff Kirsher @ 2011-01-28 6:18 UTC (permalink / raw)
To: davem; +Cc: Don Skidmore, netdev, gospo, bphilips, stable, Jeff Kirsher
In-Reply-To: <1296195535-2990-1-git-send-email-jeffrey.t.kirsher@intel.com>
From: Don Skidmore <donald.c.skidmore@intel.com>
We have found a hardware erratum on 82599 hardware that can lead to
unpredictable behavior when Header Splitting mode is enabled. So
we are no longer enabling this feature on affected hardware.
Please see the 82599 Specification Update for more information.
CC: stable@kernel.org
Signed-off-by: Don Skidmore <donald.c.skidmore@intel.com>
Tested-by: Stephen Ko <stephen.s.ko@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
drivers/net/ixgbe/ixgbe_main.c | 11 +++++++++--
1 files changed, 9 insertions(+), 2 deletions(-)
diff --git a/drivers/net/ixgbe/ixgbe_main.c b/drivers/net/ixgbe/ixgbe_main.c
index 44a1cf0..1495b74 100644
--- a/drivers/net/ixgbe/ixgbe_main.c
+++ b/drivers/net/ixgbe/ixgbe_main.c
@@ -3176,9 +3176,16 @@ static void ixgbe_set_rx_buffer_len(struct ixgbe_adapter *adapter)
u32 mhadd, hlreg0;
/* Decide whether to use packet split mode or not */
+ /* On by default */
+ adapter->flags |= IXGBE_FLAG_RX_PS_ENABLED;
+
/* Do not use packet split if we're in SR-IOV Mode */
- if (!adapter->num_vfs)
- adapter->flags |= IXGBE_FLAG_RX_PS_ENABLED;
+ if (adapter->num_vfs)
+ adapter->flags &= ~IXGBE_FLAG_RX_PS_ENABLED;
+
+ /* Disable packet split due to 82599 erratum #45 */
+ if (hw->mac.type == ixgbe_mac_82599EB)
+ adapter->flags &= ~IXGBE_FLAG_RX_PS_ENABLED;
/* Set the RX buffer length according to the mode */
if (adapter->flags & IXGBE_FLAG_RX_PS_ENABLED) {
--
1.7.3.5
^ permalink raw reply related
* [net-2.6 4/7] ixgbe: limit VF access to network traffic
From: Jeff Kirsher @ 2011-01-28 6:18 UTC (permalink / raw)
To: davem; +Cc: Alexander Duyck, netdev, gospo, bphilips, Jeff Kirsher
In-Reply-To: <1296195535-2990-1-git-send-email-jeffrey.t.kirsher@intel.com>
From: Alexander Duyck <alexander.h.duyck@intel.com>
This change fixes VM pool allocation issues based on MAC address filtering,
as well as limits the scope of VF access to promiscuous mode.
Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Acked-by: Greg Rose <gregory.v.rose@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
drivers/net/ixgbe/ixgbe_common.c | 3 +++
drivers/net/ixgbe/ixgbe_sriov.c | 2 --
2 files changed, 3 insertions(+), 2 deletions(-)
diff --git a/drivers/net/ixgbe/ixgbe_common.c b/drivers/net/ixgbe/ixgbe_common.c
index d5ede2d..ebbda7d 100644
--- a/drivers/net/ixgbe/ixgbe_common.c
+++ b/drivers/net/ixgbe/ixgbe_common.c
@@ -1370,6 +1370,9 @@ s32 ixgbe_init_rx_addrs_generic(struct ixgbe_hw *hw)
hw_dbg(hw, " New MAC Addr =%pM\n", hw->mac.addr);
hw->mac.ops.set_rar(hw, 0, hw->mac.addr, 0, IXGBE_RAH_AV);
+
+ /* clear VMDq pool/queue selection for RAR 0 */
+ hw->mac.ops.clear_vmdq(hw, 0, IXGBE_CLEAR_VMDQ_ALL);
}
hw->addr_ctrl.overflow_promisc = 0;
diff --git a/drivers/net/ixgbe/ixgbe_sriov.c b/drivers/net/ixgbe/ixgbe_sriov.c
index 47b1573..187b3a1 100644
--- a/drivers/net/ixgbe/ixgbe_sriov.c
+++ b/drivers/net/ixgbe/ixgbe_sriov.c
@@ -110,12 +110,10 @@ static int ixgbe_set_vf_vlan(struct ixgbe_adapter *adapter, int add, int vid,
return adapter->hw.mac.ops.set_vfta(&adapter->hw, vid, vf, (bool)add);
}
-
static void ixgbe_set_vmolr(struct ixgbe_hw *hw, u32 vf, bool aupe)
{
u32 vmolr = IXGBE_READ_REG(hw, IXGBE_VMOLR(vf));
vmolr |= (IXGBE_VMOLR_ROMPE |
- IXGBE_VMOLR_ROPE |
IXGBE_VMOLR_BAM);
if (aupe)
vmolr |= IXGBE_VMOLR_AUPE;
--
1.7.3.5
^ permalink raw reply related
* [net-2.6 6/7] ixgbe: cleanup variable initialization
From: Jeff Kirsher @ 2011-01-28 6:18 UTC (permalink / raw)
To: davem; +Cc: Don Skidmore, netdev, gospo, bphilips, Jeff Kirsher
In-Reply-To: <1296195535-2990-1-git-send-email-jeffrey.t.kirsher@intel.com>
From: Don Skidmore <donald.c.skidmore@intel.com>
The ixgbe_fcoe_ddp_get function wasn't initializing one of its variables
and this was producing compiler warnings. This patch cleans that up.
Signed-off-by: Don Skidmore <donald.c.skidmore@intel.com>
Tested-by: Stephen Ko <stephen.s.ko@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
drivers/net/ixgbe/ixgbe_fcoe.c | 2 +-
1 files changed, 1 insertions(+), 1 deletions(-)
diff --git a/drivers/net/ixgbe/ixgbe_fcoe.c b/drivers/net/ixgbe/ixgbe_fcoe.c
index ffac3f6..24d74ca 100644
--- a/drivers/net/ixgbe/ixgbe_fcoe.c
+++ b/drivers/net/ixgbe/ixgbe_fcoe.c
@@ -165,7 +165,7 @@ int ixgbe_fcoe_ddp_get(struct net_device *netdev, u16 xid,
unsigned int thisoff = 0;
unsigned int thislen = 0;
u32 fcbuff, fcdmarw, fcfltrw;
- dma_addr_t addr;
+ dma_addr_t addr = 0;
if (!netdev || !sgl)
return 0;
--
1.7.3.5
^ permalink raw reply related
* [net-2.6 5/7] ixgbe: DDP last buffer size work around
From: Jeff Kirsher @ 2011-01-28 6:18 UTC (permalink / raw)
To: davem; +Cc: Amir Hanania, netdev, gospo, bphilips, Jeff Kirsher
In-Reply-To: <1296195535-2990-1-git-send-email-jeffrey.t.kirsher@intel.com>
From: Amir Hanania <amir.hanania@intel.com>
We found a hardware erratum on 82599 hardware that can lead to buffer
overwriting if the last buffer in FCoE DDP is exactly PAGE_SIZE.
If this is the case, we will make sure that there is no HW access to
this buffer.
Please see the 82599 Specification Update for more information.
Signed-off-by: Amir Hanania <amir.hanania@intel.com>
Tested-by: Ross Brattain <ross.b.brattain@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
drivers/net/ixgbe/ixgbe_fcoe.c | 19 +++++++++++++++++++
1 files changed, 19 insertions(+), 0 deletions(-)
diff --git a/drivers/net/ixgbe/ixgbe_fcoe.c b/drivers/net/ixgbe/ixgbe_fcoe.c
index 6342d48..ffac3f6 100644
--- a/drivers/net/ixgbe/ixgbe_fcoe.c
+++ b/drivers/net/ixgbe/ixgbe_fcoe.c
@@ -254,6 +254,25 @@ int ixgbe_fcoe_ddp_get(struct net_device *netdev, u16 xid,
/* only the last buffer may have non-full bufflen */
lastsize = thisoff + thislen;
+ /*
+ * lastsize can not be PAGE_SIZE.
+ * If it is then adding another buffer with lastsize = 1.
+ * Since lastsize is 1 there will be no HW access to this buffer.
+ */
+ if (lastsize == PAGE_SIZE) {
+ if (j == (IXGBE_BUFFCNT_MAX - 1)) {
+ e_err(drv, "xid=%x:%d,%d,%d:addr=%llx "
+ "not enough descriptors only since lastsize "
+ "is PAGE_SIZE\n",
+ xid, i, j, dmacount, (u64)addr);
+ goto out_noddp_free;
+ }
+
+ ddp->udl[j+1] = ddp->udl[j];
+ j++;
+ lastsize = 1;
+ }
+
fcbuff = (IXGBE_FCBUFF_4KB << IXGBE_FCBUFF_BUFFSIZE_SHIFT);
fcbuff |= ((j & 0xff) << IXGBE_FCBUFF_BUFFCNT_SHIFT);
fcbuff |= (firstoff << IXGBE_FCBUFF_OFFSET_SHIFT);
--
1.7.3.5
^ permalink raw reply related
* [net-2.6 7/7] ixgbe: update version string
From: Jeff Kirsher @ 2011-01-28 6:18 UTC (permalink / raw)
To: davem; +Cc: Don Skidmore, netdev, gospo, bphilips, Jeff Kirsher
In-Reply-To: <1296195535-2990-1-git-send-email-jeffrey.t.kirsher@intel.com>
From: Don Skidmore <donald.c.skidmore@intel.com>
This will synchronize the version string with that of the latest source
forge driver which shares its functionality.
Signed-off-by: Don Skidmore <donald.c.skidmore@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
drivers/net/ixgbe/ixgbe_main.c | 2 +-
1 files changed, 1 insertions(+), 1 deletions(-)
diff --git a/drivers/net/ixgbe/ixgbe_main.c b/drivers/net/ixgbe/ixgbe_main.c
index 1495b74..83e13a3 100644
--- a/drivers/net/ixgbe/ixgbe_main.c
+++ b/drivers/net/ixgbe/ixgbe_main.c
@@ -52,7 +52,7 @@ char ixgbe_driver_name[] = "ixgbe";
static const char ixgbe_driver_string[] =
"Intel(R) 10 Gigabit PCI Express Network Driver";
-#define DRV_VERSION "3.0.12-k2"
+#define DRV_VERSION "3.2.9-k2"
const char ixgbe_driver_version[] = DRV_VERSION;
static char ixgbe_copyright[] = "Copyright (c) 1999-2010 Intel Corporation.";
--
1.7.3.5
^ permalink raw reply related
* Re: [PATCH] RFC: ipv4: share sysctl net/ipv4/conf/DEVNAME/ tables
From: Lucian Adrian Grijincu @ 2011-01-28 6:21 UTC (permalink / raw)
To: Alexey Dobriyan, netdev
In-Reply-To: <20110115104139.GA4816@p183.telecom.by>
[Resent to the list because the last reply got rejected because of HTML]
On Sat, Jan 15, 2011 at 12:41 PM, Alexey Dobriyan <adobriyan@gmail.com> wrote:
> I wonder where interactions with device renaming are handled.
I did some digging and I'm pretty confident that this will not cause
problems with regards to device renaming.
On device rename these are the relevant call stacks:
RENAME
• dev_ioctl
∘ rtnl_lock();
∘ dev_ifsioc(net, &ifr, cmd);
‣ dev_change_name
• dev_get_valid_name
∘ strlcpy(dev->name, name, IFNAMSIZ)
• call_netdevice_notifiers(NETDEV_CHANGENAME);
∘ inetdev_event
‣ devinet_sysctl_unregister(in_dev)
• unregister_sysctl_table(header)
∘ lock sysctl
∘ start_unregistering(header);
‣ if (header->used) { unlock sysctl,
wait_for_completion; lock sysctl }
∘ unlock sysctl
‣ devinet_sysctl_register(in_dev)
∘ rtnl_unlock();
HANDLER
• proc_sys_call_handler
∘ head = grab_header(inode)
‣ sysctl_head_grab
• lock sysctl
• head->used++
• unlock sysctl
∘ if (IS_ERR(head)) return err
∘ devinet_conf_handler
‣ dev_get_by_name(dev, filp->f_path.dentry->d_parent->d_name.name)
∘ sysctl_head_finish(head)
‣ lock sysctl
‣ if (--head->used && unregistering) complete()
‣ unlock sysctl
Compressed:
RENAME (under rtnl lock)
• R1: memcpy(dev->name, newname)
• R2: if the sysctl header is used wait until it's not used any more,
mark header as invalid
HANDLER:
• H1: get header, if header invalid, return error
• H2: dev_get_by_name
• H3: if there's someone waiting to unregister, complete its action
Only one rename can be in progress at a time (because of the
rtnl_lock), so cases like A->B, C->A cannot run in parallel. To
finish a device rename, we need to unregister the sysctl table header
first.
• R2 < H1: a RENAME runs before a HANDLER, then the HANDLER
will fail at H1 (the sysctl header will be made invalid at R2).
• H1 < R2:
∘ HANDLER acquired the header
‣ R1 < H2: dev_get_by_name will not find the device (because R1 renamed it)
‣ R1 > H2: dev_get_by_name will return the correct device
(the name is still valid)
In conclusion, I don't see any race conditions and I don't see how we
could get the wrong device after a rename.
I've posted a new version of the patch with some improvements.
--
.
..: Lucian
^ permalink raw reply
* [PATCH] net: Pre-COW metrics for TCP.
From: David Miller @ 2011-01-28 6:27 UTC (permalink / raw)
To: netdev
TCP is going to record metrics for the connection,
so pre-COW the route metrics at route cache entry
creation time.
This avoids several atomic operations that have to
occur if we COW the metrics after the entry reaches
global visibility.
Signed-off-by: David S. Miller <davem@davemloft.net>
---
include/net/flow.h | 3 ++-
include/net/inet_sock.h | 8 +++++++-
include/net/route.h | 4 ++++
net/ipv4/route.c | 26 +++++++++++++++++++++++---
4 files changed, 36 insertions(+), 5 deletions(-)
diff --git a/include/net/flow.h b/include/net/flow.h
index 240b7f3..1ae901f 100644
--- a/include/net/flow.h
+++ b/include/net/flow.h
@@ -48,7 +48,8 @@ struct flowi {
__u8 proto;
__u8 flags;
-#define FLOWI_FLAG_ANYSRC 0x01
+#define FLOWI_FLAG_ANYSRC 0x01
+#define FLOWI_FLAG_PRECOW_METRICS 0x02
union {
struct {
__be16 sport;
diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
index 8181498..6e6dfd7 100644
--- a/include/net/inet_sock.h
+++ b/include/net/inet_sock.h
@@ -219,7 +219,13 @@ static inline struct request_sock *inet_reqsk_alloc(struct request_sock_ops *ops
static inline __u8 inet_sk_flowi_flags(const struct sock *sk)
{
- return inet_sk(sk)->transparent ? FLOWI_FLAG_ANYSRC : 0;
+ __u8 flags = 0;
+
+ if (inet_sk(sk)->transparent)
+ flags |= FLOWI_FLAG_ANYSRC;
+ if (sk->sk_protocol == IPPROTO_TCP)
+ flags |= FLOWI_FLAG_PRECOW_METRICS;
+ return flags;
}
#endif /* _INET_SOCK_H */
diff --git a/include/net/route.h b/include/net/route.h
index 5677cbf..e586465 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -182,6 +182,8 @@ static inline int ip_route_connect(struct rtable **rp, __be32 dst,
if (inet_sk(sk)->transparent)
fl.flags |= FLOWI_FLAG_ANYSRC;
+ if (protocol == IPPROTO_TCP)
+ fl.flags |= FLOWI_FLAG_PRECOW_METRICS;
if (!dst || !src) {
err = __ip_route_output_key(net, rp, &fl);
@@ -209,6 +211,8 @@ static inline int ip_route_newports(struct rtable **rp, u8 protocol,
fl.proto = protocol;
if (inet_sk(sk)->transparent)
fl.flags |= FLOWI_FLAG_ANYSRC;
+ if (protocol == IPPROTO_TCP)
+ fl.flags |= FLOWI_FLAG_PRECOW_METRICS;
ip_rt_put(*rp);
*rp = NULL;
security_sk_classify_flow(sk, &fl);
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 68cee35..dd57f48 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1857,6 +1857,28 @@ static unsigned int ipv4_default_mtu(const struct dst_entry *dst)
return mtu;
}
+static void rt_init_metrics(struct rtable *rt, struct fib_info *fi)
+{
+ if (!(rt->fl.flags & FLOWI_FLAG_PRECOW_METRICS)) {
+ no_cow:
+ rt->fi = fi;
+ atomic_inc(&fi->fib_clntref);
+ dst_init_metrics(&rt->dst, fi->fib_metrics, true);
+ } else {
+ struct inet_peer *peer;
+
+ if (!rt->peer)
+ rt_bind_peer(rt, 1);
+ peer = rt->peer;
+ if (!peer)
+ goto no_cow;
+ if (inet_metrics_new(peer))
+ memcpy(peer->metrics, fi->fib_metrics,
+ sizeof(u32) * RTAX_MAX);
+ dst_init_metrics(&rt->dst, peer->metrics, false);
+ }
+}
+
static void rt_set_nexthop(struct rtable *rt, struct fib_result *res, u32 itag)
{
struct dst_entry *dst = &rt->dst;
@@ -1866,9 +1888,7 @@ static void rt_set_nexthop(struct rtable *rt, struct fib_result *res, u32 itag)
if (FIB_RES_GW(*res) &&
FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK)
rt->rt_gateway = FIB_RES_GW(*res);
- rt->fi = fi;
- atomic_inc(&fi->fib_clntref);
- dst_init_metrics(dst, fi->fib_metrics, true);
+ rt_init_metrics(rt, fi);
#ifdef CONFIG_IP_ROUTE_CLASSID
dst->tclassid = FIB_RES_NH(*res).nh_tclassid;
#endif
--
1.7.3.4
^ permalink raw reply related
* Re: [net-2.6 3/7] ixgbe: fix for 82599 erratum on Header Splitting.
From: David Miller @ 2011-01-28 6:33 UTC (permalink / raw)
To: jeffrey.t.kirsher; +Cc: donald.c.skidmore, netdev, gospo, bphilips, stable
In-Reply-To: <1296195535-2990-4-git-send-email-jeffrey.t.kirsher@intel.com>
From: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Date: Thu, 27 Jan 2011 22:18:51 -0800
> diff --git a/drivers/net/ixgbe/ixgbe_main.c b/drivers/net/ixgbe/ixgbe_main.c
> index 44a1cf0..1495b74 100644
> --- a/drivers/net/ixgbe/ixgbe_main.c
> +++ b/drivers/net/ixgbe/ixgbe_main.c
> @@ -3176,9 +3176,16 @@ static void ixgbe_set_rx_buffer_len(struct ixgbe_adapter *adapter)
> u32 mhadd, hlreg0;
>
> /* Decide whether to use packet split mode or not */
> + /* On by default */
> + adapter->flags |= IXGBE_FLAG_RX_PS_ENABLED;
> +
Please fix this indentation, it's a TAB then a SPACE character.
^ permalink raw reply
* Re: [net-2.6 5/7] ixgbe: DDP last buffer size work around
From: David Miller @ 2011-01-28 6:34 UTC (permalink / raw)
To: jeffrey.t.kirsher; +Cc: amir.hanania, netdev, gospo, bphilips
In-Reply-To: <1296195535-2990-6-git-send-email-jeffrey.t.kirsher@intel.com>
From: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Date: Thu, 27 Jan 2011 22:18:53 -0800
> From: Amir Hanania <amir.hanania@intel.com>
>
> We found a hardware erratum on 82599 hardware that can lead to buffer
> overwriting if the last buffer in FCoE DDP is exactly PAGE_SIZE.
> If this is the case, we will make sure that there is no HW access to
> this buffer.
>
> Please see the 82599 Specification Update for more information.
>
> Signed-off-by: Amir Hanania <amir.hanania@intel.com>
> Tested-by: Ross Brattain <ross.b.brattain@intel.com>
> Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
PAGE_SIZE is variable, I can't see how the hardware BUGs on all
possible architecture values of PAGE_SIZE.
^ permalink raw reply
* Re: [net-2.6 3/7] ixgbe: fix for 82599 erratum on Header Splitting.
From: Jeff Kirsher @ 2011-01-28 6:58 UTC (permalink / raw)
To: David Miller
Cc: Skidmore, Donald C, bphilips@novell.com, gospo@redhat.com,
stable@kernel.org, netdev@vger.kernel.org
In-Reply-To: <20110127.223339.226789576.davem@davemloft.net>
[-- Attachment #1.1: Type: text/plain, Size: 753 bytes --]
On Thu, 2011-01-27 at 22:33 -0800, David Miller wrote:
> From: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
> Date: Thu, 27 Jan 2011 22:18:51 -0800
>
> > diff --git a/drivers/net/ixgbe/ixgbe_main.c b/drivers/net/ixgbe/ixgbe_main.c
> > index 44a1cf0..1495b74 100644
> > --- a/drivers/net/ixgbe/ixgbe_main.c
> > +++ b/drivers/net/ixgbe/ixgbe_main.c
> > @@ -3176,9 +3176,16 @@ static void ixgbe_set_rx_buffer_len(struct ixgbe_adapter *adapter)
> > u32 mhadd, hlreg0;
> >
> > /* Decide whether to use packet split mode or not */
> > + /* On by default */
> > + adapter->flags |= IXGBE_FLAG_RX_PS_ENABLED;
> > +
>
> Please fix this indentation, it's a TAB then a SPACE character.
Grrr, sorry I did not catch it. Fixing it up now.
[-- Attachment #1.2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 490 bytes --]
[-- Attachment #2: Type: text/plain, Size: 140 bytes --]
_______________________________________________
stable mailing list
stable@linux.kernel.org
http://linux.kernel.org/mailman/listinfo/stable
^ permalink raw reply
* Re: [PATCH V10 01/15] time: Introduce timekeeping_inject_offset
From: Richard Cochran @ 2011-01-28 7:08 UTC (permalink / raw)
To: John Stultz
Cc: Richard Cochran, linux-kernel-u79uwXL29TY76Z2rM5mHXA,
linux-api-u79uwXL29TY76Z2rM5mHXA, netdev-u79uwXL29TY76Z2rM5mHXA,
Alan Cox, Arnd Bergmann, Christoph Lameter, David Miller,
Krzysztof Halasa, Peter Zijlstra, Rodolfo Giometti,
Thomas Gleixner, Benjamin Herrenschmidt, H. Peter Anvin,
Ingo Molnar, Mike Frysinger, Paul Mackerras, Russell King
In-Reply-To: <1296154099.2855.162.camel@work-vm>
On Thu, Jan 27, 2011 at 10:48:19AM -0800, John Stultz wrote:
> While I appreciate you preserving the patch author, and the signoffs are
> right, you really should send the email under your own name.
Guess I've been reading too much spam. It wore off on me.
> The proper style is to keep the mail-header From: the same (ie: Richard
> Cochran <richardcochran-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>), but as the first line of the mail
> body put:
> From: Author Name <author-MBgCfJrXA/zQT0dZR+AlfA@public.gmane.org>
Sorry about this. Arnd already explained this to me, and I did catch
it with V9. This time I forgot.
I would like to get to the bottom of this. Here is what I did:
1. Saved your patch to disk in mbox format using Mutt.
2. git am
3. ... rebase, rebase, rebase, ...
4. git format-patch [options] 1234..abcd
5. Edit cover letter
6. for x in 00*; do mutt -H $x; done
Git format-patch places the "From: John Stultz <john.stultz-QSEj5FYQhm4dnm+yROfE0A@public.gmane.org>"
line with the other mail headers, and so I guess mutt just faithfully
preserves this.
I don't like having to remember to fix this manually. There must be a
better way...
Sorry,
Richard
^ permalink raw reply
* Re: [PATCH 2/4] net: Make NETCONSOLE_DYNAMIC use select CONFIGFS_FS
From: WANG Cong @ 2011-01-28 8:25 UTC (permalink / raw)
To: linux-kernel; +Cc: linux-fsdevel, netdev
In-Reply-To: <1296155430-3796-3-git-send-email-nab@linux-iscsi.org>
On Thu, 27 Jan 2011 11:10:28 -0800, Nicholas A. Bellinger wrote:
> From: Nicholas Bellinger <nab@linux-iscsi.org>
>
> Convert 'depends && SYSFS && CONFIGFS_FS' to 'select CONFIGFS_FS'
>
Acked-by: WANG Cong <xiyou.wangcong@gmail.com>
Thanks.
^ permalink raw reply
* Re: [RFC PATCH] ipsec: fix IPv4 AH alignment on 32 bits
From: Nicolas Dichtel @ 2011-01-28 8:51 UTC (permalink / raw)
To: Herbert Xu; +Cc: David Miller, netdev, christophe.gouault
In-Reply-To: <20110128045108.GA8351@gondor.apana.org.au>
On 28/01/2011 05:51, Herbert Xu wrote:
> David Miller<davem@davemloft.net> wrote:
>>
>> We cannot just start rejecting the old 8-byte alignment on input if
>> Linux has been using an 8-byte alignment since day one.
>>
>> If you want this change to be considered seriously, you need to relax
>> the AH4 input check.
>
> I second your sentiment. However, in this particular case it
> would appear that our old implementation was also overly strict
> in rejecting 32-bit alignment so even if we relax it now it still
> wouldn't work with an old implementation once we reduce the padding
> on output (unless your traffic was one-way only).
Yes, this was my initial problem.
>
> So perhaps an SA configuration flag is needed?
I agree. If David is ok, I will update the patch.
Regards,
Nicolas
^ permalink raw reply
* RE: [PATCH v2] gianfar: Fall back to software tcp/udp checksum on older controllers
From: David Laight @ 2011-01-28 9:10 UTC (permalink / raw)
Cc: netdev, linuxppc-dev
In-Reply-To: <640295.36173.qm@web37608.mail.mud.yahoo.com>
> + if (unlikely(gfar_has_errata(priv, GFAR_ERRATA_12)
> + && ((unsigned long)fcb % 0x20) > 0x18)) {
You need to check the generated code, but I think you need:
if (unlikely(gfar_has_errata(priv, GFAR_ERRATA_12))
&& unlikely(((unsigned long)fcb % 0x20) > 0x18))
ie unlikely() around both the primitive comparisons.
David
^ permalink raw reply
* Re:
From: Young Chang @ 2011-01-28 20:41 UTC (permalink / raw)
My name is Mr. Young Chang, I work with the Mevas Bank bank here in Hong Kong. I have a business proposal that i will like you to handle with me from my bank worth $19.7m. If interested, kindly get back to me for more details.
^ permalink raw reply
* Re: [PATCH V10 01/15] time: Introduce timekeeping_inject_offset
From: Arnd Bergmann @ 2011-01-28 12:05 UTC (permalink / raw)
To: Richard Cochran
Cc: John Stultz, Richard Cochran, linux-kernel-u79uwXL29TY76Z2rM5mHXA,
linux-api-u79uwXL29TY76Z2rM5mHXA, netdev-u79uwXL29TY76Z2rM5mHXA,
Alan Cox, Christoph Lameter, David Miller, Krzysztof Halasa,
Peter Zijlstra, Rodolfo Giometti, Thomas Gleixner,
Benjamin Herrenschmidt, H. Peter Anvin, Ingo Molnar,
Mike Frysinger, Paul Mackerras, Russell King
In-Reply-To: <20110128070800.GA3225-7KxsofuKt4IfAd9E5cN8NEzG7cXyKsk/@public.gmane.org>
On Friday 28 January 2011, Richard Cochran wrote:
> I would like to get to the bottom of this. Here is what I did:
>
> 1. Saved your patch to disk in mbox format using Mutt.
> 2. git am
> 3. ... rebase, rebase, rebase, ...
> 4. git format-patch [options] 1234..abcd
> 5. Edit cover letter
> 6. for x in 00*; do mutt -H $x; done
>
> Git format-patch places the "From: John Stultz <john.stultz-QSEj5FYQhm4dnm+yROfE0A@public.gmane.org>"
> line with the other mail headers, and so I guess mutt just faithfully
> preserves this.
>
> I don't like having to remember to fix this manually. There must be a
> better way...
The problem is step 6. The output of git format-patch does not work when
sending with mutt. The easiest solution is to send with git send-email,
which does the same as mutt -H, but gets it right.
Arnd
^ permalink raw reply
* Re: Realtek r8168C / r8169 driver VLAN TAG stripping
From: Francois Romieu @ 2011-01-28 12:06 UTC (permalink / raw)
To: Anand Raj Manickam; +Cc: netdev, Hayes, Ivan Vecera
In-Reply-To: <AANLkTin17BgJDdTbSMork8XEw4PVz_KrVBZvHdQwTRNH@mail.gmail.com>
Added Ivan to the Cc:. He has got a 8168c with XID 1c4000c0 and may tell if
hardware VLAN works for him or not.
Anand Raj Manickam <anandrm@gmail.com> :
> On Thu, Jan 27, 2011 at 10:20 PM, Francois Romieu <romieu@fr.zoreil.com> wrote:
> > Anand Raj Manickam <anandrm@gmail.com> :
> >> On Thu, Jan 27, 2011 at 8:37 PM, Francois Romieu <romieu@fr.zoreil.com> wrote:
> >> > Anand Raj Manickam <anandrm@gmail.com> :
> > [...]
> >> > - ip addr show
> >>
> >> 3: eth0: <BROADCAST,MULTICAST,UP,10000> mtu 1500 qdisc pfifo_fast qlen 1000
> >> link/ether 00:17:54:00:f6:62 brd ff:ff:ff:ff:ff:ff
> >> inet 172.16.1.1/16 brd 172.16.255.255 scope global eth0
> >> inet6 fe80::217:54ff:fe00:f662/64 scope link
> >> valid_lft forever preferred_lft forever
> >>
> >> 8: eth0.50@eth0: <BROADCAST,MULTICAST,UP,10000> mtu 1500 qdisc noqueue
> >> link/ether 00:17:54:00:f6:62 brd ff:ff:ff:ff:ff:ff
> >> inet 172.16.10.10/24 brd 172.16.10.255 scope global eth0.50
> >> inet6 fe80::217:54ff:fe00:f662/64 scope link
> >> valid_lft forever preferred_lft forever
> >
> > Could you try again after issuing :
> >
> > ip addr del 172.16.1.1/16 brd 172.16.255.255 dev eth0
>
>
> I did try this NO luck ;-(
>
> > then send the unabbreviated "ip addr show" and "ip route show all" if
> > things do not perform better.
> >
>
> ip addr show
> 1: lo: <LOOPBACK,UP,10000> mtu 16436 qdisc noqueue
> link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
> inet 127.0.0.1/8 scope host lo
> inet6 ::1/128 scope host
> valid_lft forever preferred_lft forever
> 2: sit0: <NOARP> mtu 1480 qdisc noop
> link/sit 0.0.0.0 brd 0.0.0.0
> 3: eth0: <BROADCAST,MULTICAST,UP,10000> mtu 1500 qdisc pfifo_fast qlen 1000
> link/ether 00:17:54:00:f6:62 brd ff:ff:ff:ff:ff:ff
> inet6 fe80::217:54ff:fe00:f662/64 scope link
> valid_lft forever preferred_lft forever
> 4: eth1: <NO-CARRIER,BROADCAST,MULTICAST,UP> mtu 1500 qdisc pfifo_fast qlen 1000
> link/ether 00:17:54:00:f6:63 brd ff:ff:ff:ff:ff:ff
> 5: eth2: <BROADCAST,MULTICAST,UP,10000> mtu 1500 qdisc pfifo_fast qlen 1000
> link/ether 00:30:67:09:2c:b9 brd ff:ff:ff:ff:ff:ff
> inet 10.1.1.2/24 brd 10.1.1.255 scope global eth2
> inet6 fe80::230:67ff:fe09:2cb9/64 scope link
> valid_lft forever preferred_lft forever
> 6: eth3: <BROADCAST,MULTICAST> mtu 1500 qdisc noop qlen 1000
> link/ether 00:17:54:00:65:6b brd ff:ff:ff:ff:ff:ff
> 7: eth4: <BROADCAST,MULTICAST,UP,10000> mtu 1500 qdisc pfifo_fast qlen 1000
> link/ether 00:17:54:00:65:6a brd ff:ff:ff:ff:ff:ff
> inet 192.168.138.155/24 brd 192.168.138.255 scope global eth4
> inet6 fe80::217:54ff:fe00:656a/64 scope link
> valid_lft forever preferred_lft forever
> 8: eth0.50@eth0: <BROADCAST,MULTICAST,UP,10000> mtu 1500 qdisc noqueue
> link/ether 00:17:54:00:f6:62 brd ff:ff:ff:ff:ff:ff
> inet 172.16.10.10/24 brd 172.16.10.255 scope global eth0.50
> inet6 fe80::217:54ff:fe00:f662/64 scope link
> valid_lft forever preferred_lft forever
(mostly sequential hardware MAC addresses)
Which Arkino product is it ? Quad (+1) port switch / hub ? AK1140 ?
Forget the "ip route show all" for now.
[...]
> >> The same config works on forcedeth
> >
> > What do you call "same config" ?
>
> The Same setup below works on forcedeth driver
So you can remove any single 8168 adapter from eth[0134], replace it with
an external (non-LOM) forcedeth, keep the three remaining 8168s and it
works correctly ?
If your setup includes a card that contains several 8168 chipsets behind
some kind of bridge, it is not exactly the same setup as a single (LOM ?)
forcedeth network adapter.
[...]
> >
> > I am mildly convinced that your config is simple enough to isolate a
> > driver level vlan problem.
>
> The reason why i m sure its on the Driver / Chipset is this ..
[printk removed]
Ok. This is evidence.
Reading my rev1.0 8168c datasheet from may 2007, when there is no tx
offload, no checksumming, the tx descriptor layout should be the same
as the perennial 8169 tx descriptor layout.
Either (1) the VLAN registers and descriptor layout are different for this
chipset or (2) something prevents the register / descriptor write (read ?)
to be completely effective or (3) there is something beyond the 8168 or
(4) there is a 8168 hardware bug.
1 : Hayes may answer. You can give Realtek's own driver a try btw.
2 : Seen before. It could be a software or a (non-8168) hardware one.
I have no idea if your hardware setup includes a single card with
four ports or four independent cards with their own 8168 or worse.
3 : See the hardware setup part of (2).
4 : I don't hope so. Hayes may answer as well.
--
Ueimor
^ permalink raw reply
* Re: Network performance with small packets
From: Michael S. Tsirkin @ 2011-01-28 12:16 UTC (permalink / raw)
To: Shirley Ma; +Cc: David Miller, steved, kvm, netdev
In-Reply-To: <1296163838.1640.53.camel@localhost.localdomain>
On Thu, Jan 27, 2011 at 01:30:38PM -0800, Shirley Ma wrote:
> On Thu, 2011-01-27 at 13:02 -0800, David Miller wrote:
> > > Interesting. Could this be a variant of the now famous
> > bufferbloat then?
> >
> > Sigh, bufferbloat is the new global warming... :-/
>
> Yep, some places become colder, some other places become warmer; Same as
> BW results, sometimes faster, sometimes slower. :)
>
> Shirley
OK, so thinking about it more, maybe the issue is this:
tx becomes full. We process one request and interrupt the guest,
then it adds one request and the queue is full again.
Maybe the following will help it stabilize?
By itself it does nothing, but if you set
all the parameters to a huge value we will
only interrupt when we see an empty ring.
Which might be too much: pls try other values
in the middle: e.g. make bufs half the ring,
or bytes some small value, or packets some
small value etc.
Warning: completely untested.
diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index aac05bc..6769cdc 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -32,6 +32,13 @@
* Using this limit prevents one virtqueue from starving others. */
#define VHOST_NET_WEIGHT 0x80000
+int tx_bytes_coalesce = 0;
+module_param(tx_bytes_coalesce, int, 0644);
+int tx_bufs_coalesce = 0;
+module_param(tx_bufs_coalesce, int, 0644);
+int tx_packets_coalesce = 0;
+module_param(tx_packets_coalesce, int, 0644);
+
enum {
VHOST_NET_VQ_RX = 0,
VHOST_NET_VQ_TX = 1,
@@ -127,6 +134,9 @@ static void handle_tx(struct vhost_net *net)
int err, wmem;
size_t hdr_size;
struct socket *sock;
+ int bytes_coalesced = 0;
+ int bufs_coalesced = 0;
+ int packets_coalesced = 0;
/* TODO: check that we are running from vhost_worker? */
sock = rcu_dereference_check(vq->private_data, 1);
@@ -196,14 +206,26 @@ static void handle_tx(struct vhost_net *net)
if (err != len)
pr_debug("Truncated TX packet: "
" len %d != %zd\n", err, len);
- vhost_add_used_and_signal(&net->dev, vq, head, 0);
total_len += len;
+ packets_coalesced += 1;
+ bytes_coalesced += len;
+ bufs_coalesced += in;
+ if (unlikely(packets_coalesced > tx_packets_coalesce ||
+ bytes_coalesced > tx_bytes_coalesce ||
+ bufs_coalesced > tx_bufs_coalesce))
+ vhost_add_used_and_signal(&net->dev, vq, head, 0);
+ else
+ vhost_add_used(vq, head, 0);
if (unlikely(total_len >= VHOST_NET_WEIGHT)) {
vhost_poll_queue(&vq->poll);
break;
}
}
+ if (likely(packets_coalesced > tx_packets_coalesce ||
+ bytes_coalesced > tx_bytes_coalesce ||
+ bufs_coalesced > tx_bufs_coalesce))
+ vhost_signal(&net->dev, vq);
mutex_unlock(&vq->mutex);
}
^ permalink raw reply related
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox