* [PATCH net-next v6 1/9] forcedeth: fix stats on hardware without extended stats support
2011-11-16 22:15 [PATCH net-next v6 0/9] net-sysfs+forcedeth: stats & debug enhancements David Decotigny
@ 2011-11-16 22:15 ` David Decotigny
2011-11-16 22:15 ` [PATCH net-next v6 2/9] net-sysfs: fixed minor sparse warning David Decotigny
` (7 subsequent siblings)
8 siblings, 0 replies; 22+ messages in thread
From: David Decotigny @ 2011-11-16 22:15 UTC (permalink / raw)
To: netdev, linux-kernel
Cc: David S. Miller, Ian Campbell, Eric Dumazet, Jeff Kirsher,
Ben Hutchings, Jiri Pirko, Joe Perches, Szymon Janc,
Richard Jones, Ayaz Abdulla, david decotigny
From: david decotigny <david.decotigny@google.com>
This change makes sure that tx_packets/rx_bytes ifconfig counters are
updated even on NICs that don't provide hardware support for these
stats: they are now updated in software. For the sake of consistency,
we also now have tx_bytes updated in software (hardware counters
include ethernet CRC, and software doesn't account for it).
This reverts parts of:
- "forcedeth: statistics optimization" (21828163b2)
- "forcedeth: Improve stats counters" (0bdfea8ba8)
- "forcedeth: remove unneeded stats updates" (4687f3f364)
Tested:
pktgen + loopback (http://patchwork.ozlabs.org/patch/124698/)
reports identical tx_packets/rx_packets and tx_bytes/rx_bytes.
Signed-off-by: David Decotigny <david.decotigny@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
(cherry picked from commit 898bdf2cb43eb0a962c397eb4dd1aec2c7211be2)
---
drivers/net/ethernet/nvidia/forcedeth.c | 36 +++++++++++++++++++++++-------
1 files changed, 27 insertions(+), 9 deletions(-)
diff --git a/drivers/net/ethernet/nvidia/forcedeth.c b/drivers/net/ethernet/nvidia/forcedeth.c
index e8a5ae3..374625b 100644
--- a/drivers/net/ethernet/nvidia/forcedeth.c
+++ b/drivers/net/ethernet/nvidia/forcedeth.c
@@ -609,7 +609,7 @@ struct nv_ethtool_str {
};
static const struct nv_ethtool_str nv_estats_str[] = {
- { "tx_bytes" },
+ { "tx_bytes" }, /* includes Ethernet FCS CRC */
{ "tx_zero_rexmt" },
{ "tx_one_rexmt" },
{ "tx_many_rexmt" },
@@ -637,7 +637,7 @@ static const struct nv_ethtool_str nv_estats_str[] = {
/* version 2 stats */
{ "tx_deferral" },
{ "tx_packets" },
- { "rx_bytes" },
+ { "rx_bytes" }, /* includes Ethernet FCS CRC */
{ "tx_pause" },
{ "rx_pause" },
{ "rx_drop_frame" },
@@ -649,7 +649,7 @@ static const struct nv_ethtool_str nv_estats_str[] = {
};
struct nv_ethtool_stats {
- u64 tx_bytes;
+ u64 tx_bytes; /* should be ifconfig->tx_bytes + 4*tx_packets */
u64 tx_zero_rexmt;
u64 tx_one_rexmt;
u64 tx_many_rexmt;
@@ -670,14 +670,14 @@ struct nv_ethtool_stats {
u64 rx_unicast;
u64 rx_multicast;
u64 rx_broadcast;
- u64 rx_packets;
+ u64 rx_packets; /* should be ifconfig->rx_packets */
u64 rx_errors_total;
u64 tx_errors_total;
/* version 2 stats */
u64 tx_deferral;
- u64 tx_packets;
- u64 rx_bytes;
+ u64 tx_packets; /* should be ifconfig->tx_packets */
+ u64 rx_bytes; /* should be ifconfig->rx_bytes + 4*rx_packets */
u64 tx_pause;
u64 rx_pause;
u64 rx_drop_frame;
@@ -1706,10 +1706,17 @@ static struct net_device_stats *nv_get_stats(struct net_device *dev)
if (np->driver_data & (DEV_HAS_STATISTICS_V1|DEV_HAS_STATISTICS_V2|DEV_HAS_STATISTICS_V3)) {
nv_get_hw_stats(dev);
+ /*
+ * Note: because HW stats are not always available and
+ * for consistency reasons, the following ifconfig
+ * stats are managed by software: rx_bytes, tx_bytes,
+ * rx_packets and tx_packets. The related hardware
+ * stats reported by ethtool should be equivalent to
+ * these ifconfig stats, with 4 additional bytes per
+ * packet (Ethernet FCS CRC).
+ */
+
/* copy to net_device stats */
- dev->stats.tx_packets = np->estats.tx_packets;
- dev->stats.rx_bytes = np->estats.rx_bytes;
- dev->stats.tx_bytes = np->estats.tx_bytes;
dev->stats.tx_fifo_errors = np->estats.tx_fifo_errors;
dev->stats.tx_carrier_errors = np->estats.tx_carrier_errors;
dev->stats.rx_crc_errors = np->estats.rx_crc_errors;
@@ -2380,6 +2387,9 @@ static int nv_tx_done(struct net_device *dev, int limit)
if (flags & NV_TX_ERROR) {
if ((flags & NV_TX_RETRYERROR) && !(flags & NV_TX_RETRYCOUNT_MASK))
nv_legacybackoff_reseed(dev);
+ } else {
+ dev->stats.tx_packets++;
+ dev->stats.tx_bytes += np->get_tx_ctx->skb->len;
}
dev_kfree_skb_any(np->get_tx_ctx->skb);
np->get_tx_ctx->skb = NULL;
@@ -2390,6 +2400,9 @@ static int nv_tx_done(struct net_device *dev, int limit)
if (flags & NV_TX2_ERROR) {
if ((flags & NV_TX2_RETRYERROR) && !(flags & NV_TX2_RETRYCOUNT_MASK))
nv_legacybackoff_reseed(dev);
+ } else {
+ dev->stats.tx_packets++;
+ dev->stats.tx_bytes += np->get_tx_ctx->skb->len;
}
dev_kfree_skb_any(np->get_tx_ctx->skb);
np->get_tx_ctx->skb = NULL;
@@ -2429,6 +2442,9 @@ static int nv_tx_done_optimized(struct net_device *dev, int limit)
else
nv_legacybackoff_reseed(dev);
}
+ } else {
+ dev->stats.tx_packets++;
+ dev->stats.tx_bytes += np->get_tx_ctx->skb->len;
}
dev_kfree_skb_any(np->get_tx_ctx->skb);
@@ -2678,6 +2694,7 @@ static int nv_rx_process(struct net_device *dev, int limit)
skb->protocol = eth_type_trans(skb, dev);
napi_gro_receive(&np->napi, skb);
dev->stats.rx_packets++;
+ dev->stats.rx_bytes += len;
next_pkt:
if (unlikely(np->get_rx.orig++ == np->last_rx.orig))
np->get_rx.orig = np->first_rx.orig;
@@ -2761,6 +2778,7 @@ static int nv_rx_process_optimized(struct net_device *dev, int limit)
}
napi_gro_receive(&np->napi, skb);
dev->stats.rx_packets++;
+ dev->stats.rx_bytes += len;
} else {
dev_kfree_skb(skb);
}
--
1.7.3.1
^ permalink raw reply related [flat|nested] 22+ messages in thread* [PATCH net-next v6 2/9] net-sysfs: fixed minor sparse warning
2011-11-16 22:15 [PATCH net-next v6 0/9] net-sysfs+forcedeth: stats & debug enhancements David Decotigny
2011-11-16 22:15 ` [PATCH net-next v6 1/9] forcedeth: fix stats on hardware without extended stats support David Decotigny
@ 2011-11-16 22:15 ` David Decotigny
2011-11-16 22:15 ` [PATCH net-next v6 3/9] kbuild: document RPS/XPS network Kconfig options David Decotigny
` (6 subsequent siblings)
8 siblings, 0 replies; 22+ messages in thread
From: David Decotigny @ 2011-11-16 22:15 UTC (permalink / raw)
To: netdev, linux-kernel
Cc: David S. Miller, Ian Campbell, Eric Dumazet, Jeff Kirsher,
Ben Hutchings, Jiri Pirko, Joe Perches, Szymon Janc,
Richard Jones, Ayaz Abdulla, David Decotigny
This commit fixes following warning:
net/core/net-sysfs.c:921:6: warning: symbol 'numa_node' shadows an earlier one
include/linux/topology.h:222:1: originally declared here
Signed-off-by: David Decotigny <david.decotigny@google.com>
---
net/core/net-sysfs.c | 12 ++++++------
1 files changed, 6 insertions(+), 6 deletions(-)
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index c71c434..a64382f 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -901,7 +901,7 @@ static ssize_t store_xps_map(struct netdev_queue *queue,
struct xps_map *map, *new_map;
struct xps_dev_maps *dev_maps, *new_dev_maps;
int nonempty = 0;
- int numa_node = -2;
+ int numa_node_id = -2;
if (!capable(CAP_NET_ADMIN))
return -EPERM;
@@ -944,10 +944,10 @@ static ssize_t store_xps_map(struct netdev_queue *queue,
need_set = cpumask_test_cpu(cpu, mask) && cpu_online(cpu);
#ifdef CONFIG_NUMA
if (need_set) {
- if (numa_node == -2)
- numa_node = cpu_to_node(cpu);
- else if (numa_node != cpu_to_node(cpu))
- numa_node = -1;
+ if (numa_node_id == -2)
+ numa_node_id = cpu_to_node(cpu);
+ else if (numa_node_id != cpu_to_node(cpu))
+ numa_node_id = -1;
}
#endif
if (need_set && pos >= map_len) {
@@ -997,7 +997,7 @@ static ssize_t store_xps_map(struct netdev_queue *queue,
if (dev_maps)
kfree_rcu(dev_maps, rcu);
- netdev_queue_numa_node_write(queue, (numa_node >= 0) ? numa_node :
+ netdev_queue_numa_node_write(queue, (numa_node_id >= 0) ? numa_node_id :
NUMA_NO_NODE);
mutex_unlock(&xps_map_mutex);
--
1.7.3.1
^ permalink raw reply related [flat|nested] 22+ messages in thread* [PATCH net-next v6 3/9] kbuild: document RPS/XPS network Kconfig options
2011-11-16 22:15 [PATCH net-next v6 0/9] net-sysfs+forcedeth: stats & debug enhancements David Decotigny
2011-11-16 22:15 ` [PATCH net-next v6 1/9] forcedeth: fix stats on hardware without extended stats support David Decotigny
2011-11-16 22:15 ` [PATCH net-next v6 2/9] net-sysfs: fixed minor sparse warning David Decotigny
@ 2011-11-16 22:15 ` David Decotigny
2011-11-16 23:12 ` Ben Hutchings
2011-11-16 22:15 ` [PATCH net-next v6 4/9] net: new counter for tx_timeout errors in sysfs David Decotigny
` (5 subsequent siblings)
8 siblings, 1 reply; 22+ messages in thread
From: David Decotigny @ 2011-11-16 22:15 UTC (permalink / raw)
To: netdev, linux-kernel
Cc: David S. Miller, Ian Campbell, Eric Dumazet, Jeff Kirsher,
Ben Hutchings, Jiri Pirko, Joe Perches, Szymon Janc,
Richard Jones, Ayaz Abdulla, David Decotigny
This adds a description of RPS/XPS options and allow them to be
changed at make menuconfig time.
It also fixes following checkpatch syntax warnings:
ERROR: trailing whitespace
+^I $
ERROR: trailing whitespace
+^I$
Signed-off-by: David Decotigny <david.decotigny@google.com>
---
net/Kconfig | 22 +++++++++++++++++-----
1 files changed, 17 insertions(+), 5 deletions(-)
diff --git a/net/Kconfig b/net/Kconfig
index a073148..8e2104e 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -10,7 +10,7 @@ menuconfig NET
The reason is that some programs need kernel networking support even
when running on a stand-alone machine that isn't connected to any
other computer.
-
+
If you are upgrading from an older kernel, you
should consider updating your networking tools too because changes
in the kernel and the tools often go hand in hand. The tools are
@@ -217,20 +217,33 @@ source "net/dns_resolver/Kconfig"
source "net/batman-adv/Kconfig"
config RPS
- boolean
+ boolean "Enable Receive Packet Steering"
depends on SMP && SYSFS && USE_GENERIC_SMP_HELPERS
default y
+ help
+ RPS distributes the load of received packet processing
+ across multiple CPUs. If unsure, say Y.
config RFS_ACCEL
- boolean
+ boolean "Enable Hardware Acceleration of RFS"
depends on RPS && GENERIC_HARDIRQS
select CPU_RMAP
default y
+ help
+ This is the hardware version of RPS. On multi-queue network
+ devices, this configures the hardware to distribute the
+ received packets across multiple CPUs. If unsure, say Y.
config XPS
- boolean
+ boolean "Enable Transmit Packet Steering"
depends on SMP && SYSFS && USE_GENERIC_SMP_HELPERS
default y
+ help
+ For multiqueue devices, XPS selects a transmit queue during
+ packet transmission based on configuration. This is done by
+ mapping the CPU transmitting the packet to a queue. XPS can
+ reduce transmit network latency on SMP systems. If unsure,
+ say Y.
config HAVE_BPF_JIT
bool
@@ -274,7 +287,6 @@ config NET_TCPPROBE
Documentation on how to use TCP connection probing can be found
at:
-
http://www.linuxfoundation.org/collaborate/workgroups/networking/tcpprobe
To compile this code as a module, choose M here: the
--
1.7.3.1
^ permalink raw reply related [flat|nested] 22+ messages in thread* Re: [PATCH net-next v6 3/9] kbuild: document RPS/XPS network Kconfig options
2011-11-16 22:15 ` [PATCH net-next v6 3/9] kbuild: document RPS/XPS network Kconfig options David Decotigny
@ 2011-11-16 23:12 ` Ben Hutchings
2011-11-17 1:54 ` David Decotigny
0 siblings, 1 reply; 22+ messages in thread
From: Ben Hutchings @ 2011-11-16 23:12 UTC (permalink / raw)
To: David Decotigny
Cc: netdev, linux-kernel, David S. Miller, Ian Campbell, Eric Dumazet,
Jeff Kirsher, Jiri Pirko, Joe Perches, Szymon Janc, Richard Jones,
Ayaz Abdulla, Tom Herbert
On Wed, 2011-11-16 at 14:15 -0800, David Decotigny wrote:
> This adds a description of RPS/XPS options and allow them to be
> changed at make menuconfig time.
I'm not sure why you think this is necessary.
[...]
> config RPS
> - boolean
> + boolean "Enable Receive Packet Steering"
> depends on SMP && SYSFS && USE_GENERIC_SMP_HELPERS
> default y
> + help
> + RPS distributes the load of received packet processing
> + across multiple CPUs. If unsure, say Y.
>
> config RFS_ACCEL
> - boolean
> + boolean "Enable Hardware Acceleration of RFS"
> depends on RPS && GENERIC_HARDIRQS
> select CPU_RMAP
> default y
> + help
> + This is the hardware version of RPS. On multi-queue network
> + devices, this configures the hardware to distribute the
> + received packets across multiple CPUs. If unsure, say Y.
[...]
There is some confusion/conflation between RPS and RFS in both code and
documentation.
RPS originaly referred to spreading out RX packet processing based on a
flow hash. Most multiqueue devices do this in hardware, which is
commonly referred to as RSS. RSS can be enabled independently of any
networking core features.
RFS refers to directing RX packet processsing of specific flows based on
where the corresponding sockets have been used. RFS acceleration means
that the driver and hardware help with this by changing hardware queue
selection for specific flows.
The RPS Kconfig option controls both RPS and RFS, and various references
to 'RPS' in the code really cover RFS as well.
The RFS_ACCEL Kconfig option enables RFS to support acceleration, and
like most offload features it has no effect without a driver that
specifically supports that. The option only exists to abstract the
slightly odd dependency on GENERIC_HARDIRQS, and I don't think it should
be manually controllable.
Ben.
--
Ben Hutchings, Staff Engineer, Solarflare
Not speaking for my employer; that's the marketing department's job.
They asked us to note that Solarflare product names are trademarked.
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [PATCH net-next v6 3/9] kbuild: document RPS/XPS network Kconfig options
2011-11-16 23:12 ` Ben Hutchings
@ 2011-11-17 1:54 ` David Decotigny
2011-11-17 2:54 ` Ben Hutchings
0 siblings, 1 reply; 22+ messages in thread
From: David Decotigny @ 2011-11-17 1:54 UTC (permalink / raw)
To: Ben Hutchings
Cc: netdev, linux-kernel, David S. Miller, Ian Campbell, Eric Dumazet,
Jeff Kirsher, Jiri Pirko, Joe Perches, Szymon Janc, Richard Jones,
Ayaz Abdulla, Tom Herbert
On Wed, Nov 16, 2011 at 3:12 PM, Ben Hutchings
<bhutchings@solarflare.com> wrote:
> On Wed, 2011-11-16 at 14:15 -0800, David Decotigny wrote:
>> This adds a description of RPS/XPS options and allow them to be
>> changed at make menuconfig time.
>
> I'm not sure why you think this is necessary.
On my copy, make menuconfig doesn't let me change these CONFIG_ items,
unless I add the string after "boolean", which this patch does. I
agree, the help I added after is pure cosmetic.
>> config RFS_ACCEL
[...]
> RFS refers to directing RX packet processsing of specific flows based on
Oops, you are right. I got confused and somehow read "RPS" instead of
"RFS" here. Thank you for the great explanation!
Will update this commit message. I don't know how patchwork handles
the follow-ups (it didn't work yesterday), so I'm afraid I will have
to send yet another batch of updates...
Regards,
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [PATCH net-next v6 3/9] kbuild: document RPS/XPS network Kconfig options
2011-11-17 1:54 ` David Decotigny
@ 2011-11-17 2:54 ` Ben Hutchings
2011-11-17 2:59 ` David Miller
0 siblings, 1 reply; 22+ messages in thread
From: Ben Hutchings @ 2011-11-17 2:54 UTC (permalink / raw)
To: David Decotigny
Cc: netdev, linux-kernel, David S. Miller, Ian Campbell, Eric Dumazet,
Jeff Kirsher, Jiri Pirko, Joe Perches, Szymon Janc, Richard Jones,
Ayaz Abdulla, Tom Herbert
On Wed, 2011-11-16 at 17:54 -0800, David Decotigny wrote:
> On Wed, Nov 16, 2011 at 3:12 PM, Ben Hutchings
> <bhutchings@solarflare.com> wrote:
> > On Wed, 2011-11-16 at 14:15 -0800, David Decotigny wrote:
> >> This adds a description of RPS/XPS options and allow them to be
> >> changed at make menuconfig time.
> >
> > I'm not sure why you think this is necessary.
>
> On my copy, make menuconfig doesn't let me change these CONFIG_ items,
> unless I add the string after "boolean", which this patch does. I
> agree, the help I added after is pure cosmetic.
[...]
I know, but I'm asking why you think it's necessary to make them
optional.
Ben.
--
Ben Hutchings, Staff Engineer, Solarflare
Not speaking for my employer; that's the marketing department's job.
They asked us to note that Solarflare product names are trademarked.
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [PATCH net-next v6 3/9] kbuild: document RPS/XPS network Kconfig options
2011-11-17 2:54 ` Ben Hutchings
@ 2011-11-17 2:59 ` David Miller
2011-11-17 3:19 ` Eric Dumazet
0 siblings, 1 reply; 22+ messages in thread
From: David Miller @ 2011-11-17 2:59 UTC (permalink / raw)
To: bhutchings
Cc: david.decotigny, netdev, linux-kernel, ian.campbell, eric.dumazet,
jeffrey.t.kirsher, jpirko, joe, szymon, rick.jones2, AAbdulla,
therbert
From: Ben Hutchings <bhutchings@solarflare.com>
Date: Thu, 17 Nov 2011 02:54:03 +0000
> On Wed, 2011-11-16 at 17:54 -0800, David Decotigny wrote:
>> On Wed, Nov 16, 2011 at 3:12 PM, Ben Hutchings
>> <bhutchings@solarflare.com> wrote:
>> > On Wed, 2011-11-16 at 14:15 -0800, David Decotigny wrote:
>> >> This adds a description of RPS/XPS options and allow them to be
>> >> changed at make menuconfig time.
>> >
>> > I'm not sure why you think this is necessary.
>>
>> On my copy, make menuconfig doesn't let me change these CONFIG_ items,
>> unless I add the string after "boolean", which this patch does. I
>> agree, the help I added after is pure cosmetic.
> [...]
>
> I know, but I'm asking why you think it's necessary to make them
> optional.
They shouldn't be, they are internal dependency descriptions and should
be invisible to the user.
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [PATCH net-next v6 3/9] kbuild: document RPS/XPS network Kconfig options
2011-11-17 2:59 ` David Miller
@ 2011-11-17 3:19 ` Eric Dumazet
0 siblings, 0 replies; 22+ messages in thread
From: Eric Dumazet @ 2011-11-17 3:19 UTC (permalink / raw)
To: David Miller
Cc: bhutchings, david.decotigny, netdev, linux-kernel, ian.campbell,
jeffrey.t.kirsher, jpirko, joe, szymon, rick.jones2, AAbdulla,
therbert
Le mercredi 16 novembre 2011 à 21:59 -0500, David Miller a écrit :
> They shouldn't be, they are internal dependency descriptions and should
> be invisible to the user.
Unless someone has evidence of a performance cost :)
The rps_lock() adds an atomic operation in netif_rx(), even if RPS is
not really used/configured...
^ permalink raw reply [flat|nested] 22+ messages in thread
* [PATCH net-next v6 4/9] net: new counter for tx_timeout errors in sysfs
2011-11-16 22:15 [PATCH net-next v6 0/9] net-sysfs+forcedeth: stats & debug enhancements David Decotigny
` (2 preceding siblings ...)
2011-11-16 22:15 ` [PATCH net-next v6 3/9] kbuild: document RPS/XPS network Kconfig options David Decotigny
@ 2011-11-16 22:15 ` David Decotigny
2011-11-16 22:15 ` [PATCH net-next v6 5/9] forcedeth: Add messages to indicate using MSI or MSI-X David Decotigny
` (4 subsequent siblings)
8 siblings, 0 replies; 22+ messages in thread
From: David Decotigny @ 2011-11-16 22:15 UTC (permalink / raw)
To: netdev, linux-kernel
Cc: David S. Miller, Ian Campbell, Eric Dumazet, Jeff Kirsher,
Ben Hutchings, Jiri Pirko, Joe Perches, Szymon Janc,
Richard Jones, Ayaz Abdulla, David Decotigny
This adds the /sys/class/net/DEV/queues/Q/tx_timeout attribute
containing the total number of timeout events on the given queue. It
is always available with CONFIG_SYSFS, independently of
CONFIG_RPS/XPS.
Credits to Stephen Hemminger for a preliminary version of this patch.
Tested:
without CONFIG_SYSFS (compilation only)
with sysfs and without CONFIG_RPS & CONFIG_XPS
with sysfs and without CONFIG_RPS
with sysfs and without CONFIG_XPS
with defaults
Signed-off-by: David Decotigny <david.decotigny@google.com>
---
include/linux/netdevice.h | 12 ++++++++++--
net/core/net-sysfs.c | 37 +++++++++++++++++++++++++++++++------
net/sched/sch_generic.c | 1 +
3 files changed, 42 insertions(+), 8 deletions(-)
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index cbeb586..9e6cf09 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -530,7 +530,7 @@ struct netdev_queue {
struct Qdisc *qdisc;
unsigned long state;
struct Qdisc *qdisc_sleeping;
-#if defined(CONFIG_RPS) || defined(CONFIG_XPS)
+#ifdef CONFIG_SYSFS
struct kobject kobj;
#endif
#if defined(CONFIG_XPS) && defined(CONFIG_NUMA)
@@ -545,6 +545,12 @@ struct netdev_queue {
* please use this field instead of dev->trans_start
*/
unsigned long trans_start;
+
+ /*
+ * Number of TX timeouts for this queue
+ * (/sys/class/net/DEV/Q/trans_timeout)
+ */
+ unsigned long trans_timeout;
} ____cacheline_aligned_in_smp;
static inline int netdev_queue_numa_node_read(const struct netdev_queue *q)
@@ -1184,9 +1190,11 @@ struct net_device {
unsigned char broadcast[MAX_ADDR_LEN]; /* hw bcast add */
-#if defined(CONFIG_RPS) || defined(CONFIG_XPS)
+#ifdef CONFIG_SYSFS
struct kset *queues_kset;
+#endif
+#ifdef CONFIG_RPS
struct netdev_rx_queue *_rx;
/* Number of RX queues allocated at register_netdev() time */
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index a64382f..602b141 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -780,7 +780,7 @@ net_rx_queue_update_kobjects(struct net_device *net, int old_num, int new_num)
#endif
}
-#ifdef CONFIG_XPS
+#ifdef CONFIG_SYSFS
/*
* netdev_queue sysfs structures and functions.
*/
@@ -826,6 +826,23 @@ static const struct sysfs_ops netdev_queue_sysfs_ops = {
.store = netdev_queue_attr_store,
};
+static ssize_t show_trans_timeout(struct netdev_queue *queue,
+ struct netdev_queue_attribute *attribute,
+ char *buf)
+{
+ unsigned long trans_timeout;
+
+ spin_lock_irq(&queue->_xmit_lock);
+ trans_timeout = queue->trans_timeout;
+ spin_unlock_irq(&queue->_xmit_lock);
+
+ return sprintf(buf, "%lu", trans_timeout);
+}
+
+static struct netdev_queue_attribute queue_trans_timeout =
+ __ATTR(tx_timeout, S_IRUGO, show_trans_timeout, NULL);
+
+#ifdef CONFIG_XPS
static inline unsigned int get_netdev_queue_index(struct netdev_queue *queue)
{
struct net_device *dev = queue->dev;
@@ -1020,12 +1037,17 @@ error:
static struct netdev_queue_attribute xps_cpus_attribute =
__ATTR(xps_cpus, S_IRUGO | S_IWUSR, show_xps_map, store_xps_map);
+#endif /* CONFIG_XPS */
static struct attribute *netdev_queue_default_attrs[] = {
+ &queue_trans_timeout.attr,
+#ifdef CONFIG_XPS
&xps_cpus_attribute.attr,
+#endif
NULL
};
+#ifdef CONFIG_XPS
static void netdev_queue_release(struct kobject *kobj)
{
struct netdev_queue *queue = to_netdev_queue(kobj);
@@ -1076,10 +1098,13 @@ static void netdev_queue_release(struct kobject *kobj)
memset(kobj, 0, sizeof(*kobj));
dev_put(queue->dev);
}
+#endif /* CONFIG_XPS */
static struct kobj_type netdev_queue_ktype = {
.sysfs_ops = &netdev_queue_sysfs_ops,
+#ifdef CONFIG_XPS
.release = netdev_queue_release,
+#endif
.default_attrs = netdev_queue_default_attrs,
};
@@ -1102,12 +1127,12 @@ static int netdev_queue_add_kobject(struct net_device *net, int index)
return error;
}
-#endif /* CONFIG_XPS */
+#endif /* CONFIG_SYSFS */
int
netdev_queue_update_kobjects(struct net_device *net, int old_num, int new_num)
{
-#ifdef CONFIG_XPS
+#ifdef CONFIG_SYSFS
int i;
int error = 0;
@@ -1125,14 +1150,14 @@ netdev_queue_update_kobjects(struct net_device *net, int old_num, int new_num)
return error;
#else
return 0;
-#endif
+#endif /* CONFIG_SYSFS */
}
static int register_queue_kobjects(struct net_device *net)
{
int error = 0, txq = 0, rxq = 0, real_rx = 0, real_tx = 0;
-#if defined(CONFIG_RPS) || defined(CONFIG_XPS)
+#ifdef CONFIG_SYSFS
net->queues_kset = kset_create_and_add("queues",
NULL, &net->dev.kobj);
if (!net->queues_kset)
@@ -1173,7 +1198,7 @@ static void remove_queue_kobjects(struct net_device *net)
net_rx_queue_update_kobjects(net, real_rx, 0);
netdev_queue_update_kobjects(net, real_tx, 0);
-#if defined(CONFIG_RPS) || defined(CONFIG_XPS)
+#ifdef CONFIG_SYSFS
kset_unregister(net->queues_kset);
#endif
}
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 69fca27..79ac145 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -246,6 +246,7 @@ static void dev_watchdog(unsigned long arg)
time_after(jiffies, (trans_start +
dev->watchdog_timeo))) {
some_queue_timedout = 1;
+ txq->trans_timeout++;
break;
}
}
--
1.7.3.1
^ permalink raw reply related [flat|nested] 22+ messages in thread* [PATCH net-next v6 5/9] forcedeth: Add messages to indicate using MSI or MSI-X
2011-11-16 22:15 [PATCH net-next v6 0/9] net-sysfs+forcedeth: stats & debug enhancements David Decotigny
` (3 preceding siblings ...)
2011-11-16 22:15 ` [PATCH net-next v6 4/9] net: new counter for tx_timeout errors in sysfs David Decotigny
@ 2011-11-16 22:15 ` David Decotigny
2011-11-16 22:15 ` [PATCH net-next v6 6/9] forcedeth: allow to silence "TX timeout" debug messages David Decotigny
` (3 subsequent siblings)
8 siblings, 0 replies; 22+ messages in thread
From: David Decotigny @ 2011-11-16 22:15 UTC (permalink / raw)
To: netdev, linux-kernel
Cc: David S. Miller, Ian Campbell, Eric Dumazet, Jeff Kirsher,
Ben Hutchings, Jiri Pirko, Joe Perches, Szymon Janc,
Richard Jones, Ayaz Abdulla, Mike Ditto, David Decotigny
From: Mike Ditto <mditto@google.com>
This adds a few kernel messages to indicate whether PCIe interrupts
are signaled with MSI or MSI-X.
Signed-off-by: David Decotigny <david.decotigny@google.com>
---
drivers/net/ethernet/nvidia/forcedeth.c | 2 ++
1 files changed, 2 insertions(+), 0 deletions(-)
diff --git a/drivers/net/ethernet/nvidia/forcedeth.c b/drivers/net/ethernet/nvidia/forcedeth.c
index 374625b..fe17e42 100644
--- a/drivers/net/ethernet/nvidia/forcedeth.c
+++ b/drivers/net/ethernet/nvidia/forcedeth.c
@@ -3810,6 +3810,7 @@ static int nv_request_irq(struct net_device *dev, int intr_test)
writel(0, base + NvRegMSIXMap0);
writel(0, base + NvRegMSIXMap1);
}
+ netdev_info(dev, "MSI-X enabled\n");
}
}
if (ret != 0 && np->msi_flags & NV_MSI_CAPABLE) {
@@ -3831,6 +3832,7 @@ static int nv_request_irq(struct net_device *dev, int intr_test)
writel(0, base + NvRegMSIMap1);
/* enable msi vector 0 */
writel(NVREG_MSI_VECTOR_0_ENABLED, base + NvRegMSIIrqMask);
+ netdev_info(dev, "MSI enabled\n");
}
}
if (ret != 0) {
--
1.7.3.1
^ permalink raw reply related [flat|nested] 22+ messages in thread* [PATCH net-next v6 6/9] forcedeth: allow to silence "TX timeout" debug messages
2011-11-16 22:15 [PATCH net-next v6 0/9] net-sysfs+forcedeth: stats & debug enhancements David Decotigny
` (4 preceding siblings ...)
2011-11-16 22:15 ` [PATCH net-next v6 5/9] forcedeth: Add messages to indicate using MSI or MSI-X David Decotigny
@ 2011-11-16 22:15 ` David Decotigny
2011-11-16 22:15 ` [PATCH net-next v6 7/9] forcedeth: implement ndo_get_stats64() API David Decotigny
` (2 subsequent siblings)
8 siblings, 0 replies; 22+ messages in thread
From: David Decotigny @ 2011-11-16 22:15 UTC (permalink / raw)
To: netdev, linux-kernel
Cc: David S. Miller, Ian Campbell, Eric Dumazet, Jeff Kirsher,
Ben Hutchings, Jiri Pirko, Joe Perches, Szymon Janc,
Richard Jones, Ayaz Abdulla, Sameer Nanda, David Decotigny
From: Sameer Nanda <snanda@google.com>
This adds a new module parameter "debug_tx_timeout" to silence most
debug messages in case of TX timeout. These messages don't provide a
signal/noise ratio high enough for production systems and, with ~30kB
logged each time, they tend to add to a cascade effect if the system
is already under stress (memory pressure, disk, etc.).
By default, the parameter is clear, meaning that only a single warning
will be reported.
Signed-off-by: David Decotigny <david.decotigny@google.com>
---
drivers/net/ethernet/nvidia/forcedeth.c | 98 ++++++++++++++++++-------------
1 files changed, 57 insertions(+), 41 deletions(-)
diff --git a/drivers/net/ethernet/nvidia/forcedeth.c b/drivers/net/ethernet/nvidia/forcedeth.c
index fe17e42..9b917ff 100644
--- a/drivers/net/ethernet/nvidia/forcedeth.c
+++ b/drivers/net/ethernet/nvidia/forcedeth.c
@@ -892,6 +892,11 @@ enum {
static int dma_64bit = NV_DMA_64BIT_ENABLED;
/*
+ * Debug output control for tx_timeout
+ */
+static bool debug_tx_timeout = false;
+
+/*
* Crossover Detection
* Realtek 8201 phy + some OEM boards do not work properly.
*/
@@ -2477,56 +2482,64 @@ static void nv_tx_timeout(struct net_device *dev)
u32 status;
union ring_type put_tx;
int saved_tx_limit;
- int i;
if (np->msi_flags & NV_MSI_X_ENABLED)
status = readl(base + NvRegMSIXIrqStatus) & NVREG_IRQSTAT_MASK;
else
status = readl(base + NvRegIrqStatus) & NVREG_IRQSTAT_MASK;
- netdev_info(dev, "Got tx_timeout. irq: %08x\n", status);
+ netdev_warn(dev, "Got tx_timeout. irq status: %08x\n", status);
- netdev_info(dev, "Ring at %lx\n", (unsigned long)np->ring_addr);
- netdev_info(dev, "Dumping tx registers\n");
- for (i = 0; i <= np->register_size; i += 32) {
- netdev_info(dev,
- "%3x: %08x %08x %08x %08x %08x %08x %08x %08x\n",
- i,
- readl(base + i + 0), readl(base + i + 4),
- readl(base + i + 8), readl(base + i + 12),
- readl(base + i + 16), readl(base + i + 20),
- readl(base + i + 24), readl(base + i + 28));
- }
- netdev_info(dev, "Dumping tx ring\n");
- for (i = 0; i < np->tx_ring_size; i += 4) {
- if (!nv_optimized(np)) {
- netdev_info(dev,
- "%03x: %08x %08x // %08x %08x // %08x %08x // %08x %08x\n",
- i,
- le32_to_cpu(np->tx_ring.orig[i].buf),
- le32_to_cpu(np->tx_ring.orig[i].flaglen),
- le32_to_cpu(np->tx_ring.orig[i+1].buf),
- le32_to_cpu(np->tx_ring.orig[i+1].flaglen),
- le32_to_cpu(np->tx_ring.orig[i+2].buf),
- le32_to_cpu(np->tx_ring.orig[i+2].flaglen),
- le32_to_cpu(np->tx_ring.orig[i+3].buf),
- le32_to_cpu(np->tx_ring.orig[i+3].flaglen));
- } else {
+ if (unlikely(debug_tx_timeout)) {
+ int i;
+
+ netdev_info(dev, "Ring at %lx\n", (unsigned long)np->ring_addr);
+ netdev_info(dev, "Dumping tx registers\n");
+ for (i = 0; i <= np->register_size; i += 32) {
netdev_info(dev,
- "%03x: %08x %08x %08x // %08x %08x %08x // %08x %08x %08x // %08x %08x %08x\n",
+ "%3x: %08x %08x %08x %08x "
+ "%08x %08x %08x %08x\n",
i,
- le32_to_cpu(np->tx_ring.ex[i].bufhigh),
- le32_to_cpu(np->tx_ring.ex[i].buflow),
- le32_to_cpu(np->tx_ring.ex[i].flaglen),
- le32_to_cpu(np->tx_ring.ex[i+1].bufhigh),
- le32_to_cpu(np->tx_ring.ex[i+1].buflow),
- le32_to_cpu(np->tx_ring.ex[i+1].flaglen),
- le32_to_cpu(np->tx_ring.ex[i+2].bufhigh),
- le32_to_cpu(np->tx_ring.ex[i+2].buflow),
- le32_to_cpu(np->tx_ring.ex[i+2].flaglen),
- le32_to_cpu(np->tx_ring.ex[i+3].bufhigh),
- le32_to_cpu(np->tx_ring.ex[i+3].buflow),
- le32_to_cpu(np->tx_ring.ex[i+3].flaglen));
+ readl(base + i + 0), readl(base + i + 4),
+ readl(base + i + 8), readl(base + i + 12),
+ readl(base + i + 16), readl(base + i + 20),
+ readl(base + i + 24), readl(base + i + 28));
+ }
+ netdev_info(dev, "Dumping tx ring\n");
+ for (i = 0; i < np->tx_ring_size; i += 4) {
+ if (!nv_optimized(np)) {
+ netdev_info(dev,
+ "%03x: %08x %08x // %08x %08x "
+ "// %08x %08x // %08x %08x\n",
+ i,
+ le32_to_cpu(np->tx_ring.orig[i].buf),
+ le32_to_cpu(np->tx_ring.orig[i].flaglen),
+ le32_to_cpu(np->tx_ring.orig[i+1].buf),
+ le32_to_cpu(np->tx_ring.orig[i+1].flaglen),
+ le32_to_cpu(np->tx_ring.orig[i+2].buf),
+ le32_to_cpu(np->tx_ring.orig[i+2].flaglen),
+ le32_to_cpu(np->tx_ring.orig[i+3].buf),
+ le32_to_cpu(np->tx_ring.orig[i+3].flaglen));
+ } else {
+ netdev_info(dev,
+ "%03x: %08x %08x %08x "
+ "// %08x %08x %08x "
+ "// %08x %08x %08x "
+ "// %08x %08x %08x\n",
+ i,
+ le32_to_cpu(np->tx_ring.ex[i].bufhigh),
+ le32_to_cpu(np->tx_ring.ex[i].buflow),
+ le32_to_cpu(np->tx_ring.ex[i].flaglen),
+ le32_to_cpu(np->tx_ring.ex[i+1].bufhigh),
+ le32_to_cpu(np->tx_ring.ex[i+1].buflow),
+ le32_to_cpu(np->tx_ring.ex[i+1].flaglen),
+ le32_to_cpu(np->tx_ring.ex[i+2].bufhigh),
+ le32_to_cpu(np->tx_ring.ex[i+2].buflow),
+ le32_to_cpu(np->tx_ring.ex[i+2].flaglen),
+ le32_to_cpu(np->tx_ring.ex[i+3].bufhigh),
+ le32_to_cpu(np->tx_ring.ex[i+3].buflow),
+ le32_to_cpu(np->tx_ring.ex[i+3].flaglen));
+ }
}
}
@@ -6156,6 +6169,9 @@ module_param(phy_cross, int, 0);
MODULE_PARM_DESC(phy_cross, "Phy crossover detection for Realtek 8201 phy is enabled by setting to 1 and disabled by setting to 0.");
module_param(phy_power_down, int, 0);
MODULE_PARM_DESC(phy_power_down, "Power down phy and disable link when interface is down (1), or leave phy powered up (0).");
+module_param(debug_tx_timeout, bool, 0);
+MODULE_PARM_DESC(debug_tx_timeout,
+ "Dump tx related registers and ring when tx_timeout happens");
MODULE_AUTHOR("Manfred Spraul <manfred@colorfullife.com>");
MODULE_DESCRIPTION("Reverse Engineered nForce ethernet driver");
--
1.7.3.1
^ permalink raw reply related [flat|nested] 22+ messages in thread* [PATCH net-next v6 7/9] forcedeth: implement ndo_get_stats64() API
2011-11-16 22:15 [PATCH net-next v6 0/9] net-sysfs+forcedeth: stats & debug enhancements David Decotigny
` (5 preceding siblings ...)
2011-11-16 22:15 ` [PATCH net-next v6 6/9] forcedeth: allow to silence "TX timeout" debug messages David Decotigny
@ 2011-11-16 22:15 ` David Decotigny
2011-11-17 6:34 ` Eric Dumazet
2011-11-16 22:15 ` [PATCH net-next v6 8/9] forcedeth: account for dropped RX frames David Decotigny
2011-11-16 22:15 ` [PATCH net-next v6 9/9] forcedeth: stats updated with a deferrable timer David Decotigny
8 siblings, 1 reply; 22+ messages in thread
From: David Decotigny @ 2011-11-16 22:15 UTC (permalink / raw)
To: netdev, linux-kernel
Cc: David S. Miller, Ian Campbell, Eric Dumazet, Jeff Kirsher,
Ben Hutchings, Jiri Pirko, Joe Perches, Szymon Janc,
Richard Jones, Ayaz Abdulla, David Decotigny
This commit implements the ndo_get_stats64() API for forcedeth. Since
hardware stats are being updated from different contexts (process and
timer), this commit adds synchronization. For software stats, it
relies on the u64_stats_sync.h API.
Tested:
- 16-way SMP x86_64 ->
RX bytes:7244556582 (7.2 GB) TX bytes:181904254 (181.9 MB)
- pktgen + loopback: identical rx_bytes/tx_bytes and rx_packets/tx_packets
Signed-off-by: David Decotigny <david.decotigny@google.com>
---
drivers/net/ethernet/nvidia/forcedeth.c | 197 +++++++++++++++++++++++--------
1 files changed, 146 insertions(+), 51 deletions(-)
diff --git a/drivers/net/ethernet/nvidia/forcedeth.c b/drivers/net/ethernet/nvidia/forcedeth.c
index 9b917ff..427c9c1 100644
--- a/drivers/net/ethernet/nvidia/forcedeth.c
+++ b/drivers/net/ethernet/nvidia/forcedeth.c
@@ -65,7 +65,8 @@
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/prefetch.h>
-#include <linux/io.h>
+#include <linux/u64_stats_sync.h>
+#include <linux/io.h>
#include <asm/irq.h>
#include <asm/system.h>
@@ -736,6 +737,16 @@ struct nv_skb_map {
* - tx setup is lockless: it relies on netif_tx_lock. Actual submission
* needs netdev_priv(dev)->lock :-(
* - set_multicast_list: preparation lockless, relies on netif_tx_lock.
+ *
+ * Hardware stats updates are protected by hwstats_lock:
+ * - updated by nv_do_stats_poll (timer). This is meant to avoid
+ * integer wraparound in the NIC stats registers, at low frequency
+ * (0.1 Hz)
+ * - updated by nv_get_ethtool_stats + nv_get_stats64
+ *
+ * Software stats are accessed only through 64b synchronization points
+ * and are not subject to other synchronization techniques (single
+ * update thread on the TX or RX paths).
*/
/* in dev: base, irq */
@@ -745,9 +756,10 @@ struct fe_priv {
struct net_device *dev;
struct napi_struct napi;
- /* General data:
- * Locking: spin_lock(&np->lock); */
+ /* hardware stats are updated in syscall and timer */
+ spinlock_t hwstats_lock;
struct nv_ethtool_stats estats;
+
int in_shutdown;
u32 linkspeed;
int duplex;
@@ -798,6 +810,12 @@ struct fe_priv {
u32 nic_poll_irq;
int rx_ring_size;
+ /* RX software stats */
+ struct u64_stats_sync swstats_rx_syncp;
+ u64 stat_rx_packets;
+ u64 stat_rx_bytes; /* not always available in HW */
+ u64 stat_rx_missed_errors;
+
/* media detection workaround.
* Locking: Within irq hander or disable_irq+spin_lock(&np->lock);
*/
@@ -820,6 +838,12 @@ struct fe_priv {
struct nv_skb_map *tx_end_flip;
int tx_stop;
+ /* TX software stats */
+ struct u64_stats_sync swstats_tx_syncp;
+ u64 stat_tx_packets; /* not always available in HW */
+ u64 stat_tx_bytes;
+ u64 stat_tx_dropped;
+
/* msi/msi-x fields */
u32 msi_flags;
struct msix_entry msi_x_entry[NV_MSI_X_MAX_VECTORS];
@@ -1635,11 +1659,19 @@ static void nv_mac_reset(struct net_device *dev)
pci_push(base);
}
-static void nv_get_hw_stats(struct net_device *dev)
+/* Caller must appropriately lock netdev_priv(dev)->hwstats_lock */
+static void nv_update_stats(struct net_device *dev)
{
struct fe_priv *np = netdev_priv(dev);
u8 __iomem *base = get_hwbase(dev);
+ /* If it happens that this is run in top-half context, then
+ * replace the spin_lock of hwstats_lock with
+ * spin_lock_irqsave() in calling functions. */
+ WARN_ONCE(in_irq(), "forcedeth: estats spin_lock(_bh) from top-half");
+ assert_spin_locked(&np->hwstats_lock);
+
+ /* query hardware */
np->estats.tx_bytes += readl(base + NvRegTxCnt);
np->estats.tx_zero_rexmt += readl(base + NvRegTxZeroReXmt);
np->estats.tx_one_rexmt += readl(base + NvRegTxOneReXmt);
@@ -1698,40 +1730,72 @@ static void nv_get_hw_stats(struct net_device *dev)
}
/*
- * nv_get_stats: dev->get_stats function
+ * nv_get_stats64: dev->ndo_get_stats64 function
* Get latest stats value from the nic.
* Called with read_lock(&dev_base_lock) held for read -
* only synchronized against unregister_netdevice.
*/
-static struct net_device_stats *nv_get_stats(struct net_device *dev)
+static struct rtnl_link_stats64*
+nv_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *storage)
+ __acquires(&netdev_priv(dev)->hwstats_lock)
+ __releases(&netdev_priv(dev)->hwstats_lock)
{
struct fe_priv *np = netdev_priv(dev);
+ unsigned int syncp_start;
+
+ /*
+ * Note: because HW stats are not always available and for
+ * consistency reasons, the following ifconfig stats are
+ * managed by software: rx_bytes, tx_bytes, rx_packets and
+ * tx_packets. The related hardware stats reported by ethtool
+ * should be equivalent to these ifconfig stats, with 4
+ * additional bytes per packet (Ethernet FCS CRC), except for
+ * tx_packets when TSO kicks in.
+ */
+
+ /* software stats */
+ do {
+ syncp_start = u64_stats_fetch_begin(&np->swstats_rx_syncp);
+ storage->rx_packets = np->stat_rx_packets;
+ storage->rx_bytes = np->stat_rx_bytes;
+ storage->rx_missed_errors = np->stat_rx_missed_errors;
+ } while (u64_stats_fetch_retry(&np->swstats_rx_syncp, syncp_start));
+
+ do {
+ syncp_start = u64_stats_fetch_begin(&np->swstats_tx_syncp);
+ storage->tx_packets = np->stat_tx_packets;
+ storage->tx_bytes = np->stat_tx_bytes;
+ storage->tx_dropped = np->stat_tx_dropped;
+ } while (u64_stats_fetch_retry(&np->swstats_tx_syncp, syncp_start));
/* If the nic supports hw counters then retrieve latest values */
- if (np->driver_data & (DEV_HAS_STATISTICS_V1|DEV_HAS_STATISTICS_V2|DEV_HAS_STATISTICS_V3)) {
- nv_get_hw_stats(dev);
+ if (np->driver_data & DEV_HAS_STATISTICS_V123) {
+ spin_lock_bh(&np->hwstats_lock);
- /*
- * Note: because HW stats are not always available and
- * for consistency reasons, the following ifconfig
- * stats are managed by software: rx_bytes, tx_bytes,
- * rx_packets and tx_packets. The related hardware
- * stats reported by ethtool should be equivalent to
- * these ifconfig stats, with 4 additional bytes per
- * packet (Ethernet FCS CRC).
- */
+ nv_update_stats(dev);
+
+ /* generic stats */
+ storage->rx_errors = np->estats.rx_errors_total;
+ storage->tx_errors = np->estats.tx_errors_total;
+
+ /* meaningful only when NIC supports stats v3 */
+ storage->multicast = np->estats.rx_multicast;
+
+ /* detailed rx_errors */
+ storage->rx_length_errors = np->estats.rx_length_error;
+ storage->rx_over_errors = np->estats.rx_over_errors;
+ storage->rx_crc_errors = np->estats.rx_crc_errors;
+ storage->rx_frame_errors = np->estats.rx_frame_align_error;
+ storage->rx_fifo_errors = np->estats.rx_drop_frame;
- /* copy to net_device stats */
- dev->stats.tx_fifo_errors = np->estats.tx_fifo_errors;
- dev->stats.tx_carrier_errors = np->estats.tx_carrier_errors;
- dev->stats.rx_crc_errors = np->estats.rx_crc_errors;
- dev->stats.rx_over_errors = np->estats.rx_over_errors;
- dev->stats.rx_fifo_errors = np->estats.rx_drop_frame;
- dev->stats.rx_errors = np->estats.rx_errors_total;
- dev->stats.tx_errors = np->estats.tx_errors_total;
+ /* detailed tx_errors */
+ storage->tx_carrier_errors = np->estats.tx_carrier_errors;
+ storage->tx_fifo_errors = np->estats.tx_fifo_errors;
+
+ spin_unlock_bh(&np->hwstats_lock);
}
- return &dev->stats;
+ return storage;
}
/*
@@ -1932,8 +1996,11 @@ static void nv_drain_tx(struct net_device *dev)
np->tx_ring.ex[i].bufhigh = 0;
np->tx_ring.ex[i].buflow = 0;
}
- if (nv_release_txskb(np, &np->tx_skb[i]))
- dev->stats.tx_dropped++;
+ if (nv_release_txskb(np, &np->tx_skb[i])) {
+ u64_stats_update_begin(&np->swstats_tx_syncp);
+ np->stat_tx_dropped++;
+ u64_stats_update_end(&np->swstats_tx_syncp);
+ }
np->tx_skb[i].dma = 0;
np->tx_skb[i].dma_len = 0;
np->tx_skb[i].dma_single = 0;
@@ -2390,11 +2457,14 @@ static int nv_tx_done(struct net_device *dev, int limit)
if (np->desc_ver == DESC_VER_1) {
if (flags & NV_TX_LASTPACKET) {
if (flags & NV_TX_ERROR) {
- if ((flags & NV_TX_RETRYERROR) && !(flags & NV_TX_RETRYCOUNT_MASK))
+ if ((flags & NV_TX_RETRYERROR)
+ && !(flags & NV_TX_RETRYCOUNT_MASK))
nv_legacybackoff_reseed(dev);
} else {
- dev->stats.tx_packets++;
- dev->stats.tx_bytes += np->get_tx_ctx->skb->len;
+ u64_stats_update_begin(&np->swstats_tx_syncp);
+ np->stat_tx_packets++;
+ np->stat_tx_bytes += np->get_tx_ctx->skb->len;
+ u64_stats_update_end(&np->swstats_tx_syncp);
}
dev_kfree_skb_any(np->get_tx_ctx->skb);
np->get_tx_ctx->skb = NULL;
@@ -2403,11 +2473,14 @@ static int nv_tx_done(struct net_device *dev, int limit)
} else {
if (flags & NV_TX2_LASTPACKET) {
if (flags & NV_TX2_ERROR) {
- if ((flags & NV_TX2_RETRYERROR) && !(flags & NV_TX2_RETRYCOUNT_MASK))
+ if ((flags & NV_TX2_RETRYERROR)
+ && !(flags & NV_TX2_RETRYCOUNT_MASK))
nv_legacybackoff_reseed(dev);
} else {
- dev->stats.tx_packets++;
- dev->stats.tx_bytes += np->get_tx_ctx->skb->len;
+ u64_stats_update_begin(&np->swstats_tx_syncp);
+ np->stat_tx_packets++;
+ np->stat_tx_bytes += np->get_tx_ctx->skb->len;
+ u64_stats_update_end(&np->swstats_tx_syncp);
}
dev_kfree_skb_any(np->get_tx_ctx->skb);
np->get_tx_ctx->skb = NULL;
@@ -2441,15 +2514,18 @@ static int nv_tx_done_optimized(struct net_device *dev, int limit)
if (flags & NV_TX2_LASTPACKET) {
if (flags & NV_TX2_ERROR) {
- if ((flags & NV_TX2_RETRYERROR) && !(flags & NV_TX2_RETRYCOUNT_MASK)) {
+ if ((flags & NV_TX2_RETRYERROR)
+ && !(flags & NV_TX2_RETRYCOUNT_MASK)) {
if (np->driver_data & DEV_HAS_GEAR_MODE)
nv_gear_backoff_reseed(dev);
else
nv_legacybackoff_reseed(dev);
}
} else {
- dev->stats.tx_packets++;
- dev->stats.tx_bytes += np->get_tx_ctx->skb->len;
+ u64_stats_update_begin(&np->swstats_tx_syncp);
+ np->stat_tx_packets++;
+ np->stat_tx_bytes += np->get_tx_ctx->skb->len;
+ u64_stats_update_end(&np->swstats_tx_syncp);
}
dev_kfree_skb_any(np->get_tx_ctx->skb);
@@ -2662,8 +2738,11 @@ static int nv_rx_process(struct net_device *dev, int limit)
}
/* the rest are hard errors */
else {
- if (flags & NV_RX_MISSEDFRAME)
- dev->stats.rx_missed_errors++;
+ if (flags & NV_RX_MISSEDFRAME) {
+ u64_stats_update_begin(&np->swstats_rx_syncp);
+ np->stat_rx_missed_errors++;
+ u64_stats_update_end(&np->swstats_rx_syncp);
+ }
dev_kfree_skb(skb);
goto next_pkt;
}
@@ -2706,8 +2785,10 @@ static int nv_rx_process(struct net_device *dev, int limit)
skb_put(skb, len);
skb->protocol = eth_type_trans(skb, dev);
napi_gro_receive(&np->napi, skb);
- dev->stats.rx_packets++;
- dev->stats.rx_bytes += len;
+ u64_stats_update_begin(&np->swstats_rx_syncp);
+ np->stat_rx_packets++;
+ np->stat_rx_bytes += len;
+ u64_stats_update_end(&np->swstats_rx_syncp);
next_pkt:
if (unlikely(np->get_rx.orig++ == np->last_rx.orig))
np->get_rx.orig = np->first_rx.orig;
@@ -2790,8 +2871,10 @@ static int nv_rx_process_optimized(struct net_device *dev, int limit)
__vlan_hwaccel_put_tag(skb, vid);
}
napi_gro_receive(&np->napi, skb);
- dev->stats.rx_packets++;
- dev->stats.rx_bytes += len;
+ u64_stats_update_begin(&np->swstats_rx_syncp);
+ np->stat_rx_packets++;
+ np->stat_rx_bytes += len;
+ u64_stats_update_end(&np->swstats_rx_syncp);
} else {
dev_kfree_skb(skb);
}
@@ -4000,11 +4083,18 @@ static void nv_poll_controller(struct net_device *dev)
#endif
static void nv_do_stats_poll(unsigned long data)
+ __acquires(&netdev_priv(dev)->hwstats_lock)
+ __releases(&netdev_priv(dev)->hwstats_lock)
{
struct net_device *dev = (struct net_device *) data;
struct fe_priv *np = netdev_priv(dev);
- nv_get_hw_stats(dev);
+ /* If lock is currently taken, the stats are being refreshed
+ * and hence fresh enough */
+ if (spin_trylock(&np->hwstats_lock)) {
+ nv_update_stats(dev);
+ spin_unlock(&np->hwstats_lock);
+ }
if (!np->in_shutdown)
mod_timer(&np->stats_poll,
@@ -4711,14 +4801,18 @@ static int nv_get_sset_count(struct net_device *dev, int sset)
}
}
-static void nv_get_ethtool_stats(struct net_device *dev, struct ethtool_stats *estats, u64 *buffer)
+static void nv_get_ethtool_stats(struct net_device *dev,
+ struct ethtool_stats *estats, u64 *buffer)
+ __acquires(&netdev_priv(dev)->hwstats_lock)
+ __releases(&netdev_priv(dev)->hwstats_lock)
{
struct fe_priv *np = netdev_priv(dev);
- /* update stats */
- nv_get_hw_stats(dev);
-
- memcpy(buffer, &np->estats, nv_get_sset_count(dev, ETH_SS_STATS)*sizeof(u64));
+ spin_lock_bh(&np->hwstats_lock);
+ nv_update_stats(dev);
+ memcpy(buffer, &np->estats,
+ nv_get_sset_count(dev, ETH_SS_STATS)*sizeof(u64));
+ spin_unlock_bh(&np->hwstats_lock);
}
static int nv_link_test(struct net_device *dev)
@@ -5362,7 +5456,7 @@ static int nv_close(struct net_device *dev)
static const struct net_device_ops nv_netdev_ops = {
.ndo_open = nv_open,
.ndo_stop = nv_close,
- .ndo_get_stats = nv_get_stats,
+ .ndo_get_stats64 = nv_get_stats64,
.ndo_start_xmit = nv_start_xmit,
.ndo_tx_timeout = nv_tx_timeout,
.ndo_change_mtu = nv_change_mtu,
@@ -5379,7 +5473,7 @@ static const struct net_device_ops nv_netdev_ops = {
static const struct net_device_ops nv_netdev_ops_optimized = {
.ndo_open = nv_open,
.ndo_stop = nv_close,
- .ndo_get_stats = nv_get_stats,
+ .ndo_get_stats64 = nv_get_stats64,
.ndo_start_xmit = nv_start_xmit_optimized,
.ndo_tx_timeout = nv_tx_timeout,
.ndo_change_mtu = nv_change_mtu,
@@ -5418,6 +5512,7 @@ static int __devinit nv_probe(struct pci_dev *pci_dev, const struct pci_device_i
np->dev = dev;
np->pci_dev = pci_dev;
spin_lock_init(&np->lock);
+ spin_lock_init(&np->hwstats_lock);
SET_NETDEV_DEV(dev, &pci_dev->dev);
init_timer(&np->oom_kick);
--
1.7.3.1
^ permalink raw reply related [flat|nested] 22+ messages in thread* Re: [PATCH net-next v6 7/9] forcedeth: implement ndo_get_stats64() API
2011-11-16 22:15 ` [PATCH net-next v6 7/9] forcedeth: implement ndo_get_stats64() API David Decotigny
@ 2011-11-17 6:34 ` Eric Dumazet
2011-11-17 17:39 ` David Decotigny
0 siblings, 1 reply; 22+ messages in thread
From: Eric Dumazet @ 2011-11-17 6:34 UTC (permalink / raw)
To: David Decotigny
Cc: netdev, linux-kernel, David S. Miller, Ian Campbell, Jeff Kirsher,
Ben Hutchings, Jiri Pirko, Joe Perches, Szymon Janc,
Richard Jones, Ayaz Abdulla
Le mercredi 16 novembre 2011 à 14:15 -0800, David Decotigny a écrit :
> This commit implements the ndo_get_stats64() API for forcedeth. Since
> hardware stats are being updated from different contexts (process and
> timer), this commit adds synchronization. For software stats, it
> relies on the u64_stats_sync.h API.
>
> Tested:
> - 16-way SMP x86_64 ->
> RX bytes:7244556582 (7.2 GB) TX bytes:181904254 (181.9 MB)
> - pktgen + loopback: identical rx_bytes/tx_bytes and rx_packets/tx_packets
>
>
>
> Signed-off-by: David Decotigny <david.decotigny@google.com>
> ---
> drivers/net/ethernet/nvidia/forcedeth.c | 197 +++++++++++++++++++++++--------
> 1 files changed, 146 insertions(+), 51 deletions(-)
>
> +static struct rtnl_link_stats64*
> +nv_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *storage)
> + __acquires(&netdev_priv(dev)->hwstats_lock)
> + __releases(&netdev_priv(dev)->hwstats_lock)
> {
> struct fe_priv *np = netdev_priv(dev);
> + unsigned int syncp_start;
> +
> + /*
> + * Note: because HW stats are not always available and for
> + * consistency reasons, the following ifconfig stats are
> + * managed by software: rx_bytes, tx_bytes, rx_packets and
> + * tx_packets. The related hardware stats reported by ethtool
> + * should be equivalent to these ifconfig stats, with 4
> + * additional bytes per packet (Ethernet FCS CRC), except for
> + * tx_packets when TSO kicks in.
> + */
> +
> + /* software stats */
> + do {
> + syncp_start = u64_stats_fetch_begin(&np->swstats_rx_syncp);
> + storage->rx_packets = np->stat_rx_packets;
> + storage->rx_bytes = np->stat_rx_bytes;
> + storage->rx_missed_errors = np->stat_rx_missed_errors;
> + } while (u64_stats_fetch_retry(&np->swstats_rx_syncp, syncp_start));
> +
> + do {
> + syncp_start = u64_stats_fetch_begin(&np->swstats_tx_syncp);
> + storage->tx_packets = np->stat_tx_packets;
> + storage->tx_bytes = np->stat_tx_bytes;
> + storage->tx_dropped = np->stat_tx_dropped;
> + } while (u64_stats_fetch_retry(&np->swstats_tx_syncp, syncp_start));
>
I have no idea why you think u64_stats_fetch_begin() is safe on 32bit
arch here.
Hint : On CONFIG_SMP=n build, only preemption is disabled in
u64_stats_fetch_begin()
So an interrupt could come and change your counters while you were
reading them.
Its very unlikely, but its possible.
You should use the _bh variants.
^ permalink raw reply [flat|nested] 22+ messages in thread* Re: [PATCH net-next v6 7/9] forcedeth: implement ndo_get_stats64() API
2011-11-17 6:34 ` Eric Dumazet
@ 2011-11-17 17:39 ` David Decotigny
2011-11-17 17:47 ` Eric Dumazet
0 siblings, 1 reply; 22+ messages in thread
From: David Decotigny @ 2011-11-17 17:39 UTC (permalink / raw)
To: Eric Dumazet
Cc: netdev, linux-kernel, David S. Miller, Ian Campbell, Jeff Kirsher,
Ben Hutchings, Jiri Pirko, Joe Perches, Szymon Janc,
Richard Jones, Ayaz Abdulla
On Wed, Nov 16, 2011 at 10:34 PM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
> You should use the _bh variants.
Oooops. It's unforgivable: the sky2 implementation you mentioned in a
previous message was the simple and right model to follow. Grrrrrr.
Sending a fix shortly.
Thanks!
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [PATCH net-next v6 7/9] forcedeth: implement ndo_get_stats64() API
2011-11-17 17:39 ` David Decotigny
@ 2011-11-17 17:47 ` Eric Dumazet
0 siblings, 0 replies; 22+ messages in thread
From: Eric Dumazet @ 2011-11-17 17:47 UTC (permalink / raw)
To: David Decotigny
Cc: netdev, linux-kernel, David S. Miller, Ian Campbell, Jeff Kirsher,
Ben Hutchings, Jiri Pirko, Joe Perches, Szymon Janc,
Richard Jones, Ayaz Abdulla
Le jeudi 17 novembre 2011 à 09:39 -0800, David Decotigny a écrit :
> On Wed, Nov 16, 2011 at 10:34 PM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
> > You should use the _bh variants.
>
> Oooops. It's unforgivable: the sky2 implementation you mentioned in a
> previous message was the simple and right model to follow. Grrrrrr.
> Sending a fix shortly.
>
Dont worry, we all did the exact same mistakes :)
^ permalink raw reply [flat|nested] 22+ messages in thread
* [PATCH net-next v6 8/9] forcedeth: account for dropped RX frames
2011-11-16 22:15 [PATCH net-next v6 0/9] net-sysfs+forcedeth: stats & debug enhancements David Decotigny
` (6 preceding siblings ...)
2011-11-16 22:15 ` [PATCH net-next v6 7/9] forcedeth: implement ndo_get_stats64() API David Decotigny
@ 2011-11-16 22:15 ` David Decotigny
2011-11-16 22:15 ` [PATCH net-next v6 9/9] forcedeth: stats updated with a deferrable timer David Decotigny
8 siblings, 0 replies; 22+ messages in thread
From: David Decotigny @ 2011-11-16 22:15 UTC (permalink / raw)
To: netdev, linux-kernel
Cc: David S. Miller, Ian Campbell, Eric Dumazet, Jeff Kirsher,
Ben Hutchings, Jiri Pirko, Joe Perches, Szymon Janc,
Richard Jones, Ayaz Abdulla, David Decotigny
This adds code to update the stats counter for dropped RX frames.
Signed-off-by: David Decotigny <david.decotigny@google.com>
---
drivers/net/ethernet/nvidia/forcedeth.c | 14 ++++++++++++--
1 files changed, 12 insertions(+), 2 deletions(-)
diff --git a/drivers/net/ethernet/nvidia/forcedeth.c b/drivers/net/ethernet/nvidia/forcedeth.c
index 427c9c1..9bf993d 100644
--- a/drivers/net/ethernet/nvidia/forcedeth.c
+++ b/drivers/net/ethernet/nvidia/forcedeth.c
@@ -815,6 +815,7 @@ struct fe_priv {
u64 stat_rx_packets;
u64 stat_rx_bytes; /* not always available in HW */
u64 stat_rx_missed_errors;
+ u64 stat_rx_dropped;
/* media detection workaround.
* Locking: Within irq hander or disable_irq+spin_lock(&np->lock);
@@ -1758,6 +1759,7 @@ nv_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *storage)
syncp_start = u64_stats_fetch_begin(&np->swstats_rx_syncp);
storage->rx_packets = np->stat_rx_packets;
storage->rx_bytes = np->stat_rx_bytes;
+ storage->rx_dropped = np->stat_rx_dropped;
storage->rx_missed_errors = np->stat_rx_missed_errors;
} while (u64_stats_fetch_retry(&np->swstats_rx_syncp, syncp_start));
@@ -1828,8 +1830,12 @@ static int nv_alloc_rx(struct net_device *dev)
np->put_rx.orig = np->first_rx.orig;
if (unlikely(np->put_rx_ctx++ == np->last_rx_ctx))
np->put_rx_ctx = np->first_rx_ctx;
- } else
+ } else {
+ u64_stats_update_begin(&np->swstats_rx_syncp);
+ np->stat_rx_dropped++;
+ u64_stats_update_end(&np->swstats_rx_syncp);
return 1;
+ }
}
return 0;
}
@@ -1860,8 +1866,12 @@ static int nv_alloc_rx_optimized(struct net_device *dev)
np->put_rx.ex = np->first_rx.ex;
if (unlikely(np->put_rx_ctx++ == np->last_rx_ctx))
np->put_rx_ctx = np->first_rx_ctx;
- } else
+ } else {
+ u64_stats_update_begin(&np->swstats_rx_syncp);
+ np->stat_rx_dropped++;
+ u64_stats_update_end(&np->swstats_rx_syncp);
return 1;
+ }
}
return 0;
}
--
1.7.3.1
^ permalink raw reply related [flat|nested] 22+ messages in thread* [PATCH net-next v6 9/9] forcedeth: stats updated with a deferrable timer
2011-11-16 22:15 [PATCH net-next v6 0/9] net-sysfs+forcedeth: stats & debug enhancements David Decotigny
` (7 preceding siblings ...)
2011-11-16 22:15 ` [PATCH net-next v6 8/9] forcedeth: account for dropped RX frames David Decotigny
@ 2011-11-16 22:15 ` David Decotigny
8 siblings, 0 replies; 22+ messages in thread
From: David Decotigny @ 2011-11-16 22:15 UTC (permalink / raw)
To: netdev, linux-kernel
Cc: David S. Miller, Ian Campbell, Eric Dumazet, Jeff Kirsher,
Ben Hutchings, Jiri Pirko, Joe Perches, Szymon Janc,
Richard Jones, Ayaz Abdulla, David Decotigny
Mark stats timer as deferrable: punctuality in waking the stats timer
callback doesn't matter much, as it is responsible only to avoid
integer wraparound.
We need at least 1 other timer to fire within 17s (fully loaded 1Gbps)
to avoid wrap-arounds. Desired period is still 10s.
Signed-off-by: David Decotigny <david.decotigny@google.com>
---
drivers/net/ethernet/nvidia/forcedeth.c | 2 +-
1 files changed, 1 insertions(+), 1 deletions(-)
diff --git a/drivers/net/ethernet/nvidia/forcedeth.c b/drivers/net/ethernet/nvidia/forcedeth.c
index 9bf993d..630174b 100644
--- a/drivers/net/ethernet/nvidia/forcedeth.c
+++ b/drivers/net/ethernet/nvidia/forcedeth.c
@@ -5531,7 +5531,7 @@ static int __devinit nv_probe(struct pci_dev *pci_dev, const struct pci_device_i
init_timer(&np->nic_poll);
np->nic_poll.data = (unsigned long) dev;
np->nic_poll.function = nv_do_nic_poll; /* timer handler */
- init_timer(&np->stats_poll);
+ init_timer_deferrable(&np->stats_poll);
np->stats_poll.data = (unsigned long) dev;
np->stats_poll.function = nv_do_stats_poll; /* timer handler */
--
1.7.3.1
^ permalink raw reply related [flat|nested] 22+ messages in thread