* [PATCH] bridge: per-cpu packet statistics
@ 2010-03-02 0:16 Stephen Hemminger
2010-03-02 6:01 ` Eric Dumazet
` (3 more replies)
0 siblings, 4 replies; 15+ messages in thread
From: Stephen Hemminger @ 2010-03-02 0:16 UTC (permalink / raw)
To: David Miller, netdev, bridge
The shared packet statistics are a potential source of slow down
on bridged traffic. Convert to per-cpu array, but only keep those
statistics which change per-packet.
Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
---
net/bridge/br_device.c | 43 ++++++++++++++++++++++++++++++++++++++-----
net/bridge/br_if.c | 6 ++++++
net/bridge/br_input.c | 5 +++--
net/bridge/br_private.h | 8 ++++++++
4 files changed, 55 insertions(+), 7 deletions(-)
--- a/net/bridge/br_device.c 2010-03-01 08:22:23.476657998 -0800
+++ b/net/bridge/br_device.c 2010-03-01 15:31:36.737227465 -0800
@@ -26,11 +26,12 @@ netdev_tx_t br_dev_xmit(struct sk_buff *
const unsigned char *dest = skb->data;
struct net_bridge_fdb_entry *dst;
struct net_bridge_mdb_entry *mdst;
+ struct br_cpu_netstats *brstats = this_cpu_ptr(br->stats);
- BR_INPUT_SKB_CB(skb)->brdev = dev;
+ brstats->tx_packets++;
+ brstats->tx_bytes += skb->len;
- dev->stats.tx_packets++;
- dev->stats.tx_bytes += skb->len;
+ BR_INPUT_SKB_CB(skb)->brdev = dev;
skb_reset_mac_header(skb);
skb_pull(skb, ETH_HLEN);
@@ -81,6 +82,28 @@ static int br_dev_stop(struct net_device
return 0;
}
+static struct net_device_stats *br_get_stats(struct net_device *dev)
+{
+ struct net_bridge *br = netdev_priv(dev);
+ struct net_device_stats *stats = &dev->stats;
+ unsigned int cpu;
+
+ stats->tx_bytes = stats->tx_packets = 0;
+ stats->rx_bytes = stats->rx_packets = 0;
+
+ for_each_online_cpu(cpu) {
+ const struct br_cpu_netstats *bstats
+ = per_cpu_ptr(br->stats, cpu);
+
+ stats->tx_bytes += bstats->tx_bytes;
+ stats->tx_packets += bstats->tx_packets;
+ stats->rx_bytes += bstats->rx_bytes;
+ stats->rx_packets += bstats->rx_packets;
+ }
+
+ return stats;
+}
+
static int br_change_mtu(struct net_device *dev, int new_mtu)
{
struct net_bridge *br = netdev_priv(dev);
@@ -180,19 +203,27 @@ static const struct net_device_ops br_ne
.ndo_open = br_dev_open,
.ndo_stop = br_dev_stop,
.ndo_start_xmit = br_dev_xmit,
+ .ndo_get_stats = br_get_stats,
.ndo_set_mac_address = br_set_mac_address,
.ndo_set_multicast_list = br_dev_set_multicast_list,
.ndo_change_mtu = br_change_mtu,
.ndo_do_ioctl = br_dev_ioctl,
};
+static void br_dev_free(struct net_device *dev)
+{
+ struct net_bridge *br = netdev_priv(dev);
+
+ free_percpu(br->stats);
+}
+
void br_dev_setup(struct net_device *dev)
{
random_ether_addr(dev->dev_addr);
ether_setup(dev);
dev->netdev_ops = &br_netdev_ops;
- dev->destructor = free_netdev;
+ dev->destructor = br_dev_free;
SET_ETHTOOL_OPS(dev, &br_ethtool_ops);
dev->tx_queue_len = 0;
dev->priv_flags = IFF_EBRIDGE;
--- a/net/bridge/br_if.c 2010-03-01 08:22:23.476657998 -0800
+++ b/net/bridge/br_if.c 2010-03-01 15:30:47.733227819 -0800
@@ -185,6 +185,12 @@ static struct net_device *new_bridge_dev
br = netdev_priv(dev);
br->dev = dev;
+ br->stats = alloc_percpu(sizeof(struct br_cpu_netstats));
+ if (!br->stats) {
+ free_netdev(dev);
+ return NULL;
+ }
+
spin_lock_init(&br->lock);
INIT_LIST_HEAD(&br->port_list);
spin_lock_init(&br->hash_lock);
--- a/net/bridge/br_input.c 2010-03-01 08:22:23.476657998 -0800
+++ b/net/bridge/br_input.c 2010-03-01 15:32:45.882471626 -0800
@@ -23,9 +23,11 @@ const u8 br_group_address[ETH_ALEN] = {
static int br_pass_frame_up(struct sk_buff *skb)
{
struct net_device *indev, *brdev = BR_INPUT_SKB_CB(skb)->brdev;
+ struct net_bridge *br = netdev_priv(brdev);
+ struct br_cpu_netstats *brstats = this_cpu_ptr(br->stats);
- brdev->stats.rx_packets++;
- brdev->stats.rx_bytes += skb->len;
+ brstats->rx_packets++;
+ brstats->rx_bytes += skb->len;
indev = skb->dev;
skb->dev = brdev;
--- a/net/bridge/br_private.h 2010-03-01 08:22:23.476657998 -0800
+++ b/net/bridge/br_private.h 2010-03-01 15:31:03.437228864 -0800
@@ -135,6 +135,14 @@ struct net_bridge
spinlock_t lock;
struct list_head port_list;
struct net_device *dev;
+
+ struct br_cpu_netstats __percpu {
+ unsigned long rx_packets;
+ unsigned long tx_packets;
+ unsigned long rx_bytes;
+ unsigned long tx_bytes;
+ } *stats;
+
spinlock_t hash_lock;
struct hlist_head hash[BR_HASH_SIZE];
unsigned long feature_mask;
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH] bridge: per-cpu packet statistics
2010-03-02 0:16 [PATCH] bridge: per-cpu packet statistics Stephen Hemminger
@ 2010-03-02 6:01 ` Eric Dumazet
2010-03-02 17:22 ` [Bridge] " Stephen Hemminger
2010-03-02 7:43 ` Eric Dumazet
` (2 subsequent siblings)
3 siblings, 1 reply; 15+ messages in thread
From: Eric Dumazet @ 2010-03-02 6:01 UTC (permalink / raw)
To: Stephen Hemminger; +Cc: David Miller, netdev, bridge
Le lundi 01 mars 2010 à 16:16 -0800, Stephen Hemminger a écrit :
> The shared packet statistics are a potential source of slow down
> on bridged traffic. Convert to per-cpu array, but only keep those
> statistics which change per-packet.
>
> Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
>
> ---
>
> net/bridge/br_device.c | 43 ++++++++++++++++++++++++++++++++++++++-----
> net/bridge/br_if.c | 6 ++++++
> net/bridge/br_input.c | 5 +++--
> net/bridge/br_private.h | 8 ++++++++
> 4 files changed, 55 insertions(+), 7 deletions(-)
>
> --- a/net/bridge/br_device.c 2010-03-01 08:22:23.476657998 -0800
> +++ b/net/bridge/br_device.c 2010-03-01 15:31:36.737227465 -0800
> @@ -26,11 +26,12 @@ netdev_tx_t br_dev_xmit(struct sk_buff *
> const unsigned char *dest = skb->data;
> struct net_bridge_fdb_entry *dst;
> struct net_bridge_mdb_entry *mdst;
> + struct br_cpu_netstats *brstats = this_cpu_ptr(br->stats);
>
> - BR_INPUT_SKB_CB(skb)->brdev = dev;
> + brstats->tx_packets++;
> + brstats->tx_bytes += skb->len;
On the TX path this is not really necessary: since we have already
dirtied txq->lock before calling br_dev_xmit(), can't we use
txq->tx_packets and txq->tx_bytes for free?
>
> - dev->stats.tx_packets++;
> - dev->stats.tx_bytes += skb->len;
> + BR_INPUT_SKB_CB(skb)->brdev = dev;
>
> skb_reset_mac_header(skb);
> skb_pull(skb, ETH_HLEN);
> @@ -81,6 +82,28 @@ static int br_dev_stop(struct net_device
> return 0;
> }
>
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH] bridge: per-cpu packet statistics
2010-03-02 0:16 [PATCH] bridge: per-cpu packet statistics Stephen Hemminger
2010-03-02 6:01 ` Eric Dumazet
@ 2010-03-02 7:43 ` Eric Dumazet
2010-03-02 7:51 ` Eric Dumazet
2010-03-02 9:02 ` Eric Dumazet
3 siblings, 0 replies; 15+ messages in thread
From: Eric Dumazet @ 2010-03-02 7:43 UTC (permalink / raw)
To: Stephen Hemminger; +Cc: David Miller, netdev, bridge
Le lundi 01 mars 2010 à 16:16 -0800, Stephen Hemminger a écrit :
> +static void br_dev_free(struct net_device *dev)
> +{
> + struct net_bridge *br = netdev_priv(dev);
> +
> + free_percpu(br->stats);
> +}
> +
> void br_dev_setup(struct net_device *dev)
> {
> random_ether_addr(dev->dev_addr);
> ether_setup(dev);
>
> dev->netdev_ops = &br_netdev_ops;
> - dev->destructor = free_netdev;
> + dev->destructor = br_dev_free;
> SET_ETHTOOL_OPS(dev, &br_ethtool_ops);
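Isn't free_netdev() missing after this change? The destructor used to be free_netdev() itself; br_dev_free() only calls free_percpu(), so the net_device is no longer freed.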
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH] bridge: per-cpu packet statistics
2010-03-02 0:16 [PATCH] bridge: per-cpu packet statistics Stephen Hemminger
2010-03-02 6:01 ` Eric Dumazet
2010-03-02 7:43 ` Eric Dumazet
@ 2010-03-02 7:51 ` Eric Dumazet
2010-03-02 9:02 ` Eric Dumazet
3 siblings, 0 replies; 15+ messages in thread
From: Eric Dumazet @ 2010-03-02 7:51 UTC (permalink / raw)
To: Stephen Hemminger; +Cc: David Miller, netdev, bridge
Le lundi 01 mars 2010 à 16:16 -0800, Stephen Hemminger a écrit :
> --- a/net/bridge/br_if.c 2010-03-01 08:22:23.476657998 -0800
> +++ b/net/bridge/br_if.c 2010-03-01 15:30:47.733227819 -0800
> @@ -185,6 +185,12 @@ static struct net_device *new_bridge_dev
> br = netdev_priv(dev);
> br->dev = dev;
>
> + br->stats = alloc_percpu(sizeof(struct br_cpu_netstats));
> + if (!br->stats) {
> + free_netdev(dev);
> + return NULL;
> + }
> +
Strange... alloc_percpu() takes a type, not a size, so this should be:
br->stats = alloc_percpu(struct br_cpu_netstats);
Or even better, ask the percpu allocator for an aligned chunk
(aligned on 2 or 4 longs instead of 1 long):
br->stats = __alloc_percpu(sizeof(struct br_cpu_netstats),
4 * sizeof(long));
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH] bridge: per-cpu packet statistics
2010-03-02 0:16 [PATCH] bridge: per-cpu packet statistics Stephen Hemminger
` (2 preceding siblings ...)
2010-03-02 7:51 ` Eric Dumazet
@ 2010-03-02 9:02 ` Eric Dumazet
2010-03-02 17:23 ` [Bridge] " Stephen Hemminger
2010-03-02 17:58 ` [PATCH] bridge: per-cpu packet statistics (v2) Stephen Hemminger
3 siblings, 2 replies; 15+ messages in thread
From: Eric Dumazet @ 2010-03-02 9:02 UTC (permalink / raw)
To: Stephen Hemminger; +Cc: David Miller, netdev, bridge
From: Stephen Hemminger <shemminger@vyatta.com>
Le lundi 01 mars 2010 à 16:16 -0800, Stephen Hemminger a écrit :
> + for_each_online_cpu(cpu) {
> + const struct br_cpu_netstats *bstats
> + = per_cpu_ptr(br->stats, cpu);
> +
> + stats->rx_bytes += bstats->rx_bytes;
> + stats->rx_packets += bstats->rx_packets;
> + }
One last point: we should use for_each_possible_cpu() here, so that counts accumulated on a CPU that is currently offline are still included in the totals.
Here is your patch with all my comments integrated :
1) Use txq->{tx_bytes|tx_packets} counter
2) alloc_percpu(struct ...) instead of alloc_percpu(sizeof(struct ...))
3) free_netdev() in destructor
4) for_each_possible_cpu() instead of for_each_online_cpu()
5) br_get_stats() use local variables for the sake of concurrent users
Next step would be to use multiqueue :)
Thanks
[PATCH] bridge: per-cpu packet statistics
The shared packet statistics are a potential source of slow down
on bridged traffic. Convert to per-cpu array for rx_packets/rx_bytes RX
accounting, and use txq tx_packets/tx_bytes for TX accounting
Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
---
net/bridge/br_device.c | 39 +++++++++++++++++++++++++++++++++++---
net/bridge/br_if.c | 6 +++++
net/bridge/br_input.c | 6 +++--
net/bridge/br_private.h | 6 +++++
4 files changed, 52 insertions(+), 5 deletions(-)
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index eb7062d..e73c42c 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -22,6 +22,8 @@
/* net device transmit always called with no BH (preempt_disabled) */
netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
{
+ int qidx = skb_get_queue_mapping(skb);
+ struct netdev_queue *txq = netdev_get_tx_queue(dev, qidx);
struct net_bridge *br = netdev_priv(dev);
const unsigned char *dest = skb->data;
struct net_bridge_fdb_entry *dst;
@@ -29,8 +31,8 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
BR_INPUT_SKB_CB(skb)->brdev = dev;
- dev->stats.tx_packets++;
- dev->stats.tx_bytes += skb->len;
+ txq->tx_packets++;
+ txq->tx_bytes += skb->len;
skb_reset_mac_header(skb);
skb_pull(skb, ETH_HLEN);
@@ -81,6 +83,28 @@ static int br_dev_stop(struct net_device *dev)
return 0;
}
+static struct net_device_stats *br_get_stats(struct net_device *dev)
+{
+ struct net_bridge *br = netdev_priv(dev);
+ struct net_device_stats *stats = &dev->stats;
+ unsigned long rx_bytes = 0, rx_packets = 0;
+ unsigned int cpu;
+
+ dev_txq_stats_fold(dev, stats);
+
+ for_each_possible_cpu(cpu) {
+ const struct br_cpu_netstats *bstats
+ = per_cpu_ptr(br->stats, cpu);
+
+ rx_bytes += bstats->rx_bytes;
+ rx_packets += bstats->rx_packets;
+ }
+ stats->rx_bytes = rx_bytes;
+ stats->rx_packets = rx_packets;
+
+ return stats;
+}
+
static int br_change_mtu(struct net_device *dev, int new_mtu)
{
struct net_bridge *br = netdev_priv(dev);
@@ -180,19 +204,28 @@ static const struct net_device_ops br_netdev_ops = {
.ndo_open = br_dev_open,
.ndo_stop = br_dev_stop,
.ndo_start_xmit = br_dev_xmit,
+ .ndo_get_stats = br_get_stats,
.ndo_set_mac_address = br_set_mac_address,
.ndo_set_multicast_list = br_dev_set_multicast_list,
.ndo_change_mtu = br_change_mtu,
.ndo_do_ioctl = br_dev_ioctl,
};
+static void br_dev_free(struct net_device *dev)
+{
+ struct net_bridge *br = netdev_priv(dev);
+
+ free_percpu(br->stats);
+ free_netdev(dev);
+}
+
void br_dev_setup(struct net_device *dev)
{
random_ether_addr(dev->dev_addr);
ether_setup(dev);
dev->netdev_ops = &br_netdev_ops;
- dev->destructor = free_netdev;
+ dev->destructor = br_dev_free;
SET_ETHTOOL_OPS(dev, &br_ethtool_ops);
dev->tx_queue_len = 0;
dev->priv_flags = IFF_EBRIDGE;
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index b6a3872..b7cdd2e 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -185,6 +185,12 @@ static struct net_device *new_bridge_dev(struct net *net, const char *name)
br = netdev_priv(dev);
br->dev = dev;
+ br->stats = alloc_percpu(struct br_cpu_netstats);
+ if (!br->stats) {
+ free_netdev(dev);
+ return NULL;
+ }
+
spin_lock_init(&br->lock);
INIT_LIST_HEAD(&br->port_list);
spin_lock_init(&br->hash_lock);
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index 53b3985..7a5a5b4 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -23,9 +23,11 @@ const u8 br_group_address[ETH_ALEN] = { 0x01, 0x80, 0xc2, 0x00, 0x00, 0x00 };
static int br_pass_frame_up(struct sk_buff *skb)
{
struct net_device *indev, *brdev = BR_INPUT_SKB_CB(skb)->brdev;
+ struct net_bridge *br = netdev_priv(brdev);
+ struct br_cpu_netstats *brstats = this_cpu_ptr(br->stats);
- brdev->stats.rx_packets++;
- brdev->stats.rx_bytes += skb->len;
+ brstats->rx_packets++;
+ brstats->rx_bytes += skb->len;
indev = skb->dev;
skb->dev = brdev;
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 9191198..06b30af 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -135,6 +135,12 @@ struct net_bridge
spinlock_t lock;
struct list_head port_list;
struct net_device *dev;
+
+ struct br_cpu_netstats __percpu {
+ unsigned long rx_bytes;
+ unsigned long rx_packets;
+ } *stats;
+
spinlock_t hash_lock;
struct hlist_head hash[BR_HASH_SIZE];
unsigned long feature_mask;
^ permalink raw reply related [flat|nested] 15+ messages in thread
* Re: [Bridge] [PATCH] bridge: per-cpu packet statistics
2010-03-02 6:01 ` Eric Dumazet
@ 2010-03-02 17:22 ` Stephen Hemminger
2010-03-16 2:48 ` Herbert Xu
0 siblings, 1 reply; 15+ messages in thread
From: Stephen Hemminger @ 2010-03-02 17:22 UTC (permalink / raw)
To: Eric Dumazet; +Cc: netdev, bridge, David Miller
On Tue, 02 Mar 2010 07:01:30 +0100
Eric Dumazet <eric.dumazet@gmail.com> wrote:
> Le lundi 01 mars 2010 à 16:16 -0800, Stephen Hemminger a écrit :
> > The shared packet statistics are a potential source of slow down
> > on bridged traffic. Convert to per-cpu array, but only keep those
> > statistics which change per-packet.
> >
> > Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
> >
> > ---
> >
> > net/bridge/br_device.c | 43 ++++++++++++++++++++++++++++++++++++++-----
> > net/bridge/br_if.c | 6 ++++++
> > net/bridge/br_input.c | 5 +++--
> > net/bridge/br_private.h | 8 ++++++++
> > 4 files changed, 55 insertions(+), 7 deletions(-)
> >
> > --- a/net/bridge/br_device.c 2010-03-01 08:22:23.476657998 -0800
> > +++ b/net/bridge/br_device.c 2010-03-01 15:31:36.737227465 -0800
> > @@ -26,11 +26,12 @@ netdev_tx_t br_dev_xmit(struct sk_buff *
> > const unsigned char *dest = skb->data;
> > struct net_bridge_fdb_entry *dst;
> > struct net_bridge_mdb_entry *mdst;
> > + struct br_cpu_netstats *brstats = this_cpu_ptr(br->stats);
> >
> > - BR_INPUT_SKB_CB(skb)->brdev = dev;
> > + brstats->tx_packets++;
> > + brstats->tx_bytes += skb->len;
>
>
> On TX path, this is not really necessary, since we already dirtied
> txq->lock before calling br_dev_xmit(), we can use txq->tx_packets and
> txq->tx_bytes for free ?
Bridge is already using lockless transmit LLTX, so tx_lock is not touched.
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [Bridge] [PATCH] bridge: per-cpu packet statistics
2010-03-02 9:02 ` Eric Dumazet
@ 2010-03-02 17:23 ` Stephen Hemminger
2010-03-16 2:51 ` Herbert Xu
2010-03-02 17:58 ` [PATCH] bridge: per-cpu packet statistics (v2) Stephen Hemminger
1 sibling, 1 reply; 15+ messages in thread
From: Stephen Hemminger @ 2010-03-02 17:23 UTC (permalink / raw)
To: Eric Dumazet; +Cc: netdev, bridge, David Miller
On Tue, 02 Mar 2010 10:02:59 +0100
Eric Dumazet <eric.dumazet@gmail.com> wrote:
> From: Stephen Hemminger <shemminger@vyatta.com>
>
> Le lundi 01 mars 2010 à 16:16 -0800, Stephen Hemminger a écrit :
>
> > + for_each_online_cpu(cpu) {
> > + const struct br_cpu_netstats *bstats
> > + = per_cpu_ptr(br->stats, cpu);
> > +
> > + stats->rx_bytes += bstats->rx_bytes;
> > + stats->rx_packets += bstats->rx_packets;
> > + }
>
> And last point, we should use for_each_possible_cpu() here
>
> Here is your patch with all my comments integrated :
>
> 1) Use txq->{tx_bytes|tx_packets} counter
>
> 2) alloc_percpu(struct ...) instead of alloc_percpu(sizeof(struct ...))
>
> 3) free_netdev() in destructor
>
> 4) for_each_possible_cpu() instead of for_each_online_cpu()
>
> 5) br_get_stats() use local variables for the sake of concurrent users
No need, bridge has no queue!
^ permalink raw reply [flat|nested] 15+ messages in thread
* [PATCH] bridge: per-cpu packet statistics (v2)
2010-03-02 9:02 ` Eric Dumazet
2010-03-02 17:23 ` [Bridge] " Stephen Hemminger
@ 2010-03-02 17:58 ` Stephen Hemminger
2010-03-02 18:06 ` Eric Dumazet
1 sibling, 1 reply; 15+ messages in thread
From: Stephen Hemminger @ 2010-03-02 17:58 UTC (permalink / raw)
To: David Miller; +Cc: Eric Dumazet, netdev, bridge
The shared packet statistics are a potential source of slow down
on bridged traffic. Convert to per-cpu array, but only keep those
statistics which change per-packet.
Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
---
Take Eric's bug-fix changes, but keep the per-cpu transmit
statistics. There is no point in going multiqueue for
a pseudo-interface which is lockless and queueless.
net/bridge/br_device.c | 43 ++++++++++++++++++++++++++++++++++++++-----
net/bridge/br_if.c | 6 ++++++
net/bridge/br_input.c | 5 +++--
net/bridge/br_private.h | 8 ++++++++
4 files changed, 55 insertions(+), 7 deletions(-)
--- a/net/bridge/br_device.c 2010-03-02 09:26:23.208191713 -0800
+++ b/net/bridge/br_device.c 2010-03-02 09:29:38.811818410 -0800
@@ -26,11 +26,12 @@ netdev_tx_t br_dev_xmit(struct sk_buff *
const unsigned char *dest = skb->data;
struct net_bridge_fdb_entry *dst;
struct net_bridge_mdb_entry *mdst;
+ struct br_cpu_netstats *brstats = this_cpu_ptr(br->stats);
- BR_INPUT_SKB_CB(skb)->brdev = dev;
+ brstats->tx_packets++;
+ brstats->tx_bytes += skb->len;
- dev->stats.tx_packets++;
- dev->stats.tx_bytes += skb->len;
+ BR_INPUT_SKB_CB(skb)->brdev = dev;
skb_reset_mac_header(skb);
skb_pull(skb, ETH_HLEN);
@@ -81,6 +82,31 @@ static int br_dev_stop(struct net_device
return 0;
}
+static struct net_device_stats *br_get_stats(struct net_device *dev)
+{
+ struct net_bridge *br = netdev_priv(dev);
+ struct net_device_stats *stats = &dev->stats;
+ struct br_cpu_netstats sum = { 0 };
+ unsigned int cpu;
+
+ for_each_possible_cpu(cpu) {
+ const struct br_cpu_netstats *bstats
+ = per_cpu_ptr(br->stats, cpu);
+
+ sum.tx_bytes += bstats->tx_bytes;
+ sum.tx_packets += bstats->tx_packets;
+ sum.rx_bytes += bstats->rx_bytes;
+ sum.rx_packets += bstats->rx_packets;
+ }
+
+ stats->tx_bytes = sum.tx_bytes;
+ stats->tx_packets = sum.tx_packets;
+ stats->rx_bytes = sum.rx_bytes;
+ stats->rx_packets = sum.rx_packets;
+
+ return stats;
+}
+
static int br_change_mtu(struct net_device *dev, int new_mtu)
{
struct net_bridge *br = netdev_priv(dev);
@@ -180,19 +206,28 @@ static const struct net_device_ops br_ne
.ndo_open = br_dev_open,
.ndo_stop = br_dev_stop,
.ndo_start_xmit = br_dev_xmit,
+ .ndo_get_stats = br_get_stats,
.ndo_set_mac_address = br_set_mac_address,
.ndo_set_multicast_list = br_dev_set_multicast_list,
.ndo_change_mtu = br_change_mtu,
.ndo_do_ioctl = br_dev_ioctl,
};
+static void br_dev_free(struct net_device *dev)
+{
+ struct net_bridge *br = netdev_priv(dev);
+
+ free_percpu(br->stats);
+ free_netdev(dev);
+}
+
void br_dev_setup(struct net_device *dev)
{
random_ether_addr(dev->dev_addr);
ether_setup(dev);
dev->netdev_ops = &br_netdev_ops;
- dev->destructor = free_netdev;
+ dev->destructor = br_dev_free;
SET_ETHTOOL_OPS(dev, &br_ethtool_ops);
dev->tx_queue_len = 0;
dev->priv_flags = IFF_EBRIDGE;
--- a/net/bridge/br_if.c 2010-03-02 09:26:23.188192219 -0800
+++ b/net/bridge/br_if.c 2010-03-02 09:31:19.801199305 -0800
@@ -185,6 +185,12 @@ static struct net_device *new_bridge_dev
br = netdev_priv(dev);
br->dev = dev;
+ br->stats = alloc_percpu(struct br_cpu_netstats);
+ if (!br->stats) {
+ free_netdev(dev);
+ return NULL;
+ }
+
spin_lock_init(&br->lock);
INIT_LIST_HEAD(&br->port_list);
spin_lock_init(&br->hash_lock);
--- a/net/bridge/br_input.c 2010-03-02 09:26:23.196192436 -0800
+++ b/net/bridge/br_input.c 2010-03-02 09:27:04.424067504 -0800
@@ -23,9 +23,11 @@ const u8 br_group_address[ETH_ALEN] = {
static int br_pass_frame_up(struct sk_buff *skb)
{
struct net_device *indev, *brdev = BR_INPUT_SKB_CB(skb)->brdev;
+ struct net_bridge *br = netdev_priv(brdev);
+ struct br_cpu_netstats *brstats = this_cpu_ptr(br->stats);
- brdev->stats.rx_packets++;
- brdev->stats.rx_bytes += skb->len;
+ brstats->rx_packets++;
+ brstats->rx_bytes += skb->len;
indev = skb->dev;
skb->dev = brdev;
--- a/net/bridge/br_private.h 2010-03-02 09:26:23.180193181 -0800
+++ b/net/bridge/br_private.h 2010-03-02 09:27:04.424067504 -0800
@@ -135,6 +135,14 @@ struct net_bridge
spinlock_t lock;
struct list_head port_list;
struct net_device *dev;
+
+ struct br_cpu_netstats __percpu {
+ unsigned long rx_packets;
+ unsigned long tx_packets;
+ unsigned long rx_bytes;
+ unsigned long tx_bytes;
+ } *stats;
+
spinlock_t hash_lock;
struct hlist_head hash[BR_HASH_SIZE];
unsigned long feature_mask;
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH] bridge: per-cpu packet statistics (v2)
2010-03-02 17:58 ` [PATCH] bridge: per-cpu packet statistics (v2) Stephen Hemminger
@ 2010-03-02 18:06 ` Eric Dumazet
2010-03-02 23:32 ` [Bridge] [PATCH] bridge: per-cpu packet statistics (v3) Stephen Hemminger
0 siblings, 1 reply; 15+ messages in thread
From: Eric Dumazet @ 2010-03-02 18:06 UTC (permalink / raw)
To: Stephen Hemminger; +Cc: David Miller, netdev, bridge
Le mardi 02 mars 2010 à 09:58 -0800, Stephen Hemminger a écrit :
> + struct br_cpu_netstats __percpu {
> + unsigned long rx_packets;
> + unsigned long tx_packets;
> + unsigned long rx_bytes;
> + unsigned long tx_bytes;
> + } *stats;
> +
OK, but please group the rx stats together instead of interleaving them with
the tx stats; this reduces the chance of touching two cache lines in the tx path (or rx path).
struct br_cpu_netstats __percpu {
> unsigned long rx_packets;
> + unsigned long rx_bytes;
>
> + unsigned long tx_packets;
> + unsigned long tx_bytes;
> + } *stats;
> +
^ permalink raw reply [flat|nested] 15+ messages in thread
* [Bridge] [PATCH] bridge: per-cpu packet statistics (v3)
2010-03-02 18:06 ` Eric Dumazet
@ 2010-03-02 23:32 ` Stephen Hemminger
2010-03-03 6:09 ` Eric Dumazet
2010-03-17 4:27 ` David Miller
0 siblings, 2 replies; 15+ messages in thread
From: Stephen Hemminger @ 2010-03-02 23:32 UTC (permalink / raw)
To: David Miller; +Cc: Eric Dumazet, netdev, bridge
The shared packet statistics are a potential source of slow down
on bridged traffic. Convert to per-cpu array, but only keep those
statistics which change per-packet.
Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
---
Group the rx counters together and the tx counters together, per Eric's suggestion
net/bridge/br_device.c | 43 ++++++++++++++++++++++++++++++++++++++-----
net/bridge/br_if.c | 6 ++++++
net/bridge/br_input.c | 5 +++--
net/bridge/br_private.h | 8 ++++++++
4 files changed, 55 insertions(+), 7 deletions(-)
--- a/net/bridge/br_device.c 2010-03-02 10:48:44.527817663 -0800
+++ b/net/bridge/br_device.c 2010-03-02 10:48:48.287817348 -0800
@@ -26,11 +26,12 @@ netdev_tx_t br_dev_xmit(struct sk_buff *
const unsigned char *dest = skb->data;
struct net_bridge_fdb_entry *dst;
struct net_bridge_mdb_entry *mdst;
+ struct br_cpu_netstats *brstats = this_cpu_ptr(br->stats);
- BR_INPUT_SKB_CB(skb)->brdev = dev;
+ brstats->tx_packets++;
+ brstats->tx_bytes += skb->len;
- dev->stats.tx_packets++;
- dev->stats.tx_bytes += skb->len;
+ BR_INPUT_SKB_CB(skb)->brdev = dev;
skb_reset_mac_header(skb);
skb_pull(skb, ETH_HLEN);
@@ -81,6 +82,31 @@ static int br_dev_stop(struct net_device
return 0;
}
+static struct net_device_stats *br_get_stats(struct net_device *dev)
+{
+ struct net_bridge *br = netdev_priv(dev);
+ struct net_device_stats *stats = &dev->stats;
+ struct br_cpu_netstats sum = { 0 };
+ unsigned int cpu;
+
+ for_each_possible_cpu(cpu) {
+ const struct br_cpu_netstats *bstats
+ = per_cpu_ptr(br->stats, cpu);
+
+ sum.tx_bytes += bstats->tx_bytes;
+ sum.tx_packets += bstats->tx_packets;
+ sum.rx_bytes += bstats->rx_bytes;
+ sum.rx_packets += bstats->rx_packets;
+ }
+
+ stats->tx_bytes = sum.tx_bytes;
+ stats->tx_packets = sum.tx_packets;
+ stats->rx_bytes = sum.rx_bytes;
+ stats->rx_packets = sum.rx_packets;
+
+ return stats;
+}
+
static int br_change_mtu(struct net_device *dev, int new_mtu)
{
struct net_bridge *br = netdev_priv(dev);
@@ -180,19 +206,28 @@ static const struct net_device_ops br_ne
.ndo_open = br_dev_open,
.ndo_stop = br_dev_stop,
.ndo_start_xmit = br_dev_xmit,
+ .ndo_get_stats = br_get_stats,
.ndo_set_mac_address = br_set_mac_address,
.ndo_set_multicast_list = br_dev_set_multicast_list,
.ndo_change_mtu = br_change_mtu,
.ndo_do_ioctl = br_dev_ioctl,
};
+static void br_dev_free(struct net_device *dev)
+{
+ struct net_bridge *br = netdev_priv(dev);
+
+ free_percpu(br->stats);
+ free_netdev(dev);
+}
+
void br_dev_setup(struct net_device *dev)
{
random_ether_addr(dev->dev_addr);
ether_setup(dev);
dev->netdev_ops = &br_netdev_ops;
- dev->destructor = free_netdev;
+ dev->destructor = br_dev_free;
SET_ETHTOOL_OPS(dev, &br_ethtool_ops);
dev->tx_queue_len = 0;
dev->priv_flags = IFF_EBRIDGE;
--- a/net/bridge/br_if.c 2010-03-02 10:48:44.507817575 -0800
+++ b/net/bridge/br_if.c 2010-03-02 10:48:48.287817348 -0800
@@ -185,6 +185,12 @@ static struct net_device *new_bridge_dev
br = netdev_priv(dev);
br->dev = dev;
+ br->stats = alloc_percpu(struct br_cpu_netstats);
+ if (!br->stats) {
+ free_netdev(dev);
+ return NULL;
+ }
+
spin_lock_init(&br->lock);
INIT_LIST_HEAD(&br->port_list);
spin_lock_init(&br->hash_lock);
--- a/net/bridge/br_input.c 2010-03-02 10:48:44.515818544 -0800
+++ b/net/bridge/br_input.c 2010-03-02 10:48:48.287817348 -0800
@@ -23,9 +23,11 @@ const u8 br_group_address[ETH_ALEN] = {
static int br_pass_frame_up(struct sk_buff *skb)
{
struct net_device *indev, *brdev = BR_INPUT_SKB_CB(skb)->brdev;
+ struct net_bridge *br = netdev_priv(brdev);
+ struct br_cpu_netstats *brstats = this_cpu_ptr(br->stats);
- brdev->stats.rx_packets++;
- brdev->stats.rx_bytes += skb->len;
+ brstats->rx_packets++;
+ brstats->rx_bytes += skb->len;
indev = skb->dev;
skb->dev = brdev;
--- a/net/bridge/br_private.h 2010-03-02 10:48:44.503817627 -0800
+++ b/net/bridge/br_private.h 2010-03-02 10:49:10.632566819 -0800
@@ -135,6 +135,14 @@ struct net_bridge
spinlock_t lock;
struct list_head port_list;
struct net_device *dev;
+
+ struct br_cpu_netstats __percpu {
+ unsigned long rx_packets;
+ unsigned long rx_bytes;
+ unsigned long tx_packets;
+ unsigned long tx_bytes;
+ } *stats;
+
spinlock_t hash_lock;
struct hlist_head hash[BR_HASH_SIZE];
unsigned long feature_mask;
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [Bridge] [PATCH] bridge: per-cpu packet statistics (v3)
2010-03-02 23:32 ` [Bridge] [PATCH] bridge: per-cpu packet statistics (v3) Stephen Hemminger
@ 2010-03-03 6:09 ` Eric Dumazet
2010-03-03 9:16 ` David Miller
2010-03-17 4:27 ` David Miller
1 sibling, 1 reply; 15+ messages in thread
From: Eric Dumazet @ 2010-03-03 6:09 UTC (permalink / raw)
To: Stephen Hemminger; +Cc: David Miller, netdev, bridge
Le mardi 02 mars 2010 à 15:32 -0800, Stephen Hemminger a écrit :
> The shared packet statistics are a potential source of slow down
> on bridged traffic. Convert to per-cpu array, but only keep those
> statistics which change per-packet.
>
> Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
>
Thanks Stephen !
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [Bridge] [PATCH] bridge: per-cpu packet statistics (v3)
2010-03-03 6:09 ` Eric Dumazet
@ 2010-03-03 9:16 ` David Miller
0 siblings, 0 replies; 15+ messages in thread
From: David Miller @ 2010-03-03 9:16 UTC (permalink / raw)
To: eric.dumazet; +Cc: shemminger, netdev, bridge
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Wed, 03 Mar 2010 07:09:04 +0100
> Le mardi 02 mars 2010 à 15:32 -0800, Stephen Hemminger a écrit :
>> The shared packet statistics are a potential source of slow down
>> on bridged traffic. Convert to per-cpu array, but only keep those
>> statistics which change per-packet.
>>
>> Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
>>
>
> Thanks Stephen !
>
> Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
This is fine but needs to go into the next merge window, not
this one.
Bug fixes only now.
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [Bridge] [PATCH] bridge: per-cpu packet statistics
2010-03-02 17:22 ` [Bridge] " Stephen Hemminger
@ 2010-03-16 2:48 ` Herbert Xu
0 siblings, 0 replies; 15+ messages in thread
From: Herbert Xu @ 2010-03-16 2:48 UTC (permalink / raw)
To: Stephen Hemminger; +Cc: eric.dumazet, netdev, bridge, davem
Stephen Hemminger <shemminger@vyatta.com> wrote:
>
> Bridge is already using lockless transmit LLTX, so tx_lock is not touched.
LLTX doesn't actually buy you anything since you're still going
through a single qdisc. To get the full benefits of the per-cpu
counters you need to implement multiqueue support in the bridge.
Cheers,
--
Visit Openswan at http://www.openswan.org/
Email: Herbert Xu 许志壬 <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [Bridge] [PATCH] bridge: per-cpu packet statistics
2010-03-02 17:23 ` [Bridge] " Stephen Hemminger
@ 2010-03-16 2:51 ` Herbert Xu
0 siblings, 0 replies; 15+ messages in thread
From: Herbert Xu @ 2010-03-16 2:51 UTC (permalink / raw)
To: Stephen Hemminger; +Cc: eric.dumazet, netdev, bridge, davem
Stephen Hemminger <shemminger@vyatta.com> wrote:
>
> No need, bridge has no queue!
OK, ignore my previous comment :)
--
Visit Openswan at http://www.openswan.org/
Email: Herbert Xu 许志壬 <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [Bridge] [PATCH] bridge: per-cpu packet statistics (v3)
2010-03-02 23:32 ` [Bridge] [PATCH] bridge: per-cpu packet statistics (v3) Stephen Hemminger
2010-03-03 6:09 ` Eric Dumazet
@ 2010-03-17 4:27 ` David Miller
1 sibling, 0 replies; 15+ messages in thread
From: David Miller @ 2010-03-17 4:27 UTC (permalink / raw)
To: shemminger; +Cc: eric.dumazet, netdev, bridge
From: Stephen Hemminger <shemminger@vyatta.com>
Date: Tue, 2 Mar 2010 15:32:09 -0800
> The shared packet statistics are a potential source of slow down
> on bridged traffic. Convert to per-cpu array, but only keep those
> statistics which change per-packet.
>
> Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Applied.
^ permalink raw reply [flat|nested] 15+ messages in thread
end of thread, other threads:[~2010-03-17 4:26 UTC | newest]
Thread overview: 15+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2010-03-02 0:16 [PATCH] bridge: per-cpu packet statistics Stephen Hemminger
2010-03-02 6:01 ` Eric Dumazet
2010-03-02 17:22 ` [Bridge] " Stephen Hemminger
2010-03-16 2:48 ` Herbert Xu
2010-03-02 7:43 ` Eric Dumazet
2010-03-02 7:51 ` Eric Dumazet
2010-03-02 9:02 ` Eric Dumazet
2010-03-02 17:23 ` [Bridge] " Stephen Hemminger
2010-03-16 2:51 ` Herbert Xu
2010-03-02 17:58 ` [PATCH] bridge: per-cpu packet statistics (v2) Stephen Hemminger
2010-03-02 18:06 ` Eric Dumazet
2010-03-02 23:32 ` [Bridge] [PATCH] bridge: per-cpu packet statistics (v3) Stephen Hemminger
2010-03-03 6:09 ` Eric Dumazet
2010-03-03 9:16 ` David Miller
2010-03-17 4:27 ` David Miller
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).