* [PATCH net-next-2.6] loopback: Implement 64bit stats on 32bit arches
@ 2010-06-14 15:59 Eric Dumazet
  2010-06-15  6:14 ` David Miller
  0 siblings, 1 reply; 19+ messages in thread
From: Eric Dumazet @ 2010-06-14 15:59 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, Ben Hutchings

Use a seqcount_t to synchronize the stats producer and consumer for the
packets and bytes counters, which are now u64 types.

(The drops counter, being rarely used, stays a native "unsigned long"
type.)

No noticeable performance impact on x86, as it only adds two increments
per frame. It might be more expensive on arches where smp_wmb() is not
free.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
---
 drivers/net/loopback.c |   61 ++++++++++++++++++++++++++++++++-------
 1 file changed, 51 insertions(+), 10 deletions(-)
diff --git a/drivers/net/loopback.c b/drivers/net/loopback.c
index 72b7949..09334f8 100644
--- a/drivers/net/loopback.c
+++ b/drivers/net/loopback.c
@@ -60,11 +60,51 @@
 #include <net/net_namespace.h>
 
 struct pcpu_lstats {
-	unsigned long packets;
-	unsigned long bytes;
+	u64 packets;
+	u64 bytes;
+#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
+	seqcount_t seq;
+#endif
 	unsigned long drops;
 };
 
+#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
+static inline void lstats_update_begin(struct pcpu_lstats *lstats)
+{
+	write_seqcount_begin(&lstats->seq);
+}
+static inline void lstats_update_end(struct pcpu_lstats *lstats)
+{
+	write_seqcount_end(&lstats->seq);
+}
+static inline void lstats_fetch_and_add(u64 *packets, u64 *bytes, const struct pcpu_lstats *lstats)
+{
+	u64 tpackets, tbytes;
+	unsigned int seq;
+
+	do {
+		seq = read_seqcount_begin(&lstats->seq);
+		tpackets = lstats->packets;
+		tbytes = lstats->bytes;
+	} while (read_seqcount_retry(&lstats->seq, seq));
+
+	*packets += tpackets;
+	*bytes += tbytes;
+}
+#else
+static inline void lstats_update_begin(struct pcpu_lstats *lstats)
+{
+}
+static inline void lstats_update_end(struct pcpu_lstats *lstats)
+{
+}
+static inline void lstats_fetch_and_add(u64 *packets, u64 *bytes, const struct pcpu_lstats *lstats)
+{
+	*packets += lstats->packets;
+	*bytes += lstats->bytes;
+}
+#endif
+
 /*
  * The higher levels take care of making this non-reentrant (it's
  * called with bh's disabled).
@@ -86,21 +126,23 @@ static netdev_tx_t loopback_xmit(struct sk_buff *skb,
 
 	len = skb->len;
 	if (likely(netif_rx(skb) == NET_RX_SUCCESS)) {
+		lstats_update_begin(lb_stats);
 		lb_stats->bytes += len;
 		lb_stats->packets++;
+		lstats_update_end(lb_stats);
 	} else
 		lb_stats->drops++;
 
 	return NETDEV_TX_OK;
 }
 
-static struct net_device_stats *loopback_get_stats(struct net_device *dev)
+static struct rtnl_link_stats64 *loopback_get_stats64(struct net_device *dev)
 {
 	const struct pcpu_lstats __percpu *pcpu_lstats;
-	struct net_device_stats *stats = &dev->stats;
-	unsigned long bytes = 0;
-	unsigned long packets = 0;
-	unsigned long drops = 0;
+	struct rtnl_link_stats64 *stats = &dev->stats64;
+	u64 bytes = 0;
+	u64 packets = 0;
+	u64 drops = 0;
 	int i;
 
 	pcpu_lstats = (void __percpu __force *)dev->ml_priv;
@@ -108,8 +150,7 @@ static struct net_device_stats *loopback_get_stats(struct net_device *dev)
 		const struct pcpu_lstats *lb_stats;
 
 		lb_stats = per_cpu_ptr(pcpu_lstats, i);
-		bytes   += lb_stats->bytes;
-		packets += lb_stats->packets;
+		lstats_fetch_and_add(&packets, &bytes, lb_stats);
 		drops   += lb_stats->drops;
 	}
 	stats->rx_packets = packets;
@@ -158,7 +199,7 @@ static void loopback_dev_free(struct net_device *dev)
 static const struct net_device_ops loopback_ops = {
 	.ndo_init      = loopback_dev_init,
 	.ndo_start_xmit= loopback_xmit,
-	.ndo_get_stats = loopback_get_stats,
+	.ndo_get_stats64 = loopback_get_stats64,
 };
 
 /*




* Re: [PATCH net-next-2.6] loopback: Implement 64bit stats on 32bit arches
  2010-06-14 15:59 [PATCH net-next-2.6] loopback: Implement 64bit stats on 32bit arches Eric Dumazet
@ 2010-06-15  6:14 ` David Miller
  2010-06-15  6:49   ` Nick Piggin
  2010-06-15 10:14   ` [PATCH net-next-2.6] net: Introduce u64_stats_sync infrastructure Eric Dumazet
  0 siblings, 2 replies; 19+ messages in thread
From: David Miller @ 2010-06-15  6:14 UTC (permalink / raw)
  To: eric.dumazet; +Cc: netdev, bhutchings

From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Mon, 14 Jun 2010 17:59:22 +0200

> Use a seqcount_t to synchronize the stats producer and consumer for the
> packets and bytes counters, which are now u64 types.
> 
> (The drops counter, being rarely used, stays a native "unsigned long"
> type.)
> 
> No noticeable performance impact on x86, as it only adds two increments
> per frame. It might be more expensive on arches where smp_wmb() is not
> free.
> 
> Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>

Applied, but I suspect we might end up eventually needing to
abstract this kind of technique in a common place so other
spots can use it.


* Re: [PATCH net-next-2.6] loopback: Implement 64bit stats on 32bit arches
  2010-06-15  6:14 ` David Miller
@ 2010-06-15  6:49   ` Nick Piggin
  2010-06-15  7:23     ` Eric Dumazet
  2010-06-15 10:14   ` [PATCH net-next-2.6] net: Introduce u64_stats_sync infrastructure Eric Dumazet
  1 sibling, 1 reply; 19+ messages in thread
From: Nick Piggin @ 2010-06-15  6:49 UTC (permalink / raw)
  To: David Miller; +Cc: eric.dumazet, netdev, bhutchings

On Mon, Jun 14, 2010 at 11:14:12PM -0700, David Miller wrote:
> From: Eric Dumazet <eric.dumazet@gmail.com>
> Date: Mon, 14 Jun 2010 17:59:22 +0200
> 
> > Use a seqcount_t to synchronize the stats producer and consumer for the
> > packets and bytes counters, which are now u64 types.
> > 
> > (The drops counter, being rarely used, stays a native "unsigned long"
> > type.)
> > 
> > No noticeable performance impact on x86, as it only adds two increments
> > per frame. It might be more expensive on arches where smp_wmb() is not
> > free.
> > 
> > Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
> 
> Applied, but I suspect we might end up eventually needing to
> abstract this kind of technique in a common place so other
> spots can use it.

Check i_size stuff in include/linux/fs.h if you consider doing this.
And keep preempt in mind too. I assume you can't be preempted at this
point, but if you're prone to change the locking, it might be worth
the (small) cost of doing explicit preempt_disable() (and maybe to
help the sanity of the -rt guys too).



* Re: [PATCH net-next-2.6] loopback: Implement 64bit stats on 32bit arches
  2010-06-15  6:49   ` Nick Piggin
@ 2010-06-15  7:23     ` Eric Dumazet
  0 siblings, 0 replies; 19+ messages in thread
From: Eric Dumazet @ 2010-06-15  7:23 UTC (permalink / raw)
  To: Nick Piggin; +Cc: David Miller, netdev, bhutchings

On Tuesday 15 June 2010 at 16:49 +1000, Nick Piggin wrote:
> On Mon, Jun 14, 2010 at 11:14:12PM -0700, David Miller wrote:
> > From: Eric Dumazet <eric.dumazet@gmail.com>
> > Date: Mon, 14 Jun 2010 17:59:22 +0200
> > 
> > > Use a seqcount_t to synchronize the stats producer and consumer for the
> > > packets and bytes counters, which are now u64 types.
> > > 
> > > (The drops counter, being rarely used, stays a native "unsigned long"
> > > type.)
> > > 
> > > No noticeable performance impact on x86, as it only adds two increments
> > > per frame. It might be more expensive on arches where smp_wmb() is not
> > > free.
> > > 
> > > Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
> > 
> > Applied, but I suspect we might end up eventually needing to
> > abstract this kind of technique in a common place so other
> > spots can use it.
> 

David, I was planning to add similar stuff for SNMP, xt_RATEEST and txq
accounting, so yes, I'll probably move the helpers to a common include file.

This first patch was a first shot to lay the groundwork.

> Check i_size stuff in include/linux/fs.h if you consider doing this.

Yes, I was aware of this stuff.

> And keep preempt in mind too. I assume you can't be preempted at this
> point, but if you're prone to change the locking, it might be worth
> the (small) cost of doing explicit preempt_disable() (and maybe to
> help the sanity of the -rt guys too).
> 

Yes, we cannot be preempted at this point, as ndo_start_xmit() handlers
are called with BH disabled (there is a comment for this assertion in
front of loopback_xmit()):

dev_queue_xmit() 
  rcu_read_lock_bh();
  ...
  ndo_start_xmit();

I'll take care of the preempt status in following patches, but I suspect
it's more a BH enable/disable question in the network stack anyway.
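
As an aside, a producer that ran in process context (a hypothetical case,
not the loopback one) would have to provide that exclusion itself. A
minimal sketch, reusing the pcpu_lstats helpers from the patch above:

	struct pcpu_lstats __percpu *pcpu_lstats;
	struct pcpu_lstats *lstats;

	pcpu_lstats = (void __percpu __force *)dev->ml_priv;
	/* mask BH so the softirq producer cannot run on this cpu
	 * between lstats_update_begin() and lstats_update_end()
	 */
	local_bh_disable();
	lstats = this_cpu_ptr(pcpu_lstats);
	lstats_update_begin(lstats);
	lstats->bytes += len;
	lstats->packets++;
	lstats_update_end(lstats);
	local_bh_enable();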

Thanks !




* [PATCH net-next-2.6] net: Introduce u64_stats_sync infrastructure
  2010-06-15  6:14 ` David Miller
  2010-06-15  6:49   ` Nick Piggin
@ 2010-06-15 10:14   ` Eric Dumazet
  2010-06-15 10:25     ` Nick Piggin
  2010-06-15 10:39     ` [PATCH net-next-2.6] bridge: 64bit rx/tx counters Eric Dumazet
  1 sibling, 2 replies; 19+ messages in thread
From: Eric Dumazet @ 2010-06-15 10:14 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, bhutchings, Nick Piggin

On Monday 14 June 2010 at 23:14 -0700, David Miller wrote:
> From: Eric Dumazet <eric.dumazet@gmail.com>
> Date: Mon, 14 Jun 2010 17:59:22 +0200
> 
> > Use a seqcount_t to synchronize the stats producer and consumer for the
> > packets and bytes counters, which are now u64 types.
> > 
> > (The drops counter, being rarely used, stays a native "unsigned long"
> > type.)
> > 
> > No noticeable performance impact on x86, as it only adds two increments
> > per frame. It might be more expensive on arches where smp_wmb() is not
> > free.
> > 
> > Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
> 
> Applied, but I suspect we might end up eventually needing to
> abstract this kind of technique in a common place so other
> spots can use it.

Here is the followup patch to abstract things a bit, before upcoming
conversions.

Thanks !

[PATCH net-next-2.6] net: Introduce u64_stats_sync infrastructure

To properly implement 64bit network statistics on 32bit or 64bit hosts,
we provide one new type and four methods, to ease conversions.

The stats producer should use the following template, provided it already
has exclusive access to the counters (a lock is already taken, or per cpu
data is used in a non-preemptible context).

Let me repeat: stats producers must be serialized by other means before
using this template. Preemption must be disabled too.

u64_stats_update_begin(&stats->syncp);
stats->bytes += len;
stats->packets++;
u64_stats_update_end(&stats->syncp);

While a consumer should use the following template to get a consistent
snapshot:

u64 tbytes, tpackets;
unsigned int start;

do {
	start = u64_stats_fetch_begin(&stats->syncp);
	tbytes = stats->bytes;
	tpackets = stats->packets;
} while (u64_stats_fetch_retry(&stats->syncp, start));

This patch uses this infrastructure in the net loopback driver, instead
of the specific one added in commit 6b10de38f0ef (loopback: Implement
64bit stats on 32bit arches).

Suggested by David Miller

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
CC: Nick Piggin <npiggin@suse.de>
---
 drivers/net/loopback.c    |   61 ++++++++----------------------------
 include/linux/netdevice.h |   50 +++++++++++++++++++++++++++++
 2 files changed, 65 insertions(+), 46 deletions(-)

diff --git a/drivers/net/loopback.c b/drivers/net/loopback.c
index 09334f8..f20b156 100644
--- a/drivers/net/loopback.c
+++ b/drivers/net/loopback.c
@@ -60,51 +60,12 @@
 #include <net/net_namespace.h>
 
 struct pcpu_lstats {
-	u64 packets;
-	u64 bytes;
-#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
-	seqcount_t seq;
-#endif
-	unsigned long drops;
+	u64			packets;
+	u64			bytes;
+	struct u64_stats_sync	syncp;
+	unsigned long		drops;
 };
 
-#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
-static inline void lstats_update_begin(struct pcpu_lstats *lstats)
-{
-	write_seqcount_begin(&lstats->seq);
-}
-static inline void lstats_update_end(struct pcpu_lstats *lstats)
-{
-	write_seqcount_end(&lstats->seq);
-}
-static inline void lstats_fetch_and_add(u64 *packets, u64 *bytes, const struct pcpu_lstats *lstats)
-{
-	u64 tpackets, tbytes;
-	unsigned int seq;
-
-	do {
-		seq = read_seqcount_begin(&lstats->seq);
-		tpackets = lstats->packets;
-		tbytes = lstats->bytes;
-	} while (read_seqcount_retry(&lstats->seq, seq));
-
-	*packets += tpackets;
-	*bytes += tbytes;
-}
-#else
-static inline void lstats_update_begin(struct pcpu_lstats *lstats)
-{
-}
-static inline void lstats_update_end(struct pcpu_lstats *lstats)
-{
-}
-static inline void lstats_fetch_and_add(u64 *packets, u64 *bytes, const struct pcpu_lstats *lstats)
-{
-	*packets += lstats->packets;
-	*bytes += lstats->bytes;
-}
-#endif
-
 /*
  * The higher levels take care of making this non-reentrant (it's
  * called with bh's disabled).
@@ -126,10 +87,10 @@ static netdev_tx_t loopback_xmit(struct sk_buff *skb,
 
 	len = skb->len;
 	if (likely(netif_rx(skb) == NET_RX_SUCCESS)) {
-		lstats_update_begin(lb_stats);
+		u64_stats_update_begin(&lb_stats->syncp);
 		lb_stats->bytes += len;
 		lb_stats->packets++;
-		lstats_update_end(lb_stats);
+		u64_stats_update_end(&lb_stats->syncp);
 	} else
 		lb_stats->drops++;
 
@@ -148,10 +109,18 @@ static struct rtnl_link_stats64 *loopback_get_stats64(struct net_device *dev)
 	pcpu_lstats = (void __percpu __force *)dev->ml_priv;
 	for_each_possible_cpu(i) {
 		const struct pcpu_lstats *lb_stats;
+		u64 tbytes, tpackets;
+		unsigned int start;
 
 		lb_stats = per_cpu_ptr(pcpu_lstats, i);
-		lstats_fetch_and_add(&packets, &bytes, lb_stats);
+		do {
+			start = u64_stats_fetch_begin(&lb_stats->syncp);
+			tbytes = lb_stats->bytes;
+			tpackets = lb_stats->packets;
+		} while (u64_stats_fetch_retry(&lb_stats->syncp, start));
 		drops   += lb_stats->drops;
+		bytes   += tbytes;
+		packets += tpackets;
 	}
 	stats->rx_packets = packets;
 	stats->tx_packets = packets;
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 4fbccc5..dd1d93d 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -174,6 +174,56 @@ static inline bool dev_xmit_complete(int rc)
 #define NET_DEVICE_STATS_DEFINE(name)	unsigned long pad_ ## name, name
 #endif
 
+#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
+struct u64_stats_sync {
+	seqcount_t	seq;
+};
+
+static inline void u64_stats_update_begin(struct u64_stats_sync *syncp)
+{
+	write_seqcount_begin(&syncp->seq);
+}
+
+static inline void u64_stats_update_end(struct u64_stats_sync *syncp)
+{
+	write_seqcount_end(&syncp->seq);
+}
+
+static inline unsigned int u64_stats_fetch_begin(const struct u64_stats_sync *syncp)
+{
+	return read_seqcount_begin(&syncp->seq);
+}
+
+static inline bool u64_stats_fetch_retry(const struct u64_stats_sync *syncp,
+					 unsigned int start)
+{
+	return read_seqcount_retry(&syncp->seq, start);
+}
+
+#else
+struct u64_stats_sync {
+};
+
+static inline void u64_stats_update_begin(struct u64_stats_sync *syncp)
+{
+}
+
+static inline void u64_stats_update_end(struct u64_stats_sync *syncp)
+{
+}
+
+static inline unsigned int u64_stats_fetch_begin(const struct u64_stats_sync *syncp)
+{
+	return 0;
+}
+
+static inline bool u64_stats_fetch_retry(const struct u64_stats_sync *syncp,
+					 unsigned int start)
+{
+	return false;
+}
+#endif
+
 struct net_device_stats {
 	NET_DEVICE_STATS_DEFINE(rx_packets);
 	NET_DEVICE_STATS_DEFINE(tx_packets);




* Re: [PATCH net-next-2.6] net: Introduce u64_stats_sync infrastructure
  2010-06-15 10:14   ` [PATCH net-next-2.6] net: Introduce u64_stats_sync infrastructure Eric Dumazet
@ 2010-06-15 10:25     ` Nick Piggin
  2010-06-15 10:43       ` Eric Dumazet
  2010-06-15 10:39     ` [PATCH net-next-2.6] bridge: 64bit rx/tx counters Eric Dumazet
  1 sibling, 1 reply; 19+ messages in thread
From: Nick Piggin @ 2010-06-15 10:25 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: David Miller, netdev, bhutchings

On Tue, Jun 15, 2010 at 12:14:16PM +0200, Eric Dumazet wrote:
> Here is the followup patch to abstract things a bit, before upcoming
> conversions.
> 
> Thanks !
> 
> [PATCH net-next-2.6] net: Introduce u64_stats_sync infrastructure
> 
> To properly implement 64bit network statistics on 32bit or 64bit hosts,
> we provide one new type and four methods, to ease conversions.
> 
> The stats producer should use the following template, provided it already
> has exclusive access to the counters (a lock is already taken, or per cpu
> data is used in a non-preemptible context).
> 
> Let me repeat: stats producers must be serialized by other means before
> using this template. Preemption must be disabled too.
> 
> u64_stats_update_begin(&stats->syncp);
> stats->bytes += len;
> stats->packets++;
> u64_stats_update_end(&stats->syncp);
> 
> While a consumer should use the following template to get a consistent
> snapshot:
> 
> u64 tbytes, tpackets;
> unsigned int start;
> 
> do {
> 	start = u64_stats_fetch_begin(&stats->syncp);
> 	tbytes = stats->bytes;
> 	tpackets = stats->packets;
> } while (u64_stats_fetch_retry(&stats->syncp, start));
> 
> This patch uses this infrastructure in the net loopback driver, instead
> of the specific one added in commit 6b10de38f0ef (loopback: Implement
> 64bit stats on 32bit arches).
> 
> Suggested by David Miller

Cool, I don't mind this, but perhaps could you add some comments
because it _will_ either be misused or copied and misused elsewhere :)

Callers must:
- write side must ensure mutual exclusion (even if it was previously OK
  to have lost updates on the writer side, the seqlock will explodde if
  it is taken concurrently for write)
- write side must not sleep
- readside and writeside must have local-CPU exclusion from one another;
  preempt, irq, bh as appropriate
- will only protect 64-bit sizes from tearing -- eg updating 2 different
  stats under the same write side will not ensure they are both seen in
  the same read side

But I do like the minimal design.
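
To make the rules concrete, here is a minimal sketch of a correct caller
(hypothetical struct and function names), where per cpu data provides the
writer mutual exclusion and BH masking provides the local exclusion:

	struct foo_stats {
		u64			bytes;
		u64			packets;
		struct u64_stats_sync	syncp;
	};

	static void foo_account(struct foo_stats __percpu *pcpu, unsigned int len)
	{
		struct foo_stats *stats;

		local_bh_disable();		/* no sleep, local exclusion */
		stats = this_cpu_ptr(pcpu);	/* per cpu : writers serialized */
		u64_stats_update_begin(&stats->syncp);
		stats->bytes += len;		/* each u64 is safe from tearing, */
		stats->packets++;		/* but the pair is not atomic */
		u64_stats_update_end(&stats->syncp);
		local_bh_enable();
	}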


* [PATCH net-next-2.6] bridge: 64bit rx/tx counters
  2010-06-15 10:14   ` [PATCH net-next-2.6] net: Introduce u64_stats_sync infrastructure Eric Dumazet
  2010-06-15 10:25     ` Nick Piggin
@ 2010-06-15 10:39     ` Eric Dumazet
  2010-06-22 17:25       ` David Miller
  2010-08-10  4:47       ` Andrew Morton
  1 sibling, 2 replies; 19+ messages in thread
From: Eric Dumazet @ 2010-06-15 10:39 UTC (permalink / raw)
  To: David Miller, Stephen Hemminger; +Cc: netdev, bhutchings, Nick Piggin

Note : should be applied after "net: Introduce 
u64_stats_sync infrastructure", if accepted.


Thanks

[PATCH net-next-2.6] bridge: 64bit rx/tx counters

Use u64_stats_sync infrastructure to provide 64bit rx/tx 
counters even on 32bit hosts.

It is safe to use a single u64_stats_sync for rx and tx,
because BH is disabled on both, and we use per_cpu data.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
---
 net/bridge/br_device.c  |   24 +++++++++++++++---------
 net/bridge/br_input.c   |    2 ++
 net/bridge/br_private.h |    9 +++++----
 3 files changed, 22 insertions(+), 13 deletions(-)

diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index b898364..d7bfe31 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -38,8 +38,10 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
 	}
 #endif
 
+	u64_stats_update_begin(&brstats->syncp);
 	brstats->tx_packets++;
 	brstats->tx_bytes += skb->len;
+	u64_stats_update_end(&brstats->syncp);
 
 	BR_INPUT_SKB_CB(skb)->brdev = dev;
 
@@ -92,21 +94,25 @@ static int br_dev_stop(struct net_device *dev)
 	return 0;
 }
 
-static struct net_device_stats *br_get_stats(struct net_device *dev)
+static struct rtnl_link_stats64 *br_get_stats64(struct net_device *dev)
 {
 	struct net_bridge *br = netdev_priv(dev);
-	struct net_device_stats *stats = &dev->stats;
-	struct br_cpu_netstats sum = { 0 };
+	struct rtnl_link_stats64 *stats = &dev->stats64;
+	struct br_cpu_netstats tmp, sum = { 0 };
 	unsigned int cpu;
 
 	for_each_possible_cpu(cpu) {
+		unsigned int start;
 		const struct br_cpu_netstats *bstats
 			= per_cpu_ptr(br->stats, cpu);
-
-		sum.tx_bytes   += bstats->tx_bytes;
-		sum.tx_packets += bstats->tx_packets;
-		sum.rx_bytes   += bstats->rx_bytes;
-		sum.rx_packets += bstats->rx_packets;
+		do {
+			start = u64_stats_fetch_begin(&bstats->syncp);
+			memcpy(&tmp, bstats, sizeof(tmp));
+		} while (u64_stats_fetch_retry(&bstats->syncp, start));
+		sum.tx_bytes   += tmp.tx_bytes;
+		sum.tx_packets += tmp.tx_packets;
+		sum.rx_bytes   += tmp.rx_bytes;
+		sum.rx_packets += tmp.rx_packets;
 	}
 
 	stats->tx_bytes   = sum.tx_bytes;
@@ -288,7 +294,7 @@ static const struct net_device_ops br_netdev_ops = {
 	.ndo_open		 = br_dev_open,
 	.ndo_stop		 = br_dev_stop,
 	.ndo_start_xmit		 = br_dev_xmit,
-	.ndo_get_stats		 = br_get_stats,
+	.ndo_get_stats64	 = br_get_stats64,
 	.ndo_set_mac_address	 = br_set_mac_address,
 	.ndo_set_multicast_list	 = br_dev_set_multicast_list,
 	.ndo_change_mtu		 = br_change_mtu,
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index 99647d8..86d357b 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -27,8 +27,10 @@ static int br_pass_frame_up(struct sk_buff *skb)
 	struct net_bridge *br = netdev_priv(brdev);
 	struct br_cpu_netstats *brstats = this_cpu_ptr(br->stats);
 
+	u64_stats_update_begin(&brstats->syncp);
 	brstats->rx_packets++;
 	brstats->rx_bytes += skb->len;
+	u64_stats_update_end(&brstats->syncp);
 
 	indev = skb->dev;
 	skb->dev = brdev;
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index c83519b..f078c54 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -146,10 +146,11 @@ struct net_bridge_port
 };
 
 struct br_cpu_netstats {
-	unsigned long	rx_packets;
-	unsigned long	rx_bytes;
-	unsigned long	tx_packets;
-	unsigned long	tx_bytes;
+	u64			rx_packets;
+	u64			rx_bytes;
+	u64			tx_packets;
+	u64			tx_bytes;
+	struct u64_stats_sync	syncp;
 };
 
 struct net_bridge




* Re: [PATCH net-next-2.6] net: Introduce u64_stats_sync infrastructure
  2010-06-15 10:25     ` Nick Piggin
@ 2010-06-15 10:43       ` Eric Dumazet
  2010-06-15 11:04         ` Nick Piggin
  0 siblings, 1 reply; 19+ messages in thread
From: Eric Dumazet @ 2010-06-15 10:43 UTC (permalink / raw)
  To: Nick Piggin; +Cc: David Miller, netdev, bhutchings

On Tuesday 15 June 2010 at 20:25 +1000, Nick Piggin wrote:
> On Tue, Jun 15, 2010 at 12:14:16PM +0200, Eric Dumazet wrote:
> > [...]
> 
> Cool, I don't mind this, but perhaps could you add some comments
> because it _will_ either be misused or copied and misused elsewhere :)
> 
> Callers must:
> - write side must ensure mutual exclusion (even if it was previously OK
>   to have lost updates on the writer side, the seqlock will explode if
>   it is taken concurrently for write)
> - write side must not sleep
> - readside and writeside must have local-CPU exclusion from one another;
>   preempt, irq, bh as appropriate
> - will only protect 64-bit sizes from tearing -- eg updating 2 different
>   stats under the same write side will not ensure they are both seen in
>   the same read side

Hmm, I am not sure I got this one, could you please give me a buggy
example ?

> 
> But I do like the minimal design.

Thanks !

I'll submit a v2 patch after my lunch to add all your comments, because
all clarifications are indeed very very welcome!





* Re: [PATCH net-next-2.6] net: Introduce u64_stats_sync infrastructure
  2010-06-15 10:43       ` Eric Dumazet
@ 2010-06-15 11:04         ` Nick Piggin
  2010-06-15 12:12           ` Eric Dumazet
  2010-06-15 13:29           ` [PATCH net-next-2.6 v2] " Eric Dumazet
  0 siblings, 2 replies; 19+ messages in thread
From: Nick Piggin @ 2010-06-15 11:04 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: David Miller, netdev, bhutchings

On Tue, Jun 15, 2010 at 12:43:25PM +0200, Eric Dumazet wrote:
> On Tuesday 15 June 2010 at 20:25 +1000, Nick Piggin wrote:
> > On Tue, Jun 15, 2010 at 12:14:16PM +0200, Eric Dumazet wrote:
> > > [...]
> > 
> > Cool, I don't mind this, but perhaps could you add some comments
> > because it _will_ either be misused or copied and misused elsewhere :)
> > 
> > Callers must:
> > - write side must ensure mutual exclusion (even if it was previously OK
> >   to have lost updates on the writer side, the seqlock will explode if
> >   it is taken concurrently for write)
> > - write side must not sleep
> > - readside and writeside must have local-CPU exclusion from one another;
> >   preempt, irq, bh as appropriate
> > - will only protect 64-bit sizes from tearing -- eg updating 2 different
> >   stats under the same write side will not ensure they are both seen in
> >   the same read side
> 
> Hmm, I am not sure I got this one, could you please give me a buggy
> example ?

So if you have a regular seqlock, the sequence:

write_seqcount_begin();
stat1++;
stat2--;
write_seqcount_end();

do {
	seq = read_seqcount_begin();
	sum = stat1 + stat2;
} while (read_seqcount_retry(seq));
BUG_ON(sum != 0);

This code is OK. But if it is using your stat sync, then it is buggy.
This is obvious to you of course, but someone who doesn't consider
the implementation might get caught out.

I guess all my other points are properties of seqcount code itself,
but they are not documented really well with the seqlock API
unfortunately.


> > But I do like the minimal design.
> 
> Thanks !
> 
> I'll submit a v2 patch after my lunch to add all your comments, because
> all clarifications are indeed very very welcome!

Thanks!


* Re: [PATCH net-next-2.6] net: Introduce u64_stats_sync infrastructure
  2010-06-15 11:04         ` Nick Piggin
@ 2010-06-15 12:12           ` Eric Dumazet
  2010-06-15 13:29           ` [PATCH net-next-2.6 v2] " Eric Dumazet
  1 sibling, 0 replies; 19+ messages in thread
From: Eric Dumazet @ 2010-06-15 12:12 UTC (permalink / raw)
  To: Nick Piggin; +Cc: David Miller, netdev, bhutchings

On Tuesday 15 June 2010 at 21:04 +1000, Nick Piggin wrote:

> So if you have a regular seqlock, the sequence:
> 
> write_seqcount_begin();
> stat1++;
> stat2--;
> write_seqcount_end();
> 
> do {
> 	seq = read_seqcount_begin();
> 	sum = stat1 + stat2;
> } while (read_seqcount_retry(seq));
> BUG_ON(sum != 0);
> 
> This code is OK. But if it is using your stat sync, then it is buggy.
> This is obvious to you of course, but someone who doesn't consider
> the implementation might get caught out.
> 
> I guess all my other points are properties of seqcount code itself,
> but they are not documented really well with the seqlock API
> unfortunately.
> 

Yep, not counting the fact that stat_sync is a noop on 64 bits anyway, so
external synchronization is pretty much needed (and probably already
present on all paths where I need this stuff. If not, it must be fixed).





* [PATCH net-next-2.6 v2] net: Introduce u64_stats_sync infrastructure
  2010-06-15 11:04         ` Nick Piggin
  2010-06-15 12:12           ` Eric Dumazet
@ 2010-06-15 13:29           ` Eric Dumazet
  2010-06-22 17:24             ` David Miller
  1 sibling, 1 reply; 19+ messages in thread
From: Eric Dumazet @ 2010-06-15 13:29 UTC (permalink / raw)
  To: Nick Piggin; +Cc: David Miller, netdev, bhutchings

On Tuesday 15 June 2010 at 21:04 +1000, Nick Piggin wrote:
> On Tue, Jun 15, 2010 at 12:43:25PM +0200, Eric Dumazet wrote:

> > I'll submit a v2 patch after my lunch to add all your comments, because
> > all clarifications are indeed very very welcomed !
> 
> Thanks!

Here is the second version of the patch, with the infrastructure only.

I chose to add a new include file, and to document the API in the same
file instead of in Documentation/somefile.

Once accepted, I'll provide the loopback driver update.

Thanks !

[PATCH net-next-2.6 v2] net: Introduce u64_stats_sync infrastructure

To properly implement 64bit network statistics on 32bit or 64bit hosts,
we provide one new type and four methods, to ease conversions.

The stats producer should use the following template, provided it already
has exclusive access to the counters (include/linux/u64_stats_sync.h
contains some documentation about the details):

    u64_stats_update_begin(&stats->syncp);
    stats->bytes64 += len;
    stats->packets64++;
    u64_stats_update_end(&stats->syncp);

While a consumer should use the following template to get a consistent
snapshot:

    u64 tbytes, tpackets;
    unsigned int start;

    do {
        start = u64_stats_fetch_begin(&stats->syncp);
        tbytes = stats->bytes64;
        tpackets = stats->packets64;
    } while (u64_stats_fetch_retry(&stats->syncp, start));


Suggested by David Miller, and comments courtesy of Nick Piggin.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
CC: Nick Piggin <npiggin@suse.de>
---
 include/linux/u64_stats_sync.h |  107 +++++++++++++++++++++++++++++++
 1 file changed, 107 insertions(+)

diff --git a/include/linux/u64_stats_sync.h b/include/linux/u64_stats_sync.h
index e69de29..5a4f318 100644
--- a/include/linux/u64_stats_sync.h
+++ b/include/linux/u64_stats_sync.h
@@ -0,0 +1,107 @@
+#ifndef _LINUX_U64_STATS_SYNC_H
+#define _LINUX_U64_STATS_SYNC_H
+
+/*
+ * To properly implement 64bit network statistics on 32bit and 64bit hosts,
+ * we provide a synchronization point, that is a noop on 64bit or UP kernels.
+ *
+ * Key points :
+ * 1) Use a seqcount on SMP 32bits, with low overhead.
+ * 2) Whole thing is a noop on 64bit arches or UP kernels.
+ * 3) Write side must ensure mutual exclusion, or one seqcount update could
+ *    be lost, thus blocking readers forever.
+ *    If this synchronization point is not a mutex, but a spinlock,
+ *    spin_lock_bh() or local_bh_disable() :
+ * 3.1) Write side should not sleep.
+ * 3.2) Write side should not allow preemption.
+ * 3.3) If applicable, interrupts should be disabled.
+ *
+ * 4) If a reader fetches several counters, there is no guarantee the whole
+ *    values are consistent (remember point 2 : this is a noop on 64bit
+ *    arches anyway).
+ *
+ * 5) Readers are allowed to sleep or be preempted/interrupted : they perform
+ *    pure reads. But if they have to fetch many values, it is better to not
+ *    allow preemption/interruption, to avoid many retries.
+ *
+ * Usage :
+ *
+ * The stats producer (writer) should use the following template, provided it
+ * already has exclusive access to the counters (a lock is already taken, or
+ * per cpu data is used [in a non-preemptible context])
+ * 
+ *   spin_lock_bh(...) or other synchronization to get exclusive access
+ *   ...
+ *   u64_stats_update_begin(&stats->syncp);
+ *   stats->bytes64 += len; // non atomic operation
+ *   stats->packets64++;    // non atomic operation 
+ *   u64_stats_update_end(&stats->syncp);
+ *
+ * While a consumer (reader) should use the following template to get a
+ * consistent snapshot of each variable (but no guarantee across several ones)
+ *
+ * u64 tbytes, tpackets;
+ * unsigned int start;
+ *
+ * do {
+ *         start = u64_stats_fetch_begin(&stats->syncp);
+ *         tbytes = stats->bytes64; // non atomic operation
+ *         tpackets = stats->packets64; // non atomic operation
+ * } while (u64_stats_fetch_retry(&stats->syncp, start));
+ *
+ *
+ * Example of use in drivers/net/loopback.c, using per_cpu containers,
+ * in BH disabled context.
+ */
+#include <linux/seqlock.h>
+
+#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
+struct u64_stats_sync {
+	seqcount_t	seq;
+};
+
+static inline void u64_stats_update_begin(struct u64_stats_sync *syncp)
+{
+	write_seqcount_begin(&syncp->seq);
+}
+
+static inline void u64_stats_update_end(struct u64_stats_sync *syncp)
+{
+	write_seqcount_end(&syncp->seq);
+}
+
+static inline unsigned int u64_stats_fetch_begin(const struct u64_stats_sync *syncp)
+{
+	return read_seqcount_begin(&syncp->seq);
+}
+
+static inline bool u64_stats_fetch_retry(const struct u64_stats_sync *syncp,
+					 unsigned int start)
+{
+	return read_seqcount_retry(&syncp->seq, start);
+}
+
+#else
+struct u64_stats_sync {
+};
+
+static inline void u64_stats_update_begin(struct u64_stats_sync *syncp)
+{
+}
+
+static inline void u64_stats_update_end(struct u64_stats_sync *syncp)
+{
+}
+
+static inline unsigned int u64_stats_fetch_begin(const struct u64_stats_sync *syncp)
+{
+	return 0;
+}
+
+static inline bool u64_stats_fetch_retry(const struct u64_stats_sync *syncp,
+					 unsigned int start)
+{
+	return false;
+}
+#endif
+
+#endif /* _LINUX_U64_STATS_SYNC_H */




* Re: [PATCH net-next-2.6 v2] net: Introduce u64_stats_sync infrastructure
  2010-06-15 13:29           ` [PATCH net-next-2.6 v2] " Eric Dumazet
@ 2010-06-22 17:24             ` David Miller
  2010-06-22 17:31               ` Eric Dumazet
  0 siblings, 1 reply; 19+ messages in thread
From: David Miller @ 2010-06-22 17:24 UTC (permalink / raw)
  To: eric.dumazet; +Cc: npiggin, netdev, bhutchings

From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Tue, 15 Jun 2010 15:29:54 +0200

> diff --git a/include/linux/u64_stats_sync.h b/include/linux/u64_stats_sync.h
> index e69de29..5a4f318 100644
> --- a/include/linux/u64_stats_sync.h
> +++ b/include/linux/u64_stats_sync.h
> @@ -0,0 +1,107 @@
> +#ifndef _LINUX_U64_STATS_SYNC_H
> +#define _LINUX_U64_STATS_SYNC_H
> +

That's a really weird patch hunk for a newly added file.

The hunk header is saying that the new file starts 1 line
in, which doesn't make any sense, and git rejects this,
saying that you're trying to make a modification to a
file which doesn't exist.

I applied this by hand, but I really wonder how you managed
to create such a patch :-)


* Re: [PATCH net-next-2.6] bridge: 64bit rx/tx counters
  2010-06-15 10:39     ` [PATCH net-next-2.6] bridge: 64bit rx/tx counters Eric Dumazet
@ 2010-06-22 17:25       ` David Miller
  2010-08-10  4:47       ` Andrew Morton
  1 sibling, 0 replies; 19+ messages in thread
From: David Miller @ 2010-06-22 17:25 UTC (permalink / raw)
  To: eric.dumazet; +Cc: shemminger, netdev, bhutchings, npiggin

From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Tue, 15 Jun 2010 12:39:36 +0200

> Note : should be applied after "net: Introduce 
> u64_stats_sync infrastructure", if accepted.
> 
> 
> Thanks
> 
> [PATCH net-next-2.6] bridge: 64bit rx/tx counters
> 
> Use u64_stats_sync infrastructure to provide 64bit rx/tx 
> counters even on 32bit hosts.
> 
> It is safe to use a single u64_stats_sync for rx and tx,
> because BH is disabled on both, and we use per_cpu data.
> 
> Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>

Applied.


* Re: [PATCH net-next-2.6 v2] net: Introduce u64_stats_sync infrastructure
  2010-06-22 17:24             ` David Miller
@ 2010-06-22 17:31               ` Eric Dumazet
  0 siblings, 0 replies; 19+ messages in thread
From: Eric Dumazet @ 2010-06-22 17:31 UTC (permalink / raw)
  To: David Miller; +Cc: npiggin, netdev, bhutchings

On Tuesday 22 June 2010 at 10:24 -0700, David Miller wrote:

> That's a really weird patch hunk for a newly added file.
> 
> The hunk header is saying that the new file start 1 line
> in.  Which doesn't make any sense, and GIT reject this
> saying that you're trying to make a modification to a
> file which doesn't exist.
> 
> I applied this by hand, but I really wonder how you managed
> to create such a patch :-)

Now you mention it, I remember I had problems with this newly added
file, and had to use several git commands to stabilize my tree.

I really don't know what happened exactly, sorry :)

Thanks !




* Re: [PATCH net-next-2.6] bridge: 64bit rx/tx counters
  2010-06-15 10:39     ` [PATCH net-next-2.6] bridge: 64bit rx/tx counters Eric Dumazet
  2010-06-22 17:25       ` David Miller
@ 2010-08-10  4:47       ` Andrew Morton
  2010-08-12 12:16         ` Eric Dumazet
  1 sibling, 1 reply; 19+ messages in thread
From: Andrew Morton @ 2010-08-10  4:47 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: David Miller, Stephen Hemminger, netdev, bhutchings, Nick Piggin

On Tue, 15 Jun 2010 12:39:36 +0200 Eric Dumazet <eric.dumazet@gmail.com> wrote:

> Note : should be applied after "net: Introduce 
> u64_stats_sync infrastructure", if accepted.
> 
> 
> Thanks
> 
> [PATCH net-next-2.6] bridge: 64bit rx/tx counters
> 
> Use u64_stats_sync infrastructure to provide 64bit rx/tx 
> counters even on 32bit hosts.
> 
> It is safe to use a single u64_stats_sync for rx and tx,
> because BH is disabled on both, and we use per_cpu data.
>

Oh for fuck's sake.  Will you guys just stop adding generic kernel
infrastructure behind everyone's backs?

Had I actually been aware that this stuff was going into the tree I'd
have pointed out that the u64_stats_* api needs renaming. 
s/stats/counter/ because it has no business assuming that the counter
is being used for statistics.


And all this open-coded per-cpu counter stuff added all over the place.
Were percpu_counters tested or reviewed and found inadequate and unfixable?
If so, please do tell.



* Re: [PATCH net-next-2.6] bridge: 64bit rx/tx counters
  2010-08-10  4:47       ` Andrew Morton
@ 2010-08-12 12:16         ` Eric Dumazet
  2010-08-12 15:07           ` Andrew Morton
  0 siblings, 1 reply; 19+ messages in thread
From: Eric Dumazet @ 2010-08-12 12:16 UTC (permalink / raw)
  To: Andrew Morton
  Cc: David Miller, Stephen Hemminger, netdev, bhutchings, Nick Piggin

On Monday 9 August 2010 at 21:47 -0700, Andrew Morton wrote:

> Oh for fuck's sake.  Will you guys just stop adding generic kernel
> infrastructure behind everyone's backs?
> 
> Had I actually been aware that this stuff was going into the tree I'd
> have pointed out that the u64_stats_* api needs renaming. 
> s/stats/counter/ because it has no business assuming that the counter
> is being used for statistics.
> 
> 

Sure. Someone suggested changing the name, considering values could
also be signed (s64 instead of u64_...).

> And all this open-coded per-cpu counter stuff added all over the place.
> Were percpu_counters tested or reviewed and found inadequate and unfixable?
> If so, please do tell.
> 

percpu_counters tries hard to maintain a view of the current value of
the (global) counter. This adds a cost because of a shared cache line
and locking. (__percpu_counter_sum() is not very scalable on big hosts,
it locks the percpu_counter lock for a possibly long iteration)


For network stats we don't want to maintain this central value; we do the
folding only when necessary. And this folding has zero effect on
concurrent writers (counter updates)

For network stack, we also need to update two values, a packet counter
and a bytes counter. percpu_counter is not very good for the 'bytes
counter', since we would have to use an arbitrarily big bias value.
Using several percpu_counter would also probably use more cache lines.
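
To illustrate with a hypothetical conversion (not proposed code), using
the existing __percpu_counter_add() and percpu_counter_batch, plus a
made-up bytes_batch:

	/* packets grow by 1 : the default batch keeps most updates local */
	__percpu_counter_add(&stats->packets, 1, percpu_counter_batch);

	/* bytes grow by up to 64KB per packet : keeping updates local would
	 * need bytes_batch >> 64KB, which makes percpu_counter_read()
	 * inaccurate by up to nr_cpus * bytes_batch
	 */
	__percpu_counter_add(&stats->bytes, skb->len, bytes_batch);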

Also please note this stuff is only needed for 32bit arches. 

Using percpu_counter would slow down network stack on modern arches.


I am very well aware of the percpu_counter stuff; I believe I tried to
optimize it a bit in the past.

Thanks




* Re: [PATCH net-next-2.6] bridge: 64bit rx/tx counters
  2010-08-12 12:16         ` Eric Dumazet
@ 2010-08-12 15:07           ` Andrew Morton
  2010-08-12 21:47             ` Eric Dumazet
  0 siblings, 1 reply; 19+ messages in thread
From: Andrew Morton @ 2010-08-12 15:07 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: David Miller, Stephen Hemminger, netdev, bhutchings, Nick Piggin

On Thu, 12 Aug 2010 14:16:15 +0200 Eric Dumazet <eric.dumazet@gmail.com> wrote:

> > And all this open-coded per-cpu counter stuff added all over the place.
> > Were percpu_counters tested or reviewed and found inadequate and unfixable?
> > If so, please do tell.
> > 
> 
> percpu_counters tries hard to maintain a view of the current value of
> the (global) counter. This adds a cost because of a shared cache line
> and locking. (__percpu_counter_sum() is not very scalable on big hosts,
> it locks the percpu_counter lock for a possibly long iteration)

Could be.  Is percpu_counter_read_positive() unsuitable?

> 
> For network stats we don't want to maintain this central value; we do the
> folding only when necessary.

hm.  Well, why?  That big walk across all possible CPUs could be really
expensive for some applications.  Especially if num_possible_cpus is
much larger than num_online_cpus, which iirc can happen in
virtualisation setups; probably it can happen in non-virtualised
machines too.

> And this folding has zero effect on
> concurrent writers (counter updates)

The fastpath looks a little expensive in the code you've added.  The
write_seqlock() does an rmw and a wmb() and the stats inc is a 64-bit
rmw whereas percpu_counters do a simple 32-bit add.  So I'd expect that
at some suitable batch value, percpu-counters are faster on 32-bit. 

They'll usually be slower on 64-bit, until that num_possible_cpus walk
bites you.

percpu_counters might need some work to make them irq-friendly.  That
bare spin_lock().

btw, I worry a bit about seqlocks in the presence of interrupts:

static inline void write_seqcount_begin(seqcount_t *s)
{
	s->sequence++;
	smp_wmb();
}

are we assuming that the ++ there is atomic wrt interrupts?  I think
so.  Is that always true for all architectures, compiler versions, etc?

> For network stack, we also need to update two values, a packet counter
> and a bytes counter. percpu_counter is not very good for the 'bytes
> counter', since we would have to use an arbitrarily big bias value.

OK, that's a nasty problem for percpu-counters.

> Using several percpu_counter would also probably use more cache lines.
> 
> Also please note this stuff is only needed for 32bit arches. 
> 
> Using percpu_counter would slow down network stack on modern arches.

Was this ever quantified?

> 
> I am very well aware of the percpu_counter stuff; I believe I tried to
> optimize it a bit in the past.


* Re: [PATCH net-next-2.6] bridge: 64bit rx/tx counters
  2010-08-12 15:07           ` Andrew Morton
@ 2010-08-12 21:47             ` Eric Dumazet
  2010-08-12 22:11               ` Andrew Morton
  0 siblings, 1 reply; 19+ messages in thread
From: Eric Dumazet @ 2010-08-12 21:47 UTC (permalink / raw)
  To: Andrew Morton
  Cc: David Miller, Stephen Hemminger, netdev, bhutchings, Nick Piggin

On Thursday 12 August 2010 at 08:07 -0700, Andrew Morton wrote:
> On Thu, 12 Aug 2010 14:16:15 +0200 Eric Dumazet <eric.dumazet@gmail.com> wrote:
> 
> > > And all this open-coded per-cpu counter stuff added all over the place.
> > > Were percpu_counters tested or reviewed and found inadequate and unfixable?
> > > If so, please do tell.
> > > 
> > 
> > percpu_counters tries hard to maintain a view of the current value of
> > the (global) counter. This adds a cost because of a shared cache line
> > and locking. (__percpu_counter_sum() is not very scalable on big hosts,
> > it locks the percpu_counter lock for a possibly long iteration)
> 
> Could be.  Is percpu_counter_read_positive() unsuitable?
> 

I bet most people want precise counters when doing 'ifconfig lo'

SNMP applications would be very surprised to get non increasing values
between two samples, or inexact values.

> > 
> > For network stats we don't want to maintain this central value; we do the
> > folding only when necessary.
> 
> hm.  Well, why?  That big walk across all possible CPUs could be really
> expensive for some applications.  Especially if num_possible_cpus is
> much larger than num_online_cpus, which iirc can happen in
> virtualisation setups; probably it can happen in non-virtualised
> machines too.
> 

Agreed.

> > And this folding has zero effect on
> > concurrent writers (counter updates)
> 
> The fastpath looks a little expensive in the code you've added.  The
> write_seqlock() does an rmw and a wmb() and the stats inc is a 64-bit
> rmw whereas percpu_counters do a simple 32-bit add.  So I'd expect that
> at some suitable batch value, percpu-counters are faster on 32-bit. 
> 

Hmm... 6 instructions (16 bytes of text) are a "little expensive" versus
120 instructions if we use percpu_counter ?

Following code from drivers/net/loopback.c

	u64_stats_update_begin(&lb_stats->syncp);
	lb_stats->bytes += len;
	lb_stats->packets++;
	u64_stats_update_end(&lb_stats->syncp);

maps on i386 to :

	ff 46 10             	incl   0x10(%esi)  // u64_stats_update_begin(&lb_stats->syncp);
	89 f8                	mov    %edi,%eax
	99                   	cltd   
	01 7e 08             	add    %edi,0x8(%esi)
	11 56 0c             	adc    %edx,0xc(%esi)
	83 06 01             	addl   $0x1,(%esi)
	83 56 04 00          	adcl   $0x0,0x4(%esi)
	ff 46 10             	incl   0x10(%esi) // u64_stats_update_end(&lb_stats->syncp);


Exactly 6 added instructions compared to the previous kernel (32bit
counters), only on 32bit hosts. These instructions are not expensive (no
conditional branches, no extra register pressure) and access private cpu
data.

While two calls to __percpu_counter_add() add about 120 instructions,
even on 64bit hosts, wasting precious cpu cycles.



> They'll usually be slower on 64-bit, until that num_possible_cpus walk
> bites you.
> 

But are you aware we already fold SNMP values using the
for_each_possible_cpu() macro, before adding 64bit counters? Not related
to 64bit stuff really...

> percpu_counters might need some work to make them irq-friendly.  That
> bare spin_lock().
> 
> btw, I worry a bit about seqlocks in the presence of interrupts:
> 

Please note that nothing is assumed about interrupts and seqcounts

Both readers and writers must mask them if necessary.

In most situations, masking softirq is enough for networking cases
(updates are performed from softirq handler, reads from process context)

> static inline void write_seqcount_begin(seqcount_t *s)
> {
> 	s->sequence++;
> 	smp_wmb();
> }
> 
> are we assuming that the ++ there is atomic wrt interrupts?  I think
> so.  Is that always true for all architectures, compiler versions, etc?
> 

s->sequence++ is certainly not atomic wrt interrupts on RISC arches

> > For network stack, we also need to update two values, a packet counter
> > and a bytes counter. percpu_counter is not very good for the 'bytes
> > counter', since we would have to use a arbitrary big bias value.
> 
> OK, that's a nasty problem for percpu-counters.
> 
> > Using several percpu_counter would also probably use more cache lines.
> > 
> > Also please note this stuff is only needed for 32bit arches. 
> > 
> > Using percpu_counter would slow down network stack on modern arches.
> 
> Was this ever quantified?

A single misplacement of the dst refcount was responsible for a 25% tbench
slowdown on a small machine (8 cores). Without any lock, only atomic
operations on a shared cache line...

So I think we could easily quantify a big slowdown by adding two
percpu_counter add() calls in a driver fastpath on a 16 or 32 core
machine. (It would be a revert of the percpu stuff we added in the last
years.)

Improvements would be:

0) Just forget about 64bit stuff on 32bit arches as we did from linux
0.99. People should not run 40Gb links on 32bit kernels :)

1) If we really want percpu_counter() stuff, find a way to make it
hierarchical or use a very big BIAS (2^30 ?). And/Or reduce
percpu_counter_add() complexity for increasing unsigned counters.

2) Avoid the write_seqcount_begin()/end() stuff when a writer changes
only the low order parts of the 64bit counter.

   (ie maintain a 32bit percpu value, and only atomically touch the
shared upper 32bits (and the seqcount) when overflowing this 32bit
percpu value.)
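
A rough sketch of that idea (hypothetical, never submitted), assuming a
single writer per counter with preemption disabled, and that aligned
32bit loads and stores are atomic:

	struct split_counter {
		u32		low;	/* written lock-free by the owner */
		u32		high;	/* carry, guarded by the seqcount */
		seqcount_t	seq;
	};

	static inline void split_counter_add(struct split_counter *c, u32 val)
	{
		u32 old = c->low;

		c->low += val;
		if (c->low < old) {	/* 32bit overflow : propagate carry */
			write_seqcount_begin(&c->seq);
			c->high++;
			write_seqcount_end(&c->seq);
		}
	}

	static inline u64 split_counter_read(const struct split_counter *c)
	{
		u32 high, low;
		unsigned int seq;

		do {
			seq = read_seqcount_begin(&c->seq);
			high = c->high;
			low = c->low;
		} while (read_seqcount_retry(&c->seq, seq));

		return ((u64)high << 32) + low;
	}

The writer pays the conditional branch on every update, but only touches
the seqcount (and its barriers) once per 32bit wrap.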

Not sure it's worth the added conditional branch.

Thanks




* Re: [PATCH net-next-2.6] bridge: 64bit rx/tx counters
  2010-08-12 21:47             ` Eric Dumazet
@ 2010-08-12 22:11               ` Andrew Morton
  0 siblings, 0 replies; 19+ messages in thread
From: Andrew Morton @ 2010-08-12 22:11 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: David Miller, Stephen Hemminger, netdev, bhutchings, Nick Piggin

On Thu, 12 Aug 2010 23:47:37 +0200
Eric Dumazet <eric.dumazet@gmail.com> wrote:

> On Thursday 12 August 2010 at 08:07 -0700, Andrew Morton wrote:
> > On Thu, 12 Aug 2010 14:16:15 +0200 Eric Dumazet <eric.dumazet@gmail.com> wrote:
> > 
> > > > And all this open-coded per-cpu counter stuff added all over the place.
> > > > Were percpu_counters tested or reviewed and found inadequate and unfixable?
> > > > If so, please do tell.
> > > > 
> > > 
> > > percpu_counters tries hard to maintain a view of the current value of
> > > the (global) counter. This adds a cost because of a shared cache line
> > > and locking. (__percpu_counter_sum() is not very scalable on big hosts,
> > > it locks the percpu_counter lock for a possibly long iteration)
> > 
> > Could be.  Is percpu_counter_read_positive() unsuitable?
> > 
> 
> I bet most people want precise counters when doing 'ifconfig lo'
> 
> SNMP applications would be very surprised to get non increasing values
> between two samples, or inexact values.

percpu_counter_read_positive() should be returning monotonically
increasing numbers - if it ever went backward that would be bad.  But
yes, the value will increase in a lumpy fashion.  Probably one would
need to make informed choices between percpu_counter_read_positive()
and percpu_counter_sum(), depending on the type of stat.
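
For instance (hypothetical usage of the existing API):

	s64 value;

	/* cheap, possibly stale by up to nr_cpus * batch :
	 * fine for heuristics and internal decisions
	 */
	value = percpu_counter_read_positive(&counter);

	/* exact, but folds all per-cpu deltas under the counter's lock :
	 * better reserved for user-visible reporting
	 */
	value = percpu_counter_sum(&counter);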

But that's all a bit academic.

>
> > > And this folding has zero effect on
> > > concurrent writers (counter updates)
> > 
> > The fastpath looks a little expensive in the code you've added.  The
> > write_seqlock() does an rmw and a wmb() and the stats inc is a 64-bit
> > rmw whereas percpu_counters do a simple 32-bit add.  So I'd expect that
> > at some suitable batch value, percpu-counters are faster on 32-bit. 
> > 
> 
> Hmm... 6 instructions (16 bytes of text) are a "little expensive" versus
> 120 instructions if we use percpu_counter ?
> 
> Following code from drivers/net/loopback.c
> 
> 	u64_stats_update_begin(&lb_stats->syncp);
> 	lb_stats->bytes += len;
> 	lb_stats->packets++;
> 	u64_stats_update_end(&lb_stats->syncp);
> 
> maps on i386 to :
> 
> 	ff 46 10             	incl   0x10(%esi)  // u64_stats_update_begin(&lb_stats->syncp);
> 	89 f8                	mov    %edi,%eax
> 	99                   	cltd   
> 	01 7e 08             	add    %edi,0x8(%esi)
> 	11 56 0c             	adc    %edx,0xc(%esi)
> 	83 06 01             	addl   $0x1,(%esi)
> 	83 56 04 00          	adcl   $0x0,0x4(%esi)
> 	ff 46 10             	incl   0x10(%esi) // u64_stats_update_end(&lb_stats->syncp);
> 
> 
> Exactly 6 added instructions compared to the previous kernel (32bit
> counters), only on 32bit hosts. These instructions are not expensive (no
> conditional branches, no extra register pressure) and access private cpu
> data.
> 
> While two calls to __percpu_counter_add() add about 120 instructions,
> even on 64bit hosts, wasting precious cpu cycles.

Oy.  You omitted the per_cpu_ptr() evaluation and, I bet, included all
the executed-1/batch-times instructions.

> 
> > They'll usually be slower on 64-bit, until that num_possible_cpus walk
> > bites you.
> > 
> 
> But are you aware we already fold SNMP values using the
> for_each_possible_cpu() macro, before adding 64bit counters? Not related
> to 64bit stuff really...


> > percpu_counters might need some work to make them irq-friendly.  That
> > bare spin_lock().
> > 
> > btw, I worry a bit about seqlocks in the presence of interrupts:
> > 
> 
> Please note that nothing is assumed about interrupts and seqcounts
> 
> Both readers and writers must mask them if necessary.
> 
> In most situations, masking softirq is enough for networking cases
> (updates are performed from softirq handler, reads from process context)

Yup, write_seqcount_begin/end() are pretty dangerous-looking.  The
caller needs to protect the lock against other CPUs, against interrupts
and even against preemption.



