All of lore.kernel.org
 help / color / mirror / Atom feed
From: Eric Dumazet <dada1@cosmosbay.com>
To: Stephen Hemminger <shemminger@vyatta.com>
Cc: David Miller <davem@davemloft.net>, netdev@vger.kernel.org
Subject: Re: [PATCH 4/6] netfilter: abstract xt_counters
Date: Sun, 01 Feb 2009 13:25:26 +0100	[thread overview]
Message-ID: <498594B6.6000905@cosmosbay.com> (raw)
In-Reply-To: <20090130215729.416851870@vyatta.com>

Stephen Hemminger a écrit :
> Break out the parts of the x_tables code that manipulates counters so
> changes to locking are easier.
> 
> Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
> 
> 
> ---
>  include/linux/netfilter/x_tables.h |    6 +++++-
>  net/ipv4/netfilter/arp_tables.c    |    9 +++++----
>  net/ipv4/netfilter/ip_tables.c     |    9 +++++----
>  net/ipv6/netfilter/ip6_tables.c    |   21 +++++++++++----------
>  net/netfilter/x_tables.c           |   15 +++++++++++++++
>  5 files changed, 41 insertions(+), 19 deletions(-)
> 
> --- a/include/linux/netfilter/x_tables.h	2009-01-30 08:31:48.630454493 -0800
> +++ b/include/linux/netfilter/x_tables.h	2009-01-30 09:14:01.294680339 -0800
> @@ -105,13 +105,17 @@ struct _xt_align
>  #define XT_ERROR_TARGET "ERROR"
>  
>  #define SET_COUNTER(c,b,p) do { (c).bcnt = (b); (c).pcnt = (p); } while(0)
> -#define ADD_COUNTER(c,b,p) do { (c).bcnt += (b); (c).pcnt += (p); } while(0)
>  
>  struct xt_counters
>  {
>  	u_int64_t pcnt, bcnt;			/* Packet and byte counters */
>  };
>  
> +extern void xt_add_counter(struct xt_counters *c, unsigned b, unsigned p);
> +extern void xt_sum_counter(struct xt_counters *t,
> +			   int cpu, const struct xt_counters *c);
> +
> +
>  /* The argument to IPT_SO_ADD_COUNTERS. */
>  struct xt_counters_info
>  {
> --- a/net/ipv4/netfilter/arp_tables.c	2009-01-30 08:31:48.569479503 -0800
> +++ b/net/ipv4/netfilter/arp_tables.c	2009-01-30 09:12:40.181542286 -0800
> @@ -256,7 +256,7 @@ unsigned int arpt_do_table(struct sk_buf
>  
>  			hdr_len = sizeof(*arp) + (2 * sizeof(struct in_addr)) +
>  				(2 * skb->dev->addr_len);
> -			ADD_COUNTER(e->counters, hdr_len, 1);
> +			xt_add_counter(&e->counters, hdr_len, 1);
>  
>  			t = arpt_get_target(e);
>  
> @@ -662,10 +662,11 @@ static int translate_table(const char *n
>  
>  /* Gets counters. */
>  static inline int add_entry_to_counter(const struct arpt_entry *e,
> +				       int cpu,
>  				       struct xt_counters total[],
>  				       unsigned int *i)
>  {
> -	ADD_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt);
> +	xt_sum_counter(total, cpu, &e->counters);
>  
>  	(*i)++;
>  	return 0;
> @@ -709,6 +710,7 @@ static void get_counters(const struct xt
>  		ARPT_ENTRY_ITERATE(t->entries[cpu],
>  				   t->size,
>  				   add_entry_to_counter,
> +				   cpu,
>  				   counters,
>  				   &i);
>  	}
> @@ -1082,8 +1084,7 @@ static inline int add_counter_to_entry(s
>  				       const struct xt_counters addme[],
>  				       unsigned int *i)
>  {
> -
> -	ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt);
> +	xt_add_counter(&e->counters, addme[*i].bcnt, addme[*i].pcnt);
>  
>  	(*i)++;
>  	return 0;
> --- a/net/ipv4/netfilter/ip_tables.c	2009-01-30 08:31:48.538730580 -0800
> +++ b/net/ipv4/netfilter/ip_tables.c	2009-01-30 09:12:40.169542168 -0800
> @@ -366,7 +366,7 @@ ipt_do_table(struct sk_buff *skb,
>  			if (IPT_MATCH_ITERATE(e, do_match, skb, &mtpar) != 0)
>  				goto no_match;
>  
> -			ADD_COUNTER(e->counters, ntohs(ip->tot_len), 1);
> +			xt_add_counter(&e->counters, ntohs(ip->tot_len), 1);
>  
>  			t = ipt_get_target(e);
>  			IP_NF_ASSERT(t->u.kernel.target);
> @@ -872,10 +872,11 @@ translate_table(const char *name,
>  /* Gets counters. */
>  static inline int
>  add_entry_to_counter(const struct ipt_entry *e,
> +		     int cpu,
>  		     struct xt_counters total[],
>  		     unsigned int *i)
>  {
> -	ADD_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt);
> +	xt_sum_counter(total, cpu, &e->counters);
>  
>  	(*i)++;
>  	return 0;
> @@ -921,6 +922,7 @@ get_counters(const struct xt_table_info 
>  		IPT_ENTRY_ITERATE(t->entries[cpu],
>  				  t->size,
>  				  add_entry_to_counter,
> +				  cpu,
>  				  counters,
>  				  &i);
>  	}
> @@ -1327,8 +1329,7 @@ add_counter_to_entry(struct ipt_entry *e
>  		 (long unsigned int)addme[*i].pcnt,
>  		 (long unsigned int)addme[*i].bcnt);
>  #endif
> -
> -	ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt);
> +	xt_add_counter(&e->counters, addme[*i].bcnt, addme[*i].pcnt);
>  
>  	(*i)++;
>  	return 0;
> --- a/net/ipv6/netfilter/ip6_tables.c	2009-01-30 08:31:48.605479850 -0800
> +++ b/net/ipv6/netfilter/ip6_tables.c	2009-01-30 09:12:40.205542065 -0800
> @@ -392,9 +392,9 @@ ip6t_do_table(struct sk_buff *skb,
>  			if (IP6T_MATCH_ITERATE(e, do_match, skb, &mtpar) != 0)
>  				goto no_match;
>  
> -			ADD_COUNTER(e->counters,
> -				    ntohs(ipv6_hdr(skb)->payload_len) +
> -				    sizeof(struct ipv6hdr), 1);
> +			xt_add_counter(&e->counters,
> +					ntohs(ipv6_hdr(skb)->payload_len) +
> +					sizeof(struct ipv6hdr), 1);
>  
>  			t = ip6t_get_target(e);
>  			IP_NF_ASSERT(t->u.kernel.target);
> @@ -901,10 +901,11 @@ translate_table(const char *name,
>  /* Gets counters. */
>  static inline int
>  add_entry_to_counter(const struct ip6t_entry *e,
> +		     int cpu,
>  		     struct xt_counters total[],
>  		     unsigned int *i)
>  {
> -	ADD_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt);
> +	xt_sum_counter(total, cpu, &e->counters);
>  
>  	(*i)++;
>  	return 0;
> @@ -948,10 +949,11 @@ get_counters(const struct xt_table_info 
>  			continue;
>  		i = 0;
>  		IP6T_ENTRY_ITERATE(t->entries[cpu],
> -				  t->size,
> -				  add_entry_to_counter,
> -				  counters,
> -				  &i);
> +				   t->size,
> +				   add_entry_to_counter,
> +				   cpu,
> +				   counters,
> +				   &i);
>  	}
>  }
>  
> @@ -1357,8 +1359,7 @@ add_counter_to_entry(struct ip6t_entry *
>  		 (long unsigned int)addme[*i].pcnt,
>  		 (long unsigned int)addme[*i].bcnt);
>  #endif
> -
> -	ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt);
> +	xt_add_counter(&e->counters, addme[*i].bcnt, addme[*i].pcnt);
>  
>  	(*i)++;
>  	return 0;
> --- a/net/netfilter/x_tables.c	2009-01-30 08:38:32.949293300 -0800
> +++ b/net/netfilter/x_tables.c	2009-01-30 09:13:27.636792850 -0800
> @@ -577,6 +577,21 @@ int xt_compat_target_to_user(struct xt_e
>  EXPORT_SYMBOL_GPL(xt_compat_target_to_user);
>  #endif
>  
> +void xt_add_counter(struct xt_counters *c, unsigned b, unsigned p)
> +{
> +	c->bcnt += b;
> +	c->pcnt += p;
> +}
> +EXPORT_SYMBOL_GPL(xt_add_counter);
> +
> +void xt_sum_counter(struct xt_counters *t, int cpu,
> +		    const struct xt_counters *c)
> +{
> +	t->pcnt += c->pcnt;
> +	t->bcnt += c->bcnt;
> +}
> +EXPORT_SYMBOL_GPL(xt_sum_counter);
> +
>  struct xt_table_info *xt_alloc_table_info(unsigned int size)
>  {
>  	struct xt_table_info *newinfo;
> 

First I wondered if adding out of line xt_add_counter() could slow firewalls,
I did some testings, with tbench and a small iptables setup (about 16 matched rules per packet)


CPU: Core 2, speed 3000.11 MHz (estimated)
Counted CPU_CLK_UNHALTED events (Clock cycles when not halted) with a unit mask of 0x00 (Unhalted core cycles) count 100000
samples  %        symbol name
3166081  12.8996  ipt_do_table
2471426  10.0694  copy_from_user
1016717   4.1424  copy_to_user
902072    3.6753  schedule
687266    2.8001  xt_add_counter
589899    2.4034  tcp_sendmsg
579619    2.3615  tcp_ack
455883    1.8574  tcp_transmit_skb
 


c044d110 <xt_add_counter>: /* xt_add_counter total: 687266  2.8001 */
 80675  0.3287 :c044d110:       push   %ebp
 15574  0.0635 :c044d111:       mov    %esp,%ebp
     2 8.1e-06 :c044d113:       sub    $0xc,%esp
 39583  0.1613 :c044d116:       mov    %ebx,(%esp)
  4387  0.0179 :c044d119:       mov    %edi,0x8(%esp)
  1187  0.0048 :c044d11d:       mov    %esi,0x4(%esp)
  1601  0.0065 :c044d121:       mov    $0xc068ae4c,%edi
 38881  0.1584 :c044d126:       mov    %fs:0xc0688540,%ebx
  5585  0.0228 :c044d12d:       add    %ebx,%edi
  3910  0.0159 :c044d12f:       incl   (%edi)
133482  0.5438 :c044d131:       xor    %esi,%esi
    32 1.3e-04 :c044d133:       add    %edx,0x8(%eax)
 71181  0.2900 :c044d136:       mov    %ecx,%edx
  7986  0.0325 :c044d138:       adc    %esi,0xc(%eax)
 88695  0.3614 :c044d13b:       xor    %ecx,%ecx
    15 6.1e-05 :c044d13d:       add    %edx,(%eax)
 41496  0.1691 :c044d13f:       adc    %ecx,0x4(%eax)
 52944  0.2157 :c044d142:       incl   (%edi)
 30759  0.1253 :c044d144:       mov    (%esp),%ebx
 20241  0.0825 :c044d147:       mov    0x4(%esp),%esi
  5662  0.0231 :c044d14b:       mov    0x8(%esp),%edi
  2288  0.0093 :c044d14f:       leave
 41100  0.1675 :c044d150:       ret

tbench 8 results here, after all your patches applied :

Throughput 2331 MB/sec 8 procs

And if inlined :

Throughput 2359.06 MB/sec 8 procs

and if we check inlined code/costs we see :

  2597  0.1719 :c048dfee:       mov    -0x64(%ebp),%edx
   182  0.0120 :c048dff1:       movzwl 0x2(%edx),%eax
   524  0.0347 :c048dff5:       mov    %fs:0xc0688540,%ecx
     7 4.6e-04 :c048dffc:       add    -0x70(%ebp),%ecx
  2465  0.1632 :c048dfff:       incl   (%ecx)
  9476  0.6273 :c048e001:       addl   $0x1,0x60(%edi)
 10068  0.6665 :c048e005:       adcl   $0x0,0x64(%edi)
  6543  0.4332 :c048e009:       xor    %edx,%edx
     1 6.6e-05 :c048e00b:       rol    $0x8,%ax
   234  0.0155 :c048e00f:       movzwl %ax,%eax
  2198  0.1455 :c048e012:       add    %eax,0x68(%edi)
    80  0.0053 :c048e015:       adc    %edx,0x6c(%edi)
  2858  0.1892 :c048e018:       incl   (%ecx)


With upcoming work on fast percpu accesses, we might in the future see following code:
(no need for registers to compute address of variable)

      mov    -0x64(%ebp),%edx
      movzwl 0x2(%edx),%eax
      incl   %fs:xt_counter_sequence
      addl   $0x1,0x60(%edi)
      adcl   $0x0,0x64(%edi)
      xor    %edx,%edx
      rol    $0x8,%ax
      movzwl %ax,%eax
      add    %eax,0x68(%edi)
      adc    %edx,0x6c(%edi)
      incl   %fs:xt_counter_sequence



  reply	other threads:[~2009-02-01 12:25 UTC|newest]

Thread overview: 22+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2009-01-30 21:57 [PATCH 0/6] iptables: eliminate read/write lock (v0.4) Stephen Hemminger
2009-01-30 21:57 ` [PATCH 1/6] netfilter: change elements in x_tables Stephen Hemminger
2009-01-30 21:57 ` [PATCH 2/6] netfilter: remove unneeded initializations Stephen Hemminger
2009-01-30 21:57 ` [PATCH 3/6] ebtables: " Stephen Hemminger
2009-01-30 21:57 ` [PATCH 4/6] netfilter: abstract xt_counters Stephen Hemminger
2009-02-01 12:25   ` Eric Dumazet [this message]
2009-02-02 23:33     ` [PATCH 3/3] iptables: lock free counters (alternate version) Stephen Hemminger
2009-02-03 19:00       ` Eric Dumazet
2009-02-03 19:19         ` Eric Dumazet
2009-02-03 19:32         ` Paul E. McKenney
2009-02-03 20:20           ` Eric Dumazet
2009-02-03 20:44             ` Stephen Hemminger
2009-02-03 21:05               ` Eric Dumazet
2009-02-03 21:10             ` Paul E. McKenney
2009-02-03 21:22               ` Stephen Hemminger
2009-02-03 21:27                 ` Rick Jones
2009-02-03 23:11                 ` Paul E. McKenney
2009-02-03 23:18                   ` Stephen Hemminger
2009-01-30 21:57 ` [PATCH 5/6] netfilter: use sequence number synchronization for counters Stephen Hemminger
2009-01-30 21:57 ` [PATCH 6/6] netfilter: convert x_tables to use RCU Stephen Hemminger
2009-01-31 17:27   ` Eric Dumazet
  -- strict thread matches above, loose matches on Subject: below --
2009-01-29 19:12 [PATCH 0/6] iptables: read/write lock elimination (v0.4) Stephen Hemminger
2009-01-29 19:12 ` [PATCH 4/6] netfilter: abstract xt_counters Stephen Hemminger

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=498594B6.6000905@cosmosbay.com \
    --to=dada1@cosmosbay.com \
    --cc=davem@davemloft.net \
    --cc=netdev@vger.kernel.org \
    --cc=shemminger@vyatta.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.