[PATCH] use 32bit counters for connection-based accounting

All of lore.kernel.org
 help / color / mirror / Atom feed

* [PATCH] use 32bit counters for connection-based accounting
@ 2005-10-07 10:54 Harald Welte
  2005-10-07 20:26 ` David S. Miller
  0 siblings, 1 reply; 11+ messages in thread
From: Harald Welte @ 2005-10-07 10:54 UTC (permalink / raw)
  To: Netfilter Development Mailinglist; +Cc: David Miller

[-- Attachment #1: Type: text/plain, Size: 5207 bytes --]

Hi Dave!

I know this is too late for 2.6.14, so do you want me to hold back such
patches or do you already have a 2.6.15 queue?


[NETFILTER] Use only 32bit counters for CONNTRACK_ACCT

Initially we used 64bit counters for conntrack-based accounting, since we
had no event mechanism to tell userspace that our counters are about to
overflow.  With nfnetlink_conntrack, we now have such a event mechanism and
thus can save 16bytes per connection.

Signed-off-by: Harald Welte <laforge@netfilter.org>

---
commit d5b3f0a9fdcf7881d3b6efedd862aef6d561db03
tree ed8bfda8979a5004385bfe8dd313c57834c4f73e
parent 84446f9b12c7f160a4133cf6550514f6e5c02bfb
author Harald Welte <laforge@netfilter.org> Fri, 07 Oct 2005 12:50:44 +0200
committer Harald Welte <laforge@netfilter.org> Fri, 07 Oct 2005 12:50:44 +0200

 include/linux/netfilter/nfnetlink_conntrack.h |    6 ++++--
 include/linux/netfilter_ipv4/ip_conntrack.h   |    8 ++++++--
 net/ipv4/netfilter/ip_conntrack_core.c        |   13 ++++++++-----
 net/ipv4/netfilter/ip_conntrack_netlink.c     |    8 ++++----
 4 files changed, 22 insertions(+), 13 deletions(-)

diff --git a/include/linux/netfilter/nfnetlink_conntrack.h b/include/linux/netfilter/nfnetlink_conntrack.h
--- a/include/linux/netfilter/nfnetlink_conntrack.h
+++ b/include/linux/netfilter/nfnetlink_conntrack.h
@@ -77,8 +77,10 @@ enum ctattr_protoinfo {
 
 enum ctattr_counters {
 	CTA_COUNTERS_UNSPEC,
-	CTA_COUNTERS_PACKETS,
-	CTA_COUNTERS_BYTES,
+	CTA_COUNTERS_PACKETS,		/* old 64bit counters */
+	CTA_COUNTERS_BYTES,		/* old 64bit counters */
+	CTA_COUNTERS32_PACKETS,
+	CTA_COUNTERS32_BYTES,
 	__CTA_COUNTERS_MAX
 };
 #define CTA_COUNTERS_MAX (__CTA_COUNTERS_MAX - 1)
diff --git a/include/linux/netfilter_ipv4/ip_conntrack.h b/include/linux/netfilter_ipv4/ip_conntrack.h
--- a/include/linux/netfilter_ipv4/ip_conntrack.h
+++ b/include/linux/netfilter_ipv4/ip_conntrack.h
@@ -117,6 +117,10 @@ enum ip_conntrack_events
 	/* NAT info */
 	IPCT_NATINFO_BIT = 10,
 	IPCT_NATINFO = (1 << IPCT_NATINFO_BIT),
+
+	/* Counter highest bit has been set */
+	IPCT_COUNTER_FILLING_BIT = 11,
+	IPCT_COUNTER_FILLING = (1 << IPCT_COUNTER_FILLING_BIT),
 };
 
 enum ip_conntrack_expect_events {
@@ -192,8 +196,8 @@ do {									\
 
 struct ip_conntrack_counter
 {
-	u_int64_t packets;
-	u_int64_t bytes;
+	u_int32_t packets;
+	u_int32_t bytes;
 };
 
 struct ip_conntrack_helper;
diff --git a/net/ipv4/netfilter/ip_conntrack_core.c b/net/ipv4/netfilter/ip_conntrack_core.c
--- a/net/ipv4/netfilter/ip_conntrack_core.c
+++ b/net/ipv4/netfilter/ip_conntrack_core.c
@@ -1119,7 +1119,7 @@ void __ip_ct_refresh_acct(struct ip_conn
 			unsigned long extra_jiffies,
 			int do_acct)
 {
-	int do_event = 0;
+	int event = 0;
 
 	IP_NF_ASSERT(ct->timeout.data == (unsigned long)ct);
 	IP_NF_ASSERT(skb);
@@ -1129,13 +1129,13 @@ void __ip_ct_refresh_acct(struct ip_conn
 	/* If not in hash table, timer will not be active yet */
 	if (!is_confirmed(ct)) {
 		ct->timeout.expires = extra_jiffies;
-		do_event = 1;
+		event = IPCT_REFRESH;
 	} else {
 		/* Need del_timer for race avoidance (may already be dying). */
 		if (del_timer(&ct->timeout)) {
 			ct->timeout.expires = jiffies + extra_jiffies;
 			add_timer(&ct->timeout);
-			do_event = 1;
+			event = IPCT_REFRESH;
 		}
 	}
 
@@ -1144,14 +1144,17 @@ void __ip_ct_refresh_acct(struct ip_conn
 		ct->counters[CTINFO2DIR(ctinfo)].packets++;
 		ct->counters[CTINFO2DIR(ctinfo)].bytes += 
 						ntohs(skb->nh.iph->tot_len);
+		if ((ct->counters[CTINFO2DIR(ctinfo)].packets & 0x80000000)
+		    || (ct->counters[CTINFO2DIR(ctinfo)].bytes & 0x80000000))
+			event |= IPCT_COUNTER_FILLING;
 	}
 #endif
 
 	write_unlock_bh(&ip_conntrack_lock);
 
 	/* must be unlocked when calling event cache */
-	if (do_event)
-		ip_conntrack_event_cache(IPCT_REFRESH, skb);
+	if (event)
+		ip_conntrack_event_cache(event, skb);
 }
 
 #if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
diff --git a/net/ipv4/netfilter/ip_conntrack_netlink.c b/net/ipv4/netfilter/ip_conntrack_netlink.c
--- a/net/ipv4/netfilter/ip_conntrack_netlink.c
+++ b/net/ipv4/netfilter/ip_conntrack_netlink.c
@@ -177,11 +177,11 @@ ctnetlink_dump_counters(struct sk_buff *
 	struct nfattr *nest_count = NFA_NEST(skb, type);
 	u_int64_t tmp;
 
-	tmp = cpu_to_be64(ct->counters[dir].packets);
-	NFA_PUT(skb, CTA_COUNTERS_PACKETS, sizeof(u_int64_t), &tmp);
+	tmp = htonl(ct->counters[dir].packets);
+	NFA_PUT(skb, CTA_COUNTERS32_PACKETS, sizeof(u_int32_t), &tmp);
 
-	tmp = cpu_to_be64(ct->counters[dir].bytes);
-	NFA_PUT(skb, CTA_COUNTERS_BYTES, sizeof(u_int64_t), &tmp);
+	tmp = htonl(ct->counters[dir].bytes);
+	NFA_PUT(skb, CTA_COUNTERS32_BYTES, sizeof(u_int32_t), &tmp);
 
 	NFA_NEST_END(skb, nest_count);
 
-- 
- Harald Welte <laforge@netfilter.org>                 http://netfilter.org/
============================================================================
  "Fragmentation is like classful addressing -- an interesting early
   architectural error that shows how much experimentation was going
   on while IP was being designed."                    -- Paul Vixie

[-- Attachment #2: Type: application/pgp-signature, Size: 189 bytes --]

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] use 32bit counters for connection-based accounting
  2005-10-07 10:54 [PATCH] use 32bit counters for connection-based accounting Harald Welte
@ 2005-10-07 20:26 ` David S. Miller
  2005-10-07 22:04   ` Harald Welte
  0 siblings, 1 reply; 11+ messages in thread
From: David S. Miller @ 2005-10-07 20:26 UTC (permalink / raw)
  To: laforge; +Cc: netfilter-devel

From: Harald Welte <laforge@netfilter.org>
Date: Fri, 7 Oct 2005 12:54:51 +0200

> I know this is too late for 2.6.14, so do you want me to hold back such
> patches or do you already have a 2.6.15 queue?
> 
> [NETFILTER] Use only 32bit counters for CONNTRACK_ACCT
> 
> Initially we used 64bit counters for conntrack-based accounting, since we
> had no event mechanism to tell userspace that our counters are about to
> overflow.  With nfnetlink_conntrack, we now have such a event mechanism and
> thus can save 16bytes per connection.
> 
> Signed-off-by: Harald Welte <laforge@netfilter.org>

I would like to actually suggest you keep the 64-bit counters.

Is there a truly strong reason to shrink them to be 32-bits?

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] use 32bit counters for connection-based accounting
  2005-10-07 20:26 ` David S. Miller
@ 2005-10-07 22:04   ` Harald Welte
  2005-10-10  4:45     ` David S. Miller
  2005-10-10  5:16     ` Patrick Schaaf
  0 siblings, 2 replies; 11+ messages in thread
From: Harald Welte @ 2005-10-07 22:04 UTC (permalink / raw)
  To: David S. Miller, kaber; +Cc: netfilter-devel

[-- Attachment #1: Type: text/plain, Size: 2585 bytes --]

Hi Dave!

On Fri, Oct 07, 2005 at 01:26:19PM -0700, David S. Miller wrote:
> 
> > I know this is too late for 2.6.14, so do you want me to hold back such
> > patches or do you already have a 2.6.15 queue?
> > 
> > [NETFILTER] Use only 32bit counters for CONNTRACK_ACCT
> > 
> > Initially we used 64bit counters for conntrack-based accounting, since we
> > had no event mechanism to tell userspace that our counters are about to
> > overflow.  With nfnetlink_conntrack, we now have such a event mechanism and
> > thus can save 16bytes per connection.
> > 
> > Signed-off-by: Harald Welte <laforge@netfilter.org>
> 
> I would like to actually suggest you keep the 64-bit counters.

That's surprising ;)

> Is there a truly strong reason to shrink them to be 32-bits?

Because we now can do better (by signalling the event and having
userspace accounting daemon take care of it) while not wasting
rediculous amounts of memory.  I mean, everywhere we try to make struct
ip_conntrack smaller (similar to the skb diet), and those 64bit counters
are really large.  We need four of them.  Yes, they're only present if
you actually enable CONNTRACK_ACCT.  But like we all know, most people
(and esp. distributions) will just enable them - at least as soon as
ulogd2 (or even other accounting daemons) are finished.  ulogd2 is
almost there, so I think it can be released when 2.6.14 final hits the
road.

If you actually try to estimate how many connections will have more than
4GB traffic in a single direction, then (at least currently) that will
be quite few.  And for those who overflow, we get the event.

The likely candidates for logging the accounting data (NETFLOW and IPFIX
flow exchange formats) both don't have the notion of a connection, so
sending a per-flow record every time the counter raises above 2GB is
perfectly fine with them.

If you really really want to veto that change, I'd offer at least two
compromises:

1) at least shrink the two packet counters to 32bits
2) make the counters 64bit on 64bit archs, and 32bits on 32bit archs

I'm still in favour of the 32bit ones, as I think Patrick is, too.

Greetings from workshop.netfilter.org,
	Harald
-- 
- Harald Welte <laforge@netfilter.org>                 http://netfilter.org/
============================================================================
  "Fragmentation is like classful addressing -- an interesting early
   architectural error that shows how much experimentation was going
   on while IP was being designed."                    -- Paul Vixie

[-- Attachment #2: Type: application/pgp-signature, Size: 189 bytes --]

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] use 32bit counters for connection-based accounting
  2005-10-07 22:04   ` Harald Welte
@ 2005-10-10  4:45     ` David S. Miller
  2005-10-10  5:16     ` Patrick Schaaf
  1 sibling, 0 replies; 11+ messages in thread
From: David S. Miller @ 2005-10-10  4:45 UTC (permalink / raw)
  To: laforge; +Cc: netfilter-devel, kaber

From: Harald Welte <laforge@netfilter.org>
Date: Sat, 8 Oct 2005 00:04:35 +0200

> > Is there a truly strong reason to shrink them to be 32-bits?
> 
> Because we now can do better (by signalling the event and having
> userspace accounting daemon take care of it) while not wasting
> rediculous amounts of memory.  I mean, everywhere we try to make struct
> ip_conntrack smaller (similar to the skb diet), and those 64bit counters
> are really large.  We need four of them.  Yes, they're only present if
> you actually enable CONNTRACK_ACCT.  But like we all know, most people
> (and esp. distributions) will just enable them - at least as soon as
> ulogd2 (or even other accounting daemons) are finished.  ulogd2 is
> almost there, so I think it can be released when 2.6.14 final hits the
> road.

I'm convinced, so I'll apply the patch.

It's just that we've had problems upgrading from 32-bit to 64-bit
counters in other areas of the networking, some of which have still
not occurred, such that this change felt like a regression :-)

Anyways, like I said, I'll apply it :)

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] use 32bit counters for connection-based accounting
  2005-10-07 22:04   ` Harald Welte
  2005-10-10  4:45     ` David S. Miller
@ 2005-10-10  5:16     ` Patrick Schaaf
  2005-10-10  8:30       ` Harald Welte
  1 sibling, 1 reply; 11+ messages in thread
From: Patrick Schaaf @ 2005-10-10  5:16 UTC (permalink / raw)
  To: Harald Welte, David S. Miller, kaber, netfilter-devel

> > > [NETFILTER] Use only 32bit counters for CONNTRACK_ACCT
[...]
Dave:
> > I would like to actually suggest you keep the 64-bit counters.

For what it's worth, I second this suggestion. With 64-bit counters
in kernel/conntrack proper, as-yet-uninvented new matches and targets
can do fancy things to practically all real world data streams,
whatever fancy things they want to invent - the base is there.
When somebody gets an idea of a useful new match or target that
would need 64-bit per-conntrack counters, and he sees that he'll
have to implement kernel->user->kernel roundabout communication
just to get at the full counters, the idea will DIE IMMEDIATELY.

BTW, how wide are the regular iptables per-rule byte/packet counters?
Do we still have the 64-byte-per-each-and-every-rule wasted (and
always touched / brought in cache) for input/output network interface
name/mask, even when no network interface matching is done at all?

best regards
  Partick

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] use 32bit counters for connection-based accounting
  2005-10-10  5:16     ` Patrick Schaaf
@ 2005-10-10  8:30       ` Harald Welte
  2005-10-10  8:59         ` Patrick Schaaf
  0 siblings, 1 reply; 11+ messages in thread
From: Harald Welte @ 2005-10-10  8:30 UTC (permalink / raw)
  To: Patrick Schaaf; +Cc: netfilter-devel, David S. Miller, kaber

[-- Attachment #1: Type: text/plain, Size: 2986 bytes --]

On Mon, Oct 10, 2005 at 07:16:33AM +0200, Patrick Schaaf wrote:
> > > > [NETFILTER] Use only 32bit counters for CONNTRACK_ACCT
> [...]
> Dave:
> > > I would like to actually suggest you keep the 64-bit counters.
> 
> For what it's worth, I second this suggestion. With 64-bit counters
> in kernel/conntrack proper, as-yet-uninvented new matches and targets
> can do fancy things to practically all real world data streams,
> whatever fancy things they want to invent - the base is there.

Mh. Ok, so I vote for making them configurable. It's not a big deal, and
ctnetlink can deal with it (64bit and 32bit counters even have different Tags)

> When somebody gets an idea of a useful new match or target that
> would need 64-bit per-conntrack counters, and he sees that he'll
> have to implement kernel->user->kernel roundabout communication
> just to get at the full counters, the idea will DIE IMMEDIATELY.

that's true.  But I also refuse to waste 16bytes (which may well cost us
yet another cacheline per conntrack) just of some 'future ideas' of
which we don't really know yet.

Especially since we're trying to move a lot of stuff to userspace
anyways.  Conntrack helpers will soon be able to run in userspace, by
using nfnetlink_queue and ctnetlink.  In the long term we'll also have
cheap ruleset modifications on a per-rule granularity... so a userspace
daemon could keep adding the counters and then modify the ruleset.
Currently you could emulate this with ipt_condition, but that's more of
a hack.

> BTW, how wide are the regular iptables per-rule byte/packet counters?
> Do we still have the 64-byte-per-each-and-every-rule wasted (and
> always touched / brought in cache) 

Yes,_but_
1) the way how iptables is designed prevents any changes to the
structure layout without breaking userspace compatibility.  If we had a
development kerne, we can do that.  But not for a stable release.
Both pkttables and nf-hipac 'fix' this problem.

2) _usually_ people tend to have way more connection tracking entries
than rules.  Conntracks quite commonly go in the counts of
hundred-thousands, where iptables rules are typically several oders of
magnitude less.

> for input/output network interface
> name/mask, even when no network interface matching is done at all?

yes, yes, yes.  It's all so wrong.  As stated before, we cannot fix it
for compatibility reasons.  We can fix ctnetlink, though.

So I vote for a Kconfig option that decides whether to use 64bit or
32bit for number-of-bytes countes.  number-of-packets will be fixed to
32bit.  Agreed?

-- 
- Harald Welte <laforge@netfilter.org>                 http://netfilter.org/
============================================================================
  "Fragmentation is like classful addressing -- an interesting early
   architectural error that shows how much experimentation was going
   on while IP was being designed."                    -- Paul Vixie

[-- Attachment #2: Type: application/pgp-signature, Size: 189 bytes --]

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] use 32bit counters for connection-based accounting
  2005-10-10  8:30       ` Harald Welte
@ 2005-10-10  8:59         ` Patrick Schaaf
  2005-10-10  9:47           ` Harald Welte
  2005-10-10 14:00           ` Amin Azez
  0 siblings, 2 replies; 11+ messages in thread
From: Patrick Schaaf @ 2005-10-10  8:59 UTC (permalink / raw)
  To: Harald Welte, Patrick Schaaf, David S. Miller, kaber,
	netfilter-devel

Hi Harald,

thanks for the extended rationale (not quoted, but appreciated).

I personally would have no problem with configuring this at compile time,
if neccessary. It was good to read that userlevel already gets typed
counters, so must cope.

I have one (ignorant) question left: when a certain byte counter
for some direction/flow overflows, is the packet counter forced
to overflow at the same time? Same question for when the packet
counter overflows - does the associated byte counter wrap, too?
(I intuit such would be desired behaviour. Wouldn't it?)

Re number-of-packets at 32 bit, I can only tell that in the
real world I know, the Radius protocol is sprouting extended
attributes to communicate the overflow (high 32 bit) part
of the per-connection (DSL-session etc) packet counters.
(The same two-attribute approach was used years ago for the
byte counters, but everybody was sure 32 bit packet counters
were good enough). Oh, the Radius practise does not include
any means to flag 'I overflowed, some time in the past'.

Hmm. Would it be hard to have each conntrack start out with
32 bit counters, plus an 'I overflowed' indicator? And some
additional memory (isn't there a nice 'ext *' interface now)
is used for the very rare conntracks which indeed need
64-bit counters?

best regards
  Patrick

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] use 32bit counters for connection-based accounting
  2005-10-10  8:59         ` Patrick Schaaf
@ 2005-10-10  9:47           ` Harald Welte
  2005-10-10 16:38             ` Patrick Schaaf
  2005-10-10 14:00           ` Amin Azez
  1 sibling, 1 reply; 11+ messages in thread
From: Harald Welte @ 2005-10-10  9:47 UTC (permalink / raw)
  To: Patrick Schaaf; +Cc: netfilter-devel, David S. Miller, kaber

[-- Attachment #1: Type: text/plain, Size: 2466 bytes --]

Hi Patrick,

On Mon, Oct 10, 2005 at 10:59:35AM +0200, Patrick Schaaf wrote:

> I personally would have no problem with configuring this at compile time,
> if neccessary. It was good to read that userlevel already gets typed
> counters, so must cope.

ok.

> I have one (ignorant) question left: when a certain byte counter
> for some direction/flow overflows, is the packet counter forced
> to overflow at the same time? Same question for when the packet
> counter overflows - does the associated byte counter wrap, too?
> (I intuit such would be desired behaviour. Wouldn't it?)

no, the counters will overflow independently - and we have no indication
that such an event occurred.

> Re number-of-packets at 32 bit, I can only tell that in the
> real world I know, the Radius protocol is sprouting extended
> attributes to communicate the overflow (high 32 bit) part
> of the per-connection (DSL-session etc) packet counters.
> (The same two-attribute approach was used years ago for the
> byte counters, but everybody was sure 32 bit packet counters
> were good enough). Oh, the Radius practise does not include
> any means to flag 'I overflowed, some time in the past'.

Radius is, however, used for doing accounting on "per line" basis, that
is per dialup/dsl session.  Yes, i see, in the case of PPTP/L2TP all
packets within such a session are likely to end up in one conntrack...

> Hmm. Would it be hard to have each conntrack start out with
> 32 bit counters, plus an 'I overflowed' indicator? And some
> additional memory (isn't there a nice 'ext *' interface now)
> is used for the very rare conntracks which indeed need
> 64-bit counters?

ct_extend was never merged, though we probably will port and submit it
on top of nf_conntrack (nf_conntrack goes into 2.6.15).  I think at this
point a "start with 32bits but allocate 64bit countes on demand"
approach would be the best solution.

meanwhile, I'll hack up a patch that will make 32/64bit a
Kconfig-option, and try to add some overflow indicator.  That's all
post-2.6.14, though.

-- 
- Harald Welte <laforge@netfilter.org>                 http://netfilter.org/
============================================================================
  "Fragmentation is like classful addressing -- an interesting early
   architectural error that shows how much experimentation was going
   on while IP was being designed."                    -- Paul Vixie

[-- Attachment #2: Type: application/pgp-signature, Size: 189 bytes --]

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] use 32bit counters for connection-based accounting
  2005-10-10  8:59         ` Patrick Schaaf
  2005-10-10  9:47           ` Harald Welte
@ 2005-10-10 14:00           ` Amin Azez
  1 sibling, 0 replies; 11+ messages in thread
From: Amin Azez @ 2005-10-10 14:00 UTC (permalink / raw)
  To: netfilter-devel

Patrick Schaaf wrote:
> I have one (ignorant) question left: when a certain byte counter
> for some direction/flow overflows, is the packet counter forced
> to overflow at the same time? Same question for when the packet
> counter overflows - does the associated byte counter wrap, too?
> (I intuit such would be desired behaviour. Wouldn't it?)
> 
> Re number-of-packets at 32 bit, I can only tell that in the
> real world I know, the Radius protocol is sprouting extended
> attributes to communicate the overflow (high 32 bit) part
> of the per-connection (DSL-session etc) packet counters.
> (The same two-attribute approach was used years ago for the
> byte counters, but everybody was sure 32 bit packet counters
> were good enough). Oh, the Radius practise does not include
> any means to flag 'I overflowed, some time in the past'.
> 
> Hmm. Would it be hard to have each conntrack start out with
> 32 bit counters, plus an 'I overflowed' indicator? 

Or even an "I overflowed n times counter"?
*cough*

ctnetlink issueing overflow events is nice, but only if interested
parties are listening at the time.

I'm all for keeping it configurable.

Sam

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] use 32bit counters for connection-based accounting
  2005-10-10  9:47           ` Harald Welte
@ 2005-10-10 16:38             ` Patrick Schaaf
  2005-10-10 21:36               ` Harald Welte
  0 siblings, 1 reply; 11+ messages in thread
From: Patrick Schaaf @ 2005-10-10 16:38 UTC (permalink / raw)
  To: Harald Welte, Patrick Schaaf, David S. Miller, kaber,
	netfilter-devel

> > I have one (ignorant) question left: when a certain byte counter
> > for some direction/flow overflows, is the packet counter forced
> > to overflow at the same time? Same question for when the packet
> > counter overflows - does the associated byte counter wrap, too?
> > (I intuit such would be desired behaviour. Wouldn't it?)
> 
> no, the counters will overflow independently - and we have no indication
> that such an event occurred.

Could this be discussed a bit? My rationale is that sometimes
all I'm interested in, is a quick evaluation of average packet
sizes. With synchronized byte and packet counters, this is
one divide away. When synchronization cannot be assumed, several
samples MUST be taken, and differences calculated. Damn.

> Radius is, however, used for doing accounting on "per line" basis, that
> is per dialup/dsl session.  Yes, i see, in the case of PPTP/L2TP all
> packets within such a session are likely to end up in one conntrack...

L2TP conntracking itself would be an exellent example of very long
living flows: it even has multiple (think possibly a hundred)
individual end user tunnel sessions all running between the
same L3/L4 association. BTW, average packet size is a nice
fuzzy indicator for upstream/downstream direction :)

all the best
  Patrick

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] use 32bit counters for connection-based accounting
  2005-10-10 16:38             ` Patrick Schaaf
@ 2005-10-10 21:36               ` Harald Welte
  0 siblings, 0 replies; 11+ messages in thread
From: Harald Welte @ 2005-10-10 21:36 UTC (permalink / raw)
  To: Patrick Schaaf; +Cc: netfilter-devel, David S. Miller, kaber


[-- Attachment #1.1: Type: text/plain, Size: 1488 bytes --]

On Mon, Oct 10, 2005 at 06:38:45PM +0200, Patrick Schaaf wrote:
> > > I have one (ignorant) question left: when a certain byte counter
> > > for some direction/flow overflows, is the packet counter forced
> > > to overflow at the same time? Same question for when the packet
> > > counter overflows - does the associated byte counter wrap, too?
> > > (I intuit such would be desired behaviour. Wouldn't it?)
> > 
> > no, the counters will overflow independently - and we have no indication
> > that such an event occurred.
> 
> Could this be discussed a bit? My rationale is that sometimes
> all I'm interested in, is a quick evaluation of average packet
> sizes. With synchronized byte and packet counters, this is
> one divide away. When synchronization cannot be assumed, several
> samples MUST be taken, and differences calculated. Damn.

I'm fixing this up now, see the attached preliminary, untested and
not-yet-fully-finished patch.  As it seems something like this will
still have to go into 2.6.14, even though I feel really sorry about the
necessity of yet another patch in this area.

-- 
- Harald Welte <laforge@netfilter.org>                 http://netfilter.org/
============================================================================
  "Fragmentation is like classful addressing -- an interesting early
   architectural error that shows how much experimentation was going
   on while IP was being designed."                    -- Paul Vixie

[-- Attachment #1.2: counters.diff --]
[-- Type: text/plain, Size: 8695 bytes --]

diff --git a/include/linux/netfilter/nfnetlink.h b/include/linux/netfilter/nfnetlink.h
--- a/include/linux/netfilter/nfnetlink.h
+++ b/include/linux/netfilter/nfnetlink.h
@@ -27,6 +27,8 @@ enum nfnetlink_groups {
 #define NFNLGRP_CONNTRACK_EXP_UPDATE	NFNLGRP_CONNTRACK_EXP_UPDATE
 	NFNLGRP_CONNTRACK_EXP_DESTROY,
 #define NFNLGRP_CONNTRACK_EXP_DESTROY	NFNLGRP_CONNTRACK_EXP_DESTROY
+	NFNLGRP_CONNTRACK_CTR_OVERFLOW,
+#define NFNLGRP_CONNTRACK_CTR_OVERFLOW	NFNLGRP_CONNTRACK_CTR_OVERFLOW
 	__NFNLGRP_MAX,
 };
 #define NFNLGRP_MAX	(__NFNLGRP_MAX - 1)
diff --git a/include/linux/netfilter_ipv4/ip_conntrack.h b/include/linux/netfilter_ipv4/ip_conntrack.h
--- a/include/linux/netfilter_ipv4/ip_conntrack.h
+++ b/include/linux/netfilter_ipv4/ip_conntrack.h
@@ -69,6 +69,10 @@ enum ip_conntrack_status {
 	/* Connection is dying (removed from lists), can not be unset. */
 	IPS_DYING_BIT = 9,
 	IPS_DYING = (1 << IPS_DYING_BIT),
+
+	/* Per connection counters have overflowed at least once */
+	IPS_COUNTER_OVERFLOW_BIT = 10,
+	IPS_COUNTER_OVERFLOW = (1 << IPS_COUNTER_OVERFLOW_BIT),
 };
 
 /* Connection tracking event bits */
@@ -119,10 +123,25 @@ enum ip_conntrack_events
 	IPCT_NATINFO = (1 << IPCT_NATINFO_BIT),
 
 	/* Counter highest bit has been set */
-	IPCT_COUNTER_FILLING_BIT = 11,
-	IPCT_COUNTER_FILLING = (1 << IPCT_COUNTER_FILLING_BIT),
+	IPCT_ACCT_OFLOW_PKTS_ORIG_BIT = 11,
+	IPCT_ACCT_OFLOW_PKTS_ORIG = (1 << IPCT_ACCT_OFLOW_PKTS_ORIG_BIT),
+#define IPCT_ACCT_OFLOW_PKTS IPCT_ACCT_OFLOW_PKTS_ORIG
+
+	IPCT_ACCT_OFLOW_PKTS_RPLY_BIT = 12,
+	IPCT_ACCT_OFLOW_PKTS_RPLY = (1 << IPCT_ACCT_OFLOW_PKTS_RPLY_BIT),
+
+	/* Counter highest bit has been set */
+	IPCT_ACCT_OFLOW_BYTES_ORIG_BIT = 13,
+	IPCT_ACCT_OFLOW_BYTES_ORIG = (1 << IPCT_ACCT_OFLOW_BYTES_ORIG_BIT),
+#define IPCT_ACCT_OFLOW_BYTES IPCT_ACCT_OFLOW_BYTES_ORIG
+
+	IPCT_ACCT_OFLOW_BYTES_RPLY_BIT = 14,
+	IPCT_ACCT_OFLOW_BYTES_RPLY = (1 << IPCT_ACCT_OFLOW_BYTES_RPLY_BIT),
 };
 
+#define IPCT_ACCT_OFLOW_BITS (IPCT_ACCT_OFLOW_BYTES_RPLY|IPCT_ACCT_OFLOW_BYTES_ORIG \
+			      IPCT_ACCT_OFLOW_PKTS_RPLY|IPCT_ACCT_OFLOW_PKTS_ORIG)
+
 enum ip_conntrack_expect_events {
 	IPEXP_NEW_BIT = 0,
 	IPEXP_NEW = (1 << IPEXP_NEW_BIT),
@@ -194,11 +213,21 @@ do {									\
 #define IP_NF_ASSERT(x)
 #endif
 
+#ifdef CONFIG_IP_NF_CT_ACCT
+#define CTR_HIGHEST_BIT (1 << ((sizeof(ct_ctr_t)*8)-1))
+#ifdef CONFIG_IP_NF_CT_ACCT64
+typedef ct_ctr_t u_int64_t;
+#define ct_ctr_to_be(x) cpu_to_be64(x)
+#else
+typedef ct_ctr_t u_int32_t;
+#define ct_ctr_to_be(x) htonl(x)
+#endif /* CONFIG_IP_NF_CT_ACCT64 */
 struct ip_conntrack_counter
 {
-	u_int32_t packets;
-	u_int32_t bytes;
+	ct_ctr_t packets;
+	ct_ctr_t bytes;
 };
+#endif /* CONFIG_IP_NF_CT_ACCT */
 
 struct ip_conntrack_helper;
 
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -32,6 +32,21 @@ config IP_NF_CT_ACCT
 
 	  If unsure, say `N'.
 
+config IP_NF_CT_ACCT64
+	bool "Use 64bit counters for flow accounting"
+	depends on IP_NF_CT_ACCT
+	help
+	  The conntrack counters are 32bit by default, which makes them
+	  overflow at more than 4GB of traffic in one direction of a
+	  connection.  This is fine if you have a userspace accounting daemon
+	  that receives the COUNTER_FILLING event message.
+	  
+	  However, if you want to use the counters inside the kernel (e.g.
+	  by the connbytes match), you should select this option to increase
+	  their range to 64bits.
+
+	  If unsure, say `N'.
+
 config IP_NF_CONNTRACK_MARK
 	bool  'Connection mark tracking support'
 	depends on IP_NF_CONNTRACK
diff --git a/net/ipv4/netfilter/ip_conntrack_core.c b/net/ipv4/netfilter/ip_conntrack_core.c
--- a/net/ipv4/netfilter/ip_conntrack_core.c
+++ b/net/ipv4/netfilter/ip_conntrack_core.c
@@ -1112,6 +1112,35 @@ void ip_conntrack_helper_unregister(stru
 	synchronize_net();
 }
 
+static inline int __ip_ct_acct(struct ip_conntrack *ct,
+				enum ip_conntrack_info ctinfo,
+				const struct sk_buff *skb)
+{
+	unsigned int ret = 0;
+#ifdef CONFIG_IP_NF_CT_ACCT
+	unsigned int dir = CTINFO2DIR(ctinfo);
+	ct_ctr_t packets, bytes;
+
+	/* The idea is to just reset the highest bit and continue counting.
+	 * Userspace will then have to add that 'highest bit' to their
+	 * accounting system.  */
+	ct->counters[dir].packets++;
+	if (unlikely(ct->counters[dir].packets > CTR_HIGHEST_BIT)) {
+		set_bit(IPS_COUNTER_OVERFLOW, &ct->status);
+		ct->counters[dir].packets &= ~CTR_HIGHEST_BIT;
+		ret |= IPCT_ACCT_OFLOW_PKTS + dir;
+	}
+
+	ct->counters[dir].bytes += ntohs(skb->nh.iph->tot_len);
+	if (unlikely(ct->counters[dir].bytes > CTR_HIGHEST_BIT)) {
+		set_bit(IPS_COUNTER_OVERFLOW, &ct->status);
+		ct->counters[dir].bytes &= ~CTR_HIGHEST_BIT;
+		ret |= IPCT_ACCT_OFLOW_BYTES + dir;
+	}
+#endif
+	return ret;
+}
+
 /* Refresh conntrack for this many jiffies and do accounting if do_acct is 1 */
 void __ip_ct_refresh_acct(struct ip_conntrack *ct, 
 		        enum ip_conntrack_info ctinfo,
@@ -1139,16 +1168,8 @@ void __ip_ct_refresh_acct(struct ip_conn
 		}
 	}
 
-#ifdef CONFIG_IP_NF_CT_ACCT
-	if (do_acct) {
-		ct->counters[CTINFO2DIR(ctinfo)].packets++;
-		ct->counters[CTINFO2DIR(ctinfo)].bytes += 
-						ntohs(skb->nh.iph->tot_len);
-		if ((ct->counters[CTINFO2DIR(ctinfo)].packets & 0x80000000)
-		    || (ct->counters[CTINFO2DIR(ctinfo)].bytes & 0x80000000))
-			event |= IPCT_COUNTER_FILLING;
-	}
-#endif
+	if (do_acct)
+		event |= __ip_ct_acct(ct, ctinfo, skb);
 
 	write_unlock_bh(&ip_conntrack_lock);
 
diff --git a/net/ipv4/netfilter/ip_conntrack_netlink.c b/net/ipv4/netfilter/ip_conntrack_netlink.c
--- a/net/ipv4/netfilter/ip_conntrack_netlink.c
+++ b/net/ipv4/netfilter/ip_conntrack_netlink.c
@@ -168,20 +168,28 @@ nfattr_failure:
 	return -ENOMEM;
 }
 
-#ifdef CONFIG_IP_NF_CT_ACCT
 static inline int
 ctnetlink_dump_counters(struct sk_buff *skb, const struct ip_conntrack *ct,
-			enum ip_conntrack_dir dir)
+			enum ip_conntrack_dir dir, unsigned int events)
 {
+#ifdef CONFIG_IP_NF_CT_ACCT
 	enum ctattr_type type = dir ? CTA_COUNTERS_REPLY: CTA_COUNTERS_ORIG;
 	struct nfattr *nest_count = NFA_NEST(skb, type);
-	u_int64_t tmp;
+	ct_ctr_t tmp;
 
-	tmp = htonl(ct->counters[dir].packets);
-	NFA_PUT(skb, CTA_COUNTERS32_PACKETS, sizeof(u_int32_t), &tmp);
+	if (unlikely(events & (IPCT_ACCT_OFLOW_PKTS + dir)))
+		tmp = CT_CTR_HIGHEST_BIT;
+	else
+		tmp = ct->counters[dir].packets;
+	tmp = ct_ctr_to_be(tmp);
+	NFA_PUT(skb, CTA_COUNTERS_PACKETS, sizeof(ct_ctr_t), &tmp);
 
-	tmp = htonl(ct->counters[dir].bytes);
-	NFA_PUT(skb, CTA_COUNTERS32_BYTES, sizeof(u_int32_t), &tmp);
+	if (unlikely(events & (IPCT_ACCT_OFLOW_BYTES + dir)))
+		tmp = CT_CTR_HIGHEST_BIT;
+	else
+		tmp = ct->counters[dir].bytes;
+	tmp = ct_ctr_to_be(tmp);
+	NFA_PUT(skb, CTA_COUNTERS_BYTES, sizeof(ct_ctr_t), &tmp);
 
 	NFA_NEST_END(skb, nest_count);
 
@@ -189,10 +197,10 @@ ctnetlink_dump_counters(struct sk_buff *
 
 nfattr_failure:
 	return -ENOMEM;
-}
 #else
-#define ctnetlink_dump_counters(a, b, c) (0)
+	return 0;
 #endif
+}
 
 #ifdef CONFIG_IP_NF_CONNTRACK_MARK
 static inline int
@@ -268,8 +276,8 @@ ctnetlink_fill_info(struct sk_buff *skb,
 
 	if (ctnetlink_dump_status(skb, ct) < 0 ||
 	    ctnetlink_dump_timeout(skb, ct) < 0 ||
-	    ctnetlink_dump_counters(skb, ct, IP_CT_DIR_ORIGINAL) < 0 ||
-	    ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0 ||
+	    ctnetlink_dump_counters(skb, ct, IP_CT_DIR_ORIGINAL, event) < 0 ||
+	    ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY, event) < 0 ||
 	    ctnetlink_dump_protoinfo(skb, ct) < 0 ||
 	    ctnetlink_dump_helpinfo(skb, ct) < 0 ||
 	    ctnetlink_dump_mark(skb, ct) < 0 ||
@@ -325,6 +333,14 @@ static int ctnetlink_conntrack_event(str
 		group = NFNLGRP_CONNTRACK_UPDATE;
 		goto alloc_skb;
 	} 
+	/* FIXME: what if we have a status update _and_ an overflow event
+	 * at the same time? */
+	if (events & IPCT_ACCT_OFLOW_BITS) {
+		type = IPCTNL_MSG_CT_NEW;
+		group = NFNLGRP_CONNTRACK_CTR_OVERFLOW;
+		goto alloc_skb;
+	}
+
 	
 	return NOTIFY_DONE;
 
@@ -370,8 +386,8 @@ alloc_skb:
 	    && ctnetlink_dump_helpinfo(skb, ct) < 0)
 		goto nfattr_failure;
 
-	if (ctnetlink_dump_counters(skb, ct, IP_CT_DIR_ORIGINAL) < 0 ||
-	    ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0)
+	if (ctnetlink_dump_counters(skb, ct, IP_CT_DIR_ORIGINAL, events) < 0 ||
+	    ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY, events) < 0)
 		goto nfattr_failure;
 
 	nlh->nlmsg_len = skb->tail - b;

[-- Attachment #2: Type: application/pgp-signature, Size: 189 bytes --]

^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2005-10-10 21:36 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2005-10-07 10:54 [PATCH] use 32bit counters for connection-based accounting Harald Welte
2005-10-07 20:26 ` David S. Miller
2005-10-07 22:04   ` Harald Welte
2005-10-10  4:45     ` David S. Miller
2005-10-10  5:16     ` Patrick Schaaf
2005-10-10  8:30       ` Harald Welte
2005-10-10  8:59         ` Patrick Schaaf
2005-10-10  9:47           ` Harald Welte
2005-10-10 16:38             ` Patrick Schaaf
2005-10-10 21:36               ` Harald Welte
2005-10-10 14:00           ` Amin Azez

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.