From: Eric Dumazet <dada1@cosmosbay.com>
To: "David S. Miller" <davem@davemloft.net>
Cc: Linux Netdev List <netdev@vger.kernel.org>
Subject: [PATCH] net: Use a percpu_counter for orphan_count
Date: Wed, 26 Nov 2008 00:09:30 +0100 [thread overview]
Message-ID: <492C85AA.9080205@cosmosbay.com> (raw)
[-- Attachment #1: Type: text/plain, Size: 758 bytes --]
Hi David
Here is the second patch, after one for sockets_allocated
Thank you
[PATCH] net: Use a percpu_counter for orphan_count
Instead of using one atomic_t per protocol, use a percpu_counter
for "orphan_count", to reduce cache line contention on
heavy duty network servers.
Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
---
include/net/sock.h | 2 +-
include/net/tcp.h | 2 +-
net/dccp/dccp.h | 2 +-
net/dccp/proto.c | 16 ++++++++++------
net/ipv4/inet_connection_sock.c | 4 ++--
net/ipv4/proc.c | 2 +-
net/ipv4/tcp.c | 12 +++++++-----
net/ipv4/tcp_timer.c | 2 +-
8 files changed, 24 insertions(+), 18 deletions(-)
[-- Attachment #2: orphan.patch --]
[-- Type: text/plain, Size: 5847 bytes --]
diff --git a/include/net/sock.h b/include/net/sock.h
index a2a3890..5a3a151 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -666,7 +666,7 @@ struct proto {
unsigned int obj_size;
int slab_flags;
- atomic_t *orphan_count;
+ struct percpu_counter *orphan_count;
struct request_sock_ops *rsk_prot;
struct timewait_sock_ops *twsk_prot;
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 723fc1b..2c60d8b 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -46,7 +46,7 @@
extern struct inet_hashinfo tcp_hashinfo;
-extern atomic_t tcp_orphan_count;
+extern struct percpu_counter tcp_orphan_count;
extern void tcp_time_wait(struct sock *sk, int state, int timeo);
#define MAX_TCP_HEADER (128 + MAX_HEADER)
diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
index 031ce35..33a1127 100644
--- a/net/dccp/dccp.h
+++ b/net/dccp/dccp.h
@@ -49,7 +49,7 @@ extern int dccp_debug;
extern struct inet_hashinfo dccp_hashinfo;
-extern atomic_t dccp_orphan_count;
+extern struct percpu_counter dccp_orphan_count;
extern void dccp_time_wait(struct sock *sk, int state, int timeo);
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index ea85c42..db225f9 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -40,8 +40,7 @@ DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly;
EXPORT_SYMBOL_GPL(dccp_statistics);
-atomic_t dccp_orphan_count = ATOMIC_INIT(0);
-
+struct percpu_counter dccp_orphan_count;
EXPORT_SYMBOL_GPL(dccp_orphan_count);
struct inet_hashinfo dccp_hashinfo;
@@ -1000,7 +999,7 @@ adjudge_to_death:
state = sk->sk_state;
sock_hold(sk);
sock_orphan(sk);
- atomic_inc(sk->sk_prot->orphan_count);
+ percpu_counter_inc(sk->sk_prot->orphan_count);
/*
* It is the last release_sock in its life. It will remove backlog.
@@ -1064,18 +1063,21 @@ static int __init dccp_init(void)
{
unsigned long goal;
int ehash_order, bhash_order, i;
- int rc = -ENOBUFS;
+ int rc;
BUILD_BUG_ON(sizeof(struct dccp_skb_cb) >
FIELD_SIZEOF(struct sk_buff, cb));
-
+ rc = percpu_counter_init(&dccp_orphan_count, 0);
+ if (rc)
+ goto out;
+ rc = -ENOBUFS;
inet_hashinfo_init(&dccp_hashinfo);
dccp_hashinfo.bind_bucket_cachep =
kmem_cache_create("dccp_bind_bucket",
sizeof(struct inet_bind_bucket), 0,
SLAB_HWCACHE_ALIGN, NULL);
if (!dccp_hashinfo.bind_bucket_cachep)
- goto out;
+ goto out_free_percpu;
/*
* Size and allocate the main established and bind bucket
@@ -1168,6 +1170,8 @@ out_free_dccp_ehash:
out_free_bind_bucket_cachep:
kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
dccp_hashinfo.bind_bucket_cachep = NULL;
+out_free_percpu:
+ percpu_counter_destroy(&dccp_orphan_count);
goto out;
}
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 05af807..1ccdbba 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -561,7 +561,7 @@ void inet_csk_destroy_sock(struct sock *sk)
sk_refcnt_debug_release(sk);
- atomic_dec(sk->sk_prot->orphan_count);
+ percpu_counter_dec(sk->sk_prot->orphan_count);
sock_put(sk);
}
@@ -641,7 +641,7 @@ void inet_csk_listen_stop(struct sock *sk)
sock_orphan(child);
- atomic_inc(sk->sk_prot->orphan_count);
+ percpu_counter_inc(sk->sk_prot->orphan_count);
inet_csk_destroy_sock(child);
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 4944b47..614958b 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -54,7 +54,7 @@ static int sockstat_seq_show(struct seq_file *seq, void *v)
socket_seq_show(seq);
seq_printf(seq, "TCP: inuse %d orphan %d tw %d alloc %d mem %d\n",
sock_prot_inuse_get(net, &tcp_prot),
- atomic_read(&tcp_orphan_count),
+ (int)percpu_counter_sum_positive(&tcp_orphan_count),
tcp_death_row.tw_count,
(int)percpu_counter_sum_positive(&tcp_sockets_allocated),
atomic_read(&tcp_memory_allocated));
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index e6fade9..0192434 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -277,8 +277,7 @@
int sysctl_tcp_fin_timeout __read_mostly = TCP_FIN_TIMEOUT;
-atomic_t tcp_orphan_count = ATOMIC_INIT(0);
-
+struct percpu_counter tcp_orphan_count;
EXPORT_SYMBOL_GPL(tcp_orphan_count);
int sysctl_tcp_mem[3] __read_mostly;
@@ -1837,7 +1836,7 @@ adjudge_to_death:
state = sk->sk_state;
sock_hold(sk);
sock_orphan(sk);
- atomic_inc(sk->sk_prot->orphan_count);
+ percpu_counter_inc(sk->sk_prot->orphan_count);
/* It is the last release_sock in its life. It will remove backlog. */
release_sock(sk);
@@ -1888,9 +1887,11 @@ adjudge_to_death:
}
}
if (sk->sk_state != TCP_CLOSE) {
+ int orphan_count = percpu_counter_read_positive(
+ sk->sk_prot->orphan_count);
+
sk_mem_reclaim(sk);
- if (tcp_too_many_orphans(sk,
- atomic_read(sk->sk_prot->orphan_count))) {
+ if (tcp_too_many_orphans(sk, orphan_count)) {
if (net_ratelimit())
printk(KERN_INFO "TCP: too many of orphaned "
"sockets\n");
@@ -2689,6 +2690,7 @@ void __init tcp_init(void)
BUILD_BUG_ON(sizeof(struct tcp_skb_cb) > sizeof(skb->cb));
percpu_counter_init(&tcp_sockets_allocated, 0);
+ percpu_counter_init(&tcp_orphan_count, 0);
tcp_hashinfo.bind_bucket_cachep =
kmem_cache_create("tcp_bind_bucket",
sizeof(struct inet_bind_bucket), 0,
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 3df339e..cc4e6d2 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -65,7 +65,7 @@ static void tcp_write_err(struct sock *sk)
static int tcp_out_of_resources(struct sock *sk, int do_reset)
{
struct tcp_sock *tp = tcp_sk(sk);
- int orphans = atomic_read(&tcp_orphan_count);
+ int orphans = percpu_counter_read_positive(&tcp_orphan_count);
/* If peer does not open window for long time, or did not transmit
* anything for long time, penalize it. */
next reply other threads:[~2008-11-25 23:09 UTC|newest]
Thread overview: 2+ messages / expand[flat|nested] mbox.gz Atom feed top
2008-11-25 23:09 Eric Dumazet [this message]
2008-11-26 5:17 ` [PATCH] net: Use a percpu_counter for orphan_count David Miller
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=492C85AA.9080205@cosmosbay.com \
--to=dada1@cosmosbay.com \
--cc=davem@davemloft.net \
--cc=netdev@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).