From: Jesper Dangaard Brouer <brouer@redhat.com>
To: Eric Dumazet <eric.dumazet@gmail.com>,
"David S. Miller" <davem@davemloft.net>
Cc: Jesper Dangaard Brouer <brouer@redhat.com>,
netdev@vger.kernel.org, Florian Westphal <fw@strlen.de>,
Daniel Borkmann <dborkman@redhat.com>,
Hannes Frederic Sowa <hannes@stressinduktion.org>
Subject: [net-next PATCH 3/3] net: frag queue per hash bucket locking
Date: Wed, 27 Mar 2013 16:56:55 +0100 [thread overview]
Message-ID: <20130327155601.15203.25289.stgit@dragon> (raw)
In-Reply-To: <20130327155238.15203.6688.stgit@dragon>
This patch implements per hash bucket locking for the frag queue
hash. This removes two write locks, and the only remaining write
lock is for protecting hash rebuild. This essentially reduce the
readers-writer lock to a rebuild lock.
Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
---
include/net/inet_frag.h | 9 ++++++-
net/ipv4/inet_fragment.c | 60 ++++++++++++++++++++++++++++++++++++----------
2 files changed, 55 insertions(+), 14 deletions(-)
diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h
index 7cac9c5..c4f5183 100644
--- a/include/net/inet_frag.h
+++ b/include/net/inet_frag.h
@@ -50,10 +50,17 @@ struct inet_frag_queue {
*/
#define INETFRAGS_MAXDEPTH 128
+struct inet_frag_bucket {
+ struct hlist_head chain;
+ spinlock_t chain_lock;
+ u16 chain_len;
+};
+
struct inet_frags {
- struct hlist_head hash[INETFRAGS_HASHSZ];
+ struct inet_frag_bucket hash[INETFRAGS_HASHSZ];
/* This rwlock is a global lock (seperate per IPv4, IPv6 and
* netfilter). Important to keep this on a seperate cacheline.
+ * Its primarily a rebuild protection rwlock.
*/
rwlock_t lock ____cacheline_aligned_in_smp;
int secret_interval;
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index 1206ca6..3471599 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -52,20 +52,27 @@ static void inet_frag_secret_rebuild(unsigned long dummy)
unsigned long now = jiffies;
int i;
+ /* Per bucket lock NOT needed here, due to write lock protection */
write_lock(&f->lock);
+
get_random_bytes(&f->rnd, sizeof(u32));
for (i = 0; i < INETFRAGS_HASHSZ; i++) {
+ struct inet_frag_bucket *hb;
struct inet_frag_queue *q;
struct hlist_node *n;
- hlist_for_each_entry_safe(q, n, &f->hash[i], list) {
+ hb = &f->hash[i];
+ hlist_for_each_entry_safe(q, n, &hb->chain, list) {
unsigned int hval = f->hashfn(q);
if (hval != i) {
+ struct inet_frag_bucket *hb_dest;
+
hlist_del(&q->list);
/* Relink to new hash chain. */
- hlist_add_head(&q->list, &f->hash[hval]);
+ hb_dest = &f->hash[hval];
+ hlist_add_head(&q->list, &hb_dest->chain);
}
}
}
@@ -78,9 +85,13 @@ void inet_frags_init(struct inet_frags *f)
{
int i;
- for (i = 0; i < INETFRAGS_HASHSZ; i++)
- INIT_HLIST_HEAD(&f->hash[i]);
+ for (i = 0; i < INETFRAGS_HASHSZ; i++) {
+ struct inet_frag_bucket *hb = &f->hash[i];
+ spin_lock_init(&hb->chain_lock);
+ INIT_HLIST_HEAD(&hb->chain);
+ hb->chain_len = 0;
+ }
rwlock_init(&f->lock);
f->rnd = (u32) ((num_physpages ^ (num_physpages>>7)) ^
@@ -122,9 +133,19 @@ EXPORT_SYMBOL(inet_frags_exit_net);
static inline void fq_unlink(struct inet_frag_queue *fq, struct inet_frags *f)
{
- write_lock(&f->lock);
+ struct inet_frag_bucket *hb;
+ unsigned int hash;
+
+ read_lock(&f->lock);
+ hash = f->hashfn(fq);
+ hb = &f->hash[hash];
+
+ spin_lock_bh(&hb->chain_lock);
hlist_del(&fq->list);
- write_unlock(&f->lock);
+ hb->chain_len--;
+ spin_unlock_bh(&hb->chain_lock);
+
+ read_unlock(&f->lock);
inet_frag_lru_del(fq);
}
@@ -226,27 +247,32 @@ static struct inet_frag_queue *inet_frag_intern(struct netns_frags *nf,
struct inet_frag_queue *qp_in, struct inet_frags *f,
void *arg)
{
+ struct inet_frag_bucket *hb;
struct inet_frag_queue *qp;
#ifdef CONFIG_SMP
#endif
unsigned int hash;
- write_lock(&f->lock);
+ read_lock(&f->lock); /* Protects against hash rebuild */
/*
* While we stayed w/o the lock other CPU could update
* the rnd seed, so we need to re-calculate the hash
* chain. Fortunatelly the qp_in can be used to get one.
*/
hash = f->hashfn(qp_in);
+ hb = &f->hash[hash];
+ spin_lock_bh(&hb->chain_lock);
+
#ifdef CONFIG_SMP
/* With SMP race we have to recheck hash table, because
* such entry could be created on other cpu, while we
- * promoted read lock to write lock.
+ * released the hash bucket lock.
*/
- hlist_for_each_entry(qp, &f->hash[hash], list) {
+ hlist_for_each_entry(qp, &hb->chain, list) {
if (qp->net == nf && f->match(qp, arg)) {
atomic_inc(&qp->refcnt);
- write_unlock(&f->lock);
+ spin_unlock_bh(&hb->chain_lock);
+ read_unlock(&f->lock);
qp_in->last_in |= INET_FRAG_COMPLETE;
inet_frag_put(qp_in, f);
return qp;
@@ -258,8 +284,10 @@ static struct inet_frag_queue *inet_frag_intern(struct netns_frags *nf,
atomic_inc(&qp->refcnt);
atomic_inc(&qp->refcnt);
- hlist_add_head(&qp->list, &f->hash[hash]);
- write_unlock(&f->lock);
+ hlist_add_head(&qp->list, &hb->chain);
+ hb->chain_len++;
+ spin_unlock_bh(&hb->chain_lock);
+ read_unlock(&f->lock);
inet_frag_lru_add(nf, qp);
return qp;
}
@@ -300,17 +328,23 @@ struct inet_frag_queue *inet_frag_find(struct netns_frags *nf,
struct inet_frags *f, void *key, unsigned int hash)
__releases(&f->lock)
{
+ struct inet_frag_bucket *hb;
struct inet_frag_queue *q;
int depth = 0;
- hlist_for_each_entry(q, &f->hash[hash], list) {
+ hb = &f->hash[hash];
+
+ spin_lock_bh(&hb->chain_lock);
+ hlist_for_each_entry(q, &hb->chain, list) {
if (q->net == nf && f->match(q, key)) {
atomic_inc(&q->refcnt);
+ spin_unlock_bh(&hb->chain_lock);
read_unlock(&f->lock);
return q;
}
depth++;
}
+ spin_unlock_bh(&hb->chain_lock);
read_unlock(&f->lock);
if (depth <= INETFRAGS_MAXDEPTH)
next prev parent reply other threads:[~2013-03-27 15:57 UTC|newest]
Thread overview: 29+ messages / expand[flat|nested] mbox.gz Atom feed top
2013-03-27 15:54 [net-next PATCH 0/3] net: frag performance followup Jesper Dangaard Brouer
2013-03-27 15:55 ` [net-next PATCH 1/3] net: frag, avoid several CPUs grabbing same frag queue during LRU evictor loop Jesper Dangaard Brouer
2013-03-27 16:14 ` Eric Dumazet
2013-03-27 17:10 ` David Miller
2013-03-27 15:55 ` [net-next PATCH 2/3] net: use the frag lru_lock to protect netns_frags.nqueues update Jesper Dangaard Brouer
2013-03-27 16:21 ` Eric Dumazet
2013-03-27 17:10 ` David Miller
2013-03-27 15:56 ` Jesper Dangaard Brouer [this message]
2013-03-27 17:25 ` [net-next PATCH 3/3] net: frag queue per hash bucket locking Eric Dumazet
2013-03-28 18:57 ` Hannes Frederic Sowa
2013-03-28 19:03 ` David Miller
2013-03-28 19:10 ` Hannes Frederic Sowa
2013-03-28 19:19 ` David Miller
2013-03-28 20:22 ` Eric Dumazet
2013-03-28 23:30 ` Hannes Frederic Sowa
2013-03-28 23:39 ` Eric Dumazet
2013-03-29 0:33 ` Hannes Frederic Sowa
2013-03-29 19:01 ` Jesper Dangaard Brouer
2013-03-29 19:05 ` Eric Dumazet
2013-03-29 19:22 ` David Miller
2013-04-02 15:23 ` Jesper Dangaard Brouer
2013-04-03 22:11 ` Jesper Dangaard Brouer
2013-04-04 7:52 ` [net-next PATCH V2] " Jesper Dangaard Brouer
2013-04-04 9:03 ` Hannes Frederic Sowa
2013-04-04 9:27 ` Jesper Dangaard Brouer
2013-04-04 9:38 ` [net-next PATCH V3] " Jesper Dangaard Brouer
2013-04-04 9:58 ` Hannes Frederic Sowa
2013-04-04 16:24 ` Eric Dumazet
2013-04-04 21:38 ` David Miller
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20130327155601.15203.25289.stgit@dragon \
--to=brouer@redhat.com \
--cc=davem@davemloft.net \
--cc=dborkman@redhat.com \
--cc=eric.dumazet@gmail.com \
--cc=fw@strlen.de \
--cc=hannes@stressinduktion.org \
--cc=netdev@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).