netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Jesper Dangaard Brouer <brouer@redhat.com>
To: Eric Dumazet <eric.dumazet@gmail.com>,
	"David S. Miller" <davem@davemloft.net>,
	Florian Westphal <fw@strlen.de>
Cc: Jesper Dangaard Brouer <brouer@redhat.com>,
	netdev@vger.kernel.org, Pablo Neira Ayuso <pablo@netfilter.org>,
	Thomas Graf <tgraf@suug.ch>, Cong Wang <amwang@redhat.com>,
	"Patrick McHardy" <kaber@trash.net>,
	"Paul E. McKenney" <paulmck@linux.vnet.ibm.com>,
	Herbert Xu <herbert@gondor.hengli.com.au>
Subject: [net-next PATCH V2 8/9] net: frag queue locking per hash bucket
Date: Thu, 29 Nov 2012 17:15:47 +0100	[thread overview]
Message-ID: <20121129161512.17754.93933.stgit@dragon> (raw)
In-Reply-To: <20121129161019.17754.29670.stgit@dragon>

This patch implements per hash bucket locking for the frag queue
hash.  This removes two write locks, and the only remaining write
lock is for protecting hash rebuild.  This essentially reduces the
readers-writer lock to a rebuild lock.

Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>

---
V2:
 - Fixed two bugs
 - 1) Missed/too-late read_lock() for protecting hashfn in fq_unlink()
 - 2) Uses old hash bucket instead of new dest bucket in inet_frag_secret_rebuild()

 include/net/inet_frag.h  |   10 +++++++-
 net/ipv4/inet_fragment.c |   56 +++++++++++++++++++++++++++++++++++-----------
 2 files changed, 51 insertions(+), 15 deletions(-)

diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h
index f58590f..431f68e 100644
--- a/include/net/inet_frag.h
+++ b/include/net/inet_frag.h
@@ -49,9 +49,15 @@ struct inet_frag_queue {
 
 #define INETFRAGS_HASHSZ		64
 
+
+struct inet_frag_bucket {
+	struct hlist_head	chain;
+	spinlock_t		chain_lock;
+};
+
 struct inet_frags {
-	struct hlist_head	hash[INETFRAGS_HASHSZ];
-	rwlock_t		lock;
+	struct inet_frag_bucket	hash[INETFRAGS_HASHSZ];
+	rwlock_t		lock; /* Rebuild lock */
 	u32			rnd;
 	int			qsize;
 	int			secret_interval;
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index 9b97f2e..59999e6 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -41,20 +41,27 @@ static void inet_frag_secret_rebuild(unsigned long dummy)
 	unsigned long now = jiffies;
 	int i;
 
+	/* Per bucket lock NOT needed here, due to write lock protection */
 	write_lock(&f->lock);
+
 	get_random_bytes(&f->rnd, sizeof(u32));
 	for (i = 0; i < INETFRAGS_HASHSZ; i++) {
+		struct inet_frag_bucket *hb;
 		struct inet_frag_queue *q;
 		struct hlist_node *p, *n;
 
-		hlist_for_each_entry_safe(q, p, n, &f->hash[i], list) {
+		hb = &f->hash[i];
+		hlist_for_each_entry_safe(q, p, n, &hb->chain, list) {
 			unsigned int hval = f->hashfn(q);
 
 			if (hval != i) {
+				struct inet_frag_bucket *hb_dest;
+
 				hlist_del(&q->list);
 
 				/* Relink to new hash chain. */
-				hlist_add_head(&q->list, &f->hash[hval]);
+				hb_dest = &f->hash[hval];
+				hlist_add_head(&q->list, &hb_dest->chain);
 			}
 		}
 	}
@@ -67,9 +74,12 @@ void inet_frags_init(struct inet_frags *f)
 {
 	int i;
 
-	for (i = 0; i < INETFRAGS_HASHSZ; i++)
-		INIT_HLIST_HEAD(&f->hash[i]);
+	for (i = 0; i < INETFRAGS_HASHSZ; i++) {
+		struct inet_frag_bucket *hb = &f->hash[i];
 
+		spin_lock_init(&hb->chain_lock);
+		INIT_HLIST_HEAD(&hb->chain);
+	}
 	rwlock_init(&f->lock);
 
 	f->rnd = (u32) ((num_physpages ^ (num_physpages>>7)) ^
@@ -129,9 +139,17 @@ EXPORT_SYMBOL(inet_frags_exit_net);
 
 static inline void fq_unlink(struct inet_frag_queue *fq, struct inet_frags *f)
 {
-	write_lock(&f->lock);
+	struct inet_frag_bucket *hb;
+	unsigned int hash;
+
+	read_lock(&f->lock);
+	hash = f->hashfn(fq);
+	hb = &f->hash[hash];
+
+	spin_lock_bh(&hb->chain_lock);
 	hlist_del(&fq->list);
-	write_unlock(&f->lock);
+	spin_unlock_bh(&hb->chain_lock);
+	read_unlock(&f->lock);
 	inet_frag_lru_del(fq);
 }
 
@@ -245,28 +263,33 @@ static struct inet_frag_queue *inet_frag_intern(struct netns_frags *nf,
 		struct inet_frag_queue *qp_in, struct inet_frags *f,
 		void *arg)
 {
+	struct inet_frag_bucket *hb;
 	struct inet_frag_queue *qp;
 #ifdef CONFIG_SMP
 	struct hlist_node *n;
 #endif
 	unsigned int hash;
 
-	write_lock(&f->lock);
+	read_lock(&f->lock); /* Protects against hash rebuild */
 	/*
 	 * While we stayed w/o the lock other CPU could update
 	 * the rnd seed, so we need to re-calculate the hash
 	 * chain. Fortunatelly the qp_in can be used to get one.
 	 */
 	hash = f->hashfn(qp_in);
+	hb = &f->hash[hash];
+	spin_lock_bh(&hb->chain_lock);
+
 #ifdef CONFIG_SMP
 	/* With SMP race we have to recheck hash table, because
-	 * such entry could be created on other cpu, while we
-	 * promoted read lock to write lock.
+	 * such entry could have been created on other cpu before
+	 * we acquired the hash bucket lock.
 	 */
-	hlist_for_each_entry(qp, n, &f->hash[hash], list) {
+	hlist_for_each_entry(qp, n, &hb->chain, list) {
 		if (qp->net == nf && f->match(qp, arg)) {
 			atomic_inc(&qp->refcnt);
-			write_unlock(&f->lock);
+			spin_unlock_bh(&hb->chain_lock);
+			read_unlock(&f->lock);
 			qp_in->last_in |= INET_FRAG_COMPLETE;
 			inet_frag_put(qp_in, f);
 			return qp;
@@ -278,8 +301,9 @@ static struct inet_frag_queue *inet_frag_intern(struct netns_frags *nf,
 		atomic_inc(&qp->refcnt);
 
 	atomic_inc(&qp->refcnt);
-	hlist_add_head(&qp->list, &f->hash[hash]);
-	write_unlock(&f->lock);
+	hlist_add_head(&qp->list, &hb->chain);
+	spin_unlock_bh(&hb->chain_lock);
+	read_unlock(&f->lock);
 	inet_frag_lru_add(nf, qp);
 	return qp;
 }
@@ -328,16 +352,22 @@ struct inet_frag_queue *inet_frag_find(struct netns_frags *nf,
 		struct inet_frags *f, void *key, unsigned int hash)
 	__releases(&f->lock)
 {
+	struct inet_frag_bucket *hb;
 	struct inet_frag_queue *q;
 	struct hlist_node *n;
 
-	hlist_for_each_entry(q, n, &f->hash[hash], list) {
+	hb = &f->hash[hash];
+
+	spin_lock_bh(&hb->chain_lock);
+	hlist_for_each_entry(q, n, &hb->chain, list) {
 		if (q->net == nf && f->match(q, key)) {
 			atomic_inc(&q->refcnt);
+			spin_unlock_bh(&hb->chain_lock);
 			read_unlock(&f->lock);
 			return q;
 		}
 	}
+	spin_unlock_bh(&hb->chain_lock);
 	read_unlock(&f->lock);
 
 	return inet_frag_create(nf, f, key);

  parent reply	other threads:[~2012-11-29 16:17 UTC|newest]

Thread overview: 57+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2012-11-29 16:10 [net-next PATCH V2 0/9] net: fragmentation performance scalability on NUMA/SMP systems Jesper Dangaard Brouer
2012-11-29 16:11 ` [net-next PATCH V2 1/9] net: frag evictor, avoid killing warm frag queues Jesper Dangaard Brouer
2012-11-29 17:44   ` David Miller
2012-11-29 22:17     ` Jesper Dangaard Brouer
2012-11-29 23:01       ` Eric Dumazet
2012-11-30 10:04         ` Jesper Dangaard Brouer
2012-11-30 14:52           ` Eric Dumazet
2012-11-30 15:45             ` Jesper Dangaard Brouer
2012-11-30 16:37               ` Eric Dumazet
2012-11-30 21:37                 ` Jesper Dangaard Brouer
2012-11-30 22:25                   ` Eric Dumazet
2012-11-30 23:23                     ` Jesper Dangaard Brouer
2012-11-30 23:47                       ` Stephen Hemminger
2012-12-01  0:03                         ` Eric Dumazet
2012-12-01  0:13                           ` Stephen Hemminger
2012-11-30 23:58                       ` Eric Dumazet
2012-12-04 13:30                         ` [net-next PATCH V3-evictor] " Jesper Dangaard Brouer
2012-12-04 14:32                           ` [net-next PATCH V3-evictor] net: frag evictor,avoid " David Laight
2012-12-04 14:47                           ` [net-next PATCH V3-evictor] net: frag evictor, avoid " Eric Dumazet
2012-12-04 17:51                             ` Jesper Dangaard Brouer
2012-12-05  9:24                           ` Jesper Dangaard Brouer
2012-12-06 12:26                             ` Jesper Dangaard Brouer
2012-12-06 12:32                               ` Florian Westphal
2012-12-06 13:29                                 ` David Laight
2012-12-06 21:38                                   ` David Miller
2012-12-06 13:55                                 ` Jesper Dangaard Brouer
2012-12-06 14:47                                   ` Eric Dumazet
2012-12-06 15:23                                     ` Jesper Dangaard Brouer
2012-11-29 23:32       ` [net-next PATCH V2 1/9] " Eric Dumazet
2012-11-30 12:01       ` Jesper Dangaard Brouer
2012-11-30 14:57         ` Eric Dumazet
2012-11-29 16:11 ` [net-next PATCH V2 2/9] net: frag cache line adjust inet_frag_queue.net Jesper Dangaard Brouer
2012-11-29 16:12 ` [net-next PATCH V2 3/9] net: frag, move LRU list maintenance outside of rwlock Jesper Dangaard Brouer
2012-11-29 17:43   ` Eric Dumazet
2012-11-29 17:48     ` David Miller
2012-11-29 17:54       ` Eric Dumazet
2012-11-29 18:05         ` David Miller
2012-11-29 18:24           ` Eric Dumazet
2012-11-29 18:31             ` David Miller
2012-11-29 18:33               ` Eric Dumazet
2012-11-29 18:36                 ` David Miller
2012-11-29 22:33         ` Jesper Dangaard Brouer
2012-11-29 16:12 ` [net-next PATCH V2 4/9] net: frag helper functions for mem limit tracking Jesper Dangaard Brouer
2012-11-29 16:13 ` [net-next PATCH V2 5/9] net: frag, per CPU resource, mem limit and LRU list accounting Jesper Dangaard Brouer
2012-11-29 17:06   ` Eric Dumazet
2012-11-29 17:31     ` David Miller
2012-12-03 14:02     ` Jesper Dangaard Brouer
2012-12-03 17:25       ` David Miller
2012-11-29 16:14 ` [net-next PATCH V2 6/9] net: frag, implement dynamic percpu alloc of frag_cpu_limit Jesper Dangaard Brouer
2012-11-29 16:15 ` [net-next PATCH V2 7/9] net: frag, move nqueues counter under LRU lock protection Jesper Dangaard Brouer
2012-11-29 16:15 ` Jesper Dangaard Brouer [this message]
2012-11-29 17:08   ` [net-next PATCH V2 8/9] net: frag queue locking per hash bucket Eric Dumazet
2012-11-30 12:55     ` Jesper Dangaard Brouer
2012-11-29 16:16 ` [net-next PATCH V2 9/9] net: increase frag queue hash size and cache-line Jesper Dangaard Brouer
2012-11-29 16:39   ` [net-next PATCH V2 9/9] net: increase frag queue hash size andcache-line David Laight
2012-11-29 16:55   ` [net-next PATCH V2 9/9] net: increase frag queue hash size and cache-line Eric Dumazet
2012-11-29 20:53     ` Jesper Dangaard Brouer

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20121129161512.17754.93933.stgit@dragon \
    --to=brouer@redhat.com \
    --cc=amwang@redhat.com \
    --cc=davem@davemloft.net \
    --cc=eric.dumazet@gmail.com \
    --cc=fw@strlen.de \
    --cc=herbert@gondor.hengli.com.au \
    --cc=kaber@trash.net \
    --cc=netdev@vger.kernel.org \
    --cc=pablo@netfilter.org \
    --cc=paulmck@linux.vnet.ibm.com \
    --cc=tgraf@suug.ch \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).