From: Cong Wang <xiyou.wangcong@gmail.com>
To: netdev@vger.kernel.org
Cc: bpf@vger.kernel.org, Cong Wang <cong.wang@bytedance.com>,
Song Liu <songliubraving@fb.com>,
Alexei Starovoitov <ast@kernel.org>,
Daniel Borkmann <daniel@iogearbox.net>,
Dongdong Wang <wangdongdong.6@bytedance.com>
Subject: [Patch bpf-next 1/3] bpf: use index instead of hash for map_locked[]
Date: Thu, 10 Dec 2020 16:06:47 -0800
Message-ID: <20201211000649.236635-2-xiyou.wangcong@gmail.com>
In-Reply-To: <20201211000649.236635-1-xiyou.wangcong@gmail.com>
From: Cong Wang <cong.wang@bytedance.com>
Commit 20b6cc34ea74 ("bpf: Avoid hashtab deadlock with map_locked")
introduced a percpu counter, map_locked, to bail out in the NMI
re-entrancy case. It uses each bucket's hash for indexing, which
requires callers of htab_lock_bucket()/htab_unlock_bucket() to pass
the hash in. But the hash value is not always available, especially
when we traverse the whole hash table and have no keys from which to
compute hashes. Since a bucket's index can be derived from its
address with pointer arithmetic, just compute the index and use it
instead.
This is a prerequisite for the following timeout map patch.
Cc: Song Liu <songliubraving@fb.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: Dongdong Wang <wangdongdong.6@bytedance.com>
Signed-off-by: Cong Wang <cong.wang@bytedance.com>
---
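For illustration only, here is a minimal user-space sketch of the two
ideas above: deriving the map_locked[] slot from the bucket's address
via pointer subtraction, and bailing out with -EBUSY when the counter
shows the slot is already held on this CPU. The simplified types, the
lock-array size, and the plain int counters standing in for the
kernel's percpu counters are all assumptions of this sketch, not
kernel code:

#include <errno.h>
#include <stdio.h>

#define HASHTAB_MAP_LOCK_COUNT 8
#define HASHTAB_MAP_LOCK_MASK  (HASHTAB_MAP_LOCK_COUNT - 1)

struct bucket { int dummy; };

struct htab_sketch {
	struct bucket buckets[16];
	/* stand-in for the kernel's percpu map_locked[] counters */
	int map_locked[HASHTAB_MAP_LOCK_COUNT];
};

static int lock_bucket_sketch(struct htab_sketch *htab, struct bucket *b)
{
	/* Pointer subtraction between struct bucket * values yields the
	 * element index; masking folds it into the small map_locked[]
	 * array, exactly as the patch does.
	 */
	unsigned int index = (b - htab->buckets) & HASHTAB_MAP_LOCK_MASK;

	/* If the counter was already non-zero, some context on this CPU
	 * already holds this lock slot: undo the increment and bail out.
	 */
	if (++htab->map_locked[index] != 1) {
		--htab->map_locked[index];
		return -EBUSY;
	}
	return 0;	/* the real code takes the bucket spinlock here */
}

int main(void)
{
	struct htab_sketch htab = { 0 };

	printf("first:  %d\n", lock_bucket_sketch(&htab, &htab.buckets[10]));
	printf("nested: %d\n", lock_bucket_sketch(&htab, &htab.buckets[10]));
	return 0;
}

Running it prints 0 for the first call and -EBUSY (-16 on a typical
Linux errno layout) for the nested one, mirroring how the kernel's
__this_cpu_inc_return() check rejects re-entrant bucket locking.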
kernel/bpf/hashtab.c | 57 +++++++++++++++++++++++---------------------
1 file changed, 30 insertions(+), 27 deletions(-)
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index 7e848200cd26..f0b7b54fa3a8 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -156,16 +156,17 @@ static void htab_init_buckets(struct bpf_htab *htab)
}
static inline int htab_lock_bucket(const struct bpf_htab *htab,
- struct bucket *b, u32 hash,
- unsigned long *pflags)
+ struct bucket *b, unsigned long *pflags)
{
unsigned long flags;
+ unsigned int index;
- hash = hash & HASHTAB_MAP_LOCK_MASK;
+ index = b - htab->buckets;
+ index &= HASHTAB_MAP_LOCK_MASK;
migrate_disable();
- if (unlikely(__this_cpu_inc_return(*(htab->map_locked[hash])) != 1)) {
- __this_cpu_dec(*(htab->map_locked[hash]));
+ if (unlikely(__this_cpu_inc_return(*(htab->map_locked[index])) != 1)) {
+ __this_cpu_dec(*(htab->map_locked[index]));
migrate_enable();
return -EBUSY;
}
@@ -180,15 +181,17 @@ static inline int htab_lock_bucket(const struct bpf_htab *htab,
}
static inline void htab_unlock_bucket(const struct bpf_htab *htab,
- struct bucket *b, u32 hash,
- unsigned long flags)
+ struct bucket *b, unsigned long flags)
{
- hash = hash & HASHTAB_MAP_LOCK_MASK;
+ unsigned int index;
+
+ index = b - htab->buckets;
+ index &= HASHTAB_MAP_LOCK_MASK;
if (htab_use_raw_lock(htab))
raw_spin_unlock_irqrestore(&b->raw_lock, flags);
else
spin_unlock_irqrestore(&b->lock, flags);
- __this_cpu_dec(*(htab->map_locked[hash]));
+ __this_cpu_dec(*(htab->map_locked[index]));
migrate_enable();
}
@@ -710,7 +713,7 @@ static bool htab_lru_map_delete_node(void *arg, struct bpf_lru_node *node)
b = __select_bucket(htab, tgt_l->hash);
head = &b->head;
- ret = htab_lock_bucket(htab, b, tgt_l->hash, &flags);
+ ret = htab_lock_bucket(htab, b, &flags);
if (ret)
return false;
@@ -720,7 +723,7 @@ static bool htab_lru_map_delete_node(void *arg, struct bpf_lru_node *node)
break;
}
- htab_unlock_bucket(htab, b, tgt_l->hash, flags);
+ htab_unlock_bucket(htab, b, flags);
return l == tgt_l;
}
@@ -1019,7 +1022,7 @@ static int htab_map_update_elem(struct bpf_map *map, void *key, void *value,
*/
}
- ret = htab_lock_bucket(htab, b, hash, &flags);
+ ret = htab_lock_bucket(htab, b, &flags);
if (ret)
return ret;
@@ -1062,7 +1065,7 @@ static int htab_map_update_elem(struct bpf_map *map, void *key, void *value,
}
ret = 0;
err:
- htab_unlock_bucket(htab, b, hash, flags);
+ htab_unlock_bucket(htab, b, flags);
return ret;
}
@@ -1100,7 +1103,7 @@ static int htab_lru_map_update_elem(struct bpf_map *map, void *key, void *value,
return -ENOMEM;
memcpy(l_new->key + round_up(map->key_size, 8), value, map->value_size);
- ret = htab_lock_bucket(htab, b, hash, &flags);
+ ret = htab_lock_bucket(htab, b, &flags);
if (ret)
return ret;
@@ -1121,7 +1124,7 @@ static int htab_lru_map_update_elem(struct bpf_map *map, void *key, void *value,
ret = 0;
err:
- htab_unlock_bucket(htab, b, hash, flags);
+ htab_unlock_bucket(htab, b, flags);
if (ret)
bpf_lru_push_free(&htab->lru, &l_new->lru_node);
@@ -1156,7 +1159,7 @@ static int __htab_percpu_map_update_elem(struct bpf_map *map, void *key,
b = __select_bucket(htab, hash);
head = &b->head;
- ret = htab_lock_bucket(htab, b, hash, &flags);
+ ret = htab_lock_bucket(htab, b, &flags);
if (ret)
return ret;
@@ -1181,7 +1184,7 @@ static int __htab_percpu_map_update_elem(struct bpf_map *map, void *key,
}
ret = 0;
err:
- htab_unlock_bucket(htab, b, hash, flags);
+ htab_unlock_bucket(htab, b, flags);
return ret;
}
@@ -1221,7 +1224,7 @@ static int __htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key,
return -ENOMEM;
}
- ret = htab_lock_bucket(htab, b, hash, &flags);
+ ret = htab_lock_bucket(htab, b, &flags);
if (ret)
return ret;
@@ -1245,7 +1248,7 @@ static int __htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key,
}
ret = 0;
err:
- htab_unlock_bucket(htab, b, hash, flags);
+ htab_unlock_bucket(htab, b, flags);
if (l_new)
bpf_lru_push_free(&htab->lru, &l_new->lru_node);
return ret;
@@ -1283,7 +1286,7 @@ static int htab_map_delete_elem(struct bpf_map *map, void *key)
b = __select_bucket(htab, hash);
head = &b->head;
- ret = htab_lock_bucket(htab, b, hash, &flags);
+ ret = htab_lock_bucket(htab, b, &flags);
if (ret)
return ret;
@@ -1296,7 +1299,7 @@ static int htab_map_delete_elem(struct bpf_map *map, void *key)
ret = -ENOENT;
}
- htab_unlock_bucket(htab, b, hash, flags);
+ htab_unlock_bucket(htab, b, flags);
return ret;
}
@@ -1318,7 +1321,7 @@ static int htab_lru_map_delete_elem(struct bpf_map *map, void *key)
b = __select_bucket(htab, hash);
head = &b->head;
- ret = htab_lock_bucket(htab, b, hash, &flags);
+ ret = htab_lock_bucket(htab, b, &flags);
if (ret)
return ret;
@@ -1329,7 +1332,7 @@ static int htab_lru_map_delete_elem(struct bpf_map *map, void *key)
else
ret = -ENOENT;
- htab_unlock_bucket(htab, b, hash, flags);
+ htab_unlock_bucket(htab, b, flags);
if (l)
bpf_lru_push_free(&htab->lru, &l->lru_node);
return ret;
@@ -1480,7 +1483,7 @@ __htab_map_lookup_and_delete_batch(struct bpf_map *map,
head = &b->head;
/* do not grab the lock unless need it (bucket_cnt > 0). */
if (locked) {
- ret = htab_lock_bucket(htab, b, batch, &flags);
+ ret = htab_lock_bucket(htab, b, &flags);
if (ret)
goto next_batch;
}
@@ -1500,7 +1503,7 @@ __htab_map_lookup_and_delete_batch(struct bpf_map *map,
/* Note that since bucket_cnt > 0 here, it is implicit
* that the locked was grabbed, so release it.
*/
- htab_unlock_bucket(htab, b, batch, flags);
+ htab_unlock_bucket(htab, b, flags);
rcu_read_unlock();
bpf_enable_instrumentation();
goto after_loop;
@@ -1511,7 +1514,7 @@ __htab_map_lookup_and_delete_batch(struct bpf_map *map,
/* Note that since bucket_cnt > 0 here, it is implicit
* that the locked was grabbed, so release it.
*/
- htab_unlock_bucket(htab, b, batch, flags);
+ htab_unlock_bucket(htab, b, flags);
rcu_read_unlock();
bpf_enable_instrumentation();
kvfree(keys);
@@ -1564,7 +1567,7 @@ __htab_map_lookup_and_delete_batch(struct bpf_map *map,
dst_val += value_size;
}
- htab_unlock_bucket(htab, b, batch, flags);
+ htab_unlock_bucket(htab, b, flags);
locked = false;
while (node_to_free) {
--
2.25.1