* [PATCH 3/6] rhashtable: reset iter when rhashtable_walk_start sees new table
2018-04-18 6:47 [PATCH 0/6] Assorted rhashtable improvements NeilBrown
@ 2018-04-18 6:47 ` NeilBrown
2018-04-18 6:47 ` [PATCH 1/6] rhashtable: remove outdated comments about grow_decision etc NeilBrown
` (4 subsequent siblings)
5 siblings, 0 replies; 8+ messages in thread
From: NeilBrown @ 2018-04-18 6:47 UTC (permalink / raw)
To: Thomas Graf, Herbert Xu; +Cc: netdev, inux-kernel
The documentation claims that when rhashtable_walk_start_check()
detects a resize event, it will rewind back to the beginning
of the table. This is not true. We need to set ->slot and
->skip to be zero for it to be true.
Acked-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: NeilBrown <neilb@suse.com>
---
lib/rhashtable.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/lib/rhashtable.c b/lib/rhashtable.c
index 19db8e563c40..28e1be9f681b 100644
--- a/lib/rhashtable.c
+++ b/lib/rhashtable.c
@@ -733,6 +733,8 @@ int rhashtable_walk_start_check(struct rhashtable_iter *iter)
if (!iter->walker.tbl && !iter->end_of_table) {
iter->walker.tbl = rht_dereference_rcu(ht->tbl, ht);
+ iter->slot = 0;
+ iter->skip = 0;
return -EAGAIN;
}
^ permalink raw reply related [flat|nested] 8+ messages in thread* [PATCH 1/6] rhashtable: remove outdated comments about grow_decision etc
2018-04-18 6:47 [PATCH 0/6] Assorted rhashtable improvements NeilBrown
2018-04-18 6:47 ` [PATCH 3/6] rhashtable: reset iter when rhashtable_walk_start sees new table NeilBrown
@ 2018-04-18 6:47 ` NeilBrown
2018-04-18 6:47 ` [PATCH 2/6] rhashtable: remove incorrect comment on r{hl, hash}table_walk_enter() NeilBrown
` (3 subsequent siblings)
5 siblings, 0 replies; 8+ messages in thread
From: NeilBrown @ 2018-04-18 6:47 UTC (permalink / raw)
To: Thomas Graf, Herbert Xu; +Cc: netdev, inux-kernel
grow_decision and shink_decision no longer exist, so remove
the remaining references to them.
Signed-off-by: NeilBrown <neilb@suse.com>
---
include/linux/rhashtable.h | 33 ++++++++++++++-------------------
1 file changed, 14 insertions(+), 19 deletions(-)
diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h
index 1f8ad121eb43..87d443a5b11d 100644
--- a/include/linux/rhashtable.h
+++ b/include/linux/rhashtable.h
@@ -836,9 +836,8 @@ static inline void *__rhashtable_insert_fast(
*
* It is safe to call this function from atomic context.
*
- * Will trigger an automatic deferred table resizing if the size grows
- * beyond the watermark indicated by grow_decision() which can be passed
- * to rhashtable_init().
+ * Will trigger an automatic deferred table resizing if residency in the
+ * table grows beyond 70%.
*/
static inline int rhashtable_insert_fast(
struct rhashtable *ht, struct rhash_head *obj,
@@ -866,9 +865,8 @@ static inline int rhashtable_insert_fast(
*
* It is safe to call this function from atomic context.
*
- * Will trigger an automatic deferred table resizing if the size grows
- * beyond the watermark indicated by grow_decision() which can be passed
- * to rhashtable_init().
+ * Will trigger an automatic deferred table resizing if residency in the
+ * table grows beyond 70%.
*/
static inline int rhltable_insert_key(
struct rhltable *hlt, const void *key, struct rhlist_head *list,
@@ -890,9 +888,8 @@ static inline int rhltable_insert_key(
*
* It is safe to call this function from atomic context.
*
- * Will trigger an automatic deferred table resizing if the size grows
- * beyond the watermark indicated by grow_decision() which can be passed
- * to rhashtable_init().
+ * Will trigger an automatic deferred table resizing if residency in the
+ * table grows beyond 70%.
*/
static inline int rhltable_insert(
struct rhltable *hlt, struct rhlist_head *list,
@@ -922,9 +919,8 @@ static inline int rhltable_insert(
*
* It is safe to call this function from atomic context.
*
- * Will trigger an automatic deferred table resizing if the size grows
- * beyond the watermark indicated by grow_decision() which can be passed
- * to rhashtable_init().
+ * Will trigger an automatic deferred table resizing if residency in the
+ * table grows beyond 70%.
*/
static inline int rhashtable_lookup_insert_fast(
struct rhashtable *ht, struct rhash_head *obj,
@@ -981,9 +977,8 @@ static inline void *rhashtable_lookup_get_insert_fast(
*
* Lookups may occur in parallel with hashtable mutations and resizing.
*
- * Will trigger an automatic deferred table resizing if the size grows
- * beyond the watermark indicated by grow_decision() which can be passed
- * to rhashtable_init().
+ * Will trigger an automatic deferred table resizing if residency in the
+ * table grows beyond 70%.
*
* Returns zero on success.
*/
@@ -1134,8 +1129,8 @@ static inline int __rhashtable_remove_fast(
* walk the bucket chain upon removal. The removal operation is thus
* considerable slow if the hash table is not correctly sized.
*
- * Will automatically shrink the table via rhashtable_expand() if the
- * shrink_decision function specified at rhashtable_init() returns true.
+ * Will automatically shrink the table if permitted when residency drops
+ * below 30%.
*
* Returns zero on success, -ENOENT if the entry could not be found.
*/
@@ -1156,8 +1151,8 @@ static inline int rhashtable_remove_fast(
* walk the bucket chain upon removal. The removal operation is thus
* considerable slow if the hash table is not correctly sized.
*
- * Will automatically shrink the table via rhashtable_expand() if the
- * shrink_decision function specified at rhashtable_init() returns true.
+ * Will automatically shrink the table if permitted when residency drops
+ * below 30%
*
* Returns zero on success, -ENOENT if the entry could not be found.
*/
^ permalink raw reply related [flat|nested] 8+ messages in thread* [PATCH 2/6] rhashtable: remove incorrect comment on r{hl, hash}table_walk_enter()
2018-04-18 6:47 [PATCH 0/6] Assorted rhashtable improvements NeilBrown
2018-04-18 6:47 ` [PATCH 3/6] rhashtable: reset iter when rhashtable_walk_start sees new table NeilBrown
2018-04-18 6:47 ` [PATCH 1/6] rhashtable: remove outdated comments about grow_decision etc NeilBrown
@ 2018-04-18 6:47 ` NeilBrown
2018-04-18 6:47 ` [PATCH 4/6] rhashtable: improve rhashtable_walk stability when stop/start used NeilBrown
` (2 subsequent siblings)
5 siblings, 0 replies; 8+ messages in thread
From: NeilBrown @ 2018-04-18 6:47 UTC (permalink / raw)
To: Thomas Graf, Herbert Xu; +Cc: netdev, inux-kernel
Neither rhashtable_walk_enter() or rhltable_walk_enter() sleep, so
remove the comments which suggest that they do.
Signed-off-by: NeilBrown <neilb@suse.com>
---
include/linux/rhashtable.h | 3 ---
lib/rhashtable.c | 3 ---
2 files changed, 6 deletions(-)
diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h
index 87d443a5b11d..b01d88e196c2 100644
--- a/include/linux/rhashtable.h
+++ b/include/linux/rhashtable.h
@@ -1268,9 +1268,6 @@ static inline int rhashtable_walk_init(struct rhashtable *ht,
* For a completely stable walk you should construct your own data
* structure outside the hash table.
*
- * This function may sleep so you must not call it from interrupt
- * context or with spin locks held.
- *
* You must call rhashtable_walk_exit after this function returns.
*/
static inline void rhltable_walk_enter(struct rhltable *hlt,
diff --git a/lib/rhashtable.c b/lib/rhashtable.c
index 2b2b79974b61..19db8e563c40 100644
--- a/lib/rhashtable.c
+++ b/lib/rhashtable.c
@@ -668,9 +668,6 @@ EXPORT_SYMBOL_GPL(rhashtable_insert_slow);
* For a completely stable walk you should construct your own data
* structure outside the hash table.
*
- * This function may sleep so you must not call it from interrupt
- * context or with spin locks held.
- *
* You must call rhashtable_walk_exit after this function returns.
*/
void rhashtable_walk_enter(struct rhashtable *ht, struct rhashtable_iter *iter)
^ permalink raw reply related [flat|nested] 8+ messages in thread* [PATCH 4/6] rhashtable: improve rhashtable_walk stability when stop/start used.
2018-04-18 6:47 [PATCH 0/6] Assorted rhashtable improvements NeilBrown
` (2 preceding siblings ...)
2018-04-18 6:47 ` [PATCH 2/6] rhashtable: remove incorrect comment on r{hl, hash}table_walk_enter() NeilBrown
@ 2018-04-18 6:47 ` NeilBrown
2018-04-18 6:47 ` [PATCH 5/6] rhashtable: further improve stability of rhashtable_walk NeilBrown
2018-04-18 6:47 ` [PATCH 6/6] rhashtable: add rhashtable_walk_prev() NeilBrown
5 siblings, 0 replies; 8+ messages in thread
From: NeilBrown @ 2018-04-18 6:47 UTC (permalink / raw)
To: Thomas Graf, Herbert Xu; +Cc: netdev, inux-kernel
When a walk of an rhashtable is interrupted with rhastable_walk_stop()
and then rhashtable_walk_start(), the location to restart from is based
on a 'skip' count in the current hash chain, and this can be incorrect
if insertions or deletions have happened. This does not happen when
the walk is not stopped and started as iter->p is a placeholder which
is safe to use while holding the RCU read lock.
In rhashtable_walk_start() we can revalidate that 'p' is still in the
same hash chain. If it isn't then the current method is still used.
With this patch, if a rhashtable walker ensures that the current
object remains in the table over a stop/start period (possibly by
elevating the reference count if that is sufficient), it can be sure
that a walk will not miss objects that were in the hashtable for the
whole time of the walk.
rhashtable_walk_start() may not find the object even though it is
still in the hashtable if a rehash has moved it to a new table. In
this case it will (eventually) get -EAGAIN and will need to proceed
through the whole table again to be sure to see everything at least
once.
Acked-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: NeilBrown <neilb@suse.com>
---
lib/rhashtable.c | 44 +++++++++++++++++++++++++++++++++++++++++---
1 file changed, 41 insertions(+), 3 deletions(-)
diff --git a/lib/rhashtable.c b/lib/rhashtable.c
index 28e1be9f681b..16cde54a553b 100644
--- a/lib/rhashtable.c
+++ b/lib/rhashtable.c
@@ -723,6 +723,7 @@ int rhashtable_walk_start_check(struct rhashtable_iter *iter)
__acquires(RCU)
{
struct rhashtable *ht = iter->ht;
+ bool rhlist = ht->rhlist;
rcu_read_lock();
@@ -731,13 +732,52 @@ int rhashtable_walk_start_check(struct rhashtable_iter *iter)
list_del(&iter->walker.list);
spin_unlock(&ht->lock);
- if (!iter->walker.tbl && !iter->end_of_table) {
+ if (iter->end_of_table)
+ return 0;
+ if (!iter->walker.tbl) {
iter->walker.tbl = rht_dereference_rcu(ht->tbl, ht);
iter->slot = 0;
iter->skip = 0;
return -EAGAIN;
}
+ if (iter->p && !rhlist) {
+ /*
+ * We need to validate that 'p' is still in the table, and
+ * if so, update 'skip'
+ */
+ struct rhash_head *p;
+ int skip = 0;
+ rht_for_each_rcu(p, iter->walker.tbl, iter->slot) {
+ skip++;
+ if (p == iter->p) {
+ iter->skip = skip;
+ goto found;
+ }
+ }
+ iter->p = NULL;
+ } else if (iter->p && rhlist) {
+ /* Need to validate that 'list' is still in the table, and
+ * if so, update 'skip' and 'p'.
+ */
+ struct rhash_head *p;
+ struct rhlist_head *list;
+ int skip = 0;
+ rht_for_each_rcu(p, iter->walker.tbl, iter->slot) {
+ for (list = container_of(p, struct rhlist_head, rhead);
+ list;
+ list = rcu_dereference(list->next)) {
+ skip++;
+ if (list == iter->list) {
+ iter->p = p;
+ skip = skip;
+ goto found;
+ }
+ }
+ }
+ iter->p = NULL;
+ }
+found:
return 0;
}
EXPORT_SYMBOL_GPL(rhashtable_walk_start_check);
@@ -913,8 +953,6 @@ void rhashtable_walk_stop(struct rhashtable_iter *iter)
iter->walker.tbl = NULL;
spin_unlock(&ht->lock);
- iter->p = NULL;
-
out:
rcu_read_unlock();
}
^ permalink raw reply related [flat|nested] 8+ messages in thread* [PATCH 5/6] rhashtable: further improve stability of rhashtable_walk
2018-04-18 6:47 [PATCH 0/6] Assorted rhashtable improvements NeilBrown
` (3 preceding siblings ...)
2018-04-18 6:47 ` [PATCH 4/6] rhashtable: improve rhashtable_walk stability when stop/start used NeilBrown
@ 2018-04-18 6:47 ` NeilBrown
2018-04-18 6:47 ` [PATCH 6/6] rhashtable: add rhashtable_walk_prev() NeilBrown
5 siblings, 0 replies; 8+ messages in thread
From: NeilBrown @ 2018-04-18 6:47 UTC (permalink / raw)
To: Thomas Graf, Herbert Xu; +Cc: netdev, inux-kernel
If the sequence:
obj = rhashtable_walk_next(iter);
rhashtable_walk_stop(iter);
rhashtable_remove_fast(ht, &obj->head, params);
rhashtable_walk_start(iter);
races with another thread inserting or removing
an object on the same hash chain, a subsequent
rhashtable_walk_next() is not guaranteed to get the "next"
object. It is possible that an object could be
repeated, or missed.
This can be made more reliable by keeping the objects in a hash chain
sorted by memory address. A subsequent rhashtable_walk_next()
call can reliably find the correct position in the list, and thus
find the 'next' object.
It is not possible (certainly not so easy) to achieve this with an
rhltable as keeping the hash chain in order is not so easy. When the
first object with a given key is removed, it is replaced in the chain
with the next object with the same key, and the address of that
object may not be correctly ordered.
No current user of rhltable_walk_enter() calls
rhashtable_walk_start() more than once, so no current code
could benefit from a more reliable walk of rhltables.
This patch only attempts to improve walks for rhashtables.
- a new object is always inserted after the last object with a
smaller address, or at the start
- when rhashtable_walk_start() is called, it records that 'p' is not
'safe', meaning that it cannot be dereferenced. The revalidation
that was previously done here is moved to rhashtable_walk_next()
- when rhashtable_walk_next() is called while p is not NULL and not
safe, it walks the chain looking for the first object with an
address greater than p and returns that. If there is none, it moves
to the next hash chain.
Signed-off-by: NeilBrown <neilb@suse.com>
---
include/linux/rhashtable.h | 12 ++++++--
lib/rhashtable.c | 66 +++++++++++++++++++++++++++-----------------
2 files changed, 50 insertions(+), 28 deletions(-)
diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h
index b01d88e196c2..5ce6201f246e 100644
--- a/include/linux/rhashtable.h
+++ b/include/linux/rhashtable.h
@@ -207,6 +207,7 @@ struct rhashtable_iter {
struct rhashtable_walker walker;
unsigned int slot;
unsigned int skip;
+ bool p_is_unsafe;
bool end_of_table;
};
@@ -730,6 +731,7 @@ static inline void *__rhashtable_insert_fast(
.ht = ht,
.key = key,
};
+ struct rhash_head __rcu **inspos;
struct rhash_head __rcu **pprev;
struct bucket_table *tbl;
struct rhash_head *head;
@@ -757,6 +759,7 @@ static inline void *__rhashtable_insert_fast(
data = ERR_PTR(-ENOMEM);
if (!pprev)
goto out;
+ inspos = pprev;
rht_for_each_continue(head, *pprev, tbl, hash) {
struct rhlist_head *plist;
@@ -768,6 +771,8 @@ static inline void *__rhashtable_insert_fast(
params.obj_cmpfn(&arg, rht_obj(ht, head)) :
rhashtable_compare(&arg, rht_obj(ht, head)))) {
pprev = &head->next;
+ if (head < obj)
+ inspos = &head->next;
continue;
}
@@ -798,7 +803,7 @@ static inline void *__rhashtable_insert_fast(
if (unlikely(rht_grow_above_100(ht, tbl)))
goto slow_path;
- head = rht_dereference_bucket(*pprev, tbl, hash);
+ head = rht_dereference_bucket(*inspos, tbl, hash);
RCU_INIT_POINTER(obj->next, head);
if (rhlist) {
@@ -808,7 +813,7 @@ static inline void *__rhashtable_insert_fast(
RCU_INIT_POINTER(list->next, NULL);
}
- rcu_assign_pointer(*pprev, obj);
+ rcu_assign_pointer(*inspos, obj);
atomic_inc(&ht->nelems);
if (rht_grow_above_75(ht, tbl))
@@ -1263,7 +1268,8 @@ static inline int rhashtable_walk_init(struct rhashtable *ht,
* Note that if you restart a walk after rhashtable_walk_stop you
* may see the same object twice. Also, you may miss objects if
* there are removals in between rhashtable_walk_stop and the next
- * call to rhashtable_walk_start.
+ * call to rhashtable_walk_start. Note that this is different to
+ * rhashtable_walk_enter() which never leads to missing objects.
*
* For a completely stable walk you should construct your own data
* structure outside the hash table.
diff --git a/lib/rhashtable.c b/lib/rhashtable.c
index 16cde54a553b..be7eb57d9398 100644
--- a/lib/rhashtable.c
+++ b/lib/rhashtable.c
@@ -566,6 +566,10 @@ static struct bucket_table *rhashtable_insert_one(struct rhashtable *ht,
return ERR_PTR(-ENOMEM);
head = rht_dereference_bucket(*pprev, tbl, hash);
+ while (!rht_is_a_nulls(head) && head < obj) {
+ pprev = &head->next;
+ head = rht_dereference_bucket(*pprev, tbl, hash);
+ }
RCU_INIT_POINTER(obj->next, head);
if (ht->rhlist) {
@@ -660,10 +664,10 @@ EXPORT_SYMBOL_GPL(rhashtable_insert_slow);
*
* This function prepares a hash table walk.
*
- * Note that if you restart a walk after rhashtable_walk_stop you
- * may see the same object twice. Also, you may miss objects if
- * there are removals in between rhashtable_walk_stop and the next
- * call to rhashtable_walk_start.
+ * A walk is guaranteed to return every object that was in
+ * the table before this call, and is still in the table when
+ * rhashtable_walk_next() returns NULL. Duplicates can be
+ * seen, but only if there is a rehash event during the walk.
*
* For a completely stable walk you should construct your own data
* structure outside the hash table.
@@ -743,19 +747,10 @@ int rhashtable_walk_start_check(struct rhashtable_iter *iter)
if (iter->p && !rhlist) {
/*
- * We need to validate that 'p' is still in the table, and
- * if so, update 'skip'
+ * 'p' will be revalidated when rhashtable_walk_next()
+ * is called.
*/
- struct rhash_head *p;
- int skip = 0;
- rht_for_each_rcu(p, iter->walker.tbl, iter->slot) {
- skip++;
- if (p == iter->p) {
- iter->skip = skip;
- goto found;
- }
- }
- iter->p = NULL;
+ iter->p_is_unsafe = true;
} else if (iter->p && rhlist) {
/* Need to validate that 'list' is still in the table, and
* if so, update 'skip' and 'p'.
@@ -872,15 +867,35 @@ void *rhashtable_walk_next(struct rhashtable_iter *iter)
bool rhlist = ht->rhlist;
if (p) {
- if (!rhlist || !(list = rcu_dereference(list->next))) {
- p = rcu_dereference(p->next);
- list = container_of(p, struct rhlist_head, rhead);
- }
- if (!rht_is_a_nulls(p)) {
- iter->skip++;
- iter->p = p;
- iter->list = list;
- return rht_obj(ht, rhlist ? &list->rhead : p);
+ if (!rhlist && iter->p_is_unsafe) {
+ /*
+ * First time next() was called after start().
+ * Need to find location of 'p' in the list.
+ */
+ struct rhash_head *p;
+
+ iter->skip = 0;
+ rht_for_each_rcu(p, iter->walker.tbl, iter->slot) {
+ iter->skip++;
+ if (p <= iter->p)
+ continue;
+
+ /* p is the next object after iter->p */
+ iter->p = p;
+ iter->p_is_unsafe = false;
+ return rht_obj(ht, p);
+ }
+ } else {
+ if (!rhlist || !(list = rcu_dereference(list->next))) {
+ p = rcu_dereference(p->next);
+ list = container_of(p, struct rhlist_head, rhead);
+ }
+ if (!rht_is_a_nulls(p)) {
+ iter->skip++;
+ iter->p = p;
+ iter->list = list;
+ return rht_obj(ht, rhlist ? &list->rhead : p);
+ }
}
/* At the end of this slot, switch to next one and then find
@@ -890,6 +905,7 @@ void *rhashtable_walk_next(struct rhashtable_iter *iter)
iter->slot++;
}
+ iter->p_is_unsafe = false;
return __rhashtable_walk_find_next(iter);
}
EXPORT_SYMBOL_GPL(rhashtable_walk_next);
^ permalink raw reply related [flat|nested] 8+ messages in thread* [PATCH 6/6] rhashtable: add rhashtable_walk_prev()
2018-04-18 6:47 [PATCH 0/6] Assorted rhashtable improvements NeilBrown
` (4 preceding siblings ...)
2018-04-18 6:47 ` [PATCH 5/6] rhashtable: further improve stability of rhashtable_walk NeilBrown
@ 2018-04-18 6:47 ` NeilBrown
5 siblings, 0 replies; 8+ messages in thread
From: NeilBrown @ 2018-04-18 6:47 UTC (permalink / raw)
To: Thomas Graf, Herbert Xu; +Cc: netdev, inux-kernel
rhashtable_walk_prev() returns the object returned by
the previous rhashtable_walk_next(), providing it is still in the
table (or was during this grace period).
This works even if rhashtable_walk_stop() and rhashtable_talk_start()
have been called since the last rhashtable_walk_next().
If there have been no calls to rhashtable_walk_next(), or if the
object is gone from the table, then NULL is returned.
This can usefully be used in a seq_file ->start() function.
If the pos is the same as was returned by the last ->next() call,
then rhashtable_walk_prev() can be used to re-establish the
current location in the table. If it returns NULL, then
rhashtable_walk_next() should be used.
Signed-off-by: NeilBrown <neilb@suse.com>
---
include/linux/rhashtable.h | 1 +
lib/rhashtable.c | 30 ++++++++++++++++++++++++++++++
2 files changed, 31 insertions(+)
diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h
index 5ce6201f246e..b1ad2b6a3f3f 100644
--- a/include/linux/rhashtable.h
+++ b/include/linux/rhashtable.h
@@ -397,6 +397,7 @@ static inline void rhashtable_walk_start(struct rhashtable_iter *iter)
void *rhashtable_walk_next(struct rhashtable_iter *iter);
void *rhashtable_walk_peek(struct rhashtable_iter *iter);
+void *rhashtable_walk_prev(struct rhashtable_iter *iter);
void rhashtable_walk_stop(struct rhashtable_iter *iter) __releases(RCU);
void rhashtable_free_and_destroy(struct rhashtable *ht,
diff --git a/lib/rhashtable.c b/lib/rhashtable.c
index be7eb57d9398..d2f941146ea3 100644
--- a/lib/rhashtable.c
+++ b/lib/rhashtable.c
@@ -910,6 +910,36 @@ void *rhashtable_walk_next(struct rhashtable_iter *iter)
}
EXPORT_SYMBOL_GPL(rhashtable_walk_next);
+/**
+ * rhashtable_walk_prev - Return the previously returned object, if available
+ * @iter: Hash table iterator
+ *
+ * If rhashtable_walk_next() has previously been called and the object
+ * it returned is still in the hash table, that object is returned again,
+ * otherwise %NULL is returned.
+ *
+ * If the recent rhashtable_walk_next() call was since the most recent
+ * rhashtable_walk_start() call then the returned object may not, strictly
+ * speaking, still be in the table. It will be safe to dereference.
+ *
+ * Note that the iterator is not changed and in particular it does not
+ * step backwards.
+ */
+void *rhashtable_walk_prev(struct rhashtable_iter *iter)
+{
+ struct rhashtable *ht = iter->ht;
+ struct rhash_head *p = iter->p;
+
+ if (!p)
+ return NULL;
+ if (!iter->p_is_unsafe || ht->rhlist)
+ return p;
+ rht_for_each_rcu(p, iter->walker.tbl, iter->slot)
+ if (p == iter->p)
+ return p;
+ return NULL;
+}
+
/**
* rhashtable_walk_peek - Return the next object but don't advance the iterator
* @iter: Hash table iterator
^ permalink raw reply related [flat|nested] 8+ messages in thread
* [PATCH 5/6] rhashtable: further improve stability of rhashtable_walk
2018-04-18 6:47 [PATCH 0/6] Assorted rhashtable improvements. RESEND NeilBrown
@ 2018-04-18 6:47 ` NeilBrown
0 siblings, 0 replies; 8+ messages in thread
From: NeilBrown @ 2018-04-18 6:47 UTC (permalink / raw)
To: Thomas Graf, Herbert Xu; +Cc: netdev, linux-kernel
If the sequence:
obj = rhashtable_walk_next(iter);
rhashtable_walk_stop(iter);
rhashtable_remove_fast(ht, &obj->head, params);
rhashtable_walk_start(iter);
races with another thread inserting or removing
an object on the same hash chain, a subsequent
rhashtable_walk_next() is not guaranteed to get the "next"
object. It is possible that an object could be
repeated, or missed.
This can be made more reliable by keeping the objects in a hash chain
sorted by memory address. A subsequent rhashtable_walk_next()
call can reliably find the correct position in the list, and thus
find the 'next' object.
It is not possible (certainly not so easy) to achieve this with an
rhltable as keeping the hash chain in order is not so easy. When the
first object with a given key is removed, it is replaced in the chain
with the next object with the same key, and the address of that
object may not be correctly ordered.
No current user of rhltable_walk_enter() calls
rhashtable_walk_start() more than once, so no current code
could benefit from a more reliable walk of rhltables.
This patch only attempts to improve walks for rhashtables.
- a new object is always inserted after the last object with a
smaller address, or at the start
- when rhashtable_walk_start() is called, it records that 'p' is not
'safe', meaning that it cannot be dereferenced. The revalidation
that was previously done here is moved to rhashtable_walk_next()
- when rhashtable_walk_next() is called while p is not NULL and not
safe, it walks the chain looking for the first object with an
address greater than p and returns that. If there is none, it moves
to the next hash chain.
Signed-off-by: NeilBrown <neilb@suse.com>
---
include/linux/rhashtable.h | 12 ++++++--
lib/rhashtable.c | 66 +++++++++++++++++++++++++++-----------------
2 files changed, 50 insertions(+), 28 deletions(-)
diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h
index b01d88e196c2..5ce6201f246e 100644
--- a/include/linux/rhashtable.h
+++ b/include/linux/rhashtable.h
@@ -207,6 +207,7 @@ struct rhashtable_iter {
struct rhashtable_walker walker;
unsigned int slot;
unsigned int skip;
+ bool p_is_unsafe;
bool end_of_table;
};
@@ -730,6 +731,7 @@ static inline void *__rhashtable_insert_fast(
.ht = ht,
.key = key,
};
+ struct rhash_head __rcu **inspos;
struct rhash_head __rcu **pprev;
struct bucket_table *tbl;
struct rhash_head *head;
@@ -757,6 +759,7 @@ static inline void *__rhashtable_insert_fast(
data = ERR_PTR(-ENOMEM);
if (!pprev)
goto out;
+ inspos = pprev;
rht_for_each_continue(head, *pprev, tbl, hash) {
struct rhlist_head *plist;
@@ -768,6 +771,8 @@ static inline void *__rhashtable_insert_fast(
params.obj_cmpfn(&arg, rht_obj(ht, head)) :
rhashtable_compare(&arg, rht_obj(ht, head)))) {
pprev = &head->next;
+ if (head < obj)
+ inspos = &head->next;
continue;
}
@@ -798,7 +803,7 @@ static inline void *__rhashtable_insert_fast(
if (unlikely(rht_grow_above_100(ht, tbl)))
goto slow_path;
- head = rht_dereference_bucket(*pprev, tbl, hash);
+ head = rht_dereference_bucket(*inspos, tbl, hash);
RCU_INIT_POINTER(obj->next, head);
if (rhlist) {
@@ -808,7 +813,7 @@ static inline void *__rhashtable_insert_fast(
RCU_INIT_POINTER(list->next, NULL);
}
- rcu_assign_pointer(*pprev, obj);
+ rcu_assign_pointer(*inspos, obj);
atomic_inc(&ht->nelems);
if (rht_grow_above_75(ht, tbl))
@@ -1263,7 +1268,8 @@ static inline int rhashtable_walk_init(struct rhashtable *ht,
* Note that if you restart a walk after rhashtable_walk_stop you
* may see the same object twice. Also, you may miss objects if
* there are removals in between rhashtable_walk_stop and the next
- * call to rhashtable_walk_start.
+ * call to rhashtable_walk_start. Note that this is different to
+ * rhashtable_walk_enter() which never leads to missing objects.
*
* For a completely stable walk you should construct your own data
* structure outside the hash table.
diff --git a/lib/rhashtable.c b/lib/rhashtable.c
index 16cde54a553b..be7eb57d9398 100644
--- a/lib/rhashtable.c
+++ b/lib/rhashtable.c
@@ -566,6 +566,10 @@ static struct bucket_table *rhashtable_insert_one(struct rhashtable *ht,
return ERR_PTR(-ENOMEM);
head = rht_dereference_bucket(*pprev, tbl, hash);
+ while (!rht_is_a_nulls(head) && head < obj) {
+ pprev = &head->next;
+ head = rht_dereference_bucket(*pprev, tbl, hash);
+ }
RCU_INIT_POINTER(obj->next, head);
if (ht->rhlist) {
@@ -660,10 +664,10 @@ EXPORT_SYMBOL_GPL(rhashtable_insert_slow);
*
* This function prepares a hash table walk.
*
- * Note that if you restart a walk after rhashtable_walk_stop you
- * may see the same object twice. Also, you may miss objects if
- * there are removals in between rhashtable_walk_stop and the next
- * call to rhashtable_walk_start.
+ * A walk is guaranteed to return every object that was in
+ * the table before this call, and is still in the table when
+ * rhashtable_walk_next() returns NULL. Duplicates can be
+ * seen, but only if there is a rehash event during the walk.
*
* For a completely stable walk you should construct your own data
* structure outside the hash table.
@@ -743,19 +747,10 @@ int rhashtable_walk_start_check(struct rhashtable_iter *iter)
if (iter->p && !rhlist) {
/*
- * We need to validate that 'p' is still in the table, and
- * if so, update 'skip'
+ * 'p' will be revalidated when rhashtable_walk_next()
+ * is called.
*/
- struct rhash_head *p;
- int skip = 0;
- rht_for_each_rcu(p, iter->walker.tbl, iter->slot) {
- skip++;
- if (p == iter->p) {
- iter->skip = skip;
- goto found;
- }
- }
- iter->p = NULL;
+ iter->p_is_unsafe = true;
} else if (iter->p && rhlist) {
/* Need to validate that 'list' is still in the table, and
* if so, update 'skip' and 'p'.
@@ -872,15 +867,35 @@ void *rhashtable_walk_next(struct rhashtable_iter *iter)
bool rhlist = ht->rhlist;
if (p) {
- if (!rhlist || !(list = rcu_dereference(list->next))) {
- p = rcu_dereference(p->next);
- list = container_of(p, struct rhlist_head, rhead);
- }
- if (!rht_is_a_nulls(p)) {
- iter->skip++;
- iter->p = p;
- iter->list = list;
- return rht_obj(ht, rhlist ? &list->rhead : p);
+ if (!rhlist && iter->p_is_unsafe) {
+ /*
+ * First time next() was called after start().
+ * Need to find location of 'p' in the list.
+ */
+ struct rhash_head *p;
+
+ iter->skip = 0;
+ rht_for_each_rcu(p, iter->walker.tbl, iter->slot) {
+ iter->skip++;
+ if (p <= iter->p)
+ continue;
+
+ /* p is the next object after iter->p */
+ iter->p = p;
+ iter->p_is_unsafe = false;
+ return rht_obj(ht, p);
+ }
+ } else {
+ if (!rhlist || !(list = rcu_dereference(list->next))) {
+ p = rcu_dereference(p->next);
+ list = container_of(p, struct rhlist_head, rhead);
+ }
+ if (!rht_is_a_nulls(p)) {
+ iter->skip++;
+ iter->p = p;
+ iter->list = list;
+ return rht_obj(ht, rhlist ? &list->rhead : p);
+ }
}
/* At the end of this slot, switch to next one and then find
@@ -890,6 +905,7 @@ void *rhashtable_walk_next(struct rhashtable_iter *iter)
iter->slot++;
}
+ iter->p_is_unsafe = false;
return __rhashtable_walk_find_next(iter);
}
EXPORT_SYMBOL_GPL(rhashtable_walk_next);
^ permalink raw reply related [flat|nested] 8+ messages in thread