* [PATCHSET v3] sbitmap optimizations
@ 2018-11-30 2:09 Jens Axboe
2018-11-30 2:09 ` [PATCH 1/2] sbitmap: amortize cost of clearing bits Jens Axboe
2018-11-30 2:09 ` [PATCH 2/2] sbitmap: optimize wakeup check Jens Axboe
From: Jens Axboe @ 2018-11-30 2:09 UTC
To: linux-block, osandov
The v2 posting got screwed up somehow; sending a v3 just to make sure
things are sane.
Changes:
- Dropped the alignment patch; it should not be needed unless we have
  some sort of debugging enabled.
- Fixed the fumbled optimized wakeup; it's important that we match
  prep + finish with that interface.
--
Jens Axboe
* [PATCH 1/2] sbitmap: amortize cost of clearing bits
2018-11-30 2:09 [PATCHSET v3] sbitmap optimizations Jens Axboe
@ 2018-11-30 2:09 ` Jens Axboe
2018-11-30 2:09 ` [PATCH 2/2] sbitmap: optimize wakeup check Jens Axboe
From: Jens Axboe @ 2018-11-30 2:09 UTC
To: linux-block, osandov; +Cc: Jens Axboe
sbitmap maintains a set of words that we use to set and clear bits, with
each bit representing a tag for blk-mq. Even though we spread the bits
out and maintain a hint cache, any particular bit we allocate will end up
being cleared in the exact same spot.
This patch introduces batched clearing of bits. Instead of clearing a
given bit in ->word, the same bit is set in a separate cleared/free mask.
If we fail to allocate a bit from a given word, we check the cleared mask
and batch move those cleared bits back into ->word at that point. This
trades 64 atomic bitops for 2 cmpxchg().
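As a rough illustration of the idea, here is a minimal stand-alone sketch
(made-up names; it uses a plain xchg() where the patch below uses a
cmpxchg() loop and it omits the wakeup handling, so see the diff for the
real thing):

#include <linux/atomic.h>
#include <linux/bitops.h>
#include <linux/types.h>

/* Illustrative per-word state: allocation scans ->word, freeing hits ->cleared. */
struct example_word {
        unsigned long word;     /* bits currently allocated */
        unsigned long cleared;  /* bits freed since the last batch move */
};

/* Fast free path: a single set_bit() on ->cleared, ->word is not touched. */
static void example_deferred_clear_bit(struct example_word *w, unsigned int bit)
{
        set_bit(bit, &w->cleared);
}

/* Allocation slow path: fold all deferred frees back into ->word in one go. */
static bool example_batch_clear(struct example_word *w)
{
        unsigned long mask, val;

        if (!w->cleared)
                return false;

        /* Atomically grab the cleared mask and reset it to 0. */
        mask = xchg(&w->cleared, 0);

        /* Drop those bits from ->word so they can be allocated again. */
        do {
                val = w->word;
        } while (cmpxchg(&w->word, val, val & ~mask) != val);

        return true;
}

The point is that freeing never writes ->word: a run of frees costs one
set_bit() each on ->cleared, and ->word is only rewritten once, when the
allocator actually runs dry on that word.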
In a threaded poll test case, this change removes half the overhead of
getting and clearing tags. In another poll test case with a single
thread, performance is unchanged.
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
include/linux/sbitmap.h | 26 +++++++++++++++---
lib/sbitmap.c | 60 ++++++++++++++++++++++++++++++++++++++---
2 files changed, 78 insertions(+), 8 deletions(-)
diff --git a/include/linux/sbitmap.h b/include/linux/sbitmap.h
index 804a50983ec5..cec685b89998 100644
--- a/include/linux/sbitmap.h
+++ b/include/linux/sbitmap.h
@@ -30,14 +30,19 @@ struct seq_file;
*/
struct sbitmap_word {
/**
- * @word: The bitmap word itself.
+ * @depth: Number of bits being used in @word/@cleared
*/
- unsigned long word;
+ unsigned long depth;
/**
- * @depth: Number of bits being used in @word.
+ * @word: word holding free bits
*/
- unsigned long depth;
+ unsigned long word ____cacheline_aligned_in_smp;
+
+ /**
+ * @cleared: word holding cleared bits
+ */
+ unsigned long cleared ____cacheline_aligned_in_smp;
} ____cacheline_aligned_in_smp;
/**
@@ -310,6 +315,19 @@ static inline void sbitmap_clear_bit(struct sbitmap *sb, unsigned int bitnr)
clear_bit(SB_NR_TO_BIT(sb, bitnr), __sbitmap_word(sb, bitnr));
}
+/*
+ * This one is special, since it doesn't actually clear the bit, rather it
+ * sets the corresponding bit in the ->cleared mask instead. Paired with
+ * the caller doing sbitmap_batch_clear() if a given index is full, which
+ * will clear the previously freed entries in the corresponding ->word.
+ */
+static inline void sbitmap_deferred_clear_bit(struct sbitmap *sb, unsigned int bitnr)
+{
+ unsigned long *addr = &sb->map[SB_NR_TO_INDEX(sb, bitnr)].cleared;
+
+ set_bit(SB_NR_TO_BIT(sb, bitnr), addr);
+}
+
static inline void sbitmap_clear_bit_unlock(struct sbitmap *sb,
unsigned int bitnr)
{
diff --git a/lib/sbitmap.c b/lib/sbitmap.c
index 45cab6bbc1c7..2316f53f3e1d 100644
--- a/lib/sbitmap.c
+++ b/lib/sbitmap.c
@@ -111,6 +111,58 @@ static int __sbitmap_get_word(unsigned long *word, unsigned long depth,
return nr;
}
+/*
+ * See if we have deferred clears that we can batch move
+ */
+static inline bool sbitmap_deferred_clear(struct sbitmap *sb, int index)
+{
+ unsigned long mask, val;
+
+ if (!sb->map[index].cleared)
+ return false;
+
+ /*
+ * First get a stable cleared mask, setting the old mask to 0.
+ */
+ do {
+ mask = sb->map[index].cleared;
+ } while (cmpxchg(&sb->map[index].cleared, mask, 0) != mask);
+
+ /*
+ * Now clear the masked bits in our free word
+ */
+ do {
+ val = sb->map[index].word;
+ } while (cmpxchg(&sb->map[index].word, val, val & ~mask) != val);
+
+ /*
+ * If someone found ->cleared == 0 before we wrote ->word, then
+ * they could have failed when they should not have. Check for
+ * waiters.
+ */
+ smp_mb__after_atomic();
+ sbitmap_queue_wake_up(container_of(sb, struct sbitmap_queue, sb));
+ return true;
+}
+
+static int sbitmap_find_bit_in_index(struct sbitmap *sb, int index,
+ unsigned int alloc_hint, bool round_robin)
+{
+ int nr;
+
+ do {
+ nr = __sbitmap_get_word(&sb->map[index].word,
+ sb->map[index].depth, alloc_hint,
+ !round_robin);
+ if (nr != -1)
+ break;
+ if (!sbitmap_deferred_clear(sb, index))
+ break;
+ } while (1);
+
+ return nr;
+}
+
int sbitmap_get(struct sbitmap *sb, unsigned int alloc_hint, bool round_robin)
{
unsigned int i, index;
@@ -129,9 +181,8 @@ int sbitmap_get(struct sbitmap *sb, unsigned int alloc_hint, bool round_robin)
alloc_hint = 0;
for (i = 0; i < sb->map_nr; i++) {
- nr = __sbitmap_get_word(&sb->map[index].word,
- sb->map[index].depth, alloc_hint,
- !round_robin);
+ nr = sbitmap_find_bit_in_index(sb, index, alloc_hint,
+ round_robin);
if (nr != -1) {
nr += index << sb->shift;
break;
@@ -514,7 +565,8 @@ EXPORT_SYMBOL_GPL(sbitmap_queue_wake_up);
void sbitmap_queue_clear(struct sbitmap_queue *sbq, unsigned int nr,
unsigned int cpu)
{
- sbitmap_clear_bit_unlock(&sbq->sb, nr);
+ sbitmap_deferred_clear_bit(&sbq->sb, nr);
+
/*
* Pairs with the memory barrier in set_current_state() to ensure the
* proper ordering of clear_bit_unlock()/waitqueue_active() in the waker
--
2.17.1
* [PATCH 2/2] sbitmap: optimize wakeup check
2018-11-30 2:09 [PATCHSET v3] sbitmap optimizations Jens Axboe
2018-11-30 2:09 ` [PATCH 1/2] sbitmap: amortize cost of clearing bits Jens Axboe
@ 2018-11-30 2:09 ` Jens Axboe
From: Jens Axboe @ 2018-11-30 2:09 UTC
To: linux-block, osandov; +Cc: Jens Axboe
Even if we have no waiters on any of the sbitmap_queue wait states, we
still have to loop over every entry to check. We do this for every IO,
so the cost adds up.
Shift a bit of the cost to the slow path, when we actually have waiters.
Wrap prepare_to_wait_exclusive() and finish_wait(), so that we can
maintain an internal count of how many waiters are currently active.
Then we can simply check this count in sbq_wake_ptr() and skip the loop
if we don't have any sleepers.
Convert the two sbitmap users that wait on tags, blk-mq-tag and the
iSCSI target.
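For reference, a rough sketch of the intended calling pattern
(illustrative only, assuming an already set up 'struct sbitmap_queue *sbq'
and an 'atomic_t wait_index'; the real conversions are in the blk-mq-tag
and iSCSI hunks below):

        struct sbq_wait_state *ws;
        unsigned int cpu;
        int tag;
        DEFINE_WAIT(wait);

        ws = sbq_wait_ptr(sbq, &wait_index);
        for (;;) {
                /* Bumps sbq->ws_active, so sbq_wake_ptr() knows a sleeper exists. */
                sbitmap_prepare_to_wait(sbq, ws, &wait, TASK_UNINTERRUPTIBLE);
                tag = sbitmap_queue_get(sbq, &cpu);
                if (tag >= 0)
                        break;
                io_schedule();
                /* Drops ws_active again; re-armed at the top of the loop. */
                sbitmap_finish_wait(sbq, ws, &wait);
        }
        sbitmap_finish_wait(sbq, ws, &wait);

With no sleepers, sbq_wake_ptr() now sees ws_active == 0 and returns NULL
immediately instead of scanning all SBQ_WAIT_QUEUES entries.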
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
block/blk-mq-tag.c | 13 ++++++-------
drivers/target/iscsi/iscsi_target_util.c | 9 ++++++---
include/linux/sbitmap.h | 19 +++++++++++++++++++
lib/sbitmap.c | 21 +++++++++++++++++++++
4 files changed, 52 insertions(+), 10 deletions(-)
diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
index 87bc5df72d48..4704046ce493 100644
--- a/block/blk-mq-tag.c
+++ b/block/blk-mq-tag.c
@@ -154,12 +154,13 @@ unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data)
if (tag != -1)
break;
- prepare_to_wait_exclusive(&ws->wait, &wait,
- TASK_UNINTERRUPTIBLE);
+ sbitmap_prepare_to_wait(bt, ws, &wait, TASK_UNINTERRUPTIBLE);
tag = __blk_mq_get_tag(data, bt);
- if (tag != -1)
+ if (tag != -1) {
+ sbitmap_finish_wait(bt, ws, &wait);
break;
+ }
if (data->ctx)
blk_mq_put_ctx(data->ctx);
@@ -167,6 +168,8 @@ unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data)
bt_prev = bt;
io_schedule();
+ sbitmap_finish_wait(bt, ws, &wait);
+
data->ctx = blk_mq_get_ctx(data->q);
data->hctx = blk_mq_map_queue(data->q, data->cmd_flags,
data->ctx->cpu);
@@ -176,8 +179,6 @@ unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data)
else
bt = &tags->bitmap_tags;
- finish_wait(&ws->wait, &wait);
-
/*
* If destination hw queue is changed, fake wake up on
* previous queue for compensating the wake up miss, so
@@ -192,8 +193,6 @@ unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data)
if (drop_ctx && data->ctx)
blk_mq_put_ctx(data->ctx);
- finish_wait(&ws->wait, &wait);
-
found_tag:
return tag + tag_offset;
}
diff --git a/drivers/target/iscsi/iscsi_target_util.c b/drivers/target/iscsi/iscsi_target_util.c
index 36b742932c72..37db1f80a219 100644
--- a/drivers/target/iscsi/iscsi_target_util.c
+++ b/drivers/target/iscsi/iscsi_target_util.c
@@ -152,22 +152,25 @@ static int iscsit_wait_for_tag(struct se_session *se_sess, int state, int *cpup)
int tag = -1;
DEFINE_WAIT(wait);
struct sbq_wait_state *ws;
+ struct sbitmap_queue *sbq;
if (state == TASK_RUNNING)
return tag;
- ws = &se_sess->sess_tag_pool.ws[0];
+ sbq = &se_sess->sess_tag_pool;
+ ws = &sbq->ws[0];
for (;;) {
- prepare_to_wait_exclusive(&ws->wait, &wait, state);
+ sbitmap_prepare_to_wait(sbq, ws, &wait, state);
if (signal_pending_state(state, current))
break;
tag = sbitmap_queue_get(&se_sess->sess_tag_pool, cpup);
if (tag >= 0)
break;
schedule();
+ sbitmap_finish_wait(sbq, ws, &wait);
}
+ sbitmap_finish_wait(sbq, ws, &wait);
- finish_wait(&ws->wait, &wait);
return tag;
}
diff --git a/include/linux/sbitmap.h b/include/linux/sbitmap.h
index cec685b89998..a65d5b67a611 100644
--- a/include/linux/sbitmap.h
+++ b/include/linux/sbitmap.h
@@ -130,6 +130,11 @@ struct sbitmap_queue {
*/
struct sbq_wait_state *ws;
+ /*
+ * @ws_active: count of currently active ws waitqueues
+ */
+ atomic_t ws_active;
+
/**
* @round_robin: Allocate bits in strict round-robin order.
*/
@@ -549,4 +554,18 @@ void sbitmap_queue_wake_up(struct sbitmap_queue *sbq);
*/
void sbitmap_queue_show(struct sbitmap_queue *sbq, struct seq_file *m);
+/*
+ * Wrapper around prepare_to_wait_exclusive(), which maintains some extra
+ * internal state.
+ */
+void sbitmap_prepare_to_wait(struct sbitmap_queue *sbq,
+ struct sbq_wait_state *ws,
+ struct wait_queue_entry *wait, int state);
+
+/*
+ * Must be paired with sbitmap_prepare_to_wait().
+ */
+void sbitmap_finish_wait(struct sbitmap_queue *sbq, struct sbq_wait_state *ws,
+ struct wait_queue_entry *wait);
+
#endif /* __LINUX_SCALE_BITMAP_H */
diff --git a/lib/sbitmap.c b/lib/sbitmap.c
index 2316f53f3e1d..ba51f538011a 100644
--- a/lib/sbitmap.c
+++ b/lib/sbitmap.c
@@ -381,6 +381,7 @@ int sbitmap_queue_init_node(struct sbitmap_queue *sbq, unsigned int depth,
sbq->min_shallow_depth = UINT_MAX;
sbq->wake_batch = sbq_calc_wake_batch(sbq, depth);
atomic_set(&sbq->wake_index, 0);
+ atomic_set(&sbq->ws_active, 0);
sbq->ws = kzalloc_node(SBQ_WAIT_QUEUES * sizeof(*sbq->ws), flags, node);
if (!sbq->ws) {
@@ -496,6 +497,9 @@ static struct sbq_wait_state *sbq_wake_ptr(struct sbitmap_queue *sbq)
{
int i, wake_index;
+ if (!atomic_read(&sbq->ws_active))
+ return NULL;
+
wake_index = atomic_read(&sbq->wake_index);
for (i = 0; i < SBQ_WAIT_QUEUES; i++) {
struct sbq_wait_state *ws = &sbq->ws[wake_index];
@@ -636,3 +640,20 @@ void sbitmap_queue_show(struct sbitmap_queue *sbq, struct seq_file *m)
seq_printf(m, "min_shallow_depth=%u\n", sbq->min_shallow_depth);
}
EXPORT_SYMBOL_GPL(sbitmap_queue_show);
+
+void sbitmap_prepare_to_wait(struct sbitmap_queue *sbq,
+ struct sbq_wait_state *ws,
+ struct wait_queue_entry *wait, int state)
+{
+ atomic_inc(&sbq->ws_active);
+ prepare_to_wait_exclusive(&ws->wait, wait, state);
+}
+EXPORT_SYMBOL_GPL(sbitmap_prepare_to_wait);
+
+void sbitmap_finish_wait(struct sbitmap_queue *sbq, struct sbq_wait_state *ws,
+ struct wait_queue_entry *wait)
+{
+ finish_wait(&ws->wait, wait);
+ atomic_dec(&sbq->ws_active);
+}
+EXPORT_SYMBOL_GPL(sbitmap_finish_wait);
--
2.17.1