public inbox for linux-block@vger.kernel.org
 help / color / mirror / Atom feed
From: Omar Sandoval <osandov@osandov.com>
To: Jens Axboe <axboe@kernel.dk>
Cc: linux-block@vger.kernel.org
Subject: Re: [PATCH 1/2] sbitmap: ammortize cost of clearing bits
Date: Fri, 30 Nov 2018 12:03:08 -0800	[thread overview]
Message-ID: <20181130200308.GB27411@vader> (raw)
In-Reply-To: <20181130160118.24683-2-axboe@kernel.dk>

On Fri, Nov 30, 2018 at 09:01:17AM -0700, Jens Axboe wrote:
> sbitmap maintains a set of words that we use to set and clear bits, with
> each bit representing a tag for blk-mq. Even though we spread the bits
> out and maintain a hint cache, one particular bit allocated will end up
> being cleared in the exact same spot.
> 
> This introduces batched clearing of bits. Instead of clearing a given
> bit, the same bit is set in a cleared/free mask instead. If we fail
> allocating a bit from a given word, then we check the free mask, and
> batch move those cleared bits at that time. This trades 64 atomic bitops
> for 2 cmpxchg().
> 
> In a threaded poll test case, half the overhead of getting and clearing
> tags is removed with this change. On another poll test case with a
> single thread, performance is unchanged.
> 
> Signed-off-by: Jens Axboe <axboe@kernel.dk>
> ---
>  include/linux/sbitmap.h | 31 +++++++++++++---
>  lib/sbitmap.c           | 80 +++++++++++++++++++++++++++++++++++++----
>  2 files changed, 100 insertions(+), 11 deletions(-)
> 
> diff --git a/include/linux/sbitmap.h b/include/linux/sbitmap.h
> index 804a50983ec5..07f117ee19dc 100644
> --- a/include/linux/sbitmap.h
> +++ b/include/linux/sbitmap.h
> @@ -30,14 +30,24 @@ struct seq_file;
>   */
>  struct sbitmap_word {
>  	/**
> -	 * @word: The bitmap word itself.
> +	 * @depth: Number of bits being used in @word/@cleared
>  	 */
> -	unsigned long word;
> +	unsigned long depth;
>  
>  	/**
> -	 * @depth: Number of bits being used in @word.
> +	 * @word: word holding free bits
>  	 */
> -	unsigned long depth;
> +	unsigned long word ____cacheline_aligned_in_smp;

Still splitting up word and depth in separate cachelines?

> +	/**
> +	 * @cleared: word holding cleared bits
> +	 */
> +	unsigned long cleared ____cacheline_aligned_in_smp;
> +
> +	/**
> +	 * @swap_lock: Held while swapping word <-> cleared
> +	 */
> +	spinlock_t swap_lock;
>  } ____cacheline_aligned_in_smp;
>  
>  /**
> @@ -310,6 +320,19 @@ static inline void sbitmap_clear_bit(struct sbitmap *sb, unsigned int bitnr)
>  	clear_bit(SB_NR_TO_BIT(sb, bitnr), __sbitmap_word(sb, bitnr));
>  }
>  
> +/*
> + * This one is special, since it doesn't actually clear the bit, rather it
> + * sets the corresponding bit in the ->cleared mask instead. Paired with
> + * the caller doing sbitmap_batch_clear() if a given index is full, which
> + * will clear the previously freed entries in the corresponding ->word.
> + */
> +static inline void sbitmap_deferred_clear_bit(struct sbitmap *sb, unsigned int bitnr)
> +{
> +	unsigned long *addr = &sb->map[SB_NR_TO_INDEX(sb, bitnr)].cleared;
> +
> +	set_bit(SB_NR_TO_BIT(sb, bitnr), addr);
> +}
> +
>  static inline void sbitmap_clear_bit_unlock(struct sbitmap *sb,
>  					    unsigned int bitnr)
>  {
> diff --git a/lib/sbitmap.c b/lib/sbitmap.c
> index 45cab6bbc1c7..f6a9553617bd 100644
> --- a/lib/sbitmap.c
> +++ b/lib/sbitmap.c
> @@ -59,6 +59,7 @@ int sbitmap_init_node(struct sbitmap *sb, unsigned int depth, int shift,
>  	for (i = 0; i < sb->map_nr; i++) {
>  		sb->map[i].depth = min(depth, bits_per_word);
>  		depth -= sb->map[i].depth;
> +		spin_lock_init(&sb->map[i].swap_lock);
>  	}
>  	return 0;
>  }
> @@ -111,6 +112,57 @@ static int __sbitmap_get_word(unsigned long *word, unsigned long depth,
>  	return nr;
>  }
>  
> +/*
> + * See if we have deferred clears that we can batch move
> + */
> +static inline bool sbitmap_deferred_clear(struct sbitmap *sb, int index)
> +{
> +	unsigned long mask, val;
> +	bool ret = false;
> +
> +	spin_lock(&sb->map[index].swap_lock);
> +
> +	if (!sb->map[index].cleared)
> +		goto out_unlock;
> +
> +	/*
> +	 * First get a stable cleared mask, setting the old mask to 0.
> +	 */
> +	do {
> +		mask = sb->map[index].cleared;
> +	} while (cmpxchg(&sb->map[index].cleared, mask, 0) != mask);
> +
> +	/*
> +	 * Now clear the masked bits in our free word
> +	 */
> +	do {
> +		val = sb->map[index].word;
> +	} while (cmpxchg(&sb->map[index].word, val, val & ~mask) != val);
> +
> +	ret = true;
> +out_unlock:
> +	spin_unlock(&sb->map[index].swap_lock);
> +	return ret;
> +}

Okay, I couldn't find any holes in this one :)

> +static int sbitmap_find_bit_in_index(struct sbitmap *sb, int index,
> +				     unsigned int alloc_hint, bool round_robin)
> +{
> +	int nr;
> +
> +	do {
> +		nr = __sbitmap_get_word(&sb->map[index].word,
> +					sb->map[index].depth, alloc_hint,
> +					!round_robin);
> +		if (nr != -1)
> +			break;
> +		if (!sbitmap_deferred_clear(sb, index))
> +			break;
> +	} while (1);
> +
> +	return nr;
> +}
> +
>  int sbitmap_get(struct sbitmap *sb, unsigned int alloc_hint, bool round_robin)
>  {
>  	unsigned int i, index;
> @@ -129,9 +181,8 @@ int sbitmap_get(struct sbitmap *sb, unsigned int alloc_hint, bool round_robin)
>  		alloc_hint = 0;
>  
>  	for (i = 0; i < sb->map_nr; i++) {
> -		nr = __sbitmap_get_word(&sb->map[index].word,
> -					sb->map[index].depth, alloc_hint,
> -					!round_robin);
> +		nr = sbitmap_find_bit_in_index(sb, index, alloc_hint,
> +						round_robin);
>  		if (nr != -1) {
>  			nr += index << sb->shift;
>  			break;
> @@ -206,23 +257,37 @@ bool sbitmap_any_bit_clear(const struct sbitmap *sb)
>  }
>  EXPORT_SYMBOL_GPL(sbitmap_any_bit_clear);
>  
> -unsigned int sbitmap_weight(const struct sbitmap *sb)
> +static unsigned int __sbitmap_weight(const struct sbitmap *sb, bool set)
>  {
>  	unsigned int i, weight = 0;
>  
>  	for (i = 0; i < sb->map_nr; i++) {
>  		const struct sbitmap_word *word = &sb->map[i];
>  
> -		weight += bitmap_weight(&word->word, word->depth);
> +		if (set)
> +			weight += bitmap_weight(&word->word, word->depth);

Should probably do
			weight -= bitmap_weight(&word->cleared, word->depth);

too, right?

> +		else
> +			weight += bitmap_weight(&word->cleared, word->depth);
>  	}
>  	return weight;
>  }
> +
> +unsigned int sbitmap_weight(const struct sbitmap *sb)
> +{
> +	return __sbitmap_weight(sb, true);
> +}
>  EXPORT_SYMBOL_GPL(sbitmap_weight);
>  
> +static unsigned int sbitmap_cleared(const struct sbitmap *sb)
> +{
> +	return __sbitmap_weight(sb, false);
> +}
> +
>  void sbitmap_show(struct sbitmap *sb, struct seq_file *m)
>  {
>  	seq_printf(m, "depth=%u\n", sb->depth);
> -	seq_printf(m, "busy=%u\n", sbitmap_weight(sb));
> +	seq_printf(m, "busy=%u\n", sbitmap_weight(sb) - sbitmap_cleared(sb));
> +	seq_printf(m, "cleared=%u\n", sbitmap_cleared(sb));
>  	seq_printf(m, "bits_per_word=%u\n", 1U << sb->shift);
>  	seq_printf(m, "map_nr=%u\n", sb->map_nr);
>  }
> @@ -514,7 +579,8 @@ EXPORT_SYMBOL_GPL(sbitmap_queue_wake_up);
>  void sbitmap_queue_clear(struct sbitmap_queue *sbq, unsigned int nr,
>  			 unsigned int cpu)
>  {
> -	sbitmap_clear_bit_unlock(&sbq->sb, nr);
> +	sbitmap_deferred_clear_bit(&sbq->sb, nr);
> +
>  	/*
>  	 * Pairs with the memory barrier in set_current_state() to ensure the
>  	 * proper ordering of clear_bit_unlock()/waitqueue_active() in the waker
> -- 
> 2.17.1
> 

  reply	other threads:[~2018-11-30 20:03 UTC|newest]

Thread overview: 8+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-11-30 16:01 [PATCHSET v4] sbitmap optimizations Jens Axboe
2018-11-30 16:01 ` [PATCH 1/2] sbitmap: ammortize cost of clearing bits Jens Axboe
2018-11-30 20:03   ` Omar Sandoval [this message]
2018-11-30 20:10     ` Jens Axboe
2018-11-30 21:41       ` Omar Sandoval
2018-11-30 16:01 ` [PATCH 2/2] sbitmap: optimize wakeup check Jens Axboe
2018-11-30 21:37   ` Omar Sandoval
  -- strict thread matches above, loose matches on Subject: below --
2018-11-30  2:09 [PATCHSET v3] sbitmap optimizations Jens Axboe
2018-11-30  2:09 ` [PATCH 1/2] sbitmap: ammortize cost of clearing bits Jens Axboe

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20181130200308.GB27411@vader \
    --to=osandov@osandov.com \
    --cc=axboe@kernel.dk \
    --cc=linux-block@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox