Linux block layer
 help / color / mirror / Atom feed
From: Hannes Reinecke <hare@suse.de>
To: Ming Lei <ming.lei@redhat.com>, Jens Axboe <axboe@kernel.dk>,
	linux-block@vger.kernel.org,
	"Martin K . Petersen" <martin.petersen@oracle.com>,
	linux-scsi@vger.kernel.org
Cc: Omar Sandoval <osandov@fb.com>,
	Kashyap Desai <kashyap.desai@broadcom.com>,
	Sumanesh Samanta <sumanesh.samanta@broadcom.com>,
	"Ewan D . Milne" <emilne@redhat.com>
Subject: Re: [PATCH V3 for 5.11 04/12] sbitmap: move allocation hint into sbitmap
Date: Wed, 23 Sep 2020 08:36:42 +0200	[thread overview]
Message-ID: <351a36ab-a26c-6042-818d-28e167eb74e0@suse.de> (raw)
In-Reply-To: <20200923013339.1621784-5-ming.lei@redhat.com>

On 9/23/20 3:33 AM, Ming Lei wrote:
> Allocation hint should have belonged to sbitmap, also when sbitmap's
> depth is high and no need to use multiple wakeup queues, user can
> benefit from percpu allocation hint too.
> 
> So move allocation hint into sbitmap.
> 
> Cc: Omar Sandoval <osandov@fb.com>
> Cc: Kashyap Desai <kashyap.desai@broadcom.com>
> Cc: Sumanesh Samanta <sumanesh.samanta@broadcom.com>
> Cc: Ewan D. Milne <emilne@redhat.com>
> Cc: Hannes Reinecke <hare@suse.de>
> Signed-off-by: Ming Lei <ming.lei@redhat.com>
> ---
>   block/blk-mq.c          |   2 +-
>   block/kyber-iosched.c   |   2 +-
>   include/linux/sbitmap.h |  41 ++++++++------
>   lib/sbitmap.c           | 115 ++++++++++++++++++++++++----------------
>   4 files changed, 97 insertions(+), 63 deletions(-)
> 
> diff --git a/block/blk-mq.c b/block/blk-mq.c
> index 45149970b891..88154cea83d8 100644
> --- a/block/blk-mq.c
> +++ b/block/blk-mq.c
> @@ -2684,7 +2684,7 @@ blk_mq_alloc_hctx(struct request_queue *q, struct blk_mq_tag_set *set,
>   		goto free_cpumask;
>   
>   	if (sbitmap_init_node(&hctx->ctx_map, nr_cpu_ids, ilog2(8),
> -				gfp, node, false))
> +				gfp, node, false, false))
>   		goto free_ctxs;
>   	hctx->nr_ctx = 0;
>   
> diff --git a/block/kyber-iosched.c b/block/kyber-iosched.c
> index cc8bcfe1d587..3949d68ac4c1 100644
> --- a/block/kyber-iosched.c
> +++ b/block/kyber-iosched.c
> @@ -480,7 +480,7 @@ static int kyber_init_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx)
>   	for (i = 0; i < KYBER_NUM_DOMAINS; i++) {
>   		if (sbitmap_init_node(&khd->kcq_map[i], hctx->nr_ctx,
>   				      ilog2(8), GFP_KERNEL, hctx->numa_node,
> -				      false)) {
> +				      false, false)) {
>   			while (--i >= 0)
>   				sbitmap_free(&khd->kcq_map[i]);
>   			goto err_kcqs;
> diff --git a/include/linux/sbitmap.h b/include/linux/sbitmap.h
> index 68097b052ec3..103b41c03311 100644
> --- a/include/linux/sbitmap.h
> +++ b/include/linux/sbitmap.h
> @@ -70,6 +70,14 @@ struct sbitmap {
>   	 * @map: Allocated bitmap.
>   	 */
>   	struct sbitmap_word *map;
> +
> +	/*
> +	 * @alloc_hint: Cache of last successfully allocated or freed bit.
> +	 *
> +	 * This is per-cpu, which allows multiple users to stick to different
> +	 * cachelines until the map is exhausted.
> +	 */
> +	unsigned int __percpu *alloc_hint;
>   };
>   
>   #define SBQ_WAIT_QUEUES 8
> @@ -105,14 +113,6 @@ struct sbitmap_queue {
>   	 */
>   	struct sbitmap sb;
>   
> -	/*
> -	 * @alloc_hint: Cache of last successfully allocated or freed bit.
> -	 *
> -	 * This is per-cpu, which allows multiple users to stick to different
> -	 * cachelines until the map is exhausted.
> -	 */
> -	unsigned int __percpu *alloc_hint;
> -
>   	/**
>   	 * @wake_batch: Number of bits which must be freed before we wake up any
>   	 * waiters.
> @@ -152,11 +152,13 @@ struct sbitmap_queue {
>    * @round_robin: If true, be stricter about allocation order; always allocate
>    *               starting from the last allocated bit. This is less efficient
>    *               than the default behavior (false).
> + * @alloc_hint: If true, apply percpu hint for where to start searching for
> + * 		a free bit.
>    *
>    * Return: Zero on success or negative errno on failure.
>    */
>   int sbitmap_init_node(struct sbitmap *sb, unsigned int depth, int shift,
> -		      gfp_t flags, int node, bool round_robin);
> +		      gfp_t flags, int node, bool round_robin, bool alloc_hint);
>   
>   /**
>    * sbitmap_free() - Free memory used by a &struct sbitmap.
> @@ -164,6 +166,7 @@ int sbitmap_init_node(struct sbitmap *sb, unsigned int depth, int shift,
>    */
>   static inline void sbitmap_free(struct sbitmap *sb)
>   {
> +	free_percpu(sb->alloc_hint);
>   	kfree(sb->map);
>   	sb->map = NULL;
>   }
> @@ -181,19 +184,17 @@ void sbitmap_resize(struct sbitmap *sb, unsigned int depth);
>   /**
>    * sbitmap_get() - Try to allocate a free bit from a &struct sbitmap.
>    * @sb: Bitmap to allocate from.
> - * @alloc_hint: Hint for where to start searching for a free bit.
>    *
>    * This operation provides acquire barrier semantics if it succeeds.
>    *
>    * Return: Non-negative allocated bit number if successful, -1 otherwise.
>    */
> -int sbitmap_get(struct sbitmap *sb, unsigned int alloc_hint);
> +int sbitmap_get(struct sbitmap *sb);
>   
>   /**
>    * sbitmap_get_shallow() - Try to allocate a free bit from a &struct sbitmap,
>    * limiting the depth used from each word.
>    * @sb: Bitmap to allocate from.
> - * @alloc_hint: Hint for where to start searching for a free bit.
>    * @shallow_depth: The maximum number of bits to allocate from a single word.
>    *
>    * This rather specific operation allows for having multiple users with
> @@ -205,8 +206,7 @@ int sbitmap_get(struct sbitmap *sb, unsigned int alloc_hint);
>    *
>    * Return: Non-negative allocated bit number if successful, -1 otherwise.
>    */
> -int sbitmap_get_shallow(struct sbitmap *sb, unsigned int alloc_hint,
> -			unsigned long shallow_depth);
> +int sbitmap_get_shallow(struct sbitmap *sb, unsigned long shallow_depth);
>   
>   /**
>    * sbitmap_any_bit_set() - Check for a set bit in a &struct sbitmap.
> @@ -320,6 +320,18 @@ static inline void sbitmap_deferred_clear_bit(struct sbitmap *sb, unsigned int b
>   	set_bit(SB_NR_TO_BIT(sb, bitnr), addr);
>   }
>   
> +/*
> + * Pair of sbitmap_get, and this one applies both cleared bit and
> + * allocation hint.
> + */
> +static inline void sbitmap_put(struct sbitmap *sb, unsigned int bitnr)
> +{
> +	sbitmap_deferred_clear_bit(sb, bitnr);
> +
> +	if (likely(sb->alloc_hint && !sb->round_robin && bitnr < sb->depth))
> +                *this_cpu_ptr(sb->alloc_hint) = bitnr;
> +}
> +
>   static inline int sbitmap_test_bit(struct sbitmap *sb, unsigned int bitnr)
>   {
>   	return test_bit(SB_NR_TO_BIT(sb, bitnr), __sbitmap_word(sb, bitnr));
> @@ -368,7 +380,6 @@ int sbitmap_queue_init_node(struct sbitmap_queue *sbq, unsigned int depth,
>   static inline void sbitmap_queue_free(struct sbitmap_queue *sbq)
>   {
>   	kfree(sbq->ws);
> -	free_percpu(sbq->alloc_hint);
>   	sbitmap_free(&sbq->sb);
>   }
>   
> diff --git a/lib/sbitmap.c b/lib/sbitmap.c
> index 4e4423414f4d..16f59e99143e 100644
> --- a/lib/sbitmap.c
> +++ b/lib/sbitmap.c
> @@ -9,52 +9,51 @@
>   #include <linux/sbitmap.h>
>   #include <linux/seq_file.h>
>   
> -static int init_alloc_hint(struct sbitmap_queue *sbq, gfp_t flags)
> +static int init_alloc_hint(struct sbitmap *sb, gfp_t flags)
>   {
> -	unsigned depth = sbq->sb.depth;
> +	unsigned depth = sb->depth;
>   
> -	sbq->alloc_hint = alloc_percpu_gfp(unsigned int, flags);
> -	if (!sbq->alloc_hint)
> +	sb->alloc_hint = alloc_percpu_gfp(unsigned int, flags);
> +	if (!sb->alloc_hint)
>   		return -ENOMEM;
>   
> -	if (depth && !sbq->sb.round_robin) {
> +	if (depth && !sb->round_robin) {
>   		int i;
>   
>   		for_each_possible_cpu(i)
> -			*per_cpu_ptr(sbq->alloc_hint, i) = prandom_u32() % depth;
> +			*per_cpu_ptr(sb->alloc_hint, i) = prandom_u32() % depth;
>   	}
> -
>   	return 0;
>   }
>   
> -static inline unsigned update_alloc_hint_before_get(struct sbitmap_queue *sbq,
> +static inline unsigned update_alloc_hint_before_get(struct sbitmap *sb,
>   						    unsigned int depth)
>   {
>   	unsigned hint;
>   
> -	hint = this_cpu_read(*sbq->alloc_hint);
> +	hint = this_cpu_read(*sb->alloc_hint);
>   	if (unlikely(hint >= depth)) {
>   		hint = depth ? prandom_u32() % depth : 0;
> -		this_cpu_write(*sbq->alloc_hint, hint);
> +		this_cpu_write(*sb->alloc_hint, hint);
>   	}
>   
>   	return hint;
>   }
>   
> -static inline void update_alloc_hint_after_get(struct sbitmap_queue *sbq,
> +static inline void update_alloc_hint_after_get(struct sbitmap *sb,
>   					       unsigned int depth,
>   					       unsigned int hint,
>   					       unsigned int nr)
>   {
>   	if (nr == -1) {
>   		/* If the map is full, a hint won't do us much good. */
> -		this_cpu_write(*sbq->alloc_hint, 0);
> -	} else if (nr == hint || unlikely(sbq->sb.round_robin)) {
> +		this_cpu_write(*sb->alloc_hint, 0);
> +	} else if (nr == hint || unlikely(sb->round_robin)) {
>   		/* Only update the hint if we used it. */
>   		hint = nr + 1;
>   		if (hint >= depth - 1)
>   			hint = 0;
> -		this_cpu_write(*sbq->alloc_hint, hint);
> +		this_cpu_write(*sb->alloc_hint, hint);
>   	}
>   }
>   
> @@ -91,7 +90,8 @@ static inline bool sbitmap_deferred_clear(struct sbitmap *sb, int index)
>   }
>   
>   int sbitmap_init_node(struct sbitmap *sb, unsigned int depth, int shift,
> -		      gfp_t flags, int node, bool round_robin)
> +		      gfp_t flags, int node, bool round_robin,
> +		      bool alloc_hint)
>   {
>   	unsigned int bits_per_word;
>   	unsigned int i;
> @@ -123,9 +123,18 @@ int sbitmap_init_node(struct sbitmap *sb, unsigned int depth, int shift,
>   		return 0;
>   	}
>   
> +	if (alloc_hint) {
> +		if (init_alloc_hint(sb, flags))
> +			return -ENOMEM;
> +	} else {
> +		sb->alloc_hint = NULL;
> +	}
> +
>   	sb->map = kcalloc_node(sb->map_nr, sizeof(*sb->map), flags, node);
> -	if (!sb->map)
> +	if (!sb->map) {
> +		free_percpu(sb->alloc_hint);
>   		return -ENOMEM;
> +	}
>   
>   	for (i = 0; i < sb->map_nr; i++) {
>   		sb->map[i].depth = min(depth, bits_per_word);
> @@ -204,7 +213,7 @@ static int sbitmap_find_bit_in_index(struct sbitmap *sb, int index,
>   	return nr;
>   }
>   
> -int sbitmap_get(struct sbitmap *sb, unsigned int alloc_hint)
> +static int __sbitmap_get(struct sbitmap *sb, unsigned int alloc_hint)
>   {
>   	unsigned int i, index;
>   	int nr = -1;
> @@ -236,10 +245,29 @@ int sbitmap_get(struct sbitmap *sb, unsigned int alloc_hint)
>   
>   	return nr;
>   }
> +
> +int sbitmap_get(struct sbitmap *sb)
> +{
> +	int nr;
> +
> +	if (likely(sb->alloc_hint)) {
> +		unsigned int hint, depth;
> +
> +		depth = READ_ONCE(sb->depth);
> +		hint = update_alloc_hint_before_get(sb, depth);
> +		nr = __sbitmap_get(sb, hint);
> +		update_alloc_hint_after_get(sb, depth, hint, nr);
> +	} else {
> +		nr = __sbitmap_get(sb, 0);
> +	}
> +
> +	return nr;
> +}
>   EXPORT_SYMBOL_GPL(sbitmap_get);
>   
Can't you move the 'alloc_hint' test into update_alloc_hint_before_get()?
That way we can simplify the code and would avoid the 'if' clause here.

Cheers,

Hannes
-- 
Dr. Hannes Reinecke                Kernel Storage Architect
hare@suse.de                              +49 911 74053 688
SUSE Software Solutions GmbH, Maxfeldstr. 5, 90409 Nürnberg
HRB 36809 (AG Nürnberg), Geschäftsführer: Felix Imendörffer

  reply	other threads:[~2020-09-23  6:36 UTC|newest]

Thread overview: 27+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-09-23  1:33 [PATCH V3 for 5.11 00/12] blk-mq/scsi: tracking device queue depth via sbitmap Ming Lei
2020-09-23  1:33 ` [PATCH V3 for 5.11 01/12] sbitmap: remove sbitmap_clear_bit_unlock Ming Lei
2020-09-23  7:59   ` Johannes Thumshirn
2020-09-23  1:33 ` [PATCH V3 for 5.11 02/12] sbitmap: maintain allocation round_robin in sbitmap Ming Lei
2020-09-23  1:33 ` [PATCH V3 for 5.11 03/12] sbitmap: add helpers for updating allocation hint Ming Lei
2020-09-23  1:33 ` [PATCH V3 for 5.11 04/12] sbitmap: move allocation hint into sbitmap Ming Lei
2020-09-23  6:36   ` Hannes Reinecke [this message]
2020-11-10  4:20     ` Ming Lei
2020-09-23  1:33 ` [PATCH V3 for 5.11 05/12] sbitmap: export sbitmap_weight Ming Lei
2020-09-23  6:37   ` Hannes Reinecke
2020-09-23  1:33 ` [PATCH V3 for 5.11 06/12] sbitmap: add helper of sbitmap_calculate_shift Ming Lei
2020-09-23  6:38   ` Hannes Reinecke
2020-09-23  1:33 ` [PATCH V3 for 5.11 07/12] blk-mq: add callbacks for storing & retrieving budget token Ming Lei
2020-09-23  6:39   ` Hannes Reinecke
2020-09-23  1:33 ` [PATCH V3 for 5.11 08/12] blk-mq: return budget token from .get_budget callback Ming Lei
2020-09-23  6:45   ` Hannes Reinecke
2020-09-23  1:33 ` [PATCH V3 for 5.11 09/12] scsi: put hot fields of scsi_host_template into one cacheline Ming Lei
2020-09-23  6:46   ` Hannes Reinecke
2020-09-23  1:33 ` [PATCH V3 for 5.11 10/12] scsi: add scsi_device_busy() to read sdev->device_busy Ming Lei
2020-09-23  6:47   ` Hannes Reinecke
2020-09-23  1:33 ` [PATCH V3 for 5.11 11/12] scsi: make sure sdev->queue_depth is <= shost->can_queue Ming Lei
2020-09-23  6:47   ` Hannes Reinecke
2020-09-23  7:38   ` John Garry
2020-11-10  9:28     ` Ming Lei
2020-09-23  1:33 ` [PATCH V3 for 5.11 12/12] scsi: replace sdev->device_busy with sbitmap Ming Lei
2020-09-23  6:50   ` Hannes Reinecke
2020-09-23 21:31 ` [PATCH V3 for 5.11 00/12] blk-mq/scsi: tracking device queue depth via sbitmap Sumanesh Samanta

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=351a36ab-a26c-6042-818d-28e167eb74e0@suse.de \
    --to=hare@suse.de \
    --cc=axboe@kernel.dk \
    --cc=emilne@redhat.com \
    --cc=kashyap.desai@broadcom.com \
    --cc=linux-block@vger.kernel.org \
    --cc=linux-scsi@vger.kernel.org \
    --cc=martin.petersen@oracle.com \
    --cc=ming.lei@redhat.com \
    --cc=osandov@fb.com \
    --cc=sumanesh.samanta@broadcom.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox