Re: [dm-devel] [dm-6.4 PATCH v2 3/9] dm bufio: improve concurrent IO performance

From: Jens Axboe <axboe@kernel.dk>
To: Mike Snitzer <snitzer@kernel.org>, dm-devel@redhat.com
Cc: ebiggers@kernel.org, keescook@chromium.org, heinzm@redhat.com,
	nhuck@google.com, linux-block@vger.kernel.org, ejt@redhat.com,
	mpatocka@redhat.com, luomeng12@huawei.com
Subject: Re: [dm-devel] [dm-6.4 PATCH v2 3/9] dm bufio: improve concurrent IO performance
Date: Fri, 24 Mar 2023 13:34:01 -0600	[thread overview]
Message-ID: <a1b8ceb8-0a67-86a1-2222-1625f6ebbe33@kernel.dk> (raw)
In-Reply-To: <20230324175656.85082-4-snitzer@kernel.org>

Just some random drive-by comments.

> diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c
> index 1de1bdcda1ce..a58f8ac3ba75 100644
> --- a/drivers/md/dm-bufio.c
> +++ b/drivers/md/dm-bufio.c
> +static void lru_destroy(struct lru *lru)
> +{
> +	BUG_ON(lru->cursor);
> +	BUG_ON(!list_empty(&lru->iterators));
> +}

Ehm no, WARN_ON_ONCE() for these presumably.

> +/*
> + * Insert a new entry into the lru.
> + */
> +static void lru_insert(struct lru *lru, struct lru_entry *le)
> +{
> +	/*
> +	 * Don't be tempted to set to 1, makes the lru aspect
> +	 * perform poorly.
> +	 */
> +	atomic_set(&le->referenced, 0);
> +
> +	if (lru->cursor)
> +		list_add_tail(&le->list, lru->cursor);
> +
> +	else {
> +		INIT_LIST_HEAD(&le->list);
> +		lru->cursor = &le->list;
> +	}

Extra empty line, and missing braces on the first line.

> +static inline struct lru_entry *to_le(struct list_head *l)
> +{
> +	return container_of(l, struct lru_entry, list);
> +}

Useless helper.

> +/*
> + * Remove an lru_iter from the list of cursors in the lru.
> + */
> +static void lru_iter_end(struct lru_iter *it)
> +{
> +	list_del(&it->list);
> +}

Ditto

> +/*
> + * Remove a specific entry from the lru.
> + */
> +static void lru_remove(struct lru *lru, struct lru_entry *le)
> +{
> +	lru_iter_invalidate(lru, le);
> +	if (lru->count == 1)
> +		lru->cursor = NULL;
> +	else {
> +		if (lru->cursor == &le->list)
> +			lru->cursor = lru->cursor->next;
> +		list_del(&le->list);
> +	}
> +	lru->count--;
> +}

Style again, be consistent with braces.

> +static struct lru_entry *lru_evict(struct lru *lru, le_predicate pred, void *context)
> +{
> +	unsigned long tested = 0;
> +	struct list_head *h = lru->cursor;
> +	struct lru_entry *le;
> +
> +	if (!h)
> +		return NULL;
> +	/*
> +	 * In the worst case we have to loop around twice. Once to clear
> +	 * the reference flags, and then again to discover the predicate
> +	 * fails for all entries.
> +	 */
> +	while (tested < lru->count) {
> +		le = container_of(h, struct lru_entry, list);
> +
> +		if (atomic_read(&le->referenced))
> +			atomic_set(&le->referenced, 0);
> +		else {
> +			tested++;
> +			switch (pred(le, context)) {
> +			case ER_EVICT:
> +				/*
> +				 * Adjust the cursor, so we start the next
> +				 * search from here.
> +				 */
> +				lru->cursor = le->list.next;
> +				lru_remove(lru, le);
> +				return le;
> +
> +			case ER_DONT_EVICT:
> +				break;
> +
> +			case ER_STOP:
> +				lru->cursor = le->list.next;
> +				return NULL;
> +			}
> +		}

Again bad bracing.

> @@ -116,9 +366,579 @@ struct dm_buffer {
>  #endif
>  };
>  
> +/*--------------------------------------------------------------*/
> +
> +/*
> + * The buffer cache manages buffers, particularly:
> + *  - inc/dec of holder count
> + *  - setting the last_accessed field
> + *  - maintains clean/dirty state along with lru
> + *  - selecting buffers that match predicates
> + *
> + * It does *not* handle:
> + *  - allocation/freeing of buffers.
> + *  - IO
> + *  - Eviction or cache sizing.
> + *
> + * cache_get() and cache_put() are threadsafe, you do not need to
> + * protect these calls with a surrounding mutex.  All the other
> + * methods are not threadsafe; they do use locking primitives, but
> + * only enough to ensure get/put are threadsafe.
> + */
> +
> +#define NR_LOCKS 64
> +#define LOCKS_MASK (NR_LOCKS - 1)
> +
> +struct tree_lock {
> +	struct rw_semaphore lock;
> +} ____cacheline_aligned_in_smp;
> +
> +struct dm_buffer_cache {
> +	/*
> +	 * We spread entries across multiple trees to reduce contention
> +	 * on the locks.
> +	 */
> +	struct tree_lock locks[NR_LOCKS];
> +	struct rb_root roots[NR_LOCKS];
> +	struct lru lru[LIST_SIZE];
> +};

This:

struct foo_tree {
	struct rw_semaphore lock;
	struct rb_root root;
	struct lru lru;
} ____cacheline_aligned_in_smp;

would be a lot better.

And where does this NR_LOCKS come from? Don't make up magic values out
of thin air. Should this be per-cpu? per-node? N per node? I'll bet you
that 64 is way too much for most use cases, and too little for others.

> +static bool cache_insert(struct dm_buffer_cache *bc, struct dm_buffer *b)
> +{
> +	bool r;
> +
> +	BUG_ON(b->list_mode >= LIST_SIZE);
> +
> +	cache_write_lock(bc, b->block);
> +	BUG_ON(atomic_read(&b->hold_count) != 1);
> +	r = __cache_insert(&bc->roots[cache_index(b->block)], b);
> +	if (r)
> +		lru_insert(&bc->lru[b->list_mode], &b->lru);
> +	cache_write_unlock(bc, b->block);
> +
> +	return r;
> +}

Again, not BUG_ON's.

> +/*
> + * Removes buffer from cache, ownership of the buffer passes back to the caller.
> + * Fails if the hold_count is not one (ie. the caller holds the only reference).
> + *
> + * Not threadsafe.
> + */
> +static bool cache_remove(struct dm_buffer_cache *bc, struct dm_buffer *b)
> +{
> +	bool r;
> +
> +	cache_write_lock(bc, b->block);
> +
> +	if (atomic_read(&b->hold_count) != 1)
> +		r = false;
> +
> +	else {
> +		r = true;
> +		rb_erase(&b->node, &bc->roots[cache_index(b->block)]);
> +		lru_remove(&bc->lru[b->list_mode], &b->lru);
> +	}
> +
> +	cache_write_unlock(bc, b->block);
> +
> +	return r;
> +}

Braces again.

> +static struct dm_buffer *__find_next(struct rb_root *root, sector_t block)
> +{
> +	struct rb_node *n = root->rb_node;
> +	struct dm_buffer *b;
> +	struct dm_buffer *best = NULL;
> +
> +	while (n) {
> +		b = container_of(n, struct dm_buffer, node);
> +
> +		if (b->block == block)
> +			return b;
> +
> +		if (block <= b->block) {
> +			n = n->rb_left;
> +			best = b;
> +		} else
> +			n = n->rb_right;
> +	}

And again.

> @@ -1141,7 +1904,6 @@ static void *new_read(struct dm_bufio_client *c, sector_t block,
>  	}
>  
>  	*bp = b;
> -
>  	return b->data;
>  }
>  

Unrelated change. There are a bunch of these.

I stopped reading here, the patch is just too long. Surely this could be
split up?

 1 file changed, 1292 insertions(+), 477 deletions(-)

That's not a patch, that's a patch series.

-- 
Jens Axboe

--
dm-devel mailing list
dm-devel@redhat.com
https://listman.redhat.com/mailman/listinfo/dm-devel