From: Mike Snitzer <snitzer@kernel.org>
To: dm-devel@redhat.com
Cc: axboe@kernel.dk, ebiggers@kernel.org, keescook@chromium.org,
heinzm@redhat.com, Mike Snitzer <snitzer@kernel.org>,
nhuck@google.com, linux-block@vger.kernel.org, ejt@redhat.com,
mpatocka@redhat.com, luomeng12@huawei.com
Subject: [dm-devel] [dm-6.4 PATCH v3 05/20] dm bufio: add LRU abstraction
Date: Mon, 27 Mar 2023 16:11:28 -0400 [thread overview]
Message-ID: <20230327201143.51026-6-snitzer@kernel.org> (raw)
In-Reply-To: <20230327201143.51026-1-snitzer@kernel.org>
From: Joe Thornber <ejt@redhat.com>
A CLOCK algorithm is used in this LRU abstraction. This avoids
relinking list nodes, which would require a write lock protecting it.
None of the LRU methods are threadsafe; locking must be done at a
higher level.
Code that uses this new LRU will be introduced in the next 2 commits.
As such, this commit will cause "defined but not used" compiler warnings
that will be resolved by the next 2 commits.
Signed-off-by: Joe Thornber <ejt@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@kernel.org>
---
drivers/md/dm-bufio.c | 235 ++++++++++++++++++++++++++++++++++++++++++
1 file changed, 235 insertions(+)
diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c
index 64fb5fd39bd9..a0bb0de0d4e7 100644
--- a/drivers/md/dm-bufio.c
+++ b/drivers/md/dm-bufio.c
@@ -66,6 +66,241 @@
#define LIST_DIRTY 1
#define LIST_SIZE 2
+/*--------------------------------------------------------------*/
+
+/*
+ * Rather than use an LRU list, we use a clock algorithm where entries
+ * are held in a circular list. When an entry is 'hit' a reference bit
+ * is set. The least recently used entry is approximated by running a
+ * cursor around the list selecting unreferenced entries. Referenced
+ * entries have their reference bit cleared as the cursor passes them.
+ */
+struct lru_entry {
+ struct list_head list;
+ atomic_t referenced;
+};
+
+struct lru_iter {
+ struct lru *lru;
+ struct list_head list;
+ struct lru_entry *stop;
+ struct lru_entry *e;
+};
+
+struct lru {
+ struct list_head *cursor;
+ unsigned long count;
+
+ struct list_head iterators;
+};
+
+/*--------------*/
+
+static void lru_init(struct lru *lru)
+{
+ lru->cursor = NULL;
+ lru->count = 0;
+ INIT_LIST_HEAD(&lru->iterators);
+}
+
+static void lru_destroy(struct lru *lru)
+{
+ WARN_ON_ONCE(lru->cursor);
+ WARN_ON_ONCE(!list_empty(&lru->iterators));
+}
+
+/*
+ * Insert a new entry into the lru.
+ */
+static void lru_insert(struct lru *lru, struct lru_entry *le)
+{
+ /*
+ * Don't be tempted to set to 1, makes the lru aspect
+ * perform poorly.
+ */
+ atomic_set(&le->referenced, 0);
+
+ if (lru->cursor) {
+ list_add_tail(&le->list, lru->cursor);
+ } else {
+ INIT_LIST_HEAD(&le->list);
+ lru->cursor = &le->list;
+ }
+ lru->count++;
+}
+
+/*--------------*/
+
+/*
+ * Convert a list_head pointer to an lru_entry pointer.
+ */
+static inline struct lru_entry *to_le(struct list_head *l)
+{
+ return container_of(l, struct lru_entry, list);
+}
+
+/*
+ * Initialize an lru_iter and add it to the list of cursors in the lru.
+ */
+static void lru_iter_begin(struct lru *lru, struct lru_iter *it)
+{
+ it->lru = lru;
+ it->stop = lru->cursor ? to_le(lru->cursor->prev) : NULL;
+ it->e = lru->cursor ? to_le(lru->cursor) : NULL;
+ list_add(&it->list, &lru->iterators);
+}
+
+/*
+ * Remove an lru_iter from the list of cursors in the lru.
+ */
+static inline void lru_iter_end(struct lru_iter *it)
+{
+ list_del(&it->list);
+}
+
+/* Predicate function type to be used with lru_iter_next */
+typedef bool (*iter_predicate)(struct lru_entry *le, void *context);
+
+/*
+ * Advance the cursor to the next entry that passes the
+ * predicate, and return that entry. Returns NULL if the
+ * iteration is complete.
+ */
+static struct lru_entry *lru_iter_next(struct lru_iter *it,
+ iter_predicate pred, void *context)
+{
+ struct lru_entry *e;
+
+ while (it->e) {
+ e = it->e;
+
+ /* advance the cursor */
+ if (it->e == it->stop)
+ it->e = NULL;
+ else
+ it->e = to_le(it->e->list.next);
+
+ if (pred(e, context))
+ return e;
+ }
+
+ return NULL;
+}
+
+/*
+ * Invalidate a specific lru_entry and update all cursors in
+ * the lru accordingly.
+ */
+static void lru_iter_invalidate(struct lru *lru, struct lru_entry *e)
+{
+ struct lru_iter *it;
+
+ list_for_each_entry(it, &lru->iterators, list) {
+ /* Move c->e forwards if necc. */
+ if (it->e == e) {
+ it->e = to_le(it->e->list.next);
+ if (it->e == e)
+ it->e = NULL;
+ }
+
+ /* Move it->stop backwards if necc. */
+ if (it->stop == e) {
+ it->stop = to_le(it->stop->list.prev);
+ if (it->stop == e)
+ it->stop = NULL;
+ }
+ }
+}
+
+/*--------------*/
+
+/*
+ * Remove a specific entry from the lru.
+ */
+static void lru_remove(struct lru *lru, struct lru_entry *le)
+{
+ lru_iter_invalidate(lru, le);
+ if (lru->count == 1) {
+ lru->cursor = NULL;
+ } else {
+ if (lru->cursor == &le->list)
+ lru->cursor = lru->cursor->next;
+ list_del(&le->list);
+ }
+ lru->count--;
+}
+
+/*
+ * Mark as referenced.
+ */
+static inline void lru_reference(struct lru_entry *le)
+{
+ atomic_set(&le->referenced, 1);
+}
+
+/*--------------*/
+
+/*
+ * Remove the least recently used entry (approx), that passes the predicate.
+ * Returns NULL on failure.
+ */
+enum evict_result {
+ ER_EVICT,
+ ER_DONT_EVICT,
+ ER_STOP, /* stop looking for something to evict */
+};
+
+typedef enum evict_result (*le_predicate)(struct lru_entry *le, void *context);
+
+static struct lru_entry *lru_evict(struct lru *lru, le_predicate pred, void *context)
+{
+ unsigned long tested = 0;
+ struct list_head *h = lru->cursor;
+ struct lru_entry *le;
+
+ if (!h)
+ return NULL;
+ /*
+ * In the worst case we have to loop around twice. Once to clear
+ * the reference flags, and then again to discover the predicate
+ * fails for all entries.
+ */
+ while (tested < lru->count) {
+ le = container_of(h, struct lru_entry, list);
+
+ if (atomic_read(&le->referenced)) {
+ atomic_set(&le->referenced, 0);
+ } else {
+ tested++;
+ switch (pred(le, context)) {
+ case ER_EVICT:
+ /*
+ * Adjust the cursor, so we start the next
+ * search from here.
+ */
+ lru->cursor = le->list.next;
+ lru_remove(lru, le);
+ return le;
+
+ case ER_DONT_EVICT:
+ break;
+
+ case ER_STOP:
+ lru->cursor = le->list.next;
+ return NULL;
+ }
+ }
+
+ h = h->next;
+
+ cond_resched();
+ }
+
+ return NULL;
+}
+
+/*--------------------------------------------------------------*/
+
/*
* Linking of buffers:
* All buffers are linked to buffer_tree with their node field.
--
2.40.0
--
dm-devel mailing list
dm-devel@redhat.com
https://listman.redhat.com/mailman/listinfo/dm-devel
WARNING: multiple messages have this Message-ID (diff)
From: Mike Snitzer <snitzer@kernel.org>
To: dm-devel@redhat.com
Cc: linux-block@vger.kernel.org, axboe@kernel.dk, ejt@redhat.com,
mpatocka@redhat.com, heinzm@redhat.com, nhuck@google.com,
ebiggers@kernel.org, keescook@chromium.org, luomeng12@huawei.com,
Mike Snitzer <snitzer@kernel.org>
Subject: [dm-6.4 PATCH v3 05/20] dm bufio: add LRU abstraction
Date: Mon, 27 Mar 2023 16:11:28 -0400 [thread overview]
Message-ID: <20230327201143.51026-6-snitzer@kernel.org> (raw)
In-Reply-To: <20230327201143.51026-1-snitzer@kernel.org>
From: Joe Thornber <ejt@redhat.com>
A CLOCK algorithm is used in this LRU abstraction. This avoids
relinking list nodes, which would require a write lock protecting it.
None of the LRU methods are threadsafe; locking must be done at a
higher level.
Code that uses this new LRU will be introduced in the next 2 commits.
As such, this commit will cause "defined but not used" compiler warnings
that will be resolved by the next 2 commits.
Signed-off-by: Joe Thornber <ejt@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@kernel.org>
---
drivers/md/dm-bufio.c | 235 ++++++++++++++++++++++++++++++++++++++++++
1 file changed, 235 insertions(+)
diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c
index 64fb5fd39bd9..a0bb0de0d4e7 100644
--- a/drivers/md/dm-bufio.c
+++ b/drivers/md/dm-bufio.c
@@ -66,6 +66,241 @@
#define LIST_DIRTY 1
#define LIST_SIZE 2
+/*--------------------------------------------------------------*/
+
+/*
+ * Rather than use an LRU list, we use a clock algorithm where entries
+ * are held in a circular list. When an entry is 'hit' a reference bit
+ * is set. The least recently used entry is approximated by running a
+ * cursor around the list selecting unreferenced entries. Referenced
+ * entries have their reference bit cleared as the cursor passes them.
+ */
+struct lru_entry {
+ struct list_head list;
+ atomic_t referenced;
+};
+
+struct lru_iter {
+ struct lru *lru;
+ struct list_head list;
+ struct lru_entry *stop;
+ struct lru_entry *e;
+};
+
+struct lru {
+ struct list_head *cursor;
+ unsigned long count;
+
+ struct list_head iterators;
+};
+
+/*--------------*/
+
+static void lru_init(struct lru *lru)
+{
+ lru->cursor = NULL;
+ lru->count = 0;
+ INIT_LIST_HEAD(&lru->iterators);
+}
+
+static void lru_destroy(struct lru *lru)
+{
+ WARN_ON_ONCE(lru->cursor);
+ WARN_ON_ONCE(!list_empty(&lru->iterators));
+}
+
+/*
+ * Insert a new entry into the lru.
+ */
+static void lru_insert(struct lru *lru, struct lru_entry *le)
+{
+ /*
+ * Don't be tempted to set to 1, makes the lru aspect
+ * perform poorly.
+ */
+ atomic_set(&le->referenced, 0);
+
+ if (lru->cursor) {
+ list_add_tail(&le->list, lru->cursor);
+ } else {
+ INIT_LIST_HEAD(&le->list);
+ lru->cursor = &le->list;
+ }
+ lru->count++;
+}
+
+/*--------------*/
+
+/*
+ * Convert a list_head pointer to an lru_entry pointer.
+ */
+static inline struct lru_entry *to_le(struct list_head *l)
+{
+ return container_of(l, struct lru_entry, list);
+}
+
+/*
+ * Initialize an lru_iter and add it to the list of cursors in the lru.
+ */
+static void lru_iter_begin(struct lru *lru, struct lru_iter *it)
+{
+ it->lru = lru;
+ it->stop = lru->cursor ? to_le(lru->cursor->prev) : NULL;
+ it->e = lru->cursor ? to_le(lru->cursor) : NULL;
+ list_add(&it->list, &lru->iterators);
+}
+
+/*
+ * Remove an lru_iter from the list of cursors in the lru.
+ */
+static inline void lru_iter_end(struct lru_iter *it)
+{
+ list_del(&it->list);
+}
+
+/* Predicate function type to be used with lru_iter_next */
+typedef bool (*iter_predicate)(struct lru_entry *le, void *context);
+
+/*
+ * Advance the cursor to the next entry that passes the
+ * predicate, and return that entry. Returns NULL if the
+ * iteration is complete.
+ */
+static struct lru_entry *lru_iter_next(struct lru_iter *it,
+ iter_predicate pred, void *context)
+{
+ struct lru_entry *e;
+
+ while (it->e) {
+ e = it->e;
+
+ /* advance the cursor */
+ if (it->e == it->stop)
+ it->e = NULL;
+ else
+ it->e = to_le(it->e->list.next);
+
+ if (pred(e, context))
+ return e;
+ }
+
+ return NULL;
+}
+
+/*
+ * Invalidate a specific lru_entry and update all cursors in
+ * the lru accordingly.
+ */
+static void lru_iter_invalidate(struct lru *lru, struct lru_entry *e)
+{
+ struct lru_iter *it;
+
+ list_for_each_entry(it, &lru->iterators, list) {
+ /* Move c->e forwards if necc. */
+ if (it->e == e) {
+ it->e = to_le(it->e->list.next);
+ if (it->e == e)
+ it->e = NULL;
+ }
+
+ /* Move it->stop backwards if necc. */
+ if (it->stop == e) {
+ it->stop = to_le(it->stop->list.prev);
+ if (it->stop == e)
+ it->stop = NULL;
+ }
+ }
+}
+
+/*--------------*/
+
+/*
+ * Remove a specific entry from the lru.
+ */
+static void lru_remove(struct lru *lru, struct lru_entry *le)
+{
+ lru_iter_invalidate(lru, le);
+ if (lru->count == 1) {
+ lru->cursor = NULL;
+ } else {
+ if (lru->cursor == &le->list)
+ lru->cursor = lru->cursor->next;
+ list_del(&le->list);
+ }
+ lru->count--;
+}
+
+/*
+ * Mark as referenced.
+ */
+static inline void lru_reference(struct lru_entry *le)
+{
+ atomic_set(&le->referenced, 1);
+}
+
+/*--------------*/
+
+/*
+ * Remove the least recently used entry (approx), that passes the predicate.
+ * Returns NULL on failure.
+ */
+enum evict_result {
+ ER_EVICT,
+ ER_DONT_EVICT,
+ ER_STOP, /* stop looking for something to evict */
+};
+
+typedef enum evict_result (*le_predicate)(struct lru_entry *le, void *context);
+
+static struct lru_entry *lru_evict(struct lru *lru, le_predicate pred, void *context)
+{
+ unsigned long tested = 0;
+ struct list_head *h = lru->cursor;
+ struct lru_entry *le;
+
+ if (!h)
+ return NULL;
+ /*
+ * In the worst case we have to loop around twice. Once to clear
+ * the reference flags, and then again to discover the predicate
+ * fails for all entries.
+ */
+ while (tested < lru->count) {
+ le = container_of(h, struct lru_entry, list);
+
+ if (atomic_read(&le->referenced)) {
+ atomic_set(&le->referenced, 0);
+ } else {
+ tested++;
+ switch (pred(le, context)) {
+ case ER_EVICT:
+ /*
+ * Adjust the cursor, so we start the next
+ * search from here.
+ */
+ lru->cursor = le->list.next;
+ lru_remove(lru, le);
+ return le;
+
+ case ER_DONT_EVICT:
+ break;
+
+ case ER_STOP:
+ lru->cursor = le->list.next;
+ return NULL;
+ }
+ }
+
+ h = h->next;
+
+ cond_resched();
+ }
+
+ return NULL;
+}
+
+/*--------------------------------------------------------------*/
+
/*
* Linking of buffers:
* All buffers are linked to buffer_tree with their node field.
--
2.40.0
next prev parent reply other threads:[~2023-03-27 20:13 UTC|newest]
Thread overview: 42+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-03-27 20:11 [dm-devel] [dm-6.4 PATCH v3 00/20] dm bufio, thin: improve concurrent IO performance Mike Snitzer
2023-03-27 20:11 ` Mike Snitzer
2023-03-27 20:11 ` [dm-devel] [dm-6.4 PATCH v3 01/20] dm bufio: remove unused dm_bufio_release_move interface Mike Snitzer
2023-03-27 20:11 ` Mike Snitzer
2023-03-27 20:11 ` [dm-devel] [dm-6.4 PATCH v3 02/20] dm bufio: use WARN_ON in dm_bufio_client_destroy and dm_bufio_exit Mike Snitzer
2023-03-27 20:11 ` Mike Snitzer
2023-03-27 20:11 ` [dm-devel] [dm-6.4 PATCH v3 03/20] dm bufio: never crash if dm_bufio_in_request() Mike Snitzer
2023-03-27 20:11 ` Mike Snitzer
2023-03-27 20:11 ` [dm-devel] [dm-6.4 PATCH v3 04/20] dm bufio: don't bug for clear developer oversight Mike Snitzer
2023-03-27 20:11 ` Mike Snitzer
2023-03-27 20:11 ` Mike Snitzer [this message]
2023-03-27 20:11 ` [dm-6.4 PATCH v3 05/20] dm bufio: add LRU abstraction Mike Snitzer
2023-03-27 20:11 ` [dm-devel] [dm-6.4 PATCH v3 06/20] dm bufio: add dm_buffer_cache abstraction Mike Snitzer
2023-03-27 20:11 ` Mike Snitzer
2023-03-27 20:11 ` [dm-devel] [dm-6.4 PATCH v3 07/20] dm bufio: improve concurrent IO performance Mike Snitzer
2023-03-27 20:11 ` Mike Snitzer
2023-03-27 20:11 ` [dm-devel] [dm-6.4 PATCH v3 08/20] dm bufio: add lock_history optimization for cache iterators Mike Snitzer
2023-03-27 20:11 ` Mike Snitzer
2023-03-27 20:11 ` [dm-devel] [dm-6.4 PATCH v3 09/20] dm bufio: move dm_bufio_client members to avoid spanning cachelines Mike Snitzer
2023-03-27 20:11 ` Mike Snitzer
2023-03-27 20:11 ` [dm-devel] [dm-6.4 PATCH v3 10/20] dm bufio: use waitqueue_active in __free_buffer_wake Mike Snitzer
2023-03-27 20:11 ` Mike Snitzer
2023-03-27 20:11 ` [dm-devel] [dm-6.4 PATCH v3 11/20] dm bufio: use multi-page bio vector Mike Snitzer
2023-03-27 20:11 ` Mike Snitzer
2023-03-27 20:11 ` [dm-devel] [dm-6.4 PATCH v3 12/20] dm thin: speed up cell_defer_no_holder() Mike Snitzer
2023-03-27 20:11 ` Mike Snitzer
2023-03-27 20:11 ` [dm-devel] [dm-6.4 PATCH v3 13/20] dm: split discards further if target sets max_discard_granularity Mike Snitzer
2023-03-27 20:11 ` Mike Snitzer
2023-03-27 20:11 ` [dm-devel] [dm-6.4 PATCH v3 14/20] dm bio prison v1: improve concurrent IO performance Mike Snitzer
2023-03-27 20:11 ` Mike Snitzer
2023-03-27 20:11 ` [dm-devel] [dm-6.4 PATCH v3 15/20] dm bio prison v1: add dm_cell_key_has_valid_range Mike Snitzer
2023-03-27 20:11 ` Mike Snitzer
2023-03-27 20:11 ` [dm-devel] [dm-6.4 PATCH v3 16/20] dm: add dm_num_sharded_locks() Mike Snitzer
2023-03-27 20:11 ` Mike Snitzer
2023-03-27 20:11 ` [dm-devel] [dm-6.4 PATCH v3 17/20] dm bufio: prepare to intelligently size dm_buffer_cache's buffer_trees Mike Snitzer
2023-03-27 20:11 ` Mike Snitzer
2023-03-27 20:11 ` [dm-devel] [dm-6.4 PATCH v3 18/20] dm bufio: " Mike Snitzer
2023-03-27 20:11 ` Mike Snitzer
2023-03-27 20:11 ` [dm-devel] [dm-6.4 PATCH v3 19/20] dm bio prison v1: prepare to intelligently size dm_bio_prison's prison_regions Mike Snitzer
2023-03-27 20:11 ` Mike Snitzer
2023-03-27 20:11 ` [dm-devel] [dm-6.4 PATCH v3 20/20] dm bio prison v1: " Mike Snitzer
2023-03-27 20:11 ` Mike Snitzer
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20230327201143.51026-6-snitzer@kernel.org \
--to=snitzer@kernel.org \
--cc=axboe@kernel.dk \
--cc=dm-devel@redhat.com \
--cc=ebiggers@kernel.org \
--cc=ejt@redhat.com \
--cc=heinzm@redhat.com \
--cc=keescook@chromium.org \
--cc=linux-block@vger.kernel.org \
--cc=luomeng12@huawei.com \
--cc=mpatocka@redhat.com \
--cc=nhuck@google.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.