From: Jonathan Derrick <jonathan.derrick@linux.dev>
To: Song Liu <song@kernel.org>
Cc: <linux-raid@vger.kernel.org>, <linux-kernel@vger.kernel.org>,
jonathan.derrick@solidigm.com, jonathanx.sk.derrick@intel.com,
Mariusz Tkaczyk <mariusz.tkaczyk@linux.intel.com>,
Jonathan Derrick <jonathan.derrick@linux.dev>
Subject: [PATCH v2 1/3] md/bitmap: Add chunk-threshold unplugging
Date: Thu, 13 Oct 2022 16:41:49 -0600 [thread overview]
Message-ID: <20221013224151.300-2-jonathan.derrick@linux.dev> (raw)
In-Reply-To: <20221013224151.300-1-jonathan.derrick@linux.dev>
Add a mechanism that lets bitmap unplugging and flushing be deferred until
the number of dirty chunks has surpassed a defined threshold. This allows
certain write-heavy workloads to either make good forward progress between
bitmap updates or retain reliable bitmap consistency. The default behavior
is the previous behavior of always unplugging when called.
Signed-off-by: Jonathan Derrick <jonathan.derrick@linux.dev>
---
drivers/md/md-bitmap.c | 35 +++++++++++++++++++++++++++++++----
drivers/md/md-bitmap.h | 1 +
drivers/md/md.h | 1 +
3 files changed, 33 insertions(+), 4 deletions(-)
diff --git a/drivers/md/md-bitmap.c b/drivers/md/md-bitmap.c
index bf6dffadbe6f..c5c77f8371a8 100644
--- a/drivers/md/md-bitmap.c
+++ b/drivers/md/md-bitmap.c
@@ -1004,7 +1004,7 @@ static int md_bitmap_file_test_bit(struct bitmap *bitmap, sector_t block)
/* this gets called when the md device is ready to unplug its underlying
* (slave) device queues -- before we let any writes go down, we need to
* sync the dirty pages of the bitmap file to disk */
-void md_bitmap_unplug(struct bitmap *bitmap)
+static void __md_bitmap_unplug(struct bitmap *bitmap)
{
unsigned long i;
int dirty, need_write;
@@ -1038,6 +1038,33 @@ void md_bitmap_unplug(struct bitmap *bitmap)
if (test_bit(BITMAP_WRITE_ERROR, &bitmap->flags))
md_bitmap_file_kick(bitmap);
}
+
+/* Conditional unplug, controlled by mddev->bitmap_info.flush_threshold.
+ * A threshold of 0 (the default) unplugs unconditionally. Otherwise bp[].count
+ * is summed over counter pages that have a map and are not hijacked, and the
+ * unplug runs only when (sum - unplugged_count) > flush_threshold. */
+void md_bitmap_unplug(struct bitmap *bitmap)
+{
+ unsigned int flush_threshold = bitmap->mddev->bitmap_info.flush_threshold;
+
+ if (!flush_threshold) {
+ __md_bitmap_unplug(bitmap);
+ } else {
+ struct bitmap_page *bp = bitmap->counts.bp;
+ unsigned long pages = bitmap->counts.pages;
+ unsigned long k, count = 0;
+
+ for (k = 0; k < pages; k++)
+ if (bp[k].map && !bp[k].hijacked)
+ count += bp[k].count;
+
+ if (count - bitmap->unplugged_count > flush_threshold) { /* NOTE(review): unsigned math, wraps if count dropped below unplugged_count - confirm intended */
+ bitmap->unplugged_count = count; /* saved for the next comparison */
+ md_bitmap_daemon_work(&bitmap->mddev->daemon_timer);
+ __md_bitmap_unplug(bitmap);
+ }
+ }
+}
EXPORT_SYMBOL(md_bitmap_unplug);
static void md_bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset, int needed);
@@ -2012,9 +2039,9 @@ int md_bitmap_copy_from_slot(struct mddev *mddev, int slot,
for (i = 0; i < bitmap->storage.file_pages; i++)
if (test_page_attr(bitmap, i, BITMAP_PAGE_PENDING))
set_page_attr(bitmap, i, BITMAP_PAGE_NEEDWRITE);
- md_bitmap_unplug(bitmap);
+ __md_bitmap_unplug(bitmap);
}
- md_bitmap_unplug(mddev->bitmap);
+ __md_bitmap_unplug(mddev->bitmap);
*low = lo;
*high = hi;
md_bitmap_free(bitmap);
@@ -2246,7 +2273,7 @@ int md_bitmap_resize(struct bitmap *bitmap, sector_t blocks,
spin_unlock_irq(&bitmap->counts.lock);
if (!init) {
- md_bitmap_unplug(bitmap);
+ __md_bitmap_unplug(bitmap);
bitmap->mddev->pers->quiesce(bitmap->mddev, 0);
}
ret = 0;
diff --git a/drivers/md/md-bitmap.h b/drivers/md/md-bitmap.h
index cfd7395de8fd..49a93d8ff307 100644
--- a/drivers/md/md-bitmap.h
+++ b/drivers/md/md-bitmap.h
@@ -223,6 +223,7 @@ struct bitmap {
unsigned long daemon_lastrun; /* jiffies of last run */
unsigned long last_end_sync; /* when we lasted called end_sync to
* update bitmap with resync progress */
+ unsigned long unplugged_count; /* last dirty count from md_bitmap_unplug */
atomic_t pending_writes; /* pending writes to the bitmap file */
wait_queue_head_t write_wait;
diff --git a/drivers/md/md.h b/drivers/md/md.h
index b4e2d8b87b61..1a558cb18bd4 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -501,6 +501,7 @@ struct mddev {
int external;
int nodes; /* Maximum number of nodes in the cluster */
char cluster_name[64]; /* Name of the cluster */
+ unsigned int flush_threshold; /* how many dirty chunks between updates */
} bitmap_info;
atomic_t max_corr_read_errors; /* max read retries */
--
2.31.1
next prev parent reply other threads:[~2022-10-13 22:45 UTC|newest]
Thread overview: 7+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-10-13 22:41 [PATCH v2 0/3] Bitmap percentage flushing Jonathan Derrick
2022-10-13 22:41 ` Jonathan Derrick [this message]
2022-10-14 1:11 ` [PATCH v2 1/3] md/bitmap: Add chunk-threshold unplugging Jonathan Derrick
2022-10-13 22:41 ` [PATCH v2 2/3] md/bitmap: Add sysfs interface for flush threshold Jonathan Derrick
2022-10-13 22:41 ` [PATCH v2 3/3] md/bitmap: Convert daemon_work to proper timer Jonathan Derrick
2022-10-14 21:10 ` [PATCH v2 0/3] Bitmap percentage flushing John Stoffel
2022-10-15 22:27 ` Jonathan Derrick
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20221013224151.300-2-jonathan.derrick@linux.dev \
--to=jonathan.derrick@linux.dev \
--cc=jonathan.derrick@solidigm.com \
--cc=jonathanx.sk.derrick@intel.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-raid@vger.kernel.org \
--cc=mariusz.tkaczyk@linux.intel.com \
--cc=song@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.