* [PATCH 1/2] md/r5cache: handle R5LOG_PAYLOAD_FLUSH in recovery
@ 2017-03-08 1:44 Song Liu
2017-03-08 1:44 ` [PATCH 2/2] md/r5cache: generate R5LOG_PAYLOAD_FLUSH Song Liu
0 siblings, 1 reply; 3+ messages in thread
From: Song Liu @ 2017-03-08 1:44 UTC (permalink / raw)
To: linux-raid; +Cc: shli, neilb, kernel-team, dan.j.williams, hch, Song Liu
This patch adds handling of R5LOG_PAYLOAD_FLUSH in journal recovery.
The next patch will add logic that generates R5LOG_PAYLOAD_FLUSH on flush
finish.
When R5LOG_PAYLOAD_FLUSH is seen in recovery, pending data and parity
will be dropped from recovery. This will reduce the number of stripes
to replay, and thus accelerate the recovery process.
Signed-off-by: Song Liu <songliubraving@fb.com>
---
drivers/md/raid5-cache.c | 47 +++++++++++++++++++++++++++++++++++++++++------
1 file changed, 41 insertions(+), 6 deletions(-)
diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c
index 0d744d5..e69f922 100644
--- a/drivers/md/raid5-cache.c
+++ b/drivers/md/raid5-cache.c
@@ -1957,6 +1957,7 @@ r5l_recovery_verify_data_checksum_for_mb(struct r5l_log *log,
sector_t log_offset = r5l_ring_add(log, ctx->pos, BLOCK_SECTORS);
struct page *page;
struct r5l_payload_data_parity *payload;
+ struct r5l_payload_flush *payload_flush;
page = alloc_page(GFP_KERNEL);
if (!page)
@@ -1964,6 +1965,7 @@ r5l_recovery_verify_data_checksum_for_mb(struct r5l_log *log,
while (mb_offset < le32_to_cpu(mb->meta_size)) {
payload = (void *)mb + mb_offset;
+ payload_flush = (void *)mb + mb_offset;
if (payload->header.type == R5LOG_PAYLOAD_DATA) {
if (r5l_recovery_verify_data_checksum(
@@ -1982,15 +1984,23 @@ r5l_recovery_verify_data_checksum_for_mb(struct r5l_log *log,
BLOCK_SECTORS),
payload->checksum[1]) < 0)
goto mismatch;
- } else /* not R5LOG_PAYLOAD_DATA or R5LOG_PAYLOAD_PARITY */
+ } else if (payload->header.type == R5LOG_PAYLOAD_FLUSH) {
+ /* nothing to do for R5LOG_PAYLOAD_FLUSH here */
+ } else /* not R5LOG_PAYLOAD_DATA/PARITY/FLUSH */
goto mismatch;
- log_offset = r5l_ring_add(log, log_offset,
- le32_to_cpu(payload->size));
+ if (payload->header.type == R5LOG_PAYLOAD_FLUSH) {
+ mb_offset += sizeof(struct r5l_payload_flush) +
+ le32_to_cpu(payload_flush->size);
+ } else {
+ /* DATA or PARITY payload */
+ log_offset = r5l_ring_add(log, log_offset,
+ le32_to_cpu(payload->size));
+ mb_offset += sizeof(struct r5l_payload_data_parity) +
+ sizeof(__le32) *
+ (le32_to_cpu(payload->size) >> (PAGE_SHIFT - 9));
+ }
- mb_offset += sizeof(struct r5l_payload_data_parity) +
- sizeof(__le32) *
- (le32_to_cpu(payload->size) >> (PAGE_SHIFT - 9));
}
put_page(page);
@@ -2018,6 +2028,7 @@ r5c_recovery_analyze_meta_block(struct r5l_log *log,
struct r5conf *conf = mddev->private;
struct r5l_meta_block *mb;
struct r5l_payload_data_parity *payload;
+ struct r5l_payload_flush *payload_flush;
int mb_offset;
sector_t log_offset;
sector_t stripe_sect;
@@ -2043,6 +2054,30 @@ r5c_recovery_analyze_meta_block(struct r5l_log *log,
int dd;
payload = (void *)mb + mb_offset;
+ payload_flush = (void *)mb + mb_offset;
+
+ if (payload->header.type == R5LOG_PAYLOAD_FLUSH) {
+ int i, count;
+
+ count = le32_to_cpu(payload_flush->size) / sizeof(__le64);
+ for (i = 0; i < count; ++i) {
+ stripe_sect = le64_to_cpu(payload_flush->flush_stripes[i]);
+ sh = r5c_recovery_lookup_stripe(cached_stripe_list,
+ stripe_sect);
+ if (sh) {
+ WARN_ON(test_bit(STRIPE_R5C_CACHING, &sh->state));
+ r5l_recovery_reset_stripe(sh);
+ list_del_init(&sh->lru);
+ raid5_release_stripe(sh);
+ }
+ }
+
+ mb_offset += sizeof(struct r5l_payload_flush) +
+ le32_to_cpu(payload_flush->size);
+ continue;
+ }
+
+ /* DATA or PARITY payload */
stripe_sect = (payload->header.type == R5LOG_PAYLOAD_DATA) ?
raid5_compute_sector(
conf, le64_to_cpu(payload->location), 0, &dd,
--
2.9.3
^ permalink raw reply related [flat|nested] 3+ messages in thread
* [PATCH 2/2] md/r5cache: generate R5LOG_PAYLOAD_FLUSH
2017-03-08 1:44 [PATCH 1/2] md/r5cache: handle R5LOG_PAYLOAD_FLUSH in recovery Song Liu
@ 2017-03-08 1:44 ` Song Liu
2017-03-09 21:53 ` Shaohua Li
0 siblings, 1 reply; 3+ messages in thread
From: Song Liu @ 2017-03-08 1:44 UTC (permalink / raw)
To: linux-raid; +Cc: shli, neilb, kernel-team, dan.j.williams, hch, Song Liu
In r5c_finish_stripe_write_out(), R5LOG_PAYLOAD_FLUSH is appended to
log->current_io.
Appending R5LOG_PAYLOAD_FLUSH during quiesce requires extra writes to
the journal. To simplify the logic, we just skip R5LOG_PAYLOAD_FLUSH
during quiesce.
R5LOG_PAYLOAD_FLUSH supports multiple stripes per payload. However,
the current implementation uses one stripe per R5LOG_PAYLOAD_FLUSH,
which is simpler.
Signed-off-by: Song Liu <songliubraving@fb.com>
---
drivers/md/raid5-cache.c | 60 +++++++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 59 insertions(+), 1 deletion(-)
diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c
index e69f922..fd0bfea 100644
--- a/drivers/md/raid5-cache.c
+++ b/drivers/md/raid5-cache.c
@@ -590,7 +590,21 @@ static void r5l_log_endio(struct bio *bio)
mempool_free(io->meta_page, log->meta_pool);
spin_lock_irqsave(&log->io_list_lock, flags);
- __r5l_set_io_unit_state(io, IO_UNIT_IO_END);
+
+ if (list_empty(&io->stripe_list))
+ /*
+ * this io_unit only has R5LOG_PAYLOAD_FLUSH, set
+ * to IO_UNIT_STRIPE_END
+ */
+ __r5l_set_io_unit_state(io, IO_UNIT_STRIPE_END);
+ else
+ /*
+ * io_unit with R5LOG_PAYLOAD_FLUSH and also DATA/PARITY
+ * set to IO_UNIT_IO_END and wait for all stripes get
+ * handled.
+ */
+ __r5l_set_io_unit_state(io, IO_UNIT_IO_END);
+
if (log->need_cache_flush)
r5l_move_to_end_ios(log);
else
@@ -843,6 +857,41 @@ static void r5l_append_payload_page(struct r5l_log *log, struct page *page)
r5_reserve_log_entry(log, io);
}
+static void r5l_append_flush_payload(struct r5l_log *log, sector_t sect)
+{
+ struct mddev *mddev = log->rdev->mddev;
+ struct r5conf *conf = mddev->private;
+ struct r5l_io_unit *io;
+ struct r5l_payload_flush *payload;
+ int meta_size;
+
+ /*
+ * payload_flush requires extra writes to the journal.
+ * To avoid handling the extra IO in quiesce, just skip
+ * flush_payload
+ */
+ if (conf->quiesce)
+ return;
+
+ mutex_lock(&log->io_mutex);
+ meta_size = sizeof(struct r5l_payload_flush) + sizeof(__le64);
+
+ if (r5l_get_meta(log, meta_size)) {
+ mutex_unlock(&log->io_mutex);
+ return;
+ }
+
+ /* current implementation is one stripe per flush payload */
+ io = log->current_io;
+ payload = page_address(io->meta_page) + io->meta_offset;
+ payload->header.type = cpu_to_le16(R5LOG_PAYLOAD_FLUSH);
+ payload->header.flags = cpu_to_le16(0);
+ payload->size = cpu_to_le32(sizeof(__le64));
+ payload->flush_stripes[0] = cpu_to_le64(sect);
+ io->meta_offset += meta_size;
+ mutex_unlock(&log->io_mutex);
+}
+
static int r5l_log_stripe(struct r5l_log *log, struct stripe_head *sh,
int data_pages, int parity_pages)
{
@@ -1466,6 +1515,13 @@ static void r5l_do_reclaim(struct r5l_log *log)
list_empty(&log->finished_ios)))
break;
+ /*
+ * In some cases, io_unit with only R5LOG_PAYLOAD_FLUSH
+ * will stay in finished_ios list. It is necessary to
+ * complete them before quiesce.
+ */
+ r5l_complete_finished_ios(log);
+
md_wakeup_thread(log->rdev->mddev->thread);
wait_event_lock_irq(log->iounit_wait,
r5l_reclaimable_space(log) > reclaimable,
@@ -2784,6 +2840,8 @@ void r5c_finish_stripe_write_out(struct r5conf *conf,
atomic_dec(&conf->r5c_flushing_full_stripes);
atomic_dec(&conf->r5c_cached_full_stripes);
}
+
+ r5l_append_flush_payload(log, sh->sector);
}
int
--
2.9.3
^ permalink raw reply related [flat|nested] 3+ messages in thread
* Re: [PATCH 2/2] md/r5cache: generate R5LOG_PAYLOAD_FLUSH
2017-03-08 1:44 ` [PATCH 2/2] md/r5cache: generate R5LOG_PAYLOAD_FLUSH Song Liu
@ 2017-03-09 21:53 ` Shaohua Li
0 siblings, 0 replies; 3+ messages in thread
From: Shaohua Li @ 2017-03-09 21:53 UTC (permalink / raw)
To: Song Liu; +Cc: linux-raid, shli, neilb, kernel-team, dan.j.williams, hch
On Tue, Mar 07, 2017 at 05:44:22PM -0800, Song Liu wrote:
> In r5c_finish_stripe_write_out(), R5LOG_PAYLOAD_FLUSH is append to
> log->current_io.
>
> Appending R5LOG_PAYLOAD_FLUSH in quiesce needs extra writes to
> journal. To simplify the logic, we just skip R5LOG_PAYLOAD_FLUSH in
> quiesce.
>
> Even R5LOG_PAYLOAD_FLUSH supports multiple stripes per payload.
> However, current implementation is one stripe per R5LOG_PAYLOAD_FLUSH,
> which is simpler.
much simpler than I expected :)
> Signed-off-by: Song Liu <songliubraving@fb.com>
> ---
> drivers/md/raid5-cache.c | 60 +++++++++++++++++++++++++++++++++++++++++++++++-
> 1 file changed, 59 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c
> index e69f922..fd0bfea 100644
> --- a/drivers/md/raid5-cache.c
> +++ b/drivers/md/raid5-cache.c
> @@ -590,7 +590,21 @@ static void r5l_log_endio(struct bio *bio)
> mempool_free(io->meta_page, log->meta_pool);
>
> spin_lock_irqsave(&log->io_list_lock, flags);
> - __r5l_set_io_unit_state(io, IO_UNIT_IO_END);
> +
> + if (list_empty(&io->stripe_list))
> + /*
> + * this io_unit only has R5LOG_PAYLOAD_FLUSH, set
> + * to IO_UNIT_STRIPE_END
> + */
> + __r5l_set_io_unit_state(io, IO_UNIT_STRIPE_END);
> + else
> + /*
> + * io_unit with R5LOG_PAYLOAD_FLUSH and also DATA/PARITY
> + * set to IO_UNIT_IO_END and wait for all stripes get
> + * handled.
> + */
> + __r5l_set_io_unit_state(io, IO_UNIT_IO_END);
This part, along with the r5l_do_reclaim change, looks strange. Could we call
__r5l_stripe_write_finished here? It makes more sense, as we also free the io
unit and do other things. The r5l_do_reclaim part looks quite hackish.
Thanks,
Shaohua
^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2017-03-09 21:53 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2017-03-08 1:44 [PATCH 1/2] md/r5cache: handle R5LOG_PAYLOAD_FLUSH in recovery Song Liu
2017-03-08 1:44 ` [PATCH 2/2] md/r5cache: generate R5LOG_PAYLOAD_FLUSH Song Liu
2017-03-09 21:53 ` Shaohua Li
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox