* [PATCH 01/11] raid5-cache: free I/O units earlier
2015-10-05 7:31 raid5-cache I/O path improvements V3 Christoph Hellwig
@ 2015-10-05 7:31 ` Christoph Hellwig
2015-10-05 7:31 ` [PATCH 02/11] raid5-cache: rename flushed_ios to finished_ios Christoph Hellwig
` (10 subsequent siblings)
11 siblings, 0 replies; 14+ messages in thread
From: Christoph Hellwig @ 2015-10-05 7:31 UTC (permalink / raw)
To: Shaohua Li, neilb; +Cc: dan.j.williams, Kernel-team, linux-raid
There is no good reason to keep the I/O unit structures around after the
stripe has been written back to the RAID array. The only information
we need is the log sequence number, and the checkpoint offset of the
highest successful writeback. Store those in the log structure, and
free the IO units from __r5l_stripe_write_finished.
Besides simplifying the code, this also avoids having to keep the allocation
for the I/O unit around for a potentially long time, as superblock updates
that checkpoint the log do not happen very often.
As a side effect, this also fixes the previously incorrect calculation of
'free' in r5l_do_reclaim: the previous code took the last io_unit, which
isn't checkpointed yet, into account.
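For reference, a minimal standalone sketch of the arithmetic behind the new
r5l_reclaimable_space helper, assuming r5l_ring_distance computes the forward
distance on the circular log (the typedef and the ring_distance name below
are illustrative only, not the kernel code):

	/* illustrative stand-in for the kernel's 512-byte sector type */
	typedef unsigned long long sector_t;

	/* forward distance from 'start' to 'end' on a log of 'size' sectors */
	static sector_t ring_distance(sector_t size, sector_t start, sector_t end)
	{
		return end >= start ? end - start : end + size - start;
	}

With next_checkpoint updated whenever an io_unit reaches IO_UNIT_STRIPE_END,
the reclaimable space is just the distance from last_checkpoint to
next_checkpoint, computed in O(1) without walking any io_unit list.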
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
drivers/md/raid5-cache.c | 144 ++++++++++++++++++-----------------------------
1 file changed, 55 insertions(+), 89 deletions(-)
diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c
index e917e53..5b5f346 100644
--- a/drivers/md/raid5-cache.c
+++ b/drivers/md/raid5-cache.c
@@ -51,6 +51,9 @@ struct r5l_log {
sector_t log_start; /* log head. where new data appends */
u64 seq; /* log head sequence */
+ sector_t next_checkpoint;
+ u64 next_cp_seq;
+
struct mutex io_mutex;
struct r5l_io_unit *current_io; /* current io_unit accepting new data */
@@ -65,9 +68,6 @@ struct r5l_log {
* cache flush */
struct list_head flushed_ios; /* io_units which settle down in log disk */
struct bio flush_bio;
- struct list_head stripe_end_ios;/* io_units which have been completely
- * written to the RAID but have not yet
- * been considered for updating super */
struct kmem_cache *io_kc;
@@ -186,35 +186,6 @@ static void r5l_move_io_unit_list(struct list_head *from, struct list_head *to,
}
}
-/*
- * We don't want too many io_units reside in stripe_end_ios list, which will
- * waste a lot of memory. So we try to remove some. But we must keep at least 2
- * io_units. The superblock must point to a valid meta, if it's the last meta,
- * recovery can scan less
- */
-static void r5l_compress_stripe_end_list(struct r5l_log *log)
-{
- struct r5l_io_unit *first, *last, *io;
-
- first = list_first_entry(&log->stripe_end_ios,
- struct r5l_io_unit, log_sibling);
- last = list_last_entry(&log->stripe_end_ios,
- struct r5l_io_unit, log_sibling);
- if (first == last)
- return;
- list_del(&first->log_sibling);
- list_del(&last->log_sibling);
- while (!list_empty(&log->stripe_end_ios)) {
- io = list_first_entry(&log->stripe_end_ios,
- struct r5l_io_unit, log_sibling);
- list_del(&io->log_sibling);
- first->log_end = io->log_end;
- r5l_free_io_unit(log, io);
- }
- list_add_tail(&first->log_sibling, &log->stripe_end_ios);
- list_add_tail(&last->log_sibling, &log->stripe_end_ios);
-}
-
static void __r5l_set_io_unit_state(struct r5l_io_unit *io,
enum r5l_io_unit_state state)
{
@@ -548,31 +519,53 @@ static void r5l_run_no_space_stripes(struct r5l_log *log)
spin_unlock(&log->no_space_stripes_lock);
}
+static sector_t r5l_reclaimable_space(struct r5l_log *log)
+{
+ return r5l_ring_distance(log, log->last_checkpoint,
+ log->next_checkpoint);
+}
+
+static bool r5l_complete_flushed_ios(struct r5l_log *log)
+{
+ struct r5l_io_unit *io, *next;
+ bool found = false;
+
+ assert_spin_locked(&log->io_list_lock);
+
+ list_for_each_entry_safe(io, next, &log->flushed_ios, log_sibling) {
+ /* don't change list order */
+ if (io->state < IO_UNIT_STRIPE_END)
+ break;
+
+ log->next_checkpoint = io->log_start;
+ log->next_cp_seq = io->seq;
+
+ list_del(&io->log_sibling);
+ r5l_free_io_unit(log, io);
+
+ found = true;
+ }
+
+
+ return found;
+}
+
static void __r5l_stripe_write_finished(struct r5l_io_unit *io)
{
struct r5l_log *log = io->log;
- struct r5l_io_unit *last;
- sector_t reclaimable_space;
unsigned long flags;
spin_lock_irqsave(&log->io_list_lock, flags);
__r5l_set_io_unit_state(io, IO_UNIT_STRIPE_END);
- /* might move 0 entry */
- r5l_move_io_unit_list(&log->flushed_ios, &log->stripe_end_ios,
- IO_UNIT_STRIPE_END);
- if (list_empty(&log->stripe_end_ios)) {
+
+ if (!r5l_complete_flushed_ios(log)) {
spin_unlock_irqrestore(&log->io_list_lock, flags);
return;
}
- last = list_last_entry(&log->stripe_end_ios,
- struct r5l_io_unit, log_sibling);
- reclaimable_space = r5l_ring_distance(log, log->last_checkpoint,
- last->log_end);
- if (reclaimable_space >= log->max_free_space)
+ if (r5l_reclaimable_space(log) > log->max_free_space)
r5l_wake_reclaim(log, 0);
- r5l_compress_stripe_end_list(log);
spin_unlock_irqrestore(&log->io_list_lock, flags);
wake_up(&log->iounit_wait);
}
@@ -651,13 +644,6 @@ void r5l_flush_stripe_to_raid(struct r5l_log *log)
submit_bio(WRITE_FLUSH, &log->flush_bio);
}
-static void r5l_kick_io_unit(struct r5l_log *log)
-{
- md_wakeup_thread(log->rdev->mddev->thread);
- wait_event_lock_irq(log->iounit_wait, !list_empty(&log->stripe_end_ios),
- log->io_list_lock);
-}
-
static void r5l_write_super(struct r5l_log *log, sector_t cp);
static void r5l_write_super_and_discard_space(struct r5l_log *log,
sector_t end)
@@ -698,10 +684,10 @@ static void r5l_write_super_and_discard_space(struct r5l_log *log,
static void r5l_do_reclaim(struct r5l_log *log)
{
- struct r5l_io_unit *io, *last;
- LIST_HEAD(list);
- sector_t free = 0;
sector_t reclaim_target = xchg(&log->reclaim_target, 0);
+ sector_t reclaimable;
+ sector_t next_checkpoint;
+ u64 next_cp_seq;
spin_lock_irq(&log->io_list_lock);
/*
@@ -710,60 +696,41 @@ static void r5l_do_reclaim(struct r5l_log *log)
* shouldn't reuse space of an unreclaimable io_unit
*/
while (1) {
- struct list_head *target_list = NULL;
-
- while (!list_empty(&log->stripe_end_ios)) {
- io = list_first_entry(&log->stripe_end_ios,
- struct r5l_io_unit, log_sibling);
- list_move_tail(&io->log_sibling, &list);
- free += r5l_ring_distance(log, io->log_start,
- io->log_end);
- }
-
- if (free >= reclaim_target ||
+ reclaimable = r5l_reclaimable_space(log);
+ if (reclaimable >= reclaim_target ||
(list_empty(&log->running_ios) &&
list_empty(&log->io_end_ios) &&
list_empty(&log->flushing_ios) &&
list_empty(&log->flushed_ios)))
break;
- /* Below waiting mostly happens when we shutdown the raid */
- if (!list_empty(&log->flushed_ios))
- target_list = &log->flushed_ios;
- else if (!list_empty(&log->flushing_ios))
- target_list = &log->flushing_ios;
- else if (!list_empty(&log->io_end_ios))
- target_list = &log->io_end_ios;
- else if (!list_empty(&log->running_ios))
- target_list = &log->running_ios;
-
- r5l_kick_io_unit(log);
+ md_wakeup_thread(log->rdev->mddev->thread);
+ wait_event_lock_irq(log->iounit_wait,
+ r5l_reclaimable_space(log) > reclaimable,
+ log->io_list_lock);
}
+
+ next_checkpoint = log->next_checkpoint;
+ next_cp_seq = log->next_cp_seq;
spin_unlock_irq(&log->io_list_lock);
- if (list_empty(&list))
+ BUG_ON(reclaimable < 0);
+ if (reclaimable == 0)
return;
- /* super always point to last valid meta */
- last = list_last_entry(&list, struct r5l_io_unit, log_sibling);
/*
* write_super will flush cache of each raid disk. We must write super
* here, because the log area might be reused soon and we don't want to
* confuse recovery
*/
- r5l_write_super_and_discard_space(log, last->log_start);
+ r5l_write_super_and_discard_space(log, next_checkpoint);
mutex_lock(&log->io_mutex);
- log->last_checkpoint = last->log_start;
- log->last_cp_seq = last->seq;
+ log->last_checkpoint = next_checkpoint;
+ log->last_cp_seq = next_cp_seq;
mutex_unlock(&log->io_mutex);
- r5l_run_no_space_stripes(log);
- while (!list_empty(&list)) {
- io = list_first_entry(&list, struct r5l_io_unit, log_sibling);
- list_del(&io->log_sibling);
- r5l_free_io_unit(log, io);
- }
+ r5l_run_no_space_stripes(log);
}
static void r5l_reclaim_thread(struct md_thread *thread)
@@ -1153,7 +1120,6 @@ int r5l_init_log(struct r5conf *conf, struct md_rdev *rdev)
spin_lock_init(&log->io_list_lock);
INIT_LIST_HEAD(&log->running_ios);
INIT_LIST_HEAD(&log->io_end_ios);
- INIT_LIST_HEAD(&log->stripe_end_ios);
INIT_LIST_HEAD(&log->flushing_ios);
INIT_LIST_HEAD(&log->flushed_ios);
bio_init(&log->flush_bio);
--
1.9.1
* [PATCH 02/11] raid5-cache: rename flushed_ios to finished_ios
2015-10-05 7:31 raid5-cache I/O path improvements V3 Christoph Hellwig
2015-10-05 7:31 ` [PATCH 01/11] raid5-cache: free I/O units earlier Christoph Hellwig
@ 2015-10-05 7:31 ` Christoph Hellwig
2015-10-05 7:31 ` [PATCH 03/11] raid5-cache: factor out a helper to run all stripes for an I/O unit Christoph Hellwig
` (9 subsequent siblings)
11 siblings, 0 replies; 14+ messages in thread
From: Christoph Hellwig @ 2015-10-05 7:31 UTC (permalink / raw)
To: Shaohua Li, neilb; +Cc: dan.j.williams, Kernel-team, linux-raid
After this series we won't necessarily have flushed the cache for these
I/Os, so give the list a more neutral name.
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
drivers/md/raid5-cache.c | 14 +++++++-------
1 file changed, 7 insertions(+), 7 deletions(-)
diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c
index 5b5f346..9f5a48e 100644
--- a/drivers/md/raid5-cache.c
+++ b/drivers/md/raid5-cache.c
@@ -66,7 +66,7 @@ struct r5l_log {
* to the RAID */
struct list_head flushing_ios; /* io_units which are waiting for log
* cache flush */
- struct list_head flushed_ios; /* io_units which settle down in log disk */
+ struct list_head finished_ios; /* io_units which settle down in log disk */
struct bio flush_bio;
struct kmem_cache *io_kc;
@@ -525,14 +525,14 @@ static sector_t r5l_reclaimable_space(struct r5l_log *log)
log->next_checkpoint);
}
-static bool r5l_complete_flushed_ios(struct r5l_log *log)
+static bool r5l_complete_finished_ios(struct r5l_log *log)
{
struct r5l_io_unit *io, *next;
bool found = false;
assert_spin_locked(&log->io_list_lock);
- list_for_each_entry_safe(io, next, &log->flushed_ios, log_sibling) {
+ list_for_each_entry_safe(io, next, &log->finished_ios, log_sibling) {
/* don't change list order */
if (io->state < IO_UNIT_STRIPE_END)
break;
@@ -558,7 +558,7 @@ static void __r5l_stripe_write_finished(struct r5l_io_unit *io)
spin_lock_irqsave(&log->io_list_lock, flags);
__r5l_set_io_unit_state(io, IO_UNIT_STRIPE_END);
- if (!r5l_complete_flushed_ios(log)) {
+ if (!r5l_complete_finished_ios(log)) {
spin_unlock_irqrestore(&log->io_list_lock, flags);
return;
}
@@ -602,7 +602,7 @@ static void r5l_log_flush_endio(struct bio *bio)
raid5_release_stripe(sh);
}
}
- list_splice_tail_init(&log->flushing_ios, &log->flushed_ios);
+ list_splice_tail_init(&log->flushing_ios, &log->finished_ios);
spin_unlock_irqrestore(&log->io_list_lock, flags);
}
@@ -701,7 +701,7 @@ static void r5l_do_reclaim(struct r5l_log *log)
(list_empty(&log->running_ios) &&
list_empty(&log->io_end_ios) &&
list_empty(&log->flushing_ios) &&
- list_empty(&log->flushed_ios)))
+ list_empty(&log->finished_ios)))
break;
md_wakeup_thread(log->rdev->mddev->thread);
@@ -1121,7 +1121,7 @@ int r5l_init_log(struct r5conf *conf, struct md_rdev *rdev)
INIT_LIST_HEAD(&log->running_ios);
INIT_LIST_HEAD(&log->io_end_ios);
INIT_LIST_HEAD(&log->flushing_ios);
- INIT_LIST_HEAD(&log->flushed_ios);
+ INIT_LIST_HEAD(&log->finished_ios);
bio_init(&log->flush_bio);
log->io_kc = KMEM_CACHE(r5l_io_unit, 0);
--
1.9.1
* [PATCH 03/11] raid5-cache: factor out a helper to run all stripes for an I/O unit
2015-10-05 7:31 raid5-cache I/O path improvements V3 Christoph Hellwig
2015-10-05 7:31 ` [PATCH 01/11] raid5-cache: free I/O units earlier Christoph Hellwig
2015-10-05 7:31 ` [PATCH 02/11] raid5-cache: rename flushed_ios to finished_ios Christoph Hellwig
@ 2015-10-05 7:31 ` Christoph Hellwig
2015-10-05 7:31 ` [PATCH 04/11] raid5-cache: simplify state machine when cache flushes are not needed Christoph Hellwig
` (8 subsequent siblings)
11 siblings, 0 replies; 14+ messages in thread
From: Christoph Hellwig @ 2015-10-05 7:31 UTC (permalink / raw)
To: Shaohua Li, neilb; +Cc: dan.j.williams, Kernel-team, linux-raid
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
drivers/md/raid5-cache.c | 23 +++++++++++++----------
1 file changed, 13 insertions(+), 10 deletions(-)
diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c
index 9f5a48e..b04e908 100644
--- a/drivers/md/raid5-cache.c
+++ b/drivers/md/raid5-cache.c
@@ -194,6 +194,17 @@ static void __r5l_set_io_unit_state(struct r5l_io_unit *io,
io->state = state;
}
+static void r5l_io_run_stripes(struct r5l_io_unit *io)
+{
+ struct stripe_head *sh, *next;
+
+ list_for_each_entry_safe(sh, next, &io->stripe_list, log_list) {
+ list_del_init(&sh->log_list);
+ set_bit(STRIPE_HANDLE, &sh->state);
+ raid5_release_stripe(sh);
+ }
+}
+
static void r5l_log_endio(struct bio *bio)
{
struct r5l_io_unit *io = bio->bi_private;
@@ -587,21 +598,13 @@ static void r5l_log_flush_endio(struct bio *bio)
flush_bio);
unsigned long flags;
struct r5l_io_unit *io;
- struct stripe_head *sh;
if (bio->bi_error)
md_error(log->rdev->mddev, log->rdev);
spin_lock_irqsave(&log->io_list_lock, flags);
- list_for_each_entry(io, &log->flushing_ios, log_sibling) {
- while (!list_empty(&io->stripe_list)) {
- sh = list_first_entry(&io->stripe_list,
- struct stripe_head, log_list);
- list_del_init(&sh->log_list);
- set_bit(STRIPE_HANDLE, &sh->state);
- raid5_release_stripe(sh);
- }
- }
+ list_for_each_entry(io, &log->flushing_ios, log_sibling)
+ r5l_io_run_stripes(io);
list_splice_tail_init(&log->flushing_ios, &log->finished_ios);
spin_unlock_irqrestore(&log->io_list_lock, flags);
}
--
1.9.1
* [PATCH 04/11] raid5-cache: simplify state machine when cache flushes are not needed
2015-10-05 7:31 raid5-cache I/O path improvements V3 Christoph Hellwig
` (2 preceding siblings ...)
2015-10-05 7:31 ` [PATCH 03/11] raid5-cache: factor out a helper to run all stripes for an I/O unit Christoph Hellwig
@ 2015-10-05 7:31 ` Christoph Hellwig
2015-10-05 7:31 ` [PATCH 05/11] raid5-cache: clean up r5l_get_meta Christoph Hellwig
` (7 subsequent siblings)
11 siblings, 0 replies; 14+ messages in thread
From: Christoph Hellwig @ 2015-10-05 7:31 UTC (permalink / raw)
To: Shaohua Li, neilb; +Cc: dan.j.williams, Kernel-team, linux-raid
For devices without a volatile write cache we don't need to send a FLUSH
command to ensure writes are stable on disk, and thus can avoid the whole
step of batching up bios for processing by the MD thread.
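To make the fast path concrete, here is the completion-time branch this patch
introduces, condensed from the diff below (locking context elided):

	/* in r5l_log_endio, under io_list_lock */
	if (log->need_cache_flush)
		/* volatile cache: batch io_units for the MD thread's FLUSH */
		r5l_move_io_unit_list(&log->running_ios, &log->io_end_ios,
				      IO_UNIT_IO_END);
	else
		/* no volatile cache: data is already stable, run stripes now */
		r5l_log_run_stripes(log);

need_cache_flush is sampled once at log init from the queue's flush_flags;
a zero value means the device has no volatile write cache to flush.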
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
drivers/md/raid5-cache.c | 33 +++++++++++++++++++++++++++++----
1 file changed, 29 insertions(+), 4 deletions(-)
diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c
index b04e908..0d18ed7 100644
--- a/drivers/md/raid5-cache.c
+++ b/drivers/md/raid5-cache.c
@@ -83,6 +83,8 @@ struct r5l_log {
struct list_head no_space_stripes; /* pending stripes, log has no space */
spinlock_t no_space_stripes_lock;
+
+ bool need_cache_flush;
};
/*
@@ -205,6 +207,22 @@ static void r5l_io_run_stripes(struct r5l_io_unit *io)
}
}
+static void r5l_log_run_stripes(struct r5l_log *log)
+{
+ struct r5l_io_unit *io, *next;
+
+ assert_spin_locked(&log->io_list_lock);
+
+ list_for_each_entry_safe(io, next, &log->running_ios, log_sibling) {
+ /* don't change list order */
+ if (io->state < IO_UNIT_IO_END)
+ break;
+
+ list_move_tail(&io->log_sibling, &log->finished_ios);
+ r5l_io_run_stripes(io);
+ }
+}
+
static void r5l_log_endio(struct bio *bio)
{
struct r5l_io_unit *io = bio->bi_private;
@@ -221,11 +239,15 @@ static void r5l_log_endio(struct bio *bio)
spin_lock_irqsave(&log->io_list_lock, flags);
__r5l_set_io_unit_state(io, IO_UNIT_IO_END);
- r5l_move_io_unit_list(&log->running_ios, &log->io_end_ios,
- IO_UNIT_IO_END);
+ if (log->need_cache_flush)
+ r5l_move_io_unit_list(&log->running_ios, &log->io_end_ios,
+ IO_UNIT_IO_END);
+ else
+ r5l_log_run_stripes(log);
spin_unlock_irqrestore(&log->io_list_lock, flags);
- md_wakeup_thread(log->rdev->mddev->thread);
+ if (log->need_cache_flush)
+ md_wakeup_thread(log->rdev->mddev->thread);
}
static void r5l_submit_current_io(struct r5l_log *log)
@@ -626,7 +648,8 @@ static void r5l_log_flush_endio(struct bio *bio)
void r5l_flush_stripe_to_raid(struct r5l_log *log)
{
bool do_flush;
- if (!log)
+
+ if (!log || !log->need_cache_flush)
return;
spin_lock_irq(&log->io_list_lock);
@@ -1115,6 +1138,8 @@ int r5l_init_log(struct r5conf *conf, struct md_rdev *rdev)
return -ENOMEM;
log->rdev = rdev;
+ log->need_cache_flush = (rdev->bdev->bd_disk->queue->flush_flags != 0);
+
log->uuid_checksum = crc32_le(~0, (void *)rdev->mddev->uuid,
sizeof(rdev->mddev->uuid));
--
1.9.1
* [PATCH 05/11] raid5-cache: clean up r5l_get_meta
2015-10-05 7:31 raid5-cache I/O path improvements V3 Christoph Hellwig
` (3 preceding siblings ...)
2015-10-05 7:31 ` [PATCH 04/11] raid5-cache: simplify state machine when cache flushes are not needed Christoph Hellwig
@ 2015-10-05 7:31 ` Christoph Hellwig
2015-10-05 7:31 ` [PATCH 06/11] raid5-cache: refactor bio allocation Christoph Hellwig
` (6 subsequent siblings)
11 siblings, 0 replies; 14+ messages in thread
From: Christoph Hellwig @ 2015-10-05 7:31 UTC (permalink / raw)
To: Shaohua Li, neilb; +Cc: dan.j.williams, Kernel-team, linux-raid
Remove the only partially used local 'io' variable to simplify the code
flow.
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
drivers/md/raid5-cache.c | 12 ++++--------
1 file changed, 4 insertions(+), 8 deletions(-)
diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c
index 0d18ed7..05126151 100644
--- a/drivers/md/raid5-cache.c
+++ b/drivers/md/raid5-cache.c
@@ -324,16 +324,12 @@ static struct r5l_io_unit *r5l_new_meta(struct r5l_log *log)
static int r5l_get_meta(struct r5l_log *log, unsigned int payload_size)
{
- struct r5l_io_unit *io;
-
- io = log->current_io;
- if (io && io->meta_offset + payload_size > PAGE_SIZE)
+ if (log->current_io &&
+ log->current_io->meta_offset + payload_size > PAGE_SIZE)
r5l_submit_current_io(log);
- io = log->current_io;
- if (io)
- return 0;
- log->current_io = r5l_new_meta(log);
+ if (!log->current_io)
+ log->current_io = r5l_new_meta(log);
return 0;
}
--
1.9.1
* [PATCH 06/11] raid5-cache: refactor bio allocation
2015-10-05 7:31 raid5-cache I/O path improvements V3 Christoph Hellwig
` (4 preceding siblings ...)
2015-10-05 7:31 ` [PATCH 05/11] raid5-cache: clean up r5l_get_meta Christoph Hellwig
@ 2015-10-05 7:31 ` Christoph Hellwig
2015-10-05 7:31 ` [PATCH 07/11] raid5-cache: take rdev->data_offset into account early on Christoph Hellwig
` (5 subsequent siblings)
11 siblings, 0 replies; 14+ messages in thread
From: Christoph Hellwig @ 2015-10-05 7:31 UTC (permalink / raw)
To: Shaohua Li, neilb; +Cc: dan.j.williams, Kernel-team, linux-raid
Split out a helper to allocate a bio for log writes.
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
drivers/md/raid5-cache.c | 45 ++++++++++++++++++++-------------------------
1 file changed, 20 insertions(+), 25 deletions(-)
diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c
index 05126151..4d4dc4d 100644
--- a/drivers/md/raid5-cache.c
+++ b/drivers/md/raid5-cache.c
@@ -278,11 +278,25 @@ static void r5l_submit_current_io(struct r5l_log *log)
}
}
+static struct bio *r5l_bio_alloc(struct r5l_log *log, struct r5l_io_unit *io)
+{
+ struct bio *bio = bio_kmalloc(GFP_NOIO | __GFP_NOFAIL, BIO_MAX_PAGES);
+
+ bio->bi_rw = WRITE;
+ bio->bi_bdev = log->rdev->bdev;
+ bio->bi_iter.bi_sector = log->log_start;
+ bio->bi_end_io = r5l_log_endio;
+ bio->bi_private = io;
+
+ bio_list_add(&io->bios, bio);
+ atomic_inc(&io->pending_io);
+ return bio;
+}
+
static struct r5l_io_unit *r5l_new_meta(struct r5l_log *log)
{
struct r5l_io_unit *io;
struct r5l_meta_block *block;
- struct bio *bio;
io = r5l_alloc_io_unit(log);
@@ -296,17 +310,8 @@ static struct r5l_io_unit *r5l_new_meta(struct r5l_log *log)
io->meta_offset = sizeof(struct r5l_meta_block);
io->seq = log->seq;
- bio = bio_kmalloc(GFP_NOIO | __GFP_NOFAIL, BIO_MAX_PAGES);
- io->current_bio = bio;
- bio->bi_rw = WRITE;
- bio->bi_bdev = log->rdev->bdev;
- bio->bi_iter.bi_sector = log->log_start;
- bio_add_page(bio, io->meta_page, PAGE_SIZE, 0);
- bio->bi_end_io = r5l_log_endio;
- bio->bi_private = io;
-
- bio_list_add(&io->bios, bio);
- atomic_inc(&io->pending_io);
+ io->current_bio = r5l_bio_alloc(log, io);
+ bio_add_page(io->current_bio, io->meta_page, PAGE_SIZE, 0);
log->seq++;
log->log_start = r5l_ring_add(log, log->log_start, BLOCK_SECTORS);
@@ -360,19 +365,9 @@ static void r5l_append_payload_page(struct r5l_log *log, struct page *page)
struct r5l_io_unit *io = log->current_io;
alloc_bio:
- if (!io->current_bio) {
- struct bio *bio;
-
- bio = bio_kmalloc(GFP_NOIO | __GFP_NOFAIL, BIO_MAX_PAGES);
- bio->bi_rw = WRITE;
- bio->bi_bdev = log->rdev->bdev;
- bio->bi_iter.bi_sector = log->log_start;
- bio->bi_end_io = r5l_log_endio;
- bio->bi_private = io;
- bio_list_add(&io->bios, bio);
- atomic_inc(&io->pending_io);
- io->current_bio = bio;
- }
+ if (!io->current_bio)
+ io->current_bio = r5l_bio_alloc(log, io);
+
if (!bio_add_page(io->current_bio, page, PAGE_SIZE, 0)) {
io->current_bio = NULL;
goto alloc_bio;
--
1.9.1
* [PATCH 07/11] raid5-cache: take rdev->data_offset into account early on
2015-10-05 7:31 raid5-cache I/O path improvements V3 Christoph Hellwig
` (5 preceding siblings ...)
2015-10-05 7:31 ` [PATCH 06/11] raid5-cache: refactor bio allocation Christoph Hellwig
@ 2015-10-05 7:31 ` Christoph Hellwig
2015-10-05 7:31 ` [PATCH 08/11] raid5-cache: inline r5l_alloc_io_unit into r5l_new_meta Christoph Hellwig
` (4 subsequent siblings)
11 siblings, 0 replies; 14+ messages in thread
From: Christoph Hellwig @ 2015-10-05 7:31 UTC (permalink / raw)
To: Shaohua Li, neilb; +Cc: dan.j.williams, Kernel-team, linux-raid
Set up bi_sector properly when we allocate a bio instead of updating it
at submission time.
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
drivers/md/raid5-cache.c | 7 ++-----
1 file changed, 2 insertions(+), 5 deletions(-)
diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c
index 4d4dc4d..f3958e5 100644
--- a/drivers/md/raid5-cache.c
+++ b/drivers/md/raid5-cache.c
@@ -271,11 +271,8 @@ static void r5l_submit_current_io(struct r5l_log *log)
__r5l_set_io_unit_state(io, IO_UNIT_IO_START);
spin_unlock_irqrestore(&log->io_list_lock, flags);
- while ((bio = bio_list_pop(&io->bios))) {
- /* all IO must start from rdev->data_offset */
- bio->bi_iter.bi_sector += log->rdev->data_offset;
+ while ((bio = bio_list_pop(&io->bios)))
submit_bio(WRITE, bio);
- }
}
static struct bio *r5l_bio_alloc(struct r5l_log *log, struct r5l_io_unit *io)
@@ -284,7 +281,7 @@ static struct bio *r5l_bio_alloc(struct r5l_log *log, struct r5l_io_unit *io)
bio->bi_rw = WRITE;
bio->bi_bdev = log->rdev->bdev;
- bio->bi_iter.bi_sector = log->log_start;
+ bio->bi_iter.bi_sector = log->rdev->data_offset + log->log_start;
bio->bi_end_io = r5l_log_endio;
bio->bi_private = io;
--
1.9.1
* [PATCH 08/11] raid5-cache: inline r5l_alloc_io_unit into r5l_new_meta
2015-10-05 7:31 raid5-cache I/O path improvements V3 Christoph Hellwig
` (6 preceding siblings ...)
2015-10-05 7:31 ` [PATCH 07/11] raid5-cache: take rdev->data_offset into account early on Christoph Hellwig
@ 2015-10-05 7:31 ` Christoph Hellwig
2015-10-05 7:31 ` [PATCH 09/11] raid5-cache: new helper: r5_reserve_log_entry Christoph Hellwig
` (3 subsequent siblings)
11 siblings, 0 replies; 14+ messages in thread
From: Christoph Hellwig @ 2015-10-05 7:31 UTC (permalink / raw)
To: Shaohua Li, neilb; +Cc: dan.j.williams, Kernel-team, linux-raid
This is the only user, and keeping all code initializing the io_unit
structure together improves readability.
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
drivers/md/raid5-cache.c | 26 ++++++++------------------
1 file changed, 8 insertions(+), 18 deletions(-)
diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c
index f3958e5..b716c41 100644
--- a/drivers/md/raid5-cache.c
+++ b/drivers/md/raid5-cache.c
@@ -150,23 +150,6 @@ static bool r5l_has_free_space(struct r5l_log *log, sector_t size)
return log->device_size > used_size + size;
}
-static struct r5l_io_unit *r5l_alloc_io_unit(struct r5l_log *log)
-{
- struct r5l_io_unit *io;
- /* We can't handle memory allocate failure so far */
- gfp_t gfp = GFP_NOIO | __GFP_NOFAIL;
-
- io = kmem_cache_zalloc(log->io_kc, gfp);
- io->log = log;
- io->meta_page = alloc_page(gfp | __GFP_ZERO);
-
- bio_list_init(&io->bios);
- INIT_LIST_HEAD(&io->log_sibling);
- INIT_LIST_HEAD(&io->stripe_list);
- io->state = IO_UNIT_RUNNING;
- return io;
-}
-
static void r5l_free_io_unit(struct r5l_log *log, struct r5l_io_unit *io)
{
__free_page(io->meta_page);
@@ -295,8 +278,15 @@ static struct r5l_io_unit *r5l_new_meta(struct r5l_log *log)
struct r5l_io_unit *io;
struct r5l_meta_block *block;
- io = r5l_alloc_io_unit(log);
+ /* We can't handle memory allocate failure so far */
+ io = kmem_cache_zalloc(log->io_kc, GFP_NOIO | __GFP_NOFAIL);
+ io->log = log;
+ bio_list_init(&io->bios);
+ INIT_LIST_HEAD(&io->log_sibling);
+ INIT_LIST_HEAD(&io->stripe_list);
+ io->state = IO_UNIT_RUNNING;
+ io->meta_page = alloc_page(GFP_NOIO | __GFP_NOFAIL | __GFP_ZERO);
block = page_address(io->meta_page);
block->magic = cpu_to_le32(R5LOG_MAGIC);
block->version = R5LOG_VERSION;
--
1.9.1
* [PATCH 09/11] raid5-cache: new helper: r5_reserve_log_entry
2015-10-05 7:31 raid5-cache I/O path improvements V3 Christoph Hellwig
` (7 preceding siblings ...)
2015-10-05 7:31 ` [PATCH 08/11] raid5-cache: inline r5l_alloc_io_unit into r5l_new_meta Christoph Hellwig
@ 2015-10-05 7:31 ` Christoph Hellwig
2015-10-05 7:31 ` [PATCH 10/11] raid5-cache: small log->seq cleanup Christoph Hellwig
` (2 subsequent siblings)
11 siblings, 0 replies; 14+ messages in thread
From: Christoph Hellwig @ 2015-10-05 7:31 UTC (permalink / raw)
To: Shaohua Li, neilb; +Cc: dan.j.williams, Kernel-team, linux-raid
Factor out code to reserve log space.
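The invariant the new helper centralizes, condensed from the diff below: a
log bio must not wrap past the end of the device, so when log_start wraps
back to sector 0 the current bio is retired and the next payload page starts
a new one.

	log->log_start = r5l_ring_add(log, log->log_start, BLOCK_SECTORS);
	if (log->log_start == 0)	/* hit the end of the log device */
		io->current_bio = NULL;	/* force a fresh bio for the next page */
	io->log_end = log->log_start;

As the new comment in the helper notes, this only works if the log size is a
multiple of BLOCK_SECTORS.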
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
drivers/md/raid5-cache.c | 30 +++++++++++++++++++-----------
1 file changed, 19 insertions(+), 11 deletions(-)
diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c
index b716c41..87fa3b5 100644
--- a/drivers/md/raid5-cache.c
+++ b/drivers/md/raid5-cache.c
@@ -273,6 +273,23 @@ static struct bio *r5l_bio_alloc(struct r5l_log *log, struct r5l_io_unit *io)
return bio;
}
+static void r5_reserve_log_entry(struct r5l_log *log, struct r5l_io_unit *io)
+{
+ log->log_start = r5l_ring_add(log, log->log_start, BLOCK_SECTORS);
+
+ /*
+ * If we filled up the log device start from the beginning again,
+ * which will require a new bio.
+ *
+ * Note: for this to work properly the log size needs to be a multiple
+ * of BLOCK_SECTORS.
+ */
+ if (log->log_start == 0)
+ io->current_bio = NULL;
+
+ io->log_end = log->log_start;
+}
+
static struct r5l_io_unit *r5l_new_meta(struct r5l_log *log)
{
struct r5l_io_unit *io;
@@ -301,11 +318,7 @@ static struct r5l_io_unit *r5l_new_meta(struct r5l_log *log)
bio_add_page(io->current_bio, io->meta_page, PAGE_SIZE, 0);
log->seq++;
- log->log_start = r5l_ring_add(log, log->log_start, BLOCK_SECTORS);
- io->log_end = log->log_start;
- /* current bio hit disk end */
- if (log->log_start == 0)
- io->current_bio = NULL;
+ r5_reserve_log_entry(log, io);
spin_lock_irq(&log->io_list_lock);
list_add_tail(&io->log_sibling, &log->running_ios);
@@ -359,13 +372,8 @@ alloc_bio:
io->current_bio = NULL;
goto alloc_bio;
}
- log->log_start = r5l_ring_add(log, log->log_start,
- BLOCK_SECTORS);
- /* current bio hit disk end */
- if (log->log_start == 0)
- io->current_bio = NULL;
- io->log_end = log->log_start;
+ r5_reserve_log_entry(log, io);
}
static void r5l_log_stripe(struct r5l_log *log, struct stripe_head *sh,
--
1.9.1
* [PATCH 10/11] raid5-cache: small log->seq cleanup
2015-10-05 7:31 raid5-cache I/O path improvements V3 Christoph Hellwig
` (8 preceding siblings ...)
2015-10-05 7:31 ` [PATCH 09/11] raid5-cache: new helper: r5_reserve_log_entry Christoph Hellwig
@ 2015-10-05 7:31 ` Christoph Hellwig
2015-10-05 7:31 ` [PATCH 11/11] raid5-cache: use bio chaining Christoph Hellwig
2015-10-06 18:14 ` raid5-cache I/O path improvements V3 Shaohua Li
11 siblings, 0 replies; 14+ messages in thread
From: Christoph Hellwig @ 2015-10-05 7:31 UTC (permalink / raw)
To: Shaohua Li, neilb; +Cc: dan.j.williams, Kernel-team, linux-raid
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
drivers/md/raid5-cache.c | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c
index 87fa3b5..e64f94e 100644
--- a/drivers/md/raid5-cache.c
+++ b/drivers/md/raid5-cache.c
@@ -312,12 +312,11 @@ static struct r5l_io_unit *r5l_new_meta(struct r5l_log *log)
io->log_start = log->log_start;
io->meta_offset = sizeof(struct r5l_meta_block);
- io->seq = log->seq;
+ io->seq = log->seq++;
io->current_bio = r5l_bio_alloc(log, io);
bio_add_page(io->current_bio, io->meta_page, PAGE_SIZE, 0);
- log->seq++;
r5_reserve_log_entry(log, io);
spin_lock_irq(&log->io_list_lock);
--
1.9.1
* [PATCH 11/11] raid5-cache: use bio chaining
2015-10-05 7:31 raid5-cache I/O path improvements V3 Christoph Hellwig
` (9 preceding siblings ...)
2015-10-05 7:31 ` [PATCH 10/11] raid5-cache: small log->seq cleanup Christoph Hellwig
@ 2015-10-05 7:31 ` Christoph Hellwig
2015-10-06 18:14 ` raid5-cache I/O path improvements V3 Shaohua Li
11 siblings, 0 replies; 14+ messages in thread
From: Christoph Hellwig @ 2015-10-05 7:31 UTC (permalink / raw)
To: Shaohua Li, neilb; +Cc: dan.j.williams, Kernel-team, linux-raid
Simplify the bio completion handler by using bio chaining and submitting
bios as soon as they are full.
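The chaining pattern in isolation, with names as in the diff below
(bio_chain(child, parent) defers the parent's bi_end_io until the child has
completed, so only the head bio of the chain needs r5l_log_endio):

	struct bio *prev = io->current_bio;

	io->current_bio = r5l_bio_alloc(log);	/* continuation bio */
	bio_chain(io->current_bio, prev);	/* prev now also waits for it */
	submit_bio(WRITE, prev);		/* the full bio can go out early */

Because completion propagates down the chain, the per-io_unit bio_list and
pending_io counter become unnecessary.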
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
drivers/md/raid5-cache.c | 38 ++++++++++++++++----------------------
1 file changed, 16 insertions(+), 22 deletions(-)
diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c
index e64f94e..040405e 100644
--- a/drivers/md/raid5-cache.c
+++ b/drivers/md/raid5-cache.c
@@ -100,8 +100,6 @@ struct r5l_io_unit {
struct page *meta_page; /* store meta block */
int meta_offset; /* current offset in meta_page */
- struct bio_list bios;
- atomic_t pending_io; /* pending bios not written to log yet */
struct bio *current_bio;/* current_bio accepting new data */
atomic_t pending_stripe;/* how many stripes not flushed to raid */
@@ -112,6 +110,7 @@ struct r5l_io_unit {
struct list_head stripe_list; /* stripes added to the io_unit */
int state;
+ bool need_split_bio;
};
/* r5l_io_unit state */
@@ -217,9 +216,6 @@ static void r5l_log_endio(struct bio *bio)
bio_put(bio);
- if (!atomic_dec_and_test(&io->pending_io))
- return;
-
spin_lock_irqsave(&log->io_list_lock, flags);
__r5l_set_io_unit_state(io, IO_UNIT_IO_END);
if (log->need_cache_flush)
@@ -237,7 +233,6 @@ static void r5l_submit_current_io(struct r5l_log *log)
{
struct r5l_io_unit *io = log->current_io;
struct r5l_meta_block *block;
- struct bio *bio;
unsigned long flags;
u32 crc;
@@ -254,22 +249,17 @@ static void r5l_submit_current_io(struct r5l_log *log)
__r5l_set_io_unit_state(io, IO_UNIT_IO_START);
spin_unlock_irqrestore(&log->io_list_lock, flags);
- while ((bio = bio_list_pop(&io->bios)))
- submit_bio(WRITE, bio);
+ submit_bio(WRITE, io->current_bio);
}
-static struct bio *r5l_bio_alloc(struct r5l_log *log, struct r5l_io_unit *io)
+static struct bio *r5l_bio_alloc(struct r5l_log *log)
{
struct bio *bio = bio_kmalloc(GFP_NOIO | __GFP_NOFAIL, BIO_MAX_PAGES);
bio->bi_rw = WRITE;
bio->bi_bdev = log->rdev->bdev;
bio->bi_iter.bi_sector = log->rdev->data_offset + log->log_start;
- bio->bi_end_io = r5l_log_endio;
- bio->bi_private = io;
- bio_list_add(&io->bios, bio);
- atomic_inc(&io->pending_io);
return bio;
}
@@ -285,7 +275,7 @@ static void r5_reserve_log_entry(struct r5l_log *log, struct r5l_io_unit *io)
* of BLOCK_SECTORS.
*/
if (log->log_start == 0)
- io->current_bio = NULL;
+ io->need_split_bio = true;
io->log_end = log->log_start;
}
@@ -298,7 +288,6 @@ static struct r5l_io_unit *r5l_new_meta(struct r5l_log *log)
/* We can't handle memory allocate failure so far */
io = kmem_cache_zalloc(log->io_kc, GFP_NOIO | __GFP_NOFAIL);
io->log = log;
- bio_list_init(&io->bios);
INIT_LIST_HEAD(&io->log_sibling);
INIT_LIST_HEAD(&io->stripe_list);
io->state = IO_UNIT_RUNNING;
@@ -314,7 +303,9 @@ static struct r5l_io_unit *r5l_new_meta(struct r5l_log *log)
io->meta_offset = sizeof(struct r5l_meta_block);
io->seq = log->seq++;
- io->current_bio = r5l_bio_alloc(log, io);
+ io->current_bio = r5l_bio_alloc(log);
+ io->current_bio->bi_end_io = r5l_log_endio;
+ io->current_bio->bi_private = io;
bio_add_page(io->current_bio, io->meta_page, PAGE_SIZE, 0);
r5_reserve_log_entry(log, io);
@@ -363,15 +354,18 @@ static void r5l_append_payload_page(struct r5l_log *log, struct page *page)
{
struct r5l_io_unit *io = log->current_io;
-alloc_bio:
- if (!io->current_bio)
- io->current_bio = r5l_bio_alloc(log, io);
+ if (io->need_split_bio) {
+ struct bio *prev = io->current_bio;
- if (!bio_add_page(io->current_bio, page, PAGE_SIZE, 0)) {
- io->current_bio = NULL;
- goto alloc_bio;
+ io->current_bio = r5l_bio_alloc(log);
+ bio_chain(io->current_bio, prev);
+
+ submit_bio(WRITE, prev);
}
+ if (!bio_add_page(io->current_bio, page, PAGE_SIZE, 0))
+ BUG();
+
r5_reserve_log_entry(log, io);
}
--
1.9.1
* Re: raid5-cache I/O path improvements V3
2015-10-05 7:31 raid5-cache I/O path improvements V3 Christoph Hellwig
` (10 preceding siblings ...)
2015-10-05 7:31 ` [PATCH 11/11] raid5-cache: use bio chaining Christoph Hellwig
@ 2015-10-06 18:14 ` Shaohua Li
2015-10-08 2:45 ` Neil Brown
11 siblings, 1 reply; 14+ messages in thread
From: Shaohua Li @ 2015-10-06 18:14 UTC (permalink / raw)
To: Christoph Hellwig; +Cc: neilb, dan.j.williams, Kernel-team, linux-raid
On Mon, Oct 05, 2015 at 09:31:05AM +0200, Christoph Hellwig wrote:
> Hi Shaohua, hi Neil,
>
> this series contains a few updates to the raid5-cache feature, and goes
> on top of dm/devel + the series Shaohua sent yesterday.
>
> Besides the rebase the only changes compared to the previous version
> is that we only bypass the batch and flush state for devices that
> do not have a volatile cache at all like persistent memory or high
> performance flash devices.
Thanks Christoph, patches look great.
Reviewed-by: Shaohua Li <shli@fb.com>
* Re: raid5-cache I/O path improvements V3
2015-10-06 18:14 ` raid5-cache I/O path improvements V3 Shaohua Li
@ 2015-10-08 2:45 ` Neil Brown
0 siblings, 0 replies; 14+ messages in thread
From: Neil Brown @ 2015-10-08 2:45 UTC (permalink / raw)
To: Shaohua Li, Christoph Hellwig; +Cc: dan.j.williams, Kernel-team, linux-raid
Shaohua Li <shli@fb.com> writes:
> On Mon, Oct 05, 2015 at 09:31:05AM +0200, Christoph Hellwig wrote:
>> Hi Shaohua, hi Neil,
>>
>> this series contains a few updates to the raid5-cache feature, and goes
>> on top of dm/devel + the series Shaohua sent yesterday.
>>
>> Besides the rebase the only changes compared to the previous version
>> is that we only bypasss the batch and flush state for devices that
>> do not have a volatile cache at all like persistent memory or high
>> performance flash devices.
>
> Thanks Christoph, patches look great.
>
> Reviewed-by: Shaohua Li <shli@fb.com>
Thanks. I've applied all these (fixing up a minor conflict due to an
earlier patch I didn't accept yet). They are all in my 'devel' branch.
Thanks,
NeilBrown