linux-raid.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Christoph Hellwig <hch@lst.de>
To: Shaohua Li <shli@fb.com>, neilb@suse.de
Cc: linux-raid@vger.kernel.org, Kernel-team@fb.com, dan.j.williams@intel.com
Subject: [PATCH 02/12] raid5-cache: free I/O units earlier
Date: Sat, 12 Sep 2015 08:17:08 +0200	[thread overview]
Message-ID: <1442038638-6947-3-git-send-email-hch@lst.de> (raw)
In-Reply-To: <1442038638-6947-1-git-send-email-hch@lst.de>

There is no good reason to keep the I/O unit structures around after the
stripe has been written back to the RAID array.  The only information
we need is the log sequence number, and the checkpoint offset of the
highest successful writeback.  Store those in the log structure, and
free the IO units from __r5l_stripe_write_finished.

Besides simplifying the code this also avoids having to keep the allocation
for the I/O unit around for a potentially long time as superblock updates
that checkpoint the log do not happen very often.

This also fixes the previously incorrect calculation of 'free' in
r5l_do_reclaim as a side effect: previously it took the last unit, which
isn't checkpointed, into account.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/md/raid5-cache.c | 145 ++++++++++++++++++-----------------------------
 1 file changed, 55 insertions(+), 90 deletions(-)

diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c
index c345479..803bcc6 100644
--- a/drivers/md/raid5-cache.c
+++ b/drivers/md/raid5-cache.c
@@ -51,6 +51,9 @@ struct r5l_log {
 	sector_t log_start; /* log head. where new data appends */
 	u64 seq; /* log head sequence */
 
+	sector_t next_checkpoint;
+	u64 next_cp_seq;
+
 	struct mutex io_mutex;
 	struct r5l_io_unit *current_io; /* current io_unit accepting new data */
 
@@ -65,10 +68,6 @@ struct r5l_log {
 					* cache flush */
 	struct list_head flushed_ios; /* io_units which settle down in log disk */
 	struct bio flush_bio;
-	struct list_head stripe_end_ios; /* io_units which have been
-					  * completely written to the RAID *
-					  * but have not yet been considered *
-					  * for updating super */
 
 	struct kmem_cache *io_kc;
 
@@ -185,35 +184,6 @@ static void r5l_move_io_unit_list(struct list_head *from, struct list_head *to,
        }
 }
 
-/*
- * We don't want too many io_units reside in stripe_end_ios list, which will
- * waste a lot of memory. So we try to remove some. But we must keep at least 2
- * io_units. The superblock must point to a valid meta, if it's the last meta,
- * recovery can scan less
- * */
-static void r5l_compress_stripe_end_list(struct r5l_log *log)
-{
-	struct r5l_io_unit *first, *last, *io;
-
-	first = list_first_entry(&log->stripe_end_ios,
-		struct r5l_io_unit, log_sibling);
-	last = list_last_entry(&log->stripe_end_ios,
-		struct r5l_io_unit, log_sibling);
-	if (first == last)
-		return;
-	list_del(&first->log_sibling);
-	list_del(&last->log_sibling);
-	while (!list_empty(&log->stripe_end_ios)) {
-		io = list_first_entry(&log->stripe_end_ios,
-			struct r5l_io_unit, log_sibling);
-		list_del(&io->log_sibling);
-		first->log_end = io->log_end;
-		r5l_free_io_unit(log, io);
-	}
-	list_add_tail(&first->log_sibling, &log->stripe_end_ios);
-	list_add_tail(&last->log_sibling, &log->stripe_end_ios);
-}
-
 static void __r5l_set_io_unit_state(struct r5l_io_unit *io,
 	enum r5l_io_unit_state state)
 {
@@ -540,31 +510,53 @@ static void r5l_run_no_space_stripes(struct r5l_log *log)
 	spin_unlock(&log->no_space_stripes_lock);
 }
 
+static sector_t r5l_reclaimable_space(struct r5l_log *log)
+{
+	return r5l_ring_distance(log, log->last_checkpoint,
+				 log->next_checkpoint);
+}
+
+static bool r5l_complete_flushed_ios(struct r5l_log *log)
+{
+	struct r5l_io_unit *io, *next;
+	bool found = false;
+
+	assert_spin_locked(&log->io_list_lock);
+
+	list_for_each_entry_safe(io, next, &log->flushed_ios, log_sibling) {
+		/* don't change list order */
+		if (io->state < IO_UNIT_STRIPE_END)
+			break;
+
+		log->next_checkpoint = io->log_start;
+		log->next_cp_seq = io->seq;
+
+		list_del(&io->log_sibling);
+		r5l_free_io_unit(log, io);
+
+		found = true;
+	}
+	
+
+	return found;
+}
+
 static void __r5l_stripe_write_finished(struct r5l_io_unit *io)
 {
 	struct r5l_log *log = io->log;
-	struct r5l_io_unit *last;
-	sector_t reclaimable_space;
 	unsigned long flags;
 
 	spin_lock_irqsave(&log->io_list_lock, flags);
 	__r5l_set_io_unit_state(io, IO_UNIT_STRIPE_END);
-	/* might move 0 entry */
-	r5l_move_io_unit_list(&log->flushed_ios, &log->stripe_end_ios,
-		IO_UNIT_STRIPE_END);
-	if (list_empty(&log->stripe_end_ios)) {
+
+	if (!r5l_complete_flushed_ios(log)) {
 		spin_unlock_irqrestore(&log->io_list_lock, flags);
 		return;
 	}
 
-	last = list_last_entry(&log->stripe_end_ios,
-			struct r5l_io_unit, log_sibling);
-	reclaimable_space = r5l_ring_distance(log, log->last_checkpoint,
-				last->log_end);
-	if (reclaimable_space >= log->max_free_space)
+	if (r5l_reclaimable_space(log) > log->max_free_space)
 		r5l_wake_reclaim(log, 0);
 
-	r5l_compress_stripe_end_list(log);
 	spin_unlock_irqrestore(&log->io_list_lock, flags);
 	wake_up(&log->iounit_wait);
 }
@@ -640,13 +632,6 @@ void r5l_flush_stripe_to_raid(struct r5l_log *log)
 	submit_bio(WRITE_FLUSH, &log->flush_bio);
 }
 
-static void r5l_kick_io_unit(struct r5l_log *log)
-{
-	md_wakeup_thread(log->rdev->mddev->thread);
-	wait_event_lock_irq(log->iounit_wait, !list_empty(&log->stripe_end_ios),
-		log->io_list_lock);
-}
-
 static void r5l_write_super(struct r5l_log *log, sector_t cp);
 static void r5l_write_super_and_discard_space(struct r5l_log *log,
 	sector_t end)
@@ -678,10 +663,10 @@ static void r5l_write_super_and_discard_space(struct r5l_log *log,
 
 static void r5l_do_reclaim(struct r5l_log *log)
 {
-	struct r5l_io_unit *io, *last;
-	LIST_HEAD(list);
-	sector_t free = 0;
 	sector_t reclaim_target = xchg(&log->reclaim_target, 0);
+	sector_t reclaimable;
+	sector_t next_checkpoint;
+	u64 next_cp_seq;
 
 	spin_lock_irq(&log->io_list_lock);
 	/*
@@ -690,60 +675,41 @@ static void r5l_do_reclaim(struct r5l_log *log)
 	 * shouldn't reuse space of an unreclaimable io_unit
 	 * */
 	while (1) {
-		struct list_head *target_list = NULL;
-
-		while (!list_empty(&log->stripe_end_ios)) {
-			io = list_first_entry(&log->stripe_end_ios,
-				struct r5l_io_unit, log_sibling);
-			list_move_tail(&io->log_sibling, &list);
-			free += r5l_ring_distance(log, io->log_start,
-						io->log_end);
-		}
-
-		if (free >= reclaim_target ||
+		reclaimable = r5l_reclaimable_space(log);
+		if (reclaimable >= reclaim_target ||
 		    (list_empty(&log->running_ios) &&
 		     list_empty(&log->io_end_ios) &&
 		     list_empty(&log->flushing_ios) &&
 		     list_empty(&log->flushed_ios)))
 			break;
 
-		/* Below waiting mostly happens when we shutdown the raid */
-		if (!list_empty(&log->flushed_ios))
-			target_list = &log->flushed_ios;
-		else if (!list_empty(&log->flushing_ios))
-			target_list = &log->flushing_ios;
-		else if (!list_empty(&log->io_end_ios))
-			target_list = &log->io_end_ios;
-		else if (!list_empty(&log->running_ios))
-			target_list = &log->running_ios;
-
-		r5l_kick_io_unit(log);
+		md_wakeup_thread(log->rdev->mddev->thread);
+		wait_event_lock_irq(log->iounit_wait,
+				    r5l_reclaimable_space(log) > reclaimable,
+				    log->io_list_lock);
 	}
+
+	next_checkpoint = log->next_checkpoint;
+	next_cp_seq = log->next_cp_seq;
 	spin_unlock_irq(&log->io_list_lock);
 
-	if (list_empty(&list))
+	BUG_ON(reclaimable < 0);
+	if (reclaimable == 0)
 		return;
 
-	/* super always point to last valid meta */
-	last = list_last_entry(&list, struct r5l_io_unit, log_sibling);
 	/*
 	 * write_super will flush cache of each raid disk. We must write super
 	 * here, because the log area might be reused soon and we don't want to
 	 * confuse recovery
 	 * */
-	r5l_write_super_and_discard_space(log, last->log_start);
+	r5l_write_super_and_discard_space(log, next_checkpoint);
 
 	mutex_lock(&log->io_mutex);
-	log->last_checkpoint = last->log_start;
-	log->last_cp_seq = last->seq;
+	log->last_checkpoint = next_checkpoint;
+	log->last_cp_seq = next_cp_seq;
 	mutex_unlock(&log->io_mutex);
-	r5l_run_no_space_stripes(log);
 
-	while (!list_empty(&list)) {
-		io = list_first_entry(&list, struct r5l_io_unit, log_sibling);
-		list_del(&io->log_sibling);
-		r5l_free_io_unit(log, io);
-	}
+	r5l_run_no_space_stripes(log);
 }
 
 static void r5l_reclaim_thread(struct md_thread *thread)
@@ -1105,7 +1071,6 @@ int r5l_init_log(struct r5conf *conf, struct md_rdev *rdev)
 	spin_lock_init(&log->io_list_lock);
 	INIT_LIST_HEAD(&log->running_ios);
 	INIT_LIST_HEAD(&log->io_end_ios);
-	INIT_LIST_HEAD(&log->stripe_end_ios);
 	INIT_LIST_HEAD(&log->flushing_ios);
 	INIT_LIST_HEAD(&log->flushed_ios);
 	bio_init(&log->flush_bio);
-- 
1.9.1


  parent reply	other threads:[~2015-09-12  6:17 UTC|newest]

Thread overview: 24+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-09-12  6:17 raid5-cache I/O path improvements V2 Christoph Hellwig
2015-09-12  6:17 ` [PATCH 01/12] raid5-cache: port to 4.3-rc Christoph Hellwig
2015-09-12  6:17 ` Christoph Hellwig [this message]
2015-09-15  7:00   ` [PATCH 02/12] raid5-cache: free I/O units earlier Neil Brown
2015-09-17  1:50     ` Christoph Hellwig
2015-09-15  8:07   ` Neil Brown
2015-09-17  1:48     ` Christoph Hellwig
2015-09-12  6:17 ` [PATCH 03/12] raid5-cache: rename flushed_ios to finished_ios Christoph Hellwig
2015-09-12  6:17 ` [PATCH 04/12] raid5-cache: factor out a helper to run all stripes for an I/O unit Christoph Hellwig
2015-09-12  6:17 ` [PATCH 05/12] raid5-cache: use FUA writes for the log Christoph Hellwig
2015-09-12  6:17 ` [PATCH 06/12] raid5-cache: clean up r5l_get_meta Christoph Hellwig
2015-09-12  6:17 ` [PATCH 07/12] raid5-cache: refactor bio allocation Christoph Hellwig
2015-09-12  6:17 ` [PATCH 08/12] raid5-cache: take rdev->data_offset into account early on Christoph Hellwig
2015-09-12  6:17 ` [PATCH 09/12] raid5-cache: inline r5l_alloc_io_unit into r5l_new_meta Christoph Hellwig
2015-09-12  6:17 ` [PATCH 10/12] raid5-cache: new helper: r5_reserve_log_entry Christoph Hellwig
2015-09-12  6:17 ` [PATCH 11/12] raid5-cache: small log->seq cleanup Christoph Hellwig
2015-09-12  6:17 ` [PATCH 12/12] raid5-cache: use bio chaining Christoph Hellwig
2015-09-14 19:11 ` raid5-cache I/O path improvements V2 Shaohua Li
2015-09-15  7:23   ` Neil Brown
2015-09-15 21:54     ` Shaohua Li
2015-09-17  1:53       ` Christoph Hellwig
2015-09-28 14:01       ` Christoph Hellwig
2015-09-30  5:39         ` Neil Brown
2015-09-30 15:00           ` Christoph Hellwig

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1442038638-6947-3-git-send-email-hch@lst.de \
    --to=hch@lst.de \
    --cc=Kernel-team@fb.com \
    --cc=dan.j.williams@intel.com \
    --cc=linux-raid@vger.kernel.org \
    --cc=neilb@suse.de \
    --cc=shli@fb.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).