linux-raid.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Artur Paszkiewicz <artur.paszkiewicz@intel.com>
To: shli@kernel.org
Cc: linux-raid@vger.kernel.org
Subject: [PATCH v2 11/12] raid5-ppl: support disk add/remove with distributed PPL
Date: Mon,  5 Dec 2016 16:31:12 +0100	[thread overview]
Message-ID: <20161205153113.7268-12-artur.paszkiewicz@intel.com> (raw)
In-Reply-To: <20161205153113.7268-1-artur.paszkiewicz@intel.com>

Add a function to modify the log by adding or removing an rdev when a
drive fails or is added as a spare.

Adding a drive to the log is as simple as initializing and adding a new
child log. Removing a drive is more complicated because it requires
stopping the child log and freeing all of its resources. In order to do
that, we busy wait for any submitted log bios to complete and then
manually finish and free the io_units. No new log requests will happen
at this point. A new list is added to struct r5l_io_unit to have access
to stripes that have been written to the log but are not completely
processed yet.

Signed-off-by: Artur Paszkiewicz <artur.paszkiewicz@intel.com>
---
 drivers/md/md.c          |  3 +-
 drivers/md/raid5-cache.c | 13 ++++++-
 drivers/md/raid5-cache.h |  3 ++
 drivers/md/raid5-ppl.c   | 89 ++++++++++++++++++++++++++++++++++++++++++++++++
 drivers/md/raid5.c       | 20 +++++++++++
 5 files changed, 126 insertions(+), 2 deletions(-)

diff --git a/drivers/md/md.c b/drivers/md/md.c
index 7049833..279e303 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -8334,7 +8334,8 @@ static int remove_and_add_spares(struct mddev *mddev,
 		    !test_bit(Blocked, &rdev->flags) &&
 		    ((test_bit(RemoveSynchronized, &rdev->flags) ||
 		     (!test_bit(In_sync, &rdev->flags) &&
-		      !test_bit(Journal, &rdev->flags))) &&
+		      !test_bit(Journal, &rdev->flags) &&
+		      !test_bit(JournalPpl, &rdev->flags))) &&
 		    atomic_read(&rdev->nr_pending)==0)) {
 			if (mddev->pers->hot_remove_disk(
 				    mddev, rdev) == 0) {
diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c
index be534d8..b69a289 100644
--- a/drivers/md/raid5-cache.c
+++ b/drivers/md/raid5-cache.c
@@ -345,7 +345,7 @@ void r5l_io_run_stripes(struct r5l_io_unit *io)
 	struct stripe_head *sh, *next;
 
 	list_for_each_entry_safe(sh, next, &io->stripe_list, log_list) {
-		list_del_init(&sh->log_list);
+		list_move_tail(&sh->log_list, &io->stripe_finished_list);
 
 		r5c_finish_cache_stripe(sh);
 
@@ -553,6 +553,7 @@ static struct r5l_io_unit *r5l_new_meta(struct r5l_log *log)
 	io->log = log;
 	INIT_LIST_HEAD(&io->log_sibling);
 	INIT_LIST_HEAD(&io->stripe_list);
+	INIT_LIST_HEAD(&io->stripe_finished_list);
 	bio_list_init(&io->flush_barriers);
 	io->state = IO_UNIT_RUNNING;
 
@@ -2546,6 +2547,16 @@ void r5l_exit_log(struct r5l_log *log)
 	kfree(log);
 }
 
+/*
+ * Forward an rdev add/remove request to the log's policy.
+ *
+ * operation: 0 - remove rdev from log, 1 - add rdev to log
+ *
+ * Returns 0 when there is no log or the policy provides no modify_log
+ * hook; otherwise returns whatever the policy's modify_log hook returns.
+ */
+int r5l_modify_log(struct r5l_log *log, struct md_rdev *rdev, int operation)
+{
+	if (!log || !log->policy->modify_log)
+		return 0;
+
+	return log->policy->modify_log(log, rdev, operation);
+}
+
 struct r5l_policy r5l_journal = {
 	.init_log = __r5l_init_log,
 	.exit_log = __r5l_exit_log,
diff --git a/drivers/md/raid5-cache.h b/drivers/md/raid5-cache.h
index 0446100..9d5fa0df 100644
--- a/drivers/md/raid5-cache.h
+++ b/drivers/md/raid5-cache.h
@@ -110,6 +110,7 @@ struct r5l_io_unit {
 	sector_t log_end;	/* where the io_unit ends */
 	struct list_head log_sibling; /* log->running_ios */
 	struct list_head stripe_list; /* stripes added to the io_unit */
+	struct list_head stripe_finished_list; /* stripes written to log */
 
 	int state;
 	bool need_split_bio;
@@ -139,6 +140,7 @@ enum r5l_io_unit_state {
 struct r5l_policy {
 	int (*init_log)(struct r5l_log *log, struct r5conf *conf);
 	void (*exit_log)(struct r5l_log *log);
+	int (*modify_log)(struct r5l_log *log, struct md_rdev *rdev, int op);
 	int (*write_stripe)(struct r5l_log *log, struct stripe_head *sh);
 	void (*write_stripe_run)(struct r5l_log *log);
 	void (*flush_stripe_to_raid)(struct r5l_log *log);
@@ -149,6 +151,7 @@ struct r5l_policy {
 
 extern int r5l_init_log(struct r5conf *conf, struct md_rdev *rdev, int policy_type);
 extern void r5l_exit_log(struct r5l_log *log);
+extern int r5l_modify_log(struct r5l_log *log, struct md_rdev *rdev, int operation);
 extern int r5l_write_stripe(struct r5l_log *log, struct stripe_head *sh);
 extern void r5l_write_stripe_run(struct r5l_log *log);
 extern void r5l_flush_stripe_to_raid(struct r5l_log *log);
diff --git a/drivers/md/raid5-ppl.c b/drivers/md/raid5-ppl.c
index 17e9803..1a9581c 100644
--- a/drivers/md/raid5-ppl.c
+++ b/drivers/md/raid5-ppl.c
@@ -108,6 +108,7 @@ static struct r5l_io_unit *ppl_new_iounit(struct r5l_log *log,
 	io->log = log;
 	INIT_LIST_HEAD(&io->log_sibling);
 	INIT_LIST_HEAD(&io->stripe_list);
+	INIT_LIST_HEAD(&io->stripe_finished_list);
 	io->state = IO_UNIT_RUNNING;
 
 	io->meta_page = mempool_alloc(log->meta_pool, GFP_NOIO);
@@ -990,6 +991,93 @@ static int __ppl_init_log(struct r5l_log *log, struct r5conf *conf)
 	return ret;
 }
 
+/*
+ * Report whether the log still has io in flight: a running io_unit in
+ * state IO_UNIT_IO_START, or any entry on flushing_ios. The check is done
+ * under io_list_lock, which is taken and released here.
+ */
+static bool ppl_log_busy(struct r5l_log *log)
+{
+	struct r5l_io_unit *unit;
+	unsigned long irq_flags;
+	bool busy = false;
+
+	spin_lock_irqsave(&log->io_list_lock, irq_flags);
+	list_for_each_entry(unit, &log->running_ios, log_sibling) {
+		if (unit->state == IO_UNIT_IO_START) {
+			busy = true;
+			break;
+		}
+	}
+	if (!busy && !list_empty(&log->flushing_ios))
+		busy = true;
+	spin_unlock_irqrestore(&log->io_list_lock, irq_flags);
+
+	return busy;
+}
+
+/*
+ * Stop a log: busy wait until no io is in flight, then finish and free
+ * every remaining io_unit while holding io_list_lock.
+ */
+static void ppl_log_stop(struct r5l_log *log)
+{
+	struct r5l_io_unit *unit, *tmp;
+	struct stripe_head *sh;
+	unsigned long irq_flags;
+
+	/* wait for in flight ios to complete */
+	while (ppl_log_busy(log))
+		;
+
+	/* clean up iounits */
+	spin_lock_irqsave(&log->io_list_lock, irq_flags);
+
+	/*
+	 * io_units still on running_ios: queue them on finished_ios and drop
+	 * their bio and meta page.
+	 */
+	list_for_each_entry_safe(unit, tmp, &log->running_ios, log_sibling) {
+		list_move_tail(&unit->log_sibling, &log->finished_ios);
+		bio_put(unit->current_bio);
+		mempool_free(unit->meta_page, log->meta_pool);
+	}
+	list_splice_tail_init(&log->io_end_ios, &log->finished_ios);
+
+	list_for_each_entry_safe(unit, tmp, &log->finished_ios, log_sibling) {
+		/* detach stripes that are still attached to the io_unit */
+		list_for_each_entry(sh, &unit->stripe_list, log_list) {
+			clear_bit(STRIPE_LOG_TRAPPED, &sh->state);
+			sh->log_io = NULL;
+		}
+
+		/* this moves the stripes onto stripe_finished_list */
+		r5l_io_run_stripes(unit);
+
+		list_for_each_entry(sh, &unit->stripe_finished_list, log_list)
+			sh->log_io = NULL;
+
+		list_del(&unit->log_sibling);
+		mempool_free(unit, log->io_pool);
+	}
+
+	r5l_run_no_mem_stripe(log);
+	spin_unlock_irqrestore(&log->io_list_lock, irq_flags);
+}
+
+/*
+ * Add or remove the child log that belongs to rdev's slot.
+ *
+ * op: 0 - stop and release the child log for rdev->raid_disk
+ *     1 - initialize a child log for rdev, write an empty header to it and
+ *         install it in ppl_conf->child_logs[]
+ *
+ * Returns 0 on success or when there is nothing to do (rdev->raid_disk is
+ * negative, or no child log exists on removal); -EINVAL for a NULL rdev or
+ * an unknown op; -ENODEV when rdev->raid_disk is outside child_logs[]; or
+ * the error returned by ppl_init_log_child() / ppl_write_empty_header().
+ */
+static int __ppl_modify_log(struct r5l_log *log, struct md_rdev *rdev, int op)
+{
+	struct r5l_log *log_child;
+	struct ppl_conf *ppl_conf = log->private;
+	int ret;
+
+	if (!rdev)
+		return -EINVAL;
+
+	dbg("rdev->raid_disk: %d op: %d\n", rdev->raid_disk, op);
+
+	if (rdev->raid_disk < 0)
+		return 0;
+
+	if (rdev->raid_disk >= ppl_conf->count)
+		return -ENODEV;
+
+	if (op == 0) {
+		log_child = ppl_conf->child_logs[rdev->raid_disk];
+		if (!log_child)
+			return 0;
+		/* clear the slot before the child is stopped and released */
+		ppl_conf->child_logs[rdev->raid_disk] = NULL;
+		ppl_log_stop(log_child);
+		ppl_exit_log_child(log_child);
+	} else if (op == 1) {
+		ret = ppl_init_log_child(log, rdev, &log_child);
+		if (ret)
+			return ret;
+		ret = ppl_write_empty_header(log_child);
+		if (ret) {
+			/*
+			 * The child was never installed in child_logs[], so
+			 * no later removal would find it; release it here
+			 * instead of leaking it.
+			 */
+			ppl_exit_log_child(log_child);
+			return ret;
+		}
+		ppl_conf->child_logs[rdev->raid_disk] = log_child;
+	} else {
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
 static int __ppl_write_stripe(struct r5l_log *log, struct stripe_head *sh)
 {
 	struct ppl_conf *ppl_conf = log->private;
@@ -1027,6 +1115,7 @@ static void __ppl_flush_stripe_to_raid(struct r5l_log *log)
 struct r5l_policy r5l_ppl = {
 	.init_log = __ppl_init_log,
 	.exit_log = __ppl_exit_log,
+	.modify_log = __ppl_modify_log,
 	.write_stripe = __ppl_write_stripe,
 	.write_stripe_run = __ppl_write_stripe_run,
 	.flush_stripe_to_raid = __ppl_flush_stripe_to_raid,
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index ed340c3..67c8dce 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -7466,6 +7466,19 @@ static int raid5_remove_disk(struct mddev *mddev, struct md_rdev *rdev)
 			*rdevp = rdev;
 		}
 	}
+	if (test_bit(JournalPpl, &rdev->flags) && conf->log) {
+		int ret;
+		if (conf->log->rwh_policy != RWH_POLICY_PPL)
+			return -EINVAL;
+		ret = r5l_modify_log(conf->log, rdev, 0);
+		if (ret)
+			return ret;
+		if (p->replacement) {
+			ret = r5l_modify_log(conf->log, p->replacement, 1);
+			if (ret)
+				return ret;
+		}
+	}
 	if (p->replacement) {
 		/* We must have just cleared 'rdev' */
 		p->rdev = p->replacement;
@@ -7558,6 +7571,13 @@ static int raid5_add_disk(struct mddev *mddev, struct md_rdev *rdev)
 		}
 	}
 out:
+	if (conf->log && !test_bit(Replacement, &rdev->flags) &&
+	    conf->log->rwh_policy == RWH_POLICY_PPL) {
+		int ret = r5l_modify_log(conf->log, rdev, 1);
+		if (ret)
+			return ret;
+	}
+
 	print_raid5_conf(conf);
 	return err;
 }
-- 
2.10.1


  parent reply	other threads:[~2016-12-05 15:31 UTC|newest]

Thread overview: 38+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-12-05 15:31 [PATCH v2 00/12] Partial Parity Log for MD RAID 5 Artur Paszkiewicz
2016-12-05 15:31 ` [PATCH v2 01/12] raid5-cache: move declarations to separate header Artur Paszkiewicz
2016-12-05 15:31 ` [PATCH v2 02/12] raid5-cache: add policy logic Artur Paszkiewicz
2016-12-05 15:31 ` [PATCH v2 03/12] raid5-cache: add a new policy Artur Paszkiewicz
2016-12-07  0:46   ` NeilBrown
2016-12-07 14:36     ` Artur Paszkiewicz
2016-12-07 23:24       ` NeilBrown
2016-12-08 10:28         ` Artur Paszkiewicz
2016-12-08 21:22           ` NeilBrown
2016-12-05 15:31 ` [PATCH v2 04/12] md: superblock changes for PPL Artur Paszkiewicz
2016-12-05 15:31 ` [PATCH v2 05/12] raid5-ppl: Partial Parity Log implementation Artur Paszkiewicz
2016-12-06  1:06   ` kbuild test robot
2016-12-07  1:17   ` NeilBrown
2016-12-07 14:37     ` Artur Paszkiewicz
2016-12-05 15:31 ` [PATCH v2 06/12] raid5-ppl: calculate partial parity Artur Paszkiewicz
2016-12-05 15:31 ` [PATCH v2 07/12] md: mddev_find_container helper function Artur Paszkiewicz
2016-12-07  1:23   ` NeilBrown
2016-12-05 15:31 ` [PATCH v2 08/12] md: expose rdev->sb_start as sysfs attribute Artur Paszkiewicz
2016-12-07  1:25   ` NeilBrown
2016-12-05 15:31 ` [PATCH v2 09/12] raid5-ppl: read PPL signature from IMSM metadata Artur Paszkiewicz
2016-12-07  1:25   ` NeilBrown
2016-12-07 14:38     ` Artur Paszkiewicz
2016-12-07 23:27       ` NeilBrown
2016-12-08 10:36         ` Artur Paszkiewicz
2016-12-05 15:31 ` [PATCH v2 10/12] raid5-ppl: recovery from dirty shutdown using PPL Artur Paszkiewicz
2016-12-05 15:31 ` Artur Paszkiewicz [this message]
2016-12-07  1:29   ` [PATCH v2 11/12] raid5-ppl: support disk add/remove with distributed PPL NeilBrown
2016-12-05 15:31 ` [PATCH v2 12/12] raid5-ppl: runtime PPL enabling or disabling Artur Paszkiewicz
2016-12-07  0:32 ` [PATCH v2 00/12] Partial Parity Log for MD RAID 5 NeilBrown
2016-12-07 14:36   ` Artur Paszkiewicz
2016-12-07 17:09     ` Shaohua Li
2016-12-13 15:25       ` Jes Sorensen
2016-12-14 19:47         ` Shaohua Li
2016-12-15 11:44           ` Artur Paszkiewicz
2016-12-16 23:24             ` Shaohua Li
2017-01-03 15:42               ` Jes Sorensen
2017-01-04  8:01                 ` Artur Paszkiewicz
2017-01-04 13:29                   ` Jes Sorensen

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20161205153113.7268-12-artur.paszkiewicz@intel.com \
    --to=artur.paszkiewicz@intel.com \
    --cc=linux-raid@vger.kernel.org \
    --cc=shli@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).