From: NeilBrown <neilb@suse.de>
To: linux-raid@vger.kernel.org
Subject: [PATCH 15/18] md: hopefully enable suspend/resume of md devices.
Date: Thu, 12 Feb 2009 14:10:11 +1100 [thread overview]
Message-ID: <20090212031011.23983.37119.stgit@notabene.brown> (raw)
In-Reply-To: <20090212031009.23983.14496.stgit@notabene.brown>
---
drivers/md/md.c | 79 ++++++++++++++++++++++++++++++++++++---------
include/linux/raid/md_k.h | 2 +
2 files changed, 66 insertions(+), 15 deletions(-)
diff --git a/drivers/md/md.c b/drivers/md/md.c
index cbe0f20..0e0e1ff 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -202,12 +202,68 @@ static DEFINE_SPINLOCK(all_mddevs_lock);
)
-static int md_fail_request(struct request_queue *q, struct bio *bio)
+/* Rather than calling directly into the personality make_request function,
+ * IO requests come here first so that we can check if the device is
+ * being suspended pending a reconfiguration.
+ * We hold a refcount over the call to ->make_request. By the time that
+ * call has finished, the bio has been linked into some internal structure
+ * and so is visible to ->quiesce(), so we don't need the refcount any more.
+ */
+static int md_make_request(struct request_queue *q, struct bio *bio)
{
- bio_io_error(bio);
- return 0;
+ mddev_t *mddev = q->queuedata;
+ int rv;
+ if (mddev == NULL || mddev->pers == NULL) {
+ bio_io_error(bio);
+ return 0;
+ }
+ rcu_read_lock();
+ if (mddev->suspended) {
+ DEFINE_WAIT(__wait);
+ for(;;) {
+ prepare_to_wait(&mddev->sb_wait, &__wait,
+ TASK_UNINTERRUPTIBLE);
+ if (!mddev->suspended)
+ break;
+ rcu_read_unlock();
+ schedule();
+ rcu_read_lock();
+ }
+ finish_wait(&mddev->sb_wait, &__wait);
+ }
+ atomic_inc(&mddev->active_io);
+ rcu_read_unlock();
+ rv = mddev->pers->make_request(q, bio);
+ if (atomic_dec_and_test(&mddev->active_io) && mddev->suspended)
+ wake_up(&mddev->sb_wait);
+
+ return rv;
}
+static void mddev_suspend(mddev_t *mddev)
+{
+ BUG_ON(mddev->suspended);
+ mddev->suspended = 1;
+ synchronize_rcu();
+ wait_event(mddev->sb_wait, atomic_read(&mddev->active_io) == 0);
+ mddev->pers->quiesce(mddev, 1);
+ md_unregister_thread(mddev->thread);
+ mddev->thread = NULL;
+ /* we now know that no code is executing in the personality module,
+ * except possibly the tail end of a ->bi_end_io function, but that
+ * is certain to complete before the module has a chance to get
+ * unloaded
+ */
+}
+
+static void mddev_resume(mddev_t *mddev)
+{
+ mddev->suspended = 0;
+ wake_up(&mddev->sb_wait);
+ mddev->pers->quiesce(mddev, 0);
+}
+
+
static inline mddev_t *mddev_get(mddev_t *mddev)
{
atomic_inc(&mddev->active);
@@ -315,6 +371,7 @@ static mddev_t * mddev_find(dev_t unit)
init_timer(&new->safemode_timer);
atomic_set(&new->active, 1);
atomic_set(&new->openers, 0);
+ atomic_set(&new->active_io, 0);
spin_lock_init(&new->write_lock);
init_waitqueue_head(&new->sb_wait);
init_waitqueue_head(&new->recovery_wait);
@@ -3597,10 +3654,12 @@ static int md_alloc(dev_t dev, char *name)
mddev_put(mddev);
return -ENOMEM;
}
+ mddev->queue->queuedata = mddev;
+
/* Can be unlocked because the queue is new: no concurrency */
queue_flag_set_unlocked(QUEUE_FLAG_CLUSTER, mddev->queue);
- blk_queue_make_request(mddev->queue, md_fail_request);
+ blk_queue_make_request(mddev->queue, md_make_request);
disk = alloc_disk(1 << shift);
if (!disk) {
@@ -3896,16 +3955,6 @@ static int do_md_run(mddev_t * mddev)
set_capacity(disk, mddev->array_sectors);
- /* If we call blk_queue_make_request here, it will
- * re-initialise max_sectors etc which may have been
- * refined inside -> run. So just set the bits we need to set.
- * Most initialisation happended when we called
- * blk_queue_make_request(..., md_fail_request)
- * earlier.
- */
- mddev->queue->queuedata = mddev;
- mddev->queue->make_request_fn = mddev->pers->make_request;
-
/* If there is a partially-recovered drive we need to
* start recovery here. If we leave it to md_check_recovery,
* it will remove the drives and not do the right thing
@@ -4035,7 +4084,7 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open)
md_super_wait(mddev);
if (mddev->ro)
set_disk_ro(disk, 0);
- blk_queue_make_request(mddev->queue, md_fail_request);
+
mddev->pers->stop(mddev);
mddev->queue->merge_bvec_fn = NULL;
mddev->queue->unplug_fn = NULL;
diff --git a/include/linux/raid/md_k.h b/include/linux/raid/md_k.h
index 9598155..a815bab 100644
--- a/include/linux/raid/md_k.h
+++ b/include/linux/raid/md_k.h
@@ -132,6 +132,8 @@ struct mddev_s
#define MD_CHANGE_CLEAN 1 /* transition to or from 'clean' */
#define MD_CHANGE_PENDING 2 /* superblock update in progress */
+ int suspended;
+ atomic_t active_io;
int ro;
struct gendisk *gendisk;
next prev parent reply other threads:[~2009-02-12 3:10 UTC|newest]
Thread overview: 48+ messages / expand[flat|nested] mbox.gz Atom feed top
2009-02-12 3:10 [PATCH 00/18] Assorted md patches headed for 2.6.30 NeilBrown
2009-02-12 3:10 ` [PATCH 01/18] md: never clear bit from the write-intent bitmap when the array is degraded NeilBrown
2009-02-12 3:10 ` [PATCH 08/18] md/raid5: change raid5_compute_sector and stripe_to_pdidx to take a 'previous' argument NeilBrown
2009-02-12 3:10 ` [PATCH 04/18] md: be more consistent about setting WriteMostly flag when adding a drive to an array NeilBrown
2009-02-12 3:10 ` [PATCH 07/18] md/raid5: simplify interface for init_stripe and get_active_stripe NeilBrown
2009-02-12 3:10 ` [PATCH 05/18] md: Make mddev->size sector-based NeilBrown
2009-02-12 3:10 ` [PATCH 02/18] md: write bitmap information to devices that are undergoing recovery NeilBrown
2009-02-12 3:10 ` [PATCH 06/18] md: Represent raid device size in sectors NeilBrown
2009-02-12 3:10 ` [PATCH 03/18] md: occasionally checkpoint drive recovery to reduce duplicate effort after a crash NeilBrown
2009-02-12 17:26 ` John Stoffel
2009-02-13 16:20 ` Bill Davidsen
2009-02-13 16:34 ` Jon Nelson
2009-02-12 3:10 ` [PATCH 17/18] md: add ->takeover method for raid5 to be able to take over raid1 NeilBrown
2009-02-12 3:10 ` [PATCH 16/18] md: add ->takeover method to support changing the personality managing an array NeilBrown
2009-02-12 3:10 ` [PATCH 10/18] md/raid5: simplify raid5_compute_sector interface NeilBrown
2009-02-12 3:10 ` [PATCH 09/18] md/raid6: remove expectation that Q device is immediately after P device NeilBrown
2009-02-12 16:56 ` Andre Noll
2009-02-13 22:19 ` Dan Williams
2009-02-16 0:08 ` Neil Brown
2009-02-13 16:37 ` Bill Davidsen
2009-02-16 5:15 ` Neil Brown
2009-02-12 3:10 ` [PATCH 14/18] md: md_unregister_thread should cope with being passed NULL NeilBrown
2009-02-12 3:10 ` [PATCH 11/18] md/raid5: Add support for new layouts for raid5 and raid6 NeilBrown
2009-02-12 3:10 ` [PATCH 18/18] md/raid5: allow layout/chunksize to be changed on an active2-drive raid5 NeilBrown
2009-02-12 3:10 ` [PATCH 13/18] md/raid5: refactor raid5 "run" NeilBrown
2009-02-12 3:10 ` NeilBrown [this message]
2009-02-12 3:10 ` [PATCH 12/18] md/raid5: finish support for DDF/raid6 NeilBrown
2009-02-12 8:11 ` [PATCH 00/18] Assorted md patches headed for 2.6.30 Keld Jørn Simonsen
2009-02-12 9:13 ` Steve Fairbairn
2009-02-12 9:46 ` Keld Jørn Simonsen
2009-02-12 10:52 ` NeilBrown
2009-02-12 11:16 ` Keld Jørn Simonsen
2009-02-12 10:53 ` Julian Cowley
2009-02-13 16:54 ` Bill Davidsen
2009-02-16 5:35 ` Neil Brown
2009-02-16 17:31 ` Nagilum
2009-02-12 22:57 ` Dan Williams
2009-02-13 16:56 ` Bill Davidsen
2009-02-12 9:21 ` NeilBrown
2009-02-12 9:53 ` Keld Jørn Simonsen
2009-02-12 10:45 ` NeilBrown
2009-02-12 11:11 ` Keld Jørn Simonsen
2009-02-12 15:28 ` Wil Reichert
2009-02-12 17:44 ` Keld Jørn Simonsen
2009-02-12 9:42 ` Farkas Levente
2009-02-12 10:40 ` NeilBrown
2009-02-12 11:17 ` Farkas Levente
2009-02-13 17:02 ` Bill Davidsen
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20090212031011.23983.37119.stgit@notabene.brown \
--to=neilb@suse.de \
--cc=linux-raid@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.