From: Dan Williams <dan.j.williams@intel.com>
To: neilb@suse.de
Cc: ed.ciechanowski@intel.com, marcin.labun@intel.com,
linux-raid@vger.kernel.org
Subject: [PATCH 2/2] md: add 'recovery_start' sysfs attribute
Date: Sat, 12 Dec 2009 21:17:12 -0700 [thread overview]
Message-ID: <20091213041711.12532.26335.stgit@dwillia2-linux.ch.intel.com> (raw)
In-Reply-To: <20091213041123.12532.15225.stgit@dwillia2-linux.ch.intel.com>
Enable external metadata arrays to manage rebuild checkpointing via a
md/recovery_start attribute that overrides rdev->recovery_offset.
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
Documentation/md.txt | 15 +++++++++--
drivers/md/md.c | 69 +++++++++++++++++++++++++++++++++++++++++++-------
drivers/md/md.h | 1 +
3 files changed, 72 insertions(+), 13 deletions(-)
diff --git a/Documentation/md.txt b/Documentation/md.txt
index 4edd39e..2b03814 100644
--- a/Documentation/md.txt
+++ b/Documentation/md.txt
@@ -233,9 +233,18 @@ All md devices contain:
resync_start
The point at which resync should start. If no resync is needed,
- this will be a very large number. At array creation it will
- default to 0, though starting the array as 'clean' will
- set it much larger.
+ this will be a very large number (or 'none' since 2.6.30-rc1). At
+ array creation it will default to 0, though starting the array as
+ 'clean' will set it much larger.
+
+ recovery_start
+ The point at which recovery should start when rebuilding a degraded
+ array member. This value overrides the 'recovery_offset' read from
+ the metadata. Setting this value to zero tells md to use/report
+ the default recovery_offset read from the metadata. This value
+ auto-resets itself to zero (default recovery_offset) after it has
+ been consumed by the recovery process. This value cannot be
+ changed while a recovery is in-flight.
new_dev
This file can be written but not read. The value written should
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 3e8fb67..5f09d40 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -2983,6 +2983,56 @@ resync_start_store(mddev_t *mddev, const char *buf, size_t len)
static struct md_sysfs_entry md_resync_start =
__ATTR(resync_start, S_IRUGO|S_IWUSR, resync_start_show, resync_start_store);
+static sector_t md_recovery_offset(mddev_t *mddev)
+{
+ /* Return the lowest recovery_offset among rdevs that hold a slot (raid_disk >= 0)
+ * and are neither Faulty nor In_sync. This is sometimes called outside
+ * mddev_lock(), hence the rcu_read_lock() around the list walk. */
+ sector_t recovery_offset = MaxSector; /* returned unchanged if no rdev qualifies */
+ mdk_rdev_t *rdev;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(rdev, &mddev->disks, same_set)
+ if (rdev->raid_disk >= 0 &&
+ !test_bit(Faulty, &rdev->flags) &&
+ !test_bit(In_sync, &rdev->flags) &&
+ rdev->recovery_offset < recovery_offset)
+ recovery_offset = rdev->recovery_offset;
+ rcu_read_unlock();
+
+ return recovery_offset;
+}
+
+static ssize_t recovery_start_show(mddev_t *mddev, char *page)
+{ /* sysfs 'show' handler for recovery_start: prints the sector as decimal, or "none" */
+ unsigned long long recovery_start = mddev->recovery_start;
+
+ if (recovery_start == 0) /* no override stored: report the offset derived from the rdevs */
+ recovery_start = md_recovery_offset(mddev);
+
+ if (recovery_start == MaxSector) /* MaxSector is reported as the word "none" */
+ return sprintf(page, "none\n");
+
+ return sprintf(page, "%llu\n", recovery_start);
+}
+
+static ssize_t recovery_start_store(mddev_t *mddev, const char *buf, size_t len)
+{ /* sysfs 'store' handler for recovery_start: parse a base-10 sector and save it as the override */
+ unsigned long long recovery_start;
+
+ if (strict_strtoull(buf, 10, &recovery_start))
+ return -EINVAL; /* non-zero from strict_strtoull(): presumably buf is not a valid decimal number */
+
+ if (!mddev->ro || !mddev->degraded || md_recovery_offset(mddev) > 0) /* needs ro set, degraded, min rdev offset 0 */
+ return -EBUSY; /* NOTE(review): stricter than md.txt's "recovery in-flight" wording - confirm intent */
+
+ mddev->recovery_start = recovery_start; /* md_do_sync() starts from this value and resets it to 0 */
+ return len;
+}
+
+static struct md_sysfs_entry md_recovery_start = /* 'recovery_start' attribute: S_IRUGO read, S_IWUSR write */
+__ATTR(recovery_start, S_IRUGO|S_IWUSR, recovery_start_show, recovery_start_store);
+
/*
* The array state can be:
*
@@ -3788,6 +3838,7 @@ static struct attribute *md_default_attrs[] = {
&md_chunk_size.attr,
&md_size.attr,
&md_resync_start.attr,
+ &md_recovery_start.attr,
&md_metadata.attr,
&md_new_device.attr,
&md_safe_delay.attr,
@@ -4426,6 +4477,7 @@ out:
mddev->dev_sectors = 0;
mddev->raid_disks = 0;
mddev->recovery_cp = 0;
+ mddev->recovery_start = 0;
mddev->resync_min = 0;
mddev->resync_max = MaxSector;
mddev->reshape_position = MaxSector;
@@ -6338,18 +6390,15 @@ void md_do_sync(mddev_t *mddev)
} else if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
max_sectors = mddev->dev_sectors;
- else {
+ else if (mddev->recovery_start) {
+ /* userspace requested override of rdev->recovery_offset */
+ max_sectors = mddev->dev_sectors;
+ j = mddev->recovery_start;
+ mddev->recovery_start = 0;
+ } else {
/* recovery follows the physical size of devices */
max_sectors = mddev->dev_sectors;
- j = MaxSector;
- rcu_read_lock();
- list_for_each_entry_rcu(rdev, &mddev->disks, same_set)
- if (rdev->raid_disk >= 0 &&
- !test_bit(Faulty, &rdev->flags) &&
- !test_bit(In_sync, &rdev->flags) &&
- rdev->recovery_offset < j)
- j = rdev->recovery_offset;
- rcu_read_unlock();
+ j = md_recovery_offset(mddev);
}
printk(KERN_INFO "md: %s of RAID array %s\n", desc, mdname(mddev));
diff --git a/drivers/md/md.h b/drivers/md/md.h
index f184b69..03a18b4 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -252,6 +252,7 @@ struct mddev_s
atomic_t recovery_active; /* blocks scheduled, but not written */
wait_queue_head_t recovery_wait;
sector_t recovery_cp;
+ sector_t recovery_start; /* override rdev->recovery_offset */
sector_t resync_min; /* user requested sync
* starts here */
sector_t resync_max; /* resync should pause
next prev parent reply other threads:[~2009-12-13 4:17 UTC|newest]
Thread overview: 11+ messages / expand[flat|nested] mbox.gz Atom feed top
2009-12-13 4:17 [GIT PATCH 0/2] external-metadata recovery checkpointing for 2.6.33 Dan Williams
2009-12-13 4:17 ` [PATCH 1/2] md: rcu_read_lock() walk of mddev->disks in md_do_sync() Dan Williams
2009-12-13 4:17 ` Dan Williams [this message]
2009-12-14 4:07 ` [GIT PATCH 0/2] external-metadata recovery checkpointing for 2.6.33 Neil Brown
2009-12-14 4:49 ` Dan Williams
2009-12-14 5:35 ` Neil Brown
2009-12-15 0:37 ` Dan Williams
2009-12-15 4:19 ` Dan Williams
2009-12-15 18:03 ` Dan Williams
2009-12-16 5:16 ` Neil Brown
2009-12-16 6:24 ` Dan Williams
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20091213041711.12532.26335.stgit@dwillia2-linux.ch.intel.com \
--to=dan.j.williams@intel.com \
--cc=ed.ciechanowski@intel.com \
--cc=linux-raid@vger.kernel.org \
--cc=marcin.labun@intel.com \
--cc=neilb@suse.de \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).