* parallel resync
@ 2003-09-29 17:51 Luca Berra
0 siblings, 0 replies; 5+ messages in thread
From: Luca Berra @ 2003-09-29 17:51 UTC (permalink / raw)
To: linux-raid
hello,
i have this problem
i have one server connected via two fc cards to two different storages.
the storages are intelligent so they handle raid internally and allow
mapping of one or more internal raid sets to different luns seen by the
host.
I use md to mirror between the two storages for disaster recovery
purposes.
The problem is that after an unclean shutdown (we had a big power outage
this weekend) all luns are resynced in parallel, thus bringing the
server and both storages down to their knees.
Having a look at match_mddev_units() in md.c (kernel 2.4) it seems to me
that raid code uses device major/minor number to determine if two md
devices are on the same underlying physical device.
from dev_unit():
mask = ~((1 << hd->minor_shift) - 1);
return MKDEV(MAJOR(dev), MINOR(dev) & mask);
in my case the logical drives are seen as different scsi devices by sd
layer, so all devices would appear to be on different disks, hence the
parallel resync effect.
I can change /proc/sys/dev/raid/speed_limit_max to a lower value to make
the server suffer less, but this won't stop the head thrashing effect on
the storage.
Is there any way of having raid code use a different method for deciding
which devices are on the same physical device, e.g. checking on which
scsi channel they appear?
I can, if I am short of options, change match_mddev_units() to use a
different match_dev_unit() which uses a different dev_unit() which only
checks major, but i would have to hardcode a lot of stuff because sd
uses different major numbers (and i am thinking only of sd driver), or i
could add tunable with a kernel or module parameter that changes the
behaviour of md_do_sync()
smth like
recheck:
serialize = 0;
ITERATE_MDDEV(mddev2,tmp) {
if (mddev2 == mddev)
continue;
+ if (force_serialize) {
+ if (mddev2->curr_resync) {
+ printk(KERN_INFO "md: delaying resync of md%d until md%d "
+ "has finished resync (force_serialize=1)\n",
+ mdidx(mddev), mdidx(mddev2));
+ serialize = 1;
+ break;
+ }
+ } else
if (mddev2->curr_resync && match_mddev_units(mddev,mddev2)) {
printk(KERN_INFO "md: delaying resync of md%d until md%d "
"has finished resync (they share one or more physical units)\n",
mdidx(mddev), mdidx(mddev2));
serialize = 1;
break;
}
}
Another idea could be storing a container indicator in the md superblock that
can be initialized by mdadm.
comments?
L.
--
Luca Berra -- bluca@comedia.it
Communication Media & Services S.r.l.
/"\
\ / ASCII RIBBON CAMPAIGN
X AGAINST HTML MAIL
/ \
^ permalink raw reply [flat|nested] 5+ messages in thread
* parallel resync
@ 2008-04-29 9:59 Bernd Schubert
2008-05-01 5:35 ` Neil Brown
0 siblings, 1 reply; 5+ messages in thread
From: Bernd Schubert @ 2008-04-29 9:59 UTC (permalink / raw)
To: NeilBrown; +Cc: linux-raid, Dan Williams
Hello Neil,
I think in about December I already asked if you could include our parallel
resync patch. We need this patch, since we are doing software raid over
fast hardware raid devices and with these hardware raid systems the cpu is
the bottleneck and not disk i/o.
Here is the previous thread: http://www.issociate.de/board/post/470063/[PATCH]_(2nd_try)_force_parallel_resync.html
Is there any chance you could add this patch as well, or if the patch is
not suitable yet, could you please tell me what needs to be done?
Index: linus/drivers/md/md.c
===================================================================
--- linus.orig/drivers/md/md.c
+++ linus/drivers/md/md.c
@@ -74,6 +74,8 @@ static DEFINE_SPINLOCK(pers_lock);
static void md_print_devices(void);
+static DECLARE_WAIT_QUEUE_HEAD(resync_wait);
+
#define MD_BUG(x...) { printk("md: bug in file %s, line %d\n", __FILE__, __LINE__); md_print_devices(); }
/*
@@ -2979,6 +2981,34 @@ degraded_show(mddev_t *mddev, char *page
static struct md_sysfs_entry md_degraded = __ATTR_RO(degraded);
static ssize_t
+sync_force_parallel_show(mddev_t *mddev, char *page)
+{
+ return sprintf(page, "%d\n", mddev->parallel_resync);
+}
+
+static ssize_t
+sync_force_parallel_store(mddev_t *mddev, const char *buf, size_t len)
+{
+ char *e;
+ unsigned long n = simple_strtoul(buf, &e, 10);
+
+ if (!*buf || (*e && *e != '\n') || (n != 0 && n != 1))
+ return -EINVAL;
+
+ mddev->parallel_resync = n;
+
+ if (mddev->sync_thread) {
+ wake_up(&resync_wait);
+ }
+ return len;
+}
+
+/* force parallel resync, even with shared block devices */
+static struct md_sysfs_entry md_sync_force_parallel =
+__ATTR(sync_force_parallel, S_IRUGO|S_IWUSR,
+ sync_force_parallel_show, sync_force_parallel_store);
+
+static ssize_t
sync_speed_show(mddev_t *mddev, char *page)
{
unsigned long resync, dt, db;
@@ -3153,6 +3183,7 @@ static struct attribute *md_redundancy_a
&md_sync_min.attr,
&md_sync_max.attr,
&md_sync_speed.attr,
+ &md_sync_force_parallel.attr,
&md_sync_completed.attr,
&md_max_sync.attr,
&md_suspend_lo.attr,
@@ -5413,8 +5444,6 @@ void md_allow_write(mddev_t *mddev)
}
EXPORT_SYMBOL_GPL(md_allow_write);
-static DECLARE_WAIT_QUEUE_HEAD(resync_wait);
-
#define SYNC_MARKS 10
#define SYNC_MARK_STEP (3*HZ)
void md_do_sync(mddev_t *mddev)
@@ -5478,8 +5507,9 @@ void md_do_sync(mddev_t *mddev)
for_each_mddev(mddev2, tmp) {
if (mddev2 == mddev)
continue;
- if (mddev2->curr_resync &&
- match_mddev_units(mddev,mddev2)) {
+ if (!mddev->parallel_resync
+ && mddev2->curr_resync
+ && match_mddev_units(mddev,mddev2)) {
DEFINE_WAIT(wq);
if (mddev < mddev2 && mddev->curr_resync == 2) {
/* arbitrarily yield */
Index: linus/include/linux/raid/md_k.h
===================================================================
--- linus.orig/include/linux/raid/md_k.h
+++ linus/include/linux/raid/md_k.h
@@ -176,6 +176,9 @@ struct mddev_s
int sync_speed_min;
int sync_speed_max;
+ /* resync even though the same disks are shared among md-devices */
+ int parallel_resync;
+
int ok_start_degraded;
/* recovery/resync flags
* NEEDED: we might need to start a resync/recover
Thanks,
Bernd
--
Bernd Schubert
Q-Leap Networks GmbH
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: parallel resync
2008-04-29 9:59 Bernd Schubert
@ 2008-05-01 5:35 ` Neil Brown
2008-05-01 21:17 ` Bernd Schubert
0 siblings, 1 reply; 5+ messages in thread
From: Neil Brown @ 2008-05-01 5:35 UTC (permalink / raw)
To: Bernd Schubert; +Cc: linux-raid, Dan Williams
On Tuesday April 29, bs@q-leap.de wrote:
> Hello Neil,
>
> I think in about December I already asked if you could include our parallel
> resync patch. We need this patch, since we are doing software raid over
> fast hardware raid devices and with these hardware raid system the cpu is
> the bottleneck and not disk i/o.
>
> Here is the previous thread: http://www.issociate.de/board/post/470063/[PATCH]_(2nd_try)_force_parallel_resync.html
>
> Is there any chance you could add this patch as well, or if the patch is
> not suitable yet, could you please tell me what needs to be done?
>
Sorry for not responding earlier....
I seem to remember the setting used to be global, but I see in this
patch it is per-array -- which makes much more sense.
Though it brings up an interesting question. If two arrays share a
device, and exactly one of them is flagged for parallel sync, does
that make sense?
Maybe it does. I suspect the reality is that it is individual devices
that should be flagged for parallel sync or not, and the setting on
the array is just an "and" of the settings for the devices in the
array. So different settings on arrays which share one device can
make sense...
Note: I'm *not* suggesting that the setting should be moved to the
component devices - that would be too clumsy. I'm just musing.
I get 3 errors and 2 warnings from ./scripts/checkpatch.pl. If you
fix those I'll take the patch.
Thanks,
NeilBrown
ERROR: code indent should use tabs where possible
#85: FILE: drivers/md/md.c:2986:
+ return sprintf(page, "%d\n", mddev->parallel_resync);$
WARNING: consider using strict_strtoul in preference to simple_strtoul
#92: FILE: drivers/md/md.c:2993:
+ unsigned long n = simple_strtoul(buf, &e, 10);
WARNING: braces {} are not necessary for single statement blocks
#99: FILE: drivers/md/md.c:3000:
+ if (mddev->sync_thread) {
+ wake_up(&resync_wait);
+ }
ERROR: space required after that ',' (ctx:VxV)
#139: FILE: drivers/md/md.c:5512:
+ && match_mddev_units(mddev,mddev2)) {
^
ERROR: Missing Signed-off-by: line(s)
total: 3 errors, 2 warnings, 77 lines checked
>
> Index: linus/drivers/md/md.c
> ===================================================================
> --- linus.orig/drivers/md/md.c
> +++ linus/drivers/md/md.c
> @@ -74,6 +74,8 @@ static DEFINE_SPINLOCK(pers_lock);
>
> static void md_print_devices(void);
>
> +static DECLARE_WAIT_QUEUE_HEAD(resync_wait);
> +
> #define MD_BUG(x...) { printk("md: bug in file %s, line %d\n", __FILE__, __LINE__); md_print_devices(); }
>
> /*
> @@ -2979,6 +2981,34 @@ degraded_show(mddev_t *mddev, char *page
> static struct md_sysfs_entry md_degraded = __ATTR_RO(degraded);
>
> static ssize_t
> +sync_force_parallel_show(mddev_t *mddev, char *page)
> +{
> + return sprintf(page, "%d\n", mddev->parallel_resync);
> +}
> +
> +static ssize_t
> +sync_force_parallel_store(mddev_t *mddev, const char *buf, size_t len)
> +{
> + char *e;
> + unsigned long n = simple_strtoul(buf, &e, 10);
> +
> + if (!*buf || (*e && *e != '\n') || (n != 0 && n != 1))
> + return -EINVAL;
> +
> + mddev->parallel_resync = n;
> +
> + if (mddev->sync_thread) {
> + wake_up(&resync_wait);
> + }
> + return len;
> +}
> +
> +/* force parallel resync, even with shared block devices */
> +static struct md_sysfs_entry md_sync_force_parallel =
> +__ATTR(sync_force_parallel, S_IRUGO|S_IWUSR,
> + sync_force_parallel_show, sync_force_parallel_store);
> +
> +static ssize_t
> sync_speed_show(mddev_t *mddev, char *page)
> {
> unsigned long resync, dt, db;
> @@ -3153,6 +3183,7 @@ static struct attribute *md_redundancy_a
> &md_sync_min.attr,
> &md_sync_max.attr,
> &md_sync_speed.attr,
> + &md_sync_force_parallel.attr,
> &md_sync_completed.attr,
> &md_max_sync.attr,
> &md_suspend_lo.attr,
> @@ -5413,8 +5444,6 @@ void md_allow_write(mddev_t *mddev)
> }
> EXPORT_SYMBOL_GPL(md_allow_write);
>
> -static DECLARE_WAIT_QUEUE_HEAD(resync_wait);
> -
> #define SYNC_MARKS 10
> #define SYNC_MARK_STEP (3*HZ)
> void md_do_sync(mddev_t *mddev)
> @@ -5478,8 +5507,9 @@ void md_do_sync(mddev_t *mddev)
> for_each_mddev(mddev2, tmp) {
> if (mddev2 == mddev)
> continue;
> - if (mddev2->curr_resync &&
> - match_mddev_units(mddev,mddev2)) {
> + if (!mddev->parallel_resync
> + && mddev2->curr_resync
> + && match_mddev_units(mddev,mddev2)) {
> DEFINE_WAIT(wq);
> if (mddev < mddev2 && mddev->curr_resync == 2) {
> /* arbitrarily yield */
> Index: linus/include/linux/raid/md_k.h
> ===================================================================
> --- linus.orig/include/linux/raid/md_k.h
> +++ linus/include/linux/raid/md_k.h
> @@ -176,6 +176,9 @@ struct mddev_s
> int sync_speed_min;
> int sync_speed_max;
>
> + /* resync even though the same disks are shared among md-devices */
> + int parallel_resync;
> +
> int ok_start_degraded;
> /* recovery/resync flags
> * NEEDED: we might need to start a resync/recover
>
>
>
> Thanks,
> Bernd
>
>
> --
> Bernd Schubert
> Q-Leap Networks GmbH
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: parallel resync
2008-05-01 5:35 ` Neil Brown
@ 2008-05-01 21:17 ` Bernd Schubert
2008-05-02 0:28 ` Neil Brown
0 siblings, 1 reply; 5+ messages in thread
From: Bernd Schubert @ 2008-05-01 21:17 UTC (permalink / raw)
To: Neil Brown; +Cc: linux-raid, Dan Williams
On Thursday 01 May 2008 07:35:32 Neil Brown wrote:
> On Tuesday April 29, bs@q-leap.de wrote:
>
> Sorry for not responding earlier....
No problem, we are all always overly busy ;)
>
> I seem to remember the setting used to be global, but I see in this
> patch it is per-array -- which makes much more sense.
> Though it brings up an interesting question. If two arrays share a
> device, and exactly one of them is flagged for parallel sync, does
> that make sense?
>
> Maybe it does. I suspect the reality is that it is individual devices
> that should be flagged for parallel sync or not, and the setting on
> the array is just an "and" of the settings for the devices in the
> array. So different settings on arrays which share one device can
> make sense...
>
> Note: I'm *not* suggesting that the setting should be moved to the
> component devices - that would be too clumsy. I'm just musing.
>
> If get 3 errors and 2 warnings from ./scripts/checkpatch.pl. If you
> fix those I'll take the patch.
Below is a new version of the patch. If kmail should break something, I
also uploaded it here:
http://www.pci.uni-heidelberg.de/tc/usr/bernd/downloads/md/parallel_resync.patch
Unfortunately I presently don't have any non-production system I could
easily reboot to test the new version on.
Thanks,
Bernd
Allow parallel resync of md-devices.
Signed-off-by: Bernd Schubert <bs@q-leap.de>
Index: linus/drivers/md/md.c
===================================================================
--- linus.orig/drivers/md/md.c
+++ linus/drivers/md/md.c
@@ -74,6 +74,8 @@ static DEFINE_SPINLOCK(pers_lock);
static void md_print_devices(void);
+static DECLARE_WAIT_QUEUE_HEAD(resync_wait);
+
#define MD_BUG(x...) { printk("md: bug in file %s, line %d\n", __FILE__, __LINE__); md_print_devices(); }
/*
@@ -2979,6 +2981,36 @@ degraded_show(mddev_t *mddev, char *page
static struct md_sysfs_entry md_degraded = __ATTR_RO(degraded);
static ssize_t
+sync_force_parallel_show(mddev_t *mddev, char *page)
+{
+ return sprintf(page, "%d\n", mddev->parallel_resync);
+}
+
+static ssize_t
+sync_force_parallel_store(mddev_t *mddev, const char *buf, size_t len)
+{
+ long n;
+
+ if (strict_strtol(buf, 10, &n))
+ return -EINVAL;
+
+ if (n != 0 && n != 1)
+ return -EINVAL;
+
+ mddev->parallel_resync = n;
+
+ if (mddev->sync_thread)
+ wake_up(&resync_wait);
+
+ return len;
+}
+
+/* force parallel resync, even with shared block devices */
+static struct md_sysfs_entry md_sync_force_parallel =
+__ATTR(sync_force_parallel, S_IRUGO|S_IWUSR,
+ sync_force_parallel_show, sync_force_parallel_store);
+
+static ssize_t
sync_speed_show(mddev_t *mddev, char *page)
{
unsigned long resync, dt, db;
@@ -3153,6 +3185,7 @@ static struct attribute *md_redundancy_a
&md_sync_min.attr,
&md_sync_max.attr,
&md_sync_speed.attr,
+ &md_sync_force_parallel.attr,
&md_sync_completed.attr,
&md_max_sync.attr,
&md_suspend_lo.attr,
@@ -5413,8 +5446,6 @@ void md_allow_write(mddev_t *mddev)
}
EXPORT_SYMBOL_GPL(md_allow_write);
-static DECLARE_WAIT_QUEUE_HEAD(resync_wait);
-
#define SYNC_MARKS 10
#define SYNC_MARK_STEP (3*HZ)
void md_do_sync(mddev_t *mddev)
@@ -5478,8 +5509,9 @@ void md_do_sync(mddev_t *mddev)
for_each_mddev(mddev2, tmp) {
if (mddev2 == mddev)
continue;
- if (mddev2->curr_resync &&
- match_mddev_units(mddev,mddev2)) {
+ if (!mddev->parallel_resync
+ && mddev2->curr_resync
+ && match_mddev_units(mddev, mddev2)) {
DEFINE_WAIT(wq);
if (mddev < mddev2 && mddev->curr_resync == 2) {
/* arbitrarily yield */
Index: linus/include/linux/raid/md_k.h
===================================================================
--- linus.orig/include/linux/raid/md_k.h
+++ linus/include/linux/raid/md_k.h
@@ -176,6 +176,9 @@ struct mddev_s
int sync_speed_min;
int sync_speed_max;
+ /* resync even though the same disks are shared among md-devices */
+ int parallel_resync;
+
int ok_start_degraded;
/* recovery/resync flags
* NEEDED: we might need to start a resync/recover
--
Bernd Schubert
Q-Leap Networks GmbH
^ permalink raw reply [flat|nested] 5+ messages in thread
end of thread, other threads:[~2008-05-02 0:28 UTC | newest]
Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2003-09-29 17:51 parallel resync Luca Berra
-- strict thread matches above, loose matches on Subject: below --
2008-04-29 9:59 Bernd Schubert
2008-05-01 5:35 ` Neil Brown
2008-05-01 21:17 ` Bernd Schubert
2008-05-02 0:28 ` Neil Brown
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).