linux-raid.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] md - 1 of 2 - Use "schedule_timeout(2)" instead of yield() as it seems to wait for less time.
  2004-03-05  5:45 [PATCH] md - 0 of 2 - Introduction NeilBrown
  2004-03-05  5:45 ` [PATCH] md - 2 of 2 - Allow assembling of partitioned arrays at boot time NeilBrown
@ 2004-03-05  5:45 ` NeilBrown
  2004-03-05  6:16   ` Andrew Morton
  2004-03-05  8:42 ` [PATCH] md - 0 of 2 - Introduction Clemens Schwaighofer
  2 siblings, 1 reply; 7+ messages in thread
From: NeilBrown @ 2004-03-05  5:45 UTC (permalink / raw)
  To: Andrew Morton; +Cc: linux-raid


 ----------- Diffstat output ------------
 ./drivers/md/raid5.c     |    3 ++-
 ./drivers/md/raid6main.c |    3 ++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff ./drivers/md/raid5.c~current~ ./drivers/md/raid5.c
--- ./drivers/md/raid5.c~current~	2004-03-05 16:43:07.000000000 +1100
+++ ./drivers/md/raid5.c	2004-03-05 16:43:07.000000000 +1100
@@ -1409,7 +1409,8 @@ static int sync_request (mddev_t *mddev,
 		/* make sure we don't swamp the stripe cache if someone else
 		 * is trying to get access 
 		 */
-		yield();
+		set_current_state(TASK_UNINTERRUPTIBLE);
+		schedule_timeout(2);
 	}
 	spin_lock(&sh->lock);	
 	set_bit(STRIPE_SYNCING, &sh->state);

diff ./drivers/md/raid6main.c~current~ ./drivers/md/raid6main.c
--- ./drivers/md/raid6main.c~current~	2004-03-05 16:43:07.000000000 +1100
+++ ./drivers/md/raid6main.c	2004-03-05 16:43:07.000000000 +1100
@@ -1571,7 +1571,8 @@ static int sync_request (mddev_t *mddev,
 		/* make sure we don't swamp the stripe cache if someone else
 		 * is trying to get access
 		 */
-		yield();
+		set_current_state(TASK_UNINTERRUPTIBLE);
+		schedule_timeout(2);
 	}
 	spin_lock(&sh->lock);
 	set_bit(STRIPE_SYNCING, &sh->state);

^ permalink raw reply	[flat|nested] 7+ messages in thread

* [PATCH] md - 0 of 2 - Introduction
@ 2004-03-05  5:45 NeilBrown
  2004-03-05  5:45 ` [PATCH] md - 2 of 2 - Allow assembling of partitioned arrays at boot time NeilBrown
                   ` (2 more replies)
  0 siblings, 3 replies; 7+ messages in thread
From: NeilBrown @ 2004-03-05  5:45 UTC (permalink / raw)
  To: Andrew Morton; +Cc: linux-raid


Here are two patches for md that I believe are suitable
for 2.6.4.

1/ change yield() to schedule_timeout(2) as promised a while back.

2/ Support assembling partitionable raid at boot-time so that it is
   possible to boot from a partition of a raid array without needing
   an initramfs.

Thanks,
NeilBrown


^ permalink raw reply	[flat|nested] 7+ messages in thread

* [PATCH] md - 2 of 2 - Allow assembling of partitioned arrays at boot time.
  2004-03-05  5:45 [PATCH] md - 0 of 2 - Introduction NeilBrown
@ 2004-03-05  5:45 ` NeilBrown
  2004-03-05  5:45 ` [PATCH] md - 1 of 2 - Use "schedule_timeout(2)" instead of yield() as it seems to wait for less time NeilBrown
  2004-03-05  8:42 ` [PATCH] md - 0 of 2 - Introduction Clemens Schwaighofer
  2 siblings, 0 replies; 7+ messages in thread
From: NeilBrown @ 2004-03-05  5:45 UTC (permalink / raw)
  To: Andrew Morton; +Cc: linux-raid


kernel parameters:
   raid=partitionable
     will make all auto-detected md arrays partitionable
   md=d....
     will assemble an array as a partitionable array.

 ----------- Diffstat output ------------
 ./drivers/md/md.c     |   19 ++++++---
 ./init/do_mounts_md.c |   99 +++++++++++++++++++++++++++++---------------------
 2 files changed, 70 insertions(+), 48 deletions(-)

diff ./drivers/md/md.c~current~ ./drivers/md/md.c
--- ./drivers/md/md.c~current~	2004-03-05 16:43:23.000000000 +1100
+++ ./drivers/md/md.c	2004-03-05 16:43:24.000000000 +1100
@@ -57,7 +57,7 @@
 
 
 #ifndef MODULE
-static void autostart_arrays (void);
+static void autostart_arrays (int part);
 #endif
 
 static mdk_personality_t *pers[MAX_PERSONALITY];
@@ -1792,7 +1792,7 @@ static void autorun_array(mddev_t *mddev
  *
  * If "unit" is allocated, then bump its reference count
  */
-static void autorun_devices(void)
+static void autorun_devices(int part)
 {
 	struct list_head candidates;
 	struct list_head *tmp;
@@ -1825,7 +1825,12 @@ static void autorun_devices(void)
 			       bdevname(rdev0->bdev, b), rdev0->preferred_minor);
 			break;
 		}
-		dev = MKDEV(MD_MAJOR, rdev0->preferred_minor);
+		if (part)
+			dev = MKDEV(mdp_major,
+				    rdev0->preferred_minor << MdpMinorShift);
+		else
+			dev = MKDEV(MD_MAJOR, rdev0->preferred_minor);
+
 		md_probe(dev, NULL, NULL);
 		mddev = mddev_find(dev);
 		if (!mddev) {
@@ -1922,7 +1927,7 @@ static int autostart_array(dev_t startde
 	/*
 	 * possibly return codes
 	 */
-	autorun_devices();
+	autorun_devices(0);
 	return 0;
 
 }
@@ -2407,7 +2412,7 @@ static int md_ioctl(struct inode *inode,
 #ifndef MODULE
 		case RAID_AUTORUN:
 			err = 0;
-			autostart_arrays();
+			autostart_arrays(arg);
 			goto done;
 #endif
 		default:;
@@ -3577,7 +3582,7 @@ void md_autodetect_dev(dev_t dev)
 }
 
 
-static void autostart_arrays(void)
+static void autostart_arrays(int part)
 {
 	char b[BDEVNAME_SIZE];
 	mdk_rdev_t *rdev;
@@ -3602,7 +3607,7 @@ static void autostart_arrays(void)
 	}
 	dev_cnt = 0;
 
-	autorun_devices();
+	autorun_devices(part);
 }
 
 #endif

diff ./init/do_mounts_md.c~current~ ./init/do_mounts_md.c
--- ./init/do_mounts_md.c~current~	2004-03-05 16:43:23.000000000 +1100
+++ ./init/do_mounts_md.c	2004-03-05 16:43:24.000000000 +1100
@@ -12,14 +12,17 @@
  * The code for that is here.
  */
 
-static int __initdata raid_noautodetect;
+static int __initdata raid_noautodetect, raid_autopart;
 
 static struct {
-	char device_set [MAX_MD_DEVS];
-	int pers[MAX_MD_DEVS];
-	int chunk[MAX_MD_DEVS];
-	char *device_names[MAX_MD_DEVS];
-} md_setup_args __initdata;
+	int minor;
+	int partitioned;
+	int pers;
+	int chunk;
+	char *device_names;
+} md_setup_args[MAX_MD_DEVS] __initdata;
+
+static int md_setup_ents __initdata;
 
 /*
  * Parse the command-line parameters given our kernel, but do not
@@ -43,21 +46,37 @@ static struct {
  */
 static int __init md_setup(char *str)
 {
-	int minor, level, factor, fault, pers;
+	int minor, level, factor, fault, pers, partitioned = 0;
 	char *pername = "";
-	char *str1 = str;
+	char *str1;
+	int ent;
 
+	if (*str == 'd') {
+		partitioned = 1;
+		str++;
+	}
 	if (get_option(&str, &minor) != 2) {	/* MD Number */
 		printk(KERN_WARNING "md: Too few arguments supplied to md=.\n");
 		return 0;
 	}
+	str1 = str;
 	if (minor >= MAX_MD_DEVS) {
 		printk(KERN_WARNING "md: md=%d, Minor device number too high.\n", minor);
 		return 0;
-	} else if (md_setup_args.device_names[minor]) {
-		printk(KERN_WARNING "md: md=%d, Specified more than once. "
-		       "Replacing previous definition.\n", minor);
 	}
+	for (ent=0 ; ent< md_setup_ents ; ent++) 
+		if (md_setup_args[ent].minor == minor &&
+		    md_setup_args[ent].partitioned == partitioned) {
+			printk(KERN_WARNING "md: md=%s%d, Specified more than once. "
+			       "Replacing previous definition.\n", partitioned?"d":"", minor);
+			break;
+		}
+	if (ent >= MAX_MD_DEVS) {
+		printk(KERN_WARNING "md: md=%s%d - too many md initialisations\n", partitioned?"d":"", minor);
+		return 0;
+	}
+	if (ent >= md_setup_ents)
+		md_setup_ents++;
 	switch (get_option(&str, &level)) {	/* RAID Personality */
 	case 2: /* could be 0 or -1.. */
 		if (level == 0 || level == LEVEL_LINEAR) {
@@ -66,24 +85,16 @@ static int __init md_setup(char *str)
 				printk(KERN_WARNING "md: Too few arguments supplied to md=.\n");
 				return 0;
 			}
-			md_setup_args.pers[minor] = level;
-			md_setup_args.chunk[minor] = 1 << (factor+12);
-			switch(level) {
-			case LEVEL_LINEAR:
+			md_setup_args[ent].pers = level;
+			md_setup_args[ent].chunk = 1 << (factor+12);
+			if (level ==  LEVEL_LINEAR) {
 				pers = LINEAR;
 				pername = "linear";
-				break;
-			case 0:
+			} else {
 				pers = RAID0;
 				pername = "raid0";
-				break;
-			default:
-				printk(KERN_WARNING
-				       "md: The kernel has not been configured for raid%d support!\n",
-				       level);
-				return 0;
 			}
-			md_setup_args.pers[minor] = pers;
+			md_setup_args[ent].pers = pers;
 			break;
 		}
 		/* FALL THROUGH */
@@ -91,35 +102,38 @@ static int __init md_setup(char *str)
 		str = str1;
 		/* FALL THROUGH */
 	case 0:
-		md_setup_args.pers[minor] = 0;
+		md_setup_args[ent].pers = 0;
 		pername="super-block";
 	}
 
 	printk(KERN_INFO "md: Will configure md%d (%s) from %s, below.\n",
 		minor, pername, str);
-	md_setup_args.device_names[minor] = str;
+	md_setup_args[ent].device_names = str;
+	md_setup_args[ent].partitioned = partitioned;
+	md_setup_args[ent].minor = minor;
 
 	return 1;
 }
 
 static void __init md_setup_drive(void)
 {
-	int minor, i;
+	int minor, i, ent, partitioned;
 	dev_t dev;
 	dev_t devices[MD_SB_DISKS+1];
 
-	for (minor = 0; minor < MAX_MD_DEVS; minor++) {
+	for (ent = 0; ent < md_setup_ents ; ent++) {
 		int fd;
 		int err = 0;
 		char *devname;
 		mdu_disk_info_t dinfo;
 		char name[16], devfs_name[16];
 
-		if (!(devname = md_setup_args.device_names[minor]))
-			continue;
-		
-		sprintf(name, "/dev/md%d", minor);
-		sprintf(devfs_name, "/dev/md/%d", minor);
+		minor = md_setup_args[ent].minor;
+		partitioned = md_setup_args[ent].partitioned;
+		devname = md_setup_args[ent].device_names;
+
+		sprintf(name, "/dev/md%s%d", partitioned?"_d":"", minor);
+		sprintf(devfs_name, "/dev/md/%s%d", partitioned?"d":"", minor);
 		create_dev(name, MKDEV(MD_MAJOR, minor), devfs_name);
 		for (i = 0; i < MD_SB_DISKS && devname != 0; i++) {
 			char *p;
@@ -143,20 +157,19 @@ static void __init md_setup_drive(void)
 			}
 
 			devices[i] = dev;
-			md_setup_args.device_set[minor] = 1;
 
 			devname = p;
 		}
 		devices[i] = 0;
 
-		if (!md_setup_args.device_set[minor])
+		if (!i)
 			continue;
 
-		printk(KERN_INFO "md: Loading md%d: %s\n", minor, md_setup_args.device_names[minor]);
+		printk(KERN_INFO "md: Loading md%s%d: %s\n", partitioned?"_d":"", minor, md_setup_args[ent].device_names);
 
 		fd = open(name, 0, 0);
 		if (fd < 0) {
-			printk(KERN_ERR "md: open failed - cannot start array %d\n", minor);
+			printk(KERN_ERR "md: open failed - cannot start array %s\n", name);
 			continue;
 		}
 		if (sys_ioctl(fd, SET_ARRAY_INFO, 0) == -EBUSY) {
@@ -167,10 +180,10 @@ static void __init md_setup_drive(void)
 			continue;
 		}
 
-		if (md_setup_args.pers[minor]) {
+		if (md_setup_args[ent].pers) {
 			/* non-persistent */
 			mdu_array_info_t ainfo;
-			ainfo.level = pers_to_level(md_setup_args.pers[minor]);
+			ainfo.level = pers_to_level(md_setup_args[ent].pers);
 			ainfo.size = 0;
 			ainfo.nr_disks =0;
 			ainfo.raid_disks =0;
@@ -181,7 +194,7 @@ static void __init md_setup_drive(void)
 
 			ainfo.state = (1 << MD_SB_CLEAN);
 			ainfo.layout = 0;
-			ainfo.chunk_size = md_setup_args.chunk[minor];
+			ainfo.chunk_size = md_setup_args[ent].chunk;
 			err = sys_ioctl(fd, SET_ARRAY_INFO, (long)&ainfo);
 			for (i = 0; !err && i <= MD_SB_DISKS; i++) {
 				dev = devices[i];
@@ -229,6 +242,10 @@ static int __init raid_setup(char *str)
 
 		if (!strncmp(str, "noautodetect", wlen))
 			raid_noautodetect = 1;
+		if (strncmp(str, "partitionable", wlen)==0)
+			raid_autopart = 1;
+		if (strncmp(str, "part", wlen)==0)
+			raid_autopart = 1;
 		pos += wlen+1;
 	}
 	return 1;
@@ -245,7 +262,7 @@ void __init md_run_setup(void)
 	else {
 		int fd = open("/dev/md0", 0, 0);
 		if (fd >= 0) {
-			sys_ioctl(fd, RAID_AUTORUN, 0);
+			sys_ioctl(fd, RAID_AUTORUN, raid_autopart);
 			close(fd);
 		}
 	}

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] md - 1 of 2 - Use "schedule_timeout(2)" instead of yield() as it seems to wait for less time.
  2004-03-05  5:45 ` [PATCH] md - 1 of 2 - Use "schedule_timeout(2)" instead of yield() as it seems to wait for less time NeilBrown
@ 2004-03-05  6:16   ` Andrew Morton
  2004-03-10  2:26     ` Neil Brown
  0 siblings, 1 reply; 7+ messages in thread
From: Andrew Morton @ 2004-03-05  6:16 UTC (permalink / raw)
  To: NeilBrown; +Cc: linux-raid

NeilBrown <neilb@cse.unsw.edu.au> wrote:
>
> +		schedule_timeout(2);

Are you sure we want to sleep for 20-30 milliseconds on a 100 Hz machine?

Wouldn't it be better to do

diff -puN drivers/md/raid5.c~md-use-schedule_timeout drivers/md/raid5.c
--- 25/drivers/md/raid5.c~md-use-schedule_timeout	2004-03-04 22:15:14.000000000 -0800
+++ 25-akpm/drivers/md/raid5.c	2004-03-04 22:15:41.000000000 -0800
@@ -1409,7 +1409,8 @@ static int sync_request (mddev_t *mddev,
 		/* make sure we don't swamp the stripe cache if someone else
 		 * is trying to get access 
 		 */
-		yield();
+		set_current_state(TASK_UNINTERRUPTIBLE);
+		schedule_timeout(max(HZ/500, 1));
 	}
 	spin_lock(&sh->lock);	
 	set_bit(STRIPE_SYNCING, &sh->state);
diff -puN drivers/md/raid6main.c~md-use-schedule_timeout drivers/md/raid6main.c
--- 25/drivers/md/raid6main.c~md-use-schedule_timeout	2004-03-04 22:15:14.000000000 -0800
+++ 25-akpm/drivers/md/raid6main.c	2004-03-04 22:15:50.000000000 -0800
@@ -1571,7 +1571,8 @@ static int sync_request (mddev_t *mddev,
 		/* make sure we don't swamp the stripe cache if someone else
 		 * is trying to get access
 		 */
-		yield();
+		set_current_state(TASK_UNINTERRUPTIBLE);
+		schedule_timeout(max(HZ/500, 1));
 	}
 	spin_lock(&sh->lock);
 	set_bit(STRIPE_SYNCING, &sh->state);

_

?

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] md - 0 of 2 - Introduction
  2004-03-05  5:45 [PATCH] md - 0 of 2 - Introduction NeilBrown
  2004-03-05  5:45 ` [PATCH] md - 2 of 2 - Allow assembling of partitioned arrays at boot time NeilBrown
  2004-03-05  5:45 ` [PATCH] md - 1 of 2 - Use "schedule_timeout(2)" instead of yield() as it seems to wait for less time NeilBrown
@ 2004-03-05  8:42 ` Clemens Schwaighofer
  2004-03-05 10:01   ` Michael Tokarev
  2 siblings, 1 reply; 7+ messages in thread
From: Clemens Schwaighofer @ 2004-03-05  8:42 UTC (permalink / raw)
  To: NeilBrown; +Cc: linux-raid

-----BEGIN PGP SIGNED MESSAGE-----
Hash: SHA1

NeilBrown wrote:
| Here are two patches for md that I believe are suitable
| for 2.6.4.
|
| 2/ Support assembling partitionable raid at boot-time so that it is
|    possible to boot from a partition of a raid array without needing
|    an initramfs.

I have a RAID 1 here and I boot with grub and I don't need ana initramfs
(initrd) ...

- --
Clemens Schwaighofer - IT Engineer & System Administration
==========================================================
Tequila Japan, 6-17-2 Ginza Chuo-ku, Tokyo 104-8167, JAPAN
Tel: +81-(0)3-3545-7703            Fax: +81-(0)3-3545-7343
http://www.tequila.jp
==========================================================
-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1.2.4 (GNU/Linux)

iD8DBQFASD1ojBz/yQjBxz8RAq6lAKDdN8hKgdBocQZOqC0yGoYd/Z6MCQCfdaL/
BFv6yTZznpg7wIEWxlQELak=
=Zht7
-----END PGP SIGNATURE-----

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] md - 0 of 2 - Introduction
  2004-03-05  8:42 ` [PATCH] md - 0 of 2 - Introduction Clemens Schwaighofer
@ 2004-03-05 10:01   ` Michael Tokarev
  0 siblings, 0 replies; 7+ messages in thread
From: Michael Tokarev @ 2004-03-05 10:01 UTC (permalink / raw)
  Cc: linux-raid

Clemens Schwaighofer wrote:
> 
> NeilBrown wrote:
> | Here are two patches for md that I believe are suitable
> | for 2.6.4.
> |
> | 2/ Support assembling partitionable raid at boot-time so that it is
> |    possible to boot from a partition of a raid array without needing
> |    an initramfs.
> 
> I have a RAID 1 here and I boot with grub and I don't need ana initramfs
> (initrd) ...

The keyword here is "partitionable".

/mjt

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] md - 1 of 2 - Use "schedule_timeout(2)" instead of yield() as it seems to wait for less time.
  2004-03-05  6:16   ` Andrew Morton
@ 2004-03-10  2:26     ` Neil Brown
  0 siblings, 0 replies; 7+ messages in thread
From: Neil Brown @ 2004-03-10  2:26 UTC (permalink / raw)
  To: Andrew Morton; +Cc: linux-raid

On Thursday March 4, akpm@osdl.org wrote:
> NeilBrown <neilb@cse.unsw.edu.au> wrote:
> >
> > +		schedule_timeout(2);
> 
> Are you sure we want to sleep for 20-30 milliseconds on a 100 Hz
> machine?

I'm not sure it really matters.  This bit of code doesn't trigger very
often.

I'm not unhappy with your version, but I suspect that just
	schedule_timeout(1)
would be ok.  I think I used '2' because there is no lower bound on
how long schedule_timeout(1) will sleep (until the next tick, which
could be anytime - right?).
But on reflection, that doesn't really matter.  As long as it pauses
for a little while at least once in a while, it will give any non-sync
IO a chance, and once that starts happening, resync will
automatically wind back and there won't be a problem anymore.

So I think (and some testing seems to demonstrate) that it doesn't
really matter.  So leave it as it is I guess.

Thanks,
NeilBrown


> 
> Wouldn't it be better to do
> 
> diff -puN drivers/md/raid5.c~md-use-schedule_timeout drivers/md/raid5.c
> --- 25/drivers/md/raid5.c~md-use-schedule_timeout	2004-03-04 22:15:14.000000000 -0800
> +++ 25-akpm/drivers/md/raid5.c	2004-03-04 22:15:41.000000000 -0800
> @@ -1409,7 +1409,8 @@ static int sync_request (mddev_t *mddev,
>  		/* make sure we don't swamp the stripe cache if someone else
>  		 * is trying to get access 
>  		 */
> -		yield();
> +		set_current_state(TASK_UNINTERRUPTIBLE);
> +		schedule_timeout(max(HZ/500, 1));
>  	}
>  	spin_lock(&sh->lock);	
>  	set_bit(STRIPE_SYNCING, &sh->state);
> diff -puN drivers/md/raid6main.c~md-use-schedule_timeout drivers/md/raid6main.c
> --- 25/drivers/md/raid6main.c~md-use-schedule_timeout	2004-03-04 22:15:14.000000000 -0800
> +++ 25-akpm/drivers/md/raid6main.c	2004-03-04 22:15:50.000000000 -0800
> @@ -1571,7 +1571,8 @@ static int sync_request (mddev_t *mddev,
>  		/* make sure we don't swamp the stripe cache if someone else
>  		 * is trying to get access
>  		 */
> -		yield();
> +		set_current_state(TASK_UNINTERRUPTIBLE);
> +		schedule_timeout(max(HZ/500, 1));
>  	}
>  	spin_lock(&sh->lock);
>  	set_bit(STRIPE_SYNCING, &sh->state);
> 
> _
> 
> ?

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2004-03-10  2:26 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2004-03-05  5:45 [PATCH] md - 0 of 2 - Introduction NeilBrown
2004-03-05  5:45 ` [PATCH] md - 2 of 2 - Allow assembling of partitioned arrays at boot time NeilBrown
2004-03-05  5:45 ` [PATCH] md - 1 of 2 - Use "schedule_timeout(2)" instead of yield() as it seems to wait for less time NeilBrown
2004-03-05  6:16   ` Andrew Morton
2004-03-10  2:26     ` Neil Brown
2004-03-05  8:42 ` [PATCH] md - 0 of 2 - Introduction Clemens Schwaighofer
2004-03-05 10:01   ` Michael Tokarev

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).