All of lore.kernel.org
 help / color / mirror / Atom feed
From: Evan Felix <evan.felix@pnl.gov>
To: Neil Brown <neilb@cse.unsw.edu.au>,
	Nathan D Tenney <Nathan.Tenney@pnl.gov>
Cc: linux-raid <linux-raid@vger.kernel.org>
Subject: Re: Raid Array with 3.5Tb
Date: Mon, 22 Mar 2004 08:54:12 -0800	[thread overview]
Message-ID: <1079974452.13232.10.camel@e-linux> (raw)
In-Reply-To: <16391.51319.698137.120756@notabene.cse.unsw.edu.au>

[-- Attachment #1: Type: text/plain, Size: 6602 bytes --]

Remember the 3.5TB array I've been trying to build?  I finally got around
to making some source code changes that seem to work much better.
Attached you will find a patch that fixes the raid5 code to a point
where it seems to re-sync and recover without complaining about maps not
being correct.  The patch below is built against 2.6.3, but will apply to
2.6.4 sources as well.  At this point I'd like to hear comments and
thoughts on the changes I've made.  Some notes:

1. raid0 seems to work fine at 3.5TB.
2. I have not looked at the raid6 code, but it does not work at 3.5TB.
3. Formatting the array with ext3 takes a very long time; I'm not sure
why yet.

Here is the patch:
diff -urN -X /home/efelix/.cvsignore kernel-source-2.6.3/drivers/md/md.c
kernel-source-2.6.3evan1/drivers/md/md.c
--- kernel-source-2.6.3/drivers/md/md.c	2004-02-19 08:54:51.000000000
+0000
+++ kernel-source-2.6.3evan1/drivers/md/md.c	2004-03-17
21:52:25.000000000 +0000
@@ -3138,13 +3138,14 @@
 static void md_do_sync(mddev_t *mddev)
 {
 	mddev_t *mddev2;
-	unsigned int max_sectors, currspeed = 0,
-		j, window;
+	unsigned int currspeed = 0,
+		 window;
+	sector_t max_sectors,j;
 	unsigned long mark[SYNC_MARKS];
-	unsigned long mark_cnt[SYNC_MARKS];
+	sector_t mark_cnt[SYNC_MARKS];
 	int last_mark,m;
 	struct list_head *tmp;
-	unsigned long last_check;
+	sector_t last_check;
 
 	/* just incase thread restarts... */
 	if (test_bit(MD_RECOVERY_DONE, &mddev->recovery))
@@ -3282,7 +3283,7 @@
 		 */
 		cond_resched();
 
-		currspeed =
(j-mddev->resync_mark_cnt)/2/((jiffies-mddev->resync_mark)/HZ +1) +1;
+		currspeed = ((unsigned
long)(j-mddev->resync_mark_cnt))/2/((jiffies-mddev->resync_mark)/HZ +1)
+1;
 
 		if (currspeed > sysctl_speed_limit_min) {
 			if ((currspeed > sysctl_speed_limit_max) ||
diff -urN -X /home/efelix/.cvsignore
kernel-source-2.6.3/drivers/md/raid5.c
kernel-source-2.6.3evan1/drivers/md/raid5.c
--- kernel-source-2.6.3/drivers/md/raid5.c	2004-02-19 08:54:52.000000000
+0000
+++ kernel-source-2.6.3evan1/drivers/md/raid5.c	2004-03-17
20:46:52.000000000 +0000
@@ -181,7 +181,7 @@
 
 static void raid5_build_block (struct stripe_head *sh, int i);
 
-static inline void init_stripe(struct stripe_head *sh, unsigned long
sector, int pd_idx)
+static inline void init_stripe(struct stripe_head *sh, sector_t sector,
int pd_idx)
 {
 	raid5_conf_t *conf = sh->raid_conf;
 	int disks = conf->raid_disks, i;
@@ -218,7 +218,7 @@
 	insert_hash(conf, sh);
 }
 
-static struct stripe_head *__find_stripe(raid5_conf_t *conf, unsigned
long sector)
+static struct stripe_head *__find_stripe(raid5_conf_t *conf, sector_t
sector)
 {
 	struct stripe_head *sh;
 
@@ -231,7 +231,7 @@
 	return NULL;
 }
 
-static struct stripe_head *get_active_stripe(raid5_conf_t *conf,
unsigned long sector, 
+static struct stripe_head *get_active_stripe(raid5_conf_t *conf,
sector_t sector, 
 					     int pd_idx, int noblock) 
 {
 	struct stripe_head *sh;
@@ -495,7 +495,7 @@
  * Input: a 'big' sector number,
  * Output: index of the data and parity disk, and the sector # in them.
  */
-static unsigned long raid5_compute_sector(sector_t r_sector, unsigned
int raid_disks,
+static sector_t raid5_compute_sector(sector_t r_sector, unsigned int
raid_disks,
 			unsigned int data_disks, unsigned int * dd_idx,
 			unsigned int * pd_idx, raid5_conf_t *conf)
 {
@@ -556,7 +556,7 @@
 	/*
 	 * Finally, compute the new sector number
 	 */
-	new_sector = stripe * sectors_per_chunk + chunk_offset;
+	new_sector = (sector_t)stripe * sectors_per_chunk + chunk_offset;
 	return new_sector;
 }
 
@@ -567,7 +567,7 @@
 	int raid_disks = conf->raid_disks, data_disks = raid_disks - 1;
 	sector_t new_sector = sh->sector, check;
 	int sectors_per_chunk = conf->chunk_size >> 9;
-	long stripe;
+	sector_t stripe;
 	int chunk_offset;
 	int chunk_number, dummy1, dummy2, dd_idx = i;
 	sector_t r_sector;
@@ -1388,7 +1389,7 @@
 	unsigned long stripe;
 	int chunk_offset;
 	int dd_idx, pd_idx;
-	unsigned long first_sector;
+	sector_t first_sector;
 	int raid_disks = conf->raid_disks;
 	int data_disks = raid_disks-1;
 
@@ -1401,7 +1402,7 @@
 	stripe = x;
 	BUG_ON(x != stripe);
 
-	first_sector =
raid5_compute_sector(stripe*data_disks*sectors_per_chunk
+	first_sector =
raid5_compute_sector((sector_t)stripe*data_disks*sectors_per_chunk
 		+ chunk_offset, raid_disks, data_disks, &dd_idx, &pd_idx, conf);
 	sh = get_active_stripe(conf, sector_nr, pd_idx, 1);
 	if (sh == NULL) {
diff -urN -X /home/efelix/.cvsignore
kernel-source-2.6.3/include/linux/raid/md_k.h
kernel-source-2.6.3evan1/include/linux/raid/md_k.h
--- kernel-source-2.6.3/include/linux/raid/md_k.h	2004-02-19
08:55:57.000000000 +0000
+++ kernel-source-2.6.3evan1/include/linux/raid/md_k.h	2004-03-10
21:14:39.000000000 +0000
@@ -211,9 +211,9 @@
 
 	struct mdk_thread_s		*thread;	/* management thread */
 	struct mdk_thread_s		*sync_thread;	/* doing resync or reconstruct */
-	unsigned long			curr_resync;	/* blocks scheduled */
+	sector_t			curr_resync;	/* blocks scheduled */
 	unsigned long			resync_mark;	/* a recent timestamp */
-	unsigned long			resync_mark_cnt;/* blocks written at resync_mark */
+	sector_t			resync_mark_cnt;/* blocks written at resync_mark */
 
 	/* recovery/resync flags 
 	 * NEEDED:   we might need to start a resync/recover
-------------------

Evan

On Fri, 2004-01-16 at 03:18, Neil Brown wrote:
> On Thursday January 15, evan.felix@pnl.gov wrote:
> > I've been attempting to create a large raid 5 device using the linux
> > 2.6.1 kernel, with the Large Block Device configured on.  I have in
> > the system 16 250G disks.  I built an array with mdadm -C -n 15 -x 1
> > /dev/md2 /dev/sd[a-p]
> > 
> > The resync/recovery seemed to be going fine, but at some point i
> > started seeing:
> > 
> > kernel: compute_blocknr: map not correct
> > kernel: compute_blocknr: map not correct
> ...
> > 
> > Has anyone else made an array this large? and does anybody have any
> > pointers on where i can start looking at code to fix this?
> 
> I would look in  drivers/md/raid5.c and compute_blocknr() in that
> file.
> 
> I would change "chunk_number" to a sector_t and change:
>         chunk_number = stripe * data_disks + i;
> to read
>         chunk_number = (sector_t)stripe * data_disks + i;
> 
> Possibly "stripe" and "chunk_number" should both be "sector_t", but
> I'm not at all sure.
> 
> Please let me know if it helps.
> 
> NeilBrown
-- 
-------------------------
Evan Felix
Administrator of Supercomputer #5 in Top 500, Nov 2003
Environmental Molecular Sciences Laboratory
Pacific Northwest National Laboratory
Operated for the U.S. DOE by Battelle

[-- Attachment #2: bigraid5.patch --]
[-- Type: text/x-patch, Size: 4719 bytes --]

diff -urN -X /home/efelix/.cvsignore kernel-source-2.6.3/drivers/md/md.c kernel-source-2.6.3evan1/drivers/md/md.c
--- kernel-source-2.6.3/drivers/md/md.c	2004-02-19 08:54:51.000000000 +0000
+++ kernel-source-2.6.3evan1/drivers/md/md.c	2004-03-17 21:52:25.000000000 +0000
@@ -3138,13 +3138,14 @@
 static void md_do_sync(mddev_t *mddev)
 {
 	mddev_t *mddev2;
-	unsigned int max_sectors, currspeed = 0,
-		j, window;
+	unsigned int currspeed = 0,
+		 window;
+	sector_t max_sectors,j;
 	unsigned long mark[SYNC_MARKS];
-	unsigned long mark_cnt[SYNC_MARKS];
+	sector_t mark_cnt[SYNC_MARKS];
 	int last_mark,m;
 	struct list_head *tmp;
-	unsigned long last_check;
+	sector_t last_check;
 
 	/* just incase thread restarts... */
 	if (test_bit(MD_RECOVERY_DONE, &mddev->recovery))
@@ -3282,7 +3283,7 @@
 		 */
 		cond_resched();
 
-		currspeed = (j-mddev->resync_mark_cnt)/2/((jiffies-mddev->resync_mark)/HZ +1) +1;
+		currspeed = ((unsigned long)(j-mddev->resync_mark_cnt))/2/((jiffies-mddev->resync_mark)/HZ +1) +1;
 
 		if (currspeed > sysctl_speed_limit_min) {
 			if ((currspeed > sysctl_speed_limit_max) ||
diff -urN -X /home/efelix/.cvsignore kernel-source-2.6.3/drivers/md/raid5.c kernel-source-2.6.3evan1/drivers/md/raid5.c
--- kernel-source-2.6.3/drivers/md/raid5.c	2004-02-19 08:54:52.000000000 +0000
+++ kernel-source-2.6.3evan1/drivers/md/raid5.c	2004-03-17 20:46:52.000000000 +0000
@@ -181,7 +181,7 @@
 
 static void raid5_build_block (struct stripe_head *sh, int i);
 
-static inline void init_stripe(struct stripe_head *sh, unsigned long sector, int pd_idx)
+static inline void init_stripe(struct stripe_head *sh, sector_t sector, int pd_idx)
 {
 	raid5_conf_t *conf = sh->raid_conf;
 	int disks = conf->raid_disks, i;
@@ -218,7 +218,7 @@
 	insert_hash(conf, sh);
 }
 
-static struct stripe_head *__find_stripe(raid5_conf_t *conf, unsigned long sector)
+static struct stripe_head *__find_stripe(raid5_conf_t *conf, sector_t sector)
 {
 	struct stripe_head *sh;
 
@@ -231,7 +231,7 @@
 	return NULL;
 }
 
-static struct stripe_head *get_active_stripe(raid5_conf_t *conf, unsigned long sector, 
+static struct stripe_head *get_active_stripe(raid5_conf_t *conf, sector_t sector, 
 					     int pd_idx, int noblock) 
 {
 	struct stripe_head *sh;
@@ -495,7 +495,7 @@
  * Input: a 'big' sector number,
  * Output: index of the data and parity disk, and the sector # in them.
  */
-static unsigned long raid5_compute_sector(sector_t r_sector, unsigned int raid_disks,
+static sector_t raid5_compute_sector(sector_t r_sector, unsigned int raid_disks,
 			unsigned int data_disks, unsigned int * dd_idx,
 			unsigned int * pd_idx, raid5_conf_t *conf)
 {
@@ -556,7 +556,7 @@
 	/*
 	 * Finally, compute the new sector number
 	 */
-	new_sector = stripe * sectors_per_chunk + chunk_offset;
+	new_sector = (sector_t)stripe * sectors_per_chunk + chunk_offset;
 	return new_sector;
 }
 
@@ -567,7 +567,7 @@
 	int raid_disks = conf->raid_disks, data_disks = raid_disks - 1;
 	sector_t new_sector = sh->sector, check;
 	int sectors_per_chunk = conf->chunk_size >> 9;
-	long stripe;
+	sector_t stripe;
 	int chunk_offset;
 	int chunk_number, dummy1, dummy2, dd_idx = i;
 	sector_t r_sector;
@@ -1388,7 +1389,7 @@
 	unsigned long stripe;
 	int chunk_offset;
 	int dd_idx, pd_idx;
-	unsigned long first_sector;
+	sector_t first_sector;
 	int raid_disks = conf->raid_disks;
 	int data_disks = raid_disks-1;
 
@@ -1401,7 +1402,7 @@
 	stripe = x;
 	BUG_ON(x != stripe);
 
-	first_sector = raid5_compute_sector(stripe*data_disks*sectors_per_chunk
+	first_sector = raid5_compute_sector((sector_t)stripe*data_disks*sectors_per_chunk
 		+ chunk_offset, raid_disks, data_disks, &dd_idx, &pd_idx, conf);
 	sh = get_active_stripe(conf, sector_nr, pd_idx, 1);
 	if (sh == NULL) {
diff -urN -X /home/efelix/.cvsignore kernel-source-2.6.3/include/linux/raid/md_k.h kernel-source-2.6.3evan1/include/linux/raid/md_k.h
--- kernel-source-2.6.3/include/linux/raid/md_k.h	2004-02-19 08:55:57.000000000 +0000
+++ kernel-source-2.6.3evan1/include/linux/raid/md_k.h	2004-03-10 21:14:39.000000000 +0000
@@ -211,9 +211,9 @@
 
 	struct mdk_thread_s		*thread;	/* management thread */
 	struct mdk_thread_s		*sync_thread;	/* doing resync or reconstruct */
-	unsigned long			curr_resync;	/* blocks scheduled */
+	sector_t			curr_resync;	/* blocks scheduled */
 	unsigned long			resync_mark;	/* a recent timestamp */
-	unsigned long			resync_mark_cnt;/* blocks written at resync_mark */
+	sector_t			resync_mark_cnt;/* blocks written at resync_mark */
 
 	/* recovery/resync flags 
 	 * NEEDED:   we might need to start a resync/recover

  reply	other threads:[~2004-03-22 16:54 UTC|newest]

Thread overview: 7+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2004-01-15 19:49 Raid Array with 3.5Tb Evan Felix
2004-01-15 22:25 ` Guy
2004-01-16 11:18 ` Neil Brown
2004-03-22 16:54   ` Evan Felix [this message]
2004-03-24 21:14   ` Evan Felix
2004-03-25  1:28     ` Neil Brown
2004-03-25  1:28       ` Neil Brown

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1079974452.13232.10.camel@e-linux \
    --to=evan.felix@pnl.gov \
    --cc=Nathan.Tenney@pnl.gov \
    --cc=linux-raid@vger.kernel.org \
    --cc=neilb@cse.unsw.edu.au \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.