All of lore.kernel.org
 help / color / mirror / Atom feed
From: NeilBrown <neilb@suse.de>
To: linux-raid@vger.kernel.org, linux-kernel@vger.kernel.org
Subject: [PATCH 007 of 7] md: Only checkpoint expansion progress occasionally.
Date: Tue, 24 Jan 2006 11:41:22 +1100	[thread overview]
Message-ID: <1060124004122.5107@suse.de> (raw)
In-Reply-To: 20060124112626.4447.patches@notabene


Instead of checkpointing at each stripe, only checkpoint
when a new write would overwrite uncheckpointed data.
Block any write to the uncheckpointed area.
Additionally, checkpoint whenever more than about 3Meg (an arbitrary
threshold) has been reshaped since the last checkpoint.


Signed-off-by: Neil Brown <neilb@suse.de>

### Diffstat output
 ./drivers/md/raid5.c         |   53 ++++++++++++++++++++++++++++++++++---------
 ./include/linux/raid/raid5.h |    3 ++
 2 files changed, 45 insertions(+), 11 deletions(-)

diff ./drivers/md/raid5.c~current~ ./drivers/md/raid5.c
--- ./drivers/md/raid5.c~current~	2006-01-24 11:19:21.000000000 +1100
+++ ./drivers/md/raid5.c	2006-01-24 11:26:16.000000000 +1100
@@ -1748,8 +1748,9 @@ static int make_request(request_queue_t 
 	for (;logical_sector < last_sector; logical_sector += STRIPE_SECTORS) {
 		DEFINE_WAIT(w);
 		int disks;
-		
+
 	retry:
+		prepare_to_wait(&conf->wait_for_overlap, &w, TASK_UNINTERRUPTIBLE);
 		if (likely(conf->expand_progress == MaxSector))
 			disks = conf->raid_disks;
 		else {
@@ -1757,6 +1758,13 @@ static int make_request(request_queue_t 
 			disks = conf->raid_disks;
 			if (logical_sector >= conf->expand_progress)
 				disks = conf->previous_raid_disks;
+			else {
+				if (logical_sector >= conf->expand_lo) {
+					spin_unlock_irq(&conf->device_lock);
+					schedule();
+					goto retry;
+				}
+			}
 			spin_unlock_irq(&conf->device_lock);
 		}
  		new_sector = raid5_compute_sector(logical_sector, disks, disks - 1,
@@ -1765,7 +1773,6 @@ static int make_request(request_queue_t 
 			(unsigned long long)new_sector, 
 			(unsigned long long)logical_sector);
 
-		prepare_to_wait(&conf->wait_for_overlap, &w, TASK_UNINTERRUPTIBLE);
 		sh = get_active_stripe(conf, new_sector, disks, pd_idx, (bi->bi_rw&RWA_MASK));
 		if (sh) {
 			if (unlikely(conf->expand_progress != MaxSector)) {
@@ -1863,6 +1870,7 @@ static sector_t sync_request(mddev_t *md
 		 */
 		int i;
 		int dd_idx;
+		sector_t writepos, safepos, gap;
 
 		if (sector_nr == 0 &&
 		    conf->expand_progress != 0) {
@@ -1873,15 +1881,36 @@ static sector_t sync_request(mddev_t *md
 			return sector_nr;
 		}
 
-		/* Cannot proceed until we've updated the superblock... */
-		wait_event(conf->wait_for_overlap,
-			   atomic_read(&conf->reshape_stripes)==0);
-		mddev->reshape_position = conf->expand_progress;
-
-		mddev->sb_dirty = 1;
-		md_wakeup_thread(mddev->thread);
-		wait_event(mddev->sb_wait, mddev->sb_dirty == 0 ||
-			kthread_should_stop());
+		/* we update the metadata when there is more than 3Meg
+		 * in the block range (that is rather arbitrary, should
+		 * probably be time based) or when the data about to be
+		 * copied would over-write the source of the data at
+		 * the front of the range.
+		 * i.e. one new_stripe forward from expand_progress new_maps
+		 * to after where expand_lo old_maps to
+		 */
+		writepos = conf->expand_progress +
+			conf->chunk_size/512*(conf->raid_disks-1);
+		sector_div(writepos, conf->raid_disks-1);
+		safepos = conf->expand_lo;
+		sector_div(safepos, conf->previous_raid_disks-1);
+		gap = conf->expand_progress - conf->expand_lo;
+
+		if (writepos >= safepos ||
+		    gap > (conf->raid_disks-1)*3000*2 /*3Meg*/) {
+			/* Cannot proceed until we've updated the superblock... */
+			wait_event(conf->wait_for_overlap,
+				   atomic_read(&conf->reshape_stripes)==0);
+			mddev->reshape_position = conf->expand_progress;
+			mddev->sb_dirty = 1;
+			md_wakeup_thread(mddev->thread);
+			wait_event(mddev->sb_wait, mddev->sb_dirty == 0 ||
+				   kthread_should_stop());
+			spin_lock_irq(&conf->device_lock);
+			conf->expand_lo = mddev->reshape_position;
+			spin_unlock_irq(&conf->device_lock);
+			wake_up(&conf->wait_for_overlap);
+		}
 
 		for (i=0; i < conf->chunk_size/512; i+= STRIPE_SECTORS) {
 			int j;
@@ -2312,6 +2341,7 @@ static int run(mddev_t *mddev)
 
 	if (conf->expand_progress != MaxSector) {
 		printk("...ok start reshape thread\n");
+		conf->expand_lo = conf->expand_progress;
 		atomic_set(&conf->reshape_stripes, 0);
 		clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
 		clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
@@ -2599,6 +2629,7 @@ static int raid5_reshape(mddev_t *mddev,
 	conf->previous_raid_disks = conf->raid_disks;
 	conf->raid_disks = raid_disks;
 	conf->expand_progress = 0;
+	conf->expand_lo = 0;
 	spin_unlock_irq(&conf->device_lock);
 
 	/* Add some new drives, as many as will fit.

diff ./include/linux/raid/raid5.h~current~ ./include/linux/raid/raid5.h
--- ./include/linux/raid/raid5.h~current~	2006-01-24 11:19:21.000000000 +1100
+++ ./include/linux/raid/raid5.h	2006-01-24 11:26:16.000000000 +1100
@@ -217,6 +217,9 @@ struct raid5_private_data {
 
 	/* used during an expand */
 	sector_t		expand_progress;	/* MaxSector when no expand happening */
+	sector_t		expand_lo; /* from here up to expand_progress is out-of-bounds
+					    * as we haven't flushed the metadata yet
+					    */
 	int			previous_raid_disks;
 
 	struct list_head	handle_list; /* stripes needing handling */

  parent reply	other threads:[~2006-01-24  0:41 UTC|newest]

Thread overview: 16+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2006-01-24  0:40 [PATCH 000 of 7] md: Introduction - raid5 reshape mark-2 NeilBrown
2006-01-24  0:40 ` [PATCH 001 of 7] md: Split disks array out of raid5 conf structure so it is easier to grow NeilBrown
2006-01-24  0:40 ` [PATCH 002 of 7] md: Allow stripes to be expanded in preparation for expanding an array NeilBrown
2006-01-24  0:41 ` [PATCH 003 of 7] md: Infrastructure to allow normal IO to continue while array is expanding NeilBrown
2006-01-24  0:41 ` [PATCH 004 of 7] md: Core of raid5 resize process NeilBrown
2006-01-24  0:41 ` [PATCH 005 of 7] md: Final stages of raid5 expand code NeilBrown
2006-01-24  0:41 ` [PATCH 006 of 7] md: Checkpoint and allow restart of raid5 reshape NeilBrown
2006-01-27 12:37   ` Molle Bestefich
2006-01-24  0:41 ` NeilBrown [this message]
2006-01-24  9:23 ` [PATCH 000 of 7] md: Introduction - raid5 reshape mark-2 Lars Marowsky-Bree
2006-01-24  9:23   ` Lars Marowsky-Bree
2006-01-24  9:32   ` Neil Brown
2006-02-07 17:13 ` Henrik Holst
2006-02-09  3:32   ` Neil Brown
2006-02-09  6:35     ` Kernels and MD versions (was: md: Introduction - raid5 reshape mark-2) Patrik Jonsson
2006-02-09 18:07       ` Mr. James W. Laferriere

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1060124004122.5107@suse.de \
    --to=neilb@suse.de \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-raid@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.