[PATCH 005 of 10] md: Don't write dirty/clean update to spares - leave them alone

linux-raid.vger.kernel.org archive mirror
 help / color / mirror / Atom feed

From: NeilBrown <neilb@suse.de>
To: Andrew Morton <akpm@osdl.org>
Cc: linux-raid@vger.kernel.org, linux-kernel@vger.kernel.org
Subject: [PATCH 005 of 10] md: Don't write dirty/clean update to spares - leave them alone
Date: Thu, 1 Jun 2006 15:13:52 +1000	[thread overview]
Message-ID: <1060601051352.27601@suse.de> (raw)
In-Reply-To: 20060601150955.27444.patches@notabene


- record the 'event' count on each individual device (they
  might sometimes be slightly different now)
- add a new value for 'sb_dirty': '3' means that the super
  block only needs to be updated to record a clean<->dirty
  transition.
- Prefer odd event numbers for dirty states and even numbers
  for clean states
- Using all the above, don't update the superblock on
  a spare device if the update is just doing a clean-dirty
  transition.  To accomodate this, a transition from
  dirty back to clean might now decrement the events counter
  if nothing else has changed.

The net effect of this is that spare drives will not see any IO
requests during normal running of the array, so they can go to sleep
if that is what they want to do.

Signed-off-by: Neil Brown <neilb@suse.de>

### Diffstat output
 ./drivers/md/md.c           |   65 ++++++++++++++++++++++++++++++++++++++------
 ./include/linux/raid/md_k.h |    1 
 2 files changed, 58 insertions(+), 8 deletions(-)

diff ./drivers/md/md.c~current~ ./drivers/md/md.c
--- ./drivers/md/md.c~current~	2006-06-01 15:05:29.000000000 +1000
+++ ./drivers/md/md.c	2006-06-01 15:05:29.000000000 +1000
@@ -1558,15 +1558,30 @@ static void md_print_devices(void)
 }
 
 
-static void sync_sbs(mddev_t * mddev)
+static void sync_sbs(mddev_t * mddev, int nospares)
 {
+	/* Update each superblock (in-memory image), but
+	 * if we are allowed to, skip spares which already
+	 * have the right event counter, or have one earlier
+	 * (which would mean they aren't being marked as dirty
+	 * with the rest of the array)
+	 */
 	mdk_rdev_t *rdev;
 	struct list_head *tmp;
 
 	ITERATE_RDEV(mddev,rdev,tmp) {
-		super_types[mddev->major_version].
-			sync_super(mddev, rdev);
-		rdev->sb_loaded = 1;
+		if (rdev->sb_events == mddev->events ||
+		    (nospares &&
+		     rdev->raid_disk < 0 &&
+		     (rdev->sb_events&1)==0 &&
+		     rdev->sb_events+1 == mddev->events)) {
+			/* Don't update this superblock */
+			rdev->sb_loaded = 2;
+		} else {
+			super_types[mddev->major_version].
+				sync_super(mddev, rdev);
+			rdev->sb_loaded = 1;
+		}
 	}
 }
 
@@ -1576,12 +1591,42 @@ void md_update_sb(mddev_t * mddev)
 	struct list_head *tmp;
 	mdk_rdev_t *rdev;
 	int sync_req;
+	int nospares = 0;
 
 repeat:
 	spin_lock_irq(&mddev->write_lock);
 	sync_req = mddev->in_sync;
 	mddev->utime = get_seconds();
-	mddev->events ++;
+	if (mddev->sb_dirty == 3)
+		/* just a clean<-> dirty transition, possibly leave spares alone,
+		 * though if events isn't the right even/odd, we will have to do
+		 * spares after all
+		 */
+		nospares = 1;
+
+	/* If this is just a dirty<->clean transition, and the array is clean
+	 * and 'events' is odd, we can roll back to the previous clean state */
+	if (mddev->sb_dirty == 3
+	    && (mddev->in_sync && mddev->recovery_cp == MaxSector)
+	    && (mddev->events & 1))
+		mddev->events--;
+	else {
+		/* otherwise we have to go forward and ... */
+		mddev->events ++;
+		if (!mddev->in_sync || mddev->recovery_cp != MaxSector) { /* not clean */
+			/* .. if the array isn't clean, insist on an odd 'events' */
+			if ((mddev->events&1)==0) {
+				mddev->events++;
+				nospares = 0;
+			}
+		} else {
+			/* otherwise insist on an even 'events' (for clean states) */
+			if ((mddev->events&1)) {
+				mddev->events++;
+				nospares = 0;
+			}
+		}
+	}
 
 	if (!mddev->events) {
 		/*
@@ -1593,7 +1638,7 @@ repeat:
 		mddev->events --;
 	}
 	mddev->sb_dirty = 2;
-	sync_sbs(mddev);
+	sync_sbs(mddev, nospares);
 
 	/*
 	 * do not write anything to disk if using
@@ -1615,6 +1660,8 @@ repeat:
 	ITERATE_RDEV(mddev,rdev,tmp) {
 		char b[BDEVNAME_SIZE];
 		dprintk(KERN_INFO "md: ");
+		if (rdev->sb_loaded != 1)
+			continue; /* no noise on spare devices */
 		if (test_bit(Faulty, &rdev->flags))
 			dprintk("(skipping faulty ");
 
@@ -1626,6 +1673,7 @@ repeat:
 			dprintk(KERN_INFO "(write) %s's sb offset: %llu\n",
 				bdevname(rdev->bdev,b),
 				(unsigned long long)rdev->sb_offset);
+			rdev->sb_events = mddev->events;
 
 		} else
 			dprintk(")\n");
@@ -1895,6 +1943,7 @@ static mdk_rdev_t *md_import_device(dev_
 	rdev->desc_nr = -1;
 	rdev->flags = 0;
 	rdev->data_offset = 0;
+	rdev->sb_events = 0;
 	atomic_set(&rdev->nr_pending, 0);
 	atomic_set(&rdev->read_errors, 0);
 	atomic_set(&rdev->corrected_errors, 0);
@@ -4708,7 +4757,7 @@ void md_write_start(mddev_t *mddev, stru
 		spin_lock_irq(&mddev->write_lock);
 		if (mddev->in_sync) {
 			mddev->in_sync = 0;
-			mddev->sb_dirty = 1;
+			mddev->sb_dirty = 3;
 			md_wakeup_thread(mddev->thread);
 		}
 		spin_unlock_irq(&mddev->write_lock);
@@ -5055,7 +5104,7 @@ void md_check_recovery(mddev_t *mddev)
 		if (mddev->safemode && !atomic_read(&mddev->writes_pending) &&
 		    !mddev->in_sync && mddev->recovery_cp == MaxSector) {
 			mddev->in_sync = 1;
-			mddev->sb_dirty = 1;
+			mddev->sb_dirty = 3;
 		}
 		if (mddev->safemode == 1)
 			mddev->safemode = 0;

diff ./include/linux/raid/md_k.h~current~ ./include/linux/raid/md_k.h
--- ./include/linux/raid/md_k.h~current~	2006-06-01 15:03:28.000000000 +1000
+++ ./include/linux/raid/md_k.h	2006-06-01 15:05:29.000000000 +1000
@@ -58,6 +58,7 @@ struct mdk_rdev_s
 
 	struct page	*sb_page;
 	int		sb_loaded;
+	__u64		sb_events;
 	sector_t	data_offset;	/* start of data in array */
 	sector_t	sb_offset;
 	int		sb_size;	/* bytes in the superblock */

next prev parent reply	other threads:[~2006-06-01  5:13 UTC|newest]

Thread overview: 17+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2006-06-01  5:13 [PATCH 000 of 10] md: Introduction - assorted patches for -mm NeilBrown
2006-06-01  5:13 ` [PATCH 001 of 10] md: md Kconfig speeling feex NeilBrown
2006-06-01  5:13 ` [PATCH 002 of 10] md: Fix Kconfig error NeilBrown
2006-06-01  5:13 ` [PATCH 003 of 10] md: Fix bug that stops raid5 resync from happening NeilBrown
2006-06-01  5:13 ` [PATCH 004 of 10] md: Allow re-add to work on array without bitmaps NeilBrown
2006-06-01  5:13 ` NeilBrown [this message]
2006-06-01  5:13 ` [PATCH 006 of 10] md: Set/get state of array via sysfs NeilBrown
2006-06-01  5:33   ` Chris Wright
2006-06-01  5:43     ` Neil Brown
2006-06-01  5:14 ` [PATCH 007 of 10] md: Allow rdev state to be set " NeilBrown
2006-06-01  5:14 ` [PATCH 008 of 10] md: Allow raid 'layout' to be read and " NeilBrown
2006-06-01  5:34   ` Chris Wright
2006-06-01  5:44     ` Neil Brown
2006-06-01  5:14 ` [PATCH 009 of 10] md: Allow resync_start to be set and queried " NeilBrown
2006-06-01  5:14 ` [PATCH 010 of 10] md: Allow the write_mostly flag to be set " NeilBrown
2006-08-05  5:59   ` Mike Snitzer
2006-08-05 23:43     ` Mike Snitzer

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1060601051352.27601@suse.de \
    --to=neilb@suse.de \
    --cc=akpm@osdl.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-raid@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).