All of lore.kernel.org
 help / color / mirror / Atom feed
From: NeilBrown <neilb@suse.de>
To: Andrew Morton <akpm@linux-foundation.org>
Cc: linux-raid@vger.kernel.org, linux-kernel@vger.kernel.org
Subject: [PATCH 001 of 7] md: Support 'external' metadata for md arrays.
Date: Fri, 14 Dec 2007 17:26:08 +1100	[thread overview]
Message-ID: <1071214062608.1815@suse.de> (raw)
In-Reply-To: 20071214171950.1308.patches@notabene


- Add a state flag 'external' to indicate that the metadata is managed
  externally (by user-space), so important changes need to be
  left to user-space to handle.
  The alternatives are non-persistent ('none'), where there is no stable
  metadata - after the array is stopped there is no record of its status -
  and internal, which can be version 0.90 or version 1.x.
  These are selected by writing to the 'metadata' attribute.



- move the updating of superblocks (sync_sbs) to after we have checked if
  there are any superblocks or not.

- New array state 'write_pending'.  This means that the metadata records
  the array as 'clean', but a write has been requested, so the metadata has
  to be updated to record a 'dirty' array before the write can continue.
  This change is reported to md by writing 'active' to the array_state
  attribute.

- tidy up marking of sb_dirty:
   - don't set sb_dirty when resync finishes as md_check_recovery
     calls md_update_sb when the sync thread finishes anyway.
   - Don't set sb_dirty in multipath_run as the array might not be dirty.
   - don't mark superblock dirty when switching to 'clean' if there
     is no internal superblock (if external, userspace can choose to
     update the superblock whenever it chooses to).

Signed-off-by: Neil Brown <neilb@suse.de>

### Diffstat output
 ./drivers/md/md.c           |   77 +++++++++++++++++++++++++++++++++-----------
 ./include/linux/raid/md_k.h |    3 +
 2 files changed, 61 insertions(+), 19 deletions(-)

diff .prev/drivers/md/md.c ./drivers/md/md.c
--- .prev/drivers/md/md.c	2007-12-14 16:07:51.000000000 +1100
+++ ./drivers/md/md.c	2007-12-14 16:08:28.000000000 +1100
@@ -778,7 +778,8 @@ static int super_90_validate(mddev_t *md
 		mddev->major_version = 0;
 		mddev->minor_version = sb->minor_version;
 		mddev->patch_version = sb->patch_version;
-		mddev->persistent = ! sb->not_persistent;
+		mddev->persistent = 1;
+		mddev->external = 0;
 		mddev->chunk_size = sb->chunk_size;
 		mddev->ctime = sb->ctime;
 		mddev->utime = sb->utime;
@@ -904,7 +905,7 @@ static void super_90_sync(mddev_t *mddev
 	sb->size  = mddev->size;
 	sb->raid_disks = mddev->raid_disks;
 	sb->md_minor = mddev->md_minor;
-	sb->not_persistent = !mddev->persistent;
+	sb->not_persistent = 0;
 	sb->utime = mddev->utime;
 	sb->state = 0;
 	sb->events_hi = (mddev->events>>32);
@@ -1158,6 +1159,7 @@ static int super_1_validate(mddev_t *mdd
 		mddev->major_version = 1;
 		mddev->patch_version = 0;
 		mddev->persistent = 1;
+		mddev->external = 0;
 		mddev->chunk_size = le32_to_cpu(sb->chunksize) << 9;
 		mddev->ctime = le64_to_cpu(sb->ctime) & ((1ULL << 32)-1);
 		mddev->utime = le64_to_cpu(sb->utime) & ((1ULL << 32)-1);
@@ -1699,18 +1701,20 @@ repeat:
 		MD_BUG();
 		mddev->events --;
 	}
-	sync_sbs(mddev, nospares);
 
 	/*
 	 * do not write anything to disk if using
 	 * nonpersistent superblocks
 	 */
 	if (!mddev->persistent) {
-		clear_bit(MD_CHANGE_PENDING, &mddev->flags);
+		if (!mddev->external)
+			clear_bit(MD_CHANGE_PENDING, &mddev->flags);
+
 		spin_unlock_irq(&mddev->write_lock);
 		wake_up(&mddev->sb_wait);
 		return;
 	}
+	sync_sbs(mddev, nospares);
 	spin_unlock_irq(&mddev->write_lock);
 
 	dprintk(KERN_INFO 
@@ -2430,6 +2434,8 @@ array_state_show(mddev_t *mddev, char *p
 		case 0:
 			if (mddev->in_sync)
 				st = clean;
+			else if (test_bit(MD_CHANGE_CLEAN, &mddev->flags))
+				st = write_pending;
 			else if (mddev->safemode)
 				st = active_idle;
 			else
@@ -2460,11 +2466,9 @@ array_state_store(mddev_t *mddev, const 
 		break;
 	case clear:
 		/* stopping an active array */
-		if (mddev->pers) {
-			if (atomic_read(&mddev->active) > 1)
-				return -EBUSY;
-			err = do_md_stop(mddev, 0);
-		}
+		if (atomic_read(&mddev->active) > 1)
+			return -EBUSY;
+		err = do_md_stop(mddev, 0);
 		break;
 	case inactive:
 		/* stopping an active array */
@@ -2472,7 +2476,8 @@ array_state_store(mddev_t *mddev, const 
 			if (atomic_read(&mddev->active) > 1)
 				return -EBUSY;
 			err = do_md_stop(mddev, 2);
-		}
+		} else
+			err = 0; /* already inactive */
 		break;
 	case suspended:
 		break; /* not supported yet */
@@ -2500,9 +2505,15 @@ array_state_store(mddev_t *mddev, const 
 			restart_array(mddev);
 			spin_lock_irq(&mddev->write_lock);
 			if (atomic_read(&mddev->writes_pending) == 0) {
-				mddev->in_sync = 1;
-				set_bit(MD_CHANGE_CLEAN, &mddev->flags);
-			}
+				if (mddev->in_sync == 0) {
+					mddev->in_sync = 1;
+					if (mddev->persistent)
+						set_bit(MD_CHANGE_CLEAN,
+							&mddev->flags);
+				}
+				err = 0;
+			} else
+				err = -EBUSY;
 			spin_unlock_irq(&mddev->write_lock);
 		} else {
 			mddev->ro = 0;
@@ -2513,7 +2524,8 @@ array_state_store(mddev_t *mddev, const 
 	case active:
 		if (mddev->pers) {
 			restart_array(mddev);
-			clear_bit(MD_CHANGE_CLEAN, &mddev->flags);
+			if (mddev->external)
+				clear_bit(MD_CHANGE_CLEAN, &mddev->flags);
 			wake_up(&mddev->sb_wait);
 			err = 0;
 		} else {
@@ -2664,7 +2676,9 @@ __ATTR(component_size, S_IRUGO|S_IWUSR, 
 
 
 /* Metdata version.
- * This is either 'none' for arrays with externally managed metadata,
+ * This is one of
+ *   'none' for arrays with no metadata (good luck...)
+ *   'external' for arrays with externally managed metadata,
  * or N.M for internally known formats
  */
 static ssize_t
@@ -2673,6 +2687,8 @@ metadata_show(mddev_t *mddev, char *page
 	if (mddev->persistent)
 		return sprintf(page, "%d.%d\n",
 			       mddev->major_version, mddev->minor_version);
+	else if (mddev->external)
+		return sprintf(page, "external:%s\n", mddev->metadata_type);
 	else
 		return sprintf(page, "none\n");
 }
@@ -2687,6 +2703,21 @@ metadata_store(mddev_t *mddev, const cha
 
 	if (cmd_match(buf, "none")) {
 		mddev->persistent = 0;
+		mddev->external = 0;
+		mddev->major_version = 0;
+		mddev->minor_version = 90;
+		return len;
+	}
+	if (strncmp(buf, "external:", 9) == 0) {
+		int namelen = len-9;
+		if (namelen >= sizeof(mddev->metadata_type))
+			namelen = sizeof(mddev->metadata_type)-1;
+		strncpy(mddev->metadata_type, buf+9, namelen);
+		mddev->metadata_type[namelen] = 0;
+		if (namelen && mddev->metadata_type[namelen-1] == '\n')
+			mddev->metadata_type[--namelen] = 0;
+		mddev->persistent = 0;
+		mddev->external = 1;
 		mddev->major_version = 0;
 		mddev->minor_version = 90;
 		return len;
@@ -2703,6 +2734,7 @@ metadata_store(mddev_t *mddev, const cha
 	mddev->major_version = major;
 	mddev->minor_version = minor;
 	mddev->persistent = 1;
+	mddev->external = 0;
 	return len;
 }
 
@@ -3527,6 +3559,7 @@ static int do_md_stop(mddev_t * mddev, i
 		mddev->raid_disks = 0;
 		mddev->recovery_cp = 0;
 		mddev->reshape_position = MaxSector;
+		mddev->external = 0;
 
 	} else if (mddev->pers)
 		printk(KERN_INFO "md: %s switched to read-only mode.\n",
@@ -4168,13 +4201,15 @@ static int set_array_info(mddev_t * mdde
 	else
 		mddev->recovery_cp = 0;
 	mddev->persistent    = ! info->not_persistent;
+	mddev->external	     = 0;
 
 	mddev->layout        = info->layout;
 	mddev->chunk_size    = info->chunk_size;
 
 	mddev->max_disks     = MD_SB_DISKS;
 
-	mddev->flags         = 0;
+	if (mddev->persistent)
+		mddev->flags         = 0;
 	set_bit(MD_CHANGE_DEVS, &mddev->flags);
 
 	mddev->default_bitmap_offset = MD_SB_BYTES >> 9;
@@ -4985,7 +5020,10 @@ static int md_seq_show(struct seq_file *
 					   mddev->major_version,
 					   mddev->minor_version);
 			}
-		} else
+		} else if (mddev->external)
+			seq_printf(seq, " super external:%s",
+				   mddev->metadata_type);
+		else
 			seq_printf(seq, " super non-persistent");
 
 		if (mddev->pers) {
@@ -5591,7 +5629,7 @@ void md_check_recovery(mddev_t *mddev)
 	}
 
 	if ( ! (
-		mddev->flags ||
+		(mddev->flags && !mddev->external) ||
 		test_bit(MD_RECOVERY_NEEDED, &mddev->recovery) ||
 		test_bit(MD_RECOVERY_DONE, &mddev->recovery) ||
 		(mddev->safemode == 1) ||
@@ -5607,7 +5645,8 @@ void md_check_recovery(mddev_t *mddev)
 		if (mddev->safemode && !atomic_read(&mddev->writes_pending) &&
 		    !mddev->in_sync && mddev->recovery_cp == MaxSector) {
 			mddev->in_sync = 1;
-			set_bit(MD_CHANGE_CLEAN, &mddev->flags);
+			if (mddev->persistent)
+				set_bit(MD_CHANGE_CLEAN, &mddev->flags);
 		}
 		if (mddev->safemode == 1)
 			mddev->safemode = 0;

diff .prev/include/linux/raid/md_k.h ./include/linux/raid/md_k.h
--- .prev/include/linux/raid/md_k.h	2007-12-14 16:07:51.000000000 +1100
+++ ./include/linux/raid/md_k.h	2007-12-14 16:07:54.000000000 +1100
@@ -130,6 +130,9 @@ struct mddev_s
 					minor_version,
 					patch_version;
 	int				persistent;
+	int 				external;	/* metadata is
+							 * managed externally */
+	char				metadata_type[17]; /* externally set*/
 	int				chunk_size;
 	time_t				ctime, utime;
 	int				level, layout;

  reply	other threads:[~2007-12-14  6:26 UTC|newest]

Thread overview: 19+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2007-12-14  6:26 [PATCH 000 of 7] md: Introduction EXPLAIN PATCH SET HERE NeilBrown
2007-12-14  6:26 ` NeilBrown
2007-12-14  6:26 ` NeilBrown [this message]
2007-12-25 22:03   ` [PATCH 001 of 7] md: Support 'external' metadata for md arrays Andrew Morton
2007-12-14  6:26 ` [PATCH 002 of 7] md: Give userspace control over removing failed devices when external metdata in use NeilBrown
2007-12-14  6:26 ` [PATCH 003 of 7] md: Allow a maximum extent to be set for resyncing NeilBrown
2007-12-14  6:26 ` [PATCH 004 of 7] md: Allow devices to be shared between md arrays NeilBrown
2007-12-25 22:04   ` Andrew Morton
2007-12-14  6:26 ` [PATCH 005 of 7] md: Lock address when changing attributes of component devices NeilBrown
2007-12-14  6:26 ` [PATCH 006 of 7] md: Allow an md array to appear with 0 drives if it has external metadata NeilBrown
2007-12-14  6:26 ` [PATCH 007 of 7] md: Get name for block device in sysfs NeilBrown
2007-12-14  6:26   ` NeilBrown
2007-12-15 16:58   ` Kay Sievers
2007-12-16 22:43     ` Neil Brown
2007-12-17  2:10       ` Kay Sievers
2007-12-17  5:29         ` /sys/block [was: [PATCH 007 of 7] md: Get name for block device in sysfs] Michael Tokarev
2007-12-17  8:24           ` Kay Sievers
2007-12-17  8:32             ` Michael Tokarev
2007-12-17  9:13               ` Michael Tokarev

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1071214062608.1815@suse.de \
    --to=neilb@suse.de \
    --cc=akpm@linux-foundation.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-raid@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.