All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v3 00/12] snapshot-merge target
@ 2009-11-17 16:12 Mike Snitzer
  2009-11-17 16:12 ` [PATCH v3 01/12] dm exception store: add snapshot-merge specific methods Mike Snitzer
                   ` (13 more replies)
  0 siblings, 14 replies; 20+ messages in thread
From: Mike Snitzer @ 2009-11-17 16:12 UTC (permalink / raw)
  To: dm-devel

snapshot-merge target, quilt tree is maintained here:
http://people.redhat.com/msnitzer/patches/snapshot-merge/kernel/2.6.33/

For LVM2 support please see:
http://people.redhat.com/msnitzer/patches/snapshot-merge/lvm2/LVM2-2.02.55/

This patch series builds on Alasdair's latest 'editing' quilt tree;
but also depends on "dm snapshot: rework writing to snapshot origin",
see: http://patchwork.kernel.org/patch/60691/

Mike Snitzer (2):
  dm exception store: snapshot-merge usage accounting
  dm snapshot: merge a linear region of chunks using one large IO

Mikulas Patocka (10):
  dm exception store: add snapshot-merge specific methods
  dm snapshot: add snapshot-merge target
  dm snapshot: merge target should not allocate new exceptions
  dm snapshot: do not allow more than one merging snapshot.
  dm snapshot: the merge procedure
  dm snapshot: queue writes to an area that is actively being merged
  dm snapshot: do not merge a chunk until active writes to it finish
  dm snapshot: make exceptions in other snapshots when merging
  dm snapshot: make exceptions if merge is dispatching to origin
  dm snapshot: redirect accesses to origin if merging snap invalidated

 drivers/md/dm-exception-store.h |   27 +++
 drivers/md/dm-snap-persistent.c |   91 +++++++++-
 drivers/md/dm-snap.c            |  409 +++++++++++++++++++++++++++++++++++++--
 3 files changed, 509 insertions(+), 18 deletions(-)

^ permalink raw reply	[flat|nested] 20+ messages in thread

* [PATCH v3 01/12] dm exception store: add snapshot-merge specific methods
  2009-11-17 16:12 [PATCH v3 00/12] snapshot-merge target Mike Snitzer
@ 2009-11-17 16:12 ` Mike Snitzer
  2009-11-17 16:12 ` [PATCH v3 02/12] dm exception store: snapshot-merge usage accounting Mike Snitzer
                   ` (12 subsequent siblings)
  13 siblings, 0 replies; 20+ messages in thread
From: Mike Snitzer @ 2009-11-17 16:12 UTC (permalink / raw)
  To: dm-devel

From: Mikulas Patocka <mpatocka@redhat.com>

prepare_merge: returns the last chunk in the variables passed by reference.
	The return value is the number of consecutive chunks.
commit_merge: permanently removes 'n' chunks from the exception store.
	'n' is less or equal that the number returned by prepare_merge.

If the caller wishes, it can do the optimization of merging several consecutive
chunks at once. If it doesn't want to do this optimization, it just calls
commit_merge with n == 1.

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Reviewed-by: Mike Snitzer <snitzer@redhat.com>
---
 drivers/md/dm-exception-store.h |   16 +++++++++
 drivers/md/dm-snap-persistent.c |   70 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 86 insertions(+), 0 deletions(-)

diff --git a/drivers/md/dm-exception-store.h b/drivers/md/dm-exception-store.h
index bb88746..534427f 100644
--- a/drivers/md/dm-exception-store.h
+++ b/drivers/md/dm-exception-store.h
@@ -75,6 +75,22 @@ struct dm_exception_store_type {
 				  void *callback_context);
 
 	/*
+	 * Returns the last chunk in the pointers. (TODO: -ENOPARSE)
+	 * > 0:  the number of consecutive chunks that can
+	 *       be copied in one shot.
+	 * == 0: the exception store is empty.
+	 * < 0:  error.
+	 */
+	int (*prepare_merge) (struct dm_exception_store *store,
+			      chunk_t *old_chunk, chunk_t *new_chunk);
+
+	/*
+	 * Clear the last n exceptions.
+	 * n must be <= the value returned by prepare_merge.
+	 */
+	int (*commit_merge) (struct dm_exception_store *store, int n);
+
+	/*
 	 * The snapshot is invalid, note this in the metadata.
 	 */
 	void (*drop_snapshot) (struct dm_exception_store *store);
diff --git a/drivers/md/dm-snap-persistent.c b/drivers/md/dm-snap-persistent.c
index 157999e..1f5752e 100644
--- a/drivers/md/dm-snap-persistent.c
+++ b/drivers/md/dm-snap-persistent.c
@@ -409,6 +409,15 @@ static void write_exception(struct pstore *ps,
 	e->new_chunk = cpu_to_le64(de->new_chunk);
 }
 
+static void clear_exception(struct pstore *ps, uint32_t index)
+{
+	struct disk_exception *e = get_exception(ps, index);
+
+	/* clear it */
+	e->old_chunk = 0;
+	e->new_chunk = 0;
+}
+
 /*
  * Registers the exceptions that are present in the current area.
  * 'full' is filled in to indicate if the area has been
@@ -680,6 +689,63 @@ static void persistent_commit_exception(struct dm_exception_store *store,
 	ps->callback_count = 0;
 }
 
+static int persistent_prepare_merge(struct dm_exception_store *store,
+				    chunk_t *old_chunk, chunk_t *new_chunk)
+{
+	int r, i;
+	struct pstore *ps = get_info(store);
+	struct disk_exception de;
+
+	if (!ps->current_committed) {
+		if (!ps->current_area)
+			return 0;
+		ps->current_area--;
+		r = area_io(ps, READ);
+		if (r < 0)
+			return r;
+		ps->current_committed = ps->exceptions_per_area;
+	}
+
+	read_exception(ps, ps->current_committed - 1, &de);
+	*old_chunk = de.old_chunk;
+	*new_chunk = de.new_chunk;
+
+	for (i = 1; i < ps->current_committed; i++) {
+		read_exception(ps, ps->current_committed - 1 - i, &de);
+		if (de.old_chunk != *old_chunk - i ||
+		    de.new_chunk != *new_chunk - i)
+			break;
+	}
+
+	return i;
+}
+
+static int persistent_commit_merge(struct dm_exception_store *store, int n)
+{
+	int r, i;
+	struct pstore *ps = get_info(store);
+
+	BUG_ON(n > ps->current_committed);
+
+	for (i = 0; i < n; i++)
+		clear_exception(ps, ps->current_committed - 1 - i);
+
+	r = area_io(ps, WRITE);
+	if (r < 0)
+		return r;
+
+	ps->current_committed -= i;
+
+	/*
+	 * ps->next_free cannot really be reliably decreased here (because of
+	 * misordered chunks), so don't do it. We don't even need it, because
+	 * there is no situation where merging snapshot would become
+	 * non-merging.
+	 */
+
+	return 0;
+}
+
 static void persistent_drop_snapshot(struct dm_exception_store *store)
 {
 	struct pstore *ps = get_info(store);
@@ -748,6 +814,8 @@ static struct dm_exception_store_type _persistent_type = {
 	.read_metadata = persistent_read_metadata,
 	.prepare_exception = persistent_prepare_exception,
 	.commit_exception = persistent_commit_exception,
+	.prepare_merge = persistent_prepare_merge,
+	.commit_merge = persistent_commit_merge,
 	.drop_snapshot = persistent_drop_snapshot,
 	.usage = persistent_usage,
 	.status = persistent_status,
@@ -761,6 +829,8 @@ static struct dm_exception_store_type _persistent_compat_type = {
 	.read_metadata = persistent_read_metadata,
 	.prepare_exception = persistent_prepare_exception,
 	.commit_exception = persistent_commit_exception,
+	.prepare_merge = persistent_prepare_merge,
+	.commit_merge = persistent_commit_merge,
 	.drop_snapshot = persistent_drop_snapshot,
 	.usage = persistent_usage,
 	.status = persistent_status,
-- 
1.6.5.2

^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [PATCH v3 02/12] dm exception store: snapshot-merge usage accounting
  2009-11-17 16:12 [PATCH v3 00/12] snapshot-merge target Mike Snitzer
  2009-11-17 16:12 ` [PATCH v3 01/12] dm exception store: add snapshot-merge specific methods Mike Snitzer
@ 2009-11-17 16:12 ` Mike Snitzer
  2009-11-17 16:12 ` [PATCH v3 03/12] dm snapshot: add snapshot-merge target Mike Snitzer
                   ` (11 subsequent siblings)
  13 siblings, 0 replies; 20+ messages in thread
From: Mike Snitzer @ 2009-11-17 16:12 UTC (permalink / raw)
  To: dm-devel

Conditionally adjust snapshot usage accounting if snapshot-merge is in
progress.  Care is taken to preserve the established kernel<->userspace
interface.

Signed-off-by: Mike Snitzer <snitzer@redhat.com>
Cc: Mikulas Patocka <mpatocka@redhat.com>
---
 drivers/md/dm-snap-persistent.c |   21 ++++++++++++++++++++-
 1 files changed, 20 insertions(+), 1 deletions(-)

diff --git a/drivers/md/dm-snap-persistent.c b/drivers/md/dm-snap-persistent.c
index 1f5752e..3cb609b 100644
--- a/drivers/md/dm-snap-persistent.c
+++ b/drivers/md/dm-snap-persistent.c
@@ -91,6 +91,7 @@ struct pstore {
 	struct dm_exception_store *store;
 	int version;
 	int valid;
+	int merging;	/* 1 if there is merging going on */
 	uint32_t exceptions_per_area;
 
 	/*
@@ -506,7 +507,20 @@ static void persistent_usage(struct dm_exception_store *store,
 {
 	struct pstore *ps = get_info(store);
 
-	*sectors_allocated = ps->next_free * store->chunk_size;
+	/*
+	 * Must maintain the fact that DM reports all metadata chunks
+	 * in 'sectors_allocated'
+	 * - preserves the established kernel<->userspace interface
+	 * - snapshot-merge must account for the first two metadata
+	 *   chunks in its 'sectors_allocated'
+	 */
+	if (!ps->merging) {
+		*sectors_allocated = ps->next_free * store->chunk_size;
+	} else {
+		*sectors_allocated =
+			(area_location(ps, ps->current_area) - 1 +
+			 ps->current_committed + 2) * store->chunk_size;
+	}
 	*total_sectors = get_dev_size(dm_snap_cow(store->snap)->bdev);
 
 	/*
@@ -608,6 +622,8 @@ static int persistent_prepare_exception(struct dm_exception_store *store,
 	chunk_t next_free;
 	sector_t size = get_dev_size(dm_snap_cow(store->snap)->bdev);
 
+	ps->merging = 0;
+
 	/* Is there enough room ? */
 	if (size < ((ps->next_free + 1) * store->chunk_size))
 		return -ENOSPC;
@@ -696,6 +712,8 @@ static int persistent_prepare_merge(struct dm_exception_store *store,
 	struct pstore *ps = get_info(store);
 	struct disk_exception de;
 
+	ps->merging = 1;
+
 	if (!ps->current_committed) {
 		if (!ps->current_area)
 			return 0;
@@ -767,6 +785,7 @@ static int persistent_ctr(struct dm_exception_store *store,
 
 	ps->store = store;
 	ps->valid = 1;
+	ps->merging = 0;
 	ps->version = SNAPSHOT_DISK_VERSION;
 	ps->area = NULL;
 	ps->zero_area = NULL;
-- 
1.6.5.2

^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [PATCH v3 03/12] dm snapshot: add snapshot-merge target
  2009-11-17 16:12 [PATCH v3 00/12] snapshot-merge target Mike Snitzer
  2009-11-17 16:12 ` [PATCH v3 01/12] dm exception store: add snapshot-merge specific methods Mike Snitzer
  2009-11-17 16:12 ` [PATCH v3 02/12] dm exception store: snapshot-merge usage accounting Mike Snitzer
@ 2009-11-17 16:12 ` Mike Snitzer
  2009-11-17 16:12 ` [PATCH v3 04/12] dm snapshot: merge target should not allocate new exceptions Mike Snitzer
                   ` (10 subsequent siblings)
  13 siblings, 0 replies; 20+ messages in thread
From: Mike Snitzer @ 2009-11-17 16:12 UTC (permalink / raw)
  To: dm-devel

From: Mikulas Patocka <mpatocka@redhat.com>

For starters, snapshot-merge is identical to the snapshot target.

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
 drivers/md/dm-snap.c |   48 ++++++++++++++++++++++++++++++++++++------------
 1 files changed, 36 insertions(+), 12 deletions(-)

diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index 11af0a9..51bc74f 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -1647,6 +1647,21 @@ static struct target_type snapshot_target = {
 	.iterate_devices = snapshot_iterate_devices,
 };
 
+static struct target_type merge_target = {
+	.name    = "snapshot-merge",
+	.version = {1, 9, 0},
+	.module  = THIS_MODULE,
+	.ctr     = snapshot_ctr,
+	.dtr     = snapshot_dtr,
+	.map     = snapshot_map,
+	.end_io  = snapshot_end_io,
+	.postsuspend = snapshot_postsuspend,
+	.preresume  = snapshot_preresume,
+	.resume  = snapshot_resume,
+	.status  = snapshot_status,
+	.iterate_devices = snapshot_iterate_devices,
+};
+
 static int __init dm_snapshot_init(void)
 {
 	int r;
@@ -1658,7 +1673,7 @@ static int __init dm_snapshot_init(void)
 	}
 
 	r = dm_register_target(&snapshot_target);
-	if (r) {
+	if (r < 0) {
 		DMERR("snapshot target register failed %d", r);
 		goto bad_register_snapshot_target;
 	}
@@ -1666,34 +1681,40 @@ static int __init dm_snapshot_init(void)
 	r = dm_register_target(&origin_target);
 	if (r < 0) {
 		DMERR("Origin target register failed %d", r);
-		goto bad1;
+		goto bad_register_origin_target;
+	}
+
+	r = dm_register_target(&merge_target);
+	if (r < 0) {
+		DMERR("Merge target register failed %d", r);
+		goto bad_register_merge_target;
 	}
 
 	r = init_origin_hash();
 	if (r) {
 		DMERR("init_origin_hash failed.");
-		goto bad2;
+		goto bad_origin_hash;
 	}
 
 	exception_cache = KMEM_CACHE(dm_exception, 0);
 	if (!exception_cache) {
 		DMERR("Couldn't create exception cache.");
 		r = -ENOMEM;
-		goto bad3;
+		goto bad_exception_cache;
 	}
 
 	pending_cache = KMEM_CACHE(dm_snap_pending_exception, 0);
 	if (!pending_cache) {
 		DMERR("Couldn't create pending cache.");
 		r = -ENOMEM;
-		goto bad4;
+		goto bad_pending_cache;
 	}
 
 	tracked_chunk_cache = KMEM_CACHE(dm_snap_tracked_chunk, 0);
 	if (!tracked_chunk_cache) {
 		DMERR("Couldn't create cache to track chunks in use.");
 		r = -ENOMEM;
-		goto bad5;
+		goto bad_tracked_chunk_cache;
 	}
 
 	ksnapd = create_singlethread_workqueue("ksnapd");
@@ -1707,19 +1728,21 @@ static int __init dm_snapshot_init(void)
 
 bad_pending_pool:
 	kmem_cache_destroy(tracked_chunk_cache);
-bad5:
+bad_tracked_chunk_cache:
 	kmem_cache_destroy(pending_cache);
-bad4:
+bad_pending_cache:
 	kmem_cache_destroy(exception_cache);
-bad3:
+bad_exception_cache:
 	exit_origin_hash();
-bad2:
+bad_origin_hash:
+	dm_unregister_target(&merge_target);
+bad_register_merge_target:
 	dm_unregister_target(&origin_target);
-bad1:
+bad_register_origin_target:
 	dm_unregister_target(&snapshot_target);
-
 bad_register_snapshot_target:
 	dm_exception_store_exit();
+
 	return r;
 }
 
@@ -1729,6 +1752,7 @@ static void __exit dm_snapshot_exit(void)
 
 	dm_unregister_target(&snapshot_target);
 	dm_unregister_target(&origin_target);
+	dm_unregister_target(&merge_target);
 
 	exit_origin_hash();
 	kmem_cache_destroy(pending_cache);
-- 
1.6.5.2

^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [PATCH v3 04/12] dm snapshot: merge target should not allocate new exceptions
  2009-11-17 16:12 [PATCH v3 00/12] snapshot-merge target Mike Snitzer
                   ` (2 preceding siblings ...)
  2009-11-17 16:12 ` [PATCH v3 03/12] dm snapshot: add snapshot-merge target Mike Snitzer
@ 2009-11-17 16:12 ` Mike Snitzer
  2009-11-17 16:12 ` [PATCH v3 05/12] dm snapshot: do not allow more than one merging snapshot Mike Snitzer
                   ` (9 subsequent siblings)
  13 siblings, 0 replies; 20+ messages in thread
From: Mike Snitzer @ 2009-11-17 16:12 UTC (permalink / raw)
  To: dm-devel

From: Mikulas Patocka <mpatocka@redhat.com>

Make new method, snapshot_merge_map, that won't allocate exceptions.
Modify __origin_write so that it doesn't allocate exceptions in
merging snapshots.

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Reviewed-by: Mike Snitzer <snitzer@redhat.com>
---
 drivers/md/dm-snap.c |   46 +++++++++++++++++++++++++++++++++++++++++++++-
 1 files changed, 45 insertions(+), 1 deletions(-)

diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index 51bc74f..5fd7055 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -143,6 +143,11 @@ static int bdev_equal(struct block_device *lhs, struct block_device *rhs)
 	return lhs == rhs;
 }
 
+static int snapshot_merge_map(struct dm_target *ti, struct bio *bio,
+			       union map_info *map_context);
+
+#define is_merge(ti)	((ti)->type->map == snapshot_merge_map)
+
 struct dm_snap_pending_exception {
 	struct dm_exception e;
 
@@ -1256,6 +1261,40 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio,
 	return r;
 }
 
+static int snapshot_merge_map(struct dm_target *ti, struct bio *bio,
+			       union map_info *map_context)
+{
+	struct dm_exception *e;
+	struct dm_snapshot *s = ti->private;
+	int r = DM_MAPIO_REMAPPED;
+	chunk_t chunk;
+
+	chunk = sector_to_chunk(s->store, bio->bi_sector);
+
+	down_read(&s->lock);
+
+	/* Full snapshots are not usable */
+	if (!s->valid) {
+		r = -EIO;
+		goto out_unlock;
+	}
+
+	/* If the block is already remapped - use that */
+	e = dm_lookup_exception(&s->complete, chunk);
+	if (e) {
+		remap_exception(s, e, bio, chunk);
+		goto out_unlock;
+	}
+
+	bio->bi_bdev = s->origin->bdev;
+
+ out_unlock:
+	up_read(&s->lock);
+
+	return r;
+}
+
+
 static int snapshot_end_io(struct dm_target *ti, struct bio *bio,
 			   int error, union map_info *map_context)
 {
@@ -1425,6 +1464,11 @@ static int __origin_write(struct list_head *snapshots,
 	/* Do all the snapshots on this origin */
 	list_for_each_entry (snap, snapshots, list) {
 
+		/* If the snapshot is merging, don't make new exceptions in it
+		   - but in this case no one should be writing to the origin */
+		if (is_merge(snap->ti))
+			continue;
+
 		down_write(&snap->lock);
 
 		/* Only deal with valid and active snapshots */
@@ -1653,7 +1697,7 @@ static struct target_type merge_target = {
 	.module  = THIS_MODULE,
 	.ctr     = snapshot_ctr,
 	.dtr     = snapshot_dtr,
-	.map     = snapshot_map,
+	.map     = snapshot_merge_map,
 	.end_io  = snapshot_end_io,
 	.postsuspend = snapshot_postsuspend,
 	.preresume  = snapshot_preresume,
-- 
1.6.5.2

^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [PATCH v3 05/12] dm snapshot: do not allow more than one merging snapshot.
  2009-11-17 16:12 [PATCH v3 00/12] snapshot-merge target Mike Snitzer
                   ` (3 preceding siblings ...)
  2009-11-17 16:12 ` [PATCH v3 04/12] dm snapshot: merge target should not allocate new exceptions Mike Snitzer
@ 2009-11-17 16:12 ` Mike Snitzer
  2009-11-17 16:12 ` [PATCH v3 06/12] dm snapshot: the merge procedure Mike Snitzer
                   ` (8 subsequent siblings)
  13 siblings, 0 replies; 20+ messages in thread
From: Mike Snitzer @ 2009-11-17 16:12 UTC (permalink / raw)
  To: dm-devel

From: Mikulas Patocka <mpatocka@redhat.com>

It is pointless to merge two snapshots simultaneously.
Allowing multiple merging snapshots would complicate things later.
Adds function to find merging snapshot for a given origin device.

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
 drivers/md/dm-snap.c |   33 +++++++++++++++++++++++++++++++++
 1 files changed, 33 insertions(+), 0 deletions(-)

diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index 5fd7055..2a9fae9 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -561,6 +561,30 @@ out:
 	return handover_snap;
 }
 
+static struct dm_snapshot *find_merging_snapshot(struct block_device *origin)
+{
+	struct dm_snapshot *s, *merging_snap = NULL;
+	struct origin *o;
+
+	down_read(&_origins_lock);
+
+	o = __lookup_origin(origin);
+	if (!o)
+		goto out;
+
+	list_for_each_entry(s, &o->snapshots, list) {
+		if (is_merge(s->ti)) {
+			merging_snap = s;
+			break;
+		}
+	}
+
+out:
+	up_read(&_origins_lock);
+
+	return merging_snap;
+}
+
 #define min_not_zero(l, r) (((l) == 0) ? (r) : (((r) == 0) ? (l) : min(l, r)))
 
 /*
@@ -732,6 +756,15 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 	/* Does snapshot need exceptions handed over to it? */
 	handover_snap = find_snapshot_using_cow(s);
 	if (handover_snap) {
+		if (is_merge(s->ti)) {
+			r = -EINVAL;
+			/* Do not allow more than one merging snapshot */
+			if (find_merging_snapshot(s->origin->bdev)) {
+				ti->error = "A snapshot is already merging.";
+				goto bad_load_and_register;
+			}
+		}
+
 		down_write(&handover_snap->lock);
 		if (handover_snap->is_handover_source) {
 			up_write(&handover_snap->lock);
-- 
1.6.5.2

^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [PATCH v3 06/12] dm snapshot: the merge procedure
  2009-11-17 16:12 [PATCH v3 00/12] snapshot-merge target Mike Snitzer
                   ` (4 preceding siblings ...)
  2009-11-17 16:12 ` [PATCH v3 05/12] dm snapshot: do not allow more than one merging snapshot Mike Snitzer
@ 2009-11-17 16:12 ` Mike Snitzer
  2009-11-17 16:12 ` [PATCH v3 07/12] dm snapshot: queue writes to an area that is actively being merged Mike Snitzer
                   ` (7 subsequent siblings)
  13 siblings, 0 replies; 20+ messages in thread
From: Mike Snitzer @ 2009-11-17 16:12 UTC (permalink / raw)
  To: dm-devel

From: Mikulas Patocka <mpatocka@redhat.com>

We don't need a separate thread, kcopyd does the job just fine
(provided that we have private kcopyd).

Merging is started when origin is resumed and it is stopped when
origin is suspended or when the merging snapshot is destoyed.

Merging is not interlocked with writes, so there is a race condition
with concurrent access. It will be fixed in further patches.

Adds a supporting function to decrement consecutive chunk counter.
Care is taken to increment the exception's old_chunk and new_chunk,
prior to the dm_consecutive_chunk_count_dec() call, if the chunk is at
the start of an exception's consecutive chunk range.  This allows for
snapshot-merge to support chunks that are added to the 'complete'
exception hash table before existing chunks.

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
 drivers/md/dm-exception-store.h |   11 +++
 drivers/md/dm-snap.c            |  178 +++++++++++++++++++++++++++++++++++++--
 2 files changed, 183 insertions(+), 6 deletions(-)

diff --git a/drivers/md/dm-exception-store.h b/drivers/md/dm-exception-store.h
index 534427f..7b83002 100644
--- a/drivers/md/dm-exception-store.h
+++ b/drivers/md/dm-exception-store.h
@@ -153,6 +153,13 @@ static inline void dm_consecutive_chunk_count_inc(struct dm_exception *e)
 	BUG_ON(!dm_consecutive_chunk_count(e));
 }
 
+static inline void dm_consecutive_chunk_count_dec(struct dm_exception *e)
+{
+	BUG_ON(!dm_consecutive_chunk_count(e));
+
+	e->new_chunk -= (1ULL << DM_CHUNK_NUMBER_BITS);
+}
+
 #  else
 #    define DM_CHUNK_CONSECUTIVE_BITS 0
 
@@ -170,6 +177,10 @@ static inline void dm_consecutive_chunk_count_inc(struct dm_exception *e)
 {
 }
 
+static inline void dm_consecutive_chunk_count_dec(struct dm_exception *e)
+{
+}
+
 #  endif
 
 /*
diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index 2a9fae9..f9aac21 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -117,6 +117,13 @@ struct dm_snapshot {
 	mempool_t *tracked_chunk_pool;
 	spinlock_t tracked_chunk_lock;
 	struct hlist_head tracked_chunk_hash[DM_TRACKED_CHUNK_HASH_SIZE];
+
+	/* Merge operation is in progress */
+	int merge_running;
+
+	/* It is requested to shut down merging */
+	/* Cleared back to 0 when the merging is stopped */
+	int merge_shutdown;
 };
 
 struct dm_dev *dm_snap_cow(struct dm_snapshot *s)
@@ -655,6 +662,123 @@ static int init_hash_tables(struct dm_snapshot *s)
 	return 0;
 }
 
+static void merge_callback(int read_err, unsigned long write_err,
+			   void *context);
+
+static void snapshot_merge_process(struct dm_snapshot *s)
+{
+	int r;
+	chunk_t old_chunk, new_chunk;
+	struct dm_exception *e;
+	struct dm_io_region src, dest;
+
+	BUG_ON(!s->merge_running);
+	if (s->merge_shutdown)
+		goto shut;
+
+	if (!s->valid) {
+		DMERR("snapshot is invalid, can't merge");
+		goto shut;
+	}
+
+	r = s->store->type->prepare_merge(s->store, &old_chunk, &new_chunk);
+	if (r <= 0) {
+		if (r < 0)
+			DMERR("Read error in exception store, "
+			      "shutting down merge");
+		goto shut;
+	}
+
+	/* TODO: use larger I/O size once we verify that kcopyd handles it */
+
+	/* !!! FIXME: intelock writes to this chunk */
+	down_write(&s->lock);
+	e = dm_lookup_exception(&s->complete, old_chunk);
+	if (!e) {
+		DMERR("exception for block %llu is on disk but not in memory",
+		      (unsigned long long)old_chunk);
+		up_write(&s->lock);
+		goto shut;
+	}
+	if (dm_consecutive_chunk_count(e)) {
+		if (old_chunk == e->old_chunk) {
+			e->old_chunk++;
+			e->new_chunk++;
+		} else if (old_chunk != e->old_chunk +
+			   dm_consecutive_chunk_count(e)) {
+			DMERR("merge from the middle of a chunk range");
+			up_write(&s->lock);
+			goto shut;
+		}
+		dm_consecutive_chunk_count_dec(e);
+	} else {
+		dm_remove_exception(e);
+		free_completed_exception(e);
+	}
+	up_write(&s->lock);
+
+	dest.bdev = s->origin->bdev;
+	dest.sector = chunk_to_sector(s->store, old_chunk);
+	dest.count = min((sector_t)s->store->chunk_size,
+			 get_dev_size(dest.bdev) - dest.sector);
+
+	src.bdev = s->cow->bdev;
+	src.sector = chunk_to_sector(s->store, new_chunk);
+	src.count = dest.count;
+
+	dm_kcopyd_copy(s->kcopyd_client, &src, 1, &dest, 0, merge_callback, s);
+	return;
+
+shut:
+	s->merge_running = 0;
+}
+
+static void merge_callback(int read_err, unsigned long write_err, void *context)
+{
+	int r;
+	struct dm_snapshot *s = context;
+
+	if (read_err || write_err) {
+		if (read_err)
+			DMERR("Read error in data, shutting down merge");
+		else
+			DMERR("Write error in data, shutting down merge");
+		goto shut;
+	}
+
+	r = s->store->type->commit_merge(s->store, 1);
+	if (r < 0) {
+		DMERR("Write error in exception store, shutting down merge");
+		goto shut;
+	}
+
+	snapshot_merge_process(s);
+	return;
+
+shut:
+	s->merge_running = 0;
+}
+
+static void start_merge(struct dm_snapshot *s)
+{
+	if (!s->merge_running && !s->merge_shutdown) {
+		s->merge_running = 1;
+		snapshot_merge_process(s);
+	}
+}
+
+/*
+ * Stop the merging process and wait until it finishes.
+ */
+static void stop_merge(struct dm_snapshot *s)
+{
+	while (s->merge_running) {
+		s->merge_shutdown = 1;
+		msleep(1);
+	}
+	s->merge_shutdown = 0;
+}
+
 /*
  * Construct a snapshot mapping: <origin_dev> <COW-dev> <p/n> <chunk-size>
  */
@@ -720,6 +844,8 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 	init_rwsem(&s->lock);
 	INIT_LIST_HEAD(&s->list);
 	spin_lock_init(&s->pe_lock);
+	s->merge_running = 0;
+	s->merge_shutdown = 0;
 
 	/* Allocate hash table for COW data */
 	if (init_hash_tables(s)) {
@@ -763,6 +889,13 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 				ti->error = "A snapshot is already merging.";
 				goto bad_load_and_register;
 			}
+
+			if (!handover_snap->store->type->prepare_merge ||
+			    !handover_snap->store->type->commit_merge) {
+				ti->error = "Merging snapshot store must "
+					    "support snapshot-merge";
+				goto bad_load_and_register;
+			}
 		}
 
 		down_write(&handover_snap->lock);
@@ -916,6 +1049,9 @@ static void snapshot_dtr(struct dm_target *ti)
 	}
 
 normal_snapshot:
+	if (is_merge(ti))
+		stop_merge(s);
+
 	/* Prevent further origin writes from using this snapshot. */
 	/* After this returns there can be no new kcopyd jobs. */
 	unregister_snapshot(s);
@@ -1340,6 +1476,13 @@ static int snapshot_end_io(struct dm_target *ti, struct bio *bio,
 	return 0;
 }
 
+static void snapshot_merge_presuspend(struct dm_target *ti)
+{
+	struct dm_snapshot *s = ti->private;
+
+	stop_merge(s);
+}
+
 static void snapshot_postsuspend(struct dm_target *ti)
 {
 	struct dm_snapshot *s = ti->private;
@@ -1419,6 +1562,32 @@ static void snapshot_resume(struct dm_target *ti)
 	up_write(&s->lock);
 }
 
+static chunk_t get_origin_minimum_chunksize(struct block_device *bdev)
+{
+	chunk_t min_chunksize;
+
+	down_read(&_origins_lock);
+
+	min_chunksize = __minimum_chunk_size(__lookup_origin(bdev));
+
+	up_read(&_origins_lock);
+
+	return min_chunksize;
+}
+
+static void snapshot_merge_resume(struct dm_target *ti)
+{
+	struct dm_snapshot *s = ti->private;
+
+	snapshot_resume(ti);
+	/*
+	 * snapshot-merge can take on the role of the origin too
+	 * - must adjust snapshot-merge's ti->split_io accordingly
+	 */
+	ti->split_io = get_origin_minimum_chunksize(s->origin->bdev);
+	start_merge(s);
+}
+
 static int snapshot_status(struct dm_target *ti, status_type_t type,
 			   char *result, unsigned int maxlen)
 {
@@ -1664,11 +1833,7 @@ static void origin_resume(struct dm_target *ti)
 {
 	struct dm_dev *dev = ti->private;
 
-	down_read(&_origins_lock);
-
-	ti->split_io = __minimum_chunk_size(__lookup_origin(dev->bdev));
-
-	up_read(&_origins_lock);
+	ti->split_io = get_origin_minimum_chunksize(dev->bdev);
 }
 
 static int origin_status(struct dm_target *ti, status_type_t type, char *result,
@@ -1732,9 +1897,10 @@ static struct target_type merge_target = {
 	.dtr     = snapshot_dtr,
 	.map     = snapshot_merge_map,
 	.end_io  = snapshot_end_io,
+	.presuspend = snapshot_merge_presuspend,
 	.postsuspend = snapshot_postsuspend,
 	.preresume  = snapshot_preresume,
-	.resume  = snapshot_resume,
+	.resume  = snapshot_merge_resume,
 	.status  = snapshot_status,
 	.iterate_devices = snapshot_iterate_devices,
 };
-- 
1.6.5.2

^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [PATCH v3 07/12] dm snapshot: queue writes to an area that is actively being merged
  2009-11-17 16:12 [PATCH v3 00/12] snapshot-merge target Mike Snitzer
                   ` (5 preceding siblings ...)
  2009-11-17 16:12 ` [PATCH v3 06/12] dm snapshot: the merge procedure Mike Snitzer
@ 2009-11-17 16:12 ` Mike Snitzer
  2009-11-17 16:12 ` [PATCH v3 08/12] dm snapshot: do not merge a chunk until active writes to it finish Mike Snitzer
                   ` (6 subsequent siblings)
  13 siblings, 0 replies; 20+ messages in thread
From: Mike Snitzer @ 2009-11-17 16:12 UTC (permalink / raw)
  To: dm-devel

From: Mikulas Patocka <mpatocka@redhat.com>

New variables merge_write_interlock and merge_write_interlock_n
determine the chunk number (on the origin device) and number of chunks
that are being merged.  Writes to this area are held on the queue
merge_write_list.

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
 drivers/md/dm-snap.c |  119 +++++++++++++++++++++++++++++++++++++-------------
 1 files changed, 88 insertions(+), 31 deletions(-)

diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index f9aac21..e37fc93 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -124,6 +124,16 @@ struct dm_snapshot {
 	/* It is requested to shut down merging */
 	/* Cleared back to 0 when the merging is stopped */
 	int merge_shutdown;
+
+	/* Merging this area --- block any writes */
+	chunk_t merge_write_interlock;
+	int merge_write_interlock_n;
+
+	/*
+	 * A list of requests that were delayed because
+	 * of racing with merge
+	 */
+	struct bio_list merge_write_list;
 };
 
 struct dm_dev *dm_snap_cow(struct dm_snapshot *s)
@@ -662,6 +672,9 @@ static int init_hash_tables(struct dm_snapshot *s)
 	return 0;
 }
 
+static void flush_bios(struct bio *bio);
+static void error_bios(struct bio *bio);
+
 static void merge_callback(int read_err, unsigned long write_err,
 			   void *context);
 
@@ -669,7 +682,6 @@ static void snapshot_merge_process(struct dm_snapshot *s)
 {
 	int r;
 	chunk_t old_chunk, new_chunk;
-	struct dm_exception *e;
 	struct dm_io_region src, dest;
 
 	BUG_ON(!s->merge_running);
@@ -691,32 +703,6 @@ static void snapshot_merge_process(struct dm_snapshot *s)
 
 	/* TODO: use larger I/O size once we verify that kcopyd handles it */
 
-	/* !!! FIXME: intelock writes to this chunk */
-	down_write(&s->lock);
-	e = dm_lookup_exception(&s->complete, old_chunk);
-	if (!e) {
-		DMERR("exception for block %llu is on disk but not in memory",
-		      (unsigned long long)old_chunk);
-		up_write(&s->lock);
-		goto shut;
-	}
-	if (dm_consecutive_chunk_count(e)) {
-		if (old_chunk == e->old_chunk) {
-			e->old_chunk++;
-			e->new_chunk++;
-		} else if (old_chunk != e->old_chunk +
-			   dm_consecutive_chunk_count(e)) {
-			DMERR("merge from the middle of a chunk range");
-			up_write(&s->lock);
-			goto shut;
-		}
-		dm_consecutive_chunk_count_dec(e);
-	} else {
-		dm_remove_exception(e);
-		free_completed_exception(e);
-	}
-	up_write(&s->lock);
-
 	dest.bdev = s->origin->bdev;
 	dest.sector = chunk_to_sector(s->store, old_chunk);
 	dest.count = min((sector_t)s->store->chunk_size,
@@ -726,6 +712,13 @@ static void snapshot_merge_process(struct dm_snapshot *s)
 	src.sector = chunk_to_sector(s->store, new_chunk);
 	src.count = dest.count;
 
+	down_write(&s->lock);
+	s->merge_write_interlock = old_chunk;
+	s->merge_write_interlock_n = 1;
+	up_write(&s->lock);
+
+	/* !!! FIXME: wait until writes to this chunk drain */
+
 	dm_kcopyd_copy(s->kcopyd_client, &src, 1, &dest, 0, merge_callback, s);
 	return;
 
@@ -733,10 +726,25 @@ shut:
 	s->merge_running = 0;
 }
 
+/* This function drops s->lock */
+static inline void release_write_interlock(struct dm_snapshot *s, int err)
+{
+	struct bio *b;
+	s->merge_write_interlock = 0;
+	s->merge_write_interlock_n = 0;
+	b = bio_list_get(&s->merge_write_list);
+	up_write(&s->lock);
+	if (!err)
+		flush_bios(b);
+	else
+		error_bios(b);
+}
+
 static void merge_callback(int read_err, unsigned long write_err, void *context)
 {
-	int r;
+	int r, i;
 	struct dm_snapshot *s = context;
+	struct dm_exception *e;
 
 	if (read_err || write_err) {
 		if (read_err)
@@ -746,16 +754,51 @@ static void merge_callback(int read_err, unsigned long write_err, void *context)
 		goto shut;
 	}
 
-	r = s->store->type->commit_merge(s->store, 1);
+	r = s->store->type->commit_merge(s->store, s->merge_write_interlock_n);
 	if (r < 0) {
 		DMERR("Write error in exception store, shutting down merge");
 		goto shut;
 	}
 
+	down_write(&s->lock);
+	/*
+	 * Must process chunks (and associated exceptions) in reverse
+	 * so that dm_consecutive_chunk_count_dec() accounting works
+	 */
+	for (i = s->merge_write_interlock_n - 1; i >= 0; i--) {
+		chunk_t old_chunk = s->merge_write_interlock + i;
+		e = dm_lookup_exception(&s->complete, old_chunk);
+		if (!e) {
+			DMERR("exception for block %llu is on "
+			      "disk but not in memory",
+			      (unsigned long long)old_chunk);
+			up_write(&s->lock);
+			goto shut;
+		}
+		if (dm_consecutive_chunk_count(e)) {
+			if (old_chunk == e->old_chunk) {
+				e->old_chunk++;
+				e->new_chunk++;
+			} else if (old_chunk != e->old_chunk +
+				   dm_consecutive_chunk_count(e)) {
+				DMERR("merge from the middle of a chunk range");
+				up_write(&s->lock);
+				goto shut;
+			}
+			dm_consecutive_chunk_count_dec(e);
+		} else {
+			dm_remove_exception(e);
+			free_completed_exception(e);
+		}
+	}
+	release_write_interlock(s, 0);
+
 	snapshot_merge_process(s);
 	return;
 
 shut:
+	down_write(&s->lock);
+	release_write_interlock(s, 1);
 	s->merge_running = 0;
 }
 
@@ -846,6 +889,9 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 	spin_lock_init(&s->pe_lock);
 	s->merge_running = 0;
 	s->merge_shutdown = 0;
+	s->merge_write_interlock = 0;
+	s->merge_write_interlock_n = 0;
+	bio_list_init(&s->merge_write_list);
 
 	/* Allocate hash table for COW data */
 	if (init_hash_tables(s)) {
@@ -1440,7 +1486,7 @@ static int snapshot_merge_map(struct dm_target *ti, struct bio *bio,
 
 	chunk = sector_to_chunk(s->store, bio->bi_sector);
 
-	down_read(&s->lock);
+	down_write(&s->lock);
 
 	/* Full snapshots are not usable */
 	if (!s->valid) {
@@ -1451,6 +1497,17 @@ static int snapshot_merge_map(struct dm_target *ti, struct bio *bio,
 	/* If the block is already remapped - use that */
 	e = dm_lookup_exception(&s->complete, chunk);
 	if (e) {
+		/* We are copying this area --- so don't write to it */
+		if (bio_rw(bio) == WRITE &&
+		    chunk >= s->merge_write_interlock &&
+		    chunk < (s->merge_write_interlock +
+			     s->merge_write_interlock_n)) {
+			bio->bi_bdev = s->origin->bdev;
+			bio_list_add(&s->merge_write_list, bio);
+
+			r = DM_MAPIO_SUBMITTED;
+			goto out_unlock;
+		}
 		remap_exception(s, e, bio, chunk);
 		goto out_unlock;
 	}
@@ -1458,7 +1515,7 @@ static int snapshot_merge_map(struct dm_target *ti, struct bio *bio,
 	bio->bi_bdev = s->origin->bdev;
 
  out_unlock:
-	up_read(&s->lock);
+	up_write(&s->lock);
 
 	return r;
 }
-- 
1.6.5.2

^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [PATCH v3 08/12] dm snapshot: do not merge a chunk until active writes to it finish
  2009-11-17 16:12 [PATCH v3 00/12] snapshot-merge target Mike Snitzer
                   ` (6 preceding siblings ...)
  2009-11-17 16:12 ` [PATCH v3 07/12] dm snapshot: queue writes to an area that is actively being merged Mike Snitzer
@ 2009-11-17 16:12 ` Mike Snitzer
  2009-11-17 16:12 ` [PATCH v3 09/12] dm snapshot: make exceptions in other snapshots when merging Mike Snitzer
                   ` (5 subsequent siblings)
  13 siblings, 0 replies; 20+ messages in thread
From: Mike Snitzer @ 2009-11-17 16:12 UTC (permalink / raw)
  To: dm-devel

From: Mikulas Patocka <mpatocka@redhat.com>

Track in-progress writes on the merging snapshot device and delay
merging the chunk until all writes to that chunk finish.

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Reviewed-by: Mike Snitzer <snitzer@redhat.com>
---
 drivers/md/dm-snap.c |    7 ++++++-
 1 files changed, 6 insertions(+), 1 deletions(-)

diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index e37fc93..7e7b7ef 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -717,7 +717,8 @@ static void snapshot_merge_process(struct dm_snapshot *s)
 	s->merge_write_interlock_n = 1;
 	up_write(&s->lock);
 
-	/* !!! FIXME: wait until writes to this chunk drain */
+	while (__chunk_is_tracked(s, old_chunk))
+		msleep(1);
 
 	dm_kcopyd_copy(s->kcopyd_client, &src, 1, &dest, 0, merge_callback, s);
 	return;
@@ -1508,7 +1509,11 @@ static int snapshot_merge_map(struct dm_target *ti, struct bio *bio,
 			r = DM_MAPIO_SUBMITTED;
 			goto out_unlock;
 		}
+
 		remap_exception(s, e, bio, chunk);
+
+		if (bio_rw(bio) == WRITE)
+			map_context->ptr = track_chunk(s, chunk);
 		goto out_unlock;
 	}
 
-- 
1.6.5.2

^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [PATCH v3 09/12] dm snapshot: make exceptions in other snapshots when merging
  2009-11-17 16:12 [PATCH v3 00/12] snapshot-merge target Mike Snitzer
                   ` (7 preceding siblings ...)
  2009-11-17 16:12 ` [PATCH v3 08/12] dm snapshot: do not merge a chunk until active writes to it finish Mike Snitzer
@ 2009-11-17 16:12 ` Mike Snitzer
  2009-11-17 16:12 ` [PATCH v3 10/12] dm snapshot: make exceptions if merge is dispatching to origin Mike Snitzer
                   ` (4 subsequent siblings)
  13 siblings, 0 replies; 20+ messages in thread
From: Mike Snitzer @ 2009-11-17 16:12 UTC (permalink / raw)
  To: dm-devel

From: Mikulas Patocka <mpatocka@redhat.com>

When there is one merging snapshot and other non-merging snapshots,
make exceptions in non-merging snapshots while merging.

This is not yet complete (the exception is not made when remapping
directly to the origin), another patch fixes that.

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Reviewed-by: Mike Snitzer <snitzer@redhat.com>
---
 drivers/md/dm-snap.c |   31 +++++++++++++++++++++++++++++++
 1 files changed, 31 insertions(+), 0 deletions(-)

diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index 7e7b7ef..2ded8ee 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -271,6 +271,8 @@ struct origin {
 static struct list_head *_origins;
 static struct rw_semaphore _origins_lock;
 
+static DECLARE_WAIT_QUEUE_HEAD(_pending_exception_done);
+
 static int init_origin_hash(void)
 {
 	int i;
@@ -675,6 +677,9 @@ static int init_hash_tables(struct dm_snapshot *s)
 static void flush_bios(struct bio *bio);
 static void error_bios(struct bio *bio);
 
+static int __origin_write(struct list_head *snapshots,
+			  sector_t sector, struct bio *bio);
+
 static void merge_callback(int read_err, unsigned long write_err,
 			   void *context);
 
@@ -682,6 +687,9 @@ static void snapshot_merge_process(struct dm_snapshot *s)
 {
 	int r;
 	chunk_t old_chunk, new_chunk;
+	struct origin *o;
+	chunk_t min_chunksize;
+	int must_wait;
 	struct dm_io_region src, dest;
 
 	BUG_ON(!s->merge_running);
@@ -712,6 +720,27 @@ static void snapshot_merge_process(struct dm_snapshot *s)
 	src.sector = chunk_to_sector(s->store, new_chunk);
 	src.count = dest.count;
 
+test_again:
+	/* Reallocate other snapshots */
+	down_read(&_origins_lock);
+	o = __lookup_origin(s->origin->bdev);
+	must_wait = 0;
+	min_chunksize = __minimum_chunk_size(o);
+	if (min_chunksize) {
+		chunk_t n;
+		for (n = 0; n < s->store->chunk_size; n += min_chunksize) {
+			r = __origin_write(&o->snapshots, dest.sector + n,
+					   NULL);
+			if (r == DM_MAPIO_SUBMITTED)
+				must_wait = 1;
+		}
+	}
+	up_read(&_origins_lock);
+	if (must_wait) {
+		sleep_on_timeout(&_pending_exception_done, HZ / 100 + 1);
+		goto test_again;
+	}
+
 	down_write(&s->lock);
 	s->merge_write_interlock = old_chunk;
 	s->merge_write_interlock_n = 1;
@@ -1266,6 +1295,8 @@ static void pending_complete(struct dm_snap_pending_exception *pe, int success)
 	origin_bios = bio_list_get(&pe->origin_bios);
 	free_pending_exception(pe);
 
+	wake_up_all(&_pending_exception_done);
+
 	up_write(&s->lock);
 
 	/* Submit any pending write bios */
-- 
1.6.5.2

^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [PATCH v3 10/12] dm snapshot: make exceptions if merge is dispatching to origin
  2009-11-17 16:12 [PATCH v3 00/12] snapshot-merge target Mike Snitzer
                   ` (8 preceding siblings ...)
  2009-11-17 16:12 ` [PATCH v3 09/12] dm snapshot: make exceptions in other snapshots when merging Mike Snitzer
@ 2009-11-17 16:12 ` Mike Snitzer
  2009-11-17 16:12 ` [PATCH v3 11/12] dm snapshot: redirect accesses to origin if merging snap invalidated Mike Snitzer
                   ` (3 subsequent siblings)
  13 siblings, 0 replies; 20+ messages in thread
From: Mike Snitzer @ 2009-11-17 16:12 UTC (permalink / raw)
  To: dm-devel

From: Mikulas Patocka <mpatocka@redhat.com>

If write request to merging snapshot device is to be dispatched
directly to the origin (because the chunk is not remapped or was
already merged), we must make exceptions in other snapshots.

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Reviewed-by: Mike Snitzer <snitzer@redhat.com>
---
 drivers/md/dm-snap.c |    5 +++++
 1 files changed, 5 insertions(+), 0 deletions(-)

diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index 2ded8ee..5b2d0f4 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -1550,6 +1550,11 @@ static int snapshot_merge_map(struct dm_target *ti, struct bio *bio,
 
 	bio->bi_bdev = s->origin->bdev;
 
+	if (bio_rw(bio) == WRITE) {
+		up_write(&s->lock);
+		return do_origin(s->origin, bio);
+	}
+
  out_unlock:
 	up_write(&s->lock);
 
-- 
1.6.5.2

^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [PATCH v3 11/12] dm snapshot: redirect accesses to origin if merging snap invalidated
  2009-11-17 16:12 [PATCH v3 00/12] snapshot-merge target Mike Snitzer
                   ` (9 preceding siblings ...)
  2009-11-17 16:12 ` [PATCH v3 10/12] dm snapshot: make exceptions if merge is dispatching to origin Mike Snitzer
@ 2009-11-17 16:12 ` Mike Snitzer
  2009-11-17 16:12 ` [PATCH v3 12/12] dm snapshot: merge a linear region of chunks using one large IO Mike Snitzer
                   ` (2 subsequent siblings)
  13 siblings, 0 replies; 20+ messages in thread
From: Mike Snitzer @ 2009-11-17 16:12 UTC (permalink / raw)
  To: dm-devel

From: Mikulas Patocka <mpatocka@redhat.com>

If we are merging invalidated snapshot, redirect all accesses to the
origin device.

Userspace code will remove the invalidated snapshot after a few
seconds, but during this period, the origin device must be active (it
may for example contain root filesystem and having unbootable computer
is not desirable).

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Reviewed-by: Mike Snitzer <snitzer@redhat.com>
---
 drivers/md/dm-snap.c |    9 ++++-----
 1 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index 5b2d0f4..3d214f4 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -1520,11 +1520,9 @@ static int snapshot_merge_map(struct dm_target *ti, struct bio *bio,
 
 	down_write(&s->lock);
 
-	/* Full snapshots are not usable */
-	if (!s->valid) {
-		r = -EIO;
-		goto out_unlock;
-	}
+	/* Full merging snapshots are redirected to the origin */
+	if (!s->valid)
+		goto redirect_to_origin;
 
 	/* If the block is already remapped - use that */
 	e = dm_lookup_exception(&s->complete, chunk);
@@ -1548,6 +1546,7 @@ static int snapshot_merge_map(struct dm_target *ti, struct bio *bio,
 		goto out_unlock;
 	}
 
+ redirect_to_origin:
 	bio->bi_bdev = s->origin->bdev;
 
 	if (bio_rw(bio) == WRITE) {
-- 
1.6.5.2

^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [PATCH v3 12/12] dm snapshot: merge a linear region of chunks using one large IO
  2009-11-17 16:12 [PATCH v3 00/12] snapshot-merge target Mike Snitzer
                   ` (10 preceding siblings ...)
  2009-11-17 16:12 ` [PATCH v3 11/12] dm snapshot: redirect accesses to origin if merging snap invalidated Mike Snitzer
@ 2009-11-17 16:12 ` Mike Snitzer
  2009-11-17 16:20   ` Mike Snitzer
  2009-11-17 20:35 ` [PATCH v3 13/12] dm snapshot: avoid __minimum_chunk_size() during merge Mike Snitzer
  2009-11-19 11:25 ` [PATCH v3 00/12] snapshot-merge target Andi Kleen
  13 siblings, 1 reply; 20+ messages in thread
From: Mike Snitzer @ 2009-11-17 16:12 UTC (permalink / raw)
  To: dm-devel

s->store->type->prepare_merge returns the number of chunks that can be
linearly copied starting from the returned chunk number backward. (but
the caller is allowed to copy less, and the caller puts the number of
copied chunks to s->store->type->commit_merge)

I.e. if returned chunk numbers are old_chunk == 10 and new_chunk == 20
and returned value is 3, then chunk 20 can be copied to 10, chunk 19 to
9 and 18 to 8.

s->merge_write_interlock_n has now been allowed to be increased up to
the full range of chunks returned from s->store->type->prepare_merge.
Until now kcopyd was only ever allowed to copy one chunk at a time;
as a result snapshot-merge performance was extremely slow.

Also, snapshot_merge_process() needs to delay the merging of _all_
chunks that have in-progress writes; not just the first chunk in the
region that is to be merged.

snapshot-merge performance is now very respectible.

Signed-off-by: Mike Snitzer <snitzer@redhat.com>
Cc: Mikulas Patocka <mpatocka@redhat.com>
---
 drivers/md/dm-snap.c |   35 +++++++++++++++++++++++------------
 1 files changed, 23 insertions(+), 12 deletions(-)

diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index 3d214f4..34d8c3f 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -685,12 +685,13 @@ static void merge_callback(int read_err, unsigned long write_err,
 
 static void snapshot_merge_process(struct dm_snapshot *s)
 {
-	int r;
+	int r, i, linear_chunks;
 	chunk_t old_chunk, new_chunk;
 	struct origin *o;
 	chunk_t min_chunksize;
 	int must_wait;
 	struct dm_io_region src, dest;
+	sector_t io_size;
 
 	BUG_ON(!s->merge_running);
 	if (s->merge_shutdown)
@@ -701,34 +702,41 @@ static void snapshot_merge_process(struct dm_snapshot *s)
 		goto shut;
 	}
 
-	r = s->store->type->prepare_merge(s->store, &old_chunk, &new_chunk);
-	if (r <= 0) {
-		if (r < 0)
+	linear_chunks = s->store->type->prepare_merge(s->store,
+						      &old_chunk, &new_chunk);
+	if (linear_chunks <= 0) {
+		if (linear_chunks < 0)
 			DMERR("Read error in exception store, "
 			      "shutting down merge");
 		goto shut;
 	}
+	/* Adjust old_chunk and new_chunk to reflect start of linear region */
+	old_chunk = old_chunk + 1 - linear_chunks;
+	new_chunk = new_chunk + 1 - linear_chunks;
 
-	/* TODO: use larger I/O size once we verify that kcopyd handles it */
+	/*
+	 * Use one (potentially large) I/O to copy all 'linear_chunks'
+	 * from the exception store to the origin
+	 */
+	io_size = linear_chunks * s->store->chunk_size;
 
 	dest.bdev = s->origin->bdev;
 	dest.sector = chunk_to_sector(s->store, old_chunk);
-	dest.count = min((sector_t)s->store->chunk_size,
-			 get_dev_size(dest.bdev) - dest.sector);
+	dest.count = min(io_size, get_dev_size(dest.bdev) - dest.sector);
 
 	src.bdev = s->cow->bdev;
 	src.sector = chunk_to_sector(s->store, new_chunk);
 	src.count = dest.count;
 
 test_again:
-	/* Reallocate other snapshots */
+	/* Reallocate other snapshots; must account for all 'linear_chunks' */
 	down_read(&_origins_lock);
 	o = __lookup_origin(s->origin->bdev);
 	must_wait = 0;
 	min_chunksize = __minimum_chunk_size(o);
 	if (min_chunksize) {
 		chunk_t n;
-		for (n = 0; n < s->store->chunk_size; n += min_chunksize) {
+		for (n = 0; n < io_size; n += min_chunksize) {
 			r = __origin_write(&o->snapshots, dest.sector + n,
 					   NULL);
 			if (r == DM_MAPIO_SUBMITTED)
@@ -743,11 +751,14 @@ test_again:
 
 	down_write(&s->lock);
 	s->merge_write_interlock = old_chunk;
-	s->merge_write_interlock_n = 1;
+	s->merge_write_interlock_n = linear_chunks;
 	up_write(&s->lock);
 
-	while (__chunk_is_tracked(s, old_chunk))
-		msleep(1);
+	/* Wait until writes to all 'linear_chunks' drain */
+	for (i = 0; i < linear_chunks; i++) {
+		while (__chunk_is_tracked(s, old_chunk + i))
+			msleep(1);
+	}
 
 	dm_kcopyd_copy(s->kcopyd_client, &src, 1, &dest, 0, merge_callback, s);
 	return;
-- 
1.6.5.2

^ permalink raw reply related	[flat|nested] 20+ messages in thread

* Re: [PATCH v3 12/12] dm snapshot: merge a linear region of chunks using one large IO
  2009-11-17 16:12 ` [PATCH v3 12/12] dm snapshot: merge a linear region of chunks using one large IO Mike Snitzer
@ 2009-11-17 16:20   ` Mike Snitzer
  0 siblings, 0 replies; 20+ messages in thread
From: Mike Snitzer @ 2009-11-17 16:20 UTC (permalink / raw)
  To: dm-devel

On Tue, Nov 17 2009 at 11:12am -0500,
Mike Snitzer <snitzer@redhat.com> wrote:

> s->store->type->prepare_merge returns the number of chunks that can be
> linearly copied starting from the returned chunk number backward. (but
> the caller is allowed to copy less, and the caller puts the number of
> copied chunks to s->store->type->commit_merge)
> 
> I.e. if returned chunk numbers are old_chunk == 10 and new_chunk == 20
> and returned value is 3, then chunk 20 can be copied to 10, chunk 19 to
> 9 and 18 to 8.
> 
> s->merge_write_interlock_n has now been allowed to be increased up to
> the full range of chunks returned from s->store->type->prepare_merge.
> Until now kcopyd was only ever allowed to copy one chunk at a time;
> as a result snapshot-merge performance was extremely slow.
> 
> Also, snapshot_merge_process() needs to delay the merging of _all_
> chunks that have in-progress writes; not just the first chunk in the
> region that is to be merged.
> 
> snapshot-merge performance is now very respectible.

s/respectible/respectable/

Also, I removed the following from the patch header but it helps show
how "respectable" snapshot-merge performance is with this patch:

Here are performance results from some mkfs-based testing:

# lvcreate -n testlv -L 32G test
# lvcreate -n testlv_snap -s -L 7G test/testlv

# time mkfs.ext3 /dev/test/testlv
...
real    1m7.827s
user    0m0.116s
sys     0m11.017s

# lvs
 LV          VG   Attr   LSize  Origin Snap%  Move Log Copy%  Convert
 testlv      test owi-a- 32.00G
 testlv_snap test swi-a-  7.00G testlv   9.05

before:
-------
# time lvconvert --merge test/testlv_snap
 Merging of volume testlv_snap started.
 ...
 Merge into logical volume testlv finished.
 Logical volume "snapshot1" successfully removed

real    22m33.100s
user    0m0.045s
sys     0m0.711s


after:
------
# time lvconvert --merge test/testlv_snap
 Merging of volume testlv_snap started.
 testlv: Merged: 6.4%
 testlv: Merged: 3.5%
 testlv: Merged: 0.9%
 testlv: Merged: 0.0%
 Merge into logical volume testlv finished.
 Logical volume "snapshot1" successfully removed

real    1m0.881s
user    0m0.015s
sys     0m0.560s

^ permalink raw reply	[flat|nested] 20+ messages in thread

* [PATCH v3 13/12] dm snapshot: avoid __minimum_chunk_size() during merge
  2009-11-17 16:12 [PATCH v3 00/12] snapshot-merge target Mike Snitzer
                   ` (11 preceding siblings ...)
  2009-11-17 16:12 ` [PATCH v3 12/12] dm snapshot: merge a linear region of chunks using one large IO Mike Snitzer
@ 2009-11-17 20:35 ` Mike Snitzer
  2009-11-19 11:25 ` [PATCH v3 00/12] snapshot-merge target Andi Kleen
  13 siblings, 0 replies; 20+ messages in thread
From: Mike Snitzer @ 2009-11-17 20:35 UTC (permalink / raw)
  To: dm-devel

The merging snapshot already has the origin's __minimum_chunk_size()
stored in the target's 'split_io'.  Avoid rediscovering it every call to
snapshot_merge_process().

Signed-off-by: Mike Snitzer <snitzer@redhat.com>

diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index 34d8c3f..f91ee08 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -730,20 +730,25 @@ static void snapshot_merge_process(struct dm_snapshot *s)
 
 test_again:
 	/* Reallocate other snapshots; must account for all 'linear_chunks' */
-	down_read(&_origins_lock);
-	o = __lookup_origin(s->origin->bdev);
 	must_wait = 0;
-	min_chunksize = __minimum_chunk_size(o);
+	/*
+	 * Merging snapshot already has the origin's __minimum_chunk_size()
+	 * stored in split_io (see: snapshot_merge_resume); avoid rediscovery
+	 */
+	min_chunksize = s->ti->split_io;
 	if (min_chunksize) {
 		chunk_t n;
+
+		down_read(&_origins_lock);
+		o = __lookup_origin(s->origin->bdev);
 		for (n = 0; n < io_size; n += min_chunksize) {
 			r = __origin_write(&o->snapshots, dest.sector + n,
 					   NULL);
 			if (r == DM_MAPIO_SUBMITTED)
 				must_wait = 1;
 		}
+		up_read(&_origins_lock);
 	}
-	up_read(&_origins_lock);
 	if (must_wait) {
 		sleep_on_timeout(&_pending_exception_done, HZ / 100 + 1);
 		goto test_again;

^ permalink raw reply related	[flat|nested] 20+ messages in thread

* Re: [PATCH v3 00/12] snapshot-merge target
  2009-11-17 16:12 [PATCH v3 00/12] snapshot-merge target Mike Snitzer
                   ` (12 preceding siblings ...)
  2009-11-17 20:35 ` [PATCH v3 13/12] dm snapshot: avoid __minimum_chunk_size() during merge Mike Snitzer
@ 2009-11-19 11:25 ` Andi Kleen
  2009-11-19 14:02   ` Mike Snitzer
  13 siblings, 1 reply; 20+ messages in thread
From: Andi Kleen @ 2009-11-19 11:25 UTC (permalink / raw)
  To: device-mapper development; +Cc: snitzer

Mike Snitzer <snitzer@redhat.com> writes:

> snapshot-merge target, quilt tree is maintained here:
> http://people.redhat.com/msnitzer/patches/snapshot-merge/kernel/2.6.33/

It might seem trivial to you, but afaik nothing in the whole series
(nor in the comments in the code) says what snapshot merging
actually is. Please write meaningful changelogs and comments.

-Andi

-- 
ak@linux.intel.com -- Speaking for myself only.

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH v3 00/12] snapshot-merge target
  2009-11-19 11:25 ` [PATCH v3 00/12] snapshot-merge target Andi Kleen
@ 2009-11-19 14:02   ` Mike Snitzer
  2009-11-19 14:53     ` Alasdair G Kergon
  2009-11-19 16:08     ` Andi Kleen
  0 siblings, 2 replies; 20+ messages in thread
From: Mike Snitzer @ 2009-11-19 14:02 UTC (permalink / raw)
  To: device-mapper development; +Cc: Andi Kleen

On Thu, Nov 19 2009 at  6:25am -0500,
Andi Kleen <andi@firstfloor.org> wrote:

> Mike Snitzer <snitzer@redhat.com> writes:
> 
> > snapshot-merge target, quilt tree is maintained here:
> > http://people.redhat.com/msnitzer/patches/snapshot-merge/kernel/2.6.33/
> 
> It might seem trivial to you, but afaik nothing in the whole series
> (nor in the comments in the code) says what snapshot merging
> actually is. Please write meaningful changelogs and comments.

OK, I hadn't noticed because I was so focused on the details.

DM (and LVM) snapshot merging allows a snapshot (its COW store) to be
merged back into the snapshot's origin device.

One expected use of snapshot merging is rollback of a root filesystem
after system upgrades (e.g.: yum update).

Mike

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: Re: [PATCH v3 00/12] snapshot-merge target
  2009-11-19 14:02   ` Mike Snitzer
@ 2009-11-19 14:53     ` Alasdair G Kergon
  2009-11-19 17:45       ` Mike Snitzer
  2009-11-19 16:08     ` Andi Kleen
  1 sibling, 1 reply; 20+ messages in thread
From: Alasdair G Kergon @ 2009-11-19 14:53 UTC (permalink / raw)
  To: device-mapper development; +Cc: Andi Kleen

On Thu, Nov 19, 2009 at 09:02:33AM -0500, Mike Snitzer wrote:
> DM (and LVM) snapshot merging allows a snapshot (its COW store) to be
> merged back into the snapshot's origin device.
> One expected use of snapshot merging is rollback of a root filesystem
> after system upgrades (e.g.: yum update).

Has the patch to 'Documentation/device-mapper' been written yet?
(It won't go upstream until we have that of course.  Same for the
replicator and any other new dm targets.)
 
Alasdair

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH v3 00/12] snapshot-merge target
  2009-11-19 14:02   ` Mike Snitzer
  2009-11-19 14:53     ` Alasdair G Kergon
@ 2009-11-19 16:08     ` Andi Kleen
  1 sibling, 0 replies; 20+ messages in thread
From: Andi Kleen @ 2009-11-19 16:08 UTC (permalink / raw)
  To: Mike Snitzer; +Cc: device-mapper development, Andi Kleen

> OK, I hadn't noticed because I was so focused on the details.
> 
> DM (and LVM) snapshot merging allows a snapshot (its COW store) to be
> merged back into the snapshot's origin device.
> 
> One expected use of snapshot merging is rollback of a root filesystem
> after system upgrades (e.g.: yum update).

Thanks. Can you put that into the changelogs (at least for the main
feature) and into a overview comment somewhere in the code?

-Andi
-- 
ak@linux.intel.com -- Speaking for myself only.

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH v3 00/12] snapshot-merge target
  2009-11-19 14:53     ` Alasdair G Kergon
@ 2009-11-19 17:45       ` Mike Snitzer
  0 siblings, 0 replies; 20+ messages in thread
From: Mike Snitzer @ 2009-11-19 17:45 UTC (permalink / raw)
  To: Alasdair G Kergon; +Cc: device-mapper development, Andi Kleen

On Thu, Nov 19 2009 at  9:53am -0500,
Alasdair G Kergon <agk@redhat.com> wrote:

> On Thu, Nov 19, 2009 at 09:02:33AM -0500, Mike Snitzer wrote:
> > DM (and LVM) snapshot merging allows a snapshot (its COW store) to be
> > merged back into the snapshot's origin device.
> > One expected use of snapshot merging is rollback of a root filesystem
> > after system upgrades (e.g.: yum update).
> 
> Has the patch to 'Documentation/device-mapper' been written yet?
> (It won't go upstream until we have that of course.  Same for the
> replicator and any other new dm targets.)

Nope, I'll work on that (and adding comments like Andi suggested).  I'll
include these changes into the v4 snapshot-merge patchset submission
that I'll be getting out today.  The v4 patchset will be based on v9 of
the handover patch that we worked on (which I just emailed to dm-devel).

Mike

^ permalink raw reply	[flat|nested] 20+ messages in thread

end of thread, other threads:[~2009-11-19 17:45 UTC | newest]

Thread overview: 20+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2009-11-17 16:12 [PATCH v3 00/12] snapshot-merge target Mike Snitzer
2009-11-17 16:12 ` [PATCH v3 01/12] dm exception store: add snapshot-merge specific methods Mike Snitzer
2009-11-17 16:12 ` [PATCH v3 02/12] dm exception store: snapshot-merge usage accounting Mike Snitzer
2009-11-17 16:12 ` [PATCH v3 03/12] dm snapshot: add snapshot-merge target Mike Snitzer
2009-11-17 16:12 ` [PATCH v3 04/12] dm snapshot: merge target should not allocate new exceptions Mike Snitzer
2009-11-17 16:12 ` [PATCH v3 05/12] dm snapshot: do not allow more than one merging snapshot Mike Snitzer
2009-11-17 16:12 ` [PATCH v3 06/12] dm snapshot: the merge procedure Mike Snitzer
2009-11-17 16:12 ` [PATCH v3 07/12] dm snapshot: queue writes to an area that is actively being merged Mike Snitzer
2009-11-17 16:12 ` [PATCH v3 08/12] dm snapshot: do not merge a chunk until active writes to it finish Mike Snitzer
2009-11-17 16:12 ` [PATCH v3 09/12] dm snapshot: make exceptions in other snapshots when merging Mike Snitzer
2009-11-17 16:12 ` [PATCH v3 10/12] dm snapshot: make exceptions if merge is dispatching to origin Mike Snitzer
2009-11-17 16:12 ` [PATCH v3 11/12] dm snapshot: redirect accesses to origin if merging snap invalidated Mike Snitzer
2009-11-17 16:12 ` [PATCH v3 12/12] dm snapshot: merge a linear region of chunks using one large IO Mike Snitzer
2009-11-17 16:20   ` Mike Snitzer
2009-11-17 20:35 ` [PATCH v3 13/12] dm snapshot: avoid __minimum_chunk_size() during merge Mike Snitzer
2009-11-19 11:25 ` [PATCH v3 00/12] snapshot-merge target Andi Kleen
2009-11-19 14:02   ` Mike Snitzer
2009-11-19 14:53     ` Alasdair G Kergon
2009-11-19 17:45       ` Mike Snitzer
2009-11-19 16:08     ` Andi Kleen

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.