* [PATCH] [dm-thin] Allow userland access to metadata of a live thin provisioning pool
@ 2012-05-17 14:47 Joe Thornber
2012-05-22 21:21 ` Mike Snitzer
0 siblings, 1 reply; 3+ messages in thread
From: Joe Thornber @ 2012-05-17 14:47 UTC (permalink / raw)
To: dm-devel; +Cc: Joe Thornber
New feature.
This patch implements two new messages that can be sent to the thin
pool target allowing it to take a snapshot of the _metadata_. This
read-only snapshot can be accessed by userland, concurrently with the
live target.
Only one metadata snapshot can be held at a time. The pool's status
line will give the block location for the current msnap.
The thin-provisioning-tools have been updated to v0.1.5. The
thin_dump program can now be used to display the msnap. e.g.,
thin_dump -m <msnap root> <metadata dev>
Available here: https://github.com/jthornber/thin-provisioning-tools
Now that userland can access the metadata we can do various things
that have traditionally been kernel side tasks:
i) Incremental backups.
By using metadata snapshots we can work out what blocks have
changed over time. Combined with data snapshots we can ensure
the data doesn't change while we back it up.
A short proof of concept script can be found here:
https://github.com/jthornber/thinp-test-suite/blob/master/incremental_backup_example.rb
ii) Migration of thin devices from one pool to another.
iii) Merging snapshots back into an external origin.
iv) Asynchronous replication.
---
Documentation/device-mapper/thin-provisioning.txt | 11 ++
drivers/md/dm-thin-metadata.c | 128 ++++++++++++++++++++-
drivers/md/dm-thin-metadata.h | 13 ++-
drivers/md/dm-thin.c | 40 ++++++-
4 files changed, 182 insertions(+), 10 deletions(-)
diff --git a/Documentation/device-mapper/thin-provisioning.txt b/Documentation/device-mapper/thin-provisioning.txt
index 3370bc4..897e66d 100644
--- a/Documentation/device-mapper/thin-provisioning.txt
+++ b/Documentation/device-mapper/thin-provisioning.txt
@@ -287,6 +287,17 @@ iii) Messages
the current transaction id is when you change it with this
compare-and-swap message.
+ reserve_metadata_snap
+
+ Reserve a copy of the data mapping btree for use by userland.
+ This allows userland to inspect the mappings as they were when
+ this message was executed. Use the pool's status command to
+ get the root block.
+
+ release_metadata_snap
+
+ Release a previously reserved copy of the data mapping btree.
+
'thin' target
-------------
diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c
index 737d388..7e272e2 100644
--- a/drivers/md/dm-thin-metadata.c
+++ b/drivers/md/dm-thin-metadata.c
@@ -1082,12 +1082,82 @@ int dm_pool_get_metadata_transaction_id(struct dm_pool_metadata *pmd,
return 0;
}
-static int __get_held_metadata_root(struct dm_pool_metadata *pmd,
- dm_block_t *result)
+static int __reserve_metadata_snap(struct dm_pool_metadata *pmd)
+{
+ int r, inc;
+ struct thin_disk_superblock *disk_super;
+ struct dm_block *copy, *sblock;
+ dm_block_t held_root;
+
+ /*
+ * Copy the superblock.
+ */
+ dm_sm_inc_block(pmd->metadata_sm, THIN_SUPERBLOCK_LOCATION);
+ r = dm_tm_shadow_block(pmd->tm, THIN_SUPERBLOCK_LOCATION, &sb_validator, &copy, &inc);
+ if (r)
+ return r;
+ BUG_ON(!inc);
+
+ held_root = dm_block_location(copy);
+ disk_super = dm_block_data(copy);
+ if (le64_to_cpu(disk_super->held_root)) {
+ DMWARN("pool already has a metadata snapshot");
+ dm_tm_dec(pmd->tm, held_root);
+ dm_tm_unlock(pmd->tm, copy);
+ pmd->need_commit = 1;
+ return -EBUSY;
+ }
+
+ /*
+ * Wipe the spacemap since we're not publishing this.
+ */
+ memset(&disk_super->data_space_map_root, 0, sizeof(disk_super->data_space_map_root));
+ memset(&disk_super->metadata_space_map_root, 0,
+ sizeof(disk_super->metadata_space_map_root));
+
+ /*
+ * Increment the data structures that need to be preserved.
+ */
+ dm_tm_inc(pmd->tm, le64_to_cpu(disk_super->data_mapping_root));
+ dm_tm_inc(pmd->tm, le64_to_cpu(disk_super->device_details_root));
+ dm_tm_unlock(pmd->tm, copy);
+
+ /*
+ * Write the held root into the superblock.
+ */
+ r = dm_bm_write_lock(pmd->bm, THIN_SUPERBLOCK_LOCATION,
+ &sb_validator, &sblock);
+ if (r) {
+ dm_tm_dec(pmd->tm, held_root);
+ pmd->need_commit = 1;
+ return r;
+ }
+
+ disk_super = dm_block_data(sblock);
+ disk_super->held_root = cpu_to_le64(held_root);
+ dm_tm_unlock(pmd->tm, sblock);
+
+ pmd->need_commit = 1;
+ return 0;
+}
+
+int dm_pool_reserve_metadata_snap(struct dm_pool_metadata *pmd)
+{
+ int r;
+
+ down_write(&pmd->root_lock);
+ r = __reserve_metadata_snap(pmd);
+ up_write(&pmd->root_lock);
+
+ return r;
+}
+
+static int __release_metadata_snap(struct dm_pool_metadata *pmd)
{
int r;
struct thin_disk_superblock *disk_super;
- struct dm_block *sblock;
+ struct dm_block *sblock, *copy;
+ dm_block_t held_root;
r = dm_bm_write_lock(pmd->bm, THIN_SUPERBLOCK_LOCATION,
&sb_validator, &sblock);
@@ -1095,18 +1165,64 @@ static int __get_held_metadata_root(struct dm_pool_metadata *pmd,
return r;
disk_super = dm_block_data(sblock);
+ held_root = le64_to_cpu(disk_super->held_root);
+ disk_super->held_root = cpu_to_le64(0);
+ pmd->need_commit = 1;
+
+ dm_tm_unlock(pmd->tm, sblock);
+
+ if (!held_root) {
+ DMWARN("pool has no metadata snap");
+ return -EINVAL;
+ }
+
+ r = dm_tm_read_lock(pmd->tm, held_root, &sb_validator, &copy);
+ if (r)
+ return r;
+
+ disk_super = dm_block_data(copy);
+ dm_sm_dec_block(pmd->metadata_sm, le64_to_cpu(disk_super->data_mapping_root));
+ dm_sm_dec_block(pmd->metadata_sm, le64_to_cpu(disk_super->device_details_root));
+ dm_sm_dec_block(pmd->metadata_sm, held_root);
+ return dm_tm_unlock(pmd->tm, copy);
+}
+
+int dm_pool_release_metadata_snap(struct dm_pool_metadata *pmd)
+{
+ int r;
+
+ down_write(&pmd->root_lock);
+ r = __release_metadata_snap(pmd);
+ up_write(&pmd->root_lock);
+
+ return r;
+}
+
+static int __get_metadata_snap(struct dm_pool_metadata *pmd,
+ dm_block_t *result)
+{
+ int r;
+ struct thin_disk_superblock *disk_super;
+ struct dm_block *sblock;
+
+ r = dm_bm_read_lock(pmd->bm, THIN_SUPERBLOCK_LOCATION,
+ &sb_validator, &sblock);
+ if (r)
+ return r;
+
+ disk_super = dm_block_data(sblock);
*result = le64_to_cpu(disk_super->held_root);
return dm_bm_unlock(sblock);
}
-int dm_pool_get_held_metadata_root(struct dm_pool_metadata *pmd,
- dm_block_t *result)
+int dm_pool_get_metadata_snap(struct dm_pool_metadata *pmd,
+ dm_block_t *result)
{
int r;
down_read(&pmd->root_lock);
- r = __get_held_metadata_root(pmd, result);
+ r = __get_metadata_snap(pmd, result);
up_read(&pmd->root_lock);
return r;
diff --git a/drivers/md/dm-thin-metadata.h b/drivers/md/dm-thin-metadata.h
index ed4725e..b88918c 100644
--- a/drivers/md/dm-thin-metadata.h
+++ b/drivers/md/dm-thin-metadata.h
@@ -90,11 +90,18 @@ int dm_pool_get_metadata_transaction_id(struct dm_pool_metadata *pmd,
/*
* Hold/get root for userspace transaction.
+ *
+ * The metadata snapshot is a copy of the current superblock (minus the
+ * space maps). Userland can access the data structures for READ
+ * operations only. A small performance hit is incurred by providing this
+ * copy of the metadata to userland due to extra copy-on-write operations
+ * on the metadata nodes. Release this as soon as you finish with it.
*/
-int dm_pool_hold_metadata_root(struct dm_pool_metadata *pmd);
+int dm_pool_reserve_metadata_snap(struct dm_pool_metadata *pmd);
+int dm_pool_release_metadata_snap(struct dm_pool_metadata *pmd);
-int dm_pool_get_held_metadata_root(struct dm_pool_metadata *pmd,
- dm_block_t *result);
+int dm_pool_get_metadata_snap(struct dm_pool_metadata *pmd,
+ dm_block_t *result);
/*
* Actions on a single virtual device.
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index 2fd87b5..1645529 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -2274,6 +2274,36 @@ static int process_set_transaction_id_mesg(unsigned argc, char **argv, struct po
return 0;
}
+static int process_reserve_metadata_snap_mesg(unsigned argc, char **argv, struct pool *pool)
+{
+ int r;
+
+ r = check_arg_count(argc, 1);
+ if (r)
+ return r;
+
+ r = dm_pool_reserve_metadata_snap(pool->pmd);
+ if (r)
+ DMWARN("metadata snap request failed");
+
+ return r;
+}
+
+static int process_release_metadata_snap_mesg(unsigned argc, char **argv, struct pool *pool)
+{
+ int r;
+
+ r = check_arg_count(argc, 1);
+ if (r)
+ return r;
+
+ r = dm_pool_release_metadata_snap(pool->pmd);
+ if (r)
+ DMWARN("release metadata snap request failed");
+
+ return r;
+}
+
/*
* Messages supported:
* create_thin <dev_id>
@@ -2281,6 +2311,8 @@ static int process_set_transaction_id_mesg(unsigned argc, char **argv, struct po
* delete <dev_id>
* trim <dev_id> <new_size_in_sectors>
* set_transaction_id <current_trans_id> <new_trans_id>
+ * reserve_metadata_snap
+ * release_metadata_snap
*/
static int pool_message(struct dm_target *ti, unsigned argc, char **argv)
{
@@ -2300,6 +2332,12 @@ static int pool_message(struct dm_target *ti, unsigned argc, char **argv)
else if (!strcasecmp(argv[0], "set_transaction_id"))
r = process_set_transaction_id_mesg(argc, argv, pool);
+ else if (!strcasecmp(argv[0], "reserve_metadata_snap"))
+ r = process_reserve_metadata_snap_mesg(argc, argv, pool);
+
+ else if (!strcasecmp(argv[0], "release_metadata_snap"))
+ r = process_release_metadata_snap_mesg(argc, argv, pool);
+
else
DMWARN("Unrecognised thin pool target message received: %s", argv[0]);
@@ -2359,7 +2397,7 @@ static int pool_status(struct dm_target *ti, status_type_t type,
if (r)
return r;
- r = dm_pool_get_held_metadata_root(pool->pmd, &held_root);
+ r = dm_pool_get_metadata_snap(pool->pmd, &held_root);
if (r)
return r;
--
1.7.9.5
^ permalink raw reply related [flat|nested] 3+ messages in thread
* Re: Allow userland access to metadata of a live thin provisioning pool
2012-05-17 14:47 [PATCH] [dm-thin] Allow userland access to metadata of a live thin provisioning pool Joe Thornber
@ 2012-05-22 21:21 ` Mike Snitzer
2012-05-23 18:07 ` Joe Thornber
0 siblings, 1 reply; 3+ messages in thread
From: Mike Snitzer @ 2012-05-22 21:21 UTC (permalink / raw)
To: device-mapper development; +Cc: Joe Thornber, Alasdair G. Kergon
On Thu, May 17 2012 at 10:47am -0400,
Joe Thornber <ejt@redhat.com> wrote:
> New feature.
>
> This patch implements two new messages that can be sent to the thin
> pool target allowing it to take a snapshot of the _metadata_. This
> read-only snapshot can be accessed by userland, concurrently with the
> live target.
>
> Only one metadata snapshot can be held at a time. The pool's status
> line will give the block location for the current msnap.
>
> The thin-provisioning-tools have been updated to v0.1.5. The
> thin_dump program can now be used to display the msnap. eg,
>
> thin_dump -m <msnap root> <metadata dev>
>
> Available here: https://github.com/jthornber/thin-provisioning-tools
>
> Now that userland can access the metadata we can do various things
> that have traditionally been kernel side tasks:
>
> i) Incremental backups.
>
> By using metadata snapshots we can work out what blocks have
> changed over time. Combined with data snapshots we can ensure
> the data doesn't change while we back it up.
>
> A short proof of concept script can be found here:
>
> https://github.com/jthornber/thinp-test-suite/blob/master/incremental_backup_example.rb
>
> ii) Migration of thin devices from one pool to another.
>
> iii) Merging snapshots back into an external origin.
>
> iv) Asynchronous replication.
Hey Joe,
You forgot your Signed-off-by.
I reviewed the patch and had a few suggestions; please see the following
small incremental patch (Alasdair please feel free to fold these nits
into Joe's original patch).
The switch from dm_tm_unlock() to dm_bm_unlock() in 2 places was done
because the block was locked with the block-manager (rather than a tm
wrapper).
There are still remnants of "held root" but I didn't feel that strongly
to change the superblock's variable name, clean up the header comment,
etc.
Acked-by: Mike Snitzer <snitzer@redhat.com>
---
Documentation/device-mapper/thin-provisioning.txt | 2 +-
drivers/md/dm-thin-metadata.c | 10 ++++++----
2 files changed, 7 insertions(+), 5 deletions(-)
diff --git a/Documentation/device-mapper/thin-provisioning.txt b/Documentation/device-mapper/thin-provisioning.txt
index 897e66da..f5cfc62 100644
--- a/Documentation/device-mapper/thin-provisioning.txt
+++ b/Documentation/device-mapper/thin-provisioning.txt
@@ -292,7 +292,7 @@ iii) Messages
Reserve a copy of the data mapping btree for use by userland.
This allows userland to inspect the mappings as they were when
this message was executed. Use the pool's status command to
- get the root block.
+ get the root block associated with the metadata snapshot.
release_metadata_snap
diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c
index 7e272e2..f53c6d0 100644
--- a/drivers/md/dm-thin-metadata.c
+++ b/drivers/md/dm-thin-metadata.c
@@ -1093,7 +1093,8 @@ static int __reserve_metadata_snap(struct dm_pool_metadata *pmd)
* Copy the superblock.
*/
dm_sm_inc_block(pmd->metadata_sm, THIN_SUPERBLOCK_LOCATION);
- r = dm_tm_shadow_block(pmd->tm, THIN_SUPERBLOCK_LOCATION, &sb_validator, &copy, &inc);
+ r = dm_tm_shadow_block(pmd->tm, THIN_SUPERBLOCK_LOCATION,
+ &sb_validator, &copy, &inc);
if (r)
return r;
BUG_ON(!inc);
@@ -1111,7 +1112,8 @@ static int __reserve_metadata_snap(struct dm_pool_metadata *pmd)
/*
* Wipe the spacemap since we're not publishing this.
*/
- memset(&disk_super->data_space_map_root, 0, sizeof(disk_super->data_space_map_root));
+ memset(&disk_super->data_space_map_root, 0,
+ sizeof(disk_super->data_space_map_root));
memset(&disk_super->metadata_space_map_root, 0,
sizeof(disk_super->metadata_space_map_root));
@@ -1135,7 +1137,7 @@ static int __reserve_metadata_snap(struct dm_pool_metadata *pmd)
disk_super = dm_block_data(sblock);
disk_super->held_root = cpu_to_le64(held_root);
- dm_tm_unlock(pmd->tm, sblock);
+ dm_bm_unlock(sblock);
pmd->need_commit = 1;
return 0;
@@ -1169,7 +1171,7 @@ static int __release_metadata_snap(struct dm_pool_metadata *pmd)
disk_super->held_root = cpu_to_le64(0);
pmd->need_commit = 1;
- dm_tm_unlock(pmd->tm, sblock);
+ dm_bm_unlock(sblock);
if (!held_root) {
DMWARN("pool has no metadata snap");
^ permalink raw reply related [flat|nested] 3+ messages in thread
* Re: Allow userland access to metadata of a live thin provisioning pool
2012-05-22 21:21 ` Mike Snitzer
@ 2012-05-23 18:07 ` Joe Thornber
0 siblings, 0 replies; 3+ messages in thread
From: Joe Thornber @ 2012-05-23 18:07 UTC (permalink / raw)
To: device-mapper development; +Cc: Joe Thornber, Alasdair G. Kergon
On Tue, May 22, 2012 at 05:21:38PM -0400, Mike Snitzer wrote:
> I reviewed the patch and had a few suggestions; please see the following
> small incremental patch (Alasdair please feel free to fold these nits
> into Joe's original patch).
Yep, those changes are all good. Thanks.
- Joe
^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2012-05-23 18:07 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2012-05-17 14:47 [PATCH] [dm-thin] Allow userland access to metadata of a live thin provisioning pool Joe Thornber
2012-05-22 21:21 ` Mike Snitzer
2012-05-23 18:07 ` Joe Thornber
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).