All of lore.kernel.org
 help / color / mirror / Atom feed
From: Joe Thornber <ejt@redhat.com>
To: dm-devel@redhat.com
Cc: Joe Thornber <ejt@redhat.com>
Subject: [PATCH 6/8] [persistent-data] Add a transactional array.
Date: Thu, 13 Dec 2012 20:19:14 +0000	[thread overview]
Message-ID: <1355429956-22785-7-git-send-email-ejt@redhat.com> (raw)
In-Reply-To: <1355429956-22785-1-git-send-email-ejt@redhat.com>

---
 drivers/md/persistent-data/Makefile   |    1 +
 drivers/md/persistent-data/dm-array.c |  818 +++++++++++++++++++++++++++++++++
 drivers/md/persistent-data/dm-array.h |  128 ++++++
 3 files changed, 947 insertions(+)
 create mode 100644 drivers/md/persistent-data/dm-array.c
 create mode 100644 drivers/md/persistent-data/dm-array.h

diff --git a/drivers/md/persistent-data/Makefile b/drivers/md/persistent-data/Makefile
index d8e7cb7..ebd8d80 100644
--- a/drivers/md/persistent-data/Makefile
+++ b/drivers/md/persistent-data/Makefile
@@ -1,5 +1,6 @@
 obj-$(CONFIG_DM_PERSISTENT_DATA) += dm-persistent-data.o
 dm-persistent-data-objs := \
+	dm-array.o \
 	dm-block-manager.o \
 	dm-space-map-common.o \
 	dm-space-map-disk.o \
diff --git a/drivers/md/persistent-data/dm-array.c b/drivers/md/persistent-data/dm-array.c
new file mode 100644
index 0000000..d762caf
--- /dev/null
+++ b/drivers/md/persistent-data/dm-array.c
@@ -0,0 +1,818 @@
+/*
+ * Copyright (C) 2012 Red Hat, Inc.
+ *
+ * This file is released under the GPL.
+ */
+
+#include "dm-array.h"
+#include "dm-space-map.h"
+#include "dm-transaction-manager.h"
+
+#include <linux/export.h>
+#include <linux/device-mapper.h>
+
+#define DM_MSG_PREFIX "array"
+
+/*----------------------------------------------------------------*/
+
+/*
+ * The array is implemented as a fully populated btree, which points to
+ * blocks that contain the packed values.  This is more space efficient
+ * than just using a btree since we don't store 1 key per value.
+ */
+struct array_block {
+	__le32 csum;
+	__le32 max_entries;
+	__le32 nr_entries;
+	__le32 value_size;
+	__le64 blocknr; /* Block this node is supposed to live in. */
+} __packed;
+
+/*----------------------------------------------------------------*/
+
+/*
+ * Validator methods.  As usual we calculate a checksum, and also write the
+ * block location into the header (paranoia about ssds remapping areas by
+ * mistake).
+ */
+#define CSUM_XOR 595846735
+
+static void array_block_prepare_for_write(struct dm_block_validator *v,
+					  struct dm_block *b,
+					  size_t block_size)
+{
+	struct array_block *bh_le = dm_block_data(b);
+
+	bh_le->blocknr = cpu_to_le64(dm_block_location(b));
+	bh_le->csum = cpu_to_le32(dm_bm_checksum(&bh_le->max_entries,
+						 block_size - sizeof(__le32),
+						 CSUM_XOR));
+}
+
+static int array_block_check(struct dm_block_validator *v,
+			     struct dm_block *b,
+			     size_t block_size)
+{
+	struct array_block *bh_le = dm_block_data(b);
+	__le32 csum_disk;
+
+	if (dm_block_location(b) != le64_to_cpu(bh_le->blocknr)) {
+		DMERR_LIMIT("array_block_check failed: blocknr %llu != wanted %llu",
+			    le64_to_cpu(bh_le->blocknr), dm_block_location(b));
+		return -ENOTBLK;
+	}
+
+	csum_disk = cpu_to_le32(dm_bm_checksum(&bh_le->max_entries,
+					       block_size - sizeof(__le32),
+					       CSUM_XOR));
+	if (csum_disk != bh_le->csum) {
+		DMERR_LIMIT("array_block_check failed: csum %u != wanted %u",
+			    le32_to_cpu(csum_disk), le32_to_cpu(bh_le->csum));
+		return -EILSEQ;
+	}
+
+	return 0;
+}
+
+static struct dm_block_validator array_validator = {
+	.name = "array",
+	.prepare_for_write = array_block_prepare_for_write,
+	.check = array_block_check
+};
+
+/*----------------------------------------------------------------*/
+
+/*
+ * Functions for manipulating the array blocks.
+ */
+
+/*
+ * Returns a pointer to a value within an array block.
+ *
+ * index - The index into _this_ specific block.
+ */
+static void *element_at(struct dm_array_info *info, struct array_block *ab,
+			unsigned index)
+{
+	unsigned char *entry = (unsigned char *) (ab + 1);
+	entry += index * info->value_type.size;
+	return entry;
+}
+
+/*
+ * Utility function that calls one of the value_type methods on every value
+ * in an array block.
+ */
+static void on_entries(struct dm_array_info *info, struct array_block *ab,
+		       void (*fn)(void *, const void *))
+{
+	unsigned i, nr_entries = le32_to_cpu(ab->nr_entries);
+
+	for (i = 0; i < nr_entries; i++)
+		fn(info->value_type.context, element_at(info, ab, i));
+}
+
+/*
+ * Increment every value in an array block.
+ */
+static void inc_ablock_entries(struct dm_array_info *info, struct array_block *ab)
+{
+	struct dm_btree_value_type *vt = &info->value_type;
+
+	if (vt->inc)
+		on_entries(info, ab, vt->inc);
+}
+
+/*
+ * Decrement every value in an array block.
+ */
+static void dec_ablock_entries(struct dm_array_info *info, struct array_block *ab)
+{
+	struct dm_btree_value_type *vt = &info->value_type;
+
+	if (vt->dec)
+		on_entries(info, ab, vt->dec);
+}
+
+/*
+ * Each array block can hold this many values.
+ */
+static uint32_t calc_max_entries(size_t value_size, size_t block_size)
+{
+	return (block_size - sizeof(struct array_block)) / value_size;
+}
+
+/*
+ * Allocate a new array block.  The caller will need to unlock block.
+ */
+static int alloc_ablock(struct dm_array_info *info, size_t block_size,
+			struct dm_block **block, struct array_block **ab)
+{
+	int r;
+
+	r = dm_tm_new_block(info->btree_info.tm, &array_validator, block);
+	if (r)
+		return r;
+
+	(*ab) = dm_block_data(*block);
+	(*ab)->max_entries =
+		cpu_to_le32(calc_max_entries(info->value_type.size, block_size));
+	(*ab)->nr_entries = cpu_to_le32(0);
+	(*ab)->value_size = cpu_to_le32(info->value_type.size);
+
+	return 0;
+}
+
+/*
+ * Pad an array block out with a particular value.  Every instance will
+ * cause an increment of the value_type.  new_nr must always be more than
+ * the current number of entries.
+ */
+static void fill_ablock(struct dm_array_info *info, struct array_block *ab,
+			const void *value, unsigned new_nr)
+{
+	unsigned i;
+	uint32_t nr_entries;
+	struct dm_btree_value_type *vt = &info->value_type;
+
+	BUG_ON(new_nr > le32_to_cpu(ab->max_entries));
+	BUG_ON(new_nr < le32_to_cpu(ab->nr_entries));
+
+	nr_entries = le32_to_cpu(ab->nr_entries);
+	for (i = nr_entries; i < new_nr; i++) {
+		if (vt->inc)
+			vt->inc(vt->context, value);
+		memcpy(element_at(info, ab, i), value, vt->size);
+	}
+	ab->nr_entries = cpu_to_le32(new_nr);
+}
+
+/*
+ * Remove some entries from the back of an array block.  Every value
+ * removed will be decremented.  new_nr must be <= the current number of
+ * entries.
+ */
+static void trim_ablock(struct dm_array_info *info, struct array_block *ab,
+			unsigned new_nr)
+{
+	unsigned i;
+	uint32_t nr_entries;
+	struct dm_btree_value_type *vt = &info->value_type;
+
+	BUG_ON(new_nr > le32_to_cpu(ab->max_entries));
+	BUG_ON(new_nr > le32_to_cpu(ab->nr_entries));
+
+	nr_entries = le32_to_cpu(ab->nr_entries);
+	for (i = nr_entries; i > new_nr; i--)
+		if (vt->dec)
+			vt->dec(vt->context, element_at(info, ab, i - 1));
+	ab->nr_entries = cpu_to_le32(new_nr);
+}
+
+/*
+ * Read locks a block, and coerces it to an array block.  The caller must
+ * unlock 'block' when finished.
+ */
+static int get_ablock(struct dm_array_info *info, dm_block_t b,
+		      struct dm_block **block, struct array_block **ab)
+{
+	int r;
+
+	r = dm_tm_read_lock(info->btree_info.tm, b, &array_validator, block);
+	if (r)
+		return r;
+
+	*ab = dm_block_data(*block);
+	return 0;
+}
+
+/*
+ * Unlocks an array block.
+ */
+static int unlock_ablock(struct dm_array_info *info, struct dm_block *block)
+{
+	return dm_tm_unlock(info->btree_info.tm, block);
+}
+
+/*----------------------------------------------------------------*/
+
+/*
+ * Btree manipulation.
+ */
+
+/*
+ * Looks up an array block in the btree, and then read locks it.
+ *
+ * index is the index of the index of the array_block, (ie. the array index
+ * / max_entries).
+ */
+static int lookup_ablock(struct dm_array_info *info, dm_block_t root,
+			 unsigned index, struct dm_block **block,
+			 struct array_block **ab)
+{
+	int r;
+	uint64_t key = index;
+	__le64 block_le;
+
+	r = dm_btree_lookup(&info->btree_info, root, &key, &block_le);
+	if (r)
+		return r;
+
+	return get_ablock(info, le64_to_cpu(block_le), block, ab);
+}
+
+/*
+ * Insert an array block into the btree.  The block is _not_ unlocked.
+ */
+static int insert_ablock(struct dm_array_info *info, uint64_t index,
+			 struct dm_block *block, dm_block_t *root)
+{
+	__le64 block_le = cpu_to_le64(dm_block_location(block));
+
+	__dm_bless_for_disk(block_le);
+	return dm_btree_insert(&info->btree_info, *root, &index, &block_le, root);
+}
+
+/*
+ * Looks up an array block in the btree.  Then shadows it, and updates the
+ * btree to point to this new shadow.  'root' is an input/output parameter
+ * for both the current root block, and the new one.
+ */
+static int shadow_ablock(struct dm_array_info *info, dm_block_t *root,
+			 unsigned index, struct dm_block **block,
+			 struct array_block **ab)
+{
+	int r, inc;
+	uint64_t key = index;
+	dm_block_t b;
+	__le64 block_le;
+
+	/*
+	 * lookup
+	 */
+	r = dm_btree_lookup(&info->btree_info, *root, &key, &block_le);
+	if (r)
+		return r;
+	b = le64_to_cpu(block_le);
+
+	/*
+	 * shadow
+	 */
+	r = dm_tm_shadow_block(info->btree_info.tm, b,
+			       &array_validator, block, &inc);
+	if (r)
+		return r;
+
+	*ab = dm_block_data(*block);
+	if (inc)
+		inc_ablock_entries(info, *ab);
+
+	/*
+	 * Reinsert.
+	 *
+	 * The shadow op will often be a noop.  Only insert if it really
+	 * copied data.
+	 */
+	if (dm_block_location(*block) != b)
+		r = insert_ablock(info, index, *block, root);
+
+	return r;
+}
+
+static int insert_full_ablocks(struct dm_array_info *info, size_t block_size,
+			       unsigned begin_block, unsigned end_block,
+			       unsigned max_entries, const void *value,
+			       dm_block_t *root)
+{
+	int r;
+	struct dm_block *block;
+	struct array_block *ab;
+
+
+	while (begin_block != end_block) {
+		r = alloc_ablock(info, block_size, &block, &ab);
+		if (r)
+			return r;
+
+		fill_ablock(info, ab, value, le32_to_cpu(ab->max_entries));
+
+		r = insert_ablock(info, begin_block, block, root);
+		if (r) {
+			unlock_ablock(info, block);
+			return r;
+		}
+
+		unlock_ablock(info, block);
+		begin_block++;
+	}
+
+	return 0;
+}
+
+/*
+ * Allocate an new array block, and fill it with some values.
+ */
+static int insert_partial_ablock(struct dm_array_info *info, size_t block_size,
+				 unsigned block_index, unsigned nr,
+				 const void *value, dm_block_t *root)
+{
+	int r;
+	struct dm_block *block;
+	struct array_block *ab;
+
+	if (nr == 0)
+		return 0;
+
+	r = alloc_ablock(info, block_size, &block, &ab);
+	if (r)
+		return r;
+
+	fill_ablock(info, ab, value, nr);
+	r = insert_ablock(info, block_index, block, root);
+	unlock_ablock(info, block);
+
+	return r;
+}
+
+/*
+ * There are a bunch of functions involved with resizing an array.  This
+ * structure holds information that commonly needed by them.  Purely here
+ * to reduce parameter count.
+ */
+struct resize {
+	/*
+	 * Describes the array.
+	 */
+	struct dm_array_info *info;
+
+	/*
+	 * The current root of the array.  This gets updated.
+	 */
+	dm_block_t root;
+
+	/*
+	 * Metadata block size.  Used to calculate the nr entries in an
+	 * array block.
+	 */
+	size_t block_size;
+
+	/*
+	 * Maximum nr entries in an array block.
+	 */
+	unsigned max_entries;
+
+	/*
+	 * nr of completely full blocks in the array.
+	 *
+	 * 'old' refers to before the resize, 'new' after.
+	 */
+	unsigned old_nr_full_blocks, new_nr_full_blocks;
+
+	/*
+	 * Number of entries in the final block.  0 iff only full blocks in
+	 * the array.
+	 */
+	unsigned old_nr_entries_in_last_block, new_nr_entries_in_last_block;
+
+	/*
+	 * The default value used when growing the array.
+	 */
+	const void *value;
+};
+
+/*
+ * Removes a consecutive set of array blocks from the btree.  The values
+ * in block are decremented as a side effect of the btree remove.
+ *
+ * begin_index - the index of the first array block to remove.
+ * end_index - the one-past-the-end value.  ie. this block is not removed.
+ */
+static int drop_blocks(struct resize *resize, unsigned begin_index,
+		       unsigned end_index)
+{
+	int r;
+
+	while (begin_index != end_index) {
+		uint64_t key = begin_index++;
+		r = dm_btree_remove(&resize->info->btree_info, resize->root,
+				    &key, &resize->root);
+		if (r)
+			return r;
+	}
+
+	return 0;
+}
+
+/*
+ * Calculates how many blocks are needed for the array.
+ */
+static unsigned total_nr_blocks_needed(unsigned nr_full_blocks,
+				       unsigned nr_entries_in_last_block)
+{
+	return nr_full_blocks + (nr_entries_in_last_block ? 1 : 0);
+}
+
+/*
+ * Shrink an array.
+ */
+static int shrink(struct resize *resize)
+{
+	int r;
+	unsigned begin, end;
+	struct dm_block *block;
+	struct array_block *ab;
+
+	/*
+	 * Lose some blocks from the back?
+	 */
+	if (resize->new_nr_full_blocks < resize->old_nr_full_blocks) {
+		begin = total_nr_blocks_needed(resize->new_nr_full_blocks,
+					       resize->new_nr_entries_in_last_block);
+		end = total_nr_blocks_needed(resize->old_nr_full_blocks,
+					     resize->old_nr_entries_in_last_block);
+
+		r = drop_blocks(resize, begin, end);
+		if (r)
+			return r;
+	}
+
+	/*
+	 * Trim the new tail block
+	 */
+	if (resize->new_nr_entries_in_last_block) {
+		r = shadow_ablock(resize->info, &resize->root,
+				  resize->new_nr_full_blocks, &block, &ab);
+		if (r)
+			return r;
+
+		trim_ablock(resize->info, ab, resize->new_nr_entries_in_last_block);
+		unlock_ablock(resize->info, block);
+	}
+
+	return 0;
+}
+
+/*
+ * Grow an array.
+ */
+static int grow(struct resize *resize)
+{
+	int r;
+	struct dm_block *block;
+	struct array_block *ab;
+
+	if (resize->new_nr_full_blocks > resize->old_nr_full_blocks) {
+		/*
+		 * Pad the end of the old block?
+		 */
+		if (resize->old_nr_entries_in_last_block > 0) {
+			r = shadow_ablock(resize->info, &resize->root,
+					  resize->old_nr_full_blocks, &block, &ab);
+			if (r)
+				return r;
+
+			fill_ablock(resize->info, ab, resize->value, resize->max_entries);
+			unlock_ablock(resize->info, block);
+		}
+
+		/*
+		 * Add the full blocks.
+		 */
+		r = insert_full_ablocks(resize->info, resize->block_size,
+					resize->old_nr_full_blocks,
+					resize->new_nr_full_blocks,
+					resize->max_entries, resize->value,
+					&resize->root);
+		if (r)
+			return r;
+
+		/*
+		 * Add new tail block?
+		 */
+		if (resize->new_nr_entries_in_last_block)
+			r = insert_partial_ablock(resize->info, resize->block_size,
+						  resize->new_nr_full_blocks,
+						  resize->new_nr_entries_in_last_block,
+						  resize->value, &resize->root);
+	} else {
+		if (!resize->old_nr_entries_in_last_block) {
+			r = insert_partial_ablock(resize->info, resize->block_size,
+						  resize->new_nr_full_blocks,
+						  resize->new_nr_entries_in_last_block,
+						  resize->value, &resize->root);
+		} else {
+			r = shadow_ablock(resize->info, &resize->root,
+					  resize->old_nr_full_blocks, &block, &ab);
+			if (r)
+				return r;
+
+			fill_ablock(resize->info, ab, resize->value, resize->new_nr_entries_in_last_block);
+			unlock_ablock(resize->info, block);
+		}
+	}
+
+	return r;
+}
+
+/*----------------------------------------------------------------*/
+
+/*
+ * These are the value_type functions for the btree elements, which point
+ * to array blocks.
+ */
+static void block_inc(void *context, const void *value)
+{
+	__le64 block_le;
+	struct dm_array_info *info = context;
+
+	memcpy(&block_le, value, sizeof(block_le));
+	dm_tm_inc(info->btree_info.tm, le64_to_cpu(block_le));
+}
+
+static void block_dec(void *context, const void *value)
+{
+	int r;
+	uint64_t b;
+	__le64 block_le;
+	uint32_t ref_count;
+	struct dm_block *block;
+	struct array_block *ab;
+	struct dm_array_info *info = context;
+
+	memcpy(&block_le, value, sizeof(block_le));
+	b = le64_to_cpu(block_le);
+
+	r = dm_tm_ref(info->btree_info.tm, b, &ref_count);
+	if (r) {
+		DMERR_LIMIT("couldn't get reference count");
+		return;
+	}
+
+	if (ref_count == 1) {
+		/*
+		 * We're about to drop the last reference to this ablock.
+		 * So we need to decrement the ref count of the contents.
+		 */
+		r = get_ablock(info, b, &block, &ab);
+		if (r) {
+			DMERR_LIMIT("couldn't get array block");
+			return;
+		}
+
+		dec_ablock_entries(info, ab);
+		unlock_ablock(info, block);
+	}
+
+	dm_tm_dec(info->btree_info.tm, b);
+}
+
+static int block_equal(void *context, const void *value1, const void *value2)
+{
+	return !memcmp(value1, value2, sizeof(__le64));
+}
+
+/*----------------------------------------------------------------*/
+
+void dm_setup_array_info(struct dm_array_info *info,
+			 struct dm_transaction_manager *tm,
+			 struct dm_btree_value_type *vt)
+{
+	struct dm_btree_value_type *bvt = &info->btree_info.value_type;
+
+	memcpy(&info->value_type, vt, sizeof(info->value_type));
+	info->btree_info.tm = tm;
+	info->btree_info.levels = 1;
+
+	bvt->context = info;
+	bvt->size = sizeof(__le64);
+	bvt->inc = block_inc;
+	bvt->dec = block_dec;
+	bvt->equal = block_equal;
+}
+EXPORT_SYMBOL_GPL(dm_setup_array_info);
+
+int dm_array_empty(struct dm_array_info *info, dm_block_t *root)
+{
+	return dm_btree_empty(&info->btree_info, root);
+}
+EXPORT_SYMBOL_GPL(dm_array_empty);
+
+static int array_resize(struct dm_array_info *info, dm_block_t root,
+			uint32_t old_size, uint32_t new_size,
+			const void *value, dm_block_t *new_root)
+{
+	int r;
+	struct resize resize;
+
+	if (old_size == new_size)
+		return 0;
+
+	resize.info = info;
+	resize.root = root;
+	resize.block_size = dm_bm_block_size(dm_tm_get_bm(info->btree_info.tm));
+	resize.max_entries = calc_max_entries(info->value_type.size,
+					      resize.block_size);
+
+	resize.old_nr_full_blocks = old_size / resize.max_entries;
+	resize.old_nr_entries_in_last_block = old_size % resize.max_entries;
+	resize.new_nr_full_blocks = new_size / resize.max_entries;
+	resize.new_nr_entries_in_last_block = new_size % resize.max_entries;
+	resize.value = value;
+
+	r = ((new_size > old_size) ? grow : shrink)(&resize);
+	if (r)
+		return r;
+
+	*new_root = resize.root;
+	return 0;
+}
+
+int dm_array_resize(struct dm_array_info *info, dm_block_t root,
+		    uint32_t old_size, uint32_t new_size,
+		    const void *value, dm_block_t *new_root)
+		    __dm_written_to_disk(value)
+{
+	int r = array_resize(info, root, old_size, new_size, value, new_root);
+	__dm_unbless_for_disk(value);
+	return r;
+}
+EXPORT_SYMBOL_GPL(dm_array_resize);
+
+int dm_array_del(struct dm_array_info *info, dm_block_t root)
+{
+	return dm_btree_del(&info->btree_info, root);
+}
+EXPORT_SYMBOL_GPL(dm_array_del);
+
+int dm_array_get(struct dm_array_info *info, dm_block_t root,
+		 uint32_t index, void *value_le)
+{
+	int r;
+	struct dm_block *block;
+	struct array_block *ab;
+	size_t block_size;
+	unsigned entry, max_entries;
+
+	block_size = dm_bm_block_size(dm_tm_get_bm(info->btree_info.tm));
+	max_entries = calc_max_entries(info->value_type.size, block_size);
+
+	r = lookup_ablock(info, root, index / max_entries, &block, &ab);
+	if (r)
+		return r;
+
+	entry = index % max_entries;
+	if (entry >= le32_to_cpu(ab->nr_entries))
+		r = -ENODATA;
+	else
+		memcpy(value_le, element_at(info, ab, entry),
+		       info->value_type.size);
+
+	unlock_ablock(info, block);
+	return r;
+}
+EXPORT_SYMBOL_GPL(dm_array_get);
+
+static int array_set(struct dm_array_info *info, dm_block_t root,
+		     uint32_t index, const void *value, dm_block_t *new_root)
+{
+	int r;
+	struct dm_block *block;
+	struct array_block *ab;
+	size_t block_size;
+	unsigned max_entries;
+	unsigned entry;
+	void *old_value;
+	struct dm_btree_value_type *vt = &info->value_type;
+
+	block_size = dm_bm_block_size(dm_tm_get_bm(info->btree_info.tm));
+	max_entries = calc_max_entries(info->value_type.size, block_size);
+
+	r = shadow_ablock(info, &root, index / max_entries, &block, &ab);
+	if (r)
+		return r;
+	*new_root = root;
+
+	entry = index % max_entries;
+	if (entry >= le32_to_cpu(ab->nr_entries)) {
+		r = -ENODATA;
+		goto out;
+	}
+
+	old_value = element_at(info, ab, entry);
+	if (vt->dec &&
+	    (!vt->equal || !vt->equal(vt->context, old_value, value))) {
+		vt->dec(vt->context, old_value);
+		if (vt->inc)
+			vt->inc(vt->context, value);
+	}
+
+	memcpy(old_value, value, info->value_type.size);
+
+out:
+	unlock_ablock(info, block);
+	return r;
+}
+
+int dm_array_set(struct dm_array_info *info, dm_block_t root,
+		 uint32_t index, const void *value, dm_block_t *new_root)
+		 __dm_written_to_disk(value)
+{
+	int r;
+
+	r = array_set(info, root, index, value, new_root);
+	__dm_unbless_for_disk(value);
+	return r;
+}
+EXPORT_SYMBOL_GPL(dm_array_set);
+
+struct walk_info {
+	struct dm_array_info *info;
+	int (*fn)(void *context, uint64_t key, void *leaf);
+	void *context;
+};
+
+static int walk_ablock(void *context, uint64_t *keys, void *leaf)
+{
+	struct walk_info *wi = context;
+
+	int r;
+	unsigned i;
+	__le64 block_le;
+	unsigned nr_entries, max_entries;
+	struct dm_block *block;
+	struct array_block *ab;
+
+	memcpy(&block_le, leaf, sizeof(block_le));
+	r = get_ablock(wi->info, le64_to_cpu(block_le), &block, &ab);
+	if (r)
+		return r;
+
+	max_entries = le32_to_cpu(ab->max_entries);
+	nr_entries = le32_to_cpu(ab->nr_entries);
+	for (i = 0; i < nr_entries; i++) {
+		r = wi->fn(wi->context, keys[0] * max_entries + i,
+			   element_at(wi->info, ab, i));
+
+		if (r)
+			break;
+	}
+
+	unlock_ablock(wi->info, block);
+	return r;
+}
+
+int dm_array_walk(struct dm_array_info *info, dm_block_t root,
+		  int (*fn)(void *, uint64_t key, void *leaf),
+		  void *context)
+{
+	struct walk_info wi;
+
+	wi.info = info;
+	wi.fn = fn;
+	wi.context = context;
+
+	return dm_btree_walk(&info->btree_info, root, walk_ablock, &wi);
+}
+EXPORT_SYMBOL_GPL(dm_array_walk);
+
+/*----------------------------------------------------------------*/
diff --git a/drivers/md/persistent-data/dm-array.h b/drivers/md/persistent-data/dm-array.h
new file mode 100644
index 0000000..0fb868d
--- /dev/null
+++ b/drivers/md/persistent-data/dm-array.h
@@ -0,0 +1,128 @@
+/*
+ * Copyright (C) 2012 Red Hat, Inc.
+ *
+ * This file is released under the GPL.
+ */
+#ifndef _LINUX_DM_ARRAY_H
+#define _LINUX_DM_ARRAY_H
+
+#include "dm-btree.h"
+
+/*----------------------------------------------------------------*/
+
+/*
+ * The dm-array is a persistent version of an array.  It packs the data
+ * more efficiently than a btree which will result in less disk space use,
+ * and a performance boost.  The get and set operations are still O(ln(n)),
+ * but with a much smaller constant.
+ *
+ * The value type structure is reused from the btree type to support proper
+ * reference counting of values.
+ *
+ * The arrays implicitly know their length, and bounds are checked for
+ * lookups and updates.  It doesn't store this in an accessible place
+ * because it would waste a whole metadata block.  Make sure you store the
+ * size along with the array root in your encompassing data.
+ */
+
+/*
+ * Describes an array.  Don't initialise this structure yourself, use the
+ * setup function below.
+ */
+struct dm_array_info {
+	struct dm_transaction_manager *tm;
+	struct dm_btree_value_type value_type;
+	struct dm_btree_info btree_info;
+};
+
+/*
+ * Sets up a dm_array_info structure.
+ *
+ * info - the structure being filled in.
+ * tm   - the transaction manager that should supervise this structure.
+ * vt   - describes the leaf values.
+ */
+void dm_setup_array_info(struct dm_array_info *info,
+			 struct dm_transaction_manager *tm,
+			 struct dm_btree_value_type *vt);
+
+/*
+ * Initialise an empty array, zero length array.
+ *
+ * info - describes the array
+ * root - on success this will be filled out with the root block
+ */
+int dm_array_empty(struct dm_array_info *info, dm_block_t *root);
+
+/*
+ * Resizes the array.
+ *
+ * info - describes the array
+ * root - the root block of the array on disk
+ * old_size - yes, the caller is responsible for remembering the size of the array
+ * new_size - can be bigger or smaller than old_size
+ * value - if we're growing the array the new entries will have this value
+ * new_root - on success, points to the new root block
+ *
+ * If growing the inc function for value will be called the appropriate
+ * number of times.  So if the caller is holding a reference they may want
+ * to drop it.
+ */
+int dm_array_resize(struct dm_array_info *info, dm_block_t root,
+		    uint32_t old_size, uint32_t new_size,
+		    const void *value, dm_block_t *new_root)
+	__dm_written_to_disk(value);
+
+/*
+ * Frees a whole array.  The value_type's decrement operation will be called
+ * for all values in the array
+ */
+int dm_array_del(struct dm_array_info *info, dm_block_t root);
+
+/*
+ * Lookup a value in the array
+ *
+ * info - describes the array
+ * root - root block of the array
+ * index - array index
+ * value - the value to be read.  Will be in on disk format of course.
+ *
+ * -ENODATA will be returned if the index is out of bounds.
+ */
+int dm_array_get(struct dm_array_info *info, dm_block_t root,
+		 uint32_t index, void *value);
+
+/*
+ * Set an entry in the array.
+ *
+ * info - describes the array
+ * root - root block of the array
+ * index - array index
+ * value - value to be written to disk.  Make sure you bless this before
+ *         calling.
+ * new_root - the new root block
+ *
+ * The old value being overwritten will be decremented, the new value
+ * incremented.
+ *
+ * -ENODATA will be returned if the index is out of bounds.
+ */
+int dm_array_set(struct dm_array_info *info, dm_block_t root,
+		 uint32_t index, const void *value, dm_block_t *new_root)
+	__dm_written_to_disk(value);
+
+/*
+ * Walk through all the entries in an array.
+ *
+ * info - describes the array
+ * root - root block of the array
+ * fn - called back for every element
+ * context - passed to the callback
+ */
+int dm_array_walk(struct dm_array_info *info, dm_block_t root,
+		  int (*fn)(void *, uint64_t key, void *leaf),
+		  void *context);
+
+/*----------------------------------------------------------------*/
+
+#endif
-- 
1.7.10.4

  parent reply	other threads:[~2012-12-13 20:19 UTC|newest]

Thread overview: 60+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2012-12-13 20:19 Another cache target Joe Thornber
2012-12-13 20:19 ` [PATCH 1/8] [persistent-data] Fix a bug in btree_del, and another bug that was compensating for it Joe Thornber
2012-12-13 20:19 ` [PATCH 2/8] [persistent-data] dm_btree_walk Joe Thornber
2012-12-13 20:19 ` [PATCH 3/8] [persistent-data] tweak an error message Joe Thornber
2012-12-13 20:19 ` [PATCH 4/8] [dm-bio-prison] Change the bio-prison interface so the memory for the cells is passed in Joe Thornber
2013-01-14 10:02   ` Alasdair G Kergon
2013-01-14 14:06     ` thornber
2013-01-14 14:22       ` Alasdair G Kergon
2013-01-21 23:32   ` Alasdair G Kergon
2013-01-22 11:31     ` thornber
2013-01-22 12:10       ` Alasdair G Kergon
2012-12-13 20:19 ` [PATCH 5/8] [dm-thin] Fix a race condition between discard bios and ordinary bios Joe Thornber
2012-12-14 15:52   ` Mike Snitzer
2013-01-22  0:03   ` Alasdair G Kergon
2013-01-24  2:35   ` Alasdair G Kergon
2013-01-24 13:23     ` thornber
2013-02-06  0:11       ` Mikulas Patocka
2013-02-07 11:20         ` thornber
2012-12-13 20:19 ` Joe Thornber [this message]
2013-01-22 21:18   ` [PATCH 6/8] [persistent-data] Add a transactional array Alasdair G Kergon
2013-01-23 12:07     ` thornber
2013-01-25 20:11   ` Alasdair G Kergon
2013-01-28 13:06     ` thornber
2013-01-28 20:25       ` Alasdair G Kergon
2013-01-28 14:57     ` thornber
2013-01-28 20:22       ` Alasdair G Kergon
2012-12-13 20:19 ` [PATCH 7/8] [persistent-data] transactional bitset Joe Thornber
2013-01-22 21:59   ` Alasdair G Kergon
2012-12-13 20:19 ` [PATCH 8/8] [dm-cache] cache target Joe Thornber
2012-12-14  0:17   ` Darrick J. Wong
2012-12-14 10:09     ` thornber
2013-02-12 15:27   ` Alasdair G Kergon
2013-02-12 16:40     ` Alasdair G Kergon
2013-02-12 17:29       ` Alasdair G Kergon
2013-02-14 13:57       ` Joe Thornber
2013-02-14 14:05     ` Joe Thornber
2013-02-14 21:06       ` Alasdair G Kergon
2012-12-13 21:57 ` Another " Mike Snitzer
2012-12-14  1:16   ` Darrick J. Wong
2012-12-14  2:19     ` Mike Snitzer
2012-12-14  2:27       ` Mike Snitzer
2012-12-14  2:42         ` Darrick J. Wong
2012-12-14  4:23           ` Mike Snitzer
2012-12-14  2:34       ` Darrick J. Wong
2012-12-14 10:24         ` thornber
2012-12-14 12:11           ` thornber
2012-12-14 21:51             ` Darrick J. Wong
2012-12-15  8:23               ` Joe Thornber
2012-12-18  1:49                 ` Darrick J. Wong
2012-12-18  2:31                   ` Alasdair G Kergon
2013-01-08  0:19                 ` Darrick J. Wong
2013-01-08 13:55                   ` thornber
2012-12-22 18:50       ` Mark Hills
2012-12-17 16:54     ` Heinz Mauelshagen
2012-12-18 15:44       ` basic cache policy module fix [was: Re: Another cache target] Mike Snitzer
2012-12-20  1:14         ` Darrick J. Wong
2012-12-20 12:57           ` Heinz Mauelshagen
2012-12-20 13:24             ` Mike Snitzer
2012-12-20 16:10               ` Darrick J. Wong
2012-12-20 17:02                 ` Heinz Mauelshagen

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1355429956-22785-7-git-send-email-ejt@redhat.com \
    --to=ejt@redhat.com \
    --cc=dm-devel@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.