All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] Add dm-userspace to the Xen kernel
@ 2006-06-09 21:08 Dan Smith
  2006-06-09 21:48 ` Anthony Liguori
  2006-06-10 20:40 ` Bastian Blank
  0 siblings, 2 replies; 17+ messages in thread
From: Dan Smith @ 2006-06-09 21:08 UTC (permalink / raw)
  To: Xen Developers


[-- Attachment #1.1.1: Type: text/plain, Size: 404 bytes --]

This patch adds dm-userspace to the -xen Linux kernel.  I'd like to
get it into the tree so that people that want to can play with it.
Anyone wishing to to do so can download the tools separately, but they
need the kernel module to be able to use it.

The tools are available here:

  http://static.danplanet.com/dm-userspace/libdmu-0.3.tar.gz
  http://static.danplanet.com/dm-userspace/cowd-0.3.tar.gz


[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #1.1.2: dm-userspace.patch --]
[-- Type: text/x-patch, Size: 44884 bytes --]

# HG changeset patch
# User Dan Smith <danms@us.ibm.com>
# Node ID db178a1b30f3e92da9ce6fd14f757efa9f6763c5
# Parent  5a0ed6c476732da229c3307ea5357cdd196e5462
This adds dm-userspace to the xen linux kernel via another entry in the
patches/ directory.  The dm-userspace module is completely self-contained
and will not affect anything unless it is loaded.  People wishing to
experiment with dm-userspace can download the tools packages separately, but
they need this module to use them.

Signed-off-by: Dan Smith <danms@us.ibm.com>

diff -r 5a0ed6c47673 -r db178a1b30f3 buildconfigs/linux-defconfig_xen_x86_32
--- a/buildconfigs/linux-defconfig_xen_x86_32	Fri Jun  9 14:29:00 2006 +0100
+++ b/buildconfigs/linux-defconfig_xen_x86_32	Fri Jun  9 13:27:36 2006 -0700
@@ -1187,6 +1187,7 @@ CONFIG_DM_ZERO=m
 CONFIG_DM_ZERO=m
 CONFIG_DM_MULTIPATH=m
 CONFIG_DM_MULTIPATH_EMC=m
+CONFIG_DM_USERSPACE=m
 
 #
 # Fusion MPT device support
diff -r 5a0ed6c47673 -r db178a1b30f3 patches/linux-2.6.16.13/dm-userspace.patch
--- /dev/null	Thu Jan  1 00:00:00 1970 +0000
+++ b/patches/linux-2.6.16.13/dm-userspace.patch	Fri Jun  9 13:27:36 2006 -0700
@@ -0,0 +1,1737 @@
+diff -Naur ./drivers/md/dm-userspace.c ../linux-2.6.16.13-dmu/drivers/md/dm-userspace.c
+--- ./drivers/md/dm-userspace.c	1969-12-31 16:00:00.000000000 -0800
++++ ../linux-2.6.16.13-dmu/drivers/md/dm-userspace.c	2006-06-09 12:00:32.627933616 -0700
+@@ -0,0 +1,1613 @@
++/*
++ * Copyright (C) International Business Machines Corp., 2006
++ * Author: Dan Smith <danms@us.ibm.com>
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License as published by
++ * the Free Software Foundation; under version 2 of the License.
++ *
++ * This program is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
++ * GNU General Public License for more details.
++ *
++ * You should have received a copy of the GNU General Public License
++ * along with this program; if not, write to the Free Software
++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
++ *
++ */
++
++#include <linux/module.h>
++#include <linux/init.h>
++#include <linux/blkdev.h>
++#include <linux/bio.h>
++#include <linux/slab.h>
++#include <linux/spinlock.h>
++#include <linux/list.h>
++#include <linux/fs.h>
++#include <linux/cdev.h>
++#include <linux/types.h>
++#include <linux/poll.h>
++
++#include <linux/dm-userspace.h>
++
++#include "dm.h"
++#include "dm-bio-list.h"
++#include "kcopyd.h"
++
++#define DMU_DEBUG      0
++
++#define DMU_COPY_PAGES 256
++#define DMU_KEY_LEN    256
++
++#define DMU_PREFIX     "dm-userspace: "
++#define DMU_SET_ERROR(ti, msg) ti->error = DMU_PREFIX msg
++
++#define DMU_LIFETIME   128
++
++#if DMU_DEBUG
++#define DPRINTK( s, arg... ) printk(DMU_PREFIX s, ##arg)
++#else
++#define DPRINTK( s, arg... )
++#endif
++
++kmem_cache_t *request_cache;
++kmem_cache_t *remap_cache;
++
++static int enable_watchdog = 0;
++static struct work_struct wd;
++
++static spinlock_t devices_lock;
++static LIST_HEAD(devices);
++
++/* Device number for the control device */
++static dev_t our_dev;
++
++struct target_device {
++	struct list_head list;
++	struct block_device *bdev;
++	struct kref users;
++};
++
++struct hash_table {
++	struct list_head *table;
++	uint64_t size;
++	uint32_t mask;
++	uint64_t count;
++};
++
++/* A dm-userspace device, which consists of multiple targets sharing a
++ * common key
++ */
++struct dmu_device {
++	spinlock_t lock;
++	struct list_head list;
++	struct list_head requests;
++	struct list_head target_devs;
++
++	struct hash_table remaps;
++
++	struct cdev cdev;
++	dev_t ctl_dev;
++
++	char key[DMU_KEY_LEN];
++	struct kref users;
++
++	wait_queue_head_t wqueue;
++
++	uint64_t block_size;
++	uint64_t block_mask;
++	unsigned int block_shift;
++
++	struct kcopyd_client *kcopyd_client;
++
++	/*
++	 * Count of the number of outstanding requests that have been
++	 * made against this device, but have not yet been flushed
++	 */
++	atomic_t remap_ct;
++
++	uint32_t id_counter;
++};
++
++struct userspace_request {
++	spinlock_t lock;
++	struct list_head list;
++	struct dmu_device *dev;
++	int type;
++	int sent;
++	uint32_t flags;
++	uint32_t id;
++	union {
++		struct bio_list bios;
++		uint64_t block;
++	} u;
++	atomic_t refcnt;
++};
++
++struct dmu_map {
++	spinlock_t lock;
++	uint64_t org_block; /* Original block */
++	uint64_t new_block; /* Destination block */
++	int64_t offset;
++	uint32_t flags;
++	struct target_device *src;
++	struct target_device *dest;
++	struct bio_list bios;
++	struct list_head list;
++	struct dmu_device *dev;
++
++	uint32_t use_count;
++
++	struct dmu_map *next; /* Next remap that is dependent on this one */
++};
++
++/* Forward delcarations */
++static struct file_operations ctl_fops;
++static void copy_block(struct dmu_map *remap);
++
++/*
++ * Return the block number for @sector
++ */
++static inline u64 dmu_block(struct dmu_device *dev,
++				 sector_t sector)
++{
++	return sector >> dev->block_shift;
++}
++
++/*
++ * Return the sector offset in a block for @sector
++ */
++static inline u64 dmu_sector_offset(struct dmu_device *dev,
++				 sector_t sector)
++{
++	return sector & dev->block_mask;
++}
++
++/*
++ * Return the starting sector for @block
++ */
++static inline u64 dmu_sector(struct dmu_device *dev,
++				  uint64_t block)
++{
++	return block << dev->block_shift;
++}
++
++static void error_bios(struct bio_list *bios)
++{
++	struct bio *bio;
++	int count = 0;
++
++	while ((bio = bio_list_pop(bios)) != NULL) {
++		bio_io_error(bio, bio->bi_size);
++		count++;
++	}
++
++	if (count)
++		printk(KERN_ERR DMU_PREFIX
++		       "*** Failed %i requests\n", count);
++}
++
++static void init_remap(struct dmu_device *dev, struct dmu_map *remap)
++{
++	spin_lock_init(&remap->lock);
++	remap->org_block = remap->new_block = 0;
++	remap->offset = 0;
++	remap->flags = 0;
++	remap->src = remap->dest = NULL;
++	bio_list_init(&remap->bios);
++	INIT_LIST_HEAD(&remap->list);
++	remap->dev = dev;
++	remap->use_count = DMU_LIFETIME;
++	remap->next = NULL;
++}
++
++static void init_request(struct dmu_device *dev,
++			 int type,
++			 struct userspace_request *req)
++{
++	spin_lock_init(&req->lock);
++	INIT_LIST_HEAD(&req->list);
++	req->dev = dev;
++	req->type = type;
++	req->sent = 0;
++	req->flags = 0;
++	if (type == DM_USERSPACE_COPY_FINISHED) {
++		req->u.block = 0;
++		req->id = 0;
++	} else {
++		bio_list_init(&req->u.bios);
++		spin_lock(&dev->lock);
++		dev->id_counter++;
++		if (dev->id_counter == 0)
++			dev->id_counter = 1;
++		req->id = dev->id_counter;
++		spin_unlock(&dev->lock);
++	}
++	atomic_set(&req->refcnt, 0);
++}
++
++static void destroy_remap(struct dmu_map *remap)
++{
++	error_bios(&remap->bios);
++}
++
++/*
++ * For an even block distribution, this is not too bad, but it could
++ * probably be better
++ */
++static uint32_t ht_hash(struct hash_table *ht, uint64_t block)
++{
++	return (uint32_t)block & ht->mask;
++}
++
++static int ht_init(struct hash_table *ht, unsigned long size)
++{
++	uint64_t i;
++	unsigned long pages;
++	unsigned int order = ffs((size * sizeof(struct list_head *)) / 
++				 PAGE_SIZE);
++
++	DPRINTK("Going to allocate 2^%u pages for %lu-entry table\n",
++		order, size);
++
++	pages = __get_free_pages(GFP_ATOMIC, order);
++	if (!pages) {
++		DPRINTK("Failed to allocate hash table (%lu)\n", size);
++		return 0;
++	}
++
++	ht->table = (void *)pages;
++	ht->size = size;
++	ht->count = 0;
++	ht->mask = size - 1;
++
++	for (i = 0; i < size; i++)
++		INIT_LIST_HEAD(&ht->table[i]);
++
++	return 1;
++}
++
++static void ht_insert_bucket(struct dmu_map *map, struct list_head *list)
++{
++	list_add_tail(&map->list, list);
++}
++
++/*
++ * I'm sure this is quite dumb, but it works for now
++ */
++static int ht_should_grow(struct hash_table *ht)
++{
++	return ht->count > (2 * (ht->size / 4));
++}
++
++static void ht_grow_table(struct hash_table *ht);
++static void ht_insert_map(struct hash_table *ht, struct dmu_map *map)
++{
++	uint32_t addr;
++
++	addr = ht_hash(ht, map->org_block) & ht->mask;
++
++	BUG_ON(addr >= ht->size);
++
++	ht_insert_bucket(map, &ht->table[addr]);
++	ht->count++;
++
++	if (ht_should_grow(ht))
++		ht_grow_table(ht);
++}
++
++static void ht_insert_map_dev(struct dmu_device *dev, struct dmu_map *map)
++{
++	spin_lock(&dev->lock);
++	ht_insert_map(&dev->remaps, map);
++	spin_unlock(&dev->lock);
++}
++
++static void ht_delete_map(struct hash_table *ht, struct dmu_map *map)
++{
++	list_del(&map->list);
++	BUG_ON(ht->count == 0);
++	ht->count--;
++}
++
++static void ht_delete_map_dev(struct dmu_device *dev, struct dmu_map *map)
++{
++	spin_lock(&dev->lock);
++	ht_delete_map(&dev->remaps, map);
++	spin_unlock(&dev->lock);
++}
++
++static struct dmu_map *ht_find_map(struct hash_table *ht, uint64_t block)
++{
++	uint32_t addr;
++	struct dmu_map *m;
++
++	addr = ht_hash(ht, block) & ht->mask;
++
++	BUG_ON(addr >= ht->size);
++
++	list_for_each_entry(m, &ht->table[addr], list) {
++		if (m->org_block == block)
++			return m;
++	}
++
++	return NULL;
++}
++
++static struct dmu_map *ht_find_map_dev(struct dmu_device *dev, uint64_t block)
++{
++	struct dmu_map *remap;
++
++	spin_lock(&dev->lock);
++
++	remap = ht_find_map(&dev->remaps, block);
++
++	spin_unlock(&dev->lock);
++
++	return remap;
++}
++
++static void ht_grow_table(struct hash_table *ht)
++{
++	struct hash_table old_table;
++	uint64_t i;
++
++	old_table = *ht;
++
++	if (!ht_init(ht, old_table.size * 2)) {
++		DPRINTK("Can't grow table to %llu\n",
++			old_table.size * 2);
++		return;
++	}
++
++	DPRINTK("Growing from %llu to %llu\n",
++		old_table.size, ht->size);
++
++	for (i = 0; i < old_table.size; i++ ) {
++		struct dmu_map *m, *n;
++		list_for_each_entry_safe(m, n, &old_table.table[i],
++					 list) {
++			list_del_init(&m->list);
++			ht_insert_map(ht, m);
++		}
++	}
++
++	free_pages((unsigned long)old_table.table,
++		   ffs((old_table.size * sizeof(struct list_head *))
++		       / PAGE_SIZE));
++}
++
++static uint64_t ht_destroy_table(struct hash_table *ht)
++{
++	uint64_t i, count = 0;
++	struct dmu_map *m, *n;
++
++	for (i = 0; i < ht->size; i++) {
++		list_for_each_entry_safe(m, n, &ht->table[i], list) {
++			ht_delete_map(ht, m);
++			kmem_cache_free(remap_cache, m);
++			count++;
++		}
++	}
++
++	return count;
++}
++
++static struct target_device *get_target(struct dmu_device *dev,
++					dev_t devno)
++{
++
++	struct target_device *target;
++	struct block_device *bdev;
++
++	spin_lock(&dev->lock);
++	list_for_each_entry(target, &dev->target_devs, list) {
++		if (target->bdev->bd_dev == devno) {
++			spin_unlock(&dev->lock);
++			goto out;
++		}
++	}
++	spin_unlock(&dev->lock);
++
++	bdev = open_by_devnum(devno, FMODE_READ | FMODE_WRITE);
++	if (IS_ERR(bdev)) {
++		printk(KERN_ERR DMU_PREFIX "Unable to lookup device %x\n",
++		       devno);
++		return NULL;
++	}
++
++	target = kmalloc(sizeof(*target), GFP_KERNEL);
++	if (!target) {
++		printk(KERN_ERR DMU_PREFIX
++		       "Unable to alloc new target device\n");
++		return NULL;
++	}
++
++	target->bdev = bdev;
++	INIT_LIST_HEAD(&target->list);
++
++	spin_lock(&dev->lock);
++	list_add_tail(&target->list, &dev->target_devs);
++	spin_unlock(&dev->lock);
++
++ out:
++	return target;
++}
++
++/* Caller must hold dev->lock */
++static void put_target(struct dmu_device *dev,
++		       struct target_device *target)
++{
++	list_del(&target->list);
++
++	bd_release(target->bdev);
++	blkdev_put(target->bdev);
++
++	kfree(target);
++}
++
++/*
++ * Add a request to the device's request queue
++ */
++static void add_request(struct dmu_device *dev,
++			       struct userspace_request *req)
++{
++	spin_lock(&dev->lock);
++	list_add_tail(&req->list, &dev->requests);
++	spin_unlock(&dev->lock);
++
++	wake_up(&dev->wqueue);
++}
++
++/*
++ *
++ */
++static int have_pending_requests(struct dmu_device *dev)
++{
++	struct userspace_request *req;
++	int ret = 0;
++
++	spin_lock(&dev->lock);
++	
++	list_for_each_entry(req, &dev->requests, list) {
++		if (!req->sent) {
++			ret = 1;
++			break;
++		}
++	}
++
++	spin_unlock(&dev->lock);
++
++	return ret;
++}
++
++/*
++ * This periodically dumps out some debug information.  It's really
++ * only useful while developing.
++ */
++static void watchdog(void *data)
++{
++	unsigned int v_remaps, i_remaps, reqs, s_reqs, devs = 0;
++	struct dmu_device *dev;
++	struct dmu_map *map;
++	struct userspace_request *req;
++	uint64_t i;
++
++	spin_lock(&devices_lock);
++
++	list_for_each_entry(dev, &devices, list) {
++		spin_lock(&dev->lock);
++
++		v_remaps = i_remaps = reqs = s_reqs = 0;
++
++		for (i = 0; i < dev->remaps.size; i++) {
++			list_for_each_entry(map, &dev->remaps.table[i], list)
++				if (dmu_get_flag(&map->flags, DMU_FLAG_VALID))
++					v_remaps++;
++				else
++					i_remaps++;
++		}
++
++		list_for_each_entry(req, &dev->requests, list)
++			if (req->sent)
++				s_reqs++;
++			else
++				reqs++;
++
++		printk("Device %x:%x: "
++		       "  reqs: %u/%u "
++		       "  inv maps: %u "
++		       "  val maps: %u (%i)\n",
++		       MAJOR(dev->ctl_dev), MINOR(dev->ctl_dev),
++		       reqs, s_reqs, i_remaps, v_remaps,
++		       atomic_read(&dev->remap_ct));
++		devs++;
++		
++		spin_unlock(&dev->lock);
++	}
++
++	spin_unlock(&devices_lock);
++
++	schedule_delayed_work(&wd, HZ);
++}
++
++static void __bio_remap(struct bio *bio,
++		      struct dmu_map *remap)
++{
++	BUG_ON(remap->dest == NULL);
++
++	bio->bi_sector = dmu_sector(remap->dev, remap->new_block) +
++		dmu_sector_offset(remap->dev, bio->bi_sector) +
++		remap->offset;
++
++	bio->bi_bdev = remap->dest->bdev;
++}
++
++/* 
++   Pop, remap, and flush a bio.  Set VALID flag if no bios
++   available 
++*/
++static struct bio *pop_and_remap(struct dmu_map *remap)
++{
++	struct bio *bio = NULL;
++
++	spin_lock(&remap->lock);
++
++	bio = bio_list_pop(&remap->bios);
++	if (bio)
++		__bio_remap(bio, remap);
++	else {
++		/* If there are no more bios, we must set the VALID
++		   flag before we release the lock */
++		dmu_set_flag(&remap->flags, DMU_FLAG_VALID);
++	}
++
++	spin_unlock(&remap->lock);
++
++	return bio;
++}
++
++static void get_remap_attrs(struct dmu_map *remap,
++			    int *copy_first,
++			    int *temporary,
++			    struct dmu_map **next)
++{
++	spin_lock(&remap->lock);
++
++	*copy_first = dmu_get_flag(&remap->flags, DMU_FLAG_COPY_FIRST);
++	*temporary = dmu_get_flag(&remap->flags, DMU_FLAG_TEMPORARY);
++	*next = remap->next;
++	remap->next = NULL;
++
++	spin_unlock(&remap->lock);
++}
++
++static void remap_flusher(struct dmu_map *remap)
++{
++	struct bio *bio;
++	struct userspace_request *req;
++	int copy_first = 0, temporary = 0;
++	struct dmu_map *next;
++
++	DPRINTK("Flushing bios for block %llu:%llu\n",
++	       remap->org_block, remap->new_block);
++
++	while (1) {
++
++		bio = pop_and_remap(remap);
++
++		if (bio)
++			generic_make_request(bio);
++		else
++			break;
++
++		atomic_dec(&remap->dev->remap_ct);
++
++		DPRINTK("Flushed %llu:%llu (%u bytes)\n",
++			dmu_block(remap->dev, bio->bi_sector),
++			dmu_sector_offset(remap->dev, bio->bi_sector),
++			bio->bi_size);
++	}
++
++	get_remap_attrs(remap, &copy_first, &temporary, &next);
++
++	if (next) {
++		/* FIXME: Make copy_block check for this flag and just
++		 flush if not set to avoid this non-locked access */
++		if (dmu_get_flag(&next->flags, DMU_FLAG_COPY_FIRST))
++			copy_block(next);
++		else
++			remap_flusher(next);
++	}
++	
++	/* Notify userspace */
++	if (copy_first) {
++		req = kmem_cache_alloc(request_cache, GFP_KERNEL);
++		if (!req) {
++			printk(KERN_ERR DMU_PREFIX
++			       "Failed to allocate copy response\n");
++			return;
++		}
++
++		init_request(remap->dev, DM_USERSPACE_COPY_FINISHED, req);
++
++		req->u.block = remap->org_block;
++
++		add_request(remap->dev, req);
++	}
++
++	if (temporary) {
++		destroy_remap(remap);
++		kmem_cache_free(remap_cache, remap);
++	}
++}
++
++static void destroy_dmu_device(struct kref *ref)
++{
++	struct dmu_device *dev;
++	struct list_head *cursor, *next;
++	uint64_t remaps;
++
++	dev = container_of(ref, struct dmu_device, users);
++
++	DPRINTK("Destroying device: %s\n", dev->key);
++
++	spin_lock(&devices_lock);
++	list_del(&dev->list);
++	spin_unlock(&devices_lock);
++
++	list_for_each_safe(cursor, next, &dev->target_devs) {
++		struct target_device *target;
++
++		target = list_entry(cursor,
++				    struct target_device,
++				    list);
++
++		put_target(dev, target);
++	}
++
++	remaps = ht_destroy_table(&dev->remaps);
++	DPRINTK("Destroyed %llu/%llu remaps\n", remaps, dev->remaps.count);
++
++	list_for_each_safe(cursor, next, &dev->requests) {
++		struct userspace_request *req;
++
++		req = list_entry(cursor,
++				 struct userspace_request,
++				 list);
++
++		list_del(&req->list);
++
++		error_bios(&req->u.bios);
++
++		kmem_cache_free(request_cache, req);
++	}
++
++	kcopyd_client_destroy(dev->kcopyd_client);
++
++	cdev_del(&dev->cdev);
++	kfree(dev);
++}
++
++static inline void get_dev(struct dmu_device *dev)
++{
++	DPRINTK("get on %s\n", dev->key);
++	kref_get(&dev->users);
++}
++
++static inline void put_dev(struct dmu_device *dev)
++{
++	DPRINTK("put on %s\n", dev->key);
++	kref_put(&dev->users, destroy_dmu_device);
++}
++
++static int get_free_minor(void)
++{
++	struct dmu_device *dev;
++	int minor = 0;
++
++	spin_lock(&devices_lock);
++
++	list_for_each_entry(dev, &devices, list) {
++		if (MINOR(dev->ctl_dev) != minor)
++			break;
++		minor++;
++	}
++
++	spin_unlock(&devices_lock);
++
++	return minor;
++}
++
++static int init_dmu_device(struct dmu_device *dev, u32 block_size)
++{
++	int ret;
++
++	cdev_init(&dev->cdev, &ctl_fops);
++	dev->cdev.owner = THIS_MODULE;
++	dev->cdev.ops = &ctl_fops;
++
++	init_waitqueue_head(&dev->wqueue);
++	INIT_LIST_HEAD(&dev->list);
++	INIT_LIST_HEAD(&dev->requests);
++	INIT_LIST_HEAD(&dev->target_devs);
++	kref_init(&dev->users);
++	spin_lock_init(&dev->lock);
++
++	atomic_set(&dev->remap_ct, 0);
++	dev->id_counter = 1; /* reserve 0 for unsolicited maps */
++
++	if (!ht_init(&dev->remaps, 2048)) {
++		printk(KERN_ERR DMU_PREFIX
++		       "Unable to allocate hash table\n");
++		return 0;
++	}
++
++	dev->block_size  = block_size;
++	dev->block_mask  = block_size - 1;
++	dev->block_shift = ffs(block_size) - 1;
++
++	ret = kcopyd_client_create(DMU_COPY_PAGES, &dev->kcopyd_client);
++	if (ret) {
++		printk(DMU_PREFIX "Failed to initialize kcopyd client\n");
++		return 0;
++	}
++
++	return 1;
++}
++
++static struct dmu_device *new_dmu_device(char *key,
++					 struct dm_target *ti,
++					 u32 block_size)
++{
++	struct dmu_device *dev, *ptr;
++	int                ret;
++
++	dev = kmalloc(sizeof(*dev), GFP_KERNEL);
++	if (dev == NULL) {
++		printk(DMU_PREFIX "Failed to allocate new userspace device\n");
++		return NULL;
++	}
++
++	if (!init_dmu_device(dev, block_size))
++		goto bad1;
++
++	snprintf(dev->key, DMU_KEY_LEN, "%s", key);
++
++	DPRINTK("New device with size %llu mask 0x%llX shift %u\n",
++		dev->block_size, dev->block_mask, dev->block_shift);
++
++	dev->ctl_dev = MKDEV(MAJOR(our_dev), get_free_minor());
++
++	ret = cdev_add(&dev->cdev, dev->ctl_dev, 1);
++	if (ret < 0) {
++		printk(DMU_PREFIX "Failed to register control device %d:%d\n",
++		       MAJOR(dev->ctl_dev), MINOR(dev->ctl_dev));
++		goto bad2;
++	}
++
++	DPRINTK("Registered new control interface: %i:%i\n",
++		MAJOR(dev->ctl_dev), MINOR(dev->ctl_dev));
++
++	spin_lock(&devices_lock);
++	if (list_empty(&devices))
++		list_add(&dev->list, &devices);
++	else
++		list_for_each_entry(ptr, &devices, list)
++			if (MINOR(ptr->ctl_dev) < MINOR(dev->ctl_dev))
++				list_add(&dev->list, &ptr->list);
++	spin_unlock(&devices_lock);
++
++	return dev;
++
++ bad2:
++	cdev_del(&dev->cdev);
++ bad1:
++	kfree(dev);
++	printk(KERN_ERR DMU_PREFIX "Failed to create device\n");
++	return NULL;
++}
++
++static struct dmu_device *find_dmu_device(const char *key)
++{
++	struct dmu_device *dev;
++	struct dmu_device *match = NULL;
++
++	spin_lock(&devices_lock);
++
++	list_for_each_entry(dev, &devices, list) {
++		spin_lock(&dev->lock);
++		if (strncmp(dev->key, key, DMU_KEY_LEN) == 0) {
++			match = dev;
++			spin_unlock(&dev->lock);
++			break;
++		}
++		spin_unlock(&dev->lock);
++	}
++
++	spin_unlock(&devices_lock);
++
++	return match;
++}
++
++static int dmu_ctr(struct dm_target *ti, unsigned int argc, char **argv)
++{
++	uint64_t block_size;
++	struct dmu_device *dev;
++	char *device_key;
++	char *block_size_param;
++
++	if (argc < 2) {
++		DMU_SET_ERROR(ti, "Invalid argument count");
++		return -EINVAL;
++	}
++
++	device_key = argv[0];
++	block_size_param = argv[1];
++
++	block_size = simple_strtoul(block_size_param, NULL, 10) / 512;
++
++	dev = find_dmu_device(device_key);
++	if (dev == NULL) {
++		dev = new_dmu_device(device_key,
++				     ti,
++				     block_size);
++		if (dev == NULL) {
++			DMU_SET_ERROR(ti, "Failed to create device");
++			goto bad;
++		}
++	} else {
++		get_dev(dev);
++	}
++
++	spin_lock(&dev->lock);
++	if (dev->block_size != block_size) {
++		DMU_SET_ERROR(ti, "Invalid block size");
++		goto bad;
++	}
++	spin_unlock(&dev->lock);
++
++	ti->private  = dev;
++	ti->split_io = block_size;
++
++	DPRINTK("  block-size:  %llu sectors\n", dev->block_size);
++	DPRINTK("  block-shift: %u\n", dev->block_shift);
++	DPRINTK("  block-mask:  %llx\n", dev->block_mask);
++
++	return 0;
++
++ bad:
++	if (dev) {
++		spin_unlock(&dev->lock);
++		put_dev(dev);
++	}
++
++	return -EINVAL;
++}
++
++static void dmu_dtr(struct dm_target *ti)
++{
++	struct dmu_device *dev = (struct dmu_device *) ti->private;
++
++	put_dev(dev);
++
++	DPRINTK("destroyed %d:%d\n", (int)ti->begin, (int)ti->len);
++}
++
++/* Search @dev for an outstanding request for remapping @block */
++static struct userspace_request *find_existing_req(struct dmu_device *dev,
++						   uint64_t block)
++{
++	struct userspace_request *req;
++	struct userspace_request *maybe = NULL;
++
++	spin_lock(&dev->lock);
++
++	list_for_each_entry(req, &dev->requests, list) {
++		/* FIXME: Blech */
++		if ((req->type == DM_USERSPACE_MAP_BLOCK) &&
++		    (dmu_block(dev, req->u.bios.head->bi_sector) == block)) {
++			if (maybe) {
++				atomic_dec(&maybe->refcnt);
++			}
++			maybe = req;
++			atomic_inc(&maybe->refcnt);
++		}
++	}
++
++	spin_unlock(&dev->lock);
++
++	return maybe;
++}
++
++static int make_new_request(struct dmu_device *dev, struct bio *bio)
++{
++	struct userspace_request *req;
++
++	req = kmem_cache_alloc(request_cache, GFP_KERNEL);
++	if (req == NULL)
++		goto bad;
++
++	init_request(dev, DM_USERSPACE_MAP_BLOCK, req);
++
++	dmu_set_flag(&req->flags, DMU_FLAG_RD);
++	if (bio_rw(bio))
++		dmu_set_flag(&req->flags, DMU_FLAG_WR);
++	else
++		dmu_clr_flag(&req->flags, DMU_FLAG_WR);
++	bio_list_add(&req->u.bios, bio);
++
++	add_request(dev, req);
++
++	DPRINTK("Queued %s request for sector " SECTOR_FORMAT "\n",
++		dmu_get_flag(&req->flags, DMU_FLAG_WR) ? "write" : "read",
++		bio->bi_sector);
++
++	return 0;
++
++ bad:
++	printk(KERN_ERR DMU_PREFIX "Failed to queue bio!\n");
++	return -1;
++}
++
++static int dmu_map_remap_case(struct dmu_device *dev,
++			      struct dmu_map *remap,
++			      struct bio *bio)
++{
++	int ret = 0;
++
++	spin_lock(&remap->lock);
++	
++	if (dmu_get_flag(&remap->flags, DMU_FLAG_WR) != bio_rw(bio)) {
++		ret = -1;
++	} else {
++		if (dmu_get_flag(&remap->flags, DMU_FLAG_VALID)) {
++			__bio_remap(bio, remap);
++			ret = 1;
++			atomic_dec(&dev->remap_ct);
++		} else {
++			bio_list_add(&remap->bios, bio);
++		}
++	}
++
++	spin_unlock(&remap->lock);
++
++	return ret;
++}
++
++static int dmu_map_request_case(struct dmu_device *dev,
++				struct userspace_request *req,
++				struct bio *bio)
++{
++	int ret = 0;
++	int req_rw = dmu_get_flag(&req->flags, DMU_FLAG_WR);
++
++	spin_lock(&req->lock);
++
++	if (!req_rw && bio_rw(bio) && !req->sent) {
++		/* Convert to R/W and Queue */
++		dmu_set_flag(&req->flags, DMU_FLAG_WR);
++		bio_list_add(&req->u.bios, bio);
++	} else if (!req_rw && bio_rw(bio) && req->sent) {		
++		/* Can't convert, must re-request */
++		ret = -1;
++	} else {
++		/* Queue */
++		bio_list_add(&req->u.bios, bio);
++	}
++
++	spin_unlock(&req->lock);
++
++	return ret;
++}
++
++static int dmu_map(struct dm_target *ti, struct bio *bio,
++		   union map_info *map_context)
++{
++	struct dmu_device *dev = (struct dmu_device *) ti->private;
++	struct dmu_map *remap;
++	struct userspace_request *req;
++	int ret = 0;
++	u64 block;
++	    
++	atomic_inc(&dev->remap_ct);
++
++	block = dmu_block(dev, bio->bi_sector);
++
++	remap = ht_find_map_dev(dev, block);
++	if (remap) {
++		ret = dmu_map_remap_case(dev, remap, bio);
++		if (ret >= 0)
++			goto done;
++	}
++
++	req = find_existing_req(dev, block);
++	if (req) {
++		ret = dmu_map_request_case(dev, req, bio);
++		atomic_dec(&req->refcnt);
++		if (ret >= 0)
++			goto done;
++	}
++
++	ret = make_new_request(dev, bio);
++
++ done:
++	return ret;
++}
++
++static int dmu_status(struct dm_target *ti, status_type_t type,
++		      char *result, unsigned int maxlen)
++{
++	struct dmu_device *dev = (struct dmu_device *) ti->private;
++
++	switch (type) {
++	case STATUSTYPE_INFO:
++		snprintf(result, maxlen, "%x:%x\n",
++			 MAJOR(dev->ctl_dev),
++			 MINOR(dev->ctl_dev));
++		break;
++
++	case STATUSTYPE_TABLE:
++		snprintf(result, maxlen, "%s %llu",
++			 dev->key,
++			 dev->block_size * 512);
++		break;
++	}
++
++	return 0;
++}
++
++static struct target_type userspace_target = {
++	.name    = "userspace",
++	.version = {0, 1, 0},
++	.module  = THIS_MODULE,
++	.ctr     = dmu_ctr,
++	.dtr     = dmu_dtr,
++	.map     = dmu_map,
++	.status  = dmu_status,
++};
++
++static int format_userspace_message(struct dmu_write *msg,
++				      struct userspace_request *req)
++{
++	int ret = 1;
++
++	spin_lock(&req->lock);
++
++	if (req->sent)
++		goto out;
++
++	msg->id = req->id;
++	msg->type = req->type;
++	dmu_cpy_flag(&msg->flags, req->flags, DMU_FLAG_RD);
++	dmu_cpy_flag(&msg->flags, req->flags, DMU_FLAG_WR);
++
++	if (msg->type == DM_USERSPACE_MAP_BLOCK) {
++		msg->org_block = dmu_block(req->dev,
++					   req->u.bios.head->bi_sector);
++		DPRINTK("Asking userspace to map %llu (%c)\n",
++			msg->org_block,
++			dmu_get_flag(&msg->flags, DMU_FLAG_WR) ? 'W' : 'R');
++	} else if (msg->type == DM_USERSPACE_COPY_FINISHED) {
++		msg->org_block = req->u.block;
++	} else {
++		printk(KERN_INFO DMU_PREFIX
++		       "Userspace sent unknown message type %i\n", 
++		       msg->type);
++		list_del(&req->list);
++		ret = 0;
++	}
++
++	req->sent = 1;
++
++ out:
++	spin_unlock(&req->lock);
++	
++	if (msg->type == DM_USERSPACE_COPY_FINISHED) {
++		/* COPY_FINISHED messages don't get responses, so
++		 * we take them off the request queue here
++		 */
++		list_del(&req->list);
++		kmem_cache_free(request_cache, req);
++	}
++
++	return ret;
++}
++
++ssize_t dmu_ctl_read(struct file *file, char __user *buffer,
++		     size_t size, loff_t *offset)
++{
++
++	struct dmu_device  *dev = (struct dmu_device *)file->private_data;
++	struct dmu_write   *msg;
++	struct userspace_request *req = NULL;
++	struct userspace_request *next;
++	int                 ret = 0;
++	int                 num_reqs, req_idx = 0;
++
++	num_reqs = size / sizeof(*msg);
++
++	if (num_reqs == 0)
++		return -EINVAL;
++
++	msg = kmalloc(num_reqs * sizeof(*msg), GFP_KERNEL);
++	if (!msg) {
++		printk(KERN_ERR DMU_PREFIX 
++		       "Failed to alloc %i reqs!\n", num_reqs);
++		return -ENOMEM;
++	}
++	
++	while (!have_pending_requests(dev)) {
++		if (file->f_flags & O_NONBLOCK) {
++			return 0;
++		}
++
++		if (wait_event_interruptible(dev->wqueue,
++					     have_pending_requests(dev)))
++			return -ERESTARTSYS;
++	}
++
++	spin_lock(&dev->lock);
++
++	list_for_each_entry_safe(req, next, &dev->requests, list) {
++
++		if (!format_userspace_message(&msg[req_idx], req))
++			continue;
++
++		ret += sizeof(*msg);
++		if (++req_idx >= num_reqs) {
++			break;
++		}
++	}
++
++	spin_unlock(&dev->lock);
++
++	if (copy_to_user(buffer, msg, sizeof(*msg) * req_idx)) {
++		DPRINTK("control read copy_to_user failed!\n");
++		ret = -EFAULT;
++	}
++
++	kfree(msg);
++
++	return ret;
++}
++
++static void copy_callback(int read_err,
++                          unsigned int write_err,
++                          void *data)
++{
++	remap_flusher((struct dmu_map *)data);
++}
++
++static void copy_block(struct dmu_map *remap)
++{
++	struct io_region src, dst;
++	struct kcopyd_client *client;
++
++	spin_lock(&remap->lock);
++
++	src.bdev = remap->src->bdev;
++	src.sector = remap->org_block << remap->dev->block_shift;
++	src.count = remap->dev->block_size;
++
++	dst.bdev = remap->dest->bdev;
++	dst.sector = (remap->new_block << remap->dev->block_shift);
++	dst.sector += remap->offset;
++	dst.count = remap->dev->block_size;
++
++	DPRINTK("Copying: "
++		SECTOR_FORMAT "(" SECTOR_FORMAT "):" SECTOR_FORMAT " -> "
++		SECTOR_FORMAT "(" SECTOR_FORMAT "):" SECTOR_FORMAT "\n",
++		remap->org_block,
++		src.sector,
++		src.count * 512,
++		remap->new_block,
++		dst.sector,
++		dst.count * 512);
++
++	client = remap->dev->kcopyd_client;
++
++	spin_unlock(&remap->lock);
++
++	kcopyd_copy(client, &src, 1, &dst, 0, copy_callback, remap);
++}
++
++static int remap_request(struct dmu_write *msg,
++			 struct dmu_device *dev,
++			 struct userspace_request *req)
++
++{
++	struct dmu_map *remap = NULL, *parent = NULL;
++	struct target_device *s_dev = NULL, *d_dev = NULL;
++	int is_chained = 0;
++	struct bio_list bio_holder;
++
++	if (dmu_get_flag(&msg->flags, DMU_FLAG_COPY_FIRST)) {
++		s_dev = get_target(dev, MKDEV(msg->src_maj, msg->src_min));
++		if (!s_dev) {
++			printk(KERN_ERR DMU_PREFIX
++			       "Failed to find src device %i:%i\n",
++			       msg->src_maj, msg->src_min);
++			goto bad;
++		}
++	}
++
++	d_dev = get_target(dev, MKDEV(msg->dest_maj, msg->dest_min));
++	if (!d_dev) {
++		printk(KERN_ERR DMU_PREFIX "Failed to find dst device %i:%i\n",
++		       msg->dest_maj, msg->dest_min);
++		goto bad;
++	}
++
++	if (req) {
++		while (atomic_read(&req->refcnt) != 0) {
++			DPRINTK("Waiting for exclusive use of request\n");
++			schedule();
++		}
++
++		spin_lock(&req->lock);
++		bio_holder = req->u.bios;
++		spin_unlock(&req->lock);
++	} else {
++		bio_list_init(&bio_holder);
++	}
++	
++	/* Allocate a new remap early (before grabbing locks), since
++	   we will most likely need it */
++	remap = kmem_cache_alloc(remap_cache, GFP_KERNEL);
++	if (!remap) {
++		printk(KERN_ERR DMU_PREFIX "Failed to alloc remap!");
++		goto bad;
++	}
++	init_remap(dev, remap);
++	spin_lock(&remap->lock);
++	remap->org_block = msg->org_block;
++
++	spin_lock(&dev->lock);
++
++	/* Here, we insert the new remap into the table, and remove
++	   the existing map, if present, all in one locked operation */
++
++	parent = ht_find_map(&dev->remaps, msg->org_block);
++	if (parent) {
++
++		spin_lock(&parent->lock);
++
++		if (!dmu_get_flag(&parent->flags, DMU_FLAG_VALID)) {
++			if (dmu_get_flag(&parent->flags, DMU_FLAG_WR) ==
++			    dmu_get_flag(&msg->flags, DMU_FLAG_WR)) {
++				/* Perms match for this not-yet-valid remap,
++				   so tag our bios on to it and bail */
++				bio_list_merge(&parent->bios,
++					       &bio_holder);
++
++				spin_unlock(&parent->lock);
++				spin_unlock(&dev->lock);
++				kmem_cache_free(remap_cache, remap);
++				return 1;
++			} else {
++				/* Remove parent from remap table, and
++				   chain our new remap to this one so
++				   it will fire when parent goes
++				   valid */
++				list_del(&parent->list);
++				if (parent->next) {
++					DPRINTK("Parent already chained!\n");
++					BUG();
++				}
++				parent->next = remap;
++				dmu_set_flag(&parent->flags, 
++					     DMU_FLAG_TEMPORARY);	
++				is_chained = 1;
++			}
++		} else {
++			/* Remove existing valid remap */
++			list_del(&parent->list);
++			destroy_remap(parent);
++			kmem_cache_free(remap_cache, parent);
++		}
++
++		spin_unlock(&parent->lock);
++	}
++	
++	ht_insert_map(&dev->remaps, remap);
++		
++	spin_unlock(&dev->lock);
++
++	remap->new_block  = msg->new_block;
++	remap->use_count  = DMU_LIFETIME;
++	remap->offset     = msg->offset;
++	remap->src        = s_dev;
++	remap->dest       = d_dev;
++	remap->dev        = dev;
++
++	dmu_clr_flag(&remap->flags, DMU_FLAG_VALID);
++	dmu_cpy_flag(&remap->flags, msg->flags, DMU_FLAG_TEMPORARY);
++	dmu_cpy_flag(&remap->flags, msg->flags, DMU_FLAG_WR);
++	dmu_cpy_flag(&remap->flags, msg->flags, DMU_FLAG_RD);
++	dmu_cpy_flag(&remap->flags, msg->flags, DMU_FLAG_COPY_FIRST);
++
++	remap->bios = bio_holder;
++
++	spin_unlock(&remap->lock);
++
++	if (! is_chained) {
++		if (dmu_get_flag(&msg->flags, DMU_FLAG_COPY_FIRST))
++			copy_block(remap);
++		else {
++			remap_flusher(remap);
++		}
++	}
++
++	return 1;
++
++ bad:
++	printk(KERN_ERR DMU_PREFIX "Remap error: chaos may ensue\n");
++
++	return 0;
++}
++
++static int invalidate_request(struct dmu_write *msg,
++			      struct dmu_device *dev)
++{
++	struct dmu_map *remap;
++	struct userspace_request *req;
++	int ret = 1;
++
++	remap = ht_find_map_dev(dev, msg->org_block);
++	if (!remap)
++		ret = 0;
++	else {
++		spin_lock(&dev->lock);
++		spin_lock(&remap->lock);
++		if (dmu_get_flag(&remap->flags, DMU_FLAG_VALID))
++			ht_delete_map(&dev->remaps, remap);
++		else 
++			ret = 0;
++		spin_unlock(&remap->lock);
++		spin_unlock(&dev->lock);
++	}
++
++	req = kmem_cache_alloc(request_cache, GFP_KERNEL);
++	if (!req) {
++		printk(KERN_ERR DMU_PREFIX
++		       "Failed to allocate request\n");
++		return 0;
++	}
++
++	if (ret)
++		init_request(dev, DM_USERSPACE_INVAL_COMPLETE, req);
++	else
++		init_request(dev, DM_USERSPACE_INVAL_FAILED, req);
++
++	req->u.block = msg->org_block;
++
++	add_request(dev, req);
++
++	return ret;
++}
++
++ssize_t dmu_ctl_write(struct file *file, const char __user *buffer,
++		      size_t size, loff_t *offset)
++{
++
++	struct dmu_device *dev = (struct dmu_device *)file->private_data;
++	struct dmu_write msg;
++	struct userspace_request *next;
++	struct userspace_request *req = NULL, *match = NULL;
++	int num_resp, resp_idx;
++	int ret = 0;
++
++	num_resp = size / sizeof(struct dmu_write);
++
++	if (num_resp == 0)
++		return -EINVAL;
++
++	for (resp_idx = 0; resp_idx < num_resp; resp_idx++) {
++		if (copy_from_user(&msg, buffer+ret, sizeof(msg))) {
++			printk(DMU_PREFIX
++			       "control_write copy_from_user failed!\n");
++			ret = -EFAULT;
++			goto out;
++		}
++
++		ret += sizeof(msg);
++
++		match = NULL;
++		/* See if we have a pending request that matches this */
++		spin_lock(&dev->lock);
++		list_for_each_entry_safe(req, next, &dev->requests, list) {
++			if ((req->type == DM_USERSPACE_MAP_BLOCK) &&
++			    (req->id == msg.id)) {
++				list_del(&req->list);
++				match = req;
++				break;
++			}
++		}
++		spin_unlock(&dev->lock);
++
++		if (!match)
++			DPRINTK("Processing unsolicited request: %u\n", 
++				msg.id);
++
++		switch (msg.type) {
++
++		case DM_USERSPACE_MAP_BLOCK:
++			DPRINTK("Got map: %llu -> %llu:%lli (%i:%i) [%c]\n",
++				msg.org_block,
++				msg.new_block,
++				msg.offset,
++				msg.dest_maj,
++				msg.dest_min,
++				dmu_get_flag(&msg.flags, DMU_FLAG_WR)?'W':'R');
++			remap_request(&msg, dev, match);
++			break;
++
++		case DM_USERSPACE_MAP_FAILED:
++			if (match) {
++				printk(KERN_EMERG DMU_PREFIX
++				       "userspace reported "
++				       "failure to map sector %lu\n",
++				       (unsigned long)
++				       match->u.bios.head->bi_sector);
++
++				spin_lock(&match->lock);
++				error_bios(&match->u.bios);
++				spin_unlock(&match->lock);
++			}
++			break;
++		default:
++			printk(KERN_ERR DMU_PREFIX
++			       "Unknown request type: %i\n", msg.type);
++		}
++
++		if (match)
++			kmem_cache_free(request_cache, match);
++	}
++ out:
++	return ret;
++}
++
++int dmu_ctl_open(struct inode *inode, struct file *file)
++{
++	struct dmu_device *dev;
++
++	dev = container_of(inode->i_cdev, struct dmu_device, cdev);
++
++	get_dev(dev);
++
++	file->private_data = dev;
++
++	return 0;
++}
++
++int dmu_ctl_release(struct inode *inode, struct file *file)
++{
++	struct dmu_device *dev;
++
++	dev = (struct dmu_device *)file->private_data;
++
++	put_dev(dev);
++
++	return 0;
++}
++
++unsigned dmu_ctl_poll(struct file *file, poll_table *wait)
++{
++	struct dmu_device *dev;
++	unsigned mask = 0;
++
++	dev = (struct dmu_device *)file->private_data;
++
++	poll_wait(file, &dev->wqueue, wait);
++
++	if (have_pending_requests(dev))
++		mask |= POLLIN | POLLRDNORM;
++
++	return mask;
++}
++
++static struct file_operations ctl_fops = {
++	.open    = dmu_ctl_open,
++	.release = dmu_ctl_release,
++	.read    = dmu_ctl_read,
++	.write   = dmu_ctl_write,
++	.poll    = dmu_ctl_poll,
++	.owner   = THIS_MODULE,
++};
++
++int __init dm_userspace_init(void)
++{
++	int r = dm_register_target(&userspace_target);
++	if (r < 0) {
++		DMERR(DMU_PREFIX "Register failed %d", r);
++		return 0;
++	}
++
++	spin_lock_init(&devices_lock);
++
++	if (enable_watchdog) {
++		INIT_WORK(&wd, watchdog, NULL);
++		schedule_delayed_work(&wd, HZ);
++	}
++
++	request_cache =
++		kmem_cache_create("dm-userspace-requests",
++				  sizeof(struct userspace_request),
++				  __alignof__ (struct userspace_request),
++				  0, NULL, NULL);
++	if (!request_cache) {
++		DMERR(DMU_PREFIX "Failed to allocate request cache\n");
++		goto bad;
++	}
++
++	remap_cache =
++		kmem_cache_create("dm-userspace-remaps",
++				  sizeof(struct dmu_map),
++				  __alignof__ (struct dmu_map),
++				  0, NULL, NULL);
++	if (!remap_cache) {
++		DMERR(DMU_PREFIX "Failed to allocate remap cache\n");
++		goto bad2;
++	}
++
++	r = alloc_chrdev_region(&our_dev, 0, 10, "dm-userspace");
++	if (r) {
++		DMERR(DMU_PREFIX "Failed to allocate chardev region\n");
++		goto bad3;
++	}
++
++	DPRINTK(DMU_PREFIX "Loaded (major %i)\n", MAJOR(our_dev));
++
++	return 1;
++
++ bad3:
++	kmem_cache_destroy(remap_cache);
++ bad2:
++	kmem_cache_destroy(request_cache);
++ bad:
++	dm_unregister_target(&userspace_target);
++	return 0;
++
++}
++
++void __exit dm_userspace_exit(void)
++{
++	int r;
++	struct list_head *cursor, *next;
++	struct dmu_device *dev;
++
++	DPRINTK(DMU_PREFIX "Unloading\n");
++
++	if (enable_watchdog)
++		if (!cancel_delayed_work(&wd))
++			flush_scheduled_work();
++
++	spin_lock(&devices_lock);
++
++	list_for_each_safe(cursor, next, &devices) {
++		dev = list_entry(cursor, struct dmu_device, list);
++		list_del(cursor);
++	}
++
++	spin_unlock(&devices_lock);
++
++	unregister_chrdev_region(our_dev, 10);
++
++	kmem_cache_destroy(request_cache);
++	kmem_cache_destroy(remap_cache);
++
++	r = dm_unregister_target(&userspace_target);
++	if (r < 0)
++		DMERR(DMU_PREFIX "unregister failed %d", r);
++}
++
++module_init(dm_userspace_init);
++module_exit(dm_userspace_exit);
++
++module_param(enable_watchdog, int, S_IRUGO);
++
++MODULE_DESCRIPTION(DM_NAME " userspace target");
++MODULE_AUTHOR("Dan Smith");
++MODULE_LICENSE("GPL");
+diff -Naur ./drivers/md/Kconfig ../linux-2.6.16.13-dmu/drivers/md/Kconfig
+--- ./drivers/md/Kconfig	2006-05-02 14:38:44.000000000 -0700
++++ ../linux-2.6.16.13-dmu/drivers/md/Kconfig	2006-06-09 10:20:35.701604992 -0700
+@@ -210,6 +210,12 @@
+        ---help---
+          Allow volume managers to take writeable snapshots of a device.
+ 
++config DM_USERSPACE
++       tristate "Userspace target (EXPERIMENTAL)"
++       depends on BLK_DEV_DM && EXPERIMENTAL
++       ---help---
++	 A target that provides a userspace interface to device-mapper
++
+ config DM_MIRROR
+        tristate "Mirror target (EXPERIMENTAL)"
+        depends on BLK_DEV_DM && EXPERIMENTAL
+diff -Naur ./drivers/md/Makefile ../linux-2.6.16.13-dmu/drivers/md/Makefile
+--- ./drivers/md/Makefile	2006-05-02 14:38:44.000000000 -0700
++++ ../linux-2.6.16.13-dmu/drivers/md/Makefile	2006-06-09 10:20:35.701604992 -0700
+@@ -37,6 +37,7 @@
+ obj-$(CONFIG_DM_SNAPSHOT)	+= dm-snapshot.o
+ obj-$(CONFIG_DM_MIRROR)		+= dm-mirror.o
+ obj-$(CONFIG_DM_ZERO)		+= dm-zero.o
++obj-$(CONFIG_DM_USERSPACE)      += dm-userspace.o
+ 
+ quiet_cmd_unroll = UNROLL  $@
+       cmd_unroll = $(PERL) $(srctree)/$(src)/unroll.pl $(UNROLL) \
+diff -Naur ./include/linux/dm-userspace.h ../linux-2.6.16.13-dmu/include/linux/dm-userspace.h
+--- ./include/linux/dm-userspace.h	1969-12-31 16:00:00.000000000 -0800
++++ ../linux-2.6.16.13-dmu/include/linux/dm-userspace.h	2006-06-09 12:00:32.630933160 -0700
+@@ -0,0 +1,89 @@
++/*
++ * Copyright (C) International Business Machines Corp., 2006
++ * Author: Dan Smith <danms@us.ibm.com>
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License as published by
++ * the Free Software Foundation; under version 2 of the License.
++ *
++ * This program is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
++ * GNU General Public License for more details.
++ *
++ * You should have received a copy of the GNU General Public License
++ * along with this program; if not, write to the Free Software
++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
++ *
++ */
++
++#ifndef __DM_USERSPACE_H
++#define __DM_USERSPACE_H
++
++#ifdef __KERNEL__
++# include <linux/types.h>
++#else
++# include <stdint.h>
++#endif
++
++/*
++ * Message Types
++ */
++#define DM_USERSPACE_MAP_BLOCK        1
++#define DM_USERSPACE_MAP_FAILED       2
++#define DM_USERSPACE_MAP_INVALIDATE   3 
++#define DM_USERSPACE_COPY_FINISHED  100
++#define DM_USERSPACE_INVAL_COMPLETE 101
++#define DM_USERSPACE_INVAL_FAILED   102
++
++/*
++ * Flags and associated macros
++ */
++#define DMU_FLAG_VALID       1
++#define DMU_FLAG_RD          2
++#define DMU_FLAG_WR          4
++#define DMU_FLAG_COPY_FIRST  8
++#define DMU_FLAG_TEMPORARY  16
++
++static int dmu_get_flag(uint32_t *flags, uint32_t flag)
++{
++	return (*flags & flag) != 0;
++}
++
++static void dmu_set_flag(uint32_t *flags, uint32_t flag)
++{
++	*flags |= flag;
++}
++
++static void dmu_clr_flag(uint32_t *flags, uint32_t flag)
++{
++	*flags &= (~flag);
++}
++
++static void dmu_cpy_flag(uint32_t *flags, uint32_t src, uint32_t flag)
++{
++	*flags = (*flags & ~flag) | (src & flag);
++}
++
++/*
++ * This is the message that is passed back and forth between the
++ * kernel and the user application
++ */
++struct dmu_write {
++	uint32_t id;
++	uint32_t type;              /* Type of request */
++	uint32_t flags;        /* Flags */
++
++	uint64_t org_block;    /* Block that was accessed */
++	uint64_t new_block;    /* The new block it should go to */
++  	int64_t offset;        /* Sector offset of the block, if needed  */
++
++	uint32_t src_maj;      /* The source device for copying */
++	uint32_t src_min;
++
++	uint32_t dest_maj;     /* Destination device for copying, and */
++	uint32_t dest_min;     /* for the block access                */
++
++};
++
++#endif

[-- Attachment #1.1.3: Type: text/plain, Size: 92 bytes --]

-- 
Dan Smith
IBM Linux Technology Center
Open Hypervisor Team
email: danms@us.ibm.com

[-- Attachment #1.2: Type: application/pgp-signature, Size: 190 bytes --]

[-- Attachment #2: Type: text/plain, Size: 138 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH] Add dm-userspace to the Xen kernel
  2006-06-09 21:08 Dan Smith
@ 2006-06-09 21:48 ` Anthony Liguori
  2006-06-09 23:12   ` Dan Smith
  2006-06-10 20:40 ` Bastian Blank
  1 sibling, 1 reply; 17+ messages in thread
From: Anthony Liguori @ 2006-06-09 21:48 UTC (permalink / raw)
  To: Dan Smith; +Cc: Xen Developers

Shouldn't this go to LKML?

Regards,

Anthony Liguori

Dan Smith wrote:
> This patch adds dm-userspace to the -xen Linux kernel.  I'd like to
> get it into the tree so that people that want to can play with it.
> Anyone wishing to to do so can download the tools separately, but they
> need the kernel module to be able to use it.
>
> The tools are available here:
>
>   http://static.danplanet.com/dm-userspace/libdmu-0.3.tar.gz
>   http://static.danplanet.com/dm-userspace/cowd-0.3.tar.gz
>   
> ------------------------------------------------------------------------
>
> # HG changeset patch
> # User Dan Smith <danms@us.ibm.com>
> # Node ID db178a1b30f3e92da9ce6fd14f757efa9f6763c5
> # Parent  5a0ed6c476732da229c3307ea5357cdd196e5462
> This adds dm-userspace to the xen linux kernel via another entry in the
> patches/ directory.  The dm-userspace module is completely self-contained
> and will not affect anything unless it is loaded.  People wishing to
> experiment with dm-userspace can download the tools packages separately, but
> they need this module to use them.
>
> Signed-off-by: Dan Smith <danms@us.ibm.com>
>
> diff -r 5a0ed6c47673 -r db178a1b30f3 buildconfigs/linux-defconfig_xen_x86_32
> --- a/buildconfigs/linux-defconfig_xen_x86_32	Fri Jun  9 14:29:00 2006 +0100
> +++ b/buildconfigs/linux-defconfig_xen_x86_32	Fri Jun  9 13:27:36 2006 -0700
> @@ -1187,6 +1187,7 @@ CONFIG_DM_ZERO=m
>  CONFIG_DM_ZERO=m
>  CONFIG_DM_MULTIPATH=m
>  CONFIG_DM_MULTIPATH_EMC=m
> +CONFIG_DM_USERSPACE=m
>  
>  #
>  # Fusion MPT device support
> diff -r 5a0ed6c47673 -r db178a1b30f3 patches/linux-2.6.16.13/dm-userspace.patch
> --- /dev/null	Thu Jan  1 00:00:00 1970 +0000
> +++ b/patches/linux-2.6.16.13/dm-userspace.patch	Fri Jun  9 13:27:36 2006 -0700
> @@ -0,0 +1,1737 @@
> +diff -Naur ./drivers/md/dm-userspace.c ../linux-2.6.16.13-dmu/drivers/md/dm-userspace.c
> +--- ./drivers/md/dm-userspace.c	1969-12-31 16:00:00.000000000 -0800
> ++++ ../linux-2.6.16.13-dmu/drivers/md/dm-userspace.c	2006-06-09 12:00:32.627933616 -0700
> +@@ -0,0 +1,1613 @@
> ++/*
> ++ * Copyright (C) International Business Machines Corp., 2006
> ++ * Author: Dan Smith <danms@us.ibm.com>
> ++ *
> ++ * This program is free software; you can redistribute it and/or modify
> ++ * it under the terms of the GNU General Public License as published by
> ++ * the Free Software Foundation; under version 2 of the License.
> ++ *
> ++ * This program is distributed in the hope that it will be useful,
> ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
> ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> ++ * GNU General Public License for more details.
> ++ *
> ++ * You should have received a copy of the GNU General Public License
> ++ * along with this program; if not, write to the Free Software
> ++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
> ++ *
> ++ */
> ++
> ++#include <linux/module.h>
> ++#include <linux/init.h>
> ++#include <linux/blkdev.h>
> ++#include <linux/bio.h>
> ++#include <linux/slab.h>
> ++#include <linux/spinlock.h>
> ++#include <linux/list.h>
> ++#include <linux/fs.h>
> ++#include <linux/cdev.h>
> ++#include <linux/types.h>
> ++#include <linux/poll.h>
> ++
> ++#include <linux/dm-userspace.h>
> ++
> ++#include "dm.h"
> ++#include "dm-bio-list.h"
> ++#include "kcopyd.h"
> ++
> ++#define DMU_DEBUG      0
> ++
> ++#define DMU_COPY_PAGES 256
> ++#define DMU_KEY_LEN    256
> ++
> ++#define DMU_PREFIX     "dm-userspace: "
> ++#define DMU_SET_ERROR(ti, msg) ti->error = DMU_PREFIX msg
> ++
> ++#define DMU_LIFETIME   128
> ++
> ++#if DMU_DEBUG
> ++#define DPRINTK( s, arg... ) printk(DMU_PREFIX s, ##arg)
> ++#else
> ++#define DPRINTK( s, arg... )
> ++#endif
> ++
> ++kmem_cache_t *request_cache;
> ++kmem_cache_t *remap_cache;
> ++
> ++static int enable_watchdog = 0;
> ++static struct work_struct wd;
> ++
> ++static spinlock_t devices_lock;
> ++static LIST_HEAD(devices);
> ++
> ++/* Device number for the control device */
> ++static dev_t our_dev;
> ++
> ++struct target_device {
> ++	struct list_head list;
> ++	struct block_device *bdev;
> ++	struct kref users;
> ++};
> ++
> ++struct hash_table {
> ++	struct list_head *table;
> ++	uint64_t size;
> ++	uint32_t mask;
> ++	uint64_t count;
> ++};
> ++
> ++/* A dm-userspace device, which consists of multiple targets sharing a
> ++ * common key
> ++ */
> ++struct dmu_device {
> ++	spinlock_t lock;
> ++	struct list_head list;
> ++	struct list_head requests;
> ++	struct list_head target_devs;
> ++
> ++	struct hash_table remaps;
> ++
> ++	struct cdev cdev;
> ++	dev_t ctl_dev;
> ++
> ++	char key[DMU_KEY_LEN];
> ++	struct kref users;
> ++
> ++	wait_queue_head_t wqueue;
> ++
> ++	uint64_t block_size;
> ++	uint64_t block_mask;
> ++	unsigned int block_shift;
> ++
> ++	struct kcopyd_client *kcopyd_client;
> ++
> ++	/*
> ++	 * Count of the number of outstanding requests that have been
> ++	 * made against this device, but have not yet been flushed
> ++	 */
> ++	atomic_t remap_ct;
> ++
> ++	uint32_t id_counter;
> ++};
> ++
> ++struct userspace_request {
> ++	spinlock_t lock;
> ++	struct list_head list;
> ++	struct dmu_device *dev;
> ++	int type;
> ++	int sent;
> ++	uint32_t flags;
> ++	uint32_t id;
> ++	union {
> ++		struct bio_list bios;
> ++		uint64_t block;
> ++	} u;
> ++	atomic_t refcnt;
> ++};
> ++
> ++struct dmu_map {
> ++	spinlock_t lock;
> ++	uint64_t org_block; /* Original block */
> ++	uint64_t new_block; /* Destination block */
> ++	int64_t offset;
> ++	uint32_t flags;
> ++	struct target_device *src;
> ++	struct target_device *dest;
> ++	struct bio_list bios;
> ++	struct list_head list;
> ++	struct dmu_device *dev;
> ++
> ++	uint32_t use_count;
> ++
> ++	struct dmu_map *next; /* Next remap that is dependent on this one */
> ++};
> ++
> ++/* Forward delcarations */
> ++static struct file_operations ctl_fops;
> ++static void copy_block(struct dmu_map *remap);
> ++
> ++/*
> ++ * Return the block number for @sector
> ++ */
> ++static inline u64 dmu_block(struct dmu_device *dev,
> ++				 sector_t sector)
> ++{
> ++	return sector >> dev->block_shift;
> ++}
> ++
> ++/*
> ++ * Return the sector offset in a block for @sector
> ++ */
> ++static inline u64 dmu_sector_offset(struct dmu_device *dev,
> ++				 sector_t sector)
> ++{
> ++	return sector & dev->block_mask;
> ++}
> ++
> ++/*
> ++ * Return the starting sector for @block
> ++ */
> ++static inline u64 dmu_sector(struct dmu_device *dev,
> ++				  uint64_t block)
> ++{
> ++	return block << dev->block_shift;
> ++}
> ++
> ++static void error_bios(struct bio_list *bios)
> ++{
> ++	struct bio *bio;
> ++	int count = 0;
> ++
> ++	while ((bio = bio_list_pop(bios)) != NULL) {
> ++		bio_io_error(bio, bio->bi_size);
> ++		count++;
> ++	}
> ++
> ++	if (count)
> ++		printk(KERN_ERR DMU_PREFIX
> ++		       "*** Failed %i requests\n", count);
> ++}
> ++
> ++static void init_remap(struct dmu_device *dev, struct dmu_map *remap)
> ++{
> ++	spin_lock_init(&remap->lock);
> ++	remap->org_block = remap->new_block = 0;
> ++	remap->offset = 0;
> ++	remap->flags = 0;
> ++	remap->src = remap->dest = NULL;
> ++	bio_list_init(&remap->bios);
> ++	INIT_LIST_HEAD(&remap->list);
> ++	remap->dev = dev;
> ++	remap->use_count = DMU_LIFETIME;
> ++	remap->next = NULL;
> ++}
> ++
> ++static void init_request(struct dmu_device *dev,
> ++			 int type,
> ++			 struct userspace_request *req)
> ++{
> ++	spin_lock_init(&req->lock);
> ++	INIT_LIST_HEAD(&req->list);
> ++	req->dev = dev;
> ++	req->type = type;
> ++	req->sent = 0;
> ++	req->flags = 0;
> ++	if (type == DM_USERSPACE_COPY_FINISHED) {
> ++		req->u.block = 0;
> ++		req->id = 0;
> ++	} else {
> ++		bio_list_init(&req->u.bios);
> ++		spin_lock(&dev->lock);
> ++		dev->id_counter++;
> ++		if (dev->id_counter == 0)
> ++			dev->id_counter = 1;
> ++		req->id = dev->id_counter;
> ++		spin_unlock(&dev->lock);
> ++	}
> ++	atomic_set(&req->refcnt, 0);
> ++}
> ++
> ++static void destroy_remap(struct dmu_map *remap)
> ++{
> ++	error_bios(&remap->bios);
> ++}
> ++
> ++/*
> ++ * For an even block distribution, this is not too bad, but it could
> ++ * probably be better
> ++ */
> ++static uint32_t ht_hash(struct hash_table *ht, uint64_t block)
> ++{
> ++	return (uint32_t)block & ht->mask;
> ++}
> ++
> ++static int ht_init(struct hash_table *ht, unsigned long size)
> ++{
> ++	uint64_t i;
> ++	unsigned long pages;
> ++	unsigned int order = ffs((size * sizeof(struct list_head *)) / 
> ++				 PAGE_SIZE);
> ++
> ++	DPRINTK("Going to allocate 2^%u pages for %lu-entry table\n",
> ++		order, size);
> ++
> ++	pages = __get_free_pages(GFP_ATOMIC, order);
> ++	if (!pages) {
> ++		DPRINTK("Failed to allocate hash table (%lu)\n", size);
> ++		return 0;
> ++	}
> ++
> ++	ht->table = (void *)pages;
> ++	ht->size = size;
> ++	ht->count = 0;
> ++	ht->mask = size - 1;
> ++
> ++	for (i = 0; i < size; i++)
> ++		INIT_LIST_HEAD(&ht->table[i]);
> ++
> ++	return 1;
> ++}
> ++
> ++static void ht_insert_bucket(struct dmu_map *map, struct list_head *list)
> ++{
> ++	list_add_tail(&map->list, list);
> ++}
> ++
> ++/*
> ++ * I'm sure this is quite dumb, but it works for now
> ++ */
> ++static int ht_should_grow(struct hash_table *ht)
> ++{
> ++	return ht->count > (2 * (ht->size / 4));
> ++}
> ++
> ++static void ht_grow_table(struct hash_table *ht);
> ++static void ht_insert_map(struct hash_table *ht, struct dmu_map *map)
> ++{
> ++	uint32_t addr;
> ++
> ++	addr = ht_hash(ht, map->org_block) & ht->mask;
> ++
> ++	BUG_ON(addr >= ht->size);
> ++
> ++	ht_insert_bucket(map, &ht->table[addr]);
> ++	ht->count++;
> ++
> ++	if (ht_should_grow(ht))
> ++		ht_grow_table(ht);
> ++}
> ++
> ++static void ht_insert_map_dev(struct dmu_device *dev, struct dmu_map *map)
> ++{
> ++	spin_lock(&dev->lock);
> ++	ht_insert_map(&dev->remaps, map);
> ++	spin_unlock(&dev->lock);
> ++}
> ++
> ++static void ht_delete_map(struct hash_table *ht, struct dmu_map *map)
> ++{
> ++	list_del(&map->list);
> ++	BUG_ON(ht->count == 0);
> ++	ht->count--;
> ++}
> ++
> ++static void ht_delete_map_dev(struct dmu_device *dev, struct dmu_map *map)
> ++{
> ++	spin_lock(&dev->lock);
> ++	ht_delete_map(&dev->remaps, map);
> ++	spin_unlock(&dev->lock);
> ++}
> ++
> ++static struct dmu_map *ht_find_map(struct hash_table *ht, uint64_t block)
> ++{
> ++	uint32_t addr;
> ++	struct dmu_map *m;
> ++
> ++	addr = ht_hash(ht, block) & ht->mask;
> ++
> ++	BUG_ON(addr >= ht->size);
> ++
> ++	list_for_each_entry(m, &ht->table[addr], list) {
> ++		if (m->org_block == block)
> ++			return m;
> ++	}
> ++
> ++	return NULL;
> ++}
> ++
> ++static struct dmu_map *ht_find_map_dev(struct dmu_device *dev, uint64_t block)
> ++{
> ++	struct dmu_map *remap;
> ++
> ++	spin_lock(&dev->lock);
> ++
> ++	remap = ht_find_map(&dev->remaps, block);
> ++
> ++	spin_unlock(&dev->lock);
> ++
> ++	return remap;
> ++}
> ++
> ++static void ht_grow_table(struct hash_table *ht)
> ++{
> ++	struct hash_table old_table;
> ++	uint64_t i;
> ++
> ++	old_table = *ht;
> ++
> ++	if (!ht_init(ht, old_table.size * 2)) {
> ++		DPRINTK("Can't grow table to %llu\n",
> ++			old_table.size * 2);
> ++		return;
> ++	}
> ++
> ++	DPRINTK("Growing from %llu to %llu\n",
> ++		old_table.size, ht->size);
> ++
> ++	for (i = 0; i < old_table.size; i++ ) {
> ++		struct dmu_map *m, *n;
> ++		list_for_each_entry_safe(m, n, &old_table.table[i],
> ++					 list) {
> ++			list_del_init(&m->list);
> ++			ht_insert_map(ht, m);
> ++		}
> ++	}
> ++
> ++	free_pages((unsigned long)old_table.table,
> ++		   ffs((old_table.size * sizeof(struct list_head *))
> ++		       / PAGE_SIZE));
> ++}
> ++
> ++static uint64_t ht_destroy_table(struct hash_table *ht)
> ++{
> ++	uint64_t i, count = 0;
> ++	struct dmu_map *m, *n;
> ++
> ++	for (i = 0; i < ht->size; i++) {
> ++		list_for_each_entry_safe(m, n, &ht->table[i], list) {
> ++			ht_delete_map(ht, m);
> ++			kmem_cache_free(remap_cache, m);
> ++			count++;
> ++		}
> ++	}
> ++
> ++	return count;
> ++}
> ++
> ++static struct target_device *get_target(struct dmu_device *dev,
> ++					dev_t devno)
> ++{
> ++
> ++	struct target_device *target;
> ++	struct block_device *bdev;
> ++
> ++	spin_lock(&dev->lock);
> ++	list_for_each_entry(target, &dev->target_devs, list) {
> ++		if (target->bdev->bd_dev == devno) {
> ++			spin_unlock(&dev->lock);
> ++			goto out;
> ++		}
> ++	}
> ++	spin_unlock(&dev->lock);
> ++
> ++	bdev = open_by_devnum(devno, FMODE_READ | FMODE_WRITE);
> ++	if (IS_ERR(bdev)) {
> ++		printk(KERN_ERR DMU_PREFIX "Unable to lookup device %x\n",
> ++		       devno);
> ++		return NULL;
> ++	}
> ++
> ++	target = kmalloc(sizeof(*target), GFP_KERNEL);
> ++	if (!target) {
> ++		printk(KERN_ERR DMU_PREFIX
> ++		       "Unable to alloc new target device\n");
> ++		return NULL;
> ++	}
> ++
> ++	target->bdev = bdev;
> ++	INIT_LIST_HEAD(&target->list);
> ++
> ++	spin_lock(&dev->lock);
> ++	list_add_tail(&target->list, &dev->target_devs);
> ++	spin_unlock(&dev->lock);
> ++
> ++ out:
> ++	return target;
> ++}
> ++
> ++/* Caller must hold dev->lock */
> ++static void put_target(struct dmu_device *dev,
> ++		       struct target_device *target)
> ++{
> ++	list_del(&target->list);
> ++
> ++	bd_release(target->bdev);
> ++	blkdev_put(target->bdev);
> ++
> ++	kfree(target);
> ++}
> ++
> ++/*
> ++ * Add a request to the device's request queue
> ++ */
> ++static void add_request(struct dmu_device *dev,
> ++			       struct userspace_request *req)
> ++{
> ++	spin_lock(&dev->lock);
> ++	list_add_tail(&req->list, &dev->requests);
> ++	spin_unlock(&dev->lock);
> ++
> ++	wake_up(&dev->wqueue);
> ++}
> ++
> ++/*
> ++ *
> ++ */
> ++static int have_pending_requests(struct dmu_device *dev)
> ++{
> ++	struct userspace_request *req;
> ++	int ret = 0;
> ++
> ++	spin_lock(&dev->lock);
> ++	
> ++	list_for_each_entry(req, &dev->requests, list) {
> ++		if (!req->sent) {
> ++			ret = 1;
> ++			break;
> ++		}
> ++	}
> ++
> ++	spin_unlock(&dev->lock);
> ++
> ++	return ret;
> ++}
> ++
> ++/*
> ++ * This periodically dumps out some debug information.  It's really
> ++ * only useful while developing.
> ++ */
> ++static void watchdog(void *data)
> ++{
> ++	unsigned int v_remaps, i_remaps, reqs, s_reqs, devs = 0;
> ++	struct dmu_device *dev;
> ++	struct dmu_map *map;
> ++	struct userspace_request *req;
> ++	uint64_t i;
> ++
> ++	spin_lock(&devices_lock);
> ++
> ++	list_for_each_entry(dev, &devices, list) {
> ++		spin_lock(&dev->lock);
> ++
> ++		v_remaps = i_remaps = reqs = s_reqs = 0;
> ++
> ++		for (i = 0; i < dev->remaps.size; i++) {
> ++			list_for_each_entry(map, &dev->remaps.table[i], list)
> ++				if (dmu_get_flag(&map->flags, DMU_FLAG_VALID))
> ++					v_remaps++;
> ++				else
> ++					i_remaps++;
> ++		}
> ++
> ++		list_for_each_entry(req, &dev->requests, list)
> ++			if (req->sent)
> ++				s_reqs++;
> ++			else
> ++				reqs++;
> ++
> ++		printk("Device %x:%x: "
> ++		       "  reqs: %u/%u "
> ++		       "  inv maps: %u "
> ++		       "  val maps: %u (%i)\n",
> ++		       MAJOR(dev->ctl_dev), MINOR(dev->ctl_dev),
> ++		       reqs, s_reqs, i_remaps, v_remaps,
> ++		       atomic_read(&dev->remap_ct));
> ++		devs++;
> ++		
> ++		spin_unlock(&dev->lock);
> ++	}
> ++
> ++	spin_unlock(&devices_lock);
> ++
> ++	schedule_delayed_work(&wd, HZ);
> ++}
> ++
> ++static void __bio_remap(struct bio *bio,
> ++		      struct dmu_map *remap)
> ++{
> ++	BUG_ON(remap->dest == NULL);
> ++
> ++	bio->bi_sector = dmu_sector(remap->dev, remap->new_block) +
> ++		dmu_sector_offset(remap->dev, bio->bi_sector) +
> ++		remap->offset;
> ++
> ++	bio->bi_bdev = remap->dest->bdev;
> ++}
> ++
> ++/* 
> ++   Pop, remap, and flush a bio.  Set VALID flag if no bios
> ++   available 
> ++*/
> ++static struct bio *pop_and_remap(struct dmu_map *remap)
> ++{
> ++	struct bio *bio = NULL;
> ++
> ++	spin_lock(&remap->lock);
> ++
> ++	bio = bio_list_pop(&remap->bios);
> ++	if (bio)
> ++		__bio_remap(bio, remap);
> ++	else {
> ++		/* If there are no more bios, we must set the VALID
> ++		   flag before we release the lock */
> ++		dmu_set_flag(&remap->flags, DMU_FLAG_VALID);
> ++	}
> ++
> ++	spin_unlock(&remap->lock);
> ++
> ++	return bio;
> ++}
> ++
> ++static void get_remap_attrs(struct dmu_map *remap,
> ++			    int *copy_first,
> ++			    int *temporary,
> ++			    struct dmu_map **next)
> ++{
> ++	spin_lock(&remap->lock);
> ++
> ++	*copy_first = dmu_get_flag(&remap->flags, DMU_FLAG_COPY_FIRST);
> ++	*temporary = dmu_get_flag(&remap->flags, DMU_FLAG_TEMPORARY);
> ++	*next = remap->next;
> ++	remap->next = NULL;
> ++
> ++	spin_unlock(&remap->lock);
> ++}
> ++
> ++static void remap_flusher(struct dmu_map *remap)
> ++{
> ++	struct bio *bio;
> ++	struct userspace_request *req;
> ++	int copy_first = 0, temporary = 0;
> ++	struct dmu_map *next;
> ++
> ++	DPRINTK("Flushing bios for block %llu:%llu\n",
> ++	       remap->org_block, remap->new_block);
> ++
> ++	while (1) {
> ++
> ++		bio = pop_and_remap(remap);
> ++
> ++		if (bio)
> ++			generic_make_request(bio);
> ++		else
> ++			break;
> ++
> ++		atomic_dec(&remap->dev->remap_ct);
> ++
> ++		DPRINTK("Flushed %llu:%llu (%u bytes)\n",
> ++			dmu_block(remap->dev, bio->bi_sector),
> ++			dmu_sector_offset(remap->dev, bio->bi_sector),
> ++			bio->bi_size);
> ++	}
> ++
> ++	get_remap_attrs(remap, &copy_first, &temporary, &next);
> ++
> ++	if (next) {
> ++		/* FIXME: Make copy_block check for this flag and just
> ++		 flush if not set to avoid this non-locked access */
> ++		if (dmu_get_flag(&next->flags, DMU_FLAG_COPY_FIRST))
> ++			copy_block(next);
> ++		else
> ++			remap_flusher(next);
> ++	}
> ++	
> ++	/* Notify userspace */
> ++	if (copy_first) {
> ++		req = kmem_cache_alloc(request_cache, GFP_KERNEL);
> ++		if (!req) {
> ++			printk(KERN_ERR DMU_PREFIX
> ++			       "Failed to allocate copy response\n");
> ++			return;
> ++		}
> ++
> ++		init_request(remap->dev, DM_USERSPACE_COPY_FINISHED, req);
> ++
> ++		req->u.block = remap->org_block;
> ++
> ++		add_request(remap->dev, req);
> ++	}
> ++
> ++	if (temporary) {
> ++		destroy_remap(remap);
> ++		kmem_cache_free(remap_cache, remap);
> ++	}
> ++}
> ++
> ++static void destroy_dmu_device(struct kref *ref)
> ++{
> ++	struct dmu_device *dev;
> ++	struct list_head *cursor, *next;
> ++	uint64_t remaps;
> ++
> ++	dev = container_of(ref, struct dmu_device, users);
> ++
> ++	DPRINTK("Destroying device: %s\n", dev->key);
> ++
> ++	spin_lock(&devices_lock);
> ++	list_del(&dev->list);
> ++	spin_unlock(&devices_lock);
> ++
> ++	list_for_each_safe(cursor, next, &dev->target_devs) {
> ++		struct target_device *target;
> ++
> ++		target = list_entry(cursor,
> ++				    struct target_device,
> ++				    list);
> ++
> ++		put_target(dev, target);
> ++	}
> ++
> ++	remaps = ht_destroy_table(&dev->remaps);
> ++	DPRINTK("Destroyed %llu/%llu remaps\n", remaps, dev->remaps.count);
> ++
> ++	list_for_each_safe(cursor, next, &dev->requests) {
> ++		struct userspace_request *req;
> ++
> ++		req = list_entry(cursor,
> ++				 struct userspace_request,
> ++				 list);
> ++
> ++		list_del(&req->list);
> ++
> ++		error_bios(&req->u.bios);
> ++
> ++		kmem_cache_free(request_cache, req);
> ++	}
> ++
> ++	kcopyd_client_destroy(dev->kcopyd_client);
> ++
> ++	cdev_del(&dev->cdev);
> ++	kfree(dev);
> ++}
> ++
> ++static inline void get_dev(struct dmu_device *dev)
> ++{
> ++	DPRINTK("get on %s\n", dev->key);
> ++	kref_get(&dev->users);
> ++}
> ++
> ++static inline void put_dev(struct dmu_device *dev)
> ++{
> ++	DPRINTK("put on %s\n", dev->key);
> ++	kref_put(&dev->users, destroy_dmu_device);
> ++}
> ++
> ++static int get_free_minor(void)
> ++{
> ++	struct dmu_device *dev;
> ++	int minor = 0;
> ++
> ++	spin_lock(&devices_lock);
> ++
> ++	list_for_each_entry(dev, &devices, list) {
> ++		if (MINOR(dev->ctl_dev) != minor)
> ++			break;
> ++		minor++;
> ++	}
> ++
> ++	spin_unlock(&devices_lock);
> ++
> ++	return minor;
> ++}
> ++
> ++static int init_dmu_device(struct dmu_device *dev, u32 block_size)
> ++{
> ++	int ret;
> ++
> ++	cdev_init(&dev->cdev, &ctl_fops);
> ++	dev->cdev.owner = THIS_MODULE;
> ++	dev->cdev.ops = &ctl_fops;
> ++
> ++	init_waitqueue_head(&dev->wqueue);
> ++	INIT_LIST_HEAD(&dev->list);
> ++	INIT_LIST_HEAD(&dev->requests);
> ++	INIT_LIST_HEAD(&dev->target_devs);
> ++	kref_init(&dev->users);
> ++	spin_lock_init(&dev->lock);
> ++
> ++	atomic_set(&dev->remap_ct, 0);
> ++	dev->id_counter = 1; /* reserve 0 for unsolicited maps */
> ++
> ++	if (!ht_init(&dev->remaps, 2048)) {
> ++		printk(KERN_ERR DMU_PREFIX
> ++		       "Unable to allocate hash table\n");
> ++		return 0;
> ++	}
> ++
> ++	dev->block_size  = block_size;
> ++	dev->block_mask  = block_size - 1;
> ++	dev->block_shift = ffs(block_size) - 1;
> ++
> ++	ret = kcopyd_client_create(DMU_COPY_PAGES, &dev->kcopyd_client);
> ++	if (ret) {
> ++		printk(DMU_PREFIX "Failed to initialize kcopyd client\n");
> ++		return 0;
> ++	}
> ++
> ++	return 1;
> ++}
> ++
> ++static struct dmu_device *new_dmu_device(char *key,
> ++					 struct dm_target *ti,
> ++					 u32 block_size)
> ++{
> ++	struct dmu_device *dev, *ptr;
> ++	int                ret;
> ++
> ++	dev = kmalloc(sizeof(*dev), GFP_KERNEL);
> ++	if (dev == NULL) {
> ++		printk(DMU_PREFIX "Failed to allocate new userspace device\n");
> ++		return NULL;
> ++	}
> ++
> ++	if (!init_dmu_device(dev, block_size))
> ++		goto bad1;
> ++
> ++	snprintf(dev->key, DMU_KEY_LEN, "%s", key);
> ++
> ++	DPRINTK("New device with size %llu mask 0x%llX shift %u\n",
> ++		dev->block_size, dev->block_mask, dev->block_shift);
> ++
> ++	dev->ctl_dev = MKDEV(MAJOR(our_dev), get_free_minor());
> ++
> ++	ret = cdev_add(&dev->cdev, dev->ctl_dev, 1);
> ++	if (ret < 0) {
> ++		printk(DMU_PREFIX "Failed to register control device %d:%d\n",
> ++		       MAJOR(dev->ctl_dev), MINOR(dev->ctl_dev));
> ++		goto bad2;
> ++	}
> ++
> ++	DPRINTK("Registered new control interface: %i:%i\n",
> ++		MAJOR(dev->ctl_dev), MINOR(dev->ctl_dev));
> ++
> ++	spin_lock(&devices_lock);
> ++	if (list_empty(&devices))
> ++		list_add(&dev->list, &devices);
> ++	else
> ++		list_for_each_entry(ptr, &devices, list)
> ++			if (MINOR(ptr->ctl_dev) < MINOR(dev->ctl_dev))
> ++				list_add(&dev->list, &ptr->list);
> ++	spin_unlock(&devices_lock);
> ++
> ++	return dev;
> ++
> ++ bad2:
> ++	cdev_del(&dev->cdev);
> ++ bad1:
> ++	kfree(dev);
> ++	printk(KERN_ERR DMU_PREFIX "Failed to create device\n");
> ++	return NULL;
> ++}
> ++
> ++static struct dmu_device *find_dmu_device(const char *key)
> ++{
> ++	struct dmu_device *dev;
> ++	struct dmu_device *match = NULL;
> ++
> ++	spin_lock(&devices_lock);
> ++
> ++	list_for_each_entry(dev, &devices, list) {
> ++		spin_lock(&dev->lock);
> ++		if (strncmp(dev->key, key, DMU_KEY_LEN) == 0) {
> ++			match = dev;
> ++			spin_unlock(&dev->lock);
> ++			break;
> ++		}
> ++		spin_unlock(&dev->lock);
> ++	}
> ++
> ++	spin_unlock(&devices_lock);
> ++
> ++	return match;
> ++}
> ++
> ++static int dmu_ctr(struct dm_target *ti, unsigned int argc, char **argv)
> ++{
> ++	uint64_t block_size;
> ++	struct dmu_device *dev;
> ++	char *device_key;
> ++	char *block_size_param;
> ++
> ++	if (argc < 2) {
> ++		DMU_SET_ERROR(ti, "Invalid argument count");
> ++		return -EINVAL;
> ++	}
> ++
> ++	device_key = argv[0];
> ++	block_size_param = argv[1];
> ++
> ++	block_size = simple_strtoul(block_size_param, NULL, 10) / 512;
> ++
> ++	dev = find_dmu_device(device_key);
> ++	if (dev == NULL) {
> ++		dev = new_dmu_device(device_key,
> ++				     ti,
> ++				     block_size);
> ++		if (dev == NULL) {
> ++			DMU_SET_ERROR(ti, "Failed to create device");
> ++			goto bad;
> ++		}
> ++	} else {
> ++		get_dev(dev);
> ++	}
> ++
> ++	spin_lock(&dev->lock);
> ++	if (dev->block_size != block_size) {
> ++		DMU_SET_ERROR(ti, "Invalid block size");
> ++		goto bad;
> ++	}
> ++	spin_unlock(&dev->lock);
> ++
> ++	ti->private  = dev;
> ++	ti->split_io = block_size;
> ++
> ++	DPRINTK("  block-size:  %llu sectors\n", dev->block_size);
> ++	DPRINTK("  block-shift: %u\n", dev->block_shift);
> ++	DPRINTK("  block-mask:  %llx\n", dev->block_mask);
> ++
> ++	return 0;
> ++
> ++ bad:
> ++	if (dev) {
> ++		spin_unlock(&dev->lock);
> ++		put_dev(dev);
> ++	}
> ++
> ++	return -EINVAL;
> ++}
> ++
> ++static void dmu_dtr(struct dm_target *ti)
> ++{
> ++	struct dmu_device *dev = (struct dmu_device *) ti->private;
> ++
> ++	put_dev(dev);
> ++
> ++	DPRINTK("destroyed %d:%d\n", (int)ti->begin, (int)ti->len);
> ++}
> ++
> ++/* Search @dev for an outstanding request for remapping @block */
> ++static struct userspace_request *find_existing_req(struct dmu_device *dev,
> ++						   uint64_t block)
> ++{
> ++	struct userspace_request *req;
> ++	struct userspace_request *maybe = NULL;
> ++
> ++	spin_lock(&dev->lock);
> ++
> ++	list_for_each_entry(req, &dev->requests, list) {
> ++		/* FIXME: Blech */
> ++		if ((req->type == DM_USERSPACE_MAP_BLOCK) &&
> ++		    (dmu_block(dev, req->u.bios.head->bi_sector) == block)) {
> ++			if (maybe) {
> ++				atomic_dec(&maybe->refcnt);
> ++			}
> ++			maybe = req;
> ++			atomic_inc(&maybe->refcnt);
> ++		}
> ++	}
> ++
> ++	spin_unlock(&dev->lock);
> ++
> ++	return maybe;
> ++}
> ++
> ++static int make_new_request(struct dmu_device *dev, struct bio *bio)
> ++{
> ++	struct userspace_request *req;
> ++
> ++	req = kmem_cache_alloc(request_cache, GFP_KERNEL);
> ++	if (req == NULL)
> ++		goto bad;
> ++
> ++	init_request(dev, DM_USERSPACE_MAP_BLOCK, req);
> ++
> ++	dmu_set_flag(&req->flags, DMU_FLAG_RD);
> ++	if (bio_rw(bio))
> ++		dmu_set_flag(&req->flags, DMU_FLAG_WR);
> ++	else
> ++		dmu_clr_flag(&req->flags, DMU_FLAG_WR);
> ++	bio_list_add(&req->u.bios, bio);
> ++
> ++	add_request(dev, req);
> ++
> ++	DPRINTK("Queued %s request for sector " SECTOR_FORMAT "\n",
> ++		dmu_get_flag(&req->flags, DMU_FLAG_WR) ? "write" : "read",
> ++		bio->bi_sector);
> ++
> ++	return 0;
> ++
> ++ bad:
> ++	printk(KERN_ERR DMU_PREFIX "Failed to queue bio!\n");
> ++	return -1;
> ++}
> ++
> ++static int dmu_map_remap_case(struct dmu_device *dev,
> ++			      struct dmu_map *remap,
> ++			      struct bio *bio)
> ++{
> ++	int ret = 0;
> ++
> ++	spin_lock(&remap->lock);
> ++	
> ++	if (dmu_get_flag(&remap->flags, DMU_FLAG_WR) != bio_rw(bio)) {
> ++		ret = -1;
> ++	} else {
> ++		if (dmu_get_flag(&remap->flags, DMU_FLAG_VALID)) {
> ++			__bio_remap(bio, remap);
> ++			ret = 1;
> ++			atomic_dec(&dev->remap_ct);
> ++		} else {
> ++			bio_list_add(&remap->bios, bio);
> ++		}
> ++	}
> ++
> ++	spin_unlock(&remap->lock);
> ++
> ++	return ret;
> ++}
> ++
> ++static int dmu_map_request_case(struct dmu_device *dev,
> ++				struct userspace_request *req,
> ++				struct bio *bio)
> ++{
> ++	int ret = 0;
> ++	int req_rw = dmu_get_flag(&req->flags, DMU_FLAG_WR);
> ++
> ++	spin_lock(&req->lock);
> ++
> ++	if (!req_rw && bio_rw(bio) && !req->sent) {
> ++		/* Convert to R/W and Queue */
> ++		dmu_set_flag(&req->flags, DMU_FLAG_WR);
> ++		bio_list_add(&req->u.bios, bio);
> ++	} else if (!req_rw && bio_rw(bio) && req->sent) {		
> ++		/* Can't convert, must re-request */
> ++		ret = -1;
> ++	} else {
> ++		/* Queue */
> ++		bio_list_add(&req->u.bios, bio);
> ++	}
> ++
> ++	spin_unlock(&req->lock);
> ++
> ++	return ret;
> ++}
> ++
> ++static int dmu_map(struct dm_target *ti, struct bio *bio,
> ++		   union map_info *map_context)
> ++{
> ++	struct dmu_device *dev = (struct dmu_device *) ti->private;
> ++	struct dmu_map *remap;
> ++	struct userspace_request *req;
> ++	int ret = 0;
> ++	u64 block;
> ++	    
> ++	atomic_inc(&dev->remap_ct);
> ++
> ++	block = dmu_block(dev, bio->bi_sector);
> ++
> ++	remap = ht_find_map_dev(dev, block);
> ++	if (remap) {
> ++		ret = dmu_map_remap_case(dev, remap, bio);
> ++		if (ret >= 0)
> ++			goto done;
> ++	}
> ++
> ++	req = find_existing_req(dev, block);
> ++	if (req) {
> ++		ret = dmu_map_request_case(dev, req, bio);
> ++		atomic_dec(&req->refcnt);
> ++		if (ret >= 0)
> ++			goto done;
> ++	}
> ++
> ++	ret = make_new_request(dev, bio);
> ++
> ++ done:
> ++	return ret;
> ++}
> ++
> ++static int dmu_status(struct dm_target *ti, status_type_t type,
> ++		      char *result, unsigned int maxlen)
> ++{
> ++	struct dmu_device *dev = (struct dmu_device *) ti->private;
> ++
> ++	switch (type) {
> ++	case STATUSTYPE_INFO:
> ++		snprintf(result, maxlen, "%x:%x\n",
> ++			 MAJOR(dev->ctl_dev),
> ++			 MINOR(dev->ctl_dev));
> ++		break;
> ++
> ++	case STATUSTYPE_TABLE:
> ++		snprintf(result, maxlen, "%s %llu",
> ++			 dev->key,
> ++			 dev->block_size * 512);
> ++		break;
> ++	}
> ++
> ++	return 0;
> ++}
> ++
> ++static struct target_type userspace_target = {
> ++	.name    = "userspace",
> ++	.version = {0, 1, 0},
> ++	.module  = THIS_MODULE,
> ++	.ctr     = dmu_ctr,
> ++	.dtr     = dmu_dtr,
> ++	.map     = dmu_map,
> ++	.status  = dmu_status,
> ++};
> ++
> ++static int format_userspace_message(struct dmu_write *msg,
> ++				      struct userspace_request *req)
> ++{
> ++	int ret = 1;
> ++
> ++	spin_lock(&req->lock);
> ++
> ++	if (req->sent)
> ++		goto out;
> ++
> ++	msg->id = req->id;
> ++	msg->type = req->type;
> ++	dmu_cpy_flag(&msg->flags, req->flags, DMU_FLAG_RD);
> ++	dmu_cpy_flag(&msg->flags, req->flags, DMU_FLAG_WR);
> ++
> ++	if (msg->type == DM_USERSPACE_MAP_BLOCK) {
> ++		msg->org_block = dmu_block(req->dev,
> ++					   req->u.bios.head->bi_sector);
> ++		DPRINTK("Asking userspace to map %llu (%c)\n",
> ++			msg->org_block,
> ++			dmu_get_flag(&msg->flags, DMU_FLAG_WR) ? 'W' : 'R');
> ++	} else if (msg->type == DM_USERSPACE_COPY_FINISHED) {
> ++		msg->org_block = req->u.block;
> ++	} else {
> ++		printk(KERN_INFO DMU_PREFIX
> ++		       "Userspace sent unknown message type %i\n", 
> ++		       msg->type);
> ++		list_del(&req->list);
> ++		ret = 0;
> ++	}
> ++
> ++	req->sent = 1;
> ++
> ++ out:
> ++	spin_unlock(&req->lock);
> ++	
> ++	if (msg->type == DM_USERSPACE_COPY_FINISHED) {
> ++		/* COPY_FINISHED messages don't get responses, so
> ++		 * we take them off the request queue here
> ++		 */
> ++		list_del(&req->list);
> ++		kmem_cache_free(request_cache, req);
> ++	}
> ++
> ++	return ret;
> ++}
> ++
> ++ssize_t dmu_ctl_read(struct file *file, char __user *buffer,
> ++		     size_t size, loff_t *offset)
> ++{
> ++
> ++	struct dmu_device  *dev = (struct dmu_device *)file->private_data;
> ++	struct dmu_write   *msg;
> ++	struct userspace_request *req = NULL;
> ++	struct userspace_request *next;
> ++	int                 ret = 0;
> ++	int                 num_reqs, req_idx = 0;
> ++
> ++	num_reqs = size / sizeof(*msg);
> ++
> ++	if (num_reqs == 0)
> ++		return -EINVAL;
> ++
> ++	msg = kmalloc(num_reqs * sizeof(*msg), GFP_KERNEL);
> ++	if (!msg) {
> ++		printk(KERN_ERR DMU_PREFIX 
> ++		       "Failed to alloc %i reqs!\n", num_reqs);
> ++		return -ENOMEM;
> ++	}
> ++	
> ++	while (!have_pending_requests(dev)) {
> ++		if (file->f_flags & O_NONBLOCK) {
> ++			return 0;
> ++		}
> ++
> ++		if (wait_event_interruptible(dev->wqueue,
> ++					     have_pending_requests(dev)))
> ++			return -ERESTARTSYS;
> ++	}
> ++
> ++	spin_lock(&dev->lock);
> ++
> ++	list_for_each_entry_safe(req, next, &dev->requests, list) {
> ++
> ++		if (!format_userspace_message(&msg[req_idx], req))
> ++			continue;
> ++
> ++		ret += sizeof(*msg);
> ++		if (++req_idx >= num_reqs) {
> ++			break;
> ++		}
> ++	}
> ++
> ++	spin_unlock(&dev->lock);
> ++
> ++	if (copy_to_user(buffer, msg, sizeof(*msg) * req_idx)) {
> ++		DPRINTK("control read copy_to_user failed!\n");
> ++		ret = -EFAULT;
> ++	}
> ++
> ++	kfree(msg);
> ++
> ++	return ret;
> ++}
> ++
> ++static void copy_callback(int read_err,
> ++                          unsigned int write_err,
> ++                          void *data)
> ++{
> ++	remap_flusher((struct dmu_map *)data);
> ++}
> ++
> ++static void copy_block(struct dmu_map *remap)
> ++{
> ++	struct io_region src, dst;
> ++	struct kcopyd_client *client;
> ++
> ++	spin_lock(&remap->lock);
> ++
> ++	src.bdev = remap->src->bdev;
> ++	src.sector = remap->org_block << remap->dev->block_shift;
> ++	src.count = remap->dev->block_size;
> ++
> ++	dst.bdev = remap->dest->bdev;
> ++	dst.sector = (remap->new_block << remap->dev->block_shift);
> ++	dst.sector += remap->offset;
> ++	dst.count = remap->dev->block_size;
> ++
> ++	DPRINTK("Copying: "
> ++		SECTOR_FORMAT "(" SECTOR_FORMAT "):" SECTOR_FORMAT " -> "
> ++		SECTOR_FORMAT "(" SECTOR_FORMAT "):" SECTOR_FORMAT "\n",
> ++		remap->org_block,
> ++		src.sector,
> ++		src.count * 512,
> ++		remap->new_block,
> ++		dst.sector,
> ++		dst.count * 512);
> ++
> ++	client = remap->dev->kcopyd_client;
> ++
> ++	spin_unlock(&remap->lock);
> ++
> ++	kcopyd_copy(client, &src, 1, &dst, 0, copy_callback, remap);
> ++}
> ++
> ++static int remap_request(struct dmu_write *msg,
> ++			 struct dmu_device *dev,
> ++			 struct userspace_request *req)
> ++
> ++{
> ++	struct dmu_map *remap = NULL, *parent = NULL;
> ++	struct target_device *s_dev = NULL, *d_dev = NULL;
> ++	int is_chained = 0;
> ++	struct bio_list bio_holder;
> ++
> ++	if (dmu_get_flag(&msg->flags, DMU_FLAG_COPY_FIRST)) {
> ++		s_dev = get_target(dev, MKDEV(msg->src_maj, msg->src_min));
> ++		if (!s_dev) {
> ++			printk(KERN_ERR DMU_PREFIX
> ++			       "Failed to find src device %i:%i\n",
> ++			       msg->src_maj, msg->src_min);
> ++			goto bad;
> ++		}
> ++	}
> ++
> ++	d_dev = get_target(dev, MKDEV(msg->dest_maj, msg->dest_min));
> ++	if (!d_dev) {
> ++		printk(KERN_ERR DMU_PREFIX "Failed to find dst device %i:%i\n",
> ++		       msg->dest_maj, msg->dest_min);
> ++		goto bad;
> ++	}
> ++
> ++	if (req) {
> ++		while (atomic_read(&req->refcnt) != 0) {
> ++			DPRINTK("Waiting for exclusive use of request\n");
> ++			schedule();
> ++		}
> ++
> ++		spin_lock(&req->lock);
> ++		bio_holder = req->u.bios;
> ++		spin_unlock(&req->lock);
> ++	} else {
> ++		bio_list_init(&bio_holder);
> ++	}
> ++	
> ++	/* Allocate a new remap early (before grabbing locks), since
> ++	   we will most likely need it */
> ++	remap = kmem_cache_alloc(remap_cache, GFP_KERNEL);
> ++	if (!remap) {
> ++		printk(KERN_ERR DMU_PREFIX "Failed to alloc remap!");
> ++		goto bad;
> ++	}
> ++	init_remap(dev, remap);
> ++	spin_lock(&remap->lock);
> ++	remap->org_block = msg->org_block;
> ++
> ++	spin_lock(&dev->lock);
> ++
> ++	/* Here, we insert the new remap into the table, and remove
> ++	   the existing map, if present, all in one locked operation */
> ++
> ++	parent = ht_find_map(&dev->remaps, msg->org_block);
> ++	if (parent) {
> ++
> ++		spin_lock(&parent->lock);
> ++
> ++		if (!dmu_get_flag(&parent->flags, DMU_FLAG_VALID)) {
> ++			if (dmu_get_flag(&parent->flags, DMU_FLAG_WR) ==
> ++			    dmu_get_flag(&msg->flags, DMU_FLAG_WR)) {
> ++				/* Perms match for this not-yet-valid remap,
> ++				   so tag our bios on to it and bail */
> ++				bio_list_merge(&parent->bios,
> ++					       &bio_holder);
> ++
> ++				spin_unlock(&parent->lock);
> ++				spin_unlock(&dev->lock);
> ++				kmem_cache_free(remap_cache, remap);
> ++				return 1;
> ++			} else {
> ++				/* Remove parent from remap table, and
> ++				   chain our new remap to this one so
> ++				   it will fire when parent goes
> ++				   valid */
> ++				list_del(&parent->list);
> ++				if (parent->next) {
> ++					DPRINTK("Parent already chained!\n");
> ++					BUG();
> ++				}
> ++				parent->next = remap;
> ++				dmu_set_flag(&parent->flags, 
> ++					     DMU_FLAG_TEMPORARY);	
> ++				is_chained = 1;
> ++			}
> ++		} else {
> ++			/* Remove existing valid remap */
> ++			list_del(&parent->list);
> ++			destroy_remap(parent);
> ++			kmem_cache_free(remap_cache, parent);
> ++		}
> ++
> ++		spin_unlock(&parent->lock);
> ++	}
> ++	
> ++	ht_insert_map(&dev->remaps, remap);
> ++		
> ++	spin_unlock(&dev->lock);
> ++
> ++	remap->new_block  = msg->new_block;
> ++	remap->use_count  = DMU_LIFETIME;
> ++	remap->offset     = msg->offset;
> ++	remap->src        = s_dev;
> ++	remap->dest       = d_dev;
> ++	remap->dev        = dev;
> ++
> ++	dmu_clr_flag(&remap->flags, DMU_FLAG_VALID);
> ++	dmu_cpy_flag(&remap->flags, msg->flags, DMU_FLAG_TEMPORARY);
> ++	dmu_cpy_flag(&remap->flags, msg->flags, DMU_FLAG_WR);
> ++	dmu_cpy_flag(&remap->flags, msg->flags, DMU_FLAG_RD);
> ++	dmu_cpy_flag(&remap->flags, msg->flags, DMU_FLAG_COPY_FIRST);
> ++
> ++	remap->bios = bio_holder;
> ++
> ++	spin_unlock(&remap->lock);
> ++
> ++	if (! is_chained) {
> ++		if (dmu_get_flag(&msg->flags, DMU_FLAG_COPY_FIRST))
> ++			copy_block(remap);
> ++		else {
> ++			remap_flusher(remap);
> ++		}
> ++	}
> ++
> ++	return 1;
> ++
> ++ bad:
> ++	printk(KERN_ERR DMU_PREFIX "Remap error: chaos may ensue\n");
> ++
> ++	return 0;
> ++}
> ++
> ++static int invalidate_request(struct dmu_write *msg,
> ++			      struct dmu_device *dev)
> ++{
> ++	struct dmu_map *remap;
> ++	struct userspace_request *req;
> ++	int ret = 1;
> ++
> ++	remap = ht_find_map_dev(dev, msg->org_block);
> ++	if (!remap)
> ++		ret = 0;
> ++	else {
> ++		spin_lock(&dev->lock);
> ++		spin_lock(&remap->lock);
> ++		if (dmu_get_flag(&remap->flags, DMU_FLAG_VALID))
> ++			ht_delete_map(&dev->remaps, remap);
> ++		else 
> ++			ret = 0;
> ++		spin_unlock(&remap->lock);
> ++		spin_unlock(&dev->lock);
> ++	}
> ++
> ++	req = kmem_cache_alloc(request_cache, GFP_KERNEL);
> ++	if (!req) {
> ++		printk(KERN_ERR DMU_PREFIX
> ++		       "Failed to allocate request\n");
> ++		return 0;
> ++	}
> ++
> ++	if (ret)
> ++		init_request(dev, DM_USERSPACE_INVAL_COMPLETE, req);
> ++	else
> ++		init_request(dev, DM_USERSPACE_INVAL_FAILED, req);
> ++
> ++	req->u.block = msg->org_block;
> ++
> ++	add_request(dev, req);
> ++
> ++	return ret;
> ++}
> ++
> ++ssize_t dmu_ctl_write(struct file *file, const char __user *buffer,
> ++		      size_t size, loff_t *offset)
> ++{
> ++
> ++	struct dmu_device *dev = (struct dmu_device *)file->private_data;
> ++	struct dmu_write msg;
> ++	struct userspace_request *next;
> ++	struct userspace_request *req = NULL, *match = NULL;
> ++	int num_resp, resp_idx;
> ++	int ret = 0;
> ++
> ++	num_resp = size / sizeof(struct dmu_write);
> ++
> ++	if (num_resp == 0)
> ++		return -EINVAL;
> ++
> ++	for (resp_idx = 0; resp_idx < num_resp; resp_idx++) {
> ++		if (copy_from_user(&msg, buffer+ret, sizeof(msg))) {
> ++			printk(DMU_PREFIX
> ++			       "control_write copy_from_user failed!\n");
> ++			ret = -EFAULT;
> ++			goto out;
> ++		}
> ++
> ++		ret += sizeof(msg);
> ++
> ++		match = NULL;
> ++		/* See if we have a pending request that matches this */
> ++		spin_lock(&dev->lock);
> ++		list_for_each_entry_safe(req, next, &dev->requests, list) {
> ++			if ((req->type == DM_USERSPACE_MAP_BLOCK) &&
> ++			    (req->id == msg.id)) {
> ++				list_del(&req->list);
> ++				match = req;
> ++				break;
> ++			}
> ++		}
> ++		spin_unlock(&dev->lock);
> ++
> ++		if (!match)
> ++			DPRINTK("Processing unsolicited request: %u\n", 
> ++				msg.id);
> ++
> ++		switch (msg.type) {
> ++
> ++		case DM_USERSPACE_MAP_BLOCK:
> ++			DPRINTK("Got map: %llu -> %llu:%lli (%i:%i) [%c]\n",
> ++				msg.org_block,
> ++				msg.new_block,
> ++				msg.offset,
> ++				msg.dest_maj,
> ++				msg.dest_min,
> ++				dmu_get_flag(&msg.flags, DMU_FLAG_WR)?'W':'R');
> ++			remap_request(&msg, dev, match);
> ++			break;
> ++
> ++		case DM_USERSPACE_MAP_FAILED:
> ++			if (match) {
> ++				printk(KERN_EMERG DMU_PREFIX
> ++				       "userspace reported "
> ++				       "failure to map sector %lu\n",
> ++				       (unsigned long)
> ++				       match->u.bios.head->bi_sector);
> ++
> ++				spin_lock(&match->lock);
> ++				error_bios(&match->u.bios);
> ++				spin_unlock(&match->lock);
> ++			}
> ++			break;
> ++		default:
> ++			printk(KERN_ERR DMU_PREFIX
> ++			       "Unknown request type: %i\n", msg.type);
> ++		}
> ++
> ++		if (match)
> ++			kmem_cache_free(request_cache, match);
> ++	}
> ++ out:
> ++	return ret;
> ++}
> ++
> ++int dmu_ctl_open(struct inode *inode, struct file *file)
> ++{
> ++	struct dmu_device *dev;
> ++
> ++	dev = container_of(inode->i_cdev, struct dmu_device, cdev);
> ++
> ++	get_dev(dev);
> ++
> ++	file->private_data = dev;
> ++
> ++	return 0;
> ++}
> ++
> ++int dmu_ctl_release(struct inode *inode, struct file *file)
> ++{
> ++	struct dmu_device *dev;
> ++
> ++	dev = (struct dmu_device *)file->private_data;
> ++
> ++	put_dev(dev);
> ++
> ++	return 0;
> ++}
> ++
> ++unsigned dmu_ctl_poll(struct file *file, poll_table *wait)
> ++{
> ++	struct dmu_device *dev;
> ++	unsigned mask = 0;
> ++
> ++	dev = (struct dmu_device *)file->private_data;
> ++
> ++	poll_wait(file, &dev->wqueue, wait);
> ++
> ++	if (have_pending_requests(dev))
> ++		mask |= POLLIN | POLLRDNORM;
> ++
> ++	return mask;
> ++}
> ++
> ++static struct file_operations ctl_fops = {
> ++	.open    = dmu_ctl_open,
> ++	.release = dmu_ctl_release,
> ++	.read    = dmu_ctl_read,
> ++	.write   = dmu_ctl_write,
> ++	.poll    = dmu_ctl_poll,
> ++	.owner   = THIS_MODULE,
> ++};
> ++
> ++int __init dm_userspace_init(void)
> ++{
> ++	int r = dm_register_target(&userspace_target);
> ++	if (r < 0) {
> ++		DMERR(DMU_PREFIX "Register failed %d", r);
> ++		return 0;
> ++	}
> ++
> ++	spin_lock_init(&devices_lock);
> ++
> ++	if (enable_watchdog) {
> ++		INIT_WORK(&wd, watchdog, NULL);
> ++		schedule_delayed_work(&wd, HZ);
> ++	}
> ++
> ++	request_cache =
> ++		kmem_cache_create("dm-userspace-requests",
> ++				  sizeof(struct userspace_request),
> ++				  __alignof__ (struct userspace_request),
> ++				  0, NULL, NULL);
> ++	if (!request_cache) {
> ++		DMERR(DMU_PREFIX "Failed to allocate request cache\n");
> ++		goto bad;
> ++	}
> ++
> ++	remap_cache =
> ++		kmem_cache_create("dm-userspace-remaps",
> ++				  sizeof(struct dmu_map),
> ++				  __alignof__ (struct dmu_map),
> ++				  0, NULL, NULL);
> ++	if (!remap_cache) {
> ++		DMERR(DMU_PREFIX "Failed to allocate remap cache\n");
> ++		goto bad2;
> ++	}
> ++
> ++	r = alloc_chrdev_region(&our_dev, 0, 10, "dm-userspace");
> ++	if (r) {
> ++		DMERR(DMU_PREFIX "Failed to allocate chardev region\n");
> ++		goto bad3;
> ++	}
> ++
> ++	DPRINTK(DMU_PREFIX "Loaded (major %i)\n", MAJOR(our_dev));
> ++
> ++	return 1;
> ++
> ++ bad3:
> ++	kmem_cache_destroy(remap_cache);
> ++ bad2:
> ++	kmem_cache_destroy(request_cache);
> ++ bad:
> ++	dm_unregister_target(&userspace_target);
> ++	return 0;
> ++
> ++}
> ++
> ++void __exit dm_userspace_exit(void)
> ++{
> ++	int r;
> ++	struct list_head *cursor, *next;
> ++	struct dmu_device *dev;
> ++
> ++	DPRINTK(DMU_PREFIX "Unloading\n");
> ++
> ++	if (enable_watchdog)
> ++		if (!cancel_delayed_work(&wd))
> ++			flush_scheduled_work();
> ++
> ++	spin_lock(&devices_lock);
> ++
> ++	list_for_each_safe(cursor, next, &devices) {
> ++		dev = list_entry(cursor, struct dmu_device, list);
> ++		list_del(cursor);
> ++	}
> ++
> ++	spin_unlock(&devices_lock);
> ++
> ++	unregister_chrdev_region(our_dev, 10);
> ++
> ++	kmem_cache_destroy(request_cache);
> ++	kmem_cache_destroy(remap_cache);
> ++
> ++	r = dm_unregister_target(&userspace_target);
> ++	if (r < 0)
> ++		DMERR(DMU_PREFIX "unregister failed %d", r);
> ++}
> ++
> ++module_init(dm_userspace_init);
> ++module_exit(dm_userspace_exit);
> ++
> ++module_param(enable_watchdog, int, S_IRUGO);
> ++
> ++MODULE_DESCRIPTION(DM_NAME " userspace target");
> ++MODULE_AUTHOR("Dan Smith");
> ++MODULE_LICENSE("GPL");
> +diff -Naur ./drivers/md/Kconfig ../linux-2.6.16.13-dmu/drivers/md/Kconfig
> +--- ./drivers/md/Kconfig	2006-05-02 14:38:44.000000000 -0700
> ++++ ../linux-2.6.16.13-dmu/drivers/md/Kconfig	2006-06-09 10:20:35.701604992 -0700
> +@@ -210,6 +210,12 @@
> +        ---help---
> +          Allow volume managers to take writeable snapshots of a device.
> + 
> ++config DM_USERSPACE
> ++       tristate "Userspace target (EXPERIMENTAL)"
> ++       depends on BLK_DEV_DM && EXPERIMENTAL
> ++       ---help---
> ++	 A target that provides a userspace interface to device-mapper
> ++
> + config DM_MIRROR
> +        tristate "Mirror target (EXPERIMENTAL)"
> +        depends on BLK_DEV_DM && EXPERIMENTAL
> +diff -Naur ./drivers/md/Makefile ../linux-2.6.16.13-dmu/drivers/md/Makefile
> +--- ./drivers/md/Makefile	2006-05-02 14:38:44.000000000 -0700
> ++++ ../linux-2.6.16.13-dmu/drivers/md/Makefile	2006-06-09 10:20:35.701604992 -0700
> +@@ -37,6 +37,7 @@
> + obj-$(CONFIG_DM_SNAPSHOT)	+= dm-snapshot.o
> + obj-$(CONFIG_DM_MIRROR)		+= dm-mirror.o
> + obj-$(CONFIG_DM_ZERO)		+= dm-zero.o
> ++obj-$(CONFIG_DM_USERSPACE)      += dm-userspace.o
> + 
> + quiet_cmd_unroll = UNROLL  $@
> +       cmd_unroll = $(PERL) $(srctree)/$(src)/unroll.pl $(UNROLL) \
> +diff -Naur ./include/linux/dm-userspace.h ../linux-2.6.16.13-dmu/include/linux/dm-userspace.h
> +--- ./include/linux/dm-userspace.h	1969-12-31 16:00:00.000000000 -0800
> ++++ ../linux-2.6.16.13-dmu/include/linux/dm-userspace.h	2006-06-09 12:00:32.630933160 -0700
> +@@ -0,0 +1,89 @@
> ++/*
> ++ * Copyright (C) International Business Machines Corp., 2006
> ++ * Author: Dan Smith <danms@us.ibm.com>
> ++ *
> ++ * This program is free software; you can redistribute it and/or modify
> ++ * it under the terms of the GNU General Public License as published by
> ++ * the Free Software Foundation; under version 2 of the License.
> ++ *
> ++ * This program is distributed in the hope that it will be useful,
> ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
> ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> ++ * GNU General Public License for more details.
> ++ *
> ++ * You should have received a copy of the GNU General Public License
> ++ * along with this program; if not, write to the Free Software
> ++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
> ++ *
> ++ */
> ++
> ++#ifndef __DM_USERSPACE_H
> ++#define __DM_USERSPACE_H
> ++
> ++#ifdef __KERNEL__
> ++# include <linux/types.h>
> ++#else
> ++# include <stdint.h>
> ++#endif
> ++
> ++/*
> ++ * Message Types
> ++ */
> ++#define DM_USERSPACE_MAP_BLOCK        1
> ++#define DM_USERSPACE_MAP_FAILED       2
> ++#define DM_USERSPACE_MAP_INVALIDATE   3 
> ++#define DM_USERSPACE_COPY_FINISHED  100
> ++#define DM_USERSPACE_INVAL_COMPLETE 101
> ++#define DM_USERSPACE_INVAL_FAILED   102
> ++
> ++/*
> ++ * Flags and associated macros
> ++ */
> ++#define DMU_FLAG_VALID       1
> ++#define DMU_FLAG_RD          2
> ++#define DMU_FLAG_WR          4
> ++#define DMU_FLAG_COPY_FIRST  8
> ++#define DMU_FLAG_TEMPORARY  16
> ++
> ++static int dmu_get_flag(uint32_t *flags, uint32_t flag)
> ++{
> ++	return (*flags & flag) != 0;
> ++}
> ++
> ++static void dmu_set_flag(uint32_t *flags, uint32_t flag)
> ++{
> ++	*flags |= flag;
> ++}
> ++
> ++static void dmu_clr_flag(uint32_t *flags, uint32_t flag)
> ++{
> ++	*flags &= (~flag);
> ++}
> ++
> ++static void dmu_cpy_flag(uint32_t *flags, uint32_t src, uint32_t flag)
> ++{
> ++	*flags = (*flags & ~flag) | (src & flag);
> ++}
> ++
> ++/*
> ++ * This is the message that is passed back and forth between the
> ++ * kernel and the user application
> ++ */
> ++struct dmu_write {
> ++	uint32_t id;
> ++	uint32_t type;              /* Type of request */
> ++	uint32_t flags;        /* Flags */
> ++
> ++	uint64_t org_block;    /* Block that was accessed */
> ++	uint64_t new_block;    /* The new block it should go to */
> ++  	int64_t offset;        /* Sector offset of the block, if needed  */
> ++
> ++	uint32_t src_maj;      /* The source device for copying */
> ++	uint32_t src_min;
> ++
> ++	uint32_t dest_maj;     /* Destination device for copying, and */
> ++	uint32_t dest_min;     /* for the block access                */
> ++
> ++};
> ++
> ++#endif
> ------------------------------------------------------------------------
>
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@lists.xensource.com
> http://lists.xensource.com/xen-devel

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH] Add dm-userspace to the Xen kernel
  2006-06-09 21:48 ` Anthony Liguori
@ 2006-06-09 23:12   ` Dan Smith
  2006-06-10  8:55     ` Keir Fraser
  0 siblings, 1 reply; 17+ messages in thread
From: Dan Smith @ 2006-06-09 23:12 UTC (permalink / raw)
  To: Anthony Liguori; +Cc: Xen Developers


[-- Attachment #1.1: Type: text/plain, Size: 781 bytes --]

AL> Shouldn't this go to LKML?

Perhaps so.  I haven't received much feedback from the device-mapper
maintainer and community, although I plan to push it out for
acceptance on Monday.  However, it may take a while before it makes it
into mainline, and even when it does, the linux version in the xen
tree will have to be updated before it will be easily available to xen
users.  My thought is that since it's a clean patch applied at build
time, that it would be ok to get it in the tree for testing and
experimentation in the meantime.  If/when it goes into mainline and is
merged into xen, then the patch can just come out.

What do the maintainers think about this plan?

-- 
Dan Smith
IBM Linux Technology Center
Open Hypervisor Team
email: danms@us.ibm.com

[-- Attachment #1.2: Type: application/pgp-signature, Size: 190 bytes --]

[-- Attachment #2: Type: text/plain, Size: 138 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH] Add dm-userspace to the Xen kernel
  2006-06-09 23:12   ` Dan Smith
@ 2006-06-10  8:55     ` Keir Fraser
  0 siblings, 0 replies; 17+ messages in thread
From: Keir Fraser @ 2006-06-10  8:55 UTC (permalink / raw)
  To: Dan Smith; +Cc: Xen Developers


On 10 Jun 2006, at 00:12, Dan Smith wrote:

> Perhaps so.  I haven't received much feedback from the device-mapper
> maintainer and community, although I plan to push it out for
> acceptance on Monday.  However, it may take a while before it makes it
> into mainline, and even when it does, the linux version in the xen
> tree will have to be updated before it will be easily available to xen
> users.  My thought is that since it's a clean patch applied at build
> time, that it would be ok to get it in the tree for testing and
> experimentation in the meantime.  If/when it goes into mainline and is
> merged into xen, then the patch can just come out.
>
> What do the maintainers think about this plan?

If there are potential users who will shout for it to go in then it 
seems reasonable.

  -- Keir

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH] Add dm-userspace to the Xen kernel
  2006-06-09 21:08 Dan Smith
  2006-06-09 21:48 ` Anthony Liguori
@ 2006-06-10 20:40 ` Bastian Blank
  2006-06-12 14:52   ` Dan Smith
  1 sibling, 1 reply; 17+ messages in thread
From: Bastian Blank @ 2006-06-10 20:40 UTC (permalink / raw)
  To: xen-devel


[-- Attachment #1.1: Type: text/plain, Size: 1136 bytes --]

On Fri, Jun 09, 2006 at 02:08:15PM -0700, Dan Smith wrote:
> ++/*
> ++ * This is the message that is passed back and forth between the
> ++ * kernel and the user application
> ++ */
> ++struct dmu_write {
> ++	uint32_t id;
> ++	uint32_t type;              /* Type of request */
> ++	uint32_t flags;        /* Flags */
> ++
> ++	uint64_t org_block;    /* Block that was accessed */
> ++	uint64_t new_block;    /* The new block it should go to */
> ++  	int64_t offset;        /* Sector offset of the block, if needed  */
> ++
> ++	uint32_t src_maj;      /* The source device for copying */
> ++	uint32_t src_min;
> ++
> ++	uint32_t dest_maj;     /* Destination device for copying, and */
> ++	uint32_t dest_min;     /* for the block access                */
> ++
> ++};
> ++
> ++#endif

Any reason why
1. this struct differs from the old patches published on dm-devel and
   lkml?
2. Why do you use a construct which gives different allignment on i386
   and x86_64? (i386 alligns 64bit ints on 32bit, x86_64 on 64bit

Bastian

-- 
A woman should have compassion.
		-- Kirk, "Catspaw", stardate 3018.2

[-- Attachment #1.2: Digital signature --]
[-- Type: application/pgp-signature, Size: 197 bytes --]

[-- Attachment #2: Type: text/plain, Size: 138 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH] Add dm-userspace to the Xen kernel
  2006-06-10 20:40 ` Bastian Blank
@ 2006-06-12 14:52   ` Dan Smith
  2006-06-13  8:57     ` Bastian Blank
  0 siblings, 1 reply; 17+ messages in thread
From: Dan Smith @ 2006-06-12 14:52 UTC (permalink / raw)
  To: Bastian Blank; +Cc: xen-devel


[-- Attachment #1.1: Type: text/plain, Size: 1090 bytes --]

BB> 1. this struct differs from the old patches published on dm-devel
BB> and lkml?

I added an 'id' field, which contains a unique integer for each
request, which helps me to match up the response to the correct item
in the kernel queue.  Previously, I was matching based on the original
block, which has the potential to be wrong if there are two requests
on the queue for the same block (i.e. one for a read mapping and one
for a write mapping).  I thought the id would be easier.

BB> 2. Why do you use a construct which gives different allignment on
BB> i386 and x86_64? (i386 alligns 64bit ints on 32bit, x86_64 on
BB> 64bit

I assume you mean because there are an odd number of 32-bit fields, is
that correct?  The answer to the question is: "because I haven't given
much thought to x86_64 issues yet" :).  This week, I plan to test on
x86_64, so I can submit another patch with resolutions to any other
x86_64 issues that may be present, if it's likely to be accepted.

-- 
Dan Smith
IBM Linux Technology Center
Open Hypervisor Team
email: danms@us.ibm.com

[-- Attachment #1.2: Type: application/pgp-signature, Size: 190 bytes --]

[-- Attachment #2: Type: text/plain, Size: 138 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH] Add dm-userspace to the Xen kernel
  2006-06-12 14:52   ` Dan Smith
@ 2006-06-13  8:57     ` Bastian Blank
  0 siblings, 0 replies; 17+ messages in thread
From: Bastian Blank @ 2006-06-13  8:57 UTC (permalink / raw)
  To: Dan Smith; +Cc: xen-devel

On Mon, Jun 12, 2006 at 07:52:06AM -0700, Dan Smith wrote:
> BB> 2. Why do you use a construct which gives different allignment on
> BB> i386 and x86_64? (i386 alligns 64bit ints on 32bit, x86_64 on
> BB> 64bit
> 
> I assume you mean because there are an odd number of 32-bit fields, is
> that correct?  The answer to the question is: "because I haven't given
> much thought to x86_64 issues yet" :).  This week, I plan to test on
> x86_64, so I can submit another patch with resolutions to any other
> x86_64 issues that may be present, if it's likely to be accepted.

The problem is not between x86_64 kernel and userspace, but if you mix
x86_64 kernel and i386 userland, which is allowed.

Bastian

-- 
	"That unit is a woman."
	"A mass of conflicting impulses."
		-- Spock and Nomad, "The Changeling", stardate 3541.9

^ permalink raw reply	[flat|nested] 17+ messages in thread

* [PATCH] Add dm-userspace to the Xen kernel
@ 2006-06-15 19:47 Dan Smith
  2006-06-16 18:31 ` Niraj Tolia
  0 siblings, 1 reply; 17+ messages in thread
From: Dan Smith @ 2006-06-15 19:47 UTC (permalink / raw)
  To: Xen Developers


[-- Attachment #1.1.1: Type: text/plain, Size: 562 bytes --]

This patch adds dm-userspace to the -xen Linux kernel.  I'd like to
get it into the tree so that people that want to can play with it.
Anyone wishing to to do so can download the tools separately, but they
need the kernel module to be able to use it.

I've updated the code for and tested it on x86_64.

The tools are available here:

  http://static.danplanet.com/dm-userspace/libdmu-0.3.tar.gz
  http://static.danplanet.com/dm-userspace/cowd-0.3.tar.gz

-- 
Dan Smith
IBM Linux Technology Center
Open Hypervisor Team
email: danms@us.ibm.com


[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #1.1.2: dm-userspace.patch --]
[-- Type: text/x-patch, Size: 45126 bytes --]

# HG changeset patch
# User Dan Smith <danms@us.ibm.com>
# Node ID f9f920c9285e246c803ae91c0a866b15e52f2c0d
# Parent  161473836da3ccf922f44e910f8b5c01c709d1e6
This adds dm-userspace to the xen linux kernel via another entry in the
patches/ directory.  The dm-userspace module is completely self-contained
and will not affect anything unless it is loaded.  People wishing to
experiment with dm-userspace can download the tools packages separately, but
they need this module to use them.

Signed-off-by: Dan Smith <danms@us.ibm.com>

diff -r 161473836da3 -r f9f920c9285e buildconfigs/linux-defconfig_xen_x86_32
--- a/buildconfigs/linux-defconfig_xen_x86_32	Wed Jun 14 22:15:13 2006 +0100
+++ b/buildconfigs/linux-defconfig_xen_x86_32	Thu Jun 15 12:33:22 2006 -0700
@@ -1187,6 +1187,7 @@ CONFIG_DM_ZERO=m
 CONFIG_DM_ZERO=m
 CONFIG_DM_MULTIPATH=m
 CONFIG_DM_MULTIPATH_EMC=m
+CONFIG_DM_USERSPACE=m
 
 #
 # Fusion MPT device support
diff -r 161473836da3 -r f9f920c9285e buildconfigs/linux-defconfig_xen_x86_64
--- a/buildconfigs/linux-defconfig_xen_x86_64	Wed Jun 14 22:15:13 2006 +0100
+++ b/buildconfigs/linux-defconfig_xen_x86_64	Thu Jun 15 12:33:22 2006 -0700
@@ -1130,6 +1130,7 @@ CONFIG_DM_ZERO=m
 CONFIG_DM_ZERO=m
 CONFIG_DM_MULTIPATH=m
 CONFIG_DM_MULTIPATH_EMC=m
+CONFIG_DM_USERSPACE=m
 
 #
 # Fusion MPT device support
diff -r 161473836da3 -r f9f920c9285e patches/linux-2.6.16.13/dm-userspace.patch
--- /dev/null	Thu Jan  1 00:00:00 1970 +0000
+++ b/patches/linux-2.6.16.13/dm-userspace.patch	Thu Jun 15 12:33:22 2006 -0700
@@ -0,0 +1,1735 @@
+diff -Naur ./drivers/md/dm-userspace.c ../linux-2.6.16.13-dmu/drivers/md/dm-userspace.c
+--- ./drivers/md/dm-userspace.c	1969-12-31 16:00:00.000000000 -0800
++++ ../linux-2.6.16.13-dmu/drivers/md/dm-userspace.c	2006-06-15 09:17:17.000000000 -0700
+@@ -0,0 +1,1612 @@
++/*
++ * Copyright (C) International Business Machines Corp., 2006
++ * Author: Dan Smith <danms@us.ibm.com>
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License as published by
++ * the Free Software Foundation; under version 2 of the License.
++ *
++ * This program is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
++ * GNU General Public License for more details.
++ *
++ * You should have received a copy of the GNU General Public License
++ * along with this program; if not, write to the Free Software
++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
++ *
++ */
++
++#include <linux/module.h>
++#include <linux/init.h>
++#include <linux/blkdev.h>
++#include <linux/bio.h>
++#include <linux/slab.h>
++#include <linux/spinlock.h>
++#include <linux/list.h>
++#include <linux/fs.h>
++#include <linux/cdev.h>
++#include <linux/types.h>
++#include <linux/poll.h>
++
++#include <linux/dm-userspace.h>
++
++#include "dm.h"
++#include "dm-bio-list.h"
++#include "kcopyd.h"
++
++#define DMU_DEBUG      0
++
++#define DMU_COPY_PAGES 256
++#define DMU_KEY_LEN    256
++
++#define DMU_PREFIX     "dm-userspace: "
++#define DMU_SET_ERROR(ti, msg) ti->error = DMU_PREFIX msg
++
++#if DMU_DEBUG
++#define DPRINTK( s, arg... ) printk(DMU_PREFIX s, ##arg)
++#else
++#define DPRINTK( s, arg... )
++#endif
++
++kmem_cache_t *request_cache;
++kmem_cache_t *remap_cache;
++
++static int enable_watchdog = 0;
++static struct work_struct wd;
++
++static spinlock_t devices_lock;
++static LIST_HEAD(devices);
++
++/* Device number for the control device */
++static dev_t our_dev;
++
++struct target_device {
++	struct list_head list;
++	struct block_device *bdev;
++	struct kref users;
++};
++
++struct hash_table {
++	struct list_head *table;
++	uint64_t size;
++	uint32_t mask;
++	uint64_t count;
++};
++
++/* A dm-userspace device, which consists of multiple targets sharing a
++ * common key
++ */
++struct dmu_device {
++	spinlock_t lock;
++	struct list_head list;
++	struct list_head requests;
++	struct list_head target_devs;
++
++	struct hash_table remaps;
++
++	struct cdev cdev;
++	dev_t ctl_dev;
++
++	char key[DMU_KEY_LEN];
++	struct kref users;
++
++	wait_queue_head_t wqueue;
++
++	uint64_t block_size;
++	uint64_t block_mask;
++	unsigned int block_shift;
++
++	struct kcopyd_client *kcopyd_client;
++
++	/*
++	 * Count of the number of outstanding requests that have been
++	 * made against this device, but have not yet been flushed
++	 */
++	atomic_t remap_ct;
++
++	uint32_t id_counter;
++};
++
++struct userspace_request {
++	spinlock_t lock;
++	struct list_head list;
++	struct dmu_device *dev;
++	int type;
++	int sent;
++	uint32_t flags;
++	uint32_t id;
++	union {
++		struct bio_list bios;
++		uint64_t block;
++	} u;
++	atomic_t refcnt;
++};
++
++struct dmu_map {
++	spinlock_t lock;
++	uint64_t org_block; /* Original block */
++	uint64_t new_block; /* Destination block */
++	int64_t offset;
++	uint32_t flags;
++	struct target_device *src;
++	struct target_device *dest;
++	struct bio_list bios;
++	struct list_head list;
++	struct dmu_device *dev;
++
++	struct dmu_map *next; /* Next remap that is dependent on this one */
++};
++
++/* Forward delcarations */
++static struct file_operations ctl_fops;
++static void copy_block(struct dmu_map *remap);
++static void remap_flusher(struct dmu_map *remap);
++
++/*
++ * Return the block number for @sector
++ */
++static inline u64 dmu_block(struct dmu_device *dev,
++				 sector_t sector)
++{
++	return sector >> dev->block_shift;
++}
++
++/*
++ * Return the sector offset in a block for @sector
++ */
++static inline u64 dmu_sector_offset(struct dmu_device *dev,
++				 sector_t sector)
++{
++	return sector & dev->block_mask;
++}
++
++/*
++ * Return the starting sector for @block
++ */
++static inline u64 dmu_sector(struct dmu_device *dev,
++				  uint64_t block)
++{
++	return block << dev->block_shift;
++}
++
++static void copy_or_flush(struct dmu_map *remap)
++{
++	int copy;
++
++	spin_lock(&remap->lock);
++	copy = dmu_get_flag(&remap->flags, DMU_FLAG_COPY_FIRST);
++	spin_unlock(&remap->lock);
++
++	if (copy)
++		copy_block(remap);
++	else
++		remap_flusher(remap);
++}
++
++static void error_bios(struct bio_list *bios)
++{
++	struct bio *bio;
++	int count = 0;
++
++	while ((bio = bio_list_pop(bios)) != NULL) {
++		bio_io_error(bio, bio->bi_size);
++		count++;
++	}
++
++	if (count)
++		printk(KERN_ERR DMU_PREFIX
++		       "*** Failed %i requests\n", count);
++}
++
++static void init_remap(struct dmu_device *dev, struct dmu_map *remap)
++{
++	spin_lock_init(&remap->lock);
++	remap->org_block = remap->new_block = 0;
++	remap->offset = 0;
++	remap->flags = 0;
++	remap->src = remap->dest = NULL;
++	bio_list_init(&remap->bios);
++	INIT_LIST_HEAD(&remap->list);
++	remap->dev = dev;
++	remap->next = NULL;
++}
++
++static void init_request(struct dmu_device *dev,
++			 int type,
++			 struct userspace_request *req)
++{
++	spin_lock_init(&req->lock);
++	INIT_LIST_HEAD(&req->list);
++	req->dev = dev;
++	req->type = type;
++	req->sent = 0;
++	req->flags = 0;
++	if (type == DM_USERSPACE_COPY_FINISHED) {
++		req->u.block = 0;
++		req->id = 0;
++	} else {
++		bio_list_init(&req->u.bios);
++		spin_lock(&dev->lock);
++		dev->id_counter++;
++		if (dev->id_counter == 0)
++			dev->id_counter = 1;
++		req->id = dev->id_counter;
++		spin_unlock(&dev->lock);
++	}
++	atomic_set(&req->refcnt, 0);
++}
++
++static void destroy_remap(struct dmu_map *remap)
++{
++	error_bios(&remap->bios);
++}
++
++/*
++ * For an even block distribution, this is not too bad, but it could
++ * probably be better
++ */
++static uint32_t ht_hash(struct hash_table *ht, uint64_t block)
++{
++	return (uint32_t)block & ht->mask;
++}
++
++static int ht_init(struct hash_table *ht, unsigned long size)
++{
++	uint64_t i;
++	unsigned long pages;
++	unsigned int order = ffs((size * sizeof(struct list_head *)) /
++				 PAGE_SIZE);
++
++	DPRINTK("Going to allocate 2^%u pages for %lu-entry table\n",
++		order, size);
++
++	pages = __get_free_pages(GFP_ATOMIC, order);
++	if (!pages) {
++		DPRINTK("Failed to allocate hash table (%lu)\n", size);
++		return 0;
++	}
++
++	ht->table = (void *)pages;
++	ht->size = size;
++	ht->count = 0;
++	ht->mask = size - 1;
++
++	for (i = 0; i < size; i++)
++		INIT_LIST_HEAD(&ht->table[i]);
++
++	return 1;
++}
++
++static void ht_insert_bucket(struct dmu_map *map, struct list_head *list)
++{
++	list_add_tail(&map->list, list);
++}
++
++/*
++ * I'm sure this is quite dumb, but it works for now
++ */
++static int ht_should_grow(struct hash_table *ht)
++{
++	return ht->count > (2 * (ht->size / 4));
++}
++
++static void ht_grow_table(struct hash_table *ht);
++static void ht_insert_map(struct hash_table *ht, struct dmu_map *map)
++{
++	uint32_t addr;
++
++	addr = ht_hash(ht, map->org_block) & ht->mask;
++
++	BUG_ON(addr >= ht->size);
++
++	ht_insert_bucket(map, &ht->table[addr]);
++	ht->count++;
++
++	if (ht_should_grow(ht))
++		ht_grow_table(ht);
++}
++
++static void ht_delete_map(struct hash_table *ht, struct dmu_map *map)
++{
++	list_del(&map->list);
++	BUG_ON(ht->count == 0);
++	ht->count--;
++}
++
++static struct dmu_map *ht_find_map(struct hash_table *ht, uint64_t block)
++{
++	uint32_t addr;
++	struct dmu_map *m;
++
++	addr = ht_hash(ht, block) & ht->mask;
++
++	BUG_ON(addr >= ht->size);
++
++	list_for_each_entry(m, &ht->table[addr], list) {
++		if (m->org_block == block)
++			return m;
++	}
++
++	return NULL;
++}
++
++static struct dmu_map *ht_find_map_dev(struct dmu_device *dev, uint64_t block)
++{
++	struct dmu_map *remap;
++
++	spin_lock(&dev->lock);
++
++	remap = ht_find_map(&dev->remaps, block);
++
++	spin_unlock(&dev->lock);
++
++	return remap;
++}
++
++static void ht_grow_table(struct hash_table *ht)
++{
++	struct hash_table old_table;
++	uint64_t i;
++
++	old_table = *ht;
++
++	if (!ht_init(ht, old_table.size * 2)) {
++		DPRINTK("Can't grow table to %llu\n",
++			old_table.size * 2);
++		return;
++	}
++
++	DPRINTK("Growing from %llu to %llu\n",
++		old_table.size, ht->size);
++
++	for (i = 0; i < old_table.size; i++ ) {
++		struct dmu_map *m, *n;
++		list_for_each_entry_safe(m, n, &old_table.table[i],
++					 list) {
++			list_del_init(&m->list);
++			ht_insert_map(ht, m);
++		}
++	}
++
++	free_pages((unsigned long)old_table.table,
++		   ffs((old_table.size * sizeof(struct list_head *))
++		       / PAGE_SIZE));
++}
++
++static uint64_t ht_destroy_table(struct hash_table *ht)
++{
++	uint64_t i, count = 0;
++	struct dmu_map *m, *n;
++
++	for (i = 0; i < ht->size; i++) {
++		list_for_each_entry_safe(m, n, &ht->table[i], list) {
++			ht_delete_map(ht, m);
++			kmem_cache_free(remap_cache, m);
++			count++;
++		}
++	}
++
++	return count;
++}
++
++static struct target_device *get_target(struct dmu_device *dev,
++					dev_t devno)
++{
++
++	struct target_device *target;
++	struct block_device *bdev;
++
++	spin_lock(&dev->lock);
++	list_for_each_entry(target, &dev->target_devs, list) {
++		if (target->bdev->bd_dev == devno) {
++			spin_unlock(&dev->lock);
++			goto out;
++		}
++	}
++	spin_unlock(&dev->lock);
++
++	bdev = open_by_devnum(devno, FMODE_READ | FMODE_WRITE);
++	if (IS_ERR(bdev)) {
++		printk(KERN_ERR DMU_PREFIX "Unable to lookup device %x\n",
++		       devno);
++		return NULL;
++	}
++
++	target = kmalloc(sizeof(*target), GFP_KERNEL);
++	if (!target) {
++		printk(KERN_ERR DMU_PREFIX
++		       "Unable to alloc new target device\n");
++		return NULL;
++	}
++
++	target->bdev = bdev;
++	INIT_LIST_HEAD(&target->list);
++
++	spin_lock(&dev->lock);
++	list_add_tail(&target->list, &dev->target_devs);
++	spin_unlock(&dev->lock);
++
++ out:
++	return target;
++}
++
++/* Caller must hold dev->lock */
++static void put_target(struct dmu_device *dev,
++		       struct target_device *target)
++{
++	list_del(&target->list);
++
++	bd_release(target->bdev);
++	blkdev_put(target->bdev);
++
++	kfree(target);
++}
++
++/*
++ * Add a request to the device's request queue
++ */
++static void add_request(struct dmu_device *dev,
++			       struct userspace_request *req)
++{
++	spin_lock(&dev->lock);
++	list_add_tail(&req->list, &dev->requests);
++	spin_unlock(&dev->lock);
++
++	wake_up(&dev->wqueue);
++}
++
++/*
++ *
++ */
++static int have_pending_requests(struct dmu_device *dev)
++{
++	struct userspace_request *req;
++	int ret = 0;
++
++	spin_lock(&dev->lock);
++
++	list_for_each_entry(req, &dev->requests, list) {
++		if (!req->sent) {
++			ret = 1;
++			break;
++		}
++	}
++
++	spin_unlock(&dev->lock);
++
++	return ret;
++}
++
++/*
++ * This periodically dumps out some debug information.  It's really
++ * only useful while developing.
++ */
++static void watchdog(void *data)
++{
++	unsigned int v_remaps, i_remaps, reqs, s_reqs, devs = 0;
++	struct dmu_device *dev;
++	struct dmu_map *map;
++	struct userspace_request *req;
++	uint64_t i;
++
++	spin_lock(&devices_lock);
++
++	list_for_each_entry(dev, &devices, list) {
++		spin_lock(&dev->lock);
++
++		v_remaps = i_remaps = reqs = s_reqs = 0;
++
++		for (i = 0; i < dev->remaps.size; i++) {
++			list_for_each_entry(map, &dev->remaps.table[i], list)
++				if (dmu_get_flag(&map->flags, DMU_FLAG_VALID))
++					v_remaps++;
++				else
++					i_remaps++;
++		}
++
++		list_for_each_entry(req, &dev->requests, list)
++			if (req->sent)
++				s_reqs++;
++			else
++				reqs++;
++
++		printk("Device %x:%x: "
++		       "  reqs: %u/%u "
++		       "  inv maps: %u "
++		       "  val maps: %u (%i)\n",
++		       MAJOR(dev->ctl_dev), MINOR(dev->ctl_dev),
++		       reqs, s_reqs, i_remaps, v_remaps,
++		       atomic_read(&dev->remap_ct));
++		devs++;
++
++		spin_unlock(&dev->lock);
++	}
++
++	spin_unlock(&devices_lock);
++
++	schedule_delayed_work(&wd, HZ);
++}
++
++static void __bio_remap(struct bio *bio,
++		      struct dmu_map *remap)
++{
++	BUG_ON(remap->dest == NULL);
++
++	bio->bi_sector = dmu_sector(remap->dev, remap->new_block) +
++		dmu_sector_offset(remap->dev, bio->bi_sector) +
++		remap->offset;
++
++	bio->bi_bdev = remap->dest->bdev;
++}
++
++/*
++   Pop, remap, and flush a bio.  Set VALID flag if no bios
++   available
++*/
++static struct bio *pop_and_remap(struct dmu_map *remap)
++{
++	struct bio *bio = NULL;
++
++	spin_lock(&remap->lock);
++
++	bio = bio_list_pop(&remap->bios);
++	if (bio)
++		__bio_remap(bio, remap);
++	else {
++		/* If there are no more bios, we must set the VALID
++		   flag before we release the lock */
++		dmu_set_flag(&remap->flags, DMU_FLAG_VALID);
++	}
++
++	spin_unlock(&remap->lock);
++
++	return bio;
++}
++
++static void get_remap_attrs(struct dmu_map *remap,
++			    int *copy_first,
++			    int *temporary,
++			    struct dmu_map **next)
++{
++	spin_lock(&remap->lock);
++
++	*copy_first = dmu_get_flag(&remap->flags, DMU_FLAG_COPY_FIRST);
++	*temporary = dmu_get_flag(&remap->flags, DMU_FLAG_TEMPORARY);
++	*next = remap->next;
++	remap->next = NULL;
++
++	spin_unlock(&remap->lock);
++}
++
++static void remap_flusher(struct dmu_map *remap)
++{
++	struct bio *bio;
++	struct userspace_request *req;
++	int copy_first = 0, temporary = 0;
++	struct dmu_map *next;
++
++	DPRINTK("Flushing bios for block %llu:%llu\n",
++	       remap->org_block, remap->new_block);
++
++	while (1) {
++
++		bio = pop_and_remap(remap);
++
++		if (bio)
++			generic_make_request(bio);
++		else
++			break;
++
++		atomic_dec(&remap->dev->remap_ct);
++
++		DPRINTK("Flushed %llu:%llu (%u bytes)\n",
++			dmu_block(remap->dev, bio->bi_sector),
++			dmu_sector_offset(remap->dev, bio->bi_sector),
++			bio->bi_size);
++	}
++
++	get_remap_attrs(remap, &copy_first, &temporary, &next);
++
++	if (next)
++		copy_or_flush(next);
++
++	/* Notify userspace */
++	if (copy_first) {
++		req = kmem_cache_alloc(request_cache, GFP_KERNEL);
++		if (!req) {
++			printk(KERN_ERR DMU_PREFIX
++			       "Failed to allocate copy response\n");
++			return;
++		}
++
++		init_request(remap->dev, DM_USERSPACE_COPY_FINISHED, req);
++
++		req->u.block = remap->org_block;
++
++		add_request(remap->dev, req);
++	}
++
++	if (temporary) {
++		destroy_remap(remap);
++		kmem_cache_free(remap_cache, remap);
++	}
++}
++
++static void destroy_dmu_device(struct kref *ref)
++{
++	struct dmu_device *dev;
++	struct list_head *cursor, *next;
++	uint64_t remaps;
++
++	dev = container_of(ref, struct dmu_device, users);
++
++	DPRINTK("Destroying device: %s\n", dev->key);
++
++	spin_lock(&devices_lock);
++	list_del(&dev->list);
++	spin_unlock(&devices_lock);
++
++	list_for_each_safe(cursor, next, &dev->target_devs) {
++		struct target_device *target;
++
++		target = list_entry(cursor,
++				    struct target_device,
++				    list);
++
++		put_target(dev, target);
++	}
++
++	remaps = ht_destroy_table(&dev->remaps);
++	DPRINTK("Destroyed %llu/%llu remaps\n", remaps, dev->remaps.count);
++
++	list_for_each_safe(cursor, next, &dev->requests) {
++		struct userspace_request *req;
++
++		req = list_entry(cursor,
++				 struct userspace_request,
++				 list);
++
++		list_del(&req->list);
++
++		error_bios(&req->u.bios);
++
++		kmem_cache_free(request_cache, req);
++	}
++
++	kcopyd_client_destroy(dev->kcopyd_client);
++
++	cdev_del(&dev->cdev);
++	kfree(dev);
++}
++
++static inline void get_dev(struct dmu_device *dev)
++{
++	DPRINTK("get on %s\n", dev->key);
++	kref_get(&dev->users);
++}
++
++static inline void put_dev(struct dmu_device *dev)
++{
++	DPRINTK("put on %s\n", dev->key);
++	kref_put(&dev->users, destroy_dmu_device);
++}
++
++static int get_free_minor(void)
++{
++	struct dmu_device *dev;
++	int minor = 0;
++
++	spin_lock(&devices_lock);
++
++	list_for_each_entry(dev, &devices, list) {
++		if (MINOR(dev->ctl_dev) != minor)
++			break;
++		minor++;
++	}
++
++	spin_unlock(&devices_lock);
++
++	return minor;
++}
++
++static int init_dmu_device(struct dmu_device *dev, u32 block_size)
++{
++	int ret;
++
++	cdev_init(&dev->cdev, &ctl_fops);
++	dev->cdev.owner = THIS_MODULE;
++	dev->cdev.ops = &ctl_fops;
++
++	init_waitqueue_head(&dev->wqueue);
++	INIT_LIST_HEAD(&dev->list);
++	INIT_LIST_HEAD(&dev->requests);
++	INIT_LIST_HEAD(&dev->target_devs);
++	kref_init(&dev->users);
++	spin_lock_init(&dev->lock);
++
++	atomic_set(&dev->remap_ct, 0);
++	dev->id_counter = 1; /* reserve 0 for unsolicited maps */
++
++	if (!ht_init(&dev->remaps, 2048)) {
++		printk(KERN_ERR DMU_PREFIX
++		       "Unable to allocate hash table\n");
++		return 0;
++	}
++
++	dev->block_size  = block_size;
++	dev->block_mask  = block_size - 1;
++	dev->block_shift = ffs(block_size) - 1;
++
++	ret = kcopyd_client_create(DMU_COPY_PAGES, &dev->kcopyd_client);
++	if (ret) {
++		printk(DMU_PREFIX "Failed to initialize kcopyd client\n");
++		return 0;
++	}
++
++	return 1;
++}
++
++static struct dmu_device *new_dmu_device(char *key,
++					 struct dm_target *ti,
++					 u32 block_size)
++{
++	struct dmu_device *dev, *ptr;
++	int                ret;
++
++	dev = kmalloc(sizeof(*dev), GFP_KERNEL);
++	if (dev == NULL) {
++		printk(DMU_PREFIX "Failed to allocate new userspace device\n");
++		return NULL;
++	}
++
++	if (!init_dmu_device(dev, block_size))
++		goto bad1;
++
++	snprintf(dev->key, DMU_KEY_LEN, "%s", key);
++
++	DPRINTK("New device with size %llu mask 0x%llX shift %u\n",
++		dev->block_size, dev->block_mask, dev->block_shift);
++
++	dev->ctl_dev = MKDEV(MAJOR(our_dev), get_free_minor());
++
++	ret = cdev_add(&dev->cdev, dev->ctl_dev, 1);
++	if (ret < 0) {
++		printk(DMU_PREFIX "Failed to register control device %d:%d\n",
++		       MAJOR(dev->ctl_dev), MINOR(dev->ctl_dev));
++		goto bad2;
++	}
++
++	DPRINTK("Registered new control interface: %i:%i\n",
++		MAJOR(dev->ctl_dev), MINOR(dev->ctl_dev));
++
++	spin_lock(&devices_lock);
++	if (list_empty(&devices))
++		list_add(&dev->list, &devices);
++	else
++		list_for_each_entry(ptr, &devices, list)
++			if (MINOR(ptr->ctl_dev) < MINOR(dev->ctl_dev))
++				list_add(&dev->list, &ptr->list);
++	spin_unlock(&devices_lock);
++
++	return dev;
++
++ bad2:
++	cdev_del(&dev->cdev);
++ bad1:
++	kfree(dev);
++	printk(KERN_ERR DMU_PREFIX "Failed to create device\n");
++	return NULL;
++}
++
++static struct dmu_device *find_dmu_device(const char *key)
++{
++	struct dmu_device *dev;
++	struct dmu_device *match = NULL;
++
++	spin_lock(&devices_lock);
++
++	list_for_each_entry(dev, &devices, list) {
++		spin_lock(&dev->lock);
++		if (strncmp(dev->key, key, DMU_KEY_LEN) == 0) {
++			match = dev;
++			spin_unlock(&dev->lock);
++			break;
++		}
++		spin_unlock(&dev->lock);
++	}
++
++	spin_unlock(&devices_lock);
++
++	return match;
++}
++
++static int dmu_ctr(struct dm_target *ti, unsigned int argc, char **argv)
++{
++	uint64_t block_size;
++	struct dmu_device *dev;
++	char *device_key;
++	char *block_size_param;
++
++	if (argc < 2) {
++		DMU_SET_ERROR(ti, "Invalid argument count");
++		return -EINVAL;
++	}
++
++	device_key = argv[0];
++	block_size_param = argv[1];
++
++	block_size = simple_strtoul(block_size_param, NULL, 10) / 512;
++
++	dev = find_dmu_device(device_key);
++	if (dev == NULL) {
++		dev = new_dmu_device(device_key,
++				     ti,
++				     block_size);
++		if (dev == NULL) {
++			DMU_SET_ERROR(ti, "Failed to create device");
++			goto bad;
++		}
++	} else {
++		get_dev(dev);
++	}
++
++	spin_lock(&dev->lock);
++	if (dev->block_size != block_size) {
++		DMU_SET_ERROR(ti, "Invalid block size");
++		goto bad;
++	}
++	spin_unlock(&dev->lock);
++
++	ti->private  = dev;
++	ti->split_io = block_size;
++
++	DPRINTK("  block-size:  %llu sectors\n", dev->block_size);
++	DPRINTK("  block-shift: %u\n", dev->block_shift);
++	DPRINTK("  block-mask:  %llx\n", dev->block_mask);
++
++	return 0;
++
++ bad:
++	if (dev) {
++		spin_unlock(&dev->lock);
++		put_dev(dev);
++	}
++
++	return -EINVAL;
++}
++
++static void dmu_dtr(struct dm_target *ti)
++{
++	struct dmu_device *dev = (struct dmu_device *) ti->private;
++
++	put_dev(dev);
++
++	DPRINTK("destroyed %d:%d\n", (int)ti->begin, (int)ti->len);
++}
++
++/* Search @dev for an outstanding request for remapping @block */
++static struct userspace_request *find_existing_req(struct dmu_device *dev,
++						   uint64_t block)
++{
++	struct userspace_request *req;
++	struct userspace_request *maybe = NULL;
++
++	spin_lock(&dev->lock);
++
++	list_for_each_entry(req, &dev->requests, list) {
++		if ((req->type == DM_USERSPACE_MAP_BLOCK) &&
++		    (dmu_block(dev, req->u.bios.head->bi_sector) == block)) {
++			if (maybe) {
++				atomic_dec(&maybe->refcnt);
++			}
++			maybe = req;
++			atomic_inc(&maybe->refcnt);
++		}
++	}
++
++	spin_unlock(&dev->lock);
++
++	return maybe;
++}
++
++static int make_new_request(struct dmu_device *dev, struct bio *bio)
++{
++	struct userspace_request *req;
++
++	req = kmem_cache_alloc(request_cache, GFP_KERNEL);
++	if (req == NULL)
++		goto bad;
++
++	init_request(dev, DM_USERSPACE_MAP_BLOCK, req);
++
++	dmu_set_flag(&req->flags, DMU_FLAG_RD);
++	if (bio_rw(bio))
++		dmu_set_flag(&req->flags, DMU_FLAG_WR);
++	else
++		dmu_clr_flag(&req->flags, DMU_FLAG_WR);
++	bio_list_add(&req->u.bios, bio);
++
++	add_request(dev, req);
++
++	DPRINTK("Queued %s request for sector " SECTOR_FORMAT "\n",
++		dmu_get_flag(&req->flags, DMU_FLAG_WR) ? "write" : "read",
++		bio->bi_sector);
++
++	return 0;
++
++ bad:
++	printk(KERN_ERR DMU_PREFIX "Failed to queue bio!\n");
++	return -1;
++}
++
++static int dmu_map_remap_case(struct dmu_device *dev,
++			      struct dmu_map *remap,
++			      struct bio *bio)
++{
++	int ret = 0;
++
++	spin_lock(&remap->lock);
++
++	if (dmu_get_flag(&remap->flags, DMU_FLAG_WR) != bio_rw(bio)) {
++		ret = -1;
++	} else {
++		if (dmu_get_flag(&remap->flags, DMU_FLAG_VALID)) {
++			__bio_remap(bio, remap);
++			ret = 1;
++			atomic_dec(&dev->remap_ct);
++		} else {
++			bio_list_add(&remap->bios, bio);
++		}
++	}
++
++	spin_unlock(&remap->lock);
++
++	return ret;
++}
++
++static int dmu_map_request_case(struct dmu_device *dev,
++				struct userspace_request *req,
++				struct bio *bio)
++{
++	int ret = 0;
++	int req_rw = dmu_get_flag(&req->flags, DMU_FLAG_WR);
++
++	spin_lock(&req->lock);
++
++	if (!req_rw && bio_rw(bio) && !req->sent) {
++		/* Convert to R/W and Queue */
++		dmu_set_flag(&req->flags, DMU_FLAG_WR);
++		bio_list_add(&req->u.bios, bio);
++	} else if (!req_rw && bio_rw(bio) && req->sent) {
++		/* Can't convert, must re-request */
++		ret = -1;
++	} else {
++		/* Queue */
++		bio_list_add(&req->u.bios, bio);
++	}
++
++	spin_unlock(&req->lock);
++
++	return ret;
++}
++
++static int dmu_map(struct dm_target *ti, struct bio *bio,
++		   union map_info *map_context)
++{
++	struct dmu_device *dev = (struct dmu_device *) ti->private;
++	struct dmu_map *remap;
++	struct userspace_request *req;
++	int ret = 0;
++	u64 block;
++
++	atomic_inc(&dev->remap_ct);
++
++	block = dmu_block(dev, bio->bi_sector);
++
++	remap = ht_find_map_dev(dev, block);
++	if (remap) {
++		ret = dmu_map_remap_case(dev, remap, bio);
++		if (ret >= 0)
++			goto done;
++	}
++
++	req = find_existing_req(dev, block);
++	if (req) {
++		ret = dmu_map_request_case(dev, req, bio);
++		atomic_dec(&req->refcnt);
++		if (ret >= 0)
++			goto done;
++	}
++
++	ret = make_new_request(dev, bio);
++
++ done:
++	return ret;
++}
++
++static int dmu_status(struct dm_target *ti, status_type_t type,
++		      char *result, unsigned int maxlen)
++{
++	struct dmu_device *dev = (struct dmu_device *) ti->private;
++
++	switch (type) {
++	case STATUSTYPE_INFO:
++		snprintf(result, maxlen, "%x:%x\n",
++			 MAJOR(dev->ctl_dev),
++			 MINOR(dev->ctl_dev));
++		break;
++
++	case STATUSTYPE_TABLE:
++		snprintf(result, maxlen, "%s %llu",
++			 dev->key,
++			 dev->block_size * 512);
++		break;
++	}
++
++	return 0;
++}
++
++static struct target_type userspace_target = {
++	.name    = "userspace",
++	.version = {0, 1, 0},
++	.module  = THIS_MODULE,
++	.ctr     = dmu_ctr,
++	.dtr     = dmu_dtr,
++	.map     = dmu_map,
++	.status  = dmu_status,
++};
++
++static int format_userspace_message(struct dmu_write *msg,
++				      struct userspace_request *req)
++{
++	int ret = 1;
++
++	spin_lock(&req->lock);
++
++	if (req->sent)
++		goto out;
++
++	msg->id = req->id;
++	msg->type = req->type;
++	dmu_cpy_flag(&msg->flags, req->flags, DMU_FLAG_RD);
++	dmu_cpy_flag(&msg->flags, req->flags, DMU_FLAG_WR);
++
++	switch (msg->type) {
++	case DM_USERSPACE_MAP_BLOCK:
++		msg->org_block = dmu_block(req->dev,
++					   req->u.bios.head->bi_sector);
++		DPRINTK("Asking userspace to map %llu (%c)\n",
++			msg->org_block,
++			dmu_get_flag(&msg->flags, DMU_FLAG_WR) ? 'W' : 'R');
++		break;
++
++	case DM_USERSPACE_COPY_FINISHED:
++	case DM_USERSPACE_INVAL_COMPLETE:
++	case DM_USERSPACE_INVAL_FAILED:
++		msg->org_block = req->u.block;
++		break;
++
++	default:
++		printk(KERN_INFO DMU_PREFIX
++		       "Unknown message type %i\n", msg->type);
++		ret = 0;
++	}
++
++	req->sent = 1;
++
++ out:
++	spin_unlock(&req->lock);
++
++	if (msg->type != DM_USERSPACE_MAP_BLOCK) {
++		/* COPY_FINISHED, et al messages don't get responses,
++		 * so we take them off the request queue here
++		 */
++		list_del(&req->list);
++		kmem_cache_free(request_cache, req);
++	}
++
++	return ret;
++}
++
++ssize_t dmu_ctl_read(struct file *file, char __user *buffer,
++		     size_t size, loff_t *offset)
++{
++
++	struct dmu_device  *dev = (struct dmu_device *)file->private_data;
++	struct dmu_write   *msg;
++	struct userspace_request *req = NULL;
++	struct userspace_request *next;
++	int                 ret = 0;
++	int                 num_reqs, req_idx = 0;
++
++	num_reqs = size / sizeof(*msg);
++
++	if (num_reqs == 0)
++		return -EINVAL;
++
++	msg = kmalloc(num_reqs * sizeof(*msg), GFP_KERNEL);
++	if (!msg) {
++		printk(KERN_ERR DMU_PREFIX
++		       "Failed to alloc %i reqs!\n", num_reqs);
++		return -ENOMEM;
++	}
++
++	while (!have_pending_requests(dev)) {
++		if (file->f_flags & O_NONBLOCK) {
++			return 0;
++		}
++
++		if (wait_event_interruptible(dev->wqueue,
++					     have_pending_requests(dev)))
++			return -ERESTARTSYS;
++	}
++
++	spin_lock(&dev->lock);
++
++	list_for_each_entry_safe(req, next, &dev->requests, list) {
++
++		if (!format_userspace_message(&msg[req_idx], req))
++			continue;
++
++		ret += sizeof(*msg);
++		if (++req_idx >= num_reqs) {
++			break;
++		}
++	}
++
++	spin_unlock(&dev->lock);
++
++	if (copy_to_user(buffer, msg, sizeof(*msg) * req_idx)) {
++		DPRINTK("control read copy_to_user failed!\n");
++		ret = -EFAULT;
++	}
++
++	kfree(msg);
++
++	return ret;
++}
++
++static void copy_callback(int read_err,
++                          unsigned int write_err,
++                          void *data)
++{
++	remap_flusher((struct dmu_map *)data);
++}
++
++static void copy_block(struct dmu_map *remap)
++{
++	struct io_region src, dst;
++	struct kcopyd_client *client;
++
++	spin_lock(&remap->lock);
++
++	src.bdev = remap->src->bdev;
++	src.sector = remap->org_block << remap->dev->block_shift;
++	src.count = remap->dev->block_size;
++
++	dst.bdev = remap->dest->bdev;
++	dst.sector = (remap->new_block << remap->dev->block_shift);
++	dst.sector += remap->offset;
++	dst.count = remap->dev->block_size;
++
++	DPRINTK("Copying: "
++		SECTOR_FORMAT "(" SECTOR_FORMAT "):" SECTOR_FORMAT " -> "
++		SECTOR_FORMAT "(" SECTOR_FORMAT "):" SECTOR_FORMAT "\n",
++		remap->org_block,
++		src.sector,
++		src.count * 512,
++		remap->new_block,
++		dst.sector,
++		dst.count * 512);
++
++	client = remap->dev->kcopyd_client;
++
++	spin_unlock(&remap->lock);
++
++	kcopyd_copy(client, &src, 1, &dst, 0, copy_callback, remap);
++}
++
++static int remap_request(struct dmu_write *msg,
++			 struct dmu_device *dev,
++			 struct userspace_request *req)
++
++{
++	struct dmu_map *remap = NULL, *parent = NULL;
++	struct target_device *s_dev = NULL, *d_dev = NULL;
++	int is_chained = 0;
++	struct bio_list bio_holder;
++
++	if (dmu_get_flag(&msg->flags, DMU_FLAG_COPY_FIRST)) {
++		s_dev = get_target(dev, MKDEV(msg->src_maj, msg->src_min));
++		if (!s_dev) {
++			printk(KERN_ERR DMU_PREFIX
++			       "Failed to find src device %i:%i\n",
++			       msg->src_maj, msg->src_min);
++			goto bad;
++		}
++	}
++
++	d_dev = get_target(dev, MKDEV(msg->dest_maj, msg->dest_min));
++	if (!d_dev) {
++		printk(KERN_ERR DMU_PREFIX "Failed to find dst device %i:%i\n",
++		       msg->dest_maj, msg->dest_min);
++		goto bad;
++	}
++
++	if (req) {
++		while (atomic_read(&req->refcnt) != 0) {
++			DPRINTK("Waiting for exclusive use of request\n");
++			schedule();
++		}
++
++		spin_lock(&req->lock);
++		bio_holder = req->u.bios;
++		spin_unlock(&req->lock);
++	} else {
++		bio_list_init(&bio_holder);
++	}
++
++	/* Allocate a new remap early (before grabbing locks), since
++	   we will most likely need it */
++	remap = kmem_cache_alloc(remap_cache, GFP_KERNEL);
++	if (!remap) {
++		printk(KERN_ERR DMU_PREFIX "Failed to alloc remap!");
++		goto bad;
++	}
++	init_remap(dev, remap);
++	spin_lock(&remap->lock);
++	remap->org_block = msg->org_block;
++
++	spin_lock(&dev->lock);
++
++	/* Here, we insert the new remap into the table, and remove
++	   the existing map, if present, all in one locked operation */
++
++	parent = ht_find_map(&dev->remaps, msg->org_block);
++	if (parent) {
++
++		spin_lock(&parent->lock);
++
++		if (!dmu_get_flag(&parent->flags, DMU_FLAG_VALID)) {
++			if (dmu_get_flag(&parent->flags, DMU_FLAG_WR) ==
++			    dmu_get_flag(&msg->flags, DMU_FLAG_WR) &&
++			    (parent->new_block == msg->new_block)) {
++				/* Perms match for this not-yet-valid remap,
++				   so tag our bios on to it and bail */
++				bio_list_merge(&parent->bios,
++					       &bio_holder);
++
++				spin_unlock(&parent->lock);
++				spin_unlock(&dev->lock);
++				kmem_cache_free(remap_cache, remap);
++				return 1;
++			} else {
++				/* Remove parent from remap table, and
++				   chain our new remap to this one so
++				   it will fire when parent goes
++				   valid */
++				list_del(&parent->list);
++				if (parent->next) {
++					DPRINTK("Parent already chained!\n");
++					BUG();
++				}
++				parent->next = remap;
++				dmu_set_flag(&parent->flags,
++					     DMU_FLAG_TEMPORARY);
++				is_chained = 1;
++			}
++		} else {
++			/* Remove existing valid remap */
++			list_del(&parent->list);
++			destroy_remap(parent);
++			kmem_cache_free(remap_cache, parent);
++		}
++
++		spin_unlock(&parent->lock);
++	}
++
++	ht_insert_map(&dev->remaps, remap);
++
++	spin_unlock(&dev->lock);
++
++	remap->new_block  = msg->new_block;
++	remap->offset     = msg->offset;
++	remap->src        = s_dev;
++	remap->dest       = d_dev;
++	remap->dev        = dev;
++
++	dmu_clr_flag(&remap->flags, DMU_FLAG_VALID);
++	dmu_cpy_flag(&remap->flags, msg->flags, DMU_FLAG_TEMPORARY);
++	dmu_cpy_flag(&remap->flags, msg->flags, DMU_FLAG_WR);
++	dmu_cpy_flag(&remap->flags, msg->flags, DMU_FLAG_RD);
++	dmu_cpy_flag(&remap->flags, msg->flags, DMU_FLAG_COPY_FIRST);
++
++	remap->bios = bio_holder;
++
++	spin_unlock(&remap->lock);
++
++	if (! is_chained)
++		copy_or_flush(remap);
++
++	return 1;
++
++ bad:
++	printk(KERN_ERR DMU_PREFIX "Remap error: chaos may ensue\n");
++
++	return 0;
++}
++
++static int invalidate_request(struct dmu_write *msg,
++			      struct dmu_device *dev)
++{
++	struct dmu_map *remap;
++	struct userspace_request *req;
++	int ret = 1;
++
++	remap = ht_find_map_dev(dev, msg->org_block);
++	if (!remap)
++		ret = 0;
++	else {
++		spin_lock(&dev->lock);
++		spin_lock(&remap->lock);
++		if (dmu_get_flag(&remap->flags, DMU_FLAG_VALID))
++			ht_delete_map(&dev->remaps, remap);
++		else
++			ret = 0;
++		spin_unlock(&remap->lock);
++		spin_unlock(&dev->lock);
++	}
++
++	req = kmem_cache_alloc(request_cache, GFP_KERNEL);
++	if (!req) {
++		printk(KERN_ERR DMU_PREFIX
++		       "Failed to allocate request\n");
++		return 0;
++	}
++
++	if (ret) {
++		DPRINTK("Invalidated mapping for: %llu\n",
++			msg->org_block);
++		init_request(dev, DM_USERSPACE_INVAL_COMPLETE, req);
++	} else {
++		DPRINTK("Failed to invalidate mapping for: %llu\n",
++			msg->org_block);
++		init_request(dev, DM_USERSPACE_INVAL_FAILED, req);
++	}
++
++	req->u.block = msg->org_block;
++
++	add_request(dev, req);
++
++	return ret;
++}
++
++ssize_t dmu_ctl_write(struct file *file, const char __user *buffer,
++		      size_t size, loff_t *offset)
++{
++
++	struct dmu_device *dev = (struct dmu_device *)file->private_data;
++	struct dmu_write msg;
++	struct userspace_request *next;
++	struct userspace_request *req = NULL, *match = NULL;
++	int num_resp, resp_idx;
++	int ret = 0;
++
++	num_resp = size / sizeof(struct dmu_write);
++
++	if (num_resp == 0)
++		return -EINVAL;
++
++	for (resp_idx = 0; resp_idx < num_resp; resp_idx++) {
++		if (copy_from_user(&msg, buffer+ret, sizeof(msg))) {
++			printk(DMU_PREFIX
++			       "control_write copy_from_user failed!\n");
++			ret = -EFAULT;
++			goto out;
++		}
++
++		ret += sizeof(msg);
++
++		match = NULL;
++		/* See if we have a pending request that matches this */
++		spin_lock(&dev->lock);
++		list_for_each_entry_safe(req, next, &dev->requests, list) {
++			if ((req->type == DM_USERSPACE_MAP_BLOCK) &&
++			    (req->id == msg.id)) {
++				list_del(&req->list);
++				match = req;
++				break;
++			}
++		}
++		spin_unlock(&dev->lock);
++
++		if (!match)
++			DPRINTK("Processing unsolicited request: %u\n",
++				msg.id);
++
++		switch (msg.type) {
++
++		case DM_USERSPACE_MAP_BLOCK:
++			DPRINTK("Got map: %llu -> %llu:%lli (%i:%i) [%c]\n",
++				msg.org_block,
++				msg.new_block,
++				msg.offset,
++				msg.dest_maj,
++				msg.dest_min,
++				dmu_get_flag(&msg.flags, DMU_FLAG_WR)?'W':'R');
++			remap_request(&msg, dev, match);
++			break;
++
++		case DM_USERSPACE_MAP_FAILED:
++			if (match) {
++				printk(KERN_EMERG DMU_PREFIX
++				       "userspace reported "
++				       "failure to map sector %lu\n",
++				       (unsigned long)
++				       match->u.bios.head->bi_sector);
++
++				spin_lock(&match->lock);
++				error_bios(&match->u.bios);
++				spin_unlock(&match->lock);
++			}
++			break;
++
++		case DM_USERSPACE_MAP_INVALIDATE:
++			invalidate_request(&msg, dev);
++			break;
++
++		default:
++			printk(KERN_ERR DMU_PREFIX
++			       "Unknown request type: %i\n", msg.type);
++		}
++
++		if (match)
++			kmem_cache_free(request_cache, match);
++	}
++ out:
++	return ret;
++}
++
++int dmu_ctl_open(struct inode *inode, struct file *file)
++{
++	struct dmu_device *dev;
++
++	dev = container_of(inode->i_cdev, struct dmu_device, cdev);
++
++	get_dev(dev);
++
++	file->private_data = dev;
++
++	return 0;
++}
++
++int dmu_ctl_release(struct inode *inode, struct file *file)
++{
++	struct dmu_device *dev;
++
++	dev = (struct dmu_device *)file->private_data;
++
++	put_dev(dev);
++
++	return 0;
++}
++
++unsigned dmu_ctl_poll(struct file *file, poll_table *wait)
++{
++	struct dmu_device *dev;
++	unsigned mask = 0;
++
++	dev = (struct dmu_device *)file->private_data;
++
++	poll_wait(file, &dev->wqueue, wait);
++
++	if (have_pending_requests(dev))
++		mask |= POLLIN | POLLRDNORM;
++
++	return mask;
++}
++
++static struct file_operations ctl_fops = {
++	.open    = dmu_ctl_open,
++	.release = dmu_ctl_release,
++	.read    = dmu_ctl_read,
++	.write   = dmu_ctl_write,
++	.poll    = dmu_ctl_poll,
++	.owner   = THIS_MODULE,
++};
++
++int __init dm_userspace_init(void)
++{
++	int r = dm_register_target(&userspace_target);
++	if (r < 0) {
++		DMERR(DMU_PREFIX "Register failed %d", r);
++		return 0;
++	}
++
++	spin_lock_init(&devices_lock);
++
++	if (enable_watchdog) {
++		INIT_WORK(&wd, watchdog, NULL);
++		schedule_delayed_work(&wd, HZ);
++	}
++
++	request_cache =
++		kmem_cache_create("dm-userspace-requests",
++				  sizeof(struct userspace_request),
++				  __alignof__ (struct userspace_request),
++				  0, NULL, NULL);
++	if (!request_cache) {
++		DMERR(DMU_PREFIX "Failed to allocate request cache\n");
++		goto bad;
++	}
++
++	remap_cache =
++		kmem_cache_create("dm-userspace-remaps",
++				  sizeof(struct dmu_map),
++				  __alignof__ (struct dmu_map),
++				  0, NULL, NULL);
++	if (!remap_cache) {
++		DMERR(DMU_PREFIX "Failed to allocate remap cache\n");
++		goto bad2;
++	}
++
++	r = alloc_chrdev_region(&our_dev, 0, 10, "dm-userspace");
++	if (r) {
++		DMERR(DMU_PREFIX "Failed to allocate chardev region\n");
++		goto bad3;
++	}
++
++	DPRINTK(DMU_PREFIX "Loaded (major %i)\n", MAJOR(our_dev));
++
++	return 1;
++
++ bad3:
++	kmem_cache_destroy(remap_cache);
++ bad2:
++	kmem_cache_destroy(request_cache);
++ bad:
++	dm_unregister_target(&userspace_target);
++	return 0;
++
++}
++
++void __exit dm_userspace_exit(void)
++{
++	int r;
++	struct list_head *cursor, *next;
++	struct dmu_device *dev;
++
++	DPRINTK(DMU_PREFIX "Unloading\n");
++
++	if (enable_watchdog)
++		if (!cancel_delayed_work(&wd))
++			flush_scheduled_work();
++
++	spin_lock(&devices_lock);
++
++	list_for_each_safe(cursor, next, &devices) {
++		dev = list_entry(cursor, struct dmu_device, list);
++		list_del(cursor);
++	}
++
++	spin_unlock(&devices_lock);
++
++	unregister_chrdev_region(our_dev, 10);
++
++	kmem_cache_destroy(request_cache);
++	kmem_cache_destroy(remap_cache);
++
++	r = dm_unregister_target(&userspace_target);
++	if (r < 0)
++		DMERR(DMU_PREFIX "unregister failed %d", r);
++}
++
++module_init(dm_userspace_init);
++module_exit(dm_userspace_exit);
++
++module_param(enable_watchdog, int, S_IRUGO);
++
++MODULE_DESCRIPTION(DM_NAME " userspace target");
++MODULE_AUTHOR("Dan Smith");
++MODULE_LICENSE("GPL");
+diff -Naur ./drivers/md/Kconfig ../linux-2.6.16.13-dmu/drivers/md/Kconfig
+--- ./drivers/md/Kconfig	2006-05-02 14:38:44.000000000 -0700
++++ ../linux-2.6.16.13-dmu/drivers/md/Kconfig	2006-06-15 09:17:17.000000000 -0700
+@@ -210,6 +210,12 @@
+        ---help---
+          Allow volume managers to take writeable snapshots of a device.
+ 
++config DM_USERSPACE
++       tristate "Userspace target (EXPERIMENTAL)"
++       depends on BLK_DEV_DM && EXPERIMENTAL
++       ---help---
++	 A target that provides a userspace interface to device-mapper
++
+ config DM_MIRROR
+        tristate "Mirror target (EXPERIMENTAL)"
+        depends on BLK_DEV_DM && EXPERIMENTAL
+diff -Naur ./drivers/md/Makefile ../linux-2.6.16.13-dmu/drivers/md/Makefile
+--- ./drivers/md/Makefile	2006-05-02 14:38:44.000000000 -0700
++++ ../linux-2.6.16.13-dmu/drivers/md/Makefile	2006-06-15 09:17:17.000000000 -0700
+@@ -37,6 +37,7 @@
+ obj-$(CONFIG_DM_SNAPSHOT)	+= dm-snapshot.o
+ obj-$(CONFIG_DM_MIRROR)		+= dm-mirror.o
+ obj-$(CONFIG_DM_ZERO)		+= dm-zero.o
++obj-$(CONFIG_DM_USERSPACE)      += dm-userspace.o
+ 
+ quiet_cmd_unroll = UNROLL  $@
+       cmd_unroll = $(PERL) $(srctree)/$(src)/unroll.pl $(UNROLL) \
+diff -Naur ./include/linux/dm-userspace.h ../linux-2.6.16.13-dmu/include/linux/dm-userspace.h
+--- ./include/linux/dm-userspace.h	1969-12-31 16:00:00.000000000 -0800
++++ ../linux-2.6.16.13-dmu/include/linux/dm-userspace.h	2006-06-15 09:17:17.000000000 -0700
+@@ -0,0 +1,88 @@
++/*
++ * Copyright (C) International Business Machines Corp., 2006
++ * Author: Dan Smith <danms@us.ibm.com>
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License as published by
++ * the Free Software Foundation; under version 2 of the License.
++ *
++ * This program is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
++ * GNU General Public License for more details.
++ *
++ * You should have received a copy of the GNU General Public License
++ * along with this program; if not, write to the Free Software
++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
++ *
++ */
++
++#ifndef __DM_USERSPACE_H
++#define __DM_USERSPACE_H
++
++#ifdef __KERNEL__
++# include <linux/types.h>
++#else
++# include <stdint.h>
++#endif
++
++/*
++ * Message Types
++ */
++#define DM_USERSPACE_MAP_BLOCK        1
++#define DM_USERSPACE_MAP_FAILED       2
++#define DM_USERSPACE_MAP_INVALIDATE   3
++#define DM_USERSPACE_COPY_FINISHED  100
++#define DM_USERSPACE_INVAL_COMPLETE 101
++#define DM_USERSPACE_INVAL_FAILED   102
++
++/*
++ * Flags and associated macros
++ */
++#define DMU_FLAG_VALID       1
++#define DMU_FLAG_RD          2
++#define DMU_FLAG_WR          4
++#define DMU_FLAG_COPY_FIRST  8
++#define DMU_FLAG_TEMPORARY  16
++
++static int dmu_get_flag(uint32_t *flags, uint32_t flag)
++{
++	return (*flags & flag) != 0;
++}
++
++static void dmu_set_flag(uint32_t *flags, uint32_t flag)
++{
++	*flags |= flag;
++}
++
++static void dmu_clr_flag(uint32_t *flags, uint32_t flag)
++{
++	*flags &= (~flag);
++}
++
++static void dmu_cpy_flag(uint32_t *flags, uint32_t src, uint32_t flag)
++{
++	*flags = (*flags & ~flag) | (src & flag);
++}
++
++/*
++ * This is the message that is passed back and forth between the
++ * kernel and the user application
++ */
++struct dmu_write {
++	uint64_t org_block;    /* Block that was accessed */
++	uint64_t new_block;    /* The new block it should go to */
++  	int64_t offset;        /* Sector offset of the block, if needed  */
++
++	uint32_t id;           /* Unique ID for this request */
++	uint32_t type;         /* Type of request */
++	uint32_t flags;        /* Flags */
++
++	uint32_t src_maj;      /* The source device for copying */
++	uint32_t src_min;
++
++	uint32_t dest_maj;     /* Destination device for copying, and */
++	uint32_t dest_min;     /* for the block access                */
++};
++
++#endif

[-- Attachment #1.2: Type: application/pgp-signature, Size: 190 bytes --]

[-- Attachment #2: Type: text/plain, Size: 138 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH] Add dm-userspace to the Xen kernel
@ 2006-06-15 21:07 Anthony Liguori
  2006-06-15 21:38 ` Nivedita Singhvi
  0 siblings, 1 reply; 17+ messages in thread
From: Anthony Liguori @ 2006-06-15 21:07 UTC (permalink / raw)
  To: Dan Smith, xen-devel

I think dm-userspace is great, but I don't think it belongs in 
patches/.  To quote Chris W.:

"Good things there are bits that are ready to go upstream or fixes to 
base Linux that are transient by version (backport of a fix that's in 
the -rc for the next kernel or somesuch)."

Until this ends up in -mm, I really don't think we should add this to 
the -xen tree as it has no dependence on Xen.  We should be good members 
of the kernel community here and go through the normal channels.  At any 
rate, I would think that you would want it to be useful first before 
adding it which means bringing in all of the tools into the tree.

If a user already has to fetch a bunch of tools and compile from source, 
adding an additional patch to the Xen build isn't so bad.

Regards,

Anthony Liguori

Dan Smith wrote:

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH] Add dm-userspace to the Xen kernel
  2006-06-15 21:07 Anthony Liguori
@ 2006-06-15 21:38 ` Nivedita Singhvi
  2006-06-15 21:44   ` Anthony Liguori
  0 siblings, 1 reply; 17+ messages in thread
From: Nivedita Singhvi @ 2006-06-15 21:38 UTC (permalink / raw)
  To: Anthony Liguori; +Cc: Dan Smith, xen-devel

Anthony Liguori wrote:
> I think dm-userspace is great, but I don't think it belongs in 
> patches/.  To quote Chris W.:
> 
> "Good things there are bits that are ready to go upstream or fixes to 
> base Linux that are transient by version (backport of a fix that's in 
> the -rc for the next kernel or somesuch)."
> 
> Until this ends up in -mm, I really don't think we should add this to 
> the -xen tree as it has no dependence on Xen.  We should be good members 
> of the kernel community here and go through the normal channels.  At any 
> rate, I would think that you would want it to be useful first before 
> adding it which means bringing in all of the tools into the tree.

It won't be useful in the kernel unless there are actual
consumers, of which Xen could/might be one. Having a use
case flesh out problems and get the performance and design
architecture examined in detail will be useful to the Linux
maintainers - they get something more robust and better
designed, although it's certainly not Xen's obligation to
provide that, per se. I'm just seeing the possibility of
mutual benefit, here.

That said, I believe since the patch is in the device-mapper
tree for review, there is a good chance it will go into
kernel in any case.

thanks,
Nivedita

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH] Add dm-userspace to the Xen kernel
  2006-06-15 21:38 ` Nivedita Singhvi
@ 2006-06-15 21:44   ` Anthony Liguori
  2006-06-15 22:09     ` Dan Smith
  0 siblings, 1 reply; 17+ messages in thread
From: Anthony Liguori @ 2006-06-15 21:44 UTC (permalink / raw)
  To: Nivedita Singhvi; +Cc: Dan Smith, xen-devel

Nivedita Singhvi wrote:
> Anthony Liguori wrote:
>> I think dm-userspace is great, but I don't think it belongs in 
>> patches/.  To quote Chris W.:
>>
>> "Good things there are bits that are ready to go upstream or fixes to 
>> base Linux that are transient by version (backport of a fix that's in 
>> the -rc for the next kernel or somesuch)."
>>
>> Until this ends up in -mm, I really don't think we should add this to 
>> the -xen tree as it has no dependence on Xen.  We should be good 
>> members of the kernel community here and go through the normal 
>> channels.  At any rate, I would think that you would want it to be 
>> useful first before adding it which means bringing in all of the 
>> tools into the tree.
>
> It won't be useful in the kernel unless there are actual
> consumers, of which Xen could/might be one. Having a use
> case flesh out problems and get the performance and design
> architecture examined in detail will be useful to the Linux
> maintainers - they get something more robust and better
> designed, although it's certainly not Xen's obligation to
> provide that, per se. I'm just seeing the possibility of
> mutual benefit, here.
>
> That said, I believe since the patch is in the device-mapper
> tree for review, there is a good chance it will go into
> kernel in any case.

Agreed.  I think it's a good idea to let it go through that review 
process before going into the Xen tree.  As I said, once it's in -mm, I 
would imagine that it the interfaces would be stable enough that there's 
no harm putting it into the Xen tree.

Especially since libdmu is being considered for inclusion in the 
device-mapper userspace.  Things would get pretty ugly if the 
dm-userspace ABI changes between now and when it shows up in -mm as 
there'd be conflicting userspace packages.

Regards,

Anthony Liguori

> thanks,
> Nivedita
>

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH] Add dm-userspace to the Xen kernel
  2006-06-15 21:44   ` Anthony Liguori
@ 2006-06-15 22:09     ` Dan Smith
  0 siblings, 0 replies; 17+ messages in thread
From: Dan Smith @ 2006-06-15 22:09 UTC (permalink / raw)
  To: Anthony Liguori; +Cc: xen-devel


[-- Attachment #1.1: Type: text/plain, Size: 1297 bytes --]

AL> Agreed.  I think it's a good idea to let it go through that review
AL> process before going into the Xen tree.  As I said, once it's in
AL> -mm, I would imagine that it the interfaces would be stable enough
AL> that there's no harm putting it into the Xen tree.

That may be true, however, putting it into the Xen tree at the moment
would only be temporary anyway.  I think that it changing shape while
in the Xen tree isn't likely to break anything, as people using it
at that point would have to know that it was subject to change.  I
don't think there is any danger in anyone snapshotting the Xen tree
with a not-yet-final version of dm-userspace in it and assuming it's
production-ready.

To quote your quote:

CW> "Good things there are bits that are ready to go upstream or fixes
CW> to base Linux that are transient by version (backport of a fix
CW> that's in the -rc for the next kernel or somesuch)."

The code has been accepted into the device-mapper review tree, which
may qualify it as almost "ready to go upstream".  So, if we fall under
that umbrella, I think it's reasonable to get it into the Xen tree for
testing and review.  Perhaps Chris can weigh in here?

-- 
Dan Smith
IBM Linux Technology Center
Open Hypervisor Team
email: danms@us.ibm.com

[-- Attachment #1.2: Type: application/pgp-signature, Size: 190 bytes --]

[-- Attachment #2: Type: text/plain, Size: 138 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH] Add dm-userspace to the Xen kernel
  2006-06-15 19:47 [PATCH] Add dm-userspace to the Xen kernel Dan Smith
@ 2006-06-16 18:31 ` Niraj Tolia
  2006-06-16 18:50   ` Dan Smith
  2006-06-16 18:56   ` Anthony Liguori
  0 siblings, 2 replies; 17+ messages in thread
From: Niraj Tolia @ 2006-06-16 18:31 UTC (permalink / raw)
  To: Dan Smith; +Cc: Xen Developers


[-- Attachment #1.1: Type: text/plain, Size: 1459 bytes --]

Hi Dan,

I had a question or two about dm-userspace. It looks very interesting and I
hope to find time sometime soon to experiment with it. My question was,
would this allow me to export a logical disk without having all the data
present locally. The scenario I am thinking of would be applicable for VM
migration across WANs.

In that case, if I was to represent a virtual disk as a number of logical
blocks, I could have the userspace application page the logical blocks in on
demand. This would be useful when a VM migrates to a machine it has never
been to earlier. Does this make sense? Would it work with dm-userspace?

Cheers,
Niraj

On 6/15/06, Dan Smith <danms@us.ibm.com> wrote:
>
> This patch adds dm-userspace to the -xen Linux kernel.  I'd like to
> get it into the tree so that people that want to can play with it.
> Anyone wishing to to do so can download the tools separately, but they
> need the kernel module to be able to use it.
>
> I've updated the code for and tested it on x86_64.
>
> The tools are available here:
>
>   http://static.danplanet.com/dm-userspace/libdmu-0.3.tar.gz
>   http://static.danplanet.com/dm-userspace/cowd-0.3.tar.gz
>
> --
> Dan Smith
> IBM Linux Technology Center
> Open Hypervisor Team
> email: danms@us.ibm.com
>
>
>
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@lists.xensource.com
> http://lists.xensource.com/xen-devel
>
>
>
>


-- 
http://www.cs.cmu.edu/~ntolia

[-- Attachment #1.2: Type: text/html, Size: 2171 bytes --]

[-- Attachment #2: Type: text/plain, Size: 138 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH] Add dm-userspace to the Xen kernel
  2006-06-16 18:31 ` Niraj Tolia
@ 2006-06-16 18:50   ` Dan Smith
  2006-06-16 18:56   ` Anthony Liguori
  1 sibling, 0 replies; 17+ messages in thread
From: Dan Smith @ 2006-06-16 18:50 UTC (permalink / raw)
  To: Niraj Tolia; +Cc: Xen Developers


[-- Attachment #1.1: Type: text/plain, Size: 1172 bytes --]

NT> My question was, would this allow me to export a logical disk
NT> without having all the data present locally. The scenario I am
NT> thinking of would be applicable for VM migration across WANs.

Sure.

NT> In that case, if I was to represent a virtual disk as a number of
NT> logical blocks, I could have the userspace application page the
NT> logical blocks in on demand. This would be useful when a VM
NT> migrates to a machine it has never been to earlier. Does this make
NT> sense? Would it work with dm-userspace?

Definitely!  You could migrate the domain to another machine, and then
start a background process that starts copying its blocks to the other
machine.  If a request comes in for a block (dm-userspace will let you
handle it from the userspace app doing the copy), you immediately pull
it across to satisfy the request.  This would be a very interesting
way to support migration of domains with block devices, without
needing to wait for the block migration before starting the domain
back up on the target machine.

Very cool idea!

-- 
Dan Smith
IBM Linux Technology Center
Open Hypervisor Team
email: danms@us.ibm.com

[-- Attachment #1.2: Type: application/pgp-signature, Size: 190 bytes --]

[-- Attachment #2: Type: text/plain, Size: 138 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH] Add dm-userspace to the Xen kernel
  2006-06-16 18:31 ` Niraj Tolia
  2006-06-16 18:50   ` Dan Smith
@ 2006-06-16 18:56   ` Anthony Liguori
  2006-06-16 19:39     ` Andrew Warfield
  2006-06-16 19:41     ` Niraj Tolia
  1 sibling, 2 replies; 17+ messages in thread
From: Anthony Liguori @ 2006-06-16 18:56 UTC (permalink / raw)
  To: Niraj Tolia; +Cc: Dan Smith, Xen Developers

You probably want to check out drbd.  A number of people have used it 
successfully to do storage migration with Xen domains.

Regards,

Anthony Liguori

Niraj Tolia wrote:
> Hi Dan,
>
> I had a question or two about dm-userspace. It looks very interesting 
> and I hope to find time sometime soon to experiment with it. My 
> question was, would this allow me to export a logical disk without 
> having all the data present locally. The scenario I am thinking of 
> would be applicable for VM migration across WANs.
>
> In that case, if I was to represent a virtual disk as a number of 
> logical blocks, I could have the userspace application page the 
> logical blocks in on demand. This would be useful when a VM migrates 
> to a machine it has never been to earlier. Does this make sense? Would 
> it work with dm-userspace?
>
> Cheers,
> Niraj
>
> On 6/15/06, *Dan Smith* <danms@us.ibm.com <mailto:danms@us.ibm.com>> 
> wrote:
>
>     This patch adds dm-userspace to the -xen Linux kernel.  I'd like to
>     get it into the tree so that people that want to can play with it.
>     Anyone wishing to to do so can download the tools separately, but they
>     need the kernel module to be able to use it.
>
>     I've updated the code for and tested it on x86_64.
>
>     The tools are available here:
>
>       http://static.danplanet.com/dm-userspace/libdmu-0.3.tar.gz
>     <http://static.danplanet.com/dm-userspace/libdmu-0.3.tar.gz>
>       http://static.danplanet.com/dm-userspace/cowd-0.3.tar.gz
>
>     --
>     Dan Smith
>     IBM Linux Technology Center
>     Open Hypervisor Team
>     email: danms@us.ibm.com <mailto:danms@us.ibm.com>
>
>
>
>     _______________________________________________
>     Xen-devel mailing list
>     Xen-devel@lists.xensource.com <mailto:Xen-devel@lists.xensource.com>
>     http://lists.xensource.com/xen-devel
>
>
>
>
>
>
> -- 
> http://www.cs.cmu.edu/~ntolia <http://www.cs.cmu.edu/%7Entolia>
> ------------------------------------------------------------------------
>
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@lists.xensource.com
> http://lists.xensource.com/xen-devel
>   

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH] Add dm-userspace to the Xen kernel
  2006-06-16 18:56   ` Anthony Liguori
@ 2006-06-16 19:39     ` Andrew Warfield
  2006-06-16 19:41     ` Niraj Tolia
  1 sibling, 0 replies; 17+ messages in thread
From: Andrew Warfield @ 2006-06-16 19:39 UTC (permalink / raw)
  To: Anthony Liguori; +Cc: Niraj Tolia, Xen Developers, Dan Smith

On 6/16/06, Anthony Liguori <aliguori@us.ibm.com> wrote:
> You probably want to check out drbd.  A number of people have used it
> successfully to do storage migration with Xen domains.

gnbd is also worth a peek...

a.

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH] Add dm-userspace to the Xen kernel
  2006-06-16 18:56   ` Anthony Liguori
  2006-06-16 19:39     ` Andrew Warfield
@ 2006-06-16 19:41     ` Niraj Tolia
  1 sibling, 0 replies; 17+ messages in thread
From: Niraj Tolia @ 2006-06-16 19:41 UTC (permalink / raw)
  To: Anthony Liguori; +Cc: Dan Smith, Xen Developers


[-- Attachment #1.1: Type: text/plain, Size: 2578 bytes --]

On 6/16/06, Anthony Liguori <aliguori@us.ibm.com> wrote:
>
> You probably want to check out drbd.  A number of people have used it
> successfully to do storage migration with Xen domains.



DRBD has a number of disadvantages. The last time I checked, it only mirrors
data between two nodes. Further, it requires a complete copy of the disk to
be present on both nodes.

Niraj


Regards,
>
> Anthony Liguori
>
> Niraj Tolia wrote:
> > Hi Dan,
> >
> > I had a question or two about dm-userspace. It looks very interesting
> > and I hope to find time sometime soon to experiment with it. My
> > question was, would this allow me to export a logical disk without
> > having all the data present locally. The scenario I am thinking of
> > would be applicable for VM migration across WANs.
> >
> > In that case, if I was to represent a virtual disk as a number of
> > logical blocks, I could have the userspace application page the
> > logical blocks in on demand. This would be useful when a VM migrates
> > to a machine it has never been to earlier. Does this make sense? Would
> > it work with dm-userspace?
> >
> > Cheers,
> > Niraj
> >
> > On 6/15/06, *Dan Smith* <danms@us.ibm.com <mailto:danms@us.ibm.com>>
> > wrote:
> >
> >     This patch adds dm-userspace to the -xen Linux kernel.  I'd like to
> >     get it into the tree so that people that want to can play with it.
> >     Anyone wishing to to do so can download the tools separately, but
> they
> >     need the kernel module to be able to use it.
> >
> >     I've updated the code for and tested it on x86_64.
> >
> >     The tools are available here:
> >
> >       http://static.danplanet.com/dm-userspace/libdmu-0.3.tar.gz
> >     <http://static.danplanet.com/dm-userspace/libdmu-0.3.tar.gz>
> >       http://static.danplanet.com/dm-userspace/cowd-0.3.tar.gz
> >
> >     --
> >     Dan Smith
> >     IBM Linux Technology Center
> >     Open Hypervisor Team
> >     email: danms@us.ibm.com <mailto:danms@us.ibm.com>
> >
> >
> >
> >     _______________________________________________
> >     Xen-devel mailing list
> >     Xen-devel@lists.xensource.com <mailto:Xen-devel@lists.xensource.com>
> >     http://lists.xensource.com/xen-devel
> >
> >
> >
> >
> >
> >
> > --
> > http://www.cs.cmu.edu/~ntolia <http://www.cs.cmu.edu/%7Entolia>
> > ------------------------------------------------------------------------
> >
> > _______________________________________________
> > Xen-devel mailing list
> > Xen-devel@lists.xensource.com
> > http://lists.xensource.com/xen-devel
> >
>
>


-- 
http://www.cs.cmu.edu/~ntolia

[-- Attachment #1.2: Type: text/html, Size: 4493 bytes --]

[-- Attachment #2: Type: text/plain, Size: 138 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel

^ permalink raw reply	[flat|nested] 17+ messages in thread

end of thread, other threads:[~2006-06-16 19:41 UTC | newest]

Thread overview: 17+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2006-06-15 19:47 [PATCH] Add dm-userspace to the Xen kernel Dan Smith
2006-06-16 18:31 ` Niraj Tolia
2006-06-16 18:50   ` Dan Smith
2006-06-16 18:56   ` Anthony Liguori
2006-06-16 19:39     ` Andrew Warfield
2006-06-16 19:41     ` Niraj Tolia
  -- strict thread matches above, loose matches on Subject: below --
2006-06-15 21:07 Anthony Liguori
2006-06-15 21:38 ` Nivedita Singhvi
2006-06-15 21:44   ` Anthony Liguori
2006-06-15 22:09     ` Dan Smith
2006-06-09 21:08 Dan Smith
2006-06-09 21:48 ` Anthony Liguori
2006-06-09 23:12   ` Dan Smith
2006-06-10  8:55     ` Keir Fraser
2006-06-10 20:40 ` Bastian Blank
2006-06-12 14:52   ` Dan Smith
2006-06-13  8:57     ` Bastian Blank

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.